//===--- BuiltinsPTX.def - PTX Builtin function database ----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the PTX-specific builtin function database. Users of
// this file must define the BUILTIN macro to make use of this information.
//
//===----------------------------------------------------------------------===//

// The format of this database matches clang/Basic/Builtins.def.

// Fallback for includers that define only BUILTIN: TARGET_BUILTIN forwards to
// BUILTIN and the FEATURE argument is dropped.
#if defined(BUILTIN) && !defined(TARGET_BUILTIN)
#   define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BUILTIN(ID, TYPE, ATTRS)
#endif

// Save any definitions the includer already has for the helper macro names
// before they are redefined below.
// NOTE(review): the matching pop_macro lines are not visible in this chunk;
// presumably they are at the end of the file -- confirm.
#pragma push_macro("SM_53")
#pragma push_macro("SM_70")
#pragma push_macro("SM_72")
#pragma push_macro("SM_75")
#pragma push_macro("SM_80")
#pragma push_macro("SM_86")
#pragma push_macro("SM_87")
#pragma push_macro("SM_89")
#pragma push_macro("SM_90")
#pragma push_macro("SM_90a")
// Each SM_<N> is a string literal that names sm_<N> followed by every newer
// architecture in this list, joined with '|'.  For example SM_89 expands to
// "sm_89|sm_90|sm_90a".  The chain is built newest-first so that each macro
// can reuse the one defined for the next architecture.
#define SM_90a "sm_90a"
#define SM_90 "sm_90|" SM_90a
#define SM_89 "sm_89|" SM_90
#define SM_87 "sm_87|" SM_89
#define SM_86 "sm_86|" SM_87
#define SM_80 "sm_80|" SM_86
#define SM_75 "sm_75|" SM_80
#define SM_72 "sm_72|" SM_75
#define SM_70 "sm_70|" SM_72

// SM_60 also covers sm_61 and sm_62; SM_53 is the oldest entry in the chain.
#pragma push_macro("SM_60")
#define SM_60 "sm_60|sm_61|sm_62|" SM_70
#define SM_53 "sm_53|" SM_60

// Same save-before-redefine treatment for the PTX version macros.
#pragma push_macro("PTX42")
#pragma push_macro("PTX60")
#pragma push_macro("PTX61")
#pragma push_macro("PTX62")
#pragma push_macro("PTX63")
#pragma push_macro("PTX64")
#pragma push_macro("PTX65")
#pragma push_macro("PTX70")
#pragma push_macro("PTX71")
#pragma push_macro("PTX72")
#pragma push_macro("PTX73")
#pragma push_macro("PTX74")
#pragma push_macro("PTX75")
#pragma push_macro("PTX76")
#pragma push_macro("PTX77")
#pragma push_macro("PTX78")
#pragma push_macro("PTX80")
#pragma push_macro("PTX81")
#pragma push_macro("PTX82")
#pragma push_macro("PTX83")
#pragma push_macro("PTX84")
#pragma push_macro("PTX85")
// Each PTX<NN> is a string literal naming ptx<NN> followed by every newer PTX
// version in this list, joined with '|'.  For example PTX84 expands to
// "ptx84|ptx85".  Defined newest-first so each macro reuses the next one.
#define PTX85 "ptx85"
#define PTX84 "ptx84|" PTX85
#define PTX83 "ptx83|" PTX84
#define PTX82 "ptx82|" PTX83
#define PTX81 "ptx81|" PTX82
#define PTX80 "ptx80|" PTX81
#define PTX78 "ptx78|" PTX80
#define PTX77 "ptx77|" PTX78
#define PTX76 "ptx76|" PTX77
#define PTX75 "ptx75|" PTX76
#define PTX74 "ptx74|" PTX75
#define PTX73 "ptx73|" PTX74
#define PTX72 "ptx72|" PTX73
#define PTX71 "ptx71|" PTX72
#define PTX70 "ptx70|" PTX71
#define PTX65 "ptx65|" PTX70
#define PTX64 "ptx64|" PTX65
#define PTX63 "ptx63|" PTX64
#define PTX62 "ptx62|" PTX63
#define PTX61 "ptx61|" PTX62
#define PTX60 "ptx60|" PTX61
#define PTX42 "ptx42|" PTX60

// AND(a, b) pastes two feature strings into "(a),(b)".
// NOTE(review): this relies on the consumer of the FEATURE string treating
// ',' as conjunction and '|' as alternation -- confirm against Clang's
// required-target-feature syntax.
#pragma push_macro("AND")
#define AND(a, b) "(" a "),(" b ")"

// Special Registers

// The tid/ntid/ctaid/nctaid readers below are plain BUILTINs (no feature
// requirement); each returns "i" and is marked "nc".
BUILTIN(__nvvm_read_ptx_sreg_tid_x, "i", "nc")
BUILTIN(__nvvm_read_ptx_sreg_tid_y, "i", "nc")
BUILTIN(__nvvm_read_ptx_sreg_tid_z, "i", "nc")
BUILTIN(__nvvm_read_ptx_sreg_tid_w, "i", "nc")

BUILTIN(__nvvm_read_ptx_sreg_ntid_x, "i", "nc")
BUILTIN(__nvvm_read_ptx_sreg_ntid_y, "i", "nc")
BUILTIN(__nvvm_read_ptx_sreg_ntid_z, "i", "nc")
BUILTIN(__nvvm_read_ptx_sreg_ntid_w, "i", "nc")

BUILTIN(__nvvm_read_ptx_sreg_ctaid_x, "i", "nc")
BUILTIN(__nvvm_read_ptx_sreg_ctaid_y, "i", "nc")
BUILTIN(__nvvm_read_ptx_sreg_ctaid_z, "i", "nc")
BUILTIN(__nvvm_read_ptx_sreg_ctaid_w, "i", "nc")

BUILTIN(__nvvm_read_ptx_sreg_nctaid_x, "i", "nc")
BUILTIN(__nvvm_read_ptx_sreg_nctaid_y, "i", "nc")
BUILTIN(__nvvm_read_ptx_sreg_nctaid_z, "i", "nc")
BUILTIN(__nvvm_read_ptx_sreg_nctaid_w, "i", "nc")

// Cluster registers carry a feature requirement: AND(SM_90, PTX78).
TARGET_BUILTIN(__nvvm_read_ptx_sreg_clusterid_x, "i", "nc", AND(SM_90, PTX78))
TARGET_BUILTIN(__nvvm_read_ptx_sreg_clusterid_y, "i", "nc", AND(SM_90, PTX78))
// Cluster special registers (continued).  Every TARGET_BUILTIN in this group
// is gated on AND(SM_90, PTX78).
TARGET_BUILTIN(__nvvm_read_ptx_sreg_clusterid_z, "i", "nc", AND(SM_90, PTX78))
TARGET_BUILTIN(__nvvm_read_ptx_sreg_clusterid_w, "i", "nc", AND(SM_90, PTX78))

TARGET_BUILTIN(__nvvm_read_ptx_sreg_nclusterid_x, "i", "nc", AND(SM_90, PTX78))
TARGET_BUILTIN(__nvvm_read_ptx_sreg_nclusterid_y, "i", "nc", AND(SM_90, PTX78))
TARGET_BUILTIN(__nvvm_read_ptx_sreg_nclusterid_z, "i", "nc", AND(SM_90, PTX78))
TARGET_BUILTIN(__nvvm_read_ptx_sreg_nclusterid_w, "i", "nc", AND(SM_90, PTX78))

TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_ctaid_x, "i", "nc", AND(SM_90, PTX78))
TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_ctaid_y, "i", "nc", AND(SM_90, PTX78))
TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_ctaid_z, "i", "nc", AND(SM_90, PTX78))
TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_ctaid_w, "i", "nc", AND(SM_90, PTX78))

TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_nctaid_x, "i", "nc", AND(SM_90, PTX78))
TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_nctaid_y, "i", "nc", AND(SM_90, PTX78))
TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_nctaid_z, "i", "nc", AND(SM_90, PTX78))
TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_nctaid_w, "i", "nc", AND(SM_90, PTX78))

TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_ctarank, "i", "nc", AND(SM_90, PTX78))
TARGET_BUILTIN(__nvvm_read_ptx_sreg_cluster_nctarank, "i", "nc", AND(SM_90, PTX78))

// Unlike the register readers above, this one has return type "b".
TARGET_BUILTIN(__nvvm_is_explicit_cluster, "b", "nc", AND(SM_90, PTX78))

// The remaining readers in this group have no feature requirement.
BUILTIN(__nvvm_read_ptx_sreg_laneid, "i", "nc")
BUILTIN(__nvvm_read_ptx_sreg_warpid, "i", "nc")
BUILTIN(__nvvm_read_ptx_sreg_nwarpid, "i", "nc")

BUILTIN(__nvvm_read_ptx_sreg_smid, "i", "nc")
BUILTIN(__nvvm_read_ptx_sreg_nsmid, "i", "nc")
BUILTIN(__nvvm_read_ptx_sreg_gridid, "i", "nc")

BUILTIN(__nvvm_read_ptx_sreg_lanemask_eq, "i", "nc")
BUILTIN(__nvvm_read_ptx_sreg_lanemask_le, "i", "nc")
BUILTIN(__nvvm_read_ptx_sreg_lanemask_lt, "i", "nc")
BUILTIN(__nvvm_read_ptx_sreg_lanemask_ge, "i", "nc")
BUILTIN(__nvvm_read_ptx_sreg_lanemask_gt, "i", "nc")

// The clock, timer and pm readers are marked "n" only, without the "c" used
// by the other special-register readers in this file.
// NOTE(review): presumably because successive reads may return different
// values -- confirm against the attribute letters in Builtins.def.
BUILTIN(__nvvm_read_ptx_sreg_clock, "i", "n")
BUILTIN(__nvvm_read_ptx_sreg_clock64, "LLi", "n")
BUILTIN(__nvvm_read_ptx_sreg_globaltimer, "LLi", "n")

BUILTIN(__nvvm_read_ptx_sreg_pm0, "i", "n")
BUILTIN(__nvvm_read_ptx_sreg_pm1, "i", "n")
BUILTIN(__nvvm_read_ptx_sreg_pm2, "i", "n")
BUILTIN(__nvvm_read_ptx_sreg_pm3, "i", "n")

// MISC

BUILTIN(__nvvm_prmt, "UiUiUiUi", "")
BUILTIN(__nvvm_exit, "v", "r")
// NOTE(review): __nvvm_reflect shares the "r" attribute with __nvvm_exit
// although its prototype "UicC*" has a non-void return type.  If "r" is the
// noreturn letter in Builtins.def, that looks unintended -- confirm.
BUILTIN(__nvvm_reflect, "UicC*", "r")
TARGET_BUILTIN(__nvvm_nanosleep, "vUi", "n", AND(SM_70, PTX63))

// Min Max

// Variant naming in this section: optional _ftz, _nan and _xorsign_abs
// infixes, then the element type suffix.  In the f16/f16x2/bf16 entries
// below, the plain/_ftz/_nan forms require AND(SM_80, PTX70) and the
// _xorsign_abs forms require AND(SM_86, PTX72).
TARGET_BUILTIN(__nvvm_fmin_f16, "hhh", "", AND(SM_80, PTX70))
TARGET_BUILTIN(__nvvm_fmin_ftz_f16, "hhh", "", AND(SM_80, PTX70))
TARGET_BUILTIN(__nvvm_fmin_nan_f16, "hhh", "", AND(SM_80, PTX70))
TARGET_BUILTIN(__nvvm_fmin_ftz_nan_f16, "hhh", "", AND(SM_80, PTX70))
TARGET_BUILTIN(__nvvm_fmin_xorsign_abs_f16, "hhh", "", AND(SM_86, PTX72))
TARGET_BUILTIN(__nvvm_fmin_ftz_xorsign_abs_f16, "hhh", "", AND(SM_86, PTX72))
TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_f16, "hhh", "", AND(SM_86, PTX72))
TARGET_BUILTIN(__nvvm_fmin_ftz_nan_xorsign_abs_f16, "hhh", "",
               AND(SM_86, PTX72))
TARGET_BUILTIN(__nvvm_fmin_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70))
TARGET_BUILTIN(__nvvm_fmin_ftz_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70))
TARGET_BUILTIN(__nvvm_fmin_nan_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70))
TARGET_BUILTIN(__nvvm_fmin_ftz_nan_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70))
TARGET_BUILTIN(__nvvm_fmin_xorsign_abs_f16x2, "V2hV2hV2h", "",
               AND(SM_86, PTX72))
TARGET_BUILTIN(__nvvm_fmin_ftz_xorsign_abs_f16x2, "V2hV2hV2h", "",
               AND(SM_86, PTX72))
TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_f16x2, "V2hV2hV2h", "",
               AND(SM_86, PTX72))
TARGET_BUILTIN(__nvvm_fmin_ftz_nan_xorsign_abs_f16x2, "V2hV2hV2h", "",
               AND(SM_86, PTX72))
TARGET_BUILTIN(__nvvm_fmin_bf16, "yyy", "", AND(SM_80, PTX70))
// fmin, bf16 and bf16x2 forms.  Only the non-ftz _xorsign_abs variants are
// listed for these types.
TARGET_BUILTIN(__nvvm_fmin_ftz_bf16, "yyy", "", AND(SM_80, PTX70))
TARGET_BUILTIN(__nvvm_fmin_nan_bf16, "yyy", "", AND(SM_80, PTX70))
TARGET_BUILTIN(__nvvm_fmin_ftz_nan_bf16, "yyy", "", AND(SM_80, PTX70))
TARGET_BUILTIN(__nvvm_fmin_xorsign_abs_bf16, "yyy", "", AND(SM_86, PTX72))
TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_bf16, "yyy", "",
               AND(SM_86, PTX72))
TARGET_BUILTIN(__nvvm_fmin_bf16x2, "V2yV2yV2y", "", AND(SM_80, PTX70))
TARGET_BUILTIN(__nvvm_fmin_ftz_bf16x2, "V2yV2yV2y", "", AND(SM_80, PTX70))
TARGET_BUILTIN(__nvvm_fmin_nan_bf16x2, "V2yV2yV2y", "", AND(SM_80, PTX70))
TARGET_BUILTIN(__nvvm_fmin_ftz_nan_bf16x2, "V2yV2yV2y", "", AND(SM_80, PTX70))
TARGET_BUILTIN(__nvvm_fmin_xorsign_abs_bf16x2, "V2yV2yV2y", "",
               AND(SM_86, PTX72))
TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_bf16x2, "V2yV2yV2y", "",
               AND(SM_86, PTX72))
// For float, the plain and _ftz forms have no feature requirement; the _nan
// and _xorsign_abs forms are gated like their f16 counterparts.
BUILTIN(__nvvm_fmin_f, "fff", "")
BUILTIN(__nvvm_fmin_ftz_f, "fff", "")
TARGET_BUILTIN(__nvvm_fmin_nan_f, "fff", "", AND(SM_80, PTX70))
TARGET_BUILTIN(__nvvm_fmin_ftz_nan_f, "fff", "", AND(SM_80, PTX70))
TARGET_BUILTIN(__nvvm_fmin_xorsign_abs_f, "fff", "", AND(SM_86, PTX72))
TARGET_BUILTIN(__nvvm_fmin_ftz_xorsign_abs_f, "fff", "", AND(SM_86, PTX72))
TARGET_BUILTIN(__nvvm_fmin_nan_xorsign_abs_f, "fff", "", AND(SM_86, PTX72))
TARGET_BUILTIN(__nvvm_fmin_ftz_nan_xorsign_abs_f, "fff", "", AND(SM_86, PTX72))
BUILTIN(__nvvm_fmin_d, "ddd", "")

// fmax entries follow the same variant and gating pattern as fmin.
TARGET_BUILTIN(__nvvm_fmax_f16, "hhh", "", AND(SM_80, PTX70))
TARGET_BUILTIN(__nvvm_fmax_ftz_f16, "hhh", "", AND(SM_80, PTX70))
TARGET_BUILTIN(__nvvm_fmax_nan_f16, "hhh", "", AND(SM_80, PTX70))
TARGET_BUILTIN(__nvvm_fmax_ftz_nan_f16, "hhh", "", AND(SM_80, PTX70))
TARGET_BUILTIN(__nvvm_fmax_xorsign_abs_f16, "hhh", "", AND(SM_86, PTX72))
TARGET_BUILTIN(__nvvm_fmax_ftz_xorsign_abs_f16, "hhh", "", AND(SM_86, PTX72))
TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_f16, "hhh", "", AND(SM_86, PTX72))
// fmax (continued): remaining f16 variant, then f16x2, bf16, bf16x2 and the
// start of the float forms.  _xorsign_abs variants require AND(SM_86, PTX72);
// the other TARGET_BUILTINs here require AND(SM_80, PTX70).
TARGET_BUILTIN(__nvvm_fmax_ftz_nan_xorsign_abs_f16, "hhh", "",
               AND(SM_86, PTX72))
TARGET_BUILTIN(__nvvm_fmax_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70))
TARGET_BUILTIN(__nvvm_fmax_ftz_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70))
TARGET_BUILTIN(__nvvm_fmax_nan_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70))
TARGET_BUILTIN(__nvvm_fmax_ftz_nan_f16x2, "V2hV2hV2h", "", AND(SM_80, PTX70))
TARGET_BUILTIN(__nvvm_fmax_xorsign_abs_f16x2, "V2hV2hV2h", "",
               AND(SM_86, PTX72))
TARGET_BUILTIN(__nvvm_fmax_ftz_xorsign_abs_f16x2, "V2hV2hV2h", "",
               AND(SM_86, PTX72))
TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_f16x2, "V2hV2hV2h", "",
               AND(SM_86, PTX72))
TARGET_BUILTIN(__nvvm_fmax_ftz_nan_xorsign_abs_f16x2, "V2hV2hV2h", "",
               AND(SM_86, PTX72))
TARGET_BUILTIN(__nvvm_fmax_bf16, "yyy", "", AND(SM_80, PTX70))
TARGET_BUILTIN(__nvvm_fmax_ftz_bf16, "yyy", "", AND(SM_80, PTX70))
TARGET_BUILTIN(__nvvm_fmax_nan_bf16, "yyy", "", AND(SM_80, PTX70))
TARGET_BUILTIN(__nvvm_fmax_ftz_nan_bf16, "yyy", "", AND(SM_80, PTX70))
TARGET_BUILTIN(__nvvm_fmax_xorsign_abs_bf16, "yyy", "", AND(SM_86, PTX72))
TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_bf16, "yyy", "",
               AND(SM_86, PTX72))
TARGET_BUILTIN(__nvvm_fmax_bf16x2, "V2yV2yV2y", "", AND(SM_80, PTX70))
TARGET_BUILTIN(__nvvm_fmax_ftz_bf16x2, "V2yV2yV2y", "", AND(SM_80, PTX70))
TARGET_BUILTIN(__nvvm_fmax_nan_bf16x2, "V2yV2yV2y", "", AND(SM_80, PTX70))
TARGET_BUILTIN(__nvvm_fmax_ftz_nan_bf16x2, "V2yV2yV2y", "", AND(SM_80, PTX70))
TARGET_BUILTIN(__nvvm_fmax_xorsign_abs_bf16x2, "V2yV2yV2y", "",
               AND(SM_86, PTX72))
TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_bf16x2, "V2yV2yV2y", "",
               AND(SM_86, PTX72))
// Plain and _ftz float forms carry no feature requirement.
BUILTIN(__nvvm_fmax_f, "fff", "")
BUILTIN(__nvvm_fmax_ftz_f, "fff", "")
TARGET_BUILTIN(__nvvm_fmax_nan_f, "fff", "", AND(SM_80, PTX70))
TARGET_BUILTIN(__nvvm_fmax_ftz_nan_f, "fff", "", AND(SM_80, PTX70))
TARGET_BUILTIN(__nvvm_fmax_xorsign_abs_f, "fff", "", AND(SM_86, PTX72))
// fmax (continued): remaining float _xorsign_abs variants and the double form.
TARGET_BUILTIN(__nvvm_fmax_ftz_xorsign_abs_f, "fff", "", AND(SM_86, PTX72))
TARGET_BUILTIN(__nvvm_fmax_nan_xorsign_abs_f, "fff", "", AND(SM_86, PTX72))
TARGET_BUILTIN(__nvvm_fmax_ftz_nan_xorsign_abs_f, "fff", "", AND(SM_86, PTX72))
BUILTIN(__nvvm_fmax_d, "ddd", "")

// Multiplication

// Everything from here to the end of "Floor, Ceil" is a plain BUILTIN with an
// empty attribute string.  Names use _rn/_rz/_rm/_rp infixes, each with an
// optional _ftz float variant.
BUILTIN(__nvvm_mulhi_i, "iii", "")
BUILTIN(__nvvm_mulhi_ui, "UiUiUi", "")
BUILTIN(__nvvm_mulhi_ll, "LLiLLiLLi", "")
BUILTIN(__nvvm_mulhi_ull, "ULLiULLiULLi", "")

BUILTIN(__nvvm_mul_rn_ftz_f, "fff", "")
BUILTIN(__nvvm_mul_rn_f, "fff", "")
BUILTIN(__nvvm_mul_rz_ftz_f, "fff", "")
BUILTIN(__nvvm_mul_rz_f, "fff", "")
BUILTIN(__nvvm_mul_rm_ftz_f, "fff", "")
BUILTIN(__nvvm_mul_rm_f, "fff", "")
BUILTIN(__nvvm_mul_rp_ftz_f, "fff", "")
BUILTIN(__nvvm_mul_rp_f, "fff", "")

BUILTIN(__nvvm_mul_rn_d, "ddd", "")
BUILTIN(__nvvm_mul_rz_d, "ddd", "")
BUILTIN(__nvvm_mul_rm_d, "ddd", "")
BUILTIN(__nvvm_mul_rp_d, "ddd", "")

BUILTIN(__nvvm_mul24_i, "iii", "")
BUILTIN(__nvvm_mul24_ui, "UiUiUi", "")

// Div

BUILTIN(__nvvm_div_approx_ftz_f, "fff", "")
BUILTIN(__nvvm_div_approx_f, "fff", "")

BUILTIN(__nvvm_div_rn_ftz_f, "fff", "")
BUILTIN(__nvvm_div_rn_f, "fff", "")
BUILTIN(__nvvm_div_rz_ftz_f, "fff", "")
BUILTIN(__nvvm_div_rz_f, "fff", "")
BUILTIN(__nvvm_div_rm_ftz_f, "fff", "")
BUILTIN(__nvvm_div_rm_f, "fff", "")
BUILTIN(__nvvm_div_rp_ftz_f, "fff", "")
BUILTIN(__nvvm_div_rp_f, "fff", "")

BUILTIN(__nvvm_div_rn_d, "ddd", "")
BUILTIN(__nvvm_div_rz_d, "ddd", "")
BUILTIN(__nvvm_div_rm_d, "ddd", "")
BUILTIN(__nvvm_div_rp_d, "ddd", "")

// Sad

// Three operands, unlike the two-operand arithmetic entries above.
BUILTIN(__nvvm_sad_i, "iiii", "")
BUILTIN(__nvvm_sad_ui, "UiUiUiUi", "")

// Floor, Ceil

BUILTIN(__nvvm_floor_ftz_f, "ff", "")
BUILTIN(__nvvm_floor_f, "ff", "")
BUILTIN(__nvvm_floor_d, "dd", "")

BUILTIN(__nvvm_ceil_ftz_f, "ff", "")
BUILTIN(__nvvm_ceil_f, "ff", "")
BUILTIN(__nvvm_ceil_d, "dd", "")

// Abs

BUILTIN(__nvvm_fabs_ftz_f, "ff", "")
BUILTIN(__nvvm_fabs_f, "ff", "")
BUILTIN(__nvvm_fabs_d, "dd", "")

// Round

BUILTIN(__nvvm_round_ftz_f, "ff", "")
BUILTIN(__nvvm_round_f, "ff", "")
BUILTIN(__nvvm_round_d, "dd", "")

// Trunc

BUILTIN(__nvvm_trunc_ftz_f, "ff", "")
BUILTIN(__nvvm_trunc_f, "ff", "")
BUILTIN(__nvvm_trunc_d, "dd", "")

// Saturate

BUILTIN(__nvvm_saturate_ftz_f, "ff", "")
BUILTIN(__nvvm_saturate_f, "ff", "")
BUILTIN(__nvvm_saturate_d, "dd", "")

// Exp2, Log2

BUILTIN(__nvvm_ex2_approx_ftz_f, "ff", "")
BUILTIN(__nvvm_ex2_approx_f, "ff", "")
BUILTIN(__nvvm_ex2_approx_d, "dd", "")
// Only the f16 forms of ex2 carry a feature requirement.
TARGET_BUILTIN(__nvvm_ex2_approx_f16, "hh", "", AND(SM_75, PTX70))
TARGET_BUILTIN(__nvvm_ex2_approx_f16x2, "V2hV2h", "", AND(SM_75, PTX70))

BUILTIN(__nvvm_lg2_approx_ftz_f, "ff", "")
BUILTIN(__nvvm_lg2_approx_f, "ff", "")
BUILTIN(__nvvm_lg2_approx_d, "dd", "")

// Sin, Cos

BUILTIN(__nvvm_sin_approx_ftz_f, "ff", "")
BUILTIN(__nvvm_sin_approx_f, "ff", "")

BUILTIN(__nvvm_cos_approx_ftz_f, "ff", "")
BUILTIN(__nvvm_cos_approx_f, "ff", "")

// Fma

// Gating in this section: the f16/f16x2 plain, _ftz and _sat forms require
// AND(SM_53, PTX42); the _relu forms and all bf16 forms require
// AND(SM_80, PTX70); the float and double forms are unconditional.
TARGET_BUILTIN(__nvvm_fma_rn_f16, "hhhh", "", AND(SM_53, PTX42))
TARGET_BUILTIN(__nvvm_fma_rn_ftz_f16, "hhhh", "", AND(SM_53, PTX42))
TARGET_BUILTIN(__nvvm_fma_rn_sat_f16, "hhhh", "", AND(SM_53, PTX42))
TARGET_BUILTIN(__nvvm_fma_rn_ftz_sat_f16, "hhhh", "", AND(SM_53, PTX42))
TARGET_BUILTIN(__nvvm_fma_rn_relu_f16, "hhhh", "", AND(SM_80, PTX70))
TARGET_BUILTIN(__nvvm_fma_rn_ftz_relu_f16, "hhhh", "", AND(SM_80, PTX70))
TARGET_BUILTIN(__nvvm_fma_rn_f16x2, "V2hV2hV2hV2h", "", AND(SM_53, PTX42))
TARGET_BUILTIN(__nvvm_fma_rn_ftz_f16x2, "V2hV2hV2hV2h", "", AND(SM_53, PTX42))
TARGET_BUILTIN(__nvvm_fma_rn_sat_f16x2, "V2hV2hV2hV2h", "", AND(SM_53, PTX42))
TARGET_BUILTIN(__nvvm_fma_rn_ftz_sat_f16x2, "V2hV2hV2hV2h", "", AND(SM_53, PTX42))
TARGET_BUILTIN(__nvvm_fma_rn_relu_f16x2, "V2hV2hV2hV2h", "", AND(SM_80, PTX70))
TARGET_BUILTIN(__nvvm_fma_rn_ftz_relu_f16x2, "V2hV2hV2hV2h", "", AND(SM_80, PTX70))
TARGET_BUILTIN(__nvvm_fma_rn_bf16, "yyyy", "", AND(SM_80, PTX70))
TARGET_BUILTIN(__nvvm_fma_rn_relu_bf16, "yyyy", "", AND(SM_80, PTX70))
TARGET_BUILTIN(__nvvm_fma_rn_bf16x2, "V2yV2yV2yV2y", "", AND(SM_80, PTX70))
TARGET_BUILTIN(__nvvm_fma_rn_relu_bf16x2, "V2yV2yV2yV2y", "", AND(SM_80, PTX70))
BUILTIN(__nvvm_fma_rn_ftz_f, "ffff", "")
BUILTIN(__nvvm_fma_rn_f, "ffff", "")
BUILTIN(__nvvm_fma_rz_ftz_f, "ffff", "")
BUILTIN(__nvvm_fma_rz_f, "ffff", "")
BUILTIN(__nvvm_fma_rm_ftz_f, "ffff", "")
BUILTIN(__nvvm_fma_rm_f, "ffff", "")
BUILTIN(__nvvm_fma_rp_ftz_f, "ffff", "")
BUILTIN(__nvvm_fma_rp_f, "ffff", "")
BUILTIN(__nvvm_fma_rn_d, "dddd", "")
BUILTIN(__nvvm_fma_rz_d, "dddd", "")
BUILTIN(__nvvm_fma_rm_d, "dddd", "")
BUILTIN(__nvvm_fma_rp_d, "dddd", "")

// Rcp

BUILTIN(__nvvm_rcp_rn_ftz_f, "ff", "")
BUILTIN(__nvvm_rcp_rn_f, "ff", "")
BUILTIN(__nvvm_rcp_rz_ftz_f, "ff", "")
BUILTIN(__nvvm_rcp_rz_f, "ff", "")
BUILTIN(__nvvm_rcp_rm_ftz_f, "ff", "")
BUILTIN(__nvvm_rcp_rm_f, "ff", "")
BUILTIN(__nvvm_rcp_rp_ftz_f, "ff", "")
BUILTIN(__nvvm_rcp_rp_f, "ff", "")

BUILTIN(__nvvm_rcp_rn_d, "dd", "")
BUILTIN(__nvvm_rcp_rz_d, "dd", "")
BUILTIN(__nvvm_rcp_rm_d, "dd", "")
BUILTIN(__nvvm_rcp_rp_d, "dd", "")

// Only _ftz approximate reciprocals are listed (no non-ftz counterpart).
BUILTIN(__nvvm_rcp_approx_ftz_f, "ff", "")
BUILTIN(__nvvm_rcp_approx_ftz_d, "dd", "")

// Sqrt

BUILTIN(__nvvm_sqrt_rn_ftz_f, "ff", "")
BUILTIN(__nvvm_sqrt_rn_f, "ff", "")
BUILTIN(__nvvm_sqrt_rz_ftz_f, "ff", "")
BUILTIN(__nvvm_sqrt_rz_f, "ff", "")
BUILTIN(__nvvm_sqrt_rm_ftz_f, "ff", "")
BUILTIN(__nvvm_sqrt_rm_f, "ff", "")
BUILTIN(__nvvm_sqrt_rp_ftz_f, "ff", "")
BUILTIN(__nvvm_sqrt_rp_f, "ff", "")
BUILTIN(__nvvm_sqrt_approx_ftz_f, "ff", "")
BUILTIN(__nvvm_sqrt_approx_f, "ff", "")

BUILTIN(__nvvm_sqrt_rn_d, "dd", "")
BUILTIN(__nvvm_sqrt_rz_d, "dd", "")
BUILTIN(__nvvm_sqrt_rm_d, "dd", "")
BUILTIN(__nvvm_sqrt_rp_d, "dd", "")

// Rsqrt

BUILTIN(__nvvm_rsqrt_approx_ftz_f, "ff", "")
BUILTIN(__nvvm_rsqrt_approx_f, "ff", "")
BUILTIN(__nvvm_rsqrt_approx_d, "dd", "")

// Add

BUILTIN(__nvvm_add_rn_ftz_f, "fff", "")
BUILTIN(__nvvm_add_rn_f, "fff", "")
BUILTIN(__nvvm_add_rz_ftz_f, "fff", "")
BUILTIN(__nvvm_add_rz_f, "fff", "")
BUILTIN(__nvvm_add_rm_ftz_f, "fff", "")
BUILTIN(__nvvm_add_rm_f, "fff", "")
BUILTIN(__nvvm_add_rp_ftz_f, "fff", "")
BUILTIN(__nvvm_add_rp_f, "fff", "")

BUILTIN(__nvvm_add_rn_d, "ddd", "")
BUILTIN(__nvvm_add_rz_d, "ddd", "")
BUILTIN(__nvvm_add_rm_d, "ddd", "")
BUILTIN(__nvvm_add_rp_d, "ddd", "")

// Convert

// Names read <src>2<dst>_<rounding>[_ftz].  In each type string the first
// type is the result and the second the operand, e.g. "fd" for d2f.
BUILTIN(__nvvm_d2f_rn_ftz, "fd", "")
BUILTIN(__nvvm_d2f_rn, "fd", "")
BUILTIN(__nvvm_d2f_rz_ftz, "fd", "")
BUILTIN(__nvvm_d2f_rz, "fd", "")
BUILTIN(__nvvm_d2f_rm_ftz, "fd", "")
BUILTIN(__nvvm_d2f_rm, "fd", "")
BUILTIN(__nvvm_d2f_rp_ftz, "fd", "")
BUILTIN(__nvvm_d2f_rp, "fd", "")

BUILTIN(__nvvm_d2i_rn, "id", "")
BUILTIN(__nvvm_d2i_rz, "id", "")
BUILTIN(__nvvm_d2i_rm, "id", "")
BUILTIN(__nvvm_d2i_rp, "id", "")

BUILTIN(__nvvm_d2ui_rn, "Uid", "")
BUILTIN(__nvvm_d2ui_rz, "Uid", "")
BUILTIN(__nvvm_d2ui_rm, "Uid", "")
BUILTIN(__nvvm_d2ui_rp, "Uid", "")

BUILTIN(__nvvm_i2d_rn, "di", "")
BUILTIN(__nvvm_i2d_rz, "di", "")
BUILTIN(__nvvm_i2d_rm, "di", "")
BUILTIN(__nvvm_i2d_rp, "di", "")

BUILTIN(__nvvm_ui2d_rn, "dUi", "")
BUILTIN(__nvvm_ui2d_rz, "dUi", "")
BUILTIN(__nvvm_ui2d_rm, "dUi", "")
BUILTIN(__nvvm_ui2d_rp, "dUi", "")

BUILTIN(__nvvm_f2i_rn_ftz, "if", "")
BUILTIN(__nvvm_f2i_rn, "if", "")
BUILTIN(__nvvm_f2i_rz_ftz, "if", "")
BUILTIN(__nvvm_f2i_rz, "if", "")
BUILTIN(__nvvm_f2i_rm_ftz, "if", "")
BUILTIN(__nvvm_f2i_rm, "if", "")
// Convert (continued).  All entries here are plain BUILTINs with empty
// attribute strings; only the float-source conversions have _ftz variants.
BUILTIN(__nvvm_f2i_rp_ftz, "if", "")
BUILTIN(__nvvm_f2i_rp, "if", "")

BUILTIN(__nvvm_f2ui_rn_ftz, "Uif", "")
BUILTIN(__nvvm_f2ui_rn, "Uif", "")
BUILTIN(__nvvm_f2ui_rz_ftz, "Uif", "")
BUILTIN(__nvvm_f2ui_rz, "Uif", "")
BUILTIN(__nvvm_f2ui_rm_ftz, "Uif", "")
BUILTIN(__nvvm_f2ui_rm, "Uif", "")
BUILTIN(__nvvm_f2ui_rp_ftz, "Uif", "")
BUILTIN(__nvvm_f2ui_rp, "Uif", "")

BUILTIN(__nvvm_i2f_rn, "fi", "")
BUILTIN(__nvvm_i2f_rz, "fi", "")
BUILTIN(__nvvm_i2f_rm, "fi", "")
BUILTIN(__nvvm_i2f_rp, "fi", "")

BUILTIN(__nvvm_ui2f_rn, "fUi", "")
BUILTIN(__nvvm_ui2f_rz, "fUi", "")
BUILTIN(__nvvm_ui2f_rm, "fUi", "")
BUILTIN(__nvvm_ui2f_rp, "fUi", "")

// Two ints in, one double out.
BUILTIN(__nvvm_lohi_i2d, "dii", "")

// One double in, one int out (lo and hi variants).
BUILTIN(__nvvm_d2i_lo, "id", "")
BUILTIN(__nvvm_d2i_hi, "id", "")

BUILTIN(__nvvm_f2ll_rn_ftz, "LLif", "")
BUILTIN(__nvvm_f2ll_rn, "LLif", "")
BUILTIN(__nvvm_f2ll_rz_ftz, "LLif", "")
BUILTIN(__nvvm_f2ll_rz, "LLif", "")
BUILTIN(__nvvm_f2ll_rm_ftz, "LLif", "")
BUILTIN(__nvvm_f2ll_rm, "LLif", "")
BUILTIN(__nvvm_f2ll_rp_ftz, "LLif", "")
BUILTIN(__nvvm_f2ll_rp, "LLif", "")

BUILTIN(__nvvm_f2ull_rn_ftz, "ULLif", "")
BUILTIN(__nvvm_f2ull_rn, "ULLif", "")
BUILTIN(__nvvm_f2ull_rz_ftz, "ULLif", "")
BUILTIN(__nvvm_f2ull_rz, "ULLif", "")
BUILTIN(__nvvm_f2ull_rm_ftz, "ULLif", "")
BUILTIN(__nvvm_f2ull_rm, "ULLif", "")
BUILTIN(__nvvm_f2ull_rp_ftz, "ULLif", "")
BUILTIN(__nvvm_f2ull_rp, "ULLif", "")

BUILTIN(__nvvm_d2ll_rn, "LLid", "")
BUILTIN(__nvvm_d2ll_rz, "LLid", "")
BUILTIN(__nvvm_d2ll_rm, "LLid", "")
BUILTIN(__nvvm_d2ll_rp, "LLid", "")

BUILTIN(__nvvm_d2ull_rn, "ULLid", "")
BUILTIN(__nvvm_d2ull_rz, "ULLid", "")
BUILTIN(__nvvm_d2ull_rm, "ULLid", "")
BUILTIN(__nvvm_d2ull_rp, "ULLid", "")

BUILTIN(__nvvm_ll2f_rn, "fLLi", "")
BUILTIN(__nvvm_ll2f_rz, "fLLi", "")
BUILTIN(__nvvm_ll2f_rm, "fLLi", "")
BUILTIN(__nvvm_ll2f_rp, "fLLi", "")

// Convert (continued): 64-bit integer sources, then the reduced-precision
// float targets.
BUILTIN(__nvvm_ull2f_rn, "fULLi", "")
BUILTIN(__nvvm_ull2f_rz, "fULLi", "")
BUILTIN(__nvvm_ull2f_rm, "fULLi", "")
BUILTIN(__nvvm_ull2f_rp, "fULLi", "")

BUILTIN(__nvvm_ll2d_rn, "dLLi", "")
BUILTIN(__nvvm_ll2d_rz, "dLLi", "")
BUILTIN(__nvvm_ll2d_rm, "dLLi", "")
BUILTIN(__nvvm_ll2d_rp, "dLLi", "")

BUILTIN(__nvvm_ull2d_rn, "dULLi", "")
BUILTIN(__nvvm_ull2d_rz, "dULLi", "")
BUILTIN(__nvvm_ull2d_rm, "dULLi", "")
BUILTIN(__nvvm_ull2d_rp, "dULLi", "")

// f2h returns "Us" rather than the "h" type used by the f16 builtins
// elsewhere in this file.
BUILTIN(__nvvm_f2h_rn_ftz, "Usf", "")
BUILTIN(__nvvm_f2h_rn, "Usf", "")

// The remaining conversions in this section all require AND(SM_80,PTX70).
// The ff2*x2 forms take two floats and return a 2-element vector.
TARGET_BUILTIN(__nvvm_ff2bf16x2_rn, "V2yff", "", AND(SM_80,PTX70))
TARGET_BUILTIN(__nvvm_ff2bf16x2_rn_relu, "V2yff", "", AND(SM_80,PTX70))
TARGET_BUILTIN(__nvvm_ff2bf16x2_rz, "V2yff", "", AND(SM_80,PTX70))
TARGET_BUILTIN(__nvvm_ff2bf16x2_rz_relu, "V2yff", "", AND(SM_80,PTX70))

TARGET_BUILTIN(__nvvm_ff2f16x2_rn, "V2hff", "", AND(SM_80,PTX70))
TARGET_BUILTIN(__nvvm_ff2f16x2_rn_relu, "V2hff", "", AND(SM_80,PTX70))
TARGET_BUILTIN(__nvvm_ff2f16x2_rz, "V2hff", "", AND(SM_80,PTX70))
TARGET_BUILTIN(__nvvm_ff2f16x2_rz_relu, "V2hff", "", AND(SM_80,PTX70))

TARGET_BUILTIN(__nvvm_f2bf16_rn, "yf", "", AND(SM_80,PTX70))
TARGET_BUILTIN(__nvvm_f2bf16_rn_relu, "yf", "", AND(SM_80,PTX70))
TARGET_BUILTIN(__nvvm_f2bf16_rz, "yf", "", AND(SM_80,PTX70))
TARGET_BUILTIN(__nvvm_f2bf16_rz_relu, "yf", "", AND(SM_80,PTX70))

TARGET_BUILTIN(__nvvm_f2tf32_rna, "ZUif", "", AND(SM_80,PTX70))

// Bitcast

BUILTIN(__nvvm_bitcast_f2i, "if", "")
BUILTIN(__nvvm_bitcast_i2f, "fi", "")

BUILTIN(__nvvm_bitcast_ll2d, "dLLi", "")
BUILTIN(__nvvm_bitcast_d2ll, "LLid", "")

// FNS
// Gated on a PTX version only (PTX60); no SM requirement is given.
TARGET_BUILTIN(__nvvm_fns, "UiUiUii", "n", PTX60)

// Sync

BUILTIN(__syncthreads, "v", "")
BUILTIN(__nvvm_bar0_popc, "ii", "")
BUILTIN(__nvvm_bar0_and, "ii", "")
BUILTIN(__nvvm_bar0_or, "ii", "")
BUILTIN(__nvvm_bar_sync, "vi", "n")
// Sync (continued): these barrier forms require PTX60 with no SM requirement.
TARGET_BUILTIN(__nvvm_bar_warp_sync, "vUi", "n", PTX60)
TARGET_BUILTIN(__nvvm_barrier_sync, "vUi", "n", PTX60)
TARGET_BUILTIN(__nvvm_barrier_sync_cnt, "vUiUi", "n", PTX60)

// Cluster barriers require SM_90.  The _relaxed arrive is gated on PTX80,
// the other three on PTX78.
TARGET_BUILTIN(__nvvm_barrier_cluster_arrive, "v", "n", AND(SM_90,PTX78))
TARGET_BUILTIN(__nvvm_barrier_cluster_arrive_relaxed, "v", "n", AND(SM_90,PTX80))
TARGET_BUILTIN(__nvvm_barrier_cluster_wait, "v", "n", AND(SM_90,PTX78))
TARGET_BUILTIN(__nvvm_fence_sc_cluster, "v", "n", AND(SM_90,PTX78))

// Shuffle

// The non-sync shuffles are unconditional BUILTINs.
BUILTIN(__nvvm_shfl_down_i32, "iiii", "")
BUILTIN(__nvvm_shfl_down_f32, "ffii", "")
BUILTIN(__nvvm_shfl_up_i32, "iiii", "")
BUILTIN(__nvvm_shfl_up_f32, "ffii", "")
BUILTIN(__nvvm_shfl_bfly_i32, "iiii", "")
BUILTIN(__nvvm_shfl_bfly_f32, "ffii", "")
BUILTIN(__nvvm_shfl_idx_i32, "iiii", "")
BUILTIN(__nvvm_shfl_idx_f32, "ffii", "")

// The _sync shuffles take one extra leading "Ui" operand and require PTX60.
TARGET_BUILTIN(__nvvm_shfl_sync_down_i32, "iUiiii", "", PTX60)
TARGET_BUILTIN(__nvvm_shfl_sync_down_f32, "fUifii", "", PTX60)
TARGET_BUILTIN(__nvvm_shfl_sync_up_i32, "iUiiii", "", PTX60)
TARGET_BUILTIN(__nvvm_shfl_sync_up_f32, "fUifii", "", PTX60)
TARGET_BUILTIN(__nvvm_shfl_sync_bfly_i32, "iUiiii", "", PTX60)
TARGET_BUILTIN(__nvvm_shfl_sync_bfly_f32, "fUifii", "", PTX60)
TARGET_BUILTIN(__nvvm_shfl_sync_idx_i32, "iUiiii", "", PTX60)
TARGET_BUILTIN(__nvvm_shfl_sync_idx_f32, "fUifii", "", PTX60)

// Vote
BUILTIN(__nvvm_vote_all, "bb", "")
BUILTIN(__nvvm_vote_any, "bb", "")
BUILTIN(__nvvm_vote_uni, "bb", "")
BUILTIN(__nvvm_vote_ballot, "Uib", "")

// As with shuffles, the _sync votes add a leading "Ui" operand and need PTX60.
TARGET_BUILTIN(__nvvm_vote_all_sync, "bUib", "", PTX60)
TARGET_BUILTIN(__nvvm_vote_any_sync, "bUib", "", PTX60)
TARGET_BUILTIN(__nvvm_vote_uni_sync, "bUib", "", PTX60)
TARGET_BUILTIN(__nvvm_vote_ballot_sync, "UiUib", "", PTX60)

// Mask
TARGET_BUILTIN(__nvvm_activemask, "Ui", "n", PTX62)

// Match
TARGET_BUILTIN(__nvvm_match_any_sync_i32, "UiUiUi", "", AND(SM_70,PTX60))
// Match (continued): the i64 form differs from the i32 form only in its
// value operand ("Wi" instead of "Ui").
TARGET_BUILTIN(__nvvm_match_any_sync_i64, "UiUiWi", "", AND(SM_70,PTX60))
// These return a pair {value, predicate}, which requires custom lowering.
// The trailing "i*" operand in the prototypes is presumably where the
// predicate is written -- TODO confirm against the lowering code.
TARGET_BUILTIN(__nvvm_match_all_sync_i32p, "UiUiUii*", "", AND(SM_70,PTX60))
TARGET_BUILTIN(__nvvm_match_all_sync_i64p, "UiUiWii*", "", AND(SM_70,PTX60))

// Redux
// All redux entries require AND(SM_80,PTX70).  umin/umax use "Ui" for the
// result and first operand; the others use "i" throughout.
TARGET_BUILTIN(__nvvm_redux_sync_add, "iii", "", AND(SM_80,PTX70))
TARGET_BUILTIN(__nvvm_redux_sync_min, "iii", "", AND(SM_80,PTX70))
TARGET_BUILTIN(__nvvm_redux_sync_max, "iii", "", AND(SM_80,PTX70))
TARGET_BUILTIN(__nvvm_redux_sync_umin, "UiUii", "", AND(SM_80,PTX70))
TARGET_BUILTIN(__nvvm_redux_sync_umax, "UiUii", "", AND(SM_80,PTX70))
TARGET_BUILTIN(__nvvm_redux_sync_and, "iii", "", AND(SM_80,PTX70))
TARGET_BUILTIN(__nvvm_redux_sync_xor, "iii", "", AND(SM_80,PTX70))
TARGET_BUILTIN(__nvvm_redux_sync_or, "iii", "", AND(SM_80,PTX70))

// Membar

BUILTIN(__nvvm_membar_cta, "v", "")
BUILTIN(__nvvm_membar_gl, "v", "")
BUILTIN(__nvvm_membar_sys, "v", "")

// mbarrier

// Every mbarrier entry requires AND(SM_80,PTX70).  Each _shared variant has
// the same prototype as its generic counterpart except for the "3" after the
// pointer '*'.
// NOTE(review): presumably an address-space number per the Builtins.def
// pointer encoding -- confirm.
TARGET_BUILTIN(__nvvm_mbarrier_init, "vWi*i", "", AND(SM_80,PTX70))
TARGET_BUILTIN(__nvvm_mbarrier_init_shared, "vWi*3i", "", AND(SM_80,PTX70))

TARGET_BUILTIN(__nvvm_mbarrier_inval, "vWi*", "", AND(SM_80,PTX70))
TARGET_BUILTIN(__nvvm_mbarrier_inval_shared, "vWi*3", "", AND(SM_80,PTX70))

TARGET_BUILTIN(__nvvm_mbarrier_arrive, "WiWi*", "", AND(SM_80,PTX70))
TARGET_BUILTIN(__nvvm_mbarrier_arrive_shared, "WiWi*3", "", AND(SM_80,PTX70))
TARGET_BUILTIN(__nvvm_mbarrier_arrive_noComplete, "WiWi*i", "", AND(SM_80,PTX70))
TARGET_BUILTIN(__nvvm_mbarrier_arrive_noComplete_shared, "WiWi*3i", "", AND(SM_80,PTX70))

TARGET_BUILTIN(__nvvm_mbarrier_arrive_drop, "WiWi*", "", AND(SM_80,PTX70))
TARGET_BUILTIN(__nvvm_mbarrier_arrive_drop_shared, "WiWi*3", "", AND(SM_80,PTX70))
TARGET_BUILTIN(__nvvm_mbarrier_arrive_drop_noComplete, "WiWi*i", "", AND(SM_80,PTX70))
// mbarrier (continued): all entries require AND(SM_80,PTX70).
TARGET_BUILTIN(__nvvm_mbarrier_arrive_drop_noComplete_shared, "WiWi*3i", "", AND(SM_80,PTX70))

TARGET_BUILTIN(__nvvm_mbarrier_test_wait, "bWi*Wi", "", AND(SM_80,PTX70))
TARGET_BUILTIN(__nvvm_mbarrier_test_wait_shared, "bWi*3Wi", "", AND(SM_80,PTX70))

// Takes a "Wi" value, not a pointer, unlike the other mbarrier builtins.
TARGET_BUILTIN(__nvvm_mbarrier_pending_count, "iWi", "", AND(SM_80,PTX70))

// Memcpy, Memset

BUILTIN(__nvvm_memcpy, "vUc*Uc*zi","")
BUILTIN(__nvvm_memset, "vUc*Uczi","")

// Image

// These use the __builtin_ptx_ prefix rather than __nvvm_.  All are plain
// BUILTINs with empty attribute strings.
BUILTIN(__builtin_ptx_read_image2Dfi_, "V4fiiii", "")
BUILTIN(__builtin_ptx_read_image2Dff_, "V4fiiff", "")
BUILTIN(__builtin_ptx_read_image2Dii_, "V4iiiii", "")
BUILTIN(__builtin_ptx_read_image2Dif_, "V4iiiff", "")

BUILTIN(__builtin_ptx_read_image3Dfi_, "V4fiiiiii", "")
BUILTIN(__builtin_ptx_read_image3Dff_, "V4fiiffff", "")
BUILTIN(__builtin_ptx_read_image3Dii_, "V4iiiiiii", "")
BUILTIN(__builtin_ptx_read_image3Dif_, "V4iiiffff", "")

BUILTIN(__builtin_ptx_write_image2Df_, "viiiffff", "")
BUILTIN(__builtin_ptx_write_image2Di_, "viiiiiii", "")
BUILTIN(__builtin_ptx_write_image2Dui_, "viiiUiUiUiUi", "")
BUILTIN(__builtin_ptx_get_image_depthi_, "ii", "")
BUILTIN(__builtin_ptx_get_image_heighti_, "ii", "")
BUILTIN(__builtin_ptx_get_image_widthi_, "ii", "")
BUILTIN(__builtin_ptx_get_image_channel_data_typei_, "ii", "")
BUILTIN(__builtin_ptx_get_image_channel_orderi_, "ii", "")

// Atomic
//
// We need the atom intrinsics because
// - they are used in converging analysis
// - they are used in address space analysis and optimization
// So it does not hurt to expose them as builtins.
//
// Entry pattern used below: for each operation and operand type, the
// __nvvm_atom_<op>_gen_<type> form is a plain BUILTIN, and the
// __nvvm_atom_cta_<op>_... / __nvvm_atom_sys_<op>_... forms are
// TARGET_BUILTINs requiring SM_60.  Exceptions visible here: every
// add_gen_d form (including the unscoped one) requires SM_60, and sub has
// no _cta_/_sys_ forms.  All entries are marked "n".
BUILTIN(__nvvm_atom_add_gen_i, "iiD*i", "n")
TARGET_BUILTIN(__nvvm_atom_cta_add_gen_i, "iiD*i", "n", SM_60)
TARGET_BUILTIN(__nvvm_atom_sys_add_gen_i, "iiD*i", "n", SM_60)
BUILTIN(__nvvm_atom_add_gen_l, "LiLiD*Li", "n")
TARGET_BUILTIN(__nvvm_atom_cta_add_gen_l, "LiLiD*Li", "n", SM_60)
TARGET_BUILTIN(__nvvm_atom_sys_add_gen_l, "LiLiD*Li", "n", SM_60)
BUILTIN(__nvvm_atom_add_gen_ll, "LLiLLiD*LLi", "n")
TARGET_BUILTIN(__nvvm_atom_cta_add_gen_ll, "LLiLLiD*LLi", "n", SM_60)
TARGET_BUILTIN(__nvvm_atom_sys_add_gen_ll, "LLiLLiD*LLi", "n", SM_60)
BUILTIN(__nvvm_atom_add_gen_f, "ffD*f", "n")
TARGET_BUILTIN(__nvvm_atom_cta_add_gen_f, "ffD*f", "n", SM_60)
TARGET_BUILTIN(__nvvm_atom_sys_add_gen_f, "ffD*f", "n", SM_60)
TARGET_BUILTIN(__nvvm_atom_add_gen_d, "ddD*d", "n", SM_60)
TARGET_BUILTIN(__nvvm_atom_cta_add_gen_d, "ddD*d", "n", SM_60)
TARGET_BUILTIN(__nvvm_atom_sys_add_gen_d, "ddD*d", "n", SM_60)

BUILTIN(__nvvm_atom_sub_gen_i, "iiD*i", "n")
BUILTIN(__nvvm_atom_sub_gen_l, "LiLiD*Li", "n")
BUILTIN(__nvvm_atom_sub_gen_ll, "LLiLLiD*LLi", "n")

BUILTIN(__nvvm_atom_xchg_gen_i, "iiD*i", "n")
TARGET_BUILTIN(__nvvm_atom_cta_xchg_gen_i, "iiD*i", "n", SM_60)
TARGET_BUILTIN(__nvvm_atom_sys_xchg_gen_i, "iiD*i", "n", SM_60)
BUILTIN(__nvvm_atom_xchg_gen_l, "LiLiD*Li", "n")
TARGET_BUILTIN(__nvvm_atom_cta_xchg_gen_l, "LiLiD*Li", "n", SM_60)
TARGET_BUILTIN(__nvvm_atom_sys_xchg_gen_l, "LiLiD*Li", "n", SM_60)
BUILTIN(__nvvm_atom_xchg_gen_ll, "LLiLLiD*LLi", "n")
TARGET_BUILTIN(__nvvm_atom_cta_xchg_gen_ll, "LLiLLiD*LLi", "n", SM_60)
TARGET_BUILTIN(__nvvm_atom_sys_xchg_gen_ll, "LLiLLiD*LLi", "n", SM_60)

BUILTIN(__nvvm_atom_max_gen_i, "iiD*i", "n")
TARGET_BUILTIN(__nvvm_atom_cta_max_gen_i, "iiD*i", "n", SM_60)
TARGET_BUILTIN(__nvvm_atom_sys_max_gen_i, "iiD*i", "n", SM_60)
BUILTIN(__nvvm_atom_max_gen_ui, "UiUiD*Ui", "n")
TARGET_BUILTIN(__nvvm_atom_cta_max_gen_ui, "UiUiD*Ui", "n", SM_60)
// Atomic max/min (continued).  For each operand type the unscoped _gen_ form
// is a plain BUILTIN and the _cta_/_sys_ forms require SM_60.  Signed and
// unsigned operand types get separate entries.
TARGET_BUILTIN(__nvvm_atom_sys_max_gen_ui, "UiUiD*Ui", "n", SM_60)
BUILTIN(__nvvm_atom_max_gen_l, "LiLiD*Li", "n")
TARGET_BUILTIN(__nvvm_atom_cta_max_gen_l, "LiLiD*Li", "n", SM_60)
TARGET_BUILTIN(__nvvm_atom_sys_max_gen_l, "LiLiD*Li", "n", SM_60)
BUILTIN(__nvvm_atom_max_gen_ul, "ULiULiD*ULi", "n")
TARGET_BUILTIN(__nvvm_atom_cta_max_gen_ul, "ULiULiD*ULi", "n", SM_60)
TARGET_BUILTIN(__nvvm_atom_sys_max_gen_ul, "ULiULiD*ULi", "n", SM_60)
BUILTIN(__nvvm_atom_max_gen_ll, "LLiLLiD*LLi", "n")
TARGET_BUILTIN(__nvvm_atom_cta_max_gen_ll, "LLiLLiD*LLi", "n", SM_60)
TARGET_BUILTIN(__nvvm_atom_sys_max_gen_ll, "LLiLLiD*LLi", "n", SM_60)
BUILTIN(__nvvm_atom_max_gen_ull, "ULLiULLiD*ULLi", "n")
TARGET_BUILTIN(__nvvm_atom_cta_max_gen_ull, "ULLiULLiD*ULLi", "n", SM_60)
TARGET_BUILTIN(__nvvm_atom_sys_max_gen_ull, "ULLiULLiD*ULLi", "n", SM_60)

BUILTIN(__nvvm_atom_min_gen_i, "iiD*i", "n")
TARGET_BUILTIN(__nvvm_atom_cta_min_gen_i, "iiD*i", "n", SM_60)
TARGET_BUILTIN(__nvvm_atom_sys_min_gen_i, "iiD*i", "n", SM_60)
BUILTIN(__nvvm_atom_min_gen_ui, "UiUiD*Ui", "n")
TARGET_BUILTIN(__nvvm_atom_cta_min_gen_ui, "UiUiD*Ui", "n", SM_60)
TARGET_BUILTIN(__nvvm_atom_sys_min_gen_ui, "UiUiD*Ui", "n", SM_60)
BUILTIN(__nvvm_atom_min_gen_l, "LiLiD*Li", "n")
TARGET_BUILTIN(__nvvm_atom_cta_min_gen_l, "LiLiD*Li", "n", SM_60)
TARGET_BUILTIN(__nvvm_atom_sys_min_gen_l, "LiLiD*Li", "n", SM_60)
BUILTIN(__nvvm_atom_min_gen_ul, "ULiULiD*ULi", "n")
TARGET_BUILTIN(__nvvm_atom_cta_min_gen_ul, "ULiULiD*ULi", "n", SM_60)
TARGET_BUILTIN(__nvvm_atom_sys_min_gen_ul, "ULiULiD*ULi", "n", SM_60)
BUILTIN(__nvvm_atom_min_gen_ll, "LLiLLiD*LLi", "n")
TARGET_BUILTIN(__nvvm_atom_cta_min_gen_ll, "LLiLLiD*LLi", "n", SM_60)
TARGET_BUILTIN(__nvvm_atom_sys_min_gen_ll, "LLiLLiD*LLi", "n", SM_60)
BUILTIN(__nvvm_atom_min_gen_ull, "ULLiULLiD*ULLi", "n")
TARGET_BUILTIN(__nvvm_atom_cta_min_gen_ull, "ULLiULLiD*ULLi", "n", SM_60)
793TARGET_BUILTIN(__nvvm_atom_sys_min_gen_ull, "ULLiULLiD*ULLi", "n", SM_60) 794 795BUILTIN(__nvvm_atom_inc_gen_ui, "UiUiD*Ui", "n") 796TARGET_BUILTIN(__nvvm_atom_cta_inc_gen_ui, "UiUiD*Ui", "n", SM_60) 797TARGET_BUILTIN(__nvvm_atom_sys_inc_gen_ui, "UiUiD*Ui", "n", SM_60) 798BUILTIN(__nvvm_atom_dec_gen_ui, "UiUiD*Ui", "n") 799TARGET_BUILTIN(__nvvm_atom_cta_dec_gen_ui, "UiUiD*Ui", "n", SM_60) 800TARGET_BUILTIN(__nvvm_atom_sys_dec_gen_ui, "UiUiD*Ui", "n", SM_60) 801 802BUILTIN(__nvvm_atom_and_gen_i, "iiD*i", "n") 803TARGET_BUILTIN(__nvvm_atom_cta_and_gen_i, "iiD*i", "n", SM_60) 804TARGET_BUILTIN(__nvvm_atom_sys_and_gen_i, "iiD*i", "n", SM_60) 805BUILTIN(__nvvm_atom_and_gen_l, "LiLiD*Li", "n") 806TARGET_BUILTIN(__nvvm_atom_cta_and_gen_l, "LiLiD*Li", "n", SM_60) 807TARGET_BUILTIN(__nvvm_atom_sys_and_gen_l, "LiLiD*Li", "n", SM_60) 808BUILTIN(__nvvm_atom_and_gen_ll, "LLiLLiD*LLi", "n") 809TARGET_BUILTIN(__nvvm_atom_cta_and_gen_ll, "LLiLLiD*LLi", "n", SM_60) 810TARGET_BUILTIN(__nvvm_atom_sys_and_gen_ll, "LLiLLiD*LLi", "n", SM_60) 811 812BUILTIN(__nvvm_atom_or_gen_i, "iiD*i", "n") 813TARGET_BUILTIN(__nvvm_atom_cta_or_gen_i, "iiD*i", "n", SM_60) 814TARGET_BUILTIN(__nvvm_atom_sys_or_gen_i, "iiD*i", "n", SM_60) 815BUILTIN(__nvvm_atom_or_gen_l, "LiLiD*Li", "n") 816TARGET_BUILTIN(__nvvm_atom_cta_or_gen_l, "LiLiD*Li", "n", SM_60) 817TARGET_BUILTIN(__nvvm_atom_sys_or_gen_l, "LiLiD*Li", "n", SM_60) 818BUILTIN(__nvvm_atom_or_gen_ll, "LLiLLiD*LLi", "n") 819TARGET_BUILTIN(__nvvm_atom_cta_or_gen_ll, "LLiLLiD*LLi", "n", SM_60) 820TARGET_BUILTIN(__nvvm_atom_sys_or_gen_ll, "LLiLLiD*LLi", "n", SM_60) 821 822BUILTIN(__nvvm_atom_xor_gen_i, "iiD*i", "n") 823TARGET_BUILTIN(__nvvm_atom_cta_xor_gen_i, "iiD*i", "n", SM_60) 824TARGET_BUILTIN(__nvvm_atom_sys_xor_gen_i, "iiD*i", "n", SM_60) 825BUILTIN(__nvvm_atom_xor_gen_l, "LiLiD*Li", "n") 826TARGET_BUILTIN(__nvvm_atom_cta_xor_gen_l, "LiLiD*Li", "n", SM_60) 827TARGET_BUILTIN(__nvvm_atom_sys_xor_gen_l, "LiLiD*Li", "n", SM_60) 
828BUILTIN(__nvvm_atom_xor_gen_ll, "LLiLLiD*LLi", "n") 829TARGET_BUILTIN(__nvvm_atom_cta_xor_gen_ll, "LLiLLiD*LLi", "n", SM_60) 830TARGET_BUILTIN(__nvvm_atom_sys_xor_gen_ll, "LLiLLiD*LLi", "n", SM_60) 831 832BUILTIN(__nvvm_atom_cas_gen_i, "iiD*ii", "n") 833TARGET_BUILTIN(__nvvm_atom_cta_cas_gen_i, "iiD*ii", "n", SM_60) 834TARGET_BUILTIN(__nvvm_atom_sys_cas_gen_i, "iiD*ii", "n", SM_60) 835BUILTIN(__nvvm_atom_cas_gen_l, "LiLiD*LiLi", "n") 836TARGET_BUILTIN(__nvvm_atom_cta_cas_gen_l, "LiLiD*LiLi", "n", SM_60) 837TARGET_BUILTIN(__nvvm_atom_sys_cas_gen_l, "LiLiD*LiLi", "n", SM_60) 838BUILTIN(__nvvm_atom_cas_gen_ll, "LLiLLiD*LLiLLi", "n") 839TARGET_BUILTIN(__nvvm_atom_cta_cas_gen_ll, "LLiLLiD*LLiLLi", "n", SM_60) 840TARGET_BUILTIN(__nvvm_atom_sys_cas_gen_ll, "LLiLLiD*LLiLLi", "n", SM_60) 841 842// Compiler Error Warn 843BUILTIN(__nvvm_compiler_error, "vcC*4", "n") 844BUILTIN(__nvvm_compiler_warn, "vcC*4", "n") 845 846BUILTIN(__nvvm_ldu_c, "ccC*", "") 847BUILTIN(__nvvm_ldu_sc, "ScScC*", "") 848BUILTIN(__nvvm_ldu_s, "ssC*", "") 849BUILTIN(__nvvm_ldu_i, "iiC*", "") 850BUILTIN(__nvvm_ldu_l, "LiLiC*", "") 851BUILTIN(__nvvm_ldu_ll, "LLiLLiC*", "") 852 853BUILTIN(__nvvm_ldu_uc, "UcUcC*", "") 854BUILTIN(__nvvm_ldu_us, "UsUsC*", "") 855BUILTIN(__nvvm_ldu_ui, "UiUiC*", "") 856BUILTIN(__nvvm_ldu_ul, "ULiULiC*", "") 857BUILTIN(__nvvm_ldu_ull, "ULLiULLiC*", "") 858 859BUILTIN(__nvvm_ldu_h, "hhC*", "") 860BUILTIN(__nvvm_ldu_f, "ffC*", "") 861BUILTIN(__nvvm_ldu_d, "ddC*", "") 862 863BUILTIN(__nvvm_ldu_c2, "E2cE2cC*", "") 864BUILTIN(__nvvm_ldu_sc2, "E2ScE2ScC*", "") 865BUILTIN(__nvvm_ldu_c4, "E4cE4cC*", "") 866BUILTIN(__nvvm_ldu_sc4, "E4ScE4ScC*", "") 867BUILTIN(__nvvm_ldu_s2, "E2sE2sC*", "") 868BUILTIN(__nvvm_ldu_s4, "E4sE4sC*", "") 869BUILTIN(__nvvm_ldu_i2, "E2iE2iC*", "") 870BUILTIN(__nvvm_ldu_i4, "E4iE4iC*", "") 871BUILTIN(__nvvm_ldu_l2, "E2LiE2LiC*", "") 872BUILTIN(__nvvm_ldu_ll2, "E2LLiE2LLiC*", "") 873 874BUILTIN(__nvvm_ldu_uc2, "E2UcE2UcC*", "") 875BUILTIN(__nvvm_ldu_uc4, 
"E4UcE4UcC*", "") 876BUILTIN(__nvvm_ldu_us2, "E2UsE2UsC*", "") 877BUILTIN(__nvvm_ldu_us4, "E4UsE4UsC*", "") 878BUILTIN(__nvvm_ldu_ui2, "E2UiE2UiC*", "") 879BUILTIN(__nvvm_ldu_ui4, "E4UiE4UiC*", "") 880BUILTIN(__nvvm_ldu_ul2, "E2ULiE2ULiC*", "") 881BUILTIN(__nvvm_ldu_ull2, "E2ULLiE2ULLiC*", "") 882 883BUILTIN(__nvvm_ldu_h2, "E2hE2hC*", "") 884BUILTIN(__nvvm_ldu_f2, "E2fE2fC*", "") 885BUILTIN(__nvvm_ldu_f4, "E4fE4fC*", "") 886BUILTIN(__nvvm_ldu_d2, "E2dE2dC*", "") 887 888BUILTIN(__nvvm_ldg_c, "ccC*", "") 889BUILTIN(__nvvm_ldg_sc, "ScScC*", "") 890BUILTIN(__nvvm_ldg_s, "ssC*", "") 891BUILTIN(__nvvm_ldg_i, "iiC*", "") 892BUILTIN(__nvvm_ldg_l, "LiLiC*", "") 893BUILTIN(__nvvm_ldg_ll, "LLiLLiC*", "") 894 895BUILTIN(__nvvm_ldg_uc, "UcUcC*", "") 896BUILTIN(__nvvm_ldg_us, "UsUsC*", "") 897BUILTIN(__nvvm_ldg_ui, "UiUiC*", "") 898BUILTIN(__nvvm_ldg_ul, "ULiULiC*", "") 899BUILTIN(__nvvm_ldg_ull, "ULLiULLiC*", "") 900 901BUILTIN(__nvvm_ldg_h, "hhC*", "") 902BUILTIN(__nvvm_ldg_f, "ffC*", "") 903BUILTIN(__nvvm_ldg_d, "ddC*", "") 904 905BUILTIN(__nvvm_ldg_c2, "E2cE2cC*", "") 906BUILTIN(__nvvm_ldg_sc2, "E2ScE2ScC*", "") 907BUILTIN(__nvvm_ldg_c4, "E4cE4cC*", "") 908BUILTIN(__nvvm_ldg_sc4, "E4ScE4ScC*", "") 909BUILTIN(__nvvm_ldg_s2, "E2sE2sC*", "") 910BUILTIN(__nvvm_ldg_s4, "E4sE4sC*", "") 911BUILTIN(__nvvm_ldg_i2, "E2iE2iC*", "") 912BUILTIN(__nvvm_ldg_i4, "E4iE4iC*", "") 913BUILTIN(__nvvm_ldg_l2, "E2LiE2LiC*", "") 914BUILTIN(__nvvm_ldg_ll2, "E2LLiE2LLiC*", "") 915 916BUILTIN(__nvvm_ldg_uc2, "E2UcE2UcC*", "") 917BUILTIN(__nvvm_ldg_uc4, "E4UcE4UcC*", "") 918BUILTIN(__nvvm_ldg_us2, "E2UsE2UsC*", "") 919BUILTIN(__nvvm_ldg_us4, "E4UsE4UsC*", "") 920BUILTIN(__nvvm_ldg_ui2, "E2UiE2UiC*", "") 921BUILTIN(__nvvm_ldg_ui4, "E4UiE4UiC*", "") 922BUILTIN(__nvvm_ldg_ul2, "E2ULiE2ULiC*", "") 923BUILTIN(__nvvm_ldg_ull2, "E2ULLiE2ULLiC*", "") 924 925BUILTIN(__nvvm_ldg_h2, "E2hE2hC*", "") 926BUILTIN(__nvvm_ldg_f2, "E2fE2fC*", "") 927BUILTIN(__nvvm_ldg_f4, "E4fE4fC*", "") 928BUILTIN(__nvvm_ldg_d2, 
"E2dE2dC*", "") 929 930// Address space predicates. 931BUILTIN(__nvvm_isspacep_const, "bvC*", "nc") 932BUILTIN(__nvvm_isspacep_global, "bvC*", "nc") 933BUILTIN(__nvvm_isspacep_local, "bvC*", "nc") 934BUILTIN(__nvvm_isspacep_shared, "bvC*", "nc") 935TARGET_BUILTIN(__nvvm_isspacep_shared_cluster,"bvC*", "nc", AND(SM_90,PTX78)) 936 937// Builtins to support WMMA instructions on sm_70 938TARGET_BUILTIN(__hmma_m16n16k16_ld_a, "vi*iC*UiIi", "", AND(SM_70,PTX60)) 939TARGET_BUILTIN(__hmma_m16n16k16_ld_b, "vi*iC*UiIi", "", AND(SM_70,PTX60)) 940TARGET_BUILTIN(__hmma_m16n16k16_ld_c_f16, "vi*iC*UiIi", "", AND(SM_70,PTX60)) 941TARGET_BUILTIN(__hmma_m16n16k16_ld_c_f32, "vf*fC*UiIi", "", AND(SM_70,PTX60)) 942TARGET_BUILTIN(__hmma_m16n16k16_st_c_f16, "vi*iC*UiIi", "", AND(SM_70,PTX60)) 943TARGET_BUILTIN(__hmma_m16n16k16_st_c_f32, "vf*fC*UiIi", "", AND(SM_70,PTX60)) 944 945TARGET_BUILTIN(__hmma_m32n8k16_ld_a, "vi*iC*UiIi", "", AND(SM_70,PTX61)) 946TARGET_BUILTIN(__hmma_m32n8k16_ld_b, "vi*iC*UiIi", "", AND(SM_70,PTX61)) 947TARGET_BUILTIN(__hmma_m32n8k16_ld_c_f16, "vi*iC*UiIi", "", AND(SM_70,PTX61)) 948TARGET_BUILTIN(__hmma_m32n8k16_ld_c_f32, "vf*fC*UiIi", "", AND(SM_70,PTX61)) 949TARGET_BUILTIN(__hmma_m32n8k16_st_c_f16, "vi*iC*UiIi", "", AND(SM_70,PTX61)) 950TARGET_BUILTIN(__hmma_m32n8k16_st_c_f32, "vf*fC*UiIi", "", AND(SM_70,PTX61)) 951 952TARGET_BUILTIN(__hmma_m8n32k16_ld_a, "vi*iC*UiIi", "", AND(SM_70,PTX61)) 953TARGET_BUILTIN(__hmma_m8n32k16_ld_b, "vi*iC*UiIi", "", AND(SM_70,PTX61)) 954TARGET_BUILTIN(__hmma_m8n32k16_ld_c_f16, "vi*iC*UiIi", "", AND(SM_70,PTX61)) 955TARGET_BUILTIN(__hmma_m8n32k16_ld_c_f32, "vf*fC*UiIi", "", AND(SM_70,PTX61)) 956TARGET_BUILTIN(__hmma_m8n32k16_st_c_f16, "vi*iC*UiIi", "", AND(SM_70,PTX61)) 957TARGET_BUILTIN(__hmma_m8n32k16_st_c_f32, "vf*fC*UiIi", "", AND(SM_70,PTX61)) 958 959TARGET_BUILTIN(__hmma_m16n16k16_mma_f16f16, "vi*iC*iC*iC*IiIi", "", AND(SM_70,PTX60)) 960TARGET_BUILTIN(__hmma_m16n16k16_mma_f32f16, "vf*iC*iC*iC*IiIi", "", AND(SM_70,PTX60)) 
961TARGET_BUILTIN(__hmma_m16n16k16_mma_f32f32, "vf*iC*iC*fC*IiIi", "", AND(SM_70,PTX60)) 962TARGET_BUILTIN(__hmma_m16n16k16_mma_f16f32, "vi*iC*iC*fC*IiIi", "", AND(SM_70,PTX60)) 963 964TARGET_BUILTIN(__hmma_m32n8k16_mma_f16f16, "vi*iC*iC*iC*IiIi", "", AND(SM_70,PTX61)) 965TARGET_BUILTIN(__hmma_m32n8k16_mma_f32f16, "vf*iC*iC*iC*IiIi", "", AND(SM_70,PTX61)) 966TARGET_BUILTIN(__hmma_m32n8k16_mma_f32f32, "vf*iC*iC*fC*IiIi", "", AND(SM_70,PTX61)) 967TARGET_BUILTIN(__hmma_m32n8k16_mma_f16f32, "vi*iC*iC*fC*IiIi", "", AND(SM_70,PTX61)) 968 969TARGET_BUILTIN(__hmma_m8n32k16_mma_f16f16, "vi*iC*iC*iC*IiIi", "", AND(SM_70,PTX61)) 970TARGET_BUILTIN(__hmma_m8n32k16_mma_f32f16, "vf*iC*iC*iC*IiIi", "", AND(SM_70,PTX61)) 971TARGET_BUILTIN(__hmma_m8n32k16_mma_f32f32, "vf*iC*iC*fC*IiIi", "", AND(SM_70,PTX61)) 972TARGET_BUILTIN(__hmma_m8n32k16_mma_f16f32, "vi*iC*iC*fC*IiIi", "", AND(SM_70,PTX61)) 973 974// Builtins to support integer and sub-integer WMMA instructions on sm_72/sm_75 975TARGET_BUILTIN(__bmma_m8n8k128_ld_a_b1, "vi*iC*UiIi", "", AND(SM_75,PTX63)) 976TARGET_BUILTIN(__bmma_m8n8k128_ld_b_b1, "vi*iC*UiIi", "", AND(SM_75,PTX63)) 977TARGET_BUILTIN(__bmma_m8n8k128_ld_c, "vi*iC*UiIi", "", AND(SM_75,PTX63)) 978TARGET_BUILTIN(__bmma_m8n8k128_mma_and_popc_b1, "vi*iC*iC*iC*Ii", "", AND(SM_80,PTX71)) 979TARGET_BUILTIN(__bmma_m8n8k128_mma_xor_popc_b1, "vi*iC*iC*iC*Ii", "", AND(SM_75,PTX63)) 980TARGET_BUILTIN(__bmma_m8n8k128_st_c_i32, "vi*iC*UiIi", "", AND(SM_75,PTX63)) 981TARGET_BUILTIN(__imma_m16n16k16_ld_a_s8, "vi*iC*UiIi", "", AND(SM_72,PTX63)) 982TARGET_BUILTIN(__imma_m16n16k16_ld_a_u8, "vi*iC*UiIi", "", AND(SM_72,PTX63)) 983TARGET_BUILTIN(__imma_m16n16k16_ld_b_s8, "vi*iC*UiIi", "", AND(SM_72,PTX63)) 984TARGET_BUILTIN(__imma_m16n16k16_ld_b_u8, "vi*iC*UiIi", "", AND(SM_72,PTX63)) 985TARGET_BUILTIN(__imma_m16n16k16_ld_c, "vi*iC*UiIi", "", AND(SM_72,PTX63)) 986TARGET_BUILTIN(__imma_m16n16k16_mma_s8, "vi*iC*iC*iC*IiIi", "", AND(SM_72,PTX63)) 987TARGET_BUILTIN(__imma_m16n16k16_mma_u8, 
"vi*iC*iC*iC*IiIi", "", AND(SM_72,PTX63)) 988TARGET_BUILTIN(__imma_m16n16k16_st_c_i32, "vi*iC*UiIi", "", AND(SM_72,PTX63)) 989TARGET_BUILTIN(__imma_m32n8k16_ld_a_s8, "vi*iC*UiIi", "", AND(SM_72,PTX63)) 990TARGET_BUILTIN(__imma_m32n8k16_ld_a_u8, "vi*iC*UiIi", "", AND(SM_72,PTX63)) 991TARGET_BUILTIN(__imma_m32n8k16_ld_b_s8, "vi*iC*UiIi", "", AND(SM_72,PTX63)) 992TARGET_BUILTIN(__imma_m32n8k16_ld_b_u8, "vi*iC*UiIi", "", AND(SM_72,PTX63)) 993TARGET_BUILTIN(__imma_m32n8k16_ld_c, "vi*iC*UiIi", "", AND(SM_72,PTX63)) 994TARGET_BUILTIN(__imma_m32n8k16_mma_s8, "vi*iC*iC*iC*IiIi", "", AND(SM_72,PTX63)) 995TARGET_BUILTIN(__imma_m32n8k16_mma_u8, "vi*iC*iC*iC*IiIi", "", AND(SM_72,PTX63)) 996TARGET_BUILTIN(__imma_m32n8k16_st_c_i32, "vi*iC*UiIi", "", AND(SM_72,PTX63)) 997TARGET_BUILTIN(__imma_m8n32k16_ld_a_s8, "vi*iC*UiIi", "", AND(SM_72,PTX63)) 998TARGET_BUILTIN(__imma_m8n32k16_ld_a_u8, "vi*iC*UiIi", "", AND(SM_72,PTX63)) 999TARGET_BUILTIN(__imma_m8n32k16_ld_b_s8, "vi*iC*UiIi", "", AND(SM_72,PTX63)) 1000TARGET_BUILTIN(__imma_m8n32k16_ld_b_u8, "vi*iC*UiIi", "", AND(SM_72,PTX63)) 1001TARGET_BUILTIN(__imma_m8n32k16_ld_c, "vi*iC*UiIi", "", AND(SM_72,PTX63)) 1002TARGET_BUILTIN(__imma_m8n32k16_mma_s8, "vi*iC*iC*iC*IiIi", "", AND(SM_72,PTX63)) 1003TARGET_BUILTIN(__imma_m8n32k16_mma_u8, "vi*iC*iC*iC*IiIi", "", AND(SM_72,PTX63)) 1004TARGET_BUILTIN(__imma_m8n32k16_st_c_i32, "vi*iC*UiIi", "", AND(SM_72,PTX63)) 1005TARGET_BUILTIN(__imma_m8n8k32_ld_a_s4, "vi*iC*UiIi", "", AND(SM_75,PTX63)) 1006TARGET_BUILTIN(__imma_m8n8k32_ld_a_u4, "vi*iC*UiIi", "", AND(SM_75,PTX63)) 1007TARGET_BUILTIN(__imma_m8n8k32_ld_b_s4, "vi*iC*UiIi", "", AND(SM_75,PTX63)) 1008TARGET_BUILTIN(__imma_m8n8k32_ld_b_u4, "vi*iC*UiIi", "", AND(SM_75,PTX63)) 1009TARGET_BUILTIN(__imma_m8n8k32_ld_c, "vi*iC*UiIi", "", AND(SM_75,PTX63)) 1010TARGET_BUILTIN(__imma_m8n8k32_mma_s4, "vi*iC*iC*iC*IiIi", "", AND(SM_75,PTX63)) 1011TARGET_BUILTIN(__imma_m8n8k32_mma_u4, "vi*iC*iC*iC*IiIi", "", AND(SM_75,PTX63)) 
1012TARGET_BUILTIN(__imma_m8n8k32_st_c_i32, "vi*iC*UiIi", "", AND(SM_75,PTX63)) 1013 1014// Builtins to support double and alternate float WMMA instructions on sm_80 1015TARGET_BUILTIN(__dmma_m8n8k4_ld_a, "vd*dC*UiIi", "", AND(SM_80,PTX70)) 1016TARGET_BUILTIN(__dmma_m8n8k4_ld_b, "vd*dC*UiIi", "", AND(SM_80,PTX70)) 1017TARGET_BUILTIN(__dmma_m8n8k4_ld_c, "vd*dC*UiIi", "", AND(SM_80,PTX70)) 1018TARGET_BUILTIN(__dmma_m8n8k4_st_c_f64, "vd*dC*UiIi", "", AND(SM_80,PTX70)) 1019TARGET_BUILTIN(__dmma_m8n8k4_mma_f64, "vd*dC*dC*dC*IiIi", "", AND(SM_80,PTX70)) 1020 1021TARGET_BUILTIN(__mma_bf16_m16n16k16_ld_a, "vi*iC*UiIi", "", AND(SM_80,PTX70)) 1022TARGET_BUILTIN(__mma_bf16_m16n16k16_ld_b, "vi*iC*UiIi", "", AND(SM_80,PTX70)) 1023TARGET_BUILTIN(__mma_bf16_m16n16k16_mma_f32, "vf*iC*iC*fC*IiIi", "", AND(SM_80,PTX70)) 1024TARGET_BUILTIN(__mma_bf16_m8n32k16_ld_a, "vi*iC*UiIi", "", AND(SM_80,PTX70)) 1025TARGET_BUILTIN(__mma_bf16_m8n32k16_ld_b, "vi*iC*UiIi", "", AND(SM_80,PTX70)) 1026TARGET_BUILTIN(__mma_bf16_m8n32k16_mma_f32, "vf*iC*iC*fC*IiIi", "", AND(SM_80,PTX70)) 1027TARGET_BUILTIN(__mma_bf16_m32n8k16_ld_a, "vi*iC*UiIi", "", AND(SM_80,PTX70)) 1028TARGET_BUILTIN(__mma_bf16_m32n8k16_ld_b, "vi*iC*UiIi", "", AND(SM_80,PTX70)) 1029TARGET_BUILTIN(__mma_bf16_m32n8k16_mma_f32, "vf*iC*iC*fC*IiIi", "", AND(SM_80,PTX70)) 1030 1031TARGET_BUILTIN(__mma_tf32_m16n16k8_ld_a, "vi*iC*UiIi", "", AND(SM_80,PTX70)) 1032TARGET_BUILTIN(__mma_tf32_m16n16k8_ld_b, "vi*iC*UiIi", "", AND(SM_80,PTX70)) 1033TARGET_BUILTIN(__mma_tf32_m16n16k8_ld_c, "vf*fC*UiIi", "", AND(SM_80,PTX70)) 1034TARGET_BUILTIN(__mma_m16n16k8_st_c_f32, "vf*fC*UiIi", "", AND(SM_80,PTX70)) 1035TARGET_BUILTIN(__mma_tf32_m16n16k8_mma_f32, "vf*iC*iC*fC*IiIi", "", AND(SM_80,PTX70)) 1036 1037// Async Copy 1038TARGET_BUILTIN(__nvvm_cp_async_mbarrier_arrive, "vWi*", "", AND(SM_80,PTX70)) 1039TARGET_BUILTIN(__nvvm_cp_async_mbarrier_arrive_shared, "vWi*3", "", AND(SM_80,PTX70)) 1040TARGET_BUILTIN(__nvvm_cp_async_mbarrier_arrive_noinc, "vWi*", 
"", AND(SM_80,PTX70)) 1041TARGET_BUILTIN(__nvvm_cp_async_mbarrier_arrive_noinc_shared, "vWi*3", "", AND(SM_80,PTX70)) 1042 1043TARGET_BUILTIN(__nvvm_cp_async_ca_shared_global_4, "vv*3vC*1.", "", AND(SM_80,PTX70)) 1044TARGET_BUILTIN(__nvvm_cp_async_ca_shared_global_8, "vv*3vC*1.", "", AND(SM_80,PTX70)) 1045TARGET_BUILTIN(__nvvm_cp_async_ca_shared_global_16, "vv*3vC*1.", "", AND(SM_80,PTX70)) 1046TARGET_BUILTIN(__nvvm_cp_async_cg_shared_global_16, "vv*3vC*1.", "", AND(SM_80,PTX70)) 1047 1048TARGET_BUILTIN(__nvvm_cp_async_commit_group, "v", "", AND(SM_80,PTX70)) 1049TARGET_BUILTIN(__nvvm_cp_async_wait_group, "vIi", "", AND(SM_80,PTX70)) 1050TARGET_BUILTIN(__nvvm_cp_async_wait_all, "v", "", AND(SM_80,PTX70)) 1051 1052 1053// bf16, bf16x2 abs, neg 1054TARGET_BUILTIN(__nvvm_abs_bf16, "yy", "", AND(SM_80,PTX70)) 1055TARGET_BUILTIN(__nvvm_abs_bf16x2, "V2yV2y", "", AND(SM_80,PTX70)) 1056TARGET_BUILTIN(__nvvm_neg_bf16, "yy", "", AND(SM_80,PTX70)) 1057TARGET_BUILTIN(__nvvm_neg_bf16x2, "V2yV2y", "", AND(SM_80,PTX70)) 1058 1059TARGET_BUILTIN(__nvvm_mapa, "v*v*i", "", AND(SM_90, PTX78)) 1060TARGET_BUILTIN(__nvvm_mapa_shared_cluster, "v*3v*3i", "", AND(SM_90, PTX78)) 1061TARGET_BUILTIN(__nvvm_getctarank, "iv*", "", AND(SM_90, PTX78)) 1062TARGET_BUILTIN(__nvvm_getctarank_shared_cluster, "iv*3", "", AND(SM_90,PTX78)) 1063 1064#undef BUILTIN 1065#undef TARGET_BUILTIN 1066#pragma pop_macro("AND") 1067#pragma pop_macro("SM_53") 1068#pragma pop_macro("SM_60") 1069#pragma pop_macro("SM_70") 1070#pragma pop_macro("SM_72") 1071#pragma pop_macro("SM_75") 1072#pragma pop_macro("SM_80") 1073#pragma pop_macro("SM_86") 1074#pragma pop_macro("SM_87") 1075#pragma pop_macro("SM_89") 1076#pragma pop_macro("SM_90") 1077#pragma pop_macro("SM_90a") 1078#pragma pop_macro("PTX42") 1079#pragma pop_macro("PTX60") 1080#pragma pop_macro("PTX61") 1081#pragma pop_macro("PTX62") 1082#pragma pop_macro("PTX63") 1083#pragma pop_macro("PTX64") 1084#pragma pop_macro("PTX65") 1085#pragma pop_macro("PTX70") 
// Restore the remaining PTX version macros (PTX71 through PTX85) that were
// saved with push_macro at the top of the file.
#pragma pop_macro("PTX71")
#pragma pop_macro("PTX72")
#pragma pop_macro("PTX73")
#pragma pop_macro("PTX74")
#pragma pop_macro("PTX75")
#pragma pop_macro("PTX76")
#pragma pop_macro("PTX77")
#pragma pop_macro("PTX78")
#pragma pop_macro("PTX80")
#pragma pop_macro("PTX81")
#pragma pop_macro("PTX82")
#pragma pop_macro("PTX83")
#pragma pop_macro("PTX84")
#pragma pop_macro("PTX85")