1 //===-- SystemZISelLowering.cpp - SystemZ DAG lowering implementation -----===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the SystemZTargetLowering class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "SystemZISelLowering.h" 14 #include "SystemZCallingConv.h" 15 #include "SystemZConstantPoolValue.h" 16 #include "SystemZMachineFunctionInfo.h" 17 #include "SystemZTargetMachine.h" 18 #include "llvm/CodeGen/CallingConvLower.h" 19 #include "llvm/CodeGen/ISDOpcodes.h" 20 #include "llvm/CodeGen/MachineInstrBuilder.h" 21 #include "llvm/CodeGen/MachineRegisterInfo.h" 22 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" 23 #include "llvm/IR/GlobalAlias.h" 24 #include "llvm/IR/IntrinsicInst.h" 25 #include "llvm/IR/Intrinsics.h" 26 #include "llvm/IR/IntrinsicsS390.h" 27 #include "llvm/Support/CommandLine.h" 28 #include "llvm/Support/ErrorHandling.h" 29 #include "llvm/Support/KnownBits.h" 30 #include <cctype> 31 #include <optional> 32 33 using namespace llvm; 34 35 #define DEBUG_TYPE "systemz-lower" 36 37 namespace { 38 // Represents information about a comparison. 39 struct Comparison { 40 Comparison(SDValue Op0In, SDValue Op1In, SDValue ChainIn) 41 : Op0(Op0In), Op1(Op1In), Chain(ChainIn), 42 Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {} 43 44 // The operands to the comparison. 45 SDValue Op0, Op1; 46 47 // Chain if this is a strict floating-point comparison. 48 SDValue Chain; 49 50 // The opcode that should be used to compare Op0 and Op1. 51 unsigned Opcode; 52 53 // A SystemZICMP value. Only used for integer comparisons. 54 unsigned ICmpType; 55 56 // The mask of CC values that Opcode can produce. 57 unsigned CCValid; 58 59 // The mask of CC values for which the original condition is true. 60 unsigned CCMask; 61 }; 62 } // end anonymous namespace 63 64 // Classify VT as either 32 or 64 bit. 65 static bool is32Bit(EVT VT) { 66 switch (VT.getSimpleVT().SimpleTy) { 67 case MVT::i32: 68 return true; 69 case MVT::i64: 70 return false; 71 default: 72 llvm_unreachable("Unsupported type"); 73 } 74 } 75 76 // Return a version of MachineOperand that can be safely used before the 77 // final use. 78 static MachineOperand earlyUseOperand(MachineOperand Op) { 79 if (Op.isReg()) 80 Op.setIsKill(false); 81 return Op; 82 } 83 84 SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, 85 const SystemZSubtarget &STI) 86 : TargetLowering(TM), Subtarget(STI) { 87 MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0)); 88 89 auto *Regs = STI.getSpecialRegisters(); 90 91 // Set up the register classes. 
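  // The choice below depends on the high-word facility: with it, 32-bit
  // values may live in either the low or the high half of a 64-bit GPR
  // (modelled by GRX32); without it, only the low halves (GR32) are usable.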
92 if (Subtarget.hasHighWord()) 93 addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass); 94 else 95 addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass); 96 addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass); 97 if (!useSoftFloat()) { 98 if (Subtarget.hasVector()) { 99 addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass); 100 addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass); 101 } else { 102 addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass); 103 addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass); 104 } 105 if (Subtarget.hasVectorEnhancements1()) 106 addRegisterClass(MVT::f128, &SystemZ::VR128BitRegClass); 107 else 108 addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass); 109 110 if (Subtarget.hasVector()) { 111 addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass); 112 addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass); 113 addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass); 114 addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass); 115 addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass); 116 addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass); 117 } 118 119 if (Subtarget.hasVector()) 120 addRegisterClass(MVT::i128, &SystemZ::VR128BitRegClass); 121 } 122 123 // Compute derived properties from the register classes 124 computeRegisterProperties(Subtarget.getRegisterInfo()); 125 126 // Set up special registers. 127 setStackPointerRegisterToSaveRestore(Regs->getStackPointerRegister()); 128 129 // TODO: It may be better to default to latency-oriented scheduling, however 130 // LLVM's current latency-oriented scheduler can't handle physreg definitions 131 // such as SystemZ has with CC, so set this to the register-pressure 132 // scheduler, because it can. 133 setSchedulingPreference(Sched::RegPressure); 134 135 setBooleanContents(ZeroOrOneBooleanContent); 136 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); 137 138 setMaxAtomicSizeInBitsSupported(128); 139 140 // Instructions are strings of 2-byte aligned 2-byte values. 141 setMinFunctionAlignment(Align(2)); 142 // For performance reasons we prefer 16-byte alignment. 143 setPrefFunctionAlignment(Align(16)); 144 145 // Handle operations that are handled in a similar way for all types. 146 for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE; 147 I <= MVT::LAST_FP_VALUETYPE; 148 ++I) { 149 MVT VT = MVT::SimpleValueType(I); 150 if (isTypeLegal(VT)) { 151 // Lower SET_CC into an IPM-based sequence. 152 setOperationAction(ISD::SETCC, VT, Custom); 153 setOperationAction(ISD::STRICT_FSETCC, VT, Custom); 154 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom); 155 156 // Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE). 157 setOperationAction(ISD::SELECT, VT, Expand); 158 159 // Lower SELECT_CC and BR_CC into separate comparisons and branches. 160 setOperationAction(ISD::SELECT_CC, VT, Custom); 161 setOperationAction(ISD::BR_CC, VT, Custom); 162 } 163 } 164 165 // Expand jump table branches as address arithmetic followed by an 166 // indirect jump. 167 setOperationAction(ISD::BR_JT, MVT::Other, Expand); 168 169 // Expand BRCOND into a BR_CC (see above). 170 setOperationAction(ISD::BRCOND, MVT::Other, Expand); 171 172 // Handle integer types except i128. 173 for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE; 174 I <= MVT::LAST_INTEGER_VALUETYPE; 175 ++I) { 176 MVT VT = MVT::SimpleValueType(I); 177 if (isTypeLegal(VT) && VT != MVT::i128) { 178 setOperationAction(ISD::ABS, VT, Legal); 179 180 // Expand individual DIV and REMs into DIVREMs. 
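      // (The divide instructions produce both the quotient and the remainder
      //  in an even/odd register pair, so a single DIVREM node maps onto one
      //  divide, whereas separate SDIV/SREM nodes would each need their own.)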
181 setOperationAction(ISD::SDIV, VT, Expand); 182 setOperationAction(ISD::UDIV, VT, Expand); 183 setOperationAction(ISD::SREM, VT, Expand); 184 setOperationAction(ISD::UREM, VT, Expand); 185 setOperationAction(ISD::SDIVREM, VT, Custom); 186 setOperationAction(ISD::UDIVREM, VT, Custom); 187 188 // Support addition/subtraction with overflow. 189 setOperationAction(ISD::SADDO, VT, Custom); 190 setOperationAction(ISD::SSUBO, VT, Custom); 191 192 // Support addition/subtraction with carry. 193 setOperationAction(ISD::UADDO, VT, Custom); 194 setOperationAction(ISD::USUBO, VT, Custom); 195 196 // Support carry in as value rather than glue. 197 setOperationAction(ISD::UADDO_CARRY, VT, Custom); 198 setOperationAction(ISD::USUBO_CARRY, VT, Custom); 199 200 // Lower ATOMIC_LOAD_SUB into ATOMIC_LOAD_ADD if LAA and LAAG are 201 // available, or if the operand is constant. 202 setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom); 203 204 // Use POPCNT on z196 and above. 205 if (Subtarget.hasPopulationCount()) 206 setOperationAction(ISD::CTPOP, VT, Custom); 207 else 208 setOperationAction(ISD::CTPOP, VT, Expand); 209 210 // No special instructions for these. 211 setOperationAction(ISD::CTTZ, VT, Expand); 212 setOperationAction(ISD::ROTR, VT, Expand); 213 214 // Use *MUL_LOHI where possible instead of MULH*. 215 setOperationAction(ISD::MULHS, VT, Expand); 216 setOperationAction(ISD::MULHU, VT, Expand); 217 setOperationAction(ISD::SMUL_LOHI, VT, Custom); 218 setOperationAction(ISD::UMUL_LOHI, VT, Custom); 219 220 // Only z196 and above have native support for conversions to unsigned. 221 // On z10, promoting to i64 doesn't generate an inexact condition for 222 // values that are outside the i32 range but in the i64 range, so use 223 // the default expansion. 224 if (!Subtarget.hasFPExtension()) 225 setOperationAction(ISD::FP_TO_UINT, VT, Expand); 226 227 // Mirror those settings for STRICT_FP_TO_[SU]INT. Note that these all 228 // default to Expand, so need to be modified to Legal where appropriate. 229 setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Legal); 230 if (Subtarget.hasFPExtension()) 231 setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Legal); 232 233 // And similarly for STRICT_[SU]INT_TO_FP. 234 setOperationAction(ISD::STRICT_SINT_TO_FP, VT, Legal); 235 if (Subtarget.hasFPExtension()) 236 setOperationAction(ISD::STRICT_UINT_TO_FP, VT, Legal); 237 } 238 } 239 240 // Handle i128 if legal. 241 if (isTypeLegal(MVT::i128)) { 242 // No special instructions for these. 243 setOperationAction(ISD::SDIVREM, MVT::i128, Expand); 244 setOperationAction(ISD::UDIVREM, MVT::i128, Expand); 245 setOperationAction(ISD::SMUL_LOHI, MVT::i128, Expand); 246 setOperationAction(ISD::UMUL_LOHI, MVT::i128, Expand); 247 setOperationAction(ISD::ROTR, MVT::i128, Expand); 248 setOperationAction(ISD::ROTL, MVT::i128, Expand); 249 setOperationAction(ISD::MUL, MVT::i128, Expand); 250 setOperationAction(ISD::MULHS, MVT::i128, Expand); 251 setOperationAction(ISD::MULHU, MVT::i128, Expand); 252 setOperationAction(ISD::SDIV, MVT::i128, Expand); 253 setOperationAction(ISD::UDIV, MVT::i128, Expand); 254 setOperationAction(ISD::SREM, MVT::i128, Expand); 255 setOperationAction(ISD::UREM, MVT::i128, Expand); 256 setOperationAction(ISD::CTLZ, MVT::i128, Expand); 257 setOperationAction(ISD::CTTZ, MVT::i128, Expand); 258 259 // Support addition/subtraction with carry. 
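    // (i128 is only legal when it lives in a vector register, so these can be
    //  lowered to the 128-bit vector add/subtract instructions together with
    //  their carry/borrow-computing variants.)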
    setOperationAction(ISD::UADDO, MVT::i128, Custom);
    setOperationAction(ISD::USUBO, MVT::i128, Custom);
    setOperationAction(ISD::UADDO_CARRY, MVT::i128, Custom);
    setOperationAction(ISD::USUBO_CARRY, MVT::i128, Custom);

    // Use VPOPCT and add up partial results.
    setOperationAction(ISD::CTPOP, MVT::i128, Custom);

    // We have to use libcalls for these.
    setOperationAction(ISD::FP_TO_UINT, MVT::i128, LibCall);
    setOperationAction(ISD::FP_TO_SINT, MVT::i128, LibCall);
    setOperationAction(ISD::UINT_TO_FP, MVT::i128, LibCall);
    setOperationAction(ISD::SINT_TO_FP, MVT::i128, LibCall);
    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i128, LibCall);
    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i128, LibCall);
    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i128, LibCall);
    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i128, LibCall);
  }

  // Type legalization will convert 8- and 16-bit atomic operations into
  // forms that operate on i32s (but still keeping the original memory VT).
  // Lower them into full i32 operations.
  setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Custom);

  // Whether or not i128 is a legal type, we need to custom lower
  // the atomic operations in order to exploit SystemZ instructions.
  setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom);
  setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom);
  setOperationAction(ISD::ATOMIC_LOAD, MVT::f128, Custom);
  setOperationAction(ISD::ATOMIC_STORE, MVT::f128, Custom);

  // Mark sign/zero extending atomic loads as legal, which will make
  // DAGCombiner fold extensions into atomic loads if possible.
  setAtomicLoadExtAction({ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::i64,
                         {MVT::i8, MVT::i16, MVT::i32}, Legal);
  setAtomicLoadExtAction({ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::i32,
                         {MVT::i8, MVT::i16}, Legal);
  setAtomicLoadExtAction({ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::i16,
                         MVT::i8, Legal);

  // We can use the CC result of compare-and-swap to implement
  // the "success" result of ATOMIC_CMP_SWAP_WITH_SUCCESS.
  setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i64, Custom);
  setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);

  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);

  // Traps are legal, as we will convert them to "j .+2".
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // z10 has instructions for signed but not unsigned FP conversion.
  // Handle unsigned 32-bit types as signed 64-bit types.
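  // A zero-extended 32-bit value always fits in the non-negative i64 range,
  // so the signed 64-bit conversion is exact; no equivalent trick exists for
  // unsigned 64-bit inputs, which are expanded instead.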
323 if (!Subtarget.hasFPExtension()) { 324 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote); 325 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand); 326 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Promote); 327 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Expand); 328 } 329 330 // We have native support for a 64-bit CTLZ, via FLOGR. 331 setOperationAction(ISD::CTLZ, MVT::i32, Promote); 332 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Promote); 333 setOperationAction(ISD::CTLZ, MVT::i64, Legal); 334 335 // On z15 we have native support for a 64-bit CTPOP. 336 if (Subtarget.hasMiscellaneousExtensions3()) { 337 setOperationAction(ISD::CTPOP, MVT::i32, Promote); 338 setOperationAction(ISD::CTPOP, MVT::i64, Legal); 339 } 340 341 // Give LowerOperation the chance to replace 64-bit ORs with subregs. 342 setOperationAction(ISD::OR, MVT::i64, Custom); 343 344 // Expand 128 bit shifts without using a libcall. 345 setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand); 346 setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand); 347 setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand); 348 349 // Also expand 256 bit shifts if i128 is a legal type. 350 if (isTypeLegal(MVT::i128)) { 351 setOperationAction(ISD::SRL_PARTS, MVT::i128, Expand); 352 setOperationAction(ISD::SHL_PARTS, MVT::i128, Expand); 353 setOperationAction(ISD::SRA_PARTS, MVT::i128, Expand); 354 } 355 356 // Handle bitcast from fp128 to i128. 357 if (!isTypeLegal(MVT::i128)) 358 setOperationAction(ISD::BITCAST, MVT::i128, Custom); 359 360 // We have native instructions for i8, i16 and i32 extensions, but not i1. 361 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); 362 for (MVT VT : MVT::integer_valuetypes()) { 363 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); 364 setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote); 365 setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote); 366 } 367 368 // Handle the various types of symbolic address. 369 setOperationAction(ISD::ConstantPool, PtrVT, Custom); 370 setOperationAction(ISD::GlobalAddress, PtrVT, Custom); 371 setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom); 372 setOperationAction(ISD::BlockAddress, PtrVT, Custom); 373 setOperationAction(ISD::JumpTable, PtrVT, Custom); 374 375 // We need to handle dynamic allocations specially because of the 376 // 160-byte area at the bottom of the stack. 377 setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom); 378 setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, PtrVT, Custom); 379 380 setOperationAction(ISD::STACKSAVE, MVT::Other, Custom); 381 setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom); 382 383 // Handle prefetches with PFD or PFDRL. 384 setOperationAction(ISD::PREFETCH, MVT::Other, Custom); 385 386 // Handle readcyclecounter with STCKF. 387 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom); 388 389 for (MVT VT : MVT::fixedlen_vector_valuetypes()) { 390 // Assume by default that all vector operations need to be expanded. 391 for (unsigned Opcode = 0; Opcode < ISD::BUILTIN_OP_END; ++Opcode) 392 if (getOperationAction(Opcode, VT) == Legal) 393 setOperationAction(Opcode, VT, Expand); 394 395 // Likewise all truncating stores and extending loads. 
396 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) { 397 setTruncStoreAction(VT, InnerVT, Expand); 398 setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand); 399 setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand); 400 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand); 401 } 402 403 if (isTypeLegal(VT)) { 404 // These operations are legal for anything that can be stored in a 405 // vector register, even if there is no native support for the format 406 // as such. In particular, we can do these for v4f32 even though there 407 // are no specific instructions for that format. 408 setOperationAction(ISD::LOAD, VT, Legal); 409 setOperationAction(ISD::STORE, VT, Legal); 410 setOperationAction(ISD::VSELECT, VT, Legal); 411 setOperationAction(ISD::BITCAST, VT, Legal); 412 setOperationAction(ISD::UNDEF, VT, Legal); 413 414 // Likewise, except that we need to replace the nodes with something 415 // more specific. 416 setOperationAction(ISD::BUILD_VECTOR, VT, Custom); 417 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); 418 } 419 } 420 421 // Handle integer vector types. 422 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) { 423 if (isTypeLegal(VT)) { 424 // These operations have direct equivalents. 425 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal); 426 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Legal); 427 setOperationAction(ISD::ADD, VT, Legal); 428 setOperationAction(ISD::SUB, VT, Legal); 429 if (VT != MVT::v2i64) 430 setOperationAction(ISD::MUL, VT, Legal); 431 setOperationAction(ISD::ABS, VT, Legal); 432 setOperationAction(ISD::AND, VT, Legal); 433 setOperationAction(ISD::OR, VT, Legal); 434 setOperationAction(ISD::XOR, VT, Legal); 435 if (Subtarget.hasVectorEnhancements1()) 436 setOperationAction(ISD::CTPOP, VT, Legal); 437 else 438 setOperationAction(ISD::CTPOP, VT, Custom); 439 setOperationAction(ISD::CTTZ, VT, Legal); 440 setOperationAction(ISD::CTLZ, VT, Legal); 441 442 // Convert a GPR scalar to a vector by inserting it into element 0. 443 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); 444 445 // Use a series of unpacks for extensions. 446 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom); 447 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom); 448 449 // Detect shifts/rotates by a scalar amount and convert them into 450 // V*_BY_SCALAR. 451 setOperationAction(ISD::SHL, VT, Custom); 452 setOperationAction(ISD::SRA, VT, Custom); 453 setOperationAction(ISD::SRL, VT, Custom); 454 setOperationAction(ISD::ROTL, VT, Custom); 455 456 // Add ISD::VECREDUCE_ADD as custom in order to implement 457 // it with VZERO+VSUM 458 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom); 459 460 // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands 461 // and inverting the result as necessary. 462 setOperationAction(ISD::SETCC, VT, Custom); 463 } 464 } 465 466 if (Subtarget.hasVector()) { 467 // There should be no need to check for float types other than v2f64 468 // since <2 x f32> isn't a legal type. 
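    // (Each conversion is marked Legal for both the FP and the integer
    //  vector type, so the legality check succeeds whichever of the two
    //  types the legalizer happens to query.)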
469 setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal); 470 setOperationAction(ISD::FP_TO_SINT, MVT::v2f64, Legal); 471 setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal); 472 setOperationAction(ISD::FP_TO_UINT, MVT::v2f64, Legal); 473 setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal); 474 setOperationAction(ISD::SINT_TO_FP, MVT::v2f64, Legal); 475 setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal); 476 setOperationAction(ISD::UINT_TO_FP, MVT::v2f64, Legal); 477 478 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i64, Legal); 479 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2f64, Legal); 480 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i64, Legal); 481 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2f64, Legal); 482 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i64, Legal); 483 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2f64, Legal); 484 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i64, Legal); 485 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2f64, Legal); 486 } 487 488 if (Subtarget.hasVectorEnhancements2()) { 489 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal); 490 setOperationAction(ISD::FP_TO_SINT, MVT::v4f32, Legal); 491 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal); 492 setOperationAction(ISD::FP_TO_UINT, MVT::v4f32, Legal); 493 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal); 494 setOperationAction(ISD::SINT_TO_FP, MVT::v4f32, Legal); 495 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal); 496 setOperationAction(ISD::UINT_TO_FP, MVT::v4f32, Legal); 497 498 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Legal); 499 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4f32, Legal); 500 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Legal); 501 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4f32, Legal); 502 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Legal); 503 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4f32, Legal); 504 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Legal); 505 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4f32, Legal); 506 } 507 508 // Handle floating-point types. 509 for (unsigned I = MVT::FIRST_FP_VALUETYPE; 510 I <= MVT::LAST_FP_VALUETYPE; 511 ++I) { 512 MVT VT = MVT::SimpleValueType(I); 513 if (isTypeLegal(VT)) { 514 // We can use FI for FRINT. 515 setOperationAction(ISD::FRINT, VT, Legal); 516 517 // We can use the extended form of FI for other rounding operations. 518 if (Subtarget.hasFPExtension()) { 519 setOperationAction(ISD::FNEARBYINT, VT, Legal); 520 setOperationAction(ISD::FFLOOR, VT, Legal); 521 setOperationAction(ISD::FCEIL, VT, Legal); 522 setOperationAction(ISD::FTRUNC, VT, Legal); 523 setOperationAction(ISD::FROUND, VT, Legal); 524 } 525 526 // No special instructions for these. 527 setOperationAction(ISD::FSIN, VT, Expand); 528 setOperationAction(ISD::FCOS, VT, Expand); 529 setOperationAction(ISD::FSINCOS, VT, Expand); 530 setOperationAction(ISD::FREM, VT, Expand); 531 setOperationAction(ISD::FPOW, VT, Expand); 532 533 // Special treatment. 534 setOperationAction(ISD::IS_FPCLASS, VT, Custom); 535 536 // Handle constrained floating-point operations. 
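      // (The strict nodes use the same instructions as their non-strict
      //  counterparts; the chain operand only keeps them ordered with
      //  respect to other users of the floating-point environment.)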
537 setOperationAction(ISD::STRICT_FADD, VT, Legal); 538 setOperationAction(ISD::STRICT_FSUB, VT, Legal); 539 setOperationAction(ISD::STRICT_FMUL, VT, Legal); 540 setOperationAction(ISD::STRICT_FDIV, VT, Legal); 541 setOperationAction(ISD::STRICT_FMA, VT, Legal); 542 setOperationAction(ISD::STRICT_FSQRT, VT, Legal); 543 setOperationAction(ISD::STRICT_FRINT, VT, Legal); 544 setOperationAction(ISD::STRICT_FP_ROUND, VT, Legal); 545 setOperationAction(ISD::STRICT_FP_EXTEND, VT, Legal); 546 if (Subtarget.hasFPExtension()) { 547 setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal); 548 setOperationAction(ISD::STRICT_FFLOOR, VT, Legal); 549 setOperationAction(ISD::STRICT_FCEIL, VT, Legal); 550 setOperationAction(ISD::STRICT_FROUND, VT, Legal); 551 setOperationAction(ISD::STRICT_FTRUNC, VT, Legal); 552 } 553 } 554 } 555 556 // Handle floating-point vector types. 557 if (Subtarget.hasVector()) { 558 // Scalar-to-vector conversion is just a subreg. 559 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal); 560 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal); 561 562 // Some insertions and extractions can be done directly but others 563 // need to go via integers. 564 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom); 565 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom); 566 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom); 567 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom); 568 569 // These operations have direct equivalents. 570 setOperationAction(ISD::FADD, MVT::v2f64, Legal); 571 setOperationAction(ISD::FNEG, MVT::v2f64, Legal); 572 setOperationAction(ISD::FSUB, MVT::v2f64, Legal); 573 setOperationAction(ISD::FMUL, MVT::v2f64, Legal); 574 setOperationAction(ISD::FMA, MVT::v2f64, Legal); 575 setOperationAction(ISD::FDIV, MVT::v2f64, Legal); 576 setOperationAction(ISD::FABS, MVT::v2f64, Legal); 577 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal); 578 setOperationAction(ISD::FRINT, MVT::v2f64, Legal); 579 setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal); 580 setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal); 581 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal); 582 setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal); 583 setOperationAction(ISD::FROUND, MVT::v2f64, Legal); 584 585 // Handle constrained floating-point operations. 
586 setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal); 587 setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal); 588 setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal); 589 setOperationAction(ISD::STRICT_FMA, MVT::v2f64, Legal); 590 setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal); 591 setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal); 592 setOperationAction(ISD::STRICT_FRINT, MVT::v2f64, Legal); 593 setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v2f64, Legal); 594 setOperationAction(ISD::STRICT_FFLOOR, MVT::v2f64, Legal); 595 setOperationAction(ISD::STRICT_FCEIL, MVT::v2f64, Legal); 596 setOperationAction(ISD::STRICT_FTRUNC, MVT::v2f64, Legal); 597 setOperationAction(ISD::STRICT_FROUND, MVT::v2f64, Legal); 598 599 setOperationAction(ISD::SETCC, MVT::v2f64, Custom); 600 setOperationAction(ISD::SETCC, MVT::v4f32, Custom); 601 setOperationAction(ISD::STRICT_FSETCC, MVT::v2f64, Custom); 602 setOperationAction(ISD::STRICT_FSETCC, MVT::v4f32, Custom); 603 if (Subtarget.hasVectorEnhancements1()) { 604 setOperationAction(ISD::STRICT_FSETCCS, MVT::v2f64, Custom); 605 setOperationAction(ISD::STRICT_FSETCCS, MVT::v4f32, Custom); 606 } 607 } 608 609 // The vector enhancements facility 1 has instructions for these. 610 if (Subtarget.hasVectorEnhancements1()) { 611 setOperationAction(ISD::FADD, MVT::v4f32, Legal); 612 setOperationAction(ISD::FNEG, MVT::v4f32, Legal); 613 setOperationAction(ISD::FSUB, MVT::v4f32, Legal); 614 setOperationAction(ISD::FMUL, MVT::v4f32, Legal); 615 setOperationAction(ISD::FMA, MVT::v4f32, Legal); 616 setOperationAction(ISD::FDIV, MVT::v4f32, Legal); 617 setOperationAction(ISD::FABS, MVT::v4f32, Legal); 618 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal); 619 setOperationAction(ISD::FRINT, MVT::v4f32, Legal); 620 setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal); 621 setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal); 622 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal); 623 setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal); 624 setOperationAction(ISD::FROUND, MVT::v4f32, Legal); 625 626 setOperationAction(ISD::FMAXNUM, MVT::f64, Legal); 627 setOperationAction(ISD::FMAXIMUM, MVT::f64, Legal); 628 setOperationAction(ISD::FMINNUM, MVT::f64, Legal); 629 setOperationAction(ISD::FMINIMUM, MVT::f64, Legal); 630 631 setOperationAction(ISD::FMAXNUM, MVT::v2f64, Legal); 632 setOperationAction(ISD::FMAXIMUM, MVT::v2f64, Legal); 633 setOperationAction(ISD::FMINNUM, MVT::v2f64, Legal); 634 setOperationAction(ISD::FMINIMUM, MVT::v2f64, Legal); 635 636 setOperationAction(ISD::FMAXNUM, MVT::f32, Legal); 637 setOperationAction(ISD::FMAXIMUM, MVT::f32, Legal); 638 setOperationAction(ISD::FMINNUM, MVT::f32, Legal); 639 setOperationAction(ISD::FMINIMUM, MVT::f32, Legal); 640 641 setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal); 642 setOperationAction(ISD::FMAXIMUM, MVT::v4f32, Legal); 643 setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal); 644 setOperationAction(ISD::FMINIMUM, MVT::v4f32, Legal); 645 646 setOperationAction(ISD::FMAXNUM, MVT::f128, Legal); 647 setOperationAction(ISD::FMAXIMUM, MVT::f128, Legal); 648 setOperationAction(ISD::FMINNUM, MVT::f128, Legal); 649 setOperationAction(ISD::FMINIMUM, MVT::f128, Legal); 650 651 // Handle constrained floating-point operations. 
    setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FMA, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FRINT, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FFLOOR, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FCEIL, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FROUND, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FTRUNC, MVT::v4f32, Legal);
    for (auto VT : { MVT::f32, MVT::f64, MVT::f128,
                     MVT::v4f32, MVT::v2f64 }) {
      setOperationAction(ISD::STRICT_FMAXNUM, VT, Legal);
      setOperationAction(ISD::STRICT_FMINNUM, VT, Legal);
      setOperationAction(ISD::STRICT_FMAXIMUM, VT, Legal);
      setOperationAction(ISD::STRICT_FMINIMUM, VT, Legal);
    }
  }

  // We only have fused f128 multiply-addition on vector registers.
  if (!Subtarget.hasVectorEnhancements1()) {
    setOperationAction(ISD::FMA, MVT::f128, Expand);
    setOperationAction(ISD::STRICT_FMA, MVT::f128, Expand);
  }

  // We don't have a copysign instruction on vector registers, so FCOPYSIGN
  // needs to be expanded for f128 when it is kept in a vector register.
  if (Subtarget.hasVectorEnhancements1())
    setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);

  // Needed so that we don't try to implement f128 constant loads using
  // a load-and-extend of an f80 constant (in cases where the constant
  // would fit in an f80).
  for (MVT VT : MVT::fp_valuetypes())
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);

  // We don't have extending load instructions on vector registers.
  if (Subtarget.hasVectorEnhancements1()) {
    setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand);
  }

  // Floating-point truncation and stores need to be done separately.
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  setTruncStoreAction(MVT::f128, MVT::f32, Expand);
  setTruncStoreAction(MVT::f128, MVT::f64, Expand);

  // We have 64-bit FPR<->GPR moves, but need special handling for
  // 32-bit forms.
  if (!Subtarget.hasVector()) {
    setOperationAction(ISD::BITCAST, MVT::i32, Custom);
    setOperationAction(ISD::BITCAST, MVT::f32, Custom);
  }

  // VASTART and VACOPY need to deal with the SystemZ-specific varargs
  // structure, but VAEND is a no-op.
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VACOPY, MVT::Other, Custom);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);

  setOperationAction(ISD::GET_ROUNDING, MVT::i32, Custom);

  // Codes for which we want to perform some z-specific combinations.
  setTargetDAGCombine({ISD::ZERO_EXTEND,
                       ISD::SIGN_EXTEND,
                       ISD::SIGN_EXTEND_INREG,
                       ISD::LOAD,
                       ISD::STORE,
                       ISD::VECTOR_SHUFFLE,
                       ISD::EXTRACT_VECTOR_ELT,
                       ISD::FP_ROUND,
                       ISD::STRICT_FP_ROUND,
                       ISD::FP_EXTEND,
                       ISD::SINT_TO_FP,
                       ISD::UINT_TO_FP,
                       ISD::STRICT_FP_EXTEND,
                       ISD::BSWAP,
                       ISD::SDIV,
                       ISD::UDIV,
                       ISD::SREM,
                       ISD::UREM,
                       ISD::INTRINSIC_VOID,
                       ISD::INTRINSIC_W_CHAIN});

  // Handle intrinsics.
738 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); 739 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); 740 741 // We want to use MVC in preference to even a single load/store pair. 742 MaxStoresPerMemcpy = Subtarget.hasVector() ? 2 : 0; 743 MaxStoresPerMemcpyOptSize = 0; 744 745 // The main memset sequence is a byte store followed by an MVC. 746 // Two STC or MV..I stores win over that, but the kind of fused stores 747 // generated by target-independent code don't when the byte value is 748 // variable. E.g. "STC <reg>;MHI <reg>,257;STH <reg>" is not better 749 // than "STC;MVC". Handle the choice in target-specific code instead. 750 MaxStoresPerMemset = Subtarget.hasVector() ? 2 : 0; 751 MaxStoresPerMemsetOptSize = 0; 752 753 // Default to having -disable-strictnode-mutation on 754 IsStrictFPEnabled = true; 755 756 if (Subtarget.isTargetzOS()) { 757 struct RTLibCallMapping { 758 RTLIB::Libcall Code; 759 const char *Name; 760 }; 761 static RTLibCallMapping RTLibCallCommon[] = { 762 #define HANDLE_LIBCALL(code, name) {RTLIB::code, name}, 763 #include "ZOSLibcallNames.def" 764 }; 765 for (auto &E : RTLibCallCommon) 766 setLibcallName(E.Code, E.Name); 767 } 768 } 769 770 bool SystemZTargetLowering::useSoftFloat() const { 771 return Subtarget.hasSoftFloat(); 772 } 773 774 EVT SystemZTargetLowering::getSetCCResultType(const DataLayout &DL, 775 LLVMContext &, EVT VT) const { 776 if (!VT.isVector()) 777 return MVT::i32; 778 return VT.changeVectorElementTypeToInteger(); 779 } 780 781 bool SystemZTargetLowering::isFMAFasterThanFMulAndFAdd( 782 const MachineFunction &MF, EVT VT) const { 783 VT = VT.getScalarType(); 784 785 if (!VT.isSimple()) 786 return false; 787 788 switch (VT.getSimpleVT().SimpleTy) { 789 case MVT::f32: 790 case MVT::f64: 791 return true; 792 case MVT::f128: 793 return Subtarget.hasVectorEnhancements1(); 794 default: 795 break; 796 } 797 798 return false; 799 } 800 801 // Return true if the constant can be generated with a vector instruction, 802 // such as VGM, VGMB or VREPI. 803 bool SystemZVectorConstantInfo::isVectorConstantLegal( 804 const SystemZSubtarget &Subtarget) { 805 const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); 806 if (!Subtarget.hasVector() || 807 (isFP128 && !Subtarget.hasVectorEnhancements1())) 808 return false; 809 810 // Try using VECTOR GENERATE BYTE MASK. This is the architecturally- 811 // preferred way of creating all-zero and all-one vectors so give it 812 // priority over other methods below. 
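  // Each of the 16 mask bits selects one all-ones byte of the result, so this
  // only works when every byte of the constant is either 0x00 or 0xff.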
813 unsigned Mask = 0; 814 unsigned I = 0; 815 for (; I < SystemZ::VectorBytes; ++I) { 816 uint64_t Byte = IntBits.lshr(I * 8).trunc(8).getZExtValue(); 817 if (Byte == 0xff) 818 Mask |= 1ULL << I; 819 else if (Byte != 0) 820 break; 821 } 822 if (I == SystemZ::VectorBytes) { 823 Opcode = SystemZISD::BYTE_MASK; 824 OpVals.push_back(Mask); 825 VecVT = MVT::getVectorVT(MVT::getIntegerVT(8), 16); 826 return true; 827 } 828 829 if (SplatBitSize > 64) 830 return false; 831 832 auto tryValue = [&](uint64_t Value) -> bool { 833 // Try VECTOR REPLICATE IMMEDIATE 834 int64_t SignedValue = SignExtend64(Value, SplatBitSize); 835 if (isInt<16>(SignedValue)) { 836 OpVals.push_back(((unsigned) SignedValue)); 837 Opcode = SystemZISD::REPLICATE; 838 VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize), 839 SystemZ::VectorBits / SplatBitSize); 840 return true; 841 } 842 // Try VECTOR GENERATE MASK 843 unsigned Start, End; 844 if (TII->isRxSBGMask(Value, SplatBitSize, Start, End)) { 845 // isRxSBGMask returns the bit numbers for a full 64-bit value, with 0 846 // denoting 1 << 63 and 63 denoting 1. Convert them to bit numbers for 847 // an SplatBitSize value, so that 0 denotes 1 << (SplatBitSize-1). 848 OpVals.push_back(Start - (64 - SplatBitSize)); 849 OpVals.push_back(End - (64 - SplatBitSize)); 850 Opcode = SystemZISD::ROTATE_MASK; 851 VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize), 852 SystemZ::VectorBits / SplatBitSize); 853 return true; 854 } 855 return false; 856 }; 857 858 // First try assuming that any undefined bits above the highest set bit 859 // and below the lowest set bit are 1s. This increases the likelihood of 860 // being able to use a sign-extended element value in VECTOR REPLICATE 861 // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK. 862 uint64_t SplatBitsZ = SplatBits.getZExtValue(); 863 uint64_t SplatUndefZ = SplatUndef.getZExtValue(); 864 unsigned LowerBits = llvm::countr_zero(SplatBitsZ); 865 unsigned UpperBits = llvm::countl_zero(SplatBitsZ); 866 uint64_t Lower = SplatUndefZ & maskTrailingOnes<uint64_t>(LowerBits); 867 uint64_t Upper = SplatUndefZ & maskLeadingOnes<uint64_t>(UpperBits); 868 if (tryValue(SplatBitsZ | Upper | Lower)) 869 return true; 870 871 // Now try assuming that any undefined bits between the first and 872 // last defined set bits are set. This increases the chances of 873 // using a non-wraparound mask. 874 uint64_t Middle = SplatUndefZ & ~Upper & ~Lower; 875 return tryValue(SplatBitsZ | Middle); 876 } 877 878 SystemZVectorConstantInfo::SystemZVectorConstantInfo(APInt IntImm) { 879 if (IntImm.isSingleWord()) { 880 IntBits = APInt(128, IntImm.getZExtValue()); 881 IntBits <<= (SystemZ::VectorBits - IntImm.getBitWidth()); 882 } else 883 IntBits = IntImm; 884 assert(IntBits.getBitWidth() == 128 && "Unsupported APInt."); 885 886 // Find the smallest splat. 887 SplatBits = IntImm; 888 unsigned Width = SplatBits.getBitWidth(); 889 while (Width > 8) { 890 unsigned HalfSize = Width / 2; 891 APInt HighValue = SplatBits.lshr(HalfSize).trunc(HalfSize); 892 APInt LowValue = SplatBits.trunc(HalfSize); 893 894 // If the two halves do not match, stop here. 895 if (HighValue != LowValue || 8 > HalfSize) 896 break; 897 898 SplatBits = HighValue; 899 Width = HalfSize; 900 } 901 SplatUndef = 0; 902 SplatBitSize = Width; 903 } 904 905 SystemZVectorConstantInfo::SystemZVectorConstantInfo(BuildVectorSDNode *BVN) { 906 assert(BVN->isConstant() && "Expected a constant BUILD_VECTOR"); 907 bool HasAnyUndefs; 908 909 // Get IntBits by finding the 128 bit splat. 
910 BVN->isConstantSplat(IntBits, SplatUndef, SplatBitSize, HasAnyUndefs, 128, 911 true); 912 913 // Get SplatBits by finding the 8 bit or greater splat. 914 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, 8, 915 true); 916 } 917 918 bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, 919 bool ForCodeSize) const { 920 // We can load zero using LZ?R and negative zero using LZ?R;LC?BR. 921 if (Imm.isZero() || Imm.isNegZero()) 922 return true; 923 924 return SystemZVectorConstantInfo(Imm).isVectorConstantLegal(Subtarget); 925 } 926 927 /// Returns true if stack probing through inline assembly is requested. 928 bool SystemZTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const { 929 // If the function specifically requests inline stack probes, emit them. 930 if (MF.getFunction().hasFnAttribute("probe-stack")) 931 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() == 932 "inline-asm"; 933 return false; 934 } 935 936 TargetLowering::AtomicExpansionKind 937 SystemZTargetLowering::shouldCastAtomicLoadInIR(LoadInst *LI) const { 938 return AtomicExpansionKind::None; 939 } 940 941 TargetLowering::AtomicExpansionKind 942 SystemZTargetLowering::shouldCastAtomicStoreInIR(StoreInst *SI) const { 943 return AtomicExpansionKind::None; 944 } 945 946 TargetLowering::AtomicExpansionKind 947 SystemZTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const { 948 // Don't expand subword operations as they require special treatment. 949 if (RMW->getType()->isIntegerTy(8) || RMW->getType()->isIntegerTy(16)) 950 return AtomicExpansionKind::None; 951 952 // Don't expand if there is a target instruction available. 953 if (Subtarget.hasInterlockedAccess1() && 954 (RMW->getType()->isIntegerTy(32) || RMW->getType()->isIntegerTy(64)) && 955 (RMW->getOperation() == AtomicRMWInst::BinOp::Add || 956 RMW->getOperation() == AtomicRMWInst::BinOp::Sub || 957 RMW->getOperation() == AtomicRMWInst::BinOp::And || 958 RMW->getOperation() == AtomicRMWInst::BinOp::Or || 959 RMW->getOperation() == AtomicRMWInst::BinOp::Xor)) 960 return AtomicExpansionKind::None; 961 962 return AtomicExpansionKind::CmpXChg; 963 } 964 965 bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const { 966 // We can use CGFI or CLGFI. 967 return isInt<32>(Imm) || isUInt<32>(Imm); 968 } 969 970 bool SystemZTargetLowering::isLegalAddImmediate(int64_t Imm) const { 971 // We can use ALGFI or SLGFI. 972 return isUInt<32>(Imm) || isUInt<32>(-Imm); 973 } 974 975 bool SystemZTargetLowering::allowsMisalignedMemoryAccesses( 976 EVT VT, unsigned, Align, MachineMemOperand::Flags, unsigned *Fast) const { 977 // Unaligned accesses should never be slower than the expanded version. 978 // We check specifically for aligned accesses in the few cases where 979 // they are required. 980 if (Fast) 981 *Fast = 1; 982 return true; 983 } 984 985 // Information about the addressing mode for a memory access. 986 struct AddressingMode { 987 // True if a long displacement is supported. 988 bool LongDisplacement; 989 990 // True if use of index register is supported. 991 bool IndexReg; 992 993 AddressingMode(bool LongDispl, bool IdxReg) : 994 LongDisplacement(LongDispl), IndexReg(IdxReg) {} 995 }; 996 997 // Return the desired addressing mode for a Load which has only one use (in 998 // the same block) which is a Store. 
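// For example, an i8 load feeding an i8 store is expected to become an MVC,
// which only takes a base register and an unsigned 12-bit displacement (no
// index register), so neither long displacements nor indexing are requested
// in that case.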
999 static AddressingMode getLoadStoreAddrMode(bool HasVector, 1000 Type *Ty) { 1001 // With vector support a Load->Store combination may be combined to either 1002 // an MVC or vector operations and it seems to work best to allow the 1003 // vector addressing mode. 1004 if (HasVector) 1005 return AddressingMode(false/*LongDispl*/, true/*IdxReg*/); 1006 1007 // Otherwise only the MVC case is special. 1008 bool MVC = Ty->isIntegerTy(8); 1009 return AddressingMode(!MVC/*LongDispl*/, !MVC/*IdxReg*/); 1010 } 1011 1012 // Return the addressing mode which seems most desirable given an LLVM 1013 // Instruction pointer. 1014 static AddressingMode 1015 supportedAddressingMode(Instruction *I, bool HasVector) { 1016 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { 1017 switch (II->getIntrinsicID()) { 1018 default: break; 1019 case Intrinsic::memset: 1020 case Intrinsic::memmove: 1021 case Intrinsic::memcpy: 1022 return AddressingMode(false/*LongDispl*/, false/*IdxReg*/); 1023 } 1024 } 1025 1026 if (isa<LoadInst>(I) && I->hasOneUse()) { 1027 auto *SingleUser = cast<Instruction>(*I->user_begin()); 1028 if (SingleUser->getParent() == I->getParent()) { 1029 if (isa<ICmpInst>(SingleUser)) { 1030 if (auto *C = dyn_cast<ConstantInt>(SingleUser->getOperand(1))) 1031 if (C->getBitWidth() <= 64 && 1032 (isInt<16>(C->getSExtValue()) || isUInt<16>(C->getZExtValue()))) 1033 // Comparison of memory with 16 bit signed / unsigned immediate 1034 return AddressingMode(false/*LongDispl*/, false/*IdxReg*/); 1035 } else if (isa<StoreInst>(SingleUser)) 1036 // Load->Store 1037 return getLoadStoreAddrMode(HasVector, I->getType()); 1038 } 1039 } else if (auto *StoreI = dyn_cast<StoreInst>(I)) { 1040 if (auto *LoadI = dyn_cast<LoadInst>(StoreI->getValueOperand())) 1041 if (LoadI->hasOneUse() && LoadI->getParent() == I->getParent()) 1042 // Load->Store 1043 return getLoadStoreAddrMode(HasVector, LoadI->getType()); 1044 } 1045 1046 if (HasVector && (isa<LoadInst>(I) || isa<StoreInst>(I))) { 1047 1048 // * Use LDE instead of LE/LEY for z13 to avoid partial register 1049 // dependencies (LDE only supports small offsets). 1050 // * Utilize the vector registers to hold floating point 1051 // values (vector load / store instructions only support small 1052 // offsets). 1053 1054 Type *MemAccessTy = (isa<LoadInst>(I) ? I->getType() : 1055 I->getOperand(0)->getType()); 1056 bool IsFPAccess = MemAccessTy->isFloatingPointTy(); 1057 bool IsVectorAccess = MemAccessTy->isVectorTy(); 1058 1059 // A store of an extracted vector element will be combined into a VSTE type 1060 // instruction. 1061 if (!IsVectorAccess && isa<StoreInst>(I)) { 1062 Value *DataOp = I->getOperand(0); 1063 if (isa<ExtractElementInst>(DataOp)) 1064 IsVectorAccess = true; 1065 } 1066 1067 // A load which gets inserted into a vector element will be combined into a 1068 // VLE type instruction. 1069 if (!IsVectorAccess && isa<LoadInst>(I) && I->hasOneUse()) { 1070 User *LoadUser = *I->user_begin(); 1071 if (isa<InsertElementInst>(LoadUser)) 1072 IsVectorAccess = true; 1073 } 1074 1075 if (IsFPAccess || IsVectorAccess) 1076 return AddressingMode(false/*LongDispl*/, true/*IdxReg*/); 1077 } 1078 1079 return AddressingMode(true/*LongDispl*/, true/*IdxReg*/); 1080 } 1081 1082 bool SystemZTargetLowering::isLegalAddressingMode(const DataLayout &DL, 1083 const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I) const { 1084 // Punt on globals for now, although they can be used in limited 1085 // RELATIVE LONG cases. 
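  // (LARL and the LOAD/STORE RELATIVE LONG instructions reach globals
  //  PC-relatively, and even then only for suitably aligned symbols, so a
  //  global base is conservatively rejected here.)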
1086 if (AM.BaseGV) 1087 return false; 1088 1089 // Require a 20-bit signed offset. 1090 if (!isInt<20>(AM.BaseOffs)) 1091 return false; 1092 1093 bool RequireD12 = 1094 Subtarget.hasVector() && (Ty->isVectorTy() || Ty->isIntegerTy(128)); 1095 AddressingMode SupportedAM(!RequireD12, true); 1096 if (I != nullptr) 1097 SupportedAM = supportedAddressingMode(I, Subtarget.hasVector()); 1098 1099 if (!SupportedAM.LongDisplacement && !isUInt<12>(AM.BaseOffs)) 1100 return false; 1101 1102 if (!SupportedAM.IndexReg) 1103 // No indexing allowed. 1104 return AM.Scale == 0; 1105 else 1106 // Indexing is OK but no scale factor can be applied. 1107 return AM.Scale == 0 || AM.Scale == 1; 1108 } 1109 1110 bool SystemZTargetLowering::findOptimalMemOpLowering( 1111 std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, 1112 unsigned SrcAS, const AttributeList &FuncAttributes) const { 1113 const int MVCFastLen = 16; 1114 1115 if (Limit != ~unsigned(0)) { 1116 // Don't expand Op into scalar loads/stores in these cases: 1117 if (Op.isMemcpy() && Op.allowOverlap() && Op.size() <= MVCFastLen) 1118 return false; // Small memcpy: Use MVC 1119 if (Op.isMemset() && Op.size() - 1 <= MVCFastLen) 1120 return false; // Small memset (first byte with STC/MVI): Use MVC 1121 if (Op.isZeroMemset()) 1122 return false; // Memset zero: Use XC 1123 } 1124 1125 return TargetLowering::findOptimalMemOpLowering(MemOps, Limit, Op, DstAS, 1126 SrcAS, FuncAttributes); 1127 } 1128 1129 EVT SystemZTargetLowering::getOptimalMemOpType(const MemOp &Op, 1130 const AttributeList &FuncAttributes) const { 1131 return Subtarget.hasVector() ? MVT::v2i64 : MVT::Other; 1132 } 1133 1134 bool SystemZTargetLowering::isTruncateFree(Type *FromType, Type *ToType) const { 1135 if (!FromType->isIntegerTy() || !ToType->isIntegerTy()) 1136 return false; 1137 unsigned FromBits = FromType->getPrimitiveSizeInBits().getFixedValue(); 1138 unsigned ToBits = ToType->getPrimitiveSizeInBits().getFixedValue(); 1139 return FromBits > ToBits; 1140 } 1141 1142 bool SystemZTargetLowering::isTruncateFree(EVT FromVT, EVT ToVT) const { 1143 if (!FromVT.isInteger() || !ToVT.isInteger()) 1144 return false; 1145 unsigned FromBits = FromVT.getFixedSizeInBits(); 1146 unsigned ToBits = ToVT.getFixedSizeInBits(); 1147 return FromBits > ToBits; 1148 } 1149 1150 //===----------------------------------------------------------------------===// 1151 // Inline asm support 1152 //===----------------------------------------------------------------------===// 1153 1154 TargetLowering::ConstraintType 1155 SystemZTargetLowering::getConstraintType(StringRef Constraint) const { 1156 if (Constraint.size() == 1) { 1157 switch (Constraint[0]) { 1158 case 'a': // Address register 1159 case 'd': // Data register (equivalent to 'r') 1160 case 'f': // Floating-point register 1161 case 'h': // High-part register 1162 case 'r': // General-purpose register 1163 case 'v': // Vector register 1164 return C_RegisterClass; 1165 1166 case 'Q': // Memory with base and unsigned 12-bit displacement 1167 case 'R': // Likewise, plus an index 1168 case 'S': // Memory with base and signed 20-bit displacement 1169 case 'T': // Likewise, plus an index 1170 case 'm': // Equivalent to 'T'. 
1171 return C_Memory; 1172 1173 case 'I': // Unsigned 8-bit constant 1174 case 'J': // Unsigned 12-bit constant 1175 case 'K': // Signed 16-bit constant 1176 case 'L': // Signed 20-bit displacement (on all targets we support) 1177 case 'M': // 0x7fffffff 1178 return C_Immediate; 1179 1180 default: 1181 break; 1182 } 1183 } else if (Constraint.size() == 2 && Constraint[0] == 'Z') { 1184 switch (Constraint[1]) { 1185 case 'Q': // Address with base and unsigned 12-bit displacement 1186 case 'R': // Likewise, plus an index 1187 case 'S': // Address with base and signed 20-bit displacement 1188 case 'T': // Likewise, plus an index 1189 return C_Address; 1190 1191 default: 1192 break; 1193 } 1194 } 1195 return TargetLowering::getConstraintType(Constraint); 1196 } 1197 1198 TargetLowering::ConstraintWeight SystemZTargetLowering:: 1199 getSingleConstraintMatchWeight(AsmOperandInfo &info, 1200 const char *constraint) const { 1201 ConstraintWeight weight = CW_Invalid; 1202 Value *CallOperandVal = info.CallOperandVal; 1203 // If we don't have a value, we can't do a match, 1204 // but allow it at the lowest weight. 1205 if (!CallOperandVal) 1206 return CW_Default; 1207 Type *type = CallOperandVal->getType(); 1208 // Look at the constraint type. 1209 switch (*constraint) { 1210 default: 1211 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); 1212 break; 1213 1214 case 'a': // Address register 1215 case 'd': // Data register (equivalent to 'r') 1216 case 'h': // High-part register 1217 case 'r': // General-purpose register 1218 weight = CallOperandVal->getType()->isIntegerTy() ? CW_Register : CW_Default; 1219 break; 1220 1221 case 'f': // Floating-point register 1222 if (!useSoftFloat()) 1223 weight = type->isFloatingPointTy() ? CW_Register : CW_Default; 1224 break; 1225 1226 case 'v': // Vector register 1227 if (Subtarget.hasVector()) 1228 weight = (type->isVectorTy() || type->isFloatingPointTy()) ? CW_Register 1229 : CW_Default; 1230 break; 1231 1232 case 'I': // Unsigned 8-bit constant 1233 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal)) 1234 if (isUInt<8>(C->getZExtValue())) 1235 weight = CW_Constant; 1236 break; 1237 1238 case 'J': // Unsigned 12-bit constant 1239 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal)) 1240 if (isUInt<12>(C->getZExtValue())) 1241 weight = CW_Constant; 1242 break; 1243 1244 case 'K': // Signed 16-bit constant 1245 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal)) 1246 if (isInt<16>(C->getSExtValue())) 1247 weight = CW_Constant; 1248 break; 1249 1250 case 'L': // Signed 20-bit displacement (on all targets we support) 1251 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal)) 1252 if (isInt<20>(C->getSExtValue())) 1253 weight = CW_Constant; 1254 break; 1255 1256 case 'M': // 0x7fffffff 1257 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal)) 1258 if (C->getZExtValue() == 0x7fffffff) 1259 weight = CW_Constant; 1260 break; 1261 } 1262 return weight; 1263 } 1264 1265 // Parse a "{tNNN}" register constraint for which the register type "t" 1266 // has already been verified. MC is the class associated with "t" and 1267 // Map maps 0-based register numbers to LLVM register numbers. 
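// Returns {0, nullptr} when the constraint is not of the form "{tNNN}" with
// NNN naming a register that exists in Map.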
1268 static std::pair<unsigned, const TargetRegisterClass *> 1269 parseRegisterNumber(StringRef Constraint, const TargetRegisterClass *RC, 1270 const unsigned *Map, unsigned Size) { 1271 assert(*(Constraint.end()-1) == '}' && "Missing '}'"); 1272 if (isdigit(Constraint[2])) { 1273 unsigned Index; 1274 bool Failed = 1275 Constraint.slice(2, Constraint.size() - 1).getAsInteger(10, Index); 1276 if (!Failed && Index < Size && Map[Index]) 1277 return std::make_pair(Map[Index], RC); 1278 } 1279 return std::make_pair(0U, nullptr); 1280 } 1281 1282 std::pair<unsigned, const TargetRegisterClass *> 1283 SystemZTargetLowering::getRegForInlineAsmConstraint( 1284 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const { 1285 if (Constraint.size() == 1) { 1286 // GCC Constraint Letters 1287 switch (Constraint[0]) { 1288 default: break; 1289 case 'd': // Data register (equivalent to 'r') 1290 case 'r': // General-purpose register 1291 if (VT.getSizeInBits() == 64) 1292 return std::make_pair(0U, &SystemZ::GR64BitRegClass); 1293 else if (VT.getSizeInBits() == 128) 1294 return std::make_pair(0U, &SystemZ::GR128BitRegClass); 1295 return std::make_pair(0U, &SystemZ::GR32BitRegClass); 1296 1297 case 'a': // Address register 1298 if (VT == MVT::i64) 1299 return std::make_pair(0U, &SystemZ::ADDR64BitRegClass); 1300 else if (VT == MVT::i128) 1301 return std::make_pair(0U, &SystemZ::ADDR128BitRegClass); 1302 return std::make_pair(0U, &SystemZ::ADDR32BitRegClass); 1303 1304 case 'h': // High-part register (an LLVM extension) 1305 return std::make_pair(0U, &SystemZ::GRH32BitRegClass); 1306 1307 case 'f': // Floating-point register 1308 if (!useSoftFloat()) { 1309 if (VT.getSizeInBits() == 64) 1310 return std::make_pair(0U, &SystemZ::FP64BitRegClass); 1311 else if (VT.getSizeInBits() == 128) 1312 return std::make_pair(0U, &SystemZ::FP128BitRegClass); 1313 return std::make_pair(0U, &SystemZ::FP32BitRegClass); 1314 } 1315 break; 1316 1317 case 'v': // Vector register 1318 if (Subtarget.hasVector()) { 1319 if (VT.getSizeInBits() == 32) 1320 return std::make_pair(0U, &SystemZ::VR32BitRegClass); 1321 if (VT.getSizeInBits() == 64) 1322 return std::make_pair(0U, &SystemZ::VR64BitRegClass); 1323 return std::make_pair(0U, &SystemZ::VR128BitRegClass); 1324 } 1325 break; 1326 } 1327 } 1328 if (Constraint.starts_with("{")) { 1329 1330 // A clobber constraint (e.g. ~{f0}) will have MVT::Other which is illegal 1331 // to check the size on. 1332 auto getVTSizeInBits = [&VT]() { 1333 return VT == MVT::Other ? 0 : VT.getSizeInBits(); 1334 }; 1335 1336 // We need to override the default register parsing for GPRs and FPRs 1337 // because the interpretation depends on VT. The internal names of 1338 // the registers are also different from the external names 1339 // (F0D and F0S instead of F0, etc.). 
1340 if (Constraint[1] == 'r') { 1341 if (getVTSizeInBits() == 32) 1342 return parseRegisterNumber(Constraint, &SystemZ::GR32BitRegClass, 1343 SystemZMC::GR32Regs, 16); 1344 if (getVTSizeInBits() == 128) 1345 return parseRegisterNumber(Constraint, &SystemZ::GR128BitRegClass, 1346 SystemZMC::GR128Regs, 16); 1347 return parseRegisterNumber(Constraint, &SystemZ::GR64BitRegClass, 1348 SystemZMC::GR64Regs, 16); 1349 } 1350 if (Constraint[1] == 'f') { 1351 if (useSoftFloat()) 1352 return std::make_pair( 1353 0u, static_cast<const TargetRegisterClass *>(nullptr)); 1354 if (getVTSizeInBits() == 32) 1355 return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass, 1356 SystemZMC::FP32Regs, 16); 1357 if (getVTSizeInBits() == 128) 1358 return parseRegisterNumber(Constraint, &SystemZ::FP128BitRegClass, 1359 SystemZMC::FP128Regs, 16); 1360 return parseRegisterNumber(Constraint, &SystemZ::FP64BitRegClass, 1361 SystemZMC::FP64Regs, 16); 1362 } 1363 if (Constraint[1] == 'v') { 1364 if (!Subtarget.hasVector()) 1365 return std::make_pair( 1366 0u, static_cast<const TargetRegisterClass *>(nullptr)); 1367 if (getVTSizeInBits() == 32) 1368 return parseRegisterNumber(Constraint, &SystemZ::VR32BitRegClass, 1369 SystemZMC::VR32Regs, 32); 1370 if (getVTSizeInBits() == 64) 1371 return parseRegisterNumber(Constraint, &SystemZ::VR64BitRegClass, 1372 SystemZMC::VR64Regs, 32); 1373 return parseRegisterNumber(Constraint, &SystemZ::VR128BitRegClass, 1374 SystemZMC::VR128Regs, 32); 1375 } 1376 } 1377 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); 1378 } 1379 1380 // FIXME? Maybe this could be a TableGen attribute on some registers and 1381 // this table could be generated automatically from RegInfo. 1382 Register 1383 SystemZTargetLowering::getRegisterByName(const char *RegName, LLT VT, 1384 const MachineFunction &MF) const { 1385 Register Reg = 1386 StringSwitch<Register>(RegName) 1387 .Case("r4", Subtarget.isTargetXPLINK64() ? SystemZ::R4D : 0) 1388 .Case("r15", Subtarget.isTargetELF() ? SystemZ::R15D : 0) 1389 .Default(0); 1390 1391 if (Reg) 1392 return Reg; 1393 report_fatal_error("Invalid register name global variable"); 1394 } 1395 1396 Register SystemZTargetLowering::getExceptionPointerRegister( 1397 const Constant *PersonalityFn) const { 1398 return Subtarget.isTargetXPLINK64() ? SystemZ::R1D : SystemZ::R6D; 1399 } 1400 1401 Register SystemZTargetLowering::getExceptionSelectorRegister( 1402 const Constant *PersonalityFn) const { 1403 return Subtarget.isTargetXPLINK64() ? SystemZ::R2D : SystemZ::R7D; 1404 } 1405 1406 void SystemZTargetLowering::LowerAsmOperandForConstraint( 1407 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops, 1408 SelectionDAG &DAG) const { 1409 // Only support length 1 constraints for now. 
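  // Each case below only pushes an operand when the constant satisfies the
  // constraint; leaving Ops empty causes the operand to be rejected.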
1410 if (Constraint.size() == 1) { 1411 switch (Constraint[0]) { 1412 case 'I': // Unsigned 8-bit constant 1413 if (auto *C = dyn_cast<ConstantSDNode>(Op)) 1414 if (isUInt<8>(C->getZExtValue())) 1415 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), 1416 Op.getValueType())); 1417 return; 1418 1419 case 'J': // Unsigned 12-bit constant 1420 if (auto *C = dyn_cast<ConstantSDNode>(Op)) 1421 if (isUInt<12>(C->getZExtValue())) 1422 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), 1423 Op.getValueType())); 1424 return; 1425 1426 case 'K': // Signed 16-bit constant 1427 if (auto *C = dyn_cast<ConstantSDNode>(Op)) 1428 if (isInt<16>(C->getSExtValue())) 1429 Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op), 1430 Op.getValueType())); 1431 return; 1432 1433 case 'L': // Signed 20-bit displacement (on all targets we support) 1434 if (auto *C = dyn_cast<ConstantSDNode>(Op)) 1435 if (isInt<20>(C->getSExtValue())) 1436 Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op), 1437 Op.getValueType())); 1438 return; 1439 1440 case 'M': // 0x7fffffff 1441 if (auto *C = dyn_cast<ConstantSDNode>(Op)) 1442 if (C->getZExtValue() == 0x7fffffff) 1443 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), 1444 Op.getValueType())); 1445 return; 1446 } 1447 } 1448 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); 1449 } 1450 1451 //===----------------------------------------------------------------------===// 1452 // Calling conventions 1453 //===----------------------------------------------------------------------===// 1454 1455 #include "SystemZGenCallingConv.inc" 1456 1457 const MCPhysReg *SystemZTargetLowering::getScratchRegisters( 1458 CallingConv::ID) const { 1459 static const MCPhysReg ScratchRegs[] = { SystemZ::R0D, SystemZ::R1D, 1460 SystemZ::R14D, 0 }; 1461 return ScratchRegs; 1462 } 1463 1464 bool SystemZTargetLowering::allowTruncateForTailCall(Type *FromType, 1465 Type *ToType) const { 1466 return isTruncateFree(FromType, ToType); 1467 } 1468 1469 bool SystemZTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { 1470 return CI->isTailCall(); 1471 } 1472 1473 // Value is a value that has been passed to us in the location described by VA 1474 // (and so has type VA.getLocVT()). Convert Value to VA.getValVT(), chaining 1475 // any loads onto Chain. 1476 static SDValue convertLocVTToValVT(SelectionDAG &DAG, const SDLoc &DL, 1477 CCValAssign &VA, SDValue Chain, 1478 SDValue Value) { 1479 // If the argument has been promoted from a smaller type, insert an 1480 // assertion to capture this. 1481 if (VA.getLocInfo() == CCValAssign::SExt) 1482 Value = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Value, 1483 DAG.getValueType(VA.getValVT())); 1484 else if (VA.getLocInfo() == CCValAssign::ZExt) 1485 Value = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Value, 1486 DAG.getValueType(VA.getValVT())); 1487 1488 if (VA.isExtInLoc()) 1489 Value = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Value); 1490 else if (VA.getLocInfo() == CCValAssign::BCvt) { 1491 // If this is a short vector argument loaded from the stack, 1492 // extend from i64 to full vector size and then bitcast. 
1493 assert(VA.getLocVT() == MVT::i64); 1494 assert(VA.getValVT().isVector()); 1495 Value = DAG.getBuildVector(MVT::v2i64, DL, {Value, DAG.getUNDEF(MVT::i64)}); 1496 Value = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Value); 1497 } else 1498 assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo"); 1499 return Value; 1500 } 1501 1502 // Value is a value of type VA.getValVT() that we need to copy into 1503 // the location described by VA. Return a copy of Value converted to 1504 // VA.getLocVT(). The caller is responsible for handling indirect values. 1505 static SDValue convertValVTToLocVT(SelectionDAG &DAG, const SDLoc &DL, 1506 CCValAssign &VA, SDValue Value) { 1507 switch (VA.getLocInfo()) { 1508 case CCValAssign::SExt: 1509 return DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Value); 1510 case CCValAssign::ZExt: 1511 return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value); 1512 case CCValAssign::AExt: 1513 return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value); 1514 case CCValAssign::BCvt: { 1515 assert(VA.getLocVT() == MVT::i64 || VA.getLocVT() == MVT::i128); 1516 assert(VA.getValVT().isVector() || VA.getValVT() == MVT::f32 || 1517 VA.getValVT() == MVT::f64 || VA.getValVT() == MVT::f128); 1518 // For an f32 vararg we need to first promote it to an f64 and then 1519 // bitcast it to an i64. 1520 if (VA.getValVT() == MVT::f32 && VA.getLocVT() == MVT::i64) 1521 Value = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, Value); 1522 MVT BitCastToType = VA.getValVT().isVector() && VA.getLocVT() == MVT::i64 1523 ? MVT::v2i64 1524 : VA.getLocVT(); 1525 Value = DAG.getNode(ISD::BITCAST, DL, BitCastToType, Value); 1526 // For ELF, this is a short vector argument to be stored to the stack, 1527 // bitcast to v2i64 and then extract first element. 1528 if (BitCastToType == MVT::v2i64) 1529 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value, 1530 DAG.getConstant(0, DL, MVT::i32)); 1531 return Value; 1532 } 1533 case CCValAssign::Full: 1534 return Value; 1535 default: 1536 llvm_unreachable("Unhandled getLocInfo()"); 1537 } 1538 } 1539 1540 static SDValue lowerI128ToGR128(SelectionDAG &DAG, SDValue In) { 1541 SDLoc DL(In); 1542 SDValue Lo, Hi; 1543 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128)) { 1544 Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, In); 1545 Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, 1546 DAG.getNode(ISD::SRL, DL, MVT::i128, In, 1547 DAG.getConstant(64, DL, MVT::i32))); 1548 } else { 1549 std::tie(Lo, Hi) = DAG.SplitScalar(In, DL, MVT::i64, MVT::i64); 1550 } 1551 1552 // FIXME: If v2i64 were a legal type, we could use it instead of 1553 // Untyped here. This might enable improved folding.
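// Note: PAIR128 combines the Hi and Lo GR64 halves computed above into a
// single untyped 128-bit register-pair value (Hi is the high doubleword,
// Lo the low one).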
1554 SDNode *Pair = DAG.getMachineNode(SystemZ::PAIR128, DL, 1555 MVT::Untyped, Hi, Lo); 1556 return SDValue(Pair, 0); 1557 } 1558 1559 static SDValue lowerGR128ToI128(SelectionDAG &DAG, SDValue In) { 1560 SDLoc DL(In); 1561 SDValue Hi = DAG.getTargetExtractSubreg(SystemZ::subreg_h64, 1562 DL, MVT::i64, In); 1563 SDValue Lo = DAG.getTargetExtractSubreg(SystemZ::subreg_l64, 1564 DL, MVT::i64, In); 1565 1566 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128)) { 1567 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, Lo); 1568 Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, Hi); 1569 Hi = DAG.getNode(ISD::SHL, DL, MVT::i128, Hi, 1570 DAG.getConstant(64, DL, MVT::i32)); 1571 return DAG.getNode(ISD::OR, DL, MVT::i128, Lo, Hi); 1572 } else { 1573 return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi); 1574 } 1575 } 1576 1577 bool SystemZTargetLowering::splitValueIntoRegisterParts( 1578 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, 1579 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const { 1580 EVT ValueVT = Val.getValueType(); 1581 if (ValueVT.getSizeInBits() == 128 && NumParts == 1 && PartVT == MVT::Untyped) { 1582 // Inline assembly operand. 1583 Parts[0] = lowerI128ToGR128(DAG, DAG.getBitcast(MVT::i128, Val)); 1584 return true; 1585 } 1586 1587 return false; 1588 } 1589 1590 SDValue SystemZTargetLowering::joinRegisterPartsIntoValue( 1591 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, 1592 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const { 1593 if (ValueVT.getSizeInBits() == 128 && NumParts == 1 && PartVT == MVT::Untyped) { 1594 // Inline assembly operand. 1595 SDValue Res = lowerGR128ToI128(DAG, Parts[0]); 1596 return DAG.getBitcast(ValueVT, Res); 1597 } 1598 1599 return SDValue(); 1600 } 1601 1602 SDValue SystemZTargetLowering::LowerFormalArguments( 1603 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, 1604 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL, 1605 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { 1606 MachineFunction &MF = DAG.getMachineFunction(); 1607 MachineFrameInfo &MFI = MF.getFrameInfo(); 1608 MachineRegisterInfo &MRI = MF.getRegInfo(); 1609 SystemZMachineFunctionInfo *FuncInfo = 1610 MF.getInfo<SystemZMachineFunctionInfo>(); 1611 auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>(); 1612 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 1613 1614 // Assign locations to all of the incoming arguments. 1615 SmallVector<CCValAssign, 16> ArgLocs; 1616 SystemZCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); 1617 CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ); 1618 FuncInfo->setSizeOfFnParams(CCInfo.getStackSize()); 1619 1620 unsigned NumFixedGPRs = 0; 1621 unsigned NumFixedFPRs = 0; 1622 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) { 1623 SDValue ArgValue; 1624 CCValAssign &VA = ArgLocs[I]; 1625 EVT LocVT = VA.getLocVT(); 1626 if (VA.isRegLoc()) { 1627 // Arguments passed in registers 1628 const TargetRegisterClass *RC; 1629 switch (LocVT.getSimpleVT().SimpleTy) { 1630 default: 1631 // Integers smaller than i64 should be promoted to i64. 
1632 llvm_unreachable("Unexpected argument type"); 1633 case MVT::i32: 1634 NumFixedGPRs += 1; 1635 RC = &SystemZ::GR32BitRegClass; 1636 break; 1637 case MVT::i64: 1638 NumFixedGPRs += 1; 1639 RC = &SystemZ::GR64BitRegClass; 1640 break; 1641 case MVT::f32: 1642 NumFixedFPRs += 1; 1643 RC = &SystemZ::FP32BitRegClass; 1644 break; 1645 case MVT::f64: 1646 NumFixedFPRs += 1; 1647 RC = &SystemZ::FP64BitRegClass; 1648 break; 1649 case MVT::f128: 1650 NumFixedFPRs += 2; 1651 RC = &SystemZ::FP128BitRegClass; 1652 break; 1653 case MVT::v16i8: 1654 case MVT::v8i16: 1655 case MVT::v4i32: 1656 case MVT::v2i64: 1657 case MVT::v4f32: 1658 case MVT::v2f64: 1659 RC = &SystemZ::VR128BitRegClass; 1660 break; 1661 } 1662 1663 Register VReg = MRI.createVirtualRegister(RC); 1664 MRI.addLiveIn(VA.getLocReg(), VReg); 1665 ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, LocVT); 1666 } else { 1667 assert(VA.isMemLoc() && "Argument not register or memory"); 1668 1669 // Create the frame index object for this incoming parameter. 1670 // FIXME: Pre-include call frame size in the offset, should not 1671 // need to manually add it here. 1672 int64_t ArgSPOffset = VA.getLocMemOffset(); 1673 if (Subtarget.isTargetXPLINK64()) { 1674 auto &XPRegs = 1675 Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>(); 1676 ArgSPOffset += XPRegs.getCallFrameSize(); 1677 } 1678 int FI = 1679 MFI.CreateFixedObject(LocVT.getSizeInBits() / 8, ArgSPOffset, true); 1680 1681 // Create the SelectionDAG nodes corresponding to a load 1682 // from this parameter. Unpromoted ints and floats are 1683 // passed as right-justified 8-byte values. 1684 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 1685 if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32) 1686 FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, 1687 DAG.getIntPtrConstant(4, DL)); 1688 ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN, 1689 MachinePointerInfo::getFixedStack(MF, FI)); 1690 } 1691 1692 // Convert the value of the argument register into the value that's 1693 // being passed. 1694 if (VA.getLocInfo() == CCValAssign::Indirect) { 1695 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue, 1696 MachinePointerInfo())); 1697 // If the original argument was split (e.g. i128), we need 1698 // to load all parts of it here (using the same address). 1699 unsigned ArgIndex = Ins[I].OrigArgIndex; 1700 assert (Ins[I].PartOffset == 0); 1701 while (I + 1 != E && Ins[I + 1].OrigArgIndex == ArgIndex) { 1702 CCValAssign &PartVA = ArgLocs[I + 1]; 1703 unsigned PartOffset = Ins[I + 1].PartOffset; 1704 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, 1705 DAG.getIntPtrConstant(PartOffset, DL)); 1706 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address, 1707 MachinePointerInfo())); 1708 ++I; 1709 } 1710 } else 1711 InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue)); 1712 } 1713 1714 if (IsVarArg && Subtarget.isTargetXPLINK64()) { 1715 // Save the number of non-varargs registers for later use by va_start, etc. 1716 FuncInfo->setVarArgsFirstGPR(NumFixedGPRs); 1717 FuncInfo->setVarArgsFirstFPR(NumFixedFPRs); 1718 1719 auto *Regs = static_cast<SystemZXPLINK64Registers *>( 1720 Subtarget.getSpecialRegisters()); 1721 1722 // Likewise the address (in the form of a frame index) of where the 1723 // first stack vararg would be. The 1-byte size here is arbitrary. 1724 // FIXME: Pre-include call frame size in the offset, should not 1725 // need to manually add it here. 
1726 int64_t VarArgOffset = CCInfo.getStackSize() + Regs->getCallFrameSize(); 1727 int FI = MFI.CreateFixedObject(1, VarArgOffset, true); 1728 FuncInfo->setVarArgsFrameIndex(FI); 1729 } 1730 1731 if (IsVarArg && Subtarget.isTargetELF()) { 1732 // Save the number of non-varargs registers for later use by va_start, etc. 1733 FuncInfo->setVarArgsFirstGPR(NumFixedGPRs); 1734 FuncInfo->setVarArgsFirstFPR(NumFixedFPRs); 1735 1736 // Likewise the address (in the form of a frame index) of where the 1737 // first stack vararg would be. The 1-byte size here is arbitrary. 1738 int64_t VarArgsOffset = CCInfo.getStackSize(); 1739 FuncInfo->setVarArgsFrameIndex( 1740 MFI.CreateFixedObject(1, VarArgsOffset, true)); 1741 1742 // ...and a similar frame index for the caller-allocated save area 1743 // that will be used to store the incoming registers. 1744 int64_t RegSaveOffset = 1745 -SystemZMC::ELFCallFrameSize + TFL->getRegSpillOffset(MF, SystemZ::R2D) - 16; 1746 unsigned RegSaveIndex = MFI.CreateFixedObject(1, RegSaveOffset, true); 1747 FuncInfo->setRegSaveFrameIndex(RegSaveIndex); 1748 1749 // Store the FPR varargs in the reserved frame slots. (We store the 1750 // GPRs as part of the prologue.) 1751 if (NumFixedFPRs < SystemZ::ELFNumArgFPRs && !useSoftFloat()) { 1752 SDValue MemOps[SystemZ::ELFNumArgFPRs]; 1753 for (unsigned I = NumFixedFPRs; I < SystemZ::ELFNumArgFPRs; ++I) { 1754 unsigned Offset = TFL->getRegSpillOffset(MF, SystemZ::ELFArgFPRs[I]); 1755 int FI = 1756 MFI.CreateFixedObject(8, -SystemZMC::ELFCallFrameSize + Offset, true); 1757 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); 1758 Register VReg = MF.addLiveIn(SystemZ::ELFArgFPRs[I], 1759 &SystemZ::FP64BitRegClass); 1760 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64); 1761 MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN, 1762 MachinePointerInfo::getFixedStack(MF, FI)); 1763 } 1764 // Join the stores, which are independent of one another. 1765 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, 1766 ArrayRef(&MemOps[NumFixedFPRs], 1767 SystemZ::ELFNumArgFPRs - NumFixedFPRs)); 1768 } 1769 } 1770 1771 if (Subtarget.isTargetXPLINK64()) { 1772 // Create a virtual register for handling the incoming "ADA" special register (R5). 1773 const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass; 1774 Register ADAvReg = MRI.createVirtualRegister(RC); 1775 auto *Regs = static_cast<SystemZXPLINK64Registers *>( 1776 Subtarget.getSpecialRegisters()); 1777 MRI.addLiveIn(Regs->getADARegister(), ADAvReg); 1778 FuncInfo->setADAVirtualRegister(ADAvReg); 1779 } 1780 return Chain; 1781 } 1782 1783 static bool canUseSiblingCall(const CCState &ArgCCInfo, 1784 SmallVectorImpl<CCValAssign> &ArgLocs, 1785 SmallVectorImpl<ISD::OutputArg> &Outs) { 1786 // Punt if there are any indirect or stack arguments, or if the call 1787 // needs the callee-saved argument register R6, or if the call uses 1788 // the callee-saved register arguments SwiftSelf and SwiftError.
1789 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) { 1790 CCValAssign &VA = ArgLocs[I]; 1791 if (VA.getLocInfo() == CCValAssign::Indirect) 1792 return false; 1793 if (!VA.isRegLoc()) 1794 return false; 1795 Register Reg = VA.getLocReg(); 1796 if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D) 1797 return false; 1798 if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftError()) 1799 return false; 1800 } 1801 return true; 1802 } 1803 1804 static SDValue getADAEntry(SelectionDAG &DAG, SDValue Val, SDLoc DL, 1805 unsigned Offset, bool LoadAdr = false) { 1806 MachineFunction &MF = DAG.getMachineFunction(); 1807 SystemZMachineFunctionInfo *MFI = MF.getInfo<SystemZMachineFunctionInfo>(); 1808 unsigned ADAvReg = MFI->getADAVirtualRegister(); 1809 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); 1810 1811 SDValue Reg = DAG.getRegister(ADAvReg, PtrVT); 1812 SDValue Ofs = DAG.getTargetConstant(Offset, DL, PtrVT); 1813 1814 SDValue Result = DAG.getNode(SystemZISD::ADA_ENTRY, DL, PtrVT, Val, Reg, Ofs); 1815 if (!LoadAdr) 1816 Result = DAG.getLoad( 1817 PtrVT, DL, DAG.getEntryNode(), Result, MachinePointerInfo(), Align(8), 1818 MachineMemOperand::MODereferenceable | MachineMemOperand::MOInvariant); 1819 1820 return Result; 1821 } 1822 1823 // ADA access using Global value 1824 // Note: for functions, address of descriptor is returned 1825 static SDValue getADAEntry(SelectionDAG &DAG, const GlobalValue *GV, SDLoc DL, 1826 EVT PtrVT) { 1827 unsigned ADAtype; 1828 bool LoadAddr = false; 1829 const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV); 1830 bool IsFunction = 1831 (isa<Function>(GV)) || (GA && isa<Function>(GA->getAliaseeObject())); 1832 bool IsInternal = (GV->hasInternalLinkage() || GV->hasPrivateLinkage()); 1833 1834 if (IsFunction) { 1835 if (IsInternal) { 1836 ADAtype = SystemZII::MO_ADA_DIRECT_FUNC_DESC; 1837 LoadAddr = true; 1838 } else 1839 ADAtype = SystemZII::MO_ADA_INDIRECT_FUNC_DESC; 1840 } else { 1841 ADAtype = SystemZII::MO_ADA_DATA_SYMBOL_ADDR; 1842 } 1843 SDValue Val = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, ADAtype); 1844 1845 return getADAEntry(DAG, Val, DL, 0, LoadAddr); 1846 } 1847 1848 static bool getzOSCalleeAndADA(SelectionDAG &DAG, SDValue &Callee, SDValue &ADA, 1849 SDLoc &DL, SDValue &Chain) { 1850 unsigned ADADelta = 0; // ADA offset in desc. 1851 unsigned EPADelta = 8; // EPA offset in desc. 1852 MachineFunction &MF = DAG.getMachineFunction(); 1853 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); 1854 1855 // XPLink calling convention. 
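// The function descriptor holds the ADA at offset 0 (ADADelta) and the
// entry point at offset 8 (EPADelta); the cases below either reference the
// descriptor fields directly or load them through the function pointer.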
1856 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 1857 bool IsInternal = (G->getGlobal()->hasInternalLinkage() || 1858 G->getGlobal()->hasPrivateLinkage()); 1859 if (IsInternal) { 1860 SystemZMachineFunctionInfo *MFI = 1861 MF.getInfo<SystemZMachineFunctionInfo>(); 1862 unsigned ADAvReg = MFI->getADAVirtualRegister(); 1863 ADA = DAG.getCopyFromReg(Chain, DL, ADAvReg, PtrVT); 1864 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT); 1865 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee); 1866 return true; 1867 } else { 1868 SDValue GA = DAG.getTargetGlobalAddress( 1869 G->getGlobal(), DL, PtrVT, 0, SystemZII::MO_ADA_DIRECT_FUNC_DESC); 1870 ADA = getADAEntry(DAG, GA, DL, ADADelta); 1871 Callee = getADAEntry(DAG, GA, DL, EPADelta); 1872 } 1873 } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) { 1874 SDValue ES = DAG.getTargetExternalSymbol( 1875 E->getSymbol(), PtrVT, SystemZII::MO_ADA_DIRECT_FUNC_DESC); 1876 ADA = getADAEntry(DAG, ES, DL, ADADelta); 1877 Callee = getADAEntry(DAG, ES, DL, EPADelta); 1878 } else { 1879 // Function pointer case 1880 ADA = DAG.getNode(ISD::ADD, DL, PtrVT, Callee, 1881 DAG.getConstant(ADADelta, DL, PtrVT)); 1882 ADA = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), ADA, 1883 MachinePointerInfo::getGOT(DAG.getMachineFunction())); 1884 Callee = DAG.getNode(ISD::ADD, DL, PtrVT, Callee, 1885 DAG.getConstant(EPADelta, DL, PtrVT)); 1886 Callee = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Callee, 1887 MachinePointerInfo::getGOT(DAG.getMachineFunction())); 1888 } 1889 return false; 1890 } 1891 1892 SDValue 1893 SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI, 1894 SmallVectorImpl<SDValue> &InVals) const { 1895 SelectionDAG &DAG = CLI.DAG; 1896 SDLoc &DL = CLI.DL; 1897 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; 1898 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; 1899 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; 1900 SDValue Chain = CLI.Chain; 1901 SDValue Callee = CLI.Callee; 1902 bool &IsTailCall = CLI.IsTailCall; 1903 CallingConv::ID CallConv = CLI.CallConv; 1904 bool IsVarArg = CLI.IsVarArg; 1905 MachineFunction &MF = DAG.getMachineFunction(); 1906 EVT PtrVT = getPointerTy(MF.getDataLayout()); 1907 LLVMContext &Ctx = *DAG.getContext(); 1908 SystemZCallingConventionRegisters *Regs = Subtarget.getSpecialRegisters(); 1909 1910 // FIXME: z/OS support to be added in later. 1911 if (Subtarget.isTargetXPLINK64()) 1912 IsTailCall = false; 1913 1914 // Analyze the operands of the call, assigning locations to each operand. 1915 SmallVector<CCValAssign, 16> ArgLocs; 1916 SystemZCCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, Ctx); 1917 ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ); 1918 1919 // We don't support GuaranteedTailCallOpt, only automatically-detected 1920 // sibling calls. 1921 if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs, Outs)) 1922 IsTailCall = false; 1923 1924 // Get a count of how many bytes are to be pushed on the stack. 1925 unsigned NumBytes = ArgCCInfo.getStackSize(); 1926 1927 // Mark the start of the call. 1928 if (!IsTailCall) 1929 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL); 1930 1931 // Copy argument values to their designated locations. 
1932 SmallVector<std::pair<unsigned, SDValue>, 9> RegsToPass; 1933 SmallVector<SDValue, 8> MemOpChains; 1934 SDValue StackPtr; 1935 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) { 1936 CCValAssign &VA = ArgLocs[I]; 1937 SDValue ArgValue = OutVals[I]; 1938 1939 if (VA.getLocInfo() == CCValAssign::Indirect) { 1940 // Store the argument in a stack slot and pass its address. 1941 unsigned ArgIndex = Outs[I].OrigArgIndex; 1942 EVT SlotVT; 1943 if (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) { 1944 // Allocate the full stack space for a promoted (and split) argument. 1945 Type *OrigArgType = CLI.Args[Outs[I].OrigArgIndex].Ty; 1946 EVT OrigArgVT = getValueType(MF.getDataLayout(), OrigArgType); 1947 MVT PartVT = getRegisterTypeForCallingConv(Ctx, CLI.CallConv, OrigArgVT); 1948 unsigned N = getNumRegistersForCallingConv(Ctx, CLI.CallConv, OrigArgVT); 1949 SlotVT = EVT::getIntegerVT(Ctx, PartVT.getSizeInBits() * N); 1950 } else { 1951 SlotVT = Outs[I].VT; 1952 } 1953 SDValue SpillSlot = DAG.CreateStackTemporary(SlotVT); 1954 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex(); 1955 MemOpChains.push_back( 1956 DAG.getStore(Chain, DL, ArgValue, SpillSlot, 1957 MachinePointerInfo::getFixedStack(MF, FI))); 1958 // If the original argument was split (e.g. i128), we need 1959 // to store all parts of it here (and pass just one address). 1960 assert (Outs[I].PartOffset == 0); 1961 while (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) { 1962 SDValue PartValue = OutVals[I + 1]; 1963 unsigned PartOffset = Outs[I + 1].PartOffset; 1964 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, 1965 DAG.getIntPtrConstant(PartOffset, DL)); 1966 MemOpChains.push_back( 1967 DAG.getStore(Chain, DL, PartValue, Address, 1968 MachinePointerInfo::getFixedStack(MF, FI))); 1969 assert((PartOffset + PartValue.getValueType().getStoreSize() <= 1970 SlotVT.getStoreSize()) && "Not enough space for argument part!"); 1971 ++I; 1972 } 1973 ArgValue = SpillSlot; 1974 } else 1975 ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue); 1976 1977 if (VA.isRegLoc()) { 1978 // In XPLINK64, for the 128-bit vararg case, ArgValue is bitcasted to a 1979 // MVT::i128 type. We decompose the 128-bit type to a pair of its high 1980 // and low values. 1981 if (VA.getLocVT() == MVT::i128) 1982 ArgValue = lowerI128ToGR128(DAG, ArgValue); 1983 // Queue up the argument copies and emit them at the end. 1984 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue)); 1985 } else { 1986 assert(VA.isMemLoc() && "Argument not register or memory"); 1987 1988 // Work out the address of the stack slot. Unpromoted ints and 1989 // floats are passed as right-justified 8-byte values. 1990 if (!StackPtr.getNode()) 1991 StackPtr = DAG.getCopyFromReg(Chain, DL, 1992 Regs->getStackPointerRegister(), PtrVT); 1993 unsigned Offset = Regs->getStackPointerBias() + Regs->getCallFrameSize() + 1994 VA.getLocMemOffset(); 1995 if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32) 1996 Offset += 4; 1997 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, 1998 DAG.getIntPtrConstant(Offset, DL)); 1999 2000 // Emit the store. 2001 MemOpChains.push_back( 2002 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo())); 2003 2004 // Although long doubles or vectors are passed through the stack when 2005 // they are vararg (non-fixed arguments), if a long double or vector 2006 // occupies the third and fourth slot of the argument list GPR3 should 2007 // still shadow the third slot of the argument list. 
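// (EXTRACT_ELEMENT with index 1 below selects the high 64 bits of ArgValue,
// i.e. the doubleword that lands in the shadowed third argument slot; that
// is the part copied into R3.)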
2008 if (Subtarget.isTargetXPLINK64() && VA.needsCustom()) { 2009 SDValue ShadowArgValue = 2010 DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, ArgValue, 2011 DAG.getIntPtrConstant(1, DL)); 2012 RegsToPass.push_back(std::make_pair(SystemZ::R3D, ShadowArgValue)); 2013 } 2014 } 2015 } 2016 2017 // Join the stores, which are independent of one another. 2018 if (!MemOpChains.empty()) 2019 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains); 2020 2021 // Accept direct calls by converting symbolic call addresses to the 2022 // associated Target* opcodes. Force %r1 to be used for indirect 2023 // tail calls. 2024 SDValue Glue; 2025 2026 if (Subtarget.isTargetXPLINK64()) { 2027 SDValue ADA; 2028 bool IsBRASL = getzOSCalleeAndADA(DAG, Callee, ADA, DL, Chain); 2029 if (!IsBRASL) { 2030 unsigned CalleeReg = static_cast<SystemZXPLINK64Registers *>(Regs) 2031 ->getAddressOfCalleeRegister(); 2032 Chain = DAG.getCopyToReg(Chain, DL, CalleeReg, Callee, Glue); 2033 Glue = Chain.getValue(1); 2034 Callee = DAG.getRegister(CalleeReg, Callee.getValueType()); 2035 } 2036 RegsToPass.push_back(std::make_pair( 2037 static_cast<SystemZXPLINK64Registers *>(Regs)->getADARegister(), ADA)); 2038 } else { 2039 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 2040 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT); 2041 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee); 2042 } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) { 2043 Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT); 2044 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee); 2045 } else if (IsTailCall) { 2046 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R1D, Callee, Glue); 2047 Glue = Chain.getValue(1); 2048 Callee = DAG.getRegister(SystemZ::R1D, Callee.getValueType()); 2049 } 2050 } 2051 2052 // Build a sequence of copy-to-reg nodes, chained and glued together. 2053 for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) { 2054 Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[I].first, 2055 RegsToPass[I].second, Glue); 2056 Glue = Chain.getValue(1); 2057 } 2058 2059 // The first call operand is the chain and the second is the target address. 2060 SmallVector<SDValue, 8> Ops; 2061 Ops.push_back(Chain); 2062 Ops.push_back(Callee); 2063 2064 // Add argument registers to the end of the list so that they are 2065 // known live into the call. 2066 for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) 2067 Ops.push_back(DAG.getRegister(RegsToPass[I].first, 2068 RegsToPass[I].second.getValueType())); 2069 2070 // Add a register mask operand representing the call-preserved registers. 2071 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); 2072 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv); 2073 assert(Mask && "Missing call preserved mask for calling convention"); 2074 Ops.push_back(DAG.getRegisterMask(Mask)); 2075 2076 // Glue the call to the argument copies, if any. 2077 if (Glue.getNode()) 2078 Ops.push_back(Glue); 2079 2080 // Emit the call. 2081 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); 2082 if (IsTailCall) { 2083 SDValue Ret = DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, Ops); 2084 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge); 2085 return Ret; 2086 } 2087 Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, Ops); 2088 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge); 2089 Glue = Chain.getValue(1); 2090 2091 // Mark the end of the call, which is glued to the call itself. 
2092 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL); 2093 Glue = Chain.getValue(1); 2094 2095 // Assign locations to each value returned by this call. 2096 SmallVector<CCValAssign, 16> RetLocs; 2097 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, Ctx); 2098 RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ); 2099 2100 // Copy all of the result registers out of their specified physreg. 2101 for (CCValAssign &VA : RetLocs) { 2102 // Copy the value out, gluing the copy to the end of the call sequence. 2103 SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), 2104 VA.getLocVT(), Glue); 2105 Chain = RetValue.getValue(1); 2106 Glue = RetValue.getValue(2); 2107 2108 // Convert the value of the return register into the value that's 2109 // being returned. 2110 InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, RetValue)); 2111 } 2112 2113 return Chain; 2114 } 2115 2116 // Generate a call taking the given operands as arguments and returning a 2117 // result of type RetVT. 2118 std::pair<SDValue, SDValue> SystemZTargetLowering::makeExternalCall( 2119 SDValue Chain, SelectionDAG &DAG, const char *CalleeName, EVT RetVT, 2120 ArrayRef<SDValue> Ops, CallingConv::ID CallConv, bool IsSigned, SDLoc DL, 2121 bool DoesNotReturn, bool IsReturnValueUsed) const { 2122 TargetLowering::ArgListTy Args; 2123 Args.reserve(Ops.size()); 2124 2125 TargetLowering::ArgListEntry Entry; 2126 for (SDValue Op : Ops) { 2127 Entry.Node = Op; 2128 Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext()); 2129 Entry.IsSExt = shouldSignExtendTypeInLibCall(Op.getValueType(), IsSigned); 2130 Entry.IsZExt = !shouldSignExtendTypeInLibCall(Op.getValueType(), IsSigned); 2131 Args.push_back(Entry); 2132 } 2133 2134 SDValue Callee = 2135 DAG.getExternalSymbol(CalleeName, getPointerTy(DAG.getDataLayout())); 2136 2137 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); 2138 TargetLowering::CallLoweringInfo CLI(DAG); 2139 bool SignExtend = shouldSignExtendTypeInLibCall(RetVT, IsSigned); 2140 CLI.setDebugLoc(DL) 2141 .setChain(Chain) 2142 .setCallee(CallConv, RetTy, Callee, std::move(Args)) 2143 .setNoReturn(DoesNotReturn) 2144 .setDiscardResult(!IsReturnValueUsed) 2145 .setSExtResult(SignExtend) 2146 .setZExtResult(!SignExtend); 2147 return LowerCallTo(CLI); 2148 } 2149 2150 bool SystemZTargetLowering:: 2151 CanLowerReturn(CallingConv::ID CallConv, 2152 MachineFunction &MF, bool isVarArg, 2153 const SmallVectorImpl<ISD::OutputArg> &Outs, 2154 LLVMContext &Context) const { 2155 // Special case that we cannot easily detect in RetCC_SystemZ since 2156 // i128 may not be a legal type. 2157 for (auto &Out : Outs) 2158 if (Out.ArgVT == MVT::i128) 2159 return false; 2160 2161 SmallVector<CCValAssign, 16> RetLocs; 2162 CCState RetCCInfo(CallConv, isVarArg, MF, RetLocs, Context); 2163 return RetCCInfo.CheckReturn(Outs, RetCC_SystemZ); 2164 } 2165 2166 SDValue 2167 SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, 2168 bool IsVarArg, 2169 const SmallVectorImpl<ISD::OutputArg> &Outs, 2170 const SmallVectorImpl<SDValue> &OutVals, 2171 const SDLoc &DL, SelectionDAG &DAG) const { 2172 MachineFunction &MF = DAG.getMachineFunction(); 2173 2174 // Assign locations to each returned value. 
2175 SmallVector<CCValAssign, 16> RetLocs; 2176 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext()); 2177 RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ); 2178 2179 // Quick exit for void returns 2180 if (RetLocs.empty()) 2181 return DAG.getNode(SystemZISD::RET_GLUE, DL, MVT::Other, Chain); 2182 2183 if (CallConv == CallingConv::GHC) 2184 report_fatal_error("GHC functions return void only"); 2185 2186 // Copy the result values into the output registers. 2187 SDValue Glue; 2188 SmallVector<SDValue, 4> RetOps; 2189 RetOps.push_back(Chain); 2190 for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) { 2191 CCValAssign &VA = RetLocs[I]; 2192 SDValue RetValue = OutVals[I]; 2193 2194 // Make the return register live on exit. 2195 assert(VA.isRegLoc() && "Can only return in registers!"); 2196 2197 // Promote the value as required. 2198 RetValue = convertValVTToLocVT(DAG, DL, VA, RetValue); 2199 2200 // Chain and glue the copies together. 2201 Register Reg = VA.getLocReg(); 2202 Chain = DAG.getCopyToReg(Chain, DL, Reg, RetValue, Glue); 2203 Glue = Chain.getValue(1); 2204 RetOps.push_back(DAG.getRegister(Reg, VA.getLocVT())); 2205 } 2206 2207 // Update chain and glue. 2208 RetOps[0] = Chain; 2209 if (Glue.getNode()) 2210 RetOps.push_back(Glue); 2211 2212 return DAG.getNode(SystemZISD::RET_GLUE, DL, MVT::Other, RetOps); 2213 } 2214 2215 // Return true if Op is an intrinsic node with chain that returns the CC value 2216 // as its only (other) argument. Provide the associated SystemZISD opcode and 2217 // the mask of valid CC values if so. 2218 static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode, 2219 unsigned &CCValid) { 2220 unsigned Id = Op.getConstantOperandVal(1); 2221 switch (Id) { 2222 case Intrinsic::s390_tbegin: 2223 Opcode = SystemZISD::TBEGIN; 2224 CCValid = SystemZ::CCMASK_TBEGIN; 2225 return true; 2226 2227 case Intrinsic::s390_tbegin_nofloat: 2228 Opcode = SystemZISD::TBEGIN_NOFLOAT; 2229 CCValid = SystemZ::CCMASK_TBEGIN; 2230 return true; 2231 2232 case Intrinsic::s390_tend: 2233 Opcode = SystemZISD::TEND; 2234 CCValid = SystemZ::CCMASK_TEND; 2235 return true; 2236 2237 default: 2238 return false; 2239 } 2240 } 2241 2242 // Return true if Op is an intrinsic node without chain that returns the 2243 // CC value as its final argument. Provide the associated SystemZISD 2244 // opcode and the mask of valid CC values if so. 
2245 static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) { 2246 unsigned Id = Op.getConstantOperandVal(0); 2247 switch (Id) { 2248 case Intrinsic::s390_vpkshs: 2249 case Intrinsic::s390_vpksfs: 2250 case Intrinsic::s390_vpksgs: 2251 Opcode = SystemZISD::PACKS_CC; 2252 CCValid = SystemZ::CCMASK_VCMP; 2253 return true; 2254 2255 case Intrinsic::s390_vpklshs: 2256 case Intrinsic::s390_vpklsfs: 2257 case Intrinsic::s390_vpklsgs: 2258 Opcode = SystemZISD::PACKLS_CC; 2259 CCValid = SystemZ::CCMASK_VCMP; 2260 return true; 2261 2262 case Intrinsic::s390_vceqbs: 2263 case Intrinsic::s390_vceqhs: 2264 case Intrinsic::s390_vceqfs: 2265 case Intrinsic::s390_vceqgs: 2266 Opcode = SystemZISD::VICMPES; 2267 CCValid = SystemZ::CCMASK_VCMP; 2268 return true; 2269 2270 case Intrinsic::s390_vchbs: 2271 case Intrinsic::s390_vchhs: 2272 case Intrinsic::s390_vchfs: 2273 case Intrinsic::s390_vchgs: 2274 Opcode = SystemZISD::VICMPHS; 2275 CCValid = SystemZ::CCMASK_VCMP; 2276 return true; 2277 2278 case Intrinsic::s390_vchlbs: 2279 case Intrinsic::s390_vchlhs: 2280 case Intrinsic::s390_vchlfs: 2281 case Intrinsic::s390_vchlgs: 2282 Opcode = SystemZISD::VICMPHLS; 2283 CCValid = SystemZ::CCMASK_VCMP; 2284 return true; 2285 2286 case Intrinsic::s390_vtm: 2287 Opcode = SystemZISD::VTM; 2288 CCValid = SystemZ::CCMASK_VCMP; 2289 return true; 2290 2291 case Intrinsic::s390_vfaebs: 2292 case Intrinsic::s390_vfaehs: 2293 case Intrinsic::s390_vfaefs: 2294 Opcode = SystemZISD::VFAE_CC; 2295 CCValid = SystemZ::CCMASK_ANY; 2296 return true; 2297 2298 case Intrinsic::s390_vfaezbs: 2299 case Intrinsic::s390_vfaezhs: 2300 case Intrinsic::s390_vfaezfs: 2301 Opcode = SystemZISD::VFAEZ_CC; 2302 CCValid = SystemZ::CCMASK_ANY; 2303 return true; 2304 2305 case Intrinsic::s390_vfeebs: 2306 case Intrinsic::s390_vfeehs: 2307 case Intrinsic::s390_vfeefs: 2308 Opcode = SystemZISD::VFEE_CC; 2309 CCValid = SystemZ::CCMASK_ANY; 2310 return true; 2311 2312 case Intrinsic::s390_vfeezbs: 2313 case Intrinsic::s390_vfeezhs: 2314 case Intrinsic::s390_vfeezfs: 2315 Opcode = SystemZISD::VFEEZ_CC; 2316 CCValid = SystemZ::CCMASK_ANY; 2317 return true; 2318 2319 case Intrinsic::s390_vfenebs: 2320 case Intrinsic::s390_vfenehs: 2321 case Intrinsic::s390_vfenefs: 2322 Opcode = SystemZISD::VFENE_CC; 2323 CCValid = SystemZ::CCMASK_ANY; 2324 return true; 2325 2326 case Intrinsic::s390_vfenezbs: 2327 case Intrinsic::s390_vfenezhs: 2328 case Intrinsic::s390_vfenezfs: 2329 Opcode = SystemZISD::VFENEZ_CC; 2330 CCValid = SystemZ::CCMASK_ANY; 2331 return true; 2332 2333 case Intrinsic::s390_vistrbs: 2334 case Intrinsic::s390_vistrhs: 2335 case Intrinsic::s390_vistrfs: 2336 Opcode = SystemZISD::VISTR_CC; 2337 CCValid = SystemZ::CCMASK_0 | SystemZ::CCMASK_3; 2338 return true; 2339 2340 case Intrinsic::s390_vstrcbs: 2341 case Intrinsic::s390_vstrchs: 2342 case Intrinsic::s390_vstrcfs: 2343 Opcode = SystemZISD::VSTRC_CC; 2344 CCValid = SystemZ::CCMASK_ANY; 2345 return true; 2346 2347 case Intrinsic::s390_vstrczbs: 2348 case Intrinsic::s390_vstrczhs: 2349 case Intrinsic::s390_vstrczfs: 2350 Opcode = SystemZISD::VSTRCZ_CC; 2351 CCValid = SystemZ::CCMASK_ANY; 2352 return true; 2353 2354 case Intrinsic::s390_vstrsb: 2355 case Intrinsic::s390_vstrsh: 2356 case Intrinsic::s390_vstrsf: 2357 Opcode = SystemZISD::VSTRS_CC; 2358 CCValid = SystemZ::CCMASK_ANY; 2359 return true; 2360 2361 case Intrinsic::s390_vstrszb: 2362 case Intrinsic::s390_vstrszh: 2363 case Intrinsic::s390_vstrszf: 2364 Opcode = SystemZISD::VSTRSZ_CC; 2365 CCValid = 
SystemZ::CCMASK_ANY; 2366 return true; 2367 2368 case Intrinsic::s390_vfcedbs: 2369 case Intrinsic::s390_vfcesbs: 2370 Opcode = SystemZISD::VFCMPES; 2371 CCValid = SystemZ::CCMASK_VCMP; 2372 return true; 2373 2374 case Intrinsic::s390_vfchdbs: 2375 case Intrinsic::s390_vfchsbs: 2376 Opcode = SystemZISD::VFCMPHS; 2377 CCValid = SystemZ::CCMASK_VCMP; 2378 return true; 2379 2380 case Intrinsic::s390_vfchedbs: 2381 case Intrinsic::s390_vfchesbs: 2382 Opcode = SystemZISD::VFCMPHES; 2383 CCValid = SystemZ::CCMASK_VCMP; 2384 return true; 2385 2386 case Intrinsic::s390_vftcidb: 2387 case Intrinsic::s390_vftcisb: 2388 Opcode = SystemZISD::VFTCI; 2389 CCValid = SystemZ::CCMASK_VCMP; 2390 return true; 2391 2392 case Intrinsic::s390_tdc: 2393 Opcode = SystemZISD::TDC; 2394 CCValid = SystemZ::CCMASK_TDC; 2395 return true; 2396 2397 default: 2398 return false; 2399 } 2400 } 2401 2402 // Emit an intrinsic with chain and an explicit CC register result. 2403 static SDNode *emitIntrinsicWithCCAndChain(SelectionDAG &DAG, SDValue Op, 2404 unsigned Opcode) { 2405 // Copy all operands except the intrinsic ID. 2406 unsigned NumOps = Op.getNumOperands(); 2407 SmallVector<SDValue, 6> Ops; 2408 Ops.reserve(NumOps - 1); 2409 Ops.push_back(Op.getOperand(0)); 2410 for (unsigned I = 2; I < NumOps; ++I) 2411 Ops.push_back(Op.getOperand(I)); 2412 2413 assert(Op->getNumValues() == 2 && "Expected only CC result and chain"); 2414 SDVTList RawVTs = DAG.getVTList(MVT::i32, MVT::Other); 2415 SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops); 2416 SDValue OldChain = SDValue(Op.getNode(), 1); 2417 SDValue NewChain = SDValue(Intr.getNode(), 1); 2418 DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain); 2419 return Intr.getNode(); 2420 } 2421 2422 // Emit an intrinsic with an explicit CC register result. 2423 static SDNode *emitIntrinsicWithCC(SelectionDAG &DAG, SDValue Op, 2424 unsigned Opcode) { 2425 // Copy all operands except the intrinsic ID. 2426 unsigned NumOps = Op.getNumOperands(); 2427 SmallVector<SDValue, 6> Ops; 2428 Ops.reserve(NumOps - 1); 2429 for (unsigned I = 1; I < NumOps; ++I) 2430 Ops.push_back(Op.getOperand(I)); 2431 2432 SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), Op->getVTList(), Ops); 2433 return Intr.getNode(); 2434 } 2435 2436 // CC is a comparison that will be implemented using an integer or 2437 // floating-point comparison. Return the condition code mask for 2438 // a branch on true. In the integer case, CCMASK_CMP_UO is set for 2439 // unsigned comparisons and clear for signed ones. In the floating-point 2440 // case, CCMASK_CMP_UO has its normal mask meaning (unordered). 2441 static unsigned CCMaskForCondCode(ISD::CondCode CC) { 2442 #define CONV(X) \ 2443 case ISD::SET##X: return SystemZ::CCMASK_CMP_##X; \ 2444 case ISD::SETO##X: return SystemZ::CCMASK_CMP_##X; \ 2445 case ISD::SETU##X: return SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_##X 2446 2447 switch (CC) { 2448 default: 2449 llvm_unreachable("Invalid integer condition!"); 2450 2451 CONV(EQ); 2452 CONV(NE); 2453 CONV(GT); 2454 CONV(GE); 2455 CONV(LT); 2456 CONV(LE); 2457 2458 case ISD::SETO: return SystemZ::CCMASK_CMP_O; 2459 case ISD::SETUO: return SystemZ::CCMASK_CMP_UO; 2460 } 2461 #undef CONV 2462 } 2463 2464 // If C can be converted to a comparison against zero, adjust the operands 2465 // as necessary. 
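// For example, a signed "x > -1" becomes "x >= 0" and "x < 1" becomes
// "x <= 0", so the test turns into a comparison against zero.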
2466 static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) { 2467 if (C.ICmpType == SystemZICMP::UnsignedOnly) 2468 return; 2469 2470 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1.getNode()); 2471 if (!ConstOp1 || ConstOp1->getValueSizeInBits(0) > 64) 2472 return; 2473 2474 int64_t Value = ConstOp1->getSExtValue(); 2475 if ((Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_GT) || 2476 (Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_LE) || 2477 (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_LT) || 2478 (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_GE)) { 2479 C.CCMask ^= SystemZ::CCMASK_CMP_EQ; 2480 C.Op1 = DAG.getConstant(0, DL, C.Op1.getValueType()); 2481 } 2482 } 2483 2484 // If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI, 2485 // adjust the operands as necessary. 2486 static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL, 2487 Comparison &C) { 2488 // For us to make any changes, it must be a comparison between a single-use 2489 // load and a constant. 2490 if (!C.Op0.hasOneUse() || 2491 C.Op0.getOpcode() != ISD::LOAD || 2492 C.Op1.getOpcode() != ISD::Constant) 2493 return; 2494 2495 // We must have an 8- or 16-bit load. 2496 auto *Load = cast<LoadSDNode>(C.Op0); 2497 unsigned NumBits = Load->getMemoryVT().getSizeInBits(); 2498 if ((NumBits != 8 && NumBits != 16) || 2499 NumBits != Load->getMemoryVT().getStoreSizeInBits()) 2500 return; 2501 2502 // The load must be an extending one and the constant must be within the 2503 // range of the unextended value. 2504 auto *ConstOp1 = cast<ConstantSDNode>(C.Op1); 2505 if (!ConstOp1 || ConstOp1->getValueSizeInBits(0) > 64) 2506 return; 2507 uint64_t Value = ConstOp1->getZExtValue(); 2508 uint64_t Mask = (1 << NumBits) - 1; 2509 if (Load->getExtensionType() == ISD::SEXTLOAD) { 2510 // Make sure that ConstOp1 is in range of C.Op0. 2511 int64_t SignedValue = ConstOp1->getSExtValue(); 2512 if (uint64_t(SignedValue) + (uint64_t(1) << (NumBits - 1)) > Mask) 2513 return; 2514 if (C.ICmpType != SystemZICMP::SignedOnly) { 2515 // Unsigned comparison between two sign-extended values is equivalent 2516 // to unsigned comparison between two zero-extended values. 2517 Value &= Mask; 2518 } else if (NumBits == 8) { 2519 // Try to treat the comparison as unsigned, so that we can use CLI. 2520 // Adjust CCMask and Value as necessary. 2521 if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_LT) 2522 // Test whether the high bit of the byte is set. 2523 Value = 127, C.CCMask = SystemZ::CCMASK_CMP_GT; 2524 else if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_GE) 2525 // Test whether the high bit of the byte is clear. 2526 Value = 128, C.CCMask = SystemZ::CCMASK_CMP_LT; 2527 else 2528 // No instruction exists for this combination. 2529 return; 2530 C.ICmpType = SystemZICMP::UnsignedOnly; 2531 } 2532 } else if (Load->getExtensionType() == ISD::ZEXTLOAD) { 2533 if (Value > Mask) 2534 return; 2535 // If the constant is in range, we can use any comparison. 2536 C.ICmpType = SystemZICMP::Any; 2537 } else 2538 return; 2539 2540 // Make sure that the first operand is an i32 of the right extension type. 2541 ISD::LoadExtType ExtType = (C.ICmpType == SystemZICMP::SignedOnly ?
2542 ISD::SEXTLOAD : 2543 ISD::ZEXTLOAD); 2544 if (C.Op0.getValueType() != MVT::i32 || 2545 Load->getExtensionType() != ExtType) { 2546 C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32, Load->getChain(), 2547 Load->getBasePtr(), Load->getPointerInfo(), 2548 Load->getMemoryVT(), Load->getAlign(), 2549 Load->getMemOperand()->getFlags()); 2550 // Update the chain uses. 2551 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), C.Op0.getValue(1)); 2552 } 2553 2554 // Make sure that the second operand is an i32 with the right value. 2555 if (C.Op1.getValueType() != MVT::i32 || 2556 Value != ConstOp1->getZExtValue()) 2557 C.Op1 = DAG.getConstant(Value, DL, MVT::i32); 2558 } 2559 2560 // Return true if Op is either an unextended load, or a load suitable 2561 // for integer register-memory comparisons of type ICmpType. 2562 static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType) { 2563 auto *Load = dyn_cast<LoadSDNode>(Op.getNode()); 2564 if (Load) { 2565 // There are no instructions to compare a register with a memory byte. 2566 if (Load->getMemoryVT() == MVT::i8) 2567 return false; 2568 // Otherwise decide on extension type. 2569 switch (Load->getExtensionType()) { 2570 case ISD::NON_EXTLOAD: 2571 return true; 2572 case ISD::SEXTLOAD: 2573 return ICmpType != SystemZICMP::UnsignedOnly; 2574 case ISD::ZEXTLOAD: 2575 return ICmpType != SystemZICMP::SignedOnly; 2576 default: 2577 break; 2578 } 2579 } 2580 return false; 2581 } 2582 2583 // Return true if it is better to swap the operands of C. 2584 static bool shouldSwapCmpOperands(const Comparison &C) { 2585 // Leave i128 and f128 comparisons alone, since they have no memory forms. 2586 if (C.Op0.getValueType() == MVT::i128) 2587 return false; 2588 if (C.Op0.getValueType() == MVT::f128) 2589 return false; 2590 2591 // Always keep a floating-point constant second, since comparisons with 2592 // zero can use LOAD TEST and comparisons with other constants make a 2593 // natural memory operand. 2594 if (isa<ConstantFPSDNode>(C.Op1)) 2595 return false; 2596 2597 // Never swap comparisons with zero since there are many ways to optimize 2598 // those later. 2599 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1); 2600 if (ConstOp1 && ConstOp1->getZExtValue() == 0) 2601 return false; 2602 2603 // Also keep natural memory operands second if the loaded value is 2604 // only used here. Several comparisons have memory forms. 2605 if (isNaturalMemoryOperand(C.Op1, C.ICmpType) && C.Op1.hasOneUse()) 2606 return false; 2607 2608 // Look for cases where Cmp0 is a single-use load and Cmp1 isn't. 2609 // In that case we generally prefer the memory to be second. 2610 if (isNaturalMemoryOperand(C.Op0, C.ICmpType) && C.Op0.hasOneUse()) { 2611 // The only exceptions are when the second operand is a constant and 2612 // we can use things like CHHSI. 2613 if (!ConstOp1) 2614 return true; 2615 // The unsigned memory-immediate instructions can handle 16-bit 2616 // unsigned integers. 2617 if (C.ICmpType != SystemZICMP::SignedOnly && 2618 isUInt<16>(ConstOp1->getZExtValue())) 2619 return false; 2620 // The signed memory-immediate instructions can handle 16-bit 2621 // signed integers. 2622 if (C.ICmpType != SystemZICMP::UnsignedOnly && 2623 isInt<16>(ConstOp1->getSExtValue())) 2624 return false; 2625 return true; 2626 } 2627 2628 // Try to promote the use of CGFR and CLGFR. 
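// (CGFR and CLGFR compare a 64-bit register with a sign- or zero-extended
// 32-bit second operand, so the extension can only be folded when it ends up
// as operand 1; the zero-extending AND with 0xffffffff below is treated the
// same way.)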
2629 unsigned Opcode0 = C.Op0.getOpcode(); 2630 if (C.ICmpType != SystemZICMP::UnsignedOnly && Opcode0 == ISD::SIGN_EXTEND) 2631 return true; 2632 if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::ZERO_EXTEND) 2633 return true; 2634 if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::AND && 2635 C.Op0.getOperand(1).getOpcode() == ISD::Constant && 2636 C.Op0.getConstantOperandVal(1) == 0xffffffff) 2637 return true; 2638 2639 return false; 2640 } 2641 2642 // Check whether C tests for equality between X and Y and whether X - Y 2643 // or Y - X is also computed. In that case it's better to compare the 2644 // result of the subtraction against zero. 2645 static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL, 2646 Comparison &C) { 2647 if (C.CCMask == SystemZ::CCMASK_CMP_EQ || 2648 C.CCMask == SystemZ::CCMASK_CMP_NE) { 2649 for (SDNode *N : C.Op0->uses()) { 2650 if (N->getOpcode() == ISD::SUB && 2651 ((N->getOperand(0) == C.Op0 && N->getOperand(1) == C.Op1) || 2652 (N->getOperand(0) == C.Op1 && N->getOperand(1) == C.Op0))) { 2653 // Disable the nsw and nuw flags: the backend needs to handle 2654 // overflow as well during comparison elimination. 2655 SDNodeFlags Flags = N->getFlags(); 2656 Flags.setNoSignedWrap(false); 2657 Flags.setNoUnsignedWrap(false); 2658 N->setFlags(Flags); 2659 C.Op0 = SDValue(N, 0); 2660 C.Op1 = DAG.getConstant(0, DL, N->getValueType(0)); 2661 return; 2662 } 2663 } 2664 } 2665 } 2666 2667 // Check whether C compares a floating-point value with zero and if that 2668 // floating-point value is also negated. In this case we can use the 2669 // negation to set CC, so avoiding separate LOAD AND TEST and 2670 // LOAD (NEGATIVE/COMPLEMENT) instructions. 2671 static void adjustForFNeg(Comparison &C) { 2672 // This optimization is invalid for strict comparisons, since FNEG 2673 // does not raise any exceptions. 2674 if (C.Chain) 2675 return; 2676 auto *C1 = dyn_cast<ConstantFPSDNode>(C.Op1); 2677 if (C1 && C1->isZero()) { 2678 for (SDNode *N : C.Op0->uses()) { 2679 if (N->getOpcode() == ISD::FNEG) { 2680 C.Op0 = SDValue(N, 0); 2681 C.CCMask = SystemZ::reverseCCMask(C.CCMask); 2682 return; 2683 } 2684 } 2685 } 2686 } 2687 2688 // Check whether C compares (shl X, 32) with 0 and whether X is 2689 // also sign-extended. In that case it is better to test the result 2690 // of the sign extension using LTGFR. 2691 // 2692 // This case is important because InstCombine transforms a comparison 2693 // with (sext (trunc X)) into a comparison with (shl X, 32). 2694 static void adjustForLTGFR(Comparison &C) { 2695 // Check for a comparison between (shl X, 32) and 0. 2696 if (C.Op0.getOpcode() == ISD::SHL && C.Op0.getValueType() == MVT::i64 && 2697 C.Op1.getOpcode() == ISD::Constant && C.Op1->getAsZExtVal() == 0) { 2698 auto *C1 = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1)); 2699 if (C1 && C1->getZExtValue() == 32) { 2700 SDValue ShlOp0 = C.Op0.getOperand(0); 2701 // See whether X has any SIGN_EXTEND_INREG uses. 2702 for (SDNode *N : ShlOp0->uses()) { 2703 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG && 2704 cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32) { 2705 C.Op0 = SDValue(N, 0); 2706 return; 2707 } 2708 } 2709 } 2710 } 2711 } 2712 2713 // If C compares the truncation of an extending load, try to compare 2714 // the untruncated value instead. This exposes more opportunities to 2715 // reuse CC. 
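// For example, an i32 test of (trunc (extload X)) against zero can instead
// test the untruncated extload result against zero, provided the extension
// type matches the signedness of the comparison.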
2716 static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL, 2717 Comparison &C) { 2718 if (C.Op0.getOpcode() == ISD::TRUNCATE && 2719 C.Op0.getOperand(0).getOpcode() == ISD::LOAD && 2720 C.Op1.getOpcode() == ISD::Constant && 2721 cast<ConstantSDNode>(C.Op1)->getValueSizeInBits(0) <= 64 && 2722 C.Op1->getAsZExtVal() == 0) { 2723 auto *L = cast<LoadSDNode>(C.Op0.getOperand(0)); 2724 if (L->getMemoryVT().getStoreSizeInBits().getFixedValue() <= 2725 C.Op0.getValueSizeInBits().getFixedValue()) { 2726 unsigned Type = L->getExtensionType(); 2727 if ((Type == ISD::ZEXTLOAD && C.ICmpType != SystemZICMP::SignedOnly) || 2728 (Type == ISD::SEXTLOAD && C.ICmpType != SystemZICMP::UnsignedOnly)) { 2729 C.Op0 = C.Op0.getOperand(0); 2730 C.Op1 = DAG.getConstant(0, DL, C.Op0.getValueType()); 2731 } 2732 } 2733 } 2734 } 2735 2736 // Return true if shift operation N has an in-range constant shift value. 2737 // Store it in ShiftVal if so. 2738 static bool isSimpleShift(SDValue N, unsigned &ShiftVal) { 2739 auto *Shift = dyn_cast<ConstantSDNode>(N.getOperand(1)); 2740 if (!Shift) 2741 return false; 2742 2743 uint64_t Amount = Shift->getZExtValue(); 2744 if (Amount >= N.getValueSizeInBits()) 2745 return false; 2746 2747 ShiftVal = Amount; 2748 return true; 2749 } 2750 2751 // Check whether an AND with Mask is suitable for a TEST UNDER MASK 2752 // instruction and whether the CC value is descriptive enough to handle 2753 // a comparison of type Opcode between the AND result and CmpVal. 2754 // CCMask says which comparison result is being tested and BitSize is 2755 // the number of bits in the operands. If TEST UNDER MASK can be used, 2756 // return the corresponding CC mask, otherwise return 0. 2757 static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask, 2758 uint64_t Mask, uint64_t CmpVal, 2759 unsigned ICmpType) { 2760 assert(Mask != 0 && "ANDs with zero should have been removed by now"); 2761 2762 // Check whether the mask is suitable for TMHH, TMHL, TMLH or TMLL. 2763 if (!SystemZ::isImmLL(Mask) && !SystemZ::isImmLH(Mask) && 2764 !SystemZ::isImmHL(Mask) && !SystemZ::isImmHH(Mask)) 2765 return 0; 2766 2767 // Work out the masks for the lowest and highest bits. 2768 uint64_t High = llvm::bit_floor(Mask); 2769 uint64_t Low = uint64_t(1) << llvm::countr_zero(Mask); 2770 2771 // Signed ordered comparisons are effectively unsigned if the sign 2772 // bit is dropped. 2773 bool EffectivelyUnsigned = (ICmpType != SystemZICMP::SignedOnly); 2774 2775 // Check for equality comparisons with 0, or the equivalent. 2776 if (CmpVal == 0) { 2777 if (CCMask == SystemZ::CCMASK_CMP_EQ) 2778 return SystemZ::CCMASK_TM_ALL_0; 2779 if (CCMask == SystemZ::CCMASK_CMP_NE) 2780 return SystemZ::CCMASK_TM_SOME_1; 2781 } 2782 if (EffectivelyUnsigned && CmpVal > 0 && CmpVal <= Low) { 2783 if (CCMask == SystemZ::CCMASK_CMP_LT) 2784 return SystemZ::CCMASK_TM_ALL_0; 2785 if (CCMask == SystemZ::CCMASK_CMP_GE) 2786 return SystemZ::CCMASK_TM_SOME_1; 2787 } 2788 if (EffectivelyUnsigned && CmpVal < Low) { 2789 if (CCMask == SystemZ::CCMASK_CMP_LE) 2790 return SystemZ::CCMASK_TM_ALL_0; 2791 if (CCMask == SystemZ::CCMASK_CMP_GT) 2792 return SystemZ::CCMASK_TM_SOME_1; 2793 } 2794 2795 // Check for equality comparisons with the mask, or the equivalent. 
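// For example, (x & Mask) == Mask holds exactly when every masked bit is set
// (TM_ALL_1), and (x & Mask) != Mask when at least one masked bit is clear
// (TM_SOME_0).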
2796 if (CmpVal == Mask) { 2797 if (CCMask == SystemZ::CCMASK_CMP_EQ) 2798 return SystemZ::CCMASK_TM_ALL_1; 2799 if (CCMask == SystemZ::CCMASK_CMP_NE) 2800 return SystemZ::CCMASK_TM_SOME_0; 2801 } 2802 if (EffectivelyUnsigned && CmpVal >= Mask - Low && CmpVal < Mask) { 2803 if (CCMask == SystemZ::CCMASK_CMP_GT) 2804 return SystemZ::CCMASK_TM_ALL_1; 2805 if (CCMask == SystemZ::CCMASK_CMP_LE) 2806 return SystemZ::CCMASK_TM_SOME_0; 2807 } 2808 if (EffectivelyUnsigned && CmpVal > Mask - Low && CmpVal <= Mask) { 2809 if (CCMask == SystemZ::CCMASK_CMP_GE) 2810 return SystemZ::CCMASK_TM_ALL_1; 2811 if (CCMask == SystemZ::CCMASK_CMP_LT) 2812 return SystemZ::CCMASK_TM_SOME_0; 2813 } 2814 2815 // Check for ordered comparisons with the top bit. 2816 if (EffectivelyUnsigned && CmpVal >= Mask - High && CmpVal < High) { 2817 if (CCMask == SystemZ::CCMASK_CMP_LE) 2818 return SystemZ::CCMASK_TM_MSB_0; 2819 if (CCMask == SystemZ::CCMASK_CMP_GT) 2820 return SystemZ::CCMASK_TM_MSB_1; 2821 } 2822 if (EffectivelyUnsigned && CmpVal > Mask - High && CmpVal <= High) { 2823 if (CCMask == SystemZ::CCMASK_CMP_LT) 2824 return SystemZ::CCMASK_TM_MSB_0; 2825 if (CCMask == SystemZ::CCMASK_CMP_GE) 2826 return SystemZ::CCMASK_TM_MSB_1; 2827 } 2828 2829 // If there are just two bits, we can do equality checks for Low and High 2830 // as well. 2831 if (Mask == Low + High) { 2832 if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == Low) 2833 return SystemZ::CCMASK_TM_MIXED_MSB_0; 2834 if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == Low) 2835 return SystemZ::CCMASK_TM_MIXED_MSB_0 ^ SystemZ::CCMASK_ANY; 2836 if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == High) 2837 return SystemZ::CCMASK_TM_MIXED_MSB_1; 2838 if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == High) 2839 return SystemZ::CCMASK_TM_MIXED_MSB_1 ^ SystemZ::CCMASK_ANY; 2840 } 2841 2842 // Looks like we've exhausted our options. 2843 return 0; 2844 } 2845 2846 // See whether C can be implemented as a TEST UNDER MASK instruction. 2847 // Update the arguments with the TM version if so. 2848 static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL, 2849 Comparison &C) { 2850 // Use VECTOR TEST UNDER MASK for i128 operations. 2851 if (C.Op0.getValueType() == MVT::i128) { 2852 // We can use VTM for EQ/NE comparisons of x & y against 0. 2853 if (C.Op0.getOpcode() == ISD::AND && 2854 (C.CCMask == SystemZ::CCMASK_CMP_EQ || 2855 C.CCMask == SystemZ::CCMASK_CMP_NE)) { 2856 auto *Mask = dyn_cast<ConstantSDNode>(C.Op1); 2857 if (Mask && Mask->getAPIntValue() == 0) { 2858 C.Opcode = SystemZISD::VTM; 2859 C.Op1 = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, C.Op0.getOperand(1)); 2860 C.Op0 = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, C.Op0.getOperand(0)); 2861 C.CCValid = SystemZ::CCMASK_VCMP; 2862 if (C.CCMask == SystemZ::CCMASK_CMP_EQ) 2863 C.CCMask = SystemZ::CCMASK_VCMP_ALL; 2864 else 2865 C.CCMask = SystemZ::CCMASK_VCMP_ALL ^ C.CCValid; 2866 } 2867 } 2868 return; 2869 } 2870 2871 // Check that we have a comparison with a constant. 2872 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1); 2873 if (!ConstOp1) 2874 return; 2875 uint64_t CmpVal = ConstOp1->getZExtValue(); 2876 2877 // Check whether the nonconstant input is an AND with a constant mask. 
2878 Comparison NewC(C); 2879 uint64_t MaskVal; 2880 ConstantSDNode *Mask = nullptr; 2881 if (C.Op0.getOpcode() == ISD::AND) { 2882 NewC.Op0 = C.Op0.getOperand(0); 2883 NewC.Op1 = C.Op0.getOperand(1); 2884 Mask = dyn_cast<ConstantSDNode>(NewC.Op1); 2885 if (!Mask) 2886 return; 2887 MaskVal = Mask->getZExtValue(); 2888 } else { 2889 // There is no instruction to compare with a 64-bit immediate 2890 // so use TMHH instead if possible. We need an unsigned ordered 2891 // comparison with an i64 immediate. 2892 if (NewC.Op0.getValueType() != MVT::i64 || 2893 NewC.CCMask == SystemZ::CCMASK_CMP_EQ || 2894 NewC.CCMask == SystemZ::CCMASK_CMP_NE || 2895 NewC.ICmpType == SystemZICMP::SignedOnly) 2896 return; 2897 // Convert LE and GT comparisons into LT and GE. 2898 if (NewC.CCMask == SystemZ::CCMASK_CMP_LE || 2899 NewC.CCMask == SystemZ::CCMASK_CMP_GT) { 2900 if (CmpVal == uint64_t(-1)) 2901 return; 2902 CmpVal += 1; 2903 NewC.CCMask ^= SystemZ::CCMASK_CMP_EQ; 2904 } 2905 // If the low N bits of Op1 are zero, then the low N bits of Op0 can 2906 // be masked off without changing the result. 2907 MaskVal = -(CmpVal & -CmpVal); 2908 NewC.ICmpType = SystemZICMP::UnsignedOnly; 2909 } 2910 if (!MaskVal) 2911 return; 2912 2913 // Check whether the combination of mask, comparison value and comparison 2914 // type is suitable. 2915 unsigned BitSize = NewC.Op0.getValueSizeInBits(); 2916 unsigned NewCCMask, ShiftVal; 2917 if (NewC.ICmpType != SystemZICMP::SignedOnly && 2918 NewC.Op0.getOpcode() == ISD::SHL && 2919 isSimpleShift(NewC.Op0, ShiftVal) && 2920 (MaskVal >> ShiftVal != 0) && 2921 ((CmpVal >> ShiftVal) << ShiftVal) == CmpVal && 2922 (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, 2923 MaskVal >> ShiftVal, 2924 CmpVal >> ShiftVal, 2925 SystemZICMP::Any))) { 2926 NewC.Op0 = NewC.Op0.getOperand(0); 2927 MaskVal >>= ShiftVal; 2928 } else if (NewC.ICmpType != SystemZICMP::SignedOnly && 2929 NewC.Op0.getOpcode() == ISD::SRL && 2930 isSimpleShift(NewC.Op0, ShiftVal) && 2931 (MaskVal << ShiftVal != 0) && 2932 ((CmpVal << ShiftVal) >> ShiftVal) == CmpVal && 2933 (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, 2934 MaskVal << ShiftVal, 2935 CmpVal << ShiftVal, 2936 SystemZICMP::UnsignedOnly))) { 2937 NewC.Op0 = NewC.Op0.getOperand(0); 2938 MaskVal <<= ShiftVal; 2939 } else { 2940 NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, MaskVal, CmpVal, 2941 NewC.ICmpType); 2942 if (!NewCCMask) 2943 return; 2944 } 2945 2946 // Go ahead and make the change. 2947 C.Opcode = SystemZISD::TM; 2948 C.Op0 = NewC.Op0; 2949 if (Mask && Mask->getZExtValue() == MaskVal) 2950 C.Op1 = SDValue(Mask, 0); 2951 else 2952 C.Op1 = DAG.getConstant(MaskVal, DL, C.Op0.getValueType()); 2953 C.CCValid = SystemZ::CCMASK_TM; 2954 C.CCMask = NewCCMask; 2955 } 2956 2957 // Implement i128 comparison in vector registers. 2958 static void adjustICmp128(SelectionDAG &DAG, const SDLoc &DL, 2959 Comparison &C) { 2960 if (C.Opcode != SystemZISD::ICMP) 2961 return; 2962 if (C.Op0.getValueType() != MVT::i128) 2963 return; 2964 2965 // (In-)Equality comparisons can be implemented via VCEQGS.
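// VCEQGS compares the two doublewords of each operand and sets CC 0 only
// when both elements are equal, so full i128 equality corresponds to
// CCMASK_VCMP_ALL on the VICMPES node built below.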
2966 if (C.CCMask == SystemZ::CCMASK_CMP_EQ || 2967 C.CCMask == SystemZ::CCMASK_CMP_NE) { 2968 C.Opcode = SystemZISD::VICMPES; 2969 C.Op0 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, C.Op0); 2970 C.Op1 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, C.Op1); 2971 C.CCValid = SystemZ::CCMASK_VCMP; 2972 if (C.CCMask == SystemZ::CCMASK_CMP_EQ) 2973 C.CCMask = SystemZ::CCMASK_VCMP_ALL; 2974 else 2975 C.CCMask = SystemZ::CCMASK_VCMP_ALL ^ C.CCValid; 2976 return; 2977 } 2978 2979 // Normalize other comparisons to GT. 2980 bool Swap = false, Invert = false; 2981 switch (C.CCMask) { 2982 case SystemZ::CCMASK_CMP_GT: break; 2983 case SystemZ::CCMASK_CMP_LT: Swap = true; break; 2984 case SystemZ::CCMASK_CMP_LE: Invert = true; break; 2985 case SystemZ::CCMASK_CMP_GE: Swap = Invert = true; break; 2986 default: llvm_unreachable("Invalid integer condition!"); 2987 } 2988 if (Swap) 2989 std::swap(C.Op0, C.Op1); 2990 2991 if (C.ICmpType == SystemZICMP::UnsignedOnly) 2992 C.Opcode = SystemZISD::UCMP128HI; 2993 else 2994 C.Opcode = SystemZISD::SCMP128HI; 2995 C.CCValid = SystemZ::CCMASK_ANY; 2996 C.CCMask = SystemZ::CCMASK_1; 2997 2998 if (Invert) 2999 C.CCMask ^= C.CCValid; 3000 } 3001 3002 // See whether the comparison argument contains a redundant AND 3003 // and remove it if so. This sometimes happens due to the generic 3004 // BRCOND expansion. 3005 static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL, 3006 Comparison &C) { 3007 if (C.Op0.getOpcode() != ISD::AND) 3008 return; 3009 auto *Mask = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1)); 3010 if (!Mask || Mask->getValueSizeInBits(0) > 64) 3011 return; 3012 KnownBits Known = DAG.computeKnownBits(C.Op0.getOperand(0)); 3013 if ((~Known.Zero).getZExtValue() & ~Mask->getZExtValue()) 3014 return; 3015 3016 C.Op0 = C.Op0.getOperand(0); 3017 } 3018 3019 // Return a Comparison that tests the condition-code result of intrinsic 3020 // node Call against constant integer CC using comparison code Cond. 3021 // Opcode is the opcode of the SystemZISD operation for the intrinsic 3022 // and CCValid is the set of possible condition-code results. 3023 static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode, 3024 SDValue Call, unsigned CCValid, uint64_t CC, 3025 ISD::CondCode Cond) { 3026 Comparison C(Call, SDValue(), SDValue()); 3027 C.Opcode = Opcode; 3028 C.CCValid = CCValid; 3029 if (Cond == ISD::SETEQ) 3030 // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3. 3031 C.CCMask = CC < 4 ? 1 << (3 - CC) : 0; 3032 else if (Cond == ISD::SETNE) 3033 // ...and the inverse of that. 3034 C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1; 3035 else if (Cond == ISD::SETLT || Cond == ISD::SETULT) 3036 // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3, 3037 // always true for CC>3. 3038 C.CCMask = CC < 4 ? ~0U << (4 - CC) : -1; 3039 else if (Cond == ISD::SETGE || Cond == ISD::SETUGE) 3040 // ...and the inverse of that. 3041 C.CCMask = CC < 4 ? ~(~0U << (4 - CC)) : 0; 3042 else if (Cond == ISD::SETLE || Cond == ISD::SETULE) 3043 // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true), 3044 // always true for CC>3. 3045 C.CCMask = CC < 4 ? ~0U << (3 - CC) : -1; 3046 else if (Cond == ISD::SETGT || Cond == ISD::SETUGT) 3047 // ...and the inverse of that. 3048 C.CCMask = CC < 4 ? ~(~0U << (3 - CC)) : 0; 3049 else 3050 llvm_unreachable("Unexpected integer comparison type"); 3051 C.CCMask &= CCValid; 3052 return C; 3053 } 3054 3055 // Decide how to implement a comparison of type Cond between CmpOp0 with CmpOp1. 
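// A rough example of the overall flow (illustrative only): for
//   (setcc i32 (and %x, 0xff00), 0, seteq)
// the code below first builds an ICMP-style Comparison, the adjust*
// helpers then turn it into a TEST UNDER MASK (or strip a redundant AND),
// and the caller finally emits the comparison with emitCmp and tests the
// resulting CC value against C.CCMask.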
3056 static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1, 3057 ISD::CondCode Cond, const SDLoc &DL, 3058 SDValue Chain = SDValue(), 3059 bool IsSignaling = false) { 3060 if (CmpOp1.getOpcode() == ISD::Constant) { 3061 assert(!Chain); 3062 unsigned Opcode, CCValid; 3063 if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN && 3064 CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) && 3065 isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid)) 3066 return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, 3067 CmpOp1->getAsZExtVal(), Cond); 3068 if (CmpOp0.getOpcode() == ISD::INTRINSIC_WO_CHAIN && 3069 CmpOp0.getResNo() == CmpOp0->getNumValues() - 1 && 3070 isIntrinsicWithCC(CmpOp0, Opcode, CCValid)) 3071 return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, 3072 CmpOp1->getAsZExtVal(), Cond); 3073 } 3074 Comparison C(CmpOp0, CmpOp1, Chain); 3075 C.CCMask = CCMaskForCondCode(Cond); 3076 if (C.Op0.getValueType().isFloatingPoint()) { 3077 C.CCValid = SystemZ::CCMASK_FCMP; 3078 if (!C.Chain) 3079 C.Opcode = SystemZISD::FCMP; 3080 else if (!IsSignaling) 3081 C.Opcode = SystemZISD::STRICT_FCMP; 3082 else 3083 C.Opcode = SystemZISD::STRICT_FCMPS; 3084 adjustForFNeg(C); 3085 } else { 3086 assert(!C.Chain); 3087 C.CCValid = SystemZ::CCMASK_ICMP; 3088 C.Opcode = SystemZISD::ICMP; 3089 // Choose the type of comparison. Equality and inequality tests can 3090 // use either signed or unsigned comparisons. The choice also doesn't 3091 // matter if both sign bits are known to be clear. In those cases we 3092 // want to give the main isel code the freedom to choose whichever 3093 // form fits best. 3094 if (C.CCMask == SystemZ::CCMASK_CMP_EQ || 3095 C.CCMask == SystemZ::CCMASK_CMP_NE || 3096 (DAG.SignBitIsZero(C.Op0) && DAG.SignBitIsZero(C.Op1))) 3097 C.ICmpType = SystemZICMP::Any; 3098 else if (C.CCMask & SystemZ::CCMASK_CMP_UO) 3099 C.ICmpType = SystemZICMP::UnsignedOnly; 3100 else 3101 C.ICmpType = SystemZICMP::SignedOnly; 3102 C.CCMask &= ~SystemZ::CCMASK_CMP_UO; 3103 adjustForRedundantAnd(DAG, DL, C); 3104 adjustZeroCmp(DAG, DL, C); 3105 adjustSubwordCmp(DAG, DL, C); 3106 adjustForSubtraction(DAG, DL, C); 3107 adjustForLTGFR(C); 3108 adjustICmpTruncate(DAG, DL, C); 3109 } 3110 3111 if (shouldSwapCmpOperands(C)) { 3112 std::swap(C.Op0, C.Op1); 3113 C.CCMask = SystemZ::reverseCCMask(C.CCMask); 3114 } 3115 3116 adjustForTestUnderMask(DAG, DL, C); 3117 adjustICmp128(DAG, DL, C); 3118 return C; 3119 } 3120 3121 // Emit the comparison instruction described by C. 
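// For reference (a sketch, not an exhaustive mapping): an ICMP Comparison
// is normally selected to a compare such as CGR/CLGR (or an immediate or
// memory form), a TM Comparison to one of the TEST UNDER MASK
// instructions, and an FCMP/STRICT_FCMP(S) Comparison to CEBR/CDBR or the
// signaling KEBR/KDBR forms, with the resulting CC consumed by
// SELECT_CCMASK or BR_CCMASK nodes.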
3122 static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) { 3123 if (!C.Op1.getNode()) { 3124 SDNode *Node; 3125 switch (C.Op0.getOpcode()) { 3126 case ISD::INTRINSIC_W_CHAIN: 3127 Node = emitIntrinsicWithCCAndChain(DAG, C.Op0, C.Opcode); 3128 return SDValue(Node, 0); 3129 case ISD::INTRINSIC_WO_CHAIN: 3130 Node = emitIntrinsicWithCC(DAG, C.Op0, C.Opcode); 3131 return SDValue(Node, Node->getNumValues() - 1); 3132 default: 3133 llvm_unreachable("Invalid comparison operands"); 3134 } 3135 } 3136 if (C.Opcode == SystemZISD::ICMP) 3137 return DAG.getNode(SystemZISD::ICMP, DL, MVT::i32, C.Op0, C.Op1, 3138 DAG.getTargetConstant(C.ICmpType, DL, MVT::i32)); 3139 if (C.Opcode == SystemZISD::TM) { 3140 bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) != 3141 bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_1)); 3142 return DAG.getNode(SystemZISD::TM, DL, MVT::i32, C.Op0, C.Op1, 3143 DAG.getTargetConstant(RegisterOnly, DL, MVT::i32)); 3144 } 3145 if (C.Opcode == SystemZISD::VICMPES) { 3146 SDVTList VTs = DAG.getVTList(C.Op0.getValueType(), MVT::i32); 3147 SDValue Val = DAG.getNode(C.Opcode, DL, VTs, C.Op0, C.Op1); 3148 return SDValue(Val.getNode(), 1); 3149 } 3150 if (C.Chain) { 3151 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other); 3152 return DAG.getNode(C.Opcode, DL, VTs, C.Chain, C.Op0, C.Op1); 3153 } 3154 return DAG.getNode(C.Opcode, DL, MVT::i32, C.Op0, C.Op1); 3155 } 3156 3157 // Implement a 32-bit *MUL_LOHI operation by extending both operands to 3158 // 64 bits. Extend is the extension type to use. Store the high part 3159 // in Hi and the low part in Lo. 3160 static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend, 3161 SDValue Op0, SDValue Op1, SDValue &Hi, 3162 SDValue &Lo) { 3163 Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0); 3164 Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1); 3165 SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1); 3166 Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul, 3167 DAG.getConstant(32, DL, MVT::i64)); 3168 Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Hi); 3169 Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul); 3170 } 3171 3172 // Lower a binary operation that produces two VT results, one in each 3173 // half of a GR128 pair. Op0 and Op1 are the VT operands to the operation, 3174 // and Opcode performs the GR128 operation. Store the even register result 3175 // in Even and the odd register result in Odd. 3176 static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT, 3177 unsigned Opcode, SDValue Op0, SDValue Op1, 3178 SDValue &Even, SDValue &Odd) { 3179 SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped, Op0, Op1); 3180 bool Is32Bit = is32Bit(VT); 3181 Even = DAG.getTargetExtractSubreg(SystemZ::even128(Is32Bit), DL, VT, Result); 3182 Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result); 3183 } 3184 3185 // Return an i32 value that is 1 if the CC value produced by CCReg is 3186 // in the mask CCMask and 0 otherwise. CC is known to have a value 3187 // in CCValid, so other values can be ignored. 
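// As a rough sketch of what this becomes: the SELECT_CCMASK built here
// selects between the constants 1 and 0 under CCMask, so the final code is
// typically either a load-on-condition (e.g. LOCHI) or an IPM-based
// sequence that extracts the CC value into a GPR; either way the callers
// just see an i32 that is 1 when CC is in CCMask.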
3188 static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg, 3189 unsigned CCValid, unsigned CCMask) { 3190 SDValue Ops[] = {DAG.getConstant(1, DL, MVT::i32), 3191 DAG.getConstant(0, DL, MVT::i32), 3192 DAG.getTargetConstant(CCValid, DL, MVT::i32), 3193 DAG.getTargetConstant(CCMask, DL, MVT::i32), CCReg}; 3194 return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, MVT::i32, Ops); 3195 } 3196 3197 // Return the SystemZISD vector comparison operation for CC, or 0 if it cannot 3198 // be done directly. Mode is CmpMode::Int for integer comparisons, CmpMode::FP 3199 // for regular floating-point comparisons, CmpMode::StrictFP for strict (quiet) 3200 // floating-point comparisons, and CmpMode::SignalingFP for strict signaling 3201 // floating-point comparisons. 3202 enum class CmpMode { Int, FP, StrictFP, SignalingFP }; 3203 static unsigned getVectorComparison(ISD::CondCode CC, CmpMode Mode) { 3204 switch (CC) { 3205 case ISD::SETOEQ: 3206 case ISD::SETEQ: 3207 switch (Mode) { 3208 case CmpMode::Int: return SystemZISD::VICMPE; 3209 case CmpMode::FP: return SystemZISD::VFCMPE; 3210 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPE; 3211 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPES; 3212 } 3213 llvm_unreachable("Bad mode"); 3214 3215 case ISD::SETOGE: 3216 case ISD::SETGE: 3217 switch (Mode) { 3218 case CmpMode::Int: return 0; 3219 case CmpMode::FP: return SystemZISD::VFCMPHE; 3220 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPHE; 3221 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHES; 3222 } 3223 llvm_unreachable("Bad mode"); 3224 3225 case ISD::SETOGT: 3226 case ISD::SETGT: 3227 switch (Mode) { 3228 case CmpMode::Int: return SystemZISD::VICMPH; 3229 case CmpMode::FP: return SystemZISD::VFCMPH; 3230 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPH; 3231 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHS; 3232 } 3233 llvm_unreachable("Bad mode"); 3234 3235 case ISD::SETUGT: 3236 switch (Mode) { 3237 case CmpMode::Int: return SystemZISD::VICMPHL; 3238 case CmpMode::FP: return 0; 3239 case CmpMode::StrictFP: return 0; 3240 case CmpMode::SignalingFP: return 0; 3241 } 3242 llvm_unreachable("Bad mode"); 3243 3244 default: 3245 return 0; 3246 } 3247 } 3248 3249 // Return the SystemZISD vector comparison operation for CC or its inverse, 3250 // or 0 if neither can be done directly. Indicate in Invert whether the 3251 // result is for the inverse of CC. Mode is as above. 3252 static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, CmpMode Mode, 3253 bool &Invert) { 3254 if (unsigned Opcode = getVectorComparison(CC, Mode)) { 3255 Invert = false; 3256 return Opcode; 3257 } 3258 3259 CC = ISD::getSetCCInverse(CC, Mode == CmpMode::Int ? MVT::i32 : MVT::f32); 3260 if (unsigned Opcode = getVectorComparison(CC, Mode)) { 3261 Invert = true; 3262 return Opcode; 3263 } 3264 3265 return 0; 3266 } 3267 3268 // Return a v2f64 that contains the extended form of elements Start and Start+1 3269 // of v4f32 value Op. If Chain is nonnull, return the strict form.
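// For example (illustrative): with Start == 2 the shuffle mask is
// { 2, -1, 3, -1 }, which moves elements 2 and 3 of Op into the
// even-numbered lanes; the (STRICT_)VEXTEND node then widens those even
// lanes, producing a v2f64 that holds the extended elements 2 and 3.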
3270 static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL, 3271 SDValue Op, SDValue Chain) { 3272 int Mask[] = { Start, -1, Start + 1, -1 }; 3273 Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32), Mask); 3274 if (Chain) { 3275 SDVTList VTs = DAG.getVTList(MVT::v2f64, MVT::Other); 3276 return DAG.getNode(SystemZISD::STRICT_VEXTEND, DL, VTs, Chain, Op); 3277 } 3278 return DAG.getNode(SystemZISD::VEXTEND, DL, MVT::v2f64, Op); 3279 } 3280 3281 // Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode, 3282 // producing a result of type VT. If Chain is nonnull, return the strict form. 3283 SDValue SystemZTargetLowering::getVectorCmp(SelectionDAG &DAG, unsigned Opcode, 3284 const SDLoc &DL, EVT VT, 3285 SDValue CmpOp0, 3286 SDValue CmpOp1, 3287 SDValue Chain) const { 3288 // There is no hardware support for v4f32 (unless we have the vector 3289 // enhancements facility 1), so extend the vector into two v2f64s 3290 // and compare those. 3291 if (CmpOp0.getValueType() == MVT::v4f32 && 3292 !Subtarget.hasVectorEnhancements1()) { 3293 SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0, Chain); 3294 SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0, Chain); 3295 SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1, Chain); 3296 SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1, Chain); 3297 if (Chain) { 3298 SDVTList VTs = DAG.getVTList(MVT::v2i64, MVT::Other); 3299 SDValue HRes = DAG.getNode(Opcode, DL, VTs, Chain, H0, H1); 3300 SDValue LRes = DAG.getNode(Opcode, DL, VTs, Chain, L0, L1); 3301 SDValue Res = DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes); 3302 SDValue Chains[6] = { H0.getValue(1), L0.getValue(1), 3303 H1.getValue(1), L1.getValue(1), 3304 HRes.getValue(1), LRes.getValue(1) }; 3305 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); 3306 SDValue Ops[2] = { Res, NewChain }; 3307 return DAG.getMergeValues(Ops, DL); 3308 } 3309 SDValue HRes = DAG.getNode(Opcode, DL, MVT::v2i64, H0, H1); 3310 SDValue LRes = DAG.getNode(Opcode, DL, MVT::v2i64, L0, L1); 3311 return DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes); 3312 } 3313 if (Chain) { 3314 SDVTList VTs = DAG.getVTList(VT, MVT::Other); 3315 return DAG.getNode(Opcode, DL, VTs, Chain, CmpOp0, CmpOp1); 3316 } 3317 return DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1); 3318 } 3319 3320 // Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing 3321 // an integer mask of type VT. If Chain is nonnull, we have a strict 3322 // floating-point comparison. If in addition IsSignaling is true, we have 3323 // a strict signaling floating-point comparison. 3324 SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG, 3325 const SDLoc &DL, EVT VT, 3326 ISD::CondCode CC, 3327 SDValue CmpOp0, 3328 SDValue CmpOp1, 3329 SDValue Chain, 3330 bool IsSignaling) const { 3331 bool IsFP = CmpOp0.getValueType().isFloatingPoint(); 3332 assert (!Chain || IsFP); 3333 assert (!IsSignaling || Chain); 3334 CmpMode Mode = IsSignaling ? CmpMode::SignalingFP : 3335 Chain ? CmpMode::StrictFP : IsFP ? CmpMode::FP : CmpMode::Int; 3336 bool Invert = false; 3337 SDValue Cmp; 3338 switch (CC) { 3339 // Handle tests for order using (or (ogt y x) (oge x y)). 
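// Brief justification (informal): if neither operand is a NaN, then one of
// (ogt y x) and (oge x y) must hold, so the OR is true exactly for ordered
// inputs; if either operand is a NaN, both ordered comparisons are false.
// SETUO simply inverts the same test.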
3340 case ISD::SETUO: 3341 Invert = true; 3342 [[fallthrough]]; 3343 case ISD::SETO: { 3344 assert(IsFP && "Unexpected integer comparison"); 3345 SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode), 3346 DL, VT, CmpOp1, CmpOp0, Chain); 3347 SDValue GE = getVectorCmp(DAG, getVectorComparison(ISD::SETOGE, Mode), 3348 DL, VT, CmpOp0, CmpOp1, Chain); 3349 Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GE); 3350 if (Chain) 3351 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, 3352 LT.getValue(1), GE.getValue(1)); 3353 break; 3354 } 3355 3356 // Handle <> tests using (or (ogt y x) (ogt x y)). 3357 case ISD::SETUEQ: 3358 Invert = true; 3359 [[fallthrough]]; 3360 case ISD::SETONE: { 3361 assert(IsFP && "Unexpected integer comparison"); 3362 SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode), 3363 DL, VT, CmpOp1, CmpOp0, Chain); 3364 SDValue GT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode), 3365 DL, VT, CmpOp0, CmpOp1, Chain); 3366 Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GT); 3367 if (Chain) 3368 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, 3369 LT.getValue(1), GT.getValue(1)); 3370 break; 3371 } 3372 3373 // Otherwise a single comparison is enough. It doesn't really 3374 // matter whether we try the inversion or the swap first, since 3375 // there are no cases where both work. 3376 default: 3377 if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert)) 3378 Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1, Chain); 3379 else { 3380 CC = ISD::getSetCCSwappedOperands(CC); 3381 if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert)) 3382 Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0, Chain); 3383 else 3384 llvm_unreachable("Unhandled comparison"); 3385 } 3386 if (Chain) 3387 Chain = Cmp.getValue(1); 3388 break; 3389 } 3390 if (Invert) { 3391 SDValue Mask = 3392 DAG.getSplatBuildVector(VT, DL, DAG.getConstant(-1, DL, MVT::i64)); 3393 Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask); 3394 } 3395 if (Chain && Chain.getNode() != Cmp.getNode()) { 3396 SDValue Ops[2] = { Cmp, Chain }; 3397 Cmp = DAG.getMergeValues(Ops, DL); 3398 } 3399 return Cmp; 3400 } 3401 3402 SDValue SystemZTargetLowering::lowerSETCC(SDValue Op, 3403 SelectionDAG &DAG) const { 3404 SDValue CmpOp0 = Op.getOperand(0); 3405 SDValue CmpOp1 = Op.getOperand(1); 3406 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); 3407 SDLoc DL(Op); 3408 EVT VT = Op.getValueType(); 3409 if (VT.isVector()) 3410 return lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1); 3411 3412 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL)); 3413 SDValue CCReg = emitCmp(DAG, DL, C); 3414 return emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask); 3415 } 3416 3417 SDValue SystemZTargetLowering::lowerSTRICT_FSETCC(SDValue Op, 3418 SelectionDAG &DAG, 3419 bool IsSignaling) const { 3420 SDValue Chain = Op.getOperand(0); 3421 SDValue CmpOp0 = Op.getOperand(1); 3422 SDValue CmpOp1 = Op.getOperand(2); 3423 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(3))->get(); 3424 SDLoc DL(Op); 3425 EVT VT = Op.getNode()->getValueType(0); 3426 if (VT.isVector()) { 3427 SDValue Res = lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1, 3428 Chain, IsSignaling); 3429 return Res.getValue(Op.getResNo()); 3430 } 3431 3432 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL, Chain, IsSignaling)); 3433 SDValue CCReg = emitCmp(DAG, DL, C); 3434 CCReg->setFlags(Op->getFlags()); 3435 SDValue Result = emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask); 3436 SDValue Ops[2] = { Result, CCReg.getValue(1) }; 
3437 return DAG.getMergeValues(Ops, DL); 3438 } 3439 3440 SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const { 3441 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); 3442 SDValue CmpOp0 = Op.getOperand(2); 3443 SDValue CmpOp1 = Op.getOperand(3); 3444 SDValue Dest = Op.getOperand(4); 3445 SDLoc DL(Op); 3446 3447 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL)); 3448 SDValue CCReg = emitCmp(DAG, DL, C); 3449 return DAG.getNode( 3450 SystemZISD::BR_CCMASK, DL, Op.getValueType(), Op.getOperand(0), 3451 DAG.getTargetConstant(C.CCValid, DL, MVT::i32), 3452 DAG.getTargetConstant(C.CCMask, DL, MVT::i32), Dest, CCReg); 3453 } 3454 3455 // Return true if Pos is CmpOp and Neg is the negative of CmpOp, 3456 // allowing Pos and Neg to be wider than CmpOp. 3457 static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg) { 3458 return (Neg.getOpcode() == ISD::SUB && 3459 Neg.getOperand(0).getOpcode() == ISD::Constant && 3460 Neg.getConstantOperandVal(0) == 0 && Neg.getOperand(1) == Pos && 3461 (Pos == CmpOp || (Pos.getOpcode() == ISD::SIGN_EXTEND && 3462 Pos.getOperand(0) == CmpOp))); 3463 } 3464 3465 // Return the absolute or negative absolute of Op; IsNegative decides which. 3466 static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op, 3467 bool IsNegative) { 3468 Op = DAG.getNode(ISD::ABS, DL, Op.getValueType(), Op); 3469 if (IsNegative) 3470 Op = DAG.getNode(ISD::SUB, DL, Op.getValueType(), 3471 DAG.getConstant(0, DL, Op.getValueType()), Op); 3472 return Op; 3473 } 3474 3475 SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op, 3476 SelectionDAG &DAG) const { 3477 SDValue CmpOp0 = Op.getOperand(0); 3478 SDValue CmpOp1 = Op.getOperand(1); 3479 SDValue TrueOp = Op.getOperand(2); 3480 SDValue FalseOp = Op.getOperand(3); 3481 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); 3482 SDLoc DL(Op); 3483 3484 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL)); 3485 3486 // Check for absolute and negative-absolute selections, including those 3487 // where the comparison value is sign-extended (for LPGFR and LNGFR). 3488 // This check supplements the one in DAGCombiner. 3489 if (C.Opcode == SystemZISD::ICMP && C.CCMask != SystemZ::CCMASK_CMP_EQ && 3490 C.CCMask != SystemZ::CCMASK_CMP_NE && 3491 C.Op1.getOpcode() == ISD::Constant && 3492 cast<ConstantSDNode>(C.Op1)->getValueSizeInBits(0) <= 64 && 3493 C.Op1->getAsZExtVal() == 0) { 3494 if (isAbsolute(C.Op0, TrueOp, FalseOp)) 3495 return getAbsolute(DAG, DL, TrueOp, C.CCMask & SystemZ::CCMASK_CMP_LT); 3496 if (isAbsolute(C.Op0, FalseOp, TrueOp)) 3497 return getAbsolute(DAG, DL, FalseOp, C.CCMask & SystemZ::CCMASK_CMP_GT); 3498 } 3499 3500 SDValue CCReg = emitCmp(DAG, DL, C); 3501 SDValue Ops[] = {TrueOp, FalseOp, 3502 DAG.getTargetConstant(C.CCValid, DL, MVT::i32), 3503 DAG.getTargetConstant(C.CCMask, DL, MVT::i32), CCReg}; 3504 3505 return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, Op.getValueType(), Ops); 3506 } 3507 3508 SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node, 3509 SelectionDAG &DAG) const { 3510 SDLoc DL(Node); 3511 const GlobalValue *GV = Node->getGlobal(); 3512 int64_t Offset = Node->getOffset(); 3513 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 3514 CodeModel::Model CM = DAG.getTarget().getCodeModel(); 3515 3516 SDValue Result; 3517 if (Subtarget.isPC32DBLSymbol(GV, CM)) { 3518 if (isInt<32>(Offset)) { 3519 // Assign anchors at 1<<12 byte boundaries. 
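// Worked example (illustrative): for Offset == 0x12346 the anchor is
// 0x12000, materialized by a single LARL-style PCREL_WRAPPER node, and the
// remaining 0x346 is even (halfword-aligned), so it can be folded into the
// PC-relative address via PCREL_OFFSET below; nearby accesses that share
// the same 4K anchor can then reuse one LARL.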
3520 uint64_t Anchor = Offset & ~uint64_t(0xfff); 3521 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor); 3522 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); 3523 3524 // The offset can be folded into the address if it is aligned to a 3525 // halfword. 3526 Offset -= Anchor; 3527 if (Offset != 0 && (Offset & 1) == 0) { 3528 SDValue Full = 3529 DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor + Offset); 3530 Result = DAG.getNode(SystemZISD::PCREL_OFFSET, DL, PtrVT, Full, Result); 3531 Offset = 0; 3532 } 3533 } else { 3534 // Conservatively load a constant offset greater than 32 bits into a 3535 // register below. 3536 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT); 3537 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); 3538 } 3539 } else if (Subtarget.isTargetELF()) { 3540 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT); 3541 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); 3542 Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result, 3543 MachinePointerInfo::getGOT(DAG.getMachineFunction())); 3544 } else if (Subtarget.isTargetzOS()) { 3545 Result = getADAEntry(DAG, GV, DL, PtrVT); 3546 } else 3547 llvm_unreachable("Unexpected Subtarget"); 3548 3549 // If there was a non-zero offset that we didn't fold, create an explicit 3550 // addition for it. 3551 if (Offset != 0) 3552 Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result, 3553 DAG.getConstant(Offset, DL, PtrVT)); 3554 3555 return Result; 3556 } 3557 3558 SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node, 3559 SelectionDAG &DAG, 3560 unsigned Opcode, 3561 SDValue GOTOffset) const { 3562 SDLoc DL(Node); 3563 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 3564 SDValue Chain = DAG.getEntryNode(); 3565 SDValue Glue; 3566 3567 if (DAG.getMachineFunction().getFunction().getCallingConv() == 3568 CallingConv::GHC) 3569 report_fatal_error("In GHC calling convention TLS is not supported"); 3570 3571 // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12. 3572 SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT); 3573 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue); 3574 Glue = Chain.getValue(1); 3575 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R2D, GOTOffset, Glue); 3576 Glue = Chain.getValue(1); 3577 3578 // The first call operand is the chain and the second is the TLS symbol. 3579 SmallVector<SDValue, 8> Ops; 3580 Ops.push_back(Chain); 3581 Ops.push_back(DAG.getTargetGlobalAddress(Node->getGlobal(), DL, 3582 Node->getValueType(0), 3583 0, 0)); 3584 3585 // Add argument registers to the end of the list so that they are 3586 // known live into the call. 3587 Ops.push_back(DAG.getRegister(SystemZ::R2D, PtrVT)); 3588 Ops.push_back(DAG.getRegister(SystemZ::R12D, PtrVT)); 3589 3590 // Add a register mask operand representing the call-preserved registers. 3591 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); 3592 const uint32_t *Mask = 3593 TRI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C); 3594 assert(Mask && "Missing call preserved mask for calling convention"); 3595 Ops.push_back(DAG.getRegisterMask(Mask)); 3596 3597 // Glue the call to the argument copies. 3598 Ops.push_back(Glue); 3599 3600 // Emit the call. 3601 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); 3602 Chain = DAG.getNode(Opcode, DL, NodeTys, Ops); 3603 Glue = Chain.getValue(1); 3604 3605 // Copy the return value from %r2. 
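// For reference, a typical ELF general-dynamic sequence for the call built
// here looks roughly like (a sketch; exact relocations and scheduling
// omitted):
//   larl  %r12, _GLOBAL_OFFSET_TABLE_
//   lgrl  %r2, <literal-pool slot holding the GOT offset of the tls_index>
//   brasl %r14, __tls_get_offset@PLT
// with the resulting offset returned in %r2.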
3606 return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue); 3607 } 3608 3609 SDValue SystemZTargetLowering::lowerThreadPointer(const SDLoc &DL, 3610 SelectionDAG &DAG) const { 3611 SDValue Chain = DAG.getEntryNode(); 3612 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 3613 3614 // The high part of the thread pointer is in access register 0. 3615 SDValue TPHi = DAG.getCopyFromReg(Chain, DL, SystemZ::A0, MVT::i32); 3616 TPHi = DAG.getNode(ISD::ANY_EXTEND, DL, PtrVT, TPHi); 3617 3618 // The low part of the thread pointer is in access register 1. 3619 SDValue TPLo = DAG.getCopyFromReg(Chain, DL, SystemZ::A1, MVT::i32); 3620 TPLo = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TPLo); 3621 3622 // Merge them into a single 64-bit address. 3623 SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi, 3624 DAG.getConstant(32, DL, PtrVT)); 3625 return DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo); 3626 } 3627 3628 SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node, 3629 SelectionDAG &DAG) const { 3630 if (DAG.getTarget().useEmulatedTLS()) 3631 return LowerToTLSEmulatedModel(Node, DAG); 3632 SDLoc DL(Node); 3633 const GlobalValue *GV = Node->getGlobal(); 3634 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 3635 TLSModel::Model model = DAG.getTarget().getTLSModel(GV); 3636 3637 if (DAG.getMachineFunction().getFunction().getCallingConv() == 3638 CallingConv::GHC) 3639 report_fatal_error("In GHC calling convention TLS is not supported"); 3640 3641 SDValue TP = lowerThreadPointer(DL, DAG); 3642 3643 // Get the offset of GA from the thread pointer, based on the TLS model. 3644 SDValue Offset; 3645 switch (model) { 3646 case TLSModel::GeneralDynamic: { 3647 // Load the GOT offset of the tls_index (module ID / per-symbol offset). 3648 SystemZConstantPoolValue *CPV = 3649 SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD); 3650 3651 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8)); 3652 Offset = DAG.getLoad( 3653 PtrVT, DL, DAG.getEntryNode(), Offset, 3654 MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); 3655 3656 // Call __tls_get_offset to retrieve the offset. 3657 Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset); 3658 break; 3659 } 3660 3661 case TLSModel::LocalDynamic: { 3662 // Load the GOT offset of the module ID. 3663 SystemZConstantPoolValue *CPV = 3664 SystemZConstantPoolValue::Create(GV, SystemZCP::TLSLDM); 3665 3666 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8)); 3667 Offset = DAG.getLoad( 3668 PtrVT, DL, DAG.getEntryNode(), Offset, 3669 MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); 3670 3671 // Call __tls_get_offset to retrieve the module base offset. 3672 Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset); 3673 3674 // Note: The SystemZLDCleanupPass will remove redundant computations 3675 // of the module base offset. Count total number of local-dynamic 3676 // accesses to trigger execution of that pass. 3677 SystemZMachineFunctionInfo* MFI = 3678 DAG.getMachineFunction().getInfo<SystemZMachineFunctionInfo>(); 3679 MFI->incNumLocalDynamicTLSAccesses(); 3680 3681 // Add the per-symbol offset. 
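// In other words (informal summary): for local-dynamic the final address is
//   TP + __tls_get_offset(module's TLSLDM entry) + DTPOFF(GV)
// so several TLS symbols in the same module can share one __tls_get_offset
// call and differ only in the DTPOFF constant added here.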
3682 CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF); 3683 3684 SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, Align(8)); 3685 DTPOffset = DAG.getLoad( 3686 PtrVT, DL, DAG.getEntryNode(), DTPOffset, 3687 MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); 3688 3689 Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset); 3690 break; 3691 } 3692 3693 case TLSModel::InitialExec: { 3694 // Load the offset from the GOT. 3695 Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 3696 SystemZII::MO_INDNTPOFF); 3697 Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset); 3698 Offset = 3699 DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Offset, 3700 MachinePointerInfo::getGOT(DAG.getMachineFunction())); 3701 break; 3702 } 3703 3704 case TLSModel::LocalExec: { 3705 // Force the offset into the constant pool and load it from there. 3706 SystemZConstantPoolValue *CPV = 3707 SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF); 3708 3709 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8)); 3710 Offset = DAG.getLoad( 3711 PtrVT, DL, DAG.getEntryNode(), Offset, 3712 MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); 3713 break; 3714 } 3715 } 3716 3717 // Add the base and offset together. 3718 return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset); 3719 } 3720 3721 SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node, 3722 SelectionDAG &DAG) const { 3723 SDLoc DL(Node); 3724 const BlockAddress *BA = Node->getBlockAddress(); 3725 int64_t Offset = Node->getOffset(); 3726 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 3727 3728 SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset); 3729 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); 3730 return Result; 3731 } 3732 3733 SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT, 3734 SelectionDAG &DAG) const { 3735 SDLoc DL(JT); 3736 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 3737 SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); 3738 3739 // Use LARL to load the address of the table. 3740 return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); 3741 } 3742 3743 SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP, 3744 SelectionDAG &DAG) const { 3745 SDLoc DL(CP); 3746 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 3747 3748 SDValue Result; 3749 if (CP->isMachineConstantPoolEntry()) 3750 Result = 3751 DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CP->getAlign()); 3752 else 3753 Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlign(), 3754 CP->getOffset()); 3755 3756 // Use LARL to load the address of the constant pool entry. 3757 return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); 3758 } 3759 3760 SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op, 3761 SelectionDAG &DAG) const { 3762 auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>(); 3763 MachineFunction &MF = DAG.getMachineFunction(); 3764 MachineFrameInfo &MFI = MF.getFrameInfo(); 3765 MFI.setFrameAddressIsTaken(true); 3766 3767 SDLoc DL(Op); 3768 unsigned Depth = Op.getConstantOperandVal(0); 3769 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 3770 3771 // By definition, the frame address is the address of the back chain. (In 3772 // the case of packed stack without backchain, return the address where the 3773 // backchain would have been stored. This will either be an unused space or 3774 // contain a saved register). 
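// Rough illustration: with a back chain present, __builtin_frame_address(2)
// comes out as roughly
//   addr = <address of this frame's back-chain slot>;
//   addr = *addr + BackchainOffset;   // caller's frame
//   addr = *addr + BackchainOffset;   // caller's caller
// which is exactly the load/add loop below.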
3775 int BackChainIdx = TFL->getOrCreateFramePointerSaveIndex(MF); 3776 SDValue BackChain = DAG.getFrameIndex(BackChainIdx, PtrVT); 3777 3778 if (Depth > 0) { 3779 // FIXME The frontend should detect this case. 3780 if (!MF.getSubtarget<SystemZSubtarget>().hasBackChain()) 3781 report_fatal_error("Unsupported stack frame traversal count"); 3782 3783 SDValue Offset = DAG.getConstant(TFL->getBackchainOffset(MF), DL, PtrVT); 3784 while (Depth--) { 3785 BackChain = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), BackChain, 3786 MachinePointerInfo()); 3787 BackChain = DAG.getNode(ISD::ADD, DL, PtrVT, BackChain, Offset); 3788 } 3789 } 3790 3791 return BackChain; 3792 } 3793 3794 SDValue SystemZTargetLowering::lowerRETURNADDR(SDValue Op, 3795 SelectionDAG &DAG) const { 3796 MachineFunction &MF = DAG.getMachineFunction(); 3797 MachineFrameInfo &MFI = MF.getFrameInfo(); 3798 MFI.setReturnAddressIsTaken(true); 3799 3800 if (verifyReturnAddressArgumentIsConstant(Op, DAG)) 3801 return SDValue(); 3802 3803 SDLoc DL(Op); 3804 unsigned Depth = Op.getConstantOperandVal(0); 3805 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 3806 3807 if (Depth > 0) { 3808 // FIXME The frontend should detect this case. 3809 if (!MF.getSubtarget<SystemZSubtarget>().hasBackChain()) 3810 report_fatal_error("Unsupported stack frame traversal count"); 3811 3812 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG); 3813 const auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>(); 3814 int Offset = TFL->getReturnAddressOffset(MF); 3815 SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, FrameAddr, 3816 DAG.getConstant(Offset, DL, PtrVT)); 3817 return DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, 3818 MachinePointerInfo()); 3819 } 3820 3821 // Return R14D (Elf) / R7D (XPLINK), which has the return address. Mark it an 3822 // implicit live-in. 3823 SystemZCallingConventionRegisters *CCR = Subtarget.getSpecialRegisters(); 3824 Register LinkReg = MF.addLiveIn(CCR->getReturnFunctionAddressRegister(), 3825 &SystemZ::GR64BitRegClass); 3826 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, LinkReg, PtrVT); 3827 } 3828 3829 SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op, 3830 SelectionDAG &DAG) const { 3831 SDLoc DL(Op); 3832 SDValue In = Op.getOperand(0); 3833 EVT InVT = In.getValueType(); 3834 EVT ResVT = Op.getValueType(); 3835 3836 // Convert loads directly. This is normally done by DAGCombiner, 3837 // but we need this case for bitcasts that are created during lowering 3838 // and which are then lowered themselves. 3839 if (auto *LoadN = dyn_cast<LoadSDNode>(In)) 3840 if (ISD::isNormalLoad(LoadN)) { 3841 SDValue NewLoad = DAG.getLoad(ResVT, DL, LoadN->getChain(), 3842 LoadN->getBasePtr(), LoadN->getMemOperand()); 3843 // Update the chain uses. 
3844 DAG.ReplaceAllUsesOfValueWith(SDValue(LoadN, 1), NewLoad.getValue(1)); 3845 return NewLoad; 3846 } 3847 3848 if (InVT == MVT::i32 && ResVT == MVT::f32) { 3849 SDValue In64; 3850 if (Subtarget.hasHighWord()) { 3851 SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, 3852 MVT::i64); 3853 In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL, 3854 MVT::i64, SDValue(U64, 0), In); 3855 } else { 3856 In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In); 3857 In64 = DAG.getNode(ISD::SHL, DL, MVT::i64, In64, 3858 DAG.getConstant(32, DL, MVT::i64)); 3859 } 3860 SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64); 3861 return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, 3862 DL, MVT::f32, Out64); 3863 } 3864 if (InVT == MVT::f32 && ResVT == MVT::i32) { 3865 SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64); 3866 SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL, 3867 MVT::f64, SDValue(U64, 0), In); 3868 SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64); 3869 if (Subtarget.hasHighWord()) 3870 return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, DL, 3871 MVT::i32, Out64); 3872 SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64, 3873 DAG.getConstant(32, DL, MVT::i64)); 3874 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift); 3875 } 3876 llvm_unreachable("Unexpected bitcast combination"); 3877 } 3878 3879 SDValue SystemZTargetLowering::lowerVASTART(SDValue Op, 3880 SelectionDAG &DAG) const { 3881 3882 if (Subtarget.isTargetXPLINK64()) 3883 return lowerVASTART_XPLINK(Op, DAG); 3884 else 3885 return lowerVASTART_ELF(Op, DAG); 3886 } 3887 3888 SDValue SystemZTargetLowering::lowerVASTART_XPLINK(SDValue Op, 3889 SelectionDAG &DAG) const { 3890 MachineFunction &MF = DAG.getMachineFunction(); 3891 SystemZMachineFunctionInfo *FuncInfo = 3892 MF.getInfo<SystemZMachineFunctionInfo>(); 3893 3894 SDLoc DL(Op); 3895 3896 // vastart just stores the address of the VarArgsFrameIndex slot into the 3897 // memory location argument. 3898 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 3899 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); 3900 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 3901 return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1), 3902 MachinePointerInfo(SV)); 3903 } 3904 3905 SDValue SystemZTargetLowering::lowerVASTART_ELF(SDValue Op, 3906 SelectionDAG &DAG) const { 3907 MachineFunction &MF = DAG.getMachineFunction(); 3908 SystemZMachineFunctionInfo *FuncInfo = 3909 MF.getInfo<SystemZMachineFunctionInfo>(); 3910 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 3911 3912 SDValue Chain = Op.getOperand(0); 3913 SDValue Addr = Op.getOperand(1); 3914 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 3915 SDLoc DL(Op); 3916 3917 // The initial values of each field. 3918 const unsigned NumFields = 4; 3919 SDValue Fields[NumFields] = { 3920 DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), DL, PtrVT), 3921 DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), DL, PtrVT), 3922 DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT), 3923 DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT) 3924 }; 3925 3926 // Store each field into its respective slot. 
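// For reference, the ELF va_list being initialized here is commonly
// described as
//   struct __va_list_tag {
//     long  __gpr;                 // offset  0: next GPR argument number
//     long  __fpr;                 // offset  8: next FPR argument number
//     void *__overflow_arg_area;   // offset 16
//     void *__reg_save_area;       // offset 24
//   };
// which is why four 8-byte fields are stored at offsets 0, 8, 16 and 24.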
3927 SDValue MemOps[NumFields]; 3928 unsigned Offset = 0; 3929 for (unsigned I = 0; I < NumFields; ++I) { 3930 SDValue FieldAddr = Addr; 3931 if (Offset != 0) 3932 FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr, 3933 DAG.getIntPtrConstant(Offset, DL)); 3934 MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr, 3935 MachinePointerInfo(SV, Offset)); 3936 Offset += 8; 3937 } 3938 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps); 3939 } 3940 3941 SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op, 3942 SelectionDAG &DAG) const { 3943 SDValue Chain = Op.getOperand(0); 3944 SDValue DstPtr = Op.getOperand(1); 3945 SDValue SrcPtr = Op.getOperand(2); 3946 const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue(); 3947 const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue(); 3948 SDLoc DL(Op); 3949 3950 uint32_t Sz = 3951 Subtarget.isTargetXPLINK64() ? getTargetMachine().getPointerSize(0) : 32; 3952 return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(Sz, DL), 3953 Align(8), /*isVolatile*/ false, /*AlwaysInline*/ false, 3954 /*CI=*/nullptr, std::nullopt, MachinePointerInfo(DstSV), 3955 MachinePointerInfo(SrcSV)); 3956 } 3957 3958 SDValue 3959 SystemZTargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op, 3960 SelectionDAG &DAG) const { 3961 if (Subtarget.isTargetXPLINK64()) 3962 return lowerDYNAMIC_STACKALLOC_XPLINK(Op, DAG); 3963 else 3964 return lowerDYNAMIC_STACKALLOC_ELF(Op, DAG); 3965 } 3966 3967 SDValue 3968 SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_XPLINK(SDValue Op, 3969 SelectionDAG &DAG) const { 3970 const TargetFrameLowering *TFI = Subtarget.getFrameLowering(); 3971 MachineFunction &MF = DAG.getMachineFunction(); 3972 bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack"); 3973 SDValue Chain = Op.getOperand(0); 3974 SDValue Size = Op.getOperand(1); 3975 SDValue Align = Op.getOperand(2); 3976 SDLoc DL(Op); 3977 3978 // If user has set the no alignment function attribute, ignore 3979 // alloca alignments. 3980 uint64_t AlignVal = (RealignOpt ? Align->getAsZExtVal() : 0); 3981 3982 uint64_t StackAlign = TFI->getStackAlignment(); 3983 uint64_t RequiredAlign = std::max(AlignVal, StackAlign); 3984 uint64_t ExtraAlignSpace = RequiredAlign - StackAlign; 3985 3986 SDValue NeededSpace = Size; 3987 3988 // Add extra space for alignment if needed. 3989 EVT PtrVT = getPointerTy(MF.getDataLayout()); 3990 if (ExtraAlignSpace) 3991 NeededSpace = DAG.getNode(ISD::ADD, DL, PtrVT, NeededSpace, 3992 DAG.getConstant(ExtraAlignSpace, DL, PtrVT)); 3993 3994 bool IsSigned = false; 3995 bool DoesNotReturn = false; 3996 bool IsReturnValueUsed = false; 3997 EVT VT = Op.getValueType(); 3998 SDValue AllocaCall = 3999 makeExternalCall(Chain, DAG, "@@ALCAXP", VT, ArrayRef(NeededSpace), 4000 CallingConv::C, IsSigned, DL, DoesNotReturn, 4001 IsReturnValueUsed) 4002 .first; 4003 4004 // Perform a CopyFromReg from %GPR4 (stack pointer register). Chain and Glue 4005 // to end of call in order to ensure it isn't broken up from the call 4006 // sequence. 
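// In rough terms (a sketch of the surrounding lowering, not target
// documentation): the @@ALCAXP call above grows the stack, the new stack
// pointer is then read back from the stack pointer register, and
// ADJDYNALLOC later rebases the result past the fixed part of the XPLINK
// frame once its final size is known.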
4007 auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>(); 4008 Register SPReg = Regs.getStackPointerRegister(); 4009 Chain = AllocaCall.getValue(1); 4010 SDValue Glue = AllocaCall.getValue(2); 4011 SDValue NewSPRegNode = DAG.getCopyFromReg(Chain, DL, SPReg, PtrVT, Glue); 4012 Chain = NewSPRegNode.getValue(1); 4013 4014 MVT PtrMVT = getPointerMemTy(MF.getDataLayout()); 4015 SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, PtrMVT); 4016 SDValue Result = DAG.getNode(ISD::ADD, DL, PtrMVT, NewSPRegNode, ArgAdjust); 4017 4018 // Dynamically realign if needed. 4019 if (ExtraAlignSpace) { 4020 Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result, 4021 DAG.getConstant(ExtraAlignSpace, DL, PtrVT)); 4022 Result = DAG.getNode(ISD::AND, DL, PtrVT, Result, 4023 DAG.getConstant(~(RequiredAlign - 1), DL, PtrVT)); 4024 } 4025 4026 SDValue Ops[2] = {Result, Chain}; 4027 return DAG.getMergeValues(Ops, DL); 4028 } 4029 4030 SDValue 4031 SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_ELF(SDValue Op, 4032 SelectionDAG &DAG) const { 4033 const TargetFrameLowering *TFI = Subtarget.getFrameLowering(); 4034 MachineFunction &MF = DAG.getMachineFunction(); 4035 bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack"); 4036 bool StoreBackchain = MF.getSubtarget<SystemZSubtarget>().hasBackChain(); 4037 4038 SDValue Chain = Op.getOperand(0); 4039 SDValue Size = Op.getOperand(1); 4040 SDValue Align = Op.getOperand(2); 4041 SDLoc DL(Op); 4042 4043 // If user has set the no alignment function attribute, ignore 4044 // alloca alignments. 4045 uint64_t AlignVal = (RealignOpt ? Align->getAsZExtVal() : 0); 4046 4047 uint64_t StackAlign = TFI->getStackAlignment(); 4048 uint64_t RequiredAlign = std::max(AlignVal, StackAlign); 4049 uint64_t ExtraAlignSpace = RequiredAlign - StackAlign; 4050 4051 Register SPReg = getStackPointerRegisterToSaveRestore(); 4052 SDValue NeededSpace = Size; 4053 4054 // Get a reference to the stack pointer. 4055 SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64); 4056 4057 // If we need a backchain, save it now. 4058 SDValue Backchain; 4059 if (StoreBackchain) 4060 Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG), 4061 MachinePointerInfo()); 4062 4063 // Add extra space for alignment if needed. 4064 if (ExtraAlignSpace) 4065 NeededSpace = DAG.getNode(ISD::ADD, DL, MVT::i64, NeededSpace, 4066 DAG.getConstant(ExtraAlignSpace, DL, MVT::i64)); 4067 4068 // Get the new stack pointer value. 4069 SDValue NewSP; 4070 if (hasInlineStackProbe(MF)) { 4071 NewSP = DAG.getNode(SystemZISD::PROBED_ALLOCA, DL, 4072 DAG.getVTList(MVT::i64, MVT::Other), Chain, OldSP, NeededSpace); 4073 Chain = NewSP.getValue(1); 4074 } 4075 else { 4076 NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace); 4077 // Copy the new stack pointer back. 4078 Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP); 4079 } 4080 4081 // The allocated data lives above the 160 bytes allocated for the standard 4082 // frame, plus any outgoing stack arguments. We don't know how much that 4083 // amounts to yet, so emit a special ADJDYNALLOC placeholder. 4084 SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64); 4085 SDValue Result = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, ArgAdjust); 4086 4087 // Dynamically realign if needed. 
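// Worked example (illustrative): for a 32-byte-aligned alloca on an
// 8-byte-aligned stack, ExtraAlignSpace is 24, so 24 extra bytes are
// allocated and the code below rounds the result up to the next multiple
// of 32 via (Result + 24) & ~31; the rounded pointer always stays within
// the 24 bytes of slack, so the requested size still fits.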
4088 if (RequiredAlign > StackAlign) { 4089 Result = 4090 DAG.getNode(ISD::ADD, DL, MVT::i64, Result, 4091 DAG.getConstant(ExtraAlignSpace, DL, MVT::i64)); 4092 Result = 4093 DAG.getNode(ISD::AND, DL, MVT::i64, Result, 4094 DAG.getConstant(~(RequiredAlign - 1), DL, MVT::i64)); 4095 } 4096 4097 if (StoreBackchain) 4098 Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG), 4099 MachinePointerInfo()); 4100 4101 SDValue Ops[2] = { Result, Chain }; 4102 return DAG.getMergeValues(Ops, DL); 4103 } 4104 4105 SDValue SystemZTargetLowering::lowerGET_DYNAMIC_AREA_OFFSET( 4106 SDValue Op, SelectionDAG &DAG) const { 4107 SDLoc DL(Op); 4108 4109 return DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64); 4110 } 4111 4112 SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op, 4113 SelectionDAG &DAG) const { 4114 EVT VT = Op.getValueType(); 4115 SDLoc DL(Op); 4116 SDValue Ops[2]; 4117 if (is32Bit(VT)) 4118 // Just do a normal 64-bit multiplication and extract the results. 4119 // We define this so that it can be used for constant division. 4120 lowerMUL_LOHI32(DAG, DL, ISD::SIGN_EXTEND, Op.getOperand(0), 4121 Op.getOperand(1), Ops[1], Ops[0]); 4122 else if (Subtarget.hasMiscellaneousExtensions2()) 4123 // SystemZISD::SMUL_LOHI returns the low result in the odd register and 4124 // the high result in the even register. ISD::SMUL_LOHI is defined to 4125 // return the low half first, so the results are in reverse order. 4126 lowerGR128Binary(DAG, DL, VT, SystemZISD::SMUL_LOHI, 4127 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]); 4128 else { 4129 // Do a full 128-bit multiplication based on SystemZISD::UMUL_LOHI: 4130 // 4131 // (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64) 4132 // 4133 // but using the fact that the upper halves are either all zeros 4134 // or all ones: 4135 // 4136 // (ll * rl) - ((lh & rl) << 64) - ((ll & rh) << 64) 4137 // 4138 // and grouping the right terms together since they are quicker than the 4139 // multiplication: 4140 // 4141 // (ll * rl) - (((lh & rl) + (ll & rh)) << 64) 4142 SDValue C63 = DAG.getConstant(63, DL, MVT::i64); 4143 SDValue LL = Op.getOperand(0); 4144 SDValue RL = Op.getOperand(1); 4145 SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63); 4146 SDValue RH = DAG.getNode(ISD::SRA, DL, VT, RL, C63); 4147 // SystemZISD::UMUL_LOHI returns the low result in the odd register and 4148 // the high result in the even register. ISD::SMUL_LOHI is defined to 4149 // return the low half first, so the results are in reverse order. 4150 lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI, 4151 LL, RL, Ops[1], Ops[0]); 4152 SDValue NegLLTimesRH = DAG.getNode(ISD::AND, DL, VT, LL, RH); 4153 SDValue NegLHTimesRL = DAG.getNode(ISD::AND, DL, VT, LH, RL); 4154 SDValue NegSum = DAG.getNode(ISD::ADD, DL, VT, NegLLTimesRH, NegLHTimesRL); 4155 Ops[1] = DAG.getNode(ISD::SUB, DL, VT, Ops[1], NegSum); 4156 } 4157 return DAG.getMergeValues(Ops, DL); 4158 } 4159 4160 SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op, 4161 SelectionDAG &DAG) const { 4162 EVT VT = Op.getValueType(); 4163 SDLoc DL(Op); 4164 SDValue Ops[2]; 4165 if (is32Bit(VT)) 4166 // Just do a normal 64-bit multiplication and extract the results. 4167 // We define this so that it can be used for constant division. 4168 lowerMUL_LOHI32(DAG, DL, ISD::ZERO_EXTEND, Op.getOperand(0), 4169 Op.getOperand(1), Ops[1], Ops[0]); 4170 else 4171 // SystemZISD::UMUL_LOHI returns the low result in the odd register and 4172 // the high result in the even register. 
ISD::UMUL_LOHI is defined to 4173 // return the low half first, so the results are in reverse order. 4174 lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI, 4175 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]); 4176 return DAG.getMergeValues(Ops, DL); 4177 } 4178 4179 SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op, 4180 SelectionDAG &DAG) const { 4181 SDValue Op0 = Op.getOperand(0); 4182 SDValue Op1 = Op.getOperand(1); 4183 EVT VT = Op.getValueType(); 4184 SDLoc DL(Op); 4185 4186 // We use DSGF for 32-bit division. This means the first operand must 4187 // always be 64-bit, and the second operand should be 32-bit whenever 4188 // that is possible, to improve performance. 4189 if (is32Bit(VT)) 4190 Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0); 4191 else if (DAG.ComputeNumSignBits(Op1) > 32) 4192 Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1); 4193 4194 // DSG(F) returns the remainder in the even register and the 4195 // quotient in the odd register. 4196 SDValue Ops[2]; 4197 lowerGR128Binary(DAG, DL, VT, SystemZISD::SDIVREM, Op0, Op1, Ops[1], Ops[0]); 4198 return DAG.getMergeValues(Ops, DL); 4199 } 4200 4201 SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op, 4202 SelectionDAG &DAG) const { 4203 EVT VT = Op.getValueType(); 4204 SDLoc DL(Op); 4205 4206 // DL(G) returns the remainder in the even register and the 4207 // quotient in the odd register. 4208 SDValue Ops[2]; 4209 lowerGR128Binary(DAG, DL, VT, SystemZISD::UDIVREM, 4210 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]); 4211 return DAG.getMergeValues(Ops, DL); 4212 } 4213 4214 SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const { 4215 assert(Op.getValueType() == MVT::i64 && "Should be 64-bit operation"); 4216 4217 // Get the known-zero masks for each operand. 4218 SDValue Ops[] = {Op.getOperand(0), Op.getOperand(1)}; 4219 KnownBits Known[2] = {DAG.computeKnownBits(Ops[0]), 4220 DAG.computeKnownBits(Ops[1])}; 4221 4222 // See if the upper 32 bits of one operand and the lower 32 bits of the 4223 // other are known zero. They are the low and high operands respectively. 4224 uint64_t Masks[] = { Known[0].Zero.getZExtValue(), 4225 Known[1].Zero.getZExtValue() }; 4226 unsigned High, Low; 4227 if ((Masks[0] >> 32) == 0xffffffff && uint32_t(Masks[1]) == 0xffffffff) 4228 High = 1, Low = 0; 4229 else if ((Masks[1] >> 32) == 0xffffffff && uint32_t(Masks[0]) == 0xffffffff) 4230 High = 0, Low = 1; 4231 else 4232 return Op; 4233 4234 SDValue LowOp = Ops[Low]; 4235 SDValue HighOp = Ops[High]; 4236 4237 // If the high part is a constant, we're better off using IILH. 4238 if (HighOp.getOpcode() == ISD::Constant) 4239 return Op; 4240 4241 // If the low part is a constant that is outside the range of LHI, 4242 // then we're better off using IILF. 4243 if (LowOp.getOpcode() == ISD::Constant) { 4244 int64_t Value = int32_t(LowOp->getAsZExtVal()); 4245 if (!isInt<16>(Value)) 4246 return Op; 4247 } 4248 4249 // Check whether the high part is an AND that doesn't change the 4250 // high 32 bits and just masks out low bits. We can skip it if so. 
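// For instance (illustrative only): if HighOp is
//   (and %t, 0xffffffff00000000)
// then ~(Mask | 0xffffffff) is 0, so the AND clears no high-word bits at
// all and can be dropped, because the low 32 bits are about to be replaced
// by the subreg insertion anyway.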
4251 if (HighOp.getOpcode() == ISD::AND && 4252 HighOp.getOperand(1).getOpcode() == ISD::Constant) { 4253 SDValue HighOp0 = HighOp.getOperand(0); 4254 uint64_t Mask = HighOp.getConstantOperandVal(1); 4255 if (DAG.MaskedValueIsZero(HighOp0, APInt(64, ~(Mask | 0xffffffff)))) 4256 HighOp = HighOp0; 4257 } 4258 4259 // Take advantage of the fact that all GR32 operations only change the 4260 // low 32 bits by truncating Low to an i32 and inserting it directly 4261 // using a subreg. The interesting cases are those where the truncation 4262 // can be folded. 4263 SDLoc DL(Op); 4264 SDValue Low32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, LowOp); 4265 return DAG.getTargetInsertSubreg(SystemZ::subreg_l32, DL, 4266 MVT::i64, HighOp, Low32); 4267 } 4268 4269 // Lower SADDO/SSUBO/UADDO/USUBO nodes. 4270 SDValue SystemZTargetLowering::lowerXALUO(SDValue Op, 4271 SelectionDAG &DAG) const { 4272 SDNode *N = Op.getNode(); 4273 SDValue LHS = N->getOperand(0); 4274 SDValue RHS = N->getOperand(1); 4275 SDLoc DL(N); 4276 4277 if (N->getValueType(0) == MVT::i128) { 4278 unsigned BaseOp = 0; 4279 unsigned FlagOp = 0; 4280 bool IsBorrow = false; 4281 switch (Op.getOpcode()) { 4282 default: llvm_unreachable("Unknown instruction!"); 4283 case ISD::UADDO: 4284 BaseOp = ISD::ADD; 4285 FlagOp = SystemZISD::VACC; 4286 break; 4287 case ISD::USUBO: 4288 BaseOp = ISD::SUB; 4289 FlagOp = SystemZISD::VSCBI; 4290 IsBorrow = true; 4291 break; 4292 } 4293 SDValue Result = DAG.getNode(BaseOp, DL, MVT::i128, LHS, RHS); 4294 SDValue Flag = DAG.getNode(FlagOp, DL, MVT::i128, LHS, RHS); 4295 Flag = DAG.getNode(ISD::AssertZext, DL, MVT::i128, Flag, 4296 DAG.getValueType(MVT::i1)); 4297 Flag = DAG.getZExtOrTrunc(Flag, DL, N->getValueType(1)); 4298 if (IsBorrow) 4299 Flag = DAG.getNode(ISD::XOR, DL, Flag.getValueType(), 4300 Flag, DAG.getConstant(1, DL, Flag.getValueType())); 4301 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Flag); 4302 } 4303 4304 unsigned BaseOp = 0; 4305 unsigned CCValid = 0; 4306 unsigned CCMask = 0; 4307 4308 switch (Op.getOpcode()) { 4309 default: llvm_unreachable("Unknown instruction!"); 4310 case ISD::SADDO: 4311 BaseOp = SystemZISD::SADDO; 4312 CCValid = SystemZ::CCMASK_ARITH; 4313 CCMask = SystemZ::CCMASK_ARITH_OVERFLOW; 4314 break; 4315 case ISD::SSUBO: 4316 BaseOp = SystemZISD::SSUBO; 4317 CCValid = SystemZ::CCMASK_ARITH; 4318 CCMask = SystemZ::CCMASK_ARITH_OVERFLOW; 4319 break; 4320 case ISD::UADDO: 4321 BaseOp = SystemZISD::UADDO; 4322 CCValid = SystemZ::CCMASK_LOGICAL; 4323 CCMask = SystemZ::CCMASK_LOGICAL_CARRY; 4324 break; 4325 case ISD::USUBO: 4326 BaseOp = SystemZISD::USUBO; 4327 CCValid = SystemZ::CCMASK_LOGICAL; 4328 CCMask = SystemZ::CCMASK_LOGICAL_BORROW; 4329 break; 4330 } 4331 4332 SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32); 4333 SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS); 4334 4335 SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask); 4336 if (N->getValueType(1) == MVT::i1) 4337 SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC); 4338 4339 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC); 4340 } 4341 4342 static bool isAddCarryChain(SDValue Carry) { 4343 while (Carry.getOpcode() == ISD::UADDO_CARRY) 4344 Carry = Carry.getOperand(2); 4345 return Carry.getOpcode() == ISD::UADDO; 4346 } 4347 4348 static bool isSubBorrowChain(SDValue Carry) { 4349 while (Carry.getOpcode() == ISD::USUBO_CARRY) 4350 Carry = Carry.getOperand(2); 4351 return Carry.getOpcode() == ISD::USUBO; 4352 } 4353 4354 // Lower 
UADDO_CARRY/USUBO_CARRY nodes. 4355 SDValue SystemZTargetLowering::lowerUADDSUBO_CARRY(SDValue Op, 4356 SelectionDAG &DAG) const { 4357 4358 SDNode *N = Op.getNode(); 4359 MVT VT = N->getSimpleValueType(0); 4360 4361 // Let legalize expand this if it isn't a legal type yet. 4362 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT)) 4363 return SDValue(); 4364 4365 SDValue LHS = N->getOperand(0); 4366 SDValue RHS = N->getOperand(1); 4367 SDValue Carry = Op.getOperand(2); 4368 SDLoc DL(N); 4369 4370 if (VT == MVT::i128) { 4371 unsigned BaseOp = 0; 4372 unsigned FlagOp = 0; 4373 bool IsBorrow = false; 4374 switch (Op.getOpcode()) { 4375 default: llvm_unreachable("Unknown instruction!"); 4376 case ISD::UADDO_CARRY: 4377 BaseOp = SystemZISD::VAC; 4378 FlagOp = SystemZISD::VACCC; 4379 break; 4380 case ISD::USUBO_CARRY: 4381 BaseOp = SystemZISD::VSBI; 4382 FlagOp = SystemZISD::VSBCBI; 4383 IsBorrow = true; 4384 break; 4385 } 4386 if (IsBorrow) 4387 Carry = DAG.getNode(ISD::XOR, DL, Carry.getValueType(), 4388 Carry, DAG.getConstant(1, DL, Carry.getValueType())); 4389 Carry = DAG.getZExtOrTrunc(Carry, DL, MVT::i128); 4390 SDValue Result = DAG.getNode(BaseOp, DL, MVT::i128, LHS, RHS, Carry); 4391 SDValue Flag = DAG.getNode(FlagOp, DL, MVT::i128, LHS, RHS, Carry); 4392 Flag = DAG.getNode(ISD::AssertZext, DL, MVT::i128, Flag, 4393 DAG.getValueType(MVT::i1)); 4394 Flag = DAG.getZExtOrTrunc(Flag, DL, N->getValueType(1)); 4395 if (IsBorrow) 4396 Flag = DAG.getNode(ISD::XOR, DL, Flag.getValueType(), 4397 Flag, DAG.getConstant(1, DL, Flag.getValueType())); 4398 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Flag); 4399 } 4400 4401 unsigned BaseOp = 0; 4402 unsigned CCValid = 0; 4403 unsigned CCMask = 0; 4404 4405 switch (Op.getOpcode()) { 4406 default: llvm_unreachable("Unknown instruction!"); 4407 case ISD::UADDO_CARRY: 4408 if (!isAddCarryChain(Carry)) 4409 return SDValue(); 4410 4411 BaseOp = SystemZISD::ADDCARRY; 4412 CCValid = SystemZ::CCMASK_LOGICAL; 4413 CCMask = SystemZ::CCMASK_LOGICAL_CARRY; 4414 break; 4415 case ISD::USUBO_CARRY: 4416 if (!isSubBorrowChain(Carry)) 4417 return SDValue(); 4418 4419 BaseOp = SystemZISD::SUBCARRY; 4420 CCValid = SystemZ::CCMASK_LOGICAL; 4421 CCMask = SystemZ::CCMASK_LOGICAL_BORROW; 4422 break; 4423 } 4424 4425 // Set the condition code from the carry flag. 4426 Carry = DAG.getNode(SystemZISD::GET_CCMASK, DL, MVT::i32, Carry, 4427 DAG.getConstant(CCValid, DL, MVT::i32), 4428 DAG.getConstant(CCMask, DL, MVT::i32)); 4429 4430 SDVTList VTs = DAG.getVTList(VT, MVT::i32); 4431 SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS, Carry); 4432 4433 SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask); 4434 if (N->getValueType(1) == MVT::i1) 4435 SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC); 4436 4437 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC); 4438 } 4439 4440 SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op, 4441 SelectionDAG &DAG) const { 4442 EVT VT = Op.getValueType(); 4443 SDLoc DL(Op); 4444 Op = Op.getOperand(0); 4445 4446 if (VT.getScalarSizeInBits() == 128) { 4447 Op = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op); 4448 Op = DAG.getNode(ISD::CTPOP, DL, MVT::v2i64, Op); 4449 SDValue Tmp = DAG.getSplatBuildVector(MVT::v2i64, DL, 4450 DAG.getConstant(0, DL, MVT::i64)); 4451 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp); 4452 return Op; 4453 } 4454 4455 // Handle vector types via VPOPCT. 
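// Sketch of the v8i16 case below (for illustration): VPOPCT leaves a
// per-byte count in each byte, so within every halfword the element count
// is (high byte + low byte); adding (Op << 8) folds the low byte's count
// into the high byte, and the final >> 8 moves it back down, giving one
// count per i16 element.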
4456 if (VT.isVector()) { 4457 Op = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Op); 4458 Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::v16i8, Op); 4459 switch (VT.getScalarSizeInBits()) { 4460 case 8: 4461 break; 4462 case 16: { 4463 Op = DAG.getNode(ISD::BITCAST, DL, VT, Op); 4464 SDValue Shift = DAG.getConstant(8, DL, MVT::i32); 4465 SDValue Tmp = DAG.getNode(SystemZISD::VSHL_BY_SCALAR, DL, VT, Op, Shift); 4466 Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp); 4467 Op = DAG.getNode(SystemZISD::VSRL_BY_SCALAR, DL, VT, Op, Shift); 4468 break; 4469 } 4470 case 32: { 4471 SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL, 4472 DAG.getConstant(0, DL, MVT::i32)); 4473 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp); 4474 break; 4475 } 4476 case 64: { 4477 SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL, 4478 DAG.getConstant(0, DL, MVT::i32)); 4479 Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Tmp); 4480 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp); 4481 break; 4482 } 4483 default: 4484 llvm_unreachable("Unexpected type"); 4485 } 4486 return Op; 4487 } 4488 4489 // Get the known-zero mask for the operand. 4490 KnownBits Known = DAG.computeKnownBits(Op); 4491 unsigned NumSignificantBits = Known.getMaxValue().getActiveBits(); 4492 if (NumSignificantBits == 0) 4493 return DAG.getConstant(0, DL, VT); 4494 4495 // Skip known-zero high parts of the operand. 4496 int64_t OrigBitSize = VT.getSizeInBits(); 4497 int64_t BitSize = llvm::bit_ceil(NumSignificantBits); 4498 BitSize = std::min(BitSize, OrigBitSize); 4499 4500 // The POPCNT instruction counts the number of bits in each byte. 4501 Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op); 4502 Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::i64, Op); 4503 Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op); 4504 4505 // Add up per-byte counts in a binary tree. All bits of Op at 4506 // position larger than BitSize remain zero throughout. 4507 for (int64_t I = BitSize / 2; I >= 8; I = I / 2) { 4508 SDValue Tmp = DAG.getNode(ISD::SHL, DL, VT, Op, DAG.getConstant(I, DL, VT)); 4509 if (BitSize != OrigBitSize) 4510 Tmp = DAG.getNode(ISD::AND, DL, VT, Tmp, 4511 DAG.getConstant(((uint64_t)1 << BitSize) - 1, DL, VT)); 4512 Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp); 4513 } 4514 4515 // Extract overall result from high byte. 4516 if (BitSize > 8) 4517 Op = DAG.getNode(ISD::SRL, DL, VT, Op, 4518 DAG.getConstant(BitSize - 8, DL, VT)); 4519 4520 return Op; 4521 } 4522 4523 SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op, 4524 SelectionDAG &DAG) const { 4525 SDLoc DL(Op); 4526 AtomicOrdering FenceOrdering = 4527 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1)); 4528 SyncScope::ID FenceSSID = 4529 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2)); 4530 4531 // The only fence that needs an instruction is a sequentially-consistent 4532 // cross-thread fence. 4533 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent && 4534 FenceSSID == SyncScope::System) { 4535 return SDValue(DAG.getMachineNode(SystemZ::Serialize, DL, MVT::Other, 4536 Op.getOperand(0)), 4537 0); 4538 } 4539 4540 // MEMBARRIER is a compiler barrier; it codegens to a no-op. 
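  // Weaker fences need no instruction at all: as noted above, only a
  // sequentially-consistent cross-thread fence requires serialization on
  // this strongly-ordered architecture.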
4541 return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0)); 4542 } 4543 4544 SDValue SystemZTargetLowering::lowerATOMIC_LDST_I128(SDValue Op, 4545 SelectionDAG &DAG) const { 4546 auto *Node = cast<AtomicSDNode>(Op.getNode()); 4547 assert( 4548 (Node->getMemoryVT() == MVT::i128 || Node->getMemoryVT() == MVT::f128) && 4549 "Only custom lowering i128 or f128."); 4550 // Use same code to handle both legal and non-legal i128 types. 4551 SmallVector<SDValue, 2> Results; 4552 LowerOperationWrapper(Node, Results, DAG); 4553 return DAG.getMergeValues(Results, SDLoc(Op)); 4554 } 4555 4556 // Prepare for a Compare And Swap for a subword operation. This needs to be 4557 // done in memory with 4 bytes at natural alignment. 4558 static void getCSAddressAndShifts(SDValue Addr, SelectionDAG &DAG, SDLoc DL, 4559 SDValue &AlignedAddr, SDValue &BitShift, 4560 SDValue &NegBitShift) { 4561 EVT PtrVT = Addr.getValueType(); 4562 EVT WideVT = MVT::i32; 4563 4564 // Get the address of the containing word. 4565 AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr, 4566 DAG.getConstant(-4, DL, PtrVT)); 4567 4568 // Get the number of bits that the word must be rotated left in order 4569 // to bring the field to the top bits of a GR32. 4570 BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr, 4571 DAG.getConstant(3, DL, PtrVT)); 4572 BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift); 4573 4574 // Get the complementing shift amount, for rotating a field in the top 4575 // bits back to its proper position. 4576 NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT, 4577 DAG.getConstant(0, DL, WideVT), BitShift); 4578 4579 } 4580 4581 // Op is an 8-, 16-bit or 32-bit ATOMIC_LOAD_* operation. Lower the first 4582 // two into the fullword ATOMIC_LOADW_* operation given by Opcode. 4583 SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op, 4584 SelectionDAG &DAG, 4585 unsigned Opcode) const { 4586 auto *Node = cast<AtomicSDNode>(Op.getNode()); 4587 4588 // 32-bit operations need no special handling. 4589 EVT NarrowVT = Node->getMemoryVT(); 4590 EVT WideVT = MVT::i32; 4591 if (NarrowVT == WideVT) 4592 return Op; 4593 4594 int64_t BitSize = NarrowVT.getSizeInBits(); 4595 SDValue ChainIn = Node->getChain(); 4596 SDValue Addr = Node->getBasePtr(); 4597 SDValue Src2 = Node->getVal(); 4598 MachineMemOperand *MMO = Node->getMemOperand(); 4599 SDLoc DL(Node); 4600 4601 // Convert atomic subtracts of constants into additions. 4602 if (Opcode == SystemZISD::ATOMIC_LOADW_SUB) 4603 if (auto *Const = dyn_cast<ConstantSDNode>(Src2)) { 4604 Opcode = SystemZISD::ATOMIC_LOADW_ADD; 4605 Src2 = DAG.getConstant(-Const->getSExtValue(), DL, Src2.getValueType()); 4606 } 4607 4608 SDValue AlignedAddr, BitShift, NegBitShift; 4609 getCSAddressAndShifts(Addr, DAG, DL, AlignedAddr, BitShift, NegBitShift); 4610 4611 // Extend the source operand to 32 bits and prepare it for the inner loop. 4612 // ATOMIC_SWAPW uses RISBG to rotate the field left, but all other 4613 // operations require the source to be shifted in advance. (This shift 4614 // can be folded if the source is constant.) For AND and NAND, the lower 4615 // bits must be set, while for other opcodes they should be left clear. 
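  // For example, an 8-bit operation shifts the value left by 24 into the top
  // byte of the GR32, and for AND and NAND the remaining low 24 bits are then
  // filled with ones (uint32_t(-1) >> 8 == 0x00ffffff) so that the other
  // bytes of the containing word are left unchanged.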
4616 if (Opcode != SystemZISD::ATOMIC_SWAPW) 4617 Src2 = DAG.getNode(ISD::SHL, DL, WideVT, Src2, 4618 DAG.getConstant(32 - BitSize, DL, WideVT)); 4619 if (Opcode == SystemZISD::ATOMIC_LOADW_AND || 4620 Opcode == SystemZISD::ATOMIC_LOADW_NAND) 4621 Src2 = DAG.getNode(ISD::OR, DL, WideVT, Src2, 4622 DAG.getConstant(uint32_t(-1) >> BitSize, DL, WideVT)); 4623 4624 // Construct the ATOMIC_LOADW_* node. 4625 SDVTList VTList = DAG.getVTList(WideVT, MVT::Other); 4626 SDValue Ops[] = { ChainIn, AlignedAddr, Src2, BitShift, NegBitShift, 4627 DAG.getConstant(BitSize, DL, WideVT) }; 4628 SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode, DL, VTList, Ops, 4629 NarrowVT, MMO); 4630 4631 // Rotate the result of the final CS so that the field is in the lower 4632 // bits of a GR32, then truncate it. 4633 SDValue ResultShift = DAG.getNode(ISD::ADD, DL, WideVT, BitShift, 4634 DAG.getConstant(BitSize, DL, WideVT)); 4635 SDValue Result = DAG.getNode(ISD::ROTL, DL, WideVT, AtomicOp, ResultShift); 4636 4637 SDValue RetOps[2] = { Result, AtomicOp.getValue(1) }; 4638 return DAG.getMergeValues(RetOps, DL); 4639 } 4640 4641 // Op is an ATOMIC_LOAD_SUB operation. Lower 8- and 16-bit operations into 4642 // ATOMIC_LOADW_SUBs and convert 32- and 64-bit operations into additions. 4643 SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op, 4644 SelectionDAG &DAG) const { 4645 auto *Node = cast<AtomicSDNode>(Op.getNode()); 4646 EVT MemVT = Node->getMemoryVT(); 4647 if (MemVT == MVT::i32 || MemVT == MVT::i64) { 4648 // A full-width operation: negate and use LAA(G). 4649 assert(Op.getValueType() == MemVT && "Mismatched VTs"); 4650 assert(Subtarget.hasInterlockedAccess1() && 4651 "Should have been expanded by AtomicExpand pass."); 4652 SDValue Src2 = Node->getVal(); 4653 SDLoc DL(Src2); 4654 SDValue NegSrc2 = 4655 DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, DL, MemVT), Src2); 4656 return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, MemVT, 4657 Node->getChain(), Node->getBasePtr(), NegSrc2, 4658 Node->getMemOperand()); 4659 } 4660 4661 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_SUB); 4662 } 4663 4664 // Lower 8/16/32/64-bit ATOMIC_CMP_SWAP_WITH_SUCCESS node. 4665 SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op, 4666 SelectionDAG &DAG) const { 4667 auto *Node = cast<AtomicSDNode>(Op.getNode()); 4668 SDValue ChainIn = Node->getOperand(0); 4669 SDValue Addr = Node->getOperand(1); 4670 SDValue CmpVal = Node->getOperand(2); 4671 SDValue SwapVal = Node->getOperand(3); 4672 MachineMemOperand *MMO = Node->getMemOperand(); 4673 SDLoc DL(Node); 4674 4675 if (Node->getMemoryVT() == MVT::i128) { 4676 // Use same code to handle both legal and non-legal i128 types. 4677 SmallVector<SDValue, 3> Results; 4678 LowerOperationWrapper(Node, Results, DAG); 4679 return DAG.getMergeValues(Results, DL); 4680 } 4681 4682 // We have native support for 32-bit and 64-bit compare and swap, but we 4683 // still need to expand extracting the "success" result from the CC. 4684 EVT NarrowVT = Node->getMemoryVT(); 4685 EVT WideVT = NarrowVT == MVT::i64 ? 
MVT::i64 : MVT::i32; 4686 if (NarrowVT == WideVT) { 4687 SDVTList Tys = DAG.getVTList(WideVT, MVT::i32, MVT::Other); 4688 SDValue Ops[] = { ChainIn, Addr, CmpVal, SwapVal }; 4689 SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP, 4690 DL, Tys, Ops, NarrowVT, MMO); 4691 SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1), 4692 SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ); 4693 4694 DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), AtomicOp.getValue(0)); 4695 DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success); 4696 DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2)); 4697 return SDValue(); 4698 } 4699 4700 // Convert 8-bit and 16-bit compare and swap to a loop, implemented 4701 // via a fullword ATOMIC_CMP_SWAPW operation. 4702 int64_t BitSize = NarrowVT.getSizeInBits(); 4703 4704 SDValue AlignedAddr, BitShift, NegBitShift; 4705 getCSAddressAndShifts(Addr, DAG, DL, AlignedAddr, BitShift, NegBitShift); 4706 4707 // Construct the ATOMIC_CMP_SWAPW node. 4708 SDVTList VTList = DAG.getVTList(WideVT, MVT::i32, MVT::Other); 4709 SDValue Ops[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift, 4710 NegBitShift, DAG.getConstant(BitSize, DL, WideVT) }; 4711 SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAPW, DL, 4712 VTList, Ops, NarrowVT, MMO); 4713 SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1), 4714 SystemZ::CCMASK_ICMP, SystemZ::CCMASK_CMP_EQ); 4715 4716 // emitAtomicCmpSwapW() will zero extend the result (original value). 4717 SDValue OrigVal = DAG.getNode(ISD::AssertZext, DL, WideVT, AtomicOp.getValue(0), 4718 DAG.getValueType(NarrowVT)); 4719 DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), OrigVal); 4720 DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success); 4721 DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2)); 4722 return SDValue(); 4723 } 4724 4725 MachineMemOperand::Flags 4726 SystemZTargetLowering::getTargetMMOFlags(const Instruction &I) const { 4727 // Because of how we convert atomic_load and atomic_store to normal loads and 4728 // stores in the DAG, we need to ensure that the MMOs are marked volatile 4729 // since DAGCombine hasn't been updated to account for atomic, but non 4730 // volatile loads. 
(See D57601) 4731 if (auto *SI = dyn_cast<StoreInst>(&I)) 4732 if (SI->isAtomic()) 4733 return MachineMemOperand::MOVolatile; 4734 if (auto *LI = dyn_cast<LoadInst>(&I)) 4735 if (LI->isAtomic()) 4736 return MachineMemOperand::MOVolatile; 4737 if (auto *AI = dyn_cast<AtomicRMWInst>(&I)) 4738 if (AI->isAtomic()) 4739 return MachineMemOperand::MOVolatile; 4740 if (auto *AI = dyn_cast<AtomicCmpXchgInst>(&I)) 4741 if (AI->isAtomic()) 4742 return MachineMemOperand::MOVolatile; 4743 return MachineMemOperand::MONone; 4744 } 4745 4746 SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op, 4747 SelectionDAG &DAG) const { 4748 MachineFunction &MF = DAG.getMachineFunction(); 4749 auto *Regs = Subtarget.getSpecialRegisters(); 4750 if (MF.getFunction().getCallingConv() == CallingConv::GHC) 4751 report_fatal_error("Variable-sized stack allocations are not supported " 4752 "in GHC calling convention"); 4753 return DAG.getCopyFromReg(Op.getOperand(0), SDLoc(Op), 4754 Regs->getStackPointerRegister(), Op.getValueType()); 4755 } 4756 4757 SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op, 4758 SelectionDAG &DAG) const { 4759 MachineFunction &MF = DAG.getMachineFunction(); 4760 auto *Regs = Subtarget.getSpecialRegisters(); 4761 bool StoreBackchain = MF.getSubtarget<SystemZSubtarget>().hasBackChain(); 4762 4763 if (MF.getFunction().getCallingConv() == CallingConv::GHC) 4764 report_fatal_error("Variable-sized stack allocations are not supported " 4765 "in GHC calling convention"); 4766 4767 SDValue Chain = Op.getOperand(0); 4768 SDValue NewSP = Op.getOperand(1); 4769 SDValue Backchain; 4770 SDLoc DL(Op); 4771 4772 if (StoreBackchain) { 4773 SDValue OldSP = DAG.getCopyFromReg( 4774 Chain, DL, Regs->getStackPointerRegister(), MVT::i64); 4775 Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG), 4776 MachinePointerInfo()); 4777 } 4778 4779 Chain = DAG.getCopyToReg(Chain, DL, Regs->getStackPointerRegister(), NewSP); 4780 4781 if (StoreBackchain) 4782 Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG), 4783 MachinePointerInfo()); 4784 4785 return Chain; 4786 } 4787 4788 SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op, 4789 SelectionDAG &DAG) const { 4790 bool IsData = Op.getConstantOperandVal(4); 4791 if (!IsData) 4792 // Just preserve the chain. 4793 return Op.getOperand(0); 4794 4795 SDLoc DL(Op); 4796 bool IsWrite = Op.getConstantOperandVal(2); 4797 unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ; 4798 auto *Node = cast<MemIntrinsicSDNode>(Op.getNode()); 4799 SDValue Ops[] = {Op.getOperand(0), DAG.getTargetConstant(Code, DL, MVT::i32), 4800 Op.getOperand(1)}; 4801 return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, DL, 4802 Node->getVTList(), Ops, 4803 Node->getMemoryVT(), Node->getMemOperand()); 4804 } 4805 4806 // Convert condition code in CCReg to an i32 value. 
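// IPM places the 2-bit condition code in bits 28-29 of its result (counting
// from the least significant bit) and zeroes the two bits above it, so a
// logical shift right by SystemZ::IPM_CC (28) yields the CC as a value in
// the range [0, 3].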
4807 static SDValue getCCResult(SelectionDAG &DAG, SDValue CCReg) { 4808 SDLoc DL(CCReg); 4809 SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, CCReg); 4810 return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM, 4811 DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32)); 4812 } 4813 4814 SDValue 4815 SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, 4816 SelectionDAG &DAG) const { 4817 unsigned Opcode, CCValid; 4818 if (isIntrinsicWithCCAndChain(Op, Opcode, CCValid)) { 4819 assert(Op->getNumValues() == 2 && "Expected only CC result and chain"); 4820 SDNode *Node = emitIntrinsicWithCCAndChain(DAG, Op, Opcode); 4821 SDValue CC = getCCResult(DAG, SDValue(Node, 0)); 4822 DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), CC); 4823 return SDValue(); 4824 } 4825 4826 return SDValue(); 4827 } 4828 4829 SDValue 4830 SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, 4831 SelectionDAG &DAG) const { 4832 unsigned Opcode, CCValid; 4833 if (isIntrinsicWithCC(Op, Opcode, CCValid)) { 4834 SDNode *Node = emitIntrinsicWithCC(DAG, Op, Opcode); 4835 if (Op->getNumValues() == 1) 4836 return getCCResult(DAG, SDValue(Node, 0)); 4837 assert(Op->getNumValues() == 2 && "Expected a CC and non-CC result"); 4838 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), Op->getVTList(), 4839 SDValue(Node, 0), getCCResult(DAG, SDValue(Node, 1))); 4840 } 4841 4842 unsigned Id = Op.getConstantOperandVal(0); 4843 switch (Id) { 4844 case Intrinsic::thread_pointer: 4845 return lowerThreadPointer(SDLoc(Op), DAG); 4846 4847 case Intrinsic::s390_vpdi: 4848 return DAG.getNode(SystemZISD::PERMUTE_DWORDS, SDLoc(Op), Op.getValueType(), 4849 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); 4850 4851 case Intrinsic::s390_vperm: 4852 return DAG.getNode(SystemZISD::PERMUTE, SDLoc(Op), Op.getValueType(), 4853 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); 4854 4855 case Intrinsic::s390_vuphb: 4856 case Intrinsic::s390_vuphh: 4857 case Intrinsic::s390_vuphf: 4858 return DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(Op), Op.getValueType(), 4859 Op.getOperand(1)); 4860 4861 case Intrinsic::s390_vuplhb: 4862 case Intrinsic::s390_vuplhh: 4863 case Intrinsic::s390_vuplhf: 4864 return DAG.getNode(SystemZISD::UNPACKL_HIGH, SDLoc(Op), Op.getValueType(), 4865 Op.getOperand(1)); 4866 4867 case Intrinsic::s390_vuplb: 4868 case Intrinsic::s390_vuplhw: 4869 case Intrinsic::s390_vuplf: 4870 return DAG.getNode(SystemZISD::UNPACK_LOW, SDLoc(Op), Op.getValueType(), 4871 Op.getOperand(1)); 4872 4873 case Intrinsic::s390_vupllb: 4874 case Intrinsic::s390_vupllh: 4875 case Intrinsic::s390_vupllf: 4876 return DAG.getNode(SystemZISD::UNPACKL_LOW, SDLoc(Op), Op.getValueType(), 4877 Op.getOperand(1)); 4878 4879 case Intrinsic::s390_vsumb: 4880 case Intrinsic::s390_vsumh: 4881 case Intrinsic::s390_vsumgh: 4882 case Intrinsic::s390_vsumgf: 4883 case Intrinsic::s390_vsumqf: 4884 case Intrinsic::s390_vsumqg: 4885 return DAG.getNode(SystemZISD::VSUM, SDLoc(Op), Op.getValueType(), 4886 Op.getOperand(1), Op.getOperand(2)); 4887 4888 case Intrinsic::s390_vaq: 4889 return DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(), 4890 Op.getOperand(1), Op.getOperand(2)); 4891 case Intrinsic::s390_vaccb: 4892 case Intrinsic::s390_vacch: 4893 case Intrinsic::s390_vaccf: 4894 case Intrinsic::s390_vaccg: 4895 case Intrinsic::s390_vaccq: 4896 return DAG.getNode(SystemZISD::VACC, SDLoc(Op), Op.getValueType(), 4897 Op.getOperand(1), Op.getOperand(2)); 4898 case Intrinsic::s390_vacq: 4899 return DAG.getNode(SystemZISD::VAC, SDLoc(Op), Op.getValueType(), 4900 
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); 4901 case Intrinsic::s390_vacccq: 4902 return DAG.getNode(SystemZISD::VACCC, SDLoc(Op), Op.getValueType(), 4903 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); 4904 4905 case Intrinsic::s390_vsq: 4906 return DAG.getNode(ISD::SUB, SDLoc(Op), Op.getValueType(), 4907 Op.getOperand(1), Op.getOperand(2)); 4908 case Intrinsic::s390_vscbib: 4909 case Intrinsic::s390_vscbih: 4910 case Intrinsic::s390_vscbif: 4911 case Intrinsic::s390_vscbig: 4912 case Intrinsic::s390_vscbiq: 4913 return DAG.getNode(SystemZISD::VSCBI, SDLoc(Op), Op.getValueType(), 4914 Op.getOperand(1), Op.getOperand(2)); 4915 case Intrinsic::s390_vsbiq: 4916 return DAG.getNode(SystemZISD::VSBI, SDLoc(Op), Op.getValueType(), 4917 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); 4918 case Intrinsic::s390_vsbcbiq: 4919 return DAG.getNode(SystemZISD::VSBCBI, SDLoc(Op), Op.getValueType(), 4920 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); 4921 } 4922 4923 return SDValue(); 4924 } 4925 4926 namespace { 4927 // Says that SystemZISD operation Opcode can be used to perform the equivalent 4928 // of a VPERM with permute vector Bytes. If Opcode takes three operands, 4929 // Operand is the constant third operand, otherwise it is the number of 4930 // bytes in each element of the result. 4931 struct Permute { 4932 unsigned Opcode; 4933 unsigned Operand; 4934 unsigned char Bytes[SystemZ::VectorBytes]; 4935 }; 4936 } 4937 4938 static const Permute PermuteForms[] = { 4939 // VMRHG 4940 { SystemZISD::MERGE_HIGH, 8, 4941 { 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 } }, 4942 // VMRHF 4943 { SystemZISD::MERGE_HIGH, 4, 4944 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } }, 4945 // VMRHH 4946 { SystemZISD::MERGE_HIGH, 2, 4947 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } }, 4948 // VMRHB 4949 { SystemZISD::MERGE_HIGH, 1, 4950 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } }, 4951 // VMRLG 4952 { SystemZISD::MERGE_LOW, 8, 4953 { 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31 } }, 4954 // VMRLF 4955 { SystemZISD::MERGE_LOW, 4, 4956 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } }, 4957 // VMRLH 4958 { SystemZISD::MERGE_LOW, 2, 4959 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } }, 4960 // VMRLB 4961 { SystemZISD::MERGE_LOW, 1, 4962 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } }, 4963 // VPKG 4964 { SystemZISD::PACK, 4, 4965 { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 } }, 4966 // VPKF 4967 { SystemZISD::PACK, 2, 4968 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } }, 4969 // VPKH 4970 { SystemZISD::PACK, 1, 4971 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } }, 4972 // VPDI V1, V2, 4 (low half of V1, high half of V2) 4973 { SystemZISD::PERMUTE_DWORDS, 4, 4974 { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 } }, 4975 // VPDI V1, V2, 1 (high half of V1, low half of V2) 4976 { SystemZISD::PERMUTE_DWORDS, 1, 4977 { 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 } } 4978 }; 4979 4980 // Called after matching a vector shuffle against a particular pattern. 4981 // Both the original shuffle and the pattern have two vector operands. 4982 // OpNos[0] is the operand of the original shuffle that should be used for 4983 // operand 0 of the pattern, or -1 if operand 0 of the pattern can be anything. 4984 // OpNos[1] is the same for operand 1 of the pattern. 
Resolve these -1s and 4985 // set OpNo0 and OpNo1 to the shuffle operands that should actually be used 4986 // for operands 0 and 1 of the pattern. 4987 static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1) { 4988 if (OpNos[0] < 0) { 4989 if (OpNos[1] < 0) 4990 return false; 4991 OpNo0 = OpNo1 = OpNos[1]; 4992 } else if (OpNos[1] < 0) { 4993 OpNo0 = OpNo1 = OpNos[0]; 4994 } else { 4995 OpNo0 = OpNos[0]; 4996 OpNo1 = OpNos[1]; 4997 } 4998 return true; 4999 } 5000 5001 // Bytes is a VPERM-like permute vector, except that -1 is used for 5002 // undefined bytes. Return true if the VPERM can be implemented using P. 5003 // When returning true set OpNo0 to the VPERM operand that should be 5004 // used for operand 0 of P and likewise OpNo1 for operand 1 of P. 5005 // 5006 // For example, if swapping the VPERM operands allows P to match, OpNo0 5007 // will be 1 and OpNo1 will be 0. If instead Bytes only refers to one 5008 // operand, but rewriting it to use two duplicated operands allows it to 5009 // match P, then OpNo0 and OpNo1 will be the same. 5010 static bool matchPermute(const SmallVectorImpl<int> &Bytes, const Permute &P, 5011 unsigned &OpNo0, unsigned &OpNo1) { 5012 int OpNos[] = { -1, -1 }; 5013 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) { 5014 int Elt = Bytes[I]; 5015 if (Elt >= 0) { 5016 // Make sure that the two permute vectors use the same suboperand 5017 // byte number. Only the operand numbers (the high bits) are 5018 // allowed to differ. 5019 if ((Elt ^ P.Bytes[I]) & (SystemZ::VectorBytes - 1)) 5020 return false; 5021 int ModelOpNo = P.Bytes[I] / SystemZ::VectorBytes; 5022 int RealOpNo = unsigned(Elt) / SystemZ::VectorBytes; 5023 // Make sure that the operand mappings are consistent with previous 5024 // elements. 5025 if (OpNos[ModelOpNo] == 1 - RealOpNo) 5026 return false; 5027 OpNos[ModelOpNo] = RealOpNo; 5028 } 5029 } 5030 return chooseShuffleOpNos(OpNos, OpNo0, OpNo1); 5031 } 5032 5033 // As above, but search for a matching permute. 5034 static const Permute *matchPermute(const SmallVectorImpl<int> &Bytes, 5035 unsigned &OpNo0, unsigned &OpNo1) { 5036 for (auto &P : PermuteForms) 5037 if (matchPermute(Bytes, P, OpNo0, OpNo1)) 5038 return &P; 5039 return nullptr; 5040 } 5041 5042 // Bytes is a VPERM-like permute vector, except that -1 is used for 5043 // undefined bytes. This permute is an operand of an outer permute. 5044 // See whether redistributing the -1 bytes gives a shuffle that can be 5045 // implemented using P. If so, set Transform to a VPERM-like permute vector 5046 // that, when applied to the result of P, gives the original permute in Bytes. 5047 static bool matchDoublePermute(const SmallVectorImpl<int> &Bytes, 5048 const Permute &P, 5049 SmallVectorImpl<int> &Transform) { 5050 unsigned To = 0; 5051 for (unsigned From = 0; From < SystemZ::VectorBytes; ++From) { 5052 int Elt = Bytes[From]; 5053 if (Elt < 0) 5054 // Byte number From of the result is undefined. 5055 Transform[From] = -1; 5056 else { 5057 while (P.Bytes[To] != Elt) { 5058 To += 1; 5059 if (To == SystemZ::VectorBytes) 5060 return false; 5061 } 5062 Transform[From] = To; 5063 } 5064 } 5065 return true; 5066 } 5067 5068 // As above, but search for a matching permute. 
5069 static const Permute *matchDoublePermute(const SmallVectorImpl<int> &Bytes, 5070 SmallVectorImpl<int> &Transform) { 5071 for (auto &P : PermuteForms) 5072 if (matchDoublePermute(Bytes, P, Transform)) 5073 return &P; 5074 return nullptr; 5075 } 5076 5077 // Convert the mask of the given shuffle op into a byte-level mask, 5078 // as if it had type vNi8. 5079 static bool getVPermMask(SDValue ShuffleOp, 5080 SmallVectorImpl<int> &Bytes) { 5081 EVT VT = ShuffleOp.getValueType(); 5082 unsigned NumElements = VT.getVectorNumElements(); 5083 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize(); 5084 5085 if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(ShuffleOp)) { 5086 Bytes.resize(NumElements * BytesPerElement, -1); 5087 for (unsigned I = 0; I < NumElements; ++I) { 5088 int Index = VSN->getMaskElt(I); 5089 if (Index >= 0) 5090 for (unsigned J = 0; J < BytesPerElement; ++J) 5091 Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J; 5092 } 5093 return true; 5094 } 5095 if (SystemZISD::SPLAT == ShuffleOp.getOpcode() && 5096 isa<ConstantSDNode>(ShuffleOp.getOperand(1))) { 5097 unsigned Index = ShuffleOp.getConstantOperandVal(1); 5098 Bytes.resize(NumElements * BytesPerElement, -1); 5099 for (unsigned I = 0; I < NumElements; ++I) 5100 for (unsigned J = 0; J < BytesPerElement; ++J) 5101 Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J; 5102 return true; 5103 } 5104 return false; 5105 } 5106 5107 // Bytes is a VPERM-like permute vector, except that -1 is used for 5108 // undefined bytes. See whether bytes [Start, Start + BytesPerElement) of 5109 // the result come from a contiguous sequence of bytes from one input. 5110 // Set Base to the selector for the first byte if so. 5111 static bool getShuffleInput(const SmallVectorImpl<int> &Bytes, unsigned Start, 5112 unsigned BytesPerElement, int &Base) { 5113 Base = -1; 5114 for (unsigned I = 0; I < BytesPerElement; ++I) { 5115 if (Bytes[Start + I] >= 0) { 5116 unsigned Elem = Bytes[Start + I]; 5117 if (Base < 0) { 5118 Base = Elem - I; 5119 // Make sure the bytes would come from one input operand. 5120 if (unsigned(Base) % Bytes.size() + BytesPerElement > Bytes.size()) 5121 return false; 5122 } else if (unsigned(Base) != Elem - I) 5123 return false; 5124 } 5125 } 5126 return true; 5127 } 5128 5129 // Bytes is a VPERM-like permute vector, except that -1 is used for 5130 // undefined bytes. Return true if it can be performed using VSLDB. 5131 // When returning true, set StartIndex to the shift amount and OpNo0 5132 // and OpNo1 to the VPERM operands that should be used as the first 5133 // and second shift operand respectively. 5134 static bool isShlDoublePermute(const SmallVectorImpl<int> &Bytes, 5135 unsigned &StartIndex, unsigned &OpNo0, 5136 unsigned &OpNo1) { 5137 int OpNos[] = { -1, -1 }; 5138 int Shift = -1; 5139 for (unsigned I = 0; I < 16; ++I) { 5140 int Index = Bytes[I]; 5141 if (Index >= 0) { 5142 int ExpectedShift = (Index - I) % SystemZ::VectorBytes; 5143 int ModelOpNo = unsigned(ExpectedShift + I) / SystemZ::VectorBytes; 5144 int RealOpNo = unsigned(Index) / SystemZ::VectorBytes; 5145 if (Shift < 0) 5146 Shift = ExpectedShift; 5147 else if (Shift != ExpectedShift) 5148 return false; 5149 // Make sure that the operand mappings are consistent with previous 5150 // elements. 
5151 if (OpNos[ModelOpNo] == 1 - RealOpNo) 5152 return false; 5153 OpNos[ModelOpNo] = RealOpNo; 5154 } 5155 } 5156 StartIndex = Shift; 5157 return chooseShuffleOpNos(OpNos, OpNo0, OpNo1); 5158 } 5159 5160 // Create a node that performs P on operands Op0 and Op1, casting the 5161 // operands to the appropriate type. The type of the result is determined by P. 5162 static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL, 5163 const Permute &P, SDValue Op0, SDValue Op1) { 5164 // VPDI (PERMUTE_DWORDS) always operates on v2i64s. The input 5165 // elements of a PACK are twice as wide as the outputs. 5166 unsigned InBytes = (P.Opcode == SystemZISD::PERMUTE_DWORDS ? 8 : 5167 P.Opcode == SystemZISD::PACK ? P.Operand * 2 : 5168 P.Operand); 5169 // Cast both operands to the appropriate type. 5170 MVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBytes * 8), 5171 SystemZ::VectorBytes / InBytes); 5172 Op0 = DAG.getNode(ISD::BITCAST, DL, InVT, Op0); 5173 Op1 = DAG.getNode(ISD::BITCAST, DL, InVT, Op1); 5174 SDValue Op; 5175 if (P.Opcode == SystemZISD::PERMUTE_DWORDS) { 5176 SDValue Op2 = DAG.getTargetConstant(P.Operand, DL, MVT::i32); 5177 Op = DAG.getNode(SystemZISD::PERMUTE_DWORDS, DL, InVT, Op0, Op1, Op2); 5178 } else if (P.Opcode == SystemZISD::PACK) { 5179 MVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(P.Operand * 8), 5180 SystemZ::VectorBytes / P.Operand); 5181 Op = DAG.getNode(SystemZISD::PACK, DL, OutVT, Op0, Op1); 5182 } else { 5183 Op = DAG.getNode(P.Opcode, DL, InVT, Op0, Op1); 5184 } 5185 return Op; 5186 } 5187 5188 static bool isZeroVector(SDValue N) { 5189 if (N->getOpcode() == ISD::BITCAST) 5190 N = N->getOperand(0); 5191 if (N->getOpcode() == ISD::SPLAT_VECTOR) 5192 if (auto *Op = dyn_cast<ConstantSDNode>(N->getOperand(0))) 5193 return Op->getZExtValue() == 0; 5194 return ISD::isBuildVectorAllZeros(N.getNode()); 5195 } 5196 5197 // Return the index of the zero/undef vector, or UINT32_MAX if not found. 5198 static uint32_t findZeroVectorIdx(SDValue *Ops, unsigned Num) { 5199 for (unsigned I = 0; I < Num ; I++) 5200 if (isZeroVector(Ops[I])) 5201 return I; 5202 return UINT32_MAX; 5203 } 5204 5205 // Bytes is a VPERM-like permute vector, except that -1 is used for 5206 // undefined bytes. Implement it on operands Ops[0] and Ops[1] using 5207 // VSLDB or VPERM. 5208 static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL, 5209 SDValue *Ops, 5210 const SmallVectorImpl<int> &Bytes) { 5211 for (unsigned I = 0; I < 2; ++I) 5212 Ops[I] = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Ops[I]); 5213 5214 // First see whether VSLDB can be used. 5215 unsigned StartIndex, OpNo0, OpNo1; 5216 if (isShlDoublePermute(Bytes, StartIndex, OpNo0, OpNo1)) 5217 return DAG.getNode(SystemZISD::SHL_DOUBLE, DL, MVT::v16i8, Ops[OpNo0], 5218 Ops[OpNo1], 5219 DAG.getTargetConstant(StartIndex, DL, MVT::i32)); 5220 5221 // Fall back on VPERM. Construct an SDNode for the permute vector. Try to 5222 // eliminate a zero vector by reusing any zero index in the permute vector. 5223 unsigned ZeroVecIdx = findZeroVectorIdx(&Ops[0], 2); 5224 if (ZeroVecIdx != UINT32_MAX) { 5225 bool MaskFirst = true; 5226 int ZeroIdx = -1; 5227 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) { 5228 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes; 5229 unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes; 5230 if (OpNo == ZeroVecIdx && I == 0) { 5231 // If the first byte is zero, use mask as first operand. 
5232 ZeroIdx = 0; 5233 break; 5234 } 5235 if (OpNo != ZeroVecIdx && Byte == 0) { 5236 // If mask contains a zero, use it by placing that vector first. 5237 ZeroIdx = I + SystemZ::VectorBytes; 5238 MaskFirst = false; 5239 break; 5240 } 5241 } 5242 if (ZeroIdx != -1) { 5243 SDValue IndexNodes[SystemZ::VectorBytes]; 5244 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) { 5245 if (Bytes[I] >= 0) { 5246 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes; 5247 unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes; 5248 if (OpNo == ZeroVecIdx) 5249 IndexNodes[I] = DAG.getConstant(ZeroIdx, DL, MVT::i32); 5250 else { 5251 unsigned BIdx = MaskFirst ? Byte + SystemZ::VectorBytes : Byte; 5252 IndexNodes[I] = DAG.getConstant(BIdx, DL, MVT::i32); 5253 } 5254 } else 5255 IndexNodes[I] = DAG.getUNDEF(MVT::i32); 5256 } 5257 SDValue Mask = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes); 5258 SDValue Src = ZeroVecIdx == 0 ? Ops[1] : Ops[0]; 5259 if (MaskFirst) 5260 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Mask, Src, 5261 Mask); 5262 else 5263 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Src, Mask, 5264 Mask); 5265 } 5266 } 5267 5268 SDValue IndexNodes[SystemZ::VectorBytes]; 5269 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) 5270 if (Bytes[I] >= 0) 5271 IndexNodes[I] = DAG.getConstant(Bytes[I], DL, MVT::i32); 5272 else 5273 IndexNodes[I] = DAG.getUNDEF(MVT::i32); 5274 SDValue Op2 = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes); 5275 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0], 5276 (!Ops[1].isUndef() ? Ops[1] : Ops[0]), Op2); 5277 } 5278 5279 namespace { 5280 // Describes a general N-operand vector shuffle. 5281 struct GeneralShuffle { 5282 GeneralShuffle(EVT vt) : VT(vt), UnpackFromEltSize(UINT_MAX) {} 5283 void addUndef(); 5284 bool add(SDValue, unsigned); 5285 SDValue getNode(SelectionDAG &, const SDLoc &); 5286 void tryPrepareForUnpack(); 5287 bool unpackWasPrepared() { return UnpackFromEltSize <= 4; } 5288 SDValue insertUnpackIfPrepared(SelectionDAG &DAG, const SDLoc &DL, SDValue Op); 5289 5290 // The operands of the shuffle. 5291 SmallVector<SDValue, SystemZ::VectorBytes> Ops; 5292 5293 // Index I is -1 if byte I of the result is undefined. Otherwise the 5294 // result comes from byte Bytes[I] % SystemZ::VectorBytes of operand 5295 // Bytes[I] / SystemZ::VectorBytes. 5296 SmallVector<int, SystemZ::VectorBytes> Bytes; 5297 5298 // The type of the shuffle result. 5299 EVT VT; 5300 5301 // Holds a value of 1, 2 or 4 if a final unpack has been prepared for. 5302 unsigned UnpackFromEltSize; 5303 }; 5304 } 5305 5306 // Add an extra undefined element to the shuffle. 5307 void GeneralShuffle::addUndef() { 5308 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize(); 5309 for (unsigned I = 0; I < BytesPerElement; ++I) 5310 Bytes.push_back(-1); 5311 } 5312 5313 // Add an extra element to the shuffle, taking it from element Elem of Op. 5314 // A null Op indicates a vector input whose value will be calculated later; 5315 // there is at most one such input per shuffle and it always has the same 5316 // type as the result. Aborts and returns false if the source vector elements 5317 // of an EXTRACT_VECTOR_ELT are smaller than the destination elements. Per 5318 // LLVM they become implicitly extended, but this is rare and not optimized. 
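// For example, adding element 3 of a v4i32 operand appends that operand to
// Ops if it is not already present and pushes the selectors for bytes 12-15
// of that operand (OpNo * SystemZ::VectorBytes + 12 .. + 15) onto Bytes.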
5319 bool GeneralShuffle::add(SDValue Op, unsigned Elem) { 5320 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize(); 5321 5322 // The source vector can have wider elements than the result, 5323 // either through an explicit TRUNCATE or because of type legalization. 5324 // We want the least significant part. 5325 EVT FromVT = Op.getNode() ? Op.getValueType() : VT; 5326 unsigned FromBytesPerElement = FromVT.getVectorElementType().getStoreSize(); 5327 5328 // Return false if the source elements are smaller than their destination 5329 // elements. 5330 if (FromBytesPerElement < BytesPerElement) 5331 return false; 5332 5333 unsigned Byte = ((Elem * FromBytesPerElement) % SystemZ::VectorBytes + 5334 (FromBytesPerElement - BytesPerElement)); 5335 5336 // Look through things like shuffles and bitcasts. 5337 while (Op.getNode()) { 5338 if (Op.getOpcode() == ISD::BITCAST) 5339 Op = Op.getOperand(0); 5340 else if (Op.getOpcode() == ISD::VECTOR_SHUFFLE && Op.hasOneUse()) { 5341 // See whether the bytes we need come from a contiguous part of one 5342 // operand. 5343 SmallVector<int, SystemZ::VectorBytes> OpBytes; 5344 if (!getVPermMask(Op, OpBytes)) 5345 break; 5346 int NewByte; 5347 if (!getShuffleInput(OpBytes, Byte, BytesPerElement, NewByte)) 5348 break; 5349 if (NewByte < 0) { 5350 addUndef(); 5351 return true; 5352 } 5353 Op = Op.getOperand(unsigned(NewByte) / SystemZ::VectorBytes); 5354 Byte = unsigned(NewByte) % SystemZ::VectorBytes; 5355 } else if (Op.isUndef()) { 5356 addUndef(); 5357 return true; 5358 } else 5359 break; 5360 } 5361 5362 // Make sure that the source of the extraction is in Ops. 5363 unsigned OpNo = 0; 5364 for (; OpNo < Ops.size(); ++OpNo) 5365 if (Ops[OpNo] == Op) 5366 break; 5367 if (OpNo == Ops.size()) 5368 Ops.push_back(Op); 5369 5370 // Add the element to Bytes. 5371 unsigned Base = OpNo * SystemZ::VectorBytes + Byte; 5372 for (unsigned I = 0; I < BytesPerElement; ++I) 5373 Bytes.push_back(Base + I); 5374 5375 return true; 5376 } 5377 5378 // Return SDNodes for the completed shuffle. 5379 SDValue GeneralShuffle::getNode(SelectionDAG &DAG, const SDLoc &DL) { 5380 assert(Bytes.size() == SystemZ::VectorBytes && "Incomplete vector"); 5381 5382 if (Ops.size() == 0) 5383 return DAG.getUNDEF(VT); 5384 5385 // Use a single unpack if possible as the last operation. 5386 tryPrepareForUnpack(); 5387 5388 // Make sure that there are at least two shuffle operands. 5389 if (Ops.size() == 1) 5390 Ops.push_back(DAG.getUNDEF(MVT::v16i8)); 5391 5392 // Create a tree of shuffles, deferring root node until after the loop. 5393 // Try to redistribute the undefined elements of non-root nodes so that 5394 // the non-root shuffles match something like a pack or merge, then adjust 5395 // the parent node's permute vector to compensate for the new order. 5396 // Among other things, this copes with vectors like <2 x i16> that were 5397 // padded with undefined elements during type legalization. 5398 // 5399 // In the best case this redistribution will lead to the whole tree 5400 // using packs and merges. It should rarely be a loss in other cases. 5401 unsigned Stride = 1; 5402 for (; Stride * 2 < Ops.size(); Stride *= 2) { 5403 for (unsigned I = 0; I < Ops.size() - Stride; I += Stride * 2) { 5404 SDValue SubOps[] = { Ops[I], Ops[I + Stride] }; 5405 5406 // Create a mask for just these two operands. 
5407 SmallVector<int, SystemZ::VectorBytes> NewBytes(SystemZ::VectorBytes); 5408 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) { 5409 unsigned OpNo = unsigned(Bytes[J]) / SystemZ::VectorBytes; 5410 unsigned Byte = unsigned(Bytes[J]) % SystemZ::VectorBytes; 5411 if (OpNo == I) 5412 NewBytes[J] = Byte; 5413 else if (OpNo == I + Stride) 5414 NewBytes[J] = SystemZ::VectorBytes + Byte; 5415 else 5416 NewBytes[J] = -1; 5417 } 5418 // See if it would be better to reorganize NewMask to avoid using VPERM. 5419 SmallVector<int, SystemZ::VectorBytes> NewBytesMap(SystemZ::VectorBytes); 5420 if (const Permute *P = matchDoublePermute(NewBytes, NewBytesMap)) { 5421 Ops[I] = getPermuteNode(DAG, DL, *P, SubOps[0], SubOps[1]); 5422 // Applying NewBytesMap to Ops[I] gets back to NewBytes. 5423 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) { 5424 if (NewBytes[J] >= 0) { 5425 assert(unsigned(NewBytesMap[J]) < SystemZ::VectorBytes && 5426 "Invalid double permute"); 5427 Bytes[J] = I * SystemZ::VectorBytes + NewBytesMap[J]; 5428 } else 5429 assert(NewBytesMap[J] < 0 && "Invalid double permute"); 5430 } 5431 } else { 5432 // Just use NewBytes on the operands. 5433 Ops[I] = getGeneralPermuteNode(DAG, DL, SubOps, NewBytes); 5434 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) 5435 if (NewBytes[J] >= 0) 5436 Bytes[J] = I * SystemZ::VectorBytes + J; 5437 } 5438 } 5439 } 5440 5441 // Now we just have 2 inputs. Put the second operand in Ops[1]. 5442 if (Stride > 1) { 5443 Ops[1] = Ops[Stride]; 5444 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) 5445 if (Bytes[I] >= int(SystemZ::VectorBytes)) 5446 Bytes[I] -= (Stride - 1) * SystemZ::VectorBytes; 5447 } 5448 5449 // Look for an instruction that can do the permute without resorting 5450 // to VPERM. 5451 unsigned OpNo0, OpNo1; 5452 SDValue Op; 5453 if (unpackWasPrepared() && Ops[1].isUndef()) 5454 Op = Ops[0]; 5455 else if (const Permute *P = matchPermute(Bytes, OpNo0, OpNo1)) 5456 Op = getPermuteNode(DAG, DL, *P, Ops[OpNo0], Ops[OpNo1]); 5457 else 5458 Op = getGeneralPermuteNode(DAG, DL, &Ops[0], Bytes); 5459 5460 Op = insertUnpackIfPrepared(DAG, DL, Op); 5461 5462 return DAG.getNode(ISD::BITCAST, DL, VT, Op); 5463 } 5464 5465 #ifndef NDEBUG 5466 static void dumpBytes(const SmallVectorImpl<int> &Bytes, std::string Msg) { 5467 dbgs() << Msg.c_str() << " { "; 5468 for (unsigned i = 0; i < Bytes.size(); i++) 5469 dbgs() << Bytes[i] << " "; 5470 dbgs() << "}\n"; 5471 } 5472 #endif 5473 5474 // If the Bytes vector matches an unpack operation, prepare to do the unpack 5475 // after all else by removing the zero vector and the effect of the unpack on 5476 // Bytes. 5477 void GeneralShuffle::tryPrepareForUnpack() { 5478 uint32_t ZeroVecOpNo = findZeroVectorIdx(&Ops[0], Ops.size()); 5479 if (ZeroVecOpNo == UINT32_MAX || Ops.size() == 1) 5480 return; 5481 5482 // Only do this if removing the zero vector reduces the depth, otherwise 5483 // the critical path will increase with the final unpack. 5484 if (Ops.size() > 2 && 5485 Log2_32_Ceil(Ops.size()) == Log2_32_Ceil(Ops.size() - 1)) 5486 return; 5487 5488 // Find an unpack that would allow removing the zero vector from Ops. 
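  // The candidate unpack zero extends each source element to twice its width,
  // so it matches if every defined byte that the zero extension would produce
  // comes from the zero vector and every other defined byte does not.  With
  // only two operands the source bytes must additionally already be in order,
  // since no later shuffle will rearrange them.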
5489 UnpackFromEltSize = 1; 5490 for (; UnpackFromEltSize <= 4; UnpackFromEltSize *= 2) { 5491 bool MatchUnpack = true; 5492 SmallVector<int, SystemZ::VectorBytes> SrcBytes; 5493 for (unsigned Elt = 0; Elt < SystemZ::VectorBytes; Elt++) { 5494 unsigned ToEltSize = UnpackFromEltSize * 2; 5495 bool IsZextByte = (Elt % ToEltSize) < UnpackFromEltSize; 5496 if (!IsZextByte) 5497 SrcBytes.push_back(Bytes[Elt]); 5498 if (Bytes[Elt] != -1) { 5499 unsigned OpNo = unsigned(Bytes[Elt]) / SystemZ::VectorBytes; 5500 if (IsZextByte != (OpNo == ZeroVecOpNo)) { 5501 MatchUnpack = false; 5502 break; 5503 } 5504 } 5505 } 5506 if (MatchUnpack) { 5507 if (Ops.size() == 2) { 5508 // Don't use unpack if a single source operand needs rearrangement. 5509 for (unsigned i = 0; i < SystemZ::VectorBytes / 2; i++) 5510 if (SrcBytes[i] != -1 && SrcBytes[i] % 16 != int(i)) { 5511 UnpackFromEltSize = UINT_MAX; 5512 return; 5513 } 5514 } 5515 break; 5516 } 5517 } 5518 if (UnpackFromEltSize > 4) 5519 return; 5520 5521 LLVM_DEBUG(dbgs() << "Preparing for final unpack of element size " 5522 << UnpackFromEltSize << ". Zero vector is Op#" << ZeroVecOpNo 5523 << ".\n"; 5524 dumpBytes(Bytes, "Original Bytes vector:");); 5525 5526 // Apply the unpack in reverse to the Bytes array. 5527 unsigned B = 0; 5528 for (unsigned Elt = 0; Elt < SystemZ::VectorBytes;) { 5529 Elt += UnpackFromEltSize; 5530 for (unsigned i = 0; i < UnpackFromEltSize; i++, Elt++, B++) 5531 Bytes[B] = Bytes[Elt]; 5532 } 5533 while (B < SystemZ::VectorBytes) 5534 Bytes[B++] = -1; 5535 5536 // Remove the zero vector from Ops 5537 Ops.erase(&Ops[ZeroVecOpNo]); 5538 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) 5539 if (Bytes[I] >= 0) { 5540 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes; 5541 if (OpNo > ZeroVecOpNo) 5542 Bytes[I] -= SystemZ::VectorBytes; 5543 } 5544 5545 LLVM_DEBUG(dumpBytes(Bytes, "Resulting Bytes vector, zero vector removed:"); 5546 dbgs() << "\n";); 5547 } 5548 5549 SDValue GeneralShuffle::insertUnpackIfPrepared(SelectionDAG &DAG, 5550 const SDLoc &DL, 5551 SDValue Op) { 5552 if (!unpackWasPrepared()) 5553 return Op; 5554 unsigned InBits = UnpackFromEltSize * 8; 5555 EVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBits), 5556 SystemZ::VectorBits / InBits); 5557 SDValue PackedOp = DAG.getNode(ISD::BITCAST, DL, InVT, Op); 5558 unsigned OutBits = InBits * 2; 5559 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(OutBits), 5560 SystemZ::VectorBits / OutBits); 5561 return DAG.getNode(SystemZISD::UNPACKL_HIGH, DL, OutVT, PackedOp); 5562 } 5563 5564 // Return true if the given BUILD_VECTOR is a scalar-to-vector conversion. 5565 static bool isScalarToVector(SDValue Op) { 5566 for (unsigned I = 1, E = Op.getNumOperands(); I != E; ++I) 5567 if (!Op.getOperand(I).isUndef()) 5568 return false; 5569 return true; 5570 } 5571 5572 // Return a vector of type VT that contains Value in the first element. 5573 // The other elements don't matter. 5574 static SDValue buildScalarToVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT, 5575 SDValue Value) { 5576 // If we have a constant, replicate it to all elements and let the 5577 // BUILD_VECTOR lowering take care of it. 
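  // (The replicated constant can then usually be materialized directly, e.g.
  // with VREPI or VGBM, or loaded from the constant pool as a last resort.)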
5578 if (Value.getOpcode() == ISD::Constant || 5579 Value.getOpcode() == ISD::ConstantFP) { 5580 SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Value); 5581 return DAG.getBuildVector(VT, DL, Ops); 5582 } 5583 if (Value.isUndef()) 5584 return DAG.getUNDEF(VT); 5585 return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value); 5586 } 5587 5588 // Return a vector of type VT in which Op0 is in element 0 and Op1 is in 5589 // element 1. Used for cases in which replication is cheap. 5590 static SDValue buildMergeScalars(SelectionDAG &DAG, const SDLoc &DL, EVT VT, 5591 SDValue Op0, SDValue Op1) { 5592 if (Op0.isUndef()) { 5593 if (Op1.isUndef()) 5594 return DAG.getUNDEF(VT); 5595 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op1); 5596 } 5597 if (Op1.isUndef()) 5598 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0); 5599 return DAG.getNode(SystemZISD::MERGE_HIGH, DL, VT, 5600 buildScalarToVector(DAG, DL, VT, Op0), 5601 buildScalarToVector(DAG, DL, VT, Op1)); 5602 } 5603 5604 // Extend GPR scalars Op0 and Op1 to doublewords and return a v2i64 5605 // vector for them. 5606 static SDValue joinDwords(SelectionDAG &DAG, const SDLoc &DL, SDValue Op0, 5607 SDValue Op1) { 5608 if (Op0.isUndef() && Op1.isUndef()) 5609 return DAG.getUNDEF(MVT::v2i64); 5610 // If one of the two inputs is undefined then replicate the other one, 5611 // in order to avoid using another register unnecessarily. 5612 if (Op0.isUndef()) 5613 Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1); 5614 else if (Op1.isUndef()) 5615 Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0); 5616 else { 5617 Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0); 5618 Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1); 5619 } 5620 return DAG.getNode(SystemZISD::JOIN_DWORDS, DL, MVT::v2i64, Op0, Op1); 5621 } 5622 5623 // If a BUILD_VECTOR contains some EXTRACT_VECTOR_ELTs, it's usually 5624 // better to use VECTOR_SHUFFLEs on them, only using BUILD_VECTOR for 5625 // the non-EXTRACT_VECTOR_ELT elements. See if the given BUILD_VECTOR 5626 // would benefit from this representation and return it if so. 5627 static SDValue tryBuildVectorShuffle(SelectionDAG &DAG, 5628 BuildVectorSDNode *BVN) { 5629 EVT VT = BVN->getValueType(0); 5630 unsigned NumElements = VT.getVectorNumElements(); 5631 5632 // Represent the BUILD_VECTOR as an N-operand VECTOR_SHUFFLE-like operation 5633 // on byte vectors. If there are non-EXTRACT_VECTOR_ELT elements that still 5634 // need a BUILD_VECTOR, add an additional placeholder operand for that 5635 // BUILD_VECTOR and store its operands in ResidueOps. 5636 GeneralShuffle GS(VT); 5637 SmallVector<SDValue, SystemZ::VectorBytes> ResidueOps; 5638 bool FoundOne = false; 5639 for (unsigned I = 0; I < NumElements; ++I) { 5640 SDValue Op = BVN->getOperand(I); 5641 if (Op.getOpcode() == ISD::TRUNCATE) 5642 Op = Op.getOperand(0); 5643 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && 5644 Op.getOperand(1).getOpcode() == ISD::Constant) { 5645 unsigned Elem = Op.getConstantOperandVal(1); 5646 if (!GS.add(Op.getOperand(0), Elem)) 5647 return SDValue(); 5648 FoundOne = true; 5649 } else if (Op.isUndef()) { 5650 GS.addUndef(); 5651 } else { 5652 if (!GS.add(SDValue(), ResidueOps.size())) 5653 return SDValue(); 5654 ResidueOps.push_back(BVN->getOperand(I)); 5655 } 5656 } 5657 5658 // Nothing to do if there are no EXTRACT_VECTOR_ELTs. 5659 if (!FoundOne) 5660 return SDValue(); 5661 5662 // Create the BUILD_VECTOR for the remaining elements, if any. 
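  // It is assigned to the single placeholder (null) operand that was added to
  // GS.Ops for the residue elements above.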
5663 if (!ResidueOps.empty()) { 5664 while (ResidueOps.size() < NumElements) 5665 ResidueOps.push_back(DAG.getUNDEF(ResidueOps[0].getValueType())); 5666 for (auto &Op : GS.Ops) { 5667 if (!Op.getNode()) { 5668 Op = DAG.getBuildVector(VT, SDLoc(BVN), ResidueOps); 5669 break; 5670 } 5671 } 5672 } 5673 return GS.getNode(DAG, SDLoc(BVN)); 5674 } 5675 5676 bool SystemZTargetLowering::isVectorElementLoad(SDValue Op) const { 5677 if (Op.getOpcode() == ISD::LOAD && cast<LoadSDNode>(Op)->isUnindexed()) 5678 return true; 5679 if (auto *AL = dyn_cast<AtomicSDNode>(Op)) 5680 if (AL->getOpcode() == ISD::ATOMIC_LOAD) 5681 return true; 5682 if (Subtarget.hasVectorEnhancements2() && Op.getOpcode() == SystemZISD::LRV) 5683 return true; 5684 return false; 5685 } 5686 5687 // Combine GPR scalar values Elems into a vector of type VT. 5688 SDValue 5689 SystemZTargetLowering::buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT, 5690 SmallVectorImpl<SDValue> &Elems) const { 5691 // See whether there is a single replicated value. 5692 SDValue Single; 5693 unsigned int NumElements = Elems.size(); 5694 unsigned int Count = 0; 5695 for (auto Elem : Elems) { 5696 if (!Elem.isUndef()) { 5697 if (!Single.getNode()) 5698 Single = Elem; 5699 else if (Elem != Single) { 5700 Single = SDValue(); 5701 break; 5702 } 5703 Count += 1; 5704 } 5705 } 5706 // There are three cases here: 5707 // 5708 // - if the only defined element is a loaded one, the best sequence 5709 // is a replicating load. 5710 // 5711 // - otherwise, if the only defined element is an i64 value, we will 5712 // end up with the same VLVGP sequence regardless of whether we short-cut 5713 // for replication or fall through to the later code. 5714 // 5715 // - otherwise, if the only defined element is an i32 or smaller value, 5716 // we would need 2 instructions to replicate it: VLVGP followed by VREPx. 5717 // This is only a win if the single defined element is used more than once. 5718 // In other cases we're better off using a single VLVGx. 5719 if (Single.getNode() && (Count > 1 || isVectorElementLoad(Single))) 5720 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Single); 5721 5722 // If all elements are loads, use VLREP/VLEs (below). 5723 bool AllLoads = true; 5724 for (auto Elem : Elems) 5725 if (!isVectorElementLoad(Elem)) { 5726 AllLoads = false; 5727 break; 5728 } 5729 5730 // The best way of building a v2i64 from two i64s is to use VLVGP. 5731 if (VT == MVT::v2i64 && !AllLoads) 5732 return joinDwords(DAG, DL, Elems[0], Elems[1]); 5733 5734 // Use a 64-bit merge high to combine two doubles. 5735 if (VT == MVT::v2f64 && !AllLoads) 5736 return buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]); 5737 5738 // Build v4f32 values directly from the FPRs: 5739 // 5740 // <Axxx> <Bxxx> <Cxxxx> <Dxxx> 5741 // V V VMRHF 5742 // <ABxx> <CDxx> 5743 // V VMRHG 5744 // <ABCD> 5745 if (VT == MVT::v4f32 && !AllLoads) { 5746 SDValue Op01 = buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]); 5747 SDValue Op23 = buildMergeScalars(DAG, DL, VT, Elems[2], Elems[3]); 5748 // Avoid unnecessary undefs by reusing the other operand. 5749 if (Op01.isUndef()) 5750 Op01 = Op23; 5751 else if (Op23.isUndef()) 5752 Op23 = Op01; 5753 // Merging identical replications is a no-op. 
5754 if (Op01.getOpcode() == SystemZISD::REPLICATE && Op01 == Op23) 5755 return Op01; 5756 Op01 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op01); 5757 Op23 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op23); 5758 SDValue Op = DAG.getNode(SystemZISD::MERGE_HIGH, 5759 DL, MVT::v2i64, Op01, Op23); 5760 return DAG.getNode(ISD::BITCAST, DL, VT, Op); 5761 } 5762 5763 // Collect the constant terms. 5764 SmallVector<SDValue, SystemZ::VectorBytes> Constants(NumElements, SDValue()); 5765 SmallVector<bool, SystemZ::VectorBytes> Done(NumElements, false); 5766 5767 unsigned NumConstants = 0; 5768 for (unsigned I = 0; I < NumElements; ++I) { 5769 SDValue Elem = Elems[I]; 5770 if (Elem.getOpcode() == ISD::Constant || 5771 Elem.getOpcode() == ISD::ConstantFP) { 5772 NumConstants += 1; 5773 Constants[I] = Elem; 5774 Done[I] = true; 5775 } 5776 } 5777 // If there was at least one constant, fill in the other elements of 5778 // Constants with undefs to get a full vector constant and use that 5779 // as the starting point. 5780 SDValue Result; 5781 SDValue ReplicatedVal; 5782 if (NumConstants > 0) { 5783 for (unsigned I = 0; I < NumElements; ++I) 5784 if (!Constants[I].getNode()) 5785 Constants[I] = DAG.getUNDEF(Elems[I].getValueType()); 5786 Result = DAG.getBuildVector(VT, DL, Constants); 5787 } else { 5788 // Otherwise try to use VLREP or VLVGP to start the sequence in order to 5789 // avoid a false dependency on any previous contents of the vector 5790 // register. 5791 5792 // Use a VLREP if at least one element is a load. Make sure to replicate 5793 // the load with the most elements having its value. 5794 std::map<const SDNode*, unsigned> UseCounts; 5795 SDNode *LoadMaxUses = nullptr; 5796 for (unsigned I = 0; I < NumElements; ++I) 5797 if (isVectorElementLoad(Elems[I])) { 5798 SDNode *Ld = Elems[I].getNode(); 5799 UseCounts[Ld]++; 5800 if (LoadMaxUses == nullptr || UseCounts[LoadMaxUses] < UseCounts[Ld]) 5801 LoadMaxUses = Ld; 5802 } 5803 if (LoadMaxUses != nullptr) { 5804 ReplicatedVal = SDValue(LoadMaxUses, 0); 5805 Result = DAG.getNode(SystemZISD::REPLICATE, DL, VT, ReplicatedVal); 5806 } else { 5807 // Try to use VLVGP. 5808 unsigned I1 = NumElements / 2 - 1; 5809 unsigned I2 = NumElements - 1; 5810 bool Def1 = !Elems[I1].isUndef(); 5811 bool Def2 = !Elems[I2].isUndef(); 5812 if (Def1 || Def2) { 5813 SDValue Elem1 = Elems[Def1 ? I1 : I2]; 5814 SDValue Elem2 = Elems[Def2 ? I2 : I1]; 5815 Result = DAG.getNode(ISD::BITCAST, DL, VT, 5816 joinDwords(DAG, DL, Elem1, Elem2)); 5817 Done[I1] = true; 5818 Done[I2] = true; 5819 } else 5820 Result = DAG.getUNDEF(VT); 5821 } 5822 } 5823 5824 // Use VLVGx to insert the other elements. 5825 for (unsigned I = 0; I < NumElements; ++I) 5826 if (!Done[I] && !Elems[I].isUndef() && Elems[I] != ReplicatedVal) 5827 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Result, Elems[I], 5828 DAG.getConstant(I, DL, MVT::i32)); 5829 return Result; 5830 } 5831 5832 SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op, 5833 SelectionDAG &DAG) const { 5834 auto *BVN = cast<BuildVectorSDNode>(Op.getNode()); 5835 SDLoc DL(Op); 5836 EVT VT = Op.getValueType(); 5837 5838 if (BVN->isConstant()) { 5839 if (SystemZVectorConstantInfo(BVN).isVectorConstantLegal(Subtarget)) 5840 return Op; 5841 5842 // Fall back to loading it from memory. 5843 return SDValue(); 5844 } 5845 5846 // See if we should use shuffles to construct the vector from other vectors. 5847 if (SDValue Res = tryBuildVectorShuffle(DAG, BVN)) 5848 return Res; 5849 5850 // Detect SCALAR_TO_VECTOR conversions. 
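  // (That is, BUILD_VECTORs in which every element other than element 0 is
  // undefined.)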
5851 if (isOperationLegal(ISD::SCALAR_TO_VECTOR, VT) && isScalarToVector(Op)) 5852 return buildScalarToVector(DAG, DL, VT, Op.getOperand(0)); 5853 5854 // Otherwise use buildVector to build the vector up from GPRs. 5855 unsigned NumElements = Op.getNumOperands(); 5856 SmallVector<SDValue, SystemZ::VectorBytes> Ops(NumElements); 5857 for (unsigned I = 0; I < NumElements; ++I) 5858 Ops[I] = Op.getOperand(I); 5859 return buildVector(DAG, DL, VT, Ops); 5860 } 5861 5862 SDValue SystemZTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op, 5863 SelectionDAG &DAG) const { 5864 auto *VSN = cast<ShuffleVectorSDNode>(Op.getNode()); 5865 SDLoc DL(Op); 5866 EVT VT = Op.getValueType(); 5867 unsigned NumElements = VT.getVectorNumElements(); 5868 5869 if (VSN->isSplat()) { 5870 SDValue Op0 = Op.getOperand(0); 5871 unsigned Index = VSN->getSplatIndex(); 5872 assert(Index < VT.getVectorNumElements() && 5873 "Splat index should be defined and in first operand"); 5874 // See whether the value we're splatting is directly available as a scalar. 5875 if ((Index == 0 && Op0.getOpcode() == ISD::SCALAR_TO_VECTOR) || 5876 Op0.getOpcode() == ISD::BUILD_VECTOR) 5877 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0.getOperand(Index)); 5878 // Otherwise keep it as a vector-to-vector operation. 5879 return DAG.getNode(SystemZISD::SPLAT, DL, VT, Op.getOperand(0), 5880 DAG.getTargetConstant(Index, DL, MVT::i32)); 5881 } 5882 5883 GeneralShuffle GS(VT); 5884 for (unsigned I = 0; I < NumElements; ++I) { 5885 int Elt = VSN->getMaskElt(I); 5886 if (Elt < 0) 5887 GS.addUndef(); 5888 else if (!GS.add(Op.getOperand(unsigned(Elt) / NumElements), 5889 unsigned(Elt) % NumElements)) 5890 return SDValue(); 5891 } 5892 return GS.getNode(DAG, SDLoc(VSN)); 5893 } 5894 5895 SDValue SystemZTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op, 5896 SelectionDAG &DAG) const { 5897 SDLoc DL(Op); 5898 // Just insert the scalar into element 0 of an undefined vector. 5899 return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, 5900 Op.getValueType(), DAG.getUNDEF(Op.getValueType()), 5901 Op.getOperand(0), DAG.getConstant(0, DL, MVT::i32)); 5902 } 5903 5904 SDValue SystemZTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, 5905 SelectionDAG &DAG) const { 5906 // Handle insertions of floating-point values. 5907 SDLoc DL(Op); 5908 SDValue Op0 = Op.getOperand(0); 5909 SDValue Op1 = Op.getOperand(1); 5910 SDValue Op2 = Op.getOperand(2); 5911 EVT VT = Op.getValueType(); 5912 5913 // Insertions into constant indices of a v2f64 can be done using VPDI. 5914 // However, if the inserted value is a bitcast or a constant then it's 5915 // better to use GPRs, as below. 5916 if (VT == MVT::v2f64 && 5917 Op1.getOpcode() != ISD::BITCAST && 5918 Op1.getOpcode() != ISD::ConstantFP && 5919 Op2.getOpcode() == ISD::Constant) { 5920 uint64_t Index = Op2->getAsZExtVal(); 5921 unsigned Mask = VT.getVectorNumElements() - 1; 5922 if (Index <= Mask) 5923 return Op; 5924 } 5925 5926 // Otherwise bitcast to the equivalent integer form and insert via a GPR. 5927 MVT IntVT = MVT::getIntegerVT(VT.getScalarSizeInBits()); 5928 MVT IntVecVT = MVT::getVectorVT(IntVT, VT.getVectorNumElements()); 5929 SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntVecVT, 5930 DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0), 5931 DAG.getNode(ISD::BITCAST, DL, IntVT, Op1), Op2); 5932 return DAG.getNode(ISD::BITCAST, DL, VT, Res); 5933 } 5934 5935 SDValue 5936 SystemZTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op, 5937 SelectionDAG &DAG) const { 5938 // Handle extractions of floating-point values. 
5939 SDLoc DL(Op); 5940 SDValue Op0 = Op.getOperand(0); 5941 SDValue Op1 = Op.getOperand(1); 5942 EVT VT = Op.getValueType(); 5943 EVT VecVT = Op0.getValueType(); 5944 5945 // Extractions of constant indices can be done directly. 5946 if (auto *CIndexN = dyn_cast<ConstantSDNode>(Op1)) { 5947 uint64_t Index = CIndexN->getZExtValue(); 5948 unsigned Mask = VecVT.getVectorNumElements() - 1; 5949 if (Index <= Mask) 5950 return Op; 5951 } 5952 5953 // Otherwise bitcast to the equivalent integer form and extract via a GPR. 5954 MVT IntVT = MVT::getIntegerVT(VT.getSizeInBits()); 5955 MVT IntVecVT = MVT::getVectorVT(IntVT, VecVT.getVectorNumElements()); 5956 SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IntVT, 5957 DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0), Op1); 5958 return DAG.getNode(ISD::BITCAST, DL, VT, Res); 5959 } 5960 5961 SDValue SystemZTargetLowering:: 5962 lowerSIGN_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const { 5963 SDValue PackedOp = Op.getOperand(0); 5964 EVT OutVT = Op.getValueType(); 5965 EVT InVT = PackedOp.getValueType(); 5966 unsigned ToBits = OutVT.getScalarSizeInBits(); 5967 unsigned FromBits = InVT.getScalarSizeInBits(); 5968 do { 5969 FromBits *= 2; 5970 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(FromBits), 5971 SystemZ::VectorBits / FromBits); 5972 PackedOp = 5973 DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(PackedOp), OutVT, PackedOp); 5974 } while (FromBits != ToBits); 5975 return PackedOp; 5976 } 5977 5978 // Lower a ZERO_EXTEND_VECTOR_INREG to a vector shuffle with a zero vector. 5979 SDValue SystemZTargetLowering:: 5980 lowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const { 5981 SDValue PackedOp = Op.getOperand(0); 5982 SDLoc DL(Op); 5983 EVT OutVT = Op.getValueType(); 5984 EVT InVT = PackedOp.getValueType(); 5985 unsigned InNumElts = InVT.getVectorNumElements(); 5986 unsigned OutNumElts = OutVT.getVectorNumElements(); 5987 unsigned NumInPerOut = InNumElts / OutNumElts; 5988 5989 SDValue ZeroVec = 5990 DAG.getSplatVector(InVT, DL, DAG.getConstant(0, DL, InVT.getScalarType())); 5991 5992 SmallVector<int, 16> Mask(InNumElts); 5993 unsigned ZeroVecElt = InNumElts; 5994 for (unsigned PackedElt = 0; PackedElt < OutNumElts; PackedElt++) { 5995 unsigned MaskElt = PackedElt * NumInPerOut; 5996 unsigned End = MaskElt + NumInPerOut - 1; 5997 for (; MaskElt < End; MaskElt++) 5998 Mask[MaskElt] = ZeroVecElt++; 5999 Mask[MaskElt] = PackedElt; 6000 } 6001 SDValue Shuf = DAG.getVectorShuffle(InVT, DL, PackedOp, ZeroVec, Mask); 6002 return DAG.getNode(ISD::BITCAST, DL, OutVT, Shuf); 6003 } 6004 6005 SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG, 6006 unsigned ByScalar) const { 6007 // Look for cases where a vector shift can use the *_BY_SCALAR form. 6008 SDValue Op0 = Op.getOperand(0); 6009 SDValue Op1 = Op.getOperand(1); 6010 SDLoc DL(Op); 6011 EVT VT = Op.getValueType(); 6012 unsigned ElemBitSize = VT.getScalarSizeInBits(); 6013 6014 // See whether the shift vector is a splat represented as BUILD_VECTOR. 6015 if (auto *BVN = dyn_cast<BuildVectorSDNode>(Op1)) { 6016 APInt SplatBits, SplatUndef; 6017 unsigned SplatBitSize; 6018 bool HasAnyUndefs; 6019 // Check for constant splats. Use ElemBitSize as the minimum element 6020 // width and reject splats that need wider elements. 
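// The *_BY_SCALAR operations only use as many low bits of the shift
// amount as the element size requires, so the splat value can be masked
// to 12 bits below without changing the result.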
6021 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
6022 ElemBitSize, true) &&
6023 SplatBitSize == ElemBitSize) {
6024 SDValue Shift = DAG.getConstant(SplatBits.getZExtValue() & 0xfff,
6025 DL, MVT::i32);
6026 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
6027 }
6028 // Check for variable splats.
6029 BitVector UndefElements;
6030 SDValue Splat = BVN->getSplatValue(&UndefElements);
6031 if (Splat) {
6032 // Since i32 is the smallest legal type, we either need a no-op
6033 // or a truncation.
6034 SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Splat);
6035 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
6036 }
6037 }
6038 
6039 // See whether the shift vector is a splat represented as SHUFFLE_VECTOR,
6040 // and the shift amount is directly available in a GPR.
6041 if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(Op1)) {
6042 if (VSN->isSplat()) {
6043 SDValue VSNOp0 = VSN->getOperand(0);
6044 unsigned Index = VSN->getSplatIndex();
6045 assert(Index < VT.getVectorNumElements() &&
6046 "Splat index should be defined and in first operand");
6047 if ((Index == 0 && VSNOp0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
6048 VSNOp0.getOpcode() == ISD::BUILD_VECTOR) {
6049 // Since i32 is the smallest legal type, we either need a no-op
6050 // or a truncation.
6051 SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32,
6052 VSNOp0.getOperand(Index));
6053 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
6054 }
6055 }
6056 }
6057 
6058 // Otherwise just treat the current form as legal.
6059 return Op;
6060 }
6061 
6062 SDValue SystemZTargetLowering::lowerIS_FPCLASS(SDValue Op,
6063 SelectionDAG &DAG) const {
6064 SDLoc DL(Op);
6065 MVT ResultVT = Op.getSimpleValueType();
6066 SDValue Arg = Op.getOperand(0);
6067 unsigned Check = Op.getConstantOperandVal(1);
6068 
6069 unsigned TDCMask = 0;
6070 if (Check & fcSNan)
6071 TDCMask |= SystemZ::TDCMASK_SNAN_PLUS | SystemZ::TDCMASK_SNAN_MINUS;
6072 if (Check & fcQNan)
6073 TDCMask |= SystemZ::TDCMASK_QNAN_PLUS | SystemZ::TDCMASK_QNAN_MINUS;
6074 if (Check & fcPosInf)
6075 TDCMask |= SystemZ::TDCMASK_INFINITY_PLUS;
6076 if (Check & fcNegInf)
6077 TDCMask |= SystemZ::TDCMASK_INFINITY_MINUS;
6078 if (Check & fcPosNormal)
6079 TDCMask |= SystemZ::TDCMASK_NORMAL_PLUS;
6080 if (Check & fcNegNormal)
6081 TDCMask |= SystemZ::TDCMASK_NORMAL_MINUS;
6082 if (Check & fcPosSubnormal)
6083 TDCMask |= SystemZ::TDCMASK_SUBNORMAL_PLUS;
6084 if (Check & fcNegSubnormal)
6085 TDCMask |= SystemZ::TDCMASK_SUBNORMAL_MINUS;
6086 if (Check & fcPosZero)
6087 TDCMask |= SystemZ::TDCMASK_ZERO_PLUS;
6088 if (Check & fcNegZero)
6089 TDCMask |= SystemZ::TDCMASK_ZERO_MINUS;
6090 SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, MVT::i64);
6091 
6092 SDValue Intr = DAG.getNode(SystemZISD::TDC, DL, ResultVT, Arg, TDCMaskV);
6093 return getCCResult(DAG, Intr);
6094 }
6095 
6096 SDValue SystemZTargetLowering::lowerREADCYCLECOUNTER(SDValue Op,
6097 SelectionDAG &DAG) const {
6098 SDLoc DL(Op);
6099 SDValue Chain = Op.getOperand(0);
6100 
6101 // STCKF only supports a memory operand, so we have to use a temporary.
6102 SDValue StackPtr = DAG.CreateStackTemporary(MVT::i64);
6103 int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
6104 MachinePointerInfo MPI =
6105 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
6106 
6107 // Use STCKF to store the TOD clock into the temporary.
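// The store is modelled as a memory intrinsic so that the chain and the
// MachineMemOperand for the stack slot are preserved explicitly.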
6108 SDValue StoreOps[] = {Chain, StackPtr}; 6109 Chain = DAG.getMemIntrinsicNode( 6110 SystemZISD::STCKF, DL, DAG.getVTList(MVT::Other), StoreOps, MVT::i64, 6111 MPI, MaybeAlign(), MachineMemOperand::MOStore); 6112 6113 // And read it back from there. 6114 return DAG.getLoad(MVT::i64, DL, Chain, StackPtr, MPI); 6115 } 6116 6117 SDValue SystemZTargetLowering::LowerOperation(SDValue Op, 6118 SelectionDAG &DAG) const { 6119 switch (Op.getOpcode()) { 6120 case ISD::FRAMEADDR: 6121 return lowerFRAMEADDR(Op, DAG); 6122 case ISD::RETURNADDR: 6123 return lowerRETURNADDR(Op, DAG); 6124 case ISD::BR_CC: 6125 return lowerBR_CC(Op, DAG); 6126 case ISD::SELECT_CC: 6127 return lowerSELECT_CC(Op, DAG); 6128 case ISD::SETCC: 6129 return lowerSETCC(Op, DAG); 6130 case ISD::STRICT_FSETCC: 6131 return lowerSTRICT_FSETCC(Op, DAG, false); 6132 case ISD::STRICT_FSETCCS: 6133 return lowerSTRICT_FSETCC(Op, DAG, true); 6134 case ISD::GlobalAddress: 6135 return lowerGlobalAddress(cast<GlobalAddressSDNode>(Op), DAG); 6136 case ISD::GlobalTLSAddress: 6137 return lowerGlobalTLSAddress(cast<GlobalAddressSDNode>(Op), DAG); 6138 case ISD::BlockAddress: 6139 return lowerBlockAddress(cast<BlockAddressSDNode>(Op), DAG); 6140 case ISD::JumpTable: 6141 return lowerJumpTable(cast<JumpTableSDNode>(Op), DAG); 6142 case ISD::ConstantPool: 6143 return lowerConstantPool(cast<ConstantPoolSDNode>(Op), DAG); 6144 case ISD::BITCAST: 6145 return lowerBITCAST(Op, DAG); 6146 case ISD::VASTART: 6147 return lowerVASTART(Op, DAG); 6148 case ISD::VACOPY: 6149 return lowerVACOPY(Op, DAG); 6150 case ISD::DYNAMIC_STACKALLOC: 6151 return lowerDYNAMIC_STACKALLOC(Op, DAG); 6152 case ISD::GET_DYNAMIC_AREA_OFFSET: 6153 return lowerGET_DYNAMIC_AREA_OFFSET(Op, DAG); 6154 case ISD::SMUL_LOHI: 6155 return lowerSMUL_LOHI(Op, DAG); 6156 case ISD::UMUL_LOHI: 6157 return lowerUMUL_LOHI(Op, DAG); 6158 case ISD::SDIVREM: 6159 return lowerSDIVREM(Op, DAG); 6160 case ISD::UDIVREM: 6161 return lowerUDIVREM(Op, DAG); 6162 case ISD::SADDO: 6163 case ISD::SSUBO: 6164 case ISD::UADDO: 6165 case ISD::USUBO: 6166 return lowerXALUO(Op, DAG); 6167 case ISD::UADDO_CARRY: 6168 case ISD::USUBO_CARRY: 6169 return lowerUADDSUBO_CARRY(Op, DAG); 6170 case ISD::OR: 6171 return lowerOR(Op, DAG); 6172 case ISD::CTPOP: 6173 return lowerCTPOP(Op, DAG); 6174 case ISD::VECREDUCE_ADD: 6175 return lowerVECREDUCE_ADD(Op, DAG); 6176 case ISD::ATOMIC_FENCE: 6177 return lowerATOMIC_FENCE(Op, DAG); 6178 case ISD::ATOMIC_SWAP: 6179 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW); 6180 case ISD::ATOMIC_STORE: 6181 case ISD::ATOMIC_LOAD: 6182 return lowerATOMIC_LDST_I128(Op, DAG); 6183 case ISD::ATOMIC_LOAD_ADD: 6184 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_ADD); 6185 case ISD::ATOMIC_LOAD_SUB: 6186 return lowerATOMIC_LOAD_SUB(Op, DAG); 6187 case ISD::ATOMIC_LOAD_AND: 6188 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_AND); 6189 case ISD::ATOMIC_LOAD_OR: 6190 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_OR); 6191 case ISD::ATOMIC_LOAD_XOR: 6192 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_XOR); 6193 case ISD::ATOMIC_LOAD_NAND: 6194 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_NAND); 6195 case ISD::ATOMIC_LOAD_MIN: 6196 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MIN); 6197 case ISD::ATOMIC_LOAD_MAX: 6198 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MAX); 6199 case ISD::ATOMIC_LOAD_UMIN: 6200 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMIN); 6201 
case ISD::ATOMIC_LOAD_UMAX: 6202 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMAX); 6203 case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: 6204 return lowerATOMIC_CMP_SWAP(Op, DAG); 6205 case ISD::STACKSAVE: 6206 return lowerSTACKSAVE(Op, DAG); 6207 case ISD::STACKRESTORE: 6208 return lowerSTACKRESTORE(Op, DAG); 6209 case ISD::PREFETCH: 6210 return lowerPREFETCH(Op, DAG); 6211 case ISD::INTRINSIC_W_CHAIN: 6212 return lowerINTRINSIC_W_CHAIN(Op, DAG); 6213 case ISD::INTRINSIC_WO_CHAIN: 6214 return lowerINTRINSIC_WO_CHAIN(Op, DAG); 6215 case ISD::BUILD_VECTOR: 6216 return lowerBUILD_VECTOR(Op, DAG); 6217 case ISD::VECTOR_SHUFFLE: 6218 return lowerVECTOR_SHUFFLE(Op, DAG); 6219 case ISD::SCALAR_TO_VECTOR: 6220 return lowerSCALAR_TO_VECTOR(Op, DAG); 6221 case ISD::INSERT_VECTOR_ELT: 6222 return lowerINSERT_VECTOR_ELT(Op, DAG); 6223 case ISD::EXTRACT_VECTOR_ELT: 6224 return lowerEXTRACT_VECTOR_ELT(Op, DAG); 6225 case ISD::SIGN_EXTEND_VECTOR_INREG: 6226 return lowerSIGN_EXTEND_VECTOR_INREG(Op, DAG); 6227 case ISD::ZERO_EXTEND_VECTOR_INREG: 6228 return lowerZERO_EXTEND_VECTOR_INREG(Op, DAG); 6229 case ISD::SHL: 6230 return lowerShift(Op, DAG, SystemZISD::VSHL_BY_SCALAR); 6231 case ISD::SRL: 6232 return lowerShift(Op, DAG, SystemZISD::VSRL_BY_SCALAR); 6233 case ISD::SRA: 6234 return lowerShift(Op, DAG, SystemZISD::VSRA_BY_SCALAR); 6235 case ISD::ROTL: 6236 return lowerShift(Op, DAG, SystemZISD::VROTL_BY_SCALAR); 6237 case ISD::IS_FPCLASS: 6238 return lowerIS_FPCLASS(Op, DAG); 6239 case ISD::GET_ROUNDING: 6240 return lowerGET_ROUNDING(Op, DAG); 6241 case ISD::READCYCLECOUNTER: 6242 return lowerREADCYCLECOUNTER(Op, DAG); 6243 default: 6244 llvm_unreachable("Unexpected node to lower"); 6245 } 6246 } 6247 6248 static SDValue expandBitCastI128ToF128(SelectionDAG &DAG, SDValue Src, 6249 const SDLoc &SL) { 6250 // If i128 is legal, just use a normal bitcast. 6251 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128)) 6252 return DAG.getBitcast(MVT::f128, Src); 6253 6254 // Otherwise, f128 must live in FP128, so do a partwise move. 6255 assert(DAG.getTargetLoweringInfo().getRepRegClassFor(MVT::f128) == 6256 &SystemZ::FP128BitRegClass); 6257 6258 SDValue Hi, Lo; 6259 std::tie(Lo, Hi) = DAG.SplitScalar(Src, SL, MVT::i64, MVT::i64); 6260 6261 Hi = DAG.getBitcast(MVT::f64, Hi); 6262 Lo = DAG.getBitcast(MVT::f64, Lo); 6263 6264 SDNode *Pair = DAG.getMachineNode( 6265 SystemZ::REG_SEQUENCE, SL, MVT::f128, 6266 {DAG.getTargetConstant(SystemZ::FP128BitRegClassID, SL, MVT::i32), Lo, 6267 DAG.getTargetConstant(SystemZ::subreg_l64, SL, MVT::i32), Hi, 6268 DAG.getTargetConstant(SystemZ::subreg_h64, SL, MVT::i32)}); 6269 return SDValue(Pair, 0); 6270 } 6271 6272 static SDValue expandBitCastF128ToI128(SelectionDAG &DAG, SDValue Src, 6273 const SDLoc &SL) { 6274 // If i128 is legal, just use a normal bitcast. 6275 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128)) 6276 return DAG.getBitcast(MVT::i128, Src); 6277 6278 // Otherwise, f128 must live in FP128, so do a partwise move. 
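// Extract the two f64 halves as subregisters, bitcast each to i64, and
// reassemble them into an i128 with BUILD_PAIR (low half first).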
6279 assert(DAG.getTargetLoweringInfo().getRepRegClassFor(MVT::f128) == 6280 &SystemZ::FP128BitRegClass); 6281 6282 SDValue LoFP = 6283 DAG.getTargetExtractSubreg(SystemZ::subreg_l64, SL, MVT::f64, Src); 6284 SDValue HiFP = 6285 DAG.getTargetExtractSubreg(SystemZ::subreg_h64, SL, MVT::f64, Src); 6286 SDValue Lo = DAG.getNode(ISD::BITCAST, SL, MVT::i64, LoFP); 6287 SDValue Hi = DAG.getNode(ISD::BITCAST, SL, MVT::i64, HiFP); 6288 6289 return DAG.getNode(ISD::BUILD_PAIR, SL, MVT::i128, Lo, Hi); 6290 } 6291 6292 // Lower operations with invalid operand or result types (currently used 6293 // only for 128-bit integer types). 6294 void 6295 SystemZTargetLowering::LowerOperationWrapper(SDNode *N, 6296 SmallVectorImpl<SDValue> &Results, 6297 SelectionDAG &DAG) const { 6298 switch (N->getOpcode()) { 6299 case ISD::ATOMIC_LOAD: { 6300 SDLoc DL(N); 6301 SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::Other); 6302 SDValue Ops[] = { N->getOperand(0), N->getOperand(1) }; 6303 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand(); 6304 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_LOAD_128, 6305 DL, Tys, Ops, MVT::i128, MMO); 6306 6307 SDValue Lowered = lowerGR128ToI128(DAG, Res); 6308 if (N->getValueType(0) == MVT::f128) 6309 Lowered = expandBitCastI128ToF128(DAG, Lowered, DL); 6310 Results.push_back(Lowered); 6311 Results.push_back(Res.getValue(1)); 6312 break; 6313 } 6314 case ISD::ATOMIC_STORE: { 6315 SDLoc DL(N); 6316 SDVTList Tys = DAG.getVTList(MVT::Other); 6317 SDValue Val = N->getOperand(1); 6318 if (Val.getValueType() == MVT::f128) 6319 Val = expandBitCastF128ToI128(DAG, Val, DL); 6320 Val = lowerI128ToGR128(DAG, Val); 6321 6322 SDValue Ops[] = {N->getOperand(0), Val, N->getOperand(2)}; 6323 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand(); 6324 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_STORE_128, 6325 DL, Tys, Ops, MVT::i128, MMO); 6326 // We have to enforce sequential consistency by performing a 6327 // serialization operation after the store. 
6328 if (cast<AtomicSDNode>(N)->getSuccessOrdering() == 6329 AtomicOrdering::SequentiallyConsistent) 6330 Res = SDValue(DAG.getMachineNode(SystemZ::Serialize, DL, 6331 MVT::Other, Res), 0); 6332 Results.push_back(Res); 6333 break; 6334 } 6335 case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: { 6336 SDLoc DL(N); 6337 SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other); 6338 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), 6339 lowerI128ToGR128(DAG, N->getOperand(2)), 6340 lowerI128ToGR128(DAG, N->getOperand(3)) }; 6341 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand(); 6342 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP_128, 6343 DL, Tys, Ops, MVT::i128, MMO); 6344 SDValue Success = emitSETCC(DAG, DL, Res.getValue(1), 6345 SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ); 6346 Success = DAG.getZExtOrTrunc(Success, DL, N->getValueType(1)); 6347 Results.push_back(lowerGR128ToI128(DAG, Res)); 6348 Results.push_back(Success); 6349 Results.push_back(Res.getValue(2)); 6350 break; 6351 } 6352 case ISD::BITCAST: { 6353 SDValue Src = N->getOperand(0); 6354 if (N->getValueType(0) == MVT::i128 && Src.getValueType() == MVT::f128 && 6355 !useSoftFloat()) { 6356 SDLoc DL(N); 6357 Results.push_back(expandBitCastF128ToI128(DAG, Src, DL)); 6358 } 6359 break; 6360 } 6361 default: 6362 llvm_unreachable("Unexpected node to lower"); 6363 } 6364 } 6365 6366 void 6367 SystemZTargetLowering::ReplaceNodeResults(SDNode *N, 6368 SmallVectorImpl<SDValue> &Results, 6369 SelectionDAG &DAG) const { 6370 return LowerOperationWrapper(N, Results, DAG); 6371 } 6372 6373 const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { 6374 #define OPCODE(NAME) case SystemZISD::NAME: return "SystemZISD::" #NAME 6375 switch ((SystemZISD::NodeType)Opcode) { 6376 case SystemZISD::FIRST_NUMBER: break; 6377 OPCODE(RET_GLUE); 6378 OPCODE(CALL); 6379 OPCODE(SIBCALL); 6380 OPCODE(TLS_GDCALL); 6381 OPCODE(TLS_LDCALL); 6382 OPCODE(PCREL_WRAPPER); 6383 OPCODE(PCREL_OFFSET); 6384 OPCODE(ICMP); 6385 OPCODE(FCMP); 6386 OPCODE(STRICT_FCMP); 6387 OPCODE(STRICT_FCMPS); 6388 OPCODE(TM); 6389 OPCODE(BR_CCMASK); 6390 OPCODE(SELECT_CCMASK); 6391 OPCODE(ADJDYNALLOC); 6392 OPCODE(PROBED_ALLOCA); 6393 OPCODE(POPCNT); 6394 OPCODE(SMUL_LOHI); 6395 OPCODE(UMUL_LOHI); 6396 OPCODE(SDIVREM); 6397 OPCODE(UDIVREM); 6398 OPCODE(SADDO); 6399 OPCODE(SSUBO); 6400 OPCODE(UADDO); 6401 OPCODE(USUBO); 6402 OPCODE(ADDCARRY); 6403 OPCODE(SUBCARRY); 6404 OPCODE(GET_CCMASK); 6405 OPCODE(MVC); 6406 OPCODE(NC); 6407 OPCODE(OC); 6408 OPCODE(XC); 6409 OPCODE(CLC); 6410 OPCODE(MEMSET_MVC); 6411 OPCODE(STPCPY); 6412 OPCODE(STRCMP); 6413 OPCODE(SEARCH_STRING); 6414 OPCODE(IPM); 6415 OPCODE(TBEGIN); 6416 OPCODE(TBEGIN_NOFLOAT); 6417 OPCODE(TEND); 6418 OPCODE(BYTE_MASK); 6419 OPCODE(ROTATE_MASK); 6420 OPCODE(REPLICATE); 6421 OPCODE(JOIN_DWORDS); 6422 OPCODE(SPLAT); 6423 OPCODE(MERGE_HIGH); 6424 OPCODE(MERGE_LOW); 6425 OPCODE(SHL_DOUBLE); 6426 OPCODE(PERMUTE_DWORDS); 6427 OPCODE(PERMUTE); 6428 OPCODE(PACK); 6429 OPCODE(PACKS_CC); 6430 OPCODE(PACKLS_CC); 6431 OPCODE(UNPACK_HIGH); 6432 OPCODE(UNPACKL_HIGH); 6433 OPCODE(UNPACK_LOW); 6434 OPCODE(UNPACKL_LOW); 6435 OPCODE(VSHL_BY_SCALAR); 6436 OPCODE(VSRL_BY_SCALAR); 6437 OPCODE(VSRA_BY_SCALAR); 6438 OPCODE(VROTL_BY_SCALAR); 6439 OPCODE(VSUM); 6440 OPCODE(VACC); 6441 OPCODE(VSCBI); 6442 OPCODE(VAC); 6443 OPCODE(VSBI); 6444 OPCODE(VACCC); 6445 OPCODE(VSBCBI); 6446 OPCODE(VICMPE); 6447 OPCODE(VICMPH); 6448 OPCODE(VICMPHL); 6449 OPCODE(VICMPES); 6450 OPCODE(VICMPHS); 6451 OPCODE(VICMPHLS); 
6452 OPCODE(VFCMPE); 6453 OPCODE(STRICT_VFCMPE); 6454 OPCODE(STRICT_VFCMPES); 6455 OPCODE(VFCMPH); 6456 OPCODE(STRICT_VFCMPH); 6457 OPCODE(STRICT_VFCMPHS); 6458 OPCODE(VFCMPHE); 6459 OPCODE(STRICT_VFCMPHE); 6460 OPCODE(STRICT_VFCMPHES); 6461 OPCODE(VFCMPES); 6462 OPCODE(VFCMPHS); 6463 OPCODE(VFCMPHES); 6464 OPCODE(VFTCI); 6465 OPCODE(VEXTEND); 6466 OPCODE(STRICT_VEXTEND); 6467 OPCODE(VROUND); 6468 OPCODE(STRICT_VROUND); 6469 OPCODE(VTM); 6470 OPCODE(SCMP128HI); 6471 OPCODE(UCMP128HI); 6472 OPCODE(VFAE_CC); 6473 OPCODE(VFAEZ_CC); 6474 OPCODE(VFEE_CC); 6475 OPCODE(VFEEZ_CC); 6476 OPCODE(VFENE_CC); 6477 OPCODE(VFENEZ_CC); 6478 OPCODE(VISTR_CC); 6479 OPCODE(VSTRC_CC); 6480 OPCODE(VSTRCZ_CC); 6481 OPCODE(VSTRS_CC); 6482 OPCODE(VSTRSZ_CC); 6483 OPCODE(TDC); 6484 OPCODE(ATOMIC_SWAPW); 6485 OPCODE(ATOMIC_LOADW_ADD); 6486 OPCODE(ATOMIC_LOADW_SUB); 6487 OPCODE(ATOMIC_LOADW_AND); 6488 OPCODE(ATOMIC_LOADW_OR); 6489 OPCODE(ATOMIC_LOADW_XOR); 6490 OPCODE(ATOMIC_LOADW_NAND); 6491 OPCODE(ATOMIC_LOADW_MIN); 6492 OPCODE(ATOMIC_LOADW_MAX); 6493 OPCODE(ATOMIC_LOADW_UMIN); 6494 OPCODE(ATOMIC_LOADW_UMAX); 6495 OPCODE(ATOMIC_CMP_SWAPW); 6496 OPCODE(ATOMIC_CMP_SWAP); 6497 OPCODE(ATOMIC_LOAD_128); 6498 OPCODE(ATOMIC_STORE_128); 6499 OPCODE(ATOMIC_CMP_SWAP_128); 6500 OPCODE(LRV); 6501 OPCODE(STRV); 6502 OPCODE(VLER); 6503 OPCODE(VSTER); 6504 OPCODE(STCKF); 6505 OPCODE(PREFETCH); 6506 OPCODE(ADA_ENTRY); 6507 } 6508 return nullptr; 6509 #undef OPCODE 6510 } 6511 6512 // Return true if VT is a vector whose elements are a whole number of bytes 6513 // in width. Also check for presence of vector support. 6514 bool SystemZTargetLowering::canTreatAsByteVector(EVT VT) const { 6515 if (!Subtarget.hasVector()) 6516 return false; 6517 6518 return VT.isVector() && VT.getScalarSizeInBits() % 8 == 0 && VT.isSimple(); 6519 } 6520 6521 // Try to simplify an EXTRACT_VECTOR_ELT from a vector of type VecVT 6522 // producing a result of type ResVT. Op is a possibly bitcast version 6523 // of the input vector and Index is the index (based on type VecVT) that 6524 // should be extracted. Return the new extraction if a simplification 6525 // was possible or if Force is true. 6526 SDValue SystemZTargetLowering::combineExtract(const SDLoc &DL, EVT ResVT, 6527 EVT VecVT, SDValue Op, 6528 unsigned Index, 6529 DAGCombinerInfo &DCI, 6530 bool Force) const { 6531 SelectionDAG &DAG = DCI.DAG; 6532 6533 // The number of bytes being extracted. 6534 unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize(); 6535 6536 for (;;) { 6537 unsigned Opcode = Op.getOpcode(); 6538 if (Opcode == ISD::BITCAST) 6539 // Look through bitcasts. 6540 Op = Op.getOperand(0); 6541 else if ((Opcode == ISD::VECTOR_SHUFFLE || Opcode == SystemZISD::SPLAT) && 6542 canTreatAsByteVector(Op.getValueType())) { 6543 // Get a VPERM-like permute mask and see whether the bytes covered 6544 // by the extracted element are a contiguous sequence from one 6545 // source operand. 6546 SmallVector<int, SystemZ::VectorBytes> Bytes; 6547 if (!getVPermMask(Op, Bytes)) 6548 break; 6549 int First; 6550 if (!getShuffleInput(Bytes, Index * BytesPerElement, 6551 BytesPerElement, First)) 6552 break; 6553 if (First < 0) 6554 return DAG.getUNDEF(ResVT); 6555 // Make sure the contiguous sequence starts at a multiple of the 6556 // original element size. 6557 unsigned Byte = unsigned(First) % Bytes.size(); 6558 if (Byte % BytesPerElement != 0) 6559 break; 6560 // We can get the extracted value directly from an input. 
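// Remap Index into that operand and retry on it, forcing the final
// extraction so that the simplified form is actually emitted.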
6561 Index = Byte / BytesPerElement; 6562 Op = Op.getOperand(unsigned(First) / Bytes.size()); 6563 Force = true; 6564 } else if (Opcode == ISD::BUILD_VECTOR && 6565 canTreatAsByteVector(Op.getValueType())) { 6566 // We can only optimize this case if the BUILD_VECTOR elements are 6567 // at least as wide as the extracted value. 6568 EVT OpVT = Op.getValueType(); 6569 unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize(); 6570 if (OpBytesPerElement < BytesPerElement) 6571 break; 6572 // Make sure that the least-significant bit of the extracted value 6573 // is the least significant bit of an input. 6574 unsigned End = (Index + 1) * BytesPerElement; 6575 if (End % OpBytesPerElement != 0) 6576 break; 6577 // We're extracting the low part of one operand of the BUILD_VECTOR. 6578 Op = Op.getOperand(End / OpBytesPerElement - 1); 6579 if (!Op.getValueType().isInteger()) { 6580 EVT VT = MVT::getIntegerVT(Op.getValueSizeInBits()); 6581 Op = DAG.getNode(ISD::BITCAST, DL, VT, Op); 6582 DCI.AddToWorklist(Op.getNode()); 6583 } 6584 EVT VT = MVT::getIntegerVT(ResVT.getSizeInBits()); 6585 Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op); 6586 if (VT != ResVT) { 6587 DCI.AddToWorklist(Op.getNode()); 6588 Op = DAG.getNode(ISD::BITCAST, DL, ResVT, Op); 6589 } 6590 return Op; 6591 } else if ((Opcode == ISD::SIGN_EXTEND_VECTOR_INREG || 6592 Opcode == ISD::ZERO_EXTEND_VECTOR_INREG || 6593 Opcode == ISD::ANY_EXTEND_VECTOR_INREG) && 6594 canTreatAsByteVector(Op.getValueType()) && 6595 canTreatAsByteVector(Op.getOperand(0).getValueType())) { 6596 // Make sure that only the unextended bits are significant. 6597 EVT ExtVT = Op.getValueType(); 6598 EVT OpVT = Op.getOperand(0).getValueType(); 6599 unsigned ExtBytesPerElement = ExtVT.getVectorElementType().getStoreSize(); 6600 unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize(); 6601 unsigned Byte = Index * BytesPerElement; 6602 unsigned SubByte = Byte % ExtBytesPerElement; 6603 unsigned MinSubByte = ExtBytesPerElement - OpBytesPerElement; 6604 if (SubByte < MinSubByte || 6605 SubByte + BytesPerElement > ExtBytesPerElement) 6606 break; 6607 // Get the byte offset of the unextended element 6608 Byte = Byte / ExtBytesPerElement * OpBytesPerElement; 6609 // ...then add the byte offset relative to that element. 6610 Byte += SubByte - MinSubByte; 6611 if (Byte % BytesPerElement != 0) 6612 break; 6613 Op = Op.getOperand(0); 6614 Index = Byte / BytesPerElement; 6615 Force = true; 6616 } else 6617 break; 6618 } 6619 if (Force) { 6620 if (Op.getValueType() != VecVT) { 6621 Op = DAG.getNode(ISD::BITCAST, DL, VecVT, Op); 6622 DCI.AddToWorklist(Op.getNode()); 6623 } 6624 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op, 6625 DAG.getConstant(Index, DL, MVT::i32)); 6626 } 6627 return SDValue(); 6628 } 6629 6630 // Optimize vector operations in scalar value Op on the basis that Op 6631 // is truncated to TruncVT. 6632 SDValue SystemZTargetLowering::combineTruncateExtract( 6633 const SDLoc &DL, EVT TruncVT, SDValue Op, DAGCombinerInfo &DCI) const { 6634 // If we have (trunc (extract_vector_elt X, Y)), try to turn it into 6635 // (extract_vector_elt (bitcast X), Y'), where (bitcast X) has elements 6636 // of type TruncVT. 
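// For example, truncating to i16 an i32 element extracted from a v4i32
// becomes an extraction of element 2*Y+1 from the v8i16 bitcast, since
// on this big-endian target the last sub-element is the least significant.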
6637 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && 6638 TruncVT.getSizeInBits() % 8 == 0) { 6639 SDValue Vec = Op.getOperand(0); 6640 EVT VecVT = Vec.getValueType(); 6641 if (canTreatAsByteVector(VecVT)) { 6642 if (auto *IndexN = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { 6643 unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize(); 6644 unsigned TruncBytes = TruncVT.getStoreSize(); 6645 if (BytesPerElement % TruncBytes == 0) { 6646 // Calculate the value of Y' in the above description. We are 6647 // splitting the original elements into Scale equal-sized pieces 6648 // and for truncation purposes want the last (least-significant) 6649 // of these pieces for IndexN. This is easiest to do by calculating 6650 // the start index of the following element and then subtracting 1. 6651 unsigned Scale = BytesPerElement / TruncBytes; 6652 unsigned NewIndex = (IndexN->getZExtValue() + 1) * Scale - 1; 6653 6654 // Defer the creation of the bitcast from X to combineExtract, 6655 // which might be able to optimize the extraction. 6656 VecVT = EVT::getVectorVT(*DCI.DAG.getContext(), 6657 MVT::getIntegerVT(TruncBytes * 8), 6658 VecVT.getStoreSize() / TruncBytes); 6659 EVT ResVT = (TruncBytes < 4 ? MVT::i32 : TruncVT); 6660 return combineExtract(DL, ResVT, VecVT, Vec, NewIndex, DCI, true); 6661 } 6662 } 6663 } 6664 } 6665 return SDValue(); 6666 } 6667 6668 SDValue SystemZTargetLowering::combineZERO_EXTEND( 6669 SDNode *N, DAGCombinerInfo &DCI) const { 6670 // Convert (zext (select_ccmask C1, C2)) into (select_ccmask C1', C2') 6671 SelectionDAG &DAG = DCI.DAG; 6672 SDValue N0 = N->getOperand(0); 6673 EVT VT = N->getValueType(0); 6674 if (N0.getOpcode() == SystemZISD::SELECT_CCMASK) { 6675 auto *TrueOp = dyn_cast<ConstantSDNode>(N0.getOperand(0)); 6676 auto *FalseOp = dyn_cast<ConstantSDNode>(N0.getOperand(1)); 6677 if (TrueOp && FalseOp) { 6678 SDLoc DL(N0); 6679 SDValue Ops[] = { DAG.getConstant(TrueOp->getZExtValue(), DL, VT), 6680 DAG.getConstant(FalseOp->getZExtValue(), DL, VT), 6681 N0.getOperand(2), N0.getOperand(3), N0.getOperand(4) }; 6682 SDValue NewSelect = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VT, Ops); 6683 // If N0 has multiple uses, change other uses as well. 6684 if (!N0.hasOneUse()) { 6685 SDValue TruncSelect = 6686 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), NewSelect); 6687 DCI.CombineTo(N0.getNode(), TruncSelect); 6688 } 6689 return NewSelect; 6690 } 6691 } 6692 // Convert (zext (xor (trunc X), C)) into (xor (trunc X), C') if the size 6693 // of the result is smaller than the size of X and all the truncated bits 6694 // of X are already zero. 
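// (C' is simply C zero-extended to the result type, and X is truncated
// directly to that type instead of the narrower one.)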
6695 if (N0.getOpcode() == ISD::XOR && 6696 N0.hasOneUse() && N0.getOperand(0).hasOneUse() && 6697 N0.getOperand(0).getOpcode() == ISD::TRUNCATE && 6698 N0.getOperand(1).getOpcode() == ISD::Constant) { 6699 SDValue X = N0.getOperand(0).getOperand(0); 6700 if (VT.isScalarInteger() && VT.getSizeInBits() < X.getValueSizeInBits()) { 6701 KnownBits Known = DAG.computeKnownBits(X); 6702 APInt TruncatedBits = APInt::getBitsSet(X.getValueSizeInBits(), 6703 N0.getValueSizeInBits(), 6704 VT.getSizeInBits()); 6705 if (TruncatedBits.isSubsetOf(Known.Zero)) { 6706 X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X); 6707 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits()); 6708 return DAG.getNode(ISD::XOR, SDLoc(N0), VT, 6709 X, DAG.getConstant(Mask, SDLoc(N0), VT)); 6710 } 6711 } 6712 } 6713 6714 return SDValue(); 6715 } 6716 6717 SDValue SystemZTargetLowering::combineSIGN_EXTEND_INREG( 6718 SDNode *N, DAGCombinerInfo &DCI) const { 6719 // Convert (sext_in_reg (setcc LHS, RHS, COND), i1) 6720 // and (sext_in_reg (any_extend (setcc LHS, RHS, COND)), i1) 6721 // into (select_cc LHS, RHS, -1, 0, COND) 6722 SelectionDAG &DAG = DCI.DAG; 6723 SDValue N0 = N->getOperand(0); 6724 EVT VT = N->getValueType(0); 6725 EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT(); 6726 if (N0.hasOneUse() && N0.getOpcode() == ISD::ANY_EXTEND) 6727 N0 = N0.getOperand(0); 6728 if (EVT == MVT::i1 && N0.hasOneUse() && N0.getOpcode() == ISD::SETCC) { 6729 SDLoc DL(N0); 6730 SDValue Ops[] = { N0.getOperand(0), N0.getOperand(1), 6731 DAG.getAllOnesConstant(DL, VT), 6732 DAG.getConstant(0, DL, VT), N0.getOperand(2) }; 6733 return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops); 6734 } 6735 return SDValue(); 6736 } 6737 6738 SDValue SystemZTargetLowering::combineSIGN_EXTEND( 6739 SDNode *N, DAGCombinerInfo &DCI) const { 6740 // Convert (sext (ashr (shl X, C1), C2)) to 6741 // (ashr (shl (anyext X), C1'), C2')), since wider shifts are as 6742 // cheap as narrower ones. 6743 SelectionDAG &DAG = DCI.DAG; 6744 SDValue N0 = N->getOperand(0); 6745 EVT VT = N->getValueType(0); 6746 if (N0.hasOneUse() && N0.getOpcode() == ISD::SRA) { 6747 auto *SraAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)); 6748 SDValue Inner = N0.getOperand(0); 6749 if (SraAmt && Inner.hasOneUse() && Inner.getOpcode() == ISD::SHL) { 6750 if (auto *ShlAmt = dyn_cast<ConstantSDNode>(Inner.getOperand(1))) { 6751 unsigned Extra = (VT.getSizeInBits() - N0.getValueSizeInBits()); 6752 unsigned NewShlAmt = ShlAmt->getZExtValue() + Extra; 6753 unsigned NewSraAmt = SraAmt->getZExtValue() + Extra; 6754 EVT ShiftVT = N0.getOperand(1).getValueType(); 6755 SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, SDLoc(Inner), VT, 6756 Inner.getOperand(0)); 6757 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(Inner), VT, Ext, 6758 DAG.getConstant(NewShlAmt, SDLoc(Inner), 6759 ShiftVT)); 6760 return DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, 6761 DAG.getConstant(NewSraAmt, SDLoc(N0), ShiftVT)); 6762 } 6763 } 6764 } 6765 6766 return SDValue(); 6767 } 6768 6769 SDValue SystemZTargetLowering::combineMERGE( 6770 SDNode *N, DAGCombinerInfo &DCI) const { 6771 SelectionDAG &DAG = DCI.DAG; 6772 unsigned Opcode = N->getOpcode(); 6773 SDValue Op0 = N->getOperand(0); 6774 SDValue Op1 = N->getOperand(1); 6775 if (Op0.getOpcode() == ISD::BITCAST) 6776 Op0 = Op0.getOperand(0); 6777 if (ISD::isBuildVectorAllZeros(Op0.getNode())) { 6778 // (z_merge_* 0, 0) -> 0. This is mostly useful for using VLLEZF 6779 // for v4f32. 6780 if (Op1 == N->getOperand(0)) 6781 return Op1; 6782 // (z_merge_? 0, X) -> (z_unpackl_? 0, X). 
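// The logical unpack zero-extends each element of X to twice its width,
// which gives the same byte pattern as interleaving with zeros. Such
// unpacks only exist for source elements of at most 4 bytes, hence the
// check below.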
6783 EVT VT = Op1.getValueType(); 6784 unsigned ElemBytes = VT.getVectorElementType().getStoreSize(); 6785 if (ElemBytes <= 4) { 6786 Opcode = (Opcode == SystemZISD::MERGE_HIGH ? 6787 SystemZISD::UNPACKL_HIGH : SystemZISD::UNPACKL_LOW); 6788 EVT InVT = VT.changeVectorElementTypeToInteger(); 6789 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(ElemBytes * 16), 6790 SystemZ::VectorBytes / ElemBytes / 2); 6791 if (VT != InVT) { 6792 Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), InVT, Op1); 6793 DCI.AddToWorklist(Op1.getNode()); 6794 } 6795 SDValue Op = DAG.getNode(Opcode, SDLoc(N), OutVT, Op1); 6796 DCI.AddToWorklist(Op.getNode()); 6797 return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op); 6798 } 6799 } 6800 return SDValue(); 6801 } 6802 6803 static bool isI128MovedToParts(LoadSDNode *LD, SDNode *&LoPart, 6804 SDNode *&HiPart) { 6805 LoPart = HiPart = nullptr; 6806 6807 // Scan through all users. 6808 for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end(); 6809 UI != UIEnd; ++UI) { 6810 // Skip the uses of the chain. 6811 if (UI.getUse().getResNo() != 0) 6812 continue; 6813 6814 // Verify every user is a TRUNCATE to i64 of the low or high half. 6815 SDNode *User = *UI; 6816 bool IsLoPart = true; 6817 if (User->getOpcode() == ISD::SRL && 6818 User->getOperand(1).getOpcode() == ISD::Constant && 6819 User->getConstantOperandVal(1) == 64 && User->hasOneUse()) { 6820 User = *User->use_begin(); 6821 IsLoPart = false; 6822 } 6823 if (User->getOpcode() != ISD::TRUNCATE || User->getValueType(0) != MVT::i64) 6824 return false; 6825 6826 if (IsLoPart) { 6827 if (LoPart) 6828 return false; 6829 LoPart = User; 6830 } else { 6831 if (HiPart) 6832 return false; 6833 HiPart = User; 6834 } 6835 } 6836 return true; 6837 } 6838 6839 static bool isF128MovedToParts(LoadSDNode *LD, SDNode *&LoPart, 6840 SDNode *&HiPart) { 6841 LoPart = HiPart = nullptr; 6842 6843 // Scan through all users. 6844 for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end(); 6845 UI != UIEnd; ++UI) { 6846 // Skip the uses of the chain. 6847 if (UI.getUse().getResNo() != 0) 6848 continue; 6849 6850 // Verify every user is an EXTRACT_SUBREG of the low or high half. 6851 SDNode *User = *UI; 6852 if (!User->hasOneUse() || !User->isMachineOpcode() || 6853 User->getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG) 6854 return false; 6855 6856 switch (User->getConstantOperandVal(1)) { 6857 case SystemZ::subreg_l64: 6858 if (LoPart) 6859 return false; 6860 LoPart = User; 6861 break; 6862 case SystemZ::subreg_h64: 6863 if (HiPart) 6864 return false; 6865 HiPart = User; 6866 break; 6867 default: 6868 return false; 6869 } 6870 } 6871 return true; 6872 } 6873 6874 SDValue SystemZTargetLowering::combineLOAD( 6875 SDNode *N, DAGCombinerInfo &DCI) const { 6876 SelectionDAG &DAG = DCI.DAG; 6877 EVT LdVT = N->getValueType(0); 6878 SDLoc DL(N); 6879 6880 // Replace a 128-bit load that is used solely to move its value into GPRs 6881 // by separate loads of both halves. 6882 LoadSDNode *LD = cast<LoadSDNode>(N); 6883 if (LD->isSimple() && ISD::isNormalLoad(LD)) { 6884 SDNode *LoPart, *HiPart; 6885 if ((LdVT == MVT::i128 && isI128MovedToParts(LD, LoPart, HiPart)) || 6886 (LdVT == MVT::f128 && isF128MovedToParts(LD, LoPart, HiPart))) { 6887 // Rewrite each extraction as an independent load. 
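// SystemZ is big-endian, so the high half is loaded from the original
// address and the low half from 8 bytes above it.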
6888 SmallVector<SDValue, 2> ArgChains; 6889 if (HiPart) { 6890 SDValue EltLoad = DAG.getLoad( 6891 HiPart->getValueType(0), DL, LD->getChain(), LD->getBasePtr(), 6892 LD->getPointerInfo(), LD->getOriginalAlign(), 6893 LD->getMemOperand()->getFlags(), LD->getAAInfo()); 6894 6895 DCI.CombineTo(HiPart, EltLoad, true); 6896 ArgChains.push_back(EltLoad.getValue(1)); 6897 } 6898 if (LoPart) { 6899 SDValue EltLoad = DAG.getLoad( 6900 LoPart->getValueType(0), DL, LD->getChain(), 6901 DAG.getObjectPtrOffset(DL, LD->getBasePtr(), TypeSize::getFixed(8)), 6902 LD->getPointerInfo().getWithOffset(8), LD->getOriginalAlign(), 6903 LD->getMemOperand()->getFlags(), LD->getAAInfo()); 6904 6905 DCI.CombineTo(LoPart, EltLoad, true); 6906 ArgChains.push_back(EltLoad.getValue(1)); 6907 } 6908 6909 // Collect all chains via TokenFactor. 6910 SDValue Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, ArgChains); 6911 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain); 6912 DCI.AddToWorklist(Chain.getNode()); 6913 return SDValue(N, 0); 6914 } 6915 } 6916 6917 if (LdVT.isVector() || LdVT.isInteger()) 6918 return SDValue(); 6919 // Transform a scalar load that is REPLICATEd as well as having other 6920 // use(s) to the form where the other use(s) use the first element of the 6921 // REPLICATE instead of the load. Otherwise instruction selection will not 6922 // produce a VLREP. Avoid extracting to a GPR, so only do this for floating 6923 // point loads. 6924 6925 SDValue Replicate; 6926 SmallVector<SDNode*, 8> OtherUses; 6927 for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); 6928 UI != UE; ++UI) { 6929 if (UI->getOpcode() == SystemZISD::REPLICATE) { 6930 if (Replicate) 6931 return SDValue(); // Should never happen 6932 Replicate = SDValue(*UI, 0); 6933 } 6934 else if (UI.getUse().getResNo() == 0) 6935 OtherUses.push_back(*UI); 6936 } 6937 if (!Replicate || OtherUses.empty()) 6938 return SDValue(); 6939 6940 SDValue Extract0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, LdVT, 6941 Replicate, DAG.getConstant(0, DL, MVT::i32)); 6942 // Update uses of the loaded Value while preserving old chains. 6943 for (SDNode *U : OtherUses) { 6944 SmallVector<SDValue, 8> Ops; 6945 for (SDValue Op : U->ops()) 6946 Ops.push_back((Op.getNode() == N && Op.getResNo() == 0) ? 
Extract0 : Op); 6947 DAG.UpdateNodeOperands(U, Ops); 6948 } 6949 return SDValue(N, 0); 6950 } 6951 6952 bool SystemZTargetLowering::canLoadStoreByteSwapped(EVT VT) const { 6953 if (VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) 6954 return true; 6955 if (Subtarget.hasVectorEnhancements2()) 6956 if (VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v2i64 || VT == MVT::i128) 6957 return true; 6958 return false; 6959 } 6960 6961 static bool isVectorElementSwap(ArrayRef<int> M, EVT VT) { 6962 if (!VT.isVector() || !VT.isSimple() || 6963 VT.getSizeInBits() != 128 || 6964 VT.getScalarSizeInBits() % 8 != 0) 6965 return false; 6966 6967 unsigned NumElts = VT.getVectorNumElements(); 6968 for (unsigned i = 0; i < NumElts; ++i) { 6969 if (M[i] < 0) continue; // ignore UNDEF indices 6970 if ((unsigned) M[i] != NumElts - 1 - i) 6971 return false; 6972 } 6973 6974 return true; 6975 } 6976 6977 static bool isOnlyUsedByStores(SDValue StoredVal, SelectionDAG &DAG) { 6978 for (auto *U : StoredVal->uses()) { 6979 if (StoreSDNode *ST = dyn_cast<StoreSDNode>(U)) { 6980 EVT CurrMemVT = ST->getMemoryVT().getScalarType(); 6981 if (CurrMemVT.isRound() && CurrMemVT.getStoreSize() <= 16) 6982 continue; 6983 } else if (isa<BuildVectorSDNode>(U)) { 6984 SDValue BuildVector = SDValue(U, 0); 6985 if (DAG.isSplatValue(BuildVector, true/*AllowUndefs*/) && 6986 isOnlyUsedByStores(BuildVector, DAG)) 6987 continue; 6988 } 6989 return false; 6990 } 6991 return true; 6992 } 6993 6994 static bool isI128MovedFromParts(SDValue Val, SDValue &LoPart, 6995 SDValue &HiPart) { 6996 if (Val.getOpcode() != ISD::OR || !Val.getNode()->hasOneUse()) 6997 return false; 6998 6999 SDValue Op0 = Val.getOperand(0); 7000 SDValue Op1 = Val.getOperand(1); 7001 7002 if (Op0.getOpcode() == ISD::SHL) 7003 std::swap(Op0, Op1); 7004 if (Op1.getOpcode() != ISD::SHL || !Op1.getNode()->hasOneUse() || 7005 Op1.getOperand(1).getOpcode() != ISD::Constant || 7006 Op1.getConstantOperandVal(1) != 64) 7007 return false; 7008 Op1 = Op1.getOperand(0); 7009 7010 if (Op0.getOpcode() != ISD::ZERO_EXTEND || !Op0.getNode()->hasOneUse() || 7011 Op0.getOperand(0).getValueType() != MVT::i64) 7012 return false; 7013 if (Op1.getOpcode() != ISD::ANY_EXTEND || !Op1.getNode()->hasOneUse() || 7014 Op1.getOperand(0).getValueType() != MVT::i64) 7015 return false; 7016 7017 LoPart = Op0.getOperand(0); 7018 HiPart = Op1.getOperand(0); 7019 return true; 7020 } 7021 7022 static bool isF128MovedFromParts(SDValue Val, SDValue &LoPart, 7023 SDValue &HiPart) { 7024 if (!Val.getNode()->hasOneUse() || !Val.isMachineOpcode() || 7025 Val.getMachineOpcode() != TargetOpcode::REG_SEQUENCE) 7026 return false; 7027 7028 if (Val->getNumOperands() != 5 || 7029 Val->getOperand(0)->getAsZExtVal() != SystemZ::FP128BitRegClassID || 7030 Val->getOperand(2)->getAsZExtVal() != SystemZ::subreg_l64 || 7031 Val->getOperand(4)->getAsZExtVal() != SystemZ::subreg_h64) 7032 return false; 7033 7034 LoPart = Val->getOperand(1); 7035 HiPart = Val->getOperand(3); 7036 return true; 7037 } 7038 7039 SDValue SystemZTargetLowering::combineSTORE( 7040 SDNode *N, DAGCombinerInfo &DCI) const { 7041 SelectionDAG &DAG = DCI.DAG; 7042 auto *SN = cast<StoreSDNode>(N); 7043 auto &Op1 = N->getOperand(1); 7044 EVT MemVT = SN->getMemoryVT(); 7045 // If we have (truncstoreiN (extract_vector_elt X, Y), Z) then it is better 7046 // for the extraction to be done on a vMiN value, so that we can use VSTE. 7047 // If X has wider elements then convert it to: 7048 // (truncstoreiN (extract_vector_elt (bitcast X), Y2), Z). 
7049 if (MemVT.isInteger() && SN->isTruncatingStore()) {
7050 if (SDValue Value =
7051 combineTruncateExtract(SDLoc(N), MemVT, SN->getValue(), DCI)) {
7052 DCI.AddToWorklist(Value.getNode());
7053 
7054 // Rewrite the store with the new form of stored value.
7055 return DAG.getTruncStore(SN->getChain(), SDLoc(SN), Value,
7056 SN->getBasePtr(), SN->getMemoryVT(),
7057 SN->getMemOperand());
7058 }
7059 }
7060 // Combine STORE (BSWAP) into STRVH/STRV/STRVG/VSTBR
7061 if (!SN->isTruncatingStore() &&
7062 Op1.getOpcode() == ISD::BSWAP &&
7063 Op1.getNode()->hasOneUse() &&
7064 canLoadStoreByteSwapped(Op1.getValueType())) {
7065 
7066 SDValue BSwapOp = Op1.getOperand(0);
7067 
7068 if (BSwapOp.getValueType() == MVT::i16)
7069 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), MVT::i32, BSwapOp);
7070 
7071 SDValue Ops[] = {
7072 N->getOperand(0), BSwapOp, N->getOperand(2)
7073 };
7074 
7075 return
7076 DAG.getMemIntrinsicNode(SystemZISD::STRV, SDLoc(N), DAG.getVTList(MVT::Other),
7077 Ops, MemVT, SN->getMemOperand());
7078 }
7079 // Combine STORE (element-swap) into VSTER
7080 if (!SN->isTruncatingStore() &&
7081 Op1.getOpcode() == ISD::VECTOR_SHUFFLE &&
7082 Op1.getNode()->hasOneUse() &&
7083 Subtarget.hasVectorEnhancements2()) {
7084 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op1.getNode());
7085 ArrayRef<int> ShuffleMask = SVN->getMask();
7086 if (isVectorElementSwap(ShuffleMask, Op1.getValueType())) {
7087 SDValue Ops[] = {
7088 N->getOperand(0), Op1.getOperand(0), N->getOperand(2)
7089 };
7090 
7091 return DAG.getMemIntrinsicNode(SystemZISD::VSTER, SDLoc(N),
7092 DAG.getVTList(MVT::Other),
7093 Ops, MemVT, SN->getMemOperand());
7094 }
7095 }
7096 
7097 // Combine STORE (READCYCLECOUNTER) into STCKF.
7098 if (!SN->isTruncatingStore() &&
7099 Op1.getOpcode() == ISD::READCYCLECOUNTER &&
7100 Op1.hasOneUse() &&
7101 N->getOperand(0).reachesChainWithoutSideEffects(SDValue(Op1.getNode(), 1))) {
7102 SDValue Ops[] = { Op1.getOperand(0), N->getOperand(2) };
7103 return DAG.getMemIntrinsicNode(SystemZISD::STCKF, SDLoc(N),
7104 DAG.getVTList(MVT::Other),
7105 Ops, MemVT, SN->getMemOperand());
7106 }
7107 
7108 // Transform a store of a 128-bit value moved from parts into two stores.
7109 if (SN->isSimple() && ISD::isNormalStore(SN)) {
7110 SDValue LoPart, HiPart;
7111 if ((MemVT == MVT::i128 && isI128MovedFromParts(Op1, LoPart, HiPart)) ||
7112 (MemVT == MVT::f128 && isF128MovedFromParts(Op1, LoPart, HiPart))) {
7113 SDLoc DL(SN);
7114 SDValue Chain0 =
7115 DAG.getStore(SN->getChain(), DL, HiPart, SN->getBasePtr(),
7116 SN->getPointerInfo(), SN->getOriginalAlign(),
7117 SN->getMemOperand()->getFlags(), SN->getAAInfo());
7118 SDValue Chain1 =
7119 DAG.getStore(SN->getChain(), DL, LoPart,
7120 DAG.getObjectPtrOffset(DL, SN->getBasePtr(),
7121 TypeSize::getFixed(8)),
7122 SN->getPointerInfo().getWithOffset(8),
7123 SN->getOriginalAlign(),
7124 SN->getMemOperand()->getFlags(), SN->getAAInfo());
7125 
7126 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chain0, Chain1);
7127 }
7128 }
7129 
7130 // Replicate a reg or immediate with VREP instead of scalar multiply or
7131 // immediate load. It seems best to do this during the first DAGCombine as
7132 // it is straight-forward to handle the zero-extend node in the initial
7133 // DAG, and also not worry about keeping the new MemVT legal (e.g. when
7134 // extracting an i16 element from a v16i8 vector).
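// For example, an i64 store of the immediate 0x0001000100010001 can be
// rewritten as a store of a v4i16 splat of 1, which can then typically be
// selected to a vector replicate of 1 plus a vector element store.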
7135 if (Subtarget.hasVector() && DCI.Level == BeforeLegalizeTypes && 7136 isOnlyUsedByStores(Op1, DAG)) { 7137 SDValue Word = SDValue(); 7138 EVT WordVT; 7139 7140 // Find a replicated immediate and return it if found in Word and its 7141 // type in WordVT. 7142 auto FindReplicatedImm = [&](ConstantSDNode *C, unsigned TotBytes) { 7143 // Some constants are better handled with a scalar store. 7144 if (C->getAPIntValue().getBitWidth() > 64 || C->isAllOnes() || 7145 isInt<16>(C->getSExtValue()) || MemVT.getStoreSize() <= 2) 7146 return; 7147 SystemZVectorConstantInfo VCI(APInt(TotBytes * 8, C->getZExtValue())); 7148 if (VCI.isVectorConstantLegal(Subtarget) && 7149 VCI.Opcode == SystemZISD::REPLICATE) { 7150 Word = DAG.getConstant(VCI.OpVals[0], SDLoc(SN), MVT::i32); 7151 WordVT = VCI.VecVT.getScalarType(); 7152 } 7153 }; 7154 7155 // Find a replicated register and return it if found in Word and its type 7156 // in WordVT. 7157 auto FindReplicatedReg = [&](SDValue MulOp) { 7158 EVT MulVT = MulOp.getValueType(); 7159 if (MulOp->getOpcode() == ISD::MUL && 7160 (MulVT == MVT::i16 || MulVT == MVT::i32 || MulVT == MVT::i64)) { 7161 // Find a zero extended value and its type. 7162 SDValue LHS = MulOp->getOperand(0); 7163 if (LHS->getOpcode() == ISD::ZERO_EXTEND) 7164 WordVT = LHS->getOperand(0).getValueType(); 7165 else if (LHS->getOpcode() == ISD::AssertZext) 7166 WordVT = cast<VTSDNode>(LHS->getOperand(1))->getVT(); 7167 else 7168 return; 7169 // Find a replicating constant, e.g. 0x00010001. 7170 if (auto *C = dyn_cast<ConstantSDNode>(MulOp->getOperand(1))) { 7171 SystemZVectorConstantInfo VCI( 7172 APInt(MulVT.getSizeInBits(), C->getZExtValue())); 7173 if (VCI.isVectorConstantLegal(Subtarget) && 7174 VCI.Opcode == SystemZISD::REPLICATE && VCI.OpVals[0] == 1 && 7175 WordVT == VCI.VecVT.getScalarType()) 7176 Word = DAG.getZExtOrTrunc(LHS->getOperand(0), SDLoc(SN), WordVT); 7177 } 7178 } 7179 }; 7180 7181 if (isa<BuildVectorSDNode>(Op1) && 7182 DAG.isSplatValue(Op1, true/*AllowUndefs*/)) { 7183 SDValue SplatVal = Op1->getOperand(0); 7184 if (auto *C = dyn_cast<ConstantSDNode>(SplatVal)) 7185 FindReplicatedImm(C, SplatVal.getValueType().getStoreSize()); 7186 else 7187 FindReplicatedReg(SplatVal); 7188 } else { 7189 if (auto *C = dyn_cast<ConstantSDNode>(Op1)) 7190 FindReplicatedImm(C, MemVT.getStoreSize()); 7191 else 7192 FindReplicatedReg(Op1); 7193 } 7194 7195 if (Word != SDValue()) { 7196 assert(MemVT.getSizeInBits() % WordVT.getSizeInBits() == 0 && 7197 "Bad type handling"); 7198 unsigned NumElts = MemVT.getSizeInBits() / WordVT.getSizeInBits(); 7199 EVT SplatVT = EVT::getVectorVT(*DAG.getContext(), WordVT, NumElts); 7200 SDValue SplatVal = DAG.getSplatVector(SplatVT, SDLoc(SN), Word); 7201 return DAG.getStore(SN->getChain(), SDLoc(SN), SplatVal, 7202 SN->getBasePtr(), SN->getMemOperand()); 7203 } 7204 } 7205 7206 return SDValue(); 7207 } 7208 7209 SDValue SystemZTargetLowering::combineVECTOR_SHUFFLE( 7210 SDNode *N, DAGCombinerInfo &DCI) const { 7211 SelectionDAG &DAG = DCI.DAG; 7212 // Combine element-swap (LOAD) into VLER 7213 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) && 7214 N->getOperand(0).hasOneUse() && 7215 Subtarget.hasVectorEnhancements2()) { 7216 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); 7217 ArrayRef<int> ShuffleMask = SVN->getMask(); 7218 if (isVectorElementSwap(ShuffleMask, N->getValueType(0))) { 7219 SDValue Load = N->getOperand(0); 7220 LoadSDNode *LD = cast<LoadSDNode>(Load); 7221 7222 // Create the element-swapping load. 
7223 SDValue Ops[] = { 7224 LD->getChain(), // Chain 7225 LD->getBasePtr() // Ptr 7226 }; 7227 SDValue ESLoad = 7228 DAG.getMemIntrinsicNode(SystemZISD::VLER, SDLoc(N), 7229 DAG.getVTList(LD->getValueType(0), MVT::Other), 7230 Ops, LD->getMemoryVT(), LD->getMemOperand()); 7231 7232 // First, combine the VECTOR_SHUFFLE away. This makes the value produced 7233 // by the load dead. 7234 DCI.CombineTo(N, ESLoad); 7235 7236 // Next, combine the load away, we give it a bogus result value but a real 7237 // chain result. The result value is dead because the shuffle is dead. 7238 DCI.CombineTo(Load.getNode(), ESLoad, ESLoad.getValue(1)); 7239 7240 // Return N so it doesn't get rechecked! 7241 return SDValue(N, 0); 7242 } 7243 } 7244 7245 return SDValue(); 7246 } 7247 7248 SDValue SystemZTargetLowering::combineEXTRACT_VECTOR_ELT( 7249 SDNode *N, DAGCombinerInfo &DCI) const { 7250 SelectionDAG &DAG = DCI.DAG; 7251 7252 if (!Subtarget.hasVector()) 7253 return SDValue(); 7254 7255 // Look through bitcasts that retain the number of vector elements. 7256 SDValue Op = N->getOperand(0); 7257 if (Op.getOpcode() == ISD::BITCAST && 7258 Op.getValueType().isVector() && 7259 Op.getOperand(0).getValueType().isVector() && 7260 Op.getValueType().getVectorNumElements() == 7261 Op.getOperand(0).getValueType().getVectorNumElements()) 7262 Op = Op.getOperand(0); 7263 7264 // Pull BSWAP out of a vector extraction. 7265 if (Op.getOpcode() == ISD::BSWAP && Op.hasOneUse()) { 7266 EVT VecVT = Op.getValueType(); 7267 EVT EltVT = VecVT.getVectorElementType(); 7268 Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), EltVT, 7269 Op.getOperand(0), N->getOperand(1)); 7270 DCI.AddToWorklist(Op.getNode()); 7271 Op = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Op); 7272 if (EltVT != N->getValueType(0)) { 7273 DCI.AddToWorklist(Op.getNode()); 7274 Op = DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Op); 7275 } 7276 return Op; 7277 } 7278 7279 // Try to simplify a vector extraction. 7280 if (auto *IndexN = dyn_cast<ConstantSDNode>(N->getOperand(1))) { 7281 SDValue Op0 = N->getOperand(0); 7282 EVT VecVT = Op0.getValueType(); 7283 return combineExtract(SDLoc(N), N->getValueType(0), VecVT, Op0, 7284 IndexN->getZExtValue(), DCI, false); 7285 } 7286 return SDValue(); 7287 } 7288 7289 SDValue SystemZTargetLowering::combineJOIN_DWORDS( 7290 SDNode *N, DAGCombinerInfo &DCI) const { 7291 SelectionDAG &DAG = DCI.DAG; 7292 // (join_dwords X, X) == (replicate X) 7293 if (N->getOperand(0) == N->getOperand(1)) 7294 return DAG.getNode(SystemZISD::REPLICATE, SDLoc(N), N->getValueType(0), 7295 N->getOperand(0)); 7296 return SDValue(); 7297 } 7298 7299 static SDValue MergeInputChains(SDNode *N1, SDNode *N2) { 7300 SDValue Chain1 = N1->getOperand(0); 7301 SDValue Chain2 = N2->getOperand(0); 7302 7303 // Trivial case: both nodes take the same chain. 7304 if (Chain1 == Chain2) 7305 return Chain1; 7306 7307 // FIXME - we could handle more complex cases via TokenFactor, 7308 // assuming we can verify that this would not create a cycle. 7309 return SDValue(); 7310 } 7311 7312 SDValue SystemZTargetLowering::combineFP_ROUND( 7313 SDNode *N, DAGCombinerInfo &DCI) const { 7314 7315 if (!Subtarget.hasVector()) 7316 return SDValue(); 7317 7318 // (fpround (extract_vector_elt X 0)) 7319 // (fpround (extract_vector_elt X 1)) -> 7320 // (extract_vector_elt (VROUND X) 0) 7321 // (extract_vector_elt (VROUND X) 2) 7322 // 7323 // This is a special case since the target doesn't really support v2f32s. 7324 unsigned OpNo = N->isStrictFPOpcode() ? 
1 : 0; 7325 SelectionDAG &DAG = DCI.DAG; 7326 SDValue Op0 = N->getOperand(OpNo); 7327 if (N->getValueType(0) == MVT::f32 && Op0.hasOneUse() && 7328 Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && 7329 Op0.getOperand(0).getValueType() == MVT::v2f64 && 7330 Op0.getOperand(1).getOpcode() == ISD::Constant && 7331 Op0.getConstantOperandVal(1) == 0) { 7332 SDValue Vec = Op0.getOperand(0); 7333 for (auto *U : Vec->uses()) { 7334 if (U != Op0.getNode() && U->hasOneUse() && 7335 U->getOpcode() == ISD::EXTRACT_VECTOR_ELT && 7336 U->getOperand(0) == Vec && 7337 U->getOperand(1).getOpcode() == ISD::Constant && 7338 U->getConstantOperandVal(1) == 1) { 7339 SDValue OtherRound = SDValue(*U->use_begin(), 0); 7340 if (OtherRound.getOpcode() == N->getOpcode() && 7341 OtherRound.getOperand(OpNo) == SDValue(U, 0) && 7342 OtherRound.getValueType() == MVT::f32) { 7343 SDValue VRound, Chain; 7344 if (N->isStrictFPOpcode()) { 7345 Chain = MergeInputChains(N, OtherRound.getNode()); 7346 if (!Chain) 7347 continue; 7348 VRound = DAG.getNode(SystemZISD::STRICT_VROUND, SDLoc(N), 7349 {MVT::v4f32, MVT::Other}, {Chain, Vec}); 7350 Chain = VRound.getValue(1); 7351 } else 7352 VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N), 7353 MVT::v4f32, Vec); 7354 DCI.AddToWorklist(VRound.getNode()); 7355 SDValue Extract1 = 7356 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f32, 7357 VRound, DAG.getConstant(2, SDLoc(U), MVT::i32)); 7358 DCI.AddToWorklist(Extract1.getNode()); 7359 DAG.ReplaceAllUsesOfValueWith(OtherRound, Extract1); 7360 if (Chain) 7361 DAG.ReplaceAllUsesOfValueWith(OtherRound.getValue(1), Chain); 7362 SDValue Extract0 = 7363 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32, 7364 VRound, DAG.getConstant(0, SDLoc(Op0), MVT::i32)); 7365 if (Chain) 7366 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0), 7367 N->getVTList(), Extract0, Chain); 7368 return Extract0; 7369 } 7370 } 7371 } 7372 } 7373 return SDValue(); 7374 } 7375 7376 SDValue SystemZTargetLowering::combineFP_EXTEND( 7377 SDNode *N, DAGCombinerInfo &DCI) const { 7378 7379 if (!Subtarget.hasVector()) 7380 return SDValue(); 7381 7382 // (fpextend (extract_vector_elt X 0)) 7383 // (fpextend (extract_vector_elt X 2)) -> 7384 // (extract_vector_elt (VEXTEND X) 0) 7385 // (extract_vector_elt (VEXTEND X) 1) 7386 // 7387 // This is a special case since the target doesn't really support v2f32s. 7388 unsigned OpNo = N->isStrictFPOpcode() ? 
1 : 0; 7389 SelectionDAG &DAG = DCI.DAG; 7390 SDValue Op0 = N->getOperand(OpNo); 7391 if (N->getValueType(0) == MVT::f64 && Op0.hasOneUse() && 7392 Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && 7393 Op0.getOperand(0).getValueType() == MVT::v4f32 && 7394 Op0.getOperand(1).getOpcode() == ISD::Constant && 7395 Op0.getConstantOperandVal(1) == 0) { 7396 SDValue Vec = Op0.getOperand(0); 7397 for (auto *U : Vec->uses()) { 7398 if (U != Op0.getNode() && U->hasOneUse() && 7399 U->getOpcode() == ISD::EXTRACT_VECTOR_ELT && 7400 U->getOperand(0) == Vec && 7401 U->getOperand(1).getOpcode() == ISD::Constant && 7402 U->getConstantOperandVal(1) == 2) { 7403 SDValue OtherExtend = SDValue(*U->use_begin(), 0); 7404 if (OtherExtend.getOpcode() == N->getOpcode() && 7405 OtherExtend.getOperand(OpNo) == SDValue(U, 0) && 7406 OtherExtend.getValueType() == MVT::f64) { 7407 SDValue VExtend, Chain; 7408 if (N->isStrictFPOpcode()) { 7409 Chain = MergeInputChains(N, OtherExtend.getNode()); 7410 if (!Chain) 7411 continue; 7412 VExtend = DAG.getNode(SystemZISD::STRICT_VEXTEND, SDLoc(N), 7413 {MVT::v2f64, MVT::Other}, {Chain, Vec}); 7414 Chain = VExtend.getValue(1); 7415 } else 7416 VExtend = DAG.getNode(SystemZISD::VEXTEND, SDLoc(N), 7417 MVT::v2f64, Vec); 7418 DCI.AddToWorklist(VExtend.getNode()); 7419 SDValue Extract1 = 7420 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f64, 7421 VExtend, DAG.getConstant(1, SDLoc(U), MVT::i32)); 7422 DCI.AddToWorklist(Extract1.getNode()); 7423 DAG.ReplaceAllUsesOfValueWith(OtherExtend, Extract1); 7424 if (Chain) 7425 DAG.ReplaceAllUsesOfValueWith(OtherExtend.getValue(1), Chain); 7426 SDValue Extract0 = 7427 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f64, 7428 VExtend, DAG.getConstant(0, SDLoc(Op0), MVT::i32)); 7429 if (Chain) 7430 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0), 7431 N->getVTList(), Extract0, Chain); 7432 return Extract0; 7433 } 7434 } 7435 } 7436 } 7437 return SDValue(); 7438 } 7439 7440 SDValue SystemZTargetLowering::combineINT_TO_FP( 7441 SDNode *N, DAGCombinerInfo &DCI) const { 7442 if (DCI.Level != BeforeLegalizeTypes) 7443 return SDValue(); 7444 SelectionDAG &DAG = DCI.DAG; 7445 LLVMContext &Ctx = *DAG.getContext(); 7446 unsigned Opcode = N->getOpcode(); 7447 EVT OutVT = N->getValueType(0); 7448 Type *OutLLVMTy = OutVT.getTypeForEVT(Ctx); 7449 SDValue Op = N->getOperand(0); 7450 unsigned OutScalarBits = OutLLVMTy->getScalarSizeInBits(); 7451 unsigned InScalarBits = Op->getValueType(0).getScalarSizeInBits(); 7452 7453 // Insert an extension before type-legalization to avoid scalarization, e.g.: 7454 // v2f64 = uint_to_fp v2i16 7455 // => 7456 // v2f64 = uint_to_fp (v2i64 zero_extend v2i16) 7457 if (OutLLVMTy->isVectorTy() && OutScalarBits > InScalarBits && 7458 OutScalarBits <= 64) { 7459 unsigned NumElts = cast<FixedVectorType>(OutLLVMTy)->getNumElements(); 7460 EVT ExtVT = EVT::getVectorVT( 7461 Ctx, EVT::getIntegerVT(Ctx, OutLLVMTy->getScalarSizeInBits()), NumElts); 7462 unsigned ExtOpcode = 7463 (Opcode == ISD::UINT_TO_FP ? 
ISD::ZERO_EXTEND : ISD::SIGN_EXTEND); 7464 SDValue ExtOp = DAG.getNode(ExtOpcode, SDLoc(N), ExtVT, Op); 7465 return DAG.getNode(Opcode, SDLoc(N), OutVT, ExtOp); 7466 } 7467 return SDValue(); 7468 } 7469 7470 SDValue SystemZTargetLowering::combineBSWAP( 7471 SDNode *N, DAGCombinerInfo &DCI) const { 7472 SelectionDAG &DAG = DCI.DAG; 7473 // Combine BSWAP (LOAD) into LRVH/LRV/LRVG/VLBR 7474 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) && 7475 N->getOperand(0).hasOneUse() && 7476 canLoadStoreByteSwapped(N->getValueType(0))) { 7477 SDValue Load = N->getOperand(0); 7478 LoadSDNode *LD = cast<LoadSDNode>(Load); 7479 7480 // Create the byte-swapping load. 7481 SDValue Ops[] = { 7482 LD->getChain(), // Chain 7483 LD->getBasePtr() // Ptr 7484 }; 7485 EVT LoadVT = N->getValueType(0); 7486 if (LoadVT == MVT::i16) 7487 LoadVT = MVT::i32; 7488 SDValue BSLoad = 7489 DAG.getMemIntrinsicNode(SystemZISD::LRV, SDLoc(N), 7490 DAG.getVTList(LoadVT, MVT::Other), 7491 Ops, LD->getMemoryVT(), LD->getMemOperand()); 7492 7493 // If this is an i16 load, insert the truncate. 7494 SDValue ResVal = BSLoad; 7495 if (N->getValueType(0) == MVT::i16) 7496 ResVal = DAG.getNode(ISD::TRUNCATE, SDLoc(N), MVT::i16, BSLoad); 7497 7498 // First, combine the bswap away. This makes the value produced by the 7499 // load dead. 7500 DCI.CombineTo(N, ResVal); 7501 7502 // Next, combine the load away, we give it a bogus result value but a real 7503 // chain result. The result value is dead because the bswap is dead. 7504 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1)); 7505 7506 // Return N so it doesn't get rechecked! 7507 return SDValue(N, 0); 7508 } 7509 7510 // Look through bitcasts that retain the number of vector elements. 7511 SDValue Op = N->getOperand(0); 7512 if (Op.getOpcode() == ISD::BITCAST && 7513 Op.getValueType().isVector() && 7514 Op.getOperand(0).getValueType().isVector() && 7515 Op.getValueType().getVectorNumElements() == 7516 Op.getOperand(0).getValueType().getVectorNumElements()) 7517 Op = Op.getOperand(0); 7518 7519 // Push BSWAP into a vector insertion if at least one side then simplifies. 7520 if (Op.getOpcode() == ISD::INSERT_VECTOR_ELT && Op.hasOneUse()) { 7521 SDValue Vec = Op.getOperand(0); 7522 SDValue Elt = Op.getOperand(1); 7523 SDValue Idx = Op.getOperand(2); 7524 7525 if (DAG.isConstantIntBuildVectorOrConstantInt(Vec) || 7526 Vec.getOpcode() == ISD::BSWAP || Vec.isUndef() || 7527 DAG.isConstantIntBuildVectorOrConstantInt(Elt) || 7528 Elt.getOpcode() == ISD::BSWAP || Elt.isUndef() || 7529 (canLoadStoreByteSwapped(N->getValueType(0)) && 7530 ISD::isNON_EXTLoad(Elt.getNode()) && Elt.hasOneUse())) { 7531 EVT VecVT = N->getValueType(0); 7532 EVT EltVT = N->getValueType(0).getVectorElementType(); 7533 if (VecVT != Vec.getValueType()) { 7534 Vec = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Vec); 7535 DCI.AddToWorklist(Vec.getNode()); 7536 } 7537 if (EltVT != Elt.getValueType()) { 7538 Elt = DAG.getNode(ISD::BITCAST, SDLoc(N), EltVT, Elt); 7539 DCI.AddToWorklist(Elt.getNode()); 7540 } 7541 Vec = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Vec); 7542 DCI.AddToWorklist(Vec.getNode()); 7543 Elt = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Elt); 7544 DCI.AddToWorklist(Elt.getNode()); 7545 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VecVT, 7546 Vec, Elt, Idx); 7547 } 7548 } 7549 7550 // Push BSWAP into a vector shuffle if at least one side then simplifies. 
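// (BSWAP only rearranges bytes within each element, so it commutes with
// the element-wise shuffle and can be applied to both inputs instead.)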
  ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(Op);
  if (SV && Op.hasOneUse()) {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    if (DAG.isConstantIntBuildVectorOrConstantInt(Op0) ||
        Op0.getOpcode() == ISD::BSWAP || Op0.isUndef() ||
        DAG.isConstantIntBuildVectorOrConstantInt(Op1) ||
        Op1.getOpcode() == ISD::BSWAP || Op1.isUndef()) {
      EVT VecVT = N->getValueType(0);
      if (VecVT != Op0.getValueType()) {
        Op0 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op0);
        DCI.AddToWorklist(Op0.getNode());
      }
      if (VecVT != Op1.getValueType()) {
        Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op1);
        DCI.AddToWorklist(Op1.getNode());
      }
      Op0 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op0);
      DCI.AddToWorklist(Op0.getNode());
      Op1 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op1);
      DCI.AddToWorklist(Op1.getNode());
      return DAG.getVectorShuffle(VecVT, SDLoc(N), Op0, Op1, SV->getMask());
    }
  }

  return SDValue();
}

static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) {
  // We have a SELECT_CCMASK or BR_CCMASK comparing the condition code
  // set by the CCReg instruction using the CCValid / CCMask masks.
  // If the CCReg instruction is itself an ICMP testing the condition
  // code set by some other instruction, see whether we can directly
  // use that condition code.

  // Verify that we have an ICMP against some constant.
  if (CCValid != SystemZ::CCMASK_ICMP)
    return false;
  auto *ICmp = CCReg.getNode();
  if (ICmp->getOpcode() != SystemZISD::ICMP)
    return false;
  auto *CompareLHS = ICmp->getOperand(0).getNode();
  auto *CompareRHS = dyn_cast<ConstantSDNode>(ICmp->getOperand(1));
  if (!CompareRHS)
    return false;

  // Optimize the case where CompareLHS is a SELECT_CCMASK.
  if (CompareLHS->getOpcode() == SystemZISD::SELECT_CCMASK) {
    // Verify that we have an appropriate mask for an EQ or NE comparison.
    bool Invert = false;
    if (CCMask == SystemZ::CCMASK_CMP_NE)
      Invert = !Invert;
    else if (CCMask != SystemZ::CCMASK_CMP_EQ)
      return false;

    // Verify that the ICMP compares against one of the select values.
    auto *TrueVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(0));
    if (!TrueVal)
      return false;
    auto *FalseVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1));
    if (!FalseVal)
      return false;
    if (CompareRHS->getZExtValue() == FalseVal->getZExtValue())
      Invert = !Invert;
    else if (CompareRHS->getZExtValue() != TrueVal->getZExtValue())
      return false;

    // Compute the effective CC mask for the new branch or select.
    auto *NewCCValid = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(2));
    auto *NewCCMask = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(3));
    if (!NewCCValid || !NewCCMask)
      return false;
    CCValid = NewCCValid->getZExtValue();
    CCMask = NewCCMask->getZExtValue();
    if (Invert)
      CCMask ^= CCValid;

    // Return the updated CCReg link.
    CCReg = CompareLHS->getOperand(4);
    return true;
  }

  // Optimize the case where CompareLHS is (SRA (SHL (IPM))).
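  // (Sketch of the pattern being matched: IPM places CC in bits 29:28 of the
  // result, the SHL moves it into the top two bits and the SRA sign-extends
  // it, so a signed comparison of this value against zero is really a test
  // of CC. Folding the sequence away avoids materializing CC into a GPR at
  // all; the shift-count checks below identify exactly this shape.)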
7635 if (CompareLHS->getOpcode() == ISD::SRA) { 7636 auto *SRACount = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1)); 7637 if (!SRACount || SRACount->getZExtValue() != 30) 7638 return false; 7639 auto *SHL = CompareLHS->getOperand(0).getNode(); 7640 if (SHL->getOpcode() != ISD::SHL) 7641 return false; 7642 auto *SHLCount = dyn_cast<ConstantSDNode>(SHL->getOperand(1)); 7643 if (!SHLCount || SHLCount->getZExtValue() != 30 - SystemZ::IPM_CC) 7644 return false; 7645 auto *IPM = SHL->getOperand(0).getNode(); 7646 if (IPM->getOpcode() != SystemZISD::IPM) 7647 return false; 7648 7649 // Avoid introducing CC spills (because SRA would clobber CC). 7650 if (!CompareLHS->hasOneUse()) 7651 return false; 7652 // Verify that the ICMP compares against zero. 7653 if (CompareRHS->getZExtValue() != 0) 7654 return false; 7655 7656 // Compute the effective CC mask for the new branch or select. 7657 CCMask = SystemZ::reverseCCMask(CCMask); 7658 7659 // Return the updated CCReg link. 7660 CCReg = IPM->getOperand(0); 7661 return true; 7662 } 7663 7664 return false; 7665 } 7666 7667 SDValue SystemZTargetLowering::combineBR_CCMASK( 7668 SDNode *N, DAGCombinerInfo &DCI) const { 7669 SelectionDAG &DAG = DCI.DAG; 7670 7671 // Combine BR_CCMASK (ICMP (SELECT_CCMASK)) into a single BR_CCMASK. 7672 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1)); 7673 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2)); 7674 if (!CCValid || !CCMask) 7675 return SDValue(); 7676 7677 int CCValidVal = CCValid->getZExtValue(); 7678 int CCMaskVal = CCMask->getZExtValue(); 7679 SDValue Chain = N->getOperand(0); 7680 SDValue CCReg = N->getOperand(4); 7681 7682 if (combineCCMask(CCReg, CCValidVal, CCMaskVal)) 7683 return DAG.getNode(SystemZISD::BR_CCMASK, SDLoc(N), N->getValueType(0), 7684 Chain, 7685 DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32), 7686 DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32), 7687 N->getOperand(3), CCReg); 7688 return SDValue(); 7689 } 7690 7691 SDValue SystemZTargetLowering::combineSELECT_CCMASK( 7692 SDNode *N, DAGCombinerInfo &DCI) const { 7693 SelectionDAG &DAG = DCI.DAG; 7694 7695 // Combine SELECT_CCMASK (ICMP (SELECT_CCMASK)) into a single SELECT_CCMASK. 
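// For example, when the result of a SELECT_CCMASK between two constants is
// compared for equality against one of those constants and the outcome feeds
// this SELECT_CCMASK, combineCCMask lets this node test the original CC value
// directly instead of the rematerialized constant. (Illustrative; the exact
// operand checks are in combineCCMask above.)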
7696 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(2)); 7697 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(3)); 7698 if (!CCValid || !CCMask) 7699 return SDValue(); 7700 7701 int CCValidVal = CCValid->getZExtValue(); 7702 int CCMaskVal = CCMask->getZExtValue(); 7703 SDValue CCReg = N->getOperand(4); 7704 7705 if (combineCCMask(CCReg, CCValidVal, CCMaskVal)) 7706 return DAG.getNode(SystemZISD::SELECT_CCMASK, SDLoc(N), N->getValueType(0), 7707 N->getOperand(0), N->getOperand(1), 7708 DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32), 7709 DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32), 7710 CCReg); 7711 return SDValue(); 7712 } 7713 7714 7715 SDValue SystemZTargetLowering::combineGET_CCMASK( 7716 SDNode *N, DAGCombinerInfo &DCI) const { 7717 7718 // Optimize away GET_CCMASK (SELECT_CCMASK) if the CC masks are compatible 7719 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1)); 7720 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2)); 7721 if (!CCValid || !CCMask) 7722 return SDValue(); 7723 int CCValidVal = CCValid->getZExtValue(); 7724 int CCMaskVal = CCMask->getZExtValue(); 7725 7726 SDValue Select = N->getOperand(0); 7727 if (Select->getOpcode() == ISD::TRUNCATE) 7728 Select = Select->getOperand(0); 7729 if (Select->getOpcode() != SystemZISD::SELECT_CCMASK) 7730 return SDValue(); 7731 7732 auto *SelectCCValid = dyn_cast<ConstantSDNode>(Select->getOperand(2)); 7733 auto *SelectCCMask = dyn_cast<ConstantSDNode>(Select->getOperand(3)); 7734 if (!SelectCCValid || !SelectCCMask) 7735 return SDValue(); 7736 int SelectCCValidVal = SelectCCValid->getZExtValue(); 7737 int SelectCCMaskVal = SelectCCMask->getZExtValue(); 7738 7739 auto *TrueVal = dyn_cast<ConstantSDNode>(Select->getOperand(0)); 7740 auto *FalseVal = dyn_cast<ConstantSDNode>(Select->getOperand(1)); 7741 if (!TrueVal || !FalseVal) 7742 return SDValue(); 7743 if (TrueVal->getZExtValue() == 1 && FalseVal->getZExtValue() == 0) 7744 ; 7745 else if (TrueVal->getZExtValue() == 0 && FalseVal->getZExtValue() == 1) 7746 SelectCCMaskVal ^= SelectCCValidVal; 7747 else 7748 return SDValue(); 7749 7750 if (SelectCCValidVal & ~CCValidVal) 7751 return SDValue(); 7752 if (SelectCCMaskVal != (CCMaskVal & SelectCCValidVal)) 7753 return SDValue(); 7754 7755 return Select->getOperand(4); 7756 } 7757 7758 SDValue SystemZTargetLowering::combineIntDIVREM( 7759 SDNode *N, DAGCombinerInfo &DCI) const { 7760 SelectionDAG &DAG = DCI.DAG; 7761 EVT VT = N->getValueType(0); 7762 // In the case where the divisor is a vector of constants a cheaper 7763 // sequence of instructions can replace the divide. BuildSDIV is called to 7764 // do this during DAG combining, but it only succeeds when it can build a 7765 // multiplication node. The only option for SystemZ is ISD::SMUL_LOHI, and 7766 // since it is not Legal but Custom it can only happen before 7767 // legalization. Therefore we must scalarize this early before Combine 7768 // 1. For widened vectors, this is already the result of type legalization. 
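// Illustrative example: (sdiv <4 x i32> %x, splat(3)) is unrolled here into
// four scalar divides so that each of them can still be rewritten into a
// multiply-based sequence before legalization. (The constant 3 is just an
// example; any constant build_vector divisor qualifies.)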
7769 if (DCI.Level == BeforeLegalizeTypes && VT.isVector() && isTypeLegal(VT) && 7770 DAG.isConstantIntBuildVectorOrConstantInt(N->getOperand(1))) 7771 return DAG.UnrollVectorOp(N); 7772 return SDValue(); 7773 } 7774 7775 SDValue SystemZTargetLowering::combineINTRINSIC( 7776 SDNode *N, DAGCombinerInfo &DCI) const { 7777 SelectionDAG &DAG = DCI.DAG; 7778 7779 unsigned Id = N->getConstantOperandVal(1); 7780 switch (Id) { 7781 // VECTOR LOAD (RIGHTMOST) WITH LENGTH with a length operand of 15 7782 // or larger is simply a vector load. 7783 case Intrinsic::s390_vll: 7784 case Intrinsic::s390_vlrl: 7785 if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(2))) 7786 if (C->getZExtValue() >= 15) 7787 return DAG.getLoad(N->getValueType(0), SDLoc(N), N->getOperand(0), 7788 N->getOperand(3), MachinePointerInfo()); 7789 break; 7790 // Likewise for VECTOR STORE (RIGHTMOST) WITH LENGTH. 7791 case Intrinsic::s390_vstl: 7792 case Intrinsic::s390_vstrl: 7793 if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(3))) 7794 if (C->getZExtValue() >= 15) 7795 return DAG.getStore(N->getOperand(0), SDLoc(N), N->getOperand(2), 7796 N->getOperand(4), MachinePointerInfo()); 7797 break; 7798 } 7799 7800 return SDValue(); 7801 } 7802 7803 SDValue SystemZTargetLowering::unwrapAddress(SDValue N) const { 7804 if (N->getOpcode() == SystemZISD::PCREL_WRAPPER) 7805 return N->getOperand(0); 7806 return N; 7807 } 7808 7809 SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N, 7810 DAGCombinerInfo &DCI) const { 7811 switch(N->getOpcode()) { 7812 default: break; 7813 case ISD::ZERO_EXTEND: return combineZERO_EXTEND(N, DCI); 7814 case ISD::SIGN_EXTEND: return combineSIGN_EXTEND(N, DCI); 7815 case ISD::SIGN_EXTEND_INREG: return combineSIGN_EXTEND_INREG(N, DCI); 7816 case SystemZISD::MERGE_HIGH: 7817 case SystemZISD::MERGE_LOW: return combineMERGE(N, DCI); 7818 case ISD::LOAD: return combineLOAD(N, DCI); 7819 case ISD::STORE: return combineSTORE(N, DCI); 7820 case ISD::VECTOR_SHUFFLE: return combineVECTOR_SHUFFLE(N, DCI); 7821 case ISD::EXTRACT_VECTOR_ELT: return combineEXTRACT_VECTOR_ELT(N, DCI); 7822 case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI); 7823 case ISD::STRICT_FP_ROUND: 7824 case ISD::FP_ROUND: return combineFP_ROUND(N, DCI); 7825 case ISD::STRICT_FP_EXTEND: 7826 case ISD::FP_EXTEND: return combineFP_EXTEND(N, DCI); 7827 case ISD::SINT_TO_FP: 7828 case ISD::UINT_TO_FP: return combineINT_TO_FP(N, DCI); 7829 case ISD::BSWAP: return combineBSWAP(N, DCI); 7830 case SystemZISD::BR_CCMASK: return combineBR_CCMASK(N, DCI); 7831 case SystemZISD::SELECT_CCMASK: return combineSELECT_CCMASK(N, DCI); 7832 case SystemZISD::GET_CCMASK: return combineGET_CCMASK(N, DCI); 7833 case ISD::SDIV: 7834 case ISD::UDIV: 7835 case ISD::SREM: 7836 case ISD::UREM: return combineIntDIVREM(N, DCI); 7837 case ISD::INTRINSIC_W_CHAIN: 7838 case ISD::INTRINSIC_VOID: return combineINTRINSIC(N, DCI); 7839 } 7840 7841 return SDValue(); 7842 } 7843 7844 // Return the demanded elements for the OpNo source operand of Op. DemandedElts 7845 // are for Op. 7846 static APInt getDemandedSrcElements(SDValue Op, const APInt &DemandedElts, 7847 unsigned OpNo) { 7848 EVT VT = Op.getValueType(); 7849 unsigned NumElts = (VT.isVector() ? 
VT.getVectorNumElements() : 1); 7850 APInt SrcDemE; 7851 unsigned Opcode = Op.getOpcode(); 7852 if (Opcode == ISD::INTRINSIC_WO_CHAIN) { 7853 unsigned Id = Op.getConstantOperandVal(0); 7854 switch (Id) { 7855 case Intrinsic::s390_vpksh: // PACKS 7856 case Intrinsic::s390_vpksf: 7857 case Intrinsic::s390_vpksg: 7858 case Intrinsic::s390_vpkshs: // PACKS_CC 7859 case Intrinsic::s390_vpksfs: 7860 case Intrinsic::s390_vpksgs: 7861 case Intrinsic::s390_vpklsh: // PACKLS 7862 case Intrinsic::s390_vpklsf: 7863 case Intrinsic::s390_vpklsg: 7864 case Intrinsic::s390_vpklshs: // PACKLS_CC 7865 case Intrinsic::s390_vpklsfs: 7866 case Intrinsic::s390_vpklsgs: 7867 // VECTOR PACK truncates the elements of two source vectors into one. 7868 SrcDemE = DemandedElts; 7869 if (OpNo == 2) 7870 SrcDemE.lshrInPlace(NumElts / 2); 7871 SrcDemE = SrcDemE.trunc(NumElts / 2); 7872 break; 7873 // VECTOR UNPACK extends half the elements of the source vector. 7874 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH 7875 case Intrinsic::s390_vuphh: 7876 case Intrinsic::s390_vuphf: 7877 case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH 7878 case Intrinsic::s390_vuplhh: 7879 case Intrinsic::s390_vuplhf: 7880 SrcDemE = APInt(NumElts * 2, 0); 7881 SrcDemE.insertBits(DemandedElts, 0); 7882 break; 7883 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW 7884 case Intrinsic::s390_vuplhw: 7885 case Intrinsic::s390_vuplf: 7886 case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW 7887 case Intrinsic::s390_vupllh: 7888 case Intrinsic::s390_vupllf: 7889 SrcDemE = APInt(NumElts * 2, 0); 7890 SrcDemE.insertBits(DemandedElts, NumElts); 7891 break; 7892 case Intrinsic::s390_vpdi: { 7893 // VECTOR PERMUTE DWORD IMMEDIATE selects one element from each source. 7894 SrcDemE = APInt(NumElts, 0); 7895 if (!DemandedElts[OpNo - 1]) 7896 break; 7897 unsigned Mask = Op.getConstantOperandVal(3); 7898 unsigned MaskBit = ((OpNo - 1) ? 1 : 4); 7899 // Demand input element 0 or 1, given by the mask bit value. 7900 SrcDemE.setBit((Mask & MaskBit)? 1 : 0); 7901 break; 7902 } 7903 case Intrinsic::s390_vsldb: { 7904 // VECTOR SHIFT LEFT DOUBLE BY BYTE 7905 assert(VT == MVT::v16i8 && "Unexpected type."); 7906 unsigned FirstIdx = Op.getConstantOperandVal(3); 7907 assert (FirstIdx > 0 && FirstIdx < 16 && "Unused operand."); 7908 unsigned NumSrc0Els = 16 - FirstIdx; 7909 SrcDemE = APInt(NumElts, 0); 7910 if (OpNo == 1) { 7911 APInt DemEls = DemandedElts.trunc(NumSrc0Els); 7912 SrcDemE.insertBits(DemEls, FirstIdx); 7913 } else { 7914 APInt DemEls = DemandedElts.lshr(NumSrc0Els); 7915 SrcDemE.insertBits(DemEls, 0); 7916 } 7917 break; 7918 } 7919 case Intrinsic::s390_vperm: 7920 SrcDemE = APInt(NumElts, -1); 7921 break; 7922 default: 7923 llvm_unreachable("Unhandled intrinsic."); 7924 break; 7925 } 7926 } else { 7927 switch (Opcode) { 7928 case SystemZISD::JOIN_DWORDS: 7929 // Scalar operand. 
7930 SrcDemE = APInt(1, 1); 7931 break; 7932 case SystemZISD::SELECT_CCMASK: 7933 SrcDemE = DemandedElts; 7934 break; 7935 default: 7936 llvm_unreachable("Unhandled opcode."); 7937 break; 7938 } 7939 } 7940 return SrcDemE; 7941 } 7942 7943 static void computeKnownBitsBinOp(const SDValue Op, KnownBits &Known, 7944 const APInt &DemandedElts, 7945 const SelectionDAG &DAG, unsigned Depth, 7946 unsigned OpNo) { 7947 APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo); 7948 APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1); 7949 KnownBits LHSKnown = 7950 DAG.computeKnownBits(Op.getOperand(OpNo), Src0DemE, Depth + 1); 7951 KnownBits RHSKnown = 7952 DAG.computeKnownBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1); 7953 Known = LHSKnown.intersectWith(RHSKnown); 7954 } 7955 7956 void 7957 SystemZTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, 7958 KnownBits &Known, 7959 const APInt &DemandedElts, 7960 const SelectionDAG &DAG, 7961 unsigned Depth) const { 7962 Known.resetAll(); 7963 7964 // Intrinsic CC result is returned in the two low bits. 7965 unsigned tmp0, tmp1; // not used 7966 if (Op.getResNo() == 1 && isIntrinsicWithCC(Op, tmp0, tmp1)) { 7967 Known.Zero.setBitsFrom(2); 7968 return; 7969 } 7970 EVT VT = Op.getValueType(); 7971 if (Op.getResNo() != 0 || VT == MVT::Untyped) 7972 return; 7973 assert (Known.getBitWidth() == VT.getScalarSizeInBits() && 7974 "KnownBits does not match VT in bitwidth"); 7975 assert ((!VT.isVector() || 7976 (DemandedElts.getBitWidth() == VT.getVectorNumElements())) && 7977 "DemandedElts does not match VT number of elements"); 7978 unsigned BitWidth = Known.getBitWidth(); 7979 unsigned Opcode = Op.getOpcode(); 7980 if (Opcode == ISD::INTRINSIC_WO_CHAIN) { 7981 bool IsLogical = false; 7982 unsigned Id = Op.getConstantOperandVal(0); 7983 switch (Id) { 7984 case Intrinsic::s390_vpksh: // PACKS 7985 case Intrinsic::s390_vpksf: 7986 case Intrinsic::s390_vpksg: 7987 case Intrinsic::s390_vpkshs: // PACKS_CC 7988 case Intrinsic::s390_vpksfs: 7989 case Intrinsic::s390_vpksgs: 7990 case Intrinsic::s390_vpklsh: // PACKLS 7991 case Intrinsic::s390_vpklsf: 7992 case Intrinsic::s390_vpklsg: 7993 case Intrinsic::s390_vpklshs: // PACKLS_CC 7994 case Intrinsic::s390_vpklsfs: 7995 case Intrinsic::s390_vpklsgs: 7996 case Intrinsic::s390_vpdi: 7997 case Intrinsic::s390_vsldb: 7998 case Intrinsic::s390_vperm: 7999 computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 1); 8000 break; 8001 case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH 8002 case Intrinsic::s390_vuplhh: 8003 case Intrinsic::s390_vuplhf: 8004 case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW 8005 case Intrinsic::s390_vupllh: 8006 case Intrinsic::s390_vupllf: 8007 IsLogical = true; 8008 [[fallthrough]]; 8009 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH 8010 case Intrinsic::s390_vuphh: 8011 case Intrinsic::s390_vuphf: 8012 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW 8013 case Intrinsic::s390_vuplhw: 8014 case Intrinsic::s390_vuplf: { 8015 SDValue SrcOp = Op.getOperand(1); 8016 APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 0); 8017 Known = DAG.computeKnownBits(SrcOp, SrcDemE, Depth + 1); 8018 if (IsLogical) { 8019 Known = Known.zext(BitWidth); 8020 } else 8021 Known = Known.sext(BitWidth); 8022 break; 8023 } 8024 default: 8025 break; 8026 } 8027 } else { 8028 switch (Opcode) { 8029 case SystemZISD::JOIN_DWORDS: 8030 case SystemZISD::SELECT_CCMASK: 8031 computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 0); 8032 break; 8033 case 
SystemZISD::REPLICATE: { 8034 SDValue SrcOp = Op.getOperand(0); 8035 Known = DAG.computeKnownBits(SrcOp, Depth + 1); 8036 if (Known.getBitWidth() < BitWidth && isa<ConstantSDNode>(SrcOp)) 8037 Known = Known.sext(BitWidth); // VREPI sign extends the immedate. 8038 break; 8039 } 8040 default: 8041 break; 8042 } 8043 } 8044 8045 // Known has the width of the source operand(s). Adjust if needed to match 8046 // the passed bitwidth. 8047 if (Known.getBitWidth() != BitWidth) 8048 Known = Known.anyextOrTrunc(BitWidth); 8049 } 8050 8051 static unsigned computeNumSignBitsBinOp(SDValue Op, const APInt &DemandedElts, 8052 const SelectionDAG &DAG, unsigned Depth, 8053 unsigned OpNo) { 8054 APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo); 8055 unsigned LHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo), Src0DemE, Depth + 1); 8056 if (LHS == 1) return 1; // Early out. 8057 APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1); 8058 unsigned RHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1); 8059 if (RHS == 1) return 1; // Early out. 8060 unsigned Common = std::min(LHS, RHS); 8061 unsigned SrcBitWidth = Op.getOperand(OpNo).getScalarValueSizeInBits(); 8062 EVT VT = Op.getValueType(); 8063 unsigned VTBits = VT.getScalarSizeInBits(); 8064 if (SrcBitWidth > VTBits) { // PACK 8065 unsigned SrcExtraBits = SrcBitWidth - VTBits; 8066 if (Common > SrcExtraBits) 8067 return (Common - SrcExtraBits); 8068 return 1; 8069 } 8070 assert (SrcBitWidth == VTBits && "Expected operands of same bitwidth."); 8071 return Common; 8072 } 8073 8074 unsigned 8075 SystemZTargetLowering::ComputeNumSignBitsForTargetNode( 8076 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, 8077 unsigned Depth) const { 8078 if (Op.getResNo() != 0) 8079 return 1; 8080 unsigned Opcode = Op.getOpcode(); 8081 if (Opcode == ISD::INTRINSIC_WO_CHAIN) { 8082 unsigned Id = Op.getConstantOperandVal(0); 8083 switch (Id) { 8084 case Intrinsic::s390_vpksh: // PACKS 8085 case Intrinsic::s390_vpksf: 8086 case Intrinsic::s390_vpksg: 8087 case Intrinsic::s390_vpkshs: // PACKS_CC 8088 case Intrinsic::s390_vpksfs: 8089 case Intrinsic::s390_vpksgs: 8090 case Intrinsic::s390_vpklsh: // PACKLS 8091 case Intrinsic::s390_vpklsf: 8092 case Intrinsic::s390_vpklsg: 8093 case Intrinsic::s390_vpklshs: // PACKLS_CC 8094 case Intrinsic::s390_vpklsfs: 8095 case Intrinsic::s390_vpklsgs: 8096 case Intrinsic::s390_vpdi: 8097 case Intrinsic::s390_vsldb: 8098 case Intrinsic::s390_vperm: 8099 return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 1); 8100 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH 8101 case Intrinsic::s390_vuphh: 8102 case Intrinsic::s390_vuphf: 8103 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW 8104 case Intrinsic::s390_vuplhw: 8105 case Intrinsic::s390_vuplf: { 8106 SDValue PackedOp = Op.getOperand(1); 8107 APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 1); 8108 unsigned Tmp = DAG.ComputeNumSignBits(PackedOp, SrcDemE, Depth + 1); 8109 EVT VT = Op.getValueType(); 8110 unsigned VTBits = VT.getScalarSizeInBits(); 8111 Tmp += VTBits - PackedOp.getScalarValueSizeInBits(); 8112 return Tmp; 8113 } 8114 default: 8115 break; 8116 } 8117 } else { 8118 switch (Opcode) { 8119 case SystemZISD::SELECT_CCMASK: 8120 return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 0); 8121 default: 8122 break; 8123 } 8124 } 8125 8126 return 1; 8127 } 8128 8129 bool SystemZTargetLowering:: 8130 isGuaranteedNotToBeUndefOrPoisonForTargetNode(SDValue Op, 8131 const APInt &DemandedElts, const 
SelectionDAG &DAG, 8132 bool PoisonOnly, unsigned Depth) const { 8133 switch (Op->getOpcode()) { 8134 case SystemZISD::PCREL_WRAPPER: 8135 case SystemZISD::PCREL_OFFSET: 8136 return true; 8137 } 8138 return false; 8139 } 8140 8141 unsigned 8142 SystemZTargetLowering::getStackProbeSize(const MachineFunction &MF) const { 8143 const TargetFrameLowering *TFI = Subtarget.getFrameLowering(); 8144 unsigned StackAlign = TFI->getStackAlignment(); 8145 assert(StackAlign >=1 && isPowerOf2_32(StackAlign) && 8146 "Unexpected stack alignment"); 8147 // The default stack probe size is 4096 if the function has no 8148 // stack-probe-size attribute. 8149 unsigned StackProbeSize = 8150 MF.getFunction().getFnAttributeAsParsedInteger("stack-probe-size", 4096); 8151 // Round down to the stack alignment. 8152 StackProbeSize &= ~(StackAlign - 1); 8153 return StackProbeSize ? StackProbeSize : StackAlign; 8154 } 8155 8156 //===----------------------------------------------------------------------===// 8157 // Custom insertion 8158 //===----------------------------------------------------------------------===// 8159 8160 // Force base value Base into a register before MI. Return the register. 8161 static Register forceReg(MachineInstr &MI, MachineOperand &Base, 8162 const SystemZInstrInfo *TII) { 8163 MachineBasicBlock *MBB = MI.getParent(); 8164 MachineFunction &MF = *MBB->getParent(); 8165 MachineRegisterInfo &MRI = MF.getRegInfo(); 8166 8167 if (Base.isReg()) { 8168 // Copy Base into a new virtual register to help register coalescing in 8169 // cases with multiple uses. 8170 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass); 8171 BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::COPY), Reg) 8172 .add(Base); 8173 return Reg; 8174 } 8175 8176 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass); 8177 BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LA), Reg) 8178 .add(Base) 8179 .addImm(0) 8180 .addReg(0); 8181 return Reg; 8182 } 8183 8184 // The CC operand of MI might be missing a kill marker because there 8185 // were multiple uses of CC, and ISel didn't know which to mark. 8186 // Figure out whether MI should have had a kill marker. 8187 static bool checkCCKill(MachineInstr &MI, MachineBasicBlock *MBB) { 8188 // Scan forward through BB for a use/def of CC. 8189 MachineBasicBlock::iterator miI(std::next(MachineBasicBlock::iterator(MI))); 8190 for (MachineBasicBlock::iterator miE = MBB->end(); miI != miE; ++miI) { 8191 const MachineInstr& mi = *miI; 8192 if (mi.readsRegister(SystemZ::CC, /*TRI=*/nullptr)) 8193 return false; 8194 if (mi.definesRegister(SystemZ::CC, /*TRI=*/nullptr)) 8195 break; // Should have kill-flag - update below. 8196 } 8197 8198 // If we hit the end of the block, check whether CC is live into a 8199 // successor. 8200 if (miI == MBB->end()) { 8201 for (const MachineBasicBlock *Succ : MBB->successors()) 8202 if (Succ->isLiveIn(SystemZ::CC)) 8203 return false; 8204 } 8205 8206 return true; 8207 } 8208 8209 // Return true if it is OK for this Select pseudo-opcode to be cascaded 8210 // together with other Select pseudo-opcodes into a single basic-block with 8211 // a conditional jump around it. 
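// (The cases below simply enumerate the Select* pseudos for the GPR, FP and
// vector register classes; emitSelect relies on this to decide which
// neighbouring instructions may share one conditional region.)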
8212 static bool isSelectPseudo(MachineInstr &MI) { 8213 switch (MI.getOpcode()) { 8214 case SystemZ::Select32: 8215 case SystemZ::Select64: 8216 case SystemZ::Select128: 8217 case SystemZ::SelectF32: 8218 case SystemZ::SelectF64: 8219 case SystemZ::SelectF128: 8220 case SystemZ::SelectVR32: 8221 case SystemZ::SelectVR64: 8222 case SystemZ::SelectVR128: 8223 return true; 8224 8225 default: 8226 return false; 8227 } 8228 } 8229 8230 // Helper function, which inserts PHI functions into SinkMBB: 8231 // %Result(i) = phi [ %FalseValue(i), FalseMBB ], [ %TrueValue(i), TrueMBB ], 8232 // where %FalseValue(i) and %TrueValue(i) are taken from Selects. 8233 static void createPHIsForSelects(SmallVector<MachineInstr*, 8> &Selects, 8234 MachineBasicBlock *TrueMBB, 8235 MachineBasicBlock *FalseMBB, 8236 MachineBasicBlock *SinkMBB) { 8237 MachineFunction *MF = TrueMBB->getParent(); 8238 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); 8239 8240 MachineInstr *FirstMI = Selects.front(); 8241 unsigned CCValid = FirstMI->getOperand(3).getImm(); 8242 unsigned CCMask = FirstMI->getOperand(4).getImm(); 8243 8244 MachineBasicBlock::iterator SinkInsertionPoint = SinkMBB->begin(); 8245 8246 // As we are creating the PHIs, we have to be careful if there is more than 8247 // one. Later Selects may reference the results of earlier Selects, but later 8248 // PHIs have to reference the individual true/false inputs from earlier PHIs. 8249 // That also means that PHI construction must work forward from earlier to 8250 // later, and that the code must maintain a mapping from earlier PHI's 8251 // destination registers, and the registers that went into the PHI. 8252 DenseMap<unsigned, std::pair<unsigned, unsigned>> RegRewriteTable; 8253 8254 for (auto *MI : Selects) { 8255 Register DestReg = MI->getOperand(0).getReg(); 8256 Register TrueReg = MI->getOperand(1).getReg(); 8257 Register FalseReg = MI->getOperand(2).getReg(); 8258 8259 // If this Select we are generating is the opposite condition from 8260 // the jump we generated, then we have to swap the operands for the 8261 // PHI that is going to be generated. 8262 if (MI->getOperand(4).getImm() == (CCValid ^ CCMask)) 8263 std::swap(TrueReg, FalseReg); 8264 8265 if (RegRewriteTable.contains(TrueReg)) 8266 TrueReg = RegRewriteTable[TrueReg].first; 8267 8268 if (RegRewriteTable.contains(FalseReg)) 8269 FalseReg = RegRewriteTable[FalseReg].second; 8270 8271 DebugLoc DL = MI->getDebugLoc(); 8272 BuildMI(*SinkMBB, SinkInsertionPoint, DL, TII->get(SystemZ::PHI), DestReg) 8273 .addReg(TrueReg).addMBB(TrueMBB) 8274 .addReg(FalseReg).addMBB(FalseMBB); 8275 8276 // Add this PHI to the rewrite table. 8277 RegRewriteTable[DestReg] = std::make_pair(TrueReg, FalseReg); 8278 } 8279 8280 MF->getProperties().reset(MachineFunctionProperties::Property::NoPHIs); 8281 } 8282 8283 MachineBasicBlock * 8284 SystemZTargetLowering::emitAdjCallStack(MachineInstr &MI, 8285 MachineBasicBlock *BB) const { 8286 MachineFunction &MF = *BB->getParent(); 8287 MachineFrameInfo &MFI = MF.getFrameInfo(); 8288 auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>(); 8289 assert(TFL->hasReservedCallFrame(MF) && 8290 "ADJSTACKDOWN and ADJSTACKUP should be no-ops"); 8291 (void)TFL; 8292 // Get the MaxCallFrameSize value and erase MI since it serves no further 8293 // purpose as the call frame is statically reserved in the prolog. Set 8294 // AdjustsStack as MI is *not* mapped as a frame instruction. 
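// (The immediate operand of the pseudo is the call-frame byte count; only the
// running maximum needs to be recorded here, since the prolog reserves the
// full call frame up front.)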
8295 uint32_t NumBytes = MI.getOperand(0).getImm(); 8296 if (NumBytes > MFI.getMaxCallFrameSize()) 8297 MFI.setMaxCallFrameSize(NumBytes); 8298 MFI.setAdjustsStack(true); 8299 8300 MI.eraseFromParent(); 8301 return BB; 8302 } 8303 8304 // Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI. 8305 MachineBasicBlock * 8306 SystemZTargetLowering::emitSelect(MachineInstr &MI, 8307 MachineBasicBlock *MBB) const { 8308 assert(isSelectPseudo(MI) && "Bad call to emitSelect()"); 8309 const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); 8310 8311 unsigned CCValid = MI.getOperand(3).getImm(); 8312 unsigned CCMask = MI.getOperand(4).getImm(); 8313 8314 // If we have a sequence of Select* pseudo instructions using the 8315 // same condition code value, we want to expand all of them into 8316 // a single pair of basic blocks using the same condition. 8317 SmallVector<MachineInstr*, 8> Selects; 8318 SmallVector<MachineInstr*, 8> DbgValues; 8319 Selects.push_back(&MI); 8320 unsigned Count = 0; 8321 for (MachineInstr &NextMI : llvm::make_range( 8322 std::next(MachineBasicBlock::iterator(MI)), MBB->end())) { 8323 if (isSelectPseudo(NextMI)) { 8324 assert(NextMI.getOperand(3).getImm() == CCValid && 8325 "Bad CCValid operands since CC was not redefined."); 8326 if (NextMI.getOperand(4).getImm() == CCMask || 8327 NextMI.getOperand(4).getImm() == (CCValid ^ CCMask)) { 8328 Selects.push_back(&NextMI); 8329 continue; 8330 } 8331 break; 8332 } 8333 if (NextMI.definesRegister(SystemZ::CC, /*TRI=*/nullptr) || 8334 NextMI.usesCustomInsertionHook()) 8335 break; 8336 bool User = false; 8337 for (auto *SelMI : Selects) 8338 if (NextMI.readsVirtualRegister(SelMI->getOperand(0).getReg())) { 8339 User = true; 8340 break; 8341 } 8342 if (NextMI.isDebugInstr()) { 8343 if (User) { 8344 assert(NextMI.isDebugValue() && "Unhandled debug opcode."); 8345 DbgValues.push_back(&NextMI); 8346 } 8347 } else if (User || ++Count > 20) 8348 break; 8349 } 8350 8351 MachineInstr *LastMI = Selects.back(); 8352 bool CCKilled = (LastMI->killsRegister(SystemZ::CC, /*TRI=*/nullptr) || 8353 checkCCKill(*LastMI, MBB)); 8354 MachineBasicBlock *StartMBB = MBB; 8355 MachineBasicBlock *JoinMBB = SystemZ::splitBlockAfter(LastMI, MBB); 8356 MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB); 8357 8358 // Unless CC was killed in the last Select instruction, mark it as 8359 // live-in to both FalseMBB and JoinMBB. 8360 if (!CCKilled) { 8361 FalseMBB->addLiveIn(SystemZ::CC); 8362 JoinMBB->addLiveIn(SystemZ::CC); 8363 } 8364 8365 // StartMBB: 8366 // BRC CCMask, JoinMBB 8367 // # fallthrough to FalseMBB 8368 MBB = StartMBB; 8369 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::BRC)) 8370 .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB); 8371 MBB->addSuccessor(JoinMBB); 8372 MBB->addSuccessor(FalseMBB); 8373 8374 // FalseMBB: 8375 // # fallthrough to JoinMBB 8376 MBB = FalseMBB; 8377 MBB->addSuccessor(JoinMBB); 8378 8379 // JoinMBB: 8380 // %Result = phi [ %FalseReg, FalseMBB ], [ %TrueReg, StartMBB ] 8381 // ... 8382 MBB = JoinMBB; 8383 createPHIsForSelects(Selects, StartMBB, FalseMBB, MBB); 8384 for (auto *SelMI : Selects) 8385 SelMI->eraseFromParent(); 8386 8387 MachineBasicBlock::iterator InsertPos = MBB->getFirstNonPHI(); 8388 for (auto *DbgMI : DbgValues) 8389 MBB->splice(InsertPos, StartMBB, DbgMI); 8390 8391 return JoinMBB; 8392 } 8393 8394 // Implement EmitInstrWithCustomInserter for pseudo CondStore* instruction MI. 
8395 // StoreOpcode is the store to use and Invert says whether the store should 8396 // happen when the condition is false rather than true. If a STORE ON 8397 // CONDITION is available, STOCOpcode is its opcode, otherwise it is 0. 8398 MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI, 8399 MachineBasicBlock *MBB, 8400 unsigned StoreOpcode, 8401 unsigned STOCOpcode, 8402 bool Invert) const { 8403 const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); 8404 8405 Register SrcReg = MI.getOperand(0).getReg(); 8406 MachineOperand Base = MI.getOperand(1); 8407 int64_t Disp = MI.getOperand(2).getImm(); 8408 Register IndexReg = MI.getOperand(3).getReg(); 8409 unsigned CCValid = MI.getOperand(4).getImm(); 8410 unsigned CCMask = MI.getOperand(5).getImm(); 8411 DebugLoc DL = MI.getDebugLoc(); 8412 8413 StoreOpcode = TII->getOpcodeForOffset(StoreOpcode, Disp); 8414 8415 // ISel pattern matching also adds a load memory operand of the same 8416 // address, so take special care to find the storing memory operand. 8417 MachineMemOperand *MMO = nullptr; 8418 for (auto *I : MI.memoperands()) 8419 if (I->isStore()) { 8420 MMO = I; 8421 break; 8422 } 8423 8424 // Use STOCOpcode if possible. We could use different store patterns in 8425 // order to avoid matching the index register, but the performance trade-offs 8426 // might be more complicated in that case. 8427 if (STOCOpcode && !IndexReg && Subtarget.hasLoadStoreOnCond()) { 8428 if (Invert) 8429 CCMask ^= CCValid; 8430 8431 BuildMI(*MBB, MI, DL, TII->get(STOCOpcode)) 8432 .addReg(SrcReg) 8433 .add(Base) 8434 .addImm(Disp) 8435 .addImm(CCValid) 8436 .addImm(CCMask) 8437 .addMemOperand(MMO); 8438 8439 MI.eraseFromParent(); 8440 return MBB; 8441 } 8442 8443 // Get the condition needed to branch around the store. 8444 if (!Invert) 8445 CCMask ^= CCValid; 8446 8447 MachineBasicBlock *StartMBB = MBB; 8448 MachineBasicBlock *JoinMBB = SystemZ::splitBlockBefore(MI, MBB); 8449 MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB); 8450 8451 // Unless CC was killed in the CondStore instruction, mark it as 8452 // live-in to both FalseMBB and JoinMBB. 8453 if (!MI.killsRegister(SystemZ::CC, /*TRI=*/nullptr) && 8454 !checkCCKill(MI, JoinMBB)) { 8455 FalseMBB->addLiveIn(SystemZ::CC); 8456 JoinMBB->addLiveIn(SystemZ::CC); 8457 } 8458 8459 // StartMBB: 8460 // BRC CCMask, JoinMBB 8461 // # fallthrough to FalseMBB 8462 MBB = StartMBB; 8463 BuildMI(MBB, DL, TII->get(SystemZ::BRC)) 8464 .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB); 8465 MBB->addSuccessor(JoinMBB); 8466 MBB->addSuccessor(FalseMBB); 8467 8468 // FalseMBB: 8469 // store %SrcReg, %Disp(%Index,%Base) 8470 // # fallthrough to JoinMBB 8471 MBB = FalseMBB; 8472 BuildMI(MBB, DL, TII->get(StoreOpcode)) 8473 .addReg(SrcReg) 8474 .add(Base) 8475 .addImm(Disp) 8476 .addReg(IndexReg) 8477 .addMemOperand(MMO); 8478 MBB->addSuccessor(JoinMBB); 8479 8480 MI.eraseFromParent(); 8481 return JoinMBB; 8482 } 8483 8484 // Implement EmitInstrWithCustomInserter for pseudo [SU]Cmp128Hi instruction MI. 8485 MachineBasicBlock * 8486 SystemZTargetLowering::emitICmp128Hi(MachineInstr &MI, 8487 MachineBasicBlock *MBB, 8488 bool Unsigned) const { 8489 MachineFunction &MF = *MBB->getParent(); 8490 const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); 8491 MachineRegisterInfo &MRI = MF.getRegInfo(); 8492 8493 // Synthetic instruction to compare 128-bit values. 8494 // Sets CC 1 if Op0 > Op1, sets a different CC otherwise. 
  Register Op0 = MI.getOperand(0).getReg();
  Register Op1 = MI.getOperand(1).getReg();

  MachineBasicBlock *StartMBB = MBB;
  MachineBasicBlock *JoinMBB = SystemZ::splitBlockAfter(MI, MBB);
  MachineBasicBlock *HiEqMBB = SystemZ::emitBlockAfter(StartMBB);

  // StartMBB:
  //
  //  Use VECTOR ELEMENT COMPARE [LOGICAL] to compare the high parts.
  //  Swap the inputs to get:
  //    CC 1 if high(Op0) > high(Op1)
  //    CC 2 if high(Op0) < high(Op1)
  //    CC 0 if high(Op0) == high(Op1)
  //
  //  If CC != 0, we're done, so jump over the next instruction.
  //
  //   VEC[L]G Op1, Op0
  //   JNE JoinMBB
  //   # fallthrough to HiEqMBB
  MBB = StartMBB;
  int HiOpcode = Unsigned ? SystemZ::VECLG : SystemZ::VECG;
  BuildMI(MBB, MI.getDebugLoc(), TII->get(HiOpcode))
    .addReg(Op1).addReg(Op0);
  BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::BRC))
    .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE).addMBB(JoinMBB);
  MBB->addSuccessor(JoinMBB);
  MBB->addSuccessor(HiEqMBB);

  // HiEqMBB:
  //
  //  Otherwise, use VECTOR COMPARE HIGH LOGICAL.
  //  Since we already know the high parts are equal, the CC
  //  result will only depend on the low parts:
  //    CC 1 if low(Op0) > low(Op1)
  //    CC 3 if low(Op0) <= low(Op1)
  //
  //   VCHLGS Tmp, Op0, Op1
  //   # fallthrough to JoinMBB
  MBB = HiEqMBB;
  Register Temp = MRI.createVirtualRegister(&SystemZ::VR128BitRegClass);
  BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::VCHLGS), Temp)
    .addReg(Op0).addReg(Op1);
  MBB->addSuccessor(JoinMBB);

  // Mark CC as live-in to JoinMBB.
  JoinMBB->addLiveIn(SystemZ::CC);

  MI.eraseFromParent();
  return JoinMBB;
}

// Implement EmitInstrWithCustomInserter for subword pseudo ATOMIC_LOADW_* or
// ATOMIC_SWAPW instruction MI. BinOpcode is the instruction that performs
// the binary operation elided by "*", or 0 for ATOMIC_SWAPW. Invert says
// whether the field should be inverted after performing BinOpcode (e.g. for
// NAND).
MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(
    MachineInstr &MI, MachineBasicBlock *MBB, unsigned BinOpcode,
    bool Invert) const {
  MachineFunction &MF = *MBB->getParent();
  const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  // Extract the operands. Base can be a register or a frame index.
  // Src2 can be a register or immediate.
  Register Dest = MI.getOperand(0).getReg();
  MachineOperand Base = earlyUseOperand(MI.getOperand(1));
  int64_t Disp = MI.getOperand(2).getImm();
  MachineOperand Src2 = earlyUseOperand(MI.getOperand(3));
  Register BitShift = MI.getOperand(4).getReg();
  Register NegBitShift = MI.getOperand(5).getReg();
  unsigned BitSize = MI.getOperand(6).getImm();
  DebugLoc DL = MI.getDebugLoc();

  // Get the right opcodes for the displacement.
  unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
  unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
  assert(LOpcode && CSOpcode && "Displacement out of range");

  // Create virtual registers for temporary results.
  Register OrigVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
  Register OldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
  Register NewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
  Register RotatedOldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
  Register RotatedNewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);

  // Insert a basic block for the main loop.
  MachineBasicBlock *StartMBB = MBB;
  MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
  MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);

  // StartMBB:
  //   ...
  //   %OrigVal = L Disp(%Base)
  //   # fall through to LoopMBB
  MBB = StartMBB;
  BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
  MBB->addSuccessor(LoopMBB);

  // LoopMBB:
  //   %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, LoopMBB ]
  //   %RotatedOldVal = RLL %OldVal, 0(%BitShift)
  //   %RotatedNewVal = OP %RotatedOldVal, %Src2
  //   %NewVal = RLL %RotatedNewVal, 0(%NegBitShift)
  //   %Dest = CS %OldVal, %NewVal, Disp(%Base)
  //   JNE LoopMBB
  //   # fall through to DoneMBB
  MBB = LoopMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
    .addReg(OrigVal).addMBB(StartMBB)
    .addReg(Dest).addMBB(LoopMBB);
  BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
    .addReg(OldVal).addReg(BitShift).addImm(0);
  if (Invert) {
    // Perform the operation normally and then invert every bit of the field.
    Register Tmp = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
    BuildMI(MBB, DL, TII->get(BinOpcode), Tmp).addReg(RotatedOldVal).add(Src2);
    // XILF with the upper BitSize bits set.
    BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal)
      .addReg(Tmp).addImm(-1U << (32 - BitSize));
  } else if (BinOpcode)
    // A simple binary operation.
    BuildMI(MBB, DL, TII->get(BinOpcode), RotatedNewVal)
        .addReg(RotatedOldVal)
        .add(Src2);
  else
    // Use RISBG to rotate Src2 into position and use it to replace the
    // field in RotatedOldVal.
    BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedNewVal)
      .addReg(RotatedOldVal).addReg(Src2.getReg())
      .addImm(32).addImm(31 + BitSize).addImm(32 - BitSize);
  BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
    .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
  BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
      .addReg(OldVal)
      .addReg(NewVal)
      .add(Base)
      .addImm(Disp);
  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
    .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
  MBB->addSuccessor(LoopMBB);
  MBB->addSuccessor(DoneMBB);

  MI.eraseFromParent();
  return DoneMBB;
}

// Implement EmitInstrWithCustomInserter for subword pseudo
// ATOMIC_LOADW_{,U}{MIN,MAX} instruction MI. CompareOpcode is the
// instruction that should be used to compare the current field with the
// minimum or maximum value. KeepOldMask is the BRC condition-code mask
// for when the current field should be kept.
MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax(
    MachineInstr &MI, MachineBasicBlock *MBB, unsigned CompareOpcode,
    unsigned KeepOldMask) const {
  MachineFunction &MF = *MBB->getParent();
  const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  // Extract the operands.
Base can be a register or a frame index. 8656 Register Dest = MI.getOperand(0).getReg(); 8657 MachineOperand Base = earlyUseOperand(MI.getOperand(1)); 8658 int64_t Disp = MI.getOperand(2).getImm(); 8659 Register Src2 = MI.getOperand(3).getReg(); 8660 Register BitShift = MI.getOperand(4).getReg(); 8661 Register NegBitShift = MI.getOperand(5).getReg(); 8662 unsigned BitSize = MI.getOperand(6).getImm(); 8663 DebugLoc DL = MI.getDebugLoc(); 8664 8665 // Get the right opcodes for the displacement. 8666 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp); 8667 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp); 8668 assert(LOpcode && CSOpcode && "Displacement out of range"); 8669 8670 // Create virtual registers for temporary results. 8671 Register OrigVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass); 8672 Register OldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass); 8673 Register NewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass); 8674 Register RotatedOldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass); 8675 Register RotatedAltVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass); 8676 Register RotatedNewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass); 8677 8678 // Insert 3 basic blocks for the loop. 8679 MachineBasicBlock *StartMBB = MBB; 8680 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB); 8681 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB); 8682 MachineBasicBlock *UseAltMBB = SystemZ::emitBlockAfter(LoopMBB); 8683 MachineBasicBlock *UpdateMBB = SystemZ::emitBlockAfter(UseAltMBB); 8684 8685 // StartMBB: 8686 // ... 8687 // %OrigVal = L Disp(%Base) 8688 // # fall through to LoopMBB 8689 MBB = StartMBB; 8690 BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0); 8691 MBB->addSuccessor(LoopMBB); 8692 8693 // LoopMBB: 8694 // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, UpdateMBB ] 8695 // %RotatedOldVal = RLL %OldVal, 0(%BitShift) 8696 // CompareOpcode %RotatedOldVal, %Src2 8697 // BRC KeepOldMask, UpdateMBB 8698 MBB = LoopMBB; 8699 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal) 8700 .addReg(OrigVal).addMBB(StartMBB) 8701 .addReg(Dest).addMBB(UpdateMBB); 8702 BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal) 8703 .addReg(OldVal).addReg(BitShift).addImm(0); 8704 BuildMI(MBB, DL, TII->get(CompareOpcode)) 8705 .addReg(RotatedOldVal).addReg(Src2); 8706 BuildMI(MBB, DL, TII->get(SystemZ::BRC)) 8707 .addImm(SystemZ::CCMASK_ICMP).addImm(KeepOldMask).addMBB(UpdateMBB); 8708 MBB->addSuccessor(UpdateMBB); 8709 MBB->addSuccessor(UseAltMBB); 8710 8711 // UseAltMBB: 8712 // %RotatedAltVal = RISBG %RotatedOldVal, %Src2, 32, 31 + BitSize, 0 8713 // # fall through to UpdateMBB 8714 MBB = UseAltMBB; 8715 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedAltVal) 8716 .addReg(RotatedOldVal).addReg(Src2) 8717 .addImm(32).addImm(31 + BitSize).addImm(0); 8718 MBB->addSuccessor(UpdateMBB); 8719 8720 // UpdateMBB: 8721 // %RotatedNewVal = PHI [ %RotatedOldVal, LoopMBB ], 8722 // [ %RotatedAltVal, UseAltMBB ] 8723 // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift) 8724 // %Dest = CS %OldVal, %NewVal, Disp(%Base) 8725 // JNE LoopMBB 8726 // # fall through to DoneMBB 8727 MBB = UpdateMBB; 8728 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RotatedNewVal) 8729 .addReg(RotatedOldVal).addMBB(LoopMBB) 8730 .addReg(RotatedAltVal).addMBB(UseAltMBB); 8731 BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal) 8732 .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0); 8733 BuildMI(MBB, DL, 
TII->get(CSOpcode), Dest) 8734 .addReg(OldVal) 8735 .addReg(NewVal) 8736 .add(Base) 8737 .addImm(Disp); 8738 BuildMI(MBB, DL, TII->get(SystemZ::BRC)) 8739 .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB); 8740 MBB->addSuccessor(LoopMBB); 8741 MBB->addSuccessor(DoneMBB); 8742 8743 MI.eraseFromParent(); 8744 return DoneMBB; 8745 } 8746 8747 // Implement EmitInstrWithCustomInserter for subword pseudo ATOMIC_CMP_SWAPW 8748 // instruction MI. 8749 MachineBasicBlock * 8750 SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI, 8751 MachineBasicBlock *MBB) const { 8752 MachineFunction &MF = *MBB->getParent(); 8753 const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); 8754 MachineRegisterInfo &MRI = MF.getRegInfo(); 8755 8756 // Extract the operands. Base can be a register or a frame index. 8757 Register Dest = MI.getOperand(0).getReg(); 8758 MachineOperand Base = earlyUseOperand(MI.getOperand(1)); 8759 int64_t Disp = MI.getOperand(2).getImm(); 8760 Register CmpVal = MI.getOperand(3).getReg(); 8761 Register OrigSwapVal = MI.getOperand(4).getReg(); 8762 Register BitShift = MI.getOperand(5).getReg(); 8763 Register NegBitShift = MI.getOperand(6).getReg(); 8764 int64_t BitSize = MI.getOperand(7).getImm(); 8765 DebugLoc DL = MI.getDebugLoc(); 8766 8767 const TargetRegisterClass *RC = &SystemZ::GR32BitRegClass; 8768 8769 // Get the right opcodes for the displacement and zero-extension. 8770 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp); 8771 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp); 8772 unsigned ZExtOpcode = BitSize == 8 ? SystemZ::LLCR : SystemZ::LLHR; 8773 assert(LOpcode && CSOpcode && "Displacement out of range"); 8774 8775 // Create virtual registers for temporary results. 8776 Register OrigOldVal = MRI.createVirtualRegister(RC); 8777 Register OldVal = MRI.createVirtualRegister(RC); 8778 Register SwapVal = MRI.createVirtualRegister(RC); 8779 Register StoreVal = MRI.createVirtualRegister(RC); 8780 Register OldValRot = MRI.createVirtualRegister(RC); 8781 Register RetryOldVal = MRI.createVirtualRegister(RC); 8782 Register RetrySwapVal = MRI.createVirtualRegister(RC); 8783 8784 // Insert 2 basic blocks for the loop. 8785 MachineBasicBlock *StartMBB = MBB; 8786 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB); 8787 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB); 8788 MachineBasicBlock *SetMBB = SystemZ::emitBlockAfter(LoopMBB); 8789 8790 // StartMBB: 8791 // ... 8792 // %OrigOldVal = L Disp(%Base) 8793 // # fall through to LoopMBB 8794 MBB = StartMBB; 8795 BuildMI(MBB, DL, TII->get(LOpcode), OrigOldVal) 8796 .add(Base) 8797 .addImm(Disp) 8798 .addReg(0); 8799 MBB->addSuccessor(LoopMBB); 8800 8801 // LoopMBB: 8802 // %OldVal = phi [ %OrigOldVal, EntryBB ], [ %RetryOldVal, SetMBB ] 8803 // %SwapVal = phi [ %OrigSwapVal, EntryBB ], [ %RetrySwapVal, SetMBB ] 8804 // %OldValRot = RLL %OldVal, BitSize(%BitShift) 8805 // ^^ The low BitSize bits contain the field 8806 // of interest. 8807 // %RetrySwapVal = RISBG32 %SwapVal, %OldValRot, 32, 63-BitSize, 0 8808 // ^^ Replace the upper 32-BitSize bits of the 8809 // swap value with those that we loaded and rotated. 
8810 // %Dest = LL[CH] %OldValRot 8811 // CR %Dest, %CmpVal 8812 // JNE DoneMBB 8813 // # Fall through to SetMBB 8814 MBB = LoopMBB; 8815 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal) 8816 .addReg(OrigOldVal).addMBB(StartMBB) 8817 .addReg(RetryOldVal).addMBB(SetMBB); 8818 BuildMI(MBB, DL, TII->get(SystemZ::PHI), SwapVal) 8819 .addReg(OrigSwapVal).addMBB(StartMBB) 8820 .addReg(RetrySwapVal).addMBB(SetMBB); 8821 BuildMI(MBB, DL, TII->get(SystemZ::RLL), OldValRot) 8822 .addReg(OldVal).addReg(BitShift).addImm(BitSize); 8823 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetrySwapVal) 8824 .addReg(SwapVal).addReg(OldValRot).addImm(32).addImm(63 - BitSize).addImm(0); 8825 BuildMI(MBB, DL, TII->get(ZExtOpcode), Dest) 8826 .addReg(OldValRot); 8827 BuildMI(MBB, DL, TII->get(SystemZ::CR)) 8828 .addReg(Dest).addReg(CmpVal); 8829 BuildMI(MBB, DL, TII->get(SystemZ::BRC)) 8830 .addImm(SystemZ::CCMASK_ICMP) 8831 .addImm(SystemZ::CCMASK_CMP_NE).addMBB(DoneMBB); 8832 MBB->addSuccessor(DoneMBB); 8833 MBB->addSuccessor(SetMBB); 8834 8835 // SetMBB: 8836 // %StoreVal = RLL %RetrySwapVal, -BitSize(%NegBitShift) 8837 // ^^ Rotate the new field to its proper position. 8838 // %RetryOldVal = CS %OldVal, %StoreVal, Disp(%Base) 8839 // JNE LoopMBB 8840 // # fall through to ExitMBB 8841 MBB = SetMBB; 8842 BuildMI(MBB, DL, TII->get(SystemZ::RLL), StoreVal) 8843 .addReg(RetrySwapVal).addReg(NegBitShift).addImm(-BitSize); 8844 BuildMI(MBB, DL, TII->get(CSOpcode), RetryOldVal) 8845 .addReg(OldVal) 8846 .addReg(StoreVal) 8847 .add(Base) 8848 .addImm(Disp); 8849 BuildMI(MBB, DL, TII->get(SystemZ::BRC)) 8850 .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB); 8851 MBB->addSuccessor(LoopMBB); 8852 MBB->addSuccessor(DoneMBB); 8853 8854 // If the CC def wasn't dead in the ATOMIC_CMP_SWAPW, mark CC as live-in 8855 // to the block after the loop. At this point, CC may have been defined 8856 // either by the CR in LoopMBB or by the CS in SetMBB. 8857 if (!MI.registerDefIsDead(SystemZ::CC, /*TRI=*/nullptr)) 8858 DoneMBB->addLiveIn(SystemZ::CC); 8859 8860 MI.eraseFromParent(); 8861 return DoneMBB; 8862 } 8863 8864 // Emit a move from two GR64s to a GR128. 8865 MachineBasicBlock * 8866 SystemZTargetLowering::emitPair128(MachineInstr &MI, 8867 MachineBasicBlock *MBB) const { 8868 const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); 8869 const DebugLoc &DL = MI.getDebugLoc(); 8870 8871 Register Dest = MI.getOperand(0).getReg(); 8872 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::REG_SEQUENCE), Dest) 8873 .add(MI.getOperand(1)) 8874 .addImm(SystemZ::subreg_h64) 8875 .add(MI.getOperand(2)) 8876 .addImm(SystemZ::subreg_l64); 8877 MI.eraseFromParent(); 8878 return MBB; 8879 } 8880 8881 // Emit an extension from a GR64 to a GR128. ClearEven is true 8882 // if the high register of the GR128 value must be cleared or false if 8883 // it's "don't care". 
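// Roughly: the 64-bit source is inserted into the low half (subreg_l64) of
// an IMPLICIT_DEF GR128; with ClearEven the high half is first set to zero
// via LLILL 0, as a zero-extending user requires, while an any-extending
// user can leave it undefined.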
8884 MachineBasicBlock *SystemZTargetLowering::emitExt128(MachineInstr &MI, 8885 MachineBasicBlock *MBB, 8886 bool ClearEven) const { 8887 MachineFunction &MF = *MBB->getParent(); 8888 const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); 8889 MachineRegisterInfo &MRI = MF.getRegInfo(); 8890 DebugLoc DL = MI.getDebugLoc(); 8891 8892 Register Dest = MI.getOperand(0).getReg(); 8893 Register Src = MI.getOperand(1).getReg(); 8894 Register In128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass); 8895 8896 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), In128); 8897 if (ClearEven) { 8898 Register NewIn128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass); 8899 Register Zero64 = MRI.createVirtualRegister(&SystemZ::GR64BitRegClass); 8900 8901 BuildMI(*MBB, MI, DL, TII->get(SystemZ::LLILL), Zero64) 8902 .addImm(0); 8903 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewIn128) 8904 .addReg(In128).addReg(Zero64).addImm(SystemZ::subreg_h64); 8905 In128 = NewIn128; 8906 } 8907 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest) 8908 .addReg(In128).addReg(Src).addImm(SystemZ::subreg_l64); 8909 8910 MI.eraseFromParent(); 8911 return MBB; 8912 } 8913 8914 MachineBasicBlock * 8915 SystemZTargetLowering::emitMemMemWrapper(MachineInstr &MI, 8916 MachineBasicBlock *MBB, 8917 unsigned Opcode, bool IsMemset) const { 8918 MachineFunction &MF = *MBB->getParent(); 8919 const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); 8920 MachineRegisterInfo &MRI = MF.getRegInfo(); 8921 DebugLoc DL = MI.getDebugLoc(); 8922 8923 MachineOperand DestBase = earlyUseOperand(MI.getOperand(0)); 8924 uint64_t DestDisp = MI.getOperand(1).getImm(); 8925 MachineOperand SrcBase = MachineOperand::CreateReg(0U, false); 8926 uint64_t SrcDisp; 8927 8928 // Fold the displacement Disp if it is out of range. 8929 auto foldDisplIfNeeded = [&](MachineOperand &Base, uint64_t &Disp) -> void { 8930 if (!isUInt<12>(Disp)) { 8931 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass); 8932 unsigned Opcode = TII->getOpcodeForOffset(SystemZ::LA, Disp); 8933 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(Opcode), Reg) 8934 .add(Base).addImm(Disp).addReg(0); 8935 Base = MachineOperand::CreateReg(Reg, false); 8936 Disp = 0; 8937 } 8938 }; 8939 8940 if (!IsMemset) { 8941 SrcBase = earlyUseOperand(MI.getOperand(2)); 8942 SrcDisp = MI.getOperand(3).getImm(); 8943 } else { 8944 SrcBase = DestBase; 8945 SrcDisp = DestDisp++; 8946 foldDisplIfNeeded(DestBase, DestDisp); 8947 } 8948 8949 MachineOperand &LengthMO = MI.getOperand(IsMemset ? 2 : 4); 8950 bool IsImmForm = LengthMO.isImm(); 8951 bool IsRegForm = !IsImmForm; 8952 8953 // Build and insert one Opcode of Length, with special treatment for memset. 
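  // (For memset the source was set up above to alias the destination one
  // byte behind, so the helper first stores the byte with MVI or STC and the
  // following memory-to-memory copy then propagates it forward byte by byte
  // through the overlapping range. Sketch of the intent; see insertMemMemOp
  // below.)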
8954 auto insertMemMemOp = [&](MachineBasicBlock *InsMBB, 8955 MachineBasicBlock::iterator InsPos, 8956 MachineOperand DBase, uint64_t DDisp, 8957 MachineOperand SBase, uint64_t SDisp, 8958 unsigned Length) -> void { 8959 assert(Length > 0 && Length <= 256 && "Building memory op with bad length."); 8960 if (IsMemset) { 8961 MachineOperand ByteMO = earlyUseOperand(MI.getOperand(3)); 8962 if (ByteMO.isImm()) 8963 BuildMI(*InsMBB, InsPos, DL, TII->get(SystemZ::MVI)) 8964 .add(SBase).addImm(SDisp).add(ByteMO); 8965 else 8966 BuildMI(*InsMBB, InsPos, DL, TII->get(SystemZ::STC)) 8967 .add(ByteMO).add(SBase).addImm(SDisp).addReg(0); 8968 if (--Length == 0) 8969 return; 8970 } 8971 BuildMI(*MBB, InsPos, DL, TII->get(Opcode)) 8972 .add(DBase).addImm(DDisp).addImm(Length) 8973 .add(SBase).addImm(SDisp) 8974 .setMemRefs(MI.memoperands()); 8975 }; 8976 8977 bool NeedsLoop = false; 8978 uint64_t ImmLength = 0; 8979 Register LenAdjReg = SystemZ::NoRegister; 8980 if (IsImmForm) { 8981 ImmLength = LengthMO.getImm(); 8982 ImmLength += IsMemset ? 2 : 1; // Add back the subtracted adjustment. 8983 if (ImmLength == 0) { 8984 MI.eraseFromParent(); 8985 return MBB; 8986 } 8987 if (Opcode == SystemZ::CLC) { 8988 if (ImmLength > 3 * 256) 8989 // A two-CLC sequence is a clear win over a loop, not least because 8990 // it needs only one branch. A three-CLC sequence needs the same 8991 // number of branches as a loop (i.e. 2), but is shorter. That 8992 // brings us to lengths greater than 768 bytes. It seems relatively 8993 // likely that a difference will be found within the first 768 bytes, 8994 // so we just optimize for the smallest number of branch 8995 // instructions, in order to avoid polluting the prediction buffer 8996 // too much. 8997 NeedsLoop = true; 8998 } else if (ImmLength > 6 * 256) 8999 // The heuristic we use is to prefer loops for anything that would 9000 // require 7 or more MVCs. With these kinds of sizes there isn't much 9001 // to choose between straight-line code and looping code, since the 9002 // time will be dominated by the MVCs themselves. 9003 NeedsLoop = true; 9004 } else { 9005 NeedsLoop = true; 9006 LenAdjReg = LengthMO.getReg(); 9007 } 9008 9009 // When generating more than one CLC, all but the last will need to 9010 // branch to the end when a difference is found. 9011 MachineBasicBlock *EndMBB = 9012 (Opcode == SystemZ::CLC && (ImmLength > 256 || NeedsLoop) 9013 ? SystemZ::splitBlockAfter(MI, MBB) 9014 : nullptr); 9015 9016 if (NeedsLoop) { 9017 Register StartCountReg = 9018 MRI.createVirtualRegister(&SystemZ::GR64BitRegClass); 9019 if (IsImmForm) { 9020 TII->loadImmediate(*MBB, MI, StartCountReg, ImmLength / 256); 9021 ImmLength &= 255; 9022 } else { 9023 BuildMI(*MBB, MI, DL, TII->get(SystemZ::SRLG), StartCountReg) 9024 .addReg(LenAdjReg) 9025 .addReg(0) 9026 .addImm(8); 9027 } 9028 9029 bool HaveSingleBase = DestBase.isIdenticalTo(SrcBase); 9030 auto loadZeroAddress = [&]() -> MachineOperand { 9031 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass); 9032 BuildMI(*MBB, MI, DL, TII->get(SystemZ::LGHI), Reg).addImm(0); 9033 return MachineOperand::CreateReg(Reg, false); 9034 }; 9035 if (DestBase.isReg() && DestBase.getReg() == SystemZ::NoRegister) 9036 DestBase = loadZeroAddress(); 9037 if (SrcBase.isReg() && SrcBase.getReg() == SystemZ::NoRegister) 9038 SrcBase = HaveSingleBase ? 

    MachineBasicBlock *StartMBB = nullptr;
    MachineBasicBlock *LoopMBB = nullptr;
    MachineBasicBlock *NextMBB = nullptr;
    MachineBasicBlock *DoneMBB = nullptr;
    MachineBasicBlock *AllDoneMBB = nullptr;

    Register StartSrcReg = forceReg(MI, SrcBase, TII);
    Register StartDestReg =
        (HaveSingleBase ? StartSrcReg : forceReg(MI, DestBase, TII));

    const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
    Register ThisSrcReg = MRI.createVirtualRegister(RC);
    Register ThisDestReg =
        (HaveSingleBase ? ThisSrcReg : MRI.createVirtualRegister(RC));
    Register NextSrcReg = MRI.createVirtualRegister(RC);
    Register NextDestReg =
        (HaveSingleBase ? NextSrcReg : MRI.createVirtualRegister(RC));
    RC = &SystemZ::GR64BitRegClass;
    Register ThisCountReg = MRI.createVirtualRegister(RC);
    Register NextCountReg = MRI.createVirtualRegister(RC);

    if (IsRegForm) {
      AllDoneMBB = SystemZ::splitBlockBefore(MI, MBB);
      StartMBB = SystemZ::emitBlockAfter(MBB);
      LoopMBB = SystemZ::emitBlockAfter(StartMBB);
      NextMBB = (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB);
      DoneMBB = SystemZ::emitBlockAfter(NextMBB);

      // MBB:
      // # Jump to AllDoneMBB if LenAdjReg means 0, or fall thru to StartMBB.
      BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
          .addReg(LenAdjReg).addImm(IsMemset ? -2 : -1);
      BuildMI(MBB, DL, TII->get(SystemZ::BRC))
          .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
          .addMBB(AllDoneMBB);
      MBB->addSuccessor(AllDoneMBB);
      if (!IsMemset)
        MBB->addSuccessor(StartMBB);
      else {
        // MemsetOneCheckMBB:
        // # Jump to MemsetOneMBB for a memset of length 1, or
        // # fall thru to StartMBB.
        MachineBasicBlock *MemsetOneCheckMBB = SystemZ::emitBlockAfter(MBB);
        MachineBasicBlock *MemsetOneMBB = SystemZ::emitBlockAfter(&*MF.rbegin());
        MBB->addSuccessor(MemsetOneCheckMBB);
        MBB = MemsetOneCheckMBB;
        BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
            .addReg(LenAdjReg).addImm(-1);
        BuildMI(MBB, DL, TII->get(SystemZ::BRC))
            .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
            .addMBB(MemsetOneMBB);
        MBB->addSuccessor(MemsetOneMBB, {10, 100});
        MBB->addSuccessor(StartMBB, {90, 100});

        // MemsetOneMBB:
        // # Jump back to AllDoneMBB after a single MVI or STC.
        MBB = MemsetOneMBB;
        insertMemMemOp(MBB, MBB->end(),
                       MachineOperand::CreateReg(StartDestReg, false), DestDisp,
                       MachineOperand::CreateReg(StartSrcReg, false), SrcDisp,
                       1);
        BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(AllDoneMBB);
        MBB->addSuccessor(AllDoneMBB);
      }

      // StartMBB:
      // # Jump to DoneMBB if %StartCountReg is zero, or fall through to LoopMBB.
      MBB = StartMBB;
      BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
          .addReg(StartCountReg).addImm(0);
      BuildMI(MBB, DL, TII->get(SystemZ::BRC))
          .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
          .addMBB(DoneMBB);
      MBB->addSuccessor(DoneMBB);
      MBB->addSuccessor(LoopMBB);
    } else {
      StartMBB = MBB;
      DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
      LoopMBB = SystemZ::emitBlockAfter(StartMBB);
      NextMBB = (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB);
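
      // In this immediate-length form MI stays in DoneMBB; the straight-line
      // code for any remaining 0-255 bytes is emitted there after the loop.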

      // StartMBB:
      // # fall through to LoopMBB
      MBB->addSuccessor(LoopMBB);

      DestBase = MachineOperand::CreateReg(NextDestReg, false);
      SrcBase = MachineOperand::CreateReg(NextSrcReg, false);
      if (EndMBB && !ImmLength)
        // If the loop handled the whole CLC range, DoneMBB will be empty with
        // CC live-through into EndMBB, so add it as live-in.
        DoneMBB->addLiveIn(SystemZ::CC);
    }

    // LoopMBB:
    // %ThisDestReg = phi [ %StartDestReg, StartMBB ],
    //                    [ %NextDestReg, NextMBB ]
    // %ThisSrcReg = phi [ %StartSrcReg, StartMBB ],
    //                   [ %NextSrcReg, NextMBB ]
    // %ThisCountReg = phi [ %StartCountReg, StartMBB ],
    //                     [ %NextCountReg, NextMBB ]
    // ( PFD 2, 768+DestDisp(%ThisDestReg) )
    // Opcode DestDisp(256,%ThisDestReg), SrcDisp(%ThisSrcReg)
    // ( JLH EndMBB )
    //
    // The prefetch is used only for MVC.  The JLH is used only for CLC.
    MBB = LoopMBB;
    BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisDestReg)
        .addReg(StartDestReg).addMBB(StartMBB)
        .addReg(NextDestReg).addMBB(NextMBB);
    if (!HaveSingleBase)
      BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisSrcReg)
          .addReg(StartSrcReg).addMBB(StartMBB)
          .addReg(NextSrcReg).addMBB(NextMBB);
    BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisCountReg)
        .addReg(StartCountReg).addMBB(StartMBB)
        .addReg(NextCountReg).addMBB(NextMBB);
    if (Opcode == SystemZ::MVC)
      BuildMI(MBB, DL, TII->get(SystemZ::PFD))
          .addImm(SystemZ::PFD_WRITE)
          .addReg(ThisDestReg).addImm(DestDisp - IsMemset + 768).addReg(0);
    insertMemMemOp(MBB, MBB->end(),
                   MachineOperand::CreateReg(ThisDestReg, false), DestDisp,
                   MachineOperand::CreateReg(ThisSrcReg, false), SrcDisp, 256);
    if (EndMBB) {
      BuildMI(MBB, DL, TII->get(SystemZ::BRC))
          .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
          .addMBB(EndMBB);
      MBB->addSuccessor(EndMBB);
      MBB->addSuccessor(NextMBB);
    }

    // NextMBB:
    // %NextDestReg = LA 256(%ThisDestReg)
    // %NextSrcReg = LA 256(%ThisSrcReg)
    // %NextCountReg = AGHI %ThisCountReg, -1
    // CGHI %NextCountReg, 0
    // JLH LoopMBB
    // # fall through to DoneMBB
    //
    // The AGHI, CGHI and JLH should be converted to BRCTG by later passes.
    MBB = NextMBB;
    BuildMI(MBB, DL, TII->get(SystemZ::LA), NextDestReg)
        .addReg(ThisDestReg).addImm(256).addReg(0);
    if (!HaveSingleBase)
      BuildMI(MBB, DL, TII->get(SystemZ::LA), NextSrcReg)
          .addReg(ThisSrcReg).addImm(256).addReg(0);
    BuildMI(MBB, DL, TII->get(SystemZ::AGHI), NextCountReg)
        .addReg(ThisCountReg).addImm(-1);
    BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
        .addReg(NextCountReg).addImm(0);
    BuildMI(MBB, DL, TII->get(SystemZ::BRC))
        .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
        .addMBB(LoopMBB);
    MBB->addSuccessor(LoopMBB);
    MBB->addSuccessor(DoneMBB);

    MBB = DoneMBB;
    if (IsRegForm) {
      // DoneMBB:
      // # Make PHIs for RemDestReg/RemSrcReg as the loop may or may not run.
      // # Use EXecute Relative Long for the remainder of the bytes.  The target
      //   instruction of the EXRL will have a length field of 1 since 0 is an
      //   illegal value.  The number of bytes processed becomes (%LenAdjReg &
      //   0xff) + 1.
      // # Fall through to AllDoneMBB.
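      //   (For example, a low byte of 5 in %LenAdjReg makes the EXRL target
      //   process 6 bytes.)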
      Register RemSrcReg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
      Register RemDestReg = HaveSingleBase ? RemSrcReg
          : MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
      BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemDestReg)
          .addReg(StartDestReg).addMBB(StartMBB)
          .addReg(NextDestReg).addMBB(NextMBB);
      if (!HaveSingleBase)
        BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemSrcReg)
            .addReg(StartSrcReg).addMBB(StartMBB)
            .addReg(NextSrcReg).addMBB(NextMBB);
      if (IsMemset)
        insertMemMemOp(MBB, MBB->end(),
                       MachineOperand::CreateReg(RemDestReg, false), DestDisp,
                       MachineOperand::CreateReg(RemSrcReg, false), SrcDisp, 1);
      MachineInstrBuilder EXRL_MIB =
          BuildMI(MBB, DL, TII->get(SystemZ::EXRL_Pseudo))
              .addImm(Opcode)
              .addReg(LenAdjReg)
              .addReg(RemDestReg).addImm(DestDisp)
              .addReg(RemSrcReg).addImm(SrcDisp);
      MBB->addSuccessor(AllDoneMBB);
      MBB = AllDoneMBB;
      if (Opcode != SystemZ::MVC) {
        EXRL_MIB.addReg(SystemZ::CC, RegState::ImplicitDefine);
        if (EndMBB)
          MBB->addLiveIn(SystemZ::CC);
      }
    }
    MF.getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
  }

  // Handle any remaining bytes with straight-line code.
  while (ImmLength > 0) {
    uint64_t ThisLength = std::min(ImmLength, uint64_t(256));
    // The previous iteration might have created out-of-range displacements.
    // Apply them using LA/LAY if so.
    foldDisplIfNeeded(DestBase, DestDisp);
    foldDisplIfNeeded(SrcBase, SrcDisp);
    insertMemMemOp(MBB, MI, DestBase, DestDisp, SrcBase, SrcDisp, ThisLength);
    DestDisp += ThisLength;
    SrcDisp += ThisLength;
    ImmLength -= ThisLength;
    // If there's another CLC to go, branch to the end if a difference
    // was found.
    if (EndMBB && ImmLength > 0) {
      MachineBasicBlock *NextMBB = SystemZ::splitBlockBefore(MI, MBB);
      BuildMI(MBB, DL, TII->get(SystemZ::BRC))
          .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
          .addMBB(EndMBB);
      MBB->addSuccessor(EndMBB);
      MBB->addSuccessor(NextMBB);
      MBB = NextMBB;
    }
  }
  if (EndMBB) {
    MBB->addSuccessor(EndMBB);
    MBB = EndMBB;
    MBB->addLiveIn(SystemZ::CC);
  }

  MI.eraseFromParent();
  return MBB;
}

// Decompose string pseudo-instruction MI into a loop that continually performs
// Opcode until CC != 3.
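// (CC 3 indicates that the instruction stopped after processing a
// CPU-determined number of bytes and must be re-executed with the updated
// register operands to continue.)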
MachineBasicBlock *SystemZTargetLowering::emitStringWrapper(
    MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
  MachineFunction &MF = *MBB->getParent();
  const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  DebugLoc DL = MI.getDebugLoc();

  uint64_t End1Reg = MI.getOperand(0).getReg();
  uint64_t Start1Reg = MI.getOperand(1).getReg();
  uint64_t Start2Reg = MI.getOperand(2).getReg();
  uint64_t CharReg = MI.getOperand(3).getReg();

  const TargetRegisterClass *RC = &SystemZ::GR64BitRegClass;
  uint64_t This1Reg = MRI.createVirtualRegister(RC);
  uint64_t This2Reg = MRI.createVirtualRegister(RC);
  uint64_t End2Reg = MRI.createVirtualRegister(RC);

  MachineBasicBlock *StartMBB = MBB;
  MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
  MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);

  // StartMBB:
  // # fall through to LoopMBB
  MBB->addSuccessor(LoopMBB);

  // LoopMBB:
  // %This1Reg = phi [ %Start1Reg, StartMBB ], [ %End1Reg, LoopMBB ]
  // %This2Reg = phi [ %Start2Reg, StartMBB ], [ %End2Reg, LoopMBB ]
  // R0L = %CharReg
  // %End1Reg, %End2Reg = CLST %This1Reg, %This2Reg -- uses R0L
  // JO LoopMBB
  // # fall through to DoneMBB
  //
  // The load of R0L can be hoisted by post-RA LICM.
  MBB = LoopMBB;

  BuildMI(MBB, DL, TII->get(SystemZ::PHI), This1Reg)
      .addReg(Start1Reg).addMBB(StartMBB)
      .addReg(End1Reg).addMBB(LoopMBB);
  BuildMI(MBB, DL, TII->get(SystemZ::PHI), This2Reg)
      .addReg(Start2Reg).addMBB(StartMBB)
      .addReg(End2Reg).addMBB(LoopMBB);
  BuildMI(MBB, DL, TII->get(TargetOpcode::COPY), SystemZ::R0L).addReg(CharReg);
  BuildMI(MBB, DL, TII->get(Opcode))
      .addReg(End1Reg, RegState::Define).addReg(End2Reg, RegState::Define)
      .addReg(This1Reg).addReg(This2Reg);
  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
      .addImm(SystemZ::CCMASK_ANY).addImm(SystemZ::CCMASK_3).addMBB(LoopMBB);
  MBB->addSuccessor(LoopMBB);
  MBB->addSuccessor(DoneMBB);

  DoneMBB->addLiveIn(SystemZ::CC);

  MI.eraseFromParent();
  return DoneMBB;
}

// Update TBEGIN instruction with final opcode and register clobbers.
MachineBasicBlock *SystemZTargetLowering::emitTransactionBegin(
    MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode,
    bool NoFloat) const {
  MachineFunction &MF = *MBB->getParent();
  const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
  const SystemZInstrInfo *TII = Subtarget.getInstrInfo();

  // Update opcode.
  MI.setDesc(TII->get(Opcode));

  // We cannot handle a TBEGIN that clobbers the stack or frame pointer.
  // Make sure to add the corresponding GRSM bits if they are missing.
  uint64_t Control = MI.getOperand(2).getImm();
  static const unsigned GPRControlBit[16] = {
    0x8000, 0x8000, 0x4000, 0x4000, 0x2000, 0x2000, 0x1000, 0x1000,
    0x0800, 0x0800, 0x0400, 0x0400, 0x0200, 0x0200, 0x0100, 0x0100
  };
  Control |= GPRControlBit[15];
  if (TFI->hasFP(MF))
    Control |= GPRControlBit[11];
  MI.getOperand(2).setImm(Control);

  // Add GPR clobbers.
  for (int I = 0; I < 16; I++) {
    if ((Control & GPRControlBit[I]) == 0) {
      unsigned Reg = SystemZMC::GR64Regs[I];
      MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
    }
  }

  // Add FPR/VR clobbers.
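  // The 0x4 bit of the control field is the "allow floating-point operation"
  // flag; if the transaction may perform FP/vector operations, those registers
  // are not restored on an abort and must be modeled as clobbered as well.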
  if (!NoFloat && (Control & 4) != 0) {
    if (Subtarget.hasVector()) {
      for (unsigned Reg : SystemZMC::VR128Regs) {
        MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
      }
    } else {
      for (unsigned Reg : SystemZMC::FP64Regs) {
        MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
      }
    }
  }

  return MBB;
}

MachineBasicBlock *SystemZTargetLowering::emitLoadAndTestCmp0(
    MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
  MachineFunction &MF = *MBB->getParent();
  MachineRegisterInfo *MRI = &MF.getRegInfo();
  const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
  DebugLoc DL = MI.getDebugLoc();

  Register SrcReg = MI.getOperand(0).getReg();

  // Create new virtual register of the same class as source.
  const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
  Register DstReg = MRI->createVirtualRegister(RC);

  // Replace pseudo with a normal load-and-test that models the def as
  // well.
  BuildMI(*MBB, MI, DL, TII->get(Opcode), DstReg)
      .addReg(SrcReg)
      .setMIFlags(MI.getFlags());
  MI.eraseFromParent();

  return MBB;
}

MachineBasicBlock *SystemZTargetLowering::emitProbedAlloca(
    MachineInstr &MI, MachineBasicBlock *MBB) const {
  MachineFunction &MF = *MBB->getParent();
  MachineRegisterInfo *MRI = &MF.getRegInfo();
  const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
  DebugLoc DL = MI.getDebugLoc();
  const unsigned ProbeSize = getStackProbeSize(MF);
  Register DstReg = MI.getOperand(0).getReg();
  Register SizeReg = MI.getOperand(2).getReg();

  MachineBasicBlock *StartMBB = MBB;
  MachineBasicBlock *DoneMBB = SystemZ::splitBlockAfter(MI, MBB);
  MachineBasicBlock *LoopTestMBB = SystemZ::emitBlockAfter(StartMBB);
  MachineBasicBlock *LoopBodyMBB = SystemZ::emitBlockAfter(LoopTestMBB);
  MachineBasicBlock *TailTestMBB = SystemZ::emitBlockAfter(LoopBodyMBB);
  MachineBasicBlock *TailMBB = SystemZ::emitBlockAfter(TailTestMBB);

  MachineMemOperand *VolLdMMO = MF.getMachineMemOperand(
      MachinePointerInfo(),
      MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad, 8, Align(1));

  Register PHIReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass);
  Register IncReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass);

  // LoopTestMBB
  // BRC TailTestMBB
  // # fallthrough to LoopBodyMBB
  StartMBB->addSuccessor(LoopTestMBB);
  MBB = LoopTestMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::PHI), PHIReg)
      .addReg(SizeReg)
      .addMBB(StartMBB)
      .addReg(IncReg)
      .addMBB(LoopBodyMBB);
  BuildMI(MBB, DL, TII->get(SystemZ::CLGFI))
      .addReg(PHIReg)
      .addImm(ProbeSize);
  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
      .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_LT)
      .addMBB(TailTestMBB);
  MBB->addSuccessor(LoopBodyMBB);
  MBB->addSuccessor(TailTestMBB);

  // LoopBodyMBB: Allocate and probe by means of a volatile compare.
  // J LoopTestMBB
  MBB = LoopBodyMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), IncReg)
      .addReg(PHIReg)
      .addImm(ProbeSize);
  BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), SystemZ::R15D)
      .addReg(SystemZ::R15D)
      .addImm(ProbeSize);
  BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D)
      .addReg(SystemZ::R15D).addImm(ProbeSize - 8).addReg(0)
      .setMemRefs(VolLdMMO);
  BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(LoopTestMBB);
  MBB->addSuccessor(LoopTestMBB);

  // TailTestMBB
  // BRC DoneMBB
  // # fallthrough to TailMBB
  MBB = TailTestMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
      .addReg(PHIReg)
      .addImm(0);
  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
      .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
      .addMBB(DoneMBB);
  MBB->addSuccessor(TailMBB);
  MBB->addSuccessor(DoneMBB);

  // TailMBB
  // # fallthrough to DoneMBB
  MBB = TailMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::SLGR), SystemZ::R15D)
      .addReg(SystemZ::R15D)
      .addReg(PHIReg);
  BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D)
      .addReg(SystemZ::R15D).addImm(-8).addReg(PHIReg)
      .setMemRefs(VolLdMMO);
  MBB->addSuccessor(DoneMBB);

  // DoneMBB
  MBB = DoneMBB;
  BuildMI(*MBB, MBB->begin(), DL, TII->get(TargetOpcode::COPY), DstReg)
      .addReg(SystemZ::R15D);

  MI.eraseFromParent();
  return DoneMBB;
}

SDValue SystemZTargetLowering::
getBackchainAddress(SDValue SP, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
  SDLoc DL(SP);
  return DAG.getNode(ISD::ADD, DL, MVT::i64, SP,
                     DAG.getIntPtrConstant(TFL->getBackchainOffset(MF), DL));
}

MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
    MachineInstr &MI, MachineBasicBlock *MBB) const {
  switch (MI.getOpcode()) {
  case SystemZ::ADJCALLSTACKDOWN:
  case SystemZ::ADJCALLSTACKUP:
    return emitAdjCallStack(MI, MBB);

  case SystemZ::Select32:
  case SystemZ::Select64:
  case SystemZ::Select128:
  case SystemZ::SelectF32:
  case SystemZ::SelectF64:
  case SystemZ::SelectF128:
  case SystemZ::SelectVR32:
  case SystemZ::SelectVR64:
  case SystemZ::SelectVR128:
    return emitSelect(MI, MBB);

  case SystemZ::CondStore8Mux:
    return emitCondStore(MI, MBB, SystemZ::STCMux, 0, false);
  case SystemZ::CondStore8MuxInv:
    return emitCondStore(MI, MBB, SystemZ::STCMux, 0, true);
  case SystemZ::CondStore16Mux:
    return emitCondStore(MI, MBB, SystemZ::STHMux, 0, false);
  case SystemZ::CondStore16MuxInv:
    return emitCondStore(MI, MBB, SystemZ::STHMux, 0, true);
  case SystemZ::CondStore32Mux:
    return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, false);
  case SystemZ::CondStore32MuxInv:
    return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, true);
  case SystemZ::CondStore8:
    return emitCondStore(MI, MBB, SystemZ::STC, 0, false);
  case SystemZ::CondStore8Inv:
    return emitCondStore(MI, MBB, SystemZ::STC, 0, true);
  case SystemZ::CondStore16:
    return emitCondStore(MI, MBB, SystemZ::STH, 0, false);
  case SystemZ::CondStore16Inv:
    return emitCondStore(MI, MBB, SystemZ::STH, 0, true);
  case SystemZ::CondStore32:
    return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, false);
  case SystemZ::CondStore32Inv:
    return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, true);
  case SystemZ::CondStore64:
    return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, false);
  case SystemZ::CondStore64Inv:
    return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, true);
  case SystemZ::CondStoreF32:
    return emitCondStore(MI, MBB, SystemZ::STE, 0, false);
  case SystemZ::CondStoreF32Inv:
    return emitCondStore(MI, MBB, SystemZ::STE, 0, true);
  case SystemZ::CondStoreF64:
    return emitCondStore(MI, MBB, SystemZ::STD, 0, false);
  case SystemZ::CondStoreF64Inv:
    return emitCondStore(MI, MBB, SystemZ::STD, 0, true);

  case SystemZ::SCmp128Hi:
    return emitICmp128Hi(MI, MBB, false);
  case SystemZ::UCmp128Hi:
    return emitICmp128Hi(MI, MBB, true);

  case SystemZ::PAIR128:
    return emitPair128(MI, MBB);
  case SystemZ::AEXT128:
    return emitExt128(MI, MBB, false);
  case SystemZ::ZEXT128:
    return emitExt128(MI, MBB, true);

  case SystemZ::ATOMIC_SWAPW:
    return emitAtomicLoadBinary(MI, MBB, 0);

  case SystemZ::ATOMIC_LOADW_AR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::AR);
  case SystemZ::ATOMIC_LOADW_AFI:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI);

  case SystemZ::ATOMIC_LOADW_SR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::SR);

  case SystemZ::ATOMIC_LOADW_NR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NR);
  case SystemZ::ATOMIC_LOADW_NILH:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH);

  case SystemZ::ATOMIC_LOADW_OR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OR);
  case SystemZ::ATOMIC_LOADW_OILH:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH);

  case SystemZ::ATOMIC_LOADW_XR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::XR);
  case SystemZ::ATOMIC_LOADW_XILF:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF);

  case SystemZ::ATOMIC_LOADW_NRi:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, true);
  case SystemZ::ATOMIC_LOADW_NILHi:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, true);

  case SystemZ::ATOMIC_LOADW_MIN:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, SystemZ::CCMASK_CMP_LE);
  case SystemZ::ATOMIC_LOADW_MAX:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, SystemZ::CCMASK_CMP_GE);
  case SystemZ::ATOMIC_LOADW_UMIN:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, SystemZ::CCMASK_CMP_LE);
  case SystemZ::ATOMIC_LOADW_UMAX:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, SystemZ::CCMASK_CMP_GE);

  case SystemZ::ATOMIC_CMP_SWAPW:
    return emitAtomicCmpSwapW(MI, MBB);
  case SystemZ::MVCImm:
  case SystemZ::MVCReg:
    return emitMemMemWrapper(MI, MBB, SystemZ::MVC);
  case SystemZ::NCImm:
    return emitMemMemWrapper(MI, MBB, SystemZ::NC);
  case SystemZ::OCImm:
    return emitMemMemWrapper(MI, MBB, SystemZ::OC);
  case SystemZ::XCImm:
  case SystemZ::XCReg:
    return emitMemMemWrapper(MI, MBB, SystemZ::XC);
  case SystemZ::CLCImm:
  case SystemZ::CLCReg:
    return emitMemMemWrapper(MI, MBB, SystemZ::CLC);
  case SystemZ::MemsetImmImm:
  case SystemZ::MemsetImmReg:
  case SystemZ::MemsetRegImm:
  case SystemZ::MemsetRegReg:
    return emitMemMemWrapper(MI, MBB, SystemZ::MVC, true/*IsMemset*/);
  case SystemZ::CLSTLoop:
    return emitStringWrapper(MI, MBB, SystemZ::CLST);
  case SystemZ::MVSTLoop:
    return emitStringWrapper(MI, MBB, SystemZ::MVST);
  case SystemZ::SRSTLoop:
    return emitStringWrapper(MI, MBB, SystemZ::SRST);
  case SystemZ::TBEGIN:
    return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, false);
  case SystemZ::TBEGIN_nofloat:
    return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, true);
  case SystemZ::TBEGINC:
    return emitTransactionBegin(MI, MBB, SystemZ::TBEGINC, true);
  case SystemZ::LTEBRCompare_Pseudo:
    return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTEBR);
  case SystemZ::LTDBRCompare_Pseudo:
    return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTDBR);
  case SystemZ::LTXBRCompare_Pseudo:
    return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTXBR);

  case SystemZ::PROBED_ALLOCA:
    return emitProbedAlloca(MI, MBB);

  case TargetOpcode::STACKMAP:
  case TargetOpcode::PATCHPOINT:
    return emitPatchPoint(MI, MBB);

  default:
    llvm_unreachable("Unexpected instr type to insert");
  }
}

// This is only used by the isel schedulers, and is needed only to prevent the
// compiler from crashing when list-ilp is used.
const TargetRegisterClass *
SystemZTargetLowering::getRepRegClassFor(MVT VT) const {
  if (VT == MVT::Untyped)
    return &SystemZ::ADDR128BitRegClass;
  return TargetLowering::getRepRegClassFor(VT);
}

SDValue SystemZTargetLowering::lowerGET_ROUNDING(SDValue Op,
                                                 SelectionDAG &DAG) const {
  SDLoc dl(Op);
  /*
    The rounding method is in FPC Byte 3 bits 6-7, and has the following
    settings:
      00 Round to nearest
      01 Round to 0
      10 Round to +inf
      11 Round to -inf

    FLT_ROUNDS, on the other hand, expects the following:
      -1 Undefined
       0 Round to 0
       1 Round to nearest
       2 Round to +inf
       3 Round to -inf
  */

  // Save FPC to register.
  SDValue Chain = Op.getOperand(0);
  SDValue EFPC(
      DAG.getMachineNode(SystemZ::EFPC, dl, {MVT::i32, MVT::Other}, Chain), 0);
  Chain = EFPC.getValue(1);

  // Transform as necessary
  SDValue CWD1 = DAG.getNode(ISD::AND, dl, MVT::i32, EFPC,
                             DAG.getConstant(3, dl, MVT::i32));
  // RetVal = (CWD1 ^ (CWD1 >> 1)) ^ 1
  SDValue CWD2 = DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1,
                             DAG.getNode(ISD::SRL, dl, MVT::i32, CWD1,
                                         DAG.getConstant(1, dl, MVT::i32)));

  SDValue RetVal = DAG.getNode(ISD::XOR, dl, MVT::i32, CWD2,
                               DAG.getConstant(1, dl, MVT::i32));
  RetVal = DAG.getZExtOrTrunc(RetVal, dl, Op.getValueType());

  return DAG.getMergeValues({RetVal, Chain}, dl);
}

SDValue SystemZTargetLowering::lowerVECREDUCE_ADD(SDValue Op,
                                                  SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  Op = Op.getOperand(0);
  EVT OpVT = Op.getValueType();

  assert(OpVT.isVector() && "Operand type for VECREDUCE_ADD is not a vector.");

  SDLoc DL(Op);

  // load a 0 vector for the third operand of VSUM.
  SDValue Zero = DAG.getSplatBuildVector(OpVT, DL, DAG.getConstant(0, DL, VT));

  // execute VSUM.
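  // For i8/i16 elements the reduction takes two steps: a first VSUM widens to
  // v4i32 partial sums and a second VSUM accumulates them into an i128.  For
  // i32/i64 elements a single VSUM into i128 suffices.  The sum ends up in the
  // low-order bytes of the i128, i.e. in the last element of the original
  // vector type after the bitcast below.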
  switch (OpVT.getScalarSizeInBits()) {
  case 8:
  case 16:
    Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Zero);
    [[fallthrough]];
  case 32:
  case 64:
    Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::i128, Op,
                     DAG.getBitcast(Op.getValueType(), Zero));
    break;
  case 128:
    break; // VSUM over v1i128 should not happen and would be a noop
  default:
    llvm_unreachable("Unexpected scalar size.");
  }
  // Cast to original vector type, retrieve last element.
  return DAG.getNode(
      ISD::EXTRACT_VECTOR_ELT, DL, VT, DAG.getBitcast(OpVT, Op),
      DAG.getConstant(OpVT.getVectorNumElements() - 1, DL, MVT::i32));
}