1 //===-- SystemZISelLowering.cpp - SystemZ DAG lowering implementation -----===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the SystemZTargetLowering class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "SystemZISelLowering.h" 14 #include "SystemZCallingConv.h" 15 #include "SystemZConstantPoolValue.h" 16 #include "SystemZMachineFunctionInfo.h" 17 #include "SystemZTargetMachine.h" 18 #include "llvm/CodeGen/CallingConvLower.h" 19 #include "llvm/CodeGen/ISDOpcodes.h" 20 #include "llvm/CodeGen/MachineInstrBuilder.h" 21 #include "llvm/CodeGen/MachineRegisterInfo.h" 22 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" 23 #include "llvm/IR/GlobalAlias.h" 24 #include "llvm/IR/IntrinsicInst.h" 25 #include "llvm/IR/Intrinsics.h" 26 #include "llvm/IR/IntrinsicsS390.h" 27 #include "llvm/Support/CommandLine.h" 28 #include "llvm/Support/ErrorHandling.h" 29 #include "llvm/Support/KnownBits.h" 30 #include <cctype> 31 #include <optional> 32 33 using namespace llvm; 34 35 #define DEBUG_TYPE "systemz-lower" 36 37 namespace { 38 // Represents information about a comparison. 39 struct Comparison { 40 Comparison(SDValue Op0In, SDValue Op1In, SDValue ChainIn) 41 : Op0(Op0In), Op1(Op1In), Chain(ChainIn), 42 Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {} 43 44 // The operands to the comparison. 45 SDValue Op0, Op1; 46 47 // Chain if this is a strict floating-point comparison. 48 SDValue Chain; 49 50 // The opcode that should be used to compare Op0 and Op1. 51 unsigned Opcode; 52 53 // A SystemZICMP value. Only used for integer comparisons. 54 unsigned ICmpType; 55 56 // The mask of CC values that Opcode can produce. 57 unsigned CCValid; 58 59 // The mask of CC values for which the original condition is true. 60 unsigned CCMask; 61 }; 62 } // end anonymous namespace 63 64 // Classify VT as either 32 or 64 bit. 65 static bool is32Bit(EVT VT) { 66 switch (VT.getSimpleVT().SimpleTy) { 67 case MVT::i32: 68 return true; 69 case MVT::i64: 70 return false; 71 default: 72 llvm_unreachable("Unsupported type"); 73 } 74 } 75 76 // Return a version of MachineOperand that can be safely used before the 77 // final use. 78 static MachineOperand earlyUseOperand(MachineOperand Op) { 79 if (Op.isReg()) 80 Op.setIsKill(false); 81 return Op; 82 } 83 84 SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, 85 const SystemZSubtarget &STI) 86 : TargetLowering(TM), Subtarget(STI) { 87 MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0)); 88 89 auto *Regs = STI.getSpecialRegisters(); 90 91 // Set up the register classes. 
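  // (GRX32 additionally contains the high-word GRH32 registers, which are
  // only usable when the high-word facility is available.)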
92 if (Subtarget.hasHighWord()) 93 addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass); 94 else 95 addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass); 96 addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass); 97 if (!useSoftFloat()) { 98 if (Subtarget.hasVector()) { 99 addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass); 100 addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass); 101 } else { 102 addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass); 103 addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass); 104 } 105 if (Subtarget.hasVectorEnhancements1()) 106 addRegisterClass(MVT::f128, &SystemZ::VR128BitRegClass); 107 else 108 addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass); 109 110 if (Subtarget.hasVector()) { 111 addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass); 112 addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass); 113 addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass); 114 addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass); 115 addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass); 116 addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass); 117 } 118 119 if (Subtarget.hasVector()) 120 addRegisterClass(MVT::i128, &SystemZ::VR128BitRegClass); 121 } 122 123 // Compute derived properties from the register classes 124 computeRegisterProperties(Subtarget.getRegisterInfo()); 125 126 // Set up special registers. 127 setStackPointerRegisterToSaveRestore(Regs->getStackPointerRegister()); 128 129 // TODO: It may be better to default to latency-oriented scheduling, however 130 // LLVM's current latency-oriented scheduler can't handle physreg definitions 131 // such as SystemZ has with CC, so set this to the register-pressure 132 // scheduler, because it can. 133 setSchedulingPreference(Sched::RegPressure); 134 135 setBooleanContents(ZeroOrOneBooleanContent); 136 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); 137 138 setMaxAtomicSizeInBitsSupported(128); 139 140 // Instructions are strings of 2-byte aligned 2-byte values. 141 setMinFunctionAlignment(Align(2)); 142 // For performance reasons we prefer 16-byte alignment. 143 setPrefFunctionAlignment(Align(16)); 144 145 // Handle operations that are handled in a similar way for all types. 146 for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE; 147 I <= MVT::LAST_FP_VALUETYPE; 148 ++I) { 149 MVT VT = MVT::SimpleValueType(I); 150 if (isTypeLegal(VT)) { 151 // Lower SET_CC into an IPM-based sequence. 152 setOperationAction(ISD::SETCC, VT, Custom); 153 setOperationAction(ISD::STRICT_FSETCC, VT, Custom); 154 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom); 155 156 // Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE). 157 setOperationAction(ISD::SELECT, VT, Expand); 158 159 // Lower SELECT_CC and BR_CC into separate comparisons and branches. 160 setOperationAction(ISD::SELECT_CC, VT, Custom); 161 setOperationAction(ISD::BR_CC, VT, Custom); 162 } 163 } 164 165 // Expand jump table branches as address arithmetic followed by an 166 // indirect jump. 167 setOperationAction(ISD::BR_JT, MVT::Other, Expand); 168 169 // Expand BRCOND into a BR_CC (see above). 170 setOperationAction(ISD::BRCOND, MVT::Other, Expand); 171 172 // Handle integer types except i128. 173 for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE; 174 I <= MVT::LAST_INTEGER_VALUETYPE; 175 ++I) { 176 MVT VT = MVT::SimpleValueType(I); 177 if (isTypeLegal(VT) && VT != MVT::i128) { 178 setOperationAction(ISD::ABS, VT, Legal); 179 180 // Expand individual DIV and REMs into DIVREMs. 
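      // (The DIVREM nodes are later matched to the divide instructions, which
      // produce the quotient and remainder together in a register pair.)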
181 setOperationAction(ISD::SDIV, VT, Expand); 182 setOperationAction(ISD::UDIV, VT, Expand); 183 setOperationAction(ISD::SREM, VT, Expand); 184 setOperationAction(ISD::UREM, VT, Expand); 185 setOperationAction(ISD::SDIVREM, VT, Custom); 186 setOperationAction(ISD::UDIVREM, VT, Custom); 187 188 // Support addition/subtraction with overflow. 189 setOperationAction(ISD::SADDO, VT, Custom); 190 setOperationAction(ISD::SSUBO, VT, Custom); 191 192 // Support addition/subtraction with carry. 193 setOperationAction(ISD::UADDO, VT, Custom); 194 setOperationAction(ISD::USUBO, VT, Custom); 195 196 // Support carry in as value rather than glue. 197 setOperationAction(ISD::UADDO_CARRY, VT, Custom); 198 setOperationAction(ISD::USUBO_CARRY, VT, Custom); 199 200 // Lower ATOMIC_LOAD_SUB into ATOMIC_LOAD_ADD if LAA and LAAG are 201 // available, or if the operand is constant. 202 setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom); 203 204 // Use POPCNT on z196 and above. 205 if (Subtarget.hasPopulationCount()) 206 setOperationAction(ISD::CTPOP, VT, Custom); 207 else 208 setOperationAction(ISD::CTPOP, VT, Expand); 209 210 // No special instructions for these. 211 setOperationAction(ISD::CTTZ, VT, Expand); 212 setOperationAction(ISD::ROTR, VT, Expand); 213 214 // Use *MUL_LOHI where possible instead of MULH*. 215 setOperationAction(ISD::MULHS, VT, Expand); 216 setOperationAction(ISD::MULHU, VT, Expand); 217 setOperationAction(ISD::SMUL_LOHI, VT, Custom); 218 setOperationAction(ISD::UMUL_LOHI, VT, Custom); 219 220 // Only z196 and above have native support for conversions to unsigned. 221 // On z10, promoting to i64 doesn't generate an inexact condition for 222 // values that are outside the i32 range but in the i64 range, so use 223 // the default expansion. 224 if (!Subtarget.hasFPExtension()) 225 setOperationAction(ISD::FP_TO_UINT, VT, Expand); 226 227 // Mirror those settings for STRICT_FP_TO_[SU]INT. Note that these all 228 // default to Expand, so need to be modified to Legal where appropriate. 229 setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Legal); 230 if (Subtarget.hasFPExtension()) 231 setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Legal); 232 233 // And similarly for STRICT_[SU]INT_TO_FP. 234 setOperationAction(ISD::STRICT_SINT_TO_FP, VT, Legal); 235 if (Subtarget.hasFPExtension()) 236 setOperationAction(ISD::STRICT_UINT_TO_FP, VT, Legal); 237 } 238 } 239 240 // Handle i128 if legal. 241 if (isTypeLegal(MVT::i128)) { 242 // No special instructions for these. 243 setOperationAction(ISD::SDIVREM, MVT::i128, Expand); 244 setOperationAction(ISD::UDIVREM, MVT::i128, Expand); 245 setOperationAction(ISD::SMUL_LOHI, MVT::i128, Expand); 246 setOperationAction(ISD::UMUL_LOHI, MVT::i128, Expand); 247 setOperationAction(ISD::ROTR, MVT::i128, Expand); 248 setOperationAction(ISD::ROTL, MVT::i128, Expand); 249 setOperationAction(ISD::MUL, MVT::i128, Expand); 250 setOperationAction(ISD::MULHS, MVT::i128, Expand); 251 setOperationAction(ISD::MULHU, MVT::i128, Expand); 252 setOperationAction(ISD::SDIV, MVT::i128, Expand); 253 setOperationAction(ISD::UDIV, MVT::i128, Expand); 254 setOperationAction(ISD::SREM, MVT::i128, Expand); 255 setOperationAction(ISD::UREM, MVT::i128, Expand); 256 setOperationAction(ISD::CTLZ, MVT::i128, Expand); 257 setOperationAction(ISD::CTTZ, MVT::i128, Expand); 258 259 // Support addition/subtraction with carry. 
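    // (For i128 these are implemented with the 128-bit vector add/subtract
    // and carry/borrow-compute instructions.)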
    setOperationAction(ISD::UADDO, MVT::i128, Custom);
    setOperationAction(ISD::USUBO, MVT::i128, Custom);
    setOperationAction(ISD::UADDO_CARRY, MVT::i128, Custom);
    setOperationAction(ISD::USUBO_CARRY, MVT::i128, Custom);

    // Use VPOPCT and add up partial results.
    setOperationAction(ISD::CTPOP, MVT::i128, Custom);

    // We have to use libcalls for these.
    setOperationAction(ISD::FP_TO_UINT, MVT::i128, LibCall);
    setOperationAction(ISD::FP_TO_SINT, MVT::i128, LibCall);
    setOperationAction(ISD::UINT_TO_FP, MVT::i128, LibCall);
    setOperationAction(ISD::SINT_TO_FP, MVT::i128, LibCall);
    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i128, LibCall);
    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i128, LibCall);
    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i128, LibCall);
    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i128, LibCall);
  }

  // Type legalization will convert 8- and 16-bit atomic operations into
  // forms that operate on i32s (but still keeping the original memory VT).
  // Lower them into full i32 operations.
  setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Custom);

  // Whether or not i128 is a legal type, we need to custom lower
  // the atomic operations in order to exploit SystemZ instructions.
  setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom);
  setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom);
  setOperationAction(ISD::ATOMIC_LOAD, MVT::f128, Custom);
  setOperationAction(ISD::ATOMIC_STORE, MVT::f128, Custom);

  // Mark sign/zero extending atomic loads as legal, which will make
  // DAGCombiner fold extensions into atomic loads if possible.
  setAtomicLoadExtAction({ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::i64,
                         {MVT::i8, MVT::i16, MVT::i32}, Legal);
  setAtomicLoadExtAction({ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::i32,
                         {MVT::i8, MVT::i16}, Legal);
  setAtomicLoadExtAction({ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::i16,
                         MVT::i8, Legal);

  // We can use the CC result of compare-and-swap to implement
  // the "success" result of ATOMIC_CMP_SWAP_WITH_SUCCESS.
  setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i64, Custom);
  setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);

  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);

  // Traps are legal, as we will convert them to "j .+2".
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // z10 has instructions for signed but not unsigned FP conversion.
  // Handle unsigned 32-bit types as signed 64-bit types.
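  // (Zero-extending the u32 operand to i64 keeps it in the non-negative
  // range, so the signed 64-bit conversion produces the correct value.)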
323 if (!Subtarget.hasFPExtension()) { 324 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote); 325 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand); 326 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Promote); 327 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Expand); 328 } 329 330 // We have native support for a 64-bit CTLZ, via FLOGR. 331 setOperationAction(ISD::CTLZ, MVT::i32, Promote); 332 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Promote); 333 setOperationAction(ISD::CTLZ, MVT::i64, Legal); 334 335 // On z15 we have native support for a 64-bit CTPOP. 336 if (Subtarget.hasMiscellaneousExtensions3()) { 337 setOperationAction(ISD::CTPOP, MVT::i32, Promote); 338 setOperationAction(ISD::CTPOP, MVT::i64, Legal); 339 } 340 341 // Give LowerOperation the chance to replace 64-bit ORs with subregs. 342 setOperationAction(ISD::OR, MVT::i64, Custom); 343 344 // Expand 128 bit shifts without using a libcall. 345 setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand); 346 setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand); 347 setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand); 348 349 // Also expand 256 bit shifts if i128 is a legal type. 350 if (isTypeLegal(MVT::i128)) { 351 setOperationAction(ISD::SRL_PARTS, MVT::i128, Expand); 352 setOperationAction(ISD::SHL_PARTS, MVT::i128, Expand); 353 setOperationAction(ISD::SRA_PARTS, MVT::i128, Expand); 354 } 355 356 // Handle bitcast from fp128 to i128. 357 if (!isTypeLegal(MVT::i128)) 358 setOperationAction(ISD::BITCAST, MVT::i128, Custom); 359 360 // We have native instructions for i8, i16 and i32 extensions, but not i1. 361 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); 362 for (MVT VT : MVT::integer_valuetypes()) { 363 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); 364 setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote); 365 setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote); 366 } 367 368 // Handle the various types of symbolic address. 369 setOperationAction(ISD::ConstantPool, PtrVT, Custom); 370 setOperationAction(ISD::GlobalAddress, PtrVT, Custom); 371 setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom); 372 setOperationAction(ISD::BlockAddress, PtrVT, Custom); 373 setOperationAction(ISD::JumpTable, PtrVT, Custom); 374 375 // We need to handle dynamic allocations specially because of the 376 // 160-byte area at the bottom of the stack. 377 setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom); 378 setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, PtrVT, Custom); 379 380 setOperationAction(ISD::STACKSAVE, MVT::Other, Custom); 381 setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom); 382 383 // Handle prefetches with PFD or PFDRL. 384 setOperationAction(ISD::PREFETCH, MVT::Other, Custom); 385 386 // Handle readcyclecounter with STCKF. 387 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom); 388 389 for (MVT VT : MVT::fixedlen_vector_valuetypes()) { 390 // Assume by default that all vector operations need to be expanded. 391 for (unsigned Opcode = 0; Opcode < ISD::BUILTIN_OP_END; ++Opcode) 392 if (getOperationAction(Opcode, VT) == Legal) 393 setOperationAction(Opcode, VT, Expand); 394 395 // Likewise all truncating stores and extending loads. 
396 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) { 397 setTruncStoreAction(VT, InnerVT, Expand); 398 setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand); 399 setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand); 400 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand); 401 } 402 403 if (isTypeLegal(VT)) { 404 // These operations are legal for anything that can be stored in a 405 // vector register, even if there is no native support for the format 406 // as such. In particular, we can do these for v4f32 even though there 407 // are no specific instructions for that format. 408 setOperationAction(ISD::LOAD, VT, Legal); 409 setOperationAction(ISD::STORE, VT, Legal); 410 setOperationAction(ISD::VSELECT, VT, Legal); 411 setOperationAction(ISD::BITCAST, VT, Legal); 412 setOperationAction(ISD::UNDEF, VT, Legal); 413 414 // Likewise, except that we need to replace the nodes with something 415 // more specific. 416 setOperationAction(ISD::BUILD_VECTOR, VT, Custom); 417 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); 418 } 419 } 420 421 // Handle integer vector types. 422 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) { 423 if (isTypeLegal(VT)) { 424 // These operations have direct equivalents. 425 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal); 426 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Legal); 427 setOperationAction(ISD::ADD, VT, Legal); 428 setOperationAction(ISD::SUB, VT, Legal); 429 if (VT != MVT::v2i64) 430 setOperationAction(ISD::MUL, VT, Legal); 431 setOperationAction(ISD::ABS, VT, Legal); 432 setOperationAction(ISD::AND, VT, Legal); 433 setOperationAction(ISD::OR, VT, Legal); 434 setOperationAction(ISD::XOR, VT, Legal); 435 if (Subtarget.hasVectorEnhancements1()) 436 setOperationAction(ISD::CTPOP, VT, Legal); 437 else 438 setOperationAction(ISD::CTPOP, VT, Custom); 439 setOperationAction(ISD::CTTZ, VT, Legal); 440 setOperationAction(ISD::CTLZ, VT, Legal); 441 442 // Convert a GPR scalar to a vector by inserting it into element 0. 443 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); 444 445 // Use a series of unpacks for extensions. 446 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom); 447 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom); 448 449 // Detect shifts/rotates by a scalar amount and convert them into 450 // V*_BY_SCALAR. 451 setOperationAction(ISD::SHL, VT, Custom); 452 setOperationAction(ISD::SRA, VT, Custom); 453 setOperationAction(ISD::SRL, VT, Custom); 454 setOperationAction(ISD::ROTL, VT, Custom); 455 456 // Add ISD::VECREDUCE_ADD as custom in order to implement 457 // it with VZERO+VSUM 458 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom); 459 460 // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands 461 // and inverting the result as necessary. 462 setOperationAction(ISD::SETCC, VT, Custom); 463 } 464 } 465 466 if (Subtarget.hasVector()) { 467 // There should be no need to check for float types other than v2f64 468 // since <2 x f32> isn't a legal type. 
469 setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal); 470 setOperationAction(ISD::FP_TO_SINT, MVT::v2f64, Legal); 471 setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal); 472 setOperationAction(ISD::FP_TO_UINT, MVT::v2f64, Legal); 473 setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal); 474 setOperationAction(ISD::SINT_TO_FP, MVT::v2f64, Legal); 475 setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal); 476 setOperationAction(ISD::UINT_TO_FP, MVT::v2f64, Legal); 477 478 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i64, Legal); 479 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2f64, Legal); 480 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i64, Legal); 481 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2f64, Legal); 482 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i64, Legal); 483 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2f64, Legal); 484 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i64, Legal); 485 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2f64, Legal); 486 } 487 488 if (Subtarget.hasVectorEnhancements2()) { 489 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal); 490 setOperationAction(ISD::FP_TO_SINT, MVT::v4f32, Legal); 491 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal); 492 setOperationAction(ISD::FP_TO_UINT, MVT::v4f32, Legal); 493 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal); 494 setOperationAction(ISD::SINT_TO_FP, MVT::v4f32, Legal); 495 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal); 496 setOperationAction(ISD::UINT_TO_FP, MVT::v4f32, Legal); 497 498 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Legal); 499 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4f32, Legal); 500 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Legal); 501 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4f32, Legal); 502 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Legal); 503 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4f32, Legal); 504 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Legal); 505 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4f32, Legal); 506 } 507 508 // Handle floating-point types. 509 for (unsigned I = MVT::FIRST_FP_VALUETYPE; 510 I <= MVT::LAST_FP_VALUETYPE; 511 ++I) { 512 MVT VT = MVT::SimpleValueType(I); 513 if (isTypeLegal(VT)) { 514 // We can use FI for FRINT. 515 setOperationAction(ISD::FRINT, VT, Legal); 516 517 // We can use the extended form of FI for other rounding operations. 518 if (Subtarget.hasFPExtension()) { 519 setOperationAction(ISD::FNEARBYINT, VT, Legal); 520 setOperationAction(ISD::FFLOOR, VT, Legal); 521 setOperationAction(ISD::FCEIL, VT, Legal); 522 setOperationAction(ISD::FTRUNC, VT, Legal); 523 setOperationAction(ISD::FROUND, VT, Legal); 524 } 525 526 // No special instructions for these. 527 setOperationAction(ISD::FSIN, VT, Expand); 528 setOperationAction(ISD::FCOS, VT, Expand); 529 setOperationAction(ISD::FSINCOS, VT, Expand); 530 setOperationAction(ISD::FREM, VT, Expand); 531 setOperationAction(ISD::FPOW, VT, Expand); 532 533 // Special treatment. 534 setOperationAction(ISD::IS_FPCLASS, VT, Custom); 535 536 // Handle constrained floating-point operations. 
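      // (Marking these Legal lets the strict nodes use the same instruction
      // patterns as the corresponding non-strict operations.)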
537 setOperationAction(ISD::STRICT_FADD, VT, Legal); 538 setOperationAction(ISD::STRICT_FSUB, VT, Legal); 539 setOperationAction(ISD::STRICT_FMUL, VT, Legal); 540 setOperationAction(ISD::STRICT_FDIV, VT, Legal); 541 setOperationAction(ISD::STRICT_FMA, VT, Legal); 542 setOperationAction(ISD::STRICT_FSQRT, VT, Legal); 543 setOperationAction(ISD::STRICT_FRINT, VT, Legal); 544 setOperationAction(ISD::STRICT_FP_ROUND, VT, Legal); 545 setOperationAction(ISD::STRICT_FP_EXTEND, VT, Legal); 546 if (Subtarget.hasFPExtension()) { 547 setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal); 548 setOperationAction(ISD::STRICT_FFLOOR, VT, Legal); 549 setOperationAction(ISD::STRICT_FCEIL, VT, Legal); 550 setOperationAction(ISD::STRICT_FROUND, VT, Legal); 551 setOperationAction(ISD::STRICT_FTRUNC, VT, Legal); 552 } 553 } 554 } 555 556 // Handle floating-point vector types. 557 if (Subtarget.hasVector()) { 558 // Scalar-to-vector conversion is just a subreg. 559 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal); 560 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal); 561 562 // Some insertions and extractions can be done directly but others 563 // need to go via integers. 564 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom); 565 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom); 566 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom); 567 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom); 568 569 // These operations have direct equivalents. 570 setOperationAction(ISD::FADD, MVT::v2f64, Legal); 571 setOperationAction(ISD::FNEG, MVT::v2f64, Legal); 572 setOperationAction(ISD::FSUB, MVT::v2f64, Legal); 573 setOperationAction(ISD::FMUL, MVT::v2f64, Legal); 574 setOperationAction(ISD::FMA, MVT::v2f64, Legal); 575 setOperationAction(ISD::FDIV, MVT::v2f64, Legal); 576 setOperationAction(ISD::FABS, MVT::v2f64, Legal); 577 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal); 578 setOperationAction(ISD::FRINT, MVT::v2f64, Legal); 579 setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal); 580 setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal); 581 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal); 582 setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal); 583 setOperationAction(ISD::FROUND, MVT::v2f64, Legal); 584 585 // Handle constrained floating-point operations. 
586 setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal); 587 setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal); 588 setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal); 589 setOperationAction(ISD::STRICT_FMA, MVT::v2f64, Legal); 590 setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal); 591 setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal); 592 setOperationAction(ISD::STRICT_FRINT, MVT::v2f64, Legal); 593 setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v2f64, Legal); 594 setOperationAction(ISD::STRICT_FFLOOR, MVT::v2f64, Legal); 595 setOperationAction(ISD::STRICT_FCEIL, MVT::v2f64, Legal); 596 setOperationAction(ISD::STRICT_FTRUNC, MVT::v2f64, Legal); 597 setOperationAction(ISD::STRICT_FROUND, MVT::v2f64, Legal); 598 599 setOperationAction(ISD::SETCC, MVT::v2f64, Custom); 600 setOperationAction(ISD::SETCC, MVT::v4f32, Custom); 601 setOperationAction(ISD::STRICT_FSETCC, MVT::v2f64, Custom); 602 setOperationAction(ISD::STRICT_FSETCC, MVT::v4f32, Custom); 603 if (Subtarget.hasVectorEnhancements1()) { 604 setOperationAction(ISD::STRICT_FSETCCS, MVT::v2f64, Custom); 605 setOperationAction(ISD::STRICT_FSETCCS, MVT::v4f32, Custom); 606 } 607 } 608 609 // The vector enhancements facility 1 has instructions for these. 610 if (Subtarget.hasVectorEnhancements1()) { 611 setOperationAction(ISD::FADD, MVT::v4f32, Legal); 612 setOperationAction(ISD::FNEG, MVT::v4f32, Legal); 613 setOperationAction(ISD::FSUB, MVT::v4f32, Legal); 614 setOperationAction(ISD::FMUL, MVT::v4f32, Legal); 615 setOperationAction(ISD::FMA, MVT::v4f32, Legal); 616 setOperationAction(ISD::FDIV, MVT::v4f32, Legal); 617 setOperationAction(ISD::FABS, MVT::v4f32, Legal); 618 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal); 619 setOperationAction(ISD::FRINT, MVT::v4f32, Legal); 620 setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal); 621 setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal); 622 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal); 623 setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal); 624 setOperationAction(ISD::FROUND, MVT::v4f32, Legal); 625 626 setOperationAction(ISD::FMAXNUM, MVT::f64, Legal); 627 setOperationAction(ISD::FMAXIMUM, MVT::f64, Legal); 628 setOperationAction(ISD::FMINNUM, MVT::f64, Legal); 629 setOperationAction(ISD::FMINIMUM, MVT::f64, Legal); 630 631 setOperationAction(ISD::FMAXNUM, MVT::v2f64, Legal); 632 setOperationAction(ISD::FMAXIMUM, MVT::v2f64, Legal); 633 setOperationAction(ISD::FMINNUM, MVT::v2f64, Legal); 634 setOperationAction(ISD::FMINIMUM, MVT::v2f64, Legal); 635 636 setOperationAction(ISD::FMAXNUM, MVT::f32, Legal); 637 setOperationAction(ISD::FMAXIMUM, MVT::f32, Legal); 638 setOperationAction(ISD::FMINNUM, MVT::f32, Legal); 639 setOperationAction(ISD::FMINIMUM, MVT::f32, Legal); 640 641 setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal); 642 setOperationAction(ISD::FMAXIMUM, MVT::v4f32, Legal); 643 setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal); 644 setOperationAction(ISD::FMINIMUM, MVT::v4f32, Legal); 645 646 setOperationAction(ISD::FMAXNUM, MVT::f128, Legal); 647 setOperationAction(ISD::FMAXIMUM, MVT::f128, Legal); 648 setOperationAction(ISD::FMINNUM, MVT::f128, Legal); 649 setOperationAction(ISD::FMINIMUM, MVT::f128, Legal); 650 651 // Handle constrained floating-point operations. 
652 setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal); 653 setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal); 654 setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal); 655 setOperationAction(ISD::STRICT_FMA, MVT::v4f32, Legal); 656 setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal); 657 setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal); 658 setOperationAction(ISD::STRICT_FRINT, MVT::v4f32, Legal); 659 setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v4f32, Legal); 660 setOperationAction(ISD::STRICT_FFLOOR, MVT::v4f32, Legal); 661 setOperationAction(ISD::STRICT_FCEIL, MVT::v4f32, Legal); 662 setOperationAction(ISD::STRICT_FROUND, MVT::v4f32, Legal); 663 setOperationAction(ISD::STRICT_FTRUNC, MVT::v4f32, Legal); 664 for (auto VT : { MVT::f32, MVT::f64, MVT::f128, 665 MVT::v4f32, MVT::v2f64 }) { 666 setOperationAction(ISD::STRICT_FMAXNUM, VT, Legal); 667 setOperationAction(ISD::STRICT_FMINNUM, VT, Legal); 668 setOperationAction(ISD::STRICT_FMAXIMUM, VT, Legal); 669 setOperationAction(ISD::STRICT_FMINIMUM, VT, Legal); 670 } 671 } 672 673 // We only have fused f128 multiply-addition on vector registers. 674 if (!Subtarget.hasVectorEnhancements1()) { 675 setOperationAction(ISD::FMA, MVT::f128, Expand); 676 setOperationAction(ISD::STRICT_FMA, MVT::f128, Expand); 677 } 678 679 // We don't have a copysign instruction on vector registers. 680 if (Subtarget.hasVectorEnhancements1()) 681 setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand); 682 683 // Needed so that we don't try to implement f128 constant loads using 684 // a load-and-extend of a f80 constant (in cases where the constant 685 // would fit in an f80). 686 for (MVT VT : MVT::fp_valuetypes()) 687 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand); 688 689 // We don't have extending load instruction on vector registers. 690 if (Subtarget.hasVectorEnhancements1()) { 691 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand); 692 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand); 693 } 694 695 // Floating-point truncation and stores need to be done separately. 696 setTruncStoreAction(MVT::f64, MVT::f32, Expand); 697 setTruncStoreAction(MVT::f128, MVT::f32, Expand); 698 setTruncStoreAction(MVT::f128, MVT::f64, Expand); 699 700 // We have 64-bit FPR<->GPR moves, but need special handling for 701 // 32-bit forms. 702 if (!Subtarget.hasVector()) { 703 setOperationAction(ISD::BITCAST, MVT::i32, Custom); 704 setOperationAction(ISD::BITCAST, MVT::f32, Custom); 705 } 706 707 // VASTART and VACOPY need to deal with the SystemZ-specific varargs 708 // structure, but VAEND is a no-op. 709 setOperationAction(ISD::VASTART, MVT::Other, Custom); 710 setOperationAction(ISD::VACOPY, MVT::Other, Custom); 711 setOperationAction(ISD::VAEND, MVT::Other, Expand); 712 713 setOperationAction(ISD::GET_ROUNDING, MVT::i32, Custom); 714 715 // Codes for which we want to perform some z-specific combinations. 716 setTargetDAGCombine({ISD::ZERO_EXTEND, 717 ISD::SIGN_EXTEND, 718 ISD::SIGN_EXTEND_INREG, 719 ISD::LOAD, 720 ISD::STORE, 721 ISD::VECTOR_SHUFFLE, 722 ISD::EXTRACT_VECTOR_ELT, 723 ISD::FP_ROUND, 724 ISD::STRICT_FP_ROUND, 725 ISD::FP_EXTEND, 726 ISD::SINT_TO_FP, 727 ISD::UINT_TO_FP, 728 ISD::STRICT_FP_EXTEND, 729 ISD::BSWAP, 730 ISD::SDIV, 731 ISD::UDIV, 732 ISD::SREM, 733 ISD::UREM, 734 ISD::INTRINSIC_VOID, 735 ISD::INTRINSIC_W_CHAIN}); 736 737 // Handle intrinsics. 
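  // (Custom lowering translates the SystemZ intrinsics into the
  // corresponding SystemZISD opcodes.)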
738 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); 739 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); 740 741 // We want to use MVC in preference to even a single load/store pair. 742 MaxStoresPerMemcpy = Subtarget.hasVector() ? 2 : 0; 743 MaxStoresPerMemcpyOptSize = 0; 744 745 // The main memset sequence is a byte store followed by an MVC. 746 // Two STC or MV..I stores win over that, but the kind of fused stores 747 // generated by target-independent code don't when the byte value is 748 // variable. E.g. "STC <reg>;MHI <reg>,257;STH <reg>" is not better 749 // than "STC;MVC". Handle the choice in target-specific code instead. 750 MaxStoresPerMemset = Subtarget.hasVector() ? 2 : 0; 751 MaxStoresPerMemsetOptSize = 0; 752 753 // Default to having -disable-strictnode-mutation on 754 IsStrictFPEnabled = true; 755 756 if (Subtarget.isTargetzOS()) { 757 struct RTLibCallMapping { 758 RTLIB::Libcall Code; 759 const char *Name; 760 }; 761 static RTLibCallMapping RTLibCallCommon[] = { 762 #define HANDLE_LIBCALL(code, name) {RTLIB::code, name}, 763 #include "ZOSLibcallNames.def" 764 }; 765 for (auto &E : RTLibCallCommon) 766 setLibcallName(E.Code, E.Name); 767 } 768 } 769 770 bool SystemZTargetLowering::useSoftFloat() const { 771 return Subtarget.hasSoftFloat(); 772 } 773 774 EVT SystemZTargetLowering::getSetCCResultType(const DataLayout &DL, 775 LLVMContext &, EVT VT) const { 776 if (!VT.isVector()) 777 return MVT::i32; 778 return VT.changeVectorElementTypeToInteger(); 779 } 780 781 bool SystemZTargetLowering::isFMAFasterThanFMulAndFAdd( 782 const MachineFunction &MF, EVT VT) const { 783 VT = VT.getScalarType(); 784 785 if (!VT.isSimple()) 786 return false; 787 788 switch (VT.getSimpleVT().SimpleTy) { 789 case MVT::f32: 790 case MVT::f64: 791 return true; 792 case MVT::f128: 793 return Subtarget.hasVectorEnhancements1(); 794 default: 795 break; 796 } 797 798 return false; 799 } 800 801 // Return true if the constant can be generated with a vector instruction, 802 // such as VGM, VGMB or VREPI. 803 bool SystemZVectorConstantInfo::isVectorConstantLegal( 804 const SystemZSubtarget &Subtarget) { 805 const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); 806 if (!Subtarget.hasVector() || 807 (isFP128 && !Subtarget.hasVectorEnhancements1())) 808 return false; 809 810 // Try using VECTOR GENERATE BYTE MASK. This is the architecturally- 811 // preferred way of creating all-zero and all-one vectors so give it 812 // priority over other methods below. 
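  // Each bit of Mask corresponds to one byte of the constant; every byte
  // must be either 0x00 or 0xff for BYTE_MASK to be usable.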
813 unsigned Mask = 0; 814 unsigned I = 0; 815 for (; I < SystemZ::VectorBytes; ++I) { 816 uint64_t Byte = IntBits.lshr(I * 8).trunc(8).getZExtValue(); 817 if (Byte == 0xff) 818 Mask |= 1ULL << I; 819 else if (Byte != 0) 820 break; 821 } 822 if (I == SystemZ::VectorBytes) { 823 Opcode = SystemZISD::BYTE_MASK; 824 OpVals.push_back(Mask); 825 VecVT = MVT::getVectorVT(MVT::getIntegerVT(8), 16); 826 return true; 827 } 828 829 if (SplatBitSize > 64) 830 return false; 831 832 auto tryValue = [&](uint64_t Value) -> bool { 833 // Try VECTOR REPLICATE IMMEDIATE 834 int64_t SignedValue = SignExtend64(Value, SplatBitSize); 835 if (isInt<16>(SignedValue)) { 836 OpVals.push_back(((unsigned) SignedValue)); 837 Opcode = SystemZISD::REPLICATE; 838 VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize), 839 SystemZ::VectorBits / SplatBitSize); 840 return true; 841 } 842 // Try VECTOR GENERATE MASK 843 unsigned Start, End; 844 if (TII->isRxSBGMask(Value, SplatBitSize, Start, End)) { 845 // isRxSBGMask returns the bit numbers for a full 64-bit value, with 0 846 // denoting 1 << 63 and 63 denoting 1. Convert them to bit numbers for 847 // an SplatBitSize value, so that 0 denotes 1 << (SplatBitSize-1). 848 OpVals.push_back(Start - (64 - SplatBitSize)); 849 OpVals.push_back(End - (64 - SplatBitSize)); 850 Opcode = SystemZISD::ROTATE_MASK; 851 VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize), 852 SystemZ::VectorBits / SplatBitSize); 853 return true; 854 } 855 return false; 856 }; 857 858 // First try assuming that any undefined bits above the highest set bit 859 // and below the lowest set bit are 1s. This increases the likelihood of 860 // being able to use a sign-extended element value in VECTOR REPLICATE 861 // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK. 862 uint64_t SplatBitsZ = SplatBits.getZExtValue(); 863 uint64_t SplatUndefZ = SplatUndef.getZExtValue(); 864 unsigned LowerBits = llvm::countr_zero(SplatBitsZ); 865 unsigned UpperBits = llvm::countl_zero(SplatBitsZ); 866 uint64_t Lower = SplatUndefZ & maskTrailingOnes<uint64_t>(LowerBits); 867 uint64_t Upper = SplatUndefZ & maskLeadingOnes<uint64_t>(UpperBits); 868 if (tryValue(SplatBitsZ | Upper | Lower)) 869 return true; 870 871 // Now try assuming that any undefined bits between the first and 872 // last defined set bits are set. This increases the chances of 873 // using a non-wraparound mask. 874 uint64_t Middle = SplatUndefZ & ~Upper & ~Lower; 875 return tryValue(SplatBitsZ | Middle); 876 } 877 878 SystemZVectorConstantInfo::SystemZVectorConstantInfo(APInt IntImm) { 879 if (IntImm.isSingleWord()) { 880 IntBits = APInt(128, IntImm.getZExtValue()); 881 IntBits <<= (SystemZ::VectorBits - IntImm.getBitWidth()); 882 } else 883 IntBits = IntImm; 884 assert(IntBits.getBitWidth() == 128 && "Unsupported APInt."); 885 886 // Find the smallest splat. 887 SplatBits = IntImm; 888 unsigned Width = SplatBits.getBitWidth(); 889 while (Width > 8) { 890 unsigned HalfSize = Width / 2; 891 APInt HighValue = SplatBits.lshr(HalfSize).trunc(HalfSize); 892 APInt LowValue = SplatBits.trunc(HalfSize); 893 894 // If the two halves do not match, stop here. 895 if (HighValue != LowValue || 8 > HalfSize) 896 break; 897 898 SplatBits = HighValue; 899 Width = HalfSize; 900 } 901 SplatUndef = 0; 902 SplatBitSize = Width; 903 } 904 905 SystemZVectorConstantInfo::SystemZVectorConstantInfo(BuildVectorSDNode *BVN) { 906 assert(BVN->isConstant() && "Expected a constant BUILD_VECTOR"); 907 bool HasAnyUndefs; 908 909 // Get IntBits by finding the 128 bit splat. 
910 BVN->isConstantSplat(IntBits, SplatUndef, SplatBitSize, HasAnyUndefs, 128, 911 true); 912 913 // Get SplatBits by finding the 8 bit or greater splat. 914 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, 8, 915 true); 916 } 917 918 bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, 919 bool ForCodeSize) const { 920 // We can load zero using LZ?R and negative zero using LZ?R;LC?BR. 921 if (Imm.isZero() || Imm.isNegZero()) 922 return true; 923 924 return SystemZVectorConstantInfo(Imm).isVectorConstantLegal(Subtarget); 925 } 926 927 /// Returns true if stack probing through inline assembly is requested. 928 bool SystemZTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const { 929 // If the function specifically requests inline stack probes, emit them. 930 if (MF.getFunction().hasFnAttribute("probe-stack")) 931 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() == 932 "inline-asm"; 933 return false; 934 } 935 936 TargetLowering::AtomicExpansionKind 937 SystemZTargetLowering::shouldCastAtomicLoadInIR(LoadInst *LI) const { 938 return AtomicExpansionKind::None; 939 } 940 941 TargetLowering::AtomicExpansionKind 942 SystemZTargetLowering::shouldCastAtomicStoreInIR(StoreInst *SI) const { 943 return AtomicExpansionKind::None; 944 } 945 946 TargetLowering::AtomicExpansionKind 947 SystemZTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const { 948 // Don't expand subword operations as they require special treatment. 949 if (RMW->getType()->isIntegerTy(8) || RMW->getType()->isIntegerTy(16)) 950 return AtomicExpansionKind::None; 951 952 // Don't expand if there is a target instruction available. 953 if (Subtarget.hasInterlockedAccess1() && 954 (RMW->getType()->isIntegerTy(32) || RMW->getType()->isIntegerTy(64)) && 955 (RMW->getOperation() == AtomicRMWInst::BinOp::Add || 956 RMW->getOperation() == AtomicRMWInst::BinOp::Sub || 957 RMW->getOperation() == AtomicRMWInst::BinOp::And || 958 RMW->getOperation() == AtomicRMWInst::BinOp::Or || 959 RMW->getOperation() == AtomicRMWInst::BinOp::Xor)) 960 return AtomicExpansionKind::None; 961 962 return AtomicExpansionKind::CmpXChg; 963 } 964 965 bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const { 966 // We can use CGFI or CLGFI. 967 return isInt<32>(Imm) || isUInt<32>(Imm); 968 } 969 970 bool SystemZTargetLowering::isLegalAddImmediate(int64_t Imm) const { 971 // We can use ALGFI or SLGFI. 972 return isUInt<32>(Imm) || isUInt<32>(-Imm); 973 } 974 975 bool SystemZTargetLowering::allowsMisalignedMemoryAccesses( 976 EVT VT, unsigned, Align, MachineMemOperand::Flags, unsigned *Fast) const { 977 // Unaligned accesses should never be slower than the expanded version. 978 // We check specifically for aligned accesses in the few cases where 979 // they are required. 980 if (Fast) 981 *Fast = 1; 982 return true; 983 } 984 985 // Information about the addressing mode for a memory access. 986 struct AddressingMode { 987 // True if a long displacement is supported. 988 bool LongDisplacement; 989 990 // True if use of index register is supported. 991 bool IndexReg; 992 993 AddressingMode(bool LongDispl, bool IdxReg) : 994 LongDisplacement(LongDispl), IndexReg(IdxReg) {} 995 }; 996 997 // Return the desired addressing mode for a Load which has only one use (in 998 // the same block) which is a Store. 
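// Such a pair is typically combined into an MVC, which only supports a
// 12-bit unsigned displacement and no index register.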
999 static AddressingMode getLoadStoreAddrMode(bool HasVector, 1000 Type *Ty) { 1001 // With vector support a Load->Store combination may be combined to either 1002 // an MVC or vector operations and it seems to work best to allow the 1003 // vector addressing mode. 1004 if (HasVector) 1005 return AddressingMode(false/*LongDispl*/, true/*IdxReg*/); 1006 1007 // Otherwise only the MVC case is special. 1008 bool MVC = Ty->isIntegerTy(8); 1009 return AddressingMode(!MVC/*LongDispl*/, !MVC/*IdxReg*/); 1010 } 1011 1012 // Return the addressing mode which seems most desirable given an LLVM 1013 // Instruction pointer. 1014 static AddressingMode 1015 supportedAddressingMode(Instruction *I, bool HasVector) { 1016 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { 1017 switch (II->getIntrinsicID()) { 1018 default: break; 1019 case Intrinsic::memset: 1020 case Intrinsic::memmove: 1021 case Intrinsic::memcpy: 1022 return AddressingMode(false/*LongDispl*/, false/*IdxReg*/); 1023 } 1024 } 1025 1026 if (isa<LoadInst>(I) && I->hasOneUse()) { 1027 auto *SingleUser = cast<Instruction>(*I->user_begin()); 1028 if (SingleUser->getParent() == I->getParent()) { 1029 if (isa<ICmpInst>(SingleUser)) { 1030 if (auto *C = dyn_cast<ConstantInt>(SingleUser->getOperand(1))) 1031 if (C->getBitWidth() <= 64 && 1032 (isInt<16>(C->getSExtValue()) || isUInt<16>(C->getZExtValue()))) 1033 // Comparison of memory with 16 bit signed / unsigned immediate 1034 return AddressingMode(false/*LongDispl*/, false/*IdxReg*/); 1035 } else if (isa<StoreInst>(SingleUser)) 1036 // Load->Store 1037 return getLoadStoreAddrMode(HasVector, I->getType()); 1038 } 1039 } else if (auto *StoreI = dyn_cast<StoreInst>(I)) { 1040 if (auto *LoadI = dyn_cast<LoadInst>(StoreI->getValueOperand())) 1041 if (LoadI->hasOneUse() && LoadI->getParent() == I->getParent()) 1042 // Load->Store 1043 return getLoadStoreAddrMode(HasVector, LoadI->getType()); 1044 } 1045 1046 if (HasVector && (isa<LoadInst>(I) || isa<StoreInst>(I))) { 1047 1048 // * Use LDE instead of LE/LEY for z13 to avoid partial register 1049 // dependencies (LDE only supports small offsets). 1050 // * Utilize the vector registers to hold floating point 1051 // values (vector load / store instructions only support small 1052 // offsets). 1053 1054 Type *MemAccessTy = (isa<LoadInst>(I) ? I->getType() : 1055 I->getOperand(0)->getType()); 1056 bool IsFPAccess = MemAccessTy->isFloatingPointTy(); 1057 bool IsVectorAccess = MemAccessTy->isVectorTy(); 1058 1059 // A store of an extracted vector element will be combined into a VSTE type 1060 // instruction. 1061 if (!IsVectorAccess && isa<StoreInst>(I)) { 1062 Value *DataOp = I->getOperand(0); 1063 if (isa<ExtractElementInst>(DataOp)) 1064 IsVectorAccess = true; 1065 } 1066 1067 // A load which gets inserted into a vector element will be combined into a 1068 // VLE type instruction. 1069 if (!IsVectorAccess && isa<LoadInst>(I) && I->hasOneUse()) { 1070 User *LoadUser = *I->user_begin(); 1071 if (isa<InsertElementInst>(LoadUser)) 1072 IsVectorAccess = true; 1073 } 1074 1075 if (IsFPAccess || IsVectorAccess) 1076 return AddressingMode(false/*LongDispl*/, true/*IdxReg*/); 1077 } 1078 1079 return AddressingMode(true/*LongDispl*/, true/*IdxReg*/); 1080 } 1081 1082 bool SystemZTargetLowering::isLegalAddressingMode(const DataLayout &DL, 1083 const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I) const { 1084 // Punt on globals for now, although they can be used in limited 1085 // RELATIVE LONG cases. 
1086 if (AM.BaseGV) 1087 return false; 1088 1089 // Require a 20-bit signed offset. 1090 if (!isInt<20>(AM.BaseOffs)) 1091 return false; 1092 1093 bool RequireD12 = 1094 Subtarget.hasVector() && (Ty->isVectorTy() || Ty->isIntegerTy(128)); 1095 AddressingMode SupportedAM(!RequireD12, true); 1096 if (I != nullptr) 1097 SupportedAM = supportedAddressingMode(I, Subtarget.hasVector()); 1098 1099 if (!SupportedAM.LongDisplacement && !isUInt<12>(AM.BaseOffs)) 1100 return false; 1101 1102 if (!SupportedAM.IndexReg) 1103 // No indexing allowed. 1104 return AM.Scale == 0; 1105 else 1106 // Indexing is OK but no scale factor can be applied. 1107 return AM.Scale == 0 || AM.Scale == 1; 1108 } 1109 1110 bool SystemZTargetLowering::findOptimalMemOpLowering( 1111 std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, 1112 unsigned SrcAS, const AttributeList &FuncAttributes) const { 1113 const int MVCFastLen = 16; 1114 1115 if (Limit != ~unsigned(0)) { 1116 // Don't expand Op into scalar loads/stores in these cases: 1117 if (Op.isMemcpy() && Op.allowOverlap() && Op.size() <= MVCFastLen) 1118 return false; // Small memcpy: Use MVC 1119 if (Op.isMemset() && Op.size() - 1 <= MVCFastLen) 1120 return false; // Small memset (first byte with STC/MVI): Use MVC 1121 if (Op.isZeroMemset()) 1122 return false; // Memset zero: Use XC 1123 } 1124 1125 return TargetLowering::findOptimalMemOpLowering(MemOps, Limit, Op, DstAS, 1126 SrcAS, FuncAttributes); 1127 } 1128 1129 EVT SystemZTargetLowering::getOptimalMemOpType(const MemOp &Op, 1130 const AttributeList &FuncAttributes) const { 1131 return Subtarget.hasVector() ? MVT::v2i64 : MVT::Other; 1132 } 1133 1134 bool SystemZTargetLowering::isTruncateFree(Type *FromType, Type *ToType) const { 1135 if (!FromType->isIntegerTy() || !ToType->isIntegerTy()) 1136 return false; 1137 unsigned FromBits = FromType->getPrimitiveSizeInBits().getFixedValue(); 1138 unsigned ToBits = ToType->getPrimitiveSizeInBits().getFixedValue(); 1139 return FromBits > ToBits; 1140 } 1141 1142 bool SystemZTargetLowering::isTruncateFree(EVT FromVT, EVT ToVT) const { 1143 if (!FromVT.isInteger() || !ToVT.isInteger()) 1144 return false; 1145 unsigned FromBits = FromVT.getFixedSizeInBits(); 1146 unsigned ToBits = ToVT.getFixedSizeInBits(); 1147 return FromBits > ToBits; 1148 } 1149 1150 //===----------------------------------------------------------------------===// 1151 // Inline asm support 1152 //===----------------------------------------------------------------------===// 1153 1154 TargetLowering::ConstraintType 1155 SystemZTargetLowering::getConstraintType(StringRef Constraint) const { 1156 if (Constraint.size() == 1) { 1157 switch (Constraint[0]) { 1158 case 'a': // Address register 1159 case 'd': // Data register (equivalent to 'r') 1160 case 'f': // Floating-point register 1161 case 'h': // High-part register 1162 case 'r': // General-purpose register 1163 case 'v': // Vector register 1164 return C_RegisterClass; 1165 1166 case 'Q': // Memory with base and unsigned 12-bit displacement 1167 case 'R': // Likewise, plus an index 1168 case 'S': // Memory with base and signed 20-bit displacement 1169 case 'T': // Likewise, plus an index 1170 case 'm': // Equivalent to 'T'. 
1171 return C_Memory; 1172 1173 case 'I': // Unsigned 8-bit constant 1174 case 'J': // Unsigned 12-bit constant 1175 case 'K': // Signed 16-bit constant 1176 case 'L': // Signed 20-bit displacement (on all targets we support) 1177 case 'M': // 0x7fffffff 1178 return C_Immediate; 1179 1180 default: 1181 break; 1182 } 1183 } else if (Constraint.size() == 2 && Constraint[0] == 'Z') { 1184 switch (Constraint[1]) { 1185 case 'Q': // Address with base and unsigned 12-bit displacement 1186 case 'R': // Likewise, plus an index 1187 case 'S': // Address with base and signed 20-bit displacement 1188 case 'T': // Likewise, plus an index 1189 return C_Address; 1190 1191 default: 1192 break; 1193 } 1194 } 1195 return TargetLowering::getConstraintType(Constraint); 1196 } 1197 1198 TargetLowering::ConstraintWeight SystemZTargetLowering:: 1199 getSingleConstraintMatchWeight(AsmOperandInfo &info, 1200 const char *constraint) const { 1201 ConstraintWeight weight = CW_Invalid; 1202 Value *CallOperandVal = info.CallOperandVal; 1203 // If we don't have a value, we can't do a match, 1204 // but allow it at the lowest weight. 1205 if (!CallOperandVal) 1206 return CW_Default; 1207 Type *type = CallOperandVal->getType(); 1208 // Look at the constraint type. 1209 switch (*constraint) { 1210 default: 1211 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); 1212 break; 1213 1214 case 'a': // Address register 1215 case 'd': // Data register (equivalent to 'r') 1216 case 'h': // High-part register 1217 case 'r': // General-purpose register 1218 weight = CallOperandVal->getType()->isIntegerTy() ? CW_Register : CW_Default; 1219 break; 1220 1221 case 'f': // Floating-point register 1222 if (!useSoftFloat()) 1223 weight = type->isFloatingPointTy() ? CW_Register : CW_Default; 1224 break; 1225 1226 case 'v': // Vector register 1227 if (Subtarget.hasVector()) 1228 weight = (type->isVectorTy() || type->isFloatingPointTy()) ? CW_Register 1229 : CW_Default; 1230 break; 1231 1232 case 'I': // Unsigned 8-bit constant 1233 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal)) 1234 if (isUInt<8>(C->getZExtValue())) 1235 weight = CW_Constant; 1236 break; 1237 1238 case 'J': // Unsigned 12-bit constant 1239 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal)) 1240 if (isUInt<12>(C->getZExtValue())) 1241 weight = CW_Constant; 1242 break; 1243 1244 case 'K': // Signed 16-bit constant 1245 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal)) 1246 if (isInt<16>(C->getSExtValue())) 1247 weight = CW_Constant; 1248 break; 1249 1250 case 'L': // Signed 20-bit displacement (on all targets we support) 1251 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal)) 1252 if (isInt<20>(C->getSExtValue())) 1253 weight = CW_Constant; 1254 break; 1255 1256 case 'M': // 0x7fffffff 1257 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal)) 1258 if (C->getZExtValue() == 0x7fffffff) 1259 weight = CW_Constant; 1260 break; 1261 } 1262 return weight; 1263 } 1264 1265 // Parse a "{tNNN}" register constraint for which the register type "t" 1266 // has already been verified. MC is the class associated with "t" and 1267 // Map maps 0-based register numbers to LLVM register numbers. 
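// Returns {0, nullptr} if the number is malformed, out of range, or not
// present in Map.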
1268 static std::pair<unsigned, const TargetRegisterClass *> 1269 parseRegisterNumber(StringRef Constraint, const TargetRegisterClass *RC, 1270 const unsigned *Map, unsigned Size) { 1271 assert(*(Constraint.end()-1) == '}' && "Missing '}'"); 1272 if (isdigit(Constraint[2])) { 1273 unsigned Index; 1274 bool Failed = 1275 Constraint.slice(2, Constraint.size() - 1).getAsInteger(10, Index); 1276 if (!Failed && Index < Size && Map[Index]) 1277 return std::make_pair(Map[Index], RC); 1278 } 1279 return std::make_pair(0U, nullptr); 1280 } 1281 1282 std::pair<unsigned, const TargetRegisterClass *> 1283 SystemZTargetLowering::getRegForInlineAsmConstraint( 1284 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const { 1285 if (Constraint.size() == 1) { 1286 // GCC Constraint Letters 1287 switch (Constraint[0]) { 1288 default: break; 1289 case 'd': // Data register (equivalent to 'r') 1290 case 'r': // General-purpose register 1291 if (VT.getSizeInBits() == 64) 1292 return std::make_pair(0U, &SystemZ::GR64BitRegClass); 1293 else if (VT.getSizeInBits() == 128) 1294 return std::make_pair(0U, &SystemZ::GR128BitRegClass); 1295 return std::make_pair(0U, &SystemZ::GR32BitRegClass); 1296 1297 case 'a': // Address register 1298 if (VT == MVT::i64) 1299 return std::make_pair(0U, &SystemZ::ADDR64BitRegClass); 1300 else if (VT == MVT::i128) 1301 return std::make_pair(0U, &SystemZ::ADDR128BitRegClass); 1302 return std::make_pair(0U, &SystemZ::ADDR32BitRegClass); 1303 1304 case 'h': // High-part register (an LLVM extension) 1305 return std::make_pair(0U, &SystemZ::GRH32BitRegClass); 1306 1307 case 'f': // Floating-point register 1308 if (!useSoftFloat()) { 1309 if (VT.getSizeInBits() == 64) 1310 return std::make_pair(0U, &SystemZ::FP64BitRegClass); 1311 else if (VT.getSizeInBits() == 128) 1312 return std::make_pair(0U, &SystemZ::FP128BitRegClass); 1313 return std::make_pair(0U, &SystemZ::FP32BitRegClass); 1314 } 1315 break; 1316 1317 case 'v': // Vector register 1318 if (Subtarget.hasVector()) { 1319 if (VT.getSizeInBits() == 32) 1320 return std::make_pair(0U, &SystemZ::VR32BitRegClass); 1321 if (VT.getSizeInBits() == 64) 1322 return std::make_pair(0U, &SystemZ::VR64BitRegClass); 1323 return std::make_pair(0U, &SystemZ::VR128BitRegClass); 1324 } 1325 break; 1326 } 1327 } 1328 if (Constraint.starts_with("{")) { 1329 1330 // A clobber constraint (e.g. ~{f0}) will have MVT::Other which is illegal 1331 // to check the size on. 1332 auto getVTSizeInBits = [&VT]() { 1333 return VT == MVT::Other ? 0 : VT.getSizeInBits(); 1334 }; 1335 1336 // We need to override the default register parsing for GPRs and FPRs 1337 // because the interpretation depends on VT. The internal names of 1338 // the registers are also different from the external names 1339 // (F0D and F0S instead of F0, etc.). 
1340 if (Constraint[1] == 'r') { 1341 if (getVTSizeInBits() == 32) 1342 return parseRegisterNumber(Constraint, &SystemZ::GR32BitRegClass, 1343 SystemZMC::GR32Regs, 16); 1344 if (getVTSizeInBits() == 128) 1345 return parseRegisterNumber(Constraint, &SystemZ::GR128BitRegClass, 1346 SystemZMC::GR128Regs, 16); 1347 return parseRegisterNumber(Constraint, &SystemZ::GR64BitRegClass, 1348 SystemZMC::GR64Regs, 16); 1349 } 1350 if (Constraint[1] == 'f') { 1351 if (useSoftFloat()) 1352 return std::make_pair( 1353 0u, static_cast<const TargetRegisterClass *>(nullptr)); 1354 if (getVTSizeInBits() == 32) 1355 return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass, 1356 SystemZMC::FP32Regs, 16); 1357 if (getVTSizeInBits() == 128) 1358 return parseRegisterNumber(Constraint, &SystemZ::FP128BitRegClass, 1359 SystemZMC::FP128Regs, 16); 1360 return parseRegisterNumber(Constraint, &SystemZ::FP64BitRegClass, 1361 SystemZMC::FP64Regs, 16); 1362 } 1363 if (Constraint[1] == 'v') { 1364 if (!Subtarget.hasVector()) 1365 return std::make_pair( 1366 0u, static_cast<const TargetRegisterClass *>(nullptr)); 1367 if (getVTSizeInBits() == 32) 1368 return parseRegisterNumber(Constraint, &SystemZ::VR32BitRegClass, 1369 SystemZMC::VR32Regs, 32); 1370 if (getVTSizeInBits() == 64) 1371 return parseRegisterNumber(Constraint, &SystemZ::VR64BitRegClass, 1372 SystemZMC::VR64Regs, 32); 1373 return parseRegisterNumber(Constraint, &SystemZ::VR128BitRegClass, 1374 SystemZMC::VR128Regs, 32); 1375 } 1376 } 1377 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); 1378 } 1379 1380 // FIXME? Maybe this could be a TableGen attribute on some registers and 1381 // this table could be generated automatically from RegInfo. 1382 Register 1383 SystemZTargetLowering::getRegisterByName(const char *RegName, LLT VT, 1384 const MachineFunction &MF) const { 1385 Register Reg = 1386 StringSwitch<Register>(RegName) 1387 .Case("r4", Subtarget.isTargetXPLINK64() ? SystemZ::R4D : 0) 1388 .Case("r15", Subtarget.isTargetELF() ? SystemZ::R15D : 0) 1389 .Default(0); 1390 1391 if (Reg) 1392 return Reg; 1393 report_fatal_error("Invalid register name global variable"); 1394 } 1395 1396 Register SystemZTargetLowering::getExceptionPointerRegister( 1397 const Constant *PersonalityFn) const { 1398 return Subtarget.isTargetXPLINK64() ? SystemZ::R1D : SystemZ::R6D; 1399 } 1400 1401 Register SystemZTargetLowering::getExceptionSelectorRegister( 1402 const Constant *PersonalityFn) const { 1403 return Subtarget.isTargetXPLINK64() ? SystemZ::R2D : SystemZ::R7D; 1404 } 1405 1406 void SystemZTargetLowering::LowerAsmOperandForConstraint( 1407 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops, 1408 SelectionDAG &DAG) const { 1409 // Only support length 1 constraints for now. 
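// Anything else falls through to the generic TargetLowering handling at the
// end of the function.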
1410 if (Constraint.size() == 1) { 1411 switch (Constraint[0]) { 1412 case 'I': // Unsigned 8-bit constant 1413 if (auto *C = dyn_cast<ConstantSDNode>(Op)) 1414 if (isUInt<8>(C->getZExtValue())) 1415 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), 1416 Op.getValueType())); 1417 return; 1418 1419 case 'J': // Unsigned 12-bit constant 1420 if (auto *C = dyn_cast<ConstantSDNode>(Op)) 1421 if (isUInt<12>(C->getZExtValue())) 1422 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), 1423 Op.getValueType())); 1424 return; 1425 1426 case 'K': // Signed 16-bit constant 1427 if (auto *C = dyn_cast<ConstantSDNode>(Op)) 1428 if (isInt<16>(C->getSExtValue())) 1429 Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op), 1430 Op.getValueType())); 1431 return; 1432 1433 case 'L': // Signed 20-bit displacement (on all targets we support) 1434 if (auto *C = dyn_cast<ConstantSDNode>(Op)) 1435 if (isInt<20>(C->getSExtValue())) 1436 Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op), 1437 Op.getValueType())); 1438 return; 1439 1440 case 'M': // 0x7fffffff 1441 if (auto *C = dyn_cast<ConstantSDNode>(Op)) 1442 if (C->getZExtValue() == 0x7fffffff) 1443 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), 1444 Op.getValueType())); 1445 return; 1446 } 1447 } 1448 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); 1449 } 1450 1451 //===----------------------------------------------------------------------===// 1452 // Calling conventions 1453 //===----------------------------------------------------------------------===// 1454 1455 #include "SystemZGenCallingConv.inc" 1456 1457 const MCPhysReg *SystemZTargetLowering::getScratchRegisters( 1458 CallingConv::ID) const { 1459 static const MCPhysReg ScratchRegs[] = { SystemZ::R0D, SystemZ::R1D, 1460 SystemZ::R14D, 0 }; 1461 return ScratchRegs; 1462 } 1463 1464 bool SystemZTargetLowering::allowTruncateForTailCall(Type *FromType, 1465 Type *ToType) const { 1466 return isTruncateFree(FromType, ToType); 1467 } 1468 1469 bool SystemZTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { 1470 return CI->isTailCall(); 1471 } 1472 1473 // Value is a value that has been passed to us in the location described by VA 1474 // (and so has type VA.getLocVT()). Convert Value to VA.getValVT(), chaining 1475 // any loads onto Chain. 1476 static SDValue convertLocVTToValVT(SelectionDAG &DAG, const SDLoc &DL, 1477 CCValAssign &VA, SDValue Chain, 1478 SDValue Value) { 1479 // If the argument has been promoted from a smaller type, insert an 1480 // assertion to capture this. 1481 if (VA.getLocInfo() == CCValAssign::SExt) 1482 Value = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Value, 1483 DAG.getValueType(VA.getValVT())); 1484 else if (VA.getLocInfo() == CCValAssign::ZExt) 1485 Value = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Value, 1486 DAG.getValueType(VA.getValVT())); 1487 1488 if (VA.isExtInLoc()) 1489 Value = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Value); 1490 else if (VA.getLocInfo() == CCValAssign::BCvt) { 1491 // If this is a short vector argument loaded from the stack, 1492 // extend from i64 to full vector size and then bitcast. 
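    // (The i64 becomes element 0 of a v2i64; the upper element is left
    // undefined.)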
    assert(VA.getLocVT() == MVT::i64);
    assert(VA.getValVT().isVector());
    Value = DAG.getBuildVector(MVT::v2i64, DL, {Value, DAG.getUNDEF(MVT::i64)});
    Value = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Value);
  } else
    assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo");
  return Value;
}

// Value is a value of type VA.getValVT() that we need to copy into
// the location described by VA. Return a copy of Value converted to
// VA.getLocVT(). The caller is responsible for handling indirect values.
static SDValue convertValVTToLocVT(SelectionDAG &DAG, const SDLoc &DL,
                                   CCValAssign &VA, SDValue Value) {
  switch (VA.getLocInfo()) {
  case CCValAssign::SExt:
    return DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Value);
  case CCValAssign::ZExt:
    return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value);
  case CCValAssign::AExt:
    return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value);
  case CCValAssign::BCvt: {
    assert(VA.getLocVT() == MVT::i64 || VA.getLocVT() == MVT::i128);
    assert(VA.getValVT().isVector() || VA.getValVT() == MVT::f32 ||
           VA.getValVT() == MVT::f64 || VA.getValVT() == MVT::f128);
    // For an f32 vararg we need to first promote it to an f64 and then
    // bitcast it to an i64.
    if (VA.getValVT() == MVT::f32 && VA.getLocVT() == MVT::i64)
      Value = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, Value);
    MVT BitCastToType = VA.getValVT().isVector() && VA.getLocVT() == MVT::i64
                            ? MVT::v2i64
                            : VA.getLocVT();
    Value = DAG.getNode(ISD::BITCAST, DL, BitCastToType, Value);
    // For ELF, this is a short vector argument to be stored to the stack,
    // so bitcast to v2i64 and then extract the first element.
    if (BitCastToType == MVT::v2i64)
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value,
                         DAG.getConstant(0, DL, MVT::i32));
    return Value;
  }
  case CCValAssign::Full:
    return Value;
  default:
    llvm_unreachable("Unhandled getLocInfo()");
  }
}

static SDValue lowerI128ToGR128(SelectionDAG &DAG, SDValue In) {
  SDLoc DL(In);
  SDValue Lo, Hi;
  if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128)) {
    Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, In);
    Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64,
                     DAG.getNode(ISD::SRL, DL, MVT::i128, In,
                                 DAG.getConstant(64, DL, MVT::i32)));
  } else {
    std::tie(Lo, Hi) = DAG.SplitScalar(In, DL, MVT::i64, MVT::i64);
  }

  // FIXME: If v2i64 were a legal type, we could use it instead of
  // Untyped here. This might enable improved folding.
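  // The PAIR128 pseudo below combines the high and low i64 halves into one
  // untyped value that can live in the GR128 register class (an even/odd
  // pair of 64-bit GPRs).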
1554 SDNode *Pair = DAG.getMachineNode(SystemZ::PAIR128, DL, 1555 MVT::Untyped, Hi, Lo); 1556 return SDValue(Pair, 0); 1557 } 1558 1559 static SDValue lowerGR128ToI128(SelectionDAG &DAG, SDValue In) { 1560 SDLoc DL(In); 1561 SDValue Hi = DAG.getTargetExtractSubreg(SystemZ::subreg_h64, 1562 DL, MVT::i64, In); 1563 SDValue Lo = DAG.getTargetExtractSubreg(SystemZ::subreg_l64, 1564 DL, MVT::i64, In); 1565 1566 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128)) { 1567 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, Lo); 1568 Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, Hi); 1569 Hi = DAG.getNode(ISD::SHL, DL, MVT::i128, Hi, 1570 DAG.getConstant(64, DL, MVT::i32)); 1571 return DAG.getNode(ISD::OR, DL, MVT::i128, Lo, Hi); 1572 } else { 1573 return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi); 1574 } 1575 } 1576 1577 bool SystemZTargetLowering::splitValueIntoRegisterParts( 1578 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, 1579 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const { 1580 EVT ValueVT = Val.getValueType(); 1581 if (ValueVT.getSizeInBits() == 128 && NumParts == 1 && PartVT == MVT::Untyped) { 1582 // Inline assembly operand. 1583 Parts[0] = lowerI128ToGR128(DAG, DAG.getBitcast(MVT::i128, Val)); 1584 return true; 1585 } 1586 1587 return false; 1588 } 1589 1590 SDValue SystemZTargetLowering::joinRegisterPartsIntoValue( 1591 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, 1592 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const { 1593 if (ValueVT.getSizeInBits() == 128 && NumParts == 1 && PartVT == MVT::Untyped) { 1594 // Inline assembly operand. 1595 SDValue Res = lowerGR128ToI128(DAG, Parts[0]); 1596 return DAG.getBitcast(ValueVT, Res); 1597 } 1598 1599 return SDValue(); 1600 } 1601 1602 SDValue SystemZTargetLowering::LowerFormalArguments( 1603 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, 1604 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL, 1605 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { 1606 MachineFunction &MF = DAG.getMachineFunction(); 1607 MachineFrameInfo &MFI = MF.getFrameInfo(); 1608 MachineRegisterInfo &MRI = MF.getRegInfo(); 1609 SystemZMachineFunctionInfo *FuncInfo = 1610 MF.getInfo<SystemZMachineFunctionInfo>(); 1611 auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>(); 1612 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 1613 1614 // Assign locations to all of the incoming arguments. 1615 SmallVector<CCValAssign, 16> ArgLocs; 1616 SystemZCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); 1617 CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ); 1618 FuncInfo->setSizeOfFnParams(CCInfo.getStackSize()); 1619 1620 unsigned NumFixedGPRs = 0; 1621 unsigned NumFixedFPRs = 0; 1622 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) { 1623 SDValue ArgValue; 1624 CCValAssign &VA = ArgLocs[I]; 1625 EVT LocVT = VA.getLocVT(); 1626 if (VA.isRegLoc()) { 1627 // Arguments passed in registers 1628 const TargetRegisterClass *RC; 1629 switch (LocVT.getSimpleVT().SimpleTy) { 1630 default: 1631 // Integers smaller than i64 should be promoted to i64. 
1632 llvm_unreachable("Unexpected argument type"); 1633 case MVT::i32: 1634 NumFixedGPRs += 1; 1635 RC = &SystemZ::GR32BitRegClass; 1636 break; 1637 case MVT::i64: 1638 NumFixedGPRs += 1; 1639 RC = &SystemZ::GR64BitRegClass; 1640 break; 1641 case MVT::f32: 1642 NumFixedFPRs += 1; 1643 RC = &SystemZ::FP32BitRegClass; 1644 break; 1645 case MVT::f64: 1646 NumFixedFPRs += 1; 1647 RC = &SystemZ::FP64BitRegClass; 1648 break; 1649 case MVT::f128: 1650 NumFixedFPRs += 2; 1651 RC = &SystemZ::FP128BitRegClass; 1652 break; 1653 case MVT::v16i8: 1654 case MVT::v8i16: 1655 case MVT::v4i32: 1656 case MVT::v2i64: 1657 case MVT::v4f32: 1658 case MVT::v2f64: 1659 RC = &SystemZ::VR128BitRegClass; 1660 break; 1661 } 1662 1663 Register VReg = MRI.createVirtualRegister(RC); 1664 MRI.addLiveIn(VA.getLocReg(), VReg); 1665 ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, LocVT); 1666 } else { 1667 assert(VA.isMemLoc() && "Argument not register or memory"); 1668 1669 // Create the frame index object for this incoming parameter. 1670 // FIXME: Pre-include call frame size in the offset, should not 1671 // need to manually add it here. 1672 int64_t ArgSPOffset = VA.getLocMemOffset(); 1673 if (Subtarget.isTargetXPLINK64()) { 1674 auto &XPRegs = 1675 Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>(); 1676 ArgSPOffset += XPRegs.getCallFrameSize(); 1677 } 1678 int FI = 1679 MFI.CreateFixedObject(LocVT.getSizeInBits() / 8, ArgSPOffset, true); 1680 1681 // Create the SelectionDAG nodes corresponding to a load 1682 // from this parameter. Unpromoted ints and floats are 1683 // passed as right-justified 8-byte values. 1684 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 1685 if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32) 1686 FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, 1687 DAG.getIntPtrConstant(4, DL)); 1688 ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN, 1689 MachinePointerInfo::getFixedStack(MF, FI)); 1690 } 1691 1692 // Convert the value of the argument register into the value that's 1693 // being passed. 1694 if (VA.getLocInfo() == CCValAssign::Indirect) { 1695 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue, 1696 MachinePointerInfo())); 1697 // If the original argument was split (e.g. i128), we need 1698 // to load all parts of it here (using the same address). 1699 unsigned ArgIndex = Ins[I].OrigArgIndex; 1700 assert (Ins[I].PartOffset == 0); 1701 while (I + 1 != E && Ins[I + 1].OrigArgIndex == ArgIndex) { 1702 CCValAssign &PartVA = ArgLocs[I + 1]; 1703 unsigned PartOffset = Ins[I + 1].PartOffset; 1704 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, 1705 DAG.getIntPtrConstant(PartOffset, DL)); 1706 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address, 1707 MachinePointerInfo())); 1708 ++I; 1709 } 1710 } else 1711 InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue)); 1712 } 1713 1714 if (IsVarArg && Subtarget.isTargetXPLINK64()) { 1715 // Save the number of non-varargs registers for later use by va_start, etc. 1716 FuncInfo->setVarArgsFirstGPR(NumFixedGPRs); 1717 FuncInfo->setVarArgsFirstFPR(NumFixedFPRs); 1718 1719 auto *Regs = static_cast<SystemZXPLINK64Registers *>( 1720 Subtarget.getSpecialRegisters()); 1721 1722 // Likewise the address (in the form of a frame index) of where the 1723 // first stack vararg would be. The 1-byte size here is arbitrary. 1724 // FIXME: Pre-include call frame size in the offset, should not 1725 // need to manually add it here. 
    int64_t VarArgOffset = CCInfo.getStackSize() + Regs->getCallFrameSize();
    int FI = MFI.CreateFixedObject(1, VarArgOffset, true);
    FuncInfo->setVarArgsFrameIndex(FI);
  }

  if (IsVarArg && Subtarget.isTargetELF()) {
    // Save the number of non-varargs registers for later use by va_start, etc.
    FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
    FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);

    // Likewise the address (in the form of a frame index) of where the
    // first stack vararg would be. The 1-byte size here is arbitrary.
    int64_t VarArgsOffset = CCInfo.getStackSize();
    FuncInfo->setVarArgsFrameIndex(
        MFI.CreateFixedObject(1, VarArgsOffset, true));

    // ...and a similar frame index for the caller-allocated save area
    // that will be used to store the incoming registers.
    int64_t RegSaveOffset =
      -SystemZMC::ELFCallFrameSize + TFL->getRegSpillOffset(MF, SystemZ::R2D) - 16;
    unsigned RegSaveIndex = MFI.CreateFixedObject(1, RegSaveOffset, true);
    FuncInfo->setRegSaveFrameIndex(RegSaveIndex);

    // Store the FPR varargs in the reserved frame slots. (We store the
    // GPRs as part of the prologue.)
    if (NumFixedFPRs < SystemZ::ELFNumArgFPRs && !useSoftFloat()) {
      SDValue MemOps[SystemZ::ELFNumArgFPRs];
      for (unsigned I = NumFixedFPRs; I < SystemZ::ELFNumArgFPRs; ++I) {
        unsigned Offset = TFL->getRegSpillOffset(MF, SystemZ::ELFArgFPRs[I]);
        int FI =
          MFI.CreateFixedObject(8, -SystemZMC::ELFCallFrameSize + Offset, true);
        SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
        Register VReg = MF.addLiveIn(SystemZ::ELFArgFPRs[I],
                                     &SystemZ::FP64BitRegClass);
        SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64);
        MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN,
                                 MachinePointerInfo::getFixedStack(MF, FI));
      }
      // Join the stores, which are independent of one another.
      Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
                          ArrayRef(&MemOps[NumFixedFPRs],
                                   SystemZ::ELFNumArgFPRs - NumFixedFPRs));
    }
  }

  if (Subtarget.isTargetXPLINK64()) {
    // Create a virtual register for handling the incoming "ADA" special
    // register (R5).
    const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
    Register ADAvReg = MRI.createVirtualRegister(RC);
    auto *Regs = static_cast<SystemZXPLINK64Registers *>(
        Subtarget.getSpecialRegisters());
    MRI.addLiveIn(Regs->getADARegister(), ADAvReg);
    FuncInfo->setADAVirtualRegister(ADAvReg);
  }
  return Chain;
}

static bool canUseSiblingCall(const CCState &ArgCCInfo,
                              SmallVectorImpl<CCValAssign> &ArgLocs,
                              SmallVectorImpl<ISD::OutputArg> &Outs) {
  // Punt if there are any indirect or stack arguments, or if the call
  // needs the callee-saved argument register R6, or if the call uses
  // the callee-saved register arguments SwiftSelf and SwiftError.
1789 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) { 1790 CCValAssign &VA = ArgLocs[I]; 1791 if (VA.getLocInfo() == CCValAssign::Indirect) 1792 return false; 1793 if (!VA.isRegLoc()) 1794 return false; 1795 Register Reg = VA.getLocReg(); 1796 if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D) 1797 return false; 1798 if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftError()) 1799 return false; 1800 } 1801 return true; 1802 } 1803 1804 static SDValue getADAEntry(SelectionDAG &DAG, SDValue Val, SDLoc DL, 1805 unsigned Offset, bool LoadAdr = false) { 1806 MachineFunction &MF = DAG.getMachineFunction(); 1807 SystemZMachineFunctionInfo *MFI = MF.getInfo<SystemZMachineFunctionInfo>(); 1808 unsigned ADAvReg = MFI->getADAVirtualRegister(); 1809 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); 1810 1811 SDValue Reg = DAG.getRegister(ADAvReg, PtrVT); 1812 SDValue Ofs = DAG.getTargetConstant(Offset, DL, PtrVT); 1813 1814 SDValue Result = DAG.getNode(SystemZISD::ADA_ENTRY, DL, PtrVT, Val, Reg, Ofs); 1815 if (!LoadAdr) 1816 Result = DAG.getLoad( 1817 PtrVT, DL, DAG.getEntryNode(), Result, MachinePointerInfo(), Align(8), 1818 MachineMemOperand::MODereferenceable | MachineMemOperand::MOInvariant); 1819 1820 return Result; 1821 } 1822 1823 // ADA access using Global value 1824 // Note: for functions, address of descriptor is returned 1825 static SDValue getADAEntry(SelectionDAG &DAG, const GlobalValue *GV, SDLoc DL, 1826 EVT PtrVT) { 1827 unsigned ADAtype; 1828 bool LoadAddr = false; 1829 const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV); 1830 bool IsFunction = 1831 (isa<Function>(GV)) || (GA && isa<Function>(GA->getAliaseeObject())); 1832 bool IsInternal = (GV->hasInternalLinkage() || GV->hasPrivateLinkage()); 1833 1834 if (IsFunction) { 1835 if (IsInternal) { 1836 ADAtype = SystemZII::MO_ADA_DIRECT_FUNC_DESC; 1837 LoadAddr = true; 1838 } else 1839 ADAtype = SystemZII::MO_ADA_INDIRECT_FUNC_DESC; 1840 } else { 1841 ADAtype = SystemZII::MO_ADA_DATA_SYMBOL_ADDR; 1842 } 1843 SDValue Val = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, ADAtype); 1844 1845 return getADAEntry(DAG, Val, DL, 0, LoadAddr); 1846 } 1847 1848 static bool getzOSCalleeAndADA(SelectionDAG &DAG, SDValue &Callee, SDValue &ADA, 1849 SDLoc &DL, SDValue &Chain) { 1850 unsigned ADADelta = 0; // ADA offset in desc. 1851 unsigned EPADelta = 8; // EPA offset in desc. 1852 MachineFunction &MF = DAG.getMachineFunction(); 1853 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); 1854 1855 // XPLink calling convention. 
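  // For non-local callees the call goes through a function descriptor that
  // holds the ADA at offset 0 and the entry point (EPA) at offset 8, matching
  // ADADelta and EPADelta above. Internal functions are instead called
  // directly, reusing the caller's own ADA (the IsBRASL case in LowerCall).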
1856 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 1857 bool IsInternal = (G->getGlobal()->hasInternalLinkage() || 1858 G->getGlobal()->hasPrivateLinkage()); 1859 if (IsInternal) { 1860 SystemZMachineFunctionInfo *MFI = 1861 MF.getInfo<SystemZMachineFunctionInfo>(); 1862 unsigned ADAvReg = MFI->getADAVirtualRegister(); 1863 ADA = DAG.getCopyFromReg(Chain, DL, ADAvReg, PtrVT); 1864 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT); 1865 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee); 1866 return true; 1867 } else { 1868 SDValue GA = DAG.getTargetGlobalAddress( 1869 G->getGlobal(), DL, PtrVT, 0, SystemZII::MO_ADA_DIRECT_FUNC_DESC); 1870 ADA = getADAEntry(DAG, GA, DL, ADADelta); 1871 Callee = getADAEntry(DAG, GA, DL, EPADelta); 1872 } 1873 } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) { 1874 SDValue ES = DAG.getTargetExternalSymbol( 1875 E->getSymbol(), PtrVT, SystemZII::MO_ADA_DIRECT_FUNC_DESC); 1876 ADA = getADAEntry(DAG, ES, DL, ADADelta); 1877 Callee = getADAEntry(DAG, ES, DL, EPADelta); 1878 } else { 1879 // Function pointer case 1880 ADA = DAG.getNode(ISD::ADD, DL, PtrVT, Callee, 1881 DAG.getConstant(ADADelta, DL, PtrVT)); 1882 ADA = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), ADA, 1883 MachinePointerInfo::getGOT(DAG.getMachineFunction())); 1884 Callee = DAG.getNode(ISD::ADD, DL, PtrVT, Callee, 1885 DAG.getConstant(EPADelta, DL, PtrVT)); 1886 Callee = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Callee, 1887 MachinePointerInfo::getGOT(DAG.getMachineFunction())); 1888 } 1889 return false; 1890 } 1891 1892 SDValue 1893 SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI, 1894 SmallVectorImpl<SDValue> &InVals) const { 1895 SelectionDAG &DAG = CLI.DAG; 1896 SDLoc &DL = CLI.DL; 1897 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; 1898 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; 1899 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; 1900 SDValue Chain = CLI.Chain; 1901 SDValue Callee = CLI.Callee; 1902 bool &IsTailCall = CLI.IsTailCall; 1903 CallingConv::ID CallConv = CLI.CallConv; 1904 bool IsVarArg = CLI.IsVarArg; 1905 MachineFunction &MF = DAG.getMachineFunction(); 1906 EVT PtrVT = getPointerTy(MF.getDataLayout()); 1907 LLVMContext &Ctx = *DAG.getContext(); 1908 SystemZCallingConventionRegisters *Regs = Subtarget.getSpecialRegisters(); 1909 1910 // FIXME: z/OS support to be added in later. 1911 if (Subtarget.isTargetXPLINK64()) 1912 IsTailCall = false; 1913 1914 // Analyze the operands of the call, assigning locations to each operand. 1915 SmallVector<CCValAssign, 16> ArgLocs; 1916 SystemZCCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, Ctx); 1917 ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ); 1918 1919 // We don't support GuaranteedTailCallOpt, only automatically-detected 1920 // sibling calls. 1921 if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs, Outs)) 1922 IsTailCall = false; 1923 1924 // Get a count of how many bytes are to be pushed on the stack. 1925 unsigned NumBytes = ArgCCInfo.getStackSize(); 1926 1927 // Mark the start of the call. 1928 if (!IsTailCall) 1929 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL); 1930 1931 // Copy argument values to their designated locations. 
1932 SmallVector<std::pair<unsigned, SDValue>, 9> RegsToPass; 1933 SmallVector<SDValue, 8> MemOpChains; 1934 SDValue StackPtr; 1935 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) { 1936 CCValAssign &VA = ArgLocs[I]; 1937 SDValue ArgValue = OutVals[I]; 1938 1939 if (VA.getLocInfo() == CCValAssign::Indirect) { 1940 // Store the argument in a stack slot and pass its address. 1941 unsigned ArgIndex = Outs[I].OrigArgIndex; 1942 EVT SlotVT; 1943 if (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) { 1944 // Allocate the full stack space for a promoted (and split) argument. 1945 Type *OrigArgType = CLI.Args[Outs[I].OrigArgIndex].Ty; 1946 EVT OrigArgVT = getValueType(MF.getDataLayout(), OrigArgType); 1947 MVT PartVT = getRegisterTypeForCallingConv(Ctx, CLI.CallConv, OrigArgVT); 1948 unsigned N = getNumRegistersForCallingConv(Ctx, CLI.CallConv, OrigArgVT); 1949 SlotVT = EVT::getIntegerVT(Ctx, PartVT.getSizeInBits() * N); 1950 } else { 1951 SlotVT = Outs[I].VT; 1952 } 1953 SDValue SpillSlot = DAG.CreateStackTemporary(SlotVT); 1954 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex(); 1955 MemOpChains.push_back( 1956 DAG.getStore(Chain, DL, ArgValue, SpillSlot, 1957 MachinePointerInfo::getFixedStack(MF, FI))); 1958 // If the original argument was split (e.g. i128), we need 1959 // to store all parts of it here (and pass just one address). 1960 assert (Outs[I].PartOffset == 0); 1961 while (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) { 1962 SDValue PartValue = OutVals[I + 1]; 1963 unsigned PartOffset = Outs[I + 1].PartOffset; 1964 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, 1965 DAG.getIntPtrConstant(PartOffset, DL)); 1966 MemOpChains.push_back( 1967 DAG.getStore(Chain, DL, PartValue, Address, 1968 MachinePointerInfo::getFixedStack(MF, FI))); 1969 assert((PartOffset + PartValue.getValueType().getStoreSize() <= 1970 SlotVT.getStoreSize()) && "Not enough space for argument part!"); 1971 ++I; 1972 } 1973 ArgValue = SpillSlot; 1974 } else 1975 ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue); 1976 1977 if (VA.isRegLoc()) { 1978 // In XPLINK64, for the 128-bit vararg case, ArgValue is bitcasted to a 1979 // MVT::i128 type. We decompose the 128-bit type to a pair of its high 1980 // and low values. 1981 if (VA.getLocVT() == MVT::i128) 1982 ArgValue = lowerI128ToGR128(DAG, ArgValue); 1983 // Queue up the argument copies and emit them at the end. 1984 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue)); 1985 } else { 1986 assert(VA.isMemLoc() && "Argument not register or memory"); 1987 1988 // Work out the address of the stack slot. Unpromoted ints and 1989 // floats are passed as right-justified 8-byte values. 1990 if (!StackPtr.getNode()) 1991 StackPtr = DAG.getCopyFromReg(Chain, DL, 1992 Regs->getStackPointerRegister(), PtrVT); 1993 unsigned Offset = Regs->getStackPointerBias() + Regs->getCallFrameSize() + 1994 VA.getLocMemOffset(); 1995 if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32) 1996 Offset += 4; 1997 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, 1998 DAG.getIntPtrConstant(Offset, DL)); 1999 2000 // Emit the store. 2001 MemOpChains.push_back( 2002 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo())); 2003 2004 // Although long doubles or vectors are passed through the stack when 2005 // they are vararg (non-fixed arguments), if a long double or vector 2006 // occupies the third and fourth slot of the argument list GPR3 should 2007 // still shadow the third slot of the argument list. 
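      // For example, if a vararg long double occupies the third and fourth
      // 8-byte slots, the extract below takes its high half (the part that
      // lives in slot 3) and copies it into R3, so R3 keeps mirroring slot 3.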
2008 if (Subtarget.isTargetXPLINK64() && VA.needsCustom()) { 2009 SDValue ShadowArgValue = 2010 DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, ArgValue, 2011 DAG.getIntPtrConstant(1, DL)); 2012 RegsToPass.push_back(std::make_pair(SystemZ::R3D, ShadowArgValue)); 2013 } 2014 } 2015 } 2016 2017 // Join the stores, which are independent of one another. 2018 if (!MemOpChains.empty()) 2019 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains); 2020 2021 // Accept direct calls by converting symbolic call addresses to the 2022 // associated Target* opcodes. Force %r1 to be used for indirect 2023 // tail calls. 2024 SDValue Glue; 2025 2026 if (Subtarget.isTargetXPLINK64()) { 2027 SDValue ADA; 2028 bool IsBRASL = getzOSCalleeAndADA(DAG, Callee, ADA, DL, Chain); 2029 if (!IsBRASL) { 2030 unsigned CalleeReg = static_cast<SystemZXPLINK64Registers *>(Regs) 2031 ->getAddressOfCalleeRegister(); 2032 Chain = DAG.getCopyToReg(Chain, DL, CalleeReg, Callee, Glue); 2033 Glue = Chain.getValue(1); 2034 Callee = DAG.getRegister(CalleeReg, Callee.getValueType()); 2035 } 2036 RegsToPass.push_back(std::make_pair( 2037 static_cast<SystemZXPLINK64Registers *>(Regs)->getADARegister(), ADA)); 2038 } else { 2039 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 2040 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT); 2041 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee); 2042 } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) { 2043 Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT); 2044 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee); 2045 } else if (IsTailCall) { 2046 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R1D, Callee, Glue); 2047 Glue = Chain.getValue(1); 2048 Callee = DAG.getRegister(SystemZ::R1D, Callee.getValueType()); 2049 } 2050 } 2051 2052 // Build a sequence of copy-to-reg nodes, chained and glued together. 2053 for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) { 2054 Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[I].first, 2055 RegsToPass[I].second, Glue); 2056 Glue = Chain.getValue(1); 2057 } 2058 2059 // The first call operand is the chain and the second is the target address. 2060 SmallVector<SDValue, 8> Ops; 2061 Ops.push_back(Chain); 2062 Ops.push_back(Callee); 2063 2064 // Add argument registers to the end of the list so that they are 2065 // known live into the call. 2066 for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) 2067 Ops.push_back(DAG.getRegister(RegsToPass[I].first, 2068 RegsToPass[I].second.getValueType())); 2069 2070 // Add a register mask operand representing the call-preserved registers. 2071 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); 2072 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv); 2073 assert(Mask && "Missing call preserved mask for calling convention"); 2074 Ops.push_back(DAG.getRegisterMask(Mask)); 2075 2076 // Glue the call to the argument copies, if any. 2077 if (Glue.getNode()) 2078 Ops.push_back(Glue); 2079 2080 // Emit the call. 2081 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); 2082 if (IsTailCall) { 2083 SDValue Ret = DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, Ops); 2084 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge); 2085 return Ret; 2086 } 2087 Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, Ops); 2088 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge); 2089 Glue = Chain.getValue(1); 2090 2091 // Mark the end of the call, which is glued to the call itself. 
2092 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL); 2093 Glue = Chain.getValue(1); 2094 2095 // Assign locations to each value returned by this call. 2096 SmallVector<CCValAssign, 16> RetLocs; 2097 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, Ctx); 2098 RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ); 2099 2100 // Copy all of the result registers out of their specified physreg. 2101 for (CCValAssign &VA : RetLocs) { 2102 // Copy the value out, gluing the copy to the end of the call sequence. 2103 SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), 2104 VA.getLocVT(), Glue); 2105 Chain = RetValue.getValue(1); 2106 Glue = RetValue.getValue(2); 2107 2108 // Convert the value of the return register into the value that's 2109 // being returned. 2110 InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, RetValue)); 2111 } 2112 2113 return Chain; 2114 } 2115 2116 // Generate a call taking the given operands as arguments and returning a 2117 // result of type RetVT. 2118 std::pair<SDValue, SDValue> SystemZTargetLowering::makeExternalCall( 2119 SDValue Chain, SelectionDAG &DAG, const char *CalleeName, EVT RetVT, 2120 ArrayRef<SDValue> Ops, CallingConv::ID CallConv, bool IsSigned, SDLoc DL, 2121 bool DoesNotReturn, bool IsReturnValueUsed) const { 2122 TargetLowering::ArgListTy Args; 2123 Args.reserve(Ops.size()); 2124 2125 TargetLowering::ArgListEntry Entry; 2126 for (SDValue Op : Ops) { 2127 Entry.Node = Op; 2128 Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext()); 2129 Entry.IsSExt = shouldSignExtendTypeInLibCall(Op.getValueType(), IsSigned); 2130 Entry.IsZExt = !shouldSignExtendTypeInLibCall(Op.getValueType(), IsSigned); 2131 Args.push_back(Entry); 2132 } 2133 2134 SDValue Callee = 2135 DAG.getExternalSymbol(CalleeName, getPointerTy(DAG.getDataLayout())); 2136 2137 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); 2138 TargetLowering::CallLoweringInfo CLI(DAG); 2139 bool SignExtend = shouldSignExtendTypeInLibCall(RetVT, IsSigned); 2140 CLI.setDebugLoc(DL) 2141 .setChain(Chain) 2142 .setCallee(CallConv, RetTy, Callee, std::move(Args)) 2143 .setNoReturn(DoesNotReturn) 2144 .setDiscardResult(!IsReturnValueUsed) 2145 .setSExtResult(SignExtend) 2146 .setZExtResult(!SignExtend); 2147 return LowerCallTo(CLI); 2148 } 2149 2150 bool SystemZTargetLowering:: 2151 CanLowerReturn(CallingConv::ID CallConv, 2152 MachineFunction &MF, bool isVarArg, 2153 const SmallVectorImpl<ISD::OutputArg> &Outs, 2154 LLVMContext &Context) const { 2155 // Special case that we cannot easily detect in RetCC_SystemZ since 2156 // i128 may not be a legal type. 2157 for (auto &Out : Outs) 2158 if (Out.ArgVT == MVT::i128) 2159 return false; 2160 2161 SmallVector<CCValAssign, 16> RetLocs; 2162 CCState RetCCInfo(CallConv, isVarArg, MF, RetLocs, Context); 2163 return RetCCInfo.CheckReturn(Outs, RetCC_SystemZ); 2164 } 2165 2166 SDValue 2167 SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, 2168 bool IsVarArg, 2169 const SmallVectorImpl<ISD::OutputArg> &Outs, 2170 const SmallVectorImpl<SDValue> &OutVals, 2171 const SDLoc &DL, SelectionDAG &DAG) const { 2172 MachineFunction &MF = DAG.getMachineFunction(); 2173 2174 // Assign locations to each returned value. 
2175 SmallVector<CCValAssign, 16> RetLocs; 2176 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext()); 2177 RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ); 2178 2179 // Quick exit for void returns 2180 if (RetLocs.empty()) 2181 return DAG.getNode(SystemZISD::RET_GLUE, DL, MVT::Other, Chain); 2182 2183 if (CallConv == CallingConv::GHC) 2184 report_fatal_error("GHC functions return void only"); 2185 2186 // Copy the result values into the output registers. 2187 SDValue Glue; 2188 SmallVector<SDValue, 4> RetOps; 2189 RetOps.push_back(Chain); 2190 for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) { 2191 CCValAssign &VA = RetLocs[I]; 2192 SDValue RetValue = OutVals[I]; 2193 2194 // Make the return register live on exit. 2195 assert(VA.isRegLoc() && "Can only return in registers!"); 2196 2197 // Promote the value as required. 2198 RetValue = convertValVTToLocVT(DAG, DL, VA, RetValue); 2199 2200 // Chain and glue the copies together. 2201 Register Reg = VA.getLocReg(); 2202 Chain = DAG.getCopyToReg(Chain, DL, Reg, RetValue, Glue); 2203 Glue = Chain.getValue(1); 2204 RetOps.push_back(DAG.getRegister(Reg, VA.getLocVT())); 2205 } 2206 2207 // Update chain and glue. 2208 RetOps[0] = Chain; 2209 if (Glue.getNode()) 2210 RetOps.push_back(Glue); 2211 2212 return DAG.getNode(SystemZISD::RET_GLUE, DL, MVT::Other, RetOps); 2213 } 2214 2215 // Return true if Op is an intrinsic node with chain that returns the CC value 2216 // as its only (other) argument. Provide the associated SystemZISD opcode and 2217 // the mask of valid CC values if so. 2218 static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode, 2219 unsigned &CCValid) { 2220 unsigned Id = Op.getConstantOperandVal(1); 2221 switch (Id) { 2222 case Intrinsic::s390_tbegin: 2223 Opcode = SystemZISD::TBEGIN; 2224 CCValid = SystemZ::CCMASK_TBEGIN; 2225 return true; 2226 2227 case Intrinsic::s390_tbegin_nofloat: 2228 Opcode = SystemZISD::TBEGIN_NOFLOAT; 2229 CCValid = SystemZ::CCMASK_TBEGIN; 2230 return true; 2231 2232 case Intrinsic::s390_tend: 2233 Opcode = SystemZISD::TEND; 2234 CCValid = SystemZ::CCMASK_TEND; 2235 return true; 2236 2237 default: 2238 return false; 2239 } 2240 } 2241 2242 // Return true if Op is an intrinsic node without chain that returns the 2243 // CC value as its final argument. Provide the associated SystemZISD 2244 // opcode and the mask of valid CC values if so. 
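// For example, s390_vceqbs maps to VICMPES: the intrinsic's last result is
// the CC value produced by the vector compare, and only bits within
// CCMASK_VCMP can ever be set for it.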
2245 static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) { 2246 unsigned Id = Op.getConstantOperandVal(0); 2247 switch (Id) { 2248 case Intrinsic::s390_vpkshs: 2249 case Intrinsic::s390_vpksfs: 2250 case Intrinsic::s390_vpksgs: 2251 Opcode = SystemZISD::PACKS_CC; 2252 CCValid = SystemZ::CCMASK_VCMP; 2253 return true; 2254 2255 case Intrinsic::s390_vpklshs: 2256 case Intrinsic::s390_vpklsfs: 2257 case Intrinsic::s390_vpklsgs: 2258 Opcode = SystemZISD::PACKLS_CC; 2259 CCValid = SystemZ::CCMASK_VCMP; 2260 return true; 2261 2262 case Intrinsic::s390_vceqbs: 2263 case Intrinsic::s390_vceqhs: 2264 case Intrinsic::s390_vceqfs: 2265 case Intrinsic::s390_vceqgs: 2266 Opcode = SystemZISD::VICMPES; 2267 CCValid = SystemZ::CCMASK_VCMP; 2268 return true; 2269 2270 case Intrinsic::s390_vchbs: 2271 case Intrinsic::s390_vchhs: 2272 case Intrinsic::s390_vchfs: 2273 case Intrinsic::s390_vchgs: 2274 Opcode = SystemZISD::VICMPHS; 2275 CCValid = SystemZ::CCMASK_VCMP; 2276 return true; 2277 2278 case Intrinsic::s390_vchlbs: 2279 case Intrinsic::s390_vchlhs: 2280 case Intrinsic::s390_vchlfs: 2281 case Intrinsic::s390_vchlgs: 2282 Opcode = SystemZISD::VICMPHLS; 2283 CCValid = SystemZ::CCMASK_VCMP; 2284 return true; 2285 2286 case Intrinsic::s390_vtm: 2287 Opcode = SystemZISD::VTM; 2288 CCValid = SystemZ::CCMASK_VCMP; 2289 return true; 2290 2291 case Intrinsic::s390_vfaebs: 2292 case Intrinsic::s390_vfaehs: 2293 case Intrinsic::s390_vfaefs: 2294 Opcode = SystemZISD::VFAE_CC; 2295 CCValid = SystemZ::CCMASK_ANY; 2296 return true; 2297 2298 case Intrinsic::s390_vfaezbs: 2299 case Intrinsic::s390_vfaezhs: 2300 case Intrinsic::s390_vfaezfs: 2301 Opcode = SystemZISD::VFAEZ_CC; 2302 CCValid = SystemZ::CCMASK_ANY; 2303 return true; 2304 2305 case Intrinsic::s390_vfeebs: 2306 case Intrinsic::s390_vfeehs: 2307 case Intrinsic::s390_vfeefs: 2308 Opcode = SystemZISD::VFEE_CC; 2309 CCValid = SystemZ::CCMASK_ANY; 2310 return true; 2311 2312 case Intrinsic::s390_vfeezbs: 2313 case Intrinsic::s390_vfeezhs: 2314 case Intrinsic::s390_vfeezfs: 2315 Opcode = SystemZISD::VFEEZ_CC; 2316 CCValid = SystemZ::CCMASK_ANY; 2317 return true; 2318 2319 case Intrinsic::s390_vfenebs: 2320 case Intrinsic::s390_vfenehs: 2321 case Intrinsic::s390_vfenefs: 2322 Opcode = SystemZISD::VFENE_CC; 2323 CCValid = SystemZ::CCMASK_ANY; 2324 return true; 2325 2326 case Intrinsic::s390_vfenezbs: 2327 case Intrinsic::s390_vfenezhs: 2328 case Intrinsic::s390_vfenezfs: 2329 Opcode = SystemZISD::VFENEZ_CC; 2330 CCValid = SystemZ::CCMASK_ANY; 2331 return true; 2332 2333 case Intrinsic::s390_vistrbs: 2334 case Intrinsic::s390_vistrhs: 2335 case Intrinsic::s390_vistrfs: 2336 Opcode = SystemZISD::VISTR_CC; 2337 CCValid = SystemZ::CCMASK_0 | SystemZ::CCMASK_3; 2338 return true; 2339 2340 case Intrinsic::s390_vstrcbs: 2341 case Intrinsic::s390_vstrchs: 2342 case Intrinsic::s390_vstrcfs: 2343 Opcode = SystemZISD::VSTRC_CC; 2344 CCValid = SystemZ::CCMASK_ANY; 2345 return true; 2346 2347 case Intrinsic::s390_vstrczbs: 2348 case Intrinsic::s390_vstrczhs: 2349 case Intrinsic::s390_vstrczfs: 2350 Opcode = SystemZISD::VSTRCZ_CC; 2351 CCValid = SystemZ::CCMASK_ANY; 2352 return true; 2353 2354 case Intrinsic::s390_vstrsb: 2355 case Intrinsic::s390_vstrsh: 2356 case Intrinsic::s390_vstrsf: 2357 Opcode = SystemZISD::VSTRS_CC; 2358 CCValid = SystemZ::CCMASK_ANY; 2359 return true; 2360 2361 case Intrinsic::s390_vstrszb: 2362 case Intrinsic::s390_vstrszh: 2363 case Intrinsic::s390_vstrszf: 2364 Opcode = SystemZISD::VSTRSZ_CC; 2365 CCValid = 
SystemZ::CCMASK_ANY; 2366 return true; 2367 2368 case Intrinsic::s390_vfcedbs: 2369 case Intrinsic::s390_vfcesbs: 2370 Opcode = SystemZISD::VFCMPES; 2371 CCValid = SystemZ::CCMASK_VCMP; 2372 return true; 2373 2374 case Intrinsic::s390_vfchdbs: 2375 case Intrinsic::s390_vfchsbs: 2376 Opcode = SystemZISD::VFCMPHS; 2377 CCValid = SystemZ::CCMASK_VCMP; 2378 return true; 2379 2380 case Intrinsic::s390_vfchedbs: 2381 case Intrinsic::s390_vfchesbs: 2382 Opcode = SystemZISD::VFCMPHES; 2383 CCValid = SystemZ::CCMASK_VCMP; 2384 return true; 2385 2386 case Intrinsic::s390_vftcidb: 2387 case Intrinsic::s390_vftcisb: 2388 Opcode = SystemZISD::VFTCI; 2389 CCValid = SystemZ::CCMASK_VCMP; 2390 return true; 2391 2392 case Intrinsic::s390_tdc: 2393 Opcode = SystemZISD::TDC; 2394 CCValid = SystemZ::CCMASK_TDC; 2395 return true; 2396 2397 default: 2398 return false; 2399 } 2400 } 2401 2402 // Emit an intrinsic with chain and an explicit CC register result. 2403 static SDNode *emitIntrinsicWithCCAndChain(SelectionDAG &DAG, SDValue Op, 2404 unsigned Opcode) { 2405 // Copy all operands except the intrinsic ID. 2406 unsigned NumOps = Op.getNumOperands(); 2407 SmallVector<SDValue, 6> Ops; 2408 Ops.reserve(NumOps - 1); 2409 Ops.push_back(Op.getOperand(0)); 2410 for (unsigned I = 2; I < NumOps; ++I) 2411 Ops.push_back(Op.getOperand(I)); 2412 2413 assert(Op->getNumValues() == 2 && "Expected only CC result and chain"); 2414 SDVTList RawVTs = DAG.getVTList(MVT::i32, MVT::Other); 2415 SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops); 2416 SDValue OldChain = SDValue(Op.getNode(), 1); 2417 SDValue NewChain = SDValue(Intr.getNode(), 1); 2418 DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain); 2419 return Intr.getNode(); 2420 } 2421 2422 // Emit an intrinsic with an explicit CC register result. 2423 static SDNode *emitIntrinsicWithCC(SelectionDAG &DAG, SDValue Op, 2424 unsigned Opcode) { 2425 // Copy all operands except the intrinsic ID. 2426 unsigned NumOps = Op.getNumOperands(); 2427 SmallVector<SDValue, 6> Ops; 2428 Ops.reserve(NumOps - 1); 2429 for (unsigned I = 1; I < NumOps; ++I) 2430 Ops.push_back(Op.getOperand(I)); 2431 2432 SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), Op->getVTList(), Ops); 2433 return Intr.getNode(); 2434 } 2435 2436 // CC is a comparison that will be implemented using an integer or 2437 // floating-point comparison. Return the condition code mask for 2438 // a branch on true. In the integer case, CCMASK_CMP_UO is set for 2439 // unsigned comparisons and clear for signed ones. In the floating-point 2440 // case, CCMASK_CMP_UO has its normal mask meaning (unordered). 2441 static unsigned CCMaskForCondCode(ISD::CondCode CC) { 2442 #define CONV(X) \ 2443 case ISD::SET##X: return SystemZ::CCMASK_CMP_##X; \ 2444 case ISD::SETO##X: return SystemZ::CCMASK_CMP_##X; \ 2445 case ISD::SETU##X: return SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_##X 2446 2447 switch (CC) { 2448 default: 2449 llvm_unreachable("Invalid integer condition!"); 2450 2451 CONV(EQ); 2452 CONV(NE); 2453 CONV(GT); 2454 CONV(GE); 2455 CONV(LT); 2456 CONV(LE); 2457 2458 case ISD::SETO: return SystemZ::CCMASK_CMP_O; 2459 case ISD::SETUO: return SystemZ::CCMASK_CMP_UO; 2460 } 2461 #undef CONV 2462 } 2463 2464 // If C can be converted to a comparison against zero, adjust the operands 2465 // as necessary. 
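// For example, (x > -1) becomes (x >= 0) and (x < 1) becomes (x <= 0),
// allowing compare-with-zero forms such as LOAD AND TEST to be used.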
static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
  if (C.ICmpType == SystemZICMP::UnsignedOnly)
    return;

  auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1.getNode());
  if (!ConstOp1 || ConstOp1->getValueSizeInBits(0) > 64)
    return;

  int64_t Value = ConstOp1->getSExtValue();
  if ((Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_GT) ||
      (Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_LE) ||
      (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_LT) ||
      (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_GE)) {
    C.CCMask ^= SystemZ::CCMASK_CMP_EQ;
    C.Op1 = DAG.getConstant(0, DL, C.Op1.getValueType());
  }
}

// If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI,
// adjust the operands as necessary.
static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL,
                             Comparison &C) {
  // For us to make any changes, it must be a comparison between a single-use
  // load and a constant.
  if (!C.Op0.hasOneUse() ||
      C.Op0.getOpcode() != ISD::LOAD ||
      C.Op1.getOpcode() != ISD::Constant)
    return;

  // We must have an 8- or 16-bit load.
  auto *Load = cast<LoadSDNode>(C.Op0);
  unsigned NumBits = Load->getMemoryVT().getSizeInBits();
  if ((NumBits != 8 && NumBits != 16) ||
      NumBits != Load->getMemoryVT().getStoreSizeInBits())
    return;

  // The load must be an extending one and the constant must be within the
  // range of the unextended value.
  auto *ConstOp1 = cast<ConstantSDNode>(C.Op1);
  if (!ConstOp1 || ConstOp1->getValueSizeInBits(0) > 64)
    return;
  uint64_t Value = ConstOp1->getZExtValue();
  uint64_t Mask = (1 << NumBits) - 1;
  if (Load->getExtensionType() == ISD::SEXTLOAD) {
    // Make sure that ConstOp1 is in range of C.Op0.
    int64_t SignedValue = ConstOp1->getSExtValue();
    if (uint64_t(SignedValue) + (uint64_t(1) << (NumBits - 1)) > Mask)
      return;
    if (C.ICmpType != SystemZICMP::SignedOnly) {
      // Unsigned comparison between two sign-extended values is equivalent
      // to unsigned comparison between two zero-extended values.
      Value &= Mask;
    } else if (NumBits == 8) {
      // Try to treat the comparison as unsigned, so that we can use CLI.
      // Adjust CCMask and Value as necessary.
      if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_LT)
        // Test whether the high bit of the byte is set.
        Value = 127, C.CCMask = SystemZ::CCMASK_CMP_GT;
      else if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_GE)
        // Test whether the high bit of the byte is clear.
        Value = 128, C.CCMask = SystemZ::CCMASK_CMP_LT;
      else
        // No instruction exists for this combination.
        return;
      C.ICmpType = SystemZICMP::UnsignedOnly;
    }
  } else if (Load->getExtensionType() == ISD::ZEXTLOAD) {
    if (Value > Mask)
      return;
    // If the constant is in range, we can use any comparison.
    C.ICmpType = SystemZICMP::Any;
  } else
    return;

  // Make sure that the first operand is an i32 of the right extension type.
  ISD::LoadExtType ExtType = (C.ICmpType == SystemZICMP::SignedOnly ?
2542 ISD::SEXTLOAD : 2543 ISD::ZEXTLOAD); 2544 if (C.Op0.getValueType() != MVT::i32 || 2545 Load->getExtensionType() != ExtType) { 2546 C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32, Load->getChain(), 2547 Load->getBasePtr(), Load->getPointerInfo(), 2548 Load->getMemoryVT(), Load->getAlign(), 2549 Load->getMemOperand()->getFlags()); 2550 // Update the chain uses. 2551 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), C.Op0.getValue(1)); 2552 } 2553 2554 // Make sure that the second operand is an i32 with the right value. 2555 if (C.Op1.getValueType() != MVT::i32 || 2556 Value != ConstOp1->getZExtValue()) 2557 C.Op1 = DAG.getConstant(Value, DL, MVT::i32); 2558 } 2559 2560 // Return true if Op is either an unextended load, or a load suitable 2561 // for integer register-memory comparisons of type ICmpType. 2562 static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType) { 2563 auto *Load = dyn_cast<LoadSDNode>(Op.getNode()); 2564 if (Load) { 2565 // There are no instructions to compare a register with a memory byte. 2566 if (Load->getMemoryVT() == MVT::i8) 2567 return false; 2568 // Otherwise decide on extension type. 2569 switch (Load->getExtensionType()) { 2570 case ISD::NON_EXTLOAD: 2571 return true; 2572 case ISD::SEXTLOAD: 2573 return ICmpType != SystemZICMP::UnsignedOnly; 2574 case ISD::ZEXTLOAD: 2575 return ICmpType != SystemZICMP::SignedOnly; 2576 default: 2577 break; 2578 } 2579 } 2580 return false; 2581 } 2582 2583 // Return true if it is better to swap the operands of C. 2584 static bool shouldSwapCmpOperands(const Comparison &C) { 2585 // Leave i128 and f128 comparisons alone, since they have no memory forms. 2586 if (C.Op0.getValueType() == MVT::i128) 2587 return false; 2588 if (C.Op0.getValueType() == MVT::f128) 2589 return false; 2590 2591 // Always keep a floating-point constant second, since comparisons with 2592 // zero can use LOAD TEST and comparisons with other constants make a 2593 // natural memory operand. 2594 if (isa<ConstantFPSDNode>(C.Op1)) 2595 return false; 2596 2597 // Never swap comparisons with zero since there are many ways to optimize 2598 // those later. 2599 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1); 2600 if (ConstOp1 && ConstOp1->getZExtValue() == 0) 2601 return false; 2602 2603 // Also keep natural memory operands second if the loaded value is 2604 // only used here. Several comparisons have memory forms. 2605 if (isNaturalMemoryOperand(C.Op1, C.ICmpType) && C.Op1.hasOneUse()) 2606 return false; 2607 2608 // Look for cases where Cmp0 is a single-use load and Cmp1 isn't. 2609 // In that case we generally prefer the memory to be second. 2610 if (isNaturalMemoryOperand(C.Op0, C.ICmpType) && C.Op0.hasOneUse()) { 2611 // The only exceptions are when the second operand is a constant and 2612 // we can use things like CHHSI. 2613 if (!ConstOp1) 2614 return true; 2615 // The unsigned memory-immediate instructions can handle 16-bit 2616 // unsigned integers. 2617 if (C.ICmpType != SystemZICMP::SignedOnly && 2618 isUInt<16>(ConstOp1->getZExtValue())) 2619 return false; 2620 // The signed memory-immediate instructions can handle 16-bit 2621 // signed integers. 2622 if (C.ICmpType != SystemZICMP::UnsignedOnly && 2623 isInt<16>(ConstOp1->getSExtValue())) 2624 return false; 2625 return true; 2626 } 2627 2628 // Try to promote the use of CGFR and CLGFR. 
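  // That is, if Op0 is a sign- or zero-extended i32 (or an AND with
  // 0xffffffff), swapping puts the extension second, where the extending
  // compare instruction can fold it.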
2629 unsigned Opcode0 = C.Op0.getOpcode(); 2630 if (C.ICmpType != SystemZICMP::UnsignedOnly && Opcode0 == ISD::SIGN_EXTEND) 2631 return true; 2632 if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::ZERO_EXTEND) 2633 return true; 2634 if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::AND && 2635 C.Op0.getOperand(1).getOpcode() == ISD::Constant && 2636 C.Op0.getConstantOperandVal(1) == 0xffffffff) 2637 return true; 2638 2639 return false; 2640 } 2641 2642 // Check whether C tests for equality between X and Y and whether X - Y 2643 // or Y - X is also computed. In that case it's better to compare the 2644 // result of the subtraction against zero. 2645 static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL, 2646 Comparison &C) { 2647 if (C.CCMask == SystemZ::CCMASK_CMP_EQ || 2648 C.CCMask == SystemZ::CCMASK_CMP_NE) { 2649 for (SDNode *N : C.Op0->uses()) { 2650 if (N->getOpcode() == ISD::SUB && 2651 ((N->getOperand(0) == C.Op0 && N->getOperand(1) == C.Op1) || 2652 (N->getOperand(0) == C.Op1 && N->getOperand(1) == C.Op0))) { 2653 // Disable the nsw and nuw flags: the backend needs to handle 2654 // overflow as well during comparison elimination. 2655 SDNodeFlags Flags = N->getFlags(); 2656 Flags.setNoSignedWrap(false); 2657 Flags.setNoUnsignedWrap(false); 2658 N->setFlags(Flags); 2659 C.Op0 = SDValue(N, 0); 2660 C.Op1 = DAG.getConstant(0, DL, N->getValueType(0)); 2661 return; 2662 } 2663 } 2664 } 2665 } 2666 2667 // Check whether C compares a floating-point value with zero and if that 2668 // floating-point value is also negated. In this case we can use the 2669 // negation to set CC, so avoiding separate LOAD AND TEST and 2670 // LOAD (NEGATIVE/COMPLEMENT) instructions. 2671 static void adjustForFNeg(Comparison &C) { 2672 // This optimization is invalid for strict comparisons, since FNEG 2673 // does not raise any exceptions. 2674 if (C.Chain) 2675 return; 2676 auto *C1 = dyn_cast<ConstantFPSDNode>(C.Op1); 2677 if (C1 && C1->isZero()) { 2678 for (SDNode *N : C.Op0->uses()) { 2679 if (N->getOpcode() == ISD::FNEG) { 2680 C.Op0 = SDValue(N, 0); 2681 C.CCMask = SystemZ::reverseCCMask(C.CCMask); 2682 return; 2683 } 2684 } 2685 } 2686 } 2687 2688 // Check whether C compares (shl X, 32) with 0 and whether X is 2689 // also sign-extended. In that case it is better to test the result 2690 // of the sign extension using LTGFR. 2691 // 2692 // This case is important because InstCombine transforms a comparison 2693 // with (sext (trunc X)) into a comparison with (shl X, 32). 2694 static void adjustForLTGFR(Comparison &C) { 2695 // Check for a comparison between (shl X, 32) and 0. 2696 if (C.Op0.getOpcode() == ISD::SHL && C.Op0.getValueType() == MVT::i64 && 2697 C.Op1.getOpcode() == ISD::Constant && C.Op1->getAsZExtVal() == 0) { 2698 auto *C1 = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1)); 2699 if (C1 && C1->getZExtValue() == 32) { 2700 SDValue ShlOp0 = C.Op0.getOperand(0); 2701 // See whether X has any SIGN_EXTEND_INREG uses. 2702 for (SDNode *N : ShlOp0->uses()) { 2703 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG && 2704 cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32) { 2705 C.Op0 = SDValue(N, 0); 2706 return; 2707 } 2708 } 2709 } 2710 } 2711 } 2712 2713 // If C compares the truncation of an extending load, try to compare 2714 // the untruncated value instead. This exposes more opportunities to 2715 // reuse CC. 
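// For example, if X is a zero-extending i16 load and C tests
// (trunc X to i32) == 0, then comparing X itself against zero is equivalent,
// since the truncation cannot drop any bits of the loaded value.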
2716 static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL, 2717 Comparison &C) { 2718 if (C.Op0.getOpcode() == ISD::TRUNCATE && 2719 C.Op0.getOperand(0).getOpcode() == ISD::LOAD && 2720 C.Op1.getOpcode() == ISD::Constant && 2721 cast<ConstantSDNode>(C.Op1)->getValueSizeInBits(0) <= 64 && 2722 C.Op1->getAsZExtVal() == 0) { 2723 auto *L = cast<LoadSDNode>(C.Op0.getOperand(0)); 2724 if (L->getMemoryVT().getStoreSizeInBits().getFixedValue() <= 2725 C.Op0.getValueSizeInBits().getFixedValue()) { 2726 unsigned Type = L->getExtensionType(); 2727 if ((Type == ISD::ZEXTLOAD && C.ICmpType != SystemZICMP::SignedOnly) || 2728 (Type == ISD::SEXTLOAD && C.ICmpType != SystemZICMP::UnsignedOnly)) { 2729 C.Op0 = C.Op0.getOperand(0); 2730 C.Op1 = DAG.getConstant(0, DL, C.Op0.getValueType()); 2731 } 2732 } 2733 } 2734 } 2735 2736 // Return true if shift operation N has an in-range constant shift value. 2737 // Store it in ShiftVal if so. 2738 static bool isSimpleShift(SDValue N, unsigned &ShiftVal) { 2739 auto *Shift = dyn_cast<ConstantSDNode>(N.getOperand(1)); 2740 if (!Shift) 2741 return false; 2742 2743 uint64_t Amount = Shift->getZExtValue(); 2744 if (Amount >= N.getValueSizeInBits()) 2745 return false; 2746 2747 ShiftVal = Amount; 2748 return true; 2749 } 2750 2751 // Check whether an AND with Mask is suitable for a TEST UNDER MASK 2752 // instruction and whether the CC value is descriptive enough to handle 2753 // a comparison of type Opcode between the AND result and CmpVal. 2754 // CCMask says which comparison result is being tested and BitSize is 2755 // the number of bits in the operands. If TEST UNDER MASK can be used, 2756 // return the corresponding CC mask, otherwise return 0. 2757 static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask, 2758 uint64_t Mask, uint64_t CmpVal, 2759 unsigned ICmpType) { 2760 assert(Mask != 0 && "ANDs with zero should have been removed by now"); 2761 2762 // Check whether the mask is suitable for TMHH, TMHL, TMLH or TMLL. 2763 if (!SystemZ::isImmLL(Mask) && !SystemZ::isImmLH(Mask) && 2764 !SystemZ::isImmHL(Mask) && !SystemZ::isImmHH(Mask)) 2765 return 0; 2766 2767 // Work out the masks for the lowest and highest bits. 2768 uint64_t High = llvm::bit_floor(Mask); 2769 uint64_t Low = uint64_t(1) << llvm::countr_zero(Mask); 2770 2771 // Signed ordered comparisons are effectively unsigned if the sign 2772 // bit is dropped. 2773 bool EffectivelyUnsigned = (ICmpType != SystemZICMP::SignedOnly); 2774 2775 // Check for equality comparisons with 0, or the equivalent. 2776 if (CmpVal == 0) { 2777 if (CCMask == SystemZ::CCMASK_CMP_EQ) 2778 return SystemZ::CCMASK_TM_ALL_0; 2779 if (CCMask == SystemZ::CCMASK_CMP_NE) 2780 return SystemZ::CCMASK_TM_SOME_1; 2781 } 2782 if (EffectivelyUnsigned && CmpVal > 0 && CmpVal <= Low) { 2783 if (CCMask == SystemZ::CCMASK_CMP_LT) 2784 return SystemZ::CCMASK_TM_ALL_0; 2785 if (CCMask == SystemZ::CCMASK_CMP_GE) 2786 return SystemZ::CCMASK_TM_SOME_1; 2787 } 2788 if (EffectivelyUnsigned && CmpVal < Low) { 2789 if (CCMask == SystemZ::CCMASK_CMP_LE) 2790 return SystemZ::CCMASK_TM_ALL_0; 2791 if (CCMask == SystemZ::CCMASK_CMP_GT) 2792 return SystemZ::CCMASK_TM_SOME_1; 2793 } 2794 2795 // Check for equality comparisons with the mask, or the equivalent. 
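  // For example, (x & Mask) == Mask holds exactly when all masked bits are
  // one, which TEST UNDER MASK reports as CCMASK_TM_ALL_1.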
2796 if (CmpVal == Mask) { 2797 if (CCMask == SystemZ::CCMASK_CMP_EQ) 2798 return SystemZ::CCMASK_TM_ALL_1; 2799 if (CCMask == SystemZ::CCMASK_CMP_NE) 2800 return SystemZ::CCMASK_TM_SOME_0; 2801 } 2802 if (EffectivelyUnsigned && CmpVal >= Mask - Low && CmpVal < Mask) { 2803 if (CCMask == SystemZ::CCMASK_CMP_GT) 2804 return SystemZ::CCMASK_TM_ALL_1; 2805 if (CCMask == SystemZ::CCMASK_CMP_LE) 2806 return SystemZ::CCMASK_TM_SOME_0; 2807 } 2808 if (EffectivelyUnsigned && CmpVal > Mask - Low && CmpVal <= Mask) { 2809 if (CCMask == SystemZ::CCMASK_CMP_GE) 2810 return SystemZ::CCMASK_TM_ALL_1; 2811 if (CCMask == SystemZ::CCMASK_CMP_LT) 2812 return SystemZ::CCMASK_TM_SOME_0; 2813 } 2814 2815 // Check for ordered comparisons with the top bit. 2816 if (EffectivelyUnsigned && CmpVal >= Mask - High && CmpVal < High) { 2817 if (CCMask == SystemZ::CCMASK_CMP_LE) 2818 return SystemZ::CCMASK_TM_MSB_0; 2819 if (CCMask == SystemZ::CCMASK_CMP_GT) 2820 return SystemZ::CCMASK_TM_MSB_1; 2821 } 2822 if (EffectivelyUnsigned && CmpVal > Mask - High && CmpVal <= High) { 2823 if (CCMask == SystemZ::CCMASK_CMP_LT) 2824 return SystemZ::CCMASK_TM_MSB_0; 2825 if (CCMask == SystemZ::CCMASK_CMP_GE) 2826 return SystemZ::CCMASK_TM_MSB_1; 2827 } 2828 2829 // If there are just two bits, we can do equality checks for Low and High 2830 // as well. 2831 if (Mask == Low + High) { 2832 if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == Low) 2833 return SystemZ::CCMASK_TM_MIXED_MSB_0; 2834 if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == Low) 2835 return SystemZ::CCMASK_TM_MIXED_MSB_0 ^ SystemZ::CCMASK_ANY; 2836 if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == High) 2837 return SystemZ::CCMASK_TM_MIXED_MSB_1; 2838 if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == High) 2839 return SystemZ::CCMASK_TM_MIXED_MSB_1 ^ SystemZ::CCMASK_ANY; 2840 } 2841 2842 // Looks like we've exhausted our options. 2843 return 0; 2844 } 2845 2846 // See whether C can be implemented as a TEST UNDER MASK instruction. 2847 // Update the arguments with the TM version if so. 2848 static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL, 2849 Comparison &C) { 2850 // Use VECTOR TEST UNDER MASK for i128 operations. 2851 if (C.Op0.getValueType() == MVT::i128) { 2852 // We can use VTM for EQ/NE comparisons of x & y against 0. 2853 if (C.Op0.getOpcode() == ISD::AND && 2854 (C.CCMask == SystemZ::CCMASK_CMP_EQ || 2855 C.CCMask == SystemZ::CCMASK_CMP_NE)) { 2856 auto *Mask = dyn_cast<ConstantSDNode>(C.Op1); 2857 if (Mask && Mask->getAPIntValue() == 0) { 2858 C.Opcode = SystemZISD::VTM; 2859 C.Op1 = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, C.Op0.getOperand(1)); 2860 C.Op0 = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, C.Op0.getOperand(0)); 2861 C.CCValid = SystemZ::CCMASK_VCMP; 2862 if (C.CCMask == SystemZ::CCMASK_CMP_EQ) 2863 C.CCMask = SystemZ::CCMASK_VCMP_ALL; 2864 else 2865 C.CCMask = SystemZ::CCMASK_VCMP_ALL ^ C.CCValid; 2866 } 2867 } 2868 return; 2869 } 2870 2871 // Check that we have a comparison with a constant. 2872 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1); 2873 if (!ConstOp1) 2874 return; 2875 uint64_t CmpVal = ConstOp1->getZExtValue(); 2876 2877 // Check whether the nonconstant input is an AND with a constant mask. 
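  // If it is, the AND's constant becomes the TM mask; otherwise (the else
  // branch below) a mask is derived from the comparison value so that TMHH
  // can stand in for the missing 64-bit compare-immediate form.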
  Comparison NewC(C);
  uint64_t MaskVal;
  ConstantSDNode *Mask = nullptr;
  if (C.Op0.getOpcode() == ISD::AND) {
    NewC.Op0 = C.Op0.getOperand(0);
    NewC.Op1 = C.Op0.getOperand(1);
    Mask = dyn_cast<ConstantSDNode>(NewC.Op1);
    if (!Mask)
      return;
    MaskVal = Mask->getZExtValue();
  } else {
    // There is no instruction to compare with a 64-bit immediate
    // so use TMHH instead if possible. We need an unsigned ordered
    // comparison with an i64 immediate.
    if (NewC.Op0.getValueType() != MVT::i64 ||
        NewC.CCMask == SystemZ::CCMASK_CMP_EQ ||
        NewC.CCMask == SystemZ::CCMASK_CMP_NE ||
        NewC.ICmpType == SystemZICMP::SignedOnly)
      return;
    // Convert LE and GT comparisons into LT and GE.
    if (NewC.CCMask == SystemZ::CCMASK_CMP_LE ||
        NewC.CCMask == SystemZ::CCMASK_CMP_GT) {
      if (CmpVal == uint64_t(-1))
        return;
      CmpVal += 1;
      NewC.CCMask ^= SystemZ::CCMASK_CMP_EQ;
    }
    // If the low N bits of Op1 are zero, then the low N bits of Op0 can
    // be masked off without changing the result.
    MaskVal = -(CmpVal & -CmpVal);
    NewC.ICmpType = SystemZICMP::UnsignedOnly;
  }
  if (!MaskVal)
    return;

  // Check whether the combination of mask, comparison value and comparison
  // type is suitable.
  unsigned BitSize = NewC.Op0.getValueSizeInBits();
  unsigned NewCCMask, ShiftVal;
  if (NewC.ICmpType != SystemZICMP::SignedOnly &&
      NewC.Op0.getOpcode() == ISD::SHL &&
      isSimpleShift(NewC.Op0, ShiftVal) &&
      (MaskVal >> ShiftVal != 0) &&
      ((CmpVal >> ShiftVal) << ShiftVal) == CmpVal &&
      (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
                                        MaskVal >> ShiftVal,
                                        CmpVal >> ShiftVal,
                                        SystemZICMP::Any))) {
    NewC.Op0 = NewC.Op0.getOperand(0);
    MaskVal >>= ShiftVal;
  } else if (NewC.ICmpType != SystemZICMP::SignedOnly &&
             NewC.Op0.getOpcode() == ISD::SRL &&
             isSimpleShift(NewC.Op0, ShiftVal) &&
             (MaskVal << ShiftVal != 0) &&
             ((CmpVal << ShiftVal) >> ShiftVal) == CmpVal &&
             (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
                                               MaskVal << ShiftVal,
                                               CmpVal << ShiftVal,
                                               SystemZICMP::UnsignedOnly))) {
    NewC.Op0 = NewC.Op0.getOperand(0);
    MaskVal <<= ShiftVal;
  } else {
    NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, MaskVal, CmpVal,
                                     NewC.ICmpType);
    if (!NewCCMask)
      return;
  }

  // Go ahead and make the change.
  C.Opcode = SystemZISD::TM;
  C.Op0 = NewC.Op0;
  if (Mask && Mask->getZExtValue() == MaskVal)
    C.Op1 = SDValue(Mask, 0);
  else
    C.Op1 = DAG.getConstant(MaskVal, DL, C.Op0.getValueType());
  C.CCValid = SystemZ::CCMASK_TM;
  C.CCMask = NewCCMask;
}

// Implement i128 comparison in vector registers.
static void adjustICmp128(SelectionDAG &DAG, const SDLoc &DL,
                          Comparison &C) {
  if (C.Opcode != SystemZISD::ICMP)
    return;
  if (C.Op0.getValueType() != MVT::i128)
    return;

  // (In-)Equality comparisons can be implemented via VCEQGS.
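  // The i128 operands are bitcast to v2i64 and compared elementwise; they are
  // equal exactly when VICMPES reports all elements equal (CCMASK_VCMP_ALL).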
2966   if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
2967       C.CCMask == SystemZ::CCMASK_CMP_NE) {
2968     C.Opcode = SystemZISD::VICMPES;
2969     C.Op0 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, C.Op0);
2970     C.Op1 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, C.Op1);
2971     C.CCValid = SystemZ::CCMASK_VCMP;
2972     if (C.CCMask == SystemZ::CCMASK_CMP_EQ)
2973       C.CCMask = SystemZ::CCMASK_VCMP_ALL;
2974     else
2975       C.CCMask = SystemZ::CCMASK_VCMP_ALL ^ C.CCValid;
2976     return;
2977   }
2978 
2979   // Normalize other comparisons to GT.
2980   bool Swap = false, Invert = false;
2981   switch (C.CCMask) {
2982   case SystemZ::CCMASK_CMP_GT: break;
2983   case SystemZ::CCMASK_CMP_LT: Swap = true; break;
2984   case SystemZ::CCMASK_CMP_LE: Invert = true; break;
2985   case SystemZ::CCMASK_CMP_GE: Swap = Invert = true; break;
2986   default: llvm_unreachable("Invalid integer condition!");
2987   }
2988   if (Swap)
2989     std::swap(C.Op0, C.Op1);
2990 
2991   if (C.ICmpType == SystemZICMP::UnsignedOnly)
2992     C.Opcode = SystemZISD::UCMP128HI;
2993   else
2994     C.Opcode = SystemZISD::SCMP128HI;
2995   C.CCValid = SystemZ::CCMASK_ANY;
2996   C.CCMask = SystemZ::CCMASK_1;
2997 
2998   if (Invert)
2999     C.CCMask ^= C.CCValid;
3000 }
3001 
3002 // See whether the comparison argument contains a redundant AND
3003 // and remove it if so. This sometimes happens due to the generic
3004 // BRCOND expansion.
3005 static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL,
3006                                   Comparison &C) {
3007   if (C.Op0.getOpcode() != ISD::AND)
3008     return;
3009   auto *Mask = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
3010   if (!Mask || Mask->getValueSizeInBits(0) > 64)
3011     return;
3012   KnownBits Known = DAG.computeKnownBits(C.Op0.getOperand(0));
3013   if ((~Known.Zero).getZExtValue() & ~Mask->getZExtValue())
3014     return;
3015 
3016   C.Op0 = C.Op0.getOperand(0);
3017 }
3018 
3019 // Return a Comparison that tests the condition-code result of intrinsic
3020 // node Call against constant integer CC using comparison code Cond.
3021 // Opcode is the opcode of the SystemZISD operation for the intrinsic
3022 // and CCValid is the set of possible condition-code results.
3023 static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
3024                                   SDValue Call, unsigned CCValid, uint64_t CC,
3025                                   ISD::CondCode Cond) {
3026   Comparison C(Call, SDValue(), SDValue());
3027   C.Opcode = Opcode;
3028   C.CCValid = CCValid;
3029   if (Cond == ISD::SETEQ)
3030     // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3.
3031     C.CCMask = CC < 4 ? 1 << (3 - CC) : 0;
3032   else if (Cond == ISD::SETNE)
3033     // ...and the inverse of that.
3034     C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1;
3035   else if (Cond == ISD::SETLT || Cond == ISD::SETULT)
3036     // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3,
3037     // always true for CC>3.
3038     C.CCMask = CC < 4 ? ~0U << (4 - CC) : -1;
3039   else if (Cond == ISD::SETGE || Cond == ISD::SETUGE)
3040     // ...and the inverse of that.
3041     C.CCMask = CC < 4 ? ~(~0U << (4 - CC)) : 0;
3042   else if (Cond == ISD::SETLE || Cond == ISD::SETULE)
3043     // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true),
3044     // always true for CC>3.
3045     C.CCMask = CC < 4 ? ~0U << (3 - CC) : -1;
3046   else if (Cond == ISD::SETGT || Cond == ISD::SETUGT)
3047     // ...and the inverse of that.
3048     C.CCMask = CC < 4 ? ~(~0U << (3 - CC)) : 0;
3049   else
3050     llvm_unreachable("Unexpected integer comparison type");
3051   C.CCMask &= CCValid;
3052   return C;
3053 }
3054 
3055 // Decide how to implement a comparison of type Cond between CmpOp0 and CmpOp1.
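// The returned Comparison is already canonicalized: the adjust* helpers invoked
// below may change the opcode (for example to TM or to the 128-bit forms),
// swap or rewrite the operands, and narrow the CC mask accordingly.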
3056 static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1, 3057 ISD::CondCode Cond, const SDLoc &DL, 3058 SDValue Chain = SDValue(), 3059 bool IsSignaling = false) { 3060 if (CmpOp1.getOpcode() == ISD::Constant) { 3061 assert(!Chain); 3062 unsigned Opcode, CCValid; 3063 if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN && 3064 CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) && 3065 isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid)) 3066 return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, 3067 CmpOp1->getAsZExtVal(), Cond); 3068 if (CmpOp0.getOpcode() == ISD::INTRINSIC_WO_CHAIN && 3069 CmpOp0.getResNo() == CmpOp0->getNumValues() - 1 && 3070 isIntrinsicWithCC(CmpOp0, Opcode, CCValid)) 3071 return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, 3072 CmpOp1->getAsZExtVal(), Cond); 3073 } 3074 Comparison C(CmpOp0, CmpOp1, Chain); 3075 C.CCMask = CCMaskForCondCode(Cond); 3076 if (C.Op0.getValueType().isFloatingPoint()) { 3077 C.CCValid = SystemZ::CCMASK_FCMP; 3078 if (!C.Chain) 3079 C.Opcode = SystemZISD::FCMP; 3080 else if (!IsSignaling) 3081 C.Opcode = SystemZISD::STRICT_FCMP; 3082 else 3083 C.Opcode = SystemZISD::STRICT_FCMPS; 3084 adjustForFNeg(C); 3085 } else { 3086 assert(!C.Chain); 3087 C.CCValid = SystemZ::CCMASK_ICMP; 3088 C.Opcode = SystemZISD::ICMP; 3089 // Choose the type of comparison. Equality and inequality tests can 3090 // use either signed or unsigned comparisons. The choice also doesn't 3091 // matter if both sign bits are known to be clear. In those cases we 3092 // want to give the main isel code the freedom to choose whichever 3093 // form fits best. 3094 if (C.CCMask == SystemZ::CCMASK_CMP_EQ || 3095 C.CCMask == SystemZ::CCMASK_CMP_NE || 3096 (DAG.SignBitIsZero(C.Op0) && DAG.SignBitIsZero(C.Op1))) 3097 C.ICmpType = SystemZICMP::Any; 3098 else if (C.CCMask & SystemZ::CCMASK_CMP_UO) 3099 C.ICmpType = SystemZICMP::UnsignedOnly; 3100 else 3101 C.ICmpType = SystemZICMP::SignedOnly; 3102 C.CCMask &= ~SystemZ::CCMASK_CMP_UO; 3103 adjustForRedundantAnd(DAG, DL, C); 3104 adjustZeroCmp(DAG, DL, C); 3105 adjustSubwordCmp(DAG, DL, C); 3106 adjustForSubtraction(DAG, DL, C); 3107 adjustForLTGFR(C); 3108 adjustICmpTruncate(DAG, DL, C); 3109 } 3110 3111 if (shouldSwapCmpOperands(C)) { 3112 std::swap(C.Op0, C.Op1); 3113 C.CCMask = SystemZ::reverseCCMask(C.CCMask); 3114 } 3115 3116 adjustForTestUnderMask(DAG, DL, C); 3117 adjustICmp128(DAG, DL, C); 3118 return C; 3119 } 3120 3121 // Emit the comparison instruction described by C. 
3122 static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) { 3123 if (!C.Op1.getNode()) { 3124 SDNode *Node; 3125 switch (C.Op0.getOpcode()) { 3126 case ISD::INTRINSIC_W_CHAIN: 3127 Node = emitIntrinsicWithCCAndChain(DAG, C.Op0, C.Opcode); 3128 return SDValue(Node, 0); 3129 case ISD::INTRINSIC_WO_CHAIN: 3130 Node = emitIntrinsicWithCC(DAG, C.Op0, C.Opcode); 3131 return SDValue(Node, Node->getNumValues() - 1); 3132 default: 3133 llvm_unreachable("Invalid comparison operands"); 3134 } 3135 } 3136 if (C.Opcode == SystemZISD::ICMP) 3137 return DAG.getNode(SystemZISD::ICMP, DL, MVT::i32, C.Op0, C.Op1, 3138 DAG.getTargetConstant(C.ICmpType, DL, MVT::i32)); 3139 if (C.Opcode == SystemZISD::TM) { 3140 bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) != 3141 bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_1)); 3142 return DAG.getNode(SystemZISD::TM, DL, MVT::i32, C.Op0, C.Op1, 3143 DAG.getTargetConstant(RegisterOnly, DL, MVT::i32)); 3144 } 3145 if (C.Opcode == SystemZISD::VICMPES) { 3146 SDVTList VTs = DAG.getVTList(C.Op0.getValueType(), MVT::i32); 3147 SDValue Val = DAG.getNode(C.Opcode, DL, VTs, C.Op0, C.Op1); 3148 return SDValue(Val.getNode(), 1); 3149 } 3150 if (C.Chain) { 3151 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other); 3152 return DAG.getNode(C.Opcode, DL, VTs, C.Chain, C.Op0, C.Op1); 3153 } 3154 return DAG.getNode(C.Opcode, DL, MVT::i32, C.Op0, C.Op1); 3155 } 3156 3157 // Implement a 32-bit *MUL_LOHI operation by extending both operands to 3158 // 64 bits. Extend is the extension type to use. Store the high part 3159 // in Hi and the low part in Lo. 3160 static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend, 3161 SDValue Op0, SDValue Op1, SDValue &Hi, 3162 SDValue &Lo) { 3163 Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0); 3164 Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1); 3165 SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1); 3166 Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul, 3167 DAG.getConstant(32, DL, MVT::i64)); 3168 Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Hi); 3169 Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul); 3170 } 3171 3172 // Lower a binary operation that produces two VT results, one in each 3173 // half of a GR128 pair. Op0 and Op1 are the VT operands to the operation, 3174 // and Opcode performs the GR128 operation. Store the even register result 3175 // in Even and the odd register result in Odd. 3176 static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT, 3177 unsigned Opcode, SDValue Op0, SDValue Op1, 3178 SDValue &Even, SDValue &Odd) { 3179 SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped, Op0, Op1); 3180 bool Is32Bit = is32Bit(VT); 3181 Even = DAG.getTargetExtractSubreg(SystemZ::even128(Is32Bit), DL, VT, Result); 3182 Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result); 3183 } 3184 3185 // Return an i32 value that is 1 if the CC value produced by CCReg is 3186 // in the mask CCMask and 0 otherwise. CC is known to have a value 3187 // in CCValid, so other values can be ignored. 
3188 static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg,
3189                          unsigned CCValid, unsigned CCMask) {
3190   SDValue Ops[] = {DAG.getConstant(1, DL, MVT::i32),
3191                    DAG.getConstant(0, DL, MVT::i32),
3192                    DAG.getTargetConstant(CCValid, DL, MVT::i32),
3193                    DAG.getTargetConstant(CCMask, DL, MVT::i32), CCReg};
3194   return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, MVT::i32, Ops);
3195 }
3196 
3197 // Return the SystemZISD vector comparison operation for CC, or 0 if it cannot
3198 // be done directly. Mode is CmpMode::Int for integer comparisons, CmpMode::FP
3199 // for regular floating-point comparisons, CmpMode::StrictFP for strict (quiet)
3200 // floating-point comparisons, and CmpMode::SignalingFP for strict signaling
3201 // floating-point comparisons.
3202 enum class CmpMode { Int, FP, StrictFP, SignalingFP };
3203 static unsigned getVectorComparison(ISD::CondCode CC, CmpMode Mode) {
3204   switch (CC) {
3205   case ISD::SETOEQ:
3206   case ISD::SETEQ:
3207     switch (Mode) {
3208     case CmpMode::Int: return SystemZISD::VICMPE;
3209     case CmpMode::FP: return SystemZISD::VFCMPE;
3210     case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPE;
3211     case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPES;
3212     }
3213     llvm_unreachable("Bad mode");
3214 
3215   case ISD::SETOGE:
3216   case ISD::SETGE:
3217     switch (Mode) {
3218     case CmpMode::Int: return 0;
3219     case CmpMode::FP: return SystemZISD::VFCMPHE;
3220     case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPHE;
3221     case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHES;
3222     }
3223     llvm_unreachable("Bad mode");
3224 
3225   case ISD::SETOGT:
3226   case ISD::SETGT:
3227     switch (Mode) {
3228     case CmpMode::Int: return SystemZISD::VICMPH;
3229     case CmpMode::FP: return SystemZISD::VFCMPH;
3230     case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPH;
3231     case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHS;
3232     }
3233     llvm_unreachable("Bad mode");
3234 
3235   case ISD::SETUGT:
3236     switch (Mode) {
3237     case CmpMode::Int: return SystemZISD::VICMPHL;
3238     case CmpMode::FP: return 0;
3239     case CmpMode::StrictFP: return 0;
3240     case CmpMode::SignalingFP: return 0;
3241     }
3242     llvm_unreachable("Bad mode");
3243 
3244   default:
3245     return 0;
3246   }
3247 }
3248 
3249 // Return the SystemZISD vector comparison operation for CC or its inverse,
3250 // or 0 if neither can be done directly. Indicate in Invert whether the
3251 // result is for the inverse of CC. Mode is as above.
3252 static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, CmpMode Mode,
3253                                             bool &Invert) {
3254   if (unsigned Opcode = getVectorComparison(CC, Mode)) {
3255     Invert = false;
3256     return Opcode;
3257   }
3258 
3259   CC = ISD::getSetCCInverse(CC, Mode == CmpMode::Int ? MVT::i32 : MVT::f32);
3260   if (unsigned Opcode = getVectorComparison(CC, Mode)) {
3261     Invert = true;
3262     return Opcode;
3263   }
3264 
3265   return 0;
3266 }
3267 
3268 // Return a v2f64 that contains the extended form of elements Start and Start+1
3269 // of v4f32 value Op. If Chain is nonnull, return the strict form.
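// (The shuffle in the implementation places those two elements at positions 0
// and 2, since VEXTEND widens the even-numbered elements of its v4f32 input.)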
3270 static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL, 3271 SDValue Op, SDValue Chain) { 3272 int Mask[] = { Start, -1, Start + 1, -1 }; 3273 Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32), Mask); 3274 if (Chain) { 3275 SDVTList VTs = DAG.getVTList(MVT::v2f64, MVT::Other); 3276 return DAG.getNode(SystemZISD::STRICT_VEXTEND, DL, VTs, Chain, Op); 3277 } 3278 return DAG.getNode(SystemZISD::VEXTEND, DL, MVT::v2f64, Op); 3279 } 3280 3281 // Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode, 3282 // producing a result of type VT. If Chain is nonnull, return the strict form. 3283 SDValue SystemZTargetLowering::getVectorCmp(SelectionDAG &DAG, unsigned Opcode, 3284 const SDLoc &DL, EVT VT, 3285 SDValue CmpOp0, 3286 SDValue CmpOp1, 3287 SDValue Chain) const { 3288 // There is no hardware support for v4f32 (unless we have the vector 3289 // enhancements facility 1), so extend the vector into two v2f64s 3290 // and compare those. 3291 if (CmpOp0.getValueType() == MVT::v4f32 && 3292 !Subtarget.hasVectorEnhancements1()) { 3293 SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0, Chain); 3294 SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0, Chain); 3295 SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1, Chain); 3296 SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1, Chain); 3297 if (Chain) { 3298 SDVTList VTs = DAG.getVTList(MVT::v2i64, MVT::Other); 3299 SDValue HRes = DAG.getNode(Opcode, DL, VTs, Chain, H0, H1); 3300 SDValue LRes = DAG.getNode(Opcode, DL, VTs, Chain, L0, L1); 3301 SDValue Res = DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes); 3302 SDValue Chains[6] = { H0.getValue(1), L0.getValue(1), 3303 H1.getValue(1), L1.getValue(1), 3304 HRes.getValue(1), LRes.getValue(1) }; 3305 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); 3306 SDValue Ops[2] = { Res, NewChain }; 3307 return DAG.getMergeValues(Ops, DL); 3308 } 3309 SDValue HRes = DAG.getNode(Opcode, DL, MVT::v2i64, H0, H1); 3310 SDValue LRes = DAG.getNode(Opcode, DL, MVT::v2i64, L0, L1); 3311 return DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes); 3312 } 3313 if (Chain) { 3314 SDVTList VTs = DAG.getVTList(VT, MVT::Other); 3315 return DAG.getNode(Opcode, DL, VTs, Chain, CmpOp0, CmpOp1); 3316 } 3317 return DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1); 3318 } 3319 3320 // Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing 3321 // an integer mask of type VT. If Chain is nonnull, we have a strict 3322 // floating-point comparison. If in addition IsSignaling is true, we have 3323 // a strict signaling floating-point comparison. 3324 SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG, 3325 const SDLoc &DL, EVT VT, 3326 ISD::CondCode CC, 3327 SDValue CmpOp0, 3328 SDValue CmpOp1, 3329 SDValue Chain, 3330 bool IsSignaling) const { 3331 bool IsFP = CmpOp0.getValueType().isFloatingPoint(); 3332 assert (!Chain || IsFP); 3333 assert (!IsSignaling || Chain); 3334 CmpMode Mode = IsSignaling ? CmpMode::SignalingFP : 3335 Chain ? CmpMode::StrictFP : IsFP ? CmpMode::FP : CmpMode::Int; 3336 bool Invert = false; 3337 SDValue Cmp; 3338 switch (CC) { 3339 // Handle tests for order using (or (ogt y x) (oge x y)). 
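  // (SETUO is lowered as the inverse of SETO, and SETUEQ as the inverse of
  // SETONE, which is why the first case of each pair only sets Invert and
  // falls through.)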
3340 case ISD::SETUO: 3341 Invert = true; 3342 [[fallthrough]]; 3343 case ISD::SETO: { 3344 assert(IsFP && "Unexpected integer comparison"); 3345 SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode), 3346 DL, VT, CmpOp1, CmpOp0, Chain); 3347 SDValue GE = getVectorCmp(DAG, getVectorComparison(ISD::SETOGE, Mode), 3348 DL, VT, CmpOp0, CmpOp1, Chain); 3349 Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GE); 3350 if (Chain) 3351 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, 3352 LT.getValue(1), GE.getValue(1)); 3353 break; 3354 } 3355 3356 // Handle <> tests using (or (ogt y x) (ogt x y)). 3357 case ISD::SETUEQ: 3358 Invert = true; 3359 [[fallthrough]]; 3360 case ISD::SETONE: { 3361 assert(IsFP && "Unexpected integer comparison"); 3362 SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode), 3363 DL, VT, CmpOp1, CmpOp0, Chain); 3364 SDValue GT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode), 3365 DL, VT, CmpOp0, CmpOp1, Chain); 3366 Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GT); 3367 if (Chain) 3368 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, 3369 LT.getValue(1), GT.getValue(1)); 3370 break; 3371 } 3372 3373 // Otherwise a single comparison is enough. It doesn't really 3374 // matter whether we try the inversion or the swap first, since 3375 // there are no cases where both work. 3376 default: 3377 if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert)) 3378 Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1, Chain); 3379 else { 3380 CC = ISD::getSetCCSwappedOperands(CC); 3381 if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert)) 3382 Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0, Chain); 3383 else 3384 llvm_unreachable("Unhandled comparison"); 3385 } 3386 if (Chain) 3387 Chain = Cmp.getValue(1); 3388 break; 3389 } 3390 if (Invert) { 3391 SDValue Mask = 3392 DAG.getSplatBuildVector(VT, DL, DAG.getConstant(-1, DL, MVT::i64)); 3393 Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask); 3394 } 3395 if (Chain && Chain.getNode() != Cmp.getNode()) { 3396 SDValue Ops[2] = { Cmp, Chain }; 3397 Cmp = DAG.getMergeValues(Ops, DL); 3398 } 3399 return Cmp; 3400 } 3401 3402 SDValue SystemZTargetLowering::lowerSETCC(SDValue Op, 3403 SelectionDAG &DAG) const { 3404 SDValue CmpOp0 = Op.getOperand(0); 3405 SDValue CmpOp1 = Op.getOperand(1); 3406 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); 3407 SDLoc DL(Op); 3408 EVT VT = Op.getValueType(); 3409 if (VT.isVector()) 3410 return lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1); 3411 3412 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL)); 3413 SDValue CCReg = emitCmp(DAG, DL, C); 3414 return emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask); 3415 } 3416 3417 SDValue SystemZTargetLowering::lowerSTRICT_FSETCC(SDValue Op, 3418 SelectionDAG &DAG, 3419 bool IsSignaling) const { 3420 SDValue Chain = Op.getOperand(0); 3421 SDValue CmpOp0 = Op.getOperand(1); 3422 SDValue CmpOp1 = Op.getOperand(2); 3423 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(3))->get(); 3424 SDLoc DL(Op); 3425 EVT VT = Op.getNode()->getValueType(0); 3426 if (VT.isVector()) { 3427 SDValue Res = lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1, 3428 Chain, IsSignaling); 3429 return Res.getValue(Op.getResNo()); 3430 } 3431 3432 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL, Chain, IsSignaling)); 3433 SDValue CCReg = emitCmp(DAG, DL, C); 3434 CCReg->setFlags(Op->getFlags()); 3435 SDValue Result = emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask); 3436 SDValue Ops[2] = { Result, CCReg.getValue(1) }; 
3437 return DAG.getMergeValues(Ops, DL); 3438 } 3439 3440 SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const { 3441 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); 3442 SDValue CmpOp0 = Op.getOperand(2); 3443 SDValue CmpOp1 = Op.getOperand(3); 3444 SDValue Dest = Op.getOperand(4); 3445 SDLoc DL(Op); 3446 3447 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL)); 3448 SDValue CCReg = emitCmp(DAG, DL, C); 3449 return DAG.getNode( 3450 SystemZISD::BR_CCMASK, DL, Op.getValueType(), Op.getOperand(0), 3451 DAG.getTargetConstant(C.CCValid, DL, MVT::i32), 3452 DAG.getTargetConstant(C.CCMask, DL, MVT::i32), Dest, CCReg); 3453 } 3454 3455 // Return true if Pos is CmpOp and Neg is the negative of CmpOp, 3456 // allowing Pos and Neg to be wider than CmpOp. 3457 static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg) { 3458 return (Neg.getOpcode() == ISD::SUB && 3459 Neg.getOperand(0).getOpcode() == ISD::Constant && 3460 Neg.getConstantOperandVal(0) == 0 && Neg.getOperand(1) == Pos && 3461 (Pos == CmpOp || (Pos.getOpcode() == ISD::SIGN_EXTEND && 3462 Pos.getOperand(0) == CmpOp))); 3463 } 3464 3465 // Return the absolute or negative absolute of Op; IsNegative decides which. 3466 static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op, 3467 bool IsNegative) { 3468 Op = DAG.getNode(ISD::ABS, DL, Op.getValueType(), Op); 3469 if (IsNegative) 3470 Op = DAG.getNode(ISD::SUB, DL, Op.getValueType(), 3471 DAG.getConstant(0, DL, Op.getValueType()), Op); 3472 return Op; 3473 } 3474 3475 SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op, 3476 SelectionDAG &DAG) const { 3477 SDValue CmpOp0 = Op.getOperand(0); 3478 SDValue CmpOp1 = Op.getOperand(1); 3479 SDValue TrueOp = Op.getOperand(2); 3480 SDValue FalseOp = Op.getOperand(3); 3481 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); 3482 SDLoc DL(Op); 3483 3484 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL)); 3485 3486 // Check for absolute and negative-absolute selections, including those 3487 // where the comparison value is sign-extended (for LPGFR and LNGFR). 3488 // This check supplements the one in DAGCombiner. 3489 if (C.Opcode == SystemZISD::ICMP && C.CCMask != SystemZ::CCMASK_CMP_EQ && 3490 C.CCMask != SystemZ::CCMASK_CMP_NE && 3491 C.Op1.getOpcode() == ISD::Constant && 3492 cast<ConstantSDNode>(C.Op1)->getValueSizeInBits(0) <= 64 && 3493 C.Op1->getAsZExtVal() == 0) { 3494 if (isAbsolute(C.Op0, TrueOp, FalseOp)) 3495 return getAbsolute(DAG, DL, TrueOp, C.CCMask & SystemZ::CCMASK_CMP_LT); 3496 if (isAbsolute(C.Op0, FalseOp, TrueOp)) 3497 return getAbsolute(DAG, DL, FalseOp, C.CCMask & SystemZ::CCMASK_CMP_GT); 3498 } 3499 3500 SDValue CCReg = emitCmp(DAG, DL, C); 3501 SDValue Ops[] = {TrueOp, FalseOp, 3502 DAG.getTargetConstant(C.CCValid, DL, MVT::i32), 3503 DAG.getTargetConstant(C.CCMask, DL, MVT::i32), CCReg}; 3504 3505 return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, Op.getValueType(), Ops); 3506 } 3507 3508 SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node, 3509 SelectionDAG &DAG) const { 3510 SDLoc DL(Node); 3511 const GlobalValue *GV = Node->getGlobal(); 3512 int64_t Offset = Node->getOffset(); 3513 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 3514 CodeModel::Model CM = DAG.getTarget().getCodeModel(); 3515 3516 SDValue Result; 3517 if (Subtarget.isPC32DBLSymbol(GV, CM)) { 3518 if (isInt<32>(Offset)) { 3519 // Assign anchors at 1<<12 byte boundaries. 
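      // For example (illustrative only): Offset == 0x12346 gives an anchor of
      // 0x12000 and a remainder of 0x346, which is halfword-aligned and can be
      // folded via PCREL_OFFSET below; an odd remainder such as 0x345 would
      // instead be added by the explicit ADD at the end of this function.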
3520 uint64_t Anchor = Offset & ~uint64_t(0xfff); 3521 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor); 3522 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); 3523 3524 // The offset can be folded into the address if it is aligned to a 3525 // halfword. 3526 Offset -= Anchor; 3527 if (Offset != 0 && (Offset & 1) == 0) { 3528 SDValue Full = 3529 DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor + Offset); 3530 Result = DAG.getNode(SystemZISD::PCREL_OFFSET, DL, PtrVT, Full, Result); 3531 Offset = 0; 3532 } 3533 } else { 3534 // Conservatively load a constant offset greater than 32 bits into a 3535 // register below. 3536 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT); 3537 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); 3538 } 3539 } else if (Subtarget.isTargetELF()) { 3540 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT); 3541 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); 3542 Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result, 3543 MachinePointerInfo::getGOT(DAG.getMachineFunction())); 3544 } else if (Subtarget.isTargetzOS()) { 3545 Result = getADAEntry(DAG, GV, DL, PtrVT); 3546 } else 3547 llvm_unreachable("Unexpected Subtarget"); 3548 3549 // If there was a non-zero offset that we didn't fold, create an explicit 3550 // addition for it. 3551 if (Offset != 0) 3552 Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result, 3553 DAG.getConstant(Offset, DL, PtrVT)); 3554 3555 return Result; 3556 } 3557 3558 SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node, 3559 SelectionDAG &DAG, 3560 unsigned Opcode, 3561 SDValue GOTOffset) const { 3562 SDLoc DL(Node); 3563 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 3564 SDValue Chain = DAG.getEntryNode(); 3565 SDValue Glue; 3566 3567 if (DAG.getMachineFunction().getFunction().getCallingConv() == 3568 CallingConv::GHC) 3569 report_fatal_error("In GHC calling convention TLS is not supported"); 3570 3571 // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12. 3572 SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT); 3573 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue); 3574 Glue = Chain.getValue(1); 3575 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R2D, GOTOffset, Glue); 3576 Glue = Chain.getValue(1); 3577 3578 // The first call operand is the chain and the second is the TLS symbol. 3579 SmallVector<SDValue, 8> Ops; 3580 Ops.push_back(Chain); 3581 Ops.push_back(DAG.getTargetGlobalAddress(Node->getGlobal(), DL, 3582 Node->getValueType(0), 3583 0, 0)); 3584 3585 // Add argument registers to the end of the list so that they are 3586 // known live into the call. 3587 Ops.push_back(DAG.getRegister(SystemZ::R2D, PtrVT)); 3588 Ops.push_back(DAG.getRegister(SystemZ::R12D, PtrVT)); 3589 3590 // Add a register mask operand representing the call-preserved registers. 3591 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); 3592 const uint32_t *Mask = 3593 TRI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C); 3594 assert(Mask && "Missing call preserved mask for calling convention"); 3595 Ops.push_back(DAG.getRegisterMask(Mask)); 3596 3597 // Glue the call to the argument copies. 3598 Ops.push_back(Glue); 3599 3600 // Emit the call. 3601 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); 3602 Chain = DAG.getNode(Opcode, DL, NodeTys, Ops); 3603 Glue = Chain.getValue(1); 3604 3605 // Copy the return value from %r2. 
3606 return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue); 3607 } 3608 3609 SDValue SystemZTargetLowering::lowerThreadPointer(const SDLoc &DL, 3610 SelectionDAG &DAG) const { 3611 SDValue Chain = DAG.getEntryNode(); 3612 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 3613 3614 // The high part of the thread pointer is in access register 0. 3615 SDValue TPHi = DAG.getCopyFromReg(Chain, DL, SystemZ::A0, MVT::i32); 3616 TPHi = DAG.getNode(ISD::ANY_EXTEND, DL, PtrVT, TPHi); 3617 3618 // The low part of the thread pointer is in access register 1. 3619 SDValue TPLo = DAG.getCopyFromReg(Chain, DL, SystemZ::A1, MVT::i32); 3620 TPLo = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TPLo); 3621 3622 // Merge them into a single 64-bit address. 3623 SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi, 3624 DAG.getConstant(32, DL, PtrVT)); 3625 return DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo); 3626 } 3627 3628 SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node, 3629 SelectionDAG &DAG) const { 3630 if (DAG.getTarget().useEmulatedTLS()) 3631 return LowerToTLSEmulatedModel(Node, DAG); 3632 SDLoc DL(Node); 3633 const GlobalValue *GV = Node->getGlobal(); 3634 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 3635 TLSModel::Model model = DAG.getTarget().getTLSModel(GV); 3636 3637 if (DAG.getMachineFunction().getFunction().getCallingConv() == 3638 CallingConv::GHC) 3639 report_fatal_error("In GHC calling convention TLS is not supported"); 3640 3641 SDValue TP = lowerThreadPointer(DL, DAG); 3642 3643 // Get the offset of GA from the thread pointer, based on the TLS model. 3644 SDValue Offset; 3645 switch (model) { 3646 case TLSModel::GeneralDynamic: { 3647 // Load the GOT offset of the tls_index (module ID / per-symbol offset). 3648 SystemZConstantPoolValue *CPV = 3649 SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD); 3650 3651 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8)); 3652 Offset = DAG.getLoad( 3653 PtrVT, DL, DAG.getEntryNode(), Offset, 3654 MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); 3655 3656 // Call __tls_get_offset to retrieve the offset. 3657 Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset); 3658 break; 3659 } 3660 3661 case TLSModel::LocalDynamic: { 3662 // Load the GOT offset of the module ID. 3663 SystemZConstantPoolValue *CPV = 3664 SystemZConstantPoolValue::Create(GV, SystemZCP::TLSLDM); 3665 3666 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8)); 3667 Offset = DAG.getLoad( 3668 PtrVT, DL, DAG.getEntryNode(), Offset, 3669 MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); 3670 3671 // Call __tls_get_offset to retrieve the module base offset. 3672 Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset); 3673 3674 // Note: The SystemZLDCleanupPass will remove redundant computations 3675 // of the module base offset. Count total number of local-dynamic 3676 // accesses to trigger execution of that pass. 3677 SystemZMachineFunctionInfo* MFI = 3678 DAG.getMachineFunction().getInfo<SystemZMachineFunctionInfo>(); 3679 MFI->incNumLocalDynamicTLSAccesses(); 3680 3681 // Add the per-symbol offset. 
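    // (The final symbol address is then thread pointer + module base offset +
    // DTPOFF, formed by the common ADD at the end of this function.)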
3682 CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF); 3683 3684 SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, Align(8)); 3685 DTPOffset = DAG.getLoad( 3686 PtrVT, DL, DAG.getEntryNode(), DTPOffset, 3687 MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); 3688 3689 Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset); 3690 break; 3691 } 3692 3693 case TLSModel::InitialExec: { 3694 // Load the offset from the GOT. 3695 Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 3696 SystemZII::MO_INDNTPOFF); 3697 Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset); 3698 Offset = 3699 DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Offset, 3700 MachinePointerInfo::getGOT(DAG.getMachineFunction())); 3701 break; 3702 } 3703 3704 case TLSModel::LocalExec: { 3705 // Force the offset into the constant pool and load it from there. 3706 SystemZConstantPoolValue *CPV = 3707 SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF); 3708 3709 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8)); 3710 Offset = DAG.getLoad( 3711 PtrVT, DL, DAG.getEntryNode(), Offset, 3712 MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); 3713 break; 3714 } 3715 } 3716 3717 // Add the base and offset together. 3718 return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset); 3719 } 3720 3721 SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node, 3722 SelectionDAG &DAG) const { 3723 SDLoc DL(Node); 3724 const BlockAddress *BA = Node->getBlockAddress(); 3725 int64_t Offset = Node->getOffset(); 3726 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 3727 3728 SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset); 3729 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); 3730 return Result; 3731 } 3732 3733 SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT, 3734 SelectionDAG &DAG) const { 3735 SDLoc DL(JT); 3736 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 3737 SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); 3738 3739 // Use LARL to load the address of the table. 3740 return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); 3741 } 3742 3743 SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP, 3744 SelectionDAG &DAG) const { 3745 SDLoc DL(CP); 3746 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 3747 3748 SDValue Result; 3749 if (CP->isMachineConstantPoolEntry()) 3750 Result = 3751 DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CP->getAlign()); 3752 else 3753 Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlign(), 3754 CP->getOffset()); 3755 3756 // Use LARL to load the address of the constant pool entry. 3757 return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); 3758 } 3759 3760 SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op, 3761 SelectionDAG &DAG) const { 3762 auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>(); 3763 MachineFunction &MF = DAG.getMachineFunction(); 3764 MachineFrameInfo &MFI = MF.getFrameInfo(); 3765 MFI.setFrameAddressIsTaken(true); 3766 3767 SDLoc DL(Op); 3768 unsigned Depth = Op.getConstantOperandVal(0); 3769 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 3770 3771 // By definition, the frame address is the address of the back chain. (In 3772 // the case of packed stack without backchain, return the address where the 3773 // backchain would have been stored. This will either be an unused space or 3774 // contain a saved register). 
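  // For Depth > 0 the loop further down simply follows the saved back chain
  // pointers, performing one load per requested frame level.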
3775 int BackChainIdx = TFL->getOrCreateFramePointerSaveIndex(MF); 3776 SDValue BackChain = DAG.getFrameIndex(BackChainIdx, PtrVT); 3777 3778 if (Depth > 0) { 3779 // FIXME The frontend should detect this case. 3780 if (!MF.getSubtarget<SystemZSubtarget>().hasBackChain()) 3781 report_fatal_error("Unsupported stack frame traversal count"); 3782 3783 SDValue Offset = DAG.getConstant(TFL->getBackchainOffset(MF), DL, PtrVT); 3784 while (Depth--) { 3785 BackChain = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), BackChain, 3786 MachinePointerInfo()); 3787 BackChain = DAG.getNode(ISD::ADD, DL, PtrVT, BackChain, Offset); 3788 } 3789 } 3790 3791 return BackChain; 3792 } 3793 3794 SDValue SystemZTargetLowering::lowerRETURNADDR(SDValue Op, 3795 SelectionDAG &DAG) const { 3796 MachineFunction &MF = DAG.getMachineFunction(); 3797 MachineFrameInfo &MFI = MF.getFrameInfo(); 3798 MFI.setReturnAddressIsTaken(true); 3799 3800 if (verifyReturnAddressArgumentIsConstant(Op, DAG)) 3801 return SDValue(); 3802 3803 SDLoc DL(Op); 3804 unsigned Depth = Op.getConstantOperandVal(0); 3805 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 3806 3807 if (Depth > 0) { 3808 // FIXME The frontend should detect this case. 3809 if (!MF.getSubtarget<SystemZSubtarget>().hasBackChain()) 3810 report_fatal_error("Unsupported stack frame traversal count"); 3811 3812 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG); 3813 const auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>(); 3814 int Offset = TFL->getReturnAddressOffset(MF); 3815 SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, FrameAddr, 3816 DAG.getConstant(Offset, DL, PtrVT)); 3817 return DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, 3818 MachinePointerInfo()); 3819 } 3820 3821 // Return R14D (Elf) / R7D (XPLINK), which has the return address. Mark it an 3822 // implicit live-in. 3823 SystemZCallingConventionRegisters *CCR = Subtarget.getSpecialRegisters(); 3824 Register LinkReg = MF.addLiveIn(CCR->getReturnFunctionAddressRegister(), 3825 &SystemZ::GR64BitRegClass); 3826 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, LinkReg, PtrVT); 3827 } 3828 3829 SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op, 3830 SelectionDAG &DAG) const { 3831 SDLoc DL(Op); 3832 SDValue In = Op.getOperand(0); 3833 EVT InVT = In.getValueType(); 3834 EVT ResVT = Op.getValueType(); 3835 3836 // Convert loads directly. This is normally done by DAGCombiner, 3837 // but we need this case for bitcasts that are created during lowering 3838 // and which are then lowered themselves. 3839 if (auto *LoadN = dyn_cast<LoadSDNode>(In)) 3840 if (ISD::isNormalLoad(LoadN)) { 3841 SDValue NewLoad = DAG.getLoad(ResVT, DL, LoadN->getChain(), 3842 LoadN->getBasePtr(), LoadN->getMemOperand()); 3843 // Update the chain uses. 
3844 DAG.ReplaceAllUsesOfValueWith(SDValue(LoadN, 1), NewLoad.getValue(1)); 3845 return NewLoad; 3846 } 3847 3848 if (InVT == MVT::i32 && ResVT == MVT::f32) { 3849 SDValue In64; 3850 if (Subtarget.hasHighWord()) { 3851 SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, 3852 MVT::i64); 3853 In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL, 3854 MVT::i64, SDValue(U64, 0), In); 3855 } else { 3856 In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In); 3857 In64 = DAG.getNode(ISD::SHL, DL, MVT::i64, In64, 3858 DAG.getConstant(32, DL, MVT::i64)); 3859 } 3860 SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64); 3861 return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, 3862 DL, MVT::f32, Out64); 3863 } 3864 if (InVT == MVT::f32 && ResVT == MVT::i32) { 3865 SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64); 3866 SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL, 3867 MVT::f64, SDValue(U64, 0), In); 3868 SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64); 3869 if (Subtarget.hasHighWord()) 3870 return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, DL, 3871 MVT::i32, Out64); 3872 SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64, 3873 DAG.getConstant(32, DL, MVT::i64)); 3874 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift); 3875 } 3876 llvm_unreachable("Unexpected bitcast combination"); 3877 } 3878 3879 SDValue SystemZTargetLowering::lowerVASTART(SDValue Op, 3880 SelectionDAG &DAG) const { 3881 3882 if (Subtarget.isTargetXPLINK64()) 3883 return lowerVASTART_XPLINK(Op, DAG); 3884 else 3885 return lowerVASTART_ELF(Op, DAG); 3886 } 3887 3888 SDValue SystemZTargetLowering::lowerVASTART_XPLINK(SDValue Op, 3889 SelectionDAG &DAG) const { 3890 MachineFunction &MF = DAG.getMachineFunction(); 3891 SystemZMachineFunctionInfo *FuncInfo = 3892 MF.getInfo<SystemZMachineFunctionInfo>(); 3893 3894 SDLoc DL(Op); 3895 3896 // vastart just stores the address of the VarArgsFrameIndex slot into the 3897 // memory location argument. 3898 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 3899 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); 3900 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 3901 return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1), 3902 MachinePointerInfo(SV)); 3903 } 3904 3905 SDValue SystemZTargetLowering::lowerVASTART_ELF(SDValue Op, 3906 SelectionDAG &DAG) const { 3907 MachineFunction &MF = DAG.getMachineFunction(); 3908 SystemZMachineFunctionInfo *FuncInfo = 3909 MF.getInfo<SystemZMachineFunctionInfo>(); 3910 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 3911 3912 SDValue Chain = Op.getOperand(0); 3913 SDValue Addr = Op.getOperand(1); 3914 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 3915 SDLoc DL(Op); 3916 3917 // The initial values of each field. 3918 const unsigned NumFields = 4; 3919 SDValue Fields[NumFields] = { 3920 DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), DL, PtrVT), 3921 DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), DL, PtrVT), 3922 DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT), 3923 DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT) 3924 }; 3925 3926 // Store each field into its respective slot. 
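  // The four 8-byte stores below match the ELF s390x va_list layout:
  // __gpr (offset 0), __fpr (offset 8), __overflow_arg_area (offset 16) and
  // __reg_save_area (offset 24).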
3927 SDValue MemOps[NumFields]; 3928 unsigned Offset = 0; 3929 for (unsigned I = 0; I < NumFields; ++I) { 3930 SDValue FieldAddr = Addr; 3931 if (Offset != 0) 3932 FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr, 3933 DAG.getIntPtrConstant(Offset, DL)); 3934 MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr, 3935 MachinePointerInfo(SV, Offset)); 3936 Offset += 8; 3937 } 3938 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps); 3939 } 3940 3941 SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op, 3942 SelectionDAG &DAG) const { 3943 SDValue Chain = Op.getOperand(0); 3944 SDValue DstPtr = Op.getOperand(1); 3945 SDValue SrcPtr = Op.getOperand(2); 3946 const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue(); 3947 const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue(); 3948 SDLoc DL(Op); 3949 3950 uint32_t Sz = 3951 Subtarget.isTargetXPLINK64() ? getTargetMachine().getPointerSize(0) : 32; 3952 return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(Sz, DL), 3953 Align(8), /*isVolatile*/ false, /*AlwaysInline*/ false, 3954 /*CI=*/nullptr, std::nullopt, MachinePointerInfo(DstSV), 3955 MachinePointerInfo(SrcSV)); 3956 } 3957 3958 SDValue 3959 SystemZTargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op, 3960 SelectionDAG &DAG) const { 3961 if (Subtarget.isTargetXPLINK64()) 3962 return lowerDYNAMIC_STACKALLOC_XPLINK(Op, DAG); 3963 else 3964 return lowerDYNAMIC_STACKALLOC_ELF(Op, DAG); 3965 } 3966 3967 SDValue 3968 SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_XPLINK(SDValue Op, 3969 SelectionDAG &DAG) const { 3970 const TargetFrameLowering *TFI = Subtarget.getFrameLowering(); 3971 MachineFunction &MF = DAG.getMachineFunction(); 3972 bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack"); 3973 SDValue Chain = Op.getOperand(0); 3974 SDValue Size = Op.getOperand(1); 3975 SDValue Align = Op.getOperand(2); 3976 SDLoc DL(Op); 3977 3978 // If user has set the no alignment function attribute, ignore 3979 // alloca alignments. 3980 uint64_t AlignVal = (RealignOpt ? Align->getAsZExtVal() : 0); 3981 3982 uint64_t StackAlign = TFI->getStackAlignment(); 3983 uint64_t RequiredAlign = std::max(AlignVal, StackAlign); 3984 uint64_t ExtraAlignSpace = RequiredAlign - StackAlign; 3985 3986 SDValue NeededSpace = Size; 3987 3988 // Add extra space for alignment if needed. 3989 EVT PtrVT = getPointerTy(MF.getDataLayout()); 3990 if (ExtraAlignSpace) 3991 NeededSpace = DAG.getNode(ISD::ADD, DL, PtrVT, NeededSpace, 3992 DAG.getConstant(ExtraAlignSpace, DL, PtrVT)); 3993 3994 bool IsSigned = false; 3995 bool DoesNotReturn = false; 3996 bool IsReturnValueUsed = false; 3997 EVT VT = Op.getValueType(); 3998 SDValue AllocaCall = 3999 makeExternalCall(Chain, DAG, "@@ALCAXP", VT, ArrayRef(NeededSpace), 4000 CallingConv::C, IsSigned, DL, DoesNotReturn, 4001 IsReturnValueUsed) 4002 .first; 4003 4004 // Perform a CopyFromReg from %GPR4 (stack pointer register). Chain and Glue 4005 // to end of call in order to ensure it isn't broken up from the call 4006 // sequence. 
4007 auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>(); 4008 Register SPReg = Regs.getStackPointerRegister(); 4009 Chain = AllocaCall.getValue(1); 4010 SDValue Glue = AllocaCall.getValue(2); 4011 SDValue NewSPRegNode = DAG.getCopyFromReg(Chain, DL, SPReg, PtrVT, Glue); 4012 Chain = NewSPRegNode.getValue(1); 4013 4014 MVT PtrMVT = getPointerMemTy(MF.getDataLayout()); 4015 SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, PtrMVT); 4016 SDValue Result = DAG.getNode(ISD::ADD, DL, PtrMVT, NewSPRegNode, ArgAdjust); 4017 4018 // Dynamically realign if needed. 4019 if (ExtraAlignSpace) { 4020 Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result, 4021 DAG.getConstant(ExtraAlignSpace, DL, PtrVT)); 4022 Result = DAG.getNode(ISD::AND, DL, PtrVT, Result, 4023 DAG.getConstant(~(RequiredAlign - 1), DL, PtrVT)); 4024 } 4025 4026 SDValue Ops[2] = {Result, Chain}; 4027 return DAG.getMergeValues(Ops, DL); 4028 } 4029 4030 SDValue 4031 SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_ELF(SDValue Op, 4032 SelectionDAG &DAG) const { 4033 const TargetFrameLowering *TFI = Subtarget.getFrameLowering(); 4034 MachineFunction &MF = DAG.getMachineFunction(); 4035 bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack"); 4036 bool StoreBackchain = MF.getSubtarget<SystemZSubtarget>().hasBackChain(); 4037 4038 SDValue Chain = Op.getOperand(0); 4039 SDValue Size = Op.getOperand(1); 4040 SDValue Align = Op.getOperand(2); 4041 SDLoc DL(Op); 4042 4043 // If user has set the no alignment function attribute, ignore 4044 // alloca alignments. 4045 uint64_t AlignVal = (RealignOpt ? Align->getAsZExtVal() : 0); 4046 4047 uint64_t StackAlign = TFI->getStackAlignment(); 4048 uint64_t RequiredAlign = std::max(AlignVal, StackAlign); 4049 uint64_t ExtraAlignSpace = RequiredAlign - StackAlign; 4050 4051 Register SPReg = getStackPointerRegisterToSaveRestore(); 4052 SDValue NeededSpace = Size; 4053 4054 // Get a reference to the stack pointer. 4055 SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64); 4056 4057 // If we need a backchain, save it now. 4058 SDValue Backchain; 4059 if (StoreBackchain) 4060 Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG), 4061 MachinePointerInfo()); 4062 4063 // Add extra space for alignment if needed. 4064 if (ExtraAlignSpace) 4065 NeededSpace = DAG.getNode(ISD::ADD, DL, MVT::i64, NeededSpace, 4066 DAG.getConstant(ExtraAlignSpace, DL, MVT::i64)); 4067 4068 // Get the new stack pointer value. 4069 SDValue NewSP; 4070 if (hasInlineStackProbe(MF)) { 4071 NewSP = DAG.getNode(SystemZISD::PROBED_ALLOCA, DL, 4072 DAG.getVTList(MVT::i64, MVT::Other), Chain, OldSP, NeededSpace); 4073 Chain = NewSP.getValue(1); 4074 } 4075 else { 4076 NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace); 4077 // Copy the new stack pointer back. 4078 Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP); 4079 } 4080 4081 // The allocated data lives above the 160 bytes allocated for the standard 4082 // frame, plus any outgoing stack arguments. We don't know how much that 4083 // amounts to yet, so emit a special ADJDYNALLOC placeholder. 4084 SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64); 4085 SDValue Result = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, ArgAdjust); 4086 4087 // Dynamically realign if needed. 
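  // For example (illustrative only): with an 8-byte stack alignment and a
  // 32-byte alloca alignment, ExtraAlignSpace is 24, so the code below adds 24
  // and masks with ~31, producing a 32-byte aligned address within the extra
  // space reserved above.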
4088 if (RequiredAlign > StackAlign) { 4089 Result = 4090 DAG.getNode(ISD::ADD, DL, MVT::i64, Result, 4091 DAG.getConstant(ExtraAlignSpace, DL, MVT::i64)); 4092 Result = 4093 DAG.getNode(ISD::AND, DL, MVT::i64, Result, 4094 DAG.getConstant(~(RequiredAlign - 1), DL, MVT::i64)); 4095 } 4096 4097 if (StoreBackchain) 4098 Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG), 4099 MachinePointerInfo()); 4100 4101 SDValue Ops[2] = { Result, Chain }; 4102 return DAG.getMergeValues(Ops, DL); 4103 } 4104 4105 SDValue SystemZTargetLowering::lowerGET_DYNAMIC_AREA_OFFSET( 4106 SDValue Op, SelectionDAG &DAG) const { 4107 SDLoc DL(Op); 4108 4109 return DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64); 4110 } 4111 4112 SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op, 4113 SelectionDAG &DAG) const { 4114 EVT VT = Op.getValueType(); 4115 SDLoc DL(Op); 4116 SDValue Ops[2]; 4117 if (is32Bit(VT)) 4118 // Just do a normal 64-bit multiplication and extract the results. 4119 // We define this so that it can be used for constant division. 4120 lowerMUL_LOHI32(DAG, DL, ISD::SIGN_EXTEND, Op.getOperand(0), 4121 Op.getOperand(1), Ops[1], Ops[0]); 4122 else if (Subtarget.hasMiscellaneousExtensions2()) 4123 // SystemZISD::SMUL_LOHI returns the low result in the odd register and 4124 // the high result in the even register. ISD::SMUL_LOHI is defined to 4125 // return the low half first, so the results are in reverse order. 4126 lowerGR128Binary(DAG, DL, VT, SystemZISD::SMUL_LOHI, 4127 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]); 4128 else { 4129 // Do a full 128-bit multiplication based on SystemZISD::UMUL_LOHI: 4130 // 4131 // (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64) 4132 // 4133 // but using the fact that the upper halves are either all zeros 4134 // or all ones: 4135 // 4136 // (ll * rl) - ((lh & rl) << 64) - ((ll & rh) << 64) 4137 // 4138 // and grouping the right terms together since they are quicker than the 4139 // multiplication: 4140 // 4141 // (ll * rl) - (((lh & rl) + (ll & rh)) << 64) 4142 SDValue C63 = DAG.getConstant(63, DL, MVT::i64); 4143 SDValue LL = Op.getOperand(0); 4144 SDValue RL = Op.getOperand(1); 4145 SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63); 4146 SDValue RH = DAG.getNode(ISD::SRA, DL, VT, RL, C63); 4147 // SystemZISD::UMUL_LOHI returns the low result in the odd register and 4148 // the high result in the even register. ISD::SMUL_LOHI is defined to 4149 // return the low half first, so the results are in reverse order. 4150 lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI, 4151 LL, RL, Ops[1], Ops[0]); 4152 SDValue NegLLTimesRH = DAG.getNode(ISD::AND, DL, VT, LL, RH); 4153 SDValue NegLHTimesRL = DAG.getNode(ISD::AND, DL, VT, LH, RL); 4154 SDValue NegSum = DAG.getNode(ISD::ADD, DL, VT, NegLLTimesRH, NegLHTimesRL); 4155 Ops[1] = DAG.getNode(ISD::SUB, DL, VT, Ops[1], NegSum); 4156 } 4157 return DAG.getMergeValues(Ops, DL); 4158 } 4159 4160 SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op, 4161 SelectionDAG &DAG) const { 4162 EVT VT = Op.getValueType(); 4163 SDLoc DL(Op); 4164 SDValue Ops[2]; 4165 if (is32Bit(VT)) 4166 // Just do a normal 64-bit multiplication and extract the results. 4167 // We define this so that it can be used for constant division. 4168 lowerMUL_LOHI32(DAG, DL, ISD::ZERO_EXTEND, Op.getOperand(0), 4169 Op.getOperand(1), Ops[1], Ops[0]); 4170 else 4171 // SystemZISD::UMUL_LOHI returns the low result in the odd register and 4172 // the high result in the even register. 
ISD::UMUL_LOHI is defined to 4173 // return the low half first, so the results are in reverse order. 4174 lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI, 4175 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]); 4176 return DAG.getMergeValues(Ops, DL); 4177 } 4178 4179 SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op, 4180 SelectionDAG &DAG) const { 4181 SDValue Op0 = Op.getOperand(0); 4182 SDValue Op1 = Op.getOperand(1); 4183 EVT VT = Op.getValueType(); 4184 SDLoc DL(Op); 4185 4186 // We use DSGF for 32-bit division. This means the first operand must 4187 // always be 64-bit, and the second operand should be 32-bit whenever 4188 // that is possible, to improve performance. 4189 if (is32Bit(VT)) 4190 Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0); 4191 else if (DAG.ComputeNumSignBits(Op1) > 32) 4192 Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1); 4193 4194 // DSG(F) returns the remainder in the even register and the 4195 // quotient in the odd register. 4196 SDValue Ops[2]; 4197 lowerGR128Binary(DAG, DL, VT, SystemZISD::SDIVREM, Op0, Op1, Ops[1], Ops[0]); 4198 return DAG.getMergeValues(Ops, DL); 4199 } 4200 4201 SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op, 4202 SelectionDAG &DAG) const { 4203 EVT VT = Op.getValueType(); 4204 SDLoc DL(Op); 4205 4206 // DL(G) returns the remainder in the even register and the 4207 // quotient in the odd register. 4208 SDValue Ops[2]; 4209 lowerGR128Binary(DAG, DL, VT, SystemZISD::UDIVREM, 4210 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]); 4211 return DAG.getMergeValues(Ops, DL); 4212 } 4213 4214 SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const { 4215 assert(Op.getValueType() == MVT::i64 && "Should be 64-bit operation"); 4216 4217 // Get the known-zero masks for each operand. 4218 SDValue Ops[] = {Op.getOperand(0), Op.getOperand(1)}; 4219 KnownBits Known[2] = {DAG.computeKnownBits(Ops[0]), 4220 DAG.computeKnownBits(Ops[1])}; 4221 4222 // See if the upper 32 bits of one operand and the lower 32 bits of the 4223 // other are known zero. They are the low and high operands respectively. 4224 uint64_t Masks[] = { Known[0].Zero.getZExtValue(), 4225 Known[1].Zero.getZExtValue() }; 4226 unsigned High, Low; 4227 if ((Masks[0] >> 32) == 0xffffffff && uint32_t(Masks[1]) == 0xffffffff) 4228 High = 1, Low = 0; 4229 else if ((Masks[1] >> 32) == 0xffffffff && uint32_t(Masks[0]) == 0xffffffff) 4230 High = 0, Low = 1; 4231 else 4232 return Op; 4233 4234 SDValue LowOp = Ops[Low]; 4235 SDValue HighOp = Ops[High]; 4236 4237 // If the high part is a constant, we're better off using IILH. 4238 if (HighOp.getOpcode() == ISD::Constant) 4239 return Op; 4240 4241 // If the low part is a constant that is outside the range of LHI, 4242 // then we're better off using IILF. 4243 if (LowOp.getOpcode() == ISD::Constant) { 4244 int64_t Value = int32_t(LowOp->getAsZExtVal()); 4245 if (!isInt<16>(Value)) 4246 return Op; 4247 } 4248 4249 // Check whether the high part is an AND that doesn't change the 4250 // high 32 bits and just masks out low bits. We can skip it if so. 
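  // (Illustrative: an AND mask such as 0xffffffff00001234 keeps every high bit,
  // so the check below always succeeds and the AND is looked through.)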
4251 if (HighOp.getOpcode() == ISD::AND && 4252 HighOp.getOperand(1).getOpcode() == ISD::Constant) { 4253 SDValue HighOp0 = HighOp.getOperand(0); 4254 uint64_t Mask = HighOp.getConstantOperandVal(1); 4255 if (DAG.MaskedValueIsZero(HighOp0, APInt(64, ~(Mask | 0xffffffff)))) 4256 HighOp = HighOp0; 4257 } 4258 4259 // Take advantage of the fact that all GR32 operations only change the 4260 // low 32 bits by truncating Low to an i32 and inserting it directly 4261 // using a subreg. The interesting cases are those where the truncation 4262 // can be folded. 4263 SDLoc DL(Op); 4264 SDValue Low32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, LowOp); 4265 return DAG.getTargetInsertSubreg(SystemZ::subreg_l32, DL, 4266 MVT::i64, HighOp, Low32); 4267 } 4268 4269 // Lower SADDO/SSUBO/UADDO/USUBO nodes. 4270 SDValue SystemZTargetLowering::lowerXALUO(SDValue Op, 4271 SelectionDAG &DAG) const { 4272 SDNode *N = Op.getNode(); 4273 SDValue LHS = N->getOperand(0); 4274 SDValue RHS = N->getOperand(1); 4275 SDLoc DL(N); 4276 4277 if (N->getValueType(0) == MVT::i128) { 4278 unsigned BaseOp = 0; 4279 unsigned FlagOp = 0; 4280 bool IsBorrow = false; 4281 switch (Op.getOpcode()) { 4282 default: llvm_unreachable("Unknown instruction!"); 4283 case ISD::UADDO: 4284 BaseOp = ISD::ADD; 4285 FlagOp = SystemZISD::VACC; 4286 break; 4287 case ISD::USUBO: 4288 BaseOp = ISD::SUB; 4289 FlagOp = SystemZISD::VSCBI; 4290 IsBorrow = true; 4291 break; 4292 } 4293 SDValue Result = DAG.getNode(BaseOp, DL, MVT::i128, LHS, RHS); 4294 SDValue Flag = DAG.getNode(FlagOp, DL, MVT::i128, LHS, RHS); 4295 Flag = DAG.getNode(ISD::AssertZext, DL, MVT::i128, Flag, 4296 DAG.getValueType(MVT::i1)); 4297 Flag = DAG.getZExtOrTrunc(Flag, DL, N->getValueType(1)); 4298 if (IsBorrow) 4299 Flag = DAG.getNode(ISD::XOR, DL, Flag.getValueType(), 4300 Flag, DAG.getConstant(1, DL, Flag.getValueType())); 4301 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Flag); 4302 } 4303 4304 unsigned BaseOp = 0; 4305 unsigned CCValid = 0; 4306 unsigned CCMask = 0; 4307 4308 switch (Op.getOpcode()) { 4309 default: llvm_unreachable("Unknown instruction!"); 4310 case ISD::SADDO: 4311 BaseOp = SystemZISD::SADDO; 4312 CCValid = SystemZ::CCMASK_ARITH; 4313 CCMask = SystemZ::CCMASK_ARITH_OVERFLOW; 4314 break; 4315 case ISD::SSUBO: 4316 BaseOp = SystemZISD::SSUBO; 4317 CCValid = SystemZ::CCMASK_ARITH; 4318 CCMask = SystemZ::CCMASK_ARITH_OVERFLOW; 4319 break; 4320 case ISD::UADDO: 4321 BaseOp = SystemZISD::UADDO; 4322 CCValid = SystemZ::CCMASK_LOGICAL; 4323 CCMask = SystemZ::CCMASK_LOGICAL_CARRY; 4324 break; 4325 case ISD::USUBO: 4326 BaseOp = SystemZISD::USUBO; 4327 CCValid = SystemZ::CCMASK_LOGICAL; 4328 CCMask = SystemZ::CCMASK_LOGICAL_BORROW; 4329 break; 4330 } 4331 4332 SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32); 4333 SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS); 4334 4335 SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask); 4336 if (N->getValueType(1) == MVT::i1) 4337 SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC); 4338 4339 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC); 4340 } 4341 4342 static bool isAddCarryChain(SDValue Carry) { 4343 while (Carry.getOpcode() == ISD::UADDO_CARRY) 4344 Carry = Carry.getOperand(2); 4345 return Carry.getOpcode() == ISD::UADDO; 4346 } 4347 4348 static bool isSubBorrowChain(SDValue Carry) { 4349 while (Carry.getOpcode() == ISD::USUBO_CARRY) 4350 Carry = Carry.getOperand(2); 4351 return Carry.getOpcode() == ISD::USUBO; 4352 } 4353 4354 // Lower 
UADDO_CARRY/USUBO_CARRY nodes. 4355 SDValue SystemZTargetLowering::lowerUADDSUBO_CARRY(SDValue Op, 4356 SelectionDAG &DAG) const { 4357 4358 SDNode *N = Op.getNode(); 4359 MVT VT = N->getSimpleValueType(0); 4360 4361 // Let legalize expand this if it isn't a legal type yet. 4362 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT)) 4363 return SDValue(); 4364 4365 SDValue LHS = N->getOperand(0); 4366 SDValue RHS = N->getOperand(1); 4367 SDValue Carry = Op.getOperand(2); 4368 SDLoc DL(N); 4369 4370 if (VT == MVT::i128) { 4371 unsigned BaseOp = 0; 4372 unsigned FlagOp = 0; 4373 bool IsBorrow = false; 4374 switch (Op.getOpcode()) { 4375 default: llvm_unreachable("Unknown instruction!"); 4376 case ISD::UADDO_CARRY: 4377 BaseOp = SystemZISD::VAC; 4378 FlagOp = SystemZISD::VACCC; 4379 break; 4380 case ISD::USUBO_CARRY: 4381 BaseOp = SystemZISD::VSBI; 4382 FlagOp = SystemZISD::VSBCBI; 4383 IsBorrow = true; 4384 break; 4385 } 4386 if (IsBorrow) 4387 Carry = DAG.getNode(ISD::XOR, DL, Carry.getValueType(), 4388 Carry, DAG.getConstant(1, DL, Carry.getValueType())); 4389 Carry = DAG.getZExtOrTrunc(Carry, DL, MVT::i128); 4390 SDValue Result = DAG.getNode(BaseOp, DL, MVT::i128, LHS, RHS, Carry); 4391 SDValue Flag = DAG.getNode(FlagOp, DL, MVT::i128, LHS, RHS, Carry); 4392 Flag = DAG.getNode(ISD::AssertZext, DL, MVT::i128, Flag, 4393 DAG.getValueType(MVT::i1)); 4394 Flag = DAG.getZExtOrTrunc(Flag, DL, N->getValueType(1)); 4395 if (IsBorrow) 4396 Flag = DAG.getNode(ISD::XOR, DL, Flag.getValueType(), 4397 Flag, DAG.getConstant(1, DL, Flag.getValueType())); 4398 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Flag); 4399 } 4400 4401 unsigned BaseOp = 0; 4402 unsigned CCValid = 0; 4403 unsigned CCMask = 0; 4404 4405 switch (Op.getOpcode()) { 4406 default: llvm_unreachable("Unknown instruction!"); 4407 case ISD::UADDO_CARRY: 4408 if (!isAddCarryChain(Carry)) 4409 return SDValue(); 4410 4411 BaseOp = SystemZISD::ADDCARRY; 4412 CCValid = SystemZ::CCMASK_LOGICAL; 4413 CCMask = SystemZ::CCMASK_LOGICAL_CARRY; 4414 break; 4415 case ISD::USUBO_CARRY: 4416 if (!isSubBorrowChain(Carry)) 4417 return SDValue(); 4418 4419 BaseOp = SystemZISD::SUBCARRY; 4420 CCValid = SystemZ::CCMASK_LOGICAL; 4421 CCMask = SystemZ::CCMASK_LOGICAL_BORROW; 4422 break; 4423 } 4424 4425 // Set the condition code from the carry flag. 4426 Carry = DAG.getNode(SystemZISD::GET_CCMASK, DL, MVT::i32, Carry, 4427 DAG.getConstant(CCValid, DL, MVT::i32), 4428 DAG.getConstant(CCMask, DL, MVT::i32)); 4429 4430 SDVTList VTs = DAG.getVTList(VT, MVT::i32); 4431 SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS, Carry); 4432 4433 SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask); 4434 if (N->getValueType(1) == MVT::i1) 4435 SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC); 4436 4437 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC); 4438 } 4439 4440 SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op, 4441 SelectionDAG &DAG) const { 4442 EVT VT = Op.getValueType(); 4443 SDLoc DL(Op); 4444 Op = Op.getOperand(0); 4445 4446 if (VT.getScalarSizeInBits() == 128) { 4447 Op = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op); 4448 Op = DAG.getNode(ISD::CTPOP, DL, MVT::v2i64, Op); 4449 SDValue Tmp = DAG.getSplatBuildVector(MVT::v2i64, DL, 4450 DAG.getConstant(0, DL, MVT::i64)); 4451 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp); 4452 return Op; 4453 } 4454 4455 // Handle vector types via VPOPCT. 
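  // The POPCNT node below counts bits per byte; the cases that follow merge
  // those byte counts into each element. For v8i16 (as an illustration) the
  // shift left by 8, the add and the logical shift right by 8 sum each
  // halfword's two byte counts.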
4456 if (VT.isVector()) { 4457 Op = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Op); 4458 Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::v16i8, Op); 4459 switch (VT.getScalarSizeInBits()) { 4460 case 8: 4461 break; 4462 case 16: { 4463 Op = DAG.getNode(ISD::BITCAST, DL, VT, Op); 4464 SDValue Shift = DAG.getConstant(8, DL, MVT::i32); 4465 SDValue Tmp = DAG.getNode(SystemZISD::VSHL_BY_SCALAR, DL, VT, Op, Shift); 4466 Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp); 4467 Op = DAG.getNode(SystemZISD::VSRL_BY_SCALAR, DL, VT, Op, Shift); 4468 break; 4469 } 4470 case 32: { 4471 SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL, 4472 DAG.getConstant(0, DL, MVT::i32)); 4473 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp); 4474 break; 4475 } 4476 case 64: { 4477 SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL, 4478 DAG.getConstant(0, DL, MVT::i32)); 4479 Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Tmp); 4480 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp); 4481 break; 4482 } 4483 default: 4484 llvm_unreachable("Unexpected type"); 4485 } 4486 return Op; 4487 } 4488 4489 // Get the known-zero mask for the operand. 4490 KnownBits Known = DAG.computeKnownBits(Op); 4491 unsigned NumSignificantBits = Known.getMaxValue().getActiveBits(); 4492 if (NumSignificantBits == 0) 4493 return DAG.getConstant(0, DL, VT); 4494 4495 // Skip known-zero high parts of the operand. 4496 int64_t OrigBitSize = VT.getSizeInBits(); 4497 int64_t BitSize = llvm::bit_ceil(NumSignificantBits); 4498 BitSize = std::min(BitSize, OrigBitSize); 4499 4500 // The POPCNT instruction counts the number of bits in each byte. 4501 Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op); 4502 Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::i64, Op); 4503 Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op); 4504 4505 // Add up per-byte counts in a binary tree. All bits of Op at 4506 // position larger than BitSize remain zero throughout. 4507 for (int64_t I = BitSize / 2; I >= 8; I = I / 2) { 4508 SDValue Tmp = DAG.getNode(ISD::SHL, DL, VT, Op, DAG.getConstant(I, DL, VT)); 4509 if (BitSize != OrigBitSize) 4510 Tmp = DAG.getNode(ISD::AND, DL, VT, Tmp, 4511 DAG.getConstant(((uint64_t)1 << BitSize) - 1, DL, VT)); 4512 Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp); 4513 } 4514 4515 // Extract overall result from high byte. 4516 if (BitSize > 8) 4517 Op = DAG.getNode(ISD::SRL, DL, VT, Op, 4518 DAG.getConstant(BitSize - 8, DL, VT)); 4519 4520 return Op; 4521 } 4522 4523 SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op, 4524 SelectionDAG &DAG) const { 4525 SDLoc DL(Op); 4526 AtomicOrdering FenceOrdering = 4527 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1)); 4528 SyncScope::ID FenceSSID = 4529 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2)); 4530 4531 // The only fence that needs an instruction is a sequentially-consistent 4532 // cross-thread fence. 4533 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent && 4534 FenceSSID == SyncScope::System) { 4535 return SDValue(DAG.getMachineNode(SystemZ::Serialize, DL, MVT::Other, 4536 Op.getOperand(0)), 4537 0); 4538 } 4539 4540 // MEMBARRIER is a compiler barrier; it codegens to a no-op. 
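  // Weaker orderings and single-thread fences only need to constrain the
  // compiler, so falling through to MEMBARRIER is sufficient for them.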
4541 return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0)); 4542 } 4543 4544 SDValue SystemZTargetLowering::lowerATOMIC_LDST_I128(SDValue Op, 4545 SelectionDAG &DAG) const { 4546 auto *Node = cast<AtomicSDNode>(Op.getNode()); 4547 assert( 4548 (Node->getMemoryVT() == MVT::i128 || Node->getMemoryVT() == MVT::f128) && 4549 "Only custom lowering i128 or f128."); 4550 // Use same code to handle both legal and non-legal i128 types. 4551 SmallVector<SDValue, 2> Results; 4552 LowerOperationWrapper(Node, Results, DAG); 4553 return DAG.getMergeValues(Results, SDLoc(Op)); 4554 } 4555 4556 // Prepare for a Compare And Swap for a subword operation. This needs to be 4557 // done in memory with 4 bytes at natural alignment. 4558 static void getCSAddressAndShifts(SDValue Addr, SelectionDAG &DAG, SDLoc DL, 4559 SDValue &AlignedAddr, SDValue &BitShift, 4560 SDValue &NegBitShift) { 4561 EVT PtrVT = Addr.getValueType(); 4562 EVT WideVT = MVT::i32; 4563 4564 // Get the address of the containing word. 4565 AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr, 4566 DAG.getConstant(-4, DL, PtrVT)); 4567 4568 // Get the number of bits that the word must be rotated left in order 4569 // to bring the field to the top bits of a GR32. 4570 BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr, 4571 DAG.getConstant(3, DL, PtrVT)); 4572 BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift); 4573 4574 // Get the complementing shift amount, for rotating a field in the top 4575 // bits back to its proper position. 4576 NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT, 4577 DAG.getConstant(0, DL, WideVT), BitShift); 4578 4579 } 4580 4581 // Op is an 8-, 16-bit or 32-bit ATOMIC_LOAD_* operation. Lower the first 4582 // two into the fullword ATOMIC_LOADW_* operation given by Opcode. 4583 SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op, 4584 SelectionDAG &DAG, 4585 unsigned Opcode) const { 4586 auto *Node = cast<AtomicSDNode>(Op.getNode()); 4587 4588 // 32-bit operations need no special handling. 4589 EVT NarrowVT = Node->getMemoryVT(); 4590 EVT WideVT = MVT::i32; 4591 if (NarrowVT == WideVT) 4592 return Op; 4593 4594 int64_t BitSize = NarrowVT.getSizeInBits(); 4595 SDValue ChainIn = Node->getChain(); 4596 SDValue Addr = Node->getBasePtr(); 4597 SDValue Src2 = Node->getVal(); 4598 MachineMemOperand *MMO = Node->getMemOperand(); 4599 SDLoc DL(Node); 4600 4601 // Convert atomic subtracts of constants into additions. 4602 if (Opcode == SystemZISD::ATOMIC_LOADW_SUB) 4603 if (auto *Const = dyn_cast<ConstantSDNode>(Src2)) { 4604 Opcode = SystemZISD::ATOMIC_LOADW_ADD; 4605 Src2 = DAG.getConstant(-Const->getSExtValue(), DL, Src2.getValueType()); 4606 } 4607 4608 SDValue AlignedAddr, BitShift, NegBitShift; 4609 getCSAddressAndShifts(Addr, DAG, DL, AlignedAddr, BitShift, NegBitShift); 4610 4611 // Extend the source operand to 32 bits and prepare it for the inner loop. 4612 // ATOMIC_SWAPW uses RISBG to rotate the field left, but all other 4613 // operations require the source to be shifted in advance. (This shift 4614 // can be folded if the source is constant.) For AND and NAND, the lower 4615 // bits must be set, while for other opcodes they should be left clear. 
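  // For example, an 8-bit AND with mask M becomes a 32-bit AND with
  // (M << 24) | 0x00ffffff, which leaves the other bytes of the containing
  // word unchanged while the field sits in the top bits.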
4616 if (Opcode != SystemZISD::ATOMIC_SWAPW) 4617 Src2 = DAG.getNode(ISD::SHL, DL, WideVT, Src2, 4618 DAG.getConstant(32 - BitSize, DL, WideVT)); 4619 if (Opcode == SystemZISD::ATOMIC_LOADW_AND || 4620 Opcode == SystemZISD::ATOMIC_LOADW_NAND) 4621 Src2 = DAG.getNode(ISD::OR, DL, WideVT, Src2, 4622 DAG.getConstant(uint32_t(-1) >> BitSize, DL, WideVT)); 4623 4624 // Construct the ATOMIC_LOADW_* node. 4625 SDVTList VTList = DAG.getVTList(WideVT, MVT::Other); 4626 SDValue Ops[] = { ChainIn, AlignedAddr, Src2, BitShift, NegBitShift, 4627 DAG.getConstant(BitSize, DL, WideVT) }; 4628 SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode, DL, VTList, Ops, 4629 NarrowVT, MMO); 4630 4631 // Rotate the result of the final CS so that the field is in the lower 4632 // bits of a GR32, then truncate it. 4633 SDValue ResultShift = DAG.getNode(ISD::ADD, DL, WideVT, BitShift, 4634 DAG.getConstant(BitSize, DL, WideVT)); 4635 SDValue Result = DAG.getNode(ISD::ROTL, DL, WideVT, AtomicOp, ResultShift); 4636 4637 SDValue RetOps[2] = { Result, AtomicOp.getValue(1) }; 4638 return DAG.getMergeValues(RetOps, DL); 4639 } 4640 4641 // Op is an ATOMIC_LOAD_SUB operation. Lower 8- and 16-bit operations into 4642 // ATOMIC_LOADW_SUBs and convert 32- and 64-bit operations into additions. 4643 SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op, 4644 SelectionDAG &DAG) const { 4645 auto *Node = cast<AtomicSDNode>(Op.getNode()); 4646 EVT MemVT = Node->getMemoryVT(); 4647 if (MemVT == MVT::i32 || MemVT == MVT::i64) { 4648 // A full-width operation: negate and use LAA(G). 4649 assert(Op.getValueType() == MemVT && "Mismatched VTs"); 4650 assert(Subtarget.hasInterlockedAccess1() && 4651 "Should have been expanded by AtomicExpand pass."); 4652 SDValue Src2 = Node->getVal(); 4653 SDLoc DL(Src2); 4654 SDValue NegSrc2 = 4655 DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, DL, MemVT), Src2); 4656 return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, MemVT, 4657 Node->getChain(), Node->getBasePtr(), NegSrc2, 4658 Node->getMemOperand()); 4659 } 4660 4661 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_SUB); 4662 } 4663 4664 // Lower 8/16/32/64-bit ATOMIC_CMP_SWAP_WITH_SUCCESS node. 4665 SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op, 4666 SelectionDAG &DAG) const { 4667 auto *Node = cast<AtomicSDNode>(Op.getNode()); 4668 SDValue ChainIn = Node->getOperand(0); 4669 SDValue Addr = Node->getOperand(1); 4670 SDValue CmpVal = Node->getOperand(2); 4671 SDValue SwapVal = Node->getOperand(3); 4672 MachineMemOperand *MMO = Node->getMemOperand(); 4673 SDLoc DL(Node); 4674 4675 if (Node->getMemoryVT() == MVT::i128) { 4676 // Use same code to handle both legal and non-legal i128 types. 4677 SmallVector<SDValue, 3> Results; 4678 LowerOperationWrapper(Node, Results, DAG); 4679 return DAG.getMergeValues(Results, DL); 4680 } 4681 4682 // We have native support for 32-bit and 64-bit compare and swap, but we 4683 // still need to expand extracting the "success" result from the CC. 4684 EVT NarrowVT = Node->getMemoryVT(); 4685 EVT WideVT = NarrowVT == MVT::i64 ? 
MVT::i64 : MVT::i32; 4686 if (NarrowVT == WideVT) { 4687 SDVTList Tys = DAG.getVTList(WideVT, MVT::i32, MVT::Other); 4688 SDValue Ops[] = { ChainIn, Addr, CmpVal, SwapVal }; 4689 SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP, 4690 DL, Tys, Ops, NarrowVT, MMO); 4691 SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1), 4692 SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ); 4693 4694 DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), AtomicOp.getValue(0)); 4695 DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success); 4696 DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2)); 4697 return SDValue(); 4698 } 4699 4700 // Convert 8-bit and 16-bit compare and swap to a loop, implemented 4701 // via a fullword ATOMIC_CMP_SWAPW operation. 4702 int64_t BitSize = NarrowVT.getSizeInBits(); 4703 4704 SDValue AlignedAddr, BitShift, NegBitShift; 4705 getCSAddressAndShifts(Addr, DAG, DL, AlignedAddr, BitShift, NegBitShift); 4706 4707 // Construct the ATOMIC_CMP_SWAPW node. 4708 SDVTList VTList = DAG.getVTList(WideVT, MVT::i32, MVT::Other); 4709 SDValue Ops[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift, 4710 NegBitShift, DAG.getConstant(BitSize, DL, WideVT) }; 4711 SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAPW, DL, 4712 VTList, Ops, NarrowVT, MMO); 4713 SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1), 4714 SystemZ::CCMASK_ICMP, SystemZ::CCMASK_CMP_EQ); 4715 4716 // emitAtomicCmpSwapW() will zero extend the result (original value). 4717 SDValue OrigVal = DAG.getNode(ISD::AssertZext, DL, WideVT, AtomicOp.getValue(0), 4718 DAG.getValueType(NarrowVT)); 4719 DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), OrigVal); 4720 DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success); 4721 DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2)); 4722 return SDValue(); 4723 } 4724 4725 MachineMemOperand::Flags 4726 SystemZTargetLowering::getTargetMMOFlags(const Instruction &I) const { 4727 // Because of how we convert atomic_load and atomic_store to normal loads and 4728 // stores in the DAG, we need to ensure that the MMOs are marked volatile 4729 // since DAGCombine hasn't been updated to account for atomic, but non 4730 // volatile loads. 
(See D57601) 4731 if (auto *SI = dyn_cast<StoreInst>(&I)) 4732 if (SI->isAtomic()) 4733 return MachineMemOperand::MOVolatile; 4734 if (auto *LI = dyn_cast<LoadInst>(&I)) 4735 if (LI->isAtomic()) 4736 return MachineMemOperand::MOVolatile; 4737 if (auto *AI = dyn_cast<AtomicRMWInst>(&I)) 4738 if (AI->isAtomic()) 4739 return MachineMemOperand::MOVolatile; 4740 if (auto *AI = dyn_cast<AtomicCmpXchgInst>(&I)) 4741 if (AI->isAtomic()) 4742 return MachineMemOperand::MOVolatile; 4743 return MachineMemOperand::MONone; 4744 } 4745 4746 SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op, 4747 SelectionDAG &DAG) const { 4748 MachineFunction &MF = DAG.getMachineFunction(); 4749 auto *Regs = Subtarget.getSpecialRegisters(); 4750 if (MF.getFunction().getCallingConv() == CallingConv::GHC) 4751 report_fatal_error("Variable-sized stack allocations are not supported " 4752 "in GHC calling convention"); 4753 return DAG.getCopyFromReg(Op.getOperand(0), SDLoc(Op), 4754 Regs->getStackPointerRegister(), Op.getValueType()); 4755 } 4756 4757 SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op, 4758 SelectionDAG &DAG) const { 4759 MachineFunction &MF = DAG.getMachineFunction(); 4760 auto *Regs = Subtarget.getSpecialRegisters(); 4761 bool StoreBackchain = MF.getSubtarget<SystemZSubtarget>().hasBackChain(); 4762 4763 if (MF.getFunction().getCallingConv() == CallingConv::GHC) 4764 report_fatal_error("Variable-sized stack allocations are not supported " 4765 "in GHC calling convention"); 4766 4767 SDValue Chain = Op.getOperand(0); 4768 SDValue NewSP = Op.getOperand(1); 4769 SDValue Backchain; 4770 SDLoc DL(Op); 4771 4772 if (StoreBackchain) { 4773 SDValue OldSP = DAG.getCopyFromReg( 4774 Chain, DL, Regs->getStackPointerRegister(), MVT::i64); 4775 Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG), 4776 MachinePointerInfo()); 4777 } 4778 4779 Chain = DAG.getCopyToReg(Chain, DL, Regs->getStackPointerRegister(), NewSP); 4780 4781 if (StoreBackchain) 4782 Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG), 4783 MachinePointerInfo()); 4784 4785 return Chain; 4786 } 4787 4788 SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op, 4789 SelectionDAG &DAG) const { 4790 bool IsData = Op.getConstantOperandVal(4); 4791 if (!IsData) 4792 // Just preserve the chain. 4793 return Op.getOperand(0); 4794 4795 SDLoc DL(Op); 4796 bool IsWrite = Op.getConstantOperandVal(2); 4797 unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ; 4798 auto *Node = cast<MemIntrinsicSDNode>(Op.getNode()); 4799 SDValue Ops[] = {Op.getOperand(0), DAG.getTargetConstant(Code, DL, MVT::i32), 4800 Op.getOperand(1)}; 4801 return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, DL, 4802 Node->getVTList(), Ops, 4803 Node->getMemoryVT(), Node->getMemOperand()); 4804 } 4805 4806 // Convert condition code in CCReg to an i32 value. 
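// IPM copies the CC into the high byte of its 32-bit result; the shift by
// SystemZ::IPM_CC moves that two-bit field down so the returned value is the
// raw CC number (0-3).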
4807 static SDValue getCCResult(SelectionDAG &DAG, SDValue CCReg) { 4808 SDLoc DL(CCReg); 4809 SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, CCReg); 4810 return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM, 4811 DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32)); 4812 } 4813 4814 SDValue 4815 SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, 4816 SelectionDAG &DAG) const { 4817 unsigned Opcode, CCValid; 4818 if (isIntrinsicWithCCAndChain(Op, Opcode, CCValid)) { 4819 assert(Op->getNumValues() == 2 && "Expected only CC result and chain"); 4820 SDNode *Node = emitIntrinsicWithCCAndChain(DAG, Op, Opcode); 4821 SDValue CC = getCCResult(DAG, SDValue(Node, 0)); 4822 DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), CC); 4823 return SDValue(); 4824 } 4825 4826 return SDValue(); 4827 } 4828 4829 SDValue 4830 SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, 4831 SelectionDAG &DAG) const { 4832 unsigned Opcode, CCValid; 4833 if (isIntrinsicWithCC(Op, Opcode, CCValid)) { 4834 SDNode *Node = emitIntrinsicWithCC(DAG, Op, Opcode); 4835 if (Op->getNumValues() == 1) 4836 return getCCResult(DAG, SDValue(Node, 0)); 4837 assert(Op->getNumValues() == 2 && "Expected a CC and non-CC result"); 4838 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), Op->getVTList(), 4839 SDValue(Node, 0), getCCResult(DAG, SDValue(Node, 1))); 4840 } 4841 4842 unsigned Id = Op.getConstantOperandVal(0); 4843 switch (Id) { 4844 case Intrinsic::thread_pointer: 4845 return lowerThreadPointer(SDLoc(Op), DAG); 4846 4847 case Intrinsic::s390_vpdi: 4848 return DAG.getNode(SystemZISD::PERMUTE_DWORDS, SDLoc(Op), Op.getValueType(), 4849 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); 4850 4851 case Intrinsic::s390_vperm: 4852 return DAG.getNode(SystemZISD::PERMUTE, SDLoc(Op), Op.getValueType(), 4853 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); 4854 4855 case Intrinsic::s390_vuphb: 4856 case Intrinsic::s390_vuphh: 4857 case Intrinsic::s390_vuphf: 4858 return DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(Op), Op.getValueType(), 4859 Op.getOperand(1)); 4860 4861 case Intrinsic::s390_vuplhb: 4862 case Intrinsic::s390_vuplhh: 4863 case Intrinsic::s390_vuplhf: 4864 return DAG.getNode(SystemZISD::UNPACKL_HIGH, SDLoc(Op), Op.getValueType(), 4865 Op.getOperand(1)); 4866 4867 case Intrinsic::s390_vuplb: 4868 case Intrinsic::s390_vuplhw: 4869 case Intrinsic::s390_vuplf: 4870 return DAG.getNode(SystemZISD::UNPACK_LOW, SDLoc(Op), Op.getValueType(), 4871 Op.getOperand(1)); 4872 4873 case Intrinsic::s390_vupllb: 4874 case Intrinsic::s390_vupllh: 4875 case Intrinsic::s390_vupllf: 4876 return DAG.getNode(SystemZISD::UNPACKL_LOW, SDLoc(Op), Op.getValueType(), 4877 Op.getOperand(1)); 4878 4879 case Intrinsic::s390_vsumb: 4880 case Intrinsic::s390_vsumh: 4881 case Intrinsic::s390_vsumgh: 4882 case Intrinsic::s390_vsumgf: 4883 case Intrinsic::s390_vsumqf: 4884 case Intrinsic::s390_vsumqg: 4885 return DAG.getNode(SystemZISD::VSUM, SDLoc(Op), Op.getValueType(), 4886 Op.getOperand(1), Op.getOperand(2)); 4887 4888 case Intrinsic::s390_vaq: 4889 return DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(), 4890 Op.getOperand(1), Op.getOperand(2)); 4891 case Intrinsic::s390_vaccb: 4892 case Intrinsic::s390_vacch: 4893 case Intrinsic::s390_vaccf: 4894 case Intrinsic::s390_vaccg: 4895 case Intrinsic::s390_vaccq: 4896 return DAG.getNode(SystemZISD::VACC, SDLoc(Op), Op.getValueType(), 4897 Op.getOperand(1), Op.getOperand(2)); 4898 case Intrinsic::s390_vacq: 4899 return DAG.getNode(SystemZISD::VAC, SDLoc(Op), Op.getValueType(), 4900 
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); 4901 case Intrinsic::s390_vacccq: 4902 return DAG.getNode(SystemZISD::VACCC, SDLoc(Op), Op.getValueType(), 4903 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); 4904 4905 case Intrinsic::s390_vsq: 4906 return DAG.getNode(ISD::SUB, SDLoc(Op), Op.getValueType(), 4907 Op.getOperand(1), Op.getOperand(2)); 4908 case Intrinsic::s390_vscbib: 4909 case Intrinsic::s390_vscbih: 4910 case Intrinsic::s390_vscbif: 4911 case Intrinsic::s390_vscbig: 4912 case Intrinsic::s390_vscbiq: 4913 return DAG.getNode(SystemZISD::VSCBI, SDLoc(Op), Op.getValueType(), 4914 Op.getOperand(1), Op.getOperand(2)); 4915 case Intrinsic::s390_vsbiq: 4916 return DAG.getNode(SystemZISD::VSBI, SDLoc(Op), Op.getValueType(), 4917 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); 4918 case Intrinsic::s390_vsbcbiq: 4919 return DAG.getNode(SystemZISD::VSBCBI, SDLoc(Op), Op.getValueType(), 4920 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); 4921 } 4922 4923 return SDValue(); 4924 } 4925 4926 namespace { 4927 // Says that SystemZISD operation Opcode can be used to perform the equivalent 4928 // of a VPERM with permute vector Bytes. If Opcode takes three operands, 4929 // Operand is the constant third operand, otherwise it is the number of 4930 // bytes in each element of the result. 4931 struct Permute { 4932 unsigned Opcode; 4933 unsigned Operand; 4934 unsigned char Bytes[SystemZ::VectorBytes]; 4935 }; 4936 } 4937 4938 static const Permute PermuteForms[] = { 4939 // VMRHG 4940 { SystemZISD::MERGE_HIGH, 8, 4941 { 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 } }, 4942 // VMRHF 4943 { SystemZISD::MERGE_HIGH, 4, 4944 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } }, 4945 // VMRHH 4946 { SystemZISD::MERGE_HIGH, 2, 4947 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } }, 4948 // VMRHB 4949 { SystemZISD::MERGE_HIGH, 1, 4950 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } }, 4951 // VMRLG 4952 { SystemZISD::MERGE_LOW, 8, 4953 { 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31 } }, 4954 // VMRLF 4955 { SystemZISD::MERGE_LOW, 4, 4956 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } }, 4957 // VMRLH 4958 { SystemZISD::MERGE_LOW, 2, 4959 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } }, 4960 // VMRLB 4961 { SystemZISD::MERGE_LOW, 1, 4962 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } }, 4963 // VPKG 4964 { SystemZISD::PACK, 4, 4965 { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 } }, 4966 // VPKF 4967 { SystemZISD::PACK, 2, 4968 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } }, 4969 // VPKH 4970 { SystemZISD::PACK, 1, 4971 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } }, 4972 // VPDI V1, V2, 4 (low half of V1, high half of V2) 4973 { SystemZISD::PERMUTE_DWORDS, 4, 4974 { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 } }, 4975 // VPDI V1, V2, 1 (high half of V1, low half of V2) 4976 { SystemZISD::PERMUTE_DWORDS, 1, 4977 { 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 } } 4978 }; 4979 4980 // Called after matching a vector shuffle against a particular pattern. 4981 // Both the original shuffle and the pattern have two vector operands. 4982 // OpNos[0] is the operand of the original shuffle that should be used for 4983 // operand 0 of the pattern, or -1 if operand 0 of the pattern can be anything. 4984 // OpNos[1] is the same for operand 1 of the pattern. 
Resolve these -1s and 4985 // set OpNo0 and OpNo1 to the shuffle operands that should actually be used 4986 // for operands 0 and 1 of the pattern. 4987 static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1) { 4988 if (OpNos[0] < 0) { 4989 if (OpNos[1] < 0) 4990 return false; 4991 OpNo0 = OpNo1 = OpNos[1]; 4992 } else if (OpNos[1] < 0) { 4993 OpNo0 = OpNo1 = OpNos[0]; 4994 } else { 4995 OpNo0 = OpNos[0]; 4996 OpNo1 = OpNos[1]; 4997 } 4998 return true; 4999 } 5000 5001 // Bytes is a VPERM-like permute vector, except that -1 is used for 5002 // undefined bytes. Return true if the VPERM can be implemented using P. 5003 // When returning true set OpNo0 to the VPERM operand that should be 5004 // used for operand 0 of P and likewise OpNo1 for operand 1 of P. 5005 // 5006 // For example, if swapping the VPERM operands allows P to match, OpNo0 5007 // will be 1 and OpNo1 will be 0. If instead Bytes only refers to one 5008 // operand, but rewriting it to use two duplicated operands allows it to 5009 // match P, then OpNo0 and OpNo1 will be the same. 5010 static bool matchPermute(const SmallVectorImpl<int> &Bytes, const Permute &P, 5011 unsigned &OpNo0, unsigned &OpNo1) { 5012 int OpNos[] = { -1, -1 }; 5013 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) { 5014 int Elt = Bytes[I]; 5015 if (Elt >= 0) { 5016 // Make sure that the two permute vectors use the same suboperand 5017 // byte number. Only the operand numbers (the high bits) are 5018 // allowed to differ. 5019 if ((Elt ^ P.Bytes[I]) & (SystemZ::VectorBytes - 1)) 5020 return false; 5021 int ModelOpNo = P.Bytes[I] / SystemZ::VectorBytes; 5022 int RealOpNo = unsigned(Elt) / SystemZ::VectorBytes; 5023 // Make sure that the operand mappings are consistent with previous 5024 // elements. 5025 if (OpNos[ModelOpNo] == 1 - RealOpNo) 5026 return false; 5027 OpNos[ModelOpNo] = RealOpNo; 5028 } 5029 } 5030 return chooseShuffleOpNos(OpNos, OpNo0, OpNo1); 5031 } 5032 5033 // As above, but search for a matching permute. 5034 static const Permute *matchPermute(const SmallVectorImpl<int> &Bytes, 5035 unsigned &OpNo0, unsigned &OpNo1) { 5036 for (auto &P : PermuteForms) 5037 if (matchPermute(Bytes, P, OpNo0, OpNo1)) 5038 return &P; 5039 return nullptr; 5040 } 5041 5042 // Bytes is a VPERM-like permute vector, except that -1 is used for 5043 // undefined bytes. This permute is an operand of an outer permute. 5044 // See whether redistributing the -1 bytes gives a shuffle that can be 5045 // implemented using P. If so, set Transform to a VPERM-like permute vector 5046 // that, when applied to the result of P, gives the original permute in Bytes. 5047 static bool matchDoublePermute(const SmallVectorImpl<int> &Bytes, 5048 const Permute &P, 5049 SmallVectorImpl<int> &Transform) { 5050 unsigned To = 0; 5051 for (unsigned From = 0; From < SystemZ::VectorBytes; ++From) { 5052 int Elt = Bytes[From]; 5053 if (Elt < 0) 5054 // Byte number From of the result is undefined. 5055 Transform[From] = -1; 5056 else { 5057 while (P.Bytes[To] != Elt) { 5058 To += 1; 5059 if (To == SystemZ::VectorBytes) 5060 return false; 5061 } 5062 Transform[From] = To; 5063 } 5064 } 5065 return true; 5066 } 5067 5068 // As above, but search for a matching permute. 
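// Returns the matching PermuteForms entry and fills in Transform, or returns
// null if no single merge, pack or VPDI pattern works.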
5069 static const Permute *matchDoublePermute(const SmallVectorImpl<int> &Bytes, 5070 SmallVectorImpl<int> &Transform) { 5071 for (auto &P : PermuteForms) 5072 if (matchDoublePermute(Bytes, P, Transform)) 5073 return &P; 5074 return nullptr; 5075 } 5076 5077 // Convert the mask of the given shuffle op into a byte-level mask, 5078 // as if it had type vNi8. 5079 static bool getVPermMask(SDValue ShuffleOp, 5080 SmallVectorImpl<int> &Bytes) { 5081 EVT VT = ShuffleOp.getValueType(); 5082 unsigned NumElements = VT.getVectorNumElements(); 5083 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize(); 5084 5085 if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(ShuffleOp)) { 5086 Bytes.resize(NumElements * BytesPerElement, -1); 5087 for (unsigned I = 0; I < NumElements; ++I) { 5088 int Index = VSN->getMaskElt(I); 5089 if (Index >= 0) 5090 for (unsigned J = 0; J < BytesPerElement; ++J) 5091 Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J; 5092 } 5093 return true; 5094 } 5095 if (SystemZISD::SPLAT == ShuffleOp.getOpcode() && 5096 isa<ConstantSDNode>(ShuffleOp.getOperand(1))) { 5097 unsigned Index = ShuffleOp.getConstantOperandVal(1); 5098 Bytes.resize(NumElements * BytesPerElement, -1); 5099 for (unsigned I = 0; I < NumElements; ++I) 5100 for (unsigned J = 0; J < BytesPerElement; ++J) 5101 Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J; 5102 return true; 5103 } 5104 return false; 5105 } 5106 5107 // Bytes is a VPERM-like permute vector, except that -1 is used for 5108 // undefined bytes. See whether bytes [Start, Start + BytesPerElement) of 5109 // the result come from a contiguous sequence of bytes from one input. 5110 // Set Base to the selector for the first byte if so. 5111 static bool getShuffleInput(const SmallVectorImpl<int> &Bytes, unsigned Start, 5112 unsigned BytesPerElement, int &Base) { 5113 Base = -1; 5114 for (unsigned I = 0; I < BytesPerElement; ++I) { 5115 if (Bytes[Start + I] >= 0) { 5116 unsigned Elem = Bytes[Start + I]; 5117 if (Base < 0) { 5118 Base = Elem - I; 5119 // Make sure the bytes would come from one input operand. 5120 if (unsigned(Base) % Bytes.size() + BytesPerElement > Bytes.size()) 5121 return false; 5122 } else if (unsigned(Base) != Elem - I) 5123 return false; 5124 } 5125 } 5126 return true; 5127 } 5128 5129 // Bytes is a VPERM-like permute vector, except that -1 is used for 5130 // undefined bytes. Return true if it can be performed using VSLDB. 5131 // When returning true, set StartIndex to the shift amount and OpNo0 5132 // and OpNo1 to the VPERM operands that should be used as the first 5133 // and second shift operand respectively. 5134 static bool isShlDoublePermute(const SmallVectorImpl<int> &Bytes, 5135 unsigned &StartIndex, unsigned &OpNo0, 5136 unsigned &OpNo1) { 5137 int OpNos[] = { -1, -1 }; 5138 int Shift = -1; 5139 for (unsigned I = 0; I < 16; ++I) { 5140 int Index = Bytes[I]; 5141 if (Index >= 0) { 5142 int ExpectedShift = (Index - I) % SystemZ::VectorBytes; 5143 int ModelOpNo = unsigned(ExpectedShift + I) / SystemZ::VectorBytes; 5144 int RealOpNo = unsigned(Index) / SystemZ::VectorBytes; 5145 if (Shift < 0) 5146 Shift = ExpectedShift; 5147 else if (Shift != ExpectedShift) 5148 return false; 5149 // Make sure that the operand mappings are consistent with previous 5150 // elements. 
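      // OpNos[ModelOpNo] is -1 while unconstrained; once set it must keep
      // naming the same shuffle operand, so seeing the other operand
      // (1 - RealOpNo) here is a conflict.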
5151 if (OpNos[ModelOpNo] == 1 - RealOpNo) 5152 return false; 5153 OpNos[ModelOpNo] = RealOpNo; 5154 } 5155 } 5156 StartIndex = Shift; 5157 return chooseShuffleOpNos(OpNos, OpNo0, OpNo1); 5158 } 5159 5160 // Create a node that performs P on operands Op0 and Op1, casting the 5161 // operands to the appropriate type. The type of the result is determined by P. 5162 static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL, 5163 const Permute &P, SDValue Op0, SDValue Op1) { 5164 // VPDI (PERMUTE_DWORDS) always operates on v2i64s. The input 5165 // elements of a PACK are twice as wide as the outputs. 5166 unsigned InBytes = (P.Opcode == SystemZISD::PERMUTE_DWORDS ? 8 : 5167 P.Opcode == SystemZISD::PACK ? P.Operand * 2 : 5168 P.Operand); 5169 // Cast both operands to the appropriate type. 5170 MVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBytes * 8), 5171 SystemZ::VectorBytes / InBytes); 5172 Op0 = DAG.getNode(ISD::BITCAST, DL, InVT, Op0); 5173 Op1 = DAG.getNode(ISD::BITCAST, DL, InVT, Op1); 5174 SDValue Op; 5175 if (P.Opcode == SystemZISD::PERMUTE_DWORDS) { 5176 SDValue Op2 = DAG.getTargetConstant(P.Operand, DL, MVT::i32); 5177 Op = DAG.getNode(SystemZISD::PERMUTE_DWORDS, DL, InVT, Op0, Op1, Op2); 5178 } else if (P.Opcode == SystemZISD::PACK) { 5179 MVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(P.Operand * 8), 5180 SystemZ::VectorBytes / P.Operand); 5181 Op = DAG.getNode(SystemZISD::PACK, DL, OutVT, Op0, Op1); 5182 } else { 5183 Op = DAG.getNode(P.Opcode, DL, InVT, Op0, Op1); 5184 } 5185 return Op; 5186 } 5187 5188 static bool isZeroVector(SDValue N) { 5189 if (N->getOpcode() == ISD::BITCAST) 5190 N = N->getOperand(0); 5191 if (N->getOpcode() == ISD::SPLAT_VECTOR) 5192 if (auto *Op = dyn_cast<ConstantSDNode>(N->getOperand(0))) 5193 return Op->getZExtValue() == 0; 5194 return ISD::isBuildVectorAllZeros(N.getNode()); 5195 } 5196 5197 // Return the index of the zero/undef vector, or UINT32_MAX if not found. 5198 static uint32_t findZeroVectorIdx(SDValue *Ops, unsigned Num) { 5199 for (unsigned I = 0; I < Num ; I++) 5200 if (isZeroVector(Ops[I])) 5201 return I; 5202 return UINT32_MAX; 5203 } 5204 5205 // Bytes is a VPERM-like permute vector, except that -1 is used for 5206 // undefined bytes. Implement it on operands Ops[0] and Ops[1] using 5207 // VSLDB or VPERM. 5208 static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL, 5209 SDValue *Ops, 5210 const SmallVectorImpl<int> &Bytes) { 5211 for (unsigned I = 0; I < 2; ++I) 5212 Ops[I] = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Ops[I]); 5213 5214 // First see whether VSLDB can be used. 5215 unsigned StartIndex, OpNo0, OpNo1; 5216 if (isShlDoublePermute(Bytes, StartIndex, OpNo0, OpNo1)) 5217 return DAG.getNode(SystemZISD::SHL_DOUBLE, DL, MVT::v16i8, Ops[OpNo0], 5218 Ops[OpNo1], 5219 DAG.getTargetConstant(StartIndex, DL, MVT::i32)); 5220 5221 // Fall back on VPERM. Construct an SDNode for the permute vector. Try to 5222 // eliminate a zero vector by reusing any zero index in the permute vector. 5223 unsigned ZeroVecIdx = findZeroVectorIdx(&Ops[0], 2); 5224 if (ZeroVecIdx != UINT32_MAX) { 5225 bool MaskFirst = true; 5226 int ZeroIdx = -1; 5227 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) { 5228 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes; 5229 unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes; 5230 if (OpNo == ZeroVecIdx && I == 0) { 5231 // If the first byte is zero, use mask as first operand. 
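        // The permute mask itself then doubles as the source of zero bytes:
        // ZeroIdx is a selector value that points at a mask element known to
        // hold zero.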
5232 ZeroIdx = 0; 5233 break; 5234 } 5235 if (OpNo != ZeroVecIdx && Byte == 0) { 5236 // If mask contains a zero, use it by placing that vector first. 5237 ZeroIdx = I + SystemZ::VectorBytes; 5238 MaskFirst = false; 5239 break; 5240 } 5241 } 5242 if (ZeroIdx != -1) { 5243 SDValue IndexNodes[SystemZ::VectorBytes]; 5244 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) { 5245 if (Bytes[I] >= 0) { 5246 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes; 5247 unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes; 5248 if (OpNo == ZeroVecIdx) 5249 IndexNodes[I] = DAG.getConstant(ZeroIdx, DL, MVT::i32); 5250 else { 5251 unsigned BIdx = MaskFirst ? Byte + SystemZ::VectorBytes : Byte; 5252 IndexNodes[I] = DAG.getConstant(BIdx, DL, MVT::i32); 5253 } 5254 } else 5255 IndexNodes[I] = DAG.getUNDEF(MVT::i32); 5256 } 5257 SDValue Mask = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes); 5258 SDValue Src = ZeroVecIdx == 0 ? Ops[1] : Ops[0]; 5259 if (MaskFirst) 5260 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Mask, Src, 5261 Mask); 5262 else 5263 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Src, Mask, 5264 Mask); 5265 } 5266 } 5267 5268 SDValue IndexNodes[SystemZ::VectorBytes]; 5269 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) 5270 if (Bytes[I] >= 0) 5271 IndexNodes[I] = DAG.getConstant(Bytes[I], DL, MVT::i32); 5272 else 5273 IndexNodes[I] = DAG.getUNDEF(MVT::i32); 5274 SDValue Op2 = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes); 5275 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0], 5276 (!Ops[1].isUndef() ? Ops[1] : Ops[0]), Op2); 5277 } 5278 5279 namespace { 5280 // Describes a general N-operand vector shuffle. 5281 struct GeneralShuffle { 5282 GeneralShuffle(EVT vt) : VT(vt), UnpackFromEltSize(UINT_MAX) {} 5283 void addUndef(); 5284 bool add(SDValue, unsigned); 5285 SDValue getNode(SelectionDAG &, const SDLoc &); 5286 void tryPrepareForUnpack(); 5287 bool unpackWasPrepared() { return UnpackFromEltSize <= 4; } 5288 SDValue insertUnpackIfPrepared(SelectionDAG &DAG, const SDLoc &DL, SDValue Op); 5289 5290 // The operands of the shuffle. 5291 SmallVector<SDValue, SystemZ::VectorBytes> Ops; 5292 5293 // Index I is -1 if byte I of the result is undefined. Otherwise the 5294 // result comes from byte Bytes[I] % SystemZ::VectorBytes of operand 5295 // Bytes[I] / SystemZ::VectorBytes. 5296 SmallVector<int, SystemZ::VectorBytes> Bytes; 5297 5298 // The type of the shuffle result. 5299 EVT VT; 5300 5301 // Holds a value of 1, 2 or 4 if a final unpack has been prepared for. 5302 unsigned UnpackFromEltSize; 5303 }; 5304 } 5305 5306 // Add an extra undefined element to the shuffle. 5307 void GeneralShuffle::addUndef() { 5308 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize(); 5309 for (unsigned I = 0; I < BytesPerElement; ++I) 5310 Bytes.push_back(-1); 5311 } 5312 5313 // Add an extra element to the shuffle, taking it from element Elem of Op. 5314 // A null Op indicates a vector input whose value will be calculated later; 5315 // there is at most one such input per shuffle and it always has the same 5316 // type as the result. Aborts and returns false if the source vector elements 5317 // of an EXTRACT_VECTOR_ELT are smaller than the destination elements. Per 5318 // LLVM they become implicitly extended, but this is rare and not optimized. 
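// For example, adding element 3 of a v4i32 operand to a v8i16 shuffle result
// records bytes 14-15 of that operand (the least significant half of the i32,
// since SystemZ is big-endian).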
5319 bool GeneralShuffle::add(SDValue Op, unsigned Elem) { 5320 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize(); 5321 5322 // The source vector can have wider elements than the result, 5323 // either through an explicit TRUNCATE or because of type legalization. 5324 // We want the least significant part. 5325 EVT FromVT = Op.getNode() ? Op.getValueType() : VT; 5326 unsigned FromBytesPerElement = FromVT.getVectorElementType().getStoreSize(); 5327 5328 // Return false if the source elements are smaller than their destination 5329 // elements. 5330 if (FromBytesPerElement < BytesPerElement) 5331 return false; 5332 5333 unsigned Byte = ((Elem * FromBytesPerElement) % SystemZ::VectorBytes + 5334 (FromBytesPerElement - BytesPerElement)); 5335 5336 // Look through things like shuffles and bitcasts. 5337 while (Op.getNode()) { 5338 if (Op.getOpcode() == ISD::BITCAST) 5339 Op = Op.getOperand(0); 5340 else if (Op.getOpcode() == ISD::VECTOR_SHUFFLE && Op.hasOneUse()) { 5341 // See whether the bytes we need come from a contiguous part of one 5342 // operand. 5343 SmallVector<int, SystemZ::VectorBytes> OpBytes; 5344 if (!getVPermMask(Op, OpBytes)) 5345 break; 5346 int NewByte; 5347 if (!getShuffleInput(OpBytes, Byte, BytesPerElement, NewByte)) 5348 break; 5349 if (NewByte < 0) { 5350 addUndef(); 5351 return true; 5352 } 5353 Op = Op.getOperand(unsigned(NewByte) / SystemZ::VectorBytes); 5354 Byte = unsigned(NewByte) % SystemZ::VectorBytes; 5355 } else if (Op.isUndef()) { 5356 addUndef(); 5357 return true; 5358 } else 5359 break; 5360 } 5361 5362 // Make sure that the source of the extraction is in Ops. 5363 unsigned OpNo = 0; 5364 for (; OpNo < Ops.size(); ++OpNo) 5365 if (Ops[OpNo] == Op) 5366 break; 5367 if (OpNo == Ops.size()) 5368 Ops.push_back(Op); 5369 5370 // Add the element to Bytes. 5371 unsigned Base = OpNo * SystemZ::VectorBytes + Byte; 5372 for (unsigned I = 0; I < BytesPerElement; ++I) 5373 Bytes.push_back(Base + I); 5374 5375 return true; 5376 } 5377 5378 // Return SDNodes for the completed shuffle. 5379 SDValue GeneralShuffle::getNode(SelectionDAG &DAG, const SDLoc &DL) { 5380 assert(Bytes.size() == SystemZ::VectorBytes && "Incomplete vector"); 5381 5382 if (Ops.size() == 0) 5383 return DAG.getUNDEF(VT); 5384 5385 // Use a single unpack if possible as the last operation. 5386 tryPrepareForUnpack(); 5387 5388 // Make sure that there are at least two shuffle operands. 5389 if (Ops.size() == 1) 5390 Ops.push_back(DAG.getUNDEF(MVT::v16i8)); 5391 5392 // Create a tree of shuffles, deferring root node until after the loop. 5393 // Try to redistribute the undefined elements of non-root nodes so that 5394 // the non-root shuffles match something like a pack or merge, then adjust 5395 // the parent node's permute vector to compensate for the new order. 5396 // Among other things, this copes with vectors like <2 x i16> that were 5397 // padded with undefined elements during type legalization. 5398 // 5399 // In the best case this redistribution will lead to the whole tree 5400 // using packs and merges. It should rarely be a loss in other cases. 5401 unsigned Stride = 1; 5402 for (; Stride * 2 < Ops.size(); Stride *= 2) { 5403 for (unsigned I = 0; I < Ops.size() - Stride; I += Stride * 2) { 5404 SDValue SubOps[] = { Ops[I], Ops[I + Stride] }; 5405 5406 // Create a mask for just these two operands. 
5407 SmallVector<int, SystemZ::VectorBytes> NewBytes(SystemZ::VectorBytes); 5408 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) { 5409 unsigned OpNo = unsigned(Bytes[J]) / SystemZ::VectorBytes; 5410 unsigned Byte = unsigned(Bytes[J]) % SystemZ::VectorBytes; 5411 if (OpNo == I) 5412 NewBytes[J] = Byte; 5413 else if (OpNo == I + Stride) 5414 NewBytes[J] = SystemZ::VectorBytes + Byte; 5415 else 5416 NewBytes[J] = -1; 5417 } 5418 // See if it would be better to reorganize NewMask to avoid using VPERM. 5419 SmallVector<int, SystemZ::VectorBytes> NewBytesMap(SystemZ::VectorBytes); 5420 if (const Permute *P = matchDoublePermute(NewBytes, NewBytesMap)) { 5421 Ops[I] = getPermuteNode(DAG, DL, *P, SubOps[0], SubOps[1]); 5422 // Applying NewBytesMap to Ops[I] gets back to NewBytes. 5423 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) { 5424 if (NewBytes[J] >= 0) { 5425 assert(unsigned(NewBytesMap[J]) < SystemZ::VectorBytes && 5426 "Invalid double permute"); 5427 Bytes[J] = I * SystemZ::VectorBytes + NewBytesMap[J]; 5428 } else 5429 assert(NewBytesMap[J] < 0 && "Invalid double permute"); 5430 } 5431 } else { 5432 // Just use NewBytes on the operands. 5433 Ops[I] = getGeneralPermuteNode(DAG, DL, SubOps, NewBytes); 5434 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) 5435 if (NewBytes[J] >= 0) 5436 Bytes[J] = I * SystemZ::VectorBytes + J; 5437 } 5438 } 5439 } 5440 5441 // Now we just have 2 inputs. Put the second operand in Ops[1]. 5442 if (Stride > 1) { 5443 Ops[1] = Ops[Stride]; 5444 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) 5445 if (Bytes[I] >= int(SystemZ::VectorBytes)) 5446 Bytes[I] -= (Stride - 1) * SystemZ::VectorBytes; 5447 } 5448 5449 // Look for an instruction that can do the permute without resorting 5450 // to VPERM. 5451 unsigned OpNo0, OpNo1; 5452 SDValue Op; 5453 if (unpackWasPrepared() && Ops[1].isUndef()) 5454 Op = Ops[0]; 5455 else if (const Permute *P = matchPermute(Bytes, OpNo0, OpNo1)) 5456 Op = getPermuteNode(DAG, DL, *P, Ops[OpNo0], Ops[OpNo1]); 5457 else 5458 Op = getGeneralPermuteNode(DAG, DL, &Ops[0], Bytes); 5459 5460 Op = insertUnpackIfPrepared(DAG, DL, Op); 5461 5462 return DAG.getNode(ISD::BITCAST, DL, VT, Op); 5463 } 5464 5465 #ifndef NDEBUG 5466 static void dumpBytes(const SmallVectorImpl<int> &Bytes, std::string Msg) { 5467 dbgs() << Msg.c_str() << " { "; 5468 for (unsigned i = 0; i < Bytes.size(); i++) 5469 dbgs() << Bytes[i] << " "; 5470 dbgs() << "}\n"; 5471 } 5472 #endif 5473 5474 // If the Bytes vector matches an unpack operation, prepare to do the unpack 5475 // after all else by removing the zero vector and the effect of the unpack on 5476 // Bytes. 5477 void GeneralShuffle::tryPrepareForUnpack() { 5478 uint32_t ZeroVecOpNo = findZeroVectorIdx(&Ops[0], Ops.size()); 5479 if (ZeroVecOpNo == UINT32_MAX || Ops.size() == 1) 5480 return; 5481 5482 // Only do this if removing the zero vector reduces the depth, otherwise 5483 // the critical path will increase with the final unpack. 5484 if (Ops.size() > 2 && 5485 Log2_32_Ceil(Ops.size()) == Log2_32_Ceil(Ops.size() - 1)) 5486 return; 5487 5488 // Find an unpack that would allow removing the zero vector from Ops. 
5489 UnpackFromEltSize = 1; 5490 for (; UnpackFromEltSize <= 4; UnpackFromEltSize *= 2) { 5491 bool MatchUnpack = true; 5492 SmallVector<int, SystemZ::VectorBytes> SrcBytes; 5493 for (unsigned Elt = 0; Elt < SystemZ::VectorBytes; Elt++) { 5494 unsigned ToEltSize = UnpackFromEltSize * 2; 5495 bool IsZextByte = (Elt % ToEltSize) < UnpackFromEltSize; 5496 if (!IsZextByte) 5497 SrcBytes.push_back(Bytes[Elt]); 5498 if (Bytes[Elt] != -1) { 5499 unsigned OpNo = unsigned(Bytes[Elt]) / SystemZ::VectorBytes; 5500 if (IsZextByte != (OpNo == ZeroVecOpNo)) { 5501 MatchUnpack = false; 5502 break; 5503 } 5504 } 5505 } 5506 if (MatchUnpack) { 5507 if (Ops.size() == 2) { 5508 // Don't use unpack if a single source operand needs rearrangement. 5509 for (unsigned i = 0; i < SystemZ::VectorBytes / 2; i++) 5510 if (SrcBytes[i] != -1 && SrcBytes[i] % 16 != int(i)) { 5511 UnpackFromEltSize = UINT_MAX; 5512 return; 5513 } 5514 } 5515 break; 5516 } 5517 } 5518 if (UnpackFromEltSize > 4) 5519 return; 5520 5521 LLVM_DEBUG(dbgs() << "Preparing for final unpack of element size " 5522 << UnpackFromEltSize << ". Zero vector is Op#" << ZeroVecOpNo 5523 << ".\n"; 5524 dumpBytes(Bytes, "Original Bytes vector:");); 5525 5526 // Apply the unpack in reverse to the Bytes array. 5527 unsigned B = 0; 5528 for (unsigned Elt = 0; Elt < SystemZ::VectorBytes;) { 5529 Elt += UnpackFromEltSize; 5530 for (unsigned i = 0; i < UnpackFromEltSize; i++, Elt++, B++) 5531 Bytes[B] = Bytes[Elt]; 5532 } 5533 while (B < SystemZ::VectorBytes) 5534 Bytes[B++] = -1; 5535 5536 // Remove the zero vector from Ops 5537 Ops.erase(&Ops[ZeroVecOpNo]); 5538 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) 5539 if (Bytes[I] >= 0) { 5540 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes; 5541 if (OpNo > ZeroVecOpNo) 5542 Bytes[I] -= SystemZ::VectorBytes; 5543 } 5544 5545 LLVM_DEBUG(dumpBytes(Bytes, "Resulting Bytes vector, zero vector removed:"); 5546 dbgs() << "\n";); 5547 } 5548 5549 SDValue GeneralShuffle::insertUnpackIfPrepared(SelectionDAG &DAG, 5550 const SDLoc &DL, 5551 SDValue Op) { 5552 if (!unpackWasPrepared()) 5553 return Op; 5554 unsigned InBits = UnpackFromEltSize * 8; 5555 EVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBits), 5556 SystemZ::VectorBits / InBits); 5557 SDValue PackedOp = DAG.getNode(ISD::BITCAST, DL, InVT, Op); 5558 unsigned OutBits = InBits * 2; 5559 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(OutBits), 5560 SystemZ::VectorBits / OutBits); 5561 return DAG.getNode(SystemZISD::UNPACKL_HIGH, DL, OutVT, PackedOp); 5562 } 5563 5564 // Return true if the given BUILD_VECTOR is a scalar-to-vector conversion. 5565 static bool isScalarToVector(SDValue Op) { 5566 for (unsigned I = 1, E = Op.getNumOperands(); I != E; ++I) 5567 if (!Op.getOperand(I).isUndef()) 5568 return false; 5569 return true; 5570 } 5571 5572 // Return a vector of type VT that contains Value in the first element. 5573 // The other elements don't matter. 5574 static SDValue buildScalarToVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT, 5575 SDValue Value) { 5576 // If we have a constant, replicate it to all elements and let the 5577 // BUILD_VECTOR lowering take care of it. 
5578 if (Value.getOpcode() == ISD::Constant || 5579 Value.getOpcode() == ISD::ConstantFP) { 5580 SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Value); 5581 return DAG.getBuildVector(VT, DL, Ops); 5582 } 5583 if (Value.isUndef()) 5584 return DAG.getUNDEF(VT); 5585 return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value); 5586 } 5587 5588 // Return a vector of type VT in which Op0 is in element 0 and Op1 is in 5589 // element 1. Used for cases in which replication is cheap. 5590 static SDValue buildMergeScalars(SelectionDAG &DAG, const SDLoc &DL, EVT VT, 5591 SDValue Op0, SDValue Op1) { 5592 if (Op0.isUndef()) { 5593 if (Op1.isUndef()) 5594 return DAG.getUNDEF(VT); 5595 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op1); 5596 } 5597 if (Op1.isUndef()) 5598 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0); 5599 return DAG.getNode(SystemZISD::MERGE_HIGH, DL, VT, 5600 buildScalarToVector(DAG, DL, VT, Op0), 5601 buildScalarToVector(DAG, DL, VT, Op1)); 5602 } 5603 5604 // Extend GPR scalars Op0 and Op1 to doublewords and return a v2i64 5605 // vector for them. 5606 static SDValue joinDwords(SelectionDAG &DAG, const SDLoc &DL, SDValue Op0, 5607 SDValue Op1) { 5608 if (Op0.isUndef() && Op1.isUndef()) 5609 return DAG.getUNDEF(MVT::v2i64); 5610 // If one of the two inputs is undefined then replicate the other one, 5611 // in order to avoid using another register unnecessarily. 5612 if (Op0.isUndef()) 5613 Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1); 5614 else if (Op1.isUndef()) 5615 Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0); 5616 else { 5617 Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0); 5618 Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1); 5619 } 5620 return DAG.getNode(SystemZISD::JOIN_DWORDS, DL, MVT::v2i64, Op0, Op1); 5621 } 5622 5623 // If a BUILD_VECTOR contains some EXTRACT_VECTOR_ELTs, it's usually 5624 // better to use VECTOR_SHUFFLEs on them, only using BUILD_VECTOR for 5625 // the non-EXTRACT_VECTOR_ELT elements. See if the given BUILD_VECTOR 5626 // would benefit from this representation and return it if so. 5627 static SDValue tryBuildVectorShuffle(SelectionDAG &DAG, 5628 BuildVectorSDNode *BVN) { 5629 EVT VT = BVN->getValueType(0); 5630 unsigned NumElements = VT.getVectorNumElements(); 5631 5632 // Represent the BUILD_VECTOR as an N-operand VECTOR_SHUFFLE-like operation 5633 // on byte vectors. If there are non-EXTRACT_VECTOR_ELT elements that still 5634 // need a BUILD_VECTOR, add an additional placeholder operand for that 5635 // BUILD_VECTOR and store its operands in ResidueOps. 5636 GeneralShuffle GS(VT); 5637 SmallVector<SDValue, SystemZ::VectorBytes> ResidueOps; 5638 bool FoundOne = false; 5639 for (unsigned I = 0; I < NumElements; ++I) { 5640 SDValue Op = BVN->getOperand(I); 5641 if (Op.getOpcode() == ISD::TRUNCATE) 5642 Op = Op.getOperand(0); 5643 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && 5644 Op.getOperand(1).getOpcode() == ISD::Constant) { 5645 unsigned Elem = Op.getConstantOperandVal(1); 5646 if (!GS.add(Op.getOperand(0), Elem)) 5647 return SDValue(); 5648 FoundOne = true; 5649 } else if (Op.isUndef()) { 5650 GS.addUndef(); 5651 } else { 5652 if (!GS.add(SDValue(), ResidueOps.size())) 5653 return SDValue(); 5654 ResidueOps.push_back(BVN->getOperand(I)); 5655 } 5656 } 5657 5658 // Nothing to do if there are no EXTRACT_VECTOR_ELTs. 5659 if (!FoundOne) 5660 return SDValue(); 5661 5662 // Create the BUILD_VECTOR for the remaining elements, if any. 
5663 if (!ResidueOps.empty()) { 5664 while (ResidueOps.size() < NumElements) 5665 ResidueOps.push_back(DAG.getUNDEF(ResidueOps[0].getValueType())); 5666 for (auto &Op : GS.Ops) { 5667 if (!Op.getNode()) { 5668 Op = DAG.getBuildVector(VT, SDLoc(BVN), ResidueOps); 5669 break; 5670 } 5671 } 5672 } 5673 return GS.getNode(DAG, SDLoc(BVN)); 5674 } 5675 5676 bool SystemZTargetLowering::isVectorElementLoad(SDValue Op) const { 5677 if (Op.getOpcode() == ISD::LOAD && cast<LoadSDNode>(Op)->isUnindexed()) 5678 return true; 5679 if (auto *AL = dyn_cast<AtomicSDNode>(Op)) 5680 if (AL->getOpcode() == ISD::ATOMIC_LOAD) 5681 return true; 5682 if (Subtarget.hasVectorEnhancements2() && Op.getOpcode() == SystemZISD::LRV) 5683 return true; 5684 return false; 5685 } 5686 5687 // Combine GPR scalar values Elems into a vector of type VT. 5688 SDValue 5689 SystemZTargetLowering::buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT, 5690 SmallVectorImpl<SDValue> &Elems) const { 5691 // See whether there is a single replicated value. 5692 SDValue Single; 5693 unsigned int NumElements = Elems.size(); 5694 unsigned int Count = 0; 5695 for (auto Elem : Elems) { 5696 if (!Elem.isUndef()) { 5697 if (!Single.getNode()) 5698 Single = Elem; 5699 else if (Elem != Single) { 5700 Single = SDValue(); 5701 break; 5702 } 5703 Count += 1; 5704 } 5705 } 5706 // There are three cases here: 5707 // 5708 // - if the only defined element is a loaded one, the best sequence 5709 // is a replicating load. 5710 // 5711 // - otherwise, if the only defined element is an i64 value, we will 5712 // end up with the same VLVGP sequence regardless of whether we short-cut 5713 // for replication or fall through to the later code. 5714 // 5715 // - otherwise, if the only defined element is an i32 or smaller value, 5716 // we would need 2 instructions to replicate it: VLVGP followed by VREPx. 5717 // This is only a win if the single defined element is used more than once. 5718 // In other cases we're better off using a single VLVGx. 5719 if (Single.getNode() && (Count > 1 || isVectorElementLoad(Single))) 5720 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Single); 5721 5722 // If all elements are loads, use VLREP/VLEs (below). 5723 bool AllLoads = true; 5724 for (auto Elem : Elems) 5725 if (!isVectorElementLoad(Elem)) { 5726 AllLoads = false; 5727 break; 5728 } 5729 5730 // The best way of building a v2i64 from two i64s is to use VLVGP. 5731 if (VT == MVT::v2i64 && !AllLoads) 5732 return joinDwords(DAG, DL, Elems[0], Elems[1]); 5733 5734 // Use a 64-bit merge high to combine two doubles. 5735 if (VT == MVT::v2f64 && !AllLoads) 5736 return buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]); 5737 5738 // Build v4f32 values directly from the FPRs: 5739 // 5740 // <Axxx> <Bxxx> <Cxxxx> <Dxxx> 5741 // V V VMRHF 5742 // <ABxx> <CDxx> 5743 // V VMRHG 5744 // <ABCD> 5745 if (VT == MVT::v4f32 && !AllLoads) { 5746 SDValue Op01 = buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]); 5747 SDValue Op23 = buildMergeScalars(DAG, DL, VT, Elems[2], Elems[3]); 5748 // Avoid unnecessary undefs by reusing the other operand. 5749 if (Op01.isUndef()) 5750 Op01 = Op23; 5751 else if (Op23.isUndef()) 5752 Op23 = Op01; 5753 // Merging identical replications is a no-op. 
5754 if (Op01.getOpcode() == SystemZISD::REPLICATE && Op01 == Op23) 5755 return Op01; 5756 Op01 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op01); 5757 Op23 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op23); 5758 SDValue Op = DAG.getNode(SystemZISD::MERGE_HIGH, 5759 DL, MVT::v2i64, Op01, Op23); 5760 return DAG.getNode(ISD::BITCAST, DL, VT, Op); 5761 } 5762 5763 // Collect the constant terms. 5764 SmallVector<SDValue, SystemZ::VectorBytes> Constants(NumElements, SDValue()); 5765 SmallVector<bool, SystemZ::VectorBytes> Done(NumElements, false); 5766 5767 unsigned NumConstants = 0; 5768 for (unsigned I = 0; I < NumElements; ++I) { 5769 SDValue Elem = Elems[I]; 5770 if (Elem.getOpcode() == ISD::Constant || 5771 Elem.getOpcode() == ISD::ConstantFP) { 5772 NumConstants += 1; 5773 Constants[I] = Elem; 5774 Done[I] = true; 5775 } 5776 } 5777 // If there was at least one constant, fill in the other elements of 5778 // Constants with undefs to get a full vector constant and use that 5779 // as the starting point. 5780 SDValue Result; 5781 SDValue ReplicatedVal; 5782 if (NumConstants > 0) { 5783 for (unsigned I = 0; I < NumElements; ++I) 5784 if (!Constants[I].getNode()) 5785 Constants[I] = DAG.getUNDEF(Elems[I].getValueType()); 5786 Result = DAG.getBuildVector(VT, DL, Constants); 5787 } else { 5788 // Otherwise try to use VLREP or VLVGP to start the sequence in order to 5789 // avoid a false dependency on any previous contents of the vector 5790 // register. 5791 5792 // Use a VLREP if at least one element is a load. Make sure to replicate 5793 // the load with the most elements having its value. 5794 std::map<const SDNode*, unsigned> UseCounts; 5795 SDNode *LoadMaxUses = nullptr; 5796 for (unsigned I = 0; I < NumElements; ++I) 5797 if (isVectorElementLoad(Elems[I])) { 5798 SDNode *Ld = Elems[I].getNode(); 5799 UseCounts[Ld]++; 5800 if (LoadMaxUses == nullptr || UseCounts[LoadMaxUses] < UseCounts[Ld]) 5801 LoadMaxUses = Ld; 5802 } 5803 if (LoadMaxUses != nullptr) { 5804 ReplicatedVal = SDValue(LoadMaxUses, 0); 5805 Result = DAG.getNode(SystemZISD::REPLICATE, DL, VT, ReplicatedVal); 5806 } else { 5807 // Try to use VLVGP. 5808 unsigned I1 = NumElements / 2 - 1; 5809 unsigned I2 = NumElements - 1; 5810 bool Def1 = !Elems[I1].isUndef(); 5811 bool Def2 = !Elems[I2].isUndef(); 5812 if (Def1 || Def2) { 5813 SDValue Elem1 = Elems[Def1 ? I1 : I2]; 5814 SDValue Elem2 = Elems[Def2 ? I2 : I1]; 5815 Result = DAG.getNode(ISD::BITCAST, DL, VT, 5816 joinDwords(DAG, DL, Elem1, Elem2)); 5817 Done[I1] = true; 5818 Done[I2] = true; 5819 } else 5820 Result = DAG.getUNDEF(VT); 5821 } 5822 } 5823 5824 // Use VLVGx to insert the other elements. 5825 for (unsigned I = 0; I < NumElements; ++I) 5826 if (!Done[I] && !Elems[I].isUndef() && Elems[I] != ReplicatedVal) 5827 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Result, Elems[I], 5828 DAG.getConstant(I, DL, MVT::i32)); 5829 return Result; 5830 } 5831 5832 SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op, 5833 SelectionDAG &DAG) const { 5834 auto *BVN = cast<BuildVectorSDNode>(Op.getNode()); 5835 SDLoc DL(Op); 5836 EVT VT = Op.getValueType(); 5837 5838 if (BVN->isConstant()) { 5839 if (SystemZVectorConstantInfo(BVN).isVectorConstantLegal(Subtarget)) 5840 return Op; 5841 5842 // Fall back to loading it from memory. 5843 return SDValue(); 5844 } 5845 5846 // See if we should use shuffles to construct the vector from other vectors. 5847 if (SDValue Res = tryBuildVectorShuffle(DAG, BVN)) 5848 return Res; 5849 5850 // Detect SCALAR_TO_VECTOR conversions. 
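  // A BUILD_VECTOR in which every element other than element 0 is undef is
  // really just a scalar-to-vector insertion.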
5851 if (isOperationLegal(ISD::SCALAR_TO_VECTOR, VT) && isScalarToVector(Op)) 5852 return buildScalarToVector(DAG, DL, VT, Op.getOperand(0)); 5853 5854 // Otherwise use buildVector to build the vector up from GPRs. 5855 unsigned NumElements = Op.getNumOperands(); 5856 SmallVector<SDValue, SystemZ::VectorBytes> Ops(NumElements); 5857 for (unsigned I = 0; I < NumElements; ++I) 5858 Ops[I] = Op.getOperand(I); 5859 return buildVector(DAG, DL, VT, Ops); 5860 } 5861 5862 SDValue SystemZTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op, 5863 SelectionDAG &DAG) const { 5864 auto *VSN = cast<ShuffleVectorSDNode>(Op.getNode()); 5865 SDLoc DL(Op); 5866 EVT VT = Op.getValueType(); 5867 unsigned NumElements = VT.getVectorNumElements(); 5868 5869 if (VSN->isSplat()) { 5870 SDValue Op0 = Op.getOperand(0); 5871 unsigned Index = VSN->getSplatIndex(); 5872 assert(Index < VT.getVectorNumElements() && 5873 "Splat index should be defined and in first operand"); 5874 // See whether the value we're splatting is directly available as a scalar. 5875 if ((Index == 0 && Op0.getOpcode() == ISD::SCALAR_TO_VECTOR) || 5876 Op0.getOpcode() == ISD::BUILD_VECTOR) 5877 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0.getOperand(Index)); 5878 // Otherwise keep it as a vector-to-vector operation. 5879 return DAG.getNode(SystemZISD::SPLAT, DL, VT, Op.getOperand(0), 5880 DAG.getTargetConstant(Index, DL, MVT::i32)); 5881 } 5882 5883 GeneralShuffle GS(VT); 5884 for (unsigned I = 0; I < NumElements; ++I) { 5885 int Elt = VSN->getMaskElt(I); 5886 if (Elt < 0) 5887 GS.addUndef(); 5888 else if (!GS.add(Op.getOperand(unsigned(Elt) / NumElements), 5889 unsigned(Elt) % NumElements)) 5890 return SDValue(); 5891 } 5892 return GS.getNode(DAG, SDLoc(VSN)); 5893 } 5894 5895 SDValue SystemZTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op, 5896 SelectionDAG &DAG) const { 5897 SDLoc DL(Op); 5898 // Just insert the scalar into element 0 of an undefined vector. 5899 return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, 5900 Op.getValueType(), DAG.getUNDEF(Op.getValueType()), 5901 Op.getOperand(0), DAG.getConstant(0, DL, MVT::i32)); 5902 } 5903 5904 SDValue SystemZTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, 5905 SelectionDAG &DAG) const { 5906 // Handle insertions of floating-point values. 5907 SDLoc DL(Op); 5908 SDValue Op0 = Op.getOperand(0); 5909 SDValue Op1 = Op.getOperand(1); 5910 SDValue Op2 = Op.getOperand(2); 5911 EVT VT = Op.getValueType(); 5912 5913 // Insertions into constant indices of a v2f64 can be done using VPDI. 5914 // However, if the inserted value is a bitcast or a constant then it's 5915 // better to use GPRs, as below. 5916 if (VT == MVT::v2f64 && 5917 Op1.getOpcode() != ISD::BITCAST && 5918 Op1.getOpcode() != ISD::ConstantFP && 5919 Op2.getOpcode() == ISD::Constant) { 5920 uint64_t Index = Op2->getAsZExtVal(); 5921 unsigned Mask = VT.getVectorNumElements() - 1; 5922 if (Index <= Mask) 5923 return Op; 5924 } 5925 5926 // Otherwise bitcast to the equivalent integer form and insert via a GPR. 5927 MVT IntVT = MVT::getIntegerVT(VT.getScalarSizeInBits()); 5928 MVT IntVecVT = MVT::getVectorVT(IntVT, VT.getVectorNumElements()); 5929 SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntVecVT, 5930 DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0), 5931 DAG.getNode(ISD::BITCAST, DL, IntVT, Op1), Op2); 5932 return DAG.getNode(ISD::BITCAST, DL, VT, Res); 5933 } 5934 5935 SDValue 5936 SystemZTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op, 5937 SelectionDAG &DAG) const { 5938 // Handle extractions of floating-point values. 
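  // In-range constant indices are returned unchanged and handled directly;
  // variable or out-of-range indices go through the equivalent integer
  // vector type and a GPR.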
5939 SDLoc DL(Op); 5940 SDValue Op0 = Op.getOperand(0); 5941 SDValue Op1 = Op.getOperand(1); 5942 EVT VT = Op.getValueType(); 5943 EVT VecVT = Op0.getValueType(); 5944 5945 // Extractions of constant indices can be done directly. 5946 if (auto *CIndexN = dyn_cast<ConstantSDNode>(Op1)) { 5947 uint64_t Index = CIndexN->getZExtValue(); 5948 unsigned Mask = VecVT.getVectorNumElements() - 1; 5949 if (Index <= Mask) 5950 return Op; 5951 } 5952 5953 // Otherwise bitcast to the equivalent integer form and extract via a GPR. 5954 MVT IntVT = MVT::getIntegerVT(VT.getSizeInBits()); 5955 MVT IntVecVT = MVT::getVectorVT(IntVT, VecVT.getVectorNumElements()); 5956 SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IntVT, 5957 DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0), Op1); 5958 return DAG.getNode(ISD::BITCAST, DL, VT, Res); 5959 } 5960 5961 SDValue SystemZTargetLowering:: 5962 lowerSIGN_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const { 5963 SDValue PackedOp = Op.getOperand(0); 5964 EVT OutVT = Op.getValueType(); 5965 EVT InVT = PackedOp.getValueType(); 5966 unsigned ToBits = OutVT.getScalarSizeInBits(); 5967 unsigned FromBits = InVT.getScalarSizeInBits(); 5968 do { 5969 FromBits *= 2; 5970 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(FromBits), 5971 SystemZ::VectorBits / FromBits); 5972 PackedOp = 5973 DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(PackedOp), OutVT, PackedOp); 5974 } while (FromBits != ToBits); 5975 return PackedOp; 5976 } 5977 5978 // Lower a ZERO_EXTEND_VECTOR_INREG to a vector shuffle with a zero vector. 5979 SDValue SystemZTargetLowering:: 5980 lowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const { 5981 SDValue PackedOp = Op.getOperand(0); 5982 SDLoc DL(Op); 5983 EVT OutVT = Op.getValueType(); 5984 EVT InVT = PackedOp.getValueType(); 5985 unsigned InNumElts = InVT.getVectorNumElements(); 5986 unsigned OutNumElts = OutVT.getVectorNumElements(); 5987 unsigned NumInPerOut = InNumElts / OutNumElts; 5988 5989 SDValue ZeroVec = 5990 DAG.getSplatVector(InVT, DL, DAG.getConstant(0, DL, InVT.getScalarType())); 5991 5992 SmallVector<int, 16> Mask(InNumElts); 5993 unsigned ZeroVecElt = InNumElts; 5994 for (unsigned PackedElt = 0; PackedElt < OutNumElts; PackedElt++) { 5995 unsigned MaskElt = PackedElt * NumInPerOut; 5996 unsigned End = MaskElt + NumInPerOut - 1; 5997 for (; MaskElt < End; MaskElt++) 5998 Mask[MaskElt] = ZeroVecElt++; 5999 Mask[MaskElt] = PackedElt; 6000 } 6001 SDValue Shuf = DAG.getVectorShuffle(InVT, DL, PackedOp, ZeroVec, Mask); 6002 return DAG.getNode(ISD::BITCAST, DL, OutVT, Shuf); 6003 } 6004 6005 SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG, 6006 unsigned ByScalar) const { 6007 // Look for cases where a vector shift can use the *_BY_SCALAR form. 6008 SDValue Op0 = Op.getOperand(0); 6009 SDValue Op1 = Op.getOperand(1); 6010 SDLoc DL(Op); 6011 EVT VT = Op.getValueType(); 6012 unsigned ElemBitSize = VT.getScalarSizeInBits(); 6013 6014 // See whether the shift vector is a splat represented as BUILD_VECTOR. 6015 if (auto *BVN = dyn_cast<BuildVectorSDNode>(Op1)) { 6016 APInt SplatBits, SplatUndef; 6017 unsigned SplatBitSize; 6018 bool HasAnyUndefs; 6019 // Check for constant splats. Use ElemBitSize as the minimum element 6020 // width and reject splats that need wider elements. 
6021 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
6022 ElemBitSize, true) &&
6023 SplatBitSize == ElemBitSize) {
6024 SDValue Shift = DAG.getConstant(SplatBits.getZExtValue() & 0xfff,
6025 DL, MVT::i32);
6026 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
6027 }
6028 // Check for variable splats.
6029 BitVector UndefElements;
6030 SDValue Splat = BVN->getSplatValue(&UndefElements);
6031 if (Splat) {
6032 // Since i32 is the smallest legal type, we either need a no-op
6033 // or a truncation.
6034 SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Splat);
6035 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
6036 }
6037 }
6038 
6039 // See whether the shift vector is a splat represented as SHUFFLE_VECTOR,
6040 // and the shift amount is directly available in a GPR.
6041 if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(Op1)) {
6042 if (VSN->isSplat()) {
6043 SDValue VSNOp0 = VSN->getOperand(0);
6044 unsigned Index = VSN->getSplatIndex();
6045 assert(Index < VT.getVectorNumElements() &&
6046 "Splat index should be defined and in first operand");
6047 if ((Index == 0 && VSNOp0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
6048 VSNOp0.getOpcode() == ISD::BUILD_VECTOR) {
6049 // Since i32 is the smallest legal type, we either need a no-op
6050 // or a truncation.
6051 SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32,
6052 VSNOp0.getOperand(Index));
6053 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
6054 }
6055 }
6056 }
6057 
6058 // Otherwise just treat the current form as legal.
6059 return Op;
6060 }
6061 
6062 SDValue SystemZTargetLowering::lowerIS_FPCLASS(SDValue Op,
6063 SelectionDAG &DAG) const {
6064 SDLoc DL(Op);
6065 MVT ResultVT = Op.getSimpleValueType();
6066 SDValue Arg = Op.getOperand(0);
6067 unsigned Check = Op.getConstantOperandVal(1);
6068 
6069 unsigned TDCMask = 0;
6070 if (Check & fcSNan)
6071 TDCMask |= SystemZ::TDCMASK_SNAN_PLUS | SystemZ::TDCMASK_SNAN_MINUS;
6072 if (Check & fcQNan)
6073 TDCMask |= SystemZ::TDCMASK_QNAN_PLUS | SystemZ::TDCMASK_QNAN_MINUS;
6074 if (Check & fcPosInf)
6075 TDCMask |= SystemZ::TDCMASK_INFINITY_PLUS;
6076 if (Check & fcNegInf)
6077 TDCMask |= SystemZ::TDCMASK_INFINITY_MINUS;
6078 if (Check & fcPosNormal)
6079 TDCMask |= SystemZ::TDCMASK_NORMAL_PLUS;
6080 if (Check & fcNegNormal)
6081 TDCMask |= SystemZ::TDCMASK_NORMAL_MINUS;
6082 if (Check & fcPosSubnormal)
6083 TDCMask |= SystemZ::TDCMASK_SUBNORMAL_PLUS;
6084 if (Check & fcNegSubnormal)
6085 TDCMask |= SystemZ::TDCMASK_SUBNORMAL_MINUS;
6086 if (Check & fcPosZero)
6087 TDCMask |= SystemZ::TDCMASK_ZERO_PLUS;
6088 if (Check & fcNegZero)
6089 TDCMask |= SystemZ::TDCMASK_ZERO_MINUS;
6090 SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, MVT::i64);
6091 
6092 SDValue Intr = DAG.getNode(SystemZISD::TDC, DL, ResultVT, Arg, TDCMaskV);
6093 return getCCResult(DAG, Intr);
6094 }
6095 
6096 SDValue SystemZTargetLowering::lowerREADCYCLECOUNTER(SDValue Op,
6097 SelectionDAG &DAG) const {
6098 SDLoc DL(Op);
6099 SDValue Chain = Op.getOperand(0);
6100 
6101 // STCKF only supports a memory operand, so we have to use a temporary.
6102 SDValue StackPtr = DAG.CreateStackTemporary(MVT::i64);
6103 int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
6104 MachinePointerInfo MPI =
6105 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
6106 
6107 // Use STCKF to store the TOD clock into the temporary.
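// STCKF (STORE CLOCK FAST) writes an 8-byte TOD-clock value to storage, so
// the result is materialized through the i64 stack slot and loaded back.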
6108 SDValue StoreOps[] = {Chain, StackPtr}; 6109 Chain = DAG.getMemIntrinsicNode( 6110 SystemZISD::STCKF, DL, DAG.getVTList(MVT::Other), StoreOps, MVT::i64, 6111 MPI, MaybeAlign(), MachineMemOperand::MOStore); 6112 6113 // And read it back from there. 6114 return DAG.getLoad(MVT::i64, DL, Chain, StackPtr, MPI); 6115 } 6116 6117 SDValue SystemZTargetLowering::LowerOperation(SDValue Op, 6118 SelectionDAG &DAG) const { 6119 switch (Op.getOpcode()) { 6120 case ISD::FRAMEADDR: 6121 return lowerFRAMEADDR(Op, DAG); 6122 case ISD::RETURNADDR: 6123 return lowerRETURNADDR(Op, DAG); 6124 case ISD::BR_CC: 6125 return lowerBR_CC(Op, DAG); 6126 case ISD::SELECT_CC: 6127 return lowerSELECT_CC(Op, DAG); 6128 case ISD::SETCC: 6129 return lowerSETCC(Op, DAG); 6130 case ISD::STRICT_FSETCC: 6131 return lowerSTRICT_FSETCC(Op, DAG, false); 6132 case ISD::STRICT_FSETCCS: 6133 return lowerSTRICT_FSETCC(Op, DAG, true); 6134 case ISD::GlobalAddress: 6135 return lowerGlobalAddress(cast<GlobalAddressSDNode>(Op), DAG); 6136 case ISD::GlobalTLSAddress: 6137 return lowerGlobalTLSAddress(cast<GlobalAddressSDNode>(Op), DAG); 6138 case ISD::BlockAddress: 6139 return lowerBlockAddress(cast<BlockAddressSDNode>(Op), DAG); 6140 case ISD::JumpTable: 6141 return lowerJumpTable(cast<JumpTableSDNode>(Op), DAG); 6142 case ISD::ConstantPool: 6143 return lowerConstantPool(cast<ConstantPoolSDNode>(Op), DAG); 6144 case ISD::BITCAST: 6145 return lowerBITCAST(Op, DAG); 6146 case ISD::VASTART: 6147 return lowerVASTART(Op, DAG); 6148 case ISD::VACOPY: 6149 return lowerVACOPY(Op, DAG); 6150 case ISD::DYNAMIC_STACKALLOC: 6151 return lowerDYNAMIC_STACKALLOC(Op, DAG); 6152 case ISD::GET_DYNAMIC_AREA_OFFSET: 6153 return lowerGET_DYNAMIC_AREA_OFFSET(Op, DAG); 6154 case ISD::SMUL_LOHI: 6155 return lowerSMUL_LOHI(Op, DAG); 6156 case ISD::UMUL_LOHI: 6157 return lowerUMUL_LOHI(Op, DAG); 6158 case ISD::SDIVREM: 6159 return lowerSDIVREM(Op, DAG); 6160 case ISD::UDIVREM: 6161 return lowerUDIVREM(Op, DAG); 6162 case ISD::SADDO: 6163 case ISD::SSUBO: 6164 case ISD::UADDO: 6165 case ISD::USUBO: 6166 return lowerXALUO(Op, DAG); 6167 case ISD::UADDO_CARRY: 6168 case ISD::USUBO_CARRY: 6169 return lowerUADDSUBO_CARRY(Op, DAG); 6170 case ISD::OR: 6171 return lowerOR(Op, DAG); 6172 case ISD::CTPOP: 6173 return lowerCTPOP(Op, DAG); 6174 case ISD::VECREDUCE_ADD: 6175 return lowerVECREDUCE_ADD(Op, DAG); 6176 case ISD::ATOMIC_FENCE: 6177 return lowerATOMIC_FENCE(Op, DAG); 6178 case ISD::ATOMIC_SWAP: 6179 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW); 6180 case ISD::ATOMIC_STORE: 6181 case ISD::ATOMIC_LOAD: 6182 return lowerATOMIC_LDST_I128(Op, DAG); 6183 case ISD::ATOMIC_LOAD_ADD: 6184 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_ADD); 6185 case ISD::ATOMIC_LOAD_SUB: 6186 return lowerATOMIC_LOAD_SUB(Op, DAG); 6187 case ISD::ATOMIC_LOAD_AND: 6188 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_AND); 6189 case ISD::ATOMIC_LOAD_OR: 6190 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_OR); 6191 case ISD::ATOMIC_LOAD_XOR: 6192 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_XOR); 6193 case ISD::ATOMIC_LOAD_NAND: 6194 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_NAND); 6195 case ISD::ATOMIC_LOAD_MIN: 6196 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MIN); 6197 case ISD::ATOMIC_LOAD_MAX: 6198 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MAX); 6199 case ISD::ATOMIC_LOAD_UMIN: 6200 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMIN); 6201 
case ISD::ATOMIC_LOAD_UMAX: 6202 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMAX); 6203 case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: 6204 return lowerATOMIC_CMP_SWAP(Op, DAG); 6205 case ISD::STACKSAVE: 6206 return lowerSTACKSAVE(Op, DAG); 6207 case ISD::STACKRESTORE: 6208 return lowerSTACKRESTORE(Op, DAG); 6209 case ISD::PREFETCH: 6210 return lowerPREFETCH(Op, DAG); 6211 case ISD::INTRINSIC_W_CHAIN: 6212 return lowerINTRINSIC_W_CHAIN(Op, DAG); 6213 case ISD::INTRINSIC_WO_CHAIN: 6214 return lowerINTRINSIC_WO_CHAIN(Op, DAG); 6215 case ISD::BUILD_VECTOR: 6216 return lowerBUILD_VECTOR(Op, DAG); 6217 case ISD::VECTOR_SHUFFLE: 6218 return lowerVECTOR_SHUFFLE(Op, DAG); 6219 case ISD::SCALAR_TO_VECTOR: 6220 return lowerSCALAR_TO_VECTOR(Op, DAG); 6221 case ISD::INSERT_VECTOR_ELT: 6222 return lowerINSERT_VECTOR_ELT(Op, DAG); 6223 case ISD::EXTRACT_VECTOR_ELT: 6224 return lowerEXTRACT_VECTOR_ELT(Op, DAG); 6225 case ISD::SIGN_EXTEND_VECTOR_INREG: 6226 return lowerSIGN_EXTEND_VECTOR_INREG(Op, DAG); 6227 case ISD::ZERO_EXTEND_VECTOR_INREG: 6228 return lowerZERO_EXTEND_VECTOR_INREG(Op, DAG); 6229 case ISD::SHL: 6230 return lowerShift(Op, DAG, SystemZISD::VSHL_BY_SCALAR); 6231 case ISD::SRL: 6232 return lowerShift(Op, DAG, SystemZISD::VSRL_BY_SCALAR); 6233 case ISD::SRA: 6234 return lowerShift(Op, DAG, SystemZISD::VSRA_BY_SCALAR); 6235 case ISD::ROTL: 6236 return lowerShift(Op, DAG, SystemZISD::VROTL_BY_SCALAR); 6237 case ISD::IS_FPCLASS: 6238 return lowerIS_FPCLASS(Op, DAG); 6239 case ISD::GET_ROUNDING: 6240 return lowerGET_ROUNDING(Op, DAG); 6241 case ISD::READCYCLECOUNTER: 6242 return lowerREADCYCLECOUNTER(Op, DAG); 6243 default: 6244 llvm_unreachable("Unexpected node to lower"); 6245 } 6246 } 6247 6248 static SDValue expandBitCastI128ToF128(SelectionDAG &DAG, SDValue Src, 6249 const SDLoc &SL) { 6250 // If i128 is legal, just use a normal bitcast. 6251 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128)) 6252 return DAG.getBitcast(MVT::f128, Src); 6253 6254 // Otherwise, f128 must live in FP128, so do a partwise move. 6255 assert(DAG.getTargetLoweringInfo().getRepRegClassFor(MVT::f128) == 6256 &SystemZ::FP128BitRegClass); 6257 6258 SDValue Hi, Lo; 6259 std::tie(Lo, Hi) = DAG.SplitScalar(Src, SL, MVT::i64, MVT::i64); 6260 6261 Hi = DAG.getBitcast(MVT::f64, Hi); 6262 Lo = DAG.getBitcast(MVT::f64, Lo); 6263 6264 SDNode *Pair = DAG.getMachineNode( 6265 SystemZ::REG_SEQUENCE, SL, MVT::f128, 6266 {DAG.getTargetConstant(SystemZ::FP128BitRegClassID, SL, MVT::i32), Lo, 6267 DAG.getTargetConstant(SystemZ::subreg_l64, SL, MVT::i32), Hi, 6268 DAG.getTargetConstant(SystemZ::subreg_h64, SL, MVT::i32)}); 6269 return SDValue(Pair, 0); 6270 } 6271 6272 static SDValue expandBitCastF128ToI128(SelectionDAG &DAG, SDValue Src, 6273 const SDLoc &SL) { 6274 // If i128 is legal, just use a normal bitcast. 6275 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128)) 6276 return DAG.getBitcast(MVT::i128, Src); 6277 6278 // Otherwise, f128 must live in FP128, so do a partwise move. 
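// Pull the two 64-bit halves out of the FP128 value as subregisters and
// reassemble them into an i128 with BUILD_PAIR.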
6279 assert(DAG.getTargetLoweringInfo().getRepRegClassFor(MVT::f128) == 6280 &SystemZ::FP128BitRegClass); 6281 6282 SDValue LoFP = 6283 DAG.getTargetExtractSubreg(SystemZ::subreg_l64, SL, MVT::f64, Src); 6284 SDValue HiFP = 6285 DAG.getTargetExtractSubreg(SystemZ::subreg_h64, SL, MVT::f64, Src); 6286 SDValue Lo = DAG.getNode(ISD::BITCAST, SL, MVT::i64, LoFP); 6287 SDValue Hi = DAG.getNode(ISD::BITCAST, SL, MVT::i64, HiFP); 6288 6289 return DAG.getNode(ISD::BUILD_PAIR, SL, MVT::i128, Lo, Hi); 6290 } 6291 6292 // Lower operations with invalid operand or result types (currently used 6293 // only for 128-bit integer types). 6294 void 6295 SystemZTargetLowering::LowerOperationWrapper(SDNode *N, 6296 SmallVectorImpl<SDValue> &Results, 6297 SelectionDAG &DAG) const { 6298 switch (N->getOpcode()) { 6299 case ISD::ATOMIC_LOAD: { 6300 SDLoc DL(N); 6301 SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::Other); 6302 SDValue Ops[] = { N->getOperand(0), N->getOperand(1) }; 6303 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand(); 6304 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_LOAD_128, 6305 DL, Tys, Ops, MVT::i128, MMO); 6306 6307 SDValue Lowered = lowerGR128ToI128(DAG, Res); 6308 if (N->getValueType(0) == MVT::f128) 6309 Lowered = expandBitCastI128ToF128(DAG, Lowered, DL); 6310 Results.push_back(Lowered); 6311 Results.push_back(Res.getValue(1)); 6312 break; 6313 } 6314 case ISD::ATOMIC_STORE: { 6315 SDLoc DL(N); 6316 SDVTList Tys = DAG.getVTList(MVT::Other); 6317 SDValue Val = N->getOperand(1); 6318 if (Val.getValueType() == MVT::f128) 6319 Val = expandBitCastF128ToI128(DAG, Val, DL); 6320 Val = lowerI128ToGR128(DAG, Val); 6321 6322 SDValue Ops[] = {N->getOperand(0), Val, N->getOperand(2)}; 6323 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand(); 6324 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_STORE_128, 6325 DL, Tys, Ops, MVT::i128, MMO); 6326 // We have to enforce sequential consistency by performing a 6327 // serialization operation after the store. 
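// Only sequentially consistent stores need this; the machine node below
// emits the target serialization instruction after the store.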
6328 if (cast<AtomicSDNode>(N)->getSuccessOrdering() == 6329 AtomicOrdering::SequentiallyConsistent) 6330 Res = SDValue(DAG.getMachineNode(SystemZ::Serialize, DL, 6331 MVT::Other, Res), 0); 6332 Results.push_back(Res); 6333 break; 6334 } 6335 case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: { 6336 SDLoc DL(N); 6337 SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other); 6338 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), 6339 lowerI128ToGR128(DAG, N->getOperand(2)), 6340 lowerI128ToGR128(DAG, N->getOperand(3)) }; 6341 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand(); 6342 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP_128, 6343 DL, Tys, Ops, MVT::i128, MMO); 6344 SDValue Success = emitSETCC(DAG, DL, Res.getValue(1), 6345 SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ); 6346 Success = DAG.getZExtOrTrunc(Success, DL, N->getValueType(1)); 6347 Results.push_back(lowerGR128ToI128(DAG, Res)); 6348 Results.push_back(Success); 6349 Results.push_back(Res.getValue(2)); 6350 break; 6351 } 6352 case ISD::BITCAST: { 6353 SDValue Src = N->getOperand(0); 6354 if (N->getValueType(0) == MVT::i128 && Src.getValueType() == MVT::f128 && 6355 !useSoftFloat()) { 6356 SDLoc DL(N); 6357 Results.push_back(expandBitCastF128ToI128(DAG, Src, DL)); 6358 } 6359 break; 6360 } 6361 default: 6362 llvm_unreachable("Unexpected node to lower"); 6363 } 6364 } 6365 6366 void 6367 SystemZTargetLowering::ReplaceNodeResults(SDNode *N, 6368 SmallVectorImpl<SDValue> &Results, 6369 SelectionDAG &DAG) const { 6370 return LowerOperationWrapper(N, Results, DAG); 6371 } 6372 6373 const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { 6374 #define OPCODE(NAME) case SystemZISD::NAME: return "SystemZISD::" #NAME 6375 switch ((SystemZISD::NodeType)Opcode) { 6376 case SystemZISD::FIRST_NUMBER: break; 6377 OPCODE(RET_GLUE); 6378 OPCODE(CALL); 6379 OPCODE(SIBCALL); 6380 OPCODE(TLS_GDCALL); 6381 OPCODE(TLS_LDCALL); 6382 OPCODE(PCREL_WRAPPER); 6383 OPCODE(PCREL_OFFSET); 6384 OPCODE(ICMP); 6385 OPCODE(FCMP); 6386 OPCODE(STRICT_FCMP); 6387 OPCODE(STRICT_FCMPS); 6388 OPCODE(TM); 6389 OPCODE(BR_CCMASK); 6390 OPCODE(SELECT_CCMASK); 6391 OPCODE(ADJDYNALLOC); 6392 OPCODE(PROBED_ALLOCA); 6393 OPCODE(POPCNT); 6394 OPCODE(SMUL_LOHI); 6395 OPCODE(UMUL_LOHI); 6396 OPCODE(SDIVREM); 6397 OPCODE(UDIVREM); 6398 OPCODE(SADDO); 6399 OPCODE(SSUBO); 6400 OPCODE(UADDO); 6401 OPCODE(USUBO); 6402 OPCODE(ADDCARRY); 6403 OPCODE(SUBCARRY); 6404 OPCODE(GET_CCMASK); 6405 OPCODE(MVC); 6406 OPCODE(NC); 6407 OPCODE(OC); 6408 OPCODE(XC); 6409 OPCODE(CLC); 6410 OPCODE(MEMSET_MVC); 6411 OPCODE(STPCPY); 6412 OPCODE(STRCMP); 6413 OPCODE(SEARCH_STRING); 6414 OPCODE(IPM); 6415 OPCODE(TBEGIN); 6416 OPCODE(TBEGIN_NOFLOAT); 6417 OPCODE(TEND); 6418 OPCODE(BYTE_MASK); 6419 OPCODE(ROTATE_MASK); 6420 OPCODE(REPLICATE); 6421 OPCODE(JOIN_DWORDS); 6422 OPCODE(SPLAT); 6423 OPCODE(MERGE_HIGH); 6424 OPCODE(MERGE_LOW); 6425 OPCODE(SHL_DOUBLE); 6426 OPCODE(PERMUTE_DWORDS); 6427 OPCODE(PERMUTE); 6428 OPCODE(PACK); 6429 OPCODE(PACKS_CC); 6430 OPCODE(PACKLS_CC); 6431 OPCODE(UNPACK_HIGH); 6432 OPCODE(UNPACKL_HIGH); 6433 OPCODE(UNPACK_LOW); 6434 OPCODE(UNPACKL_LOW); 6435 OPCODE(VSHL_BY_SCALAR); 6436 OPCODE(VSRL_BY_SCALAR); 6437 OPCODE(VSRA_BY_SCALAR); 6438 OPCODE(VROTL_BY_SCALAR); 6439 OPCODE(VSUM); 6440 OPCODE(VACC); 6441 OPCODE(VSCBI); 6442 OPCODE(VAC); 6443 OPCODE(VSBI); 6444 OPCODE(VACCC); 6445 OPCODE(VSBCBI); 6446 OPCODE(VICMPE); 6447 OPCODE(VICMPH); 6448 OPCODE(VICMPHL); 6449 OPCODE(VICMPES); 6450 OPCODE(VICMPHS); 6451 OPCODE(VICMPHLS); 
6452 OPCODE(VFCMPE); 6453 OPCODE(STRICT_VFCMPE); 6454 OPCODE(STRICT_VFCMPES); 6455 OPCODE(VFCMPH); 6456 OPCODE(STRICT_VFCMPH); 6457 OPCODE(STRICT_VFCMPHS); 6458 OPCODE(VFCMPHE); 6459 OPCODE(STRICT_VFCMPHE); 6460 OPCODE(STRICT_VFCMPHES); 6461 OPCODE(VFCMPES); 6462 OPCODE(VFCMPHS); 6463 OPCODE(VFCMPHES); 6464 OPCODE(VFTCI); 6465 OPCODE(VEXTEND); 6466 OPCODE(STRICT_VEXTEND); 6467 OPCODE(VROUND); 6468 OPCODE(STRICT_VROUND); 6469 OPCODE(VTM); 6470 OPCODE(SCMP128HI); 6471 OPCODE(UCMP128HI); 6472 OPCODE(VFAE_CC); 6473 OPCODE(VFAEZ_CC); 6474 OPCODE(VFEE_CC); 6475 OPCODE(VFEEZ_CC); 6476 OPCODE(VFENE_CC); 6477 OPCODE(VFENEZ_CC); 6478 OPCODE(VISTR_CC); 6479 OPCODE(VSTRC_CC); 6480 OPCODE(VSTRCZ_CC); 6481 OPCODE(VSTRS_CC); 6482 OPCODE(VSTRSZ_CC); 6483 OPCODE(TDC); 6484 OPCODE(ATOMIC_SWAPW); 6485 OPCODE(ATOMIC_LOADW_ADD); 6486 OPCODE(ATOMIC_LOADW_SUB); 6487 OPCODE(ATOMIC_LOADW_AND); 6488 OPCODE(ATOMIC_LOADW_OR); 6489 OPCODE(ATOMIC_LOADW_XOR); 6490 OPCODE(ATOMIC_LOADW_NAND); 6491 OPCODE(ATOMIC_LOADW_MIN); 6492 OPCODE(ATOMIC_LOADW_MAX); 6493 OPCODE(ATOMIC_LOADW_UMIN); 6494 OPCODE(ATOMIC_LOADW_UMAX); 6495 OPCODE(ATOMIC_CMP_SWAPW); 6496 OPCODE(ATOMIC_CMP_SWAP); 6497 OPCODE(ATOMIC_LOAD_128); 6498 OPCODE(ATOMIC_STORE_128); 6499 OPCODE(ATOMIC_CMP_SWAP_128); 6500 OPCODE(LRV); 6501 OPCODE(STRV); 6502 OPCODE(VLER); 6503 OPCODE(VSTER); 6504 OPCODE(STCKF); 6505 OPCODE(PREFETCH); 6506 OPCODE(ADA_ENTRY); 6507 } 6508 return nullptr; 6509 #undef OPCODE 6510 } 6511 6512 // Return true if VT is a vector whose elements are a whole number of bytes 6513 // in width. Also check for presence of vector support. 6514 bool SystemZTargetLowering::canTreatAsByteVector(EVT VT) const { 6515 if (!Subtarget.hasVector()) 6516 return false; 6517 6518 return VT.isVector() && VT.getScalarSizeInBits() % 8 == 0 && VT.isSimple(); 6519 } 6520 6521 // Try to simplify an EXTRACT_VECTOR_ELT from a vector of type VecVT 6522 // producing a result of type ResVT. Op is a possibly bitcast version 6523 // of the input vector and Index is the index (based on type VecVT) that 6524 // should be extracted. Return the new extraction if a simplification 6525 // was possible or if Force is true. 6526 SDValue SystemZTargetLowering::combineExtract(const SDLoc &DL, EVT ResVT, 6527 EVT VecVT, SDValue Op, 6528 unsigned Index, 6529 DAGCombinerInfo &DCI, 6530 bool Force) const { 6531 SelectionDAG &DAG = DCI.DAG; 6532 6533 // The number of bytes being extracted. 6534 unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize(); 6535 6536 for (;;) { 6537 unsigned Opcode = Op.getOpcode(); 6538 if (Opcode == ISD::BITCAST) 6539 // Look through bitcasts. 6540 Op = Op.getOperand(0); 6541 else if ((Opcode == ISD::VECTOR_SHUFFLE || Opcode == SystemZISD::SPLAT) && 6542 canTreatAsByteVector(Op.getValueType())) { 6543 // Get a VPERM-like permute mask and see whether the bytes covered 6544 // by the extracted element are a contiguous sequence from one 6545 // source operand. 6546 SmallVector<int, SystemZ::VectorBytes> Bytes; 6547 if (!getVPermMask(Op, Bytes)) 6548 break; 6549 int First; 6550 if (!getShuffleInput(Bytes, Index * BytesPerElement, 6551 BytesPerElement, First)) 6552 break; 6553 if (First < 0) 6554 return DAG.getUNDEF(ResVT); 6555 // Make sure the contiguous sequence starts at a multiple of the 6556 // original element size. 6557 unsigned Byte = unsigned(First) % Bytes.size(); 6558 if (Byte % BytesPerElement != 0) 6559 break; 6560 // We can get the extracted value directly from an input. 
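// Redirect the extraction at that shuffle input and keep looking through
// it; Force ensures the rewritten extraction is emitted at the end.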
6561 Index = Byte / BytesPerElement; 6562 Op = Op.getOperand(unsigned(First) / Bytes.size()); 6563 Force = true; 6564 } else if (Opcode == ISD::BUILD_VECTOR && 6565 canTreatAsByteVector(Op.getValueType())) { 6566 // We can only optimize this case if the BUILD_VECTOR elements are 6567 // at least as wide as the extracted value. 6568 EVT OpVT = Op.getValueType(); 6569 unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize(); 6570 if (OpBytesPerElement < BytesPerElement) 6571 break; 6572 // Make sure that the least-significant bit of the extracted value 6573 // is the least significant bit of an input. 6574 unsigned End = (Index + 1) * BytesPerElement; 6575 if (End % OpBytesPerElement != 0) 6576 break; 6577 // We're extracting the low part of one operand of the BUILD_VECTOR. 6578 Op = Op.getOperand(End / OpBytesPerElement - 1); 6579 if (!Op.getValueType().isInteger()) { 6580 EVT VT = MVT::getIntegerVT(Op.getValueSizeInBits()); 6581 Op = DAG.getNode(ISD::BITCAST, DL, VT, Op); 6582 DCI.AddToWorklist(Op.getNode()); 6583 } 6584 EVT VT = MVT::getIntegerVT(ResVT.getSizeInBits()); 6585 Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op); 6586 if (VT != ResVT) { 6587 DCI.AddToWorklist(Op.getNode()); 6588 Op = DAG.getNode(ISD::BITCAST, DL, ResVT, Op); 6589 } 6590 return Op; 6591 } else if ((Opcode == ISD::SIGN_EXTEND_VECTOR_INREG || 6592 Opcode == ISD::ZERO_EXTEND_VECTOR_INREG || 6593 Opcode == ISD::ANY_EXTEND_VECTOR_INREG) && 6594 canTreatAsByteVector(Op.getValueType()) && 6595 canTreatAsByteVector(Op.getOperand(0).getValueType())) { 6596 // Make sure that only the unextended bits are significant. 6597 EVT ExtVT = Op.getValueType(); 6598 EVT OpVT = Op.getOperand(0).getValueType(); 6599 unsigned ExtBytesPerElement = ExtVT.getVectorElementType().getStoreSize(); 6600 unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize(); 6601 unsigned Byte = Index * BytesPerElement; 6602 unsigned SubByte = Byte % ExtBytesPerElement; 6603 unsigned MinSubByte = ExtBytesPerElement - OpBytesPerElement; 6604 if (SubByte < MinSubByte || 6605 SubByte + BytesPerElement > ExtBytesPerElement) 6606 break; 6607 // Get the byte offset of the unextended element 6608 Byte = Byte / ExtBytesPerElement * OpBytesPerElement; 6609 // ...then add the byte offset relative to that element. 6610 Byte += SubByte - MinSubByte; 6611 if (Byte % BytesPerElement != 0) 6612 break; 6613 Op = Op.getOperand(0); 6614 Index = Byte / BytesPerElement; 6615 Force = true; 6616 } else 6617 break; 6618 } 6619 if (Force) { 6620 if (Op.getValueType() != VecVT) { 6621 Op = DAG.getNode(ISD::BITCAST, DL, VecVT, Op); 6622 DCI.AddToWorklist(Op.getNode()); 6623 } 6624 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op, 6625 DAG.getConstant(Index, DL, MVT::i32)); 6626 } 6627 return SDValue(); 6628 } 6629 6630 // Optimize vector operations in scalar value Op on the basis that Op 6631 // is truncated to TruncVT. 6632 SDValue SystemZTargetLowering::combineTruncateExtract( 6633 const SDLoc &DL, EVT TruncVT, SDValue Op, DAGCombinerInfo &DCI) const { 6634 // If we have (trunc (extract_vector_elt X, Y)), try to turn it into 6635 // (extract_vector_elt (bitcast X), Y'), where (bitcast X) has elements 6636 // of type TruncVT. 
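// For example, truncating (extract_vector_elt v4i32 X, 1) to i8 can instead
// extract element 7 of (bitcast X to v16i8): each i32 covers four bytes, and
// on this big-endian target the truncated value is the last of them.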
6637 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && 6638 TruncVT.getSizeInBits() % 8 == 0) { 6639 SDValue Vec = Op.getOperand(0); 6640 EVT VecVT = Vec.getValueType(); 6641 if (canTreatAsByteVector(VecVT)) { 6642 if (auto *IndexN = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { 6643 unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize(); 6644 unsigned TruncBytes = TruncVT.getStoreSize(); 6645 if (BytesPerElement % TruncBytes == 0) { 6646 // Calculate the value of Y' in the above description. We are 6647 // splitting the original elements into Scale equal-sized pieces 6648 // and for truncation purposes want the last (least-significant) 6649 // of these pieces for IndexN. This is easiest to do by calculating 6650 // the start index of the following element and then subtracting 1. 6651 unsigned Scale = BytesPerElement / TruncBytes; 6652 unsigned NewIndex = (IndexN->getZExtValue() + 1) * Scale - 1; 6653 6654 // Defer the creation of the bitcast from X to combineExtract, 6655 // which might be able to optimize the extraction. 6656 VecVT = MVT::getVectorVT(MVT::getIntegerVT(TruncBytes * 8), 6657 VecVT.getStoreSize() / TruncBytes); 6658 EVT ResVT = (TruncBytes < 4 ? MVT::i32 : TruncVT); 6659 return combineExtract(DL, ResVT, VecVT, Vec, NewIndex, DCI, true); 6660 } 6661 } 6662 } 6663 } 6664 return SDValue(); 6665 } 6666 6667 SDValue SystemZTargetLowering::combineZERO_EXTEND( 6668 SDNode *N, DAGCombinerInfo &DCI) const { 6669 // Convert (zext (select_ccmask C1, C2)) into (select_ccmask C1', C2') 6670 SelectionDAG &DAG = DCI.DAG; 6671 SDValue N0 = N->getOperand(0); 6672 EVT VT = N->getValueType(0); 6673 if (N0.getOpcode() == SystemZISD::SELECT_CCMASK) { 6674 auto *TrueOp = dyn_cast<ConstantSDNode>(N0.getOperand(0)); 6675 auto *FalseOp = dyn_cast<ConstantSDNode>(N0.getOperand(1)); 6676 if (TrueOp && FalseOp) { 6677 SDLoc DL(N0); 6678 SDValue Ops[] = { DAG.getConstant(TrueOp->getZExtValue(), DL, VT), 6679 DAG.getConstant(FalseOp->getZExtValue(), DL, VT), 6680 N0.getOperand(2), N0.getOperand(3), N0.getOperand(4) }; 6681 SDValue NewSelect = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VT, Ops); 6682 // If N0 has multiple uses, change other uses as well. 6683 if (!N0.hasOneUse()) { 6684 SDValue TruncSelect = 6685 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), NewSelect); 6686 DCI.CombineTo(N0.getNode(), TruncSelect); 6687 } 6688 return NewSelect; 6689 } 6690 } 6691 // Convert (zext (xor (trunc X), C)) into (xor (trunc X), C') if the size 6692 // of the result is smaller than the size of X and all the truncated bits 6693 // of X are already zero. 
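// For example, (zext i64 (xor (trunc i32 X), C)) with X of type i128 becomes
// (xor (trunc i64 X), C) if bits 32..63 of X are known to be zero.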
6694 if (N0.getOpcode() == ISD::XOR && 6695 N0.hasOneUse() && N0.getOperand(0).hasOneUse() && 6696 N0.getOperand(0).getOpcode() == ISD::TRUNCATE && 6697 N0.getOperand(1).getOpcode() == ISD::Constant) { 6698 SDValue X = N0.getOperand(0).getOperand(0); 6699 if (VT.isScalarInteger() && VT.getSizeInBits() < X.getValueSizeInBits()) { 6700 KnownBits Known = DAG.computeKnownBits(X); 6701 APInt TruncatedBits = APInt::getBitsSet(X.getValueSizeInBits(), 6702 N0.getValueSizeInBits(), 6703 VT.getSizeInBits()); 6704 if (TruncatedBits.isSubsetOf(Known.Zero)) { 6705 X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X); 6706 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits()); 6707 return DAG.getNode(ISD::XOR, SDLoc(N0), VT, 6708 X, DAG.getConstant(Mask, SDLoc(N0), VT)); 6709 } 6710 } 6711 } 6712 6713 return SDValue(); 6714 } 6715 6716 SDValue SystemZTargetLowering::combineSIGN_EXTEND_INREG( 6717 SDNode *N, DAGCombinerInfo &DCI) const { 6718 // Convert (sext_in_reg (setcc LHS, RHS, COND), i1) 6719 // and (sext_in_reg (any_extend (setcc LHS, RHS, COND)), i1) 6720 // into (select_cc LHS, RHS, -1, 0, COND) 6721 SelectionDAG &DAG = DCI.DAG; 6722 SDValue N0 = N->getOperand(0); 6723 EVT VT = N->getValueType(0); 6724 EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT(); 6725 if (N0.hasOneUse() && N0.getOpcode() == ISD::ANY_EXTEND) 6726 N0 = N0.getOperand(0); 6727 if (EVT == MVT::i1 && N0.hasOneUse() && N0.getOpcode() == ISD::SETCC) { 6728 SDLoc DL(N0); 6729 SDValue Ops[] = { N0.getOperand(0), N0.getOperand(1), 6730 DAG.getAllOnesConstant(DL, VT), 6731 DAG.getConstant(0, DL, VT), N0.getOperand(2) }; 6732 return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops); 6733 } 6734 return SDValue(); 6735 } 6736 6737 SDValue SystemZTargetLowering::combineSIGN_EXTEND( 6738 SDNode *N, DAGCombinerInfo &DCI) const { 6739 // Convert (sext (ashr (shl X, C1), C2)) to 6740 // (ashr (shl (anyext X), C1'), C2')), since wider shifts are as 6741 // cheap as narrower ones. 6742 SelectionDAG &DAG = DCI.DAG; 6743 SDValue N0 = N->getOperand(0); 6744 EVT VT = N->getValueType(0); 6745 if (N0.hasOneUse() && N0.getOpcode() == ISD::SRA) { 6746 auto *SraAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)); 6747 SDValue Inner = N0.getOperand(0); 6748 if (SraAmt && Inner.hasOneUse() && Inner.getOpcode() == ISD::SHL) { 6749 if (auto *ShlAmt = dyn_cast<ConstantSDNode>(Inner.getOperand(1))) { 6750 unsigned Extra = (VT.getSizeInBits() - N0.getValueSizeInBits()); 6751 unsigned NewShlAmt = ShlAmt->getZExtValue() + Extra; 6752 unsigned NewSraAmt = SraAmt->getZExtValue() + Extra; 6753 EVT ShiftVT = N0.getOperand(1).getValueType(); 6754 SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, SDLoc(Inner), VT, 6755 Inner.getOperand(0)); 6756 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(Inner), VT, Ext, 6757 DAG.getConstant(NewShlAmt, SDLoc(Inner), 6758 ShiftVT)); 6759 return DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, 6760 DAG.getConstant(NewSraAmt, SDLoc(N0), ShiftVT)); 6761 } 6762 } 6763 } 6764 6765 return SDValue(); 6766 } 6767 6768 SDValue SystemZTargetLowering::combineMERGE( 6769 SDNode *N, DAGCombinerInfo &DCI) const { 6770 SelectionDAG &DAG = DCI.DAG; 6771 unsigned Opcode = N->getOpcode(); 6772 SDValue Op0 = N->getOperand(0); 6773 SDValue Op1 = N->getOperand(1); 6774 if (Op0.getOpcode() == ISD::BITCAST) 6775 Op0 = Op0.getOperand(0); 6776 if (ISD::isBuildVectorAllZeros(Op0.getNode())) { 6777 // (z_merge_* 0, 0) -> 0. This is mostly useful for using VLLEZF 6778 // for v4f32. 6779 if (Op1 == N->getOperand(0)) 6780 return Op1; 6781 // (z_merge_? 0, X) -> (z_unpackl_? 0, X). 
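// Only do this for element sizes up to 4 bytes, since the unpack doubles
// the width of each element.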
6782 EVT VT = Op1.getValueType(); 6783 unsigned ElemBytes = VT.getVectorElementType().getStoreSize(); 6784 if (ElemBytes <= 4) { 6785 Opcode = (Opcode == SystemZISD::MERGE_HIGH ? 6786 SystemZISD::UNPACKL_HIGH : SystemZISD::UNPACKL_LOW); 6787 EVT InVT = VT.changeVectorElementTypeToInteger(); 6788 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(ElemBytes * 16), 6789 SystemZ::VectorBytes / ElemBytes / 2); 6790 if (VT != InVT) { 6791 Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), InVT, Op1); 6792 DCI.AddToWorklist(Op1.getNode()); 6793 } 6794 SDValue Op = DAG.getNode(Opcode, SDLoc(N), OutVT, Op1); 6795 DCI.AddToWorklist(Op.getNode()); 6796 return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op); 6797 } 6798 } 6799 return SDValue(); 6800 } 6801 6802 static bool isI128MovedToParts(LoadSDNode *LD, SDNode *&LoPart, 6803 SDNode *&HiPart) { 6804 LoPart = HiPart = nullptr; 6805 6806 // Scan through all users. 6807 for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end(); 6808 UI != UIEnd; ++UI) { 6809 // Skip the uses of the chain. 6810 if (UI.getUse().getResNo() != 0) 6811 continue; 6812 6813 // Verify every user is a TRUNCATE to i64 of the low or high half. 6814 SDNode *User = *UI; 6815 bool IsLoPart = true; 6816 if (User->getOpcode() == ISD::SRL && 6817 User->getOperand(1).getOpcode() == ISD::Constant && 6818 User->getConstantOperandVal(1) == 64 && User->hasOneUse()) { 6819 User = *User->use_begin(); 6820 IsLoPart = false; 6821 } 6822 if (User->getOpcode() != ISD::TRUNCATE || User->getValueType(0) != MVT::i64) 6823 return false; 6824 6825 if (IsLoPart) { 6826 if (LoPart) 6827 return false; 6828 LoPart = User; 6829 } else { 6830 if (HiPart) 6831 return false; 6832 HiPart = User; 6833 } 6834 } 6835 return true; 6836 } 6837 6838 static bool isF128MovedToParts(LoadSDNode *LD, SDNode *&LoPart, 6839 SDNode *&HiPart) { 6840 LoPart = HiPart = nullptr; 6841 6842 // Scan through all users. 6843 for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end(); 6844 UI != UIEnd; ++UI) { 6845 // Skip the uses of the chain. 6846 if (UI.getUse().getResNo() != 0) 6847 continue; 6848 6849 // Verify every user is an EXTRACT_SUBREG of the low or high half. 6850 SDNode *User = *UI; 6851 if (!User->hasOneUse() || !User->isMachineOpcode() || 6852 User->getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG) 6853 return false; 6854 6855 switch (User->getConstantOperandVal(1)) { 6856 case SystemZ::subreg_l64: 6857 if (LoPart) 6858 return false; 6859 LoPart = User; 6860 break; 6861 case SystemZ::subreg_h64: 6862 if (HiPart) 6863 return false; 6864 HiPart = User; 6865 break; 6866 default: 6867 return false; 6868 } 6869 } 6870 return true; 6871 } 6872 6873 SDValue SystemZTargetLowering::combineLOAD( 6874 SDNode *N, DAGCombinerInfo &DCI) const { 6875 SelectionDAG &DAG = DCI.DAG; 6876 EVT LdVT = N->getValueType(0); 6877 SDLoc DL(N); 6878 6879 // Replace a 128-bit load that is used solely to move its value into GPRs 6880 // by separate loads of both halves. 6881 LoadSDNode *LD = cast<LoadSDNode>(N); 6882 if (LD->isSimple() && ISD::isNormalLoad(LD)) { 6883 SDNode *LoPart, *HiPart; 6884 if ((LdVT == MVT::i128 && isI128MovedToParts(LD, LoPart, HiPart)) || 6885 (LdVT == MVT::f128 && isF128MovedToParts(LD, LoPart, HiPart))) { 6886 // Rewrite each extraction as an independent load. 
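// SystemZ is big-endian, so the high half of the value is at the original
// address and the low half is at offset 8.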
6887 SmallVector<SDValue, 2> ArgChains; 6888 if (HiPart) { 6889 SDValue EltLoad = DAG.getLoad( 6890 HiPart->getValueType(0), DL, LD->getChain(), LD->getBasePtr(), 6891 LD->getPointerInfo(), LD->getOriginalAlign(), 6892 LD->getMemOperand()->getFlags(), LD->getAAInfo()); 6893 6894 DCI.CombineTo(HiPart, EltLoad, true); 6895 ArgChains.push_back(EltLoad.getValue(1)); 6896 } 6897 if (LoPart) { 6898 SDValue EltLoad = DAG.getLoad( 6899 LoPart->getValueType(0), DL, LD->getChain(), 6900 DAG.getObjectPtrOffset(DL, LD->getBasePtr(), TypeSize::getFixed(8)), 6901 LD->getPointerInfo().getWithOffset(8), LD->getOriginalAlign(), 6902 LD->getMemOperand()->getFlags(), LD->getAAInfo()); 6903 6904 DCI.CombineTo(LoPart, EltLoad, true); 6905 ArgChains.push_back(EltLoad.getValue(1)); 6906 } 6907 6908 // Collect all chains via TokenFactor. 6909 SDValue Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, ArgChains); 6910 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain); 6911 DCI.AddToWorklist(Chain.getNode()); 6912 return SDValue(N, 0); 6913 } 6914 } 6915 6916 if (LdVT.isVector() || LdVT.isInteger()) 6917 return SDValue(); 6918 // Transform a scalar load that is REPLICATEd as well as having other 6919 // use(s) to the form where the other use(s) use the first element of the 6920 // REPLICATE instead of the load. Otherwise instruction selection will not 6921 // produce a VLREP. Avoid extracting to a GPR, so only do this for floating 6922 // point loads. 6923 6924 SDValue Replicate; 6925 SmallVector<SDNode*, 8> OtherUses; 6926 for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); 6927 UI != UE; ++UI) { 6928 if (UI->getOpcode() == SystemZISD::REPLICATE) { 6929 if (Replicate) 6930 return SDValue(); // Should never happen 6931 Replicate = SDValue(*UI, 0); 6932 } 6933 else if (UI.getUse().getResNo() == 0) 6934 OtherUses.push_back(*UI); 6935 } 6936 if (!Replicate || OtherUses.empty()) 6937 return SDValue(); 6938 6939 SDValue Extract0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, LdVT, 6940 Replicate, DAG.getConstant(0, DL, MVT::i32)); 6941 // Update uses of the loaded Value while preserving old chains. 6942 for (SDNode *U : OtherUses) { 6943 SmallVector<SDValue, 8> Ops; 6944 for (SDValue Op : U->ops()) 6945 Ops.push_back((Op.getNode() == N && Op.getResNo() == 0) ? 
Extract0 : Op); 6946 DAG.UpdateNodeOperands(U, Ops); 6947 } 6948 return SDValue(N, 0); 6949 } 6950 6951 bool SystemZTargetLowering::canLoadStoreByteSwapped(EVT VT) const { 6952 if (VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) 6953 return true; 6954 if (Subtarget.hasVectorEnhancements2()) 6955 if (VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v2i64 || VT == MVT::i128) 6956 return true; 6957 return false; 6958 } 6959 6960 static bool isVectorElementSwap(ArrayRef<int> M, EVT VT) { 6961 if (!VT.isVector() || !VT.isSimple() || 6962 VT.getSizeInBits() != 128 || 6963 VT.getScalarSizeInBits() % 8 != 0) 6964 return false; 6965 6966 unsigned NumElts = VT.getVectorNumElements(); 6967 for (unsigned i = 0; i < NumElts; ++i) { 6968 if (M[i] < 0) continue; // ignore UNDEF indices 6969 if ((unsigned) M[i] != NumElts - 1 - i) 6970 return false; 6971 } 6972 6973 return true; 6974 } 6975 6976 static bool isOnlyUsedByStores(SDValue StoredVal, SelectionDAG &DAG) { 6977 for (auto *U : StoredVal->uses()) { 6978 if (StoreSDNode *ST = dyn_cast<StoreSDNode>(U)) { 6979 EVT CurrMemVT = ST->getMemoryVT().getScalarType(); 6980 if (CurrMemVT.isRound() && CurrMemVT.getStoreSize() <= 16) 6981 continue; 6982 } else if (isa<BuildVectorSDNode>(U)) { 6983 SDValue BuildVector = SDValue(U, 0); 6984 if (DAG.isSplatValue(BuildVector, true/*AllowUndefs*/) && 6985 isOnlyUsedByStores(BuildVector, DAG)) 6986 continue; 6987 } 6988 return false; 6989 } 6990 return true; 6991 } 6992 6993 static bool isI128MovedFromParts(SDValue Val, SDValue &LoPart, 6994 SDValue &HiPart) { 6995 if (Val.getOpcode() != ISD::OR || !Val.getNode()->hasOneUse()) 6996 return false; 6997 6998 SDValue Op0 = Val.getOperand(0); 6999 SDValue Op1 = Val.getOperand(1); 7000 7001 if (Op0.getOpcode() == ISD::SHL) 7002 std::swap(Op0, Op1); 7003 if (Op1.getOpcode() != ISD::SHL || !Op1.getNode()->hasOneUse() || 7004 Op1.getOperand(1).getOpcode() != ISD::Constant || 7005 Op1.getConstantOperandVal(1) != 64) 7006 return false; 7007 Op1 = Op1.getOperand(0); 7008 7009 if (Op0.getOpcode() != ISD::ZERO_EXTEND || !Op0.getNode()->hasOneUse() || 7010 Op0.getOperand(0).getValueType() != MVT::i64) 7011 return false; 7012 if (Op1.getOpcode() != ISD::ANY_EXTEND || !Op1.getNode()->hasOneUse() || 7013 Op1.getOperand(0).getValueType() != MVT::i64) 7014 return false; 7015 7016 LoPart = Op0.getOperand(0); 7017 HiPart = Op1.getOperand(0); 7018 return true; 7019 } 7020 7021 static bool isF128MovedFromParts(SDValue Val, SDValue &LoPart, 7022 SDValue &HiPart) { 7023 if (!Val.getNode()->hasOneUse() || !Val.isMachineOpcode() || 7024 Val.getMachineOpcode() != TargetOpcode::REG_SEQUENCE) 7025 return false; 7026 7027 if (Val->getNumOperands() != 5 || 7028 Val->getOperand(0)->getAsZExtVal() != SystemZ::FP128BitRegClassID || 7029 Val->getOperand(2)->getAsZExtVal() != SystemZ::subreg_l64 || 7030 Val->getOperand(4)->getAsZExtVal() != SystemZ::subreg_h64) 7031 return false; 7032 7033 LoPart = Val->getOperand(1); 7034 HiPart = Val->getOperand(3); 7035 return true; 7036 } 7037 7038 SDValue SystemZTargetLowering::combineSTORE( 7039 SDNode *N, DAGCombinerInfo &DCI) const { 7040 SelectionDAG &DAG = DCI.DAG; 7041 auto *SN = cast<StoreSDNode>(N); 7042 auto &Op1 = N->getOperand(1); 7043 EVT MemVT = SN->getMemoryVT(); 7044 // If we have (truncstoreiN (extract_vector_elt X, Y), Z) then it is better 7045 // for the extraction to be done on a vMiN value, so that we can use VSTE. 7046 // If X has wider elements then convert it to: 7047 // (truncstoreiN (extract_vector_elt (bitcast X), Y2), Z). 
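// (A vector element store such as VSTEB or VSTEH can then write the byte or
// halfword directly from the vector register instead of going via a GPR.)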
7048 if (MemVT.isInteger() && SN->isTruncatingStore()) { 7049 if (SDValue Value = 7050 combineTruncateExtract(SDLoc(N), MemVT, SN->getValue(), DCI)) { 7051 DCI.AddToWorklist(Value.getNode()); 7052 7053 // Rewrite the store with the new form of stored value. 7054 return DAG.getTruncStore(SN->getChain(), SDLoc(SN), Value, 7055 SN->getBasePtr(), SN->getMemoryVT(), 7056 SN->getMemOperand()); 7057 } 7058 } 7059 // Combine STORE (BSWAP) into STRVH/STRV/STRVG/VSTBR 7060 if (!SN->isTruncatingStore() && 7061 Op1.getOpcode() == ISD::BSWAP && 7062 Op1.getNode()->hasOneUse() && 7063 canLoadStoreByteSwapped(Op1.getValueType())) { 7064 7065 SDValue BSwapOp = Op1.getOperand(0); 7066 7067 if (BSwapOp.getValueType() == MVT::i16) 7068 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), MVT::i32, BSwapOp); 7069 7070 SDValue Ops[] = { 7071 N->getOperand(0), BSwapOp, N->getOperand(2) 7072 }; 7073 7074 return 7075 DAG.getMemIntrinsicNode(SystemZISD::STRV, SDLoc(N), DAG.getVTList(MVT::Other), 7076 Ops, MemVT, SN->getMemOperand()); 7077 } 7078 // Combine STORE (element-swap) into VSTER 7079 if (!SN->isTruncatingStore() && 7080 Op1.getOpcode() == ISD::VECTOR_SHUFFLE && 7081 Op1.getNode()->hasOneUse() && 7082 Subtarget.hasVectorEnhancements2()) { 7083 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op1.getNode()); 7084 ArrayRef<int> ShuffleMask = SVN->getMask(); 7085 if (isVectorElementSwap(ShuffleMask, Op1.getValueType())) { 7086 SDValue Ops[] = { 7087 N->getOperand(0), Op1.getOperand(0), N->getOperand(2) 7088 }; 7089 7090 return DAG.getMemIntrinsicNode(SystemZISD::VSTER, SDLoc(N), 7091 DAG.getVTList(MVT::Other), 7092 Ops, MemVT, SN->getMemOperand()); 7093 } 7094 } 7095 7096 // Combine STORE (READCYCLECOUNTER) into STCKF. 7097 if (!SN->isTruncatingStore() && 7098 Op1.getOpcode() == ISD::READCYCLECOUNTER && 7099 Op1.hasOneUse() && 7100 N->getOperand(0).reachesChainWithoutSideEffects(SDValue(Op1.getNode(), 1))) { 7101 SDValue Ops[] = { Op1.getOperand(0), N->getOperand(2) }; 7102 return DAG.getMemIntrinsicNode(SystemZISD::STCKF, SDLoc(N), 7103 DAG.getVTList(MVT::Other), 7104 Ops, MemVT, SN->getMemOperand()); 7105 } 7106 7107 // Transform a store of a 128-bit value moved from parts into two stores. 7108 if (SN->isSimple() && ISD::isNormalStore(SN)) { 7109 SDValue LoPart, HiPart; 7110 if ((MemVT == MVT::i128 && isI128MovedFromParts(Op1, LoPart, HiPart)) || 7111 (MemVT == MVT::f128 && isF128MovedFromParts(Op1, LoPart, HiPart))) { 7112 SDLoc DL(SN); 7113 SDValue Chain0 = 7114 DAG.getStore(SN->getChain(), DL, HiPart, SN->getBasePtr(), 7115 SN->getPointerInfo(), SN->getOriginalAlign(), 7116 SN->getMemOperand()->getFlags(), SN->getAAInfo()); 7117 SDValue Chain1 = 7118 DAG.getStore(SN->getChain(), DL, LoPart, 7119 DAG.getObjectPtrOffset(DL, SN->getBasePtr(), 7120 TypeSize::getFixed(8)), 7121 SN->getPointerInfo().getWithOffset(8), 7122 SN->getOriginalAlign(), 7123 SN->getMemOperand()->getFlags(), SN->getAAInfo()); 7124 7125 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chain0, Chain1); 7126 } 7127 } 7128 7129 // Replicate a reg or immediate with VREP instead of scalar multiply or 7130 // immediate load. It seems best to do this during the first DAGCombine as 7131 // it is straight-forward to handle the zero-extend node in the initial 7132 // DAG, and also not worry about the keeping the new MemVT legal (e.g. when 7133 // extracting an i16 element from a v16i8 vector). 
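// For example, a store of the i64 immediate 0x0001000100010001 can instead
// store a v4i16 splat of 1, which lets instruction selection use VREP for
// the value instead of an immediate load into a GPR.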
7134 if (Subtarget.hasVector() && DCI.Level == BeforeLegalizeTypes && 7135 isOnlyUsedByStores(Op1, DAG)) { 7136 SDValue Word = SDValue(); 7137 EVT WordVT; 7138 7139 // Find a replicated immediate and return it if found in Word and its 7140 // type in WordVT. 7141 auto FindReplicatedImm = [&](ConstantSDNode *C, unsigned TotBytes) { 7142 // Some constants are better handled with a scalar store. 7143 if (C->getAPIntValue().getBitWidth() > 64 || C->isAllOnes() || 7144 isInt<16>(C->getSExtValue()) || MemVT.getStoreSize() <= 2) 7145 return; 7146 SystemZVectorConstantInfo VCI(APInt(TotBytes * 8, C->getZExtValue())); 7147 if (VCI.isVectorConstantLegal(Subtarget) && 7148 VCI.Opcode == SystemZISD::REPLICATE) { 7149 Word = DAG.getConstant(VCI.OpVals[0], SDLoc(SN), MVT::i32); 7150 WordVT = VCI.VecVT.getScalarType(); 7151 } 7152 }; 7153 7154 // Find a replicated register and return it if found in Word and its type 7155 // in WordVT. 7156 auto FindReplicatedReg = [&](SDValue MulOp) { 7157 EVT MulVT = MulOp.getValueType(); 7158 if (MulOp->getOpcode() == ISD::MUL && 7159 (MulVT == MVT::i16 || MulVT == MVT::i32 || MulVT == MVT::i64)) { 7160 // Find a zero extended value and its type. 7161 SDValue LHS = MulOp->getOperand(0); 7162 if (LHS->getOpcode() == ISD::ZERO_EXTEND) 7163 WordVT = LHS->getOperand(0).getValueType(); 7164 else if (LHS->getOpcode() == ISD::AssertZext) 7165 WordVT = cast<VTSDNode>(LHS->getOperand(1))->getVT(); 7166 else 7167 return; 7168 // Find a replicating constant, e.g. 0x00010001. 7169 if (auto *C = dyn_cast<ConstantSDNode>(MulOp->getOperand(1))) { 7170 SystemZVectorConstantInfo VCI( 7171 APInt(MulVT.getSizeInBits(), C->getZExtValue())); 7172 if (VCI.isVectorConstantLegal(Subtarget) && 7173 VCI.Opcode == SystemZISD::REPLICATE && VCI.OpVals[0] == 1 && 7174 WordVT == VCI.VecVT.getScalarType()) 7175 Word = DAG.getZExtOrTrunc(LHS->getOperand(0), SDLoc(SN), WordVT); 7176 } 7177 } 7178 }; 7179 7180 if (isa<BuildVectorSDNode>(Op1) && 7181 DAG.isSplatValue(Op1, true/*AllowUndefs*/)) { 7182 SDValue SplatVal = Op1->getOperand(0); 7183 if (auto *C = dyn_cast<ConstantSDNode>(SplatVal)) 7184 FindReplicatedImm(C, SplatVal.getValueType().getStoreSize()); 7185 else 7186 FindReplicatedReg(SplatVal); 7187 } else { 7188 if (auto *C = dyn_cast<ConstantSDNode>(Op1)) 7189 FindReplicatedImm(C, MemVT.getStoreSize()); 7190 else 7191 FindReplicatedReg(Op1); 7192 } 7193 7194 if (Word != SDValue()) { 7195 assert(MemVT.getSizeInBits() % WordVT.getSizeInBits() == 0 && 7196 "Bad type handling"); 7197 unsigned NumElts = MemVT.getSizeInBits() / WordVT.getSizeInBits(); 7198 EVT SplatVT = EVT::getVectorVT(*DAG.getContext(), WordVT, NumElts); 7199 SDValue SplatVal = DAG.getSplatVector(SplatVT, SDLoc(SN), Word); 7200 return DAG.getStore(SN->getChain(), SDLoc(SN), SplatVal, 7201 SN->getBasePtr(), SN->getMemOperand()); 7202 } 7203 } 7204 7205 return SDValue(); 7206 } 7207 7208 SDValue SystemZTargetLowering::combineVECTOR_SHUFFLE( 7209 SDNode *N, DAGCombinerInfo &DCI) const { 7210 SelectionDAG &DAG = DCI.DAG; 7211 // Combine element-swap (LOAD) into VLER 7212 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) && 7213 N->getOperand(0).hasOneUse() && 7214 Subtarget.hasVectorEnhancements2()) { 7215 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); 7216 ArrayRef<int> ShuffleMask = SVN->getMask(); 7217 if (isVectorElementSwap(ShuffleMask, N->getValueType(0))) { 7218 SDValue Load = N->getOperand(0); 7219 LoadSDNode *LD = cast<LoadSDNode>(Load); 7220 7221 // Create the element-swapping load. 
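// VLER loads the vector with its elements in reversed order, which makes
// the separate VECTOR_SHUFFLE unnecessary.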
7222 SDValue Ops[] = { 7223 LD->getChain(), // Chain 7224 LD->getBasePtr() // Ptr 7225 }; 7226 SDValue ESLoad = 7227 DAG.getMemIntrinsicNode(SystemZISD::VLER, SDLoc(N), 7228 DAG.getVTList(LD->getValueType(0), MVT::Other), 7229 Ops, LD->getMemoryVT(), LD->getMemOperand()); 7230 7231 // First, combine the VECTOR_SHUFFLE away. This makes the value produced 7232 // by the load dead. 7233 DCI.CombineTo(N, ESLoad); 7234 7235 // Next, combine the load away, we give it a bogus result value but a real 7236 // chain result. The result value is dead because the shuffle is dead. 7237 DCI.CombineTo(Load.getNode(), ESLoad, ESLoad.getValue(1)); 7238 7239 // Return N so it doesn't get rechecked! 7240 return SDValue(N, 0); 7241 } 7242 } 7243 7244 return SDValue(); 7245 } 7246 7247 SDValue SystemZTargetLowering::combineEXTRACT_VECTOR_ELT( 7248 SDNode *N, DAGCombinerInfo &DCI) const { 7249 SelectionDAG &DAG = DCI.DAG; 7250 7251 if (!Subtarget.hasVector()) 7252 return SDValue(); 7253 7254 // Look through bitcasts that retain the number of vector elements. 7255 SDValue Op = N->getOperand(0); 7256 if (Op.getOpcode() == ISD::BITCAST && 7257 Op.getValueType().isVector() && 7258 Op.getOperand(0).getValueType().isVector() && 7259 Op.getValueType().getVectorNumElements() == 7260 Op.getOperand(0).getValueType().getVectorNumElements()) 7261 Op = Op.getOperand(0); 7262 7263 // Pull BSWAP out of a vector extraction. 7264 if (Op.getOpcode() == ISD::BSWAP && Op.hasOneUse()) { 7265 EVT VecVT = Op.getValueType(); 7266 EVT EltVT = VecVT.getVectorElementType(); 7267 Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), EltVT, 7268 Op.getOperand(0), N->getOperand(1)); 7269 DCI.AddToWorklist(Op.getNode()); 7270 Op = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Op); 7271 if (EltVT != N->getValueType(0)) { 7272 DCI.AddToWorklist(Op.getNode()); 7273 Op = DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Op); 7274 } 7275 return Op; 7276 } 7277 7278 // Try to simplify a vector extraction. 7279 if (auto *IndexN = dyn_cast<ConstantSDNode>(N->getOperand(1))) { 7280 SDValue Op0 = N->getOperand(0); 7281 EVT VecVT = Op0.getValueType(); 7282 return combineExtract(SDLoc(N), N->getValueType(0), VecVT, Op0, 7283 IndexN->getZExtValue(), DCI, false); 7284 } 7285 return SDValue(); 7286 } 7287 7288 SDValue SystemZTargetLowering::combineJOIN_DWORDS( 7289 SDNode *N, DAGCombinerInfo &DCI) const { 7290 SelectionDAG &DAG = DCI.DAG; 7291 // (join_dwords X, X) == (replicate X) 7292 if (N->getOperand(0) == N->getOperand(1)) 7293 return DAG.getNode(SystemZISD::REPLICATE, SDLoc(N), N->getValueType(0), 7294 N->getOperand(0)); 7295 return SDValue(); 7296 } 7297 7298 static SDValue MergeInputChains(SDNode *N1, SDNode *N2) { 7299 SDValue Chain1 = N1->getOperand(0); 7300 SDValue Chain2 = N2->getOperand(0); 7301 7302 // Trivial case: both nodes take the same chain. 7303 if (Chain1 == Chain2) 7304 return Chain1; 7305 7306 // FIXME - we could handle more complex cases via TokenFactor, 7307 // assuming we can verify that this would not create a cycle. 7308 return SDValue(); 7309 } 7310 7311 SDValue SystemZTargetLowering::combineFP_ROUND( 7312 SDNode *N, DAGCombinerInfo &DCI) const { 7313 7314 if (!Subtarget.hasVector()) 7315 return SDValue(); 7316 7317 // (fpround (extract_vector_elt X 0)) 7318 // (fpround (extract_vector_elt X 1)) -> 7319 // (extract_vector_elt (VROUND X) 0) 7320 // (extract_vector_elt (VROUND X) 2) 7321 // 7322 // This is a special case since the target doesn't really support v2f32s. 7323 unsigned OpNo = N->isStrictFPOpcode() ? 
1 : 0; 7324 SelectionDAG &DAG = DCI.DAG; 7325 SDValue Op0 = N->getOperand(OpNo); 7326 if (N->getValueType(0) == MVT::f32 && Op0.hasOneUse() && 7327 Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && 7328 Op0.getOperand(0).getValueType() == MVT::v2f64 && 7329 Op0.getOperand(1).getOpcode() == ISD::Constant && 7330 Op0.getConstantOperandVal(1) == 0) { 7331 SDValue Vec = Op0.getOperand(0); 7332 for (auto *U : Vec->uses()) { 7333 if (U != Op0.getNode() && U->hasOneUse() && 7334 U->getOpcode() == ISD::EXTRACT_VECTOR_ELT && 7335 U->getOperand(0) == Vec && 7336 U->getOperand(1).getOpcode() == ISD::Constant && 7337 U->getConstantOperandVal(1) == 1) { 7338 SDValue OtherRound = SDValue(*U->use_begin(), 0); 7339 if (OtherRound.getOpcode() == N->getOpcode() && 7340 OtherRound.getOperand(OpNo) == SDValue(U, 0) && 7341 OtherRound.getValueType() == MVT::f32) { 7342 SDValue VRound, Chain; 7343 if (N->isStrictFPOpcode()) { 7344 Chain = MergeInputChains(N, OtherRound.getNode()); 7345 if (!Chain) 7346 continue; 7347 VRound = DAG.getNode(SystemZISD::STRICT_VROUND, SDLoc(N), 7348 {MVT::v4f32, MVT::Other}, {Chain, Vec}); 7349 Chain = VRound.getValue(1); 7350 } else 7351 VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N), 7352 MVT::v4f32, Vec); 7353 DCI.AddToWorklist(VRound.getNode()); 7354 SDValue Extract1 = 7355 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f32, 7356 VRound, DAG.getConstant(2, SDLoc(U), MVT::i32)); 7357 DCI.AddToWorklist(Extract1.getNode()); 7358 DAG.ReplaceAllUsesOfValueWith(OtherRound, Extract1); 7359 if (Chain) 7360 DAG.ReplaceAllUsesOfValueWith(OtherRound.getValue(1), Chain); 7361 SDValue Extract0 = 7362 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32, 7363 VRound, DAG.getConstant(0, SDLoc(Op0), MVT::i32)); 7364 if (Chain) 7365 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0), 7366 N->getVTList(), Extract0, Chain); 7367 return Extract0; 7368 } 7369 } 7370 } 7371 } 7372 return SDValue(); 7373 } 7374 7375 SDValue SystemZTargetLowering::combineFP_EXTEND( 7376 SDNode *N, DAGCombinerInfo &DCI) const { 7377 7378 if (!Subtarget.hasVector()) 7379 return SDValue(); 7380 7381 // (fpextend (extract_vector_elt X 0)) 7382 // (fpextend (extract_vector_elt X 2)) -> 7383 // (extract_vector_elt (VEXTEND X) 0) 7384 // (extract_vector_elt (VEXTEND X) 1) 7385 // 7386 // This is a special case since the target doesn't really support v2f32s. 7387 unsigned OpNo = N->isStrictFPOpcode() ? 
1 : 0; 7388 SelectionDAG &DAG = DCI.DAG; 7389 SDValue Op0 = N->getOperand(OpNo); 7390 if (N->getValueType(0) == MVT::f64 && Op0.hasOneUse() && 7391 Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && 7392 Op0.getOperand(0).getValueType() == MVT::v4f32 && 7393 Op0.getOperand(1).getOpcode() == ISD::Constant && 7394 Op0.getConstantOperandVal(1) == 0) { 7395 SDValue Vec = Op0.getOperand(0); 7396 for (auto *U : Vec->uses()) { 7397 if (U != Op0.getNode() && U->hasOneUse() && 7398 U->getOpcode() == ISD::EXTRACT_VECTOR_ELT && 7399 U->getOperand(0) == Vec && 7400 U->getOperand(1).getOpcode() == ISD::Constant && 7401 U->getConstantOperandVal(1) == 2) { 7402 SDValue OtherExtend = SDValue(*U->use_begin(), 0); 7403 if (OtherExtend.getOpcode() == N->getOpcode() && 7404 OtherExtend.getOperand(OpNo) == SDValue(U, 0) && 7405 OtherExtend.getValueType() == MVT::f64) { 7406 SDValue VExtend, Chain; 7407 if (N->isStrictFPOpcode()) { 7408 Chain = MergeInputChains(N, OtherExtend.getNode()); 7409 if (!Chain) 7410 continue; 7411 VExtend = DAG.getNode(SystemZISD::STRICT_VEXTEND, SDLoc(N), 7412 {MVT::v2f64, MVT::Other}, {Chain, Vec}); 7413 Chain = VExtend.getValue(1); 7414 } else 7415 VExtend = DAG.getNode(SystemZISD::VEXTEND, SDLoc(N), 7416 MVT::v2f64, Vec); 7417 DCI.AddToWorklist(VExtend.getNode()); 7418 SDValue Extract1 = 7419 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f64, 7420 VExtend, DAG.getConstant(1, SDLoc(U), MVT::i32)); 7421 DCI.AddToWorklist(Extract1.getNode()); 7422 DAG.ReplaceAllUsesOfValueWith(OtherExtend, Extract1); 7423 if (Chain) 7424 DAG.ReplaceAllUsesOfValueWith(OtherExtend.getValue(1), Chain); 7425 SDValue Extract0 = 7426 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f64, 7427 VExtend, DAG.getConstant(0, SDLoc(Op0), MVT::i32)); 7428 if (Chain) 7429 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0), 7430 N->getVTList(), Extract0, Chain); 7431 return Extract0; 7432 } 7433 } 7434 } 7435 } 7436 return SDValue(); 7437 } 7438 7439 SDValue SystemZTargetLowering::combineINT_TO_FP( 7440 SDNode *N, DAGCombinerInfo &DCI) const { 7441 if (DCI.Level != BeforeLegalizeTypes) 7442 return SDValue(); 7443 SelectionDAG &DAG = DCI.DAG; 7444 LLVMContext &Ctx = *DAG.getContext(); 7445 unsigned Opcode = N->getOpcode(); 7446 EVT OutVT = N->getValueType(0); 7447 Type *OutLLVMTy = OutVT.getTypeForEVT(Ctx); 7448 SDValue Op = N->getOperand(0); 7449 unsigned OutScalarBits = OutLLVMTy->getScalarSizeInBits(); 7450 unsigned InScalarBits = Op->getValueType(0).getScalarSizeInBits(); 7451 7452 // Insert an extension before type-legalization to avoid scalarization, e.g.: 7453 // v2f64 = uint_to_fp v2i16 7454 // => 7455 // v2f64 = uint_to_fp (v2i64 zero_extend v2i16) 7456 if (OutLLVMTy->isVectorTy() && OutScalarBits > InScalarBits && 7457 OutScalarBits <= 64) { 7458 unsigned NumElts = cast<FixedVectorType>(OutLLVMTy)->getNumElements(); 7459 EVT ExtVT = EVT::getVectorVT( 7460 Ctx, EVT::getIntegerVT(Ctx, OutLLVMTy->getScalarSizeInBits()), NumElts); 7461 unsigned ExtOpcode = 7462 (Opcode == ISD::UINT_TO_FP ? 
ISD::ZERO_EXTEND : ISD::SIGN_EXTEND); 7463 SDValue ExtOp = DAG.getNode(ExtOpcode, SDLoc(N), ExtVT, Op); 7464 return DAG.getNode(Opcode, SDLoc(N), OutVT, ExtOp); 7465 } 7466 return SDValue(); 7467 } 7468 7469 SDValue SystemZTargetLowering::combineBSWAP( 7470 SDNode *N, DAGCombinerInfo &DCI) const { 7471 SelectionDAG &DAG = DCI.DAG; 7472 // Combine BSWAP (LOAD) into LRVH/LRV/LRVG/VLBR 7473 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) && 7474 N->getOperand(0).hasOneUse() && 7475 canLoadStoreByteSwapped(N->getValueType(0))) { 7476 SDValue Load = N->getOperand(0); 7477 LoadSDNode *LD = cast<LoadSDNode>(Load); 7478 7479 // Create the byte-swapping load. 7480 SDValue Ops[] = { 7481 LD->getChain(), // Chain 7482 LD->getBasePtr() // Ptr 7483 }; 7484 EVT LoadVT = N->getValueType(0); 7485 if (LoadVT == MVT::i16) 7486 LoadVT = MVT::i32; 7487 SDValue BSLoad = 7488 DAG.getMemIntrinsicNode(SystemZISD::LRV, SDLoc(N), 7489 DAG.getVTList(LoadVT, MVT::Other), 7490 Ops, LD->getMemoryVT(), LD->getMemOperand()); 7491 7492 // If this is an i16 load, insert the truncate. 7493 SDValue ResVal = BSLoad; 7494 if (N->getValueType(0) == MVT::i16) 7495 ResVal = DAG.getNode(ISD::TRUNCATE, SDLoc(N), MVT::i16, BSLoad); 7496 7497 // First, combine the bswap away. This makes the value produced by the 7498 // load dead. 7499 DCI.CombineTo(N, ResVal); 7500 7501 // Next, combine the load away, we give it a bogus result value but a real 7502 // chain result. The result value is dead because the bswap is dead. 7503 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1)); 7504 7505 // Return N so it doesn't get rechecked! 7506 return SDValue(N, 0); 7507 } 7508 7509 // Look through bitcasts that retain the number of vector elements. 7510 SDValue Op = N->getOperand(0); 7511 if (Op.getOpcode() == ISD::BITCAST && 7512 Op.getValueType().isVector() && 7513 Op.getOperand(0).getValueType().isVector() && 7514 Op.getValueType().getVectorNumElements() == 7515 Op.getOperand(0).getValueType().getVectorNumElements()) 7516 Op = Op.getOperand(0); 7517 7518 // Push BSWAP into a vector insertion if at least one side then simplifies. 7519 if (Op.getOpcode() == ISD::INSERT_VECTOR_ELT && Op.hasOneUse()) { 7520 SDValue Vec = Op.getOperand(0); 7521 SDValue Elt = Op.getOperand(1); 7522 SDValue Idx = Op.getOperand(2); 7523 7524 if (DAG.isConstantIntBuildVectorOrConstantInt(Vec) || 7525 Vec.getOpcode() == ISD::BSWAP || Vec.isUndef() || 7526 DAG.isConstantIntBuildVectorOrConstantInt(Elt) || 7527 Elt.getOpcode() == ISD::BSWAP || Elt.isUndef() || 7528 (canLoadStoreByteSwapped(N->getValueType(0)) && 7529 ISD::isNON_EXTLoad(Elt.getNode()) && Elt.hasOneUse())) { 7530 EVT VecVT = N->getValueType(0); 7531 EVT EltVT = N->getValueType(0).getVectorElementType(); 7532 if (VecVT != Vec.getValueType()) { 7533 Vec = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Vec); 7534 DCI.AddToWorklist(Vec.getNode()); 7535 } 7536 if (EltVT != Elt.getValueType()) { 7537 Elt = DAG.getNode(ISD::BITCAST, SDLoc(N), EltVT, Elt); 7538 DCI.AddToWorklist(Elt.getNode()); 7539 } 7540 Vec = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Vec); 7541 DCI.AddToWorklist(Vec.getNode()); 7542 Elt = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Elt); 7543 DCI.AddToWorklist(Elt.getNode()); 7544 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VecVT, 7545 Vec, Elt, Idx); 7546 } 7547 } 7548 7549 // Push BSWAP into a vector shuffle if at least one side then simplifies. 
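// That is, (bswap (shuffle X, Y)) -> (shuffle (bswap X), (bswap Y)), which
// pays off when at least one of the new per-operand swaps folds away
// (constant, undef, or already a BSWAP).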
7550 ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(Op);
7551 if (SV && Op.hasOneUse()) {
7552 SDValue Op0 = Op.getOperand(0);
7553 SDValue Op1 = Op.getOperand(1);
7554
7555 if (DAG.isConstantIntBuildVectorOrConstantInt(Op0) ||
7556 Op0.getOpcode() == ISD::BSWAP || Op0.isUndef() ||
7557 DAG.isConstantIntBuildVectorOrConstantInt(Op1) ||
7558 Op1.getOpcode() == ISD::BSWAP || Op1.isUndef()) {
7559 EVT VecVT = N->getValueType(0);
7560 if (VecVT != Op0.getValueType()) {
7561 Op0 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op0);
7562 DCI.AddToWorklist(Op0.getNode());
7563 }
7564 if (VecVT != Op1.getValueType()) {
7565 Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op1);
7566 DCI.AddToWorklist(Op1.getNode());
7567 }
7568 Op0 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op0);
7569 DCI.AddToWorklist(Op0.getNode());
7570 Op1 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op1);
7571 DCI.AddToWorklist(Op1.getNode());
7572 return DAG.getVectorShuffle(VecVT, SDLoc(N), Op0, Op1, SV->getMask());
7573 }
7574 }
7575
7576 return SDValue();
7577 }
7578
7579 static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) {
7580 // We have a SELECT_CCMASK or BR_CCMASK comparing the condition code
7581 // set by the CCReg instruction using the CCValid / CCMask masks.
7582 // If the CCReg instruction is itself an ICMP testing the condition
7583 // code set by some other instruction, see whether we can directly
7584 // use that condition code.
7585
7586 // Verify that we have an ICMP against some constant.
7587 if (CCValid != SystemZ::CCMASK_ICMP)
7588 return false;
7589 auto *ICmp = CCReg.getNode();
7590 if (ICmp->getOpcode() != SystemZISD::ICMP)
7591 return false;
7592 auto *CompareLHS = ICmp->getOperand(0).getNode();
7593 auto *CompareRHS = dyn_cast<ConstantSDNode>(ICmp->getOperand(1));
7594 if (!CompareRHS)
7595 return false;
7596
7597 // Optimize the case where CompareLHS is a SELECT_CCMASK.
7598 if (CompareLHS->getOpcode() == SystemZISD::SELECT_CCMASK) {
7599 // Verify that we have an appropriate mask for an EQ or NE comparison.
7600 bool Invert = false;
7601 if (CCMask == SystemZ::CCMASK_CMP_NE)
7602 Invert = !Invert;
7603 else if (CCMask != SystemZ::CCMASK_CMP_EQ)
7604 return false;
7605
7606 // Verify that the ICMP compares against one of the select values.
7607 auto *TrueVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(0));
7608 if (!TrueVal)
7609 return false;
7610 auto *FalseVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1));
7611 if (!FalseVal)
7612 return false;
7613 if (CompareRHS->getZExtValue() == FalseVal->getZExtValue())
7614 Invert = !Invert;
7615 else if (CompareRHS->getZExtValue() != TrueVal->getZExtValue())
7616 return false;
7617
7618 // Compute the effective CC mask for the new branch or select.
7619 auto *NewCCValid = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(2));
7620 auto *NewCCMask = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(3));
7621 if (!NewCCValid || !NewCCMask)
7622 return false;
7623 CCValid = NewCCValid->getZExtValue();
7624 CCMask = NewCCMask->getZExtValue();
7625 if (Invert)
7626 CCMask ^= CCValid;
7627
7628 // Return the updated CCReg link.
7629 CCReg = CompareLHS->getOperand(4);
7630 return true;
7631 }
7632
7633 // Optimize the case where CompareLHS is (SRA (SHL (IPM))).
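// This is the form in which a CC value captured by IPM is converted into a
// signed integer (the CC field is shifted up into the sign bits and then
// arithmetically shifted back down), so an integer compare of that value
// against zero can instead test the original CC directly once the mask has
// been reversed to match the signed ordering.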
7634 if (CompareLHS->getOpcode() == ISD::SRA) { 7635 auto *SRACount = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1)); 7636 if (!SRACount || SRACount->getZExtValue() != 30) 7637 return false; 7638 auto *SHL = CompareLHS->getOperand(0).getNode(); 7639 if (SHL->getOpcode() != ISD::SHL) 7640 return false; 7641 auto *SHLCount = dyn_cast<ConstantSDNode>(SHL->getOperand(1)); 7642 if (!SHLCount || SHLCount->getZExtValue() != 30 - SystemZ::IPM_CC) 7643 return false; 7644 auto *IPM = SHL->getOperand(0).getNode(); 7645 if (IPM->getOpcode() != SystemZISD::IPM) 7646 return false; 7647 7648 // Avoid introducing CC spills (because SRA would clobber CC). 7649 if (!CompareLHS->hasOneUse()) 7650 return false; 7651 // Verify that the ICMP compares against zero. 7652 if (CompareRHS->getZExtValue() != 0) 7653 return false; 7654 7655 // Compute the effective CC mask for the new branch or select. 7656 CCMask = SystemZ::reverseCCMask(CCMask); 7657 7658 // Return the updated CCReg link. 7659 CCReg = IPM->getOperand(0); 7660 return true; 7661 } 7662 7663 return false; 7664 } 7665 7666 SDValue SystemZTargetLowering::combineBR_CCMASK( 7667 SDNode *N, DAGCombinerInfo &DCI) const { 7668 SelectionDAG &DAG = DCI.DAG; 7669 7670 // Combine BR_CCMASK (ICMP (SELECT_CCMASK)) into a single BR_CCMASK. 7671 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1)); 7672 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2)); 7673 if (!CCValid || !CCMask) 7674 return SDValue(); 7675 7676 int CCValidVal = CCValid->getZExtValue(); 7677 int CCMaskVal = CCMask->getZExtValue(); 7678 SDValue Chain = N->getOperand(0); 7679 SDValue CCReg = N->getOperand(4); 7680 7681 if (combineCCMask(CCReg, CCValidVal, CCMaskVal)) 7682 return DAG.getNode(SystemZISD::BR_CCMASK, SDLoc(N), N->getValueType(0), 7683 Chain, 7684 DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32), 7685 DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32), 7686 N->getOperand(3), CCReg); 7687 return SDValue(); 7688 } 7689 7690 SDValue SystemZTargetLowering::combineSELECT_CCMASK( 7691 SDNode *N, DAGCombinerInfo &DCI) const { 7692 SelectionDAG &DAG = DCI.DAG; 7693 7694 // Combine SELECT_CCMASK (ICMP (SELECT_CCMASK)) into a single SELECT_CCMASK. 
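// For example, when the condition is an ICMP that tests the constant 0/1
// result of an inner SELECT_CCMASK for equality, the outer select can reuse
// the inner select's CC operand and mask directly (inverting the mask when
// the comparison sense requires it).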
7695 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(2)); 7696 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(3)); 7697 if (!CCValid || !CCMask) 7698 return SDValue(); 7699 7700 int CCValidVal = CCValid->getZExtValue(); 7701 int CCMaskVal = CCMask->getZExtValue(); 7702 SDValue CCReg = N->getOperand(4); 7703 7704 if (combineCCMask(CCReg, CCValidVal, CCMaskVal)) 7705 return DAG.getNode(SystemZISD::SELECT_CCMASK, SDLoc(N), N->getValueType(0), 7706 N->getOperand(0), N->getOperand(1), 7707 DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32), 7708 DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32), 7709 CCReg); 7710 return SDValue(); 7711 } 7712 7713 7714 SDValue SystemZTargetLowering::combineGET_CCMASK( 7715 SDNode *N, DAGCombinerInfo &DCI) const { 7716 7717 // Optimize away GET_CCMASK (SELECT_CCMASK) if the CC masks are compatible 7718 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1)); 7719 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2)); 7720 if (!CCValid || !CCMask) 7721 return SDValue(); 7722 int CCValidVal = CCValid->getZExtValue(); 7723 int CCMaskVal = CCMask->getZExtValue(); 7724 7725 SDValue Select = N->getOperand(0); 7726 if (Select->getOpcode() == ISD::TRUNCATE) 7727 Select = Select->getOperand(0); 7728 if (Select->getOpcode() != SystemZISD::SELECT_CCMASK) 7729 return SDValue(); 7730 7731 auto *SelectCCValid = dyn_cast<ConstantSDNode>(Select->getOperand(2)); 7732 auto *SelectCCMask = dyn_cast<ConstantSDNode>(Select->getOperand(3)); 7733 if (!SelectCCValid || !SelectCCMask) 7734 return SDValue(); 7735 int SelectCCValidVal = SelectCCValid->getZExtValue(); 7736 int SelectCCMaskVal = SelectCCMask->getZExtValue(); 7737 7738 auto *TrueVal = dyn_cast<ConstantSDNode>(Select->getOperand(0)); 7739 auto *FalseVal = dyn_cast<ConstantSDNode>(Select->getOperand(1)); 7740 if (!TrueVal || !FalseVal) 7741 return SDValue(); 7742 if (TrueVal->getZExtValue() == 1 && FalseVal->getZExtValue() == 0) 7743 ; 7744 else if (TrueVal->getZExtValue() == 0 && FalseVal->getZExtValue() == 1) 7745 SelectCCMaskVal ^= SelectCCValidVal; 7746 else 7747 return SDValue(); 7748 7749 if (SelectCCValidVal & ~CCValidVal) 7750 return SDValue(); 7751 if (SelectCCMaskVal != (CCMaskVal & SelectCCValidVal)) 7752 return SDValue(); 7753 7754 return Select->getOperand(4); 7755 } 7756 7757 SDValue SystemZTargetLowering::combineIntDIVREM( 7758 SDNode *N, DAGCombinerInfo &DCI) const { 7759 SelectionDAG &DAG = DCI.DAG; 7760 EVT VT = N->getValueType(0); 7761 // In the case where the divisor is a vector of constants a cheaper 7762 // sequence of instructions can replace the divide. BuildSDIV is called to 7763 // do this during DAG combining, but it only succeeds when it can build a 7764 // multiplication node. The only option for SystemZ is ISD::SMUL_LOHI, and 7765 // since it is not Legal but Custom it can only happen before 7766 // legalization. Therefore we must scalarize this early before Combine 7767 // 1. For widened vectors, this is already the result of type legalization. 
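// For example, an SDIV of a v2i64 vector by a constant splat is unrolled
// here into two scalar divides, which BuildSDIV can still expand into
// multiply-based sequences before legalization.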
7768 if (DCI.Level == BeforeLegalizeTypes && VT.isVector() && isTypeLegal(VT) && 7769 DAG.isConstantIntBuildVectorOrConstantInt(N->getOperand(1))) 7770 return DAG.UnrollVectorOp(N); 7771 return SDValue(); 7772 } 7773 7774 SDValue SystemZTargetLowering::combineINTRINSIC( 7775 SDNode *N, DAGCombinerInfo &DCI) const { 7776 SelectionDAG &DAG = DCI.DAG; 7777 7778 unsigned Id = N->getConstantOperandVal(1); 7779 switch (Id) { 7780 // VECTOR LOAD (RIGHTMOST) WITH LENGTH with a length operand of 15 7781 // or larger is simply a vector load. 7782 case Intrinsic::s390_vll: 7783 case Intrinsic::s390_vlrl: 7784 if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(2))) 7785 if (C->getZExtValue() >= 15) 7786 return DAG.getLoad(N->getValueType(0), SDLoc(N), N->getOperand(0), 7787 N->getOperand(3), MachinePointerInfo()); 7788 break; 7789 // Likewise for VECTOR STORE (RIGHTMOST) WITH LENGTH. 7790 case Intrinsic::s390_vstl: 7791 case Intrinsic::s390_vstrl: 7792 if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(3))) 7793 if (C->getZExtValue() >= 15) 7794 return DAG.getStore(N->getOperand(0), SDLoc(N), N->getOperand(2), 7795 N->getOperand(4), MachinePointerInfo()); 7796 break; 7797 } 7798 7799 return SDValue(); 7800 } 7801 7802 SDValue SystemZTargetLowering::unwrapAddress(SDValue N) const { 7803 if (N->getOpcode() == SystemZISD::PCREL_WRAPPER) 7804 return N->getOperand(0); 7805 return N; 7806 } 7807 7808 SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N, 7809 DAGCombinerInfo &DCI) const { 7810 switch(N->getOpcode()) { 7811 default: break; 7812 case ISD::ZERO_EXTEND: return combineZERO_EXTEND(N, DCI); 7813 case ISD::SIGN_EXTEND: return combineSIGN_EXTEND(N, DCI); 7814 case ISD::SIGN_EXTEND_INREG: return combineSIGN_EXTEND_INREG(N, DCI); 7815 case SystemZISD::MERGE_HIGH: 7816 case SystemZISD::MERGE_LOW: return combineMERGE(N, DCI); 7817 case ISD::LOAD: return combineLOAD(N, DCI); 7818 case ISD::STORE: return combineSTORE(N, DCI); 7819 case ISD::VECTOR_SHUFFLE: return combineVECTOR_SHUFFLE(N, DCI); 7820 case ISD::EXTRACT_VECTOR_ELT: return combineEXTRACT_VECTOR_ELT(N, DCI); 7821 case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI); 7822 case ISD::STRICT_FP_ROUND: 7823 case ISD::FP_ROUND: return combineFP_ROUND(N, DCI); 7824 case ISD::STRICT_FP_EXTEND: 7825 case ISD::FP_EXTEND: return combineFP_EXTEND(N, DCI); 7826 case ISD::SINT_TO_FP: 7827 case ISD::UINT_TO_FP: return combineINT_TO_FP(N, DCI); 7828 case ISD::BSWAP: return combineBSWAP(N, DCI); 7829 case SystemZISD::BR_CCMASK: return combineBR_CCMASK(N, DCI); 7830 case SystemZISD::SELECT_CCMASK: return combineSELECT_CCMASK(N, DCI); 7831 case SystemZISD::GET_CCMASK: return combineGET_CCMASK(N, DCI); 7832 case ISD::SDIV: 7833 case ISD::UDIV: 7834 case ISD::SREM: 7835 case ISD::UREM: return combineIntDIVREM(N, DCI); 7836 case ISD::INTRINSIC_W_CHAIN: 7837 case ISD::INTRINSIC_VOID: return combineINTRINSIC(N, DCI); 7838 } 7839 7840 return SDValue(); 7841 } 7842 7843 // Return the demanded elements for the OpNo source operand of Op. DemandedElts 7844 // are for Op. 7845 static APInt getDemandedSrcElements(SDValue Op, const APInt &DemandedElts, 7846 unsigned OpNo) { 7847 EVT VT = Op.getValueType(); 7848 unsigned NumElts = (VT.isVector() ? 
VT.getVectorNumElements() : 1); 7849 APInt SrcDemE; 7850 unsigned Opcode = Op.getOpcode(); 7851 if (Opcode == ISD::INTRINSIC_WO_CHAIN) { 7852 unsigned Id = Op.getConstantOperandVal(0); 7853 switch (Id) { 7854 case Intrinsic::s390_vpksh: // PACKS 7855 case Intrinsic::s390_vpksf: 7856 case Intrinsic::s390_vpksg: 7857 case Intrinsic::s390_vpkshs: // PACKS_CC 7858 case Intrinsic::s390_vpksfs: 7859 case Intrinsic::s390_vpksgs: 7860 case Intrinsic::s390_vpklsh: // PACKLS 7861 case Intrinsic::s390_vpklsf: 7862 case Intrinsic::s390_vpklsg: 7863 case Intrinsic::s390_vpklshs: // PACKLS_CC 7864 case Intrinsic::s390_vpklsfs: 7865 case Intrinsic::s390_vpklsgs: 7866 // VECTOR PACK truncates the elements of two source vectors into one. 7867 SrcDemE = DemandedElts; 7868 if (OpNo == 2) 7869 SrcDemE.lshrInPlace(NumElts / 2); 7870 SrcDemE = SrcDemE.trunc(NumElts / 2); 7871 break; 7872 // VECTOR UNPACK extends half the elements of the source vector. 7873 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH 7874 case Intrinsic::s390_vuphh: 7875 case Intrinsic::s390_vuphf: 7876 case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH 7877 case Intrinsic::s390_vuplhh: 7878 case Intrinsic::s390_vuplhf: 7879 SrcDemE = APInt(NumElts * 2, 0); 7880 SrcDemE.insertBits(DemandedElts, 0); 7881 break; 7882 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW 7883 case Intrinsic::s390_vuplhw: 7884 case Intrinsic::s390_vuplf: 7885 case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW 7886 case Intrinsic::s390_vupllh: 7887 case Intrinsic::s390_vupllf: 7888 SrcDemE = APInt(NumElts * 2, 0); 7889 SrcDemE.insertBits(DemandedElts, NumElts); 7890 break; 7891 case Intrinsic::s390_vpdi: { 7892 // VECTOR PERMUTE DWORD IMMEDIATE selects one element from each source. 7893 SrcDemE = APInt(NumElts, 0); 7894 if (!DemandedElts[OpNo - 1]) 7895 break; 7896 unsigned Mask = Op.getConstantOperandVal(3); 7897 unsigned MaskBit = ((OpNo - 1) ? 1 : 4); 7898 // Demand input element 0 or 1, given by the mask bit value. 7899 SrcDemE.setBit((Mask & MaskBit)? 1 : 0); 7900 break; 7901 } 7902 case Intrinsic::s390_vsldb: { 7903 // VECTOR SHIFT LEFT DOUBLE BY BYTE 7904 assert(VT == MVT::v16i8 && "Unexpected type."); 7905 unsigned FirstIdx = Op.getConstantOperandVal(3); 7906 assert (FirstIdx > 0 && FirstIdx < 16 && "Unused operand."); 7907 unsigned NumSrc0Els = 16 - FirstIdx; 7908 SrcDemE = APInt(NumElts, 0); 7909 if (OpNo == 1) { 7910 APInt DemEls = DemandedElts.trunc(NumSrc0Els); 7911 SrcDemE.insertBits(DemEls, FirstIdx); 7912 } else { 7913 APInt DemEls = DemandedElts.lshr(NumSrc0Els); 7914 SrcDemE.insertBits(DemEls, 0); 7915 } 7916 break; 7917 } 7918 case Intrinsic::s390_vperm: 7919 SrcDemE = APInt(NumElts, -1); 7920 break; 7921 default: 7922 llvm_unreachable("Unhandled intrinsic."); 7923 break; 7924 } 7925 } else { 7926 switch (Opcode) { 7927 case SystemZISD::JOIN_DWORDS: 7928 // Scalar operand. 
7929 SrcDemE = APInt(1, 1); 7930 break; 7931 case SystemZISD::SELECT_CCMASK: 7932 SrcDemE = DemandedElts; 7933 break; 7934 default: 7935 llvm_unreachable("Unhandled opcode."); 7936 break; 7937 } 7938 } 7939 return SrcDemE; 7940 } 7941 7942 static void computeKnownBitsBinOp(const SDValue Op, KnownBits &Known, 7943 const APInt &DemandedElts, 7944 const SelectionDAG &DAG, unsigned Depth, 7945 unsigned OpNo) { 7946 APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo); 7947 APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1); 7948 KnownBits LHSKnown = 7949 DAG.computeKnownBits(Op.getOperand(OpNo), Src0DemE, Depth + 1); 7950 KnownBits RHSKnown = 7951 DAG.computeKnownBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1); 7952 Known = LHSKnown.intersectWith(RHSKnown); 7953 } 7954 7955 void 7956 SystemZTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, 7957 KnownBits &Known, 7958 const APInt &DemandedElts, 7959 const SelectionDAG &DAG, 7960 unsigned Depth) const { 7961 Known.resetAll(); 7962 7963 // Intrinsic CC result is returned in the two low bits. 7964 unsigned tmp0, tmp1; // not used 7965 if (Op.getResNo() == 1 && isIntrinsicWithCC(Op, tmp0, tmp1)) { 7966 Known.Zero.setBitsFrom(2); 7967 return; 7968 } 7969 EVT VT = Op.getValueType(); 7970 if (Op.getResNo() != 0 || VT == MVT::Untyped) 7971 return; 7972 assert (Known.getBitWidth() == VT.getScalarSizeInBits() && 7973 "KnownBits does not match VT in bitwidth"); 7974 assert ((!VT.isVector() || 7975 (DemandedElts.getBitWidth() == VT.getVectorNumElements())) && 7976 "DemandedElts does not match VT number of elements"); 7977 unsigned BitWidth = Known.getBitWidth(); 7978 unsigned Opcode = Op.getOpcode(); 7979 if (Opcode == ISD::INTRINSIC_WO_CHAIN) { 7980 bool IsLogical = false; 7981 unsigned Id = Op.getConstantOperandVal(0); 7982 switch (Id) { 7983 case Intrinsic::s390_vpksh: // PACKS 7984 case Intrinsic::s390_vpksf: 7985 case Intrinsic::s390_vpksg: 7986 case Intrinsic::s390_vpkshs: // PACKS_CC 7987 case Intrinsic::s390_vpksfs: 7988 case Intrinsic::s390_vpksgs: 7989 case Intrinsic::s390_vpklsh: // PACKLS 7990 case Intrinsic::s390_vpklsf: 7991 case Intrinsic::s390_vpklsg: 7992 case Intrinsic::s390_vpklshs: // PACKLS_CC 7993 case Intrinsic::s390_vpklsfs: 7994 case Intrinsic::s390_vpklsgs: 7995 case Intrinsic::s390_vpdi: 7996 case Intrinsic::s390_vsldb: 7997 case Intrinsic::s390_vperm: 7998 computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 1); 7999 break; 8000 case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH 8001 case Intrinsic::s390_vuplhh: 8002 case Intrinsic::s390_vuplhf: 8003 case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW 8004 case Intrinsic::s390_vupllh: 8005 case Intrinsic::s390_vupllf: 8006 IsLogical = true; 8007 [[fallthrough]]; 8008 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH 8009 case Intrinsic::s390_vuphh: 8010 case Intrinsic::s390_vuphf: 8011 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW 8012 case Intrinsic::s390_vuplhw: 8013 case Intrinsic::s390_vuplf: { 8014 SDValue SrcOp = Op.getOperand(1); 8015 APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 0); 8016 Known = DAG.computeKnownBits(SrcOp, SrcDemE, Depth + 1); 8017 if (IsLogical) { 8018 Known = Known.zext(BitWidth); 8019 } else 8020 Known = Known.sext(BitWidth); 8021 break; 8022 } 8023 default: 8024 break; 8025 } 8026 } else { 8027 switch (Opcode) { 8028 case SystemZISD::JOIN_DWORDS: 8029 case SystemZISD::SELECT_CCMASK: 8030 computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 0); 8031 break; 8032 case 
SystemZISD::REPLICATE: { 8033 SDValue SrcOp = Op.getOperand(0); 8034 Known = DAG.computeKnownBits(SrcOp, Depth + 1); 8035 if (Known.getBitWidth() < BitWidth && isa<ConstantSDNode>(SrcOp)) 8036 Known = Known.sext(BitWidth); // VREPI sign extends the immedate. 8037 break; 8038 } 8039 default: 8040 break; 8041 } 8042 } 8043 8044 // Known has the width of the source operand(s). Adjust if needed to match 8045 // the passed bitwidth. 8046 if (Known.getBitWidth() != BitWidth) 8047 Known = Known.anyextOrTrunc(BitWidth); 8048 } 8049 8050 static unsigned computeNumSignBitsBinOp(SDValue Op, const APInt &DemandedElts, 8051 const SelectionDAG &DAG, unsigned Depth, 8052 unsigned OpNo) { 8053 APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo); 8054 unsigned LHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo), Src0DemE, Depth + 1); 8055 if (LHS == 1) return 1; // Early out. 8056 APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1); 8057 unsigned RHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1); 8058 if (RHS == 1) return 1; // Early out. 8059 unsigned Common = std::min(LHS, RHS); 8060 unsigned SrcBitWidth = Op.getOperand(OpNo).getScalarValueSizeInBits(); 8061 EVT VT = Op.getValueType(); 8062 unsigned VTBits = VT.getScalarSizeInBits(); 8063 if (SrcBitWidth > VTBits) { // PACK 8064 unsigned SrcExtraBits = SrcBitWidth - VTBits; 8065 if (Common > SrcExtraBits) 8066 return (Common - SrcExtraBits); 8067 return 1; 8068 } 8069 assert (SrcBitWidth == VTBits && "Expected operands of same bitwidth."); 8070 return Common; 8071 } 8072 8073 unsigned 8074 SystemZTargetLowering::ComputeNumSignBitsForTargetNode( 8075 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, 8076 unsigned Depth) const { 8077 if (Op.getResNo() != 0) 8078 return 1; 8079 unsigned Opcode = Op.getOpcode(); 8080 if (Opcode == ISD::INTRINSIC_WO_CHAIN) { 8081 unsigned Id = Op.getConstantOperandVal(0); 8082 switch (Id) { 8083 case Intrinsic::s390_vpksh: // PACKS 8084 case Intrinsic::s390_vpksf: 8085 case Intrinsic::s390_vpksg: 8086 case Intrinsic::s390_vpkshs: // PACKS_CC 8087 case Intrinsic::s390_vpksfs: 8088 case Intrinsic::s390_vpksgs: 8089 case Intrinsic::s390_vpklsh: // PACKLS 8090 case Intrinsic::s390_vpklsf: 8091 case Intrinsic::s390_vpklsg: 8092 case Intrinsic::s390_vpklshs: // PACKLS_CC 8093 case Intrinsic::s390_vpklsfs: 8094 case Intrinsic::s390_vpklsgs: 8095 case Intrinsic::s390_vpdi: 8096 case Intrinsic::s390_vsldb: 8097 case Intrinsic::s390_vperm: 8098 return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 1); 8099 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH 8100 case Intrinsic::s390_vuphh: 8101 case Intrinsic::s390_vuphf: 8102 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW 8103 case Intrinsic::s390_vuplhw: 8104 case Intrinsic::s390_vuplf: { 8105 SDValue PackedOp = Op.getOperand(1); 8106 APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 1); 8107 unsigned Tmp = DAG.ComputeNumSignBits(PackedOp, SrcDemE, Depth + 1); 8108 EVT VT = Op.getValueType(); 8109 unsigned VTBits = VT.getScalarSizeInBits(); 8110 Tmp += VTBits - PackedOp.getScalarValueSizeInBits(); 8111 return Tmp; 8112 } 8113 default: 8114 break; 8115 } 8116 } else { 8117 switch (Opcode) { 8118 case SystemZISD::SELECT_CCMASK: 8119 return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 0); 8120 default: 8121 break; 8122 } 8123 } 8124 8125 return 1; 8126 } 8127 8128 bool SystemZTargetLowering:: 8129 isGuaranteedNotToBeUndefOrPoisonForTargetNode(SDValue Op, 8130 const APInt &DemandedElts, const 
SelectionDAG &DAG, 8131 bool PoisonOnly, unsigned Depth) const { 8132 switch (Op->getOpcode()) { 8133 case SystemZISD::PCREL_WRAPPER: 8134 case SystemZISD::PCREL_OFFSET: 8135 return true; 8136 } 8137 return false; 8138 } 8139 8140 unsigned 8141 SystemZTargetLowering::getStackProbeSize(const MachineFunction &MF) const { 8142 const TargetFrameLowering *TFI = Subtarget.getFrameLowering(); 8143 unsigned StackAlign = TFI->getStackAlignment(); 8144 assert(StackAlign >=1 && isPowerOf2_32(StackAlign) && 8145 "Unexpected stack alignment"); 8146 // The default stack probe size is 4096 if the function has no 8147 // stack-probe-size attribute. 8148 unsigned StackProbeSize = 8149 MF.getFunction().getFnAttributeAsParsedInteger("stack-probe-size", 4096); 8150 // Round down to the stack alignment. 8151 StackProbeSize &= ~(StackAlign - 1); 8152 return StackProbeSize ? StackProbeSize : StackAlign; 8153 } 8154 8155 //===----------------------------------------------------------------------===// 8156 // Custom insertion 8157 //===----------------------------------------------------------------------===// 8158 8159 // Force base value Base into a register before MI. Return the register. 8160 static Register forceReg(MachineInstr &MI, MachineOperand &Base, 8161 const SystemZInstrInfo *TII) { 8162 MachineBasicBlock *MBB = MI.getParent(); 8163 MachineFunction &MF = *MBB->getParent(); 8164 MachineRegisterInfo &MRI = MF.getRegInfo(); 8165 8166 if (Base.isReg()) { 8167 // Copy Base into a new virtual register to help register coalescing in 8168 // cases with multiple uses. 8169 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass); 8170 BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::COPY), Reg) 8171 .add(Base); 8172 return Reg; 8173 } 8174 8175 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass); 8176 BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LA), Reg) 8177 .add(Base) 8178 .addImm(0) 8179 .addReg(0); 8180 return Reg; 8181 } 8182 8183 // The CC operand of MI might be missing a kill marker because there 8184 // were multiple uses of CC, and ISel didn't know which to mark. 8185 // Figure out whether MI should have had a kill marker. 8186 static bool checkCCKill(MachineInstr &MI, MachineBasicBlock *MBB) { 8187 // Scan forward through BB for a use/def of CC. 8188 MachineBasicBlock::iterator miI(std::next(MachineBasicBlock::iterator(MI))); 8189 for (MachineBasicBlock::iterator miE = MBB->end(); miI != miE; ++miI) { 8190 const MachineInstr& mi = *miI; 8191 if (mi.readsRegister(SystemZ::CC, /*TRI=*/nullptr)) 8192 return false; 8193 if (mi.definesRegister(SystemZ::CC, /*TRI=*/nullptr)) 8194 break; // Should have kill-flag - update below. 8195 } 8196 8197 // If we hit the end of the block, check whether CC is live into a 8198 // successor. 8199 if (miI == MBB->end()) { 8200 for (const MachineBasicBlock *Succ : MBB->successors()) 8201 if (Succ->isLiveIn(SystemZ::CC)) 8202 return false; 8203 } 8204 8205 return true; 8206 } 8207 8208 // Return true if it is OK for this Select pseudo-opcode to be cascaded 8209 // together with other Select pseudo-opcodes into a single basic-block with 8210 // a conditional jump around it. 
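// For example, two Select32 pseudos testing the same (or the inverted) CC
// condition are expanded into one branch-and-join diamond with a PHI per
// select, rather than one diamond per select.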
8211 static bool isSelectPseudo(MachineInstr &MI) { 8212 switch (MI.getOpcode()) { 8213 case SystemZ::Select32: 8214 case SystemZ::Select64: 8215 case SystemZ::Select128: 8216 case SystemZ::SelectF32: 8217 case SystemZ::SelectF64: 8218 case SystemZ::SelectF128: 8219 case SystemZ::SelectVR32: 8220 case SystemZ::SelectVR64: 8221 case SystemZ::SelectVR128: 8222 return true; 8223 8224 default: 8225 return false; 8226 } 8227 } 8228 8229 // Helper function, which inserts PHI functions into SinkMBB: 8230 // %Result(i) = phi [ %FalseValue(i), FalseMBB ], [ %TrueValue(i), TrueMBB ], 8231 // where %FalseValue(i) and %TrueValue(i) are taken from Selects. 8232 static void createPHIsForSelects(SmallVector<MachineInstr*, 8> &Selects, 8233 MachineBasicBlock *TrueMBB, 8234 MachineBasicBlock *FalseMBB, 8235 MachineBasicBlock *SinkMBB) { 8236 MachineFunction *MF = TrueMBB->getParent(); 8237 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); 8238 8239 MachineInstr *FirstMI = Selects.front(); 8240 unsigned CCValid = FirstMI->getOperand(3).getImm(); 8241 unsigned CCMask = FirstMI->getOperand(4).getImm(); 8242 8243 MachineBasicBlock::iterator SinkInsertionPoint = SinkMBB->begin(); 8244 8245 // As we are creating the PHIs, we have to be careful if there is more than 8246 // one. Later Selects may reference the results of earlier Selects, but later 8247 // PHIs have to reference the individual true/false inputs from earlier PHIs. 8248 // That also means that PHI construction must work forward from earlier to 8249 // later, and that the code must maintain a mapping from earlier PHI's 8250 // destination registers, and the registers that went into the PHI. 8251 DenseMap<unsigned, std::pair<unsigned, unsigned>> RegRewriteTable; 8252 8253 for (auto *MI : Selects) { 8254 Register DestReg = MI->getOperand(0).getReg(); 8255 Register TrueReg = MI->getOperand(1).getReg(); 8256 Register FalseReg = MI->getOperand(2).getReg(); 8257 8258 // If this Select we are generating is the opposite condition from 8259 // the jump we generated, then we have to swap the operands for the 8260 // PHI that is going to be generated. 8261 if (MI->getOperand(4).getImm() == (CCValid ^ CCMask)) 8262 std::swap(TrueReg, FalseReg); 8263 8264 if (RegRewriteTable.contains(TrueReg)) 8265 TrueReg = RegRewriteTable[TrueReg].first; 8266 8267 if (RegRewriteTable.contains(FalseReg)) 8268 FalseReg = RegRewriteTable[FalseReg].second; 8269 8270 DebugLoc DL = MI->getDebugLoc(); 8271 BuildMI(*SinkMBB, SinkInsertionPoint, DL, TII->get(SystemZ::PHI), DestReg) 8272 .addReg(TrueReg).addMBB(TrueMBB) 8273 .addReg(FalseReg).addMBB(FalseMBB); 8274 8275 // Add this PHI to the rewrite table. 8276 RegRewriteTable[DestReg] = std::make_pair(TrueReg, FalseReg); 8277 } 8278 8279 MF->getProperties().reset(MachineFunctionProperties::Property::NoPHIs); 8280 } 8281 8282 MachineBasicBlock * 8283 SystemZTargetLowering::emitAdjCallStack(MachineInstr &MI, 8284 MachineBasicBlock *BB) const { 8285 MachineFunction &MF = *BB->getParent(); 8286 MachineFrameInfo &MFI = MF.getFrameInfo(); 8287 auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>(); 8288 assert(TFL->hasReservedCallFrame(MF) && 8289 "ADJSTACKDOWN and ADJSTACKUP should be no-ops"); 8290 (void)TFL; 8291 // Get the MaxCallFrameSize value and erase MI since it serves no further 8292 // purpose as the call frame is statically reserved in the prolog. Set 8293 // AdjustsStack as MI is *not* mapped as a frame instruction. 
8294 uint32_t NumBytes = MI.getOperand(0).getImm(); 8295 if (NumBytes > MFI.getMaxCallFrameSize()) 8296 MFI.setMaxCallFrameSize(NumBytes); 8297 MFI.setAdjustsStack(true); 8298 8299 MI.eraseFromParent(); 8300 return BB; 8301 } 8302 8303 // Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI. 8304 MachineBasicBlock * 8305 SystemZTargetLowering::emitSelect(MachineInstr &MI, 8306 MachineBasicBlock *MBB) const { 8307 assert(isSelectPseudo(MI) && "Bad call to emitSelect()"); 8308 const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); 8309 8310 unsigned CCValid = MI.getOperand(3).getImm(); 8311 unsigned CCMask = MI.getOperand(4).getImm(); 8312 8313 // If we have a sequence of Select* pseudo instructions using the 8314 // same condition code value, we want to expand all of them into 8315 // a single pair of basic blocks using the same condition. 8316 SmallVector<MachineInstr*, 8> Selects; 8317 SmallVector<MachineInstr*, 8> DbgValues; 8318 Selects.push_back(&MI); 8319 unsigned Count = 0; 8320 for (MachineInstr &NextMI : llvm::make_range( 8321 std::next(MachineBasicBlock::iterator(MI)), MBB->end())) { 8322 if (isSelectPseudo(NextMI)) { 8323 assert(NextMI.getOperand(3).getImm() == CCValid && 8324 "Bad CCValid operands since CC was not redefined."); 8325 if (NextMI.getOperand(4).getImm() == CCMask || 8326 NextMI.getOperand(4).getImm() == (CCValid ^ CCMask)) { 8327 Selects.push_back(&NextMI); 8328 continue; 8329 } 8330 break; 8331 } 8332 if (NextMI.definesRegister(SystemZ::CC, /*TRI=*/nullptr) || 8333 NextMI.usesCustomInsertionHook()) 8334 break; 8335 bool User = false; 8336 for (auto *SelMI : Selects) 8337 if (NextMI.readsVirtualRegister(SelMI->getOperand(0).getReg())) { 8338 User = true; 8339 break; 8340 } 8341 if (NextMI.isDebugInstr()) { 8342 if (User) { 8343 assert(NextMI.isDebugValue() && "Unhandled debug opcode."); 8344 DbgValues.push_back(&NextMI); 8345 } 8346 } else if (User || ++Count > 20) 8347 break; 8348 } 8349 8350 MachineInstr *LastMI = Selects.back(); 8351 bool CCKilled = (LastMI->killsRegister(SystemZ::CC, /*TRI=*/nullptr) || 8352 checkCCKill(*LastMI, MBB)); 8353 MachineBasicBlock *StartMBB = MBB; 8354 MachineBasicBlock *JoinMBB = SystemZ::splitBlockAfter(LastMI, MBB); 8355 MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB); 8356 8357 // Unless CC was killed in the last Select instruction, mark it as 8358 // live-in to both FalseMBB and JoinMBB. 8359 if (!CCKilled) { 8360 FalseMBB->addLiveIn(SystemZ::CC); 8361 JoinMBB->addLiveIn(SystemZ::CC); 8362 } 8363 8364 // StartMBB: 8365 // BRC CCMask, JoinMBB 8366 // # fallthrough to FalseMBB 8367 MBB = StartMBB; 8368 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::BRC)) 8369 .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB); 8370 MBB->addSuccessor(JoinMBB); 8371 MBB->addSuccessor(FalseMBB); 8372 8373 // FalseMBB: 8374 // # fallthrough to JoinMBB 8375 MBB = FalseMBB; 8376 MBB->addSuccessor(JoinMBB); 8377 8378 // JoinMBB: 8379 // %Result = phi [ %FalseReg, FalseMBB ], [ %TrueReg, StartMBB ] 8380 // ... 8381 MBB = JoinMBB; 8382 createPHIsForSelects(Selects, StartMBB, FalseMBB, MBB); 8383 for (auto *SelMI : Selects) 8384 SelMI->eraseFromParent(); 8385 8386 MachineBasicBlock::iterator InsertPos = MBB->getFirstNonPHI(); 8387 for (auto *DbgMI : DbgValues) 8388 MBB->splice(InsertPos, StartMBB, DbgMI); 8389 8390 return JoinMBB; 8391 } 8392 8393 // Implement EmitInstrWithCustomInserter for pseudo CondStore* instruction MI. 
8394 // StoreOpcode is the store to use and Invert says whether the store should 8395 // happen when the condition is false rather than true. If a STORE ON 8396 // CONDITION is available, STOCOpcode is its opcode, otherwise it is 0. 8397 MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI, 8398 MachineBasicBlock *MBB, 8399 unsigned StoreOpcode, 8400 unsigned STOCOpcode, 8401 bool Invert) const { 8402 const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); 8403 8404 Register SrcReg = MI.getOperand(0).getReg(); 8405 MachineOperand Base = MI.getOperand(1); 8406 int64_t Disp = MI.getOperand(2).getImm(); 8407 Register IndexReg = MI.getOperand(3).getReg(); 8408 unsigned CCValid = MI.getOperand(4).getImm(); 8409 unsigned CCMask = MI.getOperand(5).getImm(); 8410 DebugLoc DL = MI.getDebugLoc(); 8411 8412 StoreOpcode = TII->getOpcodeForOffset(StoreOpcode, Disp); 8413 8414 // ISel pattern matching also adds a load memory operand of the same 8415 // address, so take special care to find the storing memory operand. 8416 MachineMemOperand *MMO = nullptr; 8417 for (auto *I : MI.memoperands()) 8418 if (I->isStore()) { 8419 MMO = I; 8420 break; 8421 } 8422 8423 // Use STOCOpcode if possible. We could use different store patterns in 8424 // order to avoid matching the index register, but the performance trade-offs 8425 // might be more complicated in that case. 8426 if (STOCOpcode && !IndexReg && Subtarget.hasLoadStoreOnCond()) { 8427 if (Invert) 8428 CCMask ^= CCValid; 8429 8430 BuildMI(*MBB, MI, DL, TII->get(STOCOpcode)) 8431 .addReg(SrcReg) 8432 .add(Base) 8433 .addImm(Disp) 8434 .addImm(CCValid) 8435 .addImm(CCMask) 8436 .addMemOperand(MMO); 8437 8438 MI.eraseFromParent(); 8439 return MBB; 8440 } 8441 8442 // Get the condition needed to branch around the store. 8443 if (!Invert) 8444 CCMask ^= CCValid; 8445 8446 MachineBasicBlock *StartMBB = MBB; 8447 MachineBasicBlock *JoinMBB = SystemZ::splitBlockBefore(MI, MBB); 8448 MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB); 8449 8450 // Unless CC was killed in the CondStore instruction, mark it as 8451 // live-in to both FalseMBB and JoinMBB. 8452 if (!MI.killsRegister(SystemZ::CC, /*TRI=*/nullptr) && 8453 !checkCCKill(MI, JoinMBB)) { 8454 FalseMBB->addLiveIn(SystemZ::CC); 8455 JoinMBB->addLiveIn(SystemZ::CC); 8456 } 8457 8458 // StartMBB: 8459 // BRC CCMask, JoinMBB 8460 // # fallthrough to FalseMBB 8461 MBB = StartMBB; 8462 BuildMI(MBB, DL, TII->get(SystemZ::BRC)) 8463 .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB); 8464 MBB->addSuccessor(JoinMBB); 8465 MBB->addSuccessor(FalseMBB); 8466 8467 // FalseMBB: 8468 // store %SrcReg, %Disp(%Index,%Base) 8469 // # fallthrough to JoinMBB 8470 MBB = FalseMBB; 8471 BuildMI(MBB, DL, TII->get(StoreOpcode)) 8472 .addReg(SrcReg) 8473 .add(Base) 8474 .addImm(Disp) 8475 .addReg(IndexReg) 8476 .addMemOperand(MMO); 8477 MBB->addSuccessor(JoinMBB); 8478 8479 MI.eraseFromParent(); 8480 return JoinMBB; 8481 } 8482 8483 // Implement EmitInstrWithCustomInserter for pseudo [SU]Cmp128Hi instruction MI. 8484 MachineBasicBlock * 8485 SystemZTargetLowering::emitICmp128Hi(MachineInstr &MI, 8486 MachineBasicBlock *MBB, 8487 bool Unsigned) const { 8488 MachineFunction &MF = *MBB->getParent(); 8489 const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); 8490 MachineRegisterInfo &MRI = MF.getRegInfo(); 8491 8492 // Synthetic instruction to compare 128-bit values. 8493 // Sets CC 1 if Op0 > Op1, sets a different CC otherwise. 
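// Consumers test for CC 1; when the high halves are equal, the result is
// decided by the logical compare of the low halves emitted in HiEqMBB below.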
8494 Register Op0 = MI.getOperand(0).getReg();
8495 Register Op1 = MI.getOperand(1).getReg();
8496
8497 MachineBasicBlock *StartMBB = MBB;
8498 MachineBasicBlock *JoinMBB = SystemZ::splitBlockAfter(MI, MBB);
8499 MachineBasicBlock *HiEqMBB = SystemZ::emitBlockAfter(StartMBB);
8500
8501 // StartMBB:
8502 //
8503 // Use VECTOR ELEMENT COMPARE [LOGICAL] to compare the high parts.
8504 // Swap the inputs to get:
8505 // CC 1 if high(Op0) > high(Op1)
8506 // CC 2 if high(Op0) < high(Op1)
8507 // CC 0 if high(Op0) == high(Op1)
8508 //
8509 // If CC != 0, we're done, so jump over the next instruction.
8510 //
8511 // VEC[L]G Op1, Op0
8512 // JNE JoinMBB
8513 // # fallthrough to HiEqMBB
8514 MBB = StartMBB;
8515 int HiOpcode = Unsigned ? SystemZ::VECLG : SystemZ::VECG;
8516 BuildMI(MBB, MI.getDebugLoc(), TII->get(HiOpcode))
8517 .addReg(Op1).addReg(Op0);
8518 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::BRC))
8519 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE).addMBB(JoinMBB);
8520 MBB->addSuccessor(JoinMBB);
8521 MBB->addSuccessor(HiEqMBB);
8522
8523 // HiEqMBB:
8524 //
8525 // Otherwise, use VECTOR COMPARE HIGH LOGICAL.
8526 // Since we already know the high parts are equal, the CC
8527 // result will only depend on the low parts:
8528 // CC 1 if low(Op0) > low(Op1)
8529 // CC 3 if low(Op0) <= low(Op1)
8530 //
8531 // VCHLGS Tmp, Op0, Op1
8532 // # fallthrough to JoinMBB
8533 MBB = HiEqMBB;
8534 Register Temp = MRI.createVirtualRegister(&SystemZ::VR128BitRegClass);
8535 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::VCHLGS), Temp)
8536 .addReg(Op0).addReg(Op1);
8537 MBB->addSuccessor(JoinMBB);
8538
8539 // Mark CC as live-in to JoinMBB.
8540 JoinMBB->addLiveIn(SystemZ::CC);
8541
8542 MI.eraseFromParent();
8543 return JoinMBB;
8544 }
8545
8546 // Implement EmitInstrWithCustomInserter for subword pseudo ATOMIC_LOADW_* or
8547 // ATOMIC_SWAPW instruction MI. BinOpcode is the instruction that performs
8548 // the binary operation elided by "*", or 0 for ATOMIC_SWAPW. Invert says
8549 // whether the field should be inverted after performing BinOpcode (e.g. for
8550 // NAND).
8551 MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(
8552 MachineInstr &MI, MachineBasicBlock *MBB, unsigned BinOpcode,
8553 bool Invert) const {
8554 MachineFunction &MF = *MBB->getParent();
8555 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8556 MachineRegisterInfo &MRI = MF.getRegInfo();
8557
8558 // Extract the operands. Base can be a register or a frame index.
8559 // Src2 can be a register or immediate.
8560 Register Dest = MI.getOperand(0).getReg();
8561 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
8562 int64_t Disp = MI.getOperand(2).getImm();
8563 MachineOperand Src2 = earlyUseOperand(MI.getOperand(3));
8564 Register BitShift = MI.getOperand(4).getReg();
8565 Register NegBitShift = MI.getOperand(5).getReg();
8566 unsigned BitSize = MI.getOperand(6).getImm();
8567 DebugLoc DL = MI.getDebugLoc();
8568
8569 // Get the right opcodes for the displacement.
8570 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
8571 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
8572 assert(LOpcode && CSOpcode && "Displacement out of range");
8573
8574 // Create virtual registers for temporary results.
8575 Register OrigVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8576 Register OldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8577 Register NewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8578 Register RotatedOldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8579 Register RotatedNewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8580
8581 // Insert a basic block for the main loop.
8582 MachineBasicBlock *StartMBB = MBB;
8583 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
8584 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
8585
8586 // StartMBB:
8587 // ...
8588 // %OrigVal = L Disp(%Base)
8589 // # fall through to LoopMBB
8590 MBB = StartMBB;
8591 BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
8592 MBB->addSuccessor(LoopMBB);
8593
8594 // LoopMBB:
8595 // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, LoopMBB ]
8596 // %RotatedOldVal = RLL %OldVal, 0(%BitShift)
8597 // %RotatedNewVal = OP %RotatedOldVal, %Src2
8598 // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift)
8599 // %Dest = CS %OldVal, %NewVal, Disp(%Base)
8600 // JNE LoopMBB
8601 // # fall through to DoneMBB
8602 MBB = LoopMBB;
8603 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
8604 .addReg(OrigVal).addMBB(StartMBB)
8605 .addReg(Dest).addMBB(LoopMBB);
8606 BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
8607 .addReg(OldVal).addReg(BitShift).addImm(0);
8608 if (Invert) {
8609 // Perform the operation normally and then invert every bit of the field.
8610 Register Tmp = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8611 BuildMI(MBB, DL, TII->get(BinOpcode), Tmp).addReg(RotatedOldVal).add(Src2);
8612 // XILF with the upper BitSize bits set.
8613 BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal)
8614 .addReg(Tmp).addImm(-1U << (32 - BitSize));
8615 } else if (BinOpcode)
8616 // A simple binary operation.
8617 BuildMI(MBB, DL, TII->get(BinOpcode), RotatedNewVal)
8618 .addReg(RotatedOldVal)
8619 .add(Src2);
8620 else
8621 // Use RISBG to rotate Src2 into position and use it to replace the
8622 // field in RotatedOldVal.
8623 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedNewVal)
8624 .addReg(RotatedOldVal).addReg(Src2.getReg())
8625 .addImm(32).addImm(31 + BitSize).addImm(32 - BitSize);
8626 BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
8627 .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
8628 BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
8629 .addReg(OldVal)
8630 .addReg(NewVal)
8631 .add(Base)
8632 .addImm(Disp);
8633 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8634 .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
8635 MBB->addSuccessor(LoopMBB);
8636 MBB->addSuccessor(DoneMBB);
8637
8638 MI.eraseFromParent();
8639 return DoneMBB;
8640 }
8641
8642 // Implement EmitInstrWithCustomInserter for subword pseudo
8643 // ATOMIC_LOADW_{,U}{MIN,MAX} instruction MI. CompareOpcode is the
8644 // instruction that should be used to compare the current field with the
8645 // minimum or maximum value. KeepOldMask is the BRC condition-code mask
8646 // for when the current field should be kept.
8647 MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax(
8648 MachineInstr &MI, MachineBasicBlock *MBB, unsigned CompareOpcode,
8649 unsigned KeepOldMask) const {
8650 MachineFunction &MF = *MBB->getParent();
8651 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8652 MachineRegisterInfo &MRI = MF.getRegInfo();
8653
8654 // Extract the operands.
Base can be a register or a frame index. 8655 Register Dest = MI.getOperand(0).getReg(); 8656 MachineOperand Base = earlyUseOperand(MI.getOperand(1)); 8657 int64_t Disp = MI.getOperand(2).getImm(); 8658 Register Src2 = MI.getOperand(3).getReg(); 8659 Register BitShift = MI.getOperand(4).getReg(); 8660 Register NegBitShift = MI.getOperand(5).getReg(); 8661 unsigned BitSize = MI.getOperand(6).getImm(); 8662 DebugLoc DL = MI.getDebugLoc(); 8663 8664 // Get the right opcodes for the displacement. 8665 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp); 8666 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp); 8667 assert(LOpcode && CSOpcode && "Displacement out of range"); 8668 8669 // Create virtual registers for temporary results. 8670 Register OrigVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass); 8671 Register OldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass); 8672 Register NewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass); 8673 Register RotatedOldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass); 8674 Register RotatedAltVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass); 8675 Register RotatedNewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass); 8676 8677 // Insert 3 basic blocks for the loop. 8678 MachineBasicBlock *StartMBB = MBB; 8679 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB); 8680 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB); 8681 MachineBasicBlock *UseAltMBB = SystemZ::emitBlockAfter(LoopMBB); 8682 MachineBasicBlock *UpdateMBB = SystemZ::emitBlockAfter(UseAltMBB); 8683 8684 // StartMBB: 8685 // ... 8686 // %OrigVal = L Disp(%Base) 8687 // # fall through to LoopMBB 8688 MBB = StartMBB; 8689 BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0); 8690 MBB->addSuccessor(LoopMBB); 8691 8692 // LoopMBB: 8693 // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, UpdateMBB ] 8694 // %RotatedOldVal = RLL %OldVal, 0(%BitShift) 8695 // CompareOpcode %RotatedOldVal, %Src2 8696 // BRC KeepOldMask, UpdateMBB 8697 MBB = LoopMBB; 8698 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal) 8699 .addReg(OrigVal).addMBB(StartMBB) 8700 .addReg(Dest).addMBB(UpdateMBB); 8701 BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal) 8702 .addReg(OldVal).addReg(BitShift).addImm(0); 8703 BuildMI(MBB, DL, TII->get(CompareOpcode)) 8704 .addReg(RotatedOldVal).addReg(Src2); 8705 BuildMI(MBB, DL, TII->get(SystemZ::BRC)) 8706 .addImm(SystemZ::CCMASK_ICMP).addImm(KeepOldMask).addMBB(UpdateMBB); 8707 MBB->addSuccessor(UpdateMBB); 8708 MBB->addSuccessor(UseAltMBB); 8709 8710 // UseAltMBB: 8711 // %RotatedAltVal = RISBG %RotatedOldVal, %Src2, 32, 31 + BitSize, 0 8712 // # fall through to UpdateMBB 8713 MBB = UseAltMBB; 8714 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedAltVal) 8715 .addReg(RotatedOldVal).addReg(Src2) 8716 .addImm(32).addImm(31 + BitSize).addImm(0); 8717 MBB->addSuccessor(UpdateMBB); 8718 8719 // UpdateMBB: 8720 // %RotatedNewVal = PHI [ %RotatedOldVal, LoopMBB ], 8721 // [ %RotatedAltVal, UseAltMBB ] 8722 // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift) 8723 // %Dest = CS %OldVal, %NewVal, Disp(%Base) 8724 // JNE LoopMBB 8725 // # fall through to DoneMBB 8726 MBB = UpdateMBB; 8727 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RotatedNewVal) 8728 .addReg(RotatedOldVal).addMBB(LoopMBB) 8729 .addReg(RotatedAltVal).addMBB(UseAltMBB); 8730 BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal) 8731 .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0); 8732 BuildMI(MBB, DL, 
TII->get(CSOpcode), Dest) 8733 .addReg(OldVal) 8734 .addReg(NewVal) 8735 .add(Base) 8736 .addImm(Disp); 8737 BuildMI(MBB, DL, TII->get(SystemZ::BRC)) 8738 .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB); 8739 MBB->addSuccessor(LoopMBB); 8740 MBB->addSuccessor(DoneMBB); 8741 8742 MI.eraseFromParent(); 8743 return DoneMBB; 8744 } 8745 8746 // Implement EmitInstrWithCustomInserter for subword pseudo ATOMIC_CMP_SWAPW 8747 // instruction MI. 8748 MachineBasicBlock * 8749 SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI, 8750 MachineBasicBlock *MBB) const { 8751 MachineFunction &MF = *MBB->getParent(); 8752 const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); 8753 MachineRegisterInfo &MRI = MF.getRegInfo(); 8754 8755 // Extract the operands. Base can be a register or a frame index. 8756 Register Dest = MI.getOperand(0).getReg(); 8757 MachineOperand Base = earlyUseOperand(MI.getOperand(1)); 8758 int64_t Disp = MI.getOperand(2).getImm(); 8759 Register CmpVal = MI.getOperand(3).getReg(); 8760 Register OrigSwapVal = MI.getOperand(4).getReg(); 8761 Register BitShift = MI.getOperand(5).getReg(); 8762 Register NegBitShift = MI.getOperand(6).getReg(); 8763 int64_t BitSize = MI.getOperand(7).getImm(); 8764 DebugLoc DL = MI.getDebugLoc(); 8765 8766 const TargetRegisterClass *RC = &SystemZ::GR32BitRegClass; 8767 8768 // Get the right opcodes for the displacement and zero-extension. 8769 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp); 8770 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp); 8771 unsigned ZExtOpcode = BitSize == 8 ? SystemZ::LLCR : SystemZ::LLHR; 8772 assert(LOpcode && CSOpcode && "Displacement out of range"); 8773 8774 // Create virtual registers for temporary results. 8775 Register OrigOldVal = MRI.createVirtualRegister(RC); 8776 Register OldVal = MRI.createVirtualRegister(RC); 8777 Register SwapVal = MRI.createVirtualRegister(RC); 8778 Register StoreVal = MRI.createVirtualRegister(RC); 8779 Register OldValRot = MRI.createVirtualRegister(RC); 8780 Register RetryOldVal = MRI.createVirtualRegister(RC); 8781 Register RetrySwapVal = MRI.createVirtualRegister(RC); 8782 8783 // Insert 2 basic blocks for the loop. 8784 MachineBasicBlock *StartMBB = MBB; 8785 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB); 8786 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB); 8787 MachineBasicBlock *SetMBB = SystemZ::emitBlockAfter(LoopMBB); 8788 8789 // StartMBB: 8790 // ... 8791 // %OrigOldVal = L Disp(%Base) 8792 // # fall through to LoopMBB 8793 MBB = StartMBB; 8794 BuildMI(MBB, DL, TII->get(LOpcode), OrigOldVal) 8795 .add(Base) 8796 .addImm(Disp) 8797 .addReg(0); 8798 MBB->addSuccessor(LoopMBB); 8799 8800 // LoopMBB: 8801 // %OldVal = phi [ %OrigOldVal, EntryBB ], [ %RetryOldVal, SetMBB ] 8802 // %SwapVal = phi [ %OrigSwapVal, EntryBB ], [ %RetrySwapVal, SetMBB ] 8803 // %OldValRot = RLL %OldVal, BitSize(%BitShift) 8804 // ^^ The low BitSize bits contain the field 8805 // of interest. 8806 // %RetrySwapVal = RISBG32 %SwapVal, %OldValRot, 32, 63-BitSize, 0 8807 // ^^ Replace the upper 32-BitSize bits of the 8808 // swap value with those that we loaded and rotated. 
8809 // %Dest = LL[CH] %OldValRot 8810 // CR %Dest, %CmpVal 8811 // JNE DoneMBB 8812 // # Fall through to SetMBB 8813 MBB = LoopMBB; 8814 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal) 8815 .addReg(OrigOldVal).addMBB(StartMBB) 8816 .addReg(RetryOldVal).addMBB(SetMBB); 8817 BuildMI(MBB, DL, TII->get(SystemZ::PHI), SwapVal) 8818 .addReg(OrigSwapVal).addMBB(StartMBB) 8819 .addReg(RetrySwapVal).addMBB(SetMBB); 8820 BuildMI(MBB, DL, TII->get(SystemZ::RLL), OldValRot) 8821 .addReg(OldVal).addReg(BitShift).addImm(BitSize); 8822 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetrySwapVal) 8823 .addReg(SwapVal).addReg(OldValRot).addImm(32).addImm(63 - BitSize).addImm(0); 8824 BuildMI(MBB, DL, TII->get(ZExtOpcode), Dest) 8825 .addReg(OldValRot); 8826 BuildMI(MBB, DL, TII->get(SystemZ::CR)) 8827 .addReg(Dest).addReg(CmpVal); 8828 BuildMI(MBB, DL, TII->get(SystemZ::BRC)) 8829 .addImm(SystemZ::CCMASK_ICMP) 8830 .addImm(SystemZ::CCMASK_CMP_NE).addMBB(DoneMBB); 8831 MBB->addSuccessor(DoneMBB); 8832 MBB->addSuccessor(SetMBB); 8833 8834 // SetMBB: 8835 // %StoreVal = RLL %RetrySwapVal, -BitSize(%NegBitShift) 8836 // ^^ Rotate the new field to its proper position. 8837 // %RetryOldVal = CS %OldVal, %StoreVal, Disp(%Base) 8838 // JNE LoopMBB 8839 // # fall through to ExitMBB 8840 MBB = SetMBB; 8841 BuildMI(MBB, DL, TII->get(SystemZ::RLL), StoreVal) 8842 .addReg(RetrySwapVal).addReg(NegBitShift).addImm(-BitSize); 8843 BuildMI(MBB, DL, TII->get(CSOpcode), RetryOldVal) 8844 .addReg(OldVal) 8845 .addReg(StoreVal) 8846 .add(Base) 8847 .addImm(Disp); 8848 BuildMI(MBB, DL, TII->get(SystemZ::BRC)) 8849 .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB); 8850 MBB->addSuccessor(LoopMBB); 8851 MBB->addSuccessor(DoneMBB); 8852 8853 // If the CC def wasn't dead in the ATOMIC_CMP_SWAPW, mark CC as live-in 8854 // to the block after the loop. At this point, CC may have been defined 8855 // either by the CR in LoopMBB or by the CS in SetMBB. 8856 if (!MI.registerDefIsDead(SystemZ::CC, /*TRI=*/nullptr)) 8857 DoneMBB->addLiveIn(SystemZ::CC); 8858 8859 MI.eraseFromParent(); 8860 return DoneMBB; 8861 } 8862 8863 // Emit a move from two GR64s to a GR128. 8864 MachineBasicBlock * 8865 SystemZTargetLowering::emitPair128(MachineInstr &MI, 8866 MachineBasicBlock *MBB) const { 8867 const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); 8868 const DebugLoc &DL = MI.getDebugLoc(); 8869 8870 Register Dest = MI.getOperand(0).getReg(); 8871 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::REG_SEQUENCE), Dest) 8872 .add(MI.getOperand(1)) 8873 .addImm(SystemZ::subreg_h64) 8874 .add(MI.getOperand(2)) 8875 .addImm(SystemZ::subreg_l64); 8876 MI.eraseFromParent(); 8877 return MBB; 8878 } 8879 8880 // Emit an extension from a GR64 to a GR128. ClearEven is true 8881 // if the high register of the GR128 value must be cleared or false if 8882 // it's "don't care". 
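// When ClearEven is set (e.g. for a zero extension) the high half is
// explicitly loaded with zero via LLILL; otherwise it is simply left as an
// IMPLICIT_DEF.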
8883 MachineBasicBlock *SystemZTargetLowering::emitExt128(MachineInstr &MI, 8884 MachineBasicBlock *MBB, 8885 bool ClearEven) const { 8886 MachineFunction &MF = *MBB->getParent(); 8887 const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); 8888 MachineRegisterInfo &MRI = MF.getRegInfo(); 8889 DebugLoc DL = MI.getDebugLoc(); 8890 8891 Register Dest = MI.getOperand(0).getReg(); 8892 Register Src = MI.getOperand(1).getReg(); 8893 Register In128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass); 8894 8895 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), In128); 8896 if (ClearEven) { 8897 Register NewIn128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass); 8898 Register Zero64 = MRI.createVirtualRegister(&SystemZ::GR64BitRegClass); 8899 8900 BuildMI(*MBB, MI, DL, TII->get(SystemZ::LLILL), Zero64) 8901 .addImm(0); 8902 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewIn128) 8903 .addReg(In128).addReg(Zero64).addImm(SystemZ::subreg_h64); 8904 In128 = NewIn128; 8905 } 8906 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest) 8907 .addReg(In128).addReg(Src).addImm(SystemZ::subreg_l64); 8908 8909 MI.eraseFromParent(); 8910 return MBB; 8911 } 8912 8913 MachineBasicBlock * 8914 SystemZTargetLowering::emitMemMemWrapper(MachineInstr &MI, 8915 MachineBasicBlock *MBB, 8916 unsigned Opcode, bool IsMemset) const { 8917 MachineFunction &MF = *MBB->getParent(); 8918 const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); 8919 MachineRegisterInfo &MRI = MF.getRegInfo(); 8920 DebugLoc DL = MI.getDebugLoc(); 8921 8922 MachineOperand DestBase = earlyUseOperand(MI.getOperand(0)); 8923 uint64_t DestDisp = MI.getOperand(1).getImm(); 8924 MachineOperand SrcBase = MachineOperand::CreateReg(0U, false); 8925 uint64_t SrcDisp; 8926 8927 // Fold the displacement Disp if it is out of range. 8928 auto foldDisplIfNeeded = [&](MachineOperand &Base, uint64_t &Disp) -> void { 8929 if (!isUInt<12>(Disp)) { 8930 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass); 8931 unsigned Opcode = TII->getOpcodeForOffset(SystemZ::LA, Disp); 8932 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(Opcode), Reg) 8933 .add(Base).addImm(Disp).addReg(0); 8934 Base = MachineOperand::CreateReg(Reg, false); 8935 Disp = 0; 8936 } 8937 }; 8938 8939 if (!IsMemset) { 8940 SrcBase = earlyUseOperand(MI.getOperand(2)); 8941 SrcDisp = MI.getOperand(3).getImm(); 8942 } else { 8943 SrcBase = DestBase; 8944 SrcDisp = DestDisp++; 8945 foldDisplIfNeeded(DestBase, DestDisp); 8946 } 8947 8948 MachineOperand &LengthMO = MI.getOperand(IsMemset ? 2 : 4); 8949 bool IsImmForm = LengthMO.isImm(); 8950 bool IsRegForm = !IsImmForm; 8951 8952 // Build and insert one Opcode of Length, with special treatment for memset. 
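// For memset, the single defined byte is stored first (MVI for an immediate
// byte, STC for a register), and any remaining bytes are then filled by an
// overlapping MVC that propagates that byte through the rest of the range.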
8953 auto insertMemMemOp = [&](MachineBasicBlock *InsMBB, 8954 MachineBasicBlock::iterator InsPos, 8955 MachineOperand DBase, uint64_t DDisp, 8956 MachineOperand SBase, uint64_t SDisp, 8957 unsigned Length) -> void { 8958 assert(Length > 0 && Length <= 256 && "Building memory op with bad length."); 8959 if (IsMemset) { 8960 MachineOperand ByteMO = earlyUseOperand(MI.getOperand(3)); 8961 if (ByteMO.isImm()) 8962 BuildMI(*InsMBB, InsPos, DL, TII->get(SystemZ::MVI)) 8963 .add(SBase).addImm(SDisp).add(ByteMO); 8964 else 8965 BuildMI(*InsMBB, InsPos, DL, TII->get(SystemZ::STC)) 8966 .add(ByteMO).add(SBase).addImm(SDisp).addReg(0); 8967 if (--Length == 0) 8968 return; 8969 } 8970 BuildMI(*MBB, InsPos, DL, TII->get(Opcode)) 8971 .add(DBase).addImm(DDisp).addImm(Length) 8972 .add(SBase).addImm(SDisp) 8973 .setMemRefs(MI.memoperands()); 8974 }; 8975 8976 bool NeedsLoop = false; 8977 uint64_t ImmLength = 0; 8978 Register LenAdjReg = SystemZ::NoRegister; 8979 if (IsImmForm) { 8980 ImmLength = LengthMO.getImm(); 8981 ImmLength += IsMemset ? 2 : 1; // Add back the subtracted adjustment. 8982 if (ImmLength == 0) { 8983 MI.eraseFromParent(); 8984 return MBB; 8985 } 8986 if (Opcode == SystemZ::CLC) { 8987 if (ImmLength > 3 * 256) 8988 // A two-CLC sequence is a clear win over a loop, not least because 8989 // it needs only one branch. A three-CLC sequence needs the same 8990 // number of branches as a loop (i.e. 2), but is shorter. That 8991 // brings us to lengths greater than 768 bytes. It seems relatively 8992 // likely that a difference will be found within the first 768 bytes, 8993 // so we just optimize for the smallest number of branch 8994 // instructions, in order to avoid polluting the prediction buffer 8995 // too much. 8996 NeedsLoop = true; 8997 } else if (ImmLength > 6 * 256) 8998 // The heuristic we use is to prefer loops for anything that would 8999 // require 7 or more MVCs. With these kinds of sizes there isn't much 9000 // to choose between straight-line code and looping code, since the 9001 // time will be dominated by the MVCs themselves. 9002 NeedsLoop = true; 9003 } else { 9004 NeedsLoop = true; 9005 LenAdjReg = LengthMO.getReg(); 9006 } 9007 9008 // When generating more than one CLC, all but the last will need to 9009 // branch to the end when a difference is found. 9010 MachineBasicBlock *EndMBB = 9011 (Opcode == SystemZ::CLC && (ImmLength > 256 || NeedsLoop) 9012 ? SystemZ::splitBlockAfter(MI, MBB) 9013 : nullptr); 9014 9015 if (NeedsLoop) { 9016 Register StartCountReg = 9017 MRI.createVirtualRegister(&SystemZ::GR64BitRegClass); 9018 if (IsImmForm) { 9019 TII->loadImmediate(*MBB, MI, StartCountReg, ImmLength / 256); 9020 ImmLength &= 255; 9021 } else { 9022 BuildMI(*MBB, MI, DL, TII->get(SystemZ::SRLG), StartCountReg) 9023 .addReg(LenAdjReg) 9024 .addReg(0) 9025 .addImm(8); 9026 } 9027 9028 bool HaveSingleBase = DestBase.isIdenticalTo(SrcBase); 9029 auto loadZeroAddress = [&]() -> MachineOperand { 9030 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass); 9031 BuildMI(*MBB, MI, DL, TII->get(SystemZ::LGHI), Reg).addImm(0); 9032 return MachineOperand::CreateReg(Reg, false); 9033 }; 9034 if (DestBase.isReg() && DestBase.getReg() == SystemZ::NoRegister) 9035 DestBase = loadZeroAddress(); 9036 if (SrcBase.isReg() && SrcBase.getReg() == SystemZ::NoRegister) 9037 SrcBase = HaveSingleBase ? 

    MachineBasicBlock *StartMBB = nullptr;
    MachineBasicBlock *LoopMBB = nullptr;
    MachineBasicBlock *NextMBB = nullptr;
    MachineBasicBlock *DoneMBB = nullptr;
    MachineBasicBlock *AllDoneMBB = nullptr;

    Register StartSrcReg = forceReg(MI, SrcBase, TII);
    Register StartDestReg =
        (HaveSingleBase ? StartSrcReg : forceReg(MI, DestBase, TII));

    const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
    Register ThisSrcReg = MRI.createVirtualRegister(RC);
    Register ThisDestReg =
        (HaveSingleBase ? ThisSrcReg : MRI.createVirtualRegister(RC));
    Register NextSrcReg = MRI.createVirtualRegister(RC);
    Register NextDestReg =
        (HaveSingleBase ? NextSrcReg : MRI.createVirtualRegister(RC));
    RC = &SystemZ::GR64BitRegClass;
    Register ThisCountReg = MRI.createVirtualRegister(RC);
    Register NextCountReg = MRI.createVirtualRegister(RC);

    if (IsRegForm) {
      AllDoneMBB = SystemZ::splitBlockBefore(MI, MBB);
      StartMBB = SystemZ::emitBlockAfter(MBB);
      LoopMBB = SystemZ::emitBlockAfter(StartMBB);
      NextMBB = (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB);
      DoneMBB = SystemZ::emitBlockAfter(NextMBB);

      // MBB:
      // # Jump to AllDoneMBB if LenAdjReg means 0, or fall thru to StartMBB.
      BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
          .addReg(LenAdjReg).addImm(IsMemset ? -2 : -1);
      BuildMI(MBB, DL, TII->get(SystemZ::BRC))
          .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
          .addMBB(AllDoneMBB);
      MBB->addSuccessor(AllDoneMBB);
      if (!IsMemset)
        MBB->addSuccessor(StartMBB);
      else {
        // MemsetOneCheckMBB:
        // # Jump to MemsetOneMBB for a memset of length 1, or
        // # fall thru to StartMBB.
        MachineBasicBlock *MemsetOneCheckMBB = SystemZ::emitBlockAfter(MBB);
        MachineBasicBlock *MemsetOneMBB = SystemZ::emitBlockAfter(&*MF.rbegin());
        MBB->addSuccessor(MemsetOneCheckMBB);
        MBB = MemsetOneCheckMBB;
        BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
            .addReg(LenAdjReg).addImm(-1);
        BuildMI(MBB, DL, TII->get(SystemZ::BRC))
            .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
            .addMBB(MemsetOneMBB);
        MBB->addSuccessor(MemsetOneMBB, {10, 100});
        MBB->addSuccessor(StartMBB, {90, 100});

        // MemsetOneMBB:
        // # Jump back to AllDoneMBB after a single MVI or STC.
        MBB = MemsetOneMBB;
        insertMemMemOp(MBB, MBB->end(),
                       MachineOperand::CreateReg(StartDestReg, false), DestDisp,
                       MachineOperand::CreateReg(StartSrcReg, false), SrcDisp,
                       1);
        BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(AllDoneMBB);
        MBB->addSuccessor(AllDoneMBB);
      }

      // StartMBB:
      // # Jump to DoneMBB if %StartCountReg is zero, or fall through to LoopMBB.
      MBB = StartMBB;
      BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
          .addReg(StartCountReg).addImm(0);
      BuildMI(MBB, DL, TII->get(SystemZ::BRC))
          .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
          .addMBB(DoneMBB);
      MBB->addSuccessor(DoneMBB);
      MBB->addSuccessor(LoopMBB);
    } else {
      StartMBB = MBB;
      DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
      LoopMBB = SystemZ::emitBlockAfter(StartMBB);
      NextMBB = (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB);
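
      // (Note: in this immediate-length form NeedsLoop implies a length of
      // more than three full 256-byte chunks, so StartCountReg is known to
      // be nonzero and no runtime check for zero iterations is needed
      // before falling through to LoopMBB.)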

      // StartMBB:
      // # fall through to LoopMBB
      MBB->addSuccessor(LoopMBB);

      DestBase = MachineOperand::CreateReg(NextDestReg, false);
      SrcBase = MachineOperand::CreateReg(NextSrcReg, false);
      if (EndMBB && !ImmLength)
        // If the loop handled the whole CLC range, DoneMBB will be empty with
        // CC live-through into EndMBB, so add it as live-in.
        DoneMBB->addLiveIn(SystemZ::CC);
    }

    // LoopMBB:
    // %ThisDestReg = phi [ %StartDestReg, StartMBB ],
    //                    [ %NextDestReg, NextMBB ]
    // %ThisSrcReg = phi [ %StartSrcReg, StartMBB ],
    //                   [ %NextSrcReg, NextMBB ]
    // %ThisCountReg = phi [ %StartCountReg, StartMBB ],
    //                     [ %NextCountReg, NextMBB ]
    // ( PFD 2, 768+DestDisp(%ThisDestReg) )
    // Opcode DestDisp(256,%ThisDestReg), SrcDisp(%ThisSrcReg)
    // ( JLH EndMBB )
    //
    // The prefetch is used only for MVC.  The JLH is used only for CLC.
    MBB = LoopMBB;
    BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisDestReg)
        .addReg(StartDestReg).addMBB(StartMBB)
        .addReg(NextDestReg).addMBB(NextMBB);
    if (!HaveSingleBase)
      BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisSrcReg)
          .addReg(StartSrcReg).addMBB(StartMBB)
          .addReg(NextSrcReg).addMBB(NextMBB);
    BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisCountReg)
        .addReg(StartCountReg).addMBB(StartMBB)
        .addReg(NextCountReg).addMBB(NextMBB);
    if (Opcode == SystemZ::MVC)
      BuildMI(MBB, DL, TII->get(SystemZ::PFD))
          .addImm(SystemZ::PFD_WRITE)
          .addReg(ThisDestReg).addImm(DestDisp - IsMemset + 768).addReg(0);
    insertMemMemOp(MBB, MBB->end(),
                   MachineOperand::CreateReg(ThisDestReg, false), DestDisp,
                   MachineOperand::CreateReg(ThisSrcReg, false), SrcDisp, 256);
    if (EndMBB) {
      BuildMI(MBB, DL, TII->get(SystemZ::BRC))
          .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
          .addMBB(EndMBB);
      MBB->addSuccessor(EndMBB);
      MBB->addSuccessor(NextMBB);
    }

    // NextMBB:
    // %NextDestReg = LA 256(%ThisDestReg)
    // %NextSrcReg = LA 256(%ThisSrcReg)
    // %NextCountReg = AGHI %ThisCountReg, -1
    // CGHI %NextCountReg, 0
    // JLH LoopMBB
    // # fall through to DoneMBB
    //
    // The AGHI, CGHI and JLH should be converted to BRCTG by later passes.
    MBB = NextMBB;
    BuildMI(MBB, DL, TII->get(SystemZ::LA), NextDestReg)
        .addReg(ThisDestReg).addImm(256).addReg(0);
    if (!HaveSingleBase)
      BuildMI(MBB, DL, TII->get(SystemZ::LA), NextSrcReg)
          .addReg(ThisSrcReg).addImm(256).addReg(0);
    BuildMI(MBB, DL, TII->get(SystemZ::AGHI), NextCountReg)
        .addReg(ThisCountReg).addImm(-1);
    BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
        .addReg(NextCountReg).addImm(0);
    BuildMI(MBB, DL, TII->get(SystemZ::BRC))
        .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
        .addMBB(LoopMBB);
    MBB->addSuccessor(LoopMBB);
    MBB->addSuccessor(DoneMBB);

    MBB = DoneMBB;
    if (IsRegForm) {
      // DoneMBB:
      // # Make PHIs for RemDestReg/RemSrcReg as the loop may or may not run.
      // # Use EXecute Relative Long for the remainder of the bytes.  The target
      //   instruction of the EXRL will have a length field of 1 since 0 is an
      //   illegal value.  The number of bytes processed becomes (%LenAdjReg &
      //   0xff) + 1.
      // # Fall through to AllDoneMBB.
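      //
      // (Illustrative: for an MVC of 600 bytes the caller passes 599 in
      //  %LenAdjReg, the loop above copies 2 * 256 bytes, and the EXRL'd
      //  MVC copies (599 & 0xff) + 1 = 88 more, 600 bytes in total.)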
      Register RemSrcReg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
      Register RemDestReg = HaveSingleBase ? RemSrcReg
          : MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
      BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemDestReg)
          .addReg(StartDestReg).addMBB(StartMBB)
          .addReg(NextDestReg).addMBB(NextMBB);
      if (!HaveSingleBase)
        BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemSrcReg)
            .addReg(StartSrcReg).addMBB(StartMBB)
            .addReg(NextSrcReg).addMBB(NextMBB);
      if (IsMemset)
        insertMemMemOp(MBB, MBB->end(),
                       MachineOperand::CreateReg(RemDestReg, false), DestDisp,
                       MachineOperand::CreateReg(RemSrcReg, false), SrcDisp, 1);
      MachineInstrBuilder EXRL_MIB =
          BuildMI(MBB, DL, TII->get(SystemZ::EXRL_Pseudo))
              .addImm(Opcode)
              .addReg(LenAdjReg)
              .addReg(RemDestReg).addImm(DestDisp)
              .addReg(RemSrcReg).addImm(SrcDisp);
      MBB->addSuccessor(AllDoneMBB);
      MBB = AllDoneMBB;
      if (Opcode != SystemZ::MVC) {
        EXRL_MIB.addReg(SystemZ::CC, RegState::ImplicitDefine);
        if (EndMBB)
          MBB->addLiveIn(SystemZ::CC);
      }
    }
    MF.getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
  }

  // Handle any remaining bytes with straight-line code.
  while (ImmLength > 0) {
    uint64_t ThisLength = std::min(ImmLength, uint64_t(256));
    // The previous iteration might have created out-of-range displacements.
    // Fold them into the base register using LA/LAY if so.
    foldDisplIfNeeded(DestBase, DestDisp);
    foldDisplIfNeeded(SrcBase, SrcDisp);
    insertMemMemOp(MBB, MI, DestBase, DestDisp, SrcBase, SrcDisp, ThisLength);
    DestDisp += ThisLength;
    SrcDisp += ThisLength;
    ImmLength -= ThisLength;
    // If there's another CLC to go, branch to the end if a difference
    // was found.
    if (EndMBB && ImmLength > 0) {
      MachineBasicBlock *NextMBB = SystemZ::splitBlockBefore(MI, MBB);
      BuildMI(MBB, DL, TII->get(SystemZ::BRC))
          .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
          .addMBB(EndMBB);
      MBB->addSuccessor(EndMBB);
      MBB->addSuccessor(NextMBB);
      MBB = NextMBB;
    }
  }
  if (EndMBB) {
    MBB->addSuccessor(EndMBB);
    MBB = EndMBB;
    MBB->addLiveIn(SystemZ::CC);
  }

  MI.eraseFromParent();
  return MBB;
}

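// For reference (illustrative): a memcpy with a constant length of 300 bytes
// reaches the straight-line path of emitMemMemWrapper above and becomes two
// MVCs, MVC 0(256,%dest),0(%src) followed by MVC 256(44,%dest),256(%src);
// only constant lengths above 6 * 256 bytes (or a variable length) use the
// loop form.
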
// Decompose string pseudo-instruction MI into a loop that continually performs
// Opcode until CC != 3.
MachineBasicBlock *SystemZTargetLowering::emitStringWrapper(
    MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
  MachineFunction &MF = *MBB->getParent();
  const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  DebugLoc DL = MI.getDebugLoc();

  Register End1Reg = MI.getOperand(0).getReg();
  Register Start1Reg = MI.getOperand(1).getReg();
  Register Start2Reg = MI.getOperand(2).getReg();
  Register CharReg = MI.getOperand(3).getReg();

  const TargetRegisterClass *RC = &SystemZ::GR64BitRegClass;
  Register This1Reg = MRI.createVirtualRegister(RC);
  Register This2Reg = MRI.createVirtualRegister(RC);
  Register End2Reg = MRI.createVirtualRegister(RC);

  MachineBasicBlock *StartMBB = MBB;
  MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
  MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);

  // StartMBB:
  // # fall through to LoopMBB
  MBB->addSuccessor(LoopMBB);

  // LoopMBB:
  // %This1Reg = phi [ %Start1Reg, StartMBB ], [ %End1Reg, LoopMBB ]
  // %This2Reg = phi [ %Start2Reg, StartMBB ], [ %End2Reg, LoopMBB ]
  // R0L = %CharReg
  // %End1Reg, %End2Reg = CLST %This1Reg, %This2Reg -- uses R0L
  // JO LoopMBB
  // # fall through to DoneMBB
  //
  // The load of R0L can be hoisted by post-RA LICM.
  MBB = LoopMBB;

  BuildMI(MBB, DL, TII->get(SystemZ::PHI), This1Reg)
      .addReg(Start1Reg).addMBB(StartMBB)
      .addReg(End1Reg).addMBB(LoopMBB);
  BuildMI(MBB, DL, TII->get(SystemZ::PHI), This2Reg)
      .addReg(Start2Reg).addMBB(StartMBB)
      .addReg(End2Reg).addMBB(LoopMBB);
  BuildMI(MBB, DL, TII->get(TargetOpcode::COPY), SystemZ::R0L).addReg(CharReg);
  BuildMI(MBB, DL, TII->get(Opcode))
      .addReg(End1Reg, RegState::Define).addReg(End2Reg, RegState::Define)
      .addReg(This1Reg).addReg(This2Reg);
  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
      .addImm(SystemZ::CCMASK_ANY).addImm(SystemZ::CCMASK_3).addMBB(LoopMBB);
  MBB->addSuccessor(LoopMBB);
  MBB->addSuccessor(DoneMBB);

  DoneMBB->addLiveIn(SystemZ::CC);

  MI.eraseFromParent();
  return DoneMBB;
}

// Update TBEGIN instruction with final opcode and register clobbers.
MachineBasicBlock *SystemZTargetLowering::emitTransactionBegin(
    MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode,
    bool NoFloat) const {
  MachineFunction &MF = *MBB->getParent();
  const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
  const SystemZInstrInfo *TII = Subtarget.getInstrInfo();

  // Update opcode.
  MI.setDesc(TII->get(Opcode));

  // We cannot handle a TBEGIN that clobbers the stack or frame pointer.
  // Make sure to add the corresponding GRSM bits if they are missing.
  uint64_t Control = MI.getOperand(2).getImm();
  static const unsigned GPRControlBit[16] = {
    0x8000, 0x8000, 0x4000, 0x4000, 0x2000, 0x2000, 0x1000, 0x1000,
    0x0800, 0x0800, 0x0400, 0x0400, 0x0200, 0x0200, 0x0100, 0x0100
  };
  Control |= GPRControlBit[15];
  if (TFI->hasFP(MF))
    Control |= GPRControlBit[11];
  MI.getOperand(2).setImm(Control);

  // Add GPR clobbers.
  for (int I = 0; I < 16; I++) {
    if ((Control & GPRControlBit[I]) == 0) {
      unsigned Reg = SystemZMC::GR64Regs[I];
      MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
    }
  }

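  // (For example: with a user-specified GRSM of 0xC000 and no frame pointer,
  //  Control becomes 0xC100, so %r0-%r3, %r14 and %r15 are saved/restored by
  //  the transaction while the loop above adds %r4-%r13 as clobber operands.)
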
  // Add FPR/VR clobbers.
  if (!NoFloat && (Control & 4) != 0) {
    if (Subtarget.hasVector()) {
      for (unsigned Reg : SystemZMC::VR128Regs) {
        MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
      }
    } else {
      for (unsigned Reg : SystemZMC::FP64Regs) {
        MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
      }
    }
  }

  return MBB;
}

MachineBasicBlock *SystemZTargetLowering::emitLoadAndTestCmp0(
    MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
  MachineFunction &MF = *MBB->getParent();
  MachineRegisterInfo *MRI = &MF.getRegInfo();
  const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
  DebugLoc DL = MI.getDebugLoc();

  Register SrcReg = MI.getOperand(0).getReg();

  // Create new virtual register of the same class as source.
  const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
  Register DstReg = MRI->createVirtualRegister(RC);

  // Replace pseudo with a normal load-and-test that models the def as
  // well.
  BuildMI(*MBB, MI, DL, TII->get(Opcode), DstReg)
      .addReg(SrcReg)
      .setMIFlags(MI.getFlags());
  MI.eraseFromParent();

  return MBB;
}

MachineBasicBlock *SystemZTargetLowering::emitProbedAlloca(
    MachineInstr &MI, MachineBasicBlock *MBB) const {
  MachineFunction &MF = *MBB->getParent();
  MachineRegisterInfo *MRI = &MF.getRegInfo();
  const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
  DebugLoc DL = MI.getDebugLoc();
  const unsigned ProbeSize = getStackProbeSize(MF);
  Register DstReg = MI.getOperand(0).getReg();
  Register SizeReg = MI.getOperand(2).getReg();

  MachineBasicBlock *StartMBB = MBB;
  MachineBasicBlock *DoneMBB = SystemZ::splitBlockAfter(MI, MBB);
  MachineBasicBlock *LoopTestMBB = SystemZ::emitBlockAfter(StartMBB);
  MachineBasicBlock *LoopBodyMBB = SystemZ::emitBlockAfter(LoopTestMBB);
  MachineBasicBlock *TailTestMBB = SystemZ::emitBlockAfter(LoopBodyMBB);
  MachineBasicBlock *TailMBB = SystemZ::emitBlockAfter(TailTestMBB);

  MachineMemOperand *VolLdMMO = MF.getMachineMemOperand(MachinePointerInfo(),
      MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad, 8, Align(1));

  Register PHIReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass);
  Register IncReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass);

  // LoopTestMBB
  // BRC TailTestMBB
  // # fallthrough to LoopBodyMBB
  StartMBB->addSuccessor(LoopTestMBB);
  MBB = LoopTestMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::PHI), PHIReg)
      .addReg(SizeReg)
      .addMBB(StartMBB)
      .addReg(IncReg)
      .addMBB(LoopBodyMBB);
  BuildMI(MBB, DL, TII->get(SystemZ::CLGFI))
      .addReg(PHIReg)
      .addImm(ProbeSize);
  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
      .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_LT)
      .addMBB(TailTestMBB);
  MBB->addSuccessor(LoopBodyMBB);
  MBB->addSuccessor(TailTestMBB);

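  // (Illustrative: with the default probe size of 4096 bytes, an allocation
  //  of 10000 bytes makes two trips through LoopBodyMBB, each moving %r15
  //  down by 4096 and probing the newly allocated memory, leaving 1808
  //  bytes for TailMBB.)
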
  // LoopBodyMBB: Allocate and probe by means of a volatile compare.
  // J LoopTestMBB
  MBB = LoopBodyMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), IncReg)
      .addReg(PHIReg)
      .addImm(ProbeSize);
  BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), SystemZ::R15D)
      .addReg(SystemZ::R15D)
      .addImm(ProbeSize);
  BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D)
      .addReg(SystemZ::R15D).addImm(ProbeSize - 8).addReg(0)
      .setMemRefs(VolLdMMO);
  BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(LoopTestMBB);
  MBB->addSuccessor(LoopTestMBB);

  // TailTestMBB
  // BRC DoneMBB
  // # fallthrough to TailMBB
  MBB = TailTestMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
      .addReg(PHIReg)
      .addImm(0);
  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
      .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
      .addMBB(DoneMBB);
  MBB->addSuccessor(TailMBB);
  MBB->addSuccessor(DoneMBB);

  // TailMBB
  // # fallthrough to DoneMBB
  MBB = TailMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::SLGR), SystemZ::R15D)
      .addReg(SystemZ::R15D)
      .addReg(PHIReg);
  BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D)
      .addReg(SystemZ::R15D).addImm(-8).addReg(PHIReg)
      .setMemRefs(VolLdMMO);
  MBB->addSuccessor(DoneMBB);

  // DoneMBB
  MBB = DoneMBB;
  BuildMI(*MBB, MBB->begin(), DL, TII->get(TargetOpcode::COPY), DstReg)
      .addReg(SystemZ::R15D);

  MI.eraseFromParent();
  return DoneMBB;
}

SDValue SystemZTargetLowering::
getBackchainAddress(SDValue SP, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
  SDLoc DL(SP);
  return DAG.getNode(ISD::ADD, DL, MVT::i64, SP,
                     DAG.getIntPtrConstant(TFL->getBackchainOffset(MF), DL));
}

MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
    MachineInstr &MI, MachineBasicBlock *MBB) const {
  switch (MI.getOpcode()) {
  case SystemZ::ADJCALLSTACKDOWN:
  case SystemZ::ADJCALLSTACKUP:
    return emitAdjCallStack(MI, MBB);

  case SystemZ::Select32:
  case SystemZ::Select64:
  case SystemZ::Select128:
  case SystemZ::SelectF32:
  case SystemZ::SelectF64:
  case SystemZ::SelectF128:
  case SystemZ::SelectVR32:
  case SystemZ::SelectVR64:
  case SystemZ::SelectVR128:
    return emitSelect(MI, MBB);

  case SystemZ::CondStore8Mux:
    return emitCondStore(MI, MBB, SystemZ::STCMux, 0, false);
  case SystemZ::CondStore8MuxInv:
    return emitCondStore(MI, MBB, SystemZ::STCMux, 0, true);
  case SystemZ::CondStore16Mux:
    return emitCondStore(MI, MBB, SystemZ::STHMux, 0, false);
  case SystemZ::CondStore16MuxInv:
    return emitCondStore(MI, MBB, SystemZ::STHMux, 0, true);
  case SystemZ::CondStore32Mux:
    return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, false);
  case SystemZ::CondStore32MuxInv:
    return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, true);
  case SystemZ::CondStore8:
    return emitCondStore(MI, MBB, SystemZ::STC, 0, false);
  case SystemZ::CondStore8Inv:
    return emitCondStore(MI, MBB, SystemZ::STC, 0, true);
  case SystemZ::CondStore16:
    return emitCondStore(MI, MBB, SystemZ::STH, 0, false);
  case SystemZ::CondStore16Inv:
    return emitCondStore(MI, MBB, SystemZ::STH, 0, true);
  case SystemZ::CondStore32:
    return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, false);
  case SystemZ::CondStore32Inv:
    return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, true);
  case SystemZ::CondStore64:
    return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, false);
  case SystemZ::CondStore64Inv:
    return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, true);
  case SystemZ::CondStoreF32:
    return emitCondStore(MI, MBB, SystemZ::STE, 0, false);
  case SystemZ::CondStoreF32Inv:
    return emitCondStore(MI, MBB, SystemZ::STE, 0, true);
  case SystemZ::CondStoreF64:
    return emitCondStore(MI, MBB, SystemZ::STD, 0, false);
  case SystemZ::CondStoreF64Inv:
    return emitCondStore(MI, MBB, SystemZ::STD, 0, true);

  case SystemZ::SCmp128Hi:
    return emitICmp128Hi(MI, MBB, false);
  case SystemZ::UCmp128Hi:
    return emitICmp128Hi(MI, MBB, true);

  case SystemZ::PAIR128:
    return emitPair128(MI, MBB);
  case SystemZ::AEXT128:
    return emitExt128(MI, MBB, false);
  case SystemZ::ZEXT128:
    return emitExt128(MI, MBB, true);

  case SystemZ::ATOMIC_SWAPW:
    return emitAtomicLoadBinary(MI, MBB, 0);

  case SystemZ::ATOMIC_LOADW_AR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::AR);
  case SystemZ::ATOMIC_LOADW_AFI:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI);

  case SystemZ::ATOMIC_LOADW_SR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::SR);

  case SystemZ::ATOMIC_LOADW_NR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NR);
  case SystemZ::ATOMIC_LOADW_NILH:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH);

  case SystemZ::ATOMIC_LOADW_OR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OR);
  case SystemZ::ATOMIC_LOADW_OILH:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH);

  case SystemZ::ATOMIC_LOADW_XR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::XR);
  case SystemZ::ATOMIC_LOADW_XILF:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF);

  case SystemZ::ATOMIC_LOADW_NRi:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, true);
  case SystemZ::ATOMIC_LOADW_NILHi:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, true);

  case SystemZ::ATOMIC_LOADW_MIN:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, SystemZ::CCMASK_CMP_LE);
  case SystemZ::ATOMIC_LOADW_MAX:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, SystemZ::CCMASK_CMP_GE);
  case SystemZ::ATOMIC_LOADW_UMIN:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, SystemZ::CCMASK_CMP_LE);
  case SystemZ::ATOMIC_LOADW_UMAX:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, SystemZ::CCMASK_CMP_GE);

  case SystemZ::ATOMIC_CMP_SWAPW:
    return emitAtomicCmpSwapW(MI, MBB);
  case SystemZ::MVCImm:
  case SystemZ::MVCReg:
    return emitMemMemWrapper(MI, MBB, SystemZ::MVC);
  case SystemZ::NCImm:
    return emitMemMemWrapper(MI, MBB, SystemZ::NC);
  case SystemZ::OCImm:
    return emitMemMemWrapper(MI, MBB, SystemZ::OC);
  case SystemZ::XCImm:
  case SystemZ::XCReg:
    return emitMemMemWrapper(MI, MBB, SystemZ::XC);
  case SystemZ::CLCImm:
  case SystemZ::CLCReg:
    return emitMemMemWrapper(MI, MBB, SystemZ::CLC);
  case SystemZ::MemsetImmImm:
  case SystemZ::MemsetImmReg:
  case SystemZ::MemsetRegImm:
  case SystemZ::MemsetRegReg:
    return emitMemMemWrapper(MI, MBB, SystemZ::MVC, true/*IsMemset*/);
  case SystemZ::CLSTLoop:
    return emitStringWrapper(MI, MBB, SystemZ::CLST);
  case SystemZ::MVSTLoop:
    return emitStringWrapper(MI, MBB, SystemZ::MVST);
  case SystemZ::SRSTLoop:
    return emitStringWrapper(MI, MBB, SystemZ::SRST);
  case SystemZ::TBEGIN:
    return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, false);
  case SystemZ::TBEGIN_nofloat:
    return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, true);
  case SystemZ::TBEGINC:
    return emitTransactionBegin(MI, MBB, SystemZ::TBEGINC, true);
  case SystemZ::LTEBRCompare_Pseudo:
    return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTEBR);
  case SystemZ::LTDBRCompare_Pseudo:
    return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTDBR);
  case SystemZ::LTXBRCompare_Pseudo:
    return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTXBR);

  case SystemZ::PROBED_ALLOCA:
    return emitProbedAlloca(MI, MBB);

  case TargetOpcode::STACKMAP:
  case TargetOpcode::PATCHPOINT:
    return emitPatchPoint(MI, MBB);

  default:
    llvm_unreachable("Unexpected instr type to insert");
  }
}

// This is only used by the isel schedulers, and is needed only to prevent
// the compiler from crashing when list-ilp is used.
const TargetRegisterClass *
SystemZTargetLowering::getRepRegClassFor(MVT VT) const {
  if (VT == MVT::Untyped)
    return &SystemZ::ADDR128BitRegClass;
  return TargetLowering::getRepRegClassFor(VT);
}

SDValue SystemZTargetLowering::lowerGET_ROUNDING(SDValue Op,
                                                 SelectionDAG &DAG) const {
  SDLoc dl(Op);
  /*
   The rounding method is in FPC Byte 3 bits 6-7, and has the following
   settings:
     00 Round to nearest
     01 Round to 0
     10 Round to +inf
     11 Round to -inf

   FLT_ROUNDS, on the other hand, expects the following:
    -1 Undefined
     0 Round to 0
     1 Round to nearest
     2 Round to +inf
     3 Round to -inf
  */

  // Save FPC to register.
  SDValue Chain = Op.getOperand(0);
  SDValue EFPC(
      DAG.getMachineNode(SystemZ::EFPC, dl, {MVT::i32, MVT::Other}, Chain), 0);
  Chain = EFPC.getValue(1);

  // Transform as necessary.
  SDValue CWD1 = DAG.getNode(ISD::AND, dl, MVT::i32, EFPC,
                             DAG.getConstant(3, dl, MVT::i32));
  // RetVal = (CWD1 ^ (CWD1 >> 1)) ^ 1
  SDValue CWD2 = DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1,
                             DAG.getNode(ISD::SRL, dl, MVT::i32, CWD1,
                                         DAG.getConstant(1, dl, MVT::i32)));

  SDValue RetVal = DAG.getNode(ISD::XOR, dl, MVT::i32, CWD2,
                               DAG.getConstant(1, dl, MVT::i32));
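  // (Sanity check of the mapping, illustrative: FPC 00 -> 1, 01 -> 0,
  //  10 -> 2 and 11 -> 3; e.g. FPC 0b10 gives (2 ^ (2 >> 1)) ^ 1 = 3 ^ 1 = 2,
  //  i.e. round to +inf, matching the FLT_ROUNDS encoding above.)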
  RetVal = DAG.getZExtOrTrunc(RetVal, dl, Op.getValueType());

  return DAG.getMergeValues({RetVal, Chain}, dl);
}

SDValue SystemZTargetLowering::lowerVECREDUCE_ADD(SDValue Op,
                                                  SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  Op = Op.getOperand(0);
  EVT OpVT = Op.getValueType();

  assert(OpVT.isVector() && "Operand type for VECREDUCE_ADD is not a vector.");

  SDLoc DL(Op);

  // Load a 0 vector for the third operand of VSUM.
  SDValue Zero = DAG.getSplatBuildVector(OpVT, DL, DAG.getConstant(0, DL, VT));

  // Execute VSUM.
  switch (OpVT.getScalarSizeInBits()) {
  case 8:
  case 16:
    Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Zero);
    [[fallthrough]];
  case 32:
  case 64:
    Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::i128, Op,
                     DAG.getBitcast(Op.getValueType(), Zero));
    break;
  case 128:
    break; // VSUM over v1i128 should not happen and would be a noop.
  default:
    llvm_unreachable("Unexpected scalar size.");
  }
  // Cast to original vector type, retrieve last element.
  return DAG.getNode(
      ISD::EXTRACT_VECTOR_ELT, DL, VT, DAG.getBitcast(OpVT, Op),
      DAG.getConstant(OpVT.getVectorNumElements() - 1, DL, MVT::i32));
}