//===-- SystemZISelLowering.cpp - SystemZ DAG lowering implementation -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the SystemZTargetLowering class.
//
//===----------------------------------------------------------------------===//

#include "SystemZISelLowering.h"
#include "SystemZCallingConv.h"
#include "SystemZConstantPoolValue.h"
#include "SystemZMachineFunctionInfo.h"
#include "SystemZTargetMachine.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsS390.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/KnownBits.h"
#include <cctype>
#include <optional>

using namespace llvm;

#define DEBUG_TYPE "systemz-lower"

namespace {
// Represents information about a comparison.
struct Comparison {
  Comparison(SDValue Op0In, SDValue Op1In, SDValue ChainIn)
      : Op0(Op0In), Op1(Op1In), Chain(ChainIn),
        Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {}

  // The operands to the comparison.
  SDValue Op0, Op1;

  // Chain if this is a strict floating-point comparison.
  SDValue Chain;

  // The opcode that should be used to compare Op0 and Op1.
  unsigned Opcode;

  // A SystemZICMP value. Only used for integer comparisons.
  unsigned ICmpType;

  // The mask of CC values that Opcode can produce.
  unsigned CCValid;

  // The mask of CC values for which the original condition is true.
  unsigned CCMask;
};
} // end anonymous namespace

// Classify VT as either 32 or 64 bit.
static bool is32Bit(EVT VT) {
  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::i32:
    return true;
  case MVT::i64:
    return false;
  default:
    llvm_unreachable("Unsupported type");
  }
}

// Return a version of MachineOperand that can be safely used before the
// final use.
static MachineOperand earlyUseOperand(MachineOperand Op) {
  if (Op.isReg())
    Op.setIsKill(false);
  return Op;
}

SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
                                             const SystemZSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {
  MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));

  auto *Regs = STI.getSpecialRegisters();

  // Set up the register classes.
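  // Illustrative note (added commentary, not from the upstream source): the
  // GRX32BitRegClass is only used when the high-word facility is available,
  // because it also covers the 32-bit high-word registers; without that
  // facility, i32 values are restricted to the low 32-bit halves modeled by
  // GR32BitRegClass.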
  if (Subtarget.hasHighWord())
    addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass);
  else
    addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);
  addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
  if (!useSoftFloat()) {
    if (Subtarget.hasVector()) {
      addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass);
      addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass);
    } else {
      addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
      addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
    }
    if (Subtarget.hasVectorEnhancements1())
      addRegisterClass(MVT::f128, &SystemZ::VR128BitRegClass);
    else
      addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);

    if (Subtarget.hasVector()) {
      addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass);
      addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass);
      addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass);
      addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass);
      addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass);
      addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass);
    }

    if (Subtarget.hasVector())
      addRegisterClass(MVT::i128, &SystemZ::VR128BitRegClass);
  }

  // Compute derived properties from the register classes
  computeRegisterProperties(Subtarget.getRegisterInfo());

  // Set up special registers.
  setStackPointerRegisterToSaveRestore(Regs->getStackPointerRegister());

  // TODO: It may be better to default to latency-oriented scheduling, however
  // LLVM's current latency-oriented scheduler can't handle physreg definitions
  // such as SystemZ has with CC, so set this to the register-pressure
  // scheduler, because it can.
  setSchedulingPreference(Sched::RegPressure);

  setBooleanContents(ZeroOrOneBooleanContent);
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);

  setMaxAtomicSizeInBitsSupported(128);

  // Instructions are strings of 2-byte aligned 2-byte values.
  setMinFunctionAlignment(Align(2));
  // For performance reasons we prefer 16-byte alignment.
  setPrefFunctionAlignment(Align(16));

  // Handle operations that are handled in a similar way for all types.
  for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
       I <= MVT::LAST_FP_VALUETYPE;
       ++I) {
    MVT VT = MVT::SimpleValueType(I);
    if (isTypeLegal(VT)) {
      // Lower SET_CC into an IPM-based sequence.
      setOperationAction(ISD::SETCC, VT, Custom);
      setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
      setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);

      // Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE).
      setOperationAction(ISD::SELECT, VT, Expand);

      // Lower SELECT_CC and BR_CC into separate comparisons and branches.
      setOperationAction(ISD::SELECT_CC, VT, Custom);
      setOperationAction(ISD::BR_CC, VT, Custom);
    }
  }

  // Expand jump table branches as address arithmetic followed by an
  // indirect jump.
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);

  // Expand BRCOND into a BR_CC (see above).
  setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  // Handle integer types except i128.
  for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
       I <= MVT::LAST_INTEGER_VALUETYPE;
       ++I) {
    MVT VT = MVT::SimpleValueType(I);
    if (isTypeLegal(VT) && VT != MVT::i128) {
      setOperationAction(ISD::ABS, VT, Legal);

      // Expand individual DIV and REMs into DIVREMs.
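      // Sketch of the intent (added, hedged commentary): a separate sdiv and
      // srem of the same operands, e.g.
      //   %q = sdiv i64 %a, %b
      //   %r = srem i64 %a, %b
      // is expected to be legalized into a single SDIVREM node, matching the
      // SystemZ divide instructions that produce quotient and remainder in an
      // even/odd register pair.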
      setOperationAction(ISD::SDIV, VT, Expand);
      setOperationAction(ISD::UDIV, VT, Expand);
      setOperationAction(ISD::SREM, VT, Expand);
      setOperationAction(ISD::UREM, VT, Expand);
      setOperationAction(ISD::SDIVREM, VT, Custom);
      setOperationAction(ISD::UDIVREM, VT, Custom);

      // Support addition/subtraction with overflow.
      setOperationAction(ISD::SADDO, VT, Custom);
      setOperationAction(ISD::SSUBO, VT, Custom);

      // Support addition/subtraction with carry.
      setOperationAction(ISD::UADDO, VT, Custom);
      setOperationAction(ISD::USUBO, VT, Custom);

      // Support carry in as value rather than glue.
      setOperationAction(ISD::UADDO_CARRY, VT, Custom);
      setOperationAction(ISD::USUBO_CARRY, VT, Custom);

      // Lower ATOMIC_LOAD and ATOMIC_STORE into normal volatile loads and
      // stores, putting a serialization instruction after the stores.
      setOperationAction(ISD::ATOMIC_LOAD, VT, Custom);
      setOperationAction(ISD::ATOMIC_STORE, VT, Custom);

      // Lower ATOMIC_LOAD_SUB into ATOMIC_LOAD_ADD if LAA and LAAG are
      // available, or if the operand is constant.
      setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);

      // Use POPCNT on z196 and above.
      if (Subtarget.hasPopulationCount())
        setOperationAction(ISD::CTPOP, VT, Custom);
      else
        setOperationAction(ISD::CTPOP, VT, Expand);

      // No special instructions for these.
      setOperationAction(ISD::CTTZ, VT, Expand);
      setOperationAction(ISD::ROTR, VT, Expand);

      // Use *MUL_LOHI where possible instead of MULH*.
      setOperationAction(ISD::MULHS, VT, Expand);
      setOperationAction(ISD::MULHU, VT, Expand);
      setOperationAction(ISD::SMUL_LOHI, VT, Custom);
      setOperationAction(ISD::UMUL_LOHI, VT, Custom);

      // Only z196 and above have native support for conversions to unsigned.
      // On z10, promoting to i64 doesn't generate an inexact condition for
      // values that are outside the i32 range but in the i64 range, so use
      // the default expansion.
      if (!Subtarget.hasFPExtension())
        setOperationAction(ISD::FP_TO_UINT, VT, Expand);

      // Mirror those settings for STRICT_FP_TO_[SU]INT. Note that these all
      // default to Expand, so need to be modified to Legal where appropriate.
      setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Legal);
      if (Subtarget.hasFPExtension())
        setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Legal);

      // And similarly for STRICT_[SU]INT_TO_FP.
      setOperationAction(ISD::STRICT_SINT_TO_FP, VT, Legal);
      if (Subtarget.hasFPExtension())
        setOperationAction(ISD::STRICT_UINT_TO_FP, VT, Legal);
    }
  }

  // Handle i128 if legal.
  if (isTypeLegal(MVT::i128)) {
    // No special instructions for these.
    setOperationAction(ISD::SDIVREM, MVT::i128, Expand);
    setOperationAction(ISD::UDIVREM, MVT::i128, Expand);
    setOperationAction(ISD::SMUL_LOHI, MVT::i128, Expand);
    setOperationAction(ISD::UMUL_LOHI, MVT::i128, Expand);
    setOperationAction(ISD::ROTR, MVT::i128, Expand);
    setOperationAction(ISD::ROTL, MVT::i128, Expand);
    setOperationAction(ISD::MUL, MVT::i128, Expand);
    setOperationAction(ISD::MULHS, MVT::i128, Expand);
    setOperationAction(ISD::MULHU, MVT::i128, Expand);
    setOperationAction(ISD::SDIV, MVT::i128, Expand);
    setOperationAction(ISD::UDIV, MVT::i128, Expand);
    setOperationAction(ISD::SREM, MVT::i128, Expand);
    setOperationAction(ISD::UREM, MVT::i128, Expand);
    setOperationAction(ISD::CTLZ, MVT::i128, Expand);
    setOperationAction(ISD::CTTZ, MVT::i128, Expand);

    // Support addition/subtraction with carry.
    setOperationAction(ISD::UADDO, MVT::i128, Custom);
    setOperationAction(ISD::USUBO, MVT::i128, Custom);
    setOperationAction(ISD::UADDO_CARRY, MVT::i128, Custom);
    setOperationAction(ISD::USUBO_CARRY, MVT::i128, Custom);

    // Use VPOPCT and add up partial results.
    setOperationAction(ISD::CTPOP, MVT::i128, Custom);

    // We have to use libcalls for these.
    setOperationAction(ISD::FP_TO_UINT, MVT::i128, LibCall);
    setOperationAction(ISD::FP_TO_SINT, MVT::i128, LibCall);
    setOperationAction(ISD::UINT_TO_FP, MVT::i128, LibCall);
    setOperationAction(ISD::SINT_TO_FP, MVT::i128, LibCall);
    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i128, LibCall);
    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i128, LibCall);
    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i128, LibCall);
    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i128, LibCall);
  }

  // Type legalization will convert 8- and 16-bit atomic operations into
  // forms that operate on i32s (but still keeping the original memory VT).
  // Lower them into full i32 operations.
  setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Custom);

  // Whether or not i128 is a legal type, we need to custom lower
  // the atomic operations in order to exploit SystemZ instructions.
  setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom);
  setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom);

  // We can use the CC result of compare-and-swap to implement
  // the "success" result of ATOMIC_CMP_SWAP_WITH_SUCCESS.
  setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i64, Custom);
  setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);

  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);

  // Traps are legal, as we will convert them to "j .+2".
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // z10 has instructions for signed but not unsigned FP conversion.
  // Handle unsigned 32-bit types as signed 64-bit types.
  if (!Subtarget.hasFPExtension()) {
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote);
    setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Promote);
    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Expand);
  }

  // We have native support for a 64-bit CTLZ, via FLOGR.
  setOperationAction(ISD::CTLZ, MVT::i32, Promote);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Promote);
  setOperationAction(ISD::CTLZ, MVT::i64, Legal);

  // On z15 we have native support for a 64-bit CTPOP.
  if (Subtarget.hasMiscellaneousExtensions3()) {
    setOperationAction(ISD::CTPOP, MVT::i32, Promote);
    setOperationAction(ISD::CTPOP, MVT::i64, Legal);
  }

  // Give LowerOperation the chance to replace 64-bit ORs with subregs.
  setOperationAction(ISD::OR, MVT::i64, Custom);

  // Expand 128 bit shifts without using a libcall.
  setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand);
  setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand);
  setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand);
  setLibcallName(RTLIB::SRL_I128, nullptr);
  setLibcallName(RTLIB::SHL_I128, nullptr);
  setLibcallName(RTLIB::SRA_I128, nullptr);

  // Also expand 256 bit shifts if i128 is a legal type.
  if (isTypeLegal(MVT::i128)) {
    setOperationAction(ISD::SRL_PARTS, MVT::i128, Expand);
    setOperationAction(ISD::SHL_PARTS, MVT::i128, Expand);
    setOperationAction(ISD::SRA_PARTS, MVT::i128, Expand);
  }

  // Handle bitcast from fp128 to i128.
  if (!isTypeLegal(MVT::i128))
    setOperationAction(ISD::BITCAST, MVT::i128, Custom);

  // We have native instructions for i8, i16 and i32 extensions, but not i1.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
  }

  // Handle the various types of symbolic address.
  setOperationAction(ISD::ConstantPool, PtrVT, Custom);
  setOperationAction(ISD::GlobalAddress, PtrVT, Custom);
  setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom);
  setOperationAction(ISD::BlockAddress, PtrVT, Custom);
  setOperationAction(ISD::JumpTable, PtrVT, Custom);

  // We need to handle dynamic allocations specially because of the
  // 160-byte area at the bottom of the stack.
  setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
  setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, PtrVT, Custom);

  setOperationAction(ISD::STACKSAVE, MVT::Other, Custom);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom);

  // Handle prefetches with PFD or PFDRL.
  setOperationAction(ISD::PREFETCH, MVT::Other, Custom);

  for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
    // Assume by default that all vector operations need to be expanded.
    for (unsigned Opcode = 0; Opcode < ISD::BUILTIN_OP_END; ++Opcode)
      if (getOperationAction(Opcode, VT) == Legal)
        setOperationAction(Opcode, VT, Expand);

    // Likewise all truncating stores and extending loads.
    for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
      setTruncStoreAction(VT, InnerVT, Expand);
      setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
      setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
      setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
    }

    if (isTypeLegal(VT)) {
      // These operations are legal for anything that can be stored in a
      // vector register, even if there is no native support for the format
      // as such. In particular, we can do these for v4f32 even though there
      // are no specific instructions for that format.
      setOperationAction(ISD::LOAD, VT, Legal);
      setOperationAction(ISD::STORE, VT, Legal);
      setOperationAction(ISD::VSELECT, VT, Legal);
      setOperationAction(ISD::BITCAST, VT, Legal);
      setOperationAction(ISD::UNDEF, VT, Legal);

      // Likewise, except that we need to replace the nodes with something
      // more specific.
      setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
    }
  }

  // Handle integer vector types.
  for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
    if (isTypeLegal(VT)) {
      // These operations have direct equivalents.
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal);
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Legal);
      setOperationAction(ISD::ADD, VT, Legal);
      setOperationAction(ISD::SUB, VT, Legal);
      if (VT != MVT::v2i64)
        setOperationAction(ISD::MUL, VT, Legal);
      setOperationAction(ISD::ABS, VT, Legal);
      setOperationAction(ISD::AND, VT, Legal);
      setOperationAction(ISD::OR, VT, Legal);
      setOperationAction(ISD::XOR, VT, Legal);
      if (Subtarget.hasVectorEnhancements1())
        setOperationAction(ISD::CTPOP, VT, Legal);
      else
        setOperationAction(ISD::CTPOP, VT, Custom);
      setOperationAction(ISD::CTTZ, VT, Legal);
      setOperationAction(ISD::CTLZ, VT, Legal);

      // Convert a GPR scalar to a vector by inserting it into element 0.
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);

      // Use a series of unpacks for extensions.
      setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
      setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);

      // Detect shifts/rotates by a scalar amount and convert them into
      // V*_BY_SCALAR.
      setOperationAction(ISD::SHL, VT, Custom);
      setOperationAction(ISD::SRA, VT, Custom);
      setOperationAction(ISD::SRL, VT, Custom);
      setOperationAction(ISD::ROTL, VT, Custom);

      // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands
      // and inverting the result as necessary.
      setOperationAction(ISD::SETCC, VT, Custom);
    }
  }

  if (Subtarget.hasVector()) {
    // There should be no need to check for float types other than v2f64
    // since <2 x f32> isn't a legal type.
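    // Added context (hedged, not from the upstream comment): the base vector
    // facility only provides 64-bit element FP<->integer conversions
    // (VCDGB/VCGDB and friends), which is why only the v2i64/v2f64
    // combinations below are marked Legal here; the 32-bit element forms are
    // handled in the vector-enhancements-2 block that follows.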
    setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
    setOperationAction(ISD::FP_TO_SINT, MVT::v2f64, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v2f64, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v2f64, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v2f64, Legal);

    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i64, Legal);
    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i64, Legal);
    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i64, Legal);
    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i64, Legal);
    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2f64, Legal);
  }

  if (Subtarget.hasVectorEnhancements2()) {
    setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_SINT, MVT::v4f32, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v4f32, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v4f32, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4f32, Legal);

    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Legal);
    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Legal);
    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4f32, Legal);
  }

  // Handle floating-point types.
  for (unsigned I = MVT::FIRST_FP_VALUETYPE;
       I <= MVT::LAST_FP_VALUETYPE;
       ++I) {
    MVT VT = MVT::SimpleValueType(I);
    if (isTypeLegal(VT)) {
      // We can use FI for FRINT.
      setOperationAction(ISD::FRINT, VT, Legal);

      // We can use the extended form of FI for other rounding operations.
      if (Subtarget.hasFPExtension()) {
        setOperationAction(ISD::FNEARBYINT, VT, Legal);
        setOperationAction(ISD::FFLOOR, VT, Legal);
        setOperationAction(ISD::FCEIL, VT, Legal);
        setOperationAction(ISD::FTRUNC, VT, Legal);
        setOperationAction(ISD::FROUND, VT, Legal);
      }

      // No special instructions for these.
      setOperationAction(ISD::FSIN, VT, Expand);
      setOperationAction(ISD::FCOS, VT, Expand);
      setOperationAction(ISD::FSINCOS, VT, Expand);
      setOperationAction(ISD::FREM, VT, Expand);
      setOperationAction(ISD::FPOW, VT, Expand);

      // Special treatment.
      setOperationAction(ISD::IS_FPCLASS, VT, Custom);

      // Handle constrained floating-point operations.
      setOperationAction(ISD::STRICT_FADD, VT, Legal);
      setOperationAction(ISD::STRICT_FSUB, VT, Legal);
      setOperationAction(ISD::STRICT_FMUL, VT, Legal);
      setOperationAction(ISD::STRICT_FDIV, VT, Legal);
      setOperationAction(ISD::STRICT_FMA, VT, Legal);
      setOperationAction(ISD::STRICT_FSQRT, VT, Legal);
      setOperationAction(ISD::STRICT_FRINT, VT, Legal);
      setOperationAction(ISD::STRICT_FP_ROUND, VT, Legal);
      setOperationAction(ISD::STRICT_FP_EXTEND, VT, Legal);
      if (Subtarget.hasFPExtension()) {
        setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
        setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
        setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
        setOperationAction(ISD::STRICT_FROUND, VT, Legal);
        setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
      }
    }
  }

  // Handle floating-point vector types.
  if (Subtarget.hasVector()) {
    // Scalar-to-vector conversion is just a subreg.
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);

    // Some insertions and extractions can be done directly but others
    // need to go via integers.
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);

    // These operations have direct equivalents.
    setOperationAction(ISD::FADD, MVT::v2f64, Legal);
    setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
    setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
    setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
    setOperationAction(ISD::FMA, MVT::v2f64, Legal);
    setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
    setOperationAction(ISD::FABS, MVT::v2f64, Legal);
    setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
    setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
    setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
    setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
    setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
    setOperationAction(ISD::FROUND, MVT::v2f64, Legal);

    // Handle constrained floating-point operations.
    setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FMA, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FRINT, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FFLOOR, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FCEIL, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FTRUNC, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FROUND, MVT::v2f64, Legal);

    setOperationAction(ISD::SETCC, MVT::v2f64, Custom);
    setOperationAction(ISD::SETCC, MVT::v4f32, Custom);
    setOperationAction(ISD::STRICT_FSETCC, MVT::v2f64, Custom);
    setOperationAction(ISD::STRICT_FSETCC, MVT::v4f32, Custom);
    if (Subtarget.hasVectorEnhancements1()) {
      setOperationAction(ISD::STRICT_FSETCCS, MVT::v2f64, Custom);
      setOperationAction(ISD::STRICT_FSETCCS, MVT::v4f32, Custom);
    }
  }

  // The vector enhancements facility 1 has instructions for these.
  if (Subtarget.hasVectorEnhancements1()) {
    setOperationAction(ISD::FADD, MVT::v4f32, Legal);
    setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
    setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
    setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
    setOperationAction(ISD::FMA, MVT::v4f32, Legal);
    setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
    setOperationAction(ISD::FABS, MVT::v4f32, Legal);
    setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
    setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
    setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
    setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
    setOperationAction(ISD::FROUND, MVT::v4f32, Legal);

    setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
    setOperationAction(ISD::FMAXIMUM, MVT::f64, Legal);
    setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
    setOperationAction(ISD::FMINIMUM, MVT::f64, Legal);

    setOperationAction(ISD::FMAXNUM, MVT::v2f64, Legal);
    setOperationAction(ISD::FMAXIMUM, MVT::v2f64, Legal);
    setOperationAction(ISD::FMINNUM, MVT::v2f64, Legal);
    setOperationAction(ISD::FMINIMUM, MVT::v2f64, Legal);

    setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
    setOperationAction(ISD::FMAXIMUM, MVT::f32, Legal);
    setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
    setOperationAction(ISD::FMINIMUM, MVT::f32, Legal);

    setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
    setOperationAction(ISD::FMAXIMUM, MVT::v4f32, Legal);
    setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
    setOperationAction(ISD::FMINIMUM, MVT::v4f32, Legal);

    setOperationAction(ISD::FMAXNUM, MVT::f128, Legal);
    setOperationAction(ISD::FMAXIMUM, MVT::f128, Legal);
    setOperationAction(ISD::FMINNUM, MVT::f128, Legal);
    setOperationAction(ISD::FMINIMUM, MVT::f128, Legal);

    // Handle constrained floating-point operations.
    setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FMA, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FRINT, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FFLOOR, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FCEIL, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FROUND, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FTRUNC, MVT::v4f32, Legal);
    for (auto VT : { MVT::f32, MVT::f64, MVT::f128,
                     MVT::v4f32, MVT::v2f64 }) {
      setOperationAction(ISD::STRICT_FMAXNUM, VT, Legal);
      setOperationAction(ISD::STRICT_FMINNUM, VT, Legal);
      setOperationAction(ISD::STRICT_FMAXIMUM, VT, Legal);
      setOperationAction(ISD::STRICT_FMINIMUM, VT, Legal);
    }
  }

  // We only have fused f128 multiply-addition on vector registers.
  if (!Subtarget.hasVectorEnhancements1()) {
    setOperationAction(ISD::FMA, MVT::f128, Expand);
    setOperationAction(ISD::STRICT_FMA, MVT::f128, Expand);
  }

  // We don't have a copysign instruction on vector registers.
  if (Subtarget.hasVectorEnhancements1())
    setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);

  // Needed so that we don't try to implement f128 constant loads using
  // a load-and-extend of a f80 constant (in cases where the constant
  // would fit in an f80).
  for (MVT VT : MVT::fp_valuetypes())
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);

  // We don't have extending load instructions on vector registers.
  if (Subtarget.hasVectorEnhancements1()) {
    setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand);
  }

  // Floating-point truncation and stores need to be done separately.
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  setTruncStoreAction(MVT::f128, MVT::f32, Expand);
  setTruncStoreAction(MVT::f128, MVT::f64, Expand);

  // We have 64-bit FPR<->GPR moves, but need special handling for
  // 32-bit forms.
  if (!Subtarget.hasVector()) {
    setOperationAction(ISD::BITCAST, MVT::i32, Custom);
    setOperationAction(ISD::BITCAST, MVT::f32, Custom);
  }

  // VASTART and VACOPY need to deal with the SystemZ-specific varargs
  // structure, but VAEND is a no-op.
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VACOPY, MVT::Other, Custom);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);

  setOperationAction(ISD::GET_ROUNDING, MVT::i32, Custom);

  // Codes for which we want to perform some z-specific combinations.
  setTargetDAGCombine({ISD::ZERO_EXTEND,
                       ISD::SIGN_EXTEND,
                       ISD::SIGN_EXTEND_INREG,
                       ISD::LOAD,
                       ISD::STORE,
                       ISD::VECTOR_SHUFFLE,
                       ISD::EXTRACT_VECTOR_ELT,
                       ISD::FP_ROUND,
                       ISD::STRICT_FP_ROUND,
                       ISD::FP_EXTEND,
                       ISD::SINT_TO_FP,
                       ISD::UINT_TO_FP,
                       ISD::STRICT_FP_EXTEND,
                       ISD::BSWAP,
                       ISD::SDIV,
                       ISD::UDIV,
                       ISD::SREM,
                       ISD::UREM,
                       ISD::INTRINSIC_VOID,
                       ISD::INTRINSIC_W_CHAIN});

  // Handle intrinsics.
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // We want to use MVC in preference to even a single load/store pair.
  MaxStoresPerMemcpy = Subtarget.hasVector() ? 2 : 0;
  MaxStoresPerMemcpyOptSize = 0;

  // The main memset sequence is a byte store followed by an MVC.
  // Two STC or MV..I stores win over that, but the kind of fused stores
  // generated by target-independent code don't when the byte value is
  // variable. E.g. "STC <reg>;MHI <reg>,257;STH <reg>" is not better
  // than "STC;MVC". Handle the choice in target-specific code instead.
  MaxStoresPerMemset = Subtarget.hasVector() ? 2 : 0;
  MaxStoresPerMemsetOptSize = 0;

  // Default to having -disable-strictnode-mutation on
  IsStrictFPEnabled = true;

  if (Subtarget.isTargetzOS()) {
    struct RTLibCallMapping {
      RTLIB::Libcall Code;
      const char *Name;
    };
    static RTLibCallMapping RTLibCallCommon[] = {
#define HANDLE_LIBCALL(code, name) {RTLIB::code, name},
#include "ZOSLibcallNames.def"
    };
    for (auto &E : RTLibCallCommon)
      setLibcallName(E.Code, E.Name);
  }
}

bool SystemZTargetLowering::useSoftFloat() const {
  return Subtarget.hasSoftFloat();
}

EVT SystemZTargetLowering::getSetCCResultType(const DataLayout &DL,
                                              LLVMContext &, EVT VT) const {
  if (!VT.isVector())
    return MVT::i32;
  return VT.changeVectorElementTypeToInteger();
}

bool SystemZTargetLowering::isFMAFasterThanFMulAndFAdd(
    const MachineFunction &MF, EVT VT) const {
  VT = VT.getScalarType();

  if (!VT.isSimple())
    return false;

  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::f32:
  case MVT::f64:
    return true;
  case MVT::f128:
    return Subtarget.hasVectorEnhancements1();
  default:
    break;
  }

  return false;
}

// Return true if the constant can be generated with a vector instruction,
// such as VGM, VGMB or VREPI.
bool SystemZVectorConstantInfo::isVectorConstantLegal(
    const SystemZSubtarget &Subtarget) {
  const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
  if (!Subtarget.hasVector() ||
      (isFP128 && !Subtarget.hasVectorEnhancements1()))
    return false;

  // Try using VECTOR GENERATE BYTE MASK. This is the architecturally-
  // preferred way of creating all-zero and all-one vectors so give it
  // priority over other methods below.
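  // Worked example (illustrative, under the encoding the loop below
  // computes): a 128-bit constant whose bytes are all 0x00 or 0xff maps to
  // one mask bit per byte, so the all-ones vector becomes a byte mask of
  // 0xffff and the all-zeros vector a byte mask of 0x0000.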
  unsigned Mask = 0;
  unsigned I = 0;
  for (; I < SystemZ::VectorBytes; ++I) {
    uint64_t Byte = IntBits.lshr(I * 8).trunc(8).getZExtValue();
    if (Byte == 0xff)
      Mask |= 1ULL << I;
    else if (Byte != 0)
      break;
  }
  if (I == SystemZ::VectorBytes) {
    Opcode = SystemZISD::BYTE_MASK;
    OpVals.push_back(Mask);
    VecVT = MVT::getVectorVT(MVT::getIntegerVT(8), 16);
    return true;
  }

  if (SplatBitSize > 64)
    return false;

  auto tryValue = [&](uint64_t Value) -> bool {
    // Try VECTOR REPLICATE IMMEDIATE
    int64_t SignedValue = SignExtend64(Value, SplatBitSize);
    if (isInt<16>(SignedValue)) {
      OpVals.push_back(((unsigned) SignedValue));
      Opcode = SystemZISD::REPLICATE;
      VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize),
                               SystemZ::VectorBits / SplatBitSize);
      return true;
    }
    // Try VECTOR GENERATE MASK
    unsigned Start, End;
    if (TII->isRxSBGMask(Value, SplatBitSize, Start, End)) {
      // isRxSBGMask returns the bit numbers for a full 64-bit value, with 0
      // denoting 1 << 63 and 63 denoting 1. Convert them to bit numbers for
      // a SplatBitSize value, so that 0 denotes 1 << (SplatBitSize-1).
      OpVals.push_back(Start - (64 - SplatBitSize));
      OpVals.push_back(End - (64 - SplatBitSize));
      Opcode = SystemZISD::ROTATE_MASK;
      VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize),
                               SystemZ::VectorBits / SplatBitSize);
      return true;
    }
    return false;
  };

  // First try assuming that any undefined bits above the highest set bit
  // and below the lowest set bit are 1s. This increases the likelihood of
  // being able to use a sign-extended element value in VECTOR REPLICATE
  // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK.
  uint64_t SplatBitsZ = SplatBits.getZExtValue();
  uint64_t SplatUndefZ = SplatUndef.getZExtValue();
  unsigned LowerBits = llvm::countr_zero(SplatBitsZ);
  unsigned UpperBits = llvm::countl_zero(SplatBitsZ);
  uint64_t Lower = SplatUndefZ & maskTrailingOnes<uint64_t>(LowerBits);
  uint64_t Upper = SplatUndefZ & maskLeadingOnes<uint64_t>(UpperBits);
  if (tryValue(SplatBitsZ | Upper | Lower))
    return true;

  // Now try assuming that any undefined bits between the first and
  // last defined set bits are set. This increases the chances of
  // using a non-wraparound mask.
  uint64_t Middle = SplatUndefZ & ~Upper & ~Lower;
  return tryValue(SplatBitsZ | Middle);
}

SystemZVectorConstantInfo::SystemZVectorConstantInfo(APInt IntImm) {
  if (IntImm.isSingleWord()) {
    IntBits = APInt(128, IntImm.getZExtValue());
    IntBits <<= (SystemZ::VectorBits - IntImm.getBitWidth());
  } else
    IntBits = IntImm;
  assert(IntBits.getBitWidth() == 128 && "Unsupported APInt.");

  // Find the smallest splat.
  SplatBits = IntImm;
  unsigned Width = SplatBits.getBitWidth();
  while (Width > 8) {
    unsigned HalfSize = Width / 2;
    APInt HighValue = SplatBits.lshr(HalfSize).trunc(HalfSize);
    APInt LowValue = SplatBits.trunc(HalfSize);

    // If the two halves do not match, stop here.
    if (HighValue != LowValue || 8 > HalfSize)
      break;

    SplatBits = HighValue;
    Width = HalfSize;
  }
  SplatUndef = 0;
  SplatBitSize = Width;
}

SystemZVectorConstantInfo::SystemZVectorConstantInfo(BuildVectorSDNode *BVN) {
  assert(BVN->isConstant() && "Expected a constant BUILD_VECTOR");
  bool HasAnyUndefs;

  // Get IntBits by finding the 128 bit splat.
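  // Hedged reading of the two calls below (added commentary): requesting a
  // minimum splat size of 128 makes isConstantSplat return the whole vector
  // as one 128-bit value (IntBits), while the second call with a minimum of 8
  // returns the smallest element splat (SplatBits/SplatBitSize) that
  // reproduces it.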
  BVN->isConstantSplat(IntBits, SplatUndef, SplatBitSize, HasAnyUndefs, 128,
                       true);

  // Get SplatBits by finding the 8 bit or greater splat.
  BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, 8,
                       true);
}

bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
                                         bool ForCodeSize) const {
  // We can load zero using LZ?R and negative zero using LZ?R;LC?BR.
  if (Imm.isZero() || Imm.isNegZero())
    return true;

  return SystemZVectorConstantInfo(Imm).isVectorConstantLegal(Subtarget);
}

/// Returns true if stack probing through inline assembly is requested.
bool SystemZTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const {
  // If the function specifically requests inline stack probes, emit them.
  if (MF.getFunction().hasFnAttribute("probe-stack"))
    return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
           "inline-asm";
  return false;
}

TargetLowering::AtomicExpansionKind
SystemZTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
  // Don't expand subword operations as they require special treatment.
  if (RMW->getType()->isIntegerTy(8) || RMW->getType()->isIntegerTy(16))
    return AtomicExpansionKind::None;

  // Don't expand if there is a target instruction available.
  if (Subtarget.hasInterlockedAccess1() &&
      (RMW->getType()->isIntegerTy(32) || RMW->getType()->isIntegerTy(64)) &&
      (RMW->getOperation() == AtomicRMWInst::BinOp::Add ||
       RMW->getOperation() == AtomicRMWInst::BinOp::Sub ||
       RMW->getOperation() == AtomicRMWInst::BinOp::And ||
       RMW->getOperation() == AtomicRMWInst::BinOp::Or ||
       RMW->getOperation() == AtomicRMWInst::BinOp::Xor))
    return AtomicExpansionKind::None;

  return AtomicExpansionKind::CmpXChg;
}

bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
  // We can use CGFI or CLGFI.
  return isInt<32>(Imm) || isUInt<32>(Imm);
}

bool SystemZTargetLowering::isLegalAddImmediate(int64_t Imm) const {
  // We can use ALGFI or SLGFI.
  return isUInt<32>(Imm) || isUInt<32>(-Imm);
}

bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(
    EVT VT, unsigned, Align, MachineMemOperand::Flags, unsigned *Fast) const {
  // Unaligned accesses should never be slower than the expanded version.
  // We check specifically for aligned accesses in the few cases where
  // they are required.
  if (Fast)
    *Fast = 1;
  return true;
}

// Information about the addressing mode for a memory access.
struct AddressingMode {
  // True if a long displacement is supported.
  bool LongDisplacement;

  // True if use of index register is supported.
  bool IndexReg;

  AddressingMode(bool LongDispl, bool IdxReg) :
    LongDisplacement(LongDispl), IndexReg(IdxReg) {}
};

// Return the desired addressing mode for a Load which has only one use (in
// the same block) which is a Store.
static AddressingMode getLoadStoreAddrMode(bool HasVector,
                                           Type *Ty) {
  // With vector support a Load->Store combination may be combined to either
  // an MVC or vector operations and it seems to work best to allow the
  // vector addressing mode.
  if (HasVector)
    return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);

  // Otherwise only the MVC case is special.
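  // Illustrative example (added, hedged commentary): an i8 load whose only
  // use is an i8 store is the pattern that typically becomes an MVC, and MVC
  // addressing only allows a base register plus a 12-bit unsigned
  // displacement with no index register, which is why the restricted mode is
  // returned below for that case.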
  bool MVC = Ty->isIntegerTy(8);
  return AddressingMode(!MVC/*LongDispl*/, !MVC/*IdxReg*/);
}

// Return the addressing mode which seems most desirable given an LLVM
// Instruction pointer.
static AddressingMode
supportedAddressingMode(Instruction *I, bool HasVector) {
  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
    switch (II->getIntrinsicID()) {
    default: break;
    case Intrinsic::memset:
    case Intrinsic::memmove:
    case Intrinsic::memcpy:
      return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
    }
  }

  if (isa<LoadInst>(I) && I->hasOneUse()) {
    auto *SingleUser = cast<Instruction>(*I->user_begin());
    if (SingleUser->getParent() == I->getParent()) {
      if (isa<ICmpInst>(SingleUser)) {
        if (auto *C = dyn_cast<ConstantInt>(SingleUser->getOperand(1)))
          if (C->getBitWidth() <= 64 &&
              (isInt<16>(C->getSExtValue()) || isUInt<16>(C->getZExtValue())))
            // Comparison of memory with 16 bit signed / unsigned immediate
            return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
      } else if (isa<StoreInst>(SingleUser))
        // Load->Store
        return getLoadStoreAddrMode(HasVector, I->getType());
    }
  } else if (auto *StoreI = dyn_cast<StoreInst>(I)) {
    if (auto *LoadI = dyn_cast<LoadInst>(StoreI->getValueOperand()))
      if (LoadI->hasOneUse() && LoadI->getParent() == I->getParent())
        // Load->Store
        return getLoadStoreAddrMode(HasVector, LoadI->getType());
  }

  if (HasVector && (isa<LoadInst>(I) || isa<StoreInst>(I))) {

    // * Use LDE instead of LE/LEY for z13 to avoid partial register
    //   dependencies (LDE only supports small offsets).
    // * Utilize the vector registers to hold floating point
    //   values (vector load / store instructions only support small
    //   offsets).

    Type *MemAccessTy = (isa<LoadInst>(I) ? I->getType() :
                         I->getOperand(0)->getType());
    bool IsFPAccess = MemAccessTy->isFloatingPointTy();
    bool IsVectorAccess = MemAccessTy->isVectorTy();

    // A store of an extracted vector element will be combined into a VSTE type
    // instruction.
    if (!IsVectorAccess && isa<StoreInst>(I)) {
      Value *DataOp = I->getOperand(0);
      if (isa<ExtractElementInst>(DataOp))
        IsVectorAccess = true;
    }

    // A load which gets inserted into a vector element will be combined into a
    // VLE type instruction.
    if (!IsVectorAccess && isa<LoadInst>(I) && I->hasOneUse()) {
      User *LoadUser = *I->user_begin();
      if (isa<InsertElementInst>(LoadUser))
        IsVectorAccess = true;
    }

    if (IsFPAccess || IsVectorAccess)
      return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
  }

  return AddressingMode(true/*LongDispl*/, true/*IdxReg*/);
}

bool SystemZTargetLowering::isLegalAddressingMode(const DataLayout &DL,
       const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I) const {
  // Punt on globals for now, although they can be used in limited
  // RELATIVE LONG cases.
  if (AM.BaseGV)
    return false;

  // Require a 20-bit signed offset.
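  // Context (added, hedged commentary): SystemZ base+displacement addressing
  // comes in a short form with a 12-bit unsigned displacement (e.g. L, ST)
  // and a "long displacement" form with a 20-bit signed displacement
  // (e.g. LY, LG), so offsets outside [-524288, 524287] can never be folded
  // into the address.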
  if (!isInt<20>(AM.BaseOffs))
    return false;

  bool RequireD12 = Subtarget.hasVector() && Ty->isVectorTy();
  AddressingMode SupportedAM(!RequireD12, true);
  if (I != nullptr)
    SupportedAM = supportedAddressingMode(I, Subtarget.hasVector());

  if (!SupportedAM.LongDisplacement && !isUInt<12>(AM.BaseOffs))
    return false;

  if (!SupportedAM.IndexReg)
    // No indexing allowed.
    return AM.Scale == 0;
  else
    // Indexing is OK but no scale factor can be applied.
    return AM.Scale == 0 || AM.Scale == 1;
}

bool SystemZTargetLowering::findOptimalMemOpLowering(
    std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
    unsigned SrcAS, const AttributeList &FuncAttributes) const {
  const int MVCFastLen = 16;

  if (Limit != ~unsigned(0)) {
    // Don't expand Op into scalar loads/stores in these cases:
    if (Op.isMemcpy() && Op.allowOverlap() && Op.size() <= MVCFastLen)
      return false; // Small memcpy: Use MVC
    if (Op.isMemset() && Op.size() - 1 <= MVCFastLen)
      return false; // Small memset (first byte with STC/MVI): Use MVC
    if (Op.isZeroMemset())
      return false; // Memset zero: Use XC
  }

  return TargetLowering::findOptimalMemOpLowering(MemOps, Limit, Op, DstAS,
                                                  SrcAS, FuncAttributes);
}

EVT SystemZTargetLowering::getOptimalMemOpType(const MemOp &Op,
                                   const AttributeList &FuncAttributes) const {
  return Subtarget.hasVector() ? MVT::v2i64 : MVT::Other;
}

bool SystemZTargetLowering::isTruncateFree(Type *FromType, Type *ToType) const {
  if (!FromType->isIntegerTy() || !ToType->isIntegerTy())
    return false;
  unsigned FromBits = FromType->getPrimitiveSizeInBits().getFixedValue();
  unsigned ToBits = ToType->getPrimitiveSizeInBits().getFixedValue();
  return FromBits > ToBits;
}

bool SystemZTargetLowering::isTruncateFree(EVT FromVT, EVT ToVT) const {
  if (!FromVT.isInteger() || !ToVT.isInteger())
    return false;
  unsigned FromBits = FromVT.getFixedSizeInBits();
  unsigned ToBits = ToVT.getFixedSizeInBits();
  return FromBits > ToBits;
}

//===----------------------------------------------------------------------===//
// Inline asm support
//===----------------------------------------------------------------------===//

TargetLowering::ConstraintType
SystemZTargetLowering::getConstraintType(StringRef Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'a': // Address register
    case 'd': // Data register (equivalent to 'r')
    case 'f': // Floating-point register
    case 'h': // High-part register
    case 'r': // General-purpose register
    case 'v': // Vector register
      return C_RegisterClass;

    case 'Q': // Memory with base and unsigned 12-bit displacement
    case 'R': // Likewise, plus an index
    case 'S': // Memory with base and signed 20-bit displacement
    case 'T': // Likewise, plus an index
    case 'm': // Equivalent to 'T'.
      return C_Memory;

    case 'I': // Unsigned 8-bit constant
    case 'J': // Unsigned 12-bit constant
    case 'K': // Signed 16-bit constant
    case 'L': // Signed 20-bit displacement (on all targets we support)
    case 'M': // 0x7fffffff
      return C_Immediate;

    default:
      break;
    }
  } else if (Constraint.size() == 2 && Constraint[0] == 'Z') {
    switch (Constraint[1]) {
    case 'Q': // Address with base and unsigned 12-bit displacement
    case 'R': // Likewise, plus an index
    case 'S': // Address with base and signed 20-bit displacement
    case 'T': // Likewise, plus an index
      return C_Address;

    default:
      break;
    }
  }
  return TargetLowering::getConstraintType(Constraint);
}

TargetLowering::ConstraintWeight SystemZTargetLowering::
getSingleConstraintMatchWeight(AsmOperandInfo &info,
                               const char *constraint) const {
  ConstraintWeight weight = CW_Invalid;
  Value *CallOperandVal = info.CallOperandVal;
  // If we don't have a value, we can't do a match,
  // but allow it at the lowest weight.
  if (!CallOperandVal)
    return CW_Default;
  Type *type = CallOperandVal->getType();
  // Look at the constraint type.
  switch (*constraint) {
  default:
    weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
    break;

  case 'a': // Address register
  case 'd': // Data register (equivalent to 'r')
  case 'h': // High-part register
  case 'r': // General-purpose register
    weight = CallOperandVal->getType()->isIntegerTy() ? CW_Register : CW_Default;
    break;

  case 'f': // Floating-point register
    if (!useSoftFloat())
      weight = type->isFloatingPointTy() ? CW_Register : CW_Default;
    break;

  case 'v': // Vector register
    if (Subtarget.hasVector())
      weight = (type->isVectorTy() || type->isFloatingPointTy()) ? CW_Register
                                                                 : CW_Default;
    break;

  case 'I': // Unsigned 8-bit constant
    if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
      if (isUInt<8>(C->getZExtValue()))
        weight = CW_Constant;
    break;

  case 'J': // Unsigned 12-bit constant
    if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
      if (isUInt<12>(C->getZExtValue()))
        weight = CW_Constant;
    break;

  case 'K': // Signed 16-bit constant
    if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
      if (isInt<16>(C->getSExtValue()))
        weight = CW_Constant;
    break;

  case 'L': // Signed 20-bit displacement (on all targets we support)
    if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
      if (isInt<20>(C->getSExtValue()))
        weight = CW_Constant;
    break;

  case 'M': // 0x7fffffff
    if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
      if (C->getZExtValue() == 0x7fffffff)
        weight = CW_Constant;
    break;
  }
  return weight;
}

// Parse a "{tNNN}" register constraint for which the register type "t"
// has already been verified. RC is the class associated with "t" and
// Map maps 0-based register numbers to LLVM register numbers.
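// For example (illustrative, assuming the GR64 mapping used below): the
// constraint "{r5}" parsed against SystemZMC::GR64Regs yields the pair
// (SystemZ::R5D, &SystemZ::GR64BitRegClass), while an out-of-range or
// unmapped number yields (0, nullptr).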
static std::pair<unsigned, const TargetRegisterClass *>
parseRegisterNumber(StringRef Constraint, const TargetRegisterClass *RC,
                    const unsigned *Map, unsigned Size) {
  assert(*(Constraint.end()-1) == '}' && "Missing '}'");
  if (isdigit(Constraint[2])) {
    unsigned Index;
    bool Failed =
        Constraint.slice(2, Constraint.size() - 1).getAsInteger(10, Index);
    if (!Failed && Index < Size && Map[Index])
      return std::make_pair(Map[Index], RC);
  }
  return std::make_pair(0U, nullptr);
}

std::pair<unsigned, const TargetRegisterClass *>
SystemZTargetLowering::getRegForInlineAsmConstraint(
    const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
  if (Constraint.size() == 1) {
    // GCC Constraint Letters
    switch (Constraint[0]) {
    default: break;
    case 'd': // Data register (equivalent to 'r')
    case 'r': // General-purpose register
      if (VT.getSizeInBits() == 64)
        return std::make_pair(0U, &SystemZ::GR64BitRegClass);
      else if (VT.getSizeInBits() == 128)
        return std::make_pair(0U, &SystemZ::GR128BitRegClass);
      return std::make_pair(0U, &SystemZ::GR32BitRegClass);

    case 'a': // Address register
      if (VT == MVT::i64)
        return std::make_pair(0U, &SystemZ::ADDR64BitRegClass);
      else if (VT == MVT::i128)
        return std::make_pair(0U, &SystemZ::ADDR128BitRegClass);
      return std::make_pair(0U, &SystemZ::ADDR32BitRegClass);

    case 'h': // High-part register (an LLVM extension)
      return std::make_pair(0U, &SystemZ::GRH32BitRegClass);

    case 'f': // Floating-point register
      if (!useSoftFloat()) {
        if (VT.getSizeInBits() == 64)
          return std::make_pair(0U, &SystemZ::FP64BitRegClass);
        else if (VT.getSizeInBits() == 128)
          return std::make_pair(0U, &SystemZ::FP128BitRegClass);
        return std::make_pair(0U, &SystemZ::FP32BitRegClass);
      }
      break;

    case 'v': // Vector register
      if (Subtarget.hasVector()) {
        if (VT.getSizeInBits() == 32)
          return std::make_pair(0U, &SystemZ::VR32BitRegClass);
        if (VT.getSizeInBits() == 64)
          return std::make_pair(0U, &SystemZ::VR64BitRegClass);
        return std::make_pair(0U, &SystemZ::VR128BitRegClass);
      }
      break;
    }
  }
  if (Constraint.size() > 0 && Constraint[0] == '{') {

    // A clobber constraint (e.g. ~{f0}) will have MVT::Other which is illegal
    // to check the size on.
    auto getVTSizeInBits = [&VT]() {
      return VT == MVT::Other ? 0 : VT.getSizeInBits();
    };

    // We need to override the default register parsing for GPRs and FPRs
    // because the interpretation depends on VT. The internal names of
    // the registers are also different from the external names
    // (F0D and F0S instead of F0, etc.).
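    // For instance (added, hedged illustration): "{f0}" resolves to F0S in
    // FP32BitRegClass when VT is 32 bits wide but to F0D in FP64BitRegClass
    // when VT is 64 bits wide, which is exactly the VT dependence the
    // comment above describes.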
    if (Constraint[1] == 'r') {
      if (getVTSizeInBits() == 32)
        return parseRegisterNumber(Constraint, &SystemZ::GR32BitRegClass,
                                   SystemZMC::GR32Regs, 16);
      if (getVTSizeInBits() == 128)
        return parseRegisterNumber(Constraint, &SystemZ::GR128BitRegClass,
                                   SystemZMC::GR128Regs, 16);
      return parseRegisterNumber(Constraint, &SystemZ::GR64BitRegClass,
                                 SystemZMC::GR64Regs, 16);
    }
    if (Constraint[1] == 'f') {
      if (useSoftFloat())
        return std::make_pair(
            0u, static_cast<const TargetRegisterClass *>(nullptr));
      if (getVTSizeInBits() == 32)
        return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass,
                                   SystemZMC::FP32Regs, 16);
      if (getVTSizeInBits() == 128)
        return parseRegisterNumber(Constraint, &SystemZ::FP128BitRegClass,
                                   SystemZMC::FP128Regs, 16);
      return parseRegisterNumber(Constraint, &SystemZ::FP64BitRegClass,
                                 SystemZMC::FP64Regs, 16);
    }
    if (Constraint[1] == 'v') {
      if (!Subtarget.hasVector())
        return std::make_pair(
            0u, static_cast<const TargetRegisterClass *>(nullptr));
      if (getVTSizeInBits() == 32)
        return parseRegisterNumber(Constraint, &SystemZ::VR32BitRegClass,
                                   SystemZMC::VR32Regs, 32);
      if (getVTSizeInBits() == 64)
        return parseRegisterNumber(Constraint, &SystemZ::VR64BitRegClass,
                                   SystemZMC::VR64Regs, 32);
      return parseRegisterNumber(Constraint, &SystemZ::VR128BitRegClass,
                                 SystemZMC::VR128Regs, 32);
    }
  }
  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}

// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
Register
SystemZTargetLowering::getRegisterByName(const char *RegName, LLT VT,
                                         const MachineFunction &MF) const {
  Register Reg =
      StringSwitch<Register>(RegName)
          .Case("r4", Subtarget.isTargetXPLINK64() ? SystemZ::R4D : 0)
          .Case("r15", Subtarget.isTargetELF() ? SystemZ::R15D : 0)
          .Default(0);

  if (Reg)
    return Reg;
  report_fatal_error("Invalid register name global variable");
}

Register SystemZTargetLowering::getExceptionPointerRegister(
    const Constant *PersonalityFn) const {
  return Subtarget.isTargetXPLINK64() ? SystemZ::R1D : SystemZ::R6D;
}

Register SystemZTargetLowering::getExceptionSelectorRegister(
    const Constant *PersonalityFn) const {
  return Subtarget.isTargetXPLINK64() ? SystemZ::R2D : SystemZ::R7D;
}

void SystemZTargetLowering::LowerAsmOperandForConstraint(
    SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
    SelectionDAG &DAG) const {
  // Only support length 1 constraints for now.
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'I': // Unsigned 8-bit constant
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (isUInt<8>(C->getZExtValue()))
          Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
                                              Op.getValueType()));
      return;

    case 'J': // Unsigned 12-bit constant
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (isUInt<12>(C->getZExtValue()))
          Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
                                              Op.getValueType()));
      return;

    case 'K': // Signed 16-bit constant
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (isInt<16>(C->getSExtValue()))
          Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
                                              Op.getValueType()));
      return;

    case 'L': // Signed 20-bit displacement (on all targets we support)
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (isInt<20>(C->getSExtValue()))
          Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
                                              Op.getValueType()));
      return;

    case 'M': // 0x7fffffff
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (C->getZExtValue() == 0x7fffffff)
          Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
                                              Op.getValueType()));
      return;
    }
  }
  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}

//===----------------------------------------------------------------------===//
// Calling conventions
//===----------------------------------------------------------------------===//

#include "SystemZGenCallingConv.inc"

const MCPhysReg *SystemZTargetLowering::getScratchRegisters(
    CallingConv::ID) const {
  static const MCPhysReg ScratchRegs[] = { SystemZ::R0D, SystemZ::R1D,
                                           SystemZ::R14D, 0 };
  return ScratchRegs;
}

bool SystemZTargetLowering::allowTruncateForTailCall(Type *FromType,
                                                     Type *ToType) const {
  return isTruncateFree(FromType, ToType);
}

bool SystemZTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
  return CI->isTailCall();
}

// Value is a value that has been passed to us in the location described by VA
// (and so has type VA.getLocVT()). Convert Value to VA.getValVT(), chaining
// any loads onto Chain.
static SDValue convertLocVTToValVT(SelectionDAG &DAG, const SDLoc &DL,
                                   CCValAssign &VA, SDValue Chain,
                                   SDValue Value) {
  // If the argument has been promoted from a smaller type, insert an
  // assertion to capture this.
  if (VA.getLocInfo() == CCValAssign::SExt)
    Value = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Value,
                        DAG.getValueType(VA.getValVT()));
  else if (VA.getLocInfo() == CCValAssign::ZExt)
    Value = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Value,
                        DAG.getValueType(VA.getValVT()));

  if (VA.isExtInLoc())
    Value = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Value);
  else if (VA.getLocInfo() == CCValAssign::BCvt) {
    // If this is a short vector argument loaded from the stack,
    // extend from i64 to full vector size and then bitcast.
1469 assert(VA.getLocVT() == MVT::i64);
1470 assert(VA.getValVT().isVector());
1471 Value = DAG.getBuildVector(MVT::v2i64, DL, {Value, DAG.getUNDEF(MVT::i64)});
1472 Value = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Value);
1473 } else
1474 assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo");
1475 return Value;
1476 }
1477
1478 // Value is a value of type VA.getValVT() that we need to copy into
1479 // the location described by VA. Return a copy of Value converted to
1480 // VA.getLocVT(). The caller is responsible for handling indirect values.
1481 static SDValue convertValVTToLocVT(SelectionDAG &DAG, const SDLoc &DL,
1482 CCValAssign &VA, SDValue Value) {
1483 switch (VA.getLocInfo()) {
1484 case CCValAssign::SExt:
1485 return DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Value);
1486 case CCValAssign::ZExt:
1487 return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value);
1488 case CCValAssign::AExt:
1489 return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value);
1490 case CCValAssign::BCvt: {
1491 assert(VA.getLocVT() == MVT::i64 || VA.getLocVT() == MVT::i128);
1492 assert(VA.getValVT().isVector() || VA.getValVT() == MVT::f32 ||
1493 VA.getValVT() == MVT::f64 || VA.getValVT() == MVT::f128);
1494 // For an f32 vararg we need to first promote it to an f64 and then
1495 // bitcast it to an i64.
1496 if (VA.getValVT() == MVT::f32 && VA.getLocVT() == MVT::i64)
1497 Value = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, Value);
1498 MVT BitCastToType = VA.getValVT().isVector() && VA.getLocVT() == MVT::i64
1499 ? MVT::v2i64
1500 : VA.getLocVT();
1501 Value = DAG.getNode(ISD::BITCAST, DL, BitCastToType, Value);
1502 // For ELF, this is a short vector argument to be stored to the stack,
1503 // bitcast to v2i64 and then extract first element.
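// Illustrative values for the BCvt cases handled here: an f32 vararg such
// as 1.0f is extended to the f64 1.0 and bitcast to the i64 bit pattern
// 0x3ff0000000000000, while a short vector becomes a v2i64 whose element 0
// carries the original eight data bytes on this big-endian target.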
1504 if (BitCastToType == MVT::v2i64) 1505 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value, 1506 DAG.getConstant(0, DL, MVT::i32)); 1507 return Value; 1508 } 1509 case CCValAssign::Full: 1510 return Value; 1511 default: 1512 llvm_unreachable("Unhandled getLocInfo()"); 1513 } 1514 } 1515 1516 static SDValue lowerI128ToGR128(SelectionDAG &DAG, SDValue In) { 1517 SDLoc DL(In); 1518 SDValue Lo, Hi; 1519 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128)) { 1520 Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, In); 1521 Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, 1522 DAG.getNode(ISD::SRL, DL, MVT::i128, In, 1523 DAG.getConstant(64, DL, MVT::i32))); 1524 } else { 1525 std::tie(Lo, Hi) = DAG.SplitScalar(In, DL, MVT::i64, MVT::i64); 1526 } 1527 1528 SDNode *Pair = DAG.getMachineNode(SystemZ::PAIR128, DL, 1529 MVT::Untyped, Hi, Lo); 1530 return SDValue(Pair, 0); 1531 } 1532 1533 static SDValue lowerGR128ToI128(SelectionDAG &DAG, SDValue In) { 1534 SDLoc DL(In); 1535 SDValue Hi = DAG.getTargetExtractSubreg(SystemZ::subreg_h64, 1536 DL, MVT::i64, In); 1537 SDValue Lo = DAG.getTargetExtractSubreg(SystemZ::subreg_l64, 1538 DL, MVT::i64, In); 1539 1540 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128)) { 1541 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, Lo); 1542 Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, Hi); 1543 Hi = DAG.getNode(ISD::SHL, DL, MVT::i128, Hi, 1544 DAG.getConstant(64, DL, MVT::i32)); 1545 return DAG.getNode(ISD::OR, DL, MVT::i128, Lo, Hi); 1546 } else { 1547 return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi); 1548 } 1549 } 1550 1551 bool SystemZTargetLowering::splitValueIntoRegisterParts( 1552 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, 1553 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const { 1554 EVT ValueVT = Val.getValueType(); 1555 if (ValueVT.getSizeInBits() == 128 && NumParts == 1 && PartVT == MVT::Untyped) { 1556 // Inline assembly operand. 1557 Parts[0] = lowerI128ToGR128(DAG, DAG.getBitcast(MVT::i128, Val)); 1558 return true; 1559 } 1560 1561 return false; 1562 } 1563 1564 SDValue SystemZTargetLowering::joinRegisterPartsIntoValue( 1565 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, 1566 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const { 1567 if (ValueVT.getSizeInBits() == 128 && NumParts == 1 && PartVT == MVT::Untyped) { 1568 // Inline assembly operand. 1569 SDValue Res = lowerGR128ToI128(DAG, Parts[0]); 1570 return DAG.getBitcast(ValueVT, Res); 1571 } 1572 1573 return SDValue(); 1574 } 1575 1576 SDValue SystemZTargetLowering::LowerFormalArguments( 1577 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, 1578 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL, 1579 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { 1580 MachineFunction &MF = DAG.getMachineFunction(); 1581 MachineFrameInfo &MFI = MF.getFrameInfo(); 1582 MachineRegisterInfo &MRI = MF.getRegInfo(); 1583 SystemZMachineFunctionInfo *FuncInfo = 1584 MF.getInfo<SystemZMachineFunctionInfo>(); 1585 auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>(); 1586 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 1587 1588 // Assign locations to all of the incoming arguments. 
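// Illustrative summary (the authoritative assignment is CC_SystemZ): on the
// ELF ABI the first integer arguments land in r2-r6 and the first FP
// arguments in f0/f2/f4/f6; anything beyond that is given a memory location
// and handled by the else branch below.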
1589 SmallVector<CCValAssign, 16> ArgLocs; 1590 SystemZCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); 1591 CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ); 1592 FuncInfo->setSizeOfFnParams(CCInfo.getStackSize()); 1593 1594 unsigned NumFixedGPRs = 0; 1595 unsigned NumFixedFPRs = 0; 1596 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) { 1597 SDValue ArgValue; 1598 CCValAssign &VA = ArgLocs[I]; 1599 EVT LocVT = VA.getLocVT(); 1600 if (VA.isRegLoc()) { 1601 // Arguments passed in registers 1602 const TargetRegisterClass *RC; 1603 switch (LocVT.getSimpleVT().SimpleTy) { 1604 default: 1605 // Integers smaller than i64 should be promoted to i64. 1606 llvm_unreachable("Unexpected argument type"); 1607 case MVT::i32: 1608 NumFixedGPRs += 1; 1609 RC = &SystemZ::GR32BitRegClass; 1610 break; 1611 case MVT::i64: 1612 NumFixedGPRs += 1; 1613 RC = &SystemZ::GR64BitRegClass; 1614 break; 1615 case MVT::f32: 1616 NumFixedFPRs += 1; 1617 RC = &SystemZ::FP32BitRegClass; 1618 break; 1619 case MVT::f64: 1620 NumFixedFPRs += 1; 1621 RC = &SystemZ::FP64BitRegClass; 1622 break; 1623 case MVT::f128: 1624 NumFixedFPRs += 2; 1625 RC = &SystemZ::FP128BitRegClass; 1626 break; 1627 case MVT::v16i8: 1628 case MVT::v8i16: 1629 case MVT::v4i32: 1630 case MVT::v2i64: 1631 case MVT::v4f32: 1632 case MVT::v2f64: 1633 RC = &SystemZ::VR128BitRegClass; 1634 break; 1635 } 1636 1637 Register VReg = MRI.createVirtualRegister(RC); 1638 MRI.addLiveIn(VA.getLocReg(), VReg); 1639 ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, LocVT); 1640 } else { 1641 assert(VA.isMemLoc() && "Argument not register or memory"); 1642 1643 // Create the frame index object for this incoming parameter. 1644 // FIXME: Pre-include call frame size in the offset, should not 1645 // need to manually add it here. 1646 int64_t ArgSPOffset = VA.getLocMemOffset(); 1647 if (Subtarget.isTargetXPLINK64()) { 1648 auto &XPRegs = 1649 Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>(); 1650 ArgSPOffset += XPRegs.getCallFrameSize(); 1651 } 1652 int FI = 1653 MFI.CreateFixedObject(LocVT.getSizeInBits() / 8, ArgSPOffset, true); 1654 1655 // Create the SelectionDAG nodes corresponding to a load 1656 // from this parameter. Unpromoted ints and floats are 1657 // passed as right-justified 8-byte values. 1658 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 1659 if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32) 1660 FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, 1661 DAG.getIntPtrConstant(4, DL)); 1662 ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN, 1663 MachinePointerInfo::getFixedStack(MF, FI)); 1664 } 1665 1666 // Convert the value of the argument register into the value that's 1667 // being passed. 1668 if (VA.getLocInfo() == CCValAssign::Indirect) { 1669 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue, 1670 MachinePointerInfo())); 1671 // If the original argument was split (e.g. i128), we need 1672 // to load all parts of it here (using the same address). 
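// For example, an i128 argument passed indirectly arrives as two i64 parts:
// the load above covers PartOffset 0 and the loop below issues a second
// load at PartOffset 8 from the same base address.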
1673 unsigned ArgIndex = Ins[I].OrigArgIndex; 1674 assert (Ins[I].PartOffset == 0); 1675 while (I + 1 != E && Ins[I + 1].OrigArgIndex == ArgIndex) { 1676 CCValAssign &PartVA = ArgLocs[I + 1]; 1677 unsigned PartOffset = Ins[I + 1].PartOffset; 1678 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, 1679 DAG.getIntPtrConstant(PartOffset, DL)); 1680 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address, 1681 MachinePointerInfo())); 1682 ++I; 1683 } 1684 } else 1685 InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue)); 1686 } 1687 1688 if (IsVarArg && Subtarget.isTargetXPLINK64()) { 1689 // Save the number of non-varargs registers for later use by va_start, etc. 1690 FuncInfo->setVarArgsFirstGPR(NumFixedGPRs); 1691 FuncInfo->setVarArgsFirstFPR(NumFixedFPRs); 1692 1693 auto *Regs = static_cast<SystemZXPLINK64Registers *>( 1694 Subtarget.getSpecialRegisters()); 1695 1696 // Likewise the address (in the form of a frame index) of where the 1697 // first stack vararg would be. The 1-byte size here is arbitrary. 1698 // FIXME: Pre-include call frame size in the offset, should not 1699 // need to manually add it here. 1700 int64_t VarArgOffset = CCInfo.getStackSize() + Regs->getCallFrameSize(); 1701 int FI = MFI.CreateFixedObject(1, VarArgOffset, true); 1702 FuncInfo->setVarArgsFrameIndex(FI); 1703 } 1704 1705 if (IsVarArg && Subtarget.isTargetELF()) { 1706 // Save the number of non-varargs registers for later use by va_start, etc. 1707 FuncInfo->setVarArgsFirstGPR(NumFixedGPRs); 1708 FuncInfo->setVarArgsFirstFPR(NumFixedFPRs); 1709 1710 // Likewise the address (in the form of a frame index) of where the 1711 // first stack vararg would be. The 1-byte size here is arbitrary. 1712 int64_t VarArgsOffset = CCInfo.getStackSize(); 1713 FuncInfo->setVarArgsFrameIndex( 1714 MFI.CreateFixedObject(1, VarArgsOffset, true)); 1715 1716 // ...and a similar frame index for the caller-allocated save area 1717 // that will be used to store the incoming registers. 1718 int64_t RegSaveOffset = 1719 -SystemZMC::ELFCallFrameSize + TFL->getRegSpillOffset(MF, SystemZ::R2D) - 16; 1720 unsigned RegSaveIndex = MFI.CreateFixedObject(1, RegSaveOffset, true); 1721 FuncInfo->setRegSaveFrameIndex(RegSaveIndex); 1722 1723 // Store the FPR varargs in the reserved frame slots. (We store the 1724 // GPRs as part of the prologue.) 1725 if (NumFixedFPRs < SystemZ::ELFNumArgFPRs && !useSoftFloat()) { 1726 SDValue MemOps[SystemZ::ELFNumArgFPRs]; 1727 for (unsigned I = NumFixedFPRs; I < SystemZ::ELFNumArgFPRs; ++I) { 1728 unsigned Offset = TFL->getRegSpillOffset(MF, SystemZ::ELFArgFPRs[I]); 1729 int FI = 1730 MFI.CreateFixedObject(8, -SystemZMC::ELFCallFrameSize + Offset, true); 1731 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); 1732 Register VReg = MF.addLiveIn(SystemZ::ELFArgFPRs[I], 1733 &SystemZ::FP64BitRegClass); 1734 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64); 1735 MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN, 1736 MachinePointerInfo::getFixedStack(MF, FI)); 1737 } 1738 // Join the stores, which are independent of one another. 
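// A TokenFactor merges the chains of these independent stores, so later
// users of Chain depend on all of them without forcing any particular
// order among the stores themselves.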
1739 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, 1740 ArrayRef(&MemOps[NumFixedFPRs], 1741 SystemZ::ELFNumArgFPRs - NumFixedFPRs)); 1742 } 1743 } 1744 1745 if (Subtarget.isTargetXPLINK64()) { 1746 // Create virual register for handling incoming "ADA" special register (R5) 1747 const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass; 1748 Register ADAvReg = MRI.createVirtualRegister(RC); 1749 auto *Regs = static_cast<SystemZXPLINK64Registers *>( 1750 Subtarget.getSpecialRegisters()); 1751 MRI.addLiveIn(Regs->getADARegister(), ADAvReg); 1752 FuncInfo->setADAVirtualRegister(ADAvReg); 1753 } 1754 return Chain; 1755 } 1756 1757 static bool canUseSiblingCall(const CCState &ArgCCInfo, 1758 SmallVectorImpl<CCValAssign> &ArgLocs, 1759 SmallVectorImpl<ISD::OutputArg> &Outs) { 1760 // Punt if there are any indirect or stack arguments, or if the call 1761 // needs the callee-saved argument register R6, or if the call uses 1762 // the callee-saved register arguments SwiftSelf and SwiftError. 1763 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) { 1764 CCValAssign &VA = ArgLocs[I]; 1765 if (VA.getLocInfo() == CCValAssign::Indirect) 1766 return false; 1767 if (!VA.isRegLoc()) 1768 return false; 1769 Register Reg = VA.getLocReg(); 1770 if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D) 1771 return false; 1772 if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftError()) 1773 return false; 1774 } 1775 return true; 1776 } 1777 1778 static SDValue getADAEntry(SelectionDAG &DAG, SDValue Val, SDLoc DL, 1779 unsigned Offset, bool LoadAdr = false) { 1780 MachineFunction &MF = DAG.getMachineFunction(); 1781 SystemZMachineFunctionInfo *MFI = MF.getInfo<SystemZMachineFunctionInfo>(); 1782 unsigned ADAvReg = MFI->getADAVirtualRegister(); 1783 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); 1784 1785 SDValue Reg = DAG.getRegister(ADAvReg, PtrVT); 1786 SDValue Ofs = DAG.getTargetConstant(Offset, DL, PtrVT); 1787 1788 SDValue Result = DAG.getNode(SystemZISD::ADA_ENTRY, DL, PtrVT, Val, Reg, Ofs); 1789 if (!LoadAdr) 1790 Result = DAG.getLoad( 1791 PtrVT, DL, DAG.getEntryNode(), Result, MachinePointerInfo(), Align(8), 1792 MachineMemOperand::MODereferenceable | MachineMemOperand::MOInvariant); 1793 1794 return Result; 1795 } 1796 1797 // ADA access using Global value 1798 // Note: for functions, address of descriptor is returned 1799 static SDValue getADAEntry(SelectionDAG &DAG, const GlobalValue *GV, SDLoc DL, 1800 EVT PtrVT) { 1801 unsigned ADAtype; 1802 bool LoadAddr = false; 1803 const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV); 1804 bool IsFunction = 1805 (isa<Function>(GV)) || (GA && isa<Function>(GA->getAliaseeObject())); 1806 bool IsInternal = (GV->hasInternalLinkage() || GV->hasPrivateLinkage()); 1807 1808 if (IsFunction) { 1809 if (IsInternal) { 1810 ADAtype = SystemZII::MO_ADA_DIRECT_FUNC_DESC; 1811 LoadAddr = true; 1812 } else 1813 ADAtype = SystemZII::MO_ADA_INDIRECT_FUNC_DESC; 1814 } else { 1815 ADAtype = SystemZII::MO_ADA_DATA_SYMBOL_ADDR; 1816 } 1817 SDValue Val = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, ADAtype); 1818 1819 return getADAEntry(DAG, Val, DL, 0, LoadAddr); 1820 } 1821 1822 static bool getzOSCalleeAndADA(SelectionDAG &DAG, SDValue &Callee, SDValue &ADA, 1823 SDLoc &DL, SDValue &Chain) { 1824 unsigned ADADelta = 0; // ADA offset in desc. 1825 unsigned EPADelta = 8; // EPA offset in desc. 
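// Illustrative layout assumed here: an XPLINK function descriptor holds the
// ADA (associated data area pointer) at offset 0 and the EPA (entry point
// address) at offset 8, which is what the two deltas above index.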
1826 MachineFunction &MF = DAG.getMachineFunction(); 1827 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); 1828 1829 // XPLink calling convention. 1830 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 1831 bool IsInternal = (G->getGlobal()->hasInternalLinkage() || 1832 G->getGlobal()->hasPrivateLinkage()); 1833 if (IsInternal) { 1834 SystemZMachineFunctionInfo *MFI = 1835 MF.getInfo<SystemZMachineFunctionInfo>(); 1836 unsigned ADAvReg = MFI->getADAVirtualRegister(); 1837 ADA = DAG.getCopyFromReg(Chain, DL, ADAvReg, PtrVT); 1838 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT); 1839 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee); 1840 return true; 1841 } else { 1842 SDValue GA = DAG.getTargetGlobalAddress( 1843 G->getGlobal(), DL, PtrVT, 0, SystemZII::MO_ADA_DIRECT_FUNC_DESC); 1844 ADA = getADAEntry(DAG, GA, DL, ADADelta); 1845 Callee = getADAEntry(DAG, GA, DL, EPADelta); 1846 } 1847 } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) { 1848 SDValue ES = DAG.getTargetExternalSymbol( 1849 E->getSymbol(), PtrVT, SystemZII::MO_ADA_DIRECT_FUNC_DESC); 1850 ADA = getADAEntry(DAG, ES, DL, ADADelta); 1851 Callee = getADAEntry(DAG, ES, DL, EPADelta); 1852 } else { 1853 // Function pointer case 1854 ADA = DAG.getNode(ISD::ADD, DL, PtrVT, Callee, 1855 DAG.getConstant(ADADelta, DL, PtrVT)); 1856 ADA = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), ADA, 1857 MachinePointerInfo::getGOT(DAG.getMachineFunction())); 1858 Callee = DAG.getNode(ISD::ADD, DL, PtrVT, Callee, 1859 DAG.getConstant(EPADelta, DL, PtrVT)); 1860 Callee = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Callee, 1861 MachinePointerInfo::getGOT(DAG.getMachineFunction())); 1862 } 1863 return false; 1864 } 1865 1866 SDValue 1867 SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI, 1868 SmallVectorImpl<SDValue> &InVals) const { 1869 SelectionDAG &DAG = CLI.DAG; 1870 SDLoc &DL = CLI.DL; 1871 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; 1872 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; 1873 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; 1874 SDValue Chain = CLI.Chain; 1875 SDValue Callee = CLI.Callee; 1876 bool &IsTailCall = CLI.IsTailCall; 1877 CallingConv::ID CallConv = CLI.CallConv; 1878 bool IsVarArg = CLI.IsVarArg; 1879 MachineFunction &MF = DAG.getMachineFunction(); 1880 EVT PtrVT = getPointerTy(MF.getDataLayout()); 1881 LLVMContext &Ctx = *DAG.getContext(); 1882 SystemZCallingConventionRegisters *Regs = Subtarget.getSpecialRegisters(); 1883 1884 // FIXME: z/OS support to be added in later. 1885 if (Subtarget.isTargetXPLINK64()) 1886 IsTailCall = false; 1887 1888 // Analyze the operands of the call, assigning locations to each operand. 1889 SmallVector<CCValAssign, 16> ArgLocs; 1890 SystemZCCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, Ctx); 1891 ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ); 1892 1893 // We don't support GuaranteedTailCallOpt, only automatically-detected 1894 // sibling calls. 1895 if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs, Outs)) 1896 IsTailCall = false; 1897 1898 // Get a count of how many bytes are to be pushed on the stack. 1899 unsigned NumBytes = ArgCCInfo.getStackSize(); 1900 1901 // Mark the start of the call. 1902 if (!IsTailCall) 1903 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL); 1904 1905 // Copy argument values to their designated locations. 
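// Roadmap for the loop below: register arguments are queued in RegsToPass
// and emitted later as glued CopyToReg nodes, while stack arguments are
// stored relative to the outgoing call frame and collected in MemOpChains.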
1906 SmallVector<std::pair<unsigned, SDValue>, 9> RegsToPass; 1907 SmallVector<SDValue, 8> MemOpChains; 1908 SDValue StackPtr; 1909 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) { 1910 CCValAssign &VA = ArgLocs[I]; 1911 SDValue ArgValue = OutVals[I]; 1912 1913 if (VA.getLocInfo() == CCValAssign::Indirect) { 1914 // Store the argument in a stack slot and pass its address. 1915 unsigned ArgIndex = Outs[I].OrigArgIndex; 1916 EVT SlotVT; 1917 if (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) { 1918 // Allocate the full stack space for a promoted (and split) argument. 1919 Type *OrigArgType = CLI.Args[Outs[I].OrigArgIndex].Ty; 1920 EVT OrigArgVT = getValueType(MF.getDataLayout(), OrigArgType); 1921 MVT PartVT = getRegisterTypeForCallingConv(Ctx, CLI.CallConv, OrigArgVT); 1922 unsigned N = getNumRegistersForCallingConv(Ctx, CLI.CallConv, OrigArgVT); 1923 SlotVT = EVT::getIntegerVT(Ctx, PartVT.getSizeInBits() * N); 1924 } else { 1925 SlotVT = Outs[I].ArgVT; 1926 } 1927 SDValue SpillSlot = DAG.CreateStackTemporary(SlotVT); 1928 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex(); 1929 MemOpChains.push_back( 1930 DAG.getStore(Chain, DL, ArgValue, SpillSlot, 1931 MachinePointerInfo::getFixedStack(MF, FI))); 1932 // If the original argument was split (e.g. i128), we need 1933 // to store all parts of it here (and pass just one address). 1934 assert (Outs[I].PartOffset == 0); 1935 while (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) { 1936 SDValue PartValue = OutVals[I + 1]; 1937 unsigned PartOffset = Outs[I + 1].PartOffset; 1938 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, 1939 DAG.getIntPtrConstant(PartOffset, DL)); 1940 MemOpChains.push_back( 1941 DAG.getStore(Chain, DL, PartValue, Address, 1942 MachinePointerInfo::getFixedStack(MF, FI))); 1943 assert((PartOffset + PartValue.getValueType().getStoreSize() <= 1944 SlotVT.getStoreSize()) && "Not enough space for argument part!"); 1945 ++I; 1946 } 1947 ArgValue = SpillSlot; 1948 } else 1949 ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue); 1950 1951 if (VA.isRegLoc()) { 1952 // In XPLINK64, for the 128-bit vararg case, ArgValue is bitcasted to a 1953 // MVT::i128 type. We decompose the 128-bit type to a pair of its high 1954 // and low values. 1955 if (VA.getLocVT() == MVT::i128) 1956 ArgValue = lowerI128ToGR128(DAG, ArgValue); 1957 // Queue up the argument copies and emit them at the end. 1958 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue)); 1959 } else { 1960 assert(VA.isMemLoc() && "Argument not register or memory"); 1961 1962 // Work out the address of the stack slot. Unpromoted ints and 1963 // floats are passed as right-justified 8-byte values. 1964 if (!StackPtr.getNode()) 1965 StackPtr = DAG.getCopyFromReg(Chain, DL, 1966 Regs->getStackPointerRegister(), PtrVT); 1967 unsigned Offset = Regs->getStackPointerBias() + Regs->getCallFrameSize() + 1968 VA.getLocMemOffset(); 1969 if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32) 1970 Offset += 4; 1971 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, 1972 DAG.getIntPtrConstant(Offset, DL)); 1973 1974 // Emit the store. 1975 MemOpChains.push_back( 1976 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo())); 1977 1978 // Although long doubles or vectors are passed through the stack when 1979 // they are vararg (non-fixed arguments), if a long double or vector 1980 // occupies the third and fourth slot of the argument list GPR3 should 1981 // still shadow the third slot of the argument list. 
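// Illustrative case: when a 128-bit vararg spans argument slots 3 and 4,
// element 1 extracted below is its high doubleword, which on this
// big-endian target is the part occupying the third slot and therefore
// the part copied into GPR 3.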
1982 if (Subtarget.isTargetXPLINK64() && VA.needsCustom()) { 1983 SDValue ShadowArgValue = 1984 DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, ArgValue, 1985 DAG.getIntPtrConstant(1, DL)); 1986 RegsToPass.push_back(std::make_pair(SystemZ::R3D, ShadowArgValue)); 1987 } 1988 } 1989 } 1990 1991 // Join the stores, which are independent of one another. 1992 if (!MemOpChains.empty()) 1993 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains); 1994 1995 // Accept direct calls by converting symbolic call addresses to the 1996 // associated Target* opcodes. Force %r1 to be used for indirect 1997 // tail calls. 1998 SDValue Glue; 1999 2000 if (Subtarget.isTargetXPLINK64()) { 2001 SDValue ADA; 2002 bool IsBRASL = getzOSCalleeAndADA(DAG, Callee, ADA, DL, Chain); 2003 if (!IsBRASL) { 2004 unsigned CalleeReg = static_cast<SystemZXPLINK64Registers *>(Regs) 2005 ->getAddressOfCalleeRegister(); 2006 Chain = DAG.getCopyToReg(Chain, DL, CalleeReg, Callee, Glue); 2007 Glue = Chain.getValue(1); 2008 Callee = DAG.getRegister(CalleeReg, Callee.getValueType()); 2009 } 2010 RegsToPass.push_back(std::make_pair( 2011 static_cast<SystemZXPLINK64Registers *>(Regs)->getADARegister(), ADA)); 2012 } else { 2013 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 2014 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT); 2015 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee); 2016 } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) { 2017 Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT); 2018 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee); 2019 } else if (IsTailCall) { 2020 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R1D, Callee, Glue); 2021 Glue = Chain.getValue(1); 2022 Callee = DAG.getRegister(SystemZ::R1D, Callee.getValueType()); 2023 } 2024 } 2025 2026 // Build a sequence of copy-to-reg nodes, chained and glued together. 2027 for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) { 2028 Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[I].first, 2029 RegsToPass[I].second, Glue); 2030 Glue = Chain.getValue(1); 2031 } 2032 2033 // The first call operand is the chain and the second is the target address. 2034 SmallVector<SDValue, 8> Ops; 2035 Ops.push_back(Chain); 2036 Ops.push_back(Callee); 2037 2038 // Add argument registers to the end of the list so that they are 2039 // known live into the call. 2040 for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) 2041 Ops.push_back(DAG.getRegister(RegsToPass[I].first, 2042 RegsToPass[I].second.getValueType())); 2043 2044 // Add a register mask operand representing the call-preserved registers. 2045 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); 2046 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv); 2047 assert(Mask && "Missing call preserved mask for calling convention"); 2048 Ops.push_back(DAG.getRegisterMask(Mask)); 2049 2050 // Glue the call to the argument copies, if any. 2051 if (Glue.getNode()) 2052 Ops.push_back(Glue); 2053 2054 // Emit the call. 2055 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); 2056 if (IsTailCall) { 2057 SDValue Ret = DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, Ops); 2058 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge); 2059 return Ret; 2060 } 2061 Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, Ops); 2062 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge); 2063 Glue = Chain.getValue(1); 2064 2065 // Mark the end of the call, which is glued to the call itself. 
2066 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL); 2067 Glue = Chain.getValue(1); 2068 2069 // Assign locations to each value returned by this call. 2070 SmallVector<CCValAssign, 16> RetLocs; 2071 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, Ctx); 2072 RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ); 2073 2074 // Copy all of the result registers out of their specified physreg. 2075 for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) { 2076 CCValAssign &VA = RetLocs[I]; 2077 2078 // Copy the value out, gluing the copy to the end of the call sequence. 2079 SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), 2080 VA.getLocVT(), Glue); 2081 Chain = RetValue.getValue(1); 2082 Glue = RetValue.getValue(2); 2083 2084 // Convert the value of the return register into the value that's 2085 // being returned. 2086 InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, RetValue)); 2087 } 2088 2089 return Chain; 2090 } 2091 2092 // Generate a call taking the given operands as arguments and returning a 2093 // result of type RetVT. 2094 std::pair<SDValue, SDValue> SystemZTargetLowering::makeExternalCall( 2095 SDValue Chain, SelectionDAG &DAG, const char *CalleeName, EVT RetVT, 2096 ArrayRef<SDValue> Ops, CallingConv::ID CallConv, bool IsSigned, SDLoc DL, 2097 bool DoesNotReturn, bool IsReturnValueUsed) const { 2098 TargetLowering::ArgListTy Args; 2099 Args.reserve(Ops.size()); 2100 2101 TargetLowering::ArgListEntry Entry; 2102 for (SDValue Op : Ops) { 2103 Entry.Node = Op; 2104 Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext()); 2105 Entry.IsSExt = shouldSignExtendTypeInLibCall(Op.getValueType(), IsSigned); 2106 Entry.IsZExt = !shouldSignExtendTypeInLibCall(Op.getValueType(), IsSigned); 2107 Args.push_back(Entry); 2108 } 2109 2110 SDValue Callee = 2111 DAG.getExternalSymbol(CalleeName, getPointerTy(DAG.getDataLayout())); 2112 2113 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); 2114 TargetLowering::CallLoweringInfo CLI(DAG); 2115 bool SignExtend = shouldSignExtendTypeInLibCall(RetVT, IsSigned); 2116 CLI.setDebugLoc(DL) 2117 .setChain(Chain) 2118 .setCallee(CallConv, RetTy, Callee, std::move(Args)) 2119 .setNoReturn(DoesNotReturn) 2120 .setDiscardResult(!IsReturnValueUsed) 2121 .setSExtResult(SignExtend) 2122 .setZExtResult(!SignExtend); 2123 return LowerCallTo(CLI); 2124 } 2125 2126 bool SystemZTargetLowering:: 2127 CanLowerReturn(CallingConv::ID CallConv, 2128 MachineFunction &MF, bool isVarArg, 2129 const SmallVectorImpl<ISD::OutputArg> &Outs, 2130 LLVMContext &Context) const { 2131 // Special case that we cannot easily detect in RetCC_SystemZ since 2132 // i128 may not be a legal type. 2133 for (auto &Out : Outs) 2134 if (Out.ArgVT == MVT::i128) 2135 return false; 2136 2137 SmallVector<CCValAssign, 16> RetLocs; 2138 CCState RetCCInfo(CallConv, isVarArg, MF, RetLocs, Context); 2139 return RetCCInfo.CheckReturn(Outs, RetCC_SystemZ); 2140 } 2141 2142 SDValue 2143 SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, 2144 bool IsVarArg, 2145 const SmallVectorImpl<ISD::OutputArg> &Outs, 2146 const SmallVectorImpl<SDValue> &OutVals, 2147 const SDLoc &DL, SelectionDAG &DAG) const { 2148 MachineFunction &MF = DAG.getMachineFunction(); 2149 2150 // Assign locations to each returned value. 
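// Illustrative note (RetCC_SystemZ is authoritative): on the ELF ABI a
// scalar integer result is returned in r2, a float or double in f0, and a
// vector result in v24.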
2151 SmallVector<CCValAssign, 16> RetLocs; 2152 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext()); 2153 RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ); 2154 2155 // Quick exit for void returns 2156 if (RetLocs.empty()) 2157 return DAG.getNode(SystemZISD::RET_GLUE, DL, MVT::Other, Chain); 2158 2159 if (CallConv == CallingConv::GHC) 2160 report_fatal_error("GHC functions return void only"); 2161 2162 // Copy the result values into the output registers. 2163 SDValue Glue; 2164 SmallVector<SDValue, 4> RetOps; 2165 RetOps.push_back(Chain); 2166 for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) { 2167 CCValAssign &VA = RetLocs[I]; 2168 SDValue RetValue = OutVals[I]; 2169 2170 // Make the return register live on exit. 2171 assert(VA.isRegLoc() && "Can only return in registers!"); 2172 2173 // Promote the value as required. 2174 RetValue = convertValVTToLocVT(DAG, DL, VA, RetValue); 2175 2176 // Chain and glue the copies together. 2177 Register Reg = VA.getLocReg(); 2178 Chain = DAG.getCopyToReg(Chain, DL, Reg, RetValue, Glue); 2179 Glue = Chain.getValue(1); 2180 RetOps.push_back(DAG.getRegister(Reg, VA.getLocVT())); 2181 } 2182 2183 // Update chain and glue. 2184 RetOps[0] = Chain; 2185 if (Glue.getNode()) 2186 RetOps.push_back(Glue); 2187 2188 return DAG.getNode(SystemZISD::RET_GLUE, DL, MVT::Other, RetOps); 2189 } 2190 2191 // Return true if Op is an intrinsic node with chain that returns the CC value 2192 // as its only (other) argument. Provide the associated SystemZISD opcode and 2193 // the mask of valid CC values if so. 2194 static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode, 2195 unsigned &CCValid) { 2196 unsigned Id = Op.getConstantOperandVal(1); 2197 switch (Id) { 2198 case Intrinsic::s390_tbegin: 2199 Opcode = SystemZISD::TBEGIN; 2200 CCValid = SystemZ::CCMASK_TBEGIN; 2201 return true; 2202 2203 case Intrinsic::s390_tbegin_nofloat: 2204 Opcode = SystemZISD::TBEGIN_NOFLOAT; 2205 CCValid = SystemZ::CCMASK_TBEGIN; 2206 return true; 2207 2208 case Intrinsic::s390_tend: 2209 Opcode = SystemZISD::TEND; 2210 CCValid = SystemZ::CCMASK_TEND; 2211 return true; 2212 2213 default: 2214 return false; 2215 } 2216 } 2217 2218 // Return true if Op is an intrinsic node without chain that returns the 2219 // CC value as its final argument. Provide the associated SystemZISD 2220 // opcode and the mask of valid CC values if so. 
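// For instance, the llvm.s390.vceqbs intrinsic produces both the element
// comparison result and an i32 holding CC as its last result value; that
// CC value is what Opcode and CCValid below describe.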
2221 static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) { 2222 unsigned Id = Op.getConstantOperandVal(0); 2223 switch (Id) { 2224 case Intrinsic::s390_vpkshs: 2225 case Intrinsic::s390_vpksfs: 2226 case Intrinsic::s390_vpksgs: 2227 Opcode = SystemZISD::PACKS_CC; 2228 CCValid = SystemZ::CCMASK_VCMP; 2229 return true; 2230 2231 case Intrinsic::s390_vpklshs: 2232 case Intrinsic::s390_vpklsfs: 2233 case Intrinsic::s390_vpklsgs: 2234 Opcode = SystemZISD::PACKLS_CC; 2235 CCValid = SystemZ::CCMASK_VCMP; 2236 return true; 2237 2238 case Intrinsic::s390_vceqbs: 2239 case Intrinsic::s390_vceqhs: 2240 case Intrinsic::s390_vceqfs: 2241 case Intrinsic::s390_vceqgs: 2242 Opcode = SystemZISD::VICMPES; 2243 CCValid = SystemZ::CCMASK_VCMP; 2244 return true; 2245 2246 case Intrinsic::s390_vchbs: 2247 case Intrinsic::s390_vchhs: 2248 case Intrinsic::s390_vchfs: 2249 case Intrinsic::s390_vchgs: 2250 Opcode = SystemZISD::VICMPHS; 2251 CCValid = SystemZ::CCMASK_VCMP; 2252 return true; 2253 2254 case Intrinsic::s390_vchlbs: 2255 case Intrinsic::s390_vchlhs: 2256 case Intrinsic::s390_vchlfs: 2257 case Intrinsic::s390_vchlgs: 2258 Opcode = SystemZISD::VICMPHLS; 2259 CCValid = SystemZ::CCMASK_VCMP; 2260 return true; 2261 2262 case Intrinsic::s390_vtm: 2263 Opcode = SystemZISD::VTM; 2264 CCValid = SystemZ::CCMASK_VCMP; 2265 return true; 2266 2267 case Intrinsic::s390_vfaebs: 2268 case Intrinsic::s390_vfaehs: 2269 case Intrinsic::s390_vfaefs: 2270 Opcode = SystemZISD::VFAE_CC; 2271 CCValid = SystemZ::CCMASK_ANY; 2272 return true; 2273 2274 case Intrinsic::s390_vfaezbs: 2275 case Intrinsic::s390_vfaezhs: 2276 case Intrinsic::s390_vfaezfs: 2277 Opcode = SystemZISD::VFAEZ_CC; 2278 CCValid = SystemZ::CCMASK_ANY; 2279 return true; 2280 2281 case Intrinsic::s390_vfeebs: 2282 case Intrinsic::s390_vfeehs: 2283 case Intrinsic::s390_vfeefs: 2284 Opcode = SystemZISD::VFEE_CC; 2285 CCValid = SystemZ::CCMASK_ANY; 2286 return true; 2287 2288 case Intrinsic::s390_vfeezbs: 2289 case Intrinsic::s390_vfeezhs: 2290 case Intrinsic::s390_vfeezfs: 2291 Opcode = SystemZISD::VFEEZ_CC; 2292 CCValid = SystemZ::CCMASK_ANY; 2293 return true; 2294 2295 case Intrinsic::s390_vfenebs: 2296 case Intrinsic::s390_vfenehs: 2297 case Intrinsic::s390_vfenefs: 2298 Opcode = SystemZISD::VFENE_CC; 2299 CCValid = SystemZ::CCMASK_ANY; 2300 return true; 2301 2302 case Intrinsic::s390_vfenezbs: 2303 case Intrinsic::s390_vfenezhs: 2304 case Intrinsic::s390_vfenezfs: 2305 Opcode = SystemZISD::VFENEZ_CC; 2306 CCValid = SystemZ::CCMASK_ANY; 2307 return true; 2308 2309 case Intrinsic::s390_vistrbs: 2310 case Intrinsic::s390_vistrhs: 2311 case Intrinsic::s390_vistrfs: 2312 Opcode = SystemZISD::VISTR_CC; 2313 CCValid = SystemZ::CCMASK_0 | SystemZ::CCMASK_3; 2314 return true; 2315 2316 case Intrinsic::s390_vstrcbs: 2317 case Intrinsic::s390_vstrchs: 2318 case Intrinsic::s390_vstrcfs: 2319 Opcode = SystemZISD::VSTRC_CC; 2320 CCValid = SystemZ::CCMASK_ANY; 2321 return true; 2322 2323 case Intrinsic::s390_vstrczbs: 2324 case Intrinsic::s390_vstrczhs: 2325 case Intrinsic::s390_vstrczfs: 2326 Opcode = SystemZISD::VSTRCZ_CC; 2327 CCValid = SystemZ::CCMASK_ANY; 2328 return true; 2329 2330 case Intrinsic::s390_vstrsb: 2331 case Intrinsic::s390_vstrsh: 2332 case Intrinsic::s390_vstrsf: 2333 Opcode = SystemZISD::VSTRS_CC; 2334 CCValid = SystemZ::CCMASK_ANY; 2335 return true; 2336 2337 case Intrinsic::s390_vstrszb: 2338 case Intrinsic::s390_vstrszh: 2339 case Intrinsic::s390_vstrszf: 2340 Opcode = SystemZISD::VSTRSZ_CC; 2341 CCValid = 
SystemZ::CCMASK_ANY; 2342 return true; 2343 2344 case Intrinsic::s390_vfcedbs: 2345 case Intrinsic::s390_vfcesbs: 2346 Opcode = SystemZISD::VFCMPES; 2347 CCValid = SystemZ::CCMASK_VCMP; 2348 return true; 2349 2350 case Intrinsic::s390_vfchdbs: 2351 case Intrinsic::s390_vfchsbs: 2352 Opcode = SystemZISD::VFCMPHS; 2353 CCValid = SystemZ::CCMASK_VCMP; 2354 return true; 2355 2356 case Intrinsic::s390_vfchedbs: 2357 case Intrinsic::s390_vfchesbs: 2358 Opcode = SystemZISD::VFCMPHES; 2359 CCValid = SystemZ::CCMASK_VCMP; 2360 return true; 2361 2362 case Intrinsic::s390_vftcidb: 2363 case Intrinsic::s390_vftcisb: 2364 Opcode = SystemZISD::VFTCI; 2365 CCValid = SystemZ::CCMASK_VCMP; 2366 return true; 2367 2368 case Intrinsic::s390_tdc: 2369 Opcode = SystemZISD::TDC; 2370 CCValid = SystemZ::CCMASK_TDC; 2371 return true; 2372 2373 default: 2374 return false; 2375 } 2376 } 2377 2378 // Emit an intrinsic with chain and an explicit CC register result. 2379 static SDNode *emitIntrinsicWithCCAndChain(SelectionDAG &DAG, SDValue Op, 2380 unsigned Opcode) { 2381 // Copy all operands except the intrinsic ID. 2382 unsigned NumOps = Op.getNumOperands(); 2383 SmallVector<SDValue, 6> Ops; 2384 Ops.reserve(NumOps - 1); 2385 Ops.push_back(Op.getOperand(0)); 2386 for (unsigned I = 2; I < NumOps; ++I) 2387 Ops.push_back(Op.getOperand(I)); 2388 2389 assert(Op->getNumValues() == 2 && "Expected only CC result and chain"); 2390 SDVTList RawVTs = DAG.getVTList(MVT::i32, MVT::Other); 2391 SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops); 2392 SDValue OldChain = SDValue(Op.getNode(), 1); 2393 SDValue NewChain = SDValue(Intr.getNode(), 1); 2394 DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain); 2395 return Intr.getNode(); 2396 } 2397 2398 // Emit an intrinsic with an explicit CC register result. 2399 static SDNode *emitIntrinsicWithCC(SelectionDAG &DAG, SDValue Op, 2400 unsigned Opcode) { 2401 // Copy all operands except the intrinsic ID. 2402 unsigned NumOps = Op.getNumOperands(); 2403 SmallVector<SDValue, 6> Ops; 2404 Ops.reserve(NumOps - 1); 2405 for (unsigned I = 1; I < NumOps; ++I) 2406 Ops.push_back(Op.getOperand(I)); 2407 2408 SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), Op->getVTList(), Ops); 2409 return Intr.getNode(); 2410 } 2411 2412 // CC is a comparison that will be implemented using an integer or 2413 // floating-point comparison. Return the condition code mask for 2414 // a branch on true. In the integer case, CCMASK_CMP_UO is set for 2415 // unsigned comparisons and clear for signed ones. In the floating-point 2416 // case, CCMASK_CMP_UO has its normal mask meaning (unordered). 2417 static unsigned CCMaskForCondCode(ISD::CondCode CC) { 2418 #define CONV(X) \ 2419 case ISD::SET##X: return SystemZ::CCMASK_CMP_##X; \ 2420 case ISD::SETO##X: return SystemZ::CCMASK_CMP_##X; \ 2421 case ISD::SETU##X: return SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_##X 2422 2423 switch (CC) { 2424 default: 2425 llvm_unreachable("Invalid integer condition!"); 2426 2427 CONV(EQ); 2428 CONV(NE); 2429 CONV(GT); 2430 CONV(GE); 2431 CONV(LT); 2432 CONV(LE); 2433 2434 case ISD::SETO: return SystemZ::CCMASK_CMP_O; 2435 case ISD::SETUO: return SystemZ::CCMASK_CMP_UO; 2436 } 2437 #undef CONV 2438 } 2439 2440 // If C can be converted to a comparison against zero, adjust the operands 2441 // as necessary. 
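// Worked example: a signed test "x > -1" is rewritten as "x >= 0" (the EQ
// bit is toggled into the GT mask and the constant becomes 0), since
// comparisons against zero are the easiest to optimize later.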
2442 static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) { 2443 if (C.ICmpType == SystemZICMP::UnsignedOnly) 2444 return; 2445 2446 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1.getNode()); 2447 if (!ConstOp1 || ConstOp1->getValueSizeInBits(0) > 64) 2448 return; 2449 2450 int64_t Value = ConstOp1->getSExtValue(); 2451 if ((Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_GT) || 2452 (Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_LE) || 2453 (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_LT) || 2454 (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_GE)) { 2455 C.CCMask ^= SystemZ::CCMASK_CMP_EQ; 2456 C.Op1 = DAG.getConstant(0, DL, C.Op1.getValueType()); 2457 } 2458 } 2459 2460 // If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI, 2461 // adjust the operands as necessary. 2462 static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL, 2463 Comparison &C) { 2464 // For us to make any changes, it must a comparison between a single-use 2465 // load and a constant. 2466 if (!C.Op0.hasOneUse() || 2467 C.Op0.getOpcode() != ISD::LOAD || 2468 C.Op1.getOpcode() != ISD::Constant) 2469 return; 2470 2471 // We must have an 8- or 16-bit load. 2472 auto *Load = cast<LoadSDNode>(C.Op0); 2473 unsigned NumBits = Load->getMemoryVT().getSizeInBits(); 2474 if ((NumBits != 8 && NumBits != 16) || 2475 NumBits != Load->getMemoryVT().getStoreSizeInBits()) 2476 return; 2477 2478 // The load must be an extending one and the constant must be within the 2479 // range of the unextended value. 2480 auto *ConstOp1 = cast<ConstantSDNode>(C.Op1); 2481 if (!ConstOp1 || ConstOp1->getValueSizeInBits(0) > 64) 2482 return; 2483 uint64_t Value = ConstOp1->getZExtValue(); 2484 uint64_t Mask = (1 << NumBits) - 1; 2485 if (Load->getExtensionType() == ISD::SEXTLOAD) { 2486 // Make sure that ConstOp1 is in range of C.Op0. 2487 int64_t SignedValue = ConstOp1->getSExtValue(); 2488 if (uint64_t(SignedValue) + (uint64_t(1) << (NumBits - 1)) > Mask) 2489 return; 2490 if (C.ICmpType != SystemZICMP::SignedOnly) { 2491 // Unsigned comparison between two sign-extended values is equivalent 2492 // to unsigned comparison between two zero-extended values. 2493 Value &= Mask; 2494 } else if (NumBits == 8) { 2495 // Try to treat the comparison as unsigned, so that we can use CLI. 2496 // Adjust CCMask and Value as necessary. 2497 if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_LT) 2498 // Test whether the high bit of the byte is set. 2499 Value = 127, C.CCMask = SystemZ::CCMASK_CMP_GT; 2500 else if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_GE) 2501 // Test whether the high bit of the byte is clear. 2502 Value = 128, C.CCMask = SystemZ::CCMASK_CMP_LT; 2503 else 2504 // No instruction exists for this combination. 2505 return; 2506 C.ICmpType = SystemZICMP::UnsignedOnly; 2507 } 2508 } else if (Load->getExtensionType() == ISD::ZEXTLOAD) { 2509 if (Value > Mask) 2510 return; 2511 // If the constant is in range, we can use any comparison. 2512 C.ICmpType = SystemZICMP::Any; 2513 } else 2514 return; 2515 2516 // Make sure that the first operand is an i32 of the right extension type. 2517 ISD::LoadExtType ExtType = (C.ICmpType == SystemZICMP::SignedOnly ? 
2518 ISD::SEXTLOAD : 2519 ISD::ZEXTLOAD); 2520 if (C.Op0.getValueType() != MVT::i32 || 2521 Load->getExtensionType() != ExtType) { 2522 C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32, Load->getChain(), 2523 Load->getBasePtr(), Load->getPointerInfo(), 2524 Load->getMemoryVT(), Load->getAlign(), 2525 Load->getMemOperand()->getFlags()); 2526 // Update the chain uses. 2527 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), C.Op0.getValue(1)); 2528 } 2529 2530 // Make sure that the second operand is an i32 with the right value. 2531 if (C.Op1.getValueType() != MVT::i32 || 2532 Value != ConstOp1->getZExtValue()) 2533 C.Op1 = DAG.getConstant(Value, DL, MVT::i32); 2534 } 2535 2536 // Return true if Op is either an unextended load, or a load suitable 2537 // for integer register-memory comparisons of type ICmpType. 2538 static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType) { 2539 auto *Load = dyn_cast<LoadSDNode>(Op.getNode()); 2540 if (Load) { 2541 // There are no instructions to compare a register with a memory byte. 2542 if (Load->getMemoryVT() == MVT::i8) 2543 return false; 2544 // Otherwise decide on extension type. 2545 switch (Load->getExtensionType()) { 2546 case ISD::NON_EXTLOAD: 2547 return true; 2548 case ISD::SEXTLOAD: 2549 return ICmpType != SystemZICMP::UnsignedOnly; 2550 case ISD::ZEXTLOAD: 2551 return ICmpType != SystemZICMP::SignedOnly; 2552 default: 2553 break; 2554 } 2555 } 2556 return false; 2557 } 2558 2559 // Return true if it is better to swap the operands of C. 2560 static bool shouldSwapCmpOperands(const Comparison &C) { 2561 // Leave i128 and f128 comparisons alone, since they have no memory forms. 2562 if (C.Op0.getValueType() == MVT::i128) 2563 return false; 2564 if (C.Op0.getValueType() == MVT::f128) 2565 return false; 2566 2567 // Always keep a floating-point constant second, since comparisons with 2568 // zero can use LOAD TEST and comparisons with other constants make a 2569 // natural memory operand. 2570 if (isa<ConstantFPSDNode>(C.Op1)) 2571 return false; 2572 2573 // Never swap comparisons with zero since there are many ways to optimize 2574 // those later. 2575 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1); 2576 if (ConstOp1 && ConstOp1->getZExtValue() == 0) 2577 return false; 2578 2579 // Also keep natural memory operands second if the loaded value is 2580 // only used here. Several comparisons have memory forms. 2581 if (isNaturalMemoryOperand(C.Op1, C.ICmpType) && C.Op1.hasOneUse()) 2582 return false; 2583 2584 // Look for cases where Cmp0 is a single-use load and Cmp1 isn't. 2585 // In that case we generally prefer the memory to be second. 2586 if (isNaturalMemoryOperand(C.Op0, C.ICmpType) && C.Op0.hasOneUse()) { 2587 // The only exceptions are when the second operand is a constant and 2588 // we can use things like CHHSI. 2589 if (!ConstOp1) 2590 return true; 2591 // The unsigned memory-immediate instructions can handle 16-bit 2592 // unsigned integers. 2593 if (C.ICmpType != SystemZICMP::SignedOnly && 2594 isUInt<16>(ConstOp1->getZExtValue())) 2595 return false; 2596 // The signed memory-immediate instructions can handle 16-bit 2597 // signed integers. 2598 if (C.ICmpType != SystemZICMP::UnsignedOnly && 2599 isInt<16>(ConstOp1->getSExtValue())) 2600 return false; 2601 return true; 2602 } 2603 2604 // Try to promote the use of CGFR and CLGFR. 
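// For example, if Op0 is (sext i32 X) and Op1 is already an i64 register,
// swapping puts the extension on the second operand, where CGFR/CGF can
// fold it; ZERO_EXTEND and the AND-with-0xffffffff pattern below map onto
// CLGFR/CLGF in the same way.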
2605 unsigned Opcode0 = C.Op0.getOpcode(); 2606 if (C.ICmpType != SystemZICMP::UnsignedOnly && Opcode0 == ISD::SIGN_EXTEND) 2607 return true; 2608 if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::ZERO_EXTEND) 2609 return true; 2610 if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::AND && 2611 C.Op0.getOperand(1).getOpcode() == ISD::Constant && 2612 C.Op0.getConstantOperandVal(1) == 0xffffffff) 2613 return true; 2614 2615 return false; 2616 } 2617 2618 // Check whether C tests for equality between X and Y and whether X - Y 2619 // or Y - X is also computed. In that case it's better to compare the 2620 // result of the subtraction against zero. 2621 static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL, 2622 Comparison &C) { 2623 if (C.CCMask == SystemZ::CCMASK_CMP_EQ || 2624 C.CCMask == SystemZ::CCMASK_CMP_NE) { 2625 for (SDNode *N : C.Op0->uses()) { 2626 if (N->getOpcode() == ISD::SUB && 2627 ((N->getOperand(0) == C.Op0 && N->getOperand(1) == C.Op1) || 2628 (N->getOperand(0) == C.Op1 && N->getOperand(1) == C.Op0))) { 2629 // Disable the nsw and nuw flags: the backend needs to handle 2630 // overflow as well during comparison elimination. 2631 SDNodeFlags Flags = N->getFlags(); 2632 Flags.setNoSignedWrap(false); 2633 Flags.setNoUnsignedWrap(false); 2634 N->setFlags(Flags); 2635 C.Op0 = SDValue(N, 0); 2636 C.Op1 = DAG.getConstant(0, DL, N->getValueType(0)); 2637 return; 2638 } 2639 } 2640 } 2641 } 2642 2643 // Check whether C compares a floating-point value with zero and if that 2644 // floating-point value is also negated. In this case we can use the 2645 // negation to set CC, so avoiding separate LOAD AND TEST and 2646 // LOAD (NEGATIVE/COMPLEMENT) instructions. 2647 static void adjustForFNeg(Comparison &C) { 2648 // This optimization is invalid for strict comparisons, since FNEG 2649 // does not raise any exceptions. 2650 if (C.Chain) 2651 return; 2652 auto *C1 = dyn_cast<ConstantFPSDNode>(C.Op1); 2653 if (C1 && C1->isZero()) { 2654 for (SDNode *N : C.Op0->uses()) { 2655 if (N->getOpcode() == ISD::FNEG) { 2656 C.Op0 = SDValue(N, 0); 2657 C.CCMask = SystemZ::reverseCCMask(C.CCMask); 2658 return; 2659 } 2660 } 2661 } 2662 } 2663 2664 // Check whether C compares (shl X, 32) with 0 and whether X is 2665 // also sign-extended. In that case it is better to test the result 2666 // of the sign extension using LTGFR. 2667 // 2668 // This case is important because InstCombine transforms a comparison 2669 // with (sext (trunc X)) into a comparison with (shl X, 32). 2670 static void adjustForLTGFR(Comparison &C) { 2671 // Check for a comparison between (shl X, 32) and 0. 2672 if (C.Op0.getOpcode() == ISD::SHL && C.Op0.getValueType() == MVT::i64 && 2673 C.Op1.getOpcode() == ISD::Constant && C.Op1->getAsZExtVal() == 0) { 2674 auto *C1 = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1)); 2675 if (C1 && C1->getZExtValue() == 32) { 2676 SDValue ShlOp0 = C.Op0.getOperand(0); 2677 // See whether X has any SIGN_EXTEND_INREG uses. 2678 for (SDNode *N : ShlOp0->uses()) { 2679 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG && 2680 cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32) { 2681 C.Op0 = SDValue(N, 0); 2682 return; 2683 } 2684 } 2685 } 2686 } 2687 } 2688 2689 // If C compares the truncation of an extending load, try to compare 2690 // the untruncated value instead. This exposes more opportunities to 2691 // reuse CC. 
2692 static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL, 2693 Comparison &C) { 2694 if (C.Op0.getOpcode() == ISD::TRUNCATE && 2695 C.Op0.getOperand(0).getOpcode() == ISD::LOAD && 2696 C.Op1.getOpcode() == ISD::Constant && 2697 cast<ConstantSDNode>(C.Op1)->getValueSizeInBits(0) <= 64 && 2698 C.Op1->getAsZExtVal() == 0) { 2699 auto *L = cast<LoadSDNode>(C.Op0.getOperand(0)); 2700 if (L->getMemoryVT().getStoreSizeInBits().getFixedValue() <= 2701 C.Op0.getValueSizeInBits().getFixedValue()) { 2702 unsigned Type = L->getExtensionType(); 2703 if ((Type == ISD::ZEXTLOAD && C.ICmpType != SystemZICMP::SignedOnly) || 2704 (Type == ISD::SEXTLOAD && C.ICmpType != SystemZICMP::UnsignedOnly)) { 2705 C.Op0 = C.Op0.getOperand(0); 2706 C.Op1 = DAG.getConstant(0, DL, C.Op0.getValueType()); 2707 } 2708 } 2709 } 2710 } 2711 2712 // Return true if shift operation N has an in-range constant shift value. 2713 // Store it in ShiftVal if so. 2714 static bool isSimpleShift(SDValue N, unsigned &ShiftVal) { 2715 auto *Shift = dyn_cast<ConstantSDNode>(N.getOperand(1)); 2716 if (!Shift) 2717 return false; 2718 2719 uint64_t Amount = Shift->getZExtValue(); 2720 if (Amount >= N.getValueSizeInBits()) 2721 return false; 2722 2723 ShiftVal = Amount; 2724 return true; 2725 } 2726 2727 // Check whether an AND with Mask is suitable for a TEST UNDER MASK 2728 // instruction and whether the CC value is descriptive enough to handle 2729 // a comparison of type Opcode between the AND result and CmpVal. 2730 // CCMask says which comparison result is being tested and BitSize is 2731 // the number of bits in the operands. If TEST UNDER MASK can be used, 2732 // return the corresponding CC mask, otherwise return 0. 2733 static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask, 2734 uint64_t Mask, uint64_t CmpVal, 2735 unsigned ICmpType) { 2736 assert(Mask != 0 && "ANDs with zero should have been removed by now"); 2737 2738 // Check whether the mask is suitable for TMHH, TMHL, TMLH or TMLL. 2739 if (!SystemZ::isImmLL(Mask) && !SystemZ::isImmLH(Mask) && 2740 !SystemZ::isImmHL(Mask) && !SystemZ::isImmHH(Mask)) 2741 return 0; 2742 2743 // Work out the masks for the lowest and highest bits. 2744 uint64_t High = llvm::bit_floor(Mask); 2745 uint64_t Low = uint64_t(1) << llvm::countr_zero(Mask); 2746 2747 // Signed ordered comparisons are effectively unsigned if the sign 2748 // bit is dropped. 2749 bool EffectivelyUnsigned = (ICmpType != SystemZICMP::SignedOnly); 2750 2751 // Check for equality comparisons with 0, or the equivalent. 2752 if (CmpVal == 0) { 2753 if (CCMask == SystemZ::CCMASK_CMP_EQ) 2754 return SystemZ::CCMASK_TM_ALL_0; 2755 if (CCMask == SystemZ::CCMASK_CMP_NE) 2756 return SystemZ::CCMASK_TM_SOME_1; 2757 } 2758 if (EffectivelyUnsigned && CmpVal > 0 && CmpVal <= Low) { 2759 if (CCMask == SystemZ::CCMASK_CMP_LT) 2760 return SystemZ::CCMASK_TM_ALL_0; 2761 if (CCMask == SystemZ::CCMASK_CMP_GE) 2762 return SystemZ::CCMASK_TM_SOME_1; 2763 } 2764 if (EffectivelyUnsigned && CmpVal < Low) { 2765 if (CCMask == SystemZ::CCMASK_CMP_LE) 2766 return SystemZ::CCMASK_TM_ALL_0; 2767 if (CCMask == SystemZ::CCMASK_CMP_GT) 2768 return SystemZ::CCMASK_TM_SOME_1; 2769 } 2770 2771 // Check for equality comparisons with the mask, or the equivalent. 
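// Worked example: for Mask = 0x00f0 the code above computes Low = 0x0010
// and High = 0x0080, so "(x & 0xf0) == 0xf0" maps to CCMASK_TM_ALL_1 below
// and "(x & 0xf0) != 0xf0" to CCMASK_TM_SOME_0.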
2772 if (CmpVal == Mask) { 2773 if (CCMask == SystemZ::CCMASK_CMP_EQ) 2774 return SystemZ::CCMASK_TM_ALL_1; 2775 if (CCMask == SystemZ::CCMASK_CMP_NE) 2776 return SystemZ::CCMASK_TM_SOME_0; 2777 } 2778 if (EffectivelyUnsigned && CmpVal >= Mask - Low && CmpVal < Mask) { 2779 if (CCMask == SystemZ::CCMASK_CMP_GT) 2780 return SystemZ::CCMASK_TM_ALL_1; 2781 if (CCMask == SystemZ::CCMASK_CMP_LE) 2782 return SystemZ::CCMASK_TM_SOME_0; 2783 } 2784 if (EffectivelyUnsigned && CmpVal > Mask - Low && CmpVal <= Mask) { 2785 if (CCMask == SystemZ::CCMASK_CMP_GE) 2786 return SystemZ::CCMASK_TM_ALL_1; 2787 if (CCMask == SystemZ::CCMASK_CMP_LT) 2788 return SystemZ::CCMASK_TM_SOME_0; 2789 } 2790 2791 // Check for ordered comparisons with the top bit. 2792 if (EffectivelyUnsigned && CmpVal >= Mask - High && CmpVal < High) { 2793 if (CCMask == SystemZ::CCMASK_CMP_LE) 2794 return SystemZ::CCMASK_TM_MSB_0; 2795 if (CCMask == SystemZ::CCMASK_CMP_GT) 2796 return SystemZ::CCMASK_TM_MSB_1; 2797 } 2798 if (EffectivelyUnsigned && CmpVal > Mask - High && CmpVal <= High) { 2799 if (CCMask == SystemZ::CCMASK_CMP_LT) 2800 return SystemZ::CCMASK_TM_MSB_0; 2801 if (CCMask == SystemZ::CCMASK_CMP_GE) 2802 return SystemZ::CCMASK_TM_MSB_1; 2803 } 2804 2805 // If there are just two bits, we can do equality checks for Low and High 2806 // as well. 2807 if (Mask == Low + High) { 2808 if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == Low) 2809 return SystemZ::CCMASK_TM_MIXED_MSB_0; 2810 if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == Low) 2811 return SystemZ::CCMASK_TM_MIXED_MSB_0 ^ SystemZ::CCMASK_ANY; 2812 if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == High) 2813 return SystemZ::CCMASK_TM_MIXED_MSB_1; 2814 if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == High) 2815 return SystemZ::CCMASK_TM_MIXED_MSB_1 ^ SystemZ::CCMASK_ANY; 2816 } 2817 2818 // Looks like we've exhausted our options. 2819 return 0; 2820 } 2821 2822 // See whether C can be implemented as a TEST UNDER MASK instruction. 2823 // Update the arguments with the TM version if so. 2824 static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL, 2825 Comparison &C) { 2826 // Use VECTOR TEST UNDER MASK for i128 operations. 2827 if (C.Op0.getValueType() == MVT::i128) { 2828 // We can use VTM for EQ/NE comparisons of x & y against 0. 2829 if (C.Op0.getOpcode() == ISD::AND && 2830 (C.CCMask == SystemZ::CCMASK_CMP_EQ || 2831 C.CCMask == SystemZ::CCMASK_CMP_NE)) { 2832 auto *Mask = dyn_cast<ConstantSDNode>(C.Op1); 2833 if (Mask && Mask->getAPIntValue() == 0) { 2834 C.Opcode = SystemZISD::VTM; 2835 C.Op1 = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, C.Op0.getOperand(1)); 2836 C.Op0 = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, C.Op0.getOperand(0)); 2837 C.CCValid = SystemZ::CCMASK_VCMP; 2838 if (C.CCMask == SystemZ::CCMASK_CMP_EQ) 2839 C.CCMask = SystemZ::CCMASK_VCMP_ALL; 2840 else 2841 C.CCMask = SystemZ::CCMASK_VCMP_ALL ^ C.CCValid; 2842 } 2843 } 2844 return; 2845 } 2846 2847 // Check that we have a comparison with a constant. 2848 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1); 2849 if (!ConstOp1) 2850 return; 2851 uint64_t CmpVal = ConstOp1->getZExtValue(); 2852 2853 // Check whether the nonconstant input is an AND with a constant mask. 
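// For instance, with C.Op0 = (and X, 0x8000) and a test "(X & 0x8000) != 0",
// the branch below strips the AND, keeps 0x8000 as the mask, and lets
// getTestUnderMaskCond select CCMASK_TM_SOME_1, which is emitted as a TMLL.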
2854 Comparison NewC(C); 2855 uint64_t MaskVal; 2856 ConstantSDNode *Mask = nullptr; 2857 if (C.Op0.getOpcode() == ISD::AND) { 2858 NewC.Op0 = C.Op0.getOperand(0); 2859 NewC.Op1 = C.Op0.getOperand(1); 2860 Mask = dyn_cast<ConstantSDNode>(NewC.Op1); 2861 if (!Mask) 2862 return; 2863 MaskVal = Mask->getZExtValue(); 2864 } else { 2865 // There is no instruction to compare with a 64-bit immediate 2866 // so use TMHH instead if possible. We need an unsigned ordered 2867 // comparison with an i64 immediate. 2868 if (NewC.Op0.getValueType() != MVT::i64 || 2869 NewC.CCMask == SystemZ::CCMASK_CMP_EQ || 2870 NewC.CCMask == SystemZ::CCMASK_CMP_NE || 2871 NewC.ICmpType == SystemZICMP::SignedOnly) 2872 return; 2873 // Convert LE and GT comparisons into LT and GE. 2874 if (NewC.CCMask == SystemZ::CCMASK_CMP_LE || 2875 NewC.CCMask == SystemZ::CCMASK_CMP_GT) { 2876 if (CmpVal == uint64_t(-1)) 2877 return; 2878 CmpVal += 1; 2879 NewC.CCMask ^= SystemZ::CCMASK_CMP_EQ; 2880 } 2881 // If the low N bits of Op1 are zero than the low N bits of Op0 can 2882 // be masked off without changing the result. 2883 MaskVal = -(CmpVal & -CmpVal); 2884 NewC.ICmpType = SystemZICMP::UnsignedOnly; 2885 } 2886 if (!MaskVal) 2887 return; 2888 2889 // Check whether the combination of mask, comparison value and comparison 2890 // type are suitable. 2891 unsigned BitSize = NewC.Op0.getValueSizeInBits(); 2892 unsigned NewCCMask, ShiftVal; 2893 if (NewC.ICmpType != SystemZICMP::SignedOnly && 2894 NewC.Op0.getOpcode() == ISD::SHL && 2895 isSimpleShift(NewC.Op0, ShiftVal) && 2896 (MaskVal >> ShiftVal != 0) && 2897 ((CmpVal >> ShiftVal) << ShiftVal) == CmpVal && 2898 (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, 2899 MaskVal >> ShiftVal, 2900 CmpVal >> ShiftVal, 2901 SystemZICMP::Any))) { 2902 NewC.Op0 = NewC.Op0.getOperand(0); 2903 MaskVal >>= ShiftVal; 2904 } else if (NewC.ICmpType != SystemZICMP::SignedOnly && 2905 NewC.Op0.getOpcode() == ISD::SRL && 2906 isSimpleShift(NewC.Op0, ShiftVal) && 2907 (MaskVal << ShiftVal != 0) && 2908 ((CmpVal << ShiftVal) >> ShiftVal) == CmpVal && 2909 (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, 2910 MaskVal << ShiftVal, 2911 CmpVal << ShiftVal, 2912 SystemZICMP::UnsignedOnly))) { 2913 NewC.Op0 = NewC.Op0.getOperand(0); 2914 MaskVal <<= ShiftVal; 2915 } else { 2916 NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, MaskVal, CmpVal, 2917 NewC.ICmpType); 2918 if (!NewCCMask) 2919 return; 2920 } 2921 2922 // Go ahead and make the change. 2923 C.Opcode = SystemZISD::TM; 2924 C.Op0 = NewC.Op0; 2925 if (Mask && Mask->getZExtValue() == MaskVal) 2926 C.Op1 = SDValue(Mask, 0); 2927 else 2928 C.Op1 = DAG.getConstant(MaskVal, DL, C.Op0.getValueType()); 2929 C.CCValid = SystemZ::CCMASK_TM; 2930 C.CCMask = NewCCMask; 2931 } 2932 2933 // Implement i128 comparison in vector registers. 2934 static void adjustICmp128(SelectionDAG &DAG, const SDLoc &DL, 2935 Comparison &C) { 2936 if (C.Opcode != SystemZISD::ICMP) 2937 return; 2938 if (C.Op0.getValueType() != MVT::i128) 2939 return; 2940 2941 // (In-)Equality comparisons can be implemented via VCEQGS. 
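// VCEQGS compares the two doublewords of each operand and sets CC 0 only
// when both elements match, so i128 equality tests CCMASK_VCMP_ALL below
// and inequality its complement within CCValid.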
  if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
      C.CCMask == SystemZ::CCMASK_CMP_NE) {
    C.Opcode = SystemZISD::VICMPES;
    C.Op0 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, C.Op0);
    C.Op1 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, C.Op1);
    C.CCValid = SystemZ::CCMASK_VCMP;
    if (C.CCMask == SystemZ::CCMASK_CMP_EQ)
      C.CCMask = SystemZ::CCMASK_VCMP_ALL;
    else
      C.CCMask = SystemZ::CCMASK_VCMP_ALL ^ C.CCValid;
    return;
  }

  // Normalize other comparisons to GT.
  bool Swap = false, Invert = false;
  switch (C.CCMask) {
  case SystemZ::CCMASK_CMP_GT: break;
  case SystemZ::CCMASK_CMP_LT: Swap = true; break;
  case SystemZ::CCMASK_CMP_LE: Invert = true; break;
  case SystemZ::CCMASK_CMP_GE: Swap = Invert = true; break;
  default: llvm_unreachable("Invalid integer condition!");
  }
  if (Swap)
    std::swap(C.Op0, C.Op1);

  if (C.ICmpType == SystemZICMP::UnsignedOnly)
    C.Opcode = SystemZISD::UCMP128HI;
  else
    C.Opcode = SystemZISD::SCMP128HI;
  C.CCValid = SystemZ::CCMASK_ANY;
  C.CCMask = SystemZ::CCMASK_1;

  if (Invert)
    C.CCMask ^= C.CCValid;
}

// See whether the comparison argument contains a redundant AND
// and remove it if so. This sometimes happens due to the generic
// BRCOND expansion.
static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL,
                                  Comparison &C) {
  if (C.Op0.getOpcode() != ISD::AND)
    return;
  auto *Mask = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
  if (!Mask || Mask->getValueSizeInBits(0) > 64)
    return;
  KnownBits Known = DAG.computeKnownBits(C.Op0.getOperand(0));
  if ((~Known.Zero).getZExtValue() & ~Mask->getZExtValue())
    return;

  C.Op0 = C.Op0.getOperand(0);
}

// Return a Comparison that tests the condition-code result of intrinsic
// node Call against constant integer CC using comparison code Cond.
// Opcode is the opcode of the SystemZISD operation for the intrinsic
// and CCValid is the set of possible condition-code results.
static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
                                  SDValue Call, unsigned CCValid, uint64_t CC,
                                  ISD::CondCode Cond) {
  Comparison C(Call, SDValue(), SDValue());
  C.Opcode = Opcode;
  C.CCValid = CCValid;
  if (Cond == ISD::SETEQ)
    // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3.
    C.CCMask = CC < 4 ? 1 << (3 - CC) : 0;
  else if (Cond == ISD::SETNE)
    // ...and the inverse of that.
    C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1;
  else if (Cond == ISD::SETLT || Cond == ISD::SETULT)
    // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3,
    // always true for CC>3.
    C.CCMask = CC < 4 ? ~0U << (4 - CC) : -1;
  else if (Cond == ISD::SETGE || Cond == ISD::SETUGE)
    // ...and the inverse of that.
    C.CCMask = CC < 4 ? ~(~0U << (4 - CC)) : 0;
  else if (Cond == ISD::SETLE || Cond == ISD::SETULE)
    // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true),
    // always true for CC>3.
    C.CCMask = CC < 4 ? ~0U << (3 - CC) : -1;
  else if (Cond == ISD::SETGT || Cond == ISD::SETUGT)
    // ...and the inverse of that.
    C.CCMask = CC < 4 ? ~(~0U << (3 - CC)) : 0;
  else
    llvm_unreachable("Unexpected integer comparison type");
  C.CCMask &= CCValid;
  return C;
}

// Decide how to implement a comparison of type Cond between CmpOp0 and CmpOp1.
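// Chain and IsSignaling are only meaningful for strict floating-point
// comparisons.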
3032 static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1, 3033 ISD::CondCode Cond, const SDLoc &DL, 3034 SDValue Chain = SDValue(), 3035 bool IsSignaling = false) { 3036 if (CmpOp1.getOpcode() == ISD::Constant) { 3037 assert(!Chain); 3038 unsigned Opcode, CCValid; 3039 if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN && 3040 CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) && 3041 isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid)) 3042 return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, 3043 CmpOp1->getAsZExtVal(), Cond); 3044 if (CmpOp0.getOpcode() == ISD::INTRINSIC_WO_CHAIN && 3045 CmpOp0.getResNo() == CmpOp0->getNumValues() - 1 && 3046 isIntrinsicWithCC(CmpOp0, Opcode, CCValid)) 3047 return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, 3048 CmpOp1->getAsZExtVal(), Cond); 3049 } 3050 Comparison C(CmpOp0, CmpOp1, Chain); 3051 C.CCMask = CCMaskForCondCode(Cond); 3052 if (C.Op0.getValueType().isFloatingPoint()) { 3053 C.CCValid = SystemZ::CCMASK_FCMP; 3054 if (!C.Chain) 3055 C.Opcode = SystemZISD::FCMP; 3056 else if (!IsSignaling) 3057 C.Opcode = SystemZISD::STRICT_FCMP; 3058 else 3059 C.Opcode = SystemZISD::STRICT_FCMPS; 3060 adjustForFNeg(C); 3061 } else { 3062 assert(!C.Chain); 3063 C.CCValid = SystemZ::CCMASK_ICMP; 3064 C.Opcode = SystemZISD::ICMP; 3065 // Choose the type of comparison. Equality and inequality tests can 3066 // use either signed or unsigned comparisons. The choice also doesn't 3067 // matter if both sign bits are known to be clear. In those cases we 3068 // want to give the main isel code the freedom to choose whichever 3069 // form fits best. 3070 if (C.CCMask == SystemZ::CCMASK_CMP_EQ || 3071 C.CCMask == SystemZ::CCMASK_CMP_NE || 3072 (DAG.SignBitIsZero(C.Op0) && DAG.SignBitIsZero(C.Op1))) 3073 C.ICmpType = SystemZICMP::Any; 3074 else if (C.CCMask & SystemZ::CCMASK_CMP_UO) 3075 C.ICmpType = SystemZICMP::UnsignedOnly; 3076 else 3077 C.ICmpType = SystemZICMP::SignedOnly; 3078 C.CCMask &= ~SystemZ::CCMASK_CMP_UO; 3079 adjustForRedundantAnd(DAG, DL, C); 3080 adjustZeroCmp(DAG, DL, C); 3081 adjustSubwordCmp(DAG, DL, C); 3082 adjustForSubtraction(DAG, DL, C); 3083 adjustForLTGFR(C); 3084 adjustICmpTruncate(DAG, DL, C); 3085 } 3086 3087 if (shouldSwapCmpOperands(C)) { 3088 std::swap(C.Op0, C.Op1); 3089 C.CCMask = SystemZ::reverseCCMask(C.CCMask); 3090 } 3091 3092 adjustForTestUnderMask(DAG, DL, C); 3093 adjustICmp128(DAG, DL, C); 3094 return C; 3095 } 3096 3097 // Emit the comparison instruction described by C. 
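// A null Op1 indicates an intrinsic whose CC result is tested directly.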
3098 static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) { 3099 if (!C.Op1.getNode()) { 3100 SDNode *Node; 3101 switch (C.Op0.getOpcode()) { 3102 case ISD::INTRINSIC_W_CHAIN: 3103 Node = emitIntrinsicWithCCAndChain(DAG, C.Op0, C.Opcode); 3104 return SDValue(Node, 0); 3105 case ISD::INTRINSIC_WO_CHAIN: 3106 Node = emitIntrinsicWithCC(DAG, C.Op0, C.Opcode); 3107 return SDValue(Node, Node->getNumValues() - 1); 3108 default: 3109 llvm_unreachable("Invalid comparison operands"); 3110 } 3111 } 3112 if (C.Opcode == SystemZISD::ICMP) 3113 return DAG.getNode(SystemZISD::ICMP, DL, MVT::i32, C.Op0, C.Op1, 3114 DAG.getTargetConstant(C.ICmpType, DL, MVT::i32)); 3115 if (C.Opcode == SystemZISD::TM) { 3116 bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) != 3117 bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_1)); 3118 return DAG.getNode(SystemZISD::TM, DL, MVT::i32, C.Op0, C.Op1, 3119 DAG.getTargetConstant(RegisterOnly, DL, MVT::i32)); 3120 } 3121 if (C.Opcode == SystemZISD::VICMPES) { 3122 SDVTList VTs = DAG.getVTList(C.Op0.getValueType(), MVT::i32); 3123 SDValue Val = DAG.getNode(C.Opcode, DL, VTs, C.Op0, C.Op1); 3124 return SDValue(Val.getNode(), 1); 3125 } 3126 if (C.Chain) { 3127 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other); 3128 return DAG.getNode(C.Opcode, DL, VTs, C.Chain, C.Op0, C.Op1); 3129 } 3130 return DAG.getNode(C.Opcode, DL, MVT::i32, C.Op0, C.Op1); 3131 } 3132 3133 // Implement a 32-bit *MUL_LOHI operation by extending both operands to 3134 // 64 bits. Extend is the extension type to use. Store the high part 3135 // in Hi and the low part in Lo. 3136 static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend, 3137 SDValue Op0, SDValue Op1, SDValue &Hi, 3138 SDValue &Lo) { 3139 Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0); 3140 Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1); 3141 SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1); 3142 Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul, 3143 DAG.getConstant(32, DL, MVT::i64)); 3144 Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Hi); 3145 Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul); 3146 } 3147 3148 // Lower a binary operation that produces two VT results, one in each 3149 // half of a GR128 pair. Op0 and Op1 are the VT operands to the operation, 3150 // and Opcode performs the GR128 operation. Store the even register result 3151 // in Even and the odd register result in Odd. 3152 static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT, 3153 unsigned Opcode, SDValue Op0, SDValue Op1, 3154 SDValue &Even, SDValue &Odd) { 3155 SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped, Op0, Op1); 3156 bool Is32Bit = is32Bit(VT); 3157 Even = DAG.getTargetExtractSubreg(SystemZ::even128(Is32Bit), DL, VT, Result); 3158 Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result); 3159 } 3160 3161 // Return an i32 value that is 1 if the CC value produced by CCReg is 3162 // in the mask CCMask and 0 otherwise. CC is known to have a value 3163 // in CCValid, so other values can be ignored. 
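// The result is produced by a SELECT_CCMASK node that chooses between the
// constants 1 and 0.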
static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg,
                         unsigned CCValid, unsigned CCMask) {
  SDValue Ops[] = {DAG.getConstant(1, DL, MVT::i32),
                   DAG.getConstant(0, DL, MVT::i32),
                   DAG.getTargetConstant(CCValid, DL, MVT::i32),
                   DAG.getTargetConstant(CCMask, DL, MVT::i32), CCReg};
  return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, MVT::i32, Ops);
}

// Return the SystemZISD vector comparison operation for CC, or 0 if it cannot
// be done directly. Mode is CmpMode::Int for integer comparisons, CmpMode::FP
// for regular floating-point comparisons, CmpMode::StrictFP for strict (quiet)
// floating-point comparisons, and CmpMode::SignalingFP for strict signaling
// floating-point comparisons.
enum class CmpMode { Int, FP, StrictFP, SignalingFP };
static unsigned getVectorComparison(ISD::CondCode CC, CmpMode Mode) {
  switch (CC) {
  case ISD::SETOEQ:
  case ISD::SETEQ:
    switch (Mode) {
    case CmpMode::Int: return SystemZISD::VICMPE;
    case CmpMode::FP: return SystemZISD::VFCMPE;
    case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPE;
    case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPES;
    }
    llvm_unreachable("Bad mode");

  case ISD::SETOGE:
  case ISD::SETGE:
    switch (Mode) {
    case CmpMode::Int: return 0;
    case CmpMode::FP: return SystemZISD::VFCMPHE;
    case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPHE;
    case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHES;
    }
    llvm_unreachable("Bad mode");

  case ISD::SETOGT:
  case ISD::SETGT:
    switch (Mode) {
    case CmpMode::Int: return SystemZISD::VICMPH;
    case CmpMode::FP: return SystemZISD::VFCMPH;
    case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPH;
    case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHS;
    }
    llvm_unreachable("Bad mode");

  case ISD::SETUGT:
    switch (Mode) {
    case CmpMode::Int: return SystemZISD::VICMPHL;
    case CmpMode::FP: return 0;
    case CmpMode::StrictFP: return 0;
    case CmpMode::SignalingFP: return 0;
    }
    llvm_unreachable("Bad mode");

  default:
    return 0;
  }
}

// Return the SystemZISD vector comparison operation for CC or its inverse,
// or 0 if neither can be done directly. Indicate in Invert whether the
// result is for the inverse of CC. Mode is as above.
static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, CmpMode Mode,
                                            bool &Invert) {
  if (unsigned Opcode = getVectorComparison(CC, Mode)) {
    Invert = false;
    return Opcode;
  }

  CC = ISD::getSetCCInverse(CC, Mode == CmpMode::Int ? MVT::i32 : MVT::f32);
  if (unsigned Opcode = getVectorComparison(CC, Mode)) {
    Invert = true;
    return Opcode;
  }

  return 0;
}

// Return a v2f64 that contains the extended form of elements Start and Start+1
// of v4f32 value Op. If Chain is nonnull, return the strict form.
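// The requested elements are first moved into the even lanes with a shuffle
// and then widened with (STRICT_)VEXTEND.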
3246 static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL, 3247 SDValue Op, SDValue Chain) { 3248 int Mask[] = { Start, -1, Start + 1, -1 }; 3249 Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32), Mask); 3250 if (Chain) { 3251 SDVTList VTs = DAG.getVTList(MVT::v2f64, MVT::Other); 3252 return DAG.getNode(SystemZISD::STRICT_VEXTEND, DL, VTs, Chain, Op); 3253 } 3254 return DAG.getNode(SystemZISD::VEXTEND, DL, MVT::v2f64, Op); 3255 } 3256 3257 // Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode, 3258 // producing a result of type VT. If Chain is nonnull, return the strict form. 3259 SDValue SystemZTargetLowering::getVectorCmp(SelectionDAG &DAG, unsigned Opcode, 3260 const SDLoc &DL, EVT VT, 3261 SDValue CmpOp0, 3262 SDValue CmpOp1, 3263 SDValue Chain) const { 3264 // There is no hardware support for v4f32 (unless we have the vector 3265 // enhancements facility 1), so extend the vector into two v2f64s 3266 // and compare those. 3267 if (CmpOp0.getValueType() == MVT::v4f32 && 3268 !Subtarget.hasVectorEnhancements1()) { 3269 SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0, Chain); 3270 SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0, Chain); 3271 SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1, Chain); 3272 SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1, Chain); 3273 if (Chain) { 3274 SDVTList VTs = DAG.getVTList(MVT::v2i64, MVT::Other); 3275 SDValue HRes = DAG.getNode(Opcode, DL, VTs, Chain, H0, H1); 3276 SDValue LRes = DAG.getNode(Opcode, DL, VTs, Chain, L0, L1); 3277 SDValue Res = DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes); 3278 SDValue Chains[6] = { H0.getValue(1), L0.getValue(1), 3279 H1.getValue(1), L1.getValue(1), 3280 HRes.getValue(1), LRes.getValue(1) }; 3281 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); 3282 SDValue Ops[2] = { Res, NewChain }; 3283 return DAG.getMergeValues(Ops, DL); 3284 } 3285 SDValue HRes = DAG.getNode(Opcode, DL, MVT::v2i64, H0, H1); 3286 SDValue LRes = DAG.getNode(Opcode, DL, MVT::v2i64, L0, L1); 3287 return DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes); 3288 } 3289 if (Chain) { 3290 SDVTList VTs = DAG.getVTList(VT, MVT::Other); 3291 return DAG.getNode(Opcode, DL, VTs, Chain, CmpOp0, CmpOp1); 3292 } 3293 return DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1); 3294 } 3295 3296 // Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing 3297 // an integer mask of type VT. If Chain is nonnull, we have a strict 3298 // floating-point comparison. If in addition IsSignaling is true, we have 3299 // a strict signaling floating-point comparison. 3300 SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG, 3301 const SDLoc &DL, EVT VT, 3302 ISD::CondCode CC, 3303 SDValue CmpOp0, 3304 SDValue CmpOp1, 3305 SDValue Chain, 3306 bool IsSignaling) const { 3307 bool IsFP = CmpOp0.getValueType().isFloatingPoint(); 3308 assert (!Chain || IsFP); 3309 assert (!IsSignaling || Chain); 3310 CmpMode Mode = IsSignaling ? CmpMode::SignalingFP : 3311 Chain ? CmpMode::StrictFP : IsFP ? CmpMode::FP : CmpMode::Int; 3312 bool Invert = false; 3313 SDValue Cmp; 3314 switch (CC) { 3315 // Handle tests for order using (or (ogt y x) (oge x y)). 
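  // SETUO is lowered as the inverse of the SETO test below.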
3316 case ISD::SETUO: 3317 Invert = true; 3318 [[fallthrough]]; 3319 case ISD::SETO: { 3320 assert(IsFP && "Unexpected integer comparison"); 3321 SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode), 3322 DL, VT, CmpOp1, CmpOp0, Chain); 3323 SDValue GE = getVectorCmp(DAG, getVectorComparison(ISD::SETOGE, Mode), 3324 DL, VT, CmpOp0, CmpOp1, Chain); 3325 Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GE); 3326 if (Chain) 3327 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, 3328 LT.getValue(1), GE.getValue(1)); 3329 break; 3330 } 3331 3332 // Handle <> tests using (or (ogt y x) (ogt x y)). 3333 case ISD::SETUEQ: 3334 Invert = true; 3335 [[fallthrough]]; 3336 case ISD::SETONE: { 3337 assert(IsFP && "Unexpected integer comparison"); 3338 SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode), 3339 DL, VT, CmpOp1, CmpOp0, Chain); 3340 SDValue GT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode), 3341 DL, VT, CmpOp0, CmpOp1, Chain); 3342 Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GT); 3343 if (Chain) 3344 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, 3345 LT.getValue(1), GT.getValue(1)); 3346 break; 3347 } 3348 3349 // Otherwise a single comparison is enough. It doesn't really 3350 // matter whether we try the inversion or the swap first, since 3351 // there are no cases where both work. 3352 default: 3353 if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert)) 3354 Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1, Chain); 3355 else { 3356 CC = ISD::getSetCCSwappedOperands(CC); 3357 if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert)) 3358 Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0, Chain); 3359 else 3360 llvm_unreachable("Unhandled comparison"); 3361 } 3362 if (Chain) 3363 Chain = Cmp.getValue(1); 3364 break; 3365 } 3366 if (Invert) { 3367 SDValue Mask = 3368 DAG.getSplatBuildVector(VT, DL, DAG.getConstant(-1, DL, MVT::i64)); 3369 Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask); 3370 } 3371 if (Chain && Chain.getNode() != Cmp.getNode()) { 3372 SDValue Ops[2] = { Cmp, Chain }; 3373 Cmp = DAG.getMergeValues(Ops, DL); 3374 } 3375 return Cmp; 3376 } 3377 3378 SDValue SystemZTargetLowering::lowerSETCC(SDValue Op, 3379 SelectionDAG &DAG) const { 3380 SDValue CmpOp0 = Op.getOperand(0); 3381 SDValue CmpOp1 = Op.getOperand(1); 3382 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); 3383 SDLoc DL(Op); 3384 EVT VT = Op.getValueType(); 3385 if (VT.isVector()) 3386 return lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1); 3387 3388 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL)); 3389 SDValue CCReg = emitCmp(DAG, DL, C); 3390 return emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask); 3391 } 3392 3393 SDValue SystemZTargetLowering::lowerSTRICT_FSETCC(SDValue Op, 3394 SelectionDAG &DAG, 3395 bool IsSignaling) const { 3396 SDValue Chain = Op.getOperand(0); 3397 SDValue CmpOp0 = Op.getOperand(1); 3398 SDValue CmpOp1 = Op.getOperand(2); 3399 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(3))->get(); 3400 SDLoc DL(Op); 3401 EVT VT = Op.getNode()->getValueType(0); 3402 if (VT.isVector()) { 3403 SDValue Res = lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1, 3404 Chain, IsSignaling); 3405 return Res.getValue(Op.getResNo()); 3406 } 3407 3408 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL, Chain, IsSignaling)); 3409 SDValue CCReg = emitCmp(DAG, DL, C); 3410 CCReg->setFlags(Op->getFlags()); 3411 SDValue Result = emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask); 3412 SDValue Ops[2] = { Result, CCReg.getValue(1) }; 
3413 return DAG.getMergeValues(Ops, DL); 3414 } 3415 3416 SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const { 3417 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); 3418 SDValue CmpOp0 = Op.getOperand(2); 3419 SDValue CmpOp1 = Op.getOperand(3); 3420 SDValue Dest = Op.getOperand(4); 3421 SDLoc DL(Op); 3422 3423 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL)); 3424 SDValue CCReg = emitCmp(DAG, DL, C); 3425 return DAG.getNode( 3426 SystemZISD::BR_CCMASK, DL, Op.getValueType(), Op.getOperand(0), 3427 DAG.getTargetConstant(C.CCValid, DL, MVT::i32), 3428 DAG.getTargetConstant(C.CCMask, DL, MVT::i32), Dest, CCReg); 3429 } 3430 3431 // Return true if Pos is CmpOp and Neg is the negative of CmpOp, 3432 // allowing Pos and Neg to be wider than CmpOp. 3433 static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg) { 3434 return (Neg.getOpcode() == ISD::SUB && 3435 Neg.getOperand(0).getOpcode() == ISD::Constant && 3436 Neg.getConstantOperandVal(0) == 0 && Neg.getOperand(1) == Pos && 3437 (Pos == CmpOp || (Pos.getOpcode() == ISD::SIGN_EXTEND && 3438 Pos.getOperand(0) == CmpOp))); 3439 } 3440 3441 // Return the absolute or negative absolute of Op; IsNegative decides which. 3442 static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op, 3443 bool IsNegative) { 3444 Op = DAG.getNode(ISD::ABS, DL, Op.getValueType(), Op); 3445 if (IsNegative) 3446 Op = DAG.getNode(ISD::SUB, DL, Op.getValueType(), 3447 DAG.getConstant(0, DL, Op.getValueType()), Op); 3448 return Op; 3449 } 3450 3451 SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op, 3452 SelectionDAG &DAG) const { 3453 SDValue CmpOp0 = Op.getOperand(0); 3454 SDValue CmpOp1 = Op.getOperand(1); 3455 SDValue TrueOp = Op.getOperand(2); 3456 SDValue FalseOp = Op.getOperand(3); 3457 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); 3458 SDLoc DL(Op); 3459 3460 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL)); 3461 3462 // Check for absolute and negative-absolute selections, including those 3463 // where the comparison value is sign-extended (for LPGFR and LNGFR). 3464 // This check supplements the one in DAGCombiner. 3465 if (C.Opcode == SystemZISD::ICMP && C.CCMask != SystemZ::CCMASK_CMP_EQ && 3466 C.CCMask != SystemZ::CCMASK_CMP_NE && 3467 C.Op1.getOpcode() == ISD::Constant && 3468 cast<ConstantSDNode>(C.Op1)->getValueSizeInBits(0) <= 64 && 3469 C.Op1->getAsZExtVal() == 0) { 3470 if (isAbsolute(C.Op0, TrueOp, FalseOp)) 3471 return getAbsolute(DAG, DL, TrueOp, C.CCMask & SystemZ::CCMASK_CMP_LT); 3472 if (isAbsolute(C.Op0, FalseOp, TrueOp)) 3473 return getAbsolute(DAG, DL, FalseOp, C.CCMask & SystemZ::CCMASK_CMP_GT); 3474 } 3475 3476 SDValue CCReg = emitCmp(DAG, DL, C); 3477 SDValue Ops[] = {TrueOp, FalseOp, 3478 DAG.getTargetConstant(C.CCValid, DL, MVT::i32), 3479 DAG.getTargetConstant(C.CCMask, DL, MVT::i32), CCReg}; 3480 3481 return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, Op.getValueType(), Ops); 3482 } 3483 3484 SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node, 3485 SelectionDAG &DAG) const { 3486 SDLoc DL(Node); 3487 const GlobalValue *GV = Node->getGlobal(); 3488 int64_t Offset = Node->getOffset(); 3489 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 3490 CodeModel::Model CM = DAG.getTarget().getCodeModel(); 3491 3492 SDValue Result; 3493 if (Subtarget.isPC32DBLSymbol(GV, CM)) { 3494 if (isInt<32>(Offset)) { 3495 // Assign anchors at 1<<12 byte boundaries. 
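      // The anchor is the offset rounded down to a multiple of 4096; the
      // remainder is folded below when it is halfword-aligned.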
3496 uint64_t Anchor = Offset & ~uint64_t(0xfff); 3497 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor); 3498 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); 3499 3500 // The offset can be folded into the address if it is aligned to a 3501 // halfword. 3502 Offset -= Anchor; 3503 if (Offset != 0 && (Offset & 1) == 0) { 3504 SDValue Full = 3505 DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor + Offset); 3506 Result = DAG.getNode(SystemZISD::PCREL_OFFSET, DL, PtrVT, Full, Result); 3507 Offset = 0; 3508 } 3509 } else { 3510 // Conservatively load a constant offset greater than 32 bits into a 3511 // register below. 3512 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT); 3513 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); 3514 } 3515 } else if (Subtarget.isTargetELF()) { 3516 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT); 3517 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); 3518 Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result, 3519 MachinePointerInfo::getGOT(DAG.getMachineFunction())); 3520 } else if (Subtarget.isTargetzOS()) { 3521 Result = getADAEntry(DAG, GV, DL, PtrVT); 3522 } else 3523 llvm_unreachable("Unexpected Subtarget"); 3524 3525 // If there was a non-zero offset that we didn't fold, create an explicit 3526 // addition for it. 3527 if (Offset != 0) 3528 Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result, 3529 DAG.getConstant(Offset, DL, PtrVT)); 3530 3531 return Result; 3532 } 3533 3534 SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node, 3535 SelectionDAG &DAG, 3536 unsigned Opcode, 3537 SDValue GOTOffset) const { 3538 SDLoc DL(Node); 3539 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 3540 SDValue Chain = DAG.getEntryNode(); 3541 SDValue Glue; 3542 3543 if (DAG.getMachineFunction().getFunction().getCallingConv() == 3544 CallingConv::GHC) 3545 report_fatal_error("In GHC calling convention TLS is not supported"); 3546 3547 // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12. 3548 SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT); 3549 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue); 3550 Glue = Chain.getValue(1); 3551 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R2D, GOTOffset, Glue); 3552 Glue = Chain.getValue(1); 3553 3554 // The first call operand is the chain and the second is the TLS symbol. 3555 SmallVector<SDValue, 8> Ops; 3556 Ops.push_back(Chain); 3557 Ops.push_back(DAG.getTargetGlobalAddress(Node->getGlobal(), DL, 3558 Node->getValueType(0), 3559 0, 0)); 3560 3561 // Add argument registers to the end of the list so that they are 3562 // known live into the call. 3563 Ops.push_back(DAG.getRegister(SystemZ::R2D, PtrVT)); 3564 Ops.push_back(DAG.getRegister(SystemZ::R12D, PtrVT)); 3565 3566 // Add a register mask operand representing the call-preserved registers. 3567 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); 3568 const uint32_t *Mask = 3569 TRI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C); 3570 assert(Mask && "Missing call preserved mask for calling convention"); 3571 Ops.push_back(DAG.getRegisterMask(Mask)); 3572 3573 // Glue the call to the argument copies. 3574 Ops.push_back(Glue); 3575 3576 // Emit the call. 3577 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); 3578 Chain = DAG.getNode(Opcode, DL, NodeTys, Ops); 3579 Glue = Chain.getValue(1); 3580 3581 // Copy the return value from %r2. 
3582 return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue); 3583 } 3584 3585 SDValue SystemZTargetLowering::lowerThreadPointer(const SDLoc &DL, 3586 SelectionDAG &DAG) const { 3587 SDValue Chain = DAG.getEntryNode(); 3588 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 3589 3590 // The high part of the thread pointer is in access register 0. 3591 SDValue TPHi = DAG.getCopyFromReg(Chain, DL, SystemZ::A0, MVT::i32); 3592 TPHi = DAG.getNode(ISD::ANY_EXTEND, DL, PtrVT, TPHi); 3593 3594 // The low part of the thread pointer is in access register 1. 3595 SDValue TPLo = DAG.getCopyFromReg(Chain, DL, SystemZ::A1, MVT::i32); 3596 TPLo = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TPLo); 3597 3598 // Merge them into a single 64-bit address. 3599 SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi, 3600 DAG.getConstant(32, DL, PtrVT)); 3601 return DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo); 3602 } 3603 3604 SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node, 3605 SelectionDAG &DAG) const { 3606 if (DAG.getTarget().useEmulatedTLS()) 3607 return LowerToTLSEmulatedModel(Node, DAG); 3608 SDLoc DL(Node); 3609 const GlobalValue *GV = Node->getGlobal(); 3610 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 3611 TLSModel::Model model = DAG.getTarget().getTLSModel(GV); 3612 3613 if (DAG.getMachineFunction().getFunction().getCallingConv() == 3614 CallingConv::GHC) 3615 report_fatal_error("In GHC calling convention TLS is not supported"); 3616 3617 SDValue TP = lowerThreadPointer(DL, DAG); 3618 3619 // Get the offset of GA from the thread pointer, based on the TLS model. 3620 SDValue Offset; 3621 switch (model) { 3622 case TLSModel::GeneralDynamic: { 3623 // Load the GOT offset of the tls_index (module ID / per-symbol offset). 3624 SystemZConstantPoolValue *CPV = 3625 SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD); 3626 3627 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8)); 3628 Offset = DAG.getLoad( 3629 PtrVT, DL, DAG.getEntryNode(), Offset, 3630 MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); 3631 3632 // Call __tls_get_offset to retrieve the offset. 3633 Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset); 3634 break; 3635 } 3636 3637 case TLSModel::LocalDynamic: { 3638 // Load the GOT offset of the module ID. 3639 SystemZConstantPoolValue *CPV = 3640 SystemZConstantPoolValue::Create(GV, SystemZCP::TLSLDM); 3641 3642 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8)); 3643 Offset = DAG.getLoad( 3644 PtrVT, DL, DAG.getEntryNode(), Offset, 3645 MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); 3646 3647 // Call __tls_get_offset to retrieve the module base offset. 3648 Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset); 3649 3650 // Note: The SystemZLDCleanupPass will remove redundant computations 3651 // of the module base offset. Count total number of local-dynamic 3652 // accesses to trigger execution of that pass. 3653 SystemZMachineFunctionInfo* MFI = 3654 DAG.getMachineFunction().getInfo<SystemZMachineFunctionInfo>(); 3655 MFI->incNumLocalDynamicTLSAccesses(); 3656 3657 // Add the per-symbol offset. 
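    // The DTPOFF value is loaded from the constant pool and added to the
    // module base returned by __tls_get_offset.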
3658 CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF); 3659 3660 SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, Align(8)); 3661 DTPOffset = DAG.getLoad( 3662 PtrVT, DL, DAG.getEntryNode(), DTPOffset, 3663 MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); 3664 3665 Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset); 3666 break; 3667 } 3668 3669 case TLSModel::InitialExec: { 3670 // Load the offset from the GOT. 3671 Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 3672 SystemZII::MO_INDNTPOFF); 3673 Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset); 3674 Offset = 3675 DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Offset, 3676 MachinePointerInfo::getGOT(DAG.getMachineFunction())); 3677 break; 3678 } 3679 3680 case TLSModel::LocalExec: { 3681 // Force the offset into the constant pool and load it from there. 3682 SystemZConstantPoolValue *CPV = 3683 SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF); 3684 3685 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8)); 3686 Offset = DAG.getLoad( 3687 PtrVT, DL, DAG.getEntryNode(), Offset, 3688 MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); 3689 break; 3690 } 3691 } 3692 3693 // Add the base and offset together. 3694 return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset); 3695 } 3696 3697 SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node, 3698 SelectionDAG &DAG) const { 3699 SDLoc DL(Node); 3700 const BlockAddress *BA = Node->getBlockAddress(); 3701 int64_t Offset = Node->getOffset(); 3702 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 3703 3704 SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset); 3705 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); 3706 return Result; 3707 } 3708 3709 SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT, 3710 SelectionDAG &DAG) const { 3711 SDLoc DL(JT); 3712 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 3713 SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); 3714 3715 // Use LARL to load the address of the table. 3716 return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); 3717 } 3718 3719 SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP, 3720 SelectionDAG &DAG) const { 3721 SDLoc DL(CP); 3722 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 3723 3724 SDValue Result; 3725 if (CP->isMachineConstantPoolEntry()) 3726 Result = 3727 DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CP->getAlign()); 3728 else 3729 Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlign(), 3730 CP->getOffset()); 3731 3732 // Use LARL to load the address of the constant pool entry. 3733 return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); 3734 } 3735 3736 SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op, 3737 SelectionDAG &DAG) const { 3738 auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>(); 3739 MachineFunction &MF = DAG.getMachineFunction(); 3740 MachineFrameInfo &MFI = MF.getFrameInfo(); 3741 MFI.setFrameAddressIsTaken(true); 3742 3743 SDLoc DL(Op); 3744 unsigned Depth = Op.getConstantOperandVal(0); 3745 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 3746 3747 // By definition, the frame address is the address of the back chain. (In 3748 // the case of packed stack without backchain, return the address where the 3749 // backchain would have been stored. This will either be an unused space or 3750 // contain a saved register). 
3751 int BackChainIdx = TFL->getOrCreateFramePointerSaveIndex(MF); 3752 SDValue BackChain = DAG.getFrameIndex(BackChainIdx, PtrVT); 3753 3754 if (Depth > 0) { 3755 // FIXME The frontend should detect this case. 3756 if (!MF.getSubtarget<SystemZSubtarget>().hasBackChain()) 3757 report_fatal_error("Unsupported stack frame traversal count"); 3758 3759 SDValue Offset = DAG.getConstant(TFL->getBackchainOffset(MF), DL, PtrVT); 3760 while (Depth--) { 3761 BackChain = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), BackChain, 3762 MachinePointerInfo()); 3763 BackChain = DAG.getNode(ISD::ADD, DL, PtrVT, BackChain, Offset); 3764 } 3765 } 3766 3767 return BackChain; 3768 } 3769 3770 SDValue SystemZTargetLowering::lowerRETURNADDR(SDValue Op, 3771 SelectionDAG &DAG) const { 3772 MachineFunction &MF = DAG.getMachineFunction(); 3773 MachineFrameInfo &MFI = MF.getFrameInfo(); 3774 MFI.setReturnAddressIsTaken(true); 3775 3776 if (verifyReturnAddressArgumentIsConstant(Op, DAG)) 3777 return SDValue(); 3778 3779 SDLoc DL(Op); 3780 unsigned Depth = Op.getConstantOperandVal(0); 3781 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 3782 3783 if (Depth > 0) { 3784 // FIXME The frontend should detect this case. 3785 if (!MF.getSubtarget<SystemZSubtarget>().hasBackChain()) 3786 report_fatal_error("Unsupported stack frame traversal count"); 3787 3788 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG); 3789 auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>(); 3790 int Offset = (TFL->usePackedStack(MF) ? -2 : 14) * 3791 getTargetMachine().getPointerSize(0); 3792 SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, FrameAddr, 3793 DAG.getConstant(Offset, DL, PtrVT)); 3794 return DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, 3795 MachinePointerInfo()); 3796 } 3797 3798 // Return R14D, which has the return address. Mark it an implicit live-in. 3799 Register LinkReg = MF.addLiveIn(SystemZ::R14D, &SystemZ::GR64BitRegClass); 3800 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, LinkReg, PtrVT); 3801 } 3802 3803 SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op, 3804 SelectionDAG &DAG) const { 3805 SDLoc DL(Op); 3806 SDValue In = Op.getOperand(0); 3807 EVT InVT = In.getValueType(); 3808 EVT ResVT = Op.getValueType(); 3809 3810 // Convert loads directly. This is normally done by DAGCombiner, 3811 // but we need this case for bitcasts that are created during lowering 3812 // and which are then lowered themselves. 3813 if (auto *LoadN = dyn_cast<LoadSDNode>(In)) 3814 if (ISD::isNormalLoad(LoadN)) { 3815 SDValue NewLoad = DAG.getLoad(ResVT, DL, LoadN->getChain(), 3816 LoadN->getBasePtr(), LoadN->getMemOperand()); 3817 // Update the chain uses. 
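      // so that users of the original load's chain now use the new load.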
3818 DAG.ReplaceAllUsesOfValueWith(SDValue(LoadN, 1), NewLoad.getValue(1)); 3819 return NewLoad; 3820 } 3821 3822 if (InVT == MVT::i32 && ResVT == MVT::f32) { 3823 SDValue In64; 3824 if (Subtarget.hasHighWord()) { 3825 SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, 3826 MVT::i64); 3827 In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL, 3828 MVT::i64, SDValue(U64, 0), In); 3829 } else { 3830 In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In); 3831 In64 = DAG.getNode(ISD::SHL, DL, MVT::i64, In64, 3832 DAG.getConstant(32, DL, MVT::i64)); 3833 } 3834 SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64); 3835 return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, 3836 DL, MVT::f32, Out64); 3837 } 3838 if (InVT == MVT::f32 && ResVT == MVT::i32) { 3839 SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64); 3840 SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL, 3841 MVT::f64, SDValue(U64, 0), In); 3842 SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64); 3843 if (Subtarget.hasHighWord()) 3844 return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, DL, 3845 MVT::i32, Out64); 3846 SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64, 3847 DAG.getConstant(32, DL, MVT::i64)); 3848 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift); 3849 } 3850 llvm_unreachable("Unexpected bitcast combination"); 3851 } 3852 3853 SDValue SystemZTargetLowering::lowerVASTART(SDValue Op, 3854 SelectionDAG &DAG) const { 3855 3856 if (Subtarget.isTargetXPLINK64()) 3857 return lowerVASTART_XPLINK(Op, DAG); 3858 else 3859 return lowerVASTART_ELF(Op, DAG); 3860 } 3861 3862 SDValue SystemZTargetLowering::lowerVASTART_XPLINK(SDValue Op, 3863 SelectionDAG &DAG) const { 3864 MachineFunction &MF = DAG.getMachineFunction(); 3865 SystemZMachineFunctionInfo *FuncInfo = 3866 MF.getInfo<SystemZMachineFunctionInfo>(); 3867 3868 SDLoc DL(Op); 3869 3870 // vastart just stores the address of the VarArgsFrameIndex slot into the 3871 // memory location argument. 3872 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 3873 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); 3874 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 3875 return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1), 3876 MachinePointerInfo(SV)); 3877 } 3878 3879 SDValue SystemZTargetLowering::lowerVASTART_ELF(SDValue Op, 3880 SelectionDAG &DAG) const { 3881 MachineFunction &MF = DAG.getMachineFunction(); 3882 SystemZMachineFunctionInfo *FuncInfo = 3883 MF.getInfo<SystemZMachineFunctionInfo>(); 3884 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 3885 3886 SDValue Chain = Op.getOperand(0); 3887 SDValue Addr = Op.getOperand(1); 3888 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 3889 SDLoc DL(Op); 3890 3891 // The initial values of each field. 3892 const unsigned NumFields = 4; 3893 SDValue Fields[NumFields] = { 3894 DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), DL, PtrVT), 3895 DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), DL, PtrVT), 3896 DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT), 3897 DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT) 3898 }; 3899 3900 // Store each field into its respective slot. 
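  // Each field occupies 8 bytes, at consecutive offsets from the va_list
  // address.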
3901 SDValue MemOps[NumFields]; 3902 unsigned Offset = 0; 3903 for (unsigned I = 0; I < NumFields; ++I) { 3904 SDValue FieldAddr = Addr; 3905 if (Offset != 0) 3906 FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr, 3907 DAG.getIntPtrConstant(Offset, DL)); 3908 MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr, 3909 MachinePointerInfo(SV, Offset)); 3910 Offset += 8; 3911 } 3912 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps); 3913 } 3914 3915 SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op, 3916 SelectionDAG &DAG) const { 3917 SDValue Chain = Op.getOperand(0); 3918 SDValue DstPtr = Op.getOperand(1); 3919 SDValue SrcPtr = Op.getOperand(2); 3920 const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue(); 3921 const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue(); 3922 SDLoc DL(Op); 3923 3924 uint32_t Sz = 3925 Subtarget.isTargetXPLINK64() ? getTargetMachine().getPointerSize(0) : 32; 3926 return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(Sz, DL), 3927 Align(8), /*isVolatile*/ false, /*AlwaysInline*/ false, 3928 /*isTailCall*/ false, MachinePointerInfo(DstSV), 3929 MachinePointerInfo(SrcSV)); 3930 } 3931 3932 SDValue 3933 SystemZTargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op, 3934 SelectionDAG &DAG) const { 3935 if (Subtarget.isTargetXPLINK64()) 3936 return lowerDYNAMIC_STACKALLOC_XPLINK(Op, DAG); 3937 else 3938 return lowerDYNAMIC_STACKALLOC_ELF(Op, DAG); 3939 } 3940 3941 SDValue 3942 SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_XPLINK(SDValue Op, 3943 SelectionDAG &DAG) const { 3944 const TargetFrameLowering *TFI = Subtarget.getFrameLowering(); 3945 MachineFunction &MF = DAG.getMachineFunction(); 3946 bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack"); 3947 SDValue Chain = Op.getOperand(0); 3948 SDValue Size = Op.getOperand(1); 3949 SDValue Align = Op.getOperand(2); 3950 SDLoc DL(Op); 3951 3952 // If user has set the no alignment function attribute, ignore 3953 // alloca alignments. 3954 uint64_t AlignVal = (RealignOpt ? Align->getAsZExtVal() : 0); 3955 3956 uint64_t StackAlign = TFI->getStackAlignment(); 3957 uint64_t RequiredAlign = std::max(AlignVal, StackAlign); 3958 uint64_t ExtraAlignSpace = RequiredAlign - StackAlign; 3959 3960 SDValue NeededSpace = Size; 3961 3962 // Add extra space for alignment if needed. 3963 EVT PtrVT = getPointerTy(MF.getDataLayout()); 3964 if (ExtraAlignSpace) 3965 NeededSpace = DAG.getNode(ISD::ADD, DL, PtrVT, NeededSpace, 3966 DAG.getConstant(ExtraAlignSpace, DL, PtrVT)); 3967 3968 bool IsSigned = false; 3969 bool DoesNotReturn = false; 3970 bool IsReturnValueUsed = false; 3971 EVT VT = Op.getValueType(); 3972 SDValue AllocaCall = 3973 makeExternalCall(Chain, DAG, "@@ALCAXP", VT, ArrayRef(NeededSpace), 3974 CallingConv::C, IsSigned, DL, DoesNotReturn, 3975 IsReturnValueUsed) 3976 .first; 3977 3978 // Perform a CopyFromReg from %GPR4 (stack pointer register). Chain and Glue 3979 // to end of call in order to ensure it isn't broken up from the call 3980 // sequence. 
3981 auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>(); 3982 Register SPReg = Regs.getStackPointerRegister(); 3983 Chain = AllocaCall.getValue(1); 3984 SDValue Glue = AllocaCall.getValue(2); 3985 SDValue NewSPRegNode = DAG.getCopyFromReg(Chain, DL, SPReg, PtrVT, Glue); 3986 Chain = NewSPRegNode.getValue(1); 3987 3988 MVT PtrMVT = getPointerMemTy(MF.getDataLayout()); 3989 SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, PtrMVT); 3990 SDValue Result = DAG.getNode(ISD::ADD, DL, PtrMVT, NewSPRegNode, ArgAdjust); 3991 3992 // Dynamically realign if needed. 3993 if (ExtraAlignSpace) { 3994 Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result, 3995 DAG.getConstant(ExtraAlignSpace, DL, PtrVT)); 3996 Result = DAG.getNode(ISD::AND, DL, PtrVT, Result, 3997 DAG.getConstant(~(RequiredAlign - 1), DL, PtrVT)); 3998 } 3999 4000 SDValue Ops[2] = {Result, Chain}; 4001 return DAG.getMergeValues(Ops, DL); 4002 } 4003 4004 SDValue 4005 SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_ELF(SDValue Op, 4006 SelectionDAG &DAG) const { 4007 const TargetFrameLowering *TFI = Subtarget.getFrameLowering(); 4008 MachineFunction &MF = DAG.getMachineFunction(); 4009 bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack"); 4010 bool StoreBackchain = MF.getSubtarget<SystemZSubtarget>().hasBackChain(); 4011 4012 SDValue Chain = Op.getOperand(0); 4013 SDValue Size = Op.getOperand(1); 4014 SDValue Align = Op.getOperand(2); 4015 SDLoc DL(Op); 4016 4017 // If user has set the no alignment function attribute, ignore 4018 // alloca alignments. 4019 uint64_t AlignVal = (RealignOpt ? Align->getAsZExtVal() : 0); 4020 4021 uint64_t StackAlign = TFI->getStackAlignment(); 4022 uint64_t RequiredAlign = std::max(AlignVal, StackAlign); 4023 uint64_t ExtraAlignSpace = RequiredAlign - StackAlign; 4024 4025 Register SPReg = getStackPointerRegisterToSaveRestore(); 4026 SDValue NeededSpace = Size; 4027 4028 // Get a reference to the stack pointer. 4029 SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64); 4030 4031 // If we need a backchain, save it now. 4032 SDValue Backchain; 4033 if (StoreBackchain) 4034 Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG), 4035 MachinePointerInfo()); 4036 4037 // Add extra space for alignment if needed. 4038 if (ExtraAlignSpace) 4039 NeededSpace = DAG.getNode(ISD::ADD, DL, MVT::i64, NeededSpace, 4040 DAG.getConstant(ExtraAlignSpace, DL, MVT::i64)); 4041 4042 // Get the new stack pointer value. 4043 SDValue NewSP; 4044 if (hasInlineStackProbe(MF)) { 4045 NewSP = DAG.getNode(SystemZISD::PROBED_ALLOCA, DL, 4046 DAG.getVTList(MVT::i64, MVT::Other), Chain, OldSP, NeededSpace); 4047 Chain = NewSP.getValue(1); 4048 } 4049 else { 4050 NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace); 4051 // Copy the new stack pointer back. 4052 Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP); 4053 } 4054 4055 // The allocated data lives above the 160 bytes allocated for the standard 4056 // frame, plus any outgoing stack arguments. We don't know how much that 4057 // amounts to yet, so emit a special ADJDYNALLOC placeholder. 4058 SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64); 4059 SDValue Result = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, ArgAdjust); 4060 4061 // Dynamically realign if needed. 
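  // Over-allocating by ExtraAlignSpace bytes above makes it possible to
  // round the result up to RequiredAlign here.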
4062 if (RequiredAlign > StackAlign) { 4063 Result = 4064 DAG.getNode(ISD::ADD, DL, MVT::i64, Result, 4065 DAG.getConstant(ExtraAlignSpace, DL, MVT::i64)); 4066 Result = 4067 DAG.getNode(ISD::AND, DL, MVT::i64, Result, 4068 DAG.getConstant(~(RequiredAlign - 1), DL, MVT::i64)); 4069 } 4070 4071 if (StoreBackchain) 4072 Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG), 4073 MachinePointerInfo()); 4074 4075 SDValue Ops[2] = { Result, Chain }; 4076 return DAG.getMergeValues(Ops, DL); 4077 } 4078 4079 SDValue SystemZTargetLowering::lowerGET_DYNAMIC_AREA_OFFSET( 4080 SDValue Op, SelectionDAG &DAG) const { 4081 SDLoc DL(Op); 4082 4083 return DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64); 4084 } 4085 4086 SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op, 4087 SelectionDAG &DAG) const { 4088 EVT VT = Op.getValueType(); 4089 SDLoc DL(Op); 4090 SDValue Ops[2]; 4091 if (is32Bit(VT)) 4092 // Just do a normal 64-bit multiplication and extract the results. 4093 // We define this so that it can be used for constant division. 4094 lowerMUL_LOHI32(DAG, DL, ISD::SIGN_EXTEND, Op.getOperand(0), 4095 Op.getOperand(1), Ops[1], Ops[0]); 4096 else if (Subtarget.hasMiscellaneousExtensions2()) 4097 // SystemZISD::SMUL_LOHI returns the low result in the odd register and 4098 // the high result in the even register. ISD::SMUL_LOHI is defined to 4099 // return the low half first, so the results are in reverse order. 4100 lowerGR128Binary(DAG, DL, VT, SystemZISD::SMUL_LOHI, 4101 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]); 4102 else { 4103 // Do a full 128-bit multiplication based on SystemZISD::UMUL_LOHI: 4104 // 4105 // (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64) 4106 // 4107 // but using the fact that the upper halves are either all zeros 4108 // or all ones: 4109 // 4110 // (ll * rl) - ((lh & rl) << 64) - ((ll & rh) << 64) 4111 // 4112 // and grouping the right terms together since they are quicker than the 4113 // multiplication: 4114 // 4115 // (ll * rl) - (((lh & rl) + (ll & rh)) << 64) 4116 SDValue C63 = DAG.getConstant(63, DL, MVT::i64); 4117 SDValue LL = Op.getOperand(0); 4118 SDValue RL = Op.getOperand(1); 4119 SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63); 4120 SDValue RH = DAG.getNode(ISD::SRA, DL, VT, RL, C63); 4121 // SystemZISD::UMUL_LOHI returns the low result in the odd register and 4122 // the high result in the even register. ISD::SMUL_LOHI is defined to 4123 // return the low half first, so the results are in reverse order. 4124 lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI, 4125 LL, RL, Ops[1], Ops[0]); 4126 SDValue NegLLTimesRH = DAG.getNode(ISD::AND, DL, VT, LL, RH); 4127 SDValue NegLHTimesRL = DAG.getNode(ISD::AND, DL, VT, LH, RL); 4128 SDValue NegSum = DAG.getNode(ISD::ADD, DL, VT, NegLLTimesRH, NegLHTimesRL); 4129 Ops[1] = DAG.getNode(ISD::SUB, DL, VT, Ops[1], NegSum); 4130 } 4131 return DAG.getMergeValues(Ops, DL); 4132 } 4133 4134 SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op, 4135 SelectionDAG &DAG) const { 4136 EVT VT = Op.getValueType(); 4137 SDLoc DL(Op); 4138 SDValue Ops[2]; 4139 if (is32Bit(VT)) 4140 // Just do a normal 64-bit multiplication and extract the results. 4141 // We define this so that it can be used for constant division. 4142 lowerMUL_LOHI32(DAG, DL, ISD::ZERO_EXTEND, Op.getOperand(0), 4143 Op.getOperand(1), Ops[1], Ops[0]); 4144 else 4145 // SystemZISD::UMUL_LOHI returns the low result in the odd register and 4146 // the high result in the even register. 
ISD::UMUL_LOHI is defined to 4147 // return the low half first, so the results are in reverse order. 4148 lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI, 4149 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]); 4150 return DAG.getMergeValues(Ops, DL); 4151 } 4152 4153 SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op, 4154 SelectionDAG &DAG) const { 4155 SDValue Op0 = Op.getOperand(0); 4156 SDValue Op1 = Op.getOperand(1); 4157 EVT VT = Op.getValueType(); 4158 SDLoc DL(Op); 4159 4160 // We use DSGF for 32-bit division. This means the first operand must 4161 // always be 64-bit, and the second operand should be 32-bit whenever 4162 // that is possible, to improve performance. 4163 if (is32Bit(VT)) 4164 Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0); 4165 else if (DAG.ComputeNumSignBits(Op1) > 32) 4166 Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1); 4167 4168 // DSG(F) returns the remainder in the even register and the 4169 // quotient in the odd register. 4170 SDValue Ops[2]; 4171 lowerGR128Binary(DAG, DL, VT, SystemZISD::SDIVREM, Op0, Op1, Ops[1], Ops[0]); 4172 return DAG.getMergeValues(Ops, DL); 4173 } 4174 4175 SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op, 4176 SelectionDAG &DAG) const { 4177 EVT VT = Op.getValueType(); 4178 SDLoc DL(Op); 4179 4180 // DL(G) returns the remainder in the even register and the 4181 // quotient in the odd register. 4182 SDValue Ops[2]; 4183 lowerGR128Binary(DAG, DL, VT, SystemZISD::UDIVREM, 4184 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]); 4185 return DAG.getMergeValues(Ops, DL); 4186 } 4187 4188 SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const { 4189 assert(Op.getValueType() == MVT::i64 && "Should be 64-bit operation"); 4190 4191 // Get the known-zero masks for each operand. 4192 SDValue Ops[] = {Op.getOperand(0), Op.getOperand(1)}; 4193 KnownBits Known[2] = {DAG.computeKnownBits(Ops[0]), 4194 DAG.computeKnownBits(Ops[1])}; 4195 4196 // See if the upper 32 bits of one operand and the lower 32 bits of the 4197 // other are known zero. They are the low and high operands respectively. 4198 uint64_t Masks[] = { Known[0].Zero.getZExtValue(), 4199 Known[1].Zero.getZExtValue() }; 4200 unsigned High, Low; 4201 if ((Masks[0] >> 32) == 0xffffffff && uint32_t(Masks[1]) == 0xffffffff) 4202 High = 1, Low = 0; 4203 else if ((Masks[1] >> 32) == 0xffffffff && uint32_t(Masks[0]) == 0xffffffff) 4204 High = 0, Low = 1; 4205 else 4206 return Op; 4207 4208 SDValue LowOp = Ops[Low]; 4209 SDValue HighOp = Ops[High]; 4210 4211 // If the high part is a constant, we're better off using IILH. 4212 if (HighOp.getOpcode() == ISD::Constant) 4213 return Op; 4214 4215 // If the low part is a constant that is outside the range of LHI, 4216 // then we're better off using IILF. 4217 if (LowOp.getOpcode() == ISD::Constant) { 4218 int64_t Value = int32_t(LowOp->getAsZExtVal()); 4219 if (!isInt<16>(Value)) 4220 return Op; 4221 } 4222 4223 // Check whether the high part is an AND that doesn't change the 4224 // high 32 bits and just masks out low bits. We can skip it if so. 
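  // The AND can be dropped when the high bits it would clear are already
  // known to be zero.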
4225 if (HighOp.getOpcode() == ISD::AND && 4226 HighOp.getOperand(1).getOpcode() == ISD::Constant) { 4227 SDValue HighOp0 = HighOp.getOperand(0); 4228 uint64_t Mask = HighOp.getConstantOperandVal(1); 4229 if (DAG.MaskedValueIsZero(HighOp0, APInt(64, ~(Mask | 0xffffffff)))) 4230 HighOp = HighOp0; 4231 } 4232 4233 // Take advantage of the fact that all GR32 operations only change the 4234 // low 32 bits by truncating Low to an i32 and inserting it directly 4235 // using a subreg. The interesting cases are those where the truncation 4236 // can be folded. 4237 SDLoc DL(Op); 4238 SDValue Low32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, LowOp); 4239 return DAG.getTargetInsertSubreg(SystemZ::subreg_l32, DL, 4240 MVT::i64, HighOp, Low32); 4241 } 4242 4243 // Lower SADDO/SSUBO/UADDO/USUBO nodes. 4244 SDValue SystemZTargetLowering::lowerXALUO(SDValue Op, 4245 SelectionDAG &DAG) const { 4246 SDNode *N = Op.getNode(); 4247 SDValue LHS = N->getOperand(0); 4248 SDValue RHS = N->getOperand(1); 4249 SDLoc DL(N); 4250 4251 if (N->getValueType(0) == MVT::i128) { 4252 unsigned BaseOp = 0; 4253 unsigned FlagOp = 0; 4254 switch (Op.getOpcode()) { 4255 default: llvm_unreachable("Unknown instruction!"); 4256 case ISD::UADDO: 4257 BaseOp = ISD::ADD; 4258 FlagOp = SystemZISD::VACC; 4259 break; 4260 case ISD::USUBO: 4261 BaseOp = ISD::SUB; 4262 FlagOp = SystemZISD::VSCBI; 4263 break; 4264 } 4265 SDValue Result = DAG.getNode(BaseOp, DL, MVT::i128, LHS, RHS); 4266 SDValue Flag = DAG.getNode(FlagOp, DL, MVT::i128, LHS, RHS); 4267 Flag = DAG.getNode(ISD::AssertZext, DL, MVT::i128, Flag, 4268 DAG.getValueType(MVT::i1)); 4269 Flag = DAG.getZExtOrTrunc(Flag, DL, N->getValueType(1)); 4270 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Flag); 4271 } 4272 4273 unsigned BaseOp = 0; 4274 unsigned CCValid = 0; 4275 unsigned CCMask = 0; 4276 4277 switch (Op.getOpcode()) { 4278 default: llvm_unreachable("Unknown instruction!"); 4279 case ISD::SADDO: 4280 BaseOp = SystemZISD::SADDO; 4281 CCValid = SystemZ::CCMASK_ARITH; 4282 CCMask = SystemZ::CCMASK_ARITH_OVERFLOW; 4283 break; 4284 case ISD::SSUBO: 4285 BaseOp = SystemZISD::SSUBO; 4286 CCValid = SystemZ::CCMASK_ARITH; 4287 CCMask = SystemZ::CCMASK_ARITH_OVERFLOW; 4288 break; 4289 case ISD::UADDO: 4290 BaseOp = SystemZISD::UADDO; 4291 CCValid = SystemZ::CCMASK_LOGICAL; 4292 CCMask = SystemZ::CCMASK_LOGICAL_CARRY; 4293 break; 4294 case ISD::USUBO: 4295 BaseOp = SystemZISD::USUBO; 4296 CCValid = SystemZ::CCMASK_LOGICAL; 4297 CCMask = SystemZ::CCMASK_LOGICAL_BORROW; 4298 break; 4299 } 4300 4301 SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32); 4302 SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS); 4303 4304 SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask); 4305 if (N->getValueType(1) == MVT::i1) 4306 SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC); 4307 4308 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC); 4309 } 4310 4311 static bool isAddCarryChain(SDValue Carry) { 4312 while (Carry.getOpcode() == ISD::UADDO_CARRY) 4313 Carry = Carry.getOperand(2); 4314 return Carry.getOpcode() == ISD::UADDO; 4315 } 4316 4317 static bool isSubBorrowChain(SDValue Carry) { 4318 while (Carry.getOpcode() == ISD::USUBO_CARRY) 4319 Carry = Carry.getOperand(2); 4320 return Carry.getOpcode() == ISD::USUBO; 4321 } 4322 4323 // Lower UADDO_CARRY/USUBO_CARRY nodes. 
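// Apart from i128, these are only lowered here when the carry originates
// from a UADDO/USUBO chain; otherwise the default expansion is used.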
4324 SDValue SystemZTargetLowering::lowerUADDSUBO_CARRY(SDValue Op, 4325 SelectionDAG &DAG) const { 4326 4327 SDNode *N = Op.getNode(); 4328 MVT VT = N->getSimpleValueType(0); 4329 4330 // Let legalize expand this if it isn't a legal type yet. 4331 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT)) 4332 return SDValue(); 4333 4334 SDValue LHS = N->getOperand(0); 4335 SDValue RHS = N->getOperand(1); 4336 SDValue Carry = Op.getOperand(2); 4337 SDLoc DL(N); 4338 4339 if (VT == MVT::i128) { 4340 unsigned BaseOp = 0; 4341 unsigned FlagOp = 0; 4342 switch (Op.getOpcode()) { 4343 default: llvm_unreachable("Unknown instruction!"); 4344 case ISD::UADDO_CARRY: 4345 BaseOp = SystemZISD::VAC; 4346 FlagOp = SystemZISD::VACCC; 4347 break; 4348 case ISD::USUBO_CARRY: 4349 BaseOp = SystemZISD::VSBI; 4350 FlagOp = SystemZISD::VSBCBI; 4351 break; 4352 } 4353 Carry = DAG.getZExtOrTrunc(Carry, DL, MVT::i128); 4354 SDValue Result = DAG.getNode(BaseOp, DL, MVT::i128, LHS, RHS, Carry); 4355 SDValue Flag = DAG.getNode(FlagOp, DL, MVT::i128, LHS, RHS, Carry); 4356 Flag = DAG.getNode(ISD::AssertZext, DL, MVT::i128, Flag, 4357 DAG.getValueType(MVT::i1)); 4358 Flag = DAG.getZExtOrTrunc(Flag, DL, N->getValueType(1)); 4359 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Flag); 4360 } 4361 4362 unsigned BaseOp = 0; 4363 unsigned CCValid = 0; 4364 unsigned CCMask = 0; 4365 4366 switch (Op.getOpcode()) { 4367 default: llvm_unreachable("Unknown instruction!"); 4368 case ISD::UADDO_CARRY: 4369 if (!isAddCarryChain(Carry)) 4370 return SDValue(); 4371 4372 BaseOp = SystemZISD::ADDCARRY; 4373 CCValid = SystemZ::CCMASK_LOGICAL; 4374 CCMask = SystemZ::CCMASK_LOGICAL_CARRY; 4375 break; 4376 case ISD::USUBO_CARRY: 4377 if (!isSubBorrowChain(Carry)) 4378 return SDValue(); 4379 4380 BaseOp = SystemZISD::SUBCARRY; 4381 CCValid = SystemZ::CCMASK_LOGICAL; 4382 CCMask = SystemZ::CCMASK_LOGICAL_BORROW; 4383 break; 4384 } 4385 4386 // Set the condition code from the carry flag. 4387 Carry = DAG.getNode(SystemZISD::GET_CCMASK, DL, MVT::i32, Carry, 4388 DAG.getConstant(CCValid, DL, MVT::i32), 4389 DAG.getConstant(CCMask, DL, MVT::i32)); 4390 4391 SDVTList VTs = DAG.getVTList(VT, MVT::i32); 4392 SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS, Carry); 4393 4394 SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask); 4395 if (N->getValueType(1) == MVT::i1) 4396 SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC); 4397 4398 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC); 4399 } 4400 4401 SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op, 4402 SelectionDAG &DAG) const { 4403 EVT VT = Op.getValueType(); 4404 SDLoc DL(Op); 4405 Op = Op.getOperand(0); 4406 4407 if (VT.getScalarSizeInBits() == 128) { 4408 Op = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op); 4409 Op = DAG.getNode(ISD::CTPOP, DL, MVT::v2i64, Op); 4410 SDValue Tmp = DAG.getSplatBuildVector(MVT::v2i64, DL, 4411 DAG.getConstant(0, DL, MVT::i64)); 4412 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp); 4413 return Op; 4414 } 4415 4416 // Handle vector types via VPOPCT. 
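  // VPOPCT counts the bits in each byte; wider element types combine the
  // byte counts with shifts or VSUM below.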
4417 if (VT.isVector()) { 4418 Op = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Op); 4419 Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::v16i8, Op); 4420 switch (VT.getScalarSizeInBits()) { 4421 case 8: 4422 break; 4423 case 16: { 4424 Op = DAG.getNode(ISD::BITCAST, DL, VT, Op); 4425 SDValue Shift = DAG.getConstant(8, DL, MVT::i32); 4426 SDValue Tmp = DAG.getNode(SystemZISD::VSHL_BY_SCALAR, DL, VT, Op, Shift); 4427 Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp); 4428 Op = DAG.getNode(SystemZISD::VSRL_BY_SCALAR, DL, VT, Op, Shift); 4429 break; 4430 } 4431 case 32: { 4432 SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL, 4433 DAG.getConstant(0, DL, MVT::i32)); 4434 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp); 4435 break; 4436 } 4437 case 64: { 4438 SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL, 4439 DAG.getConstant(0, DL, MVT::i32)); 4440 Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Tmp); 4441 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp); 4442 break; 4443 } 4444 default: 4445 llvm_unreachable("Unexpected type"); 4446 } 4447 return Op; 4448 } 4449 4450 // Get the known-zero mask for the operand. 4451 KnownBits Known = DAG.computeKnownBits(Op); 4452 unsigned NumSignificantBits = Known.getMaxValue().getActiveBits(); 4453 if (NumSignificantBits == 0) 4454 return DAG.getConstant(0, DL, VT); 4455 4456 // Skip known-zero high parts of the operand. 4457 int64_t OrigBitSize = VT.getSizeInBits(); 4458 int64_t BitSize = llvm::bit_ceil(NumSignificantBits); 4459 BitSize = std::min(BitSize, OrigBitSize); 4460 4461 // The POPCNT instruction counts the number of bits in each byte. 4462 Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op); 4463 Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::i64, Op); 4464 Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op); 4465 4466 // Add up per-byte counts in a binary tree. All bits of Op at 4467 // position larger than BitSize remain zero throughout. 4468 for (int64_t I = BitSize / 2; I >= 8; I = I / 2) { 4469 SDValue Tmp = DAG.getNode(ISD::SHL, DL, VT, Op, DAG.getConstant(I, DL, VT)); 4470 if (BitSize != OrigBitSize) 4471 Tmp = DAG.getNode(ISD::AND, DL, VT, Tmp, 4472 DAG.getConstant(((uint64_t)1 << BitSize) - 1, DL, VT)); 4473 Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp); 4474 } 4475 4476 // Extract overall result from high byte. 4477 if (BitSize > 8) 4478 Op = DAG.getNode(ISD::SRL, DL, VT, Op, 4479 DAG.getConstant(BitSize - 8, DL, VT)); 4480 4481 return Op; 4482 } 4483 4484 SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op, 4485 SelectionDAG &DAG) const { 4486 SDLoc DL(Op); 4487 AtomicOrdering FenceOrdering = 4488 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1)); 4489 SyncScope::ID FenceSSID = 4490 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2)); 4491 4492 // The only fence that needs an instruction is a sequentially-consistent 4493 // cross-thread fence. 4494 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent && 4495 FenceSSID == SyncScope::System) { 4496 return SDValue(DAG.getMachineNode(SystemZ::Serialize, DL, MVT::Other, 4497 Op.getOperand(0)), 4498 0); 4499 } 4500 4501 // MEMBARRIER is a compiler barrier; it codegens to a no-op. 4502 return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0)); 4503 } 4504 4505 // Op is an atomic load. Lower it into a normal volatile load. 
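// As an informal sketch: on z/Architecture, naturally aligned accesses of
// up to 8 bytes are already single-copy atomic, so for example
//   %v = load atomic i32, ptr %p acquire, align 4
// can simply become an ordinary 4-byte load; getTargetMMOFlags() below
// marks such MMOs volatile so later combines keep the access intact.
// i128 atomic loads instead go through LowerOperationWrapper so that the
// legal (vector register) and non-legal i128 configurations share one path.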
4506 SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op, 4507 SelectionDAG &DAG) const { 4508 auto *Node = cast<AtomicSDNode>(Op.getNode()); 4509 if (Node->getMemoryVT() == MVT::i128) { 4510 // Use same code to handle both legal and non-legal i128 types. 4511 SmallVector<SDValue, 2> Results; 4512 LowerOperationWrapper(Node, Results, DAG); 4513 return DAG.getMergeValues(Results, SDLoc(Op)); 4514 } 4515 return DAG.getExtLoad(ISD::EXTLOAD, SDLoc(Op), Op.getValueType(), 4516 Node->getChain(), Node->getBasePtr(), 4517 Node->getMemoryVT(), Node->getMemOperand()); 4518 } 4519 4520 // Op is an atomic store. Lower it into a normal volatile store. 4521 SDValue SystemZTargetLowering::lowerATOMIC_STORE(SDValue Op, 4522 SelectionDAG &DAG) const { 4523 auto *Node = cast<AtomicSDNode>(Op.getNode()); 4524 if (Node->getMemoryVT() == MVT::i128) { 4525 // Use same code to handle both legal and non-legal i128 types. 4526 SmallVector<SDValue, 1> Results; 4527 LowerOperationWrapper(Node, Results, DAG); 4528 return DAG.getMergeValues(Results, SDLoc(Op)); 4529 } 4530 SDValue Chain = DAG.getTruncStore(Node->getChain(), SDLoc(Op), Node->getVal(), 4531 Node->getBasePtr(), Node->getMemoryVT(), 4532 Node->getMemOperand()); 4533 // We have to enforce sequential consistency by performing a 4534 // serialization operation after the store. 4535 if (Node->getSuccessOrdering() == AtomicOrdering::SequentiallyConsistent) 4536 Chain = SDValue(DAG.getMachineNode(SystemZ::Serialize, SDLoc(Op), 4537 MVT::Other, Chain), 0); 4538 return Chain; 4539 } 4540 4541 // Prepare for a Compare And Swap for a subword operation. This needs to be 4542 // done in memory with 4 bytes at natural alignment. 4543 static void getCSAddressAndShifts(SDValue Addr, SelectionDAG &DAG, SDLoc DL, 4544 SDValue &AlignedAddr, SDValue &BitShift, 4545 SDValue &NegBitShift) { 4546 EVT PtrVT = Addr.getValueType(); 4547 EVT WideVT = MVT::i32; 4548 4549 // Get the address of the containing word. 4550 AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr, 4551 DAG.getConstant(-4, DL, PtrVT)); 4552 4553 // Get the number of bits that the word must be rotated left in order 4554 // to bring the field to the top bits of a GR32. 4555 BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr, 4556 DAG.getConstant(3, DL, PtrVT)); 4557 BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift); 4558 4559 // Get the complementing shift amount, for rotating a field in the top 4560 // bits back to its proper position. 4561 NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT, 4562 DAG.getConstant(0, DL, WideVT), BitShift); 4563 4564 } 4565 4566 // Op is an 8-, 16-bit or 32-bit ATOMIC_LOAD_* operation. Lower the first 4567 // two into the fullword ATOMIC_LOADW_* operation given by Opcode. 4568 SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op, 4569 SelectionDAG &DAG, 4570 unsigned Opcode) const { 4571 auto *Node = cast<AtomicSDNode>(Op.getNode()); 4572 4573 // 32-bit operations need no special handling. 4574 EVT NarrowVT = Node->getMemoryVT(); 4575 EVT WideVT = MVT::i32; 4576 if (NarrowVT == WideVT) 4577 return Op; 4578 4579 int64_t BitSize = NarrowVT.getSizeInBits(); 4580 SDValue ChainIn = Node->getChain(); 4581 SDValue Addr = Node->getBasePtr(); 4582 SDValue Src2 = Node->getVal(); 4583 MachineMemOperand *MMO = Node->getMemOperand(); 4584 SDLoc DL(Node); 4585 4586 // Convert atomic subtracts of constants into additions. 
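// For example (illustrative): an 8- or 16-bit "atomicrmw sub ..., 1" is
// rewritten here as an ATOMIC_LOADW_ADD of the constant -1, so the
// compare-and-swap loop only ever needs the addition form when the
// subtracted value is a constant.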
4587 if (Opcode == SystemZISD::ATOMIC_LOADW_SUB) 4588 if (auto *Const = dyn_cast<ConstantSDNode>(Src2)) { 4589 Opcode = SystemZISD::ATOMIC_LOADW_ADD; 4590 Src2 = DAG.getConstant(-Const->getSExtValue(), DL, Src2.getValueType()); 4591 } 4592 4593 SDValue AlignedAddr, BitShift, NegBitShift; 4594 getCSAddressAndShifts(Addr, DAG, DL, AlignedAddr, BitShift, NegBitShift); 4595 4596 // Extend the source operand to 32 bits and prepare it for the inner loop. 4597 // ATOMIC_SWAPW uses RISBG to rotate the field left, but all other 4598 // operations require the source to be shifted in advance. (This shift 4599 // can be folded if the source is constant.) For AND and NAND, the lower 4600 // bits must be set, while for other opcodes they should be left clear. 4601 if (Opcode != SystemZISD::ATOMIC_SWAPW) 4602 Src2 = DAG.getNode(ISD::SHL, DL, WideVT, Src2, 4603 DAG.getConstant(32 - BitSize, DL, WideVT)); 4604 if (Opcode == SystemZISD::ATOMIC_LOADW_AND || 4605 Opcode == SystemZISD::ATOMIC_LOADW_NAND) 4606 Src2 = DAG.getNode(ISD::OR, DL, WideVT, Src2, 4607 DAG.getConstant(uint32_t(-1) >> BitSize, DL, WideVT)); 4608 4609 // Construct the ATOMIC_LOADW_* node. 4610 SDVTList VTList = DAG.getVTList(WideVT, MVT::Other); 4611 SDValue Ops[] = { ChainIn, AlignedAddr, Src2, BitShift, NegBitShift, 4612 DAG.getConstant(BitSize, DL, WideVT) }; 4613 SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode, DL, VTList, Ops, 4614 NarrowVT, MMO); 4615 4616 // Rotate the result of the final CS so that the field is in the lower 4617 // bits of a GR32, then truncate it. 4618 SDValue ResultShift = DAG.getNode(ISD::ADD, DL, WideVT, BitShift, 4619 DAG.getConstant(BitSize, DL, WideVT)); 4620 SDValue Result = DAG.getNode(ISD::ROTL, DL, WideVT, AtomicOp, ResultShift); 4621 4622 SDValue RetOps[2] = { Result, AtomicOp.getValue(1) }; 4623 return DAG.getMergeValues(RetOps, DL); 4624 } 4625 4626 // Op is an ATOMIC_LOAD_SUB operation. Lower 8- and 16-bit operations into 4627 // ATOMIC_LOADW_SUBs and convert 32- and 64-bit operations into additions. 4628 SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op, 4629 SelectionDAG &DAG) const { 4630 auto *Node = cast<AtomicSDNode>(Op.getNode()); 4631 EVT MemVT = Node->getMemoryVT(); 4632 if (MemVT == MVT::i32 || MemVT == MVT::i64) { 4633 // A full-width operation: negate and use LAA(G). 4634 assert(Op.getValueType() == MemVT && "Mismatched VTs"); 4635 assert(Subtarget.hasInterlockedAccess1() && 4636 "Should have been expanded by AtomicExpand pass."); 4637 SDValue Src2 = Node->getVal(); 4638 SDLoc DL(Src2); 4639 SDValue NegSrc2 = 4640 DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, DL, MemVT), Src2); 4641 return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, MemVT, 4642 Node->getChain(), Node->getBasePtr(), NegSrc2, 4643 Node->getMemOperand()); 4644 } 4645 4646 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_SUB); 4647 } 4648 4649 // Lower 8/16/32/64-bit ATOMIC_CMP_SWAP_WITH_SUCCESS node. 4650 SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op, 4651 SelectionDAG &DAG) const { 4652 auto *Node = cast<AtomicSDNode>(Op.getNode()); 4653 SDValue ChainIn = Node->getOperand(0); 4654 SDValue Addr = Node->getOperand(1); 4655 SDValue CmpVal = Node->getOperand(2); 4656 SDValue SwapVal = Node->getOperand(3); 4657 MachineMemOperand *MMO = Node->getMemOperand(); 4658 SDLoc DL(Node); 4659 4660 if (Node->getMemoryVT() == MVT::i128) { 4661 // Use same code to handle both legal and non-legal i128 types. 
4662 SmallVector<SDValue, 3> Results;
4663 LowerOperationWrapper(Node, Results, DAG);
4664 return DAG.getMergeValues(Results, DL);
4665 }
4666
4667 // We have native support for 32-bit and 64-bit compare and swap, but we
4668 // still need to expand extracting the "success" result from the CC.
4669 EVT NarrowVT = Node->getMemoryVT();
4670 EVT WideVT = NarrowVT == MVT::i64 ? MVT::i64 : MVT::i32;
4671 if (NarrowVT == WideVT) {
4672 SDVTList Tys = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
4673 SDValue Ops[] = { ChainIn, Addr, CmpVal, SwapVal };
4674 SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP,
4675 DL, Tys, Ops, NarrowVT, MMO);
4676 SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
4677 SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ);
4678
4679 DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), AtomicOp.getValue(0));
4680 DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
4681 DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
4682 return SDValue();
4683 }
4684
4685 // Convert 8-bit and 16-bit compare and swap to a loop, implemented
4686 // via a fullword ATOMIC_CMP_SWAPW operation.
4687 int64_t BitSize = NarrowVT.getSizeInBits();
4688
4689 SDValue AlignedAddr, BitShift, NegBitShift;
4690 getCSAddressAndShifts(Addr, DAG, DL, AlignedAddr, BitShift, NegBitShift);
4691
4692 // Construct the ATOMIC_CMP_SWAPW node.
4693 SDVTList VTList = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
4694 SDValue Ops[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift,
4695 NegBitShift, DAG.getConstant(BitSize, DL, WideVT) };
4696 SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAPW, DL,
4697 VTList, Ops, NarrowVT, MMO);
4698 SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
4699 SystemZ::CCMASK_ICMP, SystemZ::CCMASK_CMP_EQ);
4700
4701 // emitAtomicCmpSwapW() will zero extend the result (original value).
4702 SDValue OrigVal = DAG.getNode(ISD::AssertZext, DL, WideVT, AtomicOp.getValue(0),
4703 DAG.getValueType(NarrowVT));
4704 DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), OrigVal);
4705 DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
4706 DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
4707 return SDValue();
4708 }
4709
4710 MachineMemOperand::Flags
4711 SystemZTargetLowering::getTargetMMOFlags(const Instruction &I) const {
4712 // Because of how we convert atomic_load and atomic_store to normal loads and
4713 // stores in the DAG, we need to ensure that the MMOs are marked volatile
4714 // since DAGCombine hasn't been updated to account for atomic, but non
4715 // volatile loads.
(See D57601) 4716 if (auto *SI = dyn_cast<StoreInst>(&I)) 4717 if (SI->isAtomic()) 4718 return MachineMemOperand::MOVolatile; 4719 if (auto *LI = dyn_cast<LoadInst>(&I)) 4720 if (LI->isAtomic()) 4721 return MachineMemOperand::MOVolatile; 4722 if (auto *AI = dyn_cast<AtomicRMWInst>(&I)) 4723 if (AI->isAtomic()) 4724 return MachineMemOperand::MOVolatile; 4725 if (auto *AI = dyn_cast<AtomicCmpXchgInst>(&I)) 4726 if (AI->isAtomic()) 4727 return MachineMemOperand::MOVolatile; 4728 return MachineMemOperand::MONone; 4729 } 4730 4731 SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op, 4732 SelectionDAG &DAG) const { 4733 MachineFunction &MF = DAG.getMachineFunction(); 4734 auto *Regs = Subtarget.getSpecialRegisters(); 4735 if (MF.getFunction().getCallingConv() == CallingConv::GHC) 4736 report_fatal_error("Variable-sized stack allocations are not supported " 4737 "in GHC calling convention"); 4738 return DAG.getCopyFromReg(Op.getOperand(0), SDLoc(Op), 4739 Regs->getStackPointerRegister(), Op.getValueType()); 4740 } 4741 4742 SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op, 4743 SelectionDAG &DAG) const { 4744 MachineFunction &MF = DAG.getMachineFunction(); 4745 auto *Regs = Subtarget.getSpecialRegisters(); 4746 bool StoreBackchain = MF.getSubtarget<SystemZSubtarget>().hasBackChain(); 4747 4748 if (MF.getFunction().getCallingConv() == CallingConv::GHC) 4749 report_fatal_error("Variable-sized stack allocations are not supported " 4750 "in GHC calling convention"); 4751 4752 SDValue Chain = Op.getOperand(0); 4753 SDValue NewSP = Op.getOperand(1); 4754 SDValue Backchain; 4755 SDLoc DL(Op); 4756 4757 if (StoreBackchain) { 4758 SDValue OldSP = DAG.getCopyFromReg( 4759 Chain, DL, Regs->getStackPointerRegister(), MVT::i64); 4760 Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG), 4761 MachinePointerInfo()); 4762 } 4763 4764 Chain = DAG.getCopyToReg(Chain, DL, Regs->getStackPointerRegister(), NewSP); 4765 4766 if (StoreBackchain) 4767 Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG), 4768 MachinePointerInfo()); 4769 4770 return Chain; 4771 } 4772 4773 SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op, 4774 SelectionDAG &DAG) const { 4775 bool IsData = Op.getConstantOperandVal(4); 4776 if (!IsData) 4777 // Just preserve the chain. 4778 return Op.getOperand(0); 4779 4780 SDLoc DL(Op); 4781 bool IsWrite = Op.getConstantOperandVal(2); 4782 unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ; 4783 auto *Node = cast<MemIntrinsicSDNode>(Op.getNode()); 4784 SDValue Ops[] = {Op.getOperand(0), DAG.getTargetConstant(Code, DL, MVT::i32), 4785 Op.getOperand(1)}; 4786 return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, DL, 4787 Node->getVTList(), Ops, 4788 Node->getMemoryVT(), Node->getMemOperand()); 4789 } 4790 4791 // Convert condition code in CCReg to an i32 value. 
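// Rough illustration: IPM places the condition code in bits 29..28 of the
// 32-bit result (the bits above the CC field are clear), so a logical
// shift right by SystemZ::IPM_CC (28) leaves the raw CC value 0..3 in the
// low bits, e.g.
//   CC == 2  ->  IPM result 0x20000000  ->  SRL by IPM_CC  ->  2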
4792 static SDValue getCCResult(SelectionDAG &DAG, SDValue CCReg) { 4793 SDLoc DL(CCReg); 4794 SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, CCReg); 4795 return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM, 4796 DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32)); 4797 } 4798 4799 SDValue 4800 SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, 4801 SelectionDAG &DAG) const { 4802 unsigned Opcode, CCValid; 4803 if (isIntrinsicWithCCAndChain(Op, Opcode, CCValid)) { 4804 assert(Op->getNumValues() == 2 && "Expected only CC result and chain"); 4805 SDNode *Node = emitIntrinsicWithCCAndChain(DAG, Op, Opcode); 4806 SDValue CC = getCCResult(DAG, SDValue(Node, 0)); 4807 DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), CC); 4808 return SDValue(); 4809 } 4810 4811 return SDValue(); 4812 } 4813 4814 SDValue 4815 SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, 4816 SelectionDAG &DAG) const { 4817 unsigned Opcode, CCValid; 4818 if (isIntrinsicWithCC(Op, Opcode, CCValid)) { 4819 SDNode *Node = emitIntrinsicWithCC(DAG, Op, Opcode); 4820 if (Op->getNumValues() == 1) 4821 return getCCResult(DAG, SDValue(Node, 0)); 4822 assert(Op->getNumValues() == 2 && "Expected a CC and non-CC result"); 4823 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), Op->getVTList(), 4824 SDValue(Node, 0), getCCResult(DAG, SDValue(Node, 1))); 4825 } 4826 4827 unsigned Id = Op.getConstantOperandVal(0); 4828 switch (Id) { 4829 case Intrinsic::thread_pointer: 4830 return lowerThreadPointer(SDLoc(Op), DAG); 4831 4832 case Intrinsic::s390_vpdi: 4833 return DAG.getNode(SystemZISD::PERMUTE_DWORDS, SDLoc(Op), Op.getValueType(), 4834 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); 4835 4836 case Intrinsic::s390_vperm: 4837 return DAG.getNode(SystemZISD::PERMUTE, SDLoc(Op), Op.getValueType(), 4838 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); 4839 4840 case Intrinsic::s390_vuphb: 4841 case Intrinsic::s390_vuphh: 4842 case Intrinsic::s390_vuphf: 4843 return DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(Op), Op.getValueType(), 4844 Op.getOperand(1)); 4845 4846 case Intrinsic::s390_vuplhb: 4847 case Intrinsic::s390_vuplhh: 4848 case Intrinsic::s390_vuplhf: 4849 return DAG.getNode(SystemZISD::UNPACKL_HIGH, SDLoc(Op), Op.getValueType(), 4850 Op.getOperand(1)); 4851 4852 case Intrinsic::s390_vuplb: 4853 case Intrinsic::s390_vuplhw: 4854 case Intrinsic::s390_vuplf: 4855 return DAG.getNode(SystemZISD::UNPACK_LOW, SDLoc(Op), Op.getValueType(), 4856 Op.getOperand(1)); 4857 4858 case Intrinsic::s390_vupllb: 4859 case Intrinsic::s390_vupllh: 4860 case Intrinsic::s390_vupllf: 4861 return DAG.getNode(SystemZISD::UNPACKL_LOW, SDLoc(Op), Op.getValueType(), 4862 Op.getOperand(1)); 4863 4864 case Intrinsic::s390_vsumb: 4865 case Intrinsic::s390_vsumh: 4866 case Intrinsic::s390_vsumgh: 4867 case Intrinsic::s390_vsumgf: 4868 case Intrinsic::s390_vsumqf: 4869 case Intrinsic::s390_vsumqg: 4870 return DAG.getNode(SystemZISD::VSUM, SDLoc(Op), Op.getValueType(), 4871 Op.getOperand(1), Op.getOperand(2)); 4872 4873 case Intrinsic::s390_vaq: 4874 return DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(), 4875 Op.getOperand(1), Op.getOperand(2)); 4876 case Intrinsic::s390_vaccb: 4877 case Intrinsic::s390_vacch: 4878 case Intrinsic::s390_vaccf: 4879 case Intrinsic::s390_vaccg: 4880 case Intrinsic::s390_vaccq: 4881 return DAG.getNode(SystemZISD::VACC, SDLoc(Op), Op.getValueType(), 4882 Op.getOperand(1), Op.getOperand(2)); 4883 case Intrinsic::s390_vacq: 4884 return DAG.getNode(SystemZISD::VAC, SDLoc(Op), Op.getValueType(), 4885 
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4886 case Intrinsic::s390_vacccq:
4887 return DAG.getNode(SystemZISD::VACCC, SDLoc(Op), Op.getValueType(),
4888 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4889
4890 case Intrinsic::s390_vsq:
4891 return DAG.getNode(ISD::SUB, SDLoc(Op), Op.getValueType(),
4892 Op.getOperand(1), Op.getOperand(2));
4893 case Intrinsic::s390_vscbib:
4894 case Intrinsic::s390_vscbih:
4895 case Intrinsic::s390_vscbif:
4896 case Intrinsic::s390_vscbig:
4897 case Intrinsic::s390_vscbiq:
4898 return DAG.getNode(SystemZISD::VSCBI, SDLoc(Op), Op.getValueType(),
4899 Op.getOperand(1), Op.getOperand(2));
4900 case Intrinsic::s390_vsbiq:
4901 return DAG.getNode(SystemZISD::VSBI, SDLoc(Op), Op.getValueType(),
4902 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4903 case Intrinsic::s390_vsbcbiq:
4904 return DAG.getNode(SystemZISD::VSBCBI, SDLoc(Op), Op.getValueType(),
4905 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4906 }
4907
4908 return SDValue();
4909 }
4910
4911 namespace {
4912 // Says that SystemZISD operation Opcode can be used to perform the equivalent
4913 // of a VPERM with permute vector Bytes. If Opcode takes three operands,
4914 // Operand is the constant third operand, otherwise it is the number of
4915 // bytes in each element of the result.
4916 struct Permute {
4917 unsigned Opcode;
4918 unsigned Operand;
4919 unsigned char Bytes[SystemZ::VectorBytes];
4920 };
4921 }
4922
4923 static const Permute PermuteForms[] = {
4924 // VMRHG
4925 { SystemZISD::MERGE_HIGH, 8,
4926 { 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 } },
4927 // VMRHF
4928 { SystemZISD::MERGE_HIGH, 4,
4929 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
4930 // VMRHH
4931 { SystemZISD::MERGE_HIGH, 2,
4932 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
4933 // VMRHB
4934 { SystemZISD::MERGE_HIGH, 1,
4935 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
4936 // VMRLG
4937 { SystemZISD::MERGE_LOW, 8,
4938 { 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31 } },
4939 // VMRLF
4940 { SystemZISD::MERGE_LOW, 4,
4941 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
4942 // VMRLH
4943 { SystemZISD::MERGE_LOW, 2,
4944 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
4945 // VMRLB
4946 { SystemZISD::MERGE_LOW, 1,
4947 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
4948 // VPKG
4949 { SystemZISD::PACK, 4,
4950 { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 } },
4951 // VPKF
4952 { SystemZISD::PACK, 2,
4953 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
4954 // VPKH
4955 { SystemZISD::PACK, 1,
4956 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
4957 // VPDI V1, V2, 4 (low half of V1, high half of V2)
4958 { SystemZISD::PERMUTE_DWORDS, 4,
4959 { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 } },
4960 // VPDI V1, V2, 1 (high half of V1, low half of V2)
4961 { SystemZISD::PERMUTE_DWORDS, 1,
4962 { 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 } }
4963 };
4964
4965 // Called after matching a vector shuffle against a particular pattern.
4966 // Both the original shuffle and the pattern have two vector operands.
4967 // OpNos[0] is the operand of the original shuffle that should be used for
4968 // operand 0 of the pattern, or -1 if operand 0 of the pattern can be anything.
4969 // OpNos[1] is the same for operand 1 of the pattern.
Resolve these -1s and 4970 // set OpNo0 and OpNo1 to the shuffle operands that should actually be used 4971 // for operands 0 and 1 of the pattern. 4972 static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1) { 4973 if (OpNos[0] < 0) { 4974 if (OpNos[1] < 0) 4975 return false; 4976 OpNo0 = OpNo1 = OpNos[1]; 4977 } else if (OpNos[1] < 0) { 4978 OpNo0 = OpNo1 = OpNos[0]; 4979 } else { 4980 OpNo0 = OpNos[0]; 4981 OpNo1 = OpNos[1]; 4982 } 4983 return true; 4984 } 4985 4986 // Bytes is a VPERM-like permute vector, except that -1 is used for 4987 // undefined bytes. Return true if the VPERM can be implemented using P. 4988 // When returning true set OpNo0 to the VPERM operand that should be 4989 // used for operand 0 of P and likewise OpNo1 for operand 1 of P. 4990 // 4991 // For example, if swapping the VPERM operands allows P to match, OpNo0 4992 // will be 1 and OpNo1 will be 0. If instead Bytes only refers to one 4993 // operand, but rewriting it to use two duplicated operands allows it to 4994 // match P, then OpNo0 and OpNo1 will be the same. 4995 static bool matchPermute(const SmallVectorImpl<int> &Bytes, const Permute &P, 4996 unsigned &OpNo0, unsigned &OpNo1) { 4997 int OpNos[] = { -1, -1 }; 4998 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) { 4999 int Elt = Bytes[I]; 5000 if (Elt >= 0) { 5001 // Make sure that the two permute vectors use the same suboperand 5002 // byte number. Only the operand numbers (the high bits) are 5003 // allowed to differ. 5004 if ((Elt ^ P.Bytes[I]) & (SystemZ::VectorBytes - 1)) 5005 return false; 5006 int ModelOpNo = P.Bytes[I] / SystemZ::VectorBytes; 5007 int RealOpNo = unsigned(Elt) / SystemZ::VectorBytes; 5008 // Make sure that the operand mappings are consistent with previous 5009 // elements. 5010 if (OpNos[ModelOpNo] == 1 - RealOpNo) 5011 return false; 5012 OpNos[ModelOpNo] = RealOpNo; 5013 } 5014 } 5015 return chooseShuffleOpNos(OpNos, OpNo0, OpNo1); 5016 } 5017 5018 // As above, but search for a matching permute. 5019 static const Permute *matchPermute(const SmallVectorImpl<int> &Bytes, 5020 unsigned &OpNo0, unsigned &OpNo1) { 5021 for (auto &P : PermuteForms) 5022 if (matchPermute(Bytes, P, OpNo0, OpNo1)) 5023 return &P; 5024 return nullptr; 5025 } 5026 5027 // Bytes is a VPERM-like permute vector, except that -1 is used for 5028 // undefined bytes. This permute is an operand of an outer permute. 5029 // See whether redistributing the -1 bytes gives a shuffle that can be 5030 // implemented using P. If so, set Transform to a VPERM-like permute vector 5031 // that, when applied to the result of P, gives the original permute in Bytes. 5032 static bool matchDoublePermute(const SmallVectorImpl<int> &Bytes, 5033 const Permute &P, 5034 SmallVectorImpl<int> &Transform) { 5035 unsigned To = 0; 5036 for (unsigned From = 0; From < SystemZ::VectorBytes; ++From) { 5037 int Elt = Bytes[From]; 5038 if (Elt < 0) 5039 // Byte number From of the result is undefined. 5040 Transform[From] = -1; 5041 else { 5042 while (P.Bytes[To] != Elt) { 5043 To += 1; 5044 if (To == SystemZ::VectorBytes) 5045 return false; 5046 } 5047 Transform[From] = To; 5048 } 5049 } 5050 return true; 5051 } 5052 5053 // As above, but search for a matching permute. 
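// Worked example (illustrative): a v2i64 shuffle with mask <0, 2>, taking
// element 0 of each operand, expands to the byte-level permute vector
//   { 0,1,2,3,4,5,6,7, 16,17,18,19,20,21,22,23 }
// which is exactly the VMRHG entry of PermuteForms, so matchPermute()
// succeeds with OpNo0 = 0 and OpNo1 = 1 and the shuffle can be emitted as
// a single merge-high rather than a general VPERM.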
5054 static const Permute *matchDoublePermute(const SmallVectorImpl<int> &Bytes, 5055 SmallVectorImpl<int> &Transform) { 5056 for (auto &P : PermuteForms) 5057 if (matchDoublePermute(Bytes, P, Transform)) 5058 return &P; 5059 return nullptr; 5060 } 5061 5062 // Convert the mask of the given shuffle op into a byte-level mask, 5063 // as if it had type vNi8. 5064 static bool getVPermMask(SDValue ShuffleOp, 5065 SmallVectorImpl<int> &Bytes) { 5066 EVT VT = ShuffleOp.getValueType(); 5067 unsigned NumElements = VT.getVectorNumElements(); 5068 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize(); 5069 5070 if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(ShuffleOp)) { 5071 Bytes.resize(NumElements * BytesPerElement, -1); 5072 for (unsigned I = 0; I < NumElements; ++I) { 5073 int Index = VSN->getMaskElt(I); 5074 if (Index >= 0) 5075 for (unsigned J = 0; J < BytesPerElement; ++J) 5076 Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J; 5077 } 5078 return true; 5079 } 5080 if (SystemZISD::SPLAT == ShuffleOp.getOpcode() && 5081 isa<ConstantSDNode>(ShuffleOp.getOperand(1))) { 5082 unsigned Index = ShuffleOp.getConstantOperandVal(1); 5083 Bytes.resize(NumElements * BytesPerElement, -1); 5084 for (unsigned I = 0; I < NumElements; ++I) 5085 for (unsigned J = 0; J < BytesPerElement; ++J) 5086 Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J; 5087 return true; 5088 } 5089 return false; 5090 } 5091 5092 // Bytes is a VPERM-like permute vector, except that -1 is used for 5093 // undefined bytes. See whether bytes [Start, Start + BytesPerElement) of 5094 // the result come from a contiguous sequence of bytes from one input. 5095 // Set Base to the selector for the first byte if so. 5096 static bool getShuffleInput(const SmallVectorImpl<int> &Bytes, unsigned Start, 5097 unsigned BytesPerElement, int &Base) { 5098 Base = -1; 5099 for (unsigned I = 0; I < BytesPerElement; ++I) { 5100 if (Bytes[Start + I] >= 0) { 5101 unsigned Elem = Bytes[Start + I]; 5102 if (Base < 0) { 5103 Base = Elem - I; 5104 // Make sure the bytes would come from one input operand. 5105 if (unsigned(Base) % Bytes.size() + BytesPerElement > Bytes.size()) 5106 return false; 5107 } else if (unsigned(Base) != Elem - I) 5108 return false; 5109 } 5110 } 5111 return true; 5112 } 5113 5114 // Bytes is a VPERM-like permute vector, except that -1 is used for 5115 // undefined bytes. Return true if it can be performed using VSLDB. 5116 // When returning true, set StartIndex to the shift amount and OpNo0 5117 // and OpNo1 to the VPERM operands that should be used as the first 5118 // and second shift operand respectively. 5119 static bool isShlDoublePermute(const SmallVectorImpl<int> &Bytes, 5120 unsigned &StartIndex, unsigned &OpNo0, 5121 unsigned &OpNo1) { 5122 int OpNos[] = { -1, -1 }; 5123 int Shift = -1; 5124 for (unsigned I = 0; I < 16; ++I) { 5125 int Index = Bytes[I]; 5126 if (Index >= 0) { 5127 int ExpectedShift = (Index - I) % SystemZ::VectorBytes; 5128 int ModelOpNo = unsigned(ExpectedShift + I) / SystemZ::VectorBytes; 5129 int RealOpNo = unsigned(Index) / SystemZ::VectorBytes; 5130 if (Shift < 0) 5131 Shift = ExpectedShift; 5132 else if (Shift != ExpectedShift) 5133 return false; 5134 // Make sure that the operand mappings are consistent with previous 5135 // elements. 
5136 if (OpNos[ModelOpNo] == 1 - RealOpNo) 5137 return false; 5138 OpNos[ModelOpNo] = RealOpNo; 5139 } 5140 } 5141 StartIndex = Shift; 5142 return chooseShuffleOpNos(OpNos, OpNo0, OpNo1); 5143 } 5144 5145 // Create a node that performs P on operands Op0 and Op1, casting the 5146 // operands to the appropriate type. The type of the result is determined by P. 5147 static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL, 5148 const Permute &P, SDValue Op0, SDValue Op1) { 5149 // VPDI (PERMUTE_DWORDS) always operates on v2i64s. The input 5150 // elements of a PACK are twice as wide as the outputs. 5151 unsigned InBytes = (P.Opcode == SystemZISD::PERMUTE_DWORDS ? 8 : 5152 P.Opcode == SystemZISD::PACK ? P.Operand * 2 : 5153 P.Operand); 5154 // Cast both operands to the appropriate type. 5155 MVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBytes * 8), 5156 SystemZ::VectorBytes / InBytes); 5157 Op0 = DAG.getNode(ISD::BITCAST, DL, InVT, Op0); 5158 Op1 = DAG.getNode(ISD::BITCAST, DL, InVT, Op1); 5159 SDValue Op; 5160 if (P.Opcode == SystemZISD::PERMUTE_DWORDS) { 5161 SDValue Op2 = DAG.getTargetConstant(P.Operand, DL, MVT::i32); 5162 Op = DAG.getNode(SystemZISD::PERMUTE_DWORDS, DL, InVT, Op0, Op1, Op2); 5163 } else if (P.Opcode == SystemZISD::PACK) { 5164 MVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(P.Operand * 8), 5165 SystemZ::VectorBytes / P.Operand); 5166 Op = DAG.getNode(SystemZISD::PACK, DL, OutVT, Op0, Op1); 5167 } else { 5168 Op = DAG.getNode(P.Opcode, DL, InVT, Op0, Op1); 5169 } 5170 return Op; 5171 } 5172 5173 static bool isZeroVector(SDValue N) { 5174 if (N->getOpcode() == ISD::BITCAST) 5175 N = N->getOperand(0); 5176 if (N->getOpcode() == ISD::SPLAT_VECTOR) 5177 if (auto *Op = dyn_cast<ConstantSDNode>(N->getOperand(0))) 5178 return Op->getZExtValue() == 0; 5179 return ISD::isBuildVectorAllZeros(N.getNode()); 5180 } 5181 5182 // Return the index of the zero/undef vector, or UINT32_MAX if not found. 5183 static uint32_t findZeroVectorIdx(SDValue *Ops, unsigned Num) { 5184 for (unsigned I = 0; I < Num ; I++) 5185 if (isZeroVector(Ops[I])) 5186 return I; 5187 return UINT32_MAX; 5188 } 5189 5190 // Bytes is a VPERM-like permute vector, except that -1 is used for 5191 // undefined bytes. Implement it on operands Ops[0] and Ops[1] using 5192 // VSLDB or VPERM. 5193 static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL, 5194 SDValue *Ops, 5195 const SmallVectorImpl<int> &Bytes) { 5196 for (unsigned I = 0; I < 2; ++I) 5197 Ops[I] = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Ops[I]); 5198 5199 // First see whether VSLDB can be used. 5200 unsigned StartIndex, OpNo0, OpNo1; 5201 if (isShlDoublePermute(Bytes, StartIndex, OpNo0, OpNo1)) 5202 return DAG.getNode(SystemZISD::SHL_DOUBLE, DL, MVT::v16i8, Ops[OpNo0], 5203 Ops[OpNo1], 5204 DAG.getTargetConstant(StartIndex, DL, MVT::i32)); 5205 5206 // Fall back on VPERM. Construct an SDNode for the permute vector. Try to 5207 // eliminate a zero vector by reusing any zero index in the permute vector. 5208 unsigned ZeroVecIdx = findZeroVectorIdx(&Ops[0], 2); 5209 if (ZeroVecIdx != UINT32_MAX) { 5210 bool MaskFirst = true; 5211 int ZeroIdx = -1; 5212 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) { 5213 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes; 5214 unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes; 5215 if (OpNo == ZeroVecIdx && I == 0) { 5216 // If the first byte is zero, use mask as first operand. 
5217 ZeroIdx = 0; 5218 break; 5219 } 5220 if (OpNo != ZeroVecIdx && Byte == 0) { 5221 // If mask contains a zero, use it by placing that vector first. 5222 ZeroIdx = I + SystemZ::VectorBytes; 5223 MaskFirst = false; 5224 break; 5225 } 5226 } 5227 if (ZeroIdx != -1) { 5228 SDValue IndexNodes[SystemZ::VectorBytes]; 5229 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) { 5230 if (Bytes[I] >= 0) { 5231 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes; 5232 unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes; 5233 if (OpNo == ZeroVecIdx) 5234 IndexNodes[I] = DAG.getConstant(ZeroIdx, DL, MVT::i32); 5235 else { 5236 unsigned BIdx = MaskFirst ? Byte + SystemZ::VectorBytes : Byte; 5237 IndexNodes[I] = DAG.getConstant(BIdx, DL, MVT::i32); 5238 } 5239 } else 5240 IndexNodes[I] = DAG.getUNDEF(MVT::i32); 5241 } 5242 SDValue Mask = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes); 5243 SDValue Src = ZeroVecIdx == 0 ? Ops[1] : Ops[0]; 5244 if (MaskFirst) 5245 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Mask, Src, 5246 Mask); 5247 else 5248 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Src, Mask, 5249 Mask); 5250 } 5251 } 5252 5253 SDValue IndexNodes[SystemZ::VectorBytes]; 5254 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) 5255 if (Bytes[I] >= 0) 5256 IndexNodes[I] = DAG.getConstant(Bytes[I], DL, MVT::i32); 5257 else 5258 IndexNodes[I] = DAG.getUNDEF(MVT::i32); 5259 SDValue Op2 = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes); 5260 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0], 5261 (!Ops[1].isUndef() ? Ops[1] : Ops[0]), Op2); 5262 } 5263 5264 namespace { 5265 // Describes a general N-operand vector shuffle. 5266 struct GeneralShuffle { 5267 GeneralShuffle(EVT vt) : VT(vt), UnpackFromEltSize(UINT_MAX) {} 5268 void addUndef(); 5269 bool add(SDValue, unsigned); 5270 SDValue getNode(SelectionDAG &, const SDLoc &); 5271 void tryPrepareForUnpack(); 5272 bool unpackWasPrepared() { return UnpackFromEltSize <= 4; } 5273 SDValue insertUnpackIfPrepared(SelectionDAG &DAG, const SDLoc &DL, SDValue Op); 5274 5275 // The operands of the shuffle. 5276 SmallVector<SDValue, SystemZ::VectorBytes> Ops; 5277 5278 // Index I is -1 if byte I of the result is undefined. Otherwise the 5279 // result comes from byte Bytes[I] % SystemZ::VectorBytes of operand 5280 // Bytes[I] / SystemZ::VectorBytes. 5281 SmallVector<int, SystemZ::VectorBytes> Bytes; 5282 5283 // The type of the shuffle result. 5284 EVT VT; 5285 5286 // Holds a value of 1, 2 or 4 if a final unpack has been prepared for. 5287 unsigned UnpackFromEltSize; 5288 }; 5289 } 5290 5291 // Add an extra undefined element to the shuffle. 5292 void GeneralShuffle::addUndef() { 5293 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize(); 5294 for (unsigned I = 0; I < BytesPerElement; ++I) 5295 Bytes.push_back(-1); 5296 } 5297 5298 // Add an extra element to the shuffle, taking it from element Elem of Op. 5299 // A null Op indicates a vector input whose value will be calculated later; 5300 // there is at most one such input per shuffle and it always has the same 5301 // type as the result. Aborts and returns false if the source vector elements 5302 // of an EXTRACT_VECTOR_ELT are smaller than the destination elements. Per 5303 // LLVM they become implicitly extended, but this is rare and not optimized. 
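// For illustration: on a v2i64 shuffle, the two calls add(A, 1) followed
// by add(B, 0) leave Ops as {A, B} and Bytes as
//   { 8,9,10,11,12,13,14,15, 16,17,18,19,20,21,22,23 }
// meaning byte I of the result is byte Bytes[I] % 16 of operand
// Bytes[I] / 16, with -1 (from addUndef above) marking undefined bytes.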
5304 bool GeneralShuffle::add(SDValue Op, unsigned Elem) { 5305 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize(); 5306 5307 // The source vector can have wider elements than the result, 5308 // either through an explicit TRUNCATE or because of type legalization. 5309 // We want the least significant part. 5310 EVT FromVT = Op.getNode() ? Op.getValueType() : VT; 5311 unsigned FromBytesPerElement = FromVT.getVectorElementType().getStoreSize(); 5312 5313 // Return false if the source elements are smaller than their destination 5314 // elements. 5315 if (FromBytesPerElement < BytesPerElement) 5316 return false; 5317 5318 unsigned Byte = ((Elem * FromBytesPerElement) % SystemZ::VectorBytes + 5319 (FromBytesPerElement - BytesPerElement)); 5320 5321 // Look through things like shuffles and bitcasts. 5322 while (Op.getNode()) { 5323 if (Op.getOpcode() == ISD::BITCAST) 5324 Op = Op.getOperand(0); 5325 else if (Op.getOpcode() == ISD::VECTOR_SHUFFLE && Op.hasOneUse()) { 5326 // See whether the bytes we need come from a contiguous part of one 5327 // operand. 5328 SmallVector<int, SystemZ::VectorBytes> OpBytes; 5329 if (!getVPermMask(Op, OpBytes)) 5330 break; 5331 int NewByte; 5332 if (!getShuffleInput(OpBytes, Byte, BytesPerElement, NewByte)) 5333 break; 5334 if (NewByte < 0) { 5335 addUndef(); 5336 return true; 5337 } 5338 Op = Op.getOperand(unsigned(NewByte) / SystemZ::VectorBytes); 5339 Byte = unsigned(NewByte) % SystemZ::VectorBytes; 5340 } else if (Op.isUndef()) { 5341 addUndef(); 5342 return true; 5343 } else 5344 break; 5345 } 5346 5347 // Make sure that the source of the extraction is in Ops. 5348 unsigned OpNo = 0; 5349 for (; OpNo < Ops.size(); ++OpNo) 5350 if (Ops[OpNo] == Op) 5351 break; 5352 if (OpNo == Ops.size()) 5353 Ops.push_back(Op); 5354 5355 // Add the element to Bytes. 5356 unsigned Base = OpNo * SystemZ::VectorBytes + Byte; 5357 for (unsigned I = 0; I < BytesPerElement; ++I) 5358 Bytes.push_back(Base + I); 5359 5360 return true; 5361 } 5362 5363 // Return SDNodes for the completed shuffle. 5364 SDValue GeneralShuffle::getNode(SelectionDAG &DAG, const SDLoc &DL) { 5365 assert(Bytes.size() == SystemZ::VectorBytes && "Incomplete vector"); 5366 5367 if (Ops.size() == 0) 5368 return DAG.getUNDEF(VT); 5369 5370 // Use a single unpack if possible as the last operation. 5371 tryPrepareForUnpack(); 5372 5373 // Make sure that there are at least two shuffle operands. 5374 if (Ops.size() == 1) 5375 Ops.push_back(DAG.getUNDEF(MVT::v16i8)); 5376 5377 // Create a tree of shuffles, deferring root node until after the loop. 5378 // Try to redistribute the undefined elements of non-root nodes so that 5379 // the non-root shuffles match something like a pack or merge, then adjust 5380 // the parent node's permute vector to compensate for the new order. 5381 // Among other things, this copes with vectors like <2 x i16> that were 5382 // padded with undefined elements during type legalization. 5383 // 5384 // In the best case this redistribution will lead to the whole tree 5385 // using packs and merges. It should rarely be a loss in other cases. 5386 unsigned Stride = 1; 5387 for (; Stride * 2 < Ops.size(); Stride *= 2) { 5388 for (unsigned I = 0; I < Ops.size() - Stride; I += Stride * 2) { 5389 SDValue SubOps[] = { Ops[I], Ops[I + Stride] }; 5390 5391 // Create a mask for just these two operands. 
5392 SmallVector<int, SystemZ::VectorBytes> NewBytes(SystemZ::VectorBytes); 5393 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) { 5394 unsigned OpNo = unsigned(Bytes[J]) / SystemZ::VectorBytes; 5395 unsigned Byte = unsigned(Bytes[J]) % SystemZ::VectorBytes; 5396 if (OpNo == I) 5397 NewBytes[J] = Byte; 5398 else if (OpNo == I + Stride) 5399 NewBytes[J] = SystemZ::VectorBytes + Byte; 5400 else 5401 NewBytes[J] = -1; 5402 } 5403 // See if it would be better to reorganize NewMask to avoid using VPERM. 5404 SmallVector<int, SystemZ::VectorBytes> NewBytesMap(SystemZ::VectorBytes); 5405 if (const Permute *P = matchDoublePermute(NewBytes, NewBytesMap)) { 5406 Ops[I] = getPermuteNode(DAG, DL, *P, SubOps[0], SubOps[1]); 5407 // Applying NewBytesMap to Ops[I] gets back to NewBytes. 5408 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) { 5409 if (NewBytes[J] >= 0) { 5410 assert(unsigned(NewBytesMap[J]) < SystemZ::VectorBytes && 5411 "Invalid double permute"); 5412 Bytes[J] = I * SystemZ::VectorBytes + NewBytesMap[J]; 5413 } else 5414 assert(NewBytesMap[J] < 0 && "Invalid double permute"); 5415 } 5416 } else { 5417 // Just use NewBytes on the operands. 5418 Ops[I] = getGeneralPermuteNode(DAG, DL, SubOps, NewBytes); 5419 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) 5420 if (NewBytes[J] >= 0) 5421 Bytes[J] = I * SystemZ::VectorBytes + J; 5422 } 5423 } 5424 } 5425 5426 // Now we just have 2 inputs. Put the second operand in Ops[1]. 5427 if (Stride > 1) { 5428 Ops[1] = Ops[Stride]; 5429 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) 5430 if (Bytes[I] >= int(SystemZ::VectorBytes)) 5431 Bytes[I] -= (Stride - 1) * SystemZ::VectorBytes; 5432 } 5433 5434 // Look for an instruction that can do the permute without resorting 5435 // to VPERM. 5436 unsigned OpNo0, OpNo1; 5437 SDValue Op; 5438 if (unpackWasPrepared() && Ops[1].isUndef()) 5439 Op = Ops[0]; 5440 else if (const Permute *P = matchPermute(Bytes, OpNo0, OpNo1)) 5441 Op = getPermuteNode(DAG, DL, *P, Ops[OpNo0], Ops[OpNo1]); 5442 else 5443 Op = getGeneralPermuteNode(DAG, DL, &Ops[0], Bytes); 5444 5445 Op = insertUnpackIfPrepared(DAG, DL, Op); 5446 5447 return DAG.getNode(ISD::BITCAST, DL, VT, Op); 5448 } 5449 5450 #ifndef NDEBUG 5451 static void dumpBytes(const SmallVectorImpl<int> &Bytes, std::string Msg) { 5452 dbgs() << Msg.c_str() << " { "; 5453 for (unsigned i = 0; i < Bytes.size(); i++) 5454 dbgs() << Bytes[i] << " "; 5455 dbgs() << "}\n"; 5456 } 5457 #endif 5458 5459 // If the Bytes vector matches an unpack operation, prepare to do the unpack 5460 // after all else by removing the zero vector and the effect of the unpack on 5461 // Bytes. 5462 void GeneralShuffle::tryPrepareForUnpack() { 5463 uint32_t ZeroVecOpNo = findZeroVectorIdx(&Ops[0], Ops.size()); 5464 if (ZeroVecOpNo == UINT32_MAX || Ops.size() == 1) 5465 return; 5466 5467 // Only do this if removing the zero vector reduces the depth, otherwise 5468 // the critical path will increase with the final unpack. 5469 if (Ops.size() > 2 && 5470 Log2_32_Ceil(Ops.size()) == Log2_32_Ceil(Ops.size() - 1)) 5471 return; 5472 5473 // Find an unpack that would allow removing the zero vector from Ops. 
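// Illustrative example: if Ops[1] is the zero vector and Bytes is the
// zero-extension pattern
//   { 16,0, 16,1, 16,2, 16,3, 16,4, 16,5, 16,6, 16,7 }
// then with UnpackFromEltSize == 1 every even-positioned byte comes from
// the zero vector and the remaining source bytes are 0..7 in order, so the
// zero vector can be dropped and the shuffle finished with a single
// unpack-logical-high (VUPLHB-style); see insertUnpackIfPrepared below.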
5474 UnpackFromEltSize = 1; 5475 for (; UnpackFromEltSize <= 4; UnpackFromEltSize *= 2) { 5476 bool MatchUnpack = true; 5477 SmallVector<int, SystemZ::VectorBytes> SrcBytes; 5478 for (unsigned Elt = 0; Elt < SystemZ::VectorBytes; Elt++) { 5479 unsigned ToEltSize = UnpackFromEltSize * 2; 5480 bool IsZextByte = (Elt % ToEltSize) < UnpackFromEltSize; 5481 if (!IsZextByte) 5482 SrcBytes.push_back(Bytes[Elt]); 5483 if (Bytes[Elt] != -1) { 5484 unsigned OpNo = unsigned(Bytes[Elt]) / SystemZ::VectorBytes; 5485 if (IsZextByte != (OpNo == ZeroVecOpNo)) { 5486 MatchUnpack = false; 5487 break; 5488 } 5489 } 5490 } 5491 if (MatchUnpack) { 5492 if (Ops.size() == 2) { 5493 // Don't use unpack if a single source operand needs rearrangement. 5494 for (unsigned i = 0; i < SystemZ::VectorBytes / 2; i++) 5495 if (SrcBytes[i] != -1 && SrcBytes[i] % 16 != int(i)) { 5496 UnpackFromEltSize = UINT_MAX; 5497 return; 5498 } 5499 } 5500 break; 5501 } 5502 } 5503 if (UnpackFromEltSize > 4) 5504 return; 5505 5506 LLVM_DEBUG(dbgs() << "Preparing for final unpack of element size " 5507 << UnpackFromEltSize << ". Zero vector is Op#" << ZeroVecOpNo 5508 << ".\n"; 5509 dumpBytes(Bytes, "Original Bytes vector:");); 5510 5511 // Apply the unpack in reverse to the Bytes array. 5512 unsigned B = 0; 5513 for (unsigned Elt = 0; Elt < SystemZ::VectorBytes;) { 5514 Elt += UnpackFromEltSize; 5515 for (unsigned i = 0; i < UnpackFromEltSize; i++, Elt++, B++) 5516 Bytes[B] = Bytes[Elt]; 5517 } 5518 while (B < SystemZ::VectorBytes) 5519 Bytes[B++] = -1; 5520 5521 // Remove the zero vector from Ops 5522 Ops.erase(&Ops[ZeroVecOpNo]); 5523 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) 5524 if (Bytes[I] >= 0) { 5525 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes; 5526 if (OpNo > ZeroVecOpNo) 5527 Bytes[I] -= SystemZ::VectorBytes; 5528 } 5529 5530 LLVM_DEBUG(dumpBytes(Bytes, "Resulting Bytes vector, zero vector removed:"); 5531 dbgs() << "\n";); 5532 } 5533 5534 SDValue GeneralShuffle::insertUnpackIfPrepared(SelectionDAG &DAG, 5535 const SDLoc &DL, 5536 SDValue Op) { 5537 if (!unpackWasPrepared()) 5538 return Op; 5539 unsigned InBits = UnpackFromEltSize * 8; 5540 EVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBits), 5541 SystemZ::VectorBits / InBits); 5542 SDValue PackedOp = DAG.getNode(ISD::BITCAST, DL, InVT, Op); 5543 unsigned OutBits = InBits * 2; 5544 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(OutBits), 5545 SystemZ::VectorBits / OutBits); 5546 return DAG.getNode(SystemZISD::UNPACKL_HIGH, DL, OutVT, PackedOp); 5547 } 5548 5549 // Return true if the given BUILD_VECTOR is a scalar-to-vector conversion. 5550 static bool isScalarToVector(SDValue Op) { 5551 for (unsigned I = 1, E = Op.getNumOperands(); I != E; ++I) 5552 if (!Op.getOperand(I).isUndef()) 5553 return false; 5554 return true; 5555 } 5556 5557 // Return a vector of type VT that contains Value in the first element. 5558 // The other elements don't matter. 5559 static SDValue buildScalarToVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT, 5560 SDValue Value) { 5561 // If we have a constant, replicate it to all elements and let the 5562 // BUILD_VECTOR lowering take care of it. 
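// E.g. (sketch): buildScalarToVector(v4i32, constant 1) produces the splat
// BUILD_VECTOR <1,1,1,1>, which the BUILD_VECTOR lowering can typically
// emit as a single replicate-immediate rather than inserting the constant
// into element 0 of an undefined vector.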
5563 if (Value.getOpcode() == ISD::Constant || 5564 Value.getOpcode() == ISD::ConstantFP) { 5565 SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Value); 5566 return DAG.getBuildVector(VT, DL, Ops); 5567 } 5568 if (Value.isUndef()) 5569 return DAG.getUNDEF(VT); 5570 return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value); 5571 } 5572 5573 // Return a vector of type VT in which Op0 is in element 0 and Op1 is in 5574 // element 1. Used for cases in which replication is cheap. 5575 static SDValue buildMergeScalars(SelectionDAG &DAG, const SDLoc &DL, EVT VT, 5576 SDValue Op0, SDValue Op1) { 5577 if (Op0.isUndef()) { 5578 if (Op1.isUndef()) 5579 return DAG.getUNDEF(VT); 5580 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op1); 5581 } 5582 if (Op1.isUndef()) 5583 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0); 5584 return DAG.getNode(SystemZISD::MERGE_HIGH, DL, VT, 5585 buildScalarToVector(DAG, DL, VT, Op0), 5586 buildScalarToVector(DAG, DL, VT, Op1)); 5587 } 5588 5589 // Extend GPR scalars Op0 and Op1 to doublewords and return a v2i64 5590 // vector for them. 5591 static SDValue joinDwords(SelectionDAG &DAG, const SDLoc &DL, SDValue Op0, 5592 SDValue Op1) { 5593 if (Op0.isUndef() && Op1.isUndef()) 5594 return DAG.getUNDEF(MVT::v2i64); 5595 // If one of the two inputs is undefined then replicate the other one, 5596 // in order to avoid using another register unnecessarily. 5597 if (Op0.isUndef()) 5598 Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1); 5599 else if (Op1.isUndef()) 5600 Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0); 5601 else { 5602 Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0); 5603 Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1); 5604 } 5605 return DAG.getNode(SystemZISD::JOIN_DWORDS, DL, MVT::v2i64, Op0, Op1); 5606 } 5607 5608 // If a BUILD_VECTOR contains some EXTRACT_VECTOR_ELTs, it's usually 5609 // better to use VECTOR_SHUFFLEs on them, only using BUILD_VECTOR for 5610 // the non-EXTRACT_VECTOR_ELT elements. See if the given BUILD_VECTOR 5611 // would benefit from this representation and return it if so. 5612 static SDValue tryBuildVectorShuffle(SelectionDAG &DAG, 5613 BuildVectorSDNode *BVN) { 5614 EVT VT = BVN->getValueType(0); 5615 unsigned NumElements = VT.getVectorNumElements(); 5616 5617 // Represent the BUILD_VECTOR as an N-operand VECTOR_SHUFFLE-like operation 5618 // on byte vectors. If there are non-EXTRACT_VECTOR_ELT elements that still 5619 // need a BUILD_VECTOR, add an additional placeholder operand for that 5620 // BUILD_VECTOR and store its operands in ResidueOps. 5621 GeneralShuffle GS(VT); 5622 SmallVector<SDValue, SystemZ::VectorBytes> ResidueOps; 5623 bool FoundOne = false; 5624 for (unsigned I = 0; I < NumElements; ++I) { 5625 SDValue Op = BVN->getOperand(I); 5626 if (Op.getOpcode() == ISD::TRUNCATE) 5627 Op = Op.getOperand(0); 5628 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && 5629 Op.getOperand(1).getOpcode() == ISD::Constant) { 5630 unsigned Elem = Op.getConstantOperandVal(1); 5631 if (!GS.add(Op.getOperand(0), Elem)) 5632 return SDValue(); 5633 FoundOne = true; 5634 } else if (Op.isUndef()) { 5635 GS.addUndef(); 5636 } else { 5637 if (!GS.add(SDValue(), ResidueOps.size())) 5638 return SDValue(); 5639 ResidueOps.push_back(BVN->getOperand(I)); 5640 } 5641 } 5642 5643 // Nothing to do if there are no EXTRACT_VECTOR_ELTs. 5644 if (!FoundOne) 5645 return SDValue(); 5646 5647 // Create the BUILD_VECTOR for the remaining elements, if any. 
5648 if (!ResidueOps.empty()) { 5649 while (ResidueOps.size() < NumElements) 5650 ResidueOps.push_back(DAG.getUNDEF(ResidueOps[0].getValueType())); 5651 for (auto &Op : GS.Ops) { 5652 if (!Op.getNode()) { 5653 Op = DAG.getBuildVector(VT, SDLoc(BVN), ResidueOps); 5654 break; 5655 } 5656 } 5657 } 5658 return GS.getNode(DAG, SDLoc(BVN)); 5659 } 5660 5661 bool SystemZTargetLowering::isVectorElementLoad(SDValue Op) const { 5662 if (Op.getOpcode() == ISD::LOAD && cast<LoadSDNode>(Op)->isUnindexed()) 5663 return true; 5664 if (Subtarget.hasVectorEnhancements2() && Op.getOpcode() == SystemZISD::LRV) 5665 return true; 5666 return false; 5667 } 5668 5669 // Combine GPR scalar values Elems into a vector of type VT. 5670 SDValue 5671 SystemZTargetLowering::buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT, 5672 SmallVectorImpl<SDValue> &Elems) const { 5673 // See whether there is a single replicated value. 5674 SDValue Single; 5675 unsigned int NumElements = Elems.size(); 5676 unsigned int Count = 0; 5677 for (auto Elem : Elems) { 5678 if (!Elem.isUndef()) { 5679 if (!Single.getNode()) 5680 Single = Elem; 5681 else if (Elem != Single) { 5682 Single = SDValue(); 5683 break; 5684 } 5685 Count += 1; 5686 } 5687 } 5688 // There are three cases here: 5689 // 5690 // - if the only defined element is a loaded one, the best sequence 5691 // is a replicating load. 5692 // 5693 // - otherwise, if the only defined element is an i64 value, we will 5694 // end up with the same VLVGP sequence regardless of whether we short-cut 5695 // for replication or fall through to the later code. 5696 // 5697 // - otherwise, if the only defined element is an i32 or smaller value, 5698 // we would need 2 instructions to replicate it: VLVGP followed by VREPx. 5699 // This is only a win if the single defined element is used more than once. 5700 // In other cases we're better off using a single VLVGx. 5701 if (Single.getNode() && (Count > 1 || isVectorElementLoad(Single))) 5702 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Single); 5703 5704 // If all elements are loads, use VLREP/VLEs (below). 5705 bool AllLoads = true; 5706 for (auto Elem : Elems) 5707 if (!isVectorElementLoad(Elem)) { 5708 AllLoads = false; 5709 break; 5710 } 5711 5712 // The best way of building a v2i64 from two i64s is to use VLVGP. 5713 if (VT == MVT::v2i64 && !AllLoads) 5714 return joinDwords(DAG, DL, Elems[0], Elems[1]); 5715 5716 // Use a 64-bit merge high to combine two doubles. 5717 if (VT == MVT::v2f64 && !AllLoads) 5718 return buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]); 5719 5720 // Build v4f32 values directly from the FPRs: 5721 // 5722 // <Axxx> <Bxxx> <Cxxxx> <Dxxx> 5723 // V V VMRHF 5724 // <ABxx> <CDxx> 5725 // V VMRHG 5726 // <ABCD> 5727 if (VT == MVT::v4f32 && !AllLoads) { 5728 SDValue Op01 = buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]); 5729 SDValue Op23 = buildMergeScalars(DAG, DL, VT, Elems[2], Elems[3]); 5730 // Avoid unnecessary undefs by reusing the other operand. 5731 if (Op01.isUndef()) 5732 Op01 = Op23; 5733 else if (Op23.isUndef()) 5734 Op23 = Op01; 5735 // Merging identical replications is a no-op. 5736 if (Op01.getOpcode() == SystemZISD::REPLICATE && Op01 == Op23) 5737 return Op01; 5738 Op01 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op01); 5739 Op23 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op23); 5740 SDValue Op = DAG.getNode(SystemZISD::MERGE_HIGH, 5741 DL, MVT::v2i64, Op01, Op23); 5742 return DAG.getNode(ISD::BITCAST, DL, VT, Op); 5743 } 5744 5745 // Collect the constant terms. 
5746 SmallVector<SDValue, SystemZ::VectorBytes> Constants(NumElements, SDValue()); 5747 SmallVector<bool, SystemZ::VectorBytes> Done(NumElements, false); 5748 5749 unsigned NumConstants = 0; 5750 for (unsigned I = 0; I < NumElements; ++I) { 5751 SDValue Elem = Elems[I]; 5752 if (Elem.getOpcode() == ISD::Constant || 5753 Elem.getOpcode() == ISD::ConstantFP) { 5754 NumConstants += 1; 5755 Constants[I] = Elem; 5756 Done[I] = true; 5757 } 5758 } 5759 // If there was at least one constant, fill in the other elements of 5760 // Constants with undefs to get a full vector constant and use that 5761 // as the starting point. 5762 SDValue Result; 5763 SDValue ReplicatedVal; 5764 if (NumConstants > 0) { 5765 for (unsigned I = 0; I < NumElements; ++I) 5766 if (!Constants[I].getNode()) 5767 Constants[I] = DAG.getUNDEF(Elems[I].getValueType()); 5768 Result = DAG.getBuildVector(VT, DL, Constants); 5769 } else { 5770 // Otherwise try to use VLREP or VLVGP to start the sequence in order to 5771 // avoid a false dependency on any previous contents of the vector 5772 // register. 5773 5774 // Use a VLREP if at least one element is a load. Make sure to replicate 5775 // the load with the most elements having its value. 5776 std::map<const SDNode*, unsigned> UseCounts; 5777 SDNode *LoadMaxUses = nullptr; 5778 for (unsigned I = 0; I < NumElements; ++I) 5779 if (isVectorElementLoad(Elems[I])) { 5780 SDNode *Ld = Elems[I].getNode(); 5781 UseCounts[Ld]++; 5782 if (LoadMaxUses == nullptr || UseCounts[LoadMaxUses] < UseCounts[Ld]) 5783 LoadMaxUses = Ld; 5784 } 5785 if (LoadMaxUses != nullptr) { 5786 ReplicatedVal = SDValue(LoadMaxUses, 0); 5787 Result = DAG.getNode(SystemZISD::REPLICATE, DL, VT, ReplicatedVal); 5788 } else { 5789 // Try to use VLVGP. 5790 unsigned I1 = NumElements / 2 - 1; 5791 unsigned I2 = NumElements - 1; 5792 bool Def1 = !Elems[I1].isUndef(); 5793 bool Def2 = !Elems[I2].isUndef(); 5794 if (Def1 || Def2) { 5795 SDValue Elem1 = Elems[Def1 ? I1 : I2]; 5796 SDValue Elem2 = Elems[Def2 ? I2 : I1]; 5797 Result = DAG.getNode(ISD::BITCAST, DL, VT, 5798 joinDwords(DAG, DL, Elem1, Elem2)); 5799 Done[I1] = true; 5800 Done[I2] = true; 5801 } else 5802 Result = DAG.getUNDEF(VT); 5803 } 5804 } 5805 5806 // Use VLVGx to insert the other elements. 5807 for (unsigned I = 0; I < NumElements; ++I) 5808 if (!Done[I] && !Elems[I].isUndef() && Elems[I] != ReplicatedVal) 5809 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Result, Elems[I], 5810 DAG.getConstant(I, DL, MVT::i32)); 5811 return Result; 5812 } 5813 5814 SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op, 5815 SelectionDAG &DAG) const { 5816 auto *BVN = cast<BuildVectorSDNode>(Op.getNode()); 5817 SDLoc DL(Op); 5818 EVT VT = Op.getValueType(); 5819 5820 if (BVN->isConstant()) { 5821 if (SystemZVectorConstantInfo(BVN).isVectorConstantLegal(Subtarget)) 5822 return Op; 5823 5824 // Fall back to loading it from memory. 5825 return SDValue(); 5826 } 5827 5828 // See if we should use shuffles to construct the vector from other vectors. 5829 if (SDValue Res = tryBuildVectorShuffle(DAG, BVN)) 5830 return Res; 5831 5832 // Detect SCALAR_TO_VECTOR conversions. 5833 if (isOperationLegal(ISD::SCALAR_TO_VECTOR, VT) && isScalarToVector(Op)) 5834 return buildScalarToVector(DAG, DL, VT, Op.getOperand(0)); 5835 5836 // Otherwise use buildVector to build the vector up from GPRs. 
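// Illustrative sketch of that path for a v4i32 {a, b, c, d} built from
// GPRs with no loads or usable constants: buildVector() first forms a
// JOIN_DWORDS (VLVGP-style) of elements 1 and 3, whose values end up in
// the low halves of the two doublewords, and then inserts the remaining
// elements 0 and 2 with INSERT_VECTOR_ELT, i.e. roughly VLVGP plus two
// VLVGF, avoiding a false dependency on the previous register contents.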
5837 unsigned NumElements = Op.getNumOperands(); 5838 SmallVector<SDValue, SystemZ::VectorBytes> Ops(NumElements); 5839 for (unsigned I = 0; I < NumElements; ++I) 5840 Ops[I] = Op.getOperand(I); 5841 return buildVector(DAG, DL, VT, Ops); 5842 } 5843 5844 SDValue SystemZTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op, 5845 SelectionDAG &DAG) const { 5846 auto *VSN = cast<ShuffleVectorSDNode>(Op.getNode()); 5847 SDLoc DL(Op); 5848 EVT VT = Op.getValueType(); 5849 unsigned NumElements = VT.getVectorNumElements(); 5850 5851 if (VSN->isSplat()) { 5852 SDValue Op0 = Op.getOperand(0); 5853 unsigned Index = VSN->getSplatIndex(); 5854 assert(Index < VT.getVectorNumElements() && 5855 "Splat index should be defined and in first operand"); 5856 // See whether the value we're splatting is directly available as a scalar. 5857 if ((Index == 0 && Op0.getOpcode() == ISD::SCALAR_TO_VECTOR) || 5858 Op0.getOpcode() == ISD::BUILD_VECTOR) 5859 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0.getOperand(Index)); 5860 // Otherwise keep it as a vector-to-vector operation. 5861 return DAG.getNode(SystemZISD::SPLAT, DL, VT, Op.getOperand(0), 5862 DAG.getTargetConstant(Index, DL, MVT::i32)); 5863 } 5864 5865 GeneralShuffle GS(VT); 5866 for (unsigned I = 0; I < NumElements; ++I) { 5867 int Elt = VSN->getMaskElt(I); 5868 if (Elt < 0) 5869 GS.addUndef(); 5870 else if (!GS.add(Op.getOperand(unsigned(Elt) / NumElements), 5871 unsigned(Elt) % NumElements)) 5872 return SDValue(); 5873 } 5874 return GS.getNode(DAG, SDLoc(VSN)); 5875 } 5876 5877 SDValue SystemZTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op, 5878 SelectionDAG &DAG) const { 5879 SDLoc DL(Op); 5880 // Just insert the scalar into element 0 of an undefined vector. 5881 return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, 5882 Op.getValueType(), DAG.getUNDEF(Op.getValueType()), 5883 Op.getOperand(0), DAG.getConstant(0, DL, MVT::i32)); 5884 } 5885 5886 SDValue SystemZTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, 5887 SelectionDAG &DAG) const { 5888 // Handle insertions of floating-point values. 5889 SDLoc DL(Op); 5890 SDValue Op0 = Op.getOperand(0); 5891 SDValue Op1 = Op.getOperand(1); 5892 SDValue Op2 = Op.getOperand(2); 5893 EVT VT = Op.getValueType(); 5894 5895 // Insertions into constant indices of a v2f64 can be done using VPDI. 5896 // However, if the inserted value is a bitcast or a constant then it's 5897 // better to use GPRs, as below. 5898 if (VT == MVT::v2f64 && 5899 Op1.getOpcode() != ISD::BITCAST && 5900 Op1.getOpcode() != ISD::ConstantFP && 5901 Op2.getOpcode() == ISD::Constant) { 5902 uint64_t Index = Op2->getAsZExtVal(); 5903 unsigned Mask = VT.getVectorNumElements() - 1; 5904 if (Index <= Mask) 5905 return Op; 5906 } 5907 5908 // Otherwise bitcast to the equivalent integer form and insert via a GPR. 5909 MVT IntVT = MVT::getIntegerVT(VT.getScalarSizeInBits()); 5910 MVT IntVecVT = MVT::getVectorVT(IntVT, VT.getVectorNumElements()); 5911 SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntVecVT, 5912 DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0), 5913 DAG.getNode(ISD::BITCAST, DL, IntVT, Op1), Op2); 5914 return DAG.getNode(ISD::BITCAST, DL, VT, Res); 5915 } 5916 5917 SDValue 5918 SystemZTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op, 5919 SelectionDAG &DAG) const { 5920 // Handle extractions of floating-point values. 
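// For example (sketch): extracting element 1 of a v2f64 with a constant,
// in-range index is left as-is for the normal patterns to handle, whereas
// a variable index is done by bitcasting to v2i64, extracting the element
// as an integer through a GPR, and bitcasting the i64 result back to f64.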
5921 SDLoc DL(Op); 5922 SDValue Op0 = Op.getOperand(0); 5923 SDValue Op1 = Op.getOperand(1); 5924 EVT VT = Op.getValueType(); 5925 EVT VecVT = Op0.getValueType(); 5926 5927 // Extractions of constant indices can be done directly. 5928 if (auto *CIndexN = dyn_cast<ConstantSDNode>(Op1)) { 5929 uint64_t Index = CIndexN->getZExtValue(); 5930 unsigned Mask = VecVT.getVectorNumElements() - 1; 5931 if (Index <= Mask) 5932 return Op; 5933 } 5934 5935 // Otherwise bitcast to the equivalent integer form and extract via a GPR. 5936 MVT IntVT = MVT::getIntegerVT(VT.getSizeInBits()); 5937 MVT IntVecVT = MVT::getVectorVT(IntVT, VecVT.getVectorNumElements()); 5938 SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IntVT, 5939 DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0), Op1); 5940 return DAG.getNode(ISD::BITCAST, DL, VT, Res); 5941 } 5942 5943 SDValue SystemZTargetLowering:: 5944 lowerSIGN_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const { 5945 SDValue PackedOp = Op.getOperand(0); 5946 EVT OutVT = Op.getValueType(); 5947 EVT InVT = PackedOp.getValueType(); 5948 unsigned ToBits = OutVT.getScalarSizeInBits(); 5949 unsigned FromBits = InVT.getScalarSizeInBits(); 5950 do { 5951 FromBits *= 2; 5952 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(FromBits), 5953 SystemZ::VectorBits / FromBits); 5954 PackedOp = 5955 DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(PackedOp), OutVT, PackedOp); 5956 } while (FromBits != ToBits); 5957 return PackedOp; 5958 } 5959 5960 // Lower a ZERO_EXTEND_VECTOR_INREG to a vector shuffle with a zero vector. 5961 SDValue SystemZTargetLowering:: 5962 lowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const { 5963 SDValue PackedOp = Op.getOperand(0); 5964 SDLoc DL(Op); 5965 EVT OutVT = Op.getValueType(); 5966 EVT InVT = PackedOp.getValueType(); 5967 unsigned InNumElts = InVT.getVectorNumElements(); 5968 unsigned OutNumElts = OutVT.getVectorNumElements(); 5969 unsigned NumInPerOut = InNumElts / OutNumElts; 5970 5971 SDValue ZeroVec = 5972 DAG.getSplatVector(InVT, DL, DAG.getConstant(0, DL, InVT.getScalarType())); 5973 5974 SmallVector<int, 16> Mask(InNumElts); 5975 unsigned ZeroVecElt = InNumElts; 5976 for (unsigned PackedElt = 0; PackedElt < OutNumElts; PackedElt++) { 5977 unsigned MaskElt = PackedElt * NumInPerOut; 5978 unsigned End = MaskElt + NumInPerOut - 1; 5979 for (; MaskElt < End; MaskElt++) 5980 Mask[MaskElt] = ZeroVecElt++; 5981 Mask[MaskElt] = PackedElt; 5982 } 5983 SDValue Shuf = DAG.getVectorShuffle(InVT, DL, PackedOp, ZeroVec, Mask); 5984 return DAG.getNode(ISD::BITCAST, DL, OutVT, Shuf); 5985 } 5986 5987 SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG, 5988 unsigned ByScalar) const { 5989 // Look for cases where a vector shift can use the *_BY_SCALAR form. 5990 SDValue Op0 = Op.getOperand(0); 5991 SDValue Op1 = Op.getOperand(1); 5992 SDLoc DL(Op); 5993 EVT VT = Op.getValueType(); 5994 unsigned ElemBitSize = VT.getScalarSizeInBits(); 5995 5996 // See whether the shift vector is a splat represented as BUILD_VECTOR. 5997 if (auto *BVN = dyn_cast<BuildVectorSDNode>(Op1)) { 5998 APInt SplatBits, SplatUndef; 5999 unsigned SplatBitSize; 6000 bool HasAnyUndefs; 6001 // Check for constant splats. Use ElemBitSize as the minimum element 6002 // width and reject splats that need wider elements. 
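// For example, a v4i32 shift by a constant splat of 3,
//   (shl v4i32:X, (build_vector 3, 3, 3, 3)),
// is rewritten below to the by-scalar form
//   (VSHL_BY_SCALAR v4i32:X, (i32 3)),
// which can then be selected as a single vector element-shift instruction
// with an immediate shift amount.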
6003 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, 6004 ElemBitSize, true) && 6005 SplatBitSize == ElemBitSize) { 6006 SDValue Shift = DAG.getConstant(SplatBits.getZExtValue() & 0xfff, 6007 DL, MVT::i32); 6008 return DAG.getNode(ByScalar, DL, VT, Op0, Shift); 6009 } 6010 // Check for variable splats. 6011 BitVector UndefElements; 6012 SDValue Splat = BVN->getSplatValue(&UndefElements); 6013 if (Splat) { 6014 // Since i32 is the smallest legal type, we either need a no-op 6015 // or a truncation. 6016 SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Splat); 6017 return DAG.getNode(ByScalar, DL, VT, Op0, Shift); 6018 } 6019 } 6020 6021 // See whether the shift vector is a splat represented as SHUFFLE_VECTOR, 6022 // and the shift amount is directly available in a GPR. 6023 if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(Op1)) { 6024 if (VSN->isSplat()) { 6025 SDValue VSNOp0 = VSN->getOperand(0); 6026 unsigned Index = VSN->getSplatIndex(); 6027 assert(Index < VT.getVectorNumElements() && 6028 "Splat index should be defined and in first operand"); 6029 if ((Index == 0 && VSNOp0.getOpcode() == ISD::SCALAR_TO_VECTOR) || 6030 VSNOp0.getOpcode() == ISD::BUILD_VECTOR) { 6031 // Since i32 is the smallest legal type, we either need a no-op 6032 // or a truncation. 6033 SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, 6034 VSNOp0.getOperand(Index)); 6035 return DAG.getNode(ByScalar, DL, VT, Op0, Shift); 6036 } 6037 } 6038 } 6039 6040 // Otherwise just treat the current form as legal. 6041 return Op; 6042 } 6043 6044 SDValue SystemZTargetLowering::lowerIS_FPCLASS(SDValue Op, 6045 SelectionDAG &DAG) const { 6046 SDLoc DL(Op); 6047 MVT ResultVT = Op.getSimpleValueType(); 6048 SDValue Arg = Op.getOperand(0); 6049 unsigned Check = Op.getConstantOperandVal(1); 6050 6051 unsigned TDCMask = 0; 6052 if (Check & fcSNan) 6053 TDCMask |= SystemZ::TDCMASK_SNAN_PLUS | SystemZ::TDCMASK_SNAN_MINUS; 6054 if (Check & fcQNan) 6055 TDCMask |= SystemZ::TDCMASK_QNAN_PLUS | SystemZ::TDCMASK_QNAN_MINUS; 6056 if (Check & fcPosInf) 6057 TDCMask |= SystemZ::TDCMASK_INFINITY_PLUS; 6058 if (Check & fcNegInf) 6059 TDCMask |= SystemZ::TDCMASK_INFINITY_MINUS; 6060 if (Check & fcPosNormal) 6061 TDCMask |= SystemZ::TDCMASK_NORMAL_PLUS; 6062 if (Check & fcNegNormal) 6063 TDCMask |= SystemZ::TDCMASK_NORMAL_MINUS; 6064 if (Check & fcPosSubnormal) 6065 TDCMask |= SystemZ::TDCMASK_SUBNORMAL_PLUS; 6066 if (Check & fcNegSubnormal) 6067 TDCMask |= SystemZ::TDCMASK_SUBNORMAL_MINUS; 6068 if (Check & fcPosZero) 6069 TDCMask |= SystemZ::TDCMASK_ZERO_PLUS; 6070 if (Check & fcNegZero) 6071 TDCMask |= SystemZ::TDCMASK_ZERO_MINUS; 6072 SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, MVT::i64); 6073 6074 SDValue Intr = DAG.getNode(SystemZISD::TDC, DL, ResultVT, Arg, TDCMaskV); 6075 return getCCResult(DAG, Intr); 6076 } 6077 6078 SDValue SystemZTargetLowering::LowerOperation(SDValue Op, 6079 SelectionDAG &DAG) const { 6080 switch (Op.getOpcode()) { 6081 case ISD::FRAMEADDR: 6082 return lowerFRAMEADDR(Op, DAG); 6083 case ISD::RETURNADDR: 6084 return lowerRETURNADDR(Op, DAG); 6085 case ISD::BR_CC: 6086 return lowerBR_CC(Op, DAG); 6087 case ISD::SELECT_CC: 6088 return lowerSELECT_CC(Op, DAG); 6089 case ISD::SETCC: 6090 return lowerSETCC(Op, DAG); 6091 case ISD::STRICT_FSETCC: 6092 return lowerSTRICT_FSETCC(Op, DAG, false); 6093 case ISD::STRICT_FSETCCS: 6094 return lowerSTRICT_FSETCC(Op, DAG, true); 6095 case ISD::GlobalAddress: 6096 return lowerGlobalAddress(cast<GlobalAddressSDNode>(Op), DAG); 6097 case 
ISD::GlobalTLSAddress: 6098 return lowerGlobalTLSAddress(cast<GlobalAddressSDNode>(Op), DAG); 6099 case ISD::BlockAddress: 6100 return lowerBlockAddress(cast<BlockAddressSDNode>(Op), DAG); 6101 case ISD::JumpTable: 6102 return lowerJumpTable(cast<JumpTableSDNode>(Op), DAG); 6103 case ISD::ConstantPool: 6104 return lowerConstantPool(cast<ConstantPoolSDNode>(Op), DAG); 6105 case ISD::BITCAST: 6106 return lowerBITCAST(Op, DAG); 6107 case ISD::VASTART: 6108 return lowerVASTART(Op, DAG); 6109 case ISD::VACOPY: 6110 return lowerVACOPY(Op, DAG); 6111 case ISD::DYNAMIC_STACKALLOC: 6112 return lowerDYNAMIC_STACKALLOC(Op, DAG); 6113 case ISD::GET_DYNAMIC_AREA_OFFSET: 6114 return lowerGET_DYNAMIC_AREA_OFFSET(Op, DAG); 6115 case ISD::SMUL_LOHI: 6116 return lowerSMUL_LOHI(Op, DAG); 6117 case ISD::UMUL_LOHI: 6118 return lowerUMUL_LOHI(Op, DAG); 6119 case ISD::SDIVREM: 6120 return lowerSDIVREM(Op, DAG); 6121 case ISD::UDIVREM: 6122 return lowerUDIVREM(Op, DAG); 6123 case ISD::SADDO: 6124 case ISD::SSUBO: 6125 case ISD::UADDO: 6126 case ISD::USUBO: 6127 return lowerXALUO(Op, DAG); 6128 case ISD::UADDO_CARRY: 6129 case ISD::USUBO_CARRY: 6130 return lowerUADDSUBO_CARRY(Op, DAG); 6131 case ISD::OR: 6132 return lowerOR(Op, DAG); 6133 case ISD::CTPOP: 6134 return lowerCTPOP(Op, DAG); 6135 case ISD::ATOMIC_FENCE: 6136 return lowerATOMIC_FENCE(Op, DAG); 6137 case ISD::ATOMIC_SWAP: 6138 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW); 6139 case ISD::ATOMIC_STORE: 6140 return lowerATOMIC_STORE(Op, DAG); 6141 case ISD::ATOMIC_LOAD: 6142 return lowerATOMIC_LOAD(Op, DAG); 6143 case ISD::ATOMIC_LOAD_ADD: 6144 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_ADD); 6145 case ISD::ATOMIC_LOAD_SUB: 6146 return lowerATOMIC_LOAD_SUB(Op, DAG); 6147 case ISD::ATOMIC_LOAD_AND: 6148 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_AND); 6149 case ISD::ATOMIC_LOAD_OR: 6150 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_OR); 6151 case ISD::ATOMIC_LOAD_XOR: 6152 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_XOR); 6153 case ISD::ATOMIC_LOAD_NAND: 6154 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_NAND); 6155 case ISD::ATOMIC_LOAD_MIN: 6156 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MIN); 6157 case ISD::ATOMIC_LOAD_MAX: 6158 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MAX); 6159 case ISD::ATOMIC_LOAD_UMIN: 6160 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMIN); 6161 case ISD::ATOMIC_LOAD_UMAX: 6162 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMAX); 6163 case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: 6164 return lowerATOMIC_CMP_SWAP(Op, DAG); 6165 case ISD::STACKSAVE: 6166 return lowerSTACKSAVE(Op, DAG); 6167 case ISD::STACKRESTORE: 6168 return lowerSTACKRESTORE(Op, DAG); 6169 case ISD::PREFETCH: 6170 return lowerPREFETCH(Op, DAG); 6171 case ISD::INTRINSIC_W_CHAIN: 6172 return lowerINTRINSIC_W_CHAIN(Op, DAG); 6173 case ISD::INTRINSIC_WO_CHAIN: 6174 return lowerINTRINSIC_WO_CHAIN(Op, DAG); 6175 case ISD::BUILD_VECTOR: 6176 return lowerBUILD_VECTOR(Op, DAG); 6177 case ISD::VECTOR_SHUFFLE: 6178 return lowerVECTOR_SHUFFLE(Op, DAG); 6179 case ISD::SCALAR_TO_VECTOR: 6180 return lowerSCALAR_TO_VECTOR(Op, DAG); 6181 case ISD::INSERT_VECTOR_ELT: 6182 return lowerINSERT_VECTOR_ELT(Op, DAG); 6183 case ISD::EXTRACT_VECTOR_ELT: 6184 return lowerEXTRACT_VECTOR_ELT(Op, DAG); 6185 case ISD::SIGN_EXTEND_VECTOR_INREG: 6186 return lowerSIGN_EXTEND_VECTOR_INREG(Op, DAG); 6187 case ISD::ZERO_EXTEND_VECTOR_INREG: 
6188 return lowerZERO_EXTEND_VECTOR_INREG(Op, DAG); 6189 case ISD::SHL: 6190 return lowerShift(Op, DAG, SystemZISD::VSHL_BY_SCALAR); 6191 case ISD::SRL: 6192 return lowerShift(Op, DAG, SystemZISD::VSRL_BY_SCALAR); 6193 case ISD::SRA: 6194 return lowerShift(Op, DAG, SystemZISD::VSRA_BY_SCALAR); 6195 case ISD::ROTL: 6196 return lowerShift(Op, DAG, SystemZISD::VROTL_BY_SCALAR); 6197 case ISD::IS_FPCLASS: 6198 return lowerIS_FPCLASS(Op, DAG); 6199 case ISD::GET_ROUNDING: 6200 return lowerGET_ROUNDING(Op, DAG); 6201 default: 6202 llvm_unreachable("Unexpected node to lower"); 6203 } 6204 } 6205 6206 // Lower operations with invalid operand or result types (currently used 6207 // only for 128-bit integer types). 6208 void 6209 SystemZTargetLowering::LowerOperationWrapper(SDNode *N, 6210 SmallVectorImpl<SDValue> &Results, 6211 SelectionDAG &DAG) const { 6212 switch (N->getOpcode()) { 6213 case ISD::ATOMIC_LOAD: { 6214 SDLoc DL(N); 6215 SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::Other); 6216 SDValue Ops[] = { N->getOperand(0), N->getOperand(1) }; 6217 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand(); 6218 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_LOAD_128, 6219 DL, Tys, Ops, MVT::i128, MMO); 6220 Results.push_back(lowerGR128ToI128(DAG, Res)); 6221 Results.push_back(Res.getValue(1)); 6222 break; 6223 } 6224 case ISD::ATOMIC_STORE: { 6225 SDLoc DL(N); 6226 SDVTList Tys = DAG.getVTList(MVT::Other); 6227 SDValue Ops[] = {N->getOperand(0), lowerI128ToGR128(DAG, N->getOperand(1)), 6228 N->getOperand(2)}; 6229 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand(); 6230 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_STORE_128, 6231 DL, Tys, Ops, MVT::i128, MMO); 6232 // We have to enforce sequential consistency by performing a 6233 // serialization operation after the store. 
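// Illustrative only: for a seq_cst 128-bit "store atomic", the chain below
// becomes ATOMIC_STORE_128 followed by a SystemZ::Serialize node, while
// weaker orderings keep just the ATOMIC_STORE_128.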
6234 if (cast<AtomicSDNode>(N)->getSuccessOrdering() == 6235 AtomicOrdering::SequentiallyConsistent) 6236 Res = SDValue(DAG.getMachineNode(SystemZ::Serialize, DL, 6237 MVT::Other, Res), 0); 6238 Results.push_back(Res); 6239 break; 6240 } 6241 case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: { 6242 SDLoc DL(N); 6243 SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other); 6244 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), 6245 lowerI128ToGR128(DAG, N->getOperand(2)), 6246 lowerI128ToGR128(DAG, N->getOperand(3)) }; 6247 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand(); 6248 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP_128, 6249 DL, Tys, Ops, MVT::i128, MMO); 6250 SDValue Success = emitSETCC(DAG, DL, Res.getValue(1), 6251 SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ); 6252 Success = DAG.getZExtOrTrunc(Success, DL, N->getValueType(1)); 6253 Results.push_back(lowerGR128ToI128(DAG, Res)); 6254 Results.push_back(Success); 6255 Results.push_back(Res.getValue(2)); 6256 break; 6257 } 6258 case ISD::BITCAST: { 6259 SDValue Src = N->getOperand(0); 6260 if (N->getValueType(0) == MVT::i128 && Src.getValueType() == MVT::f128 && 6261 !useSoftFloat()) { 6262 SDLoc DL(N); 6263 SDValue Lo, Hi; 6264 if (getRepRegClassFor(MVT::f128) == &SystemZ::VR128BitRegClass) { 6265 SDValue VecBC = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Src); 6266 Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, VecBC, 6267 DAG.getConstant(1, DL, MVT::i32)); 6268 Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, VecBC, 6269 DAG.getConstant(0, DL, MVT::i32)); 6270 } else { 6271 assert(getRepRegClassFor(MVT::f128) == &SystemZ::FP128BitRegClass && 6272 "Unrecognized register class for f128."); 6273 SDValue LoFP = DAG.getTargetExtractSubreg(SystemZ::subreg_l64, 6274 DL, MVT::f64, Src); 6275 SDValue HiFP = DAG.getTargetExtractSubreg(SystemZ::subreg_h64, 6276 DL, MVT::f64, Src); 6277 Lo = DAG.getNode(ISD::BITCAST, DL, MVT::i64, LoFP); 6278 Hi = DAG.getNode(ISD::BITCAST, DL, MVT::i64, HiFP); 6279 } 6280 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi)); 6281 } 6282 break; 6283 } 6284 default: 6285 llvm_unreachable("Unexpected node to lower"); 6286 } 6287 } 6288 6289 void 6290 SystemZTargetLowering::ReplaceNodeResults(SDNode *N, 6291 SmallVectorImpl<SDValue> &Results, 6292 SelectionDAG &DAG) const { 6293 return LowerOperationWrapper(N, Results, DAG); 6294 } 6295 6296 const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { 6297 #define OPCODE(NAME) case SystemZISD::NAME: return "SystemZISD::" #NAME 6298 switch ((SystemZISD::NodeType)Opcode) { 6299 case SystemZISD::FIRST_NUMBER: break; 6300 OPCODE(RET_GLUE); 6301 OPCODE(CALL); 6302 OPCODE(SIBCALL); 6303 OPCODE(TLS_GDCALL); 6304 OPCODE(TLS_LDCALL); 6305 OPCODE(PCREL_WRAPPER); 6306 OPCODE(PCREL_OFFSET); 6307 OPCODE(ICMP); 6308 OPCODE(FCMP); 6309 OPCODE(STRICT_FCMP); 6310 OPCODE(STRICT_FCMPS); 6311 OPCODE(TM); 6312 OPCODE(BR_CCMASK); 6313 OPCODE(SELECT_CCMASK); 6314 OPCODE(ADJDYNALLOC); 6315 OPCODE(PROBED_ALLOCA); 6316 OPCODE(POPCNT); 6317 OPCODE(SMUL_LOHI); 6318 OPCODE(UMUL_LOHI); 6319 OPCODE(SDIVREM); 6320 OPCODE(UDIVREM); 6321 OPCODE(SADDO); 6322 OPCODE(SSUBO); 6323 OPCODE(UADDO); 6324 OPCODE(USUBO); 6325 OPCODE(ADDCARRY); 6326 OPCODE(SUBCARRY); 6327 OPCODE(GET_CCMASK); 6328 OPCODE(MVC); 6329 OPCODE(NC); 6330 OPCODE(OC); 6331 OPCODE(XC); 6332 OPCODE(CLC); 6333 OPCODE(MEMSET_MVC); 6334 OPCODE(STPCPY); 6335 OPCODE(STRCMP); 6336 OPCODE(SEARCH_STRING); 6337 OPCODE(IPM); 6338 OPCODE(TBEGIN); 6339 
OPCODE(TBEGIN_NOFLOAT); 6340 OPCODE(TEND); 6341 OPCODE(BYTE_MASK); 6342 OPCODE(ROTATE_MASK); 6343 OPCODE(REPLICATE); 6344 OPCODE(JOIN_DWORDS); 6345 OPCODE(SPLAT); 6346 OPCODE(MERGE_HIGH); 6347 OPCODE(MERGE_LOW); 6348 OPCODE(SHL_DOUBLE); 6349 OPCODE(PERMUTE_DWORDS); 6350 OPCODE(PERMUTE); 6351 OPCODE(PACK); 6352 OPCODE(PACKS_CC); 6353 OPCODE(PACKLS_CC); 6354 OPCODE(UNPACK_HIGH); 6355 OPCODE(UNPACKL_HIGH); 6356 OPCODE(UNPACK_LOW); 6357 OPCODE(UNPACKL_LOW); 6358 OPCODE(VSHL_BY_SCALAR); 6359 OPCODE(VSRL_BY_SCALAR); 6360 OPCODE(VSRA_BY_SCALAR); 6361 OPCODE(VROTL_BY_SCALAR); 6362 OPCODE(VSUM); 6363 OPCODE(VACC); 6364 OPCODE(VSCBI); 6365 OPCODE(VAC); 6366 OPCODE(VSBI); 6367 OPCODE(VACCC); 6368 OPCODE(VSBCBI); 6369 OPCODE(VICMPE); 6370 OPCODE(VICMPH); 6371 OPCODE(VICMPHL); 6372 OPCODE(VICMPES); 6373 OPCODE(VICMPHS); 6374 OPCODE(VICMPHLS); 6375 OPCODE(VFCMPE); 6376 OPCODE(STRICT_VFCMPE); 6377 OPCODE(STRICT_VFCMPES); 6378 OPCODE(VFCMPH); 6379 OPCODE(STRICT_VFCMPH); 6380 OPCODE(STRICT_VFCMPHS); 6381 OPCODE(VFCMPHE); 6382 OPCODE(STRICT_VFCMPHE); 6383 OPCODE(STRICT_VFCMPHES); 6384 OPCODE(VFCMPES); 6385 OPCODE(VFCMPHS); 6386 OPCODE(VFCMPHES); 6387 OPCODE(VFTCI); 6388 OPCODE(VEXTEND); 6389 OPCODE(STRICT_VEXTEND); 6390 OPCODE(VROUND); 6391 OPCODE(STRICT_VROUND); 6392 OPCODE(VTM); 6393 OPCODE(SCMP128HI); 6394 OPCODE(UCMP128HI); 6395 OPCODE(VFAE_CC); 6396 OPCODE(VFAEZ_CC); 6397 OPCODE(VFEE_CC); 6398 OPCODE(VFEEZ_CC); 6399 OPCODE(VFENE_CC); 6400 OPCODE(VFENEZ_CC); 6401 OPCODE(VISTR_CC); 6402 OPCODE(VSTRC_CC); 6403 OPCODE(VSTRCZ_CC); 6404 OPCODE(VSTRS_CC); 6405 OPCODE(VSTRSZ_CC); 6406 OPCODE(TDC); 6407 OPCODE(ATOMIC_SWAPW); 6408 OPCODE(ATOMIC_LOADW_ADD); 6409 OPCODE(ATOMIC_LOADW_SUB); 6410 OPCODE(ATOMIC_LOADW_AND); 6411 OPCODE(ATOMIC_LOADW_OR); 6412 OPCODE(ATOMIC_LOADW_XOR); 6413 OPCODE(ATOMIC_LOADW_NAND); 6414 OPCODE(ATOMIC_LOADW_MIN); 6415 OPCODE(ATOMIC_LOADW_MAX); 6416 OPCODE(ATOMIC_LOADW_UMIN); 6417 OPCODE(ATOMIC_LOADW_UMAX); 6418 OPCODE(ATOMIC_CMP_SWAPW); 6419 OPCODE(ATOMIC_CMP_SWAP); 6420 OPCODE(ATOMIC_LOAD_128); 6421 OPCODE(ATOMIC_STORE_128); 6422 OPCODE(ATOMIC_CMP_SWAP_128); 6423 OPCODE(LRV); 6424 OPCODE(STRV); 6425 OPCODE(VLER); 6426 OPCODE(VSTER); 6427 OPCODE(PREFETCH); 6428 OPCODE(ADA_ENTRY); 6429 } 6430 return nullptr; 6431 #undef OPCODE 6432 } 6433 6434 // Return true if VT is a vector whose elements are a whole number of bytes 6435 // in width. Also check for presence of vector support. 6436 bool SystemZTargetLowering::canTreatAsByteVector(EVT VT) const { 6437 if (!Subtarget.hasVector()) 6438 return false; 6439 6440 return VT.isVector() && VT.getScalarSizeInBits() % 8 == 0 && VT.isSimple(); 6441 } 6442 6443 // Try to simplify an EXTRACT_VECTOR_ELT from a vector of type VecVT 6444 // producing a result of type ResVT. Op is a possibly bitcast version 6445 // of the input vector and Index is the index (based on type VecVT) that 6446 // should be extracted. Return the new extraction if a simplification 6447 // was possible or if Force is true. 6448 SDValue SystemZTargetLowering::combineExtract(const SDLoc &DL, EVT ResVT, 6449 EVT VecVT, SDValue Op, 6450 unsigned Index, 6451 DAGCombinerInfo &DCI, 6452 bool Force) const { 6453 SelectionDAG &DAG = DCI.DAG; 6454 6455 // The number of bytes being extracted. 6456 unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize(); 6457 6458 for (;;) { 6459 unsigned Opcode = Op.getOpcode(); 6460 if (Opcode == ISD::BITCAST) 6461 // Look through bitcasts. 
6462 Op = Op.getOperand(0); 6463 else if ((Opcode == ISD::VECTOR_SHUFFLE || Opcode == SystemZISD::SPLAT) && 6464 canTreatAsByteVector(Op.getValueType())) { 6465 // Get a VPERM-like permute mask and see whether the bytes covered 6466 // by the extracted element are a contiguous sequence from one 6467 // source operand. 6468 SmallVector<int, SystemZ::VectorBytes> Bytes; 6469 if (!getVPermMask(Op, Bytes)) 6470 break; 6471 int First; 6472 if (!getShuffleInput(Bytes, Index * BytesPerElement, 6473 BytesPerElement, First)) 6474 break; 6475 if (First < 0) 6476 return DAG.getUNDEF(ResVT); 6477 // Make sure the contiguous sequence starts at a multiple of the 6478 // original element size. 6479 unsigned Byte = unsigned(First) % Bytes.size(); 6480 if (Byte % BytesPerElement != 0) 6481 break; 6482 // We can get the extracted value directly from an input. 6483 Index = Byte / BytesPerElement; 6484 Op = Op.getOperand(unsigned(First) / Bytes.size()); 6485 Force = true; 6486 } else if (Opcode == ISD::BUILD_VECTOR && 6487 canTreatAsByteVector(Op.getValueType())) { 6488 // We can only optimize this case if the BUILD_VECTOR elements are 6489 // at least as wide as the extracted value. 6490 EVT OpVT = Op.getValueType(); 6491 unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize(); 6492 if (OpBytesPerElement < BytesPerElement) 6493 break; 6494 // Make sure that the least-significant bit of the extracted value 6495 // is the least significant bit of an input. 6496 unsigned End = (Index + 1) * BytesPerElement; 6497 if (End % OpBytesPerElement != 0) 6498 break; 6499 // We're extracting the low part of one operand of the BUILD_VECTOR. 6500 Op = Op.getOperand(End / OpBytesPerElement - 1); 6501 if (!Op.getValueType().isInteger()) { 6502 EVT VT = MVT::getIntegerVT(Op.getValueSizeInBits()); 6503 Op = DAG.getNode(ISD::BITCAST, DL, VT, Op); 6504 DCI.AddToWorklist(Op.getNode()); 6505 } 6506 EVT VT = MVT::getIntegerVT(ResVT.getSizeInBits()); 6507 Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op); 6508 if (VT != ResVT) { 6509 DCI.AddToWorklist(Op.getNode()); 6510 Op = DAG.getNode(ISD::BITCAST, DL, ResVT, Op); 6511 } 6512 return Op; 6513 } else if ((Opcode == ISD::SIGN_EXTEND_VECTOR_INREG || 6514 Opcode == ISD::ZERO_EXTEND_VECTOR_INREG || 6515 Opcode == ISD::ANY_EXTEND_VECTOR_INREG) && 6516 canTreatAsByteVector(Op.getValueType()) && 6517 canTreatAsByteVector(Op.getOperand(0).getValueType())) { 6518 // Make sure that only the unextended bits are significant. 6519 EVT ExtVT = Op.getValueType(); 6520 EVT OpVT = Op.getOperand(0).getValueType(); 6521 unsigned ExtBytesPerElement = ExtVT.getVectorElementType().getStoreSize(); 6522 unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize(); 6523 unsigned Byte = Index * BytesPerElement; 6524 unsigned SubByte = Byte % ExtBytesPerElement; 6525 unsigned MinSubByte = ExtBytesPerElement - OpBytesPerElement; 6526 if (SubByte < MinSubByte || 6527 SubByte + BytesPerElement > ExtBytesPerElement) 6528 break; 6529 // Get the byte offset of the unextended element 6530 Byte = Byte / ExtBytesPerElement * OpBytesPerElement; 6531 // ...then add the byte offset relative to that element. 
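// Worked example (big-endian byte numbering): extracting the i32 at byte
// offset 12 of (v2i64 (sign_extend_vector_inreg v4i32:X)) has SubByte == 4
// and MinSubByte == 4, so Byte ends up as 4 (the adjustment below adds zero
// here) and the extraction reduces to element 1 of X.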
6532 Byte += SubByte - MinSubByte; 6533 if (Byte % BytesPerElement != 0) 6534 break; 6535 Op = Op.getOperand(0); 6536 Index = Byte / BytesPerElement; 6537 Force = true; 6538 } else 6539 break; 6540 } 6541 if (Force) { 6542 if (Op.getValueType() != VecVT) { 6543 Op = DAG.getNode(ISD::BITCAST, DL, VecVT, Op); 6544 DCI.AddToWorklist(Op.getNode()); 6545 } 6546 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op, 6547 DAG.getConstant(Index, DL, MVT::i32)); 6548 } 6549 return SDValue(); 6550 } 6551 6552 // Optimize vector operations in scalar value Op on the basis that Op 6553 // is truncated to TruncVT. 6554 SDValue SystemZTargetLowering::combineTruncateExtract( 6555 const SDLoc &DL, EVT TruncVT, SDValue Op, DAGCombinerInfo &DCI) const { 6556 // If we have (trunc (extract_vector_elt X, Y)), try to turn it into 6557 // (extract_vector_elt (bitcast X), Y'), where (bitcast X) has elements 6558 // of type TruncVT. 6559 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && 6560 TruncVT.getSizeInBits() % 8 == 0) { 6561 SDValue Vec = Op.getOperand(0); 6562 EVT VecVT = Vec.getValueType(); 6563 if (canTreatAsByteVector(VecVT)) { 6564 if (auto *IndexN = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { 6565 unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize(); 6566 unsigned TruncBytes = TruncVT.getStoreSize(); 6567 if (BytesPerElement % TruncBytes == 0) { 6568 // Calculate the value of Y' in the above description. We are 6569 // splitting the original elements into Scale equal-sized pieces 6570 // and for truncation purposes want the last (least-significant) 6571 // of these pieces for IndexN. This is easiest to do by calculating 6572 // the start index of the following element and then subtracting 1. 6573 unsigned Scale = BytesPerElement / TruncBytes; 6574 unsigned NewIndex = (IndexN->getZExtValue() + 1) * Scale - 1; 6575 6576 // Defer the creation of the bitcast from X to combineExtract, 6577 // which might be able to optimize the extraction. 6578 VecVT = MVT::getVectorVT(MVT::getIntegerVT(TruncBytes * 8), 6579 VecVT.getStoreSize() / TruncBytes); 6580 EVT ResVT = (TruncBytes < 4 ? MVT::i32 : TruncVT); 6581 return combineExtract(DL, ResVT, VecVT, Vec, NewIndex, DCI, true); 6582 } 6583 } 6584 } 6585 } 6586 return SDValue(); 6587 } 6588 6589 SDValue SystemZTargetLowering::combineZERO_EXTEND( 6590 SDNode *N, DAGCombinerInfo &DCI) const { 6591 // Convert (zext (select_ccmask C1, C2)) into (select_ccmask C1', C2') 6592 SelectionDAG &DAG = DCI.DAG; 6593 SDValue N0 = N->getOperand(0); 6594 EVT VT = N->getValueType(0); 6595 if (N0.getOpcode() == SystemZISD::SELECT_CCMASK) { 6596 auto *TrueOp = dyn_cast<ConstantSDNode>(N0.getOperand(0)); 6597 auto *FalseOp = dyn_cast<ConstantSDNode>(N0.getOperand(1)); 6598 if (TrueOp && FalseOp) { 6599 SDLoc DL(N0); 6600 SDValue Ops[] = { DAG.getConstant(TrueOp->getZExtValue(), DL, VT), 6601 DAG.getConstant(FalseOp->getZExtValue(), DL, VT), 6602 N0.getOperand(2), N0.getOperand(3), N0.getOperand(4) }; 6603 SDValue NewSelect = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VT, Ops); 6604 // If N0 has multiple uses, change other uses as well. 
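// Illustrative only: if the narrow SELECT_CCMASK also feeds some other user
// (say, a store), that user is switched over to a truncation of NewSelect so
// the old node can be dropped without duplicating the CC-consuming select.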
6605 if (!N0.hasOneUse()) { 6606 SDValue TruncSelect = 6607 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), NewSelect); 6608 DCI.CombineTo(N0.getNode(), TruncSelect); 6609 } 6610 return NewSelect; 6611 } 6612 } 6613 return SDValue(); 6614 } 6615 6616 SDValue SystemZTargetLowering::combineSIGN_EXTEND_INREG( 6617 SDNode *N, DAGCombinerInfo &DCI) const { 6618 // Convert (sext_in_reg (setcc LHS, RHS, COND), i1) 6619 // and (sext_in_reg (any_extend (setcc LHS, RHS, COND)), i1) 6620 // into (select_cc LHS, RHS, -1, 0, COND) 6621 SelectionDAG &DAG = DCI.DAG; 6622 SDValue N0 = N->getOperand(0); 6623 EVT VT = N->getValueType(0); 6624 EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT(); 6625 if (N0.hasOneUse() && N0.getOpcode() == ISD::ANY_EXTEND) 6626 N0 = N0.getOperand(0); 6627 if (EVT == MVT::i1 && N0.hasOneUse() && N0.getOpcode() == ISD::SETCC) { 6628 SDLoc DL(N0); 6629 SDValue Ops[] = { N0.getOperand(0), N0.getOperand(1), 6630 DAG.getConstant(-1, DL, VT), DAG.getConstant(0, DL, VT), 6631 N0.getOperand(2) }; 6632 return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops); 6633 } 6634 return SDValue(); 6635 } 6636 6637 SDValue SystemZTargetLowering::combineSIGN_EXTEND( 6638 SDNode *N, DAGCombinerInfo &DCI) const { 6639 // Convert (sext (ashr (shl X, C1), C2)) to 6640 // (ashr (shl (anyext X), C1'), C2')), since wider shifts are as 6641 // cheap as narrower ones. 6642 SelectionDAG &DAG = DCI.DAG; 6643 SDValue N0 = N->getOperand(0); 6644 EVT VT = N->getValueType(0); 6645 if (N0.hasOneUse() && N0.getOpcode() == ISD::SRA) { 6646 auto *SraAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)); 6647 SDValue Inner = N0.getOperand(0); 6648 if (SraAmt && Inner.hasOneUse() && Inner.getOpcode() == ISD::SHL) { 6649 if (auto *ShlAmt = dyn_cast<ConstantSDNode>(Inner.getOperand(1))) { 6650 unsigned Extra = (VT.getSizeInBits() - N0.getValueSizeInBits()); 6651 unsigned NewShlAmt = ShlAmt->getZExtValue() + Extra; 6652 unsigned NewSraAmt = SraAmt->getZExtValue() + Extra; 6653 EVT ShiftVT = N0.getOperand(1).getValueType(); 6654 SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, SDLoc(Inner), VT, 6655 Inner.getOperand(0)); 6656 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(Inner), VT, Ext, 6657 DAG.getConstant(NewShlAmt, SDLoc(Inner), 6658 ShiftVT)); 6659 return DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, 6660 DAG.getConstant(NewSraAmt, SDLoc(N0), ShiftVT)); 6661 } 6662 } 6663 } 6664 return SDValue(); 6665 } 6666 6667 SDValue SystemZTargetLowering::combineMERGE( 6668 SDNode *N, DAGCombinerInfo &DCI) const { 6669 SelectionDAG &DAG = DCI.DAG; 6670 unsigned Opcode = N->getOpcode(); 6671 SDValue Op0 = N->getOperand(0); 6672 SDValue Op1 = N->getOperand(1); 6673 if (Op0.getOpcode() == ISD::BITCAST) 6674 Op0 = Op0.getOperand(0); 6675 if (ISD::isBuildVectorAllZeros(Op0.getNode())) { 6676 // (z_merge_* 0, 0) -> 0. This is mostly useful for using VLLEZF 6677 // for v4f32. 6678 if (Op1 == N->getOperand(0)) 6679 return Op1; 6680 // (z_merge_? 0, X) -> (z_unpackl_? 0, X). 6681 EVT VT = Op1.getValueType(); 6682 unsigned ElemBytes = VT.getVectorElementType().getStoreSize(); 6683 if (ElemBytes <= 4) { 6684 Opcode = (Opcode == SystemZISD::MERGE_HIGH ? 
6685 SystemZISD::UNPACKL_HIGH : SystemZISD::UNPACKL_LOW); 6686 EVT InVT = VT.changeVectorElementTypeToInteger(); 6687 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(ElemBytes * 16), 6688 SystemZ::VectorBytes / ElemBytes / 2); 6689 if (VT != InVT) { 6690 Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), InVT, Op1); 6691 DCI.AddToWorklist(Op1.getNode()); 6692 } 6693 SDValue Op = DAG.getNode(Opcode, SDLoc(N), OutVT, Op1); 6694 DCI.AddToWorklist(Op.getNode()); 6695 return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op); 6696 } 6697 } 6698 return SDValue(); 6699 } 6700 6701 SDValue SystemZTargetLowering::combineLOAD( 6702 SDNode *N, DAGCombinerInfo &DCI) const { 6703 SelectionDAG &DAG = DCI.DAG; 6704 EVT LdVT = N->getValueType(0); 6705 SDLoc DL(N); 6706 6707 // Replace an i128 load that is used solely to move its value into GPRs 6708 // by separate loads of both halves. 6709 if (LdVT == MVT::i128) { 6710 LoadSDNode *LD = cast<LoadSDNode>(N); 6711 if (!LD->isSimple() || !ISD::isNormalLoad(LD)) 6712 return SDValue(); 6713 6714 // Scan through all users. 6715 SmallVector<std::pair<SDNode *, int>, 2> Users; 6716 int UsedElements = 0; 6717 for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end(); 6718 UI != UIEnd; ++UI) { 6719 // Skip the uses of the chain. 6720 if (UI.getUse().getResNo() != 0) 6721 continue; 6722 6723 // Verify every user is a TRUNCATE to i64 of the low or high half ... 6724 SDNode *User = *UI; 6725 int Index = 1; 6726 if (User->getOpcode() == ISD::SRL && 6727 User->getOperand(1).getOpcode() == ISD::Constant && 6728 User->getConstantOperandVal(1) == 64 && User->hasOneUse()) { 6729 User = *User->use_begin(); 6730 Index = 0; 6731 } 6732 if (User->getOpcode() != ISD::TRUNCATE || 6733 User->getValueType(0) != MVT::i64) 6734 return SDValue(); 6735 6736 // ... and no half is extracted twice. 6737 if (UsedElements & (1 << Index)) 6738 return SDValue(); 6739 6740 UsedElements |= 1 << Index; 6741 Users.push_back(std::make_pair(User, Index)); 6742 } 6743 6744 // Rewrite each extraction as an independent load. 6745 SmallVector<SDValue, 2> ArgChains; 6746 for (auto UserAndIndex : Users) { 6747 SDNode *User = UserAndIndex.first; 6748 unsigned Offset = User->getValueType(0).getStoreSize() * UserAndIndex.second; 6749 SDValue Ptr = 6750 DAG.getMemBasePlusOffset(LD->getBasePtr(), TypeSize::getFixed(Offset), DL); 6751 SDValue EltLoad = 6752 DAG.getLoad(User->getValueType(0), DL, LD->getChain(), Ptr, 6753 LD->getPointerInfo().getWithOffset(Offset), 6754 LD->getOriginalAlign(), LD->getMemOperand()->getFlags(), 6755 LD->getAAInfo()); 6756 6757 DCI.CombineTo(User, EltLoad, true); 6758 ArgChains.push_back(EltLoad.getValue(1)); 6759 } 6760 6761 // Collect all chains via TokenFactor. 6762 SDValue Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, 6763 ArgChains); 6764 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain); 6765 DCI.AddToWorklist(Chain.getNode()); 6766 return SDValue(N, 0); 6767 } 6768 6769 if (LdVT.isVector() || LdVT.isInteger()) 6770 return SDValue(); 6771 // Transform a scalar load that is REPLICATEd as well as having other 6772 // use(s) to the form where the other use(s) use the first element of the 6773 // REPLICATE instead of the load. Otherwise instruction selection will not 6774 // produce a VLREP. Avoid extracting to a GPR, so only do this for floating 6775 // point loads. 
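// A minimal sketch of the transform: given a scalar f64 load that feeds both
// a SystemZISD::REPLICATE and some other scalar use, the other use is
// rewritten to (extract_vector_elt (replicate load), 0) so that instruction
// selection can fold the load and the replication into one VLREP.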
6776 6777 SDValue Replicate; 6778 SmallVector<SDNode*, 8> OtherUses; 6779 for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); 6780 UI != UE; ++UI) { 6781 if (UI->getOpcode() == SystemZISD::REPLICATE) { 6782 if (Replicate) 6783 return SDValue(); // Should never happen 6784 Replicate = SDValue(*UI, 0); 6785 } 6786 else if (UI.getUse().getResNo() == 0) 6787 OtherUses.push_back(*UI); 6788 } 6789 if (!Replicate || OtherUses.empty()) 6790 return SDValue(); 6791 6792 SDValue Extract0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, LdVT, 6793 Replicate, DAG.getConstant(0, DL, MVT::i32)); 6794 // Update uses of the loaded Value while preserving old chains. 6795 for (SDNode *U : OtherUses) { 6796 SmallVector<SDValue, 8> Ops; 6797 for (SDValue Op : U->ops()) 6798 Ops.push_back((Op.getNode() == N && Op.getResNo() == 0) ? Extract0 : Op); 6799 DAG.UpdateNodeOperands(U, Ops); 6800 } 6801 return SDValue(N, 0); 6802 } 6803 6804 bool SystemZTargetLowering::canLoadStoreByteSwapped(EVT VT) const { 6805 if (VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) 6806 return true; 6807 if (Subtarget.hasVectorEnhancements2()) 6808 if (VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v2i64 || VT == MVT::i128) 6809 return true; 6810 return false; 6811 } 6812 6813 static bool isVectorElementSwap(ArrayRef<int> M, EVT VT) { 6814 if (!VT.isVector() || !VT.isSimple() || 6815 VT.getSizeInBits() != 128 || 6816 VT.getScalarSizeInBits() % 8 != 0) 6817 return false; 6818 6819 unsigned NumElts = VT.getVectorNumElements(); 6820 for (unsigned i = 0; i < NumElts; ++i) { 6821 if (M[i] < 0) continue; // ignore UNDEF indices 6822 if ((unsigned) M[i] != NumElts - 1 - i) 6823 return false; 6824 } 6825 6826 return true; 6827 } 6828 6829 static bool isOnlyUsedByStores(SDValue StoredVal, SelectionDAG &DAG) { 6830 for (auto *U : StoredVal->uses()) { 6831 if (StoreSDNode *ST = dyn_cast<StoreSDNode>(U)) { 6832 EVT CurrMemVT = ST->getMemoryVT().getScalarType(); 6833 if (CurrMemVT.isRound() && CurrMemVT.getStoreSize() <= 16) 6834 continue; 6835 } else if (isa<BuildVectorSDNode>(U)) { 6836 SDValue BuildVector = SDValue(U, 0); 6837 if (DAG.isSplatValue(BuildVector, true/*AllowUndefs*/) && 6838 isOnlyUsedByStores(BuildVector, DAG)) 6839 continue; 6840 } 6841 return false; 6842 } 6843 return true; 6844 } 6845 6846 static bool isMovedFromParts(SDValue Val, SDValue &LoPart, SDValue &HiPart) { 6847 if (Val.getOpcode() != ISD::OR || !Val.getNode()->hasOneUse()) 6848 return false; 6849 6850 SDValue Op0 = Val.getOperand(0); 6851 SDValue Op1 = Val.getOperand(1); 6852 6853 if (Op0.getOpcode() == ISD::SHL) 6854 std::swap(Op0, Op1); 6855 if (Op1.getOpcode() != ISD::SHL || !Op1.getNode()->hasOneUse() || 6856 Op1.getOperand(1).getOpcode() != ISD::Constant || 6857 Op1.getConstantOperandVal(1) != 64) 6858 return false; 6859 Op1 = Op1.getOperand(0); 6860 6861 if (Op0.getOpcode() != ISD::ZERO_EXTEND || !Op0.getNode()->hasOneUse() || 6862 Op0.getOperand(0).getValueType() != MVT::i64) 6863 return false; 6864 if (Op1.getOpcode() != ISD::ANY_EXTEND || !Op1.getNode()->hasOneUse() || 6865 Op1.getOperand(0).getValueType() != MVT::i64) 6866 return false; 6867 6868 LoPart = Op0.getOperand(0); 6869 HiPart = Op1.getOperand(0); 6870 return true; 6871 } 6872 6873 SDValue SystemZTargetLowering::combineSTORE( 6874 SDNode *N, DAGCombinerInfo &DCI) const { 6875 SelectionDAG &DAG = DCI.DAG; 6876 auto *SN = cast<StoreSDNode>(N); 6877 auto &Op1 = N->getOperand(1); 6878 EVT MemVT = SN->getMemoryVT(); 6879 // If we have (truncstoreiN (extract_vector_elt X, Y), Z) then it is 
better 6880 // for the extraction to be done on a vMiN value, so that we can use VSTE. 6881 // If X has wider elements then convert it to: 6882 // (truncstoreiN (extract_vector_elt (bitcast X), Y2), Z). 6883 if (MemVT.isInteger() && SN->isTruncatingStore()) { 6884 if (SDValue Value = 6885 combineTruncateExtract(SDLoc(N), MemVT, SN->getValue(), DCI)) { 6886 DCI.AddToWorklist(Value.getNode()); 6887 6888 // Rewrite the store with the new form of stored value. 6889 return DAG.getTruncStore(SN->getChain(), SDLoc(SN), Value, 6890 SN->getBasePtr(), SN->getMemoryVT(), 6891 SN->getMemOperand()); 6892 } 6893 } 6894 // Combine STORE (BSWAP) into STRVH/STRV/STRVG/VSTBR 6895 if (!SN->isTruncatingStore() && 6896 Op1.getOpcode() == ISD::BSWAP && 6897 Op1.getNode()->hasOneUse() && 6898 canLoadStoreByteSwapped(Op1.getValueType())) { 6899 6900 SDValue BSwapOp = Op1.getOperand(0); 6901 6902 if (BSwapOp.getValueType() == MVT::i16) 6903 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), MVT::i32, BSwapOp); 6904 6905 SDValue Ops[] = { 6906 N->getOperand(0), BSwapOp, N->getOperand(2) 6907 }; 6908 6909 return 6910 DAG.getMemIntrinsicNode(SystemZISD::STRV, SDLoc(N), DAG.getVTList(MVT::Other), 6911 Ops, MemVT, SN->getMemOperand()); 6912 } 6913 // Combine STORE (element-swap) into VSTER 6914 if (!SN->isTruncatingStore() && 6915 Op1.getOpcode() == ISD::VECTOR_SHUFFLE && 6916 Op1.getNode()->hasOneUse() && 6917 Subtarget.hasVectorEnhancements2()) { 6918 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op1.getNode()); 6919 ArrayRef<int> ShuffleMask = SVN->getMask(); 6920 if (isVectorElementSwap(ShuffleMask, Op1.getValueType())) { 6921 SDValue Ops[] = { 6922 N->getOperand(0), Op1.getOperand(0), N->getOperand(2) 6923 }; 6924 6925 return DAG.getMemIntrinsicNode(SystemZISD::VSTER, SDLoc(N), 6926 DAG.getVTList(MVT::Other), 6927 Ops, MemVT, SN->getMemOperand()); 6928 } 6929 } 6930 6931 // Transform a store of an i128 moved from GPRs into two separate stores. 6932 if (MemVT == MVT::i128 && SN->isSimple() && ISD::isNormalStore(SN)) { 6933 SDValue LoPart, HiPart; 6934 if (isMovedFromParts(Op1, LoPart, HiPart)) { 6935 SDLoc DL(SN); 6936 SDValue Chain0 = 6937 DAG.getStore(SN->getChain(), DL, HiPart, SN->getBasePtr(), 6938 SN->getPointerInfo(), SN->getOriginalAlign(), 6939 SN->getMemOperand()->getFlags(), SN->getAAInfo()); 6940 SDValue Chain1 = 6941 DAG.getStore(SN->getChain(), DL, LoPart, 6942 DAG.getObjectPtrOffset(DL, SN->getBasePtr(), 6943 TypeSize::getFixed(8)), 6944 SN->getPointerInfo().getWithOffset(8), 6945 SN->getOriginalAlign(), 6946 SN->getMemOperand()->getFlags(), SN->getAAInfo()); 6947 6948 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chain0, Chain1); 6949 } 6950 } 6951 6952 // Replicate a reg or immediate with VREP instead of scalar multiply or 6953 // immediate load. It seems best to do this during the first DAGCombine as 6954 // it is straight-forward to handle the zero-extend node in the initial 6955 // DAG, and also not worry about the keeping the new MemVT legal (e.g. when 6956 // extracting an i16 element from a v16i8 vector). 6957 if (Subtarget.hasVector() && DCI.Level == BeforeLegalizeTypes && 6958 isOnlyUsedByStores(Op1, DAG)) { 6959 SDValue Word = SDValue(); 6960 EVT WordVT; 6961 6962 // Find a replicated immediate and return it if found in Word and its 6963 // type in WordVT. 6964 auto FindReplicatedImm = [&](ConstantSDNode *C, unsigned TotBytes) { 6965 // Some constants are better handled with a scalar store. 
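// (Small or all-ones constants fall through to a normal scalar store.  As a
// hypothetical positive example, an i64 store of 0x0001000100010001 can be
// replicated from the i16 value 1 and stored as a short vector instead of
// materializing the full 64-bit immediate.)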
6966 if (C->getAPIntValue().getBitWidth() > 64 || C->isAllOnes() || 6967 isInt<16>(C->getSExtValue()) || MemVT.getStoreSize() <= 2) 6968 return; 6969 SystemZVectorConstantInfo VCI(APInt(TotBytes * 8, C->getZExtValue())); 6970 if (VCI.isVectorConstantLegal(Subtarget) && 6971 VCI.Opcode == SystemZISD::REPLICATE) { 6972 Word = DAG.getConstant(VCI.OpVals[0], SDLoc(SN), MVT::i32); 6973 WordVT = VCI.VecVT.getScalarType(); 6974 } 6975 }; 6976 6977 // Find a replicated register and return it if found in Word and its type 6978 // in WordVT. 6979 auto FindReplicatedReg = [&](SDValue MulOp) { 6980 EVT MulVT = MulOp.getValueType(); 6981 if (MulOp->getOpcode() == ISD::MUL && 6982 (MulVT == MVT::i16 || MulVT == MVT::i32 || MulVT == MVT::i64)) { 6983 // Find a zero extended value and its type. 6984 SDValue LHS = MulOp->getOperand(0); 6985 if (LHS->getOpcode() == ISD::ZERO_EXTEND) 6986 WordVT = LHS->getOperand(0).getValueType(); 6987 else if (LHS->getOpcode() == ISD::AssertZext) 6988 WordVT = cast<VTSDNode>(LHS->getOperand(1))->getVT(); 6989 else 6990 return; 6991 // Find a replicating constant, e.g. 0x00010001. 6992 if (auto *C = dyn_cast<ConstantSDNode>(MulOp->getOperand(1))) { 6993 SystemZVectorConstantInfo VCI( 6994 APInt(MulVT.getSizeInBits(), C->getZExtValue())); 6995 if (VCI.isVectorConstantLegal(Subtarget) && 6996 VCI.Opcode == SystemZISD::REPLICATE && VCI.OpVals[0] == 1 && 6997 WordVT == VCI.VecVT.getScalarType()) 6998 Word = DAG.getZExtOrTrunc(LHS->getOperand(0), SDLoc(SN), WordVT); 6999 } 7000 } 7001 }; 7002 7003 if (isa<BuildVectorSDNode>(Op1) && 7004 DAG.isSplatValue(Op1, true/*AllowUndefs*/)) { 7005 SDValue SplatVal = Op1->getOperand(0); 7006 if (auto *C = dyn_cast<ConstantSDNode>(SplatVal)) 7007 FindReplicatedImm(C, SplatVal.getValueType().getStoreSize()); 7008 else 7009 FindReplicatedReg(SplatVal); 7010 } else { 7011 if (auto *C = dyn_cast<ConstantSDNode>(Op1)) 7012 FindReplicatedImm(C, MemVT.getStoreSize()); 7013 else 7014 FindReplicatedReg(Op1); 7015 } 7016 7017 if (Word != SDValue()) { 7018 assert(MemVT.getSizeInBits() % WordVT.getSizeInBits() == 0 && 7019 "Bad type handling"); 7020 unsigned NumElts = MemVT.getSizeInBits() / WordVT.getSizeInBits(); 7021 EVT SplatVT = EVT::getVectorVT(*DAG.getContext(), WordVT, NumElts); 7022 SDValue SplatVal = DAG.getSplatVector(SplatVT, SDLoc(SN), Word); 7023 return DAG.getStore(SN->getChain(), SDLoc(SN), SplatVal, 7024 SN->getBasePtr(), SN->getMemOperand()); 7025 } 7026 } 7027 7028 return SDValue(); 7029 } 7030 7031 SDValue SystemZTargetLowering::combineVECTOR_SHUFFLE( 7032 SDNode *N, DAGCombinerInfo &DCI) const { 7033 SelectionDAG &DAG = DCI.DAG; 7034 // Combine element-swap (LOAD) into VLER 7035 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) && 7036 N->getOperand(0).hasOneUse() && 7037 Subtarget.hasVectorEnhancements2()) { 7038 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); 7039 ArrayRef<int> ShuffleMask = SVN->getMask(); 7040 if (isVectorElementSwap(ShuffleMask, N->getValueType(0))) { 7041 SDValue Load = N->getOperand(0); 7042 LoadSDNode *LD = cast<LoadSDNode>(Load); 7043 7044 // Create the element-swapping load. 7045 SDValue Ops[] = { 7046 LD->getChain(), // Chain 7047 LD->getBasePtr() // Ptr 7048 }; 7049 SDValue ESLoad = 7050 DAG.getMemIntrinsicNode(SystemZISD::VLER, SDLoc(N), 7051 DAG.getVTList(LD->getValueType(0), MVT::Other), 7052 Ops, LD->getMemoryVT(), LD->getMemOperand()); 7053 7054 // First, combine the VECTOR_SHUFFLE away. This makes the value produced 7055 // by the load dead. 
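// Illustrative only: for (vector_shuffle (load %p), undef, <1, 0>) on v2i64,
// ESLoad is the single element-reversing load (VLER) that replaces both the
// shuffle and the original load in the two steps below.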
7056 DCI.CombineTo(N, ESLoad); 7057 7058 // Next, combine the load away, we give it a bogus result value but a real 7059 // chain result. The result value is dead because the shuffle is dead. 7060 DCI.CombineTo(Load.getNode(), ESLoad, ESLoad.getValue(1)); 7061 7062 // Return N so it doesn't get rechecked! 7063 return SDValue(N, 0); 7064 } 7065 } 7066 7067 return SDValue(); 7068 } 7069 7070 SDValue SystemZTargetLowering::combineEXTRACT_VECTOR_ELT( 7071 SDNode *N, DAGCombinerInfo &DCI) const { 7072 SelectionDAG &DAG = DCI.DAG; 7073 7074 if (!Subtarget.hasVector()) 7075 return SDValue(); 7076 7077 // Look through bitcasts that retain the number of vector elements. 7078 SDValue Op = N->getOperand(0); 7079 if (Op.getOpcode() == ISD::BITCAST && 7080 Op.getValueType().isVector() && 7081 Op.getOperand(0).getValueType().isVector() && 7082 Op.getValueType().getVectorNumElements() == 7083 Op.getOperand(0).getValueType().getVectorNumElements()) 7084 Op = Op.getOperand(0); 7085 7086 // Pull BSWAP out of a vector extraction. 7087 if (Op.getOpcode() == ISD::BSWAP && Op.hasOneUse()) { 7088 EVT VecVT = Op.getValueType(); 7089 EVT EltVT = VecVT.getVectorElementType(); 7090 Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), EltVT, 7091 Op.getOperand(0), N->getOperand(1)); 7092 DCI.AddToWorklist(Op.getNode()); 7093 Op = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Op); 7094 if (EltVT != N->getValueType(0)) { 7095 DCI.AddToWorklist(Op.getNode()); 7096 Op = DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Op); 7097 } 7098 return Op; 7099 } 7100 7101 // Try to simplify a vector extraction. 7102 if (auto *IndexN = dyn_cast<ConstantSDNode>(N->getOperand(1))) { 7103 SDValue Op0 = N->getOperand(0); 7104 EVT VecVT = Op0.getValueType(); 7105 return combineExtract(SDLoc(N), N->getValueType(0), VecVT, Op0, 7106 IndexN->getZExtValue(), DCI, false); 7107 } 7108 return SDValue(); 7109 } 7110 7111 SDValue SystemZTargetLowering::combineJOIN_DWORDS( 7112 SDNode *N, DAGCombinerInfo &DCI) const { 7113 SelectionDAG &DAG = DCI.DAG; 7114 // (join_dwords X, X) == (replicate X) 7115 if (N->getOperand(0) == N->getOperand(1)) 7116 return DAG.getNode(SystemZISD::REPLICATE, SDLoc(N), N->getValueType(0), 7117 N->getOperand(0)); 7118 return SDValue(); 7119 } 7120 7121 static SDValue MergeInputChains(SDNode *N1, SDNode *N2) { 7122 SDValue Chain1 = N1->getOperand(0); 7123 SDValue Chain2 = N2->getOperand(0); 7124 7125 // Trivial case: both nodes take the same chain. 7126 if (Chain1 == Chain2) 7127 return Chain1; 7128 7129 // FIXME - we could handle more complex cases via TokenFactor, 7130 // assuming we can verify that this would not create a cycle. 7131 return SDValue(); 7132 } 7133 7134 SDValue SystemZTargetLowering::combineFP_ROUND( 7135 SDNode *N, DAGCombinerInfo &DCI) const { 7136 7137 if (!Subtarget.hasVector()) 7138 return SDValue(); 7139 7140 // (fpround (extract_vector_elt X 0)) 7141 // (fpround (extract_vector_elt X 1)) -> 7142 // (extract_vector_elt (VROUND X) 0) 7143 // (extract_vector_elt (VROUND X) 2) 7144 // 7145 // This is a special case since the target doesn't really support v2f32s. 7146 unsigned OpNo = N->isStrictFPOpcode() ? 
1 : 0; 7147 SelectionDAG &DAG = DCI.DAG; 7148 SDValue Op0 = N->getOperand(OpNo); 7149 if (N->getValueType(0) == MVT::f32 && Op0.hasOneUse() && 7150 Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && 7151 Op0.getOperand(0).getValueType() == MVT::v2f64 && 7152 Op0.getOperand(1).getOpcode() == ISD::Constant && 7153 Op0.getConstantOperandVal(1) == 0) { 7154 SDValue Vec = Op0.getOperand(0); 7155 for (auto *U : Vec->uses()) { 7156 if (U != Op0.getNode() && U->hasOneUse() && 7157 U->getOpcode() == ISD::EXTRACT_VECTOR_ELT && 7158 U->getOperand(0) == Vec && 7159 U->getOperand(1).getOpcode() == ISD::Constant && 7160 U->getConstantOperandVal(1) == 1) { 7161 SDValue OtherRound = SDValue(*U->use_begin(), 0); 7162 if (OtherRound.getOpcode() == N->getOpcode() && 7163 OtherRound.getOperand(OpNo) == SDValue(U, 0) && 7164 OtherRound.getValueType() == MVT::f32) { 7165 SDValue VRound, Chain; 7166 if (N->isStrictFPOpcode()) { 7167 Chain = MergeInputChains(N, OtherRound.getNode()); 7168 if (!Chain) 7169 continue; 7170 VRound = DAG.getNode(SystemZISD::STRICT_VROUND, SDLoc(N), 7171 {MVT::v4f32, MVT::Other}, {Chain, Vec}); 7172 Chain = VRound.getValue(1); 7173 } else 7174 VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N), 7175 MVT::v4f32, Vec); 7176 DCI.AddToWorklist(VRound.getNode()); 7177 SDValue Extract1 = 7178 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f32, 7179 VRound, DAG.getConstant(2, SDLoc(U), MVT::i32)); 7180 DCI.AddToWorklist(Extract1.getNode()); 7181 DAG.ReplaceAllUsesOfValueWith(OtherRound, Extract1); 7182 if (Chain) 7183 DAG.ReplaceAllUsesOfValueWith(OtherRound.getValue(1), Chain); 7184 SDValue Extract0 = 7185 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32, 7186 VRound, DAG.getConstant(0, SDLoc(Op0), MVT::i32)); 7187 if (Chain) 7188 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0), 7189 N->getVTList(), Extract0, Chain); 7190 return Extract0; 7191 } 7192 } 7193 } 7194 } 7195 return SDValue(); 7196 } 7197 7198 SDValue SystemZTargetLowering::combineFP_EXTEND( 7199 SDNode *N, DAGCombinerInfo &DCI) const { 7200 7201 if (!Subtarget.hasVector()) 7202 return SDValue(); 7203 7204 // (fpextend (extract_vector_elt X 0)) 7205 // (fpextend (extract_vector_elt X 2)) -> 7206 // (extract_vector_elt (VEXTEND X) 0) 7207 // (extract_vector_elt (VEXTEND X) 1) 7208 // 7209 // This is a special case since the target doesn't really support v2f32s. 7210 unsigned OpNo = N->isStrictFPOpcode() ? 
1 : 0; 7211 SelectionDAG &DAG = DCI.DAG; 7212 SDValue Op0 = N->getOperand(OpNo); 7213 if (N->getValueType(0) == MVT::f64 && Op0.hasOneUse() && 7214 Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && 7215 Op0.getOperand(0).getValueType() == MVT::v4f32 && 7216 Op0.getOperand(1).getOpcode() == ISD::Constant && 7217 Op0.getConstantOperandVal(1) == 0) { 7218 SDValue Vec = Op0.getOperand(0); 7219 for (auto *U : Vec->uses()) { 7220 if (U != Op0.getNode() && U->hasOneUse() && 7221 U->getOpcode() == ISD::EXTRACT_VECTOR_ELT && 7222 U->getOperand(0) == Vec && 7223 U->getOperand(1).getOpcode() == ISD::Constant && 7224 U->getConstantOperandVal(1) == 2) { 7225 SDValue OtherExtend = SDValue(*U->use_begin(), 0); 7226 if (OtherExtend.getOpcode() == N->getOpcode() && 7227 OtherExtend.getOperand(OpNo) == SDValue(U, 0) && 7228 OtherExtend.getValueType() == MVT::f64) { 7229 SDValue VExtend, Chain; 7230 if (N->isStrictFPOpcode()) { 7231 Chain = MergeInputChains(N, OtherExtend.getNode()); 7232 if (!Chain) 7233 continue; 7234 VExtend = DAG.getNode(SystemZISD::STRICT_VEXTEND, SDLoc(N), 7235 {MVT::v2f64, MVT::Other}, {Chain, Vec}); 7236 Chain = VExtend.getValue(1); 7237 } else 7238 VExtend = DAG.getNode(SystemZISD::VEXTEND, SDLoc(N), 7239 MVT::v2f64, Vec); 7240 DCI.AddToWorklist(VExtend.getNode()); 7241 SDValue Extract1 = 7242 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f64, 7243 VExtend, DAG.getConstant(1, SDLoc(U), MVT::i32)); 7244 DCI.AddToWorklist(Extract1.getNode()); 7245 DAG.ReplaceAllUsesOfValueWith(OtherExtend, Extract1); 7246 if (Chain) 7247 DAG.ReplaceAllUsesOfValueWith(OtherExtend.getValue(1), Chain); 7248 SDValue Extract0 = 7249 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f64, 7250 VExtend, DAG.getConstant(0, SDLoc(Op0), MVT::i32)); 7251 if (Chain) 7252 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0), 7253 N->getVTList(), Extract0, Chain); 7254 return Extract0; 7255 } 7256 } 7257 } 7258 } 7259 return SDValue(); 7260 } 7261 7262 SDValue SystemZTargetLowering::combineINT_TO_FP( 7263 SDNode *N, DAGCombinerInfo &DCI) const { 7264 if (DCI.Level != BeforeLegalizeTypes) 7265 return SDValue(); 7266 SelectionDAG &DAG = DCI.DAG; 7267 LLVMContext &Ctx = *DAG.getContext(); 7268 unsigned Opcode = N->getOpcode(); 7269 EVT OutVT = N->getValueType(0); 7270 Type *OutLLVMTy = OutVT.getTypeForEVT(Ctx); 7271 SDValue Op = N->getOperand(0); 7272 unsigned OutScalarBits = OutLLVMTy->getScalarSizeInBits(); 7273 unsigned InScalarBits = Op->getValueType(0).getScalarSizeInBits(); 7274 7275 // Insert an extension before type-legalization to avoid scalarization, e.g.: 7276 // v2f64 = uint_to_fp v2i16 7277 // => 7278 // v2f64 = uint_to_fp (v2i64 zero_extend v2i16) 7279 if (OutLLVMTy->isVectorTy() && OutScalarBits > InScalarBits && 7280 OutScalarBits <= 64) { 7281 unsigned NumElts = cast<FixedVectorType>(OutLLVMTy)->getNumElements(); 7282 EVT ExtVT = EVT::getVectorVT( 7283 Ctx, EVT::getIntegerVT(Ctx, OutLLVMTy->getScalarSizeInBits()), NumElts); 7284 unsigned ExtOpcode = 7285 (Opcode == ISD::UINT_TO_FP ? 
ISD::ZERO_EXTEND : ISD::SIGN_EXTEND); 7286 SDValue ExtOp = DAG.getNode(ExtOpcode, SDLoc(N), ExtVT, Op); 7287 return DAG.getNode(Opcode, SDLoc(N), OutVT, ExtOp); 7288 } 7289 return SDValue(); 7290 } 7291 7292 SDValue SystemZTargetLowering::combineBSWAP( 7293 SDNode *N, DAGCombinerInfo &DCI) const { 7294 SelectionDAG &DAG = DCI.DAG; 7295 // Combine BSWAP (LOAD) into LRVH/LRV/LRVG/VLBR 7296 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) && 7297 N->getOperand(0).hasOneUse() && 7298 canLoadStoreByteSwapped(N->getValueType(0))) { 7299 SDValue Load = N->getOperand(0); 7300 LoadSDNode *LD = cast<LoadSDNode>(Load); 7301 7302 // Create the byte-swapping load. 7303 SDValue Ops[] = { 7304 LD->getChain(), // Chain 7305 LD->getBasePtr() // Ptr 7306 }; 7307 EVT LoadVT = N->getValueType(0); 7308 if (LoadVT == MVT::i16) 7309 LoadVT = MVT::i32; 7310 SDValue BSLoad = 7311 DAG.getMemIntrinsicNode(SystemZISD::LRV, SDLoc(N), 7312 DAG.getVTList(LoadVT, MVT::Other), 7313 Ops, LD->getMemoryVT(), LD->getMemOperand()); 7314 7315 // If this is an i16 load, insert the truncate. 7316 SDValue ResVal = BSLoad; 7317 if (N->getValueType(0) == MVT::i16) 7318 ResVal = DAG.getNode(ISD::TRUNCATE, SDLoc(N), MVT::i16, BSLoad); 7319 7320 // First, combine the bswap away. This makes the value produced by the 7321 // load dead. 7322 DCI.CombineTo(N, ResVal); 7323 7324 // Next, combine the load away, we give it a bogus result value but a real 7325 // chain result. The result value is dead because the bswap is dead. 7326 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1)); 7327 7328 // Return N so it doesn't get rechecked! 7329 return SDValue(N, 0); 7330 } 7331 7332 // Look through bitcasts that retain the number of vector elements. 7333 SDValue Op = N->getOperand(0); 7334 if (Op.getOpcode() == ISD::BITCAST && 7335 Op.getValueType().isVector() && 7336 Op.getOperand(0).getValueType().isVector() && 7337 Op.getValueType().getVectorNumElements() == 7338 Op.getOperand(0).getValueType().getVectorNumElements()) 7339 Op = Op.getOperand(0); 7340 7341 // Push BSWAP into a vector insertion if at least one side then simplifies. 7342 if (Op.getOpcode() == ISD::INSERT_VECTOR_ELT && Op.hasOneUse()) { 7343 SDValue Vec = Op.getOperand(0); 7344 SDValue Elt = Op.getOperand(1); 7345 SDValue Idx = Op.getOperand(2); 7346 7347 if (DAG.isConstantIntBuildVectorOrConstantInt(Vec) || 7348 Vec.getOpcode() == ISD::BSWAP || Vec.isUndef() || 7349 DAG.isConstantIntBuildVectorOrConstantInt(Elt) || 7350 Elt.getOpcode() == ISD::BSWAP || Elt.isUndef() || 7351 (canLoadStoreByteSwapped(N->getValueType(0)) && 7352 ISD::isNON_EXTLoad(Elt.getNode()) && Elt.hasOneUse())) { 7353 EVT VecVT = N->getValueType(0); 7354 EVT EltVT = N->getValueType(0).getVectorElementType(); 7355 if (VecVT != Vec.getValueType()) { 7356 Vec = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Vec); 7357 DCI.AddToWorklist(Vec.getNode()); 7358 } 7359 if (EltVT != Elt.getValueType()) { 7360 Elt = DAG.getNode(ISD::BITCAST, SDLoc(N), EltVT, Elt); 7361 DCI.AddToWorklist(Elt.getNode()); 7362 } 7363 Vec = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Vec); 7364 DCI.AddToWorklist(Vec.getNode()); 7365 Elt = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Elt); 7366 DCI.AddToWorklist(Elt.getNode()); 7367 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VecVT, 7368 Vec, Elt, Idx); 7369 } 7370 } 7371 7372 // Push BSWAP into a vector shuffle if at least one side then simplifies. 
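// For example, (bswap (vector_shuffle X, (bswap Y), M)) can be rewritten as
// (vector_shuffle (bswap X), (bswap (bswap Y)), M); the double byte swap on
// the second operand then folds away, leaving at most one BSWAP.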
7373 ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(Op); 7374 if (SV && Op.hasOneUse()) { 7375 SDValue Op0 = Op.getOperand(0); 7376 SDValue Op1 = Op.getOperand(1); 7377 7378 if (DAG.isConstantIntBuildVectorOrConstantInt(Op0) || 7379 Op0.getOpcode() == ISD::BSWAP || Op0.isUndef() || 7380 DAG.isConstantIntBuildVectorOrConstantInt(Op1) || 7381 Op1.getOpcode() == ISD::BSWAP || Op1.isUndef()) { 7382 EVT VecVT = N->getValueType(0); 7383 if (VecVT != Op0.getValueType()) { 7384 Op0 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op0); 7385 DCI.AddToWorklist(Op0.getNode()); 7386 } 7387 if (VecVT != Op1.getValueType()) { 7388 Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op1); 7389 DCI.AddToWorklist(Op1.getNode()); 7390 } 7391 Op0 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op0); 7392 DCI.AddToWorklist(Op0.getNode()); 7393 Op1 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op1); 7394 DCI.AddToWorklist(Op1.getNode()); 7395 return DAG.getVectorShuffle(VecVT, SDLoc(N), Op0, Op1, SV->getMask()); 7396 } 7397 } 7398 7399 return SDValue(); 7400 } 7401 7402 static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) { 7403 // We have a SELECT_CCMASK or BR_CCMASK comparing the condition code 7404 // set by the CCReg instruction using the CCValid / CCMask masks, 7405 // If the CCReg instruction is itself a ICMP testing the condition 7406 // code set by some other instruction, see whether we can directly 7407 // use that condition code. 7408 7409 // Verify that we have an ICMP against some constant. 7410 if (CCValid != SystemZ::CCMASK_ICMP) 7411 return false; 7412 auto *ICmp = CCReg.getNode(); 7413 if (ICmp->getOpcode() != SystemZISD::ICMP) 7414 return false; 7415 auto *CompareLHS = ICmp->getOperand(0).getNode(); 7416 auto *CompareRHS = dyn_cast<ConstantSDNode>(ICmp->getOperand(1)); 7417 if (!CompareRHS) 7418 return false; 7419 7420 // Optimize the case where CompareLHS is a SELECT_CCMASK. 7421 if (CompareLHS->getOpcode() == SystemZISD::SELECT_CCMASK) { 7422 // Verify that we have an appropriate mask for a EQ or NE comparison. 7423 bool Invert = false; 7424 if (CCMask == SystemZ::CCMASK_CMP_NE) 7425 Invert = !Invert; 7426 else if (CCMask != SystemZ::CCMASK_CMP_EQ) 7427 return false; 7428 7429 // Verify that the ICMP compares against one of select values. 7430 auto *TrueVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(0)); 7431 if (!TrueVal) 7432 return false; 7433 auto *FalseVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1)); 7434 if (!FalseVal) 7435 return false; 7436 if (CompareRHS->getZExtValue() == FalseVal->getZExtValue()) 7437 Invert = !Invert; 7438 else if (CompareRHS->getZExtValue() != TrueVal->getZExtValue()) 7439 return false; 7440 7441 // Compute the effective CC mask for the new branch or select. 7442 auto *NewCCValid = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(2)); 7443 auto *NewCCMask = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(3)); 7444 if (!NewCCValid || !NewCCMask) 7445 return false; 7446 CCValid = NewCCValid->getZExtValue(); 7447 CCMask = NewCCMask->getZExtValue(); 7448 if (Invert) 7449 CCMask ^= CCValid; 7450 7451 // Return the updated CCReg link. 7452 CCReg = CompareLHS->getOperand(4); 7453 return true; 7454 } 7455 7456 // Optimize the case where CompareRHS is (SRA (SHL (IPM))). 
7457 if (CompareLHS->getOpcode() == ISD::SRA) { 7458 auto *SRACount = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1)); 7459 if (!SRACount || SRACount->getZExtValue() != 30) 7460 return false; 7461 auto *SHL = CompareLHS->getOperand(0).getNode(); 7462 if (SHL->getOpcode() != ISD::SHL) 7463 return false; 7464 auto *SHLCount = dyn_cast<ConstantSDNode>(SHL->getOperand(1)); 7465 if (!SHLCount || SHLCount->getZExtValue() != 30 - SystemZ::IPM_CC) 7466 return false; 7467 auto *IPM = SHL->getOperand(0).getNode(); 7468 if (IPM->getOpcode() != SystemZISD::IPM) 7469 return false; 7470 7471 // Avoid introducing CC spills (because SRA would clobber CC). 7472 if (!CompareLHS->hasOneUse()) 7473 return false; 7474 // Verify that the ICMP compares against zero. 7475 if (CompareRHS->getZExtValue() != 0) 7476 return false; 7477 7478 // Compute the effective CC mask for the new branch or select. 7479 CCMask = SystemZ::reverseCCMask(CCMask); 7480 7481 // Return the updated CCReg link. 7482 CCReg = IPM->getOperand(0); 7483 return true; 7484 } 7485 7486 return false; 7487 } 7488 7489 SDValue SystemZTargetLowering::combineBR_CCMASK( 7490 SDNode *N, DAGCombinerInfo &DCI) const { 7491 SelectionDAG &DAG = DCI.DAG; 7492 7493 // Combine BR_CCMASK (ICMP (SELECT_CCMASK)) into a single BR_CCMASK. 7494 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1)); 7495 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2)); 7496 if (!CCValid || !CCMask) 7497 return SDValue(); 7498 7499 int CCValidVal = CCValid->getZExtValue(); 7500 int CCMaskVal = CCMask->getZExtValue(); 7501 SDValue Chain = N->getOperand(0); 7502 SDValue CCReg = N->getOperand(4); 7503 7504 if (combineCCMask(CCReg, CCValidVal, CCMaskVal)) 7505 return DAG.getNode(SystemZISD::BR_CCMASK, SDLoc(N), N->getValueType(0), 7506 Chain, 7507 DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32), 7508 DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32), 7509 N->getOperand(3), CCReg); 7510 return SDValue(); 7511 } 7512 7513 SDValue SystemZTargetLowering::combineSELECT_CCMASK( 7514 SDNode *N, DAGCombinerInfo &DCI) const { 7515 SelectionDAG &DAG = DCI.DAG; 7516 7517 // Combine SELECT_CCMASK (ICMP (SELECT_CCMASK)) into a single SELECT_CCMASK. 
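// For example, an outer select that uses ICMP to test whether an inner
// SELECT_CCMASK produced its true or false constant can instead test the
// inner select's CC operand directly, using the inner CCValid / CCMask
// (inverted if needed); combineCCMask() performs that rewrite.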
7518 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(2)); 7519 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(3)); 7520 if (!CCValid || !CCMask) 7521 return SDValue(); 7522 7523 int CCValidVal = CCValid->getZExtValue(); 7524 int CCMaskVal = CCMask->getZExtValue(); 7525 SDValue CCReg = N->getOperand(4); 7526 7527 if (combineCCMask(CCReg, CCValidVal, CCMaskVal)) 7528 return DAG.getNode(SystemZISD::SELECT_CCMASK, SDLoc(N), N->getValueType(0), 7529 N->getOperand(0), N->getOperand(1), 7530 DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32), 7531 DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32), 7532 CCReg); 7533 return SDValue(); 7534 } 7535 7536 7537 SDValue SystemZTargetLowering::combineGET_CCMASK( 7538 SDNode *N, DAGCombinerInfo &DCI) const { 7539 7540 // Optimize away GET_CCMASK (SELECT_CCMASK) if the CC masks are compatible 7541 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1)); 7542 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2)); 7543 if (!CCValid || !CCMask) 7544 return SDValue(); 7545 int CCValidVal = CCValid->getZExtValue(); 7546 int CCMaskVal = CCMask->getZExtValue(); 7547 7548 SDValue Select = N->getOperand(0); 7549 if (Select->getOpcode() == ISD::TRUNCATE) 7550 Select = Select->getOperand(0); 7551 if (Select->getOpcode() != SystemZISD::SELECT_CCMASK) 7552 return SDValue(); 7553 7554 auto *SelectCCValid = dyn_cast<ConstantSDNode>(Select->getOperand(2)); 7555 auto *SelectCCMask = dyn_cast<ConstantSDNode>(Select->getOperand(3)); 7556 if (!SelectCCValid || !SelectCCMask) 7557 return SDValue(); 7558 int SelectCCValidVal = SelectCCValid->getZExtValue(); 7559 int SelectCCMaskVal = SelectCCMask->getZExtValue(); 7560 7561 auto *TrueVal = dyn_cast<ConstantSDNode>(Select->getOperand(0)); 7562 auto *FalseVal = dyn_cast<ConstantSDNode>(Select->getOperand(1)); 7563 if (!TrueVal || !FalseVal) 7564 return SDValue(); 7565 if (TrueVal->getZExtValue() == 1 && FalseVal->getZExtValue() == 0) 7566 ; 7567 else if (TrueVal->getZExtValue() == 0 && FalseVal->getZExtValue() == 1) 7568 SelectCCMaskVal ^= SelectCCValidVal; 7569 else 7570 return SDValue(); 7571 7572 if (SelectCCValidVal & ~CCValidVal) 7573 return SDValue(); 7574 if (SelectCCMaskVal != (CCMaskVal & SelectCCValidVal)) 7575 return SDValue(); 7576 7577 return Select->getOperand(4); 7578 } 7579 7580 SDValue SystemZTargetLowering::combineIntDIVREM( 7581 SDNode *N, DAGCombinerInfo &DCI) const { 7582 SelectionDAG &DAG = DCI.DAG; 7583 EVT VT = N->getValueType(0); 7584 // In the case where the divisor is a vector of constants a cheaper 7585 // sequence of instructions can replace the divide. BuildSDIV is called to 7586 // do this during DAG combining, but it only succeeds when it can build a 7587 // multiplication node. The only option for SystemZ is ISD::SMUL_LOHI, and 7588 // since it is not Legal but Custom it can only happen before 7589 // legalization. Therefore we must scalarize this early before Combine 7590 // 1. For widened vectors, this is already the result of type legalization. 
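// For instance, a v2i64 sdiv by the constant splat <3, 3> is unrolled here
// into two scalar i64 divisions, each of which BuildSDIV can still replace
// with a multiplication-based sequence.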
7591 if (DCI.Level == BeforeLegalizeTypes && VT.isVector() && isTypeLegal(VT) && 7592 DAG.isConstantIntBuildVectorOrConstantInt(N->getOperand(1))) 7593 return DAG.UnrollVectorOp(N); 7594 return SDValue(); 7595 } 7596 7597 SDValue SystemZTargetLowering::combineINTRINSIC( 7598 SDNode *N, DAGCombinerInfo &DCI) const { 7599 SelectionDAG &DAG = DCI.DAG; 7600 7601 unsigned Id = N->getConstantOperandVal(1); 7602 switch (Id) { 7603 // VECTOR LOAD (RIGHTMOST) WITH LENGTH with a length operand of 15 7604 // or larger is simply a vector load. 7605 case Intrinsic::s390_vll: 7606 case Intrinsic::s390_vlrl: 7607 if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(2))) 7608 if (C->getZExtValue() >= 15) 7609 return DAG.getLoad(N->getValueType(0), SDLoc(N), N->getOperand(0), 7610 N->getOperand(3), MachinePointerInfo()); 7611 break; 7612 // Likewise for VECTOR STORE (RIGHTMOST) WITH LENGTH. 7613 case Intrinsic::s390_vstl: 7614 case Intrinsic::s390_vstrl: 7615 if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(3))) 7616 if (C->getZExtValue() >= 15) 7617 return DAG.getStore(N->getOperand(0), SDLoc(N), N->getOperand(2), 7618 N->getOperand(4), MachinePointerInfo()); 7619 break; 7620 } 7621 7622 return SDValue(); 7623 } 7624 7625 SDValue SystemZTargetLowering::unwrapAddress(SDValue N) const { 7626 if (N->getOpcode() == SystemZISD::PCREL_WRAPPER) 7627 return N->getOperand(0); 7628 return N; 7629 } 7630 7631 SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N, 7632 DAGCombinerInfo &DCI) const { 7633 switch(N->getOpcode()) { 7634 default: break; 7635 case ISD::ZERO_EXTEND: return combineZERO_EXTEND(N, DCI); 7636 case ISD::SIGN_EXTEND: return combineSIGN_EXTEND(N, DCI); 7637 case ISD::SIGN_EXTEND_INREG: return combineSIGN_EXTEND_INREG(N, DCI); 7638 case SystemZISD::MERGE_HIGH: 7639 case SystemZISD::MERGE_LOW: return combineMERGE(N, DCI); 7640 case ISD::LOAD: return combineLOAD(N, DCI); 7641 case ISD::STORE: return combineSTORE(N, DCI); 7642 case ISD::VECTOR_SHUFFLE: return combineVECTOR_SHUFFLE(N, DCI); 7643 case ISD::EXTRACT_VECTOR_ELT: return combineEXTRACT_VECTOR_ELT(N, DCI); 7644 case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI); 7645 case ISD::STRICT_FP_ROUND: 7646 case ISD::FP_ROUND: return combineFP_ROUND(N, DCI); 7647 case ISD::STRICT_FP_EXTEND: 7648 case ISD::FP_EXTEND: return combineFP_EXTEND(N, DCI); 7649 case ISD::SINT_TO_FP: 7650 case ISD::UINT_TO_FP: return combineINT_TO_FP(N, DCI); 7651 case ISD::BSWAP: return combineBSWAP(N, DCI); 7652 case SystemZISD::BR_CCMASK: return combineBR_CCMASK(N, DCI); 7653 case SystemZISD::SELECT_CCMASK: return combineSELECT_CCMASK(N, DCI); 7654 case SystemZISD::GET_CCMASK: return combineGET_CCMASK(N, DCI); 7655 case ISD::SDIV: 7656 case ISD::UDIV: 7657 case ISD::SREM: 7658 case ISD::UREM: return combineIntDIVREM(N, DCI); 7659 case ISD::INTRINSIC_W_CHAIN: 7660 case ISD::INTRINSIC_VOID: return combineINTRINSIC(N, DCI); 7661 } 7662 7663 return SDValue(); 7664 } 7665 7666 // Return the demanded elements for the OpNo source operand of Op. DemandedElts 7667 // are for Op. 7668 static APInt getDemandedSrcElements(SDValue Op, const APInt &DemandedElts, 7669 unsigned OpNo) { 7670 EVT VT = Op.getValueType(); 7671 unsigned NumElts = (VT.isVector() ? 
VT.getVectorNumElements() : 1); 7672 APInt SrcDemE; 7673 unsigned Opcode = Op.getOpcode(); 7674 if (Opcode == ISD::INTRINSIC_WO_CHAIN) { 7675 unsigned Id = Op.getConstantOperandVal(0); 7676 switch (Id) { 7677 case Intrinsic::s390_vpksh: // PACKS 7678 case Intrinsic::s390_vpksf: 7679 case Intrinsic::s390_vpksg: 7680 case Intrinsic::s390_vpkshs: // PACKS_CC 7681 case Intrinsic::s390_vpksfs: 7682 case Intrinsic::s390_vpksgs: 7683 case Intrinsic::s390_vpklsh: // PACKLS 7684 case Intrinsic::s390_vpklsf: 7685 case Intrinsic::s390_vpklsg: 7686 case Intrinsic::s390_vpklshs: // PACKLS_CC 7687 case Intrinsic::s390_vpklsfs: 7688 case Intrinsic::s390_vpklsgs: 7689 // VECTOR PACK truncates the elements of two source vectors into one. 7690 SrcDemE = DemandedElts; 7691 if (OpNo == 2) 7692 SrcDemE.lshrInPlace(NumElts / 2); 7693 SrcDemE = SrcDemE.trunc(NumElts / 2); 7694 break; 7695 // VECTOR UNPACK extends half the elements of the source vector. 7696 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH 7697 case Intrinsic::s390_vuphh: 7698 case Intrinsic::s390_vuphf: 7699 case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH 7700 case Intrinsic::s390_vuplhh: 7701 case Intrinsic::s390_vuplhf: 7702 SrcDemE = APInt(NumElts * 2, 0); 7703 SrcDemE.insertBits(DemandedElts, 0); 7704 break; 7705 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW 7706 case Intrinsic::s390_vuplhw: 7707 case Intrinsic::s390_vuplf: 7708 case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW 7709 case Intrinsic::s390_vupllh: 7710 case Intrinsic::s390_vupllf: 7711 SrcDemE = APInt(NumElts * 2, 0); 7712 SrcDemE.insertBits(DemandedElts, NumElts); 7713 break; 7714 case Intrinsic::s390_vpdi: { 7715 // VECTOR PERMUTE DWORD IMMEDIATE selects one element from each source. 7716 SrcDemE = APInt(NumElts, 0); 7717 if (!DemandedElts[OpNo - 1]) 7718 break; 7719 unsigned Mask = Op.getConstantOperandVal(3); 7720 unsigned MaskBit = ((OpNo - 1) ? 1 : 4); 7721 // Demand input element 0 or 1, given by the mask bit value. 7722 SrcDemE.setBit((Mask & MaskBit)? 1 : 0); 7723 break; 7724 } 7725 case Intrinsic::s390_vsldb: { 7726 // VECTOR SHIFT LEFT DOUBLE BY BYTE 7727 assert(VT == MVT::v16i8 && "Unexpected type."); 7728 unsigned FirstIdx = Op.getConstantOperandVal(3); 7729 assert (FirstIdx > 0 && FirstIdx < 16 && "Unused operand."); 7730 unsigned NumSrc0Els = 16 - FirstIdx; 7731 SrcDemE = APInt(NumElts, 0); 7732 if (OpNo == 1) { 7733 APInt DemEls = DemandedElts.trunc(NumSrc0Els); 7734 SrcDemE.insertBits(DemEls, FirstIdx); 7735 } else { 7736 APInt DemEls = DemandedElts.lshr(NumSrc0Els); 7737 SrcDemE.insertBits(DemEls, 0); 7738 } 7739 break; 7740 } 7741 case Intrinsic::s390_vperm: 7742 SrcDemE = APInt(NumElts, 1); 7743 break; 7744 default: 7745 llvm_unreachable("Unhandled intrinsic."); 7746 break; 7747 } 7748 } else { 7749 switch (Opcode) { 7750 case SystemZISD::JOIN_DWORDS: 7751 // Scalar operand. 
7752 SrcDemE = APInt(1, 1); 7753 break; 7754 case SystemZISD::SELECT_CCMASK: 7755 SrcDemE = DemandedElts; 7756 break; 7757 default: 7758 llvm_unreachable("Unhandled opcode."); 7759 break; 7760 } 7761 } 7762 return SrcDemE; 7763 } 7764 7765 static void computeKnownBitsBinOp(const SDValue Op, KnownBits &Known, 7766 const APInt &DemandedElts, 7767 const SelectionDAG &DAG, unsigned Depth, 7768 unsigned OpNo) { 7769 APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo); 7770 APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1); 7771 KnownBits LHSKnown = 7772 DAG.computeKnownBits(Op.getOperand(OpNo), Src0DemE, Depth + 1); 7773 KnownBits RHSKnown = 7774 DAG.computeKnownBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1); 7775 Known = LHSKnown.intersectWith(RHSKnown); 7776 } 7777 7778 void 7779 SystemZTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, 7780 KnownBits &Known, 7781 const APInt &DemandedElts, 7782 const SelectionDAG &DAG, 7783 unsigned Depth) const { 7784 Known.resetAll(); 7785 7786 // Intrinsic CC result is returned in the two low bits. 7787 unsigned tmp0, tmp1; // not used 7788 if (Op.getResNo() == 1 && isIntrinsicWithCC(Op, tmp0, tmp1)) { 7789 Known.Zero.setBitsFrom(2); 7790 return; 7791 } 7792 EVT VT = Op.getValueType(); 7793 if (Op.getResNo() != 0 || VT == MVT::Untyped) 7794 return; 7795 assert (Known.getBitWidth() == VT.getScalarSizeInBits() && 7796 "KnownBits does not match VT in bitwidth"); 7797 assert ((!VT.isVector() || 7798 (DemandedElts.getBitWidth() == VT.getVectorNumElements())) && 7799 "DemandedElts does not match VT number of elements"); 7800 unsigned BitWidth = Known.getBitWidth(); 7801 unsigned Opcode = Op.getOpcode(); 7802 if (Opcode == ISD::INTRINSIC_WO_CHAIN) { 7803 bool IsLogical = false; 7804 unsigned Id = Op.getConstantOperandVal(0); 7805 switch (Id) { 7806 case Intrinsic::s390_vpksh: // PACKS 7807 case Intrinsic::s390_vpksf: 7808 case Intrinsic::s390_vpksg: 7809 case Intrinsic::s390_vpkshs: // PACKS_CC 7810 case Intrinsic::s390_vpksfs: 7811 case Intrinsic::s390_vpksgs: 7812 case Intrinsic::s390_vpklsh: // PACKLS 7813 case Intrinsic::s390_vpklsf: 7814 case Intrinsic::s390_vpklsg: 7815 case Intrinsic::s390_vpklshs: // PACKLS_CC 7816 case Intrinsic::s390_vpklsfs: 7817 case Intrinsic::s390_vpklsgs: 7818 case Intrinsic::s390_vpdi: 7819 case Intrinsic::s390_vsldb: 7820 case Intrinsic::s390_vperm: 7821 computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 1); 7822 break; 7823 case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH 7824 case Intrinsic::s390_vuplhh: 7825 case Intrinsic::s390_vuplhf: 7826 case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW 7827 case Intrinsic::s390_vupllh: 7828 case Intrinsic::s390_vupllf: 7829 IsLogical = true; 7830 [[fallthrough]]; 7831 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH 7832 case Intrinsic::s390_vuphh: 7833 case Intrinsic::s390_vuphf: 7834 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW 7835 case Intrinsic::s390_vuplhw: 7836 case Intrinsic::s390_vuplf: { 7837 SDValue SrcOp = Op.getOperand(1); 7838 APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 0); 7839 Known = DAG.computeKnownBits(SrcOp, SrcDemE, Depth + 1); 7840 if (IsLogical) { 7841 Known = Known.zext(BitWidth); 7842 } else 7843 Known = Known.sext(BitWidth); 7844 break; 7845 } 7846 default: 7847 break; 7848 } 7849 } else { 7850 switch (Opcode) { 7851 case SystemZISD::JOIN_DWORDS: 7852 case SystemZISD::SELECT_CCMASK: 7853 computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 0); 7854 break; 7855 case 
SystemZISD::REPLICATE: { 7856 SDValue SrcOp = Op.getOperand(0); 7857 Known = DAG.computeKnownBits(SrcOp, Depth + 1); 7858 if (Known.getBitWidth() < BitWidth && isa<ConstantSDNode>(SrcOp)) 7859 Known = Known.sext(BitWidth); // VREPI sign extends the immedate. 7860 break; 7861 } 7862 default: 7863 break; 7864 } 7865 } 7866 7867 // Known has the width of the source operand(s). Adjust if needed to match 7868 // the passed bitwidth. 7869 if (Known.getBitWidth() != BitWidth) 7870 Known = Known.anyextOrTrunc(BitWidth); 7871 } 7872 7873 static unsigned computeNumSignBitsBinOp(SDValue Op, const APInt &DemandedElts, 7874 const SelectionDAG &DAG, unsigned Depth, 7875 unsigned OpNo) { 7876 APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo); 7877 unsigned LHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo), Src0DemE, Depth + 1); 7878 if (LHS == 1) return 1; // Early out. 7879 APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1); 7880 unsigned RHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1); 7881 if (RHS == 1) return 1; // Early out. 7882 unsigned Common = std::min(LHS, RHS); 7883 unsigned SrcBitWidth = Op.getOperand(OpNo).getScalarValueSizeInBits(); 7884 EVT VT = Op.getValueType(); 7885 unsigned VTBits = VT.getScalarSizeInBits(); 7886 if (SrcBitWidth > VTBits) { // PACK 7887 unsigned SrcExtraBits = SrcBitWidth - VTBits; 7888 if (Common > SrcExtraBits) 7889 return (Common - SrcExtraBits); 7890 return 1; 7891 } 7892 assert (SrcBitWidth == VTBits && "Expected operands of same bitwidth."); 7893 return Common; 7894 } 7895 7896 unsigned 7897 SystemZTargetLowering::ComputeNumSignBitsForTargetNode( 7898 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, 7899 unsigned Depth) const { 7900 if (Op.getResNo() != 0) 7901 return 1; 7902 unsigned Opcode = Op.getOpcode(); 7903 if (Opcode == ISD::INTRINSIC_WO_CHAIN) { 7904 unsigned Id = Op.getConstantOperandVal(0); 7905 switch (Id) { 7906 case Intrinsic::s390_vpksh: // PACKS 7907 case Intrinsic::s390_vpksf: 7908 case Intrinsic::s390_vpksg: 7909 case Intrinsic::s390_vpkshs: // PACKS_CC 7910 case Intrinsic::s390_vpksfs: 7911 case Intrinsic::s390_vpksgs: 7912 case Intrinsic::s390_vpklsh: // PACKLS 7913 case Intrinsic::s390_vpklsf: 7914 case Intrinsic::s390_vpklsg: 7915 case Intrinsic::s390_vpklshs: // PACKLS_CC 7916 case Intrinsic::s390_vpklsfs: 7917 case Intrinsic::s390_vpklsgs: 7918 case Intrinsic::s390_vpdi: 7919 case Intrinsic::s390_vsldb: 7920 case Intrinsic::s390_vperm: 7921 return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 1); 7922 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH 7923 case Intrinsic::s390_vuphh: 7924 case Intrinsic::s390_vuphf: 7925 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW 7926 case Intrinsic::s390_vuplhw: 7927 case Intrinsic::s390_vuplf: { 7928 SDValue PackedOp = Op.getOperand(1); 7929 APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 1); 7930 unsigned Tmp = DAG.ComputeNumSignBits(PackedOp, SrcDemE, Depth + 1); 7931 EVT VT = Op.getValueType(); 7932 unsigned VTBits = VT.getScalarSizeInBits(); 7933 Tmp += VTBits - PackedOp.getScalarValueSizeInBits(); 7934 return Tmp; 7935 } 7936 default: 7937 break; 7938 } 7939 } else { 7940 switch (Opcode) { 7941 case SystemZISD::SELECT_CCMASK: 7942 return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 0); 7943 default: 7944 break; 7945 } 7946 } 7947 7948 return 1; 7949 } 7950 7951 bool SystemZTargetLowering:: 7952 isGuaranteedNotToBeUndefOrPoisonForTargetNode(SDValue Op, 7953 const APInt &DemandedElts, const 
SelectionDAG &DAG, 7954 bool PoisonOnly, unsigned Depth) const { 7955 switch (Op->getOpcode()) { 7956 case SystemZISD::PCREL_WRAPPER: 7957 case SystemZISD::PCREL_OFFSET: 7958 return true; 7959 } 7960 return false; 7961 } 7962 7963 unsigned 7964 SystemZTargetLowering::getStackProbeSize(const MachineFunction &MF) const { 7965 const TargetFrameLowering *TFI = Subtarget.getFrameLowering(); 7966 unsigned StackAlign = TFI->getStackAlignment(); 7967 assert(StackAlign >=1 && isPowerOf2_32(StackAlign) && 7968 "Unexpected stack alignment"); 7969 // The default stack probe size is 4096 if the function has no 7970 // stack-probe-size attribute. 7971 unsigned StackProbeSize = 7972 MF.getFunction().getFnAttributeAsParsedInteger("stack-probe-size", 4096); 7973 // Round down to the stack alignment. 7974 StackProbeSize &= ~(StackAlign - 1); 7975 return StackProbeSize ? StackProbeSize : StackAlign; 7976 } 7977 7978 //===----------------------------------------------------------------------===// 7979 // Custom insertion 7980 //===----------------------------------------------------------------------===// 7981 7982 // Force base value Base into a register before MI. Return the register. 7983 static Register forceReg(MachineInstr &MI, MachineOperand &Base, 7984 const SystemZInstrInfo *TII) { 7985 MachineBasicBlock *MBB = MI.getParent(); 7986 MachineFunction &MF = *MBB->getParent(); 7987 MachineRegisterInfo &MRI = MF.getRegInfo(); 7988 7989 if (Base.isReg()) { 7990 // Copy Base into a new virtual register to help register coalescing in 7991 // cases with multiple uses. 7992 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass); 7993 BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::COPY), Reg) 7994 .add(Base); 7995 return Reg; 7996 } 7997 7998 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass); 7999 BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LA), Reg) 8000 .add(Base) 8001 .addImm(0) 8002 .addReg(0); 8003 return Reg; 8004 } 8005 8006 // The CC operand of MI might be missing a kill marker because there 8007 // were multiple uses of CC, and ISel didn't know which to mark. 8008 // Figure out whether MI should have had a kill marker. 8009 static bool checkCCKill(MachineInstr &MI, MachineBasicBlock *MBB) { 8010 // Scan forward through BB for a use/def of CC. 8011 MachineBasicBlock::iterator miI(std::next(MachineBasicBlock::iterator(MI))); 8012 for (MachineBasicBlock::iterator miE = MBB->end(); miI != miE; ++miI) { 8013 const MachineInstr& mi = *miI; 8014 if (mi.readsRegister(SystemZ::CC)) 8015 return false; 8016 if (mi.definesRegister(SystemZ::CC)) 8017 break; // Should have kill-flag - update below. 8018 } 8019 8020 // If we hit the end of the block, check whether CC is live into a 8021 // successor. 8022 if (miI == MBB->end()) { 8023 for (const MachineBasicBlock *Succ : MBB->successors()) 8024 if (Succ->isLiveIn(SystemZ::CC)) 8025 return false; 8026 } 8027 8028 return true; 8029 } 8030 8031 // Return true if it is OK for this Select pseudo-opcode to be cascaded 8032 // together with other Select pseudo-opcodes into a single basic-block with 8033 // a conditional jump around it. 
8034 static bool isSelectPseudo(MachineInstr &MI) { 8035 switch (MI.getOpcode()) { 8036 case SystemZ::Select32: 8037 case SystemZ::Select64: 8038 case SystemZ::Select128: 8039 case SystemZ::SelectF32: 8040 case SystemZ::SelectF64: 8041 case SystemZ::SelectF128: 8042 case SystemZ::SelectVR32: 8043 case SystemZ::SelectVR64: 8044 case SystemZ::SelectVR128: 8045 return true; 8046 8047 default: 8048 return false; 8049 } 8050 } 8051 8052 // Helper function, which inserts PHI functions into SinkMBB: 8053 // %Result(i) = phi [ %FalseValue(i), FalseMBB ], [ %TrueValue(i), TrueMBB ], 8054 // where %FalseValue(i) and %TrueValue(i) are taken from Selects. 8055 static void createPHIsForSelects(SmallVector<MachineInstr*, 8> &Selects, 8056 MachineBasicBlock *TrueMBB, 8057 MachineBasicBlock *FalseMBB, 8058 MachineBasicBlock *SinkMBB) { 8059 MachineFunction *MF = TrueMBB->getParent(); 8060 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); 8061 8062 MachineInstr *FirstMI = Selects.front(); 8063 unsigned CCValid = FirstMI->getOperand(3).getImm(); 8064 unsigned CCMask = FirstMI->getOperand(4).getImm(); 8065 8066 MachineBasicBlock::iterator SinkInsertionPoint = SinkMBB->begin(); 8067 8068 // As we are creating the PHIs, we have to be careful if there is more than 8069 // one. Later Selects may reference the results of earlier Selects, but later 8070 // PHIs have to reference the individual true/false inputs from earlier PHIs. 8071 // That also means that PHI construction must work forward from earlier to 8072 // later, and that the code must maintain a mapping from earlier PHI's 8073 // destination registers, and the registers that went into the PHI. 8074 DenseMap<unsigned, std::pair<unsigned, unsigned>> RegRewriteTable; 8075 8076 for (auto *MI : Selects) { 8077 Register DestReg = MI->getOperand(0).getReg(); 8078 Register TrueReg = MI->getOperand(1).getReg(); 8079 Register FalseReg = MI->getOperand(2).getReg(); 8080 8081 // If this Select we are generating is the opposite condition from 8082 // the jump we generated, then we have to swap the operands for the 8083 // PHI that is going to be generated. 8084 if (MI->getOperand(4).getImm() == (CCValid ^ CCMask)) 8085 std::swap(TrueReg, FalseReg); 8086 8087 if (RegRewriteTable.contains(TrueReg)) 8088 TrueReg = RegRewriteTable[TrueReg].first; 8089 8090 if (RegRewriteTable.contains(FalseReg)) 8091 FalseReg = RegRewriteTable[FalseReg].second; 8092 8093 DebugLoc DL = MI->getDebugLoc(); 8094 BuildMI(*SinkMBB, SinkInsertionPoint, DL, TII->get(SystemZ::PHI), DestReg) 8095 .addReg(TrueReg).addMBB(TrueMBB) 8096 .addReg(FalseReg).addMBB(FalseMBB); 8097 8098 // Add this PHI to the rewrite table. 8099 RegRewriteTable[DestReg] = std::make_pair(TrueReg, FalseReg); 8100 } 8101 8102 MF->getProperties().reset(MachineFunctionProperties::Property::NoPHIs); 8103 } 8104 8105 // Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI. 8106 MachineBasicBlock * 8107 SystemZTargetLowering::emitSelect(MachineInstr &MI, 8108 MachineBasicBlock *MBB) const { 8109 assert(isSelectPseudo(MI) && "Bad call to emitSelect()"); 8110 const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); 8111 8112 unsigned CCValid = MI.getOperand(3).getImm(); 8113 unsigned CCMask = MI.getOperand(4).getImm(); 8114 8115 // If we have a sequence of Select* pseudo instructions using the 8116 // same condition code value, we want to expand all of them into 8117 // a single pair of basic blocks using the same condition. 
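// For example, two adjacent Select32 pseudos testing the same CC value (or
// its exact inverse) share one BRC and one FalseMBB/JoinMBB pair; the scan
// below collects such a group, and createPHIsForSelects() swaps the PHI
// operands for the inverted members.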
8118 SmallVector<MachineInstr*, 8> Selects; 8119 SmallVector<MachineInstr*, 8> DbgValues; 8120 Selects.push_back(&MI); 8121 unsigned Count = 0; 8122 for (MachineInstr &NextMI : llvm::make_range( 8123 std::next(MachineBasicBlock::iterator(MI)), MBB->end())) { 8124 if (isSelectPseudo(NextMI)) { 8125 assert(NextMI.getOperand(3).getImm() == CCValid && 8126 "Bad CCValid operands since CC was not redefined."); 8127 if (NextMI.getOperand(4).getImm() == CCMask || 8128 NextMI.getOperand(4).getImm() == (CCValid ^ CCMask)) { 8129 Selects.push_back(&NextMI); 8130 continue; 8131 } 8132 break; 8133 } 8134 if (NextMI.definesRegister(SystemZ::CC) || NextMI.usesCustomInsertionHook()) 8135 break; 8136 bool User = false; 8137 for (auto *SelMI : Selects) 8138 if (NextMI.readsVirtualRegister(SelMI->getOperand(0).getReg())) { 8139 User = true; 8140 break; 8141 } 8142 if (NextMI.isDebugInstr()) { 8143 if (User) { 8144 assert(NextMI.isDebugValue() && "Unhandled debug opcode."); 8145 DbgValues.push_back(&NextMI); 8146 } 8147 } else if (User || ++Count > 20) 8148 break; 8149 } 8150 8151 MachineInstr *LastMI = Selects.back(); 8152 bool CCKilled = 8153 (LastMI->killsRegister(SystemZ::CC) || checkCCKill(*LastMI, MBB)); 8154 MachineBasicBlock *StartMBB = MBB; 8155 MachineBasicBlock *JoinMBB = SystemZ::splitBlockAfter(LastMI, MBB); 8156 MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB); 8157 8158 // Unless CC was killed in the last Select instruction, mark it as 8159 // live-in to both FalseMBB and JoinMBB. 8160 if (!CCKilled) { 8161 FalseMBB->addLiveIn(SystemZ::CC); 8162 JoinMBB->addLiveIn(SystemZ::CC); 8163 } 8164 8165 // StartMBB: 8166 // BRC CCMask, JoinMBB 8167 // # fallthrough to FalseMBB 8168 MBB = StartMBB; 8169 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::BRC)) 8170 .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB); 8171 MBB->addSuccessor(JoinMBB); 8172 MBB->addSuccessor(FalseMBB); 8173 8174 // FalseMBB: 8175 // # fallthrough to JoinMBB 8176 MBB = FalseMBB; 8177 MBB->addSuccessor(JoinMBB); 8178 8179 // JoinMBB: 8180 // %Result = phi [ %FalseReg, FalseMBB ], [ %TrueReg, StartMBB ] 8181 // ... 8182 MBB = JoinMBB; 8183 createPHIsForSelects(Selects, StartMBB, FalseMBB, MBB); 8184 for (auto *SelMI : Selects) 8185 SelMI->eraseFromParent(); 8186 8187 MachineBasicBlock::iterator InsertPos = MBB->getFirstNonPHI(); 8188 for (auto *DbgMI : DbgValues) 8189 MBB->splice(InsertPos, StartMBB, DbgMI); 8190 8191 return JoinMBB; 8192 } 8193 8194 // Implement EmitInstrWithCustomInserter for pseudo CondStore* instruction MI. 8195 // StoreOpcode is the store to use and Invert says whether the store should 8196 // happen when the condition is false rather than true. If a STORE ON 8197 // CONDITION is available, STOCOpcode is its opcode, otherwise it is 0. 
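// When no STOC form can be used (no load/store-on-condition facility, an
// index register, or STOCOpcode == 0), the store is branched around instead:
// a BRC with the inverted CC mask jumps to the join block and the store is
// placed in the fallthrough block.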
8198 MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI, 8199 MachineBasicBlock *MBB, 8200 unsigned StoreOpcode, 8201 unsigned STOCOpcode, 8202 bool Invert) const { 8203 const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); 8204 8205 Register SrcReg = MI.getOperand(0).getReg(); 8206 MachineOperand Base = MI.getOperand(1); 8207 int64_t Disp = MI.getOperand(2).getImm(); 8208 Register IndexReg = MI.getOperand(3).getReg(); 8209 unsigned CCValid = MI.getOperand(4).getImm(); 8210 unsigned CCMask = MI.getOperand(5).getImm(); 8211 DebugLoc DL = MI.getDebugLoc(); 8212 8213 StoreOpcode = TII->getOpcodeForOffset(StoreOpcode, Disp); 8214 8215 // ISel pattern matching also adds a load memory operand of the same 8216 // address, so take special care to find the storing memory operand. 8217 MachineMemOperand *MMO = nullptr; 8218 for (auto *I : MI.memoperands()) 8219 if (I->isStore()) { 8220 MMO = I; 8221 break; 8222 } 8223 8224 // Use STOCOpcode if possible. We could use different store patterns in 8225 // order to avoid matching the index register, but the performance trade-offs 8226 // might be more complicated in that case. 8227 if (STOCOpcode && !IndexReg && Subtarget.hasLoadStoreOnCond()) { 8228 if (Invert) 8229 CCMask ^= CCValid; 8230 8231 BuildMI(*MBB, MI, DL, TII->get(STOCOpcode)) 8232 .addReg(SrcReg) 8233 .add(Base) 8234 .addImm(Disp) 8235 .addImm(CCValid) 8236 .addImm(CCMask) 8237 .addMemOperand(MMO); 8238 8239 MI.eraseFromParent(); 8240 return MBB; 8241 } 8242 8243 // Get the condition needed to branch around the store. 8244 if (!Invert) 8245 CCMask ^= CCValid; 8246 8247 MachineBasicBlock *StartMBB = MBB; 8248 MachineBasicBlock *JoinMBB = SystemZ::splitBlockBefore(MI, MBB); 8249 MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB); 8250 8251 // Unless CC was killed in the CondStore instruction, mark it as 8252 // live-in to both FalseMBB and JoinMBB. 8253 if (!MI.killsRegister(SystemZ::CC) && !checkCCKill(MI, JoinMBB)) { 8254 FalseMBB->addLiveIn(SystemZ::CC); 8255 JoinMBB->addLiveIn(SystemZ::CC); 8256 } 8257 8258 // StartMBB: 8259 // BRC CCMask, JoinMBB 8260 // # fallthrough to FalseMBB 8261 MBB = StartMBB; 8262 BuildMI(MBB, DL, TII->get(SystemZ::BRC)) 8263 .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB); 8264 MBB->addSuccessor(JoinMBB); 8265 MBB->addSuccessor(FalseMBB); 8266 8267 // FalseMBB: 8268 // store %SrcReg, %Disp(%Index,%Base) 8269 // # fallthrough to JoinMBB 8270 MBB = FalseMBB; 8271 BuildMI(MBB, DL, TII->get(StoreOpcode)) 8272 .addReg(SrcReg) 8273 .add(Base) 8274 .addImm(Disp) 8275 .addReg(IndexReg) 8276 .addMemOperand(MMO); 8277 MBB->addSuccessor(JoinMBB); 8278 8279 MI.eraseFromParent(); 8280 return JoinMBB; 8281 } 8282 8283 // Implement EmitInstrWithCustomInserter for pseudo [SU]Cmp128Hi instruction MI. 8284 MachineBasicBlock * 8285 SystemZTargetLowering::emitICmp128Hi(MachineInstr &MI, 8286 MachineBasicBlock *MBB, 8287 bool Unsigned) const { 8288 MachineFunction &MF = *MBB->getParent(); 8289 const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); 8290 MachineRegisterInfo &MRI = MF.getRegInfo(); 8291 8292 // Synthetic instruction to compare 128-bit values. 8293 // Sets CC 1 if Op0 > Op1, sets a different CC otherwise. 
8294 Register Op0 = MI.getOperand(0).getReg();
8295 Register Op1 = MI.getOperand(1).getReg();
8296
8297 MachineBasicBlock *StartMBB = MBB;
8298 MachineBasicBlock *JoinMBB = SystemZ::splitBlockAfter(MI, MBB);
8299 MachineBasicBlock *HiEqMBB = SystemZ::emitBlockAfter(StartMBB);
8300
8301 // StartMBB:
8302 //
8303 // Use VECTOR ELEMENT COMPARE [LOGICAL] to compare the high parts.
8304 // Swap the inputs to get:
8305 // CC 1 if high(Op0) > high(Op1)
8306 // CC 2 if high(Op0) < high(Op1)
8307 // CC 0 if high(Op0) == high(Op1)
8308 //
8309 // If CC != 0, we're done, so jump over the next instruction.
8310 //
8311 // VEC[L]G Op1, Op0
8312 // JNE JoinMBB
8313 // # fallthrough to HiEqMBB
8314 MBB = StartMBB;
8315 int HiOpcode = Unsigned ? SystemZ::VECLG : SystemZ::VECG;
8316 BuildMI(MBB, MI.getDebugLoc(), TII->get(HiOpcode))
8317 .addReg(Op1).addReg(Op0);
8318 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::BRC))
8319 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE).addMBB(JoinMBB);
8320 MBB->addSuccessor(JoinMBB);
8321 MBB->addSuccessor(HiEqMBB);
8322
8323 // HiEqMBB:
8324 //
8325 // Otherwise, use VECTOR COMPARE HIGH LOGICAL.
8326 // Since we already know the high parts are equal, the CC
8327 // result will only depend on the low parts:
8328 // CC 1 if low(Op0) > low(Op1)
8329 // CC 3 if low(Op0) <= low(Op1)
8330 //
8331 // VCHLGS Tmp, Op0, Op1
8332 // # fallthrough to JoinMBB
8333 MBB = HiEqMBB;
8334 Register Temp = MRI.createVirtualRegister(&SystemZ::VR128BitRegClass);
8335 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::VCHLGS), Temp)
8336 .addReg(Op0).addReg(Op1);
8337 MBB->addSuccessor(JoinMBB);
8338
8339 // Mark CC as live-in to JoinMBB.
8340 JoinMBB->addLiveIn(SystemZ::CC);
8341
8342 MI.eraseFromParent();
8343 return JoinMBB;
8344 }
8345
8346 // Implement EmitInstrWithCustomInserter for subword pseudo ATOMIC_LOADW_* or
8347 // ATOMIC_SWAPW instruction MI. BinOpcode is the instruction that performs
8348 // the binary operation elided by "*", or 0 for ATOMIC_SWAPW. Invert says
8349 // whether the field should be inverted after performing BinOpcode (e.g. for
8350 // NAND).
8351 MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(
8352 MachineInstr &MI, MachineBasicBlock *MBB, unsigned BinOpcode,
8353 bool Invert) const {
8354 MachineFunction &MF = *MBB->getParent();
8355 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8356 MachineRegisterInfo &MRI = MF.getRegInfo();
8357
8358 // Extract the operands. Base can be a register or a frame index.
8359 // Src2 can be a register or immediate.
8360 Register Dest = MI.getOperand(0).getReg();
8361 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
8362 int64_t Disp = MI.getOperand(2).getImm();
8363 MachineOperand Src2 = earlyUseOperand(MI.getOperand(3));
8364 Register BitShift = MI.getOperand(4).getReg();
8365 Register NegBitShift = MI.getOperand(5).getReg();
8366 unsigned BitSize = MI.getOperand(6).getImm();
8367 DebugLoc DL = MI.getDebugLoc();
8368
8369 // Get the right opcodes for the displacement.
8370 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
8371 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
8372 assert(LOpcode && CSOpcode && "Displacement out of range");
8373
8374 // Create virtual registers for temporary results.
8375 Register OrigVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8376 Register OldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8377 Register NewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8378 Register RotatedOldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8379 Register RotatedNewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8380
8381 // Insert a basic block for the main loop.
8382 MachineBasicBlock *StartMBB = MBB;
8383 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
8384 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
8385
8386 // StartMBB:
8387 // ...
8388 // %OrigVal = L Disp(%Base)
8389 // # fall through to LoopMBB
8390 MBB = StartMBB;
8391 BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
8392 MBB->addSuccessor(LoopMBB);
8393
8394 // LoopMBB:
8395 // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, LoopMBB ]
8396 // %RotatedOldVal = RLL %OldVal, 0(%BitShift)
8397 // %RotatedNewVal = OP %RotatedOldVal, %Src2
8398 // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift)
8399 // %Dest = CS %OldVal, %NewVal, Disp(%Base)
8400 // JNE LoopMBB
8401 // # fall through to DoneMBB
8402 MBB = LoopMBB;
8403 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
8404 .addReg(OrigVal).addMBB(StartMBB)
8405 .addReg(Dest).addMBB(LoopMBB);
8406 BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
8407 .addReg(OldVal).addReg(BitShift).addImm(0);
8408 if (Invert) {
8409 // Perform the operation normally and then invert every bit of the field.
8410 Register Tmp = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8411 BuildMI(MBB, DL, TII->get(BinOpcode), Tmp).addReg(RotatedOldVal).add(Src2);
8412 // XILF with the upper BitSize bits set.
8413 BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal)
8414 .addReg(Tmp).addImm(-1U << (32 - BitSize));
8415 } else if (BinOpcode)
8416 // A simple binary operation.
8417 BuildMI(MBB, DL, TII->get(BinOpcode), RotatedNewVal)
8418 .addReg(RotatedOldVal)
8419 .add(Src2);
8420 else
8421 // Use RISBG to rotate Src2 into position and use it to replace the
8422 // field in RotatedOldVal.
8423 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedNewVal)
8424 .addReg(RotatedOldVal).addReg(Src2.getReg())
8425 .addImm(32).addImm(31 + BitSize).addImm(32 - BitSize);
8426 BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
8427 .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
8428 BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
8429 .addReg(OldVal)
8430 .addReg(NewVal)
8431 .add(Base)
8432 .addImm(Disp);
8433 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8434 .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
8435 MBB->addSuccessor(LoopMBB);
8436 MBB->addSuccessor(DoneMBB);
8437
8438 MI.eraseFromParent();
8439 return DoneMBB;
8440 }
8441
8442 // Implement EmitInstrWithCustomInserter for subword pseudo
8443 // ATOMIC_LOADW_{,U}{MIN,MAX} instruction MI. CompareOpcode is the
8444 // instruction that should be used to compare the current field with the
8445 // minimum or maximum value. KeepOldMask is the BRC condition-code mask
8446 // for when the current field should be kept.
8447 MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax(
8448 MachineInstr &MI, MachineBasicBlock *MBB, unsigned CompareOpcode,
8449 unsigned KeepOldMask) const {
8450 MachineFunction &MF = *MBB->getParent();
8451 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8452 MachineRegisterInfo &MRI = MF.getRegInfo();
8453
8454 // Extract the operands.
Base can be a register or a frame index. 8455 Register Dest = MI.getOperand(0).getReg(); 8456 MachineOperand Base = earlyUseOperand(MI.getOperand(1)); 8457 int64_t Disp = MI.getOperand(2).getImm(); 8458 Register Src2 = MI.getOperand(3).getReg(); 8459 Register BitShift = MI.getOperand(4).getReg(); 8460 Register NegBitShift = MI.getOperand(5).getReg(); 8461 unsigned BitSize = MI.getOperand(6).getImm(); 8462 DebugLoc DL = MI.getDebugLoc(); 8463 8464 // Get the right opcodes for the displacement. 8465 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp); 8466 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp); 8467 assert(LOpcode && CSOpcode && "Displacement out of range"); 8468 8469 // Create virtual registers for temporary results. 8470 Register OrigVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass); 8471 Register OldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass); 8472 Register NewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass); 8473 Register RotatedOldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass); 8474 Register RotatedAltVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass); 8475 Register RotatedNewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass); 8476 8477 // Insert 3 basic blocks for the loop. 8478 MachineBasicBlock *StartMBB = MBB; 8479 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB); 8480 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB); 8481 MachineBasicBlock *UseAltMBB = SystemZ::emitBlockAfter(LoopMBB); 8482 MachineBasicBlock *UpdateMBB = SystemZ::emitBlockAfter(UseAltMBB); 8483 8484 // StartMBB: 8485 // ... 8486 // %OrigVal = L Disp(%Base) 8487 // # fall through to LoopMBB 8488 MBB = StartMBB; 8489 BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0); 8490 MBB->addSuccessor(LoopMBB); 8491 8492 // LoopMBB: 8493 // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, UpdateMBB ] 8494 // %RotatedOldVal = RLL %OldVal, 0(%BitShift) 8495 // CompareOpcode %RotatedOldVal, %Src2 8496 // BRC KeepOldMask, UpdateMBB 8497 MBB = LoopMBB; 8498 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal) 8499 .addReg(OrigVal).addMBB(StartMBB) 8500 .addReg(Dest).addMBB(UpdateMBB); 8501 BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal) 8502 .addReg(OldVal).addReg(BitShift).addImm(0); 8503 BuildMI(MBB, DL, TII->get(CompareOpcode)) 8504 .addReg(RotatedOldVal).addReg(Src2); 8505 BuildMI(MBB, DL, TII->get(SystemZ::BRC)) 8506 .addImm(SystemZ::CCMASK_ICMP).addImm(KeepOldMask).addMBB(UpdateMBB); 8507 MBB->addSuccessor(UpdateMBB); 8508 MBB->addSuccessor(UseAltMBB); 8509 8510 // UseAltMBB: 8511 // %RotatedAltVal = RISBG %RotatedOldVal, %Src2, 32, 31 + BitSize, 0 8512 // # fall through to UpdateMBB 8513 MBB = UseAltMBB; 8514 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedAltVal) 8515 .addReg(RotatedOldVal).addReg(Src2) 8516 .addImm(32).addImm(31 + BitSize).addImm(0); 8517 MBB->addSuccessor(UpdateMBB); 8518 8519 // UpdateMBB: 8520 // %RotatedNewVal = PHI [ %RotatedOldVal, LoopMBB ], 8521 // [ %RotatedAltVal, UseAltMBB ] 8522 // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift) 8523 // %Dest = CS %OldVal, %NewVal, Disp(%Base) 8524 // JNE LoopMBB 8525 // # fall through to DoneMBB 8526 MBB = UpdateMBB; 8527 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RotatedNewVal) 8528 .addReg(RotatedOldVal).addMBB(LoopMBB) 8529 .addReg(RotatedAltVal).addMBB(UseAltMBB); 8530 BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal) 8531 .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0); 8532 BuildMI(MBB, DL, 
TII->get(CSOpcode), Dest) 8533 .addReg(OldVal) 8534 .addReg(NewVal) 8535 .add(Base) 8536 .addImm(Disp); 8537 BuildMI(MBB, DL, TII->get(SystemZ::BRC)) 8538 .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB); 8539 MBB->addSuccessor(LoopMBB); 8540 MBB->addSuccessor(DoneMBB); 8541 8542 MI.eraseFromParent(); 8543 return DoneMBB; 8544 } 8545 8546 // Implement EmitInstrWithCustomInserter for subword pseudo ATOMIC_CMP_SWAPW 8547 // instruction MI. 8548 MachineBasicBlock * 8549 SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI, 8550 MachineBasicBlock *MBB) const { 8551 MachineFunction &MF = *MBB->getParent(); 8552 const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); 8553 MachineRegisterInfo &MRI = MF.getRegInfo(); 8554 8555 // Extract the operands. Base can be a register or a frame index. 8556 Register Dest = MI.getOperand(0).getReg(); 8557 MachineOperand Base = earlyUseOperand(MI.getOperand(1)); 8558 int64_t Disp = MI.getOperand(2).getImm(); 8559 Register CmpVal = MI.getOperand(3).getReg(); 8560 Register OrigSwapVal = MI.getOperand(4).getReg(); 8561 Register BitShift = MI.getOperand(5).getReg(); 8562 Register NegBitShift = MI.getOperand(6).getReg(); 8563 int64_t BitSize = MI.getOperand(7).getImm(); 8564 DebugLoc DL = MI.getDebugLoc(); 8565 8566 const TargetRegisterClass *RC = &SystemZ::GR32BitRegClass; 8567 8568 // Get the right opcodes for the displacement and zero-extension. 8569 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp); 8570 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp); 8571 unsigned ZExtOpcode = BitSize == 8 ? SystemZ::LLCR : SystemZ::LLHR; 8572 assert(LOpcode && CSOpcode && "Displacement out of range"); 8573 8574 // Create virtual registers for temporary results. 8575 Register OrigOldVal = MRI.createVirtualRegister(RC); 8576 Register OldVal = MRI.createVirtualRegister(RC); 8577 Register SwapVal = MRI.createVirtualRegister(RC); 8578 Register StoreVal = MRI.createVirtualRegister(RC); 8579 Register OldValRot = MRI.createVirtualRegister(RC); 8580 Register RetryOldVal = MRI.createVirtualRegister(RC); 8581 Register RetrySwapVal = MRI.createVirtualRegister(RC); 8582 8583 // Insert 2 basic blocks for the loop. 8584 MachineBasicBlock *StartMBB = MBB; 8585 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB); 8586 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB); 8587 MachineBasicBlock *SetMBB = SystemZ::emitBlockAfter(LoopMBB); 8588 8589 // StartMBB: 8590 // ... 8591 // %OrigOldVal = L Disp(%Base) 8592 // # fall through to LoopMBB 8593 MBB = StartMBB; 8594 BuildMI(MBB, DL, TII->get(LOpcode), OrigOldVal) 8595 .add(Base) 8596 .addImm(Disp) 8597 .addReg(0); 8598 MBB->addSuccessor(LoopMBB); 8599 8600 // LoopMBB: 8601 // %OldVal = phi [ %OrigOldVal, EntryBB ], [ %RetryOldVal, SetMBB ] 8602 // %SwapVal = phi [ %OrigSwapVal, EntryBB ], [ %RetrySwapVal, SetMBB ] 8603 // %OldValRot = RLL %OldVal, BitSize(%BitShift) 8604 // ^^ The low BitSize bits contain the field 8605 // of interest. 8606 // %RetrySwapVal = RISBG32 %SwapVal, %OldValRot, 32, 63-BitSize, 0 8607 // ^^ Replace the upper 32-BitSize bits of the 8608 // swap value with those that we loaded and rotated. 
8609 // %Dest = LL[CH] %OldValRot 8610 // CR %Dest, %CmpVal 8611 // JNE DoneMBB 8612 // # Fall through to SetMBB 8613 MBB = LoopMBB; 8614 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal) 8615 .addReg(OrigOldVal).addMBB(StartMBB) 8616 .addReg(RetryOldVal).addMBB(SetMBB); 8617 BuildMI(MBB, DL, TII->get(SystemZ::PHI), SwapVal) 8618 .addReg(OrigSwapVal).addMBB(StartMBB) 8619 .addReg(RetrySwapVal).addMBB(SetMBB); 8620 BuildMI(MBB, DL, TII->get(SystemZ::RLL), OldValRot) 8621 .addReg(OldVal).addReg(BitShift).addImm(BitSize); 8622 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetrySwapVal) 8623 .addReg(SwapVal).addReg(OldValRot).addImm(32).addImm(63 - BitSize).addImm(0); 8624 BuildMI(MBB, DL, TII->get(ZExtOpcode), Dest) 8625 .addReg(OldValRot); 8626 BuildMI(MBB, DL, TII->get(SystemZ::CR)) 8627 .addReg(Dest).addReg(CmpVal); 8628 BuildMI(MBB, DL, TII->get(SystemZ::BRC)) 8629 .addImm(SystemZ::CCMASK_ICMP) 8630 .addImm(SystemZ::CCMASK_CMP_NE).addMBB(DoneMBB); 8631 MBB->addSuccessor(DoneMBB); 8632 MBB->addSuccessor(SetMBB); 8633 8634 // SetMBB: 8635 // %StoreVal = RLL %RetrySwapVal, -BitSize(%NegBitShift) 8636 // ^^ Rotate the new field to its proper position. 8637 // %RetryOldVal = CS %OldVal, %StoreVal, Disp(%Base) 8638 // JNE LoopMBB 8639 // # fall through to ExitMBB 8640 MBB = SetMBB; 8641 BuildMI(MBB, DL, TII->get(SystemZ::RLL), StoreVal) 8642 .addReg(RetrySwapVal).addReg(NegBitShift).addImm(-BitSize); 8643 BuildMI(MBB, DL, TII->get(CSOpcode), RetryOldVal) 8644 .addReg(OldVal) 8645 .addReg(StoreVal) 8646 .add(Base) 8647 .addImm(Disp); 8648 BuildMI(MBB, DL, TII->get(SystemZ::BRC)) 8649 .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB); 8650 MBB->addSuccessor(LoopMBB); 8651 MBB->addSuccessor(DoneMBB); 8652 8653 // If the CC def wasn't dead in the ATOMIC_CMP_SWAPW, mark CC as live-in 8654 // to the block after the loop. At this point, CC may have been defined 8655 // either by the CR in LoopMBB or by the CS in SetMBB. 8656 if (!MI.registerDefIsDead(SystemZ::CC)) 8657 DoneMBB->addLiveIn(SystemZ::CC); 8658 8659 MI.eraseFromParent(); 8660 return DoneMBB; 8661 } 8662 8663 // Emit a move from two GR64s to a GR128. 8664 MachineBasicBlock * 8665 SystemZTargetLowering::emitPair128(MachineInstr &MI, 8666 MachineBasicBlock *MBB) const { 8667 MachineFunction &MF = *MBB->getParent(); 8668 const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); 8669 MachineRegisterInfo &MRI = MF.getRegInfo(); 8670 DebugLoc DL = MI.getDebugLoc(); 8671 8672 Register Dest = MI.getOperand(0).getReg(); 8673 Register Hi = MI.getOperand(1).getReg(); 8674 Register Lo = MI.getOperand(2).getReg(); 8675 Register Tmp1 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass); 8676 Register Tmp2 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass); 8677 8678 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), Tmp1); 8679 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Tmp2) 8680 .addReg(Tmp1).addReg(Hi).addImm(SystemZ::subreg_h64); 8681 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest) 8682 .addReg(Tmp2).addReg(Lo).addImm(SystemZ::subreg_l64); 8683 8684 MI.eraseFromParent(); 8685 return MBB; 8686 } 8687 8688 // Emit an extension from a GR64 to a GR128. ClearEven is true 8689 // if the high register of the GR128 value must be cleared or false if 8690 // it's "don't care". 
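// The low half of the result always comes from Src; the high half is either
// zeroed via LLILL 0 (ClearEven) or simply left as the IMPLICIT_DEF value.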
8691 MachineBasicBlock *SystemZTargetLowering::emitExt128(MachineInstr &MI, 8692 MachineBasicBlock *MBB, 8693 bool ClearEven) const { 8694 MachineFunction &MF = *MBB->getParent(); 8695 const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); 8696 MachineRegisterInfo &MRI = MF.getRegInfo(); 8697 DebugLoc DL = MI.getDebugLoc(); 8698 8699 Register Dest = MI.getOperand(0).getReg(); 8700 Register Src = MI.getOperand(1).getReg(); 8701 Register In128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass); 8702 8703 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), In128); 8704 if (ClearEven) { 8705 Register NewIn128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass); 8706 Register Zero64 = MRI.createVirtualRegister(&SystemZ::GR64BitRegClass); 8707 8708 BuildMI(*MBB, MI, DL, TII->get(SystemZ::LLILL), Zero64) 8709 .addImm(0); 8710 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewIn128) 8711 .addReg(In128).addReg(Zero64).addImm(SystemZ::subreg_h64); 8712 In128 = NewIn128; 8713 } 8714 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest) 8715 .addReg(In128).addReg(Src).addImm(SystemZ::subreg_l64); 8716 8717 MI.eraseFromParent(); 8718 return MBB; 8719 } 8720 8721 MachineBasicBlock * 8722 SystemZTargetLowering::emitMemMemWrapper(MachineInstr &MI, 8723 MachineBasicBlock *MBB, 8724 unsigned Opcode, bool IsMemset) const { 8725 MachineFunction &MF = *MBB->getParent(); 8726 const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); 8727 MachineRegisterInfo &MRI = MF.getRegInfo(); 8728 DebugLoc DL = MI.getDebugLoc(); 8729 8730 MachineOperand DestBase = earlyUseOperand(MI.getOperand(0)); 8731 uint64_t DestDisp = MI.getOperand(1).getImm(); 8732 MachineOperand SrcBase = MachineOperand::CreateReg(0U, false); 8733 uint64_t SrcDisp; 8734 8735 // Fold the displacement Disp if it is out of range. 8736 auto foldDisplIfNeeded = [&](MachineOperand &Base, uint64_t &Disp) -> void { 8737 if (!isUInt<12>(Disp)) { 8738 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass); 8739 unsigned Opcode = TII->getOpcodeForOffset(SystemZ::LA, Disp); 8740 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(Opcode), Reg) 8741 .add(Base).addImm(Disp).addReg(0); 8742 Base = MachineOperand::CreateReg(Reg, false); 8743 Disp = 0; 8744 } 8745 }; 8746 8747 if (!IsMemset) { 8748 SrcBase = earlyUseOperand(MI.getOperand(2)); 8749 SrcDisp = MI.getOperand(3).getImm(); 8750 } else { 8751 SrcBase = DestBase; 8752 SrcDisp = DestDisp++; 8753 foldDisplIfNeeded(DestBase, DestDisp); 8754 } 8755 8756 MachineOperand &LengthMO = MI.getOperand(IsMemset ? 2 : 4); 8757 bool IsImmForm = LengthMO.isImm(); 8758 bool IsRegForm = !IsImmForm; 8759 8760 // Build and insert one Opcode of Length, with special treatment for memset. 
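// For memset the caller has already aliased SrcBase/SrcDisp to the
// destination one byte below DestDisp, so the first byte is stored with MVI
// or STC and the following overlapping MVC then propagates it through the
// remaining Length - 1 bytes.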
8761 auto insertMemMemOp = [&](MachineBasicBlock *InsMBB, 8762 MachineBasicBlock::iterator InsPos, 8763 MachineOperand DBase, uint64_t DDisp, 8764 MachineOperand SBase, uint64_t SDisp, 8765 unsigned Length) -> void { 8766 assert(Length > 0 && Length <= 256 && "Building memory op with bad length."); 8767 if (IsMemset) { 8768 MachineOperand ByteMO = earlyUseOperand(MI.getOperand(3)); 8769 if (ByteMO.isImm()) 8770 BuildMI(*InsMBB, InsPos, DL, TII->get(SystemZ::MVI)) 8771 .add(SBase).addImm(SDisp).add(ByteMO); 8772 else 8773 BuildMI(*InsMBB, InsPos, DL, TII->get(SystemZ::STC)) 8774 .add(ByteMO).add(SBase).addImm(SDisp).addReg(0); 8775 if (--Length == 0) 8776 return; 8777 } 8778 BuildMI(*MBB, InsPos, DL, TII->get(Opcode)) 8779 .add(DBase).addImm(DDisp).addImm(Length) 8780 .add(SBase).addImm(SDisp) 8781 .setMemRefs(MI.memoperands()); 8782 }; 8783 8784 bool NeedsLoop = false; 8785 uint64_t ImmLength = 0; 8786 Register LenAdjReg = SystemZ::NoRegister; 8787 if (IsImmForm) { 8788 ImmLength = LengthMO.getImm(); 8789 ImmLength += IsMemset ? 2 : 1; // Add back the subtracted adjustment. 8790 if (ImmLength == 0) { 8791 MI.eraseFromParent(); 8792 return MBB; 8793 } 8794 if (Opcode == SystemZ::CLC) { 8795 if (ImmLength > 3 * 256) 8796 // A two-CLC sequence is a clear win over a loop, not least because 8797 // it needs only one branch. A three-CLC sequence needs the same 8798 // number of branches as a loop (i.e. 2), but is shorter. That 8799 // brings us to lengths greater than 768 bytes. It seems relatively 8800 // likely that a difference will be found within the first 768 bytes, 8801 // so we just optimize for the smallest number of branch 8802 // instructions, in order to avoid polluting the prediction buffer 8803 // too much. 8804 NeedsLoop = true; 8805 } else if (ImmLength > 6 * 256) 8806 // The heuristic we use is to prefer loops for anything that would 8807 // require 7 or more MVCs. With these kinds of sizes there isn't much 8808 // to choose between straight-line code and looping code, since the 8809 // time will be dominated by the MVCs themselves. 8810 NeedsLoop = true; 8811 } else { 8812 NeedsLoop = true; 8813 LenAdjReg = LengthMO.getReg(); 8814 } 8815 8816 // When generating more than one CLC, all but the last will need to 8817 // branch to the end when a difference is found. 8818 MachineBasicBlock *EndMBB = 8819 (Opcode == SystemZ::CLC && (ImmLength > 256 || NeedsLoop) 8820 ? SystemZ::splitBlockAfter(MI, MBB) 8821 : nullptr); 8822 8823 if (NeedsLoop) { 8824 Register StartCountReg = 8825 MRI.createVirtualRegister(&SystemZ::GR64BitRegClass); 8826 if (IsImmForm) { 8827 TII->loadImmediate(*MBB, MI, StartCountReg, ImmLength / 256); 8828 ImmLength &= 255; 8829 } else { 8830 BuildMI(*MBB, MI, DL, TII->get(SystemZ::SRLG), StartCountReg) 8831 .addReg(LenAdjReg) 8832 .addReg(0) 8833 .addImm(8); 8834 } 8835 8836 bool HaveSingleBase = DestBase.isIdenticalTo(SrcBase); 8837 auto loadZeroAddress = [&]() -> MachineOperand { 8838 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass); 8839 BuildMI(*MBB, MI, DL, TII->get(SystemZ::LGHI), Reg).addImm(0); 8840 return MachineOperand::CreateReg(Reg, false); 8841 }; 8842 if (DestBase.isReg() && DestBase.getReg() == SystemZ::NoRegister) 8843 DestBase = loadZeroAddress(); 8844 if (SrcBase.isReg() && SrcBase.getReg() == SystemZ::NoRegister) 8845 SrcBase = HaveSingleBase ? 
DestBase : loadZeroAddress(); 8846 8847 MachineBasicBlock *StartMBB = nullptr; 8848 MachineBasicBlock *LoopMBB = nullptr; 8849 MachineBasicBlock *NextMBB = nullptr; 8850 MachineBasicBlock *DoneMBB = nullptr; 8851 MachineBasicBlock *AllDoneMBB = nullptr; 8852 8853 Register StartSrcReg = forceReg(MI, SrcBase, TII); 8854 Register StartDestReg = 8855 (HaveSingleBase ? StartSrcReg : forceReg(MI, DestBase, TII)); 8856 8857 const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass; 8858 Register ThisSrcReg = MRI.createVirtualRegister(RC); 8859 Register ThisDestReg = 8860 (HaveSingleBase ? ThisSrcReg : MRI.createVirtualRegister(RC)); 8861 Register NextSrcReg = MRI.createVirtualRegister(RC); 8862 Register NextDestReg = 8863 (HaveSingleBase ? NextSrcReg : MRI.createVirtualRegister(RC)); 8864 RC = &SystemZ::GR64BitRegClass; 8865 Register ThisCountReg = MRI.createVirtualRegister(RC); 8866 Register NextCountReg = MRI.createVirtualRegister(RC); 8867 8868 if (IsRegForm) { 8869 AllDoneMBB = SystemZ::splitBlockBefore(MI, MBB); 8870 StartMBB = SystemZ::emitBlockAfter(MBB); 8871 LoopMBB = SystemZ::emitBlockAfter(StartMBB); 8872 NextMBB = (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB); 8873 DoneMBB = SystemZ::emitBlockAfter(NextMBB); 8874 8875 // MBB: 8876 // # Jump to AllDoneMBB if LenAdjReg means 0, or fall thru to StartMBB. 8877 BuildMI(MBB, DL, TII->get(SystemZ::CGHI)) 8878 .addReg(LenAdjReg).addImm(IsMemset ? -2 : -1); 8879 BuildMI(MBB, DL, TII->get(SystemZ::BRC)) 8880 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ) 8881 .addMBB(AllDoneMBB); 8882 MBB->addSuccessor(AllDoneMBB); 8883 if (!IsMemset) 8884 MBB->addSuccessor(StartMBB); 8885 else { 8886 // MemsetOneCheckMBB: 8887 // # Jump to MemsetOneMBB for a memset of length 1, or 8888 // # fall thru to StartMBB. 8889 MachineBasicBlock *MemsetOneCheckMBB = SystemZ::emitBlockAfter(MBB); 8890 MachineBasicBlock *MemsetOneMBB = SystemZ::emitBlockAfter(&*MF.rbegin()); 8891 MBB->addSuccessor(MemsetOneCheckMBB); 8892 MBB = MemsetOneCheckMBB; 8893 BuildMI(MBB, DL, TII->get(SystemZ::CGHI)) 8894 .addReg(LenAdjReg).addImm(-1); 8895 BuildMI(MBB, DL, TII->get(SystemZ::BRC)) 8896 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ) 8897 .addMBB(MemsetOneMBB); 8898 MBB->addSuccessor(MemsetOneMBB, {10, 100}); 8899 MBB->addSuccessor(StartMBB, {90, 100}); 8900 8901 // MemsetOneMBB: 8902 // # Jump back to AllDoneMBB after a single MVI or STC. 8903 MBB = MemsetOneMBB; 8904 insertMemMemOp(MBB, MBB->end(), 8905 MachineOperand::CreateReg(StartDestReg, false), DestDisp, 8906 MachineOperand::CreateReg(StartSrcReg, false), SrcDisp, 8907 1); 8908 BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(AllDoneMBB); 8909 MBB->addSuccessor(AllDoneMBB); 8910 } 8911 8912 // StartMBB: 8913 // # Jump to DoneMBB if %StartCountReg is zero, or fall through to LoopMBB. 8914 MBB = StartMBB; 8915 BuildMI(MBB, DL, TII->get(SystemZ::CGHI)) 8916 .addReg(StartCountReg).addImm(0); 8917 BuildMI(MBB, DL, TII->get(SystemZ::BRC)) 8918 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ) 8919 .addMBB(DoneMBB); 8920 MBB->addSuccessor(DoneMBB); 8921 MBB->addSuccessor(LoopMBB); 8922 } 8923 else { 8924 StartMBB = MBB; 8925 DoneMBB = SystemZ::splitBlockBefore(MI, MBB); 8926 LoopMBB = SystemZ::emitBlockAfter(StartMBB); 8927 NextMBB = (EndMBB ? 

      // StartMBB:
      // # fall through to LoopMBB
      MBB->addSuccessor(LoopMBB);

      DestBase = MachineOperand::CreateReg(NextDestReg, false);
      SrcBase = MachineOperand::CreateReg(NextSrcReg, false);
      if (EndMBB && !ImmLength)
        // If the loop handled the whole CLC range, DoneMBB will be empty with
        // CC live-through into EndMBB, so add it as live-in.
        DoneMBB->addLiveIn(SystemZ::CC);
    }

    // LoopMBB:
    // %ThisDestReg = phi [ %StartDestReg, StartMBB ],
    //                    [ %NextDestReg, NextMBB ]
    // %ThisSrcReg = phi [ %StartSrcReg, StartMBB ],
    //                   [ %NextSrcReg, NextMBB ]
    // %ThisCountReg = phi [ %StartCountReg, StartMBB ],
    //                     [ %NextCountReg, NextMBB ]
    // ( PFD 2, 768+DestDisp(%ThisDestReg) )
    // Opcode DestDisp(256,%ThisDestReg), SrcDisp(%ThisSrcReg)
    // ( JLH EndMBB )
    //
    // The prefetch is used only for MVC. The JLH is used only for CLC.
    MBB = LoopMBB;
    BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisDestReg)
        .addReg(StartDestReg).addMBB(StartMBB)
        .addReg(NextDestReg).addMBB(NextMBB);
    if (!HaveSingleBase)
      BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisSrcReg)
          .addReg(StartSrcReg).addMBB(StartMBB)
          .addReg(NextSrcReg).addMBB(NextMBB);
    BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisCountReg)
        .addReg(StartCountReg).addMBB(StartMBB)
        .addReg(NextCountReg).addMBB(NextMBB);
    if (Opcode == SystemZ::MVC)
      BuildMI(MBB, DL, TII->get(SystemZ::PFD))
          .addImm(SystemZ::PFD_WRITE)
          .addReg(ThisDestReg).addImm(DestDisp - IsMemset + 768).addReg(0);
    insertMemMemOp(MBB, MBB->end(),
                   MachineOperand::CreateReg(ThisDestReg, false), DestDisp,
                   MachineOperand::CreateReg(ThisSrcReg, false), SrcDisp, 256);
    if (EndMBB) {
      BuildMI(MBB, DL, TII->get(SystemZ::BRC))
          .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
          .addMBB(EndMBB);
      MBB->addSuccessor(EndMBB);
      MBB->addSuccessor(NextMBB);
    }

    // NextMBB:
    // %NextDestReg = LA 256(%ThisDestReg)
    // %NextSrcReg = LA 256(%ThisSrcReg)
    // %NextCountReg = AGHI %ThisCountReg, -1
    // CGHI %NextCountReg, 0
    // JLH LoopMBB
    // # fall through to DoneMBB
    //
    // The AGHI, CGHI and JLH should be converted to BRCTG by later passes.
    MBB = NextMBB;
    BuildMI(MBB, DL, TII->get(SystemZ::LA), NextDestReg)
        .addReg(ThisDestReg).addImm(256).addReg(0);
    if (!HaveSingleBase)
      BuildMI(MBB, DL, TII->get(SystemZ::LA), NextSrcReg)
          .addReg(ThisSrcReg).addImm(256).addReg(0);
    BuildMI(MBB, DL, TII->get(SystemZ::AGHI), NextCountReg)
        .addReg(ThisCountReg).addImm(-1);
    BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
        .addReg(NextCountReg).addImm(0);
    BuildMI(MBB, DL, TII->get(SystemZ::BRC))
        .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
        .addMBB(LoopMBB);
    MBB->addSuccessor(LoopMBB);
    MBB->addSuccessor(DoneMBB);

    MBB = DoneMBB;
    if (IsRegForm) {
      // DoneMBB:
      // # Make PHIs for RemDestReg/RemSrcReg as the loop may or may not run.
      // # Use EXecute Relative Long for the remainder of the bytes. The target
      //   instruction of the EXRL will have a length field of 1 since 0 is an
      //   illegal value. The number of bytes processed becomes (%LenAdjReg &
      //   0xff) + 1.
      // # Fall through to AllDoneMBB.
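      // # For example, for a 260-byte MVC in register form LenAdjReg holds
      //   259: the loop above runs 259 >> 8 == 1 time (256 bytes) and the
      //   EXRL-executed MVC then handles the remaining (259 & 0xff) + 1 == 4
      //   bytes. (Illustrative values only.)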
      Register RemSrcReg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
      Register RemDestReg = HaveSingleBase ? RemSrcReg
          : MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
      BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemDestReg)
          .addReg(StartDestReg).addMBB(StartMBB)
          .addReg(NextDestReg).addMBB(NextMBB);
      if (!HaveSingleBase)
        BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemSrcReg)
            .addReg(StartSrcReg).addMBB(StartMBB)
            .addReg(NextSrcReg).addMBB(NextMBB);
      if (IsMemset)
        insertMemMemOp(MBB, MBB->end(),
                       MachineOperand::CreateReg(RemDestReg, false), DestDisp,
                       MachineOperand::CreateReg(RemSrcReg, false), SrcDisp, 1);
      MachineInstrBuilder EXRL_MIB =
          BuildMI(MBB, DL, TII->get(SystemZ::EXRL_Pseudo))
              .addImm(Opcode)
              .addReg(LenAdjReg)
              .addReg(RemDestReg).addImm(DestDisp)
              .addReg(RemSrcReg).addImm(SrcDisp);
      MBB->addSuccessor(AllDoneMBB);
      MBB = AllDoneMBB;
      if (Opcode != SystemZ::MVC) {
        EXRL_MIB.addReg(SystemZ::CC, RegState::ImplicitDefine);
        if (EndMBB)
          MBB->addLiveIn(SystemZ::CC);
      }
    }
    MF.getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
  }

  // Handle any remaining bytes with straight-line code.
  while (ImmLength > 0) {
    uint64_t ThisLength = std::min(ImmLength, uint64_t(256));
    // The previous iteration might have created out-of-range displacements.
    // Apply them using LA/LAY if so.
    foldDisplIfNeeded(DestBase, DestDisp);
    foldDisplIfNeeded(SrcBase, SrcDisp);
    insertMemMemOp(MBB, MI, DestBase, DestDisp, SrcBase, SrcDisp, ThisLength);
    DestDisp += ThisLength;
    SrcDisp += ThisLength;
    ImmLength -= ThisLength;
    // If there's another CLC to go, branch to the end if a difference
    // was found.
    if (EndMBB && ImmLength > 0) {
      MachineBasicBlock *NextMBB = SystemZ::splitBlockBefore(MI, MBB);
      BuildMI(MBB, DL, TII->get(SystemZ::BRC))
          .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
          .addMBB(EndMBB);
      MBB->addSuccessor(EndMBB);
      MBB->addSuccessor(NextMBB);
      MBB = NextMBB;
    }
  }
  if (EndMBB) {
    MBB->addSuccessor(EndMBB);
    MBB = EndMBB;
    MBB->addLiveIn(SystemZ::CC);
  }

  MI.eraseFromParent();
  return MBB;
}

// Decompose string pseudo-instruction MI into a loop that continually performs
// Opcode until CC != 3.
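// CC 3 here means that the string instruction stopped after processing a
// CPU-determined number of bytes without reaching its end condition, so the
// instruction has to be reissued; any other CC value terminates the loop.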
MachineBasicBlock *SystemZTargetLowering::emitStringWrapper(
    MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
  MachineFunction &MF = *MBB->getParent();
  const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  DebugLoc DL = MI.getDebugLoc();

  uint64_t End1Reg = MI.getOperand(0).getReg();
  uint64_t Start1Reg = MI.getOperand(1).getReg();
  uint64_t Start2Reg = MI.getOperand(2).getReg();
  uint64_t CharReg = MI.getOperand(3).getReg();

  const TargetRegisterClass *RC = &SystemZ::GR64BitRegClass;
  uint64_t This1Reg = MRI.createVirtualRegister(RC);
  uint64_t This2Reg = MRI.createVirtualRegister(RC);
  uint64_t End2Reg = MRI.createVirtualRegister(RC);

  MachineBasicBlock *StartMBB = MBB;
  MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
  MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);

  // StartMBB:
  // # fall through to LoopMBB
  MBB->addSuccessor(LoopMBB);

  // LoopMBB:
  // %This1Reg = phi [ %Start1Reg, StartMBB ], [ %End1Reg, LoopMBB ]
  // %This2Reg = phi [ %Start2Reg, StartMBB ], [ %End2Reg, LoopMBB ]
  // R0L = %CharReg
  // %End1Reg, %End2Reg = CLST %This1Reg, %This2Reg -- uses R0L
  // JO LoopMBB
  // # fall through to DoneMBB
  //
  // The load of R0L can be hoisted by post-RA LICM.
  MBB = LoopMBB;

  BuildMI(MBB, DL, TII->get(SystemZ::PHI), This1Reg)
      .addReg(Start1Reg).addMBB(StartMBB)
      .addReg(End1Reg).addMBB(LoopMBB);
  BuildMI(MBB, DL, TII->get(SystemZ::PHI), This2Reg)
      .addReg(Start2Reg).addMBB(StartMBB)
      .addReg(End2Reg).addMBB(LoopMBB);
  BuildMI(MBB, DL, TII->get(TargetOpcode::COPY), SystemZ::R0L).addReg(CharReg);
  BuildMI(MBB, DL, TII->get(Opcode))
      .addReg(End1Reg, RegState::Define).addReg(End2Reg, RegState::Define)
      .addReg(This1Reg).addReg(This2Reg);
  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
      .addImm(SystemZ::CCMASK_ANY).addImm(SystemZ::CCMASK_3).addMBB(LoopMBB);
  MBB->addSuccessor(LoopMBB);
  MBB->addSuccessor(DoneMBB);

  DoneMBB->addLiveIn(SystemZ::CC);

  MI.eraseFromParent();
  return DoneMBB;
}

// Update TBEGIN instruction with final opcode and register clobbers.
MachineBasicBlock *SystemZTargetLowering::emitTransactionBegin(
    MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode,
    bool NoFloat) const {
  MachineFunction &MF = *MBB->getParent();
  const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
  const SystemZInstrInfo *TII = Subtarget.getInstrInfo();

  // Update opcode.
  MI.setDesc(TII->get(Opcode));

  // We cannot handle a TBEGIN that clobbers the stack or frame pointer.
  // Make sure to add the corresponding GRSM bits if they are missing.
  uint64_t Control = MI.getOperand(2).getImm();
  static const unsigned GPRControlBit[16] = {
    0x8000, 0x8000, 0x4000, 0x4000, 0x2000, 0x2000, 0x1000, 0x1000,
    0x0800, 0x0800, 0x0400, 0x0400, 0x0200, 0x0200, 0x0100, 0x0100
  };
  Control |= GPRControlBit[15];
  if (TFI->hasFP(MF))
    Control |= GPRControlBit[11];
  MI.getOperand(2).setImm(Control);

  // Add GPR clobbers.
  for (int I = 0; I < 16; I++) {
    if ((Control & GPRControlBit[I]) == 0) {
      unsigned Reg = SystemZMC::GR64Regs[I];
      MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
    }
  }

  // Add FPR/VR clobbers.
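  // (Bit 0x4 of the control immediate is TBEGIN's F bit, which permits
  // floating-point and vector state to change inside the transaction; such
  // state is not restored on abort, so it must be modelled as clobbered.)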
  if (!NoFloat && (Control & 4) != 0) {
    if (Subtarget.hasVector()) {
      for (unsigned Reg : SystemZMC::VR128Regs) {
        MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
      }
    } else {
      for (unsigned Reg : SystemZMC::FP64Regs) {
        MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
      }
    }
  }

  return MBB;
}

MachineBasicBlock *SystemZTargetLowering::emitLoadAndTestCmp0(
    MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
  MachineFunction &MF = *MBB->getParent();
  MachineRegisterInfo *MRI = &MF.getRegInfo();
  const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
  DebugLoc DL = MI.getDebugLoc();

  Register SrcReg = MI.getOperand(0).getReg();

  // Create new virtual register of the same class as source.
  const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
  Register DstReg = MRI->createVirtualRegister(RC);

  // Replace pseudo with a normal load-and-test that models the def as
  // well.
  BuildMI(*MBB, MI, DL, TII->get(Opcode), DstReg)
      .addReg(SrcReg)
      .setMIFlags(MI.getFlags());
  MI.eraseFromParent();

  return MBB;
}

MachineBasicBlock *SystemZTargetLowering::emitProbedAlloca(
    MachineInstr &MI, MachineBasicBlock *MBB) const {
  MachineFunction &MF = *MBB->getParent();
  MachineRegisterInfo *MRI = &MF.getRegInfo();
  const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
  DebugLoc DL = MI.getDebugLoc();
  const unsigned ProbeSize = getStackProbeSize(MF);
  Register DstReg = MI.getOperand(0).getReg();
  Register SizeReg = MI.getOperand(2).getReg();

  MachineBasicBlock *StartMBB = MBB;
  MachineBasicBlock *DoneMBB = SystemZ::splitBlockAfter(MI, MBB);
  MachineBasicBlock *LoopTestMBB = SystemZ::emitBlockAfter(StartMBB);
  MachineBasicBlock *LoopBodyMBB = SystemZ::emitBlockAfter(LoopTestMBB);
  MachineBasicBlock *TailTestMBB = SystemZ::emitBlockAfter(LoopBodyMBB);
  MachineBasicBlock *TailMBB = SystemZ::emitBlockAfter(TailTestMBB);

  MachineMemOperand *VolLdMMO = MF.getMachineMemOperand(MachinePointerInfo(),
      MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad, 8, Align(1));

  Register PHIReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass);
  Register IncReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass);

  // LoopTestMBB
  // BRC TailTestMBB
  // # fallthrough to LoopBodyMBB
  StartMBB->addSuccessor(LoopTestMBB);
  MBB = LoopTestMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::PHI), PHIReg)
      .addReg(SizeReg)
      .addMBB(StartMBB)
      .addReg(IncReg)
      .addMBB(LoopBodyMBB);
  BuildMI(MBB, DL, TII->get(SystemZ::CLGFI))
      .addReg(PHIReg)
      .addImm(ProbeSize);
  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
      .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_LT)
      .addMBB(TailTestMBB);
  MBB->addSuccessor(LoopBodyMBB);
  MBB->addSuccessor(TailTestMBB);
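
  // Each LoopBodyMBB iteration below moves the stack pointer down by
  // ProbeSize bytes and immediately touches the newly allocated area with a
  // volatile compare, so every chunk is probed before the next one is
  // allocated.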
  // LoopBodyMBB: Allocate and probe by means of a volatile compare.
  // J LoopTestMBB
  MBB = LoopBodyMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), IncReg)
      .addReg(PHIReg)
      .addImm(ProbeSize);
  BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), SystemZ::R15D)
      .addReg(SystemZ::R15D)
      .addImm(ProbeSize);
  BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D)
      .addReg(SystemZ::R15D).addImm(ProbeSize - 8).addReg(0)
      .setMemRefs(VolLdMMO);
  BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(LoopTestMBB);
  MBB->addSuccessor(LoopTestMBB);

  // TailTestMBB
  // BRC DoneMBB
  // # fallthrough to TailMBB
  MBB = TailTestMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
      .addReg(PHIReg)
      .addImm(0);
  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
      .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
      .addMBB(DoneMBB);
  MBB->addSuccessor(TailMBB);
  MBB->addSuccessor(DoneMBB);

  // TailMBB
  // # fallthrough to DoneMBB
  MBB = TailMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::SLGR), SystemZ::R15D)
      .addReg(SystemZ::R15D)
      .addReg(PHIReg);
  BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D)
      .addReg(SystemZ::R15D).addImm(-8).addReg(PHIReg)
      .setMemRefs(VolLdMMO);
  MBB->addSuccessor(DoneMBB);

  // DoneMBB
  MBB = DoneMBB;
  BuildMI(*MBB, MBB->begin(), DL, TII->get(TargetOpcode::COPY), DstReg)
      .addReg(SystemZ::R15D);

  MI.eraseFromParent();
  return DoneMBB;
}

SDValue SystemZTargetLowering::
getBackchainAddress(SDValue SP, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
  SDLoc DL(SP);
  return DAG.getNode(ISD::ADD, DL, MVT::i64, SP,
                     DAG.getIntPtrConstant(TFL->getBackchainOffset(MF), DL));
}

MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
    MachineInstr &MI, MachineBasicBlock *MBB) const {
  switch (MI.getOpcode()) {
  case SystemZ::Select32:
  case SystemZ::Select64:
  case SystemZ::Select128:
  case SystemZ::SelectF32:
  case SystemZ::SelectF64:
  case SystemZ::SelectF128:
  case SystemZ::SelectVR32:
  case SystemZ::SelectVR64:
  case SystemZ::SelectVR128:
    return emitSelect(MI, MBB);

  case SystemZ::CondStore8Mux:
    return emitCondStore(MI, MBB, SystemZ::STCMux, 0, false);
  case SystemZ::CondStore8MuxInv:
    return emitCondStore(MI, MBB, SystemZ::STCMux, 0, true);
  case SystemZ::CondStore16Mux:
    return emitCondStore(MI, MBB, SystemZ::STHMux, 0, false);
  case SystemZ::CondStore16MuxInv:
    return emitCondStore(MI, MBB, SystemZ::STHMux, 0, true);
  case SystemZ::CondStore32Mux:
    return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, false);
  case SystemZ::CondStore32MuxInv:
    return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, true);
  case SystemZ::CondStore8:
    return emitCondStore(MI, MBB, SystemZ::STC, 0, false);
  case SystemZ::CondStore8Inv:
    return emitCondStore(MI, MBB, SystemZ::STC, 0, true);
  case SystemZ::CondStore16:
    return emitCondStore(MI, MBB, SystemZ::STH, 0, false);
  case SystemZ::CondStore16Inv:
    return emitCondStore(MI, MBB, SystemZ::STH, 0, true);
  case SystemZ::CondStore32:
    return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, false);
  case SystemZ::CondStore32Inv:
    return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, true);
  case SystemZ::CondStore64:
    return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, false);
  case SystemZ::CondStore64Inv:
    return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, true);
  case SystemZ::CondStoreF32:
    return emitCondStore(MI, MBB, SystemZ::STE, 0, false);
  case SystemZ::CondStoreF32Inv:
    return emitCondStore(MI, MBB, SystemZ::STE, 0, true);
  case SystemZ::CondStoreF64:
    return emitCondStore(MI, MBB, SystemZ::STD, 0, false);
  case SystemZ::CondStoreF64Inv:
    return emitCondStore(MI, MBB, SystemZ::STD, 0, true);

  case SystemZ::SCmp128Hi:
    return emitICmp128Hi(MI, MBB, false);
  case SystemZ::UCmp128Hi:
    return emitICmp128Hi(MI, MBB, true);

  case SystemZ::PAIR128:
    return emitPair128(MI, MBB);
  case SystemZ::AEXT128:
    return emitExt128(MI, MBB, false);
  case SystemZ::ZEXT128:
    return emitExt128(MI, MBB, true);

  case SystemZ::ATOMIC_SWAPW:
    return emitAtomicLoadBinary(MI, MBB, 0);

  case SystemZ::ATOMIC_LOADW_AR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::AR);
  case SystemZ::ATOMIC_LOADW_AFI:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI);

  case SystemZ::ATOMIC_LOADW_SR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::SR);

  case SystemZ::ATOMIC_LOADW_NR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NR);
  case SystemZ::ATOMIC_LOADW_NILH:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH);

  case SystemZ::ATOMIC_LOADW_OR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OR);
  case SystemZ::ATOMIC_LOADW_OILH:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH);

  case SystemZ::ATOMIC_LOADW_XR:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::XR);
  case SystemZ::ATOMIC_LOADW_XILF:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF);

  case SystemZ::ATOMIC_LOADW_NRi:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, true);
  case SystemZ::ATOMIC_LOADW_NILHi:
    return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, true);

  case SystemZ::ATOMIC_LOADW_MIN:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, SystemZ::CCMASK_CMP_LE);
  case SystemZ::ATOMIC_LOADW_MAX:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, SystemZ::CCMASK_CMP_GE);
  case SystemZ::ATOMIC_LOADW_UMIN:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, SystemZ::CCMASK_CMP_LE);
  case SystemZ::ATOMIC_LOADW_UMAX:
    return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, SystemZ::CCMASK_CMP_GE);

  case SystemZ::ATOMIC_CMP_SWAPW:
    return emitAtomicCmpSwapW(MI, MBB);
  case SystemZ::MVCImm:
  case SystemZ::MVCReg:
    return emitMemMemWrapper(MI, MBB, SystemZ::MVC);
  case SystemZ::NCImm:
    return emitMemMemWrapper(MI, MBB, SystemZ::NC);
  case SystemZ::OCImm:
    return emitMemMemWrapper(MI, MBB, SystemZ::OC);
  case SystemZ::XCImm:
  case SystemZ::XCReg:
    return emitMemMemWrapper(MI, MBB, SystemZ::XC);
  case SystemZ::CLCImm:
  case SystemZ::CLCReg:
    return emitMemMemWrapper(MI, MBB, SystemZ::CLC);
  case SystemZ::MemsetImmImm:
  case SystemZ::MemsetImmReg:
  case SystemZ::MemsetRegImm:
  case SystemZ::MemsetRegReg:
    return emitMemMemWrapper(MI, MBB, SystemZ::MVC, true/*IsMemset*/);
  case SystemZ::CLSTLoop:
    return emitStringWrapper(MI, MBB, SystemZ::CLST);
  case SystemZ::MVSTLoop:
    return emitStringWrapper(MI, MBB, SystemZ::MVST);
  case SystemZ::SRSTLoop:
    return emitStringWrapper(MI, MBB, SystemZ::SRST);
  case SystemZ::TBEGIN:
    return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, false);
  case SystemZ::TBEGIN_nofloat:
    return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, true);
  case SystemZ::TBEGINC:
    return emitTransactionBegin(MI, MBB, SystemZ::TBEGINC, true);
  case SystemZ::LTEBRCompare_Pseudo:
    return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTEBR);
  case SystemZ::LTDBRCompare_Pseudo:
    return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTDBR);
  case SystemZ::LTXBRCompare_Pseudo:
    return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTXBR);

  case SystemZ::PROBED_ALLOCA:
    return emitProbedAlloca(MI, MBB);

  case TargetOpcode::STACKMAP:
  case TargetOpcode::PATCHPOINT:
    return emitPatchPoint(MI, MBB);

  default:
    llvm_unreachable("Unexpected instr type to insert");
  }
}

// This is only used by the isel schedulers, and is needed only to prevent the
// compiler from crashing when list-ilp is used.
const TargetRegisterClass *
SystemZTargetLowering::getRepRegClassFor(MVT VT) const {
  if (VT == MVT::Untyped)
    return &SystemZ::ADDR128BitRegClass;
  return TargetLowering::getRepRegClassFor(VT);
}

SDValue SystemZTargetLowering::lowerGET_ROUNDING(SDValue Op,
                                                 SelectionDAG &DAG) const {
  SDLoc dl(Op);
  /*
   The rounding method is in FPC Byte 3 bits 6-7, and has the following
   settings:
     00 Round to nearest
     01 Round to 0
     10 Round to +inf
     11 Round to -inf

   FLT_ROUNDS, on the other hand, expects the following:
    -1 Undefined
     0 Round to 0
     1 Round to nearest
     2 Round to +inf
     3 Round to -inf
  */

  // Save FPC to register.
  SDValue Chain = Op.getOperand(0);
  SDValue EFPC(
      DAG.getMachineNode(SystemZ::EFPC, dl, {MVT::i32, MVT::Other}, Chain), 0);
  Chain = EFPC.getValue(1);

  // Transform as necessary
  SDValue CWD1 = DAG.getNode(ISD::AND, dl, MVT::i32, EFPC,
                             DAG.getConstant(3, dl, MVT::i32));
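  // Checking the transform against the tables above: FPC rounding-mode
  // values 0, 1, 2 and 3 map to (0^0)^1 = 1, (1^0)^1 = 0, (2^1)^1 = 2 and
  // (3^1)^1 = 3 respectively, which matches the FLT_ROUNDS encoding.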
  // RetVal = (CWD1 ^ (CWD1 >> 1)) ^ 1
  SDValue CWD2 = DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1,
                             DAG.getNode(ISD::SRL, dl, MVT::i32, CWD1,
                                         DAG.getConstant(1, dl, MVT::i32)));

  SDValue RetVal = DAG.getNode(ISD::XOR, dl, MVT::i32, CWD2,
                               DAG.getConstant(1, dl, MVT::i32));
  RetVal = DAG.getZExtOrTrunc(RetVal, dl, Op.getValueType());

  return DAG.getMergeValues({RetVal, Chain}, dl);
}