//===-- AVRISelLowering.cpp - AVR DAG Lowering Implementation -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that AVR uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "AVRISelLowering.h"

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/ErrorHandling.h"

#include "AVR.h"
#include "AVRMachineFunctionInfo.h"
#include "AVRSubtarget.h"
#include "AVRTargetMachine.h"
#include "MCTargetDesc/AVRMCTargetDesc.h"

namespace llvm {

AVRTargetLowering::AVRTargetLowering(const AVRTargetMachine &TM,
                                     const AVRSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {
  // Set up the register classes.
  addRegisterClass(MVT::i8, &AVR::GPR8RegClass);
  addRegisterClass(MVT::i16, &AVR::DREGSRegClass);

  // Compute derived properties from the register classes.
  computeRegisterProperties(Subtarget.getRegisterInfo());

  setBooleanContents(ZeroOrOneBooleanContent);
  setBooleanVectorContents(ZeroOrOneBooleanContent);
  setSchedulingPreference(Sched::RegPressure);
  setStackPointerRegisterToSaveRestore(AVR::SP);
  setSupportsUnalignedAtomics(true);

  setOperationAction(ISD::GlobalAddress, MVT::i16, Custom);
  setOperationAction(ISD::BlockAddress, MVT::i16, Custom);

  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i8, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i16, Expand);

  setOperationAction(ISD::INLINEASM, MVT::Other, Custom);

  for (MVT VT : MVT::integer_valuetypes()) {
    for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}) {
      setLoadExtAction(N, VT, MVT::i1, Promote);
      setLoadExtAction(N, VT, MVT::i8, Expand);
    }
  }

  setTruncStoreAction(MVT::i16, MVT::i8, Expand);

  for (MVT VT : MVT::integer_valuetypes()) {
    setOperationAction(ISD::ADDC, VT, Legal);
    setOperationAction(ISD::SUBC, VT, Legal);
    setOperationAction(ISD::ADDE, VT, Legal);
    setOperationAction(ISD::SUBE, VT, Legal);
  }

  // sub (x, imm) gets canonicalized to add (x, -imm), so for illegal types
  // revert it back into a sub, since we don't have an add-with-immediate
  // instruction.
  setOperationAction(ISD::ADD, MVT::i32, Custom);
  setOperationAction(ISD::ADD, MVT::i64, Custom);

  // Our shift instructions are only able to shift 1 bit at a time, so handle
  // this in a custom way.
  setOperationAction(ISD::SRA, MVT::i8, Custom);
  setOperationAction(ISD::SHL, MVT::i8, Custom);
  setOperationAction(ISD::SRL, MVT::i8, Custom);
  setOperationAction(ISD::SRA, MVT::i16, Custom);
  setOperationAction(ISD::SHL, MVT::i16, Custom);
  setOperationAction(ISD::SRL, MVT::i16, Custom);
  setOperationAction(ISD::SRA, MVT::i32, Custom);
  setOperationAction(ISD::SHL, MVT::i32, Custom);
  setOperationAction(ISD::SRL, MVT::i32, Custom);
  setOperationAction(ISD::SHL_PARTS, MVT::i16, Expand);
  setOperationAction(ISD::SRA_PARTS, MVT::i16, Expand);
  setOperationAction(ISD::SRL_PARTS, MVT::i16, Expand);

  setOperationAction(ISD::ROTL, MVT::i8, Custom);
  setOperationAction(ISD::ROTL, MVT::i16, Expand);
  setOperationAction(ISD::ROTR, MVT::i8, Custom);
  setOperationAction(ISD::ROTR, MVT::i16, Expand);

  setOperationAction(ISD::BR_CC, MVT::i8, Custom);
  setOperationAction(ISD::BR_CC, MVT::i16, Custom);
  setOperationAction(ISD::BR_CC, MVT::i32, Custom);
  setOperationAction(ISD::BR_CC, MVT::i64, Custom);
  setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  setOperationAction(ISD::SELECT_CC, MVT::i8, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i16, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i64, Expand);
  setOperationAction(ISD::SETCC, MVT::i8, Custom);
  setOperationAction(ISD::SETCC, MVT::i16, Custom);
  setOperationAction(ISD::SETCC, MVT::i32, Custom);
  setOperationAction(ISD::SETCC, MVT::i64, Custom);
  setOperationAction(ISD::SELECT, MVT::i8, Expand);
  setOperationAction(ISD::SELECT, MVT::i16, Expand);

  setOperationAction(ISD::BSWAP, MVT::i16, Expand);
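
  // Illustrative note (editorial, not part of the configuration): because the
  // hardware shifts move only one bit per instruction, a constant shift such
  // as "shl i8 %x, 3" is lowered by LowerShifts below into three chained
  // AVRISD::LSL nodes, which typically select to
  //   lsl r24
  //   lsl r24
  //   lsl r24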

  // Add support for postincrement and predecrement load/stores.
  setIndexedLoadAction(ISD::POST_INC, MVT::i8, Legal);
  setIndexedLoadAction(ISD::POST_INC, MVT::i16, Legal);
  setIndexedLoadAction(ISD::PRE_DEC, MVT::i8, Legal);
  setIndexedLoadAction(ISD::PRE_DEC, MVT::i16, Legal);
  setIndexedStoreAction(ISD::POST_INC, MVT::i8, Legal);
  setIndexedStoreAction(ISD::POST_INC, MVT::i16, Legal);
  setIndexedStoreAction(ISD::PRE_DEC, MVT::i8, Legal);
  setIndexedStoreAction(ISD::PRE_DEC, MVT::i16, Legal);

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);

  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);
  setOperationAction(ISD::VAARG, MVT::Other, Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);

  // Atomic operations which must be lowered to rtlib calls.
  for (MVT VT : MVT::integer_valuetypes()) {
    setOperationAction(ISD::ATOMIC_SWAP, VT, Expand);
    setOperationAction(ISD::ATOMIC_CMP_SWAP, VT, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_NAND, VT, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_MAX, VT, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_MIN, VT, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_UMAX, VT, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_UMIN, VT, Expand);
  }

  // Division/remainder.
  setOperationAction(ISD::UDIV, MVT::i8, Expand);
  setOperationAction(ISD::UDIV, MVT::i16, Expand);
  setOperationAction(ISD::UREM, MVT::i8, Expand);
  setOperationAction(ISD::UREM, MVT::i16, Expand);
  setOperationAction(ISD::SDIV, MVT::i8, Expand);
  setOperationAction(ISD::SDIV, MVT::i16, Expand);
  setOperationAction(ISD::SREM, MVT::i8, Expand);
  setOperationAction(ISD::SREM, MVT::i16, Expand);

  // Make division and modulus custom.
  setOperationAction(ISD::UDIVREM, MVT::i8, Custom);
  setOperationAction(ISD::UDIVREM, MVT::i16, Custom);
  setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
  setOperationAction(ISD::SDIVREM, MVT::i8, Custom);
  setOperationAction(ISD::SDIVREM, MVT::i16, Custom);
  setOperationAction(ISD::SDIVREM, MVT::i32, Custom);

  // Do not use MUL. The AVR instructions are closer to SMUL_LOHI & co.
  setOperationAction(ISD::MUL, MVT::i8, Expand);
  setOperationAction(ISD::MUL, MVT::i16, Expand);

  // Expand 16-bit multiplications.
  setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand);

  // Expand multiplications to libcalls when there is
  // no hardware MUL.
  if (!Subtarget.supportsMultiplication()) {
    setOperationAction(ISD::SMUL_LOHI, MVT::i8, Expand);
    setOperationAction(ISD::UMUL_LOHI, MVT::i8, Expand);
  }

  for (MVT VT : MVT::integer_valuetypes()) {
    setOperationAction(ISD::MULHS, VT, Expand);
    setOperationAction(ISD::MULHU, VT, Expand);
  }

  for (MVT VT : MVT::integer_valuetypes()) {
    setOperationAction(ISD::CTPOP, VT, Expand);
    setOperationAction(ISD::CTLZ, VT, Expand);
    setOperationAction(ISD::CTTZ, VT, Expand);
  }

  for (MVT VT : MVT::integer_valuetypes()) {
    setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
    // TODO: The generated code is pretty poor. Investigate using the
    // same "shift and subtract with carry" trick that we do for
    // extending 8-bit to 16-bit. This may require infrastructure
    // improvements in how we treat 16-bit "registers" to be feasible.
  }

  // Division rtlib functions (not supported), use divmod functions instead.
  setLibcallName(RTLIB::SDIV_I8, nullptr);
  setLibcallName(RTLIB::SDIV_I16, nullptr);
  setLibcallName(RTLIB::SDIV_I32, nullptr);
  setLibcallName(RTLIB::UDIV_I8, nullptr);
  setLibcallName(RTLIB::UDIV_I16, nullptr);
  setLibcallName(RTLIB::UDIV_I32, nullptr);

  // Modulus rtlib functions (not supported), use divmod functions instead.
  setLibcallName(RTLIB::SREM_I8, nullptr);
  setLibcallName(RTLIB::SREM_I16, nullptr);
  setLibcallName(RTLIB::SREM_I32, nullptr);
  setLibcallName(RTLIB::UREM_I8, nullptr);
  setLibcallName(RTLIB::UREM_I16, nullptr);
  setLibcallName(RTLIB::UREM_I32, nullptr);

  // Division and modulus rtlib functions.
  setLibcallName(RTLIB::SDIVREM_I8, "__divmodqi4");
  setLibcallName(RTLIB::SDIVREM_I16, "__divmodhi4");
  setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
  setLibcallName(RTLIB::UDIVREM_I8, "__udivmodqi4");
  setLibcallName(RTLIB::UDIVREM_I16, "__udivmodhi4");
  setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");

  // Several of the runtime library functions use a special calling conv.
  setLibcallCallingConv(RTLIB::SDIVREM_I8, CallingConv::AVR_BUILTIN);
  setLibcallCallingConv(RTLIB::SDIVREM_I16, CallingConv::AVR_BUILTIN);
  setLibcallCallingConv(RTLIB::UDIVREM_I8, CallingConv::AVR_BUILTIN);
  setLibcallCallingConv(RTLIB::UDIVREM_I16, CallingConv::AVR_BUILTIN);

  // Trigonometric rtlib functions.
  setLibcallName(RTLIB::SIN_F32, "sin");
  setLibcallName(RTLIB::COS_F32, "cos");

  setMinFunctionAlignment(Align(2));
  setMinimumJumpTableEntries(UINT_MAX);
}

const char *AVRTargetLowering::getTargetNodeName(unsigned Opcode) const {
#define NODE(name)                                                             \
  case AVRISD::name:                                                           \
    return #name

  switch (Opcode) {
  default:
    return nullptr;
    NODE(RET_GLUE);
    NODE(RETI_GLUE);
    NODE(CALL);
    NODE(WRAPPER);
    NODE(LSL);
    NODE(LSLW);
    NODE(LSR);
    NODE(LSRW);
    NODE(ROL);
    NODE(ROR);
    NODE(ASR);
    NODE(ASRW);
    NODE(LSLLOOP);
    NODE(LSRLOOP);
    NODE(ROLLOOP);
    NODE(RORLOOP);
    NODE(ASRLOOP);
    NODE(BRCOND);
    NODE(CMP);
    NODE(CMPC);
    NODE(TST);
    NODE(SELECT_CC);
#undef NODE
  }
}

EVT AVRTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
                                          EVT VT) const {
  assert(!VT.isVector() && "No AVR SetCC type for vectors!");
  return MVT::i8;
}
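
// Illustrative sketch of the 8-bit constant-shift strategy implemented in
// LowerShifts below (assuming typical instruction selection): a shift such as
// "shl i8 %x, 4" is rewritten as a nibble SWAP plus an AND mask rather than
// four single-bit shifts, i.e. roughly
//   swap r24
//   andi r24, 0xf0
// with any remaining shift count emitted as single-bit AVRISD::LSL nodes.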

SDValue AVRTargetLowering::LowerShifts(SDValue Op, SelectionDAG &DAG) const {
  unsigned Opc8;
  const SDNode *N = Op.getNode();
  EVT VT = Op.getValueType();
  SDLoc dl(N);
  assert(llvm::has_single_bit<uint32_t>(VT.getSizeInBits()) &&
         "Expected power-of-2 shift amount");

  if (VT.getSizeInBits() == 32) {
    if (!isa<ConstantSDNode>(N->getOperand(1))) {
      // 32-bit shifts are converted to a loop in IR.
      // This should be unreachable.
      report_fatal_error("Expected a constant shift amount!");
    }
    SDVTList ResTys = DAG.getVTList(MVT::i16, MVT::i16);
    SDValue SrcLo =
        DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i16, Op.getOperand(0),
                    DAG.getConstant(0, dl, MVT::i16));
    SDValue SrcHi =
        DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i16, Op.getOperand(0),
                    DAG.getConstant(1, dl, MVT::i16));
    uint64_t ShiftAmount = N->getConstantOperandVal(1);
    if (ShiftAmount == 16) {
      // Special case these two operations because they appear to be used by
      // the generic codegen parts to lower 32-bit numbers.
      // TODO: perhaps we can lower shift amounts bigger than 16 to a 16-bit
      // shift of a part of the 32-bit value?
      switch (Op.getOpcode()) {
      case ISD::SHL: {
        SDValue Zero = DAG.getConstant(0, dl, MVT::i16);
        return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i32, Zero, SrcLo);
      }
      case ISD::SRL: {
        SDValue Zero = DAG.getConstant(0, dl, MVT::i16);
        return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i32, SrcHi, Zero);
      }
      }
    }
    SDValue Cnt = DAG.getTargetConstant(ShiftAmount, dl, MVT::i8);
    unsigned Opc;
    switch (Op.getOpcode()) {
    default:
      llvm_unreachable("Invalid 32-bit shift opcode!");
    case ISD::SHL:
      Opc = AVRISD::LSLW;
      break;
    case ISD::SRL:
      Opc = AVRISD::LSRW;
      break;
    case ISD::SRA:
      Opc = AVRISD::ASRW;
      break;
    }
    SDValue Result = DAG.getNode(Opc, dl, ResTys, SrcLo, SrcHi, Cnt);
    return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i32, Result.getValue(0),
                       Result.getValue(1));
  }

  // Expand non-constant shifts to loops.
  if (!isa<ConstantSDNode>(N->getOperand(1))) {
    switch (Op.getOpcode()) {
    default:
      llvm_unreachable("Invalid shift opcode!");
    case ISD::SHL:
      return DAG.getNode(AVRISD::LSLLOOP, dl, VT, N->getOperand(0),
                         N->getOperand(1));
    case ISD::SRL:
      return DAG.getNode(AVRISD::LSRLOOP, dl, VT, N->getOperand(0),
                         N->getOperand(1));
    case ISD::ROTL: {
      SDValue Amt = N->getOperand(1);
      EVT AmtVT = Amt.getValueType();
      Amt = DAG.getNode(ISD::AND, dl, AmtVT, Amt,
                        DAG.getConstant(VT.getSizeInBits() - 1, dl, AmtVT));
      return DAG.getNode(AVRISD::ROLLOOP, dl, VT, N->getOperand(0), Amt);
    }
    case ISD::ROTR: {
      SDValue Amt = N->getOperand(1);
      EVT AmtVT = Amt.getValueType();
      Amt = DAG.getNode(ISD::AND, dl, AmtVT, Amt,
                        DAG.getConstant(VT.getSizeInBits() - 1, dl, AmtVT));
      return DAG.getNode(AVRISD::RORLOOP, dl, VT, N->getOperand(0), Amt);
    }
    case ISD::SRA:
      return DAG.getNode(AVRISD::ASRLOOP, dl, VT, N->getOperand(0),
                         N->getOperand(1));
    }
  }

  uint64_t ShiftAmount = N->getConstantOperandVal(1);
  SDValue Victim = N->getOperand(0);

  switch (Op.getOpcode()) {
  case ISD::SRA:
    Opc8 = AVRISD::ASR;
    break;
  case ISD::ROTL:
    Opc8 = AVRISD::ROL;
    ShiftAmount = ShiftAmount % VT.getSizeInBits();
    break;
  case ISD::ROTR:
    Opc8 = AVRISD::ROR;
    ShiftAmount = ShiftAmount % VT.getSizeInBits();
    break;
  case ISD::SRL:
    Opc8 = AVRISD::LSR;
    break;
  case ISD::SHL:
    Opc8 = AVRISD::LSL;
    break;
  default:
    llvm_unreachable("Invalid shift opcode");
  }

  // Optimize int8/int16 shifts.
  if (VT.getSizeInBits() == 8) {
    if (Op.getOpcode() == ISD::SHL && 4 <= ShiftAmount && ShiftAmount < 7) {
      // Optimize LSL when 4 <= ShiftAmount <= 6.
      Victim = DAG.getNode(AVRISD::SWAP, dl, VT, Victim);
      Victim =
          DAG.getNode(ISD::AND, dl, VT, Victim, DAG.getConstant(0xf0, dl, VT));
      ShiftAmount -= 4;
    } else if (Op.getOpcode() == ISD::SRL && 4 <= ShiftAmount &&
               ShiftAmount < 7) {
      // Optimize LSR when 4 <= ShiftAmount <= 6.
      Victim = DAG.getNode(AVRISD::SWAP, dl, VT, Victim);
      Victim =
          DAG.getNode(ISD::AND, dl, VT, Victim, DAG.getConstant(0x0f, dl, VT));
      ShiftAmount -= 4;
    } else if (Op.getOpcode() == ISD::SHL && ShiftAmount == 7) {
      // Optimize LSL when ShiftAmount == 7.
      Victim = DAG.getNode(AVRISD::LSLBN, dl, VT, Victim,
                           DAG.getConstant(7, dl, VT));
      ShiftAmount = 0;
    } else if (Op.getOpcode() == ISD::SRL && ShiftAmount == 7) {
      // Optimize LSR when ShiftAmount == 7.
      Victim = DAG.getNode(AVRISD::LSRBN, dl, VT, Victim,
                           DAG.getConstant(7, dl, VT));
      ShiftAmount = 0;
    } else if (Op.getOpcode() == ISD::SRA && ShiftAmount == 6) {
      // Optimize ASR when ShiftAmount == 6.
      Victim = DAG.getNode(AVRISD::ASRBN, dl, VT, Victim,
                           DAG.getConstant(6, dl, VT));
      ShiftAmount = 0;
    } else if (Op.getOpcode() == ISD::SRA && ShiftAmount == 7) {
      // Optimize ASR when ShiftAmount == 7.
      Victim = DAG.getNode(AVRISD::ASRBN, dl, VT, Victim,
                           DAG.getConstant(7, dl, VT));
      ShiftAmount = 0;
    } else if (Op.getOpcode() == ISD::ROTL && ShiftAmount == 3) {
      // Optimize left rotation 3 bits to swap then right rotation 1 bit.
      Victim = DAG.getNode(AVRISD::SWAP, dl, VT, Victim);
      Victim =
          DAG.getNode(AVRISD::ROR, dl, VT, Victim, DAG.getConstant(1, dl, VT));
      ShiftAmount = 0;
    } else if (Op.getOpcode() == ISD::ROTR && ShiftAmount == 3) {
      // Optimize right rotation 3 bits to swap then left rotation 1 bit.
      Victim = DAG.getNode(AVRISD::SWAP, dl, VT, Victim);
      Victim =
          DAG.getNode(AVRISD::ROL, dl, VT, Victim, DAG.getConstant(1, dl, VT));
      ShiftAmount = 0;
    } else if (Op.getOpcode() == ISD::ROTL && ShiftAmount == 7) {
      // Optimize left rotation 7 bits to right rotation 1 bit.
      Victim =
          DAG.getNode(AVRISD::ROR, dl, VT, Victim, DAG.getConstant(1, dl, VT));
      ShiftAmount = 0;
    } else if (Op.getOpcode() == ISD::ROTR && ShiftAmount == 7) {
      // Optimize right rotation 7 bits to left rotation 1 bit.
      Victim =
          DAG.getNode(AVRISD::ROL, dl, VT, Victim, DAG.getConstant(1, dl, VT));
      ShiftAmount = 0;
    } else if ((Op.getOpcode() == ISD::ROTR || Op.getOpcode() == ISD::ROTL) &&
               ShiftAmount >= 4) {
      // Optimize left/right rotation with the SWAP instruction.
      Victim = DAG.getNode(AVRISD::SWAP, dl, VT, Victim);
      ShiftAmount -= 4;
    }
  } else if (VT.getSizeInBits() == 16) {
    if (Op.getOpcode() == ISD::SRA)
      // Special optimization for int16 arithmetic right shift.
      switch (ShiftAmount) {
      case 15:
        Victim = DAG.getNode(AVRISD::ASRWN, dl, VT, Victim,
                             DAG.getConstant(15, dl, VT));
        ShiftAmount = 0;
        break;
      case 14:
        Victim = DAG.getNode(AVRISD::ASRWN, dl, VT, Victim,
                             DAG.getConstant(14, dl, VT));
        ShiftAmount = 0;
        break;
      case 7:
        Victim = DAG.getNode(AVRISD::ASRWN, dl, VT, Victim,
                             DAG.getConstant(7, dl, VT));
        ShiftAmount = 0;
        break;
      default:
        break;
      }
    if (4 <= ShiftAmount && ShiftAmount < 8)
      switch (Op.getOpcode()) {
      case ISD::SHL:
        Victim = DAG.getNode(AVRISD::LSLWN, dl, VT, Victim,
                             DAG.getConstant(4, dl, VT));
        ShiftAmount -= 4;
        break;
      case ISD::SRL:
        Victim = DAG.getNode(AVRISD::LSRWN, dl, VT, Victim,
                             DAG.getConstant(4, dl, VT));
        ShiftAmount -= 4;
        break;
      default:
        break;
      }
    else if (8 <= ShiftAmount && ShiftAmount < 12)
      switch (Op.getOpcode()) {
      case ISD::SHL:
        Victim = DAG.getNode(AVRISD::LSLWN, dl, VT, Victim,
                             DAG.getConstant(8, dl, VT));
        ShiftAmount -= 8;
        // Only operate on the higher byte for remaining shift bits.
        Opc8 = AVRISD::LSLHI;
        break;
      case ISD::SRL:
        Victim = DAG.getNode(AVRISD::LSRWN, dl, VT, Victim,
                             DAG.getConstant(8, dl, VT));
        ShiftAmount -= 8;
        // Only operate on the lower byte for remaining shift bits.
        Opc8 = AVRISD::LSRLO;
        break;
      case ISD::SRA:
        Victim = DAG.getNode(AVRISD::ASRWN, dl, VT, Victim,
                             DAG.getConstant(8, dl, VT));
        ShiftAmount -= 8;
        // Only operate on the lower byte for remaining shift bits.
        Opc8 = AVRISD::ASRLO;
        break;
      default:
        break;
      }
    else if (12 <= ShiftAmount)
      switch (Op.getOpcode()) {
      case ISD::SHL:
        Victim = DAG.getNode(AVRISD::LSLWN, dl, VT, Victim,
                             DAG.getConstant(12, dl, VT));
        ShiftAmount -= 12;
        // Only operate on the higher byte for remaining shift bits.
        Opc8 = AVRISD::LSLHI;
        break;
      case ISD::SRL:
        Victim = DAG.getNode(AVRISD::LSRWN, dl, VT, Victim,
                             DAG.getConstant(12, dl, VT));
        ShiftAmount -= 12;
        // Only operate on the lower byte for remaining shift bits.
        Opc8 = AVRISD::LSRLO;
        break;
      case ISD::SRA:
        Victim = DAG.getNode(AVRISD::ASRWN, dl, VT, Victim,
                             DAG.getConstant(8, dl, VT));
        ShiftAmount -= 8;
        // Only operate on the lower byte for remaining shift bits.
        Opc8 = AVRISD::ASRLO;
        break;
      default:
        break;
      }
  }

  while (ShiftAmount--) {
    Victim = DAG.getNode(Opc8, dl, VT, Victim);
  }

  return Victim;
}
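
// Illustration of the div/rem lowering below (assumed typical pipeline, not
// normative): for the IR pair
//   %q = sdiv i16 %a, %b
//   %r = srem i16 %a, %b
// legalization combines the two into a single ISD::SDIVREM node, and
// LowerDivRem turns that into one call to __divmodhi4 using the AVR_BUILTIN
// calling convention registered in the constructor; the quotient and
// remainder both come back in registers.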

SDValue AVRTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
  unsigned Opcode = Op->getOpcode();
  assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) &&
         "Invalid opcode for Div/Rem lowering");
  bool IsSigned = (Opcode == ISD::SDIVREM);
  EVT VT = Op->getValueType(0);
  Type *Ty = VT.getTypeForEVT(*DAG.getContext());

  RTLIB::Libcall LC;
  switch (VT.getSimpleVT().SimpleTy) {
  default:
    llvm_unreachable("Unexpected request for libcall!");
  case MVT::i8:
    LC = IsSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8;
    break;
  case MVT::i16:
    LC = IsSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16;
    break;
  case MVT::i32:
    LC = IsSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32;
    break;
  }

  SDValue InChain = DAG.getEntryNode();

  TargetLowering::ArgListTy Args;
  TargetLowering::ArgListEntry Entry;
  for (SDValue const &Value : Op->op_values()) {
    Entry.Node = Value;
    Entry.Ty = Value.getValueType().getTypeForEVT(*DAG.getContext());
    Entry.IsSExt = IsSigned;
    Entry.IsZExt = !IsSigned;
    Args.push_back(Entry);
  }

  SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
                                         getPointerTy(DAG.getDataLayout()));

  Type *RetTy = (Type *)StructType::get(Ty, Ty);

  SDLoc dl(Op);
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl)
      .setChain(InChain)
      .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
      .setInRegister()
      .setSExtResult(IsSigned)
      .setZExtResult(!IsSigned);

  std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
  return CallInfo.first;
}

SDValue AVRTargetLowering::LowerGlobalAddress(SDValue Op,
                                              SelectionDAG &DAG) const {
  auto DL = DAG.getDataLayout();

  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
  int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset();

  // Create the TargetGlobalAddress node, folding in the constant offset.
  SDValue Result =
      DAG.getTargetGlobalAddress(GV, SDLoc(Op), getPointerTy(DL), Offset);
  return DAG.getNode(AVRISD::WRAPPER, SDLoc(Op), getPointerTy(DL), Result);
}

SDValue AVRTargetLowering::LowerBlockAddress(SDValue Op,
                                             SelectionDAG &DAG) const {
  auto DL = DAG.getDataLayout();
  const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();

  SDValue Result = DAG.getTargetBlockAddress(BA, getPointerTy(DL));

  return DAG.getNode(AVRISD::WRAPPER, SDLoc(Op), getPointerTy(DL), Result);
}

/// IntCCToAVRCC - Convert a DAG integer condition code to an AVR CC.
static AVRCC::CondCodes intCCToAVRCC(ISD::CondCode CC) {
  switch (CC) {
  default:
    llvm_unreachable("Unknown condition code!");
  case ISD::SETEQ:
    return AVRCC::COND_EQ;
  case ISD::SETNE:
    return AVRCC::COND_NE;
  case ISD::SETGE:
    return AVRCC::COND_GE;
  case ISD::SETLT:
    return AVRCC::COND_LT;
  case ISD::SETUGE:
    return AVRCC::COND_SH;
  case ISD::SETULT:
    return AVRCC::COND_LO;
  }
}
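
// Illustrative DAG-level sketch (an assumption about typical selection, not
// the only possible output): comparing an i16 value x against the constant
// 0x1200 lets getAVRCmp below use the fixed zero register for the all-zero
// low byte:
//   CMP  lo8(x), __zero_reg__   ; low byte of 0x1200 is 0x00
//   CMPC hi8(x), 0x12           ; carry-aware compare of the high byte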

/// Returns appropriate CP/CPI/CPC nodes code for the given 8/16-bit operands.
SDValue AVRTargetLowering::getAVRCmp(SDValue LHS, SDValue RHS,
                                     SelectionDAG &DAG, SDLoc DL) const {
  assert((LHS.getSimpleValueType() == RHS.getSimpleValueType()) &&
         "LHS and RHS have different types");
  assert(((LHS.getSimpleValueType() == MVT::i16) ||
          (LHS.getSimpleValueType() == MVT::i8)) &&
         "invalid comparison type");

  SDValue Cmp;

  if (LHS.getSimpleValueType() == MVT::i16 && isa<ConstantSDNode>(RHS)) {
    uint64_t Imm = RHS->getAsZExtVal();
    // Generate a CPI/CPC pair if RHS is a 16-bit constant. Use the zero
    // register for the constant RHS if its lower or higher byte is zero.
    SDValue LHSlo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, LHS,
                                DAG.getIntPtrConstant(0, DL));
    SDValue LHShi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, LHS,
                                DAG.getIntPtrConstant(1, DL));
    SDValue RHSlo = (Imm & 0xff) == 0
                        ? DAG.getRegister(Subtarget.getZeroRegister(), MVT::i8)
                        : DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, RHS,
                                      DAG.getIntPtrConstant(0, DL));
    SDValue RHShi = (Imm & 0xff00) == 0
                        ? DAG.getRegister(Subtarget.getZeroRegister(), MVT::i8)
                        : DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, RHS,
                                      DAG.getIntPtrConstant(1, DL));
    Cmp = DAG.getNode(AVRISD::CMP, DL, MVT::Glue, LHSlo, RHSlo);
    Cmp = DAG.getNode(AVRISD::CMPC, DL, MVT::Glue, LHShi, RHShi, Cmp);
  } else if (RHS.getSimpleValueType() == MVT::i16 && isa<ConstantSDNode>(LHS)) {
    // Generate a CPI/CPC pair if LHS is a 16-bit constant. Use the zero
    // register for the constant LHS if its lower or higher byte is zero.
    uint64_t Imm = LHS->getAsZExtVal();
    SDValue LHSlo = (Imm & 0xff) == 0
                        ? DAG.getRegister(Subtarget.getZeroRegister(), MVT::i8)
                        : DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, LHS,
                                      DAG.getIntPtrConstant(0, DL));
    SDValue LHShi = (Imm & 0xff00) == 0
                        ? DAG.getRegister(Subtarget.getZeroRegister(), MVT::i8)
                        : DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, LHS,
                                      DAG.getIntPtrConstant(1, DL));
    SDValue RHSlo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, RHS,
                                DAG.getIntPtrConstant(0, DL));
    SDValue RHShi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, RHS,
                                DAG.getIntPtrConstant(1, DL));
    Cmp = DAG.getNode(AVRISD::CMP, DL, MVT::Glue, LHSlo, RHSlo);
    Cmp = DAG.getNode(AVRISD::CMPC, DL, MVT::Glue, LHShi, RHShi, Cmp);
  } else {
    // Generate ordinary 16-bit comparison.
    Cmp = DAG.getNode(AVRISD::CMP, DL, MVT::Glue, LHS, RHS);
  }

  return Cmp;
}
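
// Note for the reader (summarizing the switch that follows): intCCToAVRCC
// above only maps EQ, NE, GE, LT, SH (unsigned >=) and LO (unsigned <)
// directly, so the remaining IR conditions are canonicalized onto those by
// swapping operands or adjusting constants. For example, "lhs <= rhs" is
// handled by swapping the operands and testing "rhs >= lhs".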

/// Returns appropriate AVR CMP/CMPC nodes and corresponding condition code for
/// the given operands.
SDValue AVRTargetLowering::getAVRCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
                                     SDValue &AVRcc, SelectionDAG &DAG,
                                     SDLoc DL) const {
  SDValue Cmp;
  EVT VT = LHS.getValueType();
  bool UseTest = false;

  switch (CC) {
  default:
    break;
  case ISD::SETLE: {
    // Swap operands and reverse the branching condition.
    std::swap(LHS, RHS);
    CC = ISD::SETGE;
    break;
  }
  case ISD::SETGT: {
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
      switch (C->getSExtValue()) {
      case -1: {
        // When doing lhs > -1 use a tst instruction on the top part of lhs
        // and use brpl instead of using a chain of cp/cpc.
        UseTest = true;
        AVRcc = DAG.getConstant(AVRCC::COND_PL, DL, MVT::i8);
        break;
      }
      case 0: {
        // Turn lhs > 0 into 0 < lhs since 0 can be materialized with
        // __zero_reg__ in lhs.
        RHS = LHS;
        LHS = DAG.getConstant(0, DL, VT);
        CC = ISD::SETLT;
        break;
      }
      default: {
        // Turn lhs > rhs with rhs constant into lhs >= rhs+1; this allows
        // us to fold the constant into the cmp instruction.
        RHS = DAG.getConstant(C->getSExtValue() + 1, DL, VT);
        CC = ISD::SETGE;
        break;
      }
      }
      break;
    }
    // Swap operands and reverse the branching condition.
    std::swap(LHS, RHS);
    CC = ISD::SETLT;
    break;
  }
  case ISD::SETLT: {
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
      switch (C->getSExtValue()) {
      case 1: {
        // Turn lhs < 1 into 0 >= lhs since 0 can be materialized with
        // __zero_reg__ in lhs.
        RHS = LHS;
        LHS = DAG.getConstant(0, DL, VT);
        CC = ISD::SETGE;
        break;
      }
      case 0: {
        // When doing lhs < 0 use a tst instruction on the top part of lhs
        // and use brmi instead of using a chain of cp/cpc.
        UseTest = true;
        AVRcc = DAG.getConstant(AVRCC::COND_MI, DL, MVT::i8);
        break;
      }
      }
    }
    break;
  }
  case ISD::SETULE: {
    // Swap operands and reverse the branching condition.
    std::swap(LHS, RHS);
    CC = ISD::SETUGE;
    break;
  }
  case ISD::SETUGT: {
    // Turn lhs > rhs with rhs constant into lhs >= rhs+1; this allows us to
    // fold the constant into the cmp instruction.
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
      RHS = DAG.getConstant(C->getSExtValue() + 1, DL, VT);
      CC = ISD::SETUGE;
      break;
    }
    // Swap operands and reverse the branching condition.
    std::swap(LHS, RHS);
    CC = ISD::SETULT;
    break;
  }
  }

  // Expand 32 and 64 bit comparisons with custom CMP and CMPC nodes instead of
  // using the default and/or/xor expansion code which is much longer.
  if (VT == MVT::i32) {
    SDValue LHSlo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, LHS,
                                DAG.getIntPtrConstant(0, DL));
    SDValue LHShi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, LHS,
                                DAG.getIntPtrConstant(1, DL));
    SDValue RHSlo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, RHS,
                                DAG.getIntPtrConstant(0, DL));
    SDValue RHShi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, RHS,
                                DAG.getIntPtrConstant(1, DL));

    if (UseTest) {
      // When using tst we only care about the highest part.
      SDValue Top = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, LHShi,
                                DAG.getIntPtrConstant(1, DL));
      Cmp = DAG.getNode(AVRISD::TST, DL, MVT::Glue, Top);
    } else {
      Cmp = getAVRCmp(LHSlo, RHSlo, DAG, DL);
      Cmp = DAG.getNode(AVRISD::CMPC, DL, MVT::Glue, LHShi, RHShi, Cmp);
    }
  } else if (VT == MVT::i64) {
    SDValue LHS_0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, LHS,
                                DAG.getIntPtrConstant(0, DL));
    SDValue LHS_1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, LHS,
                                DAG.getIntPtrConstant(1, DL));

    SDValue LHS0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, LHS_0,
                               DAG.getIntPtrConstant(0, DL));
    SDValue LHS1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, LHS_0,
                               DAG.getIntPtrConstant(1, DL));
    SDValue LHS2 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, LHS_1,
                               DAG.getIntPtrConstant(0, DL));
    SDValue LHS3 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, LHS_1,
                               DAG.getIntPtrConstant(1, DL));

    SDValue RHS_0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, RHS,
                                DAG.getIntPtrConstant(0, DL));
    SDValue RHS_1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, RHS,
                                DAG.getIntPtrConstant(1, DL));

    SDValue RHS0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, RHS_0,
                               DAG.getIntPtrConstant(0, DL));
    SDValue RHS1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, RHS_0,
                               DAG.getIntPtrConstant(1, DL));
    SDValue RHS2 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, RHS_1,
                               DAG.getIntPtrConstant(0, DL));
    SDValue RHS3 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, RHS_1,
                               DAG.getIntPtrConstant(1, DL));

    if (UseTest) {
      // When using tst we only care about the highest part.
      SDValue Top = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, LHS3,
                                DAG.getIntPtrConstant(1, DL));
      Cmp = DAG.getNode(AVRISD::TST, DL, MVT::Glue, Top);
    } else {
      Cmp = getAVRCmp(LHS0, RHS0, DAG, DL);
      Cmp = DAG.getNode(AVRISD::CMPC, DL, MVT::Glue, LHS1, RHS1, Cmp);
      Cmp = DAG.getNode(AVRISD::CMPC, DL, MVT::Glue, LHS2, RHS2, Cmp);
      Cmp = DAG.getNode(AVRISD::CMPC, DL, MVT::Glue, LHS3, RHS3, Cmp);
    }
  } else if (VT == MVT::i8 || VT == MVT::i16) {
    if (UseTest) {
      // When using tst we only care about the highest part.
      Cmp = DAG.getNode(AVRISD::TST, DL, MVT::Glue,
                        (VT == MVT::i8)
                            ? LHS
                            : DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8,
                                          LHS, DAG.getIntPtrConstant(1, DL)));
    } else {
      Cmp = getAVRCmp(LHS, RHS, DAG, DL);
    }
  } else {
    llvm_unreachable("Invalid comparison size");
  }

  // When using a test instruction AVRcc is already set.
  if (!UseTest) {
    AVRcc = DAG.getConstant(intCCToAVRCC(CC), DL, MVT::i8);
  }

  return Cmp;
}

SDValue AVRTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
  SDValue LHS = Op.getOperand(2);
  SDValue RHS = Op.getOperand(3);
  SDValue Dest = Op.getOperand(4);
  SDLoc dl(Op);

  SDValue TargetCC;
  SDValue Cmp = getAVRCmp(LHS, RHS, CC, TargetCC, DAG, dl);

  return DAG.getNode(AVRISD::BRCOND, dl, MVT::Other, Chain, Dest, TargetCC,
                     Cmp);
}

SDValue AVRTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue TrueV = Op.getOperand(2);
  SDValue FalseV = Op.getOperand(3);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
  SDLoc dl(Op);

  SDValue TargetCC;
  SDValue Cmp = getAVRCmp(LHS, RHS, CC, TargetCC, DAG, dl);

  SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
  SDValue Ops[] = {TrueV, FalseV, TargetCC, Cmp};

  return DAG.getNode(AVRISD::SELECT_CC, dl, VTs, Ops);
}

SDValue AVRTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
  SDLoc DL(Op);

  SDValue TargetCC;
  SDValue Cmp = getAVRCmp(LHS, RHS, CC, TargetCC, DAG, DL);

  SDValue TrueV = DAG.getConstant(1, DL, Op.getValueType());
  SDValue FalseV = DAG.getConstant(0, DL, Op.getValueType());
  SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
  SDValue Ops[] = {TrueV, FalseV, TargetCC, Cmp};

  return DAG.getNode(AVRISD::SELECT_CC, DL, VTs, Ops);
}

SDValue AVRTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
  const MachineFunction &MF = DAG.getMachineFunction();
  const AVRMachineFunctionInfo *AFI = MF.getInfo<AVRMachineFunctionInfo>();
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  auto DL = DAG.getDataLayout();
  SDLoc dl(Op);

  // Vastart just stores the address of the VarArgsFrameIndex slot into the
  // memory location argument.
  SDValue FI = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(), getPointerTy(DL));

  return DAG.getStore(Op.getOperand(0), dl, FI, Op.getOperand(1),
                      MachinePointerInfo(SV));
}
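
// Operand-layout sketch for the rewrite below (an assumption based on the
// generic INLINEASM node shape): the operands are roughly
//   [chain, asm string, ...flag/register groups..., optional glue]
// and the rewrite appends one extra register-use group (a flag word plus the
// zero register) just before the glue, for the same reason an implicit zero
// register operand is added to calls in LowerCall further down.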

// Modify the existing ISD::INLINEASM node to add the implicit zero register.
SDValue AVRTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
  SDValue ZeroReg = DAG.getRegister(Subtarget.getZeroRegister(), MVT::i8);
  if (Op.getOperand(Op.getNumOperands() - 1) == ZeroReg ||
      Op.getOperand(Op.getNumOperands() - 2) == ZeroReg) {
    // Zero register has already been added. Don't add it again.
    // If this isn't handled, we get called over and over again.
    return Op;
  }

  // Get a list of operands to the new INLINEASM node. This is mostly a copy,
  // with some edits.
  // Add the following operands at the end (but before the glue node, if it's
  // there):
  //  - The flags of the implicit zero register operand.
  //  - The implicit zero register operand itself.
  SDLoc dl(Op);
  SmallVector<SDValue, 8> Ops;
  SDNode *N = Op.getNode();
  SDValue Glue;
  for (unsigned I = 0; I < N->getNumOperands(); I++) {
    SDValue Operand = N->getOperand(I);
    if (Operand.getValueType() == MVT::Glue) {
      // The glue operand always needs to be at the end, so we need to treat it
      // specially.
      Glue = Operand;
    } else {
      Ops.push_back(Operand);
    }
  }
  InlineAsm::Flag Flags(InlineAsm::Kind::RegUse, 1);
  Ops.push_back(DAG.getTargetConstant(Flags, dl, MVT::i32));
  Ops.push_back(ZeroReg);
  if (Glue) {
    Ops.push_back(Glue);
  }

  // Replace the current INLINEASM node with a new one that has the zero
  // register as implicit parameter.
  SDValue New = DAG.getNode(N->getOpcode(), dl, N->getVTList(), Ops);
  DAG.ReplaceAllUsesOfValueWith(Op, New);
  DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), New.getValue(1));

  return New;
}

SDValue AVRTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  default:
    llvm_unreachable("Don't know how to custom lower this!");
  case ISD::SHL:
  case ISD::SRA:
  case ISD::SRL:
  case ISD::ROTL:
  case ISD::ROTR:
    return LowerShifts(Op, DAG);
  case ISD::GlobalAddress:
    return LowerGlobalAddress(Op, DAG);
  case ISD::BlockAddress:
    return LowerBlockAddress(Op, DAG);
  case ISD::BR_CC:
    return LowerBR_CC(Op, DAG);
  case ISD::SELECT_CC:
    return LowerSELECT_CC(Op, DAG);
  case ISD::SETCC:
    return LowerSETCC(Op, DAG);
  case ISD::VASTART:
    return LowerVASTART(Op, DAG);
  case ISD::SDIVREM:
  case ISD::UDIVREM:
    return LowerDivRem(Op, DAG);
  case ISD::INLINEASM:
    return LowerINLINEASM(Op, DAG);
  }

  return SDValue();
}
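
// Example of the ISD::ADD rewrite in ReplaceNodeResults below: AVR has
// subtract-with-immediate instructions (subi/sbci) but no add-with-immediate,
// so e.g. (i32 add x, 5) is re-expressed as (i32 sub x, -5), which the
// existing sub patterns can select. This mirrors the constructor comment
// about ISD::ADD being Custom for i32/i64.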

/// Replace a node with an illegal result type
/// with a new node built out of custom code.
void AVRTargetLowering::ReplaceNodeResults(SDNode *N,
                                           SmallVectorImpl<SDValue> &Results,
                                           SelectionDAG &DAG) const {
  SDLoc DL(N);

  switch (N->getOpcode()) {
  case ISD::ADD: {
    // Convert add (x, imm) into sub (x, -imm).
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
      SDValue Sub = DAG.getNode(
          ISD::SUB, DL, N->getValueType(0), N->getOperand(0),
          DAG.getConstant(-C->getAPIntValue(), DL, C->getValueType(0)));
      Results.push_back(Sub);
    }
    break;
  }
  default: {
    SDValue Res = LowerOperation(SDValue(N, 0), DAG);

    for (unsigned I = 0, E = Res->getNumValues(); I != E; ++I)
      Results.push_back(Res.getValue(I));

    break;
  }
  }
}

/// Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
bool AVRTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                              const AddrMode &AM, Type *Ty,
                                              unsigned AS,
                                              Instruction *I) const {
  int64_t Offs = AM.BaseOffs;

  // Allow absolute addresses.
  if (AM.BaseGV && !AM.HasBaseReg && AM.Scale == 0 && Offs == 0) {
    return true;
  }

  // Flash memory instructions only allow zero offsets.
  if (isa<PointerType>(Ty) && AS == AVR::ProgramMemory) {
    return false;
  }

  // Allow reg+<6bit> offset.
  if (Offs < 0)
    Offs = -Offs;
  if (AM.BaseGV == nullptr && AM.HasBaseReg && AM.Scale == 0 &&
      isUInt<6>(Offs)) {
    return true;
  }

  return false;
}

/// Returns true by value, base pointer and
/// offset pointer and addressing mode by reference if the node's address
/// can be legally represented as pre-indexed load / store address.
bool AVRTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
                                                  SDValue &Offset,
                                                  ISD::MemIndexedMode &AM,
                                                  SelectionDAG &DAG) const {
  EVT VT;
  const SDNode *Op;
  SDLoc DL(N);

  if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    VT = LD->getMemoryVT();
    Op = LD->getBasePtr().getNode();
    if (LD->getExtensionType() != ISD::NON_EXTLOAD)
      return false;
    if (AVR::isProgramMemoryAccess(LD)) {
      return false;
    }
  } else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    VT = ST->getMemoryVT();
    Op = ST->getBasePtr().getNode();
    if (AVR::isProgramMemoryAccess(ST)) {
      return false;
    }
  } else {
    return false;
  }

  if (VT != MVT::i8 && VT != MVT::i16) {
    return false;
  }

  if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB) {
    return false;
  }

  if (const ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
    int RHSC = RHS->getSExtValue();
    if (Op->getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if ((VT == MVT::i16 && RHSC != -2) || (VT == MVT::i8 && RHSC != -1)) {
      return false;
    }

    Base = Op->getOperand(0);
    Offset = DAG.getConstant(RHSC, DL, MVT::i8);
    AM = ISD::PRE_DEC;

    return true;
  }

  return false;
}
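
// Example of what the pre-indexed match above enables (illustrative
// selection, not guaranteed output): an i8 load from "p - 1" that also
// produces the decremented pointer can become a single pre-decrement load
// through a pointer register:
//   ld r24, -X
// For i16 the required decrement is -2, matching the two byte accesses.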

/// Returns true by value, base pointer and
/// offset pointer and addressing mode by reference if this node can be
/// combined with a load / store to form a post-indexed load / store.
bool AVRTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
                                                   SDValue &Base,
                                                   SDValue &Offset,
                                                   ISD::MemIndexedMode &AM,
                                                   SelectionDAG &DAG) const {
  EVT VT;
  SDLoc DL(N);

  if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    VT = LD->getMemoryVT();
    if (LD->getExtensionType() != ISD::NON_EXTLOAD)
      return false;
  } else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    VT = ST->getMemoryVT();
    // We can not store to program memory.
    if (AVR::isProgramMemoryAccess(ST))
      return false;
    // Since the high byte needs to be stored first, we can not emit
    // an i16 post-increment store like:
    //   st X+, r24
    //   st X+, r25
    if (VT == MVT::i16 && !Subtarget.hasLowByteFirst())
      return false;
  } else {
    return false;
  }

  if (VT != MVT::i8 && VT != MVT::i16) {
    return false;
  }

  if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB) {
    return false;
  }

  if (const ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
    int RHSC = RHS->getSExtValue();
    if (Op->getOpcode() == ISD::SUB)
      RHSC = -RHSC;
    if ((VT == MVT::i16 && RHSC != 2) || (VT == MVT::i8 && RHSC != 1)) {
      return false;
    }

    // FIXME: We temporarily disable post increment load from program memory,
    // due to bug https://github.com/llvm/llvm-project/issues/59914.
    if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(N))
      if (AVR::isProgramMemoryAccess(LD))
        return false;

    Base = Op->getOperand(0);
    Offset = DAG.getConstant(RHSC, DL, MVT::i8);
    AM = ISD::POST_INC;

    return true;
  }

  return false;
}

bool AVRTargetLowering::isOffsetFoldingLegal(
    const GlobalAddressSDNode *GA) const {
  return true;
}

//===----------------------------------------------------------------------===//
//             Formal Arguments Calling Convention Implementation
//===----------------------------------------------------------------------===//

#include "AVRGenCallingConv.inc"

/// Registers for calling conventions, ordered in reverse as required by ABI.
/// Both arrays must be of the same length.
static const MCPhysReg RegList8AVR[] = {
    AVR::R25, AVR::R24, AVR::R23, AVR::R22, AVR::R21, AVR::R20,
    AVR::R19, AVR::R18, AVR::R17, AVR::R16, AVR::R15, AVR::R14,
    AVR::R13, AVR::R12, AVR::R11, AVR::R10, AVR::R9,  AVR::R8};
static const MCPhysReg RegList8Tiny[] = {AVR::R25, AVR::R24, AVR::R23,
                                         AVR::R22, AVR::R21, AVR::R20};
static const MCPhysReg RegList16AVR[] = {
    AVR::R26R25, AVR::R25R24, AVR::R24R23, AVR::R23R22, AVR::R22R21,
    AVR::R21R20, AVR::R20R19, AVR::R19R18, AVR::R18R17, AVR::R17R16,
    AVR::R16R15, AVR::R15R14, AVR::R14R13, AVR::R13R12, AVR::R12R11,
    AVR::R11R10, AVR::R10R9,  AVR::R9R8};
static const MCPhysReg RegList16Tiny[] = {AVR::R26R25, AVR::R25R24,
                                          AVR::R24R23, AVR::R23R22,
                                          AVR::R22R21, AVR::R21R20};

static_assert(std::size(RegList8AVR) == std::size(RegList16AVR),
              "8-bit and 16-bit register arrays must be of equal length");
static_assert(std::size(RegList8Tiny) == std::size(RegList16Tiny),
              "8-bit and 16-bit register arrays must be of equal length");
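
// Worked example of the assignment performed below (assuming the regular AVR
// ABI, not avr-tiny): for
//   int f(char a, int b);
// "a" occupies a slot rounded up to 2 bytes and lands in R24, and "b" is
// passed in the R23:R22 pair, matching avr-gcc's convention.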

/// Analyze incoming and outgoing function arguments. We need custom C++ code
/// to handle special constraints in the ABI.
/// In addition, all pieces of a certain argument have to be passed either
/// using registers or the stack but never mixing both.
template <typename ArgT>
static void analyzeArguments(TargetLowering::CallLoweringInfo *CLI,
                             const Function *F, const DataLayout *TD,
                             const SmallVectorImpl<ArgT> &Args,
                             SmallVectorImpl<CCValAssign> &ArgLocs,
                             CCState &CCInfo, bool Tiny) {
  // Choose the proper register list for argument passing according to the ABI.
  ArrayRef<MCPhysReg> RegList8;
  ArrayRef<MCPhysReg> RegList16;
  if (Tiny) {
    RegList8 = ArrayRef(RegList8Tiny, std::size(RegList8Tiny));
    RegList16 = ArrayRef(RegList16Tiny, std::size(RegList16Tiny));
  } else {
    RegList8 = ArrayRef(RegList8AVR, std::size(RegList8AVR));
    RegList16 = ArrayRef(RegList16AVR, std::size(RegList16AVR));
  }

  unsigned NumArgs = Args.size();
  // This is the index of the last used register, in RegList*.
  // -1 means R26 (R26 is never actually used in CC).
  int RegLastIdx = -1;
  // Once a value is passed to the stack it will always be used.
  bool UseStack = false;
  for (unsigned i = 0; i != NumArgs;) {
    MVT VT = Args[i].VT;
    // We have to count the number of bytes for each function argument, that is
    // those Args with the same OrigArgIndex. This is important in case the
    // function takes an aggregate type.
    // Current argument will be between [i..j).
    unsigned ArgIndex = Args[i].OrigArgIndex;
    unsigned TotalBytes = VT.getStoreSize();
    unsigned j = i + 1;
    for (; j != NumArgs; ++j) {
      if (Args[j].OrigArgIndex != ArgIndex)
        break;
      TotalBytes += Args[j].VT.getStoreSize();
    }
    // Round up to even number of bytes.
    TotalBytes = alignTo(TotalBytes, 2);
    // Skip zero sized arguments.
    if (TotalBytes == 0)
      continue;
    // The index of the first register to be used.
    unsigned RegIdx = RegLastIdx + TotalBytes;
    RegLastIdx = RegIdx;
    // If there are not enough registers, use the stack.
    if (RegIdx >= RegList8.size()) {
      UseStack = true;
    }
    for (; i != j; ++i) {
      MVT VT = Args[i].VT;

      if (UseStack) {
        auto evt = EVT(VT).getTypeForEVT(CCInfo.getContext());
        unsigned Offset = CCInfo.AllocateStack(TD->getTypeAllocSize(evt),
                                               TD->getABITypeAlign(evt));
        CCInfo.addLoc(
            CCValAssign::getMem(i, VT, Offset, VT, CCValAssign::Full));
      } else {
        unsigned Reg;
        if (VT == MVT::i8) {
          Reg = CCInfo.AllocateReg(RegList8[RegIdx]);
        } else if (VT == MVT::i16) {
          Reg = CCInfo.AllocateReg(RegList16[RegIdx]);
        } else {
          llvm_unreachable(
              "calling convention can only manage i8 and i16 types");
        }
        assert(Reg && "register not available in calling convention");
        CCInfo.addLoc(CCValAssign::getReg(i, VT, Reg, VT, CCValAssign::Full));
        // Registers inside a particular argument are sorted in increasing
        // order (remember the array is reversed).
        RegIdx -= VT.getStoreSize();
      }
    }
  }
}

/// Count the total number of bytes needed to pass or return these arguments.
template <typename ArgT>
static unsigned
getTotalArgumentsSizeInBytes(const SmallVectorImpl<ArgT> &Args) {
  unsigned TotalBytes = 0;

  for (const ArgT &Arg : Args) {
    TotalBytes += Arg.VT.getStoreSize();
  }
  return TotalBytes;
}
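
// Worked example of the return-value assignment below (regular AVR ABI): an
// i32 value is returned in R25..R22, with the low word in R23:R22 and the
// high word in R25:R24, i.e. the low byte ends up in R22 as in avr-gcc.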

/// Analyze incoming and outgoing value of returning from a function.
/// The algorithm is similar to analyzeArguments, but there can only be
/// one value, possibly an aggregate, and it is limited to 8 bytes.
template <typename ArgT>
static void analyzeReturnValues(const SmallVectorImpl<ArgT> &Args,
                                CCState &CCInfo, bool Tiny) {
  unsigned NumArgs = Args.size();
  unsigned TotalBytes = getTotalArgumentsSizeInBytes(Args);
  // CanLowerReturn() guarantees this assertion.
  if (Tiny)
    assert(TotalBytes <= 4 &&
           "return values greater than 4 bytes cannot be lowered on AVRTiny");
  else
    assert(TotalBytes <= 8 &&
           "return values greater than 8 bytes cannot be lowered on AVR");

  // Choose the proper register list for argument passing according to the ABI.
  ArrayRef<MCPhysReg> RegList8;
  ArrayRef<MCPhysReg> RegList16;
  if (Tiny) {
    RegList8 = ArrayRef(RegList8Tiny, std::size(RegList8Tiny));
    RegList16 = ArrayRef(RegList16Tiny, std::size(RegList16Tiny));
  } else {
    RegList8 = ArrayRef(RegList8AVR, std::size(RegList8AVR));
    RegList16 = ArrayRef(RegList16AVR, std::size(RegList16AVR));
  }

  // GCC-ABI says that the size is rounded up to the next even number,
  // but actually once it is more than 4 it will always round up to 8.
  if (TotalBytes > 4) {
    TotalBytes = 8;
  } else {
    TotalBytes = alignTo(TotalBytes, 2);
  }

  // The index of the first register to use.
  int RegIdx = TotalBytes - 1;
  for (unsigned i = 0; i != NumArgs; ++i) {
    MVT VT = Args[i].VT;
    unsigned Reg;
    if (VT == MVT::i8) {
      Reg = CCInfo.AllocateReg(RegList8[RegIdx]);
    } else if (VT == MVT::i16) {
      Reg = CCInfo.AllocateReg(RegList16[RegIdx]);
    } else {
      llvm_unreachable("calling convention can only manage i8 and i16 types");
    }
    assert(Reg && "register not available in calling convention");
    CCInfo.addLoc(CCValAssign::getReg(i, VT, Reg, VT, CCValAssign::Full));
    // Registers sort in increasing order.
    RegIdx -= VT.getStoreSize();
  }
}

SDValue AVRTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  auto DL = DAG.getDataLayout();

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
                 *DAG.getContext());

  // Variadic functions do not need all the analysis below.
  if (isVarArg) {
    CCInfo.AnalyzeFormalArguments(Ins, ArgCC_AVR_Vararg);
  } else {
    analyzeArguments(nullptr, &MF.getFunction(), &DL, Ins, ArgLocs, CCInfo,
                     Subtarget.hasTinyEncoding());
  }

  SDValue ArgValue;
  for (CCValAssign &VA : ArgLocs) {

    // Arguments stored in registers.
    if (VA.isRegLoc()) {
      EVT RegVT = VA.getLocVT();
      const TargetRegisterClass *RC;
      if (RegVT == MVT::i8) {
        RC = &AVR::GPR8RegClass;
      } else if (RegVT == MVT::i16) {
        RC = &AVR::DREGSRegClass;
      } else {
        llvm_unreachable("Unknown argument type!");
      }

      Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
      ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);

      // :NOTE: Clang should not promote any i8 into i16 but for safety the
      // following code will handle zexts or sexts generated by other
      // front ends. Otherwise:
      // If this is an 8 bit value, it is really passed promoted
      // to 16 bits. Insert an assert[sz]ext to capture this, then
      // truncate to the right size.
      switch (VA.getLocInfo()) {
      default:
        llvm_unreachable("Unknown loc info!");
      case CCValAssign::Full:
        break;
      case CCValAssign::BCvt:
        ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
        break;
      case CCValAssign::SExt:
        ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));
        ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
        break;
      case CCValAssign::ZExt:
        ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));
        ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
        break;
      }

      InVals.push_back(ArgValue);
    } else {
      // Only arguments passed on the stack should make it here.
      assert(VA.isMemLoc());

      EVT LocVT = VA.getLocVT();

      // Create the frame index object for this incoming parameter.
      int FI = MFI.CreateFixedObject(LocVT.getSizeInBits() / 8,
                                     VA.getLocMemOffset(), true);

      // Create the SelectionDAG nodes corresponding to a load
      // from this parameter.
      SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DL));
      InVals.push_back(DAG.getLoad(LocVT, dl, Chain, FIN,
                                   MachinePointerInfo::getFixedStack(MF, FI)));
    }
  }

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    unsigned StackSize = CCInfo.getStackSize();
    AVRMachineFunctionInfo *AFI = MF.getInfo<AVRMachineFunctionInfo>();

    AFI->setVarArgsFrameIndex(MFI.CreateFixedObject(2, StackSize, true));
  }

  return Chain;
}

//===----------------------------------------------------------------------===//
//                  Call Calling Convention Implementation
//===----------------------------------------------------------------------===//

SDValue AVRTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                                     SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc &DL = CLI.DL;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  bool &isTailCall = CLI.IsTailCall;
  CallingConv::ID CallConv = CLI.CallConv;
  bool isVarArg = CLI.IsVarArg;

  MachineFunction &MF = DAG.getMachineFunction();

  // AVR does not yet support tail call optimization.
  isTailCall = false;

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
                 *DAG.getContext());

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  const Function *F = nullptr;
  if (const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    const GlobalValue *GV = G->getGlobal();
    if (isa<Function>(GV))
      F = cast<Function>(GV);
    Callee =
        DAG.getTargetGlobalAddress(GV, DL, getPointerTy(DAG.getDataLayout()));
  } else if (const ExternalSymbolSDNode *ES =
                 dyn_cast<ExternalSymbolSDNode>(Callee)) {
    Callee = DAG.getTargetExternalSymbol(ES->getSymbol(),
                                         getPointerTy(DAG.getDataLayout()));
  }

  // Variadic functions do not need all the analysis below.
  if (isVarArg) {
    CCInfo.AnalyzeCallOperands(Outs, ArgCC_AVR_Vararg);
  } else {
    analyzeArguments(&CLI, F, &DAG.getDataLayout(), Outs, ArgLocs, CCInfo,
                     Subtarget.hasTinyEncoding());
  }

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getStackSize();

  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);

  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;

  // First, walk the register assignments, inserting copies.
  unsigned AI, AE;
  bool HasStackArgs = false;
  for (AI = 0, AE = ArgLocs.size(); AI != AE; ++AI) {
    CCValAssign &VA = ArgLocs[AI];
    EVT RegVT = VA.getLocVT();
    SDValue Arg = OutVals[AI];

    // Promote the value if needed. With Clang this should not happen.
    switch (VA.getLocInfo()) {
    default:
      llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full:
      break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, RegVT, Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, RegVT, Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, DL, RegVT, Arg);
      break;
    case CCValAssign::BCvt:
      Arg = DAG.getNode(ISD::BITCAST, DL, RegVT, Arg);
      break;
    }

    // Stop when we encounter a stack argument; we need to process them
    // in reverse order in the loop below.
    if (VA.isMemLoc()) {
      HasStackArgs = true;
      break;
    }

    // Arguments that can be passed in registers must be kept in the
    // RegsToPass vector.
    RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
  }

  // Second, the stack arguments have to be walked.
  // Previously this code created chained stores, but those chained stores
  // appear to be unchained in the legalization phase. Therefore, do not
  // attempt to chain them here. In fact, chaining them here somehow causes
  // the first and second store to be reversed, which is the exact opposite
  // of the intended effect.
  if (HasStackArgs) {
    SmallVector<SDValue, 8> MemOpChains;
    for (; AI != AE; AI++) {
      CCValAssign &VA = ArgLocs[AI];
      SDValue Arg = OutVals[AI];

      assert(VA.isMemLoc());

      // SP points to one stack slot further so add one to adjust it.
      SDValue PtrOff = DAG.getNode(
          ISD::ADD, DL, getPointerTy(DAG.getDataLayout()),
          DAG.getRegister(AVR::SP, getPointerTy(DAG.getDataLayout())),
          DAG.getIntPtrConstant(VA.getLocMemOffset() + 1, DL));

      MemOpChains.push_back(
          DAG.getStore(Chain, DL, Arg, PtrOff,
                       MachinePointerInfo::getStack(MF, VA.getLocMemOffset())));
    }

    if (!MemOpChains.empty())
      Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers. The InGlue
  // is necessary since all emitted instructions must be stuck together.
  SDValue InGlue;
  for (auto Reg : RegsToPass) {
    Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, InGlue);
    InGlue = Chain.getValue(1);
  }

  // Returns a chain & a flag for retval copy to use.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (auto Reg : RegsToPass) {
    Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
  }

  // The zero register (usually R1) must be passed as an implicit register so
  // that this register is correctly zeroed in interrupts.
  Ops.push_back(DAG.getRegister(Subtarget.getZeroRegister(), MVT::i8));

  // Add a register mask operand representing the call-preserved registers.
  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const uint32_t *Mask =
      TRI->getCallPreservedMask(DAG.getMachineFunction(), CallConv);
  assert(Mask && "Missing call preserved mask for calling convention");
  Ops.push_back(DAG.getRegisterMask(Mask));

  if (InGlue.getNode()) {
    Ops.push_back(InGlue);
  }

  Chain = DAG.getNode(AVRISD::CALL, DL, NodeTys, Ops);
  InGlue = Chain.getValue(1);

  // Create the CALLSEQ_END node.
  Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, InGlue, DL);

  if (!Ins.empty()) {
    InGlue = Chain.getValue(1);
  }

  // Handle result values, copying them out of physregs into vregs that we
  // return.
  return LowerCallResult(Chain, InGlue, CallConv, isVarArg, Ins, DL, DAG,
                         InVals);
}

/// Lower the result values of a call into the
/// appropriate copies out of appropriate physical registers.
///
SDValue AVRTargetLowering::LowerCallResult(
    SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());

  // Handle runtime calling convs.
  if (CallConv == CallingConv::AVR_BUILTIN) {
    CCInfo.AnalyzeCallResult(Ins, RetCC_AVR_BUILTIN);
  } else {
    analyzeReturnValues(Ins, CCInfo, Subtarget.hasTinyEncoding());
  }

  // Copy all of the result registers out of their specified physreg.
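  // A glued CopyFromReg node has three results: the value (result 0), the
  // output chain (result 1), and the output glue (result 2). The loop below
  // threads the chain and glue through every copy so the copies stay together
  // right after the call.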
  for (CCValAssign const &RVLoc : RVLocs) {
    Chain = DAG.getCopyFromReg(Chain, dl, RVLoc.getLocReg(), RVLoc.getValVT(),
                               InGlue)
                .getValue(1);
    InGlue = Chain.getValue(2);
    InVals.push_back(Chain.getValue(0));
  }

  return Chain;
}

//===----------------------------------------------------------------------===//
//  Return Value Calling Convention Implementation
//===----------------------------------------------------------------------===//

bool AVRTargetLowering::CanLowerReturn(
    CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
  if (CallConv == CallingConv::AVR_BUILTIN) {
    SmallVector<CCValAssign, 16> RVLocs;
    CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
    return CCInfo.CheckReturn(Outs, RetCC_AVR_BUILTIN);
  }

  unsigned TotalBytes = getTotalArgumentsSizeInBytes(Outs);
  return TotalBytes <= (unsigned)(Subtarget.hasTinyEncoding() ? 4 : 8);
}

SDValue
AVRTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                               bool isVarArg,
                               const SmallVectorImpl<ISD::OutputArg> &Outs,
                               const SmallVectorImpl<SDValue> &OutVals,
                               const SDLoc &dl, SelectionDAG &DAG) const {
  // CCValAssign - represent the assignment of the return value to locations.
  SmallVector<CCValAssign, 16> RVLocs;

  // CCState - Info about the registers and stack slot.
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());

  MachineFunction &MF = DAG.getMachineFunction();

  // Analyze return values.
  if (CallConv == CallingConv::AVR_BUILTIN) {
    CCInfo.AnalyzeReturn(Outs, RetCC_AVR_BUILTIN);
  } else {
    analyzeReturnValues(Outs, CCInfo, Subtarget.hasTinyEncoding());
  }

  SDValue Glue;
  SmallVector<SDValue, 4> RetOps(1, Chain);
  // Copy the result values into the output registers.
  for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), OutVals[i], Glue);

    // Guarantee that all emitted copies are stuck together with flags.
    Glue = Chain.getValue(1);
    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
  }

  // Don't emit the ret/reti instruction when the naked attribute is present
  // in the function being compiled.
  if (MF.getFunction().getAttributes().hasFnAttr(Attribute::Naked)) {
    return Chain;
  }

  const AVRMachineFunctionInfo *AFI = MF.getInfo<AVRMachineFunctionInfo>();

  if (!AFI->isInterruptOrSignalHandler()) {
    // The return instruction has an implicit zero register operand: it must
    // contain zero on return.
    // This is not needed in interrupts however, where the zero register is
    // handled specially (only pushed/popped when needed).
    RetOps.push_back(DAG.getRegister(Subtarget.getZeroRegister(), MVT::i8));
  }

  unsigned RetOpc =
      AFI->isInterruptOrSignalHandler() ? AVRISD::RETI_GLUE : AVRISD::RET_GLUE;

  RetOps[0] = Chain; // Update chain.
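
  // Glue the return node to the last register copy (if any) so that the
  // copies cannot be scheduled away from the ret/reti.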
  if (Glue.getNode()) {
    RetOps.push_back(Glue);
  }

  return DAG.getNode(RetOpc, dl, MVT::Other, RetOps);
}

//===----------------------------------------------------------------------===//
//  Custom Inserters
//===----------------------------------------------------------------------===//

MachineBasicBlock *AVRTargetLowering::insertShift(MachineInstr &MI,
                                                  MachineBasicBlock *BB,
                                                  bool Tiny) const {
  unsigned Opc;
  const TargetRegisterClass *RC;
  bool HasRepeatedOperand = false;
  MachineFunction *F = BB->getParent();
  MachineRegisterInfo &RI = F->getRegInfo();
  const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
  DebugLoc dl = MI.getDebugLoc();

  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Invalid shift opcode!");
  case AVR::Lsl8:
    Opc = AVR::ADDRdRr; // LSL is an alias of ADD Rd, Rd
    RC = &AVR::GPR8RegClass;
    HasRepeatedOperand = true;
    break;
  case AVR::Lsl16:
    Opc = AVR::LSLWRd;
    RC = &AVR::DREGSRegClass;
    break;
  case AVR::Asr8:
    Opc = AVR::ASRRd;
    RC = &AVR::GPR8RegClass;
    break;
  case AVR::Asr16:
    Opc = AVR::ASRWRd;
    RC = &AVR::DREGSRegClass;
    break;
  case AVR::Lsr8:
    Opc = AVR::LSRRd;
    RC = &AVR::GPR8RegClass;
    break;
  case AVR::Lsr16:
    Opc = AVR::LSRWRd;
    RC = &AVR::DREGSRegClass;
    break;
  case AVR::Rol8:
    Opc = Tiny ? AVR::ROLBRdR17 : AVR::ROLBRdR1;
    RC = &AVR::GPR8RegClass;
    break;
  case AVR::Rol16:
    Opc = AVR::ROLWRd;
    RC = &AVR::DREGSRegClass;
    break;
  case AVR::Ror8:
    Opc = AVR::RORBRd;
    RC = &AVR::GPR8RegClass;
    break;
  case AVR::Ror16:
    Opc = AVR::RORWRd;
    RC = &AVR::DREGSRegClass;
    break;
  }

  const BasicBlock *LLVM_BB = BB->getBasicBlock();

  MachineFunction::iterator I;
  for (I = BB->getIterator(); I != F->end() && &(*I) != BB; ++I)
    ;
  if (I != F->end())
    ++I;

  // Create loop block.
  MachineBasicBlock *LoopBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *CheckBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *RemBB = F->CreateMachineBasicBlock(LLVM_BB);

  F->insert(I, LoopBB);
  F->insert(I, CheckBB);
  F->insert(I, RemBB);

  // Update machine-CFG edges by transferring all successors of the current
  // block to the block containing instructions after shift.
  RemBB->splice(RemBB->begin(), BB, std::next(MachineBasicBlock::iterator(MI)),
                BB->end());
  RemBB->transferSuccessorsAndUpdatePHIs(BB);

  // Add edges BB => LoopBB => CheckBB => RemBB, CheckBB => LoopBB.
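  // A sketch of the resulting loop (BB jumps straight to the check, so a
  // shift amount of zero performs no shift at all):
  //
  //   BB:      rjmp CheckBB
  //   LoopBB:  one single-bit shift step, falls through to CheckBB
  //   CheckBB: decrement the counter; branch back to LoopBB while it is
  //            still >= 0, otherwise fall through to RemBB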
  BB->addSuccessor(CheckBB);
  LoopBB->addSuccessor(CheckBB);
  CheckBB->addSuccessor(LoopBB);
  CheckBB->addSuccessor(RemBB);

  Register ShiftAmtReg = RI.createVirtualRegister(&AVR::GPR8RegClass);
  Register ShiftAmtReg2 = RI.createVirtualRegister(&AVR::GPR8RegClass);
  Register ShiftReg = RI.createVirtualRegister(RC);
  Register ShiftReg2 = RI.createVirtualRegister(RC);
  Register ShiftAmtSrcReg = MI.getOperand(2).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  Register DstReg = MI.getOperand(0).getReg();

  // BB:
  // rjmp CheckBB
  BuildMI(BB, dl, TII.get(AVR::RJMPk)).addMBB(CheckBB);

  // LoopBB:
  // ShiftReg2 = shift ShiftReg
  auto ShiftMI = BuildMI(LoopBB, dl, TII.get(Opc), ShiftReg2).addReg(ShiftReg);
  if (HasRepeatedOperand)
    ShiftMI.addReg(ShiftReg);

  // CheckBB:
  // ShiftReg = phi [%SrcReg, BB], [%ShiftReg2, LoopBB]
  // ShiftAmt = phi [%N, BB], [%ShiftAmt2, LoopBB]
  // DestReg = phi [%SrcReg, BB], [%ShiftReg, LoopBB]
  // ShiftAmt2 = ShiftAmt - 1;
  // if (ShiftAmt2 >= 0) goto LoopBB;
  BuildMI(CheckBB, dl, TII.get(AVR::PHI), ShiftReg)
      .addReg(SrcReg)
      .addMBB(BB)
      .addReg(ShiftReg2)
      .addMBB(LoopBB);
  BuildMI(CheckBB, dl, TII.get(AVR::PHI), ShiftAmtReg)
      .addReg(ShiftAmtSrcReg)
      .addMBB(BB)
      .addReg(ShiftAmtReg2)
      .addMBB(LoopBB);
  BuildMI(CheckBB, dl, TII.get(AVR::PHI), DstReg)
      .addReg(SrcReg)
      .addMBB(BB)
      .addReg(ShiftReg2)
      .addMBB(LoopBB);

  BuildMI(CheckBB, dl, TII.get(AVR::DECRd), ShiftAmtReg2).addReg(ShiftAmtReg);
  BuildMI(CheckBB, dl, TII.get(AVR::BRPLk)).addMBB(LoopBB);

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return RemBB;
}

// Do a multibyte AVR shift. Insert shift instructions and put the output
// registers in the Regs array.
// Because AVR does not have a normal shift instruction (only a single-bit
// shift instruction), we have to emulate this behavior with other
// instructions.
// It first tries large steps (moving registers around) and then smaller steps
// like single bit shifts.
// Large shifts actually reduce the number of shifted registers, so the below
// algorithms have to work independently of the number of registers that are
// shifted.
// For more information and background, see this blog post:
// https://aykevl.nl/2021/02/avr-bitshift
static void insertMultibyteShift(MachineInstr &MI, MachineBasicBlock *BB,
                                 MutableArrayRef<std::pair<Register, int>> Regs,
                                 ISD::NodeType Opc, int64_t ShiftAmt) {
  const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
  const AVRSubtarget &STI = BB->getParent()->getSubtarget<AVRSubtarget>();
  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
  const DebugLoc &dl = MI.getDebugLoc();

  const bool ShiftLeft = Opc == ISD::SHL;
  const bool ArithmeticShift = Opc == ISD::SRA;

  // Zero a register, for use in later operations.
  Register ZeroReg = MRI.createVirtualRegister(&AVR::GPR8RegClass);
  BuildMI(*BB, MI, dl, TII.get(AVR::COPY), ZeroReg)
      .addReg(STI.getZeroRegister());

  // Do a shift modulo 6 or 7. This is a bit more complicated than most shifts
  // and is hard to compose with the rest, so these are special cased.
  // The basic idea is to shift one or two bits in the opposite direction and
  // then move registers around to get the correct end result.
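  // As an illustration (a sketch, with arbitrary registers): for a 16-bit
  // value in r25:r24, "x << 7" becomes a single-bit right shift whose
  // carried-out bit is rotated into a fresh zero byte, after which the bytes
  // move one position up:
  //   lsr r25
  //   ror r24
  //   ror r26      ; r26 started out as zero and becomes the low result byte
  // The result is then r24:r26 (high:low), using only three instructions.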
  if (ShiftLeft && (ShiftAmt % 8) >= 6) {
    // Left shift modulo 6 or 7.

    // Create a slice of the registers we're going to modify, to ease working
    // with them.
    size_t ShiftRegsOffset = ShiftAmt / 8;
    size_t ShiftRegsSize = Regs.size() - ShiftRegsOffset;
    MutableArrayRef<std::pair<Register, int>> ShiftRegs =
        Regs.slice(ShiftRegsOffset, ShiftRegsSize);

    // Shift one to the right, keeping the least significant bit as the carry
    // bit.
    insertMultibyteShift(MI, BB, ShiftRegs, ISD::SRL, 1);

    // Rotate the least significant bit from the carry bit into a new register
    // (that starts out zero).
    Register LowByte = MRI.createVirtualRegister(&AVR::GPR8RegClass);
    BuildMI(*BB, MI, dl, TII.get(AVR::RORRd), LowByte).addReg(ZeroReg);

    // Shift one more to the right if this is a modulo-6 shift.
    if (ShiftAmt % 8 == 6) {
      insertMultibyteShift(MI, BB, ShiftRegs, ISD::SRL, 1);
      Register NewLowByte = MRI.createVirtualRegister(&AVR::GPR8RegClass);
      BuildMI(*BB, MI, dl, TII.get(AVR::RORRd), NewLowByte).addReg(LowByte);
      LowByte = NewLowByte;
    }

    // Move all registers to the left, zeroing the bottom registers as needed.
    for (size_t I = 0; I < Regs.size(); I++) {
      int ShiftRegsIdx = I + 1;
      if (ShiftRegsIdx < (int)ShiftRegs.size()) {
        Regs[I] = ShiftRegs[ShiftRegsIdx];
      } else if (ShiftRegsIdx == (int)ShiftRegs.size()) {
        Regs[I] = std::pair(LowByte, 0);
      } else {
        Regs[I] = std::pair(ZeroReg, 0);
      }
    }

    return;
  }

  // Right shift modulo 6 or 7.
  if (!ShiftLeft && (ShiftAmt % 8) >= 6) {
    // Create a view on the registers we're going to modify, to ease working
    // with them.
    size_t ShiftRegsSize = Regs.size() - (ShiftAmt / 8);
    MutableArrayRef<std::pair<Register, int>> ShiftRegs =
        Regs.slice(0, ShiftRegsSize);

    // Shift one to the left.
    insertMultibyteShift(MI, BB, ShiftRegs, ISD::SHL, 1);

    // Sign or zero extend the most significant register into a new register.
    // The HighByte is the byte that still has one (or two) bits from the
    // original value. The ExtByte is purely a zero/sign extend byte (all bits
    // are either 0 or 1).
    Register HighByte = MRI.createVirtualRegister(&AVR::GPR8RegClass);
    Register ExtByte = 0;
    if (ArithmeticShift) {
      // Sign-extend bit that was shifted out last.
      BuildMI(*BB, MI, dl, TII.get(AVR::SBCRdRr), HighByte)
          .addReg(HighByte, RegState::Undef)
          .addReg(HighByte, RegState::Undef);
      ExtByte = HighByte;
      // The highest bit of the original value is the same as the zero-extend
      // byte, so HighByte and ExtByte are the same.
    } else {
      // Use the zero register for zero extending.
      ExtByte = ZeroReg;
      // Rotate most significant bit into a new register (that starts out
      // zero).
      BuildMI(*BB, MI, dl, TII.get(AVR::ADCRdRr), HighByte)
          .addReg(ExtByte)
          .addReg(ExtByte);
    }

    // Shift one more to the left for modulo 6 shifts.
    if (ShiftAmt % 8 == 6) {
      insertMultibyteShift(MI, BB, ShiftRegs, ISD::SHL, 1);
      // Shift the topmost bit into the HighByte.
      Register NewExt = MRI.createVirtualRegister(&AVR::GPR8RegClass);
      BuildMI(*BB, MI, dl, TII.get(AVR::ADCRdRr), NewExt)
          .addReg(HighByte)
          .addReg(HighByte);
      HighByte = NewExt;
    }

    // Move all to the right, while sign or zero extending.
    for (int I = Regs.size() - 1; I >= 0; I--) {
      int ShiftRegsIdx = I - (Regs.size() - ShiftRegs.size()) - 1;
      if (ShiftRegsIdx >= 0) {
        Regs[I] = ShiftRegs[ShiftRegsIdx];
      } else if (ShiftRegsIdx == -1) {
        Regs[I] = std::pair(HighByte, 0);
      } else {
        Regs[I] = std::pair(ExtByte, 0);
      }
    }

    return;
  }

  // For shift amounts of at least one register, simply rename the registers
  // and zero the bottom registers.
  while (ShiftLeft && ShiftAmt >= 8) {
    // Move all registers one to the left.
    for (size_t I = 0; I < Regs.size() - 1; I++) {
      Regs[I] = Regs[I + 1];
    }

    // Zero the least significant register.
    Regs[Regs.size() - 1] = std::pair(ZeroReg, 0);

    // Continue shifts with the leftover registers.
    Regs = Regs.drop_back(1);

    ShiftAmt -= 8;
  }

  // And again, the same for right shifts.
  Register ShrExtendReg = 0;
  if (!ShiftLeft && ShiftAmt >= 8) {
    if (ArithmeticShift) {
      // Sign extend the most significant register into ShrExtendReg.
      ShrExtendReg = MRI.createVirtualRegister(&AVR::GPR8RegClass);
      Register Tmp = MRI.createVirtualRegister(&AVR::GPR8RegClass);
      BuildMI(*BB, MI, dl, TII.get(AVR::ADDRdRr), Tmp)
          .addReg(Regs[0].first, 0, Regs[0].second)
          .addReg(Regs[0].first, 0, Regs[0].second);
      BuildMI(*BB, MI, dl, TII.get(AVR::SBCRdRr), ShrExtendReg)
          .addReg(Tmp)
          .addReg(Tmp);
    } else {
      ShrExtendReg = ZeroReg;
    }
    for (; ShiftAmt >= 8; ShiftAmt -= 8) {
      // Move all registers one to the right.
      for (size_t I = Regs.size() - 1; I != 0; I--) {
        Regs[I] = Regs[I - 1];
      }

      // Zero or sign extend the most significant register.
      Regs[0] = std::pair(ShrExtendReg, 0);

      // Continue shifts with the leftover registers.
      Regs = Regs.drop_front(1);
    }
  }

  // The bigger shifts are already handled above.
  assert((ShiftAmt < 8) && "Unexpected shift amount");

  // Shift by four bits, using a complicated swap/eor/andi/eor sequence.
  // It only works for logical shifts because the bits shifted in are all
  // zeroes.
  // To shift a single byte right, it produces code like this:
  //   swap r0
  //   andi r0, 0x0f
  // For a two-byte (16-bit) shift, it adds the following instructions to
  // shift the upper byte into the lower byte:
  //   swap r1
  //   eor r0, r1
  //   andi r1, 0x0f
  //   eor r0, r1
  // For bigger shifts, it repeats the above sequence. For example, for a
  // 3-byte (24-bit) shift it adds:
  //   swap r2
  //   eor r1, r2
  //   andi r2, 0x0f
  //   eor r1, r2
  if (!ArithmeticShift && ShiftAmt >= 4) {
    Register Prev = 0;
    for (size_t I = 0; I < Regs.size(); I++) {
      size_t Idx = ShiftLeft ? I : Regs.size() - I - 1;
      Register SwapReg = MRI.createVirtualRegister(&AVR::LD8RegClass);
      BuildMI(*BB, MI, dl, TII.get(AVR::SWAPRd), SwapReg)
          .addReg(Regs[Idx].first, 0, Regs[Idx].second);
      if (I != 0) {
        Register R = MRI.createVirtualRegister(&AVR::GPR8RegClass);
        BuildMI(*BB, MI, dl, TII.get(AVR::EORRdRr), R)
            .addReg(Prev)
            .addReg(SwapReg);
        Prev = R;
      }
      Register AndReg = MRI.createVirtualRegister(&AVR::LD8RegClass);
      BuildMI(*BB, MI, dl, TII.get(AVR::ANDIRdK), AndReg)
          .addReg(SwapReg)
          .addImm(ShiftLeft ? 0xf0 : 0x0f);
      if (I != 0) {
        Register R = MRI.createVirtualRegister(&AVR::GPR8RegClass);
        BuildMI(*BB, MI, dl, TII.get(AVR::EORRdRr), R)
            .addReg(Prev)
            .addReg(AndReg);
        size_t PrevIdx = ShiftLeft ? Idx - 1 : Idx + 1;
        Regs[PrevIdx] = std::pair(R, 0);
      }
      Prev = AndReg;
      Regs[Idx] = std::pair(AndReg, 0);
    }
    ShiftAmt -= 4;
  }

  // Shift by one. This is the fallback that always works, and the shift
  // operation that is used for 1, 2, and 3 bit shifts.
  while (ShiftLeft && ShiftAmt) {
    // Shift one to the left.
    for (ssize_t I = Regs.size() - 1; I >= 0; I--) {
      Register Out = MRI.createVirtualRegister(&AVR::GPR8RegClass);
      Register In = Regs[I].first;
      Register InSubreg = Regs[I].second;
      if (I == (ssize_t)Regs.size() - 1) { // first iteration
        BuildMI(*BB, MI, dl, TII.get(AVR::ADDRdRr), Out)
            .addReg(In, 0, InSubreg)
            .addReg(In, 0, InSubreg);
      } else {
        BuildMI(*BB, MI, dl, TII.get(AVR::ADCRdRr), Out)
            .addReg(In, 0, InSubreg)
            .addReg(In, 0, InSubreg);
      }
      Regs[I] = std::pair(Out, 0);
    }
    ShiftAmt--;
  }
  while (!ShiftLeft && ShiftAmt) {
    // Shift one to the right.
    for (size_t I = 0; I < Regs.size(); I++) {
      Register Out = MRI.createVirtualRegister(&AVR::GPR8RegClass);
      Register In = Regs[I].first;
      Register InSubreg = Regs[I].second;
      if (I == 0) {
        unsigned Opc = ArithmeticShift ? AVR::ASRRd : AVR::LSRRd;
        BuildMI(*BB, MI, dl, TII.get(Opc), Out).addReg(In, 0, InSubreg);
      } else {
        BuildMI(*BB, MI, dl, TII.get(AVR::RORRd), Out).addReg(In, 0, InSubreg);
      }
      Regs[I] = std::pair(Out, 0);
    }
    ShiftAmt--;
  }

  if (ShiftAmt != 0) {
    llvm_unreachable("don't know how to shift!"); // sanity check
  }
}

// Do a wide (32-bit) shift.
MachineBasicBlock *
AVRTargetLowering::insertWideShift(MachineInstr &MI,
                                   MachineBasicBlock *BB) const {
  const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
  const DebugLoc &dl = MI.getDebugLoc();

  // How much to shift to the right (meaning: a negative number indicates a
  // left shift).
  int64_t ShiftAmt = MI.getOperand(4).getImm();
  ISD::NodeType Opc;
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Unexpected wide shift opcode!");
  case AVR::Lsl32:
    Opc = ISD::SHL;
    break;
  case AVR::Lsr32:
    Opc = ISD::SRL;
    break;
  case AVR::Asr32:
    Opc = ISD::SRA;
    break;
  }

  // Read the input registers, with the most significant register at index 0.
  std::array<std::pair<Register, int>, 4> Registers = {
      std::pair(MI.getOperand(3).getReg(), AVR::sub_hi),
      std::pair(MI.getOperand(3).getReg(), AVR::sub_lo),
      std::pair(MI.getOperand(2).getReg(), AVR::sub_hi),
      std::pair(MI.getOperand(2).getReg(), AVR::sub_lo),
  };

  // Do the shift. The registers are modified in-place.
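  // (For instance, a 32-bit logical right shift by 12 is expected to
  // decompose into one whole-byte register rename followed by the 4-bit
  // swap/andi/eor sequence on the remaining bytes.)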
  insertMultibyteShift(MI, BB, Registers, Opc, ShiftAmt);

  // Combine the 8-bit registers into 16-bit register pairs.
  // This is done either from LSB to MSB or from MSB to LSB, depending on the
  // shift. It's an optimization so that the register allocator will use the
  // fewest movs possible (which order we use isn't a correctness issue, just
  // an optimization issue).
  //   - lsl prefers starting from the most significant byte (2nd case).
  //   - lshr prefers starting from the least significant byte (1st case).
  //   - for ashr it depends on the number of shifted bytes.
  // Some shift operations still don't get the most optimal mov sequences even
  // with this distinction. TODO: figure out why and try to fix it (but we're
  // already equal to or faster than avr-gcc in all cases except ashr 8).
  if (Opc != ISD::SHL &&
      (Opc != ISD::SRA || (ShiftAmt < 16 || ShiftAmt >= 22))) {
    // Use the resulting registers starting with the least significant byte.
    BuildMI(*BB, MI, dl, TII.get(AVR::REG_SEQUENCE), MI.getOperand(0).getReg())
        .addReg(Registers[3].first, 0, Registers[3].second)
        .addImm(AVR::sub_lo)
        .addReg(Registers[2].first, 0, Registers[2].second)
        .addImm(AVR::sub_hi);
    BuildMI(*BB, MI, dl, TII.get(AVR::REG_SEQUENCE), MI.getOperand(1).getReg())
        .addReg(Registers[1].first, 0, Registers[1].second)
        .addImm(AVR::sub_lo)
        .addReg(Registers[0].first, 0, Registers[0].second)
        .addImm(AVR::sub_hi);
  } else {
    // Use the resulting registers starting with the most significant byte.
    BuildMI(*BB, MI, dl, TII.get(AVR::REG_SEQUENCE), MI.getOperand(1).getReg())
        .addReg(Registers[0].first, 0, Registers[0].second)
        .addImm(AVR::sub_hi)
        .addReg(Registers[1].first, 0, Registers[1].second)
        .addImm(AVR::sub_lo);
    BuildMI(*BB, MI, dl, TII.get(AVR::REG_SEQUENCE), MI.getOperand(0).getReg())
        .addReg(Registers[2].first, 0, Registers[2].second)
        .addImm(AVR::sub_hi)
        .addReg(Registers[3].first, 0, Registers[3].second)
        .addImm(AVR::sub_lo);
  }

  // Remove the pseudo instruction.
  MI.eraseFromParent();
  return BB;
}

static bool isCopyMulResult(MachineBasicBlock::iterator const &I) {
  if (I->getOpcode() == AVR::COPY) {
    Register SrcReg = I->getOperand(1).getReg();
    return (SrcReg == AVR::R0 || SrcReg == AVR::R1);
  }

  return false;
}

// The mul instructions wreak havoc on our zero_reg R1. We need to clear it
// after the result has been evacuated. This is probably not the best way to
// do it, but it works for now.
MachineBasicBlock *AVRTargetLowering::insertMul(MachineInstr &MI,
                                                MachineBasicBlock *BB) const {
  const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
  MachineBasicBlock::iterator I(MI);
  ++I; // in any case insert *after* the mul instruction
  if (isCopyMulResult(I))
    ++I;
  if (isCopyMulResult(I))
    ++I;
  BuildMI(*BB, I, MI.getDebugLoc(), TII.get(AVR::EORRdRr), AVR::R1)
      .addReg(AVR::R1)
      .addReg(AVR::R1);
  return BB;
}

// Insert a read from the zero register.
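// On most devices the zero register is R1 (R17 with the tiny encoding), so
// this lowers to a plain copy out of Subtarget.getZeroRegister().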
MachineBasicBlock *
AVRTargetLowering::insertCopyZero(MachineInstr &MI,
                                  MachineBasicBlock *BB) const {
  const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
  MachineBasicBlock::iterator I(MI);
  BuildMI(*BB, I, MI.getDebugLoc(), TII.get(AVR::COPY))
      .add(MI.getOperand(0))
      .addReg(Subtarget.getZeroRegister());
  MI.eraseFromParent();
  return BB;
}

// Lower an atomicrmw operation to disable interrupts, do the operation, and
// restore interrupts. This works because all AVR microcontrollers are
// single core.
MachineBasicBlock *AVRTargetLowering::insertAtomicArithmeticOp(
    MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode, int Width) const {
  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
  const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
  MachineBasicBlock::iterator I(MI);
  DebugLoc dl = MI.getDebugLoc();

  // Example instruction sequence, for an atomic 8-bit add:
  //   ldi r25, 5
  //   in r0, SREG
  //   cli
  //   ld r24, X
  //   add r25, r24
  //   st X, r25
  //   out SREG, r0

  const TargetRegisterClass *RC =
      (Width == 8) ? &AVR::GPR8RegClass : &AVR::DREGSRegClass;
  unsigned LoadOpcode = (Width == 8) ? AVR::LDRdPtr : AVR::LDWRdPtr;
  unsigned StoreOpcode = (Width == 8) ? AVR::STPtrRr : AVR::STWPtrRr;

  // Disable interrupts.
  BuildMI(*BB, I, dl, TII.get(AVR::INRdA), Subtarget.getTmpRegister())
      .addImm(Subtarget.getIORegSREG());
  BuildMI(*BB, I, dl, TII.get(AVR::BCLRs)).addImm(7);

  // Load the original value.
  BuildMI(*BB, I, dl, TII.get(LoadOpcode), MI.getOperand(0).getReg())
      .add(MI.getOperand(1));

  // Do the arithmetic operation.
  Register Result = MRI.createVirtualRegister(RC);
  BuildMI(*BB, I, dl, TII.get(Opcode), Result)
      .addReg(MI.getOperand(0).getReg())
      .add(MI.getOperand(2));

  // Store the result.
  BuildMI(*BB, I, dl, TII.get(StoreOpcode))
      .add(MI.getOperand(1))
      .addReg(Result);

  // Restore interrupts.
  BuildMI(*BB, I, dl, TII.get(AVR::OUTARr))
      .addImm(Subtarget.getIORegSREG())
      .addReg(Subtarget.getTmpRegister());

  // Remove the pseudo instruction.
  MI.eraseFromParent();
  return BB;
}

MachineBasicBlock *
AVRTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                               MachineBasicBlock *MBB) const {
  int Opc = MI.getOpcode();
  const AVRSubtarget &STI = MBB->getParent()->getSubtarget<AVRSubtarget>();

  // Pseudo shift instructions with a non-constant shift amount are expanded
  // into a loop.
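  // The 32-bit shift pseudos always carry a constant amount and are instead
  // expanded inline by insertWideShift.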
  switch (Opc) {
  case AVR::Lsl8:
  case AVR::Lsl16:
  case AVR::Lsr8:
  case AVR::Lsr16:
  case AVR::Rol8:
  case AVR::Rol16:
  case AVR::Ror8:
  case AVR::Ror16:
  case AVR::Asr8:
  case AVR::Asr16:
    return insertShift(MI, MBB, STI.hasTinyEncoding());
  case AVR::Lsl32:
  case AVR::Lsr32:
  case AVR::Asr32:
    return insertWideShift(MI, MBB);
  case AVR::MULRdRr:
  case AVR::MULSRdRr:
    return insertMul(MI, MBB);
  case AVR::CopyZero:
    return insertCopyZero(MI, MBB);
  case AVR::AtomicLoadAdd8:
    return insertAtomicArithmeticOp(MI, MBB, AVR::ADDRdRr, 8);
  case AVR::AtomicLoadAdd16:
    return insertAtomicArithmeticOp(MI, MBB, AVR::ADDWRdRr, 16);
  case AVR::AtomicLoadSub8:
    return insertAtomicArithmeticOp(MI, MBB, AVR::SUBRdRr, 8);
  case AVR::AtomicLoadSub16:
    return insertAtomicArithmeticOp(MI, MBB, AVR::SUBWRdRr, 16);
  case AVR::AtomicLoadAnd8:
    return insertAtomicArithmeticOp(MI, MBB, AVR::ANDRdRr, 8);
  case AVR::AtomicLoadAnd16:
    return insertAtomicArithmeticOp(MI, MBB, AVR::ANDWRdRr, 16);
  case AVR::AtomicLoadOr8:
    return insertAtomicArithmeticOp(MI, MBB, AVR::ORRdRr, 8);
  case AVR::AtomicLoadOr16:
    return insertAtomicArithmeticOp(MI, MBB, AVR::ORWRdRr, 16);
  case AVR::AtomicLoadXor8:
    return insertAtomicArithmeticOp(MI, MBB, AVR::EORRdRr, 8);
  case AVR::AtomicLoadXor16:
    return insertAtomicArithmeticOp(MI, MBB, AVR::EORWRdRr, 16);
  }

  assert((Opc == AVR::Select16 || Opc == AVR::Select8) &&
         "Unexpected instr type to insert");

  const AVRInstrInfo &TII = (const AVRInstrInfo &)*MI.getParent()
                                ->getParent()
                                ->getSubtarget()
                                .getInstrInfo();
  DebugLoc dl = MI.getDebugLoc();

  // To "insert" a SELECT instruction, we insert the diamond
  // control-flow pattern. The incoming instruction knows the
  // destination vreg to set, the condition code register to branch
  // on, the true/false values to select between, and a branch opcode
  // to use.

  MachineFunction *MF = MBB->getParent();
  const BasicBlock *LLVM_BB = MBB->getBasicBlock();
  MachineBasicBlock *FallThrough = MBB->getFallThrough();

  // If the current basic block falls through to another basic block,
  // we must insert an unconditional branch to the fallthrough destination
  // if we are to insert basic blocks at the prior fallthrough point.
  if (FallThrough != nullptr) {
    BuildMI(MBB, dl, TII.get(AVR::RJMPk)).addMBB(FallThrough);
  }

  MachineBasicBlock *trueMBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *falseMBB = MF->CreateMachineBasicBlock(LLVM_BB);

  MachineFunction::iterator I;
  for (I = MF->begin(); I != MF->end() && &(*I) != MBB; ++I)
    ;
  if (I != MF->end())
    ++I;
  MF->insert(I, trueMBB);
  MF->insert(I, falseMBB);

  // Set the call frame size on entry to the new basic blocks.
  unsigned CallFrameSize = TII.getCallFrameSizeAt(MI);
  trueMBB->setCallFrameSize(CallFrameSize);
  falseMBB->setCallFrameSize(CallFrameSize);

  // Transfer remaining instructions and all successors of the current
  // block to the block which will contain the Phi node for the
  // select.
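  // A sketch of the resulting (degenerate) diamond, where falseMBB holds
  // nothing but its jump back:
  //
  //   MBB --(brCC)--> trueMBB (PHI + rest of the original block)
  //    \                ^
  //     `-> falseMBB --'
  //          (rjmp)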
  trueMBB->splice(trueMBB->begin(), MBB,
                  std::next(MachineBasicBlock::iterator(MI)), MBB->end());
  trueMBB->transferSuccessorsAndUpdatePHIs(MBB);

  AVRCC::CondCodes CC = (AVRCC::CondCodes)MI.getOperand(3).getImm();
  BuildMI(MBB, dl, TII.getBrCond(CC)).addMBB(trueMBB);
  BuildMI(MBB, dl, TII.get(AVR::RJMPk)).addMBB(falseMBB);
  MBB->addSuccessor(falseMBB);
  MBB->addSuccessor(trueMBB);

  // Unconditionally flow back to the true block.
  BuildMI(falseMBB, dl, TII.get(AVR::RJMPk)).addMBB(trueMBB);
  falseMBB->addSuccessor(trueMBB);

  // Set up the Phi node to determine where we came from.
  BuildMI(*trueMBB, trueMBB->begin(), dl, TII.get(AVR::PHI),
          MI.getOperand(0).getReg())
      .addReg(MI.getOperand(1).getReg())
      .addMBB(MBB)
      .addReg(MI.getOperand(2).getReg())
      .addMBB(falseMBB);

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return trueMBB;
}

//===----------------------------------------------------------------------===//
//  Inline Asm Support
//===----------------------------------------------------------------------===//

AVRTargetLowering::ConstraintType
AVRTargetLowering::getConstraintType(StringRef Constraint) const {
  if (Constraint.size() == 1) {
    // See http://www.nongnu.org/avr-libc/user-manual/inline_asm.html
    switch (Constraint[0]) {
    default:
      break;
    case 'a': // Simple upper registers
    case 'b': // Base pointer register pairs
    case 'd': // Upper register
    case 'l': // Lower registers
    case 'e': // Pointer register pairs
    case 'q': // Stack pointer register
    case 'r': // Any register
    case 'w': // Special upper register pairs
      return C_RegisterClass;
    case 't': // Temporary register
    case 'x':
    case 'X': // Pointer register pair X
    case 'y':
    case 'Y': // Pointer register pair Y
    case 'z':
    case 'Z': // Pointer register pair Z
      return C_Register;
    case 'Q': // A memory address based on Y or Z pointer with displacement.
      return C_Memory;
    case 'G': // Floating point constant
    case 'I': // 6-bit positive integer constant
    case 'J': // 6-bit negative integer constant
    case 'K': // Integer constant (Range: 2)
    case 'L': // Integer constant (Range: 0)
    case 'M': // 8-bit integer constant
    case 'N': // Integer constant (Range: -1)
    case 'O': // Integer constant (Range: 8, 16, 24)
    case 'P': // Integer constant (Range: 1)
    case 'R': // Integer constant (Range: -6 to 5)
      return C_Immediate;
    }
  }

  return TargetLowering::getConstraintType(Constraint);
}

InlineAsm::ConstraintCode
AVRTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
  // Not sure if this is actually the right thing to do, but we've got to do
  // *something* [agnat]
  switch (ConstraintCode[0]) {
  case 'Q':
    return InlineAsm::ConstraintCode::Q;
  }
  return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
}

AVRTargetLowering::ConstraintWeight
AVRTargetLowering::getSingleConstraintMatchWeight(
    AsmOperandInfo &info, const char *constraint) const {
  ConstraintWeight weight = CW_Invalid;
  Value *CallOperandVal = info.CallOperandVal;

  // If we don't have a value, we can't do a match,
  // but allow it at the lowest weight.
  // (this behaviour has been copied from the ARM backend)
  if (!CallOperandVal) {
    return CW_Default;
  }

  // Look at the constraint type.
  switch (*constraint) {
  default:
    weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
    break;
  case 'd':
  case 'r':
  case 'l':
    weight = CW_Register;
    break;
  case 'a':
  case 'b':
  case 'e':
  case 'q':
  case 't':
  case 'w':
  case 'x':
  case 'X':
  case 'y':
  case 'Y':
  case 'z':
  case 'Z':
    weight = CW_SpecificReg;
    break;
  case 'G':
    if (const ConstantFP *C = dyn_cast<ConstantFP>(CallOperandVal)) {
      if (C->isZero()) {
        weight = CW_Constant;
      }
    }
    break;
  case 'I':
    if (const ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
      if (isUInt<6>(C->getZExtValue())) {
        weight = CW_Constant;
      }
    }
    break;
  case 'J':
    if (const ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
      if ((C->getSExtValue() >= -63) && (C->getSExtValue() <= 0)) {
        weight = CW_Constant;
      }
    }
    break;
  case 'K':
    if (const ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
      if (C->getZExtValue() == 2) {
        weight = CW_Constant;
      }
    }
    break;
  case 'L':
    if (const ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
      if (C->getZExtValue() == 0) {
        weight = CW_Constant;
      }
    }
    break;
  case 'M':
    if (const ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
      if (isUInt<8>(C->getZExtValue())) {
        weight = CW_Constant;
      }
    }
    break;
  case 'N':
    if (const ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
      if (C->getSExtValue() == -1) {
        weight = CW_Constant;
      }
    }
    break;
  case 'O':
    if (const ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
      if ((C->getZExtValue() == 8) || (C->getZExtValue() == 16) ||
          (C->getZExtValue() == 24)) {
        weight = CW_Constant;
      }
    }
    break;
  case 'P':
    if (const ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
      if (C->getZExtValue() == 1) {
        weight = CW_Constant;
      }
    }
    break;
  case 'R':
    if (const ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
      if ((C->getSExtValue() >= -6) && (C->getSExtValue() <= 5)) {
        weight = CW_Constant;
      }
    }
    break;
  case 'Q':
    weight = CW_Memory;
    break;
  }

  return weight;
}

std::pair<unsigned, const TargetRegisterClass *>
AVRTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                                StringRef Constraint,
                                                MVT VT) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'a': // Simple upper registers r16..r23.
      if (VT == MVT::i8)
        return std::make_pair(0U, &AVR::LD8loRegClass);
      else if (VT == MVT::i16)
        return std::make_pair(0U, &AVR::DREGSLD8loRegClass);
      break;
    case 'b': // Base pointer registers: y, z.
      if (VT == MVT::i8 || VT == MVT::i16)
        return std::make_pair(0U, &AVR::PTRDISPREGSRegClass);
      break;
    case 'd': // Upper registers r16..r31.
      if (VT == MVT::i8)
        return std::make_pair(0U, &AVR::LD8RegClass);
      else if (VT == MVT::i16)
        return std::make_pair(0U, &AVR::DLDREGSRegClass);
      break;
    case 'l': // Lower registers r0..r15.
      if (VT == MVT::i8)
        return std::make_pair(0U, &AVR::GPR8loRegClass);
      else if (VT == MVT::i16)
        return std::make_pair(0U, &AVR::DREGSloRegClass);
      break;
    case 'e': // Pointer register pairs: x, y, z.
      if (VT == MVT::i8 || VT == MVT::i16)
        return std::make_pair(0U, &AVR::PTRREGSRegClass);
      break;
    case 'q': // Stack pointer register: SPH:SPL.
      return std::make_pair(0U, &AVR::GPRSPRegClass);
    case 'r': // Any register: r0..r31.
      if (VT == MVT::i8)
        return std::make_pair(0U, &AVR::GPR8RegClass);
      else if (VT == MVT::i16)
        return std::make_pair(0U, &AVR::DREGSRegClass);
      break;
    case 't': // Temporary register: r0.
      if (VT == MVT::i8)
        return std::make_pair(unsigned(Subtarget.getTmpRegister()),
                              &AVR::GPR8RegClass);
      break;
    case 'w': // Special upper register pairs: r24, r26, r28, r30.
      if (VT == MVT::i8 || VT == MVT::i16)
        return std::make_pair(0U, &AVR::IWREGSRegClass);
      break;
    case 'x': // Pointer register pair X: r27:r26.
    case 'X':
      if (VT == MVT::i8 || VT == MVT::i16)
        return std::make_pair(unsigned(AVR::R27R26), &AVR::PTRREGSRegClass);
      break;
    case 'y': // Pointer register pair Y: r29:r28.
    case 'Y':
      if (VT == MVT::i8 || VT == MVT::i16)
        return std::make_pair(unsigned(AVR::R29R28), &AVR::PTRREGSRegClass);
      break;
    case 'z': // Pointer register pair Z: r31:r30.
    case 'Z':
      if (VT == MVT::i8 || VT == MVT::i16)
        return std::make_pair(unsigned(AVR::R31R30), &AVR::PTRREGSRegClass);
      break;
    default:
      break;
    }
  }

  return TargetLowering::getRegForInlineAsmConstraint(
      Subtarget.getRegisterInfo(), Constraint, VT);
}

void AVRTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                     StringRef Constraint,
                                                     std::vector<SDValue> &Ops,
                                                     SelectionDAG &DAG) const {
  SDValue Result;
  SDLoc DL(Op);
  EVT Ty = Op.getValueType();

  // Currently only support length 1 constraints.
  if (Constraint.size() != 1) {
    return;
  }

  char ConstraintLetter = Constraint[0];
  switch (ConstraintLetter) {
  default:
    break;
  // Deal with integers first:
  case 'I':
  case 'J':
  case 'K':
  case 'L':
  case 'M':
  case 'N':
  case 'O':
  case 'P':
  case 'R': {
    const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
    if (!C) {
      return;
    }

    int64_t CVal64 = C->getSExtValue();
    uint64_t CUVal64 = C->getZExtValue();
    switch (ConstraintLetter) {
    case 'I': // 0..63
      if (!isUInt<6>(CUVal64))
        return;
      Result = DAG.getTargetConstant(CUVal64, DL, Ty);
      break;
    case 'J': // -63..0
      if (CVal64 < -63 || CVal64 > 0)
        return;
      Result = DAG.getTargetConstant(CVal64, DL, Ty);
      break;
    case 'K': // 2
      if (CUVal64 != 2)
        return;
      Result = DAG.getTargetConstant(CUVal64, DL, Ty);
      break;
    case 'L': // 0
      if (CUVal64 != 0)
        return;
      Result = DAG.getTargetConstant(CUVal64, DL, Ty);
      break;
    case 'M': // 0..255
      if (!isUInt<8>(CUVal64))
        return;
      // i8 type may be printed as a negative number,
      // e.g. 254 would be printed as -2,
      // so we force it to i16 at least.
      if (Ty.getSimpleVT() == MVT::i8) {
        Ty = MVT::i16;
      }
      Result = DAG.getTargetConstant(CUVal64, DL, Ty);
      break;
    case 'N': // -1
      if (CVal64 != -1)
        return;
      Result = DAG.getTargetConstant(CVal64, DL, Ty);
      break;
    case 'O': // 8, 16, 24
      if (CUVal64 != 8 && CUVal64 != 16 && CUVal64 != 24)
        return;
      Result = DAG.getTargetConstant(CUVal64, DL, Ty);
      break;
    case 'P': // 1
      if (CUVal64 != 1)
        return;
      Result = DAG.getTargetConstant(CUVal64, DL, Ty);
      break;
    case 'R': // -6..5
      if (CVal64 < -6 || CVal64 > 5)
        return;
      Result = DAG.getTargetConstant(CVal64, DL, Ty);
      break;
    }

    break;
  }
  case 'G':
    const ConstantFPSDNode *FC = dyn_cast<ConstantFPSDNode>(Op);
    if (!FC || !FC->isZero())
      return;
    // Soften float to i8 0.
    Result = DAG.getTargetConstant(0, DL, MVT::i8);
    break;
  }

  if (Result.getNode()) {
    Ops.push_back(Result);
    return;
  }

  return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}

Register AVRTargetLowering::getRegisterByName(const char *RegName, LLT VT,
                                              const MachineFunction &MF) const {
  Register Reg;

  if (VT == LLT::scalar(8)) {
    Reg = StringSwitch<unsigned>(RegName)
              .Case("r0", AVR::R0)
              .Case("r1", AVR::R1)
              .Default(0);
  } else {
    Reg = StringSwitch<unsigned>(RegName)
              .Case("r0", AVR::R1R0)
              .Case("sp", AVR::SP)
              .Default(0);
  }

  if (Reg)
    return Reg;

  report_fatal_error(
      Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
}

} // end of namespace llvm