//===-- AVRISelLowering.cpp - AVR DAG Lowering Implementation -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that AVR uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "AVRISelLowering.h"

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/ErrorHandling.h"

#include "AVR.h"
#include "AVRMachineFunctionInfo.h"
#include "AVRSubtarget.h"
#include "AVRTargetMachine.h"
#include "MCTargetDesc/AVRMCTargetDesc.h"

namespace llvm {

AVRTargetLowering::AVRTargetLowering(const AVRTargetMachine &TM,
                                     const AVRSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {
  // Set up the register classes.
  addRegisterClass(MVT::i8, &AVR::GPR8RegClass);
  addRegisterClass(MVT::i16, &AVR::DREGSRegClass);

  // Compute derived properties from the register classes.
  computeRegisterProperties(Subtarget.getRegisterInfo());

  setBooleanContents(ZeroOrOneBooleanContent);
  setBooleanVectorContents(ZeroOrOneBooleanContent);
  setSchedulingPreference(Sched::RegPressure);
  setStackPointerRegisterToSaveRestore(AVR::SP);
  setSupportsUnalignedAtomics(true);

  setOperationAction(ISD::GlobalAddress, MVT::i16, Custom);
  setOperationAction(ISD::BlockAddress, MVT::i16, Custom);

  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i8, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i16, Expand);

  setOperationAction(ISD::INLINEASM, MVT::Other, Custom);

  for (MVT VT : MVT::integer_valuetypes()) {
    for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}) {
      setLoadExtAction(N, VT, MVT::i1, Promote);
      setLoadExtAction(N, VT, MVT::i8, Expand);
    }
  }

  setTruncStoreAction(MVT::i16, MVT::i8, Expand);

  for (MVT VT : MVT::integer_valuetypes()) {
    setOperationAction(ISD::ADDC, VT, Legal);
    setOperationAction(ISD::SUBC, VT, Legal);
    setOperationAction(ISD::ADDE, VT, Legal);
    setOperationAction(ISD::SUBE, VT, Legal);
  }

  // sub (x, imm) gets canonicalized to add (x, -imm), so for illegal types
  // revert into a sub since we don't have an add with immediate instruction.
  setOperationAction(ISD::ADD, MVT::i32, Custom);
  setOperationAction(ISD::ADD, MVT::i64, Custom);

  // our shift instructions are only able to shift 1 bit at a time, so handle
  // this in a custom way.
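  // Without further optimization, a shift by a constant k is emitted as k
  // single-bit shift nodes (see the while loop at the end of LowerShifts), so
  // the custom lowering also recognizes cheaper swap- and carry-based
  // patterns.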
  setOperationAction(ISD::SRA, MVT::i8, Custom);
  setOperationAction(ISD::SHL, MVT::i8, Custom);
  setOperationAction(ISD::SRL, MVT::i8, Custom);
  setOperationAction(ISD::SRA, MVT::i16, Custom);
  setOperationAction(ISD::SHL, MVT::i16, Custom);
  setOperationAction(ISD::SRL, MVT::i16, Custom);
  setOperationAction(ISD::SRA, MVT::i32, Custom);
  setOperationAction(ISD::SHL, MVT::i32, Custom);
  setOperationAction(ISD::SRL, MVT::i32, Custom);
  setOperationAction(ISD::SHL_PARTS, MVT::i16, Expand);
  setOperationAction(ISD::SRA_PARTS, MVT::i16, Expand);
  setOperationAction(ISD::SRL_PARTS, MVT::i16, Expand);

  setOperationAction(ISD::ROTL, MVT::i8, Custom);
  setOperationAction(ISD::ROTL, MVT::i16, Expand);
  setOperationAction(ISD::ROTR, MVT::i8, Custom);
  setOperationAction(ISD::ROTR, MVT::i16, Expand);

  setOperationAction(ISD::BR_CC, MVT::i8, Custom);
  setOperationAction(ISD::BR_CC, MVT::i16, Custom);
  setOperationAction(ISD::BR_CC, MVT::i32, Custom);
  setOperationAction(ISD::BR_CC, MVT::i64, Custom);
  setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  setOperationAction(ISD::SELECT_CC, MVT::i8, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i16, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i64, Expand);
  setOperationAction(ISD::SETCC, MVT::i8, Custom);
  setOperationAction(ISD::SETCC, MVT::i16, Custom);
  setOperationAction(ISD::SETCC, MVT::i32, Custom);
  setOperationAction(ISD::SETCC, MVT::i64, Custom);
  setOperationAction(ISD::SELECT, MVT::i8, Expand);
  setOperationAction(ISD::SELECT, MVT::i16, Expand);

  setOperationAction(ISD::BSWAP, MVT::i16, Expand);

  // Add support for postincrement and predecrement load/stores.
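  // These map onto the AVR pointer addressing modes, e.g. `ld Rd, X+` /
  // `st X+, Rr` (post-increment) and `ld Rd, -X` / `st -X, Rr`
  // (pre-decrement).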
  setIndexedLoadAction(ISD::POST_INC, MVT::i8, Legal);
  setIndexedLoadAction(ISD::POST_INC, MVT::i16, Legal);
  setIndexedLoadAction(ISD::PRE_DEC, MVT::i8, Legal);
  setIndexedLoadAction(ISD::PRE_DEC, MVT::i16, Legal);
  setIndexedStoreAction(ISD::POST_INC, MVT::i8, Legal);
  setIndexedStoreAction(ISD::POST_INC, MVT::i16, Legal);
  setIndexedStoreAction(ISD::PRE_DEC, MVT::i8, Legal);
  setIndexedStoreAction(ISD::PRE_DEC, MVT::i16, Legal);

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);

  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);
  setOperationAction(ISD::VAARG, MVT::Other, Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);

  // Atomic operations which must be lowered to rtlib calls
  for (MVT VT : MVT::integer_valuetypes()) {
    setOperationAction(ISD::ATOMIC_SWAP, VT, Expand);
    setOperationAction(ISD::ATOMIC_CMP_SWAP, VT, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_NAND, VT, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_MAX, VT, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_MIN, VT, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_UMAX, VT, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_UMIN, VT, Expand);
  }

  // Division/remainder
  setOperationAction(ISD::UDIV, MVT::i8, Expand);
  setOperationAction(ISD::UDIV, MVT::i16, Expand);
  setOperationAction(ISD::UREM, MVT::i8, Expand);
  setOperationAction(ISD::UREM, MVT::i16, Expand);
  setOperationAction(ISD::SDIV, MVT::i8, Expand);
  setOperationAction(ISD::SDIV, MVT::i16, Expand);
  setOperationAction(ISD::SREM, MVT::i8, Expand);
  setOperationAction(ISD::SREM, MVT::i16, Expand);

  // Make division and modulus custom
  setOperationAction(ISD::UDIVREM, MVT::i8, Custom);
  setOperationAction(ISD::UDIVREM, MVT::i16, Custom);
  setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
  setOperationAction(ISD::SDIVREM, MVT::i8, Custom);
  setOperationAction(ISD::SDIVREM, MVT::i16, Custom);
  setOperationAction(ISD::SDIVREM, MVT::i32, Custom);

  // Do not use MUL. The AVR instructions are closer to SMUL_LOHI &co.
  setOperationAction(ISD::MUL, MVT::i8, Expand);
  setOperationAction(ISD::MUL, MVT::i16, Expand);

  // Expand 16 bit multiplications.
  setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand);

  // Expand multiplications to libcalls when there is
  // no hardware MUL.
  if (!Subtarget.supportsMultiplication()) {
    setOperationAction(ISD::SMUL_LOHI, MVT::i8, Expand);
    setOperationAction(ISD::UMUL_LOHI, MVT::i8, Expand);
  }

  for (MVT VT : MVT::integer_valuetypes()) {
    setOperationAction(ISD::MULHS, VT, Expand);
    setOperationAction(ISD::MULHU, VT, Expand);
  }

  for (MVT VT : MVT::integer_valuetypes()) {
    setOperationAction(ISD::CTPOP, VT, Expand);
    setOperationAction(ISD::CTLZ, VT, Expand);
    setOperationAction(ISD::CTTZ, VT, Expand);
  }

  for (MVT VT : MVT::integer_valuetypes()) {
    setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
    // TODO: The generated code is pretty poor. Investigate using the
    // same "shift and subtract with carry" trick that we do for
    // extending 8-bit to 16-bit. This may require infrastructure
    // improvements in how we treat 16-bit "registers" to be feasible.
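    // (For 8-bit to 16-bit the trick is: copy the byte into the high
    // register, `lsl` it so the sign bit lands in the carry flag, then
    // `sbc r, r` to produce 0x00 or 0xff in the high byte.)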
  }

  // Division and modulus rtlib functions
  setLibcallName(RTLIB::SDIVREM_I8, "__divmodqi4");
  setLibcallName(RTLIB::SDIVREM_I16, "__divmodhi4");
  setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
  setLibcallName(RTLIB::UDIVREM_I8, "__udivmodqi4");
  setLibcallName(RTLIB::UDIVREM_I16, "__udivmodhi4");
  setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");

  // Several of the runtime library functions use a special calling conv
  setLibcallCallingConv(RTLIB::SDIVREM_I8, CallingConv::AVR_BUILTIN);
  setLibcallCallingConv(RTLIB::SDIVREM_I16, CallingConv::AVR_BUILTIN);
  setLibcallCallingConv(RTLIB::UDIVREM_I8, CallingConv::AVR_BUILTIN);
  setLibcallCallingConv(RTLIB::UDIVREM_I16, CallingConv::AVR_BUILTIN);

  // Trigonometric rtlib functions
  setLibcallName(RTLIB::SIN_F32, "sin");
  setLibcallName(RTLIB::COS_F32, "cos");

  setMinFunctionAlignment(Align(2));
  setMinimumJumpTableEntries(UINT_MAX);
}

const char *AVRTargetLowering::getTargetNodeName(unsigned Opcode) const {
#define NODE(name)                                                             \
  case AVRISD::name:                                                           \
    return #name

  switch (Opcode) {
  default:
    return nullptr;
    NODE(RET_GLUE);
    NODE(RETI_GLUE);
    NODE(CALL);
    NODE(WRAPPER);
    NODE(LSL);
    NODE(LSLW);
    NODE(LSR);
    NODE(LSRW);
    NODE(ROL);
    NODE(ROR);
    NODE(ASR);
    NODE(ASRW);
    NODE(LSLLOOP);
    NODE(LSRLOOP);
    NODE(ROLLOOP);
    NODE(RORLOOP);
    NODE(ASRLOOP);
    NODE(BRCOND);
    NODE(CMP);
    NODE(CMPC);
    NODE(TST);
    NODE(SELECT_CC);
#undef NODE
  }
}

EVT AVRTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
                                          EVT VT) const {
  assert(!VT.isVector() && "No AVR SetCC type for vectors!");
  return MVT::i8;
}

SDValue AVRTargetLowering::LowerShifts(SDValue Op, SelectionDAG &DAG) const {
  unsigned Opc8;
  const SDNode *N = Op.getNode();
  EVT VT = Op.getValueType();
  SDLoc dl(N);
  assert(llvm::has_single_bit<uint32_t>(VT.getSizeInBits()) &&
         "Expected power-of-2 shift amount");

  if (VT.getSizeInBits() == 32) {
    if (!isa<ConstantSDNode>(N->getOperand(1))) {
      // 32-bit shifts are converted to a loop in IR.
      // This should be unreachable.
      report_fatal_error("Expected a constant shift amount!");
    }
    SDVTList ResTys = DAG.getVTList(MVT::i16, MVT::i16);
    SDValue SrcLo =
        DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i16, Op.getOperand(0),
                    DAG.getConstant(0, dl, MVT::i16));
    SDValue SrcHi =
        DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i16, Op.getOperand(0),
                    DAG.getConstant(1, dl, MVT::i16));
    uint64_t ShiftAmount = N->getConstantOperandVal(1);
    if (ShiftAmount == 16) {
      // Special case these two operations because they appear to be used by
      // the generic codegen parts to lower 32-bit numbers.
      // TODO: perhaps we can lower shift amounts bigger than 16 to a 16-bit
      // shift of a part of the 32-bit value?
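      // (A shift by exactly 16 just moves one 16-bit half into the other and
      // zero-fills the vacated half, so no shift instructions are needed at
      // all.)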
      switch (Op.getOpcode()) {
      case ISD::SHL: {
        SDValue Zero = DAG.getConstant(0, dl, MVT::i16);
        return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i32, Zero, SrcLo);
      }
      case ISD::SRL: {
        SDValue Zero = DAG.getConstant(0, dl, MVT::i16);
        return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i32, SrcHi, Zero);
      }
      }
    }
    SDValue Cnt = DAG.getTargetConstant(ShiftAmount, dl, MVT::i8);
    unsigned Opc;
    switch (Op.getOpcode()) {
    default:
      llvm_unreachable("Invalid 32-bit shift opcode!");
    case ISD::SHL:
      Opc = AVRISD::LSLW;
      break;
    case ISD::SRL:
      Opc = AVRISD::LSRW;
      break;
    case ISD::SRA:
      Opc = AVRISD::ASRW;
      break;
    }
    SDValue Result = DAG.getNode(Opc, dl, ResTys, SrcLo, SrcHi, Cnt);
    return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i32, Result.getValue(0),
                       Result.getValue(1));
  }

  // Expand non-constant shifts to loops.
  if (!isa<ConstantSDNode>(N->getOperand(1))) {
    switch (Op.getOpcode()) {
    default:
      llvm_unreachable("Invalid shift opcode!");
    case ISD::SHL:
      return DAG.getNode(AVRISD::LSLLOOP, dl, VT, N->getOperand(0),
                         N->getOperand(1));
    case ISD::SRL:
      return DAG.getNode(AVRISD::LSRLOOP, dl, VT, N->getOperand(0),
                         N->getOperand(1));
    case ISD::ROTL: {
      SDValue Amt = N->getOperand(1);
      EVT AmtVT = Amt.getValueType();
      Amt = DAG.getNode(ISD::AND, dl, AmtVT, Amt,
                        DAG.getConstant(VT.getSizeInBits() - 1, dl, AmtVT));
      return DAG.getNode(AVRISD::ROLLOOP, dl, VT, N->getOperand(0), Amt);
    }
    case ISD::ROTR: {
      SDValue Amt = N->getOperand(1);
      EVT AmtVT = Amt.getValueType();
      Amt = DAG.getNode(ISD::AND, dl, AmtVT, Amt,
                        DAG.getConstant(VT.getSizeInBits() - 1, dl, AmtVT));
      return DAG.getNode(AVRISD::RORLOOP, dl, VT, N->getOperand(0), Amt);
    }
    case ISD::SRA:
      return DAG.getNode(AVRISD::ASRLOOP, dl, VT, N->getOperand(0),
                         N->getOperand(1));
    }
  }

  uint64_t ShiftAmount = N->getConstantOperandVal(1);
  SDValue Victim = N->getOperand(0);

  switch (Op.getOpcode()) {
  case ISD::SRA:
    Opc8 = AVRISD::ASR;
    break;
  case ISD::ROTL:
    Opc8 = AVRISD::ROL;
    ShiftAmount = ShiftAmount % VT.getSizeInBits();
    break;
  case ISD::ROTR:
    Opc8 = AVRISD::ROR;
    ShiftAmount = ShiftAmount % VT.getSizeInBits();
    break;
  case ISD::SRL:
    Opc8 = AVRISD::LSR;
    break;
  case ISD::SHL:
    Opc8 = AVRISD::LSL;
    break;
  default:
    llvm_unreachable("Invalid shift opcode");
  }

  // Optimize int8/int16 shifts.
  if (VT.getSizeInBits() == 8) {
    if (Op.getOpcode() == ISD::SHL && 4 <= ShiftAmount && ShiftAmount < 7) {
      // Optimize LSL when 4 <= ShiftAmount <= 6.
      Victim = DAG.getNode(AVRISD::SWAP, dl, VT, Victim);
      Victim =
          DAG.getNode(ISD::AND, dl, VT, Victim, DAG.getConstant(0xf0, dl, VT));
      ShiftAmount -= 4;
    } else if (Op.getOpcode() == ISD::SRL && 4 <= ShiftAmount &&
               ShiftAmount < 7) {
      // Optimize LSR when 4 <= ShiftAmount <= 6.
      Victim = DAG.getNode(AVRISD::SWAP, dl, VT, Victim);
      Victim =
          DAG.getNode(ISD::AND, dl, VT, Victim, DAG.getConstant(0x0f, dl, VT));
      ShiftAmount -= 4;
    } else if (Op.getOpcode() == ISD::SHL && ShiftAmount == 7) {
      // Optimize LSL when ShiftAmount == 7.
      Victim = DAG.getNode(AVRISD::LSLBN, dl, VT, Victim,
                           DAG.getConstant(7, dl, VT));
      ShiftAmount = 0;
    } else if (Op.getOpcode() == ISD::SRL && ShiftAmount == 7) {
      // Optimize LSR when ShiftAmount == 7.
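      // (Like LSLBN above, the LSRBN pseudo lets later expansion emit a short
      // carry-based sequence instead of seven single-bit shifts.)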
      Victim = DAG.getNode(AVRISD::LSRBN, dl, VT, Victim,
                           DAG.getConstant(7, dl, VT));
      ShiftAmount = 0;
    } else if (Op.getOpcode() == ISD::SRA && ShiftAmount == 6) {
      // Optimize ASR when ShiftAmount == 6.
      Victim = DAG.getNode(AVRISD::ASRBN, dl, VT, Victim,
                           DAG.getConstant(6, dl, VT));
      ShiftAmount = 0;
    } else if (Op.getOpcode() == ISD::SRA && ShiftAmount == 7) {
      // Optimize ASR when ShiftAmount == 7.
      Victim = DAG.getNode(AVRISD::ASRBN, dl, VT, Victim,
                           DAG.getConstant(7, dl, VT));
      ShiftAmount = 0;
    } else if (Op.getOpcode() == ISD::ROTL && ShiftAmount == 3) {
      // Optimize left rotation 3 bits to swap then right rotation 1 bit.
      Victim = DAG.getNode(AVRISD::SWAP, dl, VT, Victim);
      Victim =
          DAG.getNode(AVRISD::ROR, dl, VT, Victim, DAG.getConstant(1, dl, VT));
      ShiftAmount = 0;
    } else if (Op.getOpcode() == ISD::ROTR && ShiftAmount == 3) {
      // Optimize right rotation 3 bits to swap then left rotation 1 bit.
      Victim = DAG.getNode(AVRISD::SWAP, dl, VT, Victim);
      Victim =
          DAG.getNode(AVRISD::ROL, dl, VT, Victim, DAG.getConstant(1, dl, VT));
      ShiftAmount = 0;
    } else if (Op.getOpcode() == ISD::ROTL && ShiftAmount == 7) {
      // Optimize left rotation 7 bits to right rotation 1 bit.
      Victim =
          DAG.getNode(AVRISD::ROR, dl, VT, Victim, DAG.getConstant(1, dl, VT));
      ShiftAmount = 0;
    } else if (Op.getOpcode() == ISD::ROTR && ShiftAmount == 7) {
      // Optimize right rotation 7 bits to left rotation 1 bit.
      Victim =
          DAG.getNode(AVRISD::ROL, dl, VT, Victim, DAG.getConstant(1, dl, VT));
      ShiftAmount = 0;
    } else if ((Op.getOpcode() == ISD::ROTR || Op.getOpcode() == ISD::ROTL) &&
               ShiftAmount >= 4) {
      // Optimize left/right rotation with the SWAP instruction.
      Victim = DAG.getNode(AVRISD::SWAP, dl, VT, Victim);
      ShiftAmount -= 4;
    }
  } else if (VT.getSizeInBits() == 16) {
    if (Op.getOpcode() == ISD::SRA)
      // Special optimization for int16 arithmetic right shift.
      switch (ShiftAmount) {
      case 15:
        Victim = DAG.getNode(AVRISD::ASRWN, dl, VT, Victim,
                             DAG.getConstant(15, dl, VT));
        ShiftAmount = 0;
        break;
      case 14:
        Victim = DAG.getNode(AVRISD::ASRWN, dl, VT, Victim,
                             DAG.getConstant(14, dl, VT));
        ShiftAmount = 0;
        break;
      case 7:
        Victim = DAG.getNode(AVRISD::ASRWN, dl, VT, Victim,
                             DAG.getConstant(7, dl, VT));
        ShiftAmount = 0;
        break;
      default:
        break;
      }
    if (4 <= ShiftAmount && ShiftAmount < 8)
      switch (Op.getOpcode()) {
      case ISD::SHL:
        Victim = DAG.getNode(AVRISD::LSLWN, dl, VT, Victim,
                             DAG.getConstant(4, dl, VT));
        ShiftAmount -= 4;
        break;
      case ISD::SRL:
        Victim = DAG.getNode(AVRISD::LSRWN, dl, VT, Victim,
                             DAG.getConstant(4, dl, VT));
        ShiftAmount -= 4;
        break;
      default:
        break;
      }
    else if (8 <= ShiftAmount && ShiftAmount < 12)
      switch (Op.getOpcode()) {
      case ISD::SHL:
        Victim = DAG.getNode(AVRISD::LSLWN, dl, VT, Victim,
                             DAG.getConstant(8, dl, VT));
        ShiftAmount -= 8;
        // Only operate on the higher byte for remaining shift bits.
        Opc8 = AVRISD::LSLHI;
        break;
      case ISD::SRL:
        Victim = DAG.getNode(AVRISD::LSRWN, dl, VT, Victim,
                             DAG.getConstant(8, dl, VT));
        ShiftAmount -= 8;
        // Only operate on the lower byte for remaining shift bits.
        Opc8 = AVRISD::LSRLO;
        break;
      case ISD::SRA:
        Victim = DAG.getNode(AVRISD::ASRWN, dl, VT, Victim,
                             DAG.getConstant(8, dl, VT));
        ShiftAmount -= 8;
        // Only operate on the lower byte for remaining shift bits.
        Opc8 = AVRISD::ASRLO;
        break;
      default:
        break;
      }
    else if (12 <= ShiftAmount)
      switch (Op.getOpcode()) {
      case ISD::SHL:
        Victim = DAG.getNode(AVRISD::LSLWN, dl, VT, Victim,
                             DAG.getConstant(12, dl, VT));
        ShiftAmount -= 12;
        // Only operate on the higher byte for remaining shift bits.
        Opc8 = AVRISD::LSLHI;
        break;
      case ISD::SRL:
        Victim = DAG.getNode(AVRISD::LSRWN, dl, VT, Victim,
                             DAG.getConstant(12, dl, VT));
        ShiftAmount -= 12;
        // Only operate on the lower byte for remaining shift bits.
        Opc8 = AVRISD::LSRLO;
        break;
      case ISD::SRA:
        Victim = DAG.getNode(AVRISD::ASRWN, dl, VT, Victim,
                             DAG.getConstant(8, dl, VT));
        ShiftAmount -= 8;
        // Only operate on the lower byte for remaining shift bits.
        Opc8 = AVRISD::ASRLO;
        break;
      default:
        break;
      }
  }

  while (ShiftAmount--) {
    Victim = DAG.getNode(Opc8, dl, VT, Victim);
  }

  return Victim;
}

SDValue AVRTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
  unsigned Opcode = Op->getOpcode();
  assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) &&
         "Invalid opcode for Div/Rem lowering");
  bool IsSigned = (Opcode == ISD::SDIVREM);
  EVT VT = Op->getValueType(0);
  Type *Ty = VT.getTypeForEVT(*DAG.getContext());

  RTLIB::Libcall LC;
  switch (VT.getSimpleVT().SimpleTy) {
  default:
    llvm_unreachable("Unexpected request for libcall!");
  case MVT::i8:
    LC = IsSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8;
    break;
  case MVT::i16:
    LC = IsSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16;
    break;
  case MVT::i32:
    LC = IsSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32;
    break;
  }

  SDValue InChain = DAG.getEntryNode();

  TargetLowering::ArgListTy Args;
  TargetLowering::ArgListEntry Entry;
  for (SDValue const &Value : Op->op_values()) {
    Entry.Node = Value;
    Entry.Ty = Value.getValueType().getTypeForEVT(*DAG.getContext());
    Entry.IsSExt = IsSigned;
    Entry.IsZExt = !IsSigned;
    Args.push_back(Entry);
  }

  SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
                                         getPointerTy(DAG.getDataLayout()));

  Type *RetTy = (Type *)StructType::get(Ty, Ty);

  SDLoc dl(Op);
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl)
      .setChain(InChain)
      .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
      .setInRegister()
      .setSExtResult(IsSigned)
      .setZExtResult(!IsSigned);

  std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
  return CallInfo.first;
}

SDValue AVRTargetLowering::LowerGlobalAddress(SDValue Op,
                                              SelectionDAG &DAG) const {
  auto DL = DAG.getDataLayout();

  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
  int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset();

  // Create the TargetGlobalAddress node, folding in the constant offset.
  SDValue Result =
      DAG.getTargetGlobalAddress(GV, SDLoc(Op), getPointerTy(DL), Offset);
  return DAG.getNode(AVRISD::WRAPPER, SDLoc(Op), getPointerTy(DL), Result);
}

SDValue AVRTargetLowering::LowerBlockAddress(SDValue Op,
                                             SelectionDAG &DAG) const {
  auto DL = DAG.getDataLayout();
  const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();

  SDValue Result = DAG.getTargetBlockAddress(BA, getPointerTy(DL));

  return DAG.getNode(AVRISD::WRAPPER, SDLoc(Op), getPointerTy(DL), Result);
}

/// IntCCToAVRCC - Convert a DAG integer condition code to an AVR CC.
static AVRCC::CondCodes intCCToAVRCC(ISD::CondCode CC) {
  switch (CC) {
  default:
    llvm_unreachable("Unknown condition code!");
  case ISD::SETEQ:
    return AVRCC::COND_EQ;
  case ISD::SETNE:
    return AVRCC::COND_NE;
  case ISD::SETGE:
    return AVRCC::COND_GE;
  case ISD::SETLT:
    return AVRCC::COND_LT;
  case ISD::SETUGE:
    return AVRCC::COND_SH;
  case ISD::SETULT:
    return AVRCC::COND_LO;
  }
}

/// Returns appropriate CP/CPI/CPC nodes code for the given 8/16-bit operands.
SDValue AVRTargetLowering::getAVRCmp(SDValue LHS, SDValue RHS,
                                     SelectionDAG &DAG, SDLoc DL) const {
  assert((LHS.getSimpleValueType() == RHS.getSimpleValueType()) &&
         "LHS and RHS have different types");
  assert(((LHS.getSimpleValueType() == MVT::i16) ||
          (LHS.getSimpleValueType() == MVT::i8)) &&
         "invalid comparison type");

  SDValue Cmp;

  if (LHS.getSimpleValueType() == MVT::i16 && isa<ConstantSDNode>(RHS)) {
    uint64_t Imm = RHS->getAsZExtVal();
    // Generate a CPI/CPC pair if RHS is a 16-bit constant. Use the zero
    // register for the constant RHS if its lower or higher byte is zero.
    SDValue LHSlo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, LHS,
                                DAG.getIntPtrConstant(0, DL));
    SDValue LHShi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, LHS,
                                DAG.getIntPtrConstant(1, DL));
    SDValue RHSlo = (Imm & 0xff) == 0
                        ? DAG.getRegister(Subtarget.getZeroRegister(), MVT::i8)
                        : DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, RHS,
                                      DAG.getIntPtrConstant(0, DL));
    SDValue RHShi = (Imm & 0xff00) == 0
                        ? DAG.getRegister(Subtarget.getZeroRegister(), MVT::i8)
                        : DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, RHS,
                                      DAG.getIntPtrConstant(1, DL));
    Cmp = DAG.getNode(AVRISD::CMP, DL, MVT::Glue, LHSlo, RHSlo);
    Cmp = DAG.getNode(AVRISD::CMPC, DL, MVT::Glue, LHShi, RHShi, Cmp);
  } else if (RHS.getSimpleValueType() == MVT::i16 && isa<ConstantSDNode>(LHS)) {
    // Generate a CPI/CPC pair if LHS is a 16-bit constant. Use the zero
    // register for the constant LHS if its lower or higher byte is zero.
    uint64_t Imm = LHS->getAsZExtVal();
    SDValue LHSlo = (Imm & 0xff) == 0
                        ? DAG.getRegister(Subtarget.getZeroRegister(), MVT::i8)
                        : DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, LHS,
                                      DAG.getIntPtrConstant(0, DL));
    SDValue LHShi = (Imm & 0xff00) == 0
                        ? DAG.getRegister(Subtarget.getZeroRegister(), MVT::i8)
                        : DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, LHS,
                                      DAG.getIntPtrConstant(1, DL));
    SDValue RHSlo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, RHS,
                                DAG.getIntPtrConstant(0, DL));
    SDValue RHShi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, RHS,
                                DAG.getIntPtrConstant(1, DL));
    Cmp = DAG.getNode(AVRISD::CMP, DL, MVT::Glue, LHSlo, RHSlo);
    Cmp = DAG.getNode(AVRISD::CMPC, DL, MVT::Glue, LHShi, RHShi, Cmp);
  } else {
    // Generate ordinary 16-bit comparison.
    Cmp = DAG.getNode(AVRISD::CMP, DL, MVT::Glue, LHS, RHS);
  }

  return Cmp;
}

/// Returns appropriate AVR CMP/CMPC nodes and corresponding condition code for
/// the given operands.
SDValue AVRTargetLowering::getAVRCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
                                     SDValue &AVRcc, SelectionDAG &DAG,
                                     SDLoc DL) const {
  SDValue Cmp;
  EVT VT = LHS.getValueType();
  bool UseTest = false;

  switch (CC) {
  default:
    break;
  case ISD::SETLE: {
    // Swap operands and reverse the branching condition.
    std::swap(LHS, RHS);
    CC = ISD::SETGE;
    break;
  }
  case ISD::SETGT: {
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
      switch (C->getSExtValue()) {
      case -1: {
        // When doing lhs > -1 use a tst instruction on the top part of lhs
        // and use brpl instead of using a chain of cp/cpc.
        UseTest = true;
        AVRcc = DAG.getConstant(AVRCC::COND_PL, DL, MVT::i8);
        break;
      }
      case 0: {
        // Turn lhs > 0 into 0 < lhs since 0 can be materialized with
        // __zero_reg__ in lhs.
        RHS = LHS;
        LHS = DAG.getConstant(0, DL, VT);
        CC = ISD::SETLT;
        break;
      }
      default: {
        // Turn lhs > rhs with rhs constant into lhs >= rhs+1, this allows
        // us to fold the constant into the cmp instruction.
        RHS = DAG.getConstant(C->getSExtValue() + 1, DL, VT);
        CC = ISD::SETGE;
        break;
      }
      }
      break;
    }
    // Swap operands and reverse the branching condition.
    std::swap(LHS, RHS);
    CC = ISD::SETLT;
    break;
  }
  case ISD::SETLT: {
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
      switch (C->getSExtValue()) {
      case 1: {
        // Turn lhs < 1 into 0 >= lhs since 0 can be materialized with
        // __zero_reg__ in lhs.
        RHS = LHS;
        LHS = DAG.getConstant(0, DL, VT);
        CC = ISD::SETGE;
        break;
      }
      case 0: {
        // When doing lhs < 0 use a tst instruction on the top part of lhs
        // and use brmi instead of using a chain of cp/cpc.
        UseTest = true;
        AVRcc = DAG.getConstant(AVRCC::COND_MI, DL, MVT::i8);
        break;
      }
      }
    }
    break;
  }
  case ISD::SETULE: {
    // Swap operands and reverse the branching condition.
    std::swap(LHS, RHS);
    CC = ISD::SETUGE;
    break;
  }
  case ISD::SETUGT: {
    // Turn lhs > rhs with rhs constant into lhs >= rhs+1, this allows us to
    // fold the constant into the cmp instruction.
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
      RHS = DAG.getConstant(C->getSExtValue() + 1, DL, VT);
      CC = ISD::SETUGE;
      break;
    }
    // Swap operands and reverse the branching condition.
    std::swap(LHS, RHS);
    CC = ISD::SETULT;
    break;
  }
  }

  // Expand 32 and 64 bit comparisons with custom CMP and CMPC nodes instead of
  // using the default and/or/xor expansion code which is much longer.
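  // For example, an i32 compare becomes one CMP of the low 16-bit halves
  // followed by a CMPC of the high halves, threading the carry through all
  // four bytes.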
  if (VT == MVT::i32) {
    SDValue LHSlo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, LHS,
                                DAG.getIntPtrConstant(0, DL));
    SDValue LHShi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, LHS,
                                DAG.getIntPtrConstant(1, DL));
    SDValue RHSlo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, RHS,
                                DAG.getIntPtrConstant(0, DL));
    SDValue RHShi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, RHS,
                                DAG.getIntPtrConstant(1, DL));

    if (UseTest) {
      // When using tst we only care about the highest part.
      SDValue Top = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, LHShi,
                                DAG.getIntPtrConstant(1, DL));
      Cmp = DAG.getNode(AVRISD::TST, DL, MVT::Glue, Top);
    } else {
      Cmp = getAVRCmp(LHSlo, RHSlo, DAG, DL);
      Cmp = DAG.getNode(AVRISD::CMPC, DL, MVT::Glue, LHShi, RHShi, Cmp);
    }
  } else if (VT == MVT::i64) {
    SDValue LHS_0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, LHS,
                                DAG.getIntPtrConstant(0, DL));
    SDValue LHS_1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, LHS,
                                DAG.getIntPtrConstant(1, DL));

    SDValue LHS0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, LHS_0,
                               DAG.getIntPtrConstant(0, DL));
    SDValue LHS1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, LHS_0,
                               DAG.getIntPtrConstant(1, DL));
    SDValue LHS2 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, LHS_1,
                               DAG.getIntPtrConstant(0, DL));
    SDValue LHS3 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, LHS_1,
                               DAG.getIntPtrConstant(1, DL));

    SDValue RHS_0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, RHS,
                                DAG.getIntPtrConstant(0, DL));
    SDValue RHS_1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, RHS,
                                DAG.getIntPtrConstant(1, DL));

    SDValue RHS0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, RHS_0,
                               DAG.getIntPtrConstant(0, DL));
    SDValue RHS1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, RHS_0,
                               DAG.getIntPtrConstant(1, DL));
    SDValue RHS2 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, RHS_1,
                               DAG.getIntPtrConstant(0, DL));
    SDValue RHS3 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, RHS_1,
                               DAG.getIntPtrConstant(1, DL));

    if (UseTest) {
      // When using tst we only care about the highest part.
      SDValue Top = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, LHS3,
                                DAG.getIntPtrConstant(1, DL));
      Cmp = DAG.getNode(AVRISD::TST, DL, MVT::Glue, Top);
    } else {
      Cmp = getAVRCmp(LHS0, RHS0, DAG, DL);
      Cmp = DAG.getNode(AVRISD::CMPC, DL, MVT::Glue, LHS1, RHS1, Cmp);
      Cmp = DAG.getNode(AVRISD::CMPC, DL, MVT::Glue, LHS2, RHS2, Cmp);
      Cmp = DAG.getNode(AVRISD::CMPC, DL, MVT::Glue, LHS3, RHS3, Cmp);
    }
  } else if (VT == MVT::i8 || VT == MVT::i16) {
    if (UseTest) {
      // When using tst we only care about the highest part.
      Cmp = DAG.getNode(AVRISD::TST, DL, MVT::Glue,
                        (VT == MVT::i8)
                            ? LHS
                            : DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8,
                                          LHS, DAG.getIntPtrConstant(1, DL)));
    } else {
      Cmp = getAVRCmp(LHS, RHS, DAG, DL);
    }
  } else {
    llvm_unreachable("Invalid comparison size");
  }

  // When using a test instruction AVRcc is already set.
  if (!UseTest) {
    AVRcc = DAG.getConstant(intCCToAVRCC(CC), DL, MVT::i8);
  }

  return Cmp;
}

SDValue AVRTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
  SDValue LHS = Op.getOperand(2);
  SDValue RHS = Op.getOperand(3);
  SDValue Dest = Op.getOperand(4);
  SDLoc dl(Op);

  SDValue TargetCC;
  SDValue Cmp = getAVRCmp(LHS, RHS, CC, TargetCC, DAG, dl);

  return DAG.getNode(AVRISD::BRCOND, dl, MVT::Other, Chain, Dest, TargetCC,
                     Cmp);
}

SDValue AVRTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue TrueV = Op.getOperand(2);
  SDValue FalseV = Op.getOperand(3);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
  SDLoc dl(Op);

  SDValue TargetCC;
  SDValue Cmp = getAVRCmp(LHS, RHS, CC, TargetCC, DAG, dl);

  SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
  SDValue Ops[] = {TrueV, FalseV, TargetCC, Cmp};

  return DAG.getNode(AVRISD::SELECT_CC, dl, VTs, Ops);
}

SDValue AVRTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
  SDLoc DL(Op);

  SDValue TargetCC;
  SDValue Cmp = getAVRCmp(LHS, RHS, CC, TargetCC, DAG, DL);

  SDValue TrueV = DAG.getConstant(1, DL, Op.getValueType());
  SDValue FalseV = DAG.getConstant(0, DL, Op.getValueType());
  SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
  SDValue Ops[] = {TrueV, FalseV, TargetCC, Cmp};

  return DAG.getNode(AVRISD::SELECT_CC, DL, VTs, Ops);
}

SDValue AVRTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
  const MachineFunction &MF = DAG.getMachineFunction();
  const AVRMachineFunctionInfo *AFI = MF.getInfo<AVRMachineFunctionInfo>();
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  auto DL = DAG.getDataLayout();
  SDLoc dl(Op);

  // Vastart just stores the address of the VarArgsFrameIndex slot into the
  // memory location argument.
  SDValue FI = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(), getPointerTy(DL));

  return DAG.getStore(Op.getOperand(0), dl, FI, Op.getOperand(1),
                      MachinePointerInfo(SV));
}

// Modify the existing ISD::INLINEASM node to add the implicit zero register.
SDValue AVRTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
  SDValue ZeroReg = DAG.getRegister(Subtarget.getZeroRegister(), MVT::i8);
  if (Op.getOperand(Op.getNumOperands() - 1) == ZeroReg ||
      Op.getOperand(Op.getNumOperands() - 2) == ZeroReg) {
    // Zero register has already been added. Don't add it again.
    // If this isn't handled, we get called over and over again.
    return Op;
  }

  // Get a list of operands to the new INLINEASM node. This is mostly a copy,
  // with some edits.
  // Add the following operands at the end (but before the glue node, if it's
  // there):
  // - The flags of the implicit zero register operand.
  // - The implicit zero register operand itself.
  SDLoc dl(Op);
  SmallVector<SDValue, 8> Ops;
  SDNode *N = Op.getNode();
  SDValue Glue;
  for (unsigned I = 0; I < N->getNumOperands(); I++) {
    SDValue Operand = N->getOperand(I);
    if (Operand.getValueType() == MVT::Glue) {
      // The glue operand always needs to be at the end, so we need to treat it
      // specially.
      Glue = Operand;
    } else {
      Ops.push_back(Operand);
    }
  }
  InlineAsm::Flag Flags(InlineAsm::Kind::RegUse, 1);
  Ops.push_back(DAG.getTargetConstant(Flags, dl, MVT::i32));
  Ops.push_back(ZeroReg);
  if (Glue) {
    Ops.push_back(Glue);
  }

  // Replace the current INLINEASM node with a new one that has the zero
  // register as implicit parameter.
  SDValue New = DAG.getNode(N->getOpcode(), dl, N->getVTList(), Ops);
  DAG.ReplaceAllUsesOfValueWith(Op, New);
  DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), New.getValue(1));

  return New;
}

SDValue AVRTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  default:
    llvm_unreachable("Don't know how to custom lower this!");
  case ISD::SHL:
  case ISD::SRA:
  case ISD::SRL:
  case ISD::ROTL:
  case ISD::ROTR:
    return LowerShifts(Op, DAG);
  case ISD::GlobalAddress:
    return LowerGlobalAddress(Op, DAG);
  case ISD::BlockAddress:
    return LowerBlockAddress(Op, DAG);
  case ISD::BR_CC:
    return LowerBR_CC(Op, DAG);
  case ISD::SELECT_CC:
    return LowerSELECT_CC(Op, DAG);
  case ISD::SETCC:
    return LowerSETCC(Op, DAG);
  case ISD::VASTART:
    return LowerVASTART(Op, DAG);
  case ISD::SDIVREM:
  case ISD::UDIVREM:
    return LowerDivRem(Op, DAG);
  case ISD::INLINEASM:
    return LowerINLINEASM(Op, DAG);
  }

  return SDValue();
}

/// Replace a node with an illegal result type
/// with a new node built out of custom code.
void AVRTargetLowering::ReplaceNodeResults(SDNode *N,
                                           SmallVectorImpl<SDValue> &Results,
                                           SelectionDAG &DAG) const {
  SDLoc DL(N);

  switch (N->getOpcode()) {
  case ISD::ADD: {
    // Convert add (x, imm) into sub (x, -imm).
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
      SDValue Sub = DAG.getNode(
          ISD::SUB, DL, N->getValueType(0), N->getOperand(0),
          DAG.getConstant(-C->getAPIntValue(), DL, C->getValueType(0)));
      Results.push_back(Sub);
    }
    break;
  }
  default: {
    SDValue Res = LowerOperation(SDValue(N, 0), DAG);

    for (unsigned I = 0, E = Res->getNumValues(); I != E; ++I)
      Results.push_back(Res.getValue(I));

    break;
  }
  }
}

/// Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
bool AVRTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                              const AddrMode &AM, Type *Ty,
                                              unsigned AS,
                                              Instruction *I) const {
  int64_t Offs = AM.BaseOffs;

  // Allow absolute addresses.
  if (AM.BaseGV && !AM.HasBaseReg && AM.Scale == 0 && Offs == 0) {
    return true;
  }

  // Flash memory instructions only allow zero offsets.
  if (isa<PointerType>(Ty) && AS == AVR::ProgramMemory) {
    return false;
  }

  // Allow reg+<6bit> offset.
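  // (This matches the displacement form of LDD/STD, e.g. `ldd Rd, Y+q` and
  // `std Y+q, Rr`, where q is an unsigned 6-bit displacement, i.e. 0..63.)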
  if (Offs < 0)
    Offs = -Offs;
  if (AM.BaseGV == nullptr && AM.HasBaseReg && AM.Scale == 0 &&
      isUInt<6>(Offs)) {
    return true;
  }

  return false;
}

/// Returns true by value, base pointer and
/// offset pointer and addressing mode by reference if the node's address
/// can be legally represented as pre-indexed load / store address.
bool AVRTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
                                                  SDValue &Offset,
                                                  ISD::MemIndexedMode &AM,
                                                  SelectionDAG &DAG) const {
  EVT VT;
  const SDNode *Op;
  SDLoc DL(N);

  if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    VT = LD->getMemoryVT();
    Op = LD->getBasePtr().getNode();
    if (LD->getExtensionType() != ISD::NON_EXTLOAD)
      return false;
    if (AVR::isProgramMemoryAccess(LD)) {
      return false;
    }
  } else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    VT = ST->getMemoryVT();
    Op = ST->getBasePtr().getNode();
    if (AVR::isProgramMemoryAccess(ST)) {
      return false;
    }
  } else {
    return false;
  }

  if (VT != MVT::i8 && VT != MVT::i16) {
    return false;
  }

  if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB) {
    return false;
  }

  if (const ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
    int RHSC = RHS->getSExtValue();
    if (Op->getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if ((VT == MVT::i16 && RHSC != -2) || (VT == MVT::i8 && RHSC != -1)) {
      return false;
    }

    Base = Op->getOperand(0);
    Offset = DAG.getConstant(RHSC, DL, MVT::i8);
    AM = ISD::PRE_DEC;

    return true;
  }

  return false;
}

/// Returns true by value, base pointer and
/// offset pointer and addressing mode by reference if this node can be
/// combined with a load / store to form a post-indexed load / store.
bool AVRTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
                                                   SDValue &Base,
                                                   SDValue &Offset,
                                                   ISD::MemIndexedMode &AM,
                                                   SelectionDAG &DAG) const {
  EVT VT;
  SDLoc DL(N);

  if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    VT = LD->getMemoryVT();
    if (LD->getExtensionType() != ISD::NON_EXTLOAD)
      return false;
  } else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    VT = ST->getMemoryVT();
    // We cannot store to program memory.
    if (AVR::isProgramMemoryAccess(ST))
      return false;
    // Since the high byte needs to be stored first, we cannot emit
    // an i16 post-increment store like:
    //   st X+, r24
    //   st X+, r25
    if (VT == MVT::i16 && !Subtarget.hasLowByteFirst())
      return false;
  } else {
    return false;
  }

  if (VT != MVT::i8 && VT != MVT::i16) {
    return false;
  }

  if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB) {
    return false;
  }

  if (const ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
    int RHSC = RHS->getSExtValue();
    if (Op->getOpcode() == ISD::SUB)
      RHSC = -RHSC;
    if ((VT == MVT::i16 && RHSC != 2) || (VT == MVT::i8 && RHSC != 1)) {
      return false;
    }

    // FIXME: We temporarily disable post increment load from program memory,
    // due to bug https://github.com/llvm/llvm-project/issues/59914.
    if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(N))
      if (AVR::isProgramMemoryAccess(LD))
        return false;

    Base = Op->getOperand(0);
    Offset = DAG.getConstant(RHSC, DL, MVT::i8);
    AM = ISD::POST_INC;

    return true;
  }

  return false;
}

bool AVRTargetLowering::isOffsetFoldingLegal(
    const GlobalAddressSDNode *GA) const {
  return true;
}

//===----------------------------------------------------------------------===//
//             Formal Arguments Calling Convention Implementation
//===----------------------------------------------------------------------===//

#include "AVRGenCallingConv.inc"

/// Registers for calling conventions, ordered in reverse as required by ABI.
/// Both arrays must be of the same length.
static const MCPhysReg RegList8AVR[] = {
    AVR::R25, AVR::R24, AVR::R23, AVR::R22, AVR::R21, AVR::R20,
    AVR::R19, AVR::R18, AVR::R17, AVR::R16, AVR::R15, AVR::R14,
    AVR::R13, AVR::R12, AVR::R11, AVR::R10, AVR::R9,  AVR::R8};
static const MCPhysReg RegList8Tiny[] = {AVR::R25, AVR::R24, AVR::R23,
                                         AVR::R22, AVR::R21, AVR::R20};
static const MCPhysReg RegList16AVR[] = {
    AVR::R26R25, AVR::R25R24, AVR::R24R23, AVR::R23R22, AVR::R22R21,
    AVR::R21R20, AVR::R20R19, AVR::R19R18, AVR::R18R17, AVR::R17R16,
    AVR::R16R15, AVR::R15R14, AVR::R14R13, AVR::R13R12, AVR::R12R11,
    AVR::R11R10, AVR::R10R9,  AVR::R9R8};
static const MCPhysReg RegList16Tiny[] = {AVR::R26R25, AVR::R25R24,
                                          AVR::R24R23, AVR::R23R22,
                                          AVR::R22R21, AVR::R21R20};

static_assert(std::size(RegList8AVR) == std::size(RegList16AVR),
              "8-bit and 16-bit register arrays must be of equal length");
static_assert(std::size(RegList8Tiny) == std::size(RegList16Tiny),
              "8-bit and 16-bit register arrays must be of equal length");

/// Analyze incoming and outgoing function arguments. We need custom C++ code
/// to handle special constraints in the ABI.
/// In addition, all pieces of a certain argument have to be passed either
/// using registers or the stack but never mixing both.
template <typename ArgT>
static void analyzeArguments(TargetLowering::CallLoweringInfo *CLI,
                             const Function *F, const DataLayout *TD,
                             const SmallVectorImpl<ArgT> &Args,
                             SmallVectorImpl<CCValAssign> &ArgLocs,
                             CCState &CCInfo, bool Tiny) {
  // Choose the proper register list for argument passing according to the ABI.
  ArrayRef<MCPhysReg> RegList8;
  ArrayRef<MCPhysReg> RegList16;
  if (Tiny) {
    RegList8 = ArrayRef(RegList8Tiny);
    RegList16 = ArrayRef(RegList16Tiny);
  } else {
    RegList8 = ArrayRef(RegList8AVR);
    RegList16 = ArrayRef(RegList16AVR);
  }

  unsigned NumArgs = Args.size();
  // This is the index of the last used register, in RegList*.
  // -1 means R26 (R26 is never actually used in CC).
  int RegLastIdx = -1;
  // Once a value is passed on the stack, all subsequent arguments are passed
  // on the stack as well.
  bool UseStack = false;
  for (unsigned i = 0; i != NumArgs;) {
    MVT VT = Args[i].VT;
    // We have to count the number of bytes for each function argument, that is
    // those Args with the same OrigArgIndex. This is important in case the
    // function takes an aggregate type.
    // Current argument will be between [i..j).
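    // For example, an i32 argument legalized into two i16 pieces shares one
    // OrigArgIndex, so both pieces are counted into the same TotalBytes and
    // assigned registers (or stack slots) as a unit.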
    unsigned ArgIndex = Args[i].OrigArgIndex;
    unsigned TotalBytes = VT.getStoreSize();
    unsigned j = i + 1;
    for (; j != NumArgs; ++j) {
      if (Args[j].OrigArgIndex != ArgIndex)
        break;
      TotalBytes += Args[j].VT.getStoreSize();
    }
    // Round up to even number of bytes.
    TotalBytes = alignTo(TotalBytes, 2);
    // Skip zero sized arguments
    if (TotalBytes == 0)
      continue;
    // The index of the first register to be used
    unsigned RegIdx = RegLastIdx + TotalBytes;
    RegLastIdx = RegIdx;
    // If there are not enough registers, use the stack
    if (RegIdx >= RegList8.size()) {
      UseStack = true;
    }
    for (; i != j; ++i) {
      MVT VT = Args[i].VT;

      if (UseStack) {
        auto evt = EVT(VT).getTypeForEVT(CCInfo.getContext());
        unsigned Offset = CCInfo.AllocateStack(TD->getTypeAllocSize(evt),
                                               TD->getABITypeAlign(evt));
        CCInfo.addLoc(
            CCValAssign::getMem(i, VT, Offset, VT, CCValAssign::Full));
      } else {
        unsigned Reg;
        if (VT == MVT::i8) {
          Reg = CCInfo.AllocateReg(RegList8[RegIdx]);
        } else if (VT == MVT::i16) {
          Reg = CCInfo.AllocateReg(RegList16[RegIdx]);
        } else {
          llvm_unreachable(
              "calling convention can only manage i8 and i16 types");
        }
        assert(Reg && "register not available in calling convention");
        CCInfo.addLoc(CCValAssign::getReg(i, VT, Reg, VT, CCValAssign::Full));
        // Registers inside a particular argument are sorted in increasing
        // order (remember the array is reversed).
        RegIdx -= VT.getStoreSize();
      }
    }
  }
}

/// Count the total number of bytes needed to pass or return these arguments.
template <typename ArgT>
static unsigned
getTotalArgumentsSizeInBytes(const SmallVectorImpl<ArgT> &Args) {
  unsigned TotalBytes = 0;

  for (const ArgT &Arg : Args) {
    TotalBytes += Arg.VT.getStoreSize();
  }
  return TotalBytes;
}

/// Analyze incoming and outgoing value of returning from a function.
/// The algorithm is similar to analyzeArguments, but there can only be
/// one value, possibly an aggregate, and it is limited to 8 bytes.
template <typename ArgT>
static void analyzeReturnValues(const SmallVectorImpl<ArgT> &Args,
                                CCState &CCInfo, bool Tiny) {
  unsigned NumArgs = Args.size();
  unsigned TotalBytes = getTotalArgumentsSizeInBytes(Args);
  // CanLowerReturn() guarantees this assertion.
  if (Tiny)
    assert(TotalBytes <= 4 &&
           "return values greater than 4 bytes cannot be lowered on AVRTiny");
  else
    assert(TotalBytes <= 8 &&
           "return values greater than 8 bytes cannot be lowered on AVR");

  // Choose the proper register list for argument passing according to the ABI.
  ArrayRef<MCPhysReg> RegList8;
  ArrayRef<MCPhysReg> RegList16;
  if (Tiny) {
    RegList8 = ArrayRef(RegList8Tiny);
    RegList16 = ArrayRef(RegList16Tiny);
  } else {
    RegList8 = ArrayRef(RegList8AVR);
    RegList16 = ArrayRef(RegList16AVR);
  }

  // GCC-ABI says that the size is rounded up to the next even number,
  // but actually once it is more than 4 it will always round up to 8.
  if (TotalBytes > 4) {
    TotalBytes = 8;
  } else {
    TotalBytes = alignTo(TotalBytes, 2);
  }

  // The index of the first register to use.
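  // For example, an i16 return value gives TotalBytes == 2, so RegIdx == 1
  // and the value ends up in RegList16[1], i.e. the R25:R24 pair.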
  int RegIdx = TotalBytes - 1;
  for (unsigned i = 0; i != NumArgs; ++i) {
    MVT VT = Args[i].VT;
    unsigned Reg;
    if (VT == MVT::i8) {
      Reg = CCInfo.AllocateReg(RegList8[RegIdx]);
    } else if (VT == MVT::i16) {
      Reg = CCInfo.AllocateReg(RegList16[RegIdx]);
    } else {
      llvm_unreachable("calling convention can only manage i8 and i16 types");
    }
    assert(Reg && "register not available in calling convention");
    CCInfo.addLoc(CCValAssign::getReg(i, VT, Reg, VT, CCValAssign::Full));
    // Registers sort in increasing order
    RegIdx -= VT.getStoreSize();
  }
}

SDValue AVRTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  auto DL = DAG.getDataLayout();

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
                 *DAG.getContext());

  // Variadic functions do not need all the analysis below.
  if (isVarArg) {
    CCInfo.AnalyzeFormalArguments(Ins, ArgCC_AVR_Vararg);
  } else {
    analyzeArguments(nullptr, &MF.getFunction(), &DL, Ins, ArgLocs, CCInfo,
                     Subtarget.hasTinyEncoding());
  }

  SDValue ArgValue;
  for (CCValAssign &VA : ArgLocs) {

    // Arguments stored on registers.
    if (VA.isRegLoc()) {
      EVT RegVT = VA.getLocVT();
      const TargetRegisterClass *RC;
      if (RegVT == MVT::i8) {
        RC = &AVR::GPR8RegClass;
      } else if (RegVT == MVT::i16) {
        RC = &AVR::DREGSRegClass;
      } else {
        llvm_unreachable("Unknown argument type!");
      }

      Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
      ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);

      // :NOTE: Clang should not promote any i8 into i16 but for safety the
      // following code will handle zexts or sexts generated by other
      // front ends. Otherwise:
      // If this is an 8 bit value, it is really passed promoted
      // to 16 bits. Insert an assert[sz]ext to capture this, then
      // truncate to the right size.
      switch (VA.getLocInfo()) {
      default:
        llvm_unreachable("Unknown loc info!");
      case CCValAssign::Full:
        break;
      case CCValAssign::BCvt:
        ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
        break;
      case CCValAssign::SExt:
        ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));
        ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
        break;
      case CCValAssign::ZExt:
        ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));
        ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
        break;
      }

      InVals.push_back(ArgValue);
    } else {
      // Only arguments passed on the stack should make it here.
      assert(VA.isMemLoc());

      EVT LocVT = VA.getLocVT();

      // Create the frame index object for this incoming parameter.
      int FI = MFI.CreateFixedObject(LocVT.getSizeInBits() / 8,
                                     VA.getLocMemOffset(), true);

      // Create the SelectionDAG nodes corresponding to a load
      // from this parameter.
      SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DL));
      InVals.push_back(DAG.getLoad(LocVT, dl, Chain, FIN,
                                   MachinePointerInfo::getFixedStack(MF, FI)));
    }
  }

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    unsigned StackSize = CCInfo.getStackSize();
    AVRMachineFunctionInfo *AFI = MF.getInfo<AVRMachineFunctionInfo>();

    AFI->setVarArgsFrameIndex(MFI.CreateFixedObject(2, StackSize, true));
  }

  return Chain;
}

//===----------------------------------------------------------------------===//
//                  Call Calling Convention Implementation
//===----------------------------------------------------------------------===//

SDValue AVRTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                                     SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc &DL = CLI.DL;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  bool &isTailCall = CLI.IsTailCall;
  CallingConv::ID CallConv = CLI.CallConv;
  bool isVarArg = CLI.IsVarArg;

  MachineFunction &MF = DAG.getMachineFunction();

  // AVR does not yet support tail call optimization.
  isTailCall = false;

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
                 *DAG.getContext());

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  const Function *F = nullptr;
  if (const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    const GlobalValue *GV = G->getGlobal();
    if (isa<Function>(GV))
      F = cast<Function>(GV);
    Callee =
        DAG.getTargetGlobalAddress(GV, DL, getPointerTy(DAG.getDataLayout()));
  } else if (const ExternalSymbolSDNode *ES =
                 dyn_cast<ExternalSymbolSDNode>(Callee)) {
    Callee = DAG.getTargetExternalSymbol(ES->getSymbol(),
                                         getPointerTy(DAG.getDataLayout()));
  }

  // Variadic functions do not need all the analysis below.
  if (isVarArg) {
    CCInfo.AnalyzeCallOperands(Outs, ArgCC_AVR_Vararg);
  } else {
    analyzeArguments(&CLI, F, &DAG.getDataLayout(), Outs, ArgLocs, CCInfo,
                     Subtarget.hasTinyEncoding());
  }

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getStackSize();

  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);

  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;

  // First, walk the register assignments, inserting copies.
  unsigned AI, AE;
  bool HasStackArgs = false;
  for (AI = 0, AE = ArgLocs.size(); AI != AE; ++AI) {
    CCValAssign &VA = ArgLocs[AI];
    EVT RegVT = VA.getLocVT();
    SDValue Arg = OutVals[AI];

    // Promote the value if needed. With Clang this should not happen.
    switch (VA.getLocInfo()) {
    default:
      llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full:
      break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, RegVT, Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, RegVT, Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, DL, RegVT, Arg);
      break;
    case CCValAssign::BCvt:
      Arg = DAG.getNode(ISD::BITCAST, DL, RegVT, Arg);
      break;
    }

    // Stop when we encounter a stack argument, we need to process them
    // in reverse order in the loop below.
    if (VA.isMemLoc()) {
      HasStackArgs = true;
      break;
    }

    // Arguments that can be passed on registers must be kept in the RegsToPass
    // vector.
    RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
  }

  // Second, the stack arguments have to be walked.
  // Previously this code created chained stores but those chained stores
  // appear to be unchained in the legalization phase. Therefore, do not
  // attempt to chain them here. In fact, chaining them here somehow causes
  // the first and second store to be reversed which is the exact opposite of
  // the intended effect.
  if (HasStackArgs) {
    SmallVector<SDValue, 8> MemOpChains;
    for (; AI != AE; AI++) {
      CCValAssign &VA = ArgLocs[AI];
      SDValue Arg = OutVals[AI];

      assert(VA.isMemLoc());

      // SP points to one stack slot further so add one to adjust it.
      SDValue PtrOff = DAG.getNode(
          ISD::ADD, DL, getPointerTy(DAG.getDataLayout()),
          DAG.getRegister(AVR::SP, getPointerTy(DAG.getDataLayout())),
          DAG.getIntPtrConstant(VA.getLocMemOffset() + 1, DL));

      MemOpChains.push_back(
          DAG.getStore(Chain, DL, Arg, PtrOff,
                       MachinePointerInfo::getStack(MF, VA.getLocMemOffset())));
    }

    if (!MemOpChains.empty())
      Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers. The InGlue
  // is necessary since all emitted instructions must be stuck together.
  SDValue InGlue;
  for (auto Reg : RegsToPass) {
    Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, InGlue);
    InGlue = Chain.getValue(1);
  }

  // Returns a chain & a flag for retval copy to use.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (auto Reg : RegsToPass) {
    Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
  }

  // The zero register (usually R1) must be passed as an implicit register so
  // that this register is correctly zeroed in interrupts.
  Ops.push_back(DAG.getRegister(Subtarget.getZeroRegister(), MVT::i8));

  // Add a register mask operand representing the call-preserved registers.
  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const uint32_t *Mask =
      TRI->getCallPreservedMask(DAG.getMachineFunction(), CallConv);
  assert(Mask && "Missing call preserved mask for calling convention");
  Ops.push_back(DAG.getRegisterMask(Mask));

  if (InGlue.getNode()) {
    Ops.push_back(InGlue);
  }

  Chain = DAG.getNode(AVRISD::CALL, DL, NodeTys, Ops);
  InGlue = Chain.getValue(1);

  // Create the CALLSEQ_END node.
  Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, InGlue, DL);

  if (!Ins.empty()) {
    InGlue = Chain.getValue(1);
  }

  // Handle result values, copying them out of physregs into vregs that we
  // return.
  return LowerCallResult(Chain, InGlue, CallConv, isVarArg, Ins, DL, DAG,
                         InVals);
}

/// Lower the result values of a call into the
/// appropriate copies out of physical registers.
///
SDValue AVRTargetLowering::LowerCallResult(
    SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());

  // Handle runtime calling conventions.
  if (CallConv == CallingConv::AVR_BUILTIN) {
    CCInfo.AnalyzeCallResult(Ins, RetCC_AVR_BUILTIN);
  } else {
    analyzeReturnValues(Ins, CCInfo, Subtarget.hasTinyEncoding());
  }

  // Copy all of the result registers out of their specified physreg.
  for (CCValAssign const &RVLoc : RVLocs) {
    Chain = DAG.getCopyFromReg(Chain, dl, RVLoc.getLocReg(), RVLoc.getValVT(),
                               InGlue)
                .getValue(1);
    InGlue = Chain.getValue(2);
    InVals.push_back(Chain.getValue(0));
  }

  return Chain;
}

//===----------------------------------------------------------------------===//
// Return Value Calling Convention Implementation
//===----------------------------------------------------------------------===//

bool AVRTargetLowering::CanLowerReturn(
    CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
  if (CallConv == CallingConv::AVR_BUILTIN) {
    SmallVector<CCValAssign, 16> RVLocs;
    CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
    return CCInfo.CheckReturn(Outs, RetCC_AVR_BUILTIN);
  }

  unsigned TotalBytes = getTotalArgumentsSizeInBytes(Outs);
  return TotalBytes <= (unsigned)(Subtarget.hasTinyEncoding() ? 4 : 8);
}

SDValue
AVRTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                               bool isVarArg,
                               const SmallVectorImpl<ISD::OutputArg> &Outs,
                               const SmallVectorImpl<SDValue> &OutVals,
                               const SDLoc &dl, SelectionDAG &DAG) const {
  // CCValAssign - represent the assignment of the return value to locations.
  SmallVector<CCValAssign, 16> RVLocs;

  // CCState - Info about the registers and stack slot.
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());

  MachineFunction &MF = DAG.getMachineFunction();

  // Analyze return values.
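  // Note that return values always end up in registers here: CanLowerReturn
  // above rejects anything larger than 8 bytes (4 bytes with the Tiny
  // encoding), in which case the value is demoted to a hidden sret pointer
  // argument instead.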
  if (CallConv == CallingConv::AVR_BUILTIN) {
    CCInfo.AnalyzeReturn(Outs, RetCC_AVR_BUILTIN);
  } else {
    analyzeReturnValues(Outs, CCInfo, Subtarget.hasTinyEncoding());
  }

  SDValue Glue;
  SmallVector<SDValue, 4> RetOps(1, Chain);
  // Copy the result values into the output registers.
  for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), OutVals[i], Glue);

    // Guarantee that all emitted copies are stuck together with flags.
    Glue = Chain.getValue(1);
    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
  }

  // Don't emit the ret/reti instruction when the naked attribute is present in
  // the function being compiled.
  if (MF.getFunction().getAttributes().hasFnAttr(Attribute::Naked)) {
    return Chain;
  }

  const AVRMachineFunctionInfo *AFI = MF.getInfo<AVRMachineFunctionInfo>();

  if (!AFI->isInterruptOrSignalHandler()) {
    // The return instruction has an implicit zero register operand: it must
    // contain zero on return.
    // This is not needed in interrupts, however, where the zero register is
    // handled specially (only pushed/popped when needed).
    RetOps.push_back(DAG.getRegister(Subtarget.getZeroRegister(), MVT::i8));
  }

  unsigned RetOpc =
      AFI->isInterruptOrSignalHandler() ? AVRISD::RETI_GLUE : AVRISD::RET_GLUE;

  RetOps[0] = Chain; // Update chain.

  if (Glue.getNode()) {
    RetOps.push_back(Glue);
  }

  return DAG.getNode(RetOpc, dl, MVT::Other, RetOps);
}

//===----------------------------------------------------------------------===//
// Custom Inserters
//===----------------------------------------------------------------------===//

MachineBasicBlock *AVRTargetLowering::insertShift(MachineInstr &MI,
                                                  MachineBasicBlock *BB,
                                                  bool Tiny) const {
  unsigned Opc;
  const TargetRegisterClass *RC;
  bool HasRepeatedOperand = false;
  MachineFunction *F = BB->getParent();
  MachineRegisterInfo &RI = F->getRegInfo();
  const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
  DebugLoc dl = MI.getDebugLoc();

  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Invalid shift opcode!");
  case AVR::Lsl8:
    Opc = AVR::ADDRdRr; // LSL is an alias of ADD Rd, Rd
    RC = &AVR::GPR8RegClass;
    HasRepeatedOperand = true;
    break;
  case AVR::Lsl16:
    Opc = AVR::LSLWRd;
    RC = &AVR::DREGSRegClass;
    break;
  case AVR::Asr8:
    Opc = AVR::ASRRd;
    RC = &AVR::GPR8RegClass;
    break;
  case AVR::Asr16:
    Opc = AVR::ASRWRd;
    RC = &AVR::DREGSRegClass;
    break;
  case AVR::Lsr8:
    Opc = AVR::LSRRd;
    RC = &AVR::GPR8RegClass;
    break;
  case AVR::Lsr16:
    Opc = AVR::LSRWRd;
    RC = &AVR::DREGSRegClass;
    break;
  case AVR::Rol8:
    Opc = Tiny ? AVR::ROLBRdR17 : AVR::ROLBRdR1;
    RC = &AVR::GPR8RegClass;
    break;
  case AVR::Rol16:
    Opc = AVR::ROLWRd;
    RC = &AVR::DREGSRegClass;
    break;
  case AVR::Ror8:
    Opc = AVR::RORBRd;
    RC = &AVR::GPR8RegClass;
    break;
  case AVR::Ror16:
    Opc = AVR::RORWRd;
    RC = &AVR::DREGSRegClass;
    break;
  }

  const BasicBlock *LLVM_BB = BB->getBasicBlock();

  MachineFunction::iterator I;
  for (I = BB->getIterator(); I != F->end() && &(*I) != BB; ++I)
    ;
  if (I != F->end())
    ++I;

  // Create the loop, check, and remainder blocks.
  MachineBasicBlock *LoopBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *CheckBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *RemBB = F->CreateMachineBasicBlock(LLVM_BB);

  F->insert(I, LoopBB);
  F->insert(I, CheckBB);
  F->insert(I, RemBB);

  // Update machine-CFG edges by transferring all successors of the current
  // block to the block containing instructions after shift.
  RemBB->splice(RemBB->begin(), BB, std::next(MachineBasicBlock::iterator(MI)),
                BB->end());
  RemBB->transferSuccessorsAndUpdatePHIs(BB);

  // Add edges BB => LoopBB => CheckBB => RemBB, CheckBB => LoopBB.
  BB->addSuccessor(CheckBB);
  LoopBB->addSuccessor(CheckBB);
  CheckBB->addSuccessor(LoopBB);
  CheckBB->addSuccessor(RemBB);

  Register ShiftAmtReg = RI.createVirtualRegister(&AVR::GPR8RegClass);
  Register ShiftAmtReg2 = RI.createVirtualRegister(&AVR::GPR8RegClass);
  Register ShiftReg = RI.createVirtualRegister(RC);
  Register ShiftReg2 = RI.createVirtualRegister(RC);
  Register ShiftAmtSrcReg = MI.getOperand(2).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  Register DstReg = MI.getOperand(0).getReg();

  // BB:
  // rjmp CheckBB
  BuildMI(BB, dl, TII.get(AVR::RJMPk)).addMBB(CheckBB);

  // LoopBB:
  // ShiftReg2 = shift ShiftReg
  auto ShiftMI = BuildMI(LoopBB, dl, TII.get(Opc), ShiftReg2).addReg(ShiftReg);
  if (HasRepeatedOperand)
    ShiftMI.addReg(ShiftReg);

  // CheckBB:
  // ShiftReg = phi [%SrcReg, BB], [%ShiftReg2, LoopBB]
  // ShiftAmt = phi [%N, BB], [%ShiftAmt2, LoopBB]
  // DestReg = phi [%SrcReg, BB], [%ShiftReg, LoopBB]
  // ShiftAmt2 = ShiftAmt - 1;
  // if (ShiftAmt2 >= 0) goto LoopBB;
  BuildMI(CheckBB, dl, TII.get(AVR::PHI), ShiftReg)
      .addReg(SrcReg)
      .addMBB(BB)
      .addReg(ShiftReg2)
      .addMBB(LoopBB);
  BuildMI(CheckBB, dl, TII.get(AVR::PHI), ShiftAmtReg)
      .addReg(ShiftAmtSrcReg)
      .addMBB(BB)
      .addReg(ShiftAmtReg2)
      .addMBB(LoopBB);
  BuildMI(CheckBB, dl, TII.get(AVR::PHI), DstReg)
      .addReg(SrcReg)
      .addMBB(BB)
      .addReg(ShiftReg2)
      .addMBB(LoopBB);

  BuildMI(CheckBB, dl, TII.get(AVR::DECRd), ShiftAmtReg2).addReg(ShiftAmtReg);
  BuildMI(CheckBB, dl, TII.get(AVR::BRPLk)).addMBB(LoopBB);

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return RemBB;
}

// Do a multibyte AVR shift. Insert shift instructions and put the output
// registers in the Regs array.
// Because AVR does not have a normal shift instruction (only a single-bit
// shift instruction), we have to emulate this behavior with other
// instructions.
// It first tries large steps (moving registers around) and then smaller steps
// like single-bit shifts.
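// For example, a 16-bit left shift by 9 can be done by moving the low byte
// into the high byte (a whole-register step of 8) followed by one single-bit
// shift.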
// Large shifts actually reduce the number of shifted registers, so the
// algorithms below have to work independently of the number of registers that
// are shifted.
// For more information and background, see this blog post:
// https://aykevl.nl/2021/02/avr-bitshift
static void insertMultibyteShift(MachineInstr &MI, MachineBasicBlock *BB,
                                 MutableArrayRef<std::pair<Register, int>> Regs,
                                 ISD::NodeType Opc, int64_t ShiftAmt) {
  const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
  const AVRSubtarget &STI = BB->getParent()->getSubtarget<AVRSubtarget>();
  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
  const DebugLoc &dl = MI.getDebugLoc();

  const bool ShiftLeft = Opc == ISD::SHL;
  const bool ArithmeticShift = Opc == ISD::SRA;

  // Zero a register, for use in later operations.
  Register ZeroReg = MRI.createVirtualRegister(&AVR::GPR8RegClass);
  BuildMI(*BB, MI, dl, TII.get(AVR::COPY), ZeroReg)
      .addReg(STI.getZeroRegister());

  // Do a shift modulo 6 or 7. This is a bit more complicated than most shifts
  // and is hard to compose with the rest, so these are special-cased.
  // The basic idea is to shift one or two bits in the opposite direction and
  // then move registers around to get the correct end result.
  if (ShiftLeft && (ShiftAmt % 8) >= 6) {
    // Left shift modulo 6 or 7.

    // Create a slice of the registers we're going to modify, to ease working
    // with them.
    size_t ShiftRegsOffset = ShiftAmt / 8;
    size_t ShiftRegsSize = Regs.size() - ShiftRegsOffset;
    MutableArrayRef<std::pair<Register, int>> ShiftRegs =
        Regs.slice(ShiftRegsOffset, ShiftRegsSize);

    // Shift one to the right, keeping the least significant bit as the carry
    // bit.
    insertMultibyteShift(MI, BB, ShiftRegs, ISD::SRL, 1);

    // Rotate the least significant bit from the carry bit into a new register
    // (that starts out zero).
    Register LowByte = MRI.createVirtualRegister(&AVR::GPR8RegClass);
    BuildMI(*BB, MI, dl, TII.get(AVR::RORRd), LowByte).addReg(ZeroReg);

    // Shift one more to the right if this is a modulo-6 shift.
    if (ShiftAmt % 8 == 6) {
      insertMultibyteShift(MI, BB, ShiftRegs, ISD::SRL, 1);
      Register NewLowByte = MRI.createVirtualRegister(&AVR::GPR8RegClass);
      BuildMI(*BB, MI, dl, TII.get(AVR::RORRd), NewLowByte).addReg(LowByte);
      LowByte = NewLowByte;
    }

    // Move all registers to the left, zeroing the bottom registers as needed.
    for (size_t I = 0; I < Regs.size(); I++) {
      int ShiftRegsIdx = I + 1;
      if (ShiftRegsIdx < (int)ShiftRegs.size()) {
        Regs[I] = ShiftRegs[ShiftRegsIdx];
      } else if (ShiftRegsIdx == (int)ShiftRegs.size()) {
        Regs[I] = std::pair(LowByte, 0);
      } else {
        Regs[I] = std::pair(ZeroReg, 0);
      }
    }

    return;
  }

  // Right shift modulo 6 or 7.
  if (!ShiftLeft && (ShiftAmt % 8) >= 6) {
    // Create a view on the registers we're going to modify, to ease working
    // with them.
    size_t ShiftRegsSize = Regs.size() - (ShiftAmt / 8);
    MutableArrayRef<std::pair<Register, int>> ShiftRegs =
        Regs.slice(0, ShiftRegsSize);

    // Shift one to the left.
    insertMultibyteShift(MI, BB, ShiftRegs, ISD::SHL, 1);

    // Sign or zero extend the most significant register into a new register.
    // The HighByte is the byte that still has one (or two) bits from the
    // original value. The ExtByte is purely a zero/sign extend byte (all bits
    // are either 0 or 1).
    Register HighByte = MRI.createVirtualRegister(&AVR::GPR8RegClass);
    Register ExtByte = 0;
    if (ArithmeticShift) {
      // Sign-extend the bit that was shifted out last.
      BuildMI(*BB, MI, dl, TII.get(AVR::SBCRdRr), HighByte)
          .addReg(HighByte, RegState::Undef)
          .addReg(HighByte, RegState::Undef);
      ExtByte = HighByte;
      // The highest bit of the original value is the same as the sign-extend
      // byte, so HighByte and ExtByte are the same.
    } else {
      // Use the zero register for zero extending.
      ExtByte = ZeroReg;
      // Rotate the most significant bit into a new register (that starts out
      // zero).
      BuildMI(*BB, MI, dl, TII.get(AVR::ADCRdRr), HighByte)
          .addReg(ExtByte)
          .addReg(ExtByte);
    }

    // Shift one more to the left for modulo-6 shifts.
    if (ShiftAmt % 8 == 6) {
      insertMultibyteShift(MI, BB, ShiftRegs, ISD::SHL, 1);
      // Shift the topmost bit into the HighByte.
      Register NewExt = MRI.createVirtualRegister(&AVR::GPR8RegClass);
      BuildMI(*BB, MI, dl, TII.get(AVR::ADCRdRr), NewExt)
          .addReg(HighByte)
          .addReg(HighByte);
      HighByte = NewExt;
    }

    // Move all to the right, while sign or zero extending.
    for (int I = Regs.size() - 1; I >= 0; I--) {
      int ShiftRegsIdx = I - (Regs.size() - ShiftRegs.size()) - 1;
      if (ShiftRegsIdx >= 0) {
        Regs[I] = ShiftRegs[ShiftRegsIdx];
      } else if (ShiftRegsIdx == -1) {
        Regs[I] = std::pair(HighByte, 0);
      } else {
        Regs[I] = std::pair(ExtByte, 0);
      }
    }

    return;
  }

  // For shift amounts of at least one register, simply rename the registers
  // and zero the bottom registers.
  while (ShiftLeft && ShiftAmt >= 8) {
    // Move all registers one to the left.
    for (size_t I = 0; I < Regs.size() - 1; I++) {
      Regs[I] = Regs[I + 1];
    }

    // Zero the least significant register.
    Regs[Regs.size() - 1] = std::pair(ZeroReg, 0);

    // Continue shifts with the leftover registers.
    Regs = Regs.drop_back(1);

    ShiftAmt -= 8;
  }

  // And again, the same for right shifts.
  Register ShrExtendReg = 0;
  if (!ShiftLeft && ShiftAmt >= 8) {
    if (ArithmeticShift) {
      // Sign extend the most significant register into ShrExtendReg.
      ShrExtendReg = MRI.createVirtualRegister(&AVR::GPR8RegClass);
      Register Tmp = MRI.createVirtualRegister(&AVR::GPR8RegClass);
      BuildMI(*BB, MI, dl, TII.get(AVR::ADDRdRr), Tmp)
          .addReg(Regs[0].first, 0, Regs[0].second)
          .addReg(Regs[0].first, 0, Regs[0].second);
      BuildMI(*BB, MI, dl, TII.get(AVR::SBCRdRr), ShrExtendReg)
          .addReg(Tmp)
          .addReg(Tmp);
    } else {
      ShrExtendReg = ZeroReg;
    }
    for (; ShiftAmt >= 8; ShiftAmt -= 8) {
      // Move all registers one to the right.
      for (size_t I = Regs.size() - 1; I != 0; I--) {
        Regs[I] = Regs[I - 1];
      }

      // Zero or sign extend the most significant register.
      Regs[0] = std::pair(ShrExtendReg, 0);

      // Continue shifts with the leftover registers.
      Regs = Regs.drop_front(1);
    }
  }

  // The bigger shifts are already handled above.
  assert((ShiftAmt < 8) && "Unexpected shift amount");

  // Shift by four bits, using a complicated swap/eor/andi/eor sequence.
  // It only works for logical shifts because the bits shifted in are all
  // zeroes.
  // To shift a single byte right, it produces code like this:
  //   swap r0
  //   andi r0, 0x0f
  // For a two-byte (16-bit) shift, it adds the following instructions to shift
  // the upper byte into the lower byte:
  //   swap r1
  //   eor r0, r1
  //   andi r1, 0x0f
  //   eor r0, r1
  // For bigger shifts, it repeats the above sequence. For example, for a
  // 3-byte (24-bit) shift it adds:
  //   swap r2
  //   eor r1, r2
  //   andi r2, 0x0f
  //   eor r1, r2
  if (!ArithmeticShift && ShiftAmt >= 4) {
    Register Prev = 0;
    for (size_t I = 0; I < Regs.size(); I++) {
      size_t Idx = ShiftLeft ? I : Regs.size() - I - 1;
      Register SwapReg = MRI.createVirtualRegister(&AVR::LD8RegClass);
      BuildMI(*BB, MI, dl, TII.get(AVR::SWAPRd), SwapReg)
          .addReg(Regs[Idx].first, 0, Regs[Idx].second);
      if (I != 0) {
        Register R = MRI.createVirtualRegister(&AVR::GPR8RegClass);
        BuildMI(*BB, MI, dl, TII.get(AVR::EORRdRr), R)
            .addReg(Prev)
            .addReg(SwapReg);
        Prev = R;
      }
      Register AndReg = MRI.createVirtualRegister(&AVR::LD8RegClass);
      BuildMI(*BB, MI, dl, TII.get(AVR::ANDIRdK), AndReg)
          .addReg(SwapReg)
          .addImm(ShiftLeft ? 0xf0 : 0x0f);
      if (I != 0) {
        Register R = MRI.createVirtualRegister(&AVR::GPR8RegClass);
        BuildMI(*BB, MI, dl, TII.get(AVR::EORRdRr), R)
            .addReg(Prev)
            .addReg(AndReg);
        size_t PrevIdx = ShiftLeft ? Idx - 1 : Idx + 1;
        Regs[PrevIdx] = std::pair(R, 0);
      }
      Prev = AndReg;
      Regs[Idx] = std::pair(AndReg, 0);
    }
    ShiftAmt -= 4;
  }

  // Shift by one. This is the fallback that always works, and the shift
  // operation that is used for 1-, 2-, and 3-bit shifts.
  while (ShiftLeft && ShiftAmt) {
    // Shift one to the left.
    for (ssize_t I = Regs.size() - 1; I >= 0; I--) {
      Register Out = MRI.createVirtualRegister(&AVR::GPR8RegClass);
      Register In = Regs[I].first;
      Register InSubreg = Regs[I].second;
      if (I == (ssize_t)Regs.size() - 1) { // first iteration
        BuildMI(*BB, MI, dl, TII.get(AVR::ADDRdRr), Out)
            .addReg(In, 0, InSubreg)
            .addReg(In, 0, InSubreg);
      } else {
        BuildMI(*BB, MI, dl, TII.get(AVR::ADCRdRr), Out)
            .addReg(In, 0, InSubreg)
            .addReg(In, 0, InSubreg);
      }
      Regs[I] = std::pair(Out, 0);
    }
    ShiftAmt--;
  }
  while (!ShiftLeft && ShiftAmt) {
    // Shift one to the right.
    for (size_t I = 0; I < Regs.size(); I++) {
      Register Out = MRI.createVirtualRegister(&AVR::GPR8RegClass);
      Register In = Regs[I].first;
      Register InSubreg = Regs[I].second;
      if (I == 0) {
        unsigned Opc = ArithmeticShift ? AVR::ASRRd : AVR::LSRRd;
        BuildMI(*BB, MI, dl, TII.get(Opc), Out).addReg(In, 0, InSubreg);
      } else {
        BuildMI(*BB, MI, dl, TII.get(AVR::RORRd), Out).addReg(In, 0, InSubreg);
      }
      Regs[I] = std::pair(Out, 0);
    }
    ShiftAmt--;
  }

  if (ShiftAmt != 0) {
    llvm_unreachable("don't know how to shift!"); // sanity check
  }
}

// Do a wide (32-bit) shift.
MachineBasicBlock *
AVRTargetLowering::insertWideShift(MachineInstr &MI,
                                   MachineBasicBlock *BB) const {
  const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
  const DebugLoc &dl = MI.getDebugLoc();

  // How much to shift to the right (meaning: a negative number indicates a
  // left shift).
  int64_t ShiftAmt = MI.getOperand(4).getImm();
  ISD::NodeType Opc;
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Unexpected wide shift opcode!");
  case AVR::Lsl32:
    Opc = ISD::SHL;
    break;
  case AVR::Lsr32:
    Opc = ISD::SRL;
    break;
  case AVR::Asr32:
    Opc = ISD::SRA;
    break;
  }

  // Read the input registers, with the most significant register at index 0.
  std::array<std::pair<Register, int>, 4> Registers = {
      std::pair(MI.getOperand(3).getReg(), AVR::sub_hi),
      std::pair(MI.getOperand(3).getReg(), AVR::sub_lo),
      std::pair(MI.getOperand(2).getReg(), AVR::sub_hi),
      std::pair(MI.getOperand(2).getReg(), AVR::sub_lo),
  };

  // Do the shift. The registers are modified in-place.
  insertMultibyteShift(MI, BB, Registers, Opc, ShiftAmt);

  // Combine the 8-bit registers into 16-bit register pairs.
  // This is done either from LSB to MSB or from MSB to LSB, depending on the
  // shift. It's an optimization so that the register allocator will use the
  // fewest movs possible (which order we use isn't a correctness issue, just
  // an optimization issue).
  // - lsl prefers starting from the most significant byte (2nd case).
  // - lshr prefers starting from the least significant byte (1st case).
  // - for ashr it depends on the number of shifted bytes.
  // Some shift operations still don't get the most optimal mov sequences even
  // with this distinction. TODO: figure out why and try to fix it (but we're
  // already equal to or faster than avr-gcc in all cases except ashr 8).
  if (Opc != ISD::SHL &&
      (Opc != ISD::SRA || (ShiftAmt < 16 || ShiftAmt >= 22))) {
    // Use the resulting registers starting with the least significant byte.
    BuildMI(*BB, MI, dl, TII.get(AVR::REG_SEQUENCE), MI.getOperand(0).getReg())
        .addReg(Registers[3].first, 0, Registers[3].second)
        .addImm(AVR::sub_lo)
        .addReg(Registers[2].first, 0, Registers[2].second)
        .addImm(AVR::sub_hi);
    BuildMI(*BB, MI, dl, TII.get(AVR::REG_SEQUENCE), MI.getOperand(1).getReg())
        .addReg(Registers[1].first, 0, Registers[1].second)
        .addImm(AVR::sub_lo)
        .addReg(Registers[0].first, 0, Registers[0].second)
        .addImm(AVR::sub_hi);
  } else {
    // Use the resulting registers starting with the most significant byte.
    BuildMI(*BB, MI, dl, TII.get(AVR::REG_SEQUENCE), MI.getOperand(1).getReg())
        .addReg(Registers[0].first, 0, Registers[0].second)
        .addImm(AVR::sub_hi)
        .addReg(Registers[1].first, 0, Registers[1].second)
        .addImm(AVR::sub_lo);
    BuildMI(*BB, MI, dl, TII.get(AVR::REG_SEQUENCE), MI.getOperand(0).getReg())
        .addReg(Registers[2].first, 0, Registers[2].second)
        .addImm(AVR::sub_hi)
        .addReg(Registers[3].first, 0, Registers[3].second)
        .addImm(AVR::sub_lo);
  }

  // Remove the pseudo instruction.
  MI.eraseFromParent();
  return BB;
}

static bool isCopyMulResult(MachineBasicBlock::iterator const &I) {
  if (I->getOpcode() == AVR::COPY) {
    Register SrcReg = I->getOperand(1).getReg();
    return (SrcReg == AVR::R0 || SrcReg == AVR::R1);
  }

  return false;
}

// The mul instructions wreak havoc on our zero_reg R1. We need to clear it
// after the result has been evacuated. This is probably not the best way to do
// it, but it works for now.
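// For example, the code around an 8-bit multiply ends up looking roughly like
// this (registers are illustrative only):
//   mul r24, r22   ; result is left in r1:r0
//   mov r24, r0    ; copy the result out of r1:r0
//   mov r25, r1
//   eor r1, r1     ; inserted below to restore the zero register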
MachineBasicBlock *AVRTargetLowering::insertMul(MachineInstr &MI,
                                                MachineBasicBlock *BB) const {
  const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
  MachineBasicBlock::iterator I(MI);
  ++I; // in any case insert *after* the mul instruction
  if (isCopyMulResult(I))
    ++I;
  if (isCopyMulResult(I))
    ++I;
  BuildMI(*BB, I, MI.getDebugLoc(), TII.get(AVR::EORRdRr), AVR::R1)
      .addReg(AVR::R1)
      .addReg(AVR::R1);
  return BB;
}

// Insert a read from the zero register.
MachineBasicBlock *
AVRTargetLowering::insertCopyZero(MachineInstr &MI,
                                  MachineBasicBlock *BB) const {
  const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
  MachineBasicBlock::iterator I(MI);
  BuildMI(*BB, I, MI.getDebugLoc(), TII.get(AVR::COPY))
      .add(MI.getOperand(0))
      .addReg(Subtarget.getZeroRegister());
  MI.eraseFromParent();
  return BB;
}

// Lower an atomicrmw operation: disable interrupts, do the operation, and
// restore interrupts. This works because all AVR microcontrollers are single
// core.
MachineBasicBlock *AVRTargetLowering::insertAtomicArithmeticOp(
    MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode, int Width) const {
  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
  const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
  MachineBasicBlock::iterator I(MI);
  DebugLoc dl = MI.getDebugLoc();

  // Example instruction sequence, for an atomic 8-bit add:
  //   ldi r25, 5
  //   in r0, SREG
  //   cli
  //   ld r24, X
  //   add r25, r24
  //   st X, r25
  //   out SREG, r0

  const TargetRegisterClass *RC =
      (Width == 8) ? &AVR::GPR8RegClass : &AVR::DREGSRegClass;
  unsigned LoadOpcode = (Width == 8) ? AVR::LDRdPtr : AVR::LDWRdPtr;
  unsigned StoreOpcode = (Width == 8) ? AVR::STPtrRr : AVR::STWPtrRr;

  // Disable interrupts.
  BuildMI(*BB, I, dl, TII.get(AVR::INRdA), Subtarget.getTmpRegister())
      .addImm(Subtarget.getIORegSREG());
  BuildMI(*BB, I, dl, TII.get(AVR::BCLRs)).addImm(7);

  // Load the original value.
  BuildMI(*BB, I, dl, TII.get(LoadOpcode), MI.getOperand(0).getReg())
      .add(MI.getOperand(1));

  // Do the arithmetic operation.
  Register Result = MRI.createVirtualRegister(RC);
  BuildMI(*BB, I, dl, TII.get(Opcode), Result)
      .addReg(MI.getOperand(0).getReg())
      .add(MI.getOperand(2));

  // Store the result.
  BuildMI(*BB, I, dl, TII.get(StoreOpcode))
      .add(MI.getOperand(1))
      .addReg(Result);

  // Restore interrupts.
  BuildMI(*BB, I, dl, TII.get(AVR::OUTARr))
      .addImm(Subtarget.getIORegSREG())
      .addReg(Subtarget.getTmpRegister());

  // Remove the pseudo instruction.
  MI.eraseFromParent();
  return BB;
}

MachineBasicBlock *
AVRTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                               MachineBasicBlock *MBB) const {
  int Opc = MI.getOpcode();
  const AVRSubtarget &STI = MBB->getParent()->getSubtarget<AVRSubtarget>();

  // Pseudo shift instructions with a non-constant shift amount are expanded
  // into a loop.
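  // For example, an 8-bit shift left by a runtime amount (here in r22) becomes
  // roughly the following loop (registers are illustrative only):
  //       rjmp .Lcheck
  //     .Lloop:
  //       add r24, r24  ; one single-bit shift
  //     .Lcheck:
  //       dec r22
  //       brpl .Lloop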
  switch (Opc) {
  case AVR::Lsl8:
  case AVR::Lsl16:
  case AVR::Lsr8:
  case AVR::Lsr16:
  case AVR::Rol8:
  case AVR::Rol16:
  case AVR::Ror8:
  case AVR::Ror16:
  case AVR::Asr8:
  case AVR::Asr16:
    return insertShift(MI, MBB, STI.hasTinyEncoding());
  case AVR::Lsl32:
  case AVR::Lsr32:
  case AVR::Asr32:
    return insertWideShift(MI, MBB);
  case AVR::MULRdRr:
  case AVR::MULSRdRr:
    return insertMul(MI, MBB);
  case AVR::CopyZero:
    return insertCopyZero(MI, MBB);
  case AVR::AtomicLoadAdd8:
    return insertAtomicArithmeticOp(MI, MBB, AVR::ADDRdRr, 8);
  case AVR::AtomicLoadAdd16:
    return insertAtomicArithmeticOp(MI, MBB, AVR::ADDWRdRr, 16);
  case AVR::AtomicLoadSub8:
    return insertAtomicArithmeticOp(MI, MBB, AVR::SUBRdRr, 8);
  case AVR::AtomicLoadSub16:
    return insertAtomicArithmeticOp(MI, MBB, AVR::SUBWRdRr, 16);
  case AVR::AtomicLoadAnd8:
    return insertAtomicArithmeticOp(MI, MBB, AVR::ANDRdRr, 8);
  case AVR::AtomicLoadAnd16:
    return insertAtomicArithmeticOp(MI, MBB, AVR::ANDWRdRr, 16);
  case AVR::AtomicLoadOr8:
    return insertAtomicArithmeticOp(MI, MBB, AVR::ORRdRr, 8);
  case AVR::AtomicLoadOr16:
    return insertAtomicArithmeticOp(MI, MBB, AVR::ORWRdRr, 16);
  case AVR::AtomicLoadXor8:
    return insertAtomicArithmeticOp(MI, MBB, AVR::EORRdRr, 8);
  case AVR::AtomicLoadXor16:
    return insertAtomicArithmeticOp(MI, MBB, AVR::EORWRdRr, 16);
  }

  assert((Opc == AVR::Select16 || Opc == AVR::Select8) &&
         "Unexpected instr type to insert");

  const AVRInstrInfo &TII = (const AVRInstrInfo &)*MI.getParent()
                                ->getParent()
                                ->getSubtarget()
                                .getInstrInfo();
  DebugLoc dl = MI.getDebugLoc();

  // To "insert" a SELECT instruction, we insert the diamond
  // control-flow pattern. The incoming instruction knows the
  // destination vreg to set, the condition code register to branch
  // on, the true/false values to select between, and a branch opcode
  // to use.

  MachineFunction *MF = MBB->getParent();
  const BasicBlock *LLVM_BB = MBB->getBasicBlock();
  MachineBasicBlock *FallThrough = MBB->getFallThrough();

  // If the current basic block falls through to another basic block,
  // we must insert an unconditional branch to the fallthrough destination
  // if we are to insert basic blocks at the prior fallthrough point.
  if (FallThrough != nullptr) {
    BuildMI(MBB, dl, TII.get(AVR::RJMPk)).addMBB(FallThrough);
  }

  MachineBasicBlock *trueMBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *falseMBB = MF->CreateMachineBasicBlock(LLVM_BB);

  MachineFunction::iterator I;
  for (I = MF->begin(); I != MF->end() && &(*I) != MBB; ++I)
    ;
  if (I != MF->end())
    ++I;
  MF->insert(I, trueMBB);
  MF->insert(I, falseMBB);

  // Set the call frame size on entry to the new basic blocks.
  unsigned CallFrameSize = TII.getCallFrameSizeAt(MI);
  trueMBB->setCallFrameSize(CallFrameSize);
  falseMBB->setCallFrameSize(CallFrameSize);

  // Transfer remaining instructions and all successors of the current
  // block to the block which will contain the Phi node for the
  // select.
  trueMBB->splice(trueMBB->begin(), MBB,
                  std::next(MachineBasicBlock::iterator(MI)), MBB->end());
  trueMBB->transferSuccessorsAndUpdatePHIs(MBB);

  AVRCC::CondCodes CC = (AVRCC::CondCodes)MI.getOperand(3).getImm();
  BuildMI(MBB, dl, TII.getBrCond(CC)).addMBB(trueMBB);
  BuildMI(MBB, dl, TII.get(AVR::RJMPk)).addMBB(falseMBB);
  MBB->addSuccessor(falseMBB);
  MBB->addSuccessor(trueMBB);

  // Unconditionally flow back to the true block.
  BuildMI(falseMBB, dl, TII.get(AVR::RJMPk)).addMBB(trueMBB);
  falseMBB->addSuccessor(trueMBB);

  // Set up the Phi node to determine where we came from.
  BuildMI(*trueMBB, trueMBB->begin(), dl, TII.get(AVR::PHI),
          MI.getOperand(0).getReg())
      .addReg(MI.getOperand(1).getReg())
      .addMBB(MBB)
      .addReg(MI.getOperand(2).getReg())
      .addMBB(falseMBB);

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return trueMBB;
}

//===----------------------------------------------------------------------===//
// Inline Asm Support
//===----------------------------------------------------------------------===//

AVRTargetLowering::ConstraintType
AVRTargetLowering::getConstraintType(StringRef Constraint) const {
  if (Constraint.size() == 1) {
    // See http://www.nongnu.org/avr-libc/user-manual/inline_asm.html
    switch (Constraint[0]) {
    default:
      break;
    case 'a': // Simple upper registers
    case 'b': // Base pointer register pairs
    case 'd': // Upper register
    case 'l': // Lower registers
    case 'e': // Pointer register pairs
    case 'q': // Stack pointer register
    case 'r': // Any register
    case 'w': // Special upper register pairs
      return C_RegisterClass;
    case 't': // Temporary register
    case 'x':
    case 'X': // Pointer register pair X
    case 'y':
    case 'Y': // Pointer register pair Y
    case 'z':
    case 'Z': // Pointer register pair Z
      return C_Register;
    case 'Q': // A memory address based on Y or Z pointer with displacement.
      return C_Memory;
    case 'G': // Floating point constant
    case 'I': // 6-bit positive integer constant
    case 'J': // 6-bit negative integer constant
    case 'K': // Integer constant (Range: 2)
    case 'L': // Integer constant (Range: 0)
    case 'M': // 8-bit integer constant
    case 'N': // Integer constant (Range: -1)
    case 'O': // Integer constant (Range: 8, 16, 24)
    case 'P': // Integer constant (Range: 1)
    case 'R': // Integer constant (Range: -6 to 5)
      return C_Immediate;
    }
  }

  return TargetLowering::getConstraintType(Constraint);
}

InlineAsm::ConstraintCode
AVRTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
  // Not sure if this is actually the right thing to do, but we've got to do
  // *something* [agnat]
  switch (ConstraintCode[0]) {
  case 'Q':
    return InlineAsm::ConstraintCode::Q;
  }
  return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
}

AVRTargetLowering::ConstraintWeight
AVRTargetLowering::getSingleConstraintMatchWeight(
    AsmOperandInfo &info, const char *constraint) const {
  ConstraintWeight weight = CW_Invalid;
  Value *CallOperandVal = info.CallOperandVal;

  // If we don't have a value, we can't do a match,
  // but allow it at the lowest weight.
  // (this behaviour has been copied from the ARM backend)
  if (!CallOperandVal) {
    return CW_Default;
  }

  // Look at the constraint type.
  switch (*constraint) {
  default:
    weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
    break;
  case 'd':
  case 'r':
  case 'l':
    weight = CW_Register;
    break;
  case 'a':
  case 'b':
  case 'e':
  case 'q':
  case 't':
  case 'w':
  case 'x':
  case 'X':
  case 'y':
  case 'Y':
  case 'z':
  case 'Z':
    weight = CW_SpecificReg;
    break;
  case 'G':
    if (const ConstantFP *C = dyn_cast<ConstantFP>(CallOperandVal)) {
      if (C->isZero()) {
        weight = CW_Constant;
      }
    }
    break;
  case 'I':
    if (const ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
      if (isUInt<6>(C->getZExtValue())) {
        weight = CW_Constant;
      }
    }
    break;
  case 'J':
    if (const ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
      if ((C->getSExtValue() >= -63) && (C->getSExtValue() <= 0)) {
        weight = CW_Constant;
      }
    }
    break;
  case 'K':
    if (const ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
      if (C->getZExtValue() == 2) {
        weight = CW_Constant;
      }
    }
    break;
  case 'L':
    if (const ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
      if (C->getZExtValue() == 0) {
        weight = CW_Constant;
      }
    }
    break;
  case 'M':
    if (const ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
      if (isUInt<8>(C->getZExtValue())) {
        weight = CW_Constant;
      }
    }
    break;
  case 'N':
    if (const ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
      if (C->getSExtValue() == -1) {
        weight = CW_Constant;
      }
    }
    break;
  case 'O':
    if (const ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
      if ((C->getZExtValue() == 8) || (C->getZExtValue() == 16) ||
          (C->getZExtValue() == 24)) {
        weight = CW_Constant;
      }
    }
    break;
  case 'P':
    if (const ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
      if (C->getZExtValue() == 1) {
        weight = CW_Constant;
      }
    }
    break;
  case 'R':
    if (const ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
      if ((C->getSExtValue() >= -6) && (C->getSExtValue() <= 5)) {
        weight = CW_Constant;
      }
    }
    break;
  case 'Q':
    weight = CW_Memory;
    break;
  }

  return weight;
}

std::pair<unsigned, const TargetRegisterClass *>
AVRTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                                StringRef Constraint,
                                                MVT VT) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'a': // Simple upper registers r16..r23.
      if (VT == MVT::i8)
        return std::make_pair(0U, &AVR::LD8loRegClass);
      else if (VT == MVT::i16)
        return std::make_pair(0U, &AVR::DREGSLD8loRegClass);
      break;
    case 'b': // Base pointer registers: y, z.
      if (VT == MVT::i8 || VT == MVT::i16)
        return std::make_pair(0U, &AVR::PTRDISPREGSRegClass);
      break;
    case 'd': // Upper registers r16..r31.
      if (VT == MVT::i8)
        return std::make_pair(0U, &AVR::LD8RegClass);
      else if (VT == MVT::i16)
        return std::make_pair(0U, &AVR::DLDREGSRegClass);
      break;
    case 'l': // Lower registers r0..r15.
      if (VT == MVT::i8)
        return std::make_pair(0U, &AVR::GPR8loRegClass);
      else if (VT == MVT::i16)
        return std::make_pair(0U, &AVR::DREGSloRegClass);
      break;
    case 'e': // Pointer register pairs: x, y, z.
      if (VT == MVT::i8 || VT == MVT::i16)
        return std::make_pair(0U, &AVR::PTRREGSRegClass);
      break;
    case 'q': // Stack pointer register: SPH:SPL.
      return std::make_pair(0U, &AVR::GPRSPRegClass);
    case 'r': // Any register: r0..r31.
      if (VT == MVT::i8)
        return std::make_pair(0U, &AVR::GPR8RegClass);
      else if (VT == MVT::i16)
        return std::make_pair(0U, &AVR::DREGSRegClass);
      break;
    case 't': // Temporary register: r0.
      if (VT == MVT::i8)
        return std::make_pair(unsigned(Subtarget.getTmpRegister()),
                              &AVR::GPR8RegClass);
      break;
    case 'w': // Special upper register pairs: r24, r26, r28, r30.
      if (VT == MVT::i8 || VT == MVT::i16)
        return std::make_pair(0U, &AVR::IWREGSRegClass);
      break;
    case 'x': // Pointer register pair X: r27:r26.
    case 'X':
      if (VT == MVT::i8 || VT == MVT::i16)
        return std::make_pair(unsigned(AVR::R27R26), &AVR::PTRREGSRegClass);
      break;
    case 'y': // Pointer register pair Y: r29:r28.
    case 'Y':
      if (VT == MVT::i8 || VT == MVT::i16)
        return std::make_pair(unsigned(AVR::R29R28), &AVR::PTRREGSRegClass);
      break;
    case 'z': // Pointer register pair Z: r31:r30.
    case 'Z':
      if (VT == MVT::i8 || VT == MVT::i16)
        return std::make_pair(unsigned(AVR::R31R30), &AVR::PTRREGSRegClass);
      break;
    default:
      break;
    }
  }

  return TargetLowering::getRegForInlineAsmConstraint(
      Subtarget.getRegisterInfo(), Constraint, VT);
}

void AVRTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                     StringRef Constraint,
                                                     std::vector<SDValue> &Ops,
                                                     SelectionDAG &DAG) const {
  SDValue Result;
  SDLoc DL(Op);
  EVT Ty = Op.getValueType();

  // Currently we only support length-1 constraints.
  if (Constraint.size() != 1) {
    return;
  }

  char ConstraintLetter = Constraint[0];
  switch (ConstraintLetter) {
  default:
    break;
  // Deal with integers first:
  case 'I':
  case 'J':
  case 'K':
  case 'L':
  case 'M':
  case 'N':
  case 'O':
  case 'P':
  case 'R': {
    const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
    if (!C) {
      return;
    }

    int64_t CVal64 = C->getSExtValue();
    uint64_t CUVal64 = C->getZExtValue();
    switch (ConstraintLetter) {
    case 'I': // 0..63
      if (!isUInt<6>(CUVal64))
        return;
      Result = DAG.getTargetConstant(CUVal64, DL, Ty);
      break;
    case 'J': // -63..0
      if (CVal64 < -63 || CVal64 > 0)
        return;
      Result = DAG.getTargetConstant(CVal64, DL, Ty);
      break;
    case 'K': // 2
      if (CUVal64 != 2)
        return;
      Result = DAG.getTargetConstant(CUVal64, DL, Ty);
      break;
    case 'L': // 0
      if (CUVal64 != 0)
        return;
      Result = DAG.getTargetConstant(CUVal64, DL, Ty);
      break;
    case 'M': // 0..255
      if (!isUInt<8>(CUVal64))
        return;
      // i8 type may be printed as a negative number,
      // e.g. 254 would be printed as -2,
      // so we force it to i16 at least.
      if (Ty.getSimpleVT() == MVT::i8) {
        Ty = MVT::i16;
      }
      Result = DAG.getTargetConstant(CUVal64, DL, Ty);
      break;
    case 'N': // -1
      if (CVal64 != -1)
        return;
      Result = DAG.getTargetConstant(CVal64, DL, Ty);
      break;
    case 'O': // 8, 16, 24
      if (CUVal64 != 8 && CUVal64 != 16 && CUVal64 != 24)
        return;
      Result = DAG.getTargetConstant(CUVal64, DL, Ty);
      break;
    case 'P': // 1
      if (CUVal64 != 1)
        return;
      Result = DAG.getTargetConstant(CUVal64, DL, Ty);
      break;
    case 'R': // -6..5
      if (CVal64 < -6 || CVal64 > 5)
        return;
      Result = DAG.getTargetConstant(CVal64, DL, Ty);
      break;
    }

    break;
  }
  case 'G':
    const ConstantFPSDNode *FC = dyn_cast<ConstantFPSDNode>(Op);
    if (!FC || !FC->isZero())
      return;
    // Soften float to i8 0
    Result = DAG.getTargetConstant(0, DL, MVT::i8);
    break;
  }

  if (Result.getNode()) {
    Ops.push_back(Result);
    return;
  }

  return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}

Register AVRTargetLowering::getRegisterByName(const char *RegName, LLT VT,
                                              const MachineFunction &MF) const {
  Register Reg;

  if (VT == LLT::scalar(8)) {
    Reg = StringSwitch<unsigned>(RegName)
              .Case("r0", AVR::R0)
              .Case("r1", AVR::R1)
              .Default(0);
  } else {
    Reg = StringSwitch<unsigned>(RegName)
              .Case("r0", AVR::R1R0)
              .Case("sp", AVR::SP)
              .Default(0);
  }

  if (Reg)
    return Reg;

  report_fatal_error(
      Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
}

} // end of namespace llvm