1 //===- AArch64MacroFusion.cpp - AArch64 Macro Fusion ----------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file This file contains the AArch64 implementation of the DAG scheduling 10 /// mutation to pair instructions back to back. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "AArch64Subtarget.h" 15 #include "llvm/CodeGen/MacroFusion.h" 16 #include "llvm/CodeGen/TargetInstrInfo.h" 17 18 using namespace llvm; 19 20 namespace { 21 22 /// CMN, CMP, TST followed by Bcc 23 static bool isArithmeticBccPair(const MachineInstr *FirstMI, 24 const MachineInstr &SecondMI, bool CmpOnly) { 25 if (SecondMI.getOpcode() != AArch64::Bcc) 26 return false; 27 28 // Assume the 1st instr to be a wildcard if it is unspecified. 29 if (FirstMI == nullptr) 30 return true; 31 32 // If we're in CmpOnly mode, we only fuse arithmetic instructions that 33 // discard their result. 34 if (CmpOnly && !(FirstMI->getOperand(0).getReg() == AArch64::XZR || 35 FirstMI->getOperand(0).getReg() == AArch64::WZR)) { 36 return false; 37 } 38 39 switch (FirstMI->getOpcode()) { 40 case AArch64::ADDSWri: 41 case AArch64::ADDSWrr: 42 case AArch64::ADDSXri: 43 case AArch64::ADDSXrr: 44 case AArch64::ANDSWri: 45 case AArch64::ANDSWrr: 46 case AArch64::ANDSXri: 47 case AArch64::ANDSXrr: 48 case AArch64::SUBSWri: 49 case AArch64::SUBSWrr: 50 case AArch64::SUBSXri: 51 case AArch64::SUBSXrr: 52 case AArch64::BICSWrr: 53 case AArch64::BICSXrr: 54 return true; 55 case AArch64::ADDSWrs: 56 case AArch64::ADDSXrs: 57 case AArch64::ANDSWrs: 58 case AArch64::ANDSXrs: 59 case AArch64::SUBSWrs: 60 case AArch64::SUBSXrs: 61 case AArch64::BICSWrs: 62 case AArch64::BICSXrs: 63 // Shift value can be 0 making these behave like the "rr" variant... 64 return !AArch64InstrInfo::hasShiftedReg(*FirstMI); 65 } 66 67 return false; 68 } 69 70 /// ALU operations followed by CBZ/CBNZ. 71 static bool isArithmeticCbzPair(const MachineInstr *FirstMI, 72 const MachineInstr &SecondMI) { 73 if (SecondMI.getOpcode() != AArch64::CBZW && 74 SecondMI.getOpcode() != AArch64::CBZX && 75 SecondMI.getOpcode() != AArch64::CBNZW && 76 SecondMI.getOpcode() != AArch64::CBNZX) 77 return false; 78 79 // Assume the 1st instr to be a wildcard if it is unspecified. 80 if (FirstMI == nullptr) 81 return true; 82 83 switch (FirstMI->getOpcode()) { 84 case AArch64::ADDWri: 85 case AArch64::ADDWrr: 86 case AArch64::ADDXri: 87 case AArch64::ADDXrr: 88 case AArch64::ANDWri: 89 case AArch64::ANDWrr: 90 case AArch64::ANDXri: 91 case AArch64::ANDXrr: 92 case AArch64::EORWri: 93 case AArch64::EORWrr: 94 case AArch64::EORXri: 95 case AArch64::EORXrr: 96 case AArch64::ORRWri: 97 case AArch64::ORRWrr: 98 case AArch64::ORRXri: 99 case AArch64::ORRXrr: 100 case AArch64::SUBWri: 101 case AArch64::SUBWrr: 102 case AArch64::SUBXri: 103 case AArch64::SUBXrr: 104 return true; 105 case AArch64::ADDWrs: 106 case AArch64::ADDXrs: 107 case AArch64::ANDWrs: 108 case AArch64::ANDXrs: 109 case AArch64::SUBWrs: 110 case AArch64::SUBXrs: 111 case AArch64::BICWrs: 112 case AArch64::BICXrs: 113 // Shift value can be 0 making these behave like the "rr" variant... 114 return !AArch64InstrInfo::hasShiftedReg(*FirstMI); 115 } 116 117 return false; 118 } 119 120 /// AES crypto encoding or decoding. 121 static bool isAESPair(const MachineInstr *FirstMI, 122 const MachineInstr &SecondMI) { 123 // Assume the 1st instr to be a wildcard if it is unspecified. 124 switch (SecondMI.getOpcode()) { 125 // AES encode. 126 case AArch64::AESMCrr: 127 case AArch64::AESMCrrTied: 128 return FirstMI == nullptr || FirstMI->getOpcode() == AArch64::AESErr; 129 // AES decode. 130 case AArch64::AESIMCrr: 131 case AArch64::AESIMCrrTied: 132 return FirstMI == nullptr || FirstMI->getOpcode() == AArch64::AESDrr; 133 } 134 135 return false; 136 } 137 138 /// AESE/AESD/PMULL + EOR. 139 static bool isCryptoEORPair(const MachineInstr *FirstMI, 140 const MachineInstr &SecondMI) { 141 if (SecondMI.getOpcode() != AArch64::EORv16i8) 142 return false; 143 144 // Assume the 1st instr to be a wildcard if it is unspecified. 145 if (FirstMI == nullptr) 146 return true; 147 148 switch (FirstMI->getOpcode()) { 149 case AArch64::AESErr: 150 case AArch64::AESDrr: 151 case AArch64::PMULLv16i8: 152 case AArch64::PMULLv8i8: 153 case AArch64::PMULLv1i64: 154 case AArch64::PMULLv2i64: 155 return true; 156 } 157 158 return false; 159 } 160 161 /// Literal generation. 162 static bool isLiteralsPair(const MachineInstr *FirstMI, 163 const MachineInstr &SecondMI) { 164 // Assume the 1st instr to be a wildcard if it is unspecified. 165 166 // PC relative address. 167 if ((FirstMI == nullptr || FirstMI->getOpcode() == AArch64::ADRP) && 168 SecondMI.getOpcode() == AArch64::ADDXri) 169 return true; 170 171 // 32 bit immediate. 172 if ((FirstMI == nullptr || FirstMI->getOpcode() == AArch64::MOVZWi) && 173 (SecondMI.getOpcode() == AArch64::MOVKWi && 174 SecondMI.getOperand(3).getImm() == 16)) 175 return true; 176 177 // Lower half of 64 bit immediate. 178 if((FirstMI == nullptr || FirstMI->getOpcode() == AArch64::MOVZXi) && 179 (SecondMI.getOpcode() == AArch64::MOVKXi && 180 SecondMI.getOperand(3).getImm() == 16)) 181 return true; 182 183 // Upper half of 64 bit immediate. 184 if ((FirstMI == nullptr || 185 (FirstMI->getOpcode() == AArch64::MOVKXi && 186 FirstMI->getOperand(3).getImm() == 32)) && 187 (SecondMI.getOpcode() == AArch64::MOVKXi && 188 SecondMI.getOperand(3).getImm() == 48)) 189 return true; 190 191 return false; 192 } 193 194 /// Fuse address generation and loads or stores. 195 static bool isAddressLdStPair(const MachineInstr *FirstMI, 196 const MachineInstr &SecondMI) { 197 switch (SecondMI.getOpcode()) { 198 case AArch64::STRBBui: 199 case AArch64::STRBui: 200 case AArch64::STRDui: 201 case AArch64::STRHHui: 202 case AArch64::STRHui: 203 case AArch64::STRQui: 204 case AArch64::STRSui: 205 case AArch64::STRWui: 206 case AArch64::STRXui: 207 case AArch64::LDRBBui: 208 case AArch64::LDRBui: 209 case AArch64::LDRDui: 210 case AArch64::LDRHHui: 211 case AArch64::LDRHui: 212 case AArch64::LDRQui: 213 case AArch64::LDRSui: 214 case AArch64::LDRWui: 215 case AArch64::LDRXui: 216 case AArch64::LDRSBWui: 217 case AArch64::LDRSBXui: 218 case AArch64::LDRSHWui: 219 case AArch64::LDRSHXui: 220 case AArch64::LDRSWui: 221 // Assume the 1st instr to be a wildcard if it is unspecified. 222 if (FirstMI == nullptr) 223 return true; 224 225 switch (FirstMI->getOpcode()) { 226 case AArch64::ADR: 227 return SecondMI.getOperand(2).getImm() == 0; 228 case AArch64::ADRP: 229 return true; 230 } 231 } 232 233 return false; 234 } 235 236 /// Compare and conditional select. 237 static bool isCCSelectPair(const MachineInstr *FirstMI, 238 const MachineInstr &SecondMI) { 239 // 32 bits 240 if (SecondMI.getOpcode() == AArch64::CSELWr) { 241 // Assume the 1st instr to be a wildcard if it is unspecified. 242 if (FirstMI == nullptr) 243 return true; 244 245 if (FirstMI->definesRegister(AArch64::WZR)) 246 switch (FirstMI->getOpcode()) { 247 case AArch64::SUBSWrs: 248 return !AArch64InstrInfo::hasShiftedReg(*FirstMI); 249 case AArch64::SUBSWrx: 250 return !AArch64InstrInfo::hasExtendedReg(*FirstMI); 251 case AArch64::SUBSWrr: 252 case AArch64::SUBSWri: 253 return true; 254 } 255 } 256 257 // 64 bits 258 if (SecondMI.getOpcode() == AArch64::CSELXr) { 259 // Assume the 1st instr to be a wildcard if it is unspecified. 260 if (FirstMI == nullptr) 261 return true; 262 263 if (FirstMI->definesRegister(AArch64::XZR)) 264 switch (FirstMI->getOpcode()) { 265 case AArch64::SUBSXrs: 266 return !AArch64InstrInfo::hasShiftedReg(*FirstMI); 267 case AArch64::SUBSXrx: 268 case AArch64::SUBSXrx64: 269 return !AArch64InstrInfo::hasExtendedReg(*FirstMI); 270 case AArch64::SUBSXrr: 271 case AArch64::SUBSXri: 272 return true; 273 } 274 } 275 276 return false; 277 } 278 279 // Arithmetic and logic. 280 static bool isArithmeticLogicPair(const MachineInstr *FirstMI, 281 const MachineInstr &SecondMI) { 282 if (AArch64InstrInfo::hasShiftedReg(SecondMI)) 283 return false; 284 285 switch (SecondMI.getOpcode()) { 286 // Arithmetic 287 case AArch64::ADDWrr: 288 case AArch64::ADDXrr: 289 case AArch64::SUBWrr: 290 case AArch64::SUBXrr: 291 case AArch64::ADDWrs: 292 case AArch64::ADDXrs: 293 case AArch64::SUBWrs: 294 case AArch64::SUBXrs: 295 // Logic 296 case AArch64::ANDWrr: 297 case AArch64::ANDXrr: 298 case AArch64::BICWrr: 299 case AArch64::BICXrr: 300 case AArch64::EONWrr: 301 case AArch64::EONXrr: 302 case AArch64::EORWrr: 303 case AArch64::EORXrr: 304 case AArch64::ORNWrr: 305 case AArch64::ORNXrr: 306 case AArch64::ORRWrr: 307 case AArch64::ORRXrr: 308 case AArch64::ANDWrs: 309 case AArch64::ANDXrs: 310 case AArch64::BICWrs: 311 case AArch64::BICXrs: 312 case AArch64::EONWrs: 313 case AArch64::EONXrs: 314 case AArch64::EORWrs: 315 case AArch64::EORXrs: 316 case AArch64::ORNWrs: 317 case AArch64::ORNXrs: 318 case AArch64::ORRWrs: 319 case AArch64::ORRXrs: 320 // Assume the 1st instr to be a wildcard if it is unspecified. 321 if (FirstMI == nullptr) 322 return true; 323 324 // Arithmetic 325 switch (FirstMI->getOpcode()) { 326 case AArch64::ADDWrr: 327 case AArch64::ADDXrr: 328 case AArch64::ADDSWrr: 329 case AArch64::ADDSXrr: 330 case AArch64::SUBWrr: 331 case AArch64::SUBXrr: 332 case AArch64::SUBSWrr: 333 case AArch64::SUBSXrr: 334 return true; 335 case AArch64::ADDWrs: 336 case AArch64::ADDXrs: 337 case AArch64::ADDSWrs: 338 case AArch64::ADDSXrs: 339 case AArch64::SUBWrs: 340 case AArch64::SUBXrs: 341 case AArch64::SUBSWrs: 342 case AArch64::SUBSXrs: 343 return !AArch64InstrInfo::hasShiftedReg(*FirstMI); 344 } 345 break; 346 347 // Arithmetic, setting flags. 348 case AArch64::ADDSWrr: 349 case AArch64::ADDSXrr: 350 case AArch64::SUBSWrr: 351 case AArch64::SUBSXrr: 352 case AArch64::ADDSWrs: 353 case AArch64::ADDSXrs: 354 case AArch64::SUBSWrs: 355 case AArch64::SUBSXrs: 356 // Assume the 1st instr to be a wildcard if it is unspecified. 357 if (FirstMI == nullptr) 358 return true; 359 360 // Arithmetic, not setting flags. 361 switch (FirstMI->getOpcode()) { 362 case AArch64::ADDWrr: 363 case AArch64::ADDXrr: 364 case AArch64::SUBWrr: 365 case AArch64::SUBXrr: 366 return true; 367 case AArch64::ADDWrs: 368 case AArch64::ADDXrs: 369 case AArch64::SUBWrs: 370 case AArch64::SUBXrs: 371 return !AArch64InstrInfo::hasShiftedReg(*FirstMI); 372 } 373 break; 374 } 375 376 return false; 377 } 378 379 /// \brief Check if the instr pair, FirstMI and SecondMI, should be fused 380 /// together. Given SecondMI, when FirstMI is unspecified, then check if 381 /// SecondMI may be part of a fused pair at all. 382 static bool shouldScheduleAdjacent(const TargetInstrInfo &TII, 383 const TargetSubtargetInfo &TSI, 384 const MachineInstr *FirstMI, 385 const MachineInstr &SecondMI) { 386 const AArch64Subtarget &ST = static_cast<const AArch64Subtarget&>(TSI); 387 388 // All checking functions assume that the 1st instr is a wildcard if it is 389 // unspecified. 390 if (ST.hasCmpBccFusion() || ST.hasArithmeticBccFusion()) { 391 bool CmpOnly = !ST.hasArithmeticBccFusion(); 392 if (isArithmeticBccPair(FirstMI, SecondMI, CmpOnly)) 393 return true; 394 } 395 if (ST.hasArithmeticCbzFusion() && isArithmeticCbzPair(FirstMI, SecondMI)) 396 return true; 397 if (ST.hasFuseAES() && isAESPair(FirstMI, SecondMI)) 398 return true; 399 if (ST.hasFuseCryptoEOR() && isCryptoEORPair(FirstMI, SecondMI)) 400 return true; 401 if (ST.hasFuseLiterals() && isLiteralsPair(FirstMI, SecondMI)) 402 return true; 403 if (ST.hasFuseAddress() && isAddressLdStPair(FirstMI, SecondMI)) 404 return true; 405 if (ST.hasFuseCCSelect() && isCCSelectPair(FirstMI, SecondMI)) 406 return true; 407 if (ST.hasFuseArithmeticLogic() && isArithmeticLogicPair(FirstMI, SecondMI)) 408 return true; 409 410 return false; 411 } 412 413 } // end namespace 414 415 416 namespace llvm { 417 418 std::unique_ptr<ScheduleDAGMutation> createAArch64MacroFusionDAGMutation () { 419 return createMacroFusionDAGMutation(shouldScheduleAdjacent); 420 } 421 422 } // end namespace llvm 423