1 //===-- SystemZTDC.cpp - Utilize Test Data Class instruction --------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This pass looks for instructions that can be replaced by a Test Data Class 10 // instruction, and replaces them when profitable. 11 // 12 // Roughly, the following rules are recognized: 13 // 14 // 1: fcmp pred X, 0 -> tdc X, mask 15 // 2: fcmp pred X, +-inf -> tdc X, mask 16 // 3: fcmp pred X, +-minnorm -> tdc X, mask 17 // 4: tdc (fabs X), mask -> tdc X, newmask 18 // 5: icmp slt (bitcast float X to int), 0 -> tdc X, mask [ie. signbit] 19 // 6: icmp sgt (bitcast float X to int), -1 -> tdc X, mask 20 // 7: icmp ne/eq (call @llvm.s390.tdc.*(X, mask)) -> tdc X, mask/~mask 21 // 8: and i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 & M2) 22 // 9: or i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 | M2) 23 // 10: xor i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 ^ M2) 24 // 25 // The pass works in 4 steps: 26 // 27 // 1. All fcmp and icmp instructions in a function are checked for a match 28 // with rules 1-3 and 5-7. Their TDC equivalents are stored in 29 // the ConvertedInsts mapping. If the operand of a fcmp instruction is 30 // a fabs, it's also folded according to rule 4. 31 // 2. All and/or/xor i1 instructions whose both operands have been already 32 // mapped are mapped according to rules 8-10. LogicOpsWorklist is used 33 // as a queue of instructions to check. 34 // 3. All mapped instructions that are considered worthy of conversion (ie. 35 // replacing them will actually simplify the final code) are replaced 36 // with a call to the s390.tdc intrinsic. 37 // 4. All intermediate results of replaced instructions are removed if unused. 38 // 39 // Instructions that match rules 1-3 are considered unworthy of conversion 40 // on their own (since a comparison instruction is superior), but are mapped 41 // in the hopes of folding the result using rules 4 and 8-10 (likely removing 42 // the original comparison in the process). 43 // 44 //===----------------------------------------------------------------------===// 45 46 #include "SystemZ.h" 47 #include "SystemZSubtarget.h" 48 #include "llvm/ADT/MapVector.h" 49 #include "llvm/CodeGen/TargetPassConfig.h" 50 #include "llvm/IR/Constants.h" 51 #include "llvm/IR/IRBuilder.h" 52 #include "llvm/IR/InstIterator.h" 53 #include "llvm/IR/Instructions.h" 54 #include "llvm/IR/IntrinsicInst.h" 55 #include "llvm/IR/IntrinsicsS390.h" 56 #include "llvm/IR/LegacyPassManager.h" 57 #include "llvm/IR/Module.h" 58 #include "llvm/Target/TargetMachine.h" 59 #include <deque> 60 #include <set> 61 62 using namespace llvm; 63 64 namespace llvm { 65 void initializeSystemZTDCPassPass(PassRegistry&); 66 } 67 68 namespace { 69 70 class SystemZTDCPass : public FunctionPass { 71 public: 72 static char ID; 73 SystemZTDCPass() : FunctionPass(ID) { 74 initializeSystemZTDCPassPass(*PassRegistry::getPassRegistry()); 75 } 76 77 bool runOnFunction(Function &F) override; 78 79 void getAnalysisUsage(AnalysisUsage &AU) const override { 80 AU.addRequired<TargetPassConfig>(); 81 } 82 83 private: 84 // Maps seen instructions that can be mapped to a TDC, values are 85 // (TDC operand, TDC mask, worthy flag) triples. 86 MapVector<Instruction *, std::tuple<Value *, int, bool>> ConvertedInsts; 87 // The queue of and/or/xor i1 instructions to be potentially folded. 88 std::vector<BinaryOperator *> LogicOpsWorklist; 89 // Instructions matched while folding, to be removed at the end if unused. 90 std::set<Instruction *> PossibleJunk; 91 92 // Tries to convert a fcmp instruction. 93 void convertFCmp(CmpInst &I); 94 95 // Tries to convert an icmp instruction. 96 void convertICmp(CmpInst &I); 97 98 // Tries to convert an i1 and/or/xor instruction, whose both operands 99 // have been already converted. 100 void convertLogicOp(BinaryOperator &I); 101 102 // Marks an instruction as converted - adds it to ConvertedInsts and adds 103 // any and/or/xor i1 users to the queue. 104 void converted(Instruction *I, Value *V, int Mask, bool Worthy) { 105 ConvertedInsts[I] = std::make_tuple(V, Mask, Worthy); 106 auto &M = *I->getFunction()->getParent(); 107 auto &Ctx = M.getContext(); 108 for (auto *U : I->users()) { 109 auto *LI = dyn_cast<BinaryOperator>(U); 110 if (LI && LI->getType() == Type::getInt1Ty(Ctx) && 111 (LI->getOpcode() == Instruction::And || 112 LI->getOpcode() == Instruction::Or || 113 LI->getOpcode() == Instruction::Xor)) { 114 LogicOpsWorklist.push_back(LI); 115 } 116 } 117 } 118 }; 119 120 } // end anonymous namespace 121 122 char SystemZTDCPass::ID = 0; 123 INITIALIZE_PASS(SystemZTDCPass, "systemz-tdc", 124 "SystemZ Test Data Class optimization", false, false) 125 126 FunctionPass *llvm::createSystemZTDCPass() { 127 return new SystemZTDCPass(); 128 } 129 130 void SystemZTDCPass::convertFCmp(CmpInst &I) { 131 Value *Op0 = I.getOperand(0); 132 auto *Const = dyn_cast<ConstantFP>(I.getOperand(1)); 133 auto Pred = I.getPredicate(); 134 // Only comparisons with consts are interesting. 135 if (!Const) 136 return; 137 // Compute the smallest normal number (and its negation). 138 auto &Sem = Op0->getType()->getFltSemantics(); 139 APFloat Smallest = APFloat::getSmallestNormalized(Sem); 140 APFloat NegSmallest = Smallest; 141 NegSmallest.changeSign(); 142 // Check if Const is one of our recognized consts. 143 int WhichConst; 144 if (Const->isZero()) { 145 // All comparisons with 0 can be converted. 146 WhichConst = 0; 147 } else if (Const->isInfinity()) { 148 // Likewise for infinities. 149 WhichConst = Const->isNegative() ? 2 : 1; 150 } else if (Const->isExactlyValue(Smallest)) { 151 // For Smallest, we cannot do EQ separately from GT. 152 if ((Pred & CmpInst::FCMP_OGE) != CmpInst::FCMP_OGE && 153 (Pred & CmpInst::FCMP_OGE) != 0) 154 return; 155 WhichConst = 3; 156 } else if (Const->isExactlyValue(NegSmallest)) { 157 // Likewise for NegSmallest, we cannot do EQ separately from LT. 158 if ((Pred & CmpInst::FCMP_OLE) != CmpInst::FCMP_OLE && 159 (Pred & CmpInst::FCMP_OLE) != 0) 160 return; 161 WhichConst = 4; 162 } else { 163 // Not one of our special constants. 164 return; 165 } 166 // Partial masks to use for EQ, GT, LT, UN comparisons, respectively. 167 static const int Masks[][4] = { 168 { // 0 169 SystemZ::TDCMASK_ZERO, // eq 170 SystemZ::TDCMASK_POSITIVE, // gt 171 SystemZ::TDCMASK_NEGATIVE, // lt 172 SystemZ::TDCMASK_NAN, // un 173 }, 174 { // inf 175 SystemZ::TDCMASK_INFINITY_PLUS, // eq 176 0, // gt 177 (SystemZ::TDCMASK_ZERO | 178 SystemZ::TDCMASK_NEGATIVE | 179 SystemZ::TDCMASK_NORMAL_PLUS | 180 SystemZ::TDCMASK_SUBNORMAL_PLUS), // lt 181 SystemZ::TDCMASK_NAN, // un 182 }, 183 { // -inf 184 SystemZ::TDCMASK_INFINITY_MINUS, // eq 185 (SystemZ::TDCMASK_ZERO | 186 SystemZ::TDCMASK_POSITIVE | 187 SystemZ::TDCMASK_NORMAL_MINUS | 188 SystemZ::TDCMASK_SUBNORMAL_MINUS), // gt 189 0, // lt 190 SystemZ::TDCMASK_NAN, // un 191 }, 192 { // minnorm 193 0, // eq (unsupported) 194 (SystemZ::TDCMASK_NORMAL_PLUS | 195 SystemZ::TDCMASK_INFINITY_PLUS), // gt (actually ge) 196 (SystemZ::TDCMASK_ZERO | 197 SystemZ::TDCMASK_NEGATIVE | 198 SystemZ::TDCMASK_SUBNORMAL_PLUS), // lt 199 SystemZ::TDCMASK_NAN, // un 200 }, 201 { // -minnorm 202 0, // eq (unsupported) 203 (SystemZ::TDCMASK_ZERO | 204 SystemZ::TDCMASK_POSITIVE | 205 SystemZ::TDCMASK_SUBNORMAL_MINUS), // gt 206 (SystemZ::TDCMASK_NORMAL_MINUS | 207 SystemZ::TDCMASK_INFINITY_MINUS), // lt (actually le) 208 SystemZ::TDCMASK_NAN, // un 209 } 210 }; 211 // Construct the mask as a combination of the partial masks. 212 int Mask = 0; 213 if (Pred & CmpInst::FCMP_OEQ) 214 Mask |= Masks[WhichConst][0]; 215 if (Pred & CmpInst::FCMP_OGT) 216 Mask |= Masks[WhichConst][1]; 217 if (Pred & CmpInst::FCMP_OLT) 218 Mask |= Masks[WhichConst][2]; 219 if (Pred & CmpInst::FCMP_UNO) 220 Mask |= Masks[WhichConst][3]; 221 // A lone fcmp is unworthy of tdc conversion on its own, but may become 222 // worthy if combined with fabs. 223 bool Worthy = false; 224 if (CallInst *CI = dyn_cast<CallInst>(Op0)) { 225 Function *F = CI->getCalledFunction(); 226 if (F && F->getIntrinsicID() == Intrinsic::fabs) { 227 // Fold with fabs - adjust the mask appropriately. 228 Mask &= SystemZ::TDCMASK_PLUS; 229 Mask |= Mask >> 1; 230 Op0 = CI->getArgOperand(0); 231 // A combination of fcmp with fabs is a win, unless the constant 232 // involved is 0 (which is handled by later passes). 233 Worthy = WhichConst != 0; 234 PossibleJunk.insert(CI); 235 } 236 } 237 converted(&I, Op0, Mask, Worthy); 238 } 239 240 void SystemZTDCPass::convertICmp(CmpInst &I) { 241 Value *Op0 = I.getOperand(0); 242 auto *Const = dyn_cast<ConstantInt>(I.getOperand(1)); 243 auto Pred = I.getPredicate(); 244 // All our icmp rules involve comparisons with consts. 245 if (!Const) 246 return; 247 if (auto *Cast = dyn_cast<BitCastInst>(Op0)) { 248 // Check for icmp+bitcast used for signbit. 249 if (!Cast->getSrcTy()->isFloatTy() && 250 !Cast->getSrcTy()->isDoubleTy() && 251 !Cast->getSrcTy()->isFP128Ty()) 252 return; 253 Value *V = Cast->getOperand(0); 254 int Mask; 255 if (Pred == CmpInst::ICMP_SLT && Const->isZero()) { 256 // icmp slt (bitcast X), 0 - set if sign bit true 257 Mask = SystemZ::TDCMASK_MINUS; 258 } else if (Pred == CmpInst::ICMP_SGT && Const->isMinusOne()) { 259 // icmp sgt (bitcast X), -1 - set if sign bit false 260 Mask = SystemZ::TDCMASK_PLUS; 261 } else { 262 // Not a sign bit check. 263 return; 264 } 265 PossibleJunk.insert(Cast); 266 converted(&I, V, Mask, true); 267 } else if (auto *CI = dyn_cast<CallInst>(Op0)) { 268 // Check if this is a pre-existing call of our tdc intrinsic. 269 Function *F = CI->getCalledFunction(); 270 if (!F || F->getIntrinsicID() != Intrinsic::s390_tdc) 271 return; 272 if (!Const->isZero()) 273 return; 274 Value *V = CI->getArgOperand(0); 275 auto *MaskC = dyn_cast<ConstantInt>(CI->getArgOperand(1)); 276 // Bail if the mask is not a constant. 277 if (!MaskC) 278 return; 279 int Mask = MaskC->getZExtValue(); 280 Mask &= SystemZ::TDCMASK_ALL; 281 if (Pred == CmpInst::ICMP_NE) { 282 // icmp ne (call llvm.s390.tdc(...)), 0 -> simple TDC 283 } else if (Pred == CmpInst::ICMP_EQ) { 284 // icmp eq (call llvm.s390.tdc(...)), 0 -> TDC with inverted mask 285 Mask ^= SystemZ::TDCMASK_ALL; 286 } else { 287 // An unknown comparison - ignore. 288 return; 289 } 290 PossibleJunk.insert(CI); 291 converted(&I, V, Mask, false); 292 } 293 } 294 295 void SystemZTDCPass::convertLogicOp(BinaryOperator &I) { 296 Value *Op0, *Op1; 297 int Mask0, Mask1; 298 bool Worthy0, Worthy1; 299 std::tie(Op0, Mask0, Worthy0) = ConvertedInsts[cast<Instruction>(I.getOperand(0))]; 300 std::tie(Op1, Mask1, Worthy1) = ConvertedInsts[cast<Instruction>(I.getOperand(1))]; 301 if (Op0 != Op1) 302 return; 303 int Mask; 304 switch (I.getOpcode()) { 305 case Instruction::And: 306 Mask = Mask0 & Mask1; 307 break; 308 case Instruction::Or: 309 Mask = Mask0 | Mask1; 310 break; 311 case Instruction::Xor: 312 Mask = Mask0 ^ Mask1; 313 break; 314 default: 315 llvm_unreachable("Unknown op in convertLogicOp"); 316 } 317 converted(&I, Op0, Mask, true); 318 } 319 320 bool SystemZTDCPass::runOnFunction(Function &F) { 321 auto &TPC = getAnalysis<TargetPassConfig>(); 322 if (TPC.getTM<TargetMachine>() 323 .getSubtarget<SystemZSubtarget>(F) 324 .hasSoftFloat()) 325 return false; 326 327 ConvertedInsts.clear(); 328 LogicOpsWorklist.clear(); 329 PossibleJunk.clear(); 330 331 // Look for icmp+fcmp instructions. 332 for (auto &I : instructions(F)) { 333 if (I.getOpcode() == Instruction::FCmp) 334 convertFCmp(cast<CmpInst>(I)); 335 else if (I.getOpcode() == Instruction::ICmp) 336 convertICmp(cast<CmpInst>(I)); 337 } 338 339 // If none found, bail already. 340 if (ConvertedInsts.empty()) 341 return false; 342 343 // Process the queue of logic instructions. 344 while (!LogicOpsWorklist.empty()) { 345 BinaryOperator *Op = LogicOpsWorklist.back(); 346 LogicOpsWorklist.pop_back(); 347 // If both operands mapped, and the instruction itself not yet mapped, 348 // convert it. 349 if (ConvertedInsts.count(dyn_cast<Instruction>(Op->getOperand(0))) && 350 ConvertedInsts.count(dyn_cast<Instruction>(Op->getOperand(1))) && 351 !ConvertedInsts.count(Op)) 352 convertLogicOp(*Op); 353 } 354 355 // Time to actually replace the instructions. Do it in the reverse order 356 // of finding them, since there's a good chance the earlier ones will be 357 // unused (due to being folded into later ones). 358 Module &M = *F.getParent(); 359 auto &Ctx = M.getContext(); 360 Value *Zero32 = ConstantInt::get(Type::getInt32Ty(Ctx), 0); 361 bool MadeChange = false; 362 for (auto &It : reverse(ConvertedInsts)) { 363 Instruction *I = It.first; 364 Value *V; 365 int Mask; 366 bool Worthy; 367 std::tie(V, Mask, Worthy) = It.second; 368 if (!I->user_empty()) { 369 // If used and unworthy of conversion, skip it. 370 if (!Worthy) 371 continue; 372 // Call the intrinsic, compare result with 0. 373 Function *TDCFunc = 374 Intrinsic::getDeclaration(&M, Intrinsic::s390_tdc, V->getType()); 375 IRBuilder<> IRB(I); 376 Value *MaskVal = ConstantInt::get(Type::getInt64Ty(Ctx), Mask); 377 Instruction *TDC = IRB.CreateCall(TDCFunc, {V, MaskVal}); 378 Value *ICmp = IRB.CreateICmp(CmpInst::ICMP_NE, TDC, Zero32); 379 I->replaceAllUsesWith(ICmp); 380 } 381 // If unused, or used and converted, remove it. 382 I->eraseFromParent(); 383 MadeChange = true; 384 } 385 386 if (!MadeChange) 387 return false; 388 389 // We've actually done something - now clear misc accumulated junk (fabs, 390 // bitcast). 391 for (auto *I : PossibleJunk) 392 if (I->user_empty()) 393 I->eraseFromParent(); 394 395 return true; 396 } 397