1//==- HexagonPatterns.td - Target Description for Hexagon -*- tablegen -*-===// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8 9// Table of contents: 10// (0) Definitions 11// (1) Immediates 12// (2) Type casts 13// (3) Extend/truncate 14// (4) Logical 15// (5) Compare 16// (6) Select 17// (7) Insert/extract 18// (8) Shift/permute 19// (9) Arithmetic/bitwise 20// (10) Bit 21// (11) PIC 22// (12) Load 23// (13) Store 24// (14) Memop 25// (15) Call 26// (16) Branch 27// (17) Misc 28 29// Guidelines (in no particular order): 30// 1. Avoid relying on pattern ordering to give preference to one pattern 31// over another, prefer using AddedComplexity instead. The reason for 32// this is to avoid unintended conseqeuences (caused by altering the 33// order) when making changes. The current order of patterns in this 34// file obviously does play some role, but none of the ordering was 35// deliberately chosen (other than to create a logical structure of 36// this file). When making changes, adding AddedComplexity to existing 37// patterns may be needed. 38// 2. Maintain the logical structure of the file, try to put new patterns 39// in designated sections. 40// 3. Do not use A2_combinew instruction directly, use Combinew fragment 41// instead. It uses REG_SEQUENCE, which is more amenable to optimizations. 42// 4. Most selection macros are based on PatFrags. For DAGs that involve 43// SDNodes, use pf1/pf2 to convert them to PatFrags. Use common frags 44// whenever possible (see the Definitions section). When adding new 45// macro, try to make is general to enable reuse across sections. 46// 5. Compound instructions (e.g. Rx+Rs*Rt) are generated under the condition 47// that the nested operation has only one use. Having it separated in case 48// of multiple uses avoids duplication of (processor) work. 49// 6. The v4 vector instructions (64-bit) are treated as core instructions, 50// for example, A2_vaddh is in the "arithmetic" section with A2_add. 51// 7. When adding a pattern for an instruction with a constant-extendable 52// operand, allow all possible kinds of inputs for the immediate value 53// (see AnyImm/anyimm and their variants in the Definitions section). 54 55 56// --(0) Definitions ----------------------------------------------------- 57// 58 59// This complex pattern exists only to create a machine instruction operand 60// of type "frame index". There doesn't seem to be a way to do that directly 61// in the patterns. 62def AddrFI: ComplexPattern<i32, 1, "SelectAddrFI", [frameindex], []>; 63 64// These complex patterns are not strictly necessary, since global address 65// folding will happen during DAG combining. For distinguishing between GA 66// and GP, pat frags with HexagonCONST32 and HexagonCONST32_GP can be used. 67def AddrGA: ComplexPattern<i32, 1, "SelectAddrGA", [], []>; 68def AddrGP: ComplexPattern<i32, 1, "SelectAddrGP", [], []>; 69def AnyImm: ComplexPattern<i32, 1, "SelectAnyImm", [], []>; 70def AnyInt: ComplexPattern<i32, 1, "SelectAnyInt", [], []>; 71 72// Global address or a constant being a multiple of 2^n. 73def AnyImm0: ComplexPattern<i32, 1, "SelectAnyImm0", [], []>; 74def AnyImm1: ComplexPattern<i32, 1, "SelectAnyImm1", [], []>; 75def AnyImm2: ComplexPattern<i32, 1, "SelectAnyImm2", [], []>; 76def AnyImm3: ComplexPattern<i32, 1, "SelectAnyImm3", [], []>; 77 78 79// Type helper frags. 80def V2I1: PatLeaf<(v2i1 PredRegs:$R)>; 81def V4I1: PatLeaf<(v4i1 PredRegs:$R)>; 82def V8I1: PatLeaf<(v8i1 PredRegs:$R)>; 83def V4I8: PatLeaf<(v4i8 IntRegs:$R)>; 84def V2I16: PatLeaf<(v2i16 IntRegs:$R)>; 85 86def V8I8: PatLeaf<(v8i8 DoubleRegs:$R)>; 87def V4I16: PatLeaf<(v4i16 DoubleRegs:$R)>; 88def V2I32: PatLeaf<(v2i32 DoubleRegs:$R)>; 89 90def HQ8: PatLeaf<(VecQ8 HvxQR:$R)>; 91def HQ16: PatLeaf<(VecQ16 HvxQR:$R)>; 92def HQ32: PatLeaf<(VecQ32 HvxQR:$R)>; 93 94def HVI8: PatLeaf<(VecI8 HvxVR:$R)>; 95def HVI16: PatLeaf<(VecI16 HvxVR:$R)>; 96def HVI32: PatLeaf<(VecI32 HvxVR:$R)>; 97 98def HWI8: PatLeaf<(VecPI8 HvxWR:$R)>; 99def HWI16: PatLeaf<(VecPI16 HvxWR:$R)>; 100def HWI32: PatLeaf<(VecPI32 HvxWR:$R)>; 101 102def SDTVecLeaf: 103 SDTypeProfile<1, 0, [SDTCisVec<0>]>; 104def SDTVecVecIntOp: 105 SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<1,2>, 106 SDTCisVT<3,i32>]>; 107 108def HexagonPTRUE: SDNode<"HexagonISD::PTRUE", SDTVecLeaf>; 109def HexagonPFALSE: SDNode<"HexagonISD::PFALSE", SDTVecLeaf>; 110def HexagonVALIGN: SDNode<"HexagonISD::VALIGN", SDTVecVecIntOp>; 111def HexagonVALIGNADDR: SDNode<"HexagonISD::VALIGNADDR", SDTIntUnaryOp>; 112 113def ptrue: PatFrag<(ops), (HexagonPTRUE)>; 114def pfalse: PatFrag<(ops), (HexagonPFALSE)>; 115def pnot: PatFrag<(ops node:$Pu), (xor node:$Pu, ptrue)>; 116 117def valign: PatFrag<(ops node:$Vt, node:$Vs, node:$Ru), 118 (HexagonVALIGN node:$Vt, node:$Vs, node:$Ru)>; 119def valignaddr: PatFrag<(ops node:$Addr), (HexagonVALIGNADDR node:$Addr)>; 120 121// Pattern fragments to extract the low and high subregisters from a 122// 64-bit value. 123def LoReg: OutPatFrag<(ops node:$Rs), (EXTRACT_SUBREG (i64 $Rs), isub_lo)>; 124def HiReg: OutPatFrag<(ops node:$Rs), (EXTRACT_SUBREG (i64 $Rs), isub_hi)>; 125 126def IsOrAdd: PatFrag<(ops node:$A, node:$B), (or node:$A, node:$B), [{ 127 return isOrEquivalentToAdd(N); 128}]>; 129 130def IsPow2_32: PatLeaf<(i32 imm), [{ 131 uint32_t V = N->getZExtValue(); 132 return isPowerOf2_32(V); 133}]>; 134 135def IsPow2_64: PatLeaf<(i64 imm), [{ 136 uint64_t V = N->getZExtValue(); 137 return isPowerOf2_64(V); 138}]>; 139 140def IsNPow2_32: PatLeaf<(i32 imm), [{ 141 uint32_t NV = ~N->getZExtValue(); 142 return isPowerOf2_32(NV); 143}]>; 144 145def IsPow2_64L: PatLeaf<(i64 imm), [{ 146 uint64_t V = N->getZExtValue(); 147 return isPowerOf2_64(V) && Log2_64(V) < 32; 148}]>; 149 150def IsPow2_64H: PatLeaf<(i64 imm), [{ 151 uint64_t V = N->getZExtValue(); 152 return isPowerOf2_64(V) && Log2_64(V) >= 32; 153}]>; 154 155def IsNPow2_64L: PatLeaf<(i64 imm), [{ 156 uint64_t NV = ~N->getZExtValue(); 157 return isPowerOf2_64(NV) && Log2_64(NV) < 32; 158}]>; 159 160def IsNPow2_64H: PatLeaf<(i64 imm), [{ 161 uint64_t NV = ~N->getZExtValue(); 162 return isPowerOf2_64(NV) && Log2_64(NV) >= 32; 163}]>; 164 165class IsULE<int Width, int Arg>: PatLeaf<(i32 imm), 166 "uint64_t V = N->getZExtValue();" # 167 "return isUInt<" # Width # ">(V) && V <= " # Arg # ";" 168>; 169 170class IsUGT<int Width, int Arg>: PatLeaf<(i32 imm), 171 "uint64_t V = N->getZExtValue();" # 172 "return isUInt<" # Width # ">(V) && V > " # Arg # ";" 173>; 174 175def SDEC1: SDNodeXForm<imm, [{ 176 int32_t V = N->getSExtValue(); 177 return CurDAG->getTargetConstant(V-1, SDLoc(N), MVT::i32); 178}]>; 179 180def UDEC1: SDNodeXForm<imm, [{ 181 uint32_t V = N->getZExtValue(); 182 assert(V >= 1); 183 return CurDAG->getTargetConstant(V-1, SDLoc(N), MVT::i32); 184}]>; 185 186def UDEC32: SDNodeXForm<imm, [{ 187 uint32_t V = N->getZExtValue(); 188 assert(V >= 32); 189 return CurDAG->getTargetConstant(V-32, SDLoc(N), MVT::i32); 190}]>; 191 192class Subi<int From>: SDNodeXForm<imm, 193 "int32_t V = " # From # " - N->getSExtValue();" # 194 "return CurDAG->getTargetConstant(V, SDLoc(N), MVT::i32);" 195>; 196 197def Log2_32: SDNodeXForm<imm, [{ 198 uint32_t V = N->getZExtValue(); 199 return CurDAG->getTargetConstant(Log2_32(V), SDLoc(N), MVT::i32); 200}]>; 201 202def Log2_64: SDNodeXForm<imm, [{ 203 uint64_t V = N->getZExtValue(); 204 return CurDAG->getTargetConstant(Log2_64(V), SDLoc(N), MVT::i32); 205}]>; 206 207def LogN2_32: SDNodeXForm<imm, [{ 208 uint32_t NV = ~N->getZExtValue(); 209 return CurDAG->getTargetConstant(Log2_32(NV), SDLoc(N), MVT::i32); 210}]>; 211 212def LogN2_64: SDNodeXForm<imm, [{ 213 uint64_t NV = ~N->getZExtValue(); 214 return CurDAG->getTargetConstant(Log2_64(NV), SDLoc(N), MVT::i32); 215}]>; 216 217def NegImm8: SDNodeXForm<imm, [{ 218 int8_t NV = -N->getSExtValue(); 219 return CurDAG->getTargetConstant(NV, SDLoc(N), MVT::i32); 220}]>; 221 222def NegImm16: SDNodeXForm<imm, [{ 223 int16_t NV = -N->getSExtValue(); 224 return CurDAG->getTargetConstant(NV, SDLoc(N), MVT::i32); 225}]>; 226 227def NegImm32: SDNodeXForm<imm, [{ 228 int32_t NV = -N->getSExtValue(); 229 return CurDAG->getTargetConstant(NV, SDLoc(N), MVT::i32); 230}]>; 231 232 233// Helpers for type promotions/contractions. 234def I1toI32: OutPatFrag<(ops node:$Rs), (C2_muxii (i1 $Rs), 1, 0)>; 235def I32toI1: OutPatFrag<(ops node:$Rs), (i1 (C2_cmpgtui (i32 $Rs), (i32 0)))>; 236def ToZext64: OutPatFrag<(ops node:$Rs), (i64 (A4_combineir 0, (i32 $Rs)))>; 237def ToSext64: OutPatFrag<(ops node:$Rs), (i64 (A2_sxtw (i32 $Rs)))>; 238def ToAext64: OutPatFrag<(ops node:$Rs), 239 (REG_SEQUENCE DoubleRegs, (i32 (IMPLICIT_DEF)), isub_hi, (i32 $Rs), isub_lo)>; 240 241def Combinew: OutPatFrag<(ops node:$Rs, node:$Rt), 242 (REG_SEQUENCE DoubleRegs, $Rs, isub_hi, $Rt, isub_lo)>; 243 244def addrga: PatLeaf<(i32 AddrGA:$Addr)>; 245def addrgp: PatLeaf<(i32 AddrGP:$Addr)>; 246def anyimm: PatLeaf<(i32 AnyImm:$Imm)>; 247def anyint: PatLeaf<(i32 AnyInt:$Imm)>; 248 249// Global address or an aligned constant. 250def anyimm0: PatLeaf<(i32 AnyImm0:$Addr)>; 251def anyimm1: PatLeaf<(i32 AnyImm1:$Addr)>; 252def anyimm2: PatLeaf<(i32 AnyImm2:$Addr)>; 253def anyimm3: PatLeaf<(i32 AnyImm3:$Addr)>; 254 255def f32ImmPred : PatLeaf<(f32 fpimm:$F)>; 256def f64ImmPred : PatLeaf<(f64 fpimm:$F)>; 257 258// This complex pattern is really only to detect various forms of 259// sign-extension i32->i64. The selected value will be of type i64 260// whose low word is the value being extended. The high word is 261// unspecified. 262def Usxtw: ComplexPattern<i64, 1, "DetectUseSxtw", [], []>; 263 264def Aext64: PatFrag<(ops node:$Rs), (i64 (anyext node:$Rs))>; 265def Zext64: PatFrag<(ops node:$Rs), (i64 (zext node:$Rs))>; 266def Sext64: PatLeaf<(i64 Usxtw:$Rs)>; 267 268def azext: PatFrags<(ops node:$Rs), [(zext node:$Rs), (anyext node:$Rs)]>; 269def asext: PatFrags<(ops node:$Rs), [(sext node:$Rs), (anyext node:$Rs)]>; 270 271def: Pat<(IsOrAdd (i32 AddrFI:$Rs), s32_0ImmPred:$off), 272 (PS_fi (i32 AddrFI:$Rs), imm:$off)>; 273 274 275// Converters from unary/binary SDNode to PatFrag. 276class pf1<SDNode Op> : PatFrag<(ops node:$a), (Op node:$a)>; 277class pf2<SDNode Op> : PatFrag<(ops node:$a, node:$b), (Op node:$a, node:$b)>; 278 279class Not2<PatFrag P> 280 : PatFrag<(ops node:$A, node:$B), (P node:$A, (not node:$B))>; 281 282// If there is a constant operand that feeds the and/or instruction, 283// do not generate the compound instructions. 284// It is not always profitable, as some times we end up with a transfer. 285// Check the below example. 286// ra = #65820; rb = lsr(rb, #8); rc ^= and (rb, ra) 287// Instead this is preferable. 288// ra = and (#65820, lsr(ra, #8)); rb = xor(rb, ra) 289class Su_ni1<PatFrag Op> 290 : PatFrag<Op.Operands, !head(Op.Fragments), [{ 291 if (hasOneUse(N)){ 292 // Check if Op1 is an immediate operand. 293 SDValue Op1 = N->getOperand(1); 294 return !isa<ConstantSDNode>(Op1); 295 } 296 return false;}], 297 Op.OperandTransform>; 298 299class Su<PatFrag Op> 300 : PatFrag<Op.Operands, !head(Op.Fragments), [{ return hasOneUse(N); }], 301 Op.OperandTransform>; 302 303// Main selection macros. 304 305class OpR_R_pat<InstHexagon MI, PatFrag Op, ValueType ResVT, PatFrag RegPred> 306 : Pat<(ResVT (Op RegPred:$Rs)), (MI RegPred:$Rs)>; 307 308class OpR_RI_pat<InstHexagon MI, PatFrag Op, ValueType ResType, 309 PatFrag RegPred, PatFrag ImmPred> 310 : Pat<(ResType (Op RegPred:$Rs, ImmPred:$I)), 311 (MI RegPred:$Rs, imm:$I)>; 312 313class OpR_RR_pat<InstHexagon MI, PatFrag Op, ValueType ResType, 314 PatFrag RsPred, PatFrag RtPred = RsPred> 315 : Pat<(ResType (Op RsPred:$Rs, RtPred:$Rt)), 316 (MI RsPred:$Rs, RtPred:$Rt)>; 317 318class AccRRI_pat<InstHexagon MI, PatFrag AccOp, PatFrag Op, 319 PatFrag RegPred, PatFrag ImmPred> 320 : Pat<(AccOp RegPred:$Rx, (Op RegPred:$Rs, ImmPred:$I)), 321 (MI RegPred:$Rx, RegPred:$Rs, imm:$I)>; 322 323class AccRRR_pat<InstHexagon MI, PatFrag AccOp, PatFrag Op, 324 PatFrag RxPred, PatFrag RsPred, PatFrag RtPred> 325 : Pat<(AccOp RxPred:$Rx, (Op RsPred:$Rs, RtPred:$Rt)), 326 (MI RxPred:$Rx, RsPred:$Rs, RtPred:$Rt)>; 327 328multiclass SelMinMax_pats<PatFrag CmpOp, PatFrag Val, 329 InstHexagon InstA, InstHexagon InstB> { 330 def: Pat<(select (i1 (CmpOp Val:$A, Val:$B)), Val:$A, Val:$B), 331 (InstA Val:$A, Val:$B)>; 332 def: Pat<(select (i1 (CmpOp Val:$A, Val:$B)), Val:$B, Val:$A), 333 (InstB Val:$A, Val:$B)>; 334} 335 336multiclass MinMax_pats<InstHexagon PickT, InstHexagon PickS, 337 PatFrag Sel, PatFrag CmpOp, 338 ValueType CmpType, PatFrag CmpPred> { 339 def: Pat<(Sel (CmpType (CmpOp CmpPred:$Vs, CmpPred:$Vt)), 340 CmpPred:$Vt, CmpPred:$Vs), 341 (PickT CmpPred:$Vs, CmpPred:$Vt)>; 342 def: Pat<(Sel (CmpType (CmpOp CmpPred:$Vs, CmpPred:$Vt)), 343 CmpPred:$Vs, CmpPred:$Vt), 344 (PickS CmpPred:$Vs, CmpPred:$Vt)>; 345} 346 347// Bitcasts between same-size vector types are no-ops, except for the 348// actual type change. 349multiclass NopCast_pat<ValueType Ty1, ValueType Ty2, RegisterClass RC> { 350 def: Pat<(Ty1 (bitconvert (Ty2 RC:$Val))), (Ty1 RC:$Val)>; 351 def: Pat<(Ty2 (bitconvert (Ty1 RC:$Val))), (Ty2 RC:$Val)>; 352} 353 354 355// Frags for commonly used SDNodes. 356def Add: pf2<add>; def And: pf2<and>; def Sra: pf2<sra>; 357def Sub: pf2<sub>; def Or: pf2<or>; def Srl: pf2<srl>; 358def Mul: pf2<mul>; def Xor: pf2<xor>; def Shl: pf2<shl>; 359 360def Rol: pf2<rotl>; 361 362// --(1) Immediate ------------------------------------------------------- 363// 364 365def SDTHexagonCONST32 366 : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisPtrTy<0>]>; 367 368def HexagonJT: SDNode<"HexagonISD::JT", SDTIntUnaryOp>; 369def HexagonCP: SDNode<"HexagonISD::CP", SDTIntUnaryOp>; 370def HexagonCONST32: SDNode<"HexagonISD::CONST32", SDTHexagonCONST32>; 371def HexagonCONST32_GP: SDNode<"HexagonISD::CONST32_GP", SDTHexagonCONST32>; 372 373def TruncI64ToI32: SDNodeXForm<imm, [{ 374 return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i32); 375}]>; 376 377def: Pat<(s32_0ImmPred:$s16), (A2_tfrsi imm:$s16)>; 378def: Pat<(s8_0Imm64Pred:$s8), (A2_tfrpi (TruncI64ToI32 $s8))>; 379 380def: Pat<(HexagonCONST32 tglobaltlsaddr:$A), (A2_tfrsi imm:$A)>; 381def: Pat<(HexagonCONST32 bbl:$A), (A2_tfrsi imm:$A)>; 382def: Pat<(HexagonCONST32 tglobaladdr:$A), (A2_tfrsi imm:$A)>; 383def: Pat<(HexagonCONST32_GP tblockaddress:$A), (A2_tfrsi imm:$A)>; 384def: Pat<(HexagonCONST32_GP tglobaladdr:$A), (A2_tfrsi imm:$A)>; 385def: Pat<(HexagonJT tjumptable:$A), (A2_tfrsi imm:$A)>; 386def: Pat<(HexagonCP tconstpool:$A), (A2_tfrsi imm:$A)>; 387// The HVX load patterns also match CP directly. Make sure that if 388// the selection of this opcode changes, it's updated in all places. 389 390def: Pat<(i1 0), (PS_false)>; 391def: Pat<(i1 1), (PS_true)>; 392def: Pat<(i64 imm:$v), (CONST64 imm:$v)>; 393 394def ftoi : SDNodeXForm<fpimm, [{ 395 APInt I = N->getValueAPF().bitcastToAPInt(); 396 return CurDAG->getTargetConstant(I.getZExtValue(), SDLoc(N), 397 MVT::getIntegerVT(I.getBitWidth())); 398}]>; 399 400def: Pat<(f32ImmPred:$f), (A2_tfrsi (ftoi $f))>; 401def: Pat<(f64ImmPred:$f), (CONST64 (ftoi $f))>; 402 403def ToI32: OutPatFrag<(ops node:$V), (A2_tfrsi $V)>; 404 405// --(2) Type cast ------------------------------------------------------- 406// 407 408def: OpR_R_pat<F2_conv_sf2df, pf1<fpextend>, f64, F32>; 409def: OpR_R_pat<F2_conv_df2sf, pf1<fpround>, f32, F64>; 410 411def: OpR_R_pat<F2_conv_w2sf, pf1<sint_to_fp>, f32, I32>; 412def: OpR_R_pat<F2_conv_d2sf, pf1<sint_to_fp>, f32, I64>; 413def: OpR_R_pat<F2_conv_w2df, pf1<sint_to_fp>, f64, I32>; 414def: OpR_R_pat<F2_conv_d2df, pf1<sint_to_fp>, f64, I64>; 415 416def: OpR_R_pat<F2_conv_uw2sf, pf1<uint_to_fp>, f32, I32>; 417def: OpR_R_pat<F2_conv_ud2sf, pf1<uint_to_fp>, f32, I64>; 418def: OpR_R_pat<F2_conv_uw2df, pf1<uint_to_fp>, f64, I32>; 419def: OpR_R_pat<F2_conv_ud2df, pf1<uint_to_fp>, f64, I64>; 420 421def: OpR_R_pat<F2_conv_sf2w_chop, pf1<fp_to_sint>, i32, F32>; 422def: OpR_R_pat<F2_conv_df2w_chop, pf1<fp_to_sint>, i32, F64>; 423def: OpR_R_pat<F2_conv_sf2d_chop, pf1<fp_to_sint>, i64, F32>; 424def: OpR_R_pat<F2_conv_df2d_chop, pf1<fp_to_sint>, i64, F64>; 425 426def: OpR_R_pat<F2_conv_sf2uw_chop, pf1<fp_to_uint>, i32, F32>; 427def: OpR_R_pat<F2_conv_df2uw_chop, pf1<fp_to_uint>, i32, F64>; 428def: OpR_R_pat<F2_conv_sf2ud_chop, pf1<fp_to_uint>, i64, F32>; 429def: OpR_R_pat<F2_conv_df2ud_chop, pf1<fp_to_uint>, i64, F64>; 430 431// Bitcast is different than [fp|sint|uint]_to_[sint|uint|fp]. 432def: Pat<(i32 (bitconvert F32:$v)), (I32:$v)>; 433def: Pat<(f32 (bitconvert I32:$v)), (F32:$v)>; 434def: Pat<(i64 (bitconvert F64:$v)), (I64:$v)>; 435def: Pat<(f64 (bitconvert I64:$v)), (F64:$v)>; 436 437// Bit convert 32- and 64-bit types. 438// All of these are bitcastable to one another: i32, v2i16, v4i8. 439defm: NopCast_pat<i32, v2i16, IntRegs>; 440defm: NopCast_pat<i32, v4i8, IntRegs>; 441defm: NopCast_pat<v2i16, v4i8, IntRegs>; 442// All of these are bitcastable to one another: i64, v2i32, v4i16, v8i8. 443defm: NopCast_pat<i64, v2i32, DoubleRegs>; 444defm: NopCast_pat<i64, v4i16, DoubleRegs>; 445defm: NopCast_pat<i64, v8i8, DoubleRegs>; 446defm: NopCast_pat<v2i32, v4i16, DoubleRegs>; 447defm: NopCast_pat<v2i32, v8i8, DoubleRegs>; 448defm: NopCast_pat<v4i16, v8i8, DoubleRegs>; 449 450 451// --(3) Extend/truncate ------------------------------------------------- 452// 453 454def: Pat<(sext_inreg I32:$Rs, i8), (A2_sxtb I32:$Rs)>; 455def: Pat<(sext_inreg I32:$Rs, i16), (A2_sxth I32:$Rs)>; 456def: Pat<(sext_inreg I64:$Rs, i32), (A2_sxtw (LoReg $Rs))>; 457def: Pat<(sext_inreg I64:$Rs, i16), (A2_sxtw (A2_sxth (LoReg $Rs)))>; 458def: Pat<(sext_inreg I64:$Rs, i8), (A2_sxtw (A2_sxtb (LoReg $Rs)))>; 459 460def: Pat<(i64 (sext I32:$Rs)), (A2_sxtw I32:$Rs)>; 461def: Pat<(Zext64 I32:$Rs), (ToZext64 $Rs)>; 462def: Pat<(Aext64 I32:$Rs), (ToZext64 $Rs)>; 463 464def: Pat<(i32 (trunc I64:$Rs)), (LoReg $Rs)>; 465def: Pat<(i1 (trunc I32:$Rs)), (S2_tstbit_i I32:$Rs, 0)>; 466def: Pat<(i1 (trunc I64:$Rs)), (S2_tstbit_i (LoReg $Rs), 0)>; 467 468let AddedComplexity = 20 in { 469 def: Pat<(and I32:$Rs, 255), (A2_zxtb I32:$Rs)>; 470 def: Pat<(and I32:$Rs, 65535), (A2_zxth I32:$Rs)>; 471} 472 473// Extensions from i1 or vectors of i1. 474def: Pat<(i32 (azext I1:$Pu)), (C2_muxii I1:$Pu, 1, 0)>; 475def: Pat<(i64 (azext I1:$Pu)), (ToZext64 (C2_muxii I1:$Pu, 1, 0))>; 476def: Pat<(i32 (sext I1:$Pu)), (C2_muxii I1:$Pu, -1, 0)>; 477def: Pat<(i64 (sext I1:$Pu)), (Combinew (C2_muxii PredRegs:$Pu, -1, 0), 478 (C2_muxii PredRegs:$Pu, -1, 0))>; 479 480def: Pat<(v2i16 (sext V2I1:$Pu)), (S2_vtrunehb (C2_mask V2I1:$Pu))>; 481def: Pat<(v2i32 (sext V2I1:$Pu)), (C2_mask V2I1:$Pu)>; 482def: Pat<(v4i8 (sext V4I1:$Pu)), (S2_vtrunehb (C2_mask V4I1:$Pu))>; 483def: Pat<(v4i16 (sext V4I1:$Pu)), (C2_mask V4I1:$Pu)>; 484def: Pat<(v8i8 (sext V8I1:$Pu)), (C2_mask V8I1:$Pu)>; 485 486def Vsplatpi: OutPatFrag<(ops node:$V), 487 (Combinew (A2_tfrsi $V), (A2_tfrsi $V))>; 488 489def: Pat<(v2i16 (azext V2I1:$Pu)), 490 (A2_andir (LoReg (C2_mask V2I1:$Pu)), (i32 0x00010001))>; 491def: Pat<(v2i32 (azext V2I1:$Pu)), 492 (A2_andp (C2_mask V2I1:$Pu), (A2_combineii (i32 1), (i32 1)))>; 493def: Pat<(v4i8 (azext V4I1:$Pu)), 494 (A2_andir (LoReg (C2_mask V4I1:$Pu)), (i32 0x01010101))>; 495def: Pat<(v4i16 (azext V4I1:$Pu)), 496 (A2_andp (C2_mask V4I1:$Pu), (Vsplatpi (i32 0x00010001)))>; 497def: Pat<(v8i8 (azext V8I1:$Pu)), 498 (A2_andp (C2_mask V8I1:$Pu), (Vsplatpi (i32 0x01010101)))>; 499 500def: Pat<(v4i16 (azext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>; 501def: Pat<(v2i32 (azext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>; 502def: Pat<(v4i16 (sext V4I8:$Rs)), (S2_vsxtbh V4I8:$Rs)>; 503def: Pat<(v2i32 (sext V2I16:$Rs)), (S2_vsxthw V2I16:$Rs)>; 504 505def: Pat<(v2i32 (sext_inreg V2I32:$Rs, v2i8)), 506 (Combinew (A2_sxtb (HiReg $Rs)), (A2_sxtb (LoReg $Rs)))>; 507 508def: Pat<(v2i32 (sext_inreg V2I32:$Rs, v2i16)), 509 (Combinew (A2_sxth (HiReg $Rs)), (A2_sxth (LoReg $Rs)))>; 510 511// Truncate: from vector B copy all 'E'ven 'B'yte elements: 512// A[0] = B[0]; A[1] = B[2]; A[2] = B[4]; A[3] = B[6]; 513def: Pat<(v4i8 (trunc V4I16:$Rs)), 514 (S2_vtrunehb V4I16:$Rs)>; 515 516// Truncate: from vector B copy all 'O'dd 'B'yte elements: 517// A[0] = B[1]; A[1] = B[3]; A[2] = B[5]; A[3] = B[7]; 518// S2_vtrunohb 519 520// Truncate: from vectors B and C copy all 'E'ven 'H'alf-word elements: 521// A[0] = B[0]; A[1] = B[2]; A[2] = C[0]; A[3] = C[2]; 522// S2_vtruneh 523 524def: Pat<(v2i16 (trunc V2I32:$Rs)), 525 (A2_combine_ll (HiReg $Rs), (LoReg $Rs))>; 526 527 528// --(4) Logical --------------------------------------------------------- 529// 530 531def: Pat<(not I1:$Ps), (C2_not I1:$Ps)>; 532def: Pat<(pnot V2I1:$Ps), (C2_not V2I1:$Ps)>; 533def: Pat<(pnot V4I1:$Ps), (C2_not V4I1:$Ps)>; 534def: Pat<(pnot V8I1:$Ps), (C2_not V8I1:$Ps)>; 535def: Pat<(add I1:$Ps, -1), (C2_not I1:$Ps)>; 536 537multiclass BoolOpR_RR_pat<InstHexagon MI, PatFrag Op> { 538 def: OpR_RR_pat<MI, Op, i1, I1>; 539 def: OpR_RR_pat<MI, Op, v2i1, V2I1>; 540 def: OpR_RR_pat<MI, Op, v4i1, V4I1>; 541 def: OpR_RR_pat<MI, Op, v8i1, V8I1>; 542} 543 544multiclass BoolAccRRR_pat<InstHexagon MI, PatFrag AccOp, PatFrag Op> { 545 def: AccRRR_pat<MI, AccOp, Op, I1, I1, I1>; 546 def: AccRRR_pat<MI, AccOp, Op, V2I1, V2I1, V2I1>; 547 def: AccRRR_pat<MI, AccOp, Op, V4I1, V4I1, V4I1>; 548 def: AccRRR_pat<MI, AccOp, Op, V8I1, V8I1, V8I1>; 549} 550 551defm: BoolOpR_RR_pat<C2_and, And>; 552defm: BoolOpR_RR_pat<C2_or, Or>; 553defm: BoolOpR_RR_pat<C2_xor, Xor>; 554defm: BoolOpR_RR_pat<C2_andn, Not2<And>>; 555defm: BoolOpR_RR_pat<C2_orn, Not2<Or>>; 556 557// op(Ps, op(Pt, Pu)) 558defm: BoolAccRRR_pat<C4_and_and, And, Su<And>>; 559defm: BoolAccRRR_pat<C4_and_or, And, Su<Or>>; 560defm: BoolAccRRR_pat<C4_or_and, Or, Su<And>>; 561defm: BoolAccRRR_pat<C4_or_or, Or, Su<Or>>; 562 563// op(Ps, op(Pt, ~Pu)) 564defm: BoolAccRRR_pat<C4_and_andn, And, Su<Not2<And>>>; 565defm: BoolAccRRR_pat<C4_and_orn, And, Su<Not2<Or>>>; 566defm: BoolAccRRR_pat<C4_or_andn, Or, Su<Not2<And>>>; 567defm: BoolAccRRR_pat<C4_or_orn, Or, Su<Not2<Or>>>; 568 569 570// --(5) Compare --------------------------------------------------------- 571// 572 573// Avoid negated comparisons, i.e. those of form "Pd = !cmp(...)". 574// These cannot form compounds (e.g. J4_cmpeqi_tp0_jump_nt). 575 576def: OpR_RI_pat<C2_cmpeqi, seteq, i1, I32, anyimm>; 577def: OpR_RI_pat<C2_cmpgti, setgt, i1, I32, anyimm>; 578def: OpR_RI_pat<C2_cmpgtui, setugt, i1, I32, anyimm>; 579 580def: Pat<(i1 (setge I32:$Rs, s32_0ImmPred:$s10)), 581 (C2_cmpgti I32:$Rs, (SDEC1 imm:$s10))>; 582def: Pat<(i1 (setuge I32:$Rs, u32_0ImmPred:$u9)), 583 (C2_cmpgtui I32:$Rs, (UDEC1 imm:$u9))>; 584 585def: Pat<(i1 (setlt I32:$Rs, s32_0ImmPred:$s10)), 586 (C2_not (C2_cmpgti I32:$Rs, (SDEC1 imm:$s10)))>; 587def: Pat<(i1 (setult I32:$Rs, u32_0ImmPred:$u9)), 588 (C2_not (C2_cmpgtui I32:$Rs, (UDEC1 imm:$u9)))>; 589 590// Patfrag to convert the usual comparison patfrags (e.g. setlt) to ones 591// that reverse the order of the operands. 592class RevCmp<PatFrag F> 593 : PatFrag<(ops node:$rhs, node:$lhs), !head(F.Fragments), F.PredicateCode, 594 F.OperandTransform>; 595 596def: OpR_RR_pat<C2_cmpeq, seteq, i1, I32>; 597def: OpR_RR_pat<C2_cmpgt, setgt, i1, I32>; 598def: OpR_RR_pat<C2_cmpgtu, setugt, i1, I32>; 599def: OpR_RR_pat<C2_cmpgt, RevCmp<setlt>, i1, I32>; 600def: OpR_RR_pat<C2_cmpgtu, RevCmp<setult>, i1, I32>; 601def: OpR_RR_pat<C2_cmpeqp, seteq, i1, I64>; 602def: OpR_RR_pat<C2_cmpgtp, setgt, i1, I64>; 603def: OpR_RR_pat<C2_cmpgtup, setugt, i1, I64>; 604def: OpR_RR_pat<C2_cmpgtp, RevCmp<setlt>, i1, I64>; 605def: OpR_RR_pat<C2_cmpgtup, RevCmp<setult>, i1, I64>; 606def: OpR_RR_pat<A2_vcmpbeq, seteq, i1, V8I8>; 607def: OpR_RR_pat<A2_vcmpbeq, seteq, v8i1, V8I8>; 608def: OpR_RR_pat<A4_vcmpbgt, RevCmp<setlt>, i1, V8I8>; 609def: OpR_RR_pat<A4_vcmpbgt, RevCmp<setlt>, v8i1, V8I8>; 610def: OpR_RR_pat<A4_vcmpbgt, setgt, i1, V8I8>; 611def: OpR_RR_pat<A4_vcmpbgt, setgt, v8i1, V8I8>; 612def: OpR_RR_pat<A2_vcmpbgtu, RevCmp<setult>, i1, V8I8>; 613def: OpR_RR_pat<A2_vcmpbgtu, RevCmp<setult>, v8i1, V8I8>; 614def: OpR_RR_pat<A2_vcmpbgtu, setugt, i1, V8I8>; 615def: OpR_RR_pat<A2_vcmpbgtu, setugt, v8i1, V8I8>; 616def: OpR_RR_pat<A2_vcmpheq, seteq, i1, V4I16>; 617def: OpR_RR_pat<A2_vcmpheq, seteq, v4i1, V4I16>; 618def: OpR_RR_pat<A2_vcmphgt, RevCmp<setlt>, i1, V4I16>; 619def: OpR_RR_pat<A2_vcmphgt, RevCmp<setlt>, v4i1, V4I16>; 620def: OpR_RR_pat<A2_vcmphgt, setgt, i1, V4I16>; 621def: OpR_RR_pat<A2_vcmphgt, setgt, v4i1, V4I16>; 622def: OpR_RR_pat<A2_vcmphgtu, RevCmp<setult>, i1, V4I16>; 623def: OpR_RR_pat<A2_vcmphgtu, RevCmp<setult>, v4i1, V4I16>; 624def: OpR_RR_pat<A2_vcmphgtu, setugt, i1, V4I16>; 625def: OpR_RR_pat<A2_vcmphgtu, setugt, v4i1, V4I16>; 626def: OpR_RR_pat<A2_vcmpweq, seteq, i1, V2I32>; 627def: OpR_RR_pat<A2_vcmpweq, seteq, v2i1, V2I32>; 628def: OpR_RR_pat<A2_vcmpwgt, RevCmp<setlt>, i1, V2I32>; 629def: OpR_RR_pat<A2_vcmpwgt, RevCmp<setlt>, v2i1, V2I32>; 630def: OpR_RR_pat<A2_vcmpwgt, setgt, i1, V2I32>; 631def: OpR_RR_pat<A2_vcmpwgt, setgt, v2i1, V2I32>; 632def: OpR_RR_pat<A2_vcmpwgtu, RevCmp<setult>, i1, V2I32>; 633def: OpR_RR_pat<A2_vcmpwgtu, RevCmp<setult>, v2i1, V2I32>; 634def: OpR_RR_pat<A2_vcmpwgtu, setugt, i1, V2I32>; 635def: OpR_RR_pat<A2_vcmpwgtu, setugt, v2i1, V2I32>; 636 637def: OpR_RR_pat<F2_sfcmpeq, seteq, i1, F32>; 638def: OpR_RR_pat<F2_sfcmpgt, setgt, i1, F32>; 639def: OpR_RR_pat<F2_sfcmpge, setge, i1, F32>; 640def: OpR_RR_pat<F2_sfcmpeq, setoeq, i1, F32>; 641def: OpR_RR_pat<F2_sfcmpgt, setogt, i1, F32>; 642def: OpR_RR_pat<F2_sfcmpge, setoge, i1, F32>; 643def: OpR_RR_pat<F2_sfcmpgt, RevCmp<setolt>, i1, F32>; 644def: OpR_RR_pat<F2_sfcmpge, RevCmp<setole>, i1, F32>; 645def: OpR_RR_pat<F2_sfcmpgt, RevCmp<setlt>, i1, F32>; 646def: OpR_RR_pat<F2_sfcmpge, RevCmp<setle>, i1, F32>; 647def: OpR_RR_pat<F2_sfcmpuo, setuo, i1, F32>; 648 649def: OpR_RR_pat<F2_dfcmpeq, seteq, i1, F64>; 650def: OpR_RR_pat<F2_dfcmpgt, setgt, i1, F64>; 651def: OpR_RR_pat<F2_dfcmpge, setge, i1, F64>; 652def: OpR_RR_pat<F2_dfcmpeq, setoeq, i1, F64>; 653def: OpR_RR_pat<F2_dfcmpgt, setogt, i1, F64>; 654def: OpR_RR_pat<F2_dfcmpge, setoge, i1, F64>; 655def: OpR_RR_pat<F2_dfcmpgt, RevCmp<setolt>, i1, F64>; 656def: OpR_RR_pat<F2_dfcmpge, RevCmp<setole>, i1, F64>; 657def: OpR_RR_pat<F2_dfcmpgt, RevCmp<setlt>, i1, F64>; 658def: OpR_RR_pat<F2_dfcmpge, RevCmp<setle>, i1, F64>; 659def: OpR_RR_pat<F2_dfcmpuo, setuo, i1, F64>; 660 661// Avoid C4_cmpneqi, C4_cmpltei, C4_cmplteui, since they cannot form compounds. 662 663def: Pat<(i1 (setne I32:$Rs, anyimm:$u5)), 664 (C2_not (C2_cmpeqi I32:$Rs, imm:$u5))>; 665def: Pat<(i1 (setle I32:$Rs, anyimm:$u5)), 666 (C2_not (C2_cmpgti I32:$Rs, imm:$u5))>; 667def: Pat<(i1 (setule I32:$Rs, anyimm:$u5)), 668 (C2_not (C2_cmpgtui I32:$Rs, imm:$u5))>; 669 670class OpmR_RR_pat<PatFrag Output, PatFrag Op, ValueType ResType, 671 PatFrag RsPred, PatFrag RtPred = RsPred> 672 : Pat<(ResType (Op RsPred:$Rs, RtPred:$Rt)), 673 (Output RsPred:$Rs, RtPred:$Rt)>; 674 675class Outn<InstHexagon MI> 676 : OutPatFrag<(ops node:$Rs, node:$Rt), 677 (C2_not (MI $Rs, $Rt))>; 678 679def: OpmR_RR_pat<Outn<C2_cmpeq>, setne, i1, I32>; 680def: OpmR_RR_pat<Outn<C2_cmpgt>, setle, i1, I32>; 681def: OpmR_RR_pat<Outn<C2_cmpgtu>, setule, i1, I32>; 682def: OpmR_RR_pat<Outn<C2_cmpgt>, RevCmp<setge>, i1, I32>; 683def: OpmR_RR_pat<Outn<C2_cmpgtu>, RevCmp<setuge>, i1, I32>; 684def: OpmR_RR_pat<Outn<C2_cmpeqp>, setne, i1, I64>; 685def: OpmR_RR_pat<Outn<C2_cmpgtp>, setle, i1, I64>; 686def: OpmR_RR_pat<Outn<C2_cmpgtup>, setule, i1, I64>; 687def: OpmR_RR_pat<Outn<C2_cmpgtp>, RevCmp<setge>, i1, I64>; 688def: OpmR_RR_pat<Outn<C2_cmpgtup>, RevCmp<setuge>, i1, I64>; 689def: OpmR_RR_pat<Outn<A2_vcmpbeq>, setne, v8i1, V8I8>; 690def: OpmR_RR_pat<Outn<A4_vcmpbgt>, setle, v8i1, V8I8>; 691def: OpmR_RR_pat<Outn<A2_vcmpbgtu>, setule, v8i1, V8I8>; 692def: OpmR_RR_pat<Outn<A4_vcmpbgt>, RevCmp<setge>, v8i1, V8I8>; 693def: OpmR_RR_pat<Outn<A2_vcmpbgtu>, RevCmp<setuge>, v8i1, V8I8>; 694def: OpmR_RR_pat<Outn<A2_vcmpheq>, setne, v4i1, V4I16>; 695def: OpmR_RR_pat<Outn<A2_vcmphgt>, setle, v4i1, V4I16>; 696def: OpmR_RR_pat<Outn<A2_vcmphgtu>, setule, v4i1, V4I16>; 697def: OpmR_RR_pat<Outn<A2_vcmphgt>, RevCmp<setge>, v4i1, V4I16>; 698def: OpmR_RR_pat<Outn<A2_vcmphgtu>, RevCmp<setuge>, v4i1, V4I16>; 699def: OpmR_RR_pat<Outn<A2_vcmpweq>, setne, v2i1, V2I32>; 700def: OpmR_RR_pat<Outn<A2_vcmpwgt>, setle, v2i1, V2I32>; 701def: OpmR_RR_pat<Outn<A2_vcmpwgtu>, setule, v2i1, V2I32>; 702def: OpmR_RR_pat<Outn<A2_vcmpwgt>, RevCmp<setge>, v2i1, V2I32>; 703def: OpmR_RR_pat<Outn<A2_vcmpwgtu>, RevCmp<setuge>, v2i1, V2I32>; 704 705let AddedComplexity = 100 in { 706 def: Pat<(i1 (seteq (and (xor I32:$Rs, I32:$Rt), 255), 0)), 707 (A4_cmpbeq IntRegs:$Rs, IntRegs:$Rt)>; 708 def: Pat<(i1 (setne (and (xor I32:$Rs, I32:$Rt), 255), 0)), 709 (C2_not (A4_cmpbeq IntRegs:$Rs, IntRegs:$Rt))>; 710 def: Pat<(i1 (seteq (and (xor I32:$Rs, I32:$Rt), 65535), 0)), 711 (A4_cmpheq IntRegs:$Rs, IntRegs:$Rt)>; 712 def: Pat<(i1 (setne (and (xor I32:$Rs, I32:$Rt), 65535), 0)), 713 (C2_not (A4_cmpheq IntRegs:$Rs, IntRegs:$Rt))>; 714} 715 716// PatFrag for AsserZext which takes the original type as a parameter. 717def SDTAssertZext: SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0,1>]>; 718def AssertZextSD: SDNode<"ISD::AssertZext", SDTAssertZext>; 719class AssertZext<ValueType T>: PatFrag<(ops node:$A), (AssertZextSD $A, T)>; 720 721multiclass Cmpb_pat<InstHexagon MI, PatFrag Op, PatFrag AssertExt, 722 PatLeaf ImmPred, int Mask> { 723 def: Pat<(i1 (Op (and I32:$Rs, Mask), ImmPred:$I)), 724 (MI I32:$Rs, imm:$I)>; 725 def: Pat<(i1 (Op (AssertExt I32:$Rs), ImmPred:$I)), 726 (MI I32:$Rs, imm:$I)>; 727} 728 729multiclass CmpbN_pat<InstHexagon MI, PatFrag Op, PatFrag AssertExt, 730 PatLeaf ImmPred, int Mask> { 731 def: Pat<(i1 (Op (and I32:$Rs, Mask), ImmPred:$I)), 732 (C2_not (MI I32:$Rs, imm:$I))>; 733 def: Pat<(i1 (Op (AssertExt I32:$Rs), ImmPred:$I)), 734 (C2_not (MI I32:$Rs, imm:$I))>; 735} 736 737multiclass CmpbND_pat<InstHexagon MI, PatFrag Op, PatFrag AssertExt, 738 PatLeaf ImmPred, int Mask> { 739 def: Pat<(i1 (Op (and I32:$Rs, Mask), ImmPred:$I)), 740 (C2_not (MI I32:$Rs, (UDEC1 imm:$I)))>; 741 def: Pat<(i1 (Op (AssertExt I32:$Rs), ImmPred:$I)), 742 (C2_not (MI I32:$Rs, (UDEC1 imm:$I)))>; 743} 744 745let AddedComplexity = 200 in { 746 defm: Cmpb_pat <A4_cmpbeqi, seteq, AssertZext<i8>, IsUGT<8,31>, 255>; 747 defm: CmpbN_pat <A4_cmpbeqi, setne, AssertZext<i8>, IsUGT<8,31>, 255>; 748 defm: Cmpb_pat <A4_cmpbgtui, setugt, AssertZext<i8>, IsUGT<32,31>, 255>; 749 defm: CmpbN_pat <A4_cmpbgtui, setule, AssertZext<i8>, IsUGT<32,31>, 255>; 750 defm: Cmpb_pat <A4_cmphgtui, setugt, AssertZext<i16>, IsUGT<32,31>, 65535>; 751 defm: CmpbN_pat <A4_cmphgtui, setule, AssertZext<i16>, IsUGT<32,31>, 65535>; 752 defm: CmpbND_pat<A4_cmpbgtui, setult, AssertZext<i8>, IsUGT<32,32>, 255>; 753 defm: CmpbND_pat<A4_cmphgtui, setult, AssertZext<i16>, IsUGT<32,32>, 65535>; 754} 755 756def: Pat<(i32 (zext (i1 (seteq I32:$Rs, I32:$Rt)))), 757 (A4_rcmpeq I32:$Rs, I32:$Rt)>; 758def: Pat<(i32 (zext (i1 (setne I32:$Rs, I32:$Rt)))), 759 (A4_rcmpneq I32:$Rs, I32:$Rt)>; 760def: Pat<(i32 (zext (i1 (seteq I32:$Rs, anyimm:$s8)))), 761 (A4_rcmpeqi I32:$Rs, imm:$s8)>; 762def: Pat<(i32 (zext (i1 (setne I32:$Rs, anyimm:$s8)))), 763 (A4_rcmpneqi I32:$Rs, imm:$s8)>; 764 765def: Pat<(i1 (seteq I1:$Ps, (i1 -1))), (I1:$Ps)>; 766def: Pat<(i1 (setne I1:$Ps, (i1 -1))), (C2_not I1:$Ps)>; 767def: Pat<(i1 (seteq I1:$Ps, I1:$Pt)), (C2_xor I1:$Ps, (C2_not I1:$Pt))>; 768def: Pat<(i1 (setne I1:$Ps, I1:$Pt)), (C2_xor I1:$Ps, I1:$Pt)>; 769 770// Floating-point comparisons with checks for ordered/unordered status. 771 772class T3<InstHexagon MI1, InstHexagon MI2, InstHexagon MI3> 773 : OutPatFrag<(ops node:$Rs, node:$Rt), 774 (MI1 (MI2 $Rs, $Rt), (MI3 $Rs, $Rt))>; 775 776class Cmpuf<InstHexagon MI>: T3<C2_or, F2_sfcmpuo, MI>; 777class Cmpud<InstHexagon MI>: T3<C2_or, F2_dfcmpuo, MI>; 778 779class Cmpufn<InstHexagon MI>: T3<C2_orn, F2_sfcmpuo, MI>; 780class Cmpudn<InstHexagon MI>: T3<C2_orn, F2_dfcmpuo, MI>; 781 782def: OpmR_RR_pat<Cmpuf<F2_sfcmpeq>, setueq, i1, F32>; 783def: OpmR_RR_pat<Cmpuf<F2_sfcmpge>, setuge, i1, F32>; 784def: OpmR_RR_pat<Cmpuf<F2_sfcmpgt>, setugt, i1, F32>; 785def: OpmR_RR_pat<Cmpuf<F2_sfcmpge>, RevCmp<setule>, i1, F32>; 786def: OpmR_RR_pat<Cmpuf<F2_sfcmpgt>, RevCmp<setult>, i1, F32>; 787def: OpmR_RR_pat<Cmpufn<F2_sfcmpeq>, setune, i1, F32>; 788 789def: OpmR_RR_pat<Cmpud<F2_dfcmpeq>, setueq, i1, F64>; 790def: OpmR_RR_pat<Cmpud<F2_dfcmpge>, setuge, i1, F64>; 791def: OpmR_RR_pat<Cmpud<F2_dfcmpgt>, setugt, i1, F64>; 792def: OpmR_RR_pat<Cmpud<F2_dfcmpge>, RevCmp<setule>, i1, F64>; 793def: OpmR_RR_pat<Cmpud<F2_dfcmpgt>, RevCmp<setult>, i1, F64>; 794def: OpmR_RR_pat<Cmpudn<F2_dfcmpeq>, setune, i1, F64>; 795 796def: OpmR_RR_pat<Outn<F2_sfcmpeq>, setone, i1, F32>; 797def: OpmR_RR_pat<Outn<F2_sfcmpeq>, setne, i1, F32>; 798 799def: OpmR_RR_pat<Outn<F2_dfcmpeq>, setone, i1, F64>; 800def: OpmR_RR_pat<Outn<F2_dfcmpeq>, setne, i1, F64>; 801 802def: OpmR_RR_pat<Outn<F2_sfcmpuo>, seto, i1, F32>; 803def: OpmR_RR_pat<Outn<F2_dfcmpuo>, seto, i1, F64>; 804 805 806// --(6) Select ---------------------------------------------------------- 807// 808 809def: Pat<(select I1:$Pu, I32:$Rs, I32:$Rt), 810 (C2_mux I1:$Pu, I32:$Rs, I32:$Rt)>; 811def: Pat<(select I1:$Pu, anyimm:$s8, I32:$Rs), 812 (C2_muxri I1:$Pu, imm:$s8, I32:$Rs)>; 813def: Pat<(select I1:$Pu, I32:$Rs, anyimm:$s8), 814 (C2_muxir I1:$Pu, I32:$Rs, imm:$s8)>; 815def: Pat<(select I1:$Pu, anyimm:$s8, s8_0ImmPred:$S8), 816 (C2_muxii I1:$Pu, imm:$s8, imm:$S8)>; 817 818def: Pat<(select (not I1:$Pu), I32:$Rs, I32:$Rt), 819 (C2_mux I1:$Pu, I32:$Rt, I32:$Rs)>; 820def: Pat<(select (not I1:$Pu), s8_0ImmPred:$S8, anyimm:$s8), 821 (C2_muxii I1:$Pu, imm:$s8, imm:$S8)>; 822def: Pat<(select (not I1:$Pu), anyimm:$s8, I32:$Rs), 823 (C2_muxir I1:$Pu, I32:$Rs, imm:$s8)>; 824def: Pat<(select (not I1:$Pu), I32:$Rs, anyimm:$s8), 825 (C2_muxri I1:$Pu, imm:$s8, I32:$Rs)>; 826 827// Map from a 64-bit select to an emulated 64-bit mux. 828// Hexagon does not support 64-bit MUXes; so emulate with combines. 829def: Pat<(select I1:$Pu, I64:$Rs, I64:$Rt), 830 (Combinew (C2_mux I1:$Pu, (HiReg $Rs), (HiReg $Rt)), 831 (C2_mux I1:$Pu, (LoReg $Rs), (LoReg $Rt)))>; 832 833def: Pat<(select I1:$Pu, F32:$Rs, f32ImmPred:$I), 834 (C2_muxir I1:$Pu, F32:$Rs, (ftoi $I))>; 835def: Pat<(select I1:$Pu, f32ImmPred:$I, F32:$Rt), 836 (C2_muxri I1:$Pu, (ftoi $I), F32:$Rt)>; 837def: Pat<(select I1:$Pu, F32:$Rs, F32:$Rt), 838 (C2_mux I1:$Pu, F32:$Rs, F32:$Rt)>; 839def: Pat<(select I1:$Pu, F64:$Rs, F64:$Rt), 840 (Combinew (C2_mux I1:$Pu, (HiReg $Rs), (HiReg $Rt)), 841 (C2_mux I1:$Pu, (LoReg $Rs), (LoReg $Rt)))>; 842 843def: Pat<(select (i1 (setult F32:$Ra, F32:$Rb)), F32:$Rs, F32:$Rt), 844 (C2_mux (F2_sfcmpgt F32:$Rb, F32:$Ra), F32:$Rs, F32:$Rt)>; 845def: Pat<(select (i1 (setult F64:$Ra, F64:$Rb)), F64:$Rs, F64:$Rt), 846 (C2_vmux (F2_dfcmpgt F64:$Rb, F64:$Ra), F64:$Rs, F64:$Rt)>; 847 848def: Pat<(select (not I1:$Pu), f32ImmPred:$I, F32:$Rs), 849 (C2_muxir I1:$Pu, F32:$Rs, (ftoi $I))>; 850def: Pat<(select (not I1:$Pu), F32:$Rt, f32ImmPred:$I), 851 (C2_muxri I1:$Pu, (ftoi $I), F32:$Rt)>; 852 853def: Pat<(vselect V8I1:$Pu, V8I8:$Rs, V8I8:$Rt), 854 (C2_vmux V8I1:$Pu, V8I8:$Rs, V8I8:$Rt)>; 855def: Pat<(vselect V4I1:$Pu, V4I16:$Rs, V4I16:$Rt), 856 (C2_vmux V4I1:$Pu, V4I16:$Rs, V4I16:$Rt)>; 857def: Pat<(vselect V2I1:$Pu, V2I32:$Rs, V2I32:$Rt), 858 (C2_vmux V2I1:$Pu, V2I32:$Rs, V2I32:$Rt)>; 859 860def: Pat<(vselect (pnot V8I1:$Pu), V8I8:$Rs, V8I8:$Rt), 861 (C2_vmux V8I1:$Pu, V8I8:$Rt, V8I8:$Rs)>; 862def: Pat<(vselect (pnot V4I1:$Pu), V4I16:$Rs, V4I16:$Rt), 863 (C2_vmux V4I1:$Pu, V4I16:$Rt, V4I16:$Rs)>; 864def: Pat<(vselect (pnot V2I1:$Pu), V2I32:$Rs, V2I32:$Rt), 865 (C2_vmux V2I1:$Pu, V2I32:$Rt, V2I32:$Rs)>; 866 867 868// From LegalizeDAG.cpp: (Pu ? Pv : Pw) <=> (Pu & Pv) | (!Pu & Pw). 869def: Pat<(select I1:$Pu, I1:$Pv, I1:$Pw), 870 (C2_or (C2_and I1:$Pu, I1:$Pv), 871 (C2_andn I1:$Pw, I1:$Pu))>; 872 873 874def IsPosHalf : PatLeaf<(i32 IntRegs:$a), [{ 875 return isPositiveHalfWord(N); 876}]>; 877 878multiclass SelMinMax16_pats<PatFrag CmpOp, InstHexagon InstA, 879 InstHexagon InstB> { 880 def: Pat<(sext_inreg (select (i1 (CmpOp IsPosHalf:$Rs, IsPosHalf:$Rt)), 881 IsPosHalf:$Rs, IsPosHalf:$Rt), i16), 882 (InstA IntRegs:$Rs, IntRegs:$Rt)>; 883 def: Pat<(sext_inreg (select (i1 (CmpOp IsPosHalf:$Rs, IsPosHalf:$Rt)), 884 IsPosHalf:$Rt, IsPosHalf:$Rs), i16), 885 (InstB IntRegs:$Rs, IntRegs:$Rt)>; 886} 887 888let AddedComplexity = 200 in { 889 defm: SelMinMax16_pats<setge, A2_max, A2_min>; 890 defm: SelMinMax16_pats<setgt, A2_max, A2_min>; 891 defm: SelMinMax16_pats<setle, A2_min, A2_max>; 892 defm: SelMinMax16_pats<setlt, A2_min, A2_max>; 893 defm: SelMinMax16_pats<setuge, A2_maxu, A2_minu>; 894 defm: SelMinMax16_pats<setugt, A2_maxu, A2_minu>; 895 defm: SelMinMax16_pats<setule, A2_minu, A2_maxu>; 896 defm: SelMinMax16_pats<setult, A2_minu, A2_maxu>; 897} 898 899let AddedComplexity = 200 in { 900 defm: MinMax_pats<A2_min, A2_max, select, setgt, i1, I32>; 901 defm: MinMax_pats<A2_min, A2_max, select, setge, i1, I32>; 902 defm: MinMax_pats<A2_max, A2_min, select, setlt, i1, I32>; 903 defm: MinMax_pats<A2_max, A2_min, select, setle, i1, I32>; 904 defm: MinMax_pats<A2_minu, A2_maxu, select, setugt, i1, I32>; 905 defm: MinMax_pats<A2_minu, A2_maxu, select, setuge, i1, I32>; 906 defm: MinMax_pats<A2_maxu, A2_minu, select, setult, i1, I32>; 907 defm: MinMax_pats<A2_maxu, A2_minu, select, setule, i1, I32>; 908 909 defm: MinMax_pats<A2_minp, A2_maxp, select, setgt, i1, I64>; 910 defm: MinMax_pats<A2_minp, A2_maxp, select, setge, i1, I64>; 911 defm: MinMax_pats<A2_maxp, A2_minp, select, setlt, i1, I64>; 912 defm: MinMax_pats<A2_maxp, A2_minp, select, setle, i1, I64>; 913 defm: MinMax_pats<A2_minup, A2_maxup, select, setugt, i1, I64>; 914 defm: MinMax_pats<A2_minup, A2_maxup, select, setuge, i1, I64>; 915 defm: MinMax_pats<A2_maxup, A2_minup, select, setult, i1, I64>; 916 defm: MinMax_pats<A2_maxup, A2_minup, select, setule, i1, I64>; 917} 918 919let AddedComplexity = 100 in { 920 defm: MinMax_pats<F2_sfmin, F2_sfmax, select, setogt, i1, F32>; 921 defm: MinMax_pats<F2_sfmin, F2_sfmax, select, setoge, i1, F32>; 922 defm: MinMax_pats<F2_sfmax, F2_sfmin, select, setolt, i1, F32>; 923 defm: MinMax_pats<F2_sfmax, F2_sfmin, select, setole, i1, F32>; 924} 925 926defm: MinMax_pats<A2_vminb, A2_vmaxb, vselect, setgt, v8i1, V8I8>; 927defm: MinMax_pats<A2_vminb, A2_vmaxb, vselect, setge, v8i1, V8I8>; 928defm: MinMax_pats<A2_vminh, A2_vmaxh, vselect, setgt, v4i1, V4I16>; 929defm: MinMax_pats<A2_vminh, A2_vmaxh, vselect, setge, v4i1, V4I16>; 930defm: MinMax_pats<A2_vminw, A2_vmaxw, vselect, setgt, v2i1, V2I32>; 931defm: MinMax_pats<A2_vminw, A2_vmaxw, vselect, setge, v2i1, V2I32>; 932defm: MinMax_pats<A2_vminub, A2_vmaxub, vselect, setugt, v8i1, V8I8>; 933defm: MinMax_pats<A2_vminub, A2_vmaxub, vselect, setuge, v8i1, V8I8>; 934defm: MinMax_pats<A2_vminuh, A2_vmaxuh, vselect, setugt, v4i1, V4I16>; 935defm: MinMax_pats<A2_vminuh, A2_vmaxuh, vselect, setuge, v4i1, V4I16>; 936defm: MinMax_pats<A2_vminuw, A2_vmaxuw, vselect, setugt, v2i1, V2I32>; 937defm: MinMax_pats<A2_vminuw, A2_vmaxuw, vselect, setuge, v2i1, V2I32>; 938 939// --(7) Insert/extract -------------------------------------------------- 940// 941 942def SDTHexagonINSERT: 943 SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, 944 SDTCisInt<0>, SDTCisVT<3, i32>, SDTCisVT<4, i32>]>; 945def HexagonINSERT: SDNode<"HexagonISD::INSERT", SDTHexagonINSERT>; 946 947let AddedComplexity = 10 in { 948 def: Pat<(HexagonINSERT I32:$Rs, I32:$Rt, u5_0ImmPred:$u1, u5_0ImmPred:$u2), 949 (S2_insert I32:$Rs, I32:$Rt, imm:$u1, imm:$u2)>; 950 def: Pat<(HexagonINSERT I64:$Rs, I64:$Rt, u6_0ImmPred:$u1, u6_0ImmPred:$u2), 951 (S2_insertp I64:$Rs, I64:$Rt, imm:$u1, imm:$u2)>; 952} 953def: Pat<(HexagonINSERT I32:$Rs, I32:$Rt, I32:$Width, I32:$Off), 954 (S2_insert_rp I32:$Rs, I32:$Rt, (Combinew $Width, $Off))>; 955def: Pat<(HexagonINSERT I64:$Rs, I64:$Rt, I32:$Width, I32:$Off), 956 (S2_insertp_rp I64:$Rs, I64:$Rt, (Combinew $Width, $Off))>; 957 958def SDTHexagonEXTRACTU 959 : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisInt<1>, 960 SDTCisVT<2, i32>, SDTCisVT<3, i32>]>; 961def HexagonEXTRACTU: SDNode<"HexagonISD::EXTRACTU", SDTHexagonEXTRACTU>; 962 963let AddedComplexity = 10 in { 964 def: Pat<(HexagonEXTRACTU I32:$Rs, u5_0ImmPred:$u5, u5_0ImmPred:$U5), 965 (S2_extractu I32:$Rs, imm:$u5, imm:$U5)>; 966 def: Pat<(HexagonEXTRACTU I64:$Rs, u6_0ImmPred:$u6, u6_0ImmPred:$U6), 967 (S2_extractup I64:$Rs, imm:$u6, imm:$U6)>; 968} 969def: Pat<(HexagonEXTRACTU I32:$Rs, I32:$Width, I32:$Off), 970 (S2_extractu_rp I32:$Rs, (Combinew $Width, $Off))>; 971def: Pat<(HexagonEXTRACTU I64:$Rs, I32:$Width, I32:$Off), 972 (S2_extractup_rp I64:$Rs, (Combinew $Width, $Off))>; 973 974def SDTHexagonVSPLAT: 975 SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>; 976 977def HexagonVSPLAT: SDNode<"HexagonISD::VSPLAT", SDTHexagonVSPLAT>; 978 979def: Pat<(v4i8 (HexagonVSPLAT I32:$Rs)), (S2_vsplatrb I32:$Rs)>; 980def: Pat<(v4i16 (HexagonVSPLAT I32:$Rs)), (S2_vsplatrh I32:$Rs)>; 981def: Pat<(v2i32 (HexagonVSPLAT s8_0ImmPred:$s8)), 982 (A2_combineii imm:$s8, imm:$s8)>; 983def: Pat<(v2i32 (HexagonVSPLAT I32:$Rs)), (Combinew I32:$Rs, I32:$Rs)>; 984 985let AddedComplexity = 10 in 986def: Pat<(v8i8 (HexagonVSPLAT I32:$Rs)), (S6_vsplatrbp I32:$Rs)>, 987 Requires<[HasV62]>; 988def: Pat<(v8i8 (HexagonVSPLAT I32:$Rs)), 989 (Combinew (S2_vsplatrb I32:$Rs), (S2_vsplatrb I32:$Rs))>; 990 991 992// --(8) Shift/permute --------------------------------------------------- 993// 994 995def SDTHexagonI64I32I32: SDTypeProfile<1, 2, 996 [SDTCisVT<0, i64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>; 997 998def HexagonCOMBINE: SDNode<"HexagonISD::COMBINE", SDTHexagonI64I32I32>; 999 1000def: Pat<(HexagonCOMBINE I32:$Rs, I32:$Rt), (Combinew $Rs, $Rt)>; 1001 1002// The complexity of the combines involving immediates should be greater 1003// than the complexity of the combine with two registers. 1004let AddedComplexity = 50 in { 1005 def: Pat<(HexagonCOMBINE I32:$Rs, anyimm:$s8), 1006 (A4_combineri IntRegs:$Rs, imm:$s8)>; 1007 def: Pat<(HexagonCOMBINE anyimm:$s8, I32:$Rs), 1008 (A4_combineir imm:$s8, IntRegs:$Rs)>; 1009} 1010 1011// The complexity of the combine with two immediates should be greater than 1012// the complexity of a combine involving a register. 1013let AddedComplexity = 75 in { 1014 def: Pat<(HexagonCOMBINE s8_0ImmPred:$s8, anyimm:$u6), 1015 (A4_combineii imm:$s8, imm:$u6)>; 1016 def: Pat<(HexagonCOMBINE anyimm:$s8, s8_0ImmPred:$S8), 1017 (A2_combineii imm:$s8, imm:$S8)>; 1018} 1019 1020def: Pat<(bswap I32:$Rs), (A2_swiz I32:$Rs)>; 1021def: Pat<(bswap I64:$Rss), (Combinew (A2_swiz (LoReg $Rss)), 1022 (A2_swiz (HiReg $Rss)))>; 1023 1024def: Pat<(shl s6_0ImmPred:$s6, I32:$Rt), (S4_lsli imm:$s6, I32:$Rt)>; 1025def: Pat<(shl I32:$Rs, (i32 16)), (A2_aslh I32:$Rs)>; 1026def: Pat<(sra I32:$Rs, (i32 16)), (A2_asrh I32:$Rs)>; 1027 1028def: OpR_RI_pat<S2_asr_i_r, Sra, i32, I32, u5_0ImmPred>; 1029def: OpR_RI_pat<S2_lsr_i_r, Srl, i32, I32, u5_0ImmPred>; 1030def: OpR_RI_pat<S2_asl_i_r, Shl, i32, I32, u5_0ImmPred>; 1031def: OpR_RI_pat<S2_asr_i_p, Sra, i64, I64, u6_0ImmPred>; 1032def: OpR_RI_pat<S2_lsr_i_p, Srl, i64, I64, u6_0ImmPred>; 1033def: OpR_RI_pat<S2_asl_i_p, Shl, i64, I64, u6_0ImmPred>; 1034def: OpR_RI_pat<S2_asr_i_vh, Sra, v4i16, V4I16, u4_0ImmPred>; 1035def: OpR_RI_pat<S2_lsr_i_vh, Srl, v4i16, V4I16, u4_0ImmPred>; 1036def: OpR_RI_pat<S2_asl_i_vh, Shl, v4i16, V4I16, u4_0ImmPred>; 1037def: OpR_RI_pat<S2_asr_i_vh, Sra, v2i32, V2I32, u5_0ImmPred>; 1038def: OpR_RI_pat<S2_lsr_i_vh, Srl, v2i32, V2I32, u5_0ImmPred>; 1039def: OpR_RI_pat<S2_asl_i_vh, Shl, v2i32, V2I32, u5_0ImmPred>; 1040 1041def: OpR_RR_pat<S2_asr_r_r, Sra, i32, I32, I32>; 1042def: OpR_RR_pat<S2_lsr_r_r, Srl, i32, I32, I32>; 1043def: OpR_RR_pat<S2_asl_r_r, Shl, i32, I32, I32>; 1044def: OpR_RR_pat<S2_asr_r_p, Sra, i64, I64, I32>; 1045def: OpR_RR_pat<S2_lsr_r_p, Srl, i64, I64, I32>; 1046def: OpR_RR_pat<S2_asl_r_p, Shl, i64, I64, I32>; 1047 1048// Funnel shifts. 1049def IsMul8_U3: PatLeaf<(i32 imm), [{ 1050 uint64_t V = N->getZExtValue(); 1051 return V % 8 == 0 && isUInt<3>(V / 8); 1052}]>; 1053 1054def Divu8: SDNodeXForm<imm, [{ 1055 return CurDAG->getTargetConstant(N->getZExtValue() / 8, SDLoc(N), MVT::i32); 1056}]>; 1057 1058// Funnel shift-left. 1059def FShl32i: OutPatFrag<(ops node:$Rs, node:$Rt, node:$S), 1060 (HiReg (S2_asl_i_p (Combinew $Rs, $Rt), $S))>; 1061def FShl32r: OutPatFrag<(ops node:$Rs, node:$Rt, node:$Ru), 1062 (HiReg (S2_asl_r_p (Combinew $Rs, $Rt), $Ru))>; 1063 1064def FShl64i: OutPatFrag<(ops node:$Rs, node:$Rt, node:$S), 1065 (S2_lsr_i_p_or (S2_asl_i_p $Rt, $S), $Rs, (Subi<64> $S))>; 1066def FShl64r: OutPatFrag<(ops node:$Rs, node:$Rt, node:$Ru), 1067 (S2_lsr_r_p_or (S2_asl_r_p $Rt, $Ru), $Rs, (A2_subri 64, $Ru))>; 1068 1069// Combined SDNodeXForm: (Divu8 (Subi<64> $S)) 1070def Divu64_8: SDNodeXForm<imm, [{ 1071 return CurDAG->getTargetConstant((64 - N->getSExtValue()) / 8, 1072 SDLoc(N), MVT::i32); 1073}]>; 1074 1075// Special cases: 1076let AddedComplexity = 100 in { 1077 def: Pat<(fshl I32:$Rs, I32:$Rt, (i32 16)), 1078 (A2_combine_hl I32:$Rs, I32:$Rt)>; 1079 def: Pat<(fshl I64:$Rs, I64:$Rt, IsMul8_U3:$S), 1080 (S2_valignib I64:$Rs, I64:$Rt, (Divu64_8 $S))>; 1081} 1082 1083let Predicates = [HasV60], AddedComplexity = 50 in { 1084 def: OpR_RI_pat<S6_rol_i_r, Rol, i32, I32, u5_0ImmPred>; 1085 def: OpR_RI_pat<S6_rol_i_p, Rol, i64, I64, u6_0ImmPred>; 1086} 1087let AddedComplexity = 30 in { 1088 def: Pat<(rotl I32:$Rs, u5_0ImmPred:$S), (FShl32i $Rs, $Rs, imm:$S)>; 1089 def: Pat<(rotl I64:$Rs, u6_0ImmPred:$S), (FShl64i $Rs, $Rs, imm:$S)>; 1090 def: Pat<(fshl I32:$Rs, I32:$Rt, u5_0ImmPred:$S), (FShl32i $Rs, $Rt, imm:$S)>; 1091 def: Pat<(fshl I64:$Rs, I64:$Rt, u6_0ImmPred:$S), (FShl64i $Rs, $Rt, imm:$S)>; 1092} 1093def: Pat<(rotl I32:$Rs, I32:$Rt), (FShl32r $Rs, $Rs, $Rt)>; 1094def: Pat<(rotl I64:$Rs, I32:$Rt), (FShl64r $Rs, $Rs, $Rt)>; 1095def: Pat<(fshl I32:$Rs, I32:$Rt, I32:$Ru), (FShl32r $Rs, $Rt, $Ru)>; 1096def: Pat<(fshl I64:$Rs, I64:$Rt, I32:$Ru), (FShl64r $Rs, $Rt, $Ru)>; 1097 1098// Funnel shift-right. 1099def FShr32i: OutPatFrag<(ops node:$Rs, node:$Rt, node:$S), 1100 (LoReg (S2_lsr_i_p (Combinew $Rs, $Rt), $S))>; 1101def FShr32r: OutPatFrag<(ops node:$Rs, node:$Rt, node:$Ru), 1102 (LoReg (S2_lsr_r_p (Combinew $Rs, $Rt), $Ru))>; 1103 1104def FShr64i: OutPatFrag<(ops node:$Rs, node:$Rt, node:$S), 1105 (S2_asl_i_p_or (S2_lsr_i_p $Rt, $S), $Rs, (Subi<64> $S))>; 1106def FShr64r: OutPatFrag<(ops node:$Rs, node:$Rt, node:$Ru), 1107 (S2_asl_r_p_or (S2_lsr_r_p $Rt, $Ru), $Rs, (A2_subri 64, $Ru))>; 1108 1109// Special cases: 1110let AddedComplexity = 100 in { 1111 def: Pat<(fshr I32:$Rs, I32:$Rt, (i32 16)), 1112 (A2_combine_hl I32:$Rs, I32:$Rt)>; 1113 def: Pat<(fshr I64:$Rs, I64:$Rt, IsMul8_U3:$S), 1114 (S2_valignib I64:$Rs, I64:$Rt, (Divu8 $S))>; 1115} 1116 1117let Predicates = [HasV60], AddedComplexity = 50 in { 1118 def: Pat<(rotr I32:$Rs, u5_0ImmPred:$S), (S6_rol_i_r I32:$Rs, (Subi<32> $S))>; 1119 def: Pat<(rotr I64:$Rs, u6_0ImmPred:$S), (S6_rol_i_p I64:$Rs, (Subi<64> $S))>; 1120} 1121let AddedComplexity = 30 in { 1122 def: Pat<(rotr I32:$Rs, u5_0ImmPred:$S), (FShr32i $Rs, $Rs, imm:$S)>; 1123 def: Pat<(rotr I64:$Rs, u6_0ImmPred:$S), (FShr64i $Rs, $Rs, imm:$S)>; 1124 def: Pat<(fshr I32:$Rs, I32:$Rt, u5_0ImmPred:$S), (FShr32i $Rs, $Rt, imm:$S)>; 1125 def: Pat<(fshr I64:$Rs, I64:$Rt, u6_0ImmPred:$S), (FShr64i $Rs, $Rt, imm:$S)>; 1126} 1127def: Pat<(rotr I32:$Rs, I32:$Rt), (FShr32r $Rs, $Rs, $Rt)>; 1128def: Pat<(rotr I64:$Rs, I32:$Rt), (FShr64r $Rs, $Rs, $Rt)>; 1129def: Pat<(fshr I32:$Rs, I32:$Rt, I32:$Ru), (FShr32r $Rs, $Rt, $Ru)>; 1130def: Pat<(fshr I64:$Rs, I64:$Rt, I32:$Ru), (FShr64r $Rs, $Rt, $Ru)>; 1131 1132 1133def: Pat<(sra (add (sra I32:$Rs, u5_0ImmPred:$u5), 1), (i32 1)), 1134 (S2_asr_i_r_rnd I32:$Rs, imm:$u5)>; 1135def: Pat<(sra (add (sra I64:$Rs, u6_0ImmPred:$u6), 1), (i32 1)), 1136 (S2_asr_i_p_rnd I64:$Rs, imm:$u6)>; 1137 1138// Prefer S2_addasl_rrri over S2_asl_i_r_acc. 1139let AddedComplexity = 120 in 1140def: Pat<(add I32:$Rt, (shl I32:$Rs, u3_0ImmPred:$u3)), 1141 (S2_addasl_rrri IntRegs:$Rt, IntRegs:$Rs, imm:$u3)>; 1142 1143let AddedComplexity = 100 in { 1144 def: AccRRI_pat<S2_asr_i_r_acc, Add, Su<Sra>, I32, u5_0ImmPred>; 1145 def: AccRRI_pat<S2_asr_i_r_nac, Sub, Su<Sra>, I32, u5_0ImmPred>; 1146 def: AccRRI_pat<S2_asr_i_r_and, And, Su<Sra>, I32, u5_0ImmPred>; 1147 def: AccRRI_pat<S2_asr_i_r_or, Or, Su<Sra>, I32, u5_0ImmPred>; 1148 1149 def: AccRRI_pat<S2_asr_i_p_acc, Add, Su<Sra>, I64, u6_0ImmPred>; 1150 def: AccRRI_pat<S2_asr_i_p_nac, Sub, Su<Sra>, I64, u6_0ImmPred>; 1151 def: AccRRI_pat<S2_asr_i_p_and, And, Su<Sra>, I64, u6_0ImmPred>; 1152 def: AccRRI_pat<S2_asr_i_p_or, Or, Su<Sra>, I64, u6_0ImmPred>; 1153 1154 def: AccRRI_pat<S2_lsr_i_r_acc, Add, Su<Srl>, I32, u5_0ImmPred>; 1155 def: AccRRI_pat<S2_lsr_i_r_nac, Sub, Su<Srl>, I32, u5_0ImmPred>; 1156 def: AccRRI_pat<S2_lsr_i_r_and, And, Su<Srl>, I32, u5_0ImmPred>; 1157 def: AccRRI_pat<S2_lsr_i_r_or, Or, Su<Srl>, I32, u5_0ImmPred>; 1158 def: AccRRI_pat<S2_lsr_i_r_xacc, Xor, Su<Srl>, I32, u5_0ImmPred>; 1159 1160 def: AccRRI_pat<S2_lsr_i_p_acc, Add, Su<Srl>, I64, u6_0ImmPred>; 1161 def: AccRRI_pat<S2_lsr_i_p_nac, Sub, Su<Srl>, I64, u6_0ImmPred>; 1162 def: AccRRI_pat<S2_lsr_i_p_and, And, Su<Srl>, I64, u6_0ImmPred>; 1163 def: AccRRI_pat<S2_lsr_i_p_or, Or, Su<Srl>, I64, u6_0ImmPred>; 1164 def: AccRRI_pat<S2_lsr_i_p_xacc, Xor, Su<Srl>, I64, u6_0ImmPred>; 1165 1166 def: AccRRI_pat<S2_asl_i_r_acc, Add, Su<Shl>, I32, u5_0ImmPred>; 1167 def: AccRRI_pat<S2_asl_i_r_nac, Sub, Su<Shl>, I32, u5_0ImmPred>; 1168 def: AccRRI_pat<S2_asl_i_r_and, And, Su<Shl>, I32, u5_0ImmPred>; 1169 def: AccRRI_pat<S2_asl_i_r_or, Or, Su<Shl>, I32, u5_0ImmPred>; 1170 def: AccRRI_pat<S2_asl_i_r_xacc, Xor, Su<Shl>, I32, u5_0ImmPred>; 1171 1172 def: AccRRI_pat<S2_asl_i_p_acc, Add, Su<Shl>, I64, u6_0ImmPred>; 1173 def: AccRRI_pat<S2_asl_i_p_nac, Sub, Su<Shl>, I64, u6_0ImmPred>; 1174 def: AccRRI_pat<S2_asl_i_p_and, And, Su<Shl>, I64, u6_0ImmPred>; 1175 def: AccRRI_pat<S2_asl_i_p_or, Or, Su<Shl>, I64, u6_0ImmPred>; 1176 def: AccRRI_pat<S2_asl_i_p_xacc, Xor, Su<Shl>, I64, u6_0ImmPred>; 1177 1178 let Predicates = [HasV60] in { 1179 def: AccRRI_pat<S6_rol_i_r_acc, Add, Su<Rol>, I32, u5_0ImmPred>; 1180 def: AccRRI_pat<S6_rol_i_r_nac, Sub, Su<Rol>, I32, u5_0ImmPred>; 1181 def: AccRRI_pat<S6_rol_i_r_and, And, Su<Rol>, I32, u5_0ImmPred>; 1182 def: AccRRI_pat<S6_rol_i_r_or, Or, Su<Rol>, I32, u5_0ImmPred>; 1183 def: AccRRI_pat<S6_rol_i_r_xacc, Xor, Su<Rol>, I32, u5_0ImmPred>; 1184 1185 def: AccRRI_pat<S6_rol_i_p_acc, Add, Su<Rol>, I64, u6_0ImmPred>; 1186 def: AccRRI_pat<S6_rol_i_p_nac, Sub, Su<Rol>, I64, u6_0ImmPred>; 1187 def: AccRRI_pat<S6_rol_i_p_and, And, Su<Rol>, I64, u6_0ImmPred>; 1188 def: AccRRI_pat<S6_rol_i_p_or, Or, Su<Rol>, I64, u6_0ImmPred>; 1189 def: AccRRI_pat<S6_rol_i_p_xacc, Xor, Su<Rol>, I64, u6_0ImmPred>; 1190 } 1191} 1192 1193let AddedComplexity = 100 in { 1194 def: AccRRR_pat<S2_asr_r_r_acc, Add, Su<Sra>, I32, I32, I32>; 1195 def: AccRRR_pat<S2_asr_r_r_nac, Sub, Su<Sra>, I32, I32, I32>; 1196 def: AccRRR_pat<S2_asr_r_r_and, And, Su<Sra>, I32, I32, I32>; 1197 def: AccRRR_pat<S2_asr_r_r_or, Or, Su<Sra>, I32, I32, I32>; 1198 1199 def: AccRRR_pat<S2_asr_r_p_acc, Add, Su<Sra>, I64, I64, I32>; 1200 def: AccRRR_pat<S2_asr_r_p_nac, Sub, Su<Sra>, I64, I64, I32>; 1201 def: AccRRR_pat<S2_asr_r_p_and, And, Su<Sra>, I64, I64, I32>; 1202 def: AccRRR_pat<S2_asr_r_p_or, Or, Su<Sra>, I64, I64, I32>; 1203 def: AccRRR_pat<S2_asr_r_p_xor, Xor, Su<Sra>, I64, I64, I32>; 1204 1205 def: AccRRR_pat<S2_lsr_r_r_acc, Add, Su<Srl>, I32, I32, I32>; 1206 def: AccRRR_pat<S2_lsr_r_r_nac, Sub, Su<Srl>, I32, I32, I32>; 1207 def: AccRRR_pat<S2_lsr_r_r_and, And, Su<Srl>, I32, I32, I32>; 1208 def: AccRRR_pat<S2_lsr_r_r_or, Or, Su<Srl>, I32, I32, I32>; 1209 1210 def: AccRRR_pat<S2_lsr_r_p_acc, Add, Su<Srl>, I64, I64, I32>; 1211 def: AccRRR_pat<S2_lsr_r_p_nac, Sub, Su<Srl>, I64, I64, I32>; 1212 def: AccRRR_pat<S2_lsr_r_p_and, And, Su<Srl>, I64, I64, I32>; 1213 def: AccRRR_pat<S2_lsr_r_p_or, Or, Su<Srl>, I64, I64, I32>; 1214 def: AccRRR_pat<S2_lsr_r_p_xor, Xor, Su<Srl>, I64, I64, I32>; 1215 1216 def: AccRRR_pat<S2_asl_r_r_acc, Add, Su<Shl>, I32, I32, I32>; 1217 def: AccRRR_pat<S2_asl_r_r_nac, Sub, Su<Shl>, I32, I32, I32>; 1218 def: AccRRR_pat<S2_asl_r_r_and, And, Su<Shl>, I32, I32, I32>; 1219 def: AccRRR_pat<S2_asl_r_r_or, Or, Su<Shl>, I32, I32, I32>; 1220 1221 def: AccRRR_pat<S2_asl_r_p_acc, Add, Su<Shl>, I64, I64, I32>; 1222 def: AccRRR_pat<S2_asl_r_p_nac, Sub, Su<Shl>, I64, I64, I32>; 1223 def: AccRRR_pat<S2_asl_r_p_and, And, Su<Shl>, I64, I64, I32>; 1224 def: AccRRR_pat<S2_asl_r_p_or, Or, Su<Shl>, I64, I64, I32>; 1225 def: AccRRR_pat<S2_asl_r_p_xor, Xor, Su<Shl>, I64, I64, I32>; 1226} 1227 1228 1229class OpshIRI_pat<InstHexagon MI, PatFrag Op, PatFrag ShOp, 1230 PatFrag RegPred, PatFrag ImmPred> 1231 : Pat<(Op anyimm:$u8, (ShOp RegPred:$Rs, ImmPred:$U5)), 1232 (MI anyimm:$u8, RegPred:$Rs, imm:$U5)>; 1233 1234let AddedComplexity = 200 in { 1235 def: OpshIRI_pat<S4_addi_asl_ri, Add, Su<Shl>, I32, u5_0ImmPred>; 1236 def: OpshIRI_pat<S4_addi_lsr_ri, Add, Su<Srl>, I32, u5_0ImmPred>; 1237 def: OpshIRI_pat<S4_subi_asl_ri, Sub, Su<Shl>, I32, u5_0ImmPred>; 1238 def: OpshIRI_pat<S4_subi_lsr_ri, Sub, Su<Srl>, I32, u5_0ImmPred>; 1239 def: OpshIRI_pat<S4_andi_asl_ri, And, Su<Shl>, I32, u5_0ImmPred>; 1240 def: OpshIRI_pat<S4_andi_lsr_ri, And, Su<Srl>, I32, u5_0ImmPred>; 1241 def: OpshIRI_pat<S4_ori_asl_ri, Or, Su<Shl>, I32, u5_0ImmPred>; 1242 def: OpshIRI_pat<S4_ori_lsr_ri, Or, Su<Srl>, I32, u5_0ImmPred>; 1243} 1244 1245// Prefer this pattern to S2_asl_i_p_or for the special case of joining 1246// two 32-bit words into a 64-bit word. 1247let AddedComplexity = 200 in 1248def: Pat<(or (shl (Aext64 I32:$a), (i32 32)), (Zext64 I32:$b)), 1249 (Combinew I32:$a, I32:$b)>; 1250 1251def: Pat<(or (or (or (shl (Zext64 (and I32:$b, (i32 65535))), (i32 16)), 1252 (Zext64 (and I32:$a, (i32 65535)))), 1253 (shl (Aext64 (and I32:$c, (i32 65535))), (i32 32))), 1254 (shl (Aext64 I32:$d), (i32 48))), 1255 (Combinew (A2_combine_ll I32:$d, I32:$c), 1256 (A2_combine_ll I32:$b, I32:$a))>; 1257 1258let AddedComplexity = 200 in { 1259 def: Pat<(or (shl I32:$Rt, (i32 16)), (and I32:$Rs, (i32 65535))), 1260 (A2_combine_ll I32:$Rt, I32:$Rs)>; 1261 def: Pat<(or (shl I32:$Rt, (i32 16)), (srl I32:$Rs, (i32 16))), 1262 (A2_combine_lh I32:$Rt, I32:$Rs)>; 1263 def: Pat<(or (and I32:$Rt, (i32 268431360)), (and I32:$Rs, (i32 65535))), 1264 (A2_combine_hl I32:$Rt, I32:$Rs)>; 1265 def: Pat<(or (and I32:$Rt, (i32 268431360)), (srl I32:$Rs, (i32 16))), 1266 (A2_combine_hh I32:$Rt, I32:$Rs)>; 1267} 1268 1269def SDTHexagonVShift 1270 : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisVec<0>, SDTCisVT<2, i32>]>; 1271 1272def HexagonVASL: SDNode<"HexagonISD::VASL", SDTHexagonVShift>; 1273def HexagonVASR: SDNode<"HexagonISD::VASR", SDTHexagonVShift>; 1274def HexagonVLSR: SDNode<"HexagonISD::VLSR", SDTHexagonVShift>; 1275 1276def: OpR_RI_pat<S2_asl_i_vw, pf2<HexagonVASL>, v2i32, V2I32, u5_0ImmPred>; 1277def: OpR_RI_pat<S2_asl_i_vh, pf2<HexagonVASL>, v4i16, V4I16, u4_0ImmPred>; 1278def: OpR_RI_pat<S2_asr_i_vw, pf2<HexagonVASR>, v2i32, V2I32, u5_0ImmPred>; 1279def: OpR_RI_pat<S2_asr_i_vh, pf2<HexagonVASR>, v4i16, V4I16, u4_0ImmPred>; 1280def: OpR_RI_pat<S2_lsr_i_vw, pf2<HexagonVLSR>, v2i32, V2I32, u5_0ImmPred>; 1281def: OpR_RI_pat<S2_lsr_i_vh, pf2<HexagonVLSR>, v4i16, V4I16, u4_0ImmPred>; 1282 1283def: OpR_RR_pat<S2_asl_r_vw, pf2<HexagonVASL>, v2i32, V2I32, I32>; 1284def: OpR_RR_pat<S2_asl_r_vh, pf2<HexagonVASL>, v4i16, V4I16, I32>; 1285def: OpR_RR_pat<S2_asr_r_vw, pf2<HexagonVASR>, v2i32, V2I32, I32>; 1286def: OpR_RR_pat<S2_asr_r_vh, pf2<HexagonVASR>, v4i16, V4I16, I32>; 1287def: OpR_RR_pat<S2_lsr_r_vw, pf2<HexagonVLSR>, v2i32, V2I32, I32>; 1288def: OpR_RR_pat<S2_lsr_r_vh, pf2<HexagonVLSR>, v4i16, V4I16, I32>; 1289 1290def: Pat<(sra V2I32:$b, (v2i32 (HexagonVSPLAT u5_0ImmPred:$c))), 1291 (S2_asr_i_vw V2I32:$b, imm:$c)>; 1292def: Pat<(srl V2I32:$b, (v2i32 (HexagonVSPLAT u5_0ImmPred:$c))), 1293 (S2_lsr_i_vw V2I32:$b, imm:$c)>; 1294def: Pat<(shl V2I32:$b, (v2i32 (HexagonVSPLAT u5_0ImmPred:$c))), 1295 (S2_asl_i_vw V2I32:$b, imm:$c)>; 1296def: Pat<(sra V4I16:$b, (v4i16 (HexagonVSPLAT u4_0ImmPred:$c))), 1297 (S2_asr_i_vh V4I16:$b, imm:$c)>; 1298def: Pat<(srl V4I16:$b, (v4i16 (HexagonVSPLAT u4_0ImmPred:$c))), 1299 (S2_lsr_i_vh V4I16:$b, imm:$c)>; 1300def: Pat<(shl V4I16:$b, (v4i16 (HexagonVSPLAT u4_0ImmPred:$c))), 1301 (S2_asl_i_vh V4I16:$b, imm:$c)>; 1302 1303def: Pat<(HexagonVASR V2I16:$Rs, u4_0ImmPred:$S), 1304 (LoReg (S2_asr_i_vh (ToAext64 $Rs), imm:$S))>; 1305def: Pat<(HexagonVASL V2I16:$Rs, u4_0ImmPred:$S), 1306 (LoReg (S2_asl_i_vh (ToAext64 $Rs), imm:$S))>; 1307def: Pat<(HexagonVLSR V2I16:$Rs, u4_0ImmPred:$S), 1308 (LoReg (S2_lsr_i_vh (ToAext64 $Rs), imm:$S))>; 1309def: Pat<(HexagonVASR V2I16:$Rs, I32:$Rt), 1310 (LoReg (S2_asr_i_vh (ToAext64 $Rs), I32:$Rt))>; 1311def: Pat<(HexagonVASL V2I16:$Rs, I32:$Rt), 1312 (LoReg (S2_asl_i_vh (ToAext64 $Rs), I32:$Rt))>; 1313def: Pat<(HexagonVLSR V2I16:$Rs, I32:$Rt), 1314 (LoReg (S2_lsr_i_vh (ToAext64 $Rs), I32:$Rt))>; 1315 1316 1317// --(9) Arithmetic/bitwise ---------------------------------------------- 1318// 1319 1320def: Pat<(abs I32:$Rs), (A2_abs I32:$Rs)>; 1321def: Pat<(abs I64:$Rs), (A2_absp I64:$Rs)>; 1322def: Pat<(not I32:$Rs), (A2_subri -1, I32:$Rs)>; 1323def: Pat<(not I64:$Rs), (A2_notp I64:$Rs)>; 1324def: Pat<(ineg I64:$Rs), (A2_negp I64:$Rs)>; 1325 1326def: Pat<(fabs F32:$Rs), (S2_clrbit_i F32:$Rs, 31)>; 1327def: Pat<(fneg F32:$Rs), (S2_togglebit_i F32:$Rs, 31)>; 1328 1329def: Pat<(fabs F64:$Rs), 1330 (Combinew (S2_clrbit_i (HiReg $Rs), 31), 1331 (i32 (LoReg $Rs)))>; 1332def: Pat<(fneg F64:$Rs), 1333 (Combinew (S2_togglebit_i (HiReg $Rs), 31), 1334 (i32 (LoReg $Rs)))>; 1335 1336def: Pat<(add I32:$Rs, anyimm:$s16), (A2_addi I32:$Rs, imm:$s16)>; 1337def: Pat<(or I32:$Rs, anyimm:$s10), (A2_orir I32:$Rs, imm:$s10)>; 1338def: Pat<(and I32:$Rs, anyimm:$s10), (A2_andir I32:$Rs, imm:$s10)>; 1339def: Pat<(sub anyimm:$s10, I32:$Rs), (A2_subri imm:$s10, I32:$Rs)>; 1340 1341def: OpR_RR_pat<A2_add, Add, i32, I32>; 1342def: OpR_RR_pat<A2_sub, Sub, i32, I32>; 1343def: OpR_RR_pat<A2_and, And, i32, I32>; 1344def: OpR_RR_pat<A2_or, Or, i32, I32>; 1345def: OpR_RR_pat<A2_xor, Xor, i32, I32>; 1346def: OpR_RR_pat<A2_addp, Add, i64, I64>; 1347def: OpR_RR_pat<A2_subp, Sub, i64, I64>; 1348def: OpR_RR_pat<A2_andp, And, i64, I64>; 1349def: OpR_RR_pat<A2_orp, Or, i64, I64>; 1350def: OpR_RR_pat<A2_xorp, Xor, i64, I64>; 1351def: OpR_RR_pat<A4_andnp, Not2<And>, i64, I64>; 1352def: OpR_RR_pat<A4_ornp, Not2<Or>, i64, I64>; 1353 1354def: OpR_RR_pat<A2_svaddh, Add, v2i16, V2I16>; 1355def: OpR_RR_pat<A2_svsubh, Sub, v2i16, V2I16>; 1356 1357def: OpR_RR_pat<A2_vaddub, Add, v8i8, V8I8>; 1358def: OpR_RR_pat<A2_vaddh, Add, v4i16, V4I16>; 1359def: OpR_RR_pat<A2_vaddw, Add, v2i32, V2I32>; 1360def: OpR_RR_pat<A2_vsubub, Sub, v8i8, V8I8>; 1361def: OpR_RR_pat<A2_vsubh, Sub, v4i16, V4I16>; 1362def: OpR_RR_pat<A2_vsubw, Sub, v2i32, V2I32>; 1363 1364def: OpR_RR_pat<A2_and, And, v4i8, V4I8>; 1365def: OpR_RR_pat<A2_xor, Xor, v4i8, V4I8>; 1366def: OpR_RR_pat<A2_or, Or, v4i8, V4I8>; 1367def: OpR_RR_pat<A2_and, And, v2i16, V2I16>; 1368def: OpR_RR_pat<A2_xor, Xor, v2i16, V2I16>; 1369def: OpR_RR_pat<A2_or, Or, v2i16, V2I16>; 1370def: OpR_RR_pat<A2_andp, And, v8i8, V8I8>; 1371def: OpR_RR_pat<A2_orp, Or, v8i8, V8I8>; 1372def: OpR_RR_pat<A2_xorp, Xor, v8i8, V8I8>; 1373def: OpR_RR_pat<A2_andp, And, v4i16, V4I16>; 1374def: OpR_RR_pat<A2_orp, Or, v4i16, V4I16>; 1375def: OpR_RR_pat<A2_xorp, Xor, v4i16, V4I16>; 1376def: OpR_RR_pat<A2_andp, And, v2i32, V2I32>; 1377def: OpR_RR_pat<A2_orp, Or, v2i32, V2I32>; 1378def: OpR_RR_pat<A2_xorp, Xor, v2i32, V2I32>; 1379 1380def: OpR_RR_pat<M2_mpyi, Mul, i32, I32>; 1381def: OpR_RR_pat<M2_mpy_up, pf2<mulhs>, i32, I32>; 1382def: OpR_RR_pat<M2_mpyu_up, pf2<mulhu>, i32, I32>; 1383def: OpR_RI_pat<M2_mpysip, Mul, i32, I32, u32_0ImmPred>; 1384def: OpR_RI_pat<M2_mpysmi, Mul, i32, I32, s32_0ImmPred>; 1385 1386// Arithmetic on predicates. 1387def: OpR_RR_pat<C2_xor, Add, i1, I1>; 1388def: OpR_RR_pat<C2_xor, Add, v2i1, V2I1>; 1389def: OpR_RR_pat<C2_xor, Add, v4i1, V4I1>; 1390def: OpR_RR_pat<C2_xor, Add, v8i1, V8I1>; 1391def: OpR_RR_pat<C2_xor, Sub, i1, I1>; 1392def: OpR_RR_pat<C2_xor, Sub, v2i1, V2I1>; 1393def: OpR_RR_pat<C2_xor, Sub, v4i1, V4I1>; 1394def: OpR_RR_pat<C2_xor, Sub, v8i1, V8I1>; 1395def: OpR_RR_pat<C2_and, Mul, i1, I1>; 1396def: OpR_RR_pat<C2_and, Mul, v2i1, V2I1>; 1397def: OpR_RR_pat<C2_and, Mul, v4i1, V4I1>; 1398def: OpR_RR_pat<C2_and, Mul, v8i1, V8I1>; 1399 1400def: OpR_RR_pat<F2_sfadd, pf2<fadd>, f32, F32>; 1401def: OpR_RR_pat<F2_sfsub, pf2<fsub>, f32, F32>; 1402def: OpR_RR_pat<F2_sfmpy, pf2<fmul>, f32, F32>; 1403def: OpR_RR_pat<F2_sfmin, pf2<fminnum>, f32, F32>; 1404def: OpR_RR_pat<F2_sfmax, pf2<fmaxnum>, f32, F32>; 1405 1406let Predicates = [HasV66] in { 1407 def: OpR_RR_pat<F2_dfadd, pf2<fadd>, f64, F64>; 1408 def: OpR_RR_pat<F2_dfsub, pf2<fsub>, f64, F64>; 1409} 1410 1411// In expressions like a0*b0 + a1*b1 + ..., prefer to generate multiply-add, 1412// over add-add with individual multiplies as inputs. 1413let AddedComplexity = 10 in { 1414 def: AccRRI_pat<M2_macsip, Add, Su<Mul>, I32, u32_0ImmPred>; 1415 def: AccRRI_pat<M2_macsin, Sub, Su<Mul>, I32, u32_0ImmPred>; 1416 def: AccRRR_pat<M2_maci, Add, Su<Mul>, I32, I32, I32>; 1417 let Predicates = [HasV66] in 1418 def: AccRRR_pat<M2_mnaci, Sub, Su<Mul>, I32, I32, I32>; 1419} 1420 1421def: AccRRI_pat<M2_naccii, Sub, Su<Add>, I32, s32_0ImmPred>; 1422def: AccRRI_pat<M2_accii, Add, Su<Add>, I32, s32_0ImmPred>; 1423def: AccRRR_pat<M2_acci, Add, Su<Add>, I32, I32, I32>; 1424 1425// Mulh for vectors 1426// 1427def: Pat<(v2i32 (mulhu V2I32:$Rss, V2I32:$Rtt)), 1428 (Combinew (M2_mpyu_up (HiReg $Rss), (HiReg $Rtt)), 1429 (M2_mpyu_up (LoReg $Rss), (LoReg $Rtt)))>; 1430 1431def: Pat<(v2i32 (mulhs V2I32:$Rs, V2I32:$Rt)), 1432 (Combinew (M2_mpy_up (HiReg $Rs), (HiReg $Rt)), 1433 (M2_mpy_up (LoReg $Rt), (LoReg $Rt)))>; 1434 1435def Mulhub: 1436 OutPatFrag<(ops node:$Rss, node:$Rtt), 1437 (Combinew (S2_vtrunohb (M5_vmpybuu (HiReg $Rss), (HiReg $Rtt))), 1438 (S2_vtrunohb (M5_vmpybuu (LoReg $Rss), (LoReg $Rtt))))>; 1439 1440// Equivalent of byte-wise arithmetic shift right by 7 in v8i8. 1441def Asr7: 1442 OutPatFrag<(ops node:$Rss), (C2_mask (C2_not (A4_vcmpbgti $Rss, 0)))>; 1443 1444def: Pat<(v8i8 (mulhu V8I8:$Rss, V8I8:$Rtt)), 1445 (Mulhub $Rss, $Rtt)>; 1446 1447def: Pat<(v8i8 (mulhs V8I8:$Rss, V8I8:$Rtt)), 1448 (A2_vsubub 1449 (Mulhub $Rss, $Rtt), 1450 (A2_vaddub (A2_andp V8I8:$Rss, (Asr7 $Rtt)), 1451 (A2_andp V8I8:$Rtt, (Asr7 $Rss))))>; 1452 1453def Mpysh: 1454 OutPatFrag<(ops node:$Rs, node:$Rt), (M2_vmpy2s_s0 $Rs, $Rt)>; 1455def Mpyshh: 1456 OutPatFrag<(ops node:$Rss, node:$Rtt), (Mpysh (HiReg $Rss), (HiReg $Rtt))>; 1457def Mpyshl: 1458 OutPatFrag<(ops node:$Rss, node:$Rtt), (Mpysh (LoReg $Rss), (LoReg $Rtt))>; 1459 1460def Mulhsh: 1461 OutPatFrag<(ops node:$Rss, node:$Rtt), 1462 (Combinew (A2_combine_hh (HiReg (Mpyshh $Rss, $Rtt)), 1463 (LoReg (Mpyshh $Rss, $Rtt))), 1464 (A2_combine_hh (HiReg (Mpyshl $Rss, $Rtt)), 1465 (LoReg (Mpyshl $Rss, $Rtt))))>; 1466 1467def: Pat<(v4i16 (mulhs V4I16:$Rss, V4I16:$Rtt)), (Mulhsh $Rss, $Rtt)>; 1468 1469def: Pat<(v4i16 (mulhu V4I16:$Rss, V4I16:$Rtt)), 1470 (A2_vaddh 1471 (Mulhsh $Rss, $Rtt), 1472 (A2_vaddh (A2_andp V4I16:$Rss, (S2_asr_i_vh $Rtt, 15)), 1473 (A2_andp V4I16:$Rtt, (S2_asr_i_vh $Rss, 15))))>; 1474 1475 1476def: Pat<(ineg (mul I32:$Rs, u8_0ImmPred:$u8)), 1477 (M2_mpysin IntRegs:$Rs, imm:$u8)>; 1478 1479def n8_0ImmPred: PatLeaf<(i32 imm), [{ 1480 int64_t V = N->getSExtValue(); 1481 return -255 <= V && V <= 0; 1482}]>; 1483 1484// Change the sign of the immediate for Rd=-mpyi(Rs,#u8) 1485def: Pat<(mul I32:$Rs, n8_0ImmPred:$n8), 1486 (M2_mpysin I32:$Rs, (NegImm8 imm:$n8))>; 1487 1488def: Pat<(add Sext64:$Rs, I64:$Rt), 1489 (A2_addsp (LoReg Sext64:$Rs), I64:$Rt)>; 1490 1491def: AccRRR_pat<M4_and_and, And, Su_ni1<And>, I32, I32, I32>; 1492def: AccRRR_pat<M4_and_or, And, Su_ni1<Or>, I32, I32, I32>; 1493def: AccRRR_pat<M4_and_xor, And, Su<Xor>, I32, I32, I32>; 1494def: AccRRR_pat<M4_or_and, Or, Su_ni1<And>, I32, I32, I32>; 1495def: AccRRR_pat<M4_or_or, Or, Su_ni1<Or>, I32, I32, I32>; 1496def: AccRRR_pat<M4_or_xor, Or, Su<Xor>, I32, I32, I32>; 1497def: AccRRR_pat<M4_xor_and, Xor, Su_ni1<And>, I32, I32, I32>; 1498def: AccRRR_pat<M4_xor_or, Xor, Su_ni1<Or>, I32, I32, I32>; 1499def: AccRRR_pat<M2_xor_xacc, Xor, Su<Xor>, I32, I32, I32>; 1500def: AccRRR_pat<M4_xor_xacc, Xor, Su<Xor>, I64, I64, I64>; 1501 1502// For dags like (or (and (not _), _), (shl _, _)) where the "or" with 1503// one argument matches the patterns below, and with the other argument 1504// matches S2_asl_r_r_or, etc, prefer the patterns below. 1505let AddedComplexity = 110 in { // greater than S2_asl_r_r_and/or/xor. 1506 def: AccRRR_pat<M4_and_andn, And, Su<Not2<And>>, I32, I32, I32>; 1507 def: AccRRR_pat<M4_or_andn, Or, Su<Not2<And>>, I32, I32, I32>; 1508 def: AccRRR_pat<M4_xor_andn, Xor, Su<Not2<And>>, I32, I32, I32>; 1509} 1510 1511// S4_addaddi and S4_subaddi don't have tied operands, so give them 1512// a bit of preference. 1513let AddedComplexity = 30 in { 1514 def: Pat<(add I32:$Rs, (Su<Add> I32:$Ru, anyimm:$s6)), 1515 (S4_addaddi IntRegs:$Rs, IntRegs:$Ru, imm:$s6)>; 1516 def: Pat<(add anyimm:$s6, (Su<Add> I32:$Rs, I32:$Ru)), 1517 (S4_addaddi IntRegs:$Rs, IntRegs:$Ru, imm:$s6)>; 1518 def: Pat<(add I32:$Rs, (Su<Sub> anyimm:$s6, I32:$Ru)), 1519 (S4_subaddi IntRegs:$Rs, imm:$s6, IntRegs:$Ru)>; 1520 def: Pat<(sub (Su<Add> I32:$Rs, anyimm:$s6), I32:$Ru), 1521 (S4_subaddi IntRegs:$Rs, imm:$s6, IntRegs:$Ru)>; 1522 def: Pat<(add (Su<Sub> I32:$Rs, I32:$Ru), anyimm:$s6), 1523 (S4_subaddi IntRegs:$Rs, imm:$s6, IntRegs:$Ru)>; 1524} 1525 1526def: Pat<(or I32:$Ru, (Su<And> I32:$Rx, anyimm:$s10)), 1527 (S4_or_andix IntRegs:$Ru, IntRegs:$Rx, imm:$s10)>; 1528def: Pat<(or I32:$Rx, (Su<And> I32:$Rs, anyimm:$s10)), 1529 (S4_or_andi IntRegs:$Rx, IntRegs:$Rs, imm:$s10)>; 1530def: Pat<(or I32:$Rx, (Su<Or> I32:$Rs, anyimm:$s10)), 1531 (S4_or_ori IntRegs:$Rx, IntRegs:$Rs, imm:$s10)>; 1532 1533 1534def: Pat<(i32 (trunc (sra (Su<Mul> Sext64:$Rs, Sext64:$Rt), (i32 32)))), 1535 (M2_mpy_up (LoReg Sext64:$Rs), (LoReg Sext64:$Rt))>; 1536def: Pat<(i32 (trunc (srl (Su<Mul> Sext64:$Rs, Sext64:$Rt), (i32 32)))), 1537 (M2_mpy_up (LoReg Sext64:$Rs), (LoReg Sext64:$Rt))>; 1538 1539def: Pat<(mul (Zext64 I32:$Rs), (Zext64 I32:$Rt)), 1540 (M2_dpmpyuu_s0 I32:$Rs, I32:$Rt)>; 1541def: Pat<(mul (Aext64 I32:$Rs), (Aext64 I32:$Rt)), 1542 (M2_dpmpyuu_s0 I32:$Rs, I32:$Rt)>; 1543def: Pat<(mul Sext64:$Rs, Sext64:$Rt), 1544 (M2_dpmpyss_s0 (LoReg Sext64:$Rs), (LoReg Sext64:$Rt))>; 1545 1546def: Pat<(add I64:$Rx, (Su<Mul> Sext64:$Rs, Sext64:$Rt)), 1547 (M2_dpmpyss_acc_s0 I64:$Rx, (LoReg Sext64:$Rs), (LoReg Sext64:$Rt))>; 1548def: Pat<(sub I64:$Rx, (Su<Mul> Sext64:$Rs, Sext64:$Rt)), 1549 (M2_dpmpyss_nac_s0 I64:$Rx, (LoReg Sext64:$Rs), (LoReg Sext64:$Rt))>; 1550def: Pat<(add I64:$Rx, (Su<Mul> (Aext64 I32:$Rs), (Aext64 I32:$Rt))), 1551 (M2_dpmpyuu_acc_s0 I64:$Rx, I32:$Rs, I32:$Rt)>; 1552def: Pat<(add I64:$Rx, (Su<Mul> (Zext64 I32:$Rs), (Zext64 I32:$Rt))), 1553 (M2_dpmpyuu_acc_s0 I64:$Rx, I32:$Rs, I32:$Rt)>; 1554def: Pat<(sub I64:$Rx, (Su<Mul> (Aext64 I32:$Rs), (Aext64 I32:$Rt))), 1555 (M2_dpmpyuu_nac_s0 I64:$Rx, I32:$Rs, I32:$Rt)>; 1556def: Pat<(sub I64:$Rx, (Su<Mul> (Zext64 I32:$Rs), (Zext64 I32:$Rt))), 1557 (M2_dpmpyuu_nac_s0 I64:$Rx, I32:$Rs, I32:$Rt)>; 1558 1559// Add halfword. 1560def: Pat<(sext_inreg (add I32:$Rt, I32:$Rs), i16), 1561 (A2_addh_l16_ll I32:$Rt, I32:$Rs)>; 1562def: Pat<(sra (add (shl I32:$Rt, (i32 16)), I32:$Rs), (i32 16)), 1563 (A2_addh_l16_hl I32:$Rt, I32:$Rs)>; 1564def: Pat<(shl (add I32:$Rt, I32:$Rs), (i32 16)), 1565 (A2_addh_h16_ll I32:$Rt, I32:$Rs)>; 1566 1567// Subtract halfword. 1568def: Pat<(sext_inreg (sub I32:$Rt, I32:$Rs), i16), 1569 (A2_subh_l16_ll I32:$Rt, I32:$Rs)>; 1570def: Pat<(sra (add (shl I32:$Rt, (i32 16)), I32:$Rs), (i32 16)), 1571 (A2_addh_l16_hl I32:$Rt, I32:$Rs)>; 1572def: Pat<(shl (sub I32:$Rt, I32:$Rs), (i32 16)), 1573 (A2_subh_h16_ll I32:$Rt, I32:$Rs)>; 1574 1575def: Pat<(mul I64:$Rss, I64:$Rtt), 1576 (Combinew 1577 (M2_maci (M2_maci (HiReg (M2_dpmpyuu_s0 (LoReg $Rss), (LoReg $Rtt))), 1578 (LoReg $Rss), 1579 (HiReg $Rtt)), 1580 (LoReg $Rtt), 1581 (HiReg $Rss)), 1582 (i32 (LoReg (M2_dpmpyuu_s0 (LoReg $Rss), (LoReg $Rtt)))))>; 1583 1584def MulHU : OutPatFrag<(ops node:$Rss, node:$Rtt), 1585 (A2_addp 1586 (M2_dpmpyuu_acc_s0 1587 (S2_lsr_i_p 1588 (A2_addp 1589 (M2_dpmpyuu_acc_s0 1590 (S2_lsr_i_p (M2_dpmpyuu_s0 (LoReg $Rss), (LoReg $Rtt)), 32), 1591 (HiReg $Rss), 1592 (LoReg $Rtt)), 1593 (A4_combineir 0, (LoReg (M2_dpmpyuu_s0 (LoReg $Rss), (HiReg $Rtt))))), 1594 32), 1595 (HiReg $Rss), 1596 (HiReg $Rtt)), 1597 (S2_lsr_i_p (M2_dpmpyuu_s0 (LoReg $Rss), (HiReg $Rtt)), 32))>; 1598 1599// Multiply 64-bit unsigned and use upper result. 1600def : Pat <(mulhu I64:$Rss, I64:$Rtt), (MulHU $Rss, $Rtt)>; 1601 1602// Multiply 64-bit signed and use upper result. 1603// 1604// For two signed 64-bit integers A and B, let A' and B' denote A and B 1605// with the sign bit cleared. Then A = -2^63*s(A) + A', where s(A) is the 1606// sign bit of A (and identically for B). With this notation, the signed 1607// product A*B can be written as: 1608// AB = (-2^63 s(A) + A') * (-2^63 s(B) + B') 1609// = 2^126 s(A)s(B) - 2^63 [s(A)B'+s(B)A'] + A'B' 1610// = 2^126 s(A)s(B) + 2^63 [s(A)B'+s(B)A'] + A'B' - 2*2^63 [s(A)B'+s(B)A'] 1611// = (unsigned product AB) - 2^64 [s(A)B'+s(B)A'] 1612 1613// Clear the sign bit in a 64-bit register. 1614def ClearSign : OutPatFrag<(ops node:$Rss), 1615 (Combinew (S2_clrbit_i (HiReg $Rss), 31), (i32 (LoReg $Rss)))>; 1616 1617def : Pat <(mulhs I64:$Rss, I64:$Rtt), 1618 (A2_subp 1619 (MulHU $Rss, $Rtt), 1620 (A2_addp 1621 (A2_andp (S2_asr_i_p $Rss, 63), (ClearSign $Rtt)), 1622 (A2_andp (S2_asr_i_p $Rtt, 63), (ClearSign $Rss))))>; 1623 1624// Prefer these instructions over M2_macsip/M2_macsin: the macsi* instructions 1625// will put the immediate addend into a register, while these instructions will 1626// use it directly. Such a construct does not appear in the middle of a gep, 1627// where M2_macsip would be preferable. 1628let AddedComplexity = 20 in { 1629 def: Pat<(add (Su<Mul> I32:$Rs, u6_0ImmPred:$U6), anyimm:$u6), 1630 (M4_mpyri_addi imm:$u6, IntRegs:$Rs, imm:$U6)>; 1631 def: Pat<(add (Su<Mul> I32:$Rs, I32:$Rt), anyimm:$u6), 1632 (M4_mpyrr_addi imm:$u6, IntRegs:$Rs, IntRegs:$Rt)>; 1633} 1634 1635// Keep these instructions less preferable to M2_macsip/M2_macsin. 1636def: Pat<(add I32:$Ru, (Su<Mul> I32:$Rs, u6_2ImmPred:$u6_2)), 1637 (M4_mpyri_addr_u2 IntRegs:$Ru, imm:$u6_2, IntRegs:$Rs)>; 1638def: Pat<(add I32:$Ru, (Su<Mul> I32:$Rs, anyimm:$u6)), 1639 (M4_mpyri_addr IntRegs:$Ru, IntRegs:$Rs, imm:$u6)>; 1640def: Pat<(add I32:$Ru, (Su<Mul> I32:$Ry, I32:$Rs)), 1641 (M4_mpyrr_addr IntRegs:$Ru, IntRegs:$Ry, IntRegs:$Rs)>; 1642 1643 1644def: Pat<(fma F32:$Rs, F32:$Rt, F32:$Rx), 1645 (F2_sffma F32:$Rx, F32:$Rs, F32:$Rt)>; 1646def: Pat<(fma (fneg F32:$Rs), F32:$Rt, F32:$Rx), 1647 (F2_sffms F32:$Rx, F32:$Rs, F32:$Rt)>; 1648def: Pat<(fma F32:$Rs, (fneg F32:$Rt), F32:$Rx), 1649 (F2_sffms F32:$Rx, F32:$Rs, F32:$Rt)>; 1650 1651 1652def: Pat<(mul V2I32:$Rs, V2I32:$Rt), 1653 (PS_vmulw V2I32:$Rs, V2I32:$Rt)>; 1654def: Pat<(add V2I32:$Rx, (mul V2I32:$Rs, V2I32:$Rt)), 1655 (PS_vmulw_acc V2I32:$Rx, V2I32:$Rs, V2I32:$Rt)>; 1656 1657// Add/subtract two v4i8: Hexagon does not have an insn for this one, so 1658// we use the double add v8i8, and use only the low part of the result. 1659def: Pat<(add V4I8:$Rs, V4I8:$Rt), 1660 (LoReg (A2_vaddub (ToAext64 $Rs), (ToAext64 $Rt)))>; 1661def: Pat<(sub V4I8:$Rs, V4I8:$Rt), 1662 (LoReg (A2_vsubub (ToAext64 $Rs), (ToAext64 $Rt)))>; 1663 1664// Use M2_vmpy2s_s0 for half-word vector multiply. It multiplies two 1665// half-words, and saturates the result to a 32-bit value, except the 1666// saturation never happens (it can only occur with scaling). 1667def: Pat<(v2i16 (mul V2I16:$Rs, V2I16:$Rt)), 1668 (LoReg (S2_vtrunewh (A2_combineii 0, 0), 1669 (M2_vmpy2s_s0 V2I16:$Rs, V2I16:$Rt)))>; 1670def: Pat<(v4i16 (mul V4I16:$Rs, V4I16:$Rt)), 1671 (S2_vtrunewh (M2_vmpy2s_s0 (HiReg $Rs), (HiReg $Rt)), 1672 (M2_vmpy2s_s0 (LoReg $Rs), (LoReg $Rt)))>; 1673 1674// Multiplies two v4i8 vectors. 1675def: Pat<(v4i8 (mul V4I8:$Rs, V4I8:$Rt)), 1676 (S2_vtrunehb (M5_vmpybuu V4I8:$Rs, V4I8:$Rt))>; 1677 1678// Multiplies two v8i8 vectors. 1679def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)), 1680 (Combinew (S2_vtrunehb (M5_vmpybuu (HiReg $Rs), (HiReg $Rt))), 1681 (S2_vtrunehb (M5_vmpybuu (LoReg $Rs), (LoReg $Rt))))>; 1682 1683 1684// --(10) Bit ------------------------------------------------------------ 1685// 1686 1687// Count leading zeros. 1688def: Pat<(i32 (ctlz I32:$Rs)), (S2_cl0 I32:$Rs)>; 1689def: Pat<(i32 (trunc (ctlz I64:$Rss))), (S2_cl0p I64:$Rss)>; 1690 1691// Count trailing zeros. 1692def: Pat<(i32 (cttz I32:$Rs)), (S2_ct0 I32:$Rs)>; 1693def: Pat<(i32 (trunc (cttz I64:$Rss))), (S2_ct0p I64:$Rss)>; 1694 1695// Count leading ones. 1696def: Pat<(i32 (ctlz (not I32:$Rs))), (S2_cl1 I32:$Rs)>; 1697def: Pat<(i32 (trunc (ctlz (not I64:$Rss)))), (S2_cl1p I64:$Rss)>; 1698 1699// Count trailing ones. 1700def: Pat<(i32 (cttz (not I32:$Rs))), (S2_ct1 I32:$Rs)>; 1701def: Pat<(i32 (trunc (cttz (not I64:$Rss)))), (S2_ct1p I64:$Rss)>; 1702 1703// Define leading/trailing patterns that require zero-extensions to 64 bits. 1704def: Pat<(i64 (ctlz I64:$Rss)), (ToZext64 (S2_cl0p I64:$Rss))>; 1705def: Pat<(i64 (cttz I64:$Rss)), (ToZext64 (S2_ct0p I64:$Rss))>; 1706def: Pat<(i64 (ctlz (not I64:$Rss))), (ToZext64 (S2_cl1p I64:$Rss))>; 1707def: Pat<(i64 (cttz (not I64:$Rss))), (ToZext64 (S2_ct1p I64:$Rss))>; 1708 1709def: Pat<(i64 (ctpop I64:$Rss)), (ToZext64 (S5_popcountp I64:$Rss))>; 1710def: Pat<(i32 (ctpop I32:$Rs)), (S5_popcountp (A4_combineir 0, I32:$Rs))>; 1711 1712def: Pat<(bitreverse I32:$Rs), (S2_brev I32:$Rs)>; 1713def: Pat<(bitreverse I64:$Rss), (S2_brevp I64:$Rss)>; 1714 1715let AddedComplexity = 20 in { // Complexity greater than and/or/xor 1716 def: Pat<(and I32:$Rs, IsNPow2_32:$V), 1717 (S2_clrbit_i IntRegs:$Rs, (LogN2_32 $V))>; 1718 def: Pat<(or I32:$Rs, IsPow2_32:$V), 1719 (S2_setbit_i IntRegs:$Rs, (Log2_32 $V))>; 1720 def: Pat<(xor I32:$Rs, IsPow2_32:$V), 1721 (S2_togglebit_i IntRegs:$Rs, (Log2_32 $V))>; 1722 1723 def: Pat<(and I32:$Rs, (not (shl 1, I32:$Rt))), 1724 (S2_clrbit_r IntRegs:$Rs, IntRegs:$Rt)>; 1725 def: Pat<(or I32:$Rs, (shl 1, I32:$Rt)), 1726 (S2_setbit_r IntRegs:$Rs, IntRegs:$Rt)>; 1727 def: Pat<(xor I32:$Rs, (shl 1, I32:$Rt)), 1728 (S2_togglebit_r IntRegs:$Rs, IntRegs:$Rt)>; 1729} 1730 1731// Clr/set/toggle bit for 64-bit values with immediate bit index. 1732let AddedComplexity = 20 in { // Complexity greater than and/or/xor 1733 def: Pat<(and I64:$Rss, IsNPow2_64L:$V), 1734 (Combinew (i32 (HiReg $Rss)), 1735 (S2_clrbit_i (LoReg $Rss), (LogN2_64 $V)))>; 1736 def: Pat<(and I64:$Rss, IsNPow2_64H:$V), 1737 (Combinew (S2_clrbit_i (HiReg $Rss), (UDEC32 (i32 (LogN2_64 $V)))), 1738 (i32 (LoReg $Rss)))>; 1739 1740 def: Pat<(or I64:$Rss, IsPow2_64L:$V), 1741 (Combinew (i32 (HiReg $Rss)), 1742 (S2_setbit_i (LoReg $Rss), (Log2_64 $V)))>; 1743 def: Pat<(or I64:$Rss, IsPow2_64H:$V), 1744 (Combinew (S2_setbit_i (HiReg $Rss), (UDEC32 (i32 (Log2_64 $V)))), 1745 (i32 (LoReg $Rss)))>; 1746 1747 def: Pat<(xor I64:$Rss, IsPow2_64L:$V), 1748 (Combinew (i32 (HiReg $Rss)), 1749 (S2_togglebit_i (LoReg $Rss), (Log2_64 $V)))>; 1750 def: Pat<(xor I64:$Rss, IsPow2_64H:$V), 1751 (Combinew (S2_togglebit_i (HiReg $Rss), (UDEC32 (i32 (Log2_64 $V)))), 1752 (i32 (LoReg $Rss)))>; 1753} 1754 1755 1756let AddedComplexity = 20 in { // Complexity greater than cmp reg-imm. 1757 def: Pat<(i1 (setne (and (shl 1, u5_0ImmPred:$u5), I32:$Rs), 0)), 1758 (S2_tstbit_i IntRegs:$Rs, imm:$u5)>; 1759 def: Pat<(i1 (setne (and (shl 1, I32:$Rt), I32:$Rs), 0)), 1760 (S2_tstbit_r IntRegs:$Rs, IntRegs:$Rt)>; 1761 def: Pat<(i1 (trunc I32:$Rs)), 1762 (S2_tstbit_i IntRegs:$Rs, 0)>; 1763 def: Pat<(i1 (trunc I64:$Rs)), 1764 (S2_tstbit_i (LoReg DoubleRegs:$Rs), 0)>; 1765} 1766 1767def: Pat<(and (srl I32:$Rs, u5_0ImmPred:$u5), 1), 1768 (I1toI32 (S2_tstbit_i I32:$Rs, imm:$u5))>; 1769def: Pat<(and (srl I64:$Rss, IsULE<32,31>:$u6), 1), 1770 (ToZext64 (I1toI32 (S2_tstbit_i (LoReg $Rss), imm:$u6)))>; 1771def: Pat<(and (srl I64:$Rss, IsUGT<32,31>:$u6), 1), 1772 (ToZext64 (I1toI32 (S2_tstbit_i (HiReg $Rss), (UDEC32 $u6))))>; 1773 1774def: Pat<(and (not (srl I32:$Rs, u5_0ImmPred:$u5)), 1), 1775 (I1toI32 (S4_ntstbit_i I32:$Rs, imm:$u5))>; 1776def: Pat<(and (not (srl I64:$Rss, IsULE<32,31>:$u6)), 1), 1777 (ToZext64 (I1toI32 (S4_ntstbit_i (LoReg $Rss), imm:$u6)))>; 1778def: Pat<(and (not (srl I64:$Rss, IsUGT<32,31>:$u6)), 1), 1779 (ToZext64 (I1toI32 (S4_ntstbit_i (HiReg $Rss), (UDEC32 $u6))))>; 1780 1781let AddedComplexity = 20 in { // Complexity greater than compare reg-imm. 1782 def: Pat<(i1 (seteq (and I32:$Rs, u6_0ImmPred:$u6), 0)), 1783 (C2_bitsclri IntRegs:$Rs, imm:$u6)>; 1784 def: Pat<(i1 (seteq (and I32:$Rs, I32:$Rt), 0)), 1785 (C2_bitsclr IntRegs:$Rs, IntRegs:$Rt)>; 1786} 1787 1788let AddedComplexity = 10 in // Complexity greater than compare reg-reg. 1789def: Pat<(i1 (seteq (and I32:$Rs, I32:$Rt), IntRegs:$Rt)), 1790 (C2_bitsset IntRegs:$Rs, IntRegs:$Rt)>; 1791 1792def SDTTestBit: 1793 SDTypeProfile<1, 2, [SDTCisVT<0, i1>, SDTCisVT<1, i32>, SDTCisVT<2, i32>]>; 1794def HexagonTSTBIT: SDNode<"HexagonISD::TSTBIT", SDTTestBit>; 1795 1796def: Pat<(HexagonTSTBIT I32:$Rs, u5_0ImmPred:$u5), 1797 (S2_tstbit_i I32:$Rs, imm:$u5)>; 1798def: Pat<(HexagonTSTBIT I32:$Rs, I32:$Rt), 1799 (S2_tstbit_r I32:$Rs, I32:$Rt)>; 1800 1801// Add extra complexity to prefer these instructions over bitsset/bitsclr. 1802// The reason is that tstbit/ntstbit can be folded into a compound instruction: 1803// if ([!]tstbit(...)) jump ... 1804let AddedComplexity = 20 in { // Complexity greater than cmp reg-imm. 1805 def: Pat<(i1 (seteq (and I32:$Rs, IsPow2_32:$u5), 0)), 1806 (S4_ntstbit_i I32:$Rs, (Log2_32 imm:$u5))>; 1807 def: Pat<(i1 (setne (and I32:$Rs, IsPow2_32:$u5), 0)), 1808 (S2_tstbit_i I32:$Rs, (Log2_32 imm:$u5))>; 1809 def: Pat<(i1 (seteq (and (shl 1, I32:$Rt), I32:$Rs), 0)), 1810 (S4_ntstbit_r I32:$Rs, I32:$Rt)>; 1811 def: Pat<(i1 (setne (and (shl 1, I32:$Rt), I32:$Rs), 0)), 1812 (S2_tstbit_r I32:$Rs, I32:$Rt)>; 1813} 1814 1815def: Pat<(i1 (seteq (and I64:$Rs, IsPow2_64L:$u6), 0)), 1816 (S4_ntstbit_i (LoReg $Rs), (Log2_64 $u6))>; 1817def: Pat<(i1 (seteq (and I64:$Rs, IsPow2_64H:$u6), 0)), 1818 (S4_ntstbit_i (HiReg $Rs), (UDEC32 (i32 (Log2_64 $u6))))>; 1819def: Pat<(i1 (setne (and I64:$Rs, IsPow2_64L:$u6), 0)), 1820 (S2_tstbit_i (LoReg $Rs), (Log2_64 imm:$u6))>; 1821def: Pat<(i1 (setne (and I64:$Rs, IsPow2_64H:$u6), 0)), 1822 (S2_tstbit_i (HiReg $Rs), (UDEC32 (i32 (Log2_64 imm:$u6))))>; 1823 1824// Do not increase complexity of these patterns. In the DAG, "cmp i8" may be 1825// represented as a compare against "value & 0xFF", which is an exact match 1826// for cmpb (same for cmph). The patterns below do not contain any additional 1827// complexity that would make them preferable, and if they were actually used 1828// instead of cmpb/cmph, they would result in a compare against register that 1829// is loaded with the byte/half mask (i.e. 0xFF or 0xFFFF). 1830def: Pat<(i1 (setne (and I32:$Rs, u6_0ImmPred:$u6), 0)), 1831 (C4_nbitsclri I32:$Rs, imm:$u6)>; 1832def: Pat<(i1 (setne (and I32:$Rs, I32:$Rt), 0)), 1833 (C4_nbitsclr I32:$Rs, I32:$Rt)>; 1834def: Pat<(i1 (setne (and I32:$Rs, I32:$Rt), I32:$Rt)), 1835 (C4_nbitsset I32:$Rs, I32:$Rt)>; 1836 1837// Special patterns to address certain cases where the "top-down" matching 1838// algorithm would cause suboptimal selection. 1839 1840let AddedComplexity = 100 in { 1841 // Avoid A4_rcmp[n]eqi in these cases: 1842 def: Pat<(i32 (zext (i1 (seteq (and (shl 1, I32:$Rt), I32:$Rs), 0)))), 1843 (I1toI32 (S4_ntstbit_r IntRegs:$Rs, IntRegs:$Rt))>; 1844 def: Pat<(i32 (zext (i1 (setne (and (shl 1, I32:$Rt), I32:$Rs), 0)))), 1845 (I1toI32 (S2_tstbit_r IntRegs:$Rs, IntRegs:$Rt))>; 1846 def: Pat<(i32 (zext (i1 (seteq (and I32:$Rs, IsPow2_32:$u5), 0)))), 1847 (I1toI32 (S4_ntstbit_i I32:$Rs, (Log2_32 imm:$u5)))>; 1848 def: Pat<(i32 (zext (i1 (setne (and I32:$Rs, IsPow2_32:$u5), 0)))), 1849 (I1toI32 (S2_tstbit_i I32:$Rs, (Log2_32 imm:$u5)))>; 1850 def: Pat<(i32 (zext (i1 (seteq (and (shl 1, I32:$Rt), I32:$Rs), 0)))), 1851 (I1toI32 (S4_ntstbit_r I32:$Rs, I32:$Rt))>; 1852 def: Pat<(i32 (zext (i1 (setne (and (shl 1, I32:$Rt), I32:$Rs), 0)))), 1853 (I1toI32 (S2_tstbit_r I32:$Rs, I32:$Rt))>; 1854} 1855 1856// --(11) PIC ------------------------------------------------------------ 1857// 1858 1859def SDT_HexagonAtGot 1860 : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i32>]>; 1861def SDT_HexagonAtPcrel 1862 : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; 1863 1864// AT_GOT address-of-GOT, address-of-global, offset-in-global 1865def HexagonAtGot : SDNode<"HexagonISD::AT_GOT", SDT_HexagonAtGot>; 1866// AT_PCREL address-of-global 1867def HexagonAtPcrel : SDNode<"HexagonISD::AT_PCREL", SDT_HexagonAtPcrel>; 1868 1869def: Pat<(HexagonAtGot I32:$got, I32:$addr, (i32 0)), 1870 (L2_loadri_io I32:$got, imm:$addr)>; 1871def: Pat<(HexagonAtGot I32:$got, I32:$addr, s30_2ImmPred:$off), 1872 (A2_addi (L2_loadri_io I32:$got, imm:$addr), imm:$off)>; 1873def: Pat<(HexagonAtPcrel I32:$addr), 1874 (C4_addipc imm:$addr)>; 1875 1876// The HVX load patterns also match AT_PCREL directly. Make sure that 1877// if the selection of this opcode changes, it's updated in all places. 1878 1879 1880// --(12) Load ----------------------------------------------------------- 1881// 1882 1883def extloadv2i8: PatFrag<(ops node:$ptr), (extload node:$ptr), [{ 1884 return cast<LoadSDNode>(N)->getMemoryVT() == MVT::v2i8; 1885}]>; 1886def extloadv4i8: PatFrag<(ops node:$ptr), (extload node:$ptr), [{ 1887 return cast<LoadSDNode>(N)->getMemoryVT() == MVT::v4i8; 1888}]>; 1889 1890def zextloadv2i8: PatFrag<(ops node:$ptr), (zextload node:$ptr), [{ 1891 return cast<LoadSDNode>(N)->getMemoryVT() == MVT::v2i8; 1892}]>; 1893def zextloadv4i8: PatFrag<(ops node:$ptr), (zextload node:$ptr), [{ 1894 return cast<LoadSDNode>(N)->getMemoryVT() == MVT::v4i8; 1895}]>; 1896 1897def sextloadv2i8: PatFrag<(ops node:$ptr), (sextload node:$ptr), [{ 1898 return cast<LoadSDNode>(N)->getMemoryVT() == MVT::v2i8; 1899}]>; 1900def sextloadv4i8: PatFrag<(ops node:$ptr), (sextload node:$ptr), [{ 1901 return cast<LoadSDNode>(N)->getMemoryVT() == MVT::v4i8; 1902}]>; 1903 1904// Patterns to select load-indexed: Rs + Off. 1905// - frameindex [+ imm], 1906multiclass Loadxfi_pat<PatFrag Load, ValueType VT, PatLeaf ImmPred, 1907 InstHexagon MI> { 1908 def: Pat<(VT (Load (add (i32 AddrFI:$fi), ImmPred:$Off))), 1909 (VT (MI AddrFI:$fi, imm:$Off))>; 1910 def: Pat<(VT (Load (IsOrAdd (i32 AddrFI:$fi), ImmPred:$Off))), 1911 (VT (MI AddrFI:$fi, imm:$Off))>; 1912 def: Pat<(VT (Load AddrFI:$fi)), (VT (MI AddrFI:$fi, 0))>; 1913} 1914 1915// Patterns to select load-indexed: Rs + Off. 1916// - base reg [+ imm] 1917multiclass Loadxgi_pat<PatFrag Load, ValueType VT, PatLeaf ImmPred, 1918 InstHexagon MI> { 1919 def: Pat<(VT (Load (add I32:$Rs, ImmPred:$Off))), 1920 (VT (MI IntRegs:$Rs, imm:$Off))>; 1921 def: Pat<(VT (Load (IsOrAdd I32:$Rs, ImmPred:$Off))), 1922 (VT (MI IntRegs:$Rs, imm:$Off))>; 1923 def: Pat<(VT (Load I32:$Rs)), (VT (MI IntRegs:$Rs, 0))>; 1924} 1925 1926// Patterns to select load-indexed: Rs + Off. Combines Loadxfi + Loadxgi. 1927multiclass Loadxi_pat<PatFrag Load, ValueType VT, PatLeaf ImmPred, 1928 InstHexagon MI> { 1929 defm: Loadxfi_pat<Load, VT, ImmPred, MI>; 1930 defm: Loadxgi_pat<Load, VT, ImmPred, MI>; 1931} 1932 1933// Patterns to select load reg indexed: Rs + Off with a value modifier. 1934// - frameindex [+ imm] 1935multiclass Loadxfim_pat<PatFrag Load, ValueType VT, PatFrag ValueMod, 1936 PatLeaf ImmPred, InstHexagon MI> { 1937 def: Pat<(VT (Load (add (i32 AddrFI:$fi), ImmPred:$Off))), 1938 (VT (ValueMod (MI AddrFI:$fi, imm:$Off)))>; 1939 def: Pat<(VT (Load (IsOrAdd (i32 AddrFI:$fi), ImmPred:$Off))), 1940 (VT (ValueMod (MI AddrFI:$fi, imm:$Off)))>; 1941 def: Pat<(VT (Load AddrFI:$fi)), (VT (ValueMod (MI AddrFI:$fi, 0)))>; 1942} 1943 1944// Patterns to select load reg indexed: Rs + Off with a value modifier. 1945// - base reg [+ imm] 1946multiclass Loadxgim_pat<PatFrag Load, ValueType VT, PatFrag ValueMod, 1947 PatLeaf ImmPred, InstHexagon MI> { 1948 def: Pat<(VT (Load (add I32:$Rs, ImmPred:$Off))), 1949 (VT (ValueMod (MI IntRegs:$Rs, imm:$Off)))>; 1950 def: Pat<(VT (Load (IsOrAdd I32:$Rs, ImmPred:$Off))), 1951 (VT (ValueMod (MI IntRegs:$Rs, imm:$Off)))>; 1952 def: Pat<(VT (Load I32:$Rs)), (VT (ValueMod (MI IntRegs:$Rs, 0)))>; 1953} 1954 1955// Patterns to select load reg indexed: Rs + Off with a value modifier. 1956// Combines Loadxfim + Loadxgim. 1957multiclass Loadxim_pat<PatFrag Load, ValueType VT, PatFrag ValueMod, 1958 PatLeaf ImmPred, InstHexagon MI> { 1959 defm: Loadxfim_pat<Load, VT, ValueMod, ImmPred, MI>; 1960 defm: Loadxgim_pat<Load, VT, ValueMod, ImmPred, MI>; 1961} 1962 1963// Pattern to select load reg reg-indexed: Rs + Rt<<u2. 1964class Loadxr_shl_pat<PatFrag Load, ValueType VT, InstHexagon MI> 1965 : Pat<(VT (Load (add I32:$Rs, (i32 (shl I32:$Rt, u2_0ImmPred:$u2))))), 1966 (VT (MI IntRegs:$Rs, IntRegs:$Rt, imm:$u2))>; 1967 1968// Pattern to select load reg reg-indexed: Rs + Rt<<0. 1969class Loadxr_add_pat<PatFrag Load, ValueType VT, InstHexagon MI> 1970 : Pat<(VT (Load (add I32:$Rs, I32:$Rt))), 1971 (VT (MI IntRegs:$Rs, IntRegs:$Rt, 0))>; 1972 1973// Pattern to select load reg reg-indexed: Rs + Rt<<u2 with value modifier. 1974class Loadxrm_shl_pat<PatFrag Load, ValueType VT, PatFrag ValueMod, 1975 InstHexagon MI> 1976 : Pat<(VT (Load (add I32:$Rs, (i32 (shl I32:$Rt, u2_0ImmPred:$u2))))), 1977 (VT (ValueMod (MI IntRegs:$Rs, IntRegs:$Rt, imm:$u2)))>; 1978 1979// Pattern to select load reg reg-indexed: Rs + Rt<<0 with value modifier. 1980class Loadxrm_add_pat<PatFrag Load, ValueType VT, PatFrag ValueMod, 1981 InstHexagon MI> 1982 : Pat<(VT (Load (add I32:$Rs, I32:$Rt))), 1983 (VT (ValueMod (MI IntRegs:$Rs, IntRegs:$Rt, 0)))>; 1984 1985// Pattern to select load long-offset reg-indexed: Addr + Rt<<u2. 1986// Don't match for u2==0, instead use reg+imm for those cases. 1987class Loadxu_pat<PatFrag Load, ValueType VT, PatFrag ImmPred, InstHexagon MI> 1988 : Pat<(VT (Load (add (shl IntRegs:$Rt, u2_0ImmPred:$u2), ImmPred:$Addr))), 1989 (VT (MI IntRegs:$Rt, imm:$u2, ImmPred:$Addr))>; 1990 1991class Loadxum_pat<PatFrag Load, ValueType VT, PatFrag ImmPred, PatFrag ValueMod, 1992 InstHexagon MI> 1993 : Pat<(VT (Load (add (shl IntRegs:$Rt, u2_0ImmPred:$u2), ImmPred:$Addr))), 1994 (VT (ValueMod (MI IntRegs:$Rt, imm:$u2, ImmPred:$Addr)))>; 1995 1996// Pattern to select load absolute. 1997class Loada_pat<PatFrag Load, ValueType VT, PatFrag Addr, InstHexagon MI> 1998 : Pat<(VT (Load Addr:$addr)), (MI Addr:$addr)>; 1999 2000// Pattern to select load absolute with value modifier. 2001class Loadam_pat<PatFrag Load, ValueType VT, PatFrag Addr, PatFrag ValueMod, 2002 InstHexagon MI> 2003 : Pat<(VT (Load Addr:$addr)), (ValueMod (MI Addr:$addr))>; 2004 2005 2006let AddedComplexity = 20 in { 2007 defm: Loadxi_pat<extloadi1, i32, anyimm0, L2_loadrub_io>; 2008 defm: Loadxi_pat<extloadi8, i32, anyimm0, L2_loadrub_io>; 2009 defm: Loadxi_pat<extloadi16, i32, anyimm1, L2_loadruh_io>; 2010 defm: Loadxi_pat<extloadv2i8, v2i16, anyimm1, L2_loadbzw2_io>; 2011 defm: Loadxi_pat<extloadv4i8, v4i16, anyimm2, L2_loadbzw4_io>; 2012 defm: Loadxi_pat<sextloadi8, i32, anyimm0, L2_loadrb_io>; 2013 defm: Loadxi_pat<sextloadi16, i32, anyimm1, L2_loadrh_io>; 2014 defm: Loadxi_pat<sextloadv2i8, v2i16, anyimm1, L2_loadbsw2_io>; 2015 defm: Loadxi_pat<sextloadv4i8, v4i16, anyimm2, L2_loadbzw4_io>; 2016 defm: Loadxi_pat<zextloadi1, i32, anyimm0, L2_loadrub_io>; 2017 defm: Loadxi_pat<zextloadi8, i32, anyimm0, L2_loadrub_io>; 2018 defm: Loadxi_pat<zextloadi16, i32, anyimm1, L2_loadruh_io>; 2019 defm: Loadxi_pat<zextloadv2i8, v2i16, anyimm1, L2_loadbzw2_io>; 2020 defm: Loadxi_pat<zextloadv4i8, v4i16, anyimm2, L2_loadbzw4_io>; 2021 defm: Loadxi_pat<load, i32, anyimm2, L2_loadri_io>; 2022 defm: Loadxi_pat<load, v2i16, anyimm2, L2_loadri_io>; 2023 defm: Loadxi_pat<load, v4i8, anyimm2, L2_loadri_io>; 2024 defm: Loadxi_pat<load, i64, anyimm3, L2_loadrd_io>; 2025 defm: Loadxi_pat<load, v2i32, anyimm3, L2_loadrd_io>; 2026 defm: Loadxi_pat<load, v4i16, anyimm3, L2_loadrd_io>; 2027 defm: Loadxi_pat<load, v8i8, anyimm3, L2_loadrd_io>; 2028 defm: Loadxi_pat<load, f32, anyimm2, L2_loadri_io>; 2029 defm: Loadxi_pat<load, f64, anyimm3, L2_loadrd_io>; 2030 // No sextloadi1. 2031 2032 defm: Loadxi_pat<atomic_load_8 , i32, anyimm0, L2_loadrub_io>; 2033 defm: Loadxi_pat<atomic_load_16, i32, anyimm1, L2_loadruh_io>; 2034 defm: Loadxi_pat<atomic_load_32, i32, anyimm2, L2_loadri_io>; 2035 defm: Loadxi_pat<atomic_load_64, i64, anyimm3, L2_loadrd_io>; 2036} 2037 2038let AddedComplexity = 30 in { 2039 defm: Loadxim_pat<extloadi1, i64, ToAext64, anyimm0, L2_loadrub_io>; 2040 defm: Loadxim_pat<extloadi8, i64, ToAext64, anyimm0, L2_loadrub_io>; 2041 defm: Loadxim_pat<extloadi16, i64, ToAext64, anyimm1, L2_loadruh_io>; 2042 defm: Loadxim_pat<extloadi32, i64, ToAext64, anyimm2, L2_loadri_io>; 2043 defm: Loadxim_pat<zextloadi1, i64, ToZext64, anyimm0, L2_loadrub_io>; 2044 defm: Loadxim_pat<zextloadi8, i64, ToZext64, anyimm0, L2_loadrub_io>; 2045 defm: Loadxim_pat<zextloadi16, i64, ToZext64, anyimm1, L2_loadruh_io>; 2046 defm: Loadxim_pat<zextloadi32, i64, ToZext64, anyimm2, L2_loadri_io>; 2047 defm: Loadxim_pat<sextloadi8, i64, ToSext64, anyimm0, L2_loadrb_io>; 2048 defm: Loadxim_pat<sextloadi16, i64, ToSext64, anyimm1, L2_loadrh_io>; 2049 defm: Loadxim_pat<sextloadi32, i64, ToSext64, anyimm2, L2_loadri_io>; 2050} 2051 2052let AddedComplexity = 60 in { 2053 def: Loadxu_pat<extloadi8, i32, anyimm0, L4_loadrub_ur>; 2054 def: Loadxu_pat<extloadi16, i32, anyimm1, L4_loadruh_ur>; 2055 def: Loadxu_pat<extloadv2i8, v2i16, anyimm1, L4_loadbzw2_ur>; 2056 def: Loadxu_pat<extloadv4i8, v4i16, anyimm2, L4_loadbzw4_ur>; 2057 def: Loadxu_pat<sextloadi8, i32, anyimm0, L4_loadrb_ur>; 2058 def: Loadxu_pat<sextloadi16, i32, anyimm1, L4_loadrh_ur>; 2059 def: Loadxu_pat<sextloadv2i8, v2i16, anyimm1, L4_loadbsw2_ur>; 2060 def: Loadxu_pat<sextloadv4i8, v4i16, anyimm2, L4_loadbzw4_ur>; 2061 def: Loadxu_pat<zextloadi8, i32, anyimm0, L4_loadrub_ur>; 2062 def: Loadxu_pat<zextloadi16, i32, anyimm1, L4_loadruh_ur>; 2063 def: Loadxu_pat<zextloadv2i8, v2i16, anyimm1, L4_loadbzw2_ur>; 2064 def: Loadxu_pat<zextloadv4i8, v4i16, anyimm2, L4_loadbzw4_ur>; 2065 def: Loadxu_pat<load, i32, anyimm2, L4_loadri_ur>; 2066 def: Loadxu_pat<load, v2i16, anyimm2, L4_loadri_ur>; 2067 def: Loadxu_pat<load, v4i8, anyimm2, L4_loadri_ur>; 2068 def: Loadxu_pat<load, i64, anyimm3, L4_loadrd_ur>; 2069 def: Loadxu_pat<load, v2i32, anyimm3, L4_loadrd_ur>; 2070 def: Loadxu_pat<load, v4i16, anyimm3, L4_loadrd_ur>; 2071 def: Loadxu_pat<load, v8i8, anyimm3, L4_loadrd_ur>; 2072 def: Loadxu_pat<load, f32, anyimm2, L4_loadri_ur>; 2073 def: Loadxu_pat<load, f64, anyimm3, L4_loadrd_ur>; 2074 2075 def: Loadxum_pat<sextloadi8, i64, anyimm0, ToSext64, L4_loadrb_ur>; 2076 def: Loadxum_pat<zextloadi8, i64, anyimm0, ToZext64, L4_loadrub_ur>; 2077 def: Loadxum_pat<extloadi8, i64, anyimm0, ToAext64, L4_loadrub_ur>; 2078 def: Loadxum_pat<sextloadi16, i64, anyimm1, ToSext64, L4_loadrh_ur>; 2079 def: Loadxum_pat<zextloadi16, i64, anyimm1, ToZext64, L4_loadruh_ur>; 2080 def: Loadxum_pat<extloadi16, i64, anyimm1, ToAext64, L4_loadruh_ur>; 2081 def: Loadxum_pat<sextloadi32, i64, anyimm2, ToSext64, L4_loadri_ur>; 2082 def: Loadxum_pat<zextloadi32, i64, anyimm2, ToZext64, L4_loadri_ur>; 2083 def: Loadxum_pat<extloadi32, i64, anyimm2, ToAext64, L4_loadri_ur>; 2084} 2085 2086let AddedComplexity = 40 in { 2087 def: Loadxr_shl_pat<extloadi8, i32, L4_loadrub_rr>; 2088 def: Loadxr_shl_pat<zextloadi8, i32, L4_loadrub_rr>; 2089 def: Loadxr_shl_pat<sextloadi8, i32, L4_loadrb_rr>; 2090 def: Loadxr_shl_pat<extloadi16, i32, L4_loadruh_rr>; 2091 def: Loadxr_shl_pat<zextloadi16, i32, L4_loadruh_rr>; 2092 def: Loadxr_shl_pat<sextloadi16, i32, L4_loadrh_rr>; 2093 def: Loadxr_shl_pat<load, i32, L4_loadri_rr>; 2094 def: Loadxr_shl_pat<load, v2i16, L4_loadri_rr>; 2095 def: Loadxr_shl_pat<load, v4i8, L4_loadri_rr>; 2096 def: Loadxr_shl_pat<load, i64, L4_loadrd_rr>; 2097 def: Loadxr_shl_pat<load, v2i32, L4_loadrd_rr>; 2098 def: Loadxr_shl_pat<load, v4i16, L4_loadrd_rr>; 2099 def: Loadxr_shl_pat<load, v8i8, L4_loadrd_rr>; 2100 def: Loadxr_shl_pat<load, f32, L4_loadri_rr>; 2101 def: Loadxr_shl_pat<load, f64, L4_loadrd_rr>; 2102} 2103 2104let AddedComplexity = 20 in { 2105 def: Loadxr_add_pat<extloadi8, i32, L4_loadrub_rr>; 2106 def: Loadxr_add_pat<zextloadi8, i32, L4_loadrub_rr>; 2107 def: Loadxr_add_pat<sextloadi8, i32, L4_loadrb_rr>; 2108 def: Loadxr_add_pat<extloadi16, i32, L4_loadruh_rr>; 2109 def: Loadxr_add_pat<zextloadi16, i32, L4_loadruh_rr>; 2110 def: Loadxr_add_pat<sextloadi16, i32, L4_loadrh_rr>; 2111 def: Loadxr_add_pat<load, i32, L4_loadri_rr>; 2112 def: Loadxr_add_pat<load, v2i16, L4_loadri_rr>; 2113 def: Loadxr_add_pat<load, v4i8, L4_loadri_rr>; 2114 def: Loadxr_add_pat<load, i64, L4_loadrd_rr>; 2115 def: Loadxr_add_pat<load, v2i32, L4_loadrd_rr>; 2116 def: Loadxr_add_pat<load, v4i16, L4_loadrd_rr>; 2117 def: Loadxr_add_pat<load, v8i8, L4_loadrd_rr>; 2118 def: Loadxr_add_pat<load, f32, L4_loadri_rr>; 2119 def: Loadxr_add_pat<load, f64, L4_loadrd_rr>; 2120} 2121 2122let AddedComplexity = 40 in { 2123 def: Loadxrm_shl_pat<extloadi8, i64, ToAext64, L4_loadrub_rr>; 2124 def: Loadxrm_shl_pat<zextloadi8, i64, ToZext64, L4_loadrub_rr>; 2125 def: Loadxrm_shl_pat<sextloadi8, i64, ToSext64, L4_loadrb_rr>; 2126 def: Loadxrm_shl_pat<extloadi16, i64, ToAext64, L4_loadruh_rr>; 2127 def: Loadxrm_shl_pat<zextloadi16, i64, ToZext64, L4_loadruh_rr>; 2128 def: Loadxrm_shl_pat<sextloadi16, i64, ToSext64, L4_loadrh_rr>; 2129 def: Loadxrm_shl_pat<extloadi32, i64, ToAext64, L4_loadri_rr>; 2130 def: Loadxrm_shl_pat<zextloadi32, i64, ToZext64, L4_loadri_rr>; 2131 def: Loadxrm_shl_pat<sextloadi32, i64, ToSext64, L4_loadri_rr>; 2132} 2133 2134let AddedComplexity = 20 in { 2135 def: Loadxrm_add_pat<extloadi8, i64, ToAext64, L4_loadrub_rr>; 2136 def: Loadxrm_add_pat<zextloadi8, i64, ToZext64, L4_loadrub_rr>; 2137 def: Loadxrm_add_pat<sextloadi8, i64, ToSext64, L4_loadrb_rr>; 2138 def: Loadxrm_add_pat<extloadi16, i64, ToAext64, L4_loadruh_rr>; 2139 def: Loadxrm_add_pat<zextloadi16, i64, ToZext64, L4_loadruh_rr>; 2140 def: Loadxrm_add_pat<sextloadi16, i64, ToSext64, L4_loadrh_rr>; 2141 def: Loadxrm_add_pat<extloadi32, i64, ToAext64, L4_loadri_rr>; 2142 def: Loadxrm_add_pat<zextloadi32, i64, ToZext64, L4_loadri_rr>; 2143 def: Loadxrm_add_pat<sextloadi32, i64, ToSext64, L4_loadri_rr>; 2144} 2145 2146// Absolute address 2147 2148let AddedComplexity = 60 in { 2149 def: Loada_pat<zextloadi1, i32, anyimm0, PS_loadrubabs>; 2150 def: Loada_pat<sextloadi8, i32, anyimm0, PS_loadrbabs>; 2151 def: Loada_pat<extloadi8, i32, anyimm0, PS_loadrubabs>; 2152 def: Loada_pat<zextloadi8, i32, anyimm0, PS_loadrubabs>; 2153 def: Loada_pat<sextloadi16, i32, anyimm1, PS_loadrhabs>; 2154 def: Loada_pat<extloadi16, i32, anyimm1, PS_loadruhabs>; 2155 def: Loada_pat<zextloadi16, i32, anyimm1, PS_loadruhabs>; 2156 def: Loada_pat<load, i32, anyimm2, PS_loadriabs>; 2157 def: Loada_pat<load, v2i16, anyimm2, PS_loadriabs>; 2158 def: Loada_pat<load, v4i8, anyimm2, PS_loadriabs>; 2159 def: Loada_pat<load, i64, anyimm3, PS_loadrdabs>; 2160 def: Loada_pat<load, v2i32, anyimm3, PS_loadrdabs>; 2161 def: Loada_pat<load, v4i16, anyimm3, PS_loadrdabs>; 2162 def: Loada_pat<load, v8i8, anyimm3, PS_loadrdabs>; 2163 def: Loada_pat<load, f32, anyimm2, PS_loadriabs>; 2164 def: Loada_pat<load, f64, anyimm3, PS_loadrdabs>; 2165 2166 def: Loada_pat<atomic_load_8, i32, anyimm0, PS_loadrubabs>; 2167 def: Loada_pat<atomic_load_16, i32, anyimm1, PS_loadruhabs>; 2168 def: Loada_pat<atomic_load_32, i32, anyimm2, PS_loadriabs>; 2169 def: Loada_pat<atomic_load_64, i64, anyimm3, PS_loadrdabs>; 2170} 2171 2172let AddedComplexity = 30 in { 2173 def: Loadam_pat<extloadi8, i64, anyimm0, ToAext64, PS_loadrubabs>; 2174 def: Loadam_pat<sextloadi8, i64, anyimm0, ToSext64, PS_loadrbabs>; 2175 def: Loadam_pat<zextloadi8, i64, anyimm0, ToZext64, PS_loadrubabs>; 2176 def: Loadam_pat<extloadi16, i64, anyimm1, ToAext64, PS_loadruhabs>; 2177 def: Loadam_pat<sextloadi16, i64, anyimm1, ToSext64, PS_loadrhabs>; 2178 def: Loadam_pat<zextloadi16, i64, anyimm1, ToZext64, PS_loadruhabs>; 2179 def: Loadam_pat<extloadi32, i64, anyimm2, ToAext64, PS_loadriabs>; 2180 def: Loadam_pat<sextloadi32, i64, anyimm2, ToSext64, PS_loadriabs>; 2181 def: Loadam_pat<zextloadi32, i64, anyimm2, ToZext64, PS_loadriabs>; 2182 2183 def: Loadam_pat<load, i1, anyimm0, I32toI1, PS_loadrubabs>; 2184 def: Loadam_pat<zextloadi1, i64, anyimm0, ToZext64, PS_loadrubabs>; 2185} 2186 2187// GP-relative address 2188 2189let AddedComplexity = 100 in { 2190 def: Loada_pat<extloadi1, i32, addrgp, L2_loadrubgp>; 2191 def: Loada_pat<zextloadi1, i32, addrgp, L2_loadrubgp>; 2192 def: Loada_pat<extloadi8, i32, addrgp, L2_loadrubgp>; 2193 def: Loada_pat<sextloadi8, i32, addrgp, L2_loadrbgp>; 2194 def: Loada_pat<zextloadi8, i32, addrgp, L2_loadrubgp>; 2195 def: Loada_pat<extloadi16, i32, addrgp, L2_loadruhgp>; 2196 def: Loada_pat<sextloadi16, i32, addrgp, L2_loadrhgp>; 2197 def: Loada_pat<zextloadi16, i32, addrgp, L2_loadruhgp>; 2198 def: Loada_pat<load, i32, addrgp, L2_loadrigp>; 2199 def: Loada_pat<load, v2i16, addrgp, L2_loadrigp>; 2200 def: Loada_pat<load, v4i8, addrgp, L2_loadrigp>; 2201 def: Loada_pat<load, i64, addrgp, L2_loadrdgp>; 2202 def: Loada_pat<load, v2i32, addrgp, L2_loadrdgp>; 2203 def: Loada_pat<load, v4i16, addrgp, L2_loadrdgp>; 2204 def: Loada_pat<load, v8i8, addrgp, L2_loadrdgp>; 2205 def: Loada_pat<load, f32, addrgp, L2_loadrigp>; 2206 def: Loada_pat<load, f64, addrgp, L2_loadrdgp>; 2207 2208 def: Loada_pat<atomic_load_8, i32, addrgp, L2_loadrubgp>; 2209 def: Loada_pat<atomic_load_16, i32, addrgp, L2_loadruhgp>; 2210 def: Loada_pat<atomic_load_32, i32, addrgp, L2_loadrigp>; 2211 def: Loada_pat<atomic_load_64, i64, addrgp, L2_loadrdgp>; 2212} 2213 2214let AddedComplexity = 70 in { 2215 def: Loadam_pat<extloadi8, i64, addrgp, ToAext64, L2_loadrubgp>; 2216 def: Loadam_pat<sextloadi8, i64, addrgp, ToSext64, L2_loadrbgp>; 2217 def: Loadam_pat<zextloadi8, i64, addrgp, ToZext64, L2_loadrubgp>; 2218 def: Loadam_pat<extloadi16, i64, addrgp, ToAext64, L2_loadruhgp>; 2219 def: Loadam_pat<sextloadi16, i64, addrgp, ToSext64, L2_loadrhgp>; 2220 def: Loadam_pat<zextloadi16, i64, addrgp, ToZext64, L2_loadruhgp>; 2221 def: Loadam_pat<extloadi32, i64, addrgp, ToAext64, L2_loadrigp>; 2222 def: Loadam_pat<sextloadi32, i64, addrgp, ToSext64, L2_loadrigp>; 2223 def: Loadam_pat<zextloadi32, i64, addrgp, ToZext64, L2_loadrigp>; 2224 2225 def: Loadam_pat<load, i1, addrgp, I32toI1, L2_loadrubgp>; 2226 def: Loadam_pat<zextloadi1, i64, addrgp, ToZext64, L2_loadrubgp>; 2227} 2228 2229 2230// Sign-extending loads of i1 need to replicate the lowest bit throughout 2231// the 32-bit value. Since the loaded value can only be 0 or 1, 0-v should 2232// do the trick. 2233let AddedComplexity = 20 in 2234def: Pat<(i32 (sextloadi1 I32:$Rs)), 2235 (A2_subri 0, (L2_loadrub_io IntRegs:$Rs, 0))>; 2236 2237// Patterns for loads of i1: 2238def: Pat<(i1 (load AddrFI:$fi)), 2239 (C2_tfrrp (L2_loadrub_io AddrFI:$fi, 0))>; 2240def: Pat<(i1 (load (add I32:$Rs, anyimm0:$Off))), 2241 (C2_tfrrp (L2_loadrub_io IntRegs:$Rs, imm:$Off))>; 2242def: Pat<(i1 (load I32:$Rs)), 2243 (C2_tfrrp (L2_loadrub_io IntRegs:$Rs, 0))>; 2244 2245 2246// --(13) Store ---------------------------------------------------------- 2247// 2248 2249class Storepi_pat<PatFrag Store, PatFrag Value, PatFrag Offset, InstHexagon MI> 2250 : Pat<(Store Value:$Rt, I32:$Rx, Offset:$s4), 2251 (MI I32:$Rx, imm:$s4, Value:$Rt)>; 2252 2253def: Storepi_pat<post_truncsti8, I32, s4_0ImmPred, S2_storerb_pi>; 2254def: Storepi_pat<post_truncsti16, I32, s4_1ImmPred, S2_storerh_pi>; 2255def: Storepi_pat<post_store, I32, s4_2ImmPred, S2_storeri_pi>; 2256def: Storepi_pat<post_store, I64, s4_3ImmPred, S2_storerd_pi>; 2257 2258// Patterns for generating stores, where the address takes different forms: 2259// - frameindex, 2260// - frameindex + offset, 2261// - base + offset, 2262// - simple (base address without offset). 2263// These would usually be used together (via Storexi_pat defined below), but 2264// in some cases one may want to apply different properties (such as 2265// AddedComplexity) to the individual patterns. 2266class Storexi_fi_pat<PatFrag Store, PatFrag Value, InstHexagon MI> 2267 : Pat<(Store Value:$Rs, AddrFI:$fi), (MI AddrFI:$fi, 0, Value:$Rs)>; 2268 2269multiclass Storexi_fi_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred, 2270 InstHexagon MI> { 2271 def: Pat<(Store Value:$Rs, (add (i32 AddrFI:$fi), ImmPred:$Off)), 2272 (MI AddrFI:$fi, imm:$Off, Value:$Rs)>; 2273 def: Pat<(Store Value:$Rs, (IsOrAdd (i32 AddrFI:$fi), ImmPred:$Off)), 2274 (MI AddrFI:$fi, imm:$Off, Value:$Rs)>; 2275} 2276 2277multiclass Storexi_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred, 2278 InstHexagon MI> { 2279 def: Pat<(Store Value:$Rt, (add I32:$Rs, ImmPred:$Off)), 2280 (MI IntRegs:$Rs, imm:$Off, Value:$Rt)>; 2281 def: Pat<(Store Value:$Rt, (IsOrAdd I32:$Rs, ImmPred:$Off)), 2282 (MI IntRegs:$Rs, imm:$Off, Value:$Rt)>; 2283} 2284 2285class Storexi_base_pat<PatFrag Store, PatFrag Value, InstHexagon MI> 2286 : Pat<(Store Value:$Rt, I32:$Rs), 2287 (MI IntRegs:$Rs, 0, Value:$Rt)>; 2288 2289// Patterns for generating stores, where the address takes different forms, 2290// and where the value being stored is transformed through the value modifier 2291// ValueMod. The address forms are same as above. 2292class Storexim_fi_pat<PatFrag Store, PatFrag Value, PatFrag ValueMod, 2293 InstHexagon MI> 2294 : Pat<(Store Value:$Rs, AddrFI:$fi), 2295 (MI AddrFI:$fi, 0, (ValueMod Value:$Rs))>; 2296 2297multiclass Storexim_fi_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred, 2298 PatFrag ValueMod, InstHexagon MI> { 2299 def: Pat<(Store Value:$Rs, (add (i32 AddrFI:$fi), ImmPred:$Off)), 2300 (MI AddrFI:$fi, imm:$Off, (ValueMod Value:$Rs))>; 2301 def: Pat<(Store Value:$Rs, (IsOrAdd (i32 AddrFI:$fi), ImmPred:$Off)), 2302 (MI AddrFI:$fi, imm:$Off, (ValueMod Value:$Rs))>; 2303} 2304 2305multiclass Storexim_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred, 2306 PatFrag ValueMod, InstHexagon MI> { 2307 def: Pat<(Store Value:$Rt, (add I32:$Rs, ImmPred:$Off)), 2308 (MI IntRegs:$Rs, imm:$Off, (ValueMod Value:$Rt))>; 2309 def: Pat<(Store Value:$Rt, (IsOrAdd I32:$Rs, ImmPred:$Off)), 2310 (MI IntRegs:$Rs, imm:$Off, (ValueMod Value:$Rt))>; 2311} 2312 2313class Storexim_base_pat<PatFrag Store, PatFrag Value, PatFrag ValueMod, 2314 InstHexagon MI> 2315 : Pat<(Store Value:$Rt, I32:$Rs), 2316 (MI IntRegs:$Rs, 0, (ValueMod Value:$Rt))>; 2317 2318multiclass Storexi_pat<PatFrag Store, PatFrag Value, PatLeaf ImmPred, 2319 InstHexagon MI> { 2320 defm: Storexi_fi_add_pat <Store, Value, ImmPred, MI>; 2321 def: Storexi_fi_pat <Store, Value, MI>; 2322 defm: Storexi_add_pat <Store, Value, ImmPred, MI>; 2323} 2324 2325multiclass Storexim_pat<PatFrag Store, PatFrag Value, PatLeaf ImmPred, 2326 PatFrag ValueMod, InstHexagon MI> { 2327 defm: Storexim_fi_add_pat <Store, Value, ImmPred, ValueMod, MI>; 2328 def: Storexim_fi_pat <Store, Value, ValueMod, MI>; 2329 defm: Storexim_add_pat <Store, Value, ImmPred, ValueMod, MI>; 2330} 2331 2332// Reg<<S + Imm 2333class Storexu_shl_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred, InstHexagon MI> 2334 : Pat<(Store Value:$Rt, (add (shl I32:$Ru, u2_0ImmPred:$u2), ImmPred:$A)), 2335 (MI IntRegs:$Ru, imm:$u2, ImmPred:$A, Value:$Rt)>; 2336 2337// Reg<<S + Reg 2338class Storexr_shl_pat<PatFrag Store, PatFrag Value, InstHexagon MI> 2339 : Pat<(Store Value:$Ru, (add I32:$Rs, (shl I32:$Rt, u2_0ImmPred:$u2))), 2340 (MI IntRegs:$Rs, IntRegs:$Rt, imm:$u2, Value:$Ru)>; 2341 2342// Reg + Reg 2343class Storexr_add_pat<PatFrag Store, PatFrag Value, InstHexagon MI> 2344 : Pat<(Store Value:$Ru, (add I32:$Rs, I32:$Rt)), 2345 (MI IntRegs:$Rs, IntRegs:$Rt, 0, Value:$Ru)>; 2346 2347class Storea_pat<PatFrag Store, PatFrag Value, PatFrag Addr, InstHexagon MI> 2348 : Pat<(Store Value:$val, Addr:$addr), (MI Addr:$addr, Value:$val)>; 2349 2350class Stoream_pat<PatFrag Store, PatFrag Value, PatFrag Addr, PatFrag ValueMod, 2351 InstHexagon MI> 2352 : Pat<(Store Value:$val, Addr:$addr), 2353 (MI Addr:$addr, (ValueMod Value:$val))>; 2354 2355// Regular stores in the DAG have two operands: value and address. 2356// Atomic stores also have two, but they are reversed: address, value. 2357// To use atomic stores with the patterns, they need to have their operands 2358// swapped. This relies on the knowledge that the F.Fragment uses names 2359// "ptr" and "val". 2360class AtomSt<PatFrag F> 2361 : PatFrag<(ops node:$val, node:$ptr), !head(F.Fragments), F.PredicateCode, 2362 F.OperandTransform> { 2363 let IsAtomic = F.IsAtomic; 2364 let MemoryVT = F.MemoryVT; 2365} 2366 2367 2368def IMM_BYTE : SDNodeXForm<imm, [{ 2369 // -1 can be represented as 255, etc. 2370 // assigning to a byte restores our desired signed value. 2371 int8_t imm = N->getSExtValue(); 2372 return CurDAG->getTargetConstant(imm, SDLoc(N), MVT::i32); 2373}]>; 2374 2375def IMM_HALF : SDNodeXForm<imm, [{ 2376 // -1 can be represented as 65535, etc. 2377 // assigning to a short restores our desired signed value. 2378 int16_t imm = N->getSExtValue(); 2379 return CurDAG->getTargetConstant(imm, SDLoc(N), MVT::i32); 2380}]>; 2381 2382def IMM_WORD : SDNodeXForm<imm, [{ 2383 // -1 can be represented as 4294967295, etc. 2384 // Currently, it's not doing this. But some optimization 2385 // might convert -1 to a large +ve number. 2386 // assigning to a word restores our desired signed value. 2387 int32_t imm = N->getSExtValue(); 2388 return CurDAG->getTargetConstant(imm, SDLoc(N), MVT::i32); 2389}]>; 2390 2391def ToImmByte : OutPatFrag<(ops node:$R), (IMM_BYTE $R)>; 2392def ToImmHalf : OutPatFrag<(ops node:$R), (IMM_HALF $R)>; 2393def ToImmWord : OutPatFrag<(ops node:$R), (IMM_WORD $R)>; 2394 2395// Even though the offset is not extendable in the store-immediate, we 2396// can still generate the fi# in the base address. If the final offset 2397// is not valid for the instruction, we will replace it with a scratch 2398// register. 2399class SmallStackStore<PatFrag Store> 2400 : PatFrag<(ops node:$Val, node:$Addr), (Store node:$Val, node:$Addr), [{ 2401 return isSmallStackStore(cast<StoreSDNode>(N)); 2402}]>; 2403 2404// This is the complement of SmallStackStore. 2405class LargeStackStore<PatFrag Store> 2406 : PatFrag<(ops node:$Val, node:$Addr), (Store node:$Val, node:$Addr), [{ 2407 return !isSmallStackStore(cast<StoreSDNode>(N)); 2408}]>; 2409 2410// Preferred addressing modes for various combinations of stored value 2411// and address computation. 2412// For stores where the address and value are both immediates, prefer 2413// store-immediate. The reason is that the constant-extender optimization 2414// can replace store-immediate with a store-register, but there is nothing 2415// to generate a store-immediate out of a store-register. 2416// 2417// C R F F+C R+C R+R R<<S+C R<<S+R 2418// --+-------+-----+-----+------+-----+-----+--------+-------- 2419// C | imm | imm | imm | imm | imm | rr | ur | rr 2420// R | abs* | io | io | io | io | rr | ur | rr 2421// 2422// (*) Absolute or GP-relative. 2423// 2424// Note that any expression can be matched by Reg. In particular, an immediate 2425// can always be placed in a register, so patterns checking for Imm should 2426// have a higher priority than the ones involving Reg that could also match. 2427// For example, *(p+4) could become r1=#4; memw(r0+r1<<#0) instead of the 2428// preferred memw(r0+#4). Similarly Reg+Imm or Reg+Reg should be tried before 2429// Reg alone. 2430// 2431// The order in which the different combinations are tried: 2432// 2433// C F R F+C R+C R+R R<<S+C R<<S+R 2434// --+-------+-----+-----+------+-----+-----+--------+-------- 2435// C | 1 | 6 | - | 5 | 9 | - | - | - 2436// R | 2 | 8 | 12 | 7 | 10 | 11 | 3 | 4 2437 2438 2439// First, match the unusual case of doubleword store into Reg+Imm4, i.e. 2440// a store where the offset Imm4 is a multiple of 4, but not of 8. This 2441// implies that Reg is also a proper multiple of 4. To still generate a 2442// doubleword store, add 4 to Reg, and subtract 4 from the offset. 2443 2444def s30_2ProperPred : PatLeaf<(i32 imm), [{ 2445 int64_t v = (int64_t)N->getSExtValue(); 2446 return isShiftedInt<30,2>(v) && !isShiftedInt<29,3>(v); 2447}]>; 2448def RoundTo8 : SDNodeXForm<imm, [{ 2449 int32_t Imm = N->getSExtValue(); 2450 return CurDAG->getTargetConstant(Imm & -8, SDLoc(N), MVT::i32); 2451}]>; 2452 2453let AddedComplexity = 150 in 2454def: Pat<(store I64:$Ru, (add I32:$Rs, s30_2ProperPred:$Off)), 2455 (S2_storerd_io (A2_addi I32:$Rs, 4), (RoundTo8 $Off), I64:$Ru)>; 2456 2457class Storexi_abs_pat<PatFrag Store, PatFrag Value, InstHexagon MI> 2458 : Pat<(Store Value:$val, anyimm:$addr), 2459 (MI (ToI32 $addr), 0, Value:$val)>; 2460class Storexim_abs_pat<PatFrag Store, PatFrag Value, PatFrag ValueMod, 2461 InstHexagon MI> 2462 : Pat<(Store Value:$val, anyimm:$addr), 2463 (MI (ToI32 $addr), 0, (ValueMod Value:$val))>; 2464 2465let AddedComplexity = 140 in { 2466 def: Storexim_abs_pat<truncstorei8, anyint, ToImmByte, S4_storeirb_io>; 2467 def: Storexim_abs_pat<truncstorei16, anyint, ToImmHalf, S4_storeirh_io>; 2468 def: Storexim_abs_pat<store, anyint, ToImmWord, S4_storeiri_io>; 2469 2470 def: Storexi_abs_pat<truncstorei8, anyimm, S4_storeirb_io>; 2471 def: Storexi_abs_pat<truncstorei16, anyimm, S4_storeirh_io>; 2472 def: Storexi_abs_pat<store, anyimm, S4_storeiri_io>; 2473} 2474 2475// GP-relative address 2476let AddedComplexity = 120 in { 2477 def: Storea_pat<truncstorei8, I32, addrgp, S2_storerbgp>; 2478 def: Storea_pat<truncstorei16, I32, addrgp, S2_storerhgp>; 2479 def: Storea_pat<store, I32, addrgp, S2_storerigp>; 2480 def: Storea_pat<store, V4I8, addrgp, S2_storerigp>; 2481 def: Storea_pat<store, V2I16, addrgp, S2_storerigp>; 2482 def: Storea_pat<store, I64, addrgp, S2_storerdgp>; 2483 def: Storea_pat<store, V8I8, addrgp, S2_storerdgp>; 2484 def: Storea_pat<store, V4I16, addrgp, S2_storerdgp>; 2485 def: Storea_pat<store, V2I32, addrgp, S2_storerdgp>; 2486 def: Storea_pat<store, F32, addrgp, S2_storerigp>; 2487 def: Storea_pat<store, F64, addrgp, S2_storerdgp>; 2488 def: Storea_pat<AtomSt<atomic_store_8>, I32, addrgp, S2_storerbgp>; 2489 def: Storea_pat<AtomSt<atomic_store_16>, I32, addrgp, S2_storerhgp>; 2490 def: Storea_pat<AtomSt<atomic_store_32>, I32, addrgp, S2_storerigp>; 2491 def: Storea_pat<AtomSt<atomic_store_32>, V4I8, addrgp, S2_storerigp>; 2492 def: Storea_pat<AtomSt<atomic_store_32>, V2I16, addrgp, S2_storerigp>; 2493 def: Storea_pat<AtomSt<atomic_store_64>, I64, addrgp, S2_storerdgp>; 2494 def: Storea_pat<AtomSt<atomic_store_64>, V8I8, addrgp, S2_storerdgp>; 2495 def: Storea_pat<AtomSt<atomic_store_64>, V4I16, addrgp, S2_storerdgp>; 2496 def: Storea_pat<AtomSt<atomic_store_64>, V2I32, addrgp, S2_storerdgp>; 2497 2498 def: Stoream_pat<truncstorei8, I64, addrgp, LoReg, S2_storerbgp>; 2499 def: Stoream_pat<truncstorei16, I64, addrgp, LoReg, S2_storerhgp>; 2500 def: Stoream_pat<truncstorei32, I64, addrgp, LoReg, S2_storerigp>; 2501 def: Stoream_pat<store, I1, addrgp, I1toI32, S2_storerbgp>; 2502} 2503 2504// Absolute address 2505let AddedComplexity = 110 in { 2506 def: Storea_pat<truncstorei8, I32, anyimm0, PS_storerbabs>; 2507 def: Storea_pat<truncstorei16, I32, anyimm1, PS_storerhabs>; 2508 def: Storea_pat<store, I32, anyimm2, PS_storeriabs>; 2509 def: Storea_pat<store, V4I8, anyimm2, PS_storeriabs>; 2510 def: Storea_pat<store, V2I16, anyimm2, PS_storeriabs>; 2511 def: Storea_pat<store, I64, anyimm3, PS_storerdabs>; 2512 def: Storea_pat<store, V8I8, anyimm3, PS_storerdabs>; 2513 def: Storea_pat<store, V4I16, anyimm3, PS_storerdabs>; 2514 def: Storea_pat<store, V2I32, anyimm3, PS_storerdabs>; 2515 def: Storea_pat<store, F32, anyimm2, PS_storeriabs>; 2516 def: Storea_pat<store, F64, anyimm3, PS_storerdabs>; 2517 def: Storea_pat<AtomSt<atomic_store_8>, I32, anyimm0, PS_storerbabs>; 2518 def: Storea_pat<AtomSt<atomic_store_16>, I32, anyimm1, PS_storerhabs>; 2519 def: Storea_pat<AtomSt<atomic_store_32>, I32, anyimm2, PS_storeriabs>; 2520 def: Storea_pat<AtomSt<atomic_store_32>, V4I8, anyimm2, PS_storeriabs>; 2521 def: Storea_pat<AtomSt<atomic_store_32>, V2I16, anyimm2, PS_storeriabs>; 2522 def: Storea_pat<AtomSt<atomic_store_64>, I64, anyimm3, PS_storerdabs>; 2523 def: Storea_pat<AtomSt<atomic_store_64>, V8I8, anyimm3, PS_storerdabs>; 2524 def: Storea_pat<AtomSt<atomic_store_64>, V4I16, anyimm3, PS_storerdabs>; 2525 def: Storea_pat<AtomSt<atomic_store_64>, V2I32, anyimm3, PS_storerdabs>; 2526 2527 def: Stoream_pat<truncstorei8, I64, anyimm0, LoReg, PS_storerbabs>; 2528 def: Stoream_pat<truncstorei16, I64, anyimm1, LoReg, PS_storerhabs>; 2529 def: Stoream_pat<truncstorei32, I64, anyimm2, LoReg, PS_storeriabs>; 2530 def: Stoream_pat<store, I1, anyimm0, I1toI32, PS_storerbabs>; 2531} 2532 2533// Reg<<S + Imm 2534let AddedComplexity = 100 in { 2535 def: Storexu_shl_pat<truncstorei8, I32, anyimm0, S4_storerb_ur>; 2536 def: Storexu_shl_pat<truncstorei16, I32, anyimm1, S4_storerh_ur>; 2537 def: Storexu_shl_pat<store, I32, anyimm2, S4_storeri_ur>; 2538 def: Storexu_shl_pat<store, V4I8, anyimm2, S4_storeri_ur>; 2539 def: Storexu_shl_pat<store, V2I16, anyimm2, S4_storeri_ur>; 2540 def: Storexu_shl_pat<store, I64, anyimm3, S4_storerd_ur>; 2541 def: Storexu_shl_pat<store, V8I8, anyimm3, S4_storerd_ur>; 2542 def: Storexu_shl_pat<store, V4I16, anyimm3, S4_storerd_ur>; 2543 def: Storexu_shl_pat<store, V2I32, anyimm3, S4_storerd_ur>; 2544 def: Storexu_shl_pat<store, F32, anyimm2, S4_storeri_ur>; 2545 def: Storexu_shl_pat<store, F64, anyimm3, S4_storerd_ur>; 2546 2547 def: Pat<(store I1:$Pu, (add (shl I32:$Rs, u2_0ImmPred:$u2), anyimm:$A)), 2548 (S4_storerb_ur IntRegs:$Rs, imm:$u2, imm:$A, (I1toI32 I1:$Pu))>; 2549} 2550 2551// Reg<<S + Reg 2552let AddedComplexity = 90 in { 2553 def: Storexr_shl_pat<truncstorei8, I32, S4_storerb_rr>; 2554 def: Storexr_shl_pat<truncstorei16, I32, S4_storerh_rr>; 2555 def: Storexr_shl_pat<store, I32, S4_storeri_rr>; 2556 def: Storexr_shl_pat<store, V4I8, S4_storeri_rr>; 2557 def: Storexr_shl_pat<store, V2I16, S4_storeri_rr>; 2558 def: Storexr_shl_pat<store, I64, S4_storerd_rr>; 2559 def: Storexr_shl_pat<store, V8I8, S4_storerd_rr>; 2560 def: Storexr_shl_pat<store, V4I16, S4_storerd_rr>; 2561 def: Storexr_shl_pat<store, V2I32, S4_storerd_rr>; 2562 def: Storexr_shl_pat<store, F32, S4_storeri_rr>; 2563 def: Storexr_shl_pat<store, F64, S4_storerd_rr>; 2564 2565 def: Pat<(store I1:$Pu, (add (shl I32:$Rs, u2_0ImmPred:$u2), I32:$Rt)), 2566 (S4_storerb_ur IntRegs:$Rt, IntRegs:$Rs, imm:$u2, (I1toI32 I1:$Pu))>; 2567} 2568 2569class SS_<PatFrag F> : SmallStackStore<F>; 2570class LS_<PatFrag F> : LargeStackStore<F>; 2571 2572multiclass IMFA_<PatFrag S, PatFrag V, PatFrag O, PatFrag M, InstHexagon I> { 2573 defm: Storexim_fi_add_pat<S, V, O, M, I>; 2574} 2575multiclass IFA_<PatFrag S, PatFrag V, PatFrag O, InstHexagon I> { 2576 defm: Storexi_fi_add_pat<S, V, O, I>; 2577} 2578 2579// Fi+Imm, store-immediate 2580let AddedComplexity = 80 in { 2581 defm: IMFA_<SS_<truncstorei8>, anyint, u6_0ImmPred, ToImmByte, S4_storeirb_io>; 2582 defm: IMFA_<SS_<truncstorei16>, anyint, u6_1ImmPred, ToImmHalf, S4_storeirh_io>; 2583 defm: IMFA_<SS_<store>, anyint, u6_2ImmPred, ToImmWord, S4_storeiri_io>; 2584 2585 defm: IFA_<SS_<truncstorei8>, anyimm, u6_0ImmPred, S4_storeirb_io>; 2586 defm: IFA_<SS_<truncstorei16>, anyimm, u6_1ImmPred, S4_storeirh_io>; 2587 defm: IFA_<SS_<store>, anyimm, u6_2ImmPred, S4_storeiri_io>; 2588 2589 // For large-stack stores, generate store-register (prefer explicit Fi 2590 // in the address). 2591 defm: IMFA_<LS_<truncstorei8>, anyimm, u6_0ImmPred, ToI32, S2_storerb_io>; 2592 defm: IMFA_<LS_<truncstorei16>, anyimm, u6_1ImmPred, ToI32, S2_storerh_io>; 2593 defm: IMFA_<LS_<store>, anyimm, u6_2ImmPred, ToI32, S2_storeri_io>; 2594} 2595 2596// Fi, store-immediate 2597let AddedComplexity = 70 in { 2598 def: Storexim_fi_pat<SS_<truncstorei8>, anyint, ToImmByte, S4_storeirb_io>; 2599 def: Storexim_fi_pat<SS_<truncstorei16>, anyint, ToImmHalf, S4_storeirh_io>; 2600 def: Storexim_fi_pat<SS_<store>, anyint, ToImmWord, S4_storeiri_io>; 2601 2602 def: Storexi_fi_pat<SS_<truncstorei8>, anyimm, S4_storeirb_io>; 2603 def: Storexi_fi_pat<SS_<truncstorei16>, anyimm, S4_storeirh_io>; 2604 def: Storexi_fi_pat<SS_<store>, anyimm, S4_storeiri_io>; 2605 2606 // For large-stack stores, generate store-register (prefer explicit Fi 2607 // in the address). 2608 def: Storexim_fi_pat<LS_<truncstorei8>, anyimm, ToI32, S2_storerb_io>; 2609 def: Storexim_fi_pat<LS_<truncstorei16>, anyimm, ToI32, S2_storerh_io>; 2610 def: Storexim_fi_pat<LS_<store>, anyimm, ToI32, S2_storeri_io>; 2611} 2612 2613// Fi+Imm, Fi, store-register 2614let AddedComplexity = 60 in { 2615 defm: Storexi_fi_add_pat<truncstorei8, I32, anyimm, S2_storerb_io>; 2616 defm: Storexi_fi_add_pat<truncstorei16, I32, anyimm, S2_storerh_io>; 2617 defm: Storexi_fi_add_pat<store, I32, anyimm, S2_storeri_io>; 2618 defm: Storexi_fi_add_pat<store, V4I8, anyimm, S2_storeri_io>; 2619 defm: Storexi_fi_add_pat<store, V2I16, anyimm, S2_storeri_io>; 2620 defm: Storexi_fi_add_pat<store, I64, anyimm, S2_storerd_io>; 2621 defm: Storexi_fi_add_pat<store, V8I8, anyimm, S2_storerd_io>; 2622 defm: Storexi_fi_add_pat<store, V4I16, anyimm, S2_storerd_io>; 2623 defm: Storexi_fi_add_pat<store, V2I32, anyimm, S2_storerd_io>; 2624 defm: Storexi_fi_add_pat<store, F32, anyimm, S2_storeri_io>; 2625 defm: Storexi_fi_add_pat<store, F64, anyimm, S2_storerd_io>; 2626 defm: Storexim_fi_add_pat<store, I1, anyimm, I1toI32, S2_storerb_io>; 2627 2628 def: Storexi_fi_pat<truncstorei8, I32, S2_storerb_io>; 2629 def: Storexi_fi_pat<truncstorei16, I32, S2_storerh_io>; 2630 def: Storexi_fi_pat<store, I32, S2_storeri_io>; 2631 def: Storexi_fi_pat<store, V4I8, S2_storeri_io>; 2632 def: Storexi_fi_pat<store, V2I16, S2_storeri_io>; 2633 def: Storexi_fi_pat<store, I64, S2_storerd_io>; 2634 def: Storexi_fi_pat<store, V8I8, S2_storerd_io>; 2635 def: Storexi_fi_pat<store, V4I16, S2_storerd_io>; 2636 def: Storexi_fi_pat<store, V2I32, S2_storerd_io>; 2637 def: Storexi_fi_pat<store, F32, S2_storeri_io>; 2638 def: Storexi_fi_pat<store, F64, S2_storerd_io>; 2639 def: Storexim_fi_pat<store, I1, I1toI32, S2_storerb_io>; 2640} 2641 2642 2643multiclass IMRA_<PatFrag S, PatFrag V, PatFrag O, PatFrag M, InstHexagon I> { 2644 defm: Storexim_add_pat<S, V, O, M, I>; 2645} 2646multiclass IRA_<PatFrag S, PatFrag V, PatFrag O, InstHexagon I> { 2647 defm: Storexi_add_pat<S, V, O, I>; 2648} 2649 2650// Reg+Imm, store-immediate 2651let AddedComplexity = 50 in { 2652 defm: IMRA_<truncstorei8, anyint, u6_0ImmPred, ToImmByte, S4_storeirb_io>; 2653 defm: IMRA_<truncstorei16, anyint, u6_1ImmPred, ToImmHalf, S4_storeirh_io>; 2654 defm: IMRA_<store, anyint, u6_2ImmPred, ToImmWord, S4_storeiri_io>; 2655 2656 defm: IRA_<truncstorei8, anyimm, u6_0ImmPred, S4_storeirb_io>; 2657 defm: IRA_<truncstorei16, anyimm, u6_1ImmPred, S4_storeirh_io>; 2658 defm: IRA_<store, anyimm, u6_2ImmPred, S4_storeiri_io>; 2659} 2660 2661// Reg+Imm, store-register 2662let AddedComplexity = 40 in { 2663 defm: Storexi_pat<truncstorei8, I32, anyimm0, S2_storerb_io>; 2664 defm: Storexi_pat<truncstorei16, I32, anyimm1, S2_storerh_io>; 2665 defm: Storexi_pat<store, I32, anyimm2, S2_storeri_io>; 2666 defm: Storexi_pat<store, V4I8, anyimm2, S2_storeri_io>; 2667 defm: Storexi_pat<store, V2I16, anyimm2, S2_storeri_io>; 2668 defm: Storexi_pat<store, I64, anyimm3, S2_storerd_io>; 2669 defm: Storexi_pat<store, V8I8, anyimm3, S2_storerd_io>; 2670 defm: Storexi_pat<store, V4I16, anyimm3, S2_storerd_io>; 2671 defm: Storexi_pat<store, V2I32, anyimm3, S2_storerd_io>; 2672 defm: Storexi_pat<store, F32, anyimm2, S2_storeri_io>; 2673 defm: Storexi_pat<store, F64, anyimm3, S2_storerd_io>; 2674 2675 defm: Storexim_pat<truncstorei8, I64, anyimm0, LoReg, S2_storerb_io>; 2676 defm: Storexim_pat<truncstorei16, I64, anyimm1, LoReg, S2_storerh_io>; 2677 defm: Storexim_pat<truncstorei32, I64, anyimm2, LoReg, S2_storeri_io>; 2678 defm: Storexim_pat<store, I1, anyimm0, I1toI32, S2_storerb_io>; 2679 2680 defm: Storexi_pat<AtomSt<atomic_store_8>, I32, anyimm0, S2_storerb_io>; 2681 defm: Storexi_pat<AtomSt<atomic_store_16>, I32, anyimm1, S2_storerh_io>; 2682 defm: Storexi_pat<AtomSt<atomic_store_32>, I32, anyimm2, S2_storeri_io>; 2683 defm: Storexi_pat<AtomSt<atomic_store_32>, V4I8, anyimm2, S2_storeri_io>; 2684 defm: Storexi_pat<AtomSt<atomic_store_32>, V2I16, anyimm2, S2_storeri_io>; 2685 defm: Storexi_pat<AtomSt<atomic_store_64>, I64, anyimm3, S2_storerd_io>; 2686 defm: Storexi_pat<AtomSt<atomic_store_64>, V8I8, anyimm3, S2_storerd_io>; 2687 defm: Storexi_pat<AtomSt<atomic_store_64>, V4I16, anyimm3, S2_storerd_io>; 2688 defm: Storexi_pat<AtomSt<atomic_store_64>, V2I32, anyimm3, S2_storerd_io>; 2689} 2690 2691// Reg+Reg 2692let AddedComplexity = 30 in { 2693 def: Storexr_add_pat<truncstorei8, I32, S4_storerb_rr>; 2694 def: Storexr_add_pat<truncstorei16, I32, S4_storerh_rr>; 2695 def: Storexr_add_pat<store, I32, S4_storeri_rr>; 2696 def: Storexr_add_pat<store, V4I8, S4_storeri_rr>; 2697 def: Storexr_add_pat<store, V2I16, S4_storeri_rr>; 2698 def: Storexr_add_pat<store, I64, S4_storerd_rr>; 2699 def: Storexr_add_pat<store, V8I8, S4_storerd_rr>; 2700 def: Storexr_add_pat<store, V4I16, S4_storerd_rr>; 2701 def: Storexr_add_pat<store, V2I32, S4_storerd_rr>; 2702 def: Storexr_add_pat<store, F32, S4_storeri_rr>; 2703 def: Storexr_add_pat<store, F64, S4_storerd_rr>; 2704 2705 def: Pat<(store I1:$Pu, (add I32:$Rs, I32:$Rt)), 2706 (S4_storerb_rr IntRegs:$Rs, IntRegs:$Rt, 0, (I1toI32 I1:$Pu))>; 2707} 2708 2709// Reg, store-immediate 2710let AddedComplexity = 20 in { 2711 def: Storexim_base_pat<truncstorei8, anyint, ToImmByte, S4_storeirb_io>; 2712 def: Storexim_base_pat<truncstorei16, anyint, ToImmHalf, S4_storeirh_io>; 2713 def: Storexim_base_pat<store, anyint, ToImmWord, S4_storeiri_io>; 2714 2715 def: Storexi_base_pat<truncstorei8, anyimm, S4_storeirb_io>; 2716 def: Storexi_base_pat<truncstorei16, anyimm, S4_storeirh_io>; 2717 def: Storexi_base_pat<store, anyimm, S4_storeiri_io>; 2718} 2719 2720// Reg, store-register 2721let AddedComplexity = 10 in { 2722 def: Storexi_base_pat<truncstorei8, I32, S2_storerb_io>; 2723 def: Storexi_base_pat<truncstorei16, I32, S2_storerh_io>; 2724 def: Storexi_base_pat<store, I32, S2_storeri_io>; 2725 def: Storexi_base_pat<store, V4I8, S2_storeri_io>; 2726 def: Storexi_base_pat<store, V2I16, S2_storeri_io>; 2727 def: Storexi_base_pat<store, I64, S2_storerd_io>; 2728 def: Storexi_base_pat<store, V8I8, S2_storerd_io>; 2729 def: Storexi_base_pat<store, V4I16, S2_storerd_io>; 2730 def: Storexi_base_pat<store, V2I32, S2_storerd_io>; 2731 def: Storexi_base_pat<store, F32, S2_storeri_io>; 2732 def: Storexi_base_pat<store, F64, S2_storerd_io>; 2733 2734 def: Storexim_base_pat<truncstorei8, I64, LoReg, S2_storerb_io>; 2735 def: Storexim_base_pat<truncstorei16, I64, LoReg, S2_storerh_io>; 2736 def: Storexim_base_pat<truncstorei32, I64, LoReg, S2_storeri_io>; 2737 def: Storexim_base_pat<store, I1, I1toI32, S2_storerb_io>; 2738 2739 def: Storexi_base_pat<AtomSt<atomic_store_8>, I32, S2_storerb_io>; 2740 def: Storexi_base_pat<AtomSt<atomic_store_16>, I32, S2_storerh_io>; 2741 def: Storexi_base_pat<AtomSt<atomic_store_32>, I32, S2_storeri_io>; 2742 def: Storexi_base_pat<AtomSt<atomic_store_32>, V4I8, S2_storeri_io>; 2743 def: Storexi_base_pat<AtomSt<atomic_store_32>, V2I16, S2_storeri_io>; 2744 def: Storexi_base_pat<AtomSt<atomic_store_64>, I64, S2_storerd_io>; 2745 def: Storexi_base_pat<AtomSt<atomic_store_64>, V8I8, S2_storerd_io>; 2746 def: Storexi_base_pat<AtomSt<atomic_store_64>, V4I16, S2_storerd_io>; 2747 def: Storexi_base_pat<AtomSt<atomic_store_64>, V2I32, S2_storerd_io>; 2748} 2749 2750 2751// --(14) Memop ---------------------------------------------------------- 2752// 2753 2754def m5_0Imm8Pred : PatLeaf<(i32 imm), [{ 2755 int8_t V = N->getSExtValue(); 2756 return -32 < V && V <= -1; 2757}]>; 2758 2759def m5_0Imm16Pred : PatLeaf<(i32 imm), [{ 2760 int16_t V = N->getSExtValue(); 2761 return -32 < V && V <= -1; 2762}]>; 2763 2764def m5_0ImmPred : PatLeaf<(i32 imm), [{ 2765 int64_t V = N->getSExtValue(); 2766 return -31 <= V && V <= -1; 2767}]>; 2768 2769def IsNPow2_8 : PatLeaf<(i32 imm), [{ 2770 uint8_t NV = ~N->getZExtValue(); 2771 return isPowerOf2_32(NV); 2772}]>; 2773 2774def IsNPow2_16 : PatLeaf<(i32 imm), [{ 2775 uint16_t NV = ~N->getZExtValue(); 2776 return isPowerOf2_32(NV); 2777}]>; 2778 2779def Log2_8 : SDNodeXForm<imm, [{ 2780 uint8_t V = N->getZExtValue(); 2781 return CurDAG->getTargetConstant(Log2_32(V), SDLoc(N), MVT::i32); 2782}]>; 2783 2784def Log2_16 : SDNodeXForm<imm, [{ 2785 uint16_t V = N->getZExtValue(); 2786 return CurDAG->getTargetConstant(Log2_32(V), SDLoc(N), MVT::i32); 2787}]>; 2788 2789def LogN2_8 : SDNodeXForm<imm, [{ 2790 uint8_t NV = ~N->getZExtValue(); 2791 return CurDAG->getTargetConstant(Log2_32(NV), SDLoc(N), MVT::i32); 2792}]>; 2793 2794def LogN2_16 : SDNodeXForm<imm, [{ 2795 uint16_t NV = ~N->getZExtValue(); 2796 return CurDAG->getTargetConstant(Log2_32(NV), SDLoc(N), MVT::i32); 2797}]>; 2798 2799def IdImm : SDNodeXForm<imm, [{ return SDValue(N, 0); }]>; 2800 2801multiclass Memopxr_base_pat<PatFrag Load, PatFrag Store, SDNode Oper, 2802 InstHexagon MI> { 2803 // Addr: i32 2804 def: Pat<(Store (Oper (Load I32:$Rs), I32:$A), I32:$Rs), 2805 (MI I32:$Rs, 0, I32:$A)>; 2806 // Addr: fi 2807 def: Pat<(Store (Oper (Load AddrFI:$Rs), I32:$A), AddrFI:$Rs), 2808 (MI AddrFI:$Rs, 0, I32:$A)>; 2809} 2810 2811multiclass Memopxr_add_pat<PatFrag Load, PatFrag Store, PatFrag ImmPred, 2812 SDNode Oper, InstHexagon MI> { 2813 // Addr: i32 2814 def: Pat<(Store (Oper (Load (add I32:$Rs, ImmPred:$Off)), I32:$A), 2815 (add I32:$Rs, ImmPred:$Off)), 2816 (MI I32:$Rs, imm:$Off, I32:$A)>; 2817 def: Pat<(Store (Oper (Load (IsOrAdd I32:$Rs, ImmPred:$Off)), I32:$A), 2818 (IsOrAdd I32:$Rs, ImmPred:$Off)), 2819 (MI I32:$Rs, imm:$Off, I32:$A)>; 2820 // Addr: fi 2821 def: Pat<(Store (Oper (Load (add AddrFI:$Rs, ImmPred:$Off)), I32:$A), 2822 (add AddrFI:$Rs, ImmPred:$Off)), 2823 (MI AddrFI:$Rs, imm:$Off, I32:$A)>; 2824 def: Pat<(Store (Oper (Load (IsOrAdd AddrFI:$Rs, ImmPred:$Off)), I32:$A), 2825 (IsOrAdd AddrFI:$Rs, ImmPred:$Off)), 2826 (MI AddrFI:$Rs, imm:$Off, I32:$A)>; 2827} 2828 2829multiclass Memopxr_pat<PatFrag Load, PatFrag Store, PatFrag ImmPred, 2830 SDNode Oper, InstHexagon MI> { 2831 let Predicates = [UseMEMOPS] in { 2832 defm: Memopxr_base_pat <Load, Store, Oper, MI>; 2833 defm: Memopxr_add_pat <Load, Store, ImmPred, Oper, MI>; 2834 } 2835} 2836 2837let AddedComplexity = 200 in { 2838 // add reg 2839 defm: Memopxr_pat<extloadi8, truncstorei8, u6_0ImmPred, add, 2840 /*anyext*/ L4_add_memopb_io>; 2841 defm: Memopxr_pat<sextloadi8, truncstorei8, u6_0ImmPred, add, 2842 /*sext*/ L4_add_memopb_io>; 2843 defm: Memopxr_pat<zextloadi8, truncstorei8, u6_0ImmPred, add, 2844 /*zext*/ L4_add_memopb_io>; 2845 defm: Memopxr_pat<extloadi16, truncstorei16, u6_1ImmPred, add, 2846 /*anyext*/ L4_add_memoph_io>; 2847 defm: Memopxr_pat<sextloadi16, truncstorei16, u6_1ImmPred, add, 2848 /*sext*/ L4_add_memoph_io>; 2849 defm: Memopxr_pat<zextloadi16, truncstorei16, u6_1ImmPred, add, 2850 /*zext*/ L4_add_memoph_io>; 2851 defm: Memopxr_pat<load, store, u6_2ImmPred, add, L4_add_memopw_io>; 2852 2853 // sub reg 2854 defm: Memopxr_pat<extloadi8, truncstorei8, u6_0ImmPred, sub, 2855 /*anyext*/ L4_sub_memopb_io>; 2856 defm: Memopxr_pat<sextloadi8, truncstorei8, u6_0ImmPred, sub, 2857 /*sext*/ L4_sub_memopb_io>; 2858 defm: Memopxr_pat<zextloadi8, truncstorei8, u6_0ImmPred, sub, 2859 /*zext*/ L4_sub_memopb_io>; 2860 defm: Memopxr_pat<extloadi16, truncstorei16, u6_1ImmPred, sub, 2861 /*anyext*/ L4_sub_memoph_io>; 2862 defm: Memopxr_pat<sextloadi16, truncstorei16, u6_1ImmPred, sub, 2863 /*sext*/ L4_sub_memoph_io>; 2864 defm: Memopxr_pat<zextloadi16, truncstorei16, u6_1ImmPred, sub, 2865 /*zext*/ L4_sub_memoph_io>; 2866 defm: Memopxr_pat<load, store, u6_2ImmPred, sub, L4_sub_memopw_io>; 2867 2868 // and reg 2869 defm: Memopxr_pat<extloadi8, truncstorei8, u6_0ImmPred, and, 2870 /*anyext*/ L4_and_memopb_io>; 2871 defm: Memopxr_pat<sextloadi8, truncstorei8, u6_0ImmPred, and, 2872 /*sext*/ L4_and_memopb_io>; 2873 defm: Memopxr_pat<zextloadi8, truncstorei8, u6_0ImmPred, and, 2874 /*zext*/ L4_and_memopb_io>; 2875 defm: Memopxr_pat<extloadi16, truncstorei16, u6_1ImmPred, and, 2876 /*anyext*/ L4_and_memoph_io>; 2877 defm: Memopxr_pat<sextloadi16, truncstorei16, u6_1ImmPred, and, 2878 /*sext*/ L4_and_memoph_io>; 2879 defm: Memopxr_pat<zextloadi16, truncstorei16, u6_1ImmPred, and, 2880 /*zext*/ L4_and_memoph_io>; 2881 defm: Memopxr_pat<load, store, u6_2ImmPred, and, L4_and_memopw_io>; 2882 2883 // or reg 2884 defm: Memopxr_pat<extloadi8, truncstorei8, u6_0ImmPred, or, 2885 /*anyext*/ L4_or_memopb_io>; 2886 defm: Memopxr_pat<sextloadi8, truncstorei8, u6_0ImmPred, or, 2887 /*sext*/ L4_or_memopb_io>; 2888 defm: Memopxr_pat<zextloadi8, truncstorei8, u6_0ImmPred, or, 2889 /*zext*/ L4_or_memopb_io>; 2890 defm: Memopxr_pat<extloadi16, truncstorei16, u6_1ImmPred, or, 2891 /*anyext*/ L4_or_memoph_io>; 2892 defm: Memopxr_pat<sextloadi16, truncstorei16, u6_1ImmPred, or, 2893 /*sext*/ L4_or_memoph_io>; 2894 defm: Memopxr_pat<zextloadi16, truncstorei16, u6_1ImmPred, or, 2895 /*zext*/ L4_or_memoph_io>; 2896 defm: Memopxr_pat<load, store, u6_2ImmPred, or, L4_or_memopw_io>; 2897} 2898 2899 2900multiclass Memopxi_base_pat<PatFrag Load, PatFrag Store, SDNode Oper, 2901 PatFrag Arg, SDNodeXForm ArgMod, InstHexagon MI> { 2902 // Addr: i32 2903 def: Pat<(Store (Oper (Load I32:$Rs), Arg:$A), I32:$Rs), 2904 (MI I32:$Rs, 0, (ArgMod Arg:$A))>; 2905 // Addr: fi 2906 def: Pat<(Store (Oper (Load AddrFI:$Rs), Arg:$A), AddrFI:$Rs), 2907 (MI AddrFI:$Rs, 0, (ArgMod Arg:$A))>; 2908} 2909 2910multiclass Memopxi_add_pat<PatFrag Load, PatFrag Store, PatFrag ImmPred, 2911 SDNode Oper, PatFrag Arg, SDNodeXForm ArgMod, 2912 InstHexagon MI> { 2913 // Addr: i32 2914 def: Pat<(Store (Oper (Load (add I32:$Rs, ImmPred:$Off)), Arg:$A), 2915 (add I32:$Rs, ImmPred:$Off)), 2916 (MI I32:$Rs, imm:$Off, (ArgMod Arg:$A))>; 2917 def: Pat<(Store (Oper (Load (IsOrAdd I32:$Rs, ImmPred:$Off)), Arg:$A), 2918 (IsOrAdd I32:$Rs, ImmPred:$Off)), 2919 (MI I32:$Rs, imm:$Off, (ArgMod Arg:$A))>; 2920 // Addr: fi 2921 def: Pat<(Store (Oper (Load (add AddrFI:$Rs, ImmPred:$Off)), Arg:$A), 2922 (add AddrFI:$Rs, ImmPred:$Off)), 2923 (MI AddrFI:$Rs, imm:$Off, (ArgMod Arg:$A))>; 2924 def: Pat<(Store (Oper (Load (IsOrAdd AddrFI:$Rs, ImmPred:$Off)), Arg:$A), 2925 (IsOrAdd AddrFI:$Rs, ImmPred:$Off)), 2926 (MI AddrFI:$Rs, imm:$Off, (ArgMod Arg:$A))>; 2927} 2928 2929multiclass Memopxi_pat<PatFrag Load, PatFrag Store, PatFrag ImmPred, 2930 SDNode Oper, PatFrag Arg, SDNodeXForm ArgMod, 2931 InstHexagon MI> { 2932 let Predicates = [UseMEMOPS] in { 2933 defm: Memopxi_base_pat <Load, Store, Oper, Arg, ArgMod, MI>; 2934 defm: Memopxi_add_pat <Load, Store, ImmPred, Oper, Arg, ArgMod, MI>; 2935 } 2936} 2937 2938let AddedComplexity = 220 in { 2939 // add imm 2940 defm: Memopxi_pat<extloadi8, truncstorei8, u6_0ImmPred, add, u5_0ImmPred, 2941 /*anyext*/ IdImm, L4_iadd_memopb_io>; 2942 defm: Memopxi_pat<sextloadi8, truncstorei8, u6_0ImmPred, add, u5_0ImmPred, 2943 /*sext*/ IdImm, L4_iadd_memopb_io>; 2944 defm: Memopxi_pat<zextloadi8, truncstorei8, u6_0ImmPred, add, u5_0ImmPred, 2945 /*zext*/ IdImm, L4_iadd_memopb_io>; 2946 defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, add, u5_0ImmPred, 2947 /*anyext*/ IdImm, L4_iadd_memoph_io>; 2948 defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, add, u5_0ImmPred, 2949 /*sext*/ IdImm, L4_iadd_memoph_io>; 2950 defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, add, u5_0ImmPred, 2951 /*zext*/ IdImm, L4_iadd_memoph_io>; 2952 defm: Memopxi_pat<load, store, u6_2ImmPred, add, u5_0ImmPred, IdImm, 2953 L4_iadd_memopw_io>; 2954 defm: Memopxi_pat<extloadi8, truncstorei8, u6_0ImmPred, sub, m5_0Imm8Pred, 2955 /*anyext*/ NegImm8, L4_iadd_memopb_io>; 2956 defm: Memopxi_pat<sextloadi8, truncstorei8, u6_0ImmPred, sub, m5_0Imm8Pred, 2957 /*sext*/ NegImm8, L4_iadd_memopb_io>; 2958 defm: Memopxi_pat<zextloadi8, truncstorei8, u6_0ImmPred, sub, m5_0Imm8Pred, 2959 /*zext*/ NegImm8, L4_iadd_memopb_io>; 2960 defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, sub, m5_0Imm16Pred, 2961 /*anyext*/ NegImm16, L4_iadd_memoph_io>; 2962 defm: Memopxi_pat<sextloadi16, truncstorei16, u6_1ImmPred, sub, m5_0Imm16Pred, 2963 /*sext*/ NegImm16, L4_iadd_memoph_io>; 2964 defm: Memopxi_pat<zextloadi16, truncstorei16, u6_1ImmPred, sub, m5_0Imm16Pred, 2965 /*zext*/ NegImm16, L4_iadd_memoph_io>; 2966 defm: Memopxi_pat<load, store, u6_2ImmPred, sub, m5_0ImmPred, NegImm32, 2967 L4_iadd_memopw_io>; 2968 2969 // sub imm 2970 defm: Memopxi_pat<extloadi8, truncstorei8, u6_0ImmPred, sub, u5_0ImmPred, 2971 /*anyext*/ IdImm, L4_isub_memopb_io>; 2972 defm: Memopxi_pat<sextloadi8, truncstorei8, u6_0ImmPred, sub, u5_0ImmPred, 2973 /*sext*/ IdImm, L4_isub_memopb_io>; 2974 defm: Memopxi_pat<zextloadi8, truncstorei8, u6_0ImmPred, sub, u5_0ImmPred, 2975 /*zext*/ IdImm, L4_isub_memopb_io>; 2976 defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, sub, u5_0ImmPred, 2977 /*anyext*/ IdImm, L4_isub_memoph_io>; 2978 defm: Memopxi_pat<sextloadi16, truncstorei16, u6_1ImmPred, sub, u5_0ImmPred, 2979 /*sext*/ IdImm, L4_isub_memoph_io>; 2980 defm: Memopxi_pat<zextloadi16, truncstorei16, u6_1ImmPred, sub, u5_0ImmPred, 2981 /*zext*/ IdImm, L4_isub_memoph_io>; 2982 defm: Memopxi_pat<load, store, u6_2ImmPred, sub, u5_0ImmPred, IdImm, 2983 L4_isub_memopw_io>; 2984 defm: Memopxi_pat<extloadi8, truncstorei8, u6_0ImmPred, add, m5_0Imm8Pred, 2985 /*anyext*/ NegImm8, L4_isub_memopb_io>; 2986 defm: Memopxi_pat<sextloadi8, truncstorei8, u6_0ImmPred, add, m5_0Imm8Pred, 2987 /*sext*/ NegImm8, L4_isub_memopb_io>; 2988 defm: Memopxi_pat<zextloadi8, truncstorei8, u6_0ImmPred, add, m5_0Imm8Pred, 2989 /*zext*/ NegImm8, L4_isub_memopb_io>; 2990 defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, add, m5_0Imm16Pred, 2991 /*anyext*/ NegImm16, L4_isub_memoph_io>; 2992 defm: Memopxi_pat<sextloadi16, truncstorei16, u6_1ImmPred, add, m5_0Imm16Pred, 2993 /*sext*/ NegImm16, L4_isub_memoph_io>; 2994 defm: Memopxi_pat<zextloadi16, truncstorei16, u6_1ImmPred, add, m5_0Imm16Pred, 2995 /*zext*/ NegImm16, L4_isub_memoph_io>; 2996 defm: Memopxi_pat<load, store, u6_2ImmPred, add, m5_0ImmPred, NegImm32, 2997 L4_isub_memopw_io>; 2998 2999 // clrbit imm 3000 defm: Memopxi_pat<extloadi8, truncstorei8, u6_0ImmPred, and, IsNPow2_8, 3001 /*anyext*/ LogN2_8, L4_iand_memopb_io>; 3002 defm: Memopxi_pat<sextloadi8, truncstorei8, u6_0ImmPred, and, IsNPow2_8, 3003 /*sext*/ LogN2_8, L4_iand_memopb_io>; 3004 defm: Memopxi_pat<zextloadi8, truncstorei8, u6_0ImmPred, and, IsNPow2_8, 3005 /*zext*/ LogN2_8, L4_iand_memopb_io>; 3006 defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, and, IsNPow2_16, 3007 /*anyext*/ LogN2_16, L4_iand_memoph_io>; 3008 defm: Memopxi_pat<sextloadi16, truncstorei16, u6_1ImmPred, and, IsNPow2_16, 3009 /*sext*/ LogN2_16, L4_iand_memoph_io>; 3010 defm: Memopxi_pat<zextloadi16, truncstorei16, u6_1ImmPred, and, IsNPow2_16, 3011 /*zext*/ LogN2_16, L4_iand_memoph_io>; 3012 defm: Memopxi_pat<load, store, u6_2ImmPred, and, IsNPow2_32, 3013 LogN2_32, L4_iand_memopw_io>; 3014 3015 // setbit imm 3016 defm: Memopxi_pat<extloadi8, truncstorei8, u6_0ImmPred, or, IsPow2_32, 3017 /*anyext*/ Log2_8, L4_ior_memopb_io>; 3018 defm: Memopxi_pat<sextloadi8, truncstorei8, u6_0ImmPred, or, IsPow2_32, 3019 /*sext*/ Log2_8, L4_ior_memopb_io>; 3020 defm: Memopxi_pat<zextloadi8, truncstorei8, u6_0ImmPred, or, IsPow2_32, 3021 /*zext*/ Log2_8, L4_ior_memopb_io>; 3022 defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, or, IsPow2_32, 3023 /*anyext*/ Log2_16, L4_ior_memoph_io>; 3024 defm: Memopxi_pat<sextloadi16, truncstorei16, u6_1ImmPred, or, IsPow2_32, 3025 /*sext*/ Log2_16, L4_ior_memoph_io>; 3026 defm: Memopxi_pat<zextloadi16, truncstorei16, u6_1ImmPred, or, IsPow2_32, 3027 /*zext*/ Log2_16, L4_ior_memoph_io>; 3028 defm: Memopxi_pat<load, store, u6_2ImmPred, or, IsPow2_32, 3029 Log2_32, L4_ior_memopw_io>; 3030} 3031 3032 3033// --(15) Call ----------------------------------------------------------- 3034// 3035 3036// Pseudo instructions. 3037def SDT_SPCallSeqStart 3038 : SDCallSeqStart<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; 3039def SDT_SPCallSeqEnd 3040 : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; 3041 3042def callseq_start: SDNode<"ISD::CALLSEQ_START", SDT_SPCallSeqStart, 3043 [SDNPHasChain, SDNPOutGlue]>; 3044def callseq_end: SDNode<"ISD::CALLSEQ_END", SDT_SPCallSeqEnd, 3045 [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; 3046 3047def SDT_SPCall: SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>; 3048 3049def HexagonTCRet: SDNode<"HexagonISD::TC_RETURN", SDT_SPCall, 3050 [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; 3051def callv3: SDNode<"HexagonISD::CALL", SDT_SPCall, 3052 [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; 3053def callv3nr: SDNode<"HexagonISD::CALLnr", SDT_SPCall, 3054 [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; 3055 3056def: Pat<(callseq_start timm:$amt, timm:$amt2), 3057 (ADJCALLSTACKDOWN imm:$amt, imm:$amt2)>; 3058def: Pat<(callseq_end timm:$amt1, timm:$amt2), 3059 (ADJCALLSTACKUP imm:$amt1, imm:$amt2)>; 3060 3061def: Pat<(HexagonTCRet tglobaladdr:$dst), (PS_tailcall_i tglobaladdr:$dst)>; 3062def: Pat<(HexagonTCRet texternalsym:$dst), (PS_tailcall_i texternalsym:$dst)>; 3063def: Pat<(HexagonTCRet I32:$dst), (PS_tailcall_r I32:$dst)>; 3064 3065def: Pat<(callv3 I32:$dst), (J2_callr I32:$dst)>; 3066def: Pat<(callv3 tglobaladdr:$dst), (J2_call tglobaladdr:$dst)>; 3067def: Pat<(callv3 texternalsym:$dst), (J2_call texternalsym:$dst)>; 3068def: Pat<(callv3 tglobaltlsaddr:$dst), (J2_call tglobaltlsaddr:$dst)>; 3069 3070def: Pat<(callv3nr I32:$dst), (PS_callr_nr I32:$dst)>; 3071def: Pat<(callv3nr tglobaladdr:$dst), (PS_call_nr tglobaladdr:$dst)>; 3072def: Pat<(callv3nr texternalsym:$dst), (PS_call_nr texternalsym:$dst)>; 3073 3074def retflag : SDNode<"HexagonISD::RET_FLAG", SDTNone, 3075 [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; 3076def eh_return: SDNode<"HexagonISD::EH_RETURN", SDTNone, [SDNPHasChain]>; 3077 3078def: Pat<(retflag), (PS_jmpret (i32 R31))>; 3079def: Pat<(eh_return), (EH_RETURN_JMPR (i32 R31))>; 3080 3081 3082// --(16) Branch --------------------------------------------------------- 3083// 3084 3085def: Pat<(br bb:$dst), (J2_jump b30_2Imm:$dst)>; 3086def: Pat<(brind I32:$dst), (J2_jumpr I32:$dst)>; 3087 3088def: Pat<(brcond I1:$Pu, bb:$dst), 3089 (J2_jumpt I1:$Pu, bb:$dst)>; 3090def: Pat<(brcond (not I1:$Pu), bb:$dst), 3091 (J2_jumpf I1:$Pu, bb:$dst)>; 3092def: Pat<(brcond (i1 (setne I1:$Pu, -1)), bb:$dst), 3093 (J2_jumpf I1:$Pu, bb:$dst)>; 3094def: Pat<(brcond (i1 (seteq I1:$Pu, 0)), bb:$dst), 3095 (J2_jumpf I1:$Pu, bb:$dst)>; 3096def: Pat<(brcond (i1 (setne I1:$Pu, 0)), bb:$dst), 3097 (J2_jumpt I1:$Pu, bb:$dst)>; 3098 3099 3100// --(17) Misc ----------------------------------------------------------- 3101 3102 3103// Generate code of the form 'C2_muxii(cmpbgtui(Rdd, C-1),0,1)' 3104// for C code of the form r = (c>='0' && c<='9') ? 1 : 0. 3105// The isdigit transformation relies on two 'clever' aspects: 3106// 1) The data type is unsigned which allows us to eliminate a zero test after 3107// biasing the expression by 48. We are depending on the representation of 3108// the unsigned types, and semantics. 3109// 2) The front end has converted <= 9 into < 10 on entry to LLVM. 3110// 3111// For the C code: 3112// retval = (c >= '0' && c <= '9') ? 1 : 0; 3113// The code is transformed upstream of llvm into 3114// retval = (c-48) < 10 ? 1 : 0; 3115 3116def u7_0PosImmPred : ImmLeaf<i32, [{ 3117 // True if the immediate fits in an 7-bit unsigned field and is positive. 3118 return Imm > 0 && isUInt<7>(Imm); 3119}]>; 3120 3121let AddedComplexity = 139 in 3122def: Pat<(i32 (zext (i1 (setult (and I32:$Rs, 255), u7_0PosImmPred:$u7)))), 3123 (C2_muxii (A4_cmpbgtui IntRegs:$Rs, (UDEC1 imm:$u7)), 0, 1)>; 3124 3125let AddedComplexity = 100 in 3126def: Pat<(or (or (shl (HexagonINSERT (i32 (zextloadi8 (add I32:$b, 2))), 3127 (i32 (extloadi8 (add I32:$b, 3))), 3128 24, 8), 3129 (i32 16)), 3130 (shl (i32 (zextloadi8 (add I32:$b, 1))), (i32 8))), 3131 (zextloadi8 I32:$b)), 3132 (A2_swiz (L2_loadri_io I32:$b, 0))>; 3133 3134 3135// We need custom lowering of ISD::PREFETCH into HexagonISD::DCFETCH 3136// because the SDNode ISD::PREFETCH has properties MayLoad and MayStore. 3137// We don't really want either one here. 3138def SDTHexagonDCFETCH: SDTypeProfile<0, 2, [SDTCisPtrTy<0>,SDTCisInt<1>]>; 3139def HexagonDCFETCH: SDNode<"HexagonISD::DCFETCH", SDTHexagonDCFETCH, 3140 [SDNPHasChain]>; 3141 3142def: Pat<(HexagonDCFETCH IntRegs:$Rs, u11_3ImmPred:$u11_3), 3143 (Y2_dcfetchbo IntRegs:$Rs, imm:$u11_3)>; 3144def: Pat<(HexagonDCFETCH (i32 (add IntRegs:$Rs, u11_3ImmPred:$u11_3)), (i32 0)), 3145 (Y2_dcfetchbo IntRegs:$Rs, imm:$u11_3)>; 3146 3147def SDTHexagonALLOCA 3148 : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; 3149def HexagonALLOCA 3150 : SDNode<"HexagonISD::ALLOCA", SDTHexagonALLOCA, [SDNPHasChain]>; 3151 3152def: Pat<(HexagonALLOCA I32:$Rs, (i32 imm:$A)), 3153 (PS_alloca IntRegs:$Rs, imm:$A)>; 3154 3155def HexagonBARRIER: SDNode<"HexagonISD::BARRIER", SDTNone, [SDNPHasChain]>; 3156def: Pat<(HexagonBARRIER), (Y2_barrier)>; 3157 3158def: Pat<(trap), (PS_crash)>; 3159 3160// Read cycle counter. 3161def SDTInt64Leaf: SDTypeProfile<1, 0, [SDTCisVT<0, i64>]>; 3162def HexagonREADCYCLE: SDNode<"HexagonISD::READCYCLE", SDTInt64Leaf, 3163 [SDNPHasChain]>; 3164 3165def: Pat<(HexagonREADCYCLE), (A4_tfrcpp UPCYCLE)>; 3166 3167// The declared return value of the store-locked intrinsics is i32, but 3168// the instructions actually define i1. To avoid register copies from 3169// IntRegs to PredRegs and back, fold the entire pattern checking the 3170// result against true/false. 3171let AddedComplexity = 100 in { 3172 def: Pat<(i1 (setne (int_hexagon_S2_storew_locked I32:$Rs, I32:$Rt), 0)), 3173 (S2_storew_locked I32:$Rs, I32:$Rt)>; 3174 def: Pat<(i1 (seteq (int_hexagon_S2_storew_locked I32:$Rs, I32:$Rt), 0)), 3175 (C2_not (S2_storew_locked I32:$Rs, I32:$Rt))>; 3176 def: Pat<(i1 (setne (int_hexagon_S4_stored_locked I32:$Rs, I64:$Rt), 0)), 3177 (S4_stored_locked I32:$Rs, I64:$Rt)>; 3178 def: Pat<(i1 (seteq (int_hexagon_S4_stored_locked I32:$Rs, I64:$Rt), 0)), 3179 (C2_not (S4_stored_locked I32:$Rs, I64:$Rt))>; 3180} 3181