//===- X86InstrCompiler.td - Compiler Pseudos and Patterns -*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file describes the various pseudo instructions used by the compiler,
// as well as Pat patterns used during instruction selection.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// Pattern Matching Support

// Immediate transform: truncate the node's zero-extended value to its low
// 32 bits and hand the result to getI32Imm.
def GetLo32XForm : SDNodeXForm<imm, [{
  const uint32_t Lo32 = static_cast<uint32_t>(N->getZExtValue());
  return getI32Imm(Lo32, SDLoc(N));
}]>;


//===----------------------------------------------------------------------===//
// Random Pseudo Instructions.

// PIC base construction.  This expands to code that looks like this:
//     call  $next_inst
//     popl  %destreg
// The pseudo is marked non-duplicable and implicitly reads ESP and SSP.
let Uses = [ESP, SSP], SchedRW = [WriteJump], isNotDuplicable = 1,
    hasSideEffects = 0 in {
  def MOVPC32r : Ii32<0xE8, Pseudo, (outs GR32:$reg), (ins i32imm:$label),
                      "", []>;
}

// ADJCALLSTACKDOWN/UP implicitly use/def ESP because they may be expanded into
// a stack adjustment and the codegen must know that they may modify the stack
// pointer before prolog-epilog rewriting occurs.
// Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become
// sub / add which can clobber EFLAGS.
// Call-frame markers for targets where NotLP64 holds.
let SchedRW = [WriteALU], Uses = [ESP, SSP], Defs = [ESP, EFLAGS, SSP] in {
  def ADJCALLSTACKDOWN32
      : I<0, Pseudo, (outs),
          (ins i32imm:$amt1, i32imm:$amt2, i32imm:$amt3),
          "#ADJCALLSTACKDOWN", []>,
        Requires<[NotLP64]>;
  def ADJCALLSTACKUP32
      : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
          "#ADJCALLSTACKUP",
          [(X86callseq_end timm:$amt1, timm:$amt2)]>,
        Requires<[NotLP64]>;
}
// callseq_start only carries two amounts; the third pseudo operand is 0.
def : Pat<(X86callseq_start timm:$amt1, timm:$amt2),
          (ADJCALLSTACKDOWN32 i32imm:$amt1, i32imm:$amt2, 0)>,
      Requires<[NotLP64]>;


// ADJCALLSTACKDOWN/UP implicitly use/def RSP because they may be expanded into
// a stack adjustment and the codegen must know that they may modify the stack
// pointer before prolog-epilog rewriting occurs.
// Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become
// sub / add which can clobber EFLAGS.
let SchedRW = [WriteALU], Uses = [RSP, SSP], Defs = [RSP, EFLAGS, SSP] in {
  def ADJCALLSTACKDOWN64
      : I<0, Pseudo, (outs),
          (ins i32imm:$amt1, i32imm:$amt2, i32imm:$amt3),
          "#ADJCALLSTACKDOWN", []>,
        Requires<[IsLP64]>;
  def ADJCALLSTACKUP64
      : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
          "#ADJCALLSTACKUP",
          [(X86callseq_end timm:$amt1, timm:$amt2)]>,
        Requires<[IsLP64]>;
}
// Same mapping as the 32-bit form: the third pseudo operand is 0.
def : Pat<(X86callseq_start timm:$amt1, timm:$amt2),
          (ADJCALLSTACKDOWN64 i32imm:$amt1, i32imm:$amt2, 0)>,
      Requires<[IsLP64]>;

let SchedRW = [WriteSystem] in {

// x86-64 va_start lowering magic.
let Defs = [EFLAGS], mayStore = 1, hasSideEffects = 1 in
def VASTART_SAVE_XMM_REGS
    : I<0, Pseudo, (outs),
        (ins GR8:$al, i8mem:$regsavefi, variable_ops),
        "#VASTART_SAVE_XMM_REGS $al, $regsavefi",
        [(X86vastart_save_xmm_regs GR8:$al, addr:$regsavefi)]>;

// Everything in this scope is expanded by a custom inserter.  Defs defaults
// to [EFLAGS]; the alloca pseudos below override it with their own lists.
let Defs = [EFLAGS], usesCustomInserter = 1 in {

// The VAARG_64 and VAARG_X32 pseudo-instructions take the address of the
// va_list, and place the address of the next argument into a register.
def VAARG_64
    : I<0, Pseudo, (outs GR64:$dst),
        (ins i8mem:$ap, i32imm:$size, i8imm:$mode, i32imm:$align),
        "#VAARG_64 $dst, $ap, $size, $mode, $align",
        [(set GR64:$dst,
              (X86vaarg64 addr:$ap, timm:$size, timm:$mode, timm:$align))]>,
      Requires<[In64BitMode, IsLP64]>;
def VAARG_X32
    : I<0, Pseudo, (outs GR32:$dst),
        (ins i8mem:$ap, i32imm:$size, i8imm:$mode, i32imm:$align),
        "#VAARG_X32 $dst, $ap, $size, $mode, $align",
        [(set GR32:$dst,
              (X86vaargx32 addr:$ap, timm:$size, timm:$mode, timm:$align))]>,
      Requires<[In64BitMode, NotLP64]>;

// When using segmented stacks these are lowered into instructions which first
// check if the current stacklet has enough free memory. If it does, memory is
// allocated by bumping the stack pointer. Otherwise memory is allocated from
// the heap.

let Uses = [ESP], Defs = [EAX, ESP, EFLAGS] in
def SEG_ALLOCA_32
    : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$size),
        "# variable sized alloca for segmented stacks",
        [(set GR32:$dst, (X86SegAlloca GR32:$size))]>,
      Requires<[NotLP64]>;

let Uses = [RSP], Defs = [RAX, RSP, EFLAGS] in
def SEG_ALLOCA_64
    : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$size),
        "# variable sized alloca for segmented stacks",
        [(set GR64:$dst, (X86SegAlloca GR64:$size))]>,
      Requires<[In64BitMode]>;

// To protect against stack clash, dynamic allocation should perform a memory
// probe at each page.

let Uses = [ESP], Defs = [EAX, ESP, EFLAGS] in
def PROBED_ALLOCA_32
    : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$size),
        "# variable sized alloca with probing",
        [(set GR32:$dst, (X86ProbedAlloca GR32:$size))]>,
      Requires<[NotLP64]>;

let Uses = [RSP], Defs = [RAX, RSP, EFLAGS] in
def PROBED_ALLOCA_64
    : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$size),
        "# variable sized alloca with probing",
        [(set GR64:$dst, (X86ProbedAlloca GR64:$size))]>,
      Requires<[In64BitMode]>;
} // usesCustomInserter = 1, Defs = [EFLAGS]

// Fixed-size probed allocation; carries no pattern and no scheduling info.
let hasNoSchedulingInfo = 1 in
def STACKALLOC_W_PROBING
    : I<0, Pseudo, (outs), (ins i64imm:$stacksize),
        "# fixed size alloca with probing", []>;

// Dynamic stack allocation yields a _chkstk or _alloca call for all Windows
// targets.  These calls are needed to probe the stack when allocating more than
// 4k bytes in one go. Touching the stack at 4K increments is necessary to
// ensure that the guard pages used by the OS virtual memory manager are
// allocated in correct sequence.
// The main point of having separate instructions is to capture extra
// unmodelled effects (compared to ordinary calls), such as the stack pointer
// change.

let Uses = [ESP], Defs = [EAX, ESP, EFLAGS] in
def DYN_ALLOCA_32
    : I<0, Pseudo, (outs), (ins GR32:$size),
        "# dynamic stack allocation",
        [(X86DynAlloca GR32:$size)]>,
      Requires<[NotLP64]>;

let Uses = [RSP], Defs = [RAX, RSP, EFLAGS] in
def DYN_ALLOCA_64
    : I<0, Pseudo, (outs), (ins GR64:$size),
        "# dynamic stack allocation",
        [(X86DynAlloca GR64:$size)]>,
      Requires<[In64BitMode]>;
} // SchedRW

// These instructions XOR the frame pointer into a GPR. They are used in some
// stack protection schemes. These are post-RA pseudos because we only know the
// frame register after register allocation.
// Both forms tie $src to $dst and clobber EFLAGS.  The asm strings are
// placeholders for the pseudo ("$$FP" prints a literal "$FP").
let Constraints = "$src = $dst", isMoveImm = 1, isPseudo = 1, Defs = [EFLAGS] in {
  def XOR32_FP : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$src),
                  "xorl\t$$FP, $src", []>,
                  Requires<[NotLP64]>, Sched<[WriteALU]>;
  // The operand separator was missing here ("$$FP $src"); it now matches the
  // 32-bit form above.
  def XOR64_FP : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$src),
                  "xorq\t$$FP, $src", []>,
                  Requires<[In64BitMode]>, Sched<[WriteALU]>;
}

//===----------------------------------------------------------------------===//
// EH Pseudo Instructions
//
let SchedRW = [WriteSystem] in {

// EH return pseudos: terminating, returning barriers encoded as 0xC3.
// NOTE(review): these defs carry Sched<[WriteJumpLd]> while also sitting
// inside `let SchedRW = [WriteSystem]`; presumably the enclosing `let` takes
// precedence -- confirm which scheduling class is intended.
let isTerminator = 1, isReturn = 1, isBarrier = 1,
    hasCtrlDep = 1, isCodeGenOnly = 1 in {
def EH_RETURN   : I<0xC3, RawFrm, (outs), (ins GR32:$addr),
                    "ret\t#eh_return, addr: $addr",
                    [(X86ehret GR32:$addr)]>, Sched<[WriteJumpLd]>;

def EH_RETURN64 : I<0xC3, RawFrm, (outs), (ins GR64:$addr),
                    "ret\t#eh_return, addr: $addr",
                    [(X86ehret GR64:$addr)]>, Sched<[WriteJumpLd]>;
}

// Funclet-style returns: both are marked as EH scope returns.
let isTerminator = 1, hasSideEffects = 1, isBarrier = 1, hasCtrlDep = 1,
    isCodeGenOnly = 1, isReturn = 1, isEHScopeReturn = 1 in {
  def CLEANUPRET : I<0, Pseudo, (outs), (ins), "# CLEANUPRET",
                     [(cleanupret bb)]>;

  // CATCHRET needs a custom inserter for SEH.
  let usesCustomInserter = 1 in
    def CATCHRET : I<0, Pseudo, (outs), (ins brtarget32:$dst, brtarget32:$from),
                     "# CATCHRET",
                     [(catchret bb:$dst, bb:$from)]>;
}

// SjLj setjmp/longjmp pseudos, all expanded by a custom inserter.  The setjmp
// forms produce a GR32 result in both modes; the longjmp forms are
// additionally terminators.
let hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1,
    usesCustomInserter = 1 in {
  def EH_SjLj_SetJmp32  : I<0, Pseudo, (outs GR32:$dst), (ins i32mem:$buf),
                            "#EH_SJLJ_SETJMP32",
                            [(set GR32:$dst, (X86eh_sjlj_setjmp addr:$buf))]>,
                          Requires<[Not64BitMode]>;
  def EH_SjLj_SetJmp64  : I<0, Pseudo, (outs GR32:$dst), (ins i64mem:$buf),
                            "#EH_SJLJ_SETJMP64",
                            [(set GR32:$dst, (X86eh_sjlj_setjmp addr:$buf))]>,
                          Requires<[In64BitMode]>;
  let isTerminator = 1 in {
  def EH_SjLj_LongJmp32 : I<0, Pseudo, (outs), (ins i32mem:$buf),
                            "#EH_SJLJ_LONGJMP32",
                            [(X86eh_sjlj_longjmp addr:$buf)]>,
                          Requires<[Not64BitMode]>;
  def EH_SjLj_LongJmp64 : I<0, Pseudo, (outs), (ins i64mem:$buf),
                            "#EH_SJLJ_LONGJMP64",
                            [(X86eh_sjlj_longjmp addr:$buf)]>,
                          Requires<[In64BitMode]>;
  }
}

// Branch-like terminator with a single target operand and no pattern.
let isBranch = 1, isTerminator = 1, isCodeGenOnly = 1 in {
  def EH_SjLj_Setup : I<0, Pseudo, (outs), (ins brtarget:$dst),
                        "#EH_SjLj_Setup\t$dst", []>;
}
} // SchedRW

//===----------------------------------------------------------------------===//
// Pseudo instructions used by unwind info.
//

// Prolog instructions should not be duplicated, since this can cause issues
// because 1) if only some of the instructions are duplicated, then we will
// observe prolog instructions after the end-prolog instruction and 2) Windows
// expects there to only be a single prolog (e.g., when checking if unwinding
// is happening in the middle of a prolog).
// Prologue unwind markers: meta pseudos with no outputs and no patterns.
let SchedRW = [WriteSystem], isNotDuplicable = 1, isMeta = 1, isPseudo = 1 in {
  def SEH_PushReg
      : I<0, Pseudo, (outs), (ins i32imm:$reg),
          "#SEH_PushReg $reg", []>;
  def SEH_SaveReg
      : I<0, Pseudo, (outs), (ins i32imm:$reg, i32imm:$dst),
          "#SEH_SaveReg $reg, $dst", []>;
  def SEH_SaveXMM
      : I<0, Pseudo, (outs), (ins i32imm:$reg, i32imm:$dst),
          "#SEH_SaveXMM $reg, $dst", []>;
  def SEH_StackAlloc
      : I<0, Pseudo, (outs), (ins i32imm:$size),
          "#SEH_StackAlloc $size", []>;
  def SEH_StackAlign
      : I<0, Pseudo, (outs), (ins i32imm:$align),
          "#SEH_StackAlign $align", []>;
  def SEH_SetFrame
      : I<0, Pseudo, (outs), (ins i32imm:$reg, i32imm:$offset),
          "#SEH_SetFrame $reg, $offset", []>;
  def SEH_PushFrame
      : I<0, Pseudo, (outs), (ins i1imm:$mode),
          "#SEH_PushFrame $mode", []>;
  def SEH_EndPrologue
      : I<0, Pseudo, (outs), (ins),
          "#SEH_EndPrologue", []>;
  def SEH_UnwindVersion
      : I<0, Pseudo, (outs), (ins i1imm:$version),
          "#SEH_UnwindVersion $version", []>;
}

// Epilog instructions (unlike the prologue markers, these are not flagged
// isNotDuplicable):
let SchedRW = [WriteSystem], isMeta = 1, isPseudo = 1 in {
  def SEH_BeginEpilogue
      : I<0, Pseudo, (outs), (ins),
          "#SEH_BeginEpilogue", []>;
  def SEH_EndEpilogue
      : I<0, Pseudo, (outs), (ins),
          "#SEH_EndEpilogue", []>;
  def SEH_UnwindV2Start
      : I<0, Pseudo, (outs), (ins),
          "#SEH_UnwindV2Start", []>;
}

//===----------------------------------------------------------------------===//
// Pseudo instructions used by KCFI.
//===----------------------------------------------------------------------===//
// Takes a 64-bit pointer and a 32-bit type immediate; declared to clobber
// R10, R11 and EFLAGS.  No selection pattern and an empty Sched list.
let Defs = [R10, R11, EFLAGS] in
def KCFI_CHECK
    : PseudoI<(outs), (ins GR64:$ptr, i32imm:$type), []>,
      Sched<[]>;

//===----------------------------------------------------------------------===//
// Pseudo instructions used by address sanitizer.
//===----------------------------------------------------------------------===//
// Selected from int_asan_check_memaccess; declared to clobber R10, R11 and
// EFLAGS.  The address operand is restricted to the GR64PLTSafe class.
let Defs = [R10, R11, EFLAGS] in
def ASAN_CHECK_MEMACCESS
    : PseudoI<(outs), (ins GR64PLTSafe:$addr, i32imm:$accessinfo),
              [(int_asan_check_memaccess GR64PLTSafe:$addr,
                                         (i32 timm:$accessinfo))]>,
      Sched<[]>;

//===----------------------------------------------------------------------===//
// Pseudo instructions used by segmented stacks.
//

let SchedRW = [WriteJumpLd], isPseudo = 1 in {
  // This is lowered into a RET instruction by MCInstLower.  We need
  // this so that we don't have to have a MachineBasicBlock which ends
  // with a RET and also has successors.
  def MORESTACK_RET : I<0, Pseudo, (outs), (ins), "", []>;

  // This instruction is lowered to a RET followed by a MOV.  The two
  // instructions are not generated on a higher level since then the
  // verifier sees a MachineBasicBlock ending with a non-terminator.
  def MORESTACK_RET_RESTORE_R10 : I<0, Pseudo, (outs), (ins), "", []>;
}

//===----------------------------------------------------------------------===//
// Alias Instructions
//===----------------------------------------------------------------------===//

// Alias instruction mapping movr0 to xor.
// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
let AddedComplexity = 10, isMoveImm = 1, isPseudo = 1, isAsCheapAsAMove = 1,
    isReMaterializable = 1, Defs = [EFLAGS] in {
  def MOV32r0 : I<0, Pseudo, (outs GR32:$dst), (ins), "",
                  [(set GR32:$dst, 0)]>,
                Sched<[WriteZero]>;
}

// Other widths can also make use of the 32-bit xor, which may have a smaller
// encoding and avoid partial register updates.
// Zero of width 8/16 is a sub-register of MOV32r0; zero of width 64 wraps
// MOV32r0 with SUBREG_TO_REG.
let AddedComplexity = 10 in {
  def : Pat<(i8 0),  (EXTRACT_SUBREG (MOV32r0), sub_8bit)>;
  def : Pat<(i16 0), (EXTRACT_SUBREG (MOV32r0), sub_16bit)>;
  def : Pat<(i64 0), (SUBREG_TO_REG (i64 0), (MOV32r0), sub_32bit)>;
}

let AddedComplexity = 10, Predicates = [OptForSize, Not64BitMode] in {
  // Pseudo instructions for materializing 1 and -1 using XOR+INC/DEC,
  // which only require 3 bytes compared to MOV32ri which requires 5.
  let SchedRW = [WriteALU], Defs = [EFLAGS], isReMaterializable = 1,
      isPseudo = 1 in {
    def MOV32r1
        : I<0, Pseudo, (outs GR32:$dst), (ins), "",
            [(set GR32:$dst, 1)]>;
    def MOV32r_1
        : I<0, Pseudo, (outs GR32:$dst), (ins), "",
            [(set GR32:$dst, -1)]>;
  } // SchedRW, Defs, isReMaterializable, isPseudo

  // MOV16ri is 4 bytes, so the instructions above are smaller.
  def : Pat<(i16 1),  (EXTRACT_SUBREG (MOV32r1), sub_16bit)>;
  def : Pat<(i16 -1), (EXTRACT_SUBREG (MOV32r_1), sub_16bit)>;
}

// Sign-extended 8-bit immediate materialization, gated on OptForMinSize and
// NotWin64WithoutFP.
// AddedComplexity higher than MOV64ri but lower than MOV32r0 and MOV32r1.
let SchedRW = [WriteALU], AddedComplexity = 5, isPseudo = 1,
    isReMaterializable = 1 in {
  def MOV32ImmSExti8
      : I<0, Pseudo, (outs GR32:$dst), (ins i32i8imm:$src), "",
          [(set GR32:$dst, i32immSExt8:$src)]>,
        Requires<[OptForMinSize, NotWin64WithoutFP]>;
  def MOV64ImmSExti8
      : I<0, Pseudo, (outs GR64:$dst), (ins i64i8imm:$src), "",
          [(set GR64:$dst, i64immSExt8:$src)]>,
        Requires<[OptForMinSize, NotWin64WithoutFP]>;
}

// Materialize i64 constant where top 32-bits are zero. This could theoretically
// use MOV32ri with a SUBREG_TO_REG to represent the zero-extension, however
// that would make it more difficult to rematerialize.
// Selected for i64 immediates matching i64immZExt32.
let AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1,
    isPseudo = 1, SchedRW = [WriteMove] in
def MOV32ri64 : I<0, Pseudo, (outs GR64:$dst), (ins i64i32imm:$src), "",
                  [(set GR64:$dst, i64immZExt32:$src)]>;

// This 64-bit pseudo-move can also be used for labels in the x86-64 small code
// model.
def mov64imm32 : ComplexPattern<i64, 1, "selectMOV64Imm32", [X86Wrapper]>;
def : Pat<(i64 mov64imm32:$src), (MOV32ri64 mov64imm32:$src)>;

// Use sbb to materialize carry bit.
let Uses = [EFLAGS], Defs = [EFLAGS], isPseudo = 1, SchedRW = [WriteADC],
    hasSideEffects = 0 in {
// FIXME: These are pseudo ops that should be replaced with Pat<> patterns.
// However, Pat<> can't replicate the destination reg into the inputs of the
// result.
def SETB_C32r : I<0, Pseudo, (outs GR32:$dst), (ins), "", []>;
def SETB_C64r : I<0, Pseudo, (outs GR64:$dst), (ins), "", []>;
} // Uses = [EFLAGS], Defs = [EFLAGS], isPseudo = 1

//===----------------------------------------------------------------------===//
// String Pseudo Instructions
//
let SchedRW = [WriteMicrocoded] in {

// rep movs with 32-bit address size: implicitly reads and writes
// ECX, EDI and ESI.
let Defs = [ECX,EDI,ESI], Uses = [ECX,EDI,ESI], isCodeGenOnly = 1 in {
def REP_MOVSB_32 : I<0xA4, RawFrm, (outs), (ins),
                    "{rep;movsb (%esi), %es:(%edi)|rep movsb es:[edi], [esi]}",
                    [(X86rep_movs i8)]>, REP, AdSize32,
                   Requires<[NotLP64]>;
def REP_MOVSW_32 : I<0xA5, RawFrm, (outs), (ins),
                    "{rep;movsw (%esi), %es:(%edi)|rep movsw es:[edi], [esi]}",
                    [(X86rep_movs i16)]>, REP, AdSize32, OpSize16,
                   Requires<[NotLP64]>;
def REP_MOVSD_32 : I<0xA5, RawFrm, (outs), (ins),
                    "{rep;movsl (%esi), %es:(%edi)|rep movsd es:[edi], [esi]}",
                    [(X86rep_movs i32)]>, REP, AdSize32, OpSize32,
                   Requires<[NotLP64]>;
def REP_MOVSQ_32 : RI<0xA5, RawFrm, (outs), (ins),
                    "{rep;movsq (%esi), %es:(%edi)|rep movsq es:[edi], [esi]}",
                    [(X86rep_movs i64)]>, REP, AdSize32,
                   Requires<[NotLP64, In64BitMode]>;
}

// rep movs with 64-bit address size: implicitly reads and writes
// RCX, RDI and RSI.
let Defs = [RCX,RDI,RSI], Uses = [RCX,RDI,RSI], isCodeGenOnly = 1 in {
def REP_MOVSB_64 : I<0xA4, RawFrm, (outs), (ins),
                    "{rep;movsb (%rsi), %es:(%rdi)|rep movsb es:[rdi], [rsi]}",
                    [(X86rep_movs i8)]>, REP, AdSize64,
                   Requires<[IsLP64]>;
def REP_MOVSW_64 : I<0xA5, RawFrm, (outs), (ins),
                    "{rep;movsw (%rsi), %es:(%rdi)|rep movsw es:[rdi], [rsi]}",
                    [(X86rep_movs i16)]>, REP, AdSize64, OpSize16,
                   Requires<[IsLP64]>;
// The Intel-syntax variant previously read "rep movsdi", which is not a
// mnemonic; it now matches REP_MOVSD_32 ("rep movsd").
def REP_MOVSD_64 : I<0xA5, RawFrm, (outs), (ins),
                    "{rep;movsl (%rsi), %es:(%rdi)|rep movsd es:[rdi], [rsi]}",
                    [(X86rep_movs i32)]>, REP, AdSize64, OpSize32,
                   Requires<[IsLP64]>;
def REP_MOVSQ_64 : RI<0xA5, RawFrm, (outs), (ins),
                    "{rep;movsq (%rsi), %es:(%rdi)|rep movsq es:[rdi], [rsi]}",
                    [(X86rep_movs i64)]>, REP, AdSize64,
                   Requires<[IsLP64]>;
}

// rep stos with 32-bit address size.  Each def lists the accumulator width
// it stores alongside the count and destination registers.
// FIXME: Should use "(X86rep_stos AL)" as the pattern.
let Defs = [ECX,EDI], isCodeGenOnly = 1 in {
  let Uses = [AL,ECX,EDI] in
  def REP_STOSB_32 : I<0xAA, RawFrm, (outs), (ins),
                       "{rep;stosb %al, %es:(%edi)|rep stosb es:[edi], al}",
                       [(X86rep_stos i8)]>, REP, AdSize32,
                     Requires<[NotLP64]>;
  let Uses = [AX,ECX,EDI] in
  def REP_STOSW_32 : I<0xAB, RawFrm, (outs), (ins),
                      "{rep;stosw %ax, %es:(%edi)|rep stosw es:[edi], ax}",
                      [(X86rep_stos i16)]>, REP, AdSize32, OpSize16,
                     Requires<[NotLP64]>;
  let Uses = [EAX,ECX,EDI] in
  def REP_STOSD_32 : I<0xAB, RawFrm, (outs), (ins),
                      "{rep;stosl %eax, %es:(%edi)|rep stosd es:[edi], eax}",
                      [(X86rep_stos i32)]>, REP, AdSize32, OpSize32,
                     Requires<[NotLP64]>;
  let Uses = [RAX,RCX,RDI] in
  def REP_STOSQ_32 : RI<0xAB, RawFrm, (outs), (ins),
                        "{rep;stosq %rax, %es:(%edi)|rep stosq es:[edi], rax}",
                        [(X86rep_stos i64)]>, REP, AdSize32,
                      Requires<[NotLP64, In64BitMode]>;
}

// rep stos with 64-bit address size.
let Defs = [RCX,RDI], isCodeGenOnly = 1 in {
  let Uses = [AL,RCX,RDI] in
  def REP_STOSB_64 : I<0xAA, RawFrm, (outs), (ins),
                       "{rep;stosb %al, %es:(%rdi)|rep stosb es:[rdi], al}",
                       [(X86rep_stos i8)]>, REP, AdSize64,
                       Requires<[IsLP64]>;
  let Uses = [AX,RCX,RDI] in
  def REP_STOSW_64 : I<0xAB, RawFrm, (outs), (ins),
                       "{rep;stosw %ax, %es:(%rdi)|rep stosw es:[rdi], ax}",
                       [(X86rep_stos i16)]>, REP, AdSize64, OpSize16,
                       Requires<[IsLP64]>;
  // NOTE(review): the asm string stores %eax but Uses lists RAX, whereas
  // REP_STOSD_32 lists EAX -- confirm whether the wider register is
  // intentional before changing it.
  let Uses = [RAX,RCX,RDI] in
  def REP_STOSD_64 : I<0xAB, RawFrm, (outs), (ins),
                      "{rep;stosl %eax, %es:(%rdi)|rep stosd es:[rdi], eax}",
                      [(X86rep_stos i32)]>, REP, AdSize64, OpSize32,
                      Requires<[IsLP64]>;

  let Uses = [RAX,RCX,RDI] in
  def REP_STOSQ_64 : RI<0xAB, RawFrm, (outs), (ins),
                        "{rep;stosq %rax, %es:(%rdi)|rep stosq es:[rdi], rax}",
                        [(X86rep_stos i64)]>, REP, AdSize64,
                        Requires<[IsLP64]>;
}
} // SchedRW

//===----------------------------------------------------------------------===//
// Thread Local Storage Instructions
//
let SchedRW = [WriteSystem] in {

// ELF TLS Support
// All calls clobber the non-callee saved registers. ESP is marked as
// a use to prevent stack-pointer assignments that appear immediately
// before calls from potentially appearing dead.
let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, FP7,
            ST0, ST1, ST2, ST3, ST4, ST5, ST6, ST7,
            MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
            XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
            XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS, DF],
    Uses = [ESP, SSP] in {
def TLS_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
                  "# TLS_addr32",
                  [(X86tlsaddr tls32addr:$sym)]>,
                  Requires<[Not64BitMode]>;
def TLS_base_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
                  "# TLS_base_addr32",
                  [(X86tlsbaseaddr tls32baseaddr:$sym)]>,
                  Requires<[Not64BitMode]>;
}

// All calls clobber the non-callee saved registers. RSP is marked as
// a use to prevent stack-pointer assignments that appear immediately
// before calls from potentially appearing dead.
// 64-bit-mode ELF TLS address pseudos.  The LP64 forms take an i64mem symbol
// operand; the X32 forms (NotLP64) take an i32mem symbol operand.
let Uses = [RSP, SSP],
    Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
            FP0, FP1, FP2, FP3, FP4, FP5, FP6, FP7,
            ST0, ST1, ST2, ST3, ST4, ST5, ST6, ST7,
            MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
            XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
            XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15,
            EFLAGS, DF] in {
  def TLS_addr64
      : I<0, Pseudo, (outs), (ins i64mem:$sym),
          "# TLS_addr64",
          [(X86tlsaddr tls64addr:$sym)]>,
        Requires<[In64BitMode, IsLP64]>;
  def TLS_base_addr64
      : I<0, Pseudo, (outs), (ins i64mem:$sym),
          "# TLS_base_addr64",
          [(X86tlsbaseaddr tls64baseaddr:$sym)]>,
        Requires<[In64BitMode, IsLP64]>;
  def TLS_addrX32
      : I<0, Pseudo, (outs), (ins i32mem:$sym),
          "# TLS_addrX32",
          [(X86tlsaddr tls32addr:$sym)]>,
        Requires<[In64BitMode, NotLP64]>;
  def TLS_base_addrX32
      : I<0, Pseudo, (outs), (ins i32mem:$sym),
          "# TLS_base_addrX32",
          [(X86tlsbaseaddr tls32baseaddr:$sym)]>,
        Requires<[In64BitMode, NotLP64]>;
}

// TLSDESC only clobbers EAX and EFLAGS. RSP is marked as a use to prevent
// stack-pointer assignments that appear immediately before calls from
// potentially appearing dead.
// NOTE(review): both the 32-bit and the 64-bit form list RSP (not ESP) as
// the implicit use and EAX (not RAX) as the def -- confirm this is intended
// for each mode.
let Uses = [RSP, SSP], Defs = [EAX, EFLAGS] in {
  def TLS_desc32
      : I<0, Pseudo, (outs), (ins i32mem:$sym),
          "# TLS_desc32", [(X86tlsdesc tls32addr:$sym)]>;
  def TLS_desc64
      : I<0, Pseudo, (outs), (ins i64mem:$sym),
          "# TLS_desc64", [(X86tlsdesc tls64addr:$sym)]>;
}

// Darwin TLS Support
// For i386, the address of the thunk is passed on the stack, on return the
// address of the variable is in %eax.  %ecx is trashed during the function
// call.  All other registers are preserved.
let usesCustomInserter = 1, Uses = [ESP, SSP],
    Defs = [EAX, ECX, EFLAGS, DF] in {
  def TLSCall_32
      : I<0, Pseudo, (outs), (ins i32mem:$sym),
          "# TLSCall_32",
          [(X86TLSCall addr:$sym)]>,
        Requires<[Not64BitMode]>;
}

// For x86_64, the address of the thunk is passed in %rdi, but the
// pseudo directly use the symbol, so do not add an implicit use of
// %rdi. The lowering will do the right thing with RDI.
// On return the address of the variable is in %rax. All other
// registers are preserved.
let usesCustomInserter = 1, Uses = [RSP, SSP],
    Defs = [RAX, EFLAGS, DF] in {
  def TLSCall_64
      : I<0, Pseudo, (outs), (ins i64mem:$sym),
          "# TLSCall_64",
          [(X86TLSCall addr:$sym)]>,
        Requires<[In64BitMode]>;
}
} // SchedRW

//===----------------------------------------------------------------------===//
// Conditional Move Pseudo Instructions

// CMOV* - Used to implement the SELECT DAG operation.  Expanded after
// instruction selection into a branch sequence.
// RC is the register class of the result and of both inputs; VT is the value
// type the X86cmov node is matched at.  The record name is "CMOV" followed by
// the defm name.
multiclass CMOVrr_PSEUDO<RegisterClass RC, ValueType VT> {
  def CMOV#NAME
      : I<0, Pseudo, (outs RC:$dst), (ins RC:$t, RC:$f, i8imm:$cond),
          "#CMOV_"#NAME#" PSEUDO!",
          [(set RC:$dst,
                (VT (X86cmov RC:$t, RC:$f, timm:$cond, EFLAGS)))]>;
}

let Uses = [EFLAGS], hasNoSchedulingInfo = 1, usesCustomInserter = 1 in {
  // X86 doesn't have 8-bit conditional moves. Use a customInserter to
  // emit control flow. An alternative to this is to mark i8 SELECT as Promote,
  // however that requires promoting the operands, and can induce additional
  // i8 register pressure.
  defm _GR8 : CMOVrr_PSEUDO<GR8, i8>;

  let Predicates = [NoCMOV] in {
    defm _GR32 : CMOVrr_PSEUDO<GR32, i32>;
    defm _GR16 : CMOVrr_PSEUDO<GR16, i16>;
  } // Predicates = [NoCMOV]

  // fcmov doesn't handle all possible EFLAGS, provide a fallback if there is no
  // SSE1/SSE2.
  let Predicates = [FPStackf32] in {
    defm _RFP32 : CMOVrr_PSEUDO<RFP32, f32>;
  }
  let Predicates = [FPStackf64] in {
    defm _RFP64 : CMOVrr_PSEUDO<RFP64, f64>;
  }
  defm _RFP80 : CMOVrr_PSEUDO<RFP80, f80>;

  let Predicates = [HasMMX] in {
    defm _VR64 : CMOVrr_PSEUDO<VR64, x86mmx>;
  }

  // Scalar FP register classes, split by SSE level and AVX-512 availability.
  let Predicates = [HasSSE1,NoAVX512] in {
    defm _FR32 : CMOVrr_PSEUDO<FR32, f32>;
  }
  let Predicates = [HasSSE2,NoAVX512] in {
    defm _FR16 : CMOVrr_PSEUDO<FR16, f16>;
    defm _FR64 : CMOVrr_PSEUDO<FR64, f64>;
  }
  let Predicates = [HasAVX512] in {
    defm _FR16X : CMOVrr_PSEUDO<FR16X, f16>;
    defm _FR32X : CMOVrr_PSEUDO<FR32X, f32>;
    defm _FR64X : CMOVrr_PSEUDO<FR64X, f64>;
  }

  // Vector register classes; the pseudos themselves match the i64-element
  // types, and the remaining element types are mapped by the patterns below.
  let Predicates = [NoVLX] in {
    defm _VR128 : CMOVrr_PSEUDO<VR128, v2i64>;
    defm _VR256 : CMOVrr_PSEUDO<VR256, v4i64>;
  }
  let Predicates = [HasVLX] in {
    defm _VR128X : CMOVrr_PSEUDO<VR128X, v2i64>;
    defm _VR256X : CMOVrr_PSEUDO<VR256X, v4i64>;
  }
  defm _VR512 : CMOVrr_PSEUDO<VR512, v8i64>;

  // Mask register classes.
  defm _VK1  : CMOVrr_PSEUDO<VK1,  v1i1>;
  defm _VK2  : CMOVrr_PSEUDO<VK2,  v2i1>;
  defm _VK4  : CMOVrr_PSEUDO<VK4,  v4i1>;
  defm _VK8  : CMOVrr_PSEUDO<VK8,  v8i1>;
  defm _VK16 : CMOVrr_PSEUDO<VK16, v16i1>;
  defm _VK32 : CMOVrr_PSEUDO<VK32, v32i1>;
  defm _VK64 : CMOVrr_PSEUDO<VK64, v64i1>;
} // usesCustomInserter = 1, hasNoSchedulingInfo = 1, Uses = [EFLAGS]

// f128 selects reuse the VR128 pseudo (no predicate).
def : Pat<(f128 (X86cmov VR128:$t, VR128:$f, timm:$cond, EFLAGS)),
          (CMOV_VR128 VR128:$t, VR128:$f, timm:$cond)>;

// Map the remaining vector element types onto the pseudos defined above.
// Each loop expands to one anonymous Pat per listed type, in list order.
let Predicates = [NoVLX] in {
  foreach VT = [v16i8, v8i16, v4i32, v4f32, v2f64] in
    def : Pat<(VT (X86cmov VR128:$t, VR128:$f, timm:$cond, EFLAGS)),
              (CMOV_VR128 VR128:$t, VR128:$f, timm:$cond)>;

  foreach VT = [v32i8, v16i16, v8i32, v8f32, v4f64] in
    def : Pat<(VT (X86cmov VR256:$t, VR256:$f, timm:$cond, EFLAGS)),
              (CMOV_VR256 VR256:$t, VR256:$f, timm:$cond)>;
}
let Predicates = [HasVLX] in {
  foreach VT = [v16i8, v8i16, v8f16, v4i32, v4f32, v2f64] in
    def : Pat<(VT (X86cmov VR128X:$t, VR128X:$f, timm:$cond, EFLAGS)),
              (CMOV_VR128X VR128X:$t, VR128X:$f, timm:$cond)>;

  foreach VT = [v32i8, v16i16, v16f16, v8i32, v8f32, v4f64] in
    def : Pat<(VT (X86cmov VR256X:$t, VR256X:$f, timm:$cond, EFLAGS)),
              (CMOV_VR256X VR256X:$t, VR256X:$f, timm:$cond)>;
}

foreach VT = [v64i8, v32i16, v32f16, v16i32, v16f32, v8f64] in
  def : Pat<(VT (X86cmov VR512:$t, VR512:$f, timm:$cond, EFLAGS)),
            (CMOV_VR512 VR512:$t, VR512:$f, timm:$cond)>;

//===----------------------------------------------------------------------===//
// Normal-Instructions-With-Lock-Prefix Pseudo Instructions
//===----------------------------------------------------------------------===//

// FIXME: Use normal instructions and add lock prefix dynamically.

// Memory barriers

// LOCK-prefixed 32-bit `or` with an 8-bit immediate. It carries no selection
// pattern and is only available outside 64-bit mode.
let Defs = [EFLAGS], Predicates = [Not64BitMode], SchedRW = [WriteALURMW],
    isCodeGenOnly = 1 in
def OR32mi8Locked : Ii8<0x83, MRM1m, (outs), (ins i32mem:$dst, i32i8imm:$zero),
                        "or{l}\t{$zero, $dst|$dst, $zero}", []>,
                    OpSize32, LOCK;

// LOCK-prefixed arithmetic on a memory operand whose only result is EFLAGS.
//   RegOpc   - opcode of the mr (memory, register) form
//   ImmOpc   - opcode of the mi (memory, immediate) form
//   ImmOpc8  - opcode of the mi8 (memory, 8-bit immediate) form
//   ImmMod   - instruction format shared by the mi and mi8 forms
//   Op       - SDNode matched by every form
//   mnemonic - assembly mnemonic without its size suffix
// Bit 0 of each opcode is overridden: 0 for the 8-bit forms, 1 for all others.
multiclass LOCK_ArithBinOp<bits<8> RegOpc, bits<8> ImmOpc, bits<8> ImmOpc8,
                           Format ImmMod, SDNode Op, string mnemonic> {
let Defs = [EFLAGS], SchedRW = [WriteALURMW], isCodeGenOnly = 1,
    mayLoad = 1, mayStore = 1 in {

// Memory, register forms.
def NAME#8mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
                  RegOpc{3}, RegOpc{2}, RegOpc{1}, 0 },
                 MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
                 !strconcat(mnemonic, "{b}\t{$src2, $dst|$dst, $src2}"),
                 [(set EFLAGS, (Op addr:$dst, GR8:$src2))]>,
               LOCK;

def NAME#16mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
                   RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
                  MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
                  !strconcat(mnemonic, "{w}\t{$src2, $dst|$dst, $src2}"),
                  [(set EFLAGS, (Op addr:$dst, GR16:$src2))]>,
                OpSize16, LOCK;

def NAME#32mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
                   RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
                  MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
                  !strconcat(mnemonic, "{l}\t{$src2, $dst|$dst, $src2}"),
                  [(set EFLAGS, (Op addr:$dst, GR32:$src2))]>,
                OpSize32, LOCK;

def NAME#64mr : RI<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
                    RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
                   MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
                   !strconcat(mnemonic, "{q}\t{$src2, $dst|$dst, $src2}"),
                   [(set EFLAGS, (Op addr:$dst, GR64:$src2))]>,
                LOCK;

// NOTE: Definition order matters here. The mi8 forms are listed before the mi
// forms so that they are slightly preferred.
def NAME#16mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
                      ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
                     ImmMod, (outs), (ins i16mem:$dst, i16i8imm:$src2),
                     !strconcat(mnemonic, "{w}\t{$src2, $dst|$dst, $src2}"),
                     [(set EFLAGS, (Op addr:$dst, i16immSExt8:$src2))]>,
                 OpSize16, LOCK;

def NAME#32mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
                      ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
                     ImmMod, (outs), (ins i32mem:$dst, i32i8imm:$src2),
                     !strconcat(mnemonic, "{l}\t{$src2, $dst|$dst, $src2}"),
                     [(set EFLAGS, (Op addr:$dst, i32immSExt8:$src2))]>,
                 OpSize32, LOCK;

def NAME#64mi8 : RIi8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
                       ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
                      ImmMod, (outs), (ins i64mem:$dst, i64i8imm:$src2),
                      !strconcat(mnemonic, "{q}\t{$src2, $dst|$dst, $src2}"),
                      [(set EFLAGS, (Op addr:$dst, i64immSExt8:$src2))]>,
                 LOCK;

// Memory, immediate forms.
def NAME#8mi : Ii8<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
                    ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 0 },
                   ImmMod, (outs), (ins i8mem:$dst, i8imm:$src2),
                   !strconcat(mnemonic, "{b}\t{$src2, $dst|$dst, $src2}"),
                   [(set EFLAGS, (Op addr:$dst, (i8 imm:$src2)))]>,
               LOCK;

def NAME#16mi : Ii16<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
                      ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
                     ImmMod, (outs), (ins i16mem:$dst, i16imm:$src2),
                     !strconcat(mnemonic, "{w}\t{$src2, $dst|$dst, $src2}"),
                     [(set EFLAGS, (Op addr:$dst, (i16 imm:$src2)))]>,
                OpSize16, LOCK;

def NAME#32mi : Ii32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
                      ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
                     ImmMod, (outs), (ins i32mem:$dst, i32imm:$src2),
                     !strconcat(mnemonic, "{l}\t{$src2, $dst|$dst, $src2}"),
                     [(set EFLAGS, (Op addr:$dst, (i32 imm:$src2)))]>,
                OpSize32, LOCK;

def NAME#64mi32 : RIi32S<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
                          ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
                         ImmMod, (outs), (ins i64mem:$dst, i64i32imm:$src2),
                         !strconcat(mnemonic, "{q}\t{$src2, $dst|$dst, $src2}"),
                         [(set EFLAGS, (Op addr:$dst, i64immSExt32:$src2))]>,
                  LOCK;
} // Defs, SchedRW, isCodeGenOnly, mayLoad, mayStore

} // multiclass LOCK_ArithBinOp

defm LOCK_ADD : LOCK_ArithBinOp<0x00, 0x80, 0x83, MRM0m, X86lock_add, "add">;
defm LOCK_SUB : LOCK_ArithBinOp<0x28, 0x80, 0x83, MRM5m, X86lock_sub, "sub">;
defm LOCK_OR  : LOCK_ArithBinOp<0x08, 0x80, 0x83, MRM1m, X86lock_or , "or">;
defm LOCK_AND : LOCK_ArithBinOp<0x20, 0x80, 0x83, MRM4m, X86lock_and, "and">;
defm LOCK_XOR : LOCK_ArithBinOp<0x30, 0x80, 0x83, MRM6m, X86lock_xor, "xor">;

// LOCK-prefixed inc/dec on memory, selected for the *_nocf nodes with a
// constant 1. All of them require UseIncDec; the 64-bit forms additionally
// require In64BitMode.
let Defs = [EFLAGS], SchedRW = [WriteALURMW], isCodeGenOnly = 1,
    mayLoad = 1, mayStore = 1 in {
  let Predicates = [UseIncDec] in {
    def LOCK_INC8m  : I<0xFE, MRM0m, (outs), (ins i8mem:$dst),
                        "inc{b}\t$dst",
                        [(set EFLAGS, (X86lock_add_nocf addr:$dst, (i8 1)))]>,
                      LOCK;
    def LOCK_INC16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst),
                        "inc{w}\t$dst",
                        [(set EFLAGS, (X86lock_add_nocf addr:$dst, (i16 1)))]>,
                      OpSize16, LOCK;
    def LOCK_INC32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst),
                        "inc{l}\t$dst",
                        [(set EFLAGS, (X86lock_add_nocf addr:$dst, (i32 1)))]>,
                      OpSize32, LOCK;

    def LOCK_DEC8m  : I<0xFE, MRM1m, (outs), (ins i8mem:$dst),
                        "dec{b}\t$dst",
                        [(set EFLAGS, (X86lock_sub_nocf addr:$dst, (i8 1)))]>,
                      LOCK;
    def LOCK_DEC16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst),
                        "dec{w}\t$dst",
                        [(set EFLAGS, (X86lock_sub_nocf addr:$dst, (i16 1)))]>,
                      OpSize16, LOCK;
    def LOCK_DEC32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst),
                        "dec{l}\t$dst",
                        [(set EFLAGS, (X86lock_sub_nocf addr:$dst, (i32 1)))]>,
                      OpSize32, LOCK;
  }

  let Predicates = [UseIncDec, In64BitMode] in {
    def LOCK_INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst),
                         "inc{q}\t$dst",
                         [(set EFLAGS, (X86lock_add_nocf addr:$dst, (i64 1)))]>,
                      LOCK;
    def LOCK_DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst),
                         "dec{q}\t$dst",
                         [(set EFLAGS, (X86lock_sub_nocf addr:$dst, (i64 1)))]>,
                      LOCK;
  }
}

// Adding -1 is a decrement and subtracting -1 is an increment.
let Predicates = [UseIncDec] in {
  def : Pat<(X86lock_add addr:$dst, (i8  -1)), (LOCK_DEC8m  addr:$dst)>;
  def : Pat<(X86lock_add addr:$dst, (i16 -1)), (LOCK_DEC16m addr:$dst)>;
  def : Pat<(X86lock_add addr:$dst, (i32 -1)), (LOCK_DEC32m addr:$dst)>;
  def : Pat<(X86lock_sub addr:$dst, (i8  -1)), (LOCK_INC8m  addr:$dst)>;
  def : Pat<(X86lock_sub addr:$dst, (i16 -1)), (LOCK_INC16m addr:$dst)>;
  def : Pat<(X86lock_sub addr:$dst, (i32 -1)), (LOCK_INC32m addr:$dst)>;
}

// The same -1 patterns for the 64-bit forms.
let Predicates = [UseIncDec, In64BitMode] in {
  def : Pat<(X86lock_add addr:$dst, (i64 -1)), (LOCK_DEC64m addr:$dst)>;
  def : Pat<(X86lock_sub addr:$dst, (i64 -1)), (LOCK_INC64m addr:$dst)>;
}

// Atomic bit test.
// Immediate-index locked bit test nodes: one i32 result and three operands
// (a pointer, an i8 and an i32).
def X86LBTest : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisPtrTy<1>,
                                     SDTCisVT<2, i8>, SDTCisVT<3, i32>]>;
def x86bts : SDNode<"X86ISD::LBTS", X86LBTest,
                    [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
def x86btc : SDNode<"X86ISD::LBTC", X86LBTest,
                    [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
def x86btr : SDNode<"X86ISD::LBTR", X86LBTest,
                    [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;

// Register-index locked bit test nodes: one i32 result and two operands
// (a pointer and an integer).
def X86LBTestRM : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<1>,
                                       SDTCisInt<2>]>;

def x86_rm_bts : SDNode<"X86ISD::LBTS_RM", X86LBTestRM,
                        [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
def x86_rm_btc : SDNode<"X86ISD::LBTC_RM", X86LBTestRM,
                        [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
def x86_rm_btr : SDNode<"X86ISD::LBTR_RM", X86LBTestRM,
                        [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;


// LOCK-prefixed bit test on memory with an immediate bit index (opcode 0xBA).
//   Form - instruction format of the variant
//   s    - mnemonic; it also names the matched SDNode ("x86" # s)
// The last pattern operand is the constant 16, 32 or 64, matching the memory
// operand width of each def.
multiclass ATOMIC_LOGIC_OP<Format Form, string s> {
  let Defs = [EFLAGS], SchedRW = [WriteBitTestSetRegRMW], isCodeGenOnly = 1,
      mayLoad = 1, mayStore = 1 in {
    def 16m : Ii8<0xBA, Form, (outs), (ins i16mem:$src1, i8imm:$src2),
                  !strconcat(s, "{w}\t{$src2, $src1|$src1, $src2}"),
                  [(set EFLAGS, (!cast<SDNode>("x86" # s) addr:$src1, timm:$src2, (i32 16)))]>,
              OpSize16, TB, LOCK;
    def 32m : Ii8<0xBA, Form, (outs), (ins i32mem:$src1, i8imm:$src2),
                  !strconcat(s, "{l}\t{$src2, $src1|$src1, $src2}"),
                  [(set EFLAGS, (!cast<SDNode>("x86" # s) addr:$src1, timm:$src2, (i32 32)))]>,
              OpSize32, TB, LOCK;
    def 64m : RIi8<0xBA, Form, (outs), (ins i64mem:$src1, i8imm:$src2),
                   !strconcat(s, "{q}\t{$src2, $src1|$src1, $src2}"),
                   [(set EFLAGS, (!cast<SDNode>("x86" # s) addr:$src1, timm:$src2, (i32 64)))]>,
              TB, LOCK;
  }
}

// LOCK-prefixed bit test on memory with the bit index in a register.
//   Opc8 - opcode of the variant
//   s    - mnemonic; it also names the matched SDNode ("x86_rm_" # s)
multiclass ATOMIC_LOGIC_OP_RM<bits<8> Opc8, string s> {
  let Defs = [EFLAGS], SchedRW = [WriteBitTestSetRegRMW], isCodeGenOnly = 1,
      mayLoad = 1, mayStore = 1 in {
    def 16rm : I<Opc8, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
                 !strconcat(s, "{w}\t{$src2, $src1|$src1, $src2}"),
                 [(set EFLAGS, (!cast<SDNode>("x86_rm_" # s) addr:$src1, GR16:$src2))]>,
               OpSize16, TB, LOCK;
    def 32rm : I<Opc8, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
                 !strconcat(s, "{l}\t{$src2, $src1|$src1, $src2}"),
                 [(set EFLAGS, (!cast<SDNode>("x86_rm_" # s) addr:$src1, GR32:$src2))]>,
               OpSize32, TB, LOCK;
    def 64rm : RI<Opc8, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
                  !strconcat(s, "{q}\t{$src2, $src1|$src1, $src2}"),
                  [(set EFLAGS, (!cast<SDNode>("x86_rm_" # s) addr:$src1, GR64:$src2))]>,
               TB, LOCK;
  }
}


defm LOCK_BTS : ATOMIC_LOGIC_OP<MRM5m, "bts">;
defm LOCK_BTC : ATOMIC_LOGIC_OP<MRM7m, "btc">;
defm LOCK_BTR : ATOMIC_LOGIC_OP<MRM6m, "btr">;

defm LOCK_BTS_RM : ATOMIC_LOGIC_OP_RM<0xAB, "bts">;
defm LOCK_BTC_RM : ATOMIC_LOGIC_OP_RM<0xBB, "btc">;
defm LOCK_BTR_RM : ATOMIC_LOGIC_OP_RM<0xB3, "btr">;

// Atomic compare and swap.
// LOCK-prefixed compare-and-swap on memory for the four integer widths.
//   Opc8     - opcode of the 8-bit form
//   Opc      - opcode of the 16/32/64-bit forms
//   Form     - instruction format
//   mnemonic - assembly mnemonic without its size suffix
//   frag     - pattern operator; its last operand is the access size in bytes
// Each width both reads and writes its accumulator register (AL/AX/EAX/RAX)
// and writes EFLAGS.
multiclass LCMPXCHG_BinOp<bits<8> Opc8, bits<8> Opc, Format Form,
                          string mnemonic, SDPatternOperator frag> {
  let SchedRW = [WriteCMPXCHGRMW], isCodeGenOnly = 1 in {
    let Uses = [AL], Defs = [AL, EFLAGS] in
    def NAME#8  : I<Opc8, Form, (outs), (ins i8mem:$ptr, GR8:$swap),
                    !strconcat(mnemonic, "{b}\t{$swap, $ptr|$ptr, $swap}"),
                    [(frag addr:$ptr, GR8:$swap, 1)]>,
                  TB, LOCK;

    let Uses = [AX], Defs = [AX, EFLAGS] in
    def NAME#16 : I<Opc, Form, (outs), (ins i16mem:$ptr, GR16:$swap),
                    !strconcat(mnemonic, "{w}\t{$swap, $ptr|$ptr, $swap}"),
                    [(frag addr:$ptr, GR16:$swap, 2)]>,
                  TB, OpSize16, LOCK;

    let Uses = [EAX], Defs = [EAX, EFLAGS] in
    def NAME#32 : I<Opc, Form, (outs), (ins i32mem:$ptr, GR32:$swap),
                    !strconcat(mnemonic, "{l}\t{$swap, $ptr|$ptr, $swap}"),
                    [(frag addr:$ptr, GR32:$swap, 4)]>,
                  TB, OpSize32, LOCK;

    let Uses = [RAX], Defs = [RAX, EFLAGS] in
    def NAME#64 : RI<Opc, Form, (outs), (ins i64mem:$ptr, GR64:$swap),
                     !strconcat(mnemonic, "{q}\t{$swap, $ptr|$ptr, $swap}"),
                     [(frag addr:$ptr, GR64:$swap, 8)]>,
                  TB, LOCK;
  }
}

// Locked cmpxchg8b: reads EAX, EBX, ECX and EDX, writes EAX, EDX and EFLAGS.
// Requires HasCX8 and goes through a custom inserter.
let Uses = [EAX, EBX, ECX, EDX], Defs = [EAX, EDX, EFLAGS],
    Predicates = [HasCX8], SchedRW = [WriteCMPXCHGRMW],
    usesCustomInserter = 1, isCodeGenOnly = 1 in {
def LCMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$ptr),
                   "cmpxchg8b\t$ptr",
                   [(X86cas8 addr:$ptr)]>, TB, LOCK;
}

// Locked cmpxchg16b: reads RAX, RBX, RCX and RDX, writes RAX, RDX and EFLAGS.
// It has no selection pattern of its own.
let Uses = [RAX, RBX, RCX, RDX], Defs = [RAX, RDX, EFLAGS],
    Predicates = [HasCX16,In64BitMode], SchedRW = [WriteCMPXCHGRMW],
    hasSideEffects = 0, mayLoad = 1, mayStore = 1, isCodeGenOnly = 1 in {
def LCMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$ptr),
                     "cmpxchg16b\t$ptr",
                     []>, TB, LOCK;
}

// LCMPXCHG16B_SAVE_RBX must be used when the frame uses RBX as the base
// pointer. In that situation RBX is a reserved register and the register
// allocator ignores every use/def of it, so the allocator will not repair the
// clobbering of RBX that happens when the arguments of the instruction are set
// up.
//
// Unlike the real instruction, this pseudo is marked as defining RBX rather
// than using it, because RBX is defined during the expansion of the pseudo.
// The value that feeds RBX is $rbx_input.
//
// The extra operand $rbx_save is a temporary register that holds the value of
// RBX across the actual instruction.
//
// To keep the register assigned to $rbx_save from interfering with the
// definition of the actual instruction, the result $dst is tied to $rbx_save.
// That way the live range of $rbx_save spans the instruction and a valid
// register is guaranteed to be available for restoring RBX.
let Uses = [RAX, RCX, RDX], Defs = [RAX, RDX, RBX, EFLAGS],
    Predicates = [HasCX16,In64BitMode], SchedRW = [WriteCMPXCHGRMW],
    Constraints = "$rbx_save = $dst",
    hasSideEffects = 0, mayLoad = 1, mayStore = 1,
    isCodeGenOnly = 1, isPseudo = 1 in {
def LCMPXCHG16B_SAVE_RBX :
    I<0, Pseudo, (outs GR64:$dst),
      (ins i128mem:$ptr, GR64:$rbx_input, GR64:$rbx_save), "", []>;
}

// Pseudo that neither reads nor writes RBX. A custom inserter turns it into
// either LCMPXCHG16B_SAVE_RBX or LCMPXCHG16B.
let Uses = [RAX, RCX, RDX], Defs = [RAX, RDX, EFLAGS],
    Predicates = [HasCX16,In64BitMode], SchedRW = [WriteCMPXCHGRMW],
    usesCustomInserter = 1,
    hasSideEffects = 0, mayLoad = 1, mayStore = 1,
    isCodeGenOnly = 1, isPseudo = 1 in {
def LCMPXCHG16B_NO_RBX :
    I<0, Pseudo, (outs), (ins i128mem:$ptr, GR64:$rbx_input), "",
      [(X86cas16 addr:$ptr, GR64:$rbx_input)]>;
}

// This pseudo must be used when the frame uses RBX/EBX as
// the base pointer.
// cf comment for LCMPXCHG16B_SAVE_RBX.
// mwaitx pseudo that defines EBX and reads ECX and EAX. The result $dst is
// tied to $rbx_save.
let Defs = [EBX], Uses = [ECX, EAX],
    Predicates = [HasMWAITX], SchedRW = [WriteSystem],
    isCodeGenOnly = 1, isPseudo = 1, Constraints = "$rbx_save = $dst" in {
def MWAITX_SAVE_RBX :
    I<0, Pseudo, (outs GR64:$dst),
      (ins GR32:$ebx_input, GR64:$rbx_save),
      "mwaitx",
      []>;
}

// Pseudo mwaitx instruction to use for custom insertion.
let Predicates = [HasMWAITX], SchedRW = [WriteSystem],
    isCodeGenOnly = 1, isPseudo = 1,
    usesCustomInserter = 1 in {
def MWAITX :
    I<0, Pseudo, (outs), (ins GR32:$ecx, GR32:$eax, GR32:$ebx),
      "mwaitx",
      [(int_x86_mwaitx GR32:$ecx, GR32:$eax, GR32:$ebx)]>;
}


defm LCMPXCHG : LCMPXCHG_BinOp<0xB0, 0xB1, MRMDestMem, "cmpxchg", X86cas>;

// Atomic exchange and add
//   opc8     - opcode of the 8-bit form
//   opc      - opcode of the 16/32/64-bit forms
//   mnemonic - assembly mnemonic without its size suffix
//   frag     - PatFrag name prefix; "_i8".."_i64" is appended per width
// The register operand $val is tied to the result $dst.
multiclass ATOMIC_RMW_BINOP<bits<8> opc8, bits<8> opc, string mnemonic,
                            string frag> {
  let Constraints = "$val = $dst", Defs = [EFLAGS], mayLoad = 1, mayStore = 1,
      isCodeGenOnly = 1, SchedRW = [WriteALURMW] in {
    def NAME#8  : I<opc8, MRMSrcMem, (outs GR8:$dst),
                    (ins GR8:$val, i8mem:$ptr),
                    !strconcat(mnemonic, "{b}\t{$val, $ptr|$ptr, $val}"),
                    [(set GR8:$dst,
                          (!cast<PatFrag>(frag # "_i8") addr:$ptr, GR8:$val))]>;
    def NAME#16 : I<opc, MRMSrcMem, (outs GR16:$dst),
                    (ins GR16:$val, i16mem:$ptr),
                    !strconcat(mnemonic, "{w}\t{$val, $ptr|$ptr, $val}"),
                    [(set
                         GR16:$dst,
                         (!cast<PatFrag>(frag # "_i16") addr:$ptr, GR16:$val))]>,
                    OpSize16;
    def NAME#32 : I<opc, MRMSrcMem, (outs GR32:$dst),
                    (ins GR32:$val, i32mem:$ptr),
                    !strconcat(mnemonic, "{l}\t{$val, $ptr|$ptr, $val}"),
                    [(set
                         GR32:$dst,
                         (!cast<PatFrag>(frag # "_i32") addr:$ptr, GR32:$val))]>,
                    OpSize32;
    def NAME#64 : RI<opc, MRMSrcMem, (outs GR64:$dst),
                     (ins GR64:$val, i64mem:$ptr),
                     !strconcat(mnemonic, "{q}\t{$val, $ptr|$ptr, $val}"),
                     [(set
                         GR64:$dst,
                         (!cast<PatFrag>(frag # "_i64") addr:$ptr, GR64:$val))]>;
  }
}

defm LXADD : ATOMIC_RMW_BINOP<0xc0, 0xc1, "xadd", "atomic_load_add">, TB, LOCK;

/* The following multiclass tries to make sure that in code like
 *    x.store (immediate op x.load(acquire), release)
 * and
 *    x.store (register op x.load(acquire), release)
 * an operation directly on memory is generated instead of wasting a register.
 * It is not automatic as atomic_store/load are only lowered to MOV instructions
 * extremely late to prevent them from being accidentally reordered in the backend
 * (see below the RELEASE_MOV* / ACQUIRE_MOV* pseudo-instructions)
 */
// Name is the instruction name prefix (e.g. "ADD"); the patterns select
// Name#"8mi".."64mi32" for immediates and Name#"8mr".."64mr" for registers.
multiclass RELEASE_BINOP_MI<string Name, SDNode op> {
  def : Pat<(atomic_store_8 (op (atomic_load_nonext_8 addr:$dst), (i8 imm:$src)),
                            addr:$dst),
            (!cast<Instruction>(Name#"8mi") addr:$dst, imm:$src)>;
  def : Pat<(atomic_store_16 (op (atomic_load_nonext_16 addr:$dst), (i16 imm:$src)),
                             addr:$dst),
            (!cast<Instruction>(Name#"16mi") addr:$dst, imm:$src)>;
  def : Pat<(atomic_store_32 (op (atomic_load_nonext_32 addr:$dst), (i32 imm:$src)),
                             addr:$dst),
            (!cast<Instruction>(Name#"32mi") addr:$dst, imm:$src)>;
  def : Pat<(atomic_store_64 (op (atomic_load_nonext_64 addr:$dst), (i64immSExt32:$src)),
                             addr:$dst),
            (!cast<Instruction>(Name#"64mi32") addr:$dst, (i64immSExt32:$src))>;
  def : Pat<(atomic_store_8 (op (atomic_load_nonext_8 addr:$dst), (i8 GR8:$src)), addr:$dst),
            (!cast<Instruction>(Name#"8mr") addr:$dst, GR8:$src)>;
  def : Pat<(atomic_store_16 (op (atomic_load_nonext_16 addr:$dst), (i16 GR16:$src)),
                             addr:$dst),
            (!cast<Instruction>(Name#"16mr") addr:$dst, GR16:$src)>;
  def : Pat<(atomic_store_32 (op (atomic_load_nonext_32 addr:$dst), (i32 GR32:$src)),
                             addr:$dst),
            (!cast<Instruction>(Name#"32mr") addr:$dst, GR32:$src)>;
  def : Pat<(atomic_store_64 (op (atomic_load_nonext_64 addr:$dst), (i64 GR64:$src)),
                             addr:$dst),
            (!cast<Instruction>(Name#"64mr") addr:$dst, GR64:$src)>;
}
defm : RELEASE_BINOP_MI<"ADD", add>;
defm : RELEASE_BINOP_MI<"AND", and>;
defm : RELEASE_BINOP_MI<"OR",  or>;
defm : RELEASE_BINOP_MI<"XOR", xor>;
defm : RELEASE_BINOP_MI<"SUB", sub>;

// Atomic load + floating point patterns.
// FIXME: This could also handle SIMD operations with *ps and *pd instructions.
// Name is the instruction name prefix (e.g. "ADD"). The scalar-single forms
// are gated on UseSSE1 and the scalar-double forms on UseSSE2, matching the
// predicates used by the plain f32/f64 atomic load and store patterns.
multiclass ATOMIC_LOAD_FP_BINOP_MI<string Name, SDNode op> {
  def : Pat<(op FR32:$src1, (bitconvert (i32 (atomic_load_nonext_32 addr:$src2)))),
            (!cast<Instruction>(Name#"SSrm") FR32:$src1, addr:$src2)>,
            Requires<[UseSSE1]>;
  def : Pat<(op FR32:$src1, (bitconvert (i32 (atomic_load_nonext_32 addr:$src2)))),
            (!cast<Instruction>("V"#Name#"SSrm") FR32:$src1, addr:$src2)>,
            Requires<[UseAVX]>;
  def : Pat<(op FR32X:$src1, (bitconvert (i32 (atomic_load_nonext_32 addr:$src2)))),
            (!cast<Instruction>("V"#Name#"SSZrm") FR32X:$src1, addr:$src2)>,
            Requires<[HasAVX512]>;

  // The *SD instructions are SSE2 instructions, so require UseSSE2 rather
  // than UseSSE1.
  def : Pat<(op FR64:$src1, (bitconvert (i64 (atomic_load_nonext_64 addr:$src2)))),
            (!cast<Instruction>(Name#"SDrm") FR64:$src1, addr:$src2)>,
            Requires<[UseSSE2]>;
  def : Pat<(op FR64:$src1, (bitconvert (i64 (atomic_load_nonext_64 addr:$src2)))),
            (!cast<Instruction>("V"#Name#"SDrm") FR64:$src1, addr:$src2)>,
            Requires<[UseAVX]>;
  def : Pat<(op FR64X:$src1, (bitconvert (i64 (atomic_load_nonext_64 addr:$src2)))),
            (!cast<Instruction>("V"#Name#"SDZrm") FR64X:$src1, addr:$src2)>,
            Requires<[HasAVX512]>;
}
defm : ATOMIC_LOAD_FP_BINOP_MI<"ADD", fadd>;
defm : ATOMIC_LOAD_FP_BINOP_MI<"SUB", fsub>;
defm : ATOMIC_LOAD_FP_BINOP_MI<"MUL", fmul>;
defm : ATOMIC_LOAD_FP_BINOP_MI<"DIV", fdiv>;

// Folds "atomic store of a unary operation on an atomic load of the same
// address" into the memory form Name#8m..Name#64m. dag8..dag64 are the stored
// value expressions for each width.
multiclass RELEASE_UNOP<string Name, dag dag8, dag dag16, dag dag32,
                        dag dag64> {
  def : Pat<(atomic_store_8 dag8, addr:$dst),
            (!cast<Instruction>(Name#8m) addr:$dst)>;
  def : Pat<(atomic_store_16 dag16, addr:$dst),
            (!cast<Instruction>(Name#16m) addr:$dst)>;
  def : Pat<(atomic_store_32 dag32, addr:$dst),
            (!cast<Instruction>(Name#32m) addr:$dst)>;
  def : Pat<(atomic_store_64 dag64, addr:$dst),
            (!cast<Instruction>(Name#64m) addr:$dst)>;
}

let Predicates = [UseIncDec] in {
  defm : RELEASE_UNOP<"INC",
      (add (atomic_load_nonext_8  addr:$dst), (i8 1)),
      (add (atomic_load_nonext_16 addr:$dst), (i16 1)),
      (add (atomic_load_nonext_32 addr:$dst), (i32 1)),
      (add (atomic_load_nonext_64 addr:$dst), (i64 1))>;
  defm : RELEASE_UNOP<"DEC",
      (add (atomic_load_nonext_8  addr:$dst), (i8 -1)),
      (add (atomic_load_nonext_16 addr:$dst), (i16 -1)),
      (add (atomic_load_nonext_32 addr:$dst), (i32 -1)),
      (add (atomic_load_nonext_64 addr:$dst), (i64 -1))>;
}

defm : RELEASE_UNOP<"NEG",
    (ineg (i8 (atomic_load_nonext_8  addr:$dst))),
    (ineg (i16 (atomic_load_nonext_16 addr:$dst))),
    (ineg (i32 (atomic_load_nonext_32 addr:$dst))),
    (ineg (i64 (atomic_load_nonext_64 addr:$dst)))>;
defm : RELEASE_UNOP<"NOT",
    (not (i8 (atomic_load_nonext_8  addr:$dst))),
    (not (i16 (atomic_load_nonext_16 addr:$dst))),
    (not (i32 (atomic_load_nonext_32 addr:$dst))),
    (not (i64 (atomic_load_nonext_64 addr:$dst)))>;

// Atomic stores of immediates.
def : Pat<(atomic_store_8 (i8 imm:$src), addr:$dst),
          (MOV8mi addr:$dst, imm:$src)>;
def : Pat<(atomic_store_16 (i16 imm:$src), addr:$dst),
          (MOV16mi addr:$dst, imm:$src)>;
def : Pat<(atomic_store_32 (i32 imm:$src), addr:$dst),
          (MOV32mi addr:$dst, imm:$src)>;
def : Pat<(atomic_store_64 (i64immSExt32:$src), addr:$dst),
          (MOV64mi32 addr:$dst, i64immSExt32:$src)>;

// Atomic stores of registers.
def : Pat<(atomic_store_8 GR8:$src, addr:$dst),
          (MOV8mr addr:$dst, GR8:$src)>;
def : Pat<(atomic_store_16 GR16:$src, addr:$dst),
          (MOV16mr addr:$dst, GR16:$src)>;
def : Pat<(atomic_store_32 GR32:$src, addr:$dst),
          (MOV32mr addr:$dst, GR32:$src)>;
def : Pat<(atomic_store_64 GR64:$src, addr:$dst),
          (MOV64mr addr:$dst, GR64:$src)>;

// Non-extending atomic loads.
def : Pat<(i8  (atomic_load_nonext_8 addr:$src)),  (MOV8rm addr:$src)>;
def : Pat<(i16 (atomic_load_nonext_16 addr:$src)), (MOV16rm addr:$src)>;
def : Pat<(i32 (atomic_load_nonext_32 addr:$src)), (MOV32rm addr:$src)>;
def : Pat<(i64 (atomic_load_nonext_64 addr:$src)), (MOV64rm addr:$src)>;

// Floating point loads/stores.
def : Pat<(atomic_store_32 (i32 (bitconvert (f32 FR32:$src))), addr:$dst),
          (MOVSSmr addr:$dst, FR32:$src)>, Requires<[UseSSE1]>;
def : Pat<(atomic_store_32 (i32 (bitconvert (f32 FR32:$src))), addr:$dst),
          (VMOVSSmr addr:$dst, FR32:$src)>, Requires<[UseAVX]>;
def : Pat<(atomic_store_32 (i32 (bitconvert (f32 FR32:$src))), addr:$dst),
          (VMOVSSZmr addr:$dst, FR32:$src)>, Requires<[HasAVX512]>;

def : Pat<(atomic_store_64 (i64 (bitconvert (f64 FR64:$src))), addr:$dst),
          (MOVSDmr addr:$dst, FR64:$src)>, Requires<[UseSSE2]>;
def : Pat<(atomic_store_64 (i64 (bitconvert (f64 FR64:$src))), addr:$dst),
          (VMOVSDmr addr:$dst, FR64:$src)>, Requires<[UseAVX]>;
// The AVX-512 store uses the Z form, like the f32 store above and the f64
// load below.
def : Pat<(atomic_store_64 (i64 (bitconvert (f64 FR64:$src))), addr:$dst),
          (VMOVSDZmr addr:$dst, FR64:$src)>, Requires<[HasAVX512]>;

def : Pat<(f32 (bitconvert (i32 (atomic_load_nonext_32 addr:$src)))),
          (MOVSSrm_alt addr:$src)>, Requires<[UseSSE1]>;
def : Pat<(f32 (bitconvert (i32 (atomic_load_nonext_32 addr:$src)))),
          (VMOVSSrm_alt addr:$src)>, Requires<[UseAVX]>;
def : Pat<(f32 (bitconvert (i32 (atomic_load_nonext_32 addr:$src)))),
          (VMOVSSZrm_alt addr:$src)>, Requires<[HasAVX512]>;

def : Pat<(f64 (bitconvert (i64 (atomic_load_nonext_64 addr:$src)))),
          (MOVSDrm_alt addr:$src)>, Requires<[UseSSE2]>;
def : Pat<(f64 (bitconvert (i64 (atomic_load_nonext_64 addr:$src)))),
          (VMOVSDrm_alt addr:$src)>, Requires<[UseAVX]>;
def : Pat<(f64 (bitconvert (i64 (atomic_load_nonext_64 addr:$src)))),
          (VMOVSDZrm_alt addr:$src)>, Requires<[HasAVX512]>;

//===----------------------------------------------------------------------===//
// DAG Pattern Matching Rules
//===----------------------------------------------------------------------===//

// Use AND/OR to store 0/-1 in memory when optimizing for minsize. This saves
// binary size compared to a regular MOV, but it introduces an unnecessary
// load, so is not suitable for regular or optsize functions.
let Predicates = [OptForMinSize] in {
def : Pat<(simple_store (i16 0), addr:$dst), (AND16mi addr:$dst, 0)>;
def : Pat<(simple_store (i32 0), addr:$dst), (AND32mi addr:$dst, 0)>;
def : Pat<(simple_store (i64 0), addr:$dst), (AND64mi32 addr:$dst, 0)>;
def : Pat<(simple_store (i16 -1), addr:$dst), (OR16mi addr:$dst, -1)>;
def : Pat<(simple_store (i32 -1), addr:$dst), (OR32mi addr:$dst, -1)>;
def : Pat<(simple_store (i64 -1), addr:$dst), (OR64mi32 addr:$dst, -1)>;
}

// In kernel code model, we can get the address of a label
// into a register with 'movq'. FIXME: This is a hack, the 'imm' predicate of
// the MOV64ri32 should accept these.
def : Pat<(i64 (X86Wrapper tconstpool  :$dst)),
          (MOV64ri32 tconstpool  :$dst)>, Requires<[KernelCode]>;
def : Pat<(i64 (X86Wrapper tjumptable  :$dst)),
          (MOV64ri32 tjumptable  :$dst)>, Requires<[KernelCode]>;
def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
          (MOV64ri32 tglobaladdr :$dst)>, Requires<[KernelCode]>;
def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
          (MOV64ri32 texternalsym:$dst)>, Requires<[KernelCode]>;
def : Pat<(i64 (X86Wrapper mcsym:$dst)),
          (MOV64ri32 mcsym:$dst)>, Requires<[KernelCode]>;
def : Pat<(i64 (X86Wrapper tblockaddress:$dst)),
          (MOV64ri32 tblockaddress:$dst)>, Requires<[KernelCode]>;

// If we have small model and -static mode, it is safe to store global addresses
// directly as immediates.
// FIXME: This is really a hack, the 'imm' predicate
// for MOV64mi32 should handle this sort of thing.
let Predicates = [NearData, IsNotPIC] in {
  def : Pat<(store (i64 (X86Wrapper tconstpool:$src)), addr:$dst),
            (MOV64mi32 addr:$dst, tconstpool:$src)>;
  def : Pat<(store (i64 (X86Wrapper tjumptable:$src)), addr:$dst),
            (MOV64mi32 addr:$dst, tjumptable:$src)>;
  def : Pat<(store (i64 (X86Wrapper tglobaladdr:$src)), addr:$dst),
            (MOV64mi32 addr:$dst, tglobaladdr:$src)>;
  def : Pat<(store (i64 (X86Wrapper texternalsym:$src)), addr:$dst),
            (MOV64mi32 addr:$dst, texternalsym:$src)>;
  def : Pat<(store (i64 (X86Wrapper mcsym:$src)), addr:$dst),
            (MOV64mi32 addr:$dst, mcsym:$src)>;
  def : Pat<(store (i64 (X86Wrapper tblockaddress:$src)), addr:$dst),
            (MOV64mi32 addr:$dst, tblockaddress:$src)>;
}

// X86RecoverFrameAlloc of an MC symbol becomes a move of that symbol into a
// 32-bit or 64-bit register.
def : Pat<(i32 (X86RecoverFrameAlloc mcsym:$dst)), (MOV32ri mcsym:$dst)>;
def : Pat<(i64 (X86RecoverFrameAlloc mcsym:$dst)), (MOV64ri mcsym:$dst)>;

// Calls

// tls has some funny stuff here...
// This corresponds to movabs $foo@tpoff, %rax
def : Pat<(i64 (X86Wrapper tglobaltlsaddr :$dst)),
          (MOV64ri32 tglobaltlsaddr :$dst)>;
// This corresponds to add $foo@tpoff, %rax
def : Pat<(add GR64:$src1, (X86Wrapper tglobaltlsaddr :$dst)),
          (ADD64ri32 GR64:$src1, tglobaltlsaddr :$dst)>;


// Direct PC relative function call for small code model. 32-bit displacement
// sign extended to 64-bit.
def : Pat<(X86call (i64 tglobaladdr:$dst)), (CALL64pcrel32 tglobaladdr:$dst)>;
def : Pat<(X86call (i64 texternalsym:$dst)), (CALL64pcrel32 texternalsym:$dst)>;

// Calls with an rvmarker: $rvfunc is a global address, the callee is either
// an external symbol or a global address.
def : Pat<(X86call_rvmarker (i64 tglobaladdr:$rvfunc), (i64 texternalsym:$dst)),
          (CALL64pcrel32_RVMARKER tglobaladdr:$rvfunc, texternalsym:$dst)>;
def : Pat<(X86call_rvmarker (i64 tglobaladdr:$rvfunc), (i64 tglobaladdr:$dst)),
          (CALL64pcrel32_RVMARKER tglobaladdr:$rvfunc, tglobaladdr:$dst)>;

// X86imp_call to a global address selects the same direct call.
def : Pat<(X86imp_call (i64 tglobaladdr:$dst)),
          (CALL64pcrel32 tglobaladdr:$dst)>;

// Tailcall stuff. The TCRETURN instructions execute after the epilog, so they
// can never use callee-saved registers. That is the purpose of the GR64_TC
// register classes.
//
// The only volatile register that is never used by the calling convention is
// %r11. This happens when calling a vararg function with 6 arguments.
//
// Match an X86tcret that uses less than 7 volatile registers.
let Predicates = [Not64BitMode, NotUseIndirectThunkCalls] in
def : Pat<(X86tcret ptr_rc_tailcall:$dst, timm:$off),
          (TCRETURNri ptr_rc_tailcall:$dst, timm:$off)>;

// FIXME: This is disabled for 32-bit PIC mode because the global base
// register which is part of the address mode may be assigned a
// callee-saved register.
// Similar to X86tcret_6regs, here we only have 1 register left
let Predicates = [Not64BitMode, IsNotPIC, NotUseIndirectThunkCalls] in
def : Pat<(X86tcret_1reg (load addr:$dst), timm:$off),
          (TCRETURNmi addr:$dst, timm:$off)>;

// Direct tail calls when not LP64.
let Predicates = [NotLP64] in {
  def : Pat<(X86tcret (i32 tglobaladdr:$dst), timm:$off),
            (TCRETURNdi tglobaladdr:$dst, timm:$off)>;

  def : Pat<(X86tcret (i32 texternalsym:$dst), timm:$off),
            (TCRETURNdi texternalsym:$dst, timm:$off)>;
}

// Register tail calls in 64-bit mode without indirect thunks; the instruction
// chosen depends on whether import call optimization is enabled.
let Predicates = [In64BitMode, NotUseIndirectThunkCalls,
                  ImportCallOptimizationDisabled] in
def : Pat<(X86tcret ptr_rc_tailcall:$dst, timm:$off),
          (TCRETURNri64 ptr_rc_tailcall:$dst, timm:$off)>;

let Predicates = [In64BitMode, NotUseIndirectThunkCalls,
                  ImportCallOptimizationEnabled] in
def : Pat<(X86tcret ptr_rc_tailcall:$dst, timm:$off),
          (TCRETURNri64_ImpCall ptr_rc_tailcall:$dst, timm:$off)>;

// Don't fold loads into X86tcret requiring more than 6 regs.
// There wouldn't be enough scratch registers for base+index.
let Predicates = [In64BitMode, NotUseIndirectThunkCalls] in
def : Pat<(X86tcret_6regs (load addr:$dst), timm:$off),
          (TCRETURNmi64 addr:$dst, timm:$off)>;

// Register tail calls when indirect thunk calls are in use.
let Predicates = [In64BitMode, UseIndirectThunkCalls] in
def : Pat<(X86tcret ptr_rc_tailcall:$dst, timm:$off),
          (INDIRECT_THUNK_TCRETURN64 ptr_rc_tailcall:$dst, timm:$off)>;

let Predicates = [Not64BitMode, UseIndirectThunkCalls] in
def : Pat<(X86tcret ptr_rc_tailcall:$dst, timm:$off),
          (INDIRECT_THUNK_TCRETURN32 ptr_rc_tailcall:$dst, timm:$off)>;

// Direct tail calls for LP64.
let Predicates = [IsLP64] in {
  def : Pat<(X86tcret (i64 tglobaladdr:$dst), timm:$off),
            (TCRETURNdi64 tglobaladdr:$dst, timm:$off)>;

  def : Pat<(X86tcret (i64 texternalsym:$dst), timm:$off),
            (TCRETURNdi64 texternalsym:$dst, timm:$off)>;
}

// Normal calls, with various flavors of addresses.
def : Pat<(X86call (i32 tglobaladdr:$dst)), (CALLpcrel32 tglobaladdr:$dst)>;
def : Pat<(X86call (i32 texternalsym:$dst)), (CALLpcrel32 texternalsym:$dst)>;
let Predicates = [CallImmAddr] in
def : Pat<(X86call (i32 imm:$dst)), (CALLpcrel32 imm:$dst)>;

// Comparisons.

// TEST R,R is smaller than CMP R,0
def : Pat<(X86cmp GR8:$src1, 0),  (TEST8rr  GR8:$src1,  GR8:$src1)>;
def : Pat<(X86cmp GR16:$src1, 0), (TEST16rr GR16:$src1, GR16:$src1)>;
def : Pat<(X86cmp GR32:$src1, 0), (TEST32rr GR32:$src1, GR32:$src1)>;
def : Pat<(X86cmp GR64:$src1, 0), (TEST64rr GR64:$src1, GR64:$src1)>;

// zextload bool -> zextload byte
// i1 stored in one byte in zero-extended form.
// Upper bits cleanup should be executed before Store.
def : Pat<(zextloadi8i1 addr:$src), (MOV8rm addr:$src)>;
def : Pat<(zextloadi16i1 addr:$src),
          (EXTRACT_SUBREG (MOVZX32rm8 addr:$src), sub_16bit)>;
def : Pat<(zextloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>;
def : Pat<(zextloadi64i1 addr:$src),
          (SUBREG_TO_REG (i64 0), (MOVZX32rm8 addr:$src), sub_32bit)>;

// extload bool -> extload byte
// When extloading from 16-bit and smaller memory locations into 64-bit
// registers, use zero-extending loads so that the entire 64-bit register is
// defined, avoiding partial-register updates.

def : Pat<(extloadi8i1 addr:$src), (MOV8rm addr:$src)>;
def : Pat<(extloadi16i1 addr:$src),
          (EXTRACT_SUBREG (MOVZX32rm8 addr:$src), sub_16bit)>;
def : Pat<(extloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>;
def : Pat<(extloadi16i8 addr:$src),
          (EXTRACT_SUBREG (MOVZX32rm8 addr:$src), sub_16bit)>;
def : Pat<(extloadi32i8 addr:$src), (MOVZX32rm8 addr:$src)>;
def : Pat<(extloadi32i16 addr:$src), (MOVZX32rm16 addr:$src)>;

// For other extloads, use subregs, since the high contents of the register are
// defined after an extload.
// NOTE: The extloadi64i32 pattern needs to be first as it will try to form
// 32-bit loads for 4 byte aligned i8/i16 loads.
def : Pat<(extloadi64i32 addr:$src),
          (SUBREG_TO_REG (i64 0), (MOV32rm addr:$src), sub_32bit)>;
def : Pat<(extloadi64i1 addr:$src),
          (SUBREG_TO_REG (i64 0), (MOVZX32rm8 addr:$src), sub_32bit)>;
def : Pat<(extloadi64i8 addr:$src),
          (SUBREG_TO_REG (i64 0), (MOVZX32rm8 addr:$src), sub_32bit)>;
def : Pat<(extloadi64i16 addr:$src),
          (SUBREG_TO_REG (i64 0), (MOVZX32rm16 addr:$src), sub_32bit)>;

// anyext. Define these to do an explicit zero-extend to
// avoid partial-register updates.
def : Pat<(i16 (anyext GR8:$src)),
          (EXTRACT_SUBREG (MOVZX32rr8 GR8:$src), sub_16bit)>;
def : Pat<(i32 (anyext GR8:$src)), (MOVZX32rr8 GR8:$src)>;

// Except for i16 -> i32 since isel expects i16 ops to be promoted to i32.
def : Pat<(i32 (anyext GR16:$src)),
          (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR16:$src, sub_16bit)>;

// anyext to i64: the 8-bit and 16-bit sources are zero-extended into a 32-bit
// register first, while a 32-bit source is inserted into an undefined 64-bit
// register.
def : Pat<(i64 (anyext GR8:$src)),
          (SUBREG_TO_REG (i64 0), (MOVZX32rr8 GR8:$src), sub_32bit)>;
def : Pat<(i64 (anyext GR16:$src)),
          (SUBREG_TO_REG (i64 0), (MOVZX32rr16 GR16:$src), sub_32bit)>;
def : Pat<(i64 (anyext GR32:$src)),
          (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, sub_32bit)>;

// anyext_sdiv of an 8-bit register uses a sign-extending move.
def : Pat<(i32 (anyext_sdiv GR8:$src)), (MOVSX32rr8 GR8:$src)>;

// In the case of a 32-bit def that is known to implicitly zero-extend,
// we can use a SUBREG_TO_REG.
def : Pat<(i64 (zext def32:$src)),
          (SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>;
def : Pat<(i64 (and (anyext def32:$src), 0x00000000FFFFFFFF)),
          (SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>;

//===----------------------------------------------------------------------===//
// Pattern match OR as ADD
//===----------------------------------------------------------------------===//

// If safe, we prefer to pattern match OR as ADD at isel time. ADD can be
// 3-addressified into an LEA instruction to avoid copies. However, we also
// want to finally emit these instructions as an or at the end of the code
// generator to make the generated code easier to read. To do this, we select
// into "disjoint bits" pseudo ops.

// (or x1, x2) -> (add x1, x2) if two operands are known not to share bits.
// Try this before selecting to OR.
let SchedRW = [WriteALU] in {

// All *_DB pseudos are two-address ($src1 tied to $dst), define EFLAGS and
// have an empty asm string.
let isConvertibleToThreeAddress = 1, isPseudo = 1,
    Constraints = "$src1 = $dst", Defs = [EFLAGS] in {
// Register/register forms; only these are marked commutable.
let isCommutable = 1 in {
def ADD8rr_DB : I<0, Pseudo, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
                  "", // orb/addb REG, REG
                  [(set GR8:$dst, (or_is_add GR8:$src1, GR8:$src2))]>;
def ADD16rr_DB : I<0, Pseudo, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
                   "", // orw/addw REG, REG
                   [(set GR16:$dst, (or_is_add GR16:$src1, GR16:$src2))]>;
def ADD32rr_DB : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
                   "", // orl/addl REG, REG
                   [(set GR32:$dst, (or_is_add GR32:$src1, GR32:$src2))]>;
def ADD64rr_DB : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
                   "", // orq/addq REG, REG
                   [(set GR64:$dst, (or_is_add GR64:$src1, GR64:$src2))]>;
} // isCommutable

// Register/immediate forms. The 64-bit form only accepts immediates matching
// i64immSExt32.
def ADD8ri_DB : I<0, Pseudo,
                  (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
                  "", // orb/addb REG, imm8
                  [(set GR8:$dst, (or_is_add GR8:$src1, imm:$src2))]>;
def ADD16ri_DB : I<0, Pseudo, (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
                   "", // orw/addw REG, imm
                   [(set GR16:$dst, (or_is_add GR16:$src1, imm:$src2))]>;
def ADD32ri_DB : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
                   "", // orl/addl REG, imm
                   [(set GR32:$dst, (or_is_add GR32:$src1, imm:$src2))]>;
def ADD64ri32_DB : I<0, Pseudo,
                     (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),
                     "", // orq/addq REG, imm
                     [(set GR64:$dst, (or_is_add GR64:$src1,
                                                 i64immSExt32:$src2))]>;
} // isConvertibleToThreeAddress, isPseudo, Constraints, Defs
} // SchedRW

//===----------------------------------------------------------------------===//
// Pattern match XOR as ADD
//===----------------------------------------------------------------------===//

// Prefer to pattern match XOR with min_signed_value as ADD at isel time.
// ADD can be 3-addressified into an LEA instruction to avoid copies.
// Only the 8-, 16- and 32-bit widths are handled here.
let AddedComplexity = 5 in {
def : Pat<(xor GR8:$src1, -128),
          (ADD8ri GR8:$src1, -128)>;
def : Pat<(xor GR16:$src1, -32768),
          (ADD16ri GR16:$src1, -32768)>;
def : Pat<(xor GR32:$src1, -2147483648),
          (ADD32ri GR32:$src1, -2147483648)>;
}

//===----------------------------------------------------------------------===//
// Some peepholes
//===----------------------------------------------------------------------===//

// Odd encoding trick: -128 fits into an 8-bit immediate field while
// +128 doesn't, so in this special case use a sub instead of an add.
// Register forms without NDD: (add x, 128) and its flag-producing variant
// X86add_flag_nocf are both selected to SUB with immediate -128.
let Predicates = [NoNDD] in {
  def : Pat<(add GR16:$src1, 128),
            (SUB16ri GR16:$src1, -128)>;
  def : Pat<(add GR32:$src1, 128),
            (SUB32ri GR32:$src1, -128)>;
  def : Pat<(add GR64:$src1, 128),
            (SUB64ri32 GR64:$src1, -128)>;

  def : Pat<(X86add_flag_nocf GR16:$src1, 128),
            (SUB16ri GR16:$src1, -128)>;
  def : Pat<(X86add_flag_nocf GR32:$src1, 128),
            (SUB32ri GR32:$src1, -128)>;
  def : Pat<(X86add_flag_nocf GR64:$src1, 128),
            (SUB64ri32 GR64:$src1, -128)>;
}
// Same register forms, selected to the _ND instruction variants under HasNDD.
let Predicates = [HasNDD] in {
  def : Pat<(add GR16:$src1, 128),
            (SUB16ri_ND GR16:$src1, -128)>;
  def : Pat<(add GR32:$src1, 128),
            (SUB32ri_ND GR32:$src1, -128)>;
  def : Pat<(add GR64:$src1, 128),
            (SUB64ri32_ND GR64:$src1, -128)>;

  def : Pat<(X86add_flag_nocf GR16:$src1, 128),
            (SUB16ri_ND GR16:$src1, -128)>;
  def : Pat<(X86add_flag_nocf GR32:$src1, 128),
            (SUB32ri_ND GR32:$src1, -128)>;
  def : Pat<(X86add_flag_nocf GR64:$src1, 128),
            (SUB64ri32_ND GR64:$src1, -128)>;
}
// Read-modify-write memory forms: load, add 128, store back to the same
// address. These carry no NDD predicate.
def : Pat<(store (add (loadi16 addr:$dst), 128), addr:$dst),
          (SUB16mi addr:$dst, -128)>;
def : Pat<(store (add (loadi32 addr:$dst), 128), addr:$dst),
          (SUB32mi addr:$dst, -128)>;
def : Pat<(store (add (loadi64 addr:$dst), 128), addr:$dst),
          (SUB64mi32 addr:$dst, -128)>;
// NDD memory forms match an add of a loaded value with no store back.
let Predicates = [HasNDD] in {
  def : Pat<(add (loadi16 addr:$src), 128),
            (SUB16mi_ND addr:$src, -128)>;
  def : Pat<(add (loadi32 addr:$src), 128),
            (SUB32mi_ND addr:$src, -128)>;
  def : Pat<(add (loadi64 addr:$src), 128),
            (SUB64mi32_ND addr:$src, -128)>;
}

// The same trick applies for 32-bit immediate fields in 64-bit
// instructions.
// Adding 0x80000000 is selected as subtracting 0xffffffff80000000.
let Predicates = [NoNDD] in {
  def : Pat<(add GR64:$src1, 0x0000000080000000),
            (SUB64ri32 GR64:$src1, 0xffffffff80000000)>;
  def : Pat<(X86add_flag_nocf GR64:$src1, 0x0000000080000000),
            (SUB64ri32 GR64:$src1, 0xffffffff80000000)>;
}
let Predicates = [HasNDD] in {
  def : Pat<(add GR64:$src1, 0x0000000080000000),
            (SUB64ri32_ND GR64:$src1, 0xffffffff80000000)>;
  def : Pat<(X86add_flag_nocf GR64:$src1, 0x0000000080000000),
            (SUB64ri32_ND GR64:$src1, 0xffffffff80000000)>;
}
// Read-modify-write memory form; carries no NDD predicate.
def : Pat<(store (add (loadi64 addr:$dst), 0x0000000080000000), addr:$dst),
          (SUB64mi32 addr:$dst, 0xffffffff80000000)>;
// NDD memory form: add of a loaded value with no store back.
let Predicates = [HasNDD] in {
  def : Pat<(add(loadi64 addr:$src), 0x0000000080000000),
            (SUB64mi32_ND addr:$src, 0xffffffff80000000)>;
}

// Depositing value to 8/16 bit subreg:
// (or (and dst, ~mask), zextload) replaces the low 8 or 16 bits of $dst with
// a loaded value; it is selected as a narrow load inserted into a copy of
// $dst.
def : Pat<(or (and GR64:$dst, -256),
              (i64 (zextloadi8 addr:$src))),
          (INSERT_SUBREG (i64 (COPY $dst)), (MOV8rm i8mem:$src), sub_8bit)>;

def : Pat<(or (and GR32:$dst, -256),
              (i32 (zextloadi8 addr:$src))),
          (INSERT_SUBREG (i32 (COPY $dst)), (MOV8rm i8mem:$src), sub_8bit)>;

def : Pat<(or (and GR64:$dst, -65536),
              (i64 (zextloadi16 addr:$src))),
          (INSERT_SUBREG (i64 (COPY $dst)), (MOV16rm i16mem:$src), sub_16bit)>;

def : Pat<(or (and GR32:$dst, -65536),
              (i32 (zextloadi16 addr:$src))),
          (INSERT_SUBREG (i32 (COPY $dst)), (MOV16rm i16mem:$src), sub_16bit)>;

// To avoid needing to materialize an immediate in a register, use a 32-bit and
// with implicit zero-extension instead of a 64-bit and if the immediate has at
// least 32 bits of leading zeros. If in addition the last 32 bits can be
// represented with a sign extension of an 8-bit constant, use that.
// This can also reduce instruction size by eliminating the need for the REX
// prefix.

// AddedComplexity is needed to give priority over i64immSExt8 and i64immSExt32.
// 64-bit AND with an immediate matching i64immZExt32: AND the low 32 bits
// with the low 32 bits of the immediate (GetLo32XForm) and wrap the result
// in SUBREG_TO_REG.
let AddedComplexity = 1 in {
  let Predicates = [NoNDD] in {
    def : Pat<(and GR64:$src, i64immZExt32:$imm),
              (SUBREG_TO_REG
                (i64 0),
                (AND32ri
                  (EXTRACT_SUBREG GR64:$src, sub_32bit),
                  (i32 (GetLo32XForm imm:$imm))),
                sub_32bit)>;
  }
  let Predicates = [HasNDD] in {
    def : Pat<(and GR64:$src, i64immZExt32:$imm),
              (SUBREG_TO_REG
                (i64 0),
                (AND32ri_ND
                  (EXTRACT_SUBREG GR64:$src, sub_32bit),
                  (i32 (GetLo32XForm imm:$imm))),
                sub_32bit)>;
  }
} // AddedComplexity = 1


// AddedComplexity is needed due to the increased complexity on the
// i64immZExt32SExt8 and i64immZExt32 patterns above. Applying this to all
// the MOVZX patterns keeps them together in DAGIsel tables.
// NOTE(review): only i64immZExt32 patterns appear directly above; confirm
// whether the i64immZExt32SExt8 reference is still current.
let AddedComplexity = 1 in {
// r & (2^16-1) ==> movz
def : Pat<(and GR32:$src1, 0xffff),
          (MOVZX32rr16 (EXTRACT_SUBREG GR32:$src1, sub_16bit))>;
// r & (2^8-1) ==> movz
def : Pat<(and GR32:$src1, 0xff),
          (MOVZX32rr8 (EXTRACT_SUBREG GR32:$src1, sub_8bit))>;
// r & (2^8-1) ==> movz
def : Pat<(and GR16:$src1, 0xff),
          (EXTRACT_SUBREG (MOVZX32rr8 (EXTRACT_SUBREG GR16:$src1, sub_8bit)),
                          sub_16bit)>;

// r & (2^32-1) ==> 32-bit mov
def : Pat<(and GR64:$src, 0x00000000FFFFFFFF),
          (SUBREG_TO_REG (i64 0),
                         (MOV32rr (EXTRACT_SUBREG GR64:$src, sub_32bit)),
                         sub_32bit)>;
// r & (2^16-1) ==> movz
def : Pat<(and GR64:$src, 0xffff),
          (SUBREG_TO_REG (i64 0),
                      (MOVZX32rr16 (i16 (EXTRACT_SUBREG GR64:$src, sub_16bit))),
                      sub_32bit)>;
// r & (2^8-1) ==> movz
def : Pat<(and GR64:$src, 0xff),
          (SUBREG_TO_REG (i64 0),
                         (MOVZX32rr8 (i8 (EXTRACT_SUBREG GR64:$src, sub_8bit))),
                         sub_32bit)>;
} // AddedComplexity = 1


// Try to use BTS/BTR/BTC for single bit operations on the upper 32-bits.

// Maps a BTR mask to its bit index: the number of trailing one bits, i.e. the
// position of the lowest zero bit.
def BTRXForm : SDNodeXForm<imm, [{
  // Transformation function: Find the lowest 0.
  return getI64Imm((uint8_t)N->getAPIntValue().countr_one(), SDLoc(N));
}]>;

// Maps a BTS/BTC mask to its bit index: the number of trailing zero bits,
// i.e. the position of the lowest one bit.
def BTCBTSXForm : SDNodeXForm<imm, [{
  // Transformation function: Find the lowest 1.
  return getI64Imm((uint8_t)N->getAPIntValue().countr_zero(), SDLoc(N));
}]>;

// Matches an i64 immediate that fits neither an unsigned nor a signed 32-bit
// value and whose complement is a power of two (exactly one bit clear).
def BTRMask64 : ImmLeaf<i64, [{
  return !isUInt<32>(Imm) && !isInt<32>(Imm) && isPowerOf2_64(~Imm);
}]>;

// Matches an i64 immediate that does not fit a signed 32-bit value and is a
// power of two (exactly one bit set).
def BTCBTSMask64 : ImmLeaf<i64, [{
  return !isInt<32>(Imm) && isPowerOf2_64(Imm);
}]>;

// For now only do this for optsize.
let AddedComplexity = 1, Predicates=[OptForSize] in {
  def : Pat<(and GR64:$src1, BTRMask64:$mask),
            (BTR64ri8 GR64:$src1, (BTRXForm imm:$mask))>;
  def : Pat<(or GR64:$src1, BTCBTSMask64:$mask),
            (BTS64ri8 GR64:$src1, (BTCBTSXForm imm:$mask))>;
  def : Pat<(xor GR64:$src1, BTCBTSMask64:$mask),
            (BTC64ri8 GR64:$src1, (BTCBTSXForm imm:$mask))>;
}


// sext_inreg patterns
def : Pat<(sext_inreg GR32:$src, i16),
          (MOVSX32rr16 (EXTRACT_SUBREG GR32:$src, sub_16bit))>;
def : Pat<(sext_inreg GR32:$src, i8),
          (MOVSX32rr8 (EXTRACT_SUBREG GR32:$src, sub_8bit))>;

def : Pat<(sext_inreg GR16:$src, i8),
          (EXTRACT_SUBREG (MOVSX32rr8 (EXTRACT_SUBREG GR16:$src, sub_8bit)),
                          sub_16bit)>;

def : Pat<(sext_inreg GR64:$src, i32),
          (MOVSX64rr32 (EXTRACT_SUBREG GR64:$src, sub_32bit))>;
def : Pat<(sext_inreg GR64:$src, i16),
          (MOVSX64rr16 (EXTRACT_SUBREG GR64:$src, sub_16bit))>;
def : Pat<(sext_inreg GR64:$src, i8),
          (MOVSX64rr8 (EXTRACT_SUBREG GR64:$src, sub_8bit))>;

// sext, sext_load, zext, zext_load
// i8 -> i16 extensions are done with the 32-bit MOVSX/MOVZX and the low 16
// bits of the result are extracted.
def: Pat<(i16 (sext GR8:$src)),
          (EXTRACT_SUBREG (MOVSX32rr8 GR8:$src), sub_16bit)>;
def: Pat<(sextloadi16i8 addr:$src),
          (EXTRACT_SUBREG (MOVSX32rm8 addr:$src), sub_16bit)>;
def: Pat<(i16 (zext GR8:$src)),
          (EXTRACT_SUBREG (MOVZX32rr8 GR8:$src), sub_16bit)>;
def: Pat<(zextloadi16i8 addr:$src),
          (EXTRACT_SUBREG (MOVZX32rm8 addr:$src), sub_16bit)>;

// trunc patterns
// Outside 64-bit mode, truncation to i8 first copies the source into the
// GR32_ABCD / GR16_ABCD register class; in 64-bit mode the 8-bit subregister
// is extracted directly.
def : Pat<(i16 (trunc GR32:$src)),
          (EXTRACT_SUBREG GR32:$src, sub_16bit)>;
def : Pat<(i8 (trunc GR32:$src)),
          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
                          sub_8bit)>,
      Requires<[Not64BitMode]>;
def : Pat<(i8 (trunc GR16:$src)),
          (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
                          sub_8bit)>,
      Requires<[Not64BitMode]>;
def : Pat<(i32 (trunc GR64:$src)),
          (EXTRACT_SUBREG GR64:$src, sub_32bit)>;
def : Pat<(i16 (trunc GR64:$src)),
          (EXTRACT_SUBREG GR64:$src, sub_16bit)>;
def : Pat<(i8 (trunc GR64:$src)),
          (EXTRACT_SUBREG GR64:$src, sub_8bit)>;
def : Pat<(i8 (trunc GR32:$src)),
          (EXTRACT_SUBREG GR32:$src, sub_8bit)>,
      Requires<[In64BitMode]>;
def : Pat<(i8 (trunc GR16:$src)),
          (EXTRACT_SUBREG GR16:$src, sub_8bit)>,
      Requires<[In64BitMode]>;

// Matches an i32 immediate in the inclusive range [0xff00, 0xffff].
def immff00_ffff : ImmLeaf<i32, [{
  return Imm >= 0xff00 && Imm <= 0xffff;
}]>;

// h-register tricks
// Bits 8..15 of a register are read through the sub_8bit_hi subregister
// instead of shifting right by 8.
def : Pat<(i8 (trunc (srl_su GR16:$src, (i8 8)))),
          (EXTRACT_SUBREG GR16:$src, sub_8bit_hi)>,
      Requires<[Not64BitMode]>;
def : Pat<(i8 (trunc (srl_su (i32 (anyext GR16:$src)), (i8 8)))),
          (EXTRACT_SUBREG GR16:$src, sub_8bit_hi)>,
      Requires<[Not64BitMode]>;
def : Pat<(i8 (trunc (srl_su GR32:$src, (i8 8)))),
          (EXTRACT_SUBREG GR32:$src, sub_8bit_hi)>,
      Requires<[Not64BitMode]>;
def : Pat<(srl GR16:$src, (i8 8)),
          (EXTRACT_SUBREG
            (MOVZX32rr8_NOREX (EXTRACT_SUBREG GR16:$src, sub_8bit_hi)),
            sub_16bit)>;
def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))),
          (MOVZX32rr8_NOREX (EXTRACT_SUBREG GR16:$src, sub_8bit_hi))>;
def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))),
          (MOVZX32rr8_NOREX (EXTRACT_SUBREG GR16:$src, sub_8bit_hi))>;
def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),
          (MOVZX32rr8_NOREX (EXTRACT_SUBREG GR32:$src, sub_8bit_hi))>;
def : Pat<(srl (and_su GR32:$src, immff00_ffff), (i8 8)),
          (MOVZX32rr8_NOREX (EXTRACT_SUBREG GR32:$src, sub_8bit_hi))>;

// h-register tricks.
// For now, be conservative on x86-64 and use an h-register extract only if the
// value is immediately zero-extended or stored, which are somewhat common
// cases. This uses a bunch of code to prevent a register requiring a REX prefix
// from being allocated in the same instruction as the h register, as there's
// currently no way to describe this requirement to the register allocator.

// h-register extract and zero-extend.
def : Pat<(and (srl_su GR64:$src, (i8 8)), (i64 255)),
          (SUBREG_TO_REG
            (i64 0),
            (MOVZX32rr8_NOREX
              (EXTRACT_SUBREG GR64:$src, sub_8bit_hi)),
            sub_32bit)>;
def : Pat<(i64 (zext (srl_su GR16:$src, (i8 8)))),
          (SUBREG_TO_REG
            (i64 0),
            (MOVZX32rr8_NOREX
              (EXTRACT_SUBREG GR16:$src, sub_8bit_hi)),
            sub_32bit)>;
def : Pat<(i64 (anyext (srl_su GR16:$src, (i8 8)))),
          (SUBREG_TO_REG
            (i64 0),
            (MOVZX32rr8_NOREX
              (EXTRACT_SUBREG GR16:$src, sub_8bit_hi)),
            sub_32bit)>;

// h-register extract and store.
def : Pat<(store (i8 (trunc_su (srl_su GR64:$src, (i8 8)))), addr:$dst),
          (MOV8mr_NOREX
            addr:$dst,
            (EXTRACT_SUBREG GR64:$src, sub_8bit_hi))>;
def : Pat<(store (i8 (trunc_su (srl_su GR32:$src, (i8 8)))), addr:$dst),
          (MOV8mr_NOREX
            addr:$dst,
            (EXTRACT_SUBREG GR32:$src, sub_8bit_hi))>,
      Requires<[In64BitMode]>;
def : Pat<(store (i8 (trunc_su (srl_su GR16:$src, (i8 8)))), addr:$dst),
          (MOV8mr_NOREX
            addr:$dst,
            (EXTRACT_SUBREG GR16:$src, sub_8bit_hi))>,
      Requires<[In64BitMode]>;

// Special pattern to catch the last step of __builtin_parity handling. Our
// goal is to use an xor of an h-register with the corresponding l-register.
// The above patterns would handle this on non 64-bit targets, but for 64-bit
// we need to be more careful. We're using a NOREX instruction here in case
// register allocation fails to keep the two registers together. So we need to
// make sure we can't accidentally mix R8-R15 with an h-register.
def : Pat<(X86xor_flag (i8 (trunc GR32:$src)),
                       (i8 (trunc (srl_su GR32:$src, (i8 8))))),
          (XOR8rr_NOREX (EXTRACT_SUBREG GR32:$src, sub_8bit),
                        (EXTRACT_SUBREG GR32:$src, sub_8bit_hi))>;

// (shl x, 1) ==> (add x, x)
// Note that if x is undef (immediate or otherwise), we could theoretically
// end up with the two uses of x getting different values, producing a result
// where the least significant bit is not 0. However, the probability of this
// happening is considered low enough that this is officially not a
// "real problem".
let Predicates = [NoNDD] in {
  def : Pat<(shl GR8 :$src1, (i8 1)), (ADD8rr GR8 :$src1, GR8 :$src1)>;
  def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr GR16:$src1, GR16:$src1)>;
  def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr GR32:$src1, GR32:$src1)>;
  def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr GR64:$src1, GR64:$src1)>;
}
// Same selection using the _ND instruction variants under HasNDD.
let Predicates = [HasNDD] in {
  def : Pat<(shl GR8 :$src1, (i8 1)), (ADD8rr_ND GR8 :$src1, GR8 :$src1)>;
  def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr_ND GR16:$src1, GR16:$src1)>;
  def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr_ND GR32:$src1, GR32:$src1)>;
  def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr_ND GR64:$src1, GR64:$src1)>;
}

// Shift amount is implicitly masked.
1859multiclass MaskedShiftAmountPats<SDNode frag> { 1860 // (shift x (and y, 31)) ==> (shift x, y) 1861 // (shift x (and y, 63)) ==> (shift x, y) 1862 let Predicates = [NoNDD] in { 1863 def : Pat<(frag GR8:$src1, (shiftMask32 CL)), 1864 (!cast<Instruction>(NAME # "8rCL") GR8:$src1)>; 1865 def : Pat<(frag GR16:$src1, (shiftMask32 CL)), 1866 (!cast<Instruction>(NAME # "16rCL") GR16:$src1)>; 1867 def : Pat<(frag GR32:$src1, (shiftMask32 CL)), 1868 (!cast<Instruction>(NAME # "32rCL") GR32:$src1)>; 1869 def : Pat<(frag GR64:$src1, (shiftMask64 CL)), 1870 (!cast<Instruction>(NAME # "64rCL") GR64:$src1)>; 1871 } 1872 let Predicates = [HasNDD] in { 1873 def : Pat<(frag GR8:$src1, (shiftMask32 CL)), 1874 (!cast<Instruction>(NAME # "8rCL_ND") GR8:$src1)>; 1875 def : Pat<(frag GR16:$src1, (shiftMask32 CL)), 1876 (!cast<Instruction>(NAME # "16rCL_ND") GR16:$src1)>; 1877 def : Pat<(frag GR32:$src1, (shiftMask32 CL)), 1878 (!cast<Instruction>(NAME # "32rCL_ND") GR32:$src1)>; 1879 def : Pat<(frag GR64:$src1, (shiftMask64 CL)), 1880 (!cast<Instruction>(NAME # "64rCL_ND") GR64:$src1)>; 1881 } 1882 1883 def : Pat<(store (frag (loadi8 addr:$dst), (shiftMask32 CL)), addr:$dst), 1884 (!cast<Instruction>(NAME # "8mCL") addr:$dst)>; 1885 def : Pat<(store (frag (loadi16 addr:$dst), (shiftMask32 CL)), addr:$dst), 1886 (!cast<Instruction>(NAME # "16mCL") addr:$dst)>; 1887 def : Pat<(store (frag (loadi32 addr:$dst), (shiftMask32 CL)), addr:$dst), 1888 (!cast<Instruction>(NAME # "32mCL") addr:$dst)>; 1889 def : Pat<(store (frag (loadi64 addr:$dst), (shiftMask64 CL)), addr:$dst), 1890 (!cast<Instruction>(NAME # "64mCL") addr:$dst)>; 1891 1892 let Predicates = [HasNDD] in { 1893 def : Pat<(frag (loadi8 addr:$src), (shiftMask32 CL)), 1894 (!cast<Instruction>(NAME # "8mCL_ND") addr:$src)>; 1895 def : Pat<(frag (loadi16 addr:$src), (shiftMask32 CL)), 1896 (!cast<Instruction>(NAME # "16mCL_ND") addr:$src)>; 1897 def : Pat<(frag (loadi32 addr:$src), (shiftMask32 CL)), 1898 (!cast<Instruction>(NAME # 
"32mCL_ND") addr:$src)>; 1899 def : Pat<(frag (loadi64 addr:$src), (shiftMask64 CL)), 1900 (!cast<Instruction>(NAME # "64mCL_ND") addr:$src)>; 1901 } 1902} 1903 1904defm SHL : MaskedShiftAmountPats<shl>; 1905defm SHR : MaskedShiftAmountPats<srl>; 1906defm SAR : MaskedShiftAmountPats<sra>; 1907 1908// ROL/ROR instructions allow a stronger mask optimization than shift for 8- and 1909// 16-bit. We can remove a mask of any (bitwidth - 1) on the rotation amount 1910// because over-rotating produces the same result. This is noted in the Intel 1911// docs with: "tempCOUNT <- (COUNT & COUNTMASK) MOD SIZE". Masking the rotation 1912// amount could affect EFLAGS results, but that does not matter because we are 1913// not tracking flags for these nodes. 1914multiclass MaskedRotateAmountPats<SDNode frag> { 1915 // (rot x (and y, BitWidth - 1)) ==> (rot x, y) 1916 let Predicates = [NoNDD] in { 1917 def : Pat<(frag GR8:$src1, (shiftMask8 CL)), 1918 (!cast<Instruction>(NAME # "8rCL") GR8:$src1)>; 1919 def : Pat<(frag GR16:$src1, (shiftMask16 CL)), 1920 (!cast<Instruction>(NAME # "16rCL") GR16:$src1)>; 1921 def : Pat<(frag GR32:$src1, (shiftMask32 CL)), 1922 (!cast<Instruction>(NAME # "32rCL") GR32:$src1)>; 1923 def : Pat<(frag GR64:$src1, (shiftMask64 CL)), 1924 (!cast<Instruction>(NAME # "64rCL") GR64:$src1)>; 1925 } 1926 let Predicates = [HasNDD] in { 1927 def : Pat<(frag GR8:$src1, (shiftMask8 CL)), 1928 (!cast<Instruction>(NAME # "8rCL_ND") GR8:$src1)>; 1929 def : Pat<(frag GR16:$src1, (shiftMask16 CL)), 1930 (!cast<Instruction>(NAME # "16rCL_ND") GR16:$src1)>; 1931 def : Pat<(frag GR32:$src1, (shiftMask32 CL)), 1932 (!cast<Instruction>(NAME # "32rCL_ND") GR32:$src1)>; 1933 def : Pat<(frag GR64:$src1, (shiftMask64 CL)), 1934 (!cast<Instruction>(NAME # "64rCL_ND") GR64:$src1)>; 1935 } 1936 1937 def : Pat<(store (frag (loadi8 addr:$dst), (shiftMask8 CL)), addr:$dst), 1938 (!cast<Instruction>(NAME # "8mCL") addr:$dst)>; 1939 def : Pat<(store (frag (loadi16 addr:$dst), 
(shiftMask16 CL)), addr:$dst), 1940 (!cast<Instruction>(NAME # "16mCL") addr:$dst)>; 1941 def : Pat<(store (frag (loadi32 addr:$dst), (shiftMask32 CL)), addr:$dst), 1942 (!cast<Instruction>(NAME # "32mCL") addr:$dst)>; 1943 def : Pat<(store (frag (loadi64 addr:$dst), (shiftMask64 CL)), addr:$dst), 1944 (!cast<Instruction>(NAME # "64mCL") addr:$dst)>; 1945 1946 let Predicates = [HasNDD] in { 1947 def : Pat<(frag (loadi8 addr:$src), (shiftMask8 CL)), 1948 (!cast<Instruction>(NAME # "8mCL_ND") addr:$src)>; 1949 def : Pat<(frag (loadi16 addr:$src), (shiftMask16 CL)), 1950 (!cast<Instruction>(NAME # "16mCL_ND") addr:$src)>; 1951 def : Pat<(frag (loadi32 addr:$src), (shiftMask32 CL)), 1952 (!cast<Instruction>(NAME # "32mCL_ND") addr:$src)>; 1953 def : Pat<(frag (loadi64 addr:$src), (shiftMask64 CL)), 1954 (!cast<Instruction>(NAME # "64mCL_ND") addr:$src)>; 1955 } 1956} 1957 1958defm ROL : MaskedRotateAmountPats<rotl>; 1959defm ROR : MaskedRotateAmountPats<rotr>; 1960 1961multiclass MaskedShlrdAmountPats<string suffix, Predicate p> { 1962 let Predicates = [p] in { 1963 // Double "funnel" shift amount is implicitly masked. 
1964 // (fshl/fshr x (and y, 31)) ==> (fshl/fshr x, y) (NOTE: modulo32) 1965 def : Pat<(X86fshl GR16:$src1, GR16:$src2, (shiftMask32 CL)), 1966 (!cast<Instruction>(SHLD16rrCL#suffix) GR16:$src1, GR16:$src2)>; 1967 def : Pat<(X86fshr GR16:$src2, GR16:$src1, (shiftMask32 CL)), 1968 (!cast<Instruction>(SHRD16rrCL#suffix) GR16:$src1, GR16:$src2)>; 1969 1970 // (fshl/fshr x (and y, 31)) ==> (fshl/fshr x, y) 1971 def : Pat<(fshl GR32:$src1, GR32:$src2, (shiftMask32 CL)), 1972 (!cast<Instruction>(SHLD32rrCL#suffix) GR32:$src1, GR32:$src2)>; 1973 def : Pat<(fshr GR32:$src2, GR32:$src1, (shiftMask32 CL)), 1974 (!cast<Instruction>(SHRD32rrCL#suffix) GR32:$src1, GR32:$src2)>; 1975 1976 // (fshl/fshr x (and y, 63)) ==> (fshl/fshr x, y) 1977 def : Pat<(fshl GR64:$src1, GR64:$src2, (shiftMask64 CL)), 1978 (!cast<Instruction>(SHLD64rrCL#suffix) GR64:$src1, GR64:$src2)>; 1979 def : Pat<(fshr GR64:$src2, GR64:$src1, (shiftMask64 CL)), 1980 (!cast<Instruction>(SHRD64rrCL#suffix) GR64:$src1, GR64:$src2)>; 1981 } 1982} 1983 1984defm : MaskedShlrdAmountPats<"", NoNDD>; 1985defm : MaskedShlrdAmountPats<"_ND", HasNDD>; 1986 1987// Use BTR/BTS/BTC for clearing/setting/toggling a bit in a variable location. 1988multiclass OneBitPats<RegisterClass rc, ValueType vt, Instruction btr, 1989 Instruction bts, Instruction btc, PatFrag mask> { 1990 def : Pat<(and rc:$src1, (rotl -2, GR8:$src2)), 1991 (btr rc:$src1, 1992 (INSERT_SUBREG (vt (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>; 1993 def : Pat<(or rc:$src1, (shl 1, GR8:$src2)), 1994 (bts rc:$src1, 1995 (INSERT_SUBREG (vt (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>; 1996 def : Pat<(xor rc:$src1, (shl 1, GR8:$src2)), 1997 (btc rc:$src1, 1998 (INSERT_SUBREG (vt (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>; 1999 2000 // Similar to above, but removing unneeded masking of the shift amount. 
2001 def : Pat<(and rc:$src1, (rotl -2, (mask GR8:$src2))), 2002 (btr rc:$src1, 2003 (INSERT_SUBREG (vt (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>; 2004 def : Pat<(or rc:$src1, (shl 1, (mask GR8:$src2))), 2005 (bts rc:$src1, 2006 (INSERT_SUBREG (vt (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>; 2007 def : Pat<(xor rc:$src1, (shl 1, (mask GR8:$src2))), 2008 (btc rc:$src1, 2009 (INSERT_SUBREG (vt (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>; 2010} 2011 2012defm : OneBitPats<GR16, i16, BTR16rr, BTS16rr, BTC16rr, shiftMask16>; 2013defm : OneBitPats<GR32, i32, BTR32rr, BTS32rr, BTC32rr, shiftMask32>; 2014defm : OneBitPats<GR64, i64, BTR64rr, BTS64rr, BTC64rr, shiftMask64>; 2015 2016//===----------------------------------------------------------------------===// 2017// EFLAGS-defining Patterns 2018//===----------------------------------------------------------------------===// 2019 2020multiclass EFLAGSDefiningPats<string suffix, Predicate p> { 2021 let Predicates = [p] in { 2022 // add reg, reg 2023 def : Pat<(add GR8 :$src1, GR8 :$src2), (!cast<Instruction>(ADD8rr#suffix) GR8 :$src1, GR8 :$src2)>; 2024 def : Pat<(add GR16:$src1, GR16:$src2), (!cast<Instruction>(ADD16rr#suffix) GR16:$src1, GR16:$src2)>; 2025 def : Pat<(add GR32:$src1, GR32:$src2), (!cast<Instruction>(ADD32rr#suffix) GR32:$src1, GR32:$src2)>; 2026 def : Pat<(add GR64:$src1, GR64:$src2), (!cast<Instruction>(ADD64rr#suffix) GR64:$src1, GR64:$src2)>; 2027 2028 // add reg, mem 2029 def : Pat<(add GR8:$src1, (loadi8 addr:$src2)), 2030 (!cast<Instruction>(ADD8rm#suffix) GR8:$src1, addr:$src2)>; 2031 def : Pat<(add GR16:$src1, (loadi16 addr:$src2)), 2032 (!cast<Instruction>(ADD16rm#suffix) GR16:$src1, addr:$src2)>; 2033 def : Pat<(add GR32:$src1, (loadi32 addr:$src2)), 2034 (!cast<Instruction>(ADD32rm#suffix) GR32:$src1, addr:$src2)>; 2035 def : Pat<(add GR64:$src1, (loadi64 addr:$src2)), 2036 (!cast<Instruction>(ADD64rm#suffix) GR64:$src1, addr:$src2)>; 2037 2038 // add reg, imm 2039 def : Pat<(add GR8 :$src1, imm:$src2), 
(!cast<Instruction>(ADD8ri#suffix) GR8:$src1 , imm:$src2)>; 2040 def : Pat<(add GR16:$src1, imm:$src2), (!cast<Instruction>(ADD16ri#suffix) GR16:$src1, imm:$src2)>; 2041 def : Pat<(add GR32:$src1, imm:$src2), (!cast<Instruction>(ADD32ri#suffix) GR32:$src1, imm:$src2)>; 2042 def : Pat<(add GR64:$src1, i64immSExt32:$src2), (!cast<Instruction>(ADD64ri32#suffix) GR64:$src1, i64immSExt32:$src2)>; 2043 2044 // sub reg, reg 2045 def : Pat<(sub GR8 :$src1, GR8 :$src2), (!cast<Instruction>(SUB8rr#suffix) GR8 :$src1, GR8 :$src2)>; 2046 def : Pat<(sub GR16:$src1, GR16:$src2), (!cast<Instruction>(SUB16rr#suffix) GR16:$src1, GR16:$src2)>; 2047 def : Pat<(sub GR32:$src1, GR32:$src2), (!cast<Instruction>(SUB32rr#suffix) GR32:$src1, GR32:$src2)>; 2048 def : Pat<(sub GR64:$src1, GR64:$src2), (!cast<Instruction>(SUB64rr#suffix) GR64:$src1, GR64:$src2)>; 2049 2050 // sub reg, mem 2051 def : Pat<(sub GR8:$src1, (loadi8 addr:$src2)), 2052 (!cast<Instruction>(SUB8rm#suffix) GR8:$src1, addr:$src2)>; 2053 def : Pat<(sub GR16:$src1, (loadi16 addr:$src2)), 2054 (!cast<Instruction>(SUB16rm#suffix) GR16:$src1, addr:$src2)>; 2055 def : Pat<(sub GR32:$src1, (loadi32 addr:$src2)), 2056 (!cast<Instruction>(SUB32rm#suffix) GR32:$src1, addr:$src2)>; 2057 def : Pat<(sub GR64:$src1, (loadi64 addr:$src2)), 2058 (!cast<Instruction>(SUB64rm#suffix) GR64:$src1, addr:$src2)>; 2059 2060 // sub reg, imm 2061 def : Pat<(sub GR8:$src1, imm:$src2), 2062 (!cast<Instruction>(SUB8ri#suffix) GR8:$src1, imm:$src2)>; 2063 def : Pat<(sub GR16:$src1, imm:$src2), 2064 (!cast<Instruction>(SUB16ri#suffix) GR16:$src1, imm:$src2)>; 2065 def : Pat<(sub GR32:$src1, imm:$src2), 2066 (!cast<Instruction>(SUB32ri#suffix) GR32:$src1, imm:$src2)>; 2067 def : Pat<(sub GR64:$src1, i64immSExt32:$src2), 2068 (!cast<Instruction>(SUB64ri32#suffix) GR64:$src1, i64immSExt32:$src2)>; 2069 2070 // sub 0, reg 2071 def : Pat<(X86sub_flag 0, GR8 :$src), (!cast<Instruction>(NEG8r#suffix) GR8 :$src)>; 2072 def : Pat<(X86sub_flag 0, GR16:$src), 
(!cast<Instruction>(NEG16r#suffix) GR16:$src)>; 2073 def : Pat<(X86sub_flag 0, GR32:$src), (!cast<Instruction>(NEG32r#suffix) GR32:$src)>; 2074 def : Pat<(X86sub_flag 0, GR64:$src), (!cast<Instruction>(NEG64r#suffix) GR64:$src)>; 2075 2076 // mul reg, reg 2077 def : Pat<(mul GR16:$src1, GR16:$src2), 2078 (!cast<Instruction>(IMUL16rr#suffix) GR16:$src1, GR16:$src2)>; 2079 def : Pat<(mul GR32:$src1, GR32:$src2), 2080 (!cast<Instruction>(IMUL32rr#suffix) GR32:$src1, GR32:$src2)>; 2081 def : Pat<(mul GR64:$src1, GR64:$src2), 2082 (!cast<Instruction>(IMUL64rr#suffix) GR64:$src1, GR64:$src2)>; 2083 2084 // mul reg, mem 2085 def : Pat<(mul GR16:$src1, (loadi16 addr:$src2)), 2086 (!cast<Instruction>(IMUL16rm#suffix) GR16:$src1, addr:$src2)>; 2087 def : Pat<(mul GR32:$src1, (loadi32 addr:$src2)), 2088 (!cast<Instruction>(IMUL32rm#suffix) GR32:$src1, addr:$src2)>; 2089 def : Pat<(mul GR64:$src1, (loadi64 addr:$src2)), 2090 (!cast<Instruction>(IMUL64rm#suffix) GR64:$src1, addr:$src2)>; 2091 2092 // or reg/reg. 
def : Pat<(or GR8:$src1, GR8:$src2),
          (!cast<Instruction>(OR8rr#suffix) GR8:$src1, GR8:$src2)>;
def : Pat<(or GR16:$src1, GR16:$src2),
          (!cast<Instruction>(OR16rr#suffix) GR16:$src1, GR16:$src2)>;
def : Pat<(or GR32:$src1, GR32:$src2),
          (!cast<Instruction>(OR32rr#suffix) GR32:$src1, GR32:$src2)>;
def : Pat<(or GR64:$src1, GR64:$src2),
          (!cast<Instruction>(OR64rr#suffix) GR64:$src1, GR64:$src2)>;

// or: register with a loaded memory operand.
def : Pat<(or GR8:$src1, (loadi8 addr:$src2)),
          (!cast<Instruction>(OR8rm#suffix) GR8:$src1, addr:$src2)>;
def : Pat<(or GR16:$src1, (loadi16 addr:$src2)),
          (!cast<Instruction>(OR16rm#suffix) GR16:$src1, addr:$src2)>;
def : Pat<(or GR32:$src1, (loadi32 addr:$src2)),
          (!cast<Instruction>(OR32rm#suffix) GR32:$src1, addr:$src2)>;
def : Pat<(or GR64:$src1, (loadi64 addr:$src2)),
          (!cast<Instruction>(OR64rm#suffix) GR64:$src1, addr:$src2)>;

// or: register with an immediate. The 64-bit form only matches
// i64immSExt32 operands and selects the ri32 instruction.
def : Pat<(or GR8:$src1, imm:$src2),
          (!cast<Instruction>(OR8ri#suffix) GR8:$src1, imm:$src2)>;
def : Pat<(or GR16:$src1, imm:$src2),
          (!cast<Instruction>(OR16ri#suffix) GR16:$src1, imm:$src2)>;
def : Pat<(or GR32:$src1, imm:$src2),
          (!cast<Instruction>(OR32ri#suffix) GR32:$src1, imm:$src2)>;
def : Pat<(or GR64:$src1, i64immSExt32:$src2),
          (!cast<Instruction>(OR64ri32#suffix) GR64:$src1, i64immSExt32:$src2)>;

// xor: register with register.
def : Pat<(xor GR8:$src1, GR8:$src2),
          (!cast<Instruction>(XOR8rr#suffix) GR8:$src1, GR8:$src2)>;
def : Pat<(xor GR16:$src1, GR16:$src2),
          (!cast<Instruction>(XOR16rr#suffix) GR16:$src1, GR16:$src2)>;
def : Pat<(xor GR32:$src1, GR32:$src2),
          (!cast<Instruction>(XOR32rr#suffix) GR32:$src1, GR32:$src2)>;
def : Pat<(xor GR64:$src1, GR64:$src2),
          (!cast<Instruction>(XOR64rr#suffix) GR64:$src1, GR64:$src2)>;

// xor: register with a loaded memory operand.
def : Pat<(xor GR8:$src1, (loadi8 addr:$src2)),
          (!cast<Instruction>(XOR8rm#suffix) GR8:$src1, addr:$src2)>;
def : Pat<(xor GR16:$src1, (loadi16 addr:$src2)),
          (!cast<Instruction>(XOR16rm#suffix) GR16:$src1, addr:$src2)>;
def : Pat<(xor GR32:$src1, (loadi32 addr:$src2)),
          (!cast<Instruction>(XOR32rm#suffix) GR32:$src1, addr:$src2)>;
def : Pat<(xor GR64:$src1, (loadi64 addr:$src2)),
          (!cast<Instruction>(XOR64rm#suffix) GR64:$src1, addr:$src2)>;

// xor: register with an immediate (64-bit uses i64immSExt32 / ri32).
def : Pat<(xor GR8:$src1, imm:$src2),
          (!cast<Instruction>(XOR8ri#suffix) GR8:$src1, imm:$src2)>;
def : Pat<(xor GR16:$src1, imm:$src2),
          (!cast<Instruction>(XOR16ri#suffix) GR16:$src1, imm:$src2)>;
def : Pat<(xor GR32:$src1, imm:$src2),
          (!cast<Instruction>(XOR32ri#suffix) GR32:$src1, imm:$src2)>;
def : Pat<(xor GR64:$src1, i64immSExt32:$src2),
          (!cast<Instruction>(XOR64ri32#suffix) GR64:$src1, i64immSExt32:$src2)>;

// and: register with register.
def : Pat<(and GR8:$src1, GR8:$src2),
          (!cast<Instruction>(AND8rr#suffix) GR8:$src1, GR8:$src2)>;
def : Pat<(and GR16:$src1, GR16:$src2),
          (!cast<Instruction>(AND16rr#suffix) GR16:$src1, GR16:$src2)>;
def : Pat<(and GR32:$src1, GR32:$src2),
          (!cast<Instruction>(AND32rr#suffix) GR32:$src1, GR32:$src2)>;
def : Pat<(and GR64:$src1, GR64:$src2),
          (!cast<Instruction>(AND64rr#suffix) GR64:$src1, GR64:$src2)>;

// and: register with a loaded memory operand.
def : Pat<(and GR8:$src1, (loadi8 addr:$src2)),
          (!cast<Instruction>(AND8rm#suffix) GR8:$src1, addr:$src2)>;
def : Pat<(and GR16:$src1, (loadi16 addr:$src2)),
          (!cast<Instruction>(AND16rm#suffix) GR16:$src1, addr:$src2)>;
def : Pat<(and GR32:$src1, (loadi32 addr:$src2)),
          (!cast<Instruction>(AND32rm#suffix) GR32:$src1, addr:$src2)>;
def : Pat<(and GR64:$src1, (loadi64 addr:$src2)),
          (!cast<Instruction>(AND64rm#suffix) GR64:$src1, addr:$src2)>;

// and: register with an immediate (64-bit uses i64immSExt32 / ri32).
def : Pat<(and GR8:$src1, imm:$src2),
          (!cast<Instruction>(AND8ri#suffix) GR8:$src1, imm:$src2)>;
def : Pat<(and GR16:$src1, imm:$src2),
          (!cast<Instruction>(AND16ri#suffix) GR16:$src1, imm:$src2)>;
def : Pat<(and GR32:$src1, imm:$src2),
          (!cast<Instruction>(AND32ri#suffix) GR32:$src1, imm:$src2)>;
def : Pat<(and GR64:$src1, i64immSExt32:$src2),
          (!cast<Instruction>(AND64ri32#suffix) GR64:$src1, i64immSExt32:$src2)>;
} // Closes a `let` scope opened before this chunk (presumably
  // `let Predicates = [p] in {` -- TODO confirm against the header).

// Register increment/decrement.
// These patterns additionally require UseIncDec: do not form INC/DEC when
// it is slow.
let Predicates = [UseIncDec, p] in {
  // Plain add of the constant 1 selects INC.
  def : Pat<(add GR8:$src, 1),
            (!cast<Instruction>(INC8r#suffix) GR8:$src)>;
  def : Pat<(add GR16:$src, 1),
            (!cast<Instruction>(INC16r#suffix) GR16:$src)>;
  def : Pat<(add GR32:$src, 1),
            (!cast<Instruction>(INC32r#suffix) GR32:$src)>;
  def : Pat<(add GR64:$src, 1),
            (!cast<Instruction>(INC64r#suffix) GR64:$src)>;
  // Plain add of the constant -1 selects DEC.
  def : Pat<(add GR8:$src, -1),
            (!cast<Instruction>(DEC8r#suffix) GR8:$src)>;
  def : Pat<(add GR16:$src, -1),
            (!cast<Instruction>(DEC16r#suffix) GR16:$src)>;
  def : Pat<(add GR32:$src, -1),
            (!cast<Instruction>(DEC32r#suffix) GR32:$src)>;
  def : Pat<(add GR64:$src, -1),
            (!cast<Instruction>(DEC64r#suffix) GR64:$src)>;

  // X86add_flag_nocf with -1 selects DEC.
  def : Pat<(X86add_flag_nocf GR8:$src, -1),
            (!cast<Instruction>(DEC8r#suffix) GR8:$src)>;
  def : Pat<(X86add_flag_nocf GR16:$src, -1),
            (!cast<Instruction>(DEC16r#suffix) GR16:$src)>;
  def : Pat<(X86add_flag_nocf GR32:$src, -1),
            (!cast<Instruction>(DEC32r#suffix) GR32:$src)>;
  def : Pat<(X86add_flag_nocf GR64:$src, -1),
            (!cast<Instruction>(DEC64r#suffix) GR64:$src)>;
  // X86sub_flag_nocf with -1 (i.e. subtracting -1) selects INC.
  def : Pat<(X86sub_flag_nocf GR8:$src, -1),
            (!cast<Instruction>(INC8r#suffix) GR8:$src)>;
  def : Pat<(X86sub_flag_nocf GR16:$src, -1),
            (!cast<Instruction>(INC16r#suffix) GR16:$src)>;
  def : Pat<(X86sub_flag_nocf GR32:$src, -1),
            (!cast<Instruction>(INC32r#suffix) GR32:$src)>;
  def : Pat<(X86sub_flag_nocf GR64:$src, -1),
            (!cast<Instruction>(INC64r#suffix) GR64:$src)>;

  // or_is_add with the constant 1 also selects INC.
  def : Pat<(or_is_add GR8:$src, 1),
            (!cast<Instruction>(INC8r#suffix) GR8:$src)>;
  def : Pat<(or_is_add GR16:$src, 1),
            (!cast<Instruction>(INC16r#suffix) GR16:$src)>;
  def : Pat<(or_is_add GR32:$src, 1),
            (!cast<Instruction>(INC32r#suffix) GR32:$src)>;
  def : Pat<(or_is_add GR64:$src, 1),
            (!cast<Instruction>(INC64r#suffix) GR64:$src)>;
}
} // Closes the enclosing definition that supplies `suffix` and `p`
  // (presumably multiclass EFLAGSDefiningPats -- TODO confirm).

// Instantiate the patterns twice: with an empty instruction-name suffix
// under NoNDD, and with the "_ND" suffix under HasNDD.
defm : EFLAGSDefiningPats<"", NoNDD>;
defm : EFLAGSDefiningPats<"_ND", HasNDD>;

let Predicates = [HasZU] in {
  // zext (mul reg/mem, imm) -> imulzu
  // The 16-bit IMULZU result is wrapped with SUBREG_TO_REG at sub_16bit to
  // form the i32 / i64 value.
  def : Pat<(i32 (zext (i16 (mul GR16:$src1, imm:$src2)))),
            (SUBREG_TO_REG (i32 0),
                           (IMULZU16rri GR16:$src1, imm:$src2),
                           sub_16bit)>;
  def : Pat<(i32 (zext (i16 (mul (loadi16 addr:$src1), imm:$src2)))),
            (SUBREG_TO_REG (i32 0),
                           (IMULZU16rmi addr:$src1, imm:$src2),
                           sub_16bit)>;
  def : Pat<(i64 (zext (i16 (mul GR16:$src1, imm:$src2)))),
            (SUBREG_TO_REG (i64 0),
                           (IMULZU16rri GR16:$src1, imm:$src2),
                           sub_16bit)>;
  def : Pat<(i64 (zext (i16 (mul (loadi16 addr:$src1), imm:$src2)))),
            (SUBREG_TO_REG (i64 0),
                           (IMULZU16rmi addr:$src1, imm:$src2),
                           sub_16bit)>;
}

// mul reg, imm
// The 64-bit form only matches i64immSExt32 operands and selects the
// rri32 instruction.
def : Pat<(mul GR16:$src1, imm:$src2), (IMUL16rri GR16:$src1, imm:$src2)>;
def : Pat<(mul GR32:$src1, imm:$src2), (IMUL32rri GR32:$src1, imm:$src2)>;
def : Pat<(mul GR64:$src1, i64immSExt32:$src2),
          (IMUL64rri32 GR64:$src1, i64immSExt32:$src2)>;

// reg = mul mem, imm
// Same as above, with the first multiplicand loaded from memory.
def : Pat<(mul (loadi16 addr:$src1), imm:$src2),
          (IMUL16rmi addr:$src1, imm:$src2)>;
def : Pat<(mul (loadi32 addr:$src1), imm:$src2),
          (IMUL32rmi addr:$src1, imm:$src2)>;
def : Pat<(mul (loadi64 addr:$src1), i64immSExt32:$src2),
          (IMUL64rmi32 addr:$src1, i64immSExt32:$src2)>;

// Bit scan instruction patterns to match explicit zero-undef behavior.
// cttz_zero_undef of a register selects BSF*rr; the instruction's first
// operand is supplied as an IMPLICIT_DEF of the matching width.
def : Pat<(cttz_zero_undef GR16:$src),
          (BSF16rr (i16 (IMPLICIT_DEF)), GR16:$src)>;
def : Pat<(cttz_zero_undef GR32:$src),
          (BSF32rr (i32 (IMPLICIT_DEF)), GR32:$src)>;
def : Pat<(cttz_zero_undef GR64:$src),
          (BSF64rr (i64 (IMPLICIT_DEF)), GR64:$src)>;
// Same, with the scanned value loaded from memory (BSF*rm).
def : Pat<(cttz_zero_undef (loadi16 addr:$src)),
          (BSF16rm (i16 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(cttz_zero_undef (loadi32 addr:$src)),
          (BSF32rm (i32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(cttz_zero_undef (loadi64 addr:$src)),
          (BSF64rm (i64 (IMPLICIT_DEF)), addr:$src)>;

// With HasMOVBE a register-register 16-bit bswap can reach instruction
// selection without having been legalized. Select it as ROL16ri with an
// immediate rotate count of 8.
let Predicates = [HasMOVBE] in {
  def : Pat<(bswap GR16:$src),
            (ROL16ri GR16:$src, (i8 8))>;
}