xref: /freebsd/contrib/llvm-project/llvm/lib/Target/X86/X86InstrCompiler.td (revision 700637cbb5e582861067a11aaca4d053546871d2)
1//===- X86InstrCompiler.td - Compiler Pseudos and Patterns -*- tablegen -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file describes the various pseudo instructions used by the compiler,
10// as well as Pat patterns used during instruction selection.
11//
12//===----------------------------------------------------------------------===//
13
14//===----------------------------------------------------------------------===//
15// Pattern Matching Support
16
// SDNodeXForm that rebuilds an immediate as an i32 immediate holding only the
// low 32 bits of its zero-extended value.
def GetLo32XForm : SDNodeXForm<imm, [{
  // Truncate the zero-extended value to 32 bits, then re-emit it as i32.
  uint32_t Lo32 = static_cast<uint32_t>(N->getZExtValue());
  return getI32Imm(Lo32, SDLoc(N));
}]>;
21
22
23//===----------------------------------------------------------------------===//
24// Random Pseudo Instructions.
25
// PIC base construction.  This expands to code that looks like this:
//     call  $next_inst
//     popl %destreg
let SchedRW = [WriteJump], Uses = [ESP, SSP], isNotDuplicable = 1,
    hasSideEffects = 0 in
def MOVPC32r : Ii32<0xE8, Pseudo,
                    (outs GR32:$reg),
                    (ins i32imm:$label),
                    "", []>;
33
// Call-frame setup/teardown pseudos for the non-LP64 case.  They implicitly
// use and define ESP because they may be expanded into a stack adjustment,
// and codegen must know the stack pointer may change before prolog-epilog
// rewriting occurs.  EFLAGS is listed as a def on the pessimistic assumption
// that they become sub / add.
let SchedRW = [WriteALU], Uses = [ESP, SSP], Defs = [ESP, EFLAGS, SSP] in {
  def ADJCALLSTACKDOWN32 : I<0, Pseudo,
                             (outs),
                             (ins i32imm:$amt1, i32imm:$amt2, i32imm:$amt3),
                             "#ADJCALLSTACKDOWN",
                             []>,
                           Requires<[NotLP64]>;

  def ADJCALLSTACKUP32 : I<0, Pseudo,
                           (outs),
                           (ins i32imm:$amt1, i32imm:$amt2),
                           "#ADJCALLSTACKUP",
                           [(X86callseq_end timm:$amt1, timm:$amt2)]>,
                         Requires<[NotLP64]>;
}

// callseq_start supplies two amounts; the pseudo's third operand is set to 0.
def : Pat<(X86callseq_start timm:$amt1, timm:$amt2),
          (ADJCALLSTACKDOWN32 i32imm:$amt1, i32imm:$amt2, 0)>,
      Requires<[NotLP64]>;
50
51
// Call-frame setup/teardown pseudos for the LP64 case.  They implicitly use
// and define RSP because they may be expanded into a stack adjustment, and
// codegen must know the stack pointer may change before prolog-epilog
// rewriting occurs.  EFLAGS is listed as a def on the pessimistic assumption
// that they become sub / add.
let SchedRW = [WriteALU], Uses = [RSP, SSP], Defs = [RSP, EFLAGS, SSP] in {
  def ADJCALLSTACKDOWN64 : I<0, Pseudo,
                             (outs),
                             (ins i32imm:$amt1, i32imm:$amt2, i32imm:$amt3),
                             "#ADJCALLSTACKDOWN",
                             []>,
                           Requires<[IsLP64]>;

  def ADJCALLSTACKUP64 : I<0, Pseudo,
                           (outs),
                           (ins i32imm:$amt1, i32imm:$amt2),
                           "#ADJCALLSTACKUP",
                           [(X86callseq_end timm:$amt1, timm:$amt2)]>,
                         Requires<[IsLP64]>;
}

// callseq_start supplies two amounts; the pseudo's third operand is set to 0.
def : Pat<(X86callseq_start timm:$amt1, timm:$amt2),
          (ADJCALLSTACKDOWN64 i32imm:$amt1, i32imm:$amt2, 0)>,
      Requires<[IsLP64]>;
68
69let SchedRW = [WriteSystem] in {
70
// x86-64 va_start lowering magic.  Takes AL and the register-save-area
// address, plus a variable number of further operands.
let Defs = [EFLAGS], mayStore = 1, hasSideEffects = 1 in
def VASTART_SAVE_XMM_REGS
    : I<0, Pseudo,
        (outs),
        (ins GR8:$al, i8mem:$regsavefi, variable_ops),
        "#VASTART_SAVE_XMM_REGS $al, $regsavefi",
        [(X86vastart_save_xmm_regs GR8:$al, addr:$regsavefi)]>;
79
// Pseudos expanded by a custom inserter.  EFLAGS is the default implicit def
// for the group; the alloca pseudos below override Defs/Uses individually.
let usesCustomInserter = 1, Defs = [EFLAGS] in {
  // The VAARG_64 and VAARG_X32 pseudo-instructions take the address of the
  // va_list, and place the address of the next argument into a register.
  def VAARG_64
      : I<0, Pseudo,
          (outs GR64:$dst),
          (ins i8mem:$ap, i32imm:$size, i8imm:$mode, i32imm:$align),
          "#VAARG_64 $dst, $ap, $size, $mode, $align",
          [(set GR64:$dst,
             (X86vaarg64 addr:$ap, timm:$size, timm:$mode, timm:$align))]>,
        Requires<[In64BitMode, IsLP64]>;

  def VAARG_X32
      : I<0, Pseudo,
          (outs GR32:$dst),
          (ins i8mem:$ap, i32imm:$size, i8imm:$mode, i32imm:$align),
          "#VAARG_X32 $dst, $ap, $size, $mode, $align",
          [(set GR32:$dst,
             (X86vaargx32 addr:$ap, timm:$size, timm:$mode, timm:$align))]>,
        Requires<[In64BitMode, NotLP64]>;

  // When using segmented stacks these are lowered into instructions which
  // first check if the current stacklet has enough free memory. If it does,
  // memory is allocated by bumping the stack pointer. Otherwise memory is
  // allocated from the heap.
  let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in
  def SEG_ALLOCA_32
      : I<0, Pseudo,
          (outs GR32:$dst),
          (ins GR32:$size),
          "# variable sized alloca for segmented stacks",
          [(set GR32:$dst, (X86SegAlloca GR32:$size))]>,
        Requires<[NotLP64]>;

  let Defs = [RAX, RSP, EFLAGS], Uses = [RSP] in
  def SEG_ALLOCA_64
      : I<0, Pseudo,
          (outs GR64:$dst),
          (ins GR64:$size),
          "# variable sized alloca for segmented stacks",
          [(set GR64:$dst, (X86SegAlloca GR64:$size))]>,
        Requires<[In64BitMode]>;

  // To protect against stack clash, dynamic allocation should perform a
  // memory probe at each page.
  let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in
  def PROBED_ALLOCA_32
      : I<0, Pseudo,
          (outs GR32:$dst),
          (ins GR32:$size),
          "# variable sized alloca with probing",
          [(set GR32:$dst, (X86ProbedAlloca GR32:$size))]>,
        Requires<[NotLP64]>;

  let Defs = [RAX, RSP, EFLAGS], Uses = [RSP] in
  def PROBED_ALLOCA_64
      : I<0, Pseudo,
          (outs GR64:$dst),
          (ins GR64:$size),
          "# variable sized alloca with probing",
          [(set GR64:$dst, (X86ProbedAlloca GR64:$size))]>,
        Requires<[In64BitMode]>;
} // usesCustomInserter = 1, Defs = [EFLAGS]
136
// Fixed-size probed stack allocation pseudo; it has no selection pattern and
// carries no scheduling info.
let hasNoSchedulingInfo = 1 in
def STACKALLOC_W_PROBING
    : I<0, Pseudo,
        (outs),
        (ins i64imm:$stacksize),
        "# fixed size alloca with probing",
        []>;
141
// Dynamic stack allocation yields a _chkstk or _alloca call for all Windows
// targets.  These calls are needed to probe the stack when allocating more
// than 4k bytes in one go.  Touching the stack at 4K increments is necessary
// to ensure that the guard pages used by the OS virtual memory manager are
// allocated in correct sequence.
// The main point of having separate instructions is the extra unmodelled
// effects (compared to ordinary calls), such as the stack pointer change.

let Uses = [ESP], Defs = [EAX, ESP, EFLAGS] in
def DYN_ALLOCA_32
    : I<0, Pseudo,
        (outs),
        (ins GR32:$size),
        "# dynamic stack allocation",
        [(X86DynAlloca GR32:$size)]>,
      Requires<[NotLP64]>;

let Uses = [RSP], Defs = [RAX, RSP, EFLAGS] in
def DYN_ALLOCA_64
    : I<0, Pseudo,
        (outs),
        (ins GR64:$size),
        "# dynamic stack allocation",
        [(X86DynAlloca GR64:$size)]>,
      Requires<[In64BitMode]>;
161} // SchedRW
162
// These instructions XOR the frame pointer into a GPR. They are used in some
// stack protection schemes. These are post-RA pseudos because we only know the
// frame register after register allocation.
//
// $dst is tied to $src and EFLAGS is an implicit def.  The asm strings are
// only placeholder text for the pseudos; both use the same
// "<mnemonic>\t$$FP, $src" shape (XOR64_FP previously lacked the comma
// between the operands).
// NOTE(review): isMoveImm = 1 is kept as found; confirm it is intended for an
// XOR with the frame pointer.
let Constraints = "$src = $dst", isMoveImm = 1, isPseudo = 1, Defs = [EFLAGS] in {
  def XOR32_FP : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$src),
                  "xorl\t$$FP, $src", []>,
                  Requires<[NotLP64]>, Sched<[WriteALU]>;
  def XOR64_FP : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$src),
                  "xorq\t$$FP, $src", []>,
                  Requires<[In64BitMode]>, Sched<[WriteALU]>;
}
174
175//===----------------------------------------------------------------------===//
176// EH Pseudo Instructions
177//
178let SchedRW = [WriteSystem] in {
// EH return pseudos for 32-bit and 64-bit address registers.  Both share the
// same terminator/return/barrier flags and override the enclosing SchedRW
// with WriteJumpLd.
let isCodeGenOnly = 1, hasCtrlDep = 1, isBarrier = 1, isReturn = 1,
    isTerminator = 1 in {
  def EH_RETURN
      : I<0xC3, RawFrm,
          (outs),
          (ins GR32:$addr),
          "ret\t#eh_return, addr: $addr",
          [(X86ehret GR32:$addr)]>,
        Sched<[WriteJumpLd]>;

  def EH_RETURN64
      : I<0xC3, RawFrm,
          (outs),
          (ins GR64:$addr),
          "ret\t#eh_return, addr: $addr",
          [(X86ehret GR64:$addr)]>,
        Sched<[WriteJumpLd]>;
}
194
// Returns out of EH scopes (cleanupret / catchret).
let isEHScopeReturn = 1, isReturn = 1, isCodeGenOnly = 1, hasCtrlDep = 1,
    isBarrier = 1, hasSideEffects = 1, isTerminator = 1 in {
  def CLEANUPRET
      : I<0, Pseudo,
          (outs),
          (ins),
          "# CLEANUPRET",
          [(cleanupret bb)]>;

  // CATCHRET needs a custom inserter for SEH.
  let usesCustomInserter = 1 in
  def CATCHRET
      : I<0, Pseudo,
          (outs),
          (ins brtarget32:$dst, brtarget32:$from),
          "# CATCHRET",
          [(catchret bb:$dst, bb:$from)]>;
}
206
// SjLj setjmp/longjmp pseudos, expanded by a custom inserter.  The setjmp
// forms produce a GR32 result in both modes; the longjmp forms are also
// terminators.
let usesCustomInserter = 1, isCodeGenOnly = 1, isBarrier = 1,
    hasSideEffects = 1 in {
  def EH_SjLj_SetJmp32
      : I<0, Pseudo,
          (outs GR32:$dst),
          (ins i32mem:$buf),
          "#EH_SJLJ_SETJMP32",
          [(set GR32:$dst, (X86eh_sjlj_setjmp addr:$buf))]>,
        Requires<[Not64BitMode]>;

  def EH_SjLj_SetJmp64
      : I<0, Pseudo,
          (outs GR32:$dst),
          (ins i64mem:$buf),
          "#EH_SJLJ_SETJMP64",
          [(set GR32:$dst, (X86eh_sjlj_setjmp addr:$buf))]>,
        Requires<[In64BitMode]>;

  let isTerminator = 1 in {
    def EH_SjLj_LongJmp32
        : I<0, Pseudo,
            (outs),
            (ins i32mem:$buf),
            "#EH_SJLJ_LONGJMP32",
            [(X86eh_sjlj_longjmp addr:$buf)]>,
          Requires<[Not64BitMode]>;

    def EH_SjLj_LongJmp64
        : I<0, Pseudo,
            (outs),
            (ins i64mem:$buf),
            "#EH_SJLJ_LONGJMP64",
            [(X86eh_sjlj_longjmp addr:$buf)]>,
          Requires<[In64BitMode]>;
  } // isTerminator = 1
}
228
// Branch/terminator pseudo taking a single branch target; it has no
// selection pattern.
let isCodeGenOnly = 1, isTerminator = 1, isBranch = 1 in
def EH_SjLj_Setup
    : I<0, Pseudo,
        (outs),
        (ins brtarget:$dst),
        "#EH_SjLj_Setup\t$dst",
        []>;
233} // SchedRW
234
235//===----------------------------------------------------------------------===//
236// Pseudo instructions used by unwind info.
237//
238
// Prolog instructions must not be duplicated, for two reasons:
//  1) if only some of them are duplicated, prolog instructions would be
//     observed after the end-prolog instruction, and
//  2) Windows expects there to be only a single prolog (e.g., when checking
//     whether unwinding is happening in the middle of a prolog).
let SchedRW = [WriteSystem], isNotDuplicable = 1, isMeta = 1, isPseudo = 1 in {
  def SEH_PushReg
      : I<0, Pseudo, (outs), (ins i32imm:$reg),
          "#SEH_PushReg $reg", []>;
  def SEH_SaveReg
      : I<0, Pseudo, (outs), (ins i32imm:$reg, i32imm:$dst),
          "#SEH_SaveReg $reg, $dst", []>;
  def SEH_SaveXMM
      : I<0, Pseudo, (outs), (ins i32imm:$reg, i32imm:$dst),
          "#SEH_SaveXMM $reg, $dst", []>;
  def SEH_StackAlloc
      : I<0, Pseudo, (outs), (ins i32imm:$size),
          "#SEH_StackAlloc $size", []>;
  def SEH_StackAlign
      : I<0, Pseudo, (outs), (ins i32imm:$align),
          "#SEH_StackAlign $align", []>;
  def SEH_SetFrame
      : I<0, Pseudo, (outs), (ins i32imm:$reg, i32imm:$offset),
          "#SEH_SetFrame $reg, $offset", []>;
  def SEH_PushFrame
      : I<0, Pseudo, (outs), (ins i1imm:$mode),
          "#SEH_PushFrame $mode", []>;
  def SEH_EndPrologue
      : I<0, Pseudo, (outs), (ins),
          "#SEH_EndPrologue", []>;
  def SEH_UnwindVersion
      : I<0, Pseudo, (outs), (ins i1imm:$version),
          "#SEH_UnwindVersion $version", []>;
}
264
// Epilog instructions.  Unlike the prolog pseudos, these are not marked
// isNotDuplicable.
let SchedRW = [WriteSystem], isMeta = 1, isPseudo = 1 in {
  def SEH_BeginEpilogue
      : I<0, Pseudo, (outs), (ins), "#SEH_BeginEpilogue", []>;
  def SEH_EndEpilogue
      : I<0, Pseudo, (outs), (ins), "#SEH_EndEpilogue", []>;
  def SEH_UnwindV2Start
      : I<0, Pseudo, (outs), (ins), "#SEH_UnwindV2Start", []>;
}
274
275//===----------------------------------------------------------------------===//
276// Pseudo instructions used by KCFI.
277//===----------------------------------------------------------------------===//
// KCFI check pseudo: takes a 64-bit pointer and a 32-bit type immediate, and
// lists R10, R11 and EFLAGS as implicit defs.
let Defs = [R10, R11, EFLAGS] in
def KCFI_CHECK
    : PseudoI<(outs),
              (ins GR64:$ptr, i32imm:$type),
              []>,
      Sched<[]>;
283
284//===----------------------------------------------------------------------===//
285// Pseudo instructions used by address sanitizer.
286//===----------------------------------------------------------------------===//
// Selected from the int_asan_check_memaccess intrinsic; lists R10, R11 and
// EFLAGS as implicit defs.
let Defs = [R10, R11, EFLAGS] in
def ASAN_CHECK_MEMACCESS
    : PseudoI<(outs),
              (ins GR64PLTSafe:$addr, i32imm:$accessinfo),
              [(int_asan_check_memaccess GR64PLTSafe:$addr,
                                         (i32 timm:$accessinfo))]>,
      Sched<[]>;
294
295//===----------------------------------------------------------------------===//
296// Pseudo instructions used by segmented stacks.
297//
298
let SchedRW = [WriteJumpLd], isPseudo = 1 in {
  // This is lowered into a RET instruction by MCInstLower.  We need this so
  // that we don't have to have a MachineBasicBlock which ends with a RET and
  // also has successors.
  def MORESTACK_RET
      : I<0, Pseudo, (outs), (ins), "", []>;

  // This instruction is lowered to a RET followed by a MOV.  The two
  // instructions are not generated on a higher level since then the verifier
  // sees a MachineBasicBlock ending with a non-terminator.
  def MORESTACK_RET_RESTORE_R10
      : I<0, Pseudo, (outs), (ins), "", []>;
}
310
311//===----------------------------------------------------------------------===//
312// Alias Instructions
313//===----------------------------------------------------------------------===//
314
// Alias instruction mapping movr0 to xor.
// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
let AddedComplexity = 10, isMoveImm = 1, isPseudo = 1,
    isAsCheapAsAMove = 1, isReMaterializable = 1, Defs = [EFLAGS] in
def MOV32r0
    : I<0, Pseudo,
        (outs GR32:$dst),
        (ins),
        "",
        [(set GR32:$dst, 0)]>,
      Sched<[WriteZero]>;
321
// Other widths can also make use of the 32-bit xor, which may have a smaller
// encoding and avoid partial register updates.  The i8 and i16 zeros are
// extracted as sub-registers of MOV32r0; the i64 zero wraps it with
// SUBREG_TO_REG.
let AddedComplexity = 10 in {
  def : Pat<(i8 0),
            (EXTRACT_SUBREG (MOV32r0), sub_8bit)>;
  def : Pat<(i16 0),
            (EXTRACT_SUBREG (MOV32r0), sub_16bit)>;
  def : Pat<(i64 0),
            (SUBREG_TO_REG (i64 0), (MOV32r0), sub_32bit)>;
}
329
let AddedComplexity = 10, Predicates = [OptForSize, Not64BitMode] in {
  // Pseudo instructions for materializing 1 and -1 using XOR+INC/DEC,
  // which only require 3 bytes compared to MOV32ri which requires 5.
  let SchedRW = [WriteALU], Defs = [EFLAGS], isReMaterializable = 1,
      isPseudo = 1 in {
    def MOV32r1
        : I<0, Pseudo, (outs GR32:$dst), (ins), "",
            [(set GR32:$dst, 1)]>;
    def MOV32r_1
        : I<0, Pseudo, (outs GR32:$dst), (ins), "",
            [(set GR32:$dst, -1)]>;
  } // SchedRW, Defs, isReMaterializable, isPseudo

  // MOV16ri is 4 bytes, so the instructions above are smaller.
  def : Pat<(i16 1),
            (EXTRACT_SUBREG (MOV32r1), sub_16bit)>;
  def : Pat<(i16 -1),
            (EXTRACT_SUBREG (MOV32r_1), sub_16bit)>;
}
347
// Materialize sign-extended 8-bit immediates when optimizing for minimum
// size.  AddedComplexity higher than MOV64ri but lower than MOV32r0 and
// MOV32r1.
let SchedRW = [WriteALU], AddedComplexity = 5, isPseudo = 1,
    isReMaterializable = 1 in {
  def MOV32ImmSExti8
      : I<0, Pseudo,
          (outs GR32:$dst),
          (ins i32i8imm:$src),
          "",
          [(set GR32:$dst, i32immSExt8:$src)]>,
        Requires<[OptForMinSize, NotWin64WithoutFP]>;

  def MOV64ImmSExti8
      : I<0, Pseudo,
          (outs GR64:$dst),
          (ins i64i8imm:$src),
          "",
          [(set GR64:$dst, i64immSExt8:$src)]>,
        Requires<[OptForMinSize, NotWin64WithoutFP]>;
}
358
// Materialize i64 constant where top 32-bits are zero. This could
// theoretically use MOV32ri with a SUBREG_TO_REG to represent the
// zero-extension, however that would make it more difficult to rematerialize.
let SchedRW = [WriteMove], isPseudo = 1, isAsCheapAsAMove = 1,
    isReMaterializable = 1, AddedComplexity = 1 in
def MOV32ri64
    : I<0, Pseudo,
        (outs GR64:$dst),
        (ins i64i32imm:$src),
        "",
        [(set GR64:$dst, i64immZExt32:$src)]>;
366
// This 64-bit pseudo-move can also be used for labels in the x86-64 small
// code model.  Matching is delegated to selectMOV64Imm32, rooted at
// X86Wrapper nodes.
def mov64imm32
    : ComplexPattern<i64, 1, "selectMOV64Imm32", [X86Wrapper]>;

def : Pat<(i64 mov64imm32:$src),
          (MOV32ri64 mov64imm32:$src)>;
371
// Use sbb to materialize carry bit.
let SchedRW = [WriteADC], hasSideEffects = 0, isPseudo = 1,
    Uses = [EFLAGS], Defs = [EFLAGS] in {
  // FIXME: These are pseudo ops that should be replaced with Pat<> patterns.
  // However, Pat<> can't replicate the destination reg into the inputs of the
  // result.
  def SETB_C32r
      : I<0, Pseudo, (outs GR32:$dst), (ins), "", []>;
  def SETB_C64r
      : I<0, Pseudo, (outs GR64:$dst), (ins), "", []>;
} // Uses/Defs = [EFLAGS], isPseudo, SchedRW, hasSideEffects
381
382//===----------------------------------------------------------------------===//
383// String Pseudo Instructions
384//
385let SchedRW = [WriteMicrocoded] in {
// rep movs with 32-bit address size: ECX, EDI and ESI are both implicit uses
// and implicit defs.  The asm strings give AT&T and Intel forms.
let isCodeGenOnly = 1, Uses = [ECX,EDI,ESI], Defs = [ECX,EDI,ESI] in {
  def REP_MOVSB_32
      : I<0xA4, RawFrm, (outs), (ins),
          "{rep;movsb (%esi), %es:(%edi)|rep movsb es:[edi], [esi]}",
          [(X86rep_movs i8)]>,
        REP, AdSize32, Requires<[NotLP64]>;

  def REP_MOVSW_32
      : I<0xA5, RawFrm, (outs), (ins),
          "{rep;movsw (%esi), %es:(%edi)|rep movsw es:[edi], [esi]}",
          [(X86rep_movs i16)]>,
        REP, AdSize32, OpSize16, Requires<[NotLP64]>;

  def REP_MOVSD_32
      : I<0xA5, RawFrm, (outs), (ins),
          "{rep;movsl (%esi), %es:(%edi)|rep movsd es:[edi], [esi]}",
          [(X86rep_movs i32)]>,
        REP, AdSize32, OpSize32, Requires<[NotLP64]>;

  def REP_MOVSQ_32
      : RI<0xA5, RawFrm, (outs), (ins),
           "{rep;movsq (%esi), %es:(%edi)|rep movsq es:[edi], [esi]}",
           [(X86rep_movs i64)]>,
        REP, AdSize32, Requires<[NotLP64, In64BitMode]>;
}
404
// rep movs with 64-bit address size: RCX, RDI and RSI are both implicit uses
// and implicit defs.  Each asm string carries an AT&T form and an Intel form
// separated by '|'.
//
// The Intel form of REP_MOVSD_64 previously read "rep movsdi", which is not
// the mnemonic used by the 32-bit twin ("rep movsd"); it now matches.
let Defs = [RCX,RDI,RSI], Uses = [RCX,RDI,RSI], isCodeGenOnly = 1 in {
def REP_MOVSB_64 : I<0xA4, RawFrm, (outs), (ins),
                    "{rep;movsb (%rsi), %es:(%rdi)|rep movsb es:[rdi], [rsi]}",
                    [(X86rep_movs i8)]>, REP, AdSize64,
                   Requires<[IsLP64]>;
def REP_MOVSW_64 : I<0xA5, RawFrm, (outs), (ins),
                    "{rep;movsw (%rsi), %es:(%rdi)|rep movsw es:[rdi], [rsi]}",
                    [(X86rep_movs i16)]>, REP, AdSize64, OpSize16,
                   Requires<[IsLP64]>;
def REP_MOVSD_64 : I<0xA5, RawFrm, (outs), (ins),
                    "{rep;movsl (%rsi), %es:(%rdi)|rep movsd es:[rdi], [rsi]}",
                    [(X86rep_movs i32)]>, REP, AdSize64, OpSize32,
                   Requires<[IsLP64]>;
def REP_MOVSQ_64 : RI<0xA5, RawFrm, (outs), (ins),
                    "{rep;movsq (%rsi), %es:(%rdi)|rep movsq es:[rdi], [rsi]}",
                    [(X86rep_movs i64)]>, REP, AdSize64,
                   Requires<[IsLP64]>;
}
423
// rep stos with 32-bit address size.  ECX and EDI are implicit defs for the
// whole group; each def lists its own implicit uses (the value register plus
// the count/destination registers).
// FIXME: Should use "(X86rep_stos AL)" as the pattern.
let isCodeGenOnly = 1, Defs = [ECX,EDI] in {
  let Uses = [AL,ECX,EDI] in
  def REP_STOSB_32
      : I<0xAA, RawFrm, (outs), (ins),
          "{rep;stosb %al, %es:(%edi)|rep stosb es:[edi], al}",
          [(X86rep_stos i8)]>,
        REP, AdSize32, Requires<[NotLP64]>;

  let Uses = [AX,ECX,EDI] in
  def REP_STOSW_32
      : I<0xAB, RawFrm, (outs), (ins),
          "{rep;stosw %ax, %es:(%edi)|rep stosw es:[edi], ax}",
          [(X86rep_stos i16)]>,
        REP, AdSize32, OpSize16, Requires<[NotLP64]>;

  let Uses = [EAX,ECX,EDI] in
  def REP_STOSD_32
      : I<0xAB, RawFrm, (outs), (ins),
          "{rep;stosl %eax, %es:(%edi)|rep stosd es:[edi], eax}",
          [(X86rep_stos i32)]>,
        REP, AdSize32, OpSize32, Requires<[NotLP64]>;

  let Uses = [RAX,RCX,RDI] in
  def REP_STOSQ_32
      : RI<0xAB, RawFrm, (outs), (ins),
           "{rep;stosq %rax, %es:(%edi)|rep stosq es:[edi], rax}",
           [(X86rep_stos i64)]>,
        REP, AdSize32, Requires<[NotLP64, In64BitMode]>;
}
447
// rep stos with 64-bit address size.  RCX and RDI are implicit defs for the
// whole group; each def lists its own implicit uses.
let isCodeGenOnly = 1, Defs = [RCX,RDI] in {
  let Uses = [AL,RCX,RDI] in
  def REP_STOSB_64
      : I<0xAA, RawFrm, (outs), (ins),
          "{rep;stosb %al, %es:(%rdi)|rep stosb es:[rdi], al}",
          [(X86rep_stos i8)]>,
        REP, AdSize64, Requires<[IsLP64]>;

  let Uses = [AX,RCX,RDI] in
  def REP_STOSW_64
      : I<0xAB, RawFrm, (outs), (ins),
          "{rep;stosw %ax, %es:(%rdi)|rep stosw es:[rdi], ax}",
          [(X86rep_stos i16)]>,
        REP, AdSize64, OpSize16, Requires<[IsLP64]>;

  // NOTE(review): the 32-bit store lists RAX (not EAX) as its value
  // register; kept exactly as found.
  let Uses = [RAX,RCX,RDI] in
  def REP_STOSD_64
      : I<0xAB, RawFrm, (outs), (ins),
          "{rep;stosl %eax, %es:(%rdi)|rep stosd es:[rdi], eax}",
          [(X86rep_stos i32)]>,
        REP, AdSize64, OpSize32, Requires<[IsLP64]>;

  let Uses = [RAX,RCX,RDI] in
  def REP_STOSQ_64
      : RI<0xAB, RawFrm, (outs), (ins),
           "{rep;stosq %rax, %es:(%rdi)|rep stosq es:[rdi], rax}",
           [(X86rep_stos i64)]>,
        REP, AdSize64, Requires<[IsLP64]>;
}
471} // SchedRW
472
473//===----------------------------------------------------------------------===//
474// Thread Local Storage Instructions
475//
476let SchedRW = [WriteSystem] in {
477
// ELF TLS Support
// All calls clobber the non-callee saved registers. ESP is marked as
// a use to prevent stack-pointer assignments that appear immediately
// before calls from potentially appearing dead.
let Uses = [ESP, SSP],
    Defs = [EAX, ECX, EDX,
            FP0, FP1, FP2, FP3, FP4, FP5, FP6, FP7,
            ST0, ST1, ST2, ST3, ST4, ST5, ST6, ST7,
            MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
            XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
            XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15,
            EFLAGS, DF] in {
  def TLS_addr32
      : I<0, Pseudo,
          (outs),
          (ins i32mem:$sym),
          "# TLS_addr32",
          [(X86tlsaddr tls32addr:$sym)]>,
        Requires<[Not64BitMode]>;

  def TLS_base_addr32
      : I<0, Pseudo,
          (outs),
          (ins i32mem:$sym),
          "# TLS_base_addr32",
          [(X86tlsbaseaddr tls32baseaddr:$sym)]>,
        Requires<[Not64BitMode]>;
}
497
// All calls clobber the non-callee saved registers. RSP is marked as
// a use to prevent stack-pointer assignments that appear immediately
// before calls from potentially appearing dead.
let Uses = [RSP, SSP],
    Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
            FP0, FP1, FP2, FP3, FP4, FP5, FP6, FP7,
            ST0, ST1, ST2, ST3, ST4, ST5, ST6, ST7,
            MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
            XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
            XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15,
            EFLAGS, DF] in {
  // LP64 forms: 64-bit memory operand.
  def TLS_addr64
      : I<0, Pseudo,
          (outs),
          (ins i64mem:$sym),
          "# TLS_addr64",
          [(X86tlsaddr tls64addr:$sym)]>,
        Requires<[In64BitMode, IsLP64]>;

  def TLS_base_addr64
      : I<0, Pseudo,
          (outs),
          (ins i64mem:$sym),
          "# TLS_base_addr64",
          [(X86tlsbaseaddr tls64baseaddr:$sym)]>,
        Requires<[In64BitMode, IsLP64]>;

  // 64-bit mode without LP64: 32-bit memory operand.
  def TLS_addrX32
      : I<0, Pseudo,
          (outs),
          (ins i32mem:$sym),
          "# TLS_addrX32",
          [(X86tlsaddr tls32addr:$sym)]>,
        Requires<[In64BitMode, NotLP64]>;

  def TLS_base_addrX32
      : I<0, Pseudo,
          (outs),
          (ins i32mem:$sym),
          "# TLS_base_addrX32",
          [(X86tlsbaseaddr tls32baseaddr:$sym)]>,
        Requires<[In64BitMode, NotLP64]>;
}
525
// TLSDESC only clobbers EAX and EFLAGS.  The stack pointer is marked as a use
// to prevent stack-pointer assignments that appear immediately before calls
// from potentially appearing dead.
// NOTE(review): both the 32-bit and 64-bit forms list RSP (not ESP) as the
// implicit use and EAX as the def; kept exactly as found.
let Uses = [RSP, SSP], Defs = [EAX, EFLAGS] in {
  def TLS_desc32
      : I<0, Pseudo,
          (outs),
          (ins i32mem:$sym),
          "# TLS_desc32",
          [(X86tlsdesc tls32addr:$sym)]>;

  def TLS_desc64
      : I<0, Pseudo,
          (outs),
          (ins i64mem:$sym),
          "# TLS_desc64",
          [(X86tlsdesc tls64addr:$sym)]>;
}
535
// Darwin TLS Support
// For i386, the address of the thunk is passed on the stack, on return the
// address of the variable is in %eax.  %ecx is trashed during the function
// call.  All other registers are preserved.
let usesCustomInserter = 1,
    Uses = [ESP, SSP],
    Defs = [EAX, ECX, EFLAGS, DF] in
def TLSCall_32
    : I<0, Pseudo,
        (outs),
        (ins i32mem:$sym),
        "# TLSCall_32",
        [(X86TLSCall addr:$sym)]>,
      Requires<[Not64BitMode]>;
547
// For x86_64, the address of the thunk is passed in %rdi, but the
// pseudo directly uses the symbol, so do not add an implicit use of
// %rdi. The lowering will do the right thing with RDI.
// On return the address of the variable is in %rax.  All other
// registers are preserved.
let usesCustomInserter = 1,
    Uses = [RSP, SSP],
    Defs = [RAX, EFLAGS, DF] in
def TLSCall_64
    : I<0, Pseudo,
        (outs),
        (ins i64mem:$sym),
        "# TLSCall_64",
        [(X86TLSCall addr:$sym)]>,
      Requires<[In64BitMode]>;
560} // SchedRW
561
562//===----------------------------------------------------------------------===//
563// Conditional Move Pseudo Instructions
564
// CMOV* - Used to implement the SELECT DAG operation.  Expanded after
// instruction selection into a branch sequence.
//
// Instantiating with `defm _Foo` produces a record named CMOV_Foo that
// selects X86cmov on register class RC producing value type VT.
multiclass CMOVrr_PSEUDO<RegisterClass RC, ValueType VT> {
  def CMOV#NAME
      : I<0, Pseudo,
          (outs RC:$dst),
          (ins RC:$t, RC:$f, i8imm:$cond),
          "#CMOV_"#NAME#" PSEUDO!",
          [(set RC:$dst,
                (VT (X86cmov RC:$t, RC:$f, timm:$cond, EFLAGS)))]>;
}
574
let Uses = [EFLAGS], hasNoSchedulingInfo = 1, usesCustomInserter = 1 in {
  // X86 doesn't have 8-bit conditional moves. Use a customInserter to
  // emit control flow. An alternative to this is to mark i8 SELECT as Promote,
  // however that requires promoting the operands, and can induce additional
  // i8 register pressure.
  defm _GR8 : CMOVrr_PSEUDO<GR8, i8>;

  let Predicates = [NoCMOV] in {
    defm _GR32 : CMOVrr_PSEUDO<GR32, i32>;
    defm _GR16 : CMOVrr_PSEUDO<GR16, i16>;
  } // Predicates = [NoCMOV]

  // fcmov doesn't handle all possible EFLAGS, provide a fallback if there is
  // no SSE1/SSE2.
  let Predicates = [FPStackf32] in {
    defm _RFP32 : CMOVrr_PSEUDO<RFP32, f32>;
  }
  let Predicates = [FPStackf64] in {
    defm _RFP64 : CMOVrr_PSEUDO<RFP64, f64>;
  }
  defm _RFP80 : CMOVrr_PSEUDO<RFP80, f80>;

  // MMX.
  let Predicates = [HasMMX] in {
    defm _VR64 : CMOVrr_PSEUDO<VR64, x86mmx>;
  }

  // Scalar FP register classes, split on AVX512 availability.
  let Predicates = [HasSSE1,NoAVX512] in {
    defm _FR32 : CMOVrr_PSEUDO<FR32, f32>;
  }
  let Predicates = [HasSSE2,NoAVX512] in {
    defm _FR16 : CMOVrr_PSEUDO<FR16, f16>;
    defm _FR64 : CMOVrr_PSEUDO<FR64, f64>;
  }
  let Predicates = [HasAVX512] in {
    defm _FR16X : CMOVrr_PSEUDO<FR16X, f16>;
    defm _FR32X : CMOVrr_PSEUDO<FR32X, f32>;
    defm _FR64X : CMOVrr_PSEUDO<FR64X, f64>;
  }

  // 128/256-bit vector register classes, split on VLX availability.
  let Predicates = [NoVLX] in {
    defm _VR128 : CMOVrr_PSEUDO<VR128, v2i64>;
    defm _VR256 : CMOVrr_PSEUDO<VR256, v4i64>;
  }
  let Predicates = [HasVLX] in {
    defm _VR128X : CMOVrr_PSEUDO<VR128X, v2i64>;
    defm _VR256X : CMOVrr_PSEUDO<VR256X, v4i64>;
  }

  // 512-bit vectors and mask registers carry no extra predicate here.
  defm _VR512 : CMOVrr_PSEUDO<VR512, v8i64>;
  defm _VK1   : CMOVrr_PSEUDO<VK1,  v1i1>;
  defm _VK2   : CMOVrr_PSEUDO<VK2,  v2i1>;
  defm _VK4   : CMOVrr_PSEUDO<VK4,  v4i1>;
  defm _VK8   : CMOVrr_PSEUDO<VK8,  v8i1>;
  defm _VK16  : CMOVrr_PSEUDO<VK16, v16i1>;
  defm _VK32  : CMOVrr_PSEUDO<VK32, v32i1>;
  defm _VK64  : CMOVrr_PSEUDO<VK64, v64i1>;
} // usesCustomInserter = 1, hasNoSchedulingInfo = 1, Uses = [EFLAGS]
628
// f128 values in VR128 reuse the CMOV_VR128 pseudo; this pattern carries no
// predicate of its own.
def : Pat<
  (f128 (X86cmov VR128:$t, VR128:$f, timm:$cond, EFLAGS)),
  (CMOV_VR128 VR128:$t, VR128:$f, timm:$cond)>;
631
// Without VLX, map the remaining 128-bit and 256-bit vector types onto the
// CMOV_VR128 / CMOV_VR256 pseudos (which are themselves declared on
// v2i64 / v4i64).  One anonymous pattern is generated per listed type, in
// list order.
let Predicates = [NoVLX] in {
  foreach VT = [v16i8, v8i16, v4i32, v4f32, v2f64] in
    def : Pat<(VT (X86cmov VR128:$t, VR128:$f, timm:$cond, EFLAGS)),
              (CMOV_VR128 VR128:$t, VR128:$f, timm:$cond)>;

  foreach VT = [v32i8, v16i16, v8i32, v8f32, v4f64] in
    def : Pat<(VT (X86cmov VR256:$t, VR256:$f, timm:$cond, EFLAGS)),
              (CMOV_VR256 VR256:$t, VR256:$f, timm:$cond)>;
}
// With VLX, map the remaining 128-bit and 256-bit vector types (including the
// f16 vectors) onto the CMOV_VR128X / CMOV_VR256X pseudos.  One anonymous
// pattern is generated per listed type, in list order.
let Predicates = [HasVLX] in {
  foreach VT = [v16i8, v8i16, v8f16, v4i32, v4f32, v2f64] in
    def : Pat<(VT (X86cmov VR128X:$t, VR128X:$f, timm:$cond, EFLAGS)),
              (CMOV_VR128X VR128X:$t, VR128X:$f, timm:$cond)>;

  foreach VT = [v32i8, v16i16, v16f16, v8i32, v8f32, v4f64] in
    def : Pat<(VT (X86cmov VR256X:$t, VR256X:$f, timm:$cond, EFLAGS)),
              (CMOV_VR256X VR256X:$t, VR256X:$f, timm:$cond)>;
}
682
// 512-bit vector X86cmov selection: every listed type selects CMOV_VR512.
// These patterns carry no explicit predicate in this file.
683def : Pat<(v64i8 (X86cmov VR512:$t, VR512:$f, timm:$cond, EFLAGS)),
684          (CMOV_VR512 VR512:$t, VR512:$f, timm:$cond)>;
685def : Pat<(v32i16 (X86cmov VR512:$t, VR512:$f, timm:$cond, EFLAGS)),
686          (CMOV_VR512 VR512:$t, VR512:$f, timm:$cond)>;
687def : Pat<(v32f16 (X86cmov VR512:$t, VR512:$f, timm:$cond, EFLAGS)),
688          (CMOV_VR512 VR512:$t, VR512:$f, timm:$cond)>;
689def : Pat<(v16i32 (X86cmov VR512:$t, VR512:$f, timm:$cond, EFLAGS)),
690          (CMOV_VR512 VR512:$t, VR512:$f, timm:$cond)>;
691def : Pat<(v16f32 (X86cmov VR512:$t, VR512:$f, timm:$cond, EFLAGS)),
692          (CMOV_VR512 VR512:$t, VR512:$f, timm:$cond)>;
693def : Pat<(v8f64 (X86cmov VR512:$t, VR512:$f, timm:$cond, EFLAGS)),
694          (CMOV_VR512 VR512:$t, VR512:$f, timm:$cond)>;
695
696//===----------------------------------------------------------------------===//
697// Normal-Instructions-With-Lock-Prefix Pseudo Instructions
698//===----------------------------------------------------------------------===//
699
700// FIXME: Use normal instructions and add lock prefix dynamically.
701
702// Memory barriers
703
// LOCK-prefixed 32-bit `or` of an 8-bit immediate into memory (opcode 0x83,
// MRM1m). It has no selection pattern, is code-gen only, clobbers EFLAGS and
// is restricted to non-64-bit mode.
// NOTE(review): the immediate operand is named $zero and the definition sits
// under "Memory barriers", so it is presumably used as a fence idiom - confirm
// against the code that creates it.
704let isCodeGenOnly = 1, Defs = [EFLAGS] in
705def OR32mi8Locked  : Ii8<0x83, MRM1m, (outs), (ins i32mem:$dst, i32i8imm:$zero),
706                         "or{l}\t{$zero, $dst|$dst, $zero}", []>,
707                         Requires<[Not64BitMode]>, OpSize32, LOCK,
708                         Sched<[WriteALURMW]>;
709
// LOCK_ArithBinOp: defines the LOCK-prefixed memory-destination forms of one
// arithmetic/logic operation. Every def sets EFLAGS from `Op addr:$dst, src`,
// is marked mayLoad/mayStore and isCodeGenOnly, and has no register output.
//
// Parameters:
710// RegOpc corresponds to the mr version of the instruction
711// ImmOpc corresponds to the mi version of the instruction
712// ImmOpc8 corresponds to the mi8 version of the instruction
713// ImmMod corresponds to the instruction format of the mi and mi8 versions
// Op is the SDNode matched by the patterns; mnemonic is the assembly name.
//
// In every def the opcode byte is rebuilt from bits 7..1 of the parameter with
// the low bit forced: 0 for the 8-bit form, 1 for the 16/32/64-bit forms.
714multiclass LOCK_ArithBinOp<bits<8> RegOpc, bits<8> ImmOpc, bits<8> ImmOpc8,
715                           Format ImmMod, SDNode Op, string mnemonic> {
716let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1,
717    SchedRW = [WriteALURMW] in {
718
// Memory destination, register source (8/16/32/64 bit).
719def NAME#8mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
720                  RegOpc{3}, RegOpc{2}, RegOpc{1}, 0 },
721                  MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
722                  !strconcat(mnemonic, "{b}\t",
723                             "{$src2, $dst|$dst, $src2}"),
724                  [(set EFLAGS, (Op addr:$dst, GR8:$src2))]>, LOCK;
725
726def NAME#16mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
727                   RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
728                   MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
729                   !strconcat(mnemonic, "{w}\t",
730                              "{$src2, $dst|$dst, $src2}"),
731                   [(set EFLAGS, (Op addr:$dst, GR16:$src2))]>,
732                   OpSize16, LOCK;
733
734def NAME#32mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
735                   RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
736                   MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
737                   !strconcat(mnemonic, "{l}\t",
738                              "{$src2, $dst|$dst, $src2}"),
739                   [(set EFLAGS, (Op addr:$dst, GR32:$src2))]>,
740                   OpSize32, LOCK;
741
742def NAME#64mr : RI<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
743                    RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
744                    MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
745                    !strconcat(mnemonic, "{q}\t",
746                               "{$src2, $dst|$dst, $src2}"),
747                    [(set EFLAGS, (Op addr:$dst, GR64:$src2))]>, LOCK;
748
// Memory destination, sign-extended 8-bit immediate (16/32/64 bit).
749// NOTE: These are order specific, we want the mi8 forms to be listed
750// first so that they are slightly preferred to the mi forms.
751def NAME#16mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
752                      ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
753                      ImmMod, (outs), (ins i16mem :$dst, i16i8imm :$src2),
754                      !strconcat(mnemonic, "{w}\t",
755                                 "{$src2, $dst|$dst, $src2}"),
756                      [(set EFLAGS, (Op addr:$dst, i16immSExt8:$src2))]>,
757                      OpSize16, LOCK;
758
759def NAME#32mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
760                      ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
761                      ImmMod, (outs), (ins i32mem :$dst, i32i8imm :$src2),
762                      !strconcat(mnemonic, "{l}\t",
763                                 "{$src2, $dst|$dst, $src2}"),
764                      [(set EFLAGS, (Op addr:$dst, i32immSExt8:$src2))]>,
765                      OpSize32, LOCK;
766
767def NAME#64mi8 : RIi8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
768                       ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
769                       ImmMod, (outs), (ins i64mem :$dst, i64i8imm :$src2),
770                       !strconcat(mnemonic, "{q}\t",
771                                  "{$src2, $dst|$dst, $src2}"),
772                       [(set EFLAGS, (Op addr:$dst, i64immSExt8:$src2))]>,
773                       LOCK;
774
// Memory destination, full-width immediate (8/16/32 bit) and the 64-bit form
// taking a sign-extended 32-bit immediate.
775def NAME#8mi : Ii8<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
776                    ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 0 },
777                    ImmMod, (outs), (ins i8mem :$dst, i8imm :$src2),
778                    !strconcat(mnemonic, "{b}\t",
779                               "{$src2, $dst|$dst, $src2}"),
780                    [(set EFLAGS, (Op addr:$dst, (i8 imm:$src2)))]>, LOCK;
781
782def NAME#16mi : Ii16<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
783                      ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
784                      ImmMod, (outs), (ins i16mem :$dst, i16imm :$src2),
785                      !strconcat(mnemonic, "{w}\t",
786                                 "{$src2, $dst|$dst, $src2}"),
787                      [(set EFLAGS, (Op addr:$dst, (i16 imm:$src2)))]>,
788                      OpSize16, LOCK;
789
790def NAME#32mi : Ii32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
791                      ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
792                      ImmMod, (outs), (ins i32mem :$dst, i32imm :$src2),
793                      !strconcat(mnemonic, "{l}\t",
794                                 "{$src2, $dst|$dst, $src2}"),
795                      [(set EFLAGS, (Op addr:$dst, (i32 imm:$src2)))]>,
796                      OpSize32, LOCK;
797
798def NAME#64mi32 : RIi32S<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
799                          ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
800                          ImmMod, (outs), (ins i64mem :$dst, i64i32imm :$src2),
801                          !strconcat(mnemonic, "{q}\t",
802                                     "{$src2, $dst|$dst, $src2}"),
803                          [(set EFLAGS, (Op addr:$dst, i64immSExt32:$src2))]>,
804                          LOCK;
805}
806
807}
808
// Instantiate LOCK_ArithBinOp for add/sub/or/and/xor. Arguments are, in order:
// register-form opcode, immediate-form opcode, 8-bit-immediate-form opcode,
// the immediate instruction format, the SDNode to match, and the mnemonic.
809defm LOCK_ADD : LOCK_ArithBinOp<0x00, 0x80, 0x83, MRM0m, X86lock_add, "add">;
810defm LOCK_SUB : LOCK_ArithBinOp<0x28, 0x80, 0x83, MRM5m, X86lock_sub, "sub">;
811defm LOCK_OR  : LOCK_ArithBinOp<0x08, 0x80, 0x83, MRM1m, X86lock_or , "or">;
812defm LOCK_AND : LOCK_ArithBinOp<0x20, 0x80, 0x83, MRM4m, X86lock_and, "and">;
813defm LOCK_XOR : LOCK_ArithBinOp<0x30, 0x80, 0x83, MRM6m, X86lock_xor, "xor">;
814
// LOCK-prefixed inc/dec on memory. INC matches X86lock_add_nocf with the
// constant 1 and DEC matches X86lock_sub_nocf with the constant 1; all set
// EFLAGS. The 8/16/32-bit forms require UseIncDec; the 64-bit forms
// additionally require In64BitMode.
815let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1,
816    SchedRW = [WriteALURMW]  in {
817  let Predicates = [UseIncDec] in {
818    def LOCK_INC8m  : I<0xFE, MRM0m, (outs), (ins i8mem :$dst),
819                        "inc{b}\t$dst",
820                        [(set EFLAGS, (X86lock_add_nocf addr:$dst, (i8 1)))]>,
821                        LOCK;
822    def LOCK_INC16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst),
823                        "inc{w}\t$dst",
824                        [(set EFLAGS, (X86lock_add_nocf addr:$dst, (i16 1)))]>,
825                        OpSize16, LOCK;
826    def LOCK_INC32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst),
827                        "inc{l}\t$dst",
828                        [(set EFLAGS, (X86lock_add_nocf addr:$dst, (i32 1)))]>,
829                        OpSize32, LOCK;
830
831    def LOCK_DEC8m  : I<0xFE, MRM1m, (outs), (ins i8mem :$dst),
832                        "dec{b}\t$dst",
833                        [(set EFLAGS, (X86lock_sub_nocf addr:$dst, (i8 1)))]>,
834                        LOCK;
835    def LOCK_DEC16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst),
836                        "dec{w}\t$dst",
837                        [(set EFLAGS, (X86lock_sub_nocf addr:$dst, (i16 1)))]>,
838                        OpSize16, LOCK;
839    def LOCK_DEC32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst),
840                        "dec{l}\t$dst",
841                        [(set EFLAGS, (X86lock_sub_nocf addr:$dst, (i32 1)))]>,
842                        OpSize32, LOCK;
843  }
844
  // 64-bit forms (REX.W via RI), only in 64-bit mode.
845  let Predicates = [UseIncDec, In64BitMode] in {
846    def LOCK_INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst),
847                         "inc{q}\t$dst",
848                         [(set EFLAGS, (X86lock_add_nocf addr:$dst, (i64 1)))]>,
849                         LOCK;
850    def LOCK_DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst),
851                         "dec{q}\t$dst",
852                         [(set EFLAGS, (X86lock_sub_nocf addr:$dst, (i64 1)))]>,
853                         LOCK;
854  }
855}
856
// Adding -1 is selected as a locked decrement and subtracting -1 as a locked
// increment. These patterns match X86lock_add / X86lock_sub (not the _nocf
// nodes used by the instruction definitions themselves).
857let Predicates = [UseIncDec] in {
858  // Additional patterns for -1 constant.
859  def : Pat<(X86lock_add addr:$dst, (i8  -1)), (LOCK_DEC8m  addr:$dst)>;
860  def : Pat<(X86lock_add addr:$dst, (i16 -1)), (LOCK_DEC16m addr:$dst)>;
861  def : Pat<(X86lock_add addr:$dst, (i32 -1)), (LOCK_DEC32m addr:$dst)>;
862  def : Pat<(X86lock_sub addr:$dst, (i8  -1)), (LOCK_INC8m  addr:$dst)>;
863  def : Pat<(X86lock_sub addr:$dst, (i16 -1)), (LOCK_INC16m addr:$dst)>;
864  def : Pat<(X86lock_sub addr:$dst, (i32 -1)), (LOCK_INC32m addr:$dst)>;
865}
866
// Same mapping for 64-bit operands, which additionally needs 64-bit mode.
867let Predicates = [UseIncDec, In64BitMode] in {
868  // Additional patterns for -1 constant.
869  def : Pat<(X86lock_add addr:$dst, (i64 -1)), (LOCK_DEC64m addr:$dst)>;
870  def : Pat<(X86lock_sub addr:$dst, (i64 -1)), (LOCK_INC64m addr:$dst)>;
871}
872
873// Atomic bit test.
// X86LBTest: one i32 result and three operands - a pointer, an i8, and an i32.
// The instruction patterns in this file pass a timm bit index as the i8
// operand and the constant 16/32/64 as the i32 operand.
874def X86LBTest : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisPtrTy<1>,
875                                     SDTCisVT<2, i8>, SDTCisVT<3, i32>]>;
// Immediate-index nodes; all are chained memory operations that may load and
// store.
876def x86bts : SDNode<"X86ISD::LBTS", X86LBTest,
877                    [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
878def x86btc : SDNode<"X86ISD::LBTC", X86LBTest,
879                    [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
880def x86btr : SDNode<"X86ISD::LBTR", X86LBTest,
881                    [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
882
// X86LBTestRM: one i32 result and two operands - a pointer and an integer of
// any width (the patterns in this file pass a GR16/GR32/GR64 register).
883def X86LBTestRM : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<1>,
884                                       SDTCisInt<2>]>;
885
// Register-index nodes, with the same node properties as above.
886def x86_rm_bts : SDNode<"X86ISD::LBTS_RM", X86LBTestRM,
887                        [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
888def x86_rm_btc : SDNode<"X86ISD::LBTC_RM", X86LBTestRM,
889                        [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
890def x86_rm_btr : SDNode<"X86ISD::LBTR_RM", X86LBTestRM,
891                        [SDNPHasChain, SDNPMayLoad, SDNPMayStore, SDNPMemOperand]>;
892
893
// ATOMIC_LOGIC_OP: LOCK-prefixed bit-test instructions with an 8-bit immediate
// bit index (opcode 0xBA; `Form` chooses the operation). `s` is both the
// mnemonic and the suffix used to look up the SDNode named "x86" # s. The
// third pattern operand is the constant operand width (16, 32 or 64).
894multiclass ATOMIC_LOGIC_OP<Format Form, string s> {
895  let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1,
896      SchedRW = [WriteBitTestSetRegRMW]  in {
897    def 16m : Ii8<0xBA, Form, (outs), (ins i16mem:$src1, i8imm:$src2),
898                  !strconcat(s, "{w}\t{$src2, $src1|$src1, $src2}"),
899                  [(set EFLAGS, (!cast<SDNode>("x86" # s) addr:$src1, timm:$src2, (i32 16)))]>,
900              OpSize16, TB, LOCK;
901    def 32m : Ii8<0xBA, Form, (outs), (ins i32mem:$src1, i8imm:$src2),
902                  !strconcat(s, "{l}\t{$src2, $src1|$src1, $src2}"),
903                  [(set EFLAGS, (!cast<SDNode>("x86" # s) addr:$src1, timm:$src2, (i32 32)))]>,
904              OpSize32, TB, LOCK;
905    def 64m : RIi8<0xBA, Form, (outs), (ins i64mem:$src1, i8imm:$src2),
906                   !strconcat(s, "{q}\t{$src2, $src1|$src1, $src2}"),
907                   [(set EFLAGS, (!cast<SDNode>("x86" # s) addr:$src1, timm:$src2, (i32 64)))]>,
908              TB, LOCK;
909  }
910}
911
// ATOMIC_LOGIC_OP_RM: LOCK-prefixed bit-test instructions whose bit index is
// in a register. `Opc8` is the opcode byte used for all three widths and `s`
// is both the mnemonic and the suffix used to look up the SDNode named
// "x86_rm_" # s.
912multiclass ATOMIC_LOGIC_OP_RM<bits<8> Opc8, string s> {
913  let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1,
914      SchedRW = [WriteBitTestSetRegRMW]  in {
915    def 16rm : I<Opc8, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
916                  !strconcat(s, "{w}\t{$src2, $src1|$src1, $src2}"),
917                  [(set EFLAGS, (!cast<SDNode>("x86_rm_" # s) addr:$src1, GR16:$src2))]>,
918               OpSize16, TB, LOCK;
919    def 32rm : I<Opc8, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
920                  !strconcat(s, "{l}\t{$src2, $src1|$src1, $src2}"),
921                  [(set EFLAGS, (!cast<SDNode>("x86_rm_" # s) addr:$src1, GR32:$src2))]>,
922               OpSize32, TB, LOCK;
923    def 64rm : RI<Opc8, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
924                   !strconcat(s, "{q}\t{$src2, $src1|$src1, $src2}"),
925                   [(set EFLAGS, (!cast<SDNode>("x86_rm_" # s) addr:$src1, GR64:$src2))]>,
926               TB, LOCK;
927  }
928}
929
930
// Immediate-index forms: the MRM format selects bts / btc / btr under 0xBA.
931defm LOCK_BTS : ATOMIC_LOGIC_OP<MRM5m, "bts">;
932defm LOCK_BTC : ATOMIC_LOGIC_OP<MRM7m, "btc">;
933defm LOCK_BTR : ATOMIC_LOGIC_OP<MRM6m, "btr">;
934
// Register-index forms: each operation has its own opcode byte.
935defm LOCK_BTS_RM : ATOMIC_LOGIC_OP_RM<0xAB, "bts">;
936defm LOCK_BTC_RM : ATOMIC_LOGIC_OP_RM<0xBB, "btc">;
937defm LOCK_BTR_RM : ATOMIC_LOGIC_OP_RM<0xB3, "btr">;
938
939// Atomic compare and swap.
// LCMPXCHG_BinOp: LOCK-prefixed compare-and-swap for 8/16/32/64-bit operands.
//   Opc8     - opcode of the 8-bit form; Opc - opcode of the wider forms.
//   Form     - instruction format shared by all four defs.
//   mnemonic - assembly mnemonic; a size suffix is appended per def.
//   frag     - pattern operator matched as (frag addr, swap, N), where N is
//              1, 2, 4 or 8 (presumably the access size in bytes - confirm
//              against the definition of the fragment passed in).
// Each def both uses and defines the accumulator of matching width
// (AL/AX/EAX/RAX) and defines EFLAGS.
940multiclass LCMPXCHG_BinOp<bits<8> Opc8, bits<8> Opc, Format Form,
941                          string mnemonic, SDPatternOperator frag> {
942let isCodeGenOnly = 1, SchedRW = [WriteCMPXCHGRMW] in {
943  let Defs = [AL, EFLAGS], Uses = [AL] in
944  def NAME#8  : I<Opc8, Form, (outs), (ins i8mem:$ptr, GR8:$swap),
945                  !strconcat(mnemonic, "{b}\t{$swap, $ptr|$ptr, $swap}"),
946                  [(frag addr:$ptr, GR8:$swap, 1)]>, TB, LOCK;
947  let Defs = [AX, EFLAGS], Uses = [AX] in
948  def NAME#16 : I<Opc, Form, (outs), (ins i16mem:$ptr, GR16:$swap),
949                  !strconcat(mnemonic, "{w}\t{$swap, $ptr|$ptr, $swap}"),
950                  [(frag addr:$ptr, GR16:$swap, 2)]>, TB, OpSize16, LOCK;
951  let Defs = [EAX, EFLAGS], Uses = [EAX] in
952  def NAME#32 : I<Opc, Form, (outs), (ins i32mem:$ptr, GR32:$swap),
953                  !strconcat(mnemonic, "{l}\t{$swap, $ptr|$ptr, $swap}"),
954                  [(frag addr:$ptr, GR32:$swap, 4)]>, TB, OpSize32, LOCK;
955  let Defs = [RAX, EFLAGS], Uses = [RAX] in
956  def NAME#64 : RI<Opc, Form, (outs), (ins i64mem:$ptr, GR64:$swap),
957                   !strconcat(mnemonic, "{q}\t{$swap, $ptr|$ptr, $swap}"),
958                   [(frag addr:$ptr, GR64:$swap, 8)]>, TB, LOCK;
959}
960}
961
// LOCK cmpxchg8b on a 64-bit memory operand, selected from X86cas8. It reads
// EAX, EBX, ECX and EDX implicitly and writes EAX, EDX and EFLAGS. Requires
// HasCX8 and is marked usesCustomInserter.
962let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX],
963    Predicates = [HasCX8], SchedRW = [WriteCMPXCHGRMW],
964    isCodeGenOnly = 1, usesCustomInserter = 1 in {
965def LCMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$ptr),
966                   "cmpxchg8b\t$ptr",
967                   [(X86cas8 addr:$ptr)]>, TB, LOCK;
968}
969
// LOCK cmpxchg16b on a 128-bit memory operand. It reads RAX, RBX, RCX and RDX
// implicitly and writes RAX, RDX and EFLAGS. It has no selection pattern of
// its own; the LCMPXCHG16B_NO_RBX pseudo defined later in this file is
// documented as being turned into this instruction or the _SAVE_RBX variant.
// Requires HasCX16 and 64-bit mode.
970let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX],
971    Predicates = [HasCX16,In64BitMode], SchedRW = [WriteCMPXCHGRMW],
972    isCodeGenOnly = 1, mayLoad = 1, mayStore = 1, hasSideEffects = 0 in {
973def LCMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$ptr),
974                     "cmpxchg16b\t$ptr",
975                     []>, TB, LOCK;
976}
977
978// This pseudo must be used when the frame uses RBX as
979// the base pointer. Indeed, in such a situation RBX is a reserved
980// register and the register allocator will ignore any use/def of
981// it. In other words, the register allocator will not fix the clobbering of
982// RBX that will happen when setting the arguments for the instruction.
983//
984// Unlike the actual related instruction, we mark that this one
985// defines RBX (instead of using RBX).
986// The rationale is that we will define RBX during the expansion of
987// the pseudo. The argument feeding RBX is rbx_input.
988//
989// The additional argument, $rbx_save, is a temporary register used to
990// save the value of RBX across the actual instruction.
991//
992// To make sure the register assigned to $rbx_save does not interfere with
993// the definition of the actual instruction, we use a definition $dst which
994// is tied to $rbx_save. That way, the live-range of $rbx_save spans across
995// the instruction and we are sure we will have a valid register to restore
996// the value of RBX.
997let Defs = [RAX, RDX, RBX, EFLAGS], Uses = [RAX, RCX, RDX],
998    Predicates = [HasCX16,In64BitMode], SchedRW = [WriteCMPXCHGRMW],
999    isCodeGenOnly = 1, isPseudo = 1,
1000    mayLoad = 1, mayStore = 1, hasSideEffects = 0,
1001    Constraints = "$rbx_save = $dst" in {
// Operands: $ptr is the 128-bit memory location, $rbx_input the value to put
// in RBX, and $rbx_save the saved RBX (tied to the result $dst).
1002def LCMPXCHG16B_SAVE_RBX :
1003    I<0, Pseudo, (outs GR64:$dst),
1004      (ins i128mem:$ptr, GR64:$rbx_input, GR64:$rbx_save), "", []>;
1005}
1006
1007// Pseudo instruction that doesn't read/write RBX. Will be turned into either
1008// LCMPXCHG16B_SAVE_RBX or LCMPXCHG16B via a custom inserter.
// It is the form selected from X86cas16: the value destined for RBX is carried
// as the explicit operand $rbx_input rather than as an implicit use.
1009let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RCX, RDX],
1010    Predicates = [HasCX16,In64BitMode], SchedRW = [WriteCMPXCHGRMW],
1011    isCodeGenOnly = 1, isPseudo = 1,
1012    mayLoad = 1, mayStore = 1, hasSideEffects = 0,
1013    usesCustomInserter = 1 in {
1014def LCMPXCHG16B_NO_RBX :
1015    I<0, Pseudo, (outs), (ins i128mem:$ptr, GR64:$rbx_input), "",
1016      [(X86cas16 addr:$ptr, GR64:$rbx_input)]>;
1017}
1018
1019// This pseudo must be used when the frame uses RBX/EBX as
1020// the base pointer.
1021// cf comment for LCMPXCHG16B_SAVE_RBX.
// The pseudo is marked as defining EBX and using ECX and EAX. $ebx_input is
// the 32-bit value for EBX and $rbx_save is the saved 64-bit RBX, tied to the
// result $dst.
1022let Defs = [EBX], Uses = [ECX, EAX],
1023    Predicates = [HasMWAITX], SchedRW = [WriteSystem],
1024    isCodeGenOnly = 1, isPseudo = 1, Constraints = "$rbx_save = $dst" in {
1025def MWAITX_SAVE_RBX :
1026    I<0, Pseudo, (outs GR64:$dst),
1027      (ins GR32:$ebx_input, GR64:$rbx_save),
1028      "mwaitx",
1029      []>;
1030}
1031
1032// Pseudo mwaitx instruction to use for custom insertion.
// Selected from the int_x86_mwaitx intrinsic; all three inputs are explicit
// GR32 operands here and the pseudo is marked usesCustomInserter.
1033let Predicates = [HasMWAITX], SchedRW = [WriteSystem],
1034    isCodeGenOnly = 1, isPseudo = 1,
1035    usesCustomInserter = 1 in {
1036def MWAITX :
1037    I<0, Pseudo, (outs), (ins GR32:$ecx, GR32:$eax, GR32:$ebx),
1038      "mwaitx",
1039      [(int_x86_mwaitx GR32:$ecx, GR32:$eax, GR32:$ebx)]>;
1040}
1041
1042
// Instantiate the compare-and-swap multiclass for cmpxchg: opcode 0xB0 for the
// 8-bit form, 0xB1 for the wider forms, matched through X86cas.
1043defm LCMPXCHG : LCMPXCHG_BinOp<0xB0, 0xB1, MRMDestMem, "cmpxchg", X86cas>;
1044
1045// Atomic exchange and add
// ATOMIC_RMW_BINOP: read-modify-write instructions that return a register
// result, in 8/16/32/64-bit widths.
//   opc8     - opcode of the 8-bit form; opc - opcode of the wider forms.
//   mnemonic - assembly mnemonic; a size suffix is appended per def.
//   frag     - base name of the PatFrag; "_i8"/"_i16"/"_i32"/"_i64" is
//              appended and the result is looked up with !cast<PatFrag>.
// The input $val is tied to the output $dst, and every def clobbers EFLAGS.
1046multiclass ATOMIC_RMW_BINOP<bits<8> opc8, bits<8> opc, string mnemonic,
1047                            string frag> {
1048  let Constraints = "$val = $dst", Defs = [EFLAGS], mayLoad = 1, mayStore = 1,
1049      isCodeGenOnly = 1, SchedRW = [WriteALURMW] in {
1050    def NAME#8  : I<opc8, MRMSrcMem, (outs GR8:$dst),
1051                    (ins GR8:$val, i8mem:$ptr),
1052                    !strconcat(mnemonic, "{b}\t{$val, $ptr|$ptr, $val}"),
1053                    [(set GR8:$dst,
1054                          (!cast<PatFrag>(frag # "_i8") addr:$ptr, GR8:$val))]>;
1055    def NAME#16 : I<opc, MRMSrcMem, (outs GR16:$dst),
1056                    (ins GR16:$val, i16mem:$ptr),
1057                    !strconcat(mnemonic, "{w}\t{$val, $ptr|$ptr, $val}"),
1058                    [(set
1059                       GR16:$dst,
1060                       (!cast<PatFrag>(frag # "_i16") addr:$ptr, GR16:$val))]>,
1061                    OpSize16;
1062    def NAME#32 : I<opc, MRMSrcMem, (outs GR32:$dst),
1063                    (ins GR32:$val, i32mem:$ptr),
1064                    !strconcat(mnemonic, "{l}\t{$val, $ptr|$ptr, $val}"),
1065                    [(set
1066                       GR32:$dst,
1067                       (!cast<PatFrag>(frag # "_i32") addr:$ptr, GR32:$val))]>,
1068                    OpSize32;
1069    def NAME#64 : RI<opc, MRMSrcMem, (outs GR64:$dst),
1070                     (ins GR64:$val, i64mem:$ptr),
1071                     !strconcat(mnemonic, "{q}\t{$val, $ptr|$ptr, $val}"),
1072                     [(set
1073                        GR64:$dst,
1074                        (!cast<PatFrag>(frag # "_i64") addr:$ptr, GR64:$val))]>;
1075  }
1076}
1077
// Locked xadd, matched from the atomic_load_add_i8/_i16/_i32/_i64 fragments.
// TB and LOCK are applied to every def produced by the multiclass.
1078defm LXADD : ATOMIC_RMW_BINOP<0xc0, 0xc1, "xadd", "atomic_load_add">, TB, LOCK;
1079
1080/* The following multiclass tries to make sure that in code like
1081 *    x.store (immediate op x.load(acquire), release)
1082 * and
1083 *    x.store (register op x.load(acquire), release)
1084 * an operation directly on memory is generated instead of wasting a register.
1085 * It is not automatic as atomic_store/load are only lowered to MOV instructions
1086 * extremely late to prevent them from being accidentally reordered in the backend
1087 * (see below the RELEASE_MOV* / ACQUIRE_MOV* pseudo-instructions)
1088 */
// Name is the instruction name prefix (for example "ADD" yields ADD8mi,
// ADD8mr, ...) and op is the binary node to match. Both the load and the store
// in each pattern use the same $dst address. Despite the _MI suffix, the
// multiclass emits the register-source (mr) patterns as well.
1089multiclass RELEASE_BINOP_MI<string Name, SDNode op> {
  // Immediate right-hand side.
1090  def : Pat<(atomic_store_8 (op (atomic_load_nonext_8 addr:$dst), (i8 imm:$src)),
1091                            addr:$dst),
1092            (!cast<Instruction>(Name#"8mi") addr:$dst, imm:$src)>;
1093  def : Pat<(atomic_store_16 (op (atomic_load_nonext_16 addr:$dst), (i16 imm:$src)),
1094                             addr:$dst),
1095            (!cast<Instruction>(Name#"16mi") addr:$dst, imm:$src)>;
1096  def : Pat<(atomic_store_32 (op (atomic_load_nonext_32 addr:$dst), (i32 imm:$src)),
1097                             addr:$dst),
1098            (!cast<Instruction>(Name#"32mi") addr:$dst, imm:$src)>;
1099  def : Pat<(atomic_store_64 (op (atomic_load_nonext_64 addr:$dst), (i64immSExt32:$src)),
1100                             addr:$dst),
1101            (!cast<Instruction>(Name#"64mi32") addr:$dst, (i64immSExt32:$src))>;
  // Register right-hand side.
1102  def : Pat<(atomic_store_8 (op (atomic_load_nonext_8 addr:$dst), (i8 GR8:$src)), addr:$dst),
1103            (!cast<Instruction>(Name#"8mr") addr:$dst, GR8:$src)>;
1104  def : Pat<(atomic_store_16 (op (atomic_load_nonext_16 addr:$dst), (i16 GR16:$src)),
1105                             addr:$dst),
1106            (!cast<Instruction>(Name#"16mr") addr:$dst, GR16:$src)>;
1107  def : Pat<(atomic_store_32 (op (atomic_load_nonext_32 addr:$dst), (i32 GR32:$src)),
1108                             addr:$dst),
1109            (!cast<Instruction>(Name#"32mr") addr:$dst, GR32:$src)>;
1110  def : Pat<(atomic_store_64 (op (atomic_load_nonext_64 addr:$dst), (i64 GR64:$src)),
1111                             addr:$dst),
1112            (!cast<Instruction>(Name#"64mr") addr:$dst, GR64:$src)>;
1113}
1114defm : RELEASE_BINOP_MI<"ADD", add>;
1115defm : RELEASE_BINOP_MI<"AND", and>;
1116defm : RELEASE_BINOP_MI<"OR",  or>;
1117defm : RELEASE_BINOP_MI<"XOR", xor>;
1118defm : RELEASE_BINOP_MI<"SUB", sub>;
1119
1120// Atomic load + floating point patterns.
1121// FIXME: This could also handle SIMD operations with *ps and *pd instructions.
// Folds an atomic integer load that is bitcast to f32/f64 into the memory
// operand of the scalar FP instruction. Name is the instruction name prefix
// (for example "ADD" yields ADDSSrm, VADDSSrm, VADDSSZrm and the SD
// equivalents) and op is the FP node to match.
// The legacy-SSE f64 pattern is gated on UseSSE2, like the MOVSD atomic
// load/store patterns in this file, because the scalar-double instructions it
// selects are SSE2 instructions.
1122multiclass ATOMIC_LOAD_FP_BINOP_MI<string Name, SDNode op> {
  // f32: atomic 32-bit load reinterpreted as float.
1123  def : Pat<(op FR32:$src1, (bitconvert (i32 (atomic_load_nonext_32 addr:$src2)))),
1124            (!cast<Instruction>(Name#"SSrm") FR32:$src1, addr:$src2)>,
1125            Requires<[UseSSE1]>;
1126  def : Pat<(op FR32:$src1, (bitconvert (i32 (atomic_load_nonext_32 addr:$src2)))),
1127            (!cast<Instruction>("V"#Name#"SSrm") FR32:$src1, addr:$src2)>,
1128            Requires<[UseAVX]>;
1129  def : Pat<(op FR32X:$src1, (bitconvert (i32 (atomic_load_nonext_32 addr:$src2)))),
1130            (!cast<Instruction>("V"#Name#"SSZrm") FR32X:$src1, addr:$src2)>,
1131            Requires<[HasAVX512]>;
1132
  // f64: atomic 64-bit load reinterpreted as double.
1133  def : Pat<(op FR64:$src1, (bitconvert (i64 (atomic_load_nonext_64 addr:$src2)))),
1134            (!cast<Instruction>(Name#"SDrm") FR64:$src1, addr:$src2)>,
1135            Requires<[UseSSE2]>;
1136  def : Pat<(op FR64:$src1, (bitconvert (i64 (atomic_load_nonext_64 addr:$src2)))),
1137            (!cast<Instruction>("V"#Name#"SDrm") FR64:$src1, addr:$src2)>,
1138            Requires<[UseAVX]>;
1139  def : Pat<(op FR64X:$src1, (bitconvert (i64 (atomic_load_nonext_64 addr:$src2)))),
1140            (!cast<Instruction>("V"#Name#"SDZrm") FR64X:$src1, addr:$src2)>,
1141            Requires<[HasAVX512]>;
1142}
1143defm : ATOMIC_LOAD_FP_BINOP_MI<"ADD", fadd>;
1144defm : ATOMIC_LOAD_FP_BINOP_MI<"SUB", fsub>;
1145defm : ATOMIC_LOAD_FP_BINOP_MI<"MUL", fmul>;
1146defm : ATOMIC_LOAD_FP_BINOP_MI<"DIV", fdiv>;
1147
// RELEASE_UNOP: selects an atomic store of a caller-supplied expression to the
// single-memory-operand instruction Name#{8,16,32,64}m. dag8..dag64 are the
// stored-value expressions for each width; the instantiations in this file
// build them from an atomic load of the same $dst address.
1148multiclass RELEASE_UNOP<string Name, dag dag8, dag dag16, dag dag32,
1149                        dag dag64> {
1150  def : Pat<(atomic_store_8 dag8, addr:$dst),
1151            (!cast<Instruction>(Name#8m) addr:$dst)>;
1152  def : Pat<(atomic_store_16 dag16, addr:$dst),
1153            (!cast<Instruction>(Name#16m) addr:$dst)>;
1154  def : Pat<(atomic_store_32 dag32, addr:$dst),
1155            (!cast<Instruction>(Name#32m) addr:$dst)>;
1156  def : Pat<(atomic_store_64 dag64, addr:$dst),
1157            (!cast<Instruction>(Name#64m) addr:$dst)>;
1158}
1159
// Store of (atomic load + 1) selects INC and store of (atomic load + -1)
// selects DEC on memory, only when UseIncDec holds. The decrement is written
// as an add of -1 rather than a sub of 1.
1160let Predicates = [UseIncDec] in {
1161  defm : RELEASE_UNOP<"INC",
1162      (add (atomic_load_nonext_8  addr:$dst), (i8 1)),
1163      (add (atomic_load_nonext_16 addr:$dst), (i16 1)),
1164      (add (atomic_load_nonext_32 addr:$dst), (i32 1)),
1165      (add (atomic_load_nonext_64 addr:$dst), (i64 1))>;
1166  defm : RELEASE_UNOP<"DEC",
1167      (add (atomic_load_nonext_8  addr:$dst), (i8 -1)),
1168      (add (atomic_load_nonext_16 addr:$dst), (i16 -1)),
1169      (add (atomic_load_nonext_32 addr:$dst), (i32 -1)),
1170      (add (atomic_load_nonext_64 addr:$dst), (i64 -1))>;
1171}
1172
// Store of the negated / bitwise-inverted atomic load selects NEG / NOT on
// memory. These carry no predicate.
1173defm : RELEASE_UNOP<"NEG",
1174    (ineg (i8 (atomic_load_nonext_8  addr:$dst))),
1175    (ineg (i16 (atomic_load_nonext_16 addr:$dst))),
1176    (ineg (i32 (atomic_load_nonext_32 addr:$dst))),
1177    (ineg (i64 (atomic_load_nonext_64 addr:$dst)))>;
1178defm : RELEASE_UNOP<"NOT",
1179    (not (i8 (atomic_load_nonext_8  addr:$dst))),
1180    (not (i16 (atomic_load_nonext_16 addr:$dst))),
1181    (not (i32 (atomic_load_nonext_32 addr:$dst))),
1182    (not (i64 (atomic_load_nonext_64 addr:$dst)))>;
1183
// Atomic stores of an immediate select the plain MOV-immediate-to-memory
// instructions; the 64-bit form only accepts a sign-extended 32-bit immediate.
1184def : Pat<(atomic_store_8 (i8 imm:$src), addr:$dst),
1185          (MOV8mi addr:$dst, imm:$src)>;
1186def : Pat<(atomic_store_16 (i16 imm:$src), addr:$dst),
1187          (MOV16mi addr:$dst, imm:$src)>;
1188def : Pat<(atomic_store_32 (i32 imm:$src), addr:$dst),
1189          (MOV32mi addr:$dst, imm:$src)>;
1190def : Pat<(atomic_store_64 (i64immSExt32:$src), addr:$dst),
1191          (MOV64mi32 addr:$dst, i64immSExt32:$src)>;
1192
// Atomic stores of a register select the plain MOV-register-to-memory
// instructions.
1193def : Pat<(atomic_store_8 GR8:$src, addr:$dst),
1194          (MOV8mr addr:$dst, GR8:$src)>;
1195def : Pat<(atomic_store_16 GR16:$src, addr:$dst),
1196          (MOV16mr addr:$dst, GR16:$src)>;
1197def : Pat<(atomic_store_32 GR32:$src, addr:$dst),
1198          (MOV32mr addr:$dst, GR32:$src)>;
1199def : Pat<(atomic_store_64 GR64:$src, addr:$dst),
1200          (MOV64mr addr:$dst, GR64:$src)>;
1201
// Non-extending atomic loads select the plain MOV-memory-to-register
// instructions.
1202def : Pat<(i8  (atomic_load_nonext_8 addr:$src)),  (MOV8rm addr:$src)>;
1203def : Pat<(i16 (atomic_load_nonext_16 addr:$src)), (MOV16rm addr:$src)>;
1204def : Pat<(i32 (atomic_load_nonext_32 addr:$src)), (MOV32rm addr:$src)>;
1205def : Pat<(i64 (atomic_load_nonext_64 addr:$src)), (MOV64rm addr:$src)>;
1206
1207// Floating point loads/stores.
// An atomic integer store whose value is a bitcast f32/f64 register is
// selected directly to the scalar FP store, avoiding a move through a
// general-purpose register. One pattern per encoding family: legacy SSE
// (UseSSE1/UseSSE2), VEX (UseAVX) and EVEX (HasAVX512, "Z" instructions).
1208def : Pat<(atomic_store_32 (i32 (bitconvert (f32 FR32:$src))), addr:$dst),
1209          (MOVSSmr addr:$dst, FR32:$src)>, Requires<[UseSSE1]>;
1210def : Pat<(atomic_store_32 (i32 (bitconvert (f32 FR32:$src))), addr:$dst),
1211          (VMOVSSmr addr:$dst, FR32:$src)>, Requires<[UseAVX]>;
1212def : Pat<(atomic_store_32 (i32 (bitconvert (f32 FR32:$src))), addr:$dst),
1213          (VMOVSSZmr addr:$dst, FR32:$src)>, Requires<[HasAVX512]>;
1214
1215def : Pat<(atomic_store_64 (i64 (bitconvert (f64 FR64:$src))), addr:$dst),
1216          (MOVSDmr addr:$dst, FR64:$src)>, Requires<[UseSSE2]>;
1217def : Pat<(atomic_store_64 (i64 (bitconvert (f64 FR64:$src))), addr:$dst),
1218          (VMOVSDmr addr:$dst, FR64:$src)>, Requires<[UseAVX]>;
// The AVX-512 pattern selects the "Z" store, mirroring VMOVSSZmr above and the
// VMOVSDZrm_alt atomic load pattern in this file.
1219def : Pat<(atomic_store_64 (i64 (bitconvert (f64 FR64:$src))), addr:$dst),
1220          (VMOVSDZmr addr:$dst, FR64:$src)>, Requires<[HasAVX512]>;
1221
// An atomic integer load that is immediately bitcast to f32/f64 is selected
// directly to the "_alt" scalar FP load of the matching encoding family
// (legacy SSE, VEX under UseAVX, "Z" form under HasAVX512).
1222def : Pat<(f32 (bitconvert (i32 (atomic_load_nonext_32 addr:$src)))),
1223          (MOVSSrm_alt addr:$src)>, Requires<[UseSSE1]>;
1224def : Pat<(f32 (bitconvert (i32 (atomic_load_nonext_32 addr:$src)))),
1225          (VMOVSSrm_alt addr:$src)>, Requires<[UseAVX]>;
1226def : Pat<(f32 (bitconvert (i32 (atomic_load_nonext_32 addr:$src)))),
1227          (VMOVSSZrm_alt addr:$src)>, Requires<[HasAVX512]>;
1228
1229def : Pat<(f64 (bitconvert (i64 (atomic_load_nonext_64 addr:$src)))),
1230          (MOVSDrm_alt addr:$src)>, Requires<[UseSSE2]>;
1231def : Pat<(f64 (bitconvert (i64 (atomic_load_nonext_64 addr:$src)))),
1232          (VMOVSDrm_alt addr:$src)>, Requires<[UseAVX]>;
1233def : Pat<(f64 (bitconvert (i64 (atomic_load_nonext_64 addr:$src)))),
1234          (VMOVSDZrm_alt addr:$src)>, Requires<[HasAVX512]>;
1235
1236//===----------------------------------------------------------------------===//
1237// DAG Pattern Matching Rules
1238//===----------------------------------------------------------------------===//
1239
1240// Use AND/OR to store 0/-1 in memory when optimizing for minsize. This saves
1241// binary size compared to a regular MOV, but it introduces an unnecessary
1242// load, so is not suitable for regular or optsize functions.
// Only simple_store nodes of 16/32/64-bit constants are matched; there is no
// 8-bit pattern in this group.
1243let Predicates = [OptForMinSize] in {
1244def : Pat<(simple_store (i16 0), addr:$dst), (AND16mi addr:$dst, 0)>;
1245def : Pat<(simple_store (i32 0), addr:$dst), (AND32mi addr:$dst, 0)>;
1246def : Pat<(simple_store (i64 0), addr:$dst), (AND64mi32 addr:$dst, 0)>;
1247def : Pat<(simple_store (i16 -1), addr:$dst), (OR16mi addr:$dst, -1)>;
1248def : Pat<(simple_store (i32 -1), addr:$dst), (OR32mi addr:$dst, -1)>;
1249def : Pat<(simple_store (i64 -1), addr:$dst), (OR64mi32 addr:$dst, -1)>;
1250}
1251
1252// In kernel code model, we can get the address of a label
1253// into a register with 'movq'.  FIXME: This is a hack, the 'imm' predicate of
1254// the MOV64ri32 should accept these.
// One pattern per symbolic operand kind wrapped by X86Wrapper; each selects
// MOV64ri32 with the symbol as its immediate and requires KernelCode.
1255def : Pat<(i64 (X86Wrapper tconstpool  :$dst)),
1256          (MOV64ri32 tconstpool  :$dst)>, Requires<[KernelCode]>;
1257def : Pat<(i64 (X86Wrapper tjumptable  :$dst)),
1258          (MOV64ri32 tjumptable  :$dst)>, Requires<[KernelCode]>;
1259def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
1260          (MOV64ri32 tglobaladdr :$dst)>, Requires<[KernelCode]>;
1261def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
1262          (MOV64ri32 texternalsym:$dst)>, Requires<[KernelCode]>;
1263def : Pat<(i64 (X86Wrapper mcsym:$dst)),
1264          (MOV64ri32 mcsym:$dst)>, Requires<[KernelCode]>;
1265def : Pat<(i64 (X86Wrapper tblockaddress:$dst)),
1266          (MOV64ri32 tblockaddress:$dst)>, Requires<[KernelCode]>;
1267
1268// If we have small model and -static mode, it is safe to store global addresses
1269// directly as immediates.  FIXME: This is really a hack, the 'imm' predicate
1270// for MOV64mi32 should handle this sort of thing.
// One store pattern per wrapped symbolic operand kind; each requires both
// the NearData and IsNotPIC predicates.
1271def : Pat<(store (i64 (X86Wrapper tconstpool:$src)), addr:$dst),
1272          (MOV64mi32 addr:$dst, tconstpool:$src)>,
1273          Requires<[NearData, IsNotPIC]>;
1274def : Pat<(store (i64 (X86Wrapper tjumptable:$src)), addr:$dst),
1275          (MOV64mi32 addr:$dst, tjumptable:$src)>,
1276          Requires<[NearData, IsNotPIC]>;
1277def : Pat<(store (i64 (X86Wrapper tglobaladdr:$src)), addr:$dst),
1278          (MOV64mi32 addr:$dst, tglobaladdr:$src)>,
1279          Requires<[NearData, IsNotPIC]>;
1280def : Pat<(store (i64 (X86Wrapper texternalsym:$src)), addr:$dst),
1281          (MOV64mi32 addr:$dst, texternalsym:$src)>,
1282          Requires<[NearData, IsNotPIC]>;
1283def : Pat<(store (i64 (X86Wrapper mcsym:$src)), addr:$dst),
1284          (MOV64mi32 addr:$dst, mcsym:$src)>,
1285          Requires<[NearData, IsNotPIC]>;
1286def : Pat<(store (i64 (X86Wrapper tblockaddress:$src)), addr:$dst),
1287          (MOV64mi32 addr:$dst, tblockaddress:$src)>,
1288          Requires<[NearData, IsNotPIC]>;
1289
// X86RecoverFrameAlloc of an MC symbol is selected as a move-immediate of
// that symbol: MOV32ri for an i32 result, MOV64ri for an i64 result.
1290def : Pat<(i32 (X86RecoverFrameAlloc mcsym:$dst)), (MOV32ri mcsym:$dst)>;
1291def : Pat<(i64 (X86RecoverFrameAlloc mcsym:$dst)), (MOV64ri mcsym:$dst)>;
1292
1293// Calls
1294
1295// TLS has some funny stuff here...
1296// This corresponds to movabs $foo@tpoff, %rax
// NOTE(review): the instruction selected below is MOV64ri32 (a 32-bit
// immediate form), so the "movabs" wording above may be imprecise - confirm.
1297def : Pat<(i64 (X86Wrapper tglobaltlsaddr :$dst)),
1298          (MOV64ri32 tglobaltlsaddr :$dst)>;
1299// This corresponds to add $foo@tpoff, %rax
1300def : Pat<(add GR64:$src1, (X86Wrapper tglobaltlsaddr :$dst)),
1301          (ADD64ri32 GR64:$src1, tglobaltlsaddr :$dst)>;
1302
1303
1304// Direct PC relative function call for small code model. 32-bit displacement
1305// sign extended to 64-bit.
1306def : Pat<(X86call (i64 tglobaladdr:$dst)),
1307          (CALL64pcrel32 tglobaladdr:$dst)>;
1308def : Pat<(X86call (i64 texternalsym:$dst)),
1309          (CALL64pcrel32 texternalsym:$dst)>;
1310
// Calls carrying an extra $rvfunc global-address operand (X86call_rvmarker)
// select the _RVMARKER pseudo, for both external-symbol and global callees.
1311def : Pat<(X86call_rvmarker (i64 tglobaladdr:$rvfunc), (i64 texternalsym:$dst)),
1312          (CALL64pcrel32_RVMARKER tglobaladdr:$rvfunc, texternalsym:$dst)>;
1313def : Pat<(X86call_rvmarker (i64 tglobaladdr:$rvfunc), (i64 tglobaladdr:$dst)),
1314          (CALL64pcrel32_RVMARKER tglobaladdr:$rvfunc, tglobaladdr:$dst)>;
1315
// X86imp_call to a global address uses the same direct PC-relative call.
1316def : Pat<(X86imp_call (i64 tglobaladdr:$dst)),
1317          (CALL64pcrel32 tglobaladdr:$dst)>;
1318
1319// Tailcall stuff. The TCRETURN instructions execute after the epilog, so they
1320// can never use callee-saved registers. That is the purpose of the GR64_TC
1321// register classes.
1322//
1323// The only volatile register that is never used by the calling convention is
1324// %r11. This happens when calling a vararg function with 6 arguments.
1325//
1326// Match an X86tcret that uses fewer than 7 volatile registers.
1327def : Pat<(X86tcret ptr_rc_tailcall:$dst, timm:$off),
1328          (TCRETURNri ptr_rc_tailcall:$dst, timm:$off)>,
1329          Requires<[Not64BitMode, NotUseIndirectThunkCalls]>;
1330
1331// FIXME: This is disabled for 32-bit PIC mode because the global base
1332// register which is part of the address mode may be assigned a
1333// callee-saved register.
1334// Similar to X86tcret_6regs, here we only have 1 register left.
1335def : Pat<(X86tcret_1reg (load addr:$dst), timm:$off),
1336          (TCRETURNmi addr:$dst, timm:$off)>,
1337          Requires<[Not64BitMode, IsNotPIC, NotUseIndirectThunkCalls]>;
1338
// Direct tail calls to a 32-bit global address or external symbol (NotLP64).
1339def : Pat<(X86tcret (i32 tglobaladdr:$dst), timm:$off),
1340          (TCRETURNdi tglobaladdr:$dst, timm:$off)>,
1341          Requires<[NotLP64]>;
1342
1343def : Pat<(X86tcret (i32 texternalsym:$dst), timm:$off),
1344          (TCRETURNdi texternalsym:$dst, timm:$off)>,
1345          Requires<[NotLP64]>;
1346
// 64-bit register-indirect tail calls: the same DAG selects TCRETURNri64 or
// TCRETURNri64_ImpCall depending on the import-call-optimization predicate.
1347def : Pat<(X86tcret ptr_rc_tailcall:$dst, timm:$off),
1348          (TCRETURNri64 ptr_rc_tailcall:$dst, timm:$off)>,
1349          Requires<[In64BitMode, NotUseIndirectThunkCalls, ImportCallOptimizationDisabled]>;
1350
1351def : Pat<(X86tcret ptr_rc_tailcall:$dst, timm:$off),
1352          (TCRETURNri64_ImpCall ptr_rc_tailcall:$dst, timm:$off)>,
1353          Requires<[In64BitMode, NotUseIndirectThunkCalls, ImportCallOptimizationEnabled]>;
1354
1355// Don't fold loads into X86tcret requiring more than 6 regs.
1356// There wouldn't be enough scratch registers for base+index.
1357def : Pat<(X86tcret_6regs (load addr:$dst), timm:$off),
1358          (TCRETURNmi64 addr:$dst, timm:$off)>,
1359          Requires<[In64BitMode, NotUseIndirectThunkCalls]>;
1360
// When indirect thunk calls are in use, register-indirect tail calls select
// the INDIRECT_THUNK_TCRETURN pseudos instead (64-bit and 32-bit variants).
1361def : Pat<(X86tcret ptr_rc_tailcall:$dst, timm:$off),
1362          (INDIRECT_THUNK_TCRETURN64 ptr_rc_tailcall:$dst, timm:$off)>,
1363          Requires<[In64BitMode, UseIndirectThunkCalls]>;
1364
1365def : Pat<(X86tcret ptr_rc_tailcall:$dst, timm:$off),
1366          (INDIRECT_THUNK_TCRETURN32 ptr_rc_tailcall:$dst, timm:$off)>,
1367          Requires<[Not64BitMode, UseIndirectThunkCalls]>;
1368
// Direct tail calls to a 64-bit global address or external symbol (IsLP64).
1369def : Pat<(X86tcret (i64 tglobaladdr:$dst), timm:$off),
1370          (TCRETURNdi64 tglobaladdr:$dst, timm:$off)>,
1371          Requires<[IsLP64]>;
1372
1373def : Pat<(X86tcret (i64 texternalsym:$dst), timm:$off),
1374          (TCRETURNdi64 texternalsym:$dst, timm:$off)>,
1375          Requires<[IsLP64]>;
1376
1377// Normal calls, with various flavors of addresses.
// i32 callees: global address, external symbol, or (only under the
// CallImmAddr predicate) a plain immediate address.
1378def : Pat<(X86call (i32 tglobaladdr:$dst)),
1379          (CALLpcrel32 tglobaladdr:$dst)>;
1380def : Pat<(X86call (i32 texternalsym:$dst)),
1381          (CALLpcrel32 texternalsym:$dst)>;
1382def : Pat<(X86call (i32 imm:$dst)),
1383          (CALLpcrel32 imm:$dst)>, Requires<[CallImmAddr]>;
1384
1385// Comparisons.
1386
1387// TEST R,R is smaller than CMP R,0
// A compare of a register against 0 is selected as TEST of the register
// with itself, for the 8/16/32/64-bit register classes.
1388def : Pat<(X86cmp GR8:$src1, 0),
1389          (TEST8rr GR8:$src1, GR8:$src1)>;
1390def : Pat<(X86cmp GR16:$src1, 0),
1391          (TEST16rr GR16:$src1, GR16:$src1)>;
1392def : Pat<(X86cmp GR32:$src1, 0),
1393          (TEST32rr GR32:$src1, GR32:$src1)>;
1394def : Pat<(X86cmp GR64:$src1, 0),
1395          (TEST64rr GR64:$src1, GR64:$src1)>;
1396
1397// zextload bool -> zextload byte
1398// i1 stored in one byte in zero-extended form.
1399// Upper bits cleanup should be executed before Store.
// The i16 and i64 results are derived from a 32-bit MOVZX32rm8 via
// EXTRACT_SUBREG / SUBREG_TO_REG rather than using a dedicated load.
1400def : Pat<(zextloadi8i1  addr:$src), (MOV8rm addr:$src)>;
1401def : Pat<(zextloadi16i1 addr:$src),
1402          (EXTRACT_SUBREG (MOVZX32rm8 addr:$src), sub_16bit)>;
1403def : Pat<(zextloadi32i1 addr:$src), (MOVZX32rm8 addr:$src)>;
1404def : Pat<(zextloadi64i1 addr:$src),
1405          (SUBREG_TO_REG (i64 0), (MOVZX32rm8 addr:$src), sub_32bit)>;
1406
1407// extload bool -> extload byte
1408// When extloading from 16-bit and smaller memory locations into 64-bit
1409// registers, use zero-extending loads so that the entire 64-bit register is
1410// defined, avoiding partial-register updates.
// The patterns in this group cover any-extending loads with i8, i16 and i32
// results; 16-bit results are taken as the low subregister of a 32-bit MOVZX.
1411
1412def : Pat<(extloadi8i1 addr:$src),   (MOV8rm      addr:$src)>;
1413def : Pat<(extloadi16i1 addr:$src),
1414          (EXTRACT_SUBREG (MOVZX32rm8 addr:$src), sub_16bit)>;
1415def : Pat<(extloadi32i1 addr:$src),  (MOVZX32rm8  addr:$src)>;
1416def : Pat<(extloadi16i8 addr:$src),
1417          (EXTRACT_SUBREG (MOVZX32rm8 addr:$src), sub_16bit)>;
1418def : Pat<(extloadi32i8 addr:$src),  (MOVZX32rm8  addr:$src)>;
1419def : Pat<(extloadi32i16 addr:$src), (MOVZX32rm16 addr:$src)>;
1420
1421// For other extloads, use subregs, since the high contents of the register are
1422// defined after an extload.
1423// NOTE: The extloadi64i32 pattern needs to be first as it will try to form
1424// 32-bit loads for 4 byte aligned i8/i16 loads.
// Every i64 result here is a 32-bit load wrapped in SUBREG_TO_REG at
// sub_32bit.
1425def : Pat<(extloadi64i32 addr:$src),
1426          (SUBREG_TO_REG (i64 0), (MOV32rm addr:$src), sub_32bit)>;
1427def : Pat<(extloadi64i1 addr:$src),
1428          (SUBREG_TO_REG (i64 0), (MOVZX32rm8 addr:$src), sub_32bit)>;
1429def : Pat<(extloadi64i8 addr:$src),
1430          (SUBREG_TO_REG (i64 0), (MOVZX32rm8 addr:$src), sub_32bit)>;
1431def : Pat<(extloadi64i16 addr:$src),
1432          (SUBREG_TO_REG (i64 0), (MOVZX32rm16 addr:$src), sub_32bit)>;
1433
1434// anyext. Define these to do an explicit zero-extend to
1435// avoid partial-register updates.
1436def : Pat<(i16 (anyext GR8 :$src)), (EXTRACT_SUBREG
1437                                     (MOVZX32rr8 GR8 :$src), sub_16bit)>;
1438def : Pat<(i32 (anyext GR8 :$src)), (MOVZX32rr8  GR8 :$src)>;
1439
1440// Except for i16 -> i32 since isel expects i16 ops to be promoted to i32.
// Here the upper bits are left undefined: the source is inserted into an
// IMPLICIT_DEF of the wider type.
1441def : Pat<(i32 (anyext GR16:$src)),
1442          (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR16:$src, sub_16bit)>;
1443
// i64 results: i8/i16 sources go through a 32-bit MOVZX; an i32 source is
// inserted into an IMPLICIT_DEF without any extension instruction.
1444def : Pat<(i64 (anyext GR8 :$src)),
1445          (SUBREG_TO_REG (i64 0), (MOVZX32rr8  GR8  :$src), sub_32bit)>;
1446def : Pat<(i64 (anyext GR16:$src)),
1447          (SUBREG_TO_REG (i64 0), (MOVZX32rr16 GR16 :$src), sub_32bit)>;
1448def : Pat<(i64 (anyext GR32:$src)),
1449          (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, sub_32bit)>;
1450
// anyext_sdiv (defined elsewhere) of an i8 is selected as a sign-extending
// move.
1451def : Pat<(i32 (anyext_sdiv GR8:$src)), (MOVSX32rr8 GR8:$src)>;
1452
1453// In the case of a 32-bit def that is known to implicitly zero-extend,
1454// we can use a SUBREG_TO_REG.
// The second pattern handles the same zero-extension written as an anyext
// followed by an AND with the low-32-bit mask.
1455def : Pat<(i64 (zext def32:$src)),
1456          (SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>;
1457def : Pat<(i64 (and (anyext def32:$src), 0x00000000FFFFFFFF)),
1458          (SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>;
1459
1460//===----------------------------------------------------------------------===//
1461// Pattern match OR as ADD
1462//===----------------------------------------------------------------------===//
1463
1464// If safe, we prefer to pattern match OR as ADD at isel time. ADD can be
1465// 3-addressified into an LEA instruction to avoid copies.  However, we also
1466// want to finally emit these instructions as an or at the end of the code
1467// generator to make the generated code easier to read.  To do this, we select
1468// into "disjoint bits" pseudo ops.
1469
1470// (or x1, x2) -> (add x1, x2) if two operands are known not to share bits.
1471// Try this before selecting to OR.
1472let SchedRW = [WriteALU] in {
1473
// All *_DB pseudos are two-address ($src1 tied to $dst), define EFLAGS and
// are marked convertible to three-address form.
1474let isConvertibleToThreeAddress = 1, isPseudo = 1,
1475    Constraints = "$src1 = $dst", Defs = [EFLAGS] in {
// Register-register forms; only these are marked commutable.
1476let isCommutable = 1 in {
1477def ADD8rr_DB   : I<0, Pseudo, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
1478                    "", // orb/addb REG, REG
1479                    [(set GR8:$dst, (or_is_add GR8:$src1, GR8:$src2))]>;
1480def ADD16rr_DB  : I<0, Pseudo, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
1481                    "", // orw/addw REG, REG
1482                    [(set GR16:$dst, (or_is_add GR16:$src1, GR16:$src2))]>;
1483def ADD32rr_DB  : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
1484                    "", // orl/addl REG, REG
1485                    [(set GR32:$dst, (or_is_add GR32:$src1, GR32:$src2))]>;
1486def ADD64rr_DB  : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
1487                    "", // orq/addq REG, REG
1488                    [(set GR64:$dst, (or_is_add GR64:$src1, GR64:$src2))]>;
1489} // isCommutable
1490
// Register-immediate forms. The 64-bit form only accepts immediates matching
// i64immSExt32.
1491def ADD8ri_DB :   I<0, Pseudo,
1492                    (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
1493                    "", // orb/addb REG, imm8
1494                    [(set GR8:$dst, (or_is_add GR8:$src1, imm:$src2))]>;
1495def ADD16ri_DB  : I<0, Pseudo, (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
1496                    "", // orw/addw REG, imm
1497                    [(set GR16:$dst, (or_is_add GR16:$src1, imm:$src2))]>;
1498def ADD32ri_DB  : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
1499                    "", // orl/addl REG, imm
1500                    [(set GR32:$dst, (or_is_add GR32:$src1, imm:$src2))]>;
1501def ADD64ri32_DB : I<0, Pseudo,
1502                     (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),
1503                     "", // orq/addq REG, imm
1504                     [(set GR64:$dst, (or_is_add GR64:$src1,
1505                                                 i64immSExt32:$src2))]>;
1506} // isConvertibleToThreeAddress, isPseudo, Constraints, Defs
1507} // SchedRW
1508
1509//===----------------------------------------------------------------------===//
1510// Pattern match XOR as ADD
1511//===----------------------------------------------------------------------===//
1512
1513// Prefer to pattern match XOR with min_signed_value as ADD at isel time.
1514// ADD can be 3-addressified into an LEA instruction to avoid copies.
// Covers the 8-, 16- and 32-bit widths; the immediate in each pattern is the
// minimum signed value of that width. AddedComplexity raises the priority of
// these patterns.
1515let AddedComplexity = 5 in {
1516def : Pat<(xor GR8:$src1, -128),
1517          (ADD8ri GR8:$src1, -128)>;
1518def : Pat<(xor GR16:$src1, -32768),
1519          (ADD16ri GR16:$src1, -32768)>;
1520def : Pat<(xor GR32:$src1, -2147483648),
1521          (ADD32ri GR32:$src1, -2147483648)>;
1522}
1523
1524//===----------------------------------------------------------------------===//
1525// Some peepholes
1526//===----------------------------------------------------------------------===//
1527
1528// Odd encoding trick: -128 fits into an 8-bit immediate field while
1529// +128 doesn't, so in this special case use a sub instead of an add.
// Register forms are split by NoNDD / HasNDD (the latter select the _ND
// instruction variants); both plain add and X86add_flag_nocf are handled.
1530let Predicates = [NoNDD] in {
1531  def : Pat<(add GR16:$src1, 128),
1532            (SUB16ri GR16:$src1, -128)>;
1533  def : Pat<(add GR32:$src1, 128),
1534            (SUB32ri GR32:$src1, -128)>;
1535  def : Pat<(add GR64:$src1, 128),
1536            (SUB64ri32 GR64:$src1, -128)>;
1537
1538  def : Pat<(X86add_flag_nocf GR16:$src1, 128),
1539            (SUB16ri GR16:$src1, -128)>;
1540  def : Pat<(X86add_flag_nocf GR32:$src1, 128),
1541            (SUB32ri GR32:$src1, -128)>;
1542  def : Pat<(X86add_flag_nocf GR64:$src1, 128),
1543            (SUB64ri32 GR64:$src1, -128)>;
1544}
1545let Predicates = [HasNDD] in {
1546  def : Pat<(add GR16:$src1, 128),
1547            (SUB16ri_ND GR16:$src1, -128)>;
1548  def : Pat<(add GR32:$src1, 128),
1549            (SUB32ri_ND GR32:$src1, -128)>;
1550  def : Pat<(add GR64:$src1, 128),
1551            (SUB64ri32_ND GR64:$src1, -128)>;
1552
1553  def : Pat<(X86add_flag_nocf GR16:$src1, 128),
1554            (SUB16ri_ND GR16:$src1, -128)>;
1555  def : Pat<(X86add_flag_nocf GR32:$src1, 128),
1556            (SUB32ri_ND GR32:$src1, -128)>;
1557  def : Pat<(X86add_flag_nocf GR64:$src1, 128),
1558            (SUB64ri32_ND GR64:$src1, -128)>;
1559}
// Read-modify-write memory forms (load, add 128, store back to the same
// address); these carry no predicate.
1560def : Pat<(store (add (loadi16 addr:$dst), 128), addr:$dst),
1561          (SUB16mi addr:$dst, -128)>;
1562def : Pat<(store (add (loadi32 addr:$dst), 128), addr:$dst),
1563          (SUB32mi addr:$dst, -128)>;
1564def : Pat<(store (add (loadi64 addr:$dst), 128), addr:$dst),
1565          (SUB64mi32 addr:$dst, -128)>;
// With NDD, the load-and-add (no store back) can use the memory-source _ND
// form.
1566let Predicates = [HasNDD] in {
1567  def : Pat<(add (loadi16 addr:$src), 128),
1568            (SUB16mi_ND addr:$src, -128)>;
1569  def : Pat<(add (loadi32 addr:$src), 128),
1570            (SUB32mi_ND addr:$src, -128)>;
1571  def : Pat<(add (loadi64 addr:$src), 128),
1572            (SUB64mi32_ND addr:$src, -128)>;
1573}
1574
1575// The same trick applies for 32-bit immediate fields in 64-bit
1576// instructions.
// Adding 0x80000000 to a 64-bit value is selected as subtracting
// 0xffffffff80000000.
1577let Predicates = [NoNDD] in {
1578  def : Pat<(add GR64:$src1, 0x0000000080000000),
1579            (SUB64ri32 GR64:$src1, 0xffffffff80000000)>;
1580  def : Pat<(X86add_flag_nocf GR64:$src1, 0x0000000080000000),
1581            (SUB64ri32 GR64:$src1, 0xffffffff80000000)>;
1582}
1583let Predicates = [HasNDD] in {
1584  def : Pat<(add GR64:$src1, 0x0000000080000000),
1585            (SUB64ri32_ND GR64:$src1, 0xffffffff80000000)>;
1586  def : Pat<(X86add_flag_nocf GR64:$src1, 0x0000000080000000),
1587            (SUB64ri32_ND GR64:$src1, 0xffffffff80000000)>;
1588}
// Read-modify-write memory form, followed by the NDD memory-source form.
1589def : Pat<(store (add (loadi64 addr:$dst), 0x0000000080000000), addr:$dst),
1590          (SUB64mi32 addr:$dst, 0xffffffff80000000)>;
1591let Predicates = [HasNDD] in {
1592  def : Pat<(add(loadi64 addr:$src), 0x0000000080000000),
1593            (SUB64mi32_ND addr:$src, 0xffffffff80000000)>;
1594}
1595
1596// Depositing value to 8/16 bit subreg:
// (or (and x, ~mask), zextload) where mask is the low 8 or 16 bits is
// selected as a narrow load inserted into the low subregister of a copy of x.
1597def : Pat<(or (and GR64:$dst, -256),
1598              (i64 (zextloadi8 addr:$src))),
1599          (INSERT_SUBREG (i64 (COPY $dst)), (MOV8rm  i8mem:$src), sub_8bit)>;
1600
1601def : Pat<(or (and GR32:$dst, -256),
1602              (i32 (zextloadi8 addr:$src))),
1603          (INSERT_SUBREG (i32 (COPY $dst)), (MOV8rm  i8mem:$src), sub_8bit)>;
1604
1605def : Pat<(or (and GR64:$dst, -65536),
1606              (i64 (zextloadi16 addr:$src))),
1607          (INSERT_SUBREG (i64 (COPY $dst)), (MOV16rm  i16mem:$src), sub_16bit)>;
1608
1609def : Pat<(or (and GR32:$dst, -65536),
1610              (i32 (zextloadi16 addr:$src))),
1611          (INSERT_SUBREG (i32 (COPY $dst)), (MOV16rm  i16mem:$src), sub_16bit)>;
1612
1613// To avoid needing to materialize an immediate in a register, use a 32-bit and
1614// with implicit zero-extension instead of a 64-bit and if the immediate has at
1615// least 32 bits of leading zeros. If in addition the last 32 bits can be
1616// represented with a sign extension of an 8-bit constant, use that.
1617// This can also reduce instruction size by eliminating the need for the REX
1618// prefix.
1619
1620// AddedComplexity is needed to give priority over i64immSExt8 and i64immSExt32.
1621let AddedComplexity = 1 in {
  // The 32-bit AND operates on the low subregister with the immediate
  // truncated by GetLo32XForm; SUBREG_TO_REG rebuilds the i64 result.
1622  let Predicates = [NoNDD] in {
1623    def : Pat<(and GR64:$src, i64immZExt32:$imm),
1624              (SUBREG_TO_REG
1625                (i64 0),
1626                (AND32ri
1627                  (EXTRACT_SUBREG GR64:$src, sub_32bit),
1628                  (i32 (GetLo32XForm imm:$imm))),
1629                sub_32bit)>;
1630  }
1631  let Predicates = [HasNDD] in {
1632    def : Pat<(and GR64:$src, i64immZExt32:$imm),
1633              (SUBREG_TO_REG
1634                (i64 0),
1635                (AND32ri_ND
1636                  (EXTRACT_SUBREG GR64:$src, sub_32bit),
1637                  (i32 (GetLo32XForm imm:$imm))),
1638                sub_32bit)>;
1639  }
1640} // AddedComplexity = 1
1641
1642
1643// AddedComplexity is needed due to the increased complexity on the
1644// i64immZExt32SExt8 and i64immZExt32 patterns above. Applying this to all
1645// the MOVZX patterns keeps them together in DAGIsel tables.
1646let AddedComplexity = 1 in {
1647// r & (2^16-1) ==> movz
1648def : Pat<(and GR32:$src1, 0xffff),
1649          (MOVZX32rr16 (EXTRACT_SUBREG GR32:$src1, sub_16bit))>;
1650// r & (2^8-1) ==> movz
1651def : Pat<(and GR32:$src1, 0xff),
1652          (MOVZX32rr8 (EXTRACT_SUBREG GR32:$src1, sub_8bit))>;
1653// r & (2^8-1) ==> movz
1654def : Pat<(and GR16:$src1, 0xff),
1655           (EXTRACT_SUBREG (MOVZX32rr8 (EXTRACT_SUBREG GR16:$src1, sub_8bit)),
1656             sub_16bit)>;
1657
1658// r & (2^32-1) ==> movz
// (implemented as a 32-bit register move wrapped in SUBREG_TO_REG)
1659def : Pat<(and GR64:$src, 0x00000000FFFFFFFF),
1660          (SUBREG_TO_REG (i64 0),
1661                         (MOV32rr (EXTRACT_SUBREG GR64:$src, sub_32bit)),
1662                         sub_32bit)>;
1663// r & (2^16-1) ==> movz
1664def : Pat<(and GR64:$src, 0xffff),
1665          (SUBREG_TO_REG (i64 0),
1666                      (MOVZX32rr16 (i16 (EXTRACT_SUBREG GR64:$src, sub_16bit))),
1667                      sub_32bit)>;
1668// r & (2^8-1) ==> movz
1669def : Pat<(and GR64:$src, 0xff),
1670          (SUBREG_TO_REG (i64 0),
1671                         (MOVZX32rr8 (i8 (EXTRACT_SUBREG GR64:$src, sub_8bit))),
1672                         sub_32bit)>;
1673} // AddedComplexity = 1
1674
1675
1676// Try to use BTS/BTR/BTC for single bit operations on the upper 32-bits.
1677
// Immediate transform: yields, as an i64 immediate, the number of trailing
// one bits of the input (narrowed to uint8_t), i.e. the index of its lowest
// zero bit.
1678def BTRXForm : SDNodeXForm<imm, [{
1679  // Transformation function: Find the lowest 0.
1680  return getI64Imm((uint8_t)N->getAPIntValue().countr_one(), SDLoc(N));
1681}]>;
1682
// Immediate transform: yields, as an i64 immediate, the number of trailing
// zero bits of the input (narrowed to uint8_t), i.e. the index of its lowest
// set bit.
1683def BTCBTSXForm : SDNodeXForm<imm, [{
1684  // Transformation function: Find the lowest 1.
1685  return getI64Imm((uint8_t)N->getAPIntValue().countr_zero(), SDLoc(N));
1686}]>;
1687
// Matches an i64 immediate that fits in neither an unsigned nor a signed
// 32-bit value and whose bitwise complement is a power of two (exactly one
// bit is clear).
1688def BTRMask64 : ImmLeaf<i64, [{
1689  return !isUInt<32>(Imm) && !isInt<32>(Imm) && isPowerOf2_64(~Imm);
1690}]>;
1691
// Matches an i64 immediate that does not fit in a signed 32-bit value and is
// a power of two (exactly one bit is set).
1692def BTCBTSMask64 : ImmLeaf<i64, [{
1693  return !isInt<32>(Imm) && isPowerOf2_64(Imm);
1694}]>;
1695
1696// For now only do this for optsize.
// and/or/xor with a matching single-bit 64-bit mask is selected as
// BTR/BTS/BTC, with the bit index computed from the mask by the XForm.
1697let AddedComplexity = 1, Predicates=[OptForSize] in {
1698  def : Pat<(and GR64:$src1, BTRMask64:$mask),
1699            (BTR64ri8 GR64:$src1, (BTRXForm imm:$mask))>;
1700  def : Pat<(or GR64:$src1, BTCBTSMask64:$mask),
1701            (BTS64ri8 GR64:$src1, (BTCBTSXForm imm:$mask))>;
1702  def : Pat<(xor GR64:$src1, BTCBTSMask64:$mask),
1703            (BTC64ri8 GR64:$src1, (BTCBTSXForm imm:$mask))>;
1704}
1705
1706
1707// sext_inreg patterns
// Each sext_inreg is selected as a sign-extending move from the matching low
// subregister; the i16 result is the low half of a 32-bit MOVSX.
1708def : Pat<(sext_inreg GR32:$src, i16),
1709          (MOVSX32rr16 (EXTRACT_SUBREG GR32:$src, sub_16bit))>;
1710def : Pat<(sext_inreg GR32:$src, i8),
1711          (MOVSX32rr8 (EXTRACT_SUBREG GR32:$src, sub_8bit))>;
1712
1713def : Pat<(sext_inreg GR16:$src, i8),
1714           (EXTRACT_SUBREG (MOVSX32rr8 (EXTRACT_SUBREG GR16:$src, sub_8bit)),
1715             sub_16bit)>;
1716
1717def : Pat<(sext_inreg GR64:$src, i32),
1718          (MOVSX64rr32 (EXTRACT_SUBREG GR64:$src, sub_32bit))>;
1719def : Pat<(sext_inreg GR64:$src, i16),
1720          (MOVSX64rr16 (EXTRACT_SUBREG GR64:$src, sub_16bit))>;
1721def : Pat<(sext_inreg GR64:$src, i8),
1722          (MOVSX64rr8 (EXTRACT_SUBREG GR64:$src, sub_8bit))>;
1723
1724// sext, sext_load, zext, zext_load
// i8 -> i16 extensions are done with the 32-bit MOVSX/MOVZX forms, then the
// low 16 bits are extracted.
1725def: Pat<(i16 (sext GR8:$src)),
1726          (EXTRACT_SUBREG (MOVSX32rr8 GR8:$src), sub_16bit)>;
1727def: Pat<(sextloadi16i8 addr:$src),
1728          (EXTRACT_SUBREG (MOVSX32rm8 addr:$src), sub_16bit)>;
1729def: Pat<(i16 (zext GR8:$src)),
1730          (EXTRACT_SUBREG (MOVZX32rr8 GR8:$src), sub_16bit)>;
1731def: Pat<(zextloadi16i8 addr:$src),
1732          (EXTRACT_SUBREG (MOVZX32rm8 addr:$src), sub_16bit)>;
1733
1734// trunc patterns
// Truncation is a plain subregister extract. Outside 64-bit mode, truncating
// GR32/GR16 to i8 first copies the source into the *_ABCD register class.
// NOTE(review): presumably because only those registers have an addressable
// low byte there - confirm against the register class definitions.
1735def : Pat<(i16 (trunc GR32:$src)),
1736          (EXTRACT_SUBREG GR32:$src, sub_16bit)>;
1737def : Pat<(i8 (trunc GR32:$src)),
1738          (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
1739                          sub_8bit)>,
1740      Requires<[Not64BitMode]>;
1741def : Pat<(i8 (trunc GR16:$src)),
1742          (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
1743                          sub_8bit)>,
1744      Requires<[Not64BitMode]>;
1745def : Pat<(i32 (trunc GR64:$src)),
1746          (EXTRACT_SUBREG GR64:$src, sub_32bit)>;
1747def : Pat<(i16 (trunc GR64:$src)),
1748          (EXTRACT_SUBREG GR64:$src, sub_16bit)>;
1749def : Pat<(i8 (trunc GR64:$src)),
1750          (EXTRACT_SUBREG GR64:$src, sub_8bit)>;
1751def : Pat<(i8 (trunc GR32:$src)),
1752          (EXTRACT_SUBREG GR32:$src, sub_8bit)>,
1753      Requires<[In64BitMode]>;
1754def : Pat<(i8 (trunc GR16:$src)),
1755          (EXTRACT_SUBREG GR16:$src, sub_8bit)>,
1756      Requires<[In64BitMode]>;
1757
// Matches an i32 immediate in the inclusive range [0xff00, 0xffff].
1758def immff00_ffff  : ImmLeaf<i32, [{
1759  return Imm >= 0xff00 && Imm <= 0xffff;
1760}]>;
1761
1762// h-register tricks
// Bits [15:8] of a register are read through the sub_8bit_hi subregister
// instead of shifting right by 8. The plain truncating extracts are limited
// to non-64-bit mode; the zero-extending forms use the _NOREX move.
1763def : Pat<(i8 (trunc (srl_su GR16:$src, (i8 8)))),
1764          (EXTRACT_SUBREG GR16:$src, sub_8bit_hi)>,
1765      Requires<[Not64BitMode]>;
1766def : Pat<(i8 (trunc (srl_su (i32 (anyext GR16:$src)), (i8 8)))),
1767          (EXTRACT_SUBREG GR16:$src, sub_8bit_hi)>,
1768      Requires<[Not64BitMode]>;
1769def : Pat<(i8 (trunc (srl_su GR32:$src, (i8 8)))),
1770          (EXTRACT_SUBREG GR32:$src, sub_8bit_hi)>,
1771      Requires<[Not64BitMode]>;
1772def : Pat<(srl GR16:$src, (i8 8)),
1773          (EXTRACT_SUBREG
1774            (MOVZX32rr8_NOREX (EXTRACT_SUBREG GR16:$src, sub_8bit_hi)),
1775            sub_16bit)>;
1776def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))),
1777          (MOVZX32rr8_NOREX (EXTRACT_SUBREG GR16:$src, sub_8bit_hi))>;
1778def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))),
1779          (MOVZX32rr8_NOREX (EXTRACT_SUBREG GR16:$src, sub_8bit_hi))>;
1780def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),
1781          (MOVZX32rr8_NOREX (EXTRACT_SUBREG GR32:$src, sub_8bit_hi))>;
1782def : Pat<(srl (and_su GR32:$src, immff00_ffff), (i8 8)),
1783          (MOVZX32rr8_NOREX (EXTRACT_SUBREG GR32:$src, sub_8bit_hi))>;
1784
1785// h-register tricks.
1786// For now, be conservative on x86-64 and use an h-register extract only if the
1787// value is immediately zero-extended or stored, which are somewhat common
1788// cases. This uses a bunch of code to prevent a register requiring a REX prefix
1789// from being allocated in the same instruction as the h register, as there's
1790// currently no way to describe this requirement to the register allocator.
1791
1792// h-register extract and zero-extend.
// The i64 result is the 32-bit _NOREX zero-extending move wrapped in
// SUBREG_TO_REG.
1793def : Pat<(and (srl_su GR64:$src, (i8 8)), (i64 255)),
1794          (SUBREG_TO_REG
1795            (i64 0),
1796            (MOVZX32rr8_NOREX
1797              (EXTRACT_SUBREG GR64:$src, sub_8bit_hi)),
1798            sub_32bit)>;
1799def : Pat<(i64 (zext (srl_su GR16:$src, (i8 8)))),
1800          (SUBREG_TO_REG
1801            (i64 0),
1802            (MOVZX32rr8_NOREX
1803              (EXTRACT_SUBREG GR16:$src, sub_8bit_hi)),
1804            sub_32bit)>;
1805def : Pat<(i64 (anyext (srl_su GR16:$src, (i8 8)))),
1806          (SUBREG_TO_REG
1807            (i64 0),
1808            (MOVZX32rr8_NOREX
1809              (EXTRACT_SUBREG GR16:$src, sub_8bit_hi)),
1810            sub_32bit)>;
1811
1812// h-register extract and store.
// The GR32 and GR16 source forms are restricted to 64-bit mode; the GR64
// form carries no explicit predicate.
1813def : Pat<(store (i8 (trunc_su (srl_su GR64:$src, (i8 8)))), addr:$dst),
1814          (MOV8mr_NOREX
1815            addr:$dst,
1816            (EXTRACT_SUBREG GR64:$src, sub_8bit_hi))>;
1817def : Pat<(store (i8 (trunc_su (srl_su GR32:$src, (i8 8)))), addr:$dst),
1818          (MOV8mr_NOREX
1819            addr:$dst,
1820            (EXTRACT_SUBREG GR32:$src, sub_8bit_hi))>,
1821      Requires<[In64BitMode]>;
1822def : Pat<(store (i8 (trunc_su (srl_su GR16:$src, (i8 8)))), addr:$dst),
1823          (MOV8mr_NOREX
1824            addr:$dst,
1825            (EXTRACT_SUBREG GR16:$src, sub_8bit_hi))>,
1826      Requires<[In64BitMode]>;
1827
1828// Special pattern to catch the last step of __builtin_parity handling. Our
1829// goal is to use an xor of an h-register with the corresponding l-register.
1830// The above patterns would handle this on non 64-bit targets, but for 64-bit
1831// we need to be more careful. We're using a NOREX instruction here in case
1832// register allocation fails to keep the two registers together. So we need to
1833// make sure we can't accidentally mix R8-R15 with an h-register.
// Both operands are subregisters (sub_8bit and sub_8bit_hi) of the same
// GR32 source.
1834def : Pat<(X86xor_flag (i8 (trunc GR32:$src)),
1835                       (i8 (trunc (srl_su GR32:$src, (i8 8))))),
1836          (XOR8rr_NOREX (EXTRACT_SUBREG GR32:$src, sub_8bit),
1837                        (EXTRACT_SUBREG GR32:$src, sub_8bit_hi))>;
1838
1839// (shl x, 1) ==> (add x, x)
1840// Note that if x is undef (immediate or otherwise), we could theoretically
1841// end up with the two uses of x getting different values, producing a result
1842// where the least significant bit is not 0. However, the probability of this
1843// happening is considered low enough that this is officially not a
1844// "real problem".
// One pattern per register width; NoNDD selects the plain ADDrr forms and
// HasNDD selects the _ND variants.
1845let Predicates = [NoNDD] in {
1846  def : Pat<(shl GR8 :$src1, (i8 1)), (ADD8rr  GR8 :$src1, GR8 :$src1)>;
1847  def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr GR16:$src1, GR16:$src1)>;
1848  def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr GR32:$src1, GR32:$src1)>;
1849  def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr GR64:$src1, GR64:$src1)>;
1850}
1851let Predicates = [HasNDD] in {
1852  def : Pat<(shl GR8 :$src1, (i8 1)), (ADD8rr_ND  GR8 :$src1, GR8 :$src1)>;
1853  def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr_ND GR16:$src1, GR16:$src1)>;
1854  def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr_ND GR32:$src1, GR32:$src1)>;
1855  def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr_ND GR64:$src1, GR64:$src1)>;
1856}
1857
1858// Shift amount is implicitly masked.
// `frag` is the shift node to match. The selected instruction name is built
// from the defm prefix (NAME) plus a width/operand suffix such as "32rCL".
// The 8-, 16- and 32-bit forms all use shiftMask32; the 64-bit form uses
// shiftMask64 (both defined elsewhere).
1859multiclass MaskedShiftAmountPats<SDNode frag> {
1860  // (shift x (and y, 31)) ==> (shift x, y)
1861  // (shift x (and y, 63)) ==> (shift x, y)
  // Register operand, shift count in CL.
1862  let Predicates = [NoNDD] in {
1863    def : Pat<(frag GR8:$src1, (shiftMask32 CL)),
1864              (!cast<Instruction>(NAME # "8rCL") GR8:$src1)>;
1865    def : Pat<(frag GR16:$src1, (shiftMask32 CL)),
1866              (!cast<Instruction>(NAME # "16rCL") GR16:$src1)>;
1867    def : Pat<(frag GR32:$src1, (shiftMask32 CL)),
1868              (!cast<Instruction>(NAME # "32rCL") GR32:$src1)>;
1869    def : Pat<(frag GR64:$src1, (shiftMask64 CL)),
1870              (!cast<Instruction>(NAME # "64rCL") GR64:$src1)>;
1871  }
1872  let Predicates = [HasNDD] in {
1873    def : Pat<(frag GR8:$src1, (shiftMask32 CL)),
1874              (!cast<Instruction>(NAME # "8rCL_ND") GR8:$src1)>;
1875    def : Pat<(frag GR16:$src1, (shiftMask32 CL)),
1876              (!cast<Instruction>(NAME # "16rCL_ND") GR16:$src1)>;
1877    def : Pat<(frag GR32:$src1, (shiftMask32 CL)),
1878              (!cast<Instruction>(NAME # "32rCL_ND") GR32:$src1)>;
1879    def : Pat<(frag GR64:$src1, (shiftMask64 CL)),
1880              (!cast<Instruction>(NAME # "64rCL_ND") GR64:$src1)>;
1881  }
1882
  // Read-modify-write memory operand (load, shift, store to the same address).
1883  def : Pat<(store (frag (loadi8 addr:$dst), (shiftMask32 CL)), addr:$dst),
1884            (!cast<Instruction>(NAME # "8mCL") addr:$dst)>;
1885  def : Pat<(store (frag (loadi16 addr:$dst), (shiftMask32 CL)), addr:$dst),
1886            (!cast<Instruction>(NAME # "16mCL") addr:$dst)>;
1887  def : Pat<(store (frag (loadi32 addr:$dst), (shiftMask32 CL)), addr:$dst),
1888            (!cast<Instruction>(NAME # "32mCL") addr:$dst)>;
1889  def : Pat<(store (frag (loadi64 addr:$dst), (shiftMask64 CL)), addr:$dst),
1890            (!cast<Instruction>(NAME # "64mCL") addr:$dst)>;
1891
  // With NDD, a shift of a loaded value (no store back) uses the
  // memory-source _ND form.
1892  let Predicates = [HasNDD] in {
1893    def : Pat<(frag (loadi8 addr:$src), (shiftMask32 CL)),
1894              (!cast<Instruction>(NAME # "8mCL_ND") addr:$src)>;
1895    def : Pat<(frag (loadi16 addr:$src), (shiftMask32 CL)),
1896              (!cast<Instruction>(NAME # "16mCL_ND") addr:$src)>;
1897    def : Pat<(frag (loadi32 addr:$src), (shiftMask32 CL)),
1898              (!cast<Instruction>(NAME # "32mCL_ND") addr:$src)>;
1899    def : Pat<(frag (loadi64 addr:$src), (shiftMask64 CL)),
1900              (!cast<Instruction>(NAME # "64mCL_ND") addr:$src)>;
1901  }
1902}
1903
// Instantiate the masked-shift-amount patterns for shl, srl and sra. The
// defm name (SHL/SHR/SAR) is the prefix of the instructions the patterns
// select.
1904defm SHL : MaskedShiftAmountPats<shl>;
1905defm SHR : MaskedShiftAmountPats<srl>;
1906defm SAR : MaskedShiftAmountPats<sra>;
1907
1908// ROL/ROR instructions allow a stronger mask optimization than shift for 8- and
1909// 16-bit. We can remove a mask of any (bitwidth - 1) on the rotation amount
1910// because over-rotating produces the same result. This is noted in the Intel
1911// docs with: "tempCOUNT <- (COUNT & COUNTMASK) MOD SIZE". Masking the rotation
1912// amount could affect EFLAGS results, but that does not matter because we are
1913// not tracking flags for these nodes.
// `frag` is the rotate node to match. Unlike the shift multiclass, each width
// uses its own mask fragment (shiftMask8/16/32/64, defined elsewhere).
1914multiclass MaskedRotateAmountPats<SDNode frag> {
1915  // (rot x (and y, BitWidth - 1)) ==> (rot x, y)
  // Register operand, rotate count in CL.
1916  let Predicates = [NoNDD] in {
1917    def : Pat<(frag GR8:$src1, (shiftMask8 CL)),
1918              (!cast<Instruction>(NAME # "8rCL") GR8:$src1)>;
1919    def : Pat<(frag GR16:$src1, (shiftMask16 CL)),
1920              (!cast<Instruction>(NAME # "16rCL") GR16:$src1)>;
1921    def : Pat<(frag GR32:$src1, (shiftMask32 CL)),
1922              (!cast<Instruction>(NAME # "32rCL") GR32:$src1)>;
1923    def : Pat<(frag GR64:$src1, (shiftMask64 CL)),
1924              (!cast<Instruction>(NAME # "64rCL") GR64:$src1)>;
1925  }
1926  let Predicates = [HasNDD] in {
1927    def : Pat<(frag GR8:$src1, (shiftMask8 CL)),
1928              (!cast<Instruction>(NAME # "8rCL_ND") GR8:$src1)>;
1929    def : Pat<(frag GR16:$src1, (shiftMask16 CL)),
1930              (!cast<Instruction>(NAME # "16rCL_ND") GR16:$src1)>;
1931    def : Pat<(frag GR32:$src1, (shiftMask32 CL)),
1932              (!cast<Instruction>(NAME # "32rCL_ND") GR32:$src1)>;
1933    def : Pat<(frag GR64:$src1, (shiftMask64 CL)),
1934              (!cast<Instruction>(NAME # "64rCL_ND") GR64:$src1)>;
1935  }
1936
  // Read-modify-write memory operand (load, rotate, store to the same
  // address).
1937  def : Pat<(store (frag (loadi8 addr:$dst), (shiftMask8 CL)), addr:$dst),
1938            (!cast<Instruction>(NAME # "8mCL") addr:$dst)>;
1939  def : Pat<(store (frag (loadi16 addr:$dst), (shiftMask16 CL)), addr:$dst),
1940            (!cast<Instruction>(NAME # "16mCL") addr:$dst)>;
1941  def : Pat<(store (frag (loadi32 addr:$dst), (shiftMask32 CL)), addr:$dst),
1942            (!cast<Instruction>(NAME # "32mCL") addr:$dst)>;
1943  def : Pat<(store (frag (loadi64 addr:$dst), (shiftMask64 CL)), addr:$dst),
1944            (!cast<Instruction>(NAME # "64mCL") addr:$dst)>;
1945
  // With NDD, a rotate of a loaded value (no store back) uses the
  // memory-source _ND form.
1946  let Predicates = [HasNDD] in {
1947    def : Pat<(frag (loadi8 addr:$src), (shiftMask8 CL)),
1948              (!cast<Instruction>(NAME # "8mCL_ND") addr:$src)>;
1949    def : Pat<(frag (loadi16 addr:$src), (shiftMask16 CL)),
1950              (!cast<Instruction>(NAME # "16mCL_ND") addr:$src)>;
1951    def : Pat<(frag (loadi32 addr:$src), (shiftMask32 CL)),
1952              (!cast<Instruction>(NAME # "32mCL_ND") addr:$src)>;
1953    def : Pat<(frag (loadi64 addr:$src), (shiftMask64 CL)),
1954              (!cast<Instruction>(NAME # "64mCL_ND") addr:$src)>;
1955  }
1956}
1957
1958defm ROL : MaskedRotateAmountPats<rotl>; // NAME = "ROL", matches rotl nodes
1959defm ROR : MaskedRotateAmountPats<rotr>; // NAME = "ROR", matches rotr nodes
1960
1961multiclass MaskedShlrdAmountPats<string suffix, Predicate p> {
      // Selects the CL-count SHLD/SHRD register forms when the funnel-shift
      // amount is wrapped in a shiftMask fragment. `suffix` is appended to the
      // instruction name before the !cast, and `p` is the single predicate
      // guarding every pattern in this multiclass.
      //
      // In each fshr pattern the first node operand is bound to $src2 and the
      // second to $src1, so SHRD receives them in the opposite order from the
      // order in which they appear in the matched node.
1962  let Predicates = [p] in {
1963    // Double "funnel" shift amount is implicitly masked.
1964    // (fshl/fshr x (and y, 31)) ==> (fshl/fshr x, y) (NOTE: modulo32)
        // The 16-bit patterns match the X86-specific X86fshl/X86fshr nodes and use
        // the 32-bit mask fragment rather than a 16-bit one.
1965    def : Pat<(X86fshl GR16:$src1, GR16:$src2, (shiftMask32 CL)),
1966              (!cast<Instruction>(SHLD16rrCL#suffix) GR16:$src1, GR16:$src2)>;
1967    def : Pat<(X86fshr GR16:$src2, GR16:$src1, (shiftMask32 CL)),
1968              (!cast<Instruction>(SHRD16rrCL#suffix) GR16:$src1, GR16:$src2)>;
1969
1970    // (fshl/fshr x (and y, 31)) ==> (fshl/fshr x, y)
1971    def : Pat<(fshl GR32:$src1, GR32:$src2, (shiftMask32 CL)),
1972              (!cast<Instruction>(SHLD32rrCL#suffix) GR32:$src1, GR32:$src2)>;
1973    def : Pat<(fshr GR32:$src2, GR32:$src1, (shiftMask32 CL)),
1974              (!cast<Instruction>(SHRD32rrCL#suffix) GR32:$src1, GR32:$src2)>;
1975
1976    // (fshl/fshr x (and y, 63)) ==> (fshl/fshr x, y)
1977    def : Pat<(fshl GR64:$src1, GR64:$src2, (shiftMask64 CL)),
1978              (!cast<Instruction>(SHLD64rrCL#suffix) GR64:$src1, GR64:$src2)>;
1979    def : Pat<(fshr GR64:$src2, GR64:$src1, (shiftMask64 CL)),
1980              (!cast<Instruction>(SHRD64rrCL#suffix) GR64:$src1, GR64:$src2)>;
1981  }
1982}
1983
1984defm : MaskedShlrdAmountPats<"", NoNDD>; // unsuffixed SHLD/SHRD, under NoNDD
1985defm : MaskedShlrdAmountPats<"_ND", HasNDD>; // "_ND" variants, under HasNDD
1986
1987// Use BTR/BTS/BTC for clearing/setting/toggling a bit in a variable location.
    //
    // Parameters:
    //   rc   - register class of the value being modified ($src1).
    //   vt   - value type used for the IMPLICIT_DEF that widens the bit index.
    //   btr, bts, btc - instructions selected for the and/or/xor patterns.
    //   mask - fragment matching a redundant mask on the bit index.
    //
    // In every pattern the bit index arrives as a GR8 ($src2) and is placed into
    // the sub_8bit subregister of an otherwise undefined `vt` register before
    // being passed to the instruction.
1988multiclass OneBitPats<RegisterClass rc, ValueType vt, Instruction btr,
1989                      Instruction bts, Instruction btc, PatFrag mask> {
      // Clear: and with (rotl -2, n), i.e. all ones except bit n.
1990  def : Pat<(and rc:$src1, (rotl -2, GR8:$src2)),
1991            (btr rc:$src1,
1992                 (INSERT_SUBREG (vt (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
      // Set: or with (shl 1, n).
1993  def : Pat<(or rc:$src1, (shl 1, GR8:$src2)),
1994            (bts rc:$src1,
1995                 (INSERT_SUBREG (vt (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
      // Toggle: xor with (shl 1, n).
1996  def : Pat<(xor rc:$src1, (shl 1, GR8:$src2)),
1997            (btc rc:$src1,
1998                 (INSERT_SUBREG (vt (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
1999
2000  // Similar to above, but removing unneeded masking of the shift amount.
      // The `mask` fragment wraps $src2 in the matched node; the selected
      // instruction receives the unmasked $src2.
2001  def : Pat<(and rc:$src1, (rotl -2, (mask GR8:$src2))),
2002            (btr rc:$src1,
2003                 (INSERT_SUBREG (vt (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
2004  def : Pat<(or rc:$src1, (shl 1, (mask GR8:$src2))),
2005            (bts rc:$src1,
2006                (INSERT_SUBREG (vt (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
2007  def : Pat<(xor rc:$src1, (shl 1, (mask GR8:$src2))),
2008            (btc rc:$src1,
2009                (INSERT_SUBREG (vt (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
2010}
2011
2012defm : OneBitPats<GR16, i16, BTR16rr, BTS16rr, BTC16rr, shiftMask16>; // 16-bit
2013defm : OneBitPats<GR32, i32, BTR32rr, BTS32rr, BTC32rr, shiftMask32>; // 32-bit
2014defm : OneBitPats<GR64, i64, BTR64rr, BTS64rr, BTC64rr, shiftMask64>; // 64-bit
2015
2016//===----------------------------------------------------------------------===//
2017// EFLAGS-defining Patterns
2018//===----------------------------------------------------------------------===//
2019
2020multiclass EFLAGSDefiningPats<string suffix, Predicate p> {
      // Maps add/sub/mul/or/xor/and nodes (plus the negate and inc/dec special
      // cases) onto X86 instructions. Each instruction is looked up by name:
      // opcode + width/form + `suffix`, cast to Instruction. `p` guards every
      // pattern; the inc/dec group additionally requires UseIncDec.
      //
      // For 64-bit operands the immediate patterns only match i64immSExt32 and
      // select the "ri32" instruction forms. There are no 8-bit mul patterns
      // and no mul-by-immediate patterns in this multiclass.
2021  let Predicates = [p] in {
2022    // add reg, reg
2023    def : Pat<(add GR8 :$src1, GR8 :$src2), (!cast<Instruction>(ADD8rr#suffix) GR8 :$src1, GR8 :$src2)>;
2024    def : Pat<(add GR16:$src1, GR16:$src2), (!cast<Instruction>(ADD16rr#suffix) GR16:$src1, GR16:$src2)>;
2025    def : Pat<(add GR32:$src1, GR32:$src2), (!cast<Instruction>(ADD32rr#suffix) GR32:$src1, GR32:$src2)>;
2026    def : Pat<(add GR64:$src1, GR64:$src2), (!cast<Instruction>(ADD64rr#suffix) GR64:$src1, GR64:$src2)>;
2027
2028    // add reg, mem
2029    def : Pat<(add GR8:$src1, (loadi8 addr:$src2)),
2030              (!cast<Instruction>(ADD8rm#suffix) GR8:$src1, addr:$src2)>;
2031    def : Pat<(add GR16:$src1, (loadi16 addr:$src2)),
2032              (!cast<Instruction>(ADD16rm#suffix) GR16:$src1, addr:$src2)>;
2033    def : Pat<(add GR32:$src1, (loadi32 addr:$src2)),
2034              (!cast<Instruction>(ADD32rm#suffix) GR32:$src1, addr:$src2)>;
2035    def : Pat<(add GR64:$src1, (loadi64 addr:$src2)),
2036              (!cast<Instruction>(ADD64rm#suffix) GR64:$src1, addr:$src2)>;
2037
2038    // add reg, imm
2039    def : Pat<(add GR8 :$src1, imm:$src2), (!cast<Instruction>(ADD8ri#suffix) GR8:$src1 , imm:$src2)>;
2040    def : Pat<(add GR16:$src1, imm:$src2), (!cast<Instruction>(ADD16ri#suffix) GR16:$src1, imm:$src2)>;
2041    def : Pat<(add GR32:$src1, imm:$src2), (!cast<Instruction>(ADD32ri#suffix) GR32:$src1, imm:$src2)>;
2042    def : Pat<(add GR64:$src1, i64immSExt32:$src2), (!cast<Instruction>(ADD64ri32#suffix) GR64:$src1, i64immSExt32:$src2)>;
2043
2044    // sub reg, reg
2045    def : Pat<(sub GR8 :$src1, GR8 :$src2), (!cast<Instruction>(SUB8rr#suffix)  GR8 :$src1, GR8 :$src2)>;
2046    def : Pat<(sub GR16:$src1, GR16:$src2), (!cast<Instruction>(SUB16rr#suffix) GR16:$src1, GR16:$src2)>;
2047    def : Pat<(sub GR32:$src1, GR32:$src2), (!cast<Instruction>(SUB32rr#suffix) GR32:$src1, GR32:$src2)>;
2048    def : Pat<(sub GR64:$src1, GR64:$src2), (!cast<Instruction>(SUB64rr#suffix) GR64:$src1, GR64:$src2)>;
2049
2050    // sub reg, mem
2051    def : Pat<(sub GR8:$src1, (loadi8 addr:$src2)),
2052              (!cast<Instruction>(SUB8rm#suffix) GR8:$src1, addr:$src2)>;
2053    def : Pat<(sub GR16:$src1, (loadi16 addr:$src2)),
2054              (!cast<Instruction>(SUB16rm#suffix) GR16:$src1, addr:$src2)>;
2055    def : Pat<(sub GR32:$src1, (loadi32 addr:$src2)),
2056              (!cast<Instruction>(SUB32rm#suffix) GR32:$src1, addr:$src2)>;
2057    def : Pat<(sub GR64:$src1, (loadi64 addr:$src2)),
2058              (!cast<Instruction>(SUB64rm#suffix) GR64:$src1, addr:$src2)>;
2059
2060    // sub reg, imm
2061    def : Pat<(sub GR8:$src1, imm:$src2),
2062              (!cast<Instruction>(SUB8ri#suffix) GR8:$src1, imm:$src2)>;
2063    def : Pat<(sub GR16:$src1, imm:$src2),
2064              (!cast<Instruction>(SUB16ri#suffix) GR16:$src1, imm:$src2)>;
2065    def : Pat<(sub GR32:$src1, imm:$src2),
2066              (!cast<Instruction>(SUB32ri#suffix) GR32:$src1, imm:$src2)>;
2067    def : Pat<(sub GR64:$src1, i64immSExt32:$src2),
2068              (!cast<Instruction>(SUB64ri32#suffix) GR64:$src1, i64immSExt32:$src2)>;
2069
2070    // sub 0, reg
        // Matched on the X86sub_flag node (not plain sub) and selected as NEG.
2071    def : Pat<(X86sub_flag 0, GR8 :$src), (!cast<Instruction>(NEG8r#suffix)  GR8 :$src)>;
2072    def : Pat<(X86sub_flag 0, GR16:$src), (!cast<Instruction>(NEG16r#suffix) GR16:$src)>;
2073    def : Pat<(X86sub_flag 0, GR32:$src), (!cast<Instruction>(NEG32r#suffix) GR32:$src)>;
2074    def : Pat<(X86sub_flag 0, GR64:$src), (!cast<Instruction>(NEG64r#suffix) GR64:$src)>;
2075
2076    // mul reg, reg
2077    def : Pat<(mul GR16:$src1, GR16:$src2),
2078              (!cast<Instruction>(IMUL16rr#suffix) GR16:$src1, GR16:$src2)>;
2079    def : Pat<(mul GR32:$src1, GR32:$src2),
2080              (!cast<Instruction>(IMUL32rr#suffix) GR32:$src1, GR32:$src2)>;
2081    def : Pat<(mul GR64:$src1, GR64:$src2),
2082              (!cast<Instruction>(IMUL64rr#suffix) GR64:$src1, GR64:$src2)>;
2083
2084    // mul reg, mem
2085    def : Pat<(mul GR16:$src1, (loadi16 addr:$src2)),
2086              (!cast<Instruction>(IMUL16rm#suffix) GR16:$src1, addr:$src2)>;
2087    def : Pat<(mul GR32:$src1, (loadi32 addr:$src2)),
2088              (!cast<Instruction>(IMUL32rm#suffix) GR32:$src1, addr:$src2)>;
2089    def : Pat<(mul GR64:$src1, (loadi64 addr:$src2)),
2090              (!cast<Instruction>(IMUL64rm#suffix) GR64:$src1, addr:$src2)>;
2091
2092    // or reg/reg.
2093    def : Pat<(or GR8 :$src1, GR8 :$src2), (!cast<Instruction>(OR8rr#suffix)  GR8 :$src1, GR8 :$src2)>;
2094    def : Pat<(or GR16:$src1, GR16:$src2), (!cast<Instruction>(OR16rr#suffix) GR16:$src1, GR16:$src2)>;
2095    def : Pat<(or GR32:$src1, GR32:$src2), (!cast<Instruction>(OR32rr#suffix) GR32:$src1, GR32:$src2)>;
2096    def : Pat<(or GR64:$src1, GR64:$src2), (!cast<Instruction>(OR64rr#suffix) GR64:$src1, GR64:$src2)>;
2097
2098    // or reg/mem
2099    def : Pat<(or GR8:$src1, (loadi8 addr:$src2)),
2100              (!cast<Instruction>(OR8rm#suffix) GR8:$src1, addr:$src2)>;
2101    def : Pat<(or GR16:$src1, (loadi16 addr:$src2)),
2102              (!cast<Instruction>(OR16rm#suffix) GR16:$src1, addr:$src2)>;
2103    def : Pat<(or GR32:$src1, (loadi32 addr:$src2)),
2104              (!cast<Instruction>(OR32rm#suffix) GR32:$src1, addr:$src2)>;
2105    def : Pat<(or GR64:$src1, (loadi64 addr:$src2)),
2106              (!cast<Instruction>(OR64rm#suffix) GR64:$src1, addr:$src2)>;
2107
2108    // or reg/imm
2109    def : Pat<(or GR8:$src1 , imm:$src2), (!cast<Instruction>(OR8ri#suffix)  GR8 :$src1, imm:$src2)>;
2110    def : Pat<(or GR16:$src1, imm:$src2), (!cast<Instruction>(OR16ri#suffix) GR16:$src1, imm:$src2)>;
2111    def : Pat<(or GR32:$src1, imm:$src2), (!cast<Instruction>(OR32ri#suffix) GR32:$src1, imm:$src2)>;
2112    def : Pat<(or GR64:$src1, i64immSExt32:$src2),
2113              (!cast<Instruction>(OR64ri32#suffix) GR64:$src1, i64immSExt32:$src2)>;
2114
2115    // xor reg/reg
2116    def : Pat<(xor GR8 :$src1, GR8 :$src2), (!cast<Instruction>(XOR8rr#suffix)  GR8 :$src1, GR8 :$src2)>;
2117    def : Pat<(xor GR16:$src1, GR16:$src2), (!cast<Instruction>(XOR16rr#suffix) GR16:$src1, GR16:$src2)>;
2118    def : Pat<(xor GR32:$src1, GR32:$src2), (!cast<Instruction>(XOR32rr#suffix) GR32:$src1, GR32:$src2)>;
2119    def : Pat<(xor GR64:$src1, GR64:$src2), (!cast<Instruction>(XOR64rr#suffix) GR64:$src1, GR64:$src2)>;
2120
2121    // xor reg/mem
2122    def : Pat<(xor GR8:$src1, (loadi8 addr:$src2)),
2123              (!cast<Instruction>(XOR8rm#suffix) GR8:$src1, addr:$src2)>;
2124    def : Pat<(xor GR16:$src1, (loadi16 addr:$src2)),
2125              (!cast<Instruction>(XOR16rm#suffix) GR16:$src1, addr:$src2)>;
2126    def : Pat<(xor GR32:$src1, (loadi32 addr:$src2)),
2127              (!cast<Instruction>(XOR32rm#suffix) GR32:$src1, addr:$src2)>;
2128    def : Pat<(xor GR64:$src1, (loadi64 addr:$src2)),
2129              (!cast<Instruction>(XOR64rm#suffix) GR64:$src1, addr:$src2)>;
2130
2131    // xor reg/imm
2132    def : Pat<(xor GR8:$src1, imm:$src2),
2133              (!cast<Instruction>(XOR8ri#suffix) GR8:$src1, imm:$src2)>;
2134    def : Pat<(xor GR16:$src1, imm:$src2),
2135              (!cast<Instruction>(XOR16ri#suffix) GR16:$src1, imm:$src2)>;
2136    def : Pat<(xor GR32:$src1, imm:$src2),
2137              (!cast<Instruction>(XOR32ri#suffix) GR32:$src1, imm:$src2)>;
2138    def : Pat<(xor GR64:$src1, i64immSExt32:$src2),
2139              (!cast<Instruction>(XOR64ri32#suffix) GR64:$src1, i64immSExt32:$src2)>;
2140
2141    // and reg/reg
2142    def : Pat<(and GR8 :$src1, GR8 :$src2), (!cast<Instruction>(AND8rr#suffix)  GR8 :$src1, GR8 :$src2)>;
2143    def : Pat<(and GR16:$src1, GR16:$src2), (!cast<Instruction>(AND16rr#suffix) GR16:$src1, GR16:$src2)>;
2144    def : Pat<(and GR32:$src1, GR32:$src2), (!cast<Instruction>(AND32rr#suffix) GR32:$src1, GR32:$src2)>;
2145    def : Pat<(and GR64:$src1, GR64:$src2), (!cast<Instruction>(AND64rr#suffix) GR64:$src1, GR64:$src2)>;
2146
2147    // and reg/mem
2148    def : Pat<(and GR8:$src1, (loadi8 addr:$src2)),
2149              (!cast<Instruction>(AND8rm#suffix) GR8:$src1, addr:$src2)>;
2150    def : Pat<(and GR16:$src1, (loadi16 addr:$src2)),
2151              (!cast<Instruction>(AND16rm#suffix) GR16:$src1, addr:$src2)>;
2152    def : Pat<(and GR32:$src1, (loadi32 addr:$src2)),
2153              (!cast<Instruction>(AND32rm#suffix) GR32:$src1, addr:$src2)>;
2154    def : Pat<(and GR64:$src1, (loadi64 addr:$src2)),
2155              (!cast<Instruction>(AND64rm#suffix) GR64:$src1, addr:$src2)>;
2156
2157    // and reg/imm
2158    def : Pat<(and GR8:$src1, imm:$src2),
2159              (!cast<Instruction>(AND8ri#suffix) GR8:$src1, imm:$src2)>;
2160    def : Pat<(and GR16:$src1, imm:$src2),
2161              (!cast<Instruction>(AND16ri#suffix) GR16:$src1, imm:$src2)>;
2162    def : Pat<(and GR32:$src1, imm:$src2),
2163              (!cast<Instruction>(AND32ri#suffix) GR32:$src1, imm:$src2)>;
2164    def : Pat<(and GR64:$src1, i64immSExt32:$src2),
2165              (!cast<Instruction>(AND64ri32#suffix) GR64:$src1, i64immSExt32:$src2)>;
2166  }
2167
2168  // Increment/Decrement reg.
2169  // Do not make INC/DEC if it is slow
2170  let Predicates = [UseIncDec, p] in {
        // Plain add of +1 / -1 selects INC / DEC.
2171    def : Pat<(add GR8:$src, 1),   (!cast<Instruction>(INC8r#suffix) GR8:$src)>;
2172    def : Pat<(add GR16:$src, 1),  (!cast<Instruction>(INC16r#suffix) GR16:$src)>;
2173    def : Pat<(add GR32:$src, 1),  (!cast<Instruction>(INC32r#suffix) GR32:$src)>;
2174    def : Pat<(add GR64:$src, 1),  (!cast<Instruction>(INC64r#suffix) GR64:$src)>;
2175    def : Pat<(add GR8:$src, -1),  (!cast<Instruction>(DEC8r#suffix) GR8:$src)>;
2176    def : Pat<(add GR16:$src, -1), (!cast<Instruction>(DEC16r#suffix) GR16:$src)>;
2177    def : Pat<(add GR32:$src, -1), (!cast<Instruction>(DEC32r#suffix) GR32:$src)>;
2178    def : Pat<(add GR64:$src, -1), (!cast<Instruction>(DEC64r#suffix) GR64:$src)>;
2179
        // The *_nocf nodes are only matched with -1: adding -1 selects DEC, and
        // subtracting -1 (i.e. adding 1) selects INC.
2180    def : Pat<(X86add_flag_nocf GR8:$src, -1),  (!cast<Instruction>(DEC8r#suffix) GR8:$src)>;
2181    def : Pat<(X86add_flag_nocf GR16:$src, -1), (!cast<Instruction>(DEC16r#suffix) GR16:$src)>;
2182    def : Pat<(X86add_flag_nocf GR32:$src, -1), (!cast<Instruction>(DEC32r#suffix) GR32:$src)>;
2183    def : Pat<(X86add_flag_nocf GR64:$src, -1), (!cast<Instruction>(DEC64r#suffix) GR64:$src)>;
2184    def : Pat<(X86sub_flag_nocf GR8:$src, -1),  (!cast<Instruction>(INC8r#suffix) GR8:$src)>;
2185    def : Pat<(X86sub_flag_nocf GR16:$src, -1), (!cast<Instruction>(INC16r#suffix) GR16:$src)>;
2186    def : Pat<(X86sub_flag_nocf GR32:$src, -1), (!cast<Instruction>(INC32r#suffix) GR32:$src)>;
2187    def : Pat<(X86sub_flag_nocf GR64:$src, -1), (!cast<Instruction>(INC64r#suffix) GR64:$src)>;
2188
        // An `or` with 1 that matches the or_is_add fragment also selects INC.
2189    def : Pat<(or_is_add GR8:$src, 1),   (!cast<Instruction>(INC8r#suffix) GR8:$src)>;
2190    def : Pat<(or_is_add GR16:$src, 1),  (!cast<Instruction>(INC16r#suffix) GR16:$src)>;
2191    def : Pat<(or_is_add GR32:$src, 1),  (!cast<Instruction>(INC32r#suffix) GR32:$src)>;
2192    def : Pat<(or_is_add GR64:$src, 1),  (!cast<Instruction>(INC64r#suffix) GR64:$src)>;
2193  }
2194}
2195
2196defm : EFLAGSDefiningPats<"", NoNDD>; // unsuffixed instructions, under NoNDD
2197defm : EFLAGSDefiningPats<"_ND", HasNDD>; // "_ND" variants, under HasNDD
2198
2199let Predicates = [HasZU] in {
2200  // zext (mul reg/mem, imm) -> imulzu
      // Covers a 16-bit multiply by an immediate whose result is zero-extended
      // to i32 or i64, with either a register or a loaded first operand. The
      // IMULZU16 result is wrapped in SUBREG_TO_REG at sub_16bit to form the
      // wider value, so no separate zero-extend instruction is selected.
      // NOTE(review): presumably sound because IMULZU16 itself zeroes the upper
      // bits of the destination - confirm against the instruction definition.
2201  def : Pat<(i32 (zext (i16 (mul GR16:$src1, imm:$src2)))),
2202            (SUBREG_TO_REG (i32 0), (IMULZU16rri GR16:$src1, imm:$src2), sub_16bit)>;
2203  def : Pat<(i32 (zext (i16 (mul (loadi16 addr:$src1), imm:$src2)))),
2204            (SUBREG_TO_REG (i32 0), (IMULZU16rmi addr:$src1, imm:$src2), sub_16bit)>;
2205  def : Pat<(i64 (zext (i16 (mul GR16:$src1, imm:$src2)))),
2206            (SUBREG_TO_REG (i64 0), (IMULZU16rri GR16:$src1, imm:$src2), sub_16bit)>;
2207  def : Pat<(i64 (zext (i16 (mul (loadi16 addr:$src1), imm:$src2)))),
2208            (SUBREG_TO_REG (i64 0), (IMULZU16rmi addr:$src1, imm:$src2), sub_16bit)>;
2209}
2210
2211// mul reg, imm
    // Register times immediate selects the IMUL "rri" forms. These patterns have
    // no predicate and no instruction-name suffix. The 64-bit pattern only
    // matches an i64immSExt32 operand and selects IMUL64rri32.
2212def : Pat<(mul GR16:$src1, imm:$src2),
2213          (IMUL16rri GR16:$src1, imm:$src2)>;
2214def : Pat<(mul GR32:$src1, imm:$src2),
2215          (IMUL32rri GR32:$src1, imm:$src2)>;
2216def : Pat<(mul GR64:$src1, i64immSExt32:$src2),
2217          (IMUL64rri32 GR64:$src1, i64immSExt32:$src2)>;
2218
2219// reg = mul mem, imm
    // Loaded value times immediate selects the IMUL "rmi" forms, taking the
    // address directly. The 64-bit pattern only matches an i64immSExt32 operand
    // and selects IMUL64rmi32.
2220def : Pat<(mul (loadi16 addr:$src1), imm:$src2),
2221          (IMUL16rmi addr:$src1, imm:$src2)>;
2222def : Pat<(mul (loadi32 addr:$src1), imm:$src2),
2223          (IMUL32rmi addr:$src1, imm:$src2)>;
2224def : Pat<(mul (loadi64 addr:$src1), i64immSExt32:$src2),
2225          (IMUL64rmi32 addr:$src1, i64immSExt32:$src2)>;
2226
2227// Bit scan instruction patterns to match explicit zero-undef behavior.
    // cttz_zero_undef on a register or a loaded value selects BSF (rr or rm form)
    // for 16/32/64 bits. BSF takes an extra leading operand here, which is
    // supplied as IMPLICIT_DEF of the result type.
    // NOTE(review): presumably that operand is the value kept when the source is
    // zero, which is irrelevant for the zero-undef node - confirm against the
    // BSF instruction definition.
2228def : Pat<(cttz_zero_undef GR16:$src), (BSF16rr (i16 (IMPLICIT_DEF)), GR16:$src)>;
2229def : Pat<(cttz_zero_undef GR32:$src), (BSF32rr (i32 (IMPLICIT_DEF)), GR32:$src)>;
2230def : Pat<(cttz_zero_undef GR64:$src), (BSF64rr (i64 (IMPLICIT_DEF)), GR64:$src)>;
2231def : Pat<(cttz_zero_undef (loadi16 addr:$src)), (BSF16rm (i16 (IMPLICIT_DEF)), addr:$src)>;
2232def : Pat<(cttz_zero_undef (loadi32 addr:$src)), (BSF32rm (i32 (IMPLICIT_DEF)), addr:$src)>;
2233def : Pat<(cttz_zero_undef (loadi64 addr:$src)), (BSF64rm (i64 (IMPLICIT_DEF)), addr:$src)>;
2234
2235// When HasMOVBE is enabled it is possible to get a non-legalized
2236// register-register 16-bit bswap. This maps it to a ROL instruction.
    // Rotating a 16-bit value by 8 exchanges its two bytes, which is exactly the
    // byte swap being requested.
2237let Predicates = [HasMOVBE] in {
2238 def : Pat<(bswap GR16:$src), (ROL16ri GR16:$src, (i8 8))>;
2239}
2240