Lines Matching +full:dcb +full:- +full:algorithm
1 //===-- PPCInstrInfo.td - The PowerPC Instruction Set ------*- tablegen -*-===//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file describes the subset of the 32-bit PowerPC instruction set, as used
12 //===----------------------------------------------------------------------===//
16 //===----------------------------------------------------------------------===//
127 //===----------------------------------------------------------------------===//
187 // Perform FADD in round-to-zero mode.
244 // These nodes represent the 32-bit PPC shifts that operate on 6-bit shift
245 // amounts. These nodes are generated by the multi-precision shift code.
295 // These are target-independent nodes, but have target-specific formats.
301 def SDT_PPCCall : SDTypeProfile<0, -1, [SDTCisInt<0>]>;
369 // PPC-specific atomic operations.
404 //===----------------------------------------------------------------------===//
409 // to a single precision floating point non-denormal immediate without loss of
412 APFloat APFloatOfN = N->getValueAPF();
413 return convertToNonDenormSingle(APFloatOfN) && !N->isExactlyValue(+0.0);
416 // A floating point immediate that is exactly an integer (for example 3.0, -5.0)
417 // and can be represented in 5 bits (range of [-16, 15]).
419 APFloat FloatValue = N->getValueAPF();
423 return IsExact && IntResult <= 15 && IntResult >= -16 && !FloatValue.isZero();
427 APFloat FloatValue = N->getValueAPF();
431 return CurDAG->getTargetConstant(IntResult, SDLoc(N), MVT::i32);
437 APFloat APFloatOfN = N->getValueAPF();
439 return CurDAG->getTargetConstant(APFloatOfN.bitcastToAPInt().getZExtValue(),
447 APFloat APFloatOfN = N->getValueAPF();
448 return !N->isExactlyValue(+0.0) && !checkConvertToNonDenormSingle(APFloatOfN);
453 APFloat APFloatOfN = N->getValueAPF();
459 return CurDAG->getTargetConstant(Hi, SDLoc(N), MVT::i32);
464 APFloat APFloatOfN = N->getValueAPF();
470 return CurDAG->getTargetConstant(Lo, SDLoc(N), MVT::i32);
474 return isInt<34>(N->getSExtValue());
478 return getI64Imm(N->getSExtValue(), SDLoc(N));
482 // Transformation function: 31 - imm
483 return getI32Imm(31 - N->getZExtValue(), SDLoc(N));
487 // Transformation function: 32 - imm
488 return N->getZExtValue() ? getI32Imm(32 - N->getZExtValue(), SDLoc(N))
494 return getI32Imm((unsigned short)N->getZExtValue(), SDLoc(N));
499 return getI32Imm((unsigned)N->getZExtValue() >> 16, SDLoc(N));
504 int64_t Val = N->getZExtValue();
505 return getI32Imm((Val - (signed short)Val) >> 16, SDLoc(N));
510 (void)isRunOfOnes((unsigned)N->getZExtValue(), mb, me);
517 (void)isRunOfOnes((unsigned)N->getZExtValue(), mb, me);
521 // maskImm predicate - True if immediate is a run of ones.
523 if (N->getValueType(0) == MVT::i32)
524 return isRunOfOnes((unsigned)N->getZExtValue(), mb, me);
530 // imm32SExt16 predicate - True if the i32 immediate fits in a 16-bit
535 // imm64SExt16 predicate - True if the i64 immediate fits in a 16-bit
540 // immZExt16 predicate - True if the immediate fits in a 16-bit zero extended
542 return (uint64_t)N->getZExtValue() == (unsigned short)N->getZExtValue();
545 return (isInt<8>(Imm) && (Imm != -1)) || (isUInt<8>(Imm) && (Imm != 0xFF));
547 def i32immNonAllOneNonZero : ImmLeaf<i32, [{ return Imm && (Imm != -1); }]>;
550 // imm16Shifted* - These match immediates where the low 16-bits are zero. There
552 // identical in 32-bit mode, but in 64-bit mode, they return true if the
553 // immediate fits into a sign/zero extended 32-bit immediate (with the low bits
556 // imm16ShiftedZExt predicate - True if only bits in the top 16-bits of the
558 return (N->getZExtValue() & ~uint64_t(0xFFFF0000)) == 0;
562 // imm16ShiftedSExt predicate - True if only bits in the top 16-bits of the
564 // imm16ShiftedZExt in 32-bit mode.
565 if (N->getZExtValue() & 0xFFFF) return false;
566 if (N->getValueType(0) == MVT::i32)
568 // For 64-bit, make sure it is sext right.
569 return N->getZExtValue() == (uint64_t)(int)N->getZExtValue();
573 // imm64ZExt32 predicate - True if the i64 immediate fits in a 32-bit
578 // This is a somewhat weaker condition than actually checking for 4-byte
580 // as an immediate that is a multiple of 4 (i.e. the requirements for DS-Form
583 // restricted memrix (4-aligned) constants are alignment sensitive. If these
584 // offsets are hidden behind TOC entries then the values of the lower-order
589 return isOffsetMultipleOf(N, 4) || cast<LoadSDNode>(N)->getAlign() >= 4;
593 return isOffsetMultipleOf(N, 4) || cast<StoreSDNode>(N)->getAlign() >= 4;
596 return isOffsetMultipleOf(N, 4) || cast<LoadSDNode>(N)->getAlign() >= 4;
601 return isOffsetMultipleOf(N, 4) || cast<StoreSDNode>(N)->getAlign() >= 4;
605 return cast<LoadSDNode>(N)->getAlign() < 4 && !isOffsetMultipleOf(N, 4);
609 return cast<StoreSDNode>(N)->getAlign() < 4 && !isOffsetMultipleOf(N, 4);
612 return cast<LoadSDNode>(N)->getAlign() < 4 && !isOffsetMultipleOf(N, 4);
615 // This is a somewhat weaker condition than actually checking for 16-byte
617 // as an immediate that is a multiple of 16 (i.e. the requirements for DQ-Form
634 // PatFrag for binary operation whose operands are both non-constant
638 return !isIntS16Immediate(N->getOperand(0), Imm)
639 && !isIntS16Immediate(N->getOperand(1), Imm);
645 //===----------------------------------------------------------------------===//
661 // d-form
663 // ds-form
665 // dq-form
667 // 8LS:d-form
670 // Below forms are all x-form addressing mode, use three different ones so we
671 // can make an accurate check for x-form instructions in ISEL.
672 // x-form addressing mode whose associated displacement form is D.
674 // x-form addressing mode whose associated displacement form is DS.
676 // x-form addressing mode whose associated displacement form is DQ.
682 // pseudo-instructions.
697 //===----------------------------------------------------------------------===//
699 def In32BitMode : Predicate<"!Subtarget->isPPC64()">;
700 def In64BitMode : Predicate<"Subtarget->isPPC64()">;
701 def IsBookE : Predicate<"Subtarget->isBookE()">;
702 def IsNotBookE : Predicate<"!Subtarget->isBookE()">;
703 def HasOnlyMSYNC : Predicate<"Subtarget->hasOnlyMSYNC()">;
704 def HasSYNC : Predicate<"!Subtarget->hasOnlyMSYNC()">;
705 def IsPPC4xx : Predicate<"Subtarget->isPPC4xx()">;
706 def IsPPC6xx : Predicate<"Subtarget->isPPC6xx()">;
707 def IsE500 : Predicate<"Subtarget->isE500()">;
708 def HasSPE : Predicate<"Subtarget->hasSPE()">;
709 def HasICBT : Predicate<"Subtarget->hasICBT()">;
710 def HasPartwordAtomics : Predicate<"Subtarget->hasPartwordAtomics()">;
711 def HasQuadwordAtomics : Predicate<"Subtarget->hasQuadwordAtomics()">;
713 : Predicate<"Subtarget->getTargetMachine().Options.NoNaNsFPMath">;
715 : Predicate<"!Subtarget->getTargetMachine().Options.NoNaNsFPMath">;
716 def HasBPERMD : Predicate<"Subtarget->hasBPERMD()">;
717 def HasExtDiv : Predicate<"Subtarget->hasExtDiv()">;
718 def IsISA2_06 : Predicate<"Subtarget->isISA2_06()">;
719 def IsISA2_07 : Predicate<"Subtarget->isISA2_07()">;
720 def IsISA3_0 : Predicate<"Subtarget->isISA3_0()">;
721 def HasFPU : Predicate<"Subtarget->hasFPU()">;
722 def PCRelativeMemops : Predicate<"Subtarget->hasPCRelativeMemops()">;
723 def IsNotISA3_1 : Predicate<"!Subtarget->isISA3_1()">;
726 def ModernAs: Predicate<"!Subtarget->isAIXABI() || Subtarget->HasModernAIXAs">,
728 def IsAIX : Predicate<"Subtarget->isAIXABI()">;
729 def NotAIX : Predicate<"!Subtarget->isAIXABI()">;
730 def IsISAFuture : Predicate<"Subtarget->isISAFuture()">;
731 def IsNotISAFuture : Predicate<"!Subtarget->isISAFuture()">;
733 //===----------------------------------------------------------------------===//
1184 //===----------------------------------------------------------------------===//
1186 //===----------------------------------------------------------------------===//
1188 //===----------------------------------------------------------------------===//
1229 // SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after
1232 // Note that SELECT_CC_I4 and SELECT_CC_I8 use the no-r0 register classes
1281 // SPILL_CR - Indicate that we're dumping the CR register, so we'll need to
1290 // RESTORE_CR - Indicate that we're restoring the CR register (previously
1356 // a two-value operand where a dag node expects two operands. :(
1402 "bdzlr-", IIC_BrB, []>;
1404 "bdnzlr-", IIC_BrB, []>;
1425 "bdz- $BD">;
1427 "bdnz- $BD">;
1429 "bdza- $BD">;
1431 "bdnza- $BD">;
1521 "bdzl- $BD">;
1523 "bdnzl- $BD">;
1525 "bdzla- $BD">;
1527 "bdnzla- $BD">;
1539 "bdzlrl-", IIC_BrB, []>;
1541 "bdnzlrl-", IIC_BrB, []>;
1621 // While longjmp is a control-flow barrier (fallthrough isn't allowed), setjmp
1681 // DCB* instructions.
1879 // (EH=1 - see Power ISA 2.07 Book II 4.4.2)
1941 //===----------------------------------------------------------------------===//
2095 //===----------------------------------------------------------------------===//
2141 // Patterns to match the pre-inc stores. We can't put the patterns on
2229 // Patterns to match the pre-inc stores. We can't put the patterns on
2260 // We used to have EIEIO as value but E[0-9A-Z] is a reserved name
2278 //===----------------------------------------------------------------------===//
2353 // The POWER6 and POWER7 have special group-terminating nops.
2532 /// Note that FMR is defined as pseudo-ops on the PPC970 because they are
2535 /// sneak into a d-group with a store).
2591 // XL-Form instructions. condition register logical ops.
2599 // condition-register logical instructions have preferred forms. Specifically,
2602 // for this via hinting the register allocator and anti-dep breakers, or we
2675 // XFX-Form instructions. Instructions that deal with SPRs.
2693 // A pseudo-instruction used to implement the read of the 64-bit cycle counter
2694 // on a 32-bit target.
2776 // on the cr register selected. Thus, post-ra anti-dep breaking must not
2791 // on the cr register selected. Thus, post-ra anti-dep breaking must not
2812 // Custom inserter instruction to perform FADD in round-to-zero mode.
2821 // When FM is 30/31, we are setting the 62/63 bit of FPSCR, the implicit-def
2896 // XO-Form instructions. Arithmetic instructions that can set overflow bit
2953 [(set i32:$RT, (adde i32:$RA, -1))]>;
2962 [(set i32:$RT, (sube -1, i32:$RA))]>;
2974 // A-Form instructions. Most of the instructions executed in the FPU are of
3020 // FSEL is artificially split into 4 and 8-byte forms for the result. To avoid
3021 // having 4 of these, force the comparison to always be an 8-byte double (code
3083 // M-Form instructions. rotate and mask instructions.
3111 //===----------------------------------------------------------------------===//
3221 // Support for thread-local storage.
3265 // For local-exec accesses on 32-bit AIX, a call to .__get_tpointer is
3274 // The following pattern matches local- and initial-exec TLS accesses on 32-bit AIX.
3275 // PPCaddTls is used in local- and initial-exec accesses in order to:
3276 // - Get the address of a variable (add the variable offset to the thread
3278 // - Create an opportunity to optimize the user of the loaded address.
3336 // Support for Position-independent code
3357 // Pseudo-instruction marked for deletion. When deleting the instruction would
3359 // used instead. It will be removed unconditionally at pre-emit time (prior to
3364 // so that we can distinguish between shifts that allow 5-bit and 6-bit shift
3452 // Pseudo-instructions for alternate assembly syntax (never used by codegen).
3485 // We're sometimes asked to materialize i1 -1, which is just 1 in this case
3486 // (-1 is used to mean all bits set).
3487 def : Pat<(i1 -1), (CRSET)>;
3493 (SELECT_I4 $in, (LI -1), (LI 0))>;
3498 (SELECT_I8 $in, (LI8 -1), (LI8 0))>;
3515 // -1 -1 : F
3516 // -1 0 : T
3517 // 0 -1 : F
3536 // -1 -1 : T
3537 // -1 0 : T
3538 // 0 -1 : F
3555 // -1 -1 : T
3556 // -1 0 : F
3557 // 0 -1 : T
3571 // -1 -1 : F
3572 // -1 0 : F
3573 // 0 -1 : T
3589 // match setcc on non-i1 (non-vector) variables. Note that SETUEQ, SETOGE,
3591 // floating-point types.
3601 (SELECT_I4 result, (LI 0), (LI -1))>;
3607 (SELECT_I8 result, (LI8 0), (LI8 -1))>;
3618 // FIXME: Because of what seems like a bug in TableGen's type-inference code,
3646 // (xori x, 1) because on the A2 nor has single-cycle latency while xori
3647 // has 2-cycle latency.
3699 (setcc $in, -1, $cc)>,
3707 (setcc $in, -1, $cc)>,
3715 (setcc $in, -1, $cc)>,
3723 (setcc $in, -1, $cc)>,
3783 // For non-equality comparisons, the default code would materialize the
3823 // For non-equality comparisons, the default code would materialize the
4061 // match selectcc on i1 variables with non-i1 output.
4215 //===----------------------------------------------------------------------===//
4268 let Inst{21-30} = 163;
4488 //===----------------------------------------------------------------------===//
4554 foreach BR = 0-7 in {
4610 foreach SPRG = 4-7 in {
4621 foreach SPRG = 0-3 in {
4639 foreach BATR = 0-3 in {
4915 defm : BranchSimpleMnemonicAT<"-", 2>;
4937 defm : BranchSimpleMnemonic2<"t", "-", 14>;
4938 defm : BranchSimpleMnemonic2<"f", "-", 6>;
4989 defm : BranchExtendedMnemonicPM<name, "-", !add(bibo, 2)>;
5065 // Copy-Paste Facility
5080 // Power-Saving Mode Instruction:
5108 // Fast 32-bit reverse bits algorithm:
5109 // Step 1: 1-bit swap (swap odd 1-bit and even 1-bit):
5111 // Step 2: 2-bit swap (swap odd 2-bit and even 2-bit):
5113 // Step 3: 4-bit swap (swap odd 4-bit and even 4-bit):
5173 // Clear the upper half of the register when in 64-bit mode
5179 // Fast 64-bit reverse bits algorithm:
5180 // Step 1: 1-bit swap (swap odd 1-bit and even 1-bit):
5182 // Step 2: 2-bit swap (swap odd 2-bit and even 2-bit):
5184 // Step 3: 4-bit swap (swap odd 4-bit and even 4-bit):
5187 // Apply the same byte reverse algorithm mentioned above for the fast 32-bit
5217 // Intra-byte swap is done, now start inter-byte swap.