xref: /freebsd/contrib/llvm-project/compiler-rt/lib/builtins/fp_trunc_impl.inc (revision 53120fbb68952b7d620c2c0e1cf05c5017fc1b27)
1//= lib/fp_trunc_impl.inc - high precision -> low precision conversion *-*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements a fairly generic conversion from a wider to a narrower
10// IEEE-754 floating-point type in the default (round to nearest, ties to even)
11// rounding mode.  The constants and types defined following the includes below
12// parameterize the conversion.
13//
// This routine can be trivially adapted to support conversions to
// half-precision or from quad-precision. The source type must use one of the
// usual IEEE-754 interchange formats; the destination type may additionally
// use the Intel 80-bit format. Some work would be needed to adapt it to other
// layouts (for example, the PowerPC double-double format).
19//
20// Note please, however, that this implementation is only intended to support
21// *narrowing* operations; if you need to convert to a *wider* floating-point
22// type (e.g. float -> double), then this routine will not do what you want it
23// to.
24//
25// It also requires that integer types at least as large as both formats
26// are available on the target platform; this may pose a problem when trying
27// to add support for quad on some 32-bit systems, for example.
28//
29// Finally, the following assumptions are made:
30//
31// 1. Floating-point types and integer types have the same endianness on the
32//    target platform.
33//
34// 2. Quiet NaNs, if supported, are indicated by the leading bit of the
35//    significand field being set.
36//
37//===----------------------------------------------------------------------===//
38
39#include "fp_trunc.h"
40
41// The destination type may use a usual IEEE-754 interchange format or Intel
42// 80-bit format. In particular, for the destination type dstSigFracBits may be
43// not equal to dstSigBits. The source type is assumed to be one of IEEE-754
44// standard types.
45static __inline dst_t __truncXfYf2__(src_t a) {
46  // Various constants whose values follow from the type parameters.
47  // Any reasonable optimizer will fold and propagate all of these.
48  const int srcInfExp = (1 << srcExpBits) - 1;
49  const int srcExpBias = srcInfExp >> 1;
50
51  const src_rep_t srcMinNormal = SRC_REP_C(1) << srcSigFracBits;
52  const src_rep_t roundMask =
53      (SRC_REP_C(1) << (srcSigFracBits - dstSigFracBits)) - 1;
54  const src_rep_t halfway = SRC_REP_C(1)
55                            << (srcSigFracBits - dstSigFracBits - 1);
56  const src_rep_t srcQNaN = SRC_REP_C(1) << (srcSigFracBits - 1);
57  const src_rep_t srcNaNCode = srcQNaN - 1;
58
59  const int dstInfExp = (1 << dstExpBits) - 1;
60  const int dstExpBias = dstInfExp >> 1;
61  const int overflowExponent = srcExpBias + dstInfExp - dstExpBias;
62
63  const dst_rep_t dstQNaN = DST_REP_C(1) << (dstSigFracBits - 1);
64  const dst_rep_t dstNaNCode = dstQNaN - 1;
65
66  const src_rep_t aRep = srcToRep(a);
67  const src_rep_t srcSign = extract_sign_from_src(aRep);
68  const src_rep_t srcExp = extract_exp_from_src(aRep);
69  const src_rep_t srcSigFrac = extract_sig_frac_from_src(aRep);
70
71  dst_rep_t dstSign = srcSign;
72  dst_rep_t dstExp;
73  dst_rep_t dstSigFrac;
74
75  // Same size exponents and a's significand tail is 0.
76  // The significand can be truncated and the exponent can be copied over.
77  const int sigFracTailBits = srcSigFracBits - dstSigFracBits;
78  if (srcExpBits == dstExpBits &&
79      ((aRep >> sigFracTailBits) << sigFracTailBits) == aRep) {
80    dstExp = srcExp;
81    dstSigFrac = (dst_rep_t)(srcSigFrac >> sigFracTailBits);
82    return dstFromRep(construct_dst_rep(dstSign, dstExp, dstSigFrac));
83  }
84
85  const int dstExpCandidate = ((int)srcExp - srcExpBias) + dstExpBias;
86  if (dstExpCandidate >= 1 && dstExpCandidate < dstInfExp) {
87    // The exponent of a is within the range of normal numbers in the
88    // destination format. We can convert by simply right-shifting with
89    // rounding and adjusting the exponent.
90    dstExp = dstExpCandidate;
91    dstSigFrac = (dst_rep_t)(srcSigFrac >> sigFracTailBits);
92
93    const src_rep_t roundBits = srcSigFrac & roundMask;
94    // Round to nearest.
95    if (roundBits > halfway)
96      dstSigFrac++;
97    // Tie to even.
98    else if (roundBits == halfway)
99      dstSigFrac += dstSigFrac & 1;
100
101    // Rounding has changed the exponent.
102    if (dstSigFrac >= (DST_REP_C(1) << dstSigFracBits)) {
103      dstExp += 1;
104      dstSigFrac ^= (DST_REP_C(1) << dstSigFracBits);
105    }
106  } else if (srcExp == srcInfExp && srcSigFrac) {
107    // a is NaN.
108    // Conjure the result by beginning with infinity, setting the qNaN
109    // bit and inserting the (truncated) trailing NaN field.
110    dstExp = dstInfExp;
111    dstSigFrac = dstQNaN;
112    dstSigFrac |= ((srcSigFrac & srcNaNCode) >> sigFracTailBits) & dstNaNCode;
113  } else if ((int)srcExp >= overflowExponent) {
114    dstExp = dstInfExp;
115    dstSigFrac = 0;
116  } else {
117    // a underflows on conversion to the destination type or is an exact
118    // zero.  The result may be a denormal or zero.  Extract the exponent
119    // to get the shift amount for the denormalization.
120    src_rep_t significand = srcSigFrac;
121    int shift = srcExpBias - dstExpBias - srcExp;
122
123    if (srcExp) {
124      // Set the implicit integer bit if the source is a normal number.
125      significand |= srcMinNormal;
126      shift += 1;
127    }
128
129    // Right shift by the denormalization amount with sticky.
130    if (shift > srcSigFracBits) {
131      dstExp = 0;
132      dstSigFrac = 0;
133    } else {
134      dstExp = 0;
135      const bool sticky = shift && ((significand << (srcBits - shift)) != 0);
136      src_rep_t denormalizedSignificand = significand >> shift | sticky;
137      dstSigFrac = denormalizedSignificand >> sigFracTailBits;
138      const src_rep_t roundBits = denormalizedSignificand & roundMask;
139      // Round to nearest
140      if (roundBits > halfway)
141        dstSigFrac++;
142      // Ties to even
143      else if (roundBits == halfway)
144        dstSigFrac += dstSigFrac & 1;
145
146      // Rounding has changed the exponent.
147      if (dstSigFrac >= (DST_REP_C(1) << dstSigFracBits)) {
148        dstExp += 1;
149        dstSigFrac ^= (DST_REP_C(1) << dstSigFracBits);
150      }
151    }
152  }
153
154  return dstFromRep(construct_dst_rep(dstSign, dstExp, dstSigFrac));
155}
156