1 //=== lib/fp_trunc.h - high precision -> low precision conversion *- C -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Set source and destination precision setting 10 // 11 //===----------------------------------------------------------------------===// 12 13 #ifndef FP_TRUNC_HEADER 14 #define FP_TRUNC_HEADER 15 16 #include "int_lib.h" 17 18 #if defined SRC_SINGLE 19 typedef float src_t; 20 typedef uint32_t src_rep_t; 21 #define SRC_REP_C UINT32_C 22 static const int srcBits = sizeof(src_t) * CHAR_BIT; 23 static const int srcSigFracBits = 23; 24 // -1 accounts for the sign bit. 25 // srcBits - srcSigFracBits - 1 26 static const int srcExpBits = 8; 27 28 #elif defined SRC_DOUBLE 29 typedef double src_t; 30 typedef uint64_t src_rep_t; 31 #define SRC_REP_C UINT64_C 32 static const int srcBits = sizeof(src_t) * CHAR_BIT; 33 static const int srcSigFracBits = 52; 34 // -1 accounts for the sign bit. 35 // srcBits - srcSigFracBits - 1 36 static const int srcExpBits = 11; 37 38 #elif defined SRC_QUAD 39 typedef tf_float src_t; 40 typedef __uint128_t src_rep_t; 41 #define SRC_REP_C (__uint128_t) 42 static const int srcBits = sizeof(src_t) * CHAR_BIT; 43 static const int srcSigFracBits = 112; 44 // -1 accounts for the sign bit. 45 // srcBits - srcSigFracBits - 1 46 static const int srcExpBits = 15; 47 48 #else 49 #error Source should be double precision or quad precision! 50 #endif // end source precision 51 52 #if defined DST_DOUBLE 53 typedef double dst_t; 54 typedef uint64_t dst_rep_t; 55 #define DST_REP_C UINT64_C 56 static const int dstBits = sizeof(dst_t) * CHAR_BIT; 57 static const int dstSigFracBits = 52; 58 // -1 accounts for the sign bit. 59 // dstBits - dstSigFracBits - 1 60 static const int dstExpBits = 11; 61 62 #elif defined DST_80 63 typedef xf_float dst_t; 64 typedef __uint128_t dst_rep_t; 65 #define DST_REP_C (__uint128_t) 66 static const int dstBits = 80; 67 static const int dstSigFracBits = 63; 68 // -1 accounts for the sign bit. 69 // -1 accounts for the explicitly stored integer bit. 70 // dstBits - dstSigFracBits - 1 - 1 71 static const int dstExpBits = 15; 72 73 #elif defined DST_SINGLE 74 typedef float dst_t; 75 typedef uint32_t dst_rep_t; 76 #define DST_REP_C UINT32_C 77 static const int dstBits = sizeof(dst_t) * CHAR_BIT; 78 static const int dstSigFracBits = 23; 79 // -1 accounts for the sign bit. 80 // dstBits - dstSigFracBits - 1 81 static const int dstExpBits = 8; 82 83 #elif defined DST_HALF 84 #ifdef COMPILER_RT_HAS_FLOAT16 85 typedef _Float16 dst_t; 86 #else 87 typedef uint16_t dst_t; 88 #endif 89 typedef uint16_t dst_rep_t; 90 #define DST_REP_C UINT16_C 91 static const int dstBits = sizeof(dst_t) * CHAR_BIT; 92 static const int dstSigFracBits = 10; 93 // -1 accounts for the sign bit. 94 // dstBits - dstSigFracBits - 1 95 static const int dstExpBits = 5; 96 97 #elif defined DST_BFLOAT 98 typedef __bf16 dst_t; 99 typedef uint16_t dst_rep_t; 100 #define DST_REP_C UINT16_C 101 static const int dstBits = sizeof(dst_t) * CHAR_BIT; 102 static const int dstSigFracBits = 7; 103 // -1 accounts for the sign bit. 104 // dstBits - dstSigFracBits - 1 105 static const int dstExpBits = 8; 106 107 #else 108 #error Destination should be single precision or double precision! 109 #endif // end destination precision 110 111 // TODO: These helper routines should be placed into fp_lib.h 112 // Currently they depend on macros/constants defined above. 113 114 static inline src_rep_t extract_sign_from_src(src_rep_t x) { 115 const src_rep_t srcSignMask = SRC_REP_C(1) << (srcBits - 1); 116 return (x & srcSignMask) >> (srcBits - 1); 117 } 118 119 static inline src_rep_t extract_exp_from_src(src_rep_t x) { 120 const int srcSigBits = srcBits - 1 - srcExpBits; 121 const src_rep_t srcExpMask = ((SRC_REP_C(1) << srcExpBits) - 1) << srcSigBits; 122 return (x & srcExpMask) >> srcSigBits; 123 } 124 125 static inline src_rep_t extract_sig_frac_from_src(src_rep_t x) { 126 const src_rep_t srcSigFracMask = (SRC_REP_C(1) << srcSigFracBits) - 1; 127 return x & srcSigFracMask; 128 } 129 130 static inline dst_rep_t construct_dst_rep(dst_rep_t sign, dst_rep_t exp, dst_rep_t sigFrac) { 131 dst_rep_t result = (sign << (dstBits - 1)) | (exp << (dstBits - 1 - dstExpBits)) | sigFrac; 132 // Set the explicit integer bit in F80 if present. 133 if (dstBits == 80 && exp) { 134 result |= (DST_REP_C(1) << dstSigFracBits); 135 } 136 return result; 137 } 138 139 // End of specialization parameters. Two helper routines for conversion to and 140 // from the representation of floating-point data as integer values follow. 141 142 static inline src_rep_t srcToRep(src_t x) { 143 const union { 144 src_t f; 145 src_rep_t i; 146 } rep = {.f = x}; 147 return rep.i; 148 } 149 150 static inline dst_t dstFromRep(dst_rep_t x) { 151 const union { 152 dst_t f; 153 dst_rep_t i; 154 } rep = {.i = x}; 155 return rep.f; 156 } 157 158 #endif // FP_TRUNC_HEADER 159