/*
 * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

#ifndef ZSTD_COMPILER_H
#define ZSTD_COMPILER_H

/*-*******************************************************
*  Compiler specifics
*********************************************************/

/* force inlining
 *
 * INLINE_KEYWORD    expands to `inline` where the language mode provides it
 *                   (GNU C outside strict ANSI mode, C++, or C99 and later),
 *                   otherwise to nothing.
 * FORCE_INLINE_ATTR expands to the compiler-specific "always inline" marker,
 *                   or to nothing on compilers not recognized here.
 *
 * Defining ZSTD_NO_INLINE makes both macros expand to nothing. */

#if !defined(ZSTD_NO_INLINE)
#if (defined(__GNUC__) && !defined(__STRICT_ANSI__)) || defined(__cplusplus) || (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L)   /* C99 */
#  define INLINE_KEYWORD inline
#else
#  define INLINE_KEYWORD
#endif

#if defined(__GNUC__) || defined(__ICCARM__)
#  define FORCE_INLINE_ATTR __attribute__((always_inline))
#elif defined(_MSC_VER)
#  define FORCE_INLINE_ATTR __forceinline
#else
#  define FORCE_INLINE_ATTR
#endif

#else

#define INLINE_KEYWORD
#define FORCE_INLINE_ATTR

#endif

/**
 * FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant
 * parameters. They must be inlined for the compiler to eliminate the constant
 * branches.
 */
#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR
/**
 * HINT_INLINE is used to help the compiler generate better code. It is *not*
 * used for "templates", so it can be tweaked based on the compiler's
 * performance.
 *
 * gcc-4.8 and gcc-4.9 have been shown to benefit from leaving off the
 * always_inline attribute.
 *
 * clang up to 5.0.0 (trunk) benefits tremendously from the always_inline
 * attribute.
 */
#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ >= 8
#  define HINT_INLINE static INLINE_KEYWORD
#else
#  define HINT_INLINE static INLINE_KEYWORD FORCE_INLINE_ATTR
#endif

/* UNUSED_ATTR tells the compiler it is okay if the function is unused. */
#if defined(__GNUC__)
#  define UNUSED_ATTR __attribute__((unused))
#else
#  define UNUSED_ATTR
#endif

/* force no inlining
 * FORCE_NOINLINE always includes `static`; the no-inline marker is added only
 * for the compilers recognized below. */
#ifdef _MSC_VER
#  define FORCE_NOINLINE static __declspec(noinline)
#else
#  if defined(__GNUC__) || defined(__ICCARM__)
#    define FORCE_NOINLINE static __attribute__((__noinline__))
#  else
#    define FORCE_NOINLINE static
#  endif
#endif

/* target attribute */
#ifndef __has_attribute
  #define __has_attribute(x) 0  /* Compatibility with non-clang compilers. */
#endif
#if defined(__GNUC__) || defined(__ICCARM__)
#  define TARGET_ATTRIBUTE(target) __attribute__((__target__(target)))
#else
#  define TARGET_ATTRIBUTE(target)
#endif

/* Enable runtime BMI2 dispatch based on the CPU.
 * Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default.
 * A DYNAMIC_BMI2 value supplied by the build is left untouched.
 *
 * NOTE(review): `_M_X86` is not among the architecture macros MSVC documents
 * (`_M_IX86`, `_M_X64`), so the architecture test appears to rest on
 * `__x86_64__` alone - confirm the intended spelling.
 */
#ifndef DYNAMIC_BMI2
  #if ((defined(__clang__) && __has_attribute(__target__)) \
      || (defined(__GNUC__) \
          && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \
      && (defined(__x86_64__) || defined(_M_X86)) \
      && !defined(__BMI2__)
  #  define DYNAMIC_BMI2 1
  #else
  #  define DYNAMIC_BMI2 0
  #endif
#endif

/* prefetch
 * can be disabled, by declaring NO_PREFETCH build macro
 *
 * PREFETCH_L1(ptr) / PREFETCH_L2(ptr) expand to the prefetch primitive of the
 * detected compiler/architecture. When none is available (or NO_PREFETCH is
 * defined) they only evaluate `ptr` and discard it.
 *
 * NOTE(review): MSVC documents `_M_IX86` for 32-bit x86; `_M_I86` looks like
 * a legacy spelling, so 32-bit MSVC builds may end up in the disabled
 * fallback - confirm. */
#if defined(NO_PREFETCH)
#  define PREFETCH_L1(ptr)  (void)(ptr)  /* disabled */
#  define PREFETCH_L2(ptr)  (void)(ptr)  /* disabled */
#else
#  if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86))  /* _mm_prefetch() is not defined outside of x86/x64 */
#    include <mmintrin.h>   /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
#    define PREFETCH_L1(ptr)  _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
#    define PREFETCH_L2(ptr)  _mm_prefetch((const char*)(ptr), _MM_HINT_T1)
#  elif defined(__aarch64__)
     /* `*(ptr)` is used as the asm operand, so `ptr` must not be a void pointer here */
#    define PREFETCH_L1(ptr)  __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr)))
#    define PREFETCH_L2(ptr)  __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr)))
#  elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
#    define PREFETCH_L1(ptr)  __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
#    define PREFETCH_L2(ptr)  __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */)
#  else
#    define PREFETCH_L1(ptr) (void)(ptr)  /* disabled */
#    define PREFETCH_L2(ptr) (void)(ptr)  /* disabled */
#  endif
#endif  /* NO_PREFETCH */

#define CACHELINE_SIZE 64

/* PREFETCH_AREA(p, s) :
 * issues PREFETCH_L2() at every CACHELINE_SIZE-byte step of the `s` bytes
 * starting at `p`. `p` and `s` are each evaluated exactly once.
 * The body is wrapped in do { } while (0) so that the macro behaves as a
 * single statement (e.g. as the unbraced body of an if/else); the call must
 * be terminated by a semicolon.
 * `size_t` must already be declared by the including file. */
#define PREFETCH_AREA(p, s)  do {            \
    const char* const _ptr = (const char*)(p);  \
    size_t const _size = (size_t)(s);     \
    size_t _pos;                          \
    for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) {  \
        PREFETCH_L2(_ptr + _pos);         \
    }                                     \
} while (0)

/* vectorization
 * GCC 4.4 and newer take the `optimize` function attribute;
 * older GCC gets the equivalent pragma form instead.
 * Expands to nothing for clang, the Intel compiler and non-GNU compilers. */
#if !defined(__INTEL_COMPILER) && !defined(__clang__) && defined(__GNUC__)
#  if (__GNUC__ == 4 && __GNUC_MINOR__ > 3) || (__GNUC__ >= 5)
#    define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize")))
#  else
#    define DONT_VECTORIZE _Pragma("GCC optimize(\"no-tree-vectorize\")")
#  endif
#else
#  define DONT_VECTORIZE
#endif

/* Tell the compiler that a branch is likely or unlikely.
 * Only use these macros if it causes the compiler to generate better code.
 * If you can remove a LIKELY/UNLIKELY annotation without speed changes in gcc
 * and clang, please do.
 */
#if defined(__GNUC__)
#define LIKELY(x) (__builtin_expect((x), 1))
#define UNLIKELY(x) (__builtin_expect((x), 0))
#else
#define LIKELY(x) (x)
#define UNLIKELY(x) (x)
#endif

/* disable warnings */
#ifdef _MSC_VER    /* Visual Studio */
#  include <intrin.h>                    /* For Visual 2005 */
#  pragma warning(disable : 4100)        /* disable: C4100: unreferenced formal parameter */
#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
#  pragma warning(disable : 4204)        /* disable: C4204: non-constant aggregate initializer */
#  pragma warning(disable : 4214)        /* disable: C4214: non-int bitfields */
#  pragma warning(disable : 4324)        /* disable: C4324: padded structure */
#endif

#endif /* ZSTD_COMPILER_H */