1 /* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ 2 #ifndef __TASK_LOCAL_DATA_H 3 #define __TASK_LOCAL_DATA_H 4 5 #include <errno.h> 6 #include <fcntl.h> 7 #include <sched.h> 8 #include <stdatomic.h> 9 #include <stddef.h> 10 #include <stdlib.h> 11 #include <string.h> 12 #include <unistd.h> 13 #include <sys/syscall.h> 14 #include <sys/types.h> 15 16 #ifdef TLD_FREE_DATA_ON_THREAD_EXIT 17 #include <pthread.h> 18 #endif 19 20 #include <bpf/bpf.h> 21 22 /* 23 * OPTIONS 24 * 25 * Define the option before including the header. Using different options in 26 * different translation units is strongly discouraged. 27 * 28 * TLD_FREE_DATA_ON_THREAD_EXIT - Frees memory on thread exit automatically 29 * 30 * Thread-specific memory for storing TLD is allocated lazily on the first call to 31 * tld_get_data(). The thread that calls it must also call tld_free() on thread exit 32 * to prevent memory leak. Pthread will be included if the option is defined. A pthread 33 * key will be registered with a destructor that calls tld_free(). Enabled only when 34 * the option is defined and TLD_DEFINE_KEY/tld_create_key() is called in the same 35 * translation unit. 36 * 37 * 38 * TLD_DYN_DATA_SIZE - The maximum size of memory allocated for TLDs created dynamically 39 * (default: 64 bytes) 40 * 41 * A TLD can be defined statically using TLD_DEFINE_KEY() or created on the fly using 42 * tld_create_key(). As the total size of TLDs created with tld_create_key() cannot be 43 * possibly known statically, a memory area of size TLD_DYN_DATA_SIZE will be allocated 44 * for these TLDs. This additional memory is allocated for every thread that calls 45 * tld_get_data() even if no tld_create_key are actually called, so be mindful of 46 * potential memory wastage. Use TLD_DEFINE_KEY() whenever possible as just enough memory 47 * will be allocated for TLDs created with it. 48 * 49 * 50 * TLD_NAME_LEN - The maximum length of the name of a TLD (default: 62) 51 * 52 * Setting TLD_NAME_LEN will affect the maximum number of TLDs a process can store, 53 * TLD_MAX_DATA_CNT. Must be consistent with task_local_data.bpf.h. 54 * 55 * 56 * TLD_DONT_ROUND_UP_DATA_SIZE - Don't round up memory size allocated for data if 57 * the memory allocator has low overhead aligned_alloc() implementation. 58 * 59 * For some memory allocators, when calling aligned_alloc(alignment, size), size 60 * does not need to be an integral multiple of alignment and it can be fulfilled 61 * without using round_up(size, alignment) bytes of memory. Enable this option to 62 * reduce memory usage. 63 */ 64 65 #define TLD_PAGE_SIZE getpagesize() 66 #define TLD_PAGE_MASK (~(TLD_PAGE_SIZE - 1)) 67 68 #define TLD_ROUND_MASK(x, y) ((__typeof__(x))((y) - 1)) 69 #define TLD_ROUND_UP(x, y) ((((x) - 1) | TLD_ROUND_MASK(x, y)) + 1) 70 71 #define TLD_ROUND_UP_POWER_OF_TWO(x) (1UL << (sizeof(x) * 8 - __builtin_clzl(x - 1))) 72 73 #ifndef TLD_DYN_DATA_SIZE 74 #define TLD_DYN_DATA_SIZE 64 75 #endif 76 77 #define TLD_MAX_DATA_CNT (TLD_PAGE_SIZE / sizeof(struct tld_metadata) - 1) 78 79 #ifndef TLD_NAME_LEN 80 #define TLD_NAME_LEN 62 81 #endif 82 83 #ifdef __cplusplus 84 extern "C" { 85 #endif 86 87 typedef struct { 88 __s16 off; 89 } tld_key_t; 90 91 struct tld_metadata { 92 char name[TLD_NAME_LEN]; 93 _Atomic __u16 size; /* size of tld_data_u->data */ 94 }; 95 96 struct tld_meta_u { 97 _Atomic __u16 cnt; 98 __u16 size; 99 struct tld_metadata metadata[]; 100 }; 101 102 /* 103 * The unused field ensures map_val.start > 0. On the BPF side, __tld_fetch_key() 104 * calculates off by summing map_val.start and tld_key_t.off and treats off == 0 105 * as key not cached. 106 */ 107 struct tld_data_u { 108 __u64 unused; 109 char data[] __attribute__((aligned(8))); 110 }; 111 112 struct tld_map_value { 113 void *data; 114 struct tld_meta_u *meta; 115 __u16 start; /* offset of tld_data_u->data in a page */ 116 }; 117 118 struct tld_meta_u * _Atomic tld_meta_p __attribute__((weak)); 119 __thread struct tld_data_u *tld_data_p __attribute__((weak)); 120 121 #ifdef TLD_FREE_DATA_ON_THREAD_EXIT 122 bool _Atomic tld_pthread_key_init __attribute__((weak)); 123 pthread_key_t tld_pthread_key __attribute__((weak)); 124 125 static void tld_free(void); 126 127 static void __tld_thread_exit_handler(void *unused) 128 { 129 (void)unused; 130 tld_free(); 131 } 132 #endif 133 134 static int __tld_init_meta_p(void) 135 { 136 struct tld_meta_u *meta, *uninit = NULL; 137 int err = 0; 138 139 meta = (struct tld_meta_u *)aligned_alloc(TLD_PAGE_SIZE, TLD_PAGE_SIZE); 140 if (!meta) { 141 err = -ENOMEM; 142 goto out; 143 } 144 145 memset(meta, 0, TLD_PAGE_SIZE); 146 meta->size = TLD_DYN_DATA_SIZE; 147 148 if (!atomic_compare_exchange_strong(&tld_meta_p, &uninit, meta)) { 149 free(meta); 150 goto out; 151 } 152 153 out: 154 return err; 155 } 156 157 static int __tld_init_data_p(int map_fd) 158 { 159 struct tld_map_value map_val; 160 struct tld_data_u *data; 161 int err, tid_fd = -1; 162 size_t size, size_pot; 163 164 tid_fd = syscall(SYS_pidfd_open, sys_gettid(), O_EXCL); 165 if (tid_fd < 0) { 166 err = -errno; 167 goto out; 168 } 169 170 /* 171 * tld_meta_p->size = TLD_DYN_DATA_SIZE + 172 * total size of TLDs defined via TLD_DEFINE_KEY() 173 */ 174 size = tld_meta_p->size + sizeof(struct tld_data_u); 175 size_pot = TLD_ROUND_UP_POWER_OF_TWO(size); 176 #ifdef TLD_DONT_ROUND_UP_DATA_SIZE 177 data = (struct tld_data_u *)aligned_alloc(size_pot, size); 178 #else 179 data = (struct tld_data_u *)aligned_alloc(size_pot, size_pot); 180 #endif 181 if (!data) { 182 err = -ENOMEM; 183 goto out; 184 } 185 186 /* 187 * Always pass a page-aligned address to UPTR since the size of tld_map_value::data 188 * is a page in BTF. 189 */ 190 map_val.data = (void *)(TLD_PAGE_MASK & (intptr_t)data); 191 map_val.start = (~TLD_PAGE_MASK & (intptr_t)data) + sizeof(struct tld_data_u); 192 map_val.meta = tld_meta_p; 193 194 err = bpf_map_update_elem(map_fd, &tid_fd, &map_val, 0); 195 if (err) { 196 free(data); 197 goto out; 198 } 199 200 tld_data_p = data; 201 #ifdef TLD_FREE_DATA_ON_THREAD_EXIT 202 pthread_setspecific(tld_pthread_key, (void *)1); 203 #endif 204 out: 205 if (tid_fd >= 0) 206 close(tid_fd); 207 return err; 208 } 209 210 static tld_key_t __tld_create_key(const char *name, size_t size, bool dyn_data) 211 { 212 int err, i, sz, off = 0; 213 bool uninit = false; 214 __u16 cnt; 215 216 if (!tld_meta_p) { 217 err = __tld_init_meta_p(); 218 if (err) 219 return (tld_key_t){(__s16)err}; 220 } 221 222 #ifdef TLD_FREE_DATA_ON_THREAD_EXIT 223 if (atomic_compare_exchange_strong(&tld_pthread_key_init, &uninit, true)) { 224 err = pthread_key_create(&tld_pthread_key, __tld_thread_exit_handler); 225 if (err) 226 return (tld_key_t){(__s16)err}; 227 } 228 #endif 229 230 for (i = 0; i < (int)TLD_MAX_DATA_CNT; i++) { 231 retry: 232 cnt = atomic_load(&tld_meta_p->cnt); 233 if (i < cnt) { 234 /* A metadata is not ready until size is updated with a non-zero value */ 235 while (!(sz = atomic_load(&tld_meta_p->metadata[i].size))) 236 sched_yield(); 237 238 if (!strncmp(tld_meta_p->metadata[i].name, name, TLD_NAME_LEN)) 239 return (tld_key_t){-EEXIST}; 240 241 off += TLD_ROUND_UP(sz, 8); 242 continue; 243 } 244 245 /* 246 * TLD_DEFINE_KEY() is given memory upto a page while at most 247 * TLD_DYN_DATA_SIZE is allocated for tld_create_key() 248 */ 249 if (dyn_data) { 250 if (off + TLD_ROUND_UP(size, 8) > tld_meta_p->size || 251 tld_meta_p->size > TLD_PAGE_SIZE - sizeof(struct tld_data_u)) 252 return (tld_key_t){-E2BIG}; 253 } else { 254 if (off + TLD_ROUND_UP(size, 8) > TLD_PAGE_SIZE - sizeof(struct tld_data_u)) 255 return (tld_key_t){-E2BIG}; 256 tld_meta_p->size += TLD_ROUND_UP(size, 8); 257 } 258 259 /* 260 * Only one tld_create_key() can increase the current cnt by one and 261 * takes the latest available slot. Other threads will check again if a new 262 * TLD can still be added, and then compete for the new slot after the 263 * succeeding thread update the size. 264 */ 265 if (!atomic_compare_exchange_strong(&tld_meta_p->cnt, &cnt, cnt + 1)) 266 goto retry; 267 268 strscpy(tld_meta_p->metadata[i].name, name); 269 atomic_store(&tld_meta_p->metadata[i].size, size); 270 return (tld_key_t){(__s16)off}; 271 } 272 273 return (tld_key_t){-ENOSPC}; 274 } 275 276 /** 277 * TLD_DEFINE_KEY() - Define a TLD and a global variable key associated with the TLD. 278 * 279 * @name: The name of the TLD 280 * @size: The size of the TLD 281 * @key: The variable name of the key. Cannot exceed TLD_NAME_LEN 282 * 283 * The macro can only be used in file scope. 284 * 285 * A global variable key of opaque type, tld_key_t, will be declared and initialized before 286 * main() starts. Use tld_key_is_err() or tld_key_err_or_zero() later to check if the key 287 * creation succeeded. Pass the key to tld_get_data() to get a pointer to the TLD. 288 * bpf programs can also fetch the same key by name. 289 * 290 * The total size of TLDs created using TLD_DEFINE_KEY() cannot exceed a page. Just 291 * enough memory will be allocated for each thread on the first call to tld_get_data(). 292 */ 293 #define TLD_DEFINE_KEY(key, name, size) \ 294 tld_key_t key; \ 295 \ 296 __attribute__((constructor(101))) \ 297 void __tld_define_key_##key(void) \ 298 { \ 299 key = __tld_create_key(name, size, false); \ 300 } 301 302 /** 303 * tld_create_key() - Create a TLD and return a key associated with the TLD. 304 * 305 * @name: The name the TLD 306 * @size: The size of the TLD 307 * 308 * Return an opaque object key. Use tld_key_is_err() or tld_key_err_or_zero() to check 309 * if the key creation succeeded. Pass the key to tld_get_data() to get a pointer to 310 * locate the TLD. bpf programs can also fetch the same key by name. 311 * 312 * Use tld_create_key() only when a TLD needs to be created dynamically (e.g., @name is 313 * not known statically or a TLD needs to be created conditionally) 314 * 315 * An additional TLD_DYN_DATA_SIZE bytes are allocated per-thread to accommodate TLDs 316 * created dynamically with tld_create_key(). Since only a user page is pinned to the 317 * kernel, when TLDs created with TLD_DEFINE_KEY() uses more than TLD_PAGE_SIZE - 318 * TLD_DYN_DATA_SIZE, the buffer size will be limited to the rest of the page. 319 */ 320 __attribute__((unused)) 321 static tld_key_t tld_create_key(const char *name, size_t size) 322 { 323 return __tld_create_key(name, size, true); 324 } 325 326 __attribute__((unused)) 327 static inline bool tld_key_is_err(tld_key_t key) 328 { 329 return key.off < 0; 330 } 331 332 __attribute__((unused)) 333 static inline int tld_key_err_or_zero(tld_key_t key) 334 { 335 return tld_key_is_err(key) ? key.off : 0; 336 } 337 338 /** 339 * tld_get_data() - Get a pointer to the TLD associated with the given key of the 340 * calling thread. 341 * 342 * @map_fd: A file descriptor of tld_data_map, the underlying BPF task local storage map 343 * of task local data. 344 * @key: A key object created by TLD_DEFINE_KEY() or tld_create_key(). 345 * 346 * Return a pointer to the TLD if the key is valid; NULL if not enough memory for TLD 347 * for this thread, or the key is invalid. The returned pointer is guaranteed to be 8-byte 348 * aligned. 349 * 350 * Threads that call tld_get_data() must call tld_free() on exit to prevent 351 * memory leak if TLD_FREE_DATA_ON_THREAD_EXIT is not defined. 352 */ 353 __attribute__((unused)) 354 static void *tld_get_data(int map_fd, tld_key_t key) 355 { 356 if (!tld_meta_p) 357 return NULL; 358 359 /* tld_data_p is allocated on the first invocation of tld_get_data() */ 360 if (!tld_data_p && __tld_init_data_p(map_fd)) 361 return NULL; 362 363 return tld_data_p->data + key.off; 364 } 365 366 /** 367 * tld_free() - Free task local data memory of the calling thread 368 * 369 * For the calling thread, all pointers to TLDs acquired before will become invalid. 370 * 371 * Users must call tld_free() on thread exit to prevent memory leak. Alternatively, 372 * define TLD_FREE_DATA_ON_THREAD_EXIT and a thread exit handler will be registered 373 * to free the memory automatically. Calling tld_free() before thread exit is 374 * undefined behavior, which may lead to null-pointer dereference. 375 */ 376 __attribute__((unused)) 377 static void tld_free(void) 378 { 379 if (tld_data_p) { 380 free(tld_data_p); 381 tld_data_p = NULL; 382 } 383 } 384 385 #ifdef __cplusplus 386 } /* extern "C" */ 387 #endif 388 389 #endif /* __TASK_LOCAL_DATA_H */ 390