1 /* SPDX-License-Identifier: GPL-2.0 */ 2 #ifndef __TASK_LOCAL_DATA_H 3 #define __TASK_LOCAL_DATA_H 4 5 #include <errno.h> 6 #include <fcntl.h> 7 #include <sched.h> 8 #include <stdatomic.h> 9 #include <stddef.h> 10 #include <stdlib.h> 11 #include <string.h> 12 #include <unistd.h> 13 #include <sys/syscall.h> 14 #include <sys/types.h> 15 16 #ifdef TLD_FREE_DATA_ON_THREAD_EXIT 17 #include <pthread.h> 18 #endif 19 20 #include <bpf/bpf.h> 21 22 /* 23 * OPTIONS 24 * 25 * Define the option before including the header 26 * 27 * TLD_FREE_DATA_ON_THREAD_EXIT - Frees memory on thread exit automatically 28 * 29 * Thread-specific memory for storing TLD is allocated lazily on the first call to 30 * tld_get_data(). The thread that calls it must also call tld_free() on thread exit 31 * to prevent memory leak. Pthread will be included if the option is defined. A pthread 32 * key will be registered with a destructor that calls tld_free(). 33 * 34 * 35 * TLD_DYN_DATA_SIZE - The maximum size of memory allocated for TLDs created dynamically 36 * (default: 64 bytes) 37 * 38 * A TLD can be defined statically using TLD_DEFINE_KEY() or created on the fly using 39 * tld_create_key(). As the total size of TLDs created with tld_create_key() cannot be 40 * possibly known statically, a memory area of size TLD_DYN_DATA_SIZE will be allocated 41 * for these TLDs. This additional memory is allocated for every thread that calls 42 * tld_get_data() even if no tld_create_key are actually called, so be mindful of 43 * potential memory wastage. Use TLD_DEFINE_KEY() whenever possible as just enough memory 44 * will be allocated for TLDs created with it. 45 * 46 * 47 * TLD_NAME_LEN - The maximum length of the name of a TLD (default: 62) 48 * 49 * Setting TLD_NAME_LEN will affect the maximum number of TLDs a process can store, 50 * TLD_MAX_DATA_CNT. 51 * 52 * 53 * TLD_DATA_USE_ALIGNED_ALLOC - Always use aligned_alloc() instead of malloc() 54 * 55 * When allocating the memory for storing TLDs, we need to make sure there is a memory 56 * region of the X bytes within a page. This is due to the limit posed by UPTR: memory 57 * pinned to the kernel cannot exceed a page nor can it cross the page boundary. The 58 * library normally calls malloc(2*X) given X bytes of total TLDs, and only uses 59 * aligned_alloc(PAGE_SIZE, X) when X >= PAGE_SIZE / 2. This is to reduce memory wastage 60 * as not all memory allocator can use the exact amount of memory requested to fulfill 61 * aligned_alloc(). For example, some may round the size up to the alignment. Enable the 62 * option to always use aligned_alloc() if the implementation has low memory overhead. 63 */ 64 65 #define TLD_PAGE_SIZE getpagesize() 66 #define TLD_PAGE_MASK (~(TLD_PAGE_SIZE - 1)) 67 68 #define TLD_ROUND_MASK(x, y) ((__typeof__(x))((y) - 1)) 69 #define TLD_ROUND_UP(x, y) ((((x) - 1) | TLD_ROUND_MASK(x, y)) + 1) 70 71 #define TLD_READ_ONCE(x) (*(volatile typeof(x) *)&(x)) 72 73 #ifndef TLD_DYN_DATA_SIZE 74 #define TLD_DYN_DATA_SIZE 64 75 #endif 76 77 #define TLD_MAX_DATA_CNT (TLD_PAGE_SIZE / sizeof(struct tld_metadata) - 1) 78 79 #ifndef TLD_NAME_LEN 80 #define TLD_NAME_LEN 62 81 #endif 82 83 #ifdef __cplusplus 84 extern "C" { 85 #endif 86 87 typedef struct { 88 __s16 off; 89 } tld_key_t; 90 91 struct tld_metadata { 92 char name[TLD_NAME_LEN]; 93 _Atomic __u16 size; 94 }; 95 96 struct tld_meta_u { 97 _Atomic __u8 cnt; 98 __u16 size; 99 struct tld_metadata metadata[]; 100 }; 101 102 struct tld_data_u { 103 __u64 start; /* offset of tld_data_u->data in a page */ 104 char data[]; 105 }; 106 107 struct tld_map_value { 108 void *data; 109 struct tld_meta_u *meta; 110 }; 111 112 struct tld_meta_u * _Atomic tld_meta_p __attribute__((weak)); 113 __thread struct tld_data_u *tld_data_p __attribute__((weak)); 114 __thread void *tld_data_alloc_p __attribute__((weak)); 115 116 #ifdef TLD_FREE_DATA_ON_THREAD_EXIT 117 pthread_key_t tld_pthread_key __attribute__((weak)); 118 119 static void tld_free(void); 120 121 static void __tld_thread_exit_handler(void *unused) 122 { 123 tld_free(); 124 } 125 #endif 126 127 static int __tld_init_meta_p(void) 128 { 129 struct tld_meta_u *meta, *uninit = NULL; 130 int err = 0; 131 132 meta = (struct tld_meta_u *)aligned_alloc(TLD_PAGE_SIZE, TLD_PAGE_SIZE); 133 if (!meta) { 134 err = -ENOMEM; 135 goto out; 136 } 137 138 memset(meta, 0, TLD_PAGE_SIZE); 139 meta->size = TLD_DYN_DATA_SIZE; 140 141 if (!atomic_compare_exchange_strong(&tld_meta_p, &uninit, meta)) { 142 free(meta); 143 goto out; 144 } 145 146 #ifdef TLD_FREE_DATA_ON_THREAD_EXIT 147 pthread_key_create(&tld_pthread_key, __tld_thread_exit_handler); 148 #endif 149 out: 150 return err; 151 } 152 153 static int __tld_init_data_p(int map_fd) 154 { 155 bool use_aligned_alloc = false; 156 struct tld_map_value map_val; 157 struct tld_data_u *data; 158 void *data_alloc = NULL; 159 int err, tid_fd = -1; 160 161 tid_fd = syscall(SYS_pidfd_open, sys_gettid(), O_EXCL); 162 if (tid_fd < 0) { 163 err = -errno; 164 goto out; 165 } 166 167 #ifdef TLD_DATA_USE_ALIGNED_ALLOC 168 use_aligned_alloc = true; 169 #endif 170 171 /* 172 * tld_meta_p->size = TLD_DYN_DATA_SIZE + 173 * total size of TLDs defined via TLD_DEFINE_KEY() 174 */ 175 data_alloc = (use_aligned_alloc || tld_meta_p->size * 2 >= TLD_PAGE_SIZE) ? 176 aligned_alloc(TLD_PAGE_SIZE, tld_meta_p->size) : 177 malloc(tld_meta_p->size * 2); 178 if (!data_alloc) { 179 err = -ENOMEM; 180 goto out; 181 } 182 183 /* 184 * Always pass a page-aligned address to UPTR since the size of tld_map_value::data 185 * is a page in BTF. If data_alloc spans across two pages, use the page that contains large 186 * enough memory. 187 */ 188 if (TLD_PAGE_SIZE - (~TLD_PAGE_MASK & (intptr_t)data_alloc) >= tld_meta_p->size) { 189 map_val.data = (void *)(TLD_PAGE_MASK & (intptr_t)data_alloc); 190 data = data_alloc; 191 data->start = (~TLD_PAGE_MASK & (intptr_t)data_alloc) + 192 offsetof(struct tld_data_u, data); 193 } else { 194 map_val.data = (void *)(TLD_ROUND_UP((intptr_t)data_alloc, TLD_PAGE_SIZE)); 195 data = (void *)(TLD_ROUND_UP((intptr_t)data_alloc, TLD_PAGE_SIZE)); 196 data->start = offsetof(struct tld_data_u, data); 197 } 198 map_val.meta = TLD_READ_ONCE(tld_meta_p); 199 200 err = bpf_map_update_elem(map_fd, &tid_fd, &map_val, 0); 201 if (err) { 202 free(data_alloc); 203 goto out; 204 } 205 206 tld_data_p = data; 207 tld_data_alloc_p = data_alloc; 208 #ifdef TLD_FREE_DATA_ON_THREAD_EXIT 209 pthread_setspecific(tld_pthread_key, (void *)1); 210 #endif 211 out: 212 if (tid_fd >= 0) 213 close(tid_fd); 214 return err; 215 } 216 217 static tld_key_t __tld_create_key(const char *name, size_t size, bool dyn_data) 218 { 219 int err, i, sz, off = 0; 220 __u8 cnt; 221 222 if (!TLD_READ_ONCE(tld_meta_p)) { 223 err = __tld_init_meta_p(); 224 if (err) 225 return (tld_key_t){err}; 226 } 227 228 for (i = 0; i < TLD_MAX_DATA_CNT; i++) { 229 retry: 230 cnt = atomic_load(&tld_meta_p->cnt); 231 if (i < cnt) { 232 /* A metadata is not ready until size is updated with a non-zero value */ 233 while (!(sz = atomic_load(&tld_meta_p->metadata[i].size))) 234 sched_yield(); 235 236 if (!strncmp(tld_meta_p->metadata[i].name, name, TLD_NAME_LEN)) 237 return (tld_key_t){-EEXIST}; 238 239 off += TLD_ROUND_UP(sz, 8); 240 continue; 241 } 242 243 /* 244 * TLD_DEFINE_KEY() is given memory upto a page while at most 245 * TLD_DYN_DATA_SIZE is allocated for tld_create_key() 246 */ 247 if (dyn_data) { 248 if (off + TLD_ROUND_UP(size, 8) > tld_meta_p->size) 249 return (tld_key_t){-E2BIG}; 250 } else { 251 if (off + TLD_ROUND_UP(size, 8) > TLD_PAGE_SIZE - sizeof(struct tld_data_u)) 252 return (tld_key_t){-E2BIG}; 253 tld_meta_p->size += TLD_ROUND_UP(size, 8); 254 } 255 256 /* 257 * Only one tld_create_key() can increase the current cnt by one and 258 * takes the latest available slot. Other threads will check again if a new 259 * TLD can still be added, and then compete for the new slot after the 260 * succeeding thread update the size. 261 */ 262 if (!atomic_compare_exchange_strong(&tld_meta_p->cnt, &cnt, cnt + 1)) 263 goto retry; 264 265 strncpy(tld_meta_p->metadata[i].name, name, TLD_NAME_LEN); 266 atomic_store(&tld_meta_p->metadata[i].size, size); 267 return (tld_key_t){(__s16)off}; 268 } 269 270 return (tld_key_t){-ENOSPC}; 271 } 272 273 /** 274 * TLD_DEFINE_KEY() - Define a TLD and a global variable key associated with the TLD. 275 * 276 * @name: The name of the TLD 277 * @size: The size of the TLD 278 * @key: The variable name of the key. Cannot exceed TLD_NAME_LEN 279 * 280 * The macro can only be used in file scope. 281 * 282 * A global variable key of opaque type, tld_key_t, will be declared and initialized before 283 * main() starts. Use tld_key_is_err() or tld_key_err_or_zero() later to check if the key 284 * creation succeeded. Pass the key to tld_get_data() to get a pointer to the TLD. 285 * bpf programs can also fetch the same key by name. 286 * 287 * The total size of TLDs created using TLD_DEFINE_KEY() cannot exceed a page. Just 288 * enough memory will be allocated for each thread on the first call to tld_get_data(). 289 */ 290 #define TLD_DEFINE_KEY(key, name, size) \ 291 tld_key_t key; \ 292 \ 293 __attribute__((constructor)) \ 294 void __tld_define_key_##key(void) \ 295 { \ 296 key = __tld_create_key(name, size, false); \ 297 } 298 299 /** 300 * tld_create_key() - Create a TLD and return a key associated with the TLD. 301 * 302 * @name: The name the TLD 303 * @size: The size of the TLD 304 * 305 * Return an opaque object key. Use tld_key_is_err() or tld_key_err_or_zero() to check 306 * if the key creation succeeded. Pass the key to tld_get_data() to get a pointer to 307 * locate the TLD. bpf programs can also fetch the same key by name. 308 * 309 * Use tld_create_key() only when a TLD needs to be created dynamically (e.g., @name is 310 * not known statically or a TLD needs to be created conditionally) 311 * 312 * An additional TLD_DYN_DATA_SIZE bytes are allocated per-thread to accommodate TLDs 313 * created dynamically with tld_create_key(). Since only a user page is pinned to the 314 * kernel, when TLDs created with TLD_DEFINE_KEY() uses more than TLD_PAGE_SIZE - 315 * TLD_DYN_DATA_SIZE, the buffer size will be limited to the rest of the page. 316 */ 317 __attribute__((unused)) 318 static tld_key_t tld_create_key(const char *name, size_t size) 319 { 320 return __tld_create_key(name, size, true); 321 } 322 323 __attribute__((unused)) 324 static inline bool tld_key_is_err(tld_key_t key) 325 { 326 return key.off < 0; 327 } 328 329 __attribute__((unused)) 330 static inline int tld_key_err_or_zero(tld_key_t key) 331 { 332 return tld_key_is_err(key) ? key.off : 0; 333 } 334 335 /** 336 * tld_get_data() - Get a pointer to the TLD associated with the given key of the 337 * calling thread. 338 * 339 * @map_fd: A file descriptor of tld_data_map, the underlying BPF task local storage map 340 * of task local data. 341 * @key: A key object created by TLD_DEFINE_KEY() or tld_create_key(). 342 * 343 * Return a pointer to the TLD if the key is valid; NULL if not enough memory for TLD 344 * for this thread, or the key is invalid. The returned pointer is guaranteed to be 8-byte 345 * aligned. 346 * 347 * Threads that call tld_get_data() must call tld_free() on exit to prevent 348 * memory leak if TLD_FREE_DATA_ON_THREAD_EXIT is not defined. 349 */ 350 __attribute__((unused)) 351 static void *tld_get_data(int map_fd, tld_key_t key) 352 { 353 if (!TLD_READ_ONCE(tld_meta_p)) 354 return NULL; 355 356 /* tld_data_p is allocated on the first invocation of tld_get_data() */ 357 if (!tld_data_p && __tld_init_data_p(map_fd)) 358 return NULL; 359 360 return tld_data_p->data + key.off; 361 } 362 363 /** 364 * tld_free() - Free task local data memory of the calling thread 365 * 366 * For the calling thread, all pointers to TLDs acquired before will become invalid. 367 * 368 * Users must call tld_free() on thread exit to prevent memory leak. Alternatively, 369 * define TLD_FREE_DATA_ON_THREAD_EXIT and a thread exit handler will be registered 370 * to free the memory automatically. 371 */ 372 __attribute__((unused)) 373 static void tld_free(void) 374 { 375 if (tld_data_alloc_p) { 376 free(tld_data_alloc_p); 377 tld_data_alloc_p = NULL; 378 tld_data_p = NULL; 379 } 380 } 381 382 #ifdef __cplusplus 383 } /* extern "C" */ 384 #endif 385 386 #endif /* __TASK_LOCAL_DATA_H */ 387