// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2021 VMware Inc, Steven Rostedt <rostedt@goodmis.org>
 */
#include <linux/spinlock.h>
#include <linux/irq_work.h>
#include <linux/slab.h>
#include "trace.h"

/* See pid_list.h for details */

static inline union lower_chunk *get_lower_chunk(struct trace_pid_list *pid_list)
{
	union lower_chunk *chunk;

	lockdep_assert_held(&pid_list->lock);

	if (!pid_list->lower_list)
		return NULL;

	chunk = pid_list->lower_list;
	pid_list->lower_list = chunk->next;
	pid_list->free_lower_chunks--;
	WARN_ON_ONCE(pid_list->free_lower_chunks < 0);
	chunk->next = NULL;
	/*
	 * If a refill needs to happen, it can not happen here
	 * as the scheduler run queue locks are held.
	 */
	if (pid_list->free_lower_chunks <= CHUNK_REALLOC)
		irq_work_queue(&pid_list->refill_irqwork);

	return chunk;
}

static inline union upper_chunk *get_upper_chunk(struct trace_pid_list *pid_list)
{
	union upper_chunk *chunk;

	lockdep_assert_held(&pid_list->lock);

	if (!pid_list->upper_list)
		return NULL;

	chunk = pid_list->upper_list;
	pid_list->upper_list = chunk->next;
	pid_list->free_upper_chunks--;
	WARN_ON_ONCE(pid_list->free_upper_chunks < 0);
	chunk->next = NULL;
	/*
	 * If a refill needs to happen, it can not happen here
	 * as the scheduler run queue locks are held.
	 */
	if (pid_list->free_upper_chunks <= CHUNK_REALLOC)
		irq_work_queue(&pid_list->refill_irqwork);

	return chunk;
}

static inline void put_lower_chunk(struct trace_pid_list *pid_list,
				   union lower_chunk *chunk)
{
	lockdep_assert_held(&pid_list->lock);

	chunk->next = pid_list->lower_list;
	pid_list->lower_list = chunk;
	pid_list->free_lower_chunks++;
}

static inline void put_upper_chunk(struct trace_pid_list *pid_list,
				   union upper_chunk *chunk)
{
	lockdep_assert_held(&pid_list->lock);

	chunk->next = pid_list->upper_list;
	pid_list->upper_list = chunk;
	pid_list->free_upper_chunks++;
}

static inline bool upper_empty(union upper_chunk *chunk)
{
	/*
	 * If chunk->data has no lower chunks, it will be the same
	 * as a zeroed bitmask.
	 */
	return bitmap_empty((unsigned long *)chunk->data, BITS_PER_TYPE(chunk->data));
}

static inline int pid_split(unsigned int pid, unsigned int *upper1,
			    unsigned int *upper2, unsigned int *lower)
{
	/* MAX_PID should cover all pids */
	BUILD_BUG_ON(MAX_PID < PID_MAX_LIMIT);

	/* In case a bad pid is passed in, then fail */
	if (unlikely(pid >= MAX_PID))
		return -1;

	*upper1 = (pid >> UPPER1_SHIFT) & UPPER_MASK;
	*upper2 = (pid >> UPPER2_SHIFT) & UPPER_MASK;
	*lower = pid & LOWER_MASK;

	return 0;
}

static inline unsigned int pid_join(unsigned int upper1,
				    unsigned int upper2, unsigned int lower)
{
	return ((upper1 & UPPER_MASK) << UPPER1_SHIFT) |
		((upper2 & UPPER_MASK) << UPPER2_SHIFT) |
		(lower & LOWER_MASK);
}

/**
 * trace_pid_list_is_set - test if the pid is set in the list
 * @pid_list: The pid list to test
 * @pid: The pid to see if set in the list.
 *
 * Tests if @pid is set in the @pid_list. This is usually called
 * from the scheduler when a task is scheduled. Its pid is checked
 * if it should be traced or not.
 *
 * Return true if the pid is in the list, false otherwise.
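 *
 * A minimal lookup sketch (hypothetical caller, not taken from this
 * file; trace_this_task() is a made-up helper):
 *
 *	if (trace_pid_list_is_set(pid_list, task->pid))
 *		trace_this_task(task);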
 */
bool trace_pid_list_is_set(struct trace_pid_list *pid_list, unsigned int pid)
{
	union upper_chunk *upper_chunk;
	union lower_chunk *lower_chunk;
	unsigned long flags;
	unsigned int upper1;
	unsigned int upper2;
	unsigned int lower;
	bool ret = false;

	if (!pid_list)
		return false;

	if (pid_split(pid, &upper1, &upper2, &lower) < 0)
		return false;

	raw_spin_lock_irqsave(&pid_list->lock, flags);
	upper_chunk = pid_list->upper[upper1];
	if (upper_chunk) {
		lower_chunk = upper_chunk->data[upper2];
		if (lower_chunk)
			ret = test_bit(lower, lower_chunk->data);
	}
	raw_spin_unlock_irqrestore(&pid_list->lock, flags);

	return ret;
}

/**
 * trace_pid_list_set - add a pid to the list
 * @pid_list: The pid list to add the @pid to.
 * @pid: The pid to add.
 *
 * Adds @pid to @pid_list. This is usually done explicitly by a user
 * adding a task to be traced, or indirectly by the fork function
 * when children should be traced and a task's pid is in the list.
 *
 * Return 0 on success, negative otherwise.
 */
int trace_pid_list_set(struct trace_pid_list *pid_list, unsigned int pid)
{
	union upper_chunk *upper_chunk;
	union lower_chunk *lower_chunk;
	unsigned long flags;
	unsigned int upper1;
	unsigned int upper2;
	unsigned int lower;
	int ret;

	if (!pid_list)
		return -ENODEV;

	if (pid_split(pid, &upper1, &upper2, &lower) < 0)
		return -EINVAL;

	raw_spin_lock_irqsave(&pid_list->lock, flags);
	upper_chunk = pid_list->upper[upper1];
	if (!upper_chunk) {
		upper_chunk = get_upper_chunk(pid_list);
		if (!upper_chunk) {
			ret = -ENOMEM;
			goto out;
		}
		pid_list->upper[upper1] = upper_chunk;
	}
	lower_chunk = upper_chunk->data[upper2];
	if (!lower_chunk) {
		lower_chunk = get_lower_chunk(pid_list);
		if (!lower_chunk) {
			ret = -ENOMEM;
			goto out;
		}
		upper_chunk->data[upper2] = lower_chunk;
	}
	set_bit(lower, lower_chunk->data);
	ret = 0;
 out:
	raw_spin_unlock_irqrestore(&pid_list->lock, flags);
	return ret;
}

/**
 * trace_pid_list_clear - remove a pid from the list
 * @pid_list: The pid list to remove the @pid from.
 * @pid: The pid to remove.
 *
 * Removes @pid from @pid_list. This is usually done explicitly by a user
 * removing tasks from tracing, or indirectly by the exit function
 * when a task that is set to be traced exits.
 *
 * Return 0 on success, negative otherwise.
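 *
 * A minimal add/remove sketch (hypothetical callers, not taken from
 * this file):
 *
 *	trace_pid_list_set(pid_list, child->pid);	// e.g. on fork
 *	trace_pid_list_clear(pid_list, task->pid);	// e.g. on exit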
 */
int trace_pid_list_clear(struct trace_pid_list *pid_list, unsigned int pid)
{
	union upper_chunk *upper_chunk;
	union lower_chunk *lower_chunk;
	unsigned long flags;
	unsigned int upper1;
	unsigned int upper2;
	unsigned int lower;

	if (!pid_list)
		return -ENODEV;

	if (pid_split(pid, &upper1, &upper2, &lower) < 0)
		return -EINVAL;

	raw_spin_lock_irqsave(&pid_list->lock, flags);
	upper_chunk = pid_list->upper[upper1];
	if (!upper_chunk)
		goto out;

	lower_chunk = upper_chunk->data[upper2];
	if (!lower_chunk)
		goto out;

	clear_bit(lower, lower_chunk->data);

	/* if there's no more bits set, add it to the free list */
	if (find_first_bit(lower_chunk->data, LOWER_MAX) >= LOWER_MAX) {
		put_lower_chunk(pid_list, lower_chunk);
		upper_chunk->data[upper2] = NULL;
		if (upper_empty(upper_chunk)) {
			put_upper_chunk(pid_list, upper_chunk);
			pid_list->upper[upper1] = NULL;
		}
	}
 out:
	raw_spin_unlock_irqrestore(&pid_list->lock, flags);
	return 0;
}

/**
 * trace_pid_list_next - return the next pid in the list
 * @pid_list: The pid list to examine.
 * @pid: The pid to start from
 * @next: The pointer to place the pid that is set starting from @pid.
 *
 * Looks for the next consecutive pid that is in @pid_list starting
 * at the pid specified by @pid. If one is set (including @pid), then
 * that pid is placed into @next.
 *
 * Return 0 when a pid is found, -1 if there are no more pids included.
 */
int trace_pid_list_next(struct trace_pid_list *pid_list, unsigned int pid,
			unsigned int *next)
{
	union upper_chunk *upper_chunk;
	union lower_chunk *lower_chunk;
	unsigned long flags;
	unsigned int upper1;
	unsigned int upper2;
	unsigned int lower;

	if (!pid_list)
		return -ENODEV;

	if (pid_split(pid, &upper1, &upper2, &lower) < 0)
		return -EINVAL;

	raw_spin_lock_irqsave(&pid_list->lock, flags);
	for (; upper1 <= UPPER_MASK; upper1++, upper2 = 0) {
		upper_chunk = pid_list->upper[upper1];

		if (!upper_chunk)
			continue;

		for (; upper2 <= UPPER_MASK; upper2++, lower = 0) {
			lower_chunk = upper_chunk->data[upper2];
			if (!lower_chunk)
				continue;

			lower = find_next_bit(lower_chunk->data, LOWER_MAX,
					      lower);
			if (lower < LOWER_MAX)
				goto found;
		}
	}

 found:
	raw_spin_unlock_irqrestore(&pid_list->lock, flags);
	if (upper1 > UPPER_MASK)
		return -1;

	*next = pid_join(upper1, upper2, lower);
	return 0;
}

/**
 * trace_pid_list_first - return the first pid in the list
 * @pid_list: The pid list to examine.
 * @pid: The pointer to place the first found pid that is set.
 *
 * Looks for the first pid that is set in @pid_list, and places it
 * into @pid if found.
 *
 * Return 0 when a pid is found, -1 if there are no pids set.
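 *
 * A minimal iteration sketch over every pid in the list (hypothetical
 * caller, not taken from this file):
 *
 *	unsigned int pid;
 *
 *	if (!trace_pid_list_first(pid_list, &pid)) {
 *		do {
 *			pr_info("traced pid %u\n", pid);
 *		} while (!trace_pid_list_next(pid_list, pid + 1, &pid));
 *	}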
 */
int trace_pid_list_first(struct trace_pid_list *pid_list, unsigned int *pid)
{
	return trace_pid_list_next(pid_list, 0, pid);
}

static void pid_list_refill_irq(struct irq_work *iwork)
{
	struct trace_pid_list *pid_list = container_of(iwork, struct trace_pid_list,
						       refill_irqwork);
	union upper_chunk *upper = NULL;
	union lower_chunk *lower = NULL;
	union upper_chunk **upper_next = &upper;
	union lower_chunk **lower_next = &lower;
	int upper_count;
	int lower_count;
	int ucnt = 0;
	int lcnt = 0;

 again:
	raw_spin_lock(&pid_list->lock);
	upper_count = CHUNK_ALLOC - pid_list->free_upper_chunks;
	lower_count = CHUNK_ALLOC - pid_list->free_lower_chunks;
	raw_spin_unlock(&pid_list->lock);

	if (upper_count <= 0 && lower_count <= 0)
		return;

	while (upper_count-- > 0) {
		union upper_chunk *chunk;

		chunk = kzalloc(sizeof(*chunk), GFP_NOWAIT);
		if (!chunk)
			break;
		*upper_next = chunk;
		upper_next = &chunk->next;
		ucnt++;
	}

	while (lower_count-- > 0) {
		union lower_chunk *chunk;

		chunk = kzalloc(sizeof(*chunk), GFP_NOWAIT);
		if (!chunk)
			break;
		*lower_next = chunk;
		lower_next = &chunk->next;
		lcnt++;
	}

	raw_spin_lock(&pid_list->lock);
	if (upper) {
		*upper_next = pid_list->upper_list;
		pid_list->upper_list = upper;
		pid_list->free_upper_chunks += ucnt;
	}
	if (lower) {
		*lower_next = pid_list->lower_list;
		pid_list->lower_list = lower;
		pid_list->free_lower_chunks += lcnt;
	}
	raw_spin_unlock(&pid_list->lock);

	/*
	 * On success of allocating all the chunks, both counters
	 * will be less than zero. If they are not, then an allocation
	 * failed, and we should not try again.
	 */
	if (upper_count >= 0 || lower_count >= 0)
		return;
	/*
	 * When the locks were released, free chunks could have
	 * been used and allocation needs to be done again. Might as
	 * well allocate it now.
	 */
	goto again;
}

/**
 * trace_pid_list_alloc - create a new pid_list
 *
 * Allocates a new pid_list to store pids into.
 *
 * Returns the pid_list on success, NULL otherwise.
 */
struct trace_pid_list *trace_pid_list_alloc(void)
{
	struct trace_pid_list *pid_list;
	int i;

	/* According to linux/threads.h, pids can be no bigger than 30 bits */
	WARN_ON_ONCE(init_pid_ns.pid_max > (1 << 30));

	pid_list = kzalloc(sizeof(*pid_list), GFP_KERNEL);
	if (!pid_list)
		return NULL;

	init_irq_work(&pid_list->refill_irqwork, pid_list_refill_irq);

	raw_spin_lock_init(&pid_list->lock);

	for (i = 0; i < CHUNK_ALLOC; i++) {
		union upper_chunk *chunk;

		chunk = kzalloc(sizeof(*chunk), GFP_KERNEL);
		if (!chunk)
			break;
		chunk->next = pid_list->upper_list;
		pid_list->upper_list = chunk;
		pid_list->free_upper_chunks++;
	}

	for (i = 0; i < CHUNK_ALLOC; i++) {
		union lower_chunk *chunk;

		chunk = kzalloc(sizeof(*chunk), GFP_KERNEL);
		if (!chunk)
			break;
		chunk->next = pid_list->lower_list;
		pid_list->lower_list = chunk;
		pid_list->free_lower_chunks++;
	}

	return pid_list;
}

/**
 * trace_pid_list_free - Frees an allocated pid_list.
 * @pid_list: The pid list to free.
 *
 * Frees the memory for a pid_list that was allocated.
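 *
 * A minimal lifecycle sketch (hypothetical caller, not taken from this
 * file):
 *
 *	struct trace_pid_list *pid_list = trace_pid_list_alloc();
 *
 *	if (!pid_list)
 *		return -ENOMEM;
 *	trace_pid_list_set(pid_list, pid);
 *	...
 *	trace_pid_list_free(pid_list);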
 */
void trace_pid_list_free(struct trace_pid_list *pid_list)
{
	union upper_chunk *upper;
	union lower_chunk *lower;
	int i, j;

	if (!pid_list)
		return;

	irq_work_sync(&pid_list->refill_irqwork);

	while (pid_list->lower_list) {
		union lower_chunk *chunk;

		chunk = pid_list->lower_list;
		pid_list->lower_list = pid_list->lower_list->next;
		kfree(chunk);
	}

	while (pid_list->upper_list) {
		union upper_chunk *chunk;

		chunk = pid_list->upper_list;
		pid_list->upper_list = pid_list->upper_list->next;
		kfree(chunk);
	}

	for (i = 0; i < UPPER1_SIZE; i++) {
		upper = pid_list->upper[i];
		if (upper) {
			for (j = 0; j < UPPER2_SIZE; j++) {
				lower = upper->data[j];
				kfree(lower);
			}
			kfree(upper);
		}
	}
	kfree(pid_list);
}