// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2021 VMware Inc, Steven Rostedt <rostedt@goodmis.org>
 */
#include <linux/spinlock.h>
#include <linux/irq_work.h>
#include <linux/slab.h>
#include "trace.h"

/* See pid_list.h for details */

static inline union lower_chunk *get_lower_chunk(struct trace_pid_list *pid_list)
{
	union lower_chunk *chunk;

	lockdep_assert_held(&pid_list->lock);

	if (!pid_list->lower_list)
		return NULL;

	chunk = pid_list->lower_list;
	pid_list->lower_list = chunk->next;
	pid_list->free_lower_chunks--;
	WARN_ON_ONCE(pid_list->free_lower_chunks < 0);
	chunk->next = NULL;
	/*
	 * If a refill needs to happen, it cannot happen here
	 * as the scheduler run queue locks are held.
	 */
	if (pid_list->free_lower_chunks <= CHUNK_REALLOC)
		irq_work_queue(&pid_list->refill_irqwork);

	return chunk;
}

static inline union upper_chunk *get_upper_chunk(struct trace_pid_list *pid_list)
{
	union upper_chunk *chunk;

	lockdep_assert_held(&pid_list->lock);

	if (!pid_list->upper_list)
		return NULL;

	chunk = pid_list->upper_list;
	pid_list->upper_list = chunk->next;
	pid_list->free_upper_chunks--;
	WARN_ON_ONCE(pid_list->free_upper_chunks < 0);
	chunk->next = NULL;
	/*
	 * If a refill needs to happen, it cannot happen here
	 * as the scheduler run queue locks are held.
	 */
	if (pid_list->free_upper_chunks <= CHUNK_REALLOC)
		irq_work_queue(&pid_list->refill_irqwork);

	return chunk;
}

static inline void put_lower_chunk(struct trace_pid_list *pid_list,
				   union lower_chunk *chunk)
{
	lockdep_assert_held(&pid_list->lock);

	chunk->next = pid_list->lower_list;
	pid_list->lower_list = chunk;
	pid_list->free_lower_chunks++;
}

static inline void put_upper_chunk(struct trace_pid_list *pid_list,
				   union upper_chunk *chunk)
{
	lockdep_assert_held(&pid_list->lock);

	chunk->next = pid_list->upper_list;
	pid_list->upper_list = chunk;
	pid_list->free_upper_chunks++;
}

static inline bool upper_empty(union upper_chunk *chunk)
{
	/*
	 * If chunk->data has no lower chunks, it will be the same
	 * as a zeroed bitmask. Use find_first_bit() to test it
	 * and if it doesn't find any bits set, then the array
	 * is empty.
	 */
	int bit = find_first_bit((unsigned long *)chunk->data,
				 sizeof(chunk->data) * 8);
	return bit >= sizeof(chunk->data) * 8;
}

static inline int pid_split(unsigned int pid, unsigned int *upper1,
			    unsigned int *upper2, unsigned int *lower)
{
	/* MAX_PID should cover all pids */
	BUILD_BUG_ON(MAX_PID < PID_MAX_LIMIT);

	/* In case a bad pid is passed in, then fail */
	if (unlikely(pid >= MAX_PID))
		return -1;

	*upper1 = (pid >> UPPER1_SHIFT) & UPPER_MASK;
	*upper2 = (pid >> UPPER2_SHIFT) & UPPER_MASK;
	*lower = pid & LOWER_MASK;

	return 0;
}

static inline unsigned int pid_join(unsigned int upper1,
				    unsigned int upper2, unsigned int lower)
{
	return ((upper1 & UPPER_MASK) << UPPER1_SHIFT) |
	       ((upper2 & UPPER_MASK) << UPPER2_SHIFT) |
	       (lower & LOWER_MASK);
}
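
/*
 * Illustrative sketch, not part of the original file: a round-trip
 * self-check of pid_split()/pid_join(). Assuming the 8/8/14 bit split
 * from pid_list.h (UPPER1_SHIFT == 22, UPPER2_SHIFT == 14), a pid of
 * 200000 splits into upper1 = 0, upper2 = 12, lower = 3392, since
 * 12 * 16384 + 3392 == 200000. Joining those values must reproduce
 * the original pid for any pid below MAX_PID.
 */
static void __maybe_unused pid_split_join_example(void)
{
	unsigned int upper1, upper2, lower;
	unsigned int pid = 200000;	/* arbitrary example pid */

	if (!pid_split(pid, &upper1, &upper2, &lower))
		WARN_ON_ONCE(pid_join(upper1, upper2, lower) != pid);
}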
/**
 * trace_pid_list_is_set - test if the pid is set in the list
 * @pid_list: The pid list to test
 * @pid: The pid to see if set in the list.
 *
 * Tests if @pid is set in @pid_list. This is usually called
 * from the scheduler when a task is scheduled. The task's pid
 * is checked to see if it should be traced or not.
 *
 * Return true if the pid is in the list, false otherwise.
 */
bool trace_pid_list_is_set(struct trace_pid_list *pid_list, unsigned int pid)
{
	union upper_chunk *upper_chunk;
	union lower_chunk *lower_chunk;
	unsigned long flags;
	unsigned int upper1;
	unsigned int upper2;
	unsigned int lower;
	bool ret = false;

	if (!pid_list)
		return false;

	if (pid_split(pid, &upper1, &upper2, &lower) < 0)
		return false;

	raw_spin_lock_irqsave(&pid_list->lock, flags);
	upper_chunk = pid_list->upper[upper1];
	if (upper_chunk) {
		lower_chunk = upper_chunk->data[upper2];
		if (lower_chunk)
			ret = test_bit(lower, lower_chunk->data);
	}
	raw_spin_unlock_irqrestore(&pid_list->lock, flags);

	return ret;
}

/**
 * trace_pid_list_set - add a pid to the list
 * @pid_list: The pid list to add the @pid to.
 * @pid: The pid to add.
 *
 * Adds @pid to @pid_list. This is usually done explicitly by a user
 * adding a task to be traced, or indirectly by the fork function
 * when children should be traced and a task's pid is in the list.
 *
 * Return 0 on success, negative otherwise.
 */
int trace_pid_list_set(struct trace_pid_list *pid_list, unsigned int pid)
{
	union upper_chunk *upper_chunk;
	union lower_chunk *lower_chunk;
	unsigned long flags;
	unsigned int upper1;
	unsigned int upper2;
	unsigned int lower;
	int ret;

	if (!pid_list)
		return -ENODEV;

	if (pid_split(pid, &upper1, &upper2, &lower) < 0)
		return -EINVAL;

	raw_spin_lock_irqsave(&pid_list->lock, flags);
	upper_chunk = pid_list->upper[upper1];
	if (!upper_chunk) {
		upper_chunk = get_upper_chunk(pid_list);
		if (!upper_chunk) {
			ret = -ENOMEM;
			goto out;
		}
		pid_list->upper[upper1] = upper_chunk;
	}
	lower_chunk = upper_chunk->data[upper2];
	if (!lower_chunk) {
		lower_chunk = get_lower_chunk(pid_list);
		if (!lower_chunk) {
			ret = -ENOMEM;
			goto out;
		}
		upper_chunk->data[upper2] = lower_chunk;
	}
	set_bit(lower, lower_chunk->data);
	ret = 0;
 out:
	raw_spin_unlock_irqrestore(&pid_list->lock, flags);
	return ret;
}
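
/*
 * Usage sketch, not part of the original file: the fork-time propagation
 * that the kernel-doc above describes. If the parent's pid is in the
 * list, the child's pid is added as well. The function name and
 * parameters are hypothetical.
 */
static void __maybe_unused pid_list_fork_example(struct trace_pid_list *pid_list,
						 unsigned int parent_pid,
						 unsigned int child_pid)
{
	if (trace_pid_list_is_set(pid_list, parent_pid))
		trace_pid_list_set(pid_list, child_pid);
}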
/**
 * trace_pid_list_clear - remove a pid from the list
 * @pid_list: The pid list to remove the @pid from.
 * @pid: The pid to remove.
 *
 * Removes @pid from @pid_list. This is usually done explicitly by a user
 * removing tasks from tracing, or indirectly by the exit function
 * when a task that is set to be traced exits.
 *
 * Return 0 on success, negative otherwise.
 */
int trace_pid_list_clear(struct trace_pid_list *pid_list, unsigned int pid)
{
	union upper_chunk *upper_chunk;
	union lower_chunk *lower_chunk;
	unsigned long flags;
	unsigned int upper1;
	unsigned int upper2;
	unsigned int lower;

	if (!pid_list)
		return -ENODEV;

	if (pid_split(pid, &upper1, &upper2, &lower) < 0)
		return -EINVAL;

	raw_spin_lock_irqsave(&pid_list->lock, flags);
	upper_chunk = pid_list->upper[upper1];
	if (!upper_chunk)
		goto out;

	lower_chunk = upper_chunk->data[upper2];
	if (!lower_chunk)
		goto out;

	clear_bit(lower, lower_chunk->data);

	/* if there are no more bits set, add the chunk to the free list */
	if (find_first_bit(lower_chunk->data, LOWER_MAX) >= LOWER_MAX) {
		put_lower_chunk(pid_list, lower_chunk);
		upper_chunk->data[upper2] = NULL;
		if (upper_empty(upper_chunk)) {
			put_upper_chunk(pid_list, upper_chunk);
			pid_list->upper[upper1] = NULL;
		}
	}
 out:
	raw_spin_unlock_irqrestore(&pid_list->lock, flags);
	return 0;
}

/**
 * trace_pid_list_next - return the next pid in the list
 * @pid_list: The pid list to examine.
 * @pid: The pid to start from.
 * @next: The pointer to place the next pid that is set, starting from @pid.
 *
 * Looks for the next consecutive pid that is in @pid_list starting
 * at the pid specified by @pid. If one is set (including @pid), then
 * that pid is placed into @next.
 *
 * Return 0 when a pid is found, -1 if there are no more pids set.
 */
int trace_pid_list_next(struct trace_pid_list *pid_list, unsigned int pid,
			unsigned int *next)
{
	union upper_chunk *upper_chunk;
	union lower_chunk *lower_chunk;
	unsigned long flags;
	unsigned int upper1;
	unsigned int upper2;
	unsigned int lower;

	if (!pid_list)
		return -ENODEV;

	if (pid_split(pid, &upper1, &upper2, &lower) < 0)
		return -EINVAL;

	raw_spin_lock_irqsave(&pid_list->lock, flags);
	for (; upper1 <= UPPER_MASK; upper1++, upper2 = 0) {
		upper_chunk = pid_list->upper[upper1];

		if (!upper_chunk)
			continue;

		for (; upper2 <= UPPER_MASK; upper2++, lower = 0) {
			lower_chunk = upper_chunk->data[upper2];
			if (!lower_chunk)
				continue;

			lower = find_next_bit(lower_chunk->data, LOWER_MAX,
					      lower);
			if (lower < LOWER_MAX)
				goto found;
		}
	}

 found:
	raw_spin_unlock_irqrestore(&pid_list->lock, flags);
	if (upper1 > UPPER_MASK)
		return -1;

	*next = pid_join(upper1, upper2, lower);
	return 0;
}

/**
 * trace_pid_list_first - return the first pid in the list
 * @pid_list: The pid list to examine.
 * @pid: The pointer to place the first pid found that is set.
 *
 * Looks for the first pid that is set in @pid_list, and places it
 * into @pid if found.
 *
 * Return 0 when a pid is found, -1 if there are no pids set.
 */
int trace_pid_list_first(struct trace_pid_list *pid_list, unsigned int *pid)
{
	return trace_pid_list_next(pid_list, 0, pid);
}
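
/*
 * Usage sketch, not part of the original file: walking every pid in the
 * list with trace_pid_list_first()/trace_pid_list_next(). Starting each
 * subsequent search at pid + 1 keeps the walk from finding the same pid
 * again. The function name is hypothetical.
 */
static void __maybe_unused pid_list_walk_example(struct trace_pid_list *pid_list)
{
	unsigned int pid;

	if (trace_pid_list_first(pid_list, &pid))
		return;

	do {
		pr_info("pid %u is set\n", pid);
	} while (!trace_pid_list_next(pid_list, pid + 1, &pid));
}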
static void pid_list_refill_irq(struct irq_work *iwork)
{
	struct trace_pid_list *pid_list = container_of(iwork, struct trace_pid_list,
						       refill_irqwork);
	union upper_chunk *upper = NULL;
	union lower_chunk *lower = NULL;
	union upper_chunk **upper_next = &upper;
	union lower_chunk **lower_next = &lower;
	int upper_count;
	int lower_count;
	int ucnt = 0;
	int lcnt = 0;

 again:
	raw_spin_lock(&pid_list->lock);
	upper_count = CHUNK_ALLOC - pid_list->free_upper_chunks;
	lower_count = CHUNK_ALLOC - pid_list->free_lower_chunks;
	raw_spin_unlock(&pid_list->lock);

	if (upper_count <= 0 && lower_count <= 0)
		return;

	while (upper_count-- > 0) {
		union upper_chunk *chunk;

		chunk = kzalloc(sizeof(*chunk), GFP_KERNEL);
		if (!chunk)
			break;
		*upper_next = chunk;
		upper_next = &chunk->next;
		ucnt++;
	}

	while (lower_count-- > 0) {
		union lower_chunk *chunk;

		chunk = kzalloc(sizeof(*chunk), GFP_KERNEL);
		if (!chunk)
			break;
		*lower_next = chunk;
		lower_next = &chunk->next;
		lcnt++;
	}

	raw_spin_lock(&pid_list->lock);
	if (upper) {
		*upper_next = pid_list->upper_list;
		pid_list->upper_list = upper;
		pid_list->free_upper_chunks += ucnt;
	}
	if (lower) {
		*lower_next = pid_list->lower_list;
		pid_list->lower_list = lower;
		pid_list->free_lower_chunks += lcnt;
	}
	raw_spin_unlock(&pid_list->lock);

	/*
	 * On success of allocating all the chunks, both counters
	 * will be less than zero. If they are not, then an allocation
	 * failed, and we should not try again.
	 */
	if (upper_count >= 0 || lower_count >= 0)
		return;
	/*
	 * While the lock was released, free chunks could have been
	 * taken and allocation may need to be done again. Might as
	 * well allocate them now.
	 */
	goto again;
}

/**
 * trace_pid_list_alloc - create a new pid_list
 *
 * Allocates a new pid_list to store pids into.
 *
 * Returns the pid_list on success, NULL otherwise.
 */
struct trace_pid_list *trace_pid_list_alloc(void)
{
	struct trace_pid_list *pid_list;
	int i;

	/* According to linux/threads.h, pids can be no bigger than 30 bits */
	WARN_ON_ONCE(pid_max > (1 << 30));

	pid_list = kzalloc(sizeof(*pid_list), GFP_KERNEL);
	if (!pid_list)
		return NULL;

	init_irq_work(&pid_list->refill_irqwork, pid_list_refill_irq);

	raw_spin_lock_init(&pid_list->lock);

	for (i = 0; i < CHUNK_ALLOC; i++) {
		union upper_chunk *chunk;

		chunk = kzalloc(sizeof(*chunk), GFP_KERNEL);
		if (!chunk)
			break;
		chunk->next = pid_list->upper_list;
		pid_list->upper_list = chunk;
		pid_list->free_upper_chunks++;
	}

	for (i = 0; i < CHUNK_ALLOC; i++) {
		union lower_chunk *chunk;

		chunk = kzalloc(sizeof(*chunk), GFP_KERNEL);
		if (!chunk)
			break;
		chunk->next = pid_list->lower_list;
		pid_list->lower_list = chunk;
		pid_list->free_lower_chunks++;
	}

	return pid_list;
}
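
/*
 * Illustrative check, not part of the original file: with the 8/8/14 bit
 * layout assumed from pid_list.h, an upper chunk is an array of
 * UPPER2_SIZE pointers and a lower chunk is a bitmask of LOWER_MAX bits,
 * so each works out to about 2K on a 64-bit kernel. CHUNK_ALLOC of each
 * are kept pre-allocated so the set/clear paths never need to sleep.
 */
static void __maybe_unused pid_list_chunk_size_check(void)
{
	BUILD_BUG_ON(sizeof(union upper_chunk) < UPPER2_SIZE * sizeof(void *));
	BUILD_BUG_ON(sizeof(union lower_chunk) < LOWER_MAX / BITS_PER_BYTE);
}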
/**
 * trace_pid_list_free - Frees an allocated pid_list.
 * @pid_list: The pid list to free.
 *
 * Frees the memory for a pid_list that was allocated.
 */
void trace_pid_list_free(struct trace_pid_list *pid_list)
{
	union upper_chunk *upper;
	union lower_chunk *lower;
	int i, j;

	if (!pid_list)
		return;

	irq_work_sync(&pid_list->refill_irqwork);

	while (pid_list->lower_list) {
		union lower_chunk *chunk;

		chunk = pid_list->lower_list;
		pid_list->lower_list = pid_list->lower_list->next;
		kfree(chunk);
	}

	while (pid_list->upper_list) {
		union upper_chunk *chunk;

		chunk = pid_list->upper_list;
		pid_list->upper_list = pid_list->upper_list->next;
		kfree(chunk);
	}

	for (i = 0; i < UPPER1_SIZE; i++) {
		upper = pid_list->upper[i];
		if (upper) {
			for (j = 0; j < UPPER2_SIZE; j++) {
				lower = upper->data[j];
				kfree(lower);
			}
			kfree(upper);
		}
	}
	kfree(pid_list);
}
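
/*
 * Lifecycle sketch, not part of the original file: allocate a list,
 * populate it, and free it again. The pid value is arbitrary and the
 * function name is hypothetical. Note that trace_pid_list_free()
 * accepts a NULL pid_list.
 */
static void __maybe_unused pid_list_lifecycle_example(void)
{
	struct trace_pid_list *pid_list = trace_pid_list_alloc();

	if (!pid_list)
		return;

	if (trace_pid_list_set(pid_list, 1) < 0)	/* e.g. trace init */
		pr_warn("failed to add pid 1\n");

	trace_pid_list_free(pid_list);
}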