1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * delaytop.c - system-wide delay monitoring tool. 4 * 5 * This tool provides real-time monitoring and statistics of 6 * system, container, and task-level delays, including CPU, 7 * memory, IO, and IRQ. It supports both interactive (top-like), 8 * and can output delay information for the whole system, specific 9 * containers (cgroups), or individual tasks (PIDs). 10 * 11 * Key features: 12 * - Collects per-task delay accounting statistics via taskstats. 13 * - Collects system-wide PSI information. 14 * - Supports sorting, filtering. 15 * - Supports both interactive (screen refresh). 16 * 17 * Copyright (C) Fan Yu, ZTE Corp. 2025 18 * Copyright (C) Wang Yaxin, ZTE Corp. 2025 19 * 20 * Compile with 21 * gcc -I/usr/src/linux/include delaytop.c -o delaytop 22 */ 23 24 #include <stdio.h> 25 #include <stdlib.h> 26 #include <string.h> 27 #include <errno.h> 28 #include <unistd.h> 29 #include <fcntl.h> 30 #include <getopt.h> 31 #include <signal.h> 32 #include <time.h> 33 #include <dirent.h> 34 #include <ctype.h> 35 #include <stdbool.h> 36 #include <sys/types.h> 37 #include <sys/stat.h> 38 #include <sys/socket.h> 39 #include <sys/select.h> 40 #include <termios.h> 41 #include <limits.h> 42 #include <linux/genetlink.h> 43 #include <linux/taskstats.h> 44 #include <linux/cgroupstats.h> 45 #include <stddef.h> 46 47 #define PSI_CPU_SOME "/proc/pressure/cpu" 48 #define PSI_CPU_FULL "/proc/pressure/cpu" 49 #define PSI_MEMORY_SOME "/proc/pressure/memory" 50 #define PSI_MEMORY_FULL "/proc/pressure/memory" 51 #define PSI_IO_SOME "/proc/pressure/io" 52 #define PSI_IO_FULL "/proc/pressure/io" 53 #define PSI_IRQ_FULL "/proc/pressure/irq" 54 55 #define NLA_NEXT(na) ((struct nlattr *)((char *)(na) + NLA_ALIGN((na)->nla_len))) 56 #define NLA_DATA(na) ((void *)((char *)(na) + NLA_HDRLEN)) 57 #define NLA_PAYLOAD(len) (len - NLA_HDRLEN) 58 59 #define GENLMSG_DATA(glh) ((void *)(NLMSG_DATA(glh) + GENL_HDRLEN)) 60 #define GENLMSG_PAYLOAD(glh) (NLMSG_PAYLOAD(glh, 0) - GENL_HDRLEN) 61 62 #define TASK_COMM_LEN 16 63 #define MAX_MSG_SIZE 1024 64 #define MAX_TASKS 1000 65 #define MAX_BUF_LEN 256 66 #define SET_TASK_STAT(task_count, field) tasks[task_count].field = stats.field 67 #define BOOL_FPRINT(stream, fmt, ...) \ 68 ({ \ 69 int ret = fprintf(stream, fmt, ##__VA_ARGS__); \ 70 ret >= 0; \ 71 }) 72 #define TASK_AVG(task, field) average_ms((task).field##_delay_total, (task).field##_count) 73 #define PSI_LINE_FORMAT "%-12s %6.1f%%/%6.1f%%/%6.1f%%/%8llu(ms)\n" 74 #define DELAY_FMT_DEFAULT "%8.2f %8.2f %8.2f %8.2f\n" 75 #define DELAY_FMT_MEMVERBOSE "%8.2f %8.2f %8.2f %8.2f %8.2f %8.2f\n" 76 #define SORT_FIELD(name, modes) \ 77 {#name, \ 78 offsetof(struct task_info, name##_delay_total), \ 79 offsetof(struct task_info, name##_count), \ 80 modes} 81 #define END_FIELD {NULL, 0, 0} 82 83 /* Display mode types */ 84 #define MODE_TYPE_ALL (0xFFFFFFFF) 85 #define MODE_DEFAULT (1 << 0) 86 #define MODE_MEMVERBOSE (1 << 1) 87 88 /* PSI statistics structure */ 89 struct psi_stats { 90 double cpu_some_avg10, cpu_some_avg60, cpu_some_avg300; 91 unsigned long long cpu_some_total; 92 double cpu_full_avg10, cpu_full_avg60, cpu_full_avg300; 93 unsigned long long cpu_full_total; 94 double memory_some_avg10, memory_some_avg60, memory_some_avg300; 95 unsigned long long memory_some_total; 96 double memory_full_avg10, memory_full_avg60, memory_full_avg300; 97 unsigned long long memory_full_total; 98 double io_some_avg10, io_some_avg60, io_some_avg300; 99 unsigned long long io_some_total; 100 double io_full_avg10, io_full_avg60, io_full_avg300; 101 unsigned long long io_full_total; 102 double irq_full_avg10, irq_full_avg60, irq_full_avg300; 103 unsigned long long irq_full_total; 104 }; 105 106 /* Task delay information structure */ 107 struct task_info { 108 int pid; 109 int tgid; 110 char command[TASK_COMM_LEN]; 111 unsigned long long cpu_count; 112 unsigned long long cpu_delay_total; 113 unsigned long long blkio_count; 114 unsigned long long blkio_delay_total; 115 unsigned long long swapin_count; 116 unsigned long long swapin_delay_total; 117 unsigned long long freepages_count; 118 unsigned long long freepages_delay_total; 119 unsigned long long thrashing_count; 120 unsigned long long thrashing_delay_total; 121 unsigned long long compact_count; 122 unsigned long long compact_delay_total; 123 unsigned long long wpcopy_count; 124 unsigned long long wpcopy_delay_total; 125 unsigned long long irq_count; 126 unsigned long long irq_delay_total; 127 unsigned long long mem_count; 128 unsigned long long mem_delay_total; 129 }; 130 131 /* Container statistics structure */ 132 struct container_stats { 133 int nr_sleeping; /* Number of sleeping processes */ 134 int nr_running; /* Number of running processes */ 135 int nr_stopped; /* Number of stopped processes */ 136 int nr_uninterruptible; /* Number of uninterruptible processes */ 137 int nr_io_wait; /* Number of processes in IO wait */ 138 }; 139 140 /* Delay field structure */ 141 struct field_desc { 142 const char *name; /* Field name for cmdline argument */ 143 unsigned long total_offset; /* Offset of total delay in task_info */ 144 unsigned long count_offset; /* Offset of count in task_info */ 145 size_t supported_modes; /* Supported display modes */ 146 }; 147 148 /* Program settings structure */ 149 struct config { 150 int delay; /* Update interval in seconds */ 151 int iterations; /* Number of iterations, 0 == infinite */ 152 int max_processes; /* Maximum number of processes to show */ 153 int output_one_time; /* Output once and exit */ 154 int monitor_pid; /* Monitor specific PID */ 155 char *container_path; /* Path to container cgroup */ 156 const struct field_desc *sort_field; /* Current sort field */ 157 size_t display_mode; /* Current display mode */ 158 }; 159 160 /* Global variables */ 161 static struct config cfg; 162 static struct psi_stats psi; 163 static struct task_info tasks[MAX_TASKS]; 164 static int task_count; 165 static int running = 1; 166 static struct container_stats container_stats; 167 static const struct field_desc sort_fields[] = { 168 SORT_FIELD(cpu, MODE_DEFAULT), 169 SORT_FIELD(blkio, MODE_DEFAULT), 170 SORT_FIELD(irq, MODE_DEFAULT), 171 SORT_FIELD(mem, MODE_DEFAULT | MODE_MEMVERBOSE), 172 SORT_FIELD(swapin, MODE_MEMVERBOSE), 173 SORT_FIELD(freepages, MODE_MEMVERBOSE), 174 SORT_FIELD(thrashing, MODE_MEMVERBOSE), 175 SORT_FIELD(compact, MODE_MEMVERBOSE), 176 SORT_FIELD(wpcopy, MODE_MEMVERBOSE), 177 END_FIELD 178 }; 179 180 /* Netlink socket variables */ 181 static int nl_sd = -1; 182 static int family_id; 183 184 /* Set terminal to non-canonical mode for q-to-quit */ 185 static struct termios orig_termios; 186 static void enable_raw_mode(void) 187 { 188 struct termios raw; 189 190 tcgetattr(STDIN_FILENO, &orig_termios); 191 raw = orig_termios; 192 raw.c_lflag &= ~(ICANON | ECHO); 193 tcsetattr(STDIN_FILENO, TCSAFLUSH, &raw); 194 } 195 static void disable_raw_mode(void) 196 { 197 tcsetattr(STDIN_FILENO, TCSAFLUSH, &orig_termios); 198 } 199 200 /* Find field descriptor by name with string comparison */ 201 static const struct field_desc *get_field_by_name(const char *name) 202 { 203 const struct field_desc *field; 204 size_t field_len; 205 206 for (field = sort_fields; field->name != NULL; field++) { 207 field_len = strlen(field->name); 208 if (field_len != strlen(name)) 209 continue; 210 if (strncmp(field->name, name, field_len) == 0) 211 return field; 212 } 213 214 return NULL; 215 } 216 217 /* Find display name for a field descriptor */ 218 static const char *get_name_by_field(const struct field_desc *field) 219 { 220 return field ? field->name : "UNKNOWN"; 221 } 222 223 /* Generate string of available field names */ 224 static void display_available_fields(size_t mode) 225 { 226 const struct field_desc *field; 227 char buf[MAX_BUF_LEN]; 228 229 buf[0] = '\0'; 230 231 for (field = sort_fields; field->name != NULL; field++) { 232 if (!(field->supported_modes & mode)) 233 continue; 234 strncat(buf, "|", MAX_BUF_LEN - strlen(buf) - 1); 235 strncat(buf, field->name, MAX_BUF_LEN - strlen(buf) - 1); 236 buf[MAX_BUF_LEN - 1] = '\0'; 237 } 238 239 fprintf(stderr, "Available fields: %s\n", buf); 240 } 241 242 /* Display usage information and command line options */ 243 static void usage(void) 244 { 245 printf("Usage: delaytop [Options]\n" 246 "Options:\n" 247 " -h, --help Show this help message and exit\n" 248 " -d, --delay=SECONDS Set refresh interval (default: 2 seconds, min: 1)\n" 249 " -n, --iterations=COUNT Set number of updates (default: 0 = infinite)\n" 250 " -P, --processes=NUMBER Set maximum number of processes to show (default: 20, max: 1000)\n" 251 " -o, --once Display once and exit\n" 252 " -p, --pid=PID Monitor only the specified PID\n" 253 " -C, --container=PATH Monitor the container at specified cgroup path\n" 254 " -s, --sort=FIELD Sort by delay field (default: cpu)\n" 255 " -M, --memverbose Display memory detailed information\n"); 256 exit(0); 257 } 258 259 /* Parse command line arguments and set configuration */ 260 static void parse_args(int argc, char **argv) 261 { 262 int c; 263 const struct field_desc *field; 264 struct option long_options[] = { 265 {"help", no_argument, 0, 'h'}, 266 {"delay", required_argument, 0, 'd'}, 267 {"iterations", required_argument, 0, 'n'}, 268 {"pid", required_argument, 0, 'p'}, 269 {"once", no_argument, 0, 'o'}, 270 {"processes", required_argument, 0, 'P'}, 271 {"sort", required_argument, 0, 's'}, 272 {"container", required_argument, 0, 'C'}, 273 {"memverbose", no_argument, 0, 'M'}, 274 {0, 0, 0, 0} 275 }; 276 277 /* Set defaults */ 278 cfg.delay = 2; 279 cfg.iterations = 0; 280 cfg.max_processes = 20; 281 cfg.sort_field = &sort_fields[0]; /* Default sorted by CPU delay */ 282 cfg.output_one_time = 0; 283 cfg.monitor_pid = 0; /* 0 means monitor all PIDs */ 284 cfg.container_path = NULL; 285 cfg.display_mode = MODE_DEFAULT; 286 287 while (1) { 288 int option_index = 0; 289 290 c = getopt_long(argc, argv, "hd:n:p:oP:C:s:M", long_options, &option_index); 291 if (c == -1) 292 break; 293 294 switch (c) { 295 case 'h': 296 usage(); 297 break; 298 case 'd': 299 cfg.delay = atoi(optarg); 300 if (cfg.delay < 1) { 301 fprintf(stderr, "Error: delay must be >= 1.\n"); 302 exit(1); 303 } 304 break; 305 case 'n': 306 cfg.iterations = atoi(optarg); 307 if (cfg.iterations < 0) { 308 fprintf(stderr, "Error: iterations must be >= 0.\n"); 309 exit(1); 310 } 311 break; 312 case 'p': 313 cfg.monitor_pid = atoi(optarg); 314 if (cfg.monitor_pid < 1) { 315 fprintf(stderr, "Error: pid must be >= 1.\n"); 316 exit(1); 317 } 318 break; 319 case 'o': 320 cfg.output_one_time = 1; 321 break; 322 case 'P': 323 cfg.max_processes = atoi(optarg); 324 if (cfg.max_processes < 1) { 325 fprintf(stderr, "Error: processes must be >= 1.\n"); 326 exit(1); 327 } 328 if (cfg.max_processes > MAX_TASKS) { 329 fprintf(stderr, "Warning: processes capped to %d.\n", 330 MAX_TASKS); 331 cfg.max_processes = MAX_TASKS; 332 } 333 break; 334 case 'C': 335 cfg.container_path = strdup(optarg); 336 break; 337 case 's': 338 if (strlen(optarg) == 0) { 339 fprintf(stderr, "Error: empty sort field\n"); 340 exit(1); 341 } 342 343 field = get_field_by_name(optarg); 344 /* Show available fields if invalid option provided */ 345 if (!field) { 346 fprintf(stderr, "Error: invalid sort field '%s'\n", optarg); 347 display_available_fields(MODE_TYPE_ALL); 348 exit(1); 349 } 350 351 cfg.sort_field = field; 352 break; 353 case 'M': 354 cfg.display_mode = MODE_MEMVERBOSE; 355 cfg.sort_field = get_field_by_name("mem"); 356 break; 357 default: 358 fprintf(stderr, "Try 'delaytop --help' for more information.\n"); 359 exit(1); 360 } 361 } 362 } 363 364 /* Calculate average delay in milliseconds for overall memory */ 365 static void set_mem_delay_total(struct task_info *t) 366 { 367 t->mem_delay_total = t->swapin_delay_total + 368 t->freepages_delay_total + 369 t->thrashing_delay_total + 370 t->compact_delay_total + 371 t->wpcopy_delay_total; 372 } 373 374 static void set_mem_count(struct task_info *t) 375 { 376 t->mem_count = t->swapin_count + 377 t->freepages_count + 378 t->thrashing_count + 379 t->compact_count + 380 t->wpcopy_count; 381 } 382 383 /* Create a raw netlink socket and bind */ 384 static int create_nl_socket(void) 385 { 386 int fd; 387 struct sockaddr_nl local; 388 389 fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC); 390 if (fd < 0) 391 return -1; 392 393 memset(&local, 0, sizeof(local)); 394 local.nl_family = AF_NETLINK; 395 396 if (bind(fd, (struct sockaddr *) &local, sizeof(local)) < 0) { 397 fprintf(stderr, "Failed to bind socket when create nl_socket\n"); 398 close(fd); 399 return -1; 400 } 401 402 return fd; 403 } 404 405 /* Send a command via netlink */ 406 static int send_cmd(int sd, __u16 nlmsg_type, __u32 nlmsg_pid, 407 __u8 genl_cmd, __u16 nla_type, 408 void *nla_data, int nla_len) 409 { 410 struct sockaddr_nl nladdr; 411 struct nlattr *na; 412 int r, buflen; 413 char *buf; 414 415 struct { 416 struct nlmsghdr n; 417 struct genlmsghdr g; 418 char buf[MAX_MSG_SIZE]; 419 } msg; 420 421 msg.n.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN); 422 msg.n.nlmsg_type = nlmsg_type; 423 msg.n.nlmsg_flags = NLM_F_REQUEST; 424 msg.n.nlmsg_seq = 0; 425 msg.n.nlmsg_pid = nlmsg_pid; 426 msg.g.cmd = genl_cmd; 427 msg.g.version = 0x1; 428 na = (struct nlattr *) GENLMSG_DATA(&msg); 429 na->nla_type = nla_type; 430 na->nla_len = nla_len + NLA_HDRLEN; 431 memcpy(NLA_DATA(na), nla_data, nla_len); 432 msg.n.nlmsg_len += NLMSG_ALIGN(na->nla_len); 433 434 buf = (char *) &msg; 435 buflen = msg.n.nlmsg_len; 436 memset(&nladdr, 0, sizeof(nladdr)); 437 nladdr.nl_family = AF_NETLINK; 438 while ((r = sendto(sd, buf, buflen, 0, (struct sockaddr *) &nladdr, 439 sizeof(nladdr))) < buflen) { 440 if (r > 0) { 441 buf += r; 442 buflen -= r; 443 } else if (errno != EAGAIN) 444 return -1; 445 } 446 return 0; 447 } 448 449 /* Get family ID for taskstats via netlink */ 450 static int get_family_id(int sd) 451 { 452 struct { 453 struct nlmsghdr n; 454 struct genlmsghdr g; 455 char buf[256]; 456 } ans; 457 458 int id = 0, rc; 459 struct nlattr *na; 460 int rep_len; 461 char name[100]; 462 463 strncpy(name, TASKSTATS_GENL_NAME, sizeof(name) - 1); 464 name[sizeof(name) - 1] = '\0'; 465 rc = send_cmd(sd, GENL_ID_CTRL, getpid(), CTRL_CMD_GETFAMILY, 466 CTRL_ATTR_FAMILY_NAME, (void *)name, 467 strlen(TASKSTATS_GENL_NAME)+1); 468 if (rc < 0) { 469 fprintf(stderr, "Failed to send cmd for family id\n"); 470 return 0; 471 } 472 473 rep_len = recv(sd, &ans, sizeof(ans), 0); 474 if (ans.n.nlmsg_type == NLMSG_ERROR || 475 (rep_len < 0) || !NLMSG_OK((&ans.n), rep_len)) { 476 fprintf(stderr, "Failed to receive response for family id\n"); 477 return 0; 478 } 479 480 na = (struct nlattr *) GENLMSG_DATA(&ans); 481 na = (struct nlattr *) ((char *) na + NLA_ALIGN(na->nla_len)); 482 if (na->nla_type == CTRL_ATTR_FAMILY_ID) 483 id = *(__u16 *) NLA_DATA(na); 484 return id; 485 } 486 487 static void read_psi_stats(void) 488 { 489 FILE *fp; 490 char line[256]; 491 int ret = 0; 492 /* Zero all fields */ 493 memset(&psi, 0, sizeof(psi)); 494 /* CPU pressure */ 495 fp = fopen(PSI_CPU_SOME, "r"); 496 if (fp) { 497 while (fgets(line, sizeof(line), fp)) { 498 if (strncmp(line, "some", 4) == 0) { 499 ret = sscanf(line, "some avg10=%lf avg60=%lf avg300=%lf total=%llu", 500 &psi.cpu_some_avg10, &psi.cpu_some_avg60, 501 &psi.cpu_some_avg300, &psi.cpu_some_total); 502 if (ret != 4) 503 fprintf(stderr, "Failed to parse CPU some PSI data\n"); 504 } else if (strncmp(line, "full", 4) == 0) { 505 ret = sscanf(line, "full avg10=%lf avg60=%lf avg300=%lf total=%llu", 506 &psi.cpu_full_avg10, &psi.cpu_full_avg60, 507 &psi.cpu_full_avg300, &psi.cpu_full_total); 508 if (ret != 4) 509 fprintf(stderr, "Failed to parse CPU full PSI data\n"); 510 } 511 } 512 fclose(fp); 513 } 514 /* Memory pressure */ 515 fp = fopen(PSI_MEMORY_SOME, "r"); 516 if (fp) { 517 while (fgets(line, sizeof(line), fp)) { 518 if (strncmp(line, "some", 4) == 0) { 519 ret = sscanf(line, "some avg10=%lf avg60=%lf avg300=%lf total=%llu", 520 &psi.memory_some_avg10, &psi.memory_some_avg60, 521 &psi.memory_some_avg300, &psi.memory_some_total); 522 if (ret != 4) 523 fprintf(stderr, "Failed to parse Memory some PSI data\n"); 524 } else if (strncmp(line, "full", 4) == 0) { 525 ret = sscanf(line, "full avg10=%lf avg60=%lf avg300=%lf total=%llu", 526 &psi.memory_full_avg10, &psi.memory_full_avg60, 527 &psi.memory_full_avg300, &psi.memory_full_total); 528 } 529 if (ret != 4) 530 fprintf(stderr, "Failed to parse Memory full PSI data\n"); 531 } 532 fclose(fp); 533 } 534 /* IO pressure */ 535 fp = fopen(PSI_IO_SOME, "r"); 536 if (fp) { 537 while (fgets(line, sizeof(line), fp)) { 538 if (strncmp(line, "some", 4) == 0) { 539 ret = sscanf(line, "some avg10=%lf avg60=%lf avg300=%lf total=%llu", 540 &psi.io_some_avg10, &psi.io_some_avg60, 541 &psi.io_some_avg300, &psi.io_some_total); 542 if (ret != 4) 543 fprintf(stderr, "Failed to parse IO some PSI data\n"); 544 } else if (strncmp(line, "full", 4) == 0) { 545 ret = sscanf(line, "full avg10=%lf avg60=%lf avg300=%lf total=%llu", 546 &psi.io_full_avg10, &psi.io_full_avg60, 547 &psi.io_full_avg300, &psi.io_full_total); 548 if (ret != 4) 549 fprintf(stderr, "Failed to parse IO full PSI data\n"); 550 } 551 } 552 fclose(fp); 553 } 554 /* IRQ pressure (only full) */ 555 fp = fopen(PSI_IRQ_FULL, "r"); 556 if (fp) { 557 while (fgets(line, sizeof(line), fp)) { 558 if (strncmp(line, "full", 4) == 0) { 559 ret = sscanf(line, "full avg10=%lf avg60=%lf avg300=%lf total=%llu", 560 &psi.irq_full_avg10, &psi.irq_full_avg60, 561 &psi.irq_full_avg300, &psi.irq_full_total); 562 if (ret != 4) 563 fprintf(stderr, "Failed to parse IRQ full PSI data\n"); 564 } 565 } 566 fclose(fp); 567 } 568 } 569 570 static int read_comm(int pid, char *comm_buf, size_t buf_size) 571 { 572 char path[64]; 573 int ret = -1; 574 size_t len; 575 FILE *fp; 576 577 snprintf(path, sizeof(path), "/proc/%d/comm", pid); 578 fp = fopen(path, "r"); 579 if (!fp) { 580 fprintf(stderr, "Failed to open comm file /proc/%d/comm\n", pid); 581 return ret; 582 } 583 584 if (fgets(comm_buf, buf_size, fp)) { 585 len = strlen(comm_buf); 586 if (len > 0 && comm_buf[len - 1] == '\n') 587 comm_buf[len - 1] = '\0'; 588 ret = 0; 589 } 590 591 fclose(fp); 592 593 return ret; 594 } 595 596 static void fetch_and_fill_task_info(int pid, const char *comm) 597 { 598 struct { 599 struct nlmsghdr n; 600 struct genlmsghdr g; 601 char buf[MAX_MSG_SIZE]; 602 } resp; 603 struct taskstats stats; 604 struct nlattr *nested; 605 struct nlattr *na; 606 int nested_len; 607 int nl_len; 608 int rc; 609 610 /* Send request for task stats */ 611 if (send_cmd(nl_sd, family_id, getpid(), TASKSTATS_CMD_GET, 612 TASKSTATS_CMD_ATTR_PID, &pid, sizeof(pid)) < 0) { 613 fprintf(stderr, "Failed to send request for task stats\n"); 614 return; 615 } 616 617 /* Receive response */ 618 rc = recv(nl_sd, &resp, sizeof(resp), 0); 619 if (rc < 0 || resp.n.nlmsg_type == NLMSG_ERROR) { 620 fprintf(stderr, "Failed to receive response for task stats\n"); 621 return; 622 } 623 624 /* Parse response */ 625 nl_len = GENLMSG_PAYLOAD(&resp.n); 626 na = (struct nlattr *) GENLMSG_DATA(&resp); 627 while (nl_len > 0) { 628 if (na->nla_type == TASKSTATS_TYPE_AGGR_PID) { 629 nested = (struct nlattr *) NLA_DATA(na); 630 nested_len = NLA_PAYLOAD(na->nla_len); 631 while (nested_len > 0) { 632 if (nested->nla_type == TASKSTATS_TYPE_STATS) { 633 memcpy(&stats, NLA_DATA(nested), sizeof(stats)); 634 if (task_count < MAX_TASKS) { 635 tasks[task_count].pid = pid; 636 tasks[task_count].tgid = pid; 637 strncpy(tasks[task_count].command, comm, 638 TASK_COMM_LEN - 1); 639 tasks[task_count].command[TASK_COMM_LEN - 1] = '\0'; 640 SET_TASK_STAT(task_count, cpu_count); 641 SET_TASK_STAT(task_count, cpu_delay_total); 642 SET_TASK_STAT(task_count, blkio_count); 643 SET_TASK_STAT(task_count, blkio_delay_total); 644 SET_TASK_STAT(task_count, swapin_count); 645 SET_TASK_STAT(task_count, swapin_delay_total); 646 SET_TASK_STAT(task_count, freepages_count); 647 SET_TASK_STAT(task_count, freepages_delay_total); 648 SET_TASK_STAT(task_count, thrashing_count); 649 SET_TASK_STAT(task_count, thrashing_delay_total); 650 SET_TASK_STAT(task_count, compact_count); 651 SET_TASK_STAT(task_count, compact_delay_total); 652 SET_TASK_STAT(task_count, wpcopy_count); 653 SET_TASK_STAT(task_count, wpcopy_delay_total); 654 SET_TASK_STAT(task_count, irq_count); 655 SET_TASK_STAT(task_count, irq_delay_total); 656 set_mem_count(&tasks[task_count]); 657 set_mem_delay_total(&tasks[task_count]); 658 task_count++; 659 } 660 break; 661 } 662 nested_len -= NLA_ALIGN(nested->nla_len); 663 nested = NLA_NEXT(nested); 664 } 665 } 666 nl_len -= NLA_ALIGN(na->nla_len); 667 na = NLA_NEXT(na); 668 } 669 return; 670 } 671 672 static void get_task_delays(void) 673 { 674 char comm[TASK_COMM_LEN]; 675 struct dirent *entry; 676 DIR *dir; 677 int pid; 678 679 task_count = 0; 680 if (cfg.monitor_pid > 0) { 681 if (read_comm(cfg.monitor_pid, comm, sizeof(comm)) == 0) 682 fetch_and_fill_task_info(cfg.monitor_pid, comm); 683 return; 684 } 685 686 dir = opendir("/proc"); 687 if (!dir) { 688 fprintf(stderr, "Error opening /proc directory\n"); 689 return; 690 } 691 692 while ((entry = readdir(dir)) != NULL && task_count < MAX_TASKS) { 693 if (!isdigit(entry->d_name[0])) 694 continue; 695 pid = atoi(entry->d_name); 696 if (pid == 0) 697 continue; 698 if (read_comm(pid, comm, sizeof(comm)) != 0) 699 continue; 700 fetch_and_fill_task_info(pid, comm); 701 } 702 closedir(dir); 703 } 704 705 /* Calculate average delay in milliseconds */ 706 static double average_ms(unsigned long long total, unsigned long long count) 707 { 708 if (count == 0) 709 return 0; 710 return (double)total / 1000000.0 / count; 711 } 712 713 /* Comparison function for sorting tasks */ 714 static int compare_tasks(const void *a, const void *b) 715 { 716 const struct task_info *t1 = (const struct task_info *)a; 717 const struct task_info *t2 = (const struct task_info *)b; 718 unsigned long long total1; 719 unsigned long long total2; 720 unsigned long count1; 721 unsigned long count2; 722 double avg1, avg2; 723 724 total1 = *(unsigned long long *)((char *)t1 + cfg.sort_field->total_offset); 725 total2 = *(unsigned long long *)((char *)t2 + cfg.sort_field->total_offset); 726 count1 = *(unsigned long *)((char *)t1 + cfg.sort_field->count_offset); 727 count2 = *(unsigned long *)((char *)t2 + cfg.sort_field->count_offset); 728 729 avg1 = average_ms(total1, count1); 730 avg2 = average_ms(total2, count2); 731 if (avg1 != avg2) 732 return avg2 > avg1 ? 1 : -1; 733 734 return 0; 735 } 736 737 /* Sort tasks by selected field */ 738 static void sort_tasks(void) 739 { 740 if (task_count > 0) 741 qsort(tasks, task_count, sizeof(struct task_info), compare_tasks); 742 } 743 744 /* Get container statistics via cgroupstats */ 745 static void get_container_stats(void) 746 { 747 int rc, cfd; 748 struct { 749 struct nlmsghdr n; 750 struct genlmsghdr g; 751 char buf[MAX_MSG_SIZE]; 752 } req, resp; 753 struct nlattr *na; 754 int nl_len; 755 struct cgroupstats stats; 756 757 /* Check if container path is set */ 758 if (!cfg.container_path) 759 return; 760 761 /* Open container cgroup */ 762 cfd = open(cfg.container_path, O_RDONLY); 763 if (cfd < 0) { 764 fprintf(stderr, "Error opening container path: %s\n", cfg.container_path); 765 return; 766 } 767 768 /* Send request for container stats */ 769 if (send_cmd(nl_sd, family_id, getpid(), CGROUPSTATS_CMD_GET, 770 CGROUPSTATS_CMD_ATTR_FD, &cfd, sizeof(__u32)) < 0) { 771 fprintf(stderr, "Failed to send request for container stats\n"); 772 close(cfd); 773 return; 774 } 775 776 /* Receive response */ 777 rc = recv(nl_sd, &resp, sizeof(resp), 0); 778 if (rc < 0 || resp.n.nlmsg_type == NLMSG_ERROR) { 779 fprintf(stderr, "Failed to receive response for container stats\n"); 780 close(cfd); 781 return; 782 } 783 784 /* Parse response */ 785 nl_len = GENLMSG_PAYLOAD(&resp.n); 786 na = (struct nlattr *) GENLMSG_DATA(&resp); 787 while (nl_len > 0) { 788 if (na->nla_type == CGROUPSTATS_TYPE_CGROUP_STATS) { 789 /* Get the cgroupstats structure */ 790 memcpy(&stats, NLA_DATA(na), sizeof(stats)); 791 792 /* Fill container stats */ 793 container_stats.nr_sleeping = stats.nr_sleeping; 794 container_stats.nr_running = stats.nr_running; 795 container_stats.nr_stopped = stats.nr_stopped; 796 container_stats.nr_uninterruptible = stats.nr_uninterruptible; 797 container_stats.nr_io_wait = stats.nr_io_wait; 798 break; 799 } 800 nl_len -= NLA_ALIGN(na->nla_len); 801 na = (struct nlattr *) ((char *) na + NLA_ALIGN(na->nla_len)); 802 } 803 804 close(cfd); 805 } 806 807 /* Display results to stdout or log file */ 808 static void display_results(void) 809 { 810 time_t now = time(NULL); 811 struct tm *tm_now = localtime(&now); 812 FILE *out = stdout; 813 char timestamp[32]; 814 bool suc = true; 815 int i, count; 816 817 /* Clear terminal screen */ 818 suc &= BOOL_FPRINT(out, "\033[H\033[J"); 819 820 /* PSI output (one-line, no cat style) */ 821 suc &= BOOL_FPRINT(out, "System Pressure Information: (avg10/avg60/avg300/total)\n"); 822 suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT, 823 "CPU some:", 824 psi.cpu_some_avg10, 825 psi.cpu_some_avg60, 826 psi.cpu_some_avg300, 827 psi.cpu_some_total / 1000); 828 suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT, 829 "CPU full:", 830 psi.cpu_full_avg10, 831 psi.cpu_full_avg60, 832 psi.cpu_full_avg300, 833 psi.cpu_full_total / 1000); 834 suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT, 835 "Memory full:", 836 psi.memory_full_avg10, 837 psi.memory_full_avg60, 838 psi.memory_full_avg300, 839 psi.memory_full_total / 1000); 840 suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT, 841 "Memory some:", 842 psi.memory_some_avg10, 843 psi.memory_some_avg60, 844 psi.memory_some_avg300, 845 psi.memory_some_total / 1000); 846 suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT, 847 "IO full:", 848 psi.io_full_avg10, 849 psi.io_full_avg60, 850 psi.io_full_avg300, 851 psi.io_full_total / 1000); 852 suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT, 853 "IO some:", 854 psi.io_some_avg10, 855 psi.io_some_avg60, 856 psi.io_some_avg300, 857 psi.io_some_total / 1000); 858 suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT, 859 "IRQ full:", 860 psi.irq_full_avg10, 861 psi.irq_full_avg60, 862 psi.irq_full_avg300, 863 psi.irq_full_total / 1000); 864 865 if (cfg.container_path) { 866 suc &= BOOL_FPRINT(out, "Container Information (%s):\n", cfg.container_path); 867 suc &= BOOL_FPRINT(out, "Processes: running=%d, sleeping=%d, ", 868 container_stats.nr_running, container_stats.nr_sleeping); 869 suc &= BOOL_FPRINT(out, "stopped=%d, uninterruptible=%d, io_wait=%d\n\n", 870 container_stats.nr_stopped, container_stats.nr_uninterruptible, 871 container_stats.nr_io_wait); 872 } 873 /* Task delay output */ 874 suc &= BOOL_FPRINT(out, "Top %d processes (sorted by %s delay):\n", 875 cfg.max_processes, get_name_by_field(cfg.sort_field)); 876 877 suc &= BOOL_FPRINT(out, "%8s %8s %-17s", "PID", "TGID", "COMMAND"); 878 if (cfg.display_mode == MODE_MEMVERBOSE) { 879 suc &= BOOL_FPRINT(out, "%8s %8s %8s %8s %8s %8s\n", 880 "MEM(ms)", "SWAP(ms)", "RCL(ms)", 881 "THR(ms)", "CMP(ms)", "WP(ms)"); 882 suc &= BOOL_FPRINT(out, "-----------------------"); 883 suc &= BOOL_FPRINT(out, "-----------------------"); 884 suc &= BOOL_FPRINT(out, "-----------------------"); 885 suc &= BOOL_FPRINT(out, "---------------------\n"); 886 } else { 887 suc &= BOOL_FPRINT(out, "%8s %8s %8s %8s\n", 888 "CPU(ms)", "IO(ms)", "IRQ(ms)", "MEM(ms)"); 889 suc &= BOOL_FPRINT(out, "-----------------------"); 890 suc &= BOOL_FPRINT(out, "-----------------------"); 891 suc &= BOOL_FPRINT(out, "--------------------------\n"); 892 } 893 894 count = task_count < cfg.max_processes ? task_count : cfg.max_processes; 895 896 for (i = 0; i < count; i++) { 897 suc &= BOOL_FPRINT(out, "%8d %8d %-15s", 898 tasks[i].pid, tasks[i].tgid, tasks[i].command); 899 if (cfg.display_mode == MODE_MEMVERBOSE) { 900 suc &= BOOL_FPRINT(out, DELAY_FMT_MEMVERBOSE, 901 TASK_AVG(tasks[i], mem), 902 TASK_AVG(tasks[i], swapin), 903 TASK_AVG(tasks[i], freepages), 904 TASK_AVG(tasks[i], thrashing), 905 TASK_AVG(tasks[i], compact), 906 TASK_AVG(tasks[i], wpcopy)); 907 } else { 908 suc &= BOOL_FPRINT(out, DELAY_FMT_DEFAULT, 909 TASK_AVG(tasks[i], cpu), 910 TASK_AVG(tasks[i], blkio), 911 TASK_AVG(tasks[i], irq), 912 TASK_AVG(tasks[i], mem)); 913 } 914 } 915 916 suc &= BOOL_FPRINT(out, "\n"); 917 918 if (!suc) 919 perror("Error writing to output"); 920 } 921 922 /* Main function */ 923 int main(int argc, char **argv) 924 { 925 int iterations = 0; 926 int use_q_quit = 0; 927 928 /* Parse command line arguments */ 929 parse_args(argc, argv); 930 931 /* Setup netlink socket */ 932 nl_sd = create_nl_socket(); 933 if (nl_sd < 0) { 934 fprintf(stderr, "Error creating netlink socket\n"); 935 exit(1); 936 } 937 938 /* Get family ID for taskstats via netlink */ 939 family_id = get_family_id(nl_sd); 940 if (!family_id) { 941 fprintf(stderr, "Error getting taskstats family ID\n"); 942 close(nl_sd); 943 exit(1); 944 } 945 946 if (!cfg.output_one_time) { 947 use_q_quit = 1; 948 enable_raw_mode(); 949 printf("Press 'q' to quit.\n"); 950 fflush(stdout); 951 } 952 953 /* Main loop */ 954 while (running) { 955 /* Exit when sort field do not match display mode */ 956 if (!(cfg.sort_field->supported_modes & cfg.display_mode)) { 957 fprintf(stderr, "Sort field not supported in this mode\n"); 958 display_available_fields(cfg.display_mode); 959 break; 960 } 961 962 /* Read PSI statistics */ 963 read_psi_stats(); 964 965 /* Get container stats if container path provided */ 966 if (cfg.container_path) 967 get_container_stats(); 968 969 /* Get task delays */ 970 get_task_delays(); 971 972 /* Sort tasks */ 973 sort_tasks(); 974 975 /* Display results to stdout or log file */ 976 display_results(); 977 978 /* Check for iterations */ 979 if (cfg.iterations > 0 && ++iterations >= cfg.iterations) 980 break; 981 982 /* Exit if output_one_time is set */ 983 if (cfg.output_one_time) 984 break; 985 986 /* Check for 'q' key to quit */ 987 if (use_q_quit) { 988 struct timeval tv = {cfg.delay, 0}; 989 fd_set readfds; 990 991 FD_ZERO(&readfds); 992 FD_SET(STDIN_FILENO, &readfds); 993 int r = select(STDIN_FILENO+1, &readfds, NULL, NULL, &tv); 994 995 if (r > 0 && FD_ISSET(STDIN_FILENO, &readfds)) { 996 char ch = 0; 997 998 read(STDIN_FILENO, &ch, 1); 999 if (ch == 'q' || ch == 'Q') { 1000 running = 0; 1001 break; 1002 } 1003 } 1004 } else { 1005 sleep(cfg.delay); 1006 } 1007 } 1008 1009 /* Restore terminal mode */ 1010 if (use_q_quit) 1011 disable_raw_mode(); 1012 1013 /* Cleanup */ 1014 close(nl_sd); 1015 if (cfg.container_path) 1016 free(cfg.container_path); 1017 1018 return 0; 1019 } 1020