1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * delaytop.c - system-wide delay monitoring tool. 4 * 5 * This tool provides real-time monitoring and statistics of 6 * system, container, and task-level delays, including CPU, 7 * memory, IO, and IRQ. It supports both interactive (top-like), 8 * and can output delay information for the whole system, specific 9 * containers (cgroups), or individual tasks (PIDs). 10 * 11 * Key features: 12 * - Collects per-task delay accounting statistics via taskstats. 13 * - Collects system-wide PSI information. 14 * - Supports sorting, filtering. 15 * - Supports both interactive (screen refresh). 16 * 17 * Copyright (C) Fan Yu, ZTE Corp. 2025 18 * Copyright (C) Wang Yaxin, ZTE Corp. 2025 19 * 20 * Compile with 21 * gcc -I/usr/src/linux/include delaytop.c -o delaytop 22 */ 23 24 #include <stdio.h> 25 #include <stdlib.h> 26 #include <string.h> 27 #include <errno.h> 28 #include <unistd.h> 29 #include <fcntl.h> 30 #include <getopt.h> 31 #include <signal.h> 32 #include <time.h> 33 #include <dirent.h> 34 #include <ctype.h> 35 #include <stdbool.h> 36 #include <sys/types.h> 37 #include <sys/stat.h> 38 #include <sys/socket.h> 39 #include <sys/select.h> 40 #include <termios.h> 41 #include <limits.h> 42 #include <linux/genetlink.h> 43 #include <linux/taskstats.h> 44 #include <linux/cgroupstats.h> 45 #include <stddef.h> 46 47 #define PSI_CPU_SOME "/proc/pressure/cpu" 48 #define PSI_CPU_FULL "/proc/pressure/cpu" 49 #define PSI_MEMORY_SOME "/proc/pressure/memory" 50 #define PSI_MEMORY_FULL "/proc/pressure/memory" 51 #define PSI_IO_SOME "/proc/pressure/io" 52 #define PSI_IO_FULL "/proc/pressure/io" 53 #define PSI_IRQ_FULL "/proc/pressure/irq" 54 55 #define NLA_NEXT(na) ((struct nlattr *)((char *)(na) + NLA_ALIGN((na)->nla_len))) 56 #define NLA_DATA(na) ((void *)((char *)(na) + NLA_HDRLEN)) 57 #define NLA_PAYLOAD(len) (len - NLA_HDRLEN) 58 59 #define GENLMSG_DATA(glh) ((void *)(NLMSG_DATA(glh) + GENL_HDRLEN)) 60 #define GENLMSG_PAYLOAD(glh) (NLMSG_PAYLOAD(glh, 0) - GENL_HDRLEN) 61 62 #define TASK_COMM_LEN 16 63 #define MAX_MSG_SIZE 1024 64 #define MAX_TASKS 1000 65 #define MAX_BUF_LEN 256 66 #define SET_TASK_STAT(task_count, field) tasks[task_count].field = stats.field 67 #define BOOL_FPRINT(stream, fmt, ...) \ 68 ({ \ 69 int ret = fprintf(stream, fmt, ##__VA_ARGS__); \ 70 ret >= 0; \ 71 }) 72 #define TASK_AVG(task, field) average_ms((task).field##_delay_total, (task).field##_count) 73 #define PSI_LINE_FORMAT "%-12s %6.1f%%/%6.1f%%/%6.1f%%/%8llu(ms)\n" 74 #define DELAY_FMT_DEFAULT "%8.2f %8.2f %8.2f %8.2f\n" 75 #define DELAY_FMT_MEMVERBOSE "%8.2f %8.2f %8.2f %8.2f %8.2f %8.2f\n" 76 #define SORT_FIELD(name, cmd, modes) \ 77 {#name, #cmd, \ 78 offsetof(struct task_info, name##_delay_total), \ 79 offsetof(struct task_info, name##_count), \ 80 modes} 81 #define END_FIELD {NULL, 0, 0} 82 83 /* Display mode types */ 84 #define MODE_TYPE_ALL (0xFFFFFFFF) 85 #define MODE_DEFAULT (1 << 0) 86 #define MODE_MEMVERBOSE (1 << 1) 87 88 /* PSI statistics structure */ 89 struct psi_stats { 90 double cpu_some_avg10, cpu_some_avg60, cpu_some_avg300; 91 unsigned long long cpu_some_total; 92 double cpu_full_avg10, cpu_full_avg60, cpu_full_avg300; 93 unsigned long long cpu_full_total; 94 double memory_some_avg10, memory_some_avg60, memory_some_avg300; 95 unsigned long long memory_some_total; 96 double memory_full_avg10, memory_full_avg60, memory_full_avg300; 97 unsigned long long memory_full_total; 98 double io_some_avg10, io_some_avg60, io_some_avg300; 99 unsigned long long io_some_total; 100 double io_full_avg10, io_full_avg60, io_full_avg300; 101 unsigned long long io_full_total; 102 double irq_full_avg10, irq_full_avg60, irq_full_avg300; 103 unsigned long long irq_full_total; 104 }; 105 106 /* Task delay information structure */ 107 struct task_info { 108 int pid; 109 int tgid; 110 char command[TASK_COMM_LEN]; 111 unsigned long long cpu_count; 112 unsigned long long cpu_delay_total; 113 unsigned long long blkio_count; 114 unsigned long long blkio_delay_total; 115 unsigned long long swapin_count; 116 unsigned long long swapin_delay_total; 117 unsigned long long freepages_count; 118 unsigned long long freepages_delay_total; 119 unsigned long long thrashing_count; 120 unsigned long long thrashing_delay_total; 121 unsigned long long compact_count; 122 unsigned long long compact_delay_total; 123 unsigned long long wpcopy_count; 124 unsigned long long wpcopy_delay_total; 125 unsigned long long irq_count; 126 unsigned long long irq_delay_total; 127 unsigned long long mem_count; 128 unsigned long long mem_delay_total; 129 }; 130 131 /* Container statistics structure */ 132 struct container_stats { 133 int nr_sleeping; /* Number of sleeping processes */ 134 int nr_running; /* Number of running processes */ 135 int nr_stopped; /* Number of stopped processes */ 136 int nr_uninterruptible; /* Number of uninterruptible processes */ 137 int nr_io_wait; /* Number of processes in IO wait */ 138 }; 139 140 /* Delay field structure */ 141 struct field_desc { 142 const char *name; /* Field name for cmdline argument */ 143 const char *cmd_char; /* Interactive command */ 144 unsigned long total_offset; /* Offset of total delay in task_info */ 145 unsigned long count_offset; /* Offset of count in task_info */ 146 size_t supported_modes; /* Supported display modes */ 147 }; 148 149 /* Program settings structure */ 150 struct config { 151 int delay; /* Update interval in seconds */ 152 int iterations; /* Number of iterations, 0 == infinite */ 153 int max_processes; /* Maximum number of processes to show */ 154 int output_one_time; /* Output once and exit */ 155 int monitor_pid; /* Monitor specific PID */ 156 char *container_path; /* Path to container cgroup */ 157 const struct field_desc *sort_field; /* Current sort field */ 158 size_t display_mode; /* Current display mode */ 159 }; 160 161 /* Global variables */ 162 static struct config cfg; 163 static struct psi_stats psi; 164 static struct task_info tasks[MAX_TASKS]; 165 static int task_count; 166 static int running = 1; 167 static struct container_stats container_stats; 168 static const struct field_desc sort_fields[] = { 169 SORT_FIELD(cpu, c, MODE_DEFAULT), 170 SORT_FIELD(blkio, i, MODE_DEFAULT), 171 SORT_FIELD(irq, q, MODE_DEFAULT), 172 SORT_FIELD(mem, m, MODE_DEFAULT | MODE_MEMVERBOSE), 173 SORT_FIELD(swapin, s, MODE_MEMVERBOSE), 174 SORT_FIELD(freepages, r, MODE_MEMVERBOSE), 175 SORT_FIELD(thrashing, t, MODE_MEMVERBOSE), 176 SORT_FIELD(compact, p, MODE_MEMVERBOSE), 177 SORT_FIELD(wpcopy, w, MODE_MEMVERBOSE), 178 END_FIELD 179 }; 180 static int sort_selected; 181 182 /* Netlink socket variables */ 183 static int nl_sd = -1; 184 static int family_id; 185 186 /* Set terminal to non-canonical mode for q-to-quit */ 187 static struct termios orig_termios; 188 static void enable_raw_mode(void) 189 { 190 struct termios raw; 191 192 tcgetattr(STDIN_FILENO, &orig_termios); 193 raw = orig_termios; 194 raw.c_lflag &= ~(ICANON | ECHO); 195 tcsetattr(STDIN_FILENO, TCSAFLUSH, &raw); 196 } 197 static void disable_raw_mode(void) 198 { 199 tcsetattr(STDIN_FILENO, TCSAFLUSH, &orig_termios); 200 } 201 202 /* Find field descriptor by command line */ 203 static const struct field_desc *get_field_by_cmd_char(char ch) 204 { 205 const struct field_desc *field; 206 207 for (field = sort_fields; field->name != NULL; field++) { 208 if (field->cmd_char[0] == ch) 209 return field; 210 } 211 212 return NULL; 213 } 214 215 /* Find field descriptor by name with string comparison */ 216 static const struct field_desc *get_field_by_name(const char *name) 217 { 218 const struct field_desc *field; 219 size_t field_len; 220 221 for (field = sort_fields; field->name != NULL; field++) { 222 field_len = strlen(field->name); 223 if (field_len != strlen(name)) 224 continue; 225 if (strncmp(field->name, name, field_len) == 0) 226 return field; 227 } 228 229 return NULL; 230 } 231 232 /* Find display name for a field descriptor */ 233 static const char *get_name_by_field(const struct field_desc *field) 234 { 235 return field ? field->name : "UNKNOWN"; 236 } 237 238 /* Generate string of available field names */ 239 static void display_available_fields(size_t mode) 240 { 241 const struct field_desc *field; 242 char buf[MAX_BUF_LEN]; 243 244 buf[0] = '\0'; 245 246 for (field = sort_fields; field->name != NULL; field++) { 247 if (!(field->supported_modes & mode)) 248 continue; 249 strncat(buf, "|", MAX_BUF_LEN - strlen(buf) - 1); 250 strncat(buf, field->name, MAX_BUF_LEN - strlen(buf) - 1); 251 buf[MAX_BUF_LEN - 1] = '\0'; 252 } 253 254 fprintf(stderr, "Available fields: %s\n", buf); 255 } 256 257 /* Display usage information and command line options */ 258 static void usage(void) 259 { 260 printf("Usage: delaytop [Options]\n" 261 "Options:\n" 262 " -h, --help Show this help message and exit\n" 263 " -d, --delay=SECONDS Set refresh interval (default: 2 seconds, min: 1)\n" 264 " -n, --iterations=COUNT Set number of updates (default: 0 = infinite)\n" 265 " -P, --processes=NUMBER Set maximum number of processes to show (default: 20, max: 1000)\n" 266 " -o, --once Display once and exit\n" 267 " -p, --pid=PID Monitor only the specified PID\n" 268 " -C, --container=PATH Monitor the container at specified cgroup path\n" 269 " -s, --sort=FIELD Sort by delay field (default: cpu)\n" 270 " -M, --memverbose Display memory detailed information\n"); 271 exit(0); 272 } 273 274 /* Parse command line arguments and set configuration */ 275 static void parse_args(int argc, char **argv) 276 { 277 int c; 278 const struct field_desc *field; 279 struct option long_options[] = { 280 {"help", no_argument, 0, 'h'}, 281 {"delay", required_argument, 0, 'd'}, 282 {"iterations", required_argument, 0, 'n'}, 283 {"pid", required_argument, 0, 'p'}, 284 {"once", no_argument, 0, 'o'}, 285 {"processes", required_argument, 0, 'P'}, 286 {"sort", required_argument, 0, 's'}, 287 {"container", required_argument, 0, 'C'}, 288 {"memverbose", no_argument, 0, 'M'}, 289 {0, 0, 0, 0} 290 }; 291 292 /* Set defaults */ 293 cfg.delay = 2; 294 cfg.iterations = 0; 295 cfg.max_processes = 20; 296 cfg.sort_field = &sort_fields[0]; /* Default sorted by CPU delay */ 297 cfg.output_one_time = 0; 298 cfg.monitor_pid = 0; /* 0 means monitor all PIDs */ 299 cfg.container_path = NULL; 300 cfg.display_mode = MODE_DEFAULT; 301 302 while (1) { 303 int option_index = 0; 304 305 c = getopt_long(argc, argv, "hd:n:p:oP:C:s:M", long_options, &option_index); 306 if (c == -1) 307 break; 308 309 switch (c) { 310 case 'h': 311 usage(); 312 break; 313 case 'd': 314 cfg.delay = atoi(optarg); 315 if (cfg.delay < 1) { 316 fprintf(stderr, "Error: delay must be >= 1.\n"); 317 exit(1); 318 } 319 break; 320 case 'n': 321 cfg.iterations = atoi(optarg); 322 if (cfg.iterations < 0) { 323 fprintf(stderr, "Error: iterations must be >= 0.\n"); 324 exit(1); 325 } 326 break; 327 case 'p': 328 cfg.monitor_pid = atoi(optarg); 329 if (cfg.monitor_pid < 1) { 330 fprintf(stderr, "Error: pid must be >= 1.\n"); 331 exit(1); 332 } 333 break; 334 case 'o': 335 cfg.output_one_time = 1; 336 break; 337 case 'P': 338 cfg.max_processes = atoi(optarg); 339 if (cfg.max_processes < 1) { 340 fprintf(stderr, "Error: processes must be >= 1.\n"); 341 exit(1); 342 } 343 if (cfg.max_processes > MAX_TASKS) { 344 fprintf(stderr, "Warning: processes capped to %d.\n", 345 MAX_TASKS); 346 cfg.max_processes = MAX_TASKS; 347 } 348 break; 349 case 'C': 350 cfg.container_path = strdup(optarg); 351 break; 352 case 's': 353 if (strlen(optarg) == 0) { 354 fprintf(stderr, "Error: empty sort field\n"); 355 exit(1); 356 } 357 358 field = get_field_by_name(optarg); 359 /* Show available fields if invalid option provided */ 360 if (!field) { 361 fprintf(stderr, "Error: invalid sort field '%s'\n", optarg); 362 display_available_fields(MODE_TYPE_ALL); 363 exit(1); 364 } 365 366 cfg.sort_field = field; 367 break; 368 case 'M': 369 cfg.display_mode = MODE_MEMVERBOSE; 370 cfg.sort_field = get_field_by_name("mem"); 371 break; 372 default: 373 fprintf(stderr, "Try 'delaytop --help' for more information.\n"); 374 exit(1); 375 } 376 } 377 } 378 379 /* Calculate average delay in milliseconds for overall memory */ 380 static void set_mem_delay_total(struct task_info *t) 381 { 382 t->mem_delay_total = t->swapin_delay_total + 383 t->freepages_delay_total + 384 t->thrashing_delay_total + 385 t->compact_delay_total + 386 t->wpcopy_delay_total; 387 } 388 389 static void set_mem_count(struct task_info *t) 390 { 391 t->mem_count = t->swapin_count + 392 t->freepages_count + 393 t->thrashing_count + 394 t->compact_count + 395 t->wpcopy_count; 396 } 397 398 /* Create a raw netlink socket and bind */ 399 static int create_nl_socket(void) 400 { 401 int fd; 402 struct sockaddr_nl local; 403 404 fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC); 405 if (fd < 0) 406 return -1; 407 408 memset(&local, 0, sizeof(local)); 409 local.nl_family = AF_NETLINK; 410 411 if (bind(fd, (struct sockaddr *) &local, sizeof(local)) < 0) { 412 fprintf(stderr, "Failed to bind socket when create nl_socket\n"); 413 close(fd); 414 return -1; 415 } 416 417 return fd; 418 } 419 420 /* Send a command via netlink */ 421 static int send_cmd(int sd, __u16 nlmsg_type, __u32 nlmsg_pid, 422 __u8 genl_cmd, __u16 nla_type, 423 void *nla_data, int nla_len) 424 { 425 struct sockaddr_nl nladdr; 426 struct nlattr *na; 427 int r, buflen; 428 char *buf; 429 430 struct { 431 struct nlmsghdr n; 432 struct genlmsghdr g; 433 char buf[MAX_MSG_SIZE]; 434 } msg; 435 436 msg.n.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN); 437 msg.n.nlmsg_type = nlmsg_type; 438 msg.n.nlmsg_flags = NLM_F_REQUEST; 439 msg.n.nlmsg_seq = 0; 440 msg.n.nlmsg_pid = nlmsg_pid; 441 msg.g.cmd = genl_cmd; 442 msg.g.version = 0x1; 443 na = (struct nlattr *) GENLMSG_DATA(&msg); 444 na->nla_type = nla_type; 445 na->nla_len = nla_len + NLA_HDRLEN; 446 memcpy(NLA_DATA(na), nla_data, nla_len); 447 msg.n.nlmsg_len += NLMSG_ALIGN(na->nla_len); 448 449 buf = (char *) &msg; 450 buflen = msg.n.nlmsg_len; 451 memset(&nladdr, 0, sizeof(nladdr)); 452 nladdr.nl_family = AF_NETLINK; 453 while ((r = sendto(sd, buf, buflen, 0, (struct sockaddr *) &nladdr, 454 sizeof(nladdr))) < buflen) { 455 if (r > 0) { 456 buf += r; 457 buflen -= r; 458 } else if (errno != EAGAIN) 459 return -1; 460 } 461 return 0; 462 } 463 464 /* Get family ID for taskstats via netlink */ 465 static int get_family_id(int sd) 466 { 467 struct { 468 struct nlmsghdr n; 469 struct genlmsghdr g; 470 char buf[256]; 471 } ans; 472 473 int id = 0, rc; 474 struct nlattr *na; 475 int rep_len; 476 char name[100]; 477 478 strncpy(name, TASKSTATS_GENL_NAME, sizeof(name) - 1); 479 name[sizeof(name) - 1] = '\0'; 480 rc = send_cmd(sd, GENL_ID_CTRL, getpid(), CTRL_CMD_GETFAMILY, 481 CTRL_ATTR_FAMILY_NAME, (void *)name, 482 strlen(TASKSTATS_GENL_NAME)+1); 483 if (rc < 0) { 484 fprintf(stderr, "Failed to send cmd for family id\n"); 485 return 0; 486 } 487 488 rep_len = recv(sd, &ans, sizeof(ans), 0); 489 if (ans.n.nlmsg_type == NLMSG_ERROR || 490 (rep_len < 0) || !NLMSG_OK((&ans.n), rep_len)) { 491 fprintf(stderr, "Failed to receive response for family id\n"); 492 return 0; 493 } 494 495 na = (struct nlattr *) GENLMSG_DATA(&ans); 496 na = (struct nlattr *) ((char *) na + NLA_ALIGN(na->nla_len)); 497 if (na->nla_type == CTRL_ATTR_FAMILY_ID) 498 id = *(__u16 *) NLA_DATA(na); 499 return id; 500 } 501 502 static void read_psi_stats(void) 503 { 504 FILE *fp; 505 char line[256]; 506 int ret = 0; 507 /* Zero all fields */ 508 memset(&psi, 0, sizeof(psi)); 509 /* CPU pressure */ 510 fp = fopen(PSI_CPU_SOME, "r"); 511 if (fp) { 512 while (fgets(line, sizeof(line), fp)) { 513 if (strncmp(line, "some", 4) == 0) { 514 ret = sscanf(line, "some avg10=%lf avg60=%lf avg300=%lf total=%llu", 515 &psi.cpu_some_avg10, &psi.cpu_some_avg60, 516 &psi.cpu_some_avg300, &psi.cpu_some_total); 517 if (ret != 4) 518 fprintf(stderr, "Failed to parse CPU some PSI data\n"); 519 } else if (strncmp(line, "full", 4) == 0) { 520 ret = sscanf(line, "full avg10=%lf avg60=%lf avg300=%lf total=%llu", 521 &psi.cpu_full_avg10, &psi.cpu_full_avg60, 522 &psi.cpu_full_avg300, &psi.cpu_full_total); 523 if (ret != 4) 524 fprintf(stderr, "Failed to parse CPU full PSI data\n"); 525 } 526 } 527 fclose(fp); 528 } 529 /* Memory pressure */ 530 fp = fopen(PSI_MEMORY_SOME, "r"); 531 if (fp) { 532 while (fgets(line, sizeof(line), fp)) { 533 if (strncmp(line, "some", 4) == 0) { 534 ret = sscanf(line, "some avg10=%lf avg60=%lf avg300=%lf total=%llu", 535 &psi.memory_some_avg10, &psi.memory_some_avg60, 536 &psi.memory_some_avg300, &psi.memory_some_total); 537 if (ret != 4) 538 fprintf(stderr, "Failed to parse Memory some PSI data\n"); 539 } else if (strncmp(line, "full", 4) == 0) { 540 ret = sscanf(line, "full avg10=%lf avg60=%lf avg300=%lf total=%llu", 541 &psi.memory_full_avg10, &psi.memory_full_avg60, 542 &psi.memory_full_avg300, &psi.memory_full_total); 543 } 544 if (ret != 4) 545 fprintf(stderr, "Failed to parse Memory full PSI data\n"); 546 } 547 fclose(fp); 548 } 549 /* IO pressure */ 550 fp = fopen(PSI_IO_SOME, "r"); 551 if (fp) { 552 while (fgets(line, sizeof(line), fp)) { 553 if (strncmp(line, "some", 4) == 0) { 554 ret = sscanf(line, "some avg10=%lf avg60=%lf avg300=%lf total=%llu", 555 &psi.io_some_avg10, &psi.io_some_avg60, 556 &psi.io_some_avg300, &psi.io_some_total); 557 if (ret != 4) 558 fprintf(stderr, "Failed to parse IO some PSI data\n"); 559 } else if (strncmp(line, "full", 4) == 0) { 560 ret = sscanf(line, "full avg10=%lf avg60=%lf avg300=%lf total=%llu", 561 &psi.io_full_avg10, &psi.io_full_avg60, 562 &psi.io_full_avg300, &psi.io_full_total); 563 if (ret != 4) 564 fprintf(stderr, "Failed to parse IO full PSI data\n"); 565 } 566 } 567 fclose(fp); 568 } 569 /* IRQ pressure (only full) */ 570 fp = fopen(PSI_IRQ_FULL, "r"); 571 if (fp) { 572 while (fgets(line, sizeof(line), fp)) { 573 if (strncmp(line, "full", 4) == 0) { 574 ret = sscanf(line, "full avg10=%lf avg60=%lf avg300=%lf total=%llu", 575 &psi.irq_full_avg10, &psi.irq_full_avg60, 576 &psi.irq_full_avg300, &psi.irq_full_total); 577 if (ret != 4) 578 fprintf(stderr, "Failed to parse IRQ full PSI data\n"); 579 } 580 } 581 fclose(fp); 582 } 583 } 584 585 static int read_comm(int pid, char *comm_buf, size_t buf_size) 586 { 587 char path[64]; 588 int ret = -1; 589 size_t len; 590 FILE *fp; 591 592 snprintf(path, sizeof(path), "/proc/%d/comm", pid); 593 fp = fopen(path, "r"); 594 if (!fp) { 595 fprintf(stderr, "Failed to open comm file /proc/%d/comm\n", pid); 596 return ret; 597 } 598 599 if (fgets(comm_buf, buf_size, fp)) { 600 len = strlen(comm_buf); 601 if (len > 0 && comm_buf[len - 1] == '\n') 602 comm_buf[len - 1] = '\0'; 603 ret = 0; 604 } 605 606 fclose(fp); 607 608 return ret; 609 } 610 611 static void fetch_and_fill_task_info(int pid, const char *comm) 612 { 613 struct { 614 struct nlmsghdr n; 615 struct genlmsghdr g; 616 char buf[MAX_MSG_SIZE]; 617 } resp; 618 struct taskstats stats; 619 struct nlattr *nested; 620 struct nlattr *na; 621 int nested_len; 622 int nl_len; 623 int rc; 624 625 /* Send request for task stats */ 626 if (send_cmd(nl_sd, family_id, getpid(), TASKSTATS_CMD_GET, 627 TASKSTATS_CMD_ATTR_PID, &pid, sizeof(pid)) < 0) { 628 fprintf(stderr, "Failed to send request for task stats\n"); 629 return; 630 } 631 632 /* Receive response */ 633 rc = recv(nl_sd, &resp, sizeof(resp), 0); 634 if (rc < 0 || resp.n.nlmsg_type == NLMSG_ERROR) { 635 fprintf(stderr, "Failed to receive response for task stats\n"); 636 return; 637 } 638 639 /* Parse response */ 640 nl_len = GENLMSG_PAYLOAD(&resp.n); 641 na = (struct nlattr *) GENLMSG_DATA(&resp); 642 while (nl_len > 0) { 643 if (na->nla_type == TASKSTATS_TYPE_AGGR_PID) { 644 nested = (struct nlattr *) NLA_DATA(na); 645 nested_len = NLA_PAYLOAD(na->nla_len); 646 while (nested_len > 0) { 647 if (nested->nla_type == TASKSTATS_TYPE_STATS) { 648 memcpy(&stats, NLA_DATA(nested), sizeof(stats)); 649 if (task_count < MAX_TASKS) { 650 tasks[task_count].pid = pid; 651 tasks[task_count].tgid = pid; 652 strncpy(tasks[task_count].command, comm, 653 TASK_COMM_LEN - 1); 654 tasks[task_count].command[TASK_COMM_LEN - 1] = '\0'; 655 SET_TASK_STAT(task_count, cpu_count); 656 SET_TASK_STAT(task_count, cpu_delay_total); 657 SET_TASK_STAT(task_count, blkio_count); 658 SET_TASK_STAT(task_count, blkio_delay_total); 659 SET_TASK_STAT(task_count, swapin_count); 660 SET_TASK_STAT(task_count, swapin_delay_total); 661 SET_TASK_STAT(task_count, freepages_count); 662 SET_TASK_STAT(task_count, freepages_delay_total); 663 SET_TASK_STAT(task_count, thrashing_count); 664 SET_TASK_STAT(task_count, thrashing_delay_total); 665 SET_TASK_STAT(task_count, compact_count); 666 SET_TASK_STAT(task_count, compact_delay_total); 667 SET_TASK_STAT(task_count, wpcopy_count); 668 SET_TASK_STAT(task_count, wpcopy_delay_total); 669 SET_TASK_STAT(task_count, irq_count); 670 SET_TASK_STAT(task_count, irq_delay_total); 671 set_mem_count(&tasks[task_count]); 672 set_mem_delay_total(&tasks[task_count]); 673 task_count++; 674 } 675 break; 676 } 677 nested_len -= NLA_ALIGN(nested->nla_len); 678 nested = NLA_NEXT(nested); 679 } 680 } 681 nl_len -= NLA_ALIGN(na->nla_len); 682 na = NLA_NEXT(na); 683 } 684 return; 685 } 686 687 static void get_task_delays(void) 688 { 689 char comm[TASK_COMM_LEN]; 690 struct dirent *entry; 691 DIR *dir; 692 int pid; 693 694 task_count = 0; 695 if (cfg.monitor_pid > 0) { 696 if (read_comm(cfg.monitor_pid, comm, sizeof(comm)) == 0) 697 fetch_and_fill_task_info(cfg.monitor_pid, comm); 698 return; 699 } 700 701 dir = opendir("/proc"); 702 if (!dir) { 703 fprintf(stderr, "Error opening /proc directory\n"); 704 return; 705 } 706 707 while ((entry = readdir(dir)) != NULL && task_count < MAX_TASKS) { 708 if (!isdigit(entry->d_name[0])) 709 continue; 710 pid = atoi(entry->d_name); 711 if (pid == 0) 712 continue; 713 if (read_comm(pid, comm, sizeof(comm)) != 0) 714 continue; 715 fetch_and_fill_task_info(pid, comm); 716 } 717 closedir(dir); 718 } 719 720 /* Calculate average delay in milliseconds */ 721 static double average_ms(unsigned long long total, unsigned long long count) 722 { 723 if (count == 0) 724 return 0; 725 return (double)total / 1000000.0 / count; 726 } 727 728 /* Comparison function for sorting tasks */ 729 static int compare_tasks(const void *a, const void *b) 730 { 731 const struct task_info *t1 = (const struct task_info *)a; 732 const struct task_info *t2 = (const struct task_info *)b; 733 unsigned long long total1; 734 unsigned long long total2; 735 unsigned long count1; 736 unsigned long count2; 737 double avg1, avg2; 738 739 total1 = *(unsigned long long *)((char *)t1 + cfg.sort_field->total_offset); 740 total2 = *(unsigned long long *)((char *)t2 + cfg.sort_field->total_offset); 741 count1 = *(unsigned long *)((char *)t1 + cfg.sort_field->count_offset); 742 count2 = *(unsigned long *)((char *)t2 + cfg.sort_field->count_offset); 743 744 avg1 = average_ms(total1, count1); 745 avg2 = average_ms(total2, count2); 746 if (avg1 != avg2) 747 return avg2 > avg1 ? 1 : -1; 748 749 return 0; 750 } 751 752 /* Sort tasks by selected field */ 753 static void sort_tasks(void) 754 { 755 if (task_count > 0) 756 qsort(tasks, task_count, sizeof(struct task_info), compare_tasks); 757 } 758 759 /* Get container statistics via cgroupstats */ 760 static void get_container_stats(void) 761 { 762 int rc, cfd; 763 struct { 764 struct nlmsghdr n; 765 struct genlmsghdr g; 766 char buf[MAX_MSG_SIZE]; 767 } req, resp; 768 struct nlattr *na; 769 int nl_len; 770 struct cgroupstats stats; 771 772 /* Check if container path is set */ 773 if (!cfg.container_path) 774 return; 775 776 /* Open container cgroup */ 777 cfd = open(cfg.container_path, O_RDONLY); 778 if (cfd < 0) { 779 fprintf(stderr, "Error opening container path: %s\n", cfg.container_path); 780 return; 781 } 782 783 /* Send request for container stats */ 784 if (send_cmd(nl_sd, family_id, getpid(), CGROUPSTATS_CMD_GET, 785 CGROUPSTATS_CMD_ATTR_FD, &cfd, sizeof(__u32)) < 0) { 786 fprintf(stderr, "Failed to send request for container stats\n"); 787 close(cfd); 788 return; 789 } 790 791 /* Receive response */ 792 rc = recv(nl_sd, &resp, sizeof(resp), 0); 793 if (rc < 0 || resp.n.nlmsg_type == NLMSG_ERROR) { 794 fprintf(stderr, "Failed to receive response for container stats\n"); 795 close(cfd); 796 return; 797 } 798 799 /* Parse response */ 800 nl_len = GENLMSG_PAYLOAD(&resp.n); 801 na = (struct nlattr *) GENLMSG_DATA(&resp); 802 while (nl_len > 0) { 803 if (na->nla_type == CGROUPSTATS_TYPE_CGROUP_STATS) { 804 /* Get the cgroupstats structure */ 805 memcpy(&stats, NLA_DATA(na), sizeof(stats)); 806 807 /* Fill container stats */ 808 container_stats.nr_sleeping = stats.nr_sleeping; 809 container_stats.nr_running = stats.nr_running; 810 container_stats.nr_stopped = stats.nr_stopped; 811 container_stats.nr_uninterruptible = stats.nr_uninterruptible; 812 container_stats.nr_io_wait = stats.nr_io_wait; 813 break; 814 } 815 nl_len -= NLA_ALIGN(na->nla_len); 816 na = (struct nlattr *) ((char *) na + NLA_ALIGN(na->nla_len)); 817 } 818 819 close(cfd); 820 } 821 822 /* Display results to stdout or log file */ 823 static void display_results(void) 824 { 825 time_t now = time(NULL); 826 struct tm *tm_now = localtime(&now); 827 FILE *out = stdout; 828 char timestamp[32]; 829 bool suc = true; 830 int i, count; 831 832 /* Clear terminal screen */ 833 suc &= BOOL_FPRINT(out, "\033[H\033[J"); 834 835 /* PSI output (one-line, no cat style) */ 836 suc &= BOOL_FPRINT(out, "System Pressure Information: (avg10/avg60/avg300/total)\n"); 837 suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT, 838 "CPU some:", 839 psi.cpu_some_avg10, 840 psi.cpu_some_avg60, 841 psi.cpu_some_avg300, 842 psi.cpu_some_total / 1000); 843 suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT, 844 "CPU full:", 845 psi.cpu_full_avg10, 846 psi.cpu_full_avg60, 847 psi.cpu_full_avg300, 848 psi.cpu_full_total / 1000); 849 suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT, 850 "Memory full:", 851 psi.memory_full_avg10, 852 psi.memory_full_avg60, 853 psi.memory_full_avg300, 854 psi.memory_full_total / 1000); 855 suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT, 856 "Memory some:", 857 psi.memory_some_avg10, 858 psi.memory_some_avg60, 859 psi.memory_some_avg300, 860 psi.memory_some_total / 1000); 861 suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT, 862 "IO full:", 863 psi.io_full_avg10, 864 psi.io_full_avg60, 865 psi.io_full_avg300, 866 psi.io_full_total / 1000); 867 suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT, 868 "IO some:", 869 psi.io_some_avg10, 870 psi.io_some_avg60, 871 psi.io_some_avg300, 872 psi.io_some_total / 1000); 873 suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT, 874 "IRQ full:", 875 psi.irq_full_avg10, 876 psi.irq_full_avg60, 877 psi.irq_full_avg300, 878 psi.irq_full_total / 1000); 879 880 if (cfg.container_path) { 881 suc &= BOOL_FPRINT(out, "Container Information (%s):\n", cfg.container_path); 882 suc &= BOOL_FPRINT(out, "Processes: running=%d, sleeping=%d, ", 883 container_stats.nr_running, container_stats.nr_sleeping); 884 suc &= BOOL_FPRINT(out, "stopped=%d, uninterruptible=%d, io_wait=%d\n\n", 885 container_stats.nr_stopped, container_stats.nr_uninterruptible, 886 container_stats.nr_io_wait); 887 } 888 889 /* Interacive command */ 890 suc &= BOOL_FPRINT(out, "[o]sort [M]memverbose [q]quit\n"); 891 if (sort_selected) { 892 if (cfg.display_mode == MODE_MEMVERBOSE) 893 suc &= BOOL_FPRINT(out, 894 "sort selection: [m]MEM [r]RCL [t]THR [p]CMP [w]WP\n"); 895 else 896 suc &= BOOL_FPRINT(out, 897 "sort selection: [c]CPU [i]IO [m]MEM [q]IRQ\n"); 898 } 899 900 /* Task delay output */ 901 suc &= BOOL_FPRINT(out, "Top %d processes (sorted by %s delay):\n", 902 cfg.max_processes, get_name_by_field(cfg.sort_field)); 903 904 suc &= BOOL_FPRINT(out, "%8s %8s %-17s", "PID", "TGID", "COMMAND"); 905 if (cfg.display_mode == MODE_MEMVERBOSE) { 906 suc &= BOOL_FPRINT(out, "%8s %8s %8s %8s %8s %8s\n", 907 "MEM(ms)", "SWAP(ms)", "RCL(ms)", 908 "THR(ms)", "CMP(ms)", "WP(ms)"); 909 suc &= BOOL_FPRINT(out, "-----------------------"); 910 suc &= BOOL_FPRINT(out, "-----------------------"); 911 suc &= BOOL_FPRINT(out, "-----------------------"); 912 suc &= BOOL_FPRINT(out, "---------------------\n"); 913 } else { 914 suc &= BOOL_FPRINT(out, "%8s %8s %8s %8s\n", 915 "CPU(ms)", "IO(ms)", "IRQ(ms)", "MEM(ms)"); 916 suc &= BOOL_FPRINT(out, "-----------------------"); 917 suc &= BOOL_FPRINT(out, "-----------------------"); 918 suc &= BOOL_FPRINT(out, "--------------------------\n"); 919 } 920 921 count = task_count < cfg.max_processes ? task_count : cfg.max_processes; 922 923 for (i = 0; i < count; i++) { 924 suc &= BOOL_FPRINT(out, "%8d %8d %-15s", 925 tasks[i].pid, tasks[i].tgid, tasks[i].command); 926 if (cfg.display_mode == MODE_MEMVERBOSE) { 927 suc &= BOOL_FPRINT(out, DELAY_FMT_MEMVERBOSE, 928 TASK_AVG(tasks[i], mem), 929 TASK_AVG(tasks[i], swapin), 930 TASK_AVG(tasks[i], freepages), 931 TASK_AVG(tasks[i], thrashing), 932 TASK_AVG(tasks[i], compact), 933 TASK_AVG(tasks[i], wpcopy)); 934 } else { 935 suc &= BOOL_FPRINT(out, DELAY_FMT_DEFAULT, 936 TASK_AVG(tasks[i], cpu), 937 TASK_AVG(tasks[i], blkio), 938 TASK_AVG(tasks[i], irq), 939 TASK_AVG(tasks[i], mem)); 940 } 941 } 942 943 suc &= BOOL_FPRINT(out, "\n"); 944 945 if (!suc) 946 perror("Error writing to output"); 947 } 948 949 /* Check for keyboard input with timeout based on cfg.delay */ 950 static char check_for_keypress(void) 951 { 952 struct timeval tv = {cfg.delay, 0}; 953 fd_set readfds; 954 char ch = 0; 955 956 FD_ZERO(&readfds); 957 FD_SET(STDIN_FILENO, &readfds); 958 int r = select(STDIN_FILENO + 1, &readfds, NULL, NULL, &tv); 959 960 if (r > 0 && FD_ISSET(STDIN_FILENO, &readfds)) { 961 read(STDIN_FILENO, &ch, 1); 962 return ch; 963 } 964 965 return 0; 966 } 967 968 #define MAX_MODE_SIZE 2 969 static void toggle_display_mode(void) 970 { 971 static const size_t modes[MAX_MODE_SIZE] = {MODE_DEFAULT, MODE_MEMVERBOSE}; 972 static size_t cur_index; 973 974 cur_index = (cur_index + 1) % MAX_MODE_SIZE; 975 cfg.display_mode = modes[cur_index]; 976 } 977 978 /* Handle keyboard input: sorting selection, mode toggle, or quit */ 979 static void handle_keypress(char ch, int *running) 980 { 981 const struct field_desc *field; 982 983 /* Change sort field */ 984 if (sort_selected) { 985 field = get_field_by_cmd_char(ch); 986 if (field && (field->supported_modes & cfg.display_mode)) 987 cfg.sort_field = field; 988 989 sort_selected = 0; 990 /* Handle mode changes or quit */ 991 } else { 992 switch (ch) { 993 case 'o': 994 sort_selected = 1; 995 break; 996 case 'M': 997 toggle_display_mode(); 998 for (field = sort_fields; field->name != NULL; field++) { 999 if (field->supported_modes & cfg.display_mode) { 1000 cfg.sort_field = field; 1001 break; 1002 } 1003 } 1004 break; 1005 case 'q': 1006 case 'Q': 1007 *running = 0; 1008 break; 1009 default: 1010 break; 1011 } 1012 } 1013 } 1014 1015 /* Main function */ 1016 int main(int argc, char **argv) 1017 { 1018 const struct field_desc *field; 1019 int iterations = 0; 1020 char keypress; 1021 1022 /* Parse command line arguments */ 1023 parse_args(argc, argv); 1024 1025 /* Setup netlink socket */ 1026 nl_sd = create_nl_socket(); 1027 if (nl_sd < 0) { 1028 fprintf(stderr, "Error creating netlink socket\n"); 1029 exit(1); 1030 } 1031 1032 /* Get family ID for taskstats via netlink */ 1033 family_id = get_family_id(nl_sd); 1034 if (!family_id) { 1035 fprintf(stderr, "Error getting taskstats family ID\n"); 1036 close(nl_sd); 1037 exit(1); 1038 } 1039 1040 /* Set terminal to non-canonical mode for interaction */ 1041 enable_raw_mode(); 1042 1043 /* Main loop */ 1044 while (running) { 1045 /* Auto-switch sort field when not matching display mode */ 1046 if (!(cfg.sort_field->supported_modes & cfg.display_mode)) { 1047 for (field = sort_fields; field->name != NULL; field++) { 1048 if (field->supported_modes & cfg.display_mode) { 1049 cfg.sort_field = field; 1050 printf("Auto-switched sort field to: %s\n", field->name); 1051 break; 1052 } 1053 } 1054 } 1055 1056 /* Read PSI statistics */ 1057 read_psi_stats(); 1058 1059 /* Get container stats if container path provided */ 1060 if (cfg.container_path) 1061 get_container_stats(); 1062 1063 /* Get task delays */ 1064 get_task_delays(); 1065 1066 /* Sort tasks */ 1067 sort_tasks(); 1068 1069 /* Display results to stdout or log file */ 1070 display_results(); 1071 1072 /* Check for iterations */ 1073 if (cfg.iterations > 0 && ++iterations >= cfg.iterations) 1074 break; 1075 1076 /* Exit if output_one_time is set */ 1077 if (cfg.output_one_time) 1078 break; 1079 1080 /* Keypress for interactive usage */ 1081 keypress = check_for_keypress(); 1082 if (keypress) 1083 handle_keypress(keypress, &running); 1084 } 1085 1086 /* Restore terminal mode */ 1087 disable_raw_mode(); 1088 1089 /* Cleanup */ 1090 close(nl_sd); 1091 if (cfg.container_path) 1092 free(cfg.container_path); 1093 1094 return 0; 1095 } 1096