1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * delaytop.c - system-wide delay monitoring tool. 4 * 5 * This tool provides real-time monitoring and statistics of 6 * system, container, and task-level delays, including CPU, 7 * memory, IO, and IRQ. It supports both interactive (top-like), 8 * and can output delay information for the whole system, specific 9 * containers (cgroups), or individual tasks (PIDs). 10 * 11 * Key features: 12 * - Collects per-task delay accounting statistics via taskstats. 13 * - Collects system-wide PSI information. 14 * - Supports sorting, filtering. 15 * - Supports both interactive (screen refresh). 16 * 17 * Copyright (C) Fan Yu, ZTE Corp. 2025 18 * Copyright (C) Wang Yaxin, ZTE Corp. 2025 19 * 20 * Compile with 21 * gcc -I/usr/src/linux/include delaytop.c -o delaytop 22 */ 23 24 #include <stdio.h> 25 #include <stdlib.h> 26 #include <string.h> 27 #include <errno.h> 28 #include <unistd.h> 29 #include <fcntl.h> 30 #include <getopt.h> 31 #include <signal.h> 32 #include <time.h> 33 #include <dirent.h> 34 #include <ctype.h> 35 #include <stdbool.h> 36 #include <sys/types.h> 37 #include <sys/stat.h> 38 #include <sys/socket.h> 39 #include <sys/select.h> 40 #include <termios.h> 41 #include <limits.h> 42 #include <linux/genetlink.h> 43 #include <linux/taskstats.h> 44 #include <linux/cgroupstats.h> 45 46 #define PSI_CPU_SOME "/proc/pressure/cpu" 47 #define PSI_CPU_FULL "/proc/pressure/cpu" 48 #define PSI_MEMORY_SOME "/proc/pressure/memory" 49 #define PSI_MEMORY_FULL "/proc/pressure/memory" 50 #define PSI_IO_SOME "/proc/pressure/io" 51 #define PSI_IO_FULL "/proc/pressure/io" 52 #define PSI_IRQ_FULL "/proc/pressure/irq" 53 54 #define NLA_NEXT(na) ((struct nlattr *)((char *)(na) + NLA_ALIGN((na)->nla_len))) 55 #define NLA_DATA(na) ((void *)((char *)(na) + NLA_HDRLEN)) 56 #define NLA_PAYLOAD(len) (len - NLA_HDRLEN) 57 58 #define GENLMSG_DATA(glh) ((void *)(NLMSG_DATA(glh) + GENL_HDRLEN)) 59 #define GENLMSG_PAYLOAD(glh) (NLMSG_PAYLOAD(glh, 0) - GENL_HDRLEN) 60 61 #define TASK_COMM_LEN 16 62 #define MAX_MSG_SIZE 1024 63 #define MAX_TASKS 1000 64 #define SET_TASK_STAT(task_count, field) tasks[task_count].field = stats.field 65 #define BOOL_FPRINT(stream, fmt, ...) \ 66 ({ \ 67 int ret = fprintf(stream, fmt, ##__VA_ARGS__); \ 68 ret >= 0; \ 69 }) 70 #define PSI_LINE_FORMAT "%-12s %6.1f%%/%6.1f%%/%6.1f%%/%8llu(ms)\n" 71 72 /* Program settings structure */ 73 struct config { 74 int delay; /* Update interval in seconds */ 75 int iterations; /* Number of iterations, 0 == infinite */ 76 int max_processes; /* Maximum number of processes to show */ 77 char sort_field; /* Field to sort by */ 78 int output_one_time; /* Output once and exit */ 79 int monitor_pid; /* Monitor specific PID */ 80 char *container_path; /* Path to container cgroup */ 81 }; 82 83 /* PSI statistics structure */ 84 struct psi_stats { 85 double cpu_some_avg10, cpu_some_avg60, cpu_some_avg300; 86 unsigned long long cpu_some_total; 87 double cpu_full_avg10, cpu_full_avg60, cpu_full_avg300; 88 unsigned long long cpu_full_total; 89 double memory_some_avg10, memory_some_avg60, memory_some_avg300; 90 unsigned long long memory_some_total; 91 double memory_full_avg10, memory_full_avg60, memory_full_avg300; 92 unsigned long long memory_full_total; 93 double io_some_avg10, io_some_avg60, io_some_avg300; 94 unsigned long long io_some_total; 95 double io_full_avg10, io_full_avg60, io_full_avg300; 96 unsigned long long io_full_total; 97 double irq_full_avg10, irq_full_avg60, irq_full_avg300; 98 unsigned long long irq_full_total; 99 }; 100 101 /* Task delay information structure */ 102 struct task_info { 103 int pid; 104 int tgid; 105 char command[TASK_COMM_LEN]; 106 unsigned long long cpu_count; 107 unsigned long long cpu_delay_total; 108 unsigned long long blkio_count; 109 unsigned long long blkio_delay_total; 110 unsigned long long swapin_count; 111 unsigned long long swapin_delay_total; 112 unsigned long long freepages_count; 113 unsigned long long freepages_delay_total; 114 unsigned long long thrashing_count; 115 unsigned long long thrashing_delay_total; 116 unsigned long long compact_count; 117 unsigned long long compact_delay_total; 118 unsigned long long wpcopy_count; 119 unsigned long long wpcopy_delay_total; 120 unsigned long long irq_count; 121 unsigned long long irq_delay_total; 122 }; 123 124 /* Container statistics structure */ 125 struct container_stats { 126 int nr_sleeping; /* Number of sleeping processes */ 127 int nr_running; /* Number of running processes */ 128 int nr_stopped; /* Number of stopped processes */ 129 int nr_uninterruptible; /* Number of uninterruptible processes */ 130 int nr_io_wait; /* Number of processes in IO wait */ 131 }; 132 133 /* Global variables */ 134 static struct config cfg; 135 static struct psi_stats psi; 136 static struct task_info tasks[MAX_TASKS]; 137 static int task_count; 138 static int running = 1; 139 static struct container_stats container_stats; 140 141 /* Netlink socket variables */ 142 static int nl_sd = -1; 143 static int family_id; 144 145 /* Set terminal to non-canonical mode for q-to-quit */ 146 static struct termios orig_termios; 147 static void enable_raw_mode(void) 148 { 149 struct termios raw; 150 151 tcgetattr(STDIN_FILENO, &orig_termios); 152 raw = orig_termios; 153 raw.c_lflag &= ~(ICANON | ECHO); 154 tcsetattr(STDIN_FILENO, TCSAFLUSH, &raw); 155 } 156 static void disable_raw_mode(void) 157 { 158 tcsetattr(STDIN_FILENO, TCSAFLUSH, &orig_termios); 159 } 160 161 /* Display usage information and command line options */ 162 static void usage(void) 163 { 164 printf("Usage: delaytop [Options]\n" 165 "Options:\n" 166 " -h, --help Show this help message and exit\n" 167 " -d, --delay=SECONDS Set refresh interval (default: 2 seconds, min: 1)\n" 168 " -n, --iterations=COUNT Set number of updates (default: 0 = infinite)\n" 169 " -P, --processes=NUMBER Set maximum number of processes to show (default: 20, max: 1000)\n" 170 " -o, --once Display once and exit\n" 171 " -p, --pid=PID Monitor only the specified PID\n" 172 " -C, --container=PATH Monitor the container at specified cgroup path\n"); 173 exit(0); 174 } 175 176 /* Parse command line arguments and set configuration */ 177 static void parse_args(int argc, char **argv) 178 { 179 int c; 180 struct option long_options[] = { 181 {"help", no_argument, 0, 'h'}, 182 {"delay", required_argument, 0, 'd'}, 183 {"iterations", required_argument, 0, 'n'}, 184 {"pid", required_argument, 0, 'p'}, 185 {"once", no_argument, 0, 'o'}, 186 {"processes", required_argument, 0, 'P'}, 187 {"container", required_argument, 0, 'C'}, 188 {0, 0, 0, 0} 189 }; 190 191 /* Set defaults */ 192 cfg.delay = 2; 193 cfg.iterations = 0; 194 cfg.max_processes = 20; 195 cfg.sort_field = 'c'; /* Default sort by CPU delay */ 196 cfg.output_one_time = 0; 197 cfg.monitor_pid = 0; /* 0 means monitor all PIDs */ 198 cfg.container_path = NULL; 199 200 while (1) { 201 int option_index = 0; 202 203 c = getopt_long(argc, argv, "hd:n:p:oP:C:", long_options, &option_index); 204 if (c == -1) 205 break; 206 207 switch (c) { 208 case 'h': 209 usage(); 210 break; 211 case 'd': 212 cfg.delay = atoi(optarg); 213 if (cfg.delay < 1) { 214 fprintf(stderr, "Error: delay must be >= 1.\n"); 215 exit(1); 216 } 217 break; 218 case 'n': 219 cfg.iterations = atoi(optarg); 220 if (cfg.iterations < 0) { 221 fprintf(stderr, "Error: iterations must be >= 0.\n"); 222 exit(1); 223 } 224 break; 225 case 'p': 226 cfg.monitor_pid = atoi(optarg); 227 if (cfg.monitor_pid < 1) { 228 fprintf(stderr, "Error: pid must be >= 1.\n"); 229 exit(1); 230 } 231 break; 232 case 'o': 233 cfg.output_one_time = 1; 234 break; 235 case 'P': 236 cfg.max_processes = atoi(optarg); 237 if (cfg.max_processes < 1) { 238 fprintf(stderr, "Error: processes must be >= 1.\n"); 239 exit(1); 240 } 241 if (cfg.max_processes > MAX_TASKS) { 242 fprintf(stderr, "Warning: processes capped to %d.\n", 243 MAX_TASKS); 244 cfg.max_processes = MAX_TASKS; 245 } 246 break; 247 case 'C': 248 cfg.container_path = strdup(optarg); 249 break; 250 default: 251 fprintf(stderr, "Try 'delaytop --help' for more information.\n"); 252 exit(1); 253 } 254 } 255 } 256 257 /* Create a raw netlink socket and bind */ 258 static int create_nl_socket(void) 259 { 260 int fd; 261 struct sockaddr_nl local; 262 263 fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC); 264 if (fd < 0) 265 return -1; 266 267 memset(&local, 0, sizeof(local)); 268 local.nl_family = AF_NETLINK; 269 270 if (bind(fd, (struct sockaddr *) &local, sizeof(local)) < 0) { 271 fprintf(stderr, "Failed to bind socket when create nl_socket\n"); 272 close(fd); 273 return -1; 274 } 275 276 return fd; 277 } 278 279 /* Send a command via netlink */ 280 static int send_cmd(int sd, __u16 nlmsg_type, __u32 nlmsg_pid, 281 __u8 genl_cmd, __u16 nla_type, 282 void *nla_data, int nla_len) 283 { 284 struct sockaddr_nl nladdr; 285 struct nlattr *na; 286 int r, buflen; 287 char *buf; 288 289 struct { 290 struct nlmsghdr n; 291 struct genlmsghdr g; 292 char buf[MAX_MSG_SIZE]; 293 } msg; 294 295 msg.n.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN); 296 msg.n.nlmsg_type = nlmsg_type; 297 msg.n.nlmsg_flags = NLM_F_REQUEST; 298 msg.n.nlmsg_seq = 0; 299 msg.n.nlmsg_pid = nlmsg_pid; 300 msg.g.cmd = genl_cmd; 301 msg.g.version = 0x1; 302 na = (struct nlattr *) GENLMSG_DATA(&msg); 303 na->nla_type = nla_type; 304 na->nla_len = nla_len + NLA_HDRLEN; 305 memcpy(NLA_DATA(na), nla_data, nla_len); 306 msg.n.nlmsg_len += NLMSG_ALIGN(na->nla_len); 307 308 buf = (char *) &msg; 309 buflen = msg.n.nlmsg_len; 310 memset(&nladdr, 0, sizeof(nladdr)); 311 nladdr.nl_family = AF_NETLINK; 312 while ((r = sendto(sd, buf, buflen, 0, (struct sockaddr *) &nladdr, 313 sizeof(nladdr))) < buflen) { 314 if (r > 0) { 315 buf += r; 316 buflen -= r; 317 } else if (errno != EAGAIN) 318 return -1; 319 } 320 return 0; 321 } 322 323 /* Get family ID for taskstats via netlink */ 324 static int get_family_id(int sd) 325 { 326 struct { 327 struct nlmsghdr n; 328 struct genlmsghdr g; 329 char buf[256]; 330 } ans; 331 332 int id = 0, rc; 333 struct nlattr *na; 334 int rep_len; 335 char name[100]; 336 337 strncpy(name, TASKSTATS_GENL_NAME, sizeof(name) - 1); 338 name[sizeof(name) - 1] = '\0'; 339 rc = send_cmd(sd, GENL_ID_CTRL, getpid(), CTRL_CMD_GETFAMILY, 340 CTRL_ATTR_FAMILY_NAME, (void *)name, 341 strlen(TASKSTATS_GENL_NAME)+1); 342 if (rc < 0) { 343 fprintf(stderr, "Failed to send cmd for family id\n"); 344 return 0; 345 } 346 347 rep_len = recv(sd, &ans, sizeof(ans), 0); 348 if (ans.n.nlmsg_type == NLMSG_ERROR || 349 (rep_len < 0) || !NLMSG_OK((&ans.n), rep_len)) { 350 fprintf(stderr, "Failed to receive response for family id\n"); 351 return 0; 352 } 353 354 na = (struct nlattr *) GENLMSG_DATA(&ans); 355 na = (struct nlattr *) ((char *) na + NLA_ALIGN(na->nla_len)); 356 if (na->nla_type == CTRL_ATTR_FAMILY_ID) 357 id = *(__u16 *) NLA_DATA(na); 358 return id; 359 } 360 361 static void read_psi_stats(void) 362 { 363 FILE *fp; 364 char line[256]; 365 int ret = 0; 366 /* Zero all fields */ 367 memset(&psi, 0, sizeof(psi)); 368 /* CPU pressure */ 369 fp = fopen(PSI_CPU_SOME, "r"); 370 if (fp) { 371 while (fgets(line, sizeof(line), fp)) { 372 if (strncmp(line, "some", 4) == 0) { 373 ret = sscanf(line, "some avg10=%lf avg60=%lf avg300=%lf total=%llu", 374 &psi.cpu_some_avg10, &psi.cpu_some_avg60, 375 &psi.cpu_some_avg300, &psi.cpu_some_total); 376 if (ret != 4) 377 fprintf(stderr, "Failed to parse CPU some PSI data\n"); 378 } else if (strncmp(line, "full", 4) == 0) { 379 ret = sscanf(line, "full avg10=%lf avg60=%lf avg300=%lf total=%llu", 380 &psi.cpu_full_avg10, &psi.cpu_full_avg60, 381 &psi.cpu_full_avg300, &psi.cpu_full_total); 382 if (ret != 4) 383 fprintf(stderr, "Failed to parse CPU full PSI data\n"); 384 } 385 } 386 fclose(fp); 387 } 388 /* Memory pressure */ 389 fp = fopen(PSI_MEMORY_SOME, "r"); 390 if (fp) { 391 while (fgets(line, sizeof(line), fp)) { 392 if (strncmp(line, "some", 4) == 0) { 393 ret = sscanf(line, "some avg10=%lf avg60=%lf avg300=%lf total=%llu", 394 &psi.memory_some_avg10, &psi.memory_some_avg60, 395 &psi.memory_some_avg300, &psi.memory_some_total); 396 if (ret != 4) 397 fprintf(stderr, "Failed to parse Memory some PSI data\n"); 398 } else if (strncmp(line, "full", 4) == 0) { 399 ret = sscanf(line, "full avg10=%lf avg60=%lf avg300=%lf total=%llu", 400 &psi.memory_full_avg10, &psi.memory_full_avg60, 401 &psi.memory_full_avg300, &psi.memory_full_total); 402 } 403 if (ret != 4) 404 fprintf(stderr, "Failed to parse Memory full PSI data\n"); 405 } 406 fclose(fp); 407 } 408 /* IO pressure */ 409 fp = fopen(PSI_IO_SOME, "r"); 410 if (fp) { 411 while (fgets(line, sizeof(line), fp)) { 412 if (strncmp(line, "some", 4) == 0) { 413 ret = sscanf(line, "some avg10=%lf avg60=%lf avg300=%lf total=%llu", 414 &psi.io_some_avg10, &psi.io_some_avg60, 415 &psi.io_some_avg300, &psi.io_some_total); 416 if (ret != 4) 417 fprintf(stderr, "Failed to parse IO some PSI data\n"); 418 } else if (strncmp(line, "full", 4) == 0) { 419 ret = sscanf(line, "full avg10=%lf avg60=%lf avg300=%lf total=%llu", 420 &psi.io_full_avg10, &psi.io_full_avg60, 421 &psi.io_full_avg300, &psi.io_full_total); 422 if (ret != 4) 423 fprintf(stderr, "Failed to parse IO full PSI data\n"); 424 } 425 } 426 fclose(fp); 427 } 428 /* IRQ pressure (only full) */ 429 fp = fopen(PSI_IRQ_FULL, "r"); 430 if (fp) { 431 while (fgets(line, sizeof(line), fp)) { 432 if (strncmp(line, "full", 4) == 0) { 433 ret = sscanf(line, "full avg10=%lf avg60=%lf avg300=%lf total=%llu", 434 &psi.irq_full_avg10, &psi.irq_full_avg60, 435 &psi.irq_full_avg300, &psi.irq_full_total); 436 if (ret != 4) 437 fprintf(stderr, "Failed to parse IRQ full PSI data\n"); 438 } 439 } 440 fclose(fp); 441 } 442 } 443 444 static int read_comm(int pid, char *comm_buf, size_t buf_size) 445 { 446 char path[64]; 447 int ret = -1; 448 size_t len; 449 FILE *fp; 450 451 snprintf(path, sizeof(path), "/proc/%d/comm", pid); 452 fp = fopen(path, "r"); 453 if (!fp) { 454 fprintf(stderr, "Failed to open comm file /proc/%d/comm\n", pid); 455 return ret; 456 } 457 458 if (fgets(comm_buf, buf_size, fp)) { 459 len = strlen(comm_buf); 460 if (len > 0 && comm_buf[len - 1] == '\n') 461 comm_buf[len - 1] = '\0'; 462 ret = 0; 463 } 464 465 fclose(fp); 466 467 return ret; 468 } 469 470 static void fetch_and_fill_task_info(int pid, const char *comm) 471 { 472 struct { 473 struct nlmsghdr n; 474 struct genlmsghdr g; 475 char buf[MAX_MSG_SIZE]; 476 } resp; 477 struct taskstats stats; 478 struct nlattr *nested; 479 struct nlattr *na; 480 int nested_len; 481 int nl_len; 482 int rc; 483 484 /* Send request for task stats */ 485 if (send_cmd(nl_sd, family_id, getpid(), TASKSTATS_CMD_GET, 486 TASKSTATS_CMD_ATTR_PID, &pid, sizeof(pid)) < 0) { 487 fprintf(stderr, "Failed to send request for task stats\n"); 488 return; 489 } 490 491 /* Receive response */ 492 rc = recv(nl_sd, &resp, sizeof(resp), 0); 493 if (rc < 0 || resp.n.nlmsg_type == NLMSG_ERROR) { 494 fprintf(stderr, "Failed to receive response for task stats\n"); 495 return; 496 } 497 498 /* Parse response */ 499 nl_len = GENLMSG_PAYLOAD(&resp.n); 500 na = (struct nlattr *) GENLMSG_DATA(&resp); 501 while (nl_len > 0) { 502 if (na->nla_type == TASKSTATS_TYPE_AGGR_PID) { 503 nested = (struct nlattr *) NLA_DATA(na); 504 nested_len = NLA_PAYLOAD(na->nla_len); 505 while (nested_len > 0) { 506 if (nested->nla_type == TASKSTATS_TYPE_STATS) { 507 memcpy(&stats, NLA_DATA(nested), sizeof(stats)); 508 if (task_count < MAX_TASKS) { 509 tasks[task_count].pid = pid; 510 tasks[task_count].tgid = pid; 511 strncpy(tasks[task_count].command, comm, 512 TASK_COMM_LEN - 1); 513 tasks[task_count].command[TASK_COMM_LEN - 1] = '\0'; 514 SET_TASK_STAT(task_count, cpu_count); 515 SET_TASK_STAT(task_count, cpu_delay_total); 516 SET_TASK_STAT(task_count, blkio_count); 517 SET_TASK_STAT(task_count, blkio_delay_total); 518 SET_TASK_STAT(task_count, swapin_count); 519 SET_TASK_STAT(task_count, swapin_delay_total); 520 SET_TASK_STAT(task_count, freepages_count); 521 SET_TASK_STAT(task_count, freepages_delay_total); 522 SET_TASK_STAT(task_count, thrashing_count); 523 SET_TASK_STAT(task_count, thrashing_delay_total); 524 SET_TASK_STAT(task_count, compact_count); 525 SET_TASK_STAT(task_count, compact_delay_total); 526 SET_TASK_STAT(task_count, wpcopy_count); 527 SET_TASK_STAT(task_count, wpcopy_delay_total); 528 SET_TASK_STAT(task_count, irq_count); 529 SET_TASK_STAT(task_count, irq_delay_total); 530 task_count++; 531 } 532 break; 533 } 534 nested_len -= NLA_ALIGN(nested->nla_len); 535 nested = NLA_NEXT(nested); 536 } 537 } 538 nl_len -= NLA_ALIGN(na->nla_len); 539 na = NLA_NEXT(na); 540 } 541 return; 542 } 543 544 static void get_task_delays(void) 545 { 546 char comm[TASK_COMM_LEN]; 547 struct dirent *entry; 548 DIR *dir; 549 int pid; 550 551 task_count = 0; 552 if (cfg.monitor_pid > 0) { 553 if (read_comm(cfg.monitor_pid, comm, sizeof(comm)) == 0) 554 fetch_and_fill_task_info(cfg.monitor_pid, comm); 555 return; 556 } 557 558 dir = opendir("/proc"); 559 if (!dir) { 560 fprintf(stderr, "Error opening /proc directory\n"); 561 return; 562 } 563 564 while ((entry = readdir(dir)) != NULL && task_count < MAX_TASKS) { 565 if (!isdigit(entry->d_name[0])) 566 continue; 567 pid = atoi(entry->d_name); 568 if (pid == 0) 569 continue; 570 if (read_comm(pid, comm, sizeof(comm)) != 0) 571 continue; 572 fetch_and_fill_task_info(pid, comm); 573 } 574 closedir(dir); 575 } 576 577 /* Calculate average delay in milliseconds */ 578 static double average_ms(unsigned long long total, unsigned long long count) 579 { 580 if (count == 0) 581 return 0; 582 return (double)total / 1000000.0 / count; 583 } 584 585 /* Comparison function for sorting tasks */ 586 static int compare_tasks(const void *a, const void *b) 587 { 588 const struct task_info *t1 = (const struct task_info *)a; 589 const struct task_info *t2 = (const struct task_info *)b; 590 double avg1, avg2; 591 592 switch (cfg.sort_field) { 593 case 'c': /* CPU */ 594 avg1 = average_ms(t1->cpu_delay_total, t1->cpu_count); 595 avg2 = average_ms(t2->cpu_delay_total, t2->cpu_count); 596 if (avg1 != avg2) 597 return avg2 > avg1 ? 1 : -1; 598 return t2->cpu_delay_total > t1->cpu_delay_total ? 1 : -1; 599 600 default: 601 return t2->cpu_delay_total > t1->cpu_delay_total ? 1 : -1; 602 } 603 } 604 605 /* Sort tasks by selected field */ 606 static void sort_tasks(void) 607 { 608 if (task_count > 0) 609 qsort(tasks, task_count, sizeof(struct task_info), compare_tasks); 610 } 611 612 /* Get container statistics via cgroupstats */ 613 static void get_container_stats(void) 614 { 615 int rc, cfd; 616 struct { 617 struct nlmsghdr n; 618 struct genlmsghdr g; 619 char buf[MAX_MSG_SIZE]; 620 } req, resp; 621 struct nlattr *na; 622 int nl_len; 623 struct cgroupstats stats; 624 625 /* Check if container path is set */ 626 if (!cfg.container_path) 627 return; 628 629 /* Open container cgroup */ 630 cfd = open(cfg.container_path, O_RDONLY); 631 if (cfd < 0) { 632 fprintf(stderr, "Error opening container path: %s\n", cfg.container_path); 633 return; 634 } 635 636 /* Send request for container stats */ 637 if (send_cmd(nl_sd, family_id, getpid(), CGROUPSTATS_CMD_GET, 638 CGROUPSTATS_CMD_ATTR_FD, &cfd, sizeof(__u32)) < 0) { 639 fprintf(stderr, "Failed to send request for container stats\n"); 640 close(cfd); 641 return; 642 } 643 644 /* Receive response */ 645 rc = recv(nl_sd, &resp, sizeof(resp), 0); 646 if (rc < 0 || resp.n.nlmsg_type == NLMSG_ERROR) { 647 fprintf(stderr, "Failed to receive response for container stats\n"); 648 close(cfd); 649 return; 650 } 651 652 /* Parse response */ 653 nl_len = GENLMSG_PAYLOAD(&resp.n); 654 na = (struct nlattr *) GENLMSG_DATA(&resp); 655 while (nl_len > 0) { 656 if (na->nla_type == CGROUPSTATS_TYPE_CGROUP_STATS) { 657 /* Get the cgroupstats structure */ 658 memcpy(&stats, NLA_DATA(na), sizeof(stats)); 659 660 /* Fill container stats */ 661 container_stats.nr_sleeping = stats.nr_sleeping; 662 container_stats.nr_running = stats.nr_running; 663 container_stats.nr_stopped = stats.nr_stopped; 664 container_stats.nr_uninterruptible = stats.nr_uninterruptible; 665 container_stats.nr_io_wait = stats.nr_io_wait; 666 break; 667 } 668 nl_len -= NLA_ALIGN(na->nla_len); 669 na = (struct nlattr *) ((char *) na + NLA_ALIGN(na->nla_len)); 670 } 671 672 close(cfd); 673 } 674 675 /* Display results to stdout or log file */ 676 static void display_results(void) 677 { 678 time_t now = time(NULL); 679 struct tm *tm_now = localtime(&now); 680 FILE *out = stdout; 681 char timestamp[32]; 682 bool suc = true; 683 int i, count; 684 685 /* Clear terminal screen */ 686 suc &= BOOL_FPRINT(out, "\033[H\033[J"); 687 688 /* PSI output (one-line, no cat style) */ 689 suc &= BOOL_FPRINT(out, "System Pressure Information: (avg10/avg60/avg300/total)\n"); 690 suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT, 691 "CPU some:", 692 psi.cpu_some_avg10, 693 psi.cpu_some_avg60, 694 psi.cpu_some_avg300, 695 psi.cpu_some_total / 1000); 696 suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT, 697 "CPU full:", 698 psi.cpu_full_avg10, 699 psi.cpu_full_avg60, 700 psi.cpu_full_avg300, 701 psi.cpu_full_total / 1000); 702 suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT, 703 "Memory full:", 704 psi.memory_full_avg10, 705 psi.memory_full_avg60, 706 psi.memory_full_avg300, 707 psi.memory_full_total / 1000); 708 suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT, 709 "Memory some:", 710 psi.memory_some_avg10, 711 psi.memory_some_avg60, 712 psi.memory_some_avg300, 713 psi.memory_some_total / 1000); 714 suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT, 715 "IO full:", 716 psi.io_full_avg10, 717 psi.io_full_avg60, 718 psi.io_full_avg300, 719 psi.io_full_total / 1000); 720 suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT, 721 "IO some:", 722 psi.io_some_avg10, 723 psi.io_some_avg60, 724 psi.io_some_avg300, 725 psi.io_some_total / 1000); 726 suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT, 727 "IRQ full:", 728 psi.irq_full_avg10, 729 psi.irq_full_avg60, 730 psi.irq_full_avg300, 731 psi.irq_full_total / 1000); 732 733 if (cfg.container_path) { 734 suc &= BOOL_FPRINT(out, "Container Information (%s):\n", cfg.container_path); 735 suc &= BOOL_FPRINT(out, "Processes: running=%d, sleeping=%d, ", 736 container_stats.nr_running, container_stats.nr_sleeping); 737 suc &= BOOL_FPRINT(out, "stopped=%d, uninterruptible=%d, io_wait=%d\n\n", 738 container_stats.nr_stopped, container_stats.nr_uninterruptible, 739 container_stats.nr_io_wait); 740 } 741 suc &= BOOL_FPRINT(out, "Top %d processes (sorted by CPU delay):\n", 742 cfg.max_processes); 743 suc &= BOOL_FPRINT(out, "%5s %5s %-17s", "PID", "TGID", "COMMAND"); 744 suc &= BOOL_FPRINT(out, "%7s %7s %7s %7s %7s %7s %7s %7s\n", 745 "CPU(ms)", "IO(ms)", "SWAP(ms)", "RCL(ms)", 746 "THR(ms)", "CMP(ms)", "WP(ms)", "IRQ(ms)"); 747 748 suc &= BOOL_FPRINT(out, "-----------------------------------------------"); 749 suc &= BOOL_FPRINT(out, "----------------------------------------------\n"); 750 count = task_count < cfg.max_processes ? task_count : cfg.max_processes; 751 752 for (i = 0; i < count; i++) { 753 suc &= BOOL_FPRINT(out, "%5d %5d %-15s", 754 tasks[i].pid, tasks[i].tgid, tasks[i].command); 755 suc &= BOOL_FPRINT(out, "%7.2f %7.2f %7.2f %7.2f %7.2f %7.2f %7.2f %7.2f\n", 756 average_ms(tasks[i].cpu_delay_total, tasks[i].cpu_count), 757 average_ms(tasks[i].blkio_delay_total, tasks[i].blkio_count), 758 average_ms(tasks[i].swapin_delay_total, tasks[i].swapin_count), 759 average_ms(tasks[i].freepages_delay_total, tasks[i].freepages_count), 760 average_ms(tasks[i].thrashing_delay_total, tasks[i].thrashing_count), 761 average_ms(tasks[i].compact_delay_total, tasks[i].compact_count), 762 average_ms(tasks[i].wpcopy_delay_total, tasks[i].wpcopy_count), 763 average_ms(tasks[i].irq_delay_total, tasks[i].irq_count)); 764 } 765 766 suc &= BOOL_FPRINT(out, "\n"); 767 768 if (!suc) 769 perror("Error writing to output"); 770 } 771 772 /* Main function */ 773 int main(int argc, char **argv) 774 { 775 int iterations = 0; 776 int use_q_quit = 0; 777 778 /* Parse command line arguments */ 779 parse_args(argc, argv); 780 781 /* Setup netlink socket */ 782 nl_sd = create_nl_socket(); 783 if (nl_sd < 0) { 784 fprintf(stderr, "Error creating netlink socket\n"); 785 exit(1); 786 } 787 788 /* Get family ID for taskstats via netlink */ 789 family_id = get_family_id(nl_sd); 790 if (!family_id) { 791 fprintf(stderr, "Error getting taskstats family ID\n"); 792 close(nl_sd); 793 exit(1); 794 } 795 796 if (!cfg.output_one_time) { 797 use_q_quit = 1; 798 enable_raw_mode(); 799 printf("Press 'q' to quit.\n"); 800 fflush(stdout); 801 } 802 803 /* Main loop */ 804 while (running) { 805 /* Read PSI statistics */ 806 read_psi_stats(); 807 808 /* Get container stats if container path provided */ 809 if (cfg.container_path) 810 get_container_stats(); 811 812 /* Get task delays */ 813 get_task_delays(); 814 815 /* Sort tasks */ 816 sort_tasks(); 817 818 /* Display results to stdout or log file */ 819 display_results(); 820 821 /* Check for iterations */ 822 if (cfg.iterations > 0 && ++iterations >= cfg.iterations) 823 break; 824 825 /* Exit if output_one_time is set */ 826 if (cfg.output_one_time) 827 break; 828 829 /* Check for 'q' key to quit */ 830 if (use_q_quit) { 831 struct timeval tv = {cfg.delay, 0}; 832 fd_set readfds; 833 834 FD_ZERO(&readfds); 835 FD_SET(STDIN_FILENO, &readfds); 836 int r = select(STDIN_FILENO+1, &readfds, NULL, NULL, &tv); 837 838 if (r > 0 && FD_ISSET(STDIN_FILENO, &readfds)) { 839 char ch = 0; 840 841 read(STDIN_FILENO, &ch, 1); 842 if (ch == 'q' || ch == 'Q') { 843 running = 0; 844 break; 845 } 846 } 847 } else { 848 sleep(cfg.delay); 849 } 850 } 851 852 /* Restore terminal mode */ 853 if (use_q_quit) 854 disable_raw_mode(); 855 856 /* Cleanup */ 857 close(nl_sd); 858 if (cfg.container_path) 859 free(cfg.container_path); 860 861 return 0; 862 } 863