1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * delaytop.c - system-wide delay monitoring tool.
4 *
 * This tool provides real-time monitoring and statistics of
 * system-, container-, and task-level delays, covering CPU,
 * memory, IO, and IRQ pressure. It runs interactively (top-like)
 * and can report delay information for the whole system, for
 * specific containers (cgroups), or for individual tasks (PIDs).
10 *
11 * Key features:
12 * - Collects per-task delay accounting statistics via taskstats.
13 * - Collects system-wide PSI information.
 * - Supports sorting and filtering of tasks.
 * - Supports interactive, top-like screen refresh.
16 *
17 * Copyright (C) Fan Yu, ZTE Corp. 2025
18 * Copyright (C) Wang Yaxin, ZTE Corp. 2025
19 *
20 * Compile with
21 * gcc -I/usr/src/linux/include delaytop.c -o delaytop
22 */
23
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <errno.h>
28 #include <unistd.h>
29 #include <fcntl.h>
30 #include <getopt.h>
31 #include <signal.h>
32 #include <time.h>
33 #include <dirent.h>
34 #include <ctype.h>
35 #include <stdbool.h>
36 #include <sys/types.h>
37 #include <sys/stat.h>
38 #include <sys/socket.h>
39 #include <sys/select.h>
40 #include <termios.h>
41 #include <limits.h>
42 #include <linux/genetlink.h>
43 #include <linux/taskstats.h>
44 #include <linux/cgroupstats.h>
45
46 #define PSI_CPU_SOME "/proc/pressure/cpu"
47 #define PSI_CPU_FULL "/proc/pressure/cpu"
48 #define PSI_MEMORY_SOME "/proc/pressure/memory"
49 #define PSI_MEMORY_FULL "/proc/pressure/memory"
50 #define PSI_IO_SOME "/proc/pressure/io"
51 #define PSI_IO_FULL "/proc/pressure/io"
52 #define PSI_IRQ_FULL "/proc/pressure/irq"
53
54 #define NLA_NEXT(na) ((struct nlattr *)((char *)(na) + NLA_ALIGN((na)->nla_len)))
55 #define NLA_DATA(na) ((void *)((char *)(na) + NLA_HDRLEN))
56 #define NLA_PAYLOAD(len) (len - NLA_HDRLEN)
57
58 #define GENLMSG_DATA(glh) ((void *)(NLMSG_DATA(glh) + GENL_HDRLEN))
59 #define GENLMSG_PAYLOAD(glh) (NLMSG_PAYLOAD(glh, 0) - GENL_HDRLEN)
60
61 #define TASK_COMM_LEN 16
62 #define MAX_MSG_SIZE 1024
63 #define MAX_TASKS 1000
64 #define SET_TASK_STAT(task_count, field) tasks[task_count].field = stats.field
65 #define BOOL_FPRINT(stream, fmt, ...) \
66 ({ \
67 int ret = fprintf(stream, fmt, ##__VA_ARGS__); \
68 ret >= 0; \
69 })
70 #define PSI_LINE_FORMAT "%-12s %6.1f%%/%6.1f%%/%6.1f%%/%8llu(ms)\n"
71
72 /* Program settings structure */
73 struct config {
74 int delay; /* Update interval in seconds */
75 int iterations; /* Number of iterations, 0 == infinite */
76 int max_processes; /* Maximum number of processes to show */
77 char sort_field; /* Field to sort by */
78 int output_one_time; /* Output once and exit */
79 int monitor_pid; /* Monitor specific PID */
80 char *container_path; /* Path to container cgroup */
81 };
82
83 /* PSI statistics structure */
84 struct psi_stats {
85 double cpu_some_avg10, cpu_some_avg60, cpu_some_avg300;
86 unsigned long long cpu_some_total;
87 double cpu_full_avg10, cpu_full_avg60, cpu_full_avg300;
88 unsigned long long cpu_full_total;
89 double memory_some_avg10, memory_some_avg60, memory_some_avg300;
90 unsigned long long memory_some_total;
91 double memory_full_avg10, memory_full_avg60, memory_full_avg300;
92 unsigned long long memory_full_total;
93 double io_some_avg10, io_some_avg60, io_some_avg300;
94 unsigned long long io_some_total;
95 double io_full_avg10, io_full_avg60, io_full_avg300;
96 unsigned long long io_full_total;
97 double irq_full_avg10, irq_full_avg60, irq_full_avg300;
98 unsigned long long irq_full_total;
99 };
100
101 /* Task delay information structure */
102 struct task_info {
103 int pid;
104 int tgid;
105 char command[TASK_COMM_LEN];
106 unsigned long long cpu_count;
107 unsigned long long cpu_delay_total;
108 unsigned long long blkio_count;
109 unsigned long long blkio_delay_total;
110 unsigned long long swapin_count;
111 unsigned long long swapin_delay_total;
112 unsigned long long freepages_count;
113 unsigned long long freepages_delay_total;
114 unsigned long long thrashing_count;
115 unsigned long long thrashing_delay_total;
116 unsigned long long compact_count;
117 unsigned long long compact_delay_total;
118 unsigned long long wpcopy_count;
119 unsigned long long wpcopy_delay_total;
120 unsigned long long irq_count;
121 unsigned long long irq_delay_total;
122 };
123
124 /* Container statistics structure */
125 struct container_stats {
126 int nr_sleeping; /* Number of sleeping processes */
127 int nr_running; /* Number of running processes */
128 int nr_stopped; /* Number of stopped processes */
129 int nr_uninterruptible; /* Number of uninterruptible processes */
130 int nr_io_wait; /* Number of processes in IO wait */
131 };
132
133 /* Global variables */
134 static struct config cfg;
135 static struct psi_stats psi;
136 static struct task_info tasks[MAX_TASKS];
137 static int task_count;
138 static int running = 1;
139 static struct container_stats container_stats;
140
141 /* Netlink socket variables */
142 static int nl_sd = -1;
143 static int family_id;
144
145 /* Set terminal to non-canonical mode for q-to-quit */
146 static struct termios orig_termios;
enable_raw_mode(void)147 static void enable_raw_mode(void)
148 {
149 struct termios raw;
150
151 tcgetattr(STDIN_FILENO, &orig_termios);
152 raw = orig_termios;
153 raw.c_lflag &= ~(ICANON | ECHO);
154 tcsetattr(STDIN_FILENO, TCSAFLUSH, &raw);
155 }
disable_raw_mode(void)156 static void disable_raw_mode(void)
157 {
158 tcsetattr(STDIN_FILENO, TCSAFLUSH, &orig_termios);
159 }
160
161 /* Display usage information and command line options */
usage(void)162 static void usage(void)
163 {
164 printf("Usage: delaytop [Options]\n"
165 "Options:\n"
166 " -h, --help Show this help message and exit\n"
167 " -d, --delay=SECONDS Set refresh interval (default: 2 seconds, min: 1)\n"
168 " -n, --iterations=COUNT Set number of updates (default: 0 = infinite)\n"
169 " -P, --processes=NUMBER Set maximum number of processes to show (default: 20, max: 1000)\n"
170 " -o, --once Display once and exit\n"
171 " -p, --pid=PID Monitor only the specified PID\n"
172 " -C, --container=PATH Monitor the container at specified cgroup path\n");
173 exit(0);
174 }
175
176 /* Parse command line arguments and set configuration */
parse_args(int argc,char ** argv)177 static void parse_args(int argc, char **argv)
178 {
179 int c;
180 struct option long_options[] = {
181 {"help", no_argument, 0, 'h'},
182 {"delay", required_argument, 0, 'd'},
183 {"iterations", required_argument, 0, 'n'},
184 {"pid", required_argument, 0, 'p'},
185 {"once", no_argument, 0, 'o'},
186 {"processes", required_argument, 0, 'P'},
187 {"container", required_argument, 0, 'C'},
188 {0, 0, 0, 0}
189 };
190
191 /* Set defaults */
192 cfg.delay = 2;
193 cfg.iterations = 0;
194 cfg.max_processes = 20;
195 cfg.sort_field = 'c'; /* Default sort by CPU delay */
196 cfg.output_one_time = 0;
197 cfg.monitor_pid = 0; /* 0 means monitor all PIDs */
198 cfg.container_path = NULL;
199
200 while (1) {
201 int option_index = 0;
202
203 c = getopt_long(argc, argv, "hd:n:p:oP:C:", long_options, &option_index);
204 if (c == -1)
205 break;
206
207 switch (c) {
208 case 'h':
209 usage();
210 break;
211 case 'd':
212 cfg.delay = atoi(optarg);
213 if (cfg.delay < 1) {
214 fprintf(stderr, "Error: delay must be >= 1.\n");
215 exit(1);
216 }
217 break;
218 case 'n':
219 cfg.iterations = atoi(optarg);
220 if (cfg.iterations < 0) {
221 fprintf(stderr, "Error: iterations must be >= 0.\n");
222 exit(1);
223 }
224 break;
225 case 'p':
226 cfg.monitor_pid = atoi(optarg);
227 if (cfg.monitor_pid < 1) {
228 fprintf(stderr, "Error: pid must be >= 1.\n");
229 exit(1);
230 }
231 break;
232 case 'o':
233 cfg.output_one_time = 1;
234 break;
235 case 'P':
236 cfg.max_processes = atoi(optarg);
237 if (cfg.max_processes < 1) {
238 fprintf(stderr, "Error: processes must be >= 1.\n");
239 exit(1);
240 }
241 if (cfg.max_processes > MAX_TASKS) {
242 fprintf(stderr, "Warning: processes capped to %d.\n",
243 MAX_TASKS);
244 cfg.max_processes = MAX_TASKS;
245 }
246 break;
247 case 'C':
248 cfg.container_path = strdup(optarg);
249 break;
250 default:
251 fprintf(stderr, "Try 'delaytop --help' for more information.\n");
252 exit(1);
253 }
254 }
255 }
256
257 /* Create a raw netlink socket and bind */
create_nl_socket(void)258 static int create_nl_socket(void)
259 {
260 int fd;
261 struct sockaddr_nl local;
262
263 fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
264 if (fd < 0)
265 return -1;
266
267 memset(&local, 0, sizeof(local));
268 local.nl_family = AF_NETLINK;
269
270 if (bind(fd, (struct sockaddr *) &local, sizeof(local)) < 0) {
271 fprintf(stderr, "Failed to bind socket when create nl_socket\n");
272 close(fd);
273 return -1;
274 }
275
276 return fd;
277 }
278
279 /* Send a command via netlink */
send_cmd(int sd,__u16 nlmsg_type,__u32 nlmsg_pid,__u8 genl_cmd,__u16 nla_type,void * nla_data,int nla_len)280 static int send_cmd(int sd, __u16 nlmsg_type, __u32 nlmsg_pid,
281 __u8 genl_cmd, __u16 nla_type,
282 void *nla_data, int nla_len)
283 {
284 struct sockaddr_nl nladdr;
285 struct nlattr *na;
286 int r, buflen;
287 char *buf;
288
289 struct {
290 struct nlmsghdr n;
291 struct genlmsghdr g;
292 char buf[MAX_MSG_SIZE];
293 } msg;
294
295 msg.n.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN);
296 msg.n.nlmsg_type = nlmsg_type;
297 msg.n.nlmsg_flags = NLM_F_REQUEST;
298 msg.n.nlmsg_seq = 0;
299 msg.n.nlmsg_pid = nlmsg_pid;
300 msg.g.cmd = genl_cmd;
301 msg.g.version = 0x1;
302 na = (struct nlattr *) GENLMSG_DATA(&msg);
303 na->nla_type = nla_type;
304 na->nla_len = nla_len + NLA_HDRLEN;
305 memcpy(NLA_DATA(na), nla_data, nla_len);
306 msg.n.nlmsg_len += NLMSG_ALIGN(na->nla_len);
307
308 buf = (char *) &msg;
309 buflen = msg.n.nlmsg_len;
310 memset(&nladdr, 0, sizeof(nladdr));
311 nladdr.nl_family = AF_NETLINK;
312 while ((r = sendto(sd, buf, buflen, 0, (struct sockaddr *) &nladdr,
313 sizeof(nladdr))) < buflen) {
314 if (r > 0) {
315 buf += r;
316 buflen -= r;
317 } else if (errno != EAGAIN)
318 return -1;
319 }
320 return 0;
321 }
322
323 /* Get family ID for taskstats via netlink */
get_family_id(int sd)324 static int get_family_id(int sd)
325 {
326 struct {
327 struct nlmsghdr n;
328 struct genlmsghdr g;
329 char buf[256];
330 } ans;
331
332 int id = 0, rc;
333 struct nlattr *na;
334 int rep_len;
335 char name[100];
336
337 strncpy(name, TASKSTATS_GENL_NAME, sizeof(name) - 1);
338 name[sizeof(name) - 1] = '\0';
339 rc = send_cmd(sd, GENL_ID_CTRL, getpid(), CTRL_CMD_GETFAMILY,
340 CTRL_ATTR_FAMILY_NAME, (void *)name,
341 strlen(TASKSTATS_GENL_NAME)+1);
342 if (rc < 0) {
343 fprintf(stderr, "Failed to send cmd for family id\n");
344 return 0;
345 }
346
347 rep_len = recv(sd, &ans, sizeof(ans), 0);
348 if (ans.n.nlmsg_type == NLMSG_ERROR ||
349 (rep_len < 0) || !NLMSG_OK((&ans.n), rep_len)) {
350 fprintf(stderr, "Failed to receive response for family id\n");
351 return 0;
352 }
353
354 na = (struct nlattr *) GENLMSG_DATA(&ans);
355 na = (struct nlattr *) ((char *) na + NLA_ALIGN(na->nla_len));
356 if (na->nla_type == CTRL_ATTR_FAMILY_ID)
357 id = *(__u16 *) NLA_DATA(na);
358 return id;
359 }
360
read_psi_stats(void)361 static void read_psi_stats(void)
362 {
363 FILE *fp;
364 char line[256];
365 int ret = 0;
366 /* Zero all fields */
367 memset(&psi, 0, sizeof(psi));
368 /* CPU pressure */
369 fp = fopen(PSI_CPU_SOME, "r");
370 if (fp) {
371 while (fgets(line, sizeof(line), fp)) {
372 if (strncmp(line, "some", 4) == 0) {
373 ret = sscanf(line, "some avg10=%lf avg60=%lf avg300=%lf total=%llu",
374 &psi.cpu_some_avg10, &psi.cpu_some_avg60,
375 &psi.cpu_some_avg300, &psi.cpu_some_total);
376 if (ret != 4)
377 fprintf(stderr, "Failed to parse CPU some PSI data\n");
378 } else if (strncmp(line, "full", 4) == 0) {
379 ret = sscanf(line, "full avg10=%lf avg60=%lf avg300=%lf total=%llu",
380 &psi.cpu_full_avg10, &psi.cpu_full_avg60,
381 &psi.cpu_full_avg300, &psi.cpu_full_total);
382 if (ret != 4)
383 fprintf(stderr, "Failed to parse CPU full PSI data\n");
384 }
385 }
386 fclose(fp);
387 }
388 /* Memory pressure */
389 fp = fopen(PSI_MEMORY_SOME, "r");
390 if (fp) {
391 while (fgets(line, sizeof(line), fp)) {
392 if (strncmp(line, "some", 4) == 0) {
393 ret = sscanf(line, "some avg10=%lf avg60=%lf avg300=%lf total=%llu",
394 &psi.memory_some_avg10, &psi.memory_some_avg60,
395 &psi.memory_some_avg300, &psi.memory_some_total);
396 if (ret != 4)
397 fprintf(stderr, "Failed to parse Memory some PSI data\n");
398 } else if (strncmp(line, "full", 4) == 0) {
399 ret = sscanf(line, "full avg10=%lf avg60=%lf avg300=%lf total=%llu",
400 &psi.memory_full_avg10, &psi.memory_full_avg60,
401 &psi.memory_full_avg300, &psi.memory_full_total);
402 }
403 if (ret != 4)
404 fprintf(stderr, "Failed to parse Memory full PSI data\n");
405 }
406 fclose(fp);
407 }
408 /* IO pressure */
409 fp = fopen(PSI_IO_SOME, "r");
410 if (fp) {
411 while (fgets(line, sizeof(line), fp)) {
412 if (strncmp(line, "some", 4) == 0) {
413 ret = sscanf(line, "some avg10=%lf avg60=%lf avg300=%lf total=%llu",
414 &psi.io_some_avg10, &psi.io_some_avg60,
415 &psi.io_some_avg300, &psi.io_some_total);
416 if (ret != 4)
417 fprintf(stderr, "Failed to parse IO some PSI data\n");
418 } else if (strncmp(line, "full", 4) == 0) {
419 ret = sscanf(line, "full avg10=%lf avg60=%lf avg300=%lf total=%llu",
420 &psi.io_full_avg10, &psi.io_full_avg60,
421 &psi.io_full_avg300, &psi.io_full_total);
422 if (ret != 4)
423 fprintf(stderr, "Failed to parse IO full PSI data\n");
424 }
425 }
426 fclose(fp);
427 }
428 /* IRQ pressure (only full) */
429 fp = fopen(PSI_IRQ_FULL, "r");
430 if (fp) {
431 while (fgets(line, sizeof(line), fp)) {
432 if (strncmp(line, "full", 4) == 0) {
433 ret = sscanf(line, "full avg10=%lf avg60=%lf avg300=%lf total=%llu",
434 &psi.irq_full_avg10, &psi.irq_full_avg60,
435 &psi.irq_full_avg300, &psi.irq_full_total);
436 if (ret != 4)
437 fprintf(stderr, "Failed to parse IRQ full PSI data\n");
438 }
439 }
440 fclose(fp);
441 }
442 }
443
read_comm(int pid,char * comm_buf,size_t buf_size)444 static int read_comm(int pid, char *comm_buf, size_t buf_size)
445 {
446 char path[64];
447 int ret = -1;
448 size_t len;
449 FILE *fp;
450
451 snprintf(path, sizeof(path), "/proc/%d/comm", pid);
452 fp = fopen(path, "r");
453 if (!fp) {
454 fprintf(stderr, "Failed to open comm file /proc/%d/comm\n", pid);
455 return ret;
456 }
457
458 if (fgets(comm_buf, buf_size, fp)) {
459 len = strlen(comm_buf);
460 if (len > 0 && comm_buf[len - 1] == '\n')
461 comm_buf[len - 1] = '\0';
462 ret = 0;
463 }
464
465 fclose(fp);
466
467 return ret;
468 }
469
fetch_and_fill_task_info(int pid,const char * comm)470 static void fetch_and_fill_task_info(int pid, const char *comm)
471 {
472 struct {
473 struct nlmsghdr n;
474 struct genlmsghdr g;
475 char buf[MAX_MSG_SIZE];
476 } resp;
477 struct taskstats stats;
478 struct nlattr *nested;
479 struct nlattr *na;
480 int nested_len;
481 int nl_len;
482 int rc;
483
484 /* Send request for task stats */
485 if (send_cmd(nl_sd, family_id, getpid(), TASKSTATS_CMD_GET,
486 TASKSTATS_CMD_ATTR_PID, &pid, sizeof(pid)) < 0) {
487 fprintf(stderr, "Failed to send request for task stats\n");
488 return;
489 }
490
491 /* Receive response */
492 rc = recv(nl_sd, &resp, sizeof(resp), 0);
493 if (rc < 0 || resp.n.nlmsg_type == NLMSG_ERROR) {
494 fprintf(stderr, "Failed to receive response for task stats\n");
495 return;
496 }
497
498 /* Parse response */
499 nl_len = GENLMSG_PAYLOAD(&resp.n);
500 na = (struct nlattr *) GENLMSG_DATA(&resp);
501 while (nl_len > 0) {
502 if (na->nla_type == TASKSTATS_TYPE_AGGR_PID) {
503 nested = (struct nlattr *) NLA_DATA(na);
504 nested_len = NLA_PAYLOAD(na->nla_len);
505 while (nested_len > 0) {
506 if (nested->nla_type == TASKSTATS_TYPE_STATS) {
507 memcpy(&stats, NLA_DATA(nested), sizeof(stats));
508 if (task_count < MAX_TASKS) {
509 tasks[task_count].pid = pid;
510 tasks[task_count].tgid = pid;
511 strncpy(tasks[task_count].command, comm,
512 TASK_COMM_LEN - 1);
513 tasks[task_count].command[TASK_COMM_LEN - 1] = '\0';
514 SET_TASK_STAT(task_count, cpu_count);
515 SET_TASK_STAT(task_count, cpu_delay_total);
516 SET_TASK_STAT(task_count, blkio_count);
517 SET_TASK_STAT(task_count, blkio_delay_total);
518 SET_TASK_STAT(task_count, swapin_count);
519 SET_TASK_STAT(task_count, swapin_delay_total);
520 SET_TASK_STAT(task_count, freepages_count);
521 SET_TASK_STAT(task_count, freepages_delay_total);
522 SET_TASK_STAT(task_count, thrashing_count);
523 SET_TASK_STAT(task_count, thrashing_delay_total);
524 SET_TASK_STAT(task_count, compact_count);
525 SET_TASK_STAT(task_count, compact_delay_total);
526 SET_TASK_STAT(task_count, wpcopy_count);
527 SET_TASK_STAT(task_count, wpcopy_delay_total);
528 SET_TASK_STAT(task_count, irq_count);
529 SET_TASK_STAT(task_count, irq_delay_total);
530 task_count++;
531 }
532 break;
533 }
534 nested_len -= NLA_ALIGN(nested->nla_len);
535 nested = NLA_NEXT(nested);
536 }
537 }
538 nl_len -= NLA_ALIGN(na->nla_len);
539 na = NLA_NEXT(na);
540 }
541 return;
542 }
543
get_task_delays(void)544 static void get_task_delays(void)
545 {
546 char comm[TASK_COMM_LEN];
547 struct dirent *entry;
548 DIR *dir;
549 int pid;
550
551 task_count = 0;
552 if (cfg.monitor_pid > 0) {
553 if (read_comm(cfg.monitor_pid, comm, sizeof(comm)) == 0)
554 fetch_and_fill_task_info(cfg.monitor_pid, comm);
555 return;
556 }
557
558 dir = opendir("/proc");
559 if (!dir) {
560 fprintf(stderr, "Error opening /proc directory\n");
561 return;
562 }
563
564 while ((entry = readdir(dir)) != NULL && task_count < MAX_TASKS) {
565 if (!isdigit(entry->d_name[0]))
566 continue;
567 pid = atoi(entry->d_name);
568 if (pid == 0)
569 continue;
570 if (read_comm(pid, comm, sizeof(comm)) != 0)
571 continue;
572 fetch_and_fill_task_info(pid, comm);
573 }
574 closedir(dir);
575 }
576
577 /* Calculate average delay in milliseconds */
average_ms(unsigned long long total,unsigned long long count)578 static double average_ms(unsigned long long total, unsigned long long count)
579 {
580 if (count == 0)
581 return 0;
582 return (double)total / 1000000.0 / count;
583 }
584
585 /* Comparison function for sorting tasks */
compare_tasks(const void * a,const void * b)586 static int compare_tasks(const void *a, const void *b)
587 {
588 const struct task_info *t1 = (const struct task_info *)a;
589 const struct task_info *t2 = (const struct task_info *)b;
590 double avg1, avg2;
591
592 switch (cfg.sort_field) {
593 case 'c': /* CPU */
594 avg1 = average_ms(t1->cpu_delay_total, t1->cpu_count);
595 avg2 = average_ms(t2->cpu_delay_total, t2->cpu_count);
596 if (avg1 != avg2)
597 return avg2 > avg1 ? 1 : -1;
598 return t2->cpu_delay_total > t1->cpu_delay_total ? 1 : -1;
599
600 default:
601 return t2->cpu_delay_total > t1->cpu_delay_total ? 1 : -1;
602 }
603 }
604
605 /* Sort tasks by selected field */
sort_tasks(void)606 static void sort_tasks(void)
607 {
608 if (task_count > 0)
609 qsort(tasks, task_count, sizeof(struct task_info), compare_tasks);
610 }
611
612 /* Get container statistics via cgroupstats */
get_container_stats(void)613 static void get_container_stats(void)
614 {
615 int rc, cfd;
616 struct {
617 struct nlmsghdr n;
618 struct genlmsghdr g;
619 char buf[MAX_MSG_SIZE];
620 } req, resp;
621 struct nlattr *na;
622 int nl_len;
623 struct cgroupstats stats;
624
625 /* Check if container path is set */
626 if (!cfg.container_path)
627 return;
628
629 /* Open container cgroup */
630 cfd = open(cfg.container_path, O_RDONLY);
631 if (cfd < 0) {
632 fprintf(stderr, "Error opening container path: %s\n", cfg.container_path);
633 return;
634 }
635
636 /* Send request for container stats */
637 if (send_cmd(nl_sd, family_id, getpid(), CGROUPSTATS_CMD_GET,
638 CGROUPSTATS_CMD_ATTR_FD, &cfd, sizeof(__u32)) < 0) {
639 fprintf(stderr, "Failed to send request for container stats\n");
640 close(cfd);
641 return;
642 }
643
644 /* Receive response */
645 rc = recv(nl_sd, &resp, sizeof(resp), 0);
646 if (rc < 0 || resp.n.nlmsg_type == NLMSG_ERROR) {
647 fprintf(stderr, "Failed to receive response for container stats\n");
648 close(cfd);
649 return;
650 }
651
652 /* Parse response */
653 nl_len = GENLMSG_PAYLOAD(&resp.n);
654 na = (struct nlattr *) GENLMSG_DATA(&resp);
655 while (nl_len > 0) {
656 if (na->nla_type == CGROUPSTATS_TYPE_CGROUP_STATS) {
657 /* Get the cgroupstats structure */
658 memcpy(&stats, NLA_DATA(na), sizeof(stats));
659
660 /* Fill container stats */
661 container_stats.nr_sleeping = stats.nr_sleeping;
662 container_stats.nr_running = stats.nr_running;
663 container_stats.nr_stopped = stats.nr_stopped;
664 container_stats.nr_uninterruptible = stats.nr_uninterruptible;
665 container_stats.nr_io_wait = stats.nr_io_wait;
666 break;
667 }
668 nl_len -= NLA_ALIGN(na->nla_len);
669 na = (struct nlattr *) ((char *) na + NLA_ALIGN(na->nla_len));
670 }
671
672 close(cfd);
673 }
674
675 /* Display results to stdout or log file */
display_results(void)676 static void display_results(void)
677 {
678 time_t now = time(NULL);
679 struct tm *tm_now = localtime(&now);
680 FILE *out = stdout;
681 char timestamp[32];
682 bool suc = true;
683 int i, count;
684
685 /* Clear terminal screen */
686 suc &= BOOL_FPRINT(out, "\033[H\033[J");
687
688 /* PSI output (one-line, no cat style) */
689 suc &= BOOL_FPRINT(out, "System Pressure Information: (avg10/avg60/avg300/total)\n");
690 suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
691 "CPU some:",
692 psi.cpu_some_avg10,
693 psi.cpu_some_avg60,
694 psi.cpu_some_avg300,
695 psi.cpu_some_total / 1000);
696 suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
697 "CPU full:",
698 psi.cpu_full_avg10,
699 psi.cpu_full_avg60,
700 psi.cpu_full_avg300,
701 psi.cpu_full_total / 1000);
702 suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
703 "Memory full:",
704 psi.memory_full_avg10,
705 psi.memory_full_avg60,
706 psi.memory_full_avg300,
707 psi.memory_full_total / 1000);
708 suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
709 "Memory some:",
710 psi.memory_some_avg10,
711 psi.memory_some_avg60,
712 psi.memory_some_avg300,
713 psi.memory_some_total / 1000);
714 suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
715 "IO full:",
716 psi.io_full_avg10,
717 psi.io_full_avg60,
718 psi.io_full_avg300,
719 psi.io_full_total / 1000);
720 suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
721 "IO some:",
722 psi.io_some_avg10,
723 psi.io_some_avg60,
724 psi.io_some_avg300,
725 psi.io_some_total / 1000);
726 suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
727 "IRQ full:",
728 psi.irq_full_avg10,
729 psi.irq_full_avg60,
730 psi.irq_full_avg300,
731 psi.irq_full_total / 1000);
732
733 if (cfg.container_path) {
734 suc &= BOOL_FPRINT(out, "Container Information (%s):\n", cfg.container_path);
735 suc &= BOOL_FPRINT(out, "Processes: running=%d, sleeping=%d, ",
736 container_stats.nr_running, container_stats.nr_sleeping);
737 suc &= BOOL_FPRINT(out, "stopped=%d, uninterruptible=%d, io_wait=%d\n\n",
738 container_stats.nr_stopped, container_stats.nr_uninterruptible,
739 container_stats.nr_io_wait);
740 }
741 suc &= BOOL_FPRINT(out, "Top %d processes (sorted by CPU delay):\n",
742 cfg.max_processes);
743 suc &= BOOL_FPRINT(out, "%5s %5s %-17s", "PID", "TGID", "COMMAND");
744 suc &= BOOL_FPRINT(out, "%7s %7s %7s %7s %7s %7s %7s %7s\n",
745 "CPU(ms)", "IO(ms)", "SWAP(ms)", "RCL(ms)",
746 "THR(ms)", "CMP(ms)", "WP(ms)", "IRQ(ms)");
747
748 suc &= BOOL_FPRINT(out, "-----------------------------------------------");
749 suc &= BOOL_FPRINT(out, "----------------------------------------------\n");
750 count = task_count < cfg.max_processes ? task_count : cfg.max_processes;
751
752 for (i = 0; i < count; i++) {
753 suc &= BOOL_FPRINT(out, "%5d %5d %-15s",
754 tasks[i].pid, tasks[i].tgid, tasks[i].command);
755 suc &= BOOL_FPRINT(out, "%7.2f %7.2f %7.2f %7.2f %7.2f %7.2f %7.2f %7.2f\n",
756 average_ms(tasks[i].cpu_delay_total, tasks[i].cpu_count),
757 average_ms(tasks[i].blkio_delay_total, tasks[i].blkio_count),
758 average_ms(tasks[i].swapin_delay_total, tasks[i].swapin_count),
759 average_ms(tasks[i].freepages_delay_total, tasks[i].freepages_count),
760 average_ms(tasks[i].thrashing_delay_total, tasks[i].thrashing_count),
761 average_ms(tasks[i].compact_delay_total, tasks[i].compact_count),
762 average_ms(tasks[i].wpcopy_delay_total, tasks[i].wpcopy_count),
763 average_ms(tasks[i].irq_delay_total, tasks[i].irq_count));
764 }
765
766 suc &= BOOL_FPRINT(out, "\n");
767
768 if (!suc)
769 perror("Error writing to output");
770 }
771
772 /* Main function */
main(int argc,char ** argv)773 int main(int argc, char **argv)
774 {
775 int iterations = 0;
776 int use_q_quit = 0;
777
778 /* Parse command line arguments */
779 parse_args(argc, argv);
780
781 /* Setup netlink socket */
782 nl_sd = create_nl_socket();
783 if (nl_sd < 0) {
784 fprintf(stderr, "Error creating netlink socket\n");
785 exit(1);
786 }
787
788 /* Get family ID for taskstats via netlink */
789 family_id = get_family_id(nl_sd);
790 if (!family_id) {
791 fprintf(stderr, "Error getting taskstats family ID\n");
792 close(nl_sd);
793 exit(1);
794 }
795
796 if (!cfg.output_one_time) {
797 use_q_quit = 1;
798 enable_raw_mode();
799 printf("Press 'q' to quit.\n");
800 fflush(stdout);
801 }
802
803 /* Main loop */
804 while (running) {
805 /* Read PSI statistics */
806 read_psi_stats();
807
808 /* Get container stats if container path provided */
809 if (cfg.container_path)
810 get_container_stats();
811
812 /* Get task delays */
813 get_task_delays();
814
815 /* Sort tasks */
816 sort_tasks();
817
818 /* Display results to stdout or log file */
819 display_results();
820
821 /* Check for iterations */
822 if (cfg.iterations > 0 && ++iterations >= cfg.iterations)
823 break;
824
825 /* Exit if output_one_time is set */
826 if (cfg.output_one_time)
827 break;
828
829 /* Check for 'q' key to quit */
830 if (use_q_quit) {
831 struct timeval tv = {cfg.delay, 0};
832 fd_set readfds;
833
834 FD_ZERO(&readfds);
835 FD_SET(STDIN_FILENO, &readfds);
836 int r = select(STDIN_FILENO+1, &readfds, NULL, NULL, &tv);
837
838 if (r > 0 && FD_ISSET(STDIN_FILENO, &readfds)) {
839 char ch = 0;
840
841 read(STDIN_FILENO, &ch, 1);
842 if (ch == 'q' || ch == 'Q') {
843 running = 0;
844 break;
845 }
846 }
847 } else {
848 sleep(cfg.delay);
849 }
850 }
851
852 /* Restore terminal mode */
853 if (use_q_quit)
854 disable_raw_mode();
855
856 /* Cleanup */
857 close(nl_sd);
858 if (cfg.container_path)
859 free(cfg.container_path);
860
861 return 0;
862 }
863