// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org>
 */

#define _GNU_SOURCE
#ifdef HAVE_LIBCPUPOWER_SUPPORT
#include <cpuidle.h>
#endif /* HAVE_LIBCPUPOWER_SUPPORT */
#include <dirent.h>
#include <stdarg.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <sched.h>
#include <stdio.h>

#include "utils.h"

#define MAX_MSG_LENGTH	1024
int config_debug;

/*
 * err_msg - print an error message to the stderr
 */
void err_msg(const char *fmt, ...)
{
	char message[MAX_MSG_LENGTH];
	va_list ap;

	va_start(ap, fmt);
	vsnprintf(message, sizeof(message), fmt, ap);
	va_end(ap);

	fprintf(stderr, "%s", message);
}

/*
 * debug_msg - print a debug message to stderr if debug is set
 */
void debug_msg(const char *fmt, ...)
{
	char message[MAX_MSG_LENGTH];
	va_list ap;

	if (!config_debug)
		return;

	va_start(ap, fmt);
	vsnprintf(message, sizeof(message), fmt, ap);
	va_end(ap);

	fprintf(stderr, "%s", message);
}

/*
 * fatal - print an error message and EOL to stderr and exit with ERROR
 */
void fatal(const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	vfprintf(stderr, fmt, ap);
	va_end(ap);
	fprintf(stderr, "\n");

	exit(ERROR);
}

/*
 * get_llong_from_str - get a long long int from a string
 */
long long get_llong_from_str(char *start)
{
	long long value;
	char *end;

	errno = 0;
	value = strtoll(start, &end, 10);
	if (errno || start == end)
		return -1;

	return value;
}

/*
 * get_duration - fill output with a human readable duration since start_time
 */
void get_duration(time_t start_time, char *output, int output_size)
{
	time_t now = time(NULL);
	struct tm *tm_info;
	time_t duration;

	duration = difftime(now, start_time);
	tm_info = gmtime(&duration);

	snprintf(output, output_size, "%3d %02d:%02d:%02d",
		 tm_info->tm_yday,
		 tm_info->tm_hour,
		 tm_info->tm_min,
		 tm_info->tm_sec);
}

/*
 * parse_cpu_set - parse a cpu_list filling cpu_set_t argument
 *
 * Receives a cpu list, like 1-3,5 (cpus 1, 2, 3, 5), and fills the
 * cpu_set_t argument accordingly.
 *
 * Returns 0 on success, 1 otherwise.
 */
int parse_cpu_set(char *cpu_list, cpu_set_t *set)
{
	const char *p;
	int end_cpu;
	int nr_cpus;
	int cpu;
	int i;

	CPU_ZERO(set);

	nr_cpus = sysconf(_SC_NPROCESSORS_CONF);

	for (p = cpu_list; *p; ) {
		cpu = atoi(p);
		if (cpu < 0 || (!cpu && *p != '0') || cpu >= nr_cpus)
			goto err;

		while (isdigit(*p))
			p++;
		if (*p == '-') {
			p++;
			end_cpu = atoi(p);
			if (end_cpu < cpu || (!end_cpu && *p != '0') || end_cpu >= nr_cpus)
				goto err;
			while (isdigit(*p))
				p++;
		} else
			end_cpu = cpu;

		if (cpu == end_cpu) {
			debug_msg("cpu_set: adding cpu %d\n", cpu);
			CPU_SET(cpu, set);
		} else {
			for (i = cpu; i <= end_cpu; i++) {
				debug_msg("cpu_set: adding cpu %d\n", i);
				CPU_SET(i, set);
			}
		}

		if (*p == ',')
			p++;
	}

	return 0;
err:
	debug_msg("Error parsing the cpu set %s\n", cpu_list);
	return 1;
}
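
/*
 * Editorial sketch (not in the original source): minimal usage of
 * parse_cpu_set(). Note it returns 0 on success, hence the non-zero error
 * check; variable names are illustrative only.
 *
 *	cpu_set_t set;
 *	int i;
 *
 *	if (parse_cpu_set("1-3,5", &set))
 *		err_msg("Invalid cpu list\n");
 *	else
 *		for (i = 0; i < CPU_SETSIZE; i++)
 *			if (CPU_ISSET(i, &set))
 *				debug_msg("monitoring cpu %d\n", i);
 */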

/*
 * parse_seconds_duration - parse duration with s/m/h/d suffix converting it to seconds
 */
long parse_seconds_duration(char *val)
{
	char *end;
	long t;

	t = strtol(val, &end, 10);

	if (end) {
		switch (*end) {
		case 's':
		case 'S':
			break;
		case 'm':
		case 'M':
			t *= 60;
			break;
		case 'h':
		case 'H':
			t *= 60 * 60;
			break;

		case 'd':
		case 'D':
			t *= 24 * 60 * 60;
			break;
		}
	}

	return t;
}

/*
 * parse_ns_duration - parse duration with ns/us/ms/s converting it to nanoseconds
 */
long parse_ns_duration(char *val)
{
	char *end;
	long t;

	t = strtol(val, &end, 10);

	if (end) {
		if (!strncmp(end, "ns", 2)) {
			return t;
		} else if (!strncmp(end, "us", 2)) {
			t *= 1000;
			return t;
		} else if (!strncmp(end, "ms", 2)) {
			t *= 1000 * 1000;
			return t;
		} else if (!strncmp(end, "s", 1)) {
			t *= 1000 * 1000 * 1000;
			return t;
		}
		return -1;
	}

	return t;
}
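
/*
 * Editorial sketch (not in the original source): parse_ns_duration() expects
 * a ns/us/ms/s suffix and converts the value to nanoseconds, returning -1 on
 * an unknown suffix. For example:
 *
 *	parse_ns_duration("100ns") == 100
 *	parse_ns_duration("100us") == 100000
 *	parse_ns_duration("2ms")   == 2000000
 *	parse_ns_duration("1s")    == 1000000000
 *	parse_ns_duration("1x")    == -1
 */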

/*
 * This is a set of helper functions to use SCHED_DEADLINE.
 */
#ifndef __NR_sched_setattr
# ifdef __x86_64__
# define __NR_sched_setattr	314
# elif __i386__
# define __NR_sched_setattr	351
# elif __arm__
# define __NR_sched_setattr	380
# elif __aarch64__ || __riscv
# define __NR_sched_setattr	274
# elif __powerpc__
# define __NR_sched_setattr	355
# elif __s390x__
# define __NR_sched_setattr	345
# elif __loongarch__
# define __NR_sched_setattr	274
# endif
#endif

#define SCHED_DEADLINE	6

static inline int syscall_sched_setattr(pid_t pid, const struct sched_attr *attr,
				unsigned int flags) {
	return syscall(__NR_sched_setattr, pid, attr, flags);
}

int __set_sched_attr(int pid, struct sched_attr *attr)
{
	int flags = 0;
	int retval;

	retval = syscall_sched_setattr(pid, attr, flags);
	if (retval < 0) {
		err_msg("Failed to set sched attributes to the pid %d: %s\n",
			pid, strerror(errno));
		return 1;
	}

	return 0;
}
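
/*
 * Editorial sketch (not in the original source): building a SCHED_DEADLINE
 * sched_attr by hand and applying it with __set_sched_attr(). The 100 us
 * runtime / 1 ms period values are illustrative; in rtla they normally come
 * from parse_prio() below.
 *
 *	struct sched_attr attr = {
 *		.size		= sizeof(attr),
 *		.sched_policy	= SCHED_DEADLINE,
 *		.sched_runtime	= 100 * 1000,		(100 us, in ns)
 *		.sched_deadline	= 1000 * 1000,		(1 ms, in ns)
 *		.sched_period	= 1000 * 1000,		(1 ms, in ns)
 *	};
 *
 *	if (__set_sched_attr(getpid(), &attr))
 *		err_msg("Failed to set SCHED_DEADLINE\n");
 */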

/*
 * procfs_is_workload_pid - check if a procfs entry contains a comm_prefix* comm
 *
 * Check if the procfs entry is a directory of a process, and then check if the
 * process has a comm with the prefix set in char *comm_prefix. As the
 * current users of this function only check for kernel threads, there is no
 * need to check the threads of the process.
 *
 * Return: True if the proc_entry contains a comm file with comm_prefix*.
 * Otherwise returns false.
 */
static int procfs_is_workload_pid(const char *comm_prefix, struct dirent *proc_entry)
{
	char buffer[MAX_PATH];
	int comm_fd, retval;
	char *t_name;

	if (proc_entry->d_type != DT_DIR)
		return 0;

	if (*proc_entry->d_name == '.')
		return 0;

	/* check if the string is a pid */
	for (t_name = proc_entry->d_name; t_name; t_name++) {
		if (!isdigit(*t_name))
			break;
	}

	if (*t_name != '\0')
		return 0;

	snprintf(buffer, MAX_PATH, "/proc/%s/comm", proc_entry->d_name);
	comm_fd = open(buffer, O_RDONLY);
	if (comm_fd < 0)
		return 0;

	memset(buffer, 0, MAX_PATH);
	retval = read(comm_fd, buffer, MAX_PATH);

	close(comm_fd);

	if (retval <= 0)
		return 0;

	retval = strncmp(comm_prefix, buffer, strlen(comm_prefix));
	if (retval)
		return 0;

	/* comm already has a \n */
	debug_msg("Found workload pid:%s comm:%s", proc_entry->d_name, buffer);

	return 1;
}

/*
 * set_comm_sched_attr - set sched params to threads starting with char *comm_prefix
 *
 * This function uses procfs to list the currently running threads and then sets the
 * sched_attr *attr to the threads that start with char *comm_prefix. It is
 * mainly used to set the priority to the kernel threads created by the
 * tracers.
 */
int set_comm_sched_attr(const char *comm_prefix, struct sched_attr *attr)
{
	struct dirent *proc_entry;
	DIR *procfs;
	int retval;

	if (strlen(comm_prefix) >= MAX_PATH) {
		err_msg("Command prefix is too long: %d < strlen(%s)\n",
			MAX_PATH, comm_prefix);
		return 1;
	}

	procfs = opendir("/proc");
	if (!procfs) {
		err_msg("Could not open procfs\n");
		return 1;
	}

	while ((proc_entry = readdir(procfs))) {

		retval = procfs_is_workload_pid(comm_prefix, proc_entry);
		if (!retval)
			continue;

		/* procfs_is_workload_pid confirmed it is a pid */
		retval = __set_sched_attr(atoi(proc_entry->d_name), attr);
		if (retval) {
			err_msg("Error setting sched attributes for pid:%s\n", proc_entry->d_name);
			goto out_err;
		}

		debug_msg("Set sched attributes for pid:%s\n", proc_entry->d_name);
	}
	closedir(procfs);
	return 0;

out_err:
	closedir(procfs);
	return 1;
}

#define INVALID_VAL	(~0L)
static long get_long_ns_after_colon(char *start)
{
	long val = INVALID_VAL;

	/* find the ":" */
	start = strstr(start, ":");
	if (!start)
		return -1;

	/* skip ":" */
	start++;
	val = parse_ns_duration(start);

	return val;
}

static long get_long_after_colon(char *start)
{
	long val = INVALID_VAL;

	/* find the ":" */
	start = strstr(start, ":");
	if (!start)
		return -1;

	/* skip ":" */
	start++;
	val = get_llong_from_str(start);

	return val;
}

/*
 * parse priority in the format:
 * SCHED_OTHER:
 *	o:<prio>
 *	O:<prio>
 * SCHED_RR:
 *	r:<prio>
 *	R:<prio>
 * SCHED_FIFO:
 *	f:<prio>
 *	F:<prio>
 * SCHED_DEADLINE:
 *	d:runtime:period
 *	D:runtime:period
 */
int parse_prio(char *arg, struct sched_attr *sched_param)
{
	long prio;
	long runtime;
	long period;

	memset(sched_param, 0, sizeof(*sched_param));
	sched_param->size = sizeof(*sched_param);

	switch (arg[0]) {
	case 'd':
	case 'D':
		/* d:runtime:period */
		if (strlen(arg) < 4)
			return -1;

		runtime = get_long_ns_after_colon(arg);
		if (runtime == INVALID_VAL)
			return -1;

		period = get_long_ns_after_colon(&arg[2]);
		if (period == INVALID_VAL)
			return -1;

		if (runtime > period)
			return -1;

		sched_param->sched_policy   = SCHED_DEADLINE;
		sched_param->sched_runtime  = runtime;
		sched_param->sched_deadline = period;
		sched_param->sched_period   = period;
		break;
	case 'f':
	case 'F':
		/* f:prio */
		prio = get_long_after_colon(arg);
		if (prio == INVALID_VAL)
			return -1;

		if (prio < sched_get_priority_min(SCHED_FIFO))
			return -1;
		if (prio > sched_get_priority_max(SCHED_FIFO))
			return -1;

		sched_param->sched_policy   = SCHED_FIFO;
		sched_param->sched_priority = prio;
		break;
	case 'r':
	case 'R':
		/* r:prio */
		prio = get_long_after_colon(arg);
		if (prio == INVALID_VAL)
			return -1;

		if (prio < sched_get_priority_min(SCHED_RR))
			return -1;
		if (prio > sched_get_priority_max(SCHED_RR))
			return -1;

		sched_param->sched_policy   = SCHED_RR;
		sched_param->sched_priority = prio;
		break;
	case 'o':
	case 'O':
		/* o:prio */
		prio = get_long_after_colon(arg);
		if (prio == INVALID_VAL)
			return -1;

		if (prio < MIN_NICE)
			return -1;
		if (prio > MAX_NICE)
			return -1;

		sched_param->sched_policy = SCHED_OTHER;
		sched_param->sched_nice = prio;
		break;
	default:
		return -1;
	}
	return 0;
}
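
/*
 * Editorial sketch (not in the original source): parse_prio() usage. "f:95"
 * requests SCHED_FIFO priority 95; "d:100us:1ms" requests SCHED_DEADLINE with
 * 100 us runtime and 1 ms period. The "timerlat/" comm prefix is only an
 * example of a tracer kthread name.
 *
 *	struct sched_attr attr;
 *
 *	if (parse_prio("f:95", &attr))
 *		err_msg("Invalid priority\n");
 *	else
 *		set_comm_sched_attr("timerlat/", &attr);
 */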

/*
 * set_cpu_dma_latency - set the /dev/cpu_dma_latency
 *
 * This is used to reduce the exit from idle latency. The value
 * will be reset once the file descriptor of /dev/cpu_dma_latency
 * is closed.
 *
 * Return: the /dev/cpu_dma_latency file descriptor
 */
int set_cpu_dma_latency(int32_t latency)
{
	int retval;
	int fd;

	fd = open("/dev/cpu_dma_latency", O_RDWR);
	if (fd < 0) {
		err_msg("Error opening /dev/cpu_dma_latency\n");
		return -1;
	}

	retval = write(fd, &latency, 4);
	if (retval < 1) {
		err_msg("Error setting /dev/cpu_dma_latency\n");
		close(fd);
		return -1;
	}

	debug_msg("Set /dev/cpu_dma_latency to %d\n", latency);

	return fd;
}
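
/*
 * Editorial note (not in the original source): the descriptor returned by
 * set_cpu_dma_latency() must stay open for as long as the latency constraint
 * should hold, since the kernel drops the request when it is closed. A
 * typical, illustrative pattern:
 *
 *	int dma_latency_fd = set_cpu_dma_latency(0);
 *
 *	(... run the measurement ...)
 *
 *	if (dma_latency_fd >= 0)
 *		close(dma_latency_fd);
 */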

#ifdef HAVE_LIBCPUPOWER_SUPPORT
static unsigned int **saved_cpu_idle_disable_state;
static size_t saved_cpu_idle_disable_state_alloc_ctr;

/*
 * save_cpu_idle_disable_state - save disable for all idle states of a cpu
 *
 * Saves the current disable setting of all idle states of a cpu, to be
 * subsequently restored via restore_cpu_idle_disable_state.
 *
 * Return: idle state count on success, negative on error
 */
int save_cpu_idle_disable_state(unsigned int cpu)
{
	unsigned int nr_states;
	unsigned int state;
	int disabled;
	int nr_cpus;

	nr_states = cpuidle_state_count(cpu);

	if (nr_states == 0)
		return 0;

	if (saved_cpu_idle_disable_state == NULL) {
		nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
		saved_cpu_idle_disable_state = calloc(nr_cpus, sizeof(unsigned int *));
		if (!saved_cpu_idle_disable_state)
			return -1;
	}

	saved_cpu_idle_disable_state[cpu] = calloc(nr_states, sizeof(unsigned int));
	if (!saved_cpu_idle_disable_state[cpu])
		return -1;
	saved_cpu_idle_disable_state_alloc_ctr++;

	for (state = 0; state < nr_states; state++) {
		disabled = cpuidle_is_state_disabled(cpu, state);
		if (disabled < 0)
			return disabled;
		saved_cpu_idle_disable_state[cpu][state] = disabled;
	}

	return nr_states;
}

/*
 * restore_cpu_idle_disable_state - restore disable for all idle states of a cpu
 *
 * Restores the current disable state of all idle states of a cpu that was
 * previously saved by save_cpu_idle_disable_state.
 *
 * Return: idle state count on success, negative on error
 */
int restore_cpu_idle_disable_state(unsigned int cpu)
{
	unsigned int nr_states;
	unsigned int state;
	int disabled;
	int result;

	nr_states = cpuidle_state_count(cpu);

	if (nr_states == 0)
		return 0;

	if (!saved_cpu_idle_disable_state)
		return -1;

	for (state = 0; state < nr_states; state++) {
		if (!saved_cpu_idle_disable_state[cpu])
			return -1;
		disabled = saved_cpu_idle_disable_state[cpu][state];
		result = cpuidle_state_disable(cpu, state, disabled);
		if (result < 0)
			return result;
	}

	free(saved_cpu_idle_disable_state[cpu]);
	saved_cpu_idle_disable_state[cpu] = NULL;
	saved_cpu_idle_disable_state_alloc_ctr--;
	if (saved_cpu_idle_disable_state_alloc_ctr == 0) {
		free(saved_cpu_idle_disable_state);
		saved_cpu_idle_disable_state = NULL;
	}

	return nr_states;
}

/*
 * free_cpu_idle_disable_states - free saved idle state disable for all cpus
 *
 * Frees the memory used for storing cpu idle state disable for all cpus
 * and states.
 *
 * Normally, the memory is freed automatically in
 * restore_cpu_idle_disable_state; this is mostly for cleaning up after an
 * error.
 */
void free_cpu_idle_disable_states(void)
{
	int cpu;
	int nr_cpus;

	if (!saved_cpu_idle_disable_state)
		return;

	nr_cpus = sysconf(_SC_NPROCESSORS_CONF);

	for (cpu = 0; cpu < nr_cpus; cpu++) {
		free(saved_cpu_idle_disable_state[cpu]);
		saved_cpu_idle_disable_state[cpu] = NULL;
	}

	free(saved_cpu_idle_disable_state);
	saved_cpu_idle_disable_state = NULL;
}

/*
 * set_deepest_cpu_idle_state - limit idle state of cpu
 *
 * Disables all idle states deeper than the one given in
 * deepest_state (assuming states with higher number are deeper).
 *
 * This is used to reduce the exit from idle latency. Unlike
 * set_cpu_dma_latency, it can disable idle states per cpu.
 *
 * Return: idle state count on success, negative on error
 */
int set_deepest_cpu_idle_state(unsigned int cpu, unsigned int deepest_state)
{
	unsigned int nr_states;
	unsigned int state;
	int result;

	nr_states = cpuidle_state_count(cpu);

	for (state = deepest_state + 1; state < nr_states; state++) {
		result = cpuidle_state_disable(cpu, state, 1);
		if (result < 0)
			return result;
	}

	return nr_states;
}
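
/*
 * Editorial sketch (not in the original source): a typical save/limit/restore
 * sequence for the idle-state helpers above, shown for a single illustrative
 * cpu. Real callers iterate over the monitored cpu set.
 *
 *	if (save_cpu_idle_disable_state(cpu) < 0)
 *		err_msg("Could not save idle states of cpu %d\n", cpu);
 *	else if (set_deepest_cpu_idle_state(cpu, 0) < 0)
 *		err_msg("Could not limit idle states of cpu %d\n", cpu);
 *
 *	(... run the measurement ...)
 *
 *	if (restore_cpu_idle_disable_state(cpu) < 0)
 *		free_cpu_idle_disable_states();
 */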
#endif /* HAVE_LIBCPUPOWER_SUPPORT */

#define _STR(x) #x
#define STR(x) _STR(x)

/*
 * find_mount - find the mount point of a given fs
 *
 * Returns 0 if the mount point is not found, otherwise returns 1 and fills mp
 * with the mount point.
 */
static int find_mount(const char *fs, char *mp, int sizeof_mp)
{
	char mount_point[MAX_PATH+1];
	char type[100];
	int found = 0;
	FILE *fp;

	fp = fopen("/proc/mounts", "r");
	if (!fp)
		return 0;

	while (fscanf(fp, "%*s %" STR(MAX_PATH) "s %99s %*s %*d %*d\n", mount_point, type) == 2) {
		if (strcmp(type, fs) == 0) {
			found = 1;
			break;
		}
	}
	fclose(fp);

	if (!found)
		return 0;

	memset(mp, 0, sizeof_mp);
	strncpy(mp, mount_point, sizeof_mp - 1);

	debug_msg("Fs %s found at %s\n", fs, mp);
	return 1;
}

/*
 * get_self_cgroup - get the current thread cgroup path
 *
 * Parse /proc/$$/cgroup file to get the thread's cgroup. As an example of line to parse:
 *
 * 0::/user.slice/user-0.slice/session-3.scope'\n'
 *
 * This function is interested in the content after the second : and before the '\n'.
 *
 * Returns 1 if a string was found, 0 otherwise.
 */
static int get_self_cgroup(char *self_cg, int sizeof_self_cg)
{
	char path[MAX_PATH], *start;
	int fd, retval;

	snprintf(path, MAX_PATH, "/proc/%d/cgroup", getpid());

	fd = open(path, O_RDONLY);
	if (fd < 0)
		return 0;

	retval = read(fd, path, MAX_PATH);

	close(fd);

	if (retval <= 0)
		return 0;

	start = path;

	start = strstr(start, ":");
	if (!start)
		return 0;

	/* skip ":" */
	start++;

	start = strstr(start, ":");
	if (!start)
		return 0;

	/* skip ":" */
	start++;

	if (strlen(start) >= sizeof_self_cg)
		return 0;

	snprintf(self_cg, sizeof_self_cg, "%s", start);

	/* Swap '\n' with '\0' */
	start = strstr(self_cg, "\n");

	/* there must be '\n' */
	if (!start)
		return 0;

	/* ok, it found a string after the second : and before the \n */
	*start = '\0';

	return 1;
}

/*
 * set_pid_cgroup - Set cgroup to pid_t pid
 *
 * If the cgroup argument is not NULL, the pid will move to the given cgroup.
 * Otherwise, the cgroup of the calling, i.e., rtla, thread will be used.
 *
 * Supports cgroup v2.
 *
 * Returns 1 on success, 0 otherwise.
 */
int set_pid_cgroup(pid_t pid, const char *cgroup)
{
	char cgroup_path[MAX_PATH - strlen("/cgroup.procs")];
	char cgroup_procs[MAX_PATH];
	char pid_str[24];
	int retval;
	int cg_fd;

	retval = find_mount("cgroup2", cgroup_path, sizeof(cgroup_path));
	if (!retval) {
		err_msg("Did not find cgroupv2 mount point\n");
		return 0;
	}

	if (!cgroup) {
		retval = get_self_cgroup(&cgroup_path[strlen(cgroup_path)],
				sizeof(cgroup_path) - strlen(cgroup_path));
		if (!retval) {
			err_msg("Did not find self cgroup\n");
			return 0;
		}
	} else {
		snprintf(&cgroup_path[strlen(cgroup_path)],
				sizeof(cgroup_path) - strlen(cgroup_path), "%s/", cgroup);
	}

	snprintf(cgroup_procs, MAX_PATH, "%s/cgroup.procs", cgroup_path);

	debug_msg("Using cgroup path at: %s\n", cgroup_procs);

	cg_fd = open(cgroup_procs, O_RDWR);
	if (cg_fd < 0)
		return 0;

	snprintf(pid_str, sizeof(pid_str), "%d\n", pid);

	retval = write(cg_fd, pid_str, strlen(pid_str));
	if (retval < 0)
		err_msg("Error setting cgroup attributes for pid:%s - %s\n",
			pid_str, strerror(errno));
	else
		debug_msg("Set cgroup attributes for pid:%s\n", pid_str);

	close(cg_fd);

	return (retval >= 0);
}

/**
 * set_comm_cgroup - Set cgroup to threads starting with char *comm_prefix
 *
 * If cgroup argument is not NULL, the threads will move to the given cgroup.
 * Otherwise, the cgroup of the calling, i.e., rtla, thread will be used.
 *
 * Supports cgroup v2.
 *
 * Returns 1 on success, 0 otherwise.
 */
int set_comm_cgroup(const char *comm_prefix, const char *cgroup)
{
	char cgroup_path[MAX_PATH - strlen("/cgroup.procs")];
	char cgroup_procs[MAX_PATH];
	struct dirent *proc_entry;
	DIR *procfs;
	int retval;
	int cg_fd;

	if (strlen(comm_prefix) >= MAX_PATH) {
		err_msg("Command prefix is too long: %d < strlen(%s)\n",
			MAX_PATH, comm_prefix);
		return 0;
	}

	retval = find_mount("cgroup2", cgroup_path, sizeof(cgroup_path));
	if (!retval) {
		err_msg("Did not find cgroupv2 mount point\n");
		return 0;
	}

	if (!cgroup) {
		retval = get_self_cgroup(&cgroup_path[strlen(cgroup_path)],
				sizeof(cgroup_path) - strlen(cgroup_path));
		if (!retval) {
			err_msg("Did not find self cgroup\n");
			return 0;
		}
	} else {
		snprintf(&cgroup_path[strlen(cgroup_path)],
				sizeof(cgroup_path) - strlen(cgroup_path), "%s/", cgroup);
	}

	snprintf(cgroup_procs, MAX_PATH, "%s/cgroup.procs", cgroup_path);

	debug_msg("Using cgroup path at: %s\n", cgroup_procs);

	cg_fd = open(cgroup_procs, O_RDWR);
	if (cg_fd < 0)
		return 0;

	procfs = opendir("/proc");
	if (!procfs) {
		err_msg("Could not open procfs\n");
		goto out_cg;
	}

	while ((proc_entry = readdir(procfs))) {

		retval = procfs_is_workload_pid(comm_prefix, proc_entry);
		if (!retval)
			continue;

		retval = write(cg_fd, proc_entry->d_name, strlen(proc_entry->d_name));
		if (retval < 0) {
			err_msg("Error setting cgroup attributes for pid:%s - %s\n",
				proc_entry->d_name, strerror(errno));
			goto out_procfs;
		}

		debug_msg("Set cgroup attributes for pid:%s\n", proc_entry->d_name);
	}

	closedir(procfs);
	close(cg_fd);
	return 1;

out_procfs:
	closedir(procfs);
out_cg:
	close(cg_fd);
	return 0;
}
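
/*
 * Editorial sketch (not in the original source): moving threads to a cgroup.
 * Passing NULL uses rtla's own cgroup; the "my_cgroup" name and the
 * "timerlat/" prefix are illustrative only.
 *
 *	if (!set_comm_cgroup("timerlat/", NULL))
 *		err_msg("Could not move the tracer threads to rtla's cgroup\n");
 *
 *	if (!set_pid_cgroup(getpid(), "my_cgroup"))
 *		err_msg("Could not move rtla to the my_cgroup cgroup\n");
 */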

/**
 * auto_house_keeping - Automatically move rtla out of measurement threads
 *
 * Try to move rtla away from the tracer, if possible.
 *
 * Returns 1 on success, 0 otherwise.
 */
int auto_house_keeping(cpu_set_t *monitored_cpus)
{
	cpu_set_t rtla_cpus, house_keeping_cpus;
	int retval;

	/* first get the CPUs in which rtla can actually run. */
	retval = sched_getaffinity(getpid(), sizeof(rtla_cpus), &rtla_cpus);
	if (retval == -1) {
		debug_msg("Could not get rtla affinity, rtla might run with the threads!\n");
		return 0;
	}

	/* then check if the existing setup is already good. */
	CPU_AND(&house_keeping_cpus, &rtla_cpus, monitored_cpus);
	if (!CPU_COUNT(&house_keeping_cpus)) {
		debug_msg("rtla and the monitored CPUs do not share CPUs.");
		debug_msg("Skipping auto house-keeping\n");
		return 1;
	}

	/* remove the intersection */
	CPU_XOR(&house_keeping_cpus, &rtla_cpus, monitored_cpus);

	/* get only those that rtla can run */
	CPU_AND(&house_keeping_cpus, &house_keeping_cpus, &rtla_cpus);

	/* is there any cpu left? */
	if (!CPU_COUNT(&house_keeping_cpus)) {
		debug_msg("Could not find any CPU for auto house-keeping\n");
		return 0;
	}

	retval = sched_setaffinity(getpid(), sizeof(house_keeping_cpus), &house_keeping_cpus);
	if (retval == -1) {
		debug_msg("Could not set affinity for auto house-keeping\n");
		return 0;
	}

	debug_msg("rtla automatically moved to an auto house-keeping cpu set\n");

	return 1;
}

/**
 * parse_optional_arg - Parse optional argument value
 *
 * Parse optional argument value, which can be in the form of:
 * -sarg, -s/--long=arg, -s/--long arg
 *
 * Returns arg value if found, NULL otherwise.
 */
char *parse_optional_arg(int argc, char **argv)
{
	if (optarg) {
		if (optarg[0] == '=') {
			/* skip the = */
			return &optarg[1];
		} else {
			return optarg;
		}
	/* parse argument of form -s [arg] and --long [arg] */
	} else if (optind < argc && argv[optind][0] != '-') {
		/* consume optind */
		return argv[optind++];
	} else {
		return NULL;
	}
}
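
/*
 * Editorial sketch (not in the original source): parse_optional_arg() is
 * meant to be called from inside a getopt_long() loop for an option declared
 * with optional_argument (struct option and getopt_long() come from
 * <getopt.h>). The "sample"/'s' option below is illustrative only.
 *
 *	static struct option long_options[] = {
 *		{ "sample", optional_argument, 0, 's' },
 *		{ 0, 0, 0, 0 },
 *	};
 *	char *arg;
 *	int c;
 *
 *	while ((c = getopt_long(argc, argv, "s::", long_options, NULL)) != -1) {
 *		if (c == 's') {
 *			arg = parse_optional_arg(argc, argv);
 *			if (arg)
 *				debug_msg("sample arg: %s\n", arg);
 *		}
 *	}
 */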