1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org>
4 */
5
6 #define _GNU_SOURCE
7 #ifdef HAVE_LIBCPUPOWER_SUPPORT
8 #include <cpuidle.h>
9 #endif /* HAVE_LIBCPUPOWER_SUPPORT */
10 #include <dirent.h>
11 #include <stdarg.h>
12 #include <stdlib.h>
13 #include <string.h>
14 #include <unistd.h>
15 #include <ctype.h>
16 #include <errno.h>
17 #include <fcntl.h>
18 #include <sched.h>
19 #include <stdio.h>
20
21 #include "utils.h"
22
23 #define MAX_MSG_LENGTH 1024
24 int config_debug;
25
26 /*
27 * err_msg - print an error message to the stderr
28 */
err_msg(const char * fmt,...)29 void err_msg(const char *fmt, ...)
30 {
31 char message[MAX_MSG_LENGTH];
32 va_list ap;
33
34 va_start(ap, fmt);
35 vsnprintf(message, sizeof(message), fmt, ap);
36 va_end(ap);
37
38 fprintf(stderr, "%s", message);
39 }
40
41 /*
42 * debug_msg - print a debug message to stderr if debug is set
43 */
debug_msg(const char * fmt,...)44 void debug_msg(const char *fmt, ...)
45 {
46 char message[MAX_MSG_LENGTH];
47 va_list ap;
48
49 if (!config_debug)
50 return;
51
52 va_start(ap, fmt);
53 vsnprintf(message, sizeof(message), fmt, ap);
54 va_end(ap);
55
56 fprintf(stderr, "%s", message);
57 }
58
/*
 * get_llong_from_str - get a long long int from a string
 *
 * Returns -1 if the string does not start with a valid decimal
 * number or if strtoll() reports an error (e.g. overflow).
 */
long long get_llong_from_str(char *start)
{
	long long parsed;
	char *endptr;

	errno = 0;
	parsed = strtoll(start, &endptr, 10);

	/* no digits consumed, or out-of-range conversion */
	if (endptr == start || errno)
		return -1;

	return parsed;
}
74
/*
 * get_duration - fill output with a human readable duration since start_time
 */
void get_duration(time_t start_time, char *output, int output_size)
{
	time_t elapsed = (time_t)difftime(time(NULL), start_time);
	struct tm *broken_down;

	/*
	 * gmtime() interprets the elapsed seconds as a calendar time,
	 * which conveniently splits it into days/hours/minutes/seconds.
	 */
	broken_down = gmtime(&elapsed);

	snprintf(output, output_size, "%3d %02d:%02d:%02d",
		 broken_down->tm_yday,
		 broken_down->tm_hour,
		 broken_down->tm_min,
		 broken_down->tm_sec);
}
93
/*
 * parse_cpu_set - parse a cpu_list filling cpu_set_t argument
 *
 * Receives a cpu list, like 1-3,5 (cpus 1, 2, 3, 5), and then set
 * filling cpu_set_t argument.
 *
 * Returns 0 on success, 1 otherwise.
 */
int parse_cpu_set(char *cpu_list, cpu_set_t *set)
{
	const char *p;
	char *end;
	long cpu;
	long end_cpu;
	int nr_cpus;
	int i;

	CPU_ZERO(set);

	nr_cpus = sysconf(_SC_NPROCESSORS_CONF);

	for (p = cpu_list; *p; ) {
		/*
		 * Only plain decimal cpu ids are accepted. This also makes
		 * sure p always advances below: the old atoi()-based parser
		 * spun forever on input with leading whitespace (e.g. " 1").
		 */
		if (!isdigit(*p))
			goto err;

		cpu = strtol(p, &end, 10);
		if (cpu < 0 || cpu >= nr_cpus)
			goto err;
		p = end;

		if (*p == '-') {
			/* range: cpu-end_cpu */
			p++;
			if (!isdigit(*p))
				goto err;
			end_cpu = strtol(p, &end, 10);
			if (end_cpu < cpu || end_cpu >= nr_cpus)
				goto err;
			p = end;
		} else {
			end_cpu = cpu;
		}

		for (i = cpu; i <= end_cpu; i++) {
			debug_msg("cpu_set: adding cpu %d\n", i);
			CPU_SET(i, set);
		}

		if (*p == ',')
			p++;
		else if (*p)
			goto err; /* trailing garbage after a cpu or range */
	}

	return 0;
err:
	debug_msg("Error parsing the cpu set %s\n", cpu_list);
	return 1;
}
150
/*
 * parse_seconds_duration - parse duration with s/m/h/d suffix converting it to seconds
 */
long parse_seconds_duration(char *val)
{
	char *suffix;
	long seconds;

	seconds = strtol(val, &suffix, 10);

	switch (*suffix) {
	case 'm':
	case 'M':
		seconds *= 60;
		break;
	case 'h':
	case 'H':
		seconds *= 60 * 60;
		break;
	case 'd':
	case 'D':
		seconds *= 24 * 60 * 60;
		break;
	default:
		/* 's'/'S', no suffix, or unknown: value is already seconds */
		break;
	}

	return seconds;
}
184
/*
 * parse_ns_duration - parse duration with ns/us/ms/s converting it to nanoseconds
 *
 * Returns -1 when the suffix is missing or unknown.
 */
long parse_ns_duration(char *val)
{
	char *suffix;
	long t;

	t = strtol(val, &suffix, 10);

	/* note: "s" must be tested last, as it is a suffix of "ns"/"us"/"ms" */
	if (!strncmp(suffix, "ns", 2))
		return t;
	if (!strncmp(suffix, "us", 2))
		return t * 1000;
	if (!strncmp(suffix, "ms", 2))
		return t * 1000 * 1000;
	if (!strncmp(suffix, "s", 1))
		return t * 1000 * 1000 * 1000;

	return -1;
}
213
214 /*
215 * This is a set of helper functions to use SCHED_DEADLINE.
216 */
217 #ifndef __NR_sched_setattr
218 # ifdef __x86_64__
219 # define __NR_sched_setattr 314
220 # elif __i386__
221 # define __NR_sched_setattr 351
222 # elif __arm__
223 # define __NR_sched_setattr 380
224 # elif __aarch64__ || __riscv
225 # define __NR_sched_setattr 274
226 # elif __powerpc__
227 # define __NR_sched_setattr 355
228 # elif __s390x__
229 # define __NR_sched_setattr 345
230 # elif __loongarch__
231 # define __NR_sched_setattr 274
232 # endif
233 #endif
234
235 #define SCHED_DEADLINE 6
236
syscall_sched_setattr(pid_t pid,const struct sched_attr * attr,unsigned int flags)237 static inline int syscall_sched_setattr(pid_t pid, const struct sched_attr *attr,
238 unsigned int flags) {
239 return syscall(__NR_sched_setattr, pid, attr, flags);
240 }
241
/*
 * __set_sched_attr - apply scheduling attributes to a pid
 *
 * Returns 0 on success, 1 on error (an error message is printed to stderr).
 */
int __set_sched_attr(int pid, struct sched_attr *attr)
{
	int ret = syscall_sched_setattr(pid, attr, 0);

	if (ret < 0) {
		err_msg("Failed to set sched attributes to the pid %d: %s\n",
			pid, strerror(errno));
		return 1;
	}

	return 0;
}
256
257 /*
258 * procfs_is_workload_pid - check if a procfs entry contains a comm_prefix* comm
259 *
260 * Check if the procfs entry is a directory of a process, and then check if the
261 * process has a comm with the prefix set in char *comm_prefix. As the
262 * current users of this function only check for kernel threads, there is no
263 * need to check for the threads for the process.
264 *
265 * Return: True if the proc_entry contains a comm file with comm_prefix*.
266 * Otherwise returns false.
267 */
procfs_is_workload_pid(const char * comm_prefix,struct dirent * proc_entry)268 static int procfs_is_workload_pid(const char *comm_prefix, struct dirent *proc_entry)
269 {
270 char buffer[MAX_PATH];
271 int comm_fd, retval;
272 char *t_name;
273
274 if (proc_entry->d_type != DT_DIR)
275 return 0;
276
277 if (*proc_entry->d_name == '.')
278 return 0;
279
280 /* check if the string is a pid */
281 for (t_name = proc_entry->d_name; t_name; t_name++) {
282 if (!isdigit(*t_name))
283 break;
284 }
285
286 if (*t_name != '\0')
287 return 0;
288
289 snprintf(buffer, MAX_PATH, "/proc/%s/comm", proc_entry->d_name);
290 comm_fd = open(buffer, O_RDONLY);
291 if (comm_fd < 0)
292 return 0;
293
294 memset(buffer, 0, MAX_PATH);
295 retval = read(comm_fd, buffer, MAX_PATH);
296
297 close(comm_fd);
298
299 if (retval <= 0)
300 return 0;
301
302 retval = strncmp(comm_prefix, buffer, strlen(comm_prefix));
303 if (retval)
304 return 0;
305
306 /* comm already have \n */
307 debug_msg("Found workload pid:%s comm:%s", proc_entry->d_name, buffer);
308
309 return 1;
310 }
311
312 /*
313 * set_comm_sched_attr - set sched params to threads starting with char *comm_prefix
314 *
315 * This function uses procfs to list the currently running threads and then set the
316 * sched_attr *attr to the threads that start with char *comm_prefix. It is
317 * mainly used to set the priority to the kernel threads created by the
318 * tracers.
319 */
set_comm_sched_attr(const char * comm_prefix,struct sched_attr * attr)320 int set_comm_sched_attr(const char *comm_prefix, struct sched_attr *attr)
321 {
322 struct dirent *proc_entry;
323 DIR *procfs;
324 int retval;
325
326 if (strlen(comm_prefix) >= MAX_PATH) {
327 err_msg("Command prefix is too long: %d < strlen(%s)\n",
328 MAX_PATH, comm_prefix);
329 return 1;
330 }
331
332 procfs = opendir("/proc");
333 if (!procfs) {
334 err_msg("Could not open procfs\n");
335 return 1;
336 }
337
338 while ((proc_entry = readdir(procfs))) {
339
340 retval = procfs_is_workload_pid(comm_prefix, proc_entry);
341 if (!retval)
342 continue;
343
344 /* procfs_is_workload_pid confirmed it is a pid */
345 retval = __set_sched_attr(atoi(proc_entry->d_name), attr);
346 if (retval) {
347 err_msg("Error setting sched attributes for pid:%s\n", proc_entry->d_name);
348 goto out_err;
349 }
350
351 debug_msg("Set sched attributes for pid:%s\n", proc_entry->d_name);
352 }
353 return 0;
354
355 out_err:
356 closedir(procfs);
357 return 1;
358 }
359
#define INVALID_VAL (~0L)

/*
 * get_long_ns_after_colon - parse a ns duration found after the first ':'
 *
 * Returns -1 if no ':' is present in the string.
 */
static long get_long_ns_after_colon(char *start)
{
	char *colon = strstr(start, ":");

	if (!colon)
		return -1;

	/* parse the duration that follows the ':' */
	return parse_ns_duration(colon + 1);
}
376
/*
 * get_long_after_colon - parse a decimal number found after the first ':'
 *
 * Returns -1 if no ':' is present in the string.
 */
static long get_long_after_colon(char *start)
{
	char *colon = strstr(start, ":");
	long val;

	if (!colon)
		return -1;

	/* parse the value that follows the ':' */
	val = get_llong_from_str(colon + 1);

	return val;
}
392
393 /*
394 * parse priority in the format:
395 * SCHED_OTHER:
396 * o:<prio>
397 * O:<prio>
398 * SCHED_RR:
399 * r:<prio>
400 * R:<prio>
401 * SCHED_FIFO:
402 * f:<prio>
403 * F:<prio>
404 * SCHED_DEADLINE:
405 * d:runtime:period
406 * D:runtime:period
407 */
parse_prio(char * arg,struct sched_attr * sched_param)408 int parse_prio(char *arg, struct sched_attr *sched_param)
409 {
410 long prio;
411 long runtime;
412 long period;
413
414 memset(sched_param, 0, sizeof(*sched_param));
415 sched_param->size = sizeof(*sched_param);
416
417 switch (arg[0]) {
418 case 'd':
419 case 'D':
420 /* d:runtime:period */
421 if (strlen(arg) < 4)
422 return -1;
423
424 runtime = get_long_ns_after_colon(arg);
425 if (runtime == INVALID_VAL)
426 return -1;
427
428 period = get_long_ns_after_colon(&arg[2]);
429 if (period == INVALID_VAL)
430 return -1;
431
432 if (runtime > period)
433 return -1;
434
435 sched_param->sched_policy = SCHED_DEADLINE;
436 sched_param->sched_runtime = runtime;
437 sched_param->sched_deadline = period;
438 sched_param->sched_period = period;
439 break;
440 case 'f':
441 case 'F':
442 /* f:prio */
443 prio = get_long_after_colon(arg);
444 if (prio == INVALID_VAL)
445 return -1;
446
447 if (prio < sched_get_priority_min(SCHED_FIFO))
448 return -1;
449 if (prio > sched_get_priority_max(SCHED_FIFO))
450 return -1;
451
452 sched_param->sched_policy = SCHED_FIFO;
453 sched_param->sched_priority = prio;
454 break;
455 case 'r':
456 case 'R':
457 /* r:prio */
458 prio = get_long_after_colon(arg);
459 if (prio == INVALID_VAL)
460 return -1;
461
462 if (prio < sched_get_priority_min(SCHED_RR))
463 return -1;
464 if (prio > sched_get_priority_max(SCHED_RR))
465 return -1;
466
467 sched_param->sched_policy = SCHED_RR;
468 sched_param->sched_priority = prio;
469 break;
470 case 'o':
471 case 'O':
472 /* o:prio */
473 prio = get_long_after_colon(arg);
474 if (prio == INVALID_VAL)
475 return -1;
476
477 if (prio < MIN_NICE)
478 return -1;
479 if (prio > MAX_NICE)
480 return -1;
481
482 sched_param->sched_policy = SCHED_OTHER;
483 sched_param->sched_nice = prio;
484 break;
485 default:
486 return -1;
487 }
488 return 0;
489 }
490
/*
 * set_cpu_dma_latency - set the /dev/cpu_dma_latency
 *
 * This is used to reduce the exit from idle latency. The value
 * will be reset once the file descriptor of /dev/cpu_dma_latency
 * is closed.
 *
 * Return: the /dev/cpu_dma_latency file descriptor
 */
int set_cpu_dma_latency(int32_t latency)
{
	int retval;
	int fd;

	fd = open("/dev/cpu_dma_latency", O_RDWR);
	if (fd < 0) {
		err_msg("Error opening /dev/cpu_dma_latency\n");
		return -1;
	}

	/* a short write would leave the kernel with a bogus latency value */
	retval = write(fd, &latency, sizeof(latency));
	if (retval < (int)sizeof(latency)) {
		err_msg("Error setting /dev/cpu_dma_latency\n");
		close(fd);
		return -1;
	}

	debug_msg("Set /dev/cpu_dma_latency to %d\n", latency);

	return fd;
}
522
523 #ifdef HAVE_LIBCPUPOWER_SUPPORT
524 static unsigned int **saved_cpu_idle_disable_state;
525 static size_t saved_cpu_idle_disable_state_alloc_ctr;
526
527 /*
528 * save_cpu_idle_state_disable - save disable for all idle states of a cpu
529 *
530 * Saves the current disable of all idle states of a cpu, to be subsequently
531 * restored via restore_cpu_idle_disable_state.
532 *
533 * Return: idle state count on success, negative on error
534 */
save_cpu_idle_disable_state(unsigned int cpu)535 int save_cpu_idle_disable_state(unsigned int cpu)
536 {
537 unsigned int nr_states;
538 unsigned int state;
539 int disabled;
540 int nr_cpus;
541
542 nr_states = cpuidle_state_count(cpu);
543
544 if (nr_states == 0)
545 return 0;
546
547 if (saved_cpu_idle_disable_state == NULL) {
548 nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
549 saved_cpu_idle_disable_state = calloc(nr_cpus, sizeof(unsigned int *));
550 if (!saved_cpu_idle_disable_state)
551 return -1;
552 }
553
554 saved_cpu_idle_disable_state[cpu] = calloc(nr_states, sizeof(unsigned int));
555 if (!saved_cpu_idle_disable_state[cpu])
556 return -1;
557 saved_cpu_idle_disable_state_alloc_ctr++;
558
559 for (state = 0; state < nr_states; state++) {
560 disabled = cpuidle_is_state_disabled(cpu, state);
561 if (disabled < 0)
562 return disabled;
563 saved_cpu_idle_disable_state[cpu][state] = disabled;
564 }
565
566 return nr_states;
567 }
568
569 /*
570 * restore_cpu_idle_disable_state - restore disable for all idle states of a cpu
571 *
572 * Restores the current disable state of all idle states of a cpu that was
573 * previously saved by save_cpu_idle_disable_state.
574 *
575 * Return: idle state count on success, negative on error
576 */
restore_cpu_idle_disable_state(unsigned int cpu)577 int restore_cpu_idle_disable_state(unsigned int cpu)
578 {
579 unsigned int nr_states;
580 unsigned int state;
581 int disabled;
582 int result;
583
584 nr_states = cpuidle_state_count(cpu);
585
586 if (nr_states == 0)
587 return 0;
588
589 if (!saved_cpu_idle_disable_state)
590 return -1;
591
592 for (state = 0; state < nr_states; state++) {
593 if (!saved_cpu_idle_disable_state[cpu])
594 return -1;
595 disabled = saved_cpu_idle_disable_state[cpu][state];
596 result = cpuidle_state_disable(cpu, state, disabled);
597 if (result < 0)
598 return result;
599 }
600
601 free(saved_cpu_idle_disable_state[cpu]);
602 saved_cpu_idle_disable_state[cpu] = NULL;
603 saved_cpu_idle_disable_state_alloc_ctr--;
604 if (saved_cpu_idle_disable_state_alloc_ctr == 0) {
605 free(saved_cpu_idle_disable_state);
606 saved_cpu_idle_disable_state = NULL;
607 }
608
609 return nr_states;
610 }
611
612 /*
613 * free_cpu_idle_disable_states - free saved idle state disable for all cpus
614 *
615 * Frees the memory used for storing cpu idle state disable for all cpus
616 * and states.
617 *
618 * Normally, the memory is freed automatically in
619 * restore_cpu_idle_disable_state; this is mostly for cleaning up after an
620 * error.
621 */
free_cpu_idle_disable_states(void)622 void free_cpu_idle_disable_states(void)
623 {
624 int cpu;
625 int nr_cpus;
626
627 if (!saved_cpu_idle_disable_state)
628 return;
629
630 nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
631
632 for (cpu = 0; cpu < nr_cpus; cpu++) {
633 free(saved_cpu_idle_disable_state[cpu]);
634 saved_cpu_idle_disable_state[cpu] = NULL;
635 }
636
637 free(saved_cpu_idle_disable_state);
638 saved_cpu_idle_disable_state = NULL;
639 }
640
/*
 * set_deepest_cpu_idle_state - limit idle state of cpu
 *
 * Disables all idle states deeper than the one given in
 * deepest_state (assuming states with higher number are deeper).
 *
 * This is used to reduce the exit from idle latency. Unlike
 * set_cpu_dma_latency, it can disable idle states per cpu.
 *
 * Return: idle state count on success, negative on error
 */
int set_deepest_cpu_idle_state(unsigned int cpu, unsigned int deepest_state)
{
	unsigned int nr_states = cpuidle_state_count(cpu);
	unsigned int state;
	int result;

	/* every state deeper than deepest_state gets disabled */
	for (state = deepest_state + 1; state < nr_states; state++) {
		result = cpuidle_state_disable(cpu, state, 1);
		if (result < 0)
			return result;
	}

	return nr_states;
}
668 #endif /* HAVE_LIBCPUPOWER_SUPPORT */
669
670 #define _STR(x) #x
671 #define STR(x) _STR(x)
672
673 /*
674 * find_mount - find a the mount point of a given fs
675 *
676 * Returns 0 if mount is not found, otherwise return 1 and fill mp
677 * with the mount point.
678 */
find_mount(const char * fs,char * mp,int sizeof_mp)679 static const int find_mount(const char *fs, char *mp, int sizeof_mp)
680 {
681 char mount_point[MAX_PATH+1];
682 char type[100];
683 int found = 0;
684 FILE *fp;
685
686 fp = fopen("/proc/mounts", "r");
687 if (!fp)
688 return 0;
689
690 while (fscanf(fp, "%*s %" STR(MAX_PATH) "s %99s %*s %*d %*d\n", mount_point, type) == 2) {
691 if (strcmp(type, fs) == 0) {
692 found = 1;
693 break;
694 }
695 }
696 fclose(fp);
697
698 if (!found)
699 return 0;
700
701 memset(mp, 0, sizeof_mp);
702 strncpy(mp, mount_point, sizeof_mp - 1);
703
704 debug_msg("Fs %s found at %s\n", fs, mp);
705 return 1;
706 }
707
708 /*
709 * get_self_cgroup - get the current thread cgroup path
710 *
711 * Parse /proc/$$/cgroup file to get the thread's cgroup. As an example of line to parse:
712 *
713 * 0::/user.slice/user-0.slice/session-3.scope'\n'
714 *
715 * This function is interested in the content after the second : and before the '\n'.
716 *
717 * Returns 1 if a string was found, 0 otherwise.
718 */
get_self_cgroup(char * self_cg,int sizeof_self_cg)719 static int get_self_cgroup(char *self_cg, int sizeof_self_cg)
720 {
721 char path[MAX_PATH], *start;
722 int fd, retval;
723
724 snprintf(path, MAX_PATH, "/proc/%d/cgroup", getpid());
725
726 fd = open(path, O_RDONLY);
727 if (fd < 0)
728 return 0;
729
730 retval = read(fd, path, MAX_PATH);
731
732 close(fd);
733
734 if (retval <= 0)
735 return 0;
736
737 start = path;
738
739 start = strstr(start, ":");
740 if (!start)
741 return 0;
742
743 /* skip ":" */
744 start++;
745
746 start = strstr(start, ":");
747 if (!start)
748 return 0;
749
750 /* skip ":" */
751 start++;
752
753 if (strlen(start) >= sizeof_self_cg)
754 return 0;
755
756 snprintf(self_cg, sizeof_self_cg, "%s", start);
757
758 /* Swap '\n' with '\0' */
759 start = strstr(self_cg, "\n");
760
761 /* there must be '\n' */
762 if (!start)
763 return 0;
764
765 /* ok, it found a string after the second : and before the \n */
766 *start = '\0';
767
768 return 1;
769 }
770
771 /*
772 * set_comm_cgroup - Set cgroup to pid_t pid
773 *
774 * If cgroup argument is not NULL, the threads will move to the given cgroup.
775 * Otherwise, the cgroup of the calling, i.e., rtla, thread will be used.
776 *
777 * Supports cgroup v2.
778 *
779 * Returns 1 on success, 0 otherwise.
780 */
set_pid_cgroup(pid_t pid,const char * cgroup)781 int set_pid_cgroup(pid_t pid, const char *cgroup)
782 {
783 char cgroup_path[MAX_PATH - strlen("/cgroup.procs")];
784 char cgroup_procs[MAX_PATH];
785 char pid_str[24];
786 int retval;
787 int cg_fd;
788
789 retval = find_mount("cgroup2", cgroup_path, sizeof(cgroup_path));
790 if (!retval) {
791 err_msg("Did not find cgroupv2 mount point\n");
792 return 0;
793 }
794
795 if (!cgroup) {
796 retval = get_self_cgroup(&cgroup_path[strlen(cgroup_path)],
797 sizeof(cgroup_path) - strlen(cgroup_path));
798 if (!retval) {
799 err_msg("Did not find self cgroup\n");
800 return 0;
801 }
802 } else {
803 snprintf(&cgroup_path[strlen(cgroup_path)],
804 sizeof(cgroup_path) - strlen(cgroup_path), "%s/", cgroup);
805 }
806
807 snprintf(cgroup_procs, MAX_PATH, "%s/cgroup.procs", cgroup_path);
808
809 debug_msg("Using cgroup path at: %s\n", cgroup_procs);
810
811 cg_fd = open(cgroup_procs, O_RDWR);
812 if (cg_fd < 0)
813 return 0;
814
815 snprintf(pid_str, sizeof(pid_str), "%d\n", pid);
816
817 retval = write(cg_fd, pid_str, strlen(pid_str));
818 if (retval < 0)
819 err_msg("Error setting cgroup attributes for pid:%s - %s\n",
820 pid_str, strerror(errno));
821 else
822 debug_msg("Set cgroup attributes for pid:%s\n", pid_str);
823
824 close(cg_fd);
825
826 return (retval >= 0);
827 }
828
829 /**
830 * set_comm_cgroup - Set cgroup to threads starting with char *comm_prefix
831 *
832 * If cgroup argument is not NULL, the threads will move to the given cgroup.
833 * Otherwise, the cgroup of the calling, i.e., rtla, thread will be used.
834 *
835 * Supports cgroup v2.
836 *
837 * Returns 1 on success, 0 otherwise.
838 */
set_comm_cgroup(const char * comm_prefix,const char * cgroup)839 int set_comm_cgroup(const char *comm_prefix, const char *cgroup)
840 {
841 char cgroup_path[MAX_PATH - strlen("/cgroup.procs")];
842 char cgroup_procs[MAX_PATH];
843 struct dirent *proc_entry;
844 DIR *procfs;
845 int retval;
846 int cg_fd;
847
848 if (strlen(comm_prefix) >= MAX_PATH) {
849 err_msg("Command prefix is too long: %d < strlen(%s)\n",
850 MAX_PATH, comm_prefix);
851 return 0;
852 }
853
854 retval = find_mount("cgroup2", cgroup_path, sizeof(cgroup_path));
855 if (!retval) {
856 err_msg("Did not find cgroupv2 mount point\n");
857 return 0;
858 }
859
860 if (!cgroup) {
861 retval = get_self_cgroup(&cgroup_path[strlen(cgroup_path)],
862 sizeof(cgroup_path) - strlen(cgroup_path));
863 if (!retval) {
864 err_msg("Did not find self cgroup\n");
865 return 0;
866 }
867 } else {
868 snprintf(&cgroup_path[strlen(cgroup_path)],
869 sizeof(cgroup_path) - strlen(cgroup_path), "%s/", cgroup);
870 }
871
872 snprintf(cgroup_procs, MAX_PATH, "%s/cgroup.procs", cgroup_path);
873
874 debug_msg("Using cgroup path at: %s\n", cgroup_procs);
875
876 cg_fd = open(cgroup_procs, O_RDWR);
877 if (cg_fd < 0)
878 return 0;
879
880 procfs = opendir("/proc");
881 if (!procfs) {
882 err_msg("Could not open procfs\n");
883 goto out_cg;
884 }
885
886 while ((proc_entry = readdir(procfs))) {
887
888 retval = procfs_is_workload_pid(comm_prefix, proc_entry);
889 if (!retval)
890 continue;
891
892 retval = write(cg_fd, proc_entry->d_name, strlen(proc_entry->d_name));
893 if (retval < 0) {
894 err_msg("Error setting cgroup attributes for pid:%s - %s\n",
895 proc_entry->d_name, strerror(errno));
896 goto out_procfs;
897 }
898
899 debug_msg("Set cgroup attributes for pid:%s\n", proc_entry->d_name);
900 }
901
902 closedir(procfs);
903 close(cg_fd);
904 return 1;
905
906 out_procfs:
907 closedir(procfs);
908 out_cg:
909 close(cg_fd);
910 return 0;
911 }
912
/**
 * auto_house_keeping - Automatically move rtla out of measurement threads
 *
 * Try to move rtla away from the tracer, if possible.
 *
 * Returns 1 on success, 0 otherwise.
 */
int auto_house_keeping(cpu_set_t *monitored_cpus)
{
	cpu_set_t rtla_cpus, house_keeping_cpus;

	/* first get the CPUs in which rtla can actually run. */
	if (sched_getaffinity(getpid(), sizeof(rtla_cpus), &rtla_cpus) == -1) {
		debug_msg("Could not get rtla affinity, rtla might run with the threads!\n");
		return 0;
	}

	/* then check if the existing setup is already good. */
	CPU_AND(&house_keeping_cpus, &rtla_cpus, monitored_cpus);
	if (!CPU_COUNT(&house_keeping_cpus)) {
		debug_msg("rtla and the monitored CPUs do not share CPUs.");
		debug_msg("Skipping auto house-keeping\n");
		return 1;
	}

	/*
	 * remove the intersection, then keep only the CPUs in which
	 * rtla is allowed to run.
	 */
	CPU_XOR(&house_keeping_cpus, &rtla_cpus, monitored_cpus);
	CPU_AND(&house_keeping_cpus, &house_keeping_cpus, &rtla_cpus);

	/* is there any cpu left? */
	if (!CPU_COUNT(&house_keeping_cpus)) {
		debug_msg("Could not find any CPU for auto house-keeping\n");
		return 0;
	}

	if (sched_setaffinity(getpid(), sizeof(house_keeping_cpus), &house_keeping_cpus) == -1) {
		debug_msg("Could not set affinity for auto house-keeping\n");
		return 0;
	}

	debug_msg("rtla automatically moved to an auto house-keeping cpu set\n");

	return 1;
}
962