// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org>
 */

#define _GNU_SOURCE
#ifdef HAVE_LIBCPUPOWER_SUPPORT
#include <cpuidle.h>
#endif /* HAVE_LIBCPUPOWER_SUPPORT */
#include <dirent.h>
#include <stdarg.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <sched.h>
#include <stdio.h>

#include "utils.h"

#define MAX_MSG_LENGTH	1024
int config_debug;

/*
 * err_msg - print an error message to stderr
 */
void err_msg(const char *fmt, ...)
{
	char message[MAX_MSG_LENGTH];
	va_list ap;

	va_start(ap, fmt);
	vsnprintf(message, sizeof(message), fmt, ap);
	va_end(ap);

	fprintf(stderr, "%s", message);
}

/*
 * debug_msg - print a debug message to stderr if debug is set
 */
void debug_msg(const char *fmt, ...)
{
	char message[MAX_MSG_LENGTH];
	va_list ap;

	if (!config_debug)
		return;

	va_start(ap, fmt);
	vsnprintf(message, sizeof(message), fmt, ap);
	va_end(ap);

	fprintf(stderr, "%s", message);
}

/*
 * get_llong_from_str - get a long long int from a string
 */
long long get_llong_from_str(char *start)
{
	long long value;
	char *end;

	errno = 0;
	value = strtoll(start, &end, 10);
	if (errno || start == end)
		return -1;

	return value;
}

/*
 * get_duration - fill output with a human-readable duration since start_time
 */
void get_duration(time_t start_time, char *output, int output_size)
{
	time_t now = time(NULL);
	struct tm *tm_info;
	time_t duration;

	duration = difftime(now, start_time);
	tm_info = gmtime(&duration);

	snprintf(output, output_size, "%3d %02d:%02d:%02d",
		 tm_info->tm_yday,
		 tm_info->tm_hour,
		 tm_info->tm_min,
		 tm_info->tm_sec);
}
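
/*
 * For illustration (not part of the build): with a start_time roughly one
 * day, two hours, three minutes and four seconds in the past, the format
 * above produces something like:
 *
 *	"  1 02:03:04"
 *
 * i.e. days as tm_yday, followed by HH:MM:SS.
 */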

/*
 * parse_cpu_set - parse a cpu_list filling cpu_set_t argument
 *
 * Receives a cpu list, like 1-3,5 (cpus 1, 2, 3, 5), and fills the
 * given cpu_set_t argument accordingly.
 *
 * Returns 0 on success, 1 otherwise.
 */
int parse_cpu_set(char *cpu_list, cpu_set_t *set)
{
	const char *p;
	int end_cpu;
	int nr_cpus;
	int cpu;
	int i;

	CPU_ZERO(set);

	nr_cpus = sysconf(_SC_NPROCESSORS_CONF);

	for (p = cpu_list; *p; ) {
		cpu = atoi(p);
		if (cpu < 0 || (!cpu && *p != '0') || cpu >= nr_cpus)
			goto err;

		while (isdigit(*p))
			p++;
		if (*p == '-') {
			p++;
			end_cpu = atoi(p);
			if (end_cpu < cpu || (!end_cpu && *p != '0') || end_cpu >= nr_cpus)
				goto err;
			while (isdigit(*p))
				p++;
		} else
			end_cpu = cpu;

		if (cpu == end_cpu) {
			debug_msg("cpu_set: adding cpu %d\n", cpu);
			CPU_SET(cpu, set);
		} else {
			for (i = cpu; i <= end_cpu; i++) {
				debug_msg("cpu_set: adding cpu %d\n", i);
				CPU_SET(i, set);
			}
		}

		if (*p == ',')
			p++;
	}

	return 0;
err:
	debug_msg("Error parsing the cpu set %s\n", cpu_list);
	return 1;
}
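
/*
 * Illustrative usage (not compiled here): parsing "1-3,5" is expected to
 * set cpus 1, 2, 3 and 5 in the resulting mask:
 *
 *	cpu_set_t set;
 *
 *	if (parse_cpu_set("1-3,5", &set))
 *		err_msg("Invalid cpu list\n");
 *	else
 *		debug_msg("%d cpus selected\n", CPU_COUNT(&set));
 */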

/*
 * parse_seconds_duration - parse duration with s/m/h/d suffix converting it to seconds
 */
long parse_seconds_duration(char *val)
{
	char *end;
	long t;

	t = strtol(val, &end, 10);

	if (end) {
		switch (*end) {
		case 's':
		case 'S':
			break;
		case 'm':
		case 'M':
			t *= 60;
			break;
		case 'h':
		case 'H':
			t *= 60 * 60;
			break;

		case 'd':
		case 'D':
			t *= 24 * 60 * 60;
			break;
		}
	}

	return t;
}
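
/*
 * For example (illustrative only): "30" and "30s" both parse to 30 seconds,
 * "2m" to 120, "1h" to 3600, and "1d" to 86400; an unknown or missing
 * suffix leaves the value interpreted as seconds.
 */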

/*
 * parse_ns_duration - parse duration with ns/us/ms/s converting it to nanoseconds
 */
long parse_ns_duration(char *val)
{
	char *end;
	long t;

	t = strtol(val, &end, 10);

	if (end) {
		if (!strncmp(end, "ns", 2)) {
			return t;
		} else if (!strncmp(end, "us", 2)) {
			t *= 1000;
			return t;
		} else if (!strncmp(end, "ms", 2)) {
			t *= 1000 * 1000;
			return t;
		} else if (!strncmp(end, "s", 1)) {
			t *= 1000 * 1000 * 1000;
			return t;
		}
		return -1;
	}

	return t;
}
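
/*
 * Illustrative examples: "100us" parses to 100000 ns, "2ms" to 2000000 ns,
 * and "1s" to 1000000000 ns. Note that, as written, a plain number with no
 * suffix does not match any of the checks above and yields -1.
 */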

/*
 * This is a set of helper functions to use SCHED_DEADLINE.
 */
#ifndef __NR_sched_setattr
# ifdef __x86_64__
# define __NR_sched_setattr	314
# elif __i386__
# define __NR_sched_setattr	351
# elif __arm__
# define __NR_sched_setattr	380
# elif __aarch64__ || __riscv
# define __NR_sched_setattr	274
# elif __powerpc__
# define __NR_sched_setattr	355
# elif __s390x__
# define __NR_sched_setattr	345
# endif
#endif

#define SCHED_DEADLINE	6

static inline int syscall_sched_setattr(pid_t pid, const struct sched_attr *attr,
				unsigned int flags) {
	return syscall(__NR_sched_setattr, pid, attr, flags);
}

int __set_sched_attr(int pid, struct sched_attr *attr)
{
	int flags = 0;
	int retval;

	retval = syscall_sched_setattr(pid, attr, flags);
	if (retval < 0) {
		err_msg("Failed to set sched attributes for pid %d: %s\n",
			pid, strerror(errno));
		return 1;
	}

	return 0;
}
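
/*
 * A minimal, illustrative use of __set_sched_attr() (not compiled here),
 * assuming the struct sched_attr layout used throughout this file: a
 * SCHED_DEADLINE task with a 100 us runtime within a 1 ms period/deadline,
 * both expressed in nanoseconds:
 *
 *	struct sched_attr attr = {0};
 *
 *	attr.size = sizeof(attr);
 *	attr.sched_policy = SCHED_DEADLINE;
 *	attr.sched_runtime = 100 * 1000;
 *	attr.sched_deadline = 1000 * 1000;
 *	attr.sched_period = 1000 * 1000;
 *
 *	__set_sched_attr(getpid(), &attr);
 */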

/*
 * procfs_is_workload_pid - check if a procfs entry contains a comm_prefix* comm
 *
 * Check if the procfs entry is a directory of a process, and then check if the
 * process has a comm with the prefix set in char *comm_prefix. As the
 * current users of this function only check for kernel threads, there is no
 * need to check the individual threads of the process.
 *
 * Return: True if the proc_entry contains a comm file with comm_prefix*.
 * Otherwise returns false.
 */
static int procfs_is_workload_pid(const char *comm_prefix, struct dirent *proc_entry)
{
	char buffer[MAX_PATH];
	int comm_fd, retval;
	char *t_name;

	if (proc_entry->d_type != DT_DIR)
		return 0;

	if (*proc_entry->d_name == '.')
		return 0;

	/* check if the string is a pid */
	for (t_name = proc_entry->d_name; t_name; t_name++) {
		if (!isdigit(*t_name))
			break;
	}

	if (*t_name != '\0')
		return 0;

	snprintf(buffer, MAX_PATH, "/proc/%s/comm", proc_entry->d_name);
	comm_fd = open(buffer, O_RDONLY);
	if (comm_fd < 0)
		return 0;

	memset(buffer, 0, MAX_PATH);
	retval = read(comm_fd, buffer, MAX_PATH);

	close(comm_fd);

	if (retval <= 0)
		return 0;

	retval = strncmp(comm_prefix, buffer, strlen(comm_prefix));
	if (retval)
		return 0;

	/* comm already has a \n */
	debug_msg("Found workload pid:%s comm:%s", proc_entry->d_name, buffer);

	return 1;
}

/*
 * set_comm_sched_attr - set sched params to threads starting with char *comm_prefix
 *
 * This function uses procfs to list the currently running threads and then sets
 * the sched_attr *attr to the threads that start with char *comm_prefix. It is
 * mainly used to set the priority of the kernel threads created by the
 * tracers.
 */
int set_comm_sched_attr(const char *comm_prefix, struct sched_attr *attr)
{
	struct dirent *proc_entry;
	DIR *procfs;
	int retval;

	if (strlen(comm_prefix) >= MAX_PATH) {
		err_msg("Command prefix is too long: %d < strlen(%s)\n",
			MAX_PATH, comm_prefix);
		return 1;
	}

	procfs = opendir("/proc");
	if (!procfs) {
		err_msg("Could not open procfs\n");
		return 1;
	}

	while ((proc_entry = readdir(procfs))) {

		retval = procfs_is_workload_pid(comm_prefix, proc_entry);
		if (!retval)
			continue;

		/* procfs_is_workload_pid confirmed it is a pid */
		retval = __set_sched_attr(atoi(proc_entry->d_name), attr);
		if (retval) {
			err_msg("Error setting sched attributes for pid:%s\n", proc_entry->d_name);
			goto out_err;
		}

		debug_msg("Set sched attributes for pid:%s\n", proc_entry->d_name);
	}
	closedir(procfs);
	return 0;

out_err:
	closedir(procfs);
	return 1;
}
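
/*
 * Illustrative call (not compiled here): rtla uses this kind of helper to
 * boost the tracer's kernel workload threads, whose comm starts with a
 * fixed prefix. For instance, assuming "timerlat/" as the thread name
 * prefix:
 *
 *	struct sched_attr attr;
 *
 *	if (!parse_prio("f:95", &attr))
 *		set_comm_sched_attr("timerlat/", &attr);
 */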

#define INVALID_VAL	(~0L)
static long get_long_ns_after_colon(char *start)
{
	long val = INVALID_VAL;

	/* find the ":" */
	start = strstr(start, ":");
	if (!start)
		return -1;

	/* skip ":" */
	start++;
	val = parse_ns_duration(start);

	return val;
}

static long get_long_after_colon(char *start)
{
	long val = INVALID_VAL;

	/* find the ":" */
	start = strstr(start, ":");
	if (!start)
		return -1;

	/* skip ":" */
	start++;
	val = get_llong_from_str(start);

	return val;
}

/*
 * parse priority in the format:
 * SCHED_OTHER:
 *	o:<prio>
 *	O:<prio>
 * SCHED_RR:
 *	r:<prio>
 *	R:<prio>
 * SCHED_FIFO:
 *	f:<prio>
 *	F:<prio>
 * SCHED_DEADLINE:
 *	d:runtime:period
 *	D:runtime:period
 */
int parse_prio(char *arg, struct sched_attr *sched_param)
{
	long prio;
	long runtime;
	long period;

	memset(sched_param, 0, sizeof(*sched_param));
	sched_param->size = sizeof(*sched_param);

	switch (arg[0]) {
	case 'd':
	case 'D':
		/* d:runtime:period */
		if (strlen(arg) < 4)
			return -1;

		runtime = get_long_ns_after_colon(arg);
		if (runtime == INVALID_VAL)
			return -1;

		period = get_long_ns_after_colon(&arg[2]);
		if (period == INVALID_VAL)
			return -1;

		if (runtime > period)
			return -1;

		sched_param->sched_policy = SCHED_DEADLINE;
		sched_param->sched_runtime = runtime;
		sched_param->sched_deadline = period;
		sched_param->sched_period = period;
		break;
	case 'f':
	case 'F':
		/* f:prio */
		prio = get_long_after_colon(arg);
		if (prio == INVALID_VAL)
			return -1;

		if (prio < sched_get_priority_min(SCHED_FIFO))
			return -1;
		if (prio > sched_get_priority_max(SCHED_FIFO))
			return -1;

		sched_param->sched_policy = SCHED_FIFO;
		sched_param->sched_priority = prio;
		break;
	case 'r':
	case 'R':
		/* r:prio */
		prio = get_long_after_colon(arg);
		if (prio == INVALID_VAL)
			return -1;

		if (prio < sched_get_priority_min(SCHED_RR))
			return -1;
		if (prio > sched_get_priority_max(SCHED_RR))
			return -1;

		sched_param->sched_policy = SCHED_RR;
		sched_param->sched_priority = prio;
		break;
	case 'o':
	case 'O':
		/* o:prio */
		prio = get_long_after_colon(arg);
		if (prio == INVALID_VAL)
			return -1;

		if (prio < MIN_NICE)
			return -1;
		if (prio > MAX_NICE)
			return -1;

		sched_param->sched_policy = SCHED_OTHER;
		sched_param->sched_nice = prio;
		break;
	default:
		return -1;
	}
	return 0;
}
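
/*
 * Illustrative examples of the priority strings parsed above (parse_prio()
 * fills *sched_param and returns 0 on success, -1 on error):
 *
 *	"f:95"		-> SCHED_FIFO, priority 95
 *	"r:10"		-> SCHED_RR, priority 10
 *	"o:-20"		-> SCHED_OTHER, nice -20
 *	"d:500us:1ms"	-> SCHED_DEADLINE, 500 us runtime, 1 ms period/deadline
 */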

/*
 * set_cpu_dma_latency - set the /dev/cpu_dma_latency
 *
 * This is used to reduce the exit from idle latency. The value
 * will be reset once the file descriptor of /dev/cpu_dma_latency
 * is closed.
 *
 * Return: the /dev/cpu_dma_latency file descriptor
 */
int set_cpu_dma_latency(int32_t latency)
{
	int retval;
	int fd;

	fd = open("/dev/cpu_dma_latency", O_RDWR);
	if (fd < 0) {
		err_msg("Error opening /dev/cpu_dma_latency\n");
		return -1;
	}

	retval = write(fd, &latency, 4);
	if (retval < 1) {
		err_msg("Error setting /dev/cpu_dma_latency\n");
		close(fd);
		return -1;
	}

	debug_msg("Set /dev/cpu_dma_latency to %d\n", latency);

	return fd;
}
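
/*
 * Typical (illustrative) usage: request the lowest exit-from-idle latency
 * for the duration of a measurement, then close the fd to restore the
 * previous behavior:
 *
 *	int dma_latency_fd = set_cpu_dma_latency(0);
 *
 *	(run the measurement)
 *
 *	if (dma_latency_fd >= 0)
 *		close(dma_latency_fd);
 */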

#ifdef HAVE_LIBCPUPOWER_SUPPORT
static unsigned int **saved_cpu_idle_disable_state;
static size_t saved_cpu_idle_disable_state_alloc_ctr;

/*
 * save_cpu_idle_disable_state - save disable for all idle states of a cpu
 *
 * Saves the current disable state of all idle states of a cpu, to be
 * subsequently restored via restore_cpu_idle_disable_state.
 *
 * Return: idle state count on success, negative on error
 */
int save_cpu_idle_disable_state(unsigned int cpu)
{
	unsigned int nr_states;
	unsigned int state;
	int disabled;
	int nr_cpus;

	nr_states = cpuidle_state_count(cpu);

	if (nr_states == 0)
		return 0;

	if (saved_cpu_idle_disable_state == NULL) {
		nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
		saved_cpu_idle_disable_state = calloc(nr_cpus, sizeof(unsigned int *));
		if (!saved_cpu_idle_disable_state)
			return -1;
	}

	saved_cpu_idle_disable_state[cpu] = calloc(nr_states, sizeof(unsigned int));
	if (!saved_cpu_idle_disable_state[cpu])
		return -1;
	saved_cpu_idle_disable_state_alloc_ctr++;

	for (state = 0; state < nr_states; state++) {
		disabled = cpuidle_is_state_disabled(cpu, state);
		if (disabled < 0)
			return disabled;
		saved_cpu_idle_disable_state[cpu][state] = disabled;
	}

	return nr_states;
}

/*
 * restore_cpu_idle_disable_state - restore disable for all idle states of a cpu
 *
 * Restores the disable state of all idle states of a cpu that was
 * previously saved by save_cpu_idle_disable_state.
 *
 * Return: idle state count on success, negative on error
 */
int restore_cpu_idle_disable_state(unsigned int cpu)
{
	unsigned int nr_states;
	unsigned int state;
	int disabled;
	int result;

	nr_states = cpuidle_state_count(cpu);

	if (nr_states == 0)
		return 0;

	if (!saved_cpu_idle_disable_state)
		return -1;

	for (state = 0; state < nr_states; state++) {
		if (!saved_cpu_idle_disable_state[cpu])
			return -1;
		disabled = saved_cpu_idle_disable_state[cpu][state];
		result = cpuidle_state_disable(cpu, state, disabled);
		if (result < 0)
			return result;
	}

	free(saved_cpu_idle_disable_state[cpu]);
	saved_cpu_idle_disable_state[cpu] = NULL;
	saved_cpu_idle_disable_state_alloc_ctr--;
	if (saved_cpu_idle_disable_state_alloc_ctr == 0) {
		free(saved_cpu_idle_disable_state);
		saved_cpu_idle_disable_state = NULL;
	}

	return nr_states;
}

/*
 * free_cpu_idle_disable_states - free saved idle state disable for all cpus
 *
 * Frees the memory used for storing cpu idle state disable for all cpus
 * and states.
 *
 * Normally, the memory is freed automatically in
 * restore_cpu_idle_disable_state; this is mostly for cleaning up after an
 * error.
 */
void free_cpu_idle_disable_states(void)
{
	int cpu;
	int nr_cpus;

	if (!saved_cpu_idle_disable_state)
		return;

	nr_cpus = sysconf(_SC_NPROCESSORS_CONF);

	for (cpu = 0; cpu < nr_cpus; cpu++) {
		free(saved_cpu_idle_disable_state[cpu]);
		saved_cpu_idle_disable_state[cpu] = NULL;
	}

	free(saved_cpu_idle_disable_state);
	saved_cpu_idle_disable_state = NULL;
}

/*
 * set_deepest_cpu_idle_state - limit idle state of cpu
 *
 * Disables all idle states deeper than the one given in
 * deepest_state (assuming states with higher number are deeper).
 *
 * This is used to reduce the exit from idle latency. Unlike
 * set_cpu_dma_latency, it can disable idle states per cpu.
 *
 * Return: idle state count on success, negative on error
 */
int set_deepest_cpu_idle_state(unsigned int cpu, unsigned int deepest_state)
{
	unsigned int nr_states;
	unsigned int state;
	int result;

	nr_states = cpuidle_state_count(cpu);

	for (state = deepest_state + 1; state < nr_states; state++) {
		result = cpuidle_state_disable(cpu, state, 1);
		if (result < 0)
			return result;
	}

	return nr_states;
}
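
/*
 * A sketch of how these helpers are meant to be combined for a monitored
 * cpu (illustrative, error handling omitted): save the current idle state
 * configuration, keep only the shallowest state during the measurement,
 * then restore the original configuration:
 *
 *	save_cpu_idle_disable_state(cpu);
 *	set_deepest_cpu_idle_state(cpu, 0);
 *
 *	(run the measurement)
 *
 *	restore_cpu_idle_disable_state(cpu);
 */
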
#endif /* HAVE_LIBCPUPOWER_SUPPORT */

#define _STR(x) #x
#define STR(x) _STR(x)

/*
 * find_mount - find the mount point of a given fs
 *
 * Returns 0 if the mount is not found, otherwise returns 1 and fills mp
 * with the mount point.
 */
static int find_mount(const char *fs, char *mp, int sizeof_mp)
{
	char mount_point[MAX_PATH+1];
	char type[100];
	int found = 0;
	FILE *fp;

	fp = fopen("/proc/mounts", "r");
	if (!fp)
		return 0;

	while (fscanf(fp, "%*s %" STR(MAX_PATH) "s %99s %*s %*d %*d\n", mount_point, type) == 2) {
		if (strcmp(type, fs) == 0) {
			found = 1;
			break;
		}
	}
	fclose(fp);

	if (!found)
		return 0;

	memset(mp, 0, sizeof_mp);
	strncpy(mp, mount_point, sizeof_mp - 1);

	debug_msg("Fs %s found at %s\n", fs, mp);
	return 1;
}
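
/*
 * For instance (illustrative): on a typical cgroup v2 system,
 * find_mount("cgroup2", path, sizeof(path)) is expected to fill path with
 * something like "/sys/fs/cgroup" and return 1, while it returns 0 when no
 * filesystem of the given type is mounted.
 */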

/*
 * get_self_cgroup - get the current thread cgroup path
 *
 * Parse /proc/$$/cgroup file to get the thread's cgroup. As an example of line to parse:
 *
 * 0::/user.slice/user-0.slice/session-3.scope'\n'
 *
 * This function is interested in the content after the second : and before the '\n'.
 *
 * Returns 1 if a string was found, 0 otherwise.
 */
static int get_self_cgroup(char *self_cg, int sizeof_self_cg)
{
	char path[MAX_PATH], *start;
	int fd, retval;

	snprintf(path, MAX_PATH, "/proc/%d/cgroup", getpid());

	fd = open(path, O_RDONLY);
	if (fd < 0)
		return 0;

	retval = read(fd, path, MAX_PATH - 1);

	close(fd);

	if (retval <= 0)
		return 0;

	/* make sure the buffer read from procfs is null-terminated */
	path[retval] = '\0';

	start = path;

	start = strstr(start, ":");
	if (!start)
		return 0;

	/* skip ":" */
	start++;

	start = strstr(start, ":");
	if (!start)
		return 0;

	/* skip ":" */
	start++;

	if (strlen(start) >= sizeof_self_cg)
		return 0;

	snprintf(self_cg, sizeof_self_cg, "%s", start);

	/* swap '\n' with '\0' */
	start = strstr(self_cg, "\n");

	/* there must be '\n' */
	if (!start)
		return 0;

	/* ok, it found a string after the second : and before the \n */
	*start = '\0';

	return 1;
}

/*
 * set_pid_cgroup - set the cgroup of pid_t pid
 *
 * If the cgroup argument is not NULL, the pid is moved to the given cgroup.
 * Otherwise, the cgroup of the calling (i.e., rtla) thread is used.
 *
 * Supports cgroup v2.
 *
 * Returns 1 on success, 0 otherwise.
 */
int set_pid_cgroup(pid_t pid, const char *cgroup)
{
	char cgroup_path[MAX_PATH - strlen("/cgroup.procs")];
	char cgroup_procs[MAX_PATH];
	char pid_str[24];
	int retval;
	int cg_fd;

	retval = find_mount("cgroup2", cgroup_path, sizeof(cgroup_path));
	if (!retval) {
		err_msg("Did not find cgroupv2 mount point\n");
		return 0;
	}

	if (!cgroup) {
		retval = get_self_cgroup(&cgroup_path[strlen(cgroup_path)],
				sizeof(cgroup_path) - strlen(cgroup_path));
		if (!retval) {
			err_msg("Did not find self cgroup\n");
			return 0;
		}
	} else {
		snprintf(&cgroup_path[strlen(cgroup_path)],
				sizeof(cgroup_path) - strlen(cgroup_path), "%s/", cgroup);
	}

	snprintf(cgroup_procs, MAX_PATH, "%s/cgroup.procs", cgroup_path);

	debug_msg("Using cgroup path at: %s\n", cgroup_procs);

	cg_fd = open(cgroup_procs, O_RDWR);
	if (cg_fd < 0)
		return 0;

	snprintf(pid_str, sizeof(pid_str), "%d\n", pid);

	retval = write(cg_fd, pid_str, strlen(pid_str));
	if (retval < 0)
		err_msg("Error setting cgroup attributes for pid:%s - %s\n",
			pid_str, strerror(errno));
	else
		debug_msg("Set cgroup attributes for pid:%s\n", pid_str);

	close(cg_fd);

	return (retval >= 0);
}

/**
 * set_comm_cgroup - set the cgroup of threads starting with char *comm_prefix
 *
 * If the cgroup argument is not NULL, the threads are moved to the given cgroup.
 * Otherwise, the cgroup of the calling (i.e., rtla) thread is used.
 *
 * Supports cgroup v2.
 *
 * Returns 1 on success, 0 otherwise.
 */
int set_comm_cgroup(const char *comm_prefix, const char *cgroup)
{
	char cgroup_path[MAX_PATH - strlen("/cgroup.procs")];
	char cgroup_procs[MAX_PATH];
	struct dirent *proc_entry;
	DIR *procfs;
	int retval;
	int cg_fd;

	if (strlen(comm_prefix) >= MAX_PATH) {
		err_msg("Command prefix is too long: %d < strlen(%s)\n",
			MAX_PATH, comm_prefix);
		return 0;
	}

	retval = find_mount("cgroup2", cgroup_path, sizeof(cgroup_path));
	if (!retval) {
		err_msg("Did not find cgroupv2 mount point\n");
		return 0;
	}

	if (!cgroup) {
		retval = get_self_cgroup(&cgroup_path[strlen(cgroup_path)],
				sizeof(cgroup_path) - strlen(cgroup_path));
		if (!retval) {
			err_msg("Did not find self cgroup\n");
			return 0;
		}
	} else {
		snprintf(&cgroup_path[strlen(cgroup_path)],
				sizeof(cgroup_path) - strlen(cgroup_path), "%s/", cgroup);
	}

	snprintf(cgroup_procs, MAX_PATH, "%s/cgroup.procs", cgroup_path);

	debug_msg("Using cgroup path at: %s\n", cgroup_procs);

	cg_fd = open(cgroup_procs, O_RDWR);
	if (cg_fd < 0)
		return 0;

	procfs = opendir("/proc");
	if (!procfs) {
		err_msg("Could not open procfs\n");
		goto out_cg;
	}

	while ((proc_entry = readdir(procfs))) {

		retval = procfs_is_workload_pid(comm_prefix, proc_entry);
		if (!retval)
			continue;

		retval = write(cg_fd, proc_entry->d_name, strlen(proc_entry->d_name));
		if (retval < 0) {
			err_msg("Error setting cgroup attributes for pid:%s - %s\n",
				proc_entry->d_name, strerror(errno));
			goto out_procfs;
		}

		debug_msg("Set cgroup attributes for pid:%s\n", proc_entry->d_name);
	}

	closedir(procfs);
	close(cg_fd);
	return 1;

out_procfs:
	closedir(procfs);
out_cg:
	close(cg_fd);
	return 0;
}

/**
 * auto_house_keeping - Automatically move rtla out of measurement threads
 *
 * Try to move rtla away from the tracer, if possible.
 *
 * Returns 1 on success, 0 otherwise.
 */
int auto_house_keeping(cpu_set_t *monitored_cpus)
{
	cpu_set_t rtla_cpus, house_keeping_cpus;
	int retval;

	/* first get the CPUs in which rtla can actually run. */
	retval = sched_getaffinity(getpid(), sizeof(rtla_cpus), &rtla_cpus);
	if (retval == -1) {
		debug_msg("Could not get rtla affinity, rtla might run with the threads!\n");
		return 0;
	}

	/* then check if the existing setup is already good. */
	CPU_AND(&house_keeping_cpus, &rtla_cpus, monitored_cpus);
	if (!CPU_COUNT(&house_keeping_cpus)) {
		debug_msg("rtla and the monitored CPUs do not share CPUs. ");
		debug_msg("Skipping auto house-keeping\n");
		return 1;
	}

	/* remove the intersection */
	CPU_XOR(&house_keeping_cpus, &rtla_cpus, monitored_cpus);

	/* get only those that rtla can run */
	CPU_AND(&house_keeping_cpus, &house_keeping_cpus, &rtla_cpus);

	/* is there any cpu left? */
	if (!CPU_COUNT(&house_keeping_cpus)) {
		debug_msg("Could not find any CPU for auto house-keeping\n");
		return 0;
	}

	retval = sched_setaffinity(getpid(), sizeof(house_keeping_cpus), &house_keeping_cpus);
	if (retval == -1) {
		debug_msg("Could not set affinity for auto house-keeping\n");
		return 0;
	}

	debug_msg("rtla automatically moved to an auto house-keeping cpu set\n");

	return 1;
}
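
/*
 * Worked example of the cpu set arithmetic above (illustrative): if rtla's
 * affinity is {0,1,2,3} and the monitored CPUs are {2,3}, the sets
 * intersect, so the XOR yields {0,1}; ANDing that with rtla's affinity
 * keeps {0,1}, and rtla is pinned to CPUs 0-1, leaving CPUs 2-3 for the
 * measurement threads.
 */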