xref: /linux/tools/tracing/rtla/src/utils.c (revision cdd30ebb1b9f36159d66f088b61aee264e649d7a)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org>
4  */
5 
6 #define _GNU_SOURCE
7 #ifdef HAVE_LIBCPUPOWER_SUPPORT
8 #include <cpuidle.h>
9 #endif /* HAVE_LIBCPUPOWER_SUPPORT */
10 #include <dirent.h>
11 #include <stdarg.h>
12 #include <stdlib.h>
13 #include <string.h>
14 #include <unistd.h>
15 #include <ctype.h>
16 #include <errno.h>
17 #include <fcntl.h>
18 #include <sched.h>
19 #include <stdio.h>
20 
21 #include "utils.h"
22 
23 #define MAX_MSG_LENGTH	1024
24 int config_debug;
25 
26 /*
27  * err_msg - print an error message to the stderr
28  */
29 void err_msg(const char *fmt, ...)
30 {
31 	char message[MAX_MSG_LENGTH];
32 	va_list ap;
33 
34 	va_start(ap, fmt);
35 	vsnprintf(message, sizeof(message), fmt, ap);
36 	va_end(ap);
37 
38 	fprintf(stderr, "%s", message);
39 }
40 
41 /*
42  * debug_msg - print a debug message to stderr if debug is set
43  */
44 void debug_msg(const char *fmt, ...)
45 {
46 	char message[MAX_MSG_LENGTH];
47 	va_list ap;
48 
49 	if (!config_debug)
50 		return;
51 
52 	va_start(ap, fmt);
53 	vsnprintf(message, sizeof(message), fmt, ap);
54 	va_end(ap);
55 
56 	fprintf(stderr, "%s", message);
57 }
58 
/*
 * get_llong_from_str - convert the leading decimal number of a string
 *
 * Characters following the number are ignored.
 *
 * Returns the parsed value, or -1 when the string does not start with a
 * number or strtoll() reports an error through errno.
 */
long long get_llong_from_str(char *start)
{
	char *stop = start;
	long long parsed;

	errno = 0;
	parsed = strtoll(start, &stop, 10);

	/* nothing was consumed, or the conversion failed */
	if (stop == start || errno != 0)
		return -1;

	return parsed;
}
74 
/*
 * get_duration - write the time elapsed since start_time into output
 *
 * The elapsed seconds are broken down with gmtime() and printed as
 * "<days> <hh>:<mm>:<ss>" using at most output_size bytes. The day count
 * is taken from tm_yday.
 */
void get_duration(time_t start_time, char *output, int output_size)
{
	time_t elapsed = difftime(time(NULL), start_time);
	struct tm *parts = gmtime(&elapsed);

	snprintf(output, output_size, "%3d %02d:%02d:%02d",
		 parts->tm_yday, parts->tm_hour, parts->tm_min, parts->tm_sec);
}
93 
/*
 * cpu_set_parse_nr - parse one decimal cpu number at *pos
 *
 * *pos must point at a digit, so leading whitespace, signs and empty
 * fields are rejected instead of being skipped silently. On success *pos
 * is advanced past the digits.
 *
 * Returns the cpu number, or -1 when it is malformed, out of range for
 * strtol() or not below nr_cpus.
 */
static int cpu_set_parse_nr(const char **pos, int nr_cpus)
{
	char *end;
	long nr;

	if (!isdigit((unsigned char)**pos))
		return -1;

	errno = 0;
	nr = strtol(*pos, &end, 10);
	if (errno || nr >= nr_cpus)
		return -1;

	*pos = end;
	return nr;
}

/*
 * parse_cpu_set - parse a cpu_list filling cpu_set_t argument
 *
 * Receives a cpu list, like 1-3,5 (cpus 1, 2, 3, 5), and sets the
 * corresponding cpus in the cpu_set_t argument, which is zeroed first.
 * Every cpu must be below sysconf(_SC_NPROCESSORS_CONF) and a range must
 * not be descending. On error the set may be partially filled.
 *
 * Returns 0 on success, 1 on a parsing error.
 */
int parse_cpu_set(char *cpu_list, cpu_set_t *set)
{
	const char *p = cpu_list;
	int end_cpu;
	int nr_cpus;
	int cpu;
	int i;

	CPU_ZERO(set);

	nr_cpus = sysconf(_SC_NPROCESSORS_CONF);

	while (*p) {
		/*
		 * Each entry has to start with a digit; this guarantees the
		 * loop always consumes input and cannot spin forever.
		 */
		cpu = cpu_set_parse_nr(&p, nr_cpus);
		if (cpu < 0)
			goto err;

		end_cpu = cpu;
		if (*p == '-') {
			p++;
			end_cpu = cpu_set_parse_nr(&p, nr_cpus);
			if (end_cpu < cpu)
				goto err;
		}

		for (i = cpu; i <= end_cpu; i++) {
			debug_msg("cpu_set: adding cpu %d\n", i);
			CPU_SET(i, set);
		}

		if (*p == ',')
			p++;
	}

	return 0;
err:
	debug_msg("Error parsing the cpu set %s\n", cpu_list);
	return 1;
}
150 
/*
 * parse_seconds_duration - convert a duration string into seconds
 *
 * The string is a decimal number optionally followed by a unit: s/S
 * (seconds), m/M (minutes), h/H (hours) or d/D (days). Only the first
 * character after the number is inspected; if it is not one of these
 * units the number is returned unscaled.
 */
long parse_seconds_duration(char *val)
{
	char *unit;
	long amount = strtol(val, &unit, 10);

	if (*unit == 'm' || *unit == 'M')
		return amount * 60;

	if (*unit == 'h' || *unit == 'H')
		return amount * (60 * 60);

	if (*unit == 'd' || *unit == 'D')
		return amount * (24 * 60 * 60);

	/* 's', 'S', no unit or an unknown unit: already in seconds */
	return amount;
}
184 
/*
 * parse_ns_duration - convert a duration string into nanoseconds
 *
 * The string is a decimal number followed by one of the units ns, us, ms
 * or s. Only the beginning of the text after the number is compared, so
 * trailing characters after the unit are ignored.
 *
 * Returns the duration in nanoseconds, or -1 when the number is not
 * followed by a known unit (a bare number included).
 */
long parse_ns_duration(char *val)
{
	char *unit;
	long amount = strtol(val, &unit, 10);

	if (strncmp(unit, "ns", 2) == 0)
		return amount;

	if (strncmp(unit, "us", 2) == 0)
		return amount * 1000;

	if (strncmp(unit, "ms", 2) == 0)
		return amount * (1000 * 1000);

	if (*unit == 's')
		return amount * (1000 * 1000 * 1000);

	return -1;
}
213 
/*
 * Helpers for using SCHED_DEADLINE.
 *
 * When the included headers do not provide __NR_sched_setattr, fall back
 * to the per-architecture syscall number listed here.
 */
#ifndef __NR_sched_setattr
# if defined(__x86_64__)
#  define __NR_sched_setattr	314
# elif defined(__i386__)
#  define __NR_sched_setattr	351
# elif defined(__arm__)
#  define __NR_sched_setattr	380
# elif defined(__aarch64__) || defined(__riscv)
#  define __NR_sched_setattr	274
# elif defined(__powerpc__)
#  define __NR_sched_setattr	355
# elif defined(__s390x__)
#  define __NR_sched_setattr	345
# endif
#endif

/* scheduling policy number used for SCHED_DEADLINE */
#define SCHED_DEADLINE		6
234 
235 static inline int syscall_sched_setattr(pid_t pid, const struct sched_attr *attr,
236 				unsigned int flags) {
237 	return syscall(__NR_sched_setattr, pid, attr, flags);
238 }
239 
/*
 * __set_sched_attr - apply the scheduling attributes in attr to pid
 *
 * Calls sched_setattr with no flags and prints an error message when the
 * call fails.
 *
 * Returns 0 on success, 1 on failure.
 */
int __set_sched_attr(int pid, struct sched_attr *attr)
{
	if (syscall_sched_setattr(pid, attr, 0) >= 0)
		return 0;

	err_msg("Failed to set sched attributes to the pid %d: %s\n",
		pid, strerror(errno));
	return 1;
}
254 
255 /*
256  * procfs_is_workload_pid - check if a procfs entry contains a comm_prefix* comm
257  *
258  * Check if the procfs entry is a directory of a process, and then check if the
259  * process has a comm with the prefix set in char *comm_prefix. As the
260  * current users of this function only check for kernel threads, there is no
261  * need to check for the threads for the process.
262  *
263  * Return: True if the proc_entry contains a comm file with comm_prefix*.
264  * Otherwise returns false.
265  */
266 static int procfs_is_workload_pid(const char *comm_prefix, struct dirent *proc_entry)
267 {
268 	char buffer[MAX_PATH];
269 	int comm_fd, retval;
270 	char *t_name;
271 
272 	if (proc_entry->d_type != DT_DIR)
273 		return 0;
274 
275 	if (*proc_entry->d_name == '.')
276 		return 0;
277 
278 	/* check if the string is a pid */
279 	for (t_name = proc_entry->d_name; t_name; t_name++) {
280 		if (!isdigit(*t_name))
281 			break;
282 	}
283 
284 	if (*t_name != '\0')
285 		return 0;
286 
287 	snprintf(buffer, MAX_PATH, "/proc/%s/comm", proc_entry->d_name);
288 	comm_fd = open(buffer, O_RDONLY);
289 	if (comm_fd < 0)
290 		return 0;
291 
292 	memset(buffer, 0, MAX_PATH);
293 	retval = read(comm_fd, buffer, MAX_PATH);
294 
295 	close(comm_fd);
296 
297 	if (retval <= 0)
298 		return 0;
299 
300 	retval = strncmp(comm_prefix, buffer, strlen(comm_prefix));
301 	if (retval)
302 		return 0;
303 
304 	/* comm already have \n */
305 	debug_msg("Found workload pid:%s comm:%s", proc_entry->d_name, buffer);
306 
307 	return 1;
308 }
309 
310 /*
311  * set_comm_sched_attr - set sched params to threads starting with char *comm_prefix
312  *
313  * This function uses procfs to list the currently running threads and then set the
314  * sched_attr *attr to the threads that start with char *comm_prefix. It is
315  * mainly used to set the priority to the kernel threads created by the
316  * tracers.
317  */
318 int set_comm_sched_attr(const char *comm_prefix, struct sched_attr *attr)
319 {
320 	struct dirent *proc_entry;
321 	DIR *procfs;
322 	int retval;
323 
324 	if (strlen(comm_prefix) >= MAX_PATH) {
325 		err_msg("Command prefix is too long: %d < strlen(%s)\n",
326 			MAX_PATH, comm_prefix);
327 		return 1;
328 	}
329 
330 	procfs = opendir("/proc");
331 	if (!procfs) {
332 		err_msg("Could not open procfs\n");
333 		return 1;
334 	}
335 
336 	while ((proc_entry = readdir(procfs))) {
337 
338 		retval = procfs_is_workload_pid(comm_prefix, proc_entry);
339 		if (!retval)
340 			continue;
341 
342 		/* procfs_is_workload_pid confirmed it is a pid */
343 		retval = __set_sched_attr(atoi(proc_entry->d_name), attr);
344 		if (retval) {
345 			err_msg("Error setting sched attributes for pid:%s\n", proc_entry->d_name);
346 			goto out_err;
347 		}
348 
349 		debug_msg("Set sched attributes for pid:%s\n", proc_entry->d_name);
350 	}
351 	return 0;
352 
353 out_err:
354 	closedir(procfs);
355 	return 1;
356 }
357 
#define INVALID_VAL	(~0L)
/*
 * get_long_ns_after_colon - parse the duration that follows the first ':'
 *
 * Returns -1 when start contains no ':'; otherwise returns the result of
 * parse_ns_duration() on the text right after it.
 */
static long get_long_ns_after_colon(char *start)
{
	char *colon = strchr(start, ':');

	if (!colon)
		return -1;

	/* the value begins right after the ':' */
	return parse_ns_duration(colon + 1);
}
374 
/*
 * get_long_after_colon - parse the number that follows the first ':'
 *
 * Returns -1 when start contains no ':'; otherwise returns the result of
 * get_llong_from_str() on the text right after it, converted to long.
 */
static long get_long_after_colon(char *start)
{
	char *colon = strchr(start, ':');

	if (!colon)
		return -1;

	/* the value begins right after the ':' */
	return get_llong_from_str(colon + 1);
}
390 
391 /*
392  * parse priority in the format:
393  * SCHED_OTHER:
394  *		o:<prio>
395  *		O:<prio>
396  * SCHED_RR:
397  *		r:<prio>
398  *		R:<prio>
399  * SCHED_FIFO:
400  *		f:<prio>
401  *		F:<prio>
402  * SCHED_DEADLINE:
403  *		d:runtime:period
404  *		D:runtime:period
405  */
406 int parse_prio(char *arg, struct sched_attr *sched_param)
407 {
408 	long prio;
409 	long runtime;
410 	long period;
411 
412 	memset(sched_param, 0, sizeof(*sched_param));
413 	sched_param->size = sizeof(*sched_param);
414 
415 	switch (arg[0]) {
416 	case 'd':
417 	case 'D':
418 		/* d:runtime:period */
419 		if (strlen(arg) < 4)
420 			return -1;
421 
422 		runtime = get_long_ns_after_colon(arg);
423 		if (runtime == INVALID_VAL)
424 			return -1;
425 
426 		period = get_long_ns_after_colon(&arg[2]);
427 		if (period == INVALID_VAL)
428 			return -1;
429 
430 		if (runtime > period)
431 			return -1;
432 
433 		sched_param->sched_policy   = SCHED_DEADLINE;
434 		sched_param->sched_runtime  = runtime;
435 		sched_param->sched_deadline = period;
436 		sched_param->sched_period   = period;
437 		break;
438 	case 'f':
439 	case 'F':
440 		/* f:prio */
441 		prio = get_long_after_colon(arg);
442 		if (prio == INVALID_VAL)
443 			return -1;
444 
445 		if (prio < sched_get_priority_min(SCHED_FIFO))
446 			return -1;
447 		if (prio > sched_get_priority_max(SCHED_FIFO))
448 			return -1;
449 
450 		sched_param->sched_policy   = SCHED_FIFO;
451 		sched_param->sched_priority = prio;
452 		break;
453 	case 'r':
454 	case 'R':
455 		/* r:prio */
456 		prio = get_long_after_colon(arg);
457 		if (prio == INVALID_VAL)
458 			return -1;
459 
460 		if (prio < sched_get_priority_min(SCHED_RR))
461 			return -1;
462 		if (prio > sched_get_priority_max(SCHED_RR))
463 			return -1;
464 
465 		sched_param->sched_policy   = SCHED_RR;
466 		sched_param->sched_priority = prio;
467 		break;
468 	case 'o':
469 	case 'O':
470 		/* o:prio */
471 		prio = get_long_after_colon(arg);
472 		if (prio == INVALID_VAL)
473 			return -1;
474 
475 		if (prio < MIN_NICE)
476 			return -1;
477 		if (prio > MAX_NICE)
478 			return -1;
479 
480 		sched_param->sched_policy   = SCHED_OTHER;
481 		sched_param->sched_nice = prio;
482 		break;
483 	default:
484 		return -1;
485 	}
486 	return 0;
487 }
488 
/*
 * set_cpu_dma_latency - set the /dev/cpu_dma_latency
 *
 * This is used to reduce the exit from idle latency. The value
 * will be reset once the file descriptor of /dev/cpu_dma_latency
 * is closed, so the descriptor is handed to the caller still open.
 *
 * Return: the /dev/cpu_dma_latency file descriptor, or -1 if the file
 * could not be opened or written.
 */
int set_cpu_dma_latency(int32_t latency)
{
	int fd = open("/dev/cpu_dma_latency", O_RDWR);

	if (fd < 0) {
		err_msg("Error opening /dev/cpu_dma_latency\n");
		return -1;
	}

	/* the latency is written as a raw 32-bit value */
	if (write(fd, &latency, sizeof(latency)) < 1) {
		err_msg("Error setting /dev/cpu_dma_latency\n");
		close(fd);
		return -1;
	}

	debug_msg("Set /dev/cpu_dma_latency to %d\n", latency);

	return fd;
}
520 
521 #ifdef HAVE_LIBCPUPOWER_SUPPORT
/* per-cpu arrays holding the saved disable value of each idle state */
static unsigned int **saved_cpu_idle_disable_state;
/* number of per-cpu arrays currently allocated */
static size_t saved_cpu_idle_disable_state_alloc_ctr;

/*
 * save_cpu_idle_disable_state - save disable for all idle states of a cpu
 *
 * Saves the current disable of all idle states of a cpu, to be subsequently
 * restored via restore_cpu_idle_disable_state. The table of per-cpu arrays
 * is allocated on first use.
 *
 * Return: idle state count on success, negative on error
 */
int save_cpu_idle_disable_state(unsigned int cpu)
{
	unsigned int nr_states = cpuidle_state_count(cpu);
	unsigned int *flags;
	unsigned int idx;
	int nr_cpus;
	int ret;

	if (!nr_states)
		return 0;

	if (!saved_cpu_idle_disable_state) {
		nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
		saved_cpu_idle_disable_state = calloc(nr_cpus, sizeof(unsigned int *));
		if (!saved_cpu_idle_disable_state)
			return -1;
	}

	flags = calloc(nr_states, sizeof(unsigned int));
	saved_cpu_idle_disable_state[cpu] = flags;
	if (!flags)
		return -1;
	saved_cpu_idle_disable_state_alloc_ctr++;

	for (idx = 0; idx < nr_states; idx++) {
		ret = cpuidle_is_state_disabled(cpu, idx);
		if (ret < 0)
			return ret;
		flags[idx] = ret;
	}

	return nr_states;
}
566 
567 /*
568  * restore_cpu_idle_disable_state - restore disable for all idle states of a cpu
569  *
570  * Restores the current disable state of all idle states of a cpu that was
571  * previously saved by save_cpu_idle_disable_state.
572  *
573  * Return: idle state count on success, negative on error
574  */
575 int restore_cpu_idle_disable_state(unsigned int cpu)
576 {
577 	unsigned int nr_states;
578 	unsigned int state;
579 	int disabled;
580 	int result;
581 
582 	nr_states = cpuidle_state_count(cpu);
583 
584 	if (nr_states == 0)
585 		return 0;
586 
587 	if (!saved_cpu_idle_disable_state)
588 		return -1;
589 
590 	for (state = 0; state < nr_states; state++) {
591 		if (!saved_cpu_idle_disable_state[cpu])
592 			return -1;
593 		disabled = saved_cpu_idle_disable_state[cpu][state];
594 		result = cpuidle_state_disable(cpu, state, disabled);
595 		if (result < 0)
596 			return result;
597 	}
598 
599 	free(saved_cpu_idle_disable_state[cpu]);
600 	saved_cpu_idle_disable_state[cpu] = NULL;
601 	saved_cpu_idle_disable_state_alloc_ctr--;
602 	if (saved_cpu_idle_disable_state_alloc_ctr == 0) {
603 		free(saved_cpu_idle_disable_state);
604 		saved_cpu_idle_disable_state = NULL;
605 	}
606 
607 	return nr_states;
608 }
609 
610 /*
611  * free_cpu_idle_disable_states - free saved idle state disable for all cpus
612  *
613  * Frees the memory used for storing cpu idle state disable for all cpus
614  * and states.
615  *
616  * Normally, the memory is freed automatically in
617  * restore_cpu_idle_disable_state; this is mostly for cleaning up after an
618  * error.
619  */
620 void free_cpu_idle_disable_states(void)
621 {
622 	int cpu;
623 	int nr_cpus;
624 
625 	if (!saved_cpu_idle_disable_state)
626 		return;
627 
628 	nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
629 
630 	for (cpu = 0; cpu < nr_cpus; cpu++) {
631 		free(saved_cpu_idle_disable_state[cpu]);
632 		saved_cpu_idle_disable_state[cpu] = NULL;
633 	}
634 
635 	free(saved_cpu_idle_disable_state);
636 	saved_cpu_idle_disable_state = NULL;
637 }
638 
/*
 * set_deepest_cpu_idle_state - limit idle state of cpu
 *
 * Disables every idle state with a number higher than deepest_state
 * (assuming states with higher number are deeper), stopping at the first
 * state that cannot be disabled.
 *
 * This is used to reduce the exit from idle latency. Unlike
 * set_cpu_dma_latency, it can disable idle states per cpu.
 *
 * Return: idle state count on success, negative on error
 */
int set_deepest_cpu_idle_state(unsigned int cpu, unsigned int deepest_state)
{
	unsigned int nr_states = cpuidle_state_count(cpu);
	unsigned int state = deepest_state + 1;

	while (state < nr_states) {
		int result = cpuidle_state_disable(cpu, state, 1);

		if (result < 0)
			return result;
		state++;
	}

	return nr_states;
}
666 #endif /* HAVE_LIBCPUPOWER_SUPPORT */
667 
668 #define _STR(x) #x
669 #define STR(x) _STR(x)
670 
671 /*
672  * find_mount - find a the mount point of a given fs
673  *
674  * Returns 0 if mount is not found, otherwise return 1 and fill mp
675  * with the mount point.
676  */
677 static const int find_mount(const char *fs, char *mp, int sizeof_mp)
678 {
679 	char mount_point[MAX_PATH+1];
680 	char type[100];
681 	int found = 0;
682 	FILE *fp;
683 
684 	fp = fopen("/proc/mounts", "r");
685 	if (!fp)
686 		return 0;
687 
688 	while (fscanf(fp, "%*s %" STR(MAX_PATH) "s %99s %*s %*d %*d\n",	mount_point, type) == 2) {
689 		if (strcmp(type, fs) == 0) {
690 			found = 1;
691 			break;
692 		}
693 	}
694 	fclose(fp);
695 
696 	if (!found)
697 		return 0;
698 
699 	memset(mp, 0, sizeof_mp);
700 	strncpy(mp, mount_point, sizeof_mp - 1);
701 
702 	debug_msg("Fs %s found at %s\n", fs, mp);
703 	return 1;
704 }
705 
706 /*
707  * get_self_cgroup - get the current thread cgroup path
708  *
709  * Parse /proc/$$/cgroup file to get the thread's cgroup. As an example of line to parse:
710  *
711  * 0::/user.slice/user-0.slice/session-3.scope'\n'
712  *
713  * This function is interested in the content after the second : and before the '\n'.
714  *
715  * Returns 1 if a string was found, 0 otherwise.
716  */
717 static int get_self_cgroup(char *self_cg, int sizeof_self_cg)
718 {
719 	char path[MAX_PATH], *start;
720 	int fd, retval;
721 
722 	snprintf(path, MAX_PATH, "/proc/%d/cgroup", getpid());
723 
724 	fd = open(path, O_RDONLY);
725 	if (fd < 0)
726 		return 0;
727 
728 	retval = read(fd, path, MAX_PATH);
729 
730 	close(fd);
731 
732 	if (retval <= 0)
733 		return 0;
734 
735 	start = path;
736 
737 	start = strstr(start, ":");
738 	if (!start)
739 		return 0;
740 
741 	/* skip ":" */
742 	start++;
743 
744 	start = strstr(start, ":");
745 	if (!start)
746 		return 0;
747 
748 	/* skip ":" */
749 	start++;
750 
751 	if (strlen(start) >= sizeof_self_cg)
752 		return 0;
753 
754 	snprintf(self_cg, sizeof_self_cg, "%s", start);
755 
756 	/* Swap '\n' with '\0' */
757 	start = strstr(self_cg, "\n");
758 
759 	/* there must be '\n' */
760 	if (!start)
761 		return 0;
762 
763 	/* ok, it found a string after the second : and before the \n */
764 	*start = '\0';
765 
766 	return 1;
767 }
768 
769 /*
770  * set_comm_cgroup - Set cgroup to pid_t pid
771  *
772  * If cgroup argument is not NULL, the threads will move to the given cgroup.
773  * Otherwise, the cgroup of the calling, i.e., rtla, thread will be used.
774  *
775  * Supports cgroup v2.
776  *
777  * Returns 1 on success, 0 otherwise.
778  */
779 int set_pid_cgroup(pid_t pid, const char *cgroup)
780 {
781 	char cgroup_path[MAX_PATH - strlen("/cgroup.procs")];
782 	char cgroup_procs[MAX_PATH];
783 	char pid_str[24];
784 	int retval;
785 	int cg_fd;
786 
787 	retval = find_mount("cgroup2", cgroup_path, sizeof(cgroup_path));
788 	if (!retval) {
789 		err_msg("Did not find cgroupv2 mount point\n");
790 		return 0;
791 	}
792 
793 	if (!cgroup) {
794 		retval = get_self_cgroup(&cgroup_path[strlen(cgroup_path)],
795 				sizeof(cgroup_path) - strlen(cgroup_path));
796 		if (!retval) {
797 			err_msg("Did not find self cgroup\n");
798 			return 0;
799 		}
800 	} else {
801 		snprintf(&cgroup_path[strlen(cgroup_path)],
802 				sizeof(cgroup_path) - strlen(cgroup_path), "%s/", cgroup);
803 	}
804 
805 	snprintf(cgroup_procs, MAX_PATH, "%s/cgroup.procs", cgroup_path);
806 
807 	debug_msg("Using cgroup path at: %s\n", cgroup_procs);
808 
809 	cg_fd = open(cgroup_procs, O_RDWR);
810 	if (cg_fd < 0)
811 		return 0;
812 
813 	snprintf(pid_str, sizeof(pid_str), "%d\n", pid);
814 
815 	retval = write(cg_fd, pid_str, strlen(pid_str));
816 	if (retval < 0)
817 		err_msg("Error setting cgroup attributes for pid:%s - %s\n",
818 				pid_str, strerror(errno));
819 	else
820 		debug_msg("Set cgroup attributes for pid:%s\n", pid_str);
821 
822 	close(cg_fd);
823 
824 	return (retval >= 0);
825 }
826 
827 /**
828  * set_comm_cgroup - Set cgroup to threads starting with char *comm_prefix
829  *
830  * If cgroup argument is not NULL, the threads will move to the given cgroup.
831  * Otherwise, the cgroup of the calling, i.e., rtla, thread will be used.
832  *
833  * Supports cgroup v2.
834  *
835  * Returns 1 on success, 0 otherwise.
836  */
837 int set_comm_cgroup(const char *comm_prefix, const char *cgroup)
838 {
839 	char cgroup_path[MAX_PATH - strlen("/cgroup.procs")];
840 	char cgroup_procs[MAX_PATH];
841 	struct dirent *proc_entry;
842 	DIR *procfs;
843 	int retval;
844 	int cg_fd;
845 
846 	if (strlen(comm_prefix) >= MAX_PATH) {
847 		err_msg("Command prefix is too long: %d < strlen(%s)\n",
848 			MAX_PATH, comm_prefix);
849 		return 0;
850 	}
851 
852 	retval = find_mount("cgroup2", cgroup_path, sizeof(cgroup_path));
853 	if (!retval) {
854 		err_msg("Did not find cgroupv2 mount point\n");
855 		return 0;
856 	}
857 
858 	if (!cgroup) {
859 		retval = get_self_cgroup(&cgroup_path[strlen(cgroup_path)],
860 				sizeof(cgroup_path) - strlen(cgroup_path));
861 		if (!retval) {
862 			err_msg("Did not find self cgroup\n");
863 			return 0;
864 		}
865 	} else {
866 		snprintf(&cgroup_path[strlen(cgroup_path)],
867 				sizeof(cgroup_path) - strlen(cgroup_path), "%s/", cgroup);
868 	}
869 
870 	snprintf(cgroup_procs, MAX_PATH, "%s/cgroup.procs", cgroup_path);
871 
872 	debug_msg("Using cgroup path at: %s\n", cgroup_procs);
873 
874 	cg_fd = open(cgroup_procs, O_RDWR);
875 	if (cg_fd < 0)
876 		return 0;
877 
878 	procfs = opendir("/proc");
879 	if (!procfs) {
880 		err_msg("Could not open procfs\n");
881 		goto out_cg;
882 	}
883 
884 	while ((proc_entry = readdir(procfs))) {
885 
886 		retval = procfs_is_workload_pid(comm_prefix, proc_entry);
887 		if (!retval)
888 			continue;
889 
890 		retval = write(cg_fd, proc_entry->d_name, strlen(proc_entry->d_name));
891 		if (retval < 0) {
892 			err_msg("Error setting cgroup attributes for pid:%s - %s\n",
893 				proc_entry->d_name, strerror(errno));
894 			goto out_procfs;
895 		}
896 
897 		debug_msg("Set cgroup attributes for pid:%s\n", proc_entry->d_name);
898 	}
899 
900 	closedir(procfs);
901 	close(cg_fd);
902 	return 1;
903 
904 out_procfs:
905 	closedir(procfs);
906 out_cg:
907 	close(cg_fd);
908 	return 0;
909 }
910 
/**
 * auto_house_keeping - Automatically move rtla out of measurement threads
 *
 * Try to move rtla away from the tracer, if possible: when the affinity of
 * rtla overlaps with monitored_cpus, the affinity is narrowed to the cpus
 * rtla may run on that are not monitored.
 *
 * Returns 1 on success (including when nothing had to be done), 0 otherwise.
 */
int auto_house_keeping(cpu_set_t *monitored_cpus)
{
	cpu_set_t allowed, candidates;

	/* first get the CPUs in which rtla can actually run. */
	if (sched_getaffinity(getpid(), sizeof(allowed), &allowed) == -1) {
		debug_msg("Could not get rtla affinity, rtla might run with the threads!\n");
		return 0;
	}

	/* no overlap with the monitored CPUs means there is nothing to fix. */
	CPU_AND(&candidates, &allowed, monitored_cpus);
	if (CPU_COUNT(&candidates) == 0) {
		debug_msg("rtla and the monitored CPUs do not share CPUs.");
		debug_msg("Skipping auto house-keeping\n");
		return 1;
	}

	/* drop the shared CPUs ... */
	CPU_XOR(&candidates, &allowed, monitored_cpus);

	/* ... and keep only those in which rtla is allowed to run */
	CPU_AND(&candidates, &candidates, &allowed);

	if (CPU_COUNT(&candidates) == 0) {
		debug_msg("Could not find any CPU for auto house-keeping\n");
		return 0;
	}

	if (sched_setaffinity(getpid(), sizeof(candidates), &candidates) == -1) {
		debug_msg("Could not set affinity for auto house-keeping\n");
		return 0;
	}

	debug_msg("rtla automatically moved to an auto house-keeping cpu set\n");

	return 1;
}
960