xref: /linux/tools/tracing/rtla/src/utils.c (revision 6ea8a206108fe8b5940c2797afc54ae9f5a7bbdd)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org>
4  */
5 
6 #define _GNU_SOURCE
7 #ifdef HAVE_LIBCPUPOWER_SUPPORT
8 #include <cpuidle.h>
9 #endif /* HAVE_LIBCPUPOWER_SUPPORT */
10 #include <dirent.h>
11 #include <stdarg.h>
12 #include <stdlib.h>
13 #include <string.h>
14 #include <unistd.h>
15 #include <ctype.h>
16 #include <errno.h>
17 #include <fcntl.h>
18 #include <sched.h>
19 #include <stdio.h>
20 #include <limits.h>
21 
22 #include "utils.h"
23 
/* Size, in bytes, of the buffer a single err_msg()/debug_msg() is formatted into. */
#define MAX_MSG_LENGTH	1024

/* debug_msg() only produces output while this is non-zero. */
int config_debug;

/*
 * err_msg - format a message and write it to stderr
 *
 * The text is first rendered into a MAX_MSG_LENGTH byte buffer, so anything
 * longer than that is truncated by vsnprintf(). No end of line is appended.
 */
void err_msg(const char *fmt, ...)
{
	char text[MAX_MSG_LENGTH];
	va_list args;

	va_start(args, fmt);
	vsnprintf(text, sizeof(text), fmt, args);
	va_end(args);

	fputs(text, stderr);
}
41 
42 /*
43  * debug_msg - print a debug message to stderr if debug is set
44  */
45 void debug_msg(const char *fmt, ...)
46 {
47 	char message[MAX_MSG_LENGTH];
48 	va_list ap;
49 
50 	if (!config_debug)
51 		return;
52 
53 	va_start(ap, fmt);
54 	vsnprintf(message, sizeof(message), fmt, ap);
55 	va_end(ap);
56 
57 	fprintf(stderr, "%s", message);
58 }
59 
60 /*
61  * fatal - print an error message and EOL to stderr and exit with ERROR
62  */
63 void fatal(const char *fmt, ...)
64 {
65 	va_list ap;
66 
67 	va_start(ap, fmt);
68 	vfprintf(stderr, fmt, ap);
69 	va_end(ap);
70 	fprintf(stderr, "\n");
71 
72 	exit(ERROR);
73 }
74 
/*
 * get_llong_from_str - convert a base-10 string to a long long
 *
 * Returns the parsed value, or -1 when no digits could be consumed or when
 * strtoll() reported an error through errno. Characters that follow the
 * number are ignored. Since -1 is also a valid parse result, callers cannot
 * tell a literal "-1" apart from a failure.
 */
long long get_llong_from_str(char *start)
{
	char *stop = NULL;
	long long parsed;

	errno = 0;
	parsed = strtoll(start, &stop, 10);

	return (errno == 0 && stop != start) ? parsed : -1;
}
90 
/*
 * get_duration - write the time elapsed since start_time into output
 *
 * The elapsed seconds are broken down with gmtime() and printed as
 * "<days> HH:MM:SS", where the day field is the tm_yday of that broken-down
 * value. At most output_size bytes, including the terminator, are written.
 */
void get_duration(time_t start_time, char *output, int output_size)
{
	time_t elapsed = difftime(time(NULL), start_time);
	struct tm *parts = gmtime(&elapsed);

	snprintf(output, output_size, "%3d %02d:%02d:%02d",
		 parts->tm_yday, parts->tm_hour, parts->tm_min, parts->tm_sec);
}
109 
/*
 * cpu_set_next_number - consume one decimal cpu number at *cursor
 *
 * The number must start with a digit: leading blanks or a sign are rejected
 * rather than silently skipped, which guarantees that the caller's cursor
 * always moves forward on success.
 *
 * Returns the cpu number and advances *cursor past its digits, or returns -1
 * (leaving *cursor untouched) if there is no digit at *cursor or the value is
 * not below nr_cpus.
 */
static int cpu_set_next_number(const char **cursor, int nr_cpus)
{
	char *end;
	long val;

	if (!isdigit((unsigned char)**cursor))
		return -1;

	errno = 0;
	val = strtol(*cursor, &end, 10);
	if (errno || val >= nr_cpus)
		return -1;

	*cursor = end;
	return (int)val;
}

/*
 * parse_cpu_set - parse a cpu_list filling cpu_set_t argument
 *
 * Receives a cpu list, like 1-3,5 (cpus 1, 2, 3, 5), and sets the matching
 * cpus in *set, which is cleared first. Every cpu must be lower than the
 * number of configured processors, and the end of a range must not be lower
 * than its start.
 *
 * Any character that is not a digit, '-' after a number, or ',' between
 * entries makes the parsing fail.
 *
 * Returns 0 on success, 1 otherwise.
 */
int parse_cpu_set(char *cpu_list, cpu_set_t *set)
{
	const char *p = cpu_list;
	int end_cpu;
	int nr_cpus;
	int cpu;
	int i;

	CPU_ZERO(set);

	nr_cpus = sysconf(_SC_NPROCESSORS_CONF);

	while (*p) {
		cpu = cpu_set_next_number(&p, nr_cpus);
		if (cpu < 0)
			goto err;

		end_cpu = cpu;
		if (*p == '-') {
			p++;
			/* a failed parse returns -1, which is always below cpu */
			end_cpu = cpu_set_next_number(&p, nr_cpus);
			if (end_cpu < cpu)
				goto err;
		}

		for (i = cpu; i <= end_cpu; i++) {
			debug_msg("cpu_set: adding cpu %d\n", i);
			CPU_SET(i, set);
		}

		if (*p == ',')
			p++;
	}

	return 0;
err:
	debug_msg("Error parsing the cpu set %s\n", cpu_list);
	return 1;
}
166 
/*
 * parse_seconds_duration - parse a duration with an optional s/m/h/d suffix
 *
 * The leading base-10 number is scaled according to the character that
 * follows it: 's' keeps it as is, 'm' means minutes, 'h' hours and 'd' days
 * (upper case is accepted too). Any other character, including the end of
 * the string, leaves the number unscaled.
 *
 * Returns the duration in seconds. No error is reported for malformed input.
 */
long parse_seconds_duration(char *val)
{
	char *suffix;
	long amount = strtol(val, &suffix, 10);
	char unit = *suffix;

	if (unit == 'm' || unit == 'M')
		return amount * 60;

	if (unit == 'h' || unit == 'H')
		return amount * (60 * 60);

	if (unit == 'd' || unit == 'D')
		return amount * (24 * 60 * 60);

	/* 's', 'S' and anything unrecognized: already in seconds */
	return amount;
}
200 
/*
 * parse_ns_duration - parse a duration with an ns/us/ms/s suffix
 *
 * The leading base-10 number must be followed by one of the units "ns",
 * "us", "ms" or "s"; only the beginning of the remaining text is compared,
 * so anything after the unit is ignored.
 *
 * Returns the duration in nanoseconds, or -1 when no known unit follows the
 * number (a bare number is therefore rejected).
 */
long parse_ns_duration(char *val)
{
	char *unit;
	long amount = strtol(val, &unit, 10);

	if (strncmp(unit, "ns", 2) == 0)
		return amount;

	if (strncmp(unit, "us", 2) == 0)
		return amount * 1000;

	if (strncmp(unit, "ms", 2) == 0)
		return amount * (1000 * 1000);

	if (strncmp(unit, "s", 1) == 0)
		return amount * (1000 * 1000 * 1000);

	return -1;
}
229 
/*
 * Helpers to use SCHED_DEADLINE.
 *
 * When the libc headers do not provide __NR_sched_setattr, fall back to the
 * per-architecture syscall number listed here.
 */
#ifndef __NR_sched_setattr
# if defined(__x86_64__)
#  define __NR_sched_setattr	314
# elif defined(__i386__)
#  define __NR_sched_setattr	351
# elif defined(__arm__)
#  define __NR_sched_setattr	380
# elif defined(__aarch64__) || defined(__riscv) || defined(__loongarch__)
#  define __NR_sched_setattr	274
# elif defined(__powerpc__)
#  define __NR_sched_setattr	355
# elif defined(__s390x__)
#  define __NR_sched_setattr	345
# endif
#endif

#define SCHED_DEADLINE		6

/* Thin wrapper issuing the sched_setattr system call through syscall(). */
static inline int syscall_sched_setattr(pid_t pid, const struct sched_attr *attr,
					unsigned int flags)
{
	return syscall(__NR_sched_setattr, pid, attr, flags);
}
257 
/*
 * __set_sched_attr - apply the scheduling attributes in attr to pid
 *
 * Issues the sched_setattr syscall with no flags and reports a failure,
 * including the errno description, via err_msg().
 *
 * Returns 0 on success, 1 otherwise.
 */
int __set_sched_attr(int pid, struct sched_attr *attr)
{
	if (syscall_sched_setattr(pid, attr, 0) >= 0)
		return 0;

	err_msg("Failed to set sched attributes to the pid %d: %s\n",
		pid, strerror(errno));
	return 1;
}
272 
273 /*
274  * procfs_is_workload_pid - check if a procfs entry contains a comm_prefix* comm
275  *
276  * Check if the procfs entry is a directory of a process, and then check if the
277  * process has a comm with the prefix set in char *comm_prefix. As the
278  * current users of this function only check for kernel threads, there is no
279  * need to check for the threads for the process.
280  *
281  * Return: True if the proc_entry contains a comm file with comm_prefix*.
282  * Otherwise returns false.
283  */
284 static int procfs_is_workload_pid(const char *comm_prefix, struct dirent *proc_entry)
285 {
286 	char buffer[MAX_PATH];
287 	int comm_fd, retval;
288 	char *t_name;
289 
290 	if (proc_entry->d_type != DT_DIR)
291 		return 0;
292 
293 	if (*proc_entry->d_name == '.')
294 		return 0;
295 
296 	/* check if the string is a pid */
297 	for (t_name = proc_entry->d_name; t_name; t_name++) {
298 		if (!isdigit(*t_name))
299 			break;
300 	}
301 
302 	if (*t_name != '\0')
303 		return 0;
304 
305 	snprintf(buffer, MAX_PATH, "/proc/%s/comm", proc_entry->d_name);
306 	comm_fd = open(buffer, O_RDONLY);
307 	if (comm_fd < 0)
308 		return 0;
309 
310 	memset(buffer, 0, MAX_PATH);
311 	retval = read(comm_fd, buffer, MAX_PATH);
312 
313 	close(comm_fd);
314 
315 	if (retval <= 0)
316 		return 0;
317 
318 	buffer[MAX_PATH-1] = '\0';
319 	retval = strncmp(comm_prefix, buffer, strlen(comm_prefix));
320 	if (retval)
321 		return 0;
322 
323 	/* comm already have \n */
324 	debug_msg("Found workload pid:%s comm:%s", proc_entry->d_name, buffer);
325 
326 	return 1;
327 }
328 
329 /*
330  * set_comm_sched_attr - set sched params to threads starting with char *comm_prefix
331  *
332  * This function uses procfs to list the currently running threads and then set the
333  * sched_attr *attr to the threads that start with char *comm_prefix. It is
334  * mainly used to set the priority to the kernel threads created by the
335  * tracers.
336  */
337 int set_comm_sched_attr(const char *comm_prefix, struct sched_attr *attr)
338 {
339 	struct dirent *proc_entry;
340 	DIR *procfs;
341 	int retval;
342 	int pid;
343 
344 	if (strlen(comm_prefix) >= MAX_PATH) {
345 		err_msg("Command prefix is too long: %d < strlen(%s)\n",
346 			MAX_PATH, comm_prefix);
347 		return 1;
348 	}
349 
350 	procfs = opendir("/proc");
351 	if (!procfs) {
352 		err_msg("Could not open procfs\n");
353 		return 1;
354 	}
355 
356 	while ((proc_entry = readdir(procfs))) {
357 
358 		retval = procfs_is_workload_pid(comm_prefix, proc_entry);
359 		if (!retval)
360 			continue;
361 
362 		if (strtoi(proc_entry->d_name, &pid)) {
363 			err_msg("'%s' is not a valid pid", proc_entry->d_name);
364 			goto out_err;
365 		}
366 		/* procfs_is_workload_pid confirmed it is a pid */
367 		retval = __set_sched_attr(pid, attr);
368 		if (retval) {
369 			err_msg("Error setting sched attributes for pid:%s\n", proc_entry->d_name);
370 			goto out_err;
371 		}
372 
373 		debug_msg("Set sched attributes for pid:%s\n", proc_entry->d_name);
374 	}
375 	return 0;
376 
377 out_err:
378 	closedir(procfs);
379 	return 1;
380 }
381 
/* Value used by the colon helpers and parse_prio() to flag a parse failure; equals -1. */
#define INVALID_VAL	(~0L)

/*
 * get_long_ns_after_colon - parse the duration that follows the first ':'
 *
 * Returns what parse_ns_duration() yields for the text after the first ':'
 * in start, or -1 if start has no ':'.
 */
static long get_long_ns_after_colon(char *start)
{
	char *colon = strchr(start, ':');

	if (colon == NULL)
		return -1;

	return parse_ns_duration(colon + 1);
}
398 
/*
 * get_long_after_colon - parse the integer that follows the first ':'
 *
 * Returns what get_llong_from_str() yields for the text after the first ':'
 * in start, converted to long, or -1 if start has no ':'.
 */
static long get_long_after_colon(char *start)
{
	char *colon = strchr(start, ':');

	if (colon == NULL)
		return -1;

	return get_llong_from_str(colon + 1);
}
414 
415 /*
416  * parse priority in the format:
417  * SCHED_OTHER:
418  *		o:<prio>
419  *		O:<prio>
420  * SCHED_RR:
421  *		r:<prio>
422  *		R:<prio>
423  * SCHED_FIFO:
424  *		f:<prio>
425  *		F:<prio>
426  * SCHED_DEADLINE:
427  *		d:runtime:period
428  *		D:runtime:period
429  */
430 int parse_prio(char *arg, struct sched_attr *sched_param)
431 {
432 	long prio;
433 	long runtime;
434 	long period;
435 
436 	memset(sched_param, 0, sizeof(*sched_param));
437 	sched_param->size = sizeof(*sched_param);
438 
439 	switch (arg[0]) {
440 	case 'd':
441 	case 'D':
442 		/* d:runtime:period */
443 		if (strlen(arg) < 4)
444 			return -1;
445 
446 		runtime = get_long_ns_after_colon(arg);
447 		if (runtime == INVALID_VAL)
448 			return -1;
449 
450 		period = get_long_ns_after_colon(&arg[2]);
451 		if (period == INVALID_VAL)
452 			return -1;
453 
454 		if (runtime > period)
455 			return -1;
456 
457 		sched_param->sched_policy   = SCHED_DEADLINE;
458 		sched_param->sched_runtime  = runtime;
459 		sched_param->sched_deadline = period;
460 		sched_param->sched_period   = period;
461 		break;
462 	case 'f':
463 	case 'F':
464 		/* f:prio */
465 		prio = get_long_after_colon(arg);
466 		if (prio == INVALID_VAL)
467 			return -1;
468 
469 		if (prio < sched_get_priority_min(SCHED_FIFO))
470 			return -1;
471 		if (prio > sched_get_priority_max(SCHED_FIFO))
472 			return -1;
473 
474 		sched_param->sched_policy   = SCHED_FIFO;
475 		sched_param->sched_priority = prio;
476 		break;
477 	case 'r':
478 	case 'R':
479 		/* r:prio */
480 		prio = get_long_after_colon(arg);
481 		if (prio == INVALID_VAL)
482 			return -1;
483 
484 		if (prio < sched_get_priority_min(SCHED_RR))
485 			return -1;
486 		if (prio > sched_get_priority_max(SCHED_RR))
487 			return -1;
488 
489 		sched_param->sched_policy   = SCHED_RR;
490 		sched_param->sched_priority = prio;
491 		break;
492 	case 'o':
493 	case 'O':
494 		/* o:prio */
495 		prio = get_long_after_colon(arg);
496 		if (prio == INVALID_VAL)
497 			return -1;
498 
499 		if (prio < MIN_NICE)
500 			return -1;
501 		if (prio > MAX_NICE)
502 			return -1;
503 
504 		sched_param->sched_policy   = SCHED_OTHER;
505 		sched_param->sched_nice = prio;
506 		break;
507 	default:
508 		return -1;
509 	}
510 	return 0;
511 }
512 
/*
 * set_cpu_dma_latency - write a latency value to /dev/cpu_dma_latency
 *
 * This is used to reduce the exit from idle latency. The value
 * will be reset once the file descriptor of /dev/cpu_dma_latency
 * is closed, so the descriptor is handed back to the caller still open.
 *
 * Return: the /dev/cpu_dma_latency file descriptor, or -1 if the device
 * could not be opened or written (the descriptor is closed in that case).
 */
int set_cpu_dma_latency(int32_t latency)
{
	int fd = open("/dev/cpu_dma_latency", O_RDWR);

	if (fd < 0) {
		err_msg("Error opening /dev/cpu_dma_latency\n");
		return -1;
	}

	if (write(fd, &latency, sizeof(latency)) < 1) {
		err_msg("Error setting /dev/cpu_dma_latency\n");
		close(fd);
		return -1;
	}

	debug_msg("Set /dev/cpu_dma_latency to %d\n", latency);

	return fd;
}
544 
545 #ifdef HAVE_LIBCPUPOWER_SUPPORT
/* Per-cpu arrays holding the saved "disable" value of every idle state. */
static unsigned int **saved_cpu_idle_disable_state;
/* Number of cpus that currently have a saved array. */
static size_t saved_cpu_idle_disable_state_alloc_ctr;

/*
 * save_cpu_idle_disable_state - save disable for all idle states of a cpu
 *
 * Saves the current disable of all idle states of a cpu, to be subsequently
 * restored via restore_cpu_idle_disable_state.
 *
 * The per-cpu table is allocated on first use with one slot per configured
 * processor; a cpu outside that range is rejected. Saving the same cpu again
 * replaces the earlier snapshot.
 *
 * If reading a state fails, the memory allocated so far is kept; it is
 * released by free_cpu_idle_disable_states.
 *
 * Return: idle state count on success, negative on error
 */
int save_cpu_idle_disable_state(unsigned int cpu)
{
	unsigned int *states;
	unsigned int nr_states;
	unsigned int state;
	int disabled;
	long nr_cpus;

	nr_states = cpuidle_state_count(cpu);

	if (nr_states == 0)
		return 0;

	/* the table below has exactly nr_cpus slots */
	nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
	if (nr_cpus <= 0 || cpu >= (unsigned long)nr_cpus)
		return -1;

	if (saved_cpu_idle_disable_state == NULL) {
		saved_cpu_idle_disable_state = calloc(nr_cpus, sizeof(unsigned int *));
		if (!saved_cpu_idle_disable_state)
			return -1;
	}

	states = calloc(nr_states, sizeof(unsigned int));
	if (!states)
		return -1;

	/* count each cpu once; a repeated save frees the previous snapshot */
	if (saved_cpu_idle_disable_state[cpu])
		free(saved_cpu_idle_disable_state[cpu]);
	else
		saved_cpu_idle_disable_state_alloc_ctr++;
	saved_cpu_idle_disable_state[cpu] = states;

	for (state = 0; state < nr_states; state++) {
		disabled = cpuidle_is_state_disabled(cpu, state);
		if (disabled < 0)
			return disabled;
		states[state] = disabled;
	}

	return nr_states;
}
590 
591 /*
592  * restore_cpu_idle_disable_state - restore disable for all idle states of a cpu
593  *
594  * Restores the current disable state of all idle states of a cpu that was
595  * previously saved by save_cpu_idle_disable_state.
596  *
597  * Return: idle state count on success, negative on error
598  */
599 int restore_cpu_idle_disable_state(unsigned int cpu)
600 {
601 	unsigned int nr_states;
602 	unsigned int state;
603 	int disabled;
604 	int result;
605 
606 	nr_states = cpuidle_state_count(cpu);
607 
608 	if (nr_states == 0)
609 		return 0;
610 
611 	if (!saved_cpu_idle_disable_state)
612 		return -1;
613 
614 	for (state = 0; state < nr_states; state++) {
615 		if (!saved_cpu_idle_disable_state[cpu])
616 			return -1;
617 		disabled = saved_cpu_idle_disable_state[cpu][state];
618 		result = cpuidle_state_disable(cpu, state, disabled);
619 		if (result < 0)
620 			return result;
621 	}
622 
623 	free(saved_cpu_idle_disable_state[cpu]);
624 	saved_cpu_idle_disable_state[cpu] = NULL;
625 	saved_cpu_idle_disable_state_alloc_ctr--;
626 	if (saved_cpu_idle_disable_state_alloc_ctr == 0) {
627 		free(saved_cpu_idle_disable_state);
628 		saved_cpu_idle_disable_state = NULL;
629 	}
630 
631 	return nr_states;
632 }
633 
634 /*
635  * free_cpu_idle_disable_states - free saved idle state disable for all cpus
636  *
637  * Frees the memory used for storing cpu idle state disable for all cpus
638  * and states.
639  *
640  * Normally, the memory is freed automatically in
641  * restore_cpu_idle_disable_state; this is mostly for cleaning up after an
642  * error.
643  */
644 void free_cpu_idle_disable_states(void)
645 {
646 	int cpu;
647 	int nr_cpus;
648 
649 	if (!saved_cpu_idle_disable_state)
650 		return;
651 
652 	nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
653 
654 	for (cpu = 0; cpu < nr_cpus; cpu++) {
655 		free(saved_cpu_idle_disable_state[cpu]);
656 		saved_cpu_idle_disable_state[cpu] = NULL;
657 	}
658 
659 	free(saved_cpu_idle_disable_state);
660 	saved_cpu_idle_disable_state = NULL;
661 }
662 
/*
 * set_deepest_cpu_idle_state - limit idle state of cpu
 *
 * Disables every idle state whose index is greater than deepest_state
 * (assuming states with higher number are deeper). States up to and
 * including deepest_state are left untouched.
 *
 * This is used to reduce the exit from idle latency. Unlike
 * set_cpu_dma_latency, it can disable idle states per cpu.
 *
 * Return: idle state count on success, negative on error
 */
int set_deepest_cpu_idle_state(unsigned int cpu, unsigned int deepest_state)
{
	unsigned int nr_states = cpuidle_state_count(cpu);
	unsigned int state = deepest_state + 1;
	int err;

	while (state < nr_states) {
		err = cpuidle_state_disable(cpu, state, 1);
		if (err < 0)
			return err;
		state++;
	}

	return nr_states;
}
690 #endif /* HAVE_LIBCPUPOWER_SUPPORT */
691 
692 #define _STR(x) #x
693 #define STR(x) _STR(x)
694 
695 /*
696  * find_mount - find a the mount point of a given fs
697  *
698  * Returns 0 if mount is not found, otherwise return 1 and fill mp
699  * with the mount point.
700  */
701 static const int find_mount(const char *fs, char *mp, int sizeof_mp)
702 {
703 	char mount_point[MAX_PATH+1];
704 	char type[100];
705 	int found = 0;
706 	FILE *fp;
707 
708 	fp = fopen("/proc/mounts", "r");
709 	if (!fp)
710 		return 0;
711 
712 	while (fscanf(fp, "%*s %" STR(MAX_PATH) "s %99s %*s %*d %*d\n",	mount_point, type) == 2) {
713 		if (strcmp(type, fs) == 0) {
714 			found = 1;
715 			break;
716 		}
717 	}
718 	fclose(fp);
719 
720 	if (!found)
721 		return 0;
722 
723 	memset(mp, 0, sizeof_mp);
724 	strncpy(mp, mount_point, sizeof_mp - 1);
725 
726 	debug_msg("Fs %s found at %s\n", fs, mp);
727 	return 1;
728 }
729 
730 /*
731  * get_self_cgroup - get the current thread cgroup path
732  *
733  * Parse /proc/$$/cgroup file to get the thread's cgroup. As an example of line to parse:
734  *
735  * 0::/user.slice/user-0.slice/session-3.scope'\n'
736  *
737  * This function is interested in the content after the second : and before the '\n'.
738  *
739  * Returns 1 if a string was found, 0 otherwise.
740  */
741 static int get_self_cgroup(char *self_cg, int sizeof_self_cg)
742 {
743 	char path[MAX_PATH], *start;
744 	int fd, retval;
745 
746 	snprintf(path, MAX_PATH, "/proc/%d/cgroup", getpid());
747 
748 	fd = open(path, O_RDONLY);
749 	if (fd < 0)
750 		return 0;
751 
752 	memset(path, 0, sizeof(path));
753 	retval = read(fd, path, MAX_PATH);
754 
755 	close(fd);
756 
757 	if (retval <= 0)
758 		return 0;
759 
760 	path[MAX_PATH-1] = '\0';
761 	start = path;
762 
763 	start = strstr(start, ":");
764 	if (!start)
765 		return 0;
766 
767 	/* skip ":" */
768 	start++;
769 
770 	start = strstr(start, ":");
771 	if (!start)
772 		return 0;
773 
774 	/* skip ":" */
775 	start++;
776 
777 	if (strlen(start) >= sizeof_self_cg)
778 		return 0;
779 
780 	snprintf(self_cg, sizeof_self_cg, "%s", start);
781 
782 	/* Swap '\n' with '\0' */
783 	start = strstr(self_cg, "\n");
784 
785 	/* there must be '\n' */
786 	if (!start)
787 		return 0;
788 
789 	/* ok, it found a string after the second : and before the \n */
790 	*start = '\0';
791 
792 	return 1;
793 }
794 
795 /*
796  * open_cgroup_procs - Open the cgroup.procs file for the given cgroup
797  *
798  * If cgroup argument is not NULL, the cgroup.procs file for that cgroup
799  * will be opened. Otherwise, the cgroup of the calling, i.e., rtla, thread
800  * will be used.
801  *
802  * Supports cgroup v2.
803  *
804  * Returns the file descriptor on success, -1 otherwise.
805  */
806 static int open_cgroup_procs(const char *cgroup)
807 {
808 	char cgroup_path[MAX_PATH - strlen("/cgroup.procs")];
809 	char cgroup_procs[MAX_PATH];
810 	int retval;
811 	int cg_fd;
812 
813 	retval = find_mount("cgroup2", cgroup_path, sizeof(cgroup_path));
814 	if (!retval) {
815 		err_msg("Did not find cgroupv2 mount point\n");
816 		return -1;
817 	}
818 
819 	if (!cgroup) {
820 		retval = get_self_cgroup(&cgroup_path[strlen(cgroup_path)],
821 				sizeof(cgroup_path) - strlen(cgroup_path));
822 		if (!retval) {
823 			err_msg("Did not find self cgroup\n");
824 			return -1;
825 		}
826 	} else {
827 		snprintf(&cgroup_path[strlen(cgroup_path)],
828 				sizeof(cgroup_path) - strlen(cgroup_path), "%s/", cgroup);
829 	}
830 
831 	snprintf(cgroup_procs, MAX_PATH, "%s/cgroup.procs", cgroup_path);
832 
833 	debug_msg("Using cgroup path at: %s\n", cgroup_procs);
834 
835 	cg_fd = open(cgroup_procs, O_RDWR);
836 	if (cg_fd < 0)
837 		return -1;
838 
839 	return cg_fd;
840 }
841 
/*
 * set_pid_cgroup - Set cgroup to pid_t pid
 *
 * If cgroup argument is not NULL, the threads will move to the given cgroup.
 * Otherwise, the cgroup of the calling, i.e., rtla, thread will be used.
 *
 * The pid, followed by an end of line, is written to the cgroup.procs file
 * opened by open_cgroup_procs.
 *
 * Supports cgroup v2.
 *
 * Returns 1 on success, 0 otherwise.
 */
int set_pid_cgroup(pid_t pid, const char *cgroup)
{
	char pid_str[24];
	int retval;
	int cg_fd;

	cg_fd = open_cgroup_procs(cgroup);
	if (cg_fd < 0)
		return 0;

	snprintf(pid_str, sizeof(pid_str), "%d\n", pid);

	retval = write(cg_fd, pid_str, strlen(pid_str));
	/* log the numeric pid: pid_str carries a '\n' that would split the line */
	if (retval < 0)
		err_msg("Error setting cgroup attributes for pid:%d - %s\n",
				pid, strerror(errno));
	else
		debug_msg("Set cgroup attributes for pid:%d\n", pid);

	close(cg_fd);

	return (retval >= 0);
}
875 
876 /**
877  * set_comm_cgroup - Set cgroup to threads starting with char *comm_prefix
878  *
879  * If cgroup argument is not NULL, the threads will move to the given cgroup.
880  * Otherwise, the cgroup of the calling, i.e., rtla, thread will be used.
881  *
882  * Supports cgroup v2.
883  *
884  * Returns 1 on success, 0 otherwise.
885  */
886 int set_comm_cgroup(const char *comm_prefix, const char *cgroup)
887 {
888 	struct dirent *proc_entry;
889 	DIR *procfs;
890 	int retval;
891 	int cg_fd;
892 
893 	if (strlen(comm_prefix) >= MAX_PATH) {
894 		err_msg("Command prefix is too long: %d < strlen(%s)\n",
895 			MAX_PATH, comm_prefix);
896 		return 0;
897 	}
898 
899 	cg_fd = open_cgroup_procs(cgroup);
900 	if (cg_fd < 0)
901 		return 0;
902 
903 	procfs = opendir("/proc");
904 	if (!procfs) {
905 		err_msg("Could not open procfs\n");
906 		goto out_cg;
907 	}
908 
909 	while ((proc_entry = readdir(procfs))) {
910 
911 		retval = procfs_is_workload_pid(comm_prefix, proc_entry);
912 		if (!retval)
913 			continue;
914 
915 		retval = write(cg_fd, proc_entry->d_name, strlen(proc_entry->d_name));
916 		if (retval < 0) {
917 			err_msg("Error setting cgroup attributes for pid:%s - %s\n",
918 				proc_entry->d_name, strerror(errno));
919 			goto out_procfs;
920 		}
921 
922 		debug_msg("Set cgroup attributes for pid:%s\n", proc_entry->d_name);
923 	}
924 
925 	closedir(procfs);
926 	close(cg_fd);
927 	return 1;
928 
929 out_procfs:
930 	closedir(procfs);
931 out_cg:
932 	close(cg_fd);
933 	return 0;
934 }
935 
/**
 * auto_house_keeping - Automatically move rtla out of measurement threads
 *
 * Try to move rtla away from the tracer, if possible: when the affinity of
 * rtla overlaps with monitored_cpus, rtla's affinity is reduced to the cpus
 * it is allowed to run on that are not monitored.
 *
 * Returns 1 on success (including when there was no overlap to begin with),
 * 0 otherwise.
 */
int auto_house_keeping(cpu_set_t *monitored_cpus)
{
	cpu_set_t allowed, candidates;

	/* first get the CPUs in which rtla can actually run. */
	if (sched_getaffinity(getpid(), sizeof(allowed), &allowed) == -1) {
		debug_msg("Could not get rtla affinity, rtla might run with the threads!\n");
		return 0;
	}

	/* then check if the existing setup is already good. */
	CPU_AND(&candidates, &allowed, monitored_cpus);
	if (CPU_COUNT(&candidates) == 0) {
		debug_msg("rtla and the monitored CPUs do not share CPUs.");
		debug_msg("Skipping auto house-keeping\n");
		return 1;
	}

	/* drop the cpus present in both sets... */
	CPU_XOR(&candidates, &allowed, monitored_cpus);

	/* ...and keep, from what is left, only those rtla may run on */
	CPU_AND(&candidates, &candidates, &allowed);

	/* is there any cpu left? */
	if (CPU_COUNT(&candidates) == 0) {
		debug_msg("Could not find any CPU for auto house-keeping\n");
		return 0;
	}

	if (sched_setaffinity(getpid(), sizeof(candidates), &candidates) == -1) {
		debug_msg("Could not set affinity for auto house-keeping\n");
		return 0;
	}

	debug_msg("rtla automatically moved to an auto house-keeping cpu set\n");

	return 1;
}
985 
/**
 * parse_optional_arg - Parse optional argument value
 *
 * Parse optional argument value, which can be in the form of:
 * -sarg, -s/--long=arg, -s/--long arg
 *
 * When optarg is set it is used, minus a leading '='. Otherwise the next
 * argv entry is taken, and optind advanced past it, unless that entry starts
 * with '-'.
 *
 * Returns arg value if found, NULL otherwise.
 */
char *parse_optional_arg(int argc, char **argv)
{
	/* forms -sarg and -s/--long=arg: getopt already captured the value */
	if (optarg)
		return (optarg[0] == '=') ? &optarg[1] : optarg;

	/* no more arguments, or the next one is another option */
	if (optind >= argc || argv[optind][0] == '-')
		return NULL;

	/* forms -s [arg] and --long [arg]: consume optind */
	return argv[optind++];
}
1011 
/*
 * strtoi - convert string to integer with error checking
 *
 * The whole string must be a number; the base is detected by strtol() from
 * its prefix (base argument 0). *res is written only on success.
 *
 * Returns 0 on success, -1 if the string is empty, conversion fails, text
 * follows the number, or the result is out of int range.
 */
int strtoi(const char *s, int *res)
{
	char *tail;
	long parsed;

	if (*s == '\0')
		return -1;

	errno = 0;
	parsed = strtol(s, &tail, 0);

	if (errno != 0)
		return -1;

	/* trailing characters after the number are not accepted */
	if (*tail != '\0')
		return -1;

	if (parsed < INT_MIN || parsed > INT_MAX)
		return -1;

	*res = (int) parsed;
	return 0;
}
1032 }
1033