xref: /linux/tools/tracing/rtla/src/utils.c (revision fb8b8183208d8efe824e8d2c73fb1ab5ad1191fd)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org>
4  */
5 
6 #define _GNU_SOURCE
7 #ifdef HAVE_LIBCPUPOWER_SUPPORT
8 #include <cpuidle.h>
9 #endif /* HAVE_LIBCPUPOWER_SUPPORT */
10 #include <dirent.h>
11 #include <stdarg.h>
12 #include <stdlib.h>
13 #include <string.h>
14 #include <unistd.h>
15 #include <ctype.h>
16 #include <errno.h>
17 #include <fcntl.h>
18 #include <sched.h>
19 #include <stdio.h>
20 #include <limits.h>
21 
22 #include "utils.h"
23 
24 #define MAX_MSG_LENGTH	1024
25 int config_debug;
26 
27 /*
28  * err_msg - print an error message to the stderr
29  */
30 void err_msg(const char *fmt, ...)
31 {
32 	char message[MAX_MSG_LENGTH];
33 	va_list ap;
34 
35 	va_start(ap, fmt);
36 	vsnprintf(message, sizeof(message), fmt, ap);
37 	va_end(ap);
38 
39 	fprintf(stderr, "%s", message);
40 }
41 
42 /*
43  * debug_msg - print a debug message to stderr if debug is set
44  */
45 void debug_msg(const char *fmt, ...)
46 {
47 	char message[MAX_MSG_LENGTH];
48 	va_list ap;
49 
50 	if (!config_debug)
51 		return;
52 
53 	va_start(ap, fmt);
54 	vsnprintf(message, sizeof(message), fmt, ap);
55 	va_end(ap);
56 
57 	fprintf(stderr, "%s", message);
58 }
59 
60 /*
61  * fatal - print an error message and EOL to stderr and exit with ERROR
62  */
63 void fatal(const char *fmt, ...)
64 {
65 	va_list ap;
66 
67 	va_start(ap, fmt);
68 	vfprintf(stderr, fmt, ap);
69 	va_end(ap);
70 	fprintf(stderr, "\n");
71 
72 	exit(ERROR);
73 }
74 
/*
 * get_llong_from_str - convert the leading decimal number of a string
 *
 * Characters following the number are ignored.
 *
 * Returns the parsed value, or -1 if the string does not start with a
 * number or the value is out of range. Note that a legitimate -1 cannot be
 * told apart from the error value.
 */
long long get_llong_from_str(char *start)
{
	char *stop = NULL;
	long long parsed;

	errno = 0;
	parsed = strtoll(start, &stop, 10);

	/* stop == start means that not a single digit was consumed */
	return (errno != 0 || stop == start) ? -1 : parsed;
}
90 
/*
 * get_duration - write the time elapsed since start_time into output
 *
 * The elapsed seconds are broken down with gmtime() and printed as
 * "<days> <hours>:<minutes>:<seconds>", where the day count is the
 * day-of-year field of the broken down time.
 */
void get_duration(time_t start_time, char *output, int output_size)
{
	time_t elapsed = difftime(time(NULL), start_time);
	struct tm *parts = gmtime(&elapsed);

	snprintf(output, output_size, "%3d %02d:%02d:%02d",
		 parts->tm_yday, parts->tm_hour, parts->tm_min, parts->tm_sec);
}
109 
/*
 * parse_cpu_number - read one decimal cpu number at *pos
 *
 * On success the number is stored in *cpu and *pos is moved to the first
 * character after it.
 *
 * Returns 0 on success, -1 if there is no number at *pos or if it is not
 * smaller than nr_cpus.
 */
static int parse_cpu_number(const char **pos, int nr_cpus, int *cpu)
{
	char *end;
	long val;

	/* strtol() would skip blanks and accept a sign: a cpu number has neither */
	if (!isdigit((unsigned char)**pos))
		return -1;

	errno = 0;
	val = strtol(*pos, &end, 10);
	if (errno || val >= nr_cpus)
		return -1;

	*cpu = val;
	*pos = end;
	return 0;
}

/*
 * parse_cpu_set - parse a cpu_list filling cpu_set_t argument
 *
 * Receives a cpu list, like 1-3,5 (cpus 1, 2, 3, 5), and sets the
 * corresponding cpus in the cpu_set_t argument. The set is cleared first;
 * on error it may hold the cpus parsed before the error was found.
 *
 * Each element is either a single cpu or a first-last range, and elements
 * are separated by ','. Every cpu must be smaller than the number of
 * configured processors, and a range must not be descending.
 *
 * Returns 0 on success, 1 otherwise.
 */
int parse_cpu_set(char *cpu_list, cpu_set_t *set)
{
	const char *p = cpu_list;
	int end_cpu;
	int nr_cpus;
	int cpu;
	int i;

	CPU_ZERO(set);

	nr_cpus = sysconf(_SC_NPROCESSORS_CONF);

	while (*p) {
		/*
		 * The numbers are followed by '-', ',' or the end of the
		 * string, so they are parsed in place instead of with
		 * strtoi(), which only accepts a string that is a number
		 * as a whole.
		 */
		if (parse_cpu_number(&p, nr_cpus, &cpu))
			goto err;

		end_cpu = cpu;
		if (*p == '-') {
			p++;
			if (parse_cpu_number(&p, nr_cpus, &end_cpu))
				goto err;
			if (end_cpu < cpu)
				goto err;
		}

		for (i = cpu; i <= end_cpu; i++) {
			debug_msg("cpu_set: adding cpu %d\n", i);
			CPU_SET(i, set);
		}

		/* only a separator or the end of the list may follow an element */
		if (*p == ',')
			p++;
		else if (*p)
			goto err;
	}

	return 0;
err:
	debug_msg("Error parsing the cpu set %s\n", cpu_list);
	return 1;
}
168 
/*
 * parse_seconds_duration - parse a duration with an optional s/m/h/d suffix
 *
 * The leading decimal number of val is scaled according to the character
 * that follows it: m/M for minutes, h/H for hours, d/D for days. With s/S,
 * no suffix, or any other character the number is taken as seconds.
 *
 * Returns the duration in seconds; 0 if val does not start with a number.
 */
long parse_seconds_duration(char *val)
{
	long scale = 1;
	char *suffix;
	long amount;

	amount = strtol(val, &suffix, 10);

	switch (*suffix) {
	case 'm':
	case 'M':
		scale = 60;
		break;
	case 'h':
	case 'H':
		scale = 60 * 60;
		break;
	case 'd':
	case 'D':
		scale = 24 * 60 * 60;
		break;
	default:
		/* seconds: nothing to scale */
		break;
	}

	return amount * scale;
}
202 
/*
 * parse_ns_duration - parse a duration with a ns/us/ms/s suffix
 *
 * The suffix is mandatory: a number that is not directly followed by one
 * of the known units is rejected.
 *
 * Returns the duration in nanoseconds, -1 if the unit is missing or
 * unknown.
 */
long parse_ns_duration(char *val)
{
	char *unit;
	long amount = strtol(val, &unit, 10);

	if (strncmp(unit, "ns", 2) == 0)
		return amount;

	if (strncmp(unit, "us", 2) == 0)
		return amount * 1000;

	if (strncmp(unit, "ms", 2) == 0)
		return amount * (1000 * 1000);

	if (*unit == 's')
		return amount * (1000 * 1000 * 1000);

	return -1;
}
231 
/*
 * This is a set of helper functions to use SCHED_DEADLINE.
 *
 * When the included headers do not provide __NR_sched_setattr, fall back to
 * the syscall number of the architecture being built. An architecture that
 * is not listed below leaves the macro undefined.
 */
#ifndef __NR_sched_setattr
# ifdef __x86_64__
#  define __NR_sched_setattr	314
# elif __i386__
#  define __NR_sched_setattr	351
# elif __arm__
#  define __NR_sched_setattr	380
# elif __aarch64__ || __riscv
#  define __NR_sched_setattr	274
# elif __powerpc__
#  define __NR_sched_setattr	355
# elif __s390x__
#  define __NR_sched_setattr	345
# elif __loongarch__
#  define __NR_sched_setattr	274
# endif
#endif

/* Value stored in sched_attr's sched_policy to select SCHED_DEADLINE. */
#define SCHED_DEADLINE		6
254 
255 static inline int syscall_sched_setattr(pid_t pid, const struct sched_attr *attr,
256 				unsigned int flags) {
257 	return syscall(__NR_sched_setattr, pid, attr, flags);
258 }
259 
/*
 * __set_sched_attr - apply the sched attributes in attr to the task pid
 *
 * On failure, the reason is reported with err_msg.
 *
 * Returns 0 on success, 1 otherwise.
 */
int __set_sched_attr(int pid, struct sched_attr *attr)
{
	if (syscall_sched_setattr(pid, attr, 0) >= 0)
		return 0;

	err_msg("Failed to set sched attributes to the pid %d: %s\n",
		pid, strerror(errno));
	return 1;
}
274 
275 /*
276  * procfs_is_workload_pid - check if a procfs entry contains a comm_prefix* comm
277  *
278  * Check if the procfs entry is a directory of a process, and then check if the
279  * process has a comm with the prefix set in char *comm_prefix. As the
280  * current users of this function only check for kernel threads, there is no
281  * need to check for the threads for the process.
282  *
283  * Return: True if the proc_entry contains a comm file with comm_prefix*.
284  * Otherwise returns false.
285  */
286 static int procfs_is_workload_pid(const char *comm_prefix, struct dirent *proc_entry)
287 {
288 	char buffer[MAX_PATH];
289 	int comm_fd, retval;
290 	char *t_name;
291 
292 	if (proc_entry->d_type != DT_DIR)
293 		return 0;
294 
295 	if (*proc_entry->d_name == '.')
296 		return 0;
297 
298 	/* check if the string is a pid */
299 	for (t_name = proc_entry->d_name; t_name; t_name++) {
300 		if (!isdigit(*t_name))
301 			break;
302 	}
303 
304 	if (*t_name != '\0')
305 		return 0;
306 
307 	snprintf(buffer, MAX_PATH, "/proc/%s/comm", proc_entry->d_name);
308 	comm_fd = open(buffer, O_RDONLY);
309 	if (comm_fd < 0)
310 		return 0;
311 
312 	memset(buffer, 0, MAX_PATH);
313 	retval = read(comm_fd, buffer, MAX_PATH);
314 
315 	close(comm_fd);
316 
317 	if (retval <= 0)
318 		return 0;
319 
320 	buffer[MAX_PATH-1] = '\0';
321 	retval = strncmp(comm_prefix, buffer, strlen(comm_prefix));
322 	if (retval)
323 		return 0;
324 
325 	/* comm already have \n */
326 	debug_msg("Found workload pid:%s comm:%s", proc_entry->d_name, buffer);
327 
328 	return 1;
329 }
330 
331 /*
332  * set_comm_sched_attr - set sched params to threads starting with char *comm_prefix
333  *
334  * This function uses procfs to list the currently running threads and then set the
335  * sched_attr *attr to the threads that start with char *comm_prefix. It is
336  * mainly used to set the priority to the kernel threads created by the
337  * tracers.
338  */
339 int set_comm_sched_attr(const char *comm_prefix, struct sched_attr *attr)
340 {
341 	struct dirent *proc_entry;
342 	DIR *procfs;
343 	int retval;
344 	int pid;
345 
346 	if (strlen(comm_prefix) >= MAX_PATH) {
347 		err_msg("Command prefix is too long: %d < strlen(%s)\n",
348 			MAX_PATH, comm_prefix);
349 		return 1;
350 	}
351 
352 	procfs = opendir("/proc");
353 	if (!procfs) {
354 		err_msg("Could not open procfs\n");
355 		return 1;
356 	}
357 
358 	while ((proc_entry = readdir(procfs))) {
359 
360 		retval = procfs_is_workload_pid(comm_prefix, proc_entry);
361 		if (!retval)
362 			continue;
363 
364 		if (strtoi(proc_entry->d_name, &pid)) {
365 			err_msg("'%s' is not a valid pid", proc_entry->d_name);
366 			goto out_err;
367 		}
368 		/* procfs_is_workload_pid confirmed it is a pid */
369 		retval = __set_sched_attr(pid, attr);
370 		if (retval) {
371 			err_msg("Error setting sched attributes for pid:%s\n", proc_entry->d_name);
372 			goto out_err;
373 		}
374 
375 		debug_msg("Set sched attributes for pid:%s\n", proc_entry->d_name);
376 	}
377 	return 0;
378 
379 out_err:
380 	closedir(procfs);
381 	return 1;
382 }
383 
#define INVALID_VAL	(~0L)

/*
 * get_long_ns_after_colon - parse the ns duration that follows the first ':'
 *
 * Returns whatever parse_ns_duration returns for the text after the first
 * ':' of start, or -1 if start has no ':'.
 */
static long get_long_ns_after_colon(char *start)
{
	char *colon = strchr(start, ':');

	if (!colon)
		return -1;

	/* the duration begins right after the ':' */
	return parse_ns_duration(colon + 1);
}
400 
/*
 * get_long_after_colon - parse the decimal number that follows the first ':'
 *
 * Returns whatever get_llong_from_str returns for the text after the first
 * ':' of start, converted to long, or -1 if start has no ':'.
 */
static long get_long_after_colon(char *start)
{
	char *colon = strchr(start, ':');

	if (!colon)
		return -1;

	/* the number begins right after the ':' */
	return get_llong_from_str(colon + 1);
}
416 
417 /*
418  * parse priority in the format:
419  * SCHED_OTHER:
420  *		o:<prio>
421  *		O:<prio>
422  * SCHED_RR:
423  *		r:<prio>
424  *		R:<prio>
425  * SCHED_FIFO:
426  *		f:<prio>
427  *		F:<prio>
428  * SCHED_DEADLINE:
429  *		d:runtime:period
430  *		D:runtime:period
431  */
432 int parse_prio(char *arg, struct sched_attr *sched_param)
433 {
434 	long prio;
435 	long runtime;
436 	long period;
437 
438 	memset(sched_param, 0, sizeof(*sched_param));
439 	sched_param->size = sizeof(*sched_param);
440 
441 	switch (arg[0]) {
442 	case 'd':
443 	case 'D':
444 		/* d:runtime:period */
445 		if (strlen(arg) < 4)
446 			return -1;
447 
448 		runtime = get_long_ns_after_colon(arg);
449 		if (runtime == INVALID_VAL)
450 			return -1;
451 
452 		period = get_long_ns_after_colon(&arg[2]);
453 		if (period == INVALID_VAL)
454 			return -1;
455 
456 		if (runtime > period)
457 			return -1;
458 
459 		sched_param->sched_policy   = SCHED_DEADLINE;
460 		sched_param->sched_runtime  = runtime;
461 		sched_param->sched_deadline = period;
462 		sched_param->sched_period   = period;
463 		break;
464 	case 'f':
465 	case 'F':
466 		/* f:prio */
467 		prio = get_long_after_colon(arg);
468 		if (prio == INVALID_VAL)
469 			return -1;
470 
471 		if (prio < sched_get_priority_min(SCHED_FIFO))
472 			return -1;
473 		if (prio > sched_get_priority_max(SCHED_FIFO))
474 			return -1;
475 
476 		sched_param->sched_policy   = SCHED_FIFO;
477 		sched_param->sched_priority = prio;
478 		break;
479 	case 'r':
480 	case 'R':
481 		/* r:prio */
482 		prio = get_long_after_colon(arg);
483 		if (prio == INVALID_VAL)
484 			return -1;
485 
486 		if (prio < sched_get_priority_min(SCHED_RR))
487 			return -1;
488 		if (prio > sched_get_priority_max(SCHED_RR))
489 			return -1;
490 
491 		sched_param->sched_policy   = SCHED_RR;
492 		sched_param->sched_priority = prio;
493 		break;
494 	case 'o':
495 	case 'O':
496 		/* o:prio */
497 		prio = get_long_after_colon(arg);
498 		if (prio == INVALID_VAL)
499 			return -1;
500 
501 		if (prio < MIN_NICE)
502 			return -1;
503 		if (prio > MAX_NICE)
504 			return -1;
505 
506 		sched_param->sched_policy   = SCHED_OTHER;
507 		sched_param->sched_nice = prio;
508 		break;
509 	default:
510 		return -1;
511 	}
512 	return 0;
513 }
514 
/*
 * set_cpu_dma_latency - set the /dev/cpu_dma_latency
 *
 * This is used to reduce the exit from idle latency. The value
 * will be reset once the file descriptor of /dev/cpu_dma_latency
 * is closed, so the caller has to keep it open for as long as the
 * setting is needed.
 *
 * Return: the /dev/cpu_dma_latency file descriptor, -1 on error
 */
int set_cpu_dma_latency(int32_t latency)
{
	int fd = open("/dev/cpu_dma_latency", O_RDWR);

	if (fd < 0) {
		err_msg("Error opening /dev/cpu_dma_latency\n");
		return -1;
	}

	/* the latency is written as a raw 32 bits value */
	if (write(fd, &latency, sizeof(latency)) < 1) {
		err_msg("Error setting /dev/cpu_dma_latency\n");
		close(fd);
		return -1;
	}

	debug_msg("Set /dev/cpu_dma_latency to %d\n", latency);

	return fd;
}
546 
547 #ifdef HAVE_LIBCPUPOWER_SUPPORT
/* per-cpu arrays with the saved disable value of each idle state */
static unsigned int **saved_cpu_idle_disable_state;
/* number of per-cpu arrays currently allocated in the table above */
static size_t saved_cpu_idle_disable_state_alloc_ctr;

/*
 * save_cpu_idle_disable_state - save disable for all idle states of a cpu
 *
 * Saves the current disable of all idle states of a cpu, to be subsequently
 * restored via restore_cpu_idle_disable_state. The per-cpu table is
 * allocated on the first call.
 *
 * Return: idle state count on success, negative on error
 */
int save_cpu_idle_disable_state(unsigned int cpu)
{
	unsigned int nr_states = cpuidle_state_count(cpu);
	unsigned int *states;
	unsigned int idx;
	int nr_cpus;
	int ret;

	if (!nr_states)
		return 0;

	if (!saved_cpu_idle_disable_state) {
		nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
		saved_cpu_idle_disable_state = calloc(nr_cpus,
						      sizeof(*saved_cpu_idle_disable_state));
		if (!saved_cpu_idle_disable_state)
			return -1;
	}

	states = calloc(nr_states, sizeof(*states));
	saved_cpu_idle_disable_state[cpu] = states;
	if (!states)
		return -1;
	saved_cpu_idle_disable_state_alloc_ctr++;

	for (idx = 0; idx < nr_states; idx++) {
		ret = cpuidle_is_state_disabled(cpu, idx);
		if (ret < 0)
			return ret;
		states[idx] = ret;
	}

	return nr_states;
}
592 
593 /*
594  * restore_cpu_idle_disable_state - restore disable for all idle states of a cpu
595  *
596  * Restores the current disable state of all idle states of a cpu that was
597  * previously saved by save_cpu_idle_disable_state.
598  *
599  * Return: idle state count on success, negative on error
600  */
601 int restore_cpu_idle_disable_state(unsigned int cpu)
602 {
603 	unsigned int nr_states;
604 	unsigned int state;
605 	int disabled;
606 	int result;
607 
608 	nr_states = cpuidle_state_count(cpu);
609 
610 	if (nr_states == 0)
611 		return 0;
612 
613 	if (!saved_cpu_idle_disable_state)
614 		return -1;
615 
616 	for (state = 0; state < nr_states; state++) {
617 		if (!saved_cpu_idle_disable_state[cpu])
618 			return -1;
619 		disabled = saved_cpu_idle_disable_state[cpu][state];
620 		result = cpuidle_state_disable(cpu, state, disabled);
621 		if (result < 0)
622 			return result;
623 	}
624 
625 	free(saved_cpu_idle_disable_state[cpu]);
626 	saved_cpu_idle_disable_state[cpu] = NULL;
627 	saved_cpu_idle_disable_state_alloc_ctr--;
628 	if (saved_cpu_idle_disable_state_alloc_ctr == 0) {
629 		free(saved_cpu_idle_disable_state);
630 		saved_cpu_idle_disable_state = NULL;
631 	}
632 
633 	return nr_states;
634 }
635 
636 /*
637  * free_cpu_idle_disable_states - free saved idle state disable for all cpus
638  *
639  * Frees the memory used for storing cpu idle state disable for all cpus
640  * and states.
641  *
642  * Normally, the memory is freed automatically in
643  * restore_cpu_idle_disable_state; this is mostly for cleaning up after an
644  * error.
645  */
646 void free_cpu_idle_disable_states(void)
647 {
648 	int cpu;
649 	int nr_cpus;
650 
651 	if (!saved_cpu_idle_disable_state)
652 		return;
653 
654 	nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
655 
656 	for (cpu = 0; cpu < nr_cpus; cpu++) {
657 		free(saved_cpu_idle_disable_state[cpu]);
658 		saved_cpu_idle_disable_state[cpu] = NULL;
659 	}
660 
661 	free(saved_cpu_idle_disable_state);
662 	saved_cpu_idle_disable_state = NULL;
663 }
664 
/*
 * set_deepest_cpu_idle_state - limit idle state of cpu
 *
 * Disables all idle states deeper than the one given in
 * deepest_state (assuming states with higher number are deeper).
 *
 * This is used to reduce the exit from idle latency. Unlike
 * set_cpu_dma_latency, it can disable idle states per cpu.
 *
 * The states are disabled starting at deepest_state + 1; the function
 * stops at the first state that cannot be disabled.
 *
 * Return: idle state count on success, negative on error
 */
int set_deepest_cpu_idle_state(unsigned int cpu, unsigned int deepest_state)
{
	unsigned int nr_states = cpuidle_state_count(cpu);
	unsigned int state = deepest_state + 1;

	while (state < nr_states) {
		int err = cpuidle_state_disable(cpu, state, 1);

		if (err < 0)
			return err;
		state++;
	}

	return nr_states;
}
692 #endif /* HAVE_LIBCPUPOWER_SUPPORT */
693 
694 #define _STR(x) #x
695 #define STR(x) _STR(x)
696 
697 /*
698  * find_mount - find a the mount point of a given fs
699  *
700  * Returns 0 if mount is not found, otherwise return 1 and fill mp
701  * with the mount point.
702  */
703 static const int find_mount(const char *fs, char *mp, int sizeof_mp)
704 {
705 	char mount_point[MAX_PATH+1];
706 	char type[100];
707 	int found = 0;
708 	FILE *fp;
709 
710 	fp = fopen("/proc/mounts", "r");
711 	if (!fp)
712 		return 0;
713 
714 	while (fscanf(fp, "%*s %" STR(MAX_PATH) "s %99s %*s %*d %*d\n",	mount_point, type) == 2) {
715 		if (strcmp(type, fs) == 0) {
716 			found = 1;
717 			break;
718 		}
719 	}
720 	fclose(fp);
721 
722 	if (!found)
723 		return 0;
724 
725 	memset(mp, 0, sizeof_mp);
726 	strncpy(mp, mount_point, sizeof_mp - 1);
727 
728 	debug_msg("Fs %s found at %s\n", fs, mp);
729 	return 1;
730 }
731 
732 /*
733  * get_self_cgroup - get the current thread cgroup path
734  *
735  * Parse /proc/$$/cgroup file to get the thread's cgroup. As an example of line to parse:
736  *
737  * 0::/user.slice/user-0.slice/session-3.scope'\n'
738  *
739  * This function is interested in the content after the second : and before the '\n'.
740  *
741  * Returns 1 if a string was found, 0 otherwise.
742  */
743 static int get_self_cgroup(char *self_cg, int sizeof_self_cg)
744 {
745 	char path[MAX_PATH], *start;
746 	int fd, retval;
747 
748 	snprintf(path, MAX_PATH, "/proc/%d/cgroup", getpid());
749 
750 	fd = open(path, O_RDONLY);
751 	if (fd < 0)
752 		return 0;
753 
754 	memset(path, 0, sizeof(path));
755 	retval = read(fd, path, MAX_PATH);
756 
757 	close(fd);
758 
759 	if (retval <= 0)
760 		return 0;
761 
762 	path[MAX_PATH-1] = '\0';
763 	start = path;
764 
765 	start = strstr(start, ":");
766 	if (!start)
767 		return 0;
768 
769 	/* skip ":" */
770 	start++;
771 
772 	start = strstr(start, ":");
773 	if (!start)
774 		return 0;
775 
776 	/* skip ":" */
777 	start++;
778 
779 	if (strlen(start) >= sizeof_self_cg)
780 		return 0;
781 
782 	snprintf(self_cg, sizeof_self_cg, "%s", start);
783 
784 	/* Swap '\n' with '\0' */
785 	start = strstr(self_cg, "\n");
786 
787 	/* there must be '\n' */
788 	if (!start)
789 		return 0;
790 
791 	/* ok, it found a string after the second : and before the \n */
792 	*start = '\0';
793 
794 	return 1;
795 }
796 
797 /*
798  * open_cgroup_procs - Open the cgroup.procs file for the given cgroup
799  *
800  * If cgroup argument is not NULL, the cgroup.procs file for that cgroup
801  * will be opened. Otherwise, the cgroup of the calling, i.e., rtla, thread
802  * will be used.
803  *
804  * Supports cgroup v2.
805  *
806  * Returns the file descriptor on success, -1 otherwise.
807  */
808 static int open_cgroup_procs(const char *cgroup)
809 {
810 	char cgroup_path[MAX_PATH - strlen("/cgroup.procs")];
811 	char cgroup_procs[MAX_PATH];
812 	int retval;
813 	int cg_fd;
814 
815 	retval = find_mount("cgroup2", cgroup_path, sizeof(cgroup_path));
816 	if (!retval) {
817 		err_msg("Did not find cgroupv2 mount point\n");
818 		return -1;
819 	}
820 
821 	if (!cgroup) {
822 		retval = get_self_cgroup(&cgroup_path[strlen(cgroup_path)],
823 				sizeof(cgroup_path) - strlen(cgroup_path));
824 		if (!retval) {
825 			err_msg("Did not find self cgroup\n");
826 			return -1;
827 		}
828 	} else {
829 		snprintf(&cgroup_path[strlen(cgroup_path)],
830 				sizeof(cgroup_path) - strlen(cgroup_path), "%s/", cgroup);
831 	}
832 
833 	snprintf(cgroup_procs, MAX_PATH, "%s/cgroup.procs", cgroup_path);
834 
835 	debug_msg("Using cgroup path at: %s\n", cgroup_procs);
836 
837 	cg_fd = open(cgroup_procs, O_RDWR);
838 	if (cg_fd < 0)
839 		return -1;
840 
841 	return cg_fd;
842 }
843 
/*
 * set_pid_cgroup - Set cgroup to pid_t pid
 *
 * If cgroup argument is not NULL, the threads will move to the given cgroup.
 * Otherwise, the cgroup of the calling, i.e., rtla, thread will be used.
 *
 * Supports cgroup v2.
 *
 * Returns 1 on success, 0 otherwise.
 */
int set_pid_cgroup(pid_t pid, const char *cgroup)
{
	char pid_str[24];
	int retval;
	int cg_fd;

	cg_fd = open_cgroup_procs(cgroup);
	if (cg_fd < 0)
		return 0;

	snprintf(pid_str, sizeof(pid_str), "%d\n", pid);

	/*
	 * pid_str ends with an end of line, so the messages print the pid
	 * itself to stay on a single line.
	 */
	retval = write(cg_fd, pid_str, strlen(pid_str));
	if (retval < 0)
		err_msg("Error setting cgroup attributes for pid:%d - %s\n",
				pid, strerror(errno));
	else
		debug_msg("Set cgroup attributes for pid:%d\n", pid);

	close(cg_fd);

	return (retval >= 0);
}
877 
878 /**
879  * set_comm_cgroup - Set cgroup to threads starting with char *comm_prefix
880  *
881  * If cgroup argument is not NULL, the threads will move to the given cgroup.
882  * Otherwise, the cgroup of the calling, i.e., rtla, thread will be used.
883  *
884  * Supports cgroup v2.
885  *
886  * Returns 1 on success, 0 otherwise.
887  */
888 int set_comm_cgroup(const char *comm_prefix, const char *cgroup)
889 {
890 	struct dirent *proc_entry;
891 	DIR *procfs;
892 	int retval;
893 	int cg_fd;
894 
895 	if (strlen(comm_prefix) >= MAX_PATH) {
896 		err_msg("Command prefix is too long: %d < strlen(%s)\n",
897 			MAX_PATH, comm_prefix);
898 		return 0;
899 	}
900 
901 	cg_fd = open_cgroup_procs(cgroup);
902 	if (cg_fd < 0)
903 		return 0;
904 
905 	procfs = opendir("/proc");
906 	if (!procfs) {
907 		err_msg("Could not open procfs\n");
908 		goto out_cg;
909 	}
910 
911 	while ((proc_entry = readdir(procfs))) {
912 
913 		retval = procfs_is_workload_pid(comm_prefix, proc_entry);
914 		if (!retval)
915 			continue;
916 
917 		retval = write(cg_fd, proc_entry->d_name, strlen(proc_entry->d_name));
918 		if (retval < 0) {
919 			err_msg("Error setting cgroup attributes for pid:%s - %s\n",
920 				proc_entry->d_name, strerror(errno));
921 			goto out_procfs;
922 		}
923 
924 		debug_msg("Set cgroup attributes for pid:%s\n", proc_entry->d_name);
925 	}
926 
927 	closedir(procfs);
928 	close(cg_fd);
929 	return 1;
930 
931 out_procfs:
932 	closedir(procfs);
933 out_cg:
934 	close(cg_fd);
935 	return 0;
936 }
937 
/**
 * auto_house_keeping - Automatically move rtla out of measurement threads
 *
 * Try to move rtla away from the tracer, if possible: the affinity of rtla
 * is reduced to the CPUs it is allowed to run on that are not monitored.
 * Nothing is changed if rtla's affinity and the monitored CPUs are already
 * disjoint.
 *
 * Returns 1 on success, 0 otherwise.
 */
int auto_house_keeping(cpu_set_t *monitored_cpus)
{
	cpu_set_t allowed, candidates;
	pid_t self = getpid();

	/* first get the CPUs in which rtla can actually run. */
	if (sched_getaffinity(self, sizeof(allowed), &allowed) == -1) {
		debug_msg("Could not get rtla affinity, rtla might run with the threads!\n");
		return 0;
	}

	/* then check if the existing setup is already good. */
	CPU_AND(&candidates, &allowed, monitored_cpus);
	if (CPU_COUNT(&candidates) == 0) {
		debug_msg("rtla and the monitored CPUs do not share CPUs.");
		debug_msg("Skipping auto house-keeping\n");
		return 1;
	}

	/* drop the CPUs that are in both sets... */
	CPU_XOR(&candidates, &allowed, monitored_cpus);

	/* ...and, from what is left, keep those in which rtla can run */
	CPU_AND(&candidates, &candidates, &allowed);

	/* is there any cpu left? */
	if (CPU_COUNT(&candidates) == 0) {
		debug_msg("Could not find any CPU for auto house-keeping\n");
		return 0;
	}

	if (sched_setaffinity(self, sizeof(candidates), &candidates) == -1) {
		debug_msg("Could not set affinity for auto house-keeping\n");
		return 0;
	}

	debug_msg("rtla automatically moved to an auto house-keeping cpu set\n");

	return 1;
}
987 
/**
 * parse_optional_arg - Parse optional argument value
 *
 * Parse optional argument value, which can be in the form of:
 * -sarg, -s/--long=arg, -s/--long arg
 *
 * In the last form, the next command line argument is taken as the value,
 * and consumed by advancing optind, unless it starts with '-'.
 *
 * Returns arg value if found, NULL otherwise.
 */
char *parse_optional_arg(int argc, char **argv)
{
	/* value attached to the option: drop the leading '=', if any */
	if (optarg)
		return optarg[0] == '=' ? optarg + 1 : optarg;

	/* no more arguments, or the next one is another option */
	if (optind >= argc || argv[optind][0] == '-')
		return NULL;

	/* parse argument of form -s [arg] and --long [arg], consuming optind */
	return argv[optind++];
}
1013 
/*
 * strtoi - convert string to integer with error checking
 *
 * The whole string must be a number: an empty string, or any character
 * left after the number, is an error. The base is detected from the
 * prefix, as done by strtol with base 0.
 *
 * Returns 0 on success, -1 if conversion fails or result is out of int range.
 * On failure, *res is left untouched.
 */
int strtoi(const char *s, int *res)
{
	char *rest;
	long parsed;

	if (*s == '\0')
		return -1;

	errno = 0;
	parsed = strtol(s, &rest, 0);

	if (errno != 0)
		return -1;

	/* something that is not part of the number was found */
	if (*rest != '\0')
		return -1;

	if (parsed < INT_MIN || parsed > INT_MAX)
		return -1;

	*res = (int) parsed;
	return 0;
}
1035