xref: /linux/tools/tracing/rtla/src/utils.c (revision ef59e454156eb068ba3f2d9e14b397fd7953f65a)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org>
4  */
5 
6 #define _GNU_SOURCE
7 #ifdef HAVE_LIBCPUPOWER_SUPPORT
8 #include <cpuidle.h>
9 #endif /* HAVE_LIBCPUPOWER_SUPPORT */
10 #include <dirent.h>
11 #include <stdarg.h>
12 #include <stdlib.h>
13 #include <string.h>
14 #include <unistd.h>
15 #include <ctype.h>
16 #include <errno.h>
17 #include <fcntl.h>
18 #include <sched.h>
19 #include <stdio.h>
20 #include <limits.h>
21 
22 #include "utils.h"
23 
24 #define MAX_MSG_LENGTH	1024
25 int config_debug;
26 
27 /*
28  * err_msg - print an error message to the stderr
29  */
30 void err_msg(const char *fmt, ...)
31 {
32 	char message[MAX_MSG_LENGTH];
33 	va_list ap;
34 
35 	va_start(ap, fmt);
36 	vsnprintf(message, sizeof(message), fmt, ap);
37 	va_end(ap);
38 
39 	fprintf(stderr, "%s", message);
40 }
41 
42 /*
43  * debug_msg - print a debug message to stderr if debug is set
44  */
45 void debug_msg(const char *fmt, ...)
46 {
47 	char message[MAX_MSG_LENGTH];
48 	va_list ap;
49 
50 	if (!config_debug)
51 		return;
52 
53 	va_start(ap, fmt);
54 	vsnprintf(message, sizeof(message), fmt, ap);
55 	va_end(ap);
56 
57 	fprintf(stderr, "%s", message);
58 }
59 
60 /*
61  * fatal - print an error message and EOL to stderr and exit with ERROR
62  */
63 void fatal(const char *fmt, ...)
64 {
65 	va_list ap;
66 
67 	va_start(ap, fmt);
68 	vfprintf(stderr, fmt, ap);
69 	va_end(ap);
70 	fprintf(stderr, "\n");
71 
72 	exit(ERROR);
73 }
74 
75 /*
76  * get_llong_from_str - get a long long int from a string
77  */
78 long long get_llong_from_str(char *start)
79 {
80 	long long value;
81 	char *end;
82 
83 	errno = 0;
84 	value = strtoll(start, &end, 10);
85 	if (errno || start == end)
86 		return -1;
87 
88 	return value;
89 }
90 
91 /*
92  * get_duration - fill output with a human readable duration since start_time
93  */
94 void get_duration(time_t start_time, char *output, int output_size)
95 {
96 	time_t now = time(NULL);
97 	struct tm *tm_info;
98 	time_t duration;
99 
100 	duration = difftime(now, start_time);
101 	tm_info = gmtime(&duration);
102 
103 	snprintf(output, output_size, "%3d %02d:%02d:%02d",
104 			tm_info->tm_yday,
105 			tm_info->tm_hour,
106 			tm_info->tm_min,
107 			tm_info->tm_sec);
108 }
109 
110 /*
111  * parse_cpu_set - parse a cpu_list filling cpu_set_t argument
112  *
113  * Receives a cpu list, like 1-3,5 (cpus 1, 2, 3, 5), and then set
114  * filling cpu_set_t argument.
115  *
116  * Returns 0 on success, 1 otherwise.
117  */
118 int parse_cpu_set(char *cpu_list, cpu_set_t *set)
119 {
120 	const char *p;
121 	int end_cpu;
122 	int nr_cpus;
123 	int cpu;
124 	int i;
125 
126 	CPU_ZERO(set);
127 
128 	nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
129 
130 	for (p = cpu_list; *p; ) {
131 		cpu = atoi(p);
132 		if (cpu < 0 || (!cpu && *p != '0') || cpu >= nr_cpus)
133 			goto err;
134 
135 		while (isdigit(*p))
136 			p++;
137 		if (*p == '-') {
138 			p++;
139 			end_cpu = atoi(p);
140 			if (end_cpu < cpu || (!end_cpu && *p != '0') || end_cpu >= nr_cpus)
141 				goto err;
142 			while (isdigit(*p))
143 				p++;
144 		} else
145 			end_cpu = cpu;
146 
147 		if (cpu == end_cpu) {
148 			debug_msg("cpu_set: adding cpu %d\n", cpu);
149 			CPU_SET(cpu, set);
150 		} else {
151 			for (i = cpu; i <= end_cpu; i++) {
152 				debug_msg("cpu_set: adding cpu %d\n", i);
153 				CPU_SET(i, set);
154 			}
155 		}
156 
157 		if (*p == ',')
158 			p++;
159 	}
160 
161 	return 0;
162 err:
163 	debug_msg("Error parsing the cpu set %s\n", cpu_list);
164 	return 1;
165 }
166 
167 /*
168  * parse_stack_format - parse the stack format
169  *
170  * Return: the stack format on success, -1 otherwise.
171  */
172 int parse_stack_format(char *arg)
173 {
174 	if (!strcmp(arg, "truncate"))
175 		return STACK_FORMAT_TRUNCATE;
176 	if (!strcmp(arg, "skip"))
177 		return STACK_FORMAT_SKIP;
178 	if (!strcmp(arg, "full"))
179 		return STACK_FORMAT_FULL;
180 
181 	debug_msg("Error parsing the stack format %s\n", arg);
182 	return -1;
183 }
184 
185 /*
186  * parse_duration - parse duration with s/m/h/d suffix converting it to seconds
187  */
188 long parse_seconds_duration(char *val)
189 {
190 	char *end;
191 	long t;
192 
193 	t = strtol(val, &end, 10);
194 
195 	if (end) {
196 		switch (*end) {
197 		case 's':
198 		case 'S':
199 			break;
200 		case 'm':
201 		case 'M':
202 			t *= 60;
203 			break;
204 		case 'h':
205 		case 'H':
206 			t *= 60 * 60;
207 			break;
208 
209 		case 'd':
210 		case 'D':
211 			t *= 24 * 60 * 60;
212 			break;
213 		}
214 	}
215 
216 	return t;
217 }
218 
219 /*
220  * parse_ns_duration - parse duration with ns/us/ms/s converting it to nanoseconds
221  */
222 long parse_ns_duration(char *val)
223 {
224 	char *end;
225 	long t;
226 
227 	t = strtol(val, &end, 10);
228 
229 	if (end) {
230 		if (!strncmp(end, "ns", 2)) {
231 			return t;
232 		} else if (!strncmp(end, "us", 2)) {
233 			t *= 1000;
234 			return t;
235 		} else if (!strncmp(end, "ms", 2)) {
236 			t *= 1000 * 1000;
237 			return t;
238 		} else if (!strncmp(end, "s", 1)) {
239 			t *= 1000 * 1000 * 1000;
240 			return t;
241 		}
242 		return -1;
243 	}
244 
245 	return t;
246 }
247 
248 /*
249  * This is a set of helper functions to use SCHED_DEADLINE.
250  */
251 #ifndef __NR_sched_setattr
252 # ifdef __x86_64__
253 #  define __NR_sched_setattr	314
254 # elif __i386__
255 #  define __NR_sched_setattr	351
256 # elif __arm__
257 #  define __NR_sched_setattr	380
258 # elif __aarch64__ || __riscv
259 #  define __NR_sched_setattr	274
260 # elif __powerpc__
261 #  define __NR_sched_setattr	355
262 # elif __s390x__
263 #  define __NR_sched_setattr	345
264 # elif __loongarch__
265 #  define __NR_sched_setattr	274
266 # endif
267 #endif
268 
269 #define SCHED_DEADLINE		6
270 
271 static inline int syscall_sched_setattr(pid_t pid, const struct sched_attr *attr,
272 				unsigned int flags) {
273 	return syscall(__NR_sched_setattr, pid, attr, flags);
274 }
275 
/*
 * __set_sched_attr - apply sched_attr to a pid via sched_setattr()
 *
 * Returns 0 on success, 1 on failure (an error message is printed).
 */
int __set_sched_attr(int pid, struct sched_attr *attr)
{
	int ret;

	ret = syscall_sched_setattr(pid, attr, 0);
	if (ret >= 0)
		return 0;

	err_msg("Failed to set sched attributes to the pid %d: %s\n",
		pid, strerror(errno));
	return 1;
}
290 
291 /*
292  * procfs_is_workload_pid - check if a procfs entry contains a comm_prefix* comm
293  *
294  * Check if the procfs entry is a directory of a process, and then check if the
295  * process has a comm with the prefix set in char *comm_prefix. As the
296  * current users of this function only check for kernel threads, there is no
297  * need to check for the threads for the process.
298  *
299  * Return: True if the proc_entry contains a comm file with comm_prefix*.
300  * Otherwise returns false.
301  */
302 static int procfs_is_workload_pid(const char *comm_prefix, struct dirent *proc_entry)
303 {
304 	char buffer[MAX_PATH];
305 	int comm_fd, retval;
306 	char *t_name;
307 
308 	if (proc_entry->d_type != DT_DIR)
309 		return 0;
310 
311 	if (*proc_entry->d_name == '.')
312 		return 0;
313 
314 	/* check if the string is a pid */
315 	for (t_name = proc_entry->d_name; t_name; t_name++) {
316 		if (!isdigit(*t_name))
317 			break;
318 	}
319 
320 	if (*t_name != '\0')
321 		return 0;
322 
323 	snprintf(buffer, MAX_PATH, "/proc/%s/comm", proc_entry->d_name);
324 	comm_fd = open(buffer, O_RDONLY);
325 	if (comm_fd < 0)
326 		return 0;
327 
328 	memset(buffer, 0, MAX_PATH);
329 	retval = read(comm_fd, buffer, MAX_PATH);
330 
331 	close(comm_fd);
332 
333 	if (retval <= 0)
334 		return 0;
335 
336 	buffer[MAX_PATH-1] = '\0';
337 	retval = strncmp(comm_prefix, buffer, strlen(comm_prefix));
338 	if (retval)
339 		return 0;
340 
341 	/* comm already have \n */
342 	debug_msg("Found workload pid:%s comm:%s", proc_entry->d_name, buffer);
343 
344 	return 1;
345 }
346 
347 /*
348  * set_comm_sched_attr - set sched params to threads starting with char *comm_prefix
349  *
350  * This function uses procfs to list the currently running threads and then set the
351  * sched_attr *attr to the threads that start with char *comm_prefix. It is
352  * mainly used to set the priority to the kernel threads created by the
353  * tracers.
354  */
355 int set_comm_sched_attr(const char *comm_prefix, struct sched_attr *attr)
356 {
357 	struct dirent *proc_entry;
358 	DIR *procfs;
359 	int retval;
360 	int pid;
361 
362 	if (strlen(comm_prefix) >= MAX_PATH) {
363 		err_msg("Command prefix is too long: %d < strlen(%s)\n",
364 			MAX_PATH, comm_prefix);
365 		return 1;
366 	}
367 
368 	procfs = opendir("/proc");
369 	if (!procfs) {
370 		err_msg("Could not open procfs\n");
371 		return 1;
372 	}
373 
374 	while ((proc_entry = readdir(procfs))) {
375 
376 		retval = procfs_is_workload_pid(comm_prefix, proc_entry);
377 		if (!retval)
378 			continue;
379 
380 		if (strtoi(proc_entry->d_name, &pid)) {
381 			err_msg("'%s' is not a valid pid", proc_entry->d_name);
382 			goto out_err;
383 		}
384 		/* procfs_is_workload_pid confirmed it is a pid */
385 		retval = __set_sched_attr(pid, attr);
386 		if (retval) {
387 			err_msg("Error setting sched attributes for pid:%s\n", proc_entry->d_name);
388 			goto out_err;
389 		}
390 
391 		debug_msg("Set sched attributes for pid:%s\n", proc_entry->d_name);
392 	}
393 	return 0;
394 
395 out_err:
396 	closedir(procfs);
397 	return 1;
398 }
399 
#define INVALID_VAL	(~0L)
/*
 * get_long_ns_after_colon - parse a ns duration that follows the first ":"
 *
 * Returns the parsed value, or -1 if there is no ":" in the string.
 * (Note: -1 is the same bit pattern as INVALID_VAL, so callers checking
 * against INVALID_VAL catch this case too.)
 */
static long get_long_ns_after_colon(char *start)
{
	char *sep;

	/* the value follows the first ":" */
	sep = strstr(start, ":");
	if (!sep)
		return -1;

	return parse_ns_duration(sep + 1);
}
416 
/*
 * get_long_after_colon - parse an integer that follows the first ":"
 *
 * Returns the parsed value, or -1 if there is no ":" in the string.
 */
static long get_long_after_colon(char *start)
{
	char *sep;

	/* the value follows the first ":" */
	sep = strstr(start, ":");
	if (!sep)
		return -1;

	return get_llong_from_str(sep + 1);
}
432 
433 /*
434  * parse priority in the format:
435  * SCHED_OTHER:
436  *		o:<prio>
437  *		O:<prio>
438  * SCHED_RR:
439  *		r:<prio>
440  *		R:<prio>
441  * SCHED_FIFO:
442  *		f:<prio>
443  *		F:<prio>
444  * SCHED_DEADLINE:
445  *		d:runtime:period
446  *		D:runtime:period
447  */
448 int parse_prio(char *arg, struct sched_attr *sched_param)
449 {
450 	long prio;
451 	long runtime;
452 	long period;
453 
454 	memset(sched_param, 0, sizeof(*sched_param));
455 	sched_param->size = sizeof(*sched_param);
456 
457 	switch (arg[0]) {
458 	case 'd':
459 	case 'D':
460 		/* d:runtime:period */
461 		if (strlen(arg) < 4)
462 			return -1;
463 
464 		runtime = get_long_ns_after_colon(arg);
465 		if (runtime == INVALID_VAL)
466 			return -1;
467 
468 		period = get_long_ns_after_colon(&arg[2]);
469 		if (period == INVALID_VAL)
470 			return -1;
471 
472 		if (runtime > period)
473 			return -1;
474 
475 		sched_param->sched_policy   = SCHED_DEADLINE;
476 		sched_param->sched_runtime  = runtime;
477 		sched_param->sched_deadline = period;
478 		sched_param->sched_period   = period;
479 		break;
480 	case 'f':
481 	case 'F':
482 		/* f:prio */
483 		prio = get_long_after_colon(arg);
484 		if (prio == INVALID_VAL)
485 			return -1;
486 
487 		if (prio < sched_get_priority_min(SCHED_FIFO))
488 			return -1;
489 		if (prio > sched_get_priority_max(SCHED_FIFO))
490 			return -1;
491 
492 		sched_param->sched_policy   = SCHED_FIFO;
493 		sched_param->sched_priority = prio;
494 		break;
495 	case 'r':
496 	case 'R':
497 		/* r:prio */
498 		prio = get_long_after_colon(arg);
499 		if (prio == INVALID_VAL)
500 			return -1;
501 
502 		if (prio < sched_get_priority_min(SCHED_RR))
503 			return -1;
504 		if (prio > sched_get_priority_max(SCHED_RR))
505 			return -1;
506 
507 		sched_param->sched_policy   = SCHED_RR;
508 		sched_param->sched_priority = prio;
509 		break;
510 	case 'o':
511 	case 'O':
512 		/* o:prio */
513 		prio = get_long_after_colon(arg);
514 		if (prio == INVALID_VAL)
515 			return -1;
516 
517 		if (prio < MIN_NICE)
518 			return -1;
519 		if (prio > MAX_NICE)
520 			return -1;
521 
522 		sched_param->sched_policy   = SCHED_OTHER;
523 		sched_param->sched_nice = prio;
524 		break;
525 	default:
526 		return -1;
527 	}
528 	return 0;
529 }
530 
531 /*
532  * set_cpu_dma_latency - set the /dev/cpu_dma_latecy
533  *
534  * This is used to reduce the exit from idle latency. The value
535  * will be reset once the file descriptor of /dev/cpu_dma_latecy
536  * is closed.
537  *
538  * Return: the /dev/cpu_dma_latecy file descriptor
539  */
540 int set_cpu_dma_latency(int32_t latency)
541 {
542 	int retval;
543 	int fd;
544 
545 	fd = open("/dev/cpu_dma_latency", O_RDWR);
546 	if (fd < 0) {
547 		err_msg("Error opening /dev/cpu_dma_latency\n");
548 		return -1;
549 	}
550 
551 	retval = write(fd, &latency, 4);
552 	if (retval < 1) {
553 		err_msg("Error setting /dev/cpu_dma_latency\n");
554 		close(fd);
555 		return -1;
556 	}
557 
558 	debug_msg("Set /dev/cpu_dma_latency to %d\n", latency);
559 
560 	return fd;
561 }
562 
563 #ifdef HAVE_LIBCPUPOWER_SUPPORT
/* per-cpu arrays of per-state disable flags, lazily allocated */
static unsigned int **saved_cpu_idle_disable_state;
/* number of per-cpu arrays currently allocated */
static size_t saved_cpu_idle_disable_state_alloc_ctr;

/*
 * save_cpu_idle_disable_state - save disable for all idle states of a cpu
 *
 * Saves the current disable of all idle states of a cpu, to be subsequently
 * restored via restore_cpu_idle_disable_state.
 *
 * Return: idle state count on success, negative on error
 */
int save_cpu_idle_disable_state(unsigned int cpu)
{
	unsigned int nr_states;
	unsigned int state;
	int disabled;
	int nr_cpus;

	nr_states = cpuidle_state_count(cpu);

	/* nothing to save if the cpu exposes no idle states */
	if (nr_states == 0)
		return 0;

	/* lazily allocate the top-level per-cpu array on first use */
	if (saved_cpu_idle_disable_state == NULL) {
		nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
		saved_cpu_idle_disable_state = calloc(nr_cpus, sizeof(unsigned int *));
		if (!saved_cpu_idle_disable_state)
			return -1;
	}

	saved_cpu_idle_disable_state[cpu] = calloc(nr_states, sizeof(unsigned int));
	if (!saved_cpu_idle_disable_state[cpu])
		return -1;
	saved_cpu_idle_disable_state_alloc_ctr++;

	for (state = 0; state < nr_states; state++) {
		disabled = cpuidle_is_state_disabled(cpu, state);
		/* on error, the partially saved state is left for
		 * free_cpu_idle_disable_states() to clean up */
		if (disabled < 0)
			return disabled;
		saved_cpu_idle_disable_state[cpu][state] = disabled;
	}

	return nr_states;
}
608 
609 /*
610  * restore_cpu_idle_disable_state - restore disable for all idle states of a cpu
611  *
612  * Restores the current disable state of all idle states of a cpu that was
613  * previously saved by save_cpu_idle_disable_state.
614  *
615  * Return: idle state count on success, negative on error
616  */
617 int restore_cpu_idle_disable_state(unsigned int cpu)
618 {
619 	unsigned int nr_states;
620 	unsigned int state;
621 	int disabled;
622 	int result;
623 
624 	nr_states = cpuidle_state_count(cpu);
625 
626 	if (nr_states == 0)
627 		return 0;
628 
629 	if (!saved_cpu_idle_disable_state)
630 		return -1;
631 
632 	for (state = 0; state < nr_states; state++) {
633 		if (!saved_cpu_idle_disable_state[cpu])
634 			return -1;
635 		disabled = saved_cpu_idle_disable_state[cpu][state];
636 		result = cpuidle_state_disable(cpu, state, disabled);
637 		if (result < 0)
638 			return result;
639 	}
640 
641 	free(saved_cpu_idle_disable_state[cpu]);
642 	saved_cpu_idle_disable_state[cpu] = NULL;
643 	saved_cpu_idle_disable_state_alloc_ctr--;
644 	if (saved_cpu_idle_disable_state_alloc_ctr == 0) {
645 		free(saved_cpu_idle_disable_state);
646 		saved_cpu_idle_disable_state = NULL;
647 	}
648 
649 	return nr_states;
650 }
651 
652 /*
653  * free_cpu_idle_disable_states - free saved idle state disable for all cpus
654  *
655  * Frees the memory used for storing cpu idle state disable for all cpus
656  * and states.
657  *
658  * Normally, the memory is freed automatically in
659  * restore_cpu_idle_disable_state; this is mostly for cleaning up after an
660  * error.
661  */
662 void free_cpu_idle_disable_states(void)
663 {
664 	int cpu;
665 	int nr_cpus;
666 
667 	if (!saved_cpu_idle_disable_state)
668 		return;
669 
670 	nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
671 
672 	for (cpu = 0; cpu < nr_cpus; cpu++) {
673 		free(saved_cpu_idle_disable_state[cpu]);
674 		saved_cpu_idle_disable_state[cpu] = NULL;
675 	}
676 
677 	free(saved_cpu_idle_disable_state);
678 	saved_cpu_idle_disable_state = NULL;
679 }
680 
681 /*
682  * set_deepest_cpu_idle_state - limit idle state of cpu
683  *
684  * Disables all idle states deeper than the one given in
685  * deepest_state (assuming states with higher number are deeper).
686  *
687  * This is used to reduce the exit from idle latency. Unlike
688  * set_cpu_dma_latency, it can disable idle states per cpu.
689  *
690  * Return: idle state count on success, negative on error
691  */
692 int set_deepest_cpu_idle_state(unsigned int cpu, unsigned int deepest_state)
693 {
694 	unsigned int nr_states;
695 	unsigned int state;
696 	int result;
697 
698 	nr_states = cpuidle_state_count(cpu);
699 
700 	for (state = deepest_state + 1; state < nr_states; state++) {
701 		result = cpuidle_state_disable(cpu, state, 1);
702 		if (result < 0)
703 			return result;
704 	}
705 
706 	return nr_states;
707 }
708 #endif /* HAVE_LIBCPUPOWER_SUPPORT */
709 
710 #define _STR(x) #x
711 #define STR(x) _STR(x)
712 
713 /*
714  * find_mount - find a the mount point of a given fs
715  *
716  * Returns 0 if mount is not found, otherwise return 1 and fill mp
717  * with the mount point.
718  */
719 static const int find_mount(const char *fs, char *mp, int sizeof_mp)
720 {
721 	char mount_point[MAX_PATH+1];
722 	char type[100];
723 	int found = 0;
724 	FILE *fp;
725 
726 	fp = fopen("/proc/mounts", "r");
727 	if (!fp)
728 		return 0;
729 
730 	while (fscanf(fp, "%*s %" STR(MAX_PATH) "s %99s %*s %*d %*d\n",	mount_point, type) == 2) {
731 		if (strcmp(type, fs) == 0) {
732 			found = 1;
733 			break;
734 		}
735 	}
736 	fclose(fp);
737 
738 	if (!found)
739 		return 0;
740 
741 	memset(mp, 0, sizeof_mp);
742 	strncpy(mp, mount_point, sizeof_mp - 1);
743 
744 	debug_msg("Fs %s found at %s\n", fs, mp);
745 	return 1;
746 }
747 
748 /*
749  * get_self_cgroup - get the current thread cgroup path
750  *
751  * Parse /proc/$$/cgroup file to get the thread's cgroup. As an example of line to parse:
752  *
753  * 0::/user.slice/user-0.slice/session-3.scope'\n'
754  *
755  * This function is interested in the content after the second : and before the '\n'.
756  *
757  * Returns 1 if a string was found, 0 otherwise.
758  */
759 static int get_self_cgroup(char *self_cg, int sizeof_self_cg)
760 {
761 	char path[MAX_PATH], *start;
762 	int fd, retval;
763 
764 	snprintf(path, MAX_PATH, "/proc/%d/cgroup", getpid());
765 
766 	fd = open(path, O_RDONLY);
767 	if (fd < 0)
768 		return 0;
769 
770 	memset(path, 0, sizeof(path));
771 	retval = read(fd, path, MAX_PATH);
772 
773 	close(fd);
774 
775 	if (retval <= 0)
776 		return 0;
777 
778 	path[MAX_PATH-1] = '\0';
779 	start = path;
780 
781 	start = strstr(start, ":");
782 	if (!start)
783 		return 0;
784 
785 	/* skip ":" */
786 	start++;
787 
788 	start = strstr(start, ":");
789 	if (!start)
790 		return 0;
791 
792 	/* skip ":" */
793 	start++;
794 
795 	if (strlen(start) >= sizeof_self_cg)
796 		return 0;
797 
798 	snprintf(self_cg, sizeof_self_cg, "%s", start);
799 
800 	/* Swap '\n' with '\0' */
801 	start = strstr(self_cg, "\n");
802 
803 	/* there must be '\n' */
804 	if (!start)
805 		return 0;
806 
807 	/* ok, it found a string after the second : and before the \n */
808 	*start = '\0';
809 
810 	return 1;
811 }
812 
813 /*
814  * open_cgroup_procs - Open the cgroup.procs file for the given cgroup
815  *
816  * If cgroup argument is not NULL, the cgroup.procs file for that cgroup
817  * will be opened. Otherwise, the cgroup of the calling, i.e., rtla, thread
818  * will be used.
819  *
820  * Supports cgroup v2.
821  *
822  * Returns the file descriptor on success, -1 otherwise.
823  */
824 static int open_cgroup_procs(const char *cgroup)
825 {
826 	char cgroup_path[MAX_PATH - strlen("/cgroup.procs")];
827 	char cgroup_procs[MAX_PATH];
828 	int retval;
829 	int cg_fd;
830 
831 	retval = find_mount("cgroup2", cgroup_path, sizeof(cgroup_path));
832 	if (!retval) {
833 		err_msg("Did not find cgroupv2 mount point\n");
834 		return -1;
835 	}
836 
837 	if (!cgroup) {
838 		retval = get_self_cgroup(&cgroup_path[strlen(cgroup_path)],
839 				sizeof(cgroup_path) - strlen(cgroup_path));
840 		if (!retval) {
841 			err_msg("Did not find self cgroup\n");
842 			return -1;
843 		}
844 	} else {
845 		snprintf(&cgroup_path[strlen(cgroup_path)],
846 				sizeof(cgroup_path) - strlen(cgroup_path), "%s/", cgroup);
847 	}
848 
849 	snprintf(cgroup_procs, MAX_PATH, "%s/cgroup.procs", cgroup_path);
850 
851 	debug_msg("Using cgroup path at: %s\n", cgroup_procs);
852 
853 	cg_fd = open(cgroup_procs, O_RDWR);
854 	if (cg_fd < 0)
855 		return -1;
856 
857 	return cg_fd;
858 }
859 
860 /*
861  * set_pid_cgroup - Set cgroup to pid_t pid
862  *
863  * If cgroup argument is not NULL, the threads will move to the given cgroup.
864  * Otherwise, the cgroup of the calling, i.e., rtla, thread will be used.
865  *
866  * Supports cgroup v2.
867  *
868  * Returns 1 on success, 0 otherwise.
869  */
870 int set_pid_cgroup(pid_t pid, const char *cgroup)
871 {
872 	char pid_str[24];
873 	int retval;
874 	int cg_fd;
875 
876 	cg_fd = open_cgroup_procs(cgroup);
877 	if (cg_fd < 0)
878 		return 0;
879 
880 	snprintf(pid_str, sizeof(pid_str), "%d\n", pid);
881 
882 	retval = write(cg_fd, pid_str, strlen(pid_str));
883 	if (retval < 0)
884 		err_msg("Error setting cgroup attributes for pid:%s - %s\n",
885 				pid_str, strerror(errno));
886 	else
887 		debug_msg("Set cgroup attributes for pid:%s\n", pid_str);
888 
889 	close(cg_fd);
890 
891 	return (retval >= 0);
892 }
893 
894 /**
895  * set_comm_cgroup - Set cgroup to threads starting with char *comm_prefix
896  *
897  * If cgroup argument is not NULL, the threads will move to the given cgroup.
898  * Otherwise, the cgroup of the calling, i.e., rtla, thread will be used.
899  *
900  * Supports cgroup v2.
901  *
902  * Returns 1 on success, 0 otherwise.
903  */
904 int set_comm_cgroup(const char *comm_prefix, const char *cgroup)
905 {
906 	struct dirent *proc_entry;
907 	DIR *procfs;
908 	int retval;
909 	int cg_fd;
910 
911 	if (strlen(comm_prefix) >= MAX_PATH) {
912 		err_msg("Command prefix is too long: %d < strlen(%s)\n",
913 			MAX_PATH, comm_prefix);
914 		return 0;
915 	}
916 
917 	cg_fd = open_cgroup_procs(cgroup);
918 	if (cg_fd < 0)
919 		return 0;
920 
921 	procfs = opendir("/proc");
922 	if (!procfs) {
923 		err_msg("Could not open procfs\n");
924 		goto out_cg;
925 	}
926 
927 	while ((proc_entry = readdir(procfs))) {
928 
929 		retval = procfs_is_workload_pid(comm_prefix, proc_entry);
930 		if (!retval)
931 			continue;
932 
933 		retval = write(cg_fd, proc_entry->d_name, strlen(proc_entry->d_name));
934 		if (retval < 0) {
935 			err_msg("Error setting cgroup attributes for pid:%s - %s\n",
936 				proc_entry->d_name, strerror(errno));
937 			goto out_procfs;
938 		}
939 
940 		debug_msg("Set cgroup attributes for pid:%s\n", proc_entry->d_name);
941 	}
942 
943 	closedir(procfs);
944 	close(cg_fd);
945 	return 1;
946 
947 out_procfs:
948 	closedir(procfs);
949 out_cg:
950 	close(cg_fd);
951 	return 0;
952 }
953 
954 /**
955  * auto_house_keeping - Automatically move rtla out of measurement threads
956  *
957  * Try to move rtla away from the tracer, if possible.
958  *
959  * Returns 1 on success, 0 otherwise.
960  */
961 int auto_house_keeping(cpu_set_t *monitored_cpus)
962 {
963 	cpu_set_t rtla_cpus, house_keeping_cpus;
964 	int retval;
965 
966 	/* first get the CPUs in which rtla can actually run. */
967 	retval = sched_getaffinity(getpid(), sizeof(rtla_cpus), &rtla_cpus);
968 	if (retval == -1) {
969 		debug_msg("Could not get rtla affinity, rtla might run with the threads!\n");
970 		return 0;
971 	}
972 
973 	/* then check if the existing setup is already good. */
974 	CPU_AND(&house_keeping_cpus, &rtla_cpus, monitored_cpus);
975 	if (!CPU_COUNT(&house_keeping_cpus)) {
976 		debug_msg("rtla and the monitored CPUs do not share CPUs.");
977 		debug_msg("Skipping auto house-keeping\n");
978 		return 1;
979 	}
980 
981 	/* remove the intersection */
982 	CPU_XOR(&house_keeping_cpus, &rtla_cpus, monitored_cpus);
983 
984 	/* get only those that rtla can run */
985 	CPU_AND(&house_keeping_cpus, &house_keeping_cpus, &rtla_cpus);
986 
987 	/* is there any cpu left? */
988 	if (!CPU_COUNT(&house_keeping_cpus)) {
989 		debug_msg("Could not find any CPU for auto house-keeping\n");
990 		return 0;
991 	}
992 
993 	retval = sched_setaffinity(getpid(), sizeof(house_keeping_cpus), &house_keeping_cpus);
994 	if (retval == -1) {
995 		debug_msg("Could not set affinity for auto house-keeping\n");
996 		return 0;
997 	}
998 
999 	debug_msg("rtla automatically moved to an auto house-keeping cpu set\n");
1000 
1001 	return 1;
1002 }
1003 
1004 /**
1005  * parse_optional_arg - Parse optional argument value
1006  *
1007  * Parse optional argument value, which can be in the form of:
1008  * -sarg, -s/--long=arg, -s/--long arg
1009  *
1010  * Returns arg value if found, NULL otherwise.
1011  */
1012 char *parse_optional_arg(int argc, char **argv)
1013 {
1014 	if (optarg) {
1015 		if (optarg[0] == '=') {
1016 			/* skip the = */
1017 			return &optarg[1];
1018 		} else {
1019 			return optarg;
1020 		}
1021 	/* parse argument of form -s [arg] and --long [arg]*/
1022 	} else if (optind < argc && argv[optind][0] != '-') {
1023 		/* consume optind */
1024 		return argv[optind++];
1025 	} else {
1026 		return NULL;
1027 	}
1028 }
1029 
1030 /*
1031  * strtoi - convert string to integer with error checking
1032  *
1033  * Returns 0 on success, -1 if conversion fails or result is out of int range.
1034  */
1035 int strtoi(const char *s, int *res)
1036 {
1037 	char *end_ptr;
1038 	long lres;
1039 
1040 	if (!*s)
1041 		return -1;
1042 
1043 	errno = 0;
1044 	lres = strtol(s, &end_ptr, 0);
1045 	if (errno || *end_ptr || lres > INT_MAX || lres < INT_MIN)
1046 		return -1;
1047 
1048 	*res = (int) lres;
1049 	return 0;
1050 }
1051