xref: /linux/tools/tracing/rtla/src/utils.c (revision b3910a739235f89f616345dda1f8303d9ccb99fb)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org>
4  */
5 
6 #define _GNU_SOURCE
7 #ifdef HAVE_LIBCPUPOWER_SUPPORT
8 #include <cpuidle.h>
9 #endif /* HAVE_LIBCPUPOWER_SUPPORT */
10 #include <dirent.h>
11 #include <stdarg.h>
12 #include <stdlib.h>
13 #include <string.h>
14 #include <unistd.h>
15 #include <ctype.h>
16 #include <errno.h>
17 #include <fcntl.h>
18 #include <sched.h>
19 #include <stdio.h>
20 #include <limits.h>
21 
22 #include "common.h"
23 
/* Size of the stack buffer err_msg() and debug_msg() format their text into. */
#define MAX_MSG_LENGTH	1024

/* debug_msg() prints only when this is non-zero. */
int config_debug;
26 
27 /*
28  * err_msg - print an error message to the stderr
29  */
30 void err_msg(const char *fmt, ...)
31 {
32 	char message[MAX_MSG_LENGTH];
33 	va_list ap;
34 
35 	va_start(ap, fmt);
36 	vsnprintf(message, sizeof(message), fmt, ap);
37 	va_end(ap);
38 
39 	fprintf(stderr, "%s", message);
40 }
41 
42 /*
43  * debug_msg - print a debug message to stderr if debug is set
44  */
45 void debug_msg(const char *fmt, ...)
46 {
47 	char message[MAX_MSG_LENGTH];
48 	va_list ap;
49 
50 	if (!config_debug)
51 		return;
52 
53 	va_start(ap, fmt);
54 	vsnprintf(message, sizeof(message), fmt, ap);
55 	va_end(ap);
56 
57 	fprintf(stderr, "%s", message);
58 }
59 
60 /*
61  * fatal - print an error message and EOL to stderr and exit with ERROR
62  */
63 void fatal(const char *fmt, ...)
64 {
65 	va_list ap;
66 
67 	va_start(ap, fmt);
68 	vfprintf(stderr, fmt, ap);
69 	va_end(ap);
70 	fprintf(stderr, "\n");
71 
72 	exit(ERROR);
73 }
74 
/*
 * get_llong_from_str - convert the leading decimal number of a string
 *
 * Characters following the number are ignored.
 *
 * Returns the parsed value, or -1 if no digits were found or the value is
 * out of range. A literal "-1" is therefore indistinguishable from an error.
 */
long long get_llong_from_str(char *start)
{
	char *stop = NULL;
	long long parsed;

	errno = 0;
	parsed = strtoll(start, &stop, 10);

	if (errno != 0)
		return -1;
	if (stop == start)
		return -1;

	return parsed;
}
90 
/*
 * get_duration - describe the time elapsed since start_time
 *
 * Writes the elapsed time into output as "<days> <hh>:<mm>:<ss>", where the
 * day count is the tm_yday field of the broken-down elapsed time. At most
 * output_size bytes are written, including the terminator.
 */
void get_duration(time_t start_time, char *output, int output_size)
{
	time_t elapsed = difftime(time(NULL), start_time);
	struct tm *parts = gmtime(&elapsed);

	snprintf(output, output_size, "%3d %02d:%02d:%02d",
		 parts->tm_yday, parts->tm_hour, parts->tm_min, parts->tm_sec);
}
109 
110 /*
111  * parse_cpu_set - parse a cpu_list filling cpu_set_t argument
112  *
113  * Receives a cpu list, like 1-3,5 (cpus 1, 2, 3, 5), and then set
114  * filling cpu_set_t argument.
115  *
116  * Returns 0 on success, 1 otherwise.
117  */
118 int parse_cpu_set(char *cpu_list, cpu_set_t *set)
119 {
120 	const char *p;
121 	int end_cpu;
122 	int cpu;
123 	int i;
124 
125 	CPU_ZERO(set);
126 
127 	for (p = cpu_list; *p; ) {
128 		cpu = atoi(p);
129 		if (cpu < 0 || (!cpu && *p != '0') || cpu >= nr_cpus)
130 			goto err;
131 
132 		while (isdigit(*p))
133 			p++;
134 		if (*p == '-') {
135 			p++;
136 			end_cpu = atoi(p);
137 			if (end_cpu < cpu || (!end_cpu && *p != '0') || end_cpu >= nr_cpus)
138 				goto err;
139 			while (isdigit(*p))
140 				p++;
141 		} else
142 			end_cpu = cpu;
143 
144 		if (cpu == end_cpu) {
145 			debug_msg("cpu_set: adding cpu %d\n", cpu);
146 			CPU_SET(cpu, set);
147 		} else {
148 			for (i = cpu; i <= end_cpu; i++) {
149 				debug_msg("cpu_set: adding cpu %d\n", i);
150 				CPU_SET(i, set);
151 			}
152 		}
153 
154 		if (*p == ',')
155 			p++;
156 	}
157 
158 	return 0;
159 err:
160 	debug_msg("Error parsing the cpu set %s\n", cpu_list);
161 	return 1;
162 }
163 
164 /*
165  * parse_stack_format - parse the stack format
166  *
167  * Return: the stack format on success, -1 otherwise.
168  */
169 int parse_stack_format(char *arg)
170 {
171 	if (!strcmp(arg, "truncate"))
172 		return STACK_FORMAT_TRUNCATE;
173 	if (!strcmp(arg, "skip"))
174 		return STACK_FORMAT_SKIP;
175 	if (!strcmp(arg, "full"))
176 		return STACK_FORMAT_FULL;
177 
178 	debug_msg("Error parsing the stack format %s\n", arg);
179 	return -1;
180 }
181 
/*
 * parse_seconds_duration - parse a duration with an optional s/m/h/d suffix
 *
 * The leading decimal number is scaled according to the character that
 * follows it: 'm' for minutes, 'h' for hours, 'd' for days (upper or lower
 * case). An 's' suffix, no suffix, or any other character leaves the value
 * in seconds.
 *
 * Returns the duration in seconds; a string without a number yields 0.
 */
long parse_seconds_duration(char *val)
{
	long multiplier = 1;
	char *suffix;
	long amount;

	amount = strtol(val, &suffix, 10);

	switch (*suffix) {
	case 'm':
	case 'M':
		multiplier = 60;
		break;
	case 'h':
	case 'H':
		multiplier = 60 * 60;
		break;
	case 'd':
	case 'D':
		multiplier = 24 * 60 * 60;
		break;
	default:
		/* seconds: explicit 's'/'S', no suffix, or unknown suffix */
		break;
	}

	return amount * multiplier;
}
215 
/*
 * match_time_unit - test whether str begins with a complete time unit
 *
 * The unit only matches when it is immediately followed by the end of the
 * string or by ':'. That covers standalone durations such as "100ms" as well
 * as colon-separated SCHED_DEADLINE specifications such as "d:10ms:100ms",
 * while input with trailing garbage, such as "100msx", is rejected.
 */
static bool match_time_unit(const char *str, const char *unit)
{
	size_t unit_len = strlen(unit);
	char next;

	if (strncmp(str, unit, unit_len) != 0)
		return false;

	next = str[unit_len];
	return next == '\0' || next == ':';
}
230 
/*
 * parse_ns_duration - parse a duration with a ns/us/ms/s unit into nanoseconds
 *
 * The unit is mandatory and must be followed by the end of the string or by
 * ':' (see match_time_unit()).
 *
 * Returns the duration in nanoseconds, or -1 when the unit is missing or
 * not recognized.
 */
long parse_ns_duration(char *val)
{
	char *unit;
	long amount;

	amount = strtol(val, &unit, 10);

	if (match_time_unit(unit, "ns"))
		return amount;
	if (match_time_unit(unit, "us"))
		return amount * 1000;
	if (match_time_unit(unit, "ms"))
		return amount * (1000 * 1000);
	if (match_time_unit(unit, "s"))
		return amount * (1000 * 1000 * 1000);

	return -1;
}
259 
/*
 * Helpers for using SCHED_DEADLINE.
 *
 * Fallback sched_setattr syscall numbers, per architecture, used only when
 * the system headers do not already provide __NR_sched_setattr.
 */
#ifndef __NR_sched_setattr
# if defined(__x86_64__)
#  define __NR_sched_setattr	314
# elif defined(__i386__)
#  define __NR_sched_setattr	351
# elif defined(__arm__)
#  define __NR_sched_setattr	380
# elif defined(__aarch64__) || defined(__riscv)
#  define __NR_sched_setattr	274
# elif defined(__powerpc__)
#  define __NR_sched_setattr	355
# elif defined(__s390x__)
#  define __NR_sched_setattr	345
# elif defined(__loongarch__)
#  define __NR_sched_setattr	274
# endif
#endif

/* Policy number stored in sched_attr.sched_policy for deadline scheduling. */
#define SCHED_DEADLINE		6
282 
283 static inline int syscall_sched_setattr(pid_t pid, const struct sched_attr *attr,
284 				unsigned int flags) {
285 	return syscall(__NR_sched_setattr, pid, attr, flags);
286 }
287 
/*
 * __set_sched_attr - apply sched attributes to a pid
 *
 * Calls sched_setattr with no flags and reports a failure through err_msg().
 *
 * Returns 0 on success, 1 otherwise.
 */
int __set_sched_attr(int pid, struct sched_attr *attr)
{
	const unsigned int no_flags = 0;

	if (syscall_sched_setattr(pid, attr, no_flags) >= 0)
		return 0;

	err_msg("Failed to set sched attributes to the pid %d: %s\n",
		pid, strerror(errno));
	return 1;
}
302 
303 /*
304  * procfs_is_workload_pid - check if a procfs entry contains a comm_prefix* comm
305  *
306  * Check if the procfs entry is a directory of a process, and then check if the
307  * process has a comm with the prefix set in char *comm_prefix. As the
308  * current users of this function only check for kernel threads, there is no
309  * need to check for the threads for the process.
310  *
311  * Return: True if the proc_entry contains a comm file with comm_prefix*.
312  * Otherwise returns false.
313  */
314 static int procfs_is_workload_pid(const char *comm_prefix, struct dirent *proc_entry)
315 {
316 	char buffer[MAX_PATH];
317 	int comm_fd, retval;
318 	char *t_name;
319 
320 	if (proc_entry->d_type != DT_DIR)
321 		return 0;
322 
323 	if (*proc_entry->d_name == '.')
324 		return 0;
325 
326 	/* check if the string is a pid */
327 	for (t_name = proc_entry->d_name; t_name; t_name++) {
328 		if (!isdigit(*t_name))
329 			break;
330 	}
331 
332 	if (*t_name != '\0')
333 		return 0;
334 
335 	snprintf(buffer, MAX_PATH, "/proc/%s/comm", proc_entry->d_name);
336 	comm_fd = open(buffer, O_RDONLY);
337 	if (comm_fd < 0)
338 		return 0;
339 
340 	memset(buffer, 0, MAX_PATH);
341 	retval = read(comm_fd, buffer, MAX_PATH);
342 
343 	close(comm_fd);
344 
345 	if (retval <= 0)
346 		return 0;
347 
348 	buffer[MAX_PATH-1] = '\0';
349 	if (!str_has_prefix(buffer, comm_prefix))
350 		return 0;
351 
352 	/* comm already have \n */
353 	debug_msg("Found workload pid:%s comm:%s", proc_entry->d_name, buffer);
354 
355 	return 1;
356 }
357 
358 /*
359  * set_comm_sched_attr - set sched params to threads starting with char *comm_prefix
360  *
361  * This function uses procfs to list the currently running threads and then set the
362  * sched_attr *attr to the threads that start with char *comm_prefix. It is
363  * mainly used to set the priority to the kernel threads created by the
364  * tracers.
365  */
366 int set_comm_sched_attr(const char *comm_prefix, struct sched_attr *attr)
367 {
368 	struct dirent *proc_entry;
369 	DIR *procfs;
370 	int retval;
371 	int pid;
372 
373 	if (strlen(comm_prefix) >= MAX_PATH) {
374 		err_msg("Command prefix is too long: %d < strlen(%s)\n",
375 			MAX_PATH, comm_prefix);
376 		return 1;
377 	}
378 
379 	procfs = opendir("/proc");
380 	if (!procfs) {
381 		err_msg("Could not open procfs\n");
382 		return 1;
383 	}
384 
385 	while ((proc_entry = readdir(procfs))) {
386 
387 		retval = procfs_is_workload_pid(comm_prefix, proc_entry);
388 		if (!retval)
389 			continue;
390 
391 		if (strtoi(proc_entry->d_name, &pid)) {
392 			err_msg("'%s' is not a valid pid", proc_entry->d_name);
393 			goto out_err;
394 		}
395 		/* procfs_is_workload_pid confirmed it is a pid */
396 		retval = __set_sched_attr(pid, attr);
397 		if (retval) {
398 			err_msg("Error setting sched attributes for pid:%s\n", proc_entry->d_name);
399 			goto out_err;
400 		}
401 
402 		debug_msg("Set sched attributes for pid:%s\n", proc_entry->d_name);
403 	}
404 	return 0;
405 
406 out_err:
407 	closedir(procfs);
408 	return 1;
409 }
410 
/* In-band error marker of the *_after_colon() helpers; it has the value -1. */
#define INVALID_VAL	(~0L)

/*
 * get_long_ns_after_colon - parse the duration that follows the first ':'
 *
 * Returns the duration in nanoseconds as parsed by parse_ns_duration(), or
 * -1 when start contains no ':'.
 */
static long get_long_ns_after_colon(char *start)
{
	char *colon = strchr(start, ':');

	if (colon == NULL)
		return -1;

	/* the duration begins right after the ':' */
	return parse_ns_duration(colon + 1);
}
427 
/*
 * get_long_after_colon - parse the number that follows the first ':'
 *
 * Returns the value as parsed by get_llong_from_str(), or -1 when start
 * contains no ':'.
 */
static long get_long_after_colon(char *start)
{
	char *colon = strchr(start, ':');

	if (colon == NULL)
		return -1;

	/* the number begins right after the ':' */
	return get_llong_from_str(colon + 1);
}
443 
444 /*
445  * parse priority in the format:
446  * SCHED_OTHER:
447  *		o:<prio>
448  *		O:<prio>
449  * SCHED_RR:
450  *		r:<prio>
451  *		R:<prio>
452  * SCHED_FIFO:
453  *		f:<prio>
454  *		F:<prio>
455  * SCHED_DEADLINE:
456  *		d:runtime:period
457  *		D:runtime:period
458  */
459 int parse_prio(char *arg, struct sched_attr *sched_param)
460 {
461 	long prio;
462 	long runtime;
463 	long period;
464 
465 	memset(sched_param, 0, sizeof(*sched_param));
466 	sched_param->size = sizeof(*sched_param);
467 
468 	switch (arg[0]) {
469 	case 'd':
470 	case 'D':
471 		/* d:runtime:period */
472 		if (strlen(arg) < 4)
473 			return -1;
474 
475 		runtime = get_long_ns_after_colon(arg);
476 		if (runtime == INVALID_VAL)
477 			return -1;
478 
479 		period = get_long_ns_after_colon(&arg[2]);
480 		if (period == INVALID_VAL)
481 			return -1;
482 
483 		if (runtime > period)
484 			return -1;
485 
486 		sched_param->sched_policy   = SCHED_DEADLINE;
487 		sched_param->sched_runtime  = runtime;
488 		sched_param->sched_deadline = period;
489 		sched_param->sched_period   = period;
490 		break;
491 	case 'f':
492 	case 'F':
493 		/* f:prio */
494 		prio = get_long_after_colon(arg);
495 		if (prio == INVALID_VAL)
496 			return -1;
497 
498 		if (prio < sched_get_priority_min(SCHED_FIFO))
499 			return -1;
500 		if (prio > sched_get_priority_max(SCHED_FIFO))
501 			return -1;
502 
503 		sched_param->sched_policy   = SCHED_FIFO;
504 		sched_param->sched_priority = prio;
505 		break;
506 	case 'r':
507 	case 'R':
508 		/* r:prio */
509 		prio = get_long_after_colon(arg);
510 		if (prio == INVALID_VAL)
511 			return -1;
512 
513 		if (prio < sched_get_priority_min(SCHED_RR))
514 			return -1;
515 		if (prio > sched_get_priority_max(SCHED_RR))
516 			return -1;
517 
518 		sched_param->sched_policy   = SCHED_RR;
519 		sched_param->sched_priority = prio;
520 		break;
521 	case 'o':
522 	case 'O':
523 		/* o:prio */
524 		prio = get_long_after_colon(arg);
525 		if (prio == INVALID_VAL)
526 			return -1;
527 
528 		if (prio < MIN_NICE)
529 			return -1;
530 		if (prio > MAX_NICE)
531 			return -1;
532 
533 		sched_param->sched_policy   = SCHED_OTHER;
534 		sched_param->sched_nice = prio;
535 		break;
536 	default:
537 		return -1;
538 	}
539 	return 0;
540 }
541 
/*
 * set_cpu_dma_latency - write a latency value to /dev/cpu_dma_latency
 *
 * This is used to reduce the exit from idle latency. The value
 * will be reset once the file descriptor of /dev/cpu_dma_latency
 * is closed, so the caller must keep the returned descriptor open for as
 * long as the setting is needed.
 *
 * Return: the /dev/cpu_dma_latency file descriptor, or -1 on error.
 */
int set_cpu_dma_latency(int32_t latency)
{
	int fd = open("/dev/cpu_dma_latency", O_RDWR);

	if (fd < 0) {
		err_msg("Error opening /dev/cpu_dma_latency\n");
		return -1;
	}

	if (write(fd, &latency, sizeof(latency)) < 1) {
		err_msg("Error setting /dev/cpu_dma_latency\n");
		close(fd);
		return -1;
	}

	debug_msg("Set /dev/cpu_dma_latency to %d\n", latency);

	return fd;
}
573 
574 #ifdef HAVE_LIBCPUPOWER_SUPPORT
575 static unsigned int **saved_cpu_idle_disable_state;
576 static size_t saved_cpu_idle_disable_state_alloc_ctr;
577 
578 /*
579  * save_cpu_idle_state_disable - save disable for all idle states of a cpu
580  *
581  * Saves the current disable of all idle states of a cpu, to be subsequently
582  * restored via restore_cpu_idle_disable_state.
583  *
584  * Return: idle state count on success, negative on error
585  */
586 int save_cpu_idle_disable_state(unsigned int cpu)
587 {
588 	unsigned int nr_states;
589 	unsigned int state;
590 	int disabled;
591 
592 	nr_states = cpuidle_state_count(cpu);
593 
594 	if (nr_states == 0)
595 		return 0;
596 
597 	if (saved_cpu_idle_disable_state == NULL) {
598 		saved_cpu_idle_disable_state = calloc(nr_cpus, sizeof(unsigned int *));
599 		if (!saved_cpu_idle_disable_state)
600 			return -1;
601 	}
602 
603 	saved_cpu_idle_disable_state[cpu] = calloc(nr_states, sizeof(unsigned int));
604 	if (!saved_cpu_idle_disable_state[cpu])
605 		return -1;
606 	saved_cpu_idle_disable_state_alloc_ctr++;
607 
608 	for (state = 0; state < nr_states; state++) {
609 		disabled = cpuidle_is_state_disabled(cpu, state);
610 		if (disabled < 0)
611 			return disabled;
612 		saved_cpu_idle_disable_state[cpu][state] = disabled;
613 	}
614 
615 	return nr_states;
616 }
617 
618 /*
619  * restore_cpu_idle_disable_state - restore disable for all idle states of a cpu
620  *
621  * Restores the current disable state of all idle states of a cpu that was
622  * previously saved by save_cpu_idle_disable_state.
623  *
624  * Return: idle state count on success, negative on error
625  */
626 int restore_cpu_idle_disable_state(unsigned int cpu)
627 {
628 	unsigned int nr_states;
629 	unsigned int state;
630 	int disabled;
631 	int result;
632 
633 	nr_states = cpuidle_state_count(cpu);
634 
635 	if (nr_states == 0)
636 		return 0;
637 
638 	if (!saved_cpu_idle_disable_state)
639 		return -1;
640 
641 	for (state = 0; state < nr_states; state++) {
642 		if (!saved_cpu_idle_disable_state[cpu])
643 			return -1;
644 		disabled = saved_cpu_idle_disable_state[cpu][state];
645 		result = cpuidle_state_disable(cpu, state, disabled);
646 		if (result < 0)
647 			return result;
648 	}
649 
650 	free(saved_cpu_idle_disable_state[cpu]);
651 	saved_cpu_idle_disable_state[cpu] = NULL;
652 	saved_cpu_idle_disable_state_alloc_ctr--;
653 	if (saved_cpu_idle_disable_state_alloc_ctr == 0) {
654 		free(saved_cpu_idle_disable_state);
655 		saved_cpu_idle_disable_state = NULL;
656 	}
657 
658 	return nr_states;
659 }
660 
661 /*
662  * free_cpu_idle_disable_states - free saved idle state disable for all cpus
663  *
664  * Frees the memory used for storing cpu idle state disable for all cpus
665  * and states.
666  *
667  * Normally, the memory is freed automatically in
668  * restore_cpu_idle_disable_state; this is mostly for cleaning up after an
669  * error.
670  */
671 void free_cpu_idle_disable_states(void)
672 {
673 	int cpu;
674 
675 	if (!saved_cpu_idle_disable_state)
676 		return;
677 
678 	for (cpu = 0; cpu < nr_cpus; cpu++) {
679 		free(saved_cpu_idle_disable_state[cpu]);
680 		saved_cpu_idle_disable_state[cpu] = NULL;
681 	}
682 
683 	free(saved_cpu_idle_disable_state);
684 	saved_cpu_idle_disable_state = NULL;
685 }
686 
/*
 * set_deepest_cpu_idle_state - limit idle state of cpu
 *
 * Disables every idle state with a number higher than deepest_state
 * (assuming states with higher number are deeper).
 *
 * This is used to reduce the exit from idle latency. Unlike
 * set_cpu_dma_latency, it can disable idle states per cpu.
 *
 * Return: idle state count on success, negative on error
 */
int set_deepest_cpu_idle_state(unsigned int cpu, unsigned int deepest_state)
{
	unsigned int nr_states = cpuidle_state_count(cpu);
	unsigned int idx = deepest_state + 1;
	int ret;

	while (idx < nr_states) {
		ret = cpuidle_state_disable(cpu, idx, 1);
		if (ret < 0)
			return ret;
		idx++;
	}

	return nr_states;
}
714 #endif /* HAVE_LIBCPUPOWER_SUPPORT */
715 
716 #define _STR(x) #x
717 #define STR(x) _STR(x)
718 
719 /*
720  * find_mount - find a the mount point of a given fs
721  *
722  * Returns 0 if mount is not found, otherwise return 1 and fill mp
723  * with the mount point.
724  */
725 static const int find_mount(const char *fs, char *mp, int sizeof_mp)
726 {
727 	char mount_point[MAX_PATH+1];
728 	char type[100];
729 	int found = 0;
730 	FILE *fp;
731 
732 	fp = fopen("/proc/mounts", "r");
733 	if (!fp)
734 		return 0;
735 
736 	while (fscanf(fp, "%*s %" STR(MAX_PATH) "s %99s %*s %*d %*d\n",	mount_point, type) == 2) {
737 		if (strcmp(type, fs) == 0) {
738 			found = 1;
739 			break;
740 		}
741 	}
742 	fclose(fp);
743 
744 	if (!found)
745 		return 0;
746 
747 	memset(mp, 0, sizeof_mp);
748 	strncpy(mp, mount_point, sizeof_mp - 1);
749 
750 	debug_msg("Fs %s found at %s\n", fs, mp);
751 	return 1;
752 }
753 
754 /*
755  * get_self_cgroup - get the current thread cgroup path
756  *
757  * Parse /proc/$$/cgroup file to get the thread's cgroup. As an example of line to parse:
758  *
759  * 0::/user.slice/user-0.slice/session-3.scope'\n'
760  *
761  * This function is interested in the content after the second : and before the '\n'.
762  *
763  * Returns 1 if a string was found, 0 otherwise.
764  */
765 static int get_self_cgroup(char *self_cg, int sizeof_self_cg)
766 {
767 	char path[MAX_PATH], *start;
768 	int fd, retval;
769 
770 	snprintf(path, MAX_PATH, "/proc/%d/cgroup", getpid());
771 
772 	fd = open(path, O_RDONLY);
773 	if (fd < 0)
774 		return 0;
775 
776 	memset(path, 0, sizeof(path));
777 	retval = read(fd, path, MAX_PATH);
778 
779 	close(fd);
780 
781 	if (retval <= 0)
782 		return 0;
783 
784 	path[MAX_PATH-1] = '\0';
785 	start = path;
786 
787 	start = strstr(start, ":");
788 	if (!start)
789 		return 0;
790 
791 	/* skip ":" */
792 	start++;
793 
794 	start = strstr(start, ":");
795 	if (!start)
796 		return 0;
797 
798 	/* skip ":" */
799 	start++;
800 
801 	if (strlen(start) >= sizeof_self_cg)
802 		return 0;
803 
804 	snprintf(self_cg, sizeof_self_cg, "%s", start);
805 
806 	/* Swap '\n' with '\0' */
807 	start = strstr(self_cg, "\n");
808 
809 	/* there must be '\n' */
810 	if (!start)
811 		return 0;
812 
813 	/* ok, it found a string after the second : and before the \n */
814 	*start = '\0';
815 
816 	return 1;
817 }
818 
819 /*
820  * open_cgroup_procs - Open the cgroup.procs file for the given cgroup
821  *
822  * If cgroup argument is not NULL, the cgroup.procs file for that cgroup
823  * will be opened. Otherwise, the cgroup of the calling, i.e., rtla, thread
824  * will be used.
825  *
826  * Supports cgroup v2.
827  *
828  * Returns the file descriptor on success, -1 otherwise.
829  */
830 static int open_cgroup_procs(const char *cgroup)
831 {
832 	char cgroup_path[MAX_PATH - strlen("/cgroup.procs")];
833 	char cgroup_procs[MAX_PATH];
834 	int retval;
835 	int cg_fd;
836 	size_t cg_path_len;
837 
838 	retval = find_mount("cgroup2", cgroup_path, sizeof(cgroup_path));
839 	if (!retval) {
840 		err_msg("Did not find cgroupv2 mount point\n");
841 		return -1;
842 	}
843 
844 	cg_path_len = strlen(cgroup_path);
845 
846 	if (!cgroup) {
847 		retval = get_self_cgroup(&cgroup_path[cg_path_len],
848 				sizeof(cgroup_path) - cg_path_len);
849 		if (!retval) {
850 			err_msg("Did not find self cgroup\n");
851 			return -1;
852 		}
853 	} else {
854 		snprintf(&cgroup_path[cg_path_len],
855 				sizeof(cgroup_path) - cg_path_len, "%s/", cgroup);
856 	}
857 
858 	snprintf(cgroup_procs, MAX_PATH, "%s/cgroup.procs", cgroup_path);
859 
860 	debug_msg("Using cgroup path at: %s\n", cgroup_procs);
861 
862 	cg_fd = open(cgroup_procs, O_RDWR);
863 	if (cg_fd < 0)
864 		return -1;
865 
866 	return cg_fd;
867 }
868 
/*
 * set_pid_cgroup - Set cgroup to pid_t pid
 *
 * Writes the pid into the cgroup.procs file of the target cgroup. If the
 * cgroup argument is not NULL, that cgroup is the target. Otherwise, the
 * cgroup of the calling, i.e., rtla, thread is used.
 *
 * Supports cgroup v2.
 *
 * Returns 1 on success, 0 otherwise.
 */
int set_pid_cgroup(pid_t pid, const char *cgroup)
{
	char pid_str[24];
	int cg_fd;
	int ok;

	cg_fd = open_cgroup_procs(cgroup);
	if (cg_fd < 0)
		return 0;

	snprintf(pid_str, sizeof(pid_str), "%d\n", pid);

	ok = write(cg_fd, pid_str, strlen(pid_str)) >= 0;
	if (ok)
		debug_msg("Set cgroup attributes for pid:%s\n", pid_str);
	else
		err_msg("Error setting cgroup attributes for pid:%s - %s\n",
				pid_str, strerror(errno));

	close(cg_fd);

	return ok;
}
902 
903 /**
904  * set_comm_cgroup - Set cgroup to threads starting with char *comm_prefix
905  *
906  * If cgroup argument is not NULL, the threads will move to the given cgroup.
907  * Otherwise, the cgroup of the calling, i.e., rtla, thread will be used.
908  *
909  * Supports cgroup v2.
910  *
911  * Returns 1 on success, 0 otherwise.
912  */
913 int set_comm_cgroup(const char *comm_prefix, const char *cgroup)
914 {
915 	struct dirent *proc_entry;
916 	DIR *procfs;
917 	int retval;
918 	int cg_fd;
919 
920 	if (strlen(comm_prefix) >= MAX_PATH) {
921 		err_msg("Command prefix is too long: %d < strlen(%s)\n",
922 			MAX_PATH, comm_prefix);
923 		return 0;
924 	}
925 
926 	cg_fd = open_cgroup_procs(cgroup);
927 	if (cg_fd < 0)
928 		return 0;
929 
930 	procfs = opendir("/proc");
931 	if (!procfs) {
932 		err_msg("Could not open procfs\n");
933 		goto out_cg;
934 	}
935 
936 	while ((proc_entry = readdir(procfs))) {
937 
938 		retval = procfs_is_workload_pid(comm_prefix, proc_entry);
939 		if (!retval)
940 			continue;
941 
942 		retval = write(cg_fd, proc_entry->d_name, strlen(proc_entry->d_name));
943 		if (retval < 0) {
944 			err_msg("Error setting cgroup attributes for pid:%s - %s\n",
945 				proc_entry->d_name, strerror(errno));
946 			goto out_procfs;
947 		}
948 
949 		debug_msg("Set cgroup attributes for pid:%s\n", proc_entry->d_name);
950 	}
951 
952 	closedir(procfs);
953 	close(cg_fd);
954 	return 1;
955 
956 out_procfs:
957 	closedir(procfs);
958 out_cg:
959 	close(cg_fd);
960 	return 0;
961 }
962 
/**
 * auto_house_keeping - Automatically move rtla out of measurement threads
 *
 * Restricts the affinity of rtla to the CPUs it is currently allowed to run
 * on that are not monitored. Nothing is changed when rtla and the monitored
 * CPUs already share no CPU.
 *
 * Returns 1 on success, 0 otherwise.
 */
int auto_house_keeping(cpu_set_t *monitored_cpus)
{
	cpu_set_t allowed, candidates;
	pid_t self = getpid();

	/* CPUs rtla may currently run on */
	if (sched_getaffinity(self, sizeof(allowed), &allowed) == -1) {
		debug_msg("Could not get rtla affinity, rtla might run with the threads!\n");
		return 0;
	}

	/* no overlap with the monitored CPUs means nothing to do */
	CPU_AND(&candidates, &allowed, monitored_cpus);
	if (CPU_COUNT(&candidates) == 0) {
		debug_msg("rtla and the monitored CPUs do not share CPUs.");
		debug_msg("Skipping auto house-keeping\n");
		return 1;
	}

	/* CPUs that are in exactly one of the two sets ... */
	CPU_XOR(&candidates, &allowed, monitored_cpus);

	/* ... limited to the ones rtla is allowed to use */
	CPU_AND(&candidates, &candidates, &allowed);

	if (CPU_COUNT(&candidates) == 0) {
		debug_msg("Could not find any CPU for auto house-keeping\n");
		return 0;
	}

	if (sched_setaffinity(self, sizeof(candidates), &candidates) == -1) {
		debug_msg("Could not set affinity for auto house-keeping\n");
		return 0;
	}

	debug_msg("rtla automatically moved to an auto house-keeping cpu set\n");

	return 1;
}
1012 
/**
 * parse_optional_arg - Parse optional argument value
 *
 * Handles the forms -sarg, -s/--long=arg and -s/--long arg. When getopt
 * provided no value in optarg, the next command line word is taken as the
 * value, and consumed by advancing optind, unless it starts with '-'.
 *
 * Returns arg value if found, NULL otherwise.
 */
char *parse_optional_arg(int argc, char **argv)
{
	char *next;

	/* value attached to the option, possibly with a leading '=' */
	if (optarg)
		return optarg[0] == '=' ? &optarg[1] : optarg;

	if (optind >= argc)
		return NULL;

	/* a word starting with '-' is the next option, not a value */
	next = argv[optind];
	if (next[0] == '-')
		return NULL;

	optind++;
	return next;
}
1038 
/*
 * strtoi - convert string to integer with error checking
 *
 * The whole string must be a number; the base is detected from its prefix
 * (decimal, 0x hexadecimal, 0 octal). *res is written only on success.
 *
 * Returns 0 on success, -1 if conversion fails or result is out of int range.
 */
int strtoi(const char *s, int *res)
{
	char *rest;
	long parsed;

	if (s[0] == '\0')
		return -1;

	errno = 0;
	parsed = strtol(s, &rest, 0);

	/* range error, or characters left over after the number */
	if (errno != 0 || *rest != '\0')
		return -1;

	if (parsed < INT_MIN || parsed > INT_MAX)
		return -1;

	*res = (int)parsed;
	return 0;
}
1060 
/*
 * fatal_alloc - report a failed memory allocation and terminate
 *
 * fatal() appends the end-of-line itself, so the message carries none.
 */
static inline void fatal_alloc(void)
{
	fatal("Error allocating memory");
}
1065 
/*
 * calloc_fatal - calloc() that terminates the program on failure
 *
 * Returns the zeroed allocation; a NULL result from calloc() ends in
 * fatal_alloc().
 */
void *calloc_fatal(size_t n, size_t size)
{
	void *mem;

	mem = calloc(n, size);
	if (mem == NULL)
		fatal_alloc();

	return mem;
}
1075 
/*
 * reallocarray_fatal - reallocarray() that terminates the program on failure
 *
 * Returns the resized allocation; a NULL result from reallocarray() ends in
 * fatal_alloc().
 */
void *reallocarray_fatal(void *p, size_t n, size_t size)
{
	void *resized;

	resized = reallocarray(p, n, size);
	if (resized == NULL)
		fatal_alloc();

	return resized;
}
1085 
/*
 * strdup_fatal - strdup() that terminates the program on failure
 *
 * Returns the duplicated string; a NULL result from strdup() ends in
 * fatal_alloc().
 */
char *strdup_fatal(const char *s)
{
	char *copy;

	copy = strdup(s);
	if (copy == NULL)
		fatal_alloc();

	return copy;
}
1095