xref: /linux/tools/tracing/rtla/src/utils.c (revision 5779de8d36ac5a0c929f276096a499b03ae0afa7)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org>
4  */
5 
6 #define _GNU_SOURCE
7 #ifdef HAVE_LIBCPUPOWER_SUPPORT
8 #include <cpuidle.h>
9 #endif /* HAVE_LIBCPUPOWER_SUPPORT */
10 #include <dirent.h>
11 #include <stdarg.h>
12 #include <stdlib.h>
13 #include <string.h>
14 #include <unistd.h>
15 #include <ctype.h>
16 #include <errno.h>
17 #include <fcntl.h>
18 #include <sched.h>
19 #include <stdio.h>
20 
21 #include "utils.h"
22 
23 #define MAX_MSG_LENGTH	1024
24 int config_debug;
25 
26 /*
27  * err_msg - print an error message to the stderr
28  */
err_msg(const char * fmt,...)29 void err_msg(const char *fmt, ...)
30 {
31 	char message[MAX_MSG_LENGTH];
32 	va_list ap;
33 
34 	va_start(ap, fmt);
35 	vsnprintf(message, sizeof(message), fmt, ap);
36 	va_end(ap);
37 
38 	fprintf(stderr, "%s", message);
39 }
40 
41 /*
42  * debug_msg - print a debug message to stderr if debug is set
43  */
debug_msg(const char * fmt,...)44 void debug_msg(const char *fmt, ...)
45 {
46 	char message[MAX_MSG_LENGTH];
47 	va_list ap;
48 
49 	if (!config_debug)
50 		return;
51 
52 	va_start(ap, fmt);
53 	vsnprintf(message, sizeof(message), fmt, ap);
54 	va_end(ap);
55 
56 	fprintf(stderr, "%s", message);
57 }
58 
59 /*
60  * fatal - print an error message and EOL to stderr and exit with ERROR
61  */
fatal(const char * fmt,...)62 void fatal(const char *fmt, ...)
63 {
64 	va_list ap;
65 
66 	va_start(ap, fmt);
67 	vfprintf(stderr, fmt, ap);
68 	va_end(ap);
69 	fprintf(stderr, "\n");
70 
71 	exit(ERROR);
72 }
73 
/*
 * get_llong_from_str - convert the leading base-10 number of a string
 *
 * Trailing characters after the number are ignored.
 *
 * Returns the parsed value, or -1 when strtoll() reports an error via errno
 * or when the string does not start with a number. Note that -1 is therefore
 * indistinguishable from a successfully parsed "-1".
 */
long long get_llong_from_str(char *start)
{
	char *stop = NULL;
	long long parsed;

	/* strtoll() only sets errno on failure, so clear it beforehand */
	errno = 0;
	parsed = strtoll(start, &stop, 10);

	return (errno != 0 || stop == start) ? -1 : parsed;
}
89 
/*
 * get_duration - fill output with a human readable duration since start_time
 *
 * The elapsed time between start_time and now is broken down with gmtime()
 * and printed as "<days> hh:mm:ss", where <days> is the tm_yday field of
 * the broken-down time. At most output_size bytes are written to output.
 */
void get_duration(time_t start_time, char *output, int output_size)
{
	time_t elapsed = difftime(time(NULL), start_time);
	struct tm *parts = gmtime(&elapsed);

	snprintf(output, output_size, "%3d %02d:%02d:%02d",
		 parts->tm_yday, parts->tm_hour, parts->tm_min, parts->tm_sec);
}
108 
/*
 * parse_cpu_set - parse a cpu list, filling the cpu_set_t argument
 *
 * Receives a cpu list, like 1-3,5 (cpus 1, 2, 3, 5), and sets the
 * corresponding CPUs in *set. *set is cleared first. Every CPU number must
 * be below sysconf(_SC_NPROCESSORS_CONF), and every number must start with
 * a digit (no blanks or signs).
 *
 * Returns 0 on success, 1 otherwise.
 */
int parse_cpu_set(char *cpu_list, cpu_set_t *set)
{
	const char *p;
	int end_cpu;
	int nr_cpus;
	int cpu;
	int i;

	CPU_ZERO(set);

	nr_cpus = sysconf(_SC_NPROCESSORS_CONF);

	for (p = cpu_list; *p; ) {
		/*
		 * atoi() accepts leading blanks and a sign, but the digit
		 * skipping loop below does not consume them. Insist on a
		 * digit here, otherwise p would never advance and this loop
		 * would spin forever on input such as " 1" or "+1".
		 */
		if (!isdigit((unsigned char)*p))
			goto err;

		cpu = atoi(p);
		if (cpu < 0 || cpu >= nr_cpus)
			goto err;

		while (isdigit((unsigned char)*p))
			p++;

		end_cpu = cpu;
		if (*p == '-') {
			p++;
			/* same reasoning as above for the end of the range */
			if (!isdigit((unsigned char)*p))
				goto err;

			end_cpu = atoi(p);
			if (end_cpu < cpu || end_cpu >= nr_cpus)
				goto err;

			while (isdigit((unsigned char)*p))
				p++;
		}

		/* a single cpu is just a range of length one */
		for (i = cpu; i <= end_cpu; i++) {
			debug_msg("cpu_set: adding cpu %d\n", i);
			CPU_SET(i, set);
		}

		if (*p == ',')
			p++;
	}

	return 0;
err:
	debug_msg("Error parsing the cpu set %s\n", cpu_list);
	return 1;
}
165 
/*
 * parse_seconds_duration - parse a duration with an optional s/m/h/d suffix
 *
 * The leading base-10 number is scaled according to the first character
 * that follows it: 's' keeps seconds, 'm' minutes, 'h' hours and 'd' days
 * (upper case accepted). Any other character, or none, leaves the number
 * unscaled.
 *
 * Returns the duration in seconds.
 */
long parse_seconds_duration(char *val)
{
	char *suffix;
	long seconds = strtol(val, &suffix, 10);
	long scale = 1;

	switch (*suffix) {
	case 'm':
	case 'M':
		scale = 60;
		break;
	case 'h':
	case 'H':
		scale = 60 * 60;
		break;
	case 'd':
	case 'D':
		scale = 24 * 60 * 60;
		break;
	default:
		/* 's', 'S' or anything else: already in seconds */
		break;
	}

	return seconds * scale;
}
199 
/*
 * parse_ns_duration - parse a duration with ns/us/ms/s unit into nanoseconds
 *
 * The leading base-10 number must be followed by one of the units "ns",
 * "us", "ms" or "s"; characters after the unit are ignored.
 *
 * Returns the duration in nanoseconds, or -1 when the number is not
 * followed by a known unit (this includes a bare number without any unit).
 */
long parse_ns_duration(char *val)
{
	char *unit;
	long amount = strtol(val, &unit, 10);

	if (strncmp(unit, "ns", 2) == 0)
		return amount;

	if (strncmp(unit, "us", 2) == 0)
		return amount * 1000;

	if (strncmp(unit, "ms", 2) == 0)
		return amount * (1000 * 1000);

	if (strncmp(unit, "s", 1) == 0)
		return amount * (1000 * 1000 * 1000);

	return -1;
}
228 
229 /*
230  * This is a set of helper functions to use SCHED_DEADLINE.
231  */
232 #ifndef __NR_sched_setattr
233 # ifdef __x86_64__
234 #  define __NR_sched_setattr	314
235 # elif __i386__
236 #  define __NR_sched_setattr	351
237 # elif __arm__
238 #  define __NR_sched_setattr	380
239 # elif __aarch64__ || __riscv
240 #  define __NR_sched_setattr	274
241 # elif __powerpc__
242 #  define __NR_sched_setattr	355
243 # elif __s390x__
244 #  define __NR_sched_setattr	345
245 # elif __loongarch__
246 #  define __NR_sched_setattr	274
247 # endif
248 #endif
249 
250 #define SCHED_DEADLINE		6
251 
syscall_sched_setattr(pid_t pid,const struct sched_attr * attr,unsigned int flags)252 static inline int syscall_sched_setattr(pid_t pid, const struct sched_attr *attr,
253 				unsigned int flags) {
254 	return syscall(__NR_sched_setattr, pid, attr, flags);
255 }
256 
/*
 * __set_sched_attr - apply scheduling attributes to a pid
 *
 * Calls sched_setattr with flags set to zero and reports a failure via
 * err_msg().
 *
 * Returns 0 on success, 1 on failure.
 */
int __set_sched_attr(int pid, struct sched_attr *attr)
{
	if (syscall_sched_setattr(pid, attr, 0) >= 0)
		return 0;

	err_msg("Failed to set sched attributes to the pid %d: %s\n",
		pid, strerror(errno));
	return 1;
}
271 
272 /*
273  * procfs_is_workload_pid - check if a procfs entry contains a comm_prefix* comm
274  *
275  * Check if the procfs entry is a directory of a process, and then check if the
276  * process has a comm with the prefix set in char *comm_prefix. As the
277  * current users of this function only check for kernel threads, there is no
278  * need to check for the threads for the process.
279  *
280  * Return: True if the proc_entry contains a comm file with comm_prefix*.
281  * Otherwise returns false.
282  */
procfs_is_workload_pid(const char * comm_prefix,struct dirent * proc_entry)283 static int procfs_is_workload_pid(const char *comm_prefix, struct dirent *proc_entry)
284 {
285 	char buffer[MAX_PATH];
286 	int comm_fd, retval;
287 	char *t_name;
288 
289 	if (proc_entry->d_type != DT_DIR)
290 		return 0;
291 
292 	if (*proc_entry->d_name == '.')
293 		return 0;
294 
295 	/* check if the string is a pid */
296 	for (t_name = proc_entry->d_name; t_name; t_name++) {
297 		if (!isdigit(*t_name))
298 			break;
299 	}
300 
301 	if (*t_name != '\0')
302 		return 0;
303 
304 	snprintf(buffer, MAX_PATH, "/proc/%s/comm", proc_entry->d_name);
305 	comm_fd = open(buffer, O_RDONLY);
306 	if (comm_fd < 0)
307 		return 0;
308 
309 	memset(buffer, 0, MAX_PATH);
310 	retval = read(comm_fd, buffer, MAX_PATH);
311 
312 	close(comm_fd);
313 
314 	if (retval <= 0)
315 		return 0;
316 
317 	retval = strncmp(comm_prefix, buffer, strlen(comm_prefix));
318 	if (retval)
319 		return 0;
320 
321 	/* comm already have \n */
322 	debug_msg("Found workload pid:%s comm:%s", proc_entry->d_name, buffer);
323 
324 	return 1;
325 }
326 
327 /*
328  * set_comm_sched_attr - set sched params to threads starting with char *comm_prefix
329  *
330  * This function uses procfs to list the currently running threads and then set the
331  * sched_attr *attr to the threads that start with char *comm_prefix. It is
332  * mainly used to set the priority to the kernel threads created by the
333  * tracers.
334  */
set_comm_sched_attr(const char * comm_prefix,struct sched_attr * attr)335 int set_comm_sched_attr(const char *comm_prefix, struct sched_attr *attr)
336 {
337 	struct dirent *proc_entry;
338 	DIR *procfs;
339 	int retval;
340 
341 	if (strlen(comm_prefix) >= MAX_PATH) {
342 		err_msg("Command prefix is too long: %d < strlen(%s)\n",
343 			MAX_PATH, comm_prefix);
344 		return 1;
345 	}
346 
347 	procfs = opendir("/proc");
348 	if (!procfs) {
349 		err_msg("Could not open procfs\n");
350 		return 1;
351 	}
352 
353 	while ((proc_entry = readdir(procfs))) {
354 
355 		retval = procfs_is_workload_pid(comm_prefix, proc_entry);
356 		if (!retval)
357 			continue;
358 
359 		/* procfs_is_workload_pid confirmed it is a pid */
360 		retval = __set_sched_attr(atoi(proc_entry->d_name), attr);
361 		if (retval) {
362 			err_msg("Error setting sched attributes for pid:%s\n", proc_entry->d_name);
363 			goto out_err;
364 		}
365 
366 		debug_msg("Set sched attributes for pid:%s\n", proc_entry->d_name);
367 	}
368 	return 0;
369 
370 out_err:
371 	closedir(procfs);
372 	return 1;
373 }
374 
375 #define INVALID_VAL	(~0L)
/*
 * get_long_ns_after_colon - parse the ns duration that follows the first ':'
 *
 * Returns the value given by parse_ns_duration() for the text after the
 * first ':' in start, or -1 (which equals INVALID_VAL) if there is no ':'.
 */
static long get_long_ns_after_colon(char *start)
{
	char *colon = strstr(start, ":");

	if (!colon)
		return -1;

	/* the duration begins right after the ':' */
	return parse_ns_duration(colon + 1);
}
391 
/*
 * get_long_after_colon - parse the integer that follows the first ':'
 *
 * Returns the value given by get_llong_from_str() for the text after the
 * first ':' in start, or -1 (which equals INVALID_VAL) if there is no ':'.
 */
static long get_long_after_colon(char *start)
{
	char *colon = strstr(start, ":");

	if (!colon)
		return -1;

	/* the number begins right after the ':' */
	return get_llong_from_str(colon + 1);
}
407 
408 /*
409  * parse priority in the format:
410  * SCHED_OTHER:
411  *		o:<prio>
412  *		O:<prio>
413  * SCHED_RR:
414  *		r:<prio>
415  *		R:<prio>
416  * SCHED_FIFO:
417  *		f:<prio>
418  *		F:<prio>
419  * SCHED_DEADLINE:
420  *		d:runtime:period
421  *		D:runtime:period
422  */
parse_prio(char * arg,struct sched_attr * sched_param)423 int parse_prio(char *arg, struct sched_attr *sched_param)
424 {
425 	long prio;
426 	long runtime;
427 	long period;
428 
429 	memset(sched_param, 0, sizeof(*sched_param));
430 	sched_param->size = sizeof(*sched_param);
431 
432 	switch (arg[0]) {
433 	case 'd':
434 	case 'D':
435 		/* d:runtime:period */
436 		if (strlen(arg) < 4)
437 			return -1;
438 
439 		runtime = get_long_ns_after_colon(arg);
440 		if (runtime == INVALID_VAL)
441 			return -1;
442 
443 		period = get_long_ns_after_colon(&arg[2]);
444 		if (period == INVALID_VAL)
445 			return -1;
446 
447 		if (runtime > period)
448 			return -1;
449 
450 		sched_param->sched_policy   = SCHED_DEADLINE;
451 		sched_param->sched_runtime  = runtime;
452 		sched_param->sched_deadline = period;
453 		sched_param->sched_period   = period;
454 		break;
455 	case 'f':
456 	case 'F':
457 		/* f:prio */
458 		prio = get_long_after_colon(arg);
459 		if (prio == INVALID_VAL)
460 			return -1;
461 
462 		if (prio < sched_get_priority_min(SCHED_FIFO))
463 			return -1;
464 		if (prio > sched_get_priority_max(SCHED_FIFO))
465 			return -1;
466 
467 		sched_param->sched_policy   = SCHED_FIFO;
468 		sched_param->sched_priority = prio;
469 		break;
470 	case 'r':
471 	case 'R':
472 		/* r:prio */
473 		prio = get_long_after_colon(arg);
474 		if (prio == INVALID_VAL)
475 			return -1;
476 
477 		if (prio < sched_get_priority_min(SCHED_RR))
478 			return -1;
479 		if (prio > sched_get_priority_max(SCHED_RR))
480 			return -1;
481 
482 		sched_param->sched_policy   = SCHED_RR;
483 		sched_param->sched_priority = prio;
484 		break;
485 	case 'o':
486 	case 'O':
487 		/* o:prio */
488 		prio = get_long_after_colon(arg);
489 		if (prio == INVALID_VAL)
490 			return -1;
491 
492 		if (prio < MIN_NICE)
493 			return -1;
494 		if (prio > MAX_NICE)
495 			return -1;
496 
497 		sched_param->sched_policy   = SCHED_OTHER;
498 		sched_param->sched_nice = prio;
499 		break;
500 	default:
501 		return -1;
502 	}
503 	return 0;
504 }
505 
/*
 * set_cpu_dma_latency - set the /dev/cpu_dma_latency
 *
 * This is used to reduce the exit from idle latency. The value
 * will be reset once the file descriptor of /dev/cpu_dma_latency
 * is closed, so the caller has to keep the returned descriptor open.
 *
 * Return: the /dev/cpu_dma_latency file descriptor, or -1 on error.
 */
int set_cpu_dma_latency(int32_t latency)
{
	int fd = open("/dev/cpu_dma_latency", O_RDWR);

	if (fd < 0) {
		err_msg("Error opening /dev/cpu_dma_latency\n");
		return -1;
	}

	/* the latency is written as a raw 4-byte value */
	if (write(fd, &latency, 4) < 1) {
		err_msg("Error setting /dev/cpu_dma_latency\n");
		close(fd);
		return -1;
	}

	debug_msg("Set /dev/cpu_dma_latency to %d\n", latency);

	return fd;
}
537 
538 #ifdef HAVE_LIBCPUPOWER_SUPPORT
539 static unsigned int **saved_cpu_idle_disable_state;
540 static size_t saved_cpu_idle_disable_state_alloc_ctr;
541 
542 /*
543  * save_cpu_idle_state_disable - save disable for all idle states of a cpu
544  *
545  * Saves the current disable of all idle states of a cpu, to be subsequently
546  * restored via restore_cpu_idle_disable_state.
547  *
548  * Return: idle state count on success, negative on error
549  */
save_cpu_idle_disable_state(unsigned int cpu)550 int save_cpu_idle_disable_state(unsigned int cpu)
551 {
552 	unsigned int nr_states;
553 	unsigned int state;
554 	int disabled;
555 	int nr_cpus;
556 
557 	nr_states = cpuidle_state_count(cpu);
558 
559 	if (nr_states == 0)
560 		return 0;
561 
562 	if (saved_cpu_idle_disable_state == NULL) {
563 		nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
564 		saved_cpu_idle_disable_state = calloc(nr_cpus, sizeof(unsigned int *));
565 		if (!saved_cpu_idle_disable_state)
566 			return -1;
567 	}
568 
569 	saved_cpu_idle_disable_state[cpu] = calloc(nr_states, sizeof(unsigned int));
570 	if (!saved_cpu_idle_disable_state[cpu])
571 		return -1;
572 	saved_cpu_idle_disable_state_alloc_ctr++;
573 
574 	for (state = 0; state < nr_states; state++) {
575 		disabled = cpuidle_is_state_disabled(cpu, state);
576 		if (disabled < 0)
577 			return disabled;
578 		saved_cpu_idle_disable_state[cpu][state] = disabled;
579 	}
580 
581 	return nr_states;
582 }
583 
584 /*
585  * restore_cpu_idle_disable_state - restore disable for all idle states of a cpu
586  *
587  * Restores the current disable state of all idle states of a cpu that was
588  * previously saved by save_cpu_idle_disable_state.
589  *
590  * Return: idle state count on success, negative on error
591  */
restore_cpu_idle_disable_state(unsigned int cpu)592 int restore_cpu_idle_disable_state(unsigned int cpu)
593 {
594 	unsigned int nr_states;
595 	unsigned int state;
596 	int disabled;
597 	int result;
598 
599 	nr_states = cpuidle_state_count(cpu);
600 
601 	if (nr_states == 0)
602 		return 0;
603 
604 	if (!saved_cpu_idle_disable_state)
605 		return -1;
606 
607 	for (state = 0; state < nr_states; state++) {
608 		if (!saved_cpu_idle_disable_state[cpu])
609 			return -1;
610 		disabled = saved_cpu_idle_disable_state[cpu][state];
611 		result = cpuidle_state_disable(cpu, state, disabled);
612 		if (result < 0)
613 			return result;
614 	}
615 
616 	free(saved_cpu_idle_disable_state[cpu]);
617 	saved_cpu_idle_disable_state[cpu] = NULL;
618 	saved_cpu_idle_disable_state_alloc_ctr--;
619 	if (saved_cpu_idle_disable_state_alloc_ctr == 0) {
620 		free(saved_cpu_idle_disable_state);
621 		saved_cpu_idle_disable_state = NULL;
622 	}
623 
624 	return nr_states;
625 }
626 
627 /*
628  * free_cpu_idle_disable_states - free saved idle state disable for all cpus
629  *
630  * Frees the memory used for storing cpu idle state disable for all cpus
631  * and states.
632  *
633  * Normally, the memory is freed automatically in
634  * restore_cpu_idle_disable_state; this is mostly for cleaning up after an
635  * error.
636  */
free_cpu_idle_disable_states(void)637 void free_cpu_idle_disable_states(void)
638 {
639 	int cpu;
640 	int nr_cpus;
641 
642 	if (!saved_cpu_idle_disable_state)
643 		return;
644 
645 	nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
646 
647 	for (cpu = 0; cpu < nr_cpus; cpu++) {
648 		free(saved_cpu_idle_disable_state[cpu]);
649 		saved_cpu_idle_disable_state[cpu] = NULL;
650 	}
651 
652 	free(saved_cpu_idle_disable_state);
653 	saved_cpu_idle_disable_state = NULL;
654 }
655 
/*
 * set_deepest_cpu_idle_state - limit idle state of cpu
 *
 * Disables every idle state with a number above deepest_state (assuming
 * states with higher number are deeper). States up to and including
 * deepest_state are left untouched.
 *
 * This is used to reduce the exit from idle latency. Unlike
 * set_cpu_dma_latency, it can disable idle states per cpu.
 *
 * Return: idle state count on success, negative on error
 */
int set_deepest_cpu_idle_state(unsigned int cpu, unsigned int deepest_state)
{
	unsigned int nr_states = cpuidle_state_count(cpu);
	unsigned int state = deepest_state + 1;
	int ret;

	while (state < nr_states) {
		ret = cpuidle_state_disable(cpu, state, 1);
		if (ret < 0)
			return ret;
		state++;
	}

	return nr_states;
}
683 #endif /* HAVE_LIBCPUPOWER_SUPPORT */
684 
685 #define _STR(x) #x
686 #define STR(x) _STR(x)
687 
688 /*
689  * find_mount - find a the mount point of a given fs
690  *
691  * Returns 0 if mount is not found, otherwise return 1 and fill mp
692  * with the mount point.
693  */
find_mount(const char * fs,char * mp,int sizeof_mp)694 static const int find_mount(const char *fs, char *mp, int sizeof_mp)
695 {
696 	char mount_point[MAX_PATH+1];
697 	char type[100];
698 	int found = 0;
699 	FILE *fp;
700 
701 	fp = fopen("/proc/mounts", "r");
702 	if (!fp)
703 		return 0;
704 
705 	while (fscanf(fp, "%*s %" STR(MAX_PATH) "s %99s %*s %*d %*d\n",	mount_point, type) == 2) {
706 		if (strcmp(type, fs) == 0) {
707 			found = 1;
708 			break;
709 		}
710 	}
711 	fclose(fp);
712 
713 	if (!found)
714 		return 0;
715 
716 	memset(mp, 0, sizeof_mp);
717 	strncpy(mp, mount_point, sizeof_mp - 1);
718 
719 	debug_msg("Fs %s found at %s\n", fs, mp);
720 	return 1;
721 }
722 
723 /*
724  * get_self_cgroup - get the current thread cgroup path
725  *
726  * Parse /proc/$$/cgroup file to get the thread's cgroup. As an example of line to parse:
727  *
728  * 0::/user.slice/user-0.slice/session-3.scope'\n'
729  *
730  * This function is interested in the content after the second : and before the '\n'.
731  *
732  * Returns 1 if a string was found, 0 otherwise.
733  */
get_self_cgroup(char * self_cg,int sizeof_self_cg)734 static int get_self_cgroup(char *self_cg, int sizeof_self_cg)
735 {
736 	char path[MAX_PATH], *start;
737 	int fd, retval;
738 
739 	snprintf(path, MAX_PATH, "/proc/%d/cgroup", getpid());
740 
741 	fd = open(path, O_RDONLY);
742 	if (fd < 0)
743 		return 0;
744 
745 	retval = read(fd, path, MAX_PATH);
746 
747 	close(fd);
748 
749 	if (retval <= 0)
750 		return 0;
751 
752 	start = path;
753 
754 	start = strstr(start, ":");
755 	if (!start)
756 		return 0;
757 
758 	/* skip ":" */
759 	start++;
760 
761 	start = strstr(start, ":");
762 	if (!start)
763 		return 0;
764 
765 	/* skip ":" */
766 	start++;
767 
768 	if (strlen(start) >= sizeof_self_cg)
769 		return 0;
770 
771 	snprintf(self_cg, sizeof_self_cg, "%s", start);
772 
773 	/* Swap '\n' with '\0' */
774 	start = strstr(self_cg, "\n");
775 
776 	/* there must be '\n' */
777 	if (!start)
778 		return 0;
779 
780 	/* ok, it found a string after the second : and before the \n */
781 	*start = '\0';
782 
783 	return 1;
784 }
785 
786 /*
787  * set_comm_cgroup - Set cgroup to pid_t pid
788  *
789  * If cgroup argument is not NULL, the threads will move to the given cgroup.
790  * Otherwise, the cgroup of the calling, i.e., rtla, thread will be used.
791  *
792  * Supports cgroup v2.
793  *
794  * Returns 1 on success, 0 otherwise.
795  */
set_pid_cgroup(pid_t pid,const char * cgroup)796 int set_pid_cgroup(pid_t pid, const char *cgroup)
797 {
798 	char cgroup_path[MAX_PATH - strlen("/cgroup.procs")];
799 	char cgroup_procs[MAX_PATH];
800 	char pid_str[24];
801 	int retval;
802 	int cg_fd;
803 
804 	retval = find_mount("cgroup2", cgroup_path, sizeof(cgroup_path));
805 	if (!retval) {
806 		err_msg("Did not find cgroupv2 mount point\n");
807 		return 0;
808 	}
809 
810 	if (!cgroup) {
811 		retval = get_self_cgroup(&cgroup_path[strlen(cgroup_path)],
812 				sizeof(cgroup_path) - strlen(cgroup_path));
813 		if (!retval) {
814 			err_msg("Did not find self cgroup\n");
815 			return 0;
816 		}
817 	} else {
818 		snprintf(&cgroup_path[strlen(cgroup_path)],
819 				sizeof(cgroup_path) - strlen(cgroup_path), "%s/", cgroup);
820 	}
821 
822 	snprintf(cgroup_procs, MAX_PATH, "%s/cgroup.procs", cgroup_path);
823 
824 	debug_msg("Using cgroup path at: %s\n", cgroup_procs);
825 
826 	cg_fd = open(cgroup_procs, O_RDWR);
827 	if (cg_fd < 0)
828 		return 0;
829 
830 	snprintf(pid_str, sizeof(pid_str), "%d\n", pid);
831 
832 	retval = write(cg_fd, pid_str, strlen(pid_str));
833 	if (retval < 0)
834 		err_msg("Error setting cgroup attributes for pid:%s - %s\n",
835 				pid_str, strerror(errno));
836 	else
837 		debug_msg("Set cgroup attributes for pid:%s\n", pid_str);
838 
839 	close(cg_fd);
840 
841 	return (retval >= 0);
842 }
843 
844 /**
845  * set_comm_cgroup - Set cgroup to threads starting with char *comm_prefix
846  *
847  * If cgroup argument is not NULL, the threads will move to the given cgroup.
848  * Otherwise, the cgroup of the calling, i.e., rtla, thread will be used.
849  *
850  * Supports cgroup v2.
851  *
852  * Returns 1 on success, 0 otherwise.
853  */
set_comm_cgroup(const char * comm_prefix,const char * cgroup)854 int set_comm_cgroup(const char *comm_prefix, const char *cgroup)
855 {
856 	char cgroup_path[MAX_PATH - strlen("/cgroup.procs")];
857 	char cgroup_procs[MAX_PATH];
858 	struct dirent *proc_entry;
859 	DIR *procfs;
860 	int retval;
861 	int cg_fd;
862 
863 	if (strlen(comm_prefix) >= MAX_PATH) {
864 		err_msg("Command prefix is too long: %d < strlen(%s)\n",
865 			MAX_PATH, comm_prefix);
866 		return 0;
867 	}
868 
869 	retval = find_mount("cgroup2", cgroup_path, sizeof(cgroup_path));
870 	if (!retval) {
871 		err_msg("Did not find cgroupv2 mount point\n");
872 		return 0;
873 	}
874 
875 	if (!cgroup) {
876 		retval = get_self_cgroup(&cgroup_path[strlen(cgroup_path)],
877 				sizeof(cgroup_path) - strlen(cgroup_path));
878 		if (!retval) {
879 			err_msg("Did not find self cgroup\n");
880 			return 0;
881 		}
882 	} else {
883 		snprintf(&cgroup_path[strlen(cgroup_path)],
884 				sizeof(cgroup_path) - strlen(cgroup_path), "%s/", cgroup);
885 	}
886 
887 	snprintf(cgroup_procs, MAX_PATH, "%s/cgroup.procs", cgroup_path);
888 
889 	debug_msg("Using cgroup path at: %s\n", cgroup_procs);
890 
891 	cg_fd = open(cgroup_procs, O_RDWR);
892 	if (cg_fd < 0)
893 		return 0;
894 
895 	procfs = opendir("/proc");
896 	if (!procfs) {
897 		err_msg("Could not open procfs\n");
898 		goto out_cg;
899 	}
900 
901 	while ((proc_entry = readdir(procfs))) {
902 
903 		retval = procfs_is_workload_pid(comm_prefix, proc_entry);
904 		if (!retval)
905 			continue;
906 
907 		retval = write(cg_fd, proc_entry->d_name, strlen(proc_entry->d_name));
908 		if (retval < 0) {
909 			err_msg("Error setting cgroup attributes for pid:%s - %s\n",
910 				proc_entry->d_name, strerror(errno));
911 			goto out_procfs;
912 		}
913 
914 		debug_msg("Set cgroup attributes for pid:%s\n", proc_entry->d_name);
915 	}
916 
917 	closedir(procfs);
918 	close(cg_fd);
919 	return 1;
920 
921 out_procfs:
922 	closedir(procfs);
923 out_cg:
924 	close(cg_fd);
925 	return 0;
926 }
927 
/**
 * auto_house_keeping - Automatically move rtla out of measurement threads
 *
 * Try to move rtla away from the monitored CPUs, if possible: the new
 * affinity is the set of CPUs rtla may currently run on minus the
 * monitored ones. Nothing is changed when the two sets do not overlap.
 *
 * Returns 1 on success, 0 otherwise.
 */
int auto_house_keeping(cpu_set_t *monitored_cpus)
{
	cpu_set_t allowed, candidates;

	/* first get the CPUs in which rtla can actually run. */
	if (sched_getaffinity(getpid(), sizeof(allowed), &allowed) == -1) {
		debug_msg("Could not get rtla affinity, rtla might run with the threads!\n");
		return 0;
	}

	/* then check if the existing setup is already good. */
	CPU_AND(&candidates, &allowed, monitored_cpus);
	if (CPU_COUNT(&candidates) == 0) {
		debug_msg("rtla and the monitored CPUs do not share CPUs.");
		debug_msg("Skipping auto house-keeping\n");
		return 1;
	}

	/* drop the CPUs present in both sets... */
	CPU_XOR(&candidates, &allowed, monitored_cpus);

	/* ...and keep only those in which rtla is allowed to run */
	CPU_AND(&candidates, &candidates, &allowed);

	if (CPU_COUNT(&candidates) == 0) {
		debug_msg("Could not find any CPU for auto house-keeping\n");
		return 0;
	}

	if (sched_setaffinity(getpid(), sizeof(candidates), &candidates) == -1) {
		debug_msg("Could not set affinity for auto house-keeping\n");
		return 0;
	}

	debug_msg("rtla automatically moved to an auto house-keeping cpu set\n");

	return 1;
}
977 
/**
 * parse_optional_arg - Parse optional argument value
 *
 * Parse optional argument value, which can be in the form of:
 * -sarg, -s/--long=arg, -s/--long arg
 *
 * When the value is taken from the next argv element, optind is advanced
 * past it. A next element starting with '-' is not taken as a value.
 *
 * Returns arg value if found, NULL otherwise.
 */
char *parse_optional_arg(int argc, char **argv)
{
	/* attached value: drop a leading '=' if there is one */
	if (optarg)
		return (optarg[0] == '=') ? &optarg[1] : optarg;

	/* detached value of form -s [arg] and --long [arg]: consume optind */
	if (optind < argc && argv[optind][0] != '-')
		return argv[optind++];

	return NULL;
}
1003