xref: /linux/tools/tracing/rtla/src/utils.c (revision 472c5f736b54c476c9bfaa0258c4c07fc6ddeea4)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2021 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org>
4  */
5 
6 #define _GNU_SOURCE
7 #ifdef HAVE_LIBCPUPOWER_SUPPORT
8 #include <cpuidle.h>
9 #endif /* HAVE_LIBCPUPOWER_SUPPORT */
10 #include <dirent.h>
11 #include <stdarg.h>
12 #include <stdlib.h>
13 #include <string.h>
14 #include <unistd.h>
15 #include <ctype.h>
16 #include <errno.h>
17 #include <fcntl.h>
18 #include <sched.h>
19 #include <stdio.h>
20 
21 #include "utils.h"
22 
23 #define MAX_MSG_LENGTH	1024
24 int config_debug;
25 
26 /*
27  * err_msg - print an error message to the stderr
28  */
err_msg(const char * fmt,...)29 void err_msg(const char *fmt, ...)
30 {
31 	char message[MAX_MSG_LENGTH];
32 	va_list ap;
33 
34 	va_start(ap, fmt);
35 	vsnprintf(message, sizeof(message), fmt, ap);
36 	va_end(ap);
37 
38 	fprintf(stderr, "%s", message);
39 }
40 
41 /*
42  * debug_msg - print a debug message to stderr if debug is set
43  */
debug_msg(const char * fmt,...)44 void debug_msg(const char *fmt, ...)
45 {
46 	char message[MAX_MSG_LENGTH];
47 	va_list ap;
48 
49 	if (!config_debug)
50 		return;
51 
52 	va_start(ap, fmt);
53 	vsnprintf(message, sizeof(message), fmt, ap);
54 	va_end(ap);
55 
56 	fprintf(stderr, "%s", message);
57 }
58 
/*
 * get_llong_from_str - get a long long int from a string
 *
 * Returns the decimal value at the beginning of the string, or -1 when
 * no digits are found or the conversion sets errno (e.g. overflow).
 */
long long get_llong_from_str(char *start)
{
	char *endptr;
	long long parsed;

	errno = 0;
	parsed = strtoll(start, &endptr, 10);

	/* endptr == start means not even one digit was consumed */
	return (errno || endptr == start) ? -1 : parsed;
}
74 
/*
 * get_duration - fill output with a human readable duration since start_time
 *
 * Formats the elapsed time as "DDD HH:MM:SS", where the day count is the
 * duration interpreted as a time_t offset from the epoch (so it wraps for
 * durations of a year or more). Uses gmtime_r() instead of gmtime(): it is
 * reentrant and lets us check for conversion failure instead of
 * dereferencing a possible NULL.
 */
void get_duration(time_t start_time, char *output, int output_size)
{
	time_t now = time(NULL);
	struct tm tm_info;
	time_t duration;

	duration = difftime(now, start_time);
	if (!gmtime_r(&duration, &tm_info)) {
		/* cannot happen for a sane duration, but do not crash */
		snprintf(output, output_size, "unknown");
		return;
	}

	snprintf(output, output_size, "%3d %02d:%02d:%02d",
			tm_info.tm_yday,
			tm_info.tm_hour,
			tm_info.tm_min,
			tm_info.tm_sec);
}
93 
/*
 * parse_cpu_set - parse a cpu_list filling cpu_set_t argument
 *
 * Receives a cpu list, like 1-3,5 (cpus 1, 2, 3, 5), and then sets the
 * matching bits in the cpu_set_t argument.
 *
 * Returns 0 on success, 1 otherwise.  (The comment previously claimed the
 * opposite polarity; the code has always returned 0 on success.)
 */
int parse_cpu_set(char *cpu_list, cpu_set_t *set)
{
	const char *p;
	int end_cpu;
	int nr_cpus;
	int cpu;
	int i;

	CPU_ZERO(set);

	nr_cpus = sysconf(_SC_NPROCESSORS_CONF);

	for (p = cpu_list; *p; ) {
		cpu = atoi(p);
		/* atoi() returns 0 for garbage: accept 0 only for a literal '0' */
		if (cpu < 0 || (!cpu && *p != '0') || cpu >= nr_cpus)
			goto err;

		/* cast: isdigit() takes an unsigned char value (or EOF) */
		while (isdigit((unsigned char)*p))
			p++;
		if (*p == '-') {
			/* range "a-b": parse the upper bound */
			p++;
			end_cpu = atoi(p);
			if (end_cpu < cpu || (!end_cpu && *p != '0') || end_cpu >= nr_cpus)
				goto err;
			while (isdigit((unsigned char)*p))
				p++;
		} else
			end_cpu = cpu;

		if (cpu == end_cpu) {
			debug_msg("cpu_set: adding cpu %d\n", cpu);
			CPU_SET(cpu, set);
		} else {
			for (i = cpu; i <= end_cpu; i++) {
				debug_msg("cpu_set: adding cpu %d\n", i);
				CPU_SET(i, set);
			}
		}

		if (*p == ',')
			p++;
	}

	return 0;
err:
	debug_msg("Error parsing the cpu set %s\n", cpu_list);
	return 1;
}
150 
/*
 * parse_seconds_duration - parse duration with s/m/h/d suffix converting it to seconds
 *
 * A bare number or an 's'/'S' suffix is taken as seconds; 'm'/'h'/'d'
 * scale to minutes, hours and days. Unknown suffixes are ignored.
 */
long parse_seconds_duration(char *val)
{
	char *suffix;
	long seconds;

	seconds = strtol(val, &suffix, 10);

	/* strtol() always sets the end pointer, but keep the guard */
	if (!suffix)
		return seconds;

	switch (*suffix) {
	case 'm':
	case 'M':
		seconds *= 60;
		break;
	case 'h':
	case 'H':
		seconds *= 60 * 60;
		break;
	case 'd':
	case 'D':
		seconds *= 24 * 60 * 60;
		break;
	default:
		/* 's', 'S', '\0' or anything else: value is already seconds */
		break;
	}

	return seconds;
}
184 
/*
 * parse_ns_duration - parse duration with ns/us/ms/s converting it to nanoseconds
 *
 * Returns the value in nanoseconds, or -1 when the suffix is not one of
 * ns/us/ms/s. Note: "us" and "ms" are tested before "s" on purpose.
 */
long parse_ns_duration(char *val)
{
	char *unit;
	long t;

	t = strtol(val, &unit, 10);

	/* strtol() always sets the end pointer, but keep the guard */
	if (!unit)
		return t;

	if (!strncmp(unit, "ns", 2))
		return t;
	if (!strncmp(unit, "us", 2))
		return t * 1000;
	if (!strncmp(unit, "ms", 2))
		return t * 1000 * 1000;
	if (!strncmp(unit, "s", 1))
		return t * 1000 * 1000 * 1000;

	return -1;
}
213 
214 /*
215  * This is a set of helper functions to use SCHED_DEADLINE.
216  */
217 #ifndef __NR_sched_setattr
218 # ifdef __x86_64__
219 #  define __NR_sched_setattr	314
220 # elif __i386__
221 #  define __NR_sched_setattr	351
222 # elif __arm__
223 #  define __NR_sched_setattr	380
224 # elif __aarch64__ || __riscv
225 #  define __NR_sched_setattr	274
226 # elif __powerpc__
227 #  define __NR_sched_setattr	355
228 # elif __s390x__
229 #  define __NR_sched_setattr	345
230 # elif __loongarch__
231 #  define __NR_sched_setattr	274
232 # endif
233 #endif
234 
235 #define SCHED_DEADLINE		6
236 
/*
 * syscall_sched_setattr - invoke the raw sched_setattr syscall
 *
 * Uses the per-arch __NR_sched_setattr numbers defined above; presumably
 * no libc wrapper is available — confirm against the supported libcs.
 * Returns the raw syscall result: 0 on success, -1 with errno set on error.
 */
static inline int syscall_sched_setattr(pid_t pid, const struct sched_attr *attr,
				unsigned int flags) {
	return syscall(__NR_sched_setattr, pid, attr, flags);
}
241 
/*
 * __set_sched_attr - apply sched_attr to a pid via sched_setattr
 *
 * Returns 0 on success; prints an error and returns 1 on failure.
 */
int __set_sched_attr(int pid, struct sched_attr *attr)
{
	int ret;

	/* no flags are defined for this use case */
	ret = syscall_sched_setattr(pid, attr, 0);
	if (ret >= 0)
		return 0;

	err_msg("Failed to set sched attributes to the pid %d: %s\n",
		pid, strerror(errno));
	return 1;
}
256 
257 /*
258  * procfs_is_workload_pid - check if a procfs entry contains a comm_prefix* comm
259  *
260  * Check if the procfs entry is a directory of a process, and then check if the
261  * process has a comm with the prefix set in char *comm_prefix. As the
262  * current users of this function only check for kernel threads, there is no
263  * need to check for the threads for the process.
264  *
265  * Return: True if the proc_entry contains a comm file with comm_prefix*.
266  * Otherwise returns false.
267  */
procfs_is_workload_pid(const char * comm_prefix,struct dirent * proc_entry)268 static int procfs_is_workload_pid(const char *comm_prefix, struct dirent *proc_entry)
269 {
270 	char buffer[MAX_PATH];
271 	int comm_fd, retval;
272 	char *t_name;
273 
274 	if (proc_entry->d_type != DT_DIR)
275 		return 0;
276 
277 	if (*proc_entry->d_name == '.')
278 		return 0;
279 
280 	/* check if the string is a pid */
281 	for (t_name = proc_entry->d_name; t_name; t_name++) {
282 		if (!isdigit(*t_name))
283 			break;
284 	}
285 
286 	if (*t_name != '\0')
287 		return 0;
288 
289 	snprintf(buffer, MAX_PATH, "/proc/%s/comm", proc_entry->d_name);
290 	comm_fd = open(buffer, O_RDONLY);
291 	if (comm_fd < 0)
292 		return 0;
293 
294 	memset(buffer, 0, MAX_PATH);
295 	retval = read(comm_fd, buffer, MAX_PATH);
296 
297 	close(comm_fd);
298 
299 	if (retval <= 0)
300 		return 0;
301 
302 	retval = strncmp(comm_prefix, buffer, strlen(comm_prefix));
303 	if (retval)
304 		return 0;
305 
306 	/* comm already have \n */
307 	debug_msg("Found workload pid:%s comm:%s", proc_entry->d_name, buffer);
308 
309 	return 1;
310 }
311 
312 /*
313  * set_comm_sched_attr - set sched params to threads starting with char *comm_prefix
314  *
315  * This function uses procfs to list the currently running threads and then set the
316  * sched_attr *attr to the threads that start with char *comm_prefix. It is
317  * mainly used to set the priority to the kernel threads created by the
318  * tracers.
319  */
set_comm_sched_attr(const char * comm_prefix,struct sched_attr * attr)320 int set_comm_sched_attr(const char *comm_prefix, struct sched_attr *attr)
321 {
322 	struct dirent *proc_entry;
323 	DIR *procfs;
324 	int retval;
325 
326 	if (strlen(comm_prefix) >= MAX_PATH) {
327 		err_msg("Command prefix is too long: %d < strlen(%s)\n",
328 			MAX_PATH, comm_prefix);
329 		return 1;
330 	}
331 
332 	procfs = opendir("/proc");
333 	if (!procfs) {
334 		err_msg("Could not open procfs\n");
335 		return 1;
336 	}
337 
338 	while ((proc_entry = readdir(procfs))) {
339 
340 		retval = procfs_is_workload_pid(comm_prefix, proc_entry);
341 		if (!retval)
342 			continue;
343 
344 		/* procfs_is_workload_pid confirmed it is a pid */
345 		retval = __set_sched_attr(atoi(proc_entry->d_name), attr);
346 		if (retval) {
347 			err_msg("Error setting sched attributes for pid:%s\n", proc_entry->d_name);
348 			goto out_err;
349 		}
350 
351 		debug_msg("Set sched attributes for pid:%s\n", proc_entry->d_name);
352 	}
353 	return 0;
354 
355 out_err:
356 	closedir(procfs);
357 	return 1;
358 }
359 
360 #define INVALID_VAL	(~0L)
/*
 * get_long_ns_after_colon - parse a ns duration found after the first ':'
 *
 * Returns the parsed nanosecond value, or -1 when no ':' is present.
 */
static long get_long_ns_after_colon(char *start)
{
	char *sep;

	sep = strstr(start, ":");
	if (!sep)
		return -1;

	/* the value begins right after the ':' */
	return parse_ns_duration(sep + 1);
}
376 
/*
 * get_long_after_colon - parse a plain integer found after the first ':'
 *
 * Returns the parsed value, or -1 when no ':' is present.
 */
static long get_long_after_colon(char *start)
{
	char *sep;

	sep = strstr(start, ":");
	if (!sep)
		return -1;

	/* the value begins right after the ':' */
	return get_llong_from_str(sep + 1);
}
392 
/*
 * parse_prio - parse a policy:priority string into *sched_param
 *
 * Accepted formats:
 * SCHED_OTHER:
 *		o:<prio>
 *		O:<prio>
 * SCHED_RR:
 *		r:<prio>
 *		R:<prio>
 * SCHED_FIFO:
 *		f:<prio>
 *		F:<prio>
 * SCHED_DEADLINE:
 *		d:runtime:period
 *		D:runtime:period
 *
 * runtime and period take a ns/us/ms/s suffix (see parse_ns_duration).
 * Returns 0 on success, -1 on a parse or range error.
 */
int parse_prio(char *arg, struct sched_attr *sched_param)
{
	long prio;
	long runtime;
	long period;

	memset(sched_param, 0, sizeof(*sched_param));
	sched_param->size = sizeof(*sched_param);

	switch (arg[0]) {
	case 'd':
	case 'D':
		/* d:runtime:period — shortest valid form is "d:N:N"-ish, 4 chars min */
		if (strlen(arg) < 4)
			return -1;

		/* runtime follows the first ':' */
		runtime = get_long_ns_after_colon(arg);
		if (runtime == INVALID_VAL)
			return -1;

		/* starting at arg[2] skips the first ':', so this finds the second */
		period = get_long_ns_after_colon(&arg[2]);
		if (period == INVALID_VAL)
			return -1;

		/* the deadline model requires runtime <= period */
		if (runtime > period)
			return -1;

		sched_param->sched_policy   = SCHED_DEADLINE;
		sched_param->sched_runtime  = runtime;
		/* deadline defaults to the period */
		sched_param->sched_deadline = period;
		sched_param->sched_period   = period;
		break;
	case 'f':
	case 'F':
		/* f:prio */
		prio = get_long_after_colon(arg);
		if (prio == INVALID_VAL)
			return -1;

		/* bound the priority to the policy's valid range */
		if (prio < sched_get_priority_min(SCHED_FIFO))
			return -1;
		if (prio > sched_get_priority_max(SCHED_FIFO))
			return -1;

		sched_param->sched_policy   = SCHED_FIFO;
		sched_param->sched_priority = prio;
		break;
	case 'r':
	case 'R':
		/* r:prio */
		prio = get_long_after_colon(arg);
		if (prio == INVALID_VAL)
			return -1;

		if (prio < sched_get_priority_min(SCHED_RR))
			return -1;
		if (prio > sched_get_priority_max(SCHED_RR))
			return -1;

		sched_param->sched_policy   = SCHED_RR;
		sched_param->sched_priority = prio;
		break;
	case 'o':
	case 'O':
		/* o:prio — here "prio" is a nice value, not an RT priority */
		prio = get_long_after_colon(arg);
		if (prio == INVALID_VAL)
			return -1;

		if (prio < MIN_NICE)
			return -1;
		if (prio > MAX_NICE)
			return -1;

		sched_param->sched_policy   = SCHED_OTHER;
		sched_param->sched_nice = prio;
		break;
	default:
		return -1;
	}
	return 0;
}
490 
/*
 * set_cpu_dma_latency - set the /dev/cpu_dma_latency
 *
 * This is used to reduce the exit from idle latency. The value
 * will be reset once the file descriptor of /dev/cpu_dma_latency
 * is closed.
 *
 * Return: the /dev/cpu_dma_latency file descriptor
 */
int set_cpu_dma_latency(int32_t latency)
{
	int retval;
	int fd;

	fd = open("/dev/cpu_dma_latency", O_RDWR);
	if (fd < 0) {
		err_msg("Error opening /dev/cpu_dma_latency\n");
		return -1;
	}

	/*
	 * The interface takes a single 32-bit value; require the full
	 * write (the old check accepted a short write of 1-3 bytes).
	 */
	retval = write(fd, &latency, sizeof(latency));
	if (retval < (int)sizeof(latency)) {
		err_msg("Error setting /dev/cpu_dma_latency\n");
		close(fd);
		return -1;
	}

	debug_msg("Set /dev/cpu_dma_latency to %d\n", latency);

	return fd;
}
522 
523 #ifdef HAVE_LIBCPUPOWER_SUPPORT
524 static unsigned int **saved_cpu_idle_disable_state;
525 static size_t saved_cpu_idle_disable_state_alloc_ctr;
526 
/*
 * save_cpu_idle_state_disable - save disable for all idle states of a cpu
 *
 * Saves the current disable of all idle states of a cpu, to be subsequently
 * restored via restore_cpu_idle_disable_state.
 *
 * Return: idle state count on success, negative on error
 */
int save_cpu_idle_disable_state(unsigned int cpu)
{
	unsigned int nr_states;
	unsigned int state;
	int disabled;
	int nr_cpus;

	nr_states = cpuidle_state_count(cpu);

	if (nr_states == 0)
		return 0;

	/* lazily allocate the per-cpu pointer table on first use */
	if (saved_cpu_idle_disable_state == NULL) {
		nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
		saved_cpu_idle_disable_state = calloc(nr_cpus, sizeof(unsigned int *));
		if (!saved_cpu_idle_disable_state)
			return -1;
	}

	saved_cpu_idle_disable_state[cpu] = calloc(nr_states, sizeof(unsigned int));
	if (!saved_cpu_idle_disable_state[cpu])
		return -1;
	/* counts cpus with saved state; restore frees the table when it hits 0 */
	saved_cpu_idle_disable_state_alloc_ctr++;

	/*
	 * Record the disable flag of every idle state.
	 * NOTE(review): on a mid-loop error the per-cpu buffer stays
	 * allocated; callers are expected to clean up via
	 * free_cpu_idle_disable_states() (see its comment below).
	 */
	for (state = 0; state < nr_states; state++) {
		disabled = cpuidle_is_state_disabled(cpu, state);
		if (disabled < 0)
			return disabled;
		saved_cpu_idle_disable_state[cpu][state] = disabled;
	}

	return nr_states;
}
568 
/*
 * restore_cpu_idle_disable_state - restore disable for all idle states of a cpu
 *
 * Restores the current disable state of all idle states of a cpu that was
 * previously saved by save_cpu_idle_disable_state.
 *
 * Return: idle state count on success, negative on error
 */
int restore_cpu_idle_disable_state(unsigned int cpu)
{
	unsigned int nr_states;
	unsigned int state;
	int disabled;
	int result;

	nr_states = cpuidle_state_count(cpu);

	if (nr_states == 0)
		return 0;

	/* nothing was ever saved */
	if (!saved_cpu_idle_disable_state)
		return -1;

	for (state = 0; state < nr_states; state++) {
		/* nothing was saved for this particular cpu */
		if (!saved_cpu_idle_disable_state[cpu])
			return -1;
		disabled = saved_cpu_idle_disable_state[cpu][state];
		result = cpuidle_state_disable(cpu, state, disabled);
		if (result < 0)
			return result;
	}

	/* this cpu's saved state is no longer needed */
	free(saved_cpu_idle_disable_state[cpu]);
	saved_cpu_idle_disable_state[cpu] = NULL;
	saved_cpu_idle_disable_state_alloc_ctr--;
	/* last cpu restored: drop the pointer table too */
	if (saved_cpu_idle_disable_state_alloc_ctr == 0) {
		free(saved_cpu_idle_disable_state);
		saved_cpu_idle_disable_state = NULL;
	}

	return nr_states;
}
611 
612 /*
613  * free_cpu_idle_disable_states - free saved idle state disable for all cpus
614  *
615  * Frees the memory used for storing cpu idle state disable for all cpus
616  * and states.
617  *
618  * Normally, the memory is freed automatically in
619  * restore_cpu_idle_disable_state; this is mostly for cleaning up after an
620  * error.
621  */
free_cpu_idle_disable_states(void)622 void free_cpu_idle_disable_states(void)
623 {
624 	int cpu;
625 	int nr_cpus;
626 
627 	if (!saved_cpu_idle_disable_state)
628 		return;
629 
630 	nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
631 
632 	for (cpu = 0; cpu < nr_cpus; cpu++) {
633 		free(saved_cpu_idle_disable_state[cpu]);
634 		saved_cpu_idle_disable_state[cpu] = NULL;
635 	}
636 
637 	free(saved_cpu_idle_disable_state);
638 	saved_cpu_idle_disable_state = NULL;
639 }
640 
/*
 * set_deepest_cpu_idle_state - limit idle state of cpu
 *
 * Disables all idle states deeper than the one given in
 * deepest_state (assuming states with higher number are deeper).
 *
 * This is used to reduce the exit from idle latency. Unlike
 * set_cpu_dma_latency, it can disable idle states per cpu.
 *
 * Return: idle state count on success, negative on error
 */
int set_deepest_cpu_idle_state(unsigned int cpu, unsigned int deepest_state)
{
	unsigned int state_count;
	unsigned int idx;
	int ret;

	state_count = cpuidle_state_count(cpu);

	/* states with a higher index are deeper: disable everything past it */
	for (idx = deepest_state + 1; idx < state_count; idx++) {
		ret = cpuidle_state_disable(cpu, idx, 1);
		if (ret < 0)
			return ret;
	}

	return state_count;
}
668 #endif /* HAVE_LIBCPUPOWER_SUPPORT */
669 
670 #define _STR(x) #x
671 #define STR(x) _STR(x)
672 
673 /*
674  * find_mount - find a the mount point of a given fs
675  *
676  * Returns 0 if mount is not found, otherwise return 1 and fill mp
677  * with the mount point.
678  */
find_mount(const char * fs,char * mp,int sizeof_mp)679 static const int find_mount(const char *fs, char *mp, int sizeof_mp)
680 {
681 	char mount_point[MAX_PATH+1];
682 	char type[100];
683 	int found = 0;
684 	FILE *fp;
685 
686 	fp = fopen("/proc/mounts", "r");
687 	if (!fp)
688 		return 0;
689 
690 	while (fscanf(fp, "%*s %" STR(MAX_PATH) "s %99s %*s %*d %*d\n",	mount_point, type) == 2) {
691 		if (strcmp(type, fs) == 0) {
692 			found = 1;
693 			break;
694 		}
695 	}
696 	fclose(fp);
697 
698 	if (!found)
699 		return 0;
700 
701 	memset(mp, 0, sizeof_mp);
702 	strncpy(mp, mount_point, sizeof_mp - 1);
703 
704 	debug_msg("Fs %s found at %s\n", fs, mp);
705 	return 1;
706 }
707 
708 /*
709  * get_self_cgroup - get the current thread cgroup path
710  *
711  * Parse /proc/$$/cgroup file to get the thread's cgroup. As an example of line to parse:
712  *
713  * 0::/user.slice/user-0.slice/session-3.scope'\n'
714  *
715  * This function is interested in the content after the second : and before the '\n'.
716  *
717  * Returns 1 if a string was found, 0 otherwise.
718  */
get_self_cgroup(char * self_cg,int sizeof_self_cg)719 static int get_self_cgroup(char *self_cg, int sizeof_self_cg)
720 {
721 	char path[MAX_PATH], *start;
722 	int fd, retval;
723 
724 	snprintf(path, MAX_PATH, "/proc/%d/cgroup", getpid());
725 
726 	fd = open(path, O_RDONLY);
727 	if (fd < 0)
728 		return 0;
729 
730 	retval = read(fd, path, MAX_PATH);
731 
732 	close(fd);
733 
734 	if (retval <= 0)
735 		return 0;
736 
737 	start = path;
738 
739 	start = strstr(start, ":");
740 	if (!start)
741 		return 0;
742 
743 	/* skip ":" */
744 	start++;
745 
746 	start = strstr(start, ":");
747 	if (!start)
748 		return 0;
749 
750 	/* skip ":" */
751 	start++;
752 
753 	if (strlen(start) >= sizeof_self_cg)
754 		return 0;
755 
756 	snprintf(self_cg, sizeof_self_cg, "%s", start);
757 
758 	/* Swap '\n' with '\0' */
759 	start = strstr(self_cg, "\n");
760 
761 	/* there must be '\n' */
762 	if (!start)
763 		return 0;
764 
765 	/* ok, it found a string after the second : and before the \n */
766 	*start = '\0';
767 
768 	return 1;
769 }
770 
/*
 * set_pid_cgroup - Set cgroup to pid_t pid
 *
 * (The comment previously named this function set_comm_cgroup.)
 *
 * If cgroup argument is not NULL, the pid will move to the given cgroup.
 * Otherwise, the cgroup of the calling, i.e., rtla, thread will be used.
 *
 * Supports cgroup v2.
 *
 * Returns 1 on success, 0 otherwise.
 */
int set_pid_cgroup(pid_t pid, const char *cgroup)
{
	/* reserve room so "/cgroup.procs" always fits when appended below */
	char cgroup_path[MAX_PATH - strlen("/cgroup.procs")];
	char cgroup_procs[MAX_PATH];
	char pid_str[24];
	int retval;
	int cg_fd;

	/* find_mount() fills cgroup_path with the cgroup2 mount point */
	retval = find_mount("cgroup2", cgroup_path, sizeof(cgroup_path));
	if (!retval) {
		err_msg("Did not find cgroupv2 mount point\n");
		return 0;
	}

	if (!cgroup) {
		/* append this thread's own cgroup to the mount point path */
		retval = get_self_cgroup(&cgroup_path[strlen(cgroup_path)],
				sizeof(cgroup_path) - strlen(cgroup_path));
		if (!retval) {
			err_msg("Did not find self cgroup\n");
			return 0;
		}
	} else {
		/* append the user-supplied cgroup name */
		snprintf(&cgroup_path[strlen(cgroup_path)],
				sizeof(cgroup_path) - strlen(cgroup_path), "%s/", cgroup);
	}

	snprintf(cgroup_procs, MAX_PATH, "%s/cgroup.procs", cgroup_path);

	debug_msg("Using cgroup path at: %s\n", cgroup_procs);

	cg_fd = open(cgroup_procs, O_RDWR);
	if (cg_fd < 0)
		return 0;

	/* writing a pid to cgroup.procs moves the process into the cgroup */
	snprintf(pid_str, sizeof(pid_str), "%d\n", pid);

	retval = write(cg_fd, pid_str, strlen(pid_str));
	if (retval < 0)
		err_msg("Error setting cgroup attributes for pid:%s - %s\n",
				pid_str, strerror(errno));
	else
		debug_msg("Set cgroup attributes for pid:%s\n", pid_str);

	close(cg_fd);

	return (retval >= 0);
}
828 
/**
 * set_comm_cgroup - Set cgroup to threads starting with char *comm_prefix
 *
 * If cgroup argument is not NULL, the threads will move to the given cgroup.
 * Otherwise, the cgroup of the calling, i.e., rtla, thread will be used.
 *
 * Supports cgroup v2.
 *
 * Returns 1 on success, 0 otherwise.
 */
int set_comm_cgroup(const char *comm_prefix, const char *cgroup)
{
	/* reserve room so "/cgroup.procs" always fits when appended below */
	char cgroup_path[MAX_PATH - strlen("/cgroup.procs")];
	char cgroup_procs[MAX_PATH];
	struct dirent *proc_entry;
	DIR *procfs;
	int retval;
	int cg_fd;

	if (strlen(comm_prefix) >= MAX_PATH) {
		err_msg("Command prefix is too long: %d < strlen(%s)\n",
			MAX_PATH, comm_prefix);
		return 0;
	}

	/* find_mount() fills cgroup_path with the cgroup2 mount point */
	retval = find_mount("cgroup2", cgroup_path, sizeof(cgroup_path));
	if (!retval) {
		err_msg("Did not find cgroupv2 mount point\n");
		return 0;
	}

	if (!cgroup) {
		/* append this thread's own cgroup to the mount point path */
		retval = get_self_cgroup(&cgroup_path[strlen(cgroup_path)],
				sizeof(cgroup_path) - strlen(cgroup_path));
		if (!retval) {
			err_msg("Did not find self cgroup\n");
			return 0;
		}
	} else {
		/* append the user-supplied cgroup name */
		snprintf(&cgroup_path[strlen(cgroup_path)],
				sizeof(cgroup_path) - strlen(cgroup_path), "%s/", cgroup);
	}

	snprintf(cgroup_procs, MAX_PATH, "%s/cgroup.procs", cgroup_path);

	debug_msg("Using cgroup path at: %s\n", cgroup_procs);

	cg_fd = open(cgroup_procs, O_RDWR);
	if (cg_fd < 0)
		return 0;

	procfs = opendir("/proc");
	if (!procfs) {
		err_msg("Could not open procfs\n");
		goto out_cg;
	}

	/* move every pid whose comm starts with comm_prefix */
	while ((proc_entry = readdir(procfs))) {

		retval = procfs_is_workload_pid(comm_prefix, proc_entry);
		if (!retval)
			continue;

		/* writing a pid to cgroup.procs moves it into the cgroup */
		retval = write(cg_fd, proc_entry->d_name, strlen(proc_entry->d_name));
		if (retval < 0) {
			err_msg("Error setting cgroup attributes for pid:%s - %s\n",
				proc_entry->d_name, strerror(errno));
			goto out_procfs;
		}

		debug_msg("Set cgroup attributes for pid:%s\n", proc_entry->d_name);
	}

	closedir(procfs);
	close(cg_fd);
	return 1;

out_procfs:
	closedir(procfs);
out_cg:
	close(cg_fd);
	return 0;
}
912 
/**
 * auto_house_keeping - Automatically move rtla out of measurement threads
 *
 * Try to move rtla away from the tracer, if possible.
 *
 * Returns 1 on success, 0 otherwise.
 */
int auto_house_keeping(cpu_set_t *monitored_cpus)
{
	cpu_set_t current_affinity, hk_cpus;
	int ret;

	/* first get the CPUs in which rtla can actually run. */
	ret = sched_getaffinity(getpid(), sizeof(current_affinity), &current_affinity);
	if (ret == -1) {
		debug_msg("Could not get rtla affinity, rtla might run with the threads!\n");
		return 0;
	}

	/* then check if the existing setup is already good. */
	CPU_AND(&hk_cpus, &current_affinity, monitored_cpus);
	if (!CPU_COUNT(&hk_cpus)) {
		debug_msg("rtla and the monitored CPUs do not share CPUs.");
		debug_msg("Skipping auto house-keeping\n");
		return 1;
	}

	/* remove the intersection */
	CPU_XOR(&hk_cpus, &current_affinity, monitored_cpus);

	/* get only those that rtla can run */
	CPU_AND(&hk_cpus, &hk_cpus, &current_affinity);

	/* is there any cpu left? */
	if (!CPU_COUNT(&hk_cpus)) {
		debug_msg("Could not find any CPU for auto house-keeping\n");
		return 0;
	}

	ret = sched_setaffinity(getpid(), sizeof(hk_cpus), &hk_cpus);
	if (ret == -1) {
		debug_msg("Could not set affinity for auto house-keeping\n");
		return 0;
	}

	debug_msg("rtla automatically moved to an auto house-keeping cpu set\n");

	return 1;
}
962