xref: /illumos-gate/usr/src/cmd/latencytop/latencytop.c (revision 4b9db4f6425b1a08fca4390f446072c4a6aae8d5)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2008-2009, Intel Corporation.
23  * All Rights Reserved.
24  */
25 
#include <unistd.h>
#include <getopt.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <limits.h>
#include <errno.h>
#include <libgen.h>
#include <signal.h>
#include "latencytop.h"
35 
/*
 * Compare the string `tok` against `name`. `name` must be an array (in
 * practice a string literal) so that sizeof yields its length including the
 * terminating NUL; comparing that many bytes makes this an exact match
 * rather than a prefix match. Evaluates to 0 on a match, like strncmp().
 */
#define	CMPOPT(tok, name)	strncmp((tok), (name), sizeof (name))
37 
38 /*
39  * This variable is used to check if "dynamic variable drop" in dtrace
40  * has happened.
41  */
42 boolean_t lt_drop_detected = 0;
43 
44 lt_config_t g_config;
45 
/*
 * Identifiers for the command line options whose repetition is tracked by
 * check_opt_dup(). The values index the per-option tables in that function,
 * so the order here must match the order of its error messages.
 */
typedef enum {
	LT_CMDOPT_INTERVAL = 0,		/* -t */
	LT_CMDOPT_LOG_FILE,		/* -o */
	LT_CMDOPT_LOG_LEVEL,		/* -k */
	LT_CMDOPT_LOG_INTERVAL,		/* -l */
	LT_CMDOPT_CONFIG_FILE,		/* -c */
	LT_CMDOPT_F_FILTER,		/* -f [no]filter */
	LT_CMDOPT_F_SCHED,		/* -f [no]sched */
	LT_CMDOPT_F_SOBJ,		/* -f [no]sobj */
	LT_CMDOPT_F_LOW,		/* -f [no]low */
	LT_CMDOPT_SELECT,		/* -s */
	LT_CMDOPT__LAST			/* Count of options; keep last */
} lt_cmd_option_id_t;
59 
60 /*
61  * Check for duplicate command line options.
62  * Returns TRUE if duplicate options with different values are found,
63  * returns FALSE otherwise.
64  */
65 static int
66 check_opt_dup(lt_cmd_option_id_t id, uint64_t value)
67 {
68 
69 	static int opt_set[(int)LT_CMDOPT__LAST];
70 	static uint64_t opt_val[(int)LT_CMDOPT__LAST];
71 
72 	const char *errmsg[] = {
73 		"-t is set more than once with different values.",
74 		"-o is set more than once.",
75 		"-k is set more than once with different values.",
76 		"-l is set more than once with different values.",
77 		"-c is set more than once.",
78 		"-f [no]filter is set more than once with different values.",
79 		"-f [no]sched is set more than once with different values.",
80 		"-f [no]sobj is set more than once with different values.",
81 		"-f [no]low is set more than once with different values.",
82 		"-s is set more than once with different values."
83 	};
84 
85 	g_assert(sizeof (errmsg)/sizeof (errmsg[0]) == (int)LT_CMDOPT__LAST);
86 
87 	if (!opt_set[(int)id]) {
88 		opt_set[(int)id] = TRUE;
89 		opt_val[(int)id] = value;
90 		return (FALSE);
91 	}
92 
93 	if (opt_val[(int)id] != value) {
94 		(void) fprintf(stderr, "%s\n", errmsg[(int)id]);
95 		return (TRUE);
96 	}
97 
98 	return (FALSE);
99 }
100 
101 /*
102  * Print command-line help message.
103  */
104 static void
105 print_usage(const char *execname, int long_help)
106 {
107 	char buffer[PATH_MAX];
108 	(void) snprintf(buffer, sizeof (buffer), "%s", execname);
109 
110 	if (!long_help) {
111 		/* Print short help to stderr. */
112 		(void) fprintf(stderr, "Usage: %s [option(s)], ",
113 		    basename(buffer));
114 		(void) fprintf(stderr, "use '%s -h' for details.\n",
115 		    basename(buffer));
116 		return;
117 	}
118 
119 	(void) printf("Usage: %s [option(s)]\n", basename(buffer));
120 	(void) printf("Options:\n"
121 	    "    -h, --help\n"
122 	    "        Print this help.\n"
123 	    "    -t, --interval TIME\n"
124 	    "        Set refresh interval to TIME. "
125 	    "Valid range [1...60] seconds, default = 5\n"
126 	/*
127 	 * Option "-c, --config FILE" is not user-visible for now.
128 	 * When we have chance to properly document the format of translation
129 	 * rules, we'll make it user-visible.
130 	 */
131 	    "    -o, --output-log-file FILE\n"
132 	    "        Output kernel log to FILE. Default = "
133 	    DEFAULT_KLOG_FILE "\n"
134 	    "    -k, --kernel-log-level LEVEL\n"
135 	    "        Set kernel log level to LEVEL.\n"
136 	    "        0(default) = None, 1 = Unmapped, 2 = Mapped, 3 = All.\n"
137 	    "    -f, --feature [no]feature1,[no]feature2,...\n"
138 	    "        Enable/disable features in LatencyTOP.\n"
139 	    "        [no]filter:\n"
140 	    "        Filter large interruptible latencies, e.g. sleep.\n"
141 	    "        [no]sched:\n"
142 	    "        Monitors sched (PID=0).\n"
143 	    "        [no]sobj:\n"
144 	    "        Monitors synchronization objects.\n"
145 	    "        [no]low:\n"
146 	    "        Lower overhead by sampling small latencies.\n"
147 	    "    -l, --log-period TIME\n"
148 	    "        Write and restart log every TIME seconds, TIME >= 60\n"
149 	    "    -s --select [ pid=<pid> | pgid=<pgid> ]\n"
150 	    "        Monitor only the given process or processes in the "
151 	    "given process group.\n");
152 }
153 
/*
 * Handler installed by main() for SIGINT and SIGTERM. It sends "q" through
 * lt_gpipe_break() so that latencytop exits properly instead of being
 * killed. The signal number itself is not used.
 */
/* ARGSUSED */
static void
signal_handler(int sig)
{
	lt_gpipe_break("q");
}
163 
/*
 * Convert a decimal string to an int.
 *
 * On success the value is stored in *result and 0 is returned. Returns -1,
 * leaving *result untouched, when:
 *   - str or result is NULL,
 *   - the string contains no digits (for example "" or "abc"),
 *   - extra characters follow the number, or
 *   - the value does not fit in an int.
 */
static int
to_int(const char *str, int *result)
{
	char *tail = NULL;
	long ret;

	if (str == NULL || result == NULL) {
		return (-1);
	}

	/* strtol() reports overflow only through errno, so clear it first. */
	errno = 0;
	ret = strtol(str, &tail, 10);

	/* Nothing was converted, or junk follows the number. */
	if (tail == str || *tail != '\0') {
		return (-1);
	}

	/* Out of range for long (ERANGE) or for the int handed back. */
	if (errno == ERANGE || ret < INT_MIN || ret > INT_MAX) {
		return (-1);
	}

	*result = (int)ret;

	return (0);
}
187 
188 /*
189  * The main function.
190  */
191 int
192 main(int argc, char *argv[])
193 {
194 	const char *opt_string = "t:o:k:hf:l:c:s:";
195 	struct option const longopts[] = {
196 		{"interval", required_argument, NULL, 't'},
197 		{"output-log-file", required_argument, NULL, 'o'},
198 		{"kernel-log-level", required_argument, NULL, 'k'},
199 		{"help", no_argument, NULL, 'h'},
200 		{"feature", required_argument, NULL, 'f'},
201 		{"log-period", required_argument, NULL, 'l'},
202 		{"config", required_argument, NULL, 'c'},
203 		{"select", required_argument, NULL, 's'},
204 		{NULL, 0, NULL, 0}
205 	};
206 
207 	int optc;
208 	int longind = 0;
209 	int running = 1;
210 	int unknown_option = FALSE;
211 	int refresh_interval = 5;
212 	int klog_level = 0;
213 	int log_interval = 0;
214 	long long last_logged = 0;
215 	char *token = NULL;
216 	int retval = 0;
217 	int gpipe;
218 	int err;
219 	uint64_t collect_end;
220 	uint64_t current_time;
221 	uint64_t delta_time;
222 	char logfile[PATH_MAX] = "";
223 	int select_id;
224 	int select_value;
225 	char *select_str;
226 	boolean_t no_dtrace_cleanup = B_TRUE;
227 
228 	lt_gpipe_init();
229 	(void) signal(SIGINT, signal_handler);
230 	(void) signal(SIGTERM, signal_handler);
231 
232 	/* Default global settings */
233 	g_config.lt_cfg_enable_filter = 0;
234 	g_config.lt_cfg_trace_sched = 0;
235 	g_config.lt_cfg_trace_syncobj = 1;
236 	g_config.lt_cfg_low_overhead_mode = 0;
237 	g_config.lt_cfg_trace_pid = 0;
238 	g_config.lt_cfg_trace_pgid = 0;
239 	/* dtrace snapshot every 1 second */
240 	g_config.lt_cfg_snap_interval = 1000;
241 #ifdef EMBED_CONFIGS
242 	g_config.lt_cfg_config_name = NULL;
243 #else
244 	g_config.lt_cfg_config_name = lt_strdup(DEFAULT_CONFIG_NAME);
245 #endif
246 
247 	/* Parse command line arguments. */
248 	while ((optc = getopt_long(argc, argv, opt_string,
249 	    longopts, &longind)) != -1) {
250 		switch (optc) {
251 		case 'h':
252 			print_usage(argv[0], TRUE);
253 			goto end_none;
254 		case 't':
255 			if (to_int(optarg, &refresh_interval) != 0 ||
256 			    refresh_interval < 1 || refresh_interval > 60) {
257 				lt_display_error(
258 				    "Invalid refresh interval: %s\n", optarg);
259 				unknown_option = TRUE;
260 			} else if (check_opt_dup(LT_CMDOPT_INTERVAL,
261 			    refresh_interval)) {
262 				unknown_option = TRUE;
263 			}
264 
265 			break;
266 		case 'k':
267 			if (to_int(optarg, &klog_level) != 0 ||
268 			    lt_klog_set_log_level(klog_level) != 0) {
269 				lt_display_error(
270 				    "Invalid log level: %s\n", optarg);
271 				unknown_option = TRUE;
272 			} else if (check_opt_dup(LT_CMDOPT_LOG_LEVEL,
273 			    refresh_interval)) {
274 				unknown_option = TRUE;
275 			}
276 
277 			break;
278 		case 'o':
279 			if (check_opt_dup(LT_CMDOPT_LOG_FILE, optind)) {
280 				unknown_option = TRUE;
281 			} else if (strlen(optarg) >= sizeof (logfile)) {
282 				lt_display_error(
283 				    "Log file name is too long: %s\n",
284 				    optarg);
285 				unknown_option = TRUE;
286 			} else {
287 				(void) strncpy(logfile, optarg,
288 				    sizeof (logfile));
289 			}
290 
291 			break;
292 		case 'f':
293 			for (token = strtok(optarg, ","); token != NULL;
294 			    token = strtok(NULL, ",")) {
295 				int v = TRUE;
296 
297 				if (strncmp(token, "no", 2) == 0) {
298 					v = FALSE;
299 					token = &token[2];
300 				}
301 
302 				if (CMPOPT(token, "filter") == 0) {
303 					if (check_opt_dup(LT_CMDOPT_F_FILTER,
304 					    v)) {
305 						unknown_option = TRUE;
306 					} else {
307 						g_config.lt_cfg_enable_filter
308 						    = v;
309 					}
310 				} else if (CMPOPT(token, "sched") == 0) {
311 					if (check_opt_dup(LT_CMDOPT_F_SCHED,
312 					    v)) {
313 						unknown_option = TRUE;
314 					} else {
315 						g_config.lt_cfg_trace_sched
316 						    = v;
317 					}
318 				} else if (CMPOPT(token, "sobj") == 0) {
319 					if (check_opt_dup(LT_CMDOPT_F_SOBJ,
320 					    v)) {
321 						unknown_option = TRUE;
322 					} else {
323 						g_config.lt_cfg_trace_syncobj
324 						    = v;
325 					}
326 				} else if (CMPOPT(token, "low") == 0) {
327 					if (check_opt_dup(LT_CMDOPT_F_LOW,
328 					    v)) {
329 						unknown_option = TRUE;
330 					} else {
331 						g_config.
332 						    lt_cfg_low_overhead_mode
333 						    = v;
334 					}
335 				} else {
336 					lt_display_error(
337 					    "Unknown feature: %s\n", token);
338 					unknown_option = TRUE;
339 				}
340 			}
341 
342 			break;
343 		case 'l':
344 			if (to_int(optarg, &log_interval) != 0 ||
345 			    log_interval < 60) {
346 				lt_display_error(
347 				    "Invalid log interval: %s\n", optarg);
348 				unknown_option = TRUE;
349 			} else if (check_opt_dup(LT_CMDOPT_LOG_INTERVAL,
350 			    log_interval)) {
351 				unknown_option = TRUE;
352 			}
353 
354 			break;
355 		case 'c':
356 			if (strlen(optarg) >= PATH_MAX) {
357 				lt_display_error(
358 				    "Configuration name is too long.\n");
359 				unknown_option = TRUE;
360 			} else if (check_opt_dup(LT_CMDOPT_CONFIG_FILE,
361 			    optind)) {
362 				unknown_option = TRUE;
363 			} else {
364 				g_config.lt_cfg_config_name =
365 				    lt_strdup(optarg);
366 			}
367 
368 			break;
369 		case 's':
370 			if (strncmp(optarg, "pid=", 4) == 0) {
371 				select_id = 0;
372 				select_str = &optarg[4];
373 			} else if (strncmp(optarg, "pgid=", 5) == 0) {
374 				select_id = 1;
375 				select_str = &optarg[5];
376 			} else {
377 				lt_display_error(
378 				    "Invalid select option: %s\n", optarg);
379 				unknown_option = TRUE;
380 				break;
381 			}
382 
383 			if (to_int(select_str, &select_value) != 0) {
384 				lt_display_error(
385 				    "Invalid select option: %s\n", optarg);
386 				unknown_option = TRUE;
387 				break;
388 			}
389 
390 			if (select_value <= 0) {
391 				lt_display_error(
392 				    "Process/process group ID must be "
393 				    "greater than 0: %s\n", optarg);
394 				unknown_option = TRUE;
395 				break;
396 			}
397 
398 			if (check_opt_dup(LT_CMDOPT_SELECT,
399 			    (((uint64_t)select_id) << 32) | select_value)) {
400 				unknown_option = TRUE;
401 				break;
402 			}
403 
404 			if (select_id == 0) {
405 				g_config.lt_cfg_trace_pid = select_value;
406 			} else {
407 				g_config.lt_cfg_trace_pgid = select_value;
408 			}
409 			break;
410 		default:
411 			unknown_option = TRUE;
412 			break;
413 		}
414 	}
415 
416 	if (!unknown_option && strlen(logfile) > 0) {
417 		err = lt_klog_set_log_file(logfile);
418 
419 		if (err == -1) {
420 			lt_display_error("Log file name is too long: %s\n",
421 			    logfile);
422 			unknown_option = TRUE;
423 		} else if (err == -2) {
424 			lt_display_error("Cannot write to log file: %s\n",
425 			    logfile);
426 			unknown_option = TRUE;
427 		}
428 	}
429 
430 	/* Throw error for invalid/junk arguments */
431 	if (optind  < argc) {
432 		int tmpind = optind;
433 		(void) fprintf(stderr, "Unknown option(s): ");
434 
435 		while (tmpind < argc) {
436 			(void) fprintf(stderr, "%s ", argv[tmpind++]);
437 		}
438 
439 		(void) fprintf(stderr, "\n");
440 		unknown_option = TRUE;
441 	}
442 
443 	if (unknown_option) {
444 		print_usage(argv[0], FALSE);
445 		retval = 1;
446 		goto end_none;
447 	}
448 
449 	(void) printf("%s\n%s\n", TITLE, COPYRIGHT);
450 
451 	/*
452 	 * Initialization
453 	 */
454 	lt_klog_init();
455 
456 	if (lt_table_init() != 0) {
457 		lt_display_error("Unable to load configuration table.\n");
458 		retval = 1;
459 		goto end_notable;
460 	}
461 
462 	if (lt_dtrace_init() != 0) {
463 		lt_display_error("Unable to initialize dtrace.\n");
464 		retval = 1;
465 		goto end_nodtrace;
466 	}
467 
468 	last_logged = lt_millisecond();
469 
470 	(void) printf("Collecting data for %d seconds...\n",
471 	    refresh_interval);
472 
473 	gpipe = lt_gpipe_readfd();
474 	collect_end = last_logged + refresh_interval * 1000;
475 	for (;;) {
476 		fd_set read_fd;
477 		struct timeval timeout;
478 		int tsleep = collect_end - lt_millisecond();
479 
480 		if (tsleep <= 0) {
481 			break;
482 		}
483 
484 		/*
485 		 * Interval when we call dtrace_status() and collect
486 		 * aggregated data.
487 		 */
488 		if (tsleep > g_config.lt_cfg_snap_interval) {
489 			tsleep = g_config.lt_cfg_snap_interval;
490 		}
491 
492 		timeout.tv_sec = tsleep / 1000;
493 		timeout.tv_usec = (tsleep % 1000) * 1000;
494 
495 		FD_ZERO(&read_fd);
496 		FD_SET(gpipe, &read_fd);
497 
498 		if (select(gpipe + 1, &read_fd, NULL, NULL, &timeout) > 0) {
499 			goto end_ubreak;
500 		}
501 
502 		(void) lt_dtrace_work(0);
503 	}
504 
505 	lt_display_init();
506 
507 	do {
508 		current_time = lt_millisecond();
509 
510 		lt_stat_clear_all();
511 		(void) lt_dtrace_collect();
512 
513 		delta_time = current_time;
514 		current_time = lt_millisecond();
515 		delta_time = current_time - delta_time;
516 
517 		if (log_interval > 0 &&
518 		    current_time - last_logged > log_interval * 1000) {
519 			lt_klog_write();
520 			last_logged = current_time;
521 		}
522 
523 		running = lt_display_loop(refresh_interval * 1000 -
524 		    delta_time);
525 
526 		/*
527 		 * This is to avoid dynamic variable drop
528 		 * in DTrace.
529 		 */
530 		if (lt_drop_detected == B_TRUE) {
531 			if (lt_dtrace_deinit() != 0) {
532 				no_dtrace_cleanup = B_FALSE;
533 				retval = 1;
534 				break;
535 			}
536 
537 			lt_drop_detected = B_FALSE;
538 			if (lt_dtrace_init() != 0) {
539 				retval = 1;
540 				break;
541 			}
542 		}
543 	} while (running != 0);
544 
545 	lt_klog_write();
546 
547 	/* Cleanup */
548 	lt_display_deinit();
549 
550 end_ubreak:
551 	if (no_dtrace_cleanup == B_FALSE || lt_dtrace_deinit() != 0)
552 		retval = 1;
553 
554 	lt_stat_free_all();
555 
556 end_nodtrace:
557 	lt_table_deinit();
558 
559 end_notable:
560 	lt_klog_deinit();
561 
562 end_none:
563 	lt_gpipe_deinit();
564 
565 	if (g_config.lt_cfg_config_name != NULL) {
566 		free(g_config.lt_cfg_config_name);
567 	}
568 
569 	return (retval);
570 }
571