xref: /titanic_50/usr/src/cmd/latencytop/common/latencytop.c (revision 53089ab7c84db6fb76c16ca50076c147cda11757)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2008-2009, Intel Corporation.
23  * All Rights Reserved.
24  */
25 
#include <errno.h>
#include <getopt.h>
#include <libgen.h>
#include <limits.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "latencytop.h"
35 
/*
 * Compare the string (a) against (b) for at most sizeof (b) bytes.
 * (b) must be a string literal (or char array): sizeof (b) then counts the
 * terminating NUL, so the comparison is an exact match rather than a
 * prefix match. Evaluates to 0 on a match, like strncmp().
 */
#define	CMPOPT(a, b)	strncmp((a), (b), sizeof (b))

/*
 * This variable is used to check if "dynamic variable drop" in dtrace
 * has happened. main() polls it after every display cycle and, when it is
 * set, tears dtrace down and initializes it again before clearing the flag.
 */
boolean_t lt_drop_detected = 0;

/*
 * Global configuration. main() fills in the defaults and then applies the
 * command line options on top of them.
 */
lt_config_t g_config;
45 
/*
 * Identifiers for the command line options whose repeated use is tracked by
 * check_opt_dup(). The values are used as array indices there, so they must
 * stay dense, start at zero and end with LT_CMDOPT__LAST.
 */
typedef enum {
	LT_CMDOPT_INTERVAL = 0,	/* -t */
	LT_CMDOPT_LOG_FILE,	/* -o */
	LT_CMDOPT_LOG_LEVEL,	/* -k */
	LT_CMDOPT_LOG_INTERVAL,	/* -l */
	LT_CMDOPT_CONFIG_FILE,	/* -c */
	LT_CMDOPT_F_FILTER,	/* -f [no]filter */
	LT_CMDOPT_F_SCHED,	/* -f [no]sched */
	LT_CMDOPT_F_SOBJ,	/* -f [no]sobj */
	LT_CMDOPT_F_LOW,	/* -f [no]low */
	LT_CMDOPT_SELECT,	/* -s */
	LT_CMDOPT__LAST		/* Number of options; must be the last one */
} lt_cmd_option_id_t;
59 
60 /*
61  * Check for duplicate command line options.
62  * Returns TRUE if duplicate options with different values are found,
63  * returns FALSE otherwise.
64  */
65 static int
66 check_opt_dup(lt_cmd_option_id_t id, uint64_t value) {
67 
68 	static int opt_set[(int)LT_CMDOPT__LAST];
69 	static uint64_t opt_val[(int)LT_CMDOPT__LAST];
70 
71 	const char *errmsg[] = {
72 		"-t is set more than once with different values.",
73 		"-o is set more than once.",
74 		"-k is set more than once with different values.",
75 		"-l is set more than once with different values.",
76 		"-c is set more than once.",
77 		"-f [no]filter is set more than once with different values.",
78 		"-f [no]sched is set more than once with different values.",
79 		"-f [no]sobj is set more than once with different values.",
80 		"-f [no]low is set more than once with different values.",
81 		"-s is set more than once with different values."
82 	};
83 
84 	g_assert(sizeof (errmsg)/sizeof (errmsg[0]) == (int)LT_CMDOPT__LAST);
85 
86 	if (!opt_set[(int)id]) {
87 		opt_set[(int)id] = TRUE;
88 		opt_val[(int)id] = value;
89 		return (FALSE);
90 	}
91 
92 	if (opt_val[(int)id] != value) {
93 		(void) fprintf(stderr, "%s\n", errmsg[(int)id]);
94 		return (TRUE);
95 	}
96 
97 	return (FALSE);
98 }
99 
100 /*
101  * Print command-line help message.
102  */
103 static void
104 print_usage(const char *execname, int long_help)
105 {
106 	char buffer[PATH_MAX];
107 	(void) snprintf(buffer, sizeof (buffer), "%s", execname);
108 
109 	if (!long_help) {
110 		/* Print short help to stderr. */
111 		(void) fprintf(stderr, "Usage: %s [option(s)], ",
112 		    basename(buffer));
113 		(void) fprintf(stderr, "use '%s -h' for details.\n",
114 		    basename(buffer));
115 		return;
116 	}
117 
118 	(void) printf("Usage: %s [option(s)]\n", basename(buffer));
119 	(void) printf("Options:\n"
120 	    "    -h, --help\n"
121 	    "        Print this help.\n"
122 	    "    -t, --interval TIME\n"
123 	    "        Set refresh interval to TIME. "
124 	    "Valid range [1...60] seconds, default = 5\n"
125 	/*
126 	 * Option "-c, --config FILE" is not user-visible for now.
127 	 * When we have chance to properly document the format of translation
128 	 * rules, we'll make it user-visible.
129 	 */
130 	    "    -o, --output-log-file FILE\n"
131 	    "        Output kernel log to FILE. Default = "
132 	    DEFAULT_KLOG_FILE "\n"
133 	    "    -k, --kernel-log-level LEVEL\n"
134 	    "        Set kernel log level to LEVEL.\n"
135 	    "        0(default) = None, 1 = Unmapped, 2 = Mapped, 3 = All.\n"
136 	    "    -f, --feature [no]feature1,[no]feature2,...\n"
137 	    "        Enable/disable features in LatencyTOP.\n"
138 	    "        [no]filter:\n"
139 	    "        Filter large interruptible latencies, e.g. sleep.\n"
140 	    "        [no]sched:\n"
141 	    "        Monitors sched (PID=0).\n"
142 	    "        [no]sobj:\n"
143 	    "        Monitors synchronization objects.\n"
144 	    "        [no]low:\n"
145 	    "        Lower overhead by sampling small latencies.\n"
146 	    "    -l, --log-period TIME\n"
147 	    "        Write and restart log every TIME seconds, TIME >= 60\n"
148 	    "    -s --select [ pid=<pid> | pgid=<pgid> ]\n"
149 	    "        Monitor only the given process or processes in the "
150 	    "given process group.\n");
151 }
152 
/*
 * Properly exit latencytop when it receives SIGINT or SIGTERM.
 *
 * main() installs this handler for both signals. Instead of exiting
 * directly, the handler passes "q" to lt_gpipe_break(); the signal number
 * itself is not used.
 * NOTE(review): presumably this makes the descriptor returned by
 * lt_gpipe_readfd() readable, which main() waits on with select() before
 * running its normal cleanup path -- confirm against lt_gpipe_break().
 */
/* ARGSUSED */
static void
signal_handler(int sig)
{
	lt_gpipe_break("q");
}
162 
/*
 * Convert a decimal string to an int.
 *
 * On success, stores the value in *result and returns 0.
 * Returns -1 and leaves *result untouched when:
 *   - str or result is NULL,
 *   - str contains no digits at all (e.g. "" or "abc"),
 *   - extra characters follow the number (e.g. "12abc"),
 *   - the value does not fit in an int.
 * Leading white space and an optional sign are accepted, as by strtol().
 */
static int
to_int(const char *str, int *result)
{
	char *tail = NULL;
	long ret;

	if (str == NULL || result == NULL) {
		return (-1);
	}

	/* strtol() reports overflow only through errno, so clear it first. */
	errno = 0;
	ret = strtol(str, &tail, 10);

	/* Nothing was converted, or junk follows the number. */
	if (tail == str || *tail != '\0') {
		return (-1);
	}

	/*
	 * Reject values outside the range of long (ERANGE) as well as values
	 * that the cast to int below would silently truncate.
	 */
	if (errno == ERANGE || ret < INT_MIN || ret > INT_MAX) {
		return (-1);
	}

	*result = (int)ret;

	return (0);
}
186 
187 /*
188  * The main function.
189  */
190 int
191 main(int argc, char *argv[])
192 {
193 	const char *opt_string = "t:o:k:hf:l:c:s:";
194 	struct option const longopts[] = {
195 		{"interval", required_argument, NULL, 't'},
196 		{"output-log-file", required_argument, NULL, 'o'},
197 		{"kernel-log-level", required_argument, NULL, 'k'},
198 		{"help", no_argument, NULL, 'h'},
199 		{"feature", required_argument, NULL, 'f'},
200 		{"log-period", required_argument, NULL, 'l'},
201 		{"config", required_argument, NULL, 'c'},
202 		{"select", required_argument, NULL, 's'},
203 		{NULL, 0, NULL, 0}
204 	};
205 
206 	int optc;
207 	int longind = 0;
208 	int running = 1;
209 	int unknown_option = FALSE;
210 	int refresh_interval = 5;
211 	int klog_level = 0;
212 	int log_interval = 0;
213 	long long last_logged = 0;
214 	char *token = NULL;
215 	int retval = 0;
216 	int gpipe;
217 	int err;
218 	uint64_t collect_end;
219 	uint64_t current_time;
220 	uint64_t delta_time;
221 	char logfile[PATH_MAX] = "";
222 	int select_id;
223 	int select_value;
224 	char *select_str;
225 	boolean_t no_dtrace_cleanup = B_TRUE;
226 
227 	lt_gpipe_init();
228 	(void) signal(SIGINT, signal_handler);
229 	(void) signal(SIGTERM, signal_handler);
230 
231 	/* Default global settings */
232 	g_config.lt_cfg_enable_filter = 0;
233 	g_config.lt_cfg_trace_sched = 0;
234 	g_config.lt_cfg_trace_syncobj = 1;
235 	g_config.lt_cfg_low_overhead_mode = 0;
236 	g_config.lt_cfg_trace_pid = 0;
237 	g_config.lt_cfg_trace_pgid = 0;
238 	/* dtrace snapshot every 1 second */
239 	g_config.lt_cfg_snap_interval = 1000;
240 #ifdef EMBED_CONFIGS
241 	g_config.lt_cfg_config_name = NULL;
242 #else
243 	g_config.lt_cfg_config_name = lt_strdup(DEFAULT_CONFIG_NAME);
244 #endif
245 
246 	/* Parse command line arguments. */
247 	while ((optc = getopt_long(argc, argv, opt_string,
248 	    longopts, &longind)) != -1) {
249 		switch (optc) {
250 		case 'h':
251 			print_usage(argv[0], TRUE);
252 			goto end_none;
253 		case 't':
254 			if (to_int(optarg, &refresh_interval) != 0 ||
255 			    refresh_interval < 1 || refresh_interval > 60) {
256 				lt_display_error(
257 				    "Invalid refresh interval: %s\n", optarg);
258 				unknown_option = TRUE;
259 			} else if (check_opt_dup(LT_CMDOPT_INTERVAL,
260 			    refresh_interval)) {
261 				unknown_option = TRUE;
262 			}
263 
264 			break;
265 		case 'k':
266 			if (to_int(optarg, &klog_level) != 0 ||
267 			    lt_klog_set_log_level(klog_level) != 0) {
268 				lt_display_error(
269 				    "Invalid log level: %s\n", optarg);
270 				unknown_option = TRUE;
271 			} else if (check_opt_dup(LT_CMDOPT_LOG_LEVEL,
272 			    refresh_interval)) {
273 				unknown_option = TRUE;
274 			}
275 
276 			break;
277 		case 'o':
278 			if (check_opt_dup(LT_CMDOPT_LOG_FILE, optind)) {
279 				unknown_option = TRUE;
280 			} else if (strlen(optarg) >= sizeof (logfile)) {
281 				lt_display_error(
282 				    "Log file name is too long: %s\n",
283 				    optarg);
284 				unknown_option = TRUE;
285 			} else {
286 				(void) strncpy(logfile, optarg,
287 				    sizeof (logfile));
288 			}
289 
290 			break;
291 		case 'f':
292 			for (token = strtok(optarg, ","); token != NULL;
293 			    token = strtok(NULL, ",")) {
294 				int v = TRUE;
295 
296 				if (strncmp(token, "no", 2) == 0) {
297 					v = FALSE;
298 					token = &token[2];
299 				}
300 
301 				if (CMPOPT(token, "filter") == 0) {
302 					if (check_opt_dup(LT_CMDOPT_F_FILTER,
303 					    v)) {
304 						unknown_option = TRUE;
305 					} else {
306 						g_config.lt_cfg_enable_filter
307 						    = v;
308 					}
309 				} else if (CMPOPT(token, "sched") == 0) {
310 					if (check_opt_dup(LT_CMDOPT_F_SCHED,
311 					    v)) {
312 						unknown_option = TRUE;
313 					} else {
314 						g_config.lt_cfg_trace_sched
315 						    = v;
316 					}
317 				} else if (CMPOPT(token, "sobj") == 0) {
318 					if (check_opt_dup(LT_CMDOPT_F_SOBJ,
319 					    v)) {
320 						unknown_option = TRUE;
321 					} else {
322 						g_config.lt_cfg_trace_syncobj
323 						    = v;
324 					}
325 				} else if (CMPOPT(token, "low") == 0) {
326 					if (check_opt_dup(LT_CMDOPT_F_LOW,
327 					    v)) {
328 						unknown_option = TRUE;
329 					} else {
330 						g_config.
331 						    lt_cfg_low_overhead_mode
332 						    = v;
333 					}
334 				} else {
335 					lt_display_error(
336 					    "Unknown feature: %s\n", token);
337 					unknown_option = TRUE;
338 				}
339 			}
340 
341 			break;
342 		case 'l':
343 			if (to_int(optarg, &log_interval) != 0 ||
344 			    log_interval < 60) {
345 				lt_display_error(
346 				    "Invalid log interval: %s\n", optarg);
347 				unknown_option = TRUE;
348 			} else if (check_opt_dup(LT_CMDOPT_LOG_INTERVAL,
349 			    log_interval)) {
350 				unknown_option = TRUE;
351 			}
352 
353 			break;
354 		case 'c':
355 			if (strlen(optarg) >= PATH_MAX) {
356 				lt_display_error(
357 				    "Configuration name is too long.\n");
358 				unknown_option = TRUE;
359 			} else if (check_opt_dup(LT_CMDOPT_CONFIG_FILE,
360 			    optind)) {
361 				unknown_option = TRUE;
362 			} else {
363 				g_config.lt_cfg_config_name =
364 				    lt_strdup(optarg);
365 			}
366 
367 			break;
368 		case 's':
369 			if (strncmp(optarg, "pid=", 4) == 0) {
370 				select_id = 0;
371 				select_str = &optarg[4];
372 			} else if (strncmp(optarg, "pgid=", 5) == 0) {
373 				select_id = 1;
374 				select_str = &optarg[5];
375 			} else {
376 				lt_display_error(
377 				    "Invalid select option: %s\n", optarg);
378 				unknown_option = TRUE;
379 				break;
380 			}
381 
382 			if (to_int(select_str, &select_value) != 0) {
383 				lt_display_error(
384 				    "Invalid select option: %s\n", optarg);
385 				unknown_option = TRUE;
386 				break;
387 			}
388 
389 			if (select_value <= 0) {
390 				lt_display_error(
391 				    "Process/process group ID must be "
392 				    "greater than 0: %s\n", optarg);
393 				unknown_option = TRUE;
394 				break;
395 			}
396 
397 			if (check_opt_dup(LT_CMDOPT_SELECT,
398 			    (((uint64_t)select_id) << 32) | select_value)) {
399 				unknown_option = TRUE;
400 				break;
401 			}
402 
403 			if (select_id == 0) {
404 				g_config.lt_cfg_trace_pid = select_value;
405 			} else {
406 				g_config.lt_cfg_trace_pgid = select_value;
407 			}
408 			break;
409 		default:
410 			unknown_option = TRUE;
411 			break;
412 		}
413 	}
414 
415 	if (!unknown_option && strlen(logfile) > 0) {
416 		err = lt_klog_set_log_file(logfile);
417 
418 		if (err == -1) {
419 			lt_display_error("Log file name is too long: %s\n",
420 			    logfile);
421 			unknown_option = TRUE;
422 		} else if (err == -2) {
423 			lt_display_error("Cannot write to log file: %s\n",
424 			    logfile);
425 			unknown_option = TRUE;
426 		}
427 	}
428 
429 	/* Throw error for invalid/junk arguments */
430 	if (optind  < argc) {
431 		int tmpind = optind;
432 		(void) fprintf(stderr, "Unknown option(s): ");
433 
434 		while (tmpind < argc) {
435 			(void) fprintf(stderr, "%s ", argv[tmpind++]);
436 		}
437 
438 		(void) fprintf(stderr, "\n");
439 		unknown_option = TRUE;
440 	}
441 
442 	if (unknown_option) {
443 		print_usage(argv[0], FALSE);
444 		retval = 1;
445 		goto end_none;
446 	}
447 
448 	(void) printf("%s\n%s\n", TITLE, COPYRIGHT);
449 
450 	/*
451 	 * Initialization
452 	 */
453 	lt_klog_init();
454 
455 	if (lt_table_init() != 0) {
456 		lt_display_error("Unable to load configuration table.\n");
457 		retval = 1;
458 		goto end_notable;
459 	}
460 
461 	if (lt_dtrace_init() != 0) {
462 		lt_display_error("Unable to initialize dtrace.\n");
463 		retval = 1;
464 		goto end_nodtrace;
465 	}
466 
467 	last_logged = lt_millisecond();
468 
469 	(void) printf("Collecting data for %d seconds...\n",
470 	    refresh_interval);
471 
472 	gpipe = lt_gpipe_readfd();
473 	collect_end = last_logged + refresh_interval * 1000;
474 	for (;;) {
475 		fd_set read_fd;
476 		struct timeval timeout;
477 		int tsleep = collect_end - lt_millisecond();
478 
479 		if (tsleep <= 0) {
480 			break;
481 		}
482 
483 		/*
484 		 * Interval when we call dtrace_status() and collect
485 		 * aggregated data.
486 		 */
487 		if (tsleep > g_config.lt_cfg_snap_interval) {
488 			tsleep = g_config.lt_cfg_snap_interval;
489 		}
490 
491 		timeout.tv_sec = tsleep / 1000;
492 		timeout.tv_usec = (tsleep % 1000) * 1000;
493 
494 		FD_ZERO(&read_fd);
495 		FD_SET(gpipe, &read_fd);
496 
497 		if (select(gpipe + 1, &read_fd, NULL, NULL, &timeout) > 0) {
498 			goto end_ubreak;
499 		}
500 
501 		(void) lt_dtrace_work(0);
502 	}
503 
504 	lt_display_init();
505 
506 	do {
507 		current_time = lt_millisecond();
508 
509 		lt_stat_clear_all();
510 		(void) lt_dtrace_collect();
511 
512 		delta_time = current_time;
513 		current_time = lt_millisecond();
514 		delta_time = current_time - delta_time;
515 
516 		if (log_interval > 0 &&
517 		    current_time - last_logged > log_interval * 1000) {
518 			lt_klog_write();
519 			last_logged = current_time;
520 		}
521 
522 		running = lt_display_loop(refresh_interval * 1000 -
523 		    delta_time);
524 
525 		/*
526 		 * This is to avoid dynamic variable drop
527 		 * in DTrace.
528 		 */
529 		if (lt_drop_detected == B_TRUE) {
530 			if (lt_dtrace_deinit() != 0) {
531 				no_dtrace_cleanup = B_FALSE;
532 				retval = 1;
533 				break;
534 			}
535 
536 			lt_drop_detected = B_FALSE;
537 			if (lt_dtrace_init() != 0) {
538 				retval = 1;
539 				break;
540 			}
541 		}
542 	} while (running != 0);
543 
544 	lt_klog_write();
545 
546 	/* Cleanup */
547 	lt_display_deinit();
548 
549 end_ubreak:
550 	if (no_dtrace_cleanup == B_FALSE || lt_dtrace_deinit() != 0)
551 		retval = 1;
552 
553 	lt_stat_free_all();
554 
555 end_nodtrace:
556 	lt_table_deinit();
557 
558 end_notable:
559 	lt_klog_deinit();
560 
561 end_none:
562 	lt_gpipe_deinit();
563 
564 	if (g_config.lt_cfg_config_name != NULL) {
565 		free(g_config.lt_cfg_config_name);
566 	}
567 
568 	return (retval);
569 }
570