xref: /titanic_41/usr/src/cmd/lvm/md_monitord/md_monitord.c (revision 80148899834a4078a2bd348504aa2d6de9752837)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * probedev issues ioctls for all the metadevices
28  */
29 
30 #include "md_monitord.h"
31 #include <sdssc.h>
32 
/* Defined in another translation unit; not referenced in the code visible here. */
extern char queue_name[];
/* Externally visible flag; main() sets it to True when -i is given. */
boolean_e issue_ioctl = True;


#define	DEBUG_LEVEL_FORK	9	/* will run in background at all */
					/* levels less than DEBUG_LEVEL_FORK */

/* function prototypes */
static void usage(void);
static void catch_sig(int);
static pid_t enter_daemon_lock(void);
static void exit_daemon_lock(void);
static void probe_all_devs(boolean_e, md_error_t *, boolean_e);

/* Path of the advisory lock file used to keep a single daemon instance. */
#define	DAEMON_LOCK_FILE "/etc/lvm/.mdmonitord.lock"

/*
 * Global variable
 *
 * Set currently being probed; assigned in probe_all_devs() and passed
 * to md_exit() by monitord_exit().
 */
mdsetname_t	*sp;

/* Non-zero once enter_daemon_lock() has acquired the lock. */
static int hold_daemon_lock;
static const char *daemon_lock_file = DAEMON_LOCK_FILE;
/* Descriptor for the open lock file. */
static int daemon_lock_fd;

/* Verbosity threshold for monitord_print(); set from -d in main(). */
static int		debug_level;
/* Non-zero: monitord_print() writes to syslog instead of stdio. */
static int		logflag;
/* Basename of argv[0], used as the debug message prefix. */
static char		*prog;
/* Poll timer settings; tv_sec fields are set from -t in main(). */
static struct itimerval	itimer;
static boolean_e	probe_started;	/* flag to indicate main is probing */
63 
64 static void
usage()65 usage() {
66 	(void) fprintf(stderr, gettext(
67 		"usage: mdmonitord [-d <debug_level>] [-t poll time]\n"
68 		    "higher debug levels get progressively more"
69 		    "detailed debug information.\n\n"
70 		    "mdmonitord will run in background if run"
71 		    "with a debug_level less than %d.\n"), DEBUG_LEVEL_FORK);
72 	exit(-1);
73 }
74 
75 
76 /* common exit function which ensures releasing locks */
77 void
monitord_exit(int status)78 monitord_exit(int status)
79 {
80 	monitord_print(1, gettext("exit status = %d\n"), status);
81 
82 	monitord_print(8, "hold_daemon_lock %d\n", hold_daemon_lock);
83 	if (hold_daemon_lock) {
84 		exit_daemon_lock();
85 	}
86 	md_exit(sp, status);
87 }
88 
89 
90 /*
91  * When SIGHUP is received, reload modules?
92  */
93 void
catch_sig(int sig)94 catch_sig(int sig)
95 {
96 	boolean_e startup = False;
97 	md_error_t status = mdnullerror;
98 	boolean_e sig_verbose = True;
99 
100 	if (sig == SIGALRM) {
101 		monitord_print(6, gettext("SIGALRM processing"));
102 		if (probe_started == True) {
103 			monitord_print(6, gettext(
104 			    " probe_started returning\n"));
105 			return;
106 		}
107 		monitord_print(6, gettext(
108 		    " starting probe from signal handler\n"));
109 		probe_all_devs(startup, &status, sig_verbose);
110 		(void) setitimer(ITIMER_REAL, &itimer, NULL);
111 	}
112 	if (sig == SIGHUP)
113 		monitord_exit(sig);
114 }
115 
116 /*
117  * Use an advisory lock to ensure that only one daemon process is
118  * active at any point in time.
119  */
120 static pid_t
check_daemon_lock(void)121 check_daemon_lock(void)
122 {
123 	struct flock	lock;
124 
125 	monitord_print(1, gettext("check_daemon_lock: lock file = %s\n"),
126 	    daemon_lock_file);
127 
128 	daemon_lock_fd = open(daemon_lock_file, O_CREAT|O_RDWR, 0644);
129 	if (daemon_lock_fd < 0) {
130 		monitord_print(0, "open(%s) - %s\n", daemon_lock_file,
131 		    strerror(errno));
132 		monitord_exit(-1);
133 	}
134 
135 	lock.l_type = F_WRLCK;
136 	lock.l_whence = SEEK_SET;
137 	lock.l_start = 0;
138 	lock.l_len = 0;
139 
140 	if (fcntl(daemon_lock_fd, F_GETLK, &lock) == -1) {
141 		monitord_print(0, "lock(%s) - %s", daemon_lock_file,
142 		    strerror(errno));
143 		monitord_exit(-1);
144 	}
145 
146 	return (lock.l_type == F_UNLCK ? 0 : lock.l_pid);
147 }
148 
149 static pid_t
enter_daemon_lock(void)150 enter_daemon_lock(void)
151 {
152 	struct flock	lock;
153 
154 	monitord_print(1, gettext(
155 	    "enter_daemon_lock: lock file = %s\n"), daemon_lock_file);
156 
157 	daemon_lock_fd = open(daemon_lock_file, O_CREAT|O_RDWR, 0644);
158 	if (daemon_lock_fd < 0) {
159 		monitord_print(0, "open(%s) - %s\n",
160 		    daemon_lock_file, strerror(errno));
161 		monitord_exit(-1);
162 	}
163 
164 	lock.l_type = F_WRLCK;
165 	lock.l_whence = SEEK_SET;
166 	lock.l_start = 0;
167 	lock.l_len = 0;
168 
169 	if (fcntl(daemon_lock_fd, F_SETLK, &lock) == -1) {
170 
171 		if (errno == EAGAIN || errno == EDEADLK) {
172 
173 			if (fcntl(daemon_lock_fd, F_GETLK, &lock) == -1) {
174 				monitord_print(0, "lock(%s) - %s",
175 				    daemon_lock_file, strerror(errno));
176 				monitord_exit(-1);
177 			}
178 
179 			return (lock.l_pid);
180 		}
181 	}
182 	hold_daemon_lock = 1;
183 
184 	return (0);
185 }
186 
187 /*
188  * Drop the advisory daemon lock, close lock file
189  */
190 static void
exit_daemon_lock(void)191 exit_daemon_lock(void)
192 {
193 	struct flock lock;
194 
195 	lock.l_type = F_UNLCK;
196 	lock.l_whence = SEEK_SET;
197 	lock.l_start = 0;
198 	lock.l_len = 0;
199 
200 	if (fcntl(daemon_lock_fd, F_SETLK, &lock) == -1) {
201 		monitord_print(0, "unlock(%s) - %s",
202 		    daemon_lock_file, strerror(errno));
203 	}
204 
205 	if (close(daemon_lock_fd) == -1) {
206 		monitord_print(0, "close(%s) failed - %s\n",
207 		    daemon_lock_file, strerror(errno));
208 		monitord_exit(-1);
209 	}
210 	(void) unlink(daemon_lock_file);
211 }
212 
213 
214 /*
215  * print error messages to the terminal or to syslog
216  */
217 /*PRINTFLIKE2*/
218 void
monitord_print(int level,char * message,...)219 monitord_print(int level, char *message, ...)
220 {
221 	va_list ap;
222 	static int newline = 1;
223 
224 	if (level > debug_level) {
225 		return;
226 	}
227 
228 	va_start(ap, message);
229 	if (level == 0) {
230 		if (logflag) {
231 			(void) vsyslog(LOG_ERR, message, ap);
232 		} else {
233 			(void) vfprintf(stderr, message, ap);
234 		}
235 
236 	} else {
237 		if (logflag) {
238 			(void) syslog(LOG_DEBUG, "%s[%ld]: ",
239 			    prog, getpid());
240 			(void) vsyslog(LOG_DEBUG, message, ap);
241 		} else {
242 			if (newline) {
243 				(void) fprintf(stdout, "%s[%ld]: ",
244 				    prog, getpid());
245 				(void) vfprintf(stdout, message, ap);
246 			} else {
247 				(void) vfprintf(stdout, message, ap);
248 			}
249 		}
250 	}
251 	if (message[strlen(message)-1] == '\n') {
252 		newline = 1;
253 	} else {
254 		newline = 0;
255 	}
256 	va_end(ap);
257 }
258 
259 
260 char *
int2string(intmap_t * map,int value)261 int2string(intmap_t *map, int value)
262 {
263 	const char	*name = (const char *)NULL;
264 	char		charstr[100];
265 
266 	for (; map->im_name != (const char *)NULL; map++) {
267 		if (map->im_int == value) {
268 			name = map->im_name;
269 			break;
270 		}
271 	}
272 	if (name == (const char *)NULL) {
273 		/* No match.  Convert the string to an int. */
274 		(void) sprintf(charstr, "%d", value);
275 	} else {
276 		(void) snprintf(charstr, sizeof (charstr), "%d %s",
277 		    value, name);
278 	}
279 	return (strdup(charstr));
280 }
281 
/*
 * Probe the metadevices of every set.
 *
 * startup - True on the first pass from main(): the event queue is not
 *	     deleted beforehand and the interval timer is armed at the
 *	     end.  False on later passes and from the signal handler.
 * statusp - error structure handed to every libmeta call.
 * verbose - passed through to the probe_*_devs() routines.
 *
 * probe_started is True for the duration of the call so that
 * catch_sig() can tell a probe is already running.  Failure to get the
 * set count, or to delete or create the MD_FF_Q queue, ends the
 * process through monitord_exit(1).
 */
void
probe_all_devs(boolean_e startup, md_error_t *statusp, boolean_e verbose)
{
	set_t		max_sets, set_idx;

	probe_started = True;
	/* return value deliberately ignored */
	(void) set_snarf(statusp);

	if ((max_sets = get_max_sets(statusp)) == 0) {
		mde_perror(statusp, gettext(
		    "Can't find max number of sets\n"));
		monitord_exit(1);
	}

	/*
	 * We delete the FF_Q to avoid recurse errors. Yes we will lose
	 * some but its the corner case.
	 */

	if (startup == False &&
	    (meta_notify_deleteq(MD_FF_Q, statusp) != 0)) {
		mde_perror(statusp, gettext(
		    "delete queue failed\n"));
		monitord_exit(1);
	}

	for (set_idx = 0; set_idx < max_sets; set_idx++) {
		if ((sp = metasetnosetname(set_idx, statusp)) == NULL) {
			if (mdiserror(statusp, MDE_NO_SET) == 0) {
				/*
				 * mdiserror() returned 0 for MDE_NO_SET
				 * (presumably: the failure is some other
				 * error), so stop scanning sets.
				 */
				break;
			} else {
				/* otherwise clear the error, try next index */
				mdclrerror(statusp);
				continue;
			}
		}

		/* if we dont have ownership or cannot lock it continue. */
		/*
		 * NOTE(review): as written, the set is skipped only when
		 * meta_check_ownership() returns 0 AND meta_lock() returns
		 * non-zero.  When meta_check_ownership() returns non-zero,
		 * meta_lock() is not called and the set is still probed
		 * below.  That does not obviously match the comment above;
		 * confirm the helpers' return conventions before changing.
		 */
		if ((meta_check_ownership(sp, statusp) == NULL) &&
		    meta_lock(sp, TRUE, statusp))
			continue;

		/* Skip if a MN set */
		if (meta_is_mn_set(sp, statusp)) {
			(void) meta_unlock(sp, statusp);
			continue;
		}

		probe_mirror_devs(verbose);
		probe_raid_devs(verbose);
		probe_trans_devs(verbose);
		probe_hotspare_devs(verbose);
		(void) meta_unlock(sp, statusp);
	}
	/* (re)create the event queue that wait_for_event() reads from */
	if (meta_notify_createq(MD_FF_Q, 0, statusp)) {
		mde_perror(statusp, gettext(
		    "create queue failed"));
		monitord_exit(1);
	}
	probe_started = False;
	/*
	 * need to do it here only at startup.
	 * The daemon will restart the alarm.
	 */

	if (startup == True)
		(void) setitimer(ITIMER_REAL, &itimer, NULL);
}
352 
353 evid_t
wait_for_event(md_error_t * statusp)354 wait_for_event(md_error_t *statusp)
355 {
356 	md_ev_t		event;
357 
358 
359 	event.setno = EV_ALLSETS;
360 	event.obj = EV_ALLOBJS;
361 
362 	do {
363 		if (meta_notify_getev(MD_FF_Q, EVFLG_WAIT, &event,
364 		    statusp) < 0) {
365 			monitord_print(8,
366 			    "meta_notify_getev: errno 0x%x\n", -errno);
367 			monitord_exit(-errno);
368 		}
369 	} while ((event.ev != EV_IOERR && event.ev != EV_ERRED &&
370 	    event.ev != EV_LASTERRED));
371 	return (event.ev);
372 }
373 
374 int
main(int argc,char ** argv)375 main(int argc, char **argv)
376 {
377 	boolean_e	startup = True;
378 	boolean_e	verbose = False;
379 	int		i;
380 	char		c;
381 	md_error_t	status = mdnullerror;
382 	struct sigaction act;
383 	sigset_t	mask;
384 	unsigned long	timerval = 0;
385 
386 	/*
387 	 * Get the locale set up before calling any other routines
388 	 * with messages to ouput.  Just in case we're not in a build
389 	 * environment, make sure that TEXT_DOMAIN gets set to
390 	 * something.
391 	 */
392 #if !defined(TEXT_DOMAIN)
393 #define	TEXT_DOMAIN "SYS_TEST"
394 #endif
395 	(void) setlocale(LC_ALL, "");
396 	(void) textdomain(TEXT_DOMAIN);
397 
398 	if (sdssc_bind_library() == SDSSC_ERROR) {
399 		(void) printf(gettext(
400 		    "%s: Interface error with libsds_sc.so\n"), argv[0]);
401 		exit(1);
402 	}
403 
404 	if (md_init(argc, argv, 0, 1, &status) != 0 ||
405 	    meta_check_root(&status) != 0) {
406 		mde_perror(&status, "");
407 		monitord_exit(1);
408 	}
409 
410 	(void) sigfillset(&mask);
411 	(void) thr_sigsetmask(SIG_BLOCK, &mask, NULL);
412 
413 	if (argc > 7) {
414 		usage();
415 	}
416 
417 	if ((prog = strrchr(argv[0], '/')) == NULL) {
418 		prog = argv[0];
419 	} else {
420 		prog++;
421 	}
422 
423 	/*
424 	 * Reset optind/opterr so that the command line arguments can be
425 	 * parsed. This is in case anything has already called getopt,
426 	 * for example sdssc_cmd_proxy which is not currently used but
427 	 * may be in the future.
428 	 */
429 	optind = 1;
430 	opterr = 1;
431 	while ((c = getopt(argc, argv, "ivd:t:")) != EOF) {
432 		switch (c) {
433 		case 'v':
434 			verbose = True;
435 			break;
436 		case 'i':
437 			issue_ioctl = True;
438 			break;
439 		case 'd':
440 			debug_level = atoi(optarg);
441 			break;
442 		case 't':
443 			timerval = atol(optarg);
444 			break;
445 		default:
446 			usage();
447 			exit(0);
448 		}
449 	}
450 
451 	if (timerval == 0) {
452 		monitord_print(8, gettext(
453 		    "operating in interrupt mode\n"));
454 	} else {
455 		itimer.it_value.tv_sec = timerval;
456 		itimer.it_interval.tv_sec = timerval;
457 		monitord_print(8, gettext(
458 		    "set value and interval %lu sec  mode\n"), timerval);
459 	}
460 	/*
461 	 * set up our signal handler for SIGALRM. The
462 	 * rest are setup by md_init.
463 	 */
464 
465 	act.sa_handler = catch_sig;
466 	(void) sigemptyset(&act.sa_mask);
467 	act.sa_flags = SA_RESTART;
468 	(void) sigaction(SIGALRM, &act, NULL);
469 	(void) sigaction(SIGHUP, &act, NULL);
470 
471 	(void) sigemptyset(&mask);
472 	(void) sigaddset(&mask, SIGALRM);
473 	(void) sigaddset(&mask, SIGHUP);
474 	(void) thr_sigsetmask(SIG_UNBLOCK, &mask, NULL);
475 
476 	/* demonize ourselves */
477 	if (debug_level < DEBUG_LEVEL_FORK) {
478 		pid_t pid;
479 
480 		if ((pid = check_daemon_lock()) != 0) {
481 			monitord_print(0, gettext(
482 			    "mdmonitord daemon pid %ld already running\n"),
483 			    pid);
484 			exit(-1);
485 		}
486 
487 		if (fork()) {
488 			exit(0);
489 		}
490 
491 		/* only one daemon can run at a time */
492 		if ((pid = enter_daemon_lock()) != 0) {
493 			monitord_print(0, gettext(
494 			    "mdmonitord daemon pid %ld already running\n"),
495 			    pid);
496 			exit(-1);
497 		}
498 
499 		(void) chdir("/");
500 
501 		(void) setsid();
502 		if (debug_level <= 1) {
503 			for (i = 0; i < 3; i++) {
504 				(void) close(i);
505 			}
506 			(void) open("/dev/null", 0);
507 			(void) dup2(0, 1);
508 			(void) dup2(0, 2);
509 			logflag = 1;
510 		}
511 	}
512 
513 	openlog("mdmonitord", LOG_PID, LOG_DAEMON);
514 
515 	monitord_print(8, gettext(
516 	    "mdmonitord started, debug level = %d\n"), debug_level);
517 
518 
519 	/* loop forever waiting for events */
520 	do {
521 		metaflushnames(1);
522 		probe_all_devs(startup, &status, verbose);
523 		startup = False; /* since we have gone through once */
524 	} while (wait_for_event(&status));
525 	return (0);
526 }
527