xref: /titanic_52/usr/src/cmd/rcm_daemon/common/rcm_main.c (revision 9b4e3ac25d882519cad3fc11f0c53b07f4e60536)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * Reconfiguration Coordination Daemon
28  *
29  * Accept RCM messages in the form of RCM events and process them
30  * - to build and update the system resource map
31  * - to allow clients to register/unregister for resource
32  * - to allow dr initiators to offline a resource before removal
33  * - to call into clients to perform suspend/offline actions
34  *
35  * The goal is to enable fully automated Dynamic Reconfiguration and better
36  * DR information tracking.
37  */
38 
39 #include <librcm_event.h>
40 
41 #include "rcm_impl.h"
42 
/* will run in daemon mode if debug level < DEBUG_LEVEL_FORK */
#define	DEBUG_LEVEL_FORK	RCM_DEBUG

/* path of the file that carries the single-instance advisory lock */
#define	DAEMON_LOCK_FILE "/var/run/rcm_daemon_lock"

static int hold_daemon_lock;	/* set to 1 once this process owns the lock */
static int daemon_lock_fd;	/* descriptor opened on daemon_lock_file */
static const char *daemon_lock_file = DAEMON_LOCK_FILE;

int debug_level = 0;		/* rcm_log_message() drops higher levels */
static int idle_timeout;	/* -t argument, passed to rcmd_start_timer() */
static int logflag = 0;		/* 0: messages go to stdio, 1: to syslog */
static char *prog;		/* program name derived from argv[0] */

static void usage(void);
static void catch_sighup(void);
static void catch_sigusr1(void);
static pid_t enter_daemon_lock(void);
static void exit_daemon_lock(void);

/*
 * Defined outside this file.  Declared with (void) so that these are real
 * prototypes and the compiler checks the (argument-less) calls.
 */
extern void init_poll_thread(void);
extern void cleanup_poll_thread(void);
65 
66 /*
67  * Print command line syntax for starting rcm_daemon
68  */
69 static void
70 usage() {
71 	(void) fprintf(stderr,
72 	    gettext("usage: %s [-d debug_level] [-t idle_timeout]\n"), prog);
73 	rcmd_exit(EINVAL);
74 }
75 
76 /*
77  * common cleanup/exit functions to ensure releasing locks
78  */
79 static void
80 rcmd_cleanup(int status)
81 {
82 	if (status == 0) {
83 		rcm_log_message(RCM_INFO,
84 		    gettext("rcm_daemon normal exit\n"));
85 	} else {
86 		rcm_log_message(RCM_ERROR,
87 		    gettext("rcm_daemon exit: errno = %d\n"), status);
88 	}
89 
90 	if (hold_daemon_lock) {
91 		exit_daemon_lock();
92 	}
93 }
94 
/*
 * Terminate the process with the given status.  rcmd_cleanup() runs first
 * so the exit is logged and the daemon lock is dropped when it is held.
 */
void
rcmd_exit(int status)
{
	rcmd_cleanup(status);
	exit(status);
}
101 
102 /*
103  * When SIGHUP is received, reload modules at the next safe moment (when
104  * there is no DR activity.
105  */
106 void
107 catch_sighup(void)
108 {
109 	rcm_log_message(RCM_INFO,
110 	    gettext("SIGHUP received, will exit when daemon is idle\n"));
111 	rcmd_thr_signal();
112 }
113 
114 /*
115  * When SIGUSR1 is received, exit the thread
116  */
117 void
118 catch_sigusr1(void)
119 {
120 	rcm_log_message(RCM_DEBUG, "SIGUSR1 received in thread %d\n",
121 	    thr_self());
122 	cleanup_poll_thread();
123 	thr_exit(NULL);
124 }
125 
126 /*
127  * Use an advisory lock to ensure that only one daemon process is
128  * active at any point in time.
129  */
130 static pid_t
131 enter_daemon_lock(void)
132 {
133 	struct flock lock;
134 
135 	rcm_log_message(RCM_TRACE1,
136 	    "enter_daemon_lock: lock file = %s\n", daemon_lock_file);
137 
138 	daemon_lock_fd = open(daemon_lock_file, O_CREAT|O_RDWR, 0644);
139 	if (daemon_lock_fd < 0) {
140 		rcm_log_message(RCM_ERROR, gettext("open(%s) - %s\n"),
141 		    daemon_lock_file, strerror(errno));
142 		rcmd_exit(errno);
143 	}
144 
145 	lock.l_type = F_WRLCK;
146 	lock.l_whence = SEEK_SET;
147 	lock.l_start = 0;
148 	lock.l_len = 0;
149 
150 	if (fcntl(daemon_lock_fd, F_SETLK, &lock) == 0) {
151 		hold_daemon_lock = 1;
152 		return (getpid());
153 	}
154 
155 	/* failed to get lock, attempt to find lock owner */
156 	if ((errno == EAGAIN || errno == EDEADLK) &&
157 	    (fcntl(daemon_lock_fd, F_GETLK, &lock) == 0)) {
158 		return (lock.l_pid);
159 	}
160 
161 	/* die a horrible death */
162 	rcm_log_message(RCM_ERROR, gettext("lock(%s) - %s"), daemon_lock_file,
163 	    strerror(errno));
164 	exit(errno);
165 	/*NOTREACHED*/
166 }
167 
168 /*
169  * Drop the advisory daemon lock, close lock file
170  */
171 static void
172 exit_daemon_lock(void)
173 {
174 	struct flock lock;
175 
176 	lock.l_type = F_UNLCK;
177 	lock.l_whence = SEEK_SET;
178 	lock.l_start = 0;
179 	lock.l_len = 0;
180 
181 	if (fcntl(daemon_lock_fd, F_SETLK, &lock) == -1) {
182 		rcm_log_message(RCM_ERROR, gettext("unlock(%s) - %s"),
183 		    daemon_lock_file, strerror(errno));
184 	}
185 
186 	(void) close(daemon_lock_fd);
187 }
188 
189 /*PRINTFLIKE2*/
190 static void
191 rcm_log_msg_impl(int level, char *message, va_list ap)
192 {
193 	int log_level;
194 
195 	if (!logflag) {
196 		/*
197 		 * RCM_ERROR goes to stderr, others go to stdout
198 		 */
199 		FILE *out = (level <= RCM_ERROR) ? stderr : stdout;
200 		(void) vfprintf(out, message, ap);
201 		return;
202 	}
203 
204 	/*
205 	 * translate RCM_* to LOG_*
206 	 */
207 	switch (level) {
208 	case RCM_ERROR:
209 		log_level = LOG_ERR;
210 		break;
211 
212 	case RCM_WARNING:
213 		log_level = LOG_WARNING;
214 		break;
215 
216 	case RCM_NOTICE:
217 		log_level = LOG_NOTICE;
218 		break;
219 
220 	case RCM_INFO:
221 		log_level = LOG_INFO;
222 		break;
223 
224 	case RCM_DEBUG:
225 		log_level = LOG_DEBUG;
226 		break;
227 
228 	default:
229 		/*
230 		 * Don't log RCM_TRACEn messages
231 		 */
232 		return;
233 	}
234 
235 	(void) vsyslog(log_level, message, ap);
236 }
237 
238 /*
239  * print error messages to the terminal or to syslog
240  */
241 void
242 rcm_log_message(int level, char *message, ...)
243 {
244 	va_list ap;
245 
246 	if (level > debug_level) {
247 		return;
248 	}
249 
250 	va_start(ap, message);
251 	rcm_log_msg_impl(level, message, ap);
252 	va_end(ap);
253 }
254 
/*
 * Log a printf-style message to the terminal or to syslog without
 * comparing its level against debug_level.  This lets a caller override
 * the global debug_level; otherwise it behaves like rcm_log_message().
 *
 * level   - RCM_* message level
 * message - printf-style format string, followed by its arguments
 */
void
rcm_log_msg(int level, char *message, ...)
{
	va_list args;

	va_start(args, message);
	rcm_log_msg_impl(level, message, args);
	va_end(args);
}
270 
271 /*
272  * grab daemon_lock and direct messages to syslog
273  */
274 static void
275 detachfromtty()
276 {
277 	(void) chdir("/");
278 	(void) setsid();
279 	(void) close(0);
280 	(void) close(1);
281 	(void) close(2);
282 	(void) open("/dev/null", O_RDWR, 0);
283 	(void) dup2(0, 1);
284 	(void) dup2(0, 2);
285 	openlog(prog, LOG_PID, LOG_DAEMON);
286 	logflag = 1;
287 }
288 
289 int
290 main(int argc, char **argv)
291 {
292 	int c;
293 	pid_t pid;
294 	extern char *optarg;
295 	sigset_t mask;
296 	struct sigaction act;
297 
298 	(void) setlocale(LC_ALL, "");
299 #ifndef	TEXT_DOMAIN
300 #define	TEXT_DOMAIN	"SYS_TEST"
301 #endif
302 	(void) textdomain(TEXT_DOMAIN);
303 
304 	if ((prog = strrchr(argv[0], '/')) == NULL) {
305 		prog = argv[0];
306 	} else {
307 		prog++;
308 	}
309 
310 	(void) enable_extended_FILE_stdio(-1, -1);
311 
312 	/*
313 	 * process arguments
314 	 */
315 	if (argc > 3) {
316 		usage();
317 	}
318 	while ((c = getopt(argc, argv, "d:t:")) != EOF) {
319 		switch (c) {
320 		case 'd':
321 			debug_level = atoi(optarg);
322 			break;
323 		case 't':
324 			idle_timeout = atoi(optarg);
325 			break;
326 		case '?':
327 		default:
328 			usage();
329 			/*NOTREACHED*/
330 		}
331 	}
332 
333 	/*
334 	 * Check permission
335 	 */
336 	if (getuid() != 0) {
337 		(void) fprintf(stderr, gettext("Must be root to run %s\n"),
338 		    prog);
339 		exit(EPERM);
340 	}
341 
342 	/*
343 	 * When rcm_daemon is started by a call to librcm, it inherits file
344 	 * descriptors from the DR initiator making a call. The file
345 	 * descriptors may correspond to devices that can be removed by DR.
346 	 * Since keeping them remain opened is problematic, close everything
347 	 * but stdin/stdout/stderr.
348 	 */
349 	closefrom(3);
350 
351 	/*
352 	 * When rcm_daemon is started by the caller, it will inherit the
353 	 * signal block mask.  We unblock all signals to make sure the
354 	 * signal handling will work normally.
355 	 */
356 	(void) sigfillset(&mask);
357 	(void) thr_sigsetmask(SIG_UNBLOCK, &mask, NULL);
358 
359 	/*
360 	 * block SIGUSR1, use it for killing specific threads
361 	 */
362 	(void) sigemptyset(&mask);
363 	(void) sigaddset(&mask, SIGUSR1);
364 	(void) thr_sigsetmask(SIG_BLOCK, &mask, NULL);
365 
366 	/*
367 	 * Setup signal handlers for SIGHUP and SIGUSR1
368 	 * SIGHUP - causes a "delayed" daemon exit, effectively the same
369 	 *	as a daemon restart.
370 	 * SIGUSR1 - causes a thr_exit(). Unblocked in selected threads.
371 	 */
372 	act.sa_flags = 0;
373 	act.sa_handler = catch_sighup;
374 	(void) sigaction(SIGHUP, &act, NULL);
375 	act.sa_handler = catch_sigusr1;
376 	(void) sigaction(SIGUSR1, &act, NULL);
377 
378 	/*
379 	 * ignore SIGPIPE so that the rcm daemon does not exit when it
380 	 * attempts to read or write from a pipe whose corresponding
381 	 * rcm script process exited.
382 	 */
383 	act.sa_handler = SIG_IGN;
384 	(void) sigaction(SIGPIPE, &act, NULL);
385 
386 	/*
387 	 * run in daemon mode
388 	 */
389 	if (debug_level < DEBUG_LEVEL_FORK) {
390 		if (fork()) {
391 			exit(0);
392 		}
393 		detachfromtty();
394 	}
395 
396 	/* only one daemon can run at a time */
397 	if ((pid = enter_daemon_lock()) != getpid()) {
398 		rcm_log_message(RCM_DEBUG, "%s pid %d already running\n",
399 		    prog, pid);
400 		exit(EDEADLK);
401 	}
402 
403 	rcm_log_message(RCM_TRACE1, "%s started, debug level = %d\n",
404 	    prog, debug_level);
405 
406 	/*
407 	 * Set daemon state to block RCM requests before rcm_daemon is
408 	 * fully initialized. See rcmd_thr_incr().
409 	 */
410 	rcmd_set_state(RCMD_INIT);
411 
412 	/*
413 	 * create rcm_daemon door and set permission to 0400
414 	 */
415 	if (create_event_service(RCM_SERVICE_DOOR, event_service) == -1) {
416 		rcm_log_message(RCM_ERROR,
417 		    gettext("cannot create door service: %s\n"),
418 		    strerror(errno));
419 		rcmd_exit(errno);
420 	}
421 	(void) chmod(RCM_SERVICE_DOOR, S_IRUSR);
422 
423 	init_poll_thread(); /* initialize poll thread related data */
424 
425 	/*
426 	 * Initialize database by asking modules to register.
427 	 */
428 	rcmd_db_init();
429 
430 	/*
431 	 * Initialize locking, including lock recovery in the event of
432 	 * unexpected daemon failure.
433 	 */
434 	rcmd_lock_init();
435 
436 	/*
437 	 * Start accepting normal requests
438 	 */
439 	rcmd_set_state(RCMD_NORMAL);
440 
441 	/*
442 	 * Start cleanup thread
443 	 */
444 	rcmd_db_clean();
445 
446 	/*
447 	 * Loop within daemon and return after a period of inactivity.
448 	 */
449 	rcmd_start_timer(idle_timeout);
450 
451 	rcmd_cleanup(0);
452 	return (0);
453 }
454