xref: /titanic_50/usr/src/cmd/rcm_daemon/common/rcm_main.c (revision bd335c6465ddbafe543900df4b03247bfa288eff)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2003 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * Reconfiguration Coordination Daemon
31  *
32  * Accept RCM messages in the form of RCM events and process them
33  * - to build and update the system resource map
34  * - to allow clients to register/unregister for resource
35  * - to allow dr initiators to offline a resource before removal
36  * - to call into clients to perform suspend/offline actions
37  *
38  * The goal is to enable fully automated Dynamic Reconfiguration and better
39  * DR information tracking.
40  */
41 
42 #include <librcm_event.h>
43 
44 #include "rcm_impl.h"
45 
/*
 * Threshold for foreground operation: main() forks and detaches from the
 * terminal only when debug_level < DEBUG_LEVEL_FORK.
 */
#define	DEBUG_LEVEL_FORK	RCM_DEBUG

/* path of the file that carries the advisory single-instance lock */
#define	DAEMON_LOCK_FILE "/var/run/rcm_daemon_lock"

/*
 * Advisory lock state, maintained by enter_daemon_lock() and
 * exit_daemon_lock().
 */
static const char *daemon_lock_file = DAEMON_LOCK_FILE;
static int daemon_lock_fd;		/* descriptor open on the lock file */
static int hold_daemon_lock = 0;	/* 1 once the lock is acquired */

/*
 * Run-time settings and logging state.
 */
int debug_level = 0;		/* -d option; filter in rcm_log_message() */
static int idle_timeout = 0;	/* -t option; passed to rcmd_start_timer() */
static int logflag = 0;		/* 0: stdout/stderr, 1: syslog */
static char *prog;		/* last path component of argv[0] */

/*
 * Functions private to this file.
 */
static void usage(void);
static void catch_sighup(void);
static void catch_sigusr1(void);
static pid_t enter_daemon_lock(void);
static void exit_daemon_lock(void);

/*
 * Poll thread hooks defined outside this file.
 */
extern void init_poll_thread();
extern void cleanup_poll_thread();
68 
69 /*
70  * Print command line syntax for starting rcm_daemon
71  */
72 static void
73 usage() {
74 	(void) fprintf(stderr,
75 	    gettext("usage: %s [-d debug_level] [-t idle_timeout]\n"), prog);
76 	rcmd_exit(EINVAL);
77 }
78 
79 /*
80  * common exit function which ensures releasing locks
81  */
82 void
83 rcmd_exit(int status)
84 {
85 	if (status == 0) {
86 		rcm_log_message(RCM_INFO,
87 		    gettext("rcm_daemon normal exit\n"));
88 	} else {
89 		rcm_log_message(RCM_ERROR,
90 		    gettext("rcm_daemon exit: errno = %d\n"), status);
91 	}
92 
93 	if (hold_daemon_lock) {
94 		exit_daemon_lock();
95 	}
96 
97 	exit(status);
98 }
99 
100 /*
101  * When SIGHUP is received, reload modules at the next safe moment (when
102  * there is no DR activity.
103  */
104 void
105 catch_sighup(void)
106 {
107 	rcm_log_message(RCM_INFO,
108 	    gettext("SIGHUP received, will exit when daemon is idle\n"));
109 	rcmd_thr_signal();
110 }
111 
112 /*
113  * When SIGUSR1 is received, exit the thread
114  */
115 void
116 catch_sigusr1(void)
117 {
118 	rcm_log_message(RCM_DEBUG, "SIGUSR1 received in thread %d\n",
119 	    thr_self());
120 	cleanup_poll_thread();
121 	thr_exit(NULL);
122 }
123 
124 /*
125  * Use an advisory lock to ensure that only one daemon process is
126  * active at any point in time.
127  */
128 static pid_t
129 enter_daemon_lock(void)
130 {
131 	struct flock lock;
132 
133 	rcm_log_message(RCM_TRACE1,
134 	    "enter_daemon_lock: lock file = %s\n", daemon_lock_file);
135 
136 	daemon_lock_fd = open(daemon_lock_file, O_CREAT|O_RDWR, 0644);
137 	if (daemon_lock_fd < 0) {
138 		rcm_log_message(RCM_ERROR, gettext("open(%s) - %s\n"),
139 		    daemon_lock_file, strerror(errno));
140 		rcmd_exit(errno);
141 	}
142 
143 	lock.l_type = F_WRLCK;
144 	lock.l_whence = SEEK_SET;
145 	lock.l_start = 0;
146 	lock.l_len = 0;
147 
148 	if (fcntl(daemon_lock_fd, F_SETLK, &lock) == 0) {
149 		hold_daemon_lock = 1;
150 		return (getpid());
151 	}
152 
153 	/* failed to get lock, attempt to find lock owner */
154 	if ((errno == EAGAIN || errno == EDEADLK) &&
155 	    (fcntl(daemon_lock_fd, F_GETLK, &lock) == 0)) {
156 		return (lock.l_pid);
157 	}
158 
159 	/* die a horrible death */
160 	rcm_log_message(RCM_ERROR, gettext("lock(%s) - %s"), daemon_lock_file,
161 	    strerror(errno));
162 	exit(errno);
163 	/*NOTREACHED*/
164 }
165 
166 /*
167  * Drop the advisory daemon lock, close lock file
168  */
169 static void
170 exit_daemon_lock(void)
171 {
172 	struct flock lock;
173 
174 	lock.l_type = F_UNLCK;
175 	lock.l_whence = SEEK_SET;
176 	lock.l_start = 0;
177 	lock.l_len = 0;
178 
179 	if (fcntl(daemon_lock_fd, F_SETLK, &lock) == -1) {
180 		rcm_log_message(RCM_ERROR, gettext("unlock(%s) - %s"),
181 		    daemon_lock_file, strerror(errno));
182 	}
183 
184 	(void) close(daemon_lock_fd);
185 }
186 
187 /*PRINTFLIKE2*/
188 static void
189 rcm_log_msg_impl(int level, char *message, va_list ap)
190 {
191 	int log_level;
192 
193 	if (!logflag) {
194 		/*
195 		 * RCM_ERROR goes to stderr, others go to stdout
196 		 */
197 		FILE *out = (level <= RCM_ERROR) ? stderr : stdout;
198 		(void) vfprintf(out, message, ap);
199 		return;
200 	}
201 
202 	/*
203 	 * translate RCM_* to LOG_*
204 	 */
205 	switch (level) {
206 	case RCM_ERROR:
207 		log_level = LOG_ERR;
208 		break;
209 
210 	case RCM_WARNING:
211 		log_level = LOG_WARNING;
212 		break;
213 
214 	case RCM_NOTICE:
215 		log_level = LOG_NOTICE;
216 		break;
217 
218 	case RCM_INFO:
219 		log_level = LOG_INFO;
220 		break;
221 
222 	case RCM_DEBUG:
223 		log_level = LOG_DEBUG;
224 		break;
225 
226 	default:
227 		/*
228 		 * Don't log RCM_TRACEn messages
229 		 */
230 		return;
231 	}
232 
233 	(void) vsyslog(log_level, message, ap);
234 }
235 
236 /*
237  * print error messages to the terminal or to syslog
238  */
239 void
240 rcm_log_message(int level, char *message, ...)
241 {
242 	va_list ap;
243 
244 	if (level > debug_level) {
245 		return;
246 	}
247 
248 	va_start(ap, message);
249 	rcm_log_msg_impl(level, message, ap);
250 	va_end(ap);
251 }
252 
/*
 * Log a printf-style message to the terminal or to syslog without
 * consulting debug_level.
 *
 * This is rcm_log_message() minus the level > debug_level filter, which
 * lets a caller override the global debug level for a given message.
 */
void
rcm_log_msg(int level, char *message, ...)
{
	va_list args;

	va_start(args, message);
	rcm_log_msg_impl(level, message, args);
	va_end(args);
}
268 
269 /*
270  * grab daemon_lock and direct messages to syslog
271  */
272 static void
273 detachfromtty()
274 {
275 	(void) chdir("/");
276 	(void) setsid();
277 	(void) close(0);
278 	(void) close(1);
279 	(void) close(2);
280 	(void) open("/dev/null", O_RDWR, 0);
281 	(void) dup2(0, 1);
282 	(void) dup2(0, 2);
283 	openlog(prog, LOG_PID, LOG_DAEMON);
284 	logflag = 1;
285 }
286 
287 void
288 main(int argc, char **argv)
289 {
290 	int c;
291 	pid_t pid;
292 	extern char *optarg;
293 	sigset_t mask;
294 	struct sigaction act;
295 
296 	(void) setlocale(LC_ALL, "");
297 #ifndef	TEXT_DOMAIN
298 #define	TEXT_DOMAIN	"SYS_TEST"
299 #endif
300 	(void) textdomain(TEXT_DOMAIN);
301 
302 	if ((prog = strrchr(argv[0], '/')) == NULL) {
303 		prog = argv[0];
304 	} else {
305 		prog++;
306 	}
307 
308 	/*
309 	 * process arguments
310 	 */
311 	if (argc > 3) {
312 		usage();
313 	}
314 	while ((c = getopt(argc, argv, "d:t:")) != EOF) {
315 		switch (c) {
316 		case 'd':
317 			debug_level = atoi(optarg);
318 			break;
319 		case 't':
320 			idle_timeout = atoi(optarg);
321 			break;
322 		case '?':
323 		default:
324 			usage();
325 			/*NOTREACHED*/
326 		}
327 	}
328 
329 	/*
330 	 * Check permission
331 	 */
332 	if (getuid() != 0) {
333 		(void) fprintf(stderr, gettext("Must be root to run %s\n"),
334 		    prog);
335 		exit(EPERM);
336 	}
337 
338 	/*
339 	 * When rcm_daemon is started by a call to librcm, it inherits file
340 	 * descriptors from the DR initiator making a call. The file
341 	 * descriptors may correspond to devices that can be removed by DR.
342 	 * Since keeping them remain opened is problematic, close everything
343 	 * but stdin/stdout/stderr.
344 	 */
345 	closefrom(3);
346 
347 	/*
348 	 * block SIGUSR1, use it for killing specific threads
349 	 */
350 	(void) sigemptyset(&mask);
351 	(void) sigaddset(&mask, SIGUSR1);
352 	(void) thr_sigsetmask(SIG_BLOCK, &mask, NULL);
353 
354 	/*
355 	 * Setup signal handlers for SIGHUP and SIGUSR1
356 	 * SIGHUP - causes a "delayed" daemon exit, effectively the same
357 	 *	as a daemon restart.
358 	 * SIGUSR1 - causes a thr_exit(). Unblocked in selected threads.
359 	 */
360 	act.sa_flags = 0;
361 	act.sa_handler = catch_sighup;
362 	(void) sigaction(SIGHUP, &act, NULL);
363 	act.sa_handler = catch_sigusr1;
364 	(void) sigaction(SIGUSR1, &act, NULL);
365 
366 	/*
367 	 * ignore SIGPIPE so that the rcm daemon does not exit when it
368 	 * attempts to read or write from a pipe whose corresponding
369 	 * rcm script process exited.
370 	 */
371 	act.sa_handler = SIG_IGN;
372 	(void) sigaction(SIGPIPE, &act, NULL);
373 
374 	/*
375 	 * run in daemon mode
376 	 */
377 	if (debug_level < DEBUG_LEVEL_FORK) {
378 		if (fork()) {
379 			exit(0);
380 		}
381 		detachfromtty();
382 	}
383 
384 	/* only one daemon can run at a time */
385 	if ((pid = enter_daemon_lock()) != getpid()) {
386 		rcm_log_message(RCM_DEBUG, "%s pid %d already running\n",
387 		    prog, pid);
388 		exit(EDEADLK);
389 	}
390 
391 	rcm_log_message(RCM_TRACE1, "%s started, debug level = %d\n",
392 	    prog, debug_level);
393 
394 	/*
395 	 * Set daemon state to block RCM requests before rcm_daemon is
396 	 * fully initialized. See rcmd_thr_incr().
397 	 */
398 	rcmd_set_state(RCMD_INIT);
399 
400 	/*
401 	 * create rcm_daemon door and set permission to 0400
402 	 */
403 	if (create_event_service(RCM_SERVICE_DOOR, event_service) == -1) {
404 		rcm_log_message(RCM_ERROR,
405 		    gettext("cannot create door service: %s\n"),
406 		    strerror(errno));
407 		rcmd_exit(errno);
408 	}
409 	(void) chmod(RCM_SERVICE_DOOR, S_IRUSR);
410 
411 	init_poll_thread(); /* initialize poll thread related data */
412 
413 	/*
414 	 * Initialize database by asking modules to register.
415 	 */
416 	rcmd_db_init();
417 
418 	/*
419 	 * Initialize locking, including lock recovery in the event of
420 	 * unexpected daemon failure.
421 	 */
422 	rcmd_lock_init();
423 
424 	/*
425 	 * Start accepting normal requests
426 	 */
427 	rcmd_set_state(RCMD_NORMAL);
428 
429 	/*
430 	 * Start cleanup thread
431 	 */
432 	rcmd_db_clean();
433 
434 	/*
435 	 * Loop and shutdown daemon after a period of inactivity.
436 	 */
437 	rcmd_start_timer(idle_timeout);
438 	/* NOTREACHED */
439 }
440