xref: /titanic_50/usr/src/cmd/avs/dscfglockd/dscfglockd.c (revision 6aa4fc89ec1cf2cdf7d7c3b9ec059802ac9abe65)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <signal.h>
27 #include <sys/types.h>
28 #include <sys/time.h>
29 #include <sys/socket.h>
30 #include <netinet/in.h>
31 #include <netinet/tcp.h>
32 #include <arpa/inet.h>
33 #include <netdb.h>
34 #include <fcntl.h>
35 #include <string.h>
36 #include <memory.h>
37 #include <sys/param.h>
38 #include <sys/pathconf.h>
39 #include <netdir.h>
40 #include <netconfig.h>
41 #include <sys/sockio.h>
42 #include <net/if.h>
43 #include <sys/resource.h>
44 #include <stdio.h>
45 #include <errno.h>
46 #include <assert.h>
47 #include <locale.h>
48 #include <unistd.h>
49 #include <stdlib.h>
50 #include <string.h>
51 #include <strings.h>
52 #include <sys/unistat/spcs_s.h>
53 #include <sys/unistat/spcs_s_u.h>
54 #include <sys/unistat/spcs_errors.h>
55 
56 #include <sys/nsctl/cfg.h>
57 #include <sys/nsctl/cfg_lockd.h>
58 
/*
 * DPF(m): debug printf.  `m' is a complete, parenthesised fprintf argument
 * list, e.g. DPF((stderr, "value %d\n", v)).  In DEBUG builds the message is
 * printed only when the `debug' flag is non-zero; in other builds the macro
 * expands to nothing.
 *
 * The DEBUG form is wrapped in do { } while (0) so that it is a single
 * statement and can never capture an `else' that follows it.
 */
#ifdef DEBUG
#define	DPF(m)		do { if (debug) (void) fprintf m; } while (0)
#else
#define	DPF(m)
#endif
64 
/*
 * First file descriptor closed by init() when the daemon starts.  With
 * TTY_MESSAGES defined descriptors 0-2 are left open.
 */
#ifdef	TTY_MESSAGES
#define	CLOSE_FD	3
#else
#define	CLOSE_FD	0
#endif

#define	MAX_LOCKQ	1024	/* number of entries in lock_queue[] */
#define	MAX_DAEMONS	1024	/* number of entries in daemon_list[] */
#define	MAX_LOCAL	1024	/* number of entries in the_lock.holding_pid[] */
#define	MAX_UNLOCK	32	/* number of entries in unlock_buf[] */
#define	MAX_TIMEOUTS	3	/* ticks of silence before a daemon is "dead" */
#define	TIMEOUT_SECS	5	/* NOTE(review): presumably the tick period */

static char program[] = "dscfglockd";	/* name used in stderr messages */
static int debug;		/* non-zero enables DPF() output (-d option) */
static int lstate;		/* set from $LOCKD_STATE: dump state each tick */
static int msgtrace;		/* set from $LOCKD_MSG: trace messages */
static FILE *debugfile = NULL;	/* trace output file, opened in init() */
83 
/*
 * One pending lock request.  lock_queue[] holds next_req of these, kept
 * sorted by `order' (see lock_sort()); entry 0 is the request currently
 * being worked on (lock_wanted).
 */
struct lock_req {
	cfglockd_t	type;	/* read or write */
	pid_t	pid;		/* pid of read locker or local writer */
	daemonaddr_t	remote;	/* remote machine requesting write lock */
	int		state;	/* for write locks */
	int32_t		order;	/* who gets priority? */
} lock_queue[MAX_LOCKQ];
91 
/*
 * History of recently seen (pid, seq) pairs, newest first, used by
 * is_duplicate() to discard repeated requests.  A zero pid marks the end
 * of the used part of the buffer.
 */
struct unlock_s {
	pid_t	pid;		/* pid of locker */
	uint8_t seq;		/* seq number of last lock request */
} unlock_buf[MAX_UNLOCK];
96 
int next_req;		/* number of valid entries in lock_queue[] */
int32_t order;		/* this daemon's priority; sent in every message */

/* head of the request queue: the request currently being processed */
#define	lock_wanted	lock_queue[0]
long	ticker	= 1l;	/* incremented on every keepalive() call */

/* daemon-to-daemon message codes (struct lock_msg `message' field) */
#define	ALIVE		0x10
#define	READ_LOCK	0x11
#define	WRITE_LOCK	0x12
#define	UNLOCK		0x13
#define	GRANTED		0x14

int next_q;		/* NOTE(review): no use visible in this part of the file */
110 
/*
 * The current state of the one lock this daemon manages.
 */
struct {
	cfglockd_t	type;		/* LOCK_NOTLOCKED, LOCK_READ or LOCK_WRITE */
	int		nholders;	/* number of current holders */
	int		state;		/* STATE_ASKED while a local write is pending */
	daemonaddr_t	holder;		/* address of a remote write lock holder */
	/* daemon_list entry of a remote holder; NULL when held locally */
	struct lockdaemon	*remote_daemon;
	pid_t		holding_pid[MAX_LOCAL];	/* pids of local holders */
} the_lock;
119 
daemonaddr_t	thishost;	/* NOTE(review): filled in by init(), not shown here */
daemonaddr_t	localhost;	/* address of "localhost", set in build_daemon_list() */

/* values for the `state' fields of lock requests, the lock and daemons */
#define	STATE_CLEAR	0
#define	STATE_ASKED	1
#define	STATE_OKAYED	2
#define	STATE_WANTS	3
/* true once a daemon has been silent for more than MAX_TIMEOUTS ticks */
#define	lockdaemon_dead(ldp)	((ticker - (ldp)->timeout) > MAX_TIMEOUTS)
/* hold off / release SIGALRM around a critical section */
#define	CRIT_BEGIN()	(void) sighold(SIGALRM)
#define	CRIT_END()	(void) sigrelse(SIGALRM)

/* `method' argument of local_unlock() */
#define	NORMAL_UNLOCK	0	/* client request: duplicates are filtered */
#define	FORCE_UNLOCK	1	/* purge of a dead client: no duplicate check */
133 
/*
 * One peer lock daemon.  daemon_list[] is filled from the front; the first
 * entry with inuse == 0 terminates the list.
 */
struct lockdaemon {
	daemonaddr_t	host;	/* address and port of the peer */
	int	up;		/* non-zero while the peer is believed alive */
	long	timeout;	/* value of ticker when the peer was last heard */
	int	inuse;		/* non-zero if this slot holds a peer */
	int	state;		/* STATE_* of our write lock request to it */
	int32_t	order;		/* priority value last reported by the peer */
} daemon_list[MAX_DAEMONS];
142 
unsigned short	lock_port = CFG_SERVER_PORT;	/* may be overridden by config */
int	lock_soc = 0;		/* socket used by send_lockmsg() */
int	pf_inet = PF_INET;
/* printable form of the IPv4 address in a sockaddr_in (via inet_ntoa) */
#define	dp_addr(p)	inet_ntoa(((struct sockaddr_in *)p)->sin_addr)

/* interface count assumed by getmyaddrs() when SIOCGIFNUM fails */
#define	MAXIFS 32
149 
150 static char *
151 lockd_type(cfglockd_t type)
152 {
153 	switch (type) {
154 	case LOCK_NOTLOCKED:	return "NotLocked";
155 	case LOCK_READ:		return "Read";
156 	case LOCK_WRITE:	return "Write";
157 	case LOCK_LOCKED:	return "Locked";
158 	case LOCK_LOCKEDBY:	return "LockedBy";
159 	case LOCK_STAT:		return "Stat";
160 	case LOCK_ACK:		return "Ack";
161 	default:		return "*unknown*";
162 	}
163 }
164 
165 static char *
166 lockd_state(int state)
167 {
168 	switch (state) {
169 	case STATE_CLEAR:	return "Clear";
170 	case STATE_ASKED:	return "Asked";
171 	case STATE_OKAYED:	return "Okayed";
172 	case STATE_WANTS:	return "Wants";
173 	default:		return "*unknown*";
174 	}
175 }
176 
177 static char *
178 lockd_msg(int message)
179 {
180 	switch (message) {
181 	case ALIVE:		return "Alive";
182 	case READ_LOCK:		return "ReadLock";
183 	case WRITE_LOCK:	return "WriteLock";
184 	case UNLOCK:		return "Unlock";
185 	case GRANTED:		return "Granted";
186 	default:		return lockd_type((cfglockd_t)message);
187 	}
188 }
189 
190 /*
191  * The following is stolen from autod_nfs.c
192  */
193 static void
194 getmyaddrs(struct ifconf *ifc)
195 {
196 	int sock;
197 	int numifs;
198 	char *buf;
199 	int family;
200 
201 	ifc->ifc_buf = NULL;
202 	ifc->ifc_len = 0;
203 
204 #ifdef AF_INET6
205 	family = AF_INET6;
206 #else
207 	family = AF_INET;
208 #endif
209 	if ((sock = socket(family, SOCK_DGRAM, 0)) < 0) {
210 #ifdef DEBUG
211 		perror("getmyaddrs(): socket");
212 #endif
213 		return;
214 	}
215 
216 	if (ioctl(sock, SIOCGIFNUM, (char *)&numifs) < 0) {
217 #ifdef DEBUG
218 		perror("getmyaddrs(): SIOCGIFNUM");
219 #endif
220 		numifs = MAXIFS;
221 	}
222 
223 	buf = (char *)malloc(numifs * sizeof (struct ifreq));
224 	if (buf == NULL) {
225 #ifdef DEBUG
226 		(void) fprintf(stderr, "getmyaddrs(): malloc failed\n");
227 #endif
228 		(void) close(sock);
229 		return;
230 	}
231 
232 	ifc->ifc_buf = buf;
233 	ifc->ifc_len = numifs * sizeof (struct ifreq);
234 
235 	if (ioctl(sock, SIOCGIFCONF, (char *)ifc) < 0) {
236 #ifdef DEBUG
237 		perror("getmyaddrs(): SIOCGIFCONF");
238 #endif
239 	}
240 
241 	(void) close(sock);
242 }
243 
/* this host's interface list; allocated in init(), read by islocalhost() */
struct ifconf *ifc;
245 
246 static int
247 cmp_addr(daemonaddr_t *a, daemonaddr_t *b)
248 {
249 	int rc;
250 	rc = memcmp(&(a->sin_addr), &(b->sin_addr), sizeof (a->sin_addr));
251 	DPF((stderr, "compare %s %hu with", dp_addr(a), a->sin_port));
252 	DPF((stderr, " %s %hu = %d\n", dp_addr(b), b->sin_port, rc));
253 	return (rc);
254 }
255 
256 static int
257 addr_is_holder(int32_t order)
258 {
259 	return ((the_lock.nholders > 0) && the_lock.remote_daemon != NULL &&
260 	    (order == the_lock.remote_daemon->order));
261 }
262 
263 static int
264 islocalhost(daemonaddr_t *host)
265 {
266 	int n;
267 	struct sockaddr_in *s1, *s2;
268 	struct ifreq *ifr;
269 	int retval = 0;
270 
271 	ifr = ifc->ifc_req;
272 	n = ifc->ifc_len / sizeof (struct ifreq);
273 	s1 = host;
274 	s2 = NULL;
275 	for (; n > 0; n--, ifr++) {
276 		if (ifr->ifr_addr.sa_family != AF_INET)
277 			continue;
278 
279 		/* LINTED pointer alignment */
280 		s2 = (struct sockaddr_in *)&ifr->ifr_addr;
281 
282 		if (memcmp((char *)&s2->sin_addr,
283 		    (char *)&s1->sin_addr, sizeof (s1->sin_addr)) == 0) {
284 			retval = 1;
285 			/* it's me */
286 			break;
287 		}
288 	}
289 	return (retval);
290 }
291 
292 static void
293 send_lockmsg(int cmd, pid_t pid, daemonaddr_t *dp, uint8_t seq)
294 {
295 	struct lock_msg message_buf;
296 	int rc;
297 
298 	if (msgtrace && debugfile) {
299 		time_t t = time(0);
300 		(void) fprintf(debugfile, "%19.19s send %-9.9s to   %s\n",
301 		    ctime(&t), lockd_msg(cmd), dp_addr(dp));
302 	}
303 	DPF((stderr, "send %d to %s port %hu\n", cmd,
304 	    dp_addr(dp), dp->sin_port));
305 	message_buf.message = cmd;
306 	message_buf.pid = pid;
307 	message_buf.order = order;
308 	message_buf.seq = seq;
309 	do {
310 		rc = sendto(lock_soc, &message_buf, sizeof (message_buf), 0,
311 		    (struct sockaddr *)dp, sizeof (struct sockaddr));
312 	} while (rc == -1 && errno == EINTR);
313 	if (rc == -1)
314 		spcs_log("cfglockd", NULL, "sendto rc -1 errno %d", errno);
315 }
316 
317 /*
318  * send an alive message to all configured daemons so that they can tell
319  * us if they are holding a write lock.
320  */
321 
322 static void
323 send_aliveall()
324 {
325 	struct lockdaemon *ldp;
326 	int i;
327 	for (i = 0, ldp = daemon_list; i < MAX_DAEMONS; i++, ldp++) {
328 		if (ldp->inuse == 0)
329 			break;
330 		send_lockmsg(ALIVE, (pid_t)0, &(ldp->host), 0);
331 	}
332 }
333 
334 /* find the lock daemon structure for a give daemon address */
335 
336 static struct lockdaemon *
337 find_lockdaemon(daemonaddr_t *d)
338 {
339 	struct lockdaemon *ldp;
340 	int i;
341 	for (i = 0, ldp = daemon_list; i < MAX_DAEMONS; i++, ldp++) {
342 		if (ldp->inuse == 0)
343 			break;
344 		if (cmp_addr(&(ldp->host), d) == 0)
345 			return (ldp);
346 	}
347 	return (NULL);
348 }
349 
350 /*
351  * a messge has been received from daemon, note this and if the daemon
352  * was previously dead  and we have the write lock tell it that we do.
353  */
354 
355 static void
356 daemon_alive(daemonaddr_t *daemon, int32_t order)
357 {
358 	struct lockdaemon *ldp;
359 	int i;
360 
361 	for (i = 0, ldp = daemon_list; i < MAX_DAEMONS; i++, ldp++) {
362 		if (ldp->inuse == 0)
363 			break;
364 		if (cmp_addr(&(ldp->host), daemon) == 0) {
365 			ldp->order = order;
366 			ldp->timeout = ticker;
367 			if (ldp->up == 0) {
368 				spcs_log("cfglockd", NULL,
369 				    "daemon restarted on %s\n",
370 				    dp_addr(daemon));
371 				DPF((stderr, "daemon restarted on %s\n",
372 				    dp_addr(daemon)));
373 				ldp->up = 1;
374 				goto come_up;
375 			}
376 			return;
377 		}
378 	}
379 	/* new daemon has announced itself */
380 	if (i < MAX_DAEMONS) {
381 		DPF((stderr, "new daemon on %s\n", dp_addr(daemon)));
382 		spcs_log("cfglockd", NULL,
383 		    "new daemon on %s\n", dp_addr(daemon));
384 		ldp->host = *daemon;
385 		ldp->inuse = 1;
386 		ldp->timeout = ticker;
387 		ldp->order = order;
388 	} else {
389 		/* problem, more daemons than expected */
390 		i++;
391 	}
392 come_up:
393 	if (the_lock.type == LOCK_WRITE && the_lock.remote_daemon == NULL)
394 		send_lockmsg(WRITE_LOCK, (pid_t)0, daemon, 0);
395 }
396 
397 static void
398 delete_queue_entry(struct  lock_req *req)
399 {
400 	int i;
401 
402 	for (i = (req - lock_queue); i++ < next_req; req++)
403 		*req = *(req+1);
404 	next_req--;
405 }
406 
407 static void
408 take_lock(int ackmessage)
409 {
410 	send_lockmsg(ackmessage, (pid_t)0, &lock_wanted.remote, 0);
411 	delete_queue_entry(lock_queue);
412 }
413 
414 static void
415 check_for_write_lock()
416 {
417 	struct lockdaemon *ldp;
418 	int i;
419 	int	wait = 0;
420 
421 	DPF((stderr, "check for lock\n"));
422 	if (lock_wanted.state != STATE_ASKED)
423 		return;
424 	for (i = 0, ldp = daemon_list; i < MAX_DAEMONS; i++, ldp++) {
425 		if (ldp->inuse == 0)
426 			break;
427 		if (ldp->up && ldp->state != STATE_OKAYED) {
428 			wait = 1;
429 			break;
430 		}
431 	}
432 	if (wait == 0 && lock_wanted.type == LOCK_WRITE) {
433 		the_lock.type = LOCK_WRITE;
434 		the_lock.holding_pid[0] = lock_wanted.pid;
435 		the_lock.nholders = 1;
436 		the_lock.state = STATE_CLEAR;
437 		take_lock(LOCK_LOCKED);
438 	}
439 }
440 
441 static void
442 lock_granted(daemonaddr_t *da)
443 {
444 	struct lockdaemon *ldp;
445 
446 	if ((ldp = find_lockdaemon(da)) != NULL) {
447 		/* if we already own the lock, throw the msg away */
448 		if (the_lock.remote_daemon == NULL &&
449 		    the_lock.type == LOCK_WRITE) {
450 			return;
451 		}
452 
453 		/*
454 		 * If the current lock isn't a write lock and we're not
455 		 * asking for one
456 		 * -OR-
457 		 * The current lock is a write lock and it's not owned by us
458 		 * -THEN-
459 		 * send back an unlocked message.
460 		 */
461 		if ((the_lock.type != LOCK_WRITE &&
462 		    the_lock.state != STATE_ASKED) ||
463 		    (the_lock.type == LOCK_WRITE &&
464 		    the_lock.remote_daemon != NULL)) {
465 			send_lockmsg(UNLOCK, (pid_t)0, &(ldp->host), 0);
466 			return;
467 		}
468 		ldp->state = STATE_OKAYED;
469 	}
470 	check_for_write_lock();
471 }
472 
473 static int
474 try_lock()
475 {
476 	struct lockdaemon *ldp;
477 	int i;
478 
479 	switch (the_lock.type) {
480 	case LOCK_READ:
481 		if (lock_wanted.type == LOCK_READ) {
482 			i = the_lock.nholders++;
483 			the_lock.holding_pid[i] = lock_wanted.pid;
484 			the_lock.state = STATE_CLEAR;
485 			DPF((stderr, "increment read lockers to %d\n",
486 			    the_lock.nholders));
487 			take_lock(LOCK_LOCKED);
488 			break;
489 		}
490 		/* write lock has to wait */
491 		break;
492 	case LOCK_WRITE:
493 		/* lock has to wait until write lock is cleared */
494 		break;
495 	case LOCK_NOTLOCKED:
496 		if (lock_wanted.type == LOCK_READ) {
497 			DPF((stderr, "local locker, 1 lock holder\n"));
498 			the_lock.holding_pid[0] = lock_wanted.pid;
499 			the_lock.nholders = 1;
500 			the_lock.type = LOCK_READ;
501 			the_lock.state = STATE_CLEAR;
502 			the_lock.remote_daemon = NULL;
503 			take_lock(LOCK_LOCKED);
504 			return (1);
505 		}
506 		if (islocalhost(&lock_wanted.remote)) {
507 			DPF((stderr, "local locker, take write lock\n"));
508 			/* tell everyone I'm locking */
509 			if (lock_wanted.state != STATE_ASKED) {
510 				for (i = 0, ldp = daemon_list; i < MAX_DAEMONS;
511 				    i++, ldp++) {
512 					if (ldp->inuse == 0)
513 						break;
514 					ldp->state = STATE_ASKED;
515 					send_lockmsg(WRITE_LOCK, (pid_t)0,
516 					    &(ldp->host), 0);
517 				}
518 			}
519 			lock_wanted.state = STATE_ASKED;
520 			check_for_write_lock();
521 			the_lock.remote_daemon = NULL;
522 			the_lock.state = STATE_ASKED;
523 			return (0);
524 		} else {
525 			DPF((stderr, "remote locker, take write lock\n"));
526 			the_lock.type = LOCK_WRITE;
527 			the_lock.holder = lock_wanted.remote;
528 			the_lock.nholders = 1;
529 			the_lock.remote_daemon =
530 			    find_lockdaemon(&the_lock.holder);
531 			the_lock.state = STATE_CLEAR;
532 			/* okay to remote */
533 			take_lock(GRANTED);
534 		}
535 		break;
536 	default:
537 		DPF((stderr, "weird lock type held - %d\n", the_lock.type));
538 		the_lock.type = LOCK_NOTLOCKED;
539 		break;
540 	}
541 	return (0);
542 }
543 
544 static void
545 process_queue()
546 {
547 	if (next_req < 1)
548 		return;		/* no locks queued */
549 	while (try_lock())
550 		;
551 }
552 
553 static int
554 lock_sort(const void *a, const void *b)
555 {
556 	struct lock_req *left = (struct lock_req *)a;
557 	struct lock_req *right = (struct lock_req *)b;
558 
559 	return (left->order - right->order);
560 }
561 
562 static void
563 queue_lock(cfglockd_t type, struct lock_msg *msg, daemonaddr_t *addr)
564 {
565 	int	i;
566 	struct lock_req *lrp;
567 	struct lockdaemon *ldp;
568 
569 	/* first check if new lock matches current lock */
570 	if (the_lock.type == type && addr_is_holder(msg->order)) {
571 		/* remote daemon missed locked message */
572 		send_lockmsg(GRANTED, (pid_t)0, addr, msg->seq);
573 		return;
574 	}
575 
576 	/* next search queue to check for duplicate */
577 	for (i = 0, lrp = lock_queue; i++ < next_req; lrp++) {
578 		if (lrp->type == type && lrp->pid == msg->pid &&
579 		    cmp_addr(addr, &(lrp->remote)) == 0)
580 			return;
581 
582 	}
583 
584 	/*
585 	 * It's a new lock request.  Are we in the middle of
586 	 * obtaining one for ourselves?
587 	 */
588 
589 	if (the_lock.type == LOCK_NOTLOCKED && the_lock.state == STATE_ASKED) {
590 		/* did a higher priority request just come in? */
591 		if (msg->order < order) {
592 			/* requeue our request */
593 			the_lock.state = STATE_CLEAR;
594 			lock_wanted.state = STATE_CLEAR;
595 
596 			/* let the other lockds know */
597 			for (i = 0, ldp = daemon_list; i < MAX_DAEMONS;
598 			    i++, ldp++) {
599 				if (ldp->inuse == 0)
600 					break;
601 				if (ldp->up && ldp->state == STATE_OKAYED) {
602 					send_lockmsg(UNLOCK, (pid_t)0,
603 					    &(ldp->host), 0);
604 				}
605 			}
606 		}
607 	}
608 
609 
610 	lrp = lock_queue;
611 	lrp += (next_req++);
612 	lrp->type = type;
613 	lrp->pid = msg->pid;
614 	lrp->state = STATE_CLEAR;
615 	lrp->order = msg->order;
616 	if (addr) {
617 		lrp->remote = *addr;
618 	}
619 
620 	if (next_req > 1)
621 		qsort(lock_queue, next_req, sizeof (lock_queue[0]), lock_sort);
622 
623 	if (the_lock.type != LOCK_WRITE)
624 		process_queue();
625 }
626 
627 static void
628 lock_stat()
629 {
630 	char *lt = "Unknown";
631 	struct lockdaemon *ldp;
632 	int i;
633 
634 	switch (the_lock.type) {
635 	case LOCK_NOTLOCKED:
636 		lt = "not locked";
637 		break;
638 	case LOCK_READ:
639 		lt = "read locked";
640 		break;
641 	case LOCK_WRITE:
642 		lt = "write locked";
643 		break;
644 	}
645 	spcs_log("cfglockd", NULL, "Lock is %s (%d)", lt, the_lock.type);
646 	spcs_log("cfglockd", NULL, "There are %d holders of the lock",
647 	    the_lock.nholders);
648 	if (the_lock.nholders > 0) {
649 		for (i = 0; i < the_lock.nholders; i++)
650 			spcs_log("cfglockd", NULL, "holding_pid[%d] = %6d", i,
651 			    the_lock.holding_pid[i]);
652 	}
653 	spcs_log("cfglockd", NULL, "holder daemon was %s port %hu, remote %x",
654 	    dp_addr(&the_lock.holder), the_lock.holder.sin_port,
655 	    the_lock.remote_daemon);
656 	spcs_log("cfglockd", NULL, "Lock queue, %d requests", next_req);
657 	for (i = 0; i < next_req; i++) {
658 		spcs_log("cfglockd", NULL, "request %d type %d order %d", i,
659 		    lock_queue[i].type, lock_queue[i].order);
660 		spcs_log("cfglockd", NULL, "  client %s port %hu, pid %d",
661 		    dp_addr(&lock_queue[i].remote),
662 		    lock_queue[i].remote.sin_port, lock_queue[i].pid);
663 	}
664 	spcs_log("cfglockd", NULL, "Daemon list");
665 
666 	for (i = 0, ldp = daemon_list; i < MAX_DAEMONS; i++, ldp++) {
667 		if (ldp->inuse == 0)
668 			break;
669 		spcs_log("cfglockd", NULL, "daemon %d, %s port %hu", i,
670 		    dp_addr(&ldp->host), ldp->host.sin_port);
671 		spcs_log("cfglockd", NULL,
672 		    "  up %d timeout %ld missed %d state %d\n", ldp->up,
673 		    ldp->timeout, ticker - ldp->timeout, ldp->state);
674 	}
675 }
676 
677 static int
678 is_duplicate(cfglockd_t type, pid_t pid, uint8_t seq)
679 {
680 	struct unlock_s *bufp;
681 	int i;
682 
683 	if (!pid) {
684 		return (0);
685 	}
686 
687 	for (i = 0, bufp = unlock_buf; bufp->pid && i < MAX_UNLOCK;
688 	    i++, bufp++) {
689 		if (bufp->pid == pid && bufp->seq == seq) {
690 			/* throw message away */
691 #ifdef DEBUG
692 			spcs_log("cfglockd", NULL,
693 			    "duplicate '%d' request received from %d",
694 			    type, pid);
695 #endif
696 			return (1);
697 		}
698 	}
699 
700 	/* add it to the list */
701 	bcopy(unlock_buf, &unlock_buf[ 1 ],
702 	    sizeof (unlock_buf) - sizeof (struct unlock_s));
703 	(*unlock_buf).pid = pid;
704 	(*unlock_buf).seq = seq;
705 
706 	return (0);
707 }
708 
709 static void
710 local_lock(cfglockd_t type, struct lock_msg *msg, daemonaddr_t *client)
711 {
712 	if (is_duplicate(type, msg->pid, msg->seq)) {
713 		if (the_lock.remote_daemon == NULL &&
714 		    (the_lock.type == LOCK_WRITE ||
715 		    the_lock.type == LOCK_READ) &&
716 		    the_lock.holding_pid[0] == msg->pid) {
717 			send_lockmsg(LOCK_LOCKED, (pid_t)0, client, msg->seq);
718 		}
719 	} else {
720 		queue_lock(type, msg, client);
721 	}
722 }
723 
724 static void
725 remote_lock(struct sockaddr_in *remote, struct lock_msg *msg)
726 {
727 	/* make sure remote knows we are alive */
728 	send_lockmsg(ALIVE, (pid_t)0, remote, 0);
729 
730 	/* clear out pid as it is meaningless on this node */
731 	msg->pid = (pid_t)0;
732 
733 	queue_lock(LOCK_WRITE, msg, (daemonaddr_t *)remote);
734 }
735 
736 static void
737 unqueue_lock(daemonaddr_t *d, pid_t pid)
738 {
739 	int	i;
740 	struct lock_req *lrp, *xrp;
741 	int diff;
742 
743 	/* search queue to delete ungranted locks */
744 	for (i = 0, xrp = lrp = lock_queue; i++ < next_req; lrp++) {
745 		*xrp = *lrp;
746 		diff = 0;
747 		if (pid != (pid_t)0 && lrp->pid != pid)
748 			diff = 1;
749 		if (d != NULL && cmp_addr(d, &(lrp->remote)) != 0)
750 			diff = 1;
751 		if (!diff)
752 			continue;
753 
754 		xrp++;
755 	}
756 	next_req = xrp - lock_queue;
757 }
758 
759 static void
760 xxunlock()
761 {
762 	DPF((stderr, "** UNLOCK **\n"));
763 	the_lock.remote_daemon = NULL;
764 	the_lock.type = LOCK_NOTLOCKED;
765 	the_lock.nholders = 0;
766 	the_lock.state = STATE_CLEAR;
767 	process_queue();
768 }
769 
770 
771 static void
772 local_unlock(pid_t pid, uint8_t seq, int method)
773 {
774 	struct lockdaemon *ldp;
775 	int i;
776 
777 	if (method == NORMAL_UNLOCK && is_duplicate(LOCK_NOTLOCKED, pid, seq)) {
778 		return;
779 	}
780 
781 	if (the_lock.type == LOCK_READ) {
782 		/* delete reference to pid of reading process */
783 		for (i = 0; i < the_lock.nholders; i++) {
784 			if (the_lock.holding_pid[i] == pid) {
785 				DPF((stderr, "decrement lockers from %d\n",
786 				    the_lock.nholders));
787 				--the_lock.nholders;
788 				break;
789 			}
790 		}
791 		for (; i < the_lock.nholders; i++) {
792 			the_lock.holding_pid[i] = the_lock.holding_pid[i+1];
793 		}
794 		if (the_lock.nholders > 0)
795 			return;
796 	} else {
797 		/* LOCK_WRITE */
798 		if (pid != the_lock.holding_pid[0])
799 			return;
800 		the_lock.holding_pid[0] = (pid_t)0;
801 		for (i = 0, ldp = daemon_list; i < MAX_DAEMONS; i++, ldp++) {
802 			if (ldp->inuse == 0)
803 				break;
804 			if (ldp->up)
805 				send_lockmsg(UNLOCK, (pid_t)0, &(ldp->host), 0);
806 		}
807 	}
808 	xxunlock();
809 }
810 
811 static void
812 remote_unlock(int32_t order, daemonaddr_t *d)
813 {
814 	int	i;
815 	struct lock_req *lrp;
816 
817 	DPF((stderr, "remote unlock from %s ", dp_addr(d)));
818 	DPF((stderr, "when %s holds lock\n", dp_addr(&the_lock.holder)));
819 
820 	/* search queue to check for ungranted lock */
821 	for (i = 0, lrp = lock_queue; i++ < next_req; lrp++) {
822 		if (lrp->type == LOCK_WRITE &&
823 		    cmp_addr(d, &(lrp->remote)) == 0) {
824 			delete_queue_entry(lrp);
825 			return;
826 		}
827 
828 	}
829 	if (addr_is_holder(order)) {
830 		xxunlock();
831 	}
832 }
833 
834 static void
835 lockedby(daemonaddr_t *d, uint8_t seq)
836 {
837 	DPF((stderr, "lockby enquiry from %s ", dp_addr(d)));
838 	switch (the_lock.type) {
839 	case LOCK_NOTLOCKED:
840 		send_lockmsg(LOCK_NOTLOCKED, (pid_t)0, d, seq);
841 		break;
842 	case LOCK_READ:
843 		send_lockmsg(LOCK_READ, the_lock.holding_pid[0], d, seq);
844 		break;
845 	case LOCK_WRITE:
846 		send_lockmsg(LOCK_WRITE, the_lock.holding_pid[0], d, seq);
847 		break;
848 	}
849 }
850 
/*
 * Periodic housekeeping; takes a signal number, which it does not use.
 * NOTE(review): presumably installed as the SIGALRM handler - confirm in
 * init().
 *
 * Each call advances the global ticker, sends ALIVE to remote requesters
 * waiting in the queue, checks on a remote lock holder, optionally prints
 * the lock and daemon state, re-sends outstanding write lock requests and
 * marks daemons that have stopped answering as down.
 */
/* ARGSUSED */
static void
keepalive(int signo)
{
	int i;
	struct lock_req *locker;
	struct lockdaemon *ldp;

	DPF((stderr, "keepalive...\n"));
	ticker++;

	/*
	 * tell any other daemon that has a lock request in our queue that
	 * this daemon is still alive.
	 */

	for (i = 0, locker = lock_queue; i < next_req; i++, locker++) {
		if (locker->pid == 0)	/* remote lock request */
			send_lockmsg(ALIVE, (pid_t)0, &(locker->remote), 0);
	}

	/*
	 * if a remote daemon holds the lock, check it is still alive.
	 * if it has been silent for too long, mark it down and release
	 * the lock; otherwise send it a grant message again in case the
	 * remote daemon missed our original grant.
	 */

	if (the_lock.remote_daemon) {
		if (lockdaemon_dead(the_lock.remote_daemon)) {
			DPF((stderr, "lock owner died\n"));
			the_lock.remote_daemon->up = 0;
			xxunlock();
		} else {
			send_lockmsg(GRANTED, (pid_t)0, &the_lock.holder, 0);
		}
	}

	/*
	 * check for response from daemons preventing this daemon
	 * from taking a write lock by not sending a grant message.
	 * if the remote daemon is alive send another lock request,
	 * otherwise mark it as dead.
	 * send alive message to any live remote daemons if this
	 * daemon has the write lock.
	 */
	/* optional state dump on stdout, enabled by $LOCKD_STATE */
	if (lstate) {
		(void) printf("\nlock: %s\n", lockd_type(the_lock.type));
		(void) printf("    no. holders: %d\n", the_lock.nholders);
		(void) printf("    hold addr  : %s\n", the_lock.remote_daemon?
		    dp_addr(the_lock.remote_daemon): "0.0.0.0");
		(void) printf("    holding pid:");
		for (i = 0; i < the_lock.nholders; i++) {
			(void) printf(" %ld", the_lock.holding_pid[ i ]);
		}
		(void) printf("\n");
	}
	for (i = 0, ldp = daemon_list; i < MAX_DAEMONS; i++, ldp++) {
		if (ldp->inuse == 0)
			break;

		if (lstate) {
			(void) printf("%-15.15s ", dp_addr(&ldp->host));
			(void) printf("%-4.4s ", ldp->up? "up" : "down");
			(void) printf("%5ld ", ldp->timeout);
			(void) printf("%-10.10s ", lockd_state(ldp->state));
			(void) printf("%6d\n", ldp->order);
		}

		if (ldp->state == STATE_ASKED) {
			/* we are still waiting for this daemon's answer */
			if (lockdaemon_dead(ldp)) {
				ldp->up = 0;
				ldp->state = STATE_CLEAR;
				continue;
			}
			send_lockmsg(WRITE_LOCK, (pid_t)0, &(ldp->host), 0);
			continue;
		}
		/* we hold the write lock locally: keep the daemon informed */
		if (the_lock.type == LOCK_WRITE &&
		    the_lock.remote_daemon == NULL)
			send_lockmsg(ALIVE, (pid_t)0, &(ldp->host), 0);
	}
}
933 
934 static void
935 dispatch(struct lock_msg *mp, daemonaddr_t *host)
936 {
937 	int message = mp->message;
938 	int localhost;
939 
940 	localhost = islocalhost(host);
941 	if (msgtrace && debugfile) {
942 		time_t t = time(0);
943 		if (localhost) {
944 			(void) fprintf(debugfile,
945 			    "%19.19s recv %-9.9s from %s (%ld)\n", ctime(&t),
946 			    lockd_msg(message), dp_addr(host), mp->pid);
947 		} else {
948 			(void) fprintf(debugfile,
949 			    "%19.19s recv %-9.9s from %s order %d (%ld)\n",
950 			    ctime(&t), lockd_msg(message), dp_addr(host),
951 			    mp->order, mp->pid);
952 		}
953 	}
954 	DPF((stderr, "received message %d\n", message));
955 	DPF((stderr, "from %s port %hu\n", dp_addr(host), host->sin_port));
956 	if (!localhost)
957 		daemon_alive(host, mp->order);
958 	else
959 		mp->order = order;
960 	switch (message) {
961 	case ALIVE:
962 		DPF((stderr, "received ALIVE %s\n", dp_addr(host)));
963 		/* do nothing, general "not localhost" code above does this */
964 		break;
965 	case UNLOCK:
966 		DPF((stderr, "received UNLOCK\n"));
967 		remote_unlock(mp->order, host);
968 		break;
969 	case GRANTED:
970 		DPF((stderr, "received GRANTED\n"));
971 		lock_granted(host);
972 		break;
973 	case WRITE_LOCK:
974 		DPF((stderr, "received WRITE_LOCK\n"));
975 		assert(!localhost);
976 		remote_lock(host, mp);
977 		break;
978 	case READ_LOCK:
979 	case LOCK_READ:
980 		DPF((stderr, "received READ_LOCK\n"));
981 		assert(localhost);
982 		local_lock(LOCK_READ, mp, host);
983 		break;
984 	case LOCK_WRITE:
985 		DPF((stderr, "received LOCK_WRITE\n"));
986 		assert(localhost);
987 		local_lock(LOCK_WRITE, mp, host);
988 		break;
989 	case LOCK_NOTLOCKED:
990 		DPF((stderr, "received LOCK_NOTLOCKED\n"));
991 		send_lockmsg(LOCK_ACK, (pid_t)0, host, mp->seq);
992 		if (the_lock.type != LOCK_NOTLOCKED) {
993 			local_unlock(mp->pid, mp->seq, NORMAL_UNLOCK);
994 		}
995 		break;
996 	case LOCK_LOCKEDBY:
997 		lockedby(host, mp->seq);
998 		break;
999 	case LOCK_STAT:
1000 		lock_stat();
1001 		break;
1002 	case LOCK_ACK:
1003 		/* throw message away -- this is an error to receive */
1004 		break;
1005 	}
1006 }
1007 
1008 /*
1009  * unqueue any locks asked for by pid and unlock any locks held by pid.
1010  */
1011 
1012 static void
1013 purge_pid(pid_t pid)
1014 {
1015 	DPF((stderr, "purge locks for %ld\n", pid));
1016 	unqueue_lock(NULL, pid);
1017 	if (the_lock.type != LOCK_NOTLOCKED)
1018 		local_unlock(pid, 0, FORCE_UNLOCK);
1019 }
1020 
1021 /*
1022  * Check for exit or exec of client processes.
1023  * The lock protecting the processes pid in the lockfile will
1024  * be removed by the kernel when a client exits or execs.
1025  */
1026 
1027 static void
1028 check_for_dead()
1029 {
1030 	int i, x;
1031 	pid_t pid;
1032 
1033 	for (i = 0; (x = cfg_filelock(i, 0)) != CFG_LF_EOF; i++) {
1034 		if (x == CFG_LF_AGAIN)
1035 			continue; /* can't take lock, must be still alive */
1036 		cfg_readpid(i, &pid);
1037 		cfg_writepid(i, (pid_t)0);
1038 		(void) cfg_fileunlock(i);
1039 		if (pid != (pid_t)0)
1040 			purge_pid(pid);
1041 	}
1042 }
1043 
1044 static void
1045 build_daemon_list(char *cf_file, int exe)
1046 {
1047 	FILE *fp;
1048 	char	host[1024];
1049 	int	port;
1050 	int	i;
1051 	struct	hostent *hp;
1052 	struct lockdaemon *ldp;
1053 
1054 	if ((hp = gethostbyname("localhost")) == NULL) {
1055 		(void) fprintf(stderr, "%s: Can't find hostent for %s\n",
1056 		    program, "localhost");
1057 		spcs_log("cfglockd", NULL, "couldn't find localhost");
1058 		exit(1);
1059 	}
1060 
1061 	(void) memcpy(&(localhost.sin_addr.s_addr), *(hp->h_addr_list),
1062 	    sizeof (localhost.sin_addr));
1063 	if (cf_file == NULL) {
1064 		(void) endhostent();
1065 		return;
1066 	}
1067 	if (exe) {
1068 		if ((fp = popen(cf_file, "r")) == NULL) {
1069 			perror(cf_file);
1070 			(void) fprintf(stderr,
1071 			    "%s: Can't open config program\n", program);
1072 			spcs_log("cfglockd", NULL, "couldn't read config");
1073 			exit(1);
1074 		}
1075 	} else {
1076 		if ((fp = fopen(cf_file, "r")) == NULL) {
1077 			perror(cf_file);
1078 			(void) fprintf(stderr, "%s: Can't open config file\n",
1079 			    program);
1080 			spcs_log("cfglockd", NULL, "couldn't read config");
1081 			exit(1);
1082 		}
1083 	}
1084 	ldp = daemon_list;
1085 	while ((i = fscanf(fp, "%s %d\n", host, &port)) != EOF) {
1086 		if (host[0] == '#')	/* line starting with # are comments */
1087 			continue;
1088 		if (i == 1) {
1089 			port = lock_port;
1090 		} else {
1091 			if (strcmp(host, "localhost") == 0) {
1092 				lock_port = port;
1093 				continue;
1094 			}
1095 		}
1096 
1097 		if ((hp = gethostbyname(host)) == NULL) {
1098 			(void) fprintf(stderr,
1099 			    "%s: Can't find hostent for %s\n", program, host);
1100 			continue;
1101 		}
1102 
1103 		(void) memcpy(&(ldp->host.sin_addr.s_addr), *(hp->h_addr_list),
1104 		    sizeof (ldp->host.sin_addr));
1105 		DPF((stderr, "daemon: %s\t%s\n",
1106 		    inet_ntoa(ldp->host.sin_addr), hp->h_name));
1107 		if (islocalhost(&(ldp->host))) {
1108 			DPF((stderr, "is an alias for this host, skipping\n"));
1109 			continue;
1110 		}
1111 		ldp->host.sin_port = htons((short)port);
1112 		ldp->host.sin_family = hp->h_addrtype;
1113 		ldp->inuse = 1;
1114 		ldp->up = 1;
1115 		ldp++;
1116 	}
1117 	if (exe)
1118 		(void) pclose(fp);
1119 	else
1120 		(void) fclose(fp);
1121 	(void) endhostent();
1122 }
1123 
1124 static void
1125 usage()
1126 {
1127 	(void) fprintf(stderr,
1128 	    gettext("usage: %s [-d] [-f file]|[-e program]\n"), program);
1129 	exit(1);
1130 }
1131 
/*
 * Signal handler for signals the daemon does not otherwise handle: the
 * signal is logged and nothing else is done.
 */
static void
unexpected(int sig)
{
	pid_t self = getpid();

	spcs_log("cfglockd", NULL, "pid %d unexpected signal %d, ignoring",
	    self, sig);
}
1138 
1139 static void
1140 term(int sig)
1141 {
1142 	(void) unlink(CFG_PIDFILE);
1143 	spcs_log("cfglockd", NULL, "pid %d terminate on signal %d", getpid(),
1144 	    sig);
1145 	exit(0);
1146 }
1147 
1148 static void
1149 init(int argc, char *argv[])
1150 {
1151 #if defined(_SunOS_5_6) || defined(_SunOS_5_7) || defined(_SunOS_5_8)
1152 	struct rlimit rl;
1153 #endif
1154 	int	c, i, x;
1155 	int	rc;
1156 	char	*cp = NULL;
1157 	struct	itimerval	tv;
1158 	struct	timeval		tp;
1159 	socklen_t len = sizeof (thishost);
1160 	int	exe = 0;
1161 	pid_t	pid;
1162 	FILE	*fp;
1163 
1164 	lstate = (getenv("LOCKD_STATE") != NULL);
1165 	msgtrace = (getenv("LOCKD_MSG") != NULL);
1166 
1167 	/*
1168 	 * Fork off a child that becomes the daemon.
1169 	 */
1170 
1171 #ifndef TTY_MESSAGES
1172 	if ((rc = fork()) > 0)
1173 		exit(0);
1174 	else if (rc < 0) {
1175 		spcs_log("cfglockd", NULL, "can't fork %d", errno);
1176 		(void) fprintf(stderr, gettext("dscfglockd: cannot fork: %s\n"),
1177 		    strerror(errno));
1178 		exit(1);
1179 	}
1180 #endif
1181 
1182 	/*
1183 	 * In child - become daemon.
1184 	 */
1185 
1186 #if !defined(_SunOS_5_6) && !defined(_SunOS_5_7) && !defined(_SunOS_5_8)
1187 	/* use closefrom(3C) from PSARC/2000/193 when possible */
1188 	closefrom(CLOSE_FD);
1189 #else
1190 	(void) getrlimit(RLIMIT_NOFILE, &rl);
1191 	for (i = CLOSE_FD; i < rl.rlim_max; i++)
1192 		(void) close(i);
1193 #endif
1194 
1195 #ifdef DEBUG
1196 #ifndef	TTY_MESSAGES
1197 	(void) open("/dev/console", O_WRONLY|O_APPEND);
1198 	(void) dup(0);
1199 	(void) dup(0);
1200 #endif
1201 #endif
1202 	(void) close(0);
1203 
1204 	if (msgtrace || lstate) {
1205 		debugfile = fopen("/var/tmp/dscfglockd.out", "a");
1206 		if (debugfile) {
1207 			time_t t = time(0);
1208 			setbuf(debugfile, (char *)0);
1209 			(void) fprintf(debugfile, "%19.19s dscfglockd start\n",
1210 			    ctime(&t));
1211 		}
1212 	}
1213 
1214 	(void) setpgrp();
1215 	spcs_log("cfglockd", NULL, "new lock daemon, pid %d", getpid());
1216 
1217 	/*
1218 	 * Catch as unexpected all signals apart from SIGTERM.
1219 	 */
1220 
1221 	for (i = 1; i < _sys_nsig; i++)
1222 		(void) sigset(i, unexpected);
1223 	(void) sigset(SIGTERM, term);
1224 
1225 	for (i = 0; (c = getopt(argc, argv, "df:e:")) != EOF; i++) {
1226 		switch (c) {
1227 		case 'd':
1228 			debug = 1;
1229 			break;
1230 		case 'e':
1231 			exe = 1;
1232 			if (cp) {
1233 				usage();
1234 			}
1235 			cp = optarg;
1236 			break;
1237 		case 'f':
1238 			if (cp) {
1239 				usage();
1240 			}
1241 			cp = optarg;
1242 			break;
1243 		default:
1244 			usage();
1245 			break;
1246 		}
1247 	}
1248 
1249 	ifc = (struct ifconf *)malloc(sizeof (struct ifconf));
1250 	if (ifc == NULL) {
1251 		perror(CFG_PIDFILE);
1252 		DPF((stderr, "Can't open pid file\n"));
1253 		exit(1);
1254 	}
1255 	(void) memset((char *)ifc, 0, sizeof (struct ifconf));
1256 	getmyaddrs(ifc);
1257 
1258 	/*
1259 	 * if (lockdaemonalive()) {
1260 	 *	(void) fprintf(stderr, "%s: %s\n", program,
1261 	 *		gettext("There is already a live lockdaemon"));
1262 	 *	exit(1);
1263 	 * }
1264 	 */
1265 	if ((fp = fopen(CFG_PIDFILE, "w")) == NULL) {
1266 		perror(CFG_PIDFILE);
1267 		DPF((stderr, "Can't open pid file\n"));
1268 		exit(1);
1269 	}
1270 	(void) fprintf(fp, "%ld\n", getpid());
1271 	(void) fclose(fp);
1272 
1273 	/* order should be set to node number within cluster */
1274 	order = cfg_iscluster();
1275 	cfg_lfinit();
1276 
1277 	if (!order) {
1278 		(void) gettimeofday(&tp, NULL);
1279 		srand48(tp.tv_usec);
1280 		order = lrand48();
1281 		if (debugfile) {
1282 			(void) fprintf(debugfile, "WARNING: order number "
1283 			    "is 0 -- changing randomly to %d\n", order);
1284 		}
1285 	}
1286 	c = 0;
1287 	for (i = 0; (x = cfg_filelock(i, 0)) != CFG_LF_EOF; i++) {
1288 		if (x == CFG_LF_AGAIN) {
1289 			cfg_readpid(i, &pid);
1290 			if (c++ == 0)
1291 				spcs_log("cfglockd", NULL,
1292 				    "init .dscfg.lck slot %d pid %d locked",
1293 				    i, pid);
1294 			DPF((stderr, "client process %ld still alive\n", pid));
1295 			continue; /* can't take lock, must be still alive */
1296 		}
1297 		cfg_writepid(i, 0);
1298 		(void) cfg_fileunlock(i);
1299 	}
1300 
1301 	tv.it_interval.tv_sec = TIMEOUT_SECS;
1302 	tv.it_interval.tv_usec = 0;
1303 	tv.it_value = tv.it_interval;
1304 
1305 	bzero(unlock_buf, sizeof (unlock_buf));
1306 	next_q = 0;
1307 	build_daemon_list(cp, exe);
1308 	if ((lock_soc = socket(pf_inet, SOCK_DGRAM, 0)) < 0) {
1309 		(void) fprintf(stderr, "%s: %s\n", program,
1310 		    gettext("failed to create socket"));
1311 		perror("socket");
1312 		spcs_log("cfglockd", NULL, "couldn't create socket");
1313 		exit(1);
1314 	}
1315 	thishost.sin_family = AF_INET;
1316 	thishost.sin_addr.s_addr = INADDR_ANY;
1317 	thishost.sin_port = htons(lock_port);
1318 	rc = bind(lock_soc, (struct sockaddr *)&thishost, sizeof (thishost));
1319 	if (rc < 0) {
1320 		perror("bind");
1321 		spcs_log("cfglockd", NULL, "couldn't bind");
1322 		exit(1);
1323 	}
1324 	if (getsockname(lock_soc, (struct sockaddr *)&thishost, &len) < 0)
1325 		perror("getsockname");
1326 	send_aliveall();
1327 	(void) sigset(SIGALRM, keepalive);
1328 	(void) setitimer(ITIMER_REAL, &tv, NULL);
1329 	/*
1330 	 * wait 2 time outs before allowing a lock to find if someone else
1331 	 * currently has the lock.
1332 	 */
1333 }
1334 
1335 #ifdef lint
1336 int
1337 lintmain(int argc, char *argv[])
1338 #else
1339 int
1340 main(int argc, char *argv[])
1341 #endif
1342 {
1343 	struct lock_msg message_buf;
1344 	daemonaddr_t from;
1345 	int addrlen;
1346 	int rc;
1347 	int x = 1;		/* kludge to stop warnings from compiler */
1348 
1349 	init(argc, argv);
1350 	CRIT_BEGIN();
1351 	while (x) {
1352 		CRIT_END();
1353 		addrlen = sizeof (from);
1354 		DPF((stderr, "begin recvfrom\n"));
1355 		rc = recvfrom(lock_soc, &message_buf, sizeof (message_buf),
1356 		    0, (struct sockaddr *)&from, &addrlen);
1357 		DPF((stderr, "end recvfrom rc = %d\n", rc));
1358 		CRIT_BEGIN();
1359 		if (rc == sizeof (message_buf))
1360 			dispatch(&message_buf, &from);
1361 		else
1362 			check_for_write_lock();
1363 
1364 		/* if we own the lock, check to see if the process died */
1365 		if (the_lock.type != LOCK_NOTLOCKED &&
1366 		    the_lock.remote_daemon == NULL)
1367 			check_for_dead();
1368 	}
1369 	CRIT_END();
1370 	return (0);
1371 }
1372