xref: /titanic_41/usr/src/uts/common/io/lvm/md/md_med.c (revision f6911211edbabe97da98b0229c5e476991bbc73b)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
24  */
25 
26 /*
27  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
28  * Use is subject to license terms.
29  */
30 
31 #include <sys/param.h>
32 #include <sys/systm.h>
33 #include <sys/sysmacros.h>
34 #include <sys/errno.h>
35 #include <sys/cmn_err.h>
36 #include <sys/buf.h>
37 #include <sys/disp.h>
38 #include <sys/kmem.h>
39 /* #include <sys/ddi.h> */
40 /* #include <sys/sunddi.h> */
41 #include <sys/debug.h>
42 
43 #include <sys/time.h>
44 #include <sys/pathname.h>
45 #include <sys/netconfig.h>
46 #include <sys/socket.h>
47 #include <netinet/in.h>
48 
49 #include <rpc/types.h>
50 #include <rpc/auth.h>
51 #include <rpc/clnt.h>
52 #include <rpc/clnt_soc.h>
53 #include <rpc/pmap_prot.h>	/* PMAPPORT */
54 #include <rpc/rpc.h>
55 #include <rpc/rpcb_prot.h>
56 #include <rpc/xdr.h>		/* This also gets us htonl() et al. */
57 
58 
59 #include <sys/lvm/mdmed.h>
60 
61 #define	MDDB
62 #include <sys/lvm/mdvar.h>
63 #include <sys/lvm/md_mddb.h>
64 #include <sys/lvm/md_crc.h>
65 #include <sys/callb.h>
66 
67 /*
68  * Flag to turn off the kernel RPC client delay code. This only takes effect
69  * if the route to the remote node is marked as RTF_REJECT and the RPC path
70  * manager has been flushed such that any 'old' path information is no longer
71  * present.
72  */
/*
 * clset is handed by address to CLNT_CONTROL(..., CLSET_NODELAYONERR, ...)
 * on every client handle this file creates.
 */
73 static	bool_t		clset = TRUE;
74 
/*
 * Items defined outside this file.  NOTE(review): none of the three is
 * referenced in the RPC plumbing below; their users are presumably further
 * down in the file.
 */
75 extern	int		md_nmedh;			/* declared in md.c */
76 extern	char		*md_med_trans_lst;
77 extern md_set_t		md_set[];			/* declared in md.c */
78 
79 /*
80  * Structures used only by mediators
81  */
/*
 * Per-address argument block; one is embedded per host address in
 * med_thr_h_args_t (mtha_a_args[MAX_HOST_ADDRS]).
 *
 * NOTE(review): the members mtaa_h_nm, mtaa_h_ip, mtaa_h_flags and
 * mtaa_prog .. mtaa_timout have the same names and types as the parameters
 * of med_net_callrpc(); presumably they are passed to it unchanged by the
 * per-address thread -- confirm against the thread body.
 * mtaa_mag is presumably set to MTAA_MAGIC, mtaa_flags presumably takes
 * MDT_A_OK, and mtaa_err presumably holds the call's result -- confirm.
 */
82 typedef	struct	med_thr_a_args {
83 	uint_t			mtaa_mag;
84 	char			*mtaa_h_nm;
85 	in_addr_t		mtaa_h_ip;
86 	uint_t			mtaa_h_flags;
87 	int			(*mtaa_err_func)(struct med_thr_a_args *);
88 	struct med_thr_h_args	*mtaa_mthap;
89 	int			mtaa_flags;
90 	rpcprog_t		mtaa_prog;
91 	rpcvers_t		mtaa_vers;
92 	rpcproc_t		mtaa_proc;
93 	xdrproc_t		mtaa_inproc;
94 	caddr_t			mtaa_in;
95 	xdrproc_t		mtaa_outproc;
96 	caddr_t			mtaa_out;
97 	struct	timeval		*mtaa_timout;
98 	int			mtaa_err;
99 } med_thr_a_args_t;
100 
101 #define	MTAA_MAGIC		0xbadbabed
102 #define	MDT_A_OK		0x00000001
103 
/*
 * Per-host argument block.  It embeds one med_thr_a_args_t for each of up
 * to MAX_HOST_ADDRS addresses and carries its own mutex/condvar pair
 * (mtha_a_mx / mtha_a_cv) next to the counters mtha_a_cnt and mtha_a_nthr.
 *
 * NOTE(review): presumably the mutex/condvar are used to wait for the
 * per-address threads, mtha_mag holds MTHA_MAGIC and mtha_flags takes
 * MDT_H_OK -- none of that is visible in this part of the file; confirm.
 */
104 typedef	struct	med_thr_h_args {
105 	uint_t			mtha_mag;
106 	md_hi_t			*mtha_mhp;
107 	char			*mtha_setname;
108 	med_data_t		*mtha_meddp;
109 	struct	med_thr		*mtha_mtp;
110 	int			mtha_flags;
111 	set_t			mtha_setno;
112 	int			mtha_a_cnt;
113 	kcondvar_t		mtha_a_cv;
114 	kmutex_t		mtha_a_mx;
115 	uint_t			mtha_a_nthr;
116 	med_thr_a_args_t	mtha_a_args[MAX_HOST_ADDRS];
117 } med_thr_h_args_t;
118 
119 #define	MTHA_MAGIC		0xbadbabee
120 #define	MDT_H_OK		0x00000001
121 
/*
 * Top-level bookkeeping: one pointer to a per-host argument block for each
 * of up to MED_MAX_HOSTS mediator hosts, plus a mutex/condvar pair and a
 * thread counter.
 *
 * NOTE(review): mt_mag presumably holds MTH_MAGIC and mt_nthr presumably
 * counts outstanding per-host threads -- confirm against the users.
 */
122 typedef	struct	med_thr	{
123 	uint_t			mt_mag;
124 	kmutex_t		mt_mx;
125 	kcondvar_t		mt_cv;
126 	uint_t			mt_nthr;
127 	med_thr_h_args_t	*mt_h_args[MED_MAX_HOSTS];
128 } med_thr_t;
129 
130 #define	MTH_MAGIC		0xbadbabef
131 
/*
 * Debug tracing support.
 *
 * In DEBUG kernels the global md_medup_failure_dbg selects what is traced:
 * its low four bits (DBGFLG_FLAGMASK) are behaviour flags, everything above
 * is the trace level (DBGLVL_*).  In non-DEBUG kernels every tracing macro
 * expands to nothing and BSTAMP/ESTAMP expand to an empty block.
 */
132 #ifdef DEBUG
133 
/*
 * Begin/end time stamps used by BSTAMP/ESTAMP.  They are file-scope
 * statics, so every BSTAMP/ESTAMP pair in the file shares them.
 */
134 static	struct	timeval	btv;
135 static	struct	timeval	etv;
136 
137 #define	DBGLVL_NONE	0x00000000
138 #define	DBGLVL_MAJOR	0x00000100
139 #define	DBGLVL_MINOR	0x00000200
140 #define	DBGLVL_MINUTE	0x00000400
141 #define	DBGLVL_TRIVIA	0x00000800
142 #define	DBGLVL_HIDEOUS	0x00001000
143 
144 #define	DBGFLG_NONE		0x00000000
145 #define	DBGFLG_NOPANIC		0x00000001
146 #define	DBGFLG_LVLONLY		0x00000002
147 #define	DBGFLG_FIXWOULDPANIC	0x00000004
148 
149 #define	DBGFLG_FLAGMASK		0x0000000F
150 #define	DBGFLG_LEVELMASK	~DBGFLG_FLAGMASK
151 
152 #define	DEBUG_FLAGS	(md_medup_failure_dbg & DBGFLG_FLAGMASK)
153 #define	DEBUG_LEVEL	(md_medup_failure_dbg & DBGFLG_LEVELMASK)
154 
/* Default setting: tracing off unless the file is built with JEC defined. */
155 #ifdef JEC
156 unsigned int md_medup_failure_dbg =	DBGLVL_MINOR | DBGFLG_NONE;
157 #else	/* ! JEC */
158 unsigned int md_medup_failure_dbg =	DBGLVL_NONE | DBGFLG_NONE;
159 #endif	/* JEC */
160 
/*
 * DCALL(dbg_level, call) executes `call' only when some trace level is
 * configured.  With DBGFLG_LVLONLY set, `call' runs when dbg_level shares
 * a bit with the configured level; otherwise it runs when dbg_level is
 * numerically less than or equal to the configured level bits.
 *
 * The expansion is a bare brace block, not do { } while (0).
 */
161 #define	DCALL(dbg_level, call)						\
162 	{								\
163 		if (DEBUG_LEVEL != DBGLVL_NONE) {			\
164 			if (DEBUG_FLAGS & DBGFLG_LVLONLY) {		\
165 				if (DEBUG_LEVEL & dbg_level) {		\
166 					call;				\
167 				}					\
168 			} else {					\
169 				if (dbg_level <= DEBUG_LEVEL) {		\
170 					call;				\
171 				}					\
172 			}						\
173 		}							\
174 	}
175 
/*
 * DPRINTF pastes `msg' directly after printf, so `msg' must be a complete
 * parenthesised argument list: MINOR(("fmt %d\n", val)).
 */
176 #define	DPRINTF(dbg_level, msg)		DCALL(dbg_level, printf msg)
177 
178 #define	MAJOR(msg)			DPRINTF(DBGLVL_MAJOR, msg)
179 #define	MINOR(msg)			DPRINTF(DBGLVL_MINOR, msg)
180 #define	MINUTE(msg)			DPRINTF(DBGLVL_MINUTE, msg)
181 #define	TRIVIA(msg)			DPRINTF(DBGLVL_TRIVIA, msg)
182 #define	HIDEOUS(msg)			DPRINTF(DBGLVL_HIDEOUS, msg)
/* BSTAMP records the start time in btv. */
183 #define	BSTAMP				{ uniqtime(&btv); }
184 
/*
 * ESTAMP(msg) records the end time in etv and traces, at MINOR level, the
 * seconds and microseconds elapsed since the last BSTAMP, labelled with
 * `msg'.  Both BSTAMP and ESTAMP expand to brace blocks and are written
 * without a trailing semicolon by their callers.
 */
185 #define	ESTAMP(msg)							\
186 	{								\
187 		time_t	esec, eusec;					\
188 									\
189 		uniqtime(&etv);						\
190 									\
191 		eusec = etv.tv_usec - btv.tv_usec;			\
192 		esec = etv.tv_sec - btv.tv_sec;				\
193 		if (eusec < 0) {					\
194 			eusec += MICROSEC;				\
195 			esec--;						\
196 		}							\
197 		MINOR(("%s: sec=%ld, usec=%ld\n", msg, esec, eusec));	\
198 	}
199 
200 #else	/* ! DEBUG */
201 
/* Non-DEBUG build: all tracing compiles away. */
202 #define	DCALL(ignored_dbg_level, ignored_routine)
203 #define	MAJOR(ignored)
204 #define	MINOR(ignored)
205 #define	MINUTE(ignored)
206 #define	TRIVIA(ignored)
207 #define	HIDEOUS(ignored)
208 #define	BSTAMP		{ }
209 #define	ESTAMP(msg)	{ }
210 
211 #endif /* DEBUG */
212 
/*
 * md_med_protocol_retry is the number of attempts med_callrpc() makes per
 * transport before giving up.
 * NOTE(review): md_med_transdevs_set is not referenced in this part of the
 * file; presumably it records whether the transport table has been set up.
 */
213 static	int		md_med_protocol_retry = 2;
214 static	int		md_med_transdevs_set = 0;
215 
216 /*
217  * Definitions and declarations.
218  */
/*
 * med_lck is held by med_get_client() while it builds a med_client and
 * while it tears one down after a failure.  Its initialisation is not in
 * this part of the file.
 */
219 kmutex_t		med_lck;
220 
/*
 * One RPC binding: the <prog, vers> being called, a private copy of the
 * destination address (addr.buf is allocated with med_dup() and released
 * by med_rel_client()), and the client handle created for it.
 */
221 struct med_client {
222 	rpcprog_t prog;
223 	rpcvers_t vers;
224 	struct netbuf addr;	/* Address to this <prog,vers> */
225 	CLIENT *client;
226 };
227 
228 /*
229  * unrecoverable RPC status codes; cf. rfscall()
230  */
/*
 * True for RPC statuses that retrying cannot cure.  med_callrpc() maps
 * these to EINVAL and stops retrying; other failures become EIO and are
 * retried.  The argument is evaluated several times, so it must be free of
 * side effects.
 */
231 #define	MED_IS_UNRECOVERABLE_RPC(s)	(((s) == RPC_AUTHERROR) || \
232 	((s) == RPC_CANTENCODEARGS) || \
233 	((s) == RPC_CANTDECODERES) || \
234 	((s) == RPC_VERSMISMATCH) || \
235 	((s) == RPC_PROCUNAVAIL) || \
236 	((s) == RPC_PROGUNAVAIL) || \
237 	((s) == RPC_PROGVERSMISMATCH) || \
238 	((s) == RPC_CANTDECODEARGS))
239 
240 /*
241  * When trying to contact a portmapper that doesn't speak the version we're
242  * using, we should theoretically get back RPC_PROGVERSMISMATCH.
243  * Unfortunately, some (all?) 4.x hosts return an accept_stat of
244  * PROG_UNAVAIL, which gets mapped to RPC_PROGUNAVAIL, so we have to check
245  * for that, too.
246  */
/*
 * Used by med_get_rpc_handle(): a portmapper failure with one of these
 * statuses does not abort the lookup, so rpcbind is still tried.  The
 * argument is evaluated twice.
 */
247 #define	PMAP_WRONG_VERSION(s)	((s) == RPC_PROGVERSMISMATCH || \
248 	(s) == RPC_PROGUNAVAIL)
249 
/*
 * NULLSTR(str) yields "<null>" for a NULL pointer or an empty string and
 * `str' itself otherwise, so it is always safe to print with %s.
 */
250 #define	NULLSTR(str)	(! (str) || *(str) == '\0'? "<null>" : (str))
251 #define	NULSTRING	""
252 
253 /* Flags used in med_addr (netconfig) table */
254 
/*
 * How med_net_callrpc() treats them:
 *	UAFLG_SKIP, UAFLG_ERROR, UAFLG_RPCERROR	- the transport is not tried.
 *	UAFLG_LOOPBACK	- the transport is tried only for a host flagged
 *			  NMIP_F_LOCAL; for NC_INET entries it also selects
 *			  INADDR_LOOPBACK as the destination.
 * NOTE(review): UAFLG_LOCKINIT presumably records that ua_mutex has been
 * initialised; it is not used in this part of the file.
 */
255 #define	UAFLG_NONE		0x00000000
256 #define	UAFLG_SKIP		0x00000001
257 #define	UAFLG_ERROR		0x00000002
258 #define	UAFLG_RPCERROR		0x00000004
259 #define	UAFLG_LOOPBACK		0x00000008
260 #define	UAFLG_LOCKINIT		0x00000010
261 
262 /*
 * Most of this data is static.  The mutex is meant to protect the
 * changeable item:
 *	ua_flags
 * NOTE(review): med_net_callrpc() reads ua_flags without taking ua_mutex;
 * confirm where the lock is actually used.
 */
/*
 * The initialisers below set ua_kn, ua_devname, ua_netid and ua_flags;
 * ua_mutex is left zero-filled by static initialisation.
 */
266 static struct med_addr {
267 	struct knetconfig	ua_kn;
268 	char			*ua_devname;	/* const */
269 	char			*ua_netid;	/* const */
270 	uint_t			ua_flags;
271 	kmutex_t		ua_mutex;
272 } med_addr_tab[] =
273 
274 /*
 * The order of the entries in this table is the order in
 * which we'll try to connect to the user-level daemon.
 * There is no terminating entry: users walk the table with
 * med_addr_tab_nents, which is computed from the array size.
 *
 * This is basically a tablified version of /etc/netconfig
 * (with additional entries for loopback TCP and UDP networks
 * that are missing from the user-level version.)
 */
283 {
284 
285 /* loopback UDP */
286 	/* semantics	protofmly	proto,		dev_t */
287 {	{ NC_TPI_CLTS,	NC_INET,	NC_UDP,		NODEV },
288 	/* devname	netid		flags */
289 	"/dev/udp",	"udp-loopback",	UAFLG_LOOPBACK
290 },
291 
292 /* UDP */
293 	/* semantics	protofmly	proto,		dev_t */
294 {	{ NC_TPI_CLTS,	NC_INET,	NC_UDP,		NODEV },
295 	/* devname	netid		flags */
296 	"/dev/udp",	"udp", 		UAFLG_NONE
297 },
298 
299 /* loopback TCP */
300 	/* semantics	protofmly	proto,		dev_t */
301 {	{ NC_TPI_COTS_ORD, NC_INET,	NC_TCP,		NODEV },
302 	/* devname	netid		flags */
303 	"/dev/tcp",	"tcp-loopback",	UAFLG_LOOPBACK
304 },
305 
306 /* TCP */
307 	/* semantics	protofmly	proto,		dev_t */
308 {	{ NC_TPI_COTS_ORD, NC_INET,	NC_TCP,		NODEV },
309 	/* devname	netid		flags */
310 	"/dev/tcp",	"tcp",		UAFLG_NONE
311 },
312 
313 /* ticlts */
314 	/* semantics	protofmly	proto,		dev_t */
315 {	{ NC_TPI_CLTS,	NC_LOOPBACK,	NC_NOPROTO,	NODEV },
316 	/* devname	netid		flags */
317 	"/dev/ticlts",	"ticlts",	UAFLG_LOOPBACK
318 },
319 
320 /* ticotsord */
321 	/* semantics	protofmly	proto,		dev_t */
322 {	{ NC_TPI_COTS_ORD, NC_LOOPBACK,	NC_NOPROTO,	NODEV },
323 	/* devname	  netid		flags */
324 	"/dev/ticotsord", "ticotsord",	UAFLG_LOOPBACK
325 },
326 
327 /* ticots */
328 	/* semantics	protofmly	proto,		dev_t */
329 {	{ NC_TPI_COTS,	NC_LOOPBACK,	NC_NOPROTO,	NODEV },
330 	/* devname	netid		flags */
331 	"/dev/ticots",	"ticots",	UAFLG_LOOPBACK
332 }
333 };
334 
335 /* The number of entries in the table */
336 int	med_addr_tab_nents = sizeof (med_addr_tab) / sizeof (med_addr_tab[0]);
337 
338 /*
339  * Private Functions
340  */
341 
342 /* A useful utility. */
343 static char *
med_dup(void * str,int len)344 med_dup(void *str, int len)
345 {
346 	char *s = (char *)kmem_zalloc(len, KM_SLEEP);
347 
348 	if (s == NULL)
349 		return (NULL);
350 
351 	bcopy(str, s, len);
352 
353 	return (s);
354 }
355 
356 /*
357  * Utilities for manipulating netbuf's.
358  * These utilities are the only knc_protofmly specific functions in the MED.
359  */
360 
361 /*
362  * Utilities to patch a port number (for NC_INET protocols) or a
363  *	port name (for NC_LOOPBACK) into a network address.
364  */
365 static void
med_put_inet_port(struct netbuf * addr,ushort_t port)366 med_put_inet_port(struct netbuf *addr, ushort_t port)
367 {
368 	/*
369 	 * Easy - we always patch an unsigned short on top of an
370 	 * unsigned short.  No changes to addr's len or maxlen are
371 	 * necessary.
372 	 */
373 	/*LINTED*/
374 	((struct sockaddr_in *)(addr->buf))->sin_port = port;
375 }
376 
377 static void
med_put_loopback_port(struct netbuf * addr,char * port)378 med_put_loopback_port(struct netbuf *addr, char *port)
379 {
380 	char *dot;
381 	char *newbuf;
382 	int newlen;
383 
384 	/*
385 	 * We must make sure the addr has enough space for us,
386 	 * patch in `port', and then adjust addr's len and maxlen
387 	 * to reflect the change.
388 	 */
389 	if ((dot = strchr(addr->buf, '.')) == (char *)NULL) {
390 		TRIVIA(("put_loopb_port - malformed loopback addr %s\n",
391 		    addr->buf));
392 		return;
393 	}
394 
395 	newlen = (int)((dot - addr->buf + 1) + strlen(port));
396 	if (newlen > addr->maxlen) {
397 		newbuf = (char *)kmem_zalloc((size_t)newlen, KM_SLEEP);
398 		(void) bcopy(addr->buf, newbuf, (size_t)addr->len);
399 		kmem_free(addr->buf, (size_t)addr->maxlen);
400 		addr->buf = newbuf;
401 		addr->len = addr->maxlen = (uint_t)newlen;
402 		dot = strchr(addr->buf, '.');
403 	} else {
404 		addr->len = newlen;
405 	}
406 
407 	(void) strncpy(++dot, port, strlen(port));
408 
409 }
410 
411 /*
412  * Make sure the given netbuf has a maxlen at least as big as the given
413  * length.
414  */
415 static void
grow_netbuf(struct netbuf * nb,size_t length)416 grow_netbuf(struct netbuf *nb, size_t length)
417 {
418 	char *newbuf;
419 
420 	if (nb->maxlen >= length)
421 		return;
422 
423 	newbuf = kmem_zalloc(length, KM_SLEEP);
424 	bcopy(nb->buf, newbuf, (size_t)nb->len);
425 	kmem_free(nb->buf, (size_t)nb->maxlen);
426 	nb->buf = newbuf;
427 	nb->maxlen = (uint_t)length;
428 }
429 
430 /*
431  * Convert a loopback universal address to a loopback transport address.
432  */
433 static void
loopb_u2t(const char * ua,struct netbuf * addr)434 loopb_u2t(const char *ua, struct netbuf *addr)
435 {
436 	size_t stringlen = strlen(ua) + 1;
437 	const char *univp;		/* ptr into universal addr */
438 	char *transp;			/* ptr into transport addr */
439 
440 	/* Make sure the netbuf will be big enough. */
441 	if (addr->maxlen < stringlen) {
442 		grow_netbuf(addr, stringlen);
443 	}
444 
445 	univp = ua;
446 	transp = addr->buf;
447 	while (*univp != NULL) {
448 		if (*univp == '\\' && *(univp+1) == '\\') {
449 			*transp = '\\';
450 			univp += 2;
451 		} else if (*univp == '\\') {
452 			/* octal character */
453 			*transp = (((*(univp+1) - '0') & 3) << 6) +
454 			    (((*(univp+2) - '0') & 7) << 3) +
455 			    ((*(univp+3) - '0') & 7);
456 			univp += 4;
457 		} else {
458 			*transp = *univp;
459 			univp++;
460 		}
461 		transp++;
462 	}
463 
464 	addr->len = (uint_t)(transp - addr->buf);
465 	ASSERT(addr->len <= addr->maxlen);
466 }
467 
468 
469 /*
470  * xdr_md_pmap
471  *
472  * Taken from libnsl/rpc/pmap_prot.c
473  */
474 bool_t
xdr_md_pmap(xdrs,regs)475 xdr_md_pmap(xdrs, regs)
476 	XDR *xdrs;
477 	struct pmap *regs;
478 {
479 	if (xdr_u_int(xdrs, &regs->pm_prog) &&
480 		xdr_u_int(xdrs, &regs->pm_vers) &&
481 		xdr_u_int(xdrs, &regs->pm_prot))
482 		return (xdr_u_int(xdrs, &regs->pm_port));
483 	return (FALSE);
484 }
485 
486 /*
487  * We need an version of CLNT_DESTROY which also frees the auth structure.
488  */
489 static void
med_clnt_destroy(CLIENT ** clp)490 med_clnt_destroy(CLIENT **clp)
491 {
492 	if (*clp) {
493 		if ((*clp)->cl_auth) {
494 			AUTH_DESTROY((*clp)->cl_auth);
495 			(*clp)->cl_auth = NULL;
496 		}
497 		CLNT_DESTROY(*clp);
498 		*clp = NULL;
499 	}
500 }
501 
502 /*
503  * Release this med_client entry.
504  * Do also destroy the entry if there was an error != EINTR,
505  * and mark the entry as not-valid, by setting time=0.
506  */
507 static void
med_rel_client(struct med_client * medc,int error)508 med_rel_client(struct med_client *medc, int error)
509 {
510 	TRIVIA(("rel_client - addr = (%p, %u %u)\n",
511 	    (void *) medc->addr.buf, medc->addr.len, medc->addr.maxlen));
512 	/*LINTED*/
513 	if (1 || error && error != EINTR) {
514 		TRIVIA(("rel_client - destroying addr = (%p, %u %u)\n",
515 		    (void *) medc->addr.buf, medc->addr.len,
516 		    medc->addr.maxlen));
517 		med_clnt_destroy(&medc->client);
518 		if (medc->addr.buf) {
519 			kmem_free(medc->addr.buf, medc->addr.maxlen);
520 			medc->addr.buf = NULL;
521 		}
522 	}
523 }
524 
525 /*
526  * Try to get the address for the desired service by using the old
527  * portmapper protocol.  Ignores signals.
528  *
529  * Returns RPC_UNKNOWNPROTO if the request uses the loopback transport.
530  * Use med_get_rpcb_addr instead.
531  */
532 static enum clnt_stat
med_get_pmap_addr(struct knetconfig * kncfp,rpcprog_t prog,rpcvers_t vers,struct netbuf * addr)533 med_get_pmap_addr(
534 	struct	knetconfig	*kncfp,
535 	rpcprog_t		prog,
536 	rpcvers_t		vers,
537 	struct	netbuf		*addr
538 )
539 {
540 	ushort_t			port = 0;
541 	int			error;
542 	enum	clnt_stat	status;
543 	CLIENT			*client = NULL;
544 	struct	pmap		parms;
545 	struct	timeval		tmo;
546 	k_sigset_t		oldmask;
547 	k_sigset_t		newmask;
548 
549 	/*
550 	 * Call rpcbind version 2 or earlier (SunOS portmapper, remote
551 	 * only) to get an address we can use in an RPC client handle.
552 	 * We simply obtain a port no. for <prog, vers> and plug it
553 	 * into `addr'.
554 	 */
555 	if (strcmp(kncfp->knc_protofmly, NC_INET) == 0) {
556 		med_put_inet_port(addr, htons(PMAPPORT));
557 	} else {
558 		TRIVIA(("get_pmap_addr - unsupported protofmly %s\n",
559 		    kncfp->knc_protofmly));
560 		status = RPC_UNKNOWNPROTO;
561 		goto out;
562 	}
563 
564 	TRIVIA(("get_pmap_addr - semantics=%u, protofmly=%s, proto=%s\n",
565 	    kncfp->knc_semantics, kncfp->knc_protofmly, kncfp->knc_proto));
566 
567 	/*
568 	 * Mask signals for the duration of the handle creation and
569 	 * RPC call.  This allows relatively normal operation with a
570 	 * signal already posted to our thread.
571 	 *
572 	 * Any further exit paths from this routine must restore
573 	 * the original signal mask.
574 	 */
575 	sigfillset(&newmask);
576 	sigreplace(&newmask, &oldmask);
577 
578 	if ((error = clnt_tli_kcreate(kncfp, addr, PMAPPROG, PMAPVERS,
579 	    0, 0, kcred, &client)) != RPC_SUCCESS) {
580 		status = RPC_TLIERROR;
581 		sigreplace(&oldmask, (k_sigset_t *)NULL);
582 		MINUTE(("get_pmap_addr - kcreate() returned %d\n", error));
583 		goto out;
584 	}
585 
586 	if (!CLNT_CONTROL(client, CLSET_NODELAYONERR, (char *)&clset)) {
587 		MINUTE(("get_pmap_addr - unable to set CLSET_NODELAYONERR\n"));
588 	}
589 
590 	client->cl_auth = authkern_create();
591 
592 	parms.pm_prog = prog;
593 	parms.pm_vers = vers;
594 	if (strcmp(kncfp->knc_proto, NC_TCP) == 0) {
595 		parms.pm_prot = IPPROTO_TCP;
596 	} else {
597 		parms.pm_prot = IPPROTO_UDP;
598 	}
599 	parms.pm_port = 0;
600 	tmo = md_med_pmap_timeout;
601 
602 	if ((status = CLNT_CALL(client, PMAPPROC_GETPORT,
603 	    xdr_md_pmap, (char *)&parms,
604 	    xdr_u_short, (char *)&port,
605 	    tmo)) != RPC_SUCCESS) {
606 		sigreplace(&oldmask, (k_sigset_t *)NULL);
607 		MINUTE(("get_pmap_addr - CLNT_CALL(GETPORT) returned %d\n",
608 		    status));
609 		goto out;
610 	}
611 
612 	sigreplace(&oldmask, (k_sigset_t *)NULL);
613 
614 	/* A zero value of port indicates a mapping failure */
615 	if (port == 0) {
616 		status = RPC_PROGNOTREGISTERED;
617 		MINUTE(("get_pmap_addr - program not registered\n"));
618 		goto out;
619 	}
620 
621 	TRIVIA(("get_pmap_addr - port=%d\n", port));
622 	med_put_inet_port(addr, ntohs(port));
623 
624 out:
625 	if (client)
626 		med_clnt_destroy(&client);
627 	return (status);
628 }
629 
630 /*
631  * Try to get the address for the desired service by using the rpcbind
632  * protocol.  Ignores signals.
633  */
634 static enum clnt_stat
med_get_rpcb_addr(struct knetconfig * kncfp,rpcprog_t prog,rpcvers_t vers,struct netbuf * addr)635 med_get_rpcb_addr(
636 	struct	knetconfig	*kncfp,
637 	rpcprog_t		prog,
638 	rpcvers_t		vers,
639 	struct	netbuf		 *addr
640 )
641 {
642 	int			error;
643 	char			*ua = NULL;
644 	enum	clnt_stat	status;
645 	RPCB			parms;
646 	struct	timeval		tmo;
647 	CLIENT			*client = NULL;
648 	k_sigset_t		oldmask;
649 	k_sigset_t		newmask;
650 	ushort_t			port;
651 
652 	/*
653 	 * Call rpcbind (local or remote) to get an address we can use
654 	 * in an RPC client handle.
655 	 */
656 	tmo = md_med_pmap_timeout;
657 	parms.r_prog = prog;
658 	parms.r_vers = vers;
659 	parms.r_addr = parms.r_owner = "";
660 
661 	if (strcmp(kncfp->knc_protofmly, NC_INET) == 0) {
662 		if (strcmp(kncfp->knc_proto, NC_TCP) == 0) {
663 			parms.r_netid = "tcp";
664 		} else {
665 			parms.r_netid = "udp";
666 		}
667 		med_put_inet_port(addr, htons(PMAPPORT));
668 	} else if (strcmp(kncfp->knc_protofmly, NC_LOOPBACK) == 0) {
669 		parms.r_netid = "ticlts";
670 		med_put_loopback_port(addr, "rpc");
671 		TRIVIA((
672 		    "get_rpcb_addr - semantics=%s, protofmly=%s, proto=%s\n",
673 		    (kncfp->knc_semantics == NC_TPI_CLTS ?
674 		    "NC_TPI_CLTS" : "?"),
675 		    kncfp->knc_protofmly, kncfp->knc_proto));
676 	} else {
677 		TRIVIA(("get_rpcb_addr - unsupported protofmly %s\n",
678 		    kncfp->knc_protofmly));
679 		status = RPC_UNKNOWNPROTO;
680 		goto out;
681 	}
682 
683 	/*
684 	 * Mask signals for the duration of the handle creation and
685 	 * RPC calls.  This allows relatively normal operation with a
686 	 * signal already posted to our thread.
687 	 *
688 	 * Any further exit paths from this routine must restore
689 	 * the original signal mask.
690 	 */
691 	sigfillset(&newmask);
692 	sigreplace(&newmask, &oldmask);
693 
694 	if ((error = clnt_tli_kcreate(kncfp, addr, RPCBPROG, RPCBVERS,
695 	    0, 0, kcred, &client)) != 0) {
696 		status = RPC_TLIERROR;
697 		sigreplace(&oldmask, (k_sigset_t *)NULL);
698 		MINUTE(("get_rpcb_addr - kcreate() returned %d\n", error));
699 		goto out;
700 	}
701 
702 	if (!CLNT_CONTROL(client, CLSET_NODELAYONERR, (char *)&clset)) {
703 		MINUTE(("get_rpcb_addr - unable to set CLSET_NODELAYONERR\n"));
704 	}
705 
706 	client->cl_auth = authkern_create();
707 
708 	if ((status = CLNT_CALL(client, RPCBPROC_GETADDR,
709 	    xdr_rpcb, (char *)&parms, xdr_wrapstring, (char *)&ua,
710 	    tmo)) != RPC_SUCCESS) {
711 		sigreplace(&oldmask, (k_sigset_t *)NULL);
712 		MINUTE(("get_rpcb_addr - CLNT_CALL(GETADDR) returned %d\n",
713 		    status));
714 		goto out;
715 	}
716 
717 	sigreplace(&oldmask, (k_sigset_t *)NULL);
718 
719 	if (ua == NULL || *ua == NULL) {
720 		status = RPC_PROGNOTREGISTERED;
721 		MINUTE(("get_rpcb_addr - program not registered\n"));
722 		goto out;
723 	}
724 
725 	/*
726 	 * Convert the universal address to the transport address.
727 	 * Theoretically, we should call the local rpcbind to translate
728 	 * from the universal address to the transport address, but it gets
729 	 * complicated (e.g., there's no direct way to tell rpcbind that we
730 	 * want an IP address instead of a loopback address).  Note that
731 	 * the transport address is potentially host-specific, so we can't
732 	 * just ask the remote rpcbind, because it might give us the wrong
733 	 * answer.
734 	 */
735 	if (strcmp(kncfp->knc_protofmly, NC_INET) == 0) {
736 		port = rpc_uaddr2port(AF_INET, ua);
737 		med_put_inet_port(addr, ntohs(port));
738 	} else if (strcmp(kncfp->knc_protofmly, NC_LOOPBACK) == 0) {
739 		loopb_u2t(ua, addr);
740 	} else {
741 		/* "can't happen" - should have been checked for above */
742 		cmn_err(CE_PANIC, "med_get_rpcb_addr: bad protocol family");
743 	}
744 
745 out:
746 	if (client != NULL)
747 		med_clnt_destroy(&client);
748 	if (ua != NULL)
749 		xdr_free(xdr_wrapstring, (char *)&ua);
750 	return (status);
751 }
752 
753 /*
754  * Get the RPC client handle to talk to the service at addrp.
755  * Returns:
756  * RPC_SUCCESS		Success.
757  * RPC_RPCBFAILURE	Couldn't talk to the remote portmapper (e.g.,
758  * 			timeouts).
759  * RPC_INTR		Caught a signal before we could successfully return.
760  * RPC_TLIERROR		Couldn't initialize the handle after talking to the
761  * 			remote portmapper (shouldn't happen).
762  */
763 static enum clnt_stat
med_get_rpc_handle(struct knetconfig * kncfp,struct netbuf * addrp,rpcprog_t prog,rpcvers_t vers,CLIENT ** clientp)764 med_get_rpc_handle(
765 	struct	knetconfig	*kncfp,
766 	struct	netbuf		*addrp,
767 	rpcprog_t		prog,
768 	rpcvers_t		vers,
769 	CLIENT			**clientp
770 )
771 {
772 	enum	clnt_stat	status;
773 	k_sigset_t		oldmask;
774 	k_sigset_t		newmask;
775 	int			error;
776 
777 	/*
778 	 * Try to get the address from either portmapper or rpcbind.
779 	 * We check for posted signals after trying and failing to
780 	 * contact the portmapper since it can take uncomfortably
781 	 * long for this entire procedure to time out.
782 	 */
783 	BSTAMP
784 	status = med_get_pmap_addr(kncfp, prog, vers, addrp);
785 	if (MED_IS_UNRECOVERABLE_RPC(status) && status != RPC_UNKNOWNPROTO &&
786 	    ! PMAP_WRONG_VERSION(status)) {
787 		status = RPC_RPCBFAILURE;
788 		goto bailout;
789 	}
790 
791 	if (status == RPC_SUCCESS)
792 		ESTAMP("done OK med_get_pmap_addr")
793 	else
794 		ESTAMP("done Not OK med_get_pmap_addr")
795 
796 	if (status != RPC_SUCCESS) {
797 		BSTAMP
798 		status = med_get_rpcb_addr(kncfp, prog, vers, addrp);
799 		if (status != RPC_SUCCESS) {
800 			ESTAMP("done Not OK med_get_rpcb_addr")
801 			MINOR((
802 		    "get_rpc_handle - can't contact portmapper or rpcbind\n"));
803 			status = RPC_RPCBFAILURE;
804 			goto bailout;
805 		}
806 	}
807 	ESTAMP("done OK med_get_rpcb_addr")
808 
809 	med_clnt_destroy(clientp);
810 
811 	/*
812 	 * Mask signals for the duration of the handle creation,
813 	 * allowing relatively normal operation with a signal
814 	 * already posted to our thread.
815 	 *
816 	 * Any further exit paths from this routine must restore
817 	 * the original signal mask.
818 	 */
819 	sigfillset(&newmask);
820 	sigreplace(&newmask, &oldmask);
821 
822 	if ((error = clnt_tli_kcreate(kncfp, addrp, prog, vers,
823 	    0, 0, kcred, clientp)) != 0) {
824 		status = RPC_TLIERROR;
825 		sigreplace(&oldmask, (k_sigset_t *)NULL);
826 		MINUTE(("get_rpc_handle - kcreate(prog) returned %d\n", error));
827 		goto bailout;
828 	}
829 
830 	if (!CLNT_CONTROL(*clientp, CLSET_NODELAYONERR, (char *)&clset)) {
831 		MINUTE(("get_rpc_handle - unable to set CLSET_NODELAYONERR\n"));
832 	}
833 
834 	(*clientp)->cl_auth = authkern_create();
835 
836 	sigreplace(&oldmask, (k_sigset_t *)NULL);
837 
838 bailout:
839 	return (status);
840 }
841 
842 /*
843  * Return a med_client to the <prog,vers>.
844  * The med_client found is marked as in_use.
845  * It is the responsibility of the caller to release the med_client by
846  * calling med_rel_client().
847  *
848  * Returns:
849  * RPC_SUCCESS		Success.
850  * RPC_CANTSEND		Temporarily cannot send.
851  * RPC_TLIERROR		Unspecified TLI error.
852  * RPC_UNKNOWNPROTO	kncfp is from an unrecognised protocol family.
853  * RPC_PROGNOTREGISTERED The prog `prog' isn't registered on the server.
854  * RPC_RPCBFAILURE	Couldn't contact portmapper on remote host.
855  * Any unsuccessful return codes from CLNT_CALL().
856  */
857 static enum clnt_stat
med_get_client(struct knetconfig * kncfp,struct netbuf * addrp,rpcprog_t prog,rpcvers_t vers,struct med_client ** mcp)858 med_get_client(
859 	struct	knetconfig	*kncfp,
860 	struct	netbuf		*addrp,
861 	rpcprog_t		prog,
862 	rpcvers_t		vers,
863 	struct	med_client	**mcp
864 )
865 {
866 	struct	med_client	*med_clnt = NULL;
867 	enum	clnt_stat	status = RPC_SUCCESS;
868 
869 	mutex_enter(&med_lck);
870 
871 	/*
872 	 * Create an med_client
873 	 */
874 	med_clnt = kmem_zalloc(sizeof (*med_clnt), KM_SLEEP);
875 	med_clnt->client = NULL;
876 	med_clnt->prog = prog;
877 	med_clnt->vers = vers;
878 	med_clnt->addr.buf = med_dup(addrp->buf, addrp->maxlen);
879 	med_clnt->addr.len = addrp->len;
880 	med_clnt->addr.maxlen = addrp->maxlen;
881 
882 	mutex_exit(&med_lck);
883 
884 	status = med_get_rpc_handle(kncfp, &med_clnt->addr, prog, vers,
885 	    &med_clnt->client);
886 
887 out:
888 	TRIVIA(("get_client - End: med_clnt=%p status=%d, client=%p\n",
889 	    (void *)med_clnt, status,
890 	    (med_clnt ? med_clnt->client : (void *) -1L)));
891 
892 	if (status == RPC_SUCCESS) {
893 		*mcp = med_clnt;
894 	} else {
895 		/* Cleanup */
896 		if (med_clnt) {
897 			mutex_enter(&med_lck);
898 			med_rel_client(med_clnt, EINVAL);
899 			kmem_free(med_clnt, sizeof (*med_clnt));
900 			mutex_exit(&med_lck);
901 		}
902 		*mcp = NULL;
903 	}
904 
905 	return (status);
906 }
907 
908 /*
909  * Make an RPC call to addr via config.
910  *
911  * Returns:
912  * 0		Success.
913  * EIO		Couldn't get client handle, timed out, or got unexpected
914  *		RPC status within md_med_protocol_retry attempts.
915  * EINVAL	Unrecoverable error in RPC call.  Causes client handle
916  *		to be destroyed.
917  * EINTR	RPC call was interrupted within md_med_protocol_retry attempts.
918  */
919 static int
med_callrpc(struct knetconfig * kncfp,struct netbuf * addrp,rpcprog_t prog,rpcvers_t vers,rpcproc_t proc,xdrproc_t inproc,caddr_t in,xdrproc_t outproc,caddr_t out,struct timeval * timout)920 med_callrpc(
921 	struct	knetconfig	*kncfp,
922 	struct	netbuf		*addrp,
923 	rpcprog_t		prog,
924 	rpcvers_t		vers,
925 	rpcproc_t		proc,
926 	xdrproc_t		inproc,
927 	caddr_t			in,
928 	xdrproc_t		outproc,
929 	caddr_t			out,
930 	struct	timeval		*timout
931 )
932 {
933 	struct	med_client	*med_clnt = NULL;
934 	enum	clnt_stat	cl_stat;
935 	int			tries = md_med_protocol_retry;
936 	int			error;
937 	k_sigset_t		oldmask;
938 	k_sigset_t		newmask;
939 
940 	MINUTE(("med_callrpc - Calling [%u, %u, %u]\n", prog, vers, proc));
941 
942 	sigfillset(&newmask);
943 
944 	while (tries--) {
945 		error = 0;
946 		cl_stat = med_get_client(kncfp, addrp, prog, vers, &med_clnt);
947 		if (MED_IS_UNRECOVERABLE_RPC(cl_stat)) {
948 			error = EINVAL;
949 			goto rel_client;
950 		} else if (cl_stat != RPC_SUCCESS) {
951 			error = EIO;
952 			continue;
953 		}
954 
955 		ASSERT(med_clnt != NULL);
956 		ASSERT(med_clnt->client != NULL);
957 
958 		sigreplace(&newmask, &oldmask);
959 		cl_stat = CLNT_CALL(med_clnt->client, proc, inproc, in,
960 		    outproc, out, *timout);
961 		sigreplace(&oldmask, (k_sigset_t *)NULL);
962 
963 		switch (cl_stat) {
964 		case RPC_SUCCESS:
965 			/*
966 			 * Update the timestamp on the client cache entry.
967 			 */
968 			error = 0;
969 			break;
970 
971 		case RPC_TIMEDOUT:
972 			MINOR(("med_callrpc - RPC_TIMEDOUT\n"));
973 			if (timout == 0) {
974 				/*
975 				 * We will always time out when timout == 0.
976 				 */
977 				error = 0;
978 				break;
979 			}
980 			/* FALLTHROUGH */
981 		case RPC_CANTSEND:
982 		case RPC_XPRTFAILED:
983 		default:
984 			if (MED_IS_UNRECOVERABLE_RPC(cl_stat)) {
985 				error = EINVAL;
986 			} else {
987 				error = EIO;
988 			}
989 		}
990 
991 rel_client:
992 		MINOR(("med_callrpc - RPC cl_stat=%d error=%d\n",
993 		    cl_stat, error));
994 		if (med_clnt != NULL) {
995 			med_rel_client(med_clnt, error);
996 			kmem_free(med_clnt, sizeof (*med_clnt));
997 		}
998 
999 		/*
1000 		 * If EIO, loop else we're done.
1001 		 */
1002 		if (error != EIO) {
1003 			break;
1004 		}
1005 	}
1006 
1007 	MINUTE(("med_callrpc - End: error=%d, tries=%d\n", error, tries));
1008 
1009 	return (error);
1010 }
1011 
1012 /*
1013  * Try various transports to get the rpc call through.
1014  */
1015 static int
med_net_callrpc(char * h_nm,in_addr_t h_ip,uint_t h_flags,rpcprog_t prog,rpcvers_t vers,rpcproc_t proc,xdrproc_t inproc,caddr_t in,xdrproc_t outproc,caddr_t out,struct timeval * timout)1016 med_net_callrpc(
1017 	char			*h_nm,
1018 	in_addr_t		h_ip,
1019 	uint_t			h_flags,
1020 	rpcprog_t		prog,
1021 	rpcvers_t		vers,
1022 	rpcproc_t		proc,
1023 	xdrproc_t		inproc,
1024 	caddr_t			in,
1025 	xdrproc_t		outproc,
1026 	caddr_t			out,
1027 	struct	timeval		*timout
1028 )
1029 {
1030 	int			err;
1031 	struct	med_addr	*uap;
1032 	int			uapi;
1033 	struct	netbuf		dst;
1034 	int			done = 0;
1035 
1036 	ASSERT(h_nm != NULL);
1037 	ASSERT(h_ip != 0);
1038 
1039 	/*
1040 	 * Loop through our table of transports and try to get the data out.
1041 	 */
1042 	for (uapi = 0; uapi < med_addr_tab_nents && ! done; uapi++) {
1043 
1044 		/* Shorthand */
1045 		uap = &med_addr_tab[uapi];
1046 
1047 		/*
1048 		 * UAFLG_SKIP is used for debugging and by the protocol
1049 		 * selection code.
1050 		 */
1051 		if (uap->ua_flags & UAFLG_SKIP) {
1052 			MINUTE(("med_net_callrpc - %s - marked \"skip\"\n",
1053 			    uap->ua_netid));
1054 			continue;
1055 		}
1056 
1057 		/*
1058 		 * If we are not talking to this host, we can skip all LOOPBACK
1059 		 * transport options.
1060 		 */
1061 		if (! (h_flags & NMIP_F_LOCAL) &&
1062 		    (uap->ua_flags & UAFLG_LOOPBACK))
1063 			continue;
1064 
1065 		if (uap->ua_flags & UAFLG_ERROR)
1066 			continue;
1067 
1068 		if (uap->ua_flags & UAFLG_RPCERROR)
1069 			continue;
1070 
1071 		/* Unknown protocol, skip it */
1072 		if (! uap->ua_kn.knc_protofmly) {
1073 			MINUTE(("med_net_callrpc - bad protofmly\n"));
1074 			continue;
1075 		}
1076 
1077 		if (strcmp(uap->ua_kn.knc_protofmly, NC_LOOPBACK) == 0) {
1078 			/*
1079 			 * strlen("localhost.") is 10
1080 			 */
1081 			dst.len = dst.maxlen = 10;
1082 			dst.buf = kmem_alloc(dst.len, KM_SLEEP);
1083 			(void) strncpy(dst.buf, "localhost.", dst.len);
1084 		} else if (strcmp(uap->ua_kn.knc_protofmly, NC_INET) == 0) {
1085 			struct sockaddr_in	*s;
1086 
1087 			/*
1088 			 * If we have not allocated a buffer for an INET addrs
1089 			 * or the buffer allocated will not contain an INET
1090 			 * addr, allocate or re-allocate.
1091 			 */
1092 			dst.buf = kmem_zalloc(sizeof (struct sockaddr_in),
1093 			    KM_SLEEP);
1094 			dst.maxlen = sizeof (struct sockaddr_in);
1095 
1096 			/* Short hand */
1097 			/*LINTED*/
1098 			s = (struct sockaddr_in *)dst.buf;
1099 
1100 			/* Initialize the socket */
1101 			if (uap->ua_flags & UAFLG_LOOPBACK)
1102 				s->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
1103 			else
1104 				s->sin_addr.s_addr = h_ip;
1105 			s->sin_port = 0;
1106 			s->sin_family = AF_INET;
1107 		}
1108 
1109 		dst.len = dst.maxlen;
1110 
1111 		MINOR(("med_net_callrpc - Trying %s\n", uap->ua_netid));
1112 
1113 		err = med_callrpc(&uap->ua_kn, &dst, prog, vers, proc, inproc,
1114 		    in, outproc, out, timout);
1115 
1116 		if (dst.buf) {
1117 			kmem_free(dst.buf, dst.maxlen);
1118 			dst.buf = NULL;
1119 			dst.len = 0;
1120 			dst.maxlen = 0;
1121 		}
1122 
1123 		if (err) {
1124 			MINUTE(("med_net_callrpc - %s failed\n\n",
1125 			    uap->ua_netid));
1126 			continue;
1127 		}
1128 
1129 		MINUTE(("med_net_callrpc - %s OK\n\n", uap->ua_netid));
1130 		done = 1;
1131 	}
1132 
1133 	/*
1134 	 * Print a message if we could not reach a host.
1135 	 */
1136 	if (! done) {
1137 		cmn_err(CE_WARN, "%s on host %s not responding", MED_SERVNAME,
1138 		    h_nm);
1139 		return (1);
1140 	}
1141 
1142 	return (0);
1143 }
1144 
1145 /*
1146  * Validate the mediator data
1147  */
1148 static int
med_ok(set_t setno,med_data_t * meddp)1149 med_ok(set_t setno, med_data_t *meddp)
1150 {
1151 	/* Not initialized, or not a mediator data record */
1152 	if (meddp->med_dat_mag != MED_DATA_MAGIC)
1153 		goto fail;
1154 
1155 	MINUTE(("Magic OK\n"));
1156 
1157 	/* Mismatch in revisions */
1158 	if (meddp->med_dat_rev != MED_DATA_REV)
1159 		goto fail;
1160 
1161 	MINUTE(("Revision OK\n"));
1162 
1163 	/* Not for the right set, this is paranoid */
1164 	if (setno != meddp->med_dat_sn)
1165 		goto fail;
1166 
1167 	MINUTE(("Setno OK\n"));
1168 
1169 	/* The record checksum is not correct */
1170 	if (crcchk(meddp, &meddp->med_dat_cks, sizeof (med_data_t), NULL))
1171 		goto fail;
1172 
1173 	MINUTE(("Mediator validated\n"));
1174 
1175 	return (1);
1176 
1177 fail:
1178 	return (0);
1179 }
1180 
1181 static void
med_adl(med_data_lst_t ** meddlpp,med_data_t * meddp)1182 med_adl(med_data_lst_t **meddlpp, med_data_t *meddp)
1183 {
1184 	/*
1185 	 * Run to the end of the list
1186 	 */
1187 	for (/* void */; (*meddlpp != NULL); meddlpp = &(*meddlpp)->mdl_nx)
1188 		/* void */;
1189 
1190 	*meddlpp = (med_data_lst_t *)kmem_zalloc(sizeof (med_data_lst_t),
1191 	    KM_SLEEP);
1192 
1193 	(*meddlpp)->mdl_med = (med_data_t *)med_dup(meddp, sizeof (med_data_t));
1194 }
1195 
1196 static void
mtaa_upd_init(med_thr_a_args_t * mtaap,med_thr_h_args_t * mthap)1197 mtaa_upd_init(med_thr_a_args_t *mtaap, med_thr_h_args_t *mthap)
1198 {
1199 	med_upd_data_args_t	*argsp;
1200 	med_err_t		*resp;
1201 
1202 	argsp = kmem_zalloc(sizeof (med_upd_data_args_t), KM_SLEEP);
1203 	argsp->med.med_setno = mthap->mtha_setno;
1204 	if (MD_MNSET_SETNO(argsp->med.med_setno)) {
1205 		/*
1206 		 * In MN diskset, use a generic nodename, multiowner, in the
1207 		 * mediator record which allows any node to access mediator
1208 		 * information.  MN diskset reconfig cycle forces consistent
1209 		 * view of set/node/drive/mediator information across all nodes
1210 		 * in the MN diskset.  This allows the relaxation of
1211 		 * node name checking in rpc.metamedd for MN disksets.
1212 		 */
1213 		argsp->med.med_caller = md_strdup(MED_MN_CALLER);
1214 	} else {
1215 		argsp->med.med_caller = md_strdup(utsname.nodename);
1216 	}
1217 	argsp->med.med_setname = md_strdup(mthap->mtha_setname);
1218 	argsp->med_data = *mthap->mtha_meddp;
1219 
1220 	resp = kmem_zalloc(sizeof (med_err_t), KM_SLEEP);
1221 
1222 	mtaap->mtaa_mag = MTAA_MAGIC;
1223 	mtaap->mtaa_mthap = mthap;
1224 	mtaap->mtaa_prog = MED_PROG;
1225 	mtaap->mtaa_vers = MED_VERS;
1226 	mtaap->mtaa_proc = MED_UPD_DATA;
1227 	mtaap->mtaa_inproc = xdr_med_upd_data_args_t;
1228 	mtaap->mtaa_in = (caddr_t)argsp;
1229 	mtaap->mtaa_outproc = xdr_med_err_t;
1230 	mtaap->mtaa_out = (caddr_t)resp;
1231 	mtaap->mtaa_timout = (struct timeval *)&md_med_def_timeout;
1232 }
1233 
1234 static void
mtaa_upd_free(med_thr_a_args_t * mtaap)1235 mtaa_upd_free(med_thr_a_args_t *mtaap)
1236 {
1237 	med_upd_data_args_t	*argsp = (med_upd_data_args_t *)mtaap->mtaa_in;
1238 	med_err_t		*resp = (med_err_t *)mtaap->mtaa_out;
1239 
1240 	freestr(argsp->med.med_caller);
1241 	freestr(argsp->med.med_setname);
1242 	kmem_free(argsp, sizeof (med_upd_data_args_t));
1243 
1244 	if (mtaap->mtaa_flags & MDT_A_OK)
1245 		xdr_free(mtaap->mtaa_outproc, mtaap->mtaa_out);
1246 
1247 	kmem_free(resp, sizeof (med_err_t));
1248 }
1249 
1250 static int
mtaa_upd_err(med_thr_a_args_t * mtaap)1251 mtaa_upd_err(med_thr_a_args_t *mtaap)
1252 {
1253 	/*LINTED*/
1254 	med_err_t		*resp = (med_err_t *)mtaap->mtaa_out;
1255 
1256 	if (resp->med_errno == MDE_MED_NOERROR) {
1257 		MAJOR(("upd_med_hosts - %s - OK\n\n", mtaap->mtaa_h_nm));
1258 		return (0);
1259 	} else {
1260 		MAJOR(("upd_med_hosts - %s - errno=%d\n\n", mtaap->mtaa_h_nm,
1261 		    resp->med_errno));
1262 		return (1);
1263 	}
1264 }
1265 
1266 static void
mtaa_get_init(med_thr_a_args_t * mtaap,med_thr_h_args_t * mthap)1267 mtaa_get_init(med_thr_a_args_t *mtaap, med_thr_h_args_t *mthap)
1268 {
1269 	med_args_t		*argsp;
1270 	med_get_data_res_t	*resp;
1271 
1272 	argsp = kmem_zalloc(sizeof (med_args_t), KM_SLEEP);
1273 	argsp->med.med_setno = mthap->mtha_setno;
1274 	if (MD_MNSET_SETNO(argsp->med.med_setno)) {
1275 		/*
1276 		 * In MN diskset, use a generic nodename, multiowner, in the
1277 		 * mediator record which allows any node to access mediator
1278 		 * information.  MN diskset reconfig cycle forces consistent
1279 		 * view of set/node/drive/mediator information across all nodes
1280 		 * in the MN diskset.  This allows the relaxation of
1281 		 * node name checking in rpc.metamedd for MN disksets.
1282 		 */
1283 		argsp->med.med_caller = md_strdup(MED_MN_CALLER);
1284 	} else {
1285 		argsp->med.med_caller = md_strdup(utsname.nodename);
1286 	}
1287 
1288 	argsp->med.med_setname = md_strdup(mthap->mtha_setname);
1289 
1290 	resp = kmem_zalloc(sizeof (med_get_data_res_t), KM_SLEEP);
1291 
1292 	mtaap->mtaa_mag = MTAA_MAGIC;
1293 	mtaap->mtaa_mthap = mthap;
1294 	mtaap->mtaa_prog = MED_PROG;
1295 	mtaap->mtaa_vers = MED_VERS;
1296 	mtaap->mtaa_proc = MED_GET_DATA;
1297 	mtaap->mtaa_inproc = xdr_med_args_t;
1298 	mtaap->mtaa_in = (caddr_t)argsp;
1299 	mtaap->mtaa_outproc = xdr_med_get_data_res_t;
1300 	mtaap->mtaa_out = (caddr_t)resp;
1301 	mtaap->mtaa_timout = (struct timeval *)&md_med_def_timeout;
1302 }
1303 
1304 static void
mtaa_get_free(med_thr_a_args_t * mtaap)1305 mtaa_get_free(med_thr_a_args_t *mtaap)
1306 {
1307 	/*LINTED*/
1308 	med_args_t		*argsp = (med_args_t *)mtaap->mtaa_in;
1309 	/*LINTED*/
1310 	med_get_data_res_t	*resp = (med_get_data_res_t *)mtaap->mtaa_out;
1311 
1312 	freestr(argsp->med.med_caller);
1313 	freestr(argsp->med.med_setname);
1314 	kmem_free(argsp, sizeof (med_args_t));
1315 
1316 	if (mtaap->mtaa_flags & MDT_A_OK)
1317 		xdr_free(mtaap->mtaa_outproc, mtaap->mtaa_out);
1318 
1319 	kmem_free(resp, sizeof (med_get_data_res_t));
1320 }
1321 
1322 static int
mtaa_get_err(med_thr_a_args_t * mtaap)1323 mtaa_get_err(med_thr_a_args_t *mtaap)
1324 {
1325 	/*LINTED*/
1326 	med_get_data_res_t	*resp = (med_get_data_res_t *)mtaap->mtaa_out;
1327 
1328 	if (resp->med_status.med_errno == MDE_MED_NOERROR) {
1329 		MAJOR(("get_med_host_data - %s - OK\n\n", mtaap->mtaa_h_nm));
1330 		return (0);
1331 	} else {
1332 		MAJOR(("get_med_host_data - %s - errno=%d\n\n",
1333 		    mtaap->mtaa_h_nm, resp->med_status.med_errno));
1334 		return (1);
1335 	}
1336 }
1337 
1338 static void
mtha_init(med_thr_t * mtp,med_thr_h_args_t * mthap,md_hi_t * mhp,char * setname,med_data_t * meddp,set_t setno,void (* mtaa_init_func)(med_thr_a_args_t *,med_thr_h_args_t *),int (* mtaa_err_func)(med_thr_a_args_t *))1339 mtha_init(
1340 	med_thr_t		*mtp,
1341 	med_thr_h_args_t	*mthap,
1342 	md_hi_t			*mhp,
1343 	char			*setname,
1344 	med_data_t		*meddp,
1345 	set_t			setno,
1346 	void			(*mtaa_init_func)(med_thr_a_args_t *,
1347 				    med_thr_h_args_t *),
1348 	int			(*mtaa_err_func)(med_thr_a_args_t *)
1349 )
1350 {
1351 	int		j;
1352 
1353 	mthap->mtha_mag		= MTHA_MAGIC;
1354 	mthap->mtha_mtp 	= mtp;
1355 	mthap->mtha_mhp 	= mhp;
1356 	mthap->mtha_setname	= md_strdup(setname);
1357 	if (meddp)
1358 		mthap->mtha_meddp	= meddp;
1359 	else
1360 		mthap->mtha_meddp	= NULL;
1361 	mthap->mtha_setno 	= setno;
1362 	mthap->mtha_a_cnt 	= mhp->a_cnt;
1363 	mthap->mtha_a_nthr	= 0;
1364 
1365 	mutex_init(&mthap->mtha_a_mx, NULL, MUTEX_DEFAULT,
1366 	    NULL);
1367 	cv_init(&mthap->mtha_a_cv, NULL, CV_DEFAULT, NULL);
1368 
1369 	j = MIN(mthap->mtha_a_cnt, MAX_HOST_ADDRS) - 1;
1370 	for (; j >= 0; j--) {
1371 		(*mtaa_init_func)(&mthap->mtha_a_args[j], mthap);
1372 		mthap->mtha_a_args[j].mtaa_h_nm = mhp->a_nm[j];
1373 		mthap->mtha_a_args[j].mtaa_h_ip = mhp->a_ip[j];
1374 		mthap->mtha_a_args[j].mtaa_h_flags = mhp->a_flg;
1375 		mthap->mtha_a_args[j].mtaa_err_func = mtaa_err_func;
1376 	}
1377 }
1378 
1379 static void
mtha_free(med_thr_h_args_t * mthap,void (* mtaa_free_func)(med_thr_a_args_t *))1380 mtha_free(
1381 	med_thr_h_args_t	*mthap,
1382 	void			(*mtaa_free_func)(med_thr_a_args_t *)
1383 )
1384 {
1385 	int		j;
1386 
1387 	freestr(mthap->mtha_setname);
1388 
1389 	j = MIN(mthap->mtha_a_cnt, MAX_HOST_ADDRS) - 1;
1390 	for (; j >= 0; j--)
1391 		(*mtaa_free_func)(&mthap->mtha_a_args[j]);
1392 
1393 	mutex_destroy(&mthap->mtha_a_mx);
1394 	cv_destroy(&mthap->mtha_a_cv);
1395 }
1396 
1397 static void
med_a_thr(med_thr_a_args_t * mtaap)1398 med_a_thr(med_thr_a_args_t *mtaap)
1399 {
1400 	callb_cpr_t	cprinfo;
1401 
1402 	/*
1403 	 * Register cpr callback
1404 	 */
1405 	CALLB_CPR_INIT(&cprinfo, &mtaap->mtaa_mthap->mtha_a_mx,
1406 	    callb_generic_cpr, "med_a_thr");
1407 
1408 	mutex_enter(&mtaap->mtaa_mthap->mtha_a_mx);
1409 	if (mtaap->mtaa_mthap->mtha_flags & MDT_H_OK)
1410 		goto done;
1411 
1412 	mutex_exit(&mtaap->mtaa_mthap->mtha_a_mx);
1413 
1414 	mtaap->mtaa_err = med_net_callrpc(
1415 	    mtaap->mtaa_h_nm, mtaap->mtaa_h_ip, mtaap->mtaa_h_flags,
1416 	    mtaap->mtaa_prog, mtaap->mtaa_vers, mtaap->mtaa_proc,
1417 	    mtaap->mtaa_inproc, mtaap->mtaa_in,
1418 	    mtaap->mtaa_outproc, mtaap->mtaa_out,
1419 	    mtaap->mtaa_timout);
1420 
1421 	mutex_enter(&mtaap->mtaa_mthap->mtha_a_mx);
1422 
1423 	if (mtaap->mtaa_err) {
1424 		MAJOR(("med_net_callrpc(%u, %u, %u) - %s - failed\n\n",
1425 		    mtaap->mtaa_prog, mtaap->mtaa_vers, mtaap->mtaa_proc,
1426 		    mtaap->mtaa_h_nm));
1427 		xdr_free(mtaap->mtaa_outproc, mtaap->mtaa_out);
1428 	} else {
1429 		if ((*mtaap->mtaa_err_func)(mtaap) == 0) {
1430 			if (! (mtaap->mtaa_mthap->mtha_flags & MDT_H_OK)) {
1431 				mtaap->mtaa_mthap->mtha_flags |= MDT_H_OK;
1432 				mtaap->mtaa_flags |= MDT_A_OK;
1433 			} else
1434 				xdr_free(mtaap->mtaa_outproc, mtaap->mtaa_out);
1435 		} else
1436 			xdr_free(mtaap->mtaa_outproc, mtaap->mtaa_out);
1437 	}
1438 
1439 done:
1440 	mtaap->mtaa_mthap->mtha_a_nthr--;
1441 	cv_signal(&mtaap->mtaa_mthap->mtha_a_cv);
1442 
1443 	/*
1444 	 * CALLB_CPR_EXIT will do mutex_exit(&mtaap->mtaa_mthap->mtha_a_mx)
1445 	 */
1446 	CALLB_CPR_EXIT(&cprinfo);
1447 	thread_exit();
1448 }
1449 
1450 static void
med_h_thr(med_thr_h_args_t * mthap)1451 med_h_thr(med_thr_h_args_t *mthap)
1452 {
1453 	int		j;
1454 	callb_cpr_t	cprinfo;
1455 
1456 	/*
1457 	 * Register cpr callback
1458 	 */
1459 	CALLB_CPR_INIT(&cprinfo, &mthap->mtha_mtp->mt_mx, callb_generic_cpr,
1460 	    "med_a_thr");
1461 	/*
1462 	 * Lock mthap->mtha_mtp->mt_mx is held early to avoid releasing the
1463 	 * locks out of order.
1464 	 */
1465 	mutex_enter(&mthap->mtha_mtp->mt_mx);
1466 	mutex_enter(&mthap->mtha_a_mx);
1467 
1468 	j = MIN(mthap->mtha_a_cnt, MAX_HOST_ADDRS) - 1;
1469 	for (; j >= 0; j--) {
1470 		(void) thread_create(NULL, 0, med_a_thr,
1471 		    &mthap->mtha_a_args[j], 0, &p0, TS_RUN, minclsyspri);
1472 		mthap->mtha_a_nthr++;
1473 	}
1474 
1475 	/*
1476 	 * cpr safe to suspend while waiting for other threads
1477 	 */
1478 	CALLB_CPR_SAFE_BEGIN(&cprinfo);
1479 	while (mthap->mtha_a_nthr > 0)
1480 		cv_wait(&mthap->mtha_a_cv, &mthap->mtha_a_mx);
1481 	mutex_exit(&mthap->mtha_a_mx);
1482 	CALLB_CPR_SAFE_END(&cprinfo, &mthap->mtha_mtp->mt_mx);
1483 
1484 
1485 	mthap->mtha_mtp->mt_nthr--;
1486 	cv_signal(&mthap->mtha_mtp->mt_cv);
1487 
1488 	/*
1489 	 * set up cpr exit
1490 	 * CALLB_CPR_EXIT will do mutex_exit(&mtaap->mta_mtp->mt_mx)
1491 	 */
1492 	CALLB_CPR_EXIT(&cprinfo);
1493 	thread_exit();
1494 }
1495 
1496 static med_get_data_res_t *
mtaa_get_resp(med_thr_h_args_t * mthap)1497 mtaa_get_resp(med_thr_h_args_t *mthap)
1498 {
1499 	med_thr_a_args_t	*mtaap;
1500 	int			j;
1501 
1502 	j = MIN(mthap->mtha_a_cnt, MAX_HOST_ADDRS) - 1;
1503 	for (; j >= 0; j--) {
1504 		mtaap = &mthap->mtha_a_args[j];
1505 		if (mtaap->mtaa_flags & MDT_A_OK)
1506 			/*LINTED*/
1507 			return ((med_get_data_res_t *)mtaap->mtaa_out);
1508 	}
1509 	return ((med_get_data_res_t *)NULL);
1510 }
1511 
1512 /*
1513  * Public Functions
1514  */
1515 
1516 /*
1517  * initializes med structs, locks, etc
1518  */
1519 void
med_init(void)1520 med_init(void)
1521 {
1522 	int		uapi;
1523 
1524 	TRIVIA(("[med_init"));
1525 
1526 	for (uapi = 0; uapi < med_addr_tab_nents; uapi++) {
1527 		struct	med_addr	*uap = &med_addr_tab[uapi];
1528 
1529 		/* If the protocol is skipped, the mutex is not needed either */
1530 		if (md_med_trans_lst != NULL &&
1531 		    strstr(md_med_trans_lst, uap->ua_kn.knc_proto) == NULL &&
1532 		    strstr(md_med_trans_lst, uap->ua_netid) == NULL) {
1533 			uap->ua_flags |= UAFLG_SKIP;
1534 			continue;
1535 		}
1536 
1537 		mutex_init(&uap->ua_mutex, NULL, MUTEX_DEFAULT, NULL);
1538 		uap->ua_flags |= UAFLG_LOCKINIT;
1539 		bzero((caddr_t)&uap->ua_kn.knc_unused,
1540 		    sizeof (uap->ua_kn.knc_unused));
1541 	}
1542 
1543 	TRIVIA(("]\n"));
1544 }
1545 
1546 /*
1547  * free any med structs, locks, etc
1548  */
1549 void
med_fini(void)1550 med_fini(void)
1551 {
1552 	int	uapi;
1553 
1554 	TRIVIA(("[med_fini"));
1555 
1556 	for (uapi = 0; uapi < med_addr_tab_nents; uapi++) {
1557 		struct med_addr *uap = &med_addr_tab[uapi];
1558 
1559 		if (uap->ua_flags & UAFLG_LOCKINIT) {
1560 			mutex_destroy(&uap->ua_mutex);
1561 			uap->ua_flags &= ~UAFLG_LOCKINIT;
1562 		}
1563 	}
1564 
1565 	TRIVIA(("]\n"));
1566 }
1567 
1568 /*
1569  * Update all the mediators
1570  */
1571 int
upd_med_hosts(md_hi_arr_t * mp,char * setname,med_data_t * meddp,char * caller)1572 upd_med_hosts(
1573 	md_hi_arr_t		*mp,
1574 	char			*setname,
1575 	med_data_t		*meddp,
1576 	char			*caller
1577 )
1578 {
1579 	med_thr_t		*mtp;
1580 	med_thr_h_args_t	*mthap;
1581 	int			i;
1582 	int			medok = 0;
1583 
1584 	MAJOR(("upd_med_hosts - called from <%s>\n", NULLSTR(caller)));
1585 
1586 	/* No mediators, were done */
1587 	if (mp->n_cnt == 0)
1588 		return (0);
1589 
1590 	mtp = kmem_zalloc(sizeof (med_thr_t), KM_SLEEP);
1591 	ASSERT(mtp != NULL);
1592 
1593 	mutex_init(&mtp->mt_mx, NULL, MUTEX_DEFAULT, NULL);
1594 	cv_init(&mtp->mt_cv, NULL, CV_DEFAULT, NULL);
1595 	mtp->mt_mag = MTH_MAGIC;
1596 
1597 	mutex_enter(&mtp->mt_mx);
1598 
1599 	mtp->mt_nthr = 0;
1600 
1601 	/* Loop through our list of mediator hosts, start a thread per host */
1602 	for (i = 0; i < md_nmedh; i++) {
1603 
1604 		if (mp->n_lst[i].a_cnt == 0)
1605 			continue;
1606 
1607 		mtp->mt_h_args[i] = kmem_zalloc(sizeof (med_thr_h_args_t),
1608 		    KM_SLEEP);
1609 		mthap = mtp->mt_h_args[i];
1610 		ASSERT(mthap != NULL);
1611 		mtha_init(mtp, mthap, &mp->n_lst[i], setname, meddp,
1612 		    meddp->med_dat_sn, mtaa_upd_init, mtaa_upd_err);
1613 
1614 		MAJOR(("upd_med_hosts - updating %s\n",
1615 		    NULLSTR(mp->n_lst[i].a_nm[0])));
1616 
1617 		(void) thread_create(NULL, 0, med_h_thr, mthap, 0, &p0,
1618 		    TS_RUN, minclsyspri);
1619 
1620 		mtp->mt_nthr++;
1621 	}
1622 
1623 	while (mtp->mt_nthr > 0)
1624 		cv_wait(&mtp->mt_cv, &mtp->mt_mx);
1625 
1626 	mutex_exit(&mtp->mt_mx);
1627 
1628 	for (i = 0; i < md_nmedh; i++) {
1629 		mthap = mtp->mt_h_args[i];
1630 		if (mthap != NULL) {
1631 			if (mthap->mtha_flags & MDT_H_OK)
1632 				medok++;
1633 			mtha_free(mthap, mtaa_upd_free);
1634 			kmem_free(mthap, sizeof (med_thr_h_args_t));
1635 		}
1636 	}
1637 
1638 	mutex_destroy(&mtp->mt_mx);
1639 	cv_destroy(&mtp->mt_cv);
1640 
1641 	kmem_free(mtp, sizeof (med_thr_t));
1642 
1643 	return (medok);
1644 }
1645 
1646 /*
1647  * Get the mediator data.
1648  */
1649 med_data_lst_t *
get_med_host_data(md_hi_arr_t * mp,char * setname,set_t setno)1650 get_med_host_data(
1651 	md_hi_arr_t		*mp,
1652 	char			*setname,
1653 	set_t			setno
1654 )
1655 {
1656 	med_thr_t		*mtp;
1657 	med_thr_h_args_t	*mthap;
1658 	med_get_data_res_t	*resp;
1659 	med_data_lst_t		*retval = NULL;
1660 	int			i;
1661 
1662 	/* No mediators, were done */
1663 	if (mp->n_cnt == 0)
1664 		return (NULL);
1665 
1666 	mtp = kmem_zalloc(sizeof (med_thr_t), KM_SLEEP);
1667 	ASSERT(mtp != NULL);
1668 
1669 	mutex_init(&mtp->mt_mx, NULL, MUTEX_DEFAULT, NULL);
1670 	cv_init(&mtp->mt_cv, NULL, CV_DEFAULT, NULL);
1671 
1672 	mutex_enter(&mtp->mt_mx);
1673 
1674 	mtp->mt_nthr = 0;
1675 
1676 	/* Loop through our list of mediator hosts, start a thread per host */
1677 	for (i = 0; i < md_nmedh; i++) {
1678 
1679 		if (mp->n_lst[i].a_cnt == 0)
1680 			continue;
1681 
1682 		mtp->mt_h_args[i] = kmem_zalloc(sizeof (med_thr_h_args_t),
1683 		    KM_SLEEP);
1684 		mthap = mtp->mt_h_args[i];
1685 		ASSERT(mthap != NULL);
1686 		mtha_init(mtp, mthap, &mp->n_lst[i], setname, NULL, setno,
1687 		    mtaa_get_init, mtaa_get_err);
1688 
1689 		MAJOR(("get_med_host_data from %s\n",
1690 		    NULLSTR(mp->n_lst[i].a_nm[0])));
1691 
1692 		(void) thread_create(NULL, 0, med_h_thr, mthap, 0, &p0,
1693 		    TS_RUN, minclsyspri);
1694 
1695 		mtp->mt_nthr++;
1696 	}
1697 
1698 	while (mtp->mt_nthr > 0)
1699 		cv_wait(&mtp->mt_cv, &mtp->mt_mx);
1700 
1701 	mutex_exit(&mtp->mt_mx);
1702 
1703 	for (i = 0; i < md_nmedh; i++) {
1704 		mthap = mtp->mt_h_args[i];
1705 		if (mthap != NULL) {
1706 			if (mthap->mtha_flags & MDT_H_OK) {
1707 				resp = mtaa_get_resp(mthap);
1708 				ASSERT(resp != NULL);
1709 
1710 				if (med_ok(setno, &resp->med_data))
1711 					med_adl(&retval, &resp->med_data);
1712 			}
1713 			mtha_free(mthap, mtaa_get_free);
1714 			kmem_free(mthap, sizeof (med_thr_h_args_t));
1715 		}
1716 	}
1717 
1718 	mutex_destroy(&mtp->mt_mx);
1719 	cv_destroy(&mtp->mt_cv);
1720 
1721 	kmem_free(mtp, sizeof (med_thr_t));
1722 
1723 	return (retval);
1724 }
1725 
1726 int
med_get_t_size_ioctl(mddb_med_t_parm_t * tpp,int mode)1727 med_get_t_size_ioctl(mddb_med_t_parm_t *tpp, int mode)
1728 {
1729 	md_error_t	*ep = &tpp->med_tp_mde;
1730 
1731 	mdclrerror(ep);
1732 
1733 	if ((mode & FREAD) == 0)
1734 		return (mdsyserror(ep, EACCES));
1735 
1736 	tpp->med_tp_nents = med_addr_tab_nents;
1737 	tpp->med_tp_setup = md_med_transdevs_set;
1738 
1739 	return (0);
1740 }
1741 
1742 int
med_get_t_ioctl(mddb_med_t_parm_t * tpp,int mode)1743 med_get_t_ioctl(mddb_med_t_parm_t *tpp, int mode)
1744 {
1745 	md_error_t	*ep = &tpp->med_tp_mde;
1746 	int		uapi = 0;
1747 
1748 	mdclrerror(ep);
1749 
1750 	if ((mode & FREAD) == 0)
1751 		return (mdsyserror(ep, EACCES));
1752 
1753 	for (uapi = 0; uapi < med_addr_tab_nents; uapi++) {
1754 		struct	med_addr	*uap = &med_addr_tab[uapi];
1755 
1756 		(void) strncpy(tpp->med_tp_ents[uapi].med_te_nm,
1757 		    uap->ua_devname, MED_TE_NM_LEN);
1758 		tpp->med_tp_ents[uapi].med_te_dev =
1759 		    (md_dev64_t)uap->ua_kn.knc_rdev;
1760 	}
1761 
1762 	tpp->med_tp_nents = med_addr_tab_nents;
1763 
1764 	return (0);
1765 }
1766 
1767 int
med_set_t_ioctl(mddb_med_t_parm_t * tpp,int mode)1768 med_set_t_ioctl(mddb_med_t_parm_t *tpp, int mode)
1769 {
1770 	md_error_t	*ep = &tpp->med_tp_mde;
1771 	int		uapi = 0;
1772 
1773 	mdclrerror(ep);
1774 
1775 	if ((mode & FWRITE) == 0)
1776 		return (mdsyserror(ep, EACCES));
1777 
1778 	for (uapi = 0; uapi < med_addr_tab_nents; uapi++) {
1779 		struct	med_addr	*uap = &med_addr_tab[uapi];
1780 
1781 		mutex_enter(&uap->ua_mutex);
1782 		uap->ua_kn.knc_rdev = md_dev64_to_dev(
1783 		    tpp->med_tp_ents[uapi].med_te_dev);
1784 		mutex_exit(&uap->ua_mutex);
1785 	}
1786 
1787 	md_med_transdevs_set = 1;
1788 
1789 	return (0);
1790 }
1791