xref: /titanic_51/usr/src/uts/common/inet/ip/spdsock.c (revision 3125ebfc35130d243e775dc38a6a59be4df0b137)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/param.h>
29 #include <sys/types.h>
30 #include <sys/stream.h>
31 #include <sys/strsubr.h>
32 #include <sys/strsun.h>
33 #include <sys/stropts.h>
34 #include <sys/zone.h>
35 #include <sys/vnode.h>
36 #include <sys/sysmacros.h>
37 #define	_SUN_TPI_VERSION 2
38 #include <sys/tihdr.h>
39 #include <sys/ddi.h>
40 #include <sys/sunddi.h>
41 #include <sys/mkdev.h>
42 #include <sys/debug.h>
43 #include <sys/kmem.h>
44 #include <sys/cmn_err.h>
45 #include <sys/suntpi.h>
46 #include <sys/policy.h>
47 
48 #include <sys/socket.h>
49 #include <netinet/in.h>
50 #include <net/pfkeyv2.h>
51 #include <net/pfpolicy.h>
52 
53 #include <inet/common.h>
54 #include <netinet/ip6.h>
55 #include <inet/ip.h>
56 #include <inet/ip6.h>
57 #include <inet/mi.h>
58 #include <inet/nd.h>
59 #include <inet/ip_if.h>
60 #include <inet/tun.h>
61 #include <inet/optcom.h>
62 #include <inet/ipsec_info.h>
63 #include <inet/ipsec_impl.h>
64 #include <inet/spdsock.h>
65 #include <inet/sadb.h>
66 
67 #include <sys/isa_defs.h>
68 
69 /*
70  * This is a transport provider for the PF_POLICY IPsec policy
71  * management socket, which provides a management interface into the
72  * SPD, allowing policy rules to be added, deleted, and queried.
73  *
74  * This effectively replaces the old private SIOC*IPSECONFIG ioctls
75  * with an extensible interface which will hopefully be public some
76  * day.
77  *
78  * See <net/pfpolicy.h> for more details on the protocol.
79  *
80  * We link against drv/ip and call directly into it to manipulate the
81  * SPD; see ipsec_impl.h for the policy data structures and spd.c for
82  * the code which maintains them.
83  *
84  * The MT model of this is QPAIR with the addition of some explicit
85  * locking to protect system-wide policy data structures.
86  */
87 
static vmem_t *spdsock_vmem;		/* Arena for minor device numbers. */

/* B_TRUE when (x) is 64-bit aligned, as PF_POLICY message parsing needs. */
#define	ALIGNED64(x) IS_P2ALIGNED((x), sizeof (uint64_t))
91 
/*
 * Default structure copied into T_INFO_ACK messages (from rts.c...).
 * Describes spdsock's TPI transport characteristics to its users.
 */
static struct T_info_ack spdsock_g_t_info_ack = {
	T_INFO_ACK,
	T_INFINITE,	/* TSDU_size. Maximum size messages. */
	T_INVALID,	/* ETSDU_size. No expedited data. */
	T_INVALID,	/* CDATA_size. No connect data. */
	T_INVALID,	/* DDATA_size. No disconnect data. */
	0,		/* ADDR_size. */
	0,		/* OPT_size. No user-settable options */
	64 * 1024,	/* TIDU_size. spdsock allows maximum size messages. */
	T_COTS,		/* SERV_type. spdsock supports connection oriented. */
	TS_UNBND,	/* CURRENT_state. This is set from spdsock_state. */
	(XPG4_1)	/* Provider flags */
};
106 
/* Named Dispatch Parameter Management Structure */
typedef struct spdsockparam_s {
	uint_t	spdsock_param_min;	/* lowest value the set routine takes */
	uint_t	spdsock_param_max;	/* highest value the set routine takes */
	uint_t	spdsock_param_value;	/* current value of the tunable */
	char *spdsock_param_name;	/* NDD variable name */
} spdsockparam_t;
114 
115 /*
116  * Table of NDD variables supported by spdsock. These are loaded into
117  * spdsock_g_nd in spdsock_init_nd.
118  * All of these are alterable, within the min/max values given, at run time.
119  */
static	spdsockparam_t	lcl_param_arr[] = {
	/* min	max	value	name */
	{ 4096, 65536,	8192,	"spdsock_xmit_hiwat"},
	{ 0,	65536,	1024,	"spdsock_xmit_lowat"},
	{ 4096, 65536,	8192,	"spdsock_recv_hiwat"},
	{ 65536, 1024*1024*1024, 256*1024,	"spdsock_max_buf"},
	{ 0,	3,	0,	"spdsock_debug"},
};
/*
 * Shorthand accessors into the per-stack copy of the table above
 * (spds_params); index order must match lcl_param_arr.
 */
#define	spds_xmit_hiwat	spds_params[0].spdsock_param_value
#define	spds_xmit_lowat	spds_params[1].spdsock_param_value
#define	spds_recv_hiwat	spds_params[2].spdsock_param_value
#define	spds_max_buf	spds_params[3].spdsock_param_value
#define	spds_debug		spds_params[4].spdsock_param_value

/* Debug printf wrappers gated on the spdsock_debug tunable (0..3). */
#define	ss0dbg(a)	printf a
/* NOTE:  != 0 instead of > 0 so lint doesn't complain. */
#define	ss1dbg(spds, a)	if (spds->spds_debug != 0) printf a
#define	ss2dbg(spds, a)	if (spds->spds_debug > 1) printf a
#define	ss3dbg(spds, a)	if (spds->spds_debug > 2) printf a
139 
140 static int spdsock_close(queue_t *);
141 static int spdsock_open(queue_t *, dev_t *, int, int, cred_t *);
142 static void spdsock_wput(queue_t *, mblk_t *);
143 static void spdsock_wsrv(queue_t *);
144 static void spdsock_rsrv(queue_t *);
145 static void *spdsock_stack_init(netstackid_t stackid, netstack_t *ns);
146 static void spdsock_stack_fini(netstackid_t stackid, void *arg);
147 static void spdsock_loadcheck(void *);
148 static void spdsock_merge_algs(spd_stack_t *);
149 static void spdsock_flush_one(ipsec_policy_head_t *, netstack_t *);
150 static mblk_t *spdsock_dump_next_record(spdsock_t *);
151 
/* STREAMS module identification for spdsock. */
static struct module_info info = {
	5138, "spdsock", 1, INFPSZ, 512, 128
};

/* Read side: service routine only; open/close live here per STREAMS rules. */
static struct qinit rinit = {
	NULL, (pfi_t)spdsock_rsrv, spdsock_open, spdsock_close,
	NULL, &info
};

/* Write side: put and service routines consume PF_POLICY requests. */
static struct qinit winit = {
	(pfi_t)spdsock_wput, (pfi_t)spdsock_wsrv, NULL, NULL, NULL, &info
};

/* Exported STREAMS entry-point table for this driver. */
struct streamtab spdsockinfo = {
	&rinit, &winit
};
168 
/* mapping from alg type to protocol number, as per RFC 2407 */
static const uint_t algproto[] = {
	PROTO_IPSEC_AH,
	PROTO_IPSEC_ESP,
};

#define	NALGPROTOS	(sizeof (algproto) / sizeof (algproto[0]))

/* mapping from kernel exec mode to spdsock exec mode */
static const uint_t execmodes[] = {
	SPD_ALG_EXEC_MODE_SYNC,
	SPD_ALG_EXEC_MODE_ASYNC
};

#define	NEXECMODES	(sizeof (execmodes) / sizeof (execmodes[0]))

/* Sentinel polhead pointers meaning "every active/inactive polhead". */
#define	ALL_ACTIVE_POLHEADS ((ipsec_policy_head_t *)-1)
#define	ALL_INACTIVE_POLHEADS ((ipsec_policy_head_t *)-2)
187 
188 /* ARGSUSED */
189 static int
190 spdsock_param_get(q, mp, cp, cr)
191 	queue_t	*q;
192 	mblk_t	*mp;
193 	caddr_t	cp;
194 	cred_t *cr;
195 {
196 	spdsockparam_t	*spdsockpa = (spdsockparam_t *)cp;
197 	uint_t value;
198 	spdsock_t *ss = (spdsock_t *)q->q_ptr;
199 	spd_stack_t	*spds = ss->spdsock_spds;
200 
201 	mutex_enter(&spds->spds_param_lock);
202 	value = spdsockpa->spdsock_param_value;
203 	mutex_exit(&spds->spds_param_lock);
204 
205 	(void) mi_mpprintf(mp, "%u", value);
206 	return (0);
207 }
208 
209 /* This routine sets an NDD variable in a spdsockparam_t structure. */
210 /* ARGSUSED */
211 static int
212 spdsock_param_set(q, mp, value, cp, cr)
213 	queue_t	*q;
214 	mblk_t	*mp;
215 	char *value;
216 	caddr_t	cp;
217 	cred_t *cr;
218 {
219 	ulong_t	new_value;
220 	spdsockparam_t	*spdsockpa = (spdsockparam_t *)cp;
221 	spdsock_t *ss = (spdsock_t *)q->q_ptr;
222 	spd_stack_t	*spds = ss->spdsock_spds;
223 
224 	/* Convert the value from a string into a long integer. */
225 	if (ddi_strtoul(value, NULL, 10, &new_value) != 0)
226 		return (EINVAL);
227 
228 	mutex_enter(&spds->spds_param_lock);
229 	/*
230 	 * Fail the request if the new value does not lie within the
231 	 * required bounds.
232 	 */
233 	if (new_value < spdsockpa->spdsock_param_min ||
234 	    new_value > spdsockpa->spdsock_param_max) {
235 		mutex_exit(&spds->spds_param_lock);
236 		return (EINVAL);
237 	}
238 
239 	/* Set the new value */
240 	spdsockpa->spdsock_param_value = new_value;
241 	mutex_exit(&spds->spds_param_lock);
242 
243 	return (0);
244 }
245 
246 /*
247  * Initialize at module load time
248  */
boolean_t
spdsock_ddi_init(void)
{
	/* Precompute the largest option buffer T_OPTMGMT can hand us. */
	spdsock_max_optsize = optcom_max_optsize(
	    spdsock_opt_obj.odb_opt_des_arr, spdsock_opt_obj.odb_opt_arr_cnt);

	/* Minor-number arena; numbering starts at 1. */
	spdsock_vmem = vmem_create("spdsock", (void *)1, MAXMIN, 1,
	    NULL, NULL, NULL, 1, VM_SLEEP | VMC_IDENTIFIER);

	/*
	 * We want to be informed each time a stack is created or
	 * destroyed in the kernel, so we can maintain the
	 * set of spd_stack_t's.
	 */
	netstack_register(NS_SPDSOCK, spdsock_stack_init, NULL,
	    spdsock_stack_fini);

	return (B_TRUE);
}
268 
269 /*
270  * Walk through the param array specified registering each element with the
271  * named dispatch handler.
272  */
273 static boolean_t
274 spdsock_param_register(IDP *ndp, spdsockparam_t *ssp, int cnt)
275 {
276 	for (; cnt-- > 0; ssp++) {
277 		if (ssp->spdsock_param_name != NULL &&
278 		    ssp->spdsock_param_name[0]) {
279 			if (!nd_load(ndp,
280 			    ssp->spdsock_param_name,
281 			    spdsock_param_get, spdsock_param_set,
282 			    (caddr_t)ssp)) {
283 				nd_free(ndp);
284 				return (B_FALSE);
285 			}
286 		}
287 	}
288 	return (B_TRUE);
289 }
290 
291 /*
292  * Initialize for each stack instance
293  */
/* ARGSUSED */
static void *
spdsock_stack_init(netstackid_t stackid, netstack_t *ns)
{
	spd_stack_t	*spds;
	spdsockparam_t	*ssp;

	spds = (spd_stack_t *)kmem_zalloc(sizeof (*spds), KM_SLEEP);
	spds->spds_netstack = ns;

	ASSERT(spds->spds_g_nd == NULL);

	/* Per-stack, mutable copy of the default tunable table. */
	ssp = (spdsockparam_t *)kmem_alloc(sizeof (lcl_param_arr), KM_SLEEP);
	spds->spds_params = ssp;
	bcopy(lcl_param_arr, ssp, sizeof (lcl_param_arr));

	/* Expose the copy via NDD get/set handlers. */
	(void) spdsock_param_register(&spds->spds_g_nd, ssp,
	    A_CNT(lcl_param_arr));

	mutex_init(&spds->spds_param_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&spds->spds_alg_lock, NULL, MUTEX_DEFAULT, NULL);

	return (spds);
}
318 
void
spdsock_ddi_destroy(void)
{
	/* Undo spdsock_ddi_init(): minor arena and netstack hooks. */
	vmem_destroy(spdsock_vmem);

	netstack_unregister(NS_SPDSOCK);
}
326 
/* ARGSUSED */
static void
spdsock_stack_fini(netstackid_t stackid, void *arg)
{
	spd_stack_t *spds = (spd_stack_t *)arg;

	/* Tear down everything spdsock_stack_init() created. */
	mutex_destroy(&spds->spds_param_lock);
	mutex_destroy(&spds->spds_alg_lock);
	nd_free(&spds->spds_g_nd);
	kmem_free(spds->spds_params, sizeof (lcl_param_arr));
	spds->spds_params = NULL;

	kmem_free(spds, sizeof (*spds));
}
341 
342 /*
343  * NOTE: large quantities of this should be shared with keysock.
344  * Would be nice to combine some of this into a common module, but
345  * not possible given time pressures.
346  */
347 
348 /*
349  * High-level reality checking of extensions.
350  */
351 /* ARGSUSED */ /* XXX */
352 static boolean_t
353 ext_check(spd_ext_t *ext)
354 {
355 	spd_if_t *tunname = (spd_if_t *)ext;
356 	int i;
357 	char *idstr;
358 
359 	if (ext->spd_ext_type == SPD_EXT_TUN_NAME) {
360 		/* (NOTE:  Modified from SADB_EXT_IDENTITY..) */
361 
362 		/*
363 		 * Make sure the strings in these identities are
364 		 * null-terminated.  Let's "proactively" null-terminate the
365 		 * string at the last byte if it's not terminated sooner.
366 		 */
367 		i = SPD_64TO8(tunname->spd_if_len) - sizeof (spd_if_t);
368 		idstr = (char *)(tunname + 1);
369 		while (*idstr != '\0' && i > 0) {
370 			i--;
371 			idstr++;
372 		}
373 		if (i == 0) {
374 			/*
375 			 * I.e., if the bozo user didn't NULL-terminate the
376 			 * string...
377 			 */
378 			idstr--;
379 			*idstr = '\0';
380 		}
381 	}
382 	return (B_TRUE);	/* For now... */
383 }
384 
385 
386 
/* Return values for spdsock_get_ext(). */
#define	KGE_OK	0	/* Parsed out cleanly. */
#define	KGE_DUP	1	/* Duplicate extension type seen. */
#define	KGE_UNK	2	/* Unknown extension type. */
#define	KGE_LEN	3	/* Extension length error. */
#define	KGE_CHK	4	/* ext_check() rejected an extension. */
393 
394 /*
395  * Parse basic extension headers and return in the passed-in pointer vector.
396  * Return values include:
397  *
398  *	KGE_OK	Everything's nice and parsed out.
399  *		If there are no extensions, place NULL in extv[0].
400  *	KGE_DUP	There is a duplicate extension.
401  *		First instance in appropriate bin.  First duplicate in
402  *		extv[0].
403  *	KGE_UNK	Unknown extension type encountered.  extv[0] contains
404  *		unknown header.
405  *	KGE_LEN	Extension length error.
406  *	KGE_CHK	High-level reality check failed on specific extension.
407  *
408  * My apologies for some of the pointer arithmetic in here.  I'm thinking
409  * like an assembly programmer, yet trying to make the compiler happy.
410  */
static int
spdsock_get_ext(spd_ext_t *extv[], spd_msg_t *basehdr, uint_t msgsize)
{
	/* extv must have SPD_EXT_MAX + 1 slots; slot 0 is scratch/output. */
	bzero(extv, sizeof (spd_ext_t *) * (SPD_EXT_MAX + 1));

	/* Use extv[0] as the "current working pointer". */

	extv[0] = (spd_ext_t *)(basehdr + 1);

	while (extv[0] < (spd_ext_t *)(((uint8_t *)basehdr) + msgsize)) {
		/* Check for unknown headers. */
		if (extv[0]->spd_ext_type == 0 ||
		    extv[0]->spd_ext_type > SPD_EXT_MAX)
			return (KGE_UNK);

		/*
		 * Check length.  Use uint64_t because extlen is in units
		 * of 64-bit words.  If length goes beyond the msgsize,
		 * return an error.  (Zero length also qualifies here.)
		 */
		if (extv[0]->spd_ext_len == 0 ||
		    (void *)((uint64_t *)extv[0] + extv[0]->spd_ext_len) >
		    (void *)((uint8_t *)basehdr + msgsize))
			return (KGE_LEN);

		/* Check for redundant headers. */
		if (extv[extv[0]->spd_ext_type] != NULL)
			return (KGE_DUP);

		/*
		 * Reality check the extension if possible at the spdsock
		 * level.
		 */
		if (!ext_check(extv[0]))
			return (KGE_CHK);

		/* If I make it here, assign the appropriate bin. */
		extv[extv[0]->spd_ext_type] = extv[0];

		/* Advance pointer (See above for uint64_t ptr reasoning.) */
		extv[0] = (spd_ext_t *)
		    ((uint64_t *)extv[0] + extv[0]->spd_ext_len);
	}

	/* Everything's cool. */

	/*
	 * If extv[0] == NULL, then there are no extension headers in this
	 * message.  Ensure that this is the case.
	 */
	if (extv[0] == (spd_ext_t *)(basehdr + 1))
		extv[0] = NULL;

	return (KGE_OK);
}
466 
/*
 * Diagnostic codes indexed by (spd_ext_type - SPD_EXT_LCLPORT); the
 * two tables below must stay in the same extension order.
 */
static const int bad_ext_diag[] = {
	SPD_DIAGNOSTIC_MALFORMED_LCLPORT,
	SPD_DIAGNOSTIC_MALFORMED_REMPORT,
	SPD_DIAGNOSTIC_MALFORMED_PROTO,
	SPD_DIAGNOSTIC_MALFORMED_LCLADDR,
	SPD_DIAGNOSTIC_MALFORMED_REMADDR,
	SPD_DIAGNOSTIC_MALFORMED_ACTION,
	SPD_DIAGNOSTIC_MALFORMED_RULE,
	SPD_DIAGNOSTIC_MALFORMED_RULESET,
	SPD_DIAGNOSTIC_MALFORMED_ICMP_TYPECODE
};

static const int dup_ext_diag[] = {
	SPD_DIAGNOSTIC_DUPLICATE_LCLPORT,
	SPD_DIAGNOSTIC_DUPLICATE_REMPORT,
	SPD_DIAGNOSTIC_DUPLICATE_PROTO,
	SPD_DIAGNOSTIC_DUPLICATE_LCLADDR,
	SPD_DIAGNOSTIC_DUPLICATE_REMADDR,
	SPD_DIAGNOSTIC_DUPLICATE_ACTION,
	SPD_DIAGNOSTIC_DUPLICATE_RULE,
	SPD_DIAGNOSTIC_DUPLICATE_RULESET,
	SPD_DIAGNOSTIC_DUPLICATE_ICMP_TYPECODE
};
490 
491 /*
492  * Transmit a PF_POLICY error message to the instance either pointed to
493  * by ks, the instance with serial number serial, or more, depending.
494  *
495  * The faulty message (or a reasonable facsimile thereof) is in mp.
496  * This function will free mp or recycle it for delivery, thereby causing
497  * the stream head to free it.
498  */
static void
spdsock_error(queue_t *q, mblk_t *mp, int error, int diagnostic)
{
	spd_msg_t *spmsg = (spd_msg_t *)mp->b_rptr;

	ASSERT(mp->b_datap->db_type == M_DATA);

	/* Preserve the request type if it is a known one. */
	if (spmsg->spd_msg_type < SPD_MIN ||
	    spmsg->spd_msg_type > SPD_MAX)
		spmsg->spd_msg_type = SPD_RESERVED;

	/*
	 * Strip out extension headers.
	 */
	ASSERT(mp->b_rptr + sizeof (*spmsg) <= mp->b_datap->db_lim);
	mp->b_wptr = mp->b_rptr + sizeof (*spmsg);
	spmsg->spd_msg_len = SPD_8TO64(sizeof (spd_msg_t));
	spmsg->spd_msg_errno = (uint8_t)error;
	spmsg->spd_msg_diagnostic = (uint16_t)diagnostic;

	/* Recycle mp as the reply; the stream head frees it. */
	qreply(q, mp);
}
521 
/* Common case: report EINVAL along with a PF_POLICY diagnostic code. */
static void
spdsock_diag(queue_t *q, mblk_t *mp, int diagnostic)
{
	spdsock_error(q, mp, EINVAL, diagnostic);
}
527 
/* Acknowledge success by reflecting the request message back upstream. */
static void
spd_echo(queue_t *q, mblk_t *mp)
{
	qreply(q, mp);
}
533 
534 /*
535  * Do NOT consume a reference to itp.
536  */
/*ARGSUSED*/
static void
spdsock_flush_node(ipsec_tun_pol_t *itp, void *cookie, netstack_t *ns)
{
	/* cookie encodes a boolean: flush the active vs. inactive head. */
	boolean_t active = (boolean_t)cookie;
	ipsec_policy_head_t *iph;

	iph = active ? itp->itp_policy : itp->itp_inactive;
	/* Take a hold; spdsock_flush_one() releases exactly one. */
	IPPH_REFHOLD(iph);
	mutex_enter(&itp->itp_lock);
	spdsock_flush_one(iph, ns);
	/* Clear the tunnel's flags for whichever side was flushed. */
	if (active)
		itp->itp_flags &= ~ITPF_PFLAGS;
	else
		itp->itp_flags &= ~ITPF_IFLAGS;
	mutex_exit(&itp->itp_lock);
}
554 
555 /*
556  * Clear out one polhead.
557  */
static void
spdsock_flush_one(ipsec_policy_head_t *iph, netstack_t *ns)
{
	/* Empty the polhead under the writer lock, then drop our hold. */
	rw_enter(&iph->iph_lock, RW_WRITER);
	ipsec_polhead_flush(iph, ns);
	rw_exit(&iph->iph_lock);
	IPPH_REFRELE(iph, ns);
}
566 
567 static void
568 spdsock_flush(queue_t *q, ipsec_policy_head_t *iph, mblk_t *mp)
569 {
570 	boolean_t active;
571 	spdsock_t *ss = (spdsock_t *)q->q_ptr;
572 	spd_stack_t *spds = ss->spdsock_spds;
573 	netstack_t *ns = spds->spds_netstack;
574 
575 	if (iph != ALL_ACTIVE_POLHEADS && iph != ALL_INACTIVE_POLHEADS) {
576 		spdsock_flush_one(iph, spds->spds_netstack);
577 	} else {
578 		active = (iph == ALL_ACTIVE_POLHEADS);
579 
580 		/* First flush the global policy. */
581 		spdsock_flush_one(active ? ipsec_system_policy(ns) :
582 		    ipsec_inactive_policy(ns), ns);
583 
584 		/* Then flush every tunnel's appropriate one. */
585 		itp_walk(spdsock_flush_node, (void *)active,
586 		    spds->spds_netstack);
587 	}
588 
589 	spd_echo(q, mp);
590 }
591 
/*
 * Convert the parsed extension vector into an ipsec_selkey_t.  Each
 * present extension sets its field and the matching IPSL_* valid bit.
 * Returns B_FALSE (with *diag set) on malformed addresses or a
 * mixed-address-family rule.
 */
static boolean_t
spdsock_ext_to_sel(spd_ext_t **extv, ipsec_selkey_t *sel, int *diag)
{
	bzero(sel, sizeof (*sel));

	if (extv[SPD_EXT_PROTO] != NULL) {
		struct spd_proto *pr =
		    (struct spd_proto *)extv[SPD_EXT_PROTO];
		sel->ipsl_proto = pr->spd_proto_number;
		sel->ipsl_valid |= IPSL_PROTOCOL;
	}
	if (extv[SPD_EXT_LCLPORT] != NULL) {
		struct spd_portrange *pr =
		    (struct spd_portrange *)extv[SPD_EXT_LCLPORT];
		sel->ipsl_lport = pr->spd_ports_minport;
		sel->ipsl_valid |= IPSL_LOCAL_PORT;
	}
	if (extv[SPD_EXT_REMPORT] != NULL) {
		struct spd_portrange *pr =
		    (struct spd_portrange *)extv[SPD_EXT_REMPORT];
		sel->ipsl_rport = pr->spd_ports_minport;
		sel->ipsl_valid |= IPSL_REMOTE_PORT;
	}

	if (extv[SPD_EXT_ICMP_TYPECODE] != NULL) {
		struct spd_typecode *tc=
		    (struct spd_typecode *)extv[SPD_EXT_ICMP_TYPECODE];

		sel->ipsl_valid |= IPSL_ICMP_TYPE;
		sel->ipsl_icmp_type = tc->spd_typecode_type;
		/* Normalize an inverted range to a single value. */
		if (tc->spd_typecode_type_end < tc->spd_typecode_type)
			sel->ipsl_icmp_type_end = tc->spd_typecode_type;
		else
			sel->ipsl_icmp_type_end = tc->spd_typecode_type_end;

		/* Code 255 means "any code": leave IPSL_ICMP_CODE clear. */
		if (tc->spd_typecode_code != 255) {
			sel->ipsl_valid |= IPSL_ICMP_CODE;
			sel->ipsl_icmp_code = tc->spd_typecode_code;
			if (tc->spd_typecode_code_end < tc->spd_typecode_code)
				sel->ipsl_icmp_code_end = tc->spd_typecode_code;
			else
				sel->ipsl_icmp_code_end =
				    tc->spd_typecode_code_end;
		}
	}
/*
 * Copy one address extension (local or remote) into the selector,
 * validating that the extension is long enough for its address family.
 */
#define	ADDR2SEL(sel, extv, field, pfield, extn, bit)			      \
	if ((extv)[(extn)] != NULL) {					      \
		uint_t addrlen;						      \
		struct spd_address *ap = 				      \
			(struct spd_address *)((extv)[(extn)]); 	      \
		addrlen = (ap->spd_address_af == AF_INET6) ? 		      \
			IPV6_ADDR_LEN : IP_ADDR_LEN;			      \
		if (SPD_64TO8(ap->spd_address_len) < 			      \
			(addrlen + sizeof (*ap))) {			      \
			*diag = SPD_DIAGNOSTIC_BAD_ADDR_LEN;		      \
			return (B_FALSE);				      \
		}							      \
		bcopy((ap+1), &((sel)->field), addrlen);		      \
		(sel)->pfield = ap->spd_address_prefixlen;		      \
		(sel)->ipsl_valid |= (bit);				      \
		(sel)->ipsl_valid |= (ap->spd_address_af == AF_INET6) ?	      \
			IPSL_IPV6 : IPSL_IPV4;				      \
	}

	ADDR2SEL(sel, extv, ipsl_local, ipsl_local_pfxlen,
	    SPD_EXT_LCLADDR, IPSL_LOCAL_ADDR);
	ADDR2SEL(sel, extv, ipsl_remote, ipsl_remote_pfxlen,
	    SPD_EXT_REMADDR, IPSL_REMOTE_ADDR);

	/* Local and remote addresses must share one address family. */
	if ((sel->ipsl_valid & (IPSL_IPV6|IPSL_IPV4)) ==
	    (IPSL_IPV6|IPSL_IPV4)) {
		*diag = SPD_DIAGNOSTIC_MIXED_AF;
		return (B_FALSE);
	}

#undef ADDR2SEL

	return (B_TRUE);
}
671 
672 static boolean_t
673 spd_convert_type(uint32_t type, ipsec_act_t *act)
674 {
675 	switch (type) {
676 	case SPD_ACTTYPE_DROP:
677 		act->ipa_type = IPSEC_ACT_DISCARD;
678 		return (B_TRUE);
679 
680 	case SPD_ACTTYPE_PASS:
681 		act->ipa_type = IPSEC_ACT_CLEAR;
682 		return (B_TRUE);
683 
684 	case SPD_ACTTYPE_IPSEC:
685 		act->ipa_type = IPSEC_ACT_APPLY;
686 		return (B_TRUE);
687 	}
688 	return (B_FALSE);
689 }
690 
691 static boolean_t
692 spd_convert_flags(uint32_t flags, ipsec_act_t *act)
693 {
694 	/*
695 	 * Note use of !! for boolean canonicalization.
696 	 */
697 	act->ipa_apply.ipp_use_ah = !!(flags & SPD_APPLY_AH);
698 	act->ipa_apply.ipp_use_esp = !!(flags & SPD_APPLY_ESP);
699 	act->ipa_apply.ipp_use_espa = !!(flags & SPD_APPLY_ESPA);
700 	act->ipa_apply.ipp_use_se = !!(flags & SPD_APPLY_SE);
701 	act->ipa_apply.ipp_use_unique = !!(flags & SPD_APPLY_UNIQUE);
702 	return (B_TRUE);
703 }
704 
/*
 * Reset an action to its default state: all-zero except the key-size
 * upper bounds, which start at the maximum and are shrink-wrapped
 * later by spdsock_check_action().
 */
static void
spdsock_reset_act(ipsec_act_t *act)
{
	bzero(act, sizeof (*act));
	act->ipa_apply.ipp_espe_maxbits = IPSEC_MAX_KEYBITS;
	act->ipa_apply.ipp_espa_maxbits = IPSEC_MAX_KEYBITS;
	act->ipa_apply.ipp_ah_maxbits = IPSEC_MAX_KEYBITS;
}
713 
714 /*
715  * Sanity check action against reality, and shrink-wrap key sizes..
716  */
717 static boolean_t
718 spdsock_check_action(ipsec_act_t *act, boolean_t tunnel_polhead, int *diag,
719     spd_stack_t *spds)
720 {
721 	if (tunnel_polhead && act->ipa_apply.ipp_use_unique) {
722 		*diag = SPD_DIAGNOSTIC_ADD_INCON_FLAGS;
723 		return (B_FALSE);
724 	}
725 	if ((act->ipa_type != IPSEC_ACT_APPLY) &&
726 	    (act->ipa_apply.ipp_use_ah ||
727 		act->ipa_apply.ipp_use_esp ||
728 		act->ipa_apply.ipp_use_espa ||
729 		act->ipa_apply.ipp_use_se ||
730 		act->ipa_apply.ipp_use_unique)) {
731 		*diag = SPD_DIAGNOSTIC_ADD_INCON_FLAGS;
732 		return (B_FALSE);
733 	}
734 	if ((act->ipa_type == IPSEC_ACT_APPLY) &&
735 	    !act->ipa_apply.ipp_use_ah &&
736 	    !act->ipa_apply.ipp_use_esp) {
737 		*diag = SPD_DIAGNOSTIC_ADD_INCON_FLAGS;
738 		return (B_FALSE);
739 	}
740 	return (ipsec_check_action(act, diag, spds->spds_netstack));
741 }
742 
743 /*
744  * We may be short a few error checks here..
745  */
/*
 * Convert the SPD_EXT_ACTION extension into a freshly-allocated vector
 * of ipsec_act_t (returned via *actpp/*nactp; caller frees with
 * ipsec_actvec_free()).  Returns B_FALSE with *diag set on any error;
 * in that case *actpp is NULL.
 */
static boolean_t
spdsock_ext_to_actvec(spd_ext_t **extv, ipsec_act_t **actpp, uint_t *nactp,
    int *diag, spd_stack_t *spds)
{
	struct spd_ext_actions *sactp =
	    (struct spd_ext_actions *)extv[SPD_EXT_ACTION];
	ipsec_act_t act, *actp, *endactp;
	struct spd_attribute *attrp, *endattrp;
	uint64_t *endp;
	int nact;
	boolean_t tunnel_polhead;

	/* Tunnel polhead == tunnel name present AND rule flagged TUNNEL. */
	tunnel_polhead = (extv[SPD_EXT_TUN_NAME] != NULL &&
	    (((struct spd_rule *)extv[SPD_EXT_RULE])->spd_rule_flags &
		SPD_RULE_FLAG_TUNNEL));

	*actpp = NULL;
	*nactp = 0;

	if (sactp == NULL) {
		*diag = SPD_DIAGNOSTIC_NO_ACTION_EXT;
		return (B_FALSE);
	}

	/*
	 * Parse the "action" extension and convert into an action chain.
	 */

	nact = sactp->spd_actions_count;

	/* spd_actions_len is in 64-bit words; compute the attribute end. */
	endp = (uint64_t *)sactp;
	endp += sactp->spd_actions_len;
	endattrp = (struct spd_attribute *)endp;

	actp = kmem_alloc(sizeof (*actp) * nact, KM_NOSLEEP);
	if (actp == NULL) {
		*diag = SPD_DIAGNOSTIC_ADD_NO_MEM;
		return (B_FALSE);
	}
	*actpp = actp;
	*nactp = nact;
	endactp = actp + nact;

	/* `act' accumulates attributes until NEXT/END commits it. */
	spdsock_reset_act(&act);
	attrp = (struct spd_attribute *)(&sactp[1]);

	for (; attrp < endattrp; attrp++) {
		switch (attrp->spd_attr_tag) {
		case SPD_ATTR_NOP:
			break;

		case SPD_ATTR_EMPTY:
			spdsock_reset_act(&act);
			break;

		case SPD_ATTR_END:
			/* Stop the walk, but first commit the pending act. */
			attrp = endattrp;
			/* FALLTHRU */
		case SPD_ATTR_NEXT:
			if (actp >= endactp) {
				*diag = SPD_DIAGNOSTIC_ADD_WRONG_ACT_COUNT;
				goto fail;
			}
			if (!spdsock_check_action(&act, tunnel_polhead,
			    diag, spds))
				goto fail;
			*actp++ = act;
			spdsock_reset_act(&act);
			break;

		case SPD_ATTR_TYPE:
			if (!spd_convert_type(attrp->spd_attr_value, &act)) {
				*diag = SPD_DIAGNOSTIC_ADD_BAD_TYPE;
				goto fail;
			}
			break;

		case SPD_ATTR_FLAGS:
			if (!tunnel_polhead && extv[SPD_EXT_TUN_NAME] != NULL) {
				/*
				 * Set "sa unique" for transport-mode
				 * tunnels whether we want to or not.
				 */
				attrp->spd_attr_value |= SPD_APPLY_UNIQUE;
			}
			if (!spd_convert_flags(attrp->spd_attr_value, &act)) {
				*diag = SPD_DIAGNOSTIC_ADD_BAD_FLAGS;
				goto fail;
			}
			break;

		case SPD_ATTR_AH_AUTH:
			if (attrp->spd_attr_value == 0) {
				*diag = SPD_DIAGNOSTIC_UNSUPP_AH_ALG;
				goto fail;
			}
			act.ipa_apply.ipp_auth_alg = attrp->spd_attr_value;
			break;

		case SPD_ATTR_ESP_ENCR:
			if (attrp->spd_attr_value == 0) {
				*diag = SPD_DIAGNOSTIC_UNSUPP_ESP_ENCR_ALG;
				goto fail;
			}
			act.ipa_apply.ipp_encr_alg = attrp->spd_attr_value;
			break;

		case SPD_ATTR_ESP_AUTH:
			if (attrp->spd_attr_value == 0) {
				*diag = SPD_DIAGNOSTIC_UNSUPP_ESP_AUTH_ALG;
				goto fail;
			}
			act.ipa_apply.ipp_esp_auth_alg = attrp->spd_attr_value;
			break;

		case SPD_ATTR_ENCR_MINBITS:
			act.ipa_apply.ipp_espe_minbits = attrp->spd_attr_value;
			break;

		case SPD_ATTR_ENCR_MAXBITS:
			act.ipa_apply.ipp_espe_maxbits = attrp->spd_attr_value;
			break;

		case SPD_ATTR_AH_MINBITS:
			act.ipa_apply.ipp_ah_minbits = attrp->spd_attr_value;
			break;

		case SPD_ATTR_AH_MAXBITS:
			act.ipa_apply.ipp_ah_maxbits = attrp->spd_attr_value;
			break;

		case SPD_ATTR_ESPA_MINBITS:
			act.ipa_apply.ipp_espa_minbits = attrp->spd_attr_value;
			break;

		case SPD_ATTR_ESPA_MAXBITS:
			act.ipa_apply.ipp_espa_maxbits = attrp->spd_attr_value;
			break;

		/* Lifetime attributes are accepted but ignored here. */
		case SPD_ATTR_LIFE_SOFT_TIME:
		case SPD_ATTR_LIFE_HARD_TIME:
		case SPD_ATTR_LIFE_SOFT_BYTES:
		case SPD_ATTR_LIFE_HARD_BYTES:
			break;

		case SPD_ATTR_KM_PROTO:
			act.ipa_apply.ipp_km_proto = attrp->spd_attr_value;
			break;

		case SPD_ATTR_KM_COOKIE:
			act.ipa_apply.ipp_km_cookie = attrp->spd_attr_value;
			break;

		case SPD_ATTR_REPLAY_DEPTH:
			act.ipa_apply.ipp_replay_depth = attrp->spd_attr_value;
			break;
		}
	}
	/* The sender's spd_actions_count must match what we committed. */
	if (actp != endactp) {
		*diag = SPD_DIAGNOSTIC_ADD_WRONG_ACT_COUNT;
		goto fail;
	}

	return (B_TRUE);
fail:
	ipsec_actvec_free(*actpp, nact);
	*actpp = NULL;
	return (B_FALSE);
}
915 
/*
 * Scratch record used by spdsock_addrule(): remembers each policy rule
 * created so the batch can be committed, or released on failure, as a
 * unit.
 */
typedef struct
{
	ipsec_policy_t *pol;	/* rule from ipsec_policy_create() */
	int dir;		/* direction to enter it under */
} tmprule_t;
921 
/*
 * Create one policy rule for a single address family `af' and record
 * it in the caller's tmprule array via *rp.  Returns 0, ENOMEM, or
 * EEXIST.  Note the rule is recorded in *rp BEFORE the
 * ipsec_check_policy() call, so the caller's unwind path releases it
 * even when EEXIST is returned.
 */
static int
mkrule(ipsec_policy_head_t *iph, struct spd_rule *rule,
    ipsec_selkey_t *sel, ipsec_act_t *actp, int nact, uint_t dir, uint_t af,
    tmprule_t **rp, uint64_t *index, spd_stack_t *spds)
{
	ipsec_policy_t *pol;

	/* Force the selector to exactly the requested address family. */
	sel->ipsl_valid &= ~(IPSL_IPV6|IPSL_IPV4);
	sel->ipsl_valid |= af;

	pol = ipsec_policy_create(sel, actp, nact, rule->spd_rule_priority,
	    index, spds->spds_netstack);
	if (pol == NULL)
		return (ENOMEM);

	(*rp)->pol = pol;
	(*rp)->dir = dir;
	(*rp)++;

	if (!ipsec_check_policy(iph, pol, dir))
		return (EEXIST);

	/* Report the assigned index back to the requester. */
	rule->spd_rule_index = pol->ipsp_index;
	return (0);
}
947 
948 static int
949 mkrulepair(ipsec_policy_head_t *iph, struct spd_rule *rule,
950     ipsec_selkey_t *sel, ipsec_act_t *actp, int nact, uint_t dir, uint_t afs,
951     tmprule_t **rp, uint64_t *index, spd_stack_t *spds)
952 {
953 	int error;
954 
955 	if (afs & IPSL_IPV4) {
956 		error = mkrule(iph, rule, sel, actp, nact, dir, IPSL_IPV4, rp,
957 		    index, spds);
958 		if (error != 0)
959 			return (error);
960 	}
961 	if (afs & IPSL_IPV6) {
962 		error = mkrule(iph, rule, sel, actp, nact, dir, IPSL_IPV6, rp,
963 		    index, spds);
964 		if (error != 0)
965 			return (error);
966 	}
967 	return (0);
968 }
969 
970 
/*
 * Add a rule (inbound and/or outbound incarnations, for one or both
 * address families) to polhead `iph' and echo the request back on
 * success.  `itp', if non-NULL, is the tunnel policy node the polhead
 * belongs to; its flags are updated to reflect the rule being added.
 * All rules are validated before any is entered, so the add is
 * all-or-nothing.
 */
static void
spdsock_addrule(queue_t *q, ipsec_policy_head_t *iph, mblk_t *mp,
    spd_ext_t **extv, ipsec_tun_pol_t *itp)
{
	ipsec_selkey_t sel;
	ipsec_act_t *actp;
	uint_t nact;
	int diag = 0, error, afs;
	struct spd_rule *rule = (struct spd_rule *)extv[SPD_EXT_RULE];
	/* Worst case: {inbound, outbound} x {IPv4, IPv6} = 4 rules. */
	tmprule_t rules[4], *rulep = &rules[0];
	boolean_t tunnel_mode, empty_itp, active;
	uint64_t *index = (itp == NULL) ? NULL : &itp->itp_next_policy_index;
	spdsock_t *ss = (spdsock_t *)q->q_ptr;
	spd_stack_t	*spds = ss->spdsock_spds;

	if (rule == NULL) {
		spdsock_diag(q, mp, SPD_DIAGNOSTIC_NO_RULE_EXT);
		return;
	}

	tunnel_mode = (rule->spd_rule_flags & SPD_RULE_FLAG_TUNNEL);

	if (itp != NULL) {
		mutex_enter(&itp->itp_lock);
		ASSERT(itp->itp_policy == iph || itp->itp_inactive == iph);
		active = (itp->itp_policy == iph);
		if (ITP_P_ISACTIVE(itp, iph)) {
			/* Check for mix-and-match of tunnel/transport. */
			if ((tunnel_mode && !ITP_P_ISTUNNEL(itp, iph)) ||
			    (!tunnel_mode && ITP_P_ISTUNNEL(itp, iph))) {
				mutex_exit(&itp->itp_lock);
				spdsock_error(q, mp, EBUSY, 0);
				return;
			}
			empty_itp = B_FALSE;
		} else {
			/* First rule on this side; set the mode flags now. */
			empty_itp = B_TRUE;
			itp->itp_flags = active ? ITPF_P_ACTIVE : ITPF_I_ACTIVE;
			if (tunnel_mode)
				itp->itp_flags |= active ? ITPF_P_TUNNEL :
				    ITPF_I_TUNNEL;
		}
	} else {
		empty_itp = B_FALSE;
	}

	/* Rule indices are assigned by the kernel, never the requester. */
	if (rule->spd_rule_index != 0) {
		diag = SPD_DIAGNOSTIC_INVALID_RULE_INDEX;
		error = EINVAL;
		goto fail2;
	}

	if (!spdsock_ext_to_sel(extv, &sel, &diag)) {
		error = EINVAL;
		goto fail2;
	}

	if (itp != NULL) {
		if (tunnel_mode) {
			if (sel.ipsl_valid &
			    (IPSL_REMOTE_PORT | IPSL_LOCAL_PORT)) {
				itp->itp_flags |= active ?
				    ITPF_P_PER_PORT_SECURITY :
				    ITPF_I_PER_PORT_SECURITY;
			}
		} else {
			/*
			 * For now, we don't allow transport-mode on a tunnel
			 * with ANY specific selectors.  Bail if we have such
			 * a request.
			 */
			if (sel.ipsl_valid & IPSL_WILDCARD) {
				diag = SPD_DIAGNOSTIC_NO_TUNNEL_SELECTORS;
				error = EINVAL;
				goto fail2;
			}
		}
	}

	if (!spdsock_ext_to_actvec(extv, &actp, &nact, &diag, spds)) {
		error = EINVAL;
		goto fail2;
	}
	/*
	 * If no addresses were specified, add both.
	 */
	afs = sel.ipsl_valid & (IPSL_IPV6|IPSL_IPV4);
	if (afs == 0)
		afs = (IPSL_IPV6|IPSL_IPV4);

	rw_enter(&iph->iph_lock, RW_WRITER);

	if (rule->spd_rule_flags & SPD_RULE_FLAG_OUTBOUND) {
		error = mkrulepair(iph, rule, &sel, actp, nact,
		    IPSEC_TYPE_OUTBOUND, afs, &rulep, index, spds);
		if (error != 0)
			goto fail;
	}

	if (rule->spd_rule_flags & SPD_RULE_FLAG_INBOUND) {
		error = mkrulepair(iph, rule, &sel, actp, nact,
		    IPSEC_TYPE_INBOUND, afs, &rulep, index, spds);
		if (error != 0)
			goto fail;
	}

	/* Every rule passed its checks; now actually enter them. */
	while ((--rulep) >= &rules[0]) {
		ipsec_enter_policy(iph, rulep->pol, rulep->dir,
		    spds->spds_netstack);
	}
	rw_exit(&iph->iph_lock);
	if (itp != NULL)
		mutex_exit(&itp->itp_lock);

	ipsec_actvec_free(actp, nact);
	spd_echo(q, mp);
	return;

fail:
	/* Release rules that were created but never entered. */
	rw_exit(&iph->iph_lock);
	while ((--rulep) >= &rules[0]) {
		IPPOL_REFRELE(rulep->pol, spds->spds_netstack);
	}
	ipsec_actvec_free(actp, nact);
fail2:
	if (itp != NULL) {
		/* If we set the itp flags above, undo that too. */
		if (empty_itp)
			itp->itp_flags = 0;
		mutex_exit(&itp->itp_lock);
	}
	spdsock_error(q, mp, error, diag);
}
1103 
/*
 * Handle an SPD_DELETERULE request: remove rule(s) from the policy head
 * "iph", either by explicit rule index or by the selector extensions in
 * "extv".  On success the request is echoed back to the client; on
 * failure an error message (with optional diagnostic) is sent instead.
 * "itp" is non-NULL when "iph" belongs to a tunnel policy instance.
 */
void
spdsock_deleterule(queue_t *q, ipsec_policy_head_t *iph, mblk_t *mp,
    spd_ext_t **extv, ipsec_tun_pol_t *itp)
{
	ipsec_selkey_t sel;
	struct spd_rule *rule = (struct spd_rule *)extv[SPD_EXT_RULE];
	int err, diag = 0;
	spdsock_t *ss = (spdsock_t *)q->q_ptr;
	spd_stack_t	*spds = ss->spdsock_spds;

	/* A rule extension is mandatory for a delete request. */
	if (rule == NULL) {
		spdsock_diag(q, mp, SPD_DIAGNOSTIC_NO_RULE_EXT);
		return;
	}

	/*
	 * Must enter itp_lock first to avoid deadlock.  See tun.c's
	 * set_sec_simple() for the other case of itp_lock and iph_lock.
	 */
	if (itp != NULL)
		mutex_enter(&itp->itp_lock);

	if (rule->spd_rule_index != 0) {
		/* Non-zero index: delete exactly that rule. */
		if (ipsec_policy_delete_index(iph, rule->spd_rule_index,
			spds->spds_netstack) != 0) {
			err = ESRCH;
			goto fail;
		}
	} else {
		/* Index 0: delete by selector match instead. */
		if (!spdsock_ext_to_sel(extv, &sel, &diag)) {
			err = EINVAL;	/* diag already set... */
			goto fail;
		}

		if ((rule->spd_rule_flags & SPD_RULE_FLAG_INBOUND) &&
		    !ipsec_policy_delete(iph, &sel, IPSEC_TYPE_INBOUND,
		    spds->spds_netstack)) {
			err = ESRCH;
			goto fail;
		}

		if ((rule->spd_rule_flags & SPD_RULE_FLAG_OUTBOUND) &&
		    !ipsec_policy_delete(iph, &sel, IPSEC_TYPE_OUTBOUND,
		    spds->spds_netstack)) {
			err = ESRCH;
			goto fail;
		}
	}

	if (itp != NULL) {
		ASSERT(iph == itp->itp_policy || iph == itp->itp_inactive);
		rw_enter(&iph->iph_lock, RW_READER);
		/*
		 * If the head is now empty, clear the corresponding
		 * active/inactive flag bits on the tunnel policy.
		 */
		if (avl_numnodes(&iph->iph_rulebyid) == 0) {
			if (iph == itp->itp_policy)
				itp->itp_flags &= ~ITPF_PFLAGS;
			else
				itp->itp_flags &= ~ITPF_IFLAGS;
		}
		/* Can exit locks in any order. */
		rw_exit(&iph->iph_lock);
		mutex_exit(&itp->itp_lock);
	}
	spd_echo(q, mp);
	return;
fail:
	if (itp != NULL)
		mutex_exit(&itp->itp_lock);
	spdsock_error(q, mp, err, diag);
}
1173 
/*
 * Swap one tunnel's active and inactive policy under itp_lock: flip the
 * ITPF_* flag bits with ITPF_SWAP() and swap the contents of the two
 * policy heads via ipsec_swap_policy().  Shaped as an itp_walk() callback
 * ("ignoreme" is unused).  Does NOT consume a reference to itp.
 */
/* ARGSUSED */
static void
spdsock_flip_node(ipsec_tun_pol_t *itp, void *ignoreme, netstack_t *ns)
{
	mutex_enter(&itp->itp_lock);
	ITPF_SWAP(itp->itp_flags);
	ipsec_swap_policy(itp->itp_policy, itp->itp_inactive, ns);
	mutex_exit(&itp->itp_lock);
}
1184 
/*
 * Handle SPD_FLIP: swap active and inactive policy.  With no tunnel-name
 * extension, only the global policy is swapped.  With an empty tunnel
 * name, the global policy AND every tunnel policy are swapped.  With a
 * specific name, only that tunnel's policy is swapped (ESRCH if the
 * tunnel is unknown).  Acknowledges by echoing the request.
 */
void
spdsock_flip(queue_t *q, mblk_t *mp, spd_if_t *tunname)
{
	char *tname;
	ipsec_tun_pol_t *itp;
	spdsock_t *ss = (spdsock_t *)q->q_ptr;
	spd_stack_t	*spds = ss->spdsock_spds;

	if (tunname != NULL) {
		tname = (char *)tunname->spd_if_name;
		if (*tname == '\0') {
			/* can't fail */
			ipsec_swap_global_policy(spds->spds_netstack);
			itp_walk(spdsock_flip_node, NULL, spds->spds_netstack);
		} else {
			itp = get_tunnel_policy(tname, spds->spds_netstack);
			if (itp == NULL) {
				/* Better idea for "tunnel not found"? */
				spdsock_error(q, mp, ESRCH, 0);
				return;
			}
			/*
			 * NOTE(review): ns is passed as NULL here, while
			 * the itp_walk() path above supplies
			 * spds->spds_netstack to the same callback.
			 * Confirm ipsec_swap_policy() tolerates a NULL
			 * netstack before relying on this path.
			 */
			spdsock_flip_node(itp, NULL, NULL);
			ITP_REFRELE(itp, spds->spds_netstack);
		}
	} else {
		ipsec_swap_global_policy(spds->spds_netstack);	/* can't fail */
	}
	spd_echo(q, mp);
}
1214 
1215 /*
1216  * Unimplemented feature
1217  */
1218 /* ARGSUSED */
static void
spdsock_lookup(queue_t *q, ipsec_policy_head_t *iph, mblk_t *mp,
    spd_ext_t **extv, ipsec_tun_pol_t *itp)
{
	/* SPD_LOOKUP is not implemented; always reject the request. */
	spdsock_error(q, mp, EINVAL, 0);
}
1225 
1226 
/*
 * Build an SPD message containing a single SPD_EXT_RULESET extension
 * describing the policy head "iph", echoing the message header of the
 * request "req".  "count" is the number of rules dumped and "error" the
 * final status for the dump.  Returns NULL on allocation failure.
 * Caller must hold iph->iph_lock as reader (iph_gen is read below).
 */
static mblk_t *
spdsock_dump_ruleset(mblk_t *req, ipsec_policy_head_t *iph,
    uint32_t count, uint16_t error)
{
	size_t len = sizeof (spd_ruleset_ext_t) + sizeof (spd_msg_t);
	spd_msg_t *msg;
	spd_ruleset_ext_t *ruleset;
	mblk_t *m = allocb(len, BPRI_HI);

	ASSERT(RW_READ_HELD(&iph->iph_lock));

	if (m == NULL) {
		return (NULL);
	}
	msg = (spd_msg_t *)m->b_rptr;
	ruleset = (spd_ruleset_ext_t *)(&msg[1]);

	m->b_wptr = (uint8_t *)&ruleset[1];

	/* Copy the request header, then overwrite length and errno. */
	*msg = *(spd_msg_t *)(req->b_rptr);
	msg->spd_msg_len = SPD_8TO64(len);
	msg->spd_msg_errno = error;

	ruleset->spd_ruleset_len = SPD_8TO64(sizeof (*ruleset));
	ruleset->spd_ruleset_type = SPD_EXT_RULESET;
	ruleset->spd_ruleset_count = count;
	ruleset->spd_ruleset_version = iph->iph_gen;
	return (m);
}
1256 
/*
 * Finish (or continue) a policy dump.  Decrements the count of policy
 * heads left to dump.  For an all-polheads dump with heads remaining and
 * no error so far, this advances the spdsock_dump_* state to the next
 * tunnel policy head and restarts via spdsock_dump_next_record().
 * Otherwise it builds the terminating SPD_EXT_RULESET record carrying
 * the rule count and "error", and releases the dump state.
 */
static mblk_t *
spdsock_dump_finish(spdsock_t *ss, int error)
{
	mblk_t *m;
	ipsec_policy_head_t *iph = ss->spdsock_dump_head;
	mblk_t *req = ss->spdsock_dump_req;
	ipsec_tun_pol_t *itp, dummy;
	spd_stack_t *spds = ss->spdsock_spds;
	netstack_t *ns = spds->spds_netstack;
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	ss->spdsock_dump_remaining_polheads--;
	if (error == 0 && ss->spdsock_dump_remaining_polheads != 0) {
		/* Attempt a respin with a new policy head. */
		rw_enter(&ipss->ipsec_tunnel_policy_lock, RW_READER);
		/* NOTE:  No need for ITP_REF*() macros here. */
		if (ipss->ipsec_tunnel_policy_gen > ss->spdsock_dump_tun_gen) {
			/*
			 * The tunnel policy set changed since the dump
			 * started.  Bail with EAGAIN.
			 */
			error = EAGAIN;
		} else if (ss->spdsock_dump_name[0] == '\0') {
			/* Just finished global, find first node. */
			itp = (ipsec_tun_pol_t *)avl_first(
			    &ipss->ipsec_tunnel_policies);
		} else {
			/*
			 * We just finished current-named polhead, find
			 * the next one.
			 */
			(void) strncpy(dummy.itp_name, ss->spdsock_dump_name,
			    LIFNAMSIZ);
			itp = (ipsec_tun_pol_t *)avl_find(
			    &ipss->ipsec_tunnel_policies, &dummy, NULL);
			ASSERT(itp != NULL);
			itp = (ipsec_tun_pol_t *)AVL_NEXT(
			    &ipss->ipsec_tunnel_policies, itp);
			/* remaining_polheads should maintain this assertion. */
			ASSERT(itp != NULL);
		}
		if (error == 0) {
			(void) strncpy(ss->spdsock_dump_name, itp->itp_name,
			    LIFNAMSIZ);
			/* Reset other spdsock_dump thingies. */
			IPPH_REFRELE(ss->spdsock_dump_head, ns);
			if (ss->spdsock_dump_active) {
				ss->spdsock_dump_tunnel =
				    itp->itp_flags & ITPF_P_TUNNEL;
				iph = itp->itp_policy;
			} else {
				ss->spdsock_dump_tunnel =
				    itp->itp_flags & ITPF_I_TUNNEL;
				iph = itp->itp_inactive;
			}
			IPPH_REFHOLD(iph);
			rw_enter(&iph->iph_lock, RW_READER);
			ss->spdsock_dump_head = iph;
			ss->spdsock_dump_gen = iph->iph_gen;
			ss->spdsock_dump_cur_type = 0;
			ss->spdsock_dump_cur_af = IPSEC_AF_V4;
			ss->spdsock_dump_cur_rule = NULL;
			ss->spdsock_dump_count = 0;
			ss->spdsock_dump_cur_chain = 0;
			rw_exit(&iph->iph_lock);
			rw_exit(&ipss->ipsec_tunnel_policy_lock);
			/* And start again. */
			return (spdsock_dump_next_record(ss));
		}
		rw_exit(&ipss->ipsec_tunnel_policy_lock);
	}

	/* Terminal case: emit the closing ruleset record and clean up. */
	rw_enter(&iph->iph_lock, RW_READER);
	m = spdsock_dump_ruleset(req, iph, ss->spdsock_dump_count, error);
	rw_exit(&iph->iph_lock);
	IPPH_REFRELE(iph, ns);
	ss->spdsock_dump_req = NULL;
	freemsg(req);

	return (m);
}
1335 
1336 /*
1337  * Rule encoding functions.
1338  * We do a two-pass encode.
1339  * If base != NULL, fill in encoded rule part starting at base+offset.
1340  * Always return "offset" plus length of to-be-encoded data.
1341  */
1342 static uint_t
1343 spdsock_encode_typecode(uint8_t *base, uint_t offset, uint8_t type,
1344     uint8_t type_end, uint8_t code, uint8_t code_end)
1345 {
1346 	struct spd_typecode *tcp;
1347 
1348 	ASSERT(ALIGNED64(offset));
1349 
1350 	if (base != NULL) {
1351 		tcp = (struct spd_typecode *)(base + offset);
1352 		tcp->spd_typecode_len = SPD_8TO64(sizeof (*tcp));
1353 		tcp->spd_typecode_exttype = SPD_EXT_ICMP_TYPECODE;
1354 		tcp->spd_typecode_code = code;
1355 		tcp->spd_typecode_type = type;
1356 		tcp->spd_typecode_type_end = type_end;
1357 		tcp->spd_typecode_code_end = code_end;
1358 	}
1359 	offset += sizeof (*tcp);
1360 
1361 	ASSERT(ALIGNED64(offset));
1362 
1363 	return (offset);
1364 }
1365 
1366 static uint_t
1367 spdsock_encode_proto(uint8_t *base, uint_t offset, uint8_t proto)
1368 {
1369 	struct spd_proto *spp;
1370 
1371 	ASSERT(ALIGNED64(offset));
1372 
1373 	if (base != NULL) {
1374 		spp = (struct spd_proto *)(base + offset);
1375 		spp->spd_proto_len = SPD_8TO64(sizeof (*spp));
1376 		spp->spd_proto_exttype = SPD_EXT_PROTO;
1377 		spp->spd_proto_number = proto;
1378 		spp->spd_proto_reserved1 = 0;
1379 		spp->spd_proto_reserved2 = 0;
1380 	}
1381 	offset += sizeof (*spp);
1382 
1383 	ASSERT(ALIGNED64(offset));
1384 
1385 	return (offset);
1386 }
1387 
1388 static uint_t
1389 spdsock_encode_port(uint8_t *base, uint_t offset, uint16_t ext, uint16_t port)
1390 {
1391 	struct spd_portrange *spp;
1392 
1393 	ASSERT(ALIGNED64(offset));
1394 
1395 	if (base != NULL) {
1396 		spp = (struct spd_portrange *)(base + offset);
1397 		spp->spd_ports_len = SPD_8TO64(sizeof (*spp));
1398 		spp->spd_ports_exttype = ext;
1399 		spp->spd_ports_minport = port;
1400 		spp->spd_ports_maxport = port;
1401 	}
1402 	offset += sizeof (*spp);
1403 
1404 	ASSERT(ALIGNED64(offset));
1405 
1406 	return (offset);
1407 }
1408 
1409 static uint_t
1410 spdsock_encode_addr(uint8_t *base, uint_t offset, uint16_t ext,
1411     const ipsec_selkey_t *sel, const ipsec_addr_t *addr, uint_t pfxlen)
1412 {
1413 	struct spd_address *sae;
1414 	ipsec_addr_t *spdaddr;
1415 	uint_t start = offset;
1416 	uint_t addrlen;
1417 	uint_t af;
1418 
1419 	if (sel->ipsl_valid & IPSL_IPV4) {
1420 		af = AF_INET;
1421 		addrlen = IP_ADDR_LEN;
1422 	} else {
1423 		af = AF_INET6;
1424 		addrlen = IPV6_ADDR_LEN;
1425 	}
1426 
1427 	ASSERT(ALIGNED64(offset));
1428 
1429 	if (base != NULL) {
1430 		sae = (struct spd_address *)(base + offset);
1431 		sae->spd_address_exttype = ext;
1432 		sae->spd_address_af = af;
1433 		sae->spd_address_prefixlen = pfxlen;
1434 		sae->spd_address_reserved2 = 0;
1435 
1436 		spdaddr = (ipsec_addr_t *)(&sae[1]);
1437 		bcopy(addr, spdaddr, addrlen);
1438 	}
1439 	offset += sizeof (*sae);
1440 	addrlen = roundup(addrlen, sizeof (uint64_t));
1441 	offset += addrlen;
1442 
1443 	ASSERT(ALIGNED64(offset));
1444 
1445 	if (base != NULL)
1446 		sae->spd_address_len = SPD_8TO64(offset - start);
1447 	return (offset);
1448 }
1449 
1450 static uint_t
1451 spdsock_encode_sel(uint8_t *base, uint_t offset, const ipsec_sel_t *sel)
1452 {
1453 	const ipsec_selkey_t *selkey = &sel->ipsl_key;
1454 
1455 	if (selkey->ipsl_valid & IPSL_PROTOCOL)
1456 		offset = spdsock_encode_proto(base, offset, selkey->ipsl_proto);
1457 	if (selkey->ipsl_valid & IPSL_LOCAL_PORT)
1458 		offset = spdsock_encode_port(base, offset, SPD_EXT_LCLPORT,
1459 		    selkey->ipsl_lport);
1460 	if (selkey->ipsl_valid & IPSL_REMOTE_PORT)
1461 		offset = spdsock_encode_port(base, offset, SPD_EXT_REMPORT,
1462 		    selkey->ipsl_rport);
1463 	if (selkey->ipsl_valid & IPSL_REMOTE_ADDR)
1464 		offset = spdsock_encode_addr(base, offset, SPD_EXT_REMADDR,
1465 		    selkey, &selkey->ipsl_remote, selkey->ipsl_remote_pfxlen);
1466 	if (selkey->ipsl_valid & IPSL_LOCAL_ADDR)
1467 		offset = spdsock_encode_addr(base, offset, SPD_EXT_LCLADDR,
1468 		    selkey, &selkey->ipsl_local, selkey->ipsl_local_pfxlen);
1469 	if (selkey->ipsl_valid & IPSL_ICMP_TYPE) {
1470 		offset = spdsock_encode_typecode(base, offset,
1471 		    selkey->ipsl_icmp_type, selkey->ipsl_icmp_type_end,
1472 		    (selkey->ipsl_valid & IPSL_ICMP_CODE) ?
1473 			selkey->ipsl_icmp_code : 255,
1474 		    (selkey->ipsl_valid & IPSL_ICMP_CODE) ?
1475 			selkey->ipsl_icmp_code_end : 255);
1476 	}
1477 	return (offset);
1478 }
1479 
1480 static uint_t
1481 spdsock_encode_actattr(uint8_t *base, uint_t offset, uint32_t tag,
1482     uint32_t value)
1483 {
1484 	struct spd_attribute *attr;
1485 
1486 	ASSERT(ALIGNED64(offset));
1487 
1488 	if (base != NULL) {
1489 		attr = (struct spd_attribute *)(base + offset);
1490 		attr->spd_attr_tag = tag;
1491 		attr->spd_attr_value = value;
1492 	}
1493 	offset += sizeof (struct spd_attribute);
1494 
1495 	ASSERT(ALIGNED64(offset));
1496 
1497 	return (offset);
1498 }
1499 
1500 
1501 #define	EMIT(t, v) offset = spdsock_encode_actattr(base, offset, (t), (v))
1502 
1503 static uint_t
1504 spdsock_encode_action(uint8_t *base, uint_t offset, const ipsec_action_t *ap)
1505 {
1506 	const struct ipsec_act *act = &(ap->ipa_act);
1507 	uint_t flags;
1508 
1509 	EMIT(SPD_ATTR_EMPTY, 0);
1510 	switch (act->ipa_type) {
1511 	case IPSEC_ACT_DISCARD:
1512 	case IPSEC_ACT_REJECT:
1513 		EMIT(SPD_ATTR_TYPE, SPD_ACTTYPE_DROP);
1514 		break;
1515 	case IPSEC_ACT_BYPASS:
1516 	case IPSEC_ACT_CLEAR:
1517 		EMIT(SPD_ATTR_TYPE, SPD_ACTTYPE_PASS);
1518 		break;
1519 
1520 	case IPSEC_ACT_APPLY:
1521 		EMIT(SPD_ATTR_TYPE, SPD_ACTTYPE_IPSEC);
1522 		flags = 0;
1523 		if (act->ipa_apply.ipp_use_ah)
1524 			flags |= SPD_APPLY_AH;
1525 		if (act->ipa_apply.ipp_use_esp)
1526 			flags |= SPD_APPLY_ESP;
1527 		if (act->ipa_apply.ipp_use_espa)
1528 			flags |= SPD_APPLY_ESPA;
1529 		if (act->ipa_apply.ipp_use_se)
1530 			flags |= SPD_APPLY_SE;
1531 		if (act->ipa_apply.ipp_use_unique)
1532 			flags |= SPD_APPLY_UNIQUE;
1533 		EMIT(SPD_ATTR_FLAGS, flags);
1534 		if (flags & SPD_APPLY_AH) {
1535 			EMIT(SPD_ATTR_AH_AUTH, act->ipa_apply.ipp_auth_alg);
1536 			EMIT(SPD_ATTR_AH_MINBITS,
1537 			    act->ipa_apply.ipp_ah_minbits);
1538 			EMIT(SPD_ATTR_AH_MAXBITS,
1539 			    act->ipa_apply.ipp_ah_maxbits);
1540 		}
1541 		if (flags & SPD_APPLY_ESP) {
1542 			EMIT(SPD_ATTR_ESP_ENCR, act->ipa_apply.ipp_encr_alg);
1543 			EMIT(SPD_ATTR_ENCR_MINBITS,
1544 			    act->ipa_apply.ipp_espe_minbits);
1545 			EMIT(SPD_ATTR_ENCR_MAXBITS,
1546 			    act->ipa_apply.ipp_espe_maxbits);
1547 			if (flags & SPD_APPLY_ESPA) {
1548 				EMIT(SPD_ATTR_ESP_AUTH,
1549 				    act->ipa_apply.ipp_esp_auth_alg);
1550 				EMIT(SPD_ATTR_ESPA_MINBITS,
1551 				    act->ipa_apply.ipp_espa_minbits);
1552 				EMIT(SPD_ATTR_ESPA_MAXBITS,
1553 				    act->ipa_apply.ipp_espa_maxbits);
1554 			}
1555 		}
1556 		if (act->ipa_apply.ipp_km_proto != 0)
1557 			EMIT(SPD_ATTR_KM_PROTO, act->ipa_apply.ipp_km_proto);
1558 		if (act->ipa_apply.ipp_km_cookie != 0)
1559 			EMIT(SPD_ATTR_KM_PROTO, act->ipa_apply.ipp_km_cookie);
1560 		if (act->ipa_apply.ipp_replay_depth != 0)
1561 			EMIT(SPD_ATTR_REPLAY_DEPTH,
1562 			    act->ipa_apply.ipp_replay_depth);
1563 		/* Add more here */
1564 		break;
1565 	}
1566 
1567 	return (offset);
1568 }
1569 
1570 static uint_t
1571 spdsock_encode_action_list(uint8_t *base, uint_t offset,
1572     const ipsec_action_t *ap)
1573 {
1574 	struct spd_ext_actions *act;
1575 	uint_t nact = 0;
1576 	uint_t start = offset;
1577 
1578 	ASSERT(ALIGNED64(offset));
1579 
1580 	if (base != NULL) {
1581 		act = (struct spd_ext_actions *)(base + offset);
1582 		act->spd_actions_len = 0;
1583 		act->spd_actions_exttype = SPD_EXT_ACTION;
1584 		act->spd_actions_count = 0;
1585 		act->spd_actions_reserved = 0;
1586 	}
1587 
1588 	offset += sizeof (*act);
1589 
1590 	ASSERT(ALIGNED64(offset));
1591 
1592 	while (ap != NULL) {
1593 		offset = spdsock_encode_action(base, offset, ap);
1594 		ap = ap->ipa_next;
1595 		nact++;
1596 		if (ap != NULL) {
1597 			EMIT(SPD_ATTR_NEXT, 0);
1598 		}
1599 	}
1600 	EMIT(SPD_ATTR_END, 0);
1601 
1602 	ASSERT(ALIGNED64(offset));
1603 
1604 	if (base != NULL) {
1605 		act->spd_actions_count = nact;
1606 		act->spd_actions_len = SPD_8TO64(offset - start);
1607 	}
1608 
1609 	return (offset);
1610 }
1611 
1612 #undef EMIT
1613 
1614 /* ARGSUSED */
1615 static uint_t
1616 spdsock_rule_flags(uint_t dir, uint_t af)
1617 {
1618 	uint_t flags = 0;
1619 
1620 	if (dir == IPSEC_TYPE_INBOUND)
1621 		flags |= SPD_RULE_FLAG_INBOUND;
1622 	if (dir == IPSEC_TYPE_OUTBOUND)
1623 		flags |= SPD_RULE_FLAG_OUTBOUND;
1624 
1625 	return (flags);
1626 }
1627 
1628 
/*
 * Encode one complete SPD_DUMP record for "rule": an spd_msg header
 * (echoing seq/pid from "req"), an SPD_EXT_RULE extension, an optional
 * SPD_EXT_TUN_NAME extension when "name" is non-empty, then the rule's
 * selectors and action list.  Two-pass: with base == NULL only the total
 * record size is computed; the message length is back-patched at the end.
 */
static uint_t
spdsock_encode_rule_head(uint8_t *base, uint_t offset, spd_msg_t *req,
    const ipsec_policy_t *rule, uint_t dir, uint_t af, char *name,
    boolean_t tunnel)
{
	struct spd_msg *spmsg;
	struct spd_rule *spr;
	spd_if_t *sid;

	uint_t start = offset;

	ASSERT(ALIGNED64(offset));

	if (base != NULL) {
		spmsg = (struct spd_msg *)(base + offset);
		bzero(spmsg, sizeof (*spmsg));
		spmsg->spd_msg_version = PF_POLICY_V1;
		spmsg->spd_msg_type = SPD_DUMP;
		spmsg->spd_msg_seq = req->spd_msg_seq;
		spmsg->spd_msg_pid = req->spd_msg_pid;
	}
	offset += sizeof (struct spd_msg);

	ASSERT(ALIGNED64(offset));

	if (base != NULL) {
		spr = (struct spd_rule *)(base + offset);
		spr->spd_rule_type = SPD_EXT_RULE;
		spr->spd_rule_priority = rule->ipsp_prio;
		spr->spd_rule_flags = spdsock_rule_flags(dir, af);
		if (tunnel)
			spr->spd_rule_flags |= SPD_RULE_FLAG_TUNNEL;
		spr->spd_rule_unused = 0;
		spr->spd_rule_len = SPD_8TO64(sizeof (*spr));
		spr->spd_rule_index = rule->ipsp_index;
	}
	offset += sizeof (struct spd_rule);

	/*
	 * If we have an interface name (i.e. if this policy head came from
	 * a tunnel), add the SPD_EXT_TUN_NAME extension.
	 */
	if (name[0] != '\0') {

		ASSERT(ALIGNED64(offset));

		if (base != NULL) {
			sid = (spd_if_t *)(base + offset);
			sid->spd_if_exttype = SPD_EXT_TUN_NAME;
			/*
			 * NOTE(review): the "- 4" presumably accounts for
			 * name bytes already included in sizeof (spd_if_t);
			 * confirm against the spd_if_t definition in
			 * net/pfpolicy.h.
			 */
			sid->spd_if_len = SPD_8TO64(sizeof (spd_if_t) +
			    roundup((strlen(name) - 4), 8));
			(void) strlcpy((char *)sid->spd_if_name, name,
			    LIFNAMSIZ);
		}

		offset += sizeof (spd_if_t) + roundup((strlen(name) - 4), 8);
	}

	offset = spdsock_encode_sel(base, offset, rule->ipsp_sel);
	offset = spdsock_encode_action_list(base, offset, rule->ipsp_act);

	ASSERT(ALIGNED64(offset));

	/* Back-patch the total message length. */
	if (base != NULL) {
		spmsg->spd_msg_len = SPD_8TO64(offset - start);
	}
	return (offset);
}
1697 
1698 /* ARGSUSED */
1699 static mblk_t *
1700 spdsock_encode_rule(mblk_t *req, const ipsec_policy_t *rule,
1701     uint_t dir, uint_t af, char *name, boolean_t tunnel)
1702 {
1703 	mblk_t *m;
1704 	uint_t len;
1705 	spd_msg_t *mreq = (spd_msg_t *)req->b_rptr;
1706 
1707 	/*
1708 	 * Figure out how much space we'll need.
1709 	 */
1710 	len = spdsock_encode_rule_head(NULL, 0, mreq, rule, dir, af, name,
1711 	    tunnel);
1712 
1713 	/*
1714 	 * Allocate mblk.
1715 	 */
1716 	m = allocb(len, BPRI_HI);
1717 	if (m == NULL)
1718 		return (NULL);
1719 
1720 	/*
1721 	 * Fill it in..
1722 	 */
1723 	m->b_wptr = m->b_rptr + len;
1724 	bzero(m->b_rptr, len);
1725 	(void) spdsock_encode_rule_head(m->b_rptr, 0, mreq, rule, dir, af,
1726 	    name, tunnel);
1727 	return (m);
1728 }
1729 
/*
 * Account for rule "cur" in the dump state: bump the running rule count,
 * remember its hash-chain successor as the next resume point, and return
 * it.  Caller holds iph->iph_lock as reader.
 */
static ipsec_policy_t *
spdsock_dump_next_in_chain(spdsock_t *ss, ipsec_policy_head_t *iph,
    ipsec_policy_t *cur)
{
	ASSERT(RW_READ_HELD(&iph->iph_lock));

	ss->spdsock_dump_count++;
	ss->spdsock_dump_cur_rule = cur->ipsp_hash.hash_next;
	return (cur);
}
1740 
/*
 * Return the next rule in the dump sequence for "iph", or NULL when the
 * whole head has been walked.  For each type the hash chains are scanned
 * first, then the per-address-family nonhash lists; position is carried
 * across calls in the spdsock_dump_cur_* fields.  Caller holds
 * iph->iph_lock as reader.
 */
static ipsec_policy_t *
spdsock_dump_next_rule(spdsock_t *ss, ipsec_policy_head_t *iph)
{
	ipsec_policy_t *cur;
	ipsec_policy_root_t *ipr;
	int chain, nchains, type, af;

	ASSERT(RW_READ_HELD(&iph->iph_lock));

	cur = ss->spdsock_dump_cur_rule;

	/* Still in the middle of a chain?  Resume from there. */
	if (cur != NULL)
		return (spdsock_dump_next_in_chain(ss, iph, cur));

	type = ss->spdsock_dump_cur_type;

next:
	chain = ss->spdsock_dump_cur_chain;
	ipr = &iph->iph_root[type];
	nchains = ipr->ipr_nchains;

	/* Scan the remaining hash chains for a non-empty one. */
	while (chain < nchains) {
		cur = ipr->ipr_hash[chain].hash_head;
		chain++;
		if (cur != NULL) {
			ss->spdsock_dump_cur_chain = chain;
			return (spdsock_dump_next_in_chain(ss, iph, cur));
		}
	}
	ss->spdsock_dump_cur_chain = nchains;

	/* Then the non-hashed per-address-family lists. */
	af = ss->spdsock_dump_cur_af;
	while (af < IPSEC_NAF) {
		cur = ipr->ipr_nonhash[af];
		af++;
		if (cur != NULL) {
			ss->spdsock_dump_cur_af = af;
			return (spdsock_dump_next_in_chain(ss, iph, cur));
		}
	}

	/* This type is exhausted; advance to the next type, if any. */
	type++;
	if (type >= IPSEC_NTYPES)
		return (NULL);

	ss->spdsock_dump_cur_chain = 0;
	ss->spdsock_dump_cur_type = type;
	ss->spdsock_dump_cur_af = IPSEC_AF_V4;
	goto next;

}
1792 
/*
 * Produce the next encoded rule record for the dump in progress, or the
 * terminating record via spdsock_dump_finish() when the head is
 * exhausted (error 0), its generation changed underneath us (EAGAIN),
 * or the record allocation fails (ENOMEM).
 */
static mblk_t *
spdsock_dump_next_record(spdsock_t *ss)
{
	ipsec_policy_head_t *iph;
	ipsec_policy_t *rule;
	mblk_t *m;
	mblk_t *req = ss->spdsock_dump_req;

	iph = ss->spdsock_dump_head;

	ASSERT(iph != NULL);

	rw_enter(&iph->iph_lock, RW_READER);

	/* If the head was modified since we started, the dump is stale. */
	if (iph->iph_gen != ss->spdsock_dump_gen) {
		rw_exit(&iph->iph_lock);
		return (spdsock_dump_finish(ss, EAGAIN));
	}

	rule = spdsock_dump_next_rule(ss, iph);

	if (!rule) {
		rw_exit(&iph->iph_lock);
		return (spdsock_dump_finish(ss, 0));
	}

	m = spdsock_encode_rule(req, rule, ss->spdsock_dump_cur_type,
	    ss->spdsock_dump_cur_af, ss->spdsock_dump_name,
	    ss->spdsock_dump_tunnel);
	rw_exit(&iph->iph_lock);

	if (m == NULL)
		return (spdsock_dump_finish(ss, ENOMEM));
	return (m);
}
1828 
1829 /*
1830  * Dump records until we run into flow-control back-pressure.
1831  */
1832 static void
1833 spdsock_dump_some(queue_t *q, spdsock_t *ss)
1834 {
1835 	mblk_t *m, *dataind;
1836 
1837 	while ((ss->spdsock_dump_req != NULL) && canputnext(q)) {
1838 		m = spdsock_dump_next_record(ss);
1839 		if (m == NULL)
1840 			return;
1841 		dataind = allocb(sizeof (struct T_data_req), BPRI_HI);
1842 		if (dataind == NULL) {
1843 			freemsg(m);
1844 			return;
1845 		}
1846 		dataind->b_cont = m;
1847 		dataind->b_wptr += sizeof (struct T_data_req);
1848 		((struct T_data_ind *)dataind->b_rptr)->PRIM_type = T_DATA_IND;
1849 		((struct T_data_ind *)dataind->b_rptr)->MORE_flag = 0;
1850 		dataind->b_datap->db_type = M_PROTO;
1851 		putnext(q, dataind);
1852 	}
1853 }
1854 
1855 /*
1856  * Start dumping.
1857  * Format a start-of-dump record, and set up the stream and kick the rsrv
1858  * procedure to continue the job..
1859  */
1860 /* ARGSUSED */
static void
spdsock_dump(queue_t *q, ipsec_policy_head_t *iph, mblk_t *mp)
{
	spdsock_t *ss = (spdsock_t *)q->q_ptr;
	spd_stack_t *spds = ss->spdsock_spds;
	netstack_t *ns = spds->spds_netstack;
	ipsec_stack_t *ipss = ns->netstack_ipsec;
	mblk_t *mr;

	/* spdsock_parse() already NULL-terminated spdsock_dump_name. */
	if (iph == ALL_ACTIVE_POLHEADS || iph == ALL_INACTIVE_POLHEADS) {
		/*
		 * All-polheads dump: global policy plus one head per
		 * tunnel policy.  Record the tunnel-policy generation so
		 * spdsock_dump_finish() can detect concurrent changes.
		 */
		rw_enter(&ipss->ipsec_tunnel_policy_lock, RW_READER);
		ss->spdsock_dump_remaining_polheads = 1 +
		    avl_numnodes(&ipss->ipsec_tunnel_policies);
		ss->spdsock_dump_tun_gen = ipss->ipsec_tunnel_policy_gen;
		rw_exit(&ipss->ipsec_tunnel_policy_lock);
		if (iph == ALL_ACTIVE_POLHEADS) {
			iph = ipsec_system_policy(ns);
			ss->spdsock_dump_active = B_TRUE;
		} else {
			iph = ipsec_inactive_policy(spds->spds_netstack);
			ss->spdsock_dump_active = B_FALSE;
		}
		ASSERT(ss->spdsock_dump_name[0] == '\0');
	} else {
		ss->spdsock_dump_remaining_polheads = 1;
	}

	rw_enter(&iph->iph_lock, RW_READER);

	/* Start-of-dump record: a ruleset header with count 0. */
	mr = spdsock_dump_ruleset(mp, iph, 0, 0);

	if (!mr) {
		rw_exit(&iph->iph_lock);
		spdsock_error(q, mp, ENOMEM, 0);
		return;
	}

	/* Initialize the per-socket dump cursor state. */
	ss->spdsock_dump_req = mp;
	ss->spdsock_dump_head = iph;
	ss->spdsock_dump_gen = iph->iph_gen;
	ss->spdsock_dump_cur_type = 0;
	ss->spdsock_dump_cur_af = IPSEC_AF_V4;
	ss->spdsock_dump_cur_rule = NULL;
	ss->spdsock_dump_count = 0;
	ss->spdsock_dump_cur_chain = 0;
	rw_exit(&iph->iph_lock);

	/* Send the header, then let the rsrv side stream out the rules. */
	qreply(q, mr);
	qenable(OTHERQ(q));
}
1912 
/*
 * Clone (copy active to inactive) one tunnel's policy, under itp_lock.
 * Shaped as an itp_walk() callback: "ep" points to an int error
 * accumulator which the CALLER must initialize; once it is non-zero,
 * subsequent nodes are skipped.  Does NOT consume a reference to ITP.
 */
void
spdsock_clone_node(ipsec_tun_pol_t *itp, void *ep, netstack_t *ns)
{
	int *errptr = (int *)ep;

	if (*errptr != 0)
		return;	/* We've failed already for some reason. */
	mutex_enter(&itp->itp_lock);
	ITPF_CLONE(itp->itp_flags);
	*errptr = ipsec_copy_polhead(itp->itp_policy, itp->itp_inactive, ns);
	mutex_exit(&itp->itp_lock);
}
1926 
1927 void
1928 spdsock_clone(queue_t *q, mblk_t *mp, spd_if_t *tunname)
1929 {
1930 	int error;
1931 	char *tname;
1932 	ipsec_tun_pol_t *itp;
1933 	spdsock_t *ss = (spdsock_t *)q->q_ptr;
1934 	spd_stack_t	*spds = ss->spdsock_spds;
1935 
1936 	if (tunname != NULL) {
1937 		tname = (char *)tunname->spd_if_name;
1938 		if (*tname == '\0') {
1939 			error = ipsec_clone_system_policy(spds->spds_netstack);
1940 			if (error == 0)
1941 				itp_walk(spdsock_clone_node, &error,
1942 				    spds->spds_netstack);
1943 		} else {
1944 			itp = get_tunnel_policy(tname, spds->spds_netstack);
1945 			if (itp == NULL) {
1946 				spdsock_error(q, mp, ENOENT, 0);
1947 				return;
1948 			}
1949 			spdsock_clone_node(itp, &error, NULL);
1950 			ITP_REFRELE(itp, spds->spds_netstack);
1951 		}
1952 	} else {
1953 		error = ipsec_clone_system_policy(spds->spds_netstack);
1954 	}
1955 
1956 	if (error != 0)
1957 		spdsock_error(q, mp, error, 0);
1958 	else
1959 		spd_echo(q, mp);
1960 }
1961 
1962 /*
1963  * Process a SPD_ALGLIST request. The caller expects separate alg entries
1964  * for AH authentication, ESP authentication, and ESP encryption.
1965  * The same distinction is then used when setting the min and max key
1966  * sizes when defining policies.
1967  */
1968 
/* Logical algorithm categories reported to the SPD client. */
#define	SPDSOCK_AH_AUTH		0
#define	SPDSOCK_ESP_AUTH	1
#define	SPDSOCK_ESP_ENCR	2
#define	SPDSOCK_NTYPES		3

/* SPD_ATTR_* algorithm-id tags, indexed by the SPDSOCK_* category. */
static const uint_t algattr[SPDSOCK_NTYPES] = {
	SPD_ATTR_AH_AUTH,
	SPD_ATTR_ESP_AUTH,
	SPD_ATTR_ESP_ENCR
};
/* Minimum key-size tags, indexed by category. */
static const uint_t minbitsattr[SPDSOCK_NTYPES] = {
	SPD_ATTR_AH_MINBITS,
	SPD_ATTR_ESPA_MINBITS,
	SPD_ATTR_ENCR_MINBITS
};
/* Maximum key-size tags, indexed by category. */
static const uint_t maxbitsattr[SPDSOCK_NTYPES] = {
	SPD_ATTR_AH_MAXBITS,
	SPD_ATTR_ESPA_MAXBITS,
	SPD_ATTR_ENCR_MAXBITS
};
/* Default key-size tags, indexed by category. */
static const uint_t defbitsattr[SPDSOCK_NTYPES] = {
	SPD_ATTR_AH_DEFBITS,
	SPD_ATTR_ESPA_DEFBITS,
	SPD_ATTR_ENCR_DEFBITS
};
/* Key-size increment tags, indexed by category. */
static const uint_t incrbitsattr[SPDSOCK_NTYPES] = {
	SPD_ATTR_AH_INCRBITS,
	SPD_ATTR_ESPA_INCRBITS,
	SPD_ATTR_ENCR_INCRBITS
};

#define	ATTRPERALG	6	/* fixed attributes per algs */
2001 
/*
 * Handle SPD_ALGLIST: reply with one SPD_EXT_ACTION extension listing
 * every supported algorithm (each auth algorithm appears twice, once as
 * AH auth and once as ESP auth) with its key-size parameters.  The
 * reply is sized exactly, filled under ipsec_alg_lock, and sent in
 * place of the request.
 */
void
spdsock_alglist(queue_t *q, mblk_t *mp)
{
	uint_t algtype;
	uint_t algidx;
	uint_t algcount;
	uint_t size;
	mblk_t *m;
	uint8_t *cur;
	spd_msg_t *msg;
	struct spd_ext_actions *act;
	struct spd_attribute *attr;
	spdsock_t *ss = (spdsock_t *)q->q_ptr;
	spd_stack_t	*spds = ss->spdsock_spds;
	ipsec_stack_t	*ipss = spds->spds_netstack->netstack_ipsec;

	mutex_enter(&ipss->ipsec_alg_lock);
	/*
	 * The SPD client expects to receive separate entries for
	 * AH authentication and ESP authentication supported algorithms.
	 *
	 * Don't return the "any" algorithms, if defined, as no
	 * kernel policies can be set for these algorithms.
	 */
	algcount = 2 * ipss->ipsec_nalgs[IPSEC_ALG_AUTH] +
	    ipss->ipsec_nalgs[IPSEC_ALG_ENCR];

	if (ipss->ipsec_alglists[IPSEC_ALG_AUTH][SADB_AALG_NONE] != NULL)
		algcount--;
	if (ipss->ipsec_alglists[IPSEC_ALG_ENCR][SADB_EALG_NONE] != NULL)
		algcount--;

	/*
	 * For each algorithm, we encode:
	 * ALG / MINBITS / MAXBITS / DEFBITS / INCRBITS / {END, NEXT}
	 */

	size = sizeof (spd_msg_t) + sizeof (struct spd_ext_actions) +
	    ATTRPERALG * sizeof (struct spd_attribute) * algcount;

	ASSERT(ALIGNED64(size));

	m = allocb(size, BPRI_HI);
	if (m == NULL) {
		mutex_exit(&ipss->ipsec_alg_lock);
		spdsock_error(q, mp, ENOMEM, 0);
		return;
	}

	m->b_wptr = m->b_rptr + size;
	cur = m->b_rptr;

	/* Echo the request header, then fix up length and status. */
	msg = (spd_msg_t *)cur;
	bcopy(mp->b_rptr, cur, sizeof (*msg));

	msg->spd_msg_len = SPD_8TO64(size);
	msg->spd_msg_errno = 0;
	msg->spd_msg_diagnostic = 0;

	cur += sizeof (*msg);

	act = (struct spd_ext_actions *)cur;
	cur += sizeof (*act);

	act->spd_actions_len = SPD_8TO64(size - sizeof (spd_msg_t));
	act->spd_actions_exttype = SPD_EXT_ACTION;
	act->spd_actions_count = algcount;
	act->spd_actions_reserved = 0;

	attr = (struct spd_attribute *)cur;

#define	EMIT(tag, value) {					\
		attr->spd_attr_tag = (tag); 			\
		attr->spd_attr_value = (value); 		\
		attr++;			  			\
	}

	/*
	 * If you change the number of EMIT's here, change
	 * ATTRPERALG above to match
	 */
#define	EMITALGATTRS(_type) {					\
		EMIT(algattr[_type], algid); 		/* 1 */	\
		EMIT(minbitsattr[_type], minbits);	/* 2 */	\
		EMIT(maxbitsattr[_type], maxbits);	/* 3 */	\
		EMIT(defbitsattr[_type], defbits);	/* 4 */	\
		EMIT(incrbitsattr[_type], incr);	/* 5 */	\
		EMIT(SPD_ATTR_NEXT, 0);			/* 6 */	\
	}

	for (algtype = 0; algtype < IPSEC_NALGTYPES; algtype++) {
		for (algidx = 0; algidx < ipss->ipsec_nalgs[algtype];
		    algidx++) {
			int algid = ipss->ipsec_sortlist[algtype][algidx];
			ipsec_alginfo_t *alg =
			    ipss->ipsec_alglists[algtype][algid];
			uint_t minbits = alg->alg_minbits;
			uint_t maxbits = alg->alg_maxbits;
			uint_t defbits = alg->alg_default_bits;
			uint_t incr = alg->alg_increment;

			if (algtype == IPSEC_ALG_AUTH) {
				if (algid == SADB_AALG_NONE)
					continue;
				EMITALGATTRS(SPDSOCK_AH_AUTH);
				EMITALGATTRS(SPDSOCK_ESP_AUTH);
			} else {
				if (algid == SADB_EALG_NONE)
					continue;
				ASSERT(algtype == IPSEC_ALG_ENCR);
				EMITALGATTRS(SPDSOCK_ESP_ENCR);
			}
		}
	}

	mutex_exit(&ipss->ipsec_alg_lock);

#undef EMITALGATTRS
#undef EMIT
#undef ATTRPERALG

	/* Rewrite the final NEXT separator as the END marker. */
	attr--;
	attr->spd_attr_tag = SPD_ATTR_END;

	freemsg(mp);
	qreply(q, m);
}
2129 
2130 /*
2131  * Process a SPD_DUMPALGS request.
2132  */
2133 
2134 #define	ATTRPERALG	7	/* fixed attributes per algs */
2135 
void
spdsock_dumpalgs(queue_t *q, mblk_t *mp)
{
	uint_t algtype;
	uint_t algidx;
	uint_t size;
	mblk_t *m;
	uint8_t *cur;
	spd_msg_t *msg;
	struct spd_ext_actions *act;
	struct spd_attribute *attr;
	ipsec_alginfo_t *alg;
	uint_t algid;
	uint_t i;
	uint_t alg_size;
	spdsock_t *ss = (spdsock_t *)q->q_ptr;
	spd_stack_t	*spds = ss->spdsock_spds;
	ipsec_stack_t	*ipss = spds->spds_netstack->netstack_ipsec;

	/*
	 * Hold the alg lock across both the sizing pass and the emit pass
	 * below so the algorithm tables cannot change in between.
	 */
	mutex_enter(&ipss->ipsec_alg_lock);

	/*
	 * For each algorithm, we encode:
	 * ALG / MINBITS / MAXBITS / DEFBITS / INCRBITS / {END, NEXT}
	 *
	 * ALG_ID / ALG_PROTO / ALG_INCRBITS / ALG_NKEYSIZES / ALG_KEYSIZE*
	 * ALG_NBLOCKSIZES / ALG_BLOCKSIZE* / ALG_MECHNAME / {END, NEXT}
	 */

	/*
	 * Compute the size of the SPD message.
	 */
	size = sizeof (spd_msg_t) + sizeof (struct spd_ext_actions);

	for (algtype = 0; algtype < IPSEC_NALGTYPES; algtype++) {
		for (algidx = 0; algidx < ipss->ipsec_nalgs[algtype];
		    algidx++) {
			algid = ipss->ipsec_sortlist[algtype][algidx];
			alg = ipss->ipsec_alglists[algtype][algid];
			/*
			 * ATTRPERALG fixed attributes, plus one attribute
			 * per key/block size, plus the in-line mech name.
			 */
			alg_size = sizeof (struct spd_attribute) *
			    (ATTRPERALG + alg->alg_nkey_sizes +
			    alg->alg_nblock_sizes) + CRYPTO_MAX_MECH_NAME;
			size += alg_size;
		}
	}

	ASSERT(ALIGNED64(size));

	m = allocb(size, BPRI_HI);
	if (m == NULL) {
		mutex_exit(&ipss->ipsec_alg_lock);
		spdsock_error(q, mp, ENOMEM, 0);
		return;
	}

	m->b_wptr = m->b_rptr + size;
	cur = m->b_rptr;

	/* Echo the request's header, then overwrite length and status. */
	msg = (spd_msg_t *)cur;
	bcopy(mp->b_rptr, cur, sizeof (*msg));

	msg->spd_msg_len = SPD_8TO64(size);
	msg->spd_msg_errno = 0;
	msg->spd_msg_diagnostic = 0;

	cur += sizeof (*msg);

	act = (struct spd_ext_actions *)cur;
	cur += sizeof (*act);

	act->spd_actions_len = SPD_8TO64(size - sizeof (spd_msg_t));
	act->spd_actions_exttype = SPD_EXT_ACTION;
	act->spd_actions_count = ipss->ipsec_nalgs[IPSEC_ALG_AUTH] +
	    ipss->ipsec_nalgs[IPSEC_ALG_ENCR];
	act->spd_actions_reserved = 0;

	attr = (struct spd_attribute *)cur;

/* Append one tag/value attribute and advance the cursor. */
#define	EMIT(tag, value) {					\
		attr->spd_attr_tag = (tag); 			\
		attr->spd_attr_value = (value); 		\
		attr++;			  			\
	}

	for (algtype = 0; algtype < IPSEC_NALGTYPES; algtype++) {
		for (algidx = 0; algidx < ipss->ipsec_nalgs[algtype];
		    algidx++) {

			algid = ipss->ipsec_sortlist[algtype][algidx];
			alg = ipss->ipsec_alglists[algtype][algid];

			/*
			 * If you change the number of EMIT's here, change
			 * ATTRPERALG above to match
			 */
			EMIT(SPD_ATTR_ALG_ID, algid);
			EMIT(SPD_ATTR_ALG_PROTO, algproto[algtype]);
			EMIT(SPD_ATTR_ALG_INCRBITS, alg->alg_increment);

			EMIT(SPD_ATTR_ALG_NKEYSIZES, alg->alg_nkey_sizes);
			for (i = 0; i < alg->alg_nkey_sizes; i++)
				EMIT(SPD_ATTR_ALG_KEYSIZE,
				    alg->alg_key_sizes[i]);

			EMIT(SPD_ATTR_ALG_NBLOCKSIZES, alg->alg_nblock_sizes);
			for (i = 0; i < alg->alg_nblock_sizes; i++)
				EMIT(SPD_ATTR_ALG_BLOCKSIZE,
				    alg->alg_block_sizes[i]);

			/* The mechanism name rides in-line after its attr. */
			EMIT(SPD_ATTR_ALG_MECHNAME, CRYPTO_MAX_MECH_NAME);
			bcopy(alg->alg_mech_name, attr, CRYPTO_MAX_MECH_NAME);
			attr = (struct spd_attribute *)((char *)attr +
			    CRYPTO_MAX_MECH_NAME);

			EMIT(SPD_ATTR_NEXT, 0);
		}
	}

	mutex_exit(&ipss->ipsec_alg_lock);

/* EMITALGATTRS is not defined in this function; this #undef is vestigial. */
#undef EMITALGATTRS
#undef EMIT
#undef ATTRPERALG

	/* Rewrite the trailing SPD_ATTR_NEXT as the final SPD_ATTR_END. */
	attr--;
	attr->spd_attr_tag = SPD_ATTR_END;

	freemsg(mp);
	qreply(q, m);
}
2266 
2267 /*
2268  * Do the actual work of processing an SPD_UPDATEALGS request. Can
2269  * be invoked either once IPsec is loaded on a cached request, or
2270  * when a request is received while IPsec is loaded.
2271  */
static void
spdsock_do_updatealg(spd_ext_t *extv[], int *diag, spd_stack_t *spds)
{
	struct spd_ext_actions *actp;
	struct spd_attribute *attr, *endattr;
	uint64_t *start, *end;
	ipsec_alginfo_t *alg = NULL;
	ipsec_algtype_t alg_type = 0;
	boolean_t skip_alg = B_TRUE, doing_proto = B_FALSE;
	uint_t i, cur_key, cur_block, algid;

	/* *diag stays -1 on success; any other value is an SPD_DIAGNOSTIC_*. */
	*diag = -1;
	ASSERT(MUTEX_HELD(&spds->spds_alg_lock));

	/* parse the message, building the list of algorithms */

	actp = (struct spd_ext_actions *)extv[SPD_EXT_ACTION];
	if (actp == NULL) {
		*diag = SPD_DIAGNOSTIC_NO_ACTION_EXT;
		return;
	}

	/*
	 * The action extension is a flat run of spd_attribute tag/value
	 * pairs; walk from just past the header to the extension's stated
	 * length (spd_actions_len is in 64-bit words).
	 */
	start = (uint64_t *)actp;
	end = (start + actp->spd_actions_len);
	endattr = (struct spd_attribute *)end;
	attr = (struct spd_attribute *)&actp[1];

	/* Begin with an empty staging table; completed algs land here. */
	bzero(spds->spds_algs, IPSEC_NALGTYPES * IPSEC_MAX_ALGS *
	    sizeof (ipsec_alginfo_t *));

	/* Scratch alginfo assembled from successive attributes. */
	alg = kmem_zalloc(sizeof (*alg), KM_SLEEP);

/* Both sizes include room for the trailing zero-terminator entry. */
#define	ALG_KEY_SIZES(a)   (((a)->alg_nkey_sizes + 1) * sizeof (uint16_t))
#define	ALG_BLOCK_SIZES(a) (((a)->alg_nblock_sizes + 1) * sizeof (uint16_t))

	while (attr < endattr) {
		switch (attr->spd_attr_tag) {
		case SPD_ATTR_NOP:
		case SPD_ATTR_EMPTY:
			break;
		case SPD_ATTR_END:
			/* Terminate the scan, then commit via NEXT logic. */
			attr = endattr;
			/* FALLTHRU */
		case SPD_ATTR_NEXT:
			/*
			 * End of one logical record.  A protocol record
			 * (SPD_ATTR_PROTO_*) has no alg to commit; an
			 * algorithm record is installed in the staging
			 * table unless its protocol was unrecognized
			 * (skip_alg).
			 */
			if (doing_proto) {
				doing_proto = B_FALSE;
				break;
			}
			if (skip_alg) {
				ipsec_alg_free(alg);
			} else {
				/* The last record for a given alg_id wins. */
				ipsec_alg_free(
				    spds->spds_algs[alg_type][alg->alg_id]);
				spds->spds_algs[alg_type][alg->alg_id] =
				    alg;
			}
			alg = kmem_zalloc(sizeof (*alg), KM_SLEEP);
			break;

		case SPD_ATTR_ALG_ID:
			if (attr->spd_attr_value >= IPSEC_MAX_ALGS) {
				ss1dbg(spds, ("spdsock_do_updatealg: "
				    "invalid alg id %d\n",
				    attr->spd_attr_value));
				*diag = SPD_DIAGNOSTIC_ALG_ID_RANGE;
				goto bail;
			}
			alg->alg_id = attr->spd_attr_value;
			break;

		case SPD_ATTR_ALG_PROTO:
			/* find the alg type */
			for (i = 0; i < NALGPROTOS; i++)
				if (algproto[i] == attr->spd_attr_value)
					break;
			/* Unknown protocol: silently drop this record. */
			skip_alg = (i == NALGPROTOS);
			if (!skip_alg)
				alg_type = i;
			break;

		case SPD_ATTR_ALG_INCRBITS:
			alg->alg_increment = attr->spd_attr_value;
			break;

		case SPD_ATTR_ALG_NKEYSIZES:
			/* A repeat replaces any earlier key-size array. */
			if (alg->alg_key_sizes != NULL) {
				kmem_free(alg->alg_key_sizes,
				    ALG_KEY_SIZES(alg));
			}
			alg->alg_nkey_sizes = attr->spd_attr_value;
			/*
			 * Allocate room for the trailing zero key size
			 * value as well.
			 */
			alg->alg_key_sizes = kmem_zalloc(ALG_KEY_SIZES(alg),
			    KM_SLEEP);
			cur_key = 0;
			break;

		case SPD_ATTR_ALG_KEYSIZE:
			/* NKEYSIZES must precede and bound the KEYSIZEs. */
			if (alg->alg_key_sizes == NULL ||
			    cur_key >= alg->alg_nkey_sizes) {
				ss1dbg(spds, ("spdsock_do_updatealg: "
					"too many key sizes\n"));
				*diag = SPD_DIAGNOSTIC_ALG_NUM_KEY_SIZES;
				goto bail;
			}
			alg->alg_key_sizes[cur_key++] = attr->spd_attr_value;
			break;

		case SPD_ATTR_ALG_NBLOCKSIZES:
			/* A repeat replaces any earlier block-size array. */
			if (alg->alg_block_sizes != NULL) {
				kmem_free(alg->alg_block_sizes,
				    ALG_BLOCK_SIZES(alg));
			}
			alg->alg_nblock_sizes = attr->spd_attr_value;
			/*
			 * Allocate room for the trailing zero block size
			 * value as well.
			 */
			alg->alg_block_sizes = kmem_zalloc(ALG_BLOCK_SIZES(alg),
			    KM_SLEEP);
			cur_block = 0;
			break;

		case SPD_ATTR_ALG_BLOCKSIZE:
			/* NBLOCKSIZES must precede and bound the BLOCKSIZEs. */
			if (alg->alg_block_sizes == NULL ||
			    cur_block >= alg->alg_nblock_sizes) {
				ss1dbg(spds, ("spdsock_do_updatealg: "
					"too many block sizes\n"));
				*diag = SPD_DIAGNOSTIC_ALG_NUM_BLOCK_SIZES;
				goto bail;
			}
			alg->alg_block_sizes[cur_block++] =
			    attr->spd_attr_value;
			break;

		case SPD_ATTR_ALG_MECHNAME: {
			char *mech_name;

			if (attr->spd_attr_value > CRYPTO_MAX_MECH_NAME) {
				ss1dbg(spds, ("spdsock_do_updatealg: "
					"mech name too long\n"));
				*diag = SPD_DIAGNOSTIC_ALG_MECH_NAME_LEN;
				goto bail;
			}
			/* The name bytes follow this attribute in-line. */
			mech_name = (char *)(attr + 1);
			bcopy(mech_name, alg->alg_mech_name,
			    attr->spd_attr_value);
			alg->alg_mech_name[CRYPTO_MAX_MECH_NAME-1] = '\0';
			/*
			 * Step over the in-line name; spd_attr_value is its
			 * byte length.  NOTE(review): this assumes the sender
			 * keeps attr aligned on spd_attribute boundaries --
			 * confirm against the wire-format spec.
			 */
			attr = (struct spd_attribute *)((char *)attr +
			    attr->spd_attr_value);
			break;
		}

		case SPD_ATTR_PROTO_ID:
			doing_proto = B_TRUE;
			for (i = 0; i < NALGPROTOS; i++) {
				if (algproto[i] == attr->spd_attr_value) {
					alg_type = i;
					break;
				}
			}
			break;

		case SPD_ATTR_PROTO_EXEC_MODE:
			/* Only meaningful inside a PROTO_ID record. */
			if (!doing_proto)
				break;
			for (i = 0; i < NEXECMODES; i++) {
				if (execmodes[i] == attr->spd_attr_value) {
					spds->spds_algs_exec_mode[alg_type] = i;
					break;
				}
			}
			break;
		}
		attr++;
	}

#undef	ALG_KEY_SIZES
#undef	ALG_BLOCK_SIZES

	/* update the algorithm tables */
	spdsock_merge_algs(spds);
bail:
	/* cleanup */
	ipsec_alg_free(alg);
	/* Free whatever is still staged (everything, on the error path). */
	for (alg_type = 0; alg_type < IPSEC_NALGTYPES; alg_type++)
	    for (algid = 0; algid < IPSEC_MAX_ALGS; algid++)
		if (spds->spds_algs[alg_type][algid] != NULL)
		    ipsec_alg_free(spds->spds_algs[alg_type][algid]);
}
2464 
2465 /*
2466  * Process an SPD_UPDATEALGS request. If IPsec is not loaded, queue
2467  * the request until IPsec loads. If IPsec is loaded, act on it
2468  * immediately.
2469  */
2470 
static void
spdsock_updatealg(queue_t *q, mblk_t *mp, spd_ext_t *extv[])
{
	spdsock_t *ss = (spdsock_t *)q->q_ptr;
	spd_stack_t	*spds = ss->spdsock_spds;
	ipsec_stack_t	*ipss = spds->spds_netstack->netstack_ipsec;

	if (!ipsec_loaded(ipss)) {
		/*
		 * IPsec is not loaded, save request and return nicely,
		 * the message will be processed once IPsec loads.
		 */
		mblk_t *new_mp;

		/* last update message wins */
		if ((new_mp = copymsg(mp)) == NULL) {
			spdsock_error(q, mp, ENOMEM, 0);
			return;
		}
		mutex_enter(&spds->spds_alg_lock);
		/*
		 * Stash both the parsed extension vector and the mblk it
		 * points into.  Ownership of mp passes to the spd_stack
		 * here (freed/consumed when the pending update is replayed),
		 * so the copy (new_mp) is what gets echoed to the client.
		 */
		bcopy(extv, spds->spds_extv_algs,
		    sizeof (spd_ext_t *) * (SPD_EXT_MAX + 1));
		if (spds->spds_mp_algs != NULL)
			freemsg(spds->spds_mp_algs);
		spds->spds_mp_algs = mp;
		spds->spds_algs_pending = B_TRUE;
		mutex_exit(&spds->spds_alg_lock);

		spd_echo(q, new_mp);
	} else {
		/*
		 * IPsec is loaded, act on the message immediately.
		 */
		int diag;

		mutex_enter(&spds->spds_alg_lock);
		spdsock_do_updatealg(extv, &diag, spds);
		mutex_exit(&spds->spds_alg_lock);
		/* diag == -1 means success (see spdsock_do_updatealg()). */
		if (diag == -1)
			spd_echo(q, mp);
		else
			spdsock_diag(q, mp, diag);
	}
}
2515 
2516 /*
2517  * With a reference-held ill, dig down and find an instance of "tun", and
2518  * assign its tunnel policy pointer, while reference-holding it.  Also,
 * release ill's reference when finished.
2520  *
2521  * We'll be messing with q_next, so be VERY careful.
2522  */
static void
find_tun_and_set_itp(ill_t *ill, ipsec_tun_pol_t *itp)
{
	queue_t *q;
	tun_t *tun;

	/* Don't bother if this ill is going away. */
	if (ill->ill_flags & ILL_CONDEMNED) {
		ill_refrele(ill);
		return;
	}


	q = ill->ill_wq;
	claimstr(q);	/* Lighter-weight than freezestr(). */

	/*
	 * Walk down the write-side queue chain looking for a module named
	 * "tun".  claimstr() above keeps the stream intact while we
	 * follow q_next.
	 */
	do {
		/* Use strcmp() because "tun" is bounded. */
		if (strcmp(q->q_qinfo->qi_minfo->mi_idname, "tun") == 0) {
			/* Aha!  Got it. */
			tun = (tun_t *)q->q_ptr;
			if (tun != NULL) {
				mutex_enter(&tun->tun_lock);
				if (tun->tun_itp != itp) {
					ASSERT(tun->tun_itp == NULL);
					ITP_REFHOLD(itp);
					tun->tun_itp = itp;
				}
				mutex_exit(&tun->tun_lock);
				goto release_and_return;
			}
			/*
			 * Else assume this is some other module named "tun"
			 * and move on, hoping we find one that actually has
			 * something in q_ptr.
			 */
		}
		q = q->q_next;
	} while (q != NULL);

	/* Falls through here when no matching tun instance was found. */
release_and_return:
	releasestr(ill->ill_wq);
	ill_refrele(ill);
}
2567 
2568 /*
2569  * Sort through the mess of polhead options to retrieve an appropriate one.
2570  * Returns NULL if we send an spdsock error.  Returns a valid pointer if we
2571  * found a valid polhead.  Returns ALL_ACTIVE_POLHEADS (aka. -1) or
2572  * ALL_INACTIVE_POLHEADS (aka. -2) if the operation calls for the operation to
2573  * act on ALL policy heads.
2574  */
static ipsec_policy_head_t *
get_appropriate_polhead(queue_t *q, mblk_t *mp, spd_if_t *tunname, int spdid,
    int msgtype, ipsec_tun_pol_t **itpp)
{
	ipsec_tun_pol_t *itp;
	ipsec_policy_head_t *iph;
	int errno;
	char *tname;
	boolean_t active;
	spdsock_t *ss = (spdsock_t *)q->q_ptr;
	spd_stack_t	*spds = ss->spdsock_spds;
	netstack_t	*ns = spds->spds_netstack;
	uint64_t gen;	/* Placeholder */
	ill_t *v4, *v6;

	active = (spdid == SPD_ACTIVE);
	*itpp = NULL;
	/* Only the active and standby (inactive) SPDs are addressable. */
	if (!active && spdid != SPD_STANDBY) {
		spdsock_diag(q, mp, SPD_DIAGNOSTIC_BAD_SPDID);
		return (NULL);
	}

	if (tunname != NULL) {
		/* Acting on a tunnel's SPD. */
		tname = (char *)tunname->spd_if_name;
		if (*tname == '\0') {
			/* Handle all-polhead cases here. */
			if (msgtype != SPD_FLUSH && msgtype != SPD_DUMP) {
				spdsock_diag(q, mp,
				    SPD_DIAGNOSTIC_NOT_GLOBAL_OP);
				return (NULL);
			}
			return (active ? ALL_ACTIVE_POLHEADS :
			    ALL_INACTIVE_POLHEADS);
		}

		itp = get_tunnel_policy(tname, spds->spds_netstack);
		if (itp == NULL) {
			if (msgtype != SPD_ADDRULE) {
				/* "Tunnel not found" */
				spdsock_error(q, mp, ENOENT, 0);
				return (NULL);
			}

			/* First rule for this tunnel: create its polhead. */
			errno = 0;
			itp = create_tunnel_policy(tname, &errno, &gen,
			    spds->spds_netstack);
			if (itp == NULL) {
				/*
				 * Something very bad happened, most likely
				 * ENOMEM.  Return an indicator.
				 */
				spdsock_error(q, mp, errno, 0);
				return (NULL);
			}
		}
		/*
		 * Troll the plumbed tunnels and see if we have a
		 * match.  We need to do this always in case we add
		 * policy AFTER plumbing a tunnel.
		 */
		v4 = ill_lookup_on_name(tname, B_FALSE, B_FALSE, NULL,
		    NULL, NULL, &errno, NULL, ns->netstack_ip);
		if (v4 != NULL)
			find_tun_and_set_itp(v4, itp);
		v6 = ill_lookup_on_name(tname, B_FALSE, B_TRUE, NULL,
		    NULL, NULL, &errno, NULL, ns->netstack_ip);
		if (v6 != NULL)
			find_tun_and_set_itp(v6, itp);
		ASSERT(itp != NULL);
		*itpp = itp;
		/* For spdsock dump state, set the polhead's name. */
		if (msgtype == SPD_DUMP) {
			(void) strncpy(ss->spdsock_dump_name, tname, LIFNAMSIZ);
			ss->spdsock_dump_tunnel = itp->itp_flags &
			    (active ? ITPF_P_TUNNEL : ITPF_I_TUNNEL);
		}
	} else {
		itp = NULL;
		/* For spdsock dump state, indicate it's global policy. */
		if (msgtype == SPD_DUMP)
			ss->spdsock_dump_name[0] = '\0';
	}

	if (active)
		iph = (itp == NULL) ? ipsec_system_policy(ns) : itp->itp_policy;
	else
		iph = (itp == NULL) ? ipsec_inactive_policy(ns) :
		    itp->itp_inactive;

	ASSERT(iph != NULL);
	/*
	 * NOTE(review): the global polheads are presumably returned held
	 * by ipsec_system/inactive_policy(), so only the tunnel polheads
	 * need an explicit hold here -- confirm.  The caller releases iph
	 * (and *itpp, if set) when done.
	 */
	if (itp != NULL) {
		IPPH_REFHOLD(iph);
	}

	return (iph);
}
2672 
static void
spdsock_parse(queue_t *q, mblk_t *mp)
{
	spd_msg_t *spmsg;
	spd_ext_t *extv[SPD_EXT_MAX + 1];
	uint_t msgsize;
	ipsec_policy_head_t *iph;
	ipsec_tun_pol_t *itp;
	spd_if_t *tunname;
	spdsock_t *ss = (spdsock_t *)q->q_ptr;
	spd_stack_t *spds = ss->spdsock_spds;
	netstack_t *ns = spds->spds_netstack;
	ipsec_stack_t *ipss = ns->netstack_ipsec;

	/* Make sure nothing's below me. */
	ASSERT(WR(q)->q_next == NULL);

	spmsg = (spd_msg_t *)mp->b_rptr;

	/* spd_msg_len is in 64-bit words; convert to bytes for checks. */
	msgsize = SPD_64TO8(spmsg->spd_msg_len);

	if (msgdsize(mp) != msgsize) {
		/*
		 * Message len incorrect w.r.t. actual size.  Send an error
		 * (EMSGSIZE).	It may be necessary to massage things a
		 * bit.	 For example, if the spd_msg_type is hosed,
		 * I need to set it to SPD_RESERVED to get delivery to
		 * do the right thing.	Then again, maybe just letting
		 * the error delivery do the right thing would suffice.
		 */
		ss2dbg(spds,
		    ("mblk (%lu) and base (%d) message sizes don't jibe.\n",
		    msgdsize(mp), msgsize));
		spdsock_error(q, mp, EMSGSIZE, SPD_DIAGNOSTIC_NONE);
		return;
	}

	if (msgsize > (uint_t)(mp->b_wptr - mp->b_rptr)) {
		/* Get all message into one mblk. */
		if (pullupmsg(mp, -1) == 0) {
			/*
			 * Something screwy happened.
			 * NOTE(review): mp is neither freed nor answered
			 * here -- looks like a message leak; confirm.
			 */
			ss3dbg(spds, ("spdsock_parse: pullupmsg() failed.\n"));
			return;
		} else {
			spmsg = (spd_msg_t *)mp->b_rptr;
		}
	}

	/* Decompose the message into its extension vector. */
	switch (spdsock_get_ext(extv, spmsg, msgsize)) {
	case KGE_DUP:
		/* Handle duplicate extension. */
		ss1dbg(spds, ("Got duplicate extension of type %d.\n",
		    extv[0]->spd_ext_type));
		spdsock_diag(q, mp, dup_ext_diag[extv[0]->spd_ext_type]);
		return;
	case KGE_UNK:
		/* Handle unknown extension. */
		ss1dbg(spds, ("Got unknown extension of type %d.\n",
		    extv[0]->spd_ext_type));
		spdsock_diag(q, mp, SPD_DIAGNOSTIC_UNKNOWN_EXT);
		return;
	case KGE_LEN:
		/* Length error. */
		ss1dbg(spds, ("Length %d on extension type %d overrun or 0.\n",
		    extv[0]->spd_ext_len, extv[0]->spd_ext_type));
		spdsock_diag(q, mp, SPD_DIAGNOSTIC_BAD_EXTLEN);
		return;
	case KGE_CHK:
		/* Reality check failed. */
		ss1dbg(spds, ("Reality check failed on extension type %d.\n",
		    extv[0]->spd_ext_type));
		spdsock_diag(q, mp, bad_ext_diag[extv[0]->spd_ext_type]);
		return;
	default:
		/* Default case is no errors. */
		break;
	}

	/*
	 * Special-case SPD_UPDATEALGS so as not to load IPsec.
	 */
	if (!ipsec_loaded(ipss) && spmsg->spd_msg_type != SPD_UPDATEALGS) {
		spdsock_t *ss = (spdsock_t *)q->q_ptr;

		ASSERT(ss != NULL);
		/*
		 * Kick the IPsec loader, park this message, and let
		 * spdsock_loadcheck() retry it once loading resolves.
		 */
		ipsec_loader_loadnow(ipss);
		ss->spdsock_timeout_arg = mp;
		ss->spdsock_timeout = qtimeout(q, spdsock_loadcheck,
		    q, LOADCHECK_INTERVAL);
		return;
	}

	/* First check for messages that need no polheads at all. */
	switch (spmsg->spd_msg_type) {
	case SPD_UPDATEALGS:
		spdsock_updatealg(q, mp, extv);
		return;
	case SPD_ALGLIST:
		spdsock_alglist(q, mp);
		return;
	case SPD_DUMPALGS:
		spdsock_dumpalgs(q, mp);
		return;
	}

	/*
	 * Then check for ones that need both primary/secondary polheads,
	 * finding the appropriate tunnel policy if need be.
	 */
	tunname = (spd_if_t *)extv[SPD_EXT_TUN_NAME];
	switch (spmsg->spd_msg_type) {
	case SPD_FLIP:
		spdsock_flip(q, mp, tunname);
		return;
	case SPD_CLONE:
		spdsock_clone(q, mp, tunname);
		return;
	}

	/*
	 * Finally, find ones that operate on exactly one polhead, or
	 * "all polheads" of a given type (active/inactive).
	 */
	iph = get_appropriate_polhead(q, mp, tunname, spmsg->spd_msg_spdid,
	    spmsg->spd_msg_type, &itp);
	if (iph == NULL)
		return;		/* get_appropriate_polhead() sent the error. */

	/* All-polheads-ready operations. */
	switch (spmsg->spd_msg_type) {
	case SPD_FLUSH:
		if (itp != NULL) {
			/* Clear the flushed side's tunnel-policy flags too. */
			mutex_enter(&itp->itp_lock);
			if (spmsg->spd_msg_spdid == SPD_ACTIVE)
				itp->itp_flags &= ~ITPF_PFLAGS;
			else
				itp->itp_flags &= ~ITPF_IFLAGS;
			mutex_exit(&itp->itp_lock);
			ITP_REFRELE(itp, ns);
		}
		spdsock_flush(q, iph, mp);
		return;
	case SPD_DUMP:
		if (itp != NULL)
			ITP_REFRELE(itp, ns);
		spdsock_dump(q, iph, mp);
		return;
	}

	/* The remaining operations are not legal on "all polheads". */
	if (iph == ALL_ACTIVE_POLHEADS || iph == ALL_INACTIVE_POLHEADS) {
		spdsock_diag(q, mp, SPD_DIAGNOSTIC_NOT_GLOBAL_OP);
		return;
	}

	/* Single-polhead-only operations. */
	switch (spmsg->spd_msg_type) {
	case SPD_ADDRULE:
		spdsock_addrule(q, iph, mp, extv, itp);
		break;
	case SPD_DELETERULE:
		spdsock_deleterule(q, iph, mp, extv, itp);
		break;
	case SPD_LOOKUP:
		spdsock_lookup(q, iph, mp, extv, itp);
		break;
	default:
		spdsock_diag(q, mp, SPD_DIAGNOSTIC_BAD_MSG_TYPE);
		break;
	}

	/* Drop the holds acquired via get_appropriate_polhead(). */
	IPPH_REFRELE(iph, spds->spds_netstack);
	if (itp != NULL)
		ITP_REFRELE(itp, ns);
}
2849 
2850 /*
2851  * If an algorithm mapping was received before IPsec was loaded, process it.
2852  * Called from the IPsec loader.
2853  */
2854 void
2855 spdsock_update_pending_algs(netstack_t *ns)
2856 {
2857 	spd_stack_t *spds = ns->netstack_spdsock;
2858 
2859 	mutex_enter(&spds->spds_alg_lock);
2860 	if (spds->spds_algs_pending) {
2861 		int diag;
2862 
2863 		spdsock_do_updatealg(spds->spds_extv_algs, &diag,
2864 		    spds);
2865 		spds->spds_algs_pending = B_FALSE;
2866 	}
2867 	mutex_exit(&spds->spds_alg_lock);
2868 }
2869 
2870 static void
2871 spdsock_loadcheck(void *arg)
2872 {
2873 	queue_t *q = (queue_t *)arg;
2874 	spdsock_t *ss = (spdsock_t *)q->q_ptr;
2875 	mblk_t *mp;
2876 	spd_stack_t	*spds = ss->spdsock_spds;
2877 	ipsec_stack_t	*ipss = spds->spds_netstack->netstack_ipsec;
2878 
2879 	ASSERT(ss != NULL);
2880 
2881 	ss->spdsock_timeout = 0;
2882 	mp = ss->spdsock_timeout_arg;
2883 	ASSERT(mp != NULL);
2884 	ss->spdsock_timeout_arg = NULL;
2885 	if (ipsec_failed(ipss))
2886 		spdsock_error(q, mp, EPROTONOSUPPORT, 0);
2887 	else
2888 		spdsock_parse(q, mp);
2889 }
2890 
2891 /*
2892  * Copy relevant state bits.
2893  */
2894 static void
2895 spdsock_copy_info(struct T_info_ack *tap, spdsock_t *ss)
2896 {
2897 	*tap = spdsock_g_t_info_ack;
2898 	tap->CURRENT_state = ss->spdsock_state;
2899 	tap->OPT_size = spdsock_max_optsize;
2900 }
2901 
2902 /*
2903  * This routine responds to T_CAPABILITY_REQ messages.  It is called by
2904  * spdsock_wput.  Much of the T_CAPABILITY_ACK information is copied from
2905  * spdsock_g_t_info_ack.  The current state of the stream is copied from
2906  * spdsock_state.
2907  */
2908 static void
2909 spdsock_capability_req(queue_t *q, mblk_t *mp)
2910 {
2911 	spdsock_t *ss = (spdsock_t *)q->q_ptr;
2912 	t_uscalar_t cap_bits1;
2913 	struct T_capability_ack	*tcap;
2914 
2915 	cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1;
2916 
2917 	mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack),
2918 		mp->b_datap->db_type, T_CAPABILITY_ACK);
2919 	if (mp == NULL)
2920 		return;
2921 
2922 	tcap = (struct T_capability_ack *)mp->b_rptr;
2923 	tcap->CAP_bits1 = 0;
2924 
2925 	if (cap_bits1 & TC1_INFO) {
2926 		spdsock_copy_info(&tcap->INFO_ack, ss);
2927 		tcap->CAP_bits1 |= TC1_INFO;
2928 	}
2929 
2930 	qreply(q, mp);
2931 }
2932 
2933 /*
2934  * This routine responds to T_INFO_REQ messages. It is called by
2935  * spdsock_wput_other.
2936  * Most of the T_INFO_ACK information is copied from spdsock_g_t_info_ack.
2937  * The current state of the stream is copied from spdsock_state.
2938  */
2939 static void
2940 spdsock_info_req(q, mp)
2941 	queue_t	*q;
2942 	mblk_t	*mp;
2943 {
2944 	mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO,
2945 	    T_INFO_ACK);
2946 	if (mp == NULL)
2947 		return;
2948 	spdsock_copy_info((struct T_info_ack *)mp->b_rptr,
2949 	    (spdsock_t *)q->q_ptr);
2950 	qreply(q, mp);
2951 }
2952 
2953 /*
2954  * spdsock_err_ack. This routine creates a
2955  * T_ERROR_ACK message and passes it
2956  * upstream.
2957  */
2958 static void
2959 spdsock_err_ack(q, mp, t_error, sys_error)
2960 	queue_t	*q;
2961 	mblk_t	*mp;
2962 	int	t_error;
2963 	int	sys_error;
2964 {
2965 	if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL)
2966 		qreply(q, mp);
2967 }
2968 
2969 /*
2970  * This routine retrieves the current status of socket options.
2971  * It returns the size of the option retrieved.
2972  */
2973 /* ARGSUSED */
2974 int
2975 spdsock_opt_get(queue_t *q, int level, int name, uchar_t *ptr)
2976 {
2977 	int *i1 = (int *)ptr;
2978 
2979 	switch (level) {
2980 	case SOL_SOCKET:
2981 		switch (name) {
2982 		case SO_TYPE:
2983 			*i1 = SOCK_RAW;
2984 			break;
2985 		/*
2986 		 * The following two items can be manipulated,
2987 		 * but changing them should do nothing.
2988 		 */
2989 		case SO_SNDBUF:
2990 			*i1 = (int)q->q_hiwat;
2991 			break;
2992 		case SO_RCVBUF:
2993 			*i1 = (int)(RD(q)->q_hiwat);
2994 			break;
2995 		}
2996 		break;
2997 	default:
2998 		return (0);
2999 	}
3000 	return (sizeof (int));
3001 }
3002 
3003 /*
3004  * This routine sets socket options.
3005  */
3006 /* ARGSUSED */
3007 int
3008 spdsock_opt_set(queue_t *q, uint_t mgmt_flags, int level, int name,
3009     uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
3010     void *thisdg_attrs, cred_t *cr, mblk_t *mblk)
3011 {
3012 	int *i1 = (int *)invalp;
3013 	spdsock_t *ss = (spdsock_t *)q->q_ptr;
3014 	spd_stack_t	*spds = ss->spdsock_spds;
3015 
3016 	switch (level) {
3017 	case SOL_SOCKET:
3018 		switch (name) {
3019 		case SO_SNDBUF:
3020 			if (*i1 > spds->spds_max_buf)
3021 				return (ENOBUFS);
3022 			q->q_hiwat = *i1;
3023 			break;
3024 		case SO_RCVBUF:
3025 			if (*i1 > spds->spds_max_buf)
3026 				return (ENOBUFS);
3027 			RD(q)->q_hiwat = *i1;
3028 			(void) mi_set_sth_hiwat(RD(q), *i1);
3029 			break;
3030 		}
3031 		break;
3032 	}
3033 	return (0);
3034 }
3035 
3036 
3037 /*
3038  * Handle STREAMS messages.
3039  */
static void
spdsock_wput_other(queue_t *q, mblk_t *mp)
{
	struct iocblk *iocp;
	int error;
	spdsock_t *ss = (spdsock_t *)q->q_ptr;
	spd_stack_t	*spds = ss->spdsock_spds;
	cred_t		*cr;

	switch (mp->b_datap->db_type) {
	case M_PROTO:
	case M_PCPROTO:
		/* Need at least a TPI primitive type to look at. */
		if ((mp->b_wptr - mp->b_rptr) < sizeof (long)) {
			ss3dbg(spds, (
			    "spdsock_wput_other: Not big enough M_PROTO\n"));
			freemsg(mp);
			return;
		}
		/*
		 * Fallback credential for option processing: the kernel
		 * cred of the zone owning this netstack.  DB_CREDDEF()
		 * below prefers the message's own cred when present.
		 */
		cr = zone_get_kcred(netstackid_to_zoneid(
			spds->spds_netstack->netstack_stackid));
		ASSERT(cr != NULL);

		switch (((union T_primitives *)mp->b_rptr)->type) {
		case T_CAPABILITY_REQ:
			spdsock_capability_req(q, mp);
			break;
		case T_INFO_REQ:
			spdsock_info_req(q, mp);
			break;
		case T_SVR4_OPTMGMT_REQ:
			(void) svr4_optcom_req(q, mp, DB_CREDDEF(mp, cr),
			    &spdsock_opt_obj);
			break;
		case T_OPTMGMT_REQ:
			(void) tpi_optcom_req(q, mp, DB_CREDDEF(mp, cr),
			    &spdsock_opt_obj);
			break;
		case T_DATA_REQ:
		case T_EXDATA_REQ:
		case T_ORDREL_REQ:
			/* Illegal for spdsock. */
			freemsg(mp);
			(void) putnextctl1(RD(q), M_ERROR, EPROTO);
			break;
		default:
			/* Not supported by spdsock. */
			spdsock_err_ack(q, mp, TNOTSUPPORT, 0);
			break;
		}
		crfree(cr);
		return;
	case M_IOCTL:
		iocp = (struct iocblk *)mp->b_rptr;
		error = EINVAL;

		switch (iocp->ioc_cmd) {
		case ND_SET:
		case ND_GET:
			/* ndd(1M) tunable get/set. */
			if (nd_getset(q, spds->spds_g_nd, mp)) {
				qreply(q, mp);
				return;
			} else
				error = ENOENT;
			/* FALLTHRU */
		default:
			miocnak(q, mp, 0, error);
			return;
		}
	case M_FLUSH:
		if (*mp->b_rptr & FLUSHW) {
			flushq(q, FLUSHALL);
			*mp->b_rptr &= ~FLUSHW;
		}
		if (*mp->b_rptr & FLUSHR) {
			/* Let the read side see the flush request too. */
			qreply(q, mp);
			return;
		}
		/* Else FALLTHRU */
	}

	/* If fell through, just black-hole the message. */
	freemsg(mp);
}
3123 
static void
spdsock_wput(queue_t *q, mblk_t *mp)
{
	uint8_t *rptr = mp->b_rptr;
	mblk_t *mp1;
	spdsock_t *ss = (spdsock_t *)q->q_ptr;
	spd_stack_t	*spds = ss->spdsock_spds;

	/*
	 * If we're dumping, defer processing other messages until the
	 * dump completes.
	 */
	if (ss->spdsock_dump_req != NULL) {
		if (!putq(q, mp))
			freemsg(mp);
		return;
	}

	switch (mp->b_datap->db_type) {
	case M_DATA:
		/*
		 * Silently discard.
		 */
		ss2dbg(spds, ("raw M_DATA in spdsock.\n"));
		freemsg(mp);
		return;
	case M_PROTO:
	case M_PCPROTO:
		if ((mp->b_wptr - rptr) >= sizeof (struct T_data_req)) {
			if (((union T_primitives *)rptr)->type == T_DATA_REQ) {
				/* Strip the T_DATA_REQ; keep its payload. */
				if ((mp1 = mp->b_cont) == NULL) {
					/* No data after T_DATA_REQ. */
					ss2dbg(spds,
					    ("No data after DATA_REQ.\n"));
					freemsg(mp);
					return;
				}
				freeb(mp);
				mp = mp1;
				ss2dbg(spds, ("T_DATA_REQ\n"));
				break;	/* Out of switch. */
			}
		}
		/* FALLTHRU */
	default:
		/* Other TPI primitives, ioctls, flushes, etc. */
		ss3dbg(spds, ("In default wput case (%d %d).\n",
		    mp->b_datap->db_type, ((union T_primitives *)rptr)->type));
		spdsock_wput_other(q, mp);
		return;
	}

	/* I now have a PF_POLICY message in an M_DATA block. */
	spdsock_parse(q, mp);
}
3178 
3179 /*
3180  * Device open procedure, called when new queue pair created.
3181  * We are passed the read-side queue.
3182  */
/* ARGSUSED */
static int
spdsock_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
{
	spdsock_t *ss;
	queue_t *oq = OTHERQ(q);
	minor_t ssminor;
	netstack_t *ns;
	spd_stack_t *spds;

	/* PF_POLICY requires network-configuration privilege. */
	if (secpolicy_ip_config(credp, B_FALSE) != 0)
		return (EPERM);

	if (q->q_ptr != NULL)
		return (0);  /* Re-open of an already open instance. */

	if (sflag & MODOPEN)
		return (EINVAL);

	/*
	 * Acquires a hold on the opener's netstack; released on the error
	 * paths below, otherwise presumably retained until close -- confirm.
	 */
	ns = netstack_find_by_cred(credp);
	ASSERT(ns != NULL);
	spds = ns->netstack_spdsock;
	ASSERT(spds != NULL);

	ss2dbg(spds, ("Made it into PF_POLICY socket open.\n"));

	/* Carve out a minor number; vmem_alloc() yields NULL (0) if full. */
	ssminor = (minor_t)(uintptr_t)vmem_alloc(spdsock_vmem, 1, VM_NOSLEEP);
	if (ssminor == 0) {
		netstack_rele(spds->spds_netstack);
		return (ENOMEM);
	}
	ss = kmem_zalloc(sizeof (spdsock_t), KM_NOSLEEP);
	if (ss == NULL) {
		vmem_free(spdsock_vmem, (void *)(uintptr_t)ssminor, 1);
		netstack_rele(spds->spds_netstack);
		return (ENOMEM);
	}

	ss->spdsock_minor = ssminor;
	ss->spdsock_state = TS_UNBND;
	ss->spdsock_dump_req = NULL;

	ss->spdsock_spds = spds;

	/* Both halves of the queue pair share the per-instance state. */
	q->q_ptr = ss;
	oq->q_ptr = ss;

	q->q_hiwat = spds->spds_recv_hiwat;

	oq->q_hiwat = spds->spds_xmit_hiwat;
	oq->q_lowat = spds->spds_xmit_lowat;

	qprocson(q);
	(void) mi_set_sth_hiwat(q, spds->spds_recv_hiwat);

	/* Clone onto the minor number we allocated. */
	*devp = makedevice(getmajor(*devp), ss->spdsock_minor);
	return (0);
}
3241 
3242 /*
3243  * Read-side service procedure, invoked when we get back-enabled
3244  * when buffer space becomes available.
3245  *
3246  * Dump another chunk if we were dumping before; when we finish, kick
3247  * the write-side queue in case it's waiting for read queue space.
3248  */
3249 void
3250 spdsock_rsrv(queue_t *q)
3251 {
3252 	spdsock_t *ss = q->q_ptr;
3253 
3254 	if (ss->spdsock_dump_req != NULL)
3255 		spdsock_dump_some(q, ss);
3256 
3257 	if (ss->spdsock_dump_req == NULL)
3258 		qenable(OTHERQ(q));
3259 }
3260 
3261 /*
3262  * Write-side service procedure, invoked when we defer processing
3263  * if another message is received while a dump is in progress.
3264  */
3265 void
3266 spdsock_wsrv(queue_t *q)
3267 {
3268 	spdsock_t *ss = q->q_ptr;
3269 	mblk_t *mp;
3270 	spd_stack_t	*spds = ss->spdsock_spds;
3271 	ipsec_stack_t	*ipss = spds->spds_netstack->netstack_ipsec;
3272 
3273 	if (ss->spdsock_dump_req != NULL) {
3274 		qenable(OTHERQ(q));
3275 		return;
3276 	}
3277 
3278 	while ((mp = getq(q)) != NULL) {
3279 		if (ipsec_loaded(ipss)) {
3280 			spdsock_wput(q, mp);
3281 			if (ss->spdsock_dump_req != NULL)
3282 				return;
3283 		} else if (!ipsec_failed(ipss)) {
3284 			(void) putq(q, mp);
3285 		} else {
3286 			spdsock_error(q, mp, EPFNOSUPPORT, 0);
3287 		}
3288 	}
3289 }
3290 
3291 static int
3292 spdsock_close(queue_t *q)
3293 {
3294 	spdsock_t *ss = q->q_ptr;
3295 	spd_stack_t	*spds = ss->spdsock_spds;
3296 
3297 	qprocsoff(q);
3298 
3299 	/* Safe assumption. */
3300 	ASSERT(ss != NULL);
3301 
3302 	if (ss->spdsock_timeout != 0)
3303 		(void) quntimeout(q, ss->spdsock_timeout);
3304 
3305 	ss3dbg(spds, ("Driver close, PF_POLICY socket is going away.\n"));
3306 
3307 	vmem_free(spdsock_vmem, (void *)(uintptr_t)ss->spdsock_minor, 1);
3308 	netstack_rele(ss->spdsock_spds->spds_netstack);
3309 
3310 	kmem_free(ss, sizeof (spdsock_t));
3311 	return (0);
3312 }
3313 
3314 /*
3315  * Merge the IPsec algorithms tables with the received algorithm information.
3316  */
3317 void
3318 spdsock_merge_algs(spd_stack_t *spds)
3319 {
3320 	ipsec_alginfo_t *alg, *oalg;
3321 	ipsec_algtype_t algtype;
3322 	uint_t algidx, algid, nalgs;
3323 	crypto_mech_name_t *mechs;
3324 	uint_t mech_count, mech_idx;
3325 	netstack_t	*ns = spds->spds_netstack;
3326 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
3327 
3328 	ASSERT(MUTEX_HELD(&spds->spds_alg_lock));
3329 
3330 	/*
3331 	 * Get the list of supported mechanisms from the crypto framework.
3332 	 * If a mechanism is supported by KCF, resolve its mechanism
3333 	 * id and mark it as being valid. This operation must be done
3334 	 * without holding alg_lock, since it can cause a provider
3335 	 * module to be loaded and the provider notification callback to
3336 	 * be invoked.
3337 	 */
3338 	mechs = crypto_get_mech_list(&mech_count, KM_SLEEP);
3339 	for (algtype = 0; algtype < IPSEC_NALGTYPES; algtype++) {
3340 		for (algid = 0; algid < IPSEC_MAX_ALGS; algid++) {
3341 			int algflags = 0;
3342 			crypto_mech_type_t mt = CRYPTO_MECHANISM_INVALID;
3343 
3344 			alg = spds->spds_algs[algtype][algid];
3345 			if (alg == NULL)
3346 				continue;
3347 
3348 			/*
3349 			 * The NULL encryption algorithm is a special
3350 			 * case because there are no mechanisms, yet
3351 			 * the algorithm is still valid.
3352 			 */
3353 			if (alg->alg_id == SADB_EALG_NULL) {
3354 				alg->alg_mech_type = CRYPTO_MECHANISM_INVALID;
3355 				alg->alg_flags = ALG_FLAG_VALID;
3356 				continue;
3357 			}
3358 
3359 			for (mech_idx = 0; mech_idx < mech_count; mech_idx++) {
3360 				if (strncmp(alg->alg_mech_name, mechs[mech_idx],
3361 				    CRYPTO_MAX_MECH_NAME) == 0) {
3362 					mt = crypto_mech2id(alg->alg_mech_name);
3363 					ASSERT(mt != CRYPTO_MECHANISM_INVALID);
3364 					algflags = ALG_FLAG_VALID;
3365 					break;
3366 				}
3367 			}
3368 			alg->alg_mech_type = mt;
3369 			alg->alg_flags = algflags;
3370 		}
3371 	}
3372 
3373 	mutex_enter(&ipss->ipsec_alg_lock);
3374 
3375 	/*
3376 	 * For each algorithm currently defined, check if it is
3377 	 * present in the new tables created from the SPD_UPDATEALGS
3378 	 * message received from user-space.
3379 	 * Delete the algorithm entries that are currently defined
3380 	 * but not part of the new tables.
3381 	 */
3382 	for (algtype = 0; algtype < IPSEC_NALGTYPES; algtype++) {
3383 		nalgs = ipss->ipsec_nalgs[algtype];
3384 		for (algidx = 0; algidx < nalgs; algidx++) {
3385 			algid = ipss->ipsec_sortlist[algtype][algidx];
3386 			if (spds->spds_algs[algtype][algid] == NULL)
3387 				ipsec_alg_unreg(algtype, algid, ns);
3388 		}
3389 	}
3390 
3391 	/*
3392 	 * For each algorithm we just received, check if it is
3393 	 * present in the currently defined tables. If it is, swap
3394 	 * the entry with the one we just allocated.
3395 	 * If the new algorithm is not in the current tables,
3396 	 * add it.
3397 	 */
3398 	for (algtype = 0; algtype < IPSEC_NALGTYPES; algtype++) {
3399 		for (algid = 0; algid < IPSEC_MAX_ALGS; algid++) {
3400 			alg = spds->spds_algs[algtype][algid];
3401 			if (alg == NULL)
3402 				continue;
3403 
3404 			if ((oalg = ipss->ipsec_alglists[algtype][algid]) ==
3405 			    NULL) {
3406 				/*
3407 				 * New algorithm, add it to the algorithm
3408 				 * table.
3409 				 */
3410 				ipsec_alg_reg(algtype, alg, ns);
3411 			} else {
3412 				/*
3413 				 * Algorithm is already in the table. Swap
3414 				 * the existing entry with the new one.
3415 				 */
3416 				ipsec_alg_fix_min_max(alg, algtype, ns);
3417 				ipss->ipsec_alglists[algtype][algid] = alg;
3418 				ipsec_alg_free(oalg);
3419 			}
3420 			spds->spds_algs[algtype][algid] = NULL;
3421 		}
3422 	}
3423 
3424 	for (algtype = 0; algtype < IPSEC_NALGTYPES; algtype++) {
3425 		ipss->ipsec_algs_exec_mode[algtype] =
3426 		    spds->spds_algs_exec_mode[algtype];
3427 	}
3428 
3429 	mutex_exit(&ipss->ipsec_alg_lock);
3430 
3431 	crypto_free_mech_list(mechs, mech_count);
3432 
3433 	ipsecah_algs_changed(ns);
3434 	ipsecesp_algs_changed(ns);
3435 }
3436