xref: /titanic_41/usr/src/uts/common/inet/ip/keysock.c (revision 3afe87ebb25691cb6d158edaa34a6fb9b703a691)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/param.h>
27 #include <sys/types.h>
28 #include <sys/stream.h>
29 #include <sys/strsubr.h>
30 #include <sys/strsun.h>
31 #include <sys/stropts.h>
32 #include <sys/vnode.h>
33 #include <sys/zone.h>
34 #include <sys/strlog.h>
35 #include <sys/sysmacros.h>
36 #define	_SUN_TPI_VERSION 2
37 #include <sys/tihdr.h>
38 #include <sys/timod.h>
39 #include <sys/tiuser.h>
40 #include <sys/ddi.h>
41 #include <sys/sunddi.h>
42 #include <sys/sunldi.h>
43 #include <sys/file.h>
44 #include <sys/modctl.h>
45 #include <sys/debug.h>
46 #include <sys/kmem.h>
47 #include <sys/cmn_err.h>
48 #include <sys/proc.h>
49 #include <sys/suntpi.h>
50 #include <sys/atomic.h>
51 #include <sys/mkdev.h>
52 #include <sys/policy.h>
53 #include <sys/disp.h>
54 
55 #include <sys/socket.h>
56 #include <netinet/in.h>
57 #include <net/pfkeyv2.h>
58 
59 #include <inet/common.h>
60 #include <netinet/ip6.h>
61 #include <inet/ip.h>
62 #include <inet/proto_set.h>
63 #include <inet/nd.h>
64 #include <inet/optcom.h>
65 #include <inet/ipsec_info.h>
66 #include <inet/ipsec_impl.h>
67 #include <inet/keysock.h>
68 
69 #include <sys/isa_defs.h>
70 
71 /*
 * This is a transport provider for the PF_KEY key management socket.
73  * (See RFC 2367 for details.)
74  * Downstream messages are wrapped in a keysock consumer interface KEYSOCK_IN
75  * messages (see ipsec_info.h), and passed to the appropriate consumer.
76  * Upstream messages are generated for all open PF_KEY sockets, when
77  * appropriate, as well as the sender (as long as SO_USELOOPBACK is enabled)
78  * in reply to downstream messages.
79  *
80  * Upstream messages must be created asynchronously for the following
81  * situations:
82  *
83  *	1.) A keysock consumer requires an SA, and there is currently none.
84  *	2.) An SA expires, either hard or soft lifetime.
85  *	3.) Other events a consumer deems fit.
86  *
87  * The MT model of this is PERMOD, with shared put procedures.  Two types of
88  * messages, SADB_FLUSH and SADB_DUMP, need to lock down the perimeter to send
89  * down the *multiple* messages they create.
90  */
91 
92 static vmem_t *keysock_vmem;		/* for minor numbers. */
93 
94 #define	KEYSOCK_MAX_CONSUMERS 256
95 
/*
 * Default structure copied into T_INFO_ACK messages (from rts.c...)
 *
 * This is a read-only template: keysock_copy_info() copies it and then
 * overrides CURRENT_state (from the socket's keysock_state) and OPT_size
 * (from keysock_max_optsize) before the ack is sent.
 */
static struct T_info_ack keysock_g_t_info_ack = {
	T_INFO_ACK,
	T_INFINITE,	/* TSDU_size. Maximum size messages. */
	T_INVALID,	/* ETSDU_size. No expedited data. */
	T_INVALID,	/* CDATA_size. No connect data. */
	T_INVALID,	/* DDATA_size. No disconnect data. */
	0,		/* ADDR_size. */
	0,		/* OPT_size. No user-settable options */
	64 * 1024,	/* TIDU_size. keysock allows maximum size messages. */
	T_COTS,		/* SERV_type. keysock supports connection oriented. */
	TS_UNBND,	/* CURRENT_state. This is set from keysock_state. */
	(XPG4_1)	/* Provider flags */
};
110 
/*
 * Named Dispatch Parameter Management Structure
 *
 * One entry describes a single NDD tunable: its inclusive lower and upper
 * bounds (enforced by keysock_param_set()), its current value, and the
 * name under which it is registered with nd_load().
 */
typedef struct keysockparam_s {
	uint_t	keysock_param_min;	/* smallest value accepted on set */
	uint_t	keysock_param_max;	/* largest value accepted on set */
	uint_t	keysock_param_value;	/* current value */
	char	*keysock_param_name;	/* NDD variable name */
} keysockparam_t;
118 
/*
 * Table of NDD variables supported by keysock. These are loaded into
 * keysock_g_nd in keysock_init_nd.
 * All of these are alterable, within the min/max values given, at run time.
 *
 * This array is only the template of defaults: keysock_stack_init() copies
 * it into a per-stack allocation (keystack_params), and the keystack_*
 * macros below index that per-stack copy.  The macro indices must therefore
 * stay in step with the row order here.
 */
static	keysockparam_t	lcl_param_arr[] = {
	/* min	max	value	name */
	{ 4096, 65536,	8192,	"keysock_xmit_hiwat"},
	{ 0,	65536,	1024,	"keysock_xmit_lowat"},
	{ 4096, 65536,	8192,	"keysock_recv_hiwat"},
	{ 65536, 1024*1024*1024, 256*1024,	"keysock_max_buf"},
	{ 0,	3,	0,	"keysock_debug"},
};
/* Per-stack accessors; used as keystack->keystack_<name>. */
#define	keystack_xmit_hiwat	keystack_params[0].keysock_param_value
#define	keystack_xmit_lowat	keystack_params[1].keysock_param_value
#define	keystack_recv_hiwat	keystack_params[2].keysock_param_value
#define	keystack_max_buf	keystack_params[3].keysock_param_value
#define	keystack_debug	keystack_params[4].keysock_param_value
137 
/*
 * Debug output macros.  The "a" argument is a fully parenthesized printf
 * argument list, e.g. ks1dbg(keystack, ("value %d\n", v));
 *
 * ks0dbg always prints; ks1dbg/ks2dbg/ks3dbg print only when the per-stack
 * keysock_debug tunable is at least 1/2/3 respectively.
 *
 * NOTE(review): ks1dbg..ks3dbg expand to a bare "if" statement, so never
 * use them as the unbraced body of an if that has an else clause -- the
 * else would bind to the macro's hidden if.
 */
#define	ks0dbg(a)	printf a
/* NOTE:  != 0 instead of > 0 so lint doesn't complain. */
#define	ks1dbg(keystack, a)	if (keystack->keystack_debug != 0) printf a
#define	ks2dbg(keystack, a)	if (keystack->keystack_debug > 1) printf a
#define	ks3dbg(keystack, a)	if (keystack->keystack_debug > 2) printf a
143 
/*
 * Forward declarations: the STREAMS entry points referenced by the qinit
 * tables below, the upstream delivery routine, and the per-netstack
 * constructor/destructor handed to netstack_register().
 */
static int keysock_close(queue_t *);
static int keysock_open(queue_t *, dev_t *, int, int, cred_t *);
static void keysock_wput(queue_t *, mblk_t *);
static void keysock_rput(queue_t *, mblk_t *);
static void keysock_rsrv(queue_t *);
static void keysock_passup(mblk_t *, sadb_msg_t *, minor_t,
    keysock_consumer_t *, boolean_t, keysock_stack_t *);
static void *keysock_stack_init(netstackid_t stackid, netstack_t *ns);
static void keysock_stack_fini(netstackid_t stackid, void *arg);
153 
/*
 * STREAMS registration tables.  The same "info" is shared by the read and
 * write sides.  (The positional initializers follow struct module_info and
 * struct qinit as declared in <sys/stream.h>.)
 */
static struct module_info info = {
	5138, "keysock", 1, INFPSZ, 512, 128
};

/* Read side: put and service procedures, plus the open/close entry points. */
static struct qinit rinit = {
	(pfi_t)keysock_rput, (pfi_t)keysock_rsrv, keysock_open, keysock_close,
	NULL, &info
};

/* Write side: put procedure only; no service procedure. */
static struct qinit winit = {
	(pfi_t)keysock_wput, NULL, NULL, NULL, NULL, &info
};

/* Exported streamtab tying the read and write qinit tables together. */
struct streamtab keysockinfo = {
	&rinit, &winit
};
170 
/* Module linkage; used to obtain an LDI identity when plumbing IPsec. */
extern struct modlinkage *keysock_modlp;

/*
 * Plumb IPsec.
 *
 * NOTE:  New "default" modules will need to be loaded here if needed before
 *	  boot time.
 */

/*
 * Keep these in global space to keep the lint from complaining.
 *
 * Driver names are handed to i_ddi_attach_pseudo_node(); the /devices paths
 * are opened through the LDI; KEYSOCK is the module name pushed onto each
 * consumer stream.
 */
static char *IPSECESP = "ipsecesp";
static char *IPSECESPDEV = "/devices/pseudo/ipsecesp@0:ipsecesp";
static char *IPSECAH = "ipsecah";
static char *IPSECAHDEV = "/devices/pseudo/ipsecah@0:ipsecah";
static char *IP6DEV = "/devices/pseudo/ip6@0:ip6";
static char *KEYSOCK = "keysock";
static char *STRMOD = "strmod";
188 
189 /*
190  * Load the other ipsec modules and plumb them together.
191  */
192 int
193 keysock_plumb_ipsec(netstack_t *ns)
194 {
195 	ldi_handle_t	lh, ip6_lh = NULL;
196 	ldi_ident_t	li = NULL;
197 	int		err = 0;
198 	int		muxid, rval;
199 	boolean_t	esp_present = B_TRUE;
200 	cred_t		*cr;
201 	keysock_stack_t *keystack = ns->netstack_keysock;
202 
203 #ifdef NS_DEBUG
204 	(void) printf("keysock_plumb_ipsec(%d)\n",
205 	    ns->netstack_stackid);
206 #endif
207 
208 	keystack->keystack_plumbed = 0;	/* we're trying again.. */
209 
210 	cr = zone_get_kcred(netstackid_to_zoneid(
211 	    keystack->keystack_netstack->netstack_stackid));
212 	ASSERT(cr != NULL);
213 	/*
214 	 * Load up the drivers (AH/ESP).
215 	 *
216 	 * I do this separately from the actual plumbing in case this function
217 	 * ever gets called from a diskless boot before the root filesystem is
218 	 * up.  I don't have to worry about "keysock" because, well, if I'm
219 	 * here, keysock must've loaded successfully.
220 	 */
221 	if (i_ddi_attach_pseudo_node(IPSECAH) == NULL) {
222 		ks0dbg(("IPsec:  AH failed to attach.\n"));
223 		goto bail;
224 	}
225 	if (i_ddi_attach_pseudo_node(IPSECESP) == NULL) {
226 		ks0dbg(("IPsec:  ESP failed to attach.\n"));
227 		esp_present = B_FALSE;
228 	}
229 
230 	/*
231 	 * Set up the IP streams for AH and ESP, as well as tacking keysock
232 	 * on top of them.  Assume keysock has set the autopushes up already.
233 	 */
234 
235 	/* Open IP. */
236 	err = ldi_ident_from_mod(keysock_modlp, &li);
237 	if (err) {
238 		ks0dbg(("IPsec:  lid_ident_from_mod failed (err %d).\n",
239 		    err));
240 		goto bail;
241 	}
242 
243 	err = ldi_open_by_name(IP6DEV, FREAD|FWRITE, cr, &ip6_lh, li);
244 	if (err) {
245 		ks0dbg(("IPsec:  Open of IP6 failed (err %d).\n", err));
246 		goto bail;
247 	}
248 
249 	/* PLINK KEYSOCK/AH */
250 	err = ldi_open_by_name(IPSECAHDEV, FREAD|FWRITE, cr, &lh, li);
251 	if (err) {
252 		ks0dbg(("IPsec:  Open of AH failed (err %d).\n", err));
253 		goto bail;
254 	}
255 	err = ldi_ioctl(lh,
256 	    I_PUSH, (intptr_t)KEYSOCK, FKIOCTL, cr, &rval);
257 	if (err) {
258 		ks0dbg(("IPsec:  Push of KEYSOCK onto AH failed (err %d).\n",
259 		    err));
260 		(void) ldi_close(lh, FREAD|FWRITE, cr);
261 		goto bail;
262 	}
263 	err = ldi_ioctl(ip6_lh, I_PLINK, (intptr_t)lh,
264 	    FREAD+FWRITE+FNOCTTY+FKIOCTL, cr, &muxid);
265 	if (err) {
266 		ks0dbg(("IPsec:  PLINK of KEYSOCK/AH failed (err %d).\n", err));
267 		(void) ldi_close(lh, FREAD|FWRITE, cr);
268 		goto bail;
269 	}
270 	(void) ldi_close(lh, FREAD|FWRITE, cr);
271 
272 	/* PLINK KEYSOCK/ESP */
273 	if (esp_present) {
274 		err = ldi_open_by_name(IPSECESPDEV,
275 		    FREAD|FWRITE, cr, &lh, li);
276 		if (err) {
277 			ks0dbg(("IPsec:  Open of ESP failed (err %d).\n", err));
278 			goto bail;
279 		}
280 		err = ldi_ioctl(lh,
281 		    I_PUSH, (intptr_t)KEYSOCK, FKIOCTL, cr, &rval);
282 		if (err) {
283 			ks0dbg(("IPsec:  "
284 			    "Push of KEYSOCK onto ESP failed (err %d).\n",
285 			    err));
286 			(void) ldi_close(lh, FREAD|FWRITE, cr);
287 			goto bail;
288 		}
289 		err = ldi_ioctl(ip6_lh, I_PLINK, (intptr_t)lh,
290 		    FREAD+FWRITE+FNOCTTY+FKIOCTL, cr, &muxid);
291 		if (err) {
292 			ks0dbg(("IPsec:  "
293 			    "PLINK of KEYSOCK/ESP failed (err %d).\n", err));
294 			(void) ldi_close(lh, FREAD|FWRITE, cr);
295 			goto bail;
296 		}
297 		(void) ldi_close(lh, FREAD|FWRITE, cr);
298 	}
299 
300 bail:
301 	keystack->keystack_plumbed = (err == 0) ? 1 : -1;
302 	if (ip6_lh != NULL) {
303 		(void) ldi_close(ip6_lh, FREAD|FWRITE, cr);
304 	}
305 	if (li != NULL)
306 		ldi_ident_release(li);
307 #ifdef NS_DEBUG
308 	(void) printf("keysock_plumb_ipsec -> %d\n",
309 	    keystack->keystack_plumbed);
310 #endif
311 	crfree(cr);
312 	return (err);
313 }
314 
315 /* ARGSUSED */
316 static int
317 keysock_param_get(q, mp, cp, cr)
318 	queue_t	*q;
319 	mblk_t	*mp;
320 	caddr_t	cp;
321 	cred_t *cr;
322 {
323 	keysockparam_t	*keysockpa = (keysockparam_t *)cp;
324 	uint_t value;
325 	keysock_t *ks = (keysock_t *)q->q_ptr;
326 	keysock_stack_t	*keystack = ks->keysock_keystack;
327 
328 	mutex_enter(&keystack->keystack_param_lock);
329 	value = keysockpa->keysock_param_value;
330 	mutex_exit(&keystack->keystack_param_lock);
331 
332 	(void) mi_mpprintf(mp, "%u", value);
333 	return (0);
334 }
335 
336 /* This routine sets an NDD variable in a keysockparam_t structure. */
337 /* ARGSUSED */
338 static int
339 keysock_param_set(q, mp, value, cp, cr)
340 	queue_t	*q;
341 	mblk_t	*mp;
342 	char	*value;
343 	caddr_t	cp;
344 	cred_t *cr;
345 {
346 	ulong_t	new_value;
347 	keysockparam_t	*keysockpa = (keysockparam_t *)cp;
348 	keysock_t *ks = (keysock_t *)q->q_ptr;
349 	keysock_stack_t	*keystack = ks->keysock_keystack;
350 
351 	/* Convert the value from a string into a long integer. */
352 	if (ddi_strtoul(value, NULL, 10, &new_value) != 0)
353 		return (EINVAL);
354 
355 	mutex_enter(&keystack->keystack_param_lock);
356 	/*
357 	 * Fail the request if the new value does not lie within the
358 	 * required bounds.
359 	 */
360 	if (new_value < keysockpa->keysock_param_min ||
361 	    new_value > keysockpa->keysock_param_max) {
362 		mutex_exit(&keystack->keystack_param_lock);
363 		return (EINVAL);
364 	}
365 
366 	/* Set the new value */
367 	keysockpa->keysock_param_value = new_value;
368 	mutex_exit(&keystack->keystack_param_lock);
369 
370 	return (0);
371 }
372 
/*
 * Initialize keysock at module load time.
 *
 * Computes the maximum option size from the keysock option table, creates
 * the identifier arena used to hand out minor numbers, and registers the
 * per-netstack init/fini callbacks.  Always returns B_TRUE.
 */
boolean_t
keysock_ddi_init(void)
{
	keysock_max_optsize = optcom_max_optsize(
	    keysock_opt_obj.odb_opt_des_arr, keysock_opt_obj.odb_opt_arr_cnt);

	/*
	 * The arena starts at 1, so a value of 0 is never handed out;
	 * keysock_open() treats 0 as allocation failure.
	 */
	keysock_vmem = vmem_create("keysock", (void *)1, MAXMIN, 1,
	    NULL, NULL, NULL, 1, VM_SLEEP | VMC_IDENTIFIER);

	/*
	 * We want to be informed each time a stack is created or
	 * destroyed in the kernel, so we can maintain the
	 * set of keysock_stack_t's.
	 */
	netstack_register(NS_KEYSOCK, keysock_stack_init, NULL,
	    keysock_stack_fini);

	return (B_TRUE);
}
395 
396 /*
397  * Walk through the param array specified registering each element with the
398  * named dispatch handler.
399  */
400 static boolean_t
401 keysock_param_register(IDP *ndp, keysockparam_t *ksp, int cnt)
402 {
403 	for (; cnt-- > 0; ksp++) {
404 		if (ksp->keysock_param_name != NULL &&
405 		    ksp->keysock_param_name[0]) {
406 			if (!nd_load(ndp,
407 			    ksp->keysock_param_name,
408 			    keysock_param_get, keysock_param_set,
409 			    (caddr_t)ksp)) {
410 				nd_free(ndp);
411 				return (B_FALSE);
412 			}
413 		}
414 	}
415 	return (B_TRUE);
416 }
417 
418 /*
419  * Initialize keysock for one stack instance
420  */
421 /* ARGSUSED */
422 static void *
423 keysock_stack_init(netstackid_t stackid, netstack_t *ns)
424 {
425 	keysock_stack_t	*keystack;
426 	keysockparam_t *ksp;
427 
428 	keystack = (keysock_stack_t *)kmem_zalloc(sizeof (*keystack), KM_SLEEP);
429 	keystack->keystack_netstack = ns;
430 
431 	keystack->keystack_acquire_seq = 0xffffffff;
432 
433 	ksp = (keysockparam_t *)kmem_alloc(sizeof (lcl_param_arr), KM_SLEEP);
434 	keystack->keystack_params = ksp;
435 	bcopy(lcl_param_arr, ksp, sizeof (lcl_param_arr));
436 
437 	(void) keysock_param_register(&keystack->keystack_g_nd, ksp,
438 	    A_CNT(lcl_param_arr));
439 
440 	mutex_init(&keystack->keystack_list_lock, NULL, MUTEX_DEFAULT, NULL);
441 	mutex_init(&keystack->keystack_consumers_lock,
442 	    NULL, MUTEX_DEFAULT, NULL);
443 	mutex_init(&keystack->keystack_param_lock, NULL, MUTEX_DEFAULT, NULL);
444 	return (keystack);
445 }
446 
/*
 * Free NDD variable space, and other destructors, for keysock.
 *
 * Undoes keysock_ddi_init(): unregisters the netstack callbacks and then
 * destroys the minor-number arena.
 */
void
keysock_ddi_destroy(void)
{
	netstack_unregister(NS_KEYSOCK);
	vmem_destroy(keysock_vmem);
}
456 
457 /*
458  * Remove one stack instance from keysock
459  */
460 /* ARGSUSED */
461 static void
462 keysock_stack_fini(netstackid_t stackid, void *arg)
463 {
464 	keysock_stack_t *keystack = (keysock_stack_t *)arg;
465 
466 	nd_free(&keystack->keystack_g_nd);
467 	kmem_free(keystack->keystack_params, sizeof (lcl_param_arr));
468 	keystack->keystack_params = NULL;
469 
470 	mutex_destroy(&keystack->keystack_list_lock);
471 	mutex_destroy(&keystack->keystack_consumers_lock);
472 	mutex_destroy(&keystack->keystack_param_lock);
473 
474 	kmem_free(keystack, sizeof (*keystack));
475 }
476 
477 /*
478  * Close routine for keysock.
479  */
480 static int
481 keysock_close(queue_t *q)
482 {
483 	keysock_t *ks;
484 	keysock_consumer_t *kc;
485 	void *ptr = q->q_ptr;
486 	int size;
487 	keysock_stack_t	*keystack;
488 
489 
490 	qprocsoff(q);
491 
492 	/* Safe assumption. */
493 	ASSERT(ptr != NULL);
494 
495 	if (WR(q)->q_next) {
496 		kc = (keysock_consumer_t *)ptr;
497 		keystack = kc->kc_keystack;
498 
499 		ks1dbg(keystack, ("Module close, removing a consumer (%d).\n",
500 		    kc->kc_sa_type));
501 		/*
502 		 * Because of PERMOD open/close exclusive perimeter, I
503 		 * can inspect KC_FLUSHING w/o locking down kc->kc_lock.
504 		 */
505 		if (kc->kc_flags & KC_FLUSHING) {
506 			/*
507 			 * If this decrement was the last one, send
508 			 * down the next pending one, if any.
509 			 *
510 			 * With a PERMOD perimeter, the mutexes ops aren't
511 			 * really necessary, but if we ever loosen up, we will
512 			 * have this bit covered already.
513 			 */
514 			keystack->keystack_flushdump--;
515 			if (keystack->keystack_flushdump == 0) {
516 				/*
517 				 * The flush/dump terminated by having a
518 				 * consumer go away.  I need to send up to the
519 				 * appropriate keysock all of the relevant
520 				 * information.  Unfortunately, I don't
521 				 * have that handy.
522 				 */
523 				ks0dbg(("Consumer went away while flushing or"
524 				    " dumping.\n"));
525 			}
526 		}
527 		size = sizeof (keysock_consumer_t);
528 		mutex_enter(&keystack->keystack_consumers_lock);
529 		keystack->keystack_consumers[kc->kc_sa_type] = NULL;
530 		mutex_exit(&keystack->keystack_consumers_lock);
531 		mutex_destroy(&kc->kc_lock);
532 		netstack_rele(kc->kc_keystack->keystack_netstack);
533 	} else {
534 		ks = (keysock_t *)ptr;
535 		keystack = ks->keysock_keystack;
536 
537 		ks3dbg(keystack,
538 		    ("Driver close, PF_KEY socket is going away.\n"));
539 		if ((ks->keysock_flags & KEYSOCK_EXTENDED) != 0)
540 			atomic_add_32(&keystack->keystack_num_extended, -1);
541 		size = sizeof (keysock_t);
542 		mutex_enter(&keystack->keystack_list_lock);
543 		*(ks->keysock_ptpn) = ks->keysock_next;
544 		if (ks->keysock_next != NULL)
545 			ks->keysock_next->keysock_ptpn = ks->keysock_ptpn;
546 		mutex_exit(&keystack->keystack_list_lock);
547 		mutex_destroy(&ks->keysock_lock);
548 		vmem_free(keysock_vmem, (void *)(uintptr_t)ks->keysock_serial,
549 		    1);
550 		netstack_rele(ks->keysock_keystack->keystack_netstack);
551 	}
552 
553 	/* Now I'm free. */
554 	kmem_free(ptr, size);
555 	return (0);
556 }
557 /*
558  * Open routine for keysock.
559  */
560 /* ARGSUSED */
561 static int
562 keysock_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
563 {
564 	keysock_t *ks;
565 	keysock_consumer_t *kc;
566 	mblk_t *mp;
567 	ipsec_info_t *ii;
568 	netstack_t *ns;
569 	keysock_stack_t *keystack;
570 
571 	if (secpolicy_ip_config(credp, B_FALSE) != 0) {
572 		/* Privilege debugging will log the error */
573 		return (EPERM);
574 	}
575 
576 	if (q->q_ptr != NULL)
577 		return (0);  /* Re-open of an already open instance. */
578 
579 	ns = netstack_find_by_cred(credp);
580 	ASSERT(ns != NULL);
581 	keystack = ns->netstack_keysock;
582 	ASSERT(keystack != NULL);
583 
584 	ks3dbg(keystack, ("Entering keysock open.\n"));
585 
586 	if (keystack->keystack_plumbed < 1) {
587 		netstack_t *ns = keystack->keystack_netstack;
588 
589 		keystack->keystack_plumbed = 0;
590 #ifdef NS_DEBUG
591 		printf("keysock_open(%d) - plumb\n",
592 		    keystack->keystack_netstack->netstack_stackid);
593 #endif
594 		/*
595 		 * Don't worry about ipsec_failure being true here.
596 		 * (See ip.c).  An open of keysock should try and force
597 		 * the issue.  Maybe it was a transient failure.
598 		 */
599 		ipsec_loader_loadnow(ns->netstack_ipsec);
600 	}
601 
602 	if (sflag & MODOPEN) {
603 		/* Initialize keysock_consumer state here. */
604 		kc = kmem_zalloc(sizeof (keysock_consumer_t), KM_NOSLEEP);
605 		if (kc == NULL) {
606 			netstack_rele(keystack->keystack_netstack);
607 			return (ENOMEM);
608 		}
609 		mutex_init(&kc->kc_lock, NULL, MUTEX_DEFAULT, 0);
610 		kc->kc_rq = q;
611 		kc->kc_wq = WR(q);
612 
613 		q->q_ptr = kc;
614 		WR(q)->q_ptr = kc;
615 
616 		kc->kc_keystack = keystack;
617 		qprocson(q);
618 
619 		/*
620 		 * Send down initial message to whatever I was pushed on top
621 		 * of asking for its consumer type.  The reply will set it.
622 		 */
623 
624 		/* Allocate it. */
625 		mp = allocb(sizeof (ipsec_info_t), BPRI_HI);
626 		if (mp == NULL) {
627 			ks1dbg(keystack, (
628 			    "keysock_open:  Cannot allocate KEYSOCK_HELLO.\n"));
629 			/* Do I need to set these to null? */
630 			q->q_ptr = NULL;
631 			WR(q)->q_ptr = NULL;
632 			mutex_destroy(&kc->kc_lock);
633 			kmem_free(kc, sizeof (*kc));
634 			netstack_rele(keystack->keystack_netstack);
635 			return (ENOMEM);
636 		}
637 
638 		/* If I allocated okay, putnext to what I was pushed atop. */
639 		mp->b_wptr += sizeof (ipsec_info_t);
640 		mp->b_datap->db_type = M_CTL;
641 		ii = (ipsec_info_t *)mp->b_rptr;
642 		ii->ipsec_info_type = KEYSOCK_HELLO;
643 		/* Length only of type/len. */
644 		ii->ipsec_info_len = sizeof (ii->ipsec_allu);
645 		ks2dbg(keystack, ("Ready to putnext KEYSOCK_HELLO.\n"));
646 		putnext(kc->kc_wq, mp);
647 	} else {
648 		minor_t ksminor;
649 
650 		/* Initialize keysock state. */
651 
652 		ks2dbg(keystack, ("Made it into PF_KEY socket open.\n"));
653 
654 		ksminor = (minor_t)(uintptr_t)
655 		    vmem_alloc(keysock_vmem, 1, VM_NOSLEEP);
656 		if (ksminor == 0) {
657 			netstack_rele(keystack->keystack_netstack);
658 			return (ENOMEM);
659 		}
660 		ks = kmem_zalloc(sizeof (keysock_t), KM_NOSLEEP);
661 		if (ks == NULL) {
662 			vmem_free(keysock_vmem, (void *)(uintptr_t)ksminor, 1);
663 			netstack_rele(keystack->keystack_netstack);
664 			return (ENOMEM);
665 		}
666 
667 		mutex_init(&ks->keysock_lock, NULL, MUTEX_DEFAULT, 0);
668 		ks->keysock_rq = q;
669 		ks->keysock_wq = WR(q);
670 		ks->keysock_state = TS_UNBND;
671 		ks->keysock_serial = ksminor;
672 
673 		q->q_ptr = ks;
674 		WR(q)->q_ptr = ks;
675 		ks->keysock_keystack = keystack;
676 
677 		/*
678 		 * The receive hiwat is only looked at on the stream head
679 		 * queue.  Store in q_hiwat in order to return on SO_RCVBUF
680 		 * getsockopts.
681 		 */
682 
683 		q->q_hiwat = keystack->keystack_recv_hiwat;
684 
685 		/*
686 		 * The transmit hiwat/lowat is only looked at on IP's queue.
687 		 * Store in q_hiwat/q_lowat in order to return on
688 		 * SO_SNDBUF/SO_SNDLOWAT getsockopts.
689 		 */
690 
691 		WR(q)->q_hiwat = keystack->keystack_xmit_hiwat;
692 		WR(q)->q_lowat = keystack->keystack_xmit_lowat;
693 
694 		*devp = makedevice(getmajor(*devp), ksminor);
695 
696 		/*
697 		 * Thread keysock into the global keysock list.
698 		 */
699 		mutex_enter(&keystack->keystack_list_lock);
700 		ks->keysock_next = keystack->keystack_list;
701 		ks->keysock_ptpn = &keystack->keystack_list;
702 		if (keystack->keystack_list != NULL) {
703 			keystack->keystack_list->keysock_ptpn =
704 			    &ks->keysock_next;
705 		}
706 		keystack->keystack_list = ks;
707 		mutex_exit(&keystack->keystack_list_lock);
708 
709 		qprocson(q);
710 		(void) proto_set_rx_hiwat(q, NULL,
711 		    keystack->keystack_recv_hiwat);
712 		/*
713 		 * Wait outside the keysock module perimeter for IPsec
714 		 * plumbing to be completed.  If it fails, keysock_close()
715 		 * undoes everything we just did.
716 		 */
717 		if (!ipsec_loader_wait(q,
718 		    keystack->keystack_netstack->netstack_ipsec)) {
719 			(void) keysock_close(q);
720 			return (EPFNOSUPPORT);
721 		}
722 	}
723 
724 	return (0);
725 }
726 
727 /* BELOW THIS LINE ARE ROUTINES INCLUDING AND RELATED TO keysock_wput(). */
728 
729 /*
730  * Copy relevant state bits.
731  */
732 static void
733 keysock_copy_info(struct T_info_ack *tap, keysock_t *ks)
734 {
735 	*tap = keysock_g_t_info_ack;
736 	tap->CURRENT_state = ks->keysock_state;
737 	tap->OPT_size = keysock_max_optsize;
738 }
739 
740 /*
741  * This routine responds to T_CAPABILITY_REQ messages.  It is called by
742  * keysock_wput.  Much of the T_CAPABILITY_ACK information is copied from
743  * keysock_g_t_info_ack.  The current state of the stream is copied from
744  * keysock_state.
745  */
746 static void
747 keysock_capability_req(queue_t *q, mblk_t *mp)
748 {
749 	keysock_t *ks = (keysock_t *)q->q_ptr;
750 	t_uscalar_t cap_bits1;
751 	struct T_capability_ack	*tcap;
752 
753 	cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1;
754 
755 	mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack),
756 	    mp->b_datap->db_type, T_CAPABILITY_ACK);
757 	if (mp == NULL)
758 		return;
759 
760 	tcap = (struct T_capability_ack *)mp->b_rptr;
761 	tcap->CAP_bits1 = 0;
762 
763 	if (cap_bits1 & TC1_INFO) {
764 		keysock_copy_info(&tcap->INFO_ack, ks);
765 		tcap->CAP_bits1 |= TC1_INFO;
766 	}
767 
768 	qreply(q, mp);
769 }
770 
771 /*
772  * This routine responds to T_INFO_REQ messages. It is called by
773  * keysock_wput_other.
774  * Most of the T_INFO_ACK information is copied from keysock_g_t_info_ack.
775  * The current state of the stream is copied from keysock_state.
776  */
777 static void
778 keysock_info_req(q, mp)
779 	queue_t	*q;
780 	mblk_t	*mp;
781 {
782 	mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO,
783 	    T_INFO_ACK);
784 	if (mp == NULL)
785 		return;
786 	keysock_copy_info((struct T_info_ack *)mp->b_rptr,
787 	    (keysock_t *)q->q_ptr);
788 	qreply(q, mp);
789 }
790 
791 /*
792  * keysock_err_ack. This routine creates a
793  * T_ERROR_ACK message and passes it
794  * upstream.
795  */
796 static void
797 keysock_err_ack(q, mp, t_error, sys_error)
798 	queue_t	*q;
799 	mblk_t	*mp;
800 	int	t_error;
801 	int	sys_error;
802 {
803 	if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL)
804 		qreply(q, mp);
805 }
806 
807 /*
808  * This routine retrieves the current status of socket options.
809  * It returns the size of the option retrieved.
810  */
811 /* ARGSUSED */
812 int
813 keysock_opt_get(queue_t *q, int level, int name, uchar_t *ptr)
814 {
815 	int *i1 = (int *)ptr;
816 	keysock_t *ks = (keysock_t *)q->q_ptr;
817 
818 	switch (level) {
819 	case SOL_SOCKET:
820 		mutex_enter(&ks->keysock_lock);
821 		switch (name) {
822 		case SO_TYPE:
823 			*i1 = SOCK_RAW;
824 			break;
825 		case SO_USELOOPBACK:
826 			*i1 = (int)(!((ks->keysock_flags & KEYSOCK_NOLOOP) ==
827 			    KEYSOCK_NOLOOP));
828 			break;
829 		/*
830 		 * The following two items can be manipulated,
831 		 * but changing them should do nothing.
832 		 */
833 		case SO_SNDBUF:
834 			*i1 = (int)q->q_hiwat;
835 			break;
836 		case SO_RCVBUF:
837 			*i1 = (int)(RD(q)->q_hiwat);
838 			break;
839 		}
840 		mutex_exit(&ks->keysock_lock);
841 		break;
842 	default:
843 		return (0);
844 	}
845 	return (sizeof (int));
846 }
847 
848 /*
849  * This routine sets socket options.
850  */
851 /* ARGSUSED */
852 int
853 keysock_opt_set(queue_t *q, uint_t mgmt_flags, int level,
854     int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp,
855     uchar_t *outvalp, void *thisdg_attrs, cred_t *cr, mblk_t *mblk)
856 {
857 	int *i1 = (int *)invalp;
858 	keysock_t *ks = (keysock_t *)q->q_ptr;
859 	keysock_stack_t	*keystack = ks->keysock_keystack;
860 
861 	switch (level) {
862 	case SOL_SOCKET:
863 		mutex_enter(&ks->keysock_lock);
864 		switch (name) {
865 		case SO_USELOOPBACK:
866 			if (!(*i1))
867 				ks->keysock_flags |= KEYSOCK_NOLOOP;
868 			else ks->keysock_flags &= ~KEYSOCK_NOLOOP;
869 			break;
870 		case SO_SNDBUF:
871 			if (*i1 > keystack->keystack_max_buf)
872 				return (ENOBUFS);
873 			q->q_hiwat = *i1;
874 			break;
875 		case SO_RCVBUF:
876 			if (*i1 > keystack->keystack_max_buf)
877 				return (ENOBUFS);
878 			RD(q)->q_hiwat = *i1;
879 			(void) proto_set_rx_hiwat(RD(q), NULL, *i1);
880 			break;
881 		}
882 		mutex_exit(&ks->keysock_lock);
883 		break;
884 	}
885 	return (0);
886 }
887 
888 /*
889  * Handle STREAMS messages.
890  */
891 static void
892 keysock_wput_other(queue_t *q, mblk_t *mp)
893 {
894 	struct iocblk *iocp;
895 	int error;
896 	keysock_t *ks = (keysock_t *)q->q_ptr;
897 	keysock_stack_t	*keystack = ks->keysock_keystack;
898 	cred_t		*cr;
899 
900 	switch (mp->b_datap->db_type) {
901 	case M_PROTO:
902 	case M_PCPROTO:
903 		if ((mp->b_wptr - mp->b_rptr) < sizeof (long)) {
904 			ks3dbg(keystack, (
905 			    "keysock_wput_other: Not big enough M_PROTO\n"));
906 			freemsg(mp);
907 			return;
908 		}
909 		cr = zone_get_kcred(netstackid_to_zoneid(
910 		    keystack->keystack_netstack->netstack_stackid));
911 		ASSERT(cr != NULL);
912 
913 		switch (((union T_primitives *)mp->b_rptr)->type) {
914 		case T_CAPABILITY_REQ:
915 			keysock_capability_req(q, mp);
916 			break;
917 		case T_INFO_REQ:
918 			keysock_info_req(q, mp);
919 			break;
920 		case T_SVR4_OPTMGMT_REQ:
921 			(void) svr4_optcom_req(q, mp, DB_CREDDEF(mp, cr),
922 			    &keysock_opt_obj, B_FALSE);
923 			break;
924 		case T_OPTMGMT_REQ:
925 			(void) tpi_optcom_req(q, mp, DB_CREDDEF(mp, cr),
926 			    &keysock_opt_obj, B_FALSE);
927 			break;
928 		case T_DATA_REQ:
929 		case T_EXDATA_REQ:
930 		case T_ORDREL_REQ:
931 			/* Illegal for keysock. */
932 			freemsg(mp);
933 			(void) putnextctl1(RD(q), M_ERROR, EPROTO);
934 			break;
935 		default:
936 			/* Not supported by keysock. */
937 			keysock_err_ack(q, mp, TNOTSUPPORT, 0);
938 			break;
939 		}
940 		crfree(cr);
941 		return;
942 	case M_IOCTL:
943 		iocp = (struct iocblk *)mp->b_rptr;
944 		error = EINVAL;
945 
946 		switch (iocp->ioc_cmd) {
947 		case ND_SET:
948 		case ND_GET:
949 			if (nd_getset(q, keystack->keystack_g_nd, mp)) {
950 				qreply(q, mp);
951 				return;
952 			} else
953 				error = ENOENT;
954 			/* FALLTHRU */
955 		default:
956 			miocnak(q, mp, 0, error);
957 			return;
958 		}
959 	case M_FLUSH:
960 		if (*mp->b_rptr & FLUSHW) {
961 			flushq(q, FLUSHALL);
962 			*mp->b_rptr &= ~FLUSHW;
963 		}
964 		if (*mp->b_rptr & FLUSHR) {
965 			qreply(q, mp);
966 			return;
967 		}
968 		/* Else FALLTHRU */
969 	}
970 
971 	/* If fell through, just black-hole the message. */
972 	freemsg(mp);
973 }
974 
975 /*
976  * Transmit a PF_KEY error message to the instance either pointed to
977  * by ks, the instance with serial number serial, or more, depending.
978  *
979  * The faulty message (or a reasonable facsimile thereof) is in mp.
980  * This function will free mp or recycle it for delivery, thereby causing
981  * the stream head to free it.
982  */
983 static void
984 keysock_error(keysock_t *ks, mblk_t *mp, int error, int diagnostic)
985 {
986 	sadb_msg_t *samsg = (sadb_msg_t *)mp->b_rptr;
987 	keysock_stack_t	*keystack = ks->keysock_keystack;
988 
989 	ASSERT(mp->b_datap->db_type == M_DATA);
990 
991 	if (samsg->sadb_msg_type < SADB_GETSPI ||
992 	    samsg->sadb_msg_type > SADB_MAX)
993 		samsg->sadb_msg_type = SADB_RESERVED;
994 
995 	/*
996 	 * Strip out extension headers.
997 	 */
998 	ASSERT(mp->b_rptr + sizeof (*samsg) <= mp->b_datap->db_lim);
999 	mp->b_wptr = mp->b_rptr + sizeof (*samsg);
1000 	samsg->sadb_msg_len = SADB_8TO64(sizeof (sadb_msg_t));
1001 	samsg->sadb_msg_errno = (uint8_t)error;
1002 	samsg->sadb_x_msg_diagnostic = (uint16_t)diagnostic;
1003 
1004 	keysock_passup(mp, samsg, ks->keysock_serial, NULL, B_FALSE, keystack);
1005 }
1006 
1007 /*
1008  * Pass down a message to a consumer.  Wrap it in KEYSOCK_IN, and copy
1009  * in the extv if passed in.
1010  */
1011 static void
1012 keysock_passdown(keysock_t *ks, mblk_t *mp, uint8_t satype, sadb_ext_t *extv[],
1013     boolean_t flushmsg)
1014 {
1015 	keysock_consumer_t *kc;
1016 	mblk_t *wrapper;
1017 	keysock_in_t *ksi;
1018 	int i;
1019 	keysock_stack_t	*keystack = ks->keysock_keystack;
1020 
1021 	wrapper = allocb(sizeof (ipsec_info_t), BPRI_HI);
1022 	if (wrapper == NULL) {
1023 		ks3dbg(keystack, ("keysock_passdown: allocb failed.\n"));
1024 		if (extv[SADB_EXT_KEY_ENCRYPT] != NULL)
1025 			bzero(extv[SADB_EXT_KEY_ENCRYPT],
1026 			    SADB_64TO8(
1027 			    extv[SADB_EXT_KEY_ENCRYPT]->sadb_ext_len));
1028 		if (extv[SADB_EXT_KEY_AUTH] != NULL)
1029 			bzero(extv[SADB_EXT_KEY_AUTH],
1030 			    SADB_64TO8(
1031 			    extv[SADB_EXT_KEY_AUTH]->sadb_ext_len));
1032 		if (flushmsg) {
1033 			ks0dbg((
1034 			    "keysock: Downwards flush/dump message failed!\n"));
1035 			/* If this is true, I hold the perimeter. */
1036 			keystack->keystack_flushdump--;
1037 		}
1038 		freemsg(mp);
1039 		return;
1040 	}
1041 
1042 	wrapper->b_datap->db_type = M_CTL;
1043 	ksi = (keysock_in_t *)wrapper->b_rptr;
1044 	ksi->ks_in_type = KEYSOCK_IN;
1045 	ksi->ks_in_len = sizeof (keysock_in_t);
1046 	if (extv[SADB_EXT_ADDRESS_SRC] != NULL)
1047 		ksi->ks_in_srctype = KS_IN_ADDR_UNKNOWN;
1048 	else ksi->ks_in_srctype = KS_IN_ADDR_NOTTHERE;
1049 	if (extv[SADB_EXT_ADDRESS_DST] != NULL)
1050 		ksi->ks_in_dsttype = KS_IN_ADDR_UNKNOWN;
1051 	else ksi->ks_in_dsttype = KS_IN_ADDR_NOTTHERE;
1052 	for (i = 0; i <= SADB_EXT_MAX; i++)
1053 		ksi->ks_in_extv[i] = extv[i];
1054 	ksi->ks_in_serial = ks->keysock_serial;
1055 	wrapper->b_wptr += sizeof (ipsec_info_t);
1056 	wrapper->b_cont = mp;
1057 
1058 	/*
1059 	 * Find the appropriate consumer where the message is passed down.
1060 	 */
1061 	kc = keystack->keystack_consumers[satype];
1062 	if (kc == NULL) {
1063 		freeb(wrapper);
1064 		keysock_error(ks, mp, EINVAL, SADB_X_DIAGNOSTIC_UNKNOWN_SATYPE);
1065 		if (flushmsg) {
1066 			ks0dbg((
1067 			    "keysock: Downwards flush/dump message failed!\n"));
1068 			/* If this is true, I hold the perimeter. */
1069 			keystack->keystack_flushdump--;
1070 		}
1071 		return;
1072 	}
1073 
1074 	/*
1075 	 * NOTE: There used to be code in here to spin while a flush or
1076 	 *	 dump finished.  Keysock now assumes that consumers have enough
1077 	 *	 MT-savviness to deal with that.
1078 	 */
1079 
1080 	/*
1081 	 * Current consumers (AH and ESP) are guaranteed to return a
1082 	 * FLUSH or DUMP message back, so when we reach here, we don't
1083 	 * have to worry about keysock_flushdumps.
1084 	 */
1085 
1086 	putnext(kc->kc_wq, wrapper);
1087 }
1088 
1089 /*
1090  * High-level reality checking of extensions.
1091  */
1092 static boolean_t
1093 ext_check(sadb_ext_t *ext, keysock_stack_t *keystack)
1094 {
1095 	int i;
1096 	uint64_t *lp;
1097 	sadb_ident_t *id;
1098 	char *idstr;
1099 
1100 	switch (ext->sadb_ext_type) {
1101 	case SADB_EXT_ADDRESS_SRC:
1102 	case SADB_EXT_ADDRESS_DST:
1103 	case SADB_X_EXT_ADDRESS_INNER_SRC:
1104 	case SADB_X_EXT_ADDRESS_INNER_DST:
1105 		/* Check for at least enough addtl length for a sockaddr. */
1106 		if (ext->sadb_ext_len <= SADB_8TO64(sizeof (sadb_address_t)))
1107 			return (B_FALSE);
1108 		break;
1109 	case SADB_EXT_LIFETIME_HARD:
1110 	case SADB_EXT_LIFETIME_SOFT:
1111 	case SADB_EXT_LIFETIME_CURRENT:
1112 		if (ext->sadb_ext_len != SADB_8TO64(sizeof (sadb_lifetime_t)))
1113 			return (B_FALSE);
1114 		break;
1115 	case SADB_EXT_SPIRANGE:
1116 		/* See if the SPI range is legit. */
1117 		if (htonl(((sadb_spirange_t *)ext)->sadb_spirange_min) >
1118 		    htonl(((sadb_spirange_t *)ext)->sadb_spirange_max))
1119 			return (B_FALSE);
1120 		break;
1121 	case SADB_EXT_KEY_AUTH:
1122 	case SADB_EXT_KEY_ENCRYPT:
1123 		/* Key length check. */
1124 		if (((sadb_key_t *)ext)->sadb_key_bits == 0)
1125 			return (B_FALSE);
1126 		/*
1127 		 * Check to see if the key length (in bits) is less than the
1128 		 * extension length (in 8-bits words).
1129 		 */
1130 		if ((roundup(SADB_1TO8(((sadb_key_t *)ext)->sadb_key_bits), 8) +
1131 		    sizeof (sadb_key_t)) != SADB_64TO8(ext->sadb_ext_len)) {
1132 			ks1dbg(keystack, (
1133 			    "ext_check:  Key bits/length inconsistent.\n"));
1134 			ks1dbg(keystack, ("%d bits, len is %d bytes.\n",
1135 			    ((sadb_key_t *)ext)->sadb_key_bits,
1136 			    SADB_64TO8(ext->sadb_ext_len)));
1137 			return (B_FALSE);
1138 		}
1139 
1140 		/* All-zeroes key check. */
1141 		lp = (uint64_t *)(((char *)ext) + sizeof (sadb_key_t));
1142 		for (i = 0;
1143 		    i < (ext->sadb_ext_len - SADB_8TO64(sizeof (sadb_key_t)));
1144 		    i++)
1145 			if (lp[i] != 0)
1146 				break;	/* Out of for loop. */
1147 		/* If finished the loop naturally, it's an all zero key. */
1148 		if (lp[i] == 0)
1149 			return (B_FALSE);
1150 		break;
1151 	case SADB_EXT_IDENTITY_SRC:
1152 	case SADB_EXT_IDENTITY_DST:
1153 		/*
1154 		 * Make sure the strings in these identities are
1155 		 * null-terminated.  RFC 2367 underspecified how to handle
1156 		 * such a case.  I "proactively" null-terminate the string
1157 		 * at the last byte if it's not terminated sooner.
1158 		 */
1159 		id = (sadb_ident_t *)ext;
1160 		i = SADB_64TO8(id->sadb_ident_len);
1161 		i -= sizeof (sadb_ident_t);
1162 		idstr = (char *)(id + 1);
1163 		while (*idstr != '\0' && i > 0) {
1164 			i--;
1165 			idstr++;
1166 		}
1167 		if (i == 0) {
1168 			/*
1169 			 * I.e., if the bozo user didn't NULL-terminate the
1170 			 * string...
1171 			 */
1172 			idstr--;
1173 			*idstr = '\0';
1174 		}
1175 		break;
1176 	}
1177 	return (B_TRUE);	/* For now... */
1178 }
1179 
1180 /* Return values for keysock_get_ext(). */
1181 #define	KGE_OK	0
1182 #define	KGE_DUP	1
1183 #define	KGE_UNK	2
1184 #define	KGE_LEN	3
1185 #define	KGE_CHK	4
1186 
1187 /*
1188  * Parse basic extension headers and return in the passed-in pointer vector.
1189  * Return values include:
1190  *
1191  *	KGE_OK	Everything's nice and parsed out.
1192  *		If there are no extensions, place NULL in extv[0].
1193  *	KGE_DUP	There is a duplicate extension.
1194  *		First instance in appropriate bin.  First duplicate in
1195  *		extv[0].
1196  *	KGE_UNK	Unknown extension type encountered.  extv[0] contains
1197  *		unknown header.
1198  *	KGE_LEN	Extension length error.
1199  *	KGE_CHK	High-level reality check failed on specific extension.
1200  *
1201  * My apologies for some of the pointer arithmetic in here.  I'm thinking
1202  * like an assembly programmer, yet trying to make the compiler happy.
1203  */
1204 static int
1205 keysock_get_ext(sadb_ext_t *extv[], sadb_msg_t *basehdr, uint_t msgsize,
1206     keysock_stack_t *keystack)
1207 {
1208 	bzero(extv, sizeof (sadb_ext_t *) * (SADB_EXT_MAX + 1));
1209 
1210 	/* Use extv[0] as the "current working pointer". */
1211 
1212 	extv[0] = (sadb_ext_t *)(basehdr + 1);
1213 
1214 	while (extv[0] < (sadb_ext_t *)(((uint8_t *)basehdr) + msgsize)) {
1215 		/* Check for unknown headers. */
1216 		if (extv[0]->sadb_ext_type == 0 ||
1217 		    extv[0]->sadb_ext_type > SADB_EXT_MAX)
1218 			return (KGE_UNK);
1219 
1220 		/*
1221 		 * Check length.  Use uint64_t because extlen is in units
1222 		 * of 64-bit words.  If length goes beyond the msgsize,
1223 		 * return an error.  (Zero length also qualifies here.)
1224 		 */
1225 		if (extv[0]->sadb_ext_len == 0 ||
1226 		    (void *)((uint64_t *)extv[0] + extv[0]->sadb_ext_len) >
1227 		    (void *)((uint8_t *)basehdr + msgsize))
1228 			return (KGE_LEN);
1229 
1230 		/* Check for redundant headers. */
1231 		if (extv[extv[0]->sadb_ext_type] != NULL)
1232 			return (KGE_DUP);
1233 
1234 		/*
1235 		 * Reality check the extension if possible at the keysock
1236 		 * level.
1237 		 */
1238 		if (!ext_check(extv[0], keystack))
1239 			return (KGE_CHK);
1240 
1241 		/* If I make it here, assign the appropriate bin. */
1242 		extv[extv[0]->sadb_ext_type] = extv[0];
1243 
1244 		/* Advance pointer (See above for uint64_t ptr reasoning.) */
1245 		extv[0] = (sadb_ext_t *)
1246 		    ((uint64_t *)extv[0] + extv[0]->sadb_ext_len);
1247 	}
1248 
1249 	/* Everything's cool. */
1250 
1251 	/*
1252 	 * If extv[0] == NULL, then there are no extension headers in this
1253 	 * message.  Ensure that this is the case.
1254 	 */
1255 	if (extv[0] == (sadb_ext_t *)(basehdr + 1))
1256 		extv[0] = NULL;
1257 
1258 	return (KGE_OK);
1259 }
1260 
1261 /*
1262  * qwriter() callback to handle flushes and dumps.  This routine will hold
1263  * the inner perimeter.
1264  */
1265 void
1266 keysock_do_flushdump(queue_t *q, mblk_t *mp)
1267 {
1268 	int i, start, finish;
1269 	mblk_t *mp1 = NULL;
1270 	keysock_t *ks = (keysock_t *)q->q_ptr;
1271 	sadb_ext_t *extv[SADB_EXT_MAX + 1];
1272 	sadb_msg_t *samsg = (sadb_msg_t *)mp->b_rptr;
1273 	keysock_stack_t	*keystack = ks->keysock_keystack;
1274 
1275 	/*
1276 	 * I am guaranteed this will work.  I did the work in keysock_parse()
1277 	 * already.
1278 	 */
1279 	(void) keysock_get_ext(extv, samsg, SADB_64TO8(samsg->sadb_msg_len),
1280 	    keystack);
1281 
1282 	/*
1283 	 * I hold the perimeter, therefore I don't need to use atomic ops.
1284 	 */
1285 	if (keystack->keystack_flushdump != 0) {
1286 		/* XXX Should I instead use EBUSY? */
1287 		/* XXX Or is there a way to queue these up? */
1288 		keysock_error(ks, mp, ENOMEM, SADB_X_DIAGNOSTIC_NONE);
1289 		return;
1290 	}
1291 
1292 	if (samsg->sadb_msg_satype == SADB_SATYPE_UNSPEC) {
1293 		start = 0;
1294 		finish = KEYSOCK_MAX_CONSUMERS - 1;
1295 	} else {
1296 		start = samsg->sadb_msg_satype;
1297 		finish = samsg->sadb_msg_satype;
1298 	}
1299 
1300 	/*
1301 	 * Fill up keysock_flushdump with the number of outstanding dumps
1302 	 * and/or flushes.
1303 	 */
1304 
1305 	keystack->keystack_flushdump_errno = 0;
1306 
1307 	/*
1308 	 * Okay, I hold the perimeter.  Eventually keysock_flushdump will
1309 	 * contain the number of consumers with outstanding flush operations.
1310 	 *
1311 	 * SO, here's the plan:
1312 	 *	* For each relevant consumer (Might be one, might be all)
1313 	 *		* Twiddle on the FLUSHING flag.
1314 	 *		* Pass down the FLUSH/DUMP message.
1315 	 *
1316 	 * When I see upbound FLUSH/DUMP messages, I will decrement the
1317 	 * keysock_flushdump.  When I decrement it to 0, I will pass the
1318 	 * FLUSH/DUMP message back up to the PF_KEY sockets.  Because I will
1319 	 * pass down the right SA type to the consumer (either its own, or
1320 	 * that of UNSPEC), the right one will be reflected from each consumer,
1321 	 * and accordingly back to the socket.
1322 	 */
1323 
1324 	mutex_enter(&keystack->keystack_consumers_lock);
1325 	for (i = start; i <= finish; i++) {
1326 		if (keystack->keystack_consumers[i] != NULL) {
1327 			mp1 = copymsg(mp);
1328 			if (mp1 == NULL) {
1329 				ks0dbg(("SADB_FLUSH copymsg() failed.\n"));
1330 				/*
1331 				 * Error?  And what about outstanding
1332 				 * flushes?  Oh, yeah, they get sucked up and
1333 				 * the counter is decremented.  Consumers
1334 				 * (see keysock_passdown()) are guaranteed
1335 				 * to deliver back a flush request, even if
1336 				 * it's an error.
1337 				 */
1338 				keysock_error(ks, mp, ENOMEM,
1339 				    SADB_X_DIAGNOSTIC_NONE);
1340 				return;
1341 			}
1342 			/*
1343 			 * Because my entry conditions are met above, the
1344 			 * following assertion should hold true.
1345 			 */
1346 			mutex_enter(&keystack->keystack_consumers[i]->kc_lock);
1347 			ASSERT((keystack->keystack_consumers[i]->kc_flags &
1348 			    KC_FLUSHING) == 0);
1349 			keystack->keystack_consumers[i]->kc_flags |=
1350 			    KC_FLUSHING;
1351 			mutex_exit(&(keystack->keystack_consumers[i]->kc_lock));
1352 			/* Always increment the number of flushes... */
1353 			keystack->keystack_flushdump++;
1354 			/* Guaranteed to return a message. */
1355 			keysock_passdown(ks, mp1, i, extv, B_TRUE);
1356 		} else if (start == finish) {
1357 			/*
1358 			 * In case where start == finish, and there's no
1359 			 * consumer, should we force an error?  Yes.
1360 			 */
1361 			mutex_exit(&keystack->keystack_consumers_lock);
1362 			keysock_error(ks, mp, EINVAL,
1363 			    SADB_X_DIAGNOSTIC_UNKNOWN_SATYPE);
1364 			return;
1365 		}
1366 	}
1367 	mutex_exit(&keystack->keystack_consumers_lock);
1368 
1369 	if (keystack->keystack_flushdump == 0) {
1370 		/*
1371 		 * There were no consumers at all for this message.
1372 		 * XXX For now return ESRCH.
1373 		 */
1374 		keysock_error(ks, mp, ESRCH, SADB_X_DIAGNOSTIC_NO_SADBS);
1375 	} else {
1376 		/* Otherwise, free the original message. */
1377 		freemsg(mp);
1378 	}
1379 }
1380 
1381 /*
1382  * Get the right diagnostic for a duplicate.  Should probably use a static
1383  * table lookup.
1384  */
1385 int
1386 keysock_duplicate(int ext_type)
1387 {
1388 	int rc = 0;
1389 
1390 	switch (ext_type) {
1391 	case SADB_EXT_ADDRESS_SRC:
1392 		rc = SADB_X_DIAGNOSTIC_DUPLICATE_SRC;
1393 		break;
1394 	case SADB_EXT_ADDRESS_DST:
1395 		rc = SADB_X_DIAGNOSTIC_DUPLICATE_DST;
1396 		break;
1397 	case SADB_X_EXT_ADDRESS_INNER_SRC:
1398 		rc = SADB_X_DIAGNOSTIC_DUPLICATE_INNER_SRC;
1399 		break;
1400 	case SADB_X_EXT_ADDRESS_INNER_DST:
1401 		rc = SADB_X_DIAGNOSTIC_DUPLICATE_INNER_DST;
1402 		break;
1403 	case SADB_EXT_SA:
1404 		rc = SADB_X_DIAGNOSTIC_DUPLICATE_SA;
1405 		break;
1406 	case SADB_EXT_SPIRANGE:
1407 		rc = SADB_X_DIAGNOSTIC_DUPLICATE_RANGE;
1408 		break;
1409 	case SADB_EXT_KEY_AUTH:
1410 		rc = SADB_X_DIAGNOSTIC_DUPLICATE_AKEY;
1411 		break;
1412 	case SADB_EXT_KEY_ENCRYPT:
1413 		rc = SADB_X_DIAGNOSTIC_DUPLICATE_EKEY;
1414 		break;
1415 	}
1416 	return (rc);
1417 }
1418 
1419 /*
1420  * Get the right diagnostic for a reality check failure.  Should probably use
1421  * a static table lookup.
1422  */
1423 int
1424 keysock_malformed(int ext_type)
1425 {
1426 	int rc = 0;
1427 
1428 	switch (ext_type) {
1429 	case SADB_EXT_ADDRESS_SRC:
1430 		rc = SADB_X_DIAGNOSTIC_MALFORMED_SRC;
1431 		break;
1432 	case SADB_EXT_ADDRESS_DST:
1433 		rc = SADB_X_DIAGNOSTIC_MALFORMED_DST;
1434 		break;
1435 	case SADB_X_EXT_ADDRESS_INNER_SRC:
1436 		rc = SADB_X_DIAGNOSTIC_MALFORMED_INNER_SRC;
1437 		break;
1438 	case SADB_X_EXT_ADDRESS_INNER_DST:
1439 		rc = SADB_X_DIAGNOSTIC_MALFORMED_INNER_DST;
1440 		break;
1441 	case SADB_EXT_SA:
1442 		rc = SADB_X_DIAGNOSTIC_MALFORMED_SA;
1443 		break;
1444 	case SADB_EXT_SPIRANGE:
1445 		rc = SADB_X_DIAGNOSTIC_MALFORMED_RANGE;
1446 		break;
1447 	case SADB_EXT_KEY_AUTH:
1448 		rc = SADB_X_DIAGNOSTIC_MALFORMED_AKEY;
1449 		break;
1450 	case SADB_EXT_KEY_ENCRYPT:
1451 		rc = SADB_X_DIAGNOSTIC_MALFORMED_EKEY;
1452 		break;
1453 	}
1454 	return (rc);
1455 }
1456 
1457 /*
1458  * Keysock massaging of an inverse ACQUIRE.  Consult policy,
1459  * and construct an appropriate response.
1460  */
1461 static void
1462 keysock_inverse_acquire(mblk_t *mp, sadb_msg_t *samsg, sadb_ext_t *extv[],
1463     keysock_t *ks)
1464 {
1465 	mblk_t *reply_mp;
1466 	keysock_stack_t	*keystack = ks->keysock_keystack;
1467 
1468 	/*
1469 	 * Reality check things...
1470 	 */
1471 	if (extv[SADB_EXT_ADDRESS_SRC] == NULL) {
1472 		keysock_error(ks, mp, EINVAL, SADB_X_DIAGNOSTIC_MISSING_SRC);
1473 		return;
1474 	}
1475 	if (extv[SADB_EXT_ADDRESS_DST] == NULL) {
1476 		keysock_error(ks, mp, EINVAL, SADB_X_DIAGNOSTIC_MISSING_DST);
1477 		return;
1478 	}
1479 
1480 	if (extv[SADB_X_EXT_ADDRESS_INNER_SRC] != NULL &&
1481 	    extv[SADB_X_EXT_ADDRESS_INNER_DST] == NULL) {
1482 		keysock_error(ks, mp, EINVAL,
1483 		    SADB_X_DIAGNOSTIC_MISSING_INNER_DST);
1484 		return;
1485 	}
1486 
1487 	if (extv[SADB_X_EXT_ADDRESS_INNER_SRC] == NULL &&
1488 	    extv[SADB_X_EXT_ADDRESS_INNER_DST] != NULL) {
1489 		keysock_error(ks, mp, EINVAL,
1490 		    SADB_X_DIAGNOSTIC_MISSING_INNER_SRC);
1491 		return;
1492 	}
1493 
1494 	reply_mp = ipsec_construct_inverse_acquire(samsg, extv,
1495 	    keystack->keystack_netstack);
1496 
1497 	if (reply_mp != NULL) {
1498 		freemsg(mp);
1499 		keysock_passup(reply_mp, (sadb_msg_t *)reply_mp->b_rptr,
1500 		    ks->keysock_serial, NULL, B_FALSE, keystack);
1501 	} else {
1502 		keysock_error(ks, mp, samsg->sadb_msg_errno,
1503 		    samsg->sadb_x_msg_diagnostic);
1504 	}
1505 }
1506 
1507 /*
1508  * Spew an extended REGISTER down to the relevant consumers.
1509  */
1510 static void
1511 keysock_extended_register(keysock_t *ks, mblk_t *mp, sadb_ext_t *extv[])
1512 {
1513 	sadb_x_ereg_t *ereg = (sadb_x_ereg_t *)extv[SADB_X_EXT_EREG];
1514 	uint8_t *satypes, *fencepost;
1515 	mblk_t *downmp;
1516 	sadb_ext_t *downextv[SADB_EXT_MAX + 1];
1517 	keysock_stack_t	*keystack = ks->keysock_keystack;
1518 
1519 	if (ks->keysock_registered[0] != 0 || ks->keysock_registered[1] != 0 ||
1520 	    ks->keysock_registered[2] != 0 || ks->keysock_registered[3] != 0) {
1521 		keysock_error(ks, mp, EBUSY, 0);
1522 	}
1523 
1524 	ks->keysock_flags |= KEYSOCK_EXTENDED;
1525 	if (ereg == NULL) {
1526 		keysock_error(ks, mp, EINVAL, SADB_X_DIAGNOSTIC_SATYPE_NEEDED);
1527 	} else {
1528 		ASSERT(mp->b_rptr + msgdsize(mp) == mp->b_wptr);
1529 		fencepost = (uint8_t *)mp->b_wptr;
1530 		satypes = ereg->sadb_x_ereg_satypes;
1531 		while (*satypes != SADB_SATYPE_UNSPEC && satypes != fencepost) {
1532 			downmp = copymsg(mp);
1533 			if (downmp == NULL) {
1534 				keysock_error(ks, mp, ENOMEM, 0);
1535 				return;
1536 			}
1537 			/*
1538 			 * Since we've made it here, keysock_get_ext will work!
1539 			 */
1540 			(void) keysock_get_ext(downextv,
1541 			    (sadb_msg_t *)downmp->b_rptr, msgdsize(downmp),
1542 			    keystack);
1543 			keysock_passdown(ks, downmp, *satypes, downextv,
1544 			    B_FALSE);
1545 			++satypes;
1546 		}
1547 		freemsg(mp);
1548 	}
1549 
1550 	/*
1551 	 * Set global to indicate we prefer an extended ACQUIRE.
1552 	 */
1553 	atomic_add_32(&keystack->keystack_num_extended, 1);
1554 }
1555 
1556 static void
1557 keysock_delpair_all(keysock_t *ks, mblk_t *mp, sadb_ext_t *extv[])
1558 {
1559 	int i, start, finish;
1560 	mblk_t *mp1 = NULL;
1561 	keysock_stack_t *keystack = ks->keysock_keystack;
1562 
1563 	start = 0;
1564 	finish = KEYSOCK_MAX_CONSUMERS - 1;
1565 
1566 	for (i = start; i <= finish; i++) {
1567 		if (keystack->keystack_consumers[i] != NULL) {
1568 			mp1 = copymsg(mp);
1569 			if (mp1 == NULL) {
1570 				keysock_error(ks, mp, ENOMEM,
1571 				    SADB_X_DIAGNOSTIC_NONE);
1572 				return;
1573 			}
1574 			keysock_passdown(ks, mp1, i, extv, B_FALSE);
1575 		}
1576 	}
1577 }
1578 
1579 /*
1580  * Handle PF_KEY messages.
1581  */
1582 static void
1583 keysock_parse(queue_t *q, mblk_t *mp)
1584 {
1585 	sadb_msg_t *samsg;
1586 	sadb_ext_t *extv[SADB_EXT_MAX + 1];
1587 	keysock_t *ks = (keysock_t *)q->q_ptr;
1588 	uint_t msgsize;
1589 	uint8_t satype;
1590 	keysock_stack_t	*keystack = ks->keysock_keystack;
1591 
1592 	/* Make sure I'm a PF_KEY socket.  (i.e. nothing's below me) */
1593 	ASSERT(WR(q)->q_next == NULL);
1594 
1595 	samsg = (sadb_msg_t *)mp->b_rptr;
1596 	ks2dbg(keystack, ("Received possible PF_KEY message, type %d.\n",
1597 	    samsg->sadb_msg_type));
1598 
1599 	msgsize = SADB_64TO8(samsg->sadb_msg_len);
1600 
1601 	if (msgdsize(mp) != msgsize) {
1602 		/*
1603 		 * Message len incorrect w.r.t. actual size.  Send an error
1604 		 * (EMSGSIZE).	It may be necessary to massage things a
1605 		 * bit.	 For example, if the sadb_msg_type is hosed,
1606 		 * I need to set it to SADB_RESERVED to get delivery to
1607 		 * do the right thing.	Then again, maybe just letting
1608 		 * the error delivery do the right thing.
1609 		 */
1610 		ks2dbg(keystack,
1611 		    ("mblk (%lu) and base (%d) message sizes don't jibe.\n",
1612 		    msgdsize(mp), msgsize));
1613 		keysock_error(ks, mp, EMSGSIZE, SADB_X_DIAGNOSTIC_NONE);
1614 		return;
1615 	}
1616 
1617 	if (msgsize > (uint_t)(mp->b_wptr - mp->b_rptr)) {
1618 		/* Get all message into one mblk. */
1619 		if (pullupmsg(mp, -1) == 0) {
1620 			/*
1621 			 * Something screwy happened.
1622 			 */
1623 			ks3dbg(keystack,
1624 			    ("keysock_parse: pullupmsg() failed.\n"));
1625 			return;
1626 		} else {
1627 			samsg = (sadb_msg_t *)mp->b_rptr;
1628 		}
1629 	}
1630 
1631 	switch (keysock_get_ext(extv, samsg, msgsize, keystack)) {
1632 	case KGE_DUP:
1633 		/* Handle duplicate extension. */
1634 		ks1dbg(keystack, ("Got duplicate extension of type %d.\n",
1635 		    extv[0]->sadb_ext_type));
1636 		keysock_error(ks, mp, EINVAL,
1637 		    keysock_duplicate(extv[0]->sadb_ext_type));
1638 		return;
1639 	case KGE_UNK:
1640 		/* Handle unknown extension. */
1641 		ks1dbg(keystack, ("Got unknown extension of type %d.\n",
1642 		    extv[0]->sadb_ext_type));
1643 		keysock_error(ks, mp, EINVAL, SADB_X_DIAGNOSTIC_UNKNOWN_EXT);
1644 		return;
1645 	case KGE_LEN:
1646 		/* Length error. */
1647 		ks1dbg(keystack,
1648 		    ("Length %d on extension type %d overrun or 0.\n",
1649 		    extv[0]->sadb_ext_len, extv[0]->sadb_ext_type));
1650 		keysock_error(ks, mp, EINVAL, SADB_X_DIAGNOSTIC_BAD_EXTLEN);
1651 		return;
1652 	case KGE_CHK:
1653 		/* Reality check failed. */
1654 		ks1dbg(keystack,
1655 		    ("Reality check failed on extension type %d.\n",
1656 		    extv[0]->sadb_ext_type));
1657 		keysock_error(ks, mp, EINVAL,
1658 		    keysock_malformed(extv[0]->sadb_ext_type));
1659 		return;
1660 	default:
1661 		/* Default case is no errors. */
1662 		break;
1663 	}
1664 
1665 	switch (samsg->sadb_msg_type) {
1666 	case SADB_REGISTER:
1667 		/*
1668 		 * There's a semantic weirdness in that a message OTHER than
1669 		 * the return REGISTER message may be passed up if I set the
1670 		 * registered bit BEFORE I pass it down.
1671 		 *
1672 		 * SOOOO, I'll not twiddle any registered bits until I see
1673 		 * the upbound REGISTER (with a serial number in it).
1674 		 */
1675 		if (samsg->sadb_msg_satype == SADB_SATYPE_UNSPEC) {
1676 			/* Handle extended register here. */
1677 			keysock_extended_register(ks, mp, extv);
1678 			return;
1679 		} else if (ks->keysock_flags & KEYSOCK_EXTENDED) {
1680 			keysock_error(ks, mp, EBUSY, 0);
1681 			return;
1682 		}
1683 		/* FALLTHRU */
1684 	case SADB_GETSPI:
1685 	case SADB_ADD:
1686 	case SADB_UPDATE:
1687 	case SADB_X_UPDATEPAIR:
1688 	case SADB_DELETE:
1689 	case SADB_X_DELPAIR:
1690 	case SADB_GET:
1691 		/*
1692 		 * Pass down to appropriate consumer.
1693 		 */
1694 		if (samsg->sadb_msg_satype != SADB_SATYPE_UNSPEC)
1695 			keysock_passdown(ks, mp, samsg->sadb_msg_satype, extv,
1696 			    B_FALSE);
1697 		else keysock_error(ks, mp, EINVAL,
1698 		    SADB_X_DIAGNOSTIC_SATYPE_NEEDED);
1699 		return;
1700 	case SADB_X_DELPAIR_STATE:
1701 		if (samsg->sadb_msg_satype == SADB_SATYPE_UNSPEC) {
1702 			keysock_delpair_all(ks, mp, extv);
1703 		} else {
1704 			keysock_passdown(ks, mp, samsg->sadb_msg_satype, extv,
1705 			    B_FALSE);
1706 		}
1707 		return;
1708 	case SADB_ACQUIRE:
1709 		/*
1710 		 * If I _receive_ an acquire, this means I should spread it
1711 		 * out to registered sockets.  Unless there's an errno...
1712 		 *
1713 		 * Need ADDRESS, may have ID, SENS, and PROP, unless errno,
1714 		 * in which case there should be NO extensions.
1715 		 *
1716 		 * Return to registered.
1717 		 */
1718 		if (samsg->sadb_msg_errno != 0) {
1719 			satype = samsg->sadb_msg_satype;
1720 			if (satype == SADB_SATYPE_UNSPEC) {
1721 				if (!(ks->keysock_flags & KEYSOCK_EXTENDED)) {
1722 					keysock_error(ks, mp, EINVAL,
1723 					    SADB_X_DIAGNOSTIC_SATYPE_NEEDED);
1724 					return;
1725 				}
1726 				/*
1727 				 * Reassign satype based on the first
1728 				 * flags that KEYSOCK_SETREG says.
1729 				 */
1730 				while (satype <= SADB_SATYPE_MAX) {
1731 					if (KEYSOCK_ISREG(ks, satype))
1732 						break;
1733 					satype++;
1734 				}
1735 				if (satype > SADB_SATYPE_MAX) {
1736 					keysock_error(ks, mp, EBUSY, 0);
1737 					return;
1738 				}
1739 			}
1740 			keysock_passdown(ks, mp, satype, extv, B_FALSE);
1741 		} else {
1742 			if (samsg->sadb_msg_satype == SADB_SATYPE_UNSPEC) {
1743 				keysock_error(ks, mp, EINVAL,
1744 				    SADB_X_DIAGNOSTIC_SATYPE_NEEDED);
1745 			} else {
1746 				keysock_passup(mp, samsg, 0, NULL, B_FALSE,
1747 				    keystack);
1748 			}
1749 		}
1750 		return;
1751 	case SADB_EXPIRE:
1752 		/*
1753 		 * If someone sends this in, then send out to all senders.
1754 		 * (Save maybe ESP or AH, I have to be careful here.)
1755 		 *
1756 		 * Need ADDRESS, may have ID and SENS.
1757 		 *
1758 		 * XXX for now this is unsupported.
1759 		 */
1760 		break;
1761 	case SADB_FLUSH:
1762 		/*
1763 		 * Nuke all SAs.
1764 		 *
1765 		 * No extensions at all.  Return to all listeners.
1766 		 *
1767 		 * Question:	Should I hold a lock here to prevent
1768 		 *		additions/deletions while flushing?
1769 		 * Answer:	No.  (See keysock_passdown() for details.)
1770 		 */
1771 		if (extv[0] != NULL) {
1772 			/*
1773 			 * FLUSH messages shouldn't have extensions.
1774 			 * Return EINVAL.
1775 			 */
1776 			ks2dbg(keystack, ("FLUSH message with extension.\n"));
1777 			keysock_error(ks, mp, EINVAL, SADB_X_DIAGNOSTIC_NO_EXT);
1778 			return;
1779 		}
1780 
1781 		/* Passing down of DUMP/FLUSH messages are special. */
1782 		qwriter(q, mp, keysock_do_flushdump, PERIM_INNER);
1783 		return;
1784 	case SADB_DUMP:	 /* not used by normal applications */
1785 		if ((extv[0] != NULL) &&
1786 		    ((msgsize >
1787 		    (sizeof (sadb_msg_t) + sizeof (sadb_x_edump_t))) ||
1788 		    (extv[SADB_X_EXT_EDUMP] == NULL))) {
1789 				keysock_error(ks, mp, EINVAL,
1790 				    SADB_X_DIAGNOSTIC_NO_EXT);
1791 				return;
1792 		}
1793 		qwriter(q, mp, keysock_do_flushdump, PERIM_INNER);
1794 		return;
1795 	case SADB_X_PROMISC:
1796 		/*
1797 		 * Promiscuous processing message.
1798 		 */
1799 		if (samsg->sadb_msg_satype == 0)
1800 			ks->keysock_flags &= ~KEYSOCK_PROMISC;
1801 		else
1802 			ks->keysock_flags |= KEYSOCK_PROMISC;
1803 		keysock_passup(mp, samsg, ks->keysock_serial, NULL, B_FALSE,
1804 		    keystack);
1805 		return;
1806 	case SADB_X_INVERSE_ACQUIRE:
1807 		keysock_inverse_acquire(mp, samsg, extv, ks);
1808 		return;
1809 	default:
1810 		ks2dbg(keystack, ("Got unknown message type %d.\n",
1811 		    samsg->sadb_msg_type));
1812 		keysock_error(ks, mp, EINVAL, SADB_X_DIAGNOSTIC_UNKNOWN_MSG);
1813 		return;
1814 	}
1815 
1816 	/* As a placeholder... */
1817 	ks0dbg(("keysock_parse():  Hit EOPNOTSUPP\n"));
1818 	keysock_error(ks, mp, EOPNOTSUPP, SADB_X_DIAGNOSTIC_NONE);
1819 }
1820 
1821 /*
1822  * wput routing for PF_KEY/keysock/whatever.  Unlike the routing socket,
1823  * I don't convert to ioctl()'s for IP.  I am the end-all driver as far
1824  * as PF_KEY sockets are concerned.  I do some conversion, but not as much
1825  * as IP/rts does.
1826  */
1827 static void
1828 keysock_wput(queue_t *q, mblk_t *mp)
1829 {
1830 	uchar_t *rptr = mp->b_rptr;
1831 	mblk_t *mp1;
1832 	keysock_t *ks;
1833 	keysock_stack_t	*keystack;
1834 
1835 	if (WR(q)->q_next) {
1836 		keysock_consumer_t *kc = (keysock_consumer_t *)q->q_ptr;
1837 		keystack = kc->kc_keystack;
1838 
1839 		ks3dbg(keystack, ("In keysock_wput\n"));
1840 
1841 		/*
1842 		 * We shouldn't get writes on a consumer instance.
1843 		 * But for now, just passthru.
1844 		 */
1845 		ks1dbg(keystack, ("Huh?  wput for an consumer instance (%d)?\n",
1846 		    kc->kc_sa_type));
1847 		putnext(q, mp);
1848 		return;
1849 	}
1850 	ks = (keysock_t *)q->q_ptr;
1851 	keystack = ks->keysock_keystack;
1852 
1853 	ks3dbg(keystack, ("In keysock_wput\n"));
1854 
1855 	switch (mp->b_datap->db_type) {
1856 	case M_DATA:
1857 		/*
1858 		 * Silently discard.
1859 		 */
1860 		ks2dbg(keystack, ("raw M_DATA in keysock.\n"));
1861 		freemsg(mp);
1862 		return;
1863 	case M_PROTO:
1864 	case M_PCPROTO:
1865 		if ((mp->b_wptr - rptr) >= sizeof (struct T_data_req)) {
1866 			if (((union T_primitives *)rptr)->type == T_DATA_REQ) {
1867 				if ((mp1 = mp->b_cont) == NULL) {
1868 					/* No data after T_DATA_REQ. */
1869 					ks2dbg(keystack,
1870 					    ("No data after DATA_REQ.\n"));
1871 					freemsg(mp);
1872 					return;
1873 				}
1874 				freeb(mp);
1875 				mp = mp1;
1876 				ks2dbg(keystack, ("T_DATA_REQ\n"));
1877 				break;	/* Out of switch. */
1878 			}
1879 		}
1880 		/* FALLTHRU */
1881 	default:
1882 		ks3dbg(keystack, ("In default wput case (%d %d).\n",
1883 		    mp->b_datap->db_type, ((union T_primitives *)rptr)->type));
1884 		keysock_wput_other(q, mp);
1885 		return;
1886 	}
1887 
1888 	/* I now have a PF_KEY message in an M_DATA block, pointed to by mp. */
1889 	keysock_parse(q, mp);
1890 }
1891 
1892 /* BELOW THIS LINE ARE ROUTINES INCLUDING AND RELATED TO keysock_rput(). */
1893 
1894 /*
1895  * Called upon receipt of a KEYSOCK_HELLO_ACK to set up the appropriate
1896  * state vectors.
1897  */
1898 static void
1899 keysock_link_consumer(uint8_t satype, keysock_consumer_t *kc)
1900 {
1901 	keysock_t *ks;
1902 	keysock_stack_t	*keystack = kc->kc_keystack;
1903 
1904 	mutex_enter(&keystack->keystack_consumers_lock);
1905 	mutex_enter(&kc->kc_lock);
1906 	if (keystack->keystack_consumers[satype] != NULL) {
1907 		ks0dbg((
1908 		    "Hmmmm, someone closed %d before the HELLO_ACK happened.\n",
1909 		    satype));
1910 		/*
1911 		 * Perhaps updating the new below-me consumer with what I have
1912 		 * so far would work too?
1913 		 */
1914 		mutex_exit(&kc->kc_lock);
1915 		mutex_exit(&keystack->keystack_consumers_lock);
1916 	} else {
1917 		/* Add new below-me consumer. */
1918 		keystack->keystack_consumers[satype] = kc;
1919 
1920 		kc->kc_flags = 0;
1921 		kc->kc_sa_type = satype;
1922 		mutex_exit(&kc->kc_lock);
1923 		mutex_exit(&keystack->keystack_consumers_lock);
1924 
1925 		/* Scan the keysock list. */
1926 		mutex_enter(&keystack->keystack_list_lock);
1927 		for (ks = keystack->keystack_list; ks != NULL;
1928 		    ks = ks->keysock_next) {
1929 			if (KEYSOCK_ISREG(ks, satype)) {
1930 				/*
1931 				 * XXX Perhaps send an SADB_REGISTER down on
1932 				 * the socket's behalf.
1933 				 */
1934 				ks1dbg(keystack,
1935 				    ("Socket %u registered already for "
1936 				    "new consumer.\n", ks->keysock_serial));
1937 			}
1938 		}
1939 		mutex_exit(&keystack->keystack_list_lock);
1940 	}
1941 }
1942 
1943 /*
1944  * Generate a KEYSOCK_OUT_ERR message for my consumer.
1945  */
1946 static void
1947 keysock_out_err(keysock_consumer_t *kc, int ks_errno, mblk_t *mp)
1948 {
1949 	keysock_out_err_t *kse;
1950 	mblk_t *imp;
1951 	keysock_stack_t	*keystack = kc->kc_keystack;
1952 
1953 	imp = allocb(sizeof (ipsec_info_t), BPRI_HI);
1954 	if (imp == NULL) {
1955 		ks1dbg(keystack, ("keysock_out_err:  Can't alloc message.\n"));
1956 		return;
1957 	}
1958 
1959 	imp->b_datap->db_type = M_CTL;
1960 	imp->b_wptr += sizeof (ipsec_info_t);
1961 
1962 	kse = (keysock_out_err_t *)imp->b_rptr;
1963 	imp->b_cont = mp;
1964 	kse->ks_err_type = KEYSOCK_OUT_ERR;
1965 	kse->ks_err_len = sizeof (*kse);
1966 	/* Is serial necessary? */
1967 	kse->ks_err_serial = 0;
1968 	kse->ks_err_errno = ks_errno;
1969 
1970 	/*
1971 	 * XXX What else do I need to do here w.r.t. information
1972 	 * to tell the consumer what caused this error?
1973 	 *
1974 	 * I believe the answer is the PF_KEY ACQUIRE (or other) message
1975 	 * attached in mp, which is appended at the end.  I believe the
1976 	 * db_ref won't matter here, because the PF_KEY message is only read
1977 	 * for KEYSOCK_OUT_ERR.
1978 	 */
1979 
1980 	putnext(kc->kc_wq, imp);
1981 }
1982 
1983 /* XXX this is a hack errno. */
1984 #define	EIPSECNOSA 255
1985 
1986 /*
1987  * Route message (pointed by mp, header in samsg) toward appropriate
1988  * sockets.  Assume the message's creator did its job correctly.
1989  *
1990  * This should be a function that is followed by a return in its caller.
1991  * The compiler _should_ be able to use tail-call optimizations to make the
1992  * large ## of parameters not a huge deal.
1993  */
1994 static void
1995 keysock_passup(mblk_t *mp, sadb_msg_t *samsg, minor_t serial,
1996     keysock_consumer_t *kc, boolean_t persistent, keysock_stack_t *keystack)
1997 {
1998 	keysock_t *ks;
1999 	uint8_t satype = samsg->sadb_msg_satype;
2000 	boolean_t toall = B_FALSE, allreg = B_FALSE, allereg = B_FALSE,
2001 	    setalg = B_FALSE;
2002 	mblk_t *mp1;
2003 	int err = EIPSECNOSA;
2004 
2005 	/* Convert mp, which is M_DATA, into an M_PROTO of type T_DATA_IND */
2006 	mp1 = allocb(sizeof (struct T_data_req), BPRI_HI);
2007 	if (mp1 == NULL) {
2008 		err = ENOMEM;
2009 		goto error;
2010 	}
2011 	mp1->b_wptr += sizeof (struct T_data_req);
2012 	((struct T_data_ind *)mp1->b_rptr)->PRIM_type = T_DATA_IND;
2013 	((struct T_data_ind *)mp1->b_rptr)->MORE_flag = 0;
2014 	mp1->b_datap->db_type = M_PROTO;
2015 	mp1->b_cont = mp;
2016 	mp = mp1;
2017 
2018 	switch (samsg->sadb_msg_type) {
2019 	case SADB_FLUSH:
2020 	case SADB_GETSPI:
2021 	case SADB_UPDATE:
2022 	case SADB_X_UPDATEPAIR:
2023 	case SADB_ADD:
2024 	case SADB_DELETE:
2025 	case SADB_X_DELPAIR:
2026 	case SADB_EXPIRE:
2027 		/*
2028 		 * These are most likely replies.  Don't worry about
2029 		 * KEYSOCK_OUT_ERR handling.  Deliver to all sockets.
2030 		 */
2031 		ks3dbg(keystack,
2032 		    ("Delivering normal message (%d) to all sockets.\n",
2033 		    samsg->sadb_msg_type));
2034 		toall = B_TRUE;
2035 		break;
2036 	case SADB_REGISTER:
2037 		/*
2038 		 * REGISTERs come up for one of three reasons:
2039 		 *
2040 		 *	1.) In response to a normal SADB_REGISTER
2041 		 *		(samsg->sadb_msg_satype != SADB_SATYPE_UNSPEC &&
2042 		 *		    serial != 0)
2043 		 *		Deliver to normal SADB_REGISTERed sockets.
2044 		 *	2.) In response to an extended REGISTER
2045 		 *		(samsg->sadb_msg_satype == SADB_SATYPE_UNSPEC)
2046 		 *		Deliver to extended REGISTERed socket.
2047 		 *	3.) Spontaneous algorithm changes
2048 		 *		(samsg->sadb_msg_satype != SADB_SATYPE_UNSPEC &&
2049 		 *		    serial == 0)
2050 		 *		Deliver to REGISTERed sockets of all sorts.
2051 		 */
2052 		if (kc == NULL) {
2053 			/* Here because of keysock_error() call. */
2054 			ASSERT(samsg->sadb_msg_errno != 0);
2055 			break;	/* Out of switch. */
2056 		}
2057 		ks3dbg(keystack, ("Delivering REGISTER.\n"));
2058 		if (satype == SADB_SATYPE_UNSPEC) {
2059 			/* REGISTER Reason #2 */
2060 			allereg = B_TRUE;
2061 			/*
2062 			 * Rewhack SA type so PF_KEY socket holder knows what
2063 			 * consumer generated this algorithm list.
2064 			 */
2065 			satype = kc->kc_sa_type;
2066 			samsg->sadb_msg_satype = satype;
2067 			setalg = B_TRUE;
2068 		} else if (serial == 0) {
2069 			/* REGISTER Reason #3 */
2070 			allreg = B_TRUE;
2071 			allereg = B_TRUE;
2072 		} else {
2073 			/* REGISTER Reason #1 */
2074 			allreg = B_TRUE;
2075 			setalg = B_TRUE;
2076 		}
2077 		break;
2078 	case SADB_ACQUIRE:
2079 		/*
2080 		 * ACQUIREs are either extended (sadb_msg_satype == 0) or
2081 		 * regular (sadb_msg_satype != 0).  And we're guaranteed
2082 		 * that serial == 0 for an ACQUIRE.
2083 		 */
2084 		ks3dbg(keystack, ("Delivering ACQUIRE.\n"));
2085 		allereg = (satype == SADB_SATYPE_UNSPEC);
2086 		allreg = !allereg;
2087 		/*
2088 		 * Corner case - if we send a regular ACQUIRE and there's
2089 		 * extended ones registered, don't send an error down to
2090 		 * consumers if nobody's listening and prematurely destroy
2091 		 * their ACQUIRE record.  This might be too hackish of a
2092 		 * solution.
2093 		 */
2094 		if (allreg && keystack->keystack_num_extended > 0)
2095 			err = 0;
2096 		break;
2097 	case SADB_X_PROMISC:
2098 	case SADB_X_INVERSE_ACQUIRE:
2099 	case SADB_DUMP:
2100 	case SADB_GET:
2101 	default:
2102 		/*
2103 		 * Deliver to the sender and promiscuous only.
2104 		 */
2105 		ks3dbg(keystack, ("Delivering sender/promisc only (%d).\n",
2106 		    samsg->sadb_msg_type));
2107 		break;
2108 	}
2109 
2110 	mutex_enter(&keystack->keystack_list_lock);
2111 	for (ks = keystack->keystack_list; ks != NULL; ks = ks->keysock_next) {
2112 		/* Delivery loop. */
2113 
2114 		/*
2115 		 * Check special keysock-setting cases (REGISTER replies)
2116 		 * here.
2117 		 */
2118 		if (setalg && serial == ks->keysock_serial) {
2119 			ASSERT(kc != NULL);
2120 			ASSERT(kc->kc_sa_type == satype);
2121 			KEYSOCK_SETREG(ks, satype);
2122 		}
2123 
2124 		/*
2125 		 * NOLOOP takes precedence over PROMISC.  So if you've set
2126 		 * !SO_USELOOPBACK, don't expect to see any data...
2127 		 */
2128 		if (ks->keysock_flags & KEYSOCK_NOLOOP)
2129 			continue;
2130 
2131 		/*
2132 		 * Messages to all, or promiscuous sockets just GET the
2133 		 * message.  Perform rules-type checking iff it's not for all
2134 		 * listeners or the socket is in promiscuous mode.
2135 		 *
2136 		 * NOTE:Because of the (kc != NULL && ISREG()), make sure
2137 		 *	extended ACQUIREs arrive off a consumer that is
2138 		 *	part of the extended REGISTER set of consumers.
2139 		 */
2140 		if (serial != ks->keysock_serial &&
2141 		    !toall &&
2142 		    !(ks->keysock_flags & KEYSOCK_PROMISC) &&
2143 		    !((ks->keysock_flags & KEYSOCK_EXTENDED) ?
2144 		    allereg : allreg && kc != NULL &&
2145 		    KEYSOCK_ISREG(ks, kc->kc_sa_type)))
2146 			continue;
2147 
2148 		mp1 = dupmsg(mp);
2149 		if (mp1 == NULL) {
2150 			ks2dbg(keystack, (
2151 			    "keysock_passup():  dupmsg() failed.\n"));
2152 			mp1 = mp;
2153 			mp = NULL;
2154 			err = ENOMEM;
2155 		}
2156 
2157 		/*
2158 		 * At this point, we can deliver or attempt to deliver
2159 		 * this message.  We're free of obligation to report
2160 		 * no listening PF_KEY sockets.  So set err to 0.
2161 		 */
2162 		err = 0;
2163 
2164 		/*
2165 		 * See if we canputnext(), as well as see if the message
2166 		 * needs to be queued if we can't.
2167 		 */
2168 		if (!canputnext(ks->keysock_rq)) {
2169 			if (persistent) {
2170 				if (putq(ks->keysock_rq, mp1) == 0) {
2171 					ks1dbg(keystack, (
2172 					    "keysock_passup: putq failed.\n"));
2173 				} else {
2174 					continue;
2175 				}
2176 			}
2177 			freemsg(mp1);
2178 			continue;
2179 		}
2180 
2181 		ks3dbg(keystack,
2182 		    ("Putting to serial %d.\n", ks->keysock_serial));
2183 		/*
2184 		 * Unlike the specific keysock instance case, this
2185 		 * will only hit for listeners, so we will only
2186 		 * putnext() if we can.
2187 		 */
2188 		putnext(ks->keysock_rq, mp1);
2189 		if (mp == NULL)
2190 			break;	/* out of for loop. */
2191 	}
2192 	mutex_exit(&keystack->keystack_list_lock);
2193 
2194 error:
2195 	if ((err != 0) && (kc != NULL)) {
2196 		/*
2197 		 * Generate KEYSOCK_OUT_ERR for consumer.
2198 		 * Basically, I send this back if I have not been able to
2199 		 * transmit (for whatever reason)
2200 		 */
2201 		ks1dbg(keystack,
2202 		    ("keysock_passup():  No registered of type %d.\n",
2203 		    satype));
2204 		if (mp != NULL) {
2205 			if (mp->b_datap->db_type == M_PROTO) {
2206 				mp1 = mp;
2207 				mp = mp->b_cont;
2208 				freeb(mp1);
2209 			}
2210 			/*
2211 			 * Do a copymsg() because people who get
2212 			 * KEYSOCK_OUT_ERR may alter the message contents.
2213 			 */
2214 			mp1 = copymsg(mp);
2215 			if (mp1 == NULL) {
2216 				ks2dbg(keystack,
2217 				    ("keysock_passup: copymsg() failed.\n"));
2218 				mp1 = mp;
2219 				mp = NULL;
2220 			}
2221 			keysock_out_err(kc, err, mp1);
2222 		}
2223 	}
2224 
2225 	/*
2226 	 * XXX Blank the message somehow.  This is difficult because we don't
2227 	 * know at this point if the message has db_ref > 1, etc.
2228 	 *
2229 	 * Optimally, keysock messages containing actual keying material would
2230 	 * be allocated with esballoc(), with a zeroing free function.
2231 	 */
2232 	if (mp != NULL)
2233 		freemsg(mp);
2234 }
2235 
2236 /*
2237  * Keysock's read service procedure is there only for PF_KEY reply
2238  * messages that really need to reach the top.
2239  */
2240 static void
2241 keysock_rsrv(queue_t *q)
2242 {
2243 	mblk_t *mp;
2244 
2245 	while ((mp = getq(q)) != NULL) {
2246 		if (canputnext(q)) {
2247 			putnext(q, mp);
2248 		} else {
2249 			(void) putbq(q, mp);
2250 			return;
2251 		}
2252 	}
2253 }
2254 
2255 /*
2256  * The read procedure should only be invoked by a keysock consumer, like
2257  * ESP, AH, etc.  I should only see KEYSOCK_OUT and KEYSOCK_HELLO_ACK
2258  * messages on my read queues.
2259  */
2260 static void
2261 keysock_rput(queue_t *q, mblk_t *mp)
2262 {
2263 	keysock_consumer_t *kc = (keysock_consumer_t *)q->q_ptr;
2264 	ipsec_info_t *ii;
2265 	keysock_hello_ack_t *ksa;
2266 	minor_t serial;
2267 	mblk_t *mp1;
2268 	sadb_msg_t *samsg;
2269 	keysock_stack_t	*keystack = kc->kc_keystack;
2270 
2271 	/* Make sure I'm a consumer instance.  (i.e. something's below me) */
2272 	ASSERT(WR(q)->q_next != NULL);
2273 
2274 	if (mp->b_datap->db_type != M_CTL) {
2275 		/*
2276 		 * Keysock should only see keysock consumer interface
2277 		 * messages (see ipsec_info.h) on its read procedure.
2278 		 * To be robust, however, putnext() up so the STREAM head can
2279 		 * deal with it appropriately.
2280 		 */
2281 		ks1dbg(keystack,
2282 		    ("Hmmm, a non M_CTL (%d, 0x%x) on keysock_rput.\n",
2283 		    mp->b_datap->db_type, mp->b_datap->db_type));
2284 		putnext(q, mp);
2285 		return;
2286 	}
2287 
2288 	ii = (ipsec_info_t *)mp->b_rptr;
2289 
2290 	switch (ii->ipsec_info_type) {
2291 	case KEYSOCK_OUT:
2292 		/*
2293 		 * A consumer needs to pass a response message or an ACQUIRE
2294 		 * UP.  I assume that the consumer has done the right
2295 		 * thing w.r.t. message creation, etc.
2296 		 */
2297 		serial = ((keysock_out_t *)mp->b_rptr)->ks_out_serial;
2298 		mp1 = mp->b_cont;	/* Get M_DATA portion. */
2299 		freeb(mp);
2300 		samsg = (sadb_msg_t *)mp1->b_rptr;
2301 		if (samsg->sadb_msg_type == SADB_FLUSH ||
2302 		    (samsg->sadb_msg_type == SADB_DUMP &&
2303 		    samsg->sadb_msg_len == SADB_8TO64(sizeof (*samsg)))) {
2304 			/*
2305 			 * If I'm an end-of-FLUSH or an end-of-DUMP marker...
2306 			 */
2307 			ASSERT(keystack->keystack_flushdump != 0);
2308 						/* Am I flushing? */
2309 
2310 			mutex_enter(&kc->kc_lock);
2311 			kc->kc_flags &= ~KC_FLUSHING;
2312 			mutex_exit(&kc->kc_lock);
2313 
2314 			if (samsg->sadb_msg_errno != 0)
2315 				keystack->keystack_flushdump_errno =
2316 				    samsg->sadb_msg_errno;
2317 
2318 			/*
2319 			 * Lower the atomic "flushing" count.  If it's
2320 			 * the last one, send up the end-of-{FLUSH,DUMP} to
2321 			 * the appropriate PF_KEY socket.
2322 			 */
2323 			if (atomic_add_32_nv(&keystack->keystack_flushdump,
2324 			    -1) != 0) {
2325 				ks1dbg(keystack,
2326 				    ("One flush/dump message back from %d,"
2327 				    " more to go.\n", samsg->sadb_msg_satype));
2328 				freemsg(mp1);
2329 				return;
2330 			}
2331 
2332 			samsg->sadb_msg_errno =
2333 			    (uint8_t)keystack->keystack_flushdump_errno;
2334 			if (samsg->sadb_msg_type == SADB_DUMP) {
2335 				samsg->sadb_msg_seq = 0;
2336 			}
2337 		}
2338 		keysock_passup(mp1, samsg, serial, kc,
2339 		    (samsg->sadb_msg_type == SADB_DUMP), keystack);
2340 		return;
2341 	case KEYSOCK_HELLO_ACK:
2342 		/* Aha, now we can link in the consumer! */
2343 		ksa = (keysock_hello_ack_t *)ii;
2344 		keysock_link_consumer(ksa->ks_hello_satype, kc);
2345 		freemsg(mp);
2346 		return;
2347 	default:
2348 		ks1dbg(keystack, ("Hmmm, an IPsec info I'm not used to, 0x%x\n",
2349 		    ii->ipsec_info_type));
2350 		putnext(q, mp);
2351 	}
2352 }
2353 
2354 /*
2355  * So we can avoid external linking problems....
2356  */
2357 boolean_t
2358 keysock_extended_reg(netstack_t *ns)
2359 {
2360 	keysock_stack_t	*keystack = ns->netstack_keysock;
2361 
2362 	return (keystack->keystack_num_extended != 0);
2363 }
2364 
2365 uint32_t
2366 keysock_next_seq(netstack_t *ns)
2367 {
2368 	keysock_stack_t	*keystack = ns->netstack_keysock;
2369 
2370 	return (atomic_add_32_nv(&keystack->keystack_acquire_seq, -1));
2371 }
2372