xref: /illumos-gate/usr/src/uts/common/inet/ip/keysock.c (revision 4e93fb0f6383eaac21897dcdae56b87118131e4d)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 
29 #include <sys/param.h>
30 #include <sys/types.h>
31 #include <sys/stream.h>
32 #include <sys/strsubr.h>
33 #include <sys/strsun.h>
34 #include <sys/stropts.h>
35 #include <sys/vnode.h>
36 #include <sys/zone.h>
37 #include <sys/strlog.h>
38 #include <sys/sysmacros.h>
39 #define	_SUN_TPI_VERSION 2
40 #include <sys/tihdr.h>
41 #include <sys/timod.h>
42 #include <sys/tiuser.h>
43 #include <sys/ddi.h>
44 #include <sys/sunddi.h>
45 #include <sys/sunldi.h>
46 #include <sys/file.h>
47 #include <sys/modctl.h>
48 #include <sys/debug.h>
49 #include <sys/kmem.h>
50 #include <sys/cmn_err.h>
51 #include <sys/proc.h>
52 #include <sys/suntpi.h>
53 #include <sys/atomic.h>
54 #include <sys/mkdev.h>
55 #include <sys/policy.h>
56 #include <sys/disp.h>
57 
58 #include <sys/socket.h>
59 #include <netinet/in.h>
60 #include <net/pfkeyv2.h>
61 
62 #include <inet/common.h>
63 #include <netinet/ip6.h>
64 #include <inet/ip.h>
65 #include <inet/mi.h>
66 #include <inet/nd.h>
67 #include <inet/optcom.h>
68 #include <inet/ipsec_info.h>
69 #include <inet/ipsec_impl.h>
70 #include <inet/keysock.h>
71 
72 #include <sys/isa_defs.h>
73 
/*
 * This is a transport provider for the PF_KEY key management socket.
 * (See RFC 2367 for details.)
 * Downstream messages are wrapped in keysock consumer interface KEYSOCK_IN
 * messages (see ipsec_info.h), and passed to the appropriate consumer.
 * Upstream messages are generated for all open PF_KEY sockets, when
 * appropriate, as well as for the sender (as long as SO_USELOOPBACK is
 * enabled) in reply to downstream messages.
 *
 * Upstream messages must be created asynchronously for the following
 * situations:
 *
 *	1.) A keysock consumer requires an SA, and there is currently none.
 *	2.) An SA expires, either hard or soft lifetime.
 *	3.) Other events a consumer deems fit.
 *
 * The MT model of this is PERMOD, with shared put procedures.  Two types of
 * messages, SADB_FLUSH and SADB_DUMP, need to lock down the perimeter to send
 * down the *multiple* messages they create.
 */
94 
95 static vmem_t *keysock_vmem;		/* for minor numbers. */
96 
97 #define	KEYSOCK_MAX_CONSUMERS 256
98 
99 /* Default structure copied into T_INFO_ACK messages (from rts.c...) */
100 static struct T_info_ack keysock_g_t_info_ack = {
101 	T_INFO_ACK,
102 	T_INFINITE,	/* TSDU_size. Maximum size messages. */
103 	T_INVALID,	/* ETSDU_size. No expedited data. */
104 	T_INVALID,	/* CDATA_size. No connect data. */
105 	T_INVALID,	/* DDATA_size. No disconnect data. */
106 	0,		/* ADDR_size. */
107 	0,		/* OPT_size. No user-settable options */
108 	64 * 1024,	/* TIDU_size. keysock allows maximum size messages. */
109 	T_COTS,		/* SERV_type. keysock supports connection oriented. */
110 	TS_UNBND,	/* CURRENT_state. This is set from keysock_state. */
111 	(XPG4_1)	/* Provider flags */
112 };
113 
114 /* Named Dispatch Parameter Management Structure */
115 typedef struct keysockparam_s {
116 	uint_t	keysock_param_min;
117 	uint_t	keysock_param_max;
118 	uint_t	keysock_param_value;
119 	char	*keysock_param_name;
120 } keysockparam_t;
121 
/*
 * Table of NDD variables supported by keysock.  keysock_stack_init() copies
 * this table for each stack instance and registers the copy with that
 * stack's keystack_g_nd through keysock_param_register().
 * All of these are alterable, within the min/max values given, at run time.
 */
127 static	keysockparam_t	lcl_param_arr[] = {
128 	/* min	max	value	name */
129 	{ 4096, 65536,	8192,	"keysock_xmit_hiwat"},
130 	{ 0,	65536,	1024,	"keysock_xmit_lowat"},
131 	{ 4096, 65536,	8192,	"keysock_recv_hiwat"},
132 	{ 65536, 1024*1024*1024, 256*1024,	"keysock_max_buf"},
133 	{ 0,	3,	0,	"keysock_debug"},
134 };
135 #define	keystack_xmit_hiwat	keystack_params[0].keysock_param_value
136 #define	keystack_xmit_lowat	keystack_params[1].keysock_param_value
137 #define	keystack_recv_hiwat	keystack_params[2].keysock_param_value
138 #define	keystack_max_buf	keystack_params[3].keysock_param_value
139 #define	keystack_debug	keystack_params[4].keysock_param_value
140 
/*
 * Debug output helpers.  The second argument is a fully parenthesized
 * printf() argument list, e.g. ks1dbg(keystack, ("x = %d\n", x));
 *
 * ks0dbg() prints unconditionally; ksNdbg() prints only when the stack's
 * keysock_debug tunable is at least N.
 *
 * The conditional forms carry their own "else" so that a caller's
 *	if (cond) ks1dbg(...); else ...
 * keeps its else attached to the caller's if rather than to the macro's,
 * and the stack argument is parenthesized so any pointer expression works.
 */
#define	ks0dbg(a)	printf a
/* NOTE:  != 0 instead of > 0 so lint doesn't complain. */
#define	ks1dbg(keystack, a)	\
	if ((keystack)->keystack_debug != 0) printf a; else ((void)0)
#define	ks2dbg(keystack, a)	\
	if ((keystack)->keystack_debug > 1) printf a; else ((void)0)
#define	ks3dbg(keystack, a)	\
	if ((keystack)->keystack_debug > 2) printf a; else ((void)0)
146 
147 static int keysock_close(queue_t *);
148 static int keysock_open(queue_t *, dev_t *, int, int, cred_t *);
149 static void keysock_wput(queue_t *, mblk_t *);
150 static void keysock_rput(queue_t *, mblk_t *);
151 static void keysock_rsrv(queue_t *);
152 static void keysock_passup(mblk_t *, sadb_msg_t *, minor_t,
153     keysock_consumer_t *, boolean_t, keysock_stack_t *);
154 static void *keysock_stack_init(netstackid_t stackid, netstack_t *ns);
155 static void keysock_stack_fini(netstackid_t stackid, void *arg);
156 
157 static struct module_info info = {
158 	5138, "keysock", 1, INFPSZ, 512, 128
159 };
160 
161 static struct qinit rinit = {
162 	(pfi_t)keysock_rput, (pfi_t)keysock_rsrv, keysock_open, keysock_close,
163 	NULL, &info
164 };
165 
166 static struct qinit winit = {
167 	(pfi_t)keysock_wput, NULL, NULL, NULL, NULL, &info
168 };
169 
170 struct streamtab keysockinfo = {
171 	&rinit, &winit
172 };
173 
174 extern struct modlinkage *keysock_modlp;
175 
176 /*
177  * Plumb IPsec.
178  *
179  * NOTE:  New "default" modules will need to be loaded here if needed before
180  *	  boot time.
181  */
182 
/*
 * Module and device names used while plumbing.  These live at file scope
 * (rather than as literals at the call sites) to keep lint quiet.
 */
static char *KEYSOCK = "keysock";
static char *STRMOD = "strmod";
static char *IPSECAH = "ipsecah";
static char *IPSECESP = "ipsecesp";
static char *IPSECAHDEV = "/devices/pseudo/ipsecah@0:ipsecah";
static char *IPSECESPDEV = "/devices/pseudo/ipsecesp@0:ipsecesp";
static char *IP6DEV = "/devices/pseudo/ip6@0:ip6";
191 
192 /*
193  * Load the other ipsec modules and plumb them together.
194  */
195 int
196 keysock_plumb_ipsec(netstack_t *ns)
197 {
198 	ldi_handle_t	lh, ip6_lh = NULL;
199 	ldi_ident_t	li = NULL;
200 	int		err = 0;
201 	int		muxid, rval;
202 	boolean_t	esp_present = B_TRUE;
203 	cred_t		*cr;
204 	keysock_stack_t *keystack = ns->netstack_keysock;
205 
206 #ifdef NS_DEBUG
207 	(void) printf("keysock_plumb_ipsec(%d)\n",
208 	    ns->netstack_stackid);
209 #endif
210 
211 	keystack->keystack_plumbed = 0;	/* we're trying again.. */
212 
213 	cr = zone_get_kcred(netstackid_to_zoneid(
214 		keystack->keystack_netstack->netstack_stackid));
215 	ASSERT(cr != NULL);
216 	/*
217 	 * Load up the drivers (AH/ESP).
218 	 *
219 	 * I do this separately from the actual plumbing in case this function
220 	 * ever gets called from a diskless boot before the root filesystem is
221 	 * up.  I don't have to worry about "keysock" because, well, if I'm
222 	 * here, keysock must've loaded successfully.
223 	 */
224 	if (i_ddi_attach_pseudo_node(IPSECAH) == NULL) {
225 		ks0dbg(("IPsec:  AH failed to attach.\n"));
226 		goto bail;
227 	}
228 	if (i_ddi_attach_pseudo_node(IPSECESP) == NULL) {
229 		ks0dbg(("IPsec:  ESP failed to attach.\n"));
230 		esp_present = B_FALSE;
231 	}
232 
233 	/*
234 	 * Set up the IP streams for AH and ESP, as well as tacking keysock
235 	 * on top of them.  Assume keysock has set the autopushes up already.
236 	 */
237 
238 	/* Open IP. */
239 	err = ldi_ident_from_mod(keysock_modlp, &li);
240 	if (err) {
241 		ks0dbg(("IPsec:  lid_ident_from_mod failed (err %d).\n",
242 		    err));
243 		goto bail;
244 	}
245 
246 	err = ldi_open_by_name(IP6DEV, FREAD|FWRITE, cr, &ip6_lh, li);
247 	if (err) {
248 		ks0dbg(("IPsec:  Open of IP6 failed (err %d).\n", err));
249 		goto bail;
250 	}
251 
252 	/* PLINK KEYSOCK/AH */
253 	err = ldi_open_by_name(IPSECAHDEV, FREAD|FWRITE, cr, &lh, li);
254 	if (err) {
255 		ks0dbg(("IPsec:  Open of AH failed (err %d).\n", err));
256 		goto bail;
257 	}
258 	err = ldi_ioctl(lh,
259 	    I_PUSH, (intptr_t)KEYSOCK, FKIOCTL, cr, &rval);
260 	if (err) {
261 		ks0dbg(("IPsec:  Push of KEYSOCK onto AH failed (err %d).\n",
262 		    err));
263 		(void) ldi_close(lh, FREAD|FWRITE, cr);
264 		goto bail;
265 	}
266 	err = ldi_ioctl(ip6_lh, I_PLINK, (intptr_t)lh,
267 			FREAD+FWRITE+FNOCTTY+FKIOCTL, cr, &muxid);
268 	if (err) {
269 		ks0dbg(("IPsec:  PLINK of KEYSOCK/AH failed (err %d).\n", err));
270 		(void) ldi_close(lh, FREAD|FWRITE, cr);
271 		goto bail;
272 	}
273 	(void) ldi_close(lh, FREAD|FWRITE, cr);
274 
275 	/* PLINK KEYSOCK/ESP */
276 	if (esp_present) {
277 		err = ldi_open_by_name(IPSECESPDEV,
278 		    FREAD|FWRITE, cr, &lh, li);
279 		if (err) {
280 			ks0dbg(("IPsec:  Open of ESP failed (err %d).\n", err));
281 			goto bail;
282 		}
283 		err = ldi_ioctl(lh,
284 		    I_PUSH, (intptr_t)KEYSOCK, FKIOCTL, cr, &rval);
285 		if (err) {
286 			ks0dbg(("IPsec:  "
287 			    "Push of KEYSOCK onto ESP failed (err %d).\n",
288 			    err));
289 			(void) ldi_close(lh, FREAD|FWRITE, cr);
290 			goto bail;
291 		}
292 		err = ldi_ioctl(ip6_lh, I_PLINK, (intptr_t)lh,
293 				FREAD+FWRITE+FNOCTTY+FKIOCTL, cr, &muxid);
294 		if (err) {
295 			ks0dbg(("IPsec:  "
296 			    "PLINK of KEYSOCK/ESP failed (err %d).\n", err));
297 			(void) ldi_close(lh, FREAD|FWRITE, cr);
298 			goto bail;
299 		}
300 		(void) ldi_close(lh, FREAD|FWRITE, cr);
301 	}
302 
303 bail:
304 	keystack->keystack_plumbed = (err == 0) ? 1 : -1;
305 	if (ip6_lh != NULL) {
306 		(void) ldi_close(ip6_lh, FREAD|FWRITE, cr);
307 	}
308 	if (li != NULL)
309 		ldi_ident_release(li);
310 #ifdef NS_DEBUG
311 	(void) printf("keysock_plumb_ipsec -> %d\n",
312 	    keystack->keystack_plumbed);
313 #endif
314 	crfree(cr);
315 	return (err);
316 }
317 
318 /* ARGSUSED */
319 static int
320 keysock_param_get(q, mp, cp, cr)
321 	queue_t	*q;
322 	mblk_t	*mp;
323 	caddr_t	cp;
324 	cred_t *cr;
325 {
326 	keysockparam_t	*keysockpa = (keysockparam_t *)cp;
327 	uint_t value;
328 	keysock_t *ks = (keysock_t *)q->q_ptr;
329 	keysock_stack_t	*keystack = ks->keysock_keystack;
330 
331 	mutex_enter(&keystack->keystack_param_lock);
332 	value = keysockpa->keysock_param_value;
333 	mutex_exit(&keystack->keystack_param_lock);
334 
335 	(void) mi_mpprintf(mp, "%u", value);
336 	return (0);
337 }
338 
339 /* This routine sets an NDD variable in a keysockparam_t structure. */
340 /* ARGSUSED */
341 static int
342 keysock_param_set(q, mp, value, cp, cr)
343 	queue_t	*q;
344 	mblk_t	*mp;
345 	char	*value;
346 	caddr_t	cp;
347 	cred_t *cr;
348 {
349 	ulong_t	new_value;
350 	keysockparam_t	*keysockpa = (keysockparam_t *)cp;
351 	keysock_t *ks = (keysock_t *)q->q_ptr;
352 	keysock_stack_t	*keystack = ks->keysock_keystack;
353 
354 	/* Convert the value from a string into a long integer. */
355 	if (ddi_strtoul(value, NULL, 10, &new_value) != 0)
356 		return (EINVAL);
357 
358 	mutex_enter(&keystack->keystack_param_lock);
359 	/*
360 	 * Fail the request if the new value does not lie within the
361 	 * required bounds.
362 	 */
363 	if (new_value < keysockpa->keysock_param_min ||
364 	    new_value > keysockpa->keysock_param_max) {
365 		mutex_exit(&keystack->keystack_param_lock);
366 		return (EINVAL);
367 	}
368 
369 	/* Set the new value */
370 	keysockpa->keysock_param_value = new_value;
371 	mutex_exit(&keystack->keystack_param_lock);
372 
373 	return (0);
374 }
375 
376 /*
377  * Initialize keysock at module load time
378  */
379 boolean_t
380 keysock_ddi_init(void)
381 {
382 	keysock_max_optsize = optcom_max_optsize(
383 	    keysock_opt_obj.odb_opt_des_arr, keysock_opt_obj.odb_opt_arr_cnt);
384 
385 	keysock_vmem = vmem_create("keysock", (void *)1, MAXMIN, 1,
386 	    NULL, NULL, NULL, 1, VM_SLEEP | VMC_IDENTIFIER);
387 
388 	/*
389 	 * We want to be informed each time a stack is created or
390 	 * destroyed in the kernel, so we can maintain the
391 	 * set of keysock_stack_t's.
392 	 */
393 	netstack_register(NS_KEYSOCK, keysock_stack_init, NULL,
394 	    keysock_stack_fini);
395 
396 	return (B_TRUE);
397 }
398 
399 /*
400  * Walk through the param array specified registering each element with the
401  * named dispatch handler.
402  */
403 static boolean_t
404 keysock_param_register(IDP *ndp, keysockparam_t *ksp, int cnt)
405 {
406 	for (; cnt-- > 0; ksp++) {
407 		if (ksp->keysock_param_name != NULL &&
408 		    ksp->keysock_param_name[0]) {
409 			if (!nd_load(ndp,
410 			    ksp->keysock_param_name,
411 			    keysock_param_get, keysock_param_set,
412 			    (caddr_t)ksp)) {
413 				nd_free(ndp);
414 				return (B_FALSE);
415 			}
416 		}
417 	}
418 	return (B_TRUE);
419 }
420 
421 /*
422  * Initialize keysock for one stack instance
423  */
424 /* ARGSUSED */
425 static void *
426 keysock_stack_init(netstackid_t stackid, netstack_t *ns)
427 {
428 	keysock_stack_t	*keystack;
429 	keysockparam_t *ksp;
430 
431 	keystack = (keysock_stack_t *)kmem_zalloc(sizeof (*keystack), KM_SLEEP);
432 	keystack->keystack_netstack = ns;
433 
434 	keystack->keystack_acquire_seq = 0xffffffff;
435 
436 	ksp = (keysockparam_t *)kmem_alloc(sizeof (lcl_param_arr), KM_SLEEP);
437 	keystack->keystack_params = ksp;
438 	bcopy(lcl_param_arr, ksp, sizeof (lcl_param_arr));
439 
440 	(void) keysock_param_register(&keystack->keystack_g_nd, ksp,
441 	    A_CNT(lcl_param_arr));
442 
443 	mutex_init(&keystack->keystack_list_lock, NULL, MUTEX_DEFAULT, NULL);
444 	mutex_init(&keystack->keystack_consumers_lock,
445 	    NULL, MUTEX_DEFAULT, NULL);
446 	mutex_init(&keystack->keystack_param_lock, NULL, MUTEX_DEFAULT, NULL);
447 	return (keystack);
448 }
449 
450 /*
451  * Free NDD variable space, and other destructors, for keysock.
452  */
453 void
454 keysock_ddi_destroy(void)
455 {
456 	netstack_unregister(NS_KEYSOCK);
457 	vmem_destroy(keysock_vmem);
458 }
459 
460 /*
461  * Remove one stack instance from keysock
462  */
463 /* ARGSUSED */
464 static void
465 keysock_stack_fini(netstackid_t stackid, void *arg)
466 {
467 	keysock_stack_t *keystack = (keysock_stack_t *)arg;
468 
469 	nd_free(&keystack->keystack_g_nd);
470 	kmem_free(keystack->keystack_params, sizeof (lcl_param_arr));
471 	keystack->keystack_params = NULL;
472 
473 	mutex_destroy(&keystack->keystack_list_lock);
474 	mutex_destroy(&keystack->keystack_consumers_lock);
475 	mutex_destroy(&keystack->keystack_param_lock);
476 
477 	kmem_free(keystack, sizeof (*keystack));
478 }
479 
480 /*
481  * Close routine for keysock.
482  */
483 static int
484 keysock_close(queue_t *q)
485 {
486 	keysock_t *ks;
487 	keysock_consumer_t *kc;
488 	void *ptr = q->q_ptr;
489 	int size;
490 	keysock_stack_t	*keystack;
491 
492 
493 	qprocsoff(q);
494 
495 	/* Safe assumption. */
496 	ASSERT(ptr != NULL);
497 
498 	if (WR(q)->q_next) {
499 		kc = (keysock_consumer_t *)ptr;
500 		keystack = kc->kc_keystack;
501 
502 		ks1dbg(keystack, ("Module close, removing a consumer (%d).\n",
503 		    kc->kc_sa_type));
504 		/*
505 		 * Because of PERMOD open/close exclusive perimeter, I
506 		 * can inspect KC_FLUSHING w/o locking down kc->kc_lock.
507 		 */
508 		if (kc->kc_flags & KC_FLUSHING) {
509 			/*
510 			 * If this decrement was the last one, send
511 			 * down the next pending one, if any.
512 			 *
513 			 * With a PERMOD perimeter, the mutexes ops aren't
514 			 * really necessary, but if we ever loosen up, we will
515 			 * have this bit covered already.
516 			 */
517 			keystack->keystack_flushdump--;
518 			if (keystack->keystack_flushdump == 0) {
519 				/*
520 				 * The flush/dump terminated by having a
521 				 * consumer go away.  I need to send up to the
522 				 * appropriate keysock all of the relevant
523 				 * information.  Unfortunately, I don't
524 				 * have that handy.
525 				 */
526 				ks0dbg(("Consumer went away while flushing or"
527 				    " dumping.\n"));
528 			}
529 		}
530 		size = sizeof (keysock_consumer_t);
531 		mutex_enter(&keystack->keystack_consumers_lock);
532 		keystack->keystack_consumers[kc->kc_sa_type] = NULL;
533 		mutex_exit(&keystack->keystack_consumers_lock);
534 		mutex_destroy(&kc->kc_lock);
535 		netstack_rele(kc->kc_keystack->keystack_netstack);
536 	} else {
537 		ks = (keysock_t *)ptr;
538 		keystack = ks->keysock_keystack;
539 
540 		ks3dbg(keystack,
541 		    ("Driver close, PF_KEY socket is going away.\n"));
542 		if ((ks->keysock_flags & KEYSOCK_EXTENDED) != 0)
543 			atomic_add_32(&keystack->keystack_num_extended, -1);
544 		size = sizeof (keysock_t);
545 		mutex_enter(&keystack->keystack_list_lock);
546 		*(ks->keysock_ptpn) = ks->keysock_next;
547 		if (ks->keysock_next != NULL)
548 			ks->keysock_next->keysock_ptpn = ks->keysock_ptpn;
549 		mutex_exit(&keystack->keystack_list_lock);
550 		mutex_destroy(&ks->keysock_lock);
551 		vmem_free(keysock_vmem, (void *)(uintptr_t)ks->keysock_serial,
552 		    1);
553 		netstack_rele(ks->keysock_keystack->keystack_netstack);
554 	}
555 
556 	/* Now I'm free. */
557 	kmem_free(ptr, size);
558 	return (0);
559 }
560 /*
561  * Open routine for keysock.
562  */
563 /* ARGSUSED */
564 static int
565 keysock_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
566 {
567 	keysock_t *ks;
568 	keysock_consumer_t *kc;
569 	mblk_t *mp;
570 	ipsec_info_t *ii;
571 	netstack_t *ns;
572 	keysock_stack_t *keystack;
573 
574 	if (secpolicy_ip_config(credp, B_FALSE) != 0) {
575 		/* Privilege debugging will log the error */
576 		return (EPERM);
577 	}
578 
579 	if (q->q_ptr != NULL)
580 		return (0);  /* Re-open of an already open instance. */
581 
582 	ns = netstack_find_by_cred(credp);
583 	ASSERT(ns != NULL);
584 	keystack = ns->netstack_keysock;
585 	ASSERT(keystack != NULL);
586 
587 	ks3dbg(keystack, ("Entering keysock open.\n"));
588 
589 	if (keystack->keystack_plumbed < 1) {
590 		netstack_t *ns = keystack->keystack_netstack;
591 
592 		keystack->keystack_plumbed = 0;
593 #ifdef NS_DEBUG
594 		printf("keysock_open(%d) - plumb\n",
595 		    keystack->keystack_netstack->netstack_stackid);
596 #endif
597 		/*
598 		 * Don't worry about ipsec_failure being true here.
599 		 * (See ip.c).  An open of keysock should try and force
600 		 * the issue.  Maybe it was a transient failure.
601 		 */
602 		ipsec_loader_loadnow(ns->netstack_ipsec);
603 	}
604 
605 	if (sflag & MODOPEN) {
606 		/* Initialize keysock_consumer state here. */
607 		kc = kmem_zalloc(sizeof (keysock_consumer_t), KM_NOSLEEP);
608 		if (kc == NULL) {
609 			netstack_rele(keystack->keystack_netstack);
610 			return (ENOMEM);
611 		}
612 		mutex_init(&kc->kc_lock, NULL, MUTEX_DEFAULT, 0);
613 		kc->kc_rq = q;
614 		kc->kc_wq = WR(q);
615 
616 		q->q_ptr = kc;
617 		WR(q)->q_ptr = kc;
618 
619 		kc->kc_keystack = keystack;
620 		qprocson(q);
621 
622 		/*
623 		 * Send down initial message to whatever I was pushed on top
624 		 * of asking for its consumer type.  The reply will set it.
625 		 */
626 
627 		/* Allocate it. */
628 		mp = allocb(sizeof (ipsec_info_t), BPRI_HI);
629 		if (mp == NULL) {
630 			ks1dbg(keystack, (
631 			    "keysock_open:  Cannot allocate KEYSOCK_HELLO.\n"));
632 			/* Do I need to set these to null? */
633 			q->q_ptr = NULL;
634 			WR(q)->q_ptr = NULL;
635 			mutex_destroy(&kc->kc_lock);
636 			kmem_free(kc, sizeof (*kc));
637 			netstack_rele(keystack->keystack_netstack);
638 			return (ENOMEM);
639 		}
640 
641 		/* If I allocated okay, putnext to what I was pushed atop. */
642 		mp->b_wptr += sizeof (ipsec_info_t);
643 		mp->b_datap->db_type = M_CTL;
644 		ii = (ipsec_info_t *)mp->b_rptr;
645 		ii->ipsec_info_type = KEYSOCK_HELLO;
646 		/* Length only of type/len. */
647 		ii->ipsec_info_len = sizeof (ii->ipsec_allu);
648 		ks2dbg(keystack, ("Ready to putnext KEYSOCK_HELLO.\n"));
649 		putnext(kc->kc_wq, mp);
650 	} else {
651 		minor_t ksminor;
652 
653 		/* Initialize keysock state. */
654 
655 		ks2dbg(keystack, ("Made it into PF_KEY socket open.\n"));
656 
657 		ksminor = (minor_t)(uintptr_t)
658 		    vmem_alloc(keysock_vmem, 1, VM_NOSLEEP);
659 		if (ksminor == 0) {
660 			netstack_rele(keystack->keystack_netstack);
661 			return (ENOMEM);
662 		}
663 		ks = kmem_zalloc(sizeof (keysock_t), KM_NOSLEEP);
664 		if (ks == NULL) {
665 			vmem_free(keysock_vmem, (void *)(uintptr_t)ksminor, 1);
666 			netstack_rele(keystack->keystack_netstack);
667 			return (ENOMEM);
668 		}
669 
670 		mutex_init(&ks->keysock_lock, NULL, MUTEX_DEFAULT, 0);
671 		ks->keysock_rq = q;
672 		ks->keysock_wq = WR(q);
673 		ks->keysock_state = TS_UNBND;
674 		ks->keysock_serial = ksminor;
675 
676 		q->q_ptr = ks;
677 		WR(q)->q_ptr = ks;
678 		ks->keysock_keystack = keystack;
679 
680 		/*
681 		 * The receive hiwat is only looked at on the stream head
682 		 * queue.  Store in q_hiwat in order to return on SO_RCVBUF
683 		 * getsockopts.
684 		 */
685 
686 		q->q_hiwat = keystack->keystack_recv_hiwat;
687 
688 		/*
689 		 * The transmit hiwat/lowat is only looked at on IP's queue.
690 		 * Store in q_hiwat/q_lowat in order to return on
691 		 * SO_SNDBUF/SO_SNDLOWAT getsockopts.
692 		 */
693 
694 		WR(q)->q_hiwat = keystack->keystack_xmit_hiwat;
695 		WR(q)->q_lowat = keystack->keystack_xmit_lowat;
696 
697 		*devp = makedevice(getmajor(*devp), ksminor);
698 
699 		/*
700 		 * Thread keysock into the global keysock list.
701 		 */
702 		mutex_enter(&keystack->keystack_list_lock);
703 		ks->keysock_next = keystack->keystack_list;
704 		ks->keysock_ptpn = &keystack->keystack_list;
705 		if (keystack->keystack_list != NULL) {
706 			keystack->keystack_list->keysock_ptpn =
707 			    &ks->keysock_next;
708 		}
709 		keystack->keystack_list = ks;
710 		mutex_exit(&keystack->keystack_list_lock);
711 
712 		qprocson(q);
713 		(void) mi_set_sth_hiwat(q, keystack->keystack_recv_hiwat);
714 		/*
715 		 * Wait outside the keysock module perimeter for IPsec
716 		 * plumbing to be completed.  If it fails, keysock_close()
717 		 * undoes everything we just did.
718 		 */
719 		if (!ipsec_loader_wait(q,
720 		    keystack->keystack_netstack->netstack_ipsec)) {
721 			(void) keysock_close(q);
722 			return (EPFNOSUPPORT);
723 		}
724 	}
725 
726 	return (0);
727 }
728 
729 /* BELOW THIS LINE ARE ROUTINES INCLUDING AND RELATED TO keysock_wput(). */
730 
731 /*
732  * Copy relevant state bits.
733  */
734 static void
735 keysock_copy_info(struct T_info_ack *tap, keysock_t *ks)
736 {
737 	*tap = keysock_g_t_info_ack;
738 	tap->CURRENT_state = ks->keysock_state;
739 	tap->OPT_size = keysock_max_optsize;
740 }
741 
742 /*
743  * This routine responds to T_CAPABILITY_REQ messages.  It is called by
744  * keysock_wput.  Much of the T_CAPABILITY_ACK information is copied from
745  * keysock_g_t_info_ack.  The current state of the stream is copied from
746  * keysock_state.
747  */
748 static void
749 keysock_capability_req(queue_t *q, mblk_t *mp)
750 {
751 	keysock_t *ks = (keysock_t *)q->q_ptr;
752 	t_uscalar_t cap_bits1;
753 	struct T_capability_ack	*tcap;
754 
755 	cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1;
756 
757 	mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack),
758 		mp->b_datap->db_type, T_CAPABILITY_ACK);
759 	if (mp == NULL)
760 		return;
761 
762 	tcap = (struct T_capability_ack *)mp->b_rptr;
763 	tcap->CAP_bits1 = 0;
764 
765 	if (cap_bits1 & TC1_INFO) {
766 		keysock_copy_info(&tcap->INFO_ack, ks);
767 		tcap->CAP_bits1 |= TC1_INFO;
768 	}
769 
770 	qreply(q, mp);
771 }
772 
773 /*
774  * This routine responds to T_INFO_REQ messages. It is called by
775  * keysock_wput_other.
776  * Most of the T_INFO_ACK information is copied from keysock_g_t_info_ack.
777  * The current state of the stream is copied from keysock_state.
778  */
779 static void
780 keysock_info_req(q, mp)
781 	queue_t	*q;
782 	mblk_t	*mp;
783 {
784 	mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO,
785 	    T_INFO_ACK);
786 	if (mp == NULL)
787 		return;
788 	keysock_copy_info((struct T_info_ack *)mp->b_rptr,
789 	    (keysock_t *)q->q_ptr);
790 	qreply(q, mp);
791 }
792 
793 /*
794  * keysock_err_ack. This routine creates a
795  * T_ERROR_ACK message and passes it
796  * upstream.
797  */
798 static void
799 keysock_err_ack(q, mp, t_error, sys_error)
800 	queue_t	*q;
801 	mblk_t	*mp;
802 	int	t_error;
803 	int	sys_error;
804 {
805 	if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL)
806 		qreply(q, mp);
807 }
808 
809 /*
810  * This routine retrieves the current status of socket options.
811  * It returns the size of the option retrieved.
812  */
813 /* ARGSUSED */
814 int
815 keysock_opt_get(queue_t *q, int level, int name, uchar_t *ptr)
816 {
817 	int *i1 = (int *)ptr;
818 	keysock_t *ks = (keysock_t *)q->q_ptr;
819 
820 	switch (level) {
821 	case SOL_SOCKET:
822 		mutex_enter(&ks->keysock_lock);
823 		switch (name) {
824 		case SO_TYPE:
825 			*i1 = SOCK_RAW;
826 			break;
827 		case SO_USELOOPBACK:
828 			*i1 = (int)(!((ks->keysock_flags & KEYSOCK_NOLOOP) ==
829 			    KEYSOCK_NOLOOP));
830 			break;
831 		/*
832 		 * The following two items can be manipulated,
833 		 * but changing them should do nothing.
834 		 */
835 		case SO_SNDBUF:
836 			*i1 = (int)q->q_hiwat;
837 			break;
838 		case SO_RCVBUF:
839 			*i1 = (int)(RD(q)->q_hiwat);
840 			break;
841 		}
842 		mutex_exit(&ks->keysock_lock);
843 		break;
844 	default:
845 		return (0);
846 	}
847 	return (sizeof (int));
848 }
849 
850 /*
851  * This routine sets socket options.
852  */
853 /* ARGSUSED */
854 int
855 keysock_opt_set(queue_t *q, uint_t mgmt_flags, int level,
856     int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp,
857     uchar_t *outvalp, void *thisdg_attrs, cred_t *cr, mblk_t *mblk)
858 {
859 	int *i1 = (int *)invalp;
860 	keysock_t *ks = (keysock_t *)q->q_ptr;
861 	keysock_stack_t	*keystack = ks->keysock_keystack;
862 
863 	switch (level) {
864 	case SOL_SOCKET:
865 		mutex_enter(&ks->keysock_lock);
866 		switch (name) {
867 		case SO_USELOOPBACK:
868 			if (!(*i1))
869 				ks->keysock_flags |= KEYSOCK_NOLOOP;
870 			else ks->keysock_flags &= ~KEYSOCK_NOLOOP;
871 			break;
872 		case SO_SNDBUF:
873 			if (*i1 > keystack->keystack_max_buf)
874 				return (ENOBUFS);
875 			q->q_hiwat = *i1;
876 			break;
877 		case SO_RCVBUF:
878 			if (*i1 > keystack->keystack_max_buf)
879 				return (ENOBUFS);
880 			RD(q)->q_hiwat = *i1;
881 			(void) mi_set_sth_hiwat(RD(q), *i1);
882 			break;
883 		}
884 		mutex_exit(&ks->keysock_lock);
885 		break;
886 	}
887 	return (0);
888 }
889 
890 /*
891  * Handle STREAMS messages.
892  */
893 static void
894 keysock_wput_other(queue_t *q, mblk_t *mp)
895 {
896 	struct iocblk *iocp;
897 	int error;
898 	keysock_t *ks = (keysock_t *)q->q_ptr;
899 	keysock_stack_t	*keystack = ks->keysock_keystack;
900 	cred_t		*cr;
901 
902 	switch (mp->b_datap->db_type) {
903 	case M_PROTO:
904 	case M_PCPROTO:
905 		if ((mp->b_wptr - mp->b_rptr) < sizeof (long)) {
906 			ks3dbg(keystack, (
907 			    "keysock_wput_other: Not big enough M_PROTO\n"));
908 			freemsg(mp);
909 			return;
910 		}
911 		cr = zone_get_kcred(netstackid_to_zoneid(
912 			keystack->keystack_netstack->netstack_stackid));
913 		ASSERT(cr != NULL);
914 
915 		switch (((union T_primitives *)mp->b_rptr)->type) {
916 		case T_CAPABILITY_REQ:
917 			keysock_capability_req(q, mp);
918 			break;
919 		case T_INFO_REQ:
920 			keysock_info_req(q, mp);
921 			break;
922 		case T_SVR4_OPTMGMT_REQ:
923 			(void) svr4_optcom_req(q, mp, DB_CREDDEF(mp, cr),
924 			    &keysock_opt_obj);
925 			break;
926 		case T_OPTMGMT_REQ:
927 			(void) tpi_optcom_req(q, mp, DB_CREDDEF(mp, cr),
928 			    &keysock_opt_obj);
929 			break;
930 		case T_DATA_REQ:
931 		case T_EXDATA_REQ:
932 		case T_ORDREL_REQ:
933 			/* Illegal for keysock. */
934 			freemsg(mp);
935 			(void) putnextctl1(RD(q), M_ERROR, EPROTO);
936 			break;
937 		default:
938 			/* Not supported by keysock. */
939 			keysock_err_ack(q, mp, TNOTSUPPORT, 0);
940 			break;
941 		}
942 		crfree(cr);
943 		return;
944 	case M_IOCTL:
945 		iocp = (struct iocblk *)mp->b_rptr;
946 		error = EINVAL;
947 
948 		switch (iocp->ioc_cmd) {
949 		case ND_SET:
950 		case ND_GET:
951 			if (nd_getset(q, keystack->keystack_g_nd, mp)) {
952 				qreply(q, mp);
953 				return;
954 			} else
955 				error = ENOENT;
956 			/* FALLTHRU */
957 		default:
958 			miocnak(q, mp, 0, error);
959 			return;
960 		}
961 	case M_FLUSH:
962 		if (*mp->b_rptr & FLUSHW) {
963 			flushq(q, FLUSHALL);
964 			*mp->b_rptr &= ~FLUSHW;
965 		}
966 		if (*mp->b_rptr & FLUSHR) {
967 			qreply(q, mp);
968 			return;
969 		}
970 		/* Else FALLTHRU */
971 	}
972 
973 	/* If fell through, just black-hole the message. */
974 	freemsg(mp);
975 }
976 
977 /*
978  * Transmit a PF_KEY error message to the instance either pointed to
979  * by ks, the instance with serial number serial, or more, depending.
980  *
981  * The faulty message (or a reasonable facsimile thereof) is in mp.
982  * This function will free mp or recycle it for delivery, thereby causing
983  * the stream head to free it.
984  */
985 static void
986 keysock_error(keysock_t *ks, mblk_t *mp, int error, int diagnostic)
987 {
988 	sadb_msg_t *samsg = (sadb_msg_t *)mp->b_rptr;
989 	keysock_stack_t	*keystack = ks->keysock_keystack;
990 
991 	ASSERT(mp->b_datap->db_type == M_DATA);
992 
993 	if (samsg->sadb_msg_type < SADB_GETSPI ||
994 	    samsg->sadb_msg_type > SADB_MAX)
995 		samsg->sadb_msg_type = SADB_RESERVED;
996 
997 	/*
998 	 * Strip out extension headers.
999 	 */
1000 	ASSERT(mp->b_rptr + sizeof (*samsg) <= mp->b_datap->db_lim);
1001 	mp->b_wptr = mp->b_rptr + sizeof (*samsg);
1002 	samsg->sadb_msg_len = SADB_8TO64(sizeof (sadb_msg_t));
1003 	samsg->sadb_msg_errno = (uint8_t)error;
1004 	samsg->sadb_x_msg_diagnostic = (uint16_t)diagnostic;
1005 
1006 	keysock_passup(mp, samsg, ks->keysock_serial, NULL, B_FALSE, keystack);
1007 }
1008 
1009 /*
1010  * Pass down a message to a consumer.  Wrap it in KEYSOCK_IN, and copy
1011  * in the extv if passed in.
1012  */
1013 static void
1014 keysock_passdown(keysock_t *ks, mblk_t *mp, uint8_t satype, sadb_ext_t *extv[],
1015     boolean_t flushmsg)
1016 {
1017 	keysock_consumer_t *kc;
1018 	mblk_t *wrapper;
1019 	keysock_in_t *ksi;
1020 	int i;
1021 	keysock_stack_t	*keystack = ks->keysock_keystack;
1022 
1023 	wrapper = allocb(sizeof (ipsec_info_t), BPRI_HI);
1024 	if (wrapper == NULL) {
1025 		ks3dbg(keystack, ("keysock_passdown: allocb failed.\n"));
1026 		if (extv[SADB_EXT_KEY_ENCRYPT] != NULL)
1027 			bzero(extv[SADB_EXT_KEY_ENCRYPT],
1028 			    SADB_64TO8(
1029 				extv[SADB_EXT_KEY_ENCRYPT]->sadb_ext_len));
1030 		if (extv[SADB_EXT_KEY_AUTH] != NULL)
1031 			bzero(extv[SADB_EXT_KEY_AUTH],
1032 			    SADB_64TO8(
1033 				extv[SADB_EXT_KEY_AUTH]->sadb_ext_len));
1034 		if (flushmsg) {
1035 			ks0dbg((
1036 			    "keysock: Downwards flush/dump message failed!\n"));
1037 			/* If this is true, I hold the perimeter. */
1038 			keystack->keystack_flushdump--;
1039 		}
1040 		freemsg(mp);
1041 		return;
1042 	}
1043 
1044 	wrapper->b_datap->db_type = M_CTL;
1045 	ksi = (keysock_in_t *)wrapper->b_rptr;
1046 	ksi->ks_in_type = KEYSOCK_IN;
1047 	ksi->ks_in_len = sizeof (keysock_in_t);
1048 	if (extv[SADB_EXT_ADDRESS_SRC] != NULL)
1049 		ksi->ks_in_srctype = KS_IN_ADDR_UNKNOWN;
1050 	else ksi->ks_in_srctype = KS_IN_ADDR_NOTTHERE;
1051 	if (extv[SADB_EXT_ADDRESS_DST] != NULL)
1052 		ksi->ks_in_dsttype = KS_IN_ADDR_UNKNOWN;
1053 	else ksi->ks_in_dsttype = KS_IN_ADDR_NOTTHERE;
1054 	for (i = 0; i <= SADB_EXT_MAX; i++)
1055 		ksi->ks_in_extv[i] = extv[i];
1056 	ksi->ks_in_serial = ks->keysock_serial;
1057 	wrapper->b_wptr += sizeof (ipsec_info_t);
1058 	wrapper->b_cont = mp;
1059 
1060 	/*
1061 	 * Find the appropriate consumer where the message is passed down.
1062 	 */
1063 	kc = keystack->keystack_consumers[satype];
1064 	if (kc == NULL) {
1065 		freeb(wrapper);
1066 		keysock_error(ks, mp, EINVAL, SADB_X_DIAGNOSTIC_UNKNOWN_SATYPE);
1067 		if (flushmsg) {
1068 			ks0dbg((
1069 			    "keysock: Downwards flush/dump message failed!\n"));
1070 			/* If this is true, I hold the perimeter. */
1071 			keystack->keystack_flushdump--;
1072 		}
1073 		return;
1074 	}
1075 
1076 	/*
1077 	 * NOTE: There used to be code in here to spin while a flush or
1078 	 *	 dump finished.  Keysock now assumes that consumers have enough
1079 	 *	 MT-savviness to deal with that.
1080 	 */
1081 
1082 	/*
1083 	 * Current consumers (AH and ESP) are guaranteed to return a
1084 	 * FLUSH or DUMP message back, so when we reach here, we don't
1085 	 * have to worry about keysock_flushdumps.
1086 	 */
1087 
1088 	putnext(kc->kc_wq, wrapper);
1089 }
1090 
1091 /*
1092  * High-level reality checking of extensions.
1093  */
1094 static boolean_t
1095 ext_check(sadb_ext_t *ext, keysock_stack_t *keystack)
1096 {
1097 	int i;
1098 	uint64_t *lp;
1099 	sadb_ident_t *id;
1100 	char *idstr;
1101 
1102 	switch (ext->sadb_ext_type) {
1103 	case SADB_EXT_ADDRESS_SRC:
1104 	case SADB_EXT_ADDRESS_DST:
1105 	case SADB_X_EXT_ADDRESS_INNER_SRC:
1106 	case SADB_X_EXT_ADDRESS_INNER_DST:
1107 		/* Check for at least enough addtl length for a sockaddr. */
1108 		if (ext->sadb_ext_len <= SADB_8TO64(sizeof (sadb_address_t)))
1109 			return (B_FALSE);
1110 		break;
1111 	case SADB_EXT_LIFETIME_HARD:
1112 	case SADB_EXT_LIFETIME_SOFT:
1113 	case SADB_EXT_LIFETIME_CURRENT:
1114 		if (ext->sadb_ext_len != SADB_8TO64(sizeof (sadb_lifetime_t)))
1115 			return (B_FALSE);
1116 		break;
1117 	case SADB_EXT_SPIRANGE:
1118 		/* See if the SPI range is legit. */
1119 		if (htonl(((sadb_spirange_t *)ext)->sadb_spirange_min) >
1120 		    htonl(((sadb_spirange_t *)ext)->sadb_spirange_max))
1121 			return (B_FALSE);
1122 		break;
1123 	case SADB_EXT_KEY_AUTH:
1124 	case SADB_EXT_KEY_ENCRYPT:
1125 		/* Key length check. */
1126 		if (((sadb_key_t *)ext)->sadb_key_bits == 0)
1127 			return (B_FALSE);
1128 		/*
1129 		 * Check to see if the key length (in bits) is less than the
1130 		 * extension length (in 8-bits words).
1131 		 */
1132 		if ((roundup(SADB_1TO8(((sadb_key_t *)ext)->sadb_key_bits), 8) +
1133 		    sizeof (sadb_key_t)) != SADB_64TO8(ext->sadb_ext_len)) {
1134 			ks1dbg(keystack, (
1135 			    "ext_check:  Key bits/length inconsistent.\n"));
1136 			ks1dbg(keystack, ("%d bits, len is %d bytes.\n",
1137 			    ((sadb_key_t *)ext)->sadb_key_bits,
1138 			    SADB_64TO8(ext->sadb_ext_len)));
1139 			return (B_FALSE);
1140 		}
1141 
1142 		/* All-zeroes key check. */
1143 		lp = (uint64_t *)(((char *)ext) + sizeof (sadb_key_t));
1144 		for (i = 0;
1145 		    i < (ext->sadb_ext_len - SADB_8TO64(sizeof (sadb_key_t)));
1146 		    i++)
1147 			if (lp[i] != 0)
1148 				break;	/* Out of for loop. */
1149 		/* If finished the loop naturally, it's an all zero key. */
1150 		if (lp[i] == 0)
1151 			return (B_FALSE);
1152 		break;
1153 	case SADB_EXT_IDENTITY_SRC:
1154 	case SADB_EXT_IDENTITY_DST:
1155 		/*
1156 		 * Make sure the strings in these identities are
1157 		 * null-terminated.  RFC 2367 underspecified how to handle
1158 		 * such a case.  I "proactively" null-terminate the string
1159 		 * at the last byte if it's not terminated sooner.
1160 		 */
1161 		id = (sadb_ident_t *)ext;
1162 		i = SADB_64TO8(id->sadb_ident_len);
1163 		i -= sizeof (sadb_ident_t);
1164 		idstr = (char *)(id + 1);
1165 		while (*idstr != '\0' && i > 0) {
1166 			i--;
1167 			idstr++;
1168 		}
1169 		if (i == 0) {
1170 			/*
1171 			 * I.e., if the bozo user didn't NULL-terminate the
1172 			 * string...
1173 			 */
1174 			idstr--;
1175 			*idstr = '\0';
1176 		}
1177 		break;
1178 	}
1179 	return (B_TRUE);	/* For now... */
1180 }
1181 
1182 /* Return values for keysock_get_ext(). */
1183 #define	KGE_OK	0
1184 #define	KGE_DUP	1
1185 #define	KGE_UNK	2
1186 #define	KGE_LEN	3
1187 #define	KGE_CHK	4
1188 
1189 /*
1190  * Parse basic extension headers and return in the passed-in pointer vector.
1191  * Return values include:
1192  *
1193  *	KGE_OK	Everything's nice and parsed out.
1194  *		If there are no extensions, place NULL in extv[0].
1195  *	KGE_DUP	There is a duplicate extension.
1196  *		First instance in appropriate bin.  First duplicate in
1197  *		extv[0].
1198  *	KGE_UNK	Unknown extension type encountered.  extv[0] contains
1199  *		unknown header.
1200  *	KGE_LEN	Extension length error.
1201  *	KGE_CHK	High-level reality check failed on specific extension.
1202  *
1203  * My apologies for some of the pointer arithmetic in here.  I'm thinking
1204  * like an assembly programmer, yet trying to make the compiler happy.
1205  */
1206 static int
1207 keysock_get_ext(sadb_ext_t *extv[], sadb_msg_t *basehdr, uint_t msgsize,
1208     keysock_stack_t *keystack)
1209 {
1210 	bzero(extv, sizeof (sadb_ext_t *) * (SADB_EXT_MAX + 1));
1211 
1212 	/* Use extv[0] as the "current working pointer". */
1213 
1214 	extv[0] = (sadb_ext_t *)(basehdr + 1);
1215 
1216 	while (extv[0] < (sadb_ext_t *)(((uint8_t *)basehdr) + msgsize)) {
1217 		/* Check for unknown headers. */
1218 		if (extv[0]->sadb_ext_type == 0 ||
1219 		    extv[0]->sadb_ext_type > SADB_EXT_MAX)
1220 			return (KGE_UNK);
1221 
1222 		/*
1223 		 * Check length.  Use uint64_t because extlen is in units
1224 		 * of 64-bit words.  If length goes beyond the msgsize,
1225 		 * return an error.  (Zero length also qualifies here.)
1226 		 */
1227 		if (extv[0]->sadb_ext_len == 0 ||
1228 		    (void *)((uint64_t *)extv[0] + extv[0]->sadb_ext_len) >
1229 		    (void *)((uint8_t *)basehdr + msgsize))
1230 			return (KGE_LEN);
1231 
1232 		/* Check for redundant headers. */
1233 		if (extv[extv[0]->sadb_ext_type] != NULL)
1234 			return (KGE_DUP);
1235 
1236 		/*
1237 		 * Reality check the extension if possible at the keysock
1238 		 * level.
1239 		 */
1240 		if (!ext_check(extv[0], keystack))
1241 			return (KGE_CHK);
1242 
1243 		/* If I make it here, assign the appropriate bin. */
1244 		extv[extv[0]->sadb_ext_type] = extv[0];
1245 
1246 		/* Advance pointer (See above for uint64_t ptr reasoning.) */
1247 		extv[0] = (sadb_ext_t *)
1248 		    ((uint64_t *)extv[0] + extv[0]->sadb_ext_len);
1249 	}
1250 
1251 	/* Everything's cool. */
1252 
1253 	/*
1254 	 * If extv[0] == NULL, then there are no extension headers in this
1255 	 * message.  Ensure that this is the case.
1256 	 */
1257 	if (extv[0] == (sadb_ext_t *)(basehdr + 1))
1258 		extv[0] = NULL;
1259 
1260 	return (KGE_OK);
1261 }
1262 
1263 /*
1264  * qwriter() callback to handle flushes and dumps.  This routine will hold
1265  * the inner perimeter.
1266  */
1267 void
1268 keysock_do_flushdump(queue_t *q, mblk_t *mp)
1269 {
1270 	int i, start, finish;
1271 	mblk_t *mp1 = NULL;
1272 	keysock_t *ks = (keysock_t *)q->q_ptr;
1273 	sadb_ext_t *extv[SADB_EXT_MAX + 1];
1274 	sadb_msg_t *samsg = (sadb_msg_t *)mp->b_rptr;
1275 	keysock_stack_t	*keystack = ks->keysock_keystack;
1276 
1277 	/*
1278 	 * I am guaranteed this will work.  I did the work in keysock_parse()
1279 	 * already.
1280 	 */
1281 	(void) keysock_get_ext(extv, samsg, SADB_64TO8(samsg->sadb_msg_len),
1282 	    keystack);
1283 
1284 	/*
1285 	 * I hold the perimeter, therefore I don't need to use atomic ops.
1286 	 */
1287 	if (keystack->keystack_flushdump != 0) {
1288 		/* XXX Should I instead use EBUSY? */
1289 		/* XXX Or is there a way to queue these up? */
1290 		keysock_error(ks, mp, ENOMEM, SADB_X_DIAGNOSTIC_NONE);
1291 		return;
1292 	}
1293 
1294 	if (samsg->sadb_msg_satype == SADB_SATYPE_UNSPEC) {
1295 		start = 0;
1296 		finish = KEYSOCK_MAX_CONSUMERS - 1;
1297 	} else {
1298 		start = samsg->sadb_msg_satype;
1299 		finish = samsg->sadb_msg_satype;
1300 	}
1301 
1302 	/*
1303 	 * Fill up keysock_flushdump with the number of outstanding dumps
1304 	 * and/or flushes.
1305 	 */
1306 
1307 	keystack->keystack_flushdump_errno = 0;
1308 
1309 	/*
1310 	 * Okay, I hold the perimeter.  Eventually keysock_flushdump will
1311 	 * contain the number of consumers with outstanding flush operations.
1312 	 *
1313 	 * SO, here's the plan:
1314 	 *	* For each relevant consumer (Might be one, might be all)
1315 	 *		* Twiddle on the FLUSHING flag.
1316 	 *		* Pass down the FLUSH/DUMP message.
1317 	 *
1318 	 * When I see upbound FLUSH/DUMP messages, I will decrement the
1319 	 * keysock_flushdump.  When I decrement it to 0, I will pass the
1320 	 * FLUSH/DUMP message back up to the PF_KEY sockets.  Because I will
1321 	 * pass down the right SA type to the consumer (either its own, or
1322 	 * that of UNSPEC), the right one will be reflected from each consumer,
1323 	 * and accordingly back to the socket.
1324 	 */
1325 
1326 	mutex_enter(&keystack->keystack_consumers_lock);
1327 	for (i = start; i <= finish; i++) {
1328 		if (keystack->keystack_consumers[i] != NULL) {
1329 			mp1 = copymsg(mp);
1330 			if (mp1 == NULL) {
1331 				ks0dbg(("SADB_FLUSH copymsg() failed.\n"));
1332 				/*
1333 				 * Error?  And what about outstanding
1334 				 * flushes?  Oh, yeah, they get sucked up and
1335 				 * the counter is decremented.  Consumers
1336 				 * (see keysock_passdown()) are guaranteed
1337 				 * to deliver back a flush request, even if
1338 				 * it's an error.
1339 				 */
1340 				keysock_error(ks, mp, ENOMEM,
1341 				    SADB_X_DIAGNOSTIC_NONE);
1342 				return;
1343 			}
1344 			/*
1345 			 * Because my entry conditions are met above, the
1346 			 * following assertion should hold true.
1347 			 */
1348 			mutex_enter(&keystack->keystack_consumers[i]->kc_lock);
1349 			ASSERT((keystack->keystack_consumers[i]->kc_flags &
1350 				KC_FLUSHING) == 0);
1351 			keystack->keystack_consumers[i]->kc_flags |=
1352 			    KC_FLUSHING;
1353 			mutex_exit(&(keystack->keystack_consumers[i]->kc_lock));
1354 			/* Always increment the number of flushes... */
1355 			keystack->keystack_flushdump++;
1356 			/* Guaranteed to return a message. */
1357 			keysock_passdown(ks, mp1, i, extv, B_TRUE);
1358 		} else if (start == finish) {
1359 			/*
1360 			 * In case where start == finish, and there's no
1361 			 * consumer, should we force an error?  Yes.
1362 			 */
1363 			mutex_exit(&keystack->keystack_consumers_lock);
1364 			keysock_error(ks, mp, EINVAL,
1365 			    SADB_X_DIAGNOSTIC_UNKNOWN_SATYPE);
1366 			return;
1367 		}
1368 	}
1369 	mutex_exit(&keystack->keystack_consumers_lock);
1370 
1371 	if (keystack->keystack_flushdump == 0) {
1372 		/*
1373 		 * There were no consumers at all for this message.
1374 		 * XXX For now return ESRCH.
1375 		 */
1376 		keysock_error(ks, mp, ESRCH, SADB_X_DIAGNOSTIC_NO_SADBS);
1377 	} else {
1378 		/* Otherwise, free the original message. */
1379 		freemsg(mp);
1380 	}
1381 }
1382 
1383 /*
1384  * Get the right diagnostic for a duplicate.  Should probably use a static
1385  * table lookup.
1386  */
1387 int
1388 keysock_duplicate(int ext_type)
1389 {
1390 	int rc = 0;
1391 
1392 	switch (ext_type) {
1393 	case SADB_EXT_ADDRESS_SRC:
1394 		rc = SADB_X_DIAGNOSTIC_DUPLICATE_SRC;
1395 		break;
1396 	case SADB_EXT_ADDRESS_DST:
1397 		rc = SADB_X_DIAGNOSTIC_DUPLICATE_DST;
1398 		break;
1399 	case SADB_X_EXT_ADDRESS_INNER_SRC:
1400 		rc = SADB_X_DIAGNOSTIC_DUPLICATE_INNER_SRC;
1401 		break;
1402 	case SADB_X_EXT_ADDRESS_INNER_DST:
1403 		rc = SADB_X_DIAGNOSTIC_DUPLICATE_INNER_DST;
1404 		break;
1405 	case SADB_EXT_SA:
1406 		rc = SADB_X_DIAGNOSTIC_DUPLICATE_SA;
1407 		break;
1408 	case SADB_EXT_SPIRANGE:
1409 		rc = SADB_X_DIAGNOSTIC_DUPLICATE_RANGE;
1410 		break;
1411 	case SADB_EXT_KEY_AUTH:
1412 		rc = SADB_X_DIAGNOSTIC_DUPLICATE_AKEY;
1413 		break;
1414 	case SADB_EXT_KEY_ENCRYPT:
1415 		rc = SADB_X_DIAGNOSTIC_DUPLICATE_EKEY;
1416 		break;
1417 	}
1418 	return (rc);
1419 }
1420 
1421 /*
1422  * Get the right diagnostic for a reality check failure.  Should probably use
1423  * a static table lookup.
1424  */
1425 int
1426 keysock_malformed(int ext_type)
1427 {
1428 	int rc = 0;
1429 
1430 	switch (ext_type) {
1431 	case SADB_EXT_ADDRESS_SRC:
1432 		rc = SADB_X_DIAGNOSTIC_MALFORMED_SRC;
1433 		break;
1434 	case SADB_EXT_ADDRESS_DST:
1435 		rc = SADB_X_DIAGNOSTIC_MALFORMED_DST;
1436 		break;
1437 	case SADB_X_EXT_ADDRESS_INNER_SRC:
1438 		rc = SADB_X_DIAGNOSTIC_MALFORMED_INNER_SRC;
1439 		break;
1440 	case SADB_X_EXT_ADDRESS_INNER_DST:
1441 		rc = SADB_X_DIAGNOSTIC_MALFORMED_INNER_DST;
1442 		break;
1443 	case SADB_EXT_SA:
1444 		rc = SADB_X_DIAGNOSTIC_MALFORMED_SA;
1445 		break;
1446 	case SADB_EXT_SPIRANGE:
1447 		rc = SADB_X_DIAGNOSTIC_MALFORMED_RANGE;
1448 		break;
1449 	case SADB_EXT_KEY_AUTH:
1450 		rc = SADB_X_DIAGNOSTIC_MALFORMED_AKEY;
1451 		break;
1452 	case SADB_EXT_KEY_ENCRYPT:
1453 		rc = SADB_X_DIAGNOSTIC_MALFORMED_EKEY;
1454 		break;
1455 	}
1456 	return (rc);
1457 }
1458 
1459 /*
1460  * Keysock massaging of an inverse ACQUIRE.  Consult policy,
1461  * and construct an appropriate response.
1462  */
1463 static void
1464 keysock_inverse_acquire(mblk_t *mp, sadb_msg_t *samsg, sadb_ext_t *extv[],
1465     keysock_t *ks)
1466 {
1467 	mblk_t *reply_mp;
1468 	keysock_stack_t	*keystack = ks->keysock_keystack;
1469 
1470 	/*
1471 	 * Reality check things...
1472 	 */
1473 	if (extv[SADB_EXT_ADDRESS_SRC] == NULL) {
1474 		keysock_error(ks, mp, EINVAL, SADB_X_DIAGNOSTIC_MISSING_SRC);
1475 		return;
1476 	}
1477 	if (extv[SADB_EXT_ADDRESS_DST] == NULL) {
1478 		keysock_error(ks, mp, EINVAL, SADB_X_DIAGNOSTIC_MISSING_DST);
1479 		return;
1480 	}
1481 
1482 	if (extv[SADB_X_EXT_ADDRESS_INNER_SRC] != NULL &&
1483 	    extv[SADB_X_EXT_ADDRESS_INNER_DST] == NULL) {
1484 		keysock_error(ks, mp, EINVAL,
1485 		    SADB_X_DIAGNOSTIC_MISSING_INNER_DST);
1486 		return;
1487 	}
1488 
1489 	if (extv[SADB_X_EXT_ADDRESS_INNER_SRC] == NULL &&
1490 	    extv[SADB_X_EXT_ADDRESS_INNER_DST] != NULL) {
1491 		keysock_error(ks, mp, EINVAL,
1492 		    SADB_X_DIAGNOSTIC_MISSING_INNER_SRC);
1493 		return;
1494 	}
1495 
1496 	reply_mp = ipsec_construct_inverse_acquire(samsg, extv,
1497 	    keystack->keystack_netstack);
1498 
1499 	if (reply_mp != NULL) {
1500 		freemsg(mp);
1501 		keysock_passup(reply_mp, (sadb_msg_t *)reply_mp->b_rptr,
1502 		    ks->keysock_serial, NULL, B_FALSE, keystack);
1503 	} else {
1504 		keysock_error(ks, mp, samsg->sadb_msg_errno,
1505 		    samsg->sadb_x_msg_diagnostic);
1506 	}
1507 }
1508 
1509 /*
1510  * Spew an extended REGISTER down to the relevant consumers.
1511  */
1512 static void
1513 keysock_extended_register(keysock_t *ks, mblk_t *mp, sadb_ext_t *extv[])
1514 {
1515 	sadb_x_ereg_t *ereg = (sadb_x_ereg_t *)extv[SADB_X_EXT_EREG];
1516 	uint8_t *satypes, *fencepost;
1517 	mblk_t *downmp;
1518 	sadb_ext_t *downextv[SADB_EXT_MAX + 1];
1519 	keysock_stack_t	*keystack = ks->keysock_keystack;
1520 
1521 	if (ks->keysock_registered[0] != 0 || ks->keysock_registered[1] != 0 ||
1522 	    ks->keysock_registered[2] != 0 || ks->keysock_registered[3] != 0) {
1523 		keysock_error(ks, mp, EBUSY, 0);
1524 	}
1525 
1526 	ks->keysock_flags |= KEYSOCK_EXTENDED;
1527 	if (ereg == NULL) {
1528 		keysock_error(ks, mp, EINVAL, SADB_X_DIAGNOSTIC_SATYPE_NEEDED);
1529 	} else {
1530 		ASSERT(mp->b_rptr + msgdsize(mp) == mp->b_wptr);
1531 		fencepost = (uint8_t *)mp->b_wptr;
1532 		satypes = ereg->sadb_x_ereg_satypes;
1533 		while (*satypes != SADB_SATYPE_UNSPEC && satypes != fencepost) {
1534 			downmp = copymsg(mp);
1535 			if (downmp == NULL) {
1536 				keysock_error(ks, mp, ENOMEM, 0);
1537 				return;
1538 			}
1539 			/*
1540 			 * Since we've made it here, keysock_get_ext will work!
1541 			 */
1542 			(void) keysock_get_ext(downextv,
1543 			    (sadb_msg_t *)downmp->b_rptr, msgdsize(downmp),
1544 			    keystack);
1545 			keysock_passdown(ks, downmp, *satypes, downextv,
1546 			    B_FALSE);
1547 			++satypes;
1548 		}
1549 		freemsg(mp);
1550 	}
1551 
1552 	/*
1553 	 * Set global to indicate we prefer an extended ACQUIRE.
1554 	 */
1555 	atomic_add_32(&keystack->keystack_num_extended, 1);
1556 }
1557 
1558 /*
1559  * Handle PF_KEY messages.
1560  */
1561 static void
1562 keysock_parse(queue_t *q, mblk_t *mp)
1563 {
1564 	sadb_msg_t *samsg;
1565 	sadb_ext_t *extv[SADB_EXT_MAX + 1];
1566 	keysock_t *ks = (keysock_t *)q->q_ptr;
1567 	uint_t msgsize;
1568 	uint8_t satype;
1569 	keysock_stack_t	*keystack = ks->keysock_keystack;
1570 
1571 	/* Make sure I'm a PF_KEY socket.  (i.e. nothing's below me) */
1572 	ASSERT(WR(q)->q_next == NULL);
1573 
1574 	samsg = (sadb_msg_t *)mp->b_rptr;
1575 	ks2dbg(keystack, ("Received possible PF_KEY message, type %d.\n",
1576 	    samsg->sadb_msg_type));
1577 
1578 	msgsize = SADB_64TO8(samsg->sadb_msg_len);
1579 
1580 	if (msgdsize(mp) != msgsize) {
1581 		/*
1582 		 * Message len incorrect w.r.t. actual size.  Send an error
1583 		 * (EMSGSIZE).	It may be necessary to massage things a
1584 		 * bit.	 For example, if the sadb_msg_type is hosed,
1585 		 * I need to set it to SADB_RESERVED to get delivery to
1586 		 * do the right thing.	Then again, maybe just letting
1587 		 * the error delivery do the right thing.
1588 		 */
1589 		ks2dbg(keystack,
1590 		    ("mblk (%lu) and base (%d) message sizes don't jibe.\n",
1591 		    msgdsize(mp), msgsize));
1592 		keysock_error(ks, mp, EMSGSIZE, SADB_X_DIAGNOSTIC_NONE);
1593 		return;
1594 	}
1595 
1596 	if (msgsize > (uint_t)(mp->b_wptr - mp->b_rptr)) {
1597 		/* Get all message into one mblk. */
1598 		if (pullupmsg(mp, -1) == 0) {
1599 			/*
1600 			 * Something screwy happened.
1601 			 */
1602 			ks3dbg(keystack,
1603 			    ("keysock_parse: pullupmsg() failed.\n"));
1604 			return;
1605 		} else {
1606 			samsg = (sadb_msg_t *)mp->b_rptr;
1607 		}
1608 	}
1609 
1610 	switch (keysock_get_ext(extv, samsg, msgsize, keystack)) {
1611 	case KGE_DUP:
1612 		/* Handle duplicate extension. */
1613 		ks1dbg(keystack, ("Got duplicate extension of type %d.\n",
1614 		    extv[0]->sadb_ext_type));
1615 		keysock_error(ks, mp, EINVAL,
1616 		    keysock_duplicate(extv[0]->sadb_ext_type));
1617 		return;
1618 	case KGE_UNK:
1619 		/* Handle unknown extension. */
1620 		ks1dbg(keystack, ("Got unknown extension of type %d.\n",
1621 		    extv[0]->sadb_ext_type));
1622 		keysock_error(ks, mp, EINVAL, SADB_X_DIAGNOSTIC_UNKNOWN_EXT);
1623 		return;
1624 	case KGE_LEN:
1625 		/* Length error. */
1626 		ks1dbg(keystack,
1627 		    ("Length %d on extension type %d overrun or 0.\n",
1628 		    extv[0]->sadb_ext_len, extv[0]->sadb_ext_type));
1629 		keysock_error(ks, mp, EINVAL, SADB_X_DIAGNOSTIC_BAD_EXTLEN);
1630 		return;
1631 	case KGE_CHK:
1632 		/* Reality check failed. */
1633 		ks1dbg(keystack,
1634 		    ("Reality check failed on extension type %d.\n",
1635 		    extv[0]->sadb_ext_type));
1636 		keysock_error(ks, mp, EINVAL,
1637 		    keysock_malformed(extv[0]->sadb_ext_type));
1638 		return;
1639 	default:
1640 		/* Default case is no errors. */
1641 		break;
1642 	}
1643 
1644 	switch (samsg->sadb_msg_type) {
1645 	case SADB_REGISTER:
1646 		/*
1647 		 * There's a semantic weirdness in that a message OTHER than
1648 		 * the return REGISTER message may be passed up if I set the
1649 		 * registered bit BEFORE I pass it down.
1650 		 *
1651 		 * SOOOO, I'll not twiddle any registered bits until I see
1652 		 * the upbound REGISTER (with a serial number in it).
1653 		 */
1654 		if (samsg->sadb_msg_satype == SADB_SATYPE_UNSPEC) {
1655 			/* Handle extended register here. */
1656 			keysock_extended_register(ks, mp, extv);
1657 			return;
1658 		} else if (ks->keysock_flags & KEYSOCK_EXTENDED) {
1659 			keysock_error(ks, mp, EBUSY, 0);
1660 			return;
1661 		}
1662 		/* FALLTHRU */
1663 	case SADB_GETSPI:
1664 	case SADB_ADD:
1665 	case SADB_UPDATE:
1666 	case SADB_DELETE:
1667 	case SADB_GET:
1668 		/*
1669 		 * Pass down to appropriate consumer.
1670 		 */
1671 		if (samsg->sadb_msg_satype != SADB_SATYPE_UNSPEC)
1672 			keysock_passdown(ks, mp, samsg->sadb_msg_satype, extv,
1673 			    B_FALSE);
1674 		else keysock_error(ks, mp, EINVAL,
1675 		    SADB_X_DIAGNOSTIC_SATYPE_NEEDED);
1676 		return;
1677 	case SADB_ACQUIRE:
1678 		/*
1679 		 * If I _receive_ an acquire, this means I should spread it
1680 		 * out to registered sockets.  Unless there's an errno...
1681 		 *
1682 		 * Need ADDRESS, may have ID, SENS, and PROP, unless errno,
1683 		 * in which case there should be NO extensions.
1684 		 *
1685 		 * Return to registered.
1686 		 */
1687 		if (samsg->sadb_msg_errno != 0) {
1688 			satype = samsg->sadb_msg_satype;
1689 			if (satype == SADB_SATYPE_UNSPEC) {
1690 				if (!(ks->keysock_flags & KEYSOCK_EXTENDED)) {
1691 					keysock_error(ks, mp, EINVAL,
1692 					    SADB_X_DIAGNOSTIC_SATYPE_NEEDED);
1693 					return;
1694 				}
1695 				/*
1696 				 * Reassign satype based on the first
1697 				 * flags that KEYSOCK_SETREG says.
1698 				 */
1699 				while (satype <= SADB_SATYPE_MAX) {
1700 					if (KEYSOCK_ISREG(ks, satype))
1701 						break;
1702 					satype++;
1703 				}
1704 				if (satype > SADB_SATYPE_MAX) {
1705 					keysock_error(ks, mp, EBUSY, 0);
1706 					return;
1707 				}
1708 			}
1709 			keysock_passdown(ks, mp, satype, extv, B_FALSE);
1710 		} else {
1711 			if (samsg->sadb_msg_satype == SADB_SATYPE_UNSPEC) {
1712 				keysock_error(ks, mp, EINVAL,
1713 				    SADB_X_DIAGNOSTIC_SATYPE_NEEDED);
1714 			} else {
1715 				keysock_passup(mp, samsg, 0, NULL, B_FALSE,
1716 				    keystack);
1717 			}
1718 		}
1719 		return;
1720 	case SADB_EXPIRE:
1721 		/*
1722 		 * If someone sends this in, then send out to all senders.
1723 		 * (Save maybe ESP or AH, I have to be careful here.)
1724 		 *
1725 		 * Need ADDRESS, may have ID and SENS.
1726 		 *
1727 		 * XXX for now this is unsupported.
1728 		 */
1729 		break;
1730 	case SADB_FLUSH:
1731 	case SADB_DUMP:	 /* not used by normal applications */
1732 		/*
1733 		 * Nuke all SAs, or dump out the whole SA table to sender only.
1734 		 *
1735 		 * No extensions at all.  Return to all listeners.
1736 		 *
1737 		 * Question:	Should I hold a lock here to prevent
1738 		 *		additions/deletions while flushing?
1739 		 * Answer:	No.  (See keysock_passdown() for details.)
1740 		 */
1741 		if (extv[0] != NULL) {
1742 			/*
1743 			 * FLUSH or DUMP messages shouldn't have extensions.
1744 			 * Return EINVAL.
1745 			 */
1746 			ks2dbg(keystack, ("FLUSH message with extension.\n"));
1747 			keysock_error(ks, mp, EINVAL, SADB_X_DIAGNOSTIC_NO_EXT);
1748 			return;
1749 		}
1750 
1751 		/* Passing down of DUMP/FLUSH messages are special. */
1752 		qwriter(q, mp, keysock_do_flushdump, PERIM_INNER);
1753 		return;
1754 	case SADB_X_PROMISC:
1755 		/*
1756 		 * Promiscuous processing message.
1757 		 */
1758 		if (samsg->sadb_msg_satype == 0)
1759 			ks->keysock_flags &= ~KEYSOCK_PROMISC;
1760 		else
1761 			ks->keysock_flags |= KEYSOCK_PROMISC;
1762 		keysock_passup(mp, samsg, ks->keysock_serial, NULL, B_FALSE,
1763 		    keystack);
1764 		return;
1765 	case SADB_X_INVERSE_ACQUIRE:
1766 		keysock_inverse_acquire(mp, samsg, extv, ks);
1767 		return;
1768 	default:
1769 		ks2dbg(keystack, ("Got unknown message type %d.\n",
1770 		    samsg->sadb_msg_type));
1771 		keysock_error(ks, mp, EINVAL, SADB_X_DIAGNOSTIC_UNKNOWN_MSG);
1772 		return;
1773 	}
1774 
1775 	/* As a placeholder... */
1776 	ks0dbg(("keysock_parse():  Hit EOPNOTSUPP\n"));
1777 	keysock_error(ks, mp, EOPNOTSUPP, SADB_X_DIAGNOSTIC_NONE);
1778 }
1779 
1780 /*
1781  * wput routing for PF_KEY/keysock/whatever.  Unlike the routing socket,
1782  * I don't convert to ioctl()'s for IP.  I am the end-all driver as far
1783  * as PF_KEY sockets are concerned.  I do some conversion, but not as much
1784  * as IP/rts does.
1785  */
1786 static void
1787 keysock_wput(queue_t *q, mblk_t *mp)
1788 {
1789 	uchar_t *rptr = mp->b_rptr;
1790 	mblk_t *mp1;
1791 	keysock_t *ks;
1792 	keysock_stack_t	*keystack;
1793 
1794 	if (WR(q)->q_next) {
1795 		keysock_consumer_t *kc = (keysock_consumer_t *)q->q_ptr;
1796 		keystack = kc->kc_keystack;
1797 
1798 		ks3dbg(keystack, ("In keysock_wput\n"));
1799 
1800 		/*
1801 		 * We shouldn't get writes on a consumer instance.
1802 		 * But for now, just passthru.
1803 		 */
1804 		ks1dbg(keystack, ("Huh?  wput for an consumer instance (%d)?\n",
1805 		    kc->kc_sa_type));
1806 		putnext(q, mp);
1807 		return;
1808 	}
1809 	ks = (keysock_t *)q->q_ptr;
1810 	keystack = ks->keysock_keystack;
1811 
1812 	ks3dbg(keystack, ("In keysock_wput\n"));
1813 
1814 	switch (mp->b_datap->db_type) {
1815 	case M_DATA:
1816 		/*
1817 		 * Silently discard.
1818 		 */
1819 		ks2dbg(keystack, ("raw M_DATA in keysock.\n"));
1820 		freemsg(mp);
1821 		return;
1822 	case M_PROTO:
1823 	case M_PCPROTO:
1824 		if ((mp->b_wptr - rptr) >= sizeof (struct T_data_req)) {
1825 			if (((union T_primitives *)rptr)->type == T_DATA_REQ) {
1826 				if ((mp1 = mp->b_cont) == NULL) {
1827 					/* No data after T_DATA_REQ. */
1828 					ks2dbg(keystack,
1829 					    ("No data after DATA_REQ.\n"));
1830 					freemsg(mp);
1831 					return;
1832 				}
1833 				freeb(mp);
1834 				mp = mp1;
1835 				ks2dbg(keystack, ("T_DATA_REQ\n"));
1836 				break;	/* Out of switch. */
1837 			}
1838 		}
1839 		/* FALLTHRU */
1840 	default:
1841 		ks3dbg(keystack, ("In default wput case (%d %d).\n",
1842 		    mp->b_datap->db_type, ((union T_primitives *)rptr)->type));
1843 		keysock_wput_other(q, mp);
1844 		return;
1845 	}
1846 
1847 	/* I now have a PF_KEY message in an M_DATA block, pointed to by mp. */
1848 	keysock_parse(q, mp);
1849 }
1850 
1851 /* BELOW THIS LINE ARE ROUTINES INCLUDING AND RELATED TO keysock_rput(). */
1852 
1853 /*
1854  * Called upon receipt of a KEYSOCK_HELLO_ACK to set up the appropriate
1855  * state vectors.
1856  */
1857 static void
1858 keysock_link_consumer(uint8_t satype, keysock_consumer_t *kc)
1859 {
1860 	keysock_t *ks;
1861 	keysock_stack_t	*keystack = kc->kc_keystack;
1862 
1863 	mutex_enter(&keystack->keystack_consumers_lock);
1864 	mutex_enter(&kc->kc_lock);
1865 	if (keystack->keystack_consumers[satype] != NULL) {
1866 		ks0dbg((
1867 		    "Hmmmm, someone closed %d before the HELLO_ACK happened.\n",
1868 		    satype));
1869 		/*
1870 		 * Perhaps updating the new below-me consumer with what I have
1871 		 * so far would work too?
1872 		 */
1873 		mutex_exit(&kc->kc_lock);
1874 		mutex_exit(&keystack->keystack_consumers_lock);
1875 	} else {
1876 		/* Add new below-me consumer. */
1877 		keystack->keystack_consumers[satype] = kc;
1878 
1879 		kc->kc_flags = 0;
1880 		kc->kc_sa_type = satype;
1881 		mutex_exit(&kc->kc_lock);
1882 		mutex_exit(&keystack->keystack_consumers_lock);
1883 
1884 		/* Scan the keysock list. */
1885 		mutex_enter(&keystack->keystack_list_lock);
1886 		for (ks = keystack->keystack_list; ks != NULL;
1887 		    ks = ks->keysock_next) {
1888 			if (KEYSOCK_ISREG(ks, satype)) {
1889 				/*
1890 				 * XXX Perhaps send an SADB_REGISTER down on
1891 				 * the socket's behalf.
1892 				 */
1893 				ks1dbg(keystack,
1894 				    ("Socket %u registered already for "
1895 				    "new consumer.\n", ks->keysock_serial));
1896 			}
1897 		}
1898 		mutex_exit(&keystack->keystack_list_lock);
1899 	}
1900 }
1901 
1902 /*
1903  * Generate a KEYSOCK_OUT_ERR message for my consumer.
1904  */
1905 static void
1906 keysock_out_err(keysock_consumer_t *kc, int ks_errno, mblk_t *mp)
1907 {
1908 	keysock_out_err_t *kse;
1909 	mblk_t *imp;
1910 	keysock_stack_t	*keystack = kc->kc_keystack;
1911 
1912 	imp = allocb(sizeof (ipsec_info_t), BPRI_HI);
1913 	if (imp == NULL) {
1914 		ks1dbg(keystack, ("keysock_out_err:  Can't alloc message.\n"));
1915 		return;
1916 	}
1917 
1918 	imp->b_datap->db_type = M_CTL;
1919 	imp->b_wptr += sizeof (ipsec_info_t);
1920 
1921 	kse = (keysock_out_err_t *)imp->b_rptr;
1922 	imp->b_cont = mp;
1923 	kse->ks_err_type = KEYSOCK_OUT_ERR;
1924 	kse->ks_err_len = sizeof (*kse);
1925 	/* Is serial necessary? */
1926 	kse->ks_err_serial = 0;
1927 	kse->ks_err_errno = ks_errno;
1928 
1929 	/*
1930 	 * XXX What else do I need to do here w.r.t. information
1931 	 * to tell the consumer what caused this error?
1932 	 *
1933 	 * I believe the answer is the PF_KEY ACQUIRE (or other) message
1934 	 * attached in mp, which is appended at the end.  I believe the
1935 	 * db_ref won't matter here, because the PF_KEY message is only read
1936 	 * for KEYSOCK_OUT_ERR.
1937 	 */
1938 
1939 	putnext(kc->kc_wq, imp);
1940 }
1941 
1942 /* XXX this is a hack errno. */
1943 #define	EIPSECNOSA 255
1944 
1945 /*
1946  * Route message (pointed by mp, header in samsg) toward appropriate
1947  * sockets.  Assume the message's creator did its job correctly.
1948  *
1949  * This should be a function that is followed by a return in its caller.
1950  * The compiler _should_ be able to use tail-call optimizations to make the
1951  * large ## of parameters not a huge deal.
1952  */
1953 static void
1954 keysock_passup(mblk_t *mp, sadb_msg_t *samsg, minor_t serial,
1955     keysock_consumer_t *kc, boolean_t persistent, keysock_stack_t *keystack)
1956 {
1957 	keysock_t *ks;
1958 	uint8_t satype = samsg->sadb_msg_satype;
1959 	boolean_t toall = B_FALSE, allreg = B_FALSE, allereg = B_FALSE,
1960 	    setalg = B_FALSE;
1961 	mblk_t *mp1;
1962 	int err = EIPSECNOSA;
1963 
1964 	/* Convert mp, which is M_DATA, into an M_PROTO of type T_DATA_IND */
1965 	mp1 = allocb(sizeof (struct T_data_req), BPRI_HI);
1966 	if (mp1 == NULL) {
1967 		err = ENOMEM;
1968 		goto error;
1969 	}
1970 	mp1->b_wptr += sizeof (struct T_data_req);
1971 	((struct T_data_ind *)mp1->b_rptr)->PRIM_type = T_DATA_IND;
1972 	((struct T_data_ind *)mp1->b_rptr)->MORE_flag = 0;
1973 	mp1->b_datap->db_type = M_PROTO;
1974 	mp1->b_cont = mp;
1975 	mp = mp1;
1976 
1977 	switch (samsg->sadb_msg_type) {
1978 	case SADB_FLUSH:
1979 	case SADB_GETSPI:
1980 	case SADB_UPDATE:
1981 	case SADB_ADD:
1982 	case SADB_DELETE:
1983 	case SADB_EXPIRE:
1984 		/*
1985 		 * These are most likely replies.  Don't worry about
1986 		 * KEYSOCK_OUT_ERR handling.  Deliver to all sockets.
1987 		 */
1988 		ks3dbg(keystack,
1989 		    ("Delivering normal message (%d) to all sockets.\n",
1990 		    samsg->sadb_msg_type));
1991 		toall = B_TRUE;
1992 		break;
1993 	case SADB_REGISTER:
1994 		/*
1995 		 * REGISTERs come up for one of three reasons:
1996 		 *
1997 		 *	1.) In response to a normal SADB_REGISTER
1998 		 *		(samsg->sadb_msg_satype != SADB_SATYPE_UNSPEC &&
1999 		 *		    serial != 0)
2000 		 *		Deliver to normal SADB_REGISTERed sockets.
2001 		 *	2.) In response to an extended REGISTER
2002 		 *		(samsg->sadb_msg_satype == SADB_SATYPE_UNSPEC)
2003 		 *		Deliver to extended REGISTERed socket.
2004 		 *	3.) Spontaneous algorithm changes
2005 		 *		(samsg->sadb_msg_satype != SADB_SATYPE_UNSPEC &&
2006 		 *		    serial == 0)
2007 		 *		Deliver to REGISTERed sockets of all sorts.
2008 		 */
2009 		if (kc == NULL) {
2010 			/* Here because of keysock_error() call. */
2011 			ASSERT(samsg->sadb_msg_errno != 0);
2012 			break;	/* Out of switch. */
2013 		}
2014 		ks3dbg(keystack, ("Delivering REGISTER.\n"));
2015 		if (satype == SADB_SATYPE_UNSPEC) {
2016 			/* REGISTER Reason #2 */
2017 			allereg = B_TRUE;
2018 			/*
2019 			 * Rewhack SA type so PF_KEY socket holder knows what
2020 			 * consumer generated this algorithm list.
2021 			 */
2022 			satype = kc->kc_sa_type;
2023 			samsg->sadb_msg_satype = satype;
2024 			setalg = B_TRUE;
2025 		} else if (serial == 0) {
2026 			/* REGISTER Reason #3 */
2027 			allreg = B_TRUE;
2028 			allereg = B_TRUE;
2029 		} else {
2030 			/* REGISTER Reason #1 */
2031 			allreg = B_TRUE;
2032 			setalg = B_TRUE;
2033 		}
2034 		break;
2035 	case SADB_ACQUIRE:
2036 		/*
2037 		 * ACQUIREs are either extended (sadb_msg_satype == 0) or
2038 		 * regular (sadb_msg_satype != 0).  And we're guaranteed
2039 		 * that serial == 0 for an ACQUIRE.
2040 		 */
2041 		ks3dbg(keystack, ("Delivering ACQUIRE.\n"));
2042 		allereg = (satype == SADB_SATYPE_UNSPEC);
2043 		allreg = !allereg;
2044 		/*
2045 		 * Corner case - if we send a regular ACQUIRE and there's
2046 		 * extended ones registered, don't send an error down to
2047 		 * consumers if nobody's listening and prematurely destroy
2048 		 * their ACQUIRE record.  This might be too hackish of a
2049 		 * solution.
2050 		 */
2051 		if (allreg && keystack->keystack_num_extended > 0)
2052 			err = 0;
2053 		break;
2054 	case SADB_X_PROMISC:
2055 	case SADB_X_INVERSE_ACQUIRE:
2056 	case SADB_DUMP:
2057 	case SADB_GET:
2058 	default:
2059 		/*
2060 		 * Deliver to the sender and promiscuous only.
2061 		 */
2062 		ks3dbg(keystack, ("Delivering sender/promisc only (%d).\n",
2063 		    samsg->sadb_msg_type));
2064 		break;
2065 	}
2066 
2067 	mutex_enter(&keystack->keystack_list_lock);
2068 	for (ks = keystack->keystack_list; ks != NULL; ks = ks->keysock_next) {
2069 		/* Delivery loop. */
2070 
2071 		/*
2072 		 * Check special keysock-setting cases (REGISTER replies)
2073 		 * here.
2074 		 */
2075 		if (setalg && serial == ks->keysock_serial) {
2076 			ASSERT(kc != NULL);
2077 			ASSERT(kc->kc_sa_type == satype);
2078 			KEYSOCK_SETREG(ks, satype);
2079 		}
2080 
2081 		/*
2082 		 * NOLOOP takes precedence over PROMISC.  So if you've set
2083 		 * !SO_USELOOPBACK, don't expect to see any data...
2084 		 */
2085 		if (ks->keysock_flags & KEYSOCK_NOLOOP)
2086 			continue;
2087 
2088 		/*
2089 		 * Messages to all, or promiscuous sockets just GET the
2090 		 * message.  Perform rules-type checking iff it's not for all
2091 		 * listeners or the socket is in promiscuous mode.
2092 		 *
2093 		 * NOTE:Because of the (kc != NULL && ISREG()), make sure
2094 		 *	extended ACQUIREs arrive off a consumer that is
2095 		 *	part of the extended REGISTER set of consumers.
2096 		 */
2097 		if (serial != ks->keysock_serial &&
2098 		    !toall &&
2099 		    !(ks->keysock_flags & KEYSOCK_PROMISC) &&
2100 		    !((ks->keysock_flags & KEYSOCK_EXTENDED) ?
2101 			allereg : allreg && kc != NULL &&
2102 			KEYSOCK_ISREG(ks, kc->kc_sa_type)))
2103 			continue;
2104 
2105 		mp1 = dupmsg(mp);
2106 		if (mp1 == NULL) {
2107 			ks2dbg(keystack, (
2108 			    "keysock_passup():  dupmsg() failed.\n"));
2109 			mp1 = mp;
2110 			mp = NULL;
2111 			err = ENOMEM;
2112 		}
2113 
2114 		/*
2115 		 * At this point, we can deliver or attempt to deliver
2116 		 * this message.  We're free of obligation to report
2117 		 * no listening PF_KEY sockets.  So set err to 0.
2118 		 */
2119 		err = 0;
2120 
2121 		/*
2122 		 * See if we canputnext(), as well as see if the message
2123 		 * needs to be queued if we can't.
2124 		 */
2125 		if (!canputnext(ks->keysock_rq)) {
2126 			if (persistent) {
2127 				if (putq(ks->keysock_rq, mp1) == 0) {
2128 					ks1dbg(keystack, (
2129 					    "keysock_passup: putq failed.\n"));
2130 				} else {
2131 					continue;
2132 				}
2133 			}
2134 			freemsg(mp1);
2135 			continue;
2136 		}
2137 
2138 		ks3dbg(keystack,
2139 		    ("Putting to serial %d.\n", ks->keysock_serial));
2140 		/*
2141 		 * Unlike the specific keysock instance case, this
2142 		 * will only hit for listeners, so we will only
2143 		 * putnext() if we can.
2144 		 */
2145 		putnext(ks->keysock_rq, mp1);
2146 		if (mp == NULL)
2147 			break;	/* out of for loop. */
2148 	}
2149 	mutex_exit(&keystack->keystack_list_lock);
2150 
2151 error:
2152 	if ((err != 0) && (kc != NULL)) {
2153 		/*
2154 		 * Generate KEYSOCK_OUT_ERR for consumer.
2155 		 * Basically, I send this back if I have not been able to
2156 		 * transmit (for whatever reason)
2157 		 */
2158 		ks1dbg(keystack,
2159 		    ("keysock_passup():  No registered of type %d.\n",
2160 		    satype));
2161 		if (mp != NULL) {
2162 			if (mp->b_datap->db_type == M_PROTO) {
2163 				mp1 = mp;
2164 				mp = mp->b_cont;
2165 				freeb(mp1);
2166 			}
2167 			/*
2168 			 * Do a copymsg() because people who get
2169 			 * KEYSOCK_OUT_ERR may alter the message contents.
2170 			 */
2171 			mp1 = copymsg(mp);
2172 			if (mp1 == NULL) {
2173 				ks2dbg(keystack,
2174 				    ("keysock_passup: copymsg() failed.\n"));
2175 				mp1 = mp;
2176 				mp = NULL;
2177 			}
2178 			keysock_out_err(kc, err, mp1);
2179 		}
2180 	}
2181 
2182 	/*
2183 	 * XXX Blank the message somehow.  This is difficult because we don't
2184 	 * know at this point if the message has db_ref > 1, etc.
2185 	 *
2186 	 * Optimally, keysock messages containing actual keying material would
2187 	 * be allocated with esballoc(), with a zeroing free function.
2188 	 */
2189 	if (mp != NULL)
2190 		freemsg(mp);
2191 }
2192 
2193 /*
2194  * Keysock's read service procedure is there only for PF_KEY reply
2195  * messages that really need to reach the top.
2196  */
2197 static void
2198 keysock_rsrv(queue_t *q)
2199 {
2200 	mblk_t *mp;
2201 
2202 	while ((mp = getq(q)) != NULL) {
2203 		if (canputnext(q)) {
2204 			putnext(q, mp);
2205 		} else {
2206 			(void) putbq(q, mp);
2207 			return;
2208 		}
2209 	}
2210 }
2211 
2212 /*
2213  * The read procedure should only be invoked by a keysock consumer, like
2214  * ESP, AH, etc.  I should only see KEYSOCK_OUT and KEYSOCK_HELLO_ACK
2215  * messages on my read queues.
2216  */
2217 static void
2218 keysock_rput(queue_t *q, mblk_t *mp)
2219 {
2220 	keysock_consumer_t *kc = (keysock_consumer_t *)q->q_ptr;
2221 	ipsec_info_t *ii;
2222 	keysock_hello_ack_t *ksa;
2223 	minor_t serial;
2224 	mblk_t *mp1;
2225 	sadb_msg_t *samsg;
2226 	keysock_stack_t	*keystack = kc->kc_keystack;
2227 
2228 	/* Make sure I'm a consumer instance.  (i.e. something's below me) */
2229 	ASSERT(WR(q)->q_next != NULL);
2230 
2231 	if (mp->b_datap->db_type != M_CTL) {
2232 		/*
2233 		 * Keysock should only see keysock consumer interface
2234 		 * messages (see ipsec_info.h) on its read procedure.
2235 		 * To be robust, however, putnext() up so the STREAM head can
2236 		 * deal with it appropriately.
2237 		 */
2238 		ks1dbg(keystack,
2239 		    ("Hmmm, a non M_CTL (%d, 0x%x) on keysock_rput.\n",
2240 		    mp->b_datap->db_type, mp->b_datap->db_type));
2241 		putnext(q, mp);
2242 		return;
2243 	}
2244 
2245 	ii = (ipsec_info_t *)mp->b_rptr;
2246 
2247 	switch (ii->ipsec_info_type) {
2248 	case KEYSOCK_OUT:
2249 		/*
2250 		 * A consumer needs to pass a response message or an ACQUIRE
2251 		 * UP.  I assume that the consumer has done the right
2252 		 * thing w.r.t. message creation, etc.
2253 		 */
2254 		serial = ((keysock_out_t *)mp->b_rptr)->ks_out_serial;
2255 		mp1 = mp->b_cont;	/* Get M_DATA portion. */
2256 		freeb(mp);
2257 		samsg = (sadb_msg_t *)mp1->b_rptr;
2258 		if (samsg->sadb_msg_type == SADB_FLUSH ||
2259 		    (samsg->sadb_msg_type == SADB_DUMP &&
2260 			samsg->sadb_msg_len == SADB_8TO64(sizeof (*samsg)))) {
2261 			/*
2262 			 * If I'm an end-of-FLUSH or an end-of-DUMP marker...
2263 			 */
2264 			ASSERT(keystack->keystack_flushdump != 0);
2265 						/* Am I flushing? */
2266 
2267 			mutex_enter(&kc->kc_lock);
2268 			kc->kc_flags &= ~KC_FLUSHING;
2269 			mutex_exit(&kc->kc_lock);
2270 
2271 			if (samsg->sadb_msg_errno != 0)
2272 				keystack->keystack_flushdump_errno =
2273 				    samsg->sadb_msg_errno;
2274 
2275 			/*
2276 			 * Lower the atomic "flushing" count.  If it's
2277 			 * the last one, send up the end-of-{FLUSH,DUMP} to
2278 			 * the appropriate PF_KEY socket.
2279 			 */
2280 			if (atomic_add_32_nv(&keystack->keystack_flushdump,
2281 			    -1) != 0) {
2282 				ks1dbg(keystack,
2283 				    ("One flush/dump message back from %d,"
2284 				    " more to go.\n", samsg->sadb_msg_satype));
2285 				freemsg(mp1);
2286 				return;
2287 			}
2288 
2289 			samsg->sadb_msg_errno =
2290 			    (uint8_t)keystack->keystack_flushdump_errno;
2291 			if (samsg->sadb_msg_type == SADB_DUMP) {
2292 				samsg->sadb_msg_seq = 0;
2293 			}
2294 		}
2295 		keysock_passup(mp1, samsg, serial, kc,
2296 		    (samsg->sadb_msg_type == SADB_DUMP), keystack);
2297 		return;
2298 	case KEYSOCK_HELLO_ACK:
2299 		/* Aha, now we can link in the consumer! */
2300 		ksa = (keysock_hello_ack_t *)ii;
2301 		keysock_link_consumer(ksa->ks_hello_satype, kc);
2302 		freemsg(mp);
2303 		return;
2304 	default:
2305 		ks1dbg(keystack, ("Hmmm, an IPsec info I'm not used to, 0x%x\n",
2306 		    ii->ipsec_info_type));
2307 		putnext(q, mp);
2308 	}
2309 }
2310 
2311 /*
2312  * So we can avoid external linking problems....
2313  */
2314 boolean_t
2315 keysock_extended_reg(netstack_t *ns)
2316 {
2317 	keysock_stack_t	*keystack = ns->netstack_keysock;
2318 
2319 	return (keystack->keystack_num_extended != 0);
2320 }
2321 
2322 uint32_t
2323 keysock_next_seq(netstack_t *ns)
2324 {
2325 	keysock_stack_t	*keystack = ns->netstack_keysock;
2326 
2327 	return (atomic_add_32_nv(&keystack->keystack_acquire_seq, -1));
2328 }
2329