xref: /illumos-gate/usr/src/uts/common/inet/ip/keysock.c (revision 88f8b78a88cbdc6d8c1af5c3e54bc49d25095c98)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 
30 #include <sys/param.h>
31 #include <sys/types.h>
32 #include <sys/stream.h>
33 #include <sys/strsubr.h>
34 #include <sys/strsun.h>
35 #include <sys/stropts.h>
36 #include <sys/vnode.h>
37 #include <sys/strlog.h>
38 #include <sys/sysmacros.h>
39 #define	_SUN_TPI_VERSION 2
40 #include <sys/tihdr.h>
41 #include <sys/timod.h>
42 #include <sys/tiuser.h>
43 #include <sys/ddi.h>
44 #include <sys/sunddi.h>
45 #include <sys/sunldi.h>
46 #include <sys/file.h>
47 #include <sys/modctl.h>
48 #include <sys/debug.h>
49 #include <sys/kmem.h>
50 #include <sys/cmn_err.h>
51 #include <sys/proc.h>
52 #include <sys/suntpi.h>
53 #include <sys/atomic.h>
54 #include <sys/mkdev.h>
55 #include <sys/policy.h>
56 
57 #include <sys/socket.h>
58 #include <netinet/in.h>
59 #include <net/pfkeyv2.h>
60 
61 #include <inet/common.h>
62 #include <netinet/ip6.h>
63 #include <inet/ip.h>
64 #include <inet/mi.h>
65 #include <inet/nd.h>
66 #include <inet/optcom.h>
67 #include <inet/ipsec_info.h>
68 #include <inet/ipsec_impl.h>
69 #include <inet/keysock.h>
70 
71 #include <sys/isa_defs.h>
72 
73 /*
 * This is a transport provider for the PF_KEY key management socket.
75  * (See RFC 2367 for details.)
76  * Downstream messages are wrapped in a keysock consumer interface KEYSOCK_IN
77  * messages (see ipsec_info.h), and passed to the appropriate consumer.
78  * Upstream messages are generated for all open PF_KEY sockets, when
79  * appropriate, as well as the sender (as long as SO_USELOOPBACK is enabled)
80  * in reply to downstream messages.
81  *
82  * Upstream messages must be created asynchronously for the following
83  * situations:
84  *
85  *	1.) A keysock consumer requires an SA, and there is currently none.
86  *	2.) An SA expires, either hard or soft lifetime.
87  *	3.) Other events a consumer deems fit.
88  *
89  * The MT model of this is PERMOD, with shared put procedures.  Two types of
90  * messages, SADB_FLUSH and SADB_DUMP, need to lock down the perimeter to send
91  * down the *multiple* messages they create.
92  */
93 
94 /* List of open PF_KEY sockets, protected by keysock_list_lock. */
95 static kmutex_t keysock_list_lock;
96 static keysock_t *keysock_list;
97 
98 static vmem_t *keysock_vmem;		/* for minor numbers. */
99 
100 /* Consumers table.  If an entry is NULL, keysock maintains the table. */
101 static kmutex_t keysock_consumers_lock;
102 
103 #define	KEYSOCK_MAX_CONSUMERS 256
104 static keysock_consumer_t *keysock_consumers[KEYSOCK_MAX_CONSUMERS];
105 
106 /* Default structure copied into T_INFO_ACK messages (from rts.c...) */
107 static struct T_info_ack keysock_g_t_info_ack = {
108 	T_INFO_ACK,
109 	T_INFINITE,	/* TSDU_size. Maximum size messages. */
110 	T_INVALID,	/* ETSDU_size. No expedited data. */
111 	T_INVALID,	/* CDATA_size. No connect data. */
112 	T_INVALID,	/* DDATA_size. No disconnect data. */
113 	0,		/* ADDR_size. */
114 	0,		/* OPT_size. No user-settable options */
115 	64 * 1024,	/* TIDU_size. keysock allows maximum size messages. */
116 	T_COTS,		/* SERV_type. keysock supports connection oriented. */
117 	TS_UNBND,	/* CURRENT_state. This is set from keysock_state. */
118 	(XPG4_1)	/* Provider flags */
119 };
120 
121 /* Named Dispatch Parameter Management Structure */
122 typedef struct keysockpparam_s {
123 	uint_t	keysock_param_min;
124 	uint_t	keysock_param_max;
125 	uint_t	keysock_param_value;
126 	char	*keysock_param_name;
127 } keysockparam_t;
128 
129 /*
130  * Table of NDD variables supported by keysock. These are loaded into
131  * keysock_g_nd in keysock_init_nd.
132  * All of these are alterable, within the min/max values given, at run time.
133  */
134 static	keysockparam_t	keysock_param_arr[] = {
135 	/* min	max	value	name */
136 	{ 4096, 65536,	8192,	"keysock_xmit_hiwat"},
137 	{ 0,	65536,	1024,	"keysock_xmit_lowat"},
138 	{ 4096, 65536,	8192,	"keysock_recv_hiwat"},
139 	{ 65536, 1024*1024*1024, 256*1024,	"keysock_max_buf"},
140 	{ 0,	3,	0,	"keysock_debug"},
141 };
142 #define	keysock_xmit_hiwat	keysock_param_arr[0].keysock_param_value
143 #define	keysock_xmit_lowat	keysock_param_arr[1].keysock_param_value
144 #define	keysock_recv_hiwat	keysock_param_arr[2].keysock_param_value
145 #define	keysock_max_buf		keysock_param_arr[3].keysock_param_value
146 #define	keysock_debug		keysock_param_arr[4].keysock_param_value
147 
148 kmutex_t keysock_param_lock;	/* Protects the NDD variables. */
149 
150 #define	ks0dbg(a)	printf a
151 /* NOTE:  != 0 instead of > 0 so lint doesn't complain. */
152 #define	ks1dbg(a)	if (keysock_debug != 0) printf a
153 #define	ks2dbg(a)	if (keysock_debug > 1) printf a
154 #define	ks3dbg(a)	if (keysock_debug > 2) printf a
155 
156 static IDP keysock_g_nd;
157 
158 /*
159  * State for flush/dump.  This would normally be a boolean_t, but
160  * cas32() works best for a known 32-bit quantity.
161  */
162 static uint32_t keysock_flushdump;
163 static int keysock_flushdump_errno;
164 
165 static int keysock_close(queue_t *);
166 static int keysock_open(queue_t *, dev_t *, int, int, cred_t *);
167 static void keysock_wput(queue_t *, mblk_t *);
168 static void keysock_rput(queue_t *, mblk_t *);
169 static void keysock_rsrv(queue_t *);
170 static void keysock_passup(mblk_t *, sadb_msg_t *, minor_t,
171     keysock_consumer_t *, boolean_t);
172 
173 static struct module_info info = {
174 	5138, "keysock", 1, INFPSZ, 512, 128
175 };
176 
177 static struct qinit rinit = {
178 	(pfi_t)keysock_rput, (pfi_t)keysock_rsrv, keysock_open, keysock_close,
179 	NULL, &info
180 };
181 
182 static struct qinit winit = {
183 	(pfi_t)keysock_wput, NULL, NULL, NULL, NULL, &info
184 };
185 
186 struct streamtab keysockinfo = {
187 	&rinit, &winit
188 };
189 
190 extern struct modlinkage *keysock_modlp;
191 
192 /*
193  * Plumb IPsec.
194  *
195  * NOTE:  New "default" modules will need to be loaded here if needed before
196  *	  boot time.
197  */
198 
199 /* Keep these in global space to keep the lint from complaining. */
200 static char *IPSECESP = "ipsecesp";
201 static char *IPSECESPDEV = "/devices/pseudo/ipsecesp@0:ipsecesp";
202 static char *IPSECAH = "ipsecah";
203 static char *IPSECAHDEV = "/devices/pseudo/ipsecah@0:ipsecah";
204 static char *IP6DEV = "/devices/pseudo/ip6@0:ip6";
205 static char *KEYSOCK = "keysock";
206 static char *STRMOD = "strmod";
207 
208 /*
209  * keysock_plumbed: zero if plumb not attempted, positive if it succeeded,
210  * negative if it failed.
211  */
212 static int keysock_plumbed = 0;
213 
214 /*
215  * This integer counts the number of extended REGISTERed sockets.  This
216  * determines if we should send extended REGISTERs.
217  */
218 static uint32_t keysock_num_extended = 0;
219 
220 /*
221  * Global sequence space for SADB_ACQUIRE messages of any sort.
222  */
223 static uint32_t keysock_acquire_seq = 0xffffffff;
224 
225 /*
226  * Load the other ipsec modules and plumb them together.
227  */
228 int
229 keysock_plumb_ipsec(void)
230 {
231 	ldi_handle_t	lh, ip6_lh = NULL;
232 	ldi_ident_t	li = NULL;
233 	int		err = 0;
234 	int		muxid, rval;
235 	boolean_t	esp_present = B_TRUE;
236 
237 
238 	keysock_plumbed = 0;	/* we're trying again.. */
239 
240 	/*
241 	 * Load up the drivers (AH/ESP).
242 	 *
243 	 * I do this separately from the actual plumbing in case this function
244 	 * ever gets called from a diskless boot before the root filesystem is
245 	 * up.  I don't have to worry about "keysock" because, well, if I'm
246 	 * here, keysock must've loaded successfully.
247 	 */
248 	if (i_ddi_attach_pseudo_node(IPSECAH) == NULL) {
249 		ks0dbg(("IPsec:  AH failed to attach.\n"));
250 		goto bail;
251 	}
252 	if (i_ddi_attach_pseudo_node(IPSECESP) == NULL) {
253 		ks0dbg(("IPsec:  ESP failed to attach.\n"));
254 		esp_present = B_FALSE;
255 	}
256 
257 	/*
258 	 * Set up the IP streams for AH and ESP, as well as tacking keysock
259 	 * on top of them.  Assume keysock has set the autopushes up already.
260 	 */
261 
262 	/* Open IP. */
263 	err = ldi_ident_from_mod(keysock_modlp, &li);
264 	if (err) {
265 		ks0dbg(("IPsec:  lid_ident_from_mod failed (err %d).\n",
266 		    err));
267 		goto bail;
268 	}
269 
270 	err = ldi_open_by_name(IP6DEV, FREAD|FWRITE, CRED(), &ip6_lh, li);
271 	if (err) {
272 		ks0dbg(("IPsec:  Open of IP6 failed (err %d).\n", err));
273 		goto bail;
274 	}
275 
276 	/* PLINK KEYSOCK/AH */
277 	err = ldi_open_by_name(IPSECAHDEV, FREAD|FWRITE, CRED(), &lh, li);
278 	if (err) {
279 		ks0dbg(("IPsec:  Open of AH failed (err %d).\n", err));
280 		goto bail;
281 	}
282 	err = ldi_ioctl(lh,
283 	    I_PUSH, (intptr_t)KEYSOCK, FKIOCTL, CRED(), &rval);
284 	if (err) {
285 		ks0dbg(("IPsec:  Push of KEYSOCK onto AH failed (err %d).\n",
286 		    err));
287 		(void) ldi_close(lh, FREAD|FWRITE, CRED());
288 		goto bail;
289 	}
290 	err = ldi_ioctl(ip6_lh, I_PLINK, (intptr_t)lh,
291 			FREAD+FWRITE+FNOCTTY+FKIOCTL, kcred, &muxid);
292 	if (err) {
293 		ks0dbg(("IPsec:  PLINK of KEYSOCK/AH failed (err %d).\n", err));
294 		(void) ldi_close(lh, FREAD|FWRITE, CRED());
295 		goto bail;
296 	}
297 	(void) ldi_close(lh, FREAD|FWRITE, CRED());
298 
299 	/* PLINK KEYSOCK/ESP */
300 	if (esp_present) {
301 		err = ldi_open_by_name(IPSECESPDEV,
302 		    FREAD|FWRITE, CRED(), &lh, li);
303 		if (err) {
304 			ks0dbg(("IPsec:  Open of ESP failed (err %d).\n", err));
305 			goto bail;
306 		}
307 		err = ldi_ioctl(lh,
308 		    I_PUSH, (intptr_t)KEYSOCK, FKIOCTL, CRED(), &rval);
309 		if (err) {
310 			ks0dbg(("IPsec:  "
311 			    "Push of KEYSOCK onto ESP failed (err %d).\n",
312 			    err));
313 			(void) ldi_close(lh, FREAD|FWRITE, CRED());
314 			goto bail;
315 		}
316 		err = ldi_ioctl(ip6_lh, I_PLINK, (intptr_t)lh,
317 				FREAD+FWRITE+FNOCTTY+FKIOCTL, kcred, &muxid);
318 		if (err) {
319 			ks0dbg(("IPsec:  "
320 			    "PLINK of KEYSOCK/ESP failed (err %d).\n", err));
321 			(void) ldi_close(lh, FREAD|FWRITE, CRED());
322 			goto bail;
323 		}
324 		(void) ldi_close(lh, FREAD|FWRITE, CRED());
325 	}
326 
327 bail:
328 	keysock_plumbed = (err == 0) ? 1 : -1;
329 	if (ip6_lh != NULL) {
330 		(void) ldi_close(ip6_lh, FREAD|FWRITE, CRED());
331 	}
332 	if (li != NULL)
333 		ldi_ident_release(li);
334 	return (err);
335 }
336 
337 /* ARGSUSED */
338 static int
339 keysock_param_get(q, mp, cp, cr)
340 	queue_t	*q;
341 	mblk_t	*mp;
342 	caddr_t	cp;
343 	cred_t *cr;
344 {
345 	keysockparam_t	*keysockpa = (keysockparam_t *)cp;
346 	uint_t value;
347 
348 	mutex_enter(&keysock_param_lock);
349 	value = keysockpa->keysock_param_value;
350 	mutex_exit(&keysock_param_lock);
351 
352 	(void) mi_mpprintf(mp, "%u", value);
353 	return (0);
354 }
355 
356 /* This routine sets an NDD variable in a keysockparam_t structure. */
357 /* ARGSUSED */
358 static int
359 keysock_param_set(q, mp, value, cp, cr)
360 	queue_t	*q;
361 	mblk_t	*mp;
362 	char	*value;
363 	caddr_t	cp;
364 	cred_t *cr;
365 {
366 	ulong_t	new_value;
367 	keysockparam_t	*keysockpa = (keysockparam_t *)cp;
368 
369 	/* Convert the value from a string into a long integer. */
370 	if (ddi_strtoul(value, NULL, 10, &new_value) != 0)
371 		return (EINVAL);
372 
373 	mutex_enter(&keysock_param_lock);
374 	/*
375 	 * Fail the request if the new value does not lie within the
376 	 * required bounds.
377 	 */
378 	if (new_value < keysockpa->keysock_param_min ||
379 	    new_value > keysockpa->keysock_param_max) {
380 		mutex_exit(&keysock_param_lock);
381 		return (EINVAL);
382 	}
383 
384 	/* Set the new value */
385 	keysockpa->keysock_param_value = new_value;
386 	mutex_exit(&keysock_param_lock);
387 
388 	return (0);
389 }
390 
391 /*
392  * Initialize NDD variables, and other things, for keysock.
393  */
394 boolean_t
395 keysock_ddi_init(void)
396 {
397 	keysockparam_t *ksp = keysock_param_arr;
398 	int count = A_CNT(keysock_param_arr);
399 
400 	if (!keysock_g_nd) {
401 		for (; count-- > 0; ksp++) {
402 			if (ksp->keysock_param_name != NULL &&
403 			    ksp->keysock_param_name[0]) {
404 				if (!nd_load(&keysock_g_nd,
405 				    ksp->keysock_param_name,
406 				    keysock_param_get, keysock_param_set,
407 				    (caddr_t)ksp)) {
408 					nd_free(&keysock_g_nd);
409 					return (B_FALSE);
410 				}
411 			}
412 		}
413 	}
414 
415 	keysock_max_optsize = optcom_max_optsize(
416 	    keysock_opt_obj.odb_opt_des_arr, keysock_opt_obj.odb_opt_arr_cnt);
417 
418 	keysock_vmem = vmem_create("keysock", (void *)1, MAXMIN, 1,
419 	    NULL, NULL, NULL, 1, VM_SLEEP | VMC_IDENTIFIER);
420 
421 	mutex_init(&keysock_list_lock, NULL, MUTEX_DEFAULT, NULL);
422 	mutex_init(&keysock_consumers_lock, NULL, MUTEX_DEFAULT, NULL);
423 	mutex_init(&keysock_param_lock, NULL, MUTEX_DEFAULT, NULL);
424 
425 	return (B_TRUE);
426 }
427 
428 /*
429  * Free NDD variable space, and other destructors, for keysock.
430  */
431 void
432 keysock_ddi_destroy(void)
433 {
434 	/* XXX Free instances? */
435 	ks0dbg(("keysock_ddi_destroy being called.\n"));
436 
437 	vmem_destroy(keysock_vmem);
438 	mutex_destroy(&keysock_list_lock);
439 	mutex_destroy(&keysock_consumers_lock);
440 	mutex_destroy(&keysock_param_lock);
441 	nd_free(&keysock_g_nd);
442 }
443 
444 /*
445  * Close routine for keysock.
446  */
447 static int
448 keysock_close(queue_t *q)
449 {
450 	keysock_t *ks;
451 	keysock_consumer_t *kc;
452 	void *ptr = q->q_ptr;
453 	int size;
454 
455 	qprocsoff(q);
456 
457 	/* Safe assumption. */
458 	ASSERT(ptr != NULL);
459 
460 	if (WR(q)->q_next) {
461 		kc = (keysock_consumer_t *)ptr;
462 		ks0dbg(("Module close, removing a consumer (%d).\n",
463 		    kc->kc_sa_type));
464 		/*
465 		 * Because of PERMOD open/close exclusive perimeter, I
466 		 * can inspect KC_FLUSHING w/o locking down kc->kc_lock.
467 		 */
468 		if (kc->kc_flags & KC_FLUSHING) {
469 			/*
470 			 * If this decrement was the last one, send
471 			 * down the next pending one, if any.
472 			 *
473 			 * With a PERMOD perimeter, the mutexes ops aren't
474 			 * really necessary, but if we ever loosen up, we will
475 			 * have this bit covered already.
476 			 */
477 			keysock_flushdump--;
478 			if (keysock_flushdump == 0) {
479 				/*
480 				 * The flush/dump terminated by having a
481 				 * consumer go away.  I need to send up to the
482 				 * appropriate keysock all of the relevant
483 				 * information.  Unfortunately, I don't
484 				 * have that handy.
485 				 */
486 				ks0dbg(("Consumer went away while flushing or"
487 				    " dumping.\n"));
488 			}
489 		}
490 		size = sizeof (keysock_consumer_t);
491 		mutex_enter(&keysock_consumers_lock);
492 		keysock_consumers[kc->kc_sa_type] = NULL;
493 		mutex_exit(&keysock_consumers_lock);
494 		mutex_destroy(&kc->kc_lock);
495 	} else {
496 		ks3dbg(("Driver close, PF_KEY socket is going away.\n"));
497 		ks = (keysock_t *)ptr;
498 		if ((ks->keysock_flags & KEYSOCK_EXTENDED) != 0)
499 			atomic_add_32(&keysock_num_extended, -1);
500 		size = sizeof (keysock_t);
501 		mutex_enter(&keysock_list_lock);
502 		*(ks->keysock_ptpn) = ks->keysock_next;
503 		if (ks->keysock_next != NULL)
504 			ks->keysock_next->keysock_ptpn = ks->keysock_ptpn;
505 		mutex_exit(&keysock_list_lock);
506 		mutex_destroy(&ks->keysock_lock);
507 	}
508 
509 	/* Now I'm free. */
510 	kmem_free(ptr, size);
511 	return (0);
512 }
513 /*
514  * Open routine for keysock.
515  */
516 /* ARGSUSED */
517 static int
518 keysock_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
519 {
520 	keysock_t *ks;
521 	keysock_consumer_t *kc;
522 	mblk_t *mp;
523 	ipsec_info_t *ii;
524 
525 	ks3dbg(("Entering keysock open.\n"));
526 
527 	if (secpolicy_net_config(credp, B_FALSE) != 0) {
528 		/* Privilege debugging will log the error */
529 		return (EPERM);
530 	}
531 
532 	if (q->q_ptr != NULL)
533 		return (0);  /* Re-open of an already open instance. */
534 
535 	if (keysock_plumbed < 1) {
536 		keysock_plumbed = 0;
537 		/*
538 		 * Don't worry about ipsec_failure being true here.
539 		 * (See ip.c).  An open of keysock should try and force
540 		 * the issue.  Maybe it was a transient failure.
541 		 */
542 		ipsec_loader_loadnow();
543 	}
544 
545 	if (sflag & MODOPEN) {
546 		/* Initialize keysock_consumer state here. */
547 		kc = kmem_zalloc(sizeof (keysock_consumer_t), KM_NOSLEEP);
548 		if (kc == NULL)
549 			return (ENOMEM);
550 		mutex_init(&kc->kc_lock, NULL, MUTEX_DEFAULT, 0);
551 		kc->kc_rq = q;
552 		kc->kc_wq = WR(q);
553 
554 		q->q_ptr = kc;
555 		WR(q)->q_ptr = kc;
556 
557 		qprocson(q);
558 
559 		/*
560 		 * Send down initial message to whatever I was pushed on top
561 		 * of asking for its consumer type.  The reply will set it.
562 		 */
563 
564 		/* Allocate it. */
565 		mp = allocb(sizeof (ipsec_info_t), BPRI_HI);
566 		if (mp == NULL) {
567 			ks1dbg((
568 			    "keysock_open:  Cannot allocate KEYSOCK_HELLO.\n"));
569 			/* Do I need to set these to null? */
570 			q->q_ptr = NULL;
571 			WR(q)->q_ptr = NULL;
572 			mutex_destroy(&kc->kc_lock);
573 			kmem_free(kc, sizeof (*kc));
574 			return (ENOMEM);
575 		}
576 
577 		/* If I allocated okay, putnext to what I was pushed atop. */
578 		mp->b_wptr += sizeof (ipsec_info_t);
579 		mp->b_datap->db_type = M_CTL;
580 		ii = (ipsec_info_t *)mp->b_rptr;
581 		ii->ipsec_info_type = KEYSOCK_HELLO;
582 		/* Length only of type/len. */
583 		ii->ipsec_info_len = sizeof (ii->ipsec_allu);
584 		ks2dbg(("Ready to putnext KEYSOCK_HELLO.\n"));
585 		putnext(kc->kc_wq, mp);
586 	} else {
587 		minor_t ksminor;
588 
589 		/* Initialize keysock state. */
590 
591 		ks2dbg(("Made it into PF_KEY socket open.\n"));
592 
593 		ksminor = (minor_t)(uintptr_t)
594 		    vmem_alloc(keysock_vmem, 1, VM_NOSLEEP);
595 		if (ksminor == 0)
596 			return (ENOMEM);
597 
598 		ks = kmem_zalloc(sizeof (keysock_t), KM_NOSLEEP);
599 		if (ks == NULL) {
600 			vmem_free(keysock_vmem, (void *)(uintptr_t)ksminor, 1);
601 			return (ENOMEM);
602 		}
603 
604 		mutex_init(&ks->keysock_lock, NULL, MUTEX_DEFAULT, 0);
605 		ks->keysock_rq = q;
606 		ks->keysock_wq = WR(q);
607 		ks->keysock_state = TS_UNBND;
608 		ks->keysock_serial = ksminor;
609 
610 		q->q_ptr = ks;
611 		WR(q)->q_ptr = ks;
612 
613 		/*
614 		 * The receive hiwat is only looked at on the stream head
615 		 * queue.  Store in q_hiwat in order to return on SO_RCVBUF
616 		 * getsockopts.
617 		 */
618 
619 		q->q_hiwat = keysock_recv_hiwat;
620 
621 		/*
622 		 * The transmit hiwat/lowat is only looked at on IP's queue.
623 		 * Store in q_hiwat/q_lowat in order to return on
624 		 * SO_SNDBUF/SO_SNDLOWAT getsockopts.
625 		 */
626 
627 		WR(q)->q_hiwat = keysock_xmit_hiwat;
628 		WR(q)->q_lowat = keysock_xmit_lowat;
629 
630 		*devp = makedevice(getmajor(*devp), ksminor);
631 
632 		/*
633 		 * Thread keysock into the global keysock list.
634 		 */
635 		mutex_enter(&keysock_list_lock);
636 		ks->keysock_next = keysock_list;
637 		ks->keysock_ptpn = &keysock_list;
638 		if (keysock_list != NULL)
639 			keysock_list->keysock_ptpn = &ks->keysock_next;
640 		keysock_list = ks;
641 		mutex_exit(&keysock_list_lock);
642 
643 		qprocson(q);
644 		(void) mi_set_sth_hiwat(q, keysock_recv_hiwat);
645 		/*
646 		 * Wait outside the keysock module perimeter for IPsec
647 		 * plumbing to be completed.  If it fails, keysock_close()
648 		 * undoes everything we just did.
649 		 */
650 		if (!ipsec_loader_wait(q)) {
651 			(void) keysock_close(q);
652 			return (EPFNOSUPPORT);
653 		}
654 	}
655 
656 	return (0);
657 }
658 
659 /* BELOW THIS LINE ARE ROUTINES INCLUDING AND RELATED TO keysock_wput(). */
660 
661 /*
662  * Copy relevant state bits.
663  */
664 static void
665 keysock_copy_info(struct T_info_ack *tap, keysock_t *ks)
666 {
667 	*tap = keysock_g_t_info_ack;
668 	tap->CURRENT_state = ks->keysock_state;
669 	tap->OPT_size = keysock_max_optsize;
670 }
671 
672 /*
673  * This routine responds to T_CAPABILITY_REQ messages.  It is called by
674  * keysock_wput.  Much of the T_CAPABILITY_ACK information is copied from
675  * keysock_g_t_info_ack.  The current state of the stream is copied from
676  * keysock_state.
677  */
678 static void
679 keysock_capability_req(queue_t *q, mblk_t *mp)
680 {
681 	keysock_t *ks = (keysock_t *)q->q_ptr;
682 	t_uscalar_t cap_bits1;
683 	struct T_capability_ack	*tcap;
684 
685 	cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1;
686 
687 	mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack),
688 		mp->b_datap->db_type, T_CAPABILITY_ACK);
689 	if (mp == NULL)
690 		return;
691 
692 	tcap = (struct T_capability_ack *)mp->b_rptr;
693 	tcap->CAP_bits1 = 0;
694 
695 	if (cap_bits1 & TC1_INFO) {
696 		keysock_copy_info(&tcap->INFO_ack, ks);
697 		tcap->CAP_bits1 |= TC1_INFO;
698 	}
699 
700 	qreply(q, mp);
701 }
702 
703 /*
704  * This routine responds to T_INFO_REQ messages. It is called by
705  * keysock_wput_other.
706  * Most of the T_INFO_ACK information is copied from keysock_g_t_info_ack.
707  * The current state of the stream is copied from keysock_state.
708  */
709 static void
710 keysock_info_req(q, mp)
711 	queue_t	*q;
712 	mblk_t	*mp;
713 {
714 	mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO,
715 	    T_INFO_ACK);
716 	if (mp == NULL)
717 		return;
718 	keysock_copy_info((struct T_info_ack *)mp->b_rptr,
719 	    (keysock_t *)q->q_ptr);
720 	qreply(q, mp);
721 }
722 
723 /*
724  * keysock_err_ack. This routine creates a
725  * T_ERROR_ACK message and passes it
726  * upstream.
727  */
728 static void
729 keysock_err_ack(q, mp, t_error, sys_error)
730 	queue_t	*q;
731 	mblk_t	*mp;
732 	int	t_error;
733 	int	sys_error;
734 {
735 	if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL)
736 		qreply(q, mp);
737 }
738 
739 /*
740  * This routine retrieves the current status of socket options.
741  * It returns the size of the option retrieved.
742  */
743 /* ARGSUSED */
744 int
745 keysock_opt_get(queue_t *q, int level, int name, uchar_t *ptr)
746 {
747 	int *i1 = (int *)ptr;
748 	keysock_t *ks = (keysock_t *)q->q_ptr;
749 
750 	switch (level) {
751 	case SOL_SOCKET:
752 		mutex_enter(&ks->keysock_lock);
753 		switch (name) {
754 		case SO_TYPE:
755 			*i1 = SOCK_RAW;
756 			break;
757 		case SO_USELOOPBACK:
758 			*i1 = (int)(!((ks->keysock_flags & KEYSOCK_NOLOOP) ==
759 			    KEYSOCK_NOLOOP));
760 			break;
761 		/*
762 		 * The following two items can be manipulated,
763 		 * but changing them should do nothing.
764 		 */
765 		case SO_SNDBUF:
766 			*i1 = (int)q->q_hiwat;
767 			break;
768 		case SO_RCVBUF:
769 			*i1 = (int)(RD(q)->q_hiwat);
770 			break;
771 		}
772 		mutex_exit(&ks->keysock_lock);
773 		break;
774 	default:
775 		return (0);
776 	}
777 	return (sizeof (int));
778 }
779 
780 /*
781  * This routine sets socket options.
782  */
783 /* ARGSUSED */
784 int
785 keysock_opt_set(queue_t *q, uint_t mgmt_flags, int level,
786     int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp,
787     uchar_t *outvalp, void *thisdg_attrs, cred_t *cr, mblk_t *mblk)
788 {
789 	int *i1 = (int *)invalp;
790 	keysock_t *ks = (keysock_t *)q->q_ptr;
791 
792 	switch (level) {
793 	case SOL_SOCKET:
794 		mutex_enter(&ks->keysock_lock);
795 		switch (name) {
796 		case SO_USELOOPBACK:
797 			if (!(*i1))
798 				ks->keysock_flags |= KEYSOCK_NOLOOP;
799 			else ks->keysock_flags &= ~KEYSOCK_NOLOOP;
800 			break;
801 		case SO_SNDBUF:
802 			if (*i1 > keysock_max_buf)
803 				return (ENOBUFS);
804 			q->q_hiwat = *i1;
805 			break;
806 		case SO_RCVBUF:
807 			if (*i1 > keysock_max_buf)
808 				return (ENOBUFS);
809 			RD(q)->q_hiwat = *i1;
810 			(void) mi_set_sth_hiwat(RD(q), *i1);
811 			break;
812 		}
813 		mutex_exit(&ks->keysock_lock);
814 		break;
815 	}
816 	return (0);
817 }
818 
819 /*
820  * Handle STREAMS messages.
821  */
822 static void
823 keysock_wput_other(queue_t *q, mblk_t *mp)
824 {
825 	struct iocblk *iocp;
826 	int error;
827 
828 	switch (mp->b_datap->db_type) {
829 	case M_PROTO:
830 	case M_PCPROTO:
831 		if ((mp->b_wptr - mp->b_rptr) < sizeof (long)) {
832 			ks3dbg((
833 			    "keysock_wput_other: Not big enough M_PROTO\n"));
834 			freemsg(mp);
835 			return;
836 		}
837 		switch (((union T_primitives *)mp->b_rptr)->type) {
838 		case T_CAPABILITY_REQ:
839 			keysock_capability_req(q, mp);
840 			return;
841 		case T_INFO_REQ:
842 			keysock_info_req(q, mp);
843 			return;
844 		case T_SVR4_OPTMGMT_REQ:
845 			(void) svr4_optcom_req(q, mp, DB_CREDDEF(mp, kcred),
846 			    &keysock_opt_obj);
847 			return;
848 		case T_OPTMGMT_REQ:
849 			(void) tpi_optcom_req(q, mp, DB_CREDDEF(mp, kcred),
850 			    &keysock_opt_obj);
851 			return;
852 		case T_DATA_REQ:
853 		case T_EXDATA_REQ:
854 		case T_ORDREL_REQ:
855 			/* Illegal for keysock. */
856 			freemsg(mp);
857 			(void) putnextctl1(RD(q), M_ERROR, EPROTO);
858 			return;
859 		default:
860 			/* Not supported by keysock. */
861 			keysock_err_ack(q, mp, TNOTSUPPORT, 0);
862 			return;
863 		}
864 	case M_IOCTL:
865 		iocp = (struct iocblk *)mp->b_rptr;
866 		error = EINVAL;
867 
868 		switch (iocp->ioc_cmd) {
869 		case ND_SET:
870 		case ND_GET:
871 			if (nd_getset(q, keysock_g_nd, mp)) {
872 				qreply(q, mp);
873 				return;
874 			} else
875 				error = ENOENT;
876 			/* FALLTHRU */
877 		default:
878 			miocnak(q, mp, 0, error);
879 			return;
880 		}
881 	case M_FLUSH:
882 		if (*mp->b_rptr & FLUSHW) {
883 			flushq(q, FLUSHALL);
884 			*mp->b_rptr &= ~FLUSHW;
885 		}
886 		if (*mp->b_rptr & FLUSHR) {
887 			qreply(q, mp);
888 			return;
889 		}
890 		/* Else FALLTHRU */
891 	}
892 
893 	/* If fell through, just black-hole the message. */
894 	freemsg(mp);
895 }
896 
897 /*
898  * Transmit a PF_KEY error message to the instance either pointed to
899  * by ks, the instance with serial number serial, or more, depending.
900  *
901  * The faulty message (or a reasonable facsimile thereof) is in mp.
902  * This function will free mp or recycle it for delivery, thereby causing
903  * the stream head to free it.
904  */
905 static void
906 keysock_error(keysock_t *ks, mblk_t *mp, int error, int diagnostic)
907 {
908 	sadb_msg_t *samsg = (sadb_msg_t *)mp->b_rptr;
909 
910 	ASSERT(mp->b_datap->db_type == M_DATA);
911 
912 	if (samsg->sadb_msg_type < SADB_GETSPI ||
913 	    samsg->sadb_msg_type > SADB_MAX)
914 		samsg->sadb_msg_type = SADB_RESERVED;
915 
916 	/*
917 	 * Strip out extension headers.
918 	 */
919 	ASSERT(mp->b_rptr + sizeof (*samsg) <= mp->b_datap->db_lim);
920 	mp->b_wptr = mp->b_rptr + sizeof (*samsg);
921 	samsg->sadb_msg_len = SADB_8TO64(sizeof (sadb_msg_t));
922 	samsg->sadb_msg_errno = (uint8_t)error;
923 	samsg->sadb_x_msg_diagnostic = (uint16_t)diagnostic;
924 
925 	keysock_passup(mp, samsg, ks->keysock_serial, NULL, B_FALSE);
926 }
927 
928 /*
929  * Pass down a message to a consumer.  Wrap it in KEYSOCK_IN, and copy
930  * in the extv if passed in.
931  */
932 static void
933 keysock_passdown(keysock_t *ks, mblk_t *mp, uint8_t satype, sadb_ext_t *extv[],
934     boolean_t flushmsg)
935 {
936 	keysock_consumer_t *kc;
937 	mblk_t *wrapper;
938 	keysock_in_t *ksi;
939 	int i;
940 
941 	wrapper = allocb(sizeof (ipsec_info_t), BPRI_HI);
942 	if (wrapper == NULL) {
943 		ks3dbg(("keysock_passdown: allocb failed.\n"));
944 		if (extv[SADB_EXT_KEY_ENCRYPT] != NULL)
945 			bzero(extv[SADB_EXT_KEY_ENCRYPT],
946 			    SADB_64TO8(
947 				extv[SADB_EXT_KEY_ENCRYPT]->sadb_ext_len));
948 		if (extv[SADB_EXT_KEY_AUTH] != NULL)
949 			bzero(extv[SADB_EXT_KEY_AUTH],
950 			    SADB_64TO8(
951 				extv[SADB_EXT_KEY_AUTH]->sadb_ext_len));
952 		if (flushmsg) {
953 			ks0dbg((
954 			    "keysock: Downwards flush/dump message failed!\n"));
955 			/* If this is true, I hold the perimeter. */
956 			keysock_flushdump--;
957 		}
958 		freemsg(mp);
959 		return;
960 	}
961 
962 	wrapper->b_datap->db_type = M_CTL;
963 	ksi = (keysock_in_t *)wrapper->b_rptr;
964 	ksi->ks_in_type = KEYSOCK_IN;
965 	ksi->ks_in_len = sizeof (keysock_in_t);
966 	if (extv[SADB_EXT_ADDRESS_SRC] != NULL)
967 		ksi->ks_in_srctype = KS_IN_ADDR_UNKNOWN;
968 	else ksi->ks_in_srctype = KS_IN_ADDR_NOTTHERE;
969 	if (extv[SADB_EXT_ADDRESS_DST] != NULL)
970 		ksi->ks_in_dsttype = KS_IN_ADDR_UNKNOWN;
971 	else ksi->ks_in_dsttype = KS_IN_ADDR_NOTTHERE;
972 	if (extv[SADB_EXT_ADDRESS_PROXY] != NULL)
973 		ksi->ks_in_proxytype = KS_IN_ADDR_UNKNOWN;
974 	else ksi->ks_in_proxytype = KS_IN_ADDR_NOTTHERE;
975 	for (i = 0; i <= SADB_EXT_MAX; i++)
976 		ksi->ks_in_extv[i] = extv[i];
977 	ksi->ks_in_serial = ks->keysock_serial;
978 	wrapper->b_wptr += sizeof (ipsec_info_t);
979 	wrapper->b_cont = mp;
980 
981 	/*
982 	 * Find the appropriate consumer where the message is passed down.
983 	 */
984 	kc = keysock_consumers[satype];
985 	if (kc == NULL) {
986 		freeb(wrapper);
987 		keysock_error(ks, mp, EINVAL, SADB_X_DIAGNOSTIC_UNKNOWN_SATYPE);
988 		if (flushmsg) {
989 			ks0dbg((
990 			    "keysock: Downwards flush/dump message failed!\n"));
991 			/* If this is true, I hold the perimeter. */
992 			keysock_flushdump--;
993 		}
994 		return;
995 	}
996 
997 	/*
998 	 * NOTE: There used to be code in here to spin while a flush or
999 	 *	 dump finished.  Keysock now assumes that consumers have enough
1000 	 *	 MT-savviness to deal with that.
1001 	 */
1002 
1003 	/*
1004 	 * Current consumers (AH and ESP) are guaranteed to return a
1005 	 * FLUSH or DUMP message back, so when we reach here, we don't
1006 	 * have to worry about keysock_flushdumps.
1007 	 */
1008 
1009 	putnext(kc->kc_wq, wrapper);
1010 }
1011 
1012 /*
1013  * High-level reality checking of extensions.
1014  */
1015 static boolean_t
1016 ext_check(sadb_ext_t *ext)
1017 {
1018 	int i;
1019 	uint64_t *lp;
1020 	sadb_ident_t *id;
1021 	char *idstr;
1022 
1023 	switch (ext->sadb_ext_type) {
1024 	case SADB_EXT_ADDRESS_SRC:
1025 	case SADB_EXT_ADDRESS_DST:
1026 	case SADB_EXT_ADDRESS_PROXY:
1027 		/* Check for at least enough addtl length for a sockaddr. */
1028 		if (ext->sadb_ext_len <= SADB_8TO64(sizeof (sadb_address_t)))
1029 			return (B_FALSE);
1030 		break;
1031 	case SADB_EXT_LIFETIME_HARD:
1032 	case SADB_EXT_LIFETIME_SOFT:
1033 	case SADB_EXT_LIFETIME_CURRENT:
1034 		if (ext->sadb_ext_len != SADB_8TO64(sizeof (sadb_lifetime_t)))
1035 			return (B_FALSE);
1036 		break;
1037 	case SADB_EXT_SPIRANGE:
1038 		/* See if the SPI range is legit. */
1039 		if (htonl(((sadb_spirange_t *)ext)->sadb_spirange_min) >
1040 		    htonl(((sadb_spirange_t *)ext)->sadb_spirange_max))
1041 			return (B_FALSE);
1042 		break;
1043 	case SADB_EXT_KEY_AUTH:
1044 	case SADB_EXT_KEY_ENCRYPT:
1045 		/* Key length check. */
1046 		if (((sadb_key_t *)ext)->sadb_key_bits == 0)
1047 			return (B_FALSE);
1048 		/*
1049 		 * Check to see if the key length (in bits) is less than the
1050 		 * extension length (in 8-bits words).
1051 		 */
1052 		if ((roundup(SADB_1TO8(((sadb_key_t *)ext)->sadb_key_bits), 8) +
1053 		    sizeof (sadb_key_t)) != SADB_64TO8(ext->sadb_ext_len)) {
1054 			ks1dbg((
1055 			    "ext_check:  Key bits/length inconsistent.\n"));
1056 			ks1dbg(("%d bits, len is %d bytes.\n",
1057 			    ((sadb_key_t *)ext)->sadb_key_bits,
1058 			    SADB_64TO8(ext->sadb_ext_len)));
1059 			return (B_FALSE);
1060 		}
1061 
1062 		/* All-zeroes key check. */
1063 		lp = (uint64_t *)(((char *)ext) + sizeof (sadb_key_t));
1064 		for (i = 0;
1065 		    i < (ext->sadb_ext_len - SADB_8TO64(sizeof (sadb_key_t)));
1066 		    i++)
1067 			if (lp[i] != 0)
1068 				break;	/* Out of for loop. */
1069 		/* If finished the loop naturally, it's an all zero key. */
1070 		if (lp[i] == 0)
1071 			return (B_FALSE);
1072 		break;
1073 	case SADB_EXT_IDENTITY_SRC:
1074 	case SADB_EXT_IDENTITY_DST:
1075 		/*
1076 		 * Make sure the strings in these identities are
1077 		 * null-terminated.  RFC 2367 underspecified how to handle
1078 		 * such a case.  I "proactively" null-terminate the string
1079 		 * at the last byte if it's not terminated sooner.
1080 		 */
1081 		id = (sadb_ident_t *)ext;
1082 		i = SADB_64TO8(id->sadb_ident_len);
1083 		i -= sizeof (sadb_ident_t);
1084 		idstr = (char *)(id + 1);
1085 		while (*idstr != '\0' && i > 0) {
1086 			i--;
1087 			idstr++;
1088 		}
1089 		if (i == 0) {
1090 			/*
1091 			 * I.e., if the bozo user didn't NULL-terminate the
1092 			 * string...
1093 			 */
1094 			idstr--;
1095 			*idstr = '\0';
1096 		}
1097 		break;
1098 	}
1099 	return (B_TRUE);	/* For now... */
1100 }
1101 
1102 /* Return values for keysock_get_ext(). */
1103 #define	KGE_OK	0
1104 #define	KGE_DUP	1
1105 #define	KGE_UNK	2
1106 #define	KGE_LEN	3
1107 #define	KGE_CHK	4
1108 
1109 /*
1110  * Parse basic extension headers and return in the passed-in pointer vector.
1111  * Return values include:
1112  *
1113  *	KGE_OK	Everything's nice and parsed out.
1114  *		If there are no extensions, place NULL in extv[0].
1115  *	KGE_DUP	There is a duplicate extension.
1116  *		First instance in appropriate bin.  First duplicate in
1117  *		extv[0].
1118  *	KGE_UNK	Unknown extension type encountered.  extv[0] contains
1119  *		unknown header.
1120  *	KGE_LEN	Extension length error.
1121  *	KGE_CHK	High-level reality check failed on specific extension.
1122  *
1123  * My apologies for some of the pointer arithmetic in here.  I'm thinking
1124  * like an assembly programmer, yet trying to make the compiler happy.
1125  */
1126 static int
1127 keysock_get_ext(sadb_ext_t *extv[], sadb_msg_t *basehdr, uint_t msgsize)
1128 {
1129 	bzero(extv, sizeof (sadb_ext_t *) * (SADB_EXT_MAX + 1));
1130 
1131 	/* Use extv[0] as the "current working pointer". */
1132 
1133 	extv[0] = (sadb_ext_t *)(basehdr + 1);
1134 
1135 	while (extv[0] < (sadb_ext_t *)(((uint8_t *)basehdr) + msgsize)) {
1136 		/* Check for unknown headers. */
1137 		if (extv[0]->sadb_ext_type == 0 ||
1138 		    extv[0]->sadb_ext_type > SADB_EXT_MAX)
1139 			return (KGE_UNK);
1140 
1141 		/*
1142 		 * Check length.  Use uint64_t because extlen is in units
1143 		 * of 64-bit words.  If length goes beyond the msgsize,
1144 		 * return an error.  (Zero length also qualifies here.)
1145 		 */
1146 		if (extv[0]->sadb_ext_len == 0 ||
1147 		    (void *)((uint64_t *)extv[0] + extv[0]->sadb_ext_len) >
1148 		    (void *)((uint8_t *)basehdr + msgsize))
1149 			return (KGE_LEN);
1150 
1151 		/* Check for redundant headers. */
1152 		if (extv[extv[0]->sadb_ext_type] != NULL)
1153 			return (KGE_DUP);
1154 
1155 		/*
1156 		 * Reality check the extension if possible at the keysock
1157 		 * level.
1158 		 */
1159 		if (!ext_check(extv[0]))
1160 			return (KGE_CHK);
1161 
1162 		/* If I make it here, assign the appropriate bin. */
1163 		extv[extv[0]->sadb_ext_type] = extv[0];
1164 
1165 		/* Advance pointer (See above for uint64_t ptr reasoning.) */
1166 		extv[0] = (sadb_ext_t *)
1167 		    ((uint64_t *)extv[0] + extv[0]->sadb_ext_len);
1168 	}
1169 
1170 	/* Everything's cool. */
1171 
1172 	/*
1173 	 * If extv[0] == NULL, then there are no extension headers in this
1174 	 * message.  Ensure that this is the case.
1175 	 */
1176 	if (extv[0] == (sadb_ext_t *)(basehdr + 1))
1177 		extv[0] = NULL;
1178 
1179 	return (KGE_OK);
1180 }
1181 
1182 /*
1183  * qwriter() callback to handle flushes and dumps.  This routine will hold
1184  * the inner perimeter.
1185  */
1186 void
1187 keysock_do_flushdump(queue_t *q, mblk_t *mp)
1188 {
1189 	int i, start, finish;
1190 	mblk_t *mp1 = NULL;
1191 	keysock_t *ks = (keysock_t *)q->q_ptr;
1192 	sadb_ext_t *extv[SADB_EXT_MAX + 1];
1193 	sadb_msg_t *samsg = (sadb_msg_t *)mp->b_rptr;
1194 
1195 	/*
1196 	 * I am guaranteed this will work.  I did the work in keysock_parse()
1197 	 * already.
1198 	 */
1199 	(void) keysock_get_ext(extv, samsg, SADB_64TO8(samsg->sadb_msg_len));
1200 
1201 	/*
1202 	 * I hold the perimeter, therefore I don't need to use atomic ops.
1203 	 */
1204 	if (keysock_flushdump != 0) {
1205 		/* XXX Should I instead use EBUSY? */
1206 		/* XXX Or is there a way to queue these up? */
1207 		keysock_error(ks, mp, ENOMEM, SADB_X_DIAGNOSTIC_NONE);
1208 		return;
1209 	}
1210 
1211 	if (samsg->sadb_msg_satype == SADB_SATYPE_UNSPEC) {
1212 		start = 0;
1213 		finish = KEYSOCK_MAX_CONSUMERS - 1;
1214 	} else {
1215 		start = samsg->sadb_msg_satype;
1216 		finish = samsg->sadb_msg_satype;
1217 	}
1218 
1219 	/*
1220 	 * Fill up keysock_flushdump with the number of outstanding dumps
1221 	 * and/or flushes.
1222 	 */
1223 
1224 	keysock_flushdump_errno = 0;
1225 
1226 	/*
1227 	 * Okay, I hold the perimeter.  Eventually keysock_flushdump will
1228 	 * contain the number of consumers with outstanding flush operations.
1229 	 *
1230 	 * SO, here's the plan:
1231 	 *	* For each relevant consumer (Might be one, might be all)
1232 	 *		* Twiddle on the FLUSHING flag.
1233 	 *		* Pass down the FLUSH/DUMP message.
1234 	 *
1235 	 * When I see upbound FLUSH/DUMP messages, I will decrement the
1236 	 * keysock_flushdump.  When I decrement it to 0, I will pass the
1237 	 * FLUSH/DUMP message back up to the PF_KEY sockets.  Because I will
1238 	 * pass down the right SA type to the consumer (either its own, or
1239 	 * that of UNSPEC), the right one will be reflected from each consumer,
1240 	 * and accordingly back to the socket.
1241 	 */
1242 
1243 	mutex_enter(&keysock_consumers_lock);
1244 	for (i = start; i <= finish; i++) {
1245 		if (keysock_consumers[i] != NULL) {
1246 			mp1 = copymsg(mp);
1247 			if (mp1 == NULL) {
1248 				ks0dbg(("SADB_FLUSH copymsg() failed.\n"));
1249 				/*
1250 				 * Error?  And what about outstanding
1251 				 * flushes?  Oh, yeah, they get sucked up and
1252 				 * the counter is decremented.  Consumers
1253 				 * (see keysock_passdown()) are guaranteed
1254 				 * to deliver back a flush request, even if
1255 				 * it's an error.
1256 				 */
1257 				keysock_error(ks, mp, ENOMEM,
1258 				    SADB_X_DIAGNOSTIC_NONE);
1259 				return;
1260 			}
1261 			/*
1262 			 * Because my entry conditions are met above, the
1263 			 * following assertion should hold true.
1264 			 */
1265 			mutex_enter(&(keysock_consumers[i]->kc_lock));
1266 			ASSERT((keysock_consumers[i]->kc_flags & KC_FLUSHING)
1267 			    == 0);
1268 			keysock_consumers[i]->kc_flags |= KC_FLUSHING;
1269 			mutex_exit(&(keysock_consumers[i]->kc_lock));
1270 			/* Always increment the number of flushes... */
1271 			keysock_flushdump++;
1272 			/* Guaranteed to return a message. */
1273 			keysock_passdown(ks, mp1, i, extv, B_TRUE);
1274 		} else if (start == finish) {
1275 			/*
1276 			 * In case where start == finish, and there's no
1277 			 * consumer, should we force an error?  Yes.
1278 			 */
1279 			mutex_exit(&keysock_consumers_lock);
1280 			keysock_error(ks, mp, EINVAL,
1281 			    SADB_X_DIAGNOSTIC_UNKNOWN_SATYPE);
1282 			return;
1283 		}
1284 	}
1285 	mutex_exit(&keysock_consumers_lock);
1286 
1287 	if (keysock_flushdump == 0) {
1288 		/*
1289 		 * There were no consumers at all for this message.
1290 		 * XXX For now return ESRCH.
1291 		 */
1292 		keysock_error(ks, mp, ESRCH, SADB_X_DIAGNOSTIC_NO_SADBS);
1293 	} else {
1294 		/* Otherwise, free the original message. */
1295 		freemsg(mp);
1296 	}
1297 }
1298 
1299 /*
1300  * Get the right diagnostic for a duplicate.  Should probably use a static
1301  * table lookup.
1302  */
1303 int
1304 keysock_duplicate(int ext_type)
1305 {
1306 	int rc = 0;
1307 
1308 	switch (ext_type) {
1309 	case SADB_EXT_ADDRESS_SRC:
1310 		rc = SADB_X_DIAGNOSTIC_DUPLICATE_SRC;
1311 		break;
1312 	case SADB_EXT_ADDRESS_DST:
1313 		rc = SADB_X_DIAGNOSTIC_DUPLICATE_DST;
1314 		break;
1315 	case SADB_EXT_SA:
1316 		rc = SADB_X_DIAGNOSTIC_DUPLICATE_SA;
1317 		break;
1318 	case SADB_EXT_SPIRANGE:
1319 		rc = SADB_X_DIAGNOSTIC_DUPLICATE_RANGE;
1320 		break;
1321 	case SADB_EXT_KEY_AUTH:
1322 		rc = SADB_X_DIAGNOSTIC_DUPLICATE_AKEY;
1323 		break;
1324 	case SADB_EXT_KEY_ENCRYPT:
1325 		rc = SADB_X_DIAGNOSTIC_DUPLICATE_EKEY;
1326 		break;
1327 	}
1328 	return (rc);
1329 }
1330 
1331 /*
1332  * Get the right diagnostic for a reality check failure.  Should probably use
1333  * a static table lookup.
1334  */
1335 int
1336 keysock_malformed(int ext_type)
1337 {
1338 	int rc = 0;
1339 
1340 	switch (ext_type) {
1341 	case SADB_EXT_ADDRESS_SRC:
1342 		rc = SADB_X_DIAGNOSTIC_MALFORMED_SRC;
1343 		break;
1344 	case SADB_EXT_ADDRESS_DST:
1345 		rc = SADB_X_DIAGNOSTIC_MALFORMED_DST;
1346 		break;
1347 	case SADB_EXT_SA:
1348 		rc = SADB_X_DIAGNOSTIC_MALFORMED_SA;
1349 		break;
1350 	case SADB_EXT_SPIRANGE:
1351 		rc = SADB_X_DIAGNOSTIC_MALFORMED_RANGE;
1352 		break;
1353 	case SADB_EXT_KEY_AUTH:
1354 		rc = SADB_X_DIAGNOSTIC_MALFORMED_AKEY;
1355 		break;
1356 	case SADB_EXT_KEY_ENCRYPT:
1357 		rc = SADB_X_DIAGNOSTIC_MALFORMED_EKEY;
1358 		break;
1359 	}
1360 	return (rc);
1361 }
1362 
1363 /*
1364  * Keysock massaging of an inverse ACQUIRE.  Consult policy,
1365  * and construct an appropriate response.
1366  */
1367 static void
1368 keysock_inverse_acquire(mblk_t *mp, sadb_msg_t *samsg, sadb_ext_t *extv[],
1369     keysock_t *ks)
1370 {
1371 	mblk_t *reply_mp;
1372 
1373 	/*
1374 	 * Reality check things...
1375 	 */
1376 	if (extv[SADB_EXT_ADDRESS_SRC] == NULL) {
1377 		keysock_error(ks, mp, EINVAL, SADB_X_DIAGNOSTIC_MISSING_SRC);
1378 		return;
1379 	}
1380 	if (extv[SADB_EXT_ADDRESS_DST] == NULL) {
1381 		keysock_error(ks, mp, EINVAL, SADB_X_DIAGNOSTIC_MISSING_DST);
1382 	}
1383 
1384 	reply_mp = ipsec_construct_inverse_acquire(samsg, extv);
1385 
1386 	if (reply_mp != NULL) {
1387 		freemsg(mp);
1388 		keysock_passup(reply_mp, (sadb_msg_t *)reply_mp->b_rptr,
1389 		    ks->keysock_serial, NULL, B_FALSE);
1390 	} else {
1391 		keysock_error(ks, mp, samsg->sadb_msg_errno,
1392 		    samsg->sadb_x_msg_diagnostic);
1393 	}
1394 }
1395 
1396 /*
1397  * Spew an extended REGISTER down to the relevant consumers.
1398  */
1399 static void
1400 keysock_extended_register(keysock_t *ks, mblk_t *mp, sadb_ext_t *extv[])
1401 {
1402 	sadb_x_ereg_t *ereg = (sadb_x_ereg_t *)extv[SADB_X_EXT_EREG];
1403 	uint8_t *satypes, *fencepost;
1404 	mblk_t *downmp;
1405 	sadb_ext_t *downextv[SADB_EXT_MAX + 1];
1406 
1407 	if (ks->keysock_registered[0] != 0 || ks->keysock_registered[1] != 0 ||
1408 	    ks->keysock_registered[2] != 0 || ks->keysock_registered[3] != 0) {
1409 		keysock_error(ks, mp, EBUSY, 0);
1410 	}
1411 
1412 	ks->keysock_flags |= KEYSOCK_EXTENDED;
1413 	if (ereg == NULL) {
1414 		keysock_error(ks, mp, EINVAL, SADB_X_DIAGNOSTIC_SATYPE_NEEDED);
1415 	} else {
1416 		ASSERT(mp->b_rptr + msgdsize(mp) == mp->b_wptr);
1417 		fencepost = (uint8_t *)mp->b_wptr;
1418 		satypes = ereg->sadb_x_ereg_satypes;
1419 		while (*satypes != SADB_SATYPE_UNSPEC && satypes != fencepost) {
1420 			downmp = copymsg(mp);
1421 			if (downmp == NULL) {
1422 				keysock_error(ks, mp, ENOMEM, 0);
1423 				return;
1424 			}
1425 			/*
1426 			 * Since we've made it here, keysock_get_ext will work!
1427 			 */
1428 			(void) keysock_get_ext(downextv,
1429 			    (sadb_msg_t *)downmp->b_rptr, msgdsize(downmp));
1430 			keysock_passdown(ks, downmp, *satypes, downextv,
1431 			    B_FALSE);
1432 			++satypes;
1433 		}
1434 		freemsg(mp);
1435 	}
1436 
1437 	/*
1438 	 * Set global to indicate we prefer an extended ACQUIRE.
1439 	 */
1440 	atomic_add_32(&keysock_num_extended, 1);
1441 }
1442 
1443 /*
1444  * Handle PF_KEY messages.
1445  */
1446 static void
1447 keysock_parse(queue_t *q, mblk_t *mp)
1448 {
1449 	sadb_msg_t *samsg;
1450 	sadb_ext_t *extv[SADB_EXT_MAX + 1];
1451 	keysock_t *ks = (keysock_t *)q->q_ptr;
1452 	uint_t msgsize;
1453 	uint8_t satype;
1454 
1455 	/* Make sure I'm a PF_KEY socket.  (i.e. nothing's below me) */
1456 	ASSERT(WR(q)->q_next == NULL);
1457 
1458 	samsg = (sadb_msg_t *)mp->b_rptr;
1459 	ks2dbg(("Received possible PF_KEY message, type %d.\n",
1460 	    samsg->sadb_msg_type));
1461 
1462 	msgsize = SADB_64TO8(samsg->sadb_msg_len);
1463 
1464 	if (msgdsize(mp) != msgsize) {
1465 		/*
1466 		 * Message len incorrect w.r.t. actual size.  Send an error
1467 		 * (EMSGSIZE).	It may be necessary to massage things a
1468 		 * bit.	 For example, if the sadb_msg_type is hosed,
1469 		 * I need to set it to SADB_RESERVED to get delivery to
1470 		 * do the right thing.	Then again, maybe just letting
1471 		 * the error delivery do the right thing.
1472 		 */
1473 		ks2dbg(("mblk (%lu) and base (%d) message sizes don't jibe.\n",
1474 		    msgdsize(mp), msgsize));
1475 		keysock_error(ks, mp, EMSGSIZE, SADB_X_DIAGNOSTIC_NONE);
1476 		return;
1477 	}
1478 
1479 	if (msgsize > (uint_t)(mp->b_wptr - mp->b_rptr)) {
1480 		/* Get all message into one mblk. */
1481 		if (pullupmsg(mp, -1) == 0) {
1482 			/*
1483 			 * Something screwy happened.
1484 			 */
1485 			ks3dbg(("keysock_parse: pullupmsg() failed.\n"));
1486 			return;
1487 		} else {
1488 			samsg = (sadb_msg_t *)mp->b_rptr;
1489 		}
1490 	}
1491 
1492 	switch (keysock_get_ext(extv, samsg, msgsize)) {
1493 	case KGE_DUP:
1494 		/* Handle duplicate extension. */
1495 		ks1dbg(("Got duplicate extension of type %d.\n",
1496 		    extv[0]->sadb_ext_type));
1497 		keysock_error(ks, mp, EINVAL,
1498 		    keysock_duplicate(extv[0]->sadb_ext_type));
1499 		return;
1500 	case KGE_UNK:
1501 		/* Handle unknown extension. */
1502 		ks1dbg(("Got unknown extension of type %d.\n",
1503 		    extv[0]->sadb_ext_type));
1504 		keysock_error(ks, mp, EINVAL, SADB_X_DIAGNOSTIC_UNKNOWN_EXT);
1505 		return;
1506 	case KGE_LEN:
1507 		/* Length error. */
1508 		ks1dbg(("Length %d on extension type %d overrun or 0.\n",
1509 		    extv[0]->sadb_ext_len, extv[0]->sadb_ext_type));
1510 		keysock_error(ks, mp, EINVAL, SADB_X_DIAGNOSTIC_BAD_EXTLEN);
1511 		return;
1512 	case KGE_CHK:
1513 		/* Reality check failed. */
1514 		ks1dbg(("Reality check failed on extension type %d.\n",
1515 		    extv[0]->sadb_ext_type));
1516 		keysock_error(ks, mp, EINVAL,
1517 		    keysock_malformed(extv[0]->sadb_ext_type));
1518 		return;
1519 	default:
1520 		/* Default case is no errors. */
1521 		break;
1522 	}
1523 
1524 	switch (samsg->sadb_msg_type) {
1525 	case SADB_REGISTER:
1526 		/*
1527 		 * There's a semantic weirdness in that a message OTHER than
1528 		 * the return REGISTER message may be passed up if I set the
1529 		 * registered bit BEFORE I pass it down.
1530 		 *
1531 		 * SOOOO, I'll not twiddle any registered bits until I see
1532 		 * the upbound REGISTER (with a serial number in it).
1533 		 */
1534 		if (samsg->sadb_msg_satype == SADB_SATYPE_UNSPEC) {
1535 			/* Handle extended register here. */
1536 			keysock_extended_register(ks, mp, extv);
1537 			return;
1538 		} else if (ks->keysock_flags & KEYSOCK_EXTENDED) {
1539 			keysock_error(ks, mp, EBUSY, 0);
1540 			return;
1541 		}
1542 		/* FALLTHRU */
1543 	case SADB_GETSPI:
1544 	case SADB_ADD:
1545 	case SADB_UPDATE:
1546 	case SADB_DELETE:
1547 	case SADB_GET:
1548 		/*
1549 		 * Pass down to appropriate consumer.
1550 		 */
1551 		if (samsg->sadb_msg_satype != SADB_SATYPE_UNSPEC)
1552 			keysock_passdown(ks, mp, samsg->sadb_msg_satype, extv,
1553 			    B_FALSE);
1554 		else keysock_error(ks, mp, EINVAL,
1555 		    SADB_X_DIAGNOSTIC_SATYPE_NEEDED);
1556 		return;
1557 	case SADB_ACQUIRE:
1558 		/*
1559 		 * If I _receive_ an acquire, this means I should spread it
1560 		 * out to registered sockets.  Unless there's an errno...
1561 		 *
1562 		 * Need ADDRESS, may have ID, SENS, and PROP, unless errno,
1563 		 * in which case there should be NO extensions.
1564 		 *
1565 		 * Return to registered.
1566 		 */
1567 		if (samsg->sadb_msg_errno != 0) {
1568 			satype = samsg->sadb_msg_satype;
1569 			if (satype == SADB_SATYPE_UNSPEC) {
1570 				if (!(ks->keysock_flags & KEYSOCK_EXTENDED)) {
1571 					keysock_error(ks, mp, EINVAL,
1572 					    SADB_X_DIAGNOSTIC_SATYPE_NEEDED);
1573 					return;
1574 				}
1575 				/*
1576 				 * Reassign satype based on the first
1577 				 * flags that KEYSOCK_SETREG says.
1578 				 */
1579 				while (satype <= SADB_SATYPE_MAX) {
1580 					if (KEYSOCK_ISREG(ks, satype))
1581 						break;
1582 					satype++;
1583 				}
1584 				if (satype > SADB_SATYPE_MAX) {
1585 					keysock_error(ks, mp, EBUSY, 0);
1586 					return;
1587 				}
1588 			}
1589 			keysock_passdown(ks, mp, satype, extv, B_FALSE);
1590 		} else {
1591 			if (samsg->sadb_msg_satype == SADB_SATYPE_UNSPEC)
1592 				keysock_error(ks, mp, EINVAL,
1593 				    SADB_X_DIAGNOSTIC_SATYPE_NEEDED);
1594 			else
1595 				keysock_passup(mp, samsg, 0, NULL, B_FALSE);
1596 		}
1597 		return;
1598 	case SADB_EXPIRE:
1599 		/*
1600 		 * If someone sends this in, then send out to all senders.
1601 		 * (Save maybe ESP or AH, I have to be careful here.)
1602 		 *
1603 		 * Need ADDRESS, may have ID and SENS.
1604 		 *
1605 		 * XXX for now this is unsupported.
1606 		 */
1607 		break;
1608 	case SADB_FLUSH:
1609 	case SADB_DUMP:	 /* not used by normal applications */
1610 		/*
1611 		 * Nuke all SAs, or dump out the whole SA table to sender only.
1612 		 *
1613 		 * No extensions at all.  Return to all listeners.
1614 		 *
1615 		 * Question:	Should I hold a lock here to prevent
1616 		 *		additions/deletions while flushing?
1617 		 * Answer:	No.  (See keysock_passdown() for details.)
1618 		 */
1619 		if (extv[0] != NULL) {
1620 			/*
1621 			 * FLUSH or DUMP messages shouldn't have extensions.
1622 			 * Return EINVAL.
1623 			 */
1624 			ks2dbg(("FLUSH message with extension.\n"));
1625 			keysock_error(ks, mp, EINVAL, SADB_X_DIAGNOSTIC_NO_EXT);
1626 			return;
1627 		}
1628 
1629 		/* Passing down of DUMP/FLUSH messages are special. */
1630 		qwriter(q, mp, keysock_do_flushdump, PERIM_INNER);
1631 		return;
1632 	case SADB_X_PROMISC:
1633 		/*
1634 		 * Promiscuous processing message.
1635 		 */
1636 		if (samsg->sadb_msg_satype == 0)
1637 			ks->keysock_flags &= ~KEYSOCK_PROMISC;
1638 		else
1639 			ks->keysock_flags |= KEYSOCK_PROMISC;
1640 		keysock_passup(mp, samsg, ks->keysock_serial, NULL, B_FALSE);
1641 		return;
1642 	case SADB_X_INVERSE_ACQUIRE:
1643 		keysock_inverse_acquire(mp, samsg, extv, ks);
1644 		return;
1645 	default:
1646 		ks2dbg(("Got unknown message type %d.\n",
1647 		    samsg->sadb_msg_type));
1648 		keysock_error(ks, mp, EINVAL, SADB_X_DIAGNOSTIC_UNKNOWN_MSG);
1649 		return;
1650 	}
1651 
1652 	/* As a placeholder... */
1653 	ks0dbg(("keysock_parse():  Hit EOPNOTSUPP\n"));
1654 	keysock_error(ks, mp, EOPNOTSUPP, SADB_X_DIAGNOSTIC_NONE);
1655 }
1656 
1657 /*
1658  * wput routing for PF_KEY/keysock/whatever.  Unlike the routing socket,
1659  * I don't convert to ioctl()'s for IP.  I am the end-all driver as far
1660  * as PF_KEY sockets are concerned.  I do some conversion, but not as much
1661  * as IP/rts does.
1662  */
1663 static void
1664 keysock_wput(queue_t *q, mblk_t *mp)
1665 {
1666 	uchar_t *rptr = mp->b_rptr;
1667 	mblk_t *mp1;
1668 
1669 	ks3dbg(("In keysock_wput\n"));
1670 
1671 	if (WR(q)->q_next) {
1672 		keysock_consumer_t *kc = (keysock_consumer_t *)q->q_ptr;
1673 
1674 		/*
1675 		 * We shouldn't get writes on a consumer instance.
1676 		 * But for now, just passthru.
1677 		 */
1678 		ks1dbg(("Huh?  wput for an consumer instance (%d)?\n",
1679 		    kc->kc_sa_type));
1680 		putnext(q, mp);
1681 		return;
1682 	}
1683 
1684 	switch (mp->b_datap->db_type) {
1685 	case M_DATA:
1686 		/*
1687 		 * Silently discard.
1688 		 */
1689 		ks2dbg(("raw M_DATA in keysock.\n"));
1690 		freemsg(mp);
1691 		return;
1692 	case M_PROTO:
1693 	case M_PCPROTO:
1694 		if ((mp->b_wptr - rptr) >= sizeof (struct T_data_req)) {
1695 			if (((union T_primitives *)rptr)->type == T_DATA_REQ) {
1696 				if ((mp1 = mp->b_cont) == NULL) {
1697 					/* No data after T_DATA_REQ. */
1698 					ks2dbg(("No data after DATA_REQ.\n"));
1699 					freemsg(mp);
1700 					return;
1701 				}
1702 				freeb(mp);
1703 				mp = mp1;
1704 				ks2dbg(("T_DATA_REQ\n"));
1705 				break;	/* Out of switch. */
1706 			}
1707 		}
1708 		/* FALLTHRU */
1709 	default:
1710 		ks3dbg(("In default wput case (%d %d).\n",
1711 		    mp->b_datap->db_type, ((union T_primitives *)rptr)->type));
1712 		keysock_wput_other(q, mp);
1713 		return;
1714 	}
1715 
1716 	/* I now have a PF_KEY message in an M_DATA block, pointed to by mp. */
1717 	keysock_parse(q, mp);
1718 }
1719 
1720 /* BELOW THIS LINE ARE ROUTINES INCLUDING AND RELATED TO keysock_rput(). */
1721 
1722 /*
1723  * Called upon receipt of a KEYSOCK_HELLO_ACK to set up the appropriate
1724  * state vectors.
1725  */
1726 static void
1727 keysock_link_consumer(uint8_t satype, keysock_consumer_t *kc)
1728 {
1729 	keysock_t *ks;
1730 
1731 	mutex_enter(&keysock_consumers_lock);
1732 	mutex_enter(&kc->kc_lock);
1733 	if (keysock_consumers[satype] != NULL) {
1734 		ks0dbg((
1735 		    "Hmmmm, someone closed %d before the HELLO_ACK happened.\n",
1736 		    satype));
1737 		/*
1738 		 * Perhaps updating the new below-me consumer with what I have
1739 		 * so far would work too?
1740 		 */
1741 		mutex_exit(&kc->kc_lock);
1742 		mutex_exit(&keysock_consumers_lock);
1743 	} else {
1744 		/* Add new below-me consumer. */
1745 		keysock_consumers[satype] = kc;
1746 
1747 		kc->kc_flags = 0;
1748 		kc->kc_sa_type = satype;
1749 		mutex_exit(&kc->kc_lock);
1750 		mutex_exit(&keysock_consumers_lock);
1751 
1752 		/* Scan the keysock list. */
1753 		mutex_enter(&keysock_list_lock);
1754 		for (ks = keysock_list; ks != NULL; ks = ks->keysock_next) {
1755 			if (KEYSOCK_ISREG(ks, satype)) {
1756 				/*
1757 				 * XXX Perhaps send an SADB_REGISTER down on
1758 				 * the socket's behalf.
1759 				 */
1760 				ks1dbg(("Socket %u registered already for "
1761 				    "new consumer.\n", ks->keysock_serial));
1762 			}
1763 		}
1764 		mutex_exit(&keysock_list_lock);
1765 	}
1766 }
1767 
1768 /*
1769  * Generate a KEYSOCK_OUT_ERR message for my consumer.
1770  */
1771 static void
1772 keysock_out_err(keysock_consumer_t *kc, int ks_errno, mblk_t *mp)
1773 {
1774 	keysock_out_err_t *kse;
1775 	mblk_t *imp;
1776 
1777 	imp = allocb(sizeof (ipsec_info_t), BPRI_HI);
1778 	if (imp == NULL) {
1779 		ks1dbg(("keysock_out_err:  Can't alloc message.\n"));
1780 		return;
1781 	}
1782 
1783 	imp->b_datap->db_type = M_CTL;
1784 	imp->b_wptr += sizeof (ipsec_info_t);
1785 
1786 	kse = (keysock_out_err_t *)imp->b_rptr;
1787 	imp->b_cont = mp;
1788 	kse->ks_err_type = KEYSOCK_OUT_ERR;
1789 	kse->ks_err_len = sizeof (*kse);
1790 	/* Is serial necessary? */
1791 	kse->ks_err_serial = 0;
1792 	kse->ks_err_errno = ks_errno;
1793 
1794 	/*
1795 	 * XXX What else do I need to do here w.r.t. information
1796 	 * to tell the consumer what caused this error?
1797 	 *
1798 	 * I believe the answer is the PF_KEY ACQUIRE (or other) message
1799 	 * attached in mp, which is appended at the end.  I believe the
1800 	 * db_ref won't matter here, because the PF_KEY message is only read
1801 	 * for KEYSOCK_OUT_ERR.
1802 	 */
1803 
1804 	putnext(kc->kc_wq, imp);
1805 }
1806 
1807 /* XXX this is a hack errno. */
1808 #define	EIPSECNOSA 255
1809 
1810 /*
1811  * Route message (pointed by mp, header in samsg) toward appropriate
1812  * sockets.  Assume the message's creator did its job correctly.
1813  *
1814  * This should be a function that is followed by a return in its caller.
1815  * The compiler _should_ be able to use tail-call optimizations to make the
1816  * large ## of parameters not a huge deal.
1817  */
1818 static void
1819 keysock_passup(mblk_t *mp, sadb_msg_t *samsg, minor_t serial,
1820     keysock_consumer_t *kc, boolean_t persistent)
1821 {
1822 	keysock_t *ks;
1823 	uint8_t satype = samsg->sadb_msg_satype;
1824 	boolean_t toall = B_FALSE, allreg = B_FALSE, allereg = B_FALSE,
1825 	    setalg = B_FALSE;
1826 	mblk_t *mp1;
1827 	int err = EIPSECNOSA;
1828 
1829 	/* Convert mp, which is M_DATA, into an M_PROTO of type T_DATA_IND */
1830 	mp1 = allocb(sizeof (struct T_data_req), BPRI_HI);
1831 	if (mp1 == NULL) {
1832 		err = ENOMEM;
1833 		goto error;
1834 	}
1835 	mp1->b_wptr += sizeof (struct T_data_req);
1836 	((struct T_data_ind *)mp1->b_rptr)->PRIM_type = T_DATA_IND;
1837 	((struct T_data_ind *)mp1->b_rptr)->MORE_flag = 0;
1838 	mp1->b_datap->db_type = M_PROTO;
1839 	mp1->b_cont = mp;
1840 	mp = mp1;
1841 
1842 	switch (samsg->sadb_msg_type) {
1843 	case SADB_FLUSH:
1844 	case SADB_GETSPI:
1845 	case SADB_UPDATE:
1846 	case SADB_ADD:
1847 	case SADB_DELETE:
1848 	case SADB_EXPIRE:
1849 		/*
1850 		 * These are most likely replies.  Don't worry about
1851 		 * KEYSOCK_OUT_ERR handling.  Deliver to all sockets.
1852 		 */
1853 		ks3dbg(("Delivering normal message (%d) to all sockets.\n",
1854 		    samsg->sadb_msg_type));
1855 		toall = B_TRUE;
1856 		break;
1857 	case SADB_REGISTER:
1858 		/*
1859 		 * REGISTERs come up for one of three reasons:
1860 		 *
1861 		 *	1.) In response to a normal SADB_REGISTER
1862 		 *		(samsg->sadb_msg_satype != SADB_SATYPE_UNSPEC &&
1863 		 *		    serial != 0)
1864 		 *		Deliver to normal SADB_REGISTERed sockets.
1865 		 *	2.) In response to an extended REGISTER
1866 		 *		(samsg->sadb_msg_satype == SADB_SATYPE_UNSPEC)
1867 		 *		Deliver to extended REGISTERed socket.
1868 		 *	3.) Spontaneous algorithm changes
1869 		 *		(samsg->sadb_msg_satype != SADB_SATYPE_UNSPEC &&
1870 		 *		    serial == 0)
1871 		 *		Deliver to REGISTERed sockets of all sorts.
1872 		 */
1873 		if (kc == NULL) {
1874 			/* Here because of keysock_error() call. */
1875 			ASSERT(samsg->sadb_msg_errno != 0);
1876 			break;	/* Out of switch. */
1877 		}
1878 		ks3dbg(("Delivering REGISTER.\n"));
1879 		if (satype == SADB_SATYPE_UNSPEC) {
1880 			/* REGISTER Reason #2 */
1881 			allereg = B_TRUE;
1882 			/*
1883 			 * Rewhack SA type so PF_KEY socket holder knows what
1884 			 * consumer generated this algorithm list.
1885 			 */
1886 			satype = kc->kc_sa_type;
1887 			samsg->sadb_msg_satype = satype;
1888 			setalg = B_TRUE;
1889 		} else if (serial == 0) {
1890 			/* REGISTER Reason #3 */
1891 			allreg = B_TRUE;
1892 			allereg = B_TRUE;
1893 		} else {
1894 			/* REGISTER Reason #1 */
1895 			allreg = B_TRUE;
1896 			setalg = B_TRUE;
1897 		}
1898 		break;
1899 	case SADB_ACQUIRE:
1900 		/*
1901 		 * ACQUIREs are either extended (sadb_msg_satype == 0) or
1902 		 * regular (sadb_msg_satype != 0).  And we're guaranteed
1903 		 * that serial == 0 for an ACQUIRE.
1904 		 */
1905 		ks3dbg(("Delivering ACQUIRE.\n"));
1906 		allereg = (satype == SADB_SATYPE_UNSPEC);
1907 		allreg = !allereg;
1908 		/*
1909 		 * Corner case - if we send a regular ACQUIRE and there's
1910 		 * extended ones registered, don't send an error down to
1911 		 * consumers if nobody's listening and prematurely destroy
1912 		 * their ACQUIRE record.  This might be too hackish of a
1913 		 * solution.
1914 		 */
1915 		if (allreg && keysock_num_extended > 0)
1916 			err = 0;
1917 		break;
1918 	case SADB_X_PROMISC:
1919 	case SADB_X_INVERSE_ACQUIRE:
1920 	case SADB_DUMP:
1921 	case SADB_GET:
1922 	default:
1923 		/*
1924 		 * Deliver to the sender and promiscuous only.
1925 		 */
1926 		ks3dbg(("Delivering sender/promisc only (%d).\n",
1927 		    samsg->sadb_msg_type));
1928 		break;
1929 	}
1930 
1931 	mutex_enter(&keysock_list_lock);
1932 	for (ks = keysock_list; ks != NULL; ks = ks->keysock_next) {
1933 		/* Delivery loop. */
1934 
1935 		/*
1936 		 * Check special keysock-setting cases (REGISTER replies)
1937 		 * here.
1938 		 */
1939 		if (setalg && serial == ks->keysock_serial) {
1940 			ASSERT(kc != NULL);
1941 			ASSERT(kc->kc_sa_type == satype);
1942 			KEYSOCK_SETREG(ks, satype);
1943 		}
1944 
1945 		/*
1946 		 * NOLOOP takes precedence over PROMISC.  So if you've set
1947 		 * !SO_USELOOPBACK, don't expect to see any data...
1948 		 */
1949 		if (ks->keysock_flags & KEYSOCK_NOLOOP)
1950 			continue;
1951 
1952 		/*
1953 		 * Messages to all, or promiscuous sockets just GET the
1954 		 * message.  Perform rules-type checking iff it's not for all
1955 		 * listeners or the socket is in promiscuous mode.
1956 		 *
1957 		 * NOTE:Because of the (kc != NULL && ISREG()), make sure
1958 		 *	extended ACQUIREs arrive off a consumer that is
1959 		 *	part of the extended REGISTER set of consumers.
1960 		 */
1961 		if (serial != ks->keysock_serial &&
1962 		    !toall &&
1963 		    !(ks->keysock_flags & KEYSOCK_PROMISC) &&
1964 		    !((ks->keysock_flags & KEYSOCK_EXTENDED) ?
1965 			allereg : allreg && kc != NULL &&
1966 			KEYSOCK_ISREG(ks, kc->kc_sa_type)))
1967 			continue;
1968 
1969 		mp1 = dupmsg(mp);
1970 		if (mp1 == NULL) {
1971 			ks2dbg((
1972 			    "keysock_passup():  dupmsg() failed.\n"));
1973 			mp1 = mp;
1974 			mp = NULL;
1975 			err = ENOMEM;
1976 		}
1977 
1978 		/*
1979 		 * At this point, we can deliver or attempt to deliver
1980 		 * this message.  We're free of obligation to report
1981 		 * no listening PF_KEY sockets.  So set err to 0.
1982 		 */
1983 		err = 0;
1984 
1985 		/*
1986 		 * See if we canputnext(), as well as see if the message
1987 		 * needs to be queued if we can't.
1988 		 */
1989 		if (!canputnext(ks->keysock_rq)) {
1990 			if (persistent) {
1991 				if (putq(ks->keysock_rq, mp1) == 0) {
1992 					ks1dbg((
1993 					    "keysock_passup: putq failed.\n"));
1994 				} else {
1995 					continue;
1996 				}
1997 			}
1998 			freemsg(mp1);
1999 			continue;
2000 		}
2001 
2002 		ks3dbg(("Putting to serial %d.\n", ks->keysock_serial));
2003 		/*
2004 		 * Unlike the specific keysock instance case, this
2005 		 * will only hit for listeners, so we will only
2006 		 * putnext() if we can.
2007 		 */
2008 		putnext(ks->keysock_rq, mp1);
2009 		if (mp == NULL)
2010 			break;	/* out of for loop. */
2011 	}
2012 	mutex_exit(&keysock_list_lock);
2013 
2014 error:
2015 	if ((err != 0) && (kc != NULL)) {
2016 		/*
2017 		 * Generate KEYSOCK_OUT_ERR for consumer.
2018 		 * Basically, I send this back if I have not been able to
2019 		 * transmit (for whatever reason)
2020 		 */
2021 		ks1dbg(("keysock_passup():  No registered of type %d.\n",
2022 		    satype));
2023 		if (mp != NULL) {
2024 			if (mp->b_datap->db_type == M_PROTO) {
2025 				mp1 = mp;
2026 				mp = mp->b_cont;
2027 				freeb(mp1);
2028 			}
2029 			/*
2030 			 * Do a copymsg() because people who get
2031 			 * KEYSOCK_OUT_ERR may alter the message contents.
2032 			 */
2033 			mp1 = copymsg(mp);
2034 			if (mp1 == NULL) {
2035 				ks2dbg(("keysock_passup: copymsg() failed.\n"));
2036 				mp1 = mp;
2037 				mp = NULL;
2038 			}
2039 			keysock_out_err(kc, err, mp1);
2040 		}
2041 	}
2042 
2043 	/*
2044 	 * XXX Blank the message somehow.  This is difficult because we don't
2045 	 * know at this point if the message has db_ref > 1, etc.
2046 	 *
2047 	 * Optimally, keysock messages containing actual keying material would
2048 	 * be allocated with esballoc(), with a zeroing free function.
2049 	 */
2050 	if (mp != NULL)
2051 		freemsg(mp);
2052 }
2053 
2054 /*
2055  * Keysock's read service procedure is there only for PF_KEY reply
2056  * messages that really need to reach the top.
2057  */
2058 static void
2059 keysock_rsrv(queue_t *q)
2060 {
2061 	mblk_t *mp;
2062 
2063 	while ((mp = getq(q)) != NULL) {
2064 		if (canputnext(q)) {
2065 			putnext(q, mp);
2066 		} else {
2067 			(void) putbq(q, mp);
2068 			return;
2069 		}
2070 	}
2071 }
2072 
2073 /*
2074  * The read procedure should only be invoked by a keysock consumer, like
2075  * ESP, AH, etc.  I should only see KEYSOCK_OUT and KEYSOCK_HELLO_ACK
2076  * messages on my read queues.
2077  */
2078 static void
2079 keysock_rput(queue_t *q, mblk_t *mp)
2080 {
2081 	keysock_consumer_t *kc = (keysock_consumer_t *)q->q_ptr;
2082 	ipsec_info_t *ii;
2083 	keysock_hello_ack_t *ksa;
2084 	minor_t serial;
2085 	mblk_t *mp1;
2086 	sadb_msg_t *samsg;
2087 
2088 	/* Make sure I'm a consumer instance.  (i.e. something's below me) */
2089 	ASSERT(WR(q)->q_next != NULL);
2090 
2091 	if (mp->b_datap->db_type != M_CTL) {
2092 		/*
2093 		 * Keysock should only see keysock consumer interface
2094 		 * messages (see ipsec_info.h) on its read procedure.
2095 		 * To be robust, however, putnext() up so the STREAM head can
2096 		 * deal with it appropriately.
2097 		 */
2098 		ks1dbg(("Hmmm, a non M_CTL (%d, 0x%x) on keysock_rput.\n",
2099 		    mp->b_datap->db_type, mp->b_datap->db_type));
2100 		putnext(q, mp);
2101 		return;
2102 	}
2103 
2104 	ii = (ipsec_info_t *)mp->b_rptr;
2105 
2106 	switch (ii->ipsec_info_type) {
2107 	case KEYSOCK_OUT:
2108 		/*
2109 		 * A consumer needs to pass a response message or an ACQUIRE
2110 		 * UP.  I assume that the consumer has done the right
2111 		 * thing w.r.t. message creation, etc.
2112 		 */
2113 		serial = ((keysock_out_t *)mp->b_rptr)->ks_out_serial;
2114 		mp1 = mp->b_cont;	/* Get M_DATA portion. */
2115 		freeb(mp);
2116 		samsg = (sadb_msg_t *)mp1->b_rptr;
2117 		if (samsg->sadb_msg_type == SADB_FLUSH ||
2118 		    (samsg->sadb_msg_type == SADB_DUMP &&
2119 			samsg->sadb_msg_len == SADB_8TO64(sizeof (*samsg)))) {
2120 			/*
2121 			 * If I'm an end-of-FLUSH or an end-of-DUMP marker...
2122 			 */
2123 			ASSERT(keysock_flushdump != 0);  /* Am I flushing? */
2124 
2125 			mutex_enter(&kc->kc_lock);
2126 			kc->kc_flags &= ~KC_FLUSHING;
2127 			mutex_exit(&kc->kc_lock);
2128 
2129 			if (samsg->sadb_msg_errno != 0)
2130 				keysock_flushdump_errno = samsg->sadb_msg_errno;
2131 
2132 			/*
2133 			 * Lower the atomic "flushing" count.  If it's
2134 			 * the last one, send up the end-of-{FLUSH,DUMP} to
2135 			 * the appropriate PF_KEY socket.
2136 			 */
2137 			if (atomic_add_32_nv(&keysock_flushdump, -1) != 0) {
2138 				ks1dbg(("One flush/dump message back from %d,"
2139 				    " more to go.\n", samsg->sadb_msg_satype));
2140 				freemsg(mp1);
2141 				return;
2142 			}
2143 
2144 			samsg->sadb_msg_errno =
2145 			    (uint8_t)keysock_flushdump_errno;
2146 			if (samsg->sadb_msg_type == SADB_DUMP) {
2147 				samsg->sadb_msg_seq = 0;
2148 			}
2149 		}
2150 		keysock_passup(mp1, samsg, serial, kc,
2151 		    (samsg->sadb_msg_type == SADB_DUMP));
2152 		return;
2153 	case KEYSOCK_HELLO_ACK:
2154 		/* Aha, now we can link in the consumer! */
2155 		ksa = (keysock_hello_ack_t *)ii;
2156 		keysock_link_consumer(ksa->ks_hello_satype, kc);
2157 		freemsg(mp);
2158 		return;
2159 	default:
2160 		ks1dbg(("Hmmm, an IPsec info I'm not used to, 0x%x\n",
2161 		    ii->ipsec_info_type));
2162 		putnext(q, mp);
2163 	}
2164 }
2165 
2166 /*
2167  * So we can avoid external linking problems....
2168  */
2169 boolean_t
2170 keysock_extended_reg(void)
2171 {
2172 	return (keysock_num_extended != 0);
2173 }
2174 
2175 uint32_t
2176 keysock_next_seq(void)
2177 {
2178 	return (atomic_add_32_nv(&keysock_acquire_seq, -1));
2179 }
2180