xref: /illumos-gate/usr/src/uts/common/inet/ip/keysock.c (revision d51f1d338914fe15108ef3fb04d422a459cfdeda)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 
29 #include <sys/param.h>
30 #include <sys/types.h>
31 #include <sys/stream.h>
32 #include <sys/strsubr.h>
33 #include <sys/strsun.h>
34 #include <sys/stropts.h>
35 #include <sys/vnode.h>
36 #include <sys/strlog.h>
37 #include <sys/sysmacros.h>
38 #define	_SUN_TPI_VERSION 2
39 #include <sys/tihdr.h>
40 #include <sys/timod.h>
41 #include <sys/tiuser.h>
42 #include <sys/ddi.h>
43 #include <sys/sunddi.h>
44 #include <sys/sunldi.h>
45 #include <sys/file.h>
46 #include <sys/modctl.h>
47 #include <sys/debug.h>
48 #include <sys/kmem.h>
49 #include <sys/cmn_err.h>
50 #include <sys/proc.h>
51 #include <sys/suntpi.h>
52 #include <sys/atomic.h>
53 #include <sys/mkdev.h>
54 #include <sys/policy.h>
55 
56 #include <sys/socket.h>
57 #include <netinet/in.h>
58 #include <net/pfkeyv2.h>
59 
60 #include <inet/common.h>
61 #include <netinet/ip6.h>
62 #include <inet/ip.h>
63 #include <inet/mi.h>
64 #include <inet/nd.h>
65 #include <inet/optcom.h>
66 #include <inet/ipsec_info.h>
67 #include <inet/ipsec_impl.h>
68 #include <inet/keysock.h>
69 
70 #include <sys/isa_defs.h>
71 
72 /*
 * This is a transport provider for the PF_KEY key management socket.
74  * (See RFC 2367 for details.)
75  * Downstream messages are wrapped in a keysock consumer interface KEYSOCK_IN
76  * messages (see ipsec_info.h), and passed to the appropriate consumer.
77  * Upstream messages are generated for all open PF_KEY sockets, when
78  * appropriate, as well as the sender (as long as SO_USELOOPBACK is enabled)
79  * in reply to downstream messages.
80  *
81  * Upstream messages must be created asynchronously for the following
82  * situations:
83  *
84  *	1.) A keysock consumer requires an SA, and there is currently none.
85  *	2.) An SA expires, either hard or soft lifetime.
86  *	3.) Other events a consumer deems fit.
87  *
88  * The MT model of this is PERMOD, with shared put procedures.  Two types of
89  * messages, SADB_FLUSH and SADB_DUMP, need to lock down the perimeter to send
90  * down the *multiple* messages they create.
91  */
92 
/*
 * List of open PF_KEY sockets, protected by keysock_list_lock.  Instances
 * are chained through keysock_next/keysock_ptpn; keysock_open() inserts at
 * the head and keysock_close() unlinks.
 */
static kmutex_t keysock_list_lock;
static keysock_t *keysock_list;

/*
 * Identifier arena created in keysock_ddi_init().  keysock_open() allocates
 * one identifier per PF_KEY socket and uses it both as the device minor
 * number and as the instance's keysock_serial; keysock_close() returns it.
 */
static vmem_t *keysock_vmem;		/* for minor numbers. */

/*
 * Consumers table.  If an entry is NULL, keysock maintains the table.
 * The table is indexed by SA type: keysock_passdown() looks a consumer up by
 * the (uint8_t) satype of a message, and keysock_close() clears the slot
 * named by the departing consumer's kc_sa_type.
 */
static kmutex_t keysock_consumers_lock;

/* One slot for every value an 8-bit SA type can take. */
#define	KEYSOCK_MAX_CONSUMERS 256
static keysock_consumer_t *keysock_consumers[KEYSOCK_MAX_CONSUMERS];
104 
/*
 * Default structure copied into T_INFO_ACK messages (from rts.c...)
 *
 * keysock_copy_info() copies this template and then overwrites
 * CURRENT_state (from the instance's keysock_state) and OPT_size (from
 * keysock_max_optsize), so the values given here for those two fields are
 * only placeholders.
 */
static struct T_info_ack keysock_g_t_info_ack = {
	T_INFO_ACK,
	T_INFINITE,	/* TSDU_size. Maximum size messages. */
	T_INVALID,	/* ETSDU_size. No expedited data. */
	T_INVALID,	/* CDATA_size. No connect data. */
	T_INVALID,	/* DDATA_size. No disconnect data. */
	0,		/* ADDR_size. */
	0,		/* OPT_size. Replaced by keysock_max_optsize on copy. */
	64 * 1024,	/* TIDU_size. keysock allows maximum size messages. */
	T_COTS,		/* SERV_type. keysock supports connection oriented. */
	TS_UNBND,	/* CURRENT_state. This is set from keysock_state. */
	(XPG4_1)	/* Provider flags */
};
119 
/*
 * Named Dispatch Parameter Management Structure
 *
 * One of these describes each NDD tunable.  keysock_param_set() accepts a
 * new value only if it lies within [keysock_param_min, keysock_param_max].
 */
typedef struct keysockpparam_s {
	uint_t	keysock_param_min;	/* smallest value accepted */
	uint_t	keysock_param_max;	/* largest value accepted */
	uint_t	keysock_param_value;	/* current value */
	char	*keysock_param_name;	/* name the variable is loaded under */
} keysockparam_t;
127 
/*
 * Table of NDD variables supported by keysock. These are loaded into
 * keysock_g_nd in keysock_ddi_init().
 * All of these are alterable, within the min/max values given, at run time.
 *
 * The keysock_* macros that follow give the current value of each entry by
 * table index, so the order of the entries and the macro indices must be
 * kept in step.
 */
static	keysockparam_t	keysock_param_arr[] = {
	/* min	max	value	name */
	{ 4096, 65536,	8192,	"keysock_xmit_hiwat"},
	{ 0,	65536,	1024,	"keysock_xmit_lowat"},
	{ 4096, 65536,	8192,	"keysock_recv_hiwat"},
	{ 65536, 1024*1024*1024, 256*1024,	"keysock_max_buf"},
	{ 0,	3,	0,	"keysock_debug"},
};
#define	keysock_xmit_hiwat	keysock_param_arr[0].keysock_param_value
#define	keysock_xmit_lowat	keysock_param_arr[1].keysock_param_value
#define	keysock_recv_hiwat	keysock_param_arr[2].keysock_param_value
#define	keysock_max_buf		keysock_param_arr[3].keysock_param_value
#define	keysock_debug		keysock_param_arr[4].keysock_param_value

/*
 * Protects the NDD variables.  Taken by keysock_param_get() and
 * keysock_param_set(); the accessor macros above read the values without it.
 */
kmutex_t keysock_param_lock;	/* Protects the NDD variables. */
148 
/*
 * Debug printing macros.  Each takes one doubly-parenthesized printf()
 * argument list, e.g. ks1dbg(("value is %d\n", v)).  ks0dbg always prints;
 * ks1dbg, ks2dbg and ks3dbg print only when the keysock_debug tunable is at
 * least 1, 2 or 3 respectively.
 *
 * The conditional forms are wrapped in do { } while (0) so that each expands
 * to exactly one statement.  A bare "if (...) printf a" would silently
 * capture the "else" of an enclosing if/else written without braces.
 */
#define	ks0dbg(a)	printf a
/* NOTE:  != 0 instead of > 0 so lint doesn't complain. */
#define	ks1dbg(a)	do { if (keysock_debug != 0) printf a; } while (0)
#define	ks2dbg(a)	do { if (keysock_debug > 1) printf a; } while (0)
#define	ks3dbg(a)	do { if (keysock_debug > 2) printf a; } while (0)
154 
/*
 * Named-dispatch handle.  Populated by nd_load() in keysock_ddi_init(),
 * consulted by nd_getset() for ND_GET/ND_SET ioctls, and released by
 * nd_free() in keysock_ddi_destroy().
 */
static IDP keysock_g_nd;

/*
 * State for flush/dump.  This would normally be a boolean_t, but
 * cas32() works best for a known 32-bit quantity.
 *
 * The visible code decrements keysock_flushdump when a flush/dump message
 * cannot be delivered to a consumer and when a flushing consumer closes.
 * NOTE(review): keysock_flushdump_errno presumably records the first error
 * seen during a flush/dump; its users are outside this part of the file.
 */
static uint32_t keysock_flushdump;
static int keysock_flushdump_errno;
163 
/* STREAMS entry points and helpers referenced before their definitions. */
static int keysock_close(queue_t *);
static int keysock_open(queue_t *, dev_t *, int, int, cred_t *);
static void keysock_wput(queue_t *, mblk_t *);
static void keysock_rput(queue_t *, mblk_t *);
static void keysock_rsrv(queue_t *);
static void keysock_passup(mblk_t *, sadb_msg_t *, minor_t,
    keysock_consumer_t *, boolean_t);

/*
 * Module information shared by the read and write sides: module id, module
 * name, minimum and maximum packet size, then high and low water marks.
 */
static struct module_info info = {
	5138, "keysock", 1, INFPSZ, 512, 128
};

/* Read side: put, service, open and close procedures. */
static struct qinit rinit = {
	(pfi_t)keysock_rput, (pfi_t)keysock_rsrv, keysock_open, keysock_close,
	NULL, &info
};

/* Write side: put procedure only; no service routine. */
static struct qinit winit = {
	(pfi_t)keysock_wput, NULL, NULL, NULL, NULL, &info
};

/* The streamtab ties the read and write qinit structures together. */
struct streamtab keysockinfo = {
	&rinit, &winit
};

/* Module linkage; keysock_plumb_ipsec() derives its LDI identity from it. */
extern struct modlinkage *keysock_modlp;
190 
/*
 * Plumb IPsec.
 *
 * NOTE:  New "default" modules will need to be loaded here if needed before
 *	  boot time.
 */

/*
 * Keep these in global space to keep the lint from complaining.
 * These are the driver names handed to i_ddi_attach_pseudo_node(), the
 * device paths handed to ldi_open_by_name(), and the module name pushed
 * with I_PUSH in keysock_plumb_ipsec().
 */
static char *IPSECESP = "ipsecesp";
static char *IPSECESPDEV = "/devices/pseudo/ipsecesp@0:ipsecesp";
static char *IPSECAH = "ipsecah";
static char *IPSECAHDEV = "/devices/pseudo/ipsecah@0:ipsecah";
static char *IP6DEV = "/devices/pseudo/ip6@0:ip6";
static char *KEYSOCK = "keysock";
static char *STRMOD = "strmod";

/*
 * keysock_plumbed: zero if plumb not attempted, positive if it succeeded,
 * negative if it failed.  keysock_open() resets it to zero and asks for a
 * new plumbing attempt whenever it is less than one.
 */
static int keysock_plumbed = 0;

/*
 * This integer counts the number of extended REGISTERed sockets.  This
 * determines if we should send extended REGISTERs.  keysock_close()
 * decrements it atomically for sockets flagged KEYSOCK_EXTENDED.
 */
static uint32_t keysock_num_extended = 0;

/*
 * Global sequence space for SADB_ACQUIRE messages of any sort.
 */
static uint32_t keysock_acquire_seq = 0xffffffff;
223 
224 /*
225  * Load the other ipsec modules and plumb them together.
226  */
227 int
228 keysock_plumb_ipsec(void)
229 {
230 	ldi_handle_t	lh, ip6_lh = NULL;
231 	ldi_ident_t	li = NULL;
232 	int		err = 0;
233 	int		muxid, rval;
234 	boolean_t	esp_present = B_TRUE;
235 
236 
237 	keysock_plumbed = 0;	/* we're trying again.. */
238 
239 	/*
240 	 * Load up the drivers (AH/ESP).
241 	 *
242 	 * I do this separately from the actual plumbing in case this function
243 	 * ever gets called from a diskless boot before the root filesystem is
244 	 * up.  I don't have to worry about "keysock" because, well, if I'm
245 	 * here, keysock must've loaded successfully.
246 	 */
247 	if (i_ddi_attach_pseudo_node(IPSECAH) == NULL) {
248 		ks0dbg(("IPsec:  AH failed to attach.\n"));
249 		goto bail;
250 	}
251 	if (i_ddi_attach_pseudo_node(IPSECESP) == NULL) {
252 		ks0dbg(("IPsec:  ESP failed to attach.\n"));
253 		esp_present = B_FALSE;
254 	}
255 
256 	/*
257 	 * Set up the IP streams for AH and ESP, as well as tacking keysock
258 	 * on top of them.  Assume keysock has set the autopushes up already.
259 	 */
260 
261 	/* Open IP. */
262 	err = ldi_ident_from_mod(keysock_modlp, &li);
263 	if (err) {
264 		ks0dbg(("IPsec:  lid_ident_from_mod failed (err %d).\n",
265 		    err));
266 		goto bail;
267 	}
268 
269 	err = ldi_open_by_name(IP6DEV, FREAD|FWRITE, CRED(), &ip6_lh, li);
270 	if (err) {
271 		ks0dbg(("IPsec:  Open of IP6 failed (err %d).\n", err));
272 		goto bail;
273 	}
274 
275 	/* PLINK KEYSOCK/AH */
276 	err = ldi_open_by_name(IPSECAHDEV, FREAD|FWRITE, CRED(), &lh, li);
277 	if (err) {
278 		ks0dbg(("IPsec:  Open of AH failed (err %d).\n", err));
279 		goto bail;
280 	}
281 	err = ldi_ioctl(lh,
282 	    I_PUSH, (intptr_t)KEYSOCK, FKIOCTL, CRED(), &rval);
283 	if (err) {
284 		ks0dbg(("IPsec:  Push of KEYSOCK onto AH failed (err %d).\n",
285 		    err));
286 		(void) ldi_close(lh, FREAD|FWRITE, CRED());
287 		goto bail;
288 	}
289 	err = ldi_ioctl(ip6_lh, I_PLINK, (intptr_t)lh,
290 			FREAD+FWRITE+FNOCTTY+FKIOCTL, kcred, &muxid);
291 	if (err) {
292 		ks0dbg(("IPsec:  PLINK of KEYSOCK/AH failed (err %d).\n", err));
293 		(void) ldi_close(lh, FREAD|FWRITE, CRED());
294 		goto bail;
295 	}
296 	(void) ldi_close(lh, FREAD|FWRITE, CRED());
297 
298 	/* PLINK KEYSOCK/ESP */
299 	if (esp_present) {
300 		err = ldi_open_by_name(IPSECESPDEV,
301 		    FREAD|FWRITE, CRED(), &lh, li);
302 		if (err) {
303 			ks0dbg(("IPsec:  Open of ESP failed (err %d).\n", err));
304 			goto bail;
305 		}
306 		err = ldi_ioctl(lh,
307 		    I_PUSH, (intptr_t)KEYSOCK, FKIOCTL, CRED(), &rval);
308 		if (err) {
309 			ks0dbg(("IPsec:  "
310 			    "Push of KEYSOCK onto ESP failed (err %d).\n",
311 			    err));
312 			(void) ldi_close(lh, FREAD|FWRITE, CRED());
313 			goto bail;
314 		}
315 		err = ldi_ioctl(ip6_lh, I_PLINK, (intptr_t)lh,
316 				FREAD+FWRITE+FNOCTTY+FKIOCTL, kcred, &muxid);
317 		if (err) {
318 			ks0dbg(("IPsec:  "
319 			    "PLINK of KEYSOCK/ESP failed (err %d).\n", err));
320 			(void) ldi_close(lh, FREAD|FWRITE, CRED());
321 			goto bail;
322 		}
323 		(void) ldi_close(lh, FREAD|FWRITE, CRED());
324 	}
325 
326 bail:
327 	keysock_plumbed = (err == 0) ? 1 : -1;
328 	if (ip6_lh != NULL) {
329 		(void) ldi_close(ip6_lh, FREAD|FWRITE, CRED());
330 	}
331 	if (li != NULL)
332 		ldi_ident_release(li);
333 	return (err);
334 }
335 
336 /* ARGSUSED */
337 static int
338 keysock_param_get(q, mp, cp, cr)
339 	queue_t	*q;
340 	mblk_t	*mp;
341 	caddr_t	cp;
342 	cred_t *cr;
343 {
344 	keysockparam_t	*keysockpa = (keysockparam_t *)cp;
345 	uint_t value;
346 
347 	mutex_enter(&keysock_param_lock);
348 	value = keysockpa->keysock_param_value;
349 	mutex_exit(&keysock_param_lock);
350 
351 	(void) mi_mpprintf(mp, "%u", value);
352 	return (0);
353 }
354 
355 /* This routine sets an NDD variable in a keysockparam_t structure. */
356 /* ARGSUSED */
357 static int
358 keysock_param_set(q, mp, value, cp, cr)
359 	queue_t	*q;
360 	mblk_t	*mp;
361 	char	*value;
362 	caddr_t	cp;
363 	cred_t *cr;
364 {
365 	ulong_t	new_value;
366 	keysockparam_t	*keysockpa = (keysockparam_t *)cp;
367 
368 	/* Convert the value from a string into a long integer. */
369 	if (ddi_strtoul(value, NULL, 10, &new_value) != 0)
370 		return (EINVAL);
371 
372 	mutex_enter(&keysock_param_lock);
373 	/*
374 	 * Fail the request if the new value does not lie within the
375 	 * required bounds.
376 	 */
377 	if (new_value < keysockpa->keysock_param_min ||
378 	    new_value > keysockpa->keysock_param_max) {
379 		mutex_exit(&keysock_param_lock);
380 		return (EINVAL);
381 	}
382 
383 	/* Set the new value */
384 	keysockpa->keysock_param_value = new_value;
385 	mutex_exit(&keysock_param_lock);
386 
387 	return (0);
388 }
389 
390 /*
391  * Initialize NDD variables, and other things, for keysock.
392  */
393 boolean_t
394 keysock_ddi_init(void)
395 {
396 	keysockparam_t *ksp = keysock_param_arr;
397 	int count = A_CNT(keysock_param_arr);
398 
399 	if (!keysock_g_nd) {
400 		for (; count-- > 0; ksp++) {
401 			if (ksp->keysock_param_name != NULL &&
402 			    ksp->keysock_param_name[0]) {
403 				if (!nd_load(&keysock_g_nd,
404 				    ksp->keysock_param_name,
405 				    keysock_param_get, keysock_param_set,
406 				    (caddr_t)ksp)) {
407 					nd_free(&keysock_g_nd);
408 					return (B_FALSE);
409 				}
410 			}
411 		}
412 	}
413 
414 	keysock_max_optsize = optcom_max_optsize(
415 	    keysock_opt_obj.odb_opt_des_arr, keysock_opt_obj.odb_opt_arr_cnt);
416 
417 	keysock_vmem = vmem_create("keysock", (void *)1, MAXMIN, 1,
418 	    NULL, NULL, NULL, 1, VM_SLEEP | VMC_IDENTIFIER);
419 
420 	mutex_init(&keysock_list_lock, NULL, MUTEX_DEFAULT, NULL);
421 	mutex_init(&keysock_consumers_lock, NULL, MUTEX_DEFAULT, NULL);
422 	mutex_init(&keysock_param_lock, NULL, MUTEX_DEFAULT, NULL);
423 
424 	return (B_TRUE);
425 }
426 
427 /*
428  * Free NDD variable space, and other destructors, for keysock.
429  */
430 void
431 keysock_ddi_destroy(void)
432 {
433 	/* XXX Free instances? */
434 	ks0dbg(("keysock_ddi_destroy being called.\n"));
435 
436 	vmem_destroy(keysock_vmem);
437 	mutex_destroy(&keysock_list_lock);
438 	mutex_destroy(&keysock_consumers_lock);
439 	mutex_destroy(&keysock_param_lock);
440 	nd_free(&keysock_g_nd);
441 }
442 
443 /*
444  * Close routine for keysock.
445  */
446 static int
447 keysock_close(queue_t *q)
448 {
449 	keysock_t *ks;
450 	keysock_consumer_t *kc;
451 	void *ptr = q->q_ptr;
452 	int size;
453 
454 	qprocsoff(q);
455 
456 	/* Safe assumption. */
457 	ASSERT(ptr != NULL);
458 
459 	if (WR(q)->q_next) {
460 		kc = (keysock_consumer_t *)ptr;
461 		ks0dbg(("Module close, removing a consumer (%d).\n",
462 		    kc->kc_sa_type));
463 		/*
464 		 * Because of PERMOD open/close exclusive perimeter, I
465 		 * can inspect KC_FLUSHING w/o locking down kc->kc_lock.
466 		 */
467 		if (kc->kc_flags & KC_FLUSHING) {
468 			/*
469 			 * If this decrement was the last one, send
470 			 * down the next pending one, if any.
471 			 *
472 			 * With a PERMOD perimeter, the mutexes ops aren't
473 			 * really necessary, but if we ever loosen up, we will
474 			 * have this bit covered already.
475 			 */
476 			keysock_flushdump--;
477 			if (keysock_flushdump == 0) {
478 				/*
479 				 * The flush/dump terminated by having a
480 				 * consumer go away.  I need to send up to the
481 				 * appropriate keysock all of the relevant
482 				 * information.  Unfortunately, I don't
483 				 * have that handy.
484 				 */
485 				ks0dbg(("Consumer went away while flushing or"
486 				    " dumping.\n"));
487 			}
488 		}
489 		size = sizeof (keysock_consumer_t);
490 		mutex_enter(&keysock_consumers_lock);
491 		keysock_consumers[kc->kc_sa_type] = NULL;
492 		mutex_exit(&keysock_consumers_lock);
493 		mutex_destroy(&kc->kc_lock);
494 	} else {
495 		ks3dbg(("Driver close, PF_KEY socket is going away.\n"));
496 		ks = (keysock_t *)ptr;
497 		if ((ks->keysock_flags & KEYSOCK_EXTENDED) != 0)
498 			atomic_add_32(&keysock_num_extended, -1);
499 		size = sizeof (keysock_t);
500 		mutex_enter(&keysock_list_lock);
501 		*(ks->keysock_ptpn) = ks->keysock_next;
502 		if (ks->keysock_next != NULL)
503 			ks->keysock_next->keysock_ptpn = ks->keysock_ptpn;
504 		mutex_exit(&keysock_list_lock);
505 		mutex_destroy(&ks->keysock_lock);
506 		vmem_free(keysock_vmem, (void *)(uintptr_t)ks->keysock_serial,
507 		    1);
508 	}
509 
510 	/* Now I'm free. */
511 	kmem_free(ptr, size);
512 	return (0);
513 }
514 /*
515  * Open routine for keysock.
516  */
517 /* ARGSUSED */
518 static int
519 keysock_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
520 {
521 	keysock_t *ks;
522 	keysock_consumer_t *kc;
523 	mblk_t *mp;
524 	ipsec_info_t *ii;
525 
526 	ks3dbg(("Entering keysock open.\n"));
527 
528 	if (secpolicy_net_config(credp, B_FALSE) != 0) {
529 		/* Privilege debugging will log the error */
530 		return (EPERM);
531 	}
532 
533 	if (q->q_ptr != NULL)
534 		return (0);  /* Re-open of an already open instance. */
535 
536 	if (keysock_plumbed < 1) {
537 		keysock_plumbed = 0;
538 		/*
539 		 * Don't worry about ipsec_failure being true here.
540 		 * (See ip.c).  An open of keysock should try and force
541 		 * the issue.  Maybe it was a transient failure.
542 		 */
543 		ipsec_loader_loadnow();
544 	}
545 
546 	if (sflag & MODOPEN) {
547 		/* Initialize keysock_consumer state here. */
548 		kc = kmem_zalloc(sizeof (keysock_consumer_t), KM_NOSLEEP);
549 		if (kc == NULL)
550 			return (ENOMEM);
551 		mutex_init(&kc->kc_lock, NULL, MUTEX_DEFAULT, 0);
552 		kc->kc_rq = q;
553 		kc->kc_wq = WR(q);
554 
555 		q->q_ptr = kc;
556 		WR(q)->q_ptr = kc;
557 
558 		qprocson(q);
559 
560 		/*
561 		 * Send down initial message to whatever I was pushed on top
562 		 * of asking for its consumer type.  The reply will set it.
563 		 */
564 
565 		/* Allocate it. */
566 		mp = allocb(sizeof (ipsec_info_t), BPRI_HI);
567 		if (mp == NULL) {
568 			ks1dbg((
569 			    "keysock_open:  Cannot allocate KEYSOCK_HELLO.\n"));
570 			/* Do I need to set these to null? */
571 			q->q_ptr = NULL;
572 			WR(q)->q_ptr = NULL;
573 			mutex_destroy(&kc->kc_lock);
574 			kmem_free(kc, sizeof (*kc));
575 			return (ENOMEM);
576 		}
577 
578 		/* If I allocated okay, putnext to what I was pushed atop. */
579 		mp->b_wptr += sizeof (ipsec_info_t);
580 		mp->b_datap->db_type = M_CTL;
581 		ii = (ipsec_info_t *)mp->b_rptr;
582 		ii->ipsec_info_type = KEYSOCK_HELLO;
583 		/* Length only of type/len. */
584 		ii->ipsec_info_len = sizeof (ii->ipsec_allu);
585 		ks2dbg(("Ready to putnext KEYSOCK_HELLO.\n"));
586 		putnext(kc->kc_wq, mp);
587 	} else {
588 		minor_t ksminor;
589 
590 		/* Initialize keysock state. */
591 
592 		ks2dbg(("Made it into PF_KEY socket open.\n"));
593 
594 		ksminor = (minor_t)(uintptr_t)
595 		    vmem_alloc(keysock_vmem, 1, VM_NOSLEEP);
596 		if (ksminor == 0)
597 			return (ENOMEM);
598 
599 		ks = kmem_zalloc(sizeof (keysock_t), KM_NOSLEEP);
600 		if (ks == NULL) {
601 			vmem_free(keysock_vmem, (void *)(uintptr_t)ksminor, 1);
602 			return (ENOMEM);
603 		}
604 
605 		mutex_init(&ks->keysock_lock, NULL, MUTEX_DEFAULT, 0);
606 		ks->keysock_rq = q;
607 		ks->keysock_wq = WR(q);
608 		ks->keysock_state = TS_UNBND;
609 		ks->keysock_serial = ksminor;
610 
611 		q->q_ptr = ks;
612 		WR(q)->q_ptr = ks;
613 
614 		/*
615 		 * The receive hiwat is only looked at on the stream head
616 		 * queue.  Store in q_hiwat in order to return on SO_RCVBUF
617 		 * getsockopts.
618 		 */
619 
620 		q->q_hiwat = keysock_recv_hiwat;
621 
622 		/*
623 		 * The transmit hiwat/lowat is only looked at on IP's queue.
624 		 * Store in q_hiwat/q_lowat in order to return on
625 		 * SO_SNDBUF/SO_SNDLOWAT getsockopts.
626 		 */
627 
628 		WR(q)->q_hiwat = keysock_xmit_hiwat;
629 		WR(q)->q_lowat = keysock_xmit_lowat;
630 
631 		*devp = makedevice(getmajor(*devp), ksminor);
632 
633 		/*
634 		 * Thread keysock into the global keysock list.
635 		 */
636 		mutex_enter(&keysock_list_lock);
637 		ks->keysock_next = keysock_list;
638 		ks->keysock_ptpn = &keysock_list;
639 		if (keysock_list != NULL)
640 			keysock_list->keysock_ptpn = &ks->keysock_next;
641 		keysock_list = ks;
642 		mutex_exit(&keysock_list_lock);
643 
644 		qprocson(q);
645 		(void) mi_set_sth_hiwat(q, keysock_recv_hiwat);
646 		/*
647 		 * Wait outside the keysock module perimeter for IPsec
648 		 * plumbing to be completed.  If it fails, keysock_close()
649 		 * undoes everything we just did.
650 		 */
651 		if (!ipsec_loader_wait(q)) {
652 			(void) keysock_close(q);
653 			return (EPFNOSUPPORT);
654 		}
655 	}
656 
657 	return (0);
658 }
659 
660 /* BELOW THIS LINE ARE ROUTINES INCLUDING AND RELATED TO keysock_wput(). */
661 
662 /*
663  * Copy relevant state bits.
664  */
665 static void
666 keysock_copy_info(struct T_info_ack *tap, keysock_t *ks)
667 {
668 	*tap = keysock_g_t_info_ack;
669 	tap->CURRENT_state = ks->keysock_state;
670 	tap->OPT_size = keysock_max_optsize;
671 }
672 
673 /*
674  * This routine responds to T_CAPABILITY_REQ messages.  It is called by
675  * keysock_wput.  Much of the T_CAPABILITY_ACK information is copied from
676  * keysock_g_t_info_ack.  The current state of the stream is copied from
677  * keysock_state.
678  */
679 static void
680 keysock_capability_req(queue_t *q, mblk_t *mp)
681 {
682 	keysock_t *ks = (keysock_t *)q->q_ptr;
683 	t_uscalar_t cap_bits1;
684 	struct T_capability_ack	*tcap;
685 
686 	cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1;
687 
688 	mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack),
689 		mp->b_datap->db_type, T_CAPABILITY_ACK);
690 	if (mp == NULL)
691 		return;
692 
693 	tcap = (struct T_capability_ack *)mp->b_rptr;
694 	tcap->CAP_bits1 = 0;
695 
696 	if (cap_bits1 & TC1_INFO) {
697 		keysock_copy_info(&tcap->INFO_ack, ks);
698 		tcap->CAP_bits1 |= TC1_INFO;
699 	}
700 
701 	qreply(q, mp);
702 }
703 
704 /*
705  * This routine responds to T_INFO_REQ messages. It is called by
706  * keysock_wput_other.
707  * Most of the T_INFO_ACK information is copied from keysock_g_t_info_ack.
708  * The current state of the stream is copied from keysock_state.
709  */
710 static void
711 keysock_info_req(q, mp)
712 	queue_t	*q;
713 	mblk_t	*mp;
714 {
715 	mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO,
716 	    T_INFO_ACK);
717 	if (mp == NULL)
718 		return;
719 	keysock_copy_info((struct T_info_ack *)mp->b_rptr,
720 	    (keysock_t *)q->q_ptr);
721 	qreply(q, mp);
722 }
723 
724 /*
725  * keysock_err_ack. This routine creates a
726  * T_ERROR_ACK message and passes it
727  * upstream.
728  */
729 static void
730 keysock_err_ack(q, mp, t_error, sys_error)
731 	queue_t	*q;
732 	mblk_t	*mp;
733 	int	t_error;
734 	int	sys_error;
735 {
736 	if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL)
737 		qreply(q, mp);
738 }
739 
740 /*
741  * This routine retrieves the current status of socket options.
742  * It returns the size of the option retrieved.
743  */
744 /* ARGSUSED */
745 int
746 keysock_opt_get(queue_t *q, int level, int name, uchar_t *ptr)
747 {
748 	int *i1 = (int *)ptr;
749 	keysock_t *ks = (keysock_t *)q->q_ptr;
750 
751 	switch (level) {
752 	case SOL_SOCKET:
753 		mutex_enter(&ks->keysock_lock);
754 		switch (name) {
755 		case SO_TYPE:
756 			*i1 = SOCK_RAW;
757 			break;
758 		case SO_USELOOPBACK:
759 			*i1 = (int)(!((ks->keysock_flags & KEYSOCK_NOLOOP) ==
760 			    KEYSOCK_NOLOOP));
761 			break;
762 		/*
763 		 * The following two items can be manipulated,
764 		 * but changing them should do nothing.
765 		 */
766 		case SO_SNDBUF:
767 			*i1 = (int)q->q_hiwat;
768 			break;
769 		case SO_RCVBUF:
770 			*i1 = (int)(RD(q)->q_hiwat);
771 			break;
772 		}
773 		mutex_exit(&ks->keysock_lock);
774 		break;
775 	default:
776 		return (0);
777 	}
778 	return (sizeof (int));
779 }
780 
781 /*
782  * This routine sets socket options.
783  */
784 /* ARGSUSED */
785 int
786 keysock_opt_set(queue_t *q, uint_t mgmt_flags, int level,
787     int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp,
788     uchar_t *outvalp, void *thisdg_attrs, cred_t *cr, mblk_t *mblk)
789 {
790 	int *i1 = (int *)invalp;
791 	keysock_t *ks = (keysock_t *)q->q_ptr;
792 
793 	switch (level) {
794 	case SOL_SOCKET:
795 		mutex_enter(&ks->keysock_lock);
796 		switch (name) {
797 		case SO_USELOOPBACK:
798 			if (!(*i1))
799 				ks->keysock_flags |= KEYSOCK_NOLOOP;
800 			else ks->keysock_flags &= ~KEYSOCK_NOLOOP;
801 			break;
802 		case SO_SNDBUF:
803 			if (*i1 > keysock_max_buf)
804 				return (ENOBUFS);
805 			q->q_hiwat = *i1;
806 			break;
807 		case SO_RCVBUF:
808 			if (*i1 > keysock_max_buf)
809 				return (ENOBUFS);
810 			RD(q)->q_hiwat = *i1;
811 			(void) mi_set_sth_hiwat(RD(q), *i1);
812 			break;
813 		}
814 		mutex_exit(&ks->keysock_lock);
815 		break;
816 	}
817 	return (0);
818 }
819 
820 /*
821  * Handle STREAMS messages.
822  */
823 static void
824 keysock_wput_other(queue_t *q, mblk_t *mp)
825 {
826 	struct iocblk *iocp;
827 	int error;
828 
829 	switch (mp->b_datap->db_type) {
830 	case M_PROTO:
831 	case M_PCPROTO:
832 		if ((mp->b_wptr - mp->b_rptr) < sizeof (long)) {
833 			ks3dbg((
834 			    "keysock_wput_other: Not big enough M_PROTO\n"));
835 			freemsg(mp);
836 			return;
837 		}
838 		switch (((union T_primitives *)mp->b_rptr)->type) {
839 		case T_CAPABILITY_REQ:
840 			keysock_capability_req(q, mp);
841 			return;
842 		case T_INFO_REQ:
843 			keysock_info_req(q, mp);
844 			return;
845 		case T_SVR4_OPTMGMT_REQ:
846 			(void) svr4_optcom_req(q, mp, DB_CREDDEF(mp, kcred),
847 			    &keysock_opt_obj);
848 			return;
849 		case T_OPTMGMT_REQ:
850 			(void) tpi_optcom_req(q, mp, DB_CREDDEF(mp, kcred),
851 			    &keysock_opt_obj);
852 			return;
853 		case T_DATA_REQ:
854 		case T_EXDATA_REQ:
855 		case T_ORDREL_REQ:
856 			/* Illegal for keysock. */
857 			freemsg(mp);
858 			(void) putnextctl1(RD(q), M_ERROR, EPROTO);
859 			return;
860 		default:
861 			/* Not supported by keysock. */
862 			keysock_err_ack(q, mp, TNOTSUPPORT, 0);
863 			return;
864 		}
865 	case M_IOCTL:
866 		iocp = (struct iocblk *)mp->b_rptr;
867 		error = EINVAL;
868 
869 		switch (iocp->ioc_cmd) {
870 		case ND_SET:
871 		case ND_GET:
872 			if (nd_getset(q, keysock_g_nd, mp)) {
873 				qreply(q, mp);
874 				return;
875 			} else
876 				error = ENOENT;
877 			/* FALLTHRU */
878 		default:
879 			miocnak(q, mp, 0, error);
880 			return;
881 		}
882 	case M_FLUSH:
883 		if (*mp->b_rptr & FLUSHW) {
884 			flushq(q, FLUSHALL);
885 			*mp->b_rptr &= ~FLUSHW;
886 		}
887 		if (*mp->b_rptr & FLUSHR) {
888 			qreply(q, mp);
889 			return;
890 		}
891 		/* Else FALLTHRU */
892 	}
893 
894 	/* If fell through, just black-hole the message. */
895 	freemsg(mp);
896 }
897 
898 /*
899  * Transmit a PF_KEY error message to the instance either pointed to
900  * by ks, the instance with serial number serial, or more, depending.
901  *
902  * The faulty message (or a reasonable facsimile thereof) is in mp.
903  * This function will free mp or recycle it for delivery, thereby causing
904  * the stream head to free it.
905  */
906 static void
907 keysock_error(keysock_t *ks, mblk_t *mp, int error, int diagnostic)
908 {
909 	sadb_msg_t *samsg = (sadb_msg_t *)mp->b_rptr;
910 
911 	ASSERT(mp->b_datap->db_type == M_DATA);
912 
913 	if (samsg->sadb_msg_type < SADB_GETSPI ||
914 	    samsg->sadb_msg_type > SADB_MAX)
915 		samsg->sadb_msg_type = SADB_RESERVED;
916 
917 	/*
918 	 * Strip out extension headers.
919 	 */
920 	ASSERT(mp->b_rptr + sizeof (*samsg) <= mp->b_datap->db_lim);
921 	mp->b_wptr = mp->b_rptr + sizeof (*samsg);
922 	samsg->sadb_msg_len = SADB_8TO64(sizeof (sadb_msg_t));
923 	samsg->sadb_msg_errno = (uint8_t)error;
924 	samsg->sadb_x_msg_diagnostic = (uint16_t)diagnostic;
925 
926 	keysock_passup(mp, samsg, ks->keysock_serial, NULL, B_FALSE);
927 }
928 
929 /*
930  * Pass down a message to a consumer.  Wrap it in KEYSOCK_IN, and copy
931  * in the extv if passed in.
932  */
933 static void
934 keysock_passdown(keysock_t *ks, mblk_t *mp, uint8_t satype, sadb_ext_t *extv[],
935     boolean_t flushmsg)
936 {
937 	keysock_consumer_t *kc;
938 	mblk_t *wrapper;
939 	keysock_in_t *ksi;
940 	int i;
941 
942 	wrapper = allocb(sizeof (ipsec_info_t), BPRI_HI);
943 	if (wrapper == NULL) {
944 		ks3dbg(("keysock_passdown: allocb failed.\n"));
945 		if (extv[SADB_EXT_KEY_ENCRYPT] != NULL)
946 			bzero(extv[SADB_EXT_KEY_ENCRYPT],
947 			    SADB_64TO8(
948 				extv[SADB_EXT_KEY_ENCRYPT]->sadb_ext_len));
949 		if (extv[SADB_EXT_KEY_AUTH] != NULL)
950 			bzero(extv[SADB_EXT_KEY_AUTH],
951 			    SADB_64TO8(
952 				extv[SADB_EXT_KEY_AUTH]->sadb_ext_len));
953 		if (flushmsg) {
954 			ks0dbg((
955 			    "keysock: Downwards flush/dump message failed!\n"));
956 			/* If this is true, I hold the perimeter. */
957 			keysock_flushdump--;
958 		}
959 		freemsg(mp);
960 		return;
961 	}
962 
963 	wrapper->b_datap->db_type = M_CTL;
964 	ksi = (keysock_in_t *)wrapper->b_rptr;
965 	ksi->ks_in_type = KEYSOCK_IN;
966 	ksi->ks_in_len = sizeof (keysock_in_t);
967 	if (extv[SADB_EXT_ADDRESS_SRC] != NULL)
968 		ksi->ks_in_srctype = KS_IN_ADDR_UNKNOWN;
969 	else ksi->ks_in_srctype = KS_IN_ADDR_NOTTHERE;
970 	if (extv[SADB_EXT_ADDRESS_DST] != NULL)
971 		ksi->ks_in_dsttype = KS_IN_ADDR_UNKNOWN;
972 	else ksi->ks_in_dsttype = KS_IN_ADDR_NOTTHERE;
973 	for (i = 0; i <= SADB_EXT_MAX; i++)
974 		ksi->ks_in_extv[i] = extv[i];
975 	ksi->ks_in_serial = ks->keysock_serial;
976 	wrapper->b_wptr += sizeof (ipsec_info_t);
977 	wrapper->b_cont = mp;
978 
979 	/*
980 	 * Find the appropriate consumer where the message is passed down.
981 	 */
982 	kc = keysock_consumers[satype];
983 	if (kc == NULL) {
984 		freeb(wrapper);
985 		keysock_error(ks, mp, EINVAL, SADB_X_DIAGNOSTIC_UNKNOWN_SATYPE);
986 		if (flushmsg) {
987 			ks0dbg((
988 			    "keysock: Downwards flush/dump message failed!\n"));
989 			/* If this is true, I hold the perimeter. */
990 			keysock_flushdump--;
991 		}
992 		return;
993 	}
994 
995 	/*
996 	 * NOTE: There used to be code in here to spin while a flush or
997 	 *	 dump finished.  Keysock now assumes that consumers have enough
998 	 *	 MT-savviness to deal with that.
999 	 */
1000 
1001 	/*
1002 	 * Current consumers (AH and ESP) are guaranteed to return a
1003 	 * FLUSH or DUMP message back, so when we reach here, we don't
1004 	 * have to worry about keysock_flushdumps.
1005 	 */
1006 
1007 	putnext(kc->kc_wq, wrapper);
1008 }
1009 
1010 /*
1011  * High-level reality checking of extensions.
1012  */
1013 static boolean_t
1014 ext_check(sadb_ext_t *ext)
1015 {
1016 	int i;
1017 	uint64_t *lp;
1018 	sadb_ident_t *id;
1019 	char *idstr;
1020 
1021 	switch (ext->sadb_ext_type) {
1022 	case SADB_EXT_ADDRESS_SRC:
1023 	case SADB_EXT_ADDRESS_DST:
1024 	case SADB_X_EXT_ADDRESS_INNER_SRC:
1025 	case SADB_X_EXT_ADDRESS_INNER_DST:
1026 		/* Check for at least enough addtl length for a sockaddr. */
1027 		if (ext->sadb_ext_len <= SADB_8TO64(sizeof (sadb_address_t)))
1028 			return (B_FALSE);
1029 		break;
1030 	case SADB_EXT_LIFETIME_HARD:
1031 	case SADB_EXT_LIFETIME_SOFT:
1032 	case SADB_EXT_LIFETIME_CURRENT:
1033 		if (ext->sadb_ext_len != SADB_8TO64(sizeof (sadb_lifetime_t)))
1034 			return (B_FALSE);
1035 		break;
1036 	case SADB_EXT_SPIRANGE:
1037 		/* See if the SPI range is legit. */
1038 		if (htonl(((sadb_spirange_t *)ext)->sadb_spirange_min) >
1039 		    htonl(((sadb_spirange_t *)ext)->sadb_spirange_max))
1040 			return (B_FALSE);
1041 		break;
1042 	case SADB_EXT_KEY_AUTH:
1043 	case SADB_EXT_KEY_ENCRYPT:
1044 		/* Key length check. */
1045 		if (((sadb_key_t *)ext)->sadb_key_bits == 0)
1046 			return (B_FALSE);
1047 		/*
1048 		 * Check to see if the key length (in bits) is less than the
1049 		 * extension length (in 8-bits words).
1050 		 */
1051 		if ((roundup(SADB_1TO8(((sadb_key_t *)ext)->sadb_key_bits), 8) +
1052 		    sizeof (sadb_key_t)) != SADB_64TO8(ext->sadb_ext_len)) {
1053 			ks1dbg((
1054 			    "ext_check:  Key bits/length inconsistent.\n"));
1055 			ks1dbg(("%d bits, len is %d bytes.\n",
1056 			    ((sadb_key_t *)ext)->sadb_key_bits,
1057 			    SADB_64TO8(ext->sadb_ext_len)));
1058 			return (B_FALSE);
1059 		}
1060 
1061 		/* All-zeroes key check. */
1062 		lp = (uint64_t *)(((char *)ext) + sizeof (sadb_key_t));
1063 		for (i = 0;
1064 		    i < (ext->sadb_ext_len - SADB_8TO64(sizeof (sadb_key_t)));
1065 		    i++)
1066 			if (lp[i] != 0)
1067 				break;	/* Out of for loop. */
1068 		/* If finished the loop naturally, it's an all zero key. */
1069 		if (lp[i] == 0)
1070 			return (B_FALSE);
1071 		break;
1072 	case SADB_EXT_IDENTITY_SRC:
1073 	case SADB_EXT_IDENTITY_DST:
1074 		/*
1075 		 * Make sure the strings in these identities are
1076 		 * null-terminated.  RFC 2367 underspecified how to handle
1077 		 * such a case.  I "proactively" null-terminate the string
1078 		 * at the last byte if it's not terminated sooner.
1079 		 */
1080 		id = (sadb_ident_t *)ext;
1081 		i = SADB_64TO8(id->sadb_ident_len);
1082 		i -= sizeof (sadb_ident_t);
1083 		idstr = (char *)(id + 1);
1084 		while (*idstr != '\0' && i > 0) {
1085 			i--;
1086 			idstr++;
1087 		}
1088 		if (i == 0) {
1089 			/*
1090 			 * I.e., if the bozo user didn't NULL-terminate the
1091 			 * string...
1092 			 */
1093 			idstr--;
1094 			*idstr = '\0';
1095 		}
1096 		break;
1097 	}
1098 	return (B_TRUE);	/* For now... */
1099 }
1100 
1101 /* Return values for keysock_get_ext(). */
1102 #define	KGE_OK	0
1103 #define	KGE_DUP	1
1104 #define	KGE_UNK	2
1105 #define	KGE_LEN	3
1106 #define	KGE_CHK	4
1107 
1108 /*
1109  * Parse basic extension headers and return in the passed-in pointer vector.
1110  * Return values include:
1111  *
1112  *	KGE_OK	Everything's nice and parsed out.
1113  *		If there are no extensions, place NULL in extv[0].
1114  *	KGE_DUP	There is a duplicate extension.
1115  *		First instance in appropriate bin.  First duplicate in
1116  *		extv[0].
1117  *	KGE_UNK	Unknown extension type encountered.  extv[0] contains
1118  *		unknown header.
1119  *	KGE_LEN	Extension length error.
1120  *	KGE_CHK	High-level reality check failed on specific extension.
1121  *
1122  * My apologies for some of the pointer arithmetic in here.  I'm thinking
1123  * like an assembly programmer, yet trying to make the compiler happy.
1124  */
1125 static int
1126 keysock_get_ext(sadb_ext_t *extv[], sadb_msg_t *basehdr, uint_t msgsize)
1127 {
1128 	bzero(extv, sizeof (sadb_ext_t *) * (SADB_EXT_MAX + 1));
1129 
1130 	/* Use extv[0] as the "current working pointer". */
1131 
1132 	extv[0] = (sadb_ext_t *)(basehdr + 1);
1133 
1134 	while (extv[0] < (sadb_ext_t *)(((uint8_t *)basehdr) + msgsize)) {
1135 		/* Check for unknown headers. */
1136 		if (extv[0]->sadb_ext_type == 0 ||
1137 		    extv[0]->sadb_ext_type > SADB_EXT_MAX)
1138 			return (KGE_UNK);
1139 
1140 		/*
1141 		 * Check length.  Use uint64_t because extlen is in units
1142 		 * of 64-bit words.  If length goes beyond the msgsize,
1143 		 * return an error.  (Zero length also qualifies here.)
1144 		 */
1145 		if (extv[0]->sadb_ext_len == 0 ||
1146 		    (void *)((uint64_t *)extv[0] + extv[0]->sadb_ext_len) >
1147 		    (void *)((uint8_t *)basehdr + msgsize))
1148 			return (KGE_LEN);
1149 
1150 		/* Check for redundant headers. */
1151 		if (extv[extv[0]->sadb_ext_type] != NULL)
1152 			return (KGE_DUP);
1153 
1154 		/*
1155 		 * Reality check the extension if possible at the keysock
1156 		 * level.
1157 		 */
1158 		if (!ext_check(extv[0]))
1159 			return (KGE_CHK);
1160 
1161 		/* If I make it here, assign the appropriate bin. */
1162 		extv[extv[0]->sadb_ext_type] = extv[0];
1163 
1164 		/* Advance pointer (See above for uint64_t ptr reasoning.) */
1165 		extv[0] = (sadb_ext_t *)
1166 		    ((uint64_t *)extv[0] + extv[0]->sadb_ext_len);
1167 	}
1168 
1169 	/* Everything's cool. */
1170 
1171 	/*
1172 	 * If extv[0] == NULL, then there are no extension headers in this
1173 	 * message.  Ensure that this is the case.
1174 	 */
1175 	if (extv[0] == (sadb_ext_t *)(basehdr + 1))
1176 		extv[0] = NULL;
1177 
1178 	return (KGE_OK);
1179 }
1180 
1181 /*
1182  * qwriter() callback to handle flushes and dumps.  This routine will hold
1183  * the inner perimeter.
1184  */
1185 void
1186 keysock_do_flushdump(queue_t *q, mblk_t *mp)
1187 {
1188 	int i, start, finish;
1189 	mblk_t *mp1 = NULL;
1190 	keysock_t *ks = (keysock_t *)q->q_ptr;
1191 	sadb_ext_t *extv[SADB_EXT_MAX + 1];
1192 	sadb_msg_t *samsg = (sadb_msg_t *)mp->b_rptr;
1193 
1194 	/*
1195 	 * I am guaranteed this will work.  I did the work in keysock_parse()
1196 	 * already.
1197 	 */
1198 	(void) keysock_get_ext(extv, samsg, SADB_64TO8(samsg->sadb_msg_len));
1199 
1200 	/*
1201 	 * I hold the perimeter, therefore I don't need to use atomic ops.
1202 	 */
1203 	if (keysock_flushdump != 0) {
1204 		/* XXX Should I instead use EBUSY? */
1205 		/* XXX Or is there a way to queue these up? */
1206 		keysock_error(ks, mp, ENOMEM, SADB_X_DIAGNOSTIC_NONE);
1207 		return;
1208 	}
1209 
1210 	if (samsg->sadb_msg_satype == SADB_SATYPE_UNSPEC) {
1211 		start = 0;
1212 		finish = KEYSOCK_MAX_CONSUMERS - 1;
1213 	} else {
1214 		start = samsg->sadb_msg_satype;
1215 		finish = samsg->sadb_msg_satype;
1216 	}
1217 
1218 	/*
1219 	 * Fill up keysock_flushdump with the number of outstanding dumps
1220 	 * and/or flushes.
1221 	 */
1222 
1223 	keysock_flushdump_errno = 0;
1224 
1225 	/*
1226 	 * Okay, I hold the perimeter.  Eventually keysock_flushdump will
1227 	 * contain the number of consumers with outstanding flush operations.
1228 	 *
1229 	 * SO, here's the plan:
1230 	 *	* For each relevant consumer (Might be one, might be all)
1231 	 *		* Twiddle on the FLUSHING flag.
1232 	 *		* Pass down the FLUSH/DUMP message.
1233 	 *
1234 	 * When I see upbound FLUSH/DUMP messages, I will decrement the
1235 	 * keysock_flushdump.  When I decrement it to 0, I will pass the
1236 	 * FLUSH/DUMP message back up to the PF_KEY sockets.  Because I will
1237 	 * pass down the right SA type to the consumer (either its own, or
1238 	 * that of UNSPEC), the right one will be reflected from each consumer,
1239 	 * and accordingly back to the socket.
1240 	 */
1241 
1242 	mutex_enter(&keysock_consumers_lock);
1243 	for (i = start; i <= finish; i++) {
1244 		if (keysock_consumers[i] != NULL) {
1245 			mp1 = copymsg(mp);
1246 			if (mp1 == NULL) {
1247 				ks0dbg(("SADB_FLUSH copymsg() failed.\n"));
1248 				/*
1249 				 * Error?  And what about outstanding
1250 				 * flushes?  Oh, yeah, they get sucked up and
1251 				 * the counter is decremented.  Consumers
1252 				 * (see keysock_passdown()) are guaranteed
1253 				 * to deliver back a flush request, even if
1254 				 * it's an error.
1255 				 */
1256 				keysock_error(ks, mp, ENOMEM,
1257 				    SADB_X_DIAGNOSTIC_NONE);
1258 				return;
1259 			}
1260 			/*
1261 			 * Because my entry conditions are met above, the
1262 			 * following assertion should hold true.
1263 			 */
1264 			mutex_enter(&(keysock_consumers[i]->kc_lock));
1265 			ASSERT((keysock_consumers[i]->kc_flags & KC_FLUSHING)
1266 			    == 0);
1267 			keysock_consumers[i]->kc_flags |= KC_FLUSHING;
1268 			mutex_exit(&(keysock_consumers[i]->kc_lock));
1269 			/* Always increment the number of flushes... */
1270 			keysock_flushdump++;
1271 			/* Guaranteed to return a message. */
1272 			keysock_passdown(ks, mp1, i, extv, B_TRUE);
1273 		} else if (start == finish) {
1274 			/*
1275 			 * In case where start == finish, and there's no
1276 			 * consumer, should we force an error?  Yes.
1277 			 */
1278 			mutex_exit(&keysock_consumers_lock);
1279 			keysock_error(ks, mp, EINVAL,
1280 			    SADB_X_DIAGNOSTIC_UNKNOWN_SATYPE);
1281 			return;
1282 		}
1283 	}
1284 	mutex_exit(&keysock_consumers_lock);
1285 
1286 	if (keysock_flushdump == 0) {
1287 		/*
1288 		 * There were no consumers at all for this message.
1289 		 * XXX For now return ESRCH.
1290 		 */
1291 		keysock_error(ks, mp, ESRCH, SADB_X_DIAGNOSTIC_NO_SADBS);
1292 	} else {
1293 		/* Otherwise, free the original message. */
1294 		freemsg(mp);
1295 	}
1296 }
1297 
1298 /*
1299  * Get the right diagnostic for a duplicate.  Should probably use a static
1300  * table lookup.
1301  */
1302 int
1303 keysock_duplicate(int ext_type)
1304 {
1305 	int rc = 0;
1306 
1307 	switch (ext_type) {
1308 	case SADB_EXT_ADDRESS_SRC:
1309 		rc = SADB_X_DIAGNOSTIC_DUPLICATE_SRC;
1310 		break;
1311 	case SADB_EXT_ADDRESS_DST:
1312 		rc = SADB_X_DIAGNOSTIC_DUPLICATE_DST;
1313 		break;
1314 	case SADB_X_EXT_ADDRESS_INNER_SRC:
1315 		rc = SADB_X_DIAGNOSTIC_DUPLICATE_INNER_SRC;
1316 		break;
1317 	case SADB_X_EXT_ADDRESS_INNER_DST:
1318 		rc = SADB_X_DIAGNOSTIC_DUPLICATE_INNER_DST;
1319 		break;
1320 	case SADB_EXT_SA:
1321 		rc = SADB_X_DIAGNOSTIC_DUPLICATE_SA;
1322 		break;
1323 	case SADB_EXT_SPIRANGE:
1324 		rc = SADB_X_DIAGNOSTIC_DUPLICATE_RANGE;
1325 		break;
1326 	case SADB_EXT_KEY_AUTH:
1327 		rc = SADB_X_DIAGNOSTIC_DUPLICATE_AKEY;
1328 		break;
1329 	case SADB_EXT_KEY_ENCRYPT:
1330 		rc = SADB_X_DIAGNOSTIC_DUPLICATE_EKEY;
1331 		break;
1332 	}
1333 	return (rc);
1334 }
1335 
1336 /*
1337  * Get the right diagnostic for a reality check failure.  Should probably use
1338  * a static table lookup.
1339  */
1340 int
1341 keysock_malformed(int ext_type)
1342 {
1343 	int rc = 0;
1344 
1345 	switch (ext_type) {
1346 	case SADB_EXT_ADDRESS_SRC:
1347 		rc = SADB_X_DIAGNOSTIC_MALFORMED_SRC;
1348 		break;
1349 	case SADB_EXT_ADDRESS_DST:
1350 		rc = SADB_X_DIAGNOSTIC_MALFORMED_DST;
1351 		break;
1352 	case SADB_X_EXT_ADDRESS_INNER_SRC:
1353 		rc = SADB_X_DIAGNOSTIC_MALFORMED_INNER_SRC;
1354 		break;
1355 	case SADB_X_EXT_ADDRESS_INNER_DST:
1356 		rc = SADB_X_DIAGNOSTIC_MALFORMED_INNER_DST;
1357 		break;
1358 	case SADB_EXT_SA:
1359 		rc = SADB_X_DIAGNOSTIC_MALFORMED_SA;
1360 		break;
1361 	case SADB_EXT_SPIRANGE:
1362 		rc = SADB_X_DIAGNOSTIC_MALFORMED_RANGE;
1363 		break;
1364 	case SADB_EXT_KEY_AUTH:
1365 		rc = SADB_X_DIAGNOSTIC_MALFORMED_AKEY;
1366 		break;
1367 	case SADB_EXT_KEY_ENCRYPT:
1368 		rc = SADB_X_DIAGNOSTIC_MALFORMED_EKEY;
1369 		break;
1370 	}
1371 	return (rc);
1372 }
1373 
1374 /*
1375  * Keysock massaging of an inverse ACQUIRE.  Consult policy,
1376  * and construct an appropriate response.
1377  */
1378 static void
1379 keysock_inverse_acquire(mblk_t *mp, sadb_msg_t *samsg, sadb_ext_t *extv[],
1380     keysock_t *ks)
1381 {
1382 	mblk_t *reply_mp;
1383 
1384 	/*
1385 	 * Reality check things...
1386 	 */
1387 	if (extv[SADB_EXT_ADDRESS_SRC] == NULL) {
1388 		keysock_error(ks, mp, EINVAL, SADB_X_DIAGNOSTIC_MISSING_SRC);
1389 		return;
1390 	}
1391 	if (extv[SADB_EXT_ADDRESS_DST] == NULL) {
1392 		keysock_error(ks, mp, EINVAL, SADB_X_DIAGNOSTIC_MISSING_DST);
1393 		return;
1394 	}
1395 
1396 	if (extv[SADB_X_EXT_ADDRESS_INNER_SRC] != NULL &&
1397 	    extv[SADB_X_EXT_ADDRESS_INNER_DST] == NULL) {
1398 		keysock_error(ks, mp, EINVAL,
1399 		    SADB_X_DIAGNOSTIC_MISSING_INNER_DST);
1400 		return;
1401 	}
1402 
1403 	if (extv[SADB_X_EXT_ADDRESS_INNER_SRC] == NULL &&
1404 	    extv[SADB_X_EXT_ADDRESS_INNER_DST] != NULL) {
1405 		keysock_error(ks, mp, EINVAL,
1406 		    SADB_X_DIAGNOSTIC_MISSING_INNER_SRC);
1407 		return;
1408 	}
1409 
1410 	reply_mp = ipsec_construct_inverse_acquire(samsg, extv);
1411 
1412 	if (reply_mp != NULL) {
1413 		freemsg(mp);
1414 		keysock_passup(reply_mp, (sadb_msg_t *)reply_mp->b_rptr,
1415 		    ks->keysock_serial, NULL, B_FALSE);
1416 	} else {
1417 		keysock_error(ks, mp, samsg->sadb_msg_errno,
1418 		    samsg->sadb_x_msg_diagnostic);
1419 	}
1420 }
1421 
1422 /*
1423  * Spew an extended REGISTER down to the relevant consumers.
1424  */
1425 static void
1426 keysock_extended_register(keysock_t *ks, mblk_t *mp, sadb_ext_t *extv[])
1427 {
1428 	sadb_x_ereg_t *ereg = (sadb_x_ereg_t *)extv[SADB_X_EXT_EREG];
1429 	uint8_t *satypes, *fencepost;
1430 	mblk_t *downmp;
1431 	sadb_ext_t *downextv[SADB_EXT_MAX + 1];
1432 
1433 	if (ks->keysock_registered[0] != 0 || ks->keysock_registered[1] != 0 ||
1434 	    ks->keysock_registered[2] != 0 || ks->keysock_registered[3] != 0) {
1435 		keysock_error(ks, mp, EBUSY, 0);
1436 	}
1437 
1438 	ks->keysock_flags |= KEYSOCK_EXTENDED;
1439 	if (ereg == NULL) {
1440 		keysock_error(ks, mp, EINVAL, SADB_X_DIAGNOSTIC_SATYPE_NEEDED);
1441 	} else {
1442 		ASSERT(mp->b_rptr + msgdsize(mp) == mp->b_wptr);
1443 		fencepost = (uint8_t *)mp->b_wptr;
1444 		satypes = ereg->sadb_x_ereg_satypes;
1445 		while (*satypes != SADB_SATYPE_UNSPEC && satypes != fencepost) {
1446 			downmp = copymsg(mp);
1447 			if (downmp == NULL) {
1448 				keysock_error(ks, mp, ENOMEM, 0);
1449 				return;
1450 			}
1451 			/*
1452 			 * Since we've made it here, keysock_get_ext will work!
1453 			 */
1454 			(void) keysock_get_ext(downextv,
1455 			    (sadb_msg_t *)downmp->b_rptr, msgdsize(downmp));
1456 			keysock_passdown(ks, downmp, *satypes, downextv,
1457 			    B_FALSE);
1458 			++satypes;
1459 		}
1460 		freemsg(mp);
1461 	}
1462 
1463 	/*
1464 	 * Set global to indicate we prefer an extended ACQUIRE.
1465 	 */
1466 	atomic_add_32(&keysock_num_extended, 1);
1467 }
1468 
1469 /*
1470  * Handle PF_KEY messages.
1471  */
1472 static void
1473 keysock_parse(queue_t *q, mblk_t *mp)
1474 {
1475 	sadb_msg_t *samsg;
1476 	sadb_ext_t *extv[SADB_EXT_MAX + 1];
1477 	keysock_t *ks = (keysock_t *)q->q_ptr;
1478 	uint_t msgsize;
1479 	uint8_t satype;
1480 
1481 	/* Make sure I'm a PF_KEY socket.  (i.e. nothing's below me) */
1482 	ASSERT(WR(q)->q_next == NULL);
1483 
1484 	samsg = (sadb_msg_t *)mp->b_rptr;
1485 	ks2dbg(("Received possible PF_KEY message, type %d.\n",
1486 	    samsg->sadb_msg_type));
1487 
1488 	msgsize = SADB_64TO8(samsg->sadb_msg_len);
1489 
1490 	if (msgdsize(mp) != msgsize) {
1491 		/*
1492 		 * Message len incorrect w.r.t. actual size.  Send an error
1493 		 * (EMSGSIZE).	It may be necessary to massage things a
1494 		 * bit.	 For example, if the sadb_msg_type is hosed,
1495 		 * I need to set it to SADB_RESERVED to get delivery to
1496 		 * do the right thing.	Then again, maybe just letting
1497 		 * the error delivery do the right thing.
1498 		 */
1499 		ks2dbg(("mblk (%lu) and base (%d) message sizes don't jibe.\n",
1500 		    msgdsize(mp), msgsize));
1501 		keysock_error(ks, mp, EMSGSIZE, SADB_X_DIAGNOSTIC_NONE);
1502 		return;
1503 	}
1504 
1505 	if (msgsize > (uint_t)(mp->b_wptr - mp->b_rptr)) {
1506 		/* Get all message into one mblk. */
1507 		if (pullupmsg(mp, -1) == 0) {
1508 			/*
1509 			 * Something screwy happened.
1510 			 */
1511 			ks3dbg(("keysock_parse: pullupmsg() failed.\n"));
1512 			return;
1513 		} else {
1514 			samsg = (sadb_msg_t *)mp->b_rptr;
1515 		}
1516 	}
1517 
1518 	switch (keysock_get_ext(extv, samsg, msgsize)) {
1519 	case KGE_DUP:
1520 		/* Handle duplicate extension. */
1521 		ks1dbg(("Got duplicate extension of type %d.\n",
1522 		    extv[0]->sadb_ext_type));
1523 		keysock_error(ks, mp, EINVAL,
1524 		    keysock_duplicate(extv[0]->sadb_ext_type));
1525 		return;
1526 	case KGE_UNK:
1527 		/* Handle unknown extension. */
1528 		ks1dbg(("Got unknown extension of type %d.\n",
1529 		    extv[0]->sadb_ext_type));
1530 		keysock_error(ks, mp, EINVAL, SADB_X_DIAGNOSTIC_UNKNOWN_EXT);
1531 		return;
1532 	case KGE_LEN:
1533 		/* Length error. */
1534 		ks1dbg(("Length %d on extension type %d overrun or 0.\n",
1535 		    extv[0]->sadb_ext_len, extv[0]->sadb_ext_type));
1536 		keysock_error(ks, mp, EINVAL, SADB_X_DIAGNOSTIC_BAD_EXTLEN);
1537 		return;
1538 	case KGE_CHK:
1539 		/* Reality check failed. */
1540 		ks1dbg(("Reality check failed on extension type %d.\n",
1541 		    extv[0]->sadb_ext_type));
1542 		keysock_error(ks, mp, EINVAL,
1543 		    keysock_malformed(extv[0]->sadb_ext_type));
1544 		return;
1545 	default:
1546 		/* Default case is no errors. */
1547 		break;
1548 	}
1549 
1550 	switch (samsg->sadb_msg_type) {
1551 	case SADB_REGISTER:
1552 		/*
1553 		 * There's a semantic weirdness in that a message OTHER than
1554 		 * the return REGISTER message may be passed up if I set the
1555 		 * registered bit BEFORE I pass it down.
1556 		 *
1557 		 * SOOOO, I'll not twiddle any registered bits until I see
1558 		 * the upbound REGISTER (with a serial number in it).
1559 		 */
1560 		if (samsg->sadb_msg_satype == SADB_SATYPE_UNSPEC) {
1561 			/* Handle extended register here. */
1562 			keysock_extended_register(ks, mp, extv);
1563 			return;
1564 		} else if (ks->keysock_flags & KEYSOCK_EXTENDED) {
1565 			keysock_error(ks, mp, EBUSY, 0);
1566 			return;
1567 		}
1568 		/* FALLTHRU */
1569 	case SADB_GETSPI:
1570 	case SADB_ADD:
1571 	case SADB_UPDATE:
1572 	case SADB_DELETE:
1573 	case SADB_GET:
1574 		/*
1575 		 * Pass down to appropriate consumer.
1576 		 */
1577 		if (samsg->sadb_msg_satype != SADB_SATYPE_UNSPEC)
1578 			keysock_passdown(ks, mp, samsg->sadb_msg_satype, extv,
1579 			    B_FALSE);
1580 		else keysock_error(ks, mp, EINVAL,
1581 		    SADB_X_DIAGNOSTIC_SATYPE_NEEDED);
1582 		return;
1583 	case SADB_ACQUIRE:
1584 		/*
1585 		 * If I _receive_ an acquire, this means I should spread it
1586 		 * out to registered sockets.  Unless there's an errno...
1587 		 *
1588 		 * Need ADDRESS, may have ID, SENS, and PROP, unless errno,
1589 		 * in which case there should be NO extensions.
1590 		 *
1591 		 * Return to registered.
1592 		 */
1593 		if (samsg->sadb_msg_errno != 0) {
1594 			satype = samsg->sadb_msg_satype;
1595 			if (satype == SADB_SATYPE_UNSPEC) {
1596 				if (!(ks->keysock_flags & KEYSOCK_EXTENDED)) {
1597 					keysock_error(ks, mp, EINVAL,
1598 					    SADB_X_DIAGNOSTIC_SATYPE_NEEDED);
1599 					return;
1600 				}
1601 				/*
1602 				 * Reassign satype based on the first
1603 				 * flags that KEYSOCK_SETREG says.
1604 				 */
1605 				while (satype <= SADB_SATYPE_MAX) {
1606 					if (KEYSOCK_ISREG(ks, satype))
1607 						break;
1608 					satype++;
1609 				}
1610 				if (satype > SADB_SATYPE_MAX) {
1611 					keysock_error(ks, mp, EBUSY, 0);
1612 					return;
1613 				}
1614 			}
1615 			keysock_passdown(ks, mp, satype, extv, B_FALSE);
1616 		} else {
1617 			if (samsg->sadb_msg_satype == SADB_SATYPE_UNSPEC)
1618 				keysock_error(ks, mp, EINVAL,
1619 				    SADB_X_DIAGNOSTIC_SATYPE_NEEDED);
1620 			else
1621 				keysock_passup(mp, samsg, 0, NULL, B_FALSE);
1622 		}
1623 		return;
1624 	case SADB_EXPIRE:
1625 		/*
1626 		 * If someone sends this in, then send out to all senders.
1627 		 * (Save maybe ESP or AH, I have to be careful here.)
1628 		 *
1629 		 * Need ADDRESS, may have ID and SENS.
1630 		 *
1631 		 * XXX for now this is unsupported.
1632 		 */
1633 		break;
1634 	case SADB_FLUSH:
1635 	case SADB_DUMP:	 /* not used by normal applications */
1636 		/*
1637 		 * Nuke all SAs, or dump out the whole SA table to sender only.
1638 		 *
1639 		 * No extensions at all.  Return to all listeners.
1640 		 *
1641 		 * Question:	Should I hold a lock here to prevent
1642 		 *		additions/deletions while flushing?
1643 		 * Answer:	No.  (See keysock_passdown() for details.)
1644 		 */
1645 		if (extv[0] != NULL) {
1646 			/*
1647 			 * FLUSH or DUMP messages shouldn't have extensions.
1648 			 * Return EINVAL.
1649 			 */
1650 			ks2dbg(("FLUSH message with extension.\n"));
1651 			keysock_error(ks, mp, EINVAL, SADB_X_DIAGNOSTIC_NO_EXT);
1652 			return;
1653 		}
1654 
1655 		/* Passing down of DUMP/FLUSH messages are special. */
1656 		qwriter(q, mp, keysock_do_flushdump, PERIM_INNER);
1657 		return;
1658 	case SADB_X_PROMISC:
1659 		/*
1660 		 * Promiscuous processing message.
1661 		 */
1662 		if (samsg->sadb_msg_satype == 0)
1663 			ks->keysock_flags &= ~KEYSOCK_PROMISC;
1664 		else
1665 			ks->keysock_flags |= KEYSOCK_PROMISC;
1666 		keysock_passup(mp, samsg, ks->keysock_serial, NULL, B_FALSE);
1667 		return;
1668 	case SADB_X_INVERSE_ACQUIRE:
1669 		keysock_inverse_acquire(mp, samsg, extv, ks);
1670 		return;
1671 	default:
1672 		ks2dbg(("Got unknown message type %d.\n",
1673 		    samsg->sadb_msg_type));
1674 		keysock_error(ks, mp, EINVAL, SADB_X_DIAGNOSTIC_UNKNOWN_MSG);
1675 		return;
1676 	}
1677 
1678 	/* As a placeholder... */
1679 	ks0dbg(("keysock_parse():  Hit EOPNOTSUPP\n"));
1680 	keysock_error(ks, mp, EOPNOTSUPP, SADB_X_DIAGNOSTIC_NONE);
1681 }
1682 
1683 /*
1684  * wput routing for PF_KEY/keysock/whatever.  Unlike the routing socket,
1685  * I don't convert to ioctl()'s for IP.  I am the end-all driver as far
1686  * as PF_KEY sockets are concerned.  I do some conversion, but not as much
1687  * as IP/rts does.
1688  */
1689 static void
1690 keysock_wput(queue_t *q, mblk_t *mp)
1691 {
1692 	uchar_t *rptr = mp->b_rptr;
1693 	mblk_t *mp1;
1694 
1695 	ks3dbg(("In keysock_wput\n"));
1696 
1697 	if (WR(q)->q_next) {
1698 		keysock_consumer_t *kc = (keysock_consumer_t *)q->q_ptr;
1699 
1700 		/*
1701 		 * We shouldn't get writes on a consumer instance.
1702 		 * But for now, just passthru.
1703 		 */
1704 		ks1dbg(("Huh?  wput for an consumer instance (%d)?\n",
1705 		    kc->kc_sa_type));
1706 		putnext(q, mp);
1707 		return;
1708 	}
1709 
1710 	switch (mp->b_datap->db_type) {
1711 	case M_DATA:
1712 		/*
1713 		 * Silently discard.
1714 		 */
1715 		ks2dbg(("raw M_DATA in keysock.\n"));
1716 		freemsg(mp);
1717 		return;
1718 	case M_PROTO:
1719 	case M_PCPROTO:
1720 		if ((mp->b_wptr - rptr) >= sizeof (struct T_data_req)) {
1721 			if (((union T_primitives *)rptr)->type == T_DATA_REQ) {
1722 				if ((mp1 = mp->b_cont) == NULL) {
1723 					/* No data after T_DATA_REQ. */
1724 					ks2dbg(("No data after DATA_REQ.\n"));
1725 					freemsg(mp);
1726 					return;
1727 				}
1728 				freeb(mp);
1729 				mp = mp1;
1730 				ks2dbg(("T_DATA_REQ\n"));
1731 				break;	/* Out of switch. */
1732 			}
1733 		}
1734 		/* FALLTHRU */
1735 	default:
1736 		ks3dbg(("In default wput case (%d %d).\n",
1737 		    mp->b_datap->db_type, ((union T_primitives *)rptr)->type));
1738 		keysock_wput_other(q, mp);
1739 		return;
1740 	}
1741 
1742 	/* I now have a PF_KEY message in an M_DATA block, pointed to by mp. */
1743 	keysock_parse(q, mp);
1744 }
1745 
1746 /* BELOW THIS LINE ARE ROUTINES INCLUDING AND RELATED TO keysock_rput(). */
1747 
1748 /*
1749  * Called upon receipt of a KEYSOCK_HELLO_ACK to set up the appropriate
1750  * state vectors.
1751  */
1752 static void
1753 keysock_link_consumer(uint8_t satype, keysock_consumer_t *kc)
1754 {
1755 	keysock_t *ks;
1756 
1757 	mutex_enter(&keysock_consumers_lock);
1758 	mutex_enter(&kc->kc_lock);
1759 	if (keysock_consumers[satype] != NULL) {
1760 		ks0dbg((
1761 		    "Hmmmm, someone closed %d before the HELLO_ACK happened.\n",
1762 		    satype));
1763 		/*
1764 		 * Perhaps updating the new below-me consumer with what I have
1765 		 * so far would work too?
1766 		 */
1767 		mutex_exit(&kc->kc_lock);
1768 		mutex_exit(&keysock_consumers_lock);
1769 	} else {
1770 		/* Add new below-me consumer. */
1771 		keysock_consumers[satype] = kc;
1772 
1773 		kc->kc_flags = 0;
1774 		kc->kc_sa_type = satype;
1775 		mutex_exit(&kc->kc_lock);
1776 		mutex_exit(&keysock_consumers_lock);
1777 
1778 		/* Scan the keysock list. */
1779 		mutex_enter(&keysock_list_lock);
1780 		for (ks = keysock_list; ks != NULL; ks = ks->keysock_next) {
1781 			if (KEYSOCK_ISREG(ks, satype)) {
1782 				/*
1783 				 * XXX Perhaps send an SADB_REGISTER down on
1784 				 * the socket's behalf.
1785 				 */
1786 				ks1dbg(("Socket %u registered already for "
1787 				    "new consumer.\n", ks->keysock_serial));
1788 			}
1789 		}
1790 		mutex_exit(&keysock_list_lock);
1791 	}
1792 }
1793 
1794 /*
1795  * Generate a KEYSOCK_OUT_ERR message for my consumer.
1796  */
1797 static void
1798 keysock_out_err(keysock_consumer_t *kc, int ks_errno, mblk_t *mp)
1799 {
1800 	keysock_out_err_t *kse;
1801 	mblk_t *imp;
1802 
1803 	imp = allocb(sizeof (ipsec_info_t), BPRI_HI);
1804 	if (imp == NULL) {
1805 		ks1dbg(("keysock_out_err:  Can't alloc message.\n"));
1806 		return;
1807 	}
1808 
1809 	imp->b_datap->db_type = M_CTL;
1810 	imp->b_wptr += sizeof (ipsec_info_t);
1811 
1812 	kse = (keysock_out_err_t *)imp->b_rptr;
1813 	imp->b_cont = mp;
1814 	kse->ks_err_type = KEYSOCK_OUT_ERR;
1815 	kse->ks_err_len = sizeof (*kse);
1816 	/* Is serial necessary? */
1817 	kse->ks_err_serial = 0;
1818 	kse->ks_err_errno = ks_errno;
1819 
1820 	/*
1821 	 * XXX What else do I need to do here w.r.t. information
1822 	 * to tell the consumer what caused this error?
1823 	 *
1824 	 * I believe the answer is the PF_KEY ACQUIRE (or other) message
1825 	 * attached in mp, which is appended at the end.  I believe the
1826 	 * db_ref won't matter here, because the PF_KEY message is only read
1827 	 * for KEYSOCK_OUT_ERR.
1828 	 */
1829 
1830 	putnext(kc->kc_wq, imp);
1831 }
1832 
1833 /* XXX this is a hack errno. */
1834 #define	EIPSECNOSA 255
1835 
1836 /*
1837  * Route message (pointed by mp, header in samsg) toward appropriate
1838  * sockets.  Assume the message's creator did its job correctly.
1839  *
1840  * This should be a function that is followed by a return in its caller.
1841  * The compiler _should_ be able to use tail-call optimizations to make the
1842  * large ## of parameters not a huge deal.
1843  */
1844 static void
1845 keysock_passup(mblk_t *mp, sadb_msg_t *samsg, minor_t serial,
1846     keysock_consumer_t *kc, boolean_t persistent)
1847 {
1848 	keysock_t *ks;
1849 	uint8_t satype = samsg->sadb_msg_satype;
1850 	boolean_t toall = B_FALSE, allreg = B_FALSE, allereg = B_FALSE,
1851 	    setalg = B_FALSE;
1852 	mblk_t *mp1;
1853 	int err = EIPSECNOSA;
1854 
1855 	/* Convert mp, which is M_DATA, into an M_PROTO of type T_DATA_IND */
1856 	mp1 = allocb(sizeof (struct T_data_req), BPRI_HI);
1857 	if (mp1 == NULL) {
1858 		err = ENOMEM;
1859 		goto error;
1860 	}
1861 	mp1->b_wptr += sizeof (struct T_data_req);
1862 	((struct T_data_ind *)mp1->b_rptr)->PRIM_type = T_DATA_IND;
1863 	((struct T_data_ind *)mp1->b_rptr)->MORE_flag = 0;
1864 	mp1->b_datap->db_type = M_PROTO;
1865 	mp1->b_cont = mp;
1866 	mp = mp1;
1867 
1868 	switch (samsg->sadb_msg_type) {
1869 	case SADB_FLUSH:
1870 	case SADB_GETSPI:
1871 	case SADB_UPDATE:
1872 	case SADB_ADD:
1873 	case SADB_DELETE:
1874 	case SADB_EXPIRE:
1875 		/*
1876 		 * These are most likely replies.  Don't worry about
1877 		 * KEYSOCK_OUT_ERR handling.  Deliver to all sockets.
1878 		 */
1879 		ks3dbg(("Delivering normal message (%d) to all sockets.\n",
1880 		    samsg->sadb_msg_type));
1881 		toall = B_TRUE;
1882 		break;
1883 	case SADB_REGISTER:
1884 		/*
1885 		 * REGISTERs come up for one of three reasons:
1886 		 *
1887 		 *	1.) In response to a normal SADB_REGISTER
1888 		 *		(samsg->sadb_msg_satype != SADB_SATYPE_UNSPEC &&
1889 		 *		    serial != 0)
1890 		 *		Deliver to normal SADB_REGISTERed sockets.
1891 		 *	2.) In response to an extended REGISTER
1892 		 *		(samsg->sadb_msg_satype == SADB_SATYPE_UNSPEC)
1893 		 *		Deliver to extended REGISTERed socket.
1894 		 *	3.) Spontaneous algorithm changes
1895 		 *		(samsg->sadb_msg_satype != SADB_SATYPE_UNSPEC &&
1896 		 *		    serial == 0)
1897 		 *		Deliver to REGISTERed sockets of all sorts.
1898 		 */
1899 		if (kc == NULL) {
1900 			/* Here because of keysock_error() call. */
1901 			ASSERT(samsg->sadb_msg_errno != 0);
1902 			break;	/* Out of switch. */
1903 		}
1904 		ks3dbg(("Delivering REGISTER.\n"));
1905 		if (satype == SADB_SATYPE_UNSPEC) {
1906 			/* REGISTER Reason #2 */
1907 			allereg = B_TRUE;
1908 			/*
1909 			 * Rewhack SA type so PF_KEY socket holder knows what
1910 			 * consumer generated this algorithm list.
1911 			 */
1912 			satype = kc->kc_sa_type;
1913 			samsg->sadb_msg_satype = satype;
1914 			setalg = B_TRUE;
1915 		} else if (serial == 0) {
1916 			/* REGISTER Reason #3 */
1917 			allreg = B_TRUE;
1918 			allereg = B_TRUE;
1919 		} else {
1920 			/* REGISTER Reason #1 */
1921 			allreg = B_TRUE;
1922 			setalg = B_TRUE;
1923 		}
1924 		break;
1925 	case SADB_ACQUIRE:
1926 		/*
1927 		 * ACQUIREs are either extended (sadb_msg_satype == 0) or
1928 		 * regular (sadb_msg_satype != 0).  And we're guaranteed
1929 		 * that serial == 0 for an ACQUIRE.
1930 		 */
1931 		ks3dbg(("Delivering ACQUIRE.\n"));
1932 		allereg = (satype == SADB_SATYPE_UNSPEC);
1933 		allreg = !allereg;
1934 		/*
1935 		 * Corner case - if we send a regular ACQUIRE and there's
1936 		 * extended ones registered, don't send an error down to
1937 		 * consumers if nobody's listening and prematurely destroy
1938 		 * their ACQUIRE record.  This might be too hackish of a
1939 		 * solution.
1940 		 */
1941 		if (allreg && keysock_num_extended > 0)
1942 			err = 0;
1943 		break;
1944 	case SADB_X_PROMISC:
1945 	case SADB_X_INVERSE_ACQUIRE:
1946 	case SADB_DUMP:
1947 	case SADB_GET:
1948 	default:
1949 		/*
1950 		 * Deliver to the sender and promiscuous only.
1951 		 */
1952 		ks3dbg(("Delivering sender/promisc only (%d).\n",
1953 		    samsg->sadb_msg_type));
1954 		break;
1955 	}
1956 
1957 	mutex_enter(&keysock_list_lock);
1958 	for (ks = keysock_list; ks != NULL; ks = ks->keysock_next) {
1959 		/* Delivery loop. */
1960 
1961 		/*
1962 		 * Check special keysock-setting cases (REGISTER replies)
1963 		 * here.
1964 		 */
1965 		if (setalg && serial == ks->keysock_serial) {
1966 			ASSERT(kc != NULL);
1967 			ASSERT(kc->kc_sa_type == satype);
1968 			KEYSOCK_SETREG(ks, satype);
1969 		}
1970 
1971 		/*
1972 		 * NOLOOP takes precedence over PROMISC.  So if you've set
1973 		 * !SO_USELOOPBACK, don't expect to see any data...
1974 		 */
1975 		if (ks->keysock_flags & KEYSOCK_NOLOOP)
1976 			continue;
1977 
1978 		/*
1979 		 * Messages to all, or promiscuous sockets just GET the
1980 		 * message.  Perform rules-type checking iff it's not for all
1981 		 * listeners or the socket is in promiscuous mode.
1982 		 *
1983 		 * NOTE:Because of the (kc != NULL && ISREG()), make sure
1984 		 *	extended ACQUIREs arrive off a consumer that is
1985 		 *	part of the extended REGISTER set of consumers.
1986 		 */
1987 		if (serial != ks->keysock_serial &&
1988 		    !toall &&
1989 		    !(ks->keysock_flags & KEYSOCK_PROMISC) &&
1990 		    !((ks->keysock_flags & KEYSOCK_EXTENDED) ?
1991 			allereg : allreg && kc != NULL &&
1992 			KEYSOCK_ISREG(ks, kc->kc_sa_type)))
1993 			continue;
1994 
1995 		mp1 = dupmsg(mp);
1996 		if (mp1 == NULL) {
1997 			ks2dbg((
1998 			    "keysock_passup():  dupmsg() failed.\n"));
1999 			mp1 = mp;
2000 			mp = NULL;
2001 			err = ENOMEM;
2002 		}
2003 
2004 		/*
2005 		 * At this point, we can deliver or attempt to deliver
2006 		 * this message.  We're free of obligation to report
2007 		 * no listening PF_KEY sockets.  So set err to 0.
2008 		 */
2009 		err = 0;
2010 
2011 		/*
2012 		 * See if we canputnext(), as well as see if the message
2013 		 * needs to be queued if we can't.
2014 		 */
2015 		if (!canputnext(ks->keysock_rq)) {
2016 			if (persistent) {
2017 				if (putq(ks->keysock_rq, mp1) == 0) {
2018 					ks1dbg((
2019 					    "keysock_passup: putq failed.\n"));
2020 				} else {
2021 					continue;
2022 				}
2023 			}
2024 			freemsg(mp1);
2025 			continue;
2026 		}
2027 
2028 		ks3dbg(("Putting to serial %d.\n", ks->keysock_serial));
2029 		/*
2030 		 * Unlike the specific keysock instance case, this
2031 		 * will only hit for listeners, so we will only
2032 		 * putnext() if we can.
2033 		 */
2034 		putnext(ks->keysock_rq, mp1);
2035 		if (mp == NULL)
2036 			break;	/* out of for loop. */
2037 	}
2038 	mutex_exit(&keysock_list_lock);
2039 
2040 error:
2041 	if ((err != 0) && (kc != NULL)) {
2042 		/*
2043 		 * Generate KEYSOCK_OUT_ERR for consumer.
2044 		 * Basically, I send this back if I have not been able to
2045 		 * transmit (for whatever reason)
2046 		 */
2047 		ks1dbg(("keysock_passup():  No registered of type %d.\n",
2048 		    satype));
2049 		if (mp != NULL) {
2050 			if (mp->b_datap->db_type == M_PROTO) {
2051 				mp1 = mp;
2052 				mp = mp->b_cont;
2053 				freeb(mp1);
2054 			}
2055 			/*
2056 			 * Do a copymsg() because people who get
2057 			 * KEYSOCK_OUT_ERR may alter the message contents.
2058 			 */
2059 			mp1 = copymsg(mp);
2060 			if (mp1 == NULL) {
2061 				ks2dbg(("keysock_passup: copymsg() failed.\n"));
2062 				mp1 = mp;
2063 				mp = NULL;
2064 			}
2065 			keysock_out_err(kc, err, mp1);
2066 		}
2067 	}
2068 
2069 	/*
2070 	 * XXX Blank the message somehow.  This is difficult because we don't
2071 	 * know at this point if the message has db_ref > 1, etc.
2072 	 *
2073 	 * Optimally, keysock messages containing actual keying material would
2074 	 * be allocated with esballoc(), with a zeroing free function.
2075 	 */
2076 	if (mp != NULL)
2077 		freemsg(mp);
2078 }
2079 
2080 /*
2081  * Keysock's read service procedure is there only for PF_KEY reply
2082  * messages that really need to reach the top.
2083  */
2084 static void
2085 keysock_rsrv(queue_t *q)
2086 {
2087 	mblk_t *mp;
2088 
2089 	while ((mp = getq(q)) != NULL) {
2090 		if (canputnext(q)) {
2091 			putnext(q, mp);
2092 		} else {
2093 			(void) putbq(q, mp);
2094 			return;
2095 		}
2096 	}
2097 }
2098 
2099 /*
2100  * The read procedure should only be invoked by a keysock consumer, like
2101  * ESP, AH, etc.  I should only see KEYSOCK_OUT and KEYSOCK_HELLO_ACK
2102  * messages on my read queues.
2103  */
2104 static void
2105 keysock_rput(queue_t *q, mblk_t *mp)
2106 {
2107 	keysock_consumer_t *kc = (keysock_consumer_t *)q->q_ptr;
2108 	ipsec_info_t *ii;
2109 	keysock_hello_ack_t *ksa;
2110 	minor_t serial;
2111 	mblk_t *mp1;
2112 	sadb_msg_t *samsg;
2113 
2114 	/* Make sure I'm a consumer instance.  (i.e. something's below me) */
2115 	ASSERT(WR(q)->q_next != NULL);
2116 
2117 	if (mp->b_datap->db_type != M_CTL) {
2118 		/*
2119 		 * Keysock should only see keysock consumer interface
2120 		 * messages (see ipsec_info.h) on its read procedure.
2121 		 * To be robust, however, putnext() up so the STREAM head can
2122 		 * deal with it appropriately.
2123 		 */
2124 		ks1dbg(("Hmmm, a non M_CTL (%d, 0x%x) on keysock_rput.\n",
2125 		    mp->b_datap->db_type, mp->b_datap->db_type));
2126 		putnext(q, mp);
2127 		return;
2128 	}
2129 
2130 	ii = (ipsec_info_t *)mp->b_rptr;
2131 
2132 	switch (ii->ipsec_info_type) {
2133 	case KEYSOCK_OUT:
2134 		/*
2135 		 * A consumer needs to pass a response message or an ACQUIRE
2136 		 * UP.  I assume that the consumer has done the right
2137 		 * thing w.r.t. message creation, etc.
2138 		 */
2139 		serial = ((keysock_out_t *)mp->b_rptr)->ks_out_serial;
2140 		mp1 = mp->b_cont;	/* Get M_DATA portion. */
2141 		freeb(mp);
2142 		samsg = (sadb_msg_t *)mp1->b_rptr;
2143 		if (samsg->sadb_msg_type == SADB_FLUSH ||
2144 		    (samsg->sadb_msg_type == SADB_DUMP &&
2145 			samsg->sadb_msg_len == SADB_8TO64(sizeof (*samsg)))) {
2146 			/*
2147 			 * If I'm an end-of-FLUSH or an end-of-DUMP marker...
2148 			 */
2149 			ASSERT(keysock_flushdump != 0);  /* Am I flushing? */
2150 
2151 			mutex_enter(&kc->kc_lock);
2152 			kc->kc_flags &= ~KC_FLUSHING;
2153 			mutex_exit(&kc->kc_lock);
2154 
2155 			if (samsg->sadb_msg_errno != 0)
2156 				keysock_flushdump_errno = samsg->sadb_msg_errno;
2157 
2158 			/*
2159 			 * Lower the atomic "flushing" count.  If it's
2160 			 * the last one, send up the end-of-{FLUSH,DUMP} to
2161 			 * the appropriate PF_KEY socket.
2162 			 */
2163 			if (atomic_add_32_nv(&keysock_flushdump, -1) != 0) {
2164 				ks1dbg(("One flush/dump message back from %d,"
2165 				    " more to go.\n", samsg->sadb_msg_satype));
2166 				freemsg(mp1);
2167 				return;
2168 			}
2169 
2170 			samsg->sadb_msg_errno =
2171 			    (uint8_t)keysock_flushdump_errno;
2172 			if (samsg->sadb_msg_type == SADB_DUMP) {
2173 				samsg->sadb_msg_seq = 0;
2174 			}
2175 		}
2176 		keysock_passup(mp1, samsg, serial, kc,
2177 		    (samsg->sadb_msg_type == SADB_DUMP));
2178 		return;
2179 	case KEYSOCK_HELLO_ACK:
2180 		/* Aha, now we can link in the consumer! */
2181 		ksa = (keysock_hello_ack_t *)ii;
2182 		keysock_link_consumer(ksa->ks_hello_satype, kc);
2183 		freemsg(mp);
2184 		return;
2185 	default:
2186 		ks1dbg(("Hmmm, an IPsec info I'm not used to, 0x%x\n",
2187 		    ii->ipsec_info_type));
2188 		putnext(q, mp);
2189 	}
2190 }
2191 
2192 /*
2193  * So we can avoid external linking problems....
2194  */
2195 boolean_t
2196 keysock_extended_reg(void)
2197 {
2198 	return (keysock_num_extended != 0);
2199 }
2200 
2201 uint32_t
2202 keysock_next_seq(void)
2203 {
2204 	return (atomic_add_32_nv(&keysock_acquire_seq, -1));
2205 }
2206