xref: /titanic_41/usr/src/uts/common/inet/ip/sadb.c (revision 8b6220d73c6a079b62251e38103a523c41ee541a)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/types.h>
27 #include <sys/stream.h>
28 #include <sys/stropts.h>
29 #include <sys/strsubr.h>
30 #include <sys/errno.h>
31 #include <sys/ddi.h>
32 #include <sys/debug.h>
33 #include <sys/cmn_err.h>
34 #include <sys/stream.h>
35 #include <sys/strlog.h>
36 #include <sys/kmem.h>
37 #include <sys/sunddi.h>
38 #include <sys/tihdr.h>
39 #include <sys/atomic.h>
40 #include <sys/socket.h>
41 #include <sys/sysmacros.h>
42 #include <sys/crypto/common.h>
43 #include <sys/crypto/api.h>
44 #include <sys/zone.h>
45 #include <netinet/in.h>
46 #include <net/if.h>
47 #include <net/pfkeyv2.h>
48 #include <net/pfpolicy.h>
49 #include <inet/common.h>
50 #include <netinet/ip6.h>
51 #include <inet/ip.h>
52 #include <inet/ip_ire.h>
53 #include <inet/ip6.h>
54 #include <inet/ipsec_info.h>
55 #include <inet/tcp.h>
56 #include <inet/sadb.h>
57 #include <inet/ipsec_impl.h>
58 #include <inet/ipsecah.h>
59 #include <inet/ipsecesp.h>
60 #include <sys/random.h>
61 #include <sys/dlpi.h>
62 #include <sys/strsun.h>
63 #include <sys/strsubr.h>
64 #include <inet/ip_if.h>
65 #include <inet/ipdrop.h>
66 #include <inet/ipclassifier.h>
67 #include <inet/sctp_ip.h>
68 #include <sys/tsol/tnet.h>
69 
70 /*
71  * This source file contains Security Association Database (SADB) common
72  * routines.  They are linked in with the AH module.  Since AH has no chance
73  * of falling under export control, it was safe to link it in there.
74  */
75 
76 static mblk_t *sadb_extended_acquire(ipsec_selector_t *, ipsec_policy_t *,
77     ipsec_action_t *, boolean_t, uint32_t, uint32_t, sadb_sens_t *,
78     netstack_t *);
79 static ipsa_t *sadb_torch_assoc(isaf_t *, ipsa_t *);
80 static void sadb_destroy_acqlist(iacqf_t **, uint_t, boolean_t,
81 			    netstack_t *);
82 static void sadb_destroy(sadb_t *, netstack_t *);
83 static mblk_t *sadb_sa2msg(ipsa_t *, sadb_msg_t *);
84 static ts_label_t *sadb_label_from_sens(sadb_sens_t *, uint64_t *);
85 static sadb_sens_t *sadb_make_sens_ext(ts_label_t *tsl, int *len);
86 
87 static time_t sadb_add_time(time_t, uint64_t);
88 static void lifetime_fuzz(ipsa_t *);
89 static void age_pair_peer_list(templist_t *, sadb_t *, boolean_t);
90 static int get_ipsa_pair(ipsa_query_t *, ipsap_t *, int *);
91 static void init_ipsa_pair(ipsap_t *);
92 static void destroy_ipsa_pair(ipsap_t *);
93 static int update_pairing(ipsap_t *, ipsa_query_t *, keysock_in_t *, int *);
94 static void ipsa_set_replay(ipsa_t *ipsa, uint32_t offset);
95 
96 /*
97  * ipsacq_maxpackets is defined here to make it tunable
98  * from /etc/system.
99  */
100 extern uint64_t ipsacq_maxpackets;
101 
/*
 * SET_EXPIRE: when the SA's "delta" lifetime field is non-zero, set the
 * "exp" field to the SA's add time plus that lifetime (saturating, via
 * sadb_add_time()).  A zero lifetime leaves "exp" untouched.
 */
#define	SET_EXPIRE(sa, delta, exp) {					\
	if (((sa)->ipsa_ ## delta) != 0) {				\
		(sa)->ipsa_ ## exp =					\
		    sadb_add_time((sa)->ipsa_addtime,			\
		    (sa)->ipsa_ ## delta);				\
	}								\
}

/*
 * UPDATE_EXPIRE: when the SA's "delta" lifetime field is non-zero, compute
 * use time plus that lifetime and store it in "exp" if "exp" is still zero
 * or if the new value is earlier than the one already there.
 */
#define	UPDATE_EXPIRE(sa, delta, exp) {					\
	if (((sa)->ipsa_ ## delta) != 0) {				\
		time_t ue_newexp = sadb_add_time((sa)->ipsa_usetime,	\
		    (sa)->ipsa_ ## delta);				\
		if (((sa)->ipsa_ ## exp) == 0 ||			\
		    ue_newexp < ((sa)->ipsa_ ## exp))			\
			(sa)->ipsa_ ## exp = ue_newexp;			\
	}								\
}
120 
121 
122 /* wrap the macro so we can pass it as a function pointer */
123 void
sadb_sa_refrele(void * target)124 sadb_sa_refrele(void *target)
125 {
126 	IPSA_REFRELE(((ipsa_t *)target));
127 }
128 
/*
 * We presume that sizeof (long) == sizeof (time_t) and that time_t is
 * a signed type.
 */
#define	TIME_MAX LONG_MAX

/*
 * PF_KEY gives us lifetimes in uint64_t seconds.  We presume that
 * time_t is defined to be a signed type with the same range as
 * "long".  On ILP32 systems, we thus run the risk of wrapping around
 * at end of time, as well as "overwrapping" the clock back around
 * into a seemingly valid but incorrect future date earlier than the
 * desired expiration.
 *
 * In order to avoid odd behavior (either negative lifetimes or loss
 * of high order bits) when someone asks for bizarrely long SA
 * lifetimes, we do a saturating add for expire times.
 *
 * Returns base + delta, or TIME_MAX if that sum is not representable
 * as a time_t.
 *
 * The saturation test is done BEFORE the addition, so the function never
 * relies on an out-of-range value being converted back to a signed
 * time_t (which is implementation-defined in C).
 */
static time_t
sadb_add_time(time_t base, uint64_t delta)
{
	/*
	 * Clip delta to the maximum possible time_t value to
	 * prevent "overwrapping" back into a shorter-than-desired
	 * future time.  After this, delta fits in a time_t.
	 */
	if (delta > (uint64_t)TIME_MAX)
		delta = (uint64_t)TIME_MAX;

	/*
	 * Only a positive base can push the sum past TIME_MAX; for such a
	 * base, TIME_MAX - base cannot overflow and is the remaining
	 * headroom.
	 */
	if (base > 0 && delta > (uint64_t)(TIME_MAX - base))
		return (TIME_MAX);

	return (base + (time_t)delta);
}
179 
180 /*
181  * Callers of this function have already created a working security
182  * association, and have found the appropriate table & hash chain.  All this
183  * function does is check duplicates, and insert the SA.  The caller needs to
184  * hold the hash bucket lock and increment the refcnt before insertion.
185  *
186  * Return 0 if success, EEXIST if collision.
187  */
188 #define	SA_UNIQUE_MATCH(sa1, sa2) \
189 	(((sa1)->ipsa_unique_id & (sa1)->ipsa_unique_mask) == \
190 	((sa2)->ipsa_unique_id & (sa2)->ipsa_unique_mask))
191 
192 int
sadb_insertassoc(ipsa_t * ipsa,isaf_t * bucket)193 sadb_insertassoc(ipsa_t *ipsa, isaf_t *bucket)
194 {
195 	ipsa_t **ptpn = NULL;
196 	ipsa_t *walker;
197 	boolean_t unspecsrc;
198 
199 	ASSERT(MUTEX_HELD(&bucket->isaf_lock));
200 
201 	unspecsrc = IPSA_IS_ADDR_UNSPEC(ipsa->ipsa_srcaddr, ipsa->ipsa_addrfam);
202 
203 	walker = bucket->isaf_ipsa;
204 	ASSERT(walker == NULL || ipsa->ipsa_addrfam == walker->ipsa_addrfam);
205 
206 	/*
207 	 * Find insertion point (pointed to with **ptpn).  Insert at the head
208 	 * of the list unless there's an unspecified source address, then
209 	 * insert it after the last SA with a specified source address.
210 	 *
211 	 * BTW, you'll have to walk the whole chain, matching on {DST, SPI}
212 	 * checking for collisions.
213 	 */
214 
215 	while (walker != NULL) {
216 		if (IPSA_ARE_ADDR_EQUAL(walker->ipsa_dstaddr,
217 		    ipsa->ipsa_dstaddr, ipsa->ipsa_addrfam)) {
218 			if (walker->ipsa_spi == ipsa->ipsa_spi)
219 				return (EEXIST);
220 
221 			mutex_enter(&walker->ipsa_lock);
222 			if (ipsa->ipsa_state == IPSA_STATE_MATURE &&
223 			    (walker->ipsa_flags & IPSA_F_USED) &&
224 			    SA_UNIQUE_MATCH(walker, ipsa)) {
225 				walker->ipsa_flags |= IPSA_F_CINVALID;
226 			}
227 			mutex_exit(&walker->ipsa_lock);
228 		}
229 
230 		if (ptpn == NULL && unspecsrc) {
231 			if (IPSA_IS_ADDR_UNSPEC(walker->ipsa_srcaddr,
232 			    walker->ipsa_addrfam))
233 				ptpn = walker->ipsa_ptpn;
234 			else if (walker->ipsa_next == NULL)
235 				ptpn = &walker->ipsa_next;
236 		}
237 
238 		walker = walker->ipsa_next;
239 	}
240 
241 	if (ptpn == NULL)
242 		ptpn = &bucket->isaf_ipsa;
243 	ipsa->ipsa_next = *ptpn;
244 	ipsa->ipsa_ptpn = ptpn;
245 	if (ipsa->ipsa_next != NULL)
246 		ipsa->ipsa_next->ipsa_ptpn = &ipsa->ipsa_next;
247 	*ptpn = ipsa;
248 	ipsa->ipsa_linklock = &bucket->isaf_lock;
249 
250 	return (0);
251 }
252 #undef SA_UNIQUE_MATCH
253 
254 /*
255  * Free a security association.  Its reference count is 0, which means
256  * I must free it.  The SA must be unlocked and must not be linked into
257  * any fanout list.
258  */
259 static void
sadb_freeassoc(ipsa_t * ipsa)260 sadb_freeassoc(ipsa_t *ipsa)
261 {
262 	ipsec_stack_t	*ipss = ipsa->ipsa_netstack->netstack_ipsec;
263 	mblk_t		*asyncmp, *mp;
264 
265 	ASSERT(ipss != NULL);
266 	ASSERT(MUTEX_NOT_HELD(&ipsa->ipsa_lock));
267 	ASSERT(ipsa->ipsa_refcnt == 0);
268 	ASSERT(ipsa->ipsa_next == NULL);
269 	ASSERT(ipsa->ipsa_ptpn == NULL);
270 
271 
272 	asyncmp = sadb_clear_lpkt(ipsa);
273 	if (asyncmp != NULL) {
274 		mp = ip_recv_attr_free_mblk(asyncmp);
275 		ip_drop_packet(mp, B_TRUE, NULL,
276 		    DROPPER(ipss, ipds_sadb_inlarval_timeout),
277 		    &ipss->ipsec_sadb_dropper);
278 	}
279 	mutex_enter(&ipsa->ipsa_lock);
280 
281 	if (ipsa->ipsa_tsl != NULL) {
282 		label_rele(ipsa->ipsa_tsl);
283 		ipsa->ipsa_tsl = NULL;
284 	}
285 
286 	if (ipsa->ipsa_otsl != NULL) {
287 		label_rele(ipsa->ipsa_otsl);
288 		ipsa->ipsa_otsl = NULL;
289 	}
290 
291 	ipsec_destroy_ctx_tmpl(ipsa, IPSEC_ALG_AUTH);
292 	ipsec_destroy_ctx_tmpl(ipsa, IPSEC_ALG_ENCR);
293 	mutex_exit(&ipsa->ipsa_lock);
294 
295 	/* bzero() these fields for paranoia's sake. */
296 	if (ipsa->ipsa_authkey != NULL) {
297 		bzero(ipsa->ipsa_authkey, ipsa->ipsa_authkeylen);
298 		kmem_free(ipsa->ipsa_authkey, ipsa->ipsa_authkeylen);
299 	}
300 	if (ipsa->ipsa_encrkey != NULL) {
301 		bzero(ipsa->ipsa_encrkey, ipsa->ipsa_encrkeylen);
302 		kmem_free(ipsa->ipsa_encrkey, ipsa->ipsa_encrkeylen);
303 	}
304 	if (ipsa->ipsa_nonce_buf != NULL) {
305 		bzero(ipsa->ipsa_nonce_buf, sizeof (ipsec_nonce_t));
306 		kmem_free(ipsa->ipsa_nonce_buf, sizeof (ipsec_nonce_t));
307 	}
308 	if (ipsa->ipsa_src_cid != NULL) {
309 		IPSID_REFRELE(ipsa->ipsa_src_cid);
310 	}
311 	if (ipsa->ipsa_dst_cid != NULL) {
312 		IPSID_REFRELE(ipsa->ipsa_dst_cid);
313 	}
314 	if (ipsa->ipsa_emech.cm_param != NULL)
315 		kmem_free(ipsa->ipsa_emech.cm_param,
316 		    ipsa->ipsa_emech.cm_param_len);
317 
318 	mutex_destroy(&ipsa->ipsa_lock);
319 	kmem_free(ipsa, sizeof (*ipsa));
320 }
321 
322 /*
323  * Unlink a security association from a hash bucket.  Assume the hash bucket
324  * lock is held, but the association's lock is not.
325  *
326  * Note that we do not bump the bucket's generation number here because
327  * we might not be making a visible change to the set of visible SA's.
328  * All callers MUST bump the bucket's generation number before they unlock
329  * the bucket if they use sadb_unlinkassoc to permanetly remove an SA which
330  * was present in the bucket at the time it was locked.
331  */
332 void
sadb_unlinkassoc(ipsa_t * ipsa)333 sadb_unlinkassoc(ipsa_t *ipsa)
334 {
335 	ASSERT(ipsa->ipsa_linklock != NULL);
336 	ASSERT(MUTEX_HELD(ipsa->ipsa_linklock));
337 
338 	/* These fields are protected by the link lock. */
339 	*(ipsa->ipsa_ptpn) = ipsa->ipsa_next;
340 	if (ipsa->ipsa_next != NULL) {
341 		ipsa->ipsa_next->ipsa_ptpn = ipsa->ipsa_ptpn;
342 		ipsa->ipsa_next = NULL;
343 	}
344 
345 	ipsa->ipsa_ptpn = NULL;
346 
347 	/* This may destroy the SA. */
348 	IPSA_REFRELE(ipsa);
349 }
350 
351 void
sadb_delete_cluster(ipsa_t * assoc)352 sadb_delete_cluster(ipsa_t *assoc)
353 {
354 	uint8_t protocol;
355 
356 	if (cl_inet_deletespi &&
357 	    ((assoc->ipsa_state == IPSA_STATE_LARVAL) ||
358 	    (assoc->ipsa_state == IPSA_STATE_MATURE))) {
359 		protocol = (assoc->ipsa_type == SADB_SATYPE_AH) ?
360 		    IPPROTO_AH : IPPROTO_ESP;
361 		cl_inet_deletespi(assoc->ipsa_netstack->netstack_stackid,
362 		    protocol, assoc->ipsa_spi, NULL);
363 	}
364 }
365 
366 /*
367  * Create a larval security association with the specified SPI.	 All other
368  * fields are zeroed.
369  */
370 static ipsa_t *
sadb_makelarvalassoc(uint32_t spi,uint32_t * src,uint32_t * dst,int addrfam,netstack_t * ns)371 sadb_makelarvalassoc(uint32_t spi, uint32_t *src, uint32_t *dst, int addrfam,
372     netstack_t *ns)
373 {
374 	ipsa_t *newbie;
375 
376 	/*
377 	 * Allocate...
378 	 */
379 
380 	newbie = (ipsa_t *)kmem_zalloc(sizeof (ipsa_t), KM_NOSLEEP);
381 	if (newbie == NULL) {
382 		/* Can't make new larval SA. */
383 		return (NULL);
384 	}
385 
386 	/* Assigned requested SPI, assume caller does SPI allocation magic. */
387 	newbie->ipsa_spi = spi;
388 	newbie->ipsa_netstack = ns;	/* No netstack_hold */
389 
390 	/*
391 	 * Copy addresses...
392 	 */
393 
394 	IPSA_COPY_ADDR(newbie->ipsa_srcaddr, src, addrfam);
395 	IPSA_COPY_ADDR(newbie->ipsa_dstaddr, dst, addrfam);
396 
397 	newbie->ipsa_addrfam = addrfam;
398 
399 	/*
400 	 * Set common initialization values, including refcnt.
401 	 */
402 	mutex_init(&newbie->ipsa_lock, NULL, MUTEX_DEFAULT, NULL);
403 	newbie->ipsa_state = IPSA_STATE_LARVAL;
404 	newbie->ipsa_refcnt = 1;
405 	newbie->ipsa_freefunc = sadb_freeassoc;
406 
407 	/*
408 	 * There aren't a lot of other common initialization values, as
409 	 * they are copied in from the PF_KEY message.
410 	 */
411 
412 	return (newbie);
413 }
414 
415 /*
416  * Call me to initialize a security association fanout.
417  */
418 static int
sadb_init_fanout(isaf_t ** tablep,uint_t size,int kmflag)419 sadb_init_fanout(isaf_t **tablep, uint_t size, int kmflag)
420 {
421 	isaf_t *table;
422 	int i;
423 
424 	table = (isaf_t *)kmem_alloc(size * sizeof (*table), kmflag);
425 	*tablep = table;
426 
427 	if (table == NULL)
428 		return (ENOMEM);
429 
430 	for (i = 0; i < size; i++) {
431 		mutex_init(&(table[i].isaf_lock), NULL, MUTEX_DEFAULT, NULL);
432 		table[i].isaf_ipsa = NULL;
433 		table[i].isaf_gen = 0;
434 	}
435 
436 	return (0);
437 }
438 
439 /*
440  * Call me to initialize an acquire fanout
441  */
442 static int
sadb_init_acfanout(iacqf_t ** tablep,uint_t size,int kmflag)443 sadb_init_acfanout(iacqf_t **tablep, uint_t size, int kmflag)
444 {
445 	iacqf_t *table;
446 	int i;
447 
448 	table = (iacqf_t *)kmem_alloc(size * sizeof (*table), kmflag);
449 	*tablep = table;
450 
451 	if (table == NULL)
452 		return (ENOMEM);
453 
454 	for (i = 0; i < size; i++) {
455 		mutex_init(&(table[i].iacqf_lock), NULL, MUTEX_DEFAULT, NULL);
456 		table[i].iacqf_ipsacq = NULL;
457 	}
458 
459 	return (0);
460 }
461 
462 /*
463  * Attempt to initialize an SADB instance.  On failure, return ENOMEM;
464  * caller must clean up partial allocations.
465  */
466 static int
sadb_init_trial(sadb_t * sp,uint_t size,int kmflag)467 sadb_init_trial(sadb_t *sp, uint_t size, int kmflag)
468 {
469 	ASSERT(sp->sdb_of == NULL);
470 	ASSERT(sp->sdb_if == NULL);
471 	ASSERT(sp->sdb_acq == NULL);
472 
473 	sp->sdb_hashsize = size;
474 	if (sadb_init_fanout(&sp->sdb_of, size, kmflag) != 0)
475 		return (ENOMEM);
476 	if (sadb_init_fanout(&sp->sdb_if, size, kmflag) != 0)
477 		return (ENOMEM);
478 	if (sadb_init_acfanout(&sp->sdb_acq, size, kmflag) != 0)
479 		return (ENOMEM);
480 
481 	return (0);
482 }
483 
484 /*
485  * Call me to initialize an SADB instance; fall back to default size on failure.
486  */
487 static void
sadb_init(const char * name,sadb_t * sp,uint_t size,uint_t ver,netstack_t * ns)488 sadb_init(const char *name, sadb_t *sp, uint_t size, uint_t ver,
489     netstack_t *ns)
490 {
491 	ASSERT(sp->sdb_of == NULL);
492 	ASSERT(sp->sdb_if == NULL);
493 	ASSERT(sp->sdb_acq == NULL);
494 
495 	if (size < IPSEC_DEFAULT_HASH_SIZE)
496 		size = IPSEC_DEFAULT_HASH_SIZE;
497 
498 	if (sadb_init_trial(sp, size, KM_NOSLEEP) != 0) {
499 
500 		cmn_err(CE_WARN,
501 		    "Unable to allocate %u entry IPv%u %s SADB hash table",
502 		    size, ver, name);
503 
504 		sadb_destroy(sp, ns);
505 		size = IPSEC_DEFAULT_HASH_SIZE;
506 		cmn_err(CE_WARN, "Falling back to %d entries", size);
507 		(void) sadb_init_trial(sp, size, KM_SLEEP);
508 	}
509 }
510 
511 
512 /*
513  * Initialize an SADB-pair.
514  */
515 void
sadbp_init(const char * name,sadbp_t * sp,int type,int size,netstack_t * ns)516 sadbp_init(const char *name, sadbp_t *sp, int type, int size, netstack_t *ns)
517 {
518 	sadb_init(name, &sp->s_v4, size, 4, ns);
519 	sadb_init(name, &sp->s_v6, size, 6, ns);
520 
521 	sp->s_satype = type;
522 
523 	ASSERT((type == SADB_SATYPE_AH) || (type == SADB_SATYPE_ESP));
524 	if (type == SADB_SATYPE_AH) {
525 		ipsec_stack_t	*ipss = ns->netstack_ipsec;
526 
527 		ip_drop_register(&ipss->ipsec_sadb_dropper, "IPsec SADB");
528 		sp->s_addflags = AH_ADD_SETTABLE_FLAGS;
529 		sp->s_updateflags = AH_UPDATE_SETTABLE_FLAGS;
530 	} else {
531 		sp->s_addflags = ESP_ADD_SETTABLE_FLAGS;
532 		sp->s_updateflags = ESP_UPDATE_SETTABLE_FLAGS;
533 	}
534 }
535 
536 /*
537  * Deliver a single SADB_DUMP message representing a single SA.  This is
538  * called many times by sadb_dump().
539  *
540  * If the return value of this is ENOBUFS (not the same as ENOMEM), then
541  * the caller should take that as a hint that dupb() on the "original answer"
542  * failed, and that perhaps the caller should try again with a copyb()ed
543  * "original answer".
544  */
545 static int
sadb_dump_deliver(queue_t * pfkey_q,mblk_t * original_answer,ipsa_t * ipsa,sadb_msg_t * samsg)546 sadb_dump_deliver(queue_t *pfkey_q, mblk_t *original_answer, ipsa_t *ipsa,
547     sadb_msg_t *samsg)
548 {
549 	mblk_t *answer;
550 
551 	answer = dupb(original_answer);
552 	if (answer == NULL)
553 		return (ENOBUFS);
554 	answer->b_cont = sadb_sa2msg(ipsa, samsg);
555 	if (answer->b_cont == NULL) {
556 		freeb(answer);
557 		return (ENOMEM);
558 	}
559 
560 	/* Just do a putnext, and let keysock deal with flow control. */
561 	putnext(pfkey_q, answer);
562 	return (0);
563 }
564 
565 /*
566  * Common function to allocate and prepare a keysock_out_t M_CTL message.
567  */
568 mblk_t *
sadb_keysock_out(minor_t serial)569 sadb_keysock_out(minor_t serial)
570 {
571 	mblk_t *mp;
572 	keysock_out_t *kso;
573 
574 	mp = allocb(sizeof (ipsec_info_t), BPRI_HI);
575 	if (mp != NULL) {
576 		mp->b_datap->db_type = M_CTL;
577 		mp->b_wptr += sizeof (ipsec_info_t);
578 		kso = (keysock_out_t *)mp->b_rptr;
579 		kso->ks_out_type = KEYSOCK_OUT;
580 		kso->ks_out_len = sizeof (*kso);
581 		kso->ks_out_serial = serial;
582 	}
583 
584 	return (mp);
585 }
586 
587 /*
588  * Perform an SADB_DUMP, spewing out every SA in an array of SA fanouts
589  * to keysock.
590  */
591 static int
sadb_dump_fanout(queue_t * pfkey_q,mblk_t * mp,minor_t serial,isaf_t * fanout,int num_entries,boolean_t do_peers,time_t active_time)592 sadb_dump_fanout(queue_t *pfkey_q, mblk_t *mp, minor_t serial, isaf_t *fanout,
593     int num_entries, boolean_t do_peers, time_t active_time)
594 {
595 	int i, error = 0;
596 	mblk_t *original_answer;
597 	ipsa_t *walker;
598 	sadb_msg_t *samsg;
599 	time_t	current;
600 
601 	/*
602 	 * For each IPSA hash bucket do:
603 	 *	- Hold the mutex
604 	 *	- Walk each entry, doing an sadb_dump_deliver() on it.
605 	 */
606 	ASSERT(mp->b_cont != NULL);
607 	samsg = (sadb_msg_t *)mp->b_cont->b_rptr;
608 
609 	original_answer = sadb_keysock_out(serial);
610 	if (original_answer == NULL)
611 		return (ENOMEM);
612 
613 	current = gethrestime_sec();
614 	for (i = 0; i < num_entries; i++) {
615 		mutex_enter(&fanout[i].isaf_lock);
616 		for (walker = fanout[i].isaf_ipsa; walker != NULL;
617 		    walker = walker->ipsa_next) {
618 			if (!do_peers && walker->ipsa_haspeer)
619 				continue;
620 			if ((active_time != 0) &&
621 			    ((current - walker->ipsa_lastuse) > active_time))
622 				continue;
623 			error = sadb_dump_deliver(pfkey_q, original_answer,
624 			    walker, samsg);
625 			if (error == ENOBUFS) {
626 				mblk_t *new_original_answer;
627 
628 				/* Ran out of dupb's.  Try a copyb. */
629 				new_original_answer = copyb(original_answer);
630 				if (new_original_answer == NULL) {
631 					error = ENOMEM;
632 				} else {
633 					freeb(original_answer);
634 					original_answer = new_original_answer;
635 					error = sadb_dump_deliver(pfkey_q,
636 					    original_answer, walker, samsg);
637 				}
638 			}
639 			if (error != 0)
640 				break;	/* out of for loop. */
641 		}
642 		mutex_exit(&fanout[i].isaf_lock);
643 		if (error != 0)
644 			break;	/* out of for loop. */
645 	}
646 
647 	freeb(original_answer);
648 	return (error);
649 }
650 
651 /*
652  * Dump an entire SADB; outbound first, then inbound.
653  */
654 
655 int
sadb_dump(queue_t * pfkey_q,mblk_t * mp,keysock_in_t * ksi,sadb_t * sp)656 sadb_dump(queue_t *pfkey_q, mblk_t *mp, keysock_in_t *ksi, sadb_t *sp)
657 {
658 	int error;
659 	time_t	active_time = 0;
660 	sadb_x_edump_t	*edump =
661 	    (sadb_x_edump_t *)ksi->ks_in_extv[SADB_X_EXT_EDUMP];
662 
663 	if (edump != NULL) {
664 		active_time = edump->sadb_x_edump_timeout;
665 	}
666 
667 	/* Dump outbound */
668 	error = sadb_dump_fanout(pfkey_q, mp, ksi->ks_in_serial, sp->sdb_of,
669 	    sp->sdb_hashsize, B_TRUE, active_time);
670 	if (error)
671 		return (error);
672 
673 	/* Dump inbound */
674 	return sadb_dump_fanout(pfkey_q, mp, ksi->ks_in_serial, sp->sdb_if,
675 	    sp->sdb_hashsize, B_FALSE, active_time);
676 }
677 
678 /*
679  * Generic sadb table walker.
680  *
681  * Call "walkfn" for each SA in each bucket in "table"; pass the
682  * bucket, the entry and "cookie" to the callback function.
683  * Take care to ensure that walkfn can delete the SA without screwing
684  * up our traverse.
685  *
686  * The bucket is locked for the duration of the callback, both so that the
687  * callback can just call sadb_unlinkassoc() when it wants to delete something,
688  * and so that no new entries are added while we're walking the list.
689  */
690 static void
sadb_walker(isaf_t * table,uint_t numentries,void (* walkfn)(isaf_t * head,ipsa_t * entry,void * cookie),void * cookie)691 sadb_walker(isaf_t *table, uint_t numentries,
692     void (*walkfn)(isaf_t *head, ipsa_t *entry, void *cookie),
693     void *cookie)
694 {
695 	int i;
696 	for (i = 0; i < numentries; i++) {
697 		ipsa_t *entry, *next;
698 
699 		mutex_enter(&table[i].isaf_lock);
700 
701 		for (entry = table[i].isaf_ipsa; entry != NULL;
702 		    entry = next) {
703 			next = entry->ipsa_next;
704 			(*walkfn)(&table[i], entry, cookie);
705 		}
706 		mutex_exit(&table[i].isaf_lock);
707 	}
708 }
709 
710 /*
711  * Call me to free up a security association fanout.  Use the forever
712  * variable to indicate freeing up the SAs (forever == B_FALSE, e.g.
713  * an SADB_FLUSH message), or destroying everything (forever == B_TRUE,
714  * when a module is unloaded).
715  */
716 static void
sadb_destroyer(isaf_t ** tablep,uint_t numentries,boolean_t forever,boolean_t inbound)717 sadb_destroyer(isaf_t **tablep, uint_t numentries, boolean_t forever,
718     boolean_t inbound)
719 {
720 	int i;
721 	isaf_t *table = *tablep;
722 	uint8_t protocol;
723 	ipsa_t *sa;
724 	netstackid_t sid;
725 
726 	if (table == NULL)
727 		return;
728 
729 	for (i = 0; i < numentries; i++) {
730 		mutex_enter(&table[i].isaf_lock);
731 		while ((sa = table[i].isaf_ipsa) != NULL) {
732 			if (inbound && cl_inet_deletespi &&
733 			    (sa->ipsa_state != IPSA_STATE_ACTIVE_ELSEWHERE) &&
734 			    (sa->ipsa_state != IPSA_STATE_IDLE)) {
735 				protocol = (sa->ipsa_type == SADB_SATYPE_AH) ?
736 				    IPPROTO_AH : IPPROTO_ESP;
737 				sid = sa->ipsa_netstack->netstack_stackid;
738 				cl_inet_deletespi(sid, protocol, sa->ipsa_spi,
739 				    NULL);
740 			}
741 			sadb_unlinkassoc(sa);
742 		}
743 		table[i].isaf_gen++;
744 		mutex_exit(&table[i].isaf_lock);
745 		if (forever)
746 			mutex_destroy(&(table[i].isaf_lock));
747 	}
748 
749 	if (forever) {
750 		*tablep = NULL;
751 		kmem_free(table, numentries * sizeof (*table));
752 	}
753 }
754 
755 /*
756  * Entry points to sadb_destroyer().
757  */
758 static void
sadb_flush(sadb_t * sp,netstack_t * ns)759 sadb_flush(sadb_t *sp, netstack_t *ns)
760 {
761 	/*
762 	 * Flush out each bucket, one at a time.  Were it not for keysock's
763 	 * enforcement, there would be a subtlety where I could add on the
764 	 * heels of a flush.  With keysock's enforcement, however, this
765 	 * makes ESP's job easy.
766 	 */
767 	sadb_destroyer(&sp->sdb_of, sp->sdb_hashsize, B_FALSE, B_FALSE);
768 	sadb_destroyer(&sp->sdb_if, sp->sdb_hashsize, B_FALSE, B_TRUE);
769 
770 	/* For each acquire, destroy it; leave the bucket mutex alone. */
771 	sadb_destroy_acqlist(&sp->sdb_acq, sp->sdb_hashsize, B_FALSE, ns);
772 }
773 
774 static void
sadb_destroy(sadb_t * sp,netstack_t * ns)775 sadb_destroy(sadb_t *sp, netstack_t *ns)
776 {
777 	sadb_destroyer(&sp->sdb_of, sp->sdb_hashsize, B_TRUE, B_FALSE);
778 	sadb_destroyer(&sp->sdb_if, sp->sdb_hashsize, B_TRUE, B_TRUE);
779 
780 	/* For each acquire, destroy it, including the bucket mutex. */
781 	sadb_destroy_acqlist(&sp->sdb_acq, sp->sdb_hashsize, B_TRUE, ns);
782 
783 	ASSERT(sp->sdb_of == NULL);
784 	ASSERT(sp->sdb_if == NULL);
785 	ASSERT(sp->sdb_acq == NULL);
786 }
787 
788 void
sadbp_flush(sadbp_t * spp,netstack_t * ns)789 sadbp_flush(sadbp_t *spp, netstack_t *ns)
790 {
791 	sadb_flush(&spp->s_v4, ns);
792 	sadb_flush(&spp->s_v6, ns);
793 }
794 
795 void
sadbp_destroy(sadbp_t * spp,netstack_t * ns)796 sadbp_destroy(sadbp_t *spp, netstack_t *ns)
797 {
798 	sadb_destroy(&spp->s_v4, ns);
799 	sadb_destroy(&spp->s_v6, ns);
800 
801 	if (spp->s_satype == SADB_SATYPE_AH) {
802 		ipsec_stack_t	*ipss = ns->netstack_ipsec;
803 
804 		ip_drop_unregister(&ipss->ipsec_sadb_dropper);
805 	}
806 }
807 
808 
809 /*
810  * Check hard vs. soft lifetimes.  If there's a reality mismatch (e.g.
811  * soft lifetimes > hard lifetimes) return an appropriate diagnostic for
812  * EINVAL.
813  */
814 int
sadb_hardsoftchk(sadb_lifetime_t * hard,sadb_lifetime_t * soft,sadb_lifetime_t * idle)815 sadb_hardsoftchk(sadb_lifetime_t *hard, sadb_lifetime_t *soft,
816     sadb_lifetime_t *idle)
817 {
818 	if (hard == NULL || soft == NULL)
819 		return (0);
820 
821 	if (hard->sadb_lifetime_allocations != 0 &&
822 	    soft->sadb_lifetime_allocations != 0 &&
823 	    hard->sadb_lifetime_allocations < soft->sadb_lifetime_allocations)
824 		return (SADB_X_DIAGNOSTIC_ALLOC_HSERR);
825 
826 	if (hard->sadb_lifetime_bytes != 0 &&
827 	    soft->sadb_lifetime_bytes != 0 &&
828 	    hard->sadb_lifetime_bytes < soft->sadb_lifetime_bytes)
829 		return (SADB_X_DIAGNOSTIC_BYTES_HSERR);
830 
831 	if (hard->sadb_lifetime_addtime != 0 &&
832 	    soft->sadb_lifetime_addtime != 0 &&
833 	    hard->sadb_lifetime_addtime < soft->sadb_lifetime_addtime)
834 		return (SADB_X_DIAGNOSTIC_ADDTIME_HSERR);
835 
836 	if (hard->sadb_lifetime_usetime != 0 &&
837 	    soft->sadb_lifetime_usetime != 0 &&
838 	    hard->sadb_lifetime_usetime < soft->sadb_lifetime_usetime)
839 		return (SADB_X_DIAGNOSTIC_USETIME_HSERR);
840 
841 	if (idle != NULL) {
842 		if (hard->sadb_lifetime_addtime != 0 &&
843 		    idle->sadb_lifetime_addtime != 0 &&
844 		    hard->sadb_lifetime_addtime < idle->sadb_lifetime_addtime)
845 			return (SADB_X_DIAGNOSTIC_ADDTIME_HSERR);
846 
847 		if (soft->sadb_lifetime_addtime != 0 &&
848 		    idle->sadb_lifetime_addtime != 0 &&
849 		    soft->sadb_lifetime_addtime < idle->sadb_lifetime_addtime)
850 			return (SADB_X_DIAGNOSTIC_ADDTIME_HSERR);
851 
852 		if (hard->sadb_lifetime_usetime != 0 &&
853 		    idle->sadb_lifetime_usetime != 0 &&
854 		    hard->sadb_lifetime_usetime < idle->sadb_lifetime_usetime)
855 			return (SADB_X_DIAGNOSTIC_USETIME_HSERR);
856 
857 		if (soft->sadb_lifetime_usetime != 0 &&
858 		    idle->sadb_lifetime_usetime != 0 &&
859 		    soft->sadb_lifetime_usetime < idle->sadb_lifetime_usetime)
860 			return (SADB_X_DIAGNOSTIC_USETIME_HSERR);
861 	}
862 
863 	return (0);
864 }
865 
866 /*
867  * Sanity check sensitivity labels.
868  *
869  * For now, just reject labels on unlabeled systems.
870  */
871 int
sadb_labelchk(keysock_in_t * ksi)872 sadb_labelchk(keysock_in_t *ksi)
873 {
874 	if (!is_system_labeled()) {
875 		if (ksi->ks_in_extv[SADB_EXT_SENSITIVITY] != NULL)
876 			return (SADB_X_DIAGNOSTIC_BAD_LABEL);
877 
878 		if (ksi->ks_in_extv[SADB_X_EXT_OUTER_SENS] != NULL)
879 			return (SADB_X_DIAGNOSTIC_BAD_LABEL);
880 	}
881 
882 	return (0);
883 }
884 
885 /*
886  * Clone a security association for the purposes of inserting a single SA
887  * into inbound and outbound tables respectively. This function should only
888  * be called from sadb_common_add().
889  */
890 static ipsa_t *
sadb_cloneassoc(ipsa_t * ipsa)891 sadb_cloneassoc(ipsa_t *ipsa)
892 {
893 	ipsa_t *newbie;
894 	boolean_t error = B_FALSE;
895 
896 	ASSERT(MUTEX_NOT_HELD(&(ipsa->ipsa_lock)));
897 
898 	newbie = kmem_alloc(sizeof (ipsa_t), KM_NOSLEEP);
899 	if (newbie == NULL)
900 		return (NULL);
901 
902 	/* Copy over what we can. */
903 	*newbie = *ipsa;
904 
905 	/* bzero and initialize locks, in case *_init() allocates... */
906 	mutex_init(&newbie->ipsa_lock, NULL, MUTEX_DEFAULT, NULL);
907 
908 	if (newbie->ipsa_tsl != NULL)
909 		label_hold(newbie->ipsa_tsl);
910 
911 	if (newbie->ipsa_otsl != NULL)
912 		label_hold(newbie->ipsa_otsl);
913 
914 	/*
915 	 * While somewhat dain-bramaged, the most graceful way to
916 	 * recover from errors is to keep plowing through the
917 	 * allocations, and getting what I can.  It's easier to call
918 	 * sadb_freeassoc() on the stillborn clone when all the
919 	 * pointers aren't pointing to the parent's data.
920 	 */
921 
922 	if (ipsa->ipsa_authkey != NULL) {
923 		newbie->ipsa_authkey = kmem_alloc(newbie->ipsa_authkeylen,
924 		    KM_NOSLEEP);
925 		if (newbie->ipsa_authkey == NULL) {
926 			error = B_TRUE;
927 		} else {
928 			bcopy(ipsa->ipsa_authkey, newbie->ipsa_authkey,
929 			    newbie->ipsa_authkeylen);
930 
931 			newbie->ipsa_kcfauthkey.ck_data =
932 			    newbie->ipsa_authkey;
933 		}
934 
935 		if (newbie->ipsa_amech.cm_param != NULL) {
936 			newbie->ipsa_amech.cm_param =
937 			    (char *)&newbie->ipsa_mac_len;
938 		}
939 	}
940 
941 	if (ipsa->ipsa_encrkey != NULL) {
942 		newbie->ipsa_encrkey = kmem_alloc(newbie->ipsa_encrkeylen,
943 		    KM_NOSLEEP);
944 		if (newbie->ipsa_encrkey == NULL) {
945 			error = B_TRUE;
946 		} else {
947 			bcopy(ipsa->ipsa_encrkey, newbie->ipsa_encrkey,
948 			    newbie->ipsa_encrkeylen);
949 
950 			newbie->ipsa_kcfencrkey.ck_data =
951 			    newbie->ipsa_encrkey;
952 		}
953 	}
954 
955 	newbie->ipsa_authtmpl = NULL;
956 	newbie->ipsa_encrtmpl = NULL;
957 	newbie->ipsa_haspeer = B_TRUE;
958 
959 	if (ipsa->ipsa_src_cid != NULL) {
960 		newbie->ipsa_src_cid = ipsa->ipsa_src_cid;
961 		IPSID_REFHOLD(ipsa->ipsa_src_cid);
962 	}
963 
964 	if (ipsa->ipsa_dst_cid != NULL) {
965 		newbie->ipsa_dst_cid = ipsa->ipsa_dst_cid;
966 		IPSID_REFHOLD(ipsa->ipsa_dst_cid);
967 	}
968 
969 	if (error) {
970 		sadb_freeassoc(newbie);
971 		return (NULL);
972 	}
973 
974 	return (newbie);
975 }
976 
977 /*
978  * Initialize a SADB address extension at the address specified by addrext.
979  * Return a pointer to the end of the new address extension.
980  */
981 static uint8_t *
sadb_make_addr_ext(uint8_t * start,uint8_t * end,uint16_t exttype,sa_family_t af,uint32_t * addr,uint16_t port,uint8_t proto,int prefix)982 sadb_make_addr_ext(uint8_t *start, uint8_t *end, uint16_t exttype,
983     sa_family_t af, uint32_t *addr, uint16_t port, uint8_t proto, int prefix)
984 {
985 	struct sockaddr_in *sin;
986 	struct sockaddr_in6 *sin6;
987 	uint8_t *cur = start;
988 	int addrext_len;
989 	int sin_len;
990 	sadb_address_t *addrext	= (sadb_address_t *)cur;
991 
992 	if (cur == NULL)
993 		return (NULL);
994 
995 	cur += sizeof (*addrext);
996 	if (cur > end)
997 		return (NULL);
998 
999 	addrext->sadb_address_proto = proto;
1000 	addrext->sadb_address_prefixlen = prefix;
1001 	addrext->sadb_address_reserved = 0;
1002 	addrext->sadb_address_exttype = exttype;
1003 
1004 	switch (af) {
1005 	case AF_INET:
1006 		sin = (struct sockaddr_in *)cur;
1007 		sin_len = sizeof (*sin);
1008 		cur += sin_len;
1009 		if (cur > end)
1010 			return (NULL);
1011 
1012 		sin->sin_family = af;
1013 		bzero(sin->sin_zero, sizeof (sin->sin_zero));
1014 		sin->sin_port = port;
1015 		IPSA_COPY_ADDR(&sin->sin_addr, addr, af);
1016 		break;
1017 	case AF_INET6:
1018 		sin6 = (struct sockaddr_in6 *)cur;
1019 		sin_len = sizeof (*sin6);
1020 		cur += sin_len;
1021 		if (cur > end)
1022 			return (NULL);
1023 
1024 		bzero(sin6, sizeof (*sin6));
1025 		sin6->sin6_family = af;
1026 		sin6->sin6_port = port;
1027 		IPSA_COPY_ADDR(&sin6->sin6_addr, addr, af);
1028 		break;
1029 	}
1030 
1031 	addrext_len = roundup(cur - start, sizeof (uint64_t));
1032 	addrext->sadb_address_len = SADB_8TO64(addrext_len);
1033 
1034 	cur = start + addrext_len;
1035 	if (cur > end)
1036 		cur = NULL;
1037 
1038 	return (cur);
1039 }
1040 
1041 /*
1042  * Construct a key management cookie extension.
1043  */
1044 
1045 static uint8_t *
sadb_make_kmc_ext(uint8_t * cur,uint8_t * end,uint32_t kmp,uint32_t kmc)1046 sadb_make_kmc_ext(uint8_t *cur, uint8_t *end, uint32_t kmp, uint32_t kmc)
1047 {
1048 	sadb_x_kmc_t *kmcext = (sadb_x_kmc_t *)cur;
1049 
1050 	if (cur == NULL)
1051 		return (NULL);
1052 
1053 	cur += sizeof (*kmcext);
1054 
1055 	if (cur > end)
1056 		return (NULL);
1057 
1058 	kmcext->sadb_x_kmc_len = SADB_8TO64(sizeof (*kmcext));
1059 	kmcext->sadb_x_kmc_exttype = SADB_X_EXT_KM_COOKIE;
1060 	kmcext->sadb_x_kmc_proto = kmp;
1061 	kmcext->sadb_x_kmc_cookie = kmc;
1062 	kmcext->sadb_x_kmc_reserved = 0;
1063 
1064 	return (cur);
1065 }
1066 
1067 /*
1068  * Given an original message header with sufficient space following it, and an
1069  * SA, construct a full PF_KEY message with all of the relevant extensions.
1070  * This is mostly used for SADB_GET, and SADB_DUMP.
1071  */
1072 static mblk_t *
sadb_sa2msg(ipsa_t * ipsa,sadb_msg_t * samsg)1073 sadb_sa2msg(ipsa_t *ipsa, sadb_msg_t *samsg)
1074 {
1075 	int alloclen, addrsize, paddrsize, authsize, encrsize;
1076 	int srcidsize, dstidsize, senslen, osenslen;
1077 	sa_family_t fam, pfam;	/* Address family for SADB_EXT_ADDRESS */
1078 				/* src/dst and proxy sockaddrs. */
1079 	/*
1080 	 * The following are pointers into the PF_KEY message this PF_KEY
1081 	 * message creates.
1082 	 */
1083 	sadb_msg_t *newsamsg;
1084 	sadb_sa_t *assoc;
1085 	sadb_lifetime_t *lt;
1086 	sadb_key_t *key;
1087 	sadb_ident_t *ident;
1088 	sadb_sens_t *sens;
1089 	sadb_ext_t *walker;	/* For when we need a generic ext. pointer. */
1090 	sadb_x_replay_ctr_t *repl_ctr;
1091 	sadb_x_pair_t *pair_ext;
1092 
1093 	mblk_t *mp;
1094 	uint8_t *cur, *end;
1095 	/* These indicate the presence of the above extension fields. */
1096 	boolean_t soft = B_FALSE, hard = B_FALSE;
1097 	boolean_t isrc = B_FALSE, idst = B_FALSE;
1098 	boolean_t auth = B_FALSE, encr = B_FALSE;
1099 	boolean_t sensinteg = B_FALSE, osensinteg = B_FALSE;
1100 	boolean_t srcid = B_FALSE, dstid = B_FALSE;
1101 	boolean_t idle;
1102 	boolean_t paired;
1103 	uint32_t otherspi;
1104 
1105 	/* First off, figure out the allocation length for this message. */
1106 	/*
1107 	 * Constant stuff.  This includes base, SA, address (src, dst),
1108 	 * and lifetime (current).
1109 	 */
1110 	alloclen = sizeof (sadb_msg_t) + sizeof (sadb_sa_t) +
1111 	    sizeof (sadb_lifetime_t);
1112 
1113 	fam = ipsa->ipsa_addrfam;
1114 	switch (fam) {
1115 	case AF_INET:
1116 		addrsize = roundup(sizeof (struct sockaddr_in) +
1117 		    sizeof (sadb_address_t), sizeof (uint64_t));
1118 		break;
1119 	case AF_INET6:
1120 		addrsize = roundup(sizeof (struct sockaddr_in6) +
1121 		    sizeof (sadb_address_t), sizeof (uint64_t));
1122 		break;
1123 	default:
1124 		return (NULL);
1125 	}
1126 	/*
1127 	 * Allocate TWO address extensions, for source and destination.
1128 	 * (Thus, the * 2.)
1129 	 */
1130 	alloclen += addrsize * 2;
1131 	if (ipsa->ipsa_flags & IPSA_F_NATT_REM)
1132 		alloclen += addrsize;
1133 	if (ipsa->ipsa_flags & IPSA_F_NATT_LOC)
1134 		alloclen += addrsize;
1135 
1136 	if (ipsa->ipsa_flags & IPSA_F_PAIRED) {
1137 		paired = B_TRUE;
1138 		alloclen += sizeof (sadb_x_pair_t);
1139 		otherspi = ipsa->ipsa_otherspi;
1140 	} else {
1141 		paired = B_FALSE;
1142 	}
1143 
1144 	/* How 'bout other lifetimes? */
1145 	if (ipsa->ipsa_softaddlt != 0 || ipsa->ipsa_softuselt != 0 ||
1146 	    ipsa->ipsa_softbyteslt != 0 || ipsa->ipsa_softalloc != 0) {
1147 		alloclen += sizeof (sadb_lifetime_t);
1148 		soft = B_TRUE;
1149 	}
1150 
1151 	if (ipsa->ipsa_hardaddlt != 0 || ipsa->ipsa_harduselt != 0 ||
1152 	    ipsa->ipsa_hardbyteslt != 0 || ipsa->ipsa_hardalloc != 0) {
1153 		alloclen += sizeof (sadb_lifetime_t);
1154 		hard = B_TRUE;
1155 	}
1156 
1157 	if (ipsa->ipsa_idleaddlt != 0 || ipsa->ipsa_idleuselt != 0) {
1158 		alloclen += sizeof (sadb_lifetime_t);
1159 		idle = B_TRUE;
1160 	} else {
1161 		idle = B_FALSE;
1162 	}
1163 
1164 	/* Inner addresses. */
1165 	if (ipsa->ipsa_innerfam != 0) {
1166 		pfam = ipsa->ipsa_innerfam;
1167 		switch (pfam) {
1168 		case AF_INET6:
1169 			paddrsize = roundup(sizeof (struct sockaddr_in6) +
1170 			    sizeof (sadb_address_t), sizeof (uint64_t));
1171 			break;
1172 		case AF_INET:
1173 			paddrsize = roundup(sizeof (struct sockaddr_in) +
1174 			    sizeof (sadb_address_t), sizeof (uint64_t));
1175 			break;
1176 		default:
1177 			cmn_err(CE_PANIC,
1178 			    "IPsec SADB: Proxy length failure.\n");
1179 			break;
1180 		}
1181 		isrc = B_TRUE;
1182 		idst = B_TRUE;
1183 		alloclen += 2 * paddrsize;
1184 	}
1185 
1186 	/* For the following fields, assume that length != 0 ==> stuff */
1187 	if (ipsa->ipsa_authkeylen != 0) {
1188 		authsize = roundup(sizeof (sadb_key_t) + ipsa->ipsa_authkeylen,
1189 		    sizeof (uint64_t));
1190 		alloclen += authsize;
1191 		auth = B_TRUE;
1192 	}
1193 
1194 	if (ipsa->ipsa_encrkeylen != 0) {
1195 		encrsize = roundup(sizeof (sadb_key_t) + ipsa->ipsa_encrkeylen +
1196 		    ipsa->ipsa_nonce_len, sizeof (uint64_t));
1197 		alloclen += encrsize;
1198 		encr = B_TRUE;
1199 	} else {
1200 		encr = B_FALSE;
1201 	}
1202 
1203 	if (ipsa->ipsa_tsl != NULL) {
1204 		senslen = sadb_sens_len_from_label(ipsa->ipsa_tsl);
1205 		alloclen += senslen;
1206 		sensinteg = B_TRUE;
1207 	}
1208 
1209 	if (ipsa->ipsa_otsl != NULL) {
1210 		osenslen = sadb_sens_len_from_label(ipsa->ipsa_otsl);
1211 		alloclen += osenslen;
1212 		osensinteg = B_TRUE;
1213 	}
1214 
1215 	/*
1216 	 * Must use strlen() here for lengths.	Identities use NULL
1217 	 * pointers to indicate their nonexistence.
1218 	 */
1219 	if (ipsa->ipsa_src_cid != NULL) {
1220 		srcidsize = roundup(sizeof (sadb_ident_t) +
1221 		    strlen(ipsa->ipsa_src_cid->ipsid_cid) + 1,
1222 		    sizeof (uint64_t));
1223 		alloclen += srcidsize;
1224 		srcid = B_TRUE;
1225 	}
1226 
1227 	if (ipsa->ipsa_dst_cid != NULL) {
1228 		dstidsize = roundup(sizeof (sadb_ident_t) +
1229 		    strlen(ipsa->ipsa_dst_cid->ipsid_cid) + 1,
1230 		    sizeof (uint64_t));
1231 		alloclen += dstidsize;
1232 		dstid = B_TRUE;
1233 	}
1234 
1235 	if ((ipsa->ipsa_kmp != 0) || (ipsa->ipsa_kmc != 0))
1236 		alloclen += sizeof (sadb_x_kmc_t);
1237 
1238 	if (ipsa->ipsa_replay != 0) {
1239 		alloclen += sizeof (sadb_x_replay_ctr_t);
1240 	}
1241 
1242 	/* Make sure the allocation length is a multiple of 8 bytes. */
1243 	ASSERT((alloclen & 0x7) == 0);
1244 
1245 	/* XXX Possibly make it esballoc, with a bzero-ing free_ftn. */
1246 	mp = allocb(alloclen, BPRI_HI);
1247 	if (mp == NULL)
1248 		return (NULL);
1249 	bzero(mp->b_rptr, alloclen);
1250 
1251 	mp->b_wptr += alloclen;
1252 	end = mp->b_wptr;
1253 	newsamsg = (sadb_msg_t *)mp->b_rptr;
1254 	*newsamsg = *samsg;
1255 	newsamsg->sadb_msg_len = (uint16_t)SADB_8TO64(alloclen);
1256 
1257 	mutex_enter(&ipsa->ipsa_lock);	/* Since I'm grabbing SA fields... */
1258 
1259 	newsamsg->sadb_msg_satype = ipsa->ipsa_type;
1260 
1261 	assoc = (sadb_sa_t *)(newsamsg + 1);
1262 	assoc->sadb_sa_len = SADB_8TO64(sizeof (*assoc));
1263 	assoc->sadb_sa_exttype = SADB_EXT_SA;
1264 	assoc->sadb_sa_spi = ipsa->ipsa_spi;
1265 	assoc->sadb_sa_replay = ipsa->ipsa_replay_wsize;
1266 	assoc->sadb_sa_state = ipsa->ipsa_state;
1267 	assoc->sadb_sa_auth = ipsa->ipsa_auth_alg;
1268 	assoc->sadb_sa_encrypt = ipsa->ipsa_encr_alg;
1269 	assoc->sadb_sa_flags = ipsa->ipsa_flags;
1270 
1271 	lt = (sadb_lifetime_t *)(assoc + 1);
1272 	lt->sadb_lifetime_len = SADB_8TO64(sizeof (*lt));
1273 	lt->sadb_lifetime_exttype = SADB_EXT_LIFETIME_CURRENT;
1274 	/* We do not support the concept. */
1275 	lt->sadb_lifetime_allocations = 0;
1276 	lt->sadb_lifetime_bytes = ipsa->ipsa_bytes;
1277 	lt->sadb_lifetime_addtime = ipsa->ipsa_addtime;
1278 	lt->sadb_lifetime_usetime = ipsa->ipsa_usetime;
1279 
1280 	if (hard) {
1281 		lt++;
1282 		lt->sadb_lifetime_len = SADB_8TO64(sizeof (*lt));
1283 		lt->sadb_lifetime_exttype = SADB_EXT_LIFETIME_HARD;
1284 		lt->sadb_lifetime_allocations = ipsa->ipsa_hardalloc;
1285 		lt->sadb_lifetime_bytes = ipsa->ipsa_hardbyteslt;
1286 		lt->sadb_lifetime_addtime = ipsa->ipsa_hardaddlt;
1287 		lt->sadb_lifetime_usetime = ipsa->ipsa_harduselt;
1288 	}
1289 
1290 	if (soft) {
1291 		lt++;
1292 		lt->sadb_lifetime_len = SADB_8TO64(sizeof (*lt));
1293 		lt->sadb_lifetime_exttype = SADB_EXT_LIFETIME_SOFT;
1294 		lt->sadb_lifetime_allocations = ipsa->ipsa_softalloc;
1295 		lt->sadb_lifetime_bytes = ipsa->ipsa_softbyteslt;
1296 		lt->sadb_lifetime_addtime = ipsa->ipsa_softaddlt;
1297 		lt->sadb_lifetime_usetime = ipsa->ipsa_softuselt;
1298 	}
1299 
1300 	if (idle) {
1301 		lt++;
1302 		lt->sadb_lifetime_len = SADB_8TO64(sizeof (*lt));
1303 		lt->sadb_lifetime_exttype = SADB_X_EXT_LIFETIME_IDLE;
1304 		lt->sadb_lifetime_addtime = ipsa->ipsa_idleaddlt;
1305 		lt->sadb_lifetime_usetime = ipsa->ipsa_idleuselt;
1306 	}
1307 
1308 	cur = (uint8_t *)(lt + 1);
1309 
1310 	/* NOTE:  Don't fill in ports here if we are a tunnel-mode SA. */
1311 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_SRC, fam,
1312 	    ipsa->ipsa_srcaddr, (!isrc && !idst) ? SA_SRCPORT(ipsa) : 0,
1313 	    SA_PROTO(ipsa), 0);
1314 	if (cur == NULL) {
1315 		freemsg(mp);
1316 		mp = NULL;
1317 		goto bail;
1318 	}
1319 
1320 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_DST, fam,
1321 	    ipsa->ipsa_dstaddr, (!isrc && !idst) ? SA_DSTPORT(ipsa) : 0,
1322 	    SA_PROTO(ipsa), 0);
1323 	if (cur == NULL) {
1324 		freemsg(mp);
1325 		mp = NULL;
1326 		goto bail;
1327 	}
1328 
1329 	if (ipsa->ipsa_flags & IPSA_F_NATT_LOC) {
1330 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_NATT_LOC,
1331 		    fam, &ipsa->ipsa_natt_addr_loc, ipsa->ipsa_local_nat_port,
1332 		    IPPROTO_UDP, 0);
1333 		if (cur == NULL) {
1334 			freemsg(mp);
1335 			mp = NULL;
1336 			goto bail;
1337 		}
1338 	}
1339 
1340 	if (ipsa->ipsa_flags & IPSA_F_NATT_REM) {
1341 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_NATT_REM,
1342 		    fam, &ipsa->ipsa_natt_addr_rem, ipsa->ipsa_remote_nat_port,
1343 		    IPPROTO_UDP, 0);
1344 		if (cur == NULL) {
1345 			freemsg(mp);
1346 			mp = NULL;
1347 			goto bail;
1348 		}
1349 	}
1350 
1351 	/* If we are a tunnel-mode SA, fill in the inner-selectors. */
1352 	if (isrc) {
1353 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_SRC,
1354 		    pfam, ipsa->ipsa_innersrc, SA_SRCPORT(ipsa),
1355 		    SA_IPROTO(ipsa), ipsa->ipsa_innersrcpfx);
1356 		if (cur == NULL) {
1357 			freemsg(mp);
1358 			mp = NULL;
1359 			goto bail;
1360 		}
1361 	}
1362 
1363 	if (idst) {
1364 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_DST,
1365 		    pfam, ipsa->ipsa_innerdst, SA_DSTPORT(ipsa),
1366 		    SA_IPROTO(ipsa), ipsa->ipsa_innerdstpfx);
1367 		if (cur == NULL) {
1368 			freemsg(mp);
1369 			mp = NULL;
1370 			goto bail;
1371 		}
1372 	}
1373 
1374 	if ((ipsa->ipsa_kmp != 0) || (ipsa->ipsa_kmc != 0)) {
1375 		cur = sadb_make_kmc_ext(cur, end,
1376 		    ipsa->ipsa_kmp, ipsa->ipsa_kmc);
1377 		if (cur == NULL) {
1378 			freemsg(mp);
1379 			mp = NULL;
1380 			goto bail;
1381 		}
1382 	}
1383 
1384 	walker = (sadb_ext_t *)cur;
1385 	if (auth) {
1386 		key = (sadb_key_t *)walker;
1387 		key->sadb_key_len = SADB_8TO64(authsize);
1388 		key->sadb_key_exttype = SADB_EXT_KEY_AUTH;
1389 		key->sadb_key_bits = ipsa->ipsa_authkeybits;
1390 		key->sadb_key_reserved = 0;
1391 		bcopy(ipsa->ipsa_authkey, key + 1, ipsa->ipsa_authkeylen);
1392 		walker = (sadb_ext_t *)((uint64_t *)walker +
1393 		    walker->sadb_ext_len);
1394 	}
1395 
1396 	if (encr) {
1397 		uint8_t *buf_ptr;
1398 		key = (sadb_key_t *)walker;
1399 		key->sadb_key_len = SADB_8TO64(encrsize);
1400 		key->sadb_key_exttype = SADB_EXT_KEY_ENCRYPT;
1401 		key->sadb_key_bits = ipsa->ipsa_encrkeybits;
1402 		key->sadb_key_reserved = ipsa->ipsa_saltbits;
1403 		buf_ptr = (uint8_t *)(key + 1);
1404 		bcopy(ipsa->ipsa_encrkey, buf_ptr, ipsa->ipsa_encrkeylen);
1405 		if (ipsa->ipsa_salt != NULL) {
1406 			buf_ptr += ipsa->ipsa_encrkeylen;
1407 			bcopy(ipsa->ipsa_salt, buf_ptr, ipsa->ipsa_saltlen);
1408 		}
1409 		walker = (sadb_ext_t *)((uint64_t *)walker +
1410 		    walker->sadb_ext_len);
1411 	}
1412 
1413 	if (srcid) {
1414 		ident = (sadb_ident_t *)walker;
1415 		ident->sadb_ident_len = SADB_8TO64(srcidsize);
1416 		ident->sadb_ident_exttype = SADB_EXT_IDENTITY_SRC;
1417 		ident->sadb_ident_type = ipsa->ipsa_src_cid->ipsid_type;
1418 		ident->sadb_ident_id = 0;
1419 		ident->sadb_ident_reserved = 0;
1420 		(void) strcpy((char *)(ident + 1),
1421 		    ipsa->ipsa_src_cid->ipsid_cid);
1422 		walker = (sadb_ext_t *)((uint64_t *)walker +
1423 		    walker->sadb_ext_len);
1424 	}
1425 
1426 	if (dstid) {
1427 		ident = (sadb_ident_t *)walker;
1428 		ident->sadb_ident_len = SADB_8TO64(dstidsize);
1429 		ident->sadb_ident_exttype = SADB_EXT_IDENTITY_DST;
1430 		ident->sadb_ident_type = ipsa->ipsa_dst_cid->ipsid_type;
1431 		ident->sadb_ident_id = 0;
1432 		ident->sadb_ident_reserved = 0;
1433 		(void) strcpy((char *)(ident + 1),
1434 		    ipsa->ipsa_dst_cid->ipsid_cid);
1435 		walker = (sadb_ext_t *)((uint64_t *)walker +
1436 		    walker->sadb_ext_len);
1437 	}
1438 
1439 	if (sensinteg) {
1440 		sens = (sadb_sens_t *)walker;
1441 		sadb_sens_from_label(sens, SADB_EXT_SENSITIVITY,
1442 		    ipsa->ipsa_tsl, senslen);
1443 
1444 		walker = (sadb_ext_t *)((uint64_t *)walker +
1445 		    walker->sadb_ext_len);
1446 	}
1447 
1448 	if (osensinteg) {
1449 		sens = (sadb_sens_t *)walker;
1450 
1451 		sadb_sens_from_label(sens, SADB_X_EXT_OUTER_SENS,
1452 		    ipsa->ipsa_otsl, osenslen);
1453 		if (ipsa->ipsa_mac_exempt)
1454 			sens->sadb_x_sens_flags = SADB_X_SENS_IMPLICIT;
1455 
1456 		walker = (sadb_ext_t *)((uint64_t *)walker +
1457 		    walker->sadb_ext_len);
1458 	}
1459 
1460 	if (paired) {
1461 		pair_ext = (sadb_x_pair_t *)walker;
1462 
1463 		pair_ext->sadb_x_pair_len = SADB_8TO64(sizeof (sadb_x_pair_t));
1464 		pair_ext->sadb_x_pair_exttype = SADB_X_EXT_PAIR;
1465 		pair_ext->sadb_x_pair_spi = otherspi;
1466 
1467 		walker = (sadb_ext_t *)((uint64_t *)walker +
1468 		    walker->sadb_ext_len);
1469 	}
1470 
1471 	if (ipsa->ipsa_replay != 0) {
1472 		repl_ctr = (sadb_x_replay_ctr_t *)walker;
1473 		repl_ctr->sadb_x_rc_len = SADB_8TO64(sizeof (*repl_ctr));
1474 		repl_ctr->sadb_x_rc_exttype = SADB_X_EXT_REPLAY_VALUE;
1475 		repl_ctr->sadb_x_rc_replay32 = ipsa->ipsa_replay;
1476 		repl_ctr->sadb_x_rc_replay64 = 0;
1477 		walker = (sadb_ext_t *)(repl_ctr + 1);
1478 	}
1479 
1480 bail:
1481 	/* Pardon any delays... */
1482 	mutex_exit(&ipsa->ipsa_lock);
1483 
1484 	return (mp);
1485 }
1486 
1487 /*
1488  * Strip out key headers or unmarked headers (SADB_EXT_KEY_*, SADB_EXT_UNKNOWN)
1489  * and adjust base message accordingly.
1490  *
1491  * Assume message is pulled up in one piece of contiguous memory.
1492  *
1493  * Say if we start off with:
1494  *
1495  * +------+----+-------------+-----------+---------------+---------------+
1496  * | base | SA | source addr | dest addr | rsrvd. or key | soft lifetime |
1497  * +------+----+-------------+-----------+---------------+---------------+
1498  *
1499  * we will end up with
1500  *
1501  * +------+----+-------------+-----------+---------------+
1502  * | base | SA | source addr | dest addr | soft lifetime |
1503  * +------+----+-------------+-----------+---------------+
1504  */
1505 static void
sadb_strip(sadb_msg_t * samsg)1506 sadb_strip(sadb_msg_t *samsg)
1507 {
1508 	sadb_ext_t *ext;
1509 	uint8_t *target = NULL;
1510 	uint8_t *msgend;
1511 	int sofar = SADB_8TO64(sizeof (*samsg));
1512 	int copylen;
1513 
1514 	ext = (sadb_ext_t *)(samsg + 1);
1515 	msgend = (uint8_t *)samsg;
1516 	msgend += SADB_64TO8(samsg->sadb_msg_len);
1517 	while ((uint8_t *)ext < msgend) {
1518 		if (ext->sadb_ext_type == SADB_EXT_RESERVED ||
1519 		    ext->sadb_ext_type == SADB_EXT_KEY_AUTH ||
1520 		    ext->sadb_ext_type == SADB_X_EXT_EDUMP ||
1521 		    ext->sadb_ext_type == SADB_EXT_KEY_ENCRYPT) {
1522 			/*
1523 			 * Aha!	 I found a header to be erased.
1524 			 */
1525 
1526 			if (target != NULL) {
1527 				/*
1528 				 * If I had a previous header to be erased,
1529 				 * copy over it.  I can get away with just
1530 				 * copying backwards because the target will
1531 				 * always be 8 bytes behind the source.
1532 				 */
1533 				copylen = ((uint8_t *)ext) - (target +
1534 				    SADB_64TO8(
1535 				    ((sadb_ext_t *)target)->sadb_ext_len));
1536 				ovbcopy(((uint8_t *)ext - copylen), target,
1537 				    copylen);
1538 				target += copylen;
1539 				((sadb_ext_t *)target)->sadb_ext_len =
1540 				    SADB_8TO64(((uint8_t *)ext) - target +
1541 				    SADB_64TO8(ext->sadb_ext_len));
1542 			} else {
1543 				target = (uint8_t *)ext;
1544 			}
1545 		} else {
1546 			sofar += ext->sadb_ext_len;
1547 		}
1548 
1549 		ext = (sadb_ext_t *)(((uint64_t *)ext) + ext->sadb_ext_len);
1550 	}
1551 
1552 	ASSERT((uint8_t *)ext == msgend);
1553 
1554 	if (target != NULL) {
1555 		copylen = ((uint8_t *)ext) - (target +
1556 		    SADB_64TO8(((sadb_ext_t *)target)->sadb_ext_len));
1557 		if (copylen != 0)
1558 			ovbcopy(((uint8_t *)ext - copylen), target, copylen);
1559 	}
1560 
1561 	/* Adjust samsg. */
1562 	samsg->sadb_msg_len = (uint16_t)sofar;
1563 
1564 	/* Assume all of the rest is cleared by caller in sadb_pfkey_echo(). */
1565 }
1566 
1567 /*
1568  * AH needs to send an error to PF_KEY.	 Assume mp points to an M_CTL
1569  * followed by an M_DATA with a PF_KEY message in it.  The serial of
1570  * the sending keysock instance is included.
1571  */
1572 void
sadb_pfkey_error(queue_t * pfkey_q,mblk_t * mp,int error,int diagnostic,uint_t serial)1573 sadb_pfkey_error(queue_t *pfkey_q, mblk_t *mp, int error, int diagnostic,
1574     uint_t serial)
1575 {
1576 	mblk_t *msg = mp->b_cont;
1577 	sadb_msg_t *samsg;
1578 	keysock_out_t *kso;
1579 
1580 	/*
1581 	 * Enough functions call this to merit a NULL queue check.
1582 	 */
1583 	if (pfkey_q == NULL) {
1584 		freemsg(mp);
1585 		return;
1586 	}
1587 
1588 	ASSERT(msg != NULL);
1589 	ASSERT((mp->b_wptr - mp->b_rptr) == sizeof (ipsec_info_t));
1590 	ASSERT((msg->b_wptr - msg->b_rptr) >= sizeof (sadb_msg_t));
1591 	samsg = (sadb_msg_t *)msg->b_rptr;
1592 	kso = (keysock_out_t *)mp->b_rptr;
1593 
1594 	kso->ks_out_type = KEYSOCK_OUT;
1595 	kso->ks_out_len = sizeof (*kso);
1596 	kso->ks_out_serial = serial;
1597 
1598 	/*
1599 	 * Only send the base message up in the event of an error.
1600 	 * Don't worry about bzero()-ing, because it was probably bogus
1601 	 * anyway.
1602 	 */
1603 	msg->b_wptr = msg->b_rptr + sizeof (*samsg);
1604 	samsg = (sadb_msg_t *)msg->b_rptr;
1605 	samsg->sadb_msg_len = SADB_8TO64(sizeof (*samsg));
1606 	samsg->sadb_msg_errno = (uint8_t)error;
1607 	if (diagnostic != SADB_X_DIAGNOSTIC_PRESET)
1608 		samsg->sadb_x_msg_diagnostic = (uint16_t)diagnostic;
1609 
1610 	putnext(pfkey_q, mp);
1611 }
1612 
1613 /*
1614  * Send a successful return packet back to keysock via the queue in pfkey_q.
1615  *
1616  * Often, an SA is associated with the reply message, it's passed in if needed,
1617  * and NULL if not.  BTW, that ipsa will have its refcnt appropriately held,
1618  * and the caller will release said refcnt.
1619  */
1620 void
sadb_pfkey_echo(queue_t * pfkey_q,mblk_t * mp,sadb_msg_t * samsg,keysock_in_t * ksi,ipsa_t * ipsa)1621 sadb_pfkey_echo(queue_t *pfkey_q, mblk_t *mp, sadb_msg_t *samsg,
1622     keysock_in_t *ksi, ipsa_t *ipsa)
1623 {
1624 	keysock_out_t *kso;
1625 	mblk_t *mp1;
1626 	sadb_msg_t *newsamsg;
1627 	uint8_t *oldend;
1628 
1629 	ASSERT((mp->b_cont != NULL) &&
1630 	    ((void *)samsg == (void *)mp->b_cont->b_rptr) &&
1631 	    ((void *)mp->b_rptr == (void *)ksi));
1632 
1633 	switch (samsg->sadb_msg_type) {
1634 	case SADB_ADD:
1635 	case SADB_UPDATE:
1636 	case SADB_X_UPDATEPAIR:
1637 	case SADB_X_DELPAIR_STATE:
1638 	case SADB_FLUSH:
1639 	case SADB_DUMP:
1640 		/*
1641 		 * I have all of the message already.  I just need to strip
1642 		 * out the keying material and echo the message back.
1643 		 *
1644 		 * NOTE: for SADB_DUMP, the function sadb_dump() did the
1645 		 * work.  When DUMP reaches here, it should only be a base
1646 		 * message.
1647 		 */
1648 	justecho:
1649 		if (ksi->ks_in_extv[SADB_EXT_KEY_AUTH] != NULL ||
1650 		    ksi->ks_in_extv[SADB_EXT_KEY_ENCRYPT] != NULL ||
1651 		    ksi->ks_in_extv[SADB_X_EXT_EDUMP] != NULL) {
1652 			sadb_strip(samsg);
1653 			/* Assume PF_KEY message is contiguous. */
1654 			ASSERT(mp->b_cont->b_cont == NULL);
1655 			oldend = mp->b_cont->b_wptr;
1656 			mp->b_cont->b_wptr = mp->b_cont->b_rptr +
1657 			    SADB_64TO8(samsg->sadb_msg_len);
1658 			bzero(mp->b_cont->b_wptr, oldend - mp->b_cont->b_wptr);
1659 		}
1660 		break;
1661 	case SADB_GET:
1662 		/*
1663 		 * Do a lot of work here, because of the ipsa I just found.
1664 		 * First construct the new PF_KEY message, then abandon
1665 		 * the old one.
1666 		 */
1667 		mp1 = sadb_sa2msg(ipsa, samsg);
1668 		if (mp1 == NULL) {
1669 			sadb_pfkey_error(pfkey_q, mp, ENOMEM,
1670 			    SADB_X_DIAGNOSTIC_NONE, ksi->ks_in_serial);
1671 			return;
1672 		}
1673 		freemsg(mp->b_cont);
1674 		mp->b_cont = mp1;
1675 		break;
1676 	case SADB_DELETE:
1677 	case SADB_X_DELPAIR:
1678 		if (ipsa == NULL)
1679 			goto justecho;
1680 		/*
1681 		 * Because listening KMds may require more info, treat
1682 		 * DELETE like a special case of GET.
1683 		 */
1684 		mp1 = sadb_sa2msg(ipsa, samsg);
1685 		if (mp1 == NULL) {
1686 			sadb_pfkey_error(pfkey_q, mp, ENOMEM,
1687 			    SADB_X_DIAGNOSTIC_NONE, ksi->ks_in_serial);
1688 			return;
1689 		}
1690 		newsamsg = (sadb_msg_t *)mp1->b_rptr;
1691 		sadb_strip(newsamsg);
1692 		oldend = mp1->b_wptr;
1693 		mp1->b_wptr = mp1->b_rptr + SADB_64TO8(newsamsg->sadb_msg_len);
1694 		bzero(mp1->b_wptr, oldend - mp1->b_wptr);
1695 		freemsg(mp->b_cont);
1696 		mp->b_cont = mp1;
1697 		break;
1698 	default:
1699 		if (mp != NULL)
1700 			freemsg(mp);
1701 		return;
1702 	}
1703 
1704 	/* ksi is now null and void. */
1705 	kso = (keysock_out_t *)ksi;
1706 	kso->ks_out_type = KEYSOCK_OUT;
1707 	kso->ks_out_len = sizeof (*kso);
1708 	kso->ks_out_serial = ksi->ks_in_serial;
1709 	/* We're ready to send... */
1710 	putnext(pfkey_q, mp);
1711 }
1712 
1713 /*
1714  * Set up a global pfkey_q instance for AH, ESP, or some other consumer.
1715  */
1716 void
sadb_keysock_hello(queue_t ** pfkey_qp,queue_t * q,mblk_t * mp,void (* ager)(void *),void * agerarg,timeout_id_t * top,int satype)1717 sadb_keysock_hello(queue_t **pfkey_qp, queue_t *q, mblk_t *mp,
1718     void (*ager)(void *), void *agerarg, timeout_id_t *top, int satype)
1719 {
1720 	keysock_hello_ack_t *kha;
1721 	queue_t *oldq;
1722 
1723 	ASSERT(OTHERQ(q) != NULL);
1724 
1725 	/*
1726 	 * First, check atomically that I'm the first and only keysock
1727 	 * instance.
1728 	 *
1729 	 * Use OTHERQ(q), because qreply(q, mp) == putnext(OTHERQ(q), mp),
1730 	 * and I want this module to say putnext(*_pfkey_q, mp) for PF_KEY
1731 	 * messages.
1732 	 */
1733 
1734 	oldq = atomic_cas_ptr((void **)pfkey_qp, NULL, OTHERQ(q));
1735 	if (oldq != NULL) {
1736 		ASSERT(oldq != q);
1737 		cmn_err(CE_WARN, "Danger!  Multiple keysocks on top of %s.\n",
1738 		    (satype == SADB_SATYPE_ESP)? "ESP" : "AH or other");
1739 		freemsg(mp);
1740 		return;
1741 	}
1742 
1743 	kha = (keysock_hello_ack_t *)mp->b_rptr;
1744 	kha->ks_hello_len = sizeof (keysock_hello_ack_t);
1745 	kha->ks_hello_type = KEYSOCK_HELLO_ACK;
1746 	kha->ks_hello_satype = (uint8_t)satype;
1747 
1748 	/*
1749 	 * If we made it past the atomic_cas_ptr, then we have "exclusive"
1750 	 * access to the timeout handle.  Fire it off after the default ager
1751 	 * interval.
1752 	 */
1753 	*top = qtimeout(*pfkey_qp, ager, agerarg,
1754 	    drv_usectohz(SADB_AGE_INTERVAL_DEFAULT * 1000));
1755 
1756 	putnext(*pfkey_qp, mp);
1757 }
1758 
1759 /*
1760  * Normalize IPv4-mapped IPv6 addresses (and prefixes) as appropriate.
1761  *
1762  * Check addresses themselves for wildcard or multicast.
1763  * Check ire table for local/non-local/broadcast.
1764  */
1765 int
sadb_addrcheck(queue_t * pfkey_q,mblk_t * mp,sadb_ext_t * ext,uint_t serial,netstack_t * ns)1766 sadb_addrcheck(queue_t *pfkey_q, mblk_t *mp, sadb_ext_t *ext, uint_t serial,
1767     netstack_t *ns)
1768 {
1769 	sadb_address_t *addr = (sadb_address_t *)ext;
1770 	struct sockaddr_in *sin;
1771 	struct sockaddr_in6 *sin6;
1772 	int diagnostic, type;
1773 	boolean_t normalized = B_FALSE;
1774 
1775 	ASSERT(ext != NULL);
1776 	ASSERT((ext->sadb_ext_type == SADB_EXT_ADDRESS_SRC) ||
1777 	    (ext->sadb_ext_type == SADB_EXT_ADDRESS_DST) ||
1778 	    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_SRC) ||
1779 	    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_DST) ||
1780 	    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_NATT_LOC) ||
1781 	    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_NATT_REM));
1782 
1783 	/* Assign both sockaddrs, the compiler will do the right thing. */
1784 	sin = (struct sockaddr_in *)(addr + 1);
1785 	sin6 = (struct sockaddr_in6 *)(addr + 1);
1786 
1787 	if (sin6->sin6_family == AF_INET6) {
1788 		if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
1789 			/*
1790 			 * Convert to an AF_INET sockaddr.  This means the
1791 			 * return messages will have the extra space, but have
1792 			 * AF_INET sockaddrs instead of AF_INET6.
1793 			 *
1794 			 * Yes, RFC 2367 isn't clear on what to do here w.r.t.
1795 			 * mapped addresses, but since AF_INET6 ::ffff:<v4> is
1796 			 * equal to AF_INET <v4>, it shouldnt be a huge
1797 			 * problem.
1798 			 */
1799 			sin->sin_family = AF_INET;
1800 			IN6_V4MAPPED_TO_INADDR(&sin6->sin6_addr,
1801 			    &sin->sin_addr);
1802 			bzero(&sin->sin_zero, sizeof (sin->sin_zero));
1803 			normalized = B_TRUE;
1804 		}
1805 	} else if (sin->sin_family != AF_INET) {
1806 		switch (ext->sadb_ext_type) {
1807 		case SADB_EXT_ADDRESS_SRC:
1808 			diagnostic = SADB_X_DIAGNOSTIC_BAD_SRC_AF;
1809 			break;
1810 		case SADB_EXT_ADDRESS_DST:
1811 			diagnostic = SADB_X_DIAGNOSTIC_BAD_DST_AF;
1812 			break;
1813 		case SADB_X_EXT_ADDRESS_INNER_SRC:
1814 			diagnostic = SADB_X_DIAGNOSTIC_BAD_PROXY_AF;
1815 			break;
1816 		case SADB_X_EXT_ADDRESS_INNER_DST:
1817 			diagnostic = SADB_X_DIAGNOSTIC_BAD_INNER_DST_AF;
1818 			break;
1819 		case SADB_X_EXT_ADDRESS_NATT_LOC:
1820 			diagnostic = SADB_X_DIAGNOSTIC_BAD_NATT_LOC_AF;
1821 			break;
1822 		case SADB_X_EXT_ADDRESS_NATT_REM:
1823 			diagnostic = SADB_X_DIAGNOSTIC_BAD_NATT_REM_AF;
1824 			break;
1825 			/* There is no default, see above ASSERT. */
1826 		}
1827 bail:
1828 		if (pfkey_q != NULL) {
1829 			sadb_pfkey_error(pfkey_q, mp, EINVAL, diagnostic,
1830 			    serial);
1831 		} else {
1832 			/*
1833 			 * Scribble in sadb_msg that we got passed in.
1834 			 * Overload "mp" to be an sadb_msg pointer.
1835 			 */
1836 			sadb_msg_t *samsg = (sadb_msg_t *)mp;
1837 
1838 			samsg->sadb_msg_errno = EINVAL;
1839 			samsg->sadb_x_msg_diagnostic = diagnostic;
1840 		}
1841 		return (KS_IN_ADDR_UNKNOWN);
1842 	}
1843 
1844 	if (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_SRC ||
1845 	    ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_DST) {
1846 		/*
1847 		 * We need only check for prefix issues.
1848 		 */
1849 
1850 		/* Set diagnostic now, in case we need it later. */
1851 		diagnostic =
1852 		    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_SRC) ?
1853 		    SADB_X_DIAGNOSTIC_PREFIX_INNER_SRC :
1854 		    SADB_X_DIAGNOSTIC_PREFIX_INNER_DST;
1855 
1856 		if (normalized)
1857 			addr->sadb_address_prefixlen -= 96;
1858 
1859 		/*
1860 		 * Verify and mask out inner-addresses based on prefix length.
1861 		 */
1862 		if (sin->sin_family == AF_INET) {
1863 			if (addr->sadb_address_prefixlen > 32)
1864 				goto bail;
1865 			sin->sin_addr.s_addr &=
1866 			    ip_plen_to_mask(addr->sadb_address_prefixlen);
1867 		} else {
1868 			in6_addr_t mask;
1869 
1870 			ASSERT(sin->sin_family == AF_INET6);
1871 			/*
1872 			 * ip_plen_to_mask_v6() returns NULL if the value in
1873 			 * question is out of range.
1874 			 */
1875 			if (ip_plen_to_mask_v6(addr->sadb_address_prefixlen,
1876 			    &mask) == NULL)
1877 				goto bail;
1878 			sin6->sin6_addr.s6_addr32[0] &= mask.s6_addr32[0];
1879 			sin6->sin6_addr.s6_addr32[1] &= mask.s6_addr32[1];
1880 			sin6->sin6_addr.s6_addr32[2] &= mask.s6_addr32[2];
1881 			sin6->sin6_addr.s6_addr32[3] &= mask.s6_addr32[3];
1882 		}
1883 
1884 		/* We don't care in these cases. */
1885 		return (KS_IN_ADDR_DONTCARE);
1886 	}
1887 
1888 	if (sin->sin_family == AF_INET6) {
1889 		/* Check the easy ones now. */
1890 		if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
1891 			return (KS_IN_ADDR_MBCAST);
1892 		if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
1893 			return (KS_IN_ADDR_UNSPEC);
1894 		/*
1895 		 * At this point, we're a unicast IPv6 address.
1896 		 *
1897 		 * XXX Zones alert -> me/notme decision needs to be tempered
1898 		 * by what zone we're in when we go to zone-aware IPsec.
1899 		 */
1900 		if (ip_type_v6(&sin6->sin6_addr, ns->netstack_ip) ==
1901 		    IRE_LOCAL) {
1902 			/* Hey hey, it's local. */
1903 			return (KS_IN_ADDR_ME);
1904 		}
1905 	} else {
1906 		ASSERT(sin->sin_family == AF_INET);
1907 		if (sin->sin_addr.s_addr == INADDR_ANY)
1908 			return (KS_IN_ADDR_UNSPEC);
1909 		if (CLASSD(sin->sin_addr.s_addr))
1910 			return (KS_IN_ADDR_MBCAST);
1911 		/*
1912 		 * At this point we're a unicast or broadcast IPv4 address.
1913 		 *
1914 		 * Check if the address is IRE_BROADCAST or IRE_LOCAL.
1915 		 *
1916 		 * XXX Zones alert -> me/notme decision needs to be tempered
1917 		 * by what zone we're in when we go to zone-aware IPsec.
1918 		 */
1919 		type = ip_type_v4(sin->sin_addr.s_addr, ns->netstack_ip);
1920 		switch (type) {
1921 		case IRE_LOCAL:
1922 			return (KS_IN_ADDR_ME);
1923 		case IRE_BROADCAST:
1924 			return (KS_IN_ADDR_MBCAST);
1925 		}
1926 	}
1927 
1928 	return (KS_IN_ADDR_NOTME);
1929 }
1930 
1931 /*
1932  * Address normalizations and reality checks for inbound PF_KEY messages.
1933  *
1934  * For the case of src == unspecified AF_INET6, and dst == AF_INET, convert
1935  * the source to AF_INET.  Do the same for the inner sources.
1936  */
1937 boolean_t
sadb_addrfix(keysock_in_t * ksi,queue_t * pfkey_q,mblk_t * mp,netstack_t * ns)1938 sadb_addrfix(keysock_in_t *ksi, queue_t *pfkey_q, mblk_t *mp, netstack_t *ns)
1939 {
1940 	struct sockaddr_in *src, *isrc;
1941 	struct sockaddr_in6 *dst, *idst;
1942 	sadb_address_t *srcext, *dstext;
1943 	uint16_t sport;
1944 	sadb_ext_t **extv = ksi->ks_in_extv;
1945 	int rc;
1946 
1947 	if (extv[SADB_EXT_ADDRESS_SRC] != NULL) {
1948 		rc = sadb_addrcheck(pfkey_q, mp, extv[SADB_EXT_ADDRESS_SRC],
1949 		    ksi->ks_in_serial, ns);
1950 		if (rc == KS_IN_ADDR_UNKNOWN)
1951 			return (B_FALSE);
1952 		if (rc == KS_IN_ADDR_MBCAST) {
1953 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
1954 			    SADB_X_DIAGNOSTIC_BAD_SRC, ksi->ks_in_serial);
1955 			return (B_FALSE);
1956 		}
1957 		ksi->ks_in_srctype = rc;
1958 	}
1959 
1960 	if (extv[SADB_EXT_ADDRESS_DST] != NULL) {
1961 		rc = sadb_addrcheck(pfkey_q, mp, extv[SADB_EXT_ADDRESS_DST],
1962 		    ksi->ks_in_serial, ns);
1963 		if (rc == KS_IN_ADDR_UNKNOWN)
1964 			return (B_FALSE);
1965 		if (rc == KS_IN_ADDR_UNSPEC) {
1966 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
1967 			    SADB_X_DIAGNOSTIC_BAD_DST, ksi->ks_in_serial);
1968 			return (B_FALSE);
1969 		}
1970 		ksi->ks_in_dsttype = rc;
1971 	}
1972 
1973 	/*
1974 	 * NAT-Traversal addrs are simple enough to not require all of
1975 	 * the checks in sadb_addrcheck().  Just normalize or reject if not
1976 	 * AF_INET.
1977 	 */
1978 	if (extv[SADB_X_EXT_ADDRESS_NATT_LOC] != NULL) {
1979 		rc = sadb_addrcheck(pfkey_q, mp,
1980 		    extv[SADB_X_EXT_ADDRESS_NATT_LOC], ksi->ks_in_serial, ns);
1981 
1982 		/*
1983 		 * Local NAT-T addresses never use an IRE_LOCAL, so it should
1984 		 * always be NOTME, or UNSPEC (to handle both tunnel mode
1985 		 * AND local-port flexibility).
1986 		 */
1987 		if (rc != KS_IN_ADDR_NOTME && rc != KS_IN_ADDR_UNSPEC) {
1988 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
1989 			    SADB_X_DIAGNOSTIC_MALFORMED_NATT_LOC,
1990 			    ksi->ks_in_serial);
1991 			return (B_FALSE);
1992 		}
1993 		src = (struct sockaddr_in *)
1994 		    (((sadb_address_t *)extv[SADB_X_EXT_ADDRESS_NATT_LOC]) + 1);
1995 		if (src->sin_family != AF_INET) {
1996 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
1997 			    SADB_X_DIAGNOSTIC_BAD_NATT_LOC_AF,
1998 			    ksi->ks_in_serial);
1999 			return (B_FALSE);
2000 		}
2001 	}
2002 
2003 	if (extv[SADB_X_EXT_ADDRESS_NATT_REM] != NULL) {
2004 		rc = sadb_addrcheck(pfkey_q, mp,
2005 		    extv[SADB_X_EXT_ADDRESS_NATT_REM], ksi->ks_in_serial, ns);
2006 
2007 		/*
2008 		 * Remote NAT-T addresses never use an IRE_LOCAL, so it should
2009 		 * always be NOTME, or UNSPEC if it's a tunnel-mode SA.
2010 		 */
2011 		if (rc != KS_IN_ADDR_NOTME &&
2012 		    !(extv[SADB_X_EXT_ADDRESS_INNER_SRC] != NULL &&
2013 		    rc == KS_IN_ADDR_UNSPEC)) {
2014 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2015 			    SADB_X_DIAGNOSTIC_MALFORMED_NATT_REM,
2016 			    ksi->ks_in_serial);
2017 			return (B_FALSE);
2018 		}
2019 		src = (struct sockaddr_in *)
2020 		    (((sadb_address_t *)extv[SADB_X_EXT_ADDRESS_NATT_REM]) + 1);
2021 		if (src->sin_family != AF_INET) {
2022 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2023 			    SADB_X_DIAGNOSTIC_BAD_NATT_REM_AF,
2024 			    ksi->ks_in_serial);
2025 			return (B_FALSE);
2026 		}
2027 	}
2028 
2029 	if (extv[SADB_X_EXT_ADDRESS_INNER_SRC] != NULL) {
2030 		if (extv[SADB_X_EXT_ADDRESS_INNER_DST] == NULL) {
2031 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2032 			    SADB_X_DIAGNOSTIC_MISSING_INNER_DST,
2033 			    ksi->ks_in_serial);
2034 			return (B_FALSE);
2035 		}
2036 
2037 		if (sadb_addrcheck(pfkey_q, mp,
2038 		    extv[SADB_X_EXT_ADDRESS_INNER_DST], ksi->ks_in_serial, ns)
2039 		    == KS_IN_ADDR_UNKNOWN ||
2040 		    sadb_addrcheck(pfkey_q, mp,
2041 		    extv[SADB_X_EXT_ADDRESS_INNER_SRC], ksi->ks_in_serial, ns)
2042 		    == KS_IN_ADDR_UNKNOWN)
2043 			return (B_FALSE);
2044 
2045 		isrc = (struct sockaddr_in *)
2046 		    (((sadb_address_t *)extv[SADB_X_EXT_ADDRESS_INNER_SRC]) +
2047 		    1);
2048 		idst = (struct sockaddr_in6 *)
2049 		    (((sadb_address_t *)extv[SADB_X_EXT_ADDRESS_INNER_DST]) +
2050 		    1);
2051 		if (isrc->sin_family != idst->sin6_family) {
2052 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2053 			    SADB_X_DIAGNOSTIC_INNER_AF_MISMATCH,
2054 			    ksi->ks_in_serial);
2055 			return (B_FALSE);
2056 		}
2057 	} else if (extv[SADB_X_EXT_ADDRESS_INNER_DST] != NULL) {
2058 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2059 			    SADB_X_DIAGNOSTIC_MISSING_INNER_SRC,
2060 			    ksi->ks_in_serial);
2061 			return (B_FALSE);
2062 	} else {
2063 		isrc = NULL;	/* For inner/outer port check below. */
2064 	}
2065 
2066 	dstext = (sadb_address_t *)extv[SADB_EXT_ADDRESS_DST];
2067 	srcext = (sadb_address_t *)extv[SADB_EXT_ADDRESS_SRC];
2068 
2069 	if (dstext == NULL || srcext == NULL)
2070 		return (B_TRUE);
2071 
2072 	dst = (struct sockaddr_in6 *)(dstext + 1);
2073 	src = (struct sockaddr_in *)(srcext + 1);
2074 
2075 	if (isrc != NULL &&
2076 	    (isrc->sin_port != 0 || idst->sin6_port != 0) &&
2077 	    (src->sin_port != 0 || dst->sin6_port != 0)) {
2078 		/* Can't set inner and outer ports in one SA. */
2079 		sadb_pfkey_error(pfkey_q, mp, EINVAL,
2080 		    SADB_X_DIAGNOSTIC_DUAL_PORT_SETS,
2081 		    ksi->ks_in_serial);
2082 		return (B_FALSE);
2083 	}
2084 
2085 	if (dst->sin6_family == src->sin_family)
2086 		return (B_TRUE);
2087 
2088 	if (srcext->sadb_address_proto != dstext->sadb_address_proto) {
2089 		if (srcext->sadb_address_proto == 0) {
2090 			srcext->sadb_address_proto = dstext->sadb_address_proto;
2091 		} else if (dstext->sadb_address_proto == 0) {
2092 			dstext->sadb_address_proto = srcext->sadb_address_proto;
2093 		} else {
2094 			/* Inequal protocols, neither were 0.  Report error. */
2095 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2096 			    SADB_X_DIAGNOSTIC_PROTO_MISMATCH,
2097 			    ksi->ks_in_serial);
2098 			return (B_FALSE);
2099 		}
2100 	}
2101 
2102 	/*
2103 	 * With the exception of an unspec IPv6 source and an IPv4
2104 	 * destination, address families MUST me matched.
2105 	 */
2106 	if (src->sin_family == AF_INET ||
2107 	    ksi->ks_in_srctype != KS_IN_ADDR_UNSPEC) {
2108 		sadb_pfkey_error(pfkey_q, mp, EINVAL,
2109 		    SADB_X_DIAGNOSTIC_AF_MISMATCH, ksi->ks_in_serial);
2110 		return (B_FALSE);
2111 	}
2112 
2113 	/*
2114 	 * Convert "src" to AF_INET INADDR_ANY.  We rely on sin_port being
2115 	 * in the same place for sockaddr_in and sockaddr_in6.
2116 	 */
2117 	sport = src->sin_port;
2118 	bzero(src, sizeof (*src));
2119 	src->sin_family = AF_INET;
2120 	src->sin_port = sport;
2121 
2122 	return (B_TRUE);
2123 }
2124 
2125 /*
2126  * Set the results in "addrtype", given an IRE as requested by
2127  * sadb_addrcheck().
2128  */
2129 int
sadb_addrset(ire_t * ire)2130 sadb_addrset(ire_t *ire)
2131 {
2132 	if ((ire->ire_type & IRE_BROADCAST) ||
2133 	    (ire->ire_ipversion == IPV4_VERSION && CLASSD(ire->ire_addr)) ||
2134 	    (ire->ire_ipversion == IPV6_VERSION &&
2135 	    IN6_IS_ADDR_MULTICAST(&(ire->ire_addr_v6))))
2136 		return (KS_IN_ADDR_MBCAST);
2137 	if (ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK))
2138 		return (KS_IN_ADDR_ME);
2139 	return (KS_IN_ADDR_NOTME);
2140 }
2141 
2142 /*
2143  * Match primitives..
2144  * !!! TODO: short term: inner selectors
2145  *		ipv6 scope id (ifindex)
2146  * longer term:  zone id.  sensitivity label. uid.
2147  */
2148 boolean_t
sadb_match_spi(ipsa_query_t * sq,ipsa_t * sa)2149 sadb_match_spi(ipsa_query_t *sq, ipsa_t *sa)
2150 {
2151 	return (sq->spi == sa->ipsa_spi);
2152 }
2153 
2154 boolean_t
sadb_match_dst_v6(ipsa_query_t * sq,ipsa_t * sa)2155 sadb_match_dst_v6(ipsa_query_t *sq, ipsa_t *sa)
2156 {
2157 	return (IPSA_ARE_ADDR_EQUAL(sa->ipsa_dstaddr, sq->dstaddr, AF_INET6));
2158 }
2159 
2160 boolean_t
sadb_match_src_v6(ipsa_query_t * sq,ipsa_t * sa)2161 sadb_match_src_v6(ipsa_query_t *sq, ipsa_t *sa)
2162 {
2163 	return (IPSA_ARE_ADDR_EQUAL(sa->ipsa_srcaddr, sq->srcaddr, AF_INET6));
2164 }
2165 
2166 boolean_t
sadb_match_dst_v4(ipsa_query_t * sq,ipsa_t * sa)2167 sadb_match_dst_v4(ipsa_query_t *sq, ipsa_t *sa)
2168 {
2169 	return (sq->dstaddr[0] == sa->ipsa_dstaddr[0]);
2170 }
2171 
2172 boolean_t
sadb_match_src_v4(ipsa_query_t * sq,ipsa_t * sa)2173 sadb_match_src_v4(ipsa_query_t *sq, ipsa_t *sa)
2174 {
2175 	return (sq->srcaddr[0] == sa->ipsa_srcaddr[0]);
2176 }
2177 
2178 boolean_t
sadb_match_dstid(ipsa_query_t * sq,ipsa_t * sa)2179 sadb_match_dstid(ipsa_query_t *sq, ipsa_t *sa)
2180 {
2181 	return ((sa->ipsa_dst_cid != NULL) &&
2182 	    (sq->didtype == sa->ipsa_dst_cid->ipsid_type) &&
2183 	    (strcmp(sq->didstr, sa->ipsa_dst_cid->ipsid_cid) == 0));
2184 
2185 }
2186 boolean_t
sadb_match_srcid(ipsa_query_t * sq,ipsa_t * sa)2187 sadb_match_srcid(ipsa_query_t *sq, ipsa_t *sa)
2188 {
2189 	return ((sa->ipsa_src_cid != NULL) &&
2190 	    (sq->sidtype == sa->ipsa_src_cid->ipsid_type) &&
2191 	    (strcmp(sq->sidstr, sa->ipsa_src_cid->ipsid_cid) == 0));
2192 }
2193 
2194 boolean_t
sadb_match_kmc(ipsa_query_t * sq,ipsa_t * sa)2195 sadb_match_kmc(ipsa_query_t *sq, ipsa_t *sa)
2196 {
2197 #define	M(a, b) (((a) == 0) || ((b) == 0) || ((a) == (b)))
2198 
2199 	return (M(sq->kmc, sa->ipsa_kmc) && M(sq->kmp, sa->ipsa_kmp));
2200 
2201 #undef M
2202 }
2203 
2204 /*
2205  * Common function which extracts several PF_KEY extensions for ease of
2206  * SADB matching.
2207  *
2208  * XXX TODO: weed out ipsa_query_t fields not used during matching
2209  * or afterwards?
2210  */
2211 int
sadb_form_query(keysock_in_t * ksi,uint32_t req,uint32_t match,ipsa_query_t * sq,int * diagnostic)2212 sadb_form_query(keysock_in_t *ksi, uint32_t req, uint32_t match,
2213     ipsa_query_t *sq, int *diagnostic)
2214 {
2215 	int i;
2216 	ipsa_match_fn_t *mfpp = &(sq->matchers[0]);
2217 
2218 	for (i = 0; i < IPSA_NMATCH; i++)
2219 		sq->matchers[i] = NULL;
2220 
2221 	ASSERT((req & ~match) == 0);
2222 
2223 	sq->req = req;
2224 	sq->dstext = (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
2225 	sq->srcext = (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
2226 	sq->assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
2227 
2228 	if ((req & IPSA_Q_DST) && (sq->dstext == NULL)) {
2229 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
2230 		return (EINVAL);
2231 	}
2232 	if ((req & IPSA_Q_SRC) && (sq->srcext == NULL)) {
2233 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SRC;
2234 		return (EINVAL);
2235 	}
2236 	if ((req & IPSA_Q_SA) && (sq->assoc == NULL)) {
2237 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA;
2238 		return (EINVAL);
2239 	}
2240 
2241 	if (match & IPSA_Q_SA) {
2242 		*mfpp++ = sadb_match_spi;
2243 		sq->spi = sq->assoc->sadb_sa_spi;
2244 	}
2245 
2246 	if (sq->dstext != NULL)
2247 		sq->dst = (struct sockaddr_in *)(sq->dstext + 1);
2248 	else {
2249 		sq->dst = NULL;
2250 		sq->dst6 = NULL;
2251 		sq->dstaddr = NULL;
2252 	}
2253 
2254 	if (sq->srcext != NULL)
2255 		sq->src = (struct sockaddr_in *)(sq->srcext + 1);
2256 	else {
2257 		sq->src = NULL;
2258 		sq->src6 = NULL;
2259 		sq->srcaddr = NULL;
2260 	}
2261 
2262 	if (sq->dst != NULL)
2263 		sq->af = sq->dst->sin_family;
2264 	else if (sq->src != NULL)
2265 		sq->af = sq->src->sin_family;
2266 	else
2267 		sq->af = AF_INET;
2268 
2269 	if (sq->af == AF_INET6) {
2270 		if ((match & IPSA_Q_DST) && (sq->dstext != NULL)) {
2271 			*mfpp++ = sadb_match_dst_v6;
2272 			sq->dst6 = (struct sockaddr_in6 *)sq->dst;
2273 			sq->dstaddr = (uint32_t *)&(sq->dst6->sin6_addr);
2274 		} else {
2275 			match &= ~IPSA_Q_DST;
2276 			sq->dstaddr = ALL_ZEROES_PTR;
2277 		}
2278 
2279 		if ((match & IPSA_Q_SRC) && (sq->srcext != NULL)) {
2280 			sq->src6 = (struct sockaddr_in6 *)(sq->srcext + 1);
2281 			sq->srcaddr = (uint32_t *)&sq->src6->sin6_addr;
2282 			if (sq->src6->sin6_family != AF_INET6) {
2283 				*diagnostic = SADB_X_DIAGNOSTIC_AF_MISMATCH;
2284 				return (EINVAL);
2285 			}
2286 			*mfpp++ = sadb_match_src_v6;
2287 		} else {
2288 			match &= ~IPSA_Q_SRC;
2289 			sq->srcaddr = ALL_ZEROES_PTR;
2290 		}
2291 	} else {
2292 		sq->src6 = sq->dst6 = NULL;
2293 		if ((match & IPSA_Q_DST) && (sq->dstext != NULL)) {
2294 			*mfpp++ = sadb_match_dst_v4;
2295 			sq->dstaddr = (uint32_t *)&sq->dst->sin_addr;
2296 		} else {
2297 			match &= ~IPSA_Q_DST;
2298 			sq->dstaddr = ALL_ZEROES_PTR;
2299 		}
2300 		if ((match & IPSA_Q_SRC) && (sq->srcext != NULL)) {
2301 			sq->srcaddr = (uint32_t *)&sq->src->sin_addr;
2302 			if (sq->src->sin_family != AF_INET) {
2303 				*diagnostic = SADB_X_DIAGNOSTIC_AF_MISMATCH;
2304 				return (EINVAL);
2305 			}
2306 			*mfpp++ = sadb_match_src_v4;
2307 		} else {
2308 			match &= ~IPSA_Q_SRC;
2309 			sq->srcaddr = ALL_ZEROES_PTR;
2310 		}
2311 	}
2312 
2313 	sq->dstid = (sadb_ident_t *)ksi->ks_in_extv[SADB_EXT_IDENTITY_DST];
2314 	if ((match & IPSA_Q_DSTID) && (sq->dstid != NULL)) {
2315 		sq->didstr = (char *)(sq->dstid + 1);
2316 		sq->didtype = sq->dstid->sadb_ident_type;
2317 		*mfpp++ = sadb_match_dstid;
2318 	}
2319 
2320 	sq->srcid = (sadb_ident_t *)ksi->ks_in_extv[SADB_EXT_IDENTITY_SRC];
2321 
2322 	if ((match & IPSA_Q_SRCID) && (sq->srcid != NULL)) {
2323 		sq->sidstr = (char *)(sq->srcid + 1);
2324 		sq->sidtype = sq->srcid->sadb_ident_type;
2325 		*mfpp++ = sadb_match_srcid;
2326 	}
2327 
2328 	sq->kmcext = (sadb_x_kmc_t *)ksi->ks_in_extv[SADB_X_EXT_KM_COOKIE];
2329 	sq->kmc = 0;
2330 	sq->kmp = 0;
2331 
2332 	if ((match & IPSA_Q_KMC) && (sq->kmcext)) {
2333 		sq->kmc = sq->kmcext->sadb_x_kmc_cookie;
2334 		sq->kmp = sq->kmcext->sadb_x_kmc_proto;
2335 		*mfpp++ = sadb_match_kmc;
2336 	}
2337 
2338 	if (match & (IPSA_Q_INBOUND|IPSA_Q_OUTBOUND)) {
2339 		if (sq->af == AF_INET6)
2340 			sq->sp = &sq->spp->s_v6;
2341 		else
2342 			sq->sp = &sq->spp->s_v4;
2343 	} else {
2344 		sq->sp = NULL;
2345 	}
2346 
2347 	if (match & IPSA_Q_INBOUND) {
2348 		sq->inhash = INBOUND_HASH(sq->sp, sq->assoc->sadb_sa_spi);
2349 		sq->inbound = &sq->sp->sdb_if[sq->inhash];
2350 	} else {
2351 		sq->inhash = 0;
2352 		sq->inbound = NULL;
2353 	}
2354 
2355 	if (match & IPSA_Q_OUTBOUND) {
2356 		if (sq->af == AF_INET6) {
2357 			sq->outhash = OUTBOUND_HASH_V6(sq->sp, *(sq->dstaddr));
2358 		} else {
2359 			sq->outhash = OUTBOUND_HASH_V4(sq->sp, *(sq->dstaddr));
2360 		}
2361 		sq->outbound = &sq->sp->sdb_of[sq->outhash];
2362 	} else {
2363 		sq->outhash = 0;
2364 		sq->outbound = NULL;
2365 	}
2366 	sq->match = match;
2367 	return (0);
2368 }
2369 
2370 /*
2371  * Match an initialized query structure with a security association;
2372  * return B_TRUE on a match, B_FALSE on a miss.
2373  * Applies match functions set up by sadb_form_query() until one returns false.
2374  */
2375 boolean_t
sadb_match_query(ipsa_query_t * sq,ipsa_t * sa)2376 sadb_match_query(ipsa_query_t *sq, ipsa_t *sa)
2377 {
2378 	ipsa_match_fn_t *mfpp = &(sq->matchers[0]);
2379 	ipsa_match_fn_t mfp;
2380 
2381 	for (mfp = *mfpp++; mfp != NULL; mfp = *mfpp++) {
2382 		if (!mfp(sq, sa))
2383 			return (B_FALSE);
2384 	}
2385 	return (B_TRUE);
2386 }
2387 
2388 /*
2389  * Walker callback function to delete sa's based on src/dst address.
2390  * Assumes that we're called with *head locked, no other locks held;
2391  * Conveniently, and not coincidentally, this is both what sadb_walker
2392  * gives us and also what sadb_unlinkassoc expects.
2393  */
2394 struct sadb_purge_state
2395 {
2396 	ipsa_query_t sq;
2397 	boolean_t inbnd;
2398 	uint8_t sadb_sa_state;
2399 };
2400 
2401 static void
sadb_purge_cb(isaf_t * head,ipsa_t * entry,void * cookie)2402 sadb_purge_cb(isaf_t *head, ipsa_t *entry, void *cookie)
2403 {
2404 	struct sadb_purge_state *ps = (struct sadb_purge_state *)cookie;
2405 
2406 	ASSERT(MUTEX_HELD(&head->isaf_lock));
2407 
2408 	mutex_enter(&entry->ipsa_lock);
2409 
2410 	if (entry->ipsa_state == IPSA_STATE_LARVAL ||
2411 	    !sadb_match_query(&ps->sq, entry)) {
2412 		mutex_exit(&entry->ipsa_lock);
2413 		return;
2414 	}
2415 
2416 	if (ps->inbnd) {
2417 		sadb_delete_cluster(entry);
2418 	}
2419 	entry->ipsa_state = IPSA_STATE_DEAD;
2420 	(void) sadb_torch_assoc(head, entry);
2421 }
2422 
2423 /*
2424  * Common code to purge an SA with a matching src or dst address.
2425  * Don't kill larval SA's in such a purge.
2426  */
2427 int
sadb_purge_sa(mblk_t * mp,keysock_in_t * ksi,sadb_t * sp,int * diagnostic,queue_t * pfkey_q)2428 sadb_purge_sa(mblk_t *mp, keysock_in_t *ksi, sadb_t *sp,
2429 	int *diagnostic, queue_t *pfkey_q)
2430 {
2431 	struct sadb_purge_state ps;
2432 	int error = sadb_form_query(ksi, 0,
2433 	    IPSA_Q_SRC|IPSA_Q_DST|IPSA_Q_SRCID|IPSA_Q_DSTID|IPSA_Q_KMC,
2434 	    &ps.sq, diagnostic);
2435 
2436 	if (error != 0)
2437 		return (error);
2438 
2439 	/*
2440 	 * This is simple, crude, and effective.
2441 	 * Unimplemented optimizations (TBD):
2442 	 * - we can limit how many places we search based on where we
2443 	 * think the SA is filed.
2444 	 * - if we get a dst address, we can hash based on dst addr to find
2445 	 * the correct bucket in the outbound table.
2446 	 */
2447 	ps.inbnd = B_TRUE;
2448 	sadb_walker(sp->sdb_if, sp->sdb_hashsize, sadb_purge_cb, &ps);
2449 	ps.inbnd = B_FALSE;
2450 	sadb_walker(sp->sdb_of, sp->sdb_hashsize, sadb_purge_cb, &ps);
2451 
2452 	ASSERT(mp->b_cont != NULL);
2453 	sadb_pfkey_echo(pfkey_q, mp, (sadb_msg_t *)mp->b_cont->b_rptr, ksi,
2454 	    NULL);
2455 	return (0);
2456 }
2457 
2458 static void
sadb_delpair_state_one(isaf_t * head,ipsa_t * entry,void * cookie)2459 sadb_delpair_state_one(isaf_t *head, ipsa_t *entry, void *cookie)
2460 {
2461 	struct sadb_purge_state *ps = (struct sadb_purge_state *)cookie;
2462 	isaf_t  *inbound_bucket;
2463 	ipsa_t *peer_assoc;
2464 	ipsa_query_t *sq = &ps->sq;
2465 
2466 	ASSERT(MUTEX_HELD(&head->isaf_lock));
2467 
2468 	mutex_enter(&entry->ipsa_lock);
2469 
2470 	if ((entry->ipsa_state != ps->sadb_sa_state) ||
2471 	    ((sq->srcaddr != NULL) &&
2472 	    !IPSA_ARE_ADDR_EQUAL(entry->ipsa_srcaddr, sq->srcaddr, sq->af))) {
2473 		mutex_exit(&entry->ipsa_lock);
2474 		return;
2475 	}
2476 
2477 	/*
2478 	 * The isaf_t *, which is passed in , is always an outbound bucket,
2479 	 * and we are preserving the outbound-then-inbound hash-bucket lock
2480 	 * ordering. The sadb_walker() which triggers this function is called
2481 	 * only on the outbound fanout, and the corresponding inbound bucket
2482 	 * lock is safe to acquire here.
2483 	 */
2484 
2485 	if (entry->ipsa_haspeer) {
2486 		inbound_bucket = INBOUND_BUCKET(sq->sp, entry->ipsa_spi);
2487 		mutex_enter(&inbound_bucket->isaf_lock);
2488 		peer_assoc = ipsec_getassocbyspi(inbound_bucket,
2489 		    entry->ipsa_spi, entry->ipsa_srcaddr,
2490 		    entry->ipsa_dstaddr, entry->ipsa_addrfam);
2491 	} else {
2492 		inbound_bucket = INBOUND_BUCKET(sq->sp, entry->ipsa_otherspi);
2493 		mutex_enter(&inbound_bucket->isaf_lock);
2494 		peer_assoc = ipsec_getassocbyspi(inbound_bucket,
2495 		    entry->ipsa_otherspi, entry->ipsa_dstaddr,
2496 		    entry->ipsa_srcaddr, entry->ipsa_addrfam);
2497 	}
2498 
2499 	entry->ipsa_state = IPSA_STATE_DEAD;
2500 	(void) sadb_torch_assoc(head, entry);
2501 	if (peer_assoc != NULL) {
2502 		mutex_enter(&peer_assoc->ipsa_lock);
2503 		peer_assoc->ipsa_state = IPSA_STATE_DEAD;
2504 		(void) sadb_torch_assoc(inbound_bucket, peer_assoc);
2505 	}
2506 	mutex_exit(&inbound_bucket->isaf_lock);
2507 }
2508 
2509 static int
sadb_delpair_state(mblk_t * mp,keysock_in_t * ksi,sadbp_t * spp,int * diagnostic,queue_t * pfkey_q)2510 sadb_delpair_state(mblk_t *mp, keysock_in_t *ksi, sadbp_t *spp,
2511     int *diagnostic, queue_t *pfkey_q)
2512 {
2513 	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
2514 	struct sadb_purge_state ps;
2515 	int error;
2516 
2517 	ps.sq.spp = spp;		/* XXX param */
2518 
2519 	error = sadb_form_query(ksi, IPSA_Q_DST|IPSA_Q_SRC,
2520 	    IPSA_Q_SRC|IPSA_Q_DST|IPSA_Q_SRCID|IPSA_Q_DSTID|IPSA_Q_KMC,
2521 	    &ps.sq, diagnostic);
2522 	if (error != 0)
2523 		return (error);
2524 
2525 	ps.inbnd = B_FALSE;
2526 	ps.sadb_sa_state = assoc->sadb_sa_state;
2527 	sadb_walker(ps.sq.sp->sdb_of, ps.sq.sp->sdb_hashsize,
2528 	    sadb_delpair_state_one, &ps);
2529 
2530 	ASSERT(mp->b_cont != NULL);
2531 	sadb_pfkey_echo(pfkey_q, mp, (sadb_msg_t *)mp->b_cont->b_rptr,
2532 	    ksi, NULL);
2533 	return (0);
2534 }
2535 
2536 /*
2537  * Common code to delete/get an SA.
2538  */
2539 int
sadb_delget_sa(mblk_t * mp,keysock_in_t * ksi,sadbp_t * spp,int * diagnostic,queue_t * pfkey_q,uint8_t sadb_msg_type)2540 sadb_delget_sa(mblk_t *mp, keysock_in_t *ksi, sadbp_t *spp,
2541     int *diagnostic, queue_t *pfkey_q, uint8_t sadb_msg_type)
2542 {
2543 	ipsa_query_t sq;
2544 	ipsa_t *echo_target = NULL;
2545 	ipsap_t ipsapp;
2546 	uint_t	error = 0;
2547 
2548 	if (sadb_msg_type == SADB_X_DELPAIR_STATE)
2549 		return (sadb_delpair_state(mp, ksi, spp, diagnostic, pfkey_q));
2550 
2551 	sq.spp = spp;		/* XXX param */
2552 	error = sadb_form_query(ksi, IPSA_Q_DST|IPSA_Q_SA,
2553 	    IPSA_Q_SRC|IPSA_Q_DST|IPSA_Q_SA|IPSA_Q_INBOUND|IPSA_Q_OUTBOUND,
2554 	    &sq, diagnostic);
2555 	if (error != 0)
2556 		return (error);
2557 
2558 	error = get_ipsa_pair(&sq, &ipsapp, diagnostic);
2559 	if (error != 0) {
2560 		return (error);
2561 	}
2562 
2563 	echo_target = ipsapp.ipsap_sa_ptr;
2564 	if (echo_target == NULL)
2565 		echo_target = ipsapp.ipsap_psa_ptr;
2566 
2567 	if (sadb_msg_type == SADB_DELETE || sadb_msg_type == SADB_X_DELPAIR) {
2568 		/*
2569 		 * Bucket locks will be required if SA is actually unlinked.
2570 		 * get_ipsa_pair() returns valid hash bucket pointers even
2571 		 * if it can't find a pair SA pointer. To prevent a potential
2572 		 * deadlock, always lock the outbound bucket before the inbound.
2573 		 */
2574 		if (ipsapp.in_inbound_table) {
2575 			mutex_enter(&ipsapp.ipsap_pbucket->isaf_lock);
2576 			mutex_enter(&ipsapp.ipsap_bucket->isaf_lock);
2577 		} else {
2578 			mutex_enter(&ipsapp.ipsap_bucket->isaf_lock);
2579 			mutex_enter(&ipsapp.ipsap_pbucket->isaf_lock);
2580 		}
2581 
2582 		if (ipsapp.ipsap_sa_ptr != NULL) {
2583 			mutex_enter(&ipsapp.ipsap_sa_ptr->ipsa_lock);
2584 			if (ipsapp.ipsap_sa_ptr->ipsa_flags & IPSA_F_INBOUND) {
2585 				sadb_delete_cluster(ipsapp.ipsap_sa_ptr);
2586 			}
2587 			ipsapp.ipsap_sa_ptr->ipsa_state = IPSA_STATE_DEAD;
2588 			(void) sadb_torch_assoc(ipsapp.ipsap_bucket,
2589 			    ipsapp.ipsap_sa_ptr);
2590 			/*
2591 			 * sadb_torch_assoc() releases the ipsa_lock
2592 			 * and calls sadb_unlinkassoc() which does a
2593 			 * IPSA_REFRELE.
2594 			 */
2595 		}
2596 		if (ipsapp.ipsap_psa_ptr != NULL) {
2597 			mutex_enter(&ipsapp.ipsap_psa_ptr->ipsa_lock);
2598 			if (sadb_msg_type == SADB_X_DELPAIR ||
2599 			    ipsapp.ipsap_psa_ptr->ipsa_haspeer) {
2600 				if (ipsapp.ipsap_psa_ptr->ipsa_flags &
2601 				    IPSA_F_INBOUND) {
2602 					sadb_delete_cluster
2603 					    (ipsapp.ipsap_psa_ptr);
2604 				}
2605 				ipsapp.ipsap_psa_ptr->ipsa_state =
2606 				    IPSA_STATE_DEAD;
2607 				(void) sadb_torch_assoc(ipsapp.ipsap_pbucket,
2608 				    ipsapp.ipsap_psa_ptr);
2609 			} else {
2610 				/*
2611 				 * Only half of the "pair" has been deleted.
2612 				 * Update the remaining SA and remove references
2613 				 * to its pair SA, which is now gone.
2614 				 */
2615 				ipsapp.ipsap_psa_ptr->ipsa_otherspi = 0;
2616 				ipsapp.ipsap_psa_ptr->ipsa_flags &=
2617 				    ~IPSA_F_PAIRED;
2618 				mutex_exit(&ipsapp.ipsap_psa_ptr->ipsa_lock);
2619 			}
2620 		} else if (sadb_msg_type == SADB_X_DELPAIR) {
2621 			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_SA_NOTFOUND;
2622 			error = ESRCH;
2623 		}
2624 		mutex_exit(&ipsapp.ipsap_bucket->isaf_lock);
2625 		mutex_exit(&ipsapp.ipsap_pbucket->isaf_lock);
2626 	}
2627 
2628 	ASSERT(mp->b_cont != NULL);
2629 
2630 	if (error == 0)
2631 		sadb_pfkey_echo(pfkey_q, mp, (sadb_msg_t *)
2632 		    mp->b_cont->b_rptr, ksi, echo_target);
2633 
2634 	destroy_ipsa_pair(&ipsapp);
2635 
2636 	return (error);
2637 }
2638 
2639 /*
2640  * This function takes a sadb_sa_t and finds the ipsa_t structure
2641  * and the isaf_t (hash bucket) that its stored under. If the security
2642  * association has a peer, the ipsa_t structure and bucket for that security
2643  * association are also searched for. The "pair" of ipsa_t's and isaf_t's
2644  * are returned as a ipsap_t.
2645  *
2646  * The hash buckets are returned for convenience, if the calling function
2647  * needs to use the hash bucket locks, say to remove the SA's, it should
2648  * take care to observe the convention of locking outbound bucket then
2649  * inbound bucket. The flag in_inbound_table provides direction.
2650  *
2651  * Note that a "pair" is defined as one (but not both) of the following:
2652  *
2653  * A security association which has a soft reference to another security
2654  * association via its SPI.
2655  *
2656  * A security association that is not obviously "inbound" or "outbound" so
2657  * it appears in both hash tables, the "peer" being the same security
2658  * association in the other hash table.
2659  *
2660  * This function will return NULL if the ipsa_t can't be found in the
2661  * inbound or outbound  hash tables (not found). If only one ipsa_t is
2662  * found, the pair ipsa_t will be NULL. Both isaf_t values are valid
2663  * provided at least one ipsa_t is found.
2664  */
2665 static int
get_ipsa_pair(ipsa_query_t * sq,ipsap_t * ipsapp,int * diagnostic)2666 get_ipsa_pair(ipsa_query_t *sq, ipsap_t *ipsapp, int *diagnostic)
2667 {
2668 	uint32_t pair_srcaddr[IPSA_MAX_ADDRLEN];
2669 	uint32_t pair_dstaddr[IPSA_MAX_ADDRLEN];
2670 	uint32_t pair_spi;
2671 
2672 	init_ipsa_pair(ipsapp);
2673 
2674 	ipsapp->in_inbound_table = B_FALSE;
2675 
2676 	/* Lock down both buckets. */
2677 	mutex_enter(&sq->outbound->isaf_lock);
2678 	mutex_enter(&sq->inbound->isaf_lock);
2679 
2680 	if (sq->assoc->sadb_sa_flags & IPSA_F_INBOUND) {
2681 		ipsapp->ipsap_sa_ptr = ipsec_getassocbyspi(sq->inbound,
2682 		    sq->assoc->sadb_sa_spi, sq->srcaddr, sq->dstaddr, sq->af);
2683 		if (ipsapp->ipsap_sa_ptr != NULL) {
2684 			ipsapp->ipsap_bucket = sq->inbound;
2685 			ipsapp->ipsap_pbucket = sq->outbound;
2686 			ipsapp->in_inbound_table = B_TRUE;
2687 		} else {
2688 			ipsapp->ipsap_sa_ptr = ipsec_getassocbyspi(sq->outbound,
2689 			    sq->assoc->sadb_sa_spi, sq->srcaddr, sq->dstaddr,
2690 			    sq->af);
2691 			ipsapp->ipsap_bucket = sq->outbound;
2692 			ipsapp->ipsap_pbucket = sq->inbound;
2693 		}
2694 	} else {
2695 		/* IPSA_F_OUTBOUND is set *or* no directions flags set. */
2696 		ipsapp->ipsap_sa_ptr =
2697 		    ipsec_getassocbyspi(sq->outbound,
2698 		    sq->assoc->sadb_sa_spi, sq->srcaddr, sq->dstaddr, sq->af);
2699 		if (ipsapp->ipsap_sa_ptr != NULL) {
2700 			ipsapp->ipsap_bucket = sq->outbound;
2701 			ipsapp->ipsap_pbucket = sq->inbound;
2702 		} else {
2703 			ipsapp->ipsap_sa_ptr = ipsec_getassocbyspi(sq->inbound,
2704 			    sq->assoc->sadb_sa_spi, sq->srcaddr, sq->dstaddr,
2705 			    sq->af);
2706 			ipsapp->ipsap_bucket = sq->inbound;
2707 			ipsapp->ipsap_pbucket = sq->outbound;
2708 			if (ipsapp->ipsap_sa_ptr != NULL)
2709 				ipsapp->in_inbound_table = B_TRUE;
2710 		}
2711 	}
2712 
2713 	if (ipsapp->ipsap_sa_ptr == NULL) {
2714 		mutex_exit(&sq->outbound->isaf_lock);
2715 		mutex_exit(&sq->inbound->isaf_lock);
2716 		*diagnostic = SADB_X_DIAGNOSTIC_SA_NOTFOUND;
2717 		return (ESRCH);
2718 	}
2719 
2720 	if ((ipsapp->ipsap_sa_ptr->ipsa_state == IPSA_STATE_LARVAL) &&
2721 	    ipsapp->in_inbound_table) {
2722 		mutex_exit(&sq->outbound->isaf_lock);
2723 		mutex_exit(&sq->inbound->isaf_lock);
2724 		return (0);
2725 	}
2726 
2727 	mutex_enter(&ipsapp->ipsap_sa_ptr->ipsa_lock);
2728 	if (ipsapp->ipsap_sa_ptr->ipsa_haspeer) {
2729 		/*
2730 		 * haspeer implies no sa_pairing, look for same spi
2731 		 * in other hashtable.
2732 		 */
2733 		ipsapp->ipsap_psa_ptr =
2734 		    ipsec_getassocbyspi(ipsapp->ipsap_pbucket,
2735 		    sq->assoc->sadb_sa_spi, sq->srcaddr, sq->dstaddr, sq->af);
2736 		mutex_exit(&ipsapp->ipsap_sa_ptr->ipsa_lock);
2737 		mutex_exit(&sq->outbound->isaf_lock);
2738 		mutex_exit(&sq->inbound->isaf_lock);
2739 		return (0);
2740 	}
2741 	pair_spi = ipsapp->ipsap_sa_ptr->ipsa_otherspi;
2742 	IPSA_COPY_ADDR(&pair_srcaddr,
2743 	    ipsapp->ipsap_sa_ptr->ipsa_srcaddr, sq->af);
2744 	IPSA_COPY_ADDR(&pair_dstaddr,
2745 	    ipsapp->ipsap_sa_ptr->ipsa_dstaddr, sq->af);
2746 	mutex_exit(&ipsapp->ipsap_sa_ptr->ipsa_lock);
2747 	mutex_exit(&sq->inbound->isaf_lock);
2748 	mutex_exit(&sq->outbound->isaf_lock);
2749 
2750 	if (pair_spi == 0) {
2751 		ASSERT(ipsapp->ipsap_bucket != NULL);
2752 		ASSERT(ipsapp->ipsap_pbucket != NULL);
2753 		return (0);
2754 	}
2755 
2756 	/* found sa in outbound sadb, peer should be inbound */
2757 
2758 	if (ipsapp->in_inbound_table) {
2759 		/* Found SA in inbound table, pair will be in outbound. */
2760 		if (sq->af == AF_INET6) {
2761 			ipsapp->ipsap_pbucket = OUTBOUND_BUCKET_V6(sq->sp,
2762 			    *(uint32_t *)pair_srcaddr);
2763 		} else {
2764 			ipsapp->ipsap_pbucket = OUTBOUND_BUCKET_V4(sq->sp,
2765 			    *(uint32_t *)pair_srcaddr);
2766 		}
2767 	} else {
2768 		ipsapp->ipsap_pbucket = INBOUND_BUCKET(sq->sp, pair_spi);
2769 	}
2770 	mutex_enter(&ipsapp->ipsap_pbucket->isaf_lock);
2771 	ipsapp->ipsap_psa_ptr = ipsec_getassocbyspi(ipsapp->ipsap_pbucket,
2772 	    pair_spi, pair_dstaddr, pair_srcaddr, sq->af);
2773 	mutex_exit(&ipsapp->ipsap_pbucket->isaf_lock);
2774 	ASSERT(ipsapp->ipsap_bucket != NULL);
2775 	ASSERT(ipsapp->ipsap_pbucket != NULL);
2776 	return (0);
2777 }
2778 
2779 /*
2780  * Perform NAT-traversal cached checksum offset calculations here.
2781  */
2782 static void
sadb_nat_calculations(ipsa_t * newbie,sadb_address_t * natt_loc_ext,sadb_address_t * natt_rem_ext,uint32_t * src_addr_ptr,uint32_t * dst_addr_ptr)2783 sadb_nat_calculations(ipsa_t *newbie, sadb_address_t *natt_loc_ext,
2784     sadb_address_t *natt_rem_ext, uint32_t *src_addr_ptr,
2785     uint32_t *dst_addr_ptr)
2786 {
2787 	struct sockaddr_in *natt_loc, *natt_rem;
2788 	uint32_t *natt_loc_ptr = NULL, *natt_rem_ptr = NULL;
2789 	uint32_t running_sum = 0;
2790 
2791 #define	DOWN_SUM(x) (x) = ((x) & 0xFFFF) +	 ((x) >> 16)
2792 
2793 	if (natt_rem_ext != NULL) {
2794 		uint32_t l_src;
2795 		uint32_t l_rem;
2796 
2797 		natt_rem = (struct sockaddr_in *)(natt_rem_ext + 1);
2798 
2799 		/* Ensured by sadb_addrfix(). */
2800 		ASSERT(natt_rem->sin_family == AF_INET);
2801 
2802 		natt_rem_ptr = (uint32_t *)(&natt_rem->sin_addr);
2803 		newbie->ipsa_remote_nat_port = natt_rem->sin_port;
2804 		l_src = *src_addr_ptr;
2805 		l_rem = *natt_rem_ptr;
2806 
2807 		/* Instead of IPSA_COPY_ADDR(), just copy first 32 bits. */
2808 		newbie->ipsa_natt_addr_rem = *natt_rem_ptr;
2809 
2810 		l_src = ntohl(l_src);
2811 		DOWN_SUM(l_src);
2812 		DOWN_SUM(l_src);
2813 		l_rem = ntohl(l_rem);
2814 		DOWN_SUM(l_rem);
2815 		DOWN_SUM(l_rem);
2816 
2817 		/*
2818 		 * We're 1's complement for checksums, so check for wraparound
2819 		 * here.
2820 		 */
2821 		if (l_rem > l_src)
2822 			l_src--;
2823 
2824 		running_sum += l_src - l_rem;
2825 
2826 		DOWN_SUM(running_sum);
2827 		DOWN_SUM(running_sum);
2828 	}
2829 
2830 	if (natt_loc_ext != NULL) {
2831 		natt_loc = (struct sockaddr_in *)(natt_loc_ext + 1);
2832 
2833 		/* Ensured by sadb_addrfix(). */
2834 		ASSERT(natt_loc->sin_family == AF_INET);
2835 
2836 		natt_loc_ptr = (uint32_t *)(&natt_loc->sin_addr);
2837 		newbie->ipsa_local_nat_port = natt_loc->sin_port;
2838 
2839 		/* Instead of IPSA_COPY_ADDR(), just copy first 32 bits. */
2840 		newbie->ipsa_natt_addr_loc = *natt_loc_ptr;
2841 
2842 		/*
2843 		 * NAT-T port agility means we may have natt_loc_ext, but
2844 		 * only for a local-port change.
2845 		 */
2846 		if (natt_loc->sin_addr.s_addr != INADDR_ANY) {
2847 			uint32_t l_dst = ntohl(*dst_addr_ptr);
2848 			uint32_t l_loc = ntohl(*natt_loc_ptr);
2849 
2850 			DOWN_SUM(l_loc);
2851 			DOWN_SUM(l_loc);
2852 			DOWN_SUM(l_dst);
2853 			DOWN_SUM(l_dst);
2854 
2855 			/*
2856 			 * We're 1's complement for checksums, so check for
2857 			 * wraparound here.
2858 			 */
2859 			if (l_loc > l_dst)
2860 				l_dst--;
2861 
2862 			running_sum += l_dst - l_loc;
2863 			DOWN_SUM(running_sum);
2864 			DOWN_SUM(running_sum);
2865 		}
2866 	}
2867 
2868 	newbie->ipsa_inbound_cksum = running_sum;
2869 #undef DOWN_SUM
2870 }
2871 
2872 /*
2873  * This function is called from consumers that need to insert a fully-grown
2874  * security association into its tables.  This function takes into account that
2875  * SAs can be "inbound", "outbound", or "both".	 The "primary" and "secondary"
2876  * hash bucket parameters are set in order of what the SA will be most of the
2877  * time.  (For example, an SA with an unspecified source, and a multicast
2878  * destination will primarily be an outbound SA.  OTOH, if that destination
2879  * is unicast for this node, then the SA will primarily be inbound.)
2880  *
2881  * It takes a lot of parameters because even if clone is B_FALSE, this needs
2882  * to check both buckets for purposes of collision.
2883  *
2884  * Return 0 upon success.  Return various errnos (ENOMEM, EEXIST) for
2885  * various error conditions.  We may need to set samsg->sadb_x_msg_diagnostic
2886  * with additional diagnostic information because there is at least one EINVAL
2887  * case here.
2888  */
2889 int
sadb_common_add(queue_t * pfkey_q,mblk_t * mp,sadb_msg_t * samsg,keysock_in_t * ksi,isaf_t * primary,isaf_t * secondary,ipsa_t * newbie,boolean_t clone,boolean_t is_inbound,int * diagnostic,netstack_t * ns,sadbp_t * spp)2890 sadb_common_add(queue_t *pfkey_q, mblk_t *mp, sadb_msg_t *samsg,
2891     keysock_in_t *ksi, isaf_t *primary, isaf_t *secondary,
2892     ipsa_t *newbie, boolean_t clone, boolean_t is_inbound, int *diagnostic,
2893     netstack_t *ns, sadbp_t *spp)
2894 {
2895 	ipsa_t *newbie_clone = NULL, *scratch;
2896 	ipsap_t ipsapp;
2897 	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
2898 	sadb_address_t *srcext =
2899 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
2900 	sadb_address_t *dstext =
2901 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
2902 	sadb_address_t *isrcext =
2903 	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_SRC];
2904 	sadb_address_t *idstext =
2905 	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_DST];
2906 	sadb_x_kmc_t *kmcext =
2907 	    (sadb_x_kmc_t *)ksi->ks_in_extv[SADB_X_EXT_KM_COOKIE];
2908 	sadb_key_t *akey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_AUTH];
2909 	sadb_key_t *ekey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_ENCRYPT];
2910 	sadb_sens_t *sens =
2911 	    (sadb_sens_t *)ksi->ks_in_extv[SADB_EXT_SENSITIVITY];
2912 	sadb_sens_t *osens =
2913 	    (sadb_sens_t *)ksi->ks_in_extv[SADB_X_EXT_OUTER_SENS];
2914 	sadb_x_pair_t *pair_ext =
2915 	    (sadb_x_pair_t *)ksi->ks_in_extv[SADB_X_EXT_PAIR];
2916 	sadb_x_replay_ctr_t *replayext =
2917 	    (sadb_x_replay_ctr_t *)ksi->ks_in_extv[SADB_X_EXT_REPLAY_VALUE];
2918 	uint8_t protocol =
2919 	    (samsg->sadb_msg_satype == SADB_SATYPE_AH) ? IPPROTO_AH:IPPROTO_ESP;
2920 	int salt_offset;
2921 	uint8_t *buf_ptr;
2922 	struct sockaddr_in *src, *dst, *isrc, *idst;
2923 	struct sockaddr_in6 *src6, *dst6, *isrc6, *idst6;
2924 	sadb_lifetime_t *soft =
2925 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_SOFT];
2926 	sadb_lifetime_t *hard =
2927 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_HARD];
2928 	sadb_lifetime_t	*idle =
2929 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_X_EXT_LIFETIME_IDLE];
2930 	sa_family_t af;
2931 	int error = 0;
2932 	boolean_t isupdate = (newbie != NULL);
2933 	uint32_t *src_addr_ptr, *dst_addr_ptr, *isrc_addr_ptr, *idst_addr_ptr;
2934 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
2935 	ip_stack_t 	*ipst = ns->netstack_ip;
2936 	ipsec_alginfo_t *alg;
2937 	int		rcode;
2938 	boolean_t	async = B_FALSE;
2939 
2940 	init_ipsa_pair(&ipsapp);
2941 
2942 	if (srcext == NULL) {
2943 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SRC;
2944 		return (EINVAL);
2945 	}
2946 	if (dstext == NULL) {
2947 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
2948 		return (EINVAL);
2949 	}
2950 	if (assoc == NULL) {
2951 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA;
2952 		return (EINVAL);
2953 	}
2954 
2955 	src = (struct sockaddr_in *)(srcext + 1);
2956 	src6 = (struct sockaddr_in6 *)(srcext + 1);
2957 	dst = (struct sockaddr_in *)(dstext + 1);
2958 	dst6 = (struct sockaddr_in6 *)(dstext + 1);
2959 	if (isrcext != NULL) {
2960 		isrc = (struct sockaddr_in *)(isrcext + 1);
2961 		isrc6 = (struct sockaddr_in6 *)(isrcext + 1);
2962 		ASSERT(idstext != NULL);
2963 		idst = (struct sockaddr_in *)(idstext + 1);
2964 		idst6 = (struct sockaddr_in6 *)(idstext + 1);
2965 	} else {
2966 		isrc = NULL;
2967 		isrc6 = NULL;
2968 	}
2969 
2970 	af = src->sin_family;
2971 
2972 	if (af == AF_INET) {
2973 		src_addr_ptr = (uint32_t *)&src->sin_addr;
2974 		dst_addr_ptr = (uint32_t *)&dst->sin_addr;
2975 	} else {
2976 		ASSERT(af == AF_INET6);
2977 		src_addr_ptr = (uint32_t *)&src6->sin6_addr;
2978 		dst_addr_ptr = (uint32_t *)&dst6->sin6_addr;
2979 	}
2980 
2981 	if (!isupdate && (clone == B_TRUE || is_inbound == B_TRUE) &&
2982 	    cl_inet_checkspi &&
2983 	    (assoc->sadb_sa_state != SADB_X_SASTATE_ACTIVE_ELSEWHERE)) {
2984 		rcode = cl_inet_checkspi(ns->netstack_stackid, protocol,
2985 		    assoc->sadb_sa_spi, NULL);
2986 		if (rcode == -1) {
2987 			return (EEXIST);
2988 		}
2989 	}
2990 
2991 	/*
2992 	 * Check to see if the new SA will be cloned AND paired. The
2993 	 * reason a SA will be cloned is the source or destination addresses
2994 	 * are not specific enough to determine if the SA goes in the outbound
2995 	 * or the inbound hash table, so its cloned and put in both. If
2996 	 * the SA is paired, it's soft linked to another SA for the other
2997 	 * direction. Keeping track and looking up SA's that are direction
2998 	 * unspecific and linked is too hard.
2999 	 */
3000 	if (clone && (pair_ext != NULL)) {
3001 		*diagnostic = SADB_X_DIAGNOSTIC_PAIR_INAPPROPRIATE;
3002 		return (EINVAL);
3003 	}
3004 
3005 	if (!isupdate) {
3006 		newbie = sadb_makelarvalassoc(assoc->sadb_sa_spi,
3007 		    src_addr_ptr, dst_addr_ptr, af, ns);
3008 		if (newbie == NULL)
3009 			return (ENOMEM);
3010 	}
3011 
3012 	mutex_enter(&newbie->ipsa_lock);
3013 
3014 	if (isrc != NULL) {
3015 		if (isrc->sin_family == AF_INET) {
3016 			if (srcext->sadb_address_proto != IPPROTO_ENCAP) {
3017 				if (srcext->sadb_address_proto != 0) {
3018 					/*
3019 					 * Mismatched outer-packet protocol
3020 					 * and inner-packet address family.
3021 					 */
3022 					mutex_exit(&newbie->ipsa_lock);
3023 					error = EPROTOTYPE;
3024 					*diagnostic =
3025 					    SADB_X_DIAGNOSTIC_INNER_AF_MISMATCH;
3026 					goto error;
3027 				} else {
3028 					/* Fill in with explicit protocol. */
3029 					srcext->sadb_address_proto =
3030 					    IPPROTO_ENCAP;
3031 					dstext->sadb_address_proto =
3032 					    IPPROTO_ENCAP;
3033 				}
3034 			}
3035 			isrc_addr_ptr = (uint32_t *)&isrc->sin_addr;
3036 			idst_addr_ptr = (uint32_t *)&idst->sin_addr;
3037 		} else {
3038 			ASSERT(isrc->sin_family == AF_INET6);
3039 			if (srcext->sadb_address_proto != IPPROTO_IPV6) {
3040 				if (srcext->sadb_address_proto != 0) {
3041 					/*
3042 					 * Mismatched outer-packet protocol
3043 					 * and inner-packet address family.
3044 					 */
3045 					mutex_exit(&newbie->ipsa_lock);
3046 					error = EPROTOTYPE;
3047 					*diagnostic =
3048 					    SADB_X_DIAGNOSTIC_INNER_AF_MISMATCH;
3049 					goto error;
3050 				} else {
3051 					/* Fill in with explicit protocol. */
3052 					srcext->sadb_address_proto =
3053 					    IPPROTO_IPV6;
3054 					dstext->sadb_address_proto =
3055 					    IPPROTO_IPV6;
3056 				}
3057 			}
3058 			isrc_addr_ptr = (uint32_t *)&isrc6->sin6_addr;
3059 			idst_addr_ptr = (uint32_t *)&idst6->sin6_addr;
3060 		}
3061 		newbie->ipsa_innerfam = isrc->sin_family;
3062 
3063 		IPSA_COPY_ADDR(newbie->ipsa_innersrc, isrc_addr_ptr,
3064 		    newbie->ipsa_innerfam);
3065 		IPSA_COPY_ADDR(newbie->ipsa_innerdst, idst_addr_ptr,
3066 		    newbie->ipsa_innerfam);
3067 		newbie->ipsa_innersrcpfx = isrcext->sadb_address_prefixlen;
3068 		newbie->ipsa_innerdstpfx = idstext->sadb_address_prefixlen;
3069 
3070 		/* Unique value uses inner-ports for Tunnel Mode... */
3071 		newbie->ipsa_unique_id = SA_UNIQUE_ID(isrc->sin_port,
3072 		    idst->sin_port, dstext->sadb_address_proto,
3073 		    idstext->sadb_address_proto);
3074 		newbie->ipsa_unique_mask = SA_UNIQUE_MASK(isrc->sin_port,
3075 		    idst->sin_port, dstext->sadb_address_proto,
3076 		    idstext->sadb_address_proto);
3077 	} else {
3078 		/* ... and outer-ports for Transport Mode. */
3079 		newbie->ipsa_unique_id = SA_UNIQUE_ID(src->sin_port,
3080 		    dst->sin_port, dstext->sadb_address_proto, 0);
3081 		newbie->ipsa_unique_mask = SA_UNIQUE_MASK(src->sin_port,
3082 		    dst->sin_port, dstext->sadb_address_proto, 0);
3083 	}
3084 	if (newbie->ipsa_unique_mask != (uint64_t)0)
3085 		newbie->ipsa_flags |= IPSA_F_UNIQUE;
3086 
3087 	sadb_nat_calculations(newbie,
3088 	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_LOC],
3089 	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_REM],
3090 	    src_addr_ptr, dst_addr_ptr);
3091 
3092 	newbie->ipsa_type = samsg->sadb_msg_satype;
3093 
3094 	ASSERT((assoc->sadb_sa_state == SADB_SASTATE_MATURE) ||
3095 	    (assoc->sadb_sa_state == SADB_X_SASTATE_ACTIVE_ELSEWHERE));
3096 	newbie->ipsa_auth_alg = assoc->sadb_sa_auth;
3097 	newbie->ipsa_encr_alg = assoc->sadb_sa_encrypt;
3098 
3099 	newbie->ipsa_flags |= assoc->sadb_sa_flags;
3100 	if (newbie->ipsa_flags & SADB_X_SAFLAGS_NATT_LOC &&
3101 	    ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_LOC] == NULL) {
3102 		mutex_exit(&newbie->ipsa_lock);
3103 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_NATT_LOC;
3104 		error = EINVAL;
3105 		goto error;
3106 	}
3107 	if (newbie->ipsa_flags & SADB_X_SAFLAGS_NATT_REM &&
3108 	    ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_REM] == NULL) {
3109 		mutex_exit(&newbie->ipsa_lock);
3110 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_NATT_REM;
3111 		error = EINVAL;
3112 		goto error;
3113 	}
3114 	if (newbie->ipsa_flags & SADB_X_SAFLAGS_TUNNEL &&
3115 	    ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_SRC] == NULL) {
3116 		mutex_exit(&newbie->ipsa_lock);
3117 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_SRC;
3118 		error = EINVAL;
3119 		goto error;
3120 	}
3121 	/*
3122 	 * If unspecified source address, force replay_wsize to 0.
3123 	 * This is because an SA that has multiple sources of secure
3124 	 * traffic cannot enforce a replay counter w/o synchronizing the
3125 	 * senders.
3126 	 */
3127 	if (ksi->ks_in_srctype != KS_IN_ADDR_UNSPEC)
3128 		newbie->ipsa_replay_wsize = assoc->sadb_sa_replay;
3129 	else
3130 		newbie->ipsa_replay_wsize = 0;
3131 
3132 	newbie->ipsa_addtime = gethrestime_sec();
3133 
3134 	if (kmcext != NULL) {
3135 		newbie->ipsa_kmp = kmcext->sadb_x_kmc_proto;
3136 		newbie->ipsa_kmc = kmcext->sadb_x_kmc_cookie;
3137 	}
3138 
3139 	/*
3140 	 * XXX CURRENT lifetime checks MAY BE needed for an UPDATE.
3141 	 * The spec says that one can update current lifetimes, but
3142 	 * that seems impractical, especially in the larval-to-mature
3143 	 * update that this function performs.
3144 	 */
3145 	if (soft != NULL) {
3146 		newbie->ipsa_softaddlt = soft->sadb_lifetime_addtime;
3147 		newbie->ipsa_softuselt = soft->sadb_lifetime_usetime;
3148 		newbie->ipsa_softbyteslt = soft->sadb_lifetime_bytes;
3149 		newbie->ipsa_softalloc = soft->sadb_lifetime_allocations;
3150 		SET_EXPIRE(newbie, softaddlt, softexpiretime);
3151 	}
3152 	if (hard != NULL) {
3153 		newbie->ipsa_hardaddlt = hard->sadb_lifetime_addtime;
3154 		newbie->ipsa_harduselt = hard->sadb_lifetime_usetime;
3155 		newbie->ipsa_hardbyteslt = hard->sadb_lifetime_bytes;
3156 		newbie->ipsa_hardalloc = hard->sadb_lifetime_allocations;
3157 		SET_EXPIRE(newbie, hardaddlt, hardexpiretime);
3158 	}
3159 	if (idle != NULL) {
3160 		newbie->ipsa_idleaddlt = idle->sadb_lifetime_addtime;
3161 		newbie->ipsa_idleuselt = idle->sadb_lifetime_usetime;
3162 		newbie->ipsa_idleexpiretime = newbie->ipsa_addtime +
3163 		    newbie->ipsa_idleaddlt;
3164 		newbie->ipsa_idletime = newbie->ipsa_idleaddlt;
3165 	}
3166 
3167 	newbie->ipsa_authtmpl = NULL;
3168 	newbie->ipsa_encrtmpl = NULL;
3169 
3170 #ifdef IPSEC_LATENCY_TEST
3171 	if (akey != NULL && newbie->ipsa_auth_alg != SADB_AALG_NONE) {
3172 #else
3173 	if (akey != NULL) {
3174 #endif
3175 		async = (ipss->ipsec_algs_exec_mode[IPSEC_ALG_AUTH] ==
3176 		    IPSEC_ALGS_EXEC_ASYNC);
3177 
3178 		newbie->ipsa_authkeybits = akey->sadb_key_bits;
3179 		newbie->ipsa_authkeylen = SADB_1TO8(akey->sadb_key_bits);
3180 		/* In case we have to round up to the next byte... */
3181 		if ((akey->sadb_key_bits & 0x7) != 0)
3182 			newbie->ipsa_authkeylen++;
3183 		newbie->ipsa_authkey = kmem_alloc(newbie->ipsa_authkeylen,
3184 		    KM_NOSLEEP);
3185 		if (newbie->ipsa_authkey == NULL) {
3186 			error = ENOMEM;
3187 			mutex_exit(&newbie->ipsa_lock);
3188 			goto error;
3189 		}
3190 		bcopy(akey + 1, newbie->ipsa_authkey, newbie->ipsa_authkeylen);
3191 		bzero(akey + 1, newbie->ipsa_authkeylen);
3192 
3193 		/*
3194 		 * Pre-initialize the kernel crypto framework key
3195 		 * structure.
3196 		 */
3197 		newbie->ipsa_kcfauthkey.ck_format = CRYPTO_KEY_RAW;
3198 		newbie->ipsa_kcfauthkey.ck_length = newbie->ipsa_authkeybits;
3199 		newbie->ipsa_kcfauthkey.ck_data = newbie->ipsa_authkey;
3200 
3201 		mutex_enter(&ipss->ipsec_alg_lock);
3202 		alg = ipss->ipsec_alglists[IPSEC_ALG_AUTH]
3203 		    [newbie->ipsa_auth_alg];
3204 		if (alg != NULL && ALG_VALID(alg)) {
3205 			newbie->ipsa_amech.cm_type = alg->alg_mech_type;
3206 			newbie->ipsa_amech.cm_param =
3207 			    (char *)&newbie->ipsa_mac_len;
3208 			newbie->ipsa_amech.cm_param_len = sizeof (size_t);
3209 			newbie->ipsa_mac_len = (size_t)alg->alg_datalen;
3210 		} else {
3211 			newbie->ipsa_amech.cm_type = CRYPTO_MECHANISM_INVALID;
3212 		}
3213 		error = ipsec_create_ctx_tmpl(newbie, IPSEC_ALG_AUTH);
3214 		mutex_exit(&ipss->ipsec_alg_lock);
3215 		if (error != 0) {
3216 			mutex_exit(&newbie->ipsa_lock);
3217 			/*
3218 			 * An error here indicates that alg is the wrong type
3219 			 * (IE: not authentication) or its not in the alg tables
3220 			 * created by ipsecalgs(1m), or Kcf does not like the
3221 			 * parameters passed in with this algorithm, which is
3222 			 * probably a coding error!
3223 			 */
3224 			*diagnostic = SADB_X_DIAGNOSTIC_BAD_CTX;
3225 
3226 			goto error;
3227 		}
3228 	}
3229 
3230 	if (ekey != NULL) {
3231 		mutex_enter(&ipss->ipsec_alg_lock);
3232 		async = async || (ipss->ipsec_algs_exec_mode[IPSEC_ALG_ENCR] ==
3233 		    IPSEC_ALGS_EXEC_ASYNC);
3234 		alg = ipss->ipsec_alglists[IPSEC_ALG_ENCR]
3235 		    [newbie->ipsa_encr_alg];
3236 
3237 		if (alg != NULL && ALG_VALID(alg)) {
3238 			newbie->ipsa_emech.cm_type = alg->alg_mech_type;
3239 			newbie->ipsa_datalen = alg->alg_datalen;
3240 			if (alg->alg_flags & ALG_FLAG_COUNTERMODE)
3241 				newbie->ipsa_flags |= IPSA_F_COUNTERMODE;
3242 
3243 			if (alg->alg_flags & ALG_FLAG_COMBINED) {
3244 				newbie->ipsa_flags |= IPSA_F_COMBINED;
3245 				newbie->ipsa_mac_len =  alg->alg_icvlen;
3246 			}
3247 
3248 			if (alg->alg_flags & ALG_FLAG_CCM)
3249 				newbie->ipsa_noncefunc = ccm_params_init;
3250 			else if (alg->alg_flags & ALG_FLAG_GCM)
3251 				newbie->ipsa_noncefunc = gcm_params_init;
3252 			else newbie->ipsa_noncefunc = cbc_params_init;
3253 
3254 			newbie->ipsa_saltlen = alg->alg_saltlen;
3255 			newbie->ipsa_saltbits = SADB_8TO1(newbie->ipsa_saltlen);
3256 			newbie->ipsa_iv_len = alg->alg_ivlen;
3257 			newbie->ipsa_nonce_len = newbie->ipsa_saltlen +
3258 			    newbie->ipsa_iv_len;
3259 			newbie->ipsa_emech.cm_param = NULL;
3260 			newbie->ipsa_emech.cm_param_len = 0;
3261 		} else {
3262 			newbie->ipsa_emech.cm_type = CRYPTO_MECHANISM_INVALID;
3263 		}
3264 		mutex_exit(&ipss->ipsec_alg_lock);
3265 
3266 		/*
3267 		 * The byte stream following the sadb_key_t is made up of:
3268 		 * key bytes, [salt bytes], [IV initial value]
3269 		 * All of these have variable length. The IV is typically
3270 		 * randomly generated by this function and not passed in.
3271 		 * By supporting the injection of a known IV, the whole
3272 		 * IPsec subsystem and the underlying crypto subsystem
3273 		 * can be tested with known test vectors.
3274 		 *
3275 		 * The keying material has been checked by ext_check()
3276 		 * and ipsec_valid_key_size(), after removing salt/IV
3277 		 * bits, whats left is the encryption key. If this is too
3278 		 * short, ipsec_create_ctx_tmpl() will fail and the SA
3279 		 * won't get created.
3280 		 *
3281 		 * set ipsa_encrkeylen to length of key only.
3282 		 */
3283 		newbie->ipsa_encrkeybits = ekey->sadb_key_bits;
3284 		newbie->ipsa_encrkeybits -= ekey->sadb_key_reserved;
3285 		newbie->ipsa_encrkeybits -= newbie->ipsa_saltbits;
3286 		newbie->ipsa_encrkeylen = SADB_1TO8(newbie->ipsa_encrkeybits);
3287 
3288 		/* In case we have to round up to the next byte... */
3289 		if ((ekey->sadb_key_bits & 0x7) != 0)
3290 			newbie->ipsa_encrkeylen++;
3291 
3292 		newbie->ipsa_encrkey = kmem_alloc(newbie->ipsa_encrkeylen,
3293 		    KM_NOSLEEP);
3294 		if (newbie->ipsa_encrkey == NULL) {
3295 			error = ENOMEM;
3296 			mutex_exit(&newbie->ipsa_lock);
3297 			goto error;
3298 		}
3299 
3300 		buf_ptr = (uint8_t *)(ekey + 1);
3301 		bcopy(buf_ptr, newbie->ipsa_encrkey, newbie->ipsa_encrkeylen);
3302 
3303 		if (newbie->ipsa_flags & IPSA_F_COMBINED) {
3304 			/*
3305 			 * Combined mode algs need a nonce. Copy the salt and
3306 			 * IV into a buffer. The ipsa_nonce is a pointer into
3307 			 * this buffer, some bytes at the start of the buffer
3308 			 * may be unused, depends on the salt length. The IV
3309 			 * is 64 bit aligned so it can be incremented as a
3310 			 * uint64_t. Zero out key in samsg_t before freeing.
3311 			 */
3312 
3313 			newbie->ipsa_nonce_buf = kmem_alloc(
3314 			    sizeof (ipsec_nonce_t), KM_NOSLEEP);
3315 			if (newbie->ipsa_nonce_buf == NULL) {
3316 				error = ENOMEM;
3317 				mutex_exit(&newbie->ipsa_lock);
3318 				goto error;
3319 			}
3320 			/*
3321 			 * Initialize nonce and salt pointers to point
3322 			 * to the nonce buffer. This is just in case we get
3323 			 * bad data, the pointers will be valid, the data
3324 			 * won't be.
3325 			 *
3326 			 * See sadb.h for layout of nonce.
3327 			 */
3328 			newbie->ipsa_iv = &newbie->ipsa_nonce_buf->iv;
3329 			newbie->ipsa_salt = (uint8_t *)newbie->ipsa_nonce_buf;
3330 			newbie->ipsa_nonce = newbie->ipsa_salt;
3331 			if (newbie->ipsa_saltlen != 0) {
3332 				salt_offset = MAXSALTSIZE -
3333 				    newbie->ipsa_saltlen;
3334 				newbie->ipsa_salt = (uint8_t *)
3335 				    &newbie->ipsa_nonce_buf->salt[salt_offset];
3336 				newbie->ipsa_nonce = newbie->ipsa_salt;
3337 				buf_ptr += newbie->ipsa_encrkeylen;
3338 				bcopy(buf_ptr, newbie->ipsa_salt,
3339 				    newbie->ipsa_saltlen);
3340 			}
3341 			/*
3342 			 * The IV for CCM/GCM mode increments, it should not
3343 			 * repeat. Get a random value for the IV, make a
3344 			 * copy, the SA will expire when/if the IV ever
3345 			 * wraps back to the initial value. If an Initial IV
3346 			 * is passed in via PF_KEY, save this in the SA.
3347 			 * Initialising IV for inbound is pointless as its
3348 			 * taken from the inbound packet.
3349 			 */
3350 			if (!is_inbound) {
3351 				if (ekey->sadb_key_reserved != 0) {
3352 					buf_ptr += newbie->ipsa_saltlen;
3353 					bcopy(buf_ptr, (uint8_t *)newbie->
3354 					    ipsa_iv, SADB_1TO8(ekey->
3355 					    sadb_key_reserved));
3356 				} else {
3357 					(void) random_get_pseudo_bytes(
3358 					    (uint8_t *)newbie->ipsa_iv,
3359 					    newbie->ipsa_iv_len);
3360 				}
3361 				newbie->ipsa_iv_softexpire =
3362 				    (*newbie->ipsa_iv) << 9;
3363 				newbie->ipsa_iv_hardexpire = *newbie->ipsa_iv;
3364 			}
3365 		}
3366 		bzero((ekey + 1), SADB_1TO8(ekey->sadb_key_bits));
3367 
3368 		/*
3369 		 * Pre-initialize the kernel crypto framework key
3370 		 * structure.
3371 		 */
3372 		newbie->ipsa_kcfencrkey.ck_format = CRYPTO_KEY_RAW;
3373 		newbie->ipsa_kcfencrkey.ck_length = newbie->ipsa_encrkeybits;
3374 		newbie->ipsa_kcfencrkey.ck_data = newbie->ipsa_encrkey;
3375 
3376 		mutex_enter(&ipss->ipsec_alg_lock);
3377 		error = ipsec_create_ctx_tmpl(newbie, IPSEC_ALG_ENCR);
3378 		mutex_exit(&ipss->ipsec_alg_lock);
3379 		if (error != 0) {
3380 			mutex_exit(&newbie->ipsa_lock);
3381 			/* See above for error explanation. */
3382 			*diagnostic = SADB_X_DIAGNOSTIC_BAD_CTX;
3383 			goto error;
3384 		}
3385 	}
3386 
3387 	if (async)
3388 		newbie->ipsa_flags |= IPSA_F_ASYNC;
3389 
3390 	/*
3391 	 * Ptrs to processing functions.
3392 	 */
3393 	if (newbie->ipsa_type == SADB_SATYPE_ESP)
3394 		ipsecesp_init_funcs(newbie);
3395 	else
3396 		ipsecah_init_funcs(newbie);
3397 	ASSERT(newbie->ipsa_output_func != NULL &&
3398 	    newbie->ipsa_input_func != NULL);
3399 
3400 	/*
3401 	 * Certificate ID stuff.
3402 	 */
3403 	if (ksi->ks_in_extv[SADB_EXT_IDENTITY_SRC] != NULL) {
3404 		sadb_ident_t *id =
3405 		    (sadb_ident_t *)ksi->ks_in_extv[SADB_EXT_IDENTITY_SRC];
3406 
3407 		/*
3408 		 * Can assume strlen() will return okay because ext_check() in
3409 		 * keysock.c prepares the string for us.
3410 		 */
3411 		newbie->ipsa_src_cid = ipsid_lookup(id->sadb_ident_type,
3412 		    (char *)(id+1), ns);
3413 		if (newbie->ipsa_src_cid == NULL) {
3414 			error = ENOMEM;
3415 			mutex_exit(&newbie->ipsa_lock);
3416 			goto error;
3417 		}
3418 	}
3419 
3420 	if (ksi->ks_in_extv[SADB_EXT_IDENTITY_DST] != NULL) {
3421 		sadb_ident_t *id =
3422 		    (sadb_ident_t *)ksi->ks_in_extv[SADB_EXT_IDENTITY_DST];
3423 
3424 		/*
3425 		 * Can assume strlen() will return okay because ext_check() in
3426 		 * keysock.c prepares the string for us.
3427 		 */
3428 		newbie->ipsa_dst_cid = ipsid_lookup(id->sadb_ident_type,
3429 		    (char *)(id+1), ns);
3430 		if (newbie->ipsa_dst_cid == NULL) {
3431 			error = ENOMEM;
3432 			mutex_exit(&newbie->ipsa_lock);
3433 			goto error;
3434 		}
3435 	}
3436 
3437 	/*
3438 	 * sensitivity label handling code:
3439 	 * Convert sens + bitmap into cred_t, and associate it
3440 	 * with the new SA.
3441 	 */
3442 	if (sens != NULL) {
3443 		uint64_t *bitmap = (uint64_t *)(sens + 1);
3444 
3445 		newbie->ipsa_tsl = sadb_label_from_sens(sens, bitmap);
3446 	}
3447 
3448 	/*
3449 	 * Likewise for outer sensitivity.
3450 	 */
3451 	if (osens != NULL) {
3452 		uint64_t *bitmap = (uint64_t *)(osens + 1);
3453 		ts_label_t *tsl, *effective_tsl;
3454 		uint32_t *peer_addr_ptr;
3455 		zoneid_t zoneid = GLOBAL_ZONEID;
3456 		zone_t *zone;
3457 
3458 		peer_addr_ptr = is_inbound ? src_addr_ptr : dst_addr_ptr;
3459 
3460 		tsl = sadb_label_from_sens(osens, bitmap);
3461 		newbie->ipsa_mac_exempt = CONN_MAC_DEFAULT;
3462 
3463 		if (osens->sadb_x_sens_flags & SADB_X_SENS_IMPLICIT) {
3464 			newbie->ipsa_mac_exempt = CONN_MAC_IMPLICIT;
3465 		}
3466 
3467 		error = tsol_check_dest(tsl, peer_addr_ptr,
3468 		    (af == AF_INET6)?IPV6_VERSION:IPV4_VERSION,
3469 		    newbie->ipsa_mac_exempt, B_TRUE, &effective_tsl);
3470 		if (error != 0) {
3471 			label_rele(tsl);
3472 			mutex_exit(&newbie->ipsa_lock);
3473 			goto error;
3474 		}
3475 
3476 		if (effective_tsl != NULL) {
3477 			label_rele(tsl);
3478 			tsl = effective_tsl;
3479 		}
3480 
3481 		newbie->ipsa_otsl = tsl;
3482 
3483 		zone = zone_find_by_label(tsl);
3484 		if (zone != NULL) {
3485 			zoneid = zone->zone_id;
3486 			zone_rele(zone);
3487 		}
3488 		/*
3489 		 * For exclusive stacks we set the zoneid to zero to operate
3490 		 * as if in the global zone for tsol_compute_label_v4/v6
3491 		 */
3492 		if (ipst->ips_netstack->netstack_stackid != GLOBAL_NETSTACKID)
3493 			zoneid = GLOBAL_ZONEID;
3494 
3495 		if (af == AF_INET6) {
3496 			error = tsol_compute_label_v6(tsl, zoneid,
3497 			    (in6_addr_t *)peer_addr_ptr,
3498 			    newbie->ipsa_opt_storage, ipst);
3499 		} else {
3500 			error = tsol_compute_label_v4(tsl, zoneid,
3501 			    *peer_addr_ptr, newbie->ipsa_opt_storage, ipst);
3502 		}
3503 		if (error != 0) {
3504 			mutex_exit(&newbie->ipsa_lock);
3505 			goto error;
3506 		}
3507 	}
3508 
3509 
3510 	if (replayext != NULL) {
3511 		if ((replayext->sadb_x_rc_replay32 == 0) &&
3512 		    (replayext->sadb_x_rc_replay64 != 0)) {
3513 			error = EOPNOTSUPP;
3514 			*diagnostic = SADB_X_DIAGNOSTIC_INVALID_REPLAY;
3515 			mutex_exit(&newbie->ipsa_lock);
3516 			goto error;
3517 		}
3518 		newbie->ipsa_replay = replayext->sadb_x_rc_replay32;
3519 	}
3520 
3521 	/* now that the SA has been updated, set its new state */
3522 	newbie->ipsa_state = assoc->sadb_sa_state;
3523 
3524 	if (clone) {
3525 		newbie->ipsa_haspeer = B_TRUE;
3526 	} else {
3527 		if (!is_inbound) {
3528 			lifetime_fuzz(newbie);
3529 		}
3530 	}
3531 	/*
3532 	 * The less locks I hold when doing an insertion and possible cloning,
3533 	 * the better!
3534 	 */
3535 	mutex_exit(&newbie->ipsa_lock);
3536 
3537 	if (clone) {
3538 		newbie_clone = sadb_cloneassoc(newbie);
3539 
3540 		if (newbie_clone == NULL) {
3541 			error = ENOMEM;
3542 			goto error;
3543 		}
3544 	}
3545 
3546 	/*
3547 	 * Enter the bucket locks.  The order of entry is outbound,
3548 	 * inbound.  We map "primary" and "secondary" into outbound and inbound
3549 	 * based on the destination address type.  If the destination address
3550 	 * type is for a node that isn't mine (or potentially mine), the
3551 	 * "primary" bucket is the outbound one.
3552 	 */
3553 	if (!is_inbound) {
3554 		/* primary == outbound */
3555 		mutex_enter(&primary->isaf_lock);
3556 		mutex_enter(&secondary->isaf_lock);
3557 	} else {
3558 		/* primary == inbound */
3559 		mutex_enter(&secondary->isaf_lock);
3560 		mutex_enter(&primary->isaf_lock);
3561 	}
3562 
3563 	/*
3564 	 * sadb_insertassoc() doesn't increment the reference
3565 	 * count.  We therefore have to increment the
3566 	 * reference count one more time to reflect the
3567 	 * pointers of the table that reference this SA.
3568 	 */
3569 	IPSA_REFHOLD(newbie);
3570 
3571 	if (isupdate) {
3572 		/*
3573 		 * Unlink from larval holding cell in the "inbound" fanout.
3574 		 */
3575 		ASSERT(newbie->ipsa_linklock == &primary->isaf_lock ||
3576 		    newbie->ipsa_linklock == &secondary->isaf_lock);
3577 		sadb_unlinkassoc(newbie);
3578 	}
3579 
3580 	mutex_enter(&newbie->ipsa_lock);
3581 	error = sadb_insertassoc(newbie, primary);
3582 	mutex_exit(&newbie->ipsa_lock);
3583 
3584 	if (error != 0) {
3585 		/*
3586 		 * Since sadb_insertassoc() failed, we must decrement the
3587 		 * refcount again so the cleanup code will actually free
3588 		 * the offending SA.
3589 		 */
3590 		IPSA_REFRELE(newbie);
3591 		goto error_unlock;
3592 	}
3593 
3594 	if (newbie_clone != NULL) {
3595 		mutex_enter(&newbie_clone->ipsa_lock);
3596 		error = sadb_insertassoc(newbie_clone, secondary);
3597 		mutex_exit(&newbie_clone->ipsa_lock);
3598 		if (error != 0) {
3599 			/* Collision in secondary table. */
3600 			sadb_unlinkassoc(newbie);  /* This does REFRELE. */
3601 			goto error_unlock;
3602 		}
3603 		IPSA_REFHOLD(newbie_clone);
3604 	} else {
3605 		ASSERT(primary != secondary);
3606 		scratch = ipsec_getassocbyspi(secondary, newbie->ipsa_spi,
3607 		    ALL_ZEROES_PTR, newbie->ipsa_dstaddr, af);
3608 		if (scratch != NULL) {
3609 			/* Collision in secondary table. */
3610 			sadb_unlinkassoc(newbie);  /* This does REFRELE. */
3611 			/* Set the error, since ipsec_getassocbyspi() can't. */
3612 			error = EEXIST;
3613 			goto error_unlock;
3614 		}
3615 	}
3616 
3617 	/* OKAY!  So let's do some reality check assertions. */
3618 
3619 	ASSERT(MUTEX_NOT_HELD(&newbie->ipsa_lock));
3620 	ASSERT(newbie_clone == NULL ||
3621 	    (MUTEX_NOT_HELD(&newbie_clone->ipsa_lock)));
3622 
3623 error_unlock:
3624 
3625 	/*
3626 	 * We can exit the locks in any order.	Only entrance needs to
3627 	 * follow any protocol.
3628 	 */
3629 	mutex_exit(&secondary->isaf_lock);
3630 	mutex_exit(&primary->isaf_lock);
3631 
3632 	if (pair_ext != NULL && error == 0) {
3633 		/* update pair_spi if it exists. */
3634 		ipsa_query_t sq;
3635 
3636 		sq.spp = spp;		/* XXX param */
3637 		error = sadb_form_query(ksi, IPSA_Q_DST, IPSA_Q_SRC|IPSA_Q_DST|
3638 		    IPSA_Q_SA|IPSA_Q_INBOUND|IPSA_Q_OUTBOUND, &sq, diagnostic);
3639 		if (error)
3640 			return (error);
3641 
3642 		error = get_ipsa_pair(&sq, &ipsapp, diagnostic);
3643 
3644 		if (error != 0)
3645 			goto error;
3646 
3647 		if (ipsapp.ipsap_psa_ptr != NULL) {
3648 			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_ALREADY;
3649 			error = EINVAL;
3650 		} else {
3651 			/* update_pairing() sets diagnostic */
3652 			error = update_pairing(&ipsapp, &sq, ksi, diagnostic);
3653 		}
3654 	}
3655 	/* Common error point for this routine. */
3656 error:
3657 	if (newbie != NULL) {
3658 		if (error != 0) {
3659 			/* This SA is broken, let the reaper clean up. */
3660 			mutex_enter(&newbie->ipsa_lock);
3661 			newbie->ipsa_state = IPSA_STATE_DEAD;
3662 			newbie->ipsa_hardexpiretime = 1;
3663 			mutex_exit(&newbie->ipsa_lock);
3664 		}
3665 		IPSA_REFRELE(newbie);
3666 	}
3667 	if (newbie_clone != NULL) {
3668 		IPSA_REFRELE(newbie_clone);
3669 	}
3670 
3671 	if (error == 0) {
3672 		/*
3673 		 * Construct favorable PF_KEY return message and send to
3674 		 * keysock. Update the flags in the original keysock message
3675 		 * to reflect the actual flags in the new SA.
3676 		 *  (Q:  Do I need to pass "newbie"?  If I do,
3677 		 * make sure to REFHOLD, call, then REFRELE.)
3678 		 */
3679 		assoc->sadb_sa_flags = newbie->ipsa_flags;
3680 		sadb_pfkey_echo(pfkey_q, mp, samsg, ksi, NULL);
3681 	}
3682 
3683 	destroy_ipsa_pair(&ipsapp);
3684 	return (error);
3685 }
3686 
3687 /*
3688  * Set the time of first use for a security association.  Update any
3689  * expiration times as a result.
3690  */
3691 void
3692 sadb_set_usetime(ipsa_t *assoc)
3693 {
3694 	time_t snapshot = gethrestime_sec();
3695 
3696 	mutex_enter(&assoc->ipsa_lock);
3697 	assoc->ipsa_lastuse = snapshot;
3698 	assoc->ipsa_idleexpiretime = snapshot + assoc->ipsa_idletime;
3699 
3700 	/*
3701 	 * Caller does check usetime before calling me usually, and
3702 	 * double-checking is better than a mutex_enter/exit hit.
3703 	 */
3704 	if (assoc->ipsa_usetime == 0) {
3705 		/*
3706 		 * This is redundant for outbound SA's, as
3707 		 * ipsec_getassocbyconn() sets the IPSA_F_USED flag already.
3708 		 * Inbound SAs, however, have no such protection.
3709 		 */
3710 		assoc->ipsa_flags |= IPSA_F_USED;
3711 		assoc->ipsa_usetime = snapshot;
3712 
3713 		/*
3714 		 * After setting the use time, see if we have a use lifetime
3715 		 * that would cause the actual SA expiration time to shorten.
3716 		 */
3717 		UPDATE_EXPIRE(assoc, softuselt, softexpiretime);
3718 		UPDATE_EXPIRE(assoc, harduselt, hardexpiretime);
3719 	}
3720 	mutex_exit(&assoc->ipsa_lock);
3721 }
3722 
3723 /*
3724  * Send up a PF_KEY expire message for this association.
3725  */
3726 static void
3727 sadb_expire_assoc(queue_t *pfkey_q, ipsa_t *assoc)
3728 {
3729 	mblk_t *mp, *mp1;
3730 	int alloclen, af;
3731 	sadb_msg_t *samsg;
3732 	sadb_lifetime_t *current, *expire;
3733 	sadb_sa_t *saext;
3734 	uint8_t *end;
3735 	boolean_t tunnel_mode;
3736 
3737 	ASSERT(MUTEX_HELD(&assoc->ipsa_lock));
3738 
3739 	/* Don't bother sending if there's no queue. */
3740 	if (pfkey_q == NULL)
3741 		return;
3742 
3743 	mp = sadb_keysock_out(0);
3744 	if (mp == NULL) {
3745 		/* cmn_err(CE_WARN, */
3746 		/*	"sadb_expire_assoc: Can't allocate KEYSOCK_OUT.\n"); */
3747 		return;
3748 	}
3749 
3750 	alloclen = sizeof (*samsg) + sizeof (*current) + sizeof (*expire) +
3751 	    2 * sizeof (sadb_address_t) + sizeof (*saext);
3752 
3753 	af = assoc->ipsa_addrfam;
3754 	switch (af) {
3755 	case AF_INET:
3756 		alloclen += 2 * sizeof (struct sockaddr_in);
3757 		break;
3758 	case AF_INET6:
3759 		alloclen += 2 * sizeof (struct sockaddr_in6);
3760 		break;
3761 	default:
3762 		/* Won't happen unless there's a kernel bug. */
3763 		freeb(mp);
3764 		cmn_err(CE_WARN,
3765 		    "sadb_expire_assoc: Unknown address length.\n");
3766 		return;
3767 	}
3768 
3769 	tunnel_mode = (assoc->ipsa_flags & IPSA_F_TUNNEL);
3770 	if (tunnel_mode) {
3771 		alloclen += 2 * sizeof (sadb_address_t);
3772 		switch (assoc->ipsa_innerfam) {
3773 		case AF_INET:
3774 			alloclen += 2 * sizeof (struct sockaddr_in);
3775 			break;
3776 		case AF_INET6:
3777 			alloclen += 2 * sizeof (struct sockaddr_in6);
3778 			break;
3779 		default:
3780 			/* Won't happen unless there's a kernel bug. */
3781 			freeb(mp);
3782 			cmn_err(CE_WARN, "sadb_expire_assoc: "
3783 			    "Unknown inner address length.\n");
3784 			return;
3785 		}
3786 	}
3787 
3788 	mp->b_cont = allocb(alloclen, BPRI_HI);
3789 	if (mp->b_cont == NULL) {
3790 		freeb(mp);
3791 		/* cmn_err(CE_WARN, */
3792 		/*	"sadb_expire_assoc: Can't allocate message.\n"); */
3793 		return;
3794 	}
3795 
3796 	mp1 = mp;
3797 	mp = mp->b_cont;
3798 	end = mp->b_wptr + alloclen;
3799 
3800 	samsg = (sadb_msg_t *)mp->b_wptr;
3801 	mp->b_wptr += sizeof (*samsg);
3802 	samsg->sadb_msg_version = PF_KEY_V2;
3803 	samsg->sadb_msg_type = SADB_EXPIRE;
3804 	samsg->sadb_msg_errno = 0;
3805 	samsg->sadb_msg_satype = assoc->ipsa_type;
3806 	samsg->sadb_msg_len = SADB_8TO64(alloclen);
3807 	samsg->sadb_msg_reserved = 0;
3808 	samsg->sadb_msg_seq = 0;
3809 	samsg->sadb_msg_pid = 0;
3810 
3811 	saext = (sadb_sa_t *)mp->b_wptr;
3812 	mp->b_wptr += sizeof (*saext);
3813 	saext->sadb_sa_len = SADB_8TO64(sizeof (*saext));
3814 	saext->sadb_sa_exttype = SADB_EXT_SA;
3815 	saext->sadb_sa_spi = assoc->ipsa_spi;
3816 	saext->sadb_sa_replay = assoc->ipsa_replay_wsize;
3817 	saext->sadb_sa_state = assoc->ipsa_state;
3818 	saext->sadb_sa_auth = assoc->ipsa_auth_alg;
3819 	saext->sadb_sa_encrypt = assoc->ipsa_encr_alg;
3820 	saext->sadb_sa_flags = assoc->ipsa_flags;
3821 
3822 	current = (sadb_lifetime_t *)mp->b_wptr;
3823 	mp->b_wptr += sizeof (sadb_lifetime_t);
3824 	current->sadb_lifetime_len = SADB_8TO64(sizeof (*current));
3825 	current->sadb_lifetime_exttype = SADB_EXT_LIFETIME_CURRENT;
3826 	/* We do not support the concept. */
3827 	current->sadb_lifetime_allocations = 0;
3828 	current->sadb_lifetime_bytes = assoc->ipsa_bytes;
3829 	current->sadb_lifetime_addtime = assoc->ipsa_addtime;
3830 	current->sadb_lifetime_usetime = assoc->ipsa_usetime;
3831 
3832 	expire = (sadb_lifetime_t *)mp->b_wptr;
3833 	mp->b_wptr += sizeof (*expire);
3834 	expire->sadb_lifetime_len = SADB_8TO64(sizeof (*expire));
3835 
3836 	if (assoc->ipsa_state == IPSA_STATE_DEAD) {
3837 		expire->sadb_lifetime_exttype = SADB_EXT_LIFETIME_HARD;
3838 		expire->sadb_lifetime_allocations = assoc->ipsa_hardalloc;
3839 		expire->sadb_lifetime_bytes = assoc->ipsa_hardbyteslt;
3840 		expire->sadb_lifetime_addtime = assoc->ipsa_hardaddlt;
3841 		expire->sadb_lifetime_usetime = assoc->ipsa_harduselt;
3842 	} else if (assoc->ipsa_state == IPSA_STATE_DYING) {
3843 		expire->sadb_lifetime_exttype = SADB_EXT_LIFETIME_SOFT;
3844 		expire->sadb_lifetime_allocations = assoc->ipsa_softalloc;
3845 		expire->sadb_lifetime_bytes = assoc->ipsa_softbyteslt;
3846 		expire->sadb_lifetime_addtime = assoc->ipsa_softaddlt;
3847 		expire->sadb_lifetime_usetime = assoc->ipsa_softuselt;
3848 	} else {
3849 		ASSERT(assoc->ipsa_state == IPSA_STATE_MATURE);
3850 		expire->sadb_lifetime_exttype = SADB_X_EXT_LIFETIME_IDLE;
3851 		expire->sadb_lifetime_allocations = 0;
3852 		expire->sadb_lifetime_bytes = 0;
3853 		expire->sadb_lifetime_addtime = assoc->ipsa_idleaddlt;
3854 		expire->sadb_lifetime_usetime = assoc->ipsa_idleuselt;
3855 	}
3856 
3857 	mp->b_wptr = sadb_make_addr_ext(mp->b_wptr, end, SADB_EXT_ADDRESS_SRC,
3858 	    af, assoc->ipsa_srcaddr, tunnel_mode ? 0 : SA_SRCPORT(assoc),
3859 	    SA_PROTO(assoc), 0);
3860 	ASSERT(mp->b_wptr != NULL);
3861 
3862 	mp->b_wptr = sadb_make_addr_ext(mp->b_wptr, end, SADB_EXT_ADDRESS_DST,
3863 	    af, assoc->ipsa_dstaddr, tunnel_mode ? 0 : SA_DSTPORT(assoc),
3864 	    SA_PROTO(assoc), 0);
3865 	ASSERT(mp->b_wptr != NULL);
3866 
3867 	if (tunnel_mode) {
3868 		mp->b_wptr = sadb_make_addr_ext(mp->b_wptr, end,
3869 		    SADB_X_EXT_ADDRESS_INNER_SRC, assoc->ipsa_innerfam,
3870 		    assoc->ipsa_innersrc, SA_SRCPORT(assoc), SA_IPROTO(assoc),
3871 		    assoc->ipsa_innersrcpfx);
3872 		ASSERT(mp->b_wptr != NULL);
3873 		mp->b_wptr = sadb_make_addr_ext(mp->b_wptr, end,
3874 		    SADB_X_EXT_ADDRESS_INNER_DST, assoc->ipsa_innerfam,
3875 		    assoc->ipsa_innerdst, SA_DSTPORT(assoc), SA_IPROTO(assoc),
3876 		    assoc->ipsa_innerdstpfx);
3877 		ASSERT(mp->b_wptr != NULL);
3878 	}
3879 
3880 	/* Can just putnext, we're ready to go! */
3881 	putnext(pfkey_q, mp1);
3882 }
3883 
3884 /*
3885  * "Age" the SA with the number of bytes that was used to protect traffic.
3886  * Send an SADB_EXPIRE message if appropriate.	Return B_TRUE if there was
3887  * enough "charge" left in the SA to protect the data.	Return B_FALSE
3888  * otherwise.  (If B_FALSE is returned, the association either was, or became
3889  * DEAD.)
3890  */
3891 boolean_t
3892 sadb_age_bytes(queue_t *pfkey_q, ipsa_t *assoc, uint64_t bytes,
3893     boolean_t sendmsg)
3894 {
3895 	boolean_t rc = B_TRUE;
3896 	uint64_t newtotal;
3897 
3898 	mutex_enter(&assoc->ipsa_lock);
3899 	newtotal = assoc->ipsa_bytes + bytes;
3900 	if (assoc->ipsa_hardbyteslt != 0 &&
3901 	    newtotal >= assoc->ipsa_hardbyteslt) {
3902 		if (assoc->ipsa_state != IPSA_STATE_DEAD) {
3903 			sadb_delete_cluster(assoc);
3904 			/*
3905 			 * Send EXPIRE message to PF_KEY.  May wish to pawn
3906 			 * this off on another non-interrupt thread.  Also
3907 			 * unlink this SA immediately.
3908 			 */
3909 			assoc->ipsa_state = IPSA_STATE_DEAD;
3910 			if (sendmsg)
3911 				sadb_expire_assoc(pfkey_q, assoc);
3912 			/*
3913 			 * Set non-zero expiration time so sadb_age_assoc()
3914 			 * will work when reaping.
3915 			 */
3916 			assoc->ipsa_hardexpiretime = (time_t)1;
3917 		} /* Else someone beat me to it! */
3918 		rc = B_FALSE;
3919 	} else if (assoc->ipsa_softbyteslt != 0 &&
3920 	    (newtotal >= assoc->ipsa_softbyteslt)) {
3921 		if (assoc->ipsa_state < IPSA_STATE_DYING) {
3922 			/*
3923 			 * Send EXPIRE message to PF_KEY.  May wish to pawn
3924 			 * this off on another non-interrupt thread.
3925 			 */
3926 			assoc->ipsa_state = IPSA_STATE_DYING;
3927 			assoc->ipsa_bytes = newtotal;
3928 			if (sendmsg)
3929 				sadb_expire_assoc(pfkey_q, assoc);
3930 		} /* Else someone beat me to it! */
3931 	}
3932 	if (rc == B_TRUE)
3933 		assoc->ipsa_bytes = newtotal;
3934 	mutex_exit(&assoc->ipsa_lock);
3935 	return (rc);
3936 }
3937 
3938 /*
3939  * "Torch" an individual SA.  Returns NULL, so it can be tail-called from
3940  *     sadb_age_assoc().
3941  */
3942 static ipsa_t *
3943 sadb_torch_assoc(isaf_t *head, ipsa_t *sa)
3944 {
3945 	ASSERT(MUTEX_HELD(&head->isaf_lock));
3946 	ASSERT(MUTEX_HELD(&sa->ipsa_lock));
3947 	ASSERT(sa->ipsa_state == IPSA_STATE_DEAD);
3948 
3949 	/*
3950 	 * Force cached SAs to be revalidated..
3951 	 */
3952 	head->isaf_gen++;
3953 
3954 	mutex_exit(&sa->ipsa_lock);
3955 	sadb_unlinkassoc(sa);
3956 
3957 	return (NULL);
3958 }
3959 
3960 /*
3961  * Do various SA-is-idle activities depending on delta (the number of idle
3962  * seconds on the SA) and/or other properties of the SA.
3963  *
3964  * Return B_TRUE if I've sent a packet, because I have to drop the
3965  * association's mutex before sending a packet out the wire.
3966  */
3967 /* ARGSUSED */
3968 static boolean_t
3969 sadb_idle_activities(ipsa_t *assoc, time_t delta, boolean_t inbound)
3970 {
3971 	ipsecesp_stack_t *espstack = assoc->ipsa_netstack->netstack_ipsecesp;
3972 	int nat_t_interval = espstack->ipsecesp_nat_keepalive_interval;
3973 
3974 	ASSERT(MUTEX_HELD(&assoc->ipsa_lock));
3975 
3976 	if (!inbound && (assoc->ipsa_flags & IPSA_F_NATT_LOC) &&
3977 	    delta >= nat_t_interval &&
3978 	    gethrestime_sec() - assoc->ipsa_last_nat_t_ka >= nat_t_interval) {
3979 		ASSERT(assoc->ipsa_type == SADB_SATYPE_ESP);
3980 		assoc->ipsa_last_nat_t_ka = gethrestime_sec();
3981 		mutex_exit(&assoc->ipsa_lock);
3982 		ipsecesp_send_keepalive(assoc);
3983 		return (B_TRUE);
3984 	}
3985 	return (B_FALSE);
3986 }
3987 
3988 /*
3989  * Return "assoc" if haspeer is true and I send an expire.  This allows
3990  * the consumers' aging functions to tidy up an expired SA's peer.
3991  */
3992 static ipsa_t *
3993 sadb_age_assoc(isaf_t *head, queue_t *pfkey_q, ipsa_t *assoc,
3994     time_t current, int reap_delay, boolean_t inbound)
3995 {
3996 	ipsa_t *retval = NULL;
3997 	boolean_t dropped_mutex = B_FALSE;
3998 
3999 	ASSERT(MUTEX_HELD(&head->isaf_lock));
4000 
4001 	mutex_enter(&assoc->ipsa_lock);
4002 
4003 	if (((assoc->ipsa_state == IPSA_STATE_LARVAL) ||
4004 	    ((assoc->ipsa_state == IPSA_STATE_IDLE) ||
4005 	    (assoc->ipsa_state == IPSA_STATE_ACTIVE_ELSEWHERE) &&
4006 	    (assoc->ipsa_hardexpiretime != 0))) &&
4007 	    (assoc->ipsa_hardexpiretime <= current)) {
4008 		assoc->ipsa_state = IPSA_STATE_DEAD;
4009 		return (sadb_torch_assoc(head, assoc));
4010 	}
4011 
4012 	/*
4013 	 * Check lifetimes.  Fortunately, SA setup is done
4014 	 * such that there are only two times to look at,
4015 	 * softexpiretime, and hardexpiretime.
4016 	 *
4017 	 * Check hard first.
4018 	 */
4019 
4020 	if (assoc->ipsa_hardexpiretime != 0 &&
4021 	    assoc->ipsa_hardexpiretime <= current) {
4022 		if (assoc->ipsa_state == IPSA_STATE_DEAD)
4023 			return (sadb_torch_assoc(head, assoc));
4024 
4025 		if (inbound) {
4026 			sadb_delete_cluster(assoc);
4027 		}
4028 
4029 		/*
4030 		 * Send SADB_EXPIRE with hard lifetime, delay for unlinking.
4031 		 */
4032 		assoc->ipsa_state = IPSA_STATE_DEAD;
4033 		if (assoc->ipsa_haspeer || assoc->ipsa_otherspi != 0) {
4034 			/*
4035 			 * If the SA is paired or peered with another, put
4036 			 * a copy on a list which can be processed later, the
4037 			 * pair/peer SA needs to be updated so the both die
4038 			 * at the same time.
4039 			 *
4040 			 * If I return assoc, I have to bump up its reference
4041 			 * count to keep with the ipsa_t reference count
4042 			 * semantics.
4043 			 */
4044 			IPSA_REFHOLD(assoc);
4045 			retval = assoc;
4046 		}
4047 		sadb_expire_assoc(pfkey_q, assoc);
4048 		assoc->ipsa_hardexpiretime = current + reap_delay;
4049 	} else if (assoc->ipsa_softexpiretime != 0 &&
4050 	    assoc->ipsa_softexpiretime <= current &&
4051 	    assoc->ipsa_state < IPSA_STATE_DYING) {
4052 		/*
4053 		 * Send EXPIRE message to PF_KEY.  May wish to pawn
4054 		 * this off on another non-interrupt thread.
4055 		 */
4056 		assoc->ipsa_state = IPSA_STATE_DYING;
4057 		if (assoc->ipsa_haspeer) {
4058 			/*
4059 			 * If the SA has a peer, update the peer's state
4060 			 * on SOFT_EXPIRE, this is mostly to prevent two
4061 			 * expire messages from effectively the same SA.
4062 			 *
4063 			 * Don't care about paired SA's, then can (and should)
4064 			 * be able to soft expire at different times.
4065 			 *
4066 			 * If I return assoc, I have to bump up its
4067 			 * reference count to keep with the ipsa_t reference
4068 			 * count semantics.
4069 			 */
4070 			IPSA_REFHOLD(assoc);
4071 			retval = assoc;
4072 		}
4073 		sadb_expire_assoc(pfkey_q, assoc);
4074 	} else if (assoc->ipsa_idletime != 0 &&
4075 	    assoc->ipsa_idleexpiretime <= current) {
4076 		if (assoc->ipsa_state == IPSA_STATE_ACTIVE_ELSEWHERE) {
4077 			assoc->ipsa_state = IPSA_STATE_IDLE;
4078 		}
4079 
4080 		/*
4081 		 * Need to handle Mature case
4082 		 */
4083 		if (assoc->ipsa_state == IPSA_STATE_MATURE) {
4084 			sadb_expire_assoc(pfkey_q, assoc);
4085 		}
4086 	} else {
4087 		/* Check idle time activities. */
4088 		dropped_mutex = sadb_idle_activities(assoc,
4089 		    current - assoc->ipsa_lastuse, inbound);
4090 	}
4091 
4092 	if (!dropped_mutex)
4093 		mutex_exit(&assoc->ipsa_lock);
4094 	return (retval);
4095 }
4096 
4097 /*
4098  * Called by a consumer protocol to do ther dirty work of reaping dead
4099  * Security Associations.
4100  *
4101  * NOTE: sadb_age_assoc() marks expired SA's as DEAD but only removed
4102  * SA's that are already marked DEAD, so expired SA's are only reaped
4103  * the second time sadb_ager() runs.
4104  */
4105 void
4106 sadb_ager(sadb_t *sp, queue_t *pfkey_q, int reap_delay, netstack_t *ns)
4107 {
4108 	int i;
4109 	isaf_t *bucket;
4110 	ipsa_t *assoc, *spare;
4111 	iacqf_t *acqlist;
4112 	ipsacq_t *acqrec, *spareacq;
4113 	templist_t *haspeerlist, *newbie;
4114 	/* Snapshot current time now. */
4115 	time_t current = gethrestime_sec();
4116 	haspeerlist = NULL;
4117 
4118 	/*
4119 	 * Do my dirty work.  This includes aging real entries, aging
4120 	 * larvals, and aging outstanding ACQUIREs.
4121 	 *
4122 	 * I hope I don't tie up resources for too long.
4123 	 */
4124 
4125 	/* Age acquires. */
4126 
4127 	for (i = 0; i < sp->sdb_hashsize; i++) {
4128 		acqlist = &sp->sdb_acq[i];
4129 		mutex_enter(&acqlist->iacqf_lock);
4130 		for (acqrec = acqlist->iacqf_ipsacq; acqrec != NULL;
4131 		    acqrec = spareacq) {
4132 			spareacq = acqrec->ipsacq_next;
4133 			if (current > acqrec->ipsacq_expire)
4134 				sadb_destroy_acquire(acqrec, ns);
4135 		}
4136 		mutex_exit(&acqlist->iacqf_lock);
4137 	}
4138 
4139 	/* Age inbound associations. */
4140 	for (i = 0; i < sp->sdb_hashsize; i++) {
4141 		bucket = &(sp->sdb_if[i]);
4142 		mutex_enter(&bucket->isaf_lock);
4143 		for (assoc = bucket->isaf_ipsa; assoc != NULL;
4144 		    assoc = spare) {
4145 			spare = assoc->ipsa_next;
4146 			if (sadb_age_assoc(bucket, pfkey_q, assoc, current,
4147 			    reap_delay, B_TRUE) != NULL) {
4148 				/*
4149 				 * Put SA's which have a peer or SA's which
4150 				 * are paired on a list for processing after
4151 				 * all the hash tables have been walked.
4152 				 *
4153 				 * sadb_age_assoc() increments the refcnt,
4154 				 * effectively doing an IPSA_REFHOLD().
4155 				 */
4156 				newbie = kmem_alloc(sizeof (*newbie),
4157 				    KM_NOSLEEP);
4158 				if (newbie == NULL) {
4159 					/*
4160 					 * Don't forget to REFRELE().
4161 					 */
4162 					IPSA_REFRELE(assoc);
4163 					continue;	/* for loop... */
4164 				}
4165 				newbie->next = haspeerlist;
4166 				newbie->ipsa = assoc;
4167 				haspeerlist = newbie;
4168 			}
4169 		}
4170 		mutex_exit(&bucket->isaf_lock);
4171 	}
4172 
4173 	age_pair_peer_list(haspeerlist, sp, B_FALSE);
4174 	haspeerlist = NULL;
4175 
4176 	/* Age outbound associations. */
4177 	for (i = 0; i < sp->sdb_hashsize; i++) {
4178 		bucket = &(sp->sdb_of[i]);
4179 		mutex_enter(&bucket->isaf_lock);
4180 		for (assoc = bucket->isaf_ipsa; assoc != NULL;
4181 		    assoc = spare) {
4182 			spare = assoc->ipsa_next;
4183 			if (sadb_age_assoc(bucket, pfkey_q, assoc, current,
4184 			    reap_delay, B_FALSE) != NULL) {
4185 				/*
4186 				 * sadb_age_assoc() increments the refcnt,
4187 				 * effectively doing an IPSA_REFHOLD().
4188 				 */
4189 				newbie = kmem_alloc(sizeof (*newbie),
4190 				    KM_NOSLEEP);
4191 				if (newbie == NULL) {
4192 					/*
4193 					 * Don't forget to REFRELE().
4194 					 */
4195 					IPSA_REFRELE(assoc);
4196 					continue;	/* for loop... */
4197 				}
4198 				newbie->next = haspeerlist;
4199 				newbie->ipsa = assoc;
4200 				haspeerlist = newbie;
4201 			}
4202 		}
4203 		mutex_exit(&bucket->isaf_lock);
4204 	}
4205 
4206 	age_pair_peer_list(haspeerlist, sp, B_TRUE);
4207 
4208 	/*
4209 	 * Run a GC pass to clean out dead identities.
4210 	 */
4211 	ipsid_gc(ns);
4212 }
4213 
4214 /*
4215  * Figure out when to reschedule the ager.
4216  */
4217 timeout_id_t
4218 sadb_retimeout(hrtime_t begin, queue_t *pfkey_q, void (*ager)(void *),
4219     void *agerarg, uint_t *intp, uint_t intmax, short mid)
4220 {
4221 	hrtime_t end = gethrtime();
4222 	uint_t interval = *intp;	/* "interval" is in ms. */
4223 
4224 	/*
4225 	 * See how long this took.  If it took too long, increase the
4226 	 * aging interval.
4227 	 */
4228 	if ((end - begin) > MSEC2NSEC(interval)) {
4229 		if (interval >= intmax) {
4230 			/* XXX Rate limit this?  Or recommend flush? */
4231 			(void) strlog(mid, 0, 0, SL_ERROR | SL_WARN,
4232 			    "Too many SA's to age out in %d msec.\n",
4233 			    intmax);
4234 		} else {
4235 			/* Double by shifting by one bit. */
4236 			interval <<= 1;
4237 			interval = min(interval, intmax);
4238 		}
4239 	} else if ((end - begin) <= (MSEC2NSEC(interval) / 2) &&
4240 	    interval > SADB_AGE_INTERVAL_DEFAULT) {
4241 		/*
4242 		 * If I took less than half of the interval, then I should
4243 		 * ratchet the interval back down.  Never automatically
4244 		 * shift below the default aging interval.
4245 		 *
4246 		 * NOTE:This even overrides manual setting of the age
4247 		 *	interval using NDD to lower the setting past the
4248 		 *	default.  In other words, if you set the interval
4249 		 *	lower than the default, and your SADB gets too big,
4250 		 *	the interval will only self-lower back to the default.
4251 		 */
4252 		/* Halve by shifting one bit. */
4253 		interval >>= 1;
4254 		interval = max(interval, SADB_AGE_INTERVAL_DEFAULT);
4255 	}
4256 	*intp = interval;
4257 	return (qtimeout(pfkey_q, ager, agerarg,
4258 	    drv_usectohz(interval * (MICROSEC / MILLISEC))));
4259 }
4260 
4261 
4262 /*
4263  * Update the lifetime values of an SA.	 This is the path an SADB_UPDATE
4264  * message takes when updating a MATURE or DYING SA.
4265  */
4266 static void
4267 sadb_update_lifetimes(ipsa_t *assoc, sadb_lifetime_t *hard,
4268     sadb_lifetime_t *soft, sadb_lifetime_t *idle, boolean_t outbound)
4269 {
4270 	mutex_enter(&assoc->ipsa_lock);
4271 
4272 	/*
4273 	 * XXX RFC 2367 mentions how an SADB_EXT_LIFETIME_CURRENT can be
4274 	 * passed in during an update message.	We currently don't handle
4275 	 * these.
4276 	 */
4277 
4278 	if (hard != NULL) {
4279 		if (hard->sadb_lifetime_bytes != 0)
4280 			assoc->ipsa_hardbyteslt = hard->sadb_lifetime_bytes;
4281 		if (hard->sadb_lifetime_usetime != 0)
4282 			assoc->ipsa_harduselt = hard->sadb_lifetime_usetime;
4283 		if (hard->sadb_lifetime_addtime != 0)
4284 			assoc->ipsa_hardaddlt = hard->sadb_lifetime_addtime;
4285 		if (assoc->ipsa_hardaddlt != 0) {
4286 			assoc->ipsa_hardexpiretime =
4287 			    assoc->ipsa_addtime + assoc->ipsa_hardaddlt;
4288 		}
4289 		if (assoc->ipsa_harduselt != 0 &&
4290 		    assoc->ipsa_flags & IPSA_F_USED) {
4291 			UPDATE_EXPIRE(assoc, harduselt, hardexpiretime);
4292 		}
4293 		if (hard->sadb_lifetime_allocations != 0)
4294 			assoc->ipsa_hardalloc = hard->sadb_lifetime_allocations;
4295 	}
4296 
4297 	if (soft != NULL) {
4298 		if (soft->sadb_lifetime_bytes != 0) {
4299 			if (soft->sadb_lifetime_bytes >
4300 			    assoc->ipsa_hardbyteslt) {
4301 				assoc->ipsa_softbyteslt =
4302 				    assoc->ipsa_hardbyteslt;
4303 			} else {
4304 				assoc->ipsa_softbyteslt =
4305 				    soft->sadb_lifetime_bytes;
4306 			}
4307 		}
4308 		if (soft->sadb_lifetime_usetime != 0) {
4309 			if (soft->sadb_lifetime_usetime >
4310 			    assoc->ipsa_harduselt) {
4311 				assoc->ipsa_softuselt =
4312 				    assoc->ipsa_harduselt;
4313 			} else {
4314 				assoc->ipsa_softuselt =
4315 				    soft->sadb_lifetime_usetime;
4316 			}
4317 		}
4318 		if (soft->sadb_lifetime_addtime != 0) {
4319 			if (soft->sadb_lifetime_addtime >
4320 			    assoc->ipsa_hardexpiretime) {
4321 				assoc->ipsa_softexpiretime =
4322 				    assoc->ipsa_hardexpiretime;
4323 			} else {
4324 				assoc->ipsa_softaddlt =
4325 				    soft->sadb_lifetime_addtime;
4326 			}
4327 		}
4328 		if (assoc->ipsa_softaddlt != 0) {
4329 			assoc->ipsa_softexpiretime =
4330 			    assoc->ipsa_addtime + assoc->ipsa_softaddlt;
4331 		}
4332 		if (assoc->ipsa_softuselt != 0 &&
4333 		    assoc->ipsa_flags & IPSA_F_USED) {
4334 			UPDATE_EXPIRE(assoc, softuselt, softexpiretime);
4335 		}
4336 		if (outbound && assoc->ipsa_softexpiretime != 0) {
4337 			if (assoc->ipsa_state == IPSA_STATE_MATURE)
4338 				lifetime_fuzz(assoc);
4339 		}
4340 
4341 		if (soft->sadb_lifetime_allocations != 0)
4342 			assoc->ipsa_softalloc = soft->sadb_lifetime_allocations;
4343 	}
4344 
4345 	if (idle != NULL) {
4346 		time_t current = gethrestime_sec();
4347 		if ((assoc->ipsa_idleexpiretime <= current) &&
4348 		    (assoc->ipsa_idleaddlt == idle->sadb_lifetime_addtime)) {
4349 			assoc->ipsa_idleexpiretime =
4350 			    current + assoc->ipsa_idleaddlt;
4351 		}
4352 		if (idle->sadb_lifetime_addtime != 0)
4353 			assoc->ipsa_idleaddlt = idle->sadb_lifetime_addtime;
4354 		if (idle->sadb_lifetime_usetime != 0)
4355 			assoc->ipsa_idleuselt = idle->sadb_lifetime_usetime;
4356 		if (assoc->ipsa_idleaddlt != 0) {
4357 			assoc->ipsa_idleexpiretime =
4358 			    current + idle->sadb_lifetime_addtime;
4359 			assoc->ipsa_idletime = idle->sadb_lifetime_addtime;
4360 		}
4361 		if (assoc->ipsa_idleuselt != 0) {
4362 			if (assoc->ipsa_idletime != 0) {
4363 				assoc->ipsa_idletime = min(assoc->ipsa_idletime,
4364 				    assoc->ipsa_idleuselt);
4365 			assoc->ipsa_idleexpiretime =
4366 			    current + assoc->ipsa_idletime;
4367 			} else {
4368 				assoc->ipsa_idleexpiretime =
4369 				    current + assoc->ipsa_idleuselt;
4370 				assoc->ipsa_idletime = assoc->ipsa_idleuselt;
4371 			}
4372 		}
4373 	}
4374 	mutex_exit(&assoc->ipsa_lock);
4375 }
4376 
4377 static int
4378 sadb_update_state(ipsa_t *assoc, uint_t new_state, mblk_t **ipkt_lst)
4379 {
4380 	int rcode = 0;
4381 	time_t current = gethrestime_sec();
4382 
4383 	mutex_enter(&assoc->ipsa_lock);
4384 
4385 	switch (new_state) {
4386 	case SADB_X_SASTATE_ACTIVE_ELSEWHERE:
4387 		if (assoc->ipsa_state == SADB_X_SASTATE_IDLE) {
4388 			assoc->ipsa_state = IPSA_STATE_ACTIVE_ELSEWHERE;
4389 			assoc->ipsa_idleexpiretime =
4390 			    current + assoc->ipsa_idletime;
4391 		}
4392 		break;
4393 	case SADB_X_SASTATE_IDLE:
4394 		if (assoc->ipsa_state == SADB_X_SASTATE_ACTIVE_ELSEWHERE) {
4395 			assoc->ipsa_state = IPSA_STATE_IDLE;
4396 			assoc->ipsa_idleexpiretime =
4397 			    current + assoc->ipsa_idletime;
4398 		} else {
4399 			rcode = EINVAL;
4400 		}
4401 		break;
4402 
4403 	case SADB_X_SASTATE_ACTIVE:
4404 		if (assoc->ipsa_state != SADB_X_SASTATE_IDLE) {
4405 			rcode = EINVAL;
4406 			break;
4407 		}
4408 		assoc->ipsa_state = IPSA_STATE_MATURE;
4409 		assoc->ipsa_idleexpiretime = current + assoc->ipsa_idletime;
4410 
4411 		if (ipkt_lst == NULL) {
4412 			break;
4413 		}
4414 
4415 		if (assoc->ipsa_bpkt_head != NULL) {
4416 			*ipkt_lst = assoc->ipsa_bpkt_head;
4417 			assoc->ipsa_bpkt_head = assoc->ipsa_bpkt_tail = NULL;
4418 			assoc->ipsa_mblkcnt = 0;
4419 		} else {
4420 			*ipkt_lst = NULL;
4421 		}
4422 		break;
4423 	default:
4424 		rcode = EINVAL;
4425 		break;
4426 	}
4427 
4428 	mutex_exit(&assoc->ipsa_lock);
4429 	return (rcode);
4430 }
4431 
4432 /*
4433  * Check a proposed KMC update for sanity.
4434  */
4435 static int
4436 sadb_check_kmc(ipsa_query_t *sq, ipsa_t *sa, int *diagnostic)
4437 {
4438 	uint32_t kmp = sq->kmp;
4439 	uint32_t kmc = sq->kmc;
4440 
4441 	if (sa == NULL)
4442 		return (0);
4443 
4444 	if (sa->ipsa_state == IPSA_STATE_DEAD)
4445 		return (ESRCH);	/* DEAD == Not there, in this case. */
4446 
4447 	if ((kmp != 0) && ((sa->ipsa_kmp != 0) || (sa->ipsa_kmp != kmp))) {
4448 		*diagnostic = SADB_X_DIAGNOSTIC_DUPLICATE_KMP;
4449 		return (EINVAL);
4450 	}
4451 
4452 	if ((kmc != 0) && ((sa->ipsa_kmc != 0) || (sa->ipsa_kmc != kmc))) {
4453 		*diagnostic = SADB_X_DIAGNOSTIC_DUPLICATE_KMC;
4454 		return (EINVAL);
4455 	}
4456 
4457 	return (0);
4458 }
4459 
4460 /*
4461  * Actually update the KMC info.
4462  */
4463 static void
4464 sadb_update_kmc(ipsa_query_t *sq, ipsa_t *sa)
4465 {
4466 	uint32_t kmp = sq->kmp;
4467 	uint32_t kmc = sq->kmc;
4468 
4469 	if (kmp != 0)
4470 		sa->ipsa_kmp = kmp;
4471 	if (kmc != 0)
4472 		sa->ipsa_kmc = kmc;
4473 }
4474 
4475 /*
4476  * Common code to update an SA.
4477  */
4478 
4479 int
4480 sadb_update_sa(mblk_t *mp, keysock_in_t *ksi, mblk_t **ipkt_lst,
4481     sadbp_t *spp, int *diagnostic, queue_t *pfkey_q,
4482     int (*add_sa_func)(mblk_t *, keysock_in_t *, int *, netstack_t *),
4483     netstack_t *ns, uint8_t sadb_msg_type)
4484 {
4485 	sadb_key_t *akey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_AUTH];
4486 	sadb_key_t *ekey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_ENCRYPT];
4487 	sadb_x_replay_ctr_t *replext =
4488 	    (sadb_x_replay_ctr_t *)ksi->ks_in_extv[SADB_X_EXT_REPLAY_VALUE];
4489 	sadb_lifetime_t *soft =
4490 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_SOFT];
4491 	sadb_lifetime_t *hard =
4492 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_HARD];
4493 	sadb_lifetime_t *idle =
4494 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_X_EXT_LIFETIME_IDLE];
4495 	sadb_x_pair_t *pair_ext =
4496 	    (sadb_x_pair_t *)ksi->ks_in_extv[SADB_X_EXT_PAIR];
4497 	ipsa_t *echo_target = NULL;
4498 	ipsap_t ipsapp;
4499 	ipsa_query_t sq;
4500 	time_t current = gethrestime_sec();
4501 
4502 	sq.spp = spp;		/* XXX param */
4503 	int error = sadb_form_query(ksi, IPSA_Q_SRC|IPSA_Q_DST|IPSA_Q_SA,
4504 	    IPSA_Q_SRC|IPSA_Q_DST|IPSA_Q_SA|IPSA_Q_INBOUND|IPSA_Q_OUTBOUND,
4505 	    &sq, diagnostic);
4506 
4507 	if (error != 0)
4508 		return (error);
4509 
4510 	error = get_ipsa_pair(&sq, &ipsapp, diagnostic);
4511 	if (error != 0)
4512 		return (error);
4513 
4514 	if (ipsapp.ipsap_psa_ptr == NULL && ipsapp.ipsap_sa_ptr != NULL) {
4515 		if (ipsapp.ipsap_sa_ptr->ipsa_state == IPSA_STATE_LARVAL) {
4516 			/*
4517 			 * REFRELE the target and let the add_sa_func()
4518 			 * deal with updating a larval SA.
4519 			 */
4520 			destroy_ipsa_pair(&ipsapp);
4521 			return (add_sa_func(mp, ksi, diagnostic, ns));
4522 		}
4523 	}
4524 
4525 	/*
4526 	 * At this point we have an UPDATE to a MATURE SA. There should
4527 	 * not be any keying material present.
4528 	 */
4529 	if (akey != NULL) {
4530 		*diagnostic = SADB_X_DIAGNOSTIC_AKEY_PRESENT;
4531 		error = EINVAL;
4532 		goto bail;
4533 	}
4534 	if (ekey != NULL) {
4535 		*diagnostic = SADB_X_DIAGNOSTIC_EKEY_PRESENT;
4536 		error = EINVAL;
4537 		goto bail;
4538 	}
4539 
4540 	if (sq.assoc->sadb_sa_state == SADB_X_SASTATE_ACTIVE_ELSEWHERE) {
4541 		if (ipsapp.ipsap_sa_ptr != NULL &&
4542 		    ipsapp.ipsap_sa_ptr->ipsa_state == IPSA_STATE_IDLE) {
4543 			if ((error = sadb_update_state(ipsapp.ipsap_sa_ptr,
4544 			    sq.assoc->sadb_sa_state, NULL)) != 0) {
4545 				*diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
4546 				goto bail;
4547 			}
4548 		}
4549 		if (ipsapp.ipsap_psa_ptr != NULL &&
4550 		    ipsapp.ipsap_psa_ptr->ipsa_state == IPSA_STATE_IDLE) {
4551 			if ((error = sadb_update_state(ipsapp.ipsap_psa_ptr,
4552 			    sq.assoc->sadb_sa_state, NULL)) != 0) {
4553 				*diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
4554 				goto bail;
4555 			}
4556 		}
4557 	}
4558 	if (sq.assoc->sadb_sa_state == SADB_X_SASTATE_ACTIVE) {
4559 		if (ipsapp.ipsap_sa_ptr != NULL) {
4560 			error = sadb_update_state(ipsapp.ipsap_sa_ptr,
4561 			    sq.assoc->sadb_sa_state,
4562 			    (ipsapp.ipsap_sa_ptr->ipsa_flags &
4563 			    IPSA_F_INBOUND) ? ipkt_lst : NULL);
4564 			if (error) {
4565 				*diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
4566 				goto bail;
4567 			}
4568 		}
4569 		if (ipsapp.ipsap_psa_ptr != NULL) {
4570 			error = sadb_update_state(ipsapp.ipsap_psa_ptr,
4571 			    sq.assoc->sadb_sa_state,
4572 			    (ipsapp.ipsap_psa_ptr->ipsa_flags &
4573 			    IPSA_F_INBOUND) ? ipkt_lst : NULL);
4574 			if (error) {
4575 				*diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
4576 				goto bail;
4577 			}
4578 		}
4579 		sadb_pfkey_echo(pfkey_q, mp, (sadb_msg_t *)mp->b_cont->b_rptr,
4580 		    ksi, echo_target);
4581 		goto bail;
4582 	}
4583 
4584 	/*
4585 	 * Reality checks for updates of active associations.
4586 	 * Sundry first-pass UPDATE-specific reality checks.
4587 	 * Have to do the checks here, because it's after the add_sa code.
4588 	 * XXX STATS : logging/stats here?
4589 	 */
4590 
4591 	if (!((sq.assoc->sadb_sa_state == SADB_SASTATE_MATURE) ||
4592 	    (sq.assoc->sadb_sa_state == SADB_X_SASTATE_ACTIVE_ELSEWHERE))) {
4593 		*diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
4594 		error = EINVAL;
4595 		goto bail;
4596 	}
4597 	if (sq.assoc->sadb_sa_flags & ~spp->s_updateflags) {
4598 		*diagnostic = SADB_X_DIAGNOSTIC_BAD_SAFLAGS;
4599 		error = EINVAL;
4600 		goto bail;
4601 	}
4602 	if (ksi->ks_in_extv[SADB_EXT_LIFETIME_CURRENT] != NULL) {
4603 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_LIFETIME;
4604 		error = EOPNOTSUPP;
4605 		goto bail;
4606 	}
4607 
4608 	if ((*diagnostic = sadb_hardsoftchk(hard, soft, idle)) != 0) {
4609 		error = EINVAL;
4610 		goto bail;
4611 	}
4612 
4613 	if ((*diagnostic = sadb_labelchk(ksi)) != 0)
4614 		return (EINVAL);
4615 
4616 	error = sadb_check_kmc(&sq, ipsapp.ipsap_sa_ptr, diagnostic);
4617 	if (error != 0)
4618 		goto bail;
4619 
4620 	error = sadb_check_kmc(&sq, ipsapp.ipsap_psa_ptr, diagnostic);
4621 	if (error != 0)
4622 		goto bail;
4623 
4624 
4625 	if (ipsapp.ipsap_sa_ptr != NULL) {
4626 		/*
4627 		 * Do not allow replay value change for MATURE or LARVAL SA.
4628 		 */
4629 
4630 		if ((replext != NULL) &&
4631 		    ((ipsapp.ipsap_sa_ptr->ipsa_state == IPSA_STATE_LARVAL) ||
4632 		    (ipsapp.ipsap_sa_ptr->ipsa_state == IPSA_STATE_MATURE))) {
4633 			*diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
4634 			error = EINVAL;
4635 			goto bail;
4636 		}
4637 	}
4638 
4639 
4640 	if (ipsapp.ipsap_sa_ptr != NULL) {
4641 		sadb_update_lifetimes(ipsapp.ipsap_sa_ptr, hard, soft,
4642 		    idle, B_TRUE);
4643 		sadb_update_kmc(&sq, ipsapp.ipsap_sa_ptr);
4644 		if ((replext != NULL) &&
4645 		    (ipsapp.ipsap_sa_ptr->ipsa_replay_wsize != 0)) {
4646 			/*
4647 			 * If an inbound SA, update the replay counter
4648 			 * and check off all the other sequence number
4649 			 */
4650 			if (ksi->ks_in_dsttype == KS_IN_ADDR_ME) {
4651 				if (!sadb_replay_check(ipsapp.ipsap_sa_ptr,
4652 				    replext->sadb_x_rc_replay32)) {
4653 					*diagnostic =
4654 					    SADB_X_DIAGNOSTIC_INVALID_REPLAY;
4655 					error = EINVAL;
4656 					goto bail;
4657 				}
4658 				mutex_enter(&ipsapp.ipsap_sa_ptr->ipsa_lock);
4659 				ipsapp.ipsap_sa_ptr->ipsa_idleexpiretime =
4660 				    current +
4661 				    ipsapp.ipsap_sa_ptr->ipsa_idletime;
4662 				mutex_exit(&ipsapp.ipsap_sa_ptr->ipsa_lock);
4663 			} else {
4664 				mutex_enter(&ipsapp.ipsap_sa_ptr->ipsa_lock);
4665 				ipsapp.ipsap_sa_ptr->ipsa_replay =
4666 				    replext->sadb_x_rc_replay32;
4667 				ipsapp.ipsap_sa_ptr->ipsa_idleexpiretime =
4668 				    current +
4669 				    ipsapp.ipsap_sa_ptr->ipsa_idletime;
4670 				mutex_exit(&ipsapp.ipsap_sa_ptr->ipsa_lock);
4671 			}
4672 		}
4673 	}
4674 
4675 	if (sadb_msg_type == SADB_X_UPDATEPAIR) {
4676 		if (ipsapp.ipsap_psa_ptr != NULL) {
4677 			sadb_update_lifetimes(ipsapp.ipsap_psa_ptr, hard, soft,
4678 			    idle, B_FALSE);
4679 			sadb_update_kmc(&sq, ipsapp.ipsap_psa_ptr);
4680 		} else {
4681 			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_SA_NOTFOUND;
4682 			error = ESRCH;
4683 			goto bail;
4684 		}
4685 	}
4686 
4687 	if (pair_ext != NULL)
4688 		error = update_pairing(&ipsapp, &sq, ksi, diagnostic);
4689 
4690 	if (error == 0)
4691 		sadb_pfkey_echo(pfkey_q, mp, (sadb_msg_t *)mp->b_cont->b_rptr,
4692 		    ksi, echo_target);
4693 bail:
4694 
4695 	destroy_ipsa_pair(&ipsapp);
4696 
4697 	return (error);
4698 }
4699 
4700 
4701 static int
4702 update_pairing(ipsap_t *ipsapp, ipsa_query_t *sq, keysock_in_t *ksi,
4703     int *diagnostic)
4704 {
4705 	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
4706 	sadb_x_pair_t *pair_ext =
4707 	    (sadb_x_pair_t *)ksi->ks_in_extv[SADB_X_EXT_PAIR];
4708 	int error = 0;
4709 	ipsap_t oipsapp;
4710 	boolean_t undo_pair = B_FALSE;
4711 	uint32_t ipsa_flags;
4712 
4713 	if (pair_ext->sadb_x_pair_spi == 0 || pair_ext->sadb_x_pair_spi ==
4714 	    assoc->sadb_sa_spi) {
4715 		*diagnostic = SADB_X_DIAGNOSTIC_PAIR_INAPPROPRIATE;
4716 		return (EINVAL);
4717 	}
4718 
4719 	/*
4720 	 * Assume for now that the spi value provided in the SADB_UPDATE
4721 	 * message was valid, update the SA with its pair spi value.
4722 	 * If the spi turns out to be bogus or the SA no longer exists
4723 	 * then this will be detected when the reverse update is made
4724 	 * below.
4725 	 */
4726 	mutex_enter(&ipsapp->ipsap_sa_ptr->ipsa_lock);
4727 	ipsapp->ipsap_sa_ptr->ipsa_flags |= IPSA_F_PAIRED;
4728 	ipsapp->ipsap_sa_ptr->ipsa_otherspi = pair_ext->sadb_x_pair_spi;
4729 	mutex_exit(&ipsapp->ipsap_sa_ptr->ipsa_lock);
4730 
4731 	/*
4732 	 * After updating the ipsa_otherspi element of the SA, get_ipsa_pair()
4733 	 * should now return pointers to the SA *AND* its pair, if this is not
4734 	 * the case, the "otherspi" either did not exist or was deleted. Also
4735 	 * check that "otherspi" is not already paired. If everything looks
4736 	 * good, complete the update. IPSA_REFRELE the first pair_pointer
4737 	 * after this update to ensure its not deleted until we are done.
4738 	 */
4739 	error = get_ipsa_pair(sq, &oipsapp, diagnostic);
4740 	if (error != 0) {
4741 		/*
4742 		 * This should never happen, calling function still has
4743 		 * IPSA_REFHELD on the SA we just updated.
4744 		 */
4745 		return (error);	/* XXX EINVAL instead of ESRCH? */
4746 	}
4747 
4748 	if (oipsapp.ipsap_psa_ptr == NULL) {
4749 		*diagnostic = SADB_X_DIAGNOSTIC_PAIR_INAPPROPRIATE;
4750 		error = EINVAL;
4751 		undo_pair = B_TRUE;
4752 	} else {
4753 		ipsa_flags = oipsapp.ipsap_psa_ptr->ipsa_flags;
4754 		if ((oipsapp.ipsap_psa_ptr->ipsa_state == IPSA_STATE_DEAD) ||
4755 		    (oipsapp.ipsap_psa_ptr->ipsa_state == IPSA_STATE_DYING)) {
4756 			/* Its dead Jim! */
4757 			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_INAPPROPRIATE;
4758 			undo_pair = B_TRUE;
4759 		} else if ((ipsa_flags & (IPSA_F_OUTBOUND | IPSA_F_INBOUND)) ==
4760 		    (IPSA_F_OUTBOUND | IPSA_F_INBOUND)) {
4761 			/* This SA is in both hashtables. */
4762 			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_INAPPROPRIATE;
4763 			undo_pair = B_TRUE;
4764 		} else if (ipsa_flags & IPSA_F_PAIRED) {
4765 			/* This SA is already paired with another. */
4766 			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_ALREADY;
4767 			undo_pair = B_TRUE;
4768 		}
4769 	}
4770 
4771 	if (undo_pair) {
4772 		/* The pair SA does not exist. */
4773 		mutex_enter(&ipsapp->ipsap_sa_ptr->ipsa_lock);
4774 		ipsapp->ipsap_sa_ptr->ipsa_flags &= ~IPSA_F_PAIRED;
4775 		ipsapp->ipsap_sa_ptr->ipsa_otherspi = 0;
4776 		mutex_exit(&ipsapp->ipsap_sa_ptr->ipsa_lock);
4777 	} else {
4778 		mutex_enter(&oipsapp.ipsap_psa_ptr->ipsa_lock);
4779 		oipsapp.ipsap_psa_ptr->ipsa_otherspi = assoc->sadb_sa_spi;
4780 		oipsapp.ipsap_psa_ptr->ipsa_flags |= IPSA_F_PAIRED;
4781 		mutex_exit(&oipsapp.ipsap_psa_ptr->ipsa_lock);
4782 	}
4783 
4784 	destroy_ipsa_pair(&oipsapp);
4785 	return (error);
4786 }
4787 
4788 /*
4789  * The following functions deal with ACQUIRE LISTS.  An ACQUIRE list is
4790  * a list of outstanding SADB_ACQUIRE messages.	 If ipsec_getassocbyconn() fails
4791  * for an outbound datagram, that datagram is queued up on an ACQUIRE record,
4792  * and an SADB_ACQUIRE message is sent up.  Presumably, a user-space key
4793  * management daemon will process the ACQUIRE, use a SADB_GETSPI to reserve
4794  * an SPI value and a larval SA, then SADB_UPDATE the larval SA, and ADD the
4795  * other direction's SA.
4796  */
4797 
4798 /*
4799  * Check the ACQUIRE lists.  If there's an existing ACQUIRE record,
4800  * grab it, lock it, and return it.  Otherwise return NULL.
4801  *
4802  * XXX MLS number of arguments getting unwieldy here
4803  */
4804 static ipsacq_t *
4805 sadb_checkacquire(iacqf_t *bucket, ipsec_action_t *ap, ipsec_policy_t *pp,
4806     uint32_t *src, uint32_t *dst, uint32_t *isrc, uint32_t *idst,
4807     uint64_t unique_id, ts_label_t *tsl)
4808 {
4809 	ipsacq_t *walker;
4810 	sa_family_t fam;
4811 	uint32_t blank_address[4] = {0, 0, 0, 0};
4812 
4813 	if (isrc == NULL) {
4814 		ASSERT(idst == NULL);
4815 		isrc = idst = blank_address;
4816 	}
4817 
4818 	/*
4819 	 * Scan list for duplicates.  Check for UNIQUE, src/dest, policy.
4820 	 *
4821 	 * XXX May need search for duplicates based on other things too!
4822 	 */
4823 	for (walker = bucket->iacqf_ipsacq; walker != NULL;
4824 	    walker = walker->ipsacq_next) {
4825 		mutex_enter(&walker->ipsacq_lock);
4826 		fam = walker->ipsacq_addrfam;
4827 		if (IPSA_ARE_ADDR_EQUAL(dst, walker->ipsacq_dstaddr, fam) &&
4828 		    IPSA_ARE_ADDR_EQUAL(src, walker->ipsacq_srcaddr, fam) &&
4829 		    ip_addr_match((uint8_t *)isrc, walker->ipsacq_innersrcpfx,
4830 		    (in6_addr_t *)walker->ipsacq_innersrc) &&
4831 		    ip_addr_match((uint8_t *)idst, walker->ipsacq_innerdstpfx,
4832 		    (in6_addr_t *)walker->ipsacq_innerdst) &&
4833 		    (ap == walker->ipsacq_act) &&
4834 		    (pp == walker->ipsacq_policy) &&
4835 		    /* XXX do deep compares of ap/pp? */
4836 		    (unique_id == walker->ipsacq_unique_id) &&
4837 		    (ipsec_label_match(tsl, walker->ipsacq_tsl)))
4838 			break;			/* everything matched */
4839 		mutex_exit(&walker->ipsacq_lock);
4840 	}
4841 
4842 	return (walker);
4843 }
4844 
4845 /*
4846  * For this mblk, insert a new acquire record.  Assume bucket contains addrs
4847  * of all of the same length.  Give up (and drop) if memory
4848  * cannot be allocated for a new one; otherwise, invoke callback to
4849  * send the acquire up..
4850  *
4851  * In cases where we need both AH and ESP, add the SA to the ESP ACQUIRE
4852  * list.  The ah_add_sa_finish() routines can look at the packet's attached
4853  * attributes and handle this case specially.
4854  */
4855 void
4856 sadb_acquire(mblk_t *datamp, ip_xmit_attr_t *ixa, boolean_t need_ah,
4857     boolean_t need_esp)
4858 {
4859 	mblk_t	*asyncmp;
4860 	sadbp_t *spp;
4861 	sadb_t *sp;
4862 	ipsacq_t *newbie;
4863 	iacqf_t *bucket;
4864 	mblk_t *extended;
4865 	ipha_t *ipha = (ipha_t *)datamp->b_rptr;
4866 	ip6_t *ip6h = (ip6_t *)datamp->b_rptr;
4867 	uint32_t *src, *dst, *isrc, *idst;
4868 	ipsec_policy_t *pp = ixa->ixa_ipsec_policy;
4869 	ipsec_action_t *ap = ixa->ixa_ipsec_action;
4870 	sa_family_t af;
4871 	int hashoffset;
4872 	uint32_t seq;
4873 	uint64_t unique_id = 0;
4874 	ipsec_selector_t sel;
4875 	boolean_t tunnel_mode = (ixa->ixa_flags & IXAF_IPSEC_TUNNEL) != 0;
4876 	ts_label_t 	*tsl = NULL;
4877 	netstack_t	*ns = ixa->ixa_ipst->ips_netstack;
4878 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
4879 	sadb_sens_t 	*sens = NULL;
4880 	int 		sens_len;
4881 
4882 	ASSERT((pp != NULL) || (ap != NULL));
4883 
4884 	ASSERT(need_ah != NULL || need_esp != NULL);
4885 
4886 	/* Assign sadb pointers */
4887 	if (need_esp) { /* ESP for AH+ESP */
4888 		ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
4889 
4890 		spp = &espstack->esp_sadb;
4891 	} else {
4892 		ipsecah_stack_t	*ahstack = ns->netstack_ipsecah;
4893 
4894 		spp = &ahstack->ah_sadb;
4895 	}
4896 	sp = (ixa->ixa_flags & IXAF_IS_IPV4) ? &spp->s_v4 : &spp->s_v6;
4897 
4898 	if (is_system_labeled())
4899 		tsl = ixa->ixa_tsl;
4900 
4901 	if (ap == NULL)
4902 		ap = pp->ipsp_act;
4903 
4904 	ASSERT(ap != NULL);
4905 
4906 	if (ap->ipa_act.ipa_apply.ipp_use_unique || tunnel_mode)
4907 		unique_id = SA_FORM_UNIQUE_ID(ixa);
4908 
4909 	/*
4910 	 * Set up an ACQUIRE record.
4911 	 *
4912 	 * Immediately, make sure the ACQUIRE sequence number doesn't slip
4913 	 * below the lowest point allowed in the kernel.  (In other words,
4914 	 * make sure the high bit on the sequence number is set.)
4915 	 */
4916 
4917 	seq = keysock_next_seq(ns) | IACQF_LOWEST_SEQ;
4918 
4919 	if (IPH_HDR_VERSION(ipha) == IP_VERSION) {
4920 		src = (uint32_t *)&ipha->ipha_src;
4921 		dst = (uint32_t *)&ipha->ipha_dst;
4922 		af = AF_INET;
4923 		hashoffset = OUTBOUND_HASH_V4(sp, ipha->ipha_dst);
4924 		ASSERT(ixa->ixa_flags & IXAF_IS_IPV4);
4925 	} else {
4926 		ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION);
4927 		src = (uint32_t *)&ip6h->ip6_src;
4928 		dst = (uint32_t *)&ip6h->ip6_dst;
4929 		af = AF_INET6;
4930 		hashoffset = OUTBOUND_HASH_V6(sp, ip6h->ip6_dst);
4931 		ASSERT(!(ixa->ixa_flags & IXAF_IS_IPV4));
4932 	}
4933 
4934 	if (tunnel_mode) {
4935 		if (pp == NULL) {
4936 			/*
4937 			 * Tunnel mode with no policy pointer means this is a
4938 			 * reflected ICMP (like a ECHO REQUEST) that came in
4939 			 * with self-encapsulated protection.  Until we better
4940 			 * support this, drop the packet.
4941 			 */
4942 			ip_drop_packet(datamp, B_FALSE, NULL,
4943 			    DROPPER(ipss, ipds_spd_got_selfencap),
4944 			    &ipss->ipsec_spd_dropper);
4945 			return;
4946 		}
4947 		/* Snag inner addresses. */
4948 		isrc = ixa->ixa_ipsec_insrc;
4949 		idst = ixa->ixa_ipsec_indst;
4950 	} else {
4951 		isrc = idst = NULL;
4952 	}
4953 
4954 	/*
4955 	 * Check buckets to see if there is an existing entry.  If so,
4956 	 * grab it.  sadb_checkacquire locks newbie if found.
4957 	 */
4958 	bucket = &(sp->sdb_acq[hashoffset]);
4959 	mutex_enter(&bucket->iacqf_lock);
4960 	newbie = sadb_checkacquire(bucket, ap, pp, src, dst, isrc, idst,
4961 	    unique_id, tsl);
4962 
4963 	if (newbie == NULL) {
4964 		/*
4965 		 * Otherwise, allocate a new one.
4966 		 */
4967 		newbie = kmem_zalloc(sizeof (*newbie), KM_NOSLEEP);
4968 		if (newbie == NULL) {
4969 			mutex_exit(&bucket->iacqf_lock);
4970 			ip_drop_packet(datamp, B_FALSE, NULL,
4971 			    DROPPER(ipss, ipds_sadb_acquire_nomem),
4972 			    &ipss->ipsec_sadb_dropper);
4973 			return;
4974 		}
4975 		newbie->ipsacq_policy = pp;
4976 		if (pp != NULL) {
4977 			IPPOL_REFHOLD(pp);
4978 		}
4979 		IPACT_REFHOLD(ap);
4980 		newbie->ipsacq_act = ap;
4981 		newbie->ipsacq_linklock = &bucket->iacqf_lock;
4982 		newbie->ipsacq_next = bucket->iacqf_ipsacq;
4983 		newbie->ipsacq_ptpn = &bucket->iacqf_ipsacq;
4984 		if (newbie->ipsacq_next != NULL)
4985 			newbie->ipsacq_next->ipsacq_ptpn = &newbie->ipsacq_next;
4986 
4987 		bucket->iacqf_ipsacq = newbie;
4988 		mutex_init(&newbie->ipsacq_lock, NULL, MUTEX_DEFAULT, NULL);
4989 		mutex_enter(&newbie->ipsacq_lock);
4990 	}
4991 
4992 	/*
4993 	 * XXX MLS does it actually help us to drop the bucket lock here?
4994 	 * we have inserted a half-built, locked acquire record into the
4995 	 * bucket.  any competing thread will now be able to lock the bucket
4996 	 * to scan it, but will immediately pile up on the new acquire
4997 	 * record's lock; I don't think we gain anything here other than to
4998 	 * disperse blame for lock contention.
4999 	 *
5000 	 * we might be able to dispense with acquire record locks entirely..
5001 	 * just use the bucket locks..
5002 	 */
5003 
5004 	mutex_exit(&bucket->iacqf_lock);
5005 
5006 	/*
5007 	 * This assert looks silly for now, but we may need to enter newbie's
5008 	 * mutex during a search.
5009 	 */
5010 	ASSERT(MUTEX_HELD(&newbie->ipsacq_lock));
5011 
5012 	/*
5013 	 * Make the ip_xmit_attr_t into something we can queue.
5014 	 * If no memory it frees datamp.
5015 	 */
5016 	asyncmp = ip_xmit_attr_to_mblk(ixa);
5017 	if (asyncmp != NULL)
5018 		linkb(asyncmp, datamp);
5019 
5020 	/* Queue up packet.  Use b_next. */
5021 
5022 	if (asyncmp == NULL) {
5023 		/* Statistics for allocation failure */
5024 		if (ixa->ixa_flags & IXAF_IS_IPV4) {
5025 			BUMP_MIB(&ixa->ixa_ipst->ips_ip_mib,
5026 			    ipIfStatsOutDiscards);
5027 		} else {
5028 			BUMP_MIB(&ixa->ixa_ipst->ips_ip6_mib,
5029 			    ipIfStatsOutDiscards);
5030 		}
5031 		ip_drop_output("No memory for asyncmp", datamp, NULL);
5032 		freemsg(datamp);
5033 	} else if (newbie->ipsacq_numpackets == 0) {
5034 		/* First one. */
5035 		newbie->ipsacq_mp = asyncmp;
5036 		newbie->ipsacq_numpackets = 1;
5037 		newbie->ipsacq_expire = gethrestime_sec();
5038 		/*
5039 		 * Extended ACQUIRE with both AH+ESP will use ESP's timeout
5040 		 * value.
5041 		 */
5042 		newbie->ipsacq_expire += *spp->s_acquire_timeout;
5043 		newbie->ipsacq_seq = seq;
5044 		newbie->ipsacq_addrfam = af;
5045 
5046 		newbie->ipsacq_srcport = ixa->ixa_ipsec_src_port;
5047 		newbie->ipsacq_dstport = ixa->ixa_ipsec_dst_port;
5048 		newbie->ipsacq_icmp_type = ixa->ixa_ipsec_icmp_type;
5049 		newbie->ipsacq_icmp_code = ixa->ixa_ipsec_icmp_code;
5050 		if (tunnel_mode) {
5051 			newbie->ipsacq_inneraddrfam = ixa->ixa_ipsec_inaf;
5052 			newbie->ipsacq_proto = ixa->ixa_ipsec_inaf == AF_INET6 ?
5053 			    IPPROTO_IPV6 : IPPROTO_ENCAP;
5054 			newbie->ipsacq_innersrcpfx = ixa->ixa_ipsec_insrcpfx;
5055 			newbie->ipsacq_innerdstpfx = ixa->ixa_ipsec_indstpfx;
5056 			IPSA_COPY_ADDR(newbie->ipsacq_innersrc,
5057 			    ixa->ixa_ipsec_insrc, ixa->ixa_ipsec_inaf);
5058 			IPSA_COPY_ADDR(newbie->ipsacq_innerdst,
5059 			    ixa->ixa_ipsec_indst, ixa->ixa_ipsec_inaf);
5060 		} else {
5061 			newbie->ipsacq_proto = ixa->ixa_ipsec_proto;
5062 		}
5063 		newbie->ipsacq_unique_id = unique_id;
5064 
5065 		if (ixa->ixa_tsl != NULL) {
5066 			label_hold(ixa->ixa_tsl);
5067 			newbie->ipsacq_tsl = ixa->ixa_tsl;
5068 		}
5069 	} else {
5070 		/* Scan to the end of the list & insert. */
5071 		mblk_t *lastone = newbie->ipsacq_mp;
5072 
5073 		while (lastone->b_next != NULL)
5074 			lastone = lastone->b_next;
5075 		lastone->b_next = asyncmp;
5076 		if (newbie->ipsacq_numpackets++ == ipsacq_maxpackets) {
5077 			newbie->ipsacq_numpackets = ipsacq_maxpackets;
5078 			lastone = newbie->ipsacq_mp;
5079 			newbie->ipsacq_mp = lastone->b_next;
5080 			lastone->b_next = NULL;
5081 
5082 			/* Freeing the async message */
5083 			lastone = ip_xmit_attr_free_mblk(lastone);
5084 			ip_drop_packet(lastone, B_FALSE, NULL,
5085 			    DROPPER(ipss, ipds_sadb_acquire_toofull),
5086 			    &ipss->ipsec_sadb_dropper);
5087 		} else {
5088 			IP_ACQUIRE_STAT(ipss, qhiwater,
5089 			    newbie->ipsacq_numpackets);
5090 		}
5091 	}
5092 
5093 	/*
5094 	 * Reset addresses.  Set them to the most recently added mblk chain,
5095 	 * so that the address pointers in the acquire record will point
5096 	 * at an mblk still attached to the acquire list.
5097 	 */
5098 
5099 	newbie->ipsacq_srcaddr = src;
5100 	newbie->ipsacq_dstaddr = dst;
5101 
5102 	/*
5103 	 * If the acquire record has more than one queued packet, we've
5104 	 * already sent an ACQUIRE, and don't need to repeat ourself.
5105 	 */
5106 	if (newbie->ipsacq_seq != seq || newbie->ipsacq_numpackets > 1) {
5107 		/* I have an acquire outstanding already! */
5108 		mutex_exit(&newbie->ipsacq_lock);
5109 		return;
5110 	}
5111 
5112 	if (!keysock_extended_reg(ns))
5113 		goto punt_extended;
5114 	/*
5115 	 * Construct an extended ACQUIRE.  There are logging
5116 	 * opportunities here in failure cases.
5117 	 */
5118 	bzero(&sel, sizeof (sel));
5119 	sel.ips_isv4 = (ixa->ixa_flags & IXAF_IS_IPV4) != 0;
5120 	if (tunnel_mode) {
5121 		sel.ips_protocol = (ixa->ixa_ipsec_inaf == AF_INET) ?
5122 		    IPPROTO_ENCAP : IPPROTO_IPV6;
5123 	} else {
5124 		sel.ips_protocol = ixa->ixa_ipsec_proto;
5125 		sel.ips_local_port = ixa->ixa_ipsec_src_port;
5126 		sel.ips_remote_port = ixa->ixa_ipsec_dst_port;
5127 	}
5128 	sel.ips_icmp_type = ixa->ixa_ipsec_icmp_type;
5129 	sel.ips_icmp_code = ixa->ixa_ipsec_icmp_code;
5130 	sel.ips_is_icmp_inv_acq = 0;
5131 	if (af == AF_INET) {
5132 		sel.ips_local_addr_v4 = ipha->ipha_src;
5133 		sel.ips_remote_addr_v4 = ipha->ipha_dst;
5134 	} else {
5135 		sel.ips_local_addr_v6 = ip6h->ip6_src;
5136 		sel.ips_remote_addr_v6 = ip6h->ip6_dst;
5137 	}
5138 
5139 	extended = sadb_keysock_out(0);
5140 	if (extended == NULL)
5141 		goto punt_extended;
5142 
5143 	if (ixa->ixa_tsl != NULL) {
5144 		/*
5145 		 * XXX MLS correct condition here?
5146 		 * XXX MLS other credential attributes in acquire?
5147 		 * XXX malloc failure?  don't fall back to original?
5148 		 */
5149 		sens = sadb_make_sens_ext(ixa->ixa_tsl, &sens_len);
5150 
5151 		if (sens == NULL) {
5152 			freeb(extended);
5153 			goto punt_extended;
5154 		}
5155 	}
5156 
5157 	extended->b_cont = sadb_extended_acquire(&sel, pp, ap, tunnel_mode,
5158 	    seq, 0, sens, ns);
5159 
5160 	if (sens != NULL)
5161 		kmem_free(sens, sens_len);
5162 
5163 	if (extended->b_cont == NULL) {
5164 		freeb(extended);
5165 		goto punt_extended;
5166 	}
5167 
5168 	/*
5169 	 * Send an ACQUIRE message (and possible an extended ACQUIRE) based on
5170 	 * this new record.  The send-acquire callback assumes that acqrec is
5171 	 * already locked.
5172 	 */
5173 	(*spp->s_acqfn)(newbie, extended, ns);
5174 	return;
5175 
5176 punt_extended:
5177 	(*spp->s_acqfn)(newbie, NULL, ns);
5178 }
5179 
5180 /*
5181  * Unlink and free an acquire record.
5182  */
5183 void
5184 sadb_destroy_acquire(ipsacq_t *acqrec, netstack_t *ns)
5185 {
5186 	mblk_t		*mp;
5187 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
5188 
5189 	ASSERT(MUTEX_HELD(acqrec->ipsacq_linklock));
5190 
5191 	if (acqrec->ipsacq_policy != NULL) {
5192 		IPPOL_REFRELE(acqrec->ipsacq_policy);
5193 	}
5194 	if (acqrec->ipsacq_act != NULL) {
5195 		IPACT_REFRELE(acqrec->ipsacq_act);
5196 	}
5197 
5198 	/* Unlink */
5199 	*(acqrec->ipsacq_ptpn) = acqrec->ipsacq_next;
5200 	if (acqrec->ipsacq_next != NULL)
5201 		acqrec->ipsacq_next->ipsacq_ptpn = acqrec->ipsacq_ptpn;
5202 
5203 	if (acqrec->ipsacq_tsl != NULL) {
5204 		label_rele(acqrec->ipsacq_tsl);
5205 		acqrec->ipsacq_tsl = NULL;
5206 	}
5207 
5208 	/*
5209 	 * Free hanging mp's.
5210 	 *
5211 	 * XXX Instead of freemsg(), perhaps use IPSEC_REQ_FAILED.
5212 	 */
5213 
5214 	mutex_enter(&acqrec->ipsacq_lock);
5215 	while (acqrec->ipsacq_mp != NULL) {
5216 		mp = acqrec->ipsacq_mp;
5217 		acqrec->ipsacq_mp = mp->b_next;
5218 		mp->b_next = NULL;
5219 		/* Freeing the async message */
5220 		mp = ip_xmit_attr_free_mblk(mp);
5221 		ip_drop_packet(mp, B_FALSE, NULL,
5222 		    DROPPER(ipss, ipds_sadb_acquire_timeout),
5223 		    &ipss->ipsec_sadb_dropper);
5224 	}
5225 	mutex_exit(&acqrec->ipsacq_lock);
5226 
5227 	/* Free */
5228 	mutex_destroy(&acqrec->ipsacq_lock);
5229 	kmem_free(acqrec, sizeof (*acqrec));
5230 }
5231 
5232 /*
5233  * Destroy an acquire list fanout.
5234  */
5235 static void
5236 sadb_destroy_acqlist(iacqf_t **listp, uint_t numentries, boolean_t forever,
5237     netstack_t *ns)
5238 {
5239 	int i;
5240 	iacqf_t *list = *listp;
5241 
5242 	if (list == NULL)
5243 		return;
5244 
5245 	for (i = 0; i < numentries; i++) {
5246 		mutex_enter(&(list[i].iacqf_lock));
5247 		while (list[i].iacqf_ipsacq != NULL)
5248 			sadb_destroy_acquire(list[i].iacqf_ipsacq, ns);
5249 		mutex_exit(&(list[i].iacqf_lock));
5250 		if (forever)
5251 			mutex_destroy(&(list[i].iacqf_lock));
5252 	}
5253 
5254 	if (forever) {
5255 		*listp = NULL;
5256 		kmem_free(list, numentries * sizeof (*list));
5257 	}
5258 }
5259 
5260 /*
5261  * Create an algorithm descriptor for an extended ACQUIRE.  Filter crypto
5262  * framework's view of reality vs. IPsec's.  EF's wins, BTW.
5263  */
5264 static uint8_t *
5265 sadb_new_algdesc(uint8_t *start, uint8_t *limit,
5266     sadb_x_ecomb_t *ecomb, uint8_t satype, uint8_t algtype,
5267     uint8_t alg, uint16_t minbits, uint16_t maxbits, ipsec_stack_t *ipss)
5268 {
5269 	uint8_t *cur = start;
5270 	ipsec_alginfo_t *algp;
5271 	sadb_x_algdesc_t *algdesc = (sadb_x_algdesc_t *)cur;
5272 
5273 	cur += sizeof (*algdesc);
5274 	if (cur >= limit)
5275 		return (NULL);
5276 
5277 	ecomb->sadb_x_ecomb_numalgs++;
5278 
5279 	/*
5280 	 * Normalize vs. crypto framework's limits.  This way, you can specify
5281 	 * a stronger policy, and when the framework loads a stronger version,
5282 	 * you can just keep plowing w/o rewhacking your SPD.
5283 	 */
5284 	mutex_enter(&ipss->ipsec_alg_lock);
5285 	algp = ipss->ipsec_alglists[(algtype == SADB_X_ALGTYPE_AUTH) ?
5286 	    IPSEC_ALG_AUTH : IPSEC_ALG_ENCR][alg];
5287 	if (algp == NULL) {
5288 		mutex_exit(&ipss->ipsec_alg_lock);
5289 		return (NULL);	/* Algorithm doesn't exist.  Fail gracefully. */
5290 	}
5291 	if (minbits < algp->alg_ef_minbits)
5292 		minbits = algp->alg_ef_minbits;
5293 	if (maxbits > algp->alg_ef_maxbits)
5294 		maxbits = algp->alg_ef_maxbits;
5295 	mutex_exit(&ipss->ipsec_alg_lock);
5296 
5297 	algdesc->sadb_x_algdesc_reserved = SADB_8TO1(algp->alg_saltlen);
5298 	algdesc->sadb_x_algdesc_satype = satype;
5299 	algdesc->sadb_x_algdesc_algtype = algtype;
5300 	algdesc->sadb_x_algdesc_alg = alg;
5301 	algdesc->sadb_x_algdesc_minbits = minbits;
5302 	algdesc->sadb_x_algdesc_maxbits = maxbits;
5303 
5304 	return (cur);
5305 }
5306 
5307 /*
5308  * Convert the given ipsec_action_t into an ecomb starting at *ecomb
5309  * which must fit before *limit
5310  *
5311  * return NULL if we ran out of room or a pointer to the end of the ecomb.
5312  */
5313 static uint8_t *
5314 sadb_action_to_ecomb(uint8_t *start, uint8_t *limit, ipsec_action_t *act,
5315     netstack_t *ns)
5316 {
5317 	uint8_t *cur = start;
5318 	sadb_x_ecomb_t *ecomb = (sadb_x_ecomb_t *)cur;
5319 	ipsec_prot_t *ipp;
5320 	ipsec_stack_t *ipss = ns->netstack_ipsec;
5321 
5322 	cur += sizeof (*ecomb);
5323 	if (cur >= limit)
5324 		return (NULL);
5325 
5326 	ASSERT(act->ipa_act.ipa_type == IPSEC_ACT_APPLY);
5327 
5328 	ipp = &act->ipa_act.ipa_apply;
5329 
5330 	ecomb->sadb_x_ecomb_numalgs = 0;
5331 	ecomb->sadb_x_ecomb_reserved = 0;
5332 	ecomb->sadb_x_ecomb_reserved2 = 0;
5333 	/*
5334 	 * No limits on allocations, since we really don't support that
5335 	 * concept currently.
5336 	 */
5337 	ecomb->sadb_x_ecomb_soft_allocations = 0;
5338 	ecomb->sadb_x_ecomb_hard_allocations = 0;
5339 
5340 	/*
5341 	 * XXX TBD: Policy or global parameters will eventually be
5342 	 * able to fill in some of these.
5343 	 */
5344 	ecomb->sadb_x_ecomb_flags = 0;
5345 	ecomb->sadb_x_ecomb_soft_bytes = 0;
5346 	ecomb->sadb_x_ecomb_hard_bytes = 0;
5347 	ecomb->sadb_x_ecomb_soft_addtime = 0;
5348 	ecomb->sadb_x_ecomb_hard_addtime = 0;
5349 	ecomb->sadb_x_ecomb_soft_usetime = 0;
5350 	ecomb->sadb_x_ecomb_hard_usetime = 0;
5351 
5352 	if (ipp->ipp_use_ah) {
5353 		cur = sadb_new_algdesc(cur, limit, ecomb,
5354 		    SADB_SATYPE_AH, SADB_X_ALGTYPE_AUTH, ipp->ipp_auth_alg,
5355 		    ipp->ipp_ah_minbits, ipp->ipp_ah_maxbits, ipss);
5356 		if (cur == NULL)
5357 			return (NULL);
5358 		ipsecah_fill_defs(ecomb, ns);
5359 	}
5360 
5361 	if (ipp->ipp_use_esp) {
5362 		if (ipp->ipp_use_espa) {
5363 			cur = sadb_new_algdesc(cur, limit, ecomb,
5364 			    SADB_SATYPE_ESP, SADB_X_ALGTYPE_AUTH,
5365 			    ipp->ipp_esp_auth_alg,
5366 			    ipp->ipp_espa_minbits,
5367 			    ipp->ipp_espa_maxbits, ipss);
5368 			if (cur == NULL)
5369 				return (NULL);
5370 		}
5371 
5372 		cur = sadb_new_algdesc(cur, limit, ecomb,
5373 		    SADB_SATYPE_ESP, SADB_X_ALGTYPE_CRYPT,
5374 		    ipp->ipp_encr_alg,
5375 		    ipp->ipp_espe_minbits,
5376 		    ipp->ipp_espe_maxbits, ipss);
5377 		if (cur == NULL)
5378 			return (NULL);
5379 		/* Fill in lifetimes if and only if AH didn't already... */
5380 		if (!ipp->ipp_use_ah)
5381 			ipsecesp_fill_defs(ecomb, ns);
5382 	}
5383 
5384 	return (cur);
5385 }
5386 
5387 #include <sys/tsol/label_macro.h> /* XXX should not need this */
5388 
/*
 * From a ts_label_t, construct a sensitivity label extension.
 *
 * We send up a fixed-size sensitivity label bitmap, and are perhaps
 * overly chummy with the underlying data structures here.
 */
5395 
5396 /* ARGSUSED */
5397 int
5398 sadb_sens_len_from_label(ts_label_t *tsl)
5399 {
5400 	int baselen = sizeof (sadb_sens_t) + _C_LEN * 4;
5401 	return (roundup(baselen, sizeof (uint64_t)));
5402 }
5403 
5404 void
5405 sadb_sens_from_label(sadb_sens_t *sens, int exttype, ts_label_t *tsl,
5406     int senslen)
5407 {
5408 	uint8_t *bitmap;
5409 	bslabel_t *sl;
5410 
5411 	/* LINTED */
5412 	ASSERT((_C_LEN & 1) == 0);
5413 	ASSERT((senslen & 7) == 0);
5414 
5415 	sl = label2bslabel(tsl);
5416 
5417 	sens->sadb_sens_exttype = exttype;
5418 	sens->sadb_sens_len = SADB_8TO64(senslen);
5419 
5420 	sens->sadb_sens_dpd = tsl->tsl_doi;
5421 	sens->sadb_sens_sens_level = LCLASS(sl);
5422 	sens->sadb_sens_integ_level = 0; /* TBD */
5423 	sens->sadb_sens_sens_len = _C_LEN >> 1;
5424 	sens->sadb_sens_integ_len = 0; /* TBD */
5425 	sens->sadb_x_sens_flags = 0;
5426 
5427 	bitmap = (uint8_t *)(sens + 1);
5428 	bcopy(&(((_bslabel_impl_t *)sl)->compartments), bitmap, _C_LEN * 4);
5429 }
5430 
5431 static sadb_sens_t *
5432 sadb_make_sens_ext(ts_label_t *tsl, int *len)
5433 {
5434 	/* XXX allocation failure? */
5435 	int sens_len = sadb_sens_len_from_label(tsl);
5436 
5437 	sadb_sens_t *sens = kmem_alloc(sens_len, KM_SLEEP);
5438 
5439 	sadb_sens_from_label(sens, SADB_EXT_SENSITIVITY, tsl, sens_len);
5440 
5441 	*len = sens_len;
5442 
5443 	return (sens);
5444 }
5445 
5446 /*
5447  * Okay, how do we report errors/invalid labels from this?
5448  * With a special designated "not a label" cred_t ?
5449  */
5450 /* ARGSUSED */
5451 ts_label_t *
5452 sadb_label_from_sens(sadb_sens_t *sens, uint64_t *bitmap)
5453 {
5454 	int bitmap_len = SADB_64TO8(sens->sadb_sens_sens_len);
5455 	bslabel_t sl;
5456 	ts_label_t *tsl;
5457 
5458 	if (sens->sadb_sens_integ_level != 0)
5459 		return (NULL);
5460 	if (sens->sadb_sens_integ_len != 0)
5461 		return (NULL);
5462 	if (bitmap_len > _C_LEN * 4)
5463 		return (NULL);
5464 
5465 	bsllow(&sl);
5466 	LCLASS_SET((_bslabel_impl_t *)&sl, sens->sadb_sens_sens_level);
5467 	bcopy(bitmap, &((_bslabel_impl_t *)&sl)->compartments,
5468 	    bitmap_len);
5469 
5470 	tsl = labelalloc(&sl, sens->sadb_sens_dpd, KM_NOSLEEP);
5471 	if (tsl == NULL)
5472 		return (NULL);
5473 
5474 	if (sens->sadb_x_sens_flags & SADB_X_SENS_UNLABELED)
5475 		tsl->tsl_flags |= TSLF_UNLABELED;
5476 	return (tsl);
5477 }
5478 
5479 /* End XXX label-library-leakage */
5480 
5481 /*
5482  * Construct an extended ACQUIRE message based on a selector and the resulting
5483  * IPsec action.
5484  *
5485  * NOTE: This is used by both inverse ACQUIRE and actual ACQUIRE
5486  * generation. As a consequence, expect this function to evolve
5487  * rapidly.
5488  */
5489 static mblk_t *
5490 sadb_extended_acquire(ipsec_selector_t *sel, ipsec_policy_t *pol,
5491     ipsec_action_t *act, boolean_t tunnel_mode, uint32_t seq, uint32_t pid,
5492     sadb_sens_t *sens, netstack_t *ns)
5493 {
5494 	mblk_t *mp;
5495 	sadb_msg_t *samsg;
5496 	uint8_t *start, *cur, *end;
5497 	uint32_t *saddrptr, *daddrptr;
5498 	sa_family_t af;
5499 	sadb_prop_t *eprop;
5500 	ipsec_action_t *ap, *an;
5501 	ipsec_selkey_t *ipsl;
5502 	uint8_t proto, pfxlen;
5503 	uint16_t lport, rport;
5504 	uint32_t kmp, kmc;
5505 
5506 	/*
5507 	 * Find the action we want sooner rather than later..
5508 	 */
5509 	an = NULL;
5510 	if (pol == NULL) {
5511 		ap = act;
5512 	} else {
5513 		ap = pol->ipsp_act;
5514 
5515 		if (ap != NULL)
5516 			an = ap->ipa_next;
5517 	}
5518 
5519 	/*
5520 	 * Just take a swag for the allocation for now.	 We can always
5521 	 * alter it later.
5522 	 */
5523 #define	SADB_EXTENDED_ACQUIRE_SIZE	4096
5524 	mp = allocb(SADB_EXTENDED_ACQUIRE_SIZE, BPRI_HI);
5525 	if (mp == NULL)
5526 		return (NULL);
5527 
5528 	start = mp->b_rptr;
5529 	end = start + SADB_EXTENDED_ACQUIRE_SIZE;
5530 
5531 	cur = start;
5532 
5533 	samsg = (sadb_msg_t *)cur;
5534 	cur += sizeof (*samsg);
5535 
5536 	samsg->sadb_msg_version = PF_KEY_V2;
5537 	samsg->sadb_msg_type = SADB_ACQUIRE;
5538 	samsg->sadb_msg_errno = 0;
5539 	samsg->sadb_msg_reserved = 0;
5540 	samsg->sadb_msg_satype = 0;
5541 	samsg->sadb_msg_seq = seq;
5542 	samsg->sadb_msg_pid = pid;
5543 
5544 	if (tunnel_mode) {
5545 		/*
5546 		 * Form inner address extensions based NOT on the inner
5547 		 * selectors (i.e. the packet data), but on the policy's
5548 		 * selector key (i.e. the policy's selector information).
5549 		 *
5550 		 * NOTE:  The position of IPv4 and IPv6 addresses is the
5551 		 * same in ipsec_selkey_t (unless the compiler does very
5552 		 * strange things with unions, consult your local C language
5553 		 * lawyer for details).
5554 		 */
5555 		ASSERT(pol != NULL);
5556 
5557 		ipsl = &(pol->ipsp_sel->ipsl_key);
5558 		if (ipsl->ipsl_valid & IPSL_IPV4) {
5559 			af = AF_INET;
5560 			ASSERT(sel->ips_protocol == IPPROTO_ENCAP);
5561 			ASSERT(!(ipsl->ipsl_valid & IPSL_IPV6));
5562 		} else {
5563 			af = AF_INET6;
5564 			ASSERT(sel->ips_protocol == IPPROTO_IPV6);
5565 			ASSERT(ipsl->ipsl_valid & IPSL_IPV6);
5566 		}
5567 
5568 		if (ipsl->ipsl_valid & IPSL_LOCAL_ADDR) {
5569 			saddrptr = (uint32_t *)(&ipsl->ipsl_local);
5570 			pfxlen = ipsl->ipsl_local_pfxlen;
5571 		} else {
5572 			saddrptr = (uint32_t *)(&ipv6_all_zeros);
5573 			pfxlen = 0;
5574 		}
5575 		/* XXX What about ICMP type/code? */
5576 		lport = (ipsl->ipsl_valid & IPSL_LOCAL_PORT) ?
5577 		    ipsl->ipsl_lport : 0;
5578 		proto = (ipsl->ipsl_valid & IPSL_PROTOCOL) ?
5579 		    ipsl->ipsl_proto : 0;
5580 
5581 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_SRC,
5582 		    af, saddrptr, lport, proto, pfxlen);
5583 		if (cur == NULL) {
5584 			freeb(mp);
5585 			return (NULL);
5586 		}
5587 
5588 		if (ipsl->ipsl_valid & IPSL_REMOTE_ADDR) {
5589 			daddrptr = (uint32_t *)(&ipsl->ipsl_remote);
5590 			pfxlen = ipsl->ipsl_remote_pfxlen;
5591 		} else {
5592 			daddrptr = (uint32_t *)(&ipv6_all_zeros);
5593 			pfxlen = 0;
5594 		}
5595 		/* XXX What about ICMP type/code? */
5596 		rport = (ipsl->ipsl_valid & IPSL_REMOTE_PORT) ?
5597 		    ipsl->ipsl_rport : 0;
5598 
5599 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_DST,
5600 		    af, daddrptr, rport, proto, pfxlen);
5601 		if (cur == NULL) {
5602 			freeb(mp);
5603 			return (NULL);
5604 		}
5605 		/*
5606 		 * TODO  - if we go to 3408's dream of transport mode IP-in-IP
5607 		 * _with_ inner-packet address selectors, we'll need to further
5608 		 * distinguish tunnel mode here.  For now, having inner
5609 		 * addresses and/or ports is sufficient.
5610 		 *
5611 		 * Meanwhile, whack proto/ports to reflect IP-in-IP for the
5612 		 * outer addresses.
5613 		 */
5614 		proto = sel->ips_protocol;	/* Either _ENCAP or _IPV6 */
5615 		lport = rport = 0;
5616 	} else if ((ap != NULL) && (!ap->ipa_want_unique)) {
5617 		proto = 0;
5618 		lport = 0;
5619 		rport = 0;
5620 		if (pol != NULL) {
5621 			ipsl = &(pol->ipsp_sel->ipsl_key);
5622 			if (ipsl->ipsl_valid & IPSL_PROTOCOL)
5623 				proto = ipsl->ipsl_proto;
5624 			if (ipsl->ipsl_valid & IPSL_REMOTE_PORT)
5625 				rport = ipsl->ipsl_rport;
5626 			if (ipsl->ipsl_valid & IPSL_LOCAL_PORT)
5627 				lport = ipsl->ipsl_lport;
5628 		}
5629 	} else {
5630 		proto = sel->ips_protocol;
5631 		lport = sel->ips_local_port;
5632 		rport = sel->ips_remote_port;
5633 	}
5634 
5635 	af = sel->ips_isv4 ? AF_INET : AF_INET6;
5636 
5637 	/*
5638 	 * NOTE:  The position of IPv4 and IPv6 addresses is the same in
5639 	 * ipsec_selector_t.
5640 	 */
5641 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_SRC, af,
5642 	    (uint32_t *)(&sel->ips_local_addr_v6), lport, proto, 0);
5643 
5644 	if (cur == NULL) {
5645 		freeb(mp);
5646 		return (NULL);
5647 	}
5648 
5649 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_DST, af,
5650 	    (uint32_t *)(&sel->ips_remote_addr_v6), rport, proto, 0);
5651 
5652 	if (cur == NULL) {
5653 		freeb(mp);
5654 		return (NULL);
5655 	}
5656 
5657 	if (sens != NULL) {
5658 		uint8_t *sensext = cur;
5659 		int senslen = SADB_64TO8(sens->sadb_sens_len);
5660 
5661 		cur += senslen;
5662 		if (cur > end) {
5663 			freeb(mp);
5664 			return (NULL);
5665 		}
5666 		bcopy(sens, sensext, senslen);
5667 	}
5668 
5669 	/*
5670 	 * This section will change a lot as policy evolves.
5671 	 * For now, it'll be relatively simple.
5672 	 */
5673 	eprop = (sadb_prop_t *)cur;
5674 	cur += sizeof (*eprop);
5675 	if (cur > end) {
5676 		/* no space left */
5677 		freeb(mp);
5678 		return (NULL);
5679 	}
5680 
5681 	eprop->sadb_prop_exttype = SADB_X_EXT_EPROP;
5682 	eprop->sadb_x_prop_ereserved = 0;
5683 	eprop->sadb_x_prop_numecombs = 0;
5684 	eprop->sadb_prop_replay = 32;	/* default */
5685 
5686 	kmc = kmp = 0;
5687 
5688 	for (; ap != NULL; ap = an) {
5689 		an = (pol != NULL) ? ap->ipa_next : NULL;
5690 
5691 		/*
5692 		 * Skip non-IPsec policies
5693 		 */
5694 		if (ap->ipa_act.ipa_type != IPSEC_ACT_APPLY)
5695 			continue;
5696 
5697 		if (ap->ipa_act.ipa_apply.ipp_km_proto)
5698 			kmp = ap->ipa_act.ipa_apply.ipp_km_proto;
5699 		if (ap->ipa_act.ipa_apply.ipp_km_cookie)
5700 			kmc = ap->ipa_act.ipa_apply.ipp_km_cookie;
5701 		if (ap->ipa_act.ipa_apply.ipp_replay_depth) {
5702 			eprop->sadb_prop_replay =
5703 			    ap->ipa_act.ipa_apply.ipp_replay_depth;
5704 		}
5705 
5706 		cur = sadb_action_to_ecomb(cur, end, ap, ns);
5707 		if (cur == NULL) { /* no space */
5708 			freeb(mp);
5709 			return (NULL);
5710 		}
5711 		eprop->sadb_x_prop_numecombs++;
5712 	}
5713 
5714 	if (eprop->sadb_x_prop_numecombs == 0) {
5715 		/*
5716 		 * This will happen if we fail to find a policy
5717 		 * allowing for IPsec processing.
5718 		 * Construct an error message.
5719 		 */
5720 		samsg->sadb_msg_len = SADB_8TO64(sizeof (*samsg));
5721 		samsg->sadb_msg_errno = ENOENT;
5722 		samsg->sadb_x_msg_diagnostic = 0;
5723 		return (mp);
5724 	}
5725 
5726 	if ((kmp != 0) || (kmc != 0)) {
5727 		cur = sadb_make_kmc_ext(cur, end, kmp, kmc);
5728 		if (cur == NULL) {
5729 			freeb(mp);
5730 			return (NULL);
5731 		}
5732 	}
5733 
5734 	eprop->sadb_prop_len = SADB_8TO64(cur - (uint8_t *)eprop);
5735 	samsg->sadb_msg_len = SADB_8TO64(cur - start);
5736 	mp->b_wptr = cur;
5737 
5738 	return (mp);
5739 }
5740 
5741 /*
5742  * Generic setup of an RFC 2367 ACQUIRE message.  Caller sets satype.
5743  *
5744  * NOTE: This function acquires alg_lock as a side-effect if-and-only-if we
5745  * succeed (i.e. return non-NULL).  Caller MUST release it.  This is to
5746  * maximize code consolidation while preventing algorithm changes from messing
5747  * with the callers finishing touches on the ACQUIRE itself.
5748  */
5749 mblk_t *
5750 sadb_setup_acquire(ipsacq_t *acqrec, uint8_t satype, ipsec_stack_t *ipss)
5751 {
5752 	uint_t allocsize;
5753 	mblk_t *pfkeymp, *msgmp;
5754 	sa_family_t af;
5755 	uint8_t *cur, *end;
5756 	sadb_msg_t *samsg;
5757 	uint16_t sport_typecode;
5758 	uint16_t dport_typecode;
5759 	uint8_t check_proto;
5760 	boolean_t tunnel_mode = (acqrec->ipsacq_inneraddrfam != 0);
5761 
5762 	ASSERT(MUTEX_HELD(&acqrec->ipsacq_lock));
5763 
5764 	pfkeymp = sadb_keysock_out(0);
5765 	if (pfkeymp == NULL)
5766 		return (NULL);
5767 
5768 	/*
5769 	 * First, allocate a basic ACQUIRE message
5770 	 */
5771 	allocsize = sizeof (sadb_msg_t) + sizeof (sadb_address_t) +
5772 	    sizeof (sadb_address_t) + sizeof (sadb_prop_t);
5773 
5774 	/* Make sure there's enough to cover both AF_INET and AF_INET6. */
5775 	allocsize += 2 * sizeof (struct sockaddr_in6);
5776 
5777 	mutex_enter(&ipss->ipsec_alg_lock);
5778 	/* NOTE:  The lock is now held through to this function's return. */
5779 	allocsize += ipss->ipsec_nalgs[IPSEC_ALG_AUTH] *
5780 	    ipss->ipsec_nalgs[IPSEC_ALG_ENCR] * sizeof (sadb_comb_t);
5781 
5782 	if (tunnel_mode) {
5783 		/* Tunnel mode! */
5784 		allocsize += 2 * sizeof (sadb_address_t);
5785 		/* Enough to cover both AF_INET and AF_INET6. */
5786 		allocsize += 2 * sizeof (struct sockaddr_in6);
5787 	}
5788 
5789 	msgmp = allocb(allocsize, BPRI_HI);
5790 	if (msgmp == NULL) {
5791 		freeb(pfkeymp);
5792 		mutex_exit(&ipss->ipsec_alg_lock);
5793 		return (NULL);
5794 	}
5795 
5796 	pfkeymp->b_cont = msgmp;
5797 	cur = msgmp->b_rptr;
5798 	end = cur + allocsize;
5799 	samsg = (sadb_msg_t *)cur;
5800 	cur += sizeof (sadb_msg_t);
5801 
5802 	af = acqrec->ipsacq_addrfam;
5803 	switch (af) {
5804 	case AF_INET:
5805 		check_proto = IPPROTO_ICMP;
5806 		break;
5807 	case AF_INET6:
5808 		check_proto = IPPROTO_ICMPV6;
5809 		break;
5810 	default:
5811 		/* This should never happen unless we have kernel bugs. */
5812 		cmn_err(CE_WARN,
5813 		    "sadb_setup_acquire:  corrupt ACQUIRE record.\n");
5814 		ASSERT(0);
5815 		mutex_exit(&ipss->ipsec_alg_lock);
5816 		return (NULL);
5817 	}
5818 
5819 	samsg->sadb_msg_version = PF_KEY_V2;
5820 	samsg->sadb_msg_type = SADB_ACQUIRE;
5821 	samsg->sadb_msg_satype = satype;
5822 	samsg->sadb_msg_errno = 0;
5823 	samsg->sadb_msg_pid = 0;
5824 	samsg->sadb_msg_reserved = 0;
5825 	samsg->sadb_msg_seq = acqrec->ipsacq_seq;
5826 
5827 	ASSERT(MUTEX_HELD(&acqrec->ipsacq_lock));
5828 
5829 	if ((acqrec->ipsacq_proto == check_proto) || tunnel_mode) {
5830 		sport_typecode = dport_typecode = 0;
5831 	} else {
5832 		sport_typecode = acqrec->ipsacq_srcport;
5833 		dport_typecode = acqrec->ipsacq_dstport;
5834 	}
5835 
5836 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_SRC, af,
5837 	    acqrec->ipsacq_srcaddr, sport_typecode, acqrec->ipsacq_proto, 0);
5838 
5839 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_DST, af,
5840 	    acqrec->ipsacq_dstaddr, dport_typecode, acqrec->ipsacq_proto, 0);
5841 
5842 	if (tunnel_mode) {
5843 		sport_typecode = acqrec->ipsacq_srcport;
5844 		dport_typecode = acqrec->ipsacq_dstport;
5845 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_SRC,
5846 		    acqrec->ipsacq_inneraddrfam, acqrec->ipsacq_innersrc,
5847 		    sport_typecode, acqrec->ipsacq_inner_proto,
5848 		    acqrec->ipsacq_innersrcpfx);
5849 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_DST,
5850 		    acqrec->ipsacq_inneraddrfam, acqrec->ipsacq_innerdst,
5851 		    dport_typecode, acqrec->ipsacq_inner_proto,
5852 		    acqrec->ipsacq_innerdstpfx);
5853 	}
5854 
5855 	/* XXX Insert identity information here. */
5856 
5857 	/* XXXMLS Insert sensitivity information here. */
5858 
5859 	if (cur != NULL)
5860 		samsg->sadb_msg_len = SADB_8TO64(cur - msgmp->b_rptr);
5861 	else
5862 		mutex_exit(&ipss->ipsec_alg_lock);
5863 
5864 	return (pfkeymp);
5865 }
5866 
5867 /*
5868  * Given an SADB_GETSPI message, find an appropriately ranged SA and
5869  * allocate an SA.  If there are message improprieties, return (ipsa_t *)-1.
5870  * If there was a memory allocation error, return NULL.	 (Assume NULL !=
5871  * (ipsa_t *)-1).
5872  *
5873  * master_spi is passed in host order.
5874  */
5875 ipsa_t *
5876 sadb_getspi(keysock_in_t *ksi, uint32_t master_spi, int *diagnostic,
5877     netstack_t *ns, uint_t sa_type)
5878 {
5879 	sadb_address_t *src =
5880 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC],
5881 	    *dst = (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
5882 	sadb_spirange_t *range =
5883 	    (sadb_spirange_t *)ksi->ks_in_extv[SADB_EXT_SPIRANGE];
5884 	struct sockaddr_in *ssa, *dsa;
5885 	struct sockaddr_in6 *ssa6, *dsa6;
5886 	uint32_t *srcaddr, *dstaddr;
5887 	sa_family_t af;
5888 	uint32_t add, min, max;
5889 	uint8_t protocol =
5890 	    (sa_type == SADB_SATYPE_AH) ? IPPROTO_AH : IPPROTO_ESP;
5891 
5892 	if (src == NULL) {
5893 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SRC;
5894 		return ((ipsa_t *)-1);
5895 	}
5896 	if (dst == NULL) {
5897 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
5898 		return ((ipsa_t *)-1);
5899 	}
5900 	if (range == NULL) {
5901 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_RANGE;
5902 		return ((ipsa_t *)-1);
5903 	}
5904 
5905 	min = ntohl(range->sadb_spirange_min);
5906 	max = ntohl(range->sadb_spirange_max);
5907 	dsa = (struct sockaddr_in *)(dst + 1);
5908 	dsa6 = (struct sockaddr_in6 *)dsa;
5909 
5910 	ssa = (struct sockaddr_in *)(src + 1);
5911 	ssa6 = (struct sockaddr_in6 *)ssa;
5912 	ASSERT(dsa->sin_family == ssa->sin_family);
5913 
5914 	srcaddr = ALL_ZEROES_PTR;
5915 	af = dsa->sin_family;
5916 	switch (af) {
5917 	case AF_INET:
5918 		if (src != NULL)
5919 			srcaddr = (uint32_t *)(&ssa->sin_addr);
5920 		dstaddr = (uint32_t *)(&dsa->sin_addr);
5921 		break;
5922 	case AF_INET6:
5923 		if (src != NULL)
5924 			srcaddr = (uint32_t *)(&ssa6->sin6_addr);
5925 		dstaddr = (uint32_t *)(&dsa6->sin6_addr);
5926 		break;
5927 	default:
5928 		*diagnostic = SADB_X_DIAGNOSTIC_BAD_DST_AF;
5929 		return ((ipsa_t *)-1);
5930 	}
5931 
5932 	if (master_spi < min || master_spi > max) {
5933 		/* Return a random value in the range. */
5934 		if (cl_inet_getspi) {
5935 			cl_inet_getspi(ns->netstack_stackid, protocol,
5936 			    (uint8_t *)&add, sizeof (add), NULL);
5937 		} else {
5938 			(void) random_get_pseudo_bytes((uint8_t *)&add,
5939 			    sizeof (add));
5940 		}
5941 		master_spi = min + (add % (max - min + 1));
5942 	}
5943 
5944 	/*
5945 	 * Since master_spi is passed in host order, we need to htonl() it
5946 	 * for the purposes of creating a new SA.
5947 	 */
5948 	return (sadb_makelarvalassoc(htonl(master_spi), srcaddr, dstaddr, af,
5949 	    ns));
5950 }
5951 
5952 /*
5953  *
5954  * Locate an ACQUIRE and nuke it.  If I have an samsg that's larger than the
5955  * base header, just ignore it.	 Otherwise, lock down the whole ACQUIRE list
5956  * and scan for the sequence number in question.  I may wish to accept an
5957  * address pair with it, for easier searching.
5958  *
5959  * Caller frees the message, so we don't have to here.
5960  *
5961  * NOTE:	The pfkey_q parameter may be used in the future for ACQUIRE
5962  *		failures.
5963  */
5964 /* ARGSUSED */
5965 void
5966 sadb_in_acquire(sadb_msg_t *samsg, sadbp_t *sp, queue_t *pfkey_q,
5967     netstack_t *ns)
5968 {
5969 	int i;
5970 	ipsacq_t *acqrec;
5971 	iacqf_t *bucket;
5972 
5973 	/*
5974 	 * I only accept the base header for this!
5975 	 * Though to be honest, requiring the dst address would help
5976 	 * immensely.
5977 	 *
5978 	 * XXX	There are already cases where I can get the dst address.
5979 	 */
5980 	if (samsg->sadb_msg_len > SADB_8TO64(sizeof (*samsg)))
5981 		return;
5982 
5983 	/*
5984 	 * Using the samsg->sadb_msg_seq, find the ACQUIRE record, delete it,
5985 	 * (and in the future send a message to IP with the appropriate error
5986 	 * number).
5987 	 *
5988 	 * Q: Do I want to reject if pid != 0?
5989 	 */
5990 
5991 	for (i = 0; i < sp->s_v4.sdb_hashsize; i++) {
5992 		bucket = &sp->s_v4.sdb_acq[i];
5993 		mutex_enter(&bucket->iacqf_lock);
5994 		for (acqrec = bucket->iacqf_ipsacq; acqrec != NULL;
5995 		    acqrec = acqrec->ipsacq_next) {
5996 			if (samsg->sadb_msg_seq == acqrec->ipsacq_seq)
5997 				break;	/* for acqrec... loop. */
5998 		}
5999 		if (acqrec != NULL)
6000 			break;	/* for i = 0... loop. */
6001 
6002 		mutex_exit(&bucket->iacqf_lock);
6003 	}
6004 
6005 	if (acqrec == NULL) {
6006 		for (i = 0; i < sp->s_v6.sdb_hashsize; i++) {
6007 			bucket = &sp->s_v6.sdb_acq[i];
6008 			mutex_enter(&bucket->iacqf_lock);
6009 			for (acqrec = bucket->iacqf_ipsacq; acqrec != NULL;
6010 			    acqrec = acqrec->ipsacq_next) {
6011 				if (samsg->sadb_msg_seq == acqrec->ipsacq_seq)
6012 					break;	/* for acqrec... loop. */
6013 			}
6014 			if (acqrec != NULL)
6015 				break;	/* for i = 0... loop. */
6016 
6017 			mutex_exit(&bucket->iacqf_lock);
6018 		}
6019 	}
6020 
6021 
6022 	if (acqrec == NULL)
6023 		return;
6024 
6025 	/*
6026 	 * What do I do with the errno and IP?	I may need mp's services a
6027 	 * little more.	 See sadb_destroy_acquire() for future directions
6028 	 * beyond free the mblk chain on the acquire record.
6029 	 */
6030 
6031 	ASSERT(&bucket->iacqf_lock == acqrec->ipsacq_linklock);
6032 	sadb_destroy_acquire(acqrec, ns);
6033 	/* Have to exit mutex here, because of breaking out of for loop. */
6034 	mutex_exit(&bucket->iacqf_lock);
6035 }
6036 
6037 /*
6038  * The following functions work with the replay windows of an SA.  They assume
6039  * the ipsa->ipsa_replay_arr is an array of uint64_t, and that the bit vector
6040  * represents the highest sequence number packet received, and back
6041  * (ipsa->ipsa_replay_wsize) packets.
6042  */
6043 
6044 /*
6045  * Is the replay bit set?
6046  */
6047 static boolean_t
6048 ipsa_is_replay_set(ipsa_t *ipsa, uint32_t offset)
6049 {
6050 	uint64_t bit = (uint64_t)1 << (uint64_t)(offset & 63);
6051 
6052 	return ((bit & ipsa->ipsa_replay_arr[offset >> 6]) ? B_TRUE : B_FALSE);
6053 }
6054 
6055 /*
6056  * Shift the bits of the replay window over.
6057  */
6058 static void
6059 ipsa_shift_replay(ipsa_t *ipsa, uint32_t shift)
6060 {
6061 	int i;
6062 	int jump = ((shift - 1) >> 6) + 1;
6063 
6064 	if (shift == 0)
6065 		return;
6066 
6067 	for (i = (ipsa->ipsa_replay_wsize - 1) >> 6; i >= 0; i--) {
6068 		if (i + jump <= (ipsa->ipsa_replay_wsize - 1) >> 6) {
6069 			ipsa->ipsa_replay_arr[i + jump] |=
6070 			    ipsa->ipsa_replay_arr[i] >> (64 - (shift & 63));
6071 		}
6072 		ipsa->ipsa_replay_arr[i] <<= shift;
6073 	}
6074 }
6075 
6076 /*
6077  * Set a bit in the bit vector.
6078  */
6079 static void
6080 ipsa_set_replay(ipsa_t *ipsa, uint32_t offset)
6081 {
6082 	uint64_t bit = (uint64_t)1 << (uint64_t)(offset & 63);
6083 
6084 	ipsa->ipsa_replay_arr[offset >> 6] |= bit;
6085 }
6086 
/*
 * Largest value a 32-bit replay counter can hold.  sadb_replay_peek() refuses
 * every further packet on an SA whose ipsa_replay has reached this value.
 */
#define	SADB_MAX_REPLAY_VALUE 0xffffffff
6088 
6089 /*
6090  * Assume caller has NOT done ntohl() already on seq.  Check to see
6091  * if replay sequence number "seq" has been seen already.
6092  */
6093 boolean_t
6094 sadb_replay_check(ipsa_t *ipsa, uint32_t seq)
6095 {
6096 	boolean_t rc;
6097 	uint32_t diff;
6098 
6099 	if (ipsa->ipsa_replay_wsize == 0)
6100 		return (B_TRUE);
6101 
6102 	/*
6103 	 * NOTE:  I've already checked for 0 on the wire in sadb_replay_peek().
6104 	 */
6105 
6106 	/* Convert sequence number into host order before holding the mutex. */
6107 	seq = ntohl(seq);
6108 
6109 	mutex_enter(&ipsa->ipsa_lock);
6110 
6111 	/* Initialize inbound SA's ipsa_replay field to last one received. */
6112 	if (ipsa->ipsa_replay == 0)
6113 		ipsa->ipsa_replay = 1;
6114 
6115 	if (seq > ipsa->ipsa_replay) {
6116 		/*
6117 		 * I have received a new "highest value received".  Shift
6118 		 * the replay window over.
6119 		 */
6120 		diff = seq - ipsa->ipsa_replay;
6121 		if (diff < ipsa->ipsa_replay_wsize) {
6122 			/* In replay window, shift bits over. */
6123 			ipsa_shift_replay(ipsa, diff);
6124 		} else {
6125 			/* WAY FAR AHEAD, clear bits and start again. */
6126 			bzero(ipsa->ipsa_replay_arr,
6127 			    sizeof (ipsa->ipsa_replay_arr));
6128 		}
6129 		ipsa_set_replay(ipsa, 0);
6130 		ipsa->ipsa_replay = seq;
6131 		rc = B_TRUE;
6132 		goto done;
6133 	}
6134 	diff = ipsa->ipsa_replay - seq;
6135 	if (diff >= ipsa->ipsa_replay_wsize || ipsa_is_replay_set(ipsa, diff)) {
6136 		rc = B_FALSE;
6137 		goto done;
6138 	}
6139 	/* Set this packet as seen. */
6140 	ipsa_set_replay(ipsa, diff);
6141 
6142 	rc = B_TRUE;
6143 done:
6144 	mutex_exit(&ipsa->ipsa_lock);
6145 	return (rc);
6146 }
6147 
6148 /*
6149  * "Peek" and see if we should even bother going through the effort of
6150  * running an authentication check on the sequence number passed in.
6151  * this takes into account packets that are below the replay window,
6152  * and collisions with already replayed packets.  Return B_TRUE if it
6153  * is okay to proceed, B_FALSE if this packet should be dropped immediately.
6154  * Assume same byte-ordering as sadb_replay_check.
6155  */
6156 boolean_t
6157 sadb_replay_peek(ipsa_t *ipsa, uint32_t seq)
6158 {
6159 	boolean_t rc = B_FALSE;
6160 	uint32_t diff;
6161 
6162 	if (ipsa->ipsa_replay_wsize == 0)
6163 		return (B_TRUE);
6164 
6165 	/*
6166 	 * 0 is 0, regardless of byte order... :)
6167 	 *
6168 	 * If I get 0 on the wire (and there is a replay window) then the
6169 	 * sender most likely wrapped.	This ipsa may need to be marked or
6170 	 * something.
6171 	 */
6172 	if (seq == 0)
6173 		return (B_FALSE);
6174 
6175 	seq = ntohl(seq);
6176 	mutex_enter(&ipsa->ipsa_lock);
6177 	if (seq < ipsa->ipsa_replay - ipsa->ipsa_replay_wsize &&
6178 	    ipsa->ipsa_replay >= ipsa->ipsa_replay_wsize)
6179 		goto done;
6180 
6181 	/*
6182 	 * If I've hit 0xffffffff, then quite honestly, I don't need to
6183 	 * bother with formalities.  I'm not accepting any more packets
6184 	 * on this SA.
6185 	 */
6186 	if (ipsa->ipsa_replay == SADB_MAX_REPLAY_VALUE) {
6187 		/*
6188 		 * Since we're already holding the lock, update the
6189 		 * expire time ala. sadb_replay_delete() and return.
6190 		 */
6191 		ipsa->ipsa_hardexpiretime = (time_t)1;
6192 		goto done;
6193 	}
6194 
6195 	if (seq <= ipsa->ipsa_replay) {
6196 		/*
6197 		 * This seq is in the replay window.  I'm not below it,
6198 		 * because I already checked for that above!
6199 		 */
6200 		diff = ipsa->ipsa_replay - seq;
6201 		if (ipsa_is_replay_set(ipsa, diff))
6202 			goto done;
6203 	}
6204 	/* Else return B_TRUE, I'm going to advance the window. */
6205 
6206 	rc = B_TRUE;
6207 done:
6208 	mutex_exit(&ipsa->ipsa_lock);
6209 	return (rc);
6210 }
6211 
/*
 * Delete a single SA.
 *
 * For now, use the quick-and-dirty trick of making the association's
 * hard-expire lifetime (time_t)1, ensuring deletion by the *_ager().
 *
 * Nothing is freed here; the only effect is the store to
 * ipsa_hardexpiretime, made under assoc->ipsa_lock.
 */
void
sadb_replay_delete(ipsa_t *assoc)
{
	mutex_enter(&assoc->ipsa_lock);
	assoc->ipsa_hardexpiretime = (time_t)1;
	mutex_exit(&assoc->ipsa_lock);
}
/*
 * Special front-end to ipsec_rl_strlog() dealing with SA failure.
 * this is designed to take only a format string with "* %x * %s *", so
 * that "spi" is printed first, then "addr" is converted to its presentation
 * string using inet_ntop().
 *
 * This is abstracted out to save the stack space for only when inet_ntop()
 * is called.  Make sure "spi" is in network order; it usually is when this
 * would get called.
 *
 * "af" must be AF_INET or AF_INET6 and selects how "addr" is interpreted.
 * mid, sid, level and sl are passed through to ipsec_rl_strlog() untouched.
 */
void
ipsec_assocfailure(short mid, short sid, char level, ushort_t sl, char *fmt,
    uint32_t spi, void *addr, int af, netstack_t *ns)
{
	/* Sized for the longer (IPv6) form, so either family fits. */
	char buf[INET6_ADDRSTRLEN];

	ASSERT(af == AF_INET6 || af == AF_INET);

	/* The SPI is logged in host order, hence the ntohl(). */
	ipsec_rl_strlog(ns, mid, sid, level, sl, fmt, ntohl(spi),
	    inet_ntop(af, addr, buf, sizeof (buf)));
}
6246 
6247 /*
6248  * Fills in a reference to the policy, if any, from the conn, in *ppp
6249  */
6250 static void
6251 ipsec_conn_pol(ipsec_selector_t *sel, conn_t *connp, ipsec_policy_t **ppp)
6252 {
6253 	ipsec_policy_t	*pp;
6254 	ipsec_latch_t	*ipl = connp->conn_latch;
6255 
6256 	if ((ipl != NULL) && (connp->conn_ixa->ixa_ipsec_policy != NULL)) {
6257 		pp = connp->conn_ixa->ixa_ipsec_policy;
6258 		IPPOL_REFHOLD(pp);
6259 	} else {
6260 		pp = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, connp, sel,
6261 		    connp->conn_netstack);
6262 	}
6263 	*ppp = pp;
6264 }
6265 
6266 /*
6267  * The following functions scan through active conn_t structures
6268  * and return a reference to the best-matching policy it can find.
6269  * Caller must release the reference.
6270  */
6271 static void
6272 ipsec_udp_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp, ip_stack_t *ipst)
6273 {
6274 	connf_t *connfp;
6275 	conn_t *connp = NULL;
6276 	ipsec_selector_t portonly;
6277 
6278 	bzero((void *)&portonly, sizeof (portonly));
6279 
6280 	if (sel->ips_local_port == 0)
6281 		return;
6282 
6283 	connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(sel->ips_local_port,
6284 	    ipst)];
6285 	mutex_enter(&connfp->connf_lock);
6286 
6287 	if (sel->ips_isv4) {
6288 		connp = connfp->connf_head;
6289 		while (connp != NULL) {
6290 			if (IPCL_UDP_MATCH(connp, sel->ips_local_port,
6291 			    sel->ips_local_addr_v4, sel->ips_remote_port,
6292 			    sel->ips_remote_addr_v4))
6293 				break;
6294 			connp = connp->conn_next;
6295 		}
6296 
6297 		if (connp == NULL) {
6298 			/* Try port-only match in IPv6. */
6299 			portonly.ips_local_port = sel->ips_local_port;
6300 			sel = &portonly;
6301 		}
6302 	}
6303 
6304 	if (connp == NULL) {
6305 		connp = connfp->connf_head;
6306 		while (connp != NULL) {
6307 			if (IPCL_UDP_MATCH_V6(connp, sel->ips_local_port,
6308 			    sel->ips_local_addr_v6, sel->ips_remote_port,
6309 			    sel->ips_remote_addr_v6))
6310 				break;
6311 			connp = connp->conn_next;
6312 		}
6313 
6314 		if (connp == NULL) {
6315 			mutex_exit(&connfp->connf_lock);
6316 			return;
6317 		}
6318 	}
6319 
6320 	CONN_INC_REF(connp);
6321 	mutex_exit(&connfp->connf_lock);
6322 
6323 	ipsec_conn_pol(sel, connp, ppp);
6324 	CONN_DEC_REF(connp);
6325 }
6326 
6327 static conn_t *
6328 ipsec_find_listen_conn(uint16_t *pptr, ipsec_selector_t *sel, ip_stack_t *ipst)
6329 {
6330 	connf_t *connfp;
6331 	conn_t *connp = NULL;
6332 	const in6_addr_t *v6addrmatch = &sel->ips_local_addr_v6;
6333 
6334 	if (sel->ips_local_port == 0)
6335 		return (NULL);
6336 
6337 	connfp = &ipst->ips_ipcl_bind_fanout[
6338 	    IPCL_BIND_HASH(sel->ips_local_port, ipst)];
6339 	mutex_enter(&connfp->connf_lock);
6340 
6341 	if (sel->ips_isv4) {
6342 		connp = connfp->connf_head;
6343 		while (connp != NULL) {
6344 			if (IPCL_BIND_MATCH(connp, IPPROTO_TCP,
6345 			    sel->ips_local_addr_v4, pptr[1]))
6346 				break;
6347 			connp = connp->conn_next;
6348 		}
6349 
6350 		if (connp == NULL) {
6351 			/* Match to all-zeroes. */
6352 			v6addrmatch = &ipv6_all_zeros;
6353 		}
6354 	}
6355 
6356 	if (connp == NULL) {
6357 		connp = connfp->connf_head;
6358 		while (connp != NULL) {
6359 			if (IPCL_BIND_MATCH_V6(connp, IPPROTO_TCP,
6360 			    *v6addrmatch, pptr[1]))
6361 				break;
6362 			connp = connp->conn_next;
6363 		}
6364 
6365 		if (connp == NULL) {
6366 			mutex_exit(&connfp->connf_lock);
6367 			return (NULL);
6368 		}
6369 	}
6370 
6371 	CONN_INC_REF(connp);
6372 	mutex_exit(&connfp->connf_lock);
6373 	return (connp);
6374 }
6375 
6376 static void
6377 ipsec_tcp_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp, ip_stack_t *ipst)
6378 {
6379 	connf_t 	*connfp;
6380 	conn_t		*connp;
6381 	uint32_t	ports;
6382 	uint16_t	*pptr = (uint16_t *)&ports;
6383 
6384 	/*
6385 	 * Find TCP state in the following order:
6386 	 * 1.) Connected conns.
6387 	 * 2.) Listeners.
6388 	 *
6389 	 * Even though #2 will be the common case for inbound traffic, only
6390 	 * following this order insures correctness.
6391 	 */
6392 
6393 	if (sel->ips_local_port == 0)
6394 		return;
6395 
6396 	/*
6397 	 * 0 should be fport, 1 should be lport.  SRC is the local one here.
6398 	 * See ipsec_construct_inverse_acquire() for details.
6399 	 */
6400 	pptr[0] = sel->ips_remote_port;
6401 	pptr[1] = sel->ips_local_port;
6402 
6403 	connfp = &ipst->ips_ipcl_conn_fanout[
6404 	    IPCL_CONN_HASH(sel->ips_remote_addr_v4, ports, ipst)];
6405 	mutex_enter(&connfp->connf_lock);
6406 	connp = connfp->connf_head;
6407 
6408 	if (sel->ips_isv4) {
6409 		while (connp != NULL) {
6410 			if (IPCL_CONN_MATCH(connp, IPPROTO_TCP,
6411 			    sel->ips_remote_addr_v4, sel->ips_local_addr_v4,
6412 			    ports))
6413 				break;
6414 			connp = connp->conn_next;
6415 		}
6416 	} else {
6417 		while (connp != NULL) {
6418 			if (IPCL_CONN_MATCH_V6(connp, IPPROTO_TCP,
6419 			    sel->ips_remote_addr_v6, sel->ips_local_addr_v6,
6420 			    ports))
6421 				break;
6422 			connp = connp->conn_next;
6423 		}
6424 	}
6425 
6426 	if (connp != NULL) {
6427 		CONN_INC_REF(connp);
6428 		mutex_exit(&connfp->connf_lock);
6429 	} else {
6430 		mutex_exit(&connfp->connf_lock);
6431 
6432 		/* Try the listen hash. */
6433 		if ((connp = ipsec_find_listen_conn(pptr, sel, ipst)) == NULL)
6434 			return;
6435 	}
6436 
6437 	ipsec_conn_pol(sel, connp, ppp);
6438 	CONN_DEC_REF(connp);
6439 }
6440 
6441 static void
6442 ipsec_sctp_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp,
6443     ip_stack_t *ipst)
6444 {
6445 	conn_t		*connp;
6446 	uint32_t	ports;
6447 	uint16_t	*pptr = (uint16_t *)&ports;
6448 
6449 	/*
6450 	 * Find SCP state in the following order:
6451 	 * 1.) Connected conns.
6452 	 * 2.) Listeners.
6453 	 *
6454 	 * Even though #2 will be the common case for inbound traffic, only
6455 	 * following this order insures correctness.
6456 	 */
6457 
6458 	if (sel->ips_local_port == 0)
6459 		return;
6460 
6461 	/*
6462 	 * 0 should be fport, 1 should be lport.  SRC is the local one here.
6463 	 * See ipsec_construct_inverse_acquire() for details.
6464 	 */
6465 	pptr[0] = sel->ips_remote_port;
6466 	pptr[1] = sel->ips_local_port;
6467 
6468 	/*
6469 	 * For labeled systems, there's no need to check the
6470 	 * label here.  It's known to be good as we checked
6471 	 * before allowing the connection to become bound.
6472 	 */
6473 	if (sel->ips_isv4) {
6474 		in6_addr_t	src, dst;
6475 
6476 		IN6_IPADDR_TO_V4MAPPED(sel->ips_remote_addr_v4, &dst);
6477 		IN6_IPADDR_TO_V4MAPPED(sel->ips_local_addr_v4, &src);
6478 		connp = sctp_find_conn(&dst, &src, ports, ALL_ZONES,
6479 		    0, ipst->ips_netstack->netstack_sctp);
6480 	} else {
6481 		connp = sctp_find_conn(&sel->ips_remote_addr_v6,
6482 		    &sel->ips_local_addr_v6, ports, ALL_ZONES,
6483 		    0, ipst->ips_netstack->netstack_sctp);
6484 	}
6485 	if (connp == NULL)
6486 		return;
6487 	ipsec_conn_pol(sel, connp, ppp);
6488 	CONN_DEC_REF(connp);
6489 }
6490 
6491 /*
6492  * Fill in a query for the SPD (in "sel") using two PF_KEY address extensions.
6493  * Returns 0 or errno, and always sets *diagnostic to something appropriate
6494  * to PF_KEY.
6495  *
6496  * NOTE:  For right now, this function (and ipsec_selector_t for that matter),
6497  * ignore prefix lengths in the address extension.  Since we match on first-
6498  * entered policies, this shouldn't matter.  Also, since we normalize prefix-
6499  * set addresses to mask out the lower bits, we should get a suitable search
6500  * key for the SPD anyway.  This is the function to change if the assumption
6501  * about suitable search keys is wrong.
6502  */
6503 static int
6504 ipsec_get_inverse_acquire_sel(ipsec_selector_t *sel, sadb_address_t *srcext,
6505     sadb_address_t *dstext, int *diagnostic)
6506 {
6507 	struct sockaddr_in *src, *dst;
6508 	struct sockaddr_in6 *src6, *dst6;
6509 
6510 	*diagnostic = 0;
6511 
6512 	bzero(sel, sizeof (*sel));
6513 	sel->ips_protocol = srcext->sadb_address_proto;
6514 	dst = (struct sockaddr_in *)(dstext + 1);
6515 	if (dst->sin_family == AF_INET6) {
6516 		dst6 = (struct sockaddr_in6 *)dst;
6517 		src6 = (struct sockaddr_in6 *)(srcext + 1);
6518 		if (src6->sin6_family != AF_INET6) {
6519 			*diagnostic = SADB_X_DIAGNOSTIC_AF_MISMATCH;
6520 			return (EINVAL);
6521 		}
6522 		sel->ips_remote_addr_v6 = dst6->sin6_addr;
6523 		sel->ips_local_addr_v6 = src6->sin6_addr;
6524 		if (sel->ips_protocol == IPPROTO_ICMPV6) {
6525 			sel->ips_is_icmp_inv_acq = 1;
6526 		} else {
6527 			sel->ips_remote_port = dst6->sin6_port;
6528 			sel->ips_local_port = src6->sin6_port;
6529 		}
6530 		sel->ips_isv4 = B_FALSE;
6531 	} else {
6532 		src = (struct sockaddr_in *)(srcext + 1);
6533 		if (src->sin_family != AF_INET) {
6534 			*diagnostic = SADB_X_DIAGNOSTIC_AF_MISMATCH;
6535 			return (EINVAL);
6536 		}
6537 		sel->ips_remote_addr_v4 = dst->sin_addr.s_addr;
6538 		sel->ips_local_addr_v4 = src->sin_addr.s_addr;
6539 		if (sel->ips_protocol == IPPROTO_ICMP) {
6540 			sel->ips_is_icmp_inv_acq = 1;
6541 		} else {
6542 			sel->ips_remote_port = dst->sin_port;
6543 			sel->ips_local_port = src->sin_port;
6544 		}
6545 		sel->ips_isv4 = B_TRUE;
6546 	}
6547 	return (0);
6548 }
6549 
6550 /*
6551  * We have encapsulation.
6552  * - Lookup tun_t by address and look for an associated
6553  *   tunnel policy
6554  * - If there are inner selectors
6555  *   - check ITPF_P_TUNNEL and ITPF_P_ACTIVE
6556  *   - Look up tunnel policy based on selectors
6557  * - Else
6558  *   - Sanity check the negotation
6559  *   - If appropriate, fall through to global policy
6560  */
6561 static int
6562 ipsec_tun_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp,
6563     sadb_address_t *innsrcext, sadb_address_t *inndstext, ipsec_tun_pol_t *itp,
6564     int *diagnostic)
6565 {
6566 	int err;
6567 	ipsec_policy_head_t *polhead;
6568 
6569 	*diagnostic = 0;
6570 
6571 	/* Check for inner selectors and act appropriately */
6572 
6573 	if (innsrcext != NULL) {
6574 		/* Inner selectors present */
6575 		ASSERT(inndstext != NULL);
6576 		if ((itp == NULL) ||
6577 		    (itp->itp_flags & (ITPF_P_ACTIVE | ITPF_P_TUNNEL)) !=
6578 		    (ITPF_P_ACTIVE | ITPF_P_TUNNEL)) {
6579 			/*
6580 			 * If inner packet selectors, we must have negotiate
6581 			 * tunnel and active policy.  If the tunnel has
6582 			 * transport-mode policy set on it, or has no policy,
6583 			 * fail.
6584 			 */
6585 			return (ENOENT);
6586 		} else {
6587 			/*
6588 			 * Reset "sel" to indicate inner selectors.  Pass
6589 			 * inner PF_KEY address extensions for this to happen.
6590 			 */
6591 			if ((err = ipsec_get_inverse_acquire_sel(sel,
6592 			    innsrcext, inndstext, diagnostic)) != 0)
6593 				return (err);
6594 			/*
6595 			 * Now look for a tunnel policy based on those inner
6596 			 * selectors.  (Common code is below.)
6597 			 */
6598 		}
6599 	} else {
6600 		/* No inner selectors present */
6601 		if ((itp == NULL) || !(itp->itp_flags & ITPF_P_ACTIVE)) {
6602 			/*
6603 			 * Transport mode negotiation with no tunnel policy
6604 			 * configured - return to indicate a global policy
6605 			 * check is needed.
6606 			 */
6607 			return (0);
6608 		} else if (itp->itp_flags & ITPF_P_TUNNEL) {
6609 			/* Tunnel mode set with no inner selectors. */
6610 			return (ENOENT);
6611 		}
6612 		/*
6613 		 * Else, this is a tunnel policy configured with ifconfig(1m)
6614 		 * or "negotiate transport" with ipsecconf(1m).  We have an
6615 		 * itp with policy set based on any match, so don't bother
6616 		 * changing fields in "sel".
6617 		 */
6618 	}
6619 
6620 	ASSERT(itp != NULL);
6621 	polhead = itp->itp_policy;
6622 	ASSERT(polhead != NULL);
6623 	rw_enter(&polhead->iph_lock, RW_READER);
6624 	*ppp = ipsec_find_policy_head(NULL, polhead, IPSEC_TYPE_INBOUND, sel);
6625 	rw_exit(&polhead->iph_lock);
6626 
6627 	/*
6628 	 * Don't default to global if we didn't find a matching policy entry.
6629 	 * Instead, send ENOENT, just like if we hit a transport-mode tunnel.
6630 	 */
6631 	if (*ppp == NULL)
6632 		return (ENOENT);
6633 
6634 	return (0);
6635 }
6636 
6637 /*
6638  * For sctp conn_faddr is the primary address, hence this is of limited
6639  * use for sctp.
6640  */
6641 static void
6642 ipsec_oth_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp,
6643     ip_stack_t *ipst)
6644 {
6645 	boolean_t	isv4 = sel->ips_isv4;
6646 	connf_t		*connfp;
6647 	conn_t		*connp;
6648 
6649 	if (isv4) {
6650 		connfp = &ipst->ips_ipcl_proto_fanout_v4[sel->ips_protocol];
6651 	} else {
6652 		connfp = &ipst->ips_ipcl_proto_fanout_v6[sel->ips_protocol];
6653 	}
6654 
6655 	mutex_enter(&connfp->connf_lock);
6656 	for (connp = connfp->connf_head; connp != NULL;
6657 	    connp = connp->conn_next) {
6658 		if (isv4) {
6659 			if ((connp->conn_laddr_v4 == INADDR_ANY ||
6660 			    connp->conn_laddr_v4 == sel->ips_local_addr_v4) &&
6661 			    (connp->conn_faddr_v4 == INADDR_ANY ||
6662 			    connp->conn_faddr_v4 == sel->ips_remote_addr_v4))
6663 				break;
6664 		} else {
6665 			if ((IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6) ||
6666 			    IN6_ARE_ADDR_EQUAL(&connp->conn_laddr_v6,
6667 			    &sel->ips_local_addr_v6)) &&
6668 			    (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) ||
6669 			    IN6_ARE_ADDR_EQUAL(&connp->conn_faddr_v6,
6670 			    &sel->ips_remote_addr_v6)))
6671 				break;
6672 		}
6673 	}
6674 	if (connp == NULL) {
6675 		mutex_exit(&connfp->connf_lock);
6676 		return;
6677 	}
6678 
6679 	CONN_INC_REF(connp);
6680 	mutex_exit(&connfp->connf_lock);
6681 
6682 	ipsec_conn_pol(sel, connp, ppp);
6683 	CONN_DEC_REF(connp);
6684 }
6685 
6686 /*
6687  * Construct an inverse ACQUIRE reply based on:
6688  *
6689  * 1.) Current global policy.
6690  * 2.) An conn_t match depending on what all was passed in the extv[].
6691  * 3.) A tunnel's policy head.
6692  * ...
6693  * N.) Other stuff TBD (e.g. identities)
6694  *
6695  * If there is an error, set sadb_msg_errno and sadb_x_msg_diagnostic
6696  * in this function so the caller can extract them where appropriately.
6697  *
6698  * The SRC address is the local one - just like an outbound ACQUIRE message.
6699  *
6700  * XXX MLS: key management supplies a label which we just reflect back up
6701  * again.  clearly we need to involve the label in the rest of the checks.
6702  */
6703 mblk_t *
6704 ipsec_construct_inverse_acquire(sadb_msg_t *samsg, sadb_ext_t *extv[],
6705     netstack_t *ns)
6706 {
6707 	int err;
6708 	int diagnostic;
6709 	sadb_address_t *srcext = (sadb_address_t *)extv[SADB_EXT_ADDRESS_SRC],
6710 	    *dstext = (sadb_address_t *)extv[SADB_EXT_ADDRESS_DST],
6711 	    *innsrcext = (sadb_address_t *)extv[SADB_X_EXT_ADDRESS_INNER_SRC],
6712 	    *inndstext = (sadb_address_t *)extv[SADB_X_EXT_ADDRESS_INNER_DST];
6713 	sadb_sens_t *sens = (sadb_sens_t *)extv[SADB_EXT_SENSITIVITY];
6714 	struct sockaddr_in6 *src, *dst;
6715 	struct sockaddr_in6 *isrc, *idst;
6716 	ipsec_tun_pol_t *itp = NULL;
6717 	ipsec_policy_t *pp = NULL;
6718 	ipsec_selector_t sel, isel;
6719 	mblk_t *retmp = NULL;
6720 	ip_stack_t	*ipst = ns->netstack_ip;
6721 
6722 
6723 	/* Normalize addresses */
6724 	if (sadb_addrcheck(NULL, (mblk_t *)samsg, (sadb_ext_t *)srcext, 0, ns)
6725 	    == KS_IN_ADDR_UNKNOWN) {
6726 		err = EINVAL;
6727 		diagnostic = SADB_X_DIAGNOSTIC_BAD_SRC;
6728 		goto bail;
6729 	}
6730 	src = (struct sockaddr_in6 *)(srcext + 1);
6731 	if (sadb_addrcheck(NULL, (mblk_t *)samsg, (sadb_ext_t *)dstext, 0, ns)
6732 	    == KS_IN_ADDR_UNKNOWN) {
6733 		err = EINVAL;
6734 		diagnostic = SADB_X_DIAGNOSTIC_BAD_DST;
6735 		goto bail;
6736 	}
6737 	dst = (struct sockaddr_in6 *)(dstext + 1);
6738 	if (src->sin6_family != dst->sin6_family) {
6739 		err = EINVAL;
6740 		diagnostic = SADB_X_DIAGNOSTIC_AF_MISMATCH;
6741 		goto bail;
6742 	}
6743 
6744 	/* Check for tunnel mode and act appropriately */
6745 	if (innsrcext != NULL) {
6746 		if (inndstext == NULL) {
6747 			err = EINVAL;
6748 			diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_DST;
6749 			goto bail;
6750 		}
6751 		if (sadb_addrcheck(NULL, (mblk_t *)samsg,
6752 		    (sadb_ext_t *)innsrcext, 0, ns) == KS_IN_ADDR_UNKNOWN) {
6753 			err = EINVAL;
6754 			diagnostic = SADB_X_DIAGNOSTIC_MALFORMED_INNER_SRC;
6755 			goto bail;
6756 		}
6757 		isrc = (struct sockaddr_in6 *)(innsrcext + 1);
6758 		if (sadb_addrcheck(NULL, (mblk_t *)samsg,
6759 		    (sadb_ext_t *)inndstext, 0, ns) == KS_IN_ADDR_UNKNOWN) {
6760 			err = EINVAL;
6761 			diagnostic = SADB_X_DIAGNOSTIC_MALFORMED_INNER_DST;
6762 			goto bail;
6763 		}
6764 		idst = (struct sockaddr_in6 *)(inndstext + 1);
6765 		if (isrc->sin6_family != idst->sin6_family) {
6766 			err = EINVAL;
6767 			diagnostic = SADB_X_DIAGNOSTIC_INNER_AF_MISMATCH;
6768 			goto bail;
6769 		}
6770 		if (isrc->sin6_family != AF_INET &&
6771 		    isrc->sin6_family != AF_INET6) {
6772 			err = EINVAL;
6773 			diagnostic = SADB_X_DIAGNOSTIC_BAD_INNER_SRC_AF;
6774 			goto bail;
6775 		}
6776 	} else if (inndstext != NULL) {
6777 		err = EINVAL;
6778 		diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_SRC;
6779 		goto bail;
6780 	}
6781 
6782 	/* Get selectors first, based on outer addresses */
6783 	err = ipsec_get_inverse_acquire_sel(&sel, srcext, dstext, &diagnostic);
6784 	if (err != 0)
6785 		goto bail;
6786 
6787 	/* Check for tunnel mode mismatches. */
6788 	if (innsrcext != NULL &&
6789 	    ((isrc->sin6_family == AF_INET &&
6790 	    sel.ips_protocol != IPPROTO_ENCAP && sel.ips_protocol != 0) ||
6791 	    (isrc->sin6_family == AF_INET6 &&
6792 	    sel.ips_protocol != IPPROTO_IPV6 && sel.ips_protocol != 0))) {
6793 		err = EPROTOTYPE;
6794 		goto bail;
6795 	}
6796 
6797 	/*
6798 	 * Okay, we have the addresses and other selector information.
6799 	 * Let's first find a conn...
6800 	 */
6801 	pp = NULL;
6802 	switch (sel.ips_protocol) {
6803 	case IPPROTO_TCP:
6804 		ipsec_tcp_pol(&sel, &pp, ipst);
6805 		break;
6806 	case IPPROTO_UDP:
6807 		ipsec_udp_pol(&sel, &pp, ipst);
6808 		break;
6809 	case IPPROTO_SCTP:
6810 		ipsec_sctp_pol(&sel, &pp, ipst);
6811 		break;
6812 	case IPPROTO_ENCAP:
6813 	case IPPROTO_IPV6:
6814 		/*
6815 		 * Assume sel.ips_remote_addr_* has the right address at
6816 		 * that exact position.
6817 		 */
6818 		itp = itp_get_byaddr((uint32_t *)(&sel.ips_local_addr_v6),
6819 		    (uint32_t *)(&sel.ips_remote_addr_v6), src->sin6_family,
6820 		    ipst);
6821 
6822 		if (innsrcext == NULL) {
6823 			/*
6824 			 * Transport-mode tunnel, make sure we fake out isel
6825 			 * to contain something based on the outer protocol.
6826 			 */
6827 			bzero(&isel, sizeof (isel));
6828 			isel.ips_isv4 = (sel.ips_protocol == IPPROTO_ENCAP);
6829 		} /* Else isel is initialized by ipsec_tun_pol(). */
6830 		err = ipsec_tun_pol(&isel, &pp, innsrcext, inndstext, itp,
6831 		    &diagnostic);
6832 		/*
6833 		 * NOTE:  isel isn't used for now, but in RFC 430x IPsec, it
6834 		 * may be.
6835 		 */
6836 		if (err != 0)
6837 			goto bail;
6838 		break;
6839 	default:
6840 		ipsec_oth_pol(&sel, &pp, ipst);
6841 		break;
6842 	}
6843 
6844 	/*
6845 	 * If we didn't find a matching conn_t or other policy head, take a
6846 	 * look in the global policy.
6847 	 */
6848 	if (pp == NULL) {
6849 		pp = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, NULL, &sel, ns);
6850 		if (pp == NULL) {
6851 			/* There's no global policy. */
6852 			err = ENOENT;
6853 			diagnostic = 0;
6854 			goto bail;
6855 		}
6856 	}
6857 
6858 	/*
6859 	 * Now that we have a policy entry/widget, construct an ACQUIRE
6860 	 * message based on that, fix fields where appropriate,
6861 	 * and return the message.
6862 	 */
6863 	retmp = sadb_extended_acquire(&sel, pp, NULL,
6864 	    (itp != NULL && (itp->itp_flags & ITPF_P_TUNNEL)),
6865 	    samsg->sadb_msg_seq, samsg->sadb_msg_pid, sens, ns);
6866 	if (pp != NULL) {
6867 		IPPOL_REFRELE(pp);
6868 	}
6869 	ASSERT(err == 0 && diagnostic == 0);
6870 	if (retmp == NULL)
6871 		err = ENOMEM;
6872 bail:
6873 	if (itp != NULL) {
6874 		ITP_REFRELE(itp, ns);
6875 	}
6876 	samsg->sadb_msg_errno = (uint8_t)err;
6877 	samsg->sadb_x_msg_diagnostic = (uint16_t)diagnostic;
6878 	return (retmp);
6879 }
6880 
6881 /*
6882  * ipsa_lpkt is a one-element queue, only manipulated by the next two
6883  * functions.  They have to hold the ipsa_lock because of potential races
6884  * between key management using SADB_UPDATE, and inbound packets that may
6885  * queue up on the larval SA (hence the 'l' in "lpkt").
6886  */
6887 
6888 /*
6889  * sadb_set_lpkt:
6890  *
6891  * Returns the passed-in packet if the SA is no longer larval.
6892  *
6893  * Returns NULL if the SA is larval, and needs to be swapped into the SA for
6894  * processing after an SADB_UPDATE.
6895  */
6896 mblk_t *
6897 sadb_set_lpkt(ipsa_t *ipsa, mblk_t *npkt, ip_recv_attr_t *ira)
6898 {
6899 	mblk_t		*opkt;
6900 
6901 	mutex_enter(&ipsa->ipsa_lock);
6902 	opkt = ipsa->ipsa_lpkt;
6903 	if (ipsa->ipsa_state == IPSA_STATE_LARVAL) {
6904 		/*
6905 		 * Consume npkt and place it in the LARVAL SA's inbound
6906 		 * packet slot.
6907 		 */
6908 		mblk_t	*attrmp;
6909 
6910 		attrmp = ip_recv_attr_to_mblk(ira);
6911 		if (attrmp == NULL) {
6912 			ill_t *ill = ira->ira_ill;
6913 
6914 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
6915 			ip_drop_input("ipIfStatsInDiscards", npkt, ill);
6916 			freemsg(npkt);
6917 			opkt = NULL;
6918 		} else {
6919 			ASSERT(attrmp->b_cont == NULL);
6920 			attrmp->b_cont = npkt;
6921 			ipsa->ipsa_lpkt = attrmp;
6922 		}
6923 		npkt = NULL;
6924 	} else {
6925 		/*
6926 		 * If not larval, we lost the race.  NOTE: ipsa_lpkt may still
6927 		 * have been non-NULL in the non-larval case, because of
6928 		 * inbound packets arriving prior to sadb_common_add()
6929 		 * transferring the SA completely out of larval state, but
6930 		 * after lpkt was grabbed by the AH/ESP-specific add routines.
6931 		 * We should clear the old ipsa_lpkt in this case to make sure
6932 		 * that it doesn't linger on the now-MATURE IPsec SA, or get
6933 		 * picked up as an out-of-order packet.
6934 		 */
6935 		ipsa->ipsa_lpkt = NULL;
6936 	}
6937 	mutex_exit(&ipsa->ipsa_lock);
6938 
6939 	if (opkt != NULL) {
6940 		ipsec_stack_t	*ipss;
6941 
6942 		ipss = ira->ira_ill->ill_ipst->ips_netstack->netstack_ipsec;
6943 		opkt = ip_recv_attr_free_mblk(opkt);
6944 		ip_drop_packet(opkt, B_TRUE, ira->ira_ill,
6945 		    DROPPER(ipss, ipds_sadb_inlarval_replace),
6946 		    &ipss->ipsec_sadb_dropper);
6947 	}
6948 	return (npkt);
6949 }
6950 
6951 /*
6952  * sadb_clear_lpkt: Atomically clear ipsa->ipsa_lpkt and return the
6953  * previous value.
6954  */
6955 mblk_t *
6956 sadb_clear_lpkt(ipsa_t *ipsa)
6957 {
6958 	mblk_t *opkt;
6959 
6960 	mutex_enter(&ipsa->ipsa_lock);
6961 	opkt = ipsa->ipsa_lpkt;
6962 	ipsa->ipsa_lpkt = NULL;
6963 	mutex_exit(&ipsa->ipsa_lock);
6964 	return (opkt);
6965 }
6966 
6967 /*
6968  * Buffer a packet that's in IDLE state as set by Solaris Clustering.
6969  */
6970 void
6971 sadb_buf_pkt(ipsa_t *ipsa, mblk_t *bpkt, ip_recv_attr_t *ira)
6972 {
6973 	netstack_t	*ns = ira->ira_ill->ill_ipst->ips_netstack;
6974 	ipsec_stack_t   *ipss = ns->netstack_ipsec;
6975 	in6_addr_t *srcaddr = (in6_addr_t *)(&ipsa->ipsa_srcaddr);
6976 	in6_addr_t *dstaddr = (in6_addr_t *)(&ipsa->ipsa_dstaddr);
6977 	mblk_t		*mp;
6978 
6979 	ASSERT(ipsa->ipsa_state == IPSA_STATE_IDLE);
6980 
6981 	if (cl_inet_idlesa == NULL) {
6982 		ip_drop_packet(bpkt, B_TRUE, ira->ira_ill,
6983 		    DROPPER(ipss, ipds_sadb_inidle_overflow),
6984 		    &ipss->ipsec_sadb_dropper);
6985 		return;
6986 	}
6987 
6988 	cl_inet_idlesa(ns->netstack_stackid,
6989 	    (ipsa->ipsa_type == SADB_SATYPE_AH) ? IPPROTO_AH : IPPROTO_ESP,
6990 	    ipsa->ipsa_spi, ipsa->ipsa_addrfam, *srcaddr, *dstaddr, NULL);
6991 
6992 	mp = ip_recv_attr_to_mblk(ira);
6993 	if (mp == NULL) {
6994 		ip_drop_packet(bpkt, B_TRUE, ira->ira_ill,
6995 		    DROPPER(ipss, ipds_sadb_inidle_overflow),
6996 		    &ipss->ipsec_sadb_dropper);
6997 		return;
6998 	}
6999 	linkb(mp, bpkt);
7000 
7001 	mutex_enter(&ipsa->ipsa_lock);
7002 	ipsa->ipsa_mblkcnt++;
7003 	if (ipsa->ipsa_bpkt_head == NULL) {
7004 		ipsa->ipsa_bpkt_head = ipsa->ipsa_bpkt_tail = bpkt;
7005 	} else {
7006 		ipsa->ipsa_bpkt_tail->b_next = bpkt;
7007 		ipsa->ipsa_bpkt_tail = bpkt;
7008 		if (ipsa->ipsa_mblkcnt > SADB_MAX_IDLEPKTS) {
7009 			mblk_t *tmp;
7010 
7011 			tmp = ipsa->ipsa_bpkt_head;
7012 			ipsa->ipsa_bpkt_head = ipsa->ipsa_bpkt_head->b_next;
7013 			tmp = ip_recv_attr_free_mblk(tmp);
7014 			ip_drop_packet(tmp, B_TRUE, NULL,
7015 			    DROPPER(ipss, ipds_sadb_inidle_overflow),
7016 			    &ipss->ipsec_sadb_dropper);
7017 			ipsa->ipsa_mblkcnt --;
7018 		}
7019 	}
7020 	mutex_exit(&ipsa->ipsa_lock);
7021 }
7022 
7023 /*
7024  * Stub function that taskq_dispatch() invokes to take the mblk (in arg)
7025  * and put into STREAMS again.
7026  */
7027 void
7028 sadb_clear_buf_pkt(void *ipkt)
7029 {
7030 	mblk_t	*tmp, *buf_pkt;
7031 	ip_recv_attr_t	iras;
7032 
7033 	buf_pkt = (mblk_t *)ipkt;
7034 
7035 	while (buf_pkt != NULL) {
7036 		mblk_t *data_mp;
7037 
7038 		tmp = buf_pkt->b_next;
7039 		buf_pkt->b_next = NULL;
7040 
7041 		data_mp = buf_pkt->b_cont;
7042 		buf_pkt->b_cont = NULL;
7043 		if (!ip_recv_attr_from_mblk(buf_pkt, &iras)) {
7044 			/* The ill or ip_stack_t disappeared on us. */
7045 			ip_drop_input("ip_recv_attr_from_mblk", data_mp, NULL);
7046 			freemsg(data_mp);
7047 		} else {
7048 			ip_input_post_ipsec(data_mp, &iras);
7049 		}
7050 		ira_cleanup(&iras, B_TRUE);
7051 		buf_pkt = tmp;
7052 	}
7053 }
/*
 * State passed as the walker cookie to sadb_alg_update_cb(), the callback
 * used by sadb_alg_update() to free/create crypto context templates when a
 * crypto software provider is removed or added.
 */

struct sadb_update_alg_state {
	ipsec_algtype_t alg_type;	/* type of the algorithm that changed */
	uint8_t alg_id;			/* id of the algorithm that changed */
	boolean_t is_added;		/* B_TRUE if added, B_FALSE if removed */
	boolean_t async_auth;		/* auth exec mode is ASYNC */
	boolean_t async_encr;		/* encr exec mode is ASYNC */
};
7067 
7068 static void
7069 sadb_alg_update_cb(isaf_t *head, ipsa_t *entry, void *cookie)
7070 {
7071 	struct sadb_update_alg_state *update_state =
7072 	    (struct sadb_update_alg_state *)cookie;
7073 	crypto_ctx_template_t *ctx_tmpl = NULL;
7074 
7075 	ASSERT(MUTEX_HELD(&head->isaf_lock));
7076 
7077 	if (entry->ipsa_state == IPSA_STATE_LARVAL)
7078 		return;
7079 
7080 	mutex_enter(&entry->ipsa_lock);
7081 
7082 	if ((entry->ipsa_encr_alg != SADB_EALG_NONE && entry->ipsa_encr_alg !=
7083 	    SADB_EALG_NULL && update_state->async_encr) ||
7084 	    (entry->ipsa_auth_alg != SADB_AALG_NONE &&
7085 	    update_state->async_auth)) {
7086 		entry->ipsa_flags |= IPSA_F_ASYNC;
7087 	} else {
7088 		entry->ipsa_flags &= ~IPSA_F_ASYNC;
7089 	}
7090 
7091 	switch (update_state->alg_type) {
7092 	case IPSEC_ALG_AUTH:
7093 		if (entry->ipsa_auth_alg == update_state->alg_id)
7094 			ctx_tmpl = &entry->ipsa_authtmpl;
7095 		break;
7096 	case IPSEC_ALG_ENCR:
7097 		if (entry->ipsa_encr_alg == update_state->alg_id)
7098 			ctx_tmpl = &entry->ipsa_encrtmpl;
7099 		break;
7100 	default:
7101 		ctx_tmpl = NULL;
7102 	}
7103 
7104 	if (ctx_tmpl == NULL) {
7105 		mutex_exit(&entry->ipsa_lock);
7106 		return;
7107 	}
7108 
7109 	/*
7110 	 * The context template of the SA may be affected by the change
7111 	 * of crypto provider.
7112 	 */
7113 	if (update_state->is_added) {
7114 		/* create the context template if not already done */
7115 		if (*ctx_tmpl == NULL) {
7116 			(void) ipsec_create_ctx_tmpl(entry,
7117 			    update_state->alg_type);
7118 		}
7119 	} else {
7120 		/*
7121 		 * The crypto provider was removed. If the context template
7122 		 * exists but it is no longer valid, free it.
7123 		 */
7124 		if (*ctx_tmpl != NULL)
7125 			ipsec_destroy_ctx_tmpl(entry, update_state->alg_type);
7126 	}
7127 
7128 	mutex_exit(&entry->ipsa_lock);
7129 }
7130 
7131 /*
7132  * Invoked by IP when an software crypto provider has been updated, or if
7133  * the crypto synchrony changes.  The type and id of the corresponding
7134  * algorithm is passed as argument.  The type is set to ALL in the case of
7135  * a synchrony change.
7136  *
7137  * is_added is B_TRUE if the provider was added, B_FALSE if it was
7138  * removed. The function updates the SADB and free/creates the
7139  * context templates associated with SAs if needed.
7140  */
7141 
7142 #define	SADB_ALG_UPDATE_WALK(sadb, table) \
7143     sadb_walker((sadb).table, (sadb).sdb_hashsize, sadb_alg_update_cb, \
7144 	&update_state)
7145 
7146 void
7147 sadb_alg_update(ipsec_algtype_t alg_type, uint8_t alg_id, boolean_t is_added,
7148     netstack_t *ns)
7149 {
7150 	struct sadb_update_alg_state update_state;
7151 	ipsecah_stack_t	*ahstack = ns->netstack_ipsecah;
7152 	ipsecesp_stack_t	*espstack = ns->netstack_ipsecesp;
7153 	ipsec_stack_t *ipss = ns->netstack_ipsec;
7154 
7155 	update_state.alg_type = alg_type;
7156 	update_state.alg_id = alg_id;
7157 	update_state.is_added = is_added;
7158 	update_state.async_auth = ipss->ipsec_algs_exec_mode[IPSEC_ALG_AUTH] ==
7159 	    IPSEC_ALGS_EXEC_ASYNC;
7160 	update_state.async_encr = ipss->ipsec_algs_exec_mode[IPSEC_ALG_ENCR] ==
7161 	    IPSEC_ALGS_EXEC_ASYNC;
7162 
7163 	if (alg_type == IPSEC_ALG_AUTH || alg_type == IPSEC_ALG_ALL) {
7164 		/* walk the AH tables only for auth. algorithm changes */
7165 		SADB_ALG_UPDATE_WALK(ahstack->ah_sadb.s_v4, sdb_of);
7166 		SADB_ALG_UPDATE_WALK(ahstack->ah_sadb.s_v4, sdb_if);
7167 		SADB_ALG_UPDATE_WALK(ahstack->ah_sadb.s_v6, sdb_of);
7168 		SADB_ALG_UPDATE_WALK(ahstack->ah_sadb.s_v6, sdb_if);
7169 	}
7170 
7171 	/* walk the ESP tables */
7172 	SADB_ALG_UPDATE_WALK(espstack->esp_sadb.s_v4, sdb_of);
7173 	SADB_ALG_UPDATE_WALK(espstack->esp_sadb.s_v4, sdb_if);
7174 	SADB_ALG_UPDATE_WALK(espstack->esp_sadb.s_v6, sdb_of);
7175 	SADB_ALG_UPDATE_WALK(espstack->esp_sadb.s_v6, sdb_if);
7176 }
7177 
7178 /*
7179  * Creates a context template for the specified SA. This function
7180  * is called when an SA is created and when a context template needs
7181  * to be created due to a change of software provider.
7182  */
7183 int
7184 ipsec_create_ctx_tmpl(ipsa_t *sa, ipsec_algtype_t alg_type)
7185 {
7186 	ipsec_alginfo_t *alg;
7187 	crypto_mechanism_t mech;
7188 	crypto_key_t *key;
7189 	crypto_ctx_template_t *sa_tmpl;
7190 	int rv;
7191 	ipsec_stack_t	*ipss = sa->ipsa_netstack->netstack_ipsec;
7192 
7193 	ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock));
7194 	ASSERT(MUTEX_HELD(&sa->ipsa_lock));
7195 
7196 	/* get pointers to the algorithm info, context template, and key */
7197 	switch (alg_type) {
7198 	case IPSEC_ALG_AUTH:
7199 		key = &sa->ipsa_kcfauthkey;
7200 		sa_tmpl = &sa->ipsa_authtmpl;
7201 		alg = ipss->ipsec_alglists[alg_type][sa->ipsa_auth_alg];
7202 		break;
7203 	case IPSEC_ALG_ENCR:
7204 		key = &sa->ipsa_kcfencrkey;
7205 		sa_tmpl = &sa->ipsa_encrtmpl;
7206 		alg = ipss->ipsec_alglists[alg_type][sa->ipsa_encr_alg];
7207 		break;
7208 	default:
7209 		alg = NULL;
7210 	}
7211 
7212 	if (alg == NULL || !ALG_VALID(alg))
7213 		return (EINVAL);
7214 
7215 	/* initialize the mech info structure for the framework */
7216 	ASSERT(alg->alg_mech_type != CRYPTO_MECHANISM_INVALID);
7217 	mech.cm_type = alg->alg_mech_type;
7218 	mech.cm_param = NULL;
7219 	mech.cm_param_len = 0;
7220 
7221 	/* create a new context template */
7222 	rv = crypto_create_ctx_template(&mech, key, sa_tmpl, KM_NOSLEEP);
7223 
7224 	/*
7225 	 * CRYPTO_MECH_NOT_SUPPORTED can be returned if only hardware
7226 	 * providers are available for that mechanism. In that case
7227 	 * we don't fail, and will generate the context template from
7228 	 * the framework callback when a software provider for that
7229 	 * mechanism registers.
7230 	 *
7231 	 * The context template is assigned the special value
7232 	 * IPSEC_CTX_TMPL_ALLOC if the allocation failed due to a
7233 	 * lack of memory. No attempt will be made to use
7234 	 * the context template if it is set to this value.
7235 	 */
7236 	if (rv == CRYPTO_HOST_MEMORY) {
7237 		*sa_tmpl = IPSEC_CTX_TMPL_ALLOC;
7238 	} else if (rv != CRYPTO_SUCCESS) {
7239 		*sa_tmpl = NULL;
7240 		if (rv != CRYPTO_MECH_NOT_SUPPORTED)
7241 			return (EINVAL);
7242 	}
7243 
7244 	return (0);
7245 }
7246 
7247 /*
7248  * Destroy the context template of the specified algorithm type
7249  * of the specified SA. Must be called while holding the SA lock.
7250  */
7251 void
7252 ipsec_destroy_ctx_tmpl(ipsa_t *sa, ipsec_algtype_t alg_type)
7253 {
7254 	ASSERT(MUTEX_HELD(&sa->ipsa_lock));
7255 
7256 	if (alg_type == IPSEC_ALG_AUTH) {
7257 		if (sa->ipsa_authtmpl == IPSEC_CTX_TMPL_ALLOC)
7258 			sa->ipsa_authtmpl = NULL;
7259 		else if (sa->ipsa_authtmpl != NULL) {
7260 			crypto_destroy_ctx_template(sa->ipsa_authtmpl);
7261 			sa->ipsa_authtmpl = NULL;
7262 		}
7263 	} else {
7264 		ASSERT(alg_type == IPSEC_ALG_ENCR);
7265 		if (sa->ipsa_encrtmpl == IPSEC_CTX_TMPL_ALLOC)
7266 			sa->ipsa_encrtmpl = NULL;
7267 		else if (sa->ipsa_encrtmpl != NULL) {
7268 			crypto_destroy_ctx_template(sa->ipsa_encrtmpl);
7269 			sa->ipsa_encrtmpl = NULL;
7270 		}
7271 	}
7272 }
7273 
7274 /*
7275  * Use the kernel crypto framework to check the validity of a key received
7276  * via keysock. Returns 0 if the key is OK, -1 otherwise.
7277  */
7278 int
7279 ipsec_check_key(crypto_mech_type_t mech_type, sadb_key_t *sadb_key,
7280     boolean_t is_auth, int *diag)
7281 {
7282 	crypto_mechanism_t mech;
7283 	crypto_key_t crypto_key;
7284 	int crypto_rc;
7285 
7286 	mech.cm_type = mech_type;
7287 	mech.cm_param = NULL;
7288 	mech.cm_param_len = 0;
7289 
7290 	crypto_key.ck_format = CRYPTO_KEY_RAW;
7291 	crypto_key.ck_data = sadb_key + 1;
7292 	crypto_key.ck_length = sadb_key->sadb_key_bits;
7293 
7294 	crypto_rc = crypto_key_check(&mech, &crypto_key);
7295 
7296 	switch (crypto_rc) {
7297 	case CRYPTO_SUCCESS:
7298 		return (0);
7299 	case CRYPTO_MECHANISM_INVALID:
7300 	case CRYPTO_MECH_NOT_SUPPORTED:
7301 		*diag = is_auth ? SADB_X_DIAGNOSTIC_BAD_AALG :
7302 		    SADB_X_DIAGNOSTIC_BAD_EALG;
7303 		break;
7304 	case CRYPTO_KEY_SIZE_RANGE:
7305 		*diag = is_auth ? SADB_X_DIAGNOSTIC_BAD_AKEYBITS :
7306 		    SADB_X_DIAGNOSTIC_BAD_EKEYBITS;
7307 		break;
7308 	case CRYPTO_WEAK_KEY:
7309 		*diag = is_auth ? SADB_X_DIAGNOSTIC_WEAK_AKEY :
7310 		    SADB_X_DIAGNOSTIC_WEAK_EKEY;
7311 		break;
7312 	}
7313 
7314 	return (-1);
7315 }
7316 
/*
 * Whack options in the outer IP header when ipsec changes the outer label
 *
 * Removes any existing security option from the IPv4 header at the front
 * of mp, then prepends the option stored in assoc->ipsa_opt_storage and
 * rewrites ipha_length to match.  If the leading dblk is shared or lacks
 * room to grow, a new leading mblk is allocated and the front of the
 * packet is copied into it.
 *
 * Returns the (possibly new) head mblk, or NULL if the allocation failed,
 * in which case the packet has been dropped via ip_drop_packet() using
 * counter/dropper.
 *
 * This is inelegant and really could use refactoring.
 */
mblk_t *
sadb_whack_label_v4(mblk_t *mp, ipsa_t *assoc, kstat_named_t *counter,
    ipdropper_t *dropper)
{
	int delta;
	int plen;
	dblk_t *db;
	int hlen;
	uint8_t *opt_storage = assoc->ipsa_opt_storage;
	ipha_t *ipha = (ipha_t *)mp->b_rptr;

	plen = ntohs(ipha->ipha_length);

	/*
	 * Strip the old security option; delta is the resulting change in
	 * length, applied to both the mblk and the running packet length.
	 */
	delta = tsol_remove_secopt(ipha, MBLKL(mp));
	mp->b_wptr += delta;
	plen += delta;

	/* XXX XXX code copied from tsol_check_label */

	/*
	 * Make sure we have room for the worst-case addition: the header plus
	 * the new option, rounded up to a 4-byte multiple and capped at
	 * IP_MAX_HDR_LENGTH.  hlen ends up as the number of extra bytes.
	 */
	hlen = IPH_HDR_LENGTH(ipha) + opt_storage[IPOPT_OLEN];
	hlen = (hlen + 3) & ~3;
	if (hlen > IP_MAX_HDR_LENGTH)
		hlen = IP_MAX_HDR_LENGTH;
	hlen -= IPH_HDR_LENGTH(ipha);

	db = mp->b_datap;
	/* Need a private dblk with hlen bytes of slack past b_wptr. */
	if ((db->db_ref != 1) || (mp->b_wptr + hlen > db->db_lim)) {
		int copylen;
		mblk_t *new_mp;

		/* allocate enough to be meaningful, but not *too* much */
		copylen = MBLKL(mp);
		if (copylen > 256)
			copylen = 256;
		new_mp = allocb_tmpl(hlen + copylen +
		    (mp->b_rptr - mp->b_datap->db_base), mp);

		if (new_mp == NULL) {
			ip_drop_packet(mp, B_FALSE, NULL, counter,  dropper);
			return (NULL);
		}

		/* keep the bias */
		new_mp->b_rptr += mp->b_rptr - mp->b_datap->db_base;
		new_mp->b_wptr = new_mp->b_rptr + copylen;
		bcopy(mp->b_rptr, new_mp->b_rptr, copylen);
		new_mp->b_cont = mp;
		/*
		 * Skip the bytes just copied; if that empties the old mblk,
		 * unlink and free it so new_mp chains straight to the rest.
		 */
		if ((mp->b_rptr += copylen) >= mp->b_wptr) {
			new_mp->b_cont = mp->b_cont;
			freeb(mp);
		}
		mp = new_mp;
		ipha = (ipha_t *)mp->b_rptr;
	}

	/* Insert the new option; delta is the number of bytes added. */
	delta = tsol_prepend_option(assoc->ipsa_opt_storage, ipha, MBLKL(mp));

	/* NOTE(review): a -1 return is only caught on DEBUG kernels. */
	ASSERT(delta != -1);

	plen += delta;
	mp->b_wptr += delta;

	/*
	 * Paranoia
	 */
	db = mp->b_datap;

	ASSERT3P(mp->b_wptr, <=, db->db_lim);
	ASSERT3P(mp->b_rptr, <=, db->db_lim);

	ASSERT3P(mp->b_wptr, >=, db->db_base);
	ASSERT3P(mp->b_rptr, >=, db->db_base);
	/* End paranoia */

	ipha->ipha_length = htons(plen);

	return (mp);
}
7401 
/*
 * IPv6 counterpart of sadb_whack_label_v4(): removes any existing security
 * option from the IPv6 packet at the front of mp, prepends the option
 * stored in assoc->ipsa_opt_storage, and rewrites ip6_plen to match.  If
 * the leading dblk is shared or lacks room to grow, a new leading mblk is
 * allocated and the front of the packet is copied into it.
 *
 * Returns the (possibly new) head mblk, or NULL if the allocation failed,
 * in which case the packet has been dropped via ip_drop_packet() using
 * counter/dropper.
 */
mblk_t *
sadb_whack_label_v6(mblk_t *mp, ipsa_t *assoc, kstat_named_t *counter,
    ipdropper_t *dropper)
{
	int delta;
	int plen;
	dblk_t *db;
	int hlen;
	uint8_t *opt_storage = assoc->ipsa_opt_storage;
	uint_t sec_opt_len; /* label option length not including type, len */
	ip6_t *ip6h = (ip6_t *)mp->b_rptr;

	plen = ntohs(ip6h->ip6_plen);

	/*
	 * Strip the old security option; delta is the resulting change in
	 * length, applied to both the mblk and the running payload length.
	 */
	delta = tsol_remove_secopt_v6(ip6h, MBLKL(mp));
	mp->b_wptr += delta;
	plen += delta;

	/* XXX XXX code copied from tsol_check_label_v6 */
	/*
	 * Make sure we have room for the worst-case addition. Add 2 bytes for
	 * the hop-by-hop ext header's next header and length fields. Add
	 * another 2 bytes for the label option type, len and then round
	 * up to the next 8-byte multiple.
	 */
	sec_opt_len = opt_storage[1];

	db = mp->b_datap;
	hlen = (4 + sec_opt_len + 7) & ~7;

	/* Need a private dblk with hlen bytes of slack past b_wptr. */
	if ((db->db_ref != 1) || (mp->b_wptr + hlen > db->db_lim)) {
		int copylen;
		mblk_t *new_mp;
		uint16_t hdr_len;

		hdr_len = ip_hdr_length_v6(mp, ip6h);
		/*
		 * Allocate enough to be meaningful, but not *too* much.
		 * Also all the IPv6 extension headers must be in the same mblk
		 */
		copylen = MBLKL(mp);
		if (copylen > 256)
			copylen = 256;
		if (copylen < hdr_len)
			copylen = hdr_len;
		new_mp = allocb_tmpl(hlen + copylen +
		    (mp->b_rptr - mp->b_datap->db_base), mp);
		if (new_mp == NULL) {
			ip_drop_packet(mp, B_FALSE, NULL, counter,  dropper);
			return (NULL);
		}

		/* keep the bias */
		new_mp->b_rptr += mp->b_rptr - mp->b_datap->db_base;
		new_mp->b_wptr = new_mp->b_rptr + copylen;
		bcopy(mp->b_rptr, new_mp->b_rptr, copylen);
		new_mp->b_cont = mp;
		/*
		 * Skip the bytes just copied; if that empties the old mblk,
		 * unlink and free it so new_mp chains straight to the rest.
		 */
		if ((mp->b_rptr += copylen) >= mp->b_wptr) {
			new_mp->b_cont = mp->b_cont;
			freeb(mp);
		}
		mp = new_mp;
		ip6h = (ip6_t *)mp->b_rptr;
	}

	/* Insert the new option; delta is the number of bytes added. */
	delta = tsol_prepend_option_v6(assoc->ipsa_opt_storage,
	    ip6h, MBLKL(mp));

	/* NOTE(review): a -1 return is only caught on DEBUG kernels. */
	ASSERT(delta != -1);

	plen += delta;
	mp->b_wptr += delta;

	/*
	 * Paranoia
	 */
	db = mp->b_datap;

	ASSERT3P(mp->b_wptr, <=, db->db_lim);
	ASSERT3P(mp->b_rptr, <=, db->db_lim);

	ASSERT3P(mp->b_wptr, >=, db->db_base);
	ASSERT3P(mp->b_rptr, >=, db->db_base);
	/* End paranoia */

	ip6h->ip6_plen = htons(plen);

	return (mp);
}
7491 
7492 /* Whack the labels and update ip_xmit_attr_t as needed */
7493 mblk_t *
7494 sadb_whack_label(mblk_t *mp, ipsa_t *assoc, ip_xmit_attr_t *ixa,
7495     kstat_named_t *counter, ipdropper_t *dropper)
7496 {
7497 	int adjust;
7498 	int iplen;
7499 
7500 	if (ixa->ixa_flags & IXAF_IS_IPV4) {
7501 		ipha_t		*ipha = (ipha_t *)mp->b_rptr;
7502 
7503 		ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION);
7504 		iplen = ntohs(ipha->ipha_length);
7505 		mp = sadb_whack_label_v4(mp, assoc, counter, dropper);
7506 		if (mp == NULL)
7507 			return (NULL);
7508 
7509 		ipha = (ipha_t *)mp->b_rptr;
7510 		ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION);
7511 		adjust = (int)ntohs(ipha->ipha_length) - iplen;
7512 	} else {
7513 		ip6_t		*ip6h = (ip6_t *)mp->b_rptr;
7514 
7515 		ASSERT(IPH_HDR_VERSION(ip6h) == IPV6_VERSION);
7516 		iplen = ntohs(ip6h->ip6_plen);
7517 		mp = sadb_whack_label_v6(mp, assoc, counter, dropper);
7518 		if (mp == NULL)
7519 			return (NULL);
7520 
7521 		ip6h = (ip6_t *)mp->b_rptr;
7522 		ASSERT(IPH_HDR_VERSION(ip6h) == IPV6_VERSION);
7523 		adjust = (int)ntohs(ip6h->ip6_plen) - iplen;
7524 	}
7525 	ixa->ixa_pktlen += adjust;
7526 	ixa->ixa_ip_hdr_length += adjust;
7527 	return (mp);
7528 }
7529 
7530 /*
7531  * If this is an outgoing SA then add some fuzz to the
7532  * SOFT EXPIRE time. The reason for this is to stop
7533  * peers trying to renegotiate SOFT expiring SA's at
7534  * the same time. The amount of fuzz needs to be at
7535  * least 8 seconds which is the typical interval
7536  * sadb_ager(), although this is only a guide as it
7537  * selftunes.
7538  */
7539 static void
7540 lifetime_fuzz(ipsa_t *assoc)
7541 {
7542 	uint8_t rnd;
7543 
7544 	if (assoc->ipsa_softaddlt == 0)
7545 		return;
7546 
7547 	(void) random_get_pseudo_bytes(&rnd, sizeof (rnd));
7548 	rnd = (rnd & 0xF) + 8;
7549 	assoc->ipsa_softexpiretime -= rnd;
7550 	assoc->ipsa_softaddlt -= rnd;
7551 }
7552 
7553 static void
7554 destroy_ipsa_pair(ipsap_t *ipsapp)
7555 {
7556 	/*
7557 	 * Because of the multi-line macro nature of IPSA_REFRELE, keep
7558 	 * them in { }.
7559 	 */
7560 	if (ipsapp->ipsap_sa_ptr != NULL) {
7561 		IPSA_REFRELE(ipsapp->ipsap_sa_ptr);
7562 	}
7563 	if (ipsapp->ipsap_psa_ptr != NULL) {
7564 		IPSA_REFRELE(ipsapp->ipsap_psa_ptr);
7565 	}
7566 	init_ipsa_pair(ipsapp);
7567 }
7568 
7569 static void
7570 init_ipsa_pair(ipsap_t *ipsapp)
7571 {
7572 	ipsapp->ipsap_bucket = NULL;
7573 	ipsapp->ipsap_sa_ptr = NULL;
7574 	ipsapp->ipsap_pbucket = NULL;
7575 	ipsapp->ipsap_psa_ptr = NULL;
7576 }
7577 
7578 /*
7579  * The sadb_ager() function walks through the hash tables of SA's and ages
7580  * them, if the SA expires as a result, its marked as DEAD and will be reaped
7581  * the next time sadb_ager() runs. SA's which are paired or have a peer (same
7582  * SA appears in both the inbound and outbound tables because its not possible
7583  * to determine its direction) are placed on a list when they expire. This is
7584  * to ensure that pair/peer SA's are reaped at the same time, even if they
7585  * expire at different times.
7586  *
7587  * This function is called twice by sadb_ager(), one after processing the
7588  * inbound table, then again after processing the outbound table.
7589  */
7590 void
7591 age_pair_peer_list(templist_t *haspeerlist, sadb_t *sp, boolean_t outbound)
7592 {
7593 	templist_t *listptr;
7594 	int outhash;
7595 	isaf_t *bucket;
7596 	boolean_t haspeer;
7597 	ipsa_t *peer_assoc, *dying;
7598 	/*
7599 	 * Haspeer cases will contain both IPv4 and IPv6.  This code
7600 	 * is address independent.
7601 	 */
7602 	while (haspeerlist != NULL) {
7603 		/* "dying" contains the SA that has a peer. */
7604 		dying = haspeerlist->ipsa;
7605 		haspeer = (dying->ipsa_haspeer);
7606 		listptr = haspeerlist;
7607 		haspeerlist = listptr->next;
7608 		kmem_free(listptr, sizeof (*listptr));
7609 		/*
7610 		 * Pick peer bucket based on addrfam.
7611 		 */
7612 		if (outbound) {
7613 			if (haspeer)
7614 				bucket = INBOUND_BUCKET(sp, dying->ipsa_spi);
7615 			else
7616 				bucket = INBOUND_BUCKET(sp,
7617 				    dying->ipsa_otherspi);
7618 		} else { /* inbound */
7619 			if (haspeer) {
7620 				if (dying->ipsa_addrfam == AF_INET6) {
7621 					outhash = OUTBOUND_HASH_V6(sp,
7622 					    *((in6_addr_t *)&dying->
7623 					    ipsa_dstaddr));
7624 				} else {
7625 					outhash = OUTBOUND_HASH_V4(sp,
7626 					    *((ipaddr_t *)&dying->
7627 					    ipsa_dstaddr));
7628 				}
7629 			} else if (dying->ipsa_addrfam == AF_INET6) {
7630 				outhash = OUTBOUND_HASH_V6(sp,
7631 				    *((in6_addr_t *)&dying->
7632 				    ipsa_srcaddr));
7633 			} else {
7634 				outhash = OUTBOUND_HASH_V4(sp,
7635 				    *((ipaddr_t *)&dying->
7636 				    ipsa_srcaddr));
7637 			}
7638 			bucket = &(sp->sdb_of[outhash]);
7639 		}
7640 
7641 		mutex_enter(&bucket->isaf_lock);
7642 		/*
7643 		 * "haspeer" SA's have the same src/dst address ordering,
7644 		 * "paired" SA's have the src/dst addresses reversed.
7645 		 */
7646 		if (haspeer) {
7647 			peer_assoc = ipsec_getassocbyspi(bucket,
7648 			    dying->ipsa_spi, dying->ipsa_srcaddr,
7649 			    dying->ipsa_dstaddr, dying->ipsa_addrfam);
7650 		} else {
7651 			peer_assoc = ipsec_getassocbyspi(bucket,
7652 			    dying->ipsa_otherspi, dying->ipsa_dstaddr,
7653 			    dying->ipsa_srcaddr, dying->ipsa_addrfam);
7654 		}
7655 
7656 		mutex_exit(&bucket->isaf_lock);
7657 		if (peer_assoc != NULL) {
7658 			mutex_enter(&peer_assoc->ipsa_lock);
7659 			mutex_enter(&dying->ipsa_lock);
7660 			if (!haspeer) {
7661 				/*
7662 				 * Only SA's which have a "peer" or are
7663 				 * "paired" end up on this list, so this
7664 				 * must be a "paired" SA, update the flags
7665 				 * to break the pair.
7666 				 */
7667 				peer_assoc->ipsa_otherspi = 0;
7668 				peer_assoc->ipsa_flags &= ~IPSA_F_PAIRED;
7669 				dying->ipsa_otherspi = 0;
7670 				dying->ipsa_flags &= ~IPSA_F_PAIRED;
7671 			}
7672 			if (haspeer || outbound) {
7673 				/*
7674 				 * Update the state of the "inbound" SA when
7675 				 * the "outbound" SA has expired. Don't update
7676 				 * the "outbound" SA when the "inbound" SA
7677 				 * SA expires because setting the hard_addtime
7678 				 * below will cause this to happen.
7679 				 */
7680 				peer_assoc->ipsa_state = dying->ipsa_state;
7681 			}
7682 			if (dying->ipsa_state == IPSA_STATE_DEAD)
7683 				peer_assoc->ipsa_hardexpiretime = 1;
7684 
7685 			mutex_exit(&dying->ipsa_lock);
7686 			mutex_exit(&peer_assoc->ipsa_lock);
7687 			IPSA_REFRELE(peer_assoc);
7688 		}
7689 		IPSA_REFRELE(dying);
7690 	}
7691 }
7692 
7693 /*
7694  * Ensure that the IV used for CCM mode never repeats. The IV should
7695  * only be updated by this function. Also check to see if the IV
7696  * is about to wrap and generate a SOFT Expire. This function is only
7697  * called for outgoing packets, the IV for incomming packets is taken
7698  * from the wire. If the outgoing SA needs to be expired, update
7699  * the matching incomming SA.
7700  */
7701 boolean_t
7702 update_iv(uint8_t *iv_ptr, queue_t *pfkey_q, ipsa_t *assoc,
7703     ipsecesp_stack_t *espstack)
7704 {
7705 	boolean_t rc = B_TRUE;
7706 	isaf_t *inbound_bucket;
7707 	sadb_t *sp;
7708 	ipsa_t *pair_sa = NULL;
7709 	int sa_new_state = 0;
7710 
7711 	/* For non counter modes, the IV is random data. */
7712 	if (!(assoc->ipsa_flags & IPSA_F_COUNTERMODE)) {
7713 		(void) random_get_pseudo_bytes(iv_ptr, assoc->ipsa_iv_len);
7714 		return (rc);
7715 	}
7716 
7717 	mutex_enter(&assoc->ipsa_lock);
7718 
7719 	(*assoc->ipsa_iv)++;
7720 
7721 	if (*assoc->ipsa_iv == assoc->ipsa_iv_hardexpire) {
7722 		sa_new_state = IPSA_STATE_DEAD;
7723 		rc = B_FALSE;
7724 	} else if (*assoc->ipsa_iv == assoc->ipsa_iv_softexpire) {
7725 		if (assoc->ipsa_state != IPSA_STATE_DYING) {
7726 			/*
7727 			 * This SA may have already been expired when its
7728 			 * PAIR_SA expired.
7729 			 */
7730 			sa_new_state = IPSA_STATE_DYING;
7731 		}
7732 	}
7733 	if (sa_new_state) {
7734 		/*
7735 		 * If there is a state change, we need to update this SA
7736 		 * and its "pair", we can find the bucket for the "pair" SA
7737 		 * while holding the ipsa_t mutex, but we won't actually
7738 		 * update anything untill the ipsa_t mutex has been released
7739 		 * for _this_ SA.
7740 		 */
7741 		assoc->ipsa_state = sa_new_state;
7742 		if (assoc->ipsa_addrfam == AF_INET6) {
7743 			sp = &espstack->esp_sadb.s_v6;
7744 		} else {
7745 			sp = &espstack->esp_sadb.s_v4;
7746 		}
7747 		inbound_bucket = INBOUND_BUCKET(sp, assoc->ipsa_otherspi);
7748 		sadb_expire_assoc(pfkey_q, assoc);
7749 	}
7750 	if (rc == B_TRUE)
7751 		bcopy(assoc->ipsa_iv, iv_ptr, assoc->ipsa_iv_len);
7752 
7753 	mutex_exit(&assoc->ipsa_lock);
7754 
7755 	if (sa_new_state) {
7756 		/* Find the inbound SA, need to lock hash bucket. */
7757 		mutex_enter(&inbound_bucket->isaf_lock);
7758 		pair_sa = ipsec_getassocbyspi(inbound_bucket,
7759 		    assoc->ipsa_otherspi, assoc->ipsa_dstaddr,
7760 		    assoc->ipsa_srcaddr, assoc->ipsa_addrfam);
7761 		mutex_exit(&inbound_bucket->isaf_lock);
7762 		if (pair_sa != NULL) {
7763 			mutex_enter(&pair_sa->ipsa_lock);
7764 			pair_sa->ipsa_state = sa_new_state;
7765 			mutex_exit(&pair_sa->ipsa_lock);
7766 			IPSA_REFRELE(pair_sa);
7767 		}
7768 	}
7769 
7770 	return (rc);
7771 }
7772 
7773 void
7774 ccm_params_init(ipsa_t *assoc, uchar_t *esph, uint_t data_len, uchar_t *iv_ptr,
7775     ipsa_cm_mech_t *cm_mech, crypto_data_t *crypto_data)
7776 {
7777 	uchar_t *nonce;
7778 	crypto_mechanism_t *combined_mech;
7779 	CK_AES_CCM_PARAMS *params;
7780 
7781 	combined_mech = (crypto_mechanism_t *)cm_mech;
7782 	params = (CK_AES_CCM_PARAMS *)(combined_mech + 1);
7783 	nonce = (uchar_t *)(params + 1);
7784 	params->ulMACSize = assoc->ipsa_mac_len;
7785 	params->ulNonceSize = assoc->ipsa_nonce_len;
7786 	params->ulAuthDataSize = sizeof (esph_t);
7787 	params->ulDataSize = data_len;
7788 	params->nonce = nonce;
7789 	params->authData = esph;
7790 
7791 	cm_mech->combined_mech.cm_type = assoc->ipsa_emech.cm_type;
7792 	cm_mech->combined_mech.cm_param_len = sizeof (CK_AES_CCM_PARAMS);
7793 	cm_mech->combined_mech.cm_param = (caddr_t)params;
7794 	/* See gcm_params_init() for comments. */
7795 	bcopy(assoc->ipsa_nonce, nonce, assoc->ipsa_saltlen);
7796 	nonce += assoc->ipsa_saltlen;
7797 	bcopy(iv_ptr, nonce, assoc->ipsa_iv_len);
7798 	crypto_data->cd_miscdata = NULL;
7799 }
7800 
7801 /* ARGSUSED */
7802 void
7803 cbc_params_init(ipsa_t *assoc, uchar_t *esph, uint_t data_len, uchar_t *iv_ptr,
7804     ipsa_cm_mech_t *cm_mech, crypto_data_t *crypto_data)
7805 {
7806 	cm_mech->combined_mech.cm_type = assoc->ipsa_emech.cm_type;
7807 	cm_mech->combined_mech.cm_param_len = 0;
7808 	cm_mech->combined_mech.cm_param = NULL;
7809 	crypto_data->cd_miscdata = (char *)iv_ptr;
7810 }
7811 
7812 /* ARGSUSED */
7813 void
7814 gcm_params_init(ipsa_t *assoc, uchar_t *esph, uint_t data_len, uchar_t *iv_ptr,
7815     ipsa_cm_mech_t *cm_mech, crypto_data_t *crypto_data)
7816 {
7817 	uchar_t *nonce;
7818 	crypto_mechanism_t *combined_mech;
7819 	CK_AES_GCM_PARAMS *params;
7820 
7821 	combined_mech = (crypto_mechanism_t *)cm_mech;
7822 	params = (CK_AES_GCM_PARAMS *)(combined_mech + 1);
7823 	nonce = (uchar_t *)(params + 1);
7824 
7825 	params->pIv = nonce;
7826 	params->ulIvLen = assoc->ipsa_nonce_len;
7827 	params->ulIvBits = SADB_8TO1(assoc->ipsa_nonce_len);
7828 	params->pAAD = esph;
7829 	params->ulAADLen = sizeof (esph_t);
7830 	params->ulTagBits = SADB_8TO1(assoc->ipsa_mac_len);
7831 
7832 	cm_mech->combined_mech.cm_type = assoc->ipsa_emech.cm_type;
7833 	cm_mech->combined_mech.cm_param_len = sizeof (CK_AES_GCM_PARAMS);
7834 	cm_mech->combined_mech.cm_param = (caddr_t)params;
7835 	/*
7836 	 * Create the nonce, which is made up of the salt and the IV.
7837 	 * Copy the salt from the SA and the IV from the packet.
7838 	 * For inbound packets we copy the IV from the packet because it
7839 	 * was set by the sending system, for outbound packets we copy the IV
7840 	 * from the packet because the IV in the SA may be changed by another
7841 	 * thread, the IV in the packet was created while holding a mutex.
7842 	 */
7843 	bcopy(assoc->ipsa_nonce, nonce, assoc->ipsa_saltlen);
7844 	nonce += assoc->ipsa_saltlen;
7845 	bcopy(iv_ptr, nonce, assoc->ipsa_iv_len);
7846 	crypto_data->cd_miscdata = NULL;
7847 }
7848