xref: /titanic_44/usr/src/uts/common/inet/ip/tn_ipopt.c (revision 08045defdf65ee890fef6e20510a093a17feb8fe)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/types.h>
27 #include <sys/systm.h>
28 #include <sys/kmem.h>
29 #include <sys/disp.h>
30 #include <sys/stream.h>
31 #include <sys/strsubr.h>
32 #include <sys/strsun.h>
33 #include <sys/policy.h>
34 #include <sys/tsol/label_macro.h>
35 #include <sys/tsol/tndb.h>
36 #include <sys/tsol/tnet.h>
37 #include <inet/ip.h>
38 #include <inet/ip6.h>
39 #include <inet/tcp.h>
40 #include <inet/ipclassifier.h>
41 #include <inet/ip_ire.h>
42 #include <inet/ip_ftable.h>
43 
44 /*
45  * This routine takes a sensitivity label as input and creates a CIPSO
46  * option in the specified buffer.  It returns the size of the CIPSO option.
47  * If the sensitivity label is too large for the CIPSO option, then 0
48  * is returned.
49  *
50  * tsol2cipso_tt1 returns 0 for failure and greater than 0 for success
51  * (more accurately, success means a return value between 10 and 40).
52  */
53 
54 static int
55 tsol2cipso_tt1(const bslabel_t *sl, unsigned char *cop, uint32_t doi)
56 {
57 	struct cipso_tag_type_1 *tt1;
58 	const _bslabel_impl_t *bsl;
59 	const uchar_t *ucp;
60 	int i;
61 
62 	if (doi == 0)
63 		return (0);
64 
65 	/* check for Admin High sensitivity label */
66 	if (blequal(sl, label2bslabel(l_admin_high)))
67 		return (0);
68 
69 	/* check whether classification will fit in one octet */
70 	bsl = (const _bslabel_impl_t *)sl;
71 	if (LCLASS(bsl) & 0xFF00)
72 		return (0);
73 
74 	/*
75 	 * Check whether compartments will fit in 30 octets.
76 	 * Compartments 241 - 256 are not allowed.
77 	 */
78 	if (ntohl(bsl->compartments.c8) & 0x0000FFFF)
79 		return (0);
80 
81 	/*
82 	 * Compute option length and tag length.
83 	 * 'p' points to the last two bytes in the Sensitivity Label's
84 	 * compartments; these cannot be mapped into CIPSO compartments.
85 	 */
86 	ucp = (const uchar_t *)&bsl->compartments.c8 + 2;
87 	while (--ucp >= (const uchar_t *)&bsl->compartments.c1)
88 		if (*ucp != 0)
89 			break;
90 
91 	i =  ucp - (const uchar_t *)&bsl->compartments.c1 + 1;
92 
93 	if (cop == NULL)
94 		return (10 + i);
95 
96 	doi = htonl(doi);
97 	ucp = (const uchar_t *)&doi;
98 	cop[IPOPT_OPTVAL] = IPOPT_COMSEC;
99 	cop[IPOPT_OLEN] = 10 + i;
100 	cop[IPOPT_OLEN+1] = ucp[0];
101 	cop[IPOPT_OLEN+2] = ucp[1];
102 	cop[IPOPT_OLEN+3] = ucp[2];
103 	cop[IPOPT_OLEN+4] = ucp[3];
104 	tt1 = (struct cipso_tag_type_1 *)&cop[IPOPT_OLEN + 5];
105 	tt1->tag_type = 1;
106 	tt1->tag_align = 0;
107 	tt1->tag_sl = LCLASS(bsl);
108 	tt1->tag_length = 4 + i;
109 
110 	bcopy(&bsl->compartments.c1, tt1->tag_cat, i);
111 
112 	return (cop[IPOPT_OLEN]);
113 }
114 
115 /*
116  * The following routine copies a datagram's option into the specified buffer
117  * (if buffer pointer is non-null), or returns a pointer to the label within
118  * the streams message (if buffer is null).  In both cases, tsol_get_option
119  * returns the option's type.
120  *
121  * tsol_get_option assumes that the specified buffer is large enough to
122  * hold the largest valid CIPSO option.  Since the total number of
123  * IP header options cannot exceed 40 bytes, a 40 byte buffer is a good choice.
124  */
125 
126 tsol_ip_label_t
127 tsol_get_option(mblk_t *mp, uchar_t **buffer)
128 {
129 	ipha_t	*ipha;
130 	uchar_t	*opt;
131 	uint32_t	totallen;
132 	uint32_t	optval;
133 	uint32_t	optlen;
134 
135 	ipha = (ipha_t *)mp->b_rptr;
136 
137 	/*
138 	 * Get length (in 4 byte octets) of IP header options.
139 	 * If header doesn't contain options, then return OPT_NONE.
140 	 */
141 	totallen = ipha->ipha_version_and_hdr_length -
142 	    (uint8_t)((IP_VERSION << 4) + IP_SIMPLE_HDR_LENGTH_IN_WORDS);
143 
144 	if (totallen == 0)
145 		return (OPT_NONE);
146 
147 	totallen <<= 2;
148 
149 	/*
150 	 * Search for CIPSO option.
151 	 * If no such option is present, then return OPT_NONE.
152 	 */
153 	opt = (uchar_t *)&ipha[1];
154 	while (totallen != 0) {
155 		switch (optval = opt[IPOPT_OPTVAL]) {
156 		case IPOPT_EOL:
157 			return (OPT_NONE);
158 		case IPOPT_NOP:
159 			optlen = 1;
160 			break;
161 		default:
162 			if (totallen <= IPOPT_OLEN)
163 				return (OPT_NONE);
164 			optlen = opt[IPOPT_OLEN];
165 			if (optlen < 2)
166 				return (OPT_NONE);
167 		}
168 		if (optlen > totallen)
169 			return (OPT_NONE);
170 		/*
171 		 * Copy pointer to option into '*buffer' and
172 		 * return the option type.
173 		 */
174 		switch (optval) {
175 		case IPOPT_COMSEC:
176 			*buffer = opt;
177 			if (TSOL_CIPSO_TAG_OFFSET < optlen &&
178 			    opt[TSOL_CIPSO_TAG_OFFSET] == 1)
179 				return (OPT_CIPSO);
180 			return (OPT_NONE);
181 		}
182 		totallen -= optlen;
183 		opt += optlen;
184 	}
185 	return (OPT_NONE);
186 }
187 
188 /*
189  * tsol_check_dest()
190  *
191  * This routine verifies if a destination is allowed to recieve messages
192  * based on the message cred's security label. If any adjustments to
193  * the cred are needed due to the connection's MAC-exempt status or
194  * the destination's ability to receive labels, an "effective cred"
195  * will be returned.
196  *
197  * On successful return, effective_cred will point to the new creds needed
198  * or will be NULL if new creds aren't needed. On error, effective_cred
199  * is NULL.
200  *
201  * Returns:
202  *	0		Have or constructed appropriate credentials
203  *	EHOSTUNREACH	The credentials failed the remote host accreditation
204  *      ENOMEM		Memory allocation failure
205  */
206 int
207 tsol_check_dest(const cred_t *credp, const void *dst, uchar_t version,
208     boolean_t mac_exempt, cred_t **effective_cred)
209 {
210 	ts_label_t	*tsl, *newtsl = NULL;
211 	tsol_tpc_t	*dst_rhtp;
212 	zoneid_t	zoneid;
213 
214 	*effective_cred = NULL;
215 	ASSERT(version == IPV4_VERSION ||
216 	    (version == IPV6_VERSION &&
217 	    !IN6_IS_ADDR_V4MAPPED((in6_addr_t *)dst)));
218 
219 	/* Always pass kernel level communication (NULL label) */
220 	if ((tsl = crgetlabel(credp)) == NULL) {
221 		DTRACE_PROBE2(tx__tnopt__log__info__labeling__mac__allownull,
222 		    char *, "destination ip(1) with null cred was passed",
223 		    ipaddr_t, dst);
224 		return (0);
225 	}
226 
227 	/* Always pass multicast */
228 	if (version == IPV4_VERSION &&
229 	    CLASSD(*(ipaddr_t *)dst)) {
230 		DTRACE_PROBE2(tx__tnopt__log__info__labeling__mac__allowmult,
231 		    char *, "destination ip(1) with multicast dest was passed",
232 		    ipaddr_t, dst);
233 		return (0);
234 	} else if (version == IPV6_VERSION &&
235 	    IN6_IS_ADDR_MULTICAST((in6_addr_t *)dst)) {
236 		DTRACE_PROBE2(tx__tnopt__log__info__labeling__mac__allowmult_v6,
237 		    char *, "destination ip(1) with multicast dest was passed",
238 		    in6_addr_t *, dst);
239 		return (0);
240 	}
241 
242 	/* Never pass an undefined destination */
243 	if ((dst_rhtp = find_tpc(dst, version, B_FALSE)) == NULL) {
244 		DTRACE_PROBE2(tx__tnopt__log__info__labeling__lookupdst,
245 		    char *, "destination ip(1) not in tn database.",
246 		    void *, dst);
247 		return (EHOSTUNREACH);
248 	}
249 
250 	switch (dst_rhtp->tpc_tp.host_type) {
251 	case UNLABELED:
252 		/*
253 		 * Can talk to unlabeled hosts if
254 		 * (1) zone's label matches the default label, or
255 		 * (2) SO_MAC_EXEMPT is on and we dominate the peer's label
256 		 * (3) SO_MAC_EXEMPT is on and this is the global zone
257 		 */
258 		if (dst_rhtp->tpc_tp.tp_doi != tsl->tsl_doi) {
259 			DTRACE_PROBE4(tx__tnopt__log__info__labeling__doi,
260 			    char *, "unlabeled dest ip(1)/tpc(2) doi does "
261 			    "not match msg label(3) doi.", void *, dst,
262 			    tsol_tpc_t *, dst_rhtp, ts_label_t *, tsl);
263 			TPC_RELE(dst_rhtp);
264 			return (EHOSTUNREACH);
265 		}
266 		if (!blequal(&dst_rhtp->tpc_tp.tp_def_label,
267 		    &tsl->tsl_label)) {
268 			zoneid = crgetzoneid(credp);
269 			if (!mac_exempt ||
270 			    !(zoneid == GLOBAL_ZONEID ||
271 			    bldominates(&tsl->tsl_label,
272 			    &dst_rhtp->tpc_tp.tp_def_label))) {
273 				DTRACE_PROBE4(
274 				    tx__tnopt__log__info__labeling__mac,
275 				    char *, "unlabeled dest ip(1)/tpc(2) does "
276 				    "not match msg label(3).", void *, dst,
277 				    tsol_tpc_t *, dst_rhtp, ts_label_t *, tsl);
278 				TPC_RELE(dst_rhtp);
279 				return (EHOSTUNREACH);
280 			}
281 			/*
282 			 * This is a downlabel MAC-exempt exchange.
283 			 * Use the remote destination's default label
284 			 * as the label of the message data.
285 			 */
286 			if ((newtsl = labelalloc(&dst_rhtp->tpc_tp.tp_def_label,
287 			    dst_rhtp->tpc_tp.tp_doi, KM_NOSLEEP)) == NULL) {
288 				TPC_RELE(dst_rhtp);
289 				return (ENOMEM);
290 			}
291 			newtsl->tsl_flags |= TSLF_UNLABELED;
292 
293 		} else if (!(tsl->tsl_flags & TSLF_UNLABELED)) {
294 			/*
295 			 * The security labels are the same but we need
296 			 * to flag that the remote node is unlabeled.
297 			 */
298 			if ((newtsl = labeldup(tsl, KM_NOSLEEP)) == NULL) {
299 				TPC_RELE(dst_rhtp);
300 				return (ENOMEM);
301 			}
302 			newtsl->tsl_flags |= TSLF_UNLABELED;
303 		}
304 		break;
305 
306 	case SUN_CIPSO:
307 		/*
308 		 * Can talk to labeled hosts if zone's label is within target's
309 		 * label range or set.
310 		 */
311 		if (dst_rhtp->tpc_tp.tp_cipso_doi_cipso != tsl->tsl_doi ||
312 		    (!_blinrange(&tsl->tsl_label,
313 		    &dst_rhtp->tpc_tp.tp_sl_range_cipso) &&
314 		    !blinlset(&tsl->tsl_label,
315 		    dst_rhtp->tpc_tp.tp_sl_set_cipso))) {
316 			DTRACE_PROBE4(tx__tnopt__log__info__labeling__mac,
317 			    char *, "labeled dest ip(1)/tpc(2) does not "
318 			    "match msg label(3).", void *, dst,
319 			    tsol_tpc_t *, dst_rhtp, ts_label_t *, tsl);
320 			TPC_RELE(dst_rhtp);
321 			return (EHOSTUNREACH);
322 		}
323 		if (tsl->tsl_flags & TSLF_UNLABELED) {
324 			/*
325 			 * The security label is a match but we need to
326 			 * clear the unlabeled flag for this remote node.
327 			 */
328 			if ((newtsl = labeldup(tsl, KM_NOSLEEP)) == NULL) {
329 				TPC_RELE(dst_rhtp);
330 				return (ENOMEM);
331 			}
332 			newtsl->tsl_flags ^= TSLF_UNLABELED;
333 		}
334 		break;
335 
336 	default:
337 		TPC_RELE(dst_rhtp);
338 		return (EHOSTUNREACH);
339 	}
340 
341 	/*
342 	 * Generate a new cred if we modified the security label or
343 	 * label flags.
344 	 */
345 	if (newtsl != NULL) {
346 		*effective_cred = copycred_from_tslabel(credp,
347 		    newtsl, KM_NOSLEEP);
348 		label_rele(newtsl);
349 		if (*effective_cred == NULL) {
350 			TPC_RELE(dst_rhtp);
351 			return (ENOMEM);
352 		}
353 	}
354 	TPC_RELE(dst_rhtp);
355 	return (0);
356 }
357 
358 /*
359  * tsol_compute_label()
360  *
361  * This routine computes the IP label that should be on a packet based on the
362  * connection and destination information.
363  *
364  * Returns:
365  *      0		Fetched label
366  *	EHOSTUNREACH	No route to destination
367  *	EINVAL		Label cannot be computed
368  */
369 int
370 tsol_compute_label(const cred_t *credp, ipaddr_t dst, uchar_t *opt_storage,
371     ip_stack_t *ipst)
372 {
373 	uint_t		sec_opt_len;
374 	ts_label_t	*tsl;
375 	ire_t		*ire, *sire = NULL;
376 	tsol_ire_gw_secattr_t *attrp;
377 	zoneid_t	zoneid, ip_zoneid;
378 
379 	ASSERT(credp != NULL);
380 
381 	if (opt_storage != NULL)
382 		opt_storage[IPOPT_OLEN] = 0;
383 
384 	if ((tsl = crgetlabel(credp)) == NULL)
385 		return (0);
386 
387 	/* always pass multicast */
388 	if (CLASSD(dst))
389 		return (0);
390 
391 	if (tsl->tsl_flags & TSLF_UNLABELED) {
392 
393 		/*
394 		 * The destination is unlabeled. Only add a label if the
395 		 * destination is not a broadcast/local/loopback address,
396 		 * the destination is not on the same subnet, and the
397 		 * next-hop gateway is labeled.
398 		 *
399 		 * For exclusive stacks we set the zoneid to zero
400 		 * to operate as if we are in the global zone for
401 		 * IRE lookups.
402 		 */
403 		zoneid = crgetzoneid(credp);
404 		if (ipst->ips_netstack->netstack_stackid != GLOBAL_NETSTACKID)
405 			ip_zoneid = GLOBAL_ZONEID;
406 		else
407 			ip_zoneid = zoneid;
408 
409 		ire = ire_cache_lookup(dst, ip_zoneid, tsl, ipst);
410 
411 		if (ire != NULL && (ire->ire_type & (IRE_BROADCAST | IRE_LOCAL |
412 		    IRE_LOOPBACK | IRE_INTERFACE)) != 0) {
413 			IRE_REFRELE(ire);
414 			return (0);
415 		} else if (ire == NULL) {
416 			ire = ire_ftable_lookup(dst, 0, 0, 0, NULL, &sire,
417 			    ip_zoneid, 0, tsl, (MATCH_IRE_RECURSIVE |
418 			    MATCH_IRE_DEFAULT | MATCH_IRE_SECATTR), ipst);
419 		}
420 
421 		/* no route to destination */
422 		if (ire == NULL) {
423 			DTRACE_PROBE3(
424 			    tx__tnopt__log__info__labeling__routedst__v4,
425 			    char *, "No route to unlabeled dest ip(1) with "
426 			    "creds(2).", ipaddr_t, dst, cred_t *, credp);
427 			return (EHOSTUNREACH);
428 		}
429 
430 		/*
431 		 * Prefix IRE from f-table lookup means that the destination
432 		 * is not directly connected; check the next-hop attributes.
433 		 */
434 		if (sire != NULL) {
435 			ASSERT(ire != NULL);
436 			IRE_REFRELE(ire);
437 			ire = sire;
438 		}
439 
440 		/*
441 		 * Return now if next hop gateway is unlabeled. There is
442 		 * no need to generate a CIPSO option for this message.
443 		 */
444 		attrp = ire->ire_gw_secattr;
445 		if (attrp == NULL || attrp->igsa_rhc == NULL ||
446 		    attrp->igsa_rhc->rhc_tpc->tpc_tp.host_type == UNLABELED) {
447 			IRE_REFRELE(ire);
448 			return (0);
449 		}
450 
451 		IRE_REFRELE(ire);
452 
453 	}
454 
455 	/* compute the CIPSO option */
456 	sec_opt_len = tsol2cipso_tt1(&tsl->tsl_label, opt_storage,
457 	    tsl->tsl_doi);
458 
459 	if (sec_opt_len == 0) {
460 		DTRACE_PROBE3(tx__tnopt__log__error__labeling__lostops__v4,
461 		    char *, "options lack length for dest ip(1) with creds(2).",
462 		    ipaddr_t, dst, cred_t *, credp);
463 		return (EINVAL);
464 	}
465 
466 	return (0);
467 }
468 
469 /*
470  * Remove any existing security option (CIPSO) from the given IP
471  * header, move the 'buflen' bytes back to fill the gap, and return the number
472  * of bytes removed (as zero or negative number).  Assumes that the headers are
473  * sane.
474  */
475 int
476 tsol_remove_secopt(ipha_t *ipha, int buflen)
477 {
478 	int remlen, olen, oval, delta;
479 	uchar_t *fptr, *tptr;
480 	boolean_t noop_keep;
481 
482 	remlen = IPH_HDR_LENGTH(ipha) - IP_SIMPLE_HDR_LENGTH;
483 	fptr = tptr = (uchar_t *)(ipha + 1);
484 	noop_keep = B_TRUE;
485 	while (remlen > 0) {
486 		oval = fptr[IPOPT_OPTVAL];
487 
488 		/* terminate on end of list */
489 		if (oval == IPOPT_EOL)
490 			break;
491 
492 		/*
493 		 * Delete any no-ops following a deleted option, at least up
494 		 * to a 4 octet alignment; copy others.
495 		 */
496 		if (oval == IPOPT_NOP) {
497 			if (((fptr - (uchar_t *)ipha) & 3) == 0)
498 				noop_keep = B_TRUE;
499 			if (noop_keep)
500 				*tptr++ = oval;
501 			fptr++;
502 			remlen--;
503 			continue;
504 		}
505 
506 		/* stop on corrupted list; just do nothing. */
507 		if (remlen < 2)
508 			return (0);
509 		olen = fptr[IPOPT_OLEN];
510 		if (olen < 2 || olen > remlen)
511 			return (0);
512 
513 		/* skip over security options to delete them */
514 		if (oval == IPOPT_COMSEC || oval == IPOPT_SECURITY) {
515 			noop_keep = B_FALSE;
516 			fptr += olen;
517 			remlen -= olen;
518 			continue;
519 		}
520 
521 		/* copy the rest */
522 		noop_keep = B_TRUE;
523 		if (tptr != fptr)
524 			ovbcopy(fptr, tptr, olen);
525 		fptr += olen;
526 		tptr += olen;
527 		remlen -= olen;
528 	}
529 
530 	fptr += remlen;
531 
532 	/* figure how much padding we'll need for header alignment */
533 	olen = (tptr - (uchar_t *)ipha) & 3;
534 	if (olen > 0) {
535 		olen = 4 - olen;
536 		/* pad with end-of-list */
537 		bzero(tptr, olen);
538 		tptr += olen;
539 	}
540 
541 	/* slide back the headers that follow and update the IP header */
542 	delta = fptr - tptr;
543 	if (delta != 0) {
544 		ovbcopy(fptr, tptr, ((uchar_t *)ipha + buflen) - fptr);
545 		ipha->ipha_version_and_hdr_length -= delta / 4;
546 	}
547 	return (-delta);
548 }
549 
550 /*
551  * Insert the option in 'optbuf' into the IP header pointed to by 'ipha', and
552  * move the data following the IP header (up to buflen) to accomodate the new
553  * option.  Assumes that up to IP_MAX_OPT_LENGTH bytes are available (in total)
554  * for IP options.  Returns the number of bytes actually inserted, or -1 if the
555  * option cannot be inserted.  (Note that negative return values are possible
556  * when noops must be compressed, and that only -1 indicates error.  Successful
557  * return value is always evenly divisible by 4, by definition.)
558  */
559 int
560 tsol_prepend_option(uchar_t *optbuf, ipha_t *ipha, int buflen)
561 {
562 	int remlen, padding, lastpad, totlen;
563 	int oval, olen;
564 	int delta;
565 	uchar_t *optr;
566 	uchar_t tempopt[IP_MAX_OPT_LENGTH], *toptr;
567 
568 	if (optbuf[IPOPT_OPTVAL] == IPOPT_EOL ||
569 	    optbuf[IPOPT_OPTVAL] == IPOPT_NOP ||
570 	    optbuf[IPOPT_OLEN] == 0)
571 		return (0);
572 
573 	ASSERT(optbuf[IPOPT_OLEN] >= 2 &&
574 	    optbuf[IPOPT_OLEN] <= IP_MAX_OPT_LENGTH);
575 
576 	/* first find the real (unpadded) length of the existing options */
577 	remlen = IPH_HDR_LENGTH(ipha) - IP_SIMPLE_HDR_LENGTH;
578 	padding = totlen = lastpad = 0;
579 	optr = (uchar_t *)(ipha + 1);
580 	while (remlen > 0) {
581 		oval = optr[IPOPT_OPTVAL];
582 
583 		/* stop at end of list */
584 		if (oval == IPOPT_EOL)
585 			break;
586 
587 		/* skip no-ops, noting that length byte isn't present */
588 		if (oval == IPOPT_NOP) {
589 			optr++;
590 			padding++;
591 			lastpad++;
592 			totlen++;
593 			remlen--;
594 			continue;
595 		}
596 
597 		/* give up on a corrupted list; report failure */
598 		if (remlen < 2)
599 			return (-1);
600 		olen = optr[IPOPT_OLEN];
601 		if (olen < 2 || olen > remlen)
602 			return (-1);
603 
604 		lastpad = 0;
605 		optr += olen;
606 		totlen += olen;
607 		remlen -= olen;
608 	}
609 
610 	/* completely ignore any trailing padding */
611 	totlen -= lastpad;
612 	padding -= lastpad;
613 
614 	/*
615 	 * If some sort of inter-option alignment was present, try to preserve
616 	 * that alignment.  If alignment pushes us out past the maximum, then
617 	 * discard it and try to compress to fit.  (We just "assume" that any
618 	 * padding added was attempting to get 32 bit alignment.  If that's
619 	 * wrong, that's just too bad.)
620 	 */
621 	if (padding > 0) {
622 		olen = (optbuf[IPOPT_OLEN] + 3) & ~3;
623 		if (olen + totlen > IP_MAX_OPT_LENGTH) {
624 			totlen -= padding;
625 			if (olen + totlen > IP_MAX_OPT_LENGTH)
626 				return (-1);
627 			padding = 0;
628 		}
629 	}
630 
631 	/*
632 	 * Since we may need to compress or expand the option list, we write to
633 	 * a temporary buffer and then copy the results back to the IP header.
634 	 */
635 	toptr = tempopt;
636 
637 	/* compute actual option to insert */
638 	olen = optbuf[IPOPT_OLEN];
639 	bcopy(optbuf, toptr, olen);
640 	toptr += olen;
641 	if (padding > 0) {
642 		while ((olen & 3) != 0) {
643 			*toptr++ = IPOPT_NOP;
644 			olen++;
645 		}
646 	}
647 
648 	/* copy over the existing options */
649 	optr = (uchar_t *)(ipha + 1);
650 	while (totlen > 0) {
651 		oval = optr[IPOPT_OPTVAL];
652 
653 		/* totlen doesn't include end-of-list marker */
654 		ASSERT(oval != IPOPT_EOL);
655 
656 		/* handle no-ops; copy if desired, ignore otherwise */
657 		if (oval == IPOPT_NOP) {
658 			if (padding > 0) {
659 				/* note: cannot overflow due to checks above */
660 				ASSERT(toptr < tempopt + IP_MAX_OPT_LENGTH);
661 				*toptr++ = oval;
662 			}
663 			optr++;
664 			totlen--;
665 			continue;
666 		}
667 
668 		/* list cannot be corrupt at this point */
669 		ASSERT(totlen >= 2);
670 		olen = optr[IPOPT_OLEN];
671 		ASSERT(olen >= 2 && olen <= totlen);
672 
673 		/* cannot run out of room due to tests above */
674 		ASSERT(toptr + olen <= tempopt + IP_MAX_OPT_LENGTH);
675 
676 		bcopy(optr, toptr, olen);
677 		optr += olen;
678 		toptr += olen;
679 		totlen -= olen;
680 	}
681 
682 	/* figure how much padding we'll need for header alignment */
683 	olen = (toptr - tempopt) & 3;
684 	if (olen > 0) {
685 		olen = 4 - olen;
686 		ASSERT(toptr + olen <= tempopt + IP_MAX_OPT_LENGTH);
687 		/* pad with end-of-list value */
688 		bzero(toptr, olen);
689 		toptr += olen;
690 	}
691 
692 	/* move the headers as needed and update IP header */
693 	olen = (toptr - tempopt) + IP_SIMPLE_HDR_LENGTH;
694 	remlen = IPH_HDR_LENGTH(ipha);
695 	delta = olen - remlen;
696 	if (delta != 0) {
697 		ovbcopy((uchar_t *)ipha + remlen, (uchar_t *)ipha + olen,
698 		    buflen - remlen);
699 		ipha->ipha_version_and_hdr_length += delta / 4;
700 	}
701 
702 	/* slap in the new options */
703 	bcopy(tempopt, ipha + 1, olen - IP_SIMPLE_HDR_LENGTH);
704 
705 	return (delta);
706 }
707 
708 /*
709  * tsol_check_label()
710  *
711  * This routine computes the IP label that should be on the packet based on the
712  * connection and destination information.  If the label is there, it returns
713  * zero, so the caller knows that the label is syncronized, and further calls
714  * are not required.  If the label isn't right, then the right one is inserted.
715  *
716  * The packet's header is clear before entering IPsec's engine.
717  *
718  * Returns:
719  *      0		Label on packet (was|is now) correct
720  *      EACCES		The packet failed the remote host accreditation.
721  *      ENOMEM		Memory allocation failure.
722  *	EINVAL		Label cannot be computed
723  */
724 int
725 tsol_check_label(const cred_t *credp, mblk_t **mpp, boolean_t isexempt,
726     ip_stack_t *ipst, pid_t pid)
727 {
728 	mblk_t *mp = *mpp;
729 	ipha_t  *ipha;
730 	cred_t *effective_cred = NULL;
731 	uchar_t opt_storage[IP_MAX_OPT_LENGTH];
732 	uint_t hlen;
733 	uint_t sec_opt_len;
734 	uchar_t *optr;
735 	int delta_remove = 0, delta_add, adjust;
736 	int retv;
737 
738 	opt_storage[IPOPT_OPTVAL] = 0;
739 
740 	ipha = (ipha_t *)mp->b_rptr;
741 
742 	/*
743 	 * Verify the destination is allowed to receive packets at
744 	 * the security label of the message data. check_dest()
745 	 * may create a new effective cred with a modified label
746 	 * or label flags. Apply any such cred to the message block
747 	 * for use in future routing decisions.
748 	 */
749 	retv = tsol_check_dest(credp, &ipha->ipha_dst, IPV4_VERSION,
750 	    isexempt, &effective_cred);
751 	if (retv != 0)
752 		return (retv);
753 
754 	/*
755 	 * Calculate the security label to be placed in the text
756 	 * of the message (if any).
757 	 */
758 	if (effective_cred != NULL) {
759 		if ((retv = tsol_compute_label(effective_cred,
760 		    ipha->ipha_dst, opt_storage, ipst)) != 0) {
761 			crfree(effective_cred);
762 			return (retv);
763 		}
764 		mblk_setcred(mp, effective_cred, pid);
765 		crfree(effective_cred);
766 	} else {
767 		if ((retv = tsol_compute_label(credp,
768 		    ipha->ipha_dst, opt_storage, ipst)) != 0) {
769 			return (retv);
770 		}
771 	}
772 
773 	optr = (uchar_t *)(ipha + 1);
774 	hlen = IPH_HDR_LENGTH(ipha) - IP_SIMPLE_HDR_LENGTH;
775 	sec_opt_len = opt_storage[IPOPT_OLEN];
776 
777 	if (hlen >= sec_opt_len) {
778 		/* If no option is supposed to be there, make sure it's not */
779 		if (sec_opt_len == 0 && hlen > 0 &&
780 		    optr[IPOPT_OPTVAL] != IPOPT_COMSEC &&
781 		    optr[IPOPT_OPTVAL] != IPOPT_SECURITY)
782 			return (0);
783 		/* if the option is there, it's always first */
784 		if (sec_opt_len != 0 &&
785 		    bcmp(opt_storage, optr, sec_opt_len) == 0)
786 			return (0);
787 	}
788 
789 	/*
790 	 * If there is an option there, then it must be the wrong one; delete.
791 	 */
792 	if (hlen > 0) {
793 		delta_remove = tsol_remove_secopt(ipha, MBLKL(mp));
794 		mp->b_wptr += delta_remove;
795 	}
796 
797 	/* Make sure we have room for the worst-case addition */
798 	hlen = IPH_HDR_LENGTH(ipha) + opt_storage[IPOPT_OLEN];
799 	hlen = (hlen + 3) & ~3;
800 	if (hlen > IP_MAX_HDR_LENGTH)
801 		hlen = IP_MAX_HDR_LENGTH;
802 	hlen -= IPH_HDR_LENGTH(ipha);
803 	if (mp->b_wptr + hlen > mp->b_datap->db_lim) {
804 		int copylen;
805 		mblk_t *new_mp;
806 
807 		/* allocate enough to be meaningful, but not *too* much */
808 		copylen = MBLKL(mp);
809 		if (copylen > 256)
810 			copylen = 256;
811 		new_mp = allocb_tmpl(hlen + copylen +
812 		    (mp->b_rptr - mp->b_datap->db_base), mp);
813 		if (new_mp == NULL)
814 			return (ENOMEM);
815 
816 		/* keep the bias */
817 		new_mp->b_rptr += mp->b_rptr - mp->b_datap->db_base;
818 		new_mp->b_wptr = new_mp->b_rptr + copylen;
819 		bcopy(mp->b_rptr, new_mp->b_rptr, copylen);
820 		new_mp->b_cont = mp;
821 		if ((mp->b_rptr += copylen) >= mp->b_wptr) {
822 			new_mp->b_cont = mp->b_cont;
823 			freeb(mp);
824 		}
825 		*mpp = mp = new_mp;
826 		ipha = (ipha_t *)mp->b_rptr;
827 	}
828 
829 	delta_add = tsol_prepend_option(opt_storage, ipha, MBLKL(mp));
830 	if (delta_add == -1)
831 		goto param_prob;
832 
833 	ASSERT((mp->b_wptr + delta_add) <= DB_LIM(mp));
834 	mp->b_wptr += delta_add;
835 
836 	adjust = delta_remove + delta_add;
837 	adjust += ntohs(ipha->ipha_length);
838 	ipha->ipha_length = htons(adjust);
839 
840 	return (0);
841 
842 param_prob:
843 	return (EINVAL);
844 }
845 
846 /*
847  * IPv6 HopOpt extension header for the label option layout:
848  *	- One octet giving the type of the 'next extension header'
849  *	- Header extension length in 8-byte words, not including the
850  *	  1st 8 bytes, but including any pad bytes at the end.
851  *	  Eg. A value of 2 means 16 bytes not including the 1st 8 bytes.
852  *	- Followed by TLV encoded IPv6 label option. Option layout is
853  *		* One octet, IP6OPT_LS
854  *		* One octet option length in bytes of the option data following
855  *		  the length, but not including any pad bytes at the end.
856  *		* Four-octet DOI (IP6LS_DOI_V4)
857  *		* One octet suboption, IP6LS_TT_V4
858  *		* One octet suboption length in bytes of the suboption
859  *		  following the suboption length, including the suboption
860  *		  header length, but not including any pad bytes at the end.
861  *	- Pad to make the extension header a multiple of 8 bytes.
862  *
863  * This function returns the contents of 'IPv6 option structure' in the above.
864  * i.e starting from the IP6OPT_LS but not including the pad at the end.
865  * The user must prepend two octets (either padding or next header / length)
866  * and append padding out to the next 8 octet boundary.
867  */
868 int
869 tsol_compute_label_v6(const cred_t *credp, const in6_addr_t *dst,
870     uchar_t *opt_storage, ip_stack_t *ipst)
871 {
872 	ts_label_t	*tsl;
873 	uint_t		sec_opt_len;
874 	uint32_t	doi;
875 	zoneid_t	zoneid, ip_zoneid;
876 	ire_t		*ire, *sire;
877 	tsol_ire_gw_secattr_t *attrp;
878 
879 	ASSERT(credp != NULL);
880 
881 	if (ip6opt_ls == 0)
882 		return (EINVAL);
883 
884 	if (opt_storage != NULL)
885 		opt_storage[IPOPT_OLEN] = 0;
886 
887 	if ((tsl = crgetlabel(credp)) == NULL)
888 		return (0);
889 
890 	/* Always pass multicast */
891 	if (IN6_IS_ADDR_MULTICAST(dst))
892 		return (0);
893 
894 	zoneid = crgetzoneid(credp);
895 
896 	/*
897 	 * Fill in a V6 label.  If a new format is added here, make certain
898 	 * that the maximum size of this label is reflected in sys/tsol/tnet.h
899 	 * as TSOL_MAX_IPV6_OPTION.
900 	 */
901 	if (tsl->tsl_flags & TSLF_UNLABELED) {
902 		/*
903 		 * The destination is unlabeled. Only add a label if the
904 		 * destination is not broadcast/local/loopback address,
905 		 * the destination is not on the same subnet, and the
906 		 * next-hop gateway is labeled.
907 		 *
908 		 * For exclusive stacks we set the zoneid to zero to
909 		 * operate as if we are in the global zone when
910 		 * performing IRE lookups and conn_t comparisons.
911 		 */
912 		if (ipst->ips_netstack->netstack_stackid != GLOBAL_NETSTACKID)
913 			ip_zoneid = GLOBAL_ZONEID;
914 		else
915 			ip_zoneid = zoneid;
916 
917 		sire = NULL;
918 		ire = ire_cache_lookup_v6(dst, ip_zoneid, tsl, ipst);
919 
920 		if (ire != NULL && (ire->ire_type & (IRE_LOCAL |
921 		    IRE_LOOPBACK | IRE_INTERFACE)) != 0) {
922 			IRE_REFRELE(ire);
923 			return (0);
924 		} else if (ire == NULL) {
925 			ire = ire_ftable_lookup_v6(dst, NULL, NULL, 0, NULL,
926 			    &sire, ip_zoneid, 0, tsl, (MATCH_IRE_RECURSIVE |
927 			    MATCH_IRE_DEFAULT | MATCH_IRE_SECATTR), ipst);
928 		}
929 
930 		/* no route to destination */
931 		if (ire == NULL) {
932 			DTRACE_PROBE3(
933 			    tx__tnopt__log__info__labeling__routedst__v6,
934 			    char *, "No route to unlabeled dest ip6(1) with "
935 			    "creds(2).", in6_addr_t *, dst, cred_t *, credp);
936 			return (EHOSTUNREACH);
937 		}
938 
939 		/*
940 		 * Prefix IRE from f-table lookup means that the destination
941 		 * is not directly connected; check the next-hop attributes.
942 		 */
943 		if (sire != NULL) {
944 			ASSERT(ire != NULL);
945 			IRE_REFRELE(ire);
946 			ire = sire;
947 		}
948 
949 		/*
950 		 * Return now if next hop gateway is unlabeled. There is
951 		 * no need to generate a CIPSO option for this message.
952 		 */
953 		attrp = ire->ire_gw_secattr;
954 		if (attrp == NULL || attrp->igsa_rhc == NULL ||
955 		    attrp->igsa_rhc->rhc_tpc->tpc_tp.host_type == UNLABELED) {
956 			IRE_REFRELE(ire);
957 			return (0);
958 		}
959 		IRE_REFRELE(ire);
960 	}
961 
962 	/* compute the CIPSO option */
963 	if (opt_storage != NULL)
964 		opt_storage += 8;
965 	sec_opt_len = tsol2cipso_tt1(&tsl->tsl_label, opt_storage,
966 	    tsl->tsl_doi);
967 
968 	if (sec_opt_len == 0) {
969 		DTRACE_PROBE3(tx__tnopt__log__error__labeling__lostops__v6,
970 		    char *, "options lack length for dest ip6(1) with "
971 		    "creds(2).", in6_addr_t *, dst, cred_t *, credp);
972 		return (EINVAL);
973 	}
974 
975 	if (opt_storage == NULL)
976 		return (0);
977 
978 	if (sec_opt_len < IP_MAX_OPT_LENGTH)
979 		opt_storage[sec_opt_len] = IPOPT_EOL;
980 
981 	/*
982 	 * Just in case the option length is odd, round it up to the next even
983 	 * multiple.  The IPv6 option definition doesn't like odd numbers for
984 	 * some reason.
985 	 *
986 	 * Length in the overall option header (IP6OPT_LS) does not include the
987 	 * option header itself, but the length in the suboption does include
988 	 * the suboption header.  Thus, when there's just one suboption, the
989 	 * length in the option header is the suboption length plus 4 (for the
990 	 * DOI value).
991 	 */
992 	opt_storage[-2] = IP6LS_TT_V4;
993 	opt_storage[-1] = (sec_opt_len + 2 + 1) & ~1;
994 	opt_storage[-8] = ip6opt_ls;
995 	opt_storage[-7] = opt_storage[-1] + 4;
996 	doi = htons(IP6LS_DOI_V4);
997 	bcopy(&doi, opt_storage - 6, 4);
998 
999 	return (0);
1000 }
1001 
1002 /*
1003  * Locate the start of the IP6OPT_LS label option and return it.
1004  * Also return the start of the next non-pad option in after_secoptp.
1005  * Usually the label option is the first option at least when packets
1006  * are generated, but for generality we don't assume that on received packets.
1007  */
1008 uchar_t *
1009 tsol_find_secopt_v6(
1010     const uchar_t *ip6hbh,	/* Start of the hop-by-hop extension header */
1011     uint_t hbhlen,		/* Length of the hop-by-hop extension header */
1012     uchar_t **after_secoptp,	/* Non-pad option following the label option */
1013     boolean_t *hbh_needed)	/* Is hop-by-hop hdr needed w/o label */
1014 {
1015 	uint_t	optlen;
1016 	uint_t	optused;
1017 	const uchar_t *optptr;
1018 	uchar_t	opt_type;
1019 	const uchar_t *secopt = NULL;
1020 
1021 	*hbh_needed = B_FALSE;
1022 	*after_secoptp = NULL;
1023 	optlen = hbhlen - 2;
1024 	optptr = ip6hbh + 2;
1025 	while (optlen != 0) {
1026 		opt_type = *optptr;
1027 		if (opt_type == IP6OPT_PAD1) {
1028 			optptr++;
1029 			optlen--;
1030 			continue;
1031 		}
1032 		if (optlen == 1)
1033 			break;
1034 		optused = 2 + optptr[1];
1035 		if (optused > optlen)
1036 			break;
1037 		/*
1038 		 * if we get here, ip6opt_ls can
1039 		 * not be 0 because it will always
1040 		 * match the IP6OPT_PAD1 above.
1041 		 * Therefore ip6opt_ls == 0 forces
1042 		 * this test to always fail here.
1043 		 */
1044 		if (opt_type == ip6opt_ls)
1045 			secopt = optptr;
1046 		else switch (opt_type) {
1047 		case IP6OPT_PADN:
1048 			break;
1049 		default:
1050 			/*
1051 			 * There is at least 1 option other than
1052 			 * the label option. So the hop-by-hop header is needed
1053 			 */
1054 			*hbh_needed = B_TRUE;
1055 			if (secopt != NULL) {
1056 				*after_secoptp = (uchar_t *)optptr;
1057 				return ((uchar_t *)secopt);
1058 			}
1059 			break;
1060 		}
1061 		optlen -= optused;
1062 		optptr += optused;
1063 	}
1064 	return ((uchar_t *)secopt);
1065 }
1066 
1067 /*
1068  * Remove the label option from the hop-by-hop options header if it exists.
1069  * 'buflen' is the total length of the packet typically b_wptr - b_rptr.
1070  * Header and data following the label option that is deleted are copied
1071  * (i.e. slid backward) to the right position, and returns the number
1072  * of bytes removed (as zero or negative number.)
1073  */
1074 int
1075 tsol_remove_secopt_v6(ip6_t *ip6h, int buflen)
1076 {
1077 	uchar_t	*ip6hbh;	/* hop-by-hop header */
1078 	uint_t	hbhlen;		/* hop-by-hop extension header length */
1079 	uchar_t *secopt = NULL;
1080 	uchar_t *after_secopt;
1081 	uint_t	pad;
1082 	uint_t	delta;
1083 	boolean_t hbh_needed;
1084 
1085 	/*
1086 	 * hop-by-hop extension header must appear first, if it does not
1087 	 * exist, there is no label option.
1088 	 */
1089 	if (ip6h->ip6_nxt != IPPROTO_HOPOPTS)
1090 		return (0);
1091 
1092 	ip6hbh = (uchar_t *)&ip6h[1];
1093 	hbhlen = (ip6hbh[1] + 1) << 3;
1094 	/*
1095 	 * Locate the start of the label option if it exists and the end
1096 	 * of the label option including pads if any.
1097 	 */
1098 	secopt = tsol_find_secopt_v6(ip6hbh, hbhlen, &after_secopt,
1099 	    &hbh_needed);
1100 	if (secopt == NULL)
1101 		return (0);
1102 	if (!hbh_needed) {
1103 		uchar_t	next_hdr;
1104 		/*
1105 		 * The label option was the only option in the hop-by-hop
1106 		 * header. We don't need the hop-by-hop header itself any
1107 		 * longer.
1108 		 */
1109 		next_hdr = ip6hbh[0];
1110 		ovbcopy(ip6hbh + hbhlen, ip6hbh,
1111 		    buflen - (IPV6_HDR_LEN + hbhlen));
1112 		ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) - hbhlen);
1113 		ip6h->ip6_nxt = next_hdr;
1114 		return (-hbhlen);
1115 	}
1116 
1117 	if (after_secopt == NULL) {
1118 		/* There is no option following the label option */
1119 		after_secopt = ip6hbh + hbhlen;
1120 	}
1121 
1122 	/*
1123 	 * After deleting the label option, we need to slide the headers
1124 	 * and data back, while still maintaining the same alignment (module 8)
1125 	 * for the other options. So we slide the headers and data back only
1126 	 * by an integral multiple of 8 bytes, and fill the remaining bytes
1127 	 * with pads.
1128 	 */
1129 	delta = after_secopt - secopt;
1130 	pad = delta % 8;
1131 	if (pad == 1) {
1132 		secopt[0] = IP6OPT_PAD1;
1133 	} else if (pad > 1) {
1134 		secopt[0] = IP6OPT_PADN;
1135 		secopt[1] = pad - 2;
1136 		if (pad > 2)
1137 			bzero(&secopt[2], pad - 2);
1138 	}
1139 	secopt += pad;
1140 	delta -= pad;
1141 	ovbcopy(after_secopt, secopt,
1142 	    (uchar_t *)ip6h + buflen - after_secopt);
1143 	ip6hbh[1] -= delta/8;
1144 	ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) - delta);
1145 
1146 	return (-delta);
1147 }
1148 
1149 /*
1150  * 'optbuf' contains a CIPSO label embedded in an IPv6 hop-by-hop option,
1151  * starting with the IP6OPT_LS option type. The format of this hop-by-hop
1152  * option is described in the block comment above tsol_compute_label_v6.
1153  * This function prepends this hop-by-hop option before any other hop-by-hop
1154  * options in the hop-by-hop header if one already exists, else a new
1155  * hop-by-hop header is created and stuffed into the packet following
1156  * the IPv6 header. 'buflen' is the total length of the packet i.e.
1157  * b_wptr - b_rptr. The caller ensures that there is enough space for the
1158  * extra option being added. Header and data following the position where
1159  * the label option is inserted are copied (i.e. slid forward) to the right
1160  * position.
1161  */
1162 int
1163 tsol_prepend_option_v6(uchar_t *optbuf, ip6_t *ip6h, int buflen)
1164 {
1165 	/*
1166 	 * rawlen is the length of the label option in bytes, not including
1167 	 * any pads, starting from the IP6OPT_LS (option type) byte.
1168 	 */
1169 	uint_t	rawlen;
1170 
1171 	uint_t	optlen;		/* rawlen rounded to an 8 byte multiple */
1172 	uchar_t	*ip6hbh;	/* start of the hop-by-hop extension header */
1173 	uint_t	hbhlen;		/* Length of the hop-by-hop extension header */
1174 	uint_t	pad_len;
1175 	uchar_t	*pad_position;
1176 	int	delta;		/* Actual number of bytes inserted */
1177 
1178 	rawlen = optbuf[1] + 2;	/* Add 2 for the option type, option length */
1179 	ip6hbh = (uchar_t *)&ip6h[1];
1180 	if (ip6h->ip6_nxt == IPPROTO_HOPOPTS) {
1181 		/*
1182 		 * There is a hop-by-hop header present already. In order to
1183 		 * preserve the alignment of the other options at the existing
1184 		 * value (modulo 8) we need to pad the label option to a
1185 		 * multiple of 8 bytes before prepending it to the other
1186 		 * options. Slide the extension headers and data forward to
1187 		 * accomodate the label option at the start of the hop-by-hop
1188 		 * header
1189 		 */
1190 		delta = optlen = (rawlen + 7) & ~7;
1191 		pad_len = optlen - rawlen;
1192 		pad_position = ip6hbh + 2 + rawlen;
1193 		ovbcopy(ip6hbh + 2, ip6hbh + 2 + optlen,
1194 		    buflen - (IPV6_HDR_LEN + 2));
1195 		/*
1196 		 * Bump up the hop-by-hop extension header length by
1197 		 * the number of 8-byte words added
1198 		 */
1199 		optlen >>= 3;
1200 		if (ip6hbh[1] + optlen > 255)
1201 			return (-1);
1202 		ip6hbh[1] += optlen;
1203 	} else {
1204 		/*
1205 		 * There is no hop-by-hop header in the packet. Construct a
1206 		 * new Hop-by-hop extension header (a multiple of 8 bytes).
1207 		 * Slide any other extension headers and data forward to
1208 		 * accomodate this hop-by-hop header
1209 		 */
1210 		delta = hbhlen = (2 + rawlen + 7) & ~7; /* +2 for nxthdr, len */
1211 		pad_len = hbhlen - (2 + rawlen);
1212 		pad_position = ip6hbh + 2 + rawlen;
1213 		ovbcopy(ip6hbh, ip6hbh + hbhlen, buflen - IPV6_HDR_LEN);
1214 		ip6hbh[0] = ip6h->ip6_nxt;
1215 		/*
1216 		 * hop-by-hop extension header length in 8-byte words, not
1217 		 * including the 1st 8 bytes of the hop-by-hop header.
1218 		 */
1219 		ip6hbh[1] = (hbhlen >> 3) - 1;
1220 		ip6h->ip6_nxt = IPPROTO_HOPOPTS;
1221 	}
1222 	/*
1223 	 * Copy the label option into the hop-by-hop header and insert any
1224 	 * needed pads
1225 	 */
1226 	bcopy(optbuf, ip6hbh + 2, rawlen);
1227 	if (pad_len == 1) {
1228 		pad_position[0] = IP6OPT_PAD1;
1229 	} else if (pad_len > 1) {
1230 		pad_position[0] = IP6OPT_PADN;
1231 		pad_position[1] = pad_len - 2;
1232 		if (pad_len > 2)
1233 			bzero(pad_position + 2, pad_len - 2);
1234 	}
1235 	ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) + delta);
1236 	return (delta);
1237 }
1238 
1239 /*
1240  * tsol_check_label_v6()
1241  *
1242  * This routine computes the IP label that should be on the packet based on the
1243  * connection and destination information.  It's called only by the IP
1244  * forwarding logic, because all internal modules atop IP know how to generate
1245  * their own labels.
1246  *
1247  * Returns:
1248  *      0		Label on packet was already correct
1249  *      EACCES		The packet failed the remote host accreditation.
1250  *      ENOMEM		Memory allocation failure.
1251  */
1252 int
1253 tsol_check_label_v6(const cred_t *credp, mblk_t **mpp, boolean_t isexempt,
1254     ip_stack_t *ipst, pid_t pid)
1255 {
1256 	mblk_t *mp = *mpp;
1257 	ip6_t  *ip6h;
1258 	cred_t *effective_cred;
1259 	/*
1260 	 * Label option length is limited to IP_MAX_OPT_LENGTH for
1261 	 * symmetry with IPv4. Can be relaxed if needed
1262 	 */
1263 	uchar_t opt_storage[TSOL_MAX_IPV6_OPTION];
1264 	uint_t hlen;
1265 	uint_t sec_opt_len; /* label option length not including type, len */
1266 	int delta_remove = 0, delta_add;
1267 	int retv;
1268 	uchar_t	*after_secopt;
1269 	uchar_t	*secopt = NULL;
1270 	uchar_t	*ip6hbh;
1271 	uint_t	hbhlen;
1272 	boolean_t hbh_needed;
1273 
1274 	/*
1275 	 * Verify the destination is allowed to receive packets at
1276 	 * the security label of the message data. check_dest()
1277 	 * may create a new effective cred with a modified label
1278 	 * or label flags. Apply any such cred to the message block
1279 	 * for use in future routing decisions.
1280 	 */
1281 	ip6h = (ip6_t *)mp->b_rptr;
1282 	retv = tsol_check_dest(credp, &ip6h->ip6_dst, IPV6_VERSION,
1283 	    isexempt, &effective_cred);
1284 	if (retv != 0)
1285 		return (retv);
1286 
1287 	/*
1288 	 * Calculate the security label to be placed in the text
1289 	 * of the message (if any).
1290 	 */
1291 	if (effective_cred != NULL) {
1292 		if ((retv = tsol_compute_label_v6(effective_cred,
1293 		    &ip6h->ip6_dst, opt_storage, ipst)) != 0) {
1294 			crfree(effective_cred);
1295 			return (retv);
1296 		}
1297 		mblk_setcred(mp, effective_cred, pid);
1298 		crfree(effective_cred);
1299 	} else {
1300 		if ((retv = tsol_compute_label_v6(credp,
1301 		    &ip6h->ip6_dst, opt_storage, ipst)) != 0)
1302 			return (retv);
1303 	}
1304 
1305 	sec_opt_len = opt_storage[1];
1306 
1307 	if (ip6h->ip6_nxt == IPPROTO_HOPOPTS) {
1308 		ip6hbh = (uchar_t *)&ip6h[1];
1309 		hbhlen = (ip6hbh[1] + 1) << 3;
1310 		secopt = tsol_find_secopt_v6(ip6hbh, hbhlen, &after_secopt,
1311 		    &hbh_needed);
1312 	}
1313 
1314 	if (sec_opt_len == 0 && secopt == NULL) {
1315 		/*
1316 		 * The packet is not supposed to have a label, and it
1317 		 * does not have one currently
1318 		 */
1319 		return (0);
1320 	}
1321 	if (secopt != NULL && sec_opt_len != 0 &&
1322 	    (bcmp(opt_storage, secopt, sec_opt_len + 2) == 0)) {
1323 		/* The packet has the correct label already */
1324 		return (0);
1325 	}
1326 
1327 	/*
1328 	 * If there is an option there, then it must be the wrong one; delete.
1329 	 */
1330 	if (secopt != NULL) {
1331 		delta_remove = tsol_remove_secopt_v6(ip6h, MBLKL(mp));
1332 		mp->b_wptr += delta_remove;
1333 	}
1334 
1335 	/*
1336 	 * Make sure we have room for the worst-case addition. Add 2 bytes for
1337 	 * the hop-by-hop ext header's next header and length fields. Add
1338 	 * another 2 bytes for the label option type, len and then round
1339 	 * up to the next 8-byte multiple.
1340 	 */
1341 	hlen = (4 + sec_opt_len + 7) & ~7;
1342 	if (mp->b_wptr + hlen > mp->b_datap->db_lim) {
1343 		int copylen;
1344 		mblk_t *new_mp;
1345 		uint16_t hdr_len;
1346 
1347 		hdr_len = ip_hdr_length_v6(mp, ip6h);
1348 		/*
1349 		 * Allocate enough to be meaningful, but not *too* much.
1350 		 * Also all the IPv6 extension headers must be in the same mblk
1351 		 */
1352 		copylen = MBLKL(mp);
1353 		if (copylen > 256)
1354 			copylen = 256;
1355 		if (copylen < hdr_len)
1356 			copylen = hdr_len;
1357 		new_mp = allocb_tmpl(hlen + copylen +
1358 		    (mp->b_rptr - mp->b_datap->db_base), mp);
1359 		if (new_mp == NULL)
1360 			return (ENOMEM);
1361 
1362 		/* keep the bias */
1363 		new_mp->b_rptr += mp->b_rptr - mp->b_datap->db_base;
1364 		new_mp->b_wptr = new_mp->b_rptr + copylen;
1365 		bcopy(mp->b_rptr, new_mp->b_rptr, copylen);
1366 		new_mp->b_cont = mp;
1367 		if ((mp->b_rptr += copylen) >= mp->b_wptr) {
1368 			new_mp->b_cont = mp->b_cont;
1369 			freeb(mp);
1370 		}
1371 		*mpp = mp = new_mp;
1372 		ip6h = (ip6_t *)mp->b_rptr;
1373 	}
1374 
1375 	delta_add = tsol_prepend_option_v6(opt_storage, ip6h, MBLKL(mp));
1376 	if (delta_add == -1)
1377 		goto param_prob;
1378 
1379 	ASSERT(mp->b_wptr + delta_add <= DB_LIM(mp));
1380 	mp->b_wptr += delta_add;
1381 
1382 	return (0);
1383 
1384 param_prob:
1385 	return (EINVAL);
1386 }
1387 
1388 /*
1389  * Update the given IPv6 "sticky options" structure to contain the provided
1390  * label, which is encoded as an IPv6 option.  Existing label is removed if
1391  * necessary, and storage is allocated/freed/resized.
1392  *
1393  * Returns 0 on success, errno on failure.
1394  */
1395 int
1396 tsol_update_sticky(ip6_pkt_t *ipp, uint_t *labellen, const uchar_t *labelopt)
1397 {
1398 	int rawlen, optlen, newlen;
1399 	uchar_t *newopts;
1400 
1401 	/*
1402 	 * rawlen is the size of the IPv6 label to be inserted from labelopt.
1403 	 * optlen is the total length of that option, including any necessary
1404 	 * headers and padding.  newlen is the new size of the total hop-by-hop
1405 	 * options buffer, including user options.
1406 	 */
1407 	ASSERT(*labellen <= ipp->ipp_hopoptslen);
1408 	ASSERT((ipp->ipp_hopopts == NULL && ipp->ipp_hopoptslen == 0) ||
1409 	    (ipp->ipp_hopopts != NULL && ipp->ipp_hopoptslen != 0));
1410 
1411 	if ((rawlen = labelopt[1]) != 0) {
1412 		rawlen += 2;	/* add in header size */
1413 		optlen = (2 + rawlen + 7) & ~7;
1414 	} else {
1415 		optlen = 0;
1416 	}
1417 	newlen = ipp->ipp_hopoptslen + optlen - *labellen;
1418 	if (newlen == 0 && ipp->ipp_hopopts != NULL) {
1419 		/* Deleting all existing hop-by-hop options */
1420 		kmem_free(ipp->ipp_hopopts, ipp->ipp_hopoptslen);
1421 		ipp->ipp_hopopts = NULL;
1422 		ipp->ipp_fields &= ~IPPF_HOPOPTS;
1423 	} else if (optlen != *labellen) {
1424 		/* If the label not same size as last time, then reallocate */
1425 		if (newlen > IP6_MAX_OPT_LENGTH)
1426 			return (EHOSTUNREACH);
1427 		newopts = kmem_alloc(newlen, KM_NOSLEEP);
1428 		if (newopts == NULL)
1429 			return (ENOMEM);
1430 		/*
1431 		 * If the user has hop-by-hop stickyoptions set, then copy his
1432 		 * options in after the security label.
1433 		 */
1434 		if (ipp->ipp_hopoptslen > *labellen) {
1435 			bcopy(ipp->ipp_hopopts + *labellen, newopts + optlen,
1436 			    ipp->ipp_hopoptslen - *labellen);
1437 			/*
1438 			 * Stomp out any header gunk here - this was the
1439 			 * previous next-header and option length field.
1440 			 */
1441 			newopts[optlen] = IP6OPT_PADN;
1442 			newopts[optlen + 1] = 0;
1443 		}
1444 		if (ipp->ipp_hopopts != NULL)
1445 			kmem_free(ipp->ipp_hopopts, ipp->ipp_hopoptslen);
1446 		ipp->ipp_hopopts = (ip6_hbh_t *)newopts;
1447 	}
1448 	ipp->ipp_hopoptslen = newlen;
1449 	*labellen = optlen;
1450 
1451 	newopts = (uchar_t *)ipp->ipp_hopopts;
1452 
1453 	/* If there are any options, then fix up reported length */
1454 	if (newlen > 0) {
1455 		newopts[1] = (newlen + 7) / 8 - 1;
1456 		ipp->ipp_fields |= IPPF_HOPOPTS;
1457 	}
1458 
1459 	/* If there's a label, then insert it now */
1460 	if (optlen > 0) {
1461 		/* skip next-header and length fields */
1462 		newopts += 2;
1463 		bcopy(labelopt, newopts, rawlen);
1464 		newopts += rawlen;
1465 		/* make sure padding comes out right */
1466 		optlen -= 2 + rawlen;
1467 		if (optlen == 1) {
1468 			newopts[0] = IP6OPT_PAD1;
1469 		} else if (optlen > 1) {
1470 			newopts[0] = IP6OPT_PADN;
1471 			optlen -=  2;
1472 			newopts[1] = optlen;
1473 			if (optlen > 0)
1474 				bzero(newopts + 2, optlen);
1475 		}
1476 	}
1477 	return (0);
1478 }
1479 
1480 int
1481 tsol_update_options(uchar_t **opts, uint_t *totlen, uint_t *labellen,
1482     const uchar_t *labelopt)
1483 {
1484 	int optlen, newlen;
1485 	uchar_t *newopts;
1486 
1487 	optlen = (labelopt[IPOPT_OLEN] + 3) & ~3;
1488 	newlen = *totlen + optlen - *labellen;
1489 	if (optlen > *labellen) {
1490 		if (newlen > IP_MAX_OPT_LENGTH)
1491 			return (EHOSTUNREACH);
1492 		newopts = (uchar_t *)mi_alloc(newlen, BPRI_HI);
1493 		if (newopts == NULL)
1494 			return (ENOMEM);
1495 		if (*totlen > *labellen) {
1496 			bcopy(*opts + *labellen, newopts + optlen,
1497 			    *totlen - *labellen);
1498 		}
1499 		if (*opts != NULL)
1500 			mi_free((char *)*opts);
1501 		*opts = newopts;
1502 	} else if (optlen < *labellen) {
1503 		if (newlen == 0 && *opts != NULL) {
1504 			mi_free((char *)*opts);
1505 			*opts = NULL;
1506 		}
1507 		if (*totlen > *labellen) {
1508 			ovbcopy(*opts + *labellen, *opts + optlen,
1509 			    *totlen - *labellen);
1510 		}
1511 	}
1512 	*totlen = newlen;
1513 	*labellen = optlen;
1514 	if (optlen > 0) {
1515 		newopts = *opts;
1516 		bcopy(labelopt, newopts, optlen);
1517 		/* check if there are user-supplied options that follow */
1518 		if (optlen < newlen) {
1519 			/* compute amount of embedded alignment needed */
1520 			optlen -= newopts[IPOPT_OLEN];
1521 			newopts += newopts[IPOPT_OLEN];
1522 			while (--optlen >= 0)
1523 				*newopts++ = IPOPT_NOP;
1524 		} else if (optlen != newopts[IPOPT_OLEN]) {
1525 			/*
1526 			 * The label option is the only option and it is
1527 			 * not a multiple of 4 bytes.
1528 			 */
1529 			optlen -= newopts[IPOPT_OLEN];
1530 			newopts += newopts[IPOPT_OLEN];
1531 			while (--optlen >= 0)
1532 				*newopts++ = IPOPT_EOL;
1533 		}
1534 	}
1535 	return (0);
1536 }
1537 
1538 /*
1539  * This does the bulk of the processing for setting IPPROTO_IP {T_,}IP_OPTIONS.
1540  */
1541 boolean_t
1542 tsol_option_set(uchar_t **opts, uint_t *optlen, uint_t labellen,
1543     const uchar_t *useropts, uint_t userlen)
1544 {
1545 	int newlen;
1546 	uchar_t *newopts;
1547 
1548 	newlen = userlen + labellen;
1549 	if (newlen > *optlen) {
1550 		/* need more room */
1551 		newopts = (uchar_t *)mi_alloc(newlen, BPRI_HI);
1552 		if (newopts == NULL)
1553 			return (B_FALSE);
1554 		/*
1555 		 * The supplied *opts can't be NULL in this case,
1556 		 * since there's an existing label.
1557 		 */
1558 		if (labellen > 0)
1559 			bcopy(*opts, newopts, labellen);
1560 		if (*opts != NULL)
1561 			mi_free((char *)*opts);
1562 		*opts = newopts;
1563 	}
1564 
1565 	if (newlen == 0) {
1566 		/* special case -- no remaining IP options at all */
1567 		if (*opts != NULL) {
1568 			mi_free((char *)*opts);
1569 			*opts = NULL;
1570 		}
1571 	} else if (userlen > 0) {
1572 		/* merge in the user's options */
1573 		newopts = *opts;
1574 		if (labellen > 0) {
1575 			int extra = labellen - newopts[IPOPT_OLEN];
1576 
1577 			newopts += newopts[IPOPT_OLEN];
1578 			while (--extra >= 0)
1579 				*newopts++ = IPOPT_NOP;
1580 		}
1581 		bcopy(useropts, newopts, userlen);
1582 	}
1583 
1584 	*optlen = newlen;
1585 	return (B_TRUE);
1586 }
1587