xref: /titanic_50/usr/src/uts/common/inet/ip/tn_ipopt.c (revision 74e20cfe817b82802b16fac8690dadcda76f54f5)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/types.h>
29 #include <sys/systm.h>
30 #include <sys/kmem.h>
31 #include <sys/disp.h>
32 #include <sys/stream.h>
33 #include <sys/strsubr.h>
34 #include <sys/strsun.h>
35 #include <sys/policy.h>
36 #include <sys/tsol/label_macro.h>
37 #include <sys/tsol/tndb.h>
38 #include <sys/tsol/tnet.h>
39 #include <inet/ip.h>
40 #include <inet/ip6.h>
41 #include <inet/tcp.h>
42 #include <inet/ipclassifier.h>
43 #include <inet/ip_ire.h>
44 #include <inet/ip_ftable.h>
45 
46 /*
47  * This routine takes a sensitivity label as input and creates a CIPSO
48  * option in the specified buffer.  It returns the size of the CIPSO option.
49  * If the sensitivity label is too large for the CIPSO option, then 0
50  * is returned.
51  *
52  * tsol2cipso_tt1 returns 0 for failure and greater than 0 for success
53  * (more accurately, success means a return value between 10 and 40).
54  */
55 
56 static int
57 tsol2cipso_tt1(const bslabel_t *sl, unsigned char *cop, uint32_t doi)
58 {
59 	struct cipso_tag_type_1 *tt1;
60 	const _bslabel_impl_t *bsl;
61 	const uchar_t *ucp;
62 	int i;
63 
64 	if (doi == 0)
65 		return (0);
66 
67 	/* check for Admin High sensitivity label */
68 	if (blequal(sl, label2bslabel(l_admin_high)))
69 		return (0);
70 
71 	/* check whether classification will fit in one octet */
72 	bsl = (const _bslabel_impl_t *)sl;
73 	if (LCLASS(bsl) & 0xFF00)
74 		return (0);
75 
76 	/*
77 	 * Check whether compartments will fit in 30 octets.
78 	 * Compartments 241 - 256 are not allowed.
79 	 */
80 	if (ntohl(bsl->compartments.c8) & 0x0000FFFF)
81 		return (0);
82 
83 	/*
84 	 * Compute option length and tag length.
85 	 * 'p' points to the last two bytes in the Sensitivity Label's
86 	 * compartments; these cannot be mapped into CIPSO compartments.
87 	 */
88 	ucp = (const uchar_t *)&bsl->compartments.c8 + 2;
89 	while (--ucp >= (const uchar_t *)&bsl->compartments.c1)
90 		if (*ucp != 0)
91 			break;
92 
93 	i =  ucp - (const uchar_t *)&bsl->compartments.c1 + 1;
94 
95 	if (cop == NULL)
96 		return (10 + i);
97 
98 	doi = htonl(doi);
99 	ucp = (const uchar_t *)&doi;
100 	cop[IPOPT_OPTVAL] = IPOPT_COMSEC;
101 	cop[IPOPT_OLEN] = 10 + i;
102 	cop[IPOPT_OLEN+1] = ucp[0];
103 	cop[IPOPT_OLEN+2] = ucp[1];
104 	cop[IPOPT_OLEN+3] = ucp[2];
105 	cop[IPOPT_OLEN+4] = ucp[3];
106 	tt1 = (struct cipso_tag_type_1 *)&cop[IPOPT_OLEN + 5];
107 	tt1->tag_type = 1;
108 	tt1->tag_align = 0;
109 	tt1->tag_sl = LCLASS(bsl);
110 	tt1->tag_length = 4 + i;
111 
112 	bcopy(&bsl->compartments.c1, tt1->tag_cat, i);
113 
114 	return (cop[IPOPT_OLEN]);
115 }
116 
117 /*
118  * The following routine copies a datagram's option into the specified buffer
119  * (if buffer pointer is non-null), or returns a pointer to the label within
120  * the streams message (if buffer is null).  In both cases, tsol_get_option
121  * returns the option's type.
122  *
123  * tsol_get_option assumes that the specified buffer is large enough to
124  * hold the largest valid CIPSO option.  Since the total number of
125  * IP header options cannot exceed 40 bytes, a 40 byte buffer is a good choice.
126  */
127 
128 tsol_ip_label_t
129 tsol_get_option(mblk_t *mp, uchar_t **buffer)
130 {
131 	ipha_t	*ipha;
132 	uchar_t	*opt;
133 	uint32_t	totallen;
134 	uint32_t	optval;
135 	uint32_t	optlen;
136 
137 	ipha = (ipha_t *)mp->b_rptr;
138 
139 	/*
140 	 * Get length (in 4 byte octets) of IP header options.
141 	 * If header doesn't contain options, then return OPT_NONE.
142 	 */
143 	totallen = ipha->ipha_version_and_hdr_length -
144 	    (uint8_t)((IP_VERSION << 4) + IP_SIMPLE_HDR_LENGTH_IN_WORDS);
145 
146 	if (totallen == 0)
147 		return (OPT_NONE);
148 
149 	totallen <<= 2;
150 
151 	/*
152 	 * Search for CIPSO option.
153 	 * If no such option is present, then return OPT_NONE.
154 	 */
155 	opt = (uchar_t *)&ipha[1];
156 	while (totallen != 0) {
157 		switch (optval = opt[IPOPT_OPTVAL]) {
158 		case IPOPT_EOL:
159 			return (OPT_NONE);
160 		case IPOPT_NOP:
161 			optlen = 1;
162 			break;
163 		default:
164 			if (totallen <= IPOPT_OLEN)
165 				return (OPT_NONE);
166 			optlen = opt[IPOPT_OLEN];
167 			if (optlen < 2)
168 				return (OPT_NONE);
169 		}
170 		if (optlen > totallen)
171 			return (OPT_NONE);
172 		/*
173 		 * Copy pointer to option into '*buffer' and
174 		 * return the option type.
175 		 */
176 		switch (optval) {
177 		case IPOPT_COMSEC:
178 			*buffer = opt;
179 			if (TSOL_CIPSO_TAG_OFFSET < optlen &&
180 			    opt[TSOL_CIPSO_TAG_OFFSET] == 1)
181 				return (OPT_CIPSO);
182 			return (OPT_NONE);
183 		}
184 		totallen -= optlen;
185 		opt += optlen;
186 	}
187 	return (OPT_NONE);
188 }
189 
190 /*
191  * tsol_compute_label()
192  *
193  * This routine computes the IP label that should be on a packet based on the
194  * connection and destination information.
195  *
196  * Returns:
197  *      0		Fetched label
198  *      EACCES		The packet failed the remote host accreditation
199  *      ENOMEM		Memory allocation failure
200  *	EINVAL		Label cannot be computed
201  */
202 int
203 tsol_compute_label(const cred_t *credp, ipaddr_t dst, uchar_t *opt_storage,
204     boolean_t isexempt)
205 {
206 	uint_t		sec_opt_len;
207 	ts_label_t	*tsl;
208 	tsol_tpc_t	*dst_rhtp;
209 	ire_t		*ire, *sire = NULL;
210 	boolean_t	compute_label = B_FALSE;
211 	tsol_ire_gw_secattr_t *attrp;
212 	zoneid_t	zoneid;
213 
214 	if (opt_storage != NULL)
215 		opt_storage[IPOPT_OLEN] = 0;
216 
217 	if ((tsl = crgetlabel(credp)) == NULL)
218 		return (0);
219 
220 	/* always pass multicast */
221 	if (CLASSD(dst))
222 		return (0);
223 
224 	if ((dst_rhtp = find_tpc(&dst, IPV4_VERSION, B_FALSE)) == NULL) {
225 		DTRACE_PROBE3(tx__tnopt__log__info__labeling__lookupdst__v4,
226 		    char *, "destination ip(1) not in database (with creds(2))",
227 		    ipaddr_t, dst, cred_t *, credp);
228 		return (EINVAL);
229 	}
230 
231 	zoneid = crgetzoneid(credp);
232 
233 	switch (dst_rhtp->tpc_tp.host_type) {
234 	case UNLABELED:
235 		/*
236 		 * Only add a label if the unlabeled destination is
237 		 * not broadcast/local/loopback address, that it is
238 		 * not on the same subnet, and that the next-hop
239 		 * gateway is labeled.
240 		 */
241 		ire = ire_cache_lookup(dst, zoneid, tsl);
242 
243 		if (ire != NULL && (ire->ire_type & (IRE_BROADCAST | IRE_LOCAL |
244 		    IRE_LOOPBACK | IRE_INTERFACE)) != 0) {
245 			IRE_REFRELE(ire);
246 			TPC_RELE(dst_rhtp);
247 			return (0);
248 		} else if (ire == NULL) {
249 			ire = ire_ftable_lookup(dst, 0, 0, 0, NULL, &sire,
250 			    zoneid, 0, tsl, (MATCH_IRE_RECURSIVE |
251 			    MATCH_IRE_DEFAULT | MATCH_IRE_SECATTR));
252 		}
253 
254 		/* no route to destination */
255 		if (ire == NULL) {
256 			DTRACE_PROBE4(
257 			    tx__tnopt__log__info__labeling__routedst__v4,
258 			    char *, "No route to unlabeled dest ip(1)/tpc(2) "
259 			    "with creds(3).", ipaddr_t, dst, tsol_tpc_t *,
260 			    dst_rhtp, cred_t *, credp);
261 			TPC_RELE(dst_rhtp);
262 			return (EINVAL);
263 		}
264 
265 		/*
266 		 * Prefix IRE from f-table lookup means that the destination
267 		 * is not directly connected; check the next-hop attributes.
268 		 */
269 		if (sire != NULL) {
270 			ASSERT(ire != NULL);
271 			IRE_REFRELE(ire);
272 			ire = sire;
273 		}
274 
275 		attrp = ire->ire_gw_secattr;
276 		if (attrp != NULL && attrp->igsa_rhc != NULL &&
277 		    attrp->igsa_rhc->rhc_tpc->tpc_tp.host_type != UNLABELED)
278 			compute_label = B_TRUE;
279 
280 		/*
281 		 * Can talk to unlabeled hosts if
282 		 * (1) zone's label matches the default label, or
283 		 * (2) SO_MAC_EXEMPT is on and we dominate the peer's label
284 		 * (3) SO_MAC_EXEMPT is on and this is the global zone
285 		 */
286 		if (dst_rhtp->tpc_tp.tp_doi != tsl->tsl_doi ||
287 		    (!blequal(&dst_rhtp->tpc_tp.tp_def_label,
288 		    &tsl->tsl_label) && (!isexempt ||
289 		    (zoneid != GLOBAL_ZONEID && !bldominates(&tsl->tsl_label,
290 		    &dst_rhtp->tpc_tp.tp_def_label))))) {
291 			DTRACE_PROBE4(tx__tnopt__log__info__labeling__mac__v4,
292 			    char *, "unlabeled dest ip(1)/tpc(2) "
293 			    "non-matching creds(3).", ipaddr_t, dst,
294 			    tsol_tpc_t *, dst_rhtp, cred_t *, credp);
295 			IRE_REFRELE(ire);
296 			TPC_RELE(dst_rhtp);
297 			return (EACCES);
298 		}
299 
300 		IRE_REFRELE(ire);
301 		break;
302 
303 	case SUN_CIPSO:
304 		/*
305 		 * Can talk to labeled hosts if zone's label is within target's
306 		 * label range or set.
307 		 */
308 		if (dst_rhtp->tpc_tp.tp_cipso_doi_cipso != tsl->tsl_doi ||
309 		    (!_blinrange(&tsl->tsl_label,
310 		    &dst_rhtp->tpc_tp.tp_sl_range_cipso) &&
311 		    !blinlset(&tsl->tsl_label,
312 		    dst_rhtp->tpc_tp.tp_sl_set_cipso))) {
313 			DTRACE_PROBE4(tx__tnopt__log__info__labeling__mac__v4,
314 			    char *, "labeled dest ip(1)/tpc(2) "
315 			    "non-matching creds(3).", ipaddr_t, dst,
316 			    tsol_tpc_t *, dst_rhtp, cred_t *, credp);
317 			TPC_RELE(dst_rhtp);
318 			return (EACCES);
319 		}
320 		compute_label = B_TRUE;
321 		break;
322 
323 	default:
324 		TPC_RELE(dst_rhtp);
325 		return (EACCES);
326 	}
327 
328 	if (!compute_label) {
329 		TPC_RELE(dst_rhtp);
330 		return (0);
331 	}
332 
333 	/* compute the CIPSO option */
334 	if (dst_rhtp->tpc_tp.host_type != UNLABELED)
335 		sec_opt_len = tsol2cipso_tt1(&tsl->tsl_label, opt_storage,
336 		    tsl->tsl_doi);
337 	else
338 		sec_opt_len = tsol2cipso_tt1(&dst_rhtp->tpc_tp.tp_def_label,
339 		    opt_storage, tsl->tsl_doi);
340 	TPC_RELE(dst_rhtp);
341 
342 	if (sec_opt_len == 0) {
343 		DTRACE_PROBE4(tx__tnopt__log__error__labeling__lostops__v4,
344 		    char *,
345 		    "options lack length for dest ip(1)/tpc(2) with creds(3).",
346 		    ipaddr_t, dst, tsol_tpc_t *, dst_rhtp, cred_t *, credp);
347 		return (EINVAL);
348 	}
349 
350 	return (0);
351 }
352 
353 /*
354  * Remove any existing security option (CIPSO) from the given IP
355  * header, move the 'buflen' bytes back to fill the gap, and return the number
356  * of bytes removed (as zero or negative number).  Assumes that the headers are
357  * sane.
358  */
359 int
360 tsol_remove_secopt(ipha_t *ipha, int buflen)
361 {
362 	int remlen, olen, oval, delta;
363 	uchar_t *fptr, *tptr;
364 	boolean_t noop_keep;
365 
366 	remlen = IPH_HDR_LENGTH(ipha) - IP_SIMPLE_HDR_LENGTH;
367 	fptr = tptr = (uchar_t *)(ipha + 1);
368 	noop_keep = B_TRUE;
369 	while (remlen > 0) {
370 		oval = fptr[IPOPT_OPTVAL];
371 
372 		/* terminate on end of list */
373 		if (oval == IPOPT_EOL)
374 			break;
375 
376 		/*
377 		 * Delete any no-ops following a deleted option, at least up
378 		 * to a 4 octet alignment; copy others.
379 		 */
380 		if (oval == IPOPT_NOP) {
381 			if (((fptr - (uchar_t *)ipha) & 3) == 0)
382 				noop_keep = B_TRUE;
383 			if (noop_keep)
384 				*tptr++ = oval;
385 			fptr++;
386 			remlen--;
387 			continue;
388 		}
389 
390 		/* stop on corrupted list; just do nothing. */
391 		if (remlen < 2)
392 			return (0);
393 		olen = fptr[IPOPT_OLEN];
394 		if (olen < 2 || olen > remlen)
395 			return (0);
396 
397 		/* skip over security options to delete them */
398 		if (oval == IPOPT_COMSEC || oval == IPOPT_SECURITY) {
399 			noop_keep = B_FALSE;
400 			fptr += olen;
401 			remlen -= olen;
402 			continue;
403 		}
404 
405 		/* copy the rest */
406 		noop_keep = B_TRUE;
407 		if (tptr != fptr)
408 			ovbcopy(fptr, tptr, olen);
409 		fptr += olen;
410 		tptr += olen;
411 		remlen -= olen;
412 	}
413 
414 	fptr += remlen;
415 
416 	/* figure how much padding we'll need for header alignment */
417 	olen = (tptr - (uchar_t *)ipha) & 3;
418 	if (olen > 0) {
419 		olen = 4 - olen;
420 		/* pad with end-of-list */
421 		bzero(tptr, olen);
422 		tptr += olen;
423 	}
424 
425 	/* slide back the headers that follow and update the IP header */
426 	delta = fptr - tptr;
427 	if (delta != 0) {
428 		ovbcopy(fptr, tptr, ((uchar_t *)ipha + buflen) - fptr);
429 		ipha->ipha_version_and_hdr_length -= delta / 4;
430 	}
431 	return (-delta);
432 }
433 
434 /*
435  * Insert the option in 'optbuf' into the IP header pointed to by 'ipha', and
436  * move the data following the IP header (up to buflen) to accomodate the new
437  * option.  Assumes that up to IP_MAX_OPT_LENGTH bytes are available (in total)
438  * for IP options.  Returns the number of bytes actually inserted, or -1 if the
439  * option cannot be inserted.  (Note that negative return values are possible
440  * when noops must be compressed, and that only -1 indicates error.  Successful
441  * return value is always evenly divisible by 4, by definition.)
442  */
443 int
444 tsol_prepend_option(uchar_t *optbuf, ipha_t *ipha, int buflen)
445 {
446 	int remlen, padding, lastpad, totlen;
447 	int oval, olen;
448 	int delta;
449 	uchar_t *optr;
450 	uchar_t tempopt[IP_MAX_OPT_LENGTH], *toptr;
451 
452 	if (optbuf[IPOPT_OPTVAL] == IPOPT_EOL ||
453 	    optbuf[IPOPT_OPTVAL] == IPOPT_NOP ||
454 	    optbuf[IPOPT_OLEN] == 0)
455 		return (0);
456 
457 	ASSERT(optbuf[IPOPT_OLEN] >= 2 &&
458 	    optbuf[IPOPT_OLEN] <= IP_MAX_OPT_LENGTH);
459 
460 	/* first find the real (unpadded) length of the existing options */
461 	remlen = IPH_HDR_LENGTH(ipha) - IP_SIMPLE_HDR_LENGTH;
462 	padding = totlen = lastpad = 0;
463 	optr = (uchar_t *)(ipha + 1);
464 	while (remlen > 0) {
465 		oval = optr[IPOPT_OPTVAL];
466 
467 		/* stop at end of list */
468 		if (oval == IPOPT_EOL)
469 			break;
470 
471 		/* skip no-ops, noting that length byte isn't present */
472 		if (oval == IPOPT_NOP) {
473 			optr++;
474 			padding++;
475 			lastpad++;
476 			totlen++;
477 			remlen--;
478 			continue;
479 		}
480 
481 		/* give up on a corrupted list; report failure */
482 		if (remlen < 2)
483 			return (-1);
484 		olen = optr[IPOPT_OLEN];
485 		if (olen < 2 || olen > remlen)
486 			return (-1);
487 
488 		lastpad = 0;
489 		optr += olen;
490 		totlen += olen;
491 		remlen -= olen;
492 	}
493 
494 	/* completely ignore any trailing padding */
495 	totlen -= lastpad;
496 	padding -= lastpad;
497 
498 	/*
499 	 * If some sort of inter-option alignment was present, try to preserve
500 	 * that alignment.  If alignment pushes us out past the maximum, then
501 	 * discard it and try to compress to fit.  (We just "assume" that any
502 	 * padding added was attempting to get 32 bit alignment.  If that's
503 	 * wrong, that's just too bad.)
504 	 */
505 	if (padding > 0) {
506 		olen = (optbuf[IPOPT_OLEN] + 3) & ~3;
507 		if (olen + totlen > IP_MAX_OPT_LENGTH) {
508 			totlen -= padding;
509 			if (olen + totlen > IP_MAX_OPT_LENGTH)
510 				return (-1);
511 			padding = 0;
512 		}
513 	}
514 
515 	/*
516 	 * Since we may need to compress or expand the option list, we write to
517 	 * a temporary buffer and then copy the results back to the IP header.
518 	 */
519 	toptr = tempopt;
520 
521 	/* compute actual option to insert */
522 	olen = optbuf[IPOPT_OLEN];
523 	bcopy(optbuf, toptr, olen);
524 	toptr += olen;
525 	if (padding > 0) {
526 		while ((olen & 3) != 0) {
527 			*toptr++ = IPOPT_NOP;
528 			olen++;
529 		}
530 	}
531 
532 	/* copy over the existing options */
533 	optr = (uchar_t *)(ipha + 1);
534 	while (totlen > 0) {
535 		oval = optr[IPOPT_OPTVAL];
536 
537 		/* totlen doesn't include end-of-list marker */
538 		ASSERT(oval != IPOPT_EOL);
539 
540 		/* handle no-ops; copy if desired, ignore otherwise */
541 		if (oval == IPOPT_NOP) {
542 			if (padding > 0) {
543 				/* note: cannot overflow due to checks above */
544 				ASSERT(toptr < tempopt + IP_MAX_OPT_LENGTH);
545 				*toptr++ = oval;
546 			}
547 			optr++;
548 			totlen--;
549 			continue;
550 		}
551 
552 		/* list cannot be corrupt at this point */
553 		ASSERT(totlen >= 2);
554 		olen = optr[IPOPT_OLEN];
555 		ASSERT(olen >= 2 && olen <= totlen);
556 
557 		/* cannot run out of room due to tests above */
558 		ASSERT(toptr + olen <= tempopt + IP_MAX_OPT_LENGTH);
559 
560 		bcopy(optr, toptr, olen);
561 		optr += olen;
562 		toptr += olen;
563 		totlen -= olen;
564 	}
565 
566 	/* figure how much padding we'll need for header alignment */
567 	olen = (toptr - tempopt) & 3;
568 	if (olen > 0) {
569 		olen = 4 - olen;
570 		ASSERT(toptr + olen <= tempopt + IP_MAX_OPT_LENGTH);
571 		/* pad with end-of-list value */
572 		bzero(toptr, olen);
573 		toptr += olen;
574 	}
575 
576 	/* move the headers as needed and update IP header */
577 	olen = (toptr - tempopt) + IP_SIMPLE_HDR_LENGTH;
578 	remlen = IPH_HDR_LENGTH(ipha);
579 	delta = olen - remlen;
580 	if (delta != 0) {
581 		ovbcopy((uchar_t *)ipha + remlen, (uchar_t *)ipha + olen,
582 		    buflen - remlen);
583 		ipha->ipha_version_and_hdr_length += delta / 4;
584 	}
585 
586 	/* slap in the new options */
587 	bcopy(tempopt, ipha + 1, olen - IP_SIMPLE_HDR_LENGTH);
588 
589 	return (delta);
590 }
591 
592 /*
593  * tsol_check_label()
594  *
595  * This routine computes the IP label that should be on the packet based on the
596  * connection and destination information.  If the label is there, it returns
597  * zero, so the caller knows that the label is syncronized, and further calls
598  * are not required.  If the label isn't right, then the right one is inserted.
599  *
600  * The packet's header is clear, before entering IPSec's engine.
601  *
602  * Returns:
603  *      0		Label on packet (was|is now) correct
604  *      EACCES		The packet failed the remote host accreditation.
605  *      ENOMEM		Memory allocation failure.
606  *	EINVAL		Label cannot be computed
607  */
608 int
609 tsol_check_label(const cred_t *credp, mblk_t **mpp, int *addedp,
610     boolean_t isexempt)
611 {
612 	mblk_t *mp = *mpp;
613 	ipha_t  *ipha;
614 	uchar_t opt_storage[IP_MAX_OPT_LENGTH];
615 	uint_t hlen;
616 	uint_t sec_opt_len;
617 	uchar_t *optr;
618 	int added;
619 	int retv;
620 
621 	if (addedp != NULL)
622 		*addedp = 0;
623 
624 	opt_storage[IPOPT_OPTVAL] = 0;
625 
626 	ipha = (ipha_t *)mp->b_rptr;
627 
628 	retv = tsol_compute_label(credp, ipha->ipha_dst, opt_storage, isexempt);
629 	if (retv != 0)
630 		return (retv);
631 
632 	optr = (uchar_t *)(ipha + 1);
633 	hlen = IPH_HDR_LENGTH(ipha) - IP_SIMPLE_HDR_LENGTH;
634 	sec_opt_len = opt_storage[IPOPT_OLEN];
635 
636 	if (hlen >= sec_opt_len) {
637 		/* If no option is supposed to be there, make sure it's not */
638 		if (sec_opt_len == 0 && hlen > 0 &&
639 		    optr[IPOPT_OPTVAL] != IPOPT_COMSEC &&
640 		    optr[IPOPT_OPTVAL] != IPOPT_SECURITY)
641 			return (0);
642 		/* if the option is there, it's always first */
643 		if (sec_opt_len != 0 &&
644 		    bcmp(opt_storage, optr, sec_opt_len) == 0)
645 			return (0);
646 	}
647 
648 	/*
649 	 * If there is an option there, then it must be the wrong one; delete.
650 	 */
651 	if (hlen > 0)
652 		mp->b_wptr += tsol_remove_secopt(ipha, MBLKL(mp));
653 
654 	/* Make sure we have room for the worst-case addition */
655 	hlen = IPH_HDR_LENGTH(ipha) + opt_storage[IPOPT_OLEN];
656 	hlen = (hlen + 3) & ~3;
657 	if (hlen > IP_MAX_HDR_LENGTH)
658 		hlen = IP_MAX_HDR_LENGTH;
659 	hlen -= IPH_HDR_LENGTH(ipha);
660 	if (mp->b_wptr + hlen > mp->b_datap->db_lim) {
661 		int copylen;
662 		mblk_t *new_mp;
663 
664 		/* allocate enough to be meaningful, but not *too* much */
665 		copylen = MBLKL(mp);
666 		if (copylen > 256)
667 			copylen = 256;
668 		new_mp = allocb(hlen + copylen +
669 		    (mp->b_rptr - mp->b_datap->db_base), BPRI_HI);
670 		if (new_mp == NULL)
671 			return (ENOMEM);
672 		mblk_setcred(new_mp, DB_CRED(mp));
673 
674 		/* keep the bias */
675 		new_mp->b_rptr += mp->b_rptr - mp->b_datap->db_base;
676 		new_mp->b_wptr = new_mp->b_rptr + copylen;
677 		bcopy(mp->b_rptr, new_mp->b_rptr, copylen);
678 		new_mp->b_cont = mp;
679 		if ((mp->b_rptr += copylen) >= mp->b_wptr) {
680 			new_mp->b_cont = mp->b_cont;
681 			freeb(mp);
682 		}
683 		*mpp = mp = new_mp;
684 		ipha = (ipha_t *)mp->b_rptr;
685 	}
686 
687 	added = tsol_prepend_option(opt_storage, ipha, MBLKL(mp));
688 	if (added == -1)
689 		goto param_prob;
690 
691 	if (addedp != NULL)
692 		*addedp = added;
693 
694 	ASSERT((mp->b_wptr + added) <= DB_LIM(mp));
695 	mp->b_wptr += added;
696 
697 	return (0);
698 
699 param_prob:
700 	return (EINVAL);
701 }
702 
703 /*
704  * IPv6 HopOpt extension header for the label option layout:
705  *	- One octet giving the type of the 'next extension header'
706  *	- Header extension length in 8-byte words, not including the
707  *	  1st 8 bytes, but including any pad bytes at the end.
708  *	  Eg. A value of 2 means 16 bytes not including the 1st 8 bytes.
709  *	- Followed by TLV encoded IPv6 label option. Option layout is
710  *		* One octet, IP6OPT_LS
711  *		* One octet option length in bytes of the option data following
712  *		  the length, but not including any pad bytes at the end.
713  *		* Four-octet DOI (IP6LS_DOI_V4)
714  *		* One octet suboption, IP6LS_TT_V4
715  *		* One octet suboption length in bytes of the suboption
716  *		  following the suboption length, including the suboption
717  *		  header length, but not including any pad bytes at the end.
718  *	- Pad to make the extension header a multiple of 8 bytes.
719  *
720  * This function returns the contents of 'IPv6 option structure' in the above.
721  * i.e starting from the IP6OPT_LS but not including the pad at the end.
722  * The user must prepend two octets (either padding or next header / length)
723  * and append padding out to the next 8 octet boundary.
724  */
725 int
726 tsol_compute_label_v6(const cred_t *credp, const in6_addr_t *dst,
727     uchar_t *opt_storage, boolean_t isexempt)
728 {
729 	tsol_tpc_t	*dst_rhtp;
730 	ts_label_t	*tsl;
731 	uint_t		sec_opt_len;
732 	uint32_t	doi;
733 	zoneid_t	zoneid;
734 	ire_t		*ire, *sire;
735 	tsol_ire_gw_secattr_t *attrp;
736 	boolean_t	compute_label;
737 
738 	if (ip6opt_ls == 0)
739 		return (EINVAL);
740 
741 	if (opt_storage != NULL)
742 		opt_storage[IPOPT_OLEN] = 0;
743 
744 	if ((tsl = crgetlabel(credp)) == NULL)
745 		return (0);
746 
747 	/* Always pass multicast */
748 	if (IN6_IS_ADDR_MULTICAST(dst))
749 		return (0);
750 
751 	if ((dst_rhtp = find_tpc(dst, IPV6_VERSION, B_FALSE)) == NULL) {
752 		DTRACE_PROBE3(tx__tnopt__log__info__labeling__lookupdst__v6,
753 		    char *, "destination ip6(1) not in database with creds(2)",
754 		    in6_addr_t *, dst, cred_t *, credp);
755 		return (EINVAL);
756 	}
757 
758 	zoneid = crgetzoneid(credp);
759 
760 	/*
761 	 * Fill in a V6 label.  If a new format is added here, make certain
762 	 * that the maximum size of this label is reflected in sys/tsol/tnet.h
763 	 * as TSOL_MAX_IPV6_OPTION.
764 	 */
765 	compute_label = B_FALSE;
766 	switch (dst_rhtp->tpc_tp.host_type) {
767 	case UNLABELED:
768 		/*
769 		 * Only add a label if the unlabeled destination is
770 		 * not local or loopback address, that it is
771 		 * not on the same subnet, and that the next-hop
772 		 * gateway is labeled.
773 		 */
774 		sire = NULL;
775 		ire = ire_cache_lookup_v6(dst, zoneid, tsl);
776 
777 		if (ire != NULL && (ire->ire_type & (IRE_LOCAL |
778 		    IRE_LOOPBACK | IRE_INTERFACE)) != 0) {
779 			IRE_REFRELE(ire);
780 			TPC_RELE(dst_rhtp);
781 			return (0);
782 		} else if (ire == NULL) {
783 			ire = ire_ftable_lookup_v6(dst, NULL, NULL, 0, NULL,
784 			    &sire, zoneid, 0, tsl, (MATCH_IRE_RECURSIVE |
785 			    MATCH_IRE_DEFAULT | MATCH_IRE_SECATTR));
786 		}
787 
788 		/* no route to destination */
789 		if (ire == NULL) {
790 			DTRACE_PROBE4(
791 			    tx__tnopt__log__info__labeling__routedst__v6,
792 			    char *, "No route to unlabeled dest ip6(1)/tpc(2) "
793 			    "with creds(3).", in6_addr_t *, dst, tsol_tpc_t *,
794 			    dst_rhtp, cred_t *, credp);
795 			TPC_RELE(dst_rhtp);
796 			return (EINVAL);
797 		}
798 
799 		/*
800 		 * Prefix IRE from f-table lookup means that the destination
801 		 * is not directly connected; check the next-hop attributes.
802 		 */
803 		if (sire != NULL) {
804 			ASSERT(ire != NULL);
805 			IRE_REFRELE(ire);
806 			ire = sire;
807 		}
808 
809 		attrp = ire->ire_gw_secattr;
810 		if (attrp != NULL && attrp->igsa_rhc != NULL &&
811 		    attrp->igsa_rhc->rhc_tpc->tpc_tp.host_type != UNLABELED)
812 			compute_label = B_TRUE;
813 
814 		if (dst_rhtp->tpc_tp.tp_doi != tsl->tsl_doi ||
815 		    (!blequal(&dst_rhtp->tpc_tp.tp_def_label,
816 		    &tsl->tsl_label) && (!isexempt ||
817 		    (zoneid != GLOBAL_ZONEID && !bldominates(&tsl->tsl_label,
818 		    &dst_rhtp->tpc_tp.tp_def_label))))) {
819 			DTRACE_PROBE4(tx__tnopt__log__info__labeling__mac__v6,
820 			    char *, "unlabeled dest ip6(1)/tpc(2) "
821 			    "non-matching creds(3)", in6_addr_t *, dst,
822 			    tsol_tpc_t *, dst_rhtp, cred_t *, credp);
823 			IRE_REFRELE(ire);
824 			TPC_RELE(dst_rhtp);
825 			return (EACCES);
826 		}
827 
828 		IRE_REFRELE(ire);
829 		break;
830 
831 	case SUN_CIPSO:
832 		if (dst_rhtp->tpc_tp.tp_cipso_doi_cipso != tsl->tsl_doi ||
833 		    (!_blinrange(&tsl->tsl_label,
834 		    &dst_rhtp->tpc_tp.tp_sl_range_cipso) &&
835 		    !blinlset(&tsl->tsl_label,
836 		    dst_rhtp->tpc_tp.tp_sl_set_cipso))) {
837 			DTRACE_PROBE4(tx__tnopt__log__info__labeling__mac__v6,
838 			    char *,
839 			    "labeled dest ip6(1)/tpc(2) non-matching creds(3).",
840 			    in6_addr_t *, dst, tsol_tpc_t *, dst_rhtp,
841 			    cred_t *, credp);
842 			TPC_RELE(dst_rhtp);
843 			return (EACCES);
844 		}
845 		compute_label = B_TRUE;
846 		break;
847 
848 	default:
849 		TPC_RELE(dst_rhtp);
850 		return (EACCES);
851 	}
852 
853 	if (!compute_label) {
854 		TPC_RELE(dst_rhtp);
855 		return (0);
856 	}
857 
858 	/* compute the CIPSO option */
859 	if (opt_storage != NULL)
860 		opt_storage += 8;
861 	if (dst_rhtp->tpc_tp.host_type != UNLABELED) {
862 		sec_opt_len = tsol2cipso_tt1(&tsl->tsl_label, opt_storage,
863 		    tsl->tsl_doi);
864 	} else {
865 		sec_opt_len = tsol2cipso_tt1(&dst_rhtp->tpc_tp.tp_def_label,
866 		    opt_storage, tsl->tsl_doi);
867 	}
868 	TPC_RELE(dst_rhtp);
869 
870 	if (sec_opt_len == 0) {
871 		DTRACE_PROBE4(tx__tnopt__log__error__labeling__lostops__v6,
872 		    char *,
873 		    "options lack length for dest ip6(1)/tpc(2) with creds(3).",
874 		    in6_addr_t *, dst, tsol_tpc_t *, dst_rhtp, cred_t *, credp);
875 		return (EINVAL);
876 	}
877 
878 	if (opt_storage == NULL)
879 		return (0);
880 
881 	if (sec_opt_len < IP_MAX_OPT_LENGTH)
882 		opt_storage[sec_opt_len] = IPOPT_EOL;
883 
884 	/*
885 	 * Just in case the option length is odd, round it up to the next even
886 	 * multiple.  The IPv6 option definition doesn't like odd numbers for
887 	 * some reason.
888 	 *
889 	 * Length in the overall option header (IP6OPT_LS) does not include the
890 	 * option header itself, but the length in the suboption does include
891 	 * the suboption header.  Thus, when there's just one suboption, the
892 	 * length in the option header is the suboption length plus 4 (for the
893 	 * DOI value).
894 	 */
895 	opt_storage[-2] = IP6LS_TT_V4;
896 	opt_storage[-1] = (sec_opt_len + 2 + 1) & ~1;
897 	opt_storage[-8] = ip6opt_ls;
898 	opt_storage[-7] = opt_storage[-1] + 4;
899 	doi = htons(IP6LS_DOI_V4);
900 	bcopy(&doi, opt_storage - 6, 4);
901 
902 	return (0);
903 }
904 
905 /*
906  * Locate the start of the IP6OPT_LS label option and return it.
907  * Also return the start of the next non-pad option in after_secoptp.
908  * Usually the label option is the first option at least when packets
909  * are generated, but for generality we don't assume that on received packets.
910  */
911 uchar_t *
912 tsol_find_secopt_v6(
913     const uchar_t *ip6hbh,	/* Start of the hop-by-hop extension header */
914     uint_t hbhlen,		/* Length of the hop-by-hop extension header */
915     uchar_t **after_secoptp,	/* Non-pad option following the label option */
916     boolean_t *hbh_needed)	/* Is hop-by-hop hdr needed w/o label */
917 {
918 	uint_t	optlen;
919 	uint_t	optused;
920 	const uchar_t *optptr;
921 	uchar_t	opt_type;
922 	const uchar_t *secopt = NULL;
923 
924 	*hbh_needed = B_FALSE;
925 	*after_secoptp = NULL;
926 	optlen = hbhlen - 2;
927 	optptr = ip6hbh + 2;
928 	while (optlen != 0) {
929 		opt_type = *optptr;
930 		if (opt_type == IP6OPT_PAD1) {
931 			optptr++;
932 			optlen--;
933 			continue;
934 		}
935 		if (optlen == 1)
936 			break;
937 		optused = 2 + optptr[1];
938 		if (optused > optlen)
939 			break;
940 		/*
941 		 * if we get here, ip6opt_ls can
942 		 * not be 0 because it will always
943 		 * match the IP6OPT_PAD1 above.
944 		 * Therefore ip6opt_ls == 0 forces
945 		 * this test to always fail here.
946 		 */
947 		if (opt_type == ip6opt_ls)
948 			secopt = optptr;
949 		else switch (opt_type) {
950 		case IP6OPT_PADN:
951 			break;
952 		default:
953 			/*
954 			 * There is at least 1 option other than
955 			 * the label option. So the hop-by-hop header is needed
956 			 */
957 			*hbh_needed = B_TRUE;
958 			if (secopt != NULL) {
959 				*after_secoptp = (uchar_t *)optptr;
960 				return ((uchar_t *)secopt);
961 			}
962 			break;
963 		}
964 		optlen -= optused;
965 		optptr += optused;
966 	}
967 	return ((uchar_t *)secopt);
968 }
969 
970 /*
971  * Remove the label option from the hop-by-hop options header if it exists.
972  * 'buflen' is the total length of the packet typically b_wptr - b_rptr.
973  * Header and data following the label option that is deleted are copied
974  * (i.e. slid backward) to the right position.
975  */
976 int
977 tsol_remove_secopt_v6(ip6_t *ip6h, int buflen)
978 {
979 	uchar_t	*ip6hbh;	/* hop-by-hop header */
980 	uint_t	hbhlen;		/* hop-by-hop extension header length */
981 	uchar_t *secopt = NULL;
982 	uchar_t *after_secopt;
983 	uint_t	pad;
984 	uint_t	delta;
985 	boolean_t hbh_needed;
986 
987 	/*
988 	 * hop-by-hop extension header must appear first, if it does not
989 	 * exist, there is no label option.
990 	 */
991 	if (ip6h->ip6_nxt != IPPROTO_HOPOPTS)
992 		return (0);
993 
994 	ip6hbh = (uchar_t *)&ip6h[1];
995 	hbhlen = (ip6hbh[1] + 1) << 3;
996 	/*
997 	 * Locate the start of the label option if it exists and the end
998 	 * of the label option including pads if any.
999 	 */
1000 	secopt = tsol_find_secopt_v6(ip6hbh, hbhlen, &after_secopt,
1001 	    &hbh_needed);
1002 	if (secopt == NULL)
1003 		return (0);
1004 	if (!hbh_needed) {
1005 		uchar_t	next_hdr;
1006 		/*
1007 		 * The label option was the only option in the hop-by-hop
1008 		 * header. We don't need the hop-by-hop header itself any
1009 		 * longer.
1010 		 */
1011 		next_hdr = ip6hbh[0];
1012 		ovbcopy(ip6hbh + hbhlen, ip6hbh,
1013 		    buflen - (IPV6_HDR_LEN + hbhlen));
1014 		ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) - hbhlen);
1015 		ip6h->ip6_nxt = next_hdr;
1016 		return (hbhlen);
1017 	}
1018 
1019 	if (after_secopt == NULL) {
1020 		/* There is no option following the label option */
1021 		after_secopt = ip6hbh + hbhlen;
1022 	}
1023 
1024 	/*
1025 	 * After deleting the label option, we need to slide the headers
1026 	 * and data back, while still maintaining the same alignment (module 8)
1027 	 * for the other options. So we slide the headers and data back only
1028 	 * by an integral multiple of 8 bytes, and fill the remaining bytes
1029 	 * with pads.
1030 	 */
1031 	delta = after_secopt - secopt;
1032 	pad = delta % 8;
1033 	if (pad == 1) {
1034 		secopt[0] = IP6OPT_PAD1;
1035 	} else if (pad > 1) {
1036 		secopt[0] = IP6OPT_PADN;
1037 		secopt[1] = pad - 2;
1038 		if (pad > 2)
1039 			bzero(&secopt[2], pad - 2);
1040 	}
1041 	secopt += pad;
1042 	delta -= pad;
1043 	ovbcopy(after_secopt, secopt,
1044 	    (uchar_t *)ip6h + buflen - after_secopt);
1045 	ip6hbh[1] -= delta/8;
1046 	ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) - delta);
1047 
1048 	return (delta);
1049 }
1050 
1051 /*
1052  * 'optbuf' contains a CIPSO label embedded in an IPv6 hop-by-hop option,
1053  * starting with the IP6OPT_LS option type. The format of this hop-by-hop
1054  * option is described in the block comment above tsol_compute_label_v6.
1055  * This function prepends this hop-by-hop option before any other hop-by-hop
1056  * options in the hop-by-hop header if one already exists, else a new
1057  * hop-by-hop header is created and stuffed into the packet following
1058  * the IPv6 header. 'buflen' is the total length of the packet i.e.
1059  * b_wptr - b_rptr. The caller ensures that there is enough space for the
1060  * extra option being added. Header and data following the position where
1061  * the label option is inserted are copied (i.e. slid forward) to the right
1062  * position.
1063  */
1064 int
1065 tsol_prepend_option_v6(uchar_t *optbuf, ip6_t *ip6h, int buflen)
1066 {
1067 	/*
1068 	 * rawlen is the length of the label option in bytes, not including
1069 	 * any pads, starting from the IP6OPT_LS (option type) byte.
1070 	 */
1071 	uint_t	rawlen;
1072 
1073 	uint_t	optlen;		/* rawlen rounded to an 8 byte multiple */
1074 	uchar_t	*ip6hbh;	/* start of the hop-by-hop extension header */
1075 	uint_t	hbhlen;		/* Length of the hop-by-hop extension header */
1076 	uint_t	pad_len;
1077 	uchar_t	*pad_position;
1078 	int	delta;		/* Actual number of bytes inserted */
1079 
1080 	rawlen = optbuf[1] + 2;	/* Add 2 for the option type, option length */
1081 	ip6hbh = (uchar_t *)&ip6h[1];
1082 	if (ip6h->ip6_nxt == IPPROTO_HOPOPTS) {
1083 		/*
1084 		 * There is a hop-by-hop header present already. In order to
1085 		 * preserve the alignment of the other options at the existing
1086 		 * value (modulo 8) we need to pad the label option to a
1087 		 * multiple of 8 bytes before prepending it to the other
1088 		 * options. Slide the extension headers and data forward to
1089 		 * accomodate the label option at the start of the hop-by-hop
1090 		 * header
1091 		 */
1092 		delta = optlen = (rawlen + 7) & ~7;
1093 		pad_len = optlen - rawlen;
1094 		pad_position = ip6hbh + 2 + rawlen;
1095 		ovbcopy(ip6hbh + 2, ip6hbh + 2 + optlen,
1096 		    buflen - (IPV6_HDR_LEN + 2));
1097 		/*
1098 		 * Bump up the hop-by-hop extension header length by
1099 		 * the number of 8-byte words added
1100 		 */
1101 		optlen >>= 3;
1102 		if (ip6hbh[1] + optlen > 255)
1103 			return (-1);
1104 		ip6hbh[1] += optlen;
1105 	} else {
1106 		/*
1107 		 * There is no hop-by-hop header in the packet. Construct a
1108 		 * new Hop-by-hop extension header (a multiple of 8 bytes).
1109 		 * Slide any other extension headers and data forward to
1110 		 * accomodate this hop-by-hop header
1111 		 */
1112 		delta = hbhlen = (2 + rawlen + 7) & ~7; /* +2 for nxthdr, len */
1113 		pad_len = hbhlen - (2 + rawlen);
1114 		pad_position = ip6hbh + 2 + rawlen;
1115 		ovbcopy(ip6hbh, ip6hbh + hbhlen, buflen - IPV6_HDR_LEN);
1116 		ip6hbh[0] = ip6h->ip6_nxt;
1117 		/*
1118 		 * hop-by-hop extension header length in 8-byte words, not
1119 		 * including the 1st 8 bytes of the hop-by-hop header.
1120 		 */
1121 		ip6hbh[1] = (hbhlen >> 3) - 1;
1122 		ip6h->ip6_nxt = IPPROTO_HOPOPTS;
1123 	}
1124 	/*
1125 	 * Copy the label option into the hop-by-hop header and insert any
1126 	 * needed pads
1127 	 */
1128 	bcopy(optbuf, ip6hbh + 2, rawlen);
1129 	if (pad_len == 1) {
1130 		pad_position[0] = IP6OPT_PAD1;
1131 	} else if (pad_len > 1) {
1132 		pad_position[0] = IP6OPT_PADN;
1133 		pad_position[1] = pad_len - 2;
1134 		if (pad_len > 2)
1135 			bzero(pad_position + 2, pad_len - 2);
1136 	}
1137 	ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) + delta);
1138 	return (delta);
1139 }
1140 
1141 /*
1142  * tsol_check_label_v6()
1143  *
1144  * This routine computes the IP label that should be on the packet based on the
1145  * connection and destination information.  It's called only by the IP
1146  * forwarding logic, because all internal modules atop IP know how to generate
1147  * their own labels.
1148  *
1149  * Returns:
1150  *      0		Label on packet was already correct
1151  *      EACCESS		The packet failed the remote host accreditation.
1152  *      ENOMEM		Memory allocation failure.
1153  */
1154 int
1155 tsol_check_label_v6(const cred_t *credp, mblk_t **mpp, int *addedp,
1156     boolean_t isexempt)
1157 {
1158 	mblk_t *mp = *mpp;
1159 	ip6_t  *ip6h;
1160 	/*
1161 	 * Label option length is limited to IP_MAX_OPT_LENGTH for
1162 	 * symmetry with IPv4. Can be relaxed if needed
1163 	 */
1164 	uchar_t opt_storage[TSOL_MAX_IPV6_OPTION];
1165 	uint_t hlen;
1166 	uint_t sec_opt_len; /* label option length not including type, len */
1167 	int added;
1168 	int retv;
1169 	uchar_t	*after_secopt;
1170 	uchar_t	*secopt = NULL;
1171 	uchar_t	*ip6hbh;
1172 	uint_t	hbhlen;
1173 	boolean_t hbh_needed;
1174 
1175 	if (addedp != NULL)
1176 		*addedp = 0;
1177 
1178 	ip6h = (ip6_t *)mp->b_rptr;
1179 	retv = tsol_compute_label_v6(credp, &ip6h->ip6_dst, opt_storage,
1180 	    isexempt);
1181 	if (retv != 0)
1182 		return (retv);
1183 
1184 	sec_opt_len = opt_storage[1];
1185 
1186 	if (ip6h->ip6_nxt == IPPROTO_HOPOPTS) {
1187 		ip6hbh = (uchar_t *)&ip6h[1];
1188 		hbhlen = (ip6hbh[1] + 1) << 3;
1189 		secopt = tsol_find_secopt_v6(ip6hbh, hbhlen, &after_secopt,
1190 		    &hbh_needed);
1191 	}
1192 
1193 	if (sec_opt_len == 0 && secopt == NULL) {
1194 		/*
1195 		 * The packet is not supposed to have a label, and it
1196 		 * does not have one currently
1197 		 */
1198 		return (0);
1199 	}
1200 	if (secopt != NULL && sec_opt_len != 0 &&
1201 	    (bcmp(opt_storage, secopt, sec_opt_len + 2) == 0)) {
1202 		/* The packet has the correct label already */
1203 		return (0);
1204 	}
1205 
1206 	/*
1207 	 * If there is an option there, then it must be the wrong one; delete.
1208 	 */
1209 	if (secopt != NULL)
1210 		mp->b_wptr += tsol_remove_secopt_v6(ip6h, MBLKL(mp));
1211 
1212 	/*
1213 	 * Make sure we have room for the worst-case addition. Add 2 bytes for
1214 	 * the hop-by-hop ext header's next header and length fields. Add
1215 	 * another 2 bytes for the label option type, len and then round
1216 	 * up to the next 8-byte multiple.
1217 	 */
1218 	hlen = (4 + sec_opt_len + 7) & ~7;
1219 	if (mp->b_wptr + hlen > mp->b_datap->db_lim) {
1220 		int copylen;
1221 		mblk_t *new_mp;
1222 		uint16_t hdr_len;
1223 
1224 		hdr_len = ip_hdr_length_v6(mp, ip6h);
1225 		/*
1226 		 * Allocate enough to be meaningful, but not *too* much.
1227 		 * Also all the IPv6 extension headers must be in the same mblk
1228 		 */
1229 		copylen = MBLKL(mp);
1230 		if (copylen > 256)
1231 			copylen = 256;
1232 		if (copylen < hdr_len)
1233 			copylen = hdr_len;
1234 		new_mp = allocb(hlen + copylen +
1235 		    (mp->b_rptr - mp->b_datap->db_base), BPRI_HI);
1236 		if (new_mp == NULL)
1237 			return (ENOMEM);
1238 		mblk_setcred(new_mp, DB_CRED(mp));
1239 
1240 		/* keep the bias */
1241 		new_mp->b_rptr += mp->b_rptr - mp->b_datap->db_base;
1242 		new_mp->b_wptr = new_mp->b_rptr + copylen;
1243 		bcopy(mp->b_rptr, new_mp->b_rptr, copylen);
1244 		new_mp->b_cont = mp;
1245 		if ((mp->b_rptr += copylen) >= mp->b_wptr) {
1246 			new_mp->b_cont = mp->b_cont;
1247 			freeb(mp);
1248 		}
1249 		*mpp = mp = new_mp;
1250 		ip6h = (ip6_t *)mp->b_rptr;
1251 	}
1252 
1253 	added = tsol_prepend_option_v6(opt_storage, ip6h, MBLKL(mp));
1254 	if (added == -1)
1255 		goto param_prob;
1256 
1257 	if (addedp != NULL)
1258 		*addedp = added;
1259 
1260 	ASSERT(mp->b_wptr + added <= DB_LIM(mp));
1261 	mp->b_wptr += added;
1262 
1263 	return (0);
1264 
1265 param_prob:
1266 	return (EINVAL);
1267 }
1268 
1269 /*
1270  * Update the given IPv6 "sticky options" structure to contain the provided
1271  * label, which is encoded as an IPv6 option.  Existing label is removed if
1272  * necessary, and storage is allocated/freed/resized.
1273  *
1274  * Returns 0 on success, errno on failure.
1275  */
1276 int
1277 tsol_update_sticky(ip6_pkt_t *ipp, uint_t *labellen, const uchar_t *labelopt)
1278 {
1279 	int rawlen, optlen, newlen;
1280 	uchar_t *newopts;
1281 
1282 	/*
1283 	 * rawlen is the size of the IPv6 label to be inserted from labelopt.
1284 	 * optlen is the total length of that option, including any necessary
1285 	 * headers and padding.  newlen is the new size of the total hop-by-hop
1286 	 * options buffer, including user options.
1287 	 */
1288 	ASSERT(*labellen <= ipp->ipp_hopoptslen);
1289 	ASSERT((ipp->ipp_hopopts == NULL && ipp->ipp_hopoptslen == 0) ||
1290 	    (ipp->ipp_hopopts != NULL && ipp->ipp_hopoptslen != 0));
1291 
1292 	if ((rawlen = labelopt[1]) != 0) {
1293 		rawlen += 2;	/* add in header size */
1294 		optlen = (2 + rawlen + 7) & ~7;
1295 	} else {
1296 		optlen = 0;
1297 	}
1298 	newlen = ipp->ipp_hopoptslen + optlen - *labellen;
1299 	if (newlen == 0 && ipp->ipp_hopopts != NULL) {
1300 		/* Deleting all existing hop-by-hop options */
1301 		kmem_free(ipp->ipp_hopopts, ipp->ipp_hopoptslen);
1302 		ipp->ipp_hopopts = NULL;
1303 		ipp->ipp_fields &= ~IPPF_HOPOPTS;
1304 	} else if (optlen != *labellen) {
1305 		/* If the label not same size as last time, then reallocate */
1306 		if (newlen > IP6_MAX_OPT_LENGTH)
1307 			return (EHOSTUNREACH);
1308 		newopts = kmem_alloc(newlen, KM_NOSLEEP);
1309 		if (newopts == NULL)
1310 			return (ENOMEM);
1311 		/*
1312 		 * If the user has hop-by-hop stickyoptions set, then copy his
1313 		 * options in after the security label.
1314 		 */
1315 		if (ipp->ipp_hopoptslen > *labellen) {
1316 			bcopy(ipp->ipp_hopopts + *labellen, newopts + optlen,
1317 			    ipp->ipp_hopoptslen - *labellen);
1318 			/*
1319 			 * Stomp out any header gunk here - this was the
1320 			 * previous next-header and option length field.
1321 			 */
1322 			newopts[optlen] = IP6OPT_PADN;
1323 			newopts[optlen + 1] = 0;
1324 		}
1325 		if (ipp->ipp_hopopts != NULL)
1326 			kmem_free(ipp->ipp_hopopts, ipp->ipp_hopoptslen);
1327 		ipp->ipp_hopopts = (ip6_hbh_t *)newopts;
1328 	}
1329 	ipp->ipp_hopoptslen = newlen;
1330 	*labellen = optlen;
1331 
1332 	newopts = (uchar_t *)ipp->ipp_hopopts;
1333 
1334 	/* If there are any options, then fix up reported length */
1335 	if (newlen > 0) {
1336 		newopts[1] = (newlen + 7) / 8 - 1;
1337 		ipp->ipp_fields |= IPPF_HOPOPTS;
1338 	}
1339 
1340 	/* If there's a label, then insert it now */
1341 	if (optlen > 0) {
1342 		/* skip next-header and length fields */
1343 		newopts += 2;
1344 		bcopy(labelopt, newopts, rawlen);
1345 		newopts += rawlen;
1346 		/* make sure padding comes out right */
1347 		optlen -= 2 + rawlen;
1348 		if (optlen == 1) {
1349 			newopts[0] = IP6OPT_PAD1;
1350 		} else if (optlen > 1) {
1351 			newopts[0] = IP6OPT_PADN;
1352 			optlen -=  2;
1353 			newopts[1] = optlen;
1354 			if (optlen > 0)
1355 				bzero(newopts + 2, optlen);
1356 		}
1357 	}
1358 	return (0);
1359 }
1360 
1361 int
1362 tsol_update_options(uchar_t **opts, uint_t *totlen, uint_t *labellen,
1363     const uchar_t *labelopt)
1364 {
1365 	int optlen, newlen;
1366 	uchar_t *newopts;
1367 
1368 	optlen = (labelopt[IPOPT_OLEN] + 3) & ~3;
1369 	newlen = *totlen + optlen - *labellen;
1370 	if (optlen > *labellen) {
1371 		if (newlen > IP_MAX_OPT_LENGTH)
1372 			return (EHOSTUNREACH);
1373 		newopts = (uchar_t *)mi_alloc(newlen, BPRI_HI);
1374 		if (newopts == NULL)
1375 			return (ENOMEM);
1376 		if (*totlen > *labellen) {
1377 			bcopy(*opts + *labellen, newopts + optlen,
1378 			    *totlen - *labellen);
1379 		}
1380 		if (*opts != NULL)
1381 			mi_free((char *)*opts);
1382 		*opts = newopts;
1383 	} else if (optlen < *labellen) {
1384 		if (newlen == 0 && *opts != NULL) {
1385 			mi_free((char *)*opts);
1386 			*opts = NULL;
1387 		}
1388 		if (*totlen > *labellen) {
1389 			ovbcopy(*opts + *labellen, *opts + optlen,
1390 			    *totlen - *labellen);
1391 		}
1392 	}
1393 	*totlen = newlen;
1394 	*labellen = optlen;
1395 	if (optlen > 0) {
1396 		newopts = *opts;
1397 		bcopy(labelopt, newopts, optlen);
1398 		/* check if there are user-supplied options that follow */
1399 		if (optlen < newlen) {
1400 			/* compute amount of embedded alignment needed */
1401 			optlen -= newopts[IPOPT_OLEN];
1402 			newopts += newopts[IPOPT_OLEN];
1403 			while (--optlen >= 0)
1404 				*newopts++ = IPOPT_NOP;
1405 		} else if (optlen != newopts[IPOPT_OLEN]) {
1406 			/*
1407 			 * The label option is the only option and it is
1408 			 * not a multiple of 4 bytes.
1409 			 */
1410 			optlen -= newopts[IPOPT_OLEN];
1411 			newopts += newopts[IPOPT_OLEN];
1412 			while (--optlen >= 0)
1413 				*newopts++ = IPOPT_EOL;
1414 		}
1415 	}
1416 	return (0);
1417 }
1418 
1419 /*
1420  * This does the bulk of the processing for setting IPPROTO_IP {T_,}IP_OPTIONS.
1421  */
1422 boolean_t
1423 tsol_option_set(uchar_t **opts, uint_t *optlen, uint_t labellen,
1424     const uchar_t *useropts, uint_t userlen)
1425 {
1426 	int newlen;
1427 	uchar_t *newopts;
1428 
1429 	newlen = userlen + labellen;
1430 	if (newlen > *optlen) {
1431 		/* need more room */
1432 		newopts = (uchar_t *)mi_alloc(newlen, BPRI_HI);
1433 		if (newopts == NULL)
1434 			return (B_FALSE);
1435 		/*
1436 		 * The supplied *opts can't be NULL in this case,
1437 		 * since there's an existing label.
1438 		 */
1439 		if (labellen > 0)
1440 			bcopy(*opts, newopts, labellen);
1441 		if (*opts != NULL)
1442 			mi_free((char *)*opts);
1443 		*opts = newopts;
1444 	}
1445 
1446 	if (newlen == 0) {
1447 		/* special case -- no remaining IP options at all */
1448 		if (*opts != NULL) {
1449 			mi_free((char *)*opts);
1450 			*opts = NULL;
1451 		}
1452 	} else if (userlen > 0) {
1453 		/* merge in the user's options */
1454 		newopts = *opts;
1455 		if (labellen > 0) {
1456 			int extra = labellen - newopts[IPOPT_OLEN];
1457 
1458 			newopts += newopts[IPOPT_OLEN];
1459 			while (--extra >= 0)
1460 				*newopts++ = IPOPT_NOP;
1461 		}
1462 		bcopy(useropts, newopts, userlen);
1463 	}
1464 
1465 	*optlen = newlen;
1466 	return (B_TRUE);
1467 }
1468