xref: /titanic_41/usr/src/uts/common/inet/ip/tn_ipopt.c (revision ab5a7454a6d76e82a121d74c74d5589cc3d37a8f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/types.h>
27 #include <sys/systm.h>
28 #include <sys/kmem.h>
29 #include <sys/disp.h>
30 #include <sys/stream.h>
31 #include <sys/strsubr.h>
32 #include <sys/strsun.h>
33 #include <sys/policy.h>
34 #include <sys/tsol/label_macro.h>
35 #include <sys/tsol/tndb.h>
36 #include <sys/tsol/tnet.h>
37 #include <inet/ip.h>
38 #include <inet/ip6.h>
39 #include <inet/tcp.h>
40 #include <inet/ipclassifier.h>
41 #include <inet/ip_ire.h>
42 #include <inet/ip_ftable.h>
43 
44 /*
45  * This routine takes a sensitivity label as input and creates a CIPSO
46  * option in the specified buffer.  It returns the size of the CIPSO option.
47  * If the sensitivity label is too large for the CIPSO option, then 0
48  * is returned.
49  *
50  * tsol2cipso_tt1 returns 0 for failure and greater than 0 for success
51  * (more accurately, success means a return value between 10 and 40).
52  */
53 
54 static int
55 tsol2cipso_tt1(const bslabel_t *sl, unsigned char *cop, uint32_t doi)
56 {
57 	struct cipso_tag_type_1 *tt1;
58 	const _bslabel_impl_t *bsl;
59 	const uchar_t *ucp;
60 	int i;
61 
62 	if (doi == 0)
63 		return (0);
64 
65 	/* check for Admin High sensitivity label */
66 	if (blequal(sl, label2bslabel(l_admin_high)))
67 		return (0);
68 
69 	/* check whether classification will fit in one octet */
70 	bsl = (const _bslabel_impl_t *)sl;
71 	if (LCLASS(bsl) & 0xFF00)
72 		return (0);
73 
74 	/*
75 	 * Check whether compartments will fit in 30 octets.
76 	 * Compartments 241 - 256 are not allowed.
77 	 */
78 	if (ntohl(bsl->compartments.c8) & 0x0000FFFF)
79 		return (0);
80 
81 	/*
82 	 * Compute option length and tag length.
83 	 * 'p' points to the last two bytes in the Sensitivity Label's
84 	 * compartments; these cannot be mapped into CIPSO compartments.
85 	 */
86 	ucp = (const uchar_t *)&bsl->compartments.c8 + 2;
87 	while (--ucp >= (const uchar_t *)&bsl->compartments.c1)
88 		if (*ucp != 0)
89 			break;
90 
91 	i =  ucp - (const uchar_t *)&bsl->compartments.c1 + 1;
92 
93 	if (cop == NULL)
94 		return (10 + i);
95 
96 	doi = htonl(doi);
97 	ucp = (const uchar_t *)&doi;
98 	cop[IPOPT_OPTVAL] = IPOPT_COMSEC;
99 	cop[IPOPT_OLEN] = 10 + i;
100 	cop[IPOPT_OLEN+1] = ucp[0];
101 	cop[IPOPT_OLEN+2] = ucp[1];
102 	cop[IPOPT_OLEN+3] = ucp[2];
103 	cop[IPOPT_OLEN+4] = ucp[3];
104 	tt1 = (struct cipso_tag_type_1 *)&cop[IPOPT_OLEN + 5];
105 	tt1->tag_type = 1;
106 	tt1->tag_align = 0;
107 	tt1->tag_sl = LCLASS(bsl);
108 	tt1->tag_length = 4 + i;
109 
110 	bcopy(&bsl->compartments.c1, tt1->tag_cat, i);
111 
112 	return (cop[IPOPT_OLEN]);
113 }
114 
115 /*
116  * The following routine searches for a security label in an IPv4 datagram.
117  * It returns label_type of:
118  *    OPT_CIPSO if a CIPSO IP option is found.
119  *    OPT_NONE if no security label is found.
120  *
121  * If OPT_CIPSO, a pointer to the CIPSO IP option will be returned in
122  * the buffer parameter.
123  *
124  * The function will return with B_FALSE if an IP format error
125  * is encountered.
126  */
127 
128 boolean_t
129 tsol_get_option_v4(mblk_t *mp, tsol_ip_label_t *label_type, uchar_t **buffer)
130 {
131 	ipha_t	*ipha;
132 	uchar_t	*opt;
133 	uint32_t	totallen;
134 	uint32_t	optval;
135 	uint32_t	optlen;
136 
137 	*label_type = OPT_NONE;
138 
139 	/*
140 	 * Get length (in 4 byte octets) of IP header options.
141 	 * If header doesn't contain options, then return a label_type
142 	 * of OPT_NONE.
143 	 */
144 	ipha = (ipha_t *)mp->b_rptr;
145 	totallen = ipha->ipha_version_and_hdr_length -
146 	    (uint8_t)((IP_VERSION << 4));
147 	totallen <<= 2;
148 	if (totallen < IP_SIMPLE_HDR_LENGTH || totallen > MBLKL(mp))
149 		return (B_FALSE);
150 	totallen -= IP_SIMPLE_HDR_LENGTH;
151 	if (totallen == 0)
152 		return (B_TRUE);
153 
154 	/*
155 	 * Search for CIPSO option.
156 	 * If no such option is present, then return OPT_NONE.
157 	 */
158 	opt = (uchar_t *)&ipha[1];
159 	while (totallen != 0) {
160 		switch (optval = opt[IPOPT_OPTVAL]) {
161 		case IPOPT_EOL:
162 			return (B_TRUE);
163 		case IPOPT_NOP:
164 			optlen = 1;
165 			break;
166 		default:
167 			if (totallen <= IPOPT_OLEN)
168 				return (B_FALSE);
169 			optlen = opt[IPOPT_OLEN];
170 			if (optlen < 2)
171 				return (B_FALSE);
172 		}
173 		if (optlen > totallen)
174 			return (B_FALSE);
175 		/*
176 		 * Copy pointer to option into '*buffer' and
177 		 * return the option type.
178 		 */
179 		switch (optval) {
180 		case IPOPT_COMSEC:
181 			if (TSOL_CIPSO_TAG_OFFSET < optlen &&
182 			    opt[TSOL_CIPSO_TAG_OFFSET] == 1) {
183 				*label_type = OPT_CIPSO;
184 				*buffer = opt;
185 				return (B_TRUE);
186 			}
187 			return (B_FALSE);
188 		}
189 		totallen -= optlen;
190 		opt += optlen;
191 	}
192 	return (B_TRUE);
193 }
194 
195 /*
196  * The following routine searches for a security label in an IPv6 datagram.
197  * It returns label_type of:
198  *    OPT_CIPSO if a CIPSO IP option is found.
199  *    OPT_NONE if no security label is found.
200  *
201  * If OPT_CIPSO, a pointer to the IPv4 portion of the CIPSO IP option will
202  * be returned in the buffer parameter.
203  *
204  * The function will return with B_FALSE if an IP format error
205  * or an unexpected label content error is encountered.
206  */
207 
208 boolean_t
209 tsol_get_option_v6(mblk_t *mp, tsol_ip_label_t *label_type, uchar_t **buffer)
210 {
211 	uchar_t		*opt_ptr = NULL;
212 	uchar_t		*after_secopt;
213 	boolean_t	hbh_needed;
214 	const uchar_t	*ip6hbh;
215 	size_t		optlen;
216 	uint32_t	doi;
217 	const ip6_t	*ip6h;
218 
219 	*label_type = OPT_NONE;
220 	*buffer = NULL;
221 	ip6h = (const ip6_t *)mp->b_rptr;
222 	if (ip6h->ip6_nxt != IPPROTO_HOPOPTS)
223 		return (B_TRUE);
224 	ip6hbh = (const uchar_t *)&ip6h[1];
225 	if (ip6hbh + MIN_EHDR_LEN > mp->b_wptr)
226 		return (B_FALSE);
227 	optlen = (ip6hbh[1] + 1) << 3;
228 	if (ip6hbh + optlen > mp->b_wptr)
229 		return (B_FALSE);
230 	if (!tsol_find_secopt_v6(ip6hbh, optlen,
231 	    &opt_ptr, &after_secopt, &hbh_needed))
232 		return (B_FALSE);
233 	/* tsol_find_secopt_v6 guarantees some sanity */
234 	if (opt_ptr != NULL) {
235 		/*
236 		 * IPv6 Option
237 		 *   opt_ptr[0]: Option type
238 		 *   opt_ptr[1]: Length of option data in bytes
239 		 *   opt_ptr[2]: First byte of option data
240 		 */
241 		if ((optlen = opt_ptr[1]) < 8)
242 			return (B_FALSE);
243 		opt_ptr += 2;
244 		/*
245 		 * From "Generalized Labeled Security Option for IPv6" draft
246 		 *   opt_ptr[0] - opt_ptr[4]: DOI = IP6LS_DOI_V4
247 		 *   opt_ptr[4]: Tag type = IP6LS_TT_V4
248 		 *   opt_ptr[5]: Tag length in bytes starting at Tag type field
249 		 * IPv4 CIPSO Option
250 		 *   opt_ptr[6]: option type
251 		 *   opt_ptr[7]: option length in bytes starting at type field
252 		 */
253 		bcopy(opt_ptr, &doi, sizeof (doi));
254 		doi = ntohl(doi);
255 		if (doi == IP6LS_DOI_V4 &&
256 		    opt_ptr[4] == IP6LS_TT_V4 &&
257 		    opt_ptr[5] <= optlen - 4 &&
258 		    opt_ptr[7] <= optlen - 6 &&
259 		    opt_ptr[7] <= opt_ptr[5] - 2) {
260 			opt_ptr += sizeof (doi) + 2;
261 			*label_type = OPT_CIPSO;
262 			*buffer = opt_ptr;
263 			return (B_TRUE);
264 		}
265 		return (B_FALSE);
266 	}
267 	return (B_TRUE);
268 }
269 
270 /*
271  * tsol_check_dest()
272  *
273  * This routine verifies if a destination is allowed to recieve messages
274  * based on the security label. If any adjustments to the label are needed
275  * due to the connection's MAC mode or the destination's ability
276  * to receive labels, an "effective label" will be returned.
277  *
278  * zone_is_global is set if the actual zoneid is global. That is, it is
279  * not set for an exclusive-IP zone.
280  *
281  * On successful return, effective_tsl will point to the new label needed
282  * or will be NULL if a new label isn't needed. On error, effective_tsl will
283  * point to NULL.
284  *
285  * Returns:
286  *      0		Label (was|is now) correct
287  *	EHOSTUNREACH	The label failed the remote host accreditation
288  *      ENOMEM		Memory allocation failure
289  */
290 int
291 tsol_check_dest(const ts_label_t *tsl, const void *dst,
292     uchar_t version, uint_t mac_mode, boolean_t zone_is_global,
293     ts_label_t **effective_tsl)
294 {
295 	ts_label_t	*newtsl = NULL;
296 	tsol_tpc_t	*dst_rhtp;
297 
298 	if (effective_tsl != NULL)
299 		*effective_tsl = NULL;
300 	ASSERT(version == IPV4_VERSION ||
301 	    (version == IPV6_VERSION &&
302 	    !IN6_IS_ADDR_V4MAPPED((in6_addr_t *)dst)));
303 
304 	/* Always pass kernel level communication (NULL label) */
305 	if (tsl == NULL) {
306 		DTRACE_PROBE2(tx__tnopt__log__info__labeling__mac__allownull,
307 		    char *, "destination ip(1) with null label was passed",
308 		    ipaddr_t, dst);
309 		return (0);
310 	}
311 
312 	if (tsl->tsl_flags & TSLF_IMPLICIT_IN) {
313 		DTRACE_PROBE3(tx__tnopt__log__info__labeling__unresolved__label,
314 		    char *,
315 		    "implicit-in packet to ip(1) reached tsol_check_dest "
316 		    "with implied security label sl(2)",
317 		    ipaddr_t, dst, ts_label_t *, tsl);
318 	}
319 
320 	/* Always pass multicast */
321 	if (version == IPV4_VERSION &&
322 	    CLASSD(*(ipaddr_t *)dst)) {
323 		DTRACE_PROBE2(tx__tnopt__log__info__labeling__mac__allowmult,
324 		    char *, "destination ip(1) with multicast dest was passed",
325 		    ipaddr_t, dst);
326 		return (0);
327 	} else if (version == IPV6_VERSION &&
328 	    IN6_IS_ADDR_MULTICAST((in6_addr_t *)dst)) {
329 		DTRACE_PROBE2(tx__tnopt__log__info__labeling__mac__allowmult_v6,
330 		    char *, "destination ip(1) with multicast dest was passed",
331 		    in6_addr_t *, dst);
332 		return (0);
333 	}
334 
335 	/* Never pass an undefined destination */
336 	if ((dst_rhtp = find_tpc(dst, version, B_FALSE)) == NULL) {
337 		DTRACE_PROBE2(tx__tnopt__log__info__labeling__lookupdst,
338 		    char *, "destination ip(1) not in tn database.",
339 		    void *, dst);
340 		return (EHOSTUNREACH);
341 	}
342 
343 	switch (dst_rhtp->tpc_tp.host_type) {
344 	case UNLABELED:
345 		/*
346 		 * Can talk to unlabeled hosts if
347 		 * (1) zone's label matches the default label, or
348 		 * (2) SO_MAC_EXEMPT is on and we
349 		 * dominate the peer's label, or
350 		 * (3) SO_MAC_EXEMPT is on and
351 		 * this is the global zone
352 		 */
353 		if (dst_rhtp->tpc_tp.tp_doi != tsl->tsl_doi) {
354 			DTRACE_PROBE4(tx__tnopt__log__info__labeling__doi,
355 			    char *, "unlabeled dest ip(1)/tpc(2) doi does "
356 			    "not match msg label(3) doi.", void *, dst,
357 			    tsol_tpc_t *, dst_rhtp, ts_label_t *, tsl);
358 			TPC_RELE(dst_rhtp);
359 			return (EHOSTUNREACH);
360 		}
361 		if (!blequal(&dst_rhtp->tpc_tp.tp_def_label,
362 		    &tsl->tsl_label)) {
363 			if (mac_mode != CONN_MAC_AWARE ||
364 			    !(zone_is_global ||
365 			    bldominates(&tsl->tsl_label,
366 			    &dst_rhtp->tpc_tp.tp_def_label))) {
367 				DTRACE_PROBE4(
368 				    tx__tnopt__log__info__labeling__mac,
369 				    char *, "unlabeled dest ip(1)/tpc(2) does "
370 				    "not match msg label(3).", void *, dst,
371 				    tsol_tpc_t *, dst_rhtp, ts_label_t *, tsl);
372 				TPC_RELE(dst_rhtp);
373 				return (EHOSTUNREACH);
374 			}
375 			/*
376 			 * This is a downlabel MAC-exempt exchange.
377 			 * Use the remote destination's default label
378 			 * as the label of the message data.
379 			 */
380 			if ((newtsl = labelalloc(&dst_rhtp->tpc_tp.tp_def_label,
381 			    dst_rhtp->tpc_tp.tp_doi, KM_NOSLEEP)) == NULL) {
382 				TPC_RELE(dst_rhtp);
383 				return (ENOMEM);
384 			}
385 			newtsl->tsl_flags |= TSLF_UNLABELED;
386 
387 		} else if (!(tsl->tsl_flags & TSLF_UNLABELED)) {
388 			/*
389 			 * The security labels are the same but we need
390 			 * to flag that the remote node is unlabeled.
391 			 */
392 			if ((newtsl = labeldup(tsl, KM_NOSLEEP)) == NULL) {
393 				TPC_RELE(dst_rhtp);
394 				return (ENOMEM);
395 			}
396 			newtsl->tsl_flags |= TSLF_UNLABELED;
397 		}
398 		break;
399 
400 	case SUN_CIPSO:
401 		/*
402 		 * Can talk to labeled hosts if zone's label is within target's
403 		 * label range or set.
404 		 */
405 		if (dst_rhtp->tpc_tp.tp_cipso_doi_cipso != tsl->tsl_doi ||
406 		    (!_blinrange(&tsl->tsl_label,
407 		    &dst_rhtp->tpc_tp.tp_sl_range_cipso) &&
408 		    !blinlset(&tsl->tsl_label,
409 		    dst_rhtp->tpc_tp.tp_sl_set_cipso))) {
410 			DTRACE_PROBE4(tx__tnopt__log__info__labeling__mac,
411 			    char *, "labeled dest ip(1)/tpc(2) does not "
412 			    "match msg label(3).", void *, dst,
413 			    tsol_tpc_t *, dst_rhtp, ts_label_t *, tsl);
414 			TPC_RELE(dst_rhtp);
415 			return (EHOSTUNREACH);
416 		}
417 		if ((tsl->tsl_flags & TSLF_UNLABELED) ||
418 		    (mac_mode == CONN_MAC_IMPLICIT)) {
419 			/*
420 			 * Copy label so we can modify the flags
421 			 */
422 			if ((newtsl = labeldup(tsl, KM_NOSLEEP)) == NULL) {
423 				TPC_RELE(dst_rhtp);
424 				return (ENOMEM);
425 			}
426 			/*
427 			 * The security label is a match but we need to
428 			 * clear the unlabeled flag for this remote node.
429 			 */
430 			newtsl->tsl_flags &= ~TSLF_UNLABELED;
431 			if (mac_mode == CONN_MAC_IMPLICIT)
432 				newtsl->tsl_flags |= TSLF_IMPLICIT_OUT;
433 		}
434 		break;
435 
436 	default:
437 		TPC_RELE(dst_rhtp);
438 		return (EHOSTUNREACH);
439 	}
440 
441 	/*
442 	 * Return the new label.
443 	 */
444 	if (newtsl != NULL) {
445 		if (effective_tsl != NULL)
446 			*effective_tsl = newtsl;
447 		else
448 			label_rele(newtsl);
449 	}
450 	TPC_RELE(dst_rhtp);
451 	return (0);
452 }
453 
454 /*
455  * tsol_compute_label_v4()
456  *
457  * This routine computes the IP label that should be on a packet based on the
458  * connection and destination information.
459  *
460  * The zoneid is the IP zoneid (i.e., GLOBAL_ZONEID for exlusive-IP zones).
461  *
462  * Returns:
463  *      0		Fetched label
464  *	EHOSTUNREACH	No route to destination
465  *	EINVAL		Label cannot be computed
466  */
467 int
468 tsol_compute_label_v4(const ts_label_t *tsl, zoneid_t zoneid, ipaddr_t dst,
469     uchar_t *opt_storage, ip_stack_t *ipst)
470 {
471 	uint_t		sec_opt_len;
472 	ire_t		*ire;
473 	tsol_ire_gw_secattr_t *attrp = NULL;
474 
475 	if (opt_storage != NULL)
476 		opt_storage[IPOPT_OLEN] = 0;
477 
478 	if (tsl == NULL)
479 		return (0);
480 
481 	/* always pass multicast */
482 	if (CLASSD(dst))
483 		return (0);
484 
485 	if (tsl->tsl_flags & TSLF_IMPLICIT_OUT)
486 		return (0);
487 
488 	if (tsl->tsl_flags & TSLF_UNLABELED) {
489 		/*
490 		 * The destination is unlabeled. Only add a label if the
491 		 * destination is not a broadcast/local/loopback address,
492 		 * the destination is not on the same subnet, and the
493 		 * next-hop gateway is labeled.
494 		 */
495 		ire = ire_route_recursive_v4(dst, 0, NULL, zoneid, tsl,
496 		    MATCH_IRE_SECATTR, B_TRUE, 0, ipst, NULL, &attrp, NULL);
497 		ASSERT(ire != NULL);
498 		if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
499 			/* no route to destination */
500 			ire_refrele(ire);
501 			DTRACE_PROBE3(
502 			    tx__tnopt__log__info__labeling__routedst__v4,
503 			    char *, "No route to unlabeled dest ip(1) with "
504 			    "with label(2).", ipaddr_t, dst, ts_label_t *, tsl);
505 			return (EHOSTUNREACH);
506 		}
507 		if (ire->ire_type & (IRE_BROADCAST | IRE_LOCAL | IRE_LOOPBACK |
508 		    IRE_INTERFACE)) {
509 			ire_refrele(ire);
510 			return (0);
511 		}
512 
513 		/*
514 		 * ire_route_recursive gives us the first attrp it finds
515 		 * in the recursive lookup.
516 		 */
517 		/*
518 		 * Return now if next hop gateway is unlabeled. There is
519 		 * no need to generate a CIPSO option for this message.
520 		 */
521 		if (attrp == NULL || attrp->igsa_rhc == NULL ||
522 		    attrp->igsa_rhc->rhc_tpc->tpc_tp.host_type == UNLABELED) {
523 			ire_refrele(ire);
524 			return (0);
525 		}
526 		ire_refrele(ire);
527 	}
528 
529 	/* compute the CIPSO option */
530 	sec_opt_len = tsol2cipso_tt1(&tsl->tsl_label, opt_storage,
531 	    tsl->tsl_doi);
532 
533 	if (sec_opt_len == 0) {
534 		DTRACE_PROBE3(tx__tnopt__log__error__labeling__lostops__v4,
535 		    char *, "options lack length for dest ip(1) with label(2).",
536 		    ipaddr_t, dst, ts_label_t *, tsl);
537 		return (EINVAL);
538 	}
539 
540 	return (0);
541 }
542 
543 /*
544  * Remove any existing security option (CIPSO) from the given IP
545  * header, move the 'buflen' bytes back to fill the gap, and return the number
546  * of bytes removed (as zero or negative number).  Assumes that the headers are
547  * sane.
548  *
549  * Note that tsol_remove_secopt does not adjust ipha_length but
550  * tsol_remove_secopt_v6 does adjust ip6_plen.
551  */
552 int
553 tsol_remove_secopt(ipha_t *ipha, int buflen)
554 {
555 	int remlen, olen, oval, delta;
556 	uchar_t *fptr, *tptr;
557 	boolean_t noop_keep;
558 
559 	remlen = IPH_HDR_LENGTH(ipha) - IP_SIMPLE_HDR_LENGTH;
560 	fptr = tptr = (uchar_t *)(ipha + 1);
561 	noop_keep = B_TRUE;
562 	while (remlen > 0) {
563 		oval = fptr[IPOPT_OPTVAL];
564 
565 		/* terminate on end of list */
566 		if (oval == IPOPT_EOL)
567 			break;
568 
569 		/*
570 		 * Delete any no-ops following a deleted option, at least up
571 		 * to a 4 octet alignment; copy others.
572 		 */
573 		if (oval == IPOPT_NOP) {
574 			if (((fptr - (uchar_t *)ipha) & 3) == 0)
575 				noop_keep = B_TRUE;
576 			if (noop_keep)
577 				*tptr++ = oval;
578 			fptr++;
579 			remlen--;
580 			continue;
581 		}
582 
583 		/* stop on corrupted list; just do nothing. */
584 		if (remlen < 2)
585 			return (0);
586 		olen = fptr[IPOPT_OLEN];
587 		if (olen < 2 || olen > remlen)
588 			return (0);
589 
590 		/* skip over security options to delete them */
591 		if (oval == IPOPT_COMSEC || oval == IPOPT_SECURITY) {
592 			noop_keep = B_FALSE;
593 			fptr += olen;
594 			remlen -= olen;
595 			continue;
596 		}
597 
598 		/* copy the rest */
599 		noop_keep = B_TRUE;
600 		if (tptr != fptr)
601 			ovbcopy(fptr, tptr, olen);
602 		fptr += olen;
603 		tptr += olen;
604 		remlen -= olen;
605 	}
606 
607 	fptr += remlen;
608 
609 	/* figure how much padding we'll need for header alignment */
610 	olen = (tptr - (uchar_t *)ipha) & 3;
611 	if (olen > 0) {
612 		olen = 4 - olen;
613 		/* pad with end-of-list */
614 		bzero(tptr, olen);
615 		tptr += olen;
616 	}
617 
618 	/* slide back the headers that follow and update the IP header */
619 	delta = fptr - tptr;
620 	if (delta != 0) {
621 		ovbcopy(fptr, tptr, ((uchar_t *)ipha + buflen) - fptr);
622 		ipha->ipha_version_and_hdr_length -= delta / 4;
623 	}
624 	return (-delta);
625 }
626 
627 /*
628  * Insert the option in 'optbuf' into the IP header pointed to by 'ipha', and
629  * move the data following the IP header (up to buflen) to accomodate the new
630  * option.  Assumes that up to IP_MAX_OPT_LENGTH bytes are available (in total)
631  * for IP options.  Returns the number of bytes actually inserted, or -1 if the
632  * option cannot be inserted.  (Note that negative return values are possible
633  * when noops must be compressed, and that only -1 indicates error.  Successful
634  * return value is always evenly divisible by 4, by definition.)
635  *
636  * Note that tsol_prepend_option does not adjust ipha_length but
637  * tsol_prepend_option_v6 does adjust ip6_plen.
638  */
639 int
640 tsol_prepend_option(uchar_t *optbuf, ipha_t *ipha, int buflen)
641 {
642 	int remlen, padding, lastpad, totlen;
643 	int oval, olen;
644 	int delta;
645 	uchar_t *optr;
646 	uchar_t tempopt[IP_MAX_OPT_LENGTH], *toptr;
647 
648 	if (optbuf[IPOPT_OPTVAL] == IPOPT_EOL ||
649 	    optbuf[IPOPT_OPTVAL] == IPOPT_NOP ||
650 	    optbuf[IPOPT_OLEN] == 0)
651 		return (0);
652 
653 	ASSERT(optbuf[IPOPT_OLEN] >= 2 &&
654 	    optbuf[IPOPT_OLEN] <= IP_MAX_OPT_LENGTH);
655 
656 	/* first find the real (unpadded) length of the existing options */
657 	remlen = IPH_HDR_LENGTH(ipha) - IP_SIMPLE_HDR_LENGTH;
658 	padding = totlen = lastpad = 0;
659 	optr = (uchar_t *)(ipha + 1);
660 	while (remlen > 0) {
661 		oval = optr[IPOPT_OPTVAL];
662 
663 		/* stop at end of list */
664 		if (oval == IPOPT_EOL)
665 			break;
666 
667 		/* skip no-ops, noting that length byte isn't present */
668 		if (oval == IPOPT_NOP) {
669 			optr++;
670 			padding++;
671 			lastpad++;
672 			totlen++;
673 			remlen--;
674 			continue;
675 		}
676 
677 		/* give up on a corrupted list; report failure */
678 		if (remlen < 2)
679 			return (-1);
680 		olen = optr[IPOPT_OLEN];
681 		if (olen < 2 || olen > remlen)
682 			return (-1);
683 
684 		lastpad = 0;
685 		optr += olen;
686 		totlen += olen;
687 		remlen -= olen;
688 	}
689 
690 	/* completely ignore any trailing padding */
691 	totlen -= lastpad;
692 	padding -= lastpad;
693 
694 	/*
695 	 * If some sort of inter-option alignment was present, try to preserve
696 	 * that alignment.  If alignment pushes us out past the maximum, then
697 	 * discard it and try to compress to fit.  (We just "assume" that any
698 	 * padding added was attempting to get 32 bit alignment.  If that's
699 	 * wrong, that's just too bad.)
700 	 */
701 	if (padding > 0) {
702 		olen = (optbuf[IPOPT_OLEN] + 3) & ~3;
703 		if (olen + totlen > IP_MAX_OPT_LENGTH) {
704 			totlen -= padding;
705 			if (olen + totlen > IP_MAX_OPT_LENGTH)
706 				return (-1);
707 			padding = 0;
708 		}
709 	}
710 
711 	/*
712 	 * Since we may need to compress or expand the option list, we write to
713 	 * a temporary buffer and then copy the results back to the IP header.
714 	 */
715 	toptr = tempopt;
716 
717 	/* compute actual option to insert */
718 	olen = optbuf[IPOPT_OLEN];
719 	bcopy(optbuf, toptr, olen);
720 	toptr += olen;
721 	if (padding > 0) {
722 		while ((olen & 3) != 0) {
723 			*toptr++ = IPOPT_NOP;
724 			olen++;
725 		}
726 	}
727 
728 	/* copy over the existing options */
729 	optr = (uchar_t *)(ipha + 1);
730 	while (totlen > 0) {
731 		oval = optr[IPOPT_OPTVAL];
732 
733 		/* totlen doesn't include end-of-list marker */
734 		ASSERT(oval != IPOPT_EOL);
735 
736 		/* handle no-ops; copy if desired, ignore otherwise */
737 		if (oval == IPOPT_NOP) {
738 			if (padding > 0) {
739 				/* note: cannot overflow due to checks above */
740 				ASSERT(toptr < tempopt + IP_MAX_OPT_LENGTH);
741 				*toptr++ = oval;
742 			}
743 			optr++;
744 			totlen--;
745 			continue;
746 		}
747 
748 		/* list cannot be corrupt at this point */
749 		ASSERT(totlen >= 2);
750 		olen = optr[IPOPT_OLEN];
751 		ASSERT(olen >= 2 && olen <= totlen);
752 
753 		/* cannot run out of room due to tests above */
754 		ASSERT(toptr + olen <= tempopt + IP_MAX_OPT_LENGTH);
755 
756 		bcopy(optr, toptr, olen);
757 		optr += olen;
758 		toptr += olen;
759 		totlen -= olen;
760 	}
761 
762 	/* figure how much padding we'll need for header alignment */
763 	olen = (toptr - tempopt) & 3;
764 	if (olen > 0) {
765 		olen = 4 - olen;
766 		ASSERT(toptr + olen <= tempopt + IP_MAX_OPT_LENGTH);
767 		/* pad with end-of-list value */
768 		bzero(toptr, olen);
769 		toptr += olen;
770 	}
771 
772 	/* move the headers as needed and update IP header */
773 	olen = (toptr - tempopt) + IP_SIMPLE_HDR_LENGTH;
774 	remlen = IPH_HDR_LENGTH(ipha);
775 	delta = olen - remlen;
776 	if (delta != 0) {
777 		ovbcopy((uchar_t *)ipha + remlen, (uchar_t *)ipha + olen,
778 		    buflen - remlen);
779 		ipha->ipha_version_and_hdr_length += delta / 4;
780 	}
781 
782 	/* slap in the new options */
783 	bcopy(tempopt, ipha + 1, olen - IP_SIMPLE_HDR_LENGTH);
784 
785 	return (delta);
786 }
787 
788 /*
789  * tsol_check_label_v4()
790  *
791  * This routine computes the IP label that should be on the packet based on the
792  * connection and destination information.  It's called by the IP forwarding
793  * logic and by ip_output_simple. The ULPs generate the labels before calling
794  * conn_ip_output. If any adjustments to
795  * the label are needed due to the connection's MAC-exempt status or
796  * the destination's ability to receive labels, an "effective label"
797  * will be returned.
798  *
799  * The packet's header is clear before entering IPsec's engine.
800  *
801  * The zoneid is the IP zoneid (i.e., GLOBAL_ZONEID for exlusive-IP zones).
802  * zone_is_global is set if the actual zoneid is global.
803  *
804  * On successful return, effective_tslp will point to the new label needed
805  * or will be NULL if a new label isn't needed. On error, effective_tsl will
806  * point to NULL.
807  *
808  * Returns:
809  *      0		Label (was|is now) correct
810  *      EACCES		The packet failed the remote host accreditation.
811  *      ENOMEM		Memory allocation failure.
812  *	EINVAL		Label cannot be computed
813  */
814 int
815 tsol_check_label_v4(const ts_label_t *tsl, zoneid_t zoneid, mblk_t **mpp,
816     uint_t mac_mode, boolean_t zone_is_global, ip_stack_t *ipst,
817     ts_label_t **effective_tslp)
818 {
819 	mblk_t *mp = *mpp;
820 	ipha_t  *ipha;
821 	ts_label_t *effective_tsl = NULL;
822 	uchar_t opt_storage[IP_MAX_OPT_LENGTH];
823 	uint_t hlen;
824 	uint_t sec_opt_len;
825 	uchar_t *optr;
826 	int delta_remove = 0, delta_add, adjust;
827 	int retv;
828 
829 	*effective_tslp = NULL;
830 	opt_storage[IPOPT_OPTVAL] = 0;
831 
832 	ipha = (ipha_t *)mp->b_rptr;
833 
834 	/*
835 	 * Verify the destination is allowed to receive packets at
836 	 * the security label of the message data. tsol_check_dest()
837 	 * may create a new effective label or label flags.
838 	 */
839 	retv = tsol_check_dest(tsl, &ipha->ipha_dst, IPV4_VERSION,
840 	    mac_mode, zone_is_global, &effective_tsl);
841 	if (retv != 0)
842 		return (retv);
843 
844 	/*
845 	 * Calculate the security label to be placed in the text
846 	 * of the message (if any).
847 	 */
848 	if (effective_tsl != NULL) {
849 		if ((retv = tsol_compute_label_v4(effective_tsl, zoneid,
850 		    ipha->ipha_dst, opt_storage, ipst)) != 0) {
851 			label_rele(effective_tsl);
852 			return (retv);
853 		}
854 		*effective_tslp = effective_tsl;
855 	} else {
856 		if ((retv = tsol_compute_label_v4(tsl, zoneid,
857 		    ipha->ipha_dst, opt_storage, ipst)) != 0) {
858 			return (retv);
859 		}
860 	}
861 
862 	optr = (uchar_t *)(ipha + 1);
863 	hlen = IPH_HDR_LENGTH(ipha) - IP_SIMPLE_HDR_LENGTH;
864 	sec_opt_len = opt_storage[IPOPT_OLEN];
865 
866 	if (hlen >= sec_opt_len) {
867 		/* If no option is supposed to be there, make sure it's not */
868 		if (sec_opt_len == 0 && hlen > 0 &&
869 		    optr[IPOPT_OPTVAL] != IPOPT_COMSEC &&
870 		    optr[IPOPT_OPTVAL] != IPOPT_SECURITY)
871 			return (0);
872 		/* if the option is there, it's always first */
873 		if (sec_opt_len != 0 &&
874 		    bcmp(opt_storage, optr, sec_opt_len) == 0)
875 			return (0);
876 	}
877 
878 	/*
879 	 * If there is an option there, then it must be the wrong one; delete.
880 	 */
881 	if (hlen > 0) {
882 		delta_remove = tsol_remove_secopt(ipha, MBLKL(mp));
883 		mp->b_wptr += delta_remove;
884 	}
885 
886 	/* Make sure we have room for the worst-case addition */
887 	hlen = IPH_HDR_LENGTH(ipha) + opt_storage[IPOPT_OLEN];
888 	hlen = (hlen + 3) & ~3;
889 	if (hlen > IP_MAX_HDR_LENGTH)
890 		hlen = IP_MAX_HDR_LENGTH;
891 	hlen -= IPH_HDR_LENGTH(ipha);
892 	if (mp->b_wptr + hlen > mp->b_datap->db_lim) {
893 		int copylen;
894 		mblk_t *new_mp;
895 
896 		/* allocate enough to be meaningful, but not *too* much */
897 		copylen = MBLKL(mp);
898 		if (copylen > 256)
899 			copylen = 256;
900 		new_mp = allocb_tmpl(hlen + copylen +
901 		    (mp->b_rptr - mp->b_datap->db_base), mp);
902 		if (new_mp == NULL) {
903 			if (effective_tsl != NULL) {
904 				label_rele(effective_tsl);
905 				*effective_tslp = NULL;
906 			}
907 			return (ENOMEM);
908 		}
909 
910 		/* keep the bias */
911 		new_mp->b_rptr += mp->b_rptr - mp->b_datap->db_base;
912 		new_mp->b_wptr = new_mp->b_rptr + copylen;
913 		bcopy(mp->b_rptr, new_mp->b_rptr, copylen);
914 		new_mp->b_cont = mp;
915 		if ((mp->b_rptr += copylen) >= mp->b_wptr) {
916 			new_mp->b_cont = mp->b_cont;
917 			freeb(mp);
918 		}
919 		*mpp = mp = new_mp;
920 		ipha = (ipha_t *)mp->b_rptr;
921 	}
922 
923 	delta_add = tsol_prepend_option(opt_storage, ipha, MBLKL(mp));
924 	if (delta_add == -1)
925 		goto param_prob;
926 
927 	ASSERT((mp->b_wptr + delta_add) <= DB_LIM(mp));
928 	mp->b_wptr += delta_add;
929 
930 	adjust = delta_remove + delta_add;
931 	adjust += ntohs(ipha->ipha_length);
932 	ipha->ipha_length = htons(adjust);
933 
934 	return (0);
935 
936 param_prob:
937 	if (effective_tsl != NULL) {
938 		label_rele(effective_tsl);
939 		*effective_tslp = NULL;
940 	}
941 	return (EINVAL);
942 }
943 
944 /*
945  * IPv6 HopOpt extension header for the label option layout:
946  *	- One octet giving the type of the 'next extension header'
947  *	- Header extension length in 8-byte words, not including the
948  *	  1st 8 bytes, but including any pad bytes at the end.
949  *	  Eg. A value of 2 means 16 bytes not including the 1st 8 bytes.
950  *	- Followed by TLV encoded IPv6 label option. Option layout is
951  *		* One octet, IP6OPT_LS
952  *		* One octet option length in bytes of the option data following
953  *		  the length, but not including any pad bytes at the end.
954  *		* Four-octet DOI (IP6LS_DOI_V4)
955  *		* One octet suboption, IP6LS_TT_V4
956  *		* One octet suboption length in bytes of the suboption
957  *		  following the suboption length, including the suboption
958  *		  header length, but not including any pad bytes at the end.
959  *	- Pad to make the extension header a multiple of 8 bytes.
960  *
961  * This function returns the contents of 'IPv6 option structure' in the above.
962  * i.e starting from the IP6OPT_LS but not including the pad at the end.
963  * The user must prepend two octets (either padding or next header / length)
964  * and append padding out to the next 8 octet boundary.
965  *
966  * The zoneid is the IP zoneid (i.e., GLOBAL_ZONEID for exclusive-IP zones).
967  */
968 int
969 tsol_compute_label_v6(const ts_label_t *tsl, zoneid_t zoneid,
970     const in6_addr_t *dst, uchar_t *opt_storage, ip_stack_t *ipst)
971 {
972 	uint_t		sec_opt_len;
973 	uint32_t	doi;
974 	ire_t		*ire;
975 	tsol_ire_gw_secattr_t *attrp = NULL;
976 
977 	if (ip6opt_ls == 0)
978 		return (EINVAL);
979 
980 	if (opt_storage != NULL)
981 		opt_storage[IPOPT_OLEN] = 0;
982 
983 	if (tsl == NULL)
984 		return (0);
985 
986 	/* Always pass multicast */
987 	if (IN6_IS_ADDR_MULTICAST(dst))
988 		return (0);
989 
990 	/*
991 	 * Fill in a V6 label.  If a new format is added here, make certain
992 	 * that the maximum size of this label is reflected in sys/tsol/tnet.h
993 	 * as TSOL_MAX_IPV6_OPTION.
994 	 */
995 	if (tsl->tsl_flags & TSLF_IMPLICIT_OUT)
996 		return (0);
997 
998 	if (tsl->tsl_flags & TSLF_UNLABELED) {
999 		/*
1000 		 * The destination is unlabeled. Only add a label if the
1001 		 * destination is not a broadcast/local/loopback address,
1002 		 * the destination is not on the same subnet, and the
1003 		 * next-hop gateway is labeled.
1004 		 */
1005 		ire = ire_route_recursive_v6(dst, 0, NULL, zoneid, tsl,
1006 		    MATCH_IRE_SECATTR, B_TRUE, 0, ipst, NULL, &attrp, NULL);
1007 		ASSERT(ire != NULL);
1008 		if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
1009 			/* no route to destination */
1010 			ire_refrele(ire);
1011 			DTRACE_PROBE3(
1012 			    tx__tnopt__log__info__labeling__routedst__v6,
1013 			    char *, "No route to unlabeled dest ip6(1) with "
1014 			    "label(2).", in6_addr_t *, dst, ts_label_t *, tsl);
1015 			return (EHOSTUNREACH);
1016 		}
1017 		if (ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK |
1018 		    IRE_INTERFACE)) {
1019 			ire_refrele(ire);
1020 			return (0);
1021 		}
1022 		/*
1023 		 * ire_route_recursive gives us the first attrp it finds
1024 		 * in the recursive lookup.
1025 		 */
1026 		/*
1027 		 * Return now if next hop gateway is unlabeled. There is
1028 		 * no need to generate a CIPSO option for this message.
1029 		 */
1030 		if (attrp == NULL || attrp->igsa_rhc == NULL ||
1031 		    attrp->igsa_rhc->rhc_tpc->tpc_tp.host_type == UNLABELED) {
1032 			ire_refrele(ire);
1033 			return (0);
1034 		}
1035 		ire_refrele(ire);
1036 	}
1037 
1038 	/* compute the CIPSO option */
1039 	if (opt_storage != NULL)
1040 		opt_storage += 8;
1041 	sec_opt_len = tsol2cipso_tt1(&tsl->tsl_label, opt_storage,
1042 	    tsl->tsl_doi);
1043 
1044 	if (sec_opt_len == 0) {
1045 		DTRACE_PROBE3(tx__tnopt__log__error__labeling__lostops__v6,
1046 		    char *, "options lack length for dest ip6(1) with "
1047 		    "label(2).", in6_addr_t *, dst, ts_label_t *, tsl);
1048 		return (EINVAL);
1049 	}
1050 
1051 	if (opt_storage == NULL)
1052 		return (0);
1053 
1054 	if (sec_opt_len < IP_MAX_OPT_LENGTH)
1055 		opt_storage[sec_opt_len] = IPOPT_EOL;
1056 
1057 	/*
1058 	 * Just in case the option length is odd, round it up to the next even
1059 	 * multiple.  The IPv6 option definition doesn't like odd numbers for
1060 	 * some reason.
1061 	 *
1062 	 * Length in the overall option header (IP6OPT_LS) does not include the
1063 	 * option header itself, but the length in the suboption does include
1064 	 * the suboption header.  Thus, when there's just one suboption, the
1065 	 * length in the option header is the suboption length plus 4 (for the
1066 	 * DOI value).
1067 	 */
1068 	opt_storage[-2] = IP6LS_TT_V4;
1069 	opt_storage[-1] = (sec_opt_len + 2 + 1) & ~1;
1070 	opt_storage[-8] = ip6opt_ls;
1071 	opt_storage[-7] = opt_storage[-1] + 4;
1072 	doi = htons(IP6LS_DOI_V4);
1073 	bcopy(&doi, opt_storage - 6, 4);
1074 
1075 	return (0);
1076 }
1077 
1078 /*
1079  * Locate the start of the IP6OPT_LS label option and return it.
1080  * Also return the start of the next non-pad option in after_secoptp.
1081  * Usually the label option is the first option at least when packets
1082  * are generated, but for generality we don't assume that on received packets.
1083  *
1084  * The function will return with B_FALSE if an IP format error
1085  * or an unexpected label content error is encountered.
1086  */
1087 boolean_t
1088 tsol_find_secopt_v6(
1089     const uchar_t *ip6hbh,	/* Start of the hop-by-hop extension header */
1090     uint_t hbhlen,		/* Length of the hop-by-hop extension header */
1091     uchar_t **secoptp,		/* Location of IP6OPT_LS label option */
1092     uchar_t **after_secoptp,	/* Non-pad option following the label option */
1093     boolean_t *hbh_needed)	/* Is hop-by-hop hdr needed w/o label */
1094 {
1095 	uint_t	optlen;
1096 	uint_t	optused;
1097 	const uchar_t *optptr;
1098 	uchar_t	opt_type;
1099 
1100 	*secoptp = NULL;
1101 	*hbh_needed = B_FALSE;
1102 	*after_secoptp = NULL;
1103 	optlen = hbhlen - 2;
1104 	optptr = ip6hbh + 2;
1105 	while (optlen != 0) {
1106 		opt_type = *optptr;
1107 		if (opt_type == IP6OPT_PAD1) {
1108 			optptr++;
1109 			optlen--;
1110 			continue;
1111 		}
1112 		if (optlen == 1)
1113 			return (B_FALSE);
1114 		optused = 2 + optptr[1];
1115 		if (optused > optlen)
1116 			return (B_FALSE);
1117 		/*
1118 		 * if we get here, ip6opt_ls can
1119 		 * not be 0 because it will always
1120 		 * match the IP6OPT_PAD1 above.
1121 		 * Therefore ip6opt_ls == 0 forces
1122 		 * this test to always fail here.
1123 		 */
1124 		if (opt_type == ip6opt_ls) {
1125 			if (*secoptp != NULL)
1126 				/* More than one security option found */
1127 				return (B_FALSE);
1128 			*secoptp = (uchar_t *)optptr;
1129 		} else switch (opt_type) {
1130 		case IP6OPT_PADN:
1131 			break;
1132 		default:
1133 			/*
1134 			 * There is at least 1 option other than
1135 			 * the label option. So the hop-by-hop header is needed
1136 			 */
1137 			*hbh_needed = B_TRUE;
1138 			if (*secoptp != NULL) {
1139 				*after_secoptp = (uchar_t *)optptr;
1140 				return (B_TRUE);
1141 			}
1142 			break;
1143 		}
1144 		optlen -= optused;
1145 		optptr += optused;
1146 	}
1147 	return (B_TRUE);
1148 }
1149 
1150 /*
1151  * Remove the label option from the hop-by-hop options header if it exists.
1152  * 'buflen' is the total length of the packet typically b_wptr - b_rptr.
1153  * Header and data following the label option that is deleted are copied
1154  * (i.e. slid backward) to the right position, and returns the number
1155  * of bytes removed (as zero or negative number.)
1156  *
1157  * Note that tsol_remove_secopt does not adjust ipha_length but
1158  * tsol_remove_secopt_v6 does adjust ip6_plen.
1159  */
1160 int
1161 tsol_remove_secopt_v6(ip6_t *ip6h, int buflen)
1162 {
1163 	uchar_t	*ip6hbh;	/* hop-by-hop header */
1164 	uint_t	hbhlen;		/* hop-by-hop extension header length */
1165 	uchar_t *secopt = NULL;
1166 	uchar_t *after_secopt;
1167 	uint_t	pad;
1168 	uint_t	delta;
1169 	boolean_t hbh_needed;
1170 
1171 	/*
1172 	 * hop-by-hop extension header must appear first, if it does not
1173 	 * exist, there is no label option.
1174 	 */
1175 	if (ip6h->ip6_nxt != IPPROTO_HOPOPTS)
1176 		return (0);
1177 
1178 	ip6hbh = (uchar_t *)&ip6h[1];
1179 	hbhlen = (ip6hbh[1] + 1) << 3;
1180 	/*
1181 	 * Locate the start of the label option if it exists and the end
1182 	 * of the label option including pads if any.
1183 	 */
1184 	if (!tsol_find_secopt_v6(ip6hbh, hbhlen, &secopt, &after_secopt,
1185 	    &hbh_needed)) {
1186 		/*
1187 		 * This function should not see invalid messages.
1188 		 * If one occurs, it would indicate either an
1189 		 * option previously verified in the forwarding
1190 		 * path has been corrupted or an option was
1191 		 * incorrectly generated locally.
1192 		 */
1193 		ASSERT(0);
1194 		return (0);
1195 	}
1196 	if (secopt == NULL)
1197 		return (0);
1198 	if (!hbh_needed) {
1199 		uchar_t	next_hdr;
1200 		/*
1201 		 * The label option was the only option in the hop-by-hop
1202 		 * header. We don't need the hop-by-hop header itself any
1203 		 * longer.
1204 		 */
1205 		next_hdr = ip6hbh[0];
1206 		ovbcopy(ip6hbh + hbhlen, ip6hbh,
1207 		    buflen - (IPV6_HDR_LEN + hbhlen));
1208 		ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) - hbhlen);
1209 		ip6h->ip6_nxt = next_hdr;
1210 		return (-hbhlen);
1211 	}
1212 
1213 	if (after_secopt == NULL) {
1214 		/* There is no option following the label option */
1215 		after_secopt = ip6hbh + hbhlen;
1216 	}
1217 
1218 	/*
1219 	 * After deleting the label option, we need to slide the headers
1220 	 * and data back, while still maintaining the same alignment (module 8)
1221 	 * for the other options. So we slide the headers and data back only
1222 	 * by an integral multiple of 8 bytes, and fill the remaining bytes
1223 	 * with pads.
1224 	 */
1225 	delta = after_secopt - secopt;
1226 	pad = delta % 8;
1227 	if (pad == 1) {
1228 		secopt[0] = IP6OPT_PAD1;
1229 	} else if (pad > 1) {
1230 		secopt[0] = IP6OPT_PADN;
1231 		secopt[1] = pad - 2;
1232 		if (pad > 2)
1233 			bzero(&secopt[2], pad - 2);
1234 	}
1235 	secopt += pad;
1236 	delta -= pad;
1237 	ovbcopy(after_secopt, secopt,
1238 	    (uchar_t *)ip6h + buflen - after_secopt);
1239 	ip6hbh[1] -= delta/8;
1240 	ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) - delta);
1241 
1242 	return (-delta);
1243 }
1244 
1245 /*
1246  * 'optbuf' contains a CIPSO label embedded in an IPv6 hop-by-hop option,
1247  * starting with the IP6OPT_LS option type. The format of this hop-by-hop
1248  * option is described in the block comment above tsol_compute_label_v6.
1249  * This function prepends this hop-by-hop option before any other hop-by-hop
1250  * options in the hop-by-hop header if one already exists, else a new
1251  * hop-by-hop header is created and stuffed into the packet following
1252  * the IPv6 header. 'buflen' is the total length of the packet i.e.
1253  * b_wptr - b_rptr. The caller ensures that there is enough space for the
1254  * extra option being added. Header and data following the position where
1255  * the label option is inserted are copied (i.e. slid forward) to the right
1256  * position.
1257  *
1258  * Note that tsol_prepend_option does not adjust ipha_length but
1259  * tsol_prepend_option_v6 does adjust ip6_plen.
1260  */
1261 int
1262 tsol_prepend_option_v6(uchar_t *optbuf, ip6_t *ip6h, int buflen)
1263 {
1264 	/*
1265 	 * rawlen is the length of the label option in bytes, not including
1266 	 * any pads, starting from the IP6OPT_LS (option type) byte.
1267 	 */
1268 	uint_t	rawlen;
1269 
1270 	uint_t	optlen;		/* rawlen rounded to an 8 byte multiple */
1271 	uchar_t	*ip6hbh;	/* start of the hop-by-hop extension header */
1272 	uint_t	hbhlen;		/* Length of the hop-by-hop extension header */
1273 	uint_t	pad_len;
1274 	uchar_t	*pad_position;
1275 	int	delta;		/* Actual number of bytes inserted */
1276 
1277 	rawlen = optbuf[1] + 2;	/* Add 2 for the option type, option length */
1278 	ip6hbh = (uchar_t *)&ip6h[1];
1279 	if (ip6h->ip6_nxt == IPPROTO_HOPOPTS) {
1280 		/*
1281 		 * There is a hop-by-hop header present already. In order to
1282 		 * preserve the alignment of the other options at the existing
1283 		 * value (modulo 8) we need to pad the label option to a
1284 		 * multiple of 8 bytes before prepending it to the other
1285 		 * options. Slide the extension headers and data forward to
1286 		 * accomodate the label option at the start of the hop-by-hop
1287 		 * header
1288 		 */
1289 		delta = optlen = (rawlen + 7) & ~7;
1290 		pad_len = optlen - rawlen;
1291 		pad_position = ip6hbh + 2 + rawlen;
1292 		ovbcopy(ip6hbh + 2, ip6hbh + 2 + optlen,
1293 		    buflen - (IPV6_HDR_LEN + 2));
1294 		/*
1295 		 * Bump up the hop-by-hop extension header length by
1296 		 * the number of 8-byte words added
1297 		 */
1298 		optlen >>= 3;
1299 		if (ip6hbh[1] + optlen > 255)
1300 			return (-1);
1301 		ip6hbh[1] += optlen;
1302 	} else {
1303 		/*
1304 		 * There is no hop-by-hop header in the packet. Construct a
1305 		 * new Hop-by-hop extension header (a multiple of 8 bytes).
1306 		 * Slide any other extension headers and data forward to
1307 		 * accomodate this hop-by-hop header
1308 		 */
1309 		delta = hbhlen = (2 + rawlen + 7) & ~7; /* +2 for nxthdr, len */
1310 		pad_len = hbhlen - (2 + rawlen);
1311 		pad_position = ip6hbh + 2 + rawlen;
1312 		ovbcopy(ip6hbh, ip6hbh + hbhlen, buflen - IPV6_HDR_LEN);
1313 		ip6hbh[0] = ip6h->ip6_nxt;
1314 		/*
1315 		 * hop-by-hop extension header length in 8-byte words, not
1316 		 * including the 1st 8 bytes of the hop-by-hop header.
1317 		 */
1318 		ip6hbh[1] = (hbhlen >> 3) - 1;
1319 		ip6h->ip6_nxt = IPPROTO_HOPOPTS;
1320 	}
1321 	/*
1322 	 * Copy the label option into the hop-by-hop header and insert any
1323 	 * needed pads
1324 	 */
1325 	bcopy(optbuf, ip6hbh + 2, rawlen);
1326 	if (pad_len == 1) {
1327 		pad_position[0] = IP6OPT_PAD1;
1328 	} else if (pad_len > 1) {
1329 		pad_position[0] = IP6OPT_PADN;
1330 		pad_position[1] = pad_len - 2;
1331 		if (pad_len > 2)
1332 			bzero(pad_position + 2, pad_len - 2);
1333 	}
1334 	ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) + delta);
1335 	return (delta);
1336 }
1337 
1338 /*
1339  * tsol_check_label_v6()
1340  *
1341  * This routine computes the IP label that should be on the packet based on the
1342  * connection and destination information.  It's called by the IP forwarding
1343  * logic and by ip_output_simple. The ULPs generate the labels before calling
1344  * conn_ip_output. If any adjustments to
1345  * the label are needed due to the connection's MAC-exempt status or
1346  * the destination's ability to receive labels, an "effective label"
1347  * will be returned.
1348  *
1349  * The packet's header is clear before entering IPsec's engine.
1350  *
1351  * The zoneid is the IP zoneid (i.e., GLOBAL_ZONEID for exlusive-IP zones).
1352  * zone_is_global is set if the actual zoneid is global.
1353  *
1354  * On successful return, effective_tslp will point to the new label needed
1355  * or will be NULL if a new label isn't needed. On error, effective_tsl will
1356  * point to NULL.
1357  *
1358  * Returns:
1359  *      0		Label (was|is now) correct
1360  *      EACCES		The packet failed the remote host accreditation.
1361  *      ENOMEM		Memory allocation failure.
1362  *	EINVAL		Label cannot be computed
1363  */
1364 int
1365 tsol_check_label_v6(const ts_label_t *tsl, zoneid_t zoneid, mblk_t **mpp,
1366     uint_t mac_mode, boolean_t zone_is_global, ip_stack_t *ipst,
1367     ts_label_t **effective_tslp)
1368 {
1369 	mblk_t *mp = *mpp;
1370 	ip6_t  *ip6h;
1371 	ts_label_t *effective_tsl = NULL;
1372 	/*
1373 	 * Label option length is limited to IP_MAX_OPT_LENGTH for
1374 	 * symmetry with IPv4. Can be relaxed if needed
1375 	 */
1376 	uchar_t opt_storage[TSOL_MAX_IPV6_OPTION];
1377 	uint_t hlen;
1378 	uint_t sec_opt_len; /* label option length not including type, len */
1379 	int delta_remove = 0, delta_add;
1380 	int retv;
1381 	uchar_t	*after_secopt;
1382 	uchar_t	*secopt = NULL;
1383 	uchar_t	*ip6hbh;
1384 	uint_t	hbhlen;
1385 	boolean_t hbh_needed;
1386 
1387 	*effective_tslp = NULL;
1388 
1389 	/*
1390 	 * Verify the destination is allowed to receive packets at
1391 	 * the security label of the message data. tsol_check_dest()
1392 	 * may create a new effective label or label flags.
1393 	 */
1394 	ip6h = (ip6_t *)mp->b_rptr;
1395 	retv = tsol_check_dest(tsl, &ip6h->ip6_dst, IPV6_VERSION,
1396 	    mac_mode, zone_is_global, &effective_tsl);
1397 	if (retv != 0)
1398 		return (retv);
1399 
1400 	/*
1401 	 * Calculate the security label to be placed in the text
1402 	 * of the message (if any).
1403 	 */
1404 	if (effective_tsl != NULL) {
1405 		if ((retv = tsol_compute_label_v6(effective_tsl, zoneid,
1406 		    &ip6h->ip6_dst, opt_storage, ipst)) != 0) {
1407 			label_rele(effective_tsl);
1408 			return (retv);
1409 		}
1410 		*effective_tslp = effective_tsl;
1411 	} else {
1412 		if ((retv = tsol_compute_label_v6(tsl, zoneid,
1413 		    &ip6h->ip6_dst, opt_storage, ipst)) != 0)
1414 			return (retv);
1415 	}
1416 
1417 	sec_opt_len = opt_storage[1];
1418 
1419 	if (ip6h->ip6_nxt == IPPROTO_HOPOPTS) {
1420 		ip6hbh = (uchar_t *)&ip6h[1];
1421 		hbhlen = (ip6hbh[1] + 1) << 3;
1422 		if (!tsol_find_secopt_v6(ip6hbh, hbhlen, &secopt,
1423 		    &after_secopt, &hbh_needed)) {
1424 			/*
1425 			 * This function should not see invalid messages.
1426 			 * If one occurs, it would indicate either an
1427 			 * option previously verified in the forwarding
1428 			 * path has been corrupted or an option was
1429 			 * incorrectly generated locally.
1430 			 */
1431 			ASSERT(0);
1432 			return (EACCES);
1433 		}
1434 	}
1435 
1436 	if (sec_opt_len == 0 && secopt == NULL) {
1437 		/*
1438 		 * The packet is not supposed to have a label, and it
1439 		 * does not have one currently
1440 		 */
1441 		return (0);
1442 	}
1443 
1444 	if (secopt != NULL && sec_opt_len != 0 &&
1445 	    (bcmp(opt_storage, secopt, sec_opt_len + 2) == 0)) {
1446 		/* The packet has the correct label already */
1447 		return (0);
1448 	}
1449 
1450 	/*
1451 	 * If there is an option there, then it must be the wrong one; delete.
1452 	 */
1453 	if (secopt != NULL) {
1454 		delta_remove = tsol_remove_secopt_v6(ip6h, MBLKL(mp));
1455 		mp->b_wptr += delta_remove;
1456 	}
1457 
1458 	/*
1459 	 * Make sure we have room for the worst-case addition. Add 2 bytes for
1460 	 * the hop-by-hop ext header's next header and length fields. Add
1461 	 * another 2 bytes for the label option type, len and then round
1462 	 * up to the next 8-byte multiple.
1463 	 */
1464 	hlen = (4 + sec_opt_len + 7) & ~7;
1465 	if (mp->b_wptr + hlen > mp->b_datap->db_lim) {
1466 		int copylen;
1467 		mblk_t *new_mp;
1468 		uint16_t hdr_len;
1469 
1470 		hdr_len = ip_hdr_length_v6(mp, ip6h);
1471 		/*
1472 		 * Allocate enough to be meaningful, but not *too* much.
1473 		 * Also all the IPv6 extension headers must be in the same mblk
1474 		 */
1475 		copylen = MBLKL(mp);
1476 		if (copylen > 256)
1477 			copylen = 256;
1478 		if (copylen < hdr_len)
1479 			copylen = hdr_len;
1480 		new_mp = allocb_tmpl(hlen + copylen +
1481 		    (mp->b_rptr - mp->b_datap->db_base), mp);
1482 		if (new_mp == NULL) {
1483 			if (effective_tsl != NULL) {
1484 				label_rele(effective_tsl);
1485 				*effective_tslp = NULL;
1486 			}
1487 			return (ENOMEM);
1488 		}
1489 
1490 		/* keep the bias */
1491 		new_mp->b_rptr += mp->b_rptr - mp->b_datap->db_base;
1492 		new_mp->b_wptr = new_mp->b_rptr + copylen;
1493 		bcopy(mp->b_rptr, new_mp->b_rptr, copylen);
1494 		new_mp->b_cont = mp;
1495 		if ((mp->b_rptr += copylen) >= mp->b_wptr) {
1496 			new_mp->b_cont = mp->b_cont;
1497 			freeb(mp);
1498 		}
1499 		*mpp = mp = new_mp;
1500 		ip6h = (ip6_t *)mp->b_rptr;
1501 	}
1502 
1503 	delta_add = tsol_prepend_option_v6(opt_storage, ip6h, MBLKL(mp));
1504 	if (delta_add == -1)
1505 		goto param_prob;
1506 
1507 	ASSERT(mp->b_wptr + delta_add <= DB_LIM(mp));
1508 	mp->b_wptr += delta_add;
1509 
1510 	/* tsol_prepend_option_v6 has adjusted ip6_plen */
1511 	return (0);
1512 
1513 param_prob:
1514 	if (effective_tsl != NULL) {
1515 		label_rele(effective_tsl);
1516 		*effective_tslp = NULL;
1517 	}
1518 	return (EINVAL);
1519 }
1520