xref: /titanic_41/usr/src/uts/common/inet/ip/tn_ipopt.c (revision 98c507c4288789fc67365c4cb51f80eb641e7182)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/types.h>
27 #include <sys/systm.h>
28 #include <sys/kmem.h>
29 #include <sys/disp.h>
30 #include <sys/stream.h>
31 #include <sys/strsubr.h>
32 #include <sys/strsun.h>
33 #include <sys/policy.h>
34 #include <sys/tsol/label_macro.h>
35 #include <sys/tsol/tndb.h>
36 #include <sys/tsol/tnet.h>
37 #include <inet/ip.h>
38 #include <inet/ip6.h>
39 #include <inet/tcp.h>
40 #include <inet/ipclassifier.h>
41 #include <inet/ip_ire.h>
42 #include <inet/ip_ftable.h>
43 
44 /*
45  * This routine takes a sensitivity label as input and creates a CIPSO
46  * option in the specified buffer.  It returns the size of the CIPSO option.
47  * If the sensitivity label is too large for the CIPSO option, then 0
48  * is returned.
49  *
50  * tsol2cipso_tt1 returns 0 for failure and greater than 0 for success
51  * (more accurately, success means a return value between 10 and 40).
52  */
53 
54 static int
55 tsol2cipso_tt1(const bslabel_t *sl, unsigned char *cop, uint32_t doi)
56 {
57 	struct cipso_tag_type_1 *tt1;
58 	const _bslabel_impl_t *bsl;
59 	const uchar_t *ucp;
60 	int i;
61 
62 	if (doi == 0)
63 		return (0);
64 
65 	/* check for Admin High sensitivity label */
66 	if (blequal(sl, label2bslabel(l_admin_high)))
67 		return (0);
68 
69 	/* check whether classification will fit in one octet */
70 	bsl = (const _bslabel_impl_t *)sl;
71 	if (LCLASS(bsl) & 0xFF00)
72 		return (0);
73 
74 	/*
75 	 * Check whether compartments will fit in 30 octets.
76 	 * Compartments 241 - 256 are not allowed.
77 	 */
78 	if (ntohl(bsl->compartments.c8) & 0x0000FFFF)
79 		return (0);
80 
81 	/*
82 	 * Compute option length and tag length.
83 	 * 'p' points to the last two bytes in the Sensitivity Label's
84 	 * compartments; these cannot be mapped into CIPSO compartments.
85 	 */
86 	ucp = (const uchar_t *)&bsl->compartments.c8 + 2;
87 	while (--ucp >= (const uchar_t *)&bsl->compartments.c1)
88 		if (*ucp != 0)
89 			break;
90 
91 	i =  ucp - (const uchar_t *)&bsl->compartments.c1 + 1;
92 
93 	if (cop == NULL)
94 		return (10 + i);
95 
96 	doi = htonl(doi);
97 	ucp = (const uchar_t *)&doi;
98 	cop[IPOPT_OPTVAL] = IPOPT_COMSEC;
99 	cop[IPOPT_OLEN] = 10 + i;
100 	cop[IPOPT_OLEN+1] = ucp[0];
101 	cop[IPOPT_OLEN+2] = ucp[1];
102 	cop[IPOPT_OLEN+3] = ucp[2];
103 	cop[IPOPT_OLEN+4] = ucp[3];
104 	tt1 = (struct cipso_tag_type_1 *)&cop[IPOPT_OLEN + 5];
105 	tt1->tag_type = 1;
106 	tt1->tag_align = 0;
107 	tt1->tag_sl = LCLASS(bsl);
108 	tt1->tag_length = 4 + i;
109 
110 	bcopy(&bsl->compartments.c1, tt1->tag_cat, i);
111 
112 	return (cop[IPOPT_OLEN]);
113 }
114 
115 /*
116  * The following routine searches for a security label in an IPv4 datagram.
117  * It returns label_type of:
118  *    OPT_CIPSO if a CIPSO IP option is found.
119  *    OPT_NONE if no security label is found.
120  *
121  * If OPT_CIPSO, a pointer to the CIPSO IP option will be returned in
122  * the buffer parameter.
123  *
124  * The function will return with B_FALSE if an IP format error
125  * is encountered.
126  */
127 
128 boolean_t
129 tsol_get_option_v4(mblk_t *mp, tsol_ip_label_t *label_type, uchar_t **buffer)
130 {
131 	ipha_t	*ipha;
132 	uchar_t	*opt;
133 	uint32_t	totallen;
134 	uint32_t	optval;
135 	uint32_t	optlen;
136 
137 	*label_type = OPT_NONE;
138 
139 	/*
140 	 * Get length (in 4 byte octets) of IP header options.
141 	 * If header doesn't contain options, then return a label_type
142 	 * of OPT_NONE.
143 	 */
144 	ipha = (ipha_t *)mp->b_rptr;
145 	totallen = ipha->ipha_version_and_hdr_length -
146 	    (uint8_t)((IP_VERSION << 4));
147 	totallen <<= 2;
148 	if (totallen < IP_SIMPLE_HDR_LENGTH || totallen > MBLKL(mp))
149 		return (B_FALSE);
150 	totallen -= IP_SIMPLE_HDR_LENGTH;
151 	if (totallen == 0)
152 		return (B_TRUE);
153 
154 	/*
155 	 * Search for CIPSO option.
156 	 * If no such option is present, then return OPT_NONE.
157 	 */
158 	opt = (uchar_t *)&ipha[1];
159 	while (totallen != 0) {
160 		switch (optval = opt[IPOPT_OPTVAL]) {
161 		case IPOPT_EOL:
162 			return (B_TRUE);
163 		case IPOPT_NOP:
164 			optlen = 1;
165 			break;
166 		default:
167 			if (totallen <= IPOPT_OLEN)
168 				return (B_FALSE);
169 			optlen = opt[IPOPT_OLEN];
170 			if (optlen < 2)
171 				return (B_FALSE);
172 		}
173 		if (optlen > totallen)
174 			return (B_FALSE);
175 		/*
176 		 * Copy pointer to option into '*buffer' and
177 		 * return the option type.
178 		 */
179 		switch (optval) {
180 		case IPOPT_COMSEC:
181 			if (TSOL_CIPSO_TAG_OFFSET < optlen &&
182 			    opt[TSOL_CIPSO_TAG_OFFSET] == 1) {
183 				*label_type = OPT_CIPSO;
184 				*buffer = opt;
185 				return (B_TRUE);
186 			}
187 			return (B_FALSE);
188 		}
189 		totallen -= optlen;
190 		opt += optlen;
191 	}
192 	return (B_TRUE);
193 }
194 
195 /*
196  * The following routine searches for a security label in an IPv6 datagram.
197  * It returns label_type of:
198  *    OPT_CIPSO if a CIPSO IP option is found.
199  *    OPT_NONE if no security label is found.
200  *
201  * If OPT_CIPSO, a pointer to the IPv4 portion of the CIPSO IP option will
202  * be returned in the buffer parameter.
203  *
204  * The function will return with B_FALSE if an IP format error
205  * or an unexpected label content error is encountered.
206  */
207 
208 boolean_t
209 tsol_get_option_v6(mblk_t *mp, tsol_ip_label_t *label_type, uchar_t **buffer)
210 {
211 	uchar_t		*opt_ptr = NULL;
212 	uchar_t		*after_secopt;
213 	boolean_t	hbh_needed;
214 	const uchar_t	*ip6hbh;
215 	size_t		optlen;
216 	uint32_t	doi;
217 	const ip6_t	*ip6h;
218 
219 	*label_type = OPT_NONE;
220 	*buffer = NULL;
221 	ip6h = (const ip6_t *)mp->b_rptr;
222 	if (ip6h->ip6_nxt != IPPROTO_HOPOPTS)
223 		return (B_TRUE);
224 	ip6hbh = (const uchar_t *)&ip6h[1];
225 	if (ip6hbh + MIN_EHDR_LEN > mp->b_wptr)
226 		return (B_FALSE);
227 	optlen = (ip6hbh[1] + 1) << 3;
228 	if (ip6hbh + optlen > mp->b_wptr)
229 		return (B_FALSE);
230 	if (!tsol_find_secopt_v6(ip6hbh, optlen,
231 	    &opt_ptr, &after_secopt, &hbh_needed))
232 		return (B_FALSE);
233 	/* tsol_find_secopt_v6 guarantees some sanity */
234 	if (opt_ptr != NULL) {
235 		/*
236 		 * IPv6 Option
237 		 *   opt_ptr[0]: Option type
238 		 *   opt_ptr[1]: Length of option data in bytes
239 		 *   opt_ptr[2]: First byte of option data
240 		 */
241 		if ((optlen = opt_ptr[1]) < 8)
242 			return (B_FALSE);
243 		opt_ptr += 2;
244 		/*
245 		 * From "Generalized Labeled Security Option for IPv6" draft
246 		 *   opt_ptr[0] - opt_ptr[4]: DOI = IP6LS_DOI_V4
247 		 *   opt_ptr[4]: Tag type = IP6LS_TT_V4
248 		 *   opt_ptr[5]: Tag length in bytes starting at Tag type field
249 		 * IPv4 CIPSO Option
250 		 *   opt_ptr[6]: option type
251 		 *   opt_ptr[7]: option length in bytes starting at type field
252 		 */
253 		bcopy(opt_ptr, &doi, sizeof (doi));
254 		doi = ntohl(doi);
255 		if (doi == IP6LS_DOI_V4 &&
256 		    opt_ptr[4] == IP6LS_TT_V4 &&
257 		    opt_ptr[5] <= optlen - 4 &&
258 		    opt_ptr[7] <= optlen - 6 &&
259 		    opt_ptr[7] <= opt_ptr[5] - 2) {
260 			opt_ptr += sizeof (doi) + 2;
261 			*label_type = OPT_CIPSO;
262 			*buffer = opt_ptr;
263 			return (B_TRUE);
264 		}
265 		return (B_FALSE);
266 	}
267 	return (B_TRUE);
268 }
269 
270 /*
271  * tsol_check_dest()
272  *
273  * This routine verifies if a destination is allowed to recieve messages
274  * based on the message cred's security label. If any adjustments to
275  * the cred are needed due to the connection's MAC mode or
276  * the destination's ability to receive labels, an "effective cred"
277  * will be returned.
278  *
279  * On successful return, effective_cred will point to the new creds needed
280  * or will be NULL if new creds aren't needed. On error, effective_cred
281  * is NULL.
282  *
283  * Returns:
284  *	0		Have or constructed appropriate credentials
285  *	EHOSTUNREACH	The credentials failed the remote host accreditation
286  *      ENOMEM		Memory allocation failure
287  */
288 int
289 tsol_check_dest(const cred_t *credp, const void *dst, uchar_t version,
290     uint_t mac_mode, cred_t **effective_cred)
291 {
292 	ts_label_t	*tsl, *newtsl = NULL;
293 	tsol_tpc_t	*dst_rhtp;
294 	zoneid_t	zoneid;
295 
296 	if (effective_cred != NULL)
297 		*effective_cred = NULL;
298 	ASSERT(version == IPV4_VERSION ||
299 	    (version == IPV6_VERSION &&
300 	    !IN6_IS_ADDR_V4MAPPED((in6_addr_t *)dst)));
301 
302 	/* Always pass kernel level communication (NULL label) */
303 	if ((tsl = crgetlabel(credp)) == NULL) {
304 		DTRACE_PROBE2(tx__tnopt__log__info__labeling__mac__allownull,
305 		    char *, "destination ip(1) with null cred was passed",
306 		    ipaddr_t, dst);
307 		return (0);
308 	}
309 
310 	if (tsl->tsl_flags & TSLF_IMPLICIT_IN) {
311 		DTRACE_PROBE3(tx__tnopt__log__info__labeling__unresolved__label,
312 		    char *,
313 		    "implicit-in packet to ip(1) reached tsol_check_dest "
314 		    "with implied security label sl(2)",
315 		    ipaddr_t, dst, ts_label_t *, tsl);
316 	}
317 
318 	/* Always pass multicast */
319 	if (version == IPV4_VERSION &&
320 	    CLASSD(*(ipaddr_t *)dst)) {
321 		DTRACE_PROBE2(tx__tnopt__log__info__labeling__mac__allowmult,
322 		    char *, "destination ip(1) with multicast dest was passed",
323 		    ipaddr_t, dst);
324 		return (0);
325 	} else if (version == IPV6_VERSION &&
326 	    IN6_IS_ADDR_MULTICAST((in6_addr_t *)dst)) {
327 		DTRACE_PROBE2(tx__tnopt__log__info__labeling__mac__allowmult_v6,
328 		    char *, "destination ip(1) with multicast dest was passed",
329 		    in6_addr_t *, dst);
330 		return (0);
331 	}
332 
333 	/* Never pass an undefined destination */
334 	if ((dst_rhtp = find_tpc(dst, version, B_FALSE)) == NULL) {
335 		DTRACE_PROBE2(tx__tnopt__log__info__labeling__lookupdst,
336 		    char *, "destination ip(1) not in tn database.",
337 		    void *, dst);
338 		return (EHOSTUNREACH);
339 	}
340 
341 	switch (dst_rhtp->tpc_tp.host_type) {
342 	case UNLABELED:
343 		/*
344 		 * Can talk to unlabeled hosts if
345 		 * (1) zone's label matches the default label, or
346 		 * (2) SO_MAC_EXEMPT is on and we
347 		 * dominate the peer's label, or
348 		 * (3) SO_MAC_EXEMPT is on and
349 		 * this is the global zone
350 		 */
351 		if (dst_rhtp->tpc_tp.tp_doi != tsl->tsl_doi) {
352 			DTRACE_PROBE4(tx__tnopt__log__info__labeling__doi,
353 			    char *, "unlabeled dest ip(1)/tpc(2) doi does "
354 			    "not match msg label(3) doi.", void *, dst,
355 			    tsol_tpc_t *, dst_rhtp, ts_label_t *, tsl);
356 			TPC_RELE(dst_rhtp);
357 			return (EHOSTUNREACH);
358 		}
359 		if (!blequal(&dst_rhtp->tpc_tp.tp_def_label,
360 		    &tsl->tsl_label)) {
361 			zoneid = crgetzoneid(credp);
362 			if (mac_mode != CONN_MAC_AWARE ||
363 			    !(zoneid == GLOBAL_ZONEID ||
364 			    bldominates(&tsl->tsl_label,
365 			    &dst_rhtp->tpc_tp.tp_def_label))) {
366 				DTRACE_PROBE4(
367 				    tx__tnopt__log__info__labeling__mac,
368 				    char *, "unlabeled dest ip(1)/tpc(2) does "
369 				    "not match msg label(3).", void *, dst,
370 				    tsol_tpc_t *, dst_rhtp, ts_label_t *, tsl);
371 				TPC_RELE(dst_rhtp);
372 				return (EHOSTUNREACH);
373 			}
374 			/*
375 			 * This is a downlabel MAC-exempt exchange.
376 			 * Use the remote destination's default label
377 			 * as the label of the message data.
378 			 */
379 			if ((newtsl = labelalloc(&dst_rhtp->tpc_tp.tp_def_label,
380 			    dst_rhtp->tpc_tp.tp_doi, KM_NOSLEEP)) == NULL) {
381 				TPC_RELE(dst_rhtp);
382 				return (ENOMEM);
383 			}
384 			newtsl->tsl_flags |= TSLF_UNLABELED;
385 
386 		} else if (!(tsl->tsl_flags & TSLF_UNLABELED)) {
387 			/*
388 			 * The security labels are the same but we need
389 			 * to flag that the remote node is unlabeled.
390 			 */
391 			if ((newtsl = labeldup(tsl, KM_NOSLEEP)) == NULL) {
392 				TPC_RELE(dst_rhtp);
393 				return (ENOMEM);
394 			}
395 			newtsl->tsl_flags |= TSLF_UNLABELED;
396 		}
397 		break;
398 
399 	case SUN_CIPSO:
400 		/*
401 		 * Can talk to labeled hosts if zone's label is within target's
402 		 * label range or set.
403 		 */
404 		if (dst_rhtp->tpc_tp.tp_cipso_doi_cipso != tsl->tsl_doi ||
405 		    (!_blinrange(&tsl->tsl_label,
406 		    &dst_rhtp->tpc_tp.tp_sl_range_cipso) &&
407 		    !blinlset(&tsl->tsl_label,
408 		    dst_rhtp->tpc_tp.tp_sl_set_cipso))) {
409 			DTRACE_PROBE4(tx__tnopt__log__info__labeling__mac,
410 			    char *, "labeled dest ip(1)/tpc(2) does not "
411 			    "match msg label(3).", void *, dst,
412 			    tsol_tpc_t *, dst_rhtp, ts_label_t *, tsl);
413 			TPC_RELE(dst_rhtp);
414 			return (EHOSTUNREACH);
415 		}
416 		if ((tsl->tsl_flags & TSLF_UNLABELED) ||
417 		    (mac_mode == CONN_MAC_IMPLICIT)) {
418 			/*
419 			 * Copy label so we can modify the flags
420 			 */
421 			if ((newtsl = labeldup(tsl, KM_NOSLEEP)) == NULL) {
422 				TPC_RELE(dst_rhtp);
423 				return (ENOMEM);
424 			}
425 			/*
426 			 * The security label is a match but we need to
427 			 * clear the unlabeled flag for this remote node.
428 			 */
429 			newtsl->tsl_flags &= ~TSLF_UNLABELED;
430 			if (mac_mode == CONN_MAC_IMPLICIT)
431 				newtsl->tsl_flags |= TSLF_IMPLICIT_OUT;
432 		}
433 		break;
434 
435 	default:
436 		TPC_RELE(dst_rhtp);
437 		return (EHOSTUNREACH);
438 	}
439 
440 	/*
441 	 * Generate a new cred if we modified the security label or
442 	 * label flags.
443 	 */
444 	if (newtsl != NULL) {
445 		if (effective_cred != NULL) {
446 			*effective_cred = copycred_from_tslabel(credp,
447 			    newtsl, KM_NOSLEEP);
448 		}
449 		label_rele(newtsl);
450 		if (effective_cred != NULL && *effective_cred == NULL) {
451 			TPC_RELE(dst_rhtp);
452 			return (ENOMEM);
453 		}
454 	}
455 	TPC_RELE(dst_rhtp);
456 	return (0);
457 }
458 
459 /*
460  * tsol_compute_label()
461  *
462  * This routine computes the IP label that should be on a packet based on the
463  * connection and destination information.
464  *
465  * Returns:
466  *      0		Fetched label
467  *	EHOSTUNREACH	No route to destination
468  *	EINVAL		Label cannot be computed
469  */
470 int
471 tsol_compute_label(const cred_t *credp, ipaddr_t dst, uchar_t *opt_storage,
472     ip_stack_t *ipst)
473 {
474 	uint_t		sec_opt_len;
475 	ts_label_t	*tsl;
476 	ire_t		*ire, *sire = NULL;
477 	tsol_ire_gw_secattr_t *attrp;
478 	zoneid_t	zoneid, ip_zoneid;
479 
480 	ASSERT(credp != NULL);
481 
482 	if (opt_storage != NULL)
483 		opt_storage[IPOPT_OLEN] = 0;
484 
485 	if ((tsl = crgetlabel(credp)) == NULL)
486 		return (0);
487 
488 	/* always pass multicast */
489 	if (CLASSD(dst))
490 		return (0);
491 
492 	if (tsl->tsl_flags & TSLF_IMPLICIT_OUT)
493 		return (0);
494 
495 	if (tsl->tsl_flags & TSLF_UNLABELED) {
496 
497 		/*
498 		 * The destination is unlabeled. Only add a label if the
499 		 * destination is not a broadcast/local/loopback address,
500 		 * the destination is not on the same subnet, and the
501 		 * next-hop gateway is labeled.
502 		 *
503 		 * For exclusive stacks we set the zoneid to zero
504 		 * to operate as if we are in the global zone for
505 		 * IRE lookups.
506 		 */
507 		zoneid = crgetzoneid(credp);
508 		if (ipst->ips_netstack->netstack_stackid != GLOBAL_NETSTACKID)
509 			ip_zoneid = GLOBAL_ZONEID;
510 		else
511 			ip_zoneid = zoneid;
512 
513 		ire = ire_cache_lookup(dst, ip_zoneid, tsl, ipst);
514 
515 		if (ire != NULL && (ire->ire_type & (IRE_BROADCAST | IRE_LOCAL |
516 		    IRE_LOOPBACK | IRE_INTERFACE)) != 0) {
517 			IRE_REFRELE(ire);
518 			return (0);
519 		} else if (ire == NULL) {
520 			ire = ire_ftable_lookup(dst, 0, 0, 0, NULL, &sire,
521 			    ip_zoneid, 0, tsl, (MATCH_IRE_RECURSIVE |
522 			    MATCH_IRE_DEFAULT | MATCH_IRE_SECATTR), ipst);
523 		}
524 
525 		/* no route to destination */
526 		if (ire == NULL) {
527 			DTRACE_PROBE3(
528 			    tx__tnopt__log__info__labeling__routedst__v4,
529 			    char *, "No route to unlabeled dest ip(1) with "
530 			    "creds(2).", ipaddr_t, dst, cred_t *, credp);
531 			return (EHOSTUNREACH);
532 		}
533 
534 		/*
535 		 * Prefix IRE from f-table lookup means that the destination
536 		 * is not directly connected; check the next-hop attributes.
537 		 */
538 		if (sire != NULL) {
539 			ASSERT(ire != NULL);
540 			IRE_REFRELE(ire);
541 			ire = sire;
542 		}
543 
544 		/*
545 		 * Return now if next hop gateway is unlabeled. There is
546 		 * no need to generate a CIPSO option for this message.
547 		 */
548 		attrp = ire->ire_gw_secattr;
549 		if (attrp == NULL || attrp->igsa_rhc == NULL ||
550 		    attrp->igsa_rhc->rhc_tpc->tpc_tp.host_type == UNLABELED) {
551 			IRE_REFRELE(ire);
552 			return (0);
553 		}
554 
555 		IRE_REFRELE(ire);
556 
557 	}
558 
559 	/* compute the CIPSO option */
560 	sec_opt_len = tsol2cipso_tt1(&tsl->tsl_label, opt_storage,
561 	    tsl->tsl_doi);
562 
563 	if (sec_opt_len == 0) {
564 		DTRACE_PROBE3(tx__tnopt__log__error__labeling__lostops__v4,
565 		    char *, "options lack length for dest ip(1) with creds(2).",
566 		    ipaddr_t, dst, cred_t *, credp);
567 		return (EINVAL);
568 	}
569 
570 	return (0);
571 }
572 
573 /*
574  * Remove any existing security option (CIPSO) from the given IP
575  * header, move the 'buflen' bytes back to fill the gap, and return the number
576  * of bytes removed (as zero or negative number).  Assumes that the headers are
577  * sane.
578  */
579 int
580 tsol_remove_secopt(ipha_t *ipha, int buflen)
581 {
582 	int remlen, olen, oval, delta;
583 	uchar_t *fptr, *tptr;
584 	boolean_t noop_keep;
585 
586 	remlen = IPH_HDR_LENGTH(ipha) - IP_SIMPLE_HDR_LENGTH;
587 	fptr = tptr = (uchar_t *)(ipha + 1);
588 	noop_keep = B_TRUE;
589 	while (remlen > 0) {
590 		oval = fptr[IPOPT_OPTVAL];
591 
592 		/* terminate on end of list */
593 		if (oval == IPOPT_EOL)
594 			break;
595 
596 		/*
597 		 * Delete any no-ops following a deleted option, at least up
598 		 * to a 4 octet alignment; copy others.
599 		 */
600 		if (oval == IPOPT_NOP) {
601 			if (((fptr - (uchar_t *)ipha) & 3) == 0)
602 				noop_keep = B_TRUE;
603 			if (noop_keep)
604 				*tptr++ = oval;
605 			fptr++;
606 			remlen--;
607 			continue;
608 		}
609 
610 		/* stop on corrupted list; just do nothing. */
611 		if (remlen < 2)
612 			return (0);
613 		olen = fptr[IPOPT_OLEN];
614 		if (olen < 2 || olen > remlen)
615 			return (0);
616 
617 		/* skip over security options to delete them */
618 		if (oval == IPOPT_COMSEC || oval == IPOPT_SECURITY) {
619 			noop_keep = B_FALSE;
620 			fptr += olen;
621 			remlen -= olen;
622 			continue;
623 		}
624 
625 		/* copy the rest */
626 		noop_keep = B_TRUE;
627 		if (tptr != fptr)
628 			ovbcopy(fptr, tptr, olen);
629 		fptr += olen;
630 		tptr += olen;
631 		remlen -= olen;
632 	}
633 
634 	fptr += remlen;
635 
636 	/* figure how much padding we'll need for header alignment */
637 	olen = (tptr - (uchar_t *)ipha) & 3;
638 	if (olen > 0) {
639 		olen = 4 - olen;
640 		/* pad with end-of-list */
641 		bzero(tptr, olen);
642 		tptr += olen;
643 	}
644 
645 	/* slide back the headers that follow and update the IP header */
646 	delta = fptr - tptr;
647 	if (delta != 0) {
648 		ovbcopy(fptr, tptr, ((uchar_t *)ipha + buflen) - fptr);
649 		ipha->ipha_version_and_hdr_length -= delta / 4;
650 	}
651 	return (-delta);
652 }
653 
654 /*
655  * Insert the option in 'optbuf' into the IP header pointed to by 'ipha', and
656  * move the data following the IP header (up to buflen) to accomodate the new
657  * option.  Assumes that up to IP_MAX_OPT_LENGTH bytes are available (in total)
658  * for IP options.  Returns the number of bytes actually inserted, or -1 if the
659  * option cannot be inserted.  (Note that negative return values are possible
660  * when noops must be compressed, and that only -1 indicates error.  Successful
661  * return value is always evenly divisible by 4, by definition.)
662  */
663 int
664 tsol_prepend_option(uchar_t *optbuf, ipha_t *ipha, int buflen)
665 {
666 	int remlen, padding, lastpad, totlen;
667 	int oval, olen;
668 	int delta;
669 	uchar_t *optr;
670 	uchar_t tempopt[IP_MAX_OPT_LENGTH], *toptr;
671 
672 	if (optbuf[IPOPT_OPTVAL] == IPOPT_EOL ||
673 	    optbuf[IPOPT_OPTVAL] == IPOPT_NOP ||
674 	    optbuf[IPOPT_OLEN] == 0)
675 		return (0);
676 
677 	ASSERT(optbuf[IPOPT_OLEN] >= 2 &&
678 	    optbuf[IPOPT_OLEN] <= IP_MAX_OPT_LENGTH);
679 
680 	/* first find the real (unpadded) length of the existing options */
681 	remlen = IPH_HDR_LENGTH(ipha) - IP_SIMPLE_HDR_LENGTH;
682 	padding = totlen = lastpad = 0;
683 	optr = (uchar_t *)(ipha + 1);
684 	while (remlen > 0) {
685 		oval = optr[IPOPT_OPTVAL];
686 
687 		/* stop at end of list */
688 		if (oval == IPOPT_EOL)
689 			break;
690 
691 		/* skip no-ops, noting that length byte isn't present */
692 		if (oval == IPOPT_NOP) {
693 			optr++;
694 			padding++;
695 			lastpad++;
696 			totlen++;
697 			remlen--;
698 			continue;
699 		}
700 
701 		/* give up on a corrupted list; report failure */
702 		if (remlen < 2)
703 			return (-1);
704 		olen = optr[IPOPT_OLEN];
705 		if (olen < 2 || olen > remlen)
706 			return (-1);
707 
708 		lastpad = 0;
709 		optr += olen;
710 		totlen += olen;
711 		remlen -= olen;
712 	}
713 
714 	/* completely ignore any trailing padding */
715 	totlen -= lastpad;
716 	padding -= lastpad;
717 
718 	/*
719 	 * If some sort of inter-option alignment was present, try to preserve
720 	 * that alignment.  If alignment pushes us out past the maximum, then
721 	 * discard it and try to compress to fit.  (We just "assume" that any
722 	 * padding added was attempting to get 32 bit alignment.  If that's
723 	 * wrong, that's just too bad.)
724 	 */
725 	if (padding > 0) {
726 		olen = (optbuf[IPOPT_OLEN] + 3) & ~3;
727 		if (olen + totlen > IP_MAX_OPT_LENGTH) {
728 			totlen -= padding;
729 			if (olen + totlen > IP_MAX_OPT_LENGTH)
730 				return (-1);
731 			padding = 0;
732 		}
733 	}
734 
735 	/*
736 	 * Since we may need to compress or expand the option list, we write to
737 	 * a temporary buffer and then copy the results back to the IP header.
738 	 */
739 	toptr = tempopt;
740 
741 	/* compute actual option to insert */
742 	olen = optbuf[IPOPT_OLEN];
743 	bcopy(optbuf, toptr, olen);
744 	toptr += olen;
745 	if (padding > 0) {
746 		while ((olen & 3) != 0) {
747 			*toptr++ = IPOPT_NOP;
748 			olen++;
749 		}
750 	}
751 
752 	/* copy over the existing options */
753 	optr = (uchar_t *)(ipha + 1);
754 	while (totlen > 0) {
755 		oval = optr[IPOPT_OPTVAL];
756 
757 		/* totlen doesn't include end-of-list marker */
758 		ASSERT(oval != IPOPT_EOL);
759 
760 		/* handle no-ops; copy if desired, ignore otherwise */
761 		if (oval == IPOPT_NOP) {
762 			if (padding > 0) {
763 				/* note: cannot overflow due to checks above */
764 				ASSERT(toptr < tempopt + IP_MAX_OPT_LENGTH);
765 				*toptr++ = oval;
766 			}
767 			optr++;
768 			totlen--;
769 			continue;
770 		}
771 
772 		/* list cannot be corrupt at this point */
773 		ASSERT(totlen >= 2);
774 		olen = optr[IPOPT_OLEN];
775 		ASSERT(olen >= 2 && olen <= totlen);
776 
777 		/* cannot run out of room due to tests above */
778 		ASSERT(toptr + olen <= tempopt + IP_MAX_OPT_LENGTH);
779 
780 		bcopy(optr, toptr, olen);
781 		optr += olen;
782 		toptr += olen;
783 		totlen -= olen;
784 	}
785 
786 	/* figure how much padding we'll need for header alignment */
787 	olen = (toptr - tempopt) & 3;
788 	if (olen > 0) {
789 		olen = 4 - olen;
790 		ASSERT(toptr + olen <= tempopt + IP_MAX_OPT_LENGTH);
791 		/* pad with end-of-list value */
792 		bzero(toptr, olen);
793 		toptr += olen;
794 	}
795 
796 	/* move the headers as needed and update IP header */
797 	olen = (toptr - tempopt) + IP_SIMPLE_HDR_LENGTH;
798 	remlen = IPH_HDR_LENGTH(ipha);
799 	delta = olen - remlen;
800 	if (delta != 0) {
801 		ovbcopy((uchar_t *)ipha + remlen, (uchar_t *)ipha + olen,
802 		    buflen - remlen);
803 		ipha->ipha_version_and_hdr_length += delta / 4;
804 	}
805 
806 	/* slap in the new options */
807 	bcopy(tempopt, ipha + 1, olen - IP_SIMPLE_HDR_LENGTH);
808 
809 	return (delta);
810 }
811 
812 /*
813  * tsol_check_label()
814  *
815  * This routine computes the IP label that should be on the packet based on the
816  * connection and destination information.  If the label is there, it returns
817  * zero, so the caller knows that the label is syncronized, and further calls
818  * are not required.  If the label isn't right, then the right one is inserted.
819  *
820  * The packet's header is clear before entering IPsec's engine.
821  *
822  * Returns:
823  *      0		Label on packet (was|is now) correct
824  *      EACCES		The packet failed the remote host accreditation.
825  *      ENOMEM		Memory allocation failure.
826  *	EINVAL		Label cannot be computed
827  */
828 int
829 tsol_check_label(const cred_t *credp, mblk_t **mpp, uint_t mac_mode,
830     ip_stack_t *ipst, pid_t pid)
831 {
832 	mblk_t *mp = *mpp;
833 	ipha_t  *ipha;
834 	cred_t *effective_cred = NULL;
835 	uchar_t opt_storage[IP_MAX_OPT_LENGTH];
836 	uint_t hlen;
837 	uint_t sec_opt_len;
838 	uchar_t *optr;
839 	int delta_remove = 0, delta_add, adjust;
840 	int retv;
841 
842 	opt_storage[IPOPT_OPTVAL] = 0;
843 
844 	ipha = (ipha_t *)mp->b_rptr;
845 
846 	/*
847 	 * Verify the destination is allowed to receive packets at
848 	 * the security label of the message data. check_dest()
849 	 * may create a new effective cred with a modified label
850 	 * or label flags. Apply any such cred to the message block
851 	 * for use in future routing decisions.
852 	 */
853 	retv = tsol_check_dest(credp, &ipha->ipha_dst, IPV4_VERSION,
854 	    mac_mode, &effective_cred);
855 	if (retv != 0)
856 		return (retv);
857 
858 	/*
859 	 * Calculate the security label to be placed in the text
860 	 * of the message (if any).
861 	 */
862 	if (effective_cred != NULL) {
863 		if ((retv = tsol_compute_label(effective_cred,
864 		    ipha->ipha_dst, opt_storage, ipst)) != 0) {
865 			crfree(effective_cred);
866 			return (retv);
867 		}
868 		mblk_setcred(mp, effective_cred, pid);
869 		crfree(effective_cred);
870 	} else {
871 		if ((retv = tsol_compute_label(credp,
872 		    ipha->ipha_dst, opt_storage, ipst)) != 0) {
873 			return (retv);
874 		}
875 	}
876 
877 	optr = (uchar_t *)(ipha + 1);
878 	hlen = IPH_HDR_LENGTH(ipha) - IP_SIMPLE_HDR_LENGTH;
879 	sec_opt_len = opt_storage[IPOPT_OLEN];
880 
881 	if (hlen >= sec_opt_len) {
882 		/* If no option is supposed to be there, make sure it's not */
883 		if (sec_opt_len == 0 && hlen > 0 &&
884 		    optr[IPOPT_OPTVAL] != IPOPT_COMSEC &&
885 		    optr[IPOPT_OPTVAL] != IPOPT_SECURITY)
886 			return (0);
887 		/* if the option is there, it's always first */
888 		if (sec_opt_len != 0 &&
889 		    bcmp(opt_storage, optr, sec_opt_len) == 0)
890 			return (0);
891 	}
892 
893 	if (msg_getcred(mp, NULL) == NULL) {
894 		mblk_setcred(mp, (cred_t *)credp, NOPID);
895 	}
896 
897 	/*
898 	 * If there is an option there, then it must be the wrong one; delete.
899 	 */
900 	if (hlen > 0) {
901 		delta_remove = tsol_remove_secopt(ipha, MBLKL(mp));
902 		mp->b_wptr += delta_remove;
903 	}
904 
905 	/* Make sure we have room for the worst-case addition */
906 	hlen = IPH_HDR_LENGTH(ipha) + opt_storage[IPOPT_OLEN];
907 	hlen = (hlen + 3) & ~3;
908 	if (hlen > IP_MAX_HDR_LENGTH)
909 		hlen = IP_MAX_HDR_LENGTH;
910 	hlen -= IPH_HDR_LENGTH(ipha);
911 	if (mp->b_wptr + hlen > mp->b_datap->db_lim) {
912 		int copylen;
913 		mblk_t *new_mp;
914 
915 		/* allocate enough to be meaningful, but not *too* much */
916 		copylen = MBLKL(mp);
917 		if (copylen > 256)
918 			copylen = 256;
919 		new_mp = allocb_tmpl(hlen + copylen +
920 		    (mp->b_rptr - mp->b_datap->db_base), mp);
921 		if (new_mp == NULL)
922 			return (ENOMEM);
923 
924 		/* keep the bias */
925 		new_mp->b_rptr += mp->b_rptr - mp->b_datap->db_base;
926 		new_mp->b_wptr = new_mp->b_rptr + copylen;
927 		bcopy(mp->b_rptr, new_mp->b_rptr, copylen);
928 		new_mp->b_cont = mp;
929 		if ((mp->b_rptr += copylen) >= mp->b_wptr) {
930 			new_mp->b_cont = mp->b_cont;
931 			freeb(mp);
932 		}
933 		*mpp = mp = new_mp;
934 		ipha = (ipha_t *)mp->b_rptr;
935 	}
936 
937 	delta_add = tsol_prepend_option(opt_storage, ipha, MBLKL(mp));
938 	if (delta_add == -1)
939 		goto param_prob;
940 
941 	ASSERT((mp->b_wptr + delta_add) <= DB_LIM(mp));
942 	mp->b_wptr += delta_add;
943 
944 	adjust = delta_remove + delta_add;
945 	adjust += ntohs(ipha->ipha_length);
946 	ipha->ipha_length = htons(adjust);
947 
948 	return (0);
949 
950 param_prob:
951 	return (EINVAL);
952 }
953 
954 /*
955  * IPv6 HopOpt extension header for the label option layout:
956  *	- One octet giving the type of the 'next extension header'
957  *	- Header extension length in 8-byte words, not including the
958  *	  1st 8 bytes, but including any pad bytes at the end.
959  *	  Eg. A value of 2 means 16 bytes not including the 1st 8 bytes.
960  *	- Followed by TLV encoded IPv6 label option. Option layout is
961  *		* One octet, IP6OPT_LS
962  *		* One octet option length in bytes of the option data following
963  *		  the length, but not including any pad bytes at the end.
964  *		* Four-octet DOI (IP6LS_DOI_V4)
965  *		* One octet suboption, IP6LS_TT_V4
966  *		* One octet suboption length in bytes of the suboption
967  *		  following the suboption length, including the suboption
968  *		  header length, but not including any pad bytes at the end.
969  *	- Pad to make the extension header a multiple of 8 bytes.
970  *
971  * This function returns the contents of 'IPv6 option structure' in the above.
972  * i.e starting from the IP6OPT_LS but not including the pad at the end.
973  * The user must prepend two octets (either padding or next header / length)
974  * and append padding out to the next 8 octet boundary.
975  */
976 int
977 tsol_compute_label_v6(const cred_t *credp, const in6_addr_t *dst,
978     uchar_t *opt_storage, ip_stack_t *ipst)
979 {
980 	ts_label_t	*tsl;
981 	uint_t		sec_opt_len;
982 	uint32_t	doi;
983 	zoneid_t	zoneid, ip_zoneid;
984 	ire_t		*ire, *sire;
985 	tsol_ire_gw_secattr_t *attrp;
986 
987 	ASSERT(credp != NULL);
988 
989 	if (ip6opt_ls == 0)
990 		return (EINVAL);
991 
992 	if (opt_storage != NULL)
993 		opt_storage[IPOPT_OLEN] = 0;
994 
995 	if ((tsl = crgetlabel(credp)) == NULL)
996 		return (0);
997 
998 	/* Always pass multicast */
999 	if (IN6_IS_ADDR_MULTICAST(dst))
1000 		return (0);
1001 
1002 	zoneid = crgetzoneid(credp);
1003 
1004 	/*
1005 	 * Fill in a V6 label.  If a new format is added here, make certain
1006 	 * that the maximum size of this label is reflected in sys/tsol/tnet.h
1007 	 * as TSOL_MAX_IPV6_OPTION.
1008 	 */
1009 	if (tsl->tsl_flags & TSLF_IMPLICIT_OUT)
1010 		return (0);
1011 
1012 	if (tsl->tsl_flags & TSLF_UNLABELED) {
1013 		/*
1014 		 * The destination is unlabeled. Only add a label if the
1015 		 * destination is not broadcast/local/loopback address,
1016 		 * the destination is not on the same subnet, and the
1017 		 * next-hop gateway is labeled.
1018 		 *
1019 		 * For exclusive stacks we set the zoneid to zero to
1020 		 * operate as if we are in the global zone when
1021 		 * performing IRE lookups and conn_t comparisons.
1022 		 */
1023 		if (ipst->ips_netstack->netstack_stackid != GLOBAL_NETSTACKID)
1024 			ip_zoneid = GLOBAL_ZONEID;
1025 		else
1026 			ip_zoneid = zoneid;
1027 
1028 		sire = NULL;
1029 		ire = ire_cache_lookup_v6(dst, ip_zoneid, tsl, ipst);
1030 
1031 		if (ire != NULL && (ire->ire_type & (IRE_LOCAL |
1032 		    IRE_LOOPBACK | IRE_INTERFACE)) != 0) {
1033 			IRE_REFRELE(ire);
1034 			return (0);
1035 		} else if (ire == NULL) {
1036 			ire = ire_ftable_lookup_v6(dst, NULL, NULL, 0, NULL,
1037 			    &sire, ip_zoneid, 0, tsl, (MATCH_IRE_RECURSIVE |
1038 			    MATCH_IRE_DEFAULT | MATCH_IRE_SECATTR), ipst);
1039 		}
1040 
1041 		/* no route to destination */
1042 		if (ire == NULL) {
1043 			DTRACE_PROBE3(
1044 			    tx__tnopt__log__info__labeling__routedst__v6,
1045 			    char *, "No route to unlabeled dest ip6(1) with "
1046 			    "creds(2).", in6_addr_t *, dst, cred_t *, credp);
1047 			return (EHOSTUNREACH);
1048 		}
1049 
1050 		/*
1051 		 * Prefix IRE from f-table lookup means that the destination
1052 		 * is not directly connected; check the next-hop attributes.
1053 		 */
1054 		if (sire != NULL) {
1055 			ASSERT(ire != NULL);
1056 			IRE_REFRELE(ire);
1057 			ire = sire;
1058 		}
1059 
1060 		/*
1061 		 * Return now if next hop gateway is unlabeled. There is
1062 		 * no need to generate a CIPSO option for this message.
1063 		 */
1064 		attrp = ire->ire_gw_secattr;
1065 		if (attrp == NULL || attrp->igsa_rhc == NULL ||
1066 		    attrp->igsa_rhc->rhc_tpc->tpc_tp.host_type == UNLABELED) {
1067 			IRE_REFRELE(ire);
1068 			return (0);
1069 		}
1070 		IRE_REFRELE(ire);
1071 	}
1072 
1073 	/* compute the CIPSO option */
1074 	if (opt_storage != NULL)
1075 		opt_storage += 8;
1076 	sec_opt_len = tsol2cipso_tt1(&tsl->tsl_label, opt_storage,
1077 	    tsl->tsl_doi);
1078 
1079 	if (sec_opt_len == 0) {
1080 		DTRACE_PROBE3(tx__tnopt__log__error__labeling__lostops__v6,
1081 		    char *, "options lack length for dest ip6(1) with "
1082 		    "creds(2).", in6_addr_t *, dst, cred_t *, credp);
1083 		return (EINVAL);
1084 	}
1085 
1086 	if (opt_storage == NULL)
1087 		return (0);
1088 
1089 	if (sec_opt_len < IP_MAX_OPT_LENGTH)
1090 		opt_storage[sec_opt_len] = IPOPT_EOL;
1091 
1092 	/*
1093 	 * Just in case the option length is odd, round it up to the next even
1094 	 * multiple.  The IPv6 option definition doesn't like odd numbers for
1095 	 * some reason.
1096 	 *
1097 	 * Length in the overall option header (IP6OPT_LS) does not include the
1098 	 * option header itself, but the length in the suboption does include
1099 	 * the suboption header.  Thus, when there's just one suboption, the
1100 	 * length in the option header is the suboption length plus 4 (for the
1101 	 * DOI value).
1102 	 */
1103 	opt_storage[-2] = IP6LS_TT_V4;
1104 	opt_storage[-1] = (sec_opt_len + 2 + 1) & ~1;
1105 	opt_storage[-8] = ip6opt_ls;
1106 	opt_storage[-7] = opt_storage[-1] + 4;
1107 	doi = htons(IP6LS_DOI_V4);
1108 	bcopy(&doi, opt_storage - 6, 4);
1109 
1110 	return (0);
1111 }
1112 
1113 /*
1114  * Locate the start of the IP6OPT_LS label option and return it.
1115  * Also return the start of the next non-pad option in after_secoptp.
1116  * Usually the label option is the first option at least when packets
1117  * are generated, but for generality we don't assume that on received packets.
1118  *
1119  * The function will return with B_FALSE if an IP format error
1120  * or an unexpected label content error is encountered.
1121  */
1122 boolean_t
1123 tsol_find_secopt_v6(
1124     const uchar_t *ip6hbh,	/* Start of the hop-by-hop extension header */
1125     uint_t hbhlen,		/* Length of the hop-by-hop extension header */
1126     uchar_t **secoptp,		/* Location of IP6OPT_LS label option */
1127     uchar_t **after_secoptp,	/* Non-pad option following the label option */
1128     boolean_t *hbh_needed)	/* Is hop-by-hop hdr needed w/o label */
1129 {
1130 	uint_t	optlen;
1131 	uint_t	optused;
1132 	const uchar_t *optptr;
1133 	uchar_t	opt_type;
1134 
1135 	*secoptp = NULL;
1136 	*hbh_needed = B_FALSE;
1137 	*after_secoptp = NULL;
1138 	optlen = hbhlen - 2;
1139 	optptr = ip6hbh + 2;
1140 	while (optlen != 0) {
1141 		opt_type = *optptr;
1142 		if (opt_type == IP6OPT_PAD1) {
1143 			optptr++;
1144 			optlen--;
1145 			continue;
1146 		}
1147 		if (optlen == 1)
1148 			return (B_FALSE);
1149 		optused = 2 + optptr[1];
1150 		if (optused > optlen)
1151 			return (B_FALSE);
1152 		/*
1153 		 * if we get here, ip6opt_ls can
1154 		 * not be 0 because it will always
1155 		 * match the IP6OPT_PAD1 above.
1156 		 * Therefore ip6opt_ls == 0 forces
1157 		 * this test to always fail here.
1158 		 */
1159 		if (opt_type == ip6opt_ls) {
1160 			if (*secoptp != NULL)
1161 				/* More than one security option found */
1162 				return (B_FALSE);
1163 			*secoptp = (uchar_t *)optptr;
1164 		} else switch (opt_type) {
1165 		case IP6OPT_PADN:
1166 			break;
1167 		default:
1168 			/*
1169 			 * There is at least 1 option other than
1170 			 * the label option. So the hop-by-hop header is needed
1171 			 */
1172 			*hbh_needed = B_TRUE;
1173 			if (*secoptp != NULL) {
1174 				*after_secoptp = (uchar_t *)optptr;
1175 				return (B_TRUE);
1176 			}
1177 			break;
1178 		}
1179 		optlen -= optused;
1180 		optptr += optused;
1181 	}
1182 	return (B_TRUE);
1183 }
1184 
1185 /*
1186  * Remove the label option from the hop-by-hop options header if it exists.
1187  * 'buflen' is the total length of the packet typically b_wptr - b_rptr.
1188  * Header and data following the label option that is deleted are copied
1189  * (i.e. slid backward) to the right position, and returns the number
1190  * of bytes removed (as zero or negative number.)
1191  */
1192 int
1193 tsol_remove_secopt_v6(ip6_t *ip6h, int buflen)
1194 {
1195 	uchar_t	*ip6hbh;	/* hop-by-hop header */
1196 	uint_t	hbhlen;		/* hop-by-hop extension header length */
1197 	uchar_t *secopt = NULL;
1198 	uchar_t *after_secopt;
1199 	uint_t	pad;
1200 	uint_t	delta;
1201 	boolean_t hbh_needed;
1202 
1203 	/*
1204 	 * hop-by-hop extension header must appear first, if it does not
1205 	 * exist, there is no label option.
1206 	 */
1207 	if (ip6h->ip6_nxt != IPPROTO_HOPOPTS)
1208 		return (0);
1209 
1210 	ip6hbh = (uchar_t *)&ip6h[1];
1211 	hbhlen = (ip6hbh[1] + 1) << 3;
1212 	/*
1213 	 * Locate the start of the label option if it exists and the end
1214 	 * of the label option including pads if any.
1215 	 */
1216 	if (!tsol_find_secopt_v6(ip6hbh, hbhlen, &secopt, &after_secopt,
1217 	    &hbh_needed)) {
1218 		/*
1219 		 * This function should not see invalid messages.
1220 		 * If one occurs, it would indicate either an
1221 		 * option previously verified in the forwarding
1222 		 * path has been corrupted or an option was
1223 		 * incorrectly generated locally.
1224 		 */
1225 		ASSERT(0);
1226 		return (0);
1227 	}
1228 	if (secopt == NULL)
1229 		return (0);
1230 	if (!hbh_needed) {
1231 		uchar_t	next_hdr;
1232 		/*
1233 		 * The label option was the only option in the hop-by-hop
1234 		 * header. We don't need the hop-by-hop header itself any
1235 		 * longer.
1236 		 */
1237 		next_hdr = ip6hbh[0];
1238 		ovbcopy(ip6hbh + hbhlen, ip6hbh,
1239 		    buflen - (IPV6_HDR_LEN + hbhlen));
1240 		ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) - hbhlen);
1241 		ip6h->ip6_nxt = next_hdr;
1242 		return (-hbhlen);
1243 	}
1244 
1245 	if (after_secopt == NULL) {
1246 		/* There is no option following the label option */
1247 		after_secopt = ip6hbh + hbhlen;
1248 	}
1249 
1250 	/*
1251 	 * After deleting the label option, we need to slide the headers
1252 	 * and data back, while still maintaining the same alignment (module 8)
1253 	 * for the other options. So we slide the headers and data back only
1254 	 * by an integral multiple of 8 bytes, and fill the remaining bytes
1255 	 * with pads.
1256 	 */
1257 	delta = after_secopt - secopt;
1258 	pad = delta % 8;
1259 	if (pad == 1) {
1260 		secopt[0] = IP6OPT_PAD1;
1261 	} else if (pad > 1) {
1262 		secopt[0] = IP6OPT_PADN;
1263 		secopt[1] = pad - 2;
1264 		if (pad > 2)
1265 			bzero(&secopt[2], pad - 2);
1266 	}
1267 	secopt += pad;
1268 	delta -= pad;
1269 	ovbcopy(after_secopt, secopt,
1270 	    (uchar_t *)ip6h + buflen - after_secopt);
1271 	ip6hbh[1] -= delta/8;
1272 	ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) - delta);
1273 
1274 	return (-delta);
1275 }
1276 
1277 /*
1278  * 'optbuf' contains a CIPSO label embedded in an IPv6 hop-by-hop option,
1279  * starting with the IP6OPT_LS option type. The format of this hop-by-hop
1280  * option is described in the block comment above tsol_compute_label_v6.
1281  * This function prepends this hop-by-hop option before any other hop-by-hop
1282  * options in the hop-by-hop header if one already exists, else a new
1283  * hop-by-hop header is created and stuffed into the packet following
1284  * the IPv6 header. 'buflen' is the total length of the packet i.e.
1285  * b_wptr - b_rptr. The caller ensures that there is enough space for the
1286  * extra option being added. Header and data following the position where
1287  * the label option is inserted are copied (i.e. slid forward) to the right
1288  * position.
1289  */
1290 int
1291 tsol_prepend_option_v6(uchar_t *optbuf, ip6_t *ip6h, int buflen)
1292 {
1293 	/*
1294 	 * rawlen is the length of the label option in bytes, not including
1295 	 * any pads, starting from the IP6OPT_LS (option type) byte.
1296 	 */
1297 	uint_t	rawlen;
1298 
1299 	uint_t	optlen;		/* rawlen rounded to an 8 byte multiple */
1300 	uchar_t	*ip6hbh;	/* start of the hop-by-hop extension header */
1301 	uint_t	hbhlen;		/* Length of the hop-by-hop extension header */
1302 	uint_t	pad_len;
1303 	uchar_t	*pad_position;
1304 	int	delta;		/* Actual number of bytes inserted */
1305 
1306 	rawlen = optbuf[1] + 2;	/* Add 2 for the option type, option length */
1307 	ip6hbh = (uchar_t *)&ip6h[1];
1308 	if (ip6h->ip6_nxt == IPPROTO_HOPOPTS) {
1309 		/*
1310 		 * There is a hop-by-hop header present already. In order to
1311 		 * preserve the alignment of the other options at the existing
1312 		 * value (modulo 8) we need to pad the label option to a
1313 		 * multiple of 8 bytes before prepending it to the other
1314 		 * options. Slide the extension headers and data forward to
1315 		 * accomodate the label option at the start of the hop-by-hop
1316 		 * header
1317 		 */
1318 		delta = optlen = (rawlen + 7) & ~7;
1319 		pad_len = optlen - rawlen;
1320 		pad_position = ip6hbh + 2 + rawlen;
1321 		ovbcopy(ip6hbh + 2, ip6hbh + 2 + optlen,
1322 		    buflen - (IPV6_HDR_LEN + 2));
1323 		/*
1324 		 * Bump up the hop-by-hop extension header length by
1325 		 * the number of 8-byte words added
1326 		 */
1327 		optlen >>= 3;
1328 		if (ip6hbh[1] + optlen > 255)
1329 			return (-1);
1330 		ip6hbh[1] += optlen;
1331 	} else {
1332 		/*
1333 		 * There is no hop-by-hop header in the packet. Construct a
1334 		 * new Hop-by-hop extension header (a multiple of 8 bytes).
1335 		 * Slide any other extension headers and data forward to
1336 		 * accomodate this hop-by-hop header
1337 		 */
1338 		delta = hbhlen = (2 + rawlen + 7) & ~7; /* +2 for nxthdr, len */
1339 		pad_len = hbhlen - (2 + rawlen);
1340 		pad_position = ip6hbh + 2 + rawlen;
1341 		ovbcopy(ip6hbh, ip6hbh + hbhlen, buflen - IPV6_HDR_LEN);
1342 		ip6hbh[0] = ip6h->ip6_nxt;
1343 		/*
1344 		 * hop-by-hop extension header length in 8-byte words, not
1345 		 * including the 1st 8 bytes of the hop-by-hop header.
1346 		 */
1347 		ip6hbh[1] = (hbhlen >> 3) - 1;
1348 		ip6h->ip6_nxt = IPPROTO_HOPOPTS;
1349 	}
1350 	/*
1351 	 * Copy the label option into the hop-by-hop header and insert any
1352 	 * needed pads
1353 	 */
1354 	bcopy(optbuf, ip6hbh + 2, rawlen);
1355 	if (pad_len == 1) {
1356 		pad_position[0] = IP6OPT_PAD1;
1357 	} else if (pad_len > 1) {
1358 		pad_position[0] = IP6OPT_PADN;
1359 		pad_position[1] = pad_len - 2;
1360 		if (pad_len > 2)
1361 			bzero(pad_position + 2, pad_len - 2);
1362 	}
1363 	ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) + delta);
1364 	return (delta);
1365 }
1366 
1367 /*
1368  * tsol_check_label_v6()
1369  *
1370  * This routine computes the IP label that should be on the packet based on the
1371  * connection and destination information.  It's called only by the IP
1372  * forwarding logic, because all internal modules atop IP know how to generate
1373  * their own labels.
1374  *
1375  * Returns:
1376  *      0		Label on packet was already correct
1377  *      EACCES		The packet failed the remote host accreditation.
1378  *      ENOMEM		Memory allocation failure.
1379  */
1380 int
1381 tsol_check_label_v6(const cred_t *credp, mblk_t **mpp, uint_t mode,
1382     ip_stack_t *ipst, pid_t pid)
1383 {
1384 	mblk_t *mp = *mpp;
1385 	ip6_t  *ip6h;
1386 	cred_t *effective_cred;
1387 	/*
1388 	 * Label option length is limited to IP_MAX_OPT_LENGTH for
1389 	 * symmetry with IPv4. Can be relaxed if needed
1390 	 */
1391 	uchar_t opt_storage[TSOL_MAX_IPV6_OPTION];
1392 	uint_t hlen;
1393 	uint_t sec_opt_len; /* label option length not including type, len */
1394 	int delta_remove = 0, delta_add;
1395 	int retv;
1396 	uchar_t	*after_secopt;
1397 	uchar_t	*secopt = NULL;
1398 	uchar_t	*ip6hbh;
1399 	uint_t	hbhlen;
1400 	boolean_t hbh_needed;
1401 
1402 	/*
1403 	 * Verify the destination is allowed to receive packets at
1404 	 * the security label of the message data. check_dest()
1405 	 * may create a new effective cred with a modified label
1406 	 * or label flags. Apply any such cred to the message block
1407 	 * for use in future routing decisions.
1408 	 */
1409 	ip6h = (ip6_t *)mp->b_rptr;
1410 	retv = tsol_check_dest(credp, &ip6h->ip6_dst, IPV6_VERSION,
1411 	    mode, &effective_cred);
1412 	if (retv != 0)
1413 		return (retv);
1414 
1415 	/*
1416 	 * Calculate the security label to be placed in the text
1417 	 * of the message (if any).
1418 	 */
1419 	if (effective_cred != NULL) {
1420 		if ((retv = tsol_compute_label_v6(effective_cred,
1421 		    &ip6h->ip6_dst, opt_storage, ipst)) != 0) {
1422 			crfree(effective_cred);
1423 			return (retv);
1424 		}
1425 		mblk_setcred(mp, effective_cred, pid);
1426 		crfree(effective_cred);
1427 	} else {
1428 		if ((retv = tsol_compute_label_v6(credp,
1429 		    &ip6h->ip6_dst, opt_storage, ipst)) != 0)
1430 			return (retv);
1431 	}
1432 
1433 	sec_opt_len = opt_storage[1];
1434 
1435 	if (ip6h->ip6_nxt == IPPROTO_HOPOPTS) {
1436 		ip6hbh = (uchar_t *)&ip6h[1];
1437 		hbhlen = (ip6hbh[1] + 1) << 3;
1438 		if (!tsol_find_secopt_v6(ip6hbh, hbhlen, &secopt,
1439 		    &after_secopt, &hbh_needed)) {
1440 			/*
1441 			 * This function should not see invalid messages.
1442 			 * If one occurs, it would indicate either an
1443 			 * option previously verified in the forwarding
1444 			 * path has been corrupted or an option was
1445 			 * incorrectly generated locally.
1446 			 */
1447 			ASSERT(0);
1448 			return (EACCES);
1449 		}
1450 	}
1451 
1452 	if (sec_opt_len == 0 && secopt == NULL) {
1453 		/*
1454 		 * The packet is not supposed to have a label, and it
1455 		 * does not have one currently
1456 		 */
1457 		return (0);
1458 	}
1459 
1460 	if (msg_getcred(mp, NULL) == NULL) {
1461 		mblk_setcred(mp, (cred_t *)credp, NOPID);
1462 	}
1463 
1464 	if (secopt != NULL && sec_opt_len != 0 &&
1465 	    (bcmp(opt_storage, secopt, sec_opt_len + 2) == 0)) {
1466 		/* The packet has the correct label already */
1467 		return (0);
1468 	}
1469 
1470 	/*
1471 	 * If there is an option there, then it must be the wrong one; delete.
1472 	 */
1473 	if (secopt != NULL) {
1474 		delta_remove = tsol_remove_secopt_v6(ip6h, MBLKL(mp));
1475 		mp->b_wptr += delta_remove;
1476 	}
1477 
1478 	/*
1479 	 * Make sure we have room for the worst-case addition. Add 2 bytes for
1480 	 * the hop-by-hop ext header's next header and length fields. Add
1481 	 * another 2 bytes for the label option type, len and then round
1482 	 * up to the next 8-byte multiple.
1483 	 */
1484 	hlen = (4 + sec_opt_len + 7) & ~7;
1485 	if (mp->b_wptr + hlen > mp->b_datap->db_lim) {
1486 		int copylen;
1487 		mblk_t *new_mp;
1488 		uint16_t hdr_len;
1489 
1490 		hdr_len = ip_hdr_length_v6(mp, ip6h);
1491 		/*
1492 		 * Allocate enough to be meaningful, but not *too* much.
1493 		 * Also all the IPv6 extension headers must be in the same mblk
1494 		 */
1495 		copylen = MBLKL(mp);
1496 		if (copylen > 256)
1497 			copylen = 256;
1498 		if (copylen < hdr_len)
1499 			copylen = hdr_len;
1500 		new_mp = allocb_tmpl(hlen + copylen +
1501 		    (mp->b_rptr - mp->b_datap->db_base), mp);
1502 		if (new_mp == NULL)
1503 			return (ENOMEM);
1504 
1505 		/* keep the bias */
1506 		new_mp->b_rptr += mp->b_rptr - mp->b_datap->db_base;
1507 		new_mp->b_wptr = new_mp->b_rptr + copylen;
1508 		bcopy(mp->b_rptr, new_mp->b_rptr, copylen);
1509 		new_mp->b_cont = mp;
1510 		if ((mp->b_rptr += copylen) >= mp->b_wptr) {
1511 			new_mp->b_cont = mp->b_cont;
1512 			freeb(mp);
1513 		}
1514 		*mpp = mp = new_mp;
1515 		ip6h = (ip6_t *)mp->b_rptr;
1516 	}
1517 
1518 	delta_add = tsol_prepend_option_v6(opt_storage, ip6h, MBLKL(mp));
1519 	if (delta_add == -1)
1520 		goto param_prob;
1521 
1522 	ASSERT(mp->b_wptr + delta_add <= DB_LIM(mp));
1523 	mp->b_wptr += delta_add;
1524 
1525 	return (0);
1526 
1527 param_prob:
1528 	return (EINVAL);
1529 }
1530 
1531 /*
1532  * Update the given IPv6 "sticky options" structure to contain the provided
1533  * label, which is encoded as an IPv6 option.  Existing label is removed if
1534  * necessary, and storage is allocated/freed/resized.
1535  *
1536  * Returns 0 on success, errno on failure.
1537  */
1538 int
1539 tsol_update_sticky(ip6_pkt_t *ipp, uint_t *labellen, const uchar_t *labelopt)
1540 {
1541 	int rawlen, optlen, newlen;
1542 	uchar_t *newopts;
1543 
1544 	/*
1545 	 * rawlen is the size of the IPv6 label to be inserted from labelopt.
1546 	 * optlen is the total length of that option, including any necessary
1547 	 * headers and padding.  newlen is the new size of the total hop-by-hop
1548 	 * options buffer, including user options.
1549 	 */
1550 	ASSERT(*labellen <= ipp->ipp_hopoptslen);
1551 	ASSERT((ipp->ipp_hopopts == NULL && ipp->ipp_hopoptslen == 0) ||
1552 	    (ipp->ipp_hopopts != NULL && ipp->ipp_hopoptslen != 0));
1553 
1554 	if ((rawlen = labelopt[1]) != 0) {
1555 		rawlen += 2;	/* add in header size */
1556 		optlen = (2 + rawlen + 7) & ~7;
1557 	} else {
1558 		optlen = 0;
1559 	}
1560 	newlen = ipp->ipp_hopoptslen + optlen - *labellen;
1561 	if (newlen == 0 && ipp->ipp_hopopts != NULL) {
1562 		/* Deleting all existing hop-by-hop options */
1563 		kmem_free(ipp->ipp_hopopts, ipp->ipp_hopoptslen);
1564 		ipp->ipp_hopopts = NULL;
1565 		ipp->ipp_fields &= ~IPPF_HOPOPTS;
1566 	} else if (optlen != *labellen) {
1567 		/* If the label not same size as last time, then reallocate */
1568 		if (newlen > IP6_MAX_OPT_LENGTH)
1569 			return (EHOSTUNREACH);
1570 		newopts = kmem_alloc(newlen, KM_NOSLEEP);
1571 		if (newopts == NULL)
1572 			return (ENOMEM);
1573 		/*
1574 		 * If the user has hop-by-hop stickyoptions set, then copy his
1575 		 * options in after the security label.
1576 		 */
1577 		if (ipp->ipp_hopoptslen > *labellen) {
1578 			bcopy(ipp->ipp_hopopts + *labellen, newopts + optlen,
1579 			    ipp->ipp_hopoptslen - *labellen);
1580 			/*
1581 			 * Stomp out any header gunk here - this was the
1582 			 * previous next-header and option length field.
1583 			 */
1584 			newopts[optlen] = IP6OPT_PADN;
1585 			newopts[optlen + 1] = 0;
1586 		}
1587 		if (ipp->ipp_hopopts != NULL)
1588 			kmem_free(ipp->ipp_hopopts, ipp->ipp_hopoptslen);
1589 		ipp->ipp_hopopts = (ip6_hbh_t *)newopts;
1590 	}
1591 	ipp->ipp_hopoptslen = newlen;
1592 	*labellen = optlen;
1593 
1594 	newopts = (uchar_t *)ipp->ipp_hopopts;
1595 
1596 	/* If there are any options, then fix up reported length */
1597 	if (newlen > 0) {
1598 		newopts[1] = (newlen + 7) / 8 - 1;
1599 		ipp->ipp_fields |= IPPF_HOPOPTS;
1600 	}
1601 
1602 	/* If there's a label, then insert it now */
1603 	if (optlen > 0) {
1604 		/* skip next-header and length fields */
1605 		newopts += 2;
1606 		bcopy(labelopt, newopts, rawlen);
1607 		newopts += rawlen;
1608 		/* make sure padding comes out right */
1609 		optlen -= 2 + rawlen;
1610 		if (optlen == 1) {
1611 			newopts[0] = IP6OPT_PAD1;
1612 		} else if (optlen > 1) {
1613 			newopts[0] = IP6OPT_PADN;
1614 			optlen -=  2;
1615 			newopts[1] = optlen;
1616 			if (optlen > 0)
1617 				bzero(newopts + 2, optlen);
1618 		}
1619 	}
1620 	return (0);
1621 }
1622 
1623 int
1624 tsol_update_options(uchar_t **opts, uint_t *totlen, uint_t *labellen,
1625     const uchar_t *labelopt)
1626 {
1627 	int optlen, newlen;
1628 	uchar_t *newopts;
1629 
1630 	optlen = (labelopt[IPOPT_OLEN] + 3) & ~3;
1631 	newlen = *totlen + optlen - *labellen;
1632 	if (optlen > *labellen) {
1633 		if (newlen > IP_MAX_OPT_LENGTH)
1634 			return (EHOSTUNREACH);
1635 		newopts = (uchar_t *)mi_alloc(newlen, BPRI_HI);
1636 		if (newopts == NULL)
1637 			return (ENOMEM);
1638 		if (*totlen > *labellen) {
1639 			bcopy(*opts + *labellen, newopts + optlen,
1640 			    *totlen - *labellen);
1641 		}
1642 		if (*opts != NULL)
1643 			mi_free((char *)*opts);
1644 		*opts = newopts;
1645 	} else if (optlen < *labellen) {
1646 		if (newlen == 0 && *opts != NULL) {
1647 			mi_free((char *)*opts);
1648 			*opts = NULL;
1649 		}
1650 		if (*totlen > *labellen) {
1651 			ovbcopy(*opts + *labellen, *opts + optlen,
1652 			    *totlen - *labellen);
1653 		}
1654 	}
1655 	*totlen = newlen;
1656 	*labellen = optlen;
1657 	if (optlen > 0) {
1658 		newopts = *opts;
1659 		bcopy(labelopt, newopts, optlen);
1660 		/* check if there are user-supplied options that follow */
1661 		if (optlen < newlen) {
1662 			/* compute amount of embedded alignment needed */
1663 			optlen -= newopts[IPOPT_OLEN];
1664 			newopts += newopts[IPOPT_OLEN];
1665 			while (--optlen >= 0)
1666 				*newopts++ = IPOPT_NOP;
1667 		} else if (optlen != newopts[IPOPT_OLEN]) {
1668 			/*
1669 			 * The label option is the only option and it is
1670 			 * not a multiple of 4 bytes.
1671 			 */
1672 			optlen -= newopts[IPOPT_OLEN];
1673 			newopts += newopts[IPOPT_OLEN];
1674 			while (--optlen >= 0)
1675 				*newopts++ = IPOPT_EOL;
1676 		}
1677 	}
1678 	return (0);
1679 }
1680 
1681 /*
1682  * This does the bulk of the processing for setting IPPROTO_IP {T_,}IP_OPTIONS.
1683  */
1684 boolean_t
1685 tsol_option_set(uchar_t **opts, uint_t *optlen, uint_t labellen,
1686     const uchar_t *useropts, uint_t userlen)
1687 {
1688 	int newlen;
1689 	uchar_t *newopts;
1690 
1691 	newlen = userlen + labellen;
1692 	if (newlen > *optlen) {
1693 		/* need more room */
1694 		newopts = (uchar_t *)mi_alloc(newlen, BPRI_HI);
1695 		if (newopts == NULL)
1696 			return (B_FALSE);
1697 		/*
1698 		 * The supplied *opts can't be NULL in this case,
1699 		 * since there's an existing label.
1700 		 */
1701 		if (labellen > 0)
1702 			bcopy(*opts, newopts, labellen);
1703 		if (*opts != NULL)
1704 			mi_free((char *)*opts);
1705 		*opts = newopts;
1706 	}
1707 
1708 	if (newlen == 0) {
1709 		/* special case -- no remaining IP options at all */
1710 		if (*opts != NULL) {
1711 			mi_free((char *)*opts);
1712 			*opts = NULL;
1713 		}
1714 	} else if (userlen > 0) {
1715 		/* merge in the user's options */
1716 		newopts = *opts;
1717 		if (labellen > 0) {
1718 			int extra = labellen - newopts[IPOPT_OLEN];
1719 
1720 			newopts += newopts[IPOPT_OLEN];
1721 			while (--extra >= 0)
1722 				*newopts++ = IPOPT_NOP;
1723 		}
1724 		bcopy(useropts, newopts, userlen);
1725 	}
1726 
1727 	*optlen = newlen;
1728 	return (B_TRUE);
1729 }
1730