1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * iptun - IP Tunneling Driver 28 * 29 * This module is a GLDv3 driver that implements virtual datalinks over IP 30 * (a.k.a, IP tunneling). The datalinks are managed through a dld ioctl 31 * interface (see iptun_ctl.c), and registered with GLDv3 using 32 * mac_register(). It implements the logic for various forms of IP (IPv4 or 33 * IPv6) encapsulation within IP (IPv4 or IPv6) by interacting with the ip 34 * module below it. Each virtual IP tunnel datalink has a conn_t associated 35 * with it representing the "outer" IP connection. 36 * 37 * The module implements the following locking semantics: 38 * 39 * Lookups and deletions in iptun_hash are synchronized using iptun_hash_lock. 40 * See comments above iptun_hash_lock for details. 41 * 42 * No locks are ever held while calling up to GLDv3. The general architecture 43 * of GLDv3 requires this, as the mac perimeter (essentially a lock) for a 44 * given link will be held while making downcalls (iptun_m_*() callbacks). 
45 * Because we need to hold locks while handling downcalls, holding these locks 46 * while issuing upcalls results in deadlock scenarios. See the block comment 47 * above iptun_task_cb() for details on how we safely issue upcalls without 48 * holding any locks. 49 * 50 * The contents of each iptun_t is protected by an iptun_mutex which is held 51 * in iptun_enter() (called by iptun_enter_by_linkid()), and exited in 52 * iptun_exit(). 53 * 54 * See comments in iptun_delete() and iptun_free() for details on how the 55 * iptun_t is deleted safely. 56 */ 57 58 #include <sys/types.h> 59 #include <sys/kmem.h> 60 #include <sys/errno.h> 61 #include <sys/modhash.h> 62 #include <sys/list.h> 63 #include <sys/strsun.h> 64 #include <sys/file.h> 65 #include <sys/systm.h> 66 #include <sys/tihdr.h> 67 #include <sys/param.h> 68 #include <sys/mac_provider.h> 69 #include <sys/mac_ipv4.h> 70 #include <sys/mac_ipv6.h> 71 #include <sys/mac_6to4.h> 72 #include <sys/tsol/tnet.h> 73 #include <sys/sunldi.h> 74 #include <netinet/in.h> 75 #include <netinet/ip6.h> 76 #include <inet/ip.h> 77 #include <inet/ip_ire.h> 78 #include <inet/ipsec_impl.h> 79 #include <sys/tsol/label.h> 80 #include <sys/tsol/tnet.h> 81 #include <inet/iptun.h> 82 #include "iptun_impl.h" 83 84 /* Do the tunnel type and address family match? */ 85 #define IPTUN_ADDR_MATCH(iptun_type, family) \ 86 ((iptun_type == IPTUN_TYPE_IPV4 && family == AF_INET) || \ 87 (iptun_type == IPTUN_TYPE_IPV6 && family == AF_INET6) || \ 88 (iptun_type == IPTUN_TYPE_6TO4 && family == AF_INET)) 89 90 #define IPTUN_HASH_KEY(key) ((mod_hash_key_t)(uintptr_t)(key)) 91 92 #define IPTUN_MIN_IPV4_MTU 576 /* ip.h still uses 68 (!) 
*/ 93 #define IPTUN_MIN_IPV6_MTU IPV6_MIN_MTU 94 #define IPTUN_MAX_IPV4_MTU (IP_MAXPACKET - sizeof (ipha_t)) 95 #define IPTUN_MAX_IPV6_MTU (IP_MAXPACKET - sizeof (ip6_t) - \ 96 sizeof (iptun_encaplim_t)) 97 98 #define IPTUN_MIN_HOPLIMIT 1 99 #define IPTUN_MAX_HOPLIMIT UINT8_MAX 100 101 #define IPTUN_MIN_ENCAPLIMIT 0 102 #define IPTUN_MAX_ENCAPLIMIT UINT8_MAX 103 104 #define IPTUN_IPSEC_REQ_MASK (IPSEC_PREF_REQUIRED | IPSEC_PREF_NEVER) 105 106 static iptun_encaplim_t iptun_encaplim_init = { 107 { IPPROTO_NONE, 0 }, 108 IP6OPT_TUNNEL_LIMIT, 109 1, 110 IPTUN_DEFAULT_ENCAPLIMIT, /* filled in with actual value later */ 111 IP6OPT_PADN, 112 1, 113 0 114 }; 115 116 /* 117 * Table containing per-iptun-type information. 118 * Since IPv6 can run over all of these we have the IPv6 min as the min MTU. 119 */ 120 static iptun_typeinfo_t iptun_type_table[] = { 121 { IPTUN_TYPE_IPV4, MAC_PLUGIN_IDENT_IPV4, IPV4_VERSION, 122 IPTUN_MIN_IPV6_MTU, IPTUN_MAX_IPV4_MTU, B_TRUE }, 123 { IPTUN_TYPE_IPV6, MAC_PLUGIN_IDENT_IPV6, IPV6_VERSION, 124 IPTUN_MIN_IPV6_MTU, IPTUN_MAX_IPV6_MTU, B_TRUE }, 125 { IPTUN_TYPE_6TO4, MAC_PLUGIN_IDENT_6TO4, IPV4_VERSION, 126 IPTUN_MIN_IPV6_MTU, IPTUN_MAX_IPV4_MTU, B_FALSE }, 127 { IPTUN_TYPE_UNKNOWN, NULL, 0, 0, 0, B_FALSE } 128 }; 129 130 /* 131 * iptun_hash is an iptun_t lookup table by link ID protected by 132 * iptun_hash_lock. While the hash table's integrity is maintained via 133 * internal locking in the mod_hash_*() functions, we need additional locking 134 * so that an iptun_t cannot be deleted after a hash lookup has returned an 135 * iptun_t and before iptun_lock has been entered. As such, we use 136 * iptun_hash_lock when doing lookups and removals from iptun_hash. 
137 */ 138 mod_hash_t *iptun_hash; 139 static kmutex_t iptun_hash_lock; 140 141 static uint_t iptun_tunnelcount; /* total for all stacks */ 142 kmem_cache_t *iptun_cache; 143 ddi_taskq_t *iptun_taskq; 144 145 typedef enum { 146 IPTUN_TASK_MTU_UPDATE, /* tell mac about new tunnel link MTU */ 147 IPTUN_TASK_LADDR_UPDATE, /* tell mac about new local address */ 148 IPTUN_TASK_RADDR_UPDATE, /* tell mac about new remote address */ 149 IPTUN_TASK_LINK_UPDATE, /* tell mac about new link state */ 150 IPTUN_TASK_PDATA_UPDATE /* tell mac about updated plugin data */ 151 } iptun_task_t; 152 153 typedef struct iptun_task_data_s { 154 iptun_task_t itd_task; 155 datalink_id_t itd_linkid; 156 } iptun_task_data_t; 157 158 static void iptun_task_dispatch(iptun_t *, iptun_task_t); 159 static int iptun_enter(iptun_t *); 160 static void iptun_exit(iptun_t *); 161 static void iptun_headergen(iptun_t *, boolean_t); 162 static void iptun_drop_pkt(mblk_t *, uint64_t *); 163 static void iptun_input(void *, mblk_t *, void *, ip_recv_attr_t *); 164 static void iptun_input_icmp(void *, mblk_t *, void *, ip_recv_attr_t *); 165 static void iptun_output(iptun_t *, mblk_t *); 166 static uint32_t iptun_get_maxmtu(iptun_t *, ip_xmit_attr_t *, uint32_t); 167 static uint32_t iptun_update_mtu(iptun_t *, ip_xmit_attr_t *, uint32_t); 168 static uint32_t iptun_get_dst_pmtu(iptun_t *, ip_xmit_attr_t *); 169 static void iptun_update_dst_pmtu(iptun_t *, ip_xmit_attr_t *); 170 static int iptun_setladdr(iptun_t *, const struct sockaddr_storage *); 171 172 static void iptun_output_6to4(iptun_t *, mblk_t *); 173 static void iptun_output_common(iptun_t *, ip_xmit_attr_t *, mblk_t *); 174 static boolean_t iptun_verifyicmp(conn_t *, void *, icmph_t *, icmp6_t *, 175 ip_recv_attr_t *); 176 177 static void iptun_notify(void *, ip_xmit_attr_t *, ixa_notify_type_t, 178 ixa_notify_arg_t); 179 180 static mac_callbacks_t iptun_m_callbacks; 181 182 static int 183 iptun_m_getstat(void *arg, uint_t stat, uint64_t *val) 184 { 
185 iptun_t *iptun = arg; 186 int err = 0; 187 188 switch (stat) { 189 case MAC_STAT_IERRORS: 190 *val = iptun->iptun_ierrors; 191 break; 192 case MAC_STAT_OERRORS: 193 *val = iptun->iptun_oerrors; 194 break; 195 case MAC_STAT_RBYTES: 196 *val = iptun->iptun_rbytes; 197 break; 198 case MAC_STAT_IPACKETS: 199 *val = iptun->iptun_ipackets; 200 break; 201 case MAC_STAT_OBYTES: 202 *val = iptun->iptun_obytes; 203 break; 204 case MAC_STAT_OPACKETS: 205 *val = iptun->iptun_opackets; 206 break; 207 case MAC_STAT_NORCVBUF: 208 *val = iptun->iptun_norcvbuf; 209 break; 210 case MAC_STAT_NOXMTBUF: 211 *val = iptun->iptun_noxmtbuf; 212 break; 213 default: 214 err = ENOTSUP; 215 } 216 217 return (err); 218 } 219 220 static int 221 iptun_m_start(void *arg) 222 { 223 iptun_t *iptun = arg; 224 int err; 225 226 if ((err = iptun_enter(iptun)) == 0) { 227 iptun->iptun_flags |= IPTUN_MAC_STARTED; 228 iptun_task_dispatch(iptun, IPTUN_TASK_LINK_UPDATE); 229 iptun_exit(iptun); 230 } 231 return (err); 232 } 233 234 static void 235 iptun_m_stop(void *arg) 236 { 237 iptun_t *iptun = arg; 238 239 if (iptun_enter(iptun) == 0) { 240 iptun->iptun_flags &= ~IPTUN_MAC_STARTED; 241 iptun_task_dispatch(iptun, IPTUN_TASK_LINK_UPDATE); 242 iptun_exit(iptun); 243 } 244 } 245 246 /* 247 * iptun_m_setpromisc() does nothing and always succeeds. This is because a 248 * tunnel data-link only ever receives packets that are destined exclusively 249 * for the local address of the tunnel. 250 */ 251 /* ARGSUSED */ 252 static int 253 iptun_m_setpromisc(void *arg, boolean_t on) 254 { 255 return (0); 256 } 257 258 /* ARGSUSED */ 259 static int 260 iptun_m_multicst(void *arg, boolean_t add, const uint8_t *addrp) 261 { 262 return (ENOTSUP); 263 } 264 265 /* 266 * iptun_m_unicst() sets the local address. 
267 */ 268 /* ARGSUSED */ 269 static int 270 iptun_m_unicst(void *arg, const uint8_t *addrp) 271 { 272 iptun_t *iptun = arg; 273 int err; 274 struct sockaddr_storage ss; 275 struct sockaddr_in *sin; 276 struct sockaddr_in6 *sin6; 277 278 if ((err = iptun_enter(iptun)) == 0) { 279 switch (iptun->iptun_typeinfo->iti_ipvers) { 280 case IPV4_VERSION: 281 sin = (struct sockaddr_in *)&ss; 282 sin->sin_family = AF_INET; 283 bcopy(addrp, &sin->sin_addr, sizeof (in_addr_t)); 284 break; 285 case IPV6_VERSION: 286 sin6 = (struct sockaddr_in6 *)&ss; 287 sin6->sin6_family = AF_INET6; 288 bcopy(addrp, &sin6->sin6_addr, sizeof (in6_addr_t)); 289 break; 290 default: 291 ASSERT(0); 292 } 293 err = iptun_setladdr(iptun, &ss); 294 iptun_exit(iptun); 295 } 296 return (err); 297 } 298 299 static mblk_t * 300 iptun_m_tx(void *arg, mblk_t *mpchain) 301 { 302 mblk_t *mp, *nmp; 303 iptun_t *iptun = arg; 304 305 if (!IS_IPTUN_RUNNING(iptun)) { 306 iptun_drop_pkt(mpchain, &iptun->iptun_noxmtbuf); 307 return (NULL); 308 } 309 310 for (mp = mpchain; mp != NULL; mp = nmp) { 311 nmp = mp->b_next; 312 mp->b_next = NULL; 313 iptun_output(iptun, mp); 314 } 315 316 return (NULL); 317 } 318 319 /* ARGSUSED */ 320 static int 321 iptun_m_setprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, 322 uint_t pr_valsize, const void *pr_val) 323 { 324 iptun_t *iptun = barg; 325 uint32_t value = *(uint32_t *)pr_val; 326 int err; 327 328 /* 329 * We need to enter this iptun_t since we'll be modifying the outer 330 * header. 
331 */ 332 if ((err = iptun_enter(iptun)) != 0) 333 return (err); 334 335 switch (pr_num) { 336 case MAC_PROP_IPTUN_HOPLIMIT: 337 if (value < IPTUN_MIN_HOPLIMIT || value > IPTUN_MAX_HOPLIMIT) { 338 err = EINVAL; 339 break; 340 } 341 if (value != iptun->iptun_hoplimit) { 342 iptun->iptun_hoplimit = (uint8_t)value; 343 iptun_headergen(iptun, B_TRUE); 344 } 345 break; 346 case MAC_PROP_IPTUN_ENCAPLIMIT: 347 if (iptun->iptun_typeinfo->iti_type != IPTUN_TYPE_IPV6 || 348 value > IPTUN_MAX_ENCAPLIMIT) { 349 err = EINVAL; 350 break; 351 } 352 if (value != iptun->iptun_encaplimit) { 353 iptun->iptun_encaplimit = (uint8_t)value; 354 iptun_headergen(iptun, B_TRUE); 355 } 356 break; 357 case MAC_PROP_MTU: { 358 uint32_t maxmtu = iptun_get_maxmtu(iptun, NULL, 0); 359 360 if (value < iptun->iptun_typeinfo->iti_minmtu || 361 value > maxmtu) { 362 err = EINVAL; 363 break; 364 } 365 iptun->iptun_flags |= IPTUN_FIXED_MTU; 366 if (value != iptun->iptun_mtu) { 367 iptun->iptun_mtu = value; 368 iptun_task_dispatch(iptun, IPTUN_TASK_MTU_UPDATE); 369 } 370 break; 371 } 372 default: 373 err = EINVAL; 374 } 375 iptun_exit(iptun); 376 return (err); 377 } 378 379 /* ARGSUSED */ 380 static int 381 iptun_m_getprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, 382 uint_t pr_flags, uint_t pr_valsize, void *pr_val, uint_t *perm) 383 { 384 iptun_t *iptun = barg; 385 mac_propval_range_t range; 386 boolean_t is_default = (pr_flags & MAC_PROP_DEFAULT); 387 boolean_t is_possible = (pr_flags & MAC_PROP_POSSIBLE); 388 int err; 389 390 if ((err = iptun_enter(iptun)) != 0) 391 return (err); 392 393 if ((pr_flags & ~(MAC_PROP_DEFAULT | MAC_PROP_POSSIBLE)) != 0) { 394 err = ENOTSUP; 395 goto done; 396 } 397 if (is_default && is_possible) { 398 err = EINVAL; 399 goto done; 400 } 401 402 *perm = MAC_PROP_PERM_RW; 403 404 if (is_possible) { 405 if (pr_valsize < sizeof (mac_propval_range_t)) { 406 err = EINVAL; 407 goto done; 408 } 409 range.mpr_count = 1; 410 range.mpr_type = MAC_PROPVAL_UINT32; 411 } 
else if (pr_valsize < sizeof (uint32_t)) { 412 err = EINVAL; 413 goto done; 414 } 415 416 switch (pr_num) { 417 case MAC_PROP_IPTUN_HOPLIMIT: 418 if (is_possible) { 419 range.range_uint32[0].mpur_min = IPTUN_MIN_HOPLIMIT; 420 range.range_uint32[0].mpur_max = IPTUN_MAX_HOPLIMIT; 421 } else if (is_default) { 422 *(uint32_t *)pr_val = IPTUN_DEFAULT_HOPLIMIT; 423 } else { 424 *(uint32_t *)pr_val = iptun->iptun_hoplimit; 425 } 426 break; 427 case MAC_PROP_IPTUN_ENCAPLIMIT: 428 if (iptun->iptun_typeinfo->iti_type != IPTUN_TYPE_IPV6) { 429 err = ENOTSUP; 430 goto done; 431 } 432 if (is_possible) { 433 range.range_uint32[0].mpur_min = IPTUN_MIN_ENCAPLIMIT; 434 range.range_uint32[0].mpur_max = IPTUN_MAX_ENCAPLIMIT; 435 } else if (is_default) { 436 *(uint32_t *)pr_val = IPTUN_DEFAULT_ENCAPLIMIT; 437 } else { 438 *(uint32_t *)pr_val = iptun->iptun_encaplimit; 439 } 440 break; 441 case MAC_PROP_MTU: { 442 uint32_t maxmtu = iptun_get_maxmtu(iptun, NULL, 0); 443 444 if (is_possible) { 445 range.range_uint32[0].mpur_min = 446 iptun->iptun_typeinfo->iti_minmtu; 447 range.range_uint32[0].mpur_max = maxmtu; 448 } else { 449 /* 450 * The MAC module knows the current value and should 451 * never call us for it. There is also no default 452 * MTU, as by default, it is a dynamic property. 453 */ 454 err = ENOTSUP; 455 goto done; 456 } 457 break; 458 } 459 default: 460 err = EINVAL; 461 goto done; 462 } 463 if (is_possible) 464 bcopy(&range, pr_val, sizeof (range)); 465 done: 466 iptun_exit(iptun); 467 return (err); 468 } 469 470 uint_t 471 iptun_count(void) 472 { 473 return (iptun_tunnelcount); 474 } 475 476 /* 477 * Enter an iptun_t exclusively. This is essentially just a mutex, but we 478 * don't allow iptun_enter() to succeed on a tunnel if it's in the process of 479 * being deleted. 
480 */ 481 static int 482 iptun_enter(iptun_t *iptun) 483 { 484 mutex_enter(&iptun->iptun_lock); 485 while (iptun->iptun_flags & IPTUN_DELETE_PENDING) 486 cv_wait(&iptun->iptun_enter_cv, &iptun->iptun_lock); 487 if (iptun->iptun_flags & IPTUN_CONDEMNED) { 488 mutex_exit(&iptun->iptun_lock); 489 return (ENOENT); 490 } 491 return (0); 492 } 493 494 /* 495 * Exit the tunnel entered in iptun_enter(). 496 */ 497 static void 498 iptun_exit(iptun_t *iptun) 499 { 500 mutex_exit(&iptun->iptun_lock); 501 } 502 503 /* 504 * Enter the IP tunnel instance by datalink ID. 505 */ 506 static int 507 iptun_enter_by_linkid(datalink_id_t linkid, iptun_t **iptun) 508 { 509 int err; 510 511 mutex_enter(&iptun_hash_lock); 512 if (mod_hash_find(iptun_hash, IPTUN_HASH_KEY(linkid), 513 (mod_hash_val_t *)iptun) == 0) 514 err = iptun_enter(*iptun); 515 else 516 err = ENOENT; 517 if (err != 0) 518 *iptun = NULL; 519 mutex_exit(&iptun_hash_lock); 520 return (err); 521 } 522 523 /* 524 * Handle tasks that were deferred through the iptun_taskq because they require 525 * calling up to the mac module, and we can't call up to the mac module while 526 * holding locks. 527 * 528 * This is tricky to get right without introducing race conditions and 529 * deadlocks with the mac module, as we cannot issue an upcall while in the 530 * iptun_t. The reason is that upcalls may try and enter the mac perimeter, 531 * while iptun callbacks (such as iptun_m_setprop()) called from the mac 532 * module will already have the perimeter held, and will then try and enter 533 * the iptun_t. You can see the lock ordering problem with this; this will 534 * deadlock. 535 * 536 * The safe way to do this is to enter the iptun_t in question and copy the 537 * information we need out of it so that we can exit it and know that the 538 * information being passed up to the upcalls won't be subject to modification 539 * by other threads. 
 * The problem now is that we need to exit it prior to
 * issuing the upcall, but once we do this, a thread could come along and
 * delete the iptun_t and thus the mac handle required to issue the upcall.
 * To prevent this, we set the IPTUN_UPCALL_PENDING flag prior to exiting the
 * iptun_t. This flag is the condition associated with iptun_upcall_cv, which
 * iptun_delete() will cv_wait() on. When the upcall completes, we clear
 * IPTUN_UPCALL_PENDING and cv_signal() any potentially waiting
 * iptun_delete(). We can thus still safely use iptun->iptun_mh after having
 * exited the iptun_t.
 *
 * arg is the iptun_task_data_t allocated by iptun_task_dispatch(); it is
 * freed here as soon as its contents have been copied out.
 */
static void
iptun_task_cb(void *arg)
{
	iptun_task_data_t	*itd = arg;
	iptun_task_t		task = itd->itd_task;
	datalink_id_t		linkid = itd->itd_linkid;
	iptun_t			*iptun;
	uint32_t		mtu;
	iptun_addr_t		addr;
	link_state_t		linkstate;
	size_t			header_size;
	iptun_header_t		header;

	/* task and linkid were copied above; the request is no longer needed */
	kmem_free(itd, sizeof (*itd));

	/*
	 * Note that if the lookup fails, it's because the tunnel was deleted
	 * between the time the task was dispatched and now. That isn't an
	 * error.
	 */
	if (iptun_enter_by_linkid(linkid, &iptun) != 0)
		return;

	iptun->iptun_flags |= IPTUN_UPCALL_PENDING;

	/* Phase 1: snapshot what the upcall needs while inside the iptun_t. */
	switch (task) {
	case IPTUN_TASK_MTU_UPDATE:
		mtu = iptun->iptun_mtu;
		break;
	case IPTUN_TASK_LADDR_UPDATE:
		addr = iptun->iptun_laddr;
		break;
	case IPTUN_TASK_RADDR_UPDATE:
		addr = iptun->iptun_raddr;
		break;
	case IPTUN_TASK_LINK_UPDATE:
		linkstate = IS_IPTUN_RUNNING(iptun) ?
		    LINK_STATE_UP : LINK_STATE_DOWN;
		break;
	case IPTUN_TASK_PDATA_UPDATE:
		header_size = iptun->iptun_header_size;
		header = iptun->iptun_header;
		break;
	default:
		ASSERT(0);
	}

	iptun_exit(iptun);

	/* Phase 2: issue the upcall from the snapshot, with no locks held. */
	switch (task) {
	case IPTUN_TASK_MTU_UPDATE:
		(void) mac_maxsdu_update(iptun->iptun_mh, mtu);
		break;
	case IPTUN_TASK_LADDR_UPDATE:
		mac_unicst_update(iptun->iptun_mh, (uint8_t *)&addr.ia_addr);
		break;
	case IPTUN_TASK_RADDR_UPDATE:
		mac_dst_update(iptun->iptun_mh, (uint8_t *)&addr.ia_addr);
		break;
	case IPTUN_TASK_LINK_UPDATE:
		mac_link_update(iptun->iptun_mh, linkstate);
		break;
	case IPTUN_TASK_PDATA_UPDATE:
		/* a zero header size means there is no plugin data to pass */
		if (mac_pdata_update(iptun->iptun_mh,
		    header_size == 0 ? NULL : &header, header_size) != 0)
			atomic_inc_64(&iptun->iptun_taskq_fail);
		break;
	}

	/* Phase 3: clear the pending flag and wake a waiter on the cv. */
	mutex_enter(&iptun->iptun_lock);
	iptun->iptun_flags &= ~IPTUN_UPCALL_PENDING;
	cv_signal(&iptun->iptun_upcall_cv);
	mutex_exit(&iptun->iptun_lock);
}

/*
 * Queue a deferred upcall of the given kind for this tunnel on iptun_taskq.
 * The request records the tunnel's link ID rather than a pointer to it;
 * iptun_task_cb() looks the tunnel up again when the task runs. Neither the
 * allocation nor the dispatch sleeps; if either fails, the task is not
 * queued and iptun_taskq_fail is incremented.
 */
static void
iptun_task_dispatch(iptun_t *iptun, iptun_task_t iptun_task)
{
	iptun_task_data_t	*itd;

	itd = kmem_alloc(sizeof (*itd), KM_NOSLEEP);
	if (itd == NULL) {
		atomic_inc_64(&iptun->iptun_taskq_fail);
		return;
	}
	itd->itd_task = iptun_task;
	itd->itd_linkid = iptun->iptun_linkid;
	/* a non-zero return is treated as a failed dispatch; free the request */
	if (ddi_taskq_dispatch(iptun_taskq, iptun_task_cb, itd, DDI_NOSLEEP)) {
		atomic_inc_64(&iptun->iptun_taskq_fail);
		kmem_free(itd, sizeof (*itd));
	}
}

/*
 * Convert an iptun_addr_t to sockaddr_storage.
644 */ 645 static void 646 iptun_getaddr(iptun_addr_t *iptun_addr, struct sockaddr_storage *ss) 647 { 648 struct sockaddr_in *sin; 649 struct sockaddr_in6 *sin6; 650 651 bzero(ss, sizeof (*ss)); 652 switch (iptun_addr->ia_family) { 653 case AF_INET: 654 sin = (struct sockaddr_in *)ss; 655 sin->sin_addr.s_addr = iptun_addr->ia_addr.iau_addr4; 656 break; 657 case AF_INET6: 658 sin6 = (struct sockaddr_in6 *)ss; 659 sin6->sin6_addr = iptun_addr->ia_addr.iau_addr6; 660 break; 661 default: 662 ASSERT(0); 663 } 664 ss->ss_family = iptun_addr->ia_family; 665 } 666 667 /* 668 * General purpose function to set an IP tunnel source or destination address. 669 */ 670 static int 671 iptun_setaddr(iptun_type_t iptun_type, iptun_addr_t *iptun_addr, 672 const struct sockaddr_storage *ss) 673 { 674 if (!IPTUN_ADDR_MATCH(iptun_type, ss->ss_family)) 675 return (EINVAL); 676 677 switch (ss->ss_family) { 678 case AF_INET: { 679 struct sockaddr_in *sin = (struct sockaddr_in *)ss; 680 681 if ((sin->sin_addr.s_addr == INADDR_ANY) || 682 (sin->sin_addr.s_addr == INADDR_BROADCAST) || 683 CLASSD(sin->sin_addr.s_addr)) { 684 return (EADDRNOTAVAIL); 685 } 686 iptun_addr->ia_addr.iau_addr4 = sin->sin_addr.s_addr; 687 break; 688 } 689 case AF_INET6: { 690 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)ss; 691 692 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) || 693 IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr) || 694 IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 695 return (EADDRNOTAVAIL); 696 } 697 iptun_addr->ia_addr.iau_addr6 = sin6->sin6_addr; 698 break; 699 } 700 default: 701 return (EAFNOSUPPORT); 702 } 703 iptun_addr->ia_family = ss->ss_family; 704 return (0); 705 } 706 707 static int 708 iptun_setladdr(iptun_t *iptun, const struct sockaddr_storage *laddr) 709 { 710 return (iptun_setaddr(iptun->iptun_typeinfo->iti_type, 711 &iptun->iptun_laddr, laddr)); 712 } 713 714 static int 715 iptun_setraddr(iptun_t *iptun, const struct sockaddr_storage *raddr) 716 { 717 if 
(!(iptun->iptun_typeinfo->iti_hasraddr)) 718 return (EINVAL); 719 return (iptun_setaddr(iptun->iptun_typeinfo->iti_type, 720 &iptun->iptun_raddr, raddr)); 721 } 722 723 static boolean_t 724 iptun_canbind(iptun_t *iptun) 725 { 726 /* 727 * A tunnel may bind when its source address has been set, and if its 728 * tunnel type requires one, also its destination address. 729 */ 730 return ((iptun->iptun_flags & IPTUN_LADDR) && 731 ((iptun->iptun_flags & IPTUN_RADDR) || 732 !(iptun->iptun_typeinfo->iti_hasraddr))); 733 } 734 735 /* 736 * Verify that the local address is valid, and insert in the fanout 737 */ 738 static int 739 iptun_bind(iptun_t *iptun) 740 { 741 conn_t *connp = iptun->iptun_connp; 742 int error = 0; 743 ip_xmit_attr_t *ixa; 744 iulp_t uinfo; 745 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 746 747 /* Get an exclusive ixa for this thread, and replace conn_ixa */ 748 ixa = conn_get_ixa(connp, B_TRUE); 749 if (ixa == NULL) 750 return (ENOMEM); 751 ASSERT(ixa->ixa_refcnt >= 2); 752 ASSERT(ixa == connp->conn_ixa); 753 754 /* We create PMTU state including for 6to4 */ 755 ixa->ixa_flags |= IXAF_PMTU_DISCOVERY; 756 757 ASSERT(iptun_canbind(iptun)); 758 759 mutex_enter(&connp->conn_lock); 760 /* 761 * Note that conn_proto can't be set since the upper protocol 762 * can be both 41 and 4 when IPv6 and IPv4 are over the same tunnel. 763 * ipcl_iptun_classify doesn't use conn_proto. 
764 */ 765 connp->conn_ipversion = iptun->iptun_typeinfo->iti_ipvers; 766 767 switch (iptun->iptun_typeinfo->iti_type) { 768 case IPTUN_TYPE_IPV4: 769 IN6_IPADDR_TO_V4MAPPED(iptun->iptun_laddr4, 770 &connp->conn_laddr_v6); 771 IN6_IPADDR_TO_V4MAPPED(iptun->iptun_raddr4, 772 &connp->conn_faddr_v6); 773 ixa->ixa_flags |= IXAF_IS_IPV4; 774 if (ip_laddr_verify_v4(iptun->iptun_laddr4, IPCL_ZONEID(connp), 775 ipst, B_FALSE) != IPVL_UNICAST_UP) { 776 mutex_exit(&connp->conn_lock); 777 error = EADDRNOTAVAIL; 778 goto done; 779 } 780 break; 781 case IPTUN_TYPE_IPV6: 782 connp->conn_laddr_v6 = iptun->iptun_laddr6; 783 connp->conn_faddr_v6 = iptun->iptun_raddr6; 784 ixa->ixa_flags &= ~IXAF_IS_IPV4; 785 /* We use a zero scopeid for now */ 786 if (ip_laddr_verify_v6(&iptun->iptun_laddr6, IPCL_ZONEID(connp), 787 ipst, B_FALSE, 0) != IPVL_UNICAST_UP) { 788 mutex_exit(&connp->conn_lock); 789 error = EADDRNOTAVAIL; 790 goto done; 791 } 792 break; 793 case IPTUN_TYPE_6TO4: 794 IN6_IPADDR_TO_V4MAPPED(iptun->iptun_laddr4, 795 &connp->conn_laddr_v6); 796 IN6_IPADDR_TO_V4MAPPED(INADDR_ANY, &connp->conn_faddr_v6); 797 ixa->ixa_flags |= IXAF_IS_IPV4; 798 mutex_exit(&connp->conn_lock); 799 800 switch (ip_laddr_verify_v4(iptun->iptun_laddr4, 801 IPCL_ZONEID(connp), ipst, B_FALSE)) { 802 case IPVL_UNICAST_UP: 803 case IPVL_UNICAST_DOWN: 804 break; 805 default: 806 error = EADDRNOTAVAIL; 807 goto done; 808 } 809 goto insert; 810 } 811 812 /* In case previous destination was multirt */ 813 ip_attr_newdst(ixa); 814 815 /* 816 * When we set a tunnel's destination address, we do not 817 * care if the destination is reachable. Transient routing 818 * issues should not inhibit the creation of a tunnel 819 * interface, for example. Thus we pass B_FALSE here. 
820 */ 821 connp->conn_saddr_v6 = connp->conn_laddr_v6; 822 mutex_exit(&connp->conn_lock); 823 824 /* As long as the MTU is large we avoid fragmentation */ 825 ixa->ixa_flags |= IXAF_DONTFRAG | IXAF_PMTU_IPV4_DF; 826 827 /* We handle IPsec in iptun_output_common */ 828 error = ip_attr_connect(connp, ixa, &connp->conn_saddr_v6, 829 &connp->conn_faddr_v6, &connp->conn_faddr_v6, 0, 830 &connp->conn_saddr_v6, &uinfo, 0); 831 832 if (error != 0) 833 goto done; 834 835 /* saddr shouldn't change since it was already set */ 836 ASSERT(IN6_ARE_ADDR_EQUAL(&connp->conn_laddr_v6, 837 &connp->conn_saddr_v6)); 838 839 /* We set IXAF_VERIFY_PMTU to catch PMTU increases */ 840 ixa->ixa_flags |= IXAF_VERIFY_PMTU; 841 ASSERT(uinfo.iulp_mtu != 0); 842 843 /* 844 * Allow setting new policies. 845 * The addresses/ports are already set, thus the IPsec policy calls 846 * can handle their passed-in conn's. 847 */ 848 connp->conn_policy_cached = B_FALSE; 849 850 insert: 851 error = ipcl_conn_insert(connp); 852 if (error != 0) 853 goto done; 854 855 /* Record this as the "last" send even though we haven't sent any */ 856 connp->conn_v6lastdst = connp->conn_faddr_v6; 857 858 iptun->iptun_flags |= IPTUN_BOUND; 859 /* 860 * Now that we're bound with ip below us, this is a good 861 * time to initialize the destination path MTU and to 862 * re-calculate the tunnel's link MTU. 
863 */ 864 (void) iptun_update_mtu(iptun, ixa, 0); 865 866 if (IS_IPTUN_RUNNING(iptun)) 867 iptun_task_dispatch(iptun, IPTUN_TASK_LINK_UPDATE); 868 869 done: 870 ixa_refrele(ixa); 871 return (error); 872 } 873 874 static void 875 iptun_unbind(iptun_t *iptun) 876 { 877 ASSERT(iptun->iptun_flags & IPTUN_BOUND); 878 ASSERT(mutex_owned(&iptun->iptun_lock) || 879 (iptun->iptun_flags & IPTUN_CONDEMNED)); 880 ip_unbind(iptun->iptun_connp); 881 iptun->iptun_flags &= ~IPTUN_BOUND; 882 if (!(iptun->iptun_flags & IPTUN_CONDEMNED)) 883 iptun_task_dispatch(iptun, IPTUN_TASK_LINK_UPDATE); 884 } 885 886 /* 887 * Re-generate the template data-link header for a given IP tunnel given the 888 * tunnel's current parameters. 889 */ 890 static void 891 iptun_headergen(iptun_t *iptun, boolean_t update_mac) 892 { 893 switch (iptun->iptun_typeinfo->iti_ipvers) { 894 case IPV4_VERSION: 895 /* 896 * We only need to use a custom IP header if the administrator 897 * has supplied a non-default hoplimit. 898 */ 899 if (iptun->iptun_hoplimit == IPTUN_DEFAULT_HOPLIMIT) { 900 iptun->iptun_header_size = 0; 901 break; 902 } 903 iptun->iptun_header_size = sizeof (ipha_t); 904 iptun->iptun_header4.ipha_version_and_hdr_length = 905 IP_SIMPLE_HDR_VERSION; 906 iptun->iptun_header4.ipha_fragment_offset_and_flags = 907 htons(IPH_DF); 908 iptun->iptun_header4.ipha_ttl = iptun->iptun_hoplimit; 909 break; 910 case IPV6_VERSION: { 911 ip6_t *ip6hp = &iptun->iptun_header6.it6h_ip6h; 912 913 /* 914 * We only need to use a custom IPv6 header if either the 915 * administrator has supplied a non-default hoplimit, or we 916 * need to include an encapsulation limit option in the outer 917 * header. 
918 */ 919 if (iptun->iptun_hoplimit == IPTUN_DEFAULT_HOPLIMIT && 920 iptun->iptun_encaplimit == 0) { 921 iptun->iptun_header_size = 0; 922 break; 923 } 924 925 (void) memset(ip6hp, 0, sizeof (*ip6hp)); 926 if (iptun->iptun_encaplimit == 0) { 927 iptun->iptun_header_size = sizeof (ip6_t); 928 ip6hp->ip6_nxt = IPPROTO_NONE; 929 } else { 930 iptun_encaplim_t *iel; 931 932 iptun->iptun_header_size = sizeof (iptun_ipv6hdrs_t); 933 /* 934 * The mac_ipv6 plugin requires ip6_plen to be in host 935 * byte order and reflect the extension headers 936 * present in the template. The actual network byte 937 * order ip6_plen will be set on a per-packet basis on 938 * transmit. 939 */ 940 ip6hp->ip6_plen = sizeof (*iel); 941 ip6hp->ip6_nxt = IPPROTO_DSTOPTS; 942 iel = &iptun->iptun_header6.it6h_encaplim; 943 *iel = iptun_encaplim_init; 944 iel->iel_telopt.ip6ot_encap_limit = 945 iptun->iptun_encaplimit; 946 } 947 948 ip6hp->ip6_hlim = iptun->iptun_hoplimit; 949 break; 950 } 951 } 952 953 if (update_mac) 954 iptun_task_dispatch(iptun, IPTUN_TASK_PDATA_UPDATE); 955 } 956 957 /* 958 * Insert inbound and outbound IPv4 and IPv6 policy into the given policy 959 * head. 960 */ 961 static boolean_t 962 iptun_insert_simple_policies(ipsec_policy_head_t *ph, ipsec_act_t *actp, 963 uint_t n, netstack_t *ns) 964 { 965 int f = IPSEC_AF_V4; 966 967 if (!ipsec_polhead_insert(ph, actp, n, f, IPSEC_TYPE_INBOUND, ns) || 968 !ipsec_polhead_insert(ph, actp, n, f, IPSEC_TYPE_OUTBOUND, ns)) 969 return (B_FALSE); 970 971 f = IPSEC_AF_V6; 972 return (ipsec_polhead_insert(ph, actp, n, f, IPSEC_TYPE_INBOUND, ns) && 973 ipsec_polhead_insert(ph, actp, n, f, IPSEC_TYPE_OUTBOUND, ns)); 974 } 975 976 /* 977 * Used to set IPsec policy when policy is set through the IPTUN_CREATE or 978 * IPTUN_MODIFY ioctls. 
 */
static int
iptun_set_sec_simple(iptun_t *iptun, const ipsec_req_t *ipsr)
{
	int		rc = 0;
	uint_t		nact;
	ipsec_act_t	*actp = NULL;
	boolean_t	clear_all, old_policy = B_FALSE;
	ipsec_tun_pol_t	*itp;
	char		name[MAXLINKNAMELEN];
	uint64_t	gen;
	netstack_t	*ns = iptun->iptun_ns;

	/* Can't specify self-encap on a tunnel. */
	if (ipsr->ipsr_self_encap_req != 0)
		return (EINVAL);

	/*
	 * If it's a "clear-all" entry, unset the security flags and resume
	 * normal cleartext (or inherit-from-global) policy.
	 */
	clear_all = ((ipsr->ipsr_ah_req & IPTUN_IPSEC_REQ_MASK) == 0 &&
	    (ipsr->ipsr_esp_req & IPTUN_IPSEC_REQ_MASK) == 0);

	ASSERT(mutex_owned(&iptun->iptun_lock));
	itp = iptun->iptun_itp;
	if (itp == NULL) {
		/* Nothing to clear if there is no tunnel policy yet. */
		if (clear_all)
			goto bail;
		if ((rc = dls_mgmt_get_linkinfo(iptun->iptun_linkid, name, NULL,
		    NULL, NULL)) != 0)
			goto bail;
		ASSERT(name[0] != '\0');
		if ((itp = create_tunnel_policy(name, &rc, &gen, ns)) == NULL)
			goto bail;
		iptun->iptun_itp = itp;
	}

	/* Allocate the actvec now, before holding itp or polhead locks. */
	ipsec_actvec_from_req(ipsr, &actp, &nact, ns);
	if (actp == NULL) {
		rc = ENOMEM;
		goto bail;
	}

	/*
	 * Just write on the active polhead.  Save the primary/secondary stuff
	 * for spdsock operations.
	 *
	 * Mutex because we need to write to the polhead AND flags atomically.
	 * Other threads will acquire the polhead lock as a reader if the
	 * (unprotected) flag is set.
	 */
	mutex_enter(&itp->itp_lock);
	if (itp->itp_flags & ITPF_P_TUNNEL) {
		/* Oops, we lost a race.  Let's get out of here. */
		rc = EBUSY;
		goto mutex_bail;
	}
	old_policy = ((itp->itp_flags & ITPF_P_ACTIVE) != 0);

	if (old_policy) {
		/*
		 * Save the currently active policy in the inactive polhead so
		 * that it can be swapped back in at recover_bail on failure.
		 */
		ITPF_CLONE(itp->itp_flags);
		rc = ipsec_copy_polhead(itp->itp_policy, itp->itp_inactive, ns);
		if (rc != 0) {
			/* inactive has already been cleared. */
			itp->itp_flags &= ~ITPF_IFLAGS;
			goto mutex_bail;
		}
		rw_enter(&itp->itp_policy->iph_lock, RW_WRITER);
		ipsec_polhead_flush(itp->itp_policy, ns);
	} else {
		/* Else assume itp->itp_policy is already flushed. */
		rw_enter(&itp->itp_policy->iph_lock, RW_WRITER);
	}

	if (clear_all) {
		ASSERT(avl_numnodes(&itp->itp_policy->iph_rulebyid) == 0);
		itp->itp_flags &= ~ITPF_PFLAGS;
		rw_exit(&itp->itp_policy->iph_lock);
		old_policy = B_FALSE;	/* Clear out the inactive one too. */
		goto recover_bail;
	}

	if (iptun_insert_simple_policies(itp->itp_policy, actp, nact, ns)) {
		rw_exit(&itp->itp_policy->iph_lock);
		/*
		 * Adjust MTU and make sure the DL side knows what's up.
		 */
		itp->itp_flags = ITPF_P_ACTIVE;
		(void) iptun_update_mtu(iptun, NULL, 0);
		old_policy = B_FALSE;	/* Blank out inactive - we succeeded */
	} else {
		rw_exit(&itp->itp_policy->iph_lock);
		rc = ENOMEM;
	}

recover_bail:
	if (old_policy) {
		/* Recover the saved policy into the active polhead. */
		ipsec_swap_policy(itp->itp_policy, itp->itp_inactive, ns);
		ITPF_SWAP(itp->itp_flags);
	}

	/* Clear policy in inactive polhead. */
	itp->itp_flags &= ~ITPF_IFLAGS;
	rw_enter(&itp->itp_inactive->iph_lock, RW_WRITER);
	ipsec_polhead_flush(itp->itp_inactive, ns);
	rw_exit(&itp->itp_inactive->iph_lock);

mutex_bail:
	mutex_exit(&itp->itp_lock);

bail:
	if (actp != NULL)
		ipsec_actvec_free(actp, nact);

	return (rc);
}

/*
 * Return the iptun_type_table entry for the given tunnel type.  If no entry
 * matches, the table's terminating IPTUN_TYPE_UNKNOWN entry is returned, so
 * the result is never NULL and callers must check iti_type.
 */
static iptun_typeinfo_t *
iptun_gettypeinfo(iptun_type_t type)
{
	int i;

	for (i = 0; iptun_type_table[i].iti_type != IPTUN_TYPE_UNKNOWN; i++) {
		if (iptun_type_table[i].iti_type == type)
			break;
	}
	return (&iptun_type_table[i]);
}

/*
 * Set the parameters included in ik on the tunnel iptun.  Parameters that can
 * only be set at creation time are set in iptun_create().
 *
 * Returns 0 on success or an errno value.  On failure after the local
 * address has been processed, the original addresses and flags are restored.
 */
static int
iptun_setparams(iptun_t *iptun, const iptun_kparams_t *ik)
{
	int		err = 0;
	netstack_t	*ns = iptun->iptun_ns;
	iptun_addr_t	orig_laddr, orig_raddr;
	uint_t		orig_flags = iptun->iptun_flags;

	if (ik->iptun_kparam_flags & IPTUN_KPARAM_LADDR) {
		if (orig_flags & IPTUN_LADDR)
			orig_laddr = iptun->iptun_laddr;
		/* Nothing else has been modified yet, so return directly. */
		if ((err = iptun_setladdr(iptun, &ik->iptun_kparam_laddr)) != 0)
			return (err);
		iptun->iptun_flags |= IPTUN_LADDR;
	}

	if (ik->iptun_kparam_flags & IPTUN_KPARAM_RADDR) {
		if (orig_flags & IPTUN_RADDR)
			orig_raddr = iptun->iptun_raddr;
		if ((err = iptun_setraddr(iptun, &ik->iptun_kparam_raddr)) != 0)
			goto done;
		iptun->iptun_flags |= IPTUN_RADDR;
	}

	if (ik->iptun_kparam_flags & IPTUN_KPARAM_SECINFO) {
		/*
		 * Set IPsec policy originating from the ifconfig(1M) command
		 * line.  This is traditionally called "simple" policy because
		 * the ipsec_req_t (iptun_kparam_secinfo) can only describe a
		 * simple policy of "do ESP on everything" and/or "do AH on
		 * everything" (as opposed to the rich policy that can be
		 * defined with ipsecconf(1M)).
		 */
		if (iptun->iptun_typeinfo->iti_type == IPTUN_TYPE_6TO4) {
			/*
			 * Can't set security properties for automatic
			 * tunnels.
			 */
			err = EINVAL;
			goto done;
		}

		if (!ipsec_loaded(ns->netstack_ipsec)) {
			/* If IPsec can be loaded, try and load it now. */
			if (ipsec_failed(ns->netstack_ipsec)) {
				err = EPROTONOSUPPORT;
				goto done;
			}
			ipsec_loader_loadnow(ns->netstack_ipsec);
			/*
			 * ipsec_loader_loadnow() returns while IPsec is
			 * loaded asynchronously.  While a method exists to
			 * wait for IPsec to load (ipsec_loader_wait()), it
			 * requires use of a STREAMS queue to do a qwait().
			 * We're not in STREAMS context here, and so we can't
			 * use it.  This is not a problem in practice because
			 * in the vast majority of cases, key management and
			 * global policy will have loaded before any tunnels
			 * are plumbed, and so IPsec will already have been
			 * loaded.
			 */
			err = EAGAIN;
			goto done;
		}

		err = iptun_set_sec_simple(iptun, &ik->iptun_kparam_secinfo);
		if (err == 0) {
			iptun->iptun_flags |= IPTUN_SIMPLE_POLICY;
			iptun->iptun_simple_policy = ik->iptun_kparam_secinfo;
		}
	}
done:
	if (err != 0) {
		/* Restore original source and destination. */
		if (ik->iptun_kparam_flags & IPTUN_KPARAM_LADDR &&
		    (orig_flags & IPTUN_LADDR))
			iptun->iptun_laddr = orig_laddr;
		if ((ik->iptun_kparam_flags & IPTUN_KPARAM_RADDR) &&
		    (orig_flags & IPTUN_RADDR))
			iptun->iptun_raddr = orig_raddr;
		iptun->iptun_flags = orig_flags;
	}
	return (err);
}

/*
 * Register the tunnel with GLDv3 using mac_register() and set
 * IPTUN_MAC_REGISTERED on success.  Returns EINVAL if mac_alloc() fails,
 * otherwise the result of mac_register().
 */
static int
iptun_register(iptun_t *iptun)
{
	mac_register_t	*mac;
	int		err;

	ASSERT(!(iptun->iptun_flags & IPTUN_MAC_REGISTERED));

	if ((mac = mac_alloc(MAC_VERSION)) == NULL)
		return (EINVAL);

	mac->m_type_ident = iptun->iptun_typeinfo->iti_ident;
	mac->m_driver = iptun;
	mac->m_dip = iptun_dip;
	mac->m_instance = (uint_t)-1;
	mac->m_src_addr = (uint8_t *)&iptun->iptun_laddr.ia_addr;
	/* Only tunnel types with a remote address get a destination. */
	mac->m_dst_addr = iptun->iptun_typeinfo->iti_hasraddr ?
	    (uint8_t *)&iptun->iptun_raddr.ia_addr : NULL;
	mac->m_callbacks = &iptun_m_callbacks;
	mac->m_min_sdu = iptun->iptun_typeinfo->iti_minmtu;
	mac->m_max_sdu = iptun->iptun_mtu;
	if (iptun->iptun_header_size != 0) {
		mac->m_pdata = &iptun->iptun_header;
		mac->m_pdata_size = iptun->iptun_header_size;
	}
	if ((err = mac_register(mac, &iptun->iptun_mh)) == 0)
		iptun->iptun_flags |= IPTUN_MAC_REGISTERED;
	/* The mac_register_t is freed regardless of the outcome. */
	mac_free(mac);
	return (err);
}

/*
 * Unregister the tunnel from GLDv3 and clear IPTUN_MAC_REGISTERED on
 * success.  Returns the result of mac_unregister().
 */
static int
iptun_unregister(iptun_t *iptun)
{
	int err;

	ASSERT(iptun->iptun_flags & IPTUN_MAC_REGISTERED);
	if ((err = mac_unregister(iptun->iptun_mh)) == 0)
		iptun->iptun_flags &= ~IPTUN_MAC_REGISTERED;
	return (err);
}

/*
 * Create and initialize the conn_t used for the tunnel's outer IP
 * connection.  Returns NULL if ipcl_conn_create() fails.
 */
static conn_t *
iptun_conn_create(iptun_t *iptun, netstack_t *ns, cred_t *credp)
{
	conn_t *connp;

	if ((connp = ipcl_conn_create(IPCL_IPCCONN, KM_NOSLEEP, ns)) == NULL)
		return (NULL);

	connp->conn_flags |= IPCL_IPTUN;
	connp->conn_iptun = iptun;
	connp->conn_recv = iptun_input;
	connp->conn_recvicmp = iptun_input_icmp;
	connp->conn_verifyicmp = iptun_verifyicmp;

	/*
	 * Register iptun_notify to listen to capability changes detected by IP.
	 * This upcall is made in the context of the call to conn_ip_output.
	 */
	connp->conn_ixa->ixa_notify = iptun_notify;
	connp->conn_ixa->ixa_notify_cookie = iptun;

	/*
	 * For exclusive stacks we set conn_zoneid to GLOBAL_ZONEID as is done
	 * for all other conn_t's.
	 *
	 * Note that there's an important distinction between iptun_zoneid and
	 * conn_zoneid.  The conn_zoneid is set to GLOBAL_ZONEID in non-global
	 * exclusive stack zones to make the ip module believe that the
	 * non-global zone is actually a global zone.  Therefore, when
	 * interacting with the ip module, we must always use conn_zoneid.
	 */
	connp->conn_zoneid = (ns->netstack_stackid == GLOBAL_NETSTACKID) ?
	    crgetzoneid(credp) : GLOBAL_ZONEID;
	connp->conn_cred = credp;
	/* crfree() is done in ipcl_conn_destroy(), called by CONN_DEC_REF() */
	crhold(connp->conn_cred);
	connp->conn_cpid = NOPID;

	/* conn_allzones can not be set this early, hence no IPCL_ZONEID */
	connp->conn_ixa->ixa_zoneid = connp->conn_zoneid;
	ASSERT(connp->conn_ref == 1);

	/* Cache things in ixa without an extra refhold */
	connp->conn_ixa->ixa_cred = connp->conn_cred;
	connp->conn_ixa->ixa_cpid = connp->conn_cpid;
	if (is_system_labeled())
		connp->conn_ixa->ixa_tsl = crgetlabel(connp->conn_cred);

	/*
	 * Have conn_ip_output drop packets should our outer source
	 * go invalid
	 */
	connp->conn_ixa->ixa_flags |= IXAF_VERIFY_SOURCE;

	/*
	 * NOTE(review): AF_INET6 is assigned for an IPv4 outer header and
	 * AF_INET for an IPv6 outer header, which looks inverted -- confirm
	 * whether this is intentional before relying on conn_family.
	 */
	switch (iptun->iptun_typeinfo->iti_ipvers) {
	case IPV4_VERSION:
		connp->conn_family = AF_INET6;
		break;
	case IPV6_VERSION:
		connp->conn_family = AF_INET;
		break;
	}
	/* Initialization is complete; the conn_t is no longer incipient. */
	mutex_enter(&connp->conn_lock);
	connp->conn_state_flags &= ~CONN_INCIPIENT;
	mutex_exit(&connp->conn_lock);
1307 return (connp); 1308 } 1309 1310 static void 1311 iptun_conn_destroy(conn_t *connp) 1312 { 1313 ip_quiesce_conn(connp); 1314 connp->conn_iptun = NULL; 1315 ASSERT(connp->conn_ref == 1); 1316 CONN_DEC_REF(connp); 1317 } 1318 1319 static iptun_t * 1320 iptun_alloc(void) 1321 { 1322 iptun_t *iptun; 1323 1324 if ((iptun = kmem_cache_alloc(iptun_cache, KM_NOSLEEP)) != NULL) { 1325 bzero(iptun, sizeof (*iptun)); 1326 atomic_inc_32(&iptun_tunnelcount); 1327 } 1328 return (iptun); 1329 } 1330 1331 static void 1332 iptun_free(iptun_t *iptun) 1333 { 1334 ASSERT(iptun->iptun_flags & IPTUN_CONDEMNED); 1335 1336 if (iptun->iptun_flags & IPTUN_HASH_INSERTED) { 1337 iptun_stack_t *iptuns = iptun->iptun_iptuns; 1338 1339 mutex_enter(&iptun_hash_lock); 1340 VERIFY(mod_hash_remove(iptun_hash, 1341 IPTUN_HASH_KEY(iptun->iptun_linkid), 1342 (mod_hash_val_t *)&iptun) == 0); 1343 mutex_exit(&iptun_hash_lock); 1344 iptun->iptun_flags &= ~IPTUN_HASH_INSERTED; 1345 mutex_enter(&iptuns->iptuns_lock); 1346 list_remove(&iptuns->iptuns_iptunlist, iptun); 1347 mutex_exit(&iptuns->iptuns_lock); 1348 } 1349 1350 if (iptun->iptun_flags & IPTUN_BOUND) 1351 iptun_unbind(iptun); 1352 1353 /* 1354 * After iptun_unregister(), there will be no threads executing a 1355 * downcall from the mac module, including in the tx datapath. 1356 */ 1357 if (iptun->iptun_flags & IPTUN_MAC_REGISTERED) 1358 VERIFY(iptun_unregister(iptun) == 0); 1359 1360 if (iptun->iptun_itp != NULL) { 1361 /* 1362 * Remove from the AVL tree, AND release the reference iptun_t 1363 * itself holds on the ITP. 1364 */ 1365 itp_unlink(iptun->iptun_itp, iptun->iptun_ns); 1366 ITP_REFRELE(iptun->iptun_itp, iptun->iptun_ns); 1367 iptun->iptun_itp = NULL; 1368 iptun->iptun_flags &= ~IPTUN_SIMPLE_POLICY; 1369 } 1370 1371 /* 1372 * After ipcl_conn_destroy(), there will be no threads executing an 1373 * upcall from ip (i.e., iptun_input()), and it is then safe to free 1374 * the iptun_t. 
1375 */ 1376 if (iptun->iptun_connp != NULL) { 1377 iptun_conn_destroy(iptun->iptun_connp); 1378 iptun->iptun_connp = NULL; 1379 } 1380 1381 kmem_cache_free(iptun_cache, iptun); 1382 atomic_dec_32(&iptun_tunnelcount); 1383 } 1384 1385 int 1386 iptun_create(iptun_kparams_t *ik, cred_t *credp) 1387 { 1388 iptun_t *iptun = NULL; 1389 int err = 0, mherr; 1390 char linkname[MAXLINKNAMELEN]; 1391 ipsec_tun_pol_t *itp; 1392 netstack_t *ns = NULL; 1393 iptun_stack_t *iptuns; 1394 datalink_id_t tmpid; 1395 zoneid_t zoneid = crgetzoneid(credp); 1396 boolean_t link_created = B_FALSE; 1397 1398 /* The tunnel type is mandatory */ 1399 if (!(ik->iptun_kparam_flags & IPTUN_KPARAM_TYPE)) 1400 return (EINVAL); 1401 1402 /* 1403 * Is the linkid that the caller wishes to associate with this new 1404 * tunnel assigned to this zone? 1405 */ 1406 if (zone_check_datalink(&zoneid, ik->iptun_kparam_linkid) != 0) { 1407 if (zoneid != GLOBAL_ZONEID) 1408 return (EINVAL); 1409 } else if (zoneid == GLOBAL_ZONEID) { 1410 return (EINVAL); 1411 } 1412 1413 /* 1414 * Make sure that we're not trying to create a tunnel that has already 1415 * been created. 
1416 */ 1417 if (iptun_enter_by_linkid(ik->iptun_kparam_linkid, &iptun) == 0) { 1418 iptun_exit(iptun); 1419 iptun = NULL; 1420 err = EEXIST; 1421 goto done; 1422 } 1423 1424 ns = netstack_find_by_cred(credp); 1425 iptuns = ns->netstack_iptun; 1426 1427 if ((iptun = iptun_alloc()) == NULL) { 1428 err = ENOMEM; 1429 goto done; 1430 } 1431 1432 iptun->iptun_linkid = ik->iptun_kparam_linkid; 1433 iptun->iptun_zoneid = zoneid; 1434 iptun->iptun_ns = ns; 1435 1436 iptun->iptun_typeinfo = iptun_gettypeinfo(ik->iptun_kparam_type); 1437 if (iptun->iptun_typeinfo->iti_type == IPTUN_TYPE_UNKNOWN) { 1438 err = EINVAL; 1439 goto done; 1440 } 1441 1442 if (ik->iptun_kparam_flags & IPTUN_KPARAM_IMPLICIT) 1443 iptun->iptun_flags |= IPTUN_IMPLICIT; 1444 1445 if ((err = iptun_setparams(iptun, ik)) != 0) 1446 goto done; 1447 1448 iptun->iptun_hoplimit = IPTUN_DEFAULT_HOPLIMIT; 1449 if (iptun->iptun_typeinfo->iti_type == IPTUN_TYPE_IPV6) 1450 iptun->iptun_encaplimit = IPTUN_DEFAULT_ENCAPLIMIT; 1451 1452 iptun_headergen(iptun, B_FALSE); 1453 1454 iptun->iptun_connp = iptun_conn_create(iptun, ns, credp); 1455 if (iptun->iptun_connp == NULL) { 1456 err = ENOMEM; 1457 goto done; 1458 } 1459 1460 iptun->iptun_mtu = iptun->iptun_typeinfo->iti_maxmtu; 1461 iptun->iptun_dpmtu = iptun->iptun_mtu; 1462 1463 /* 1464 * Find an ITP based on linkname. If we have parms already set via 1465 * the iptun_setparams() call above, it may have created an ITP for 1466 * us. We always try get_tunnel_policy() for DEBUG correctness 1467 * checks, and we may wish to refactor this to only check when 1468 * iptun_itp is NULL. 1469 */ 1470 if ((err = dls_mgmt_get_linkinfo(iptun->iptun_linkid, linkname, NULL, 1471 NULL, NULL)) != 0) 1472 goto done; 1473 if ((itp = get_tunnel_policy(linkname, ns)) != NULL) 1474 iptun->iptun_itp = itp; 1475 1476 /* 1477 * See if we have the necessary IP addresses assigned to this tunnel 1478 * to try and bind them with ip underneath us. 
If we're not ready to 1479 * bind yet, then we'll defer the bind operation until the addresses 1480 * are modified. 1481 */ 1482 if (iptun_canbind(iptun) && ((err = iptun_bind(iptun)) != 0)) 1483 goto done; 1484 1485 if ((err = iptun_register(iptun)) != 0) 1486 goto done; 1487 1488 err = dls_devnet_create(iptun->iptun_mh, iptun->iptun_linkid, 1489 iptun->iptun_zoneid); 1490 if (err != 0) 1491 goto done; 1492 link_created = B_TRUE; 1493 1494 /* 1495 * We hash by link-id as that is the key used by all other iptun 1496 * interfaces (modify, delete, etc.). 1497 */ 1498 if ((mherr = mod_hash_insert(iptun_hash, 1499 IPTUN_HASH_KEY(iptun->iptun_linkid), (mod_hash_val_t)iptun)) == 0) { 1500 mutex_enter(&iptuns->iptuns_lock); 1501 list_insert_head(&iptuns->iptuns_iptunlist, iptun); 1502 mutex_exit(&iptuns->iptuns_lock); 1503 iptun->iptun_flags |= IPTUN_HASH_INSERTED; 1504 } else if (mherr == MH_ERR_NOMEM) { 1505 err = ENOMEM; 1506 } else if (mherr == MH_ERR_DUPLICATE) { 1507 err = EEXIST; 1508 } else { 1509 err = EINVAL; 1510 } 1511 1512 done: 1513 if (iptun == NULL && ns != NULL) 1514 netstack_rele(ns); 1515 if (err != 0 && iptun != NULL) { 1516 if (link_created) { 1517 (void) dls_devnet_destroy(iptun->iptun_mh, &tmpid, 1518 B_TRUE); 1519 } 1520 iptun->iptun_flags |= IPTUN_CONDEMNED; 1521 iptun_free(iptun); 1522 } 1523 return (err); 1524 } 1525 1526 int 1527 iptun_delete(datalink_id_t linkid, cred_t *credp) 1528 { 1529 int err; 1530 iptun_t *iptun = NULL; 1531 1532 if ((err = iptun_enter_by_linkid(linkid, &iptun)) != 0) 1533 return (err); 1534 1535 /* One cannot delete a tunnel that belongs to another zone. */ 1536 if (iptun->iptun_zoneid != crgetzoneid(credp)) { 1537 iptun_exit(iptun); 1538 return (EACCES); 1539 } 1540 1541 /* 1542 * We need to exit iptun in order to issue calls up the stack such as 1543 * dls_devnet_destroy(). If we call up while still in iptun, deadlock 1544 * with calls coming down the stack is possible. 
	 * We prevent other
	 * threads from entering this iptun after we've exited it by setting
	 * the IPTUN_DELETE_PENDING flag.  This will cause callers of
	 * iptun_enter() to block waiting on iptun_enter_cv.  The assumption
	 * here is that the functions we're calling while IPTUN_DELETE_PENDING
	 * is set don't result in an iptun_enter() call, as that would result
	 * in deadlock.
	 */
	iptun->iptun_flags |= IPTUN_DELETE_PENDING;

	/* Wait for any pending upcall to the mac module to complete. */
	while (iptun->iptun_flags & IPTUN_UPCALL_PENDING)
		cv_wait(&iptun->iptun_upcall_cv, &iptun->iptun_lock);

	iptun_exit(iptun);

	if ((err = dls_devnet_destroy(iptun->iptun_mh, &linkid, B_TRUE)) == 0) {
		/*
		 * mac_disable() will fail with EBUSY if there are references
		 * to the iptun MAC.  If there are none, then mac_disable()
		 * will assure that none can be acquired until the MAC is
		 * unregistered.
		 *
		 * XXX CR 6791335 prevents us from calling mac_disable() prior
		 * to dls_devnet_destroy(), so we unfortunately need to
		 * attempt to re-create the devnet node if mac_disable()
		 * fails.
		 */
		if ((err = mac_disable(iptun->iptun_mh)) != 0) {
			(void) dls_devnet_create(iptun->iptun_mh, linkid,
			    iptun->iptun_zoneid);
		}
	}

	/*
	 * Now that we know the fate of this iptun_t, we need to clear
	 * IPTUN_DELETE_PENDING, and set IPTUN_CONDEMNED if the iptun_t is
	 * slated to be freed.  Either way, we need to signal the threads
	 * waiting in iptun_enter() so that they can either fail if
	 * IPTUN_CONDEMNED is set, or continue if it's not.
	 */
	mutex_enter(&iptun->iptun_lock);
	iptun->iptun_flags &= ~IPTUN_DELETE_PENDING;
	if (err == 0)
		iptun->iptun_flags |= IPTUN_CONDEMNED;
	cv_broadcast(&iptun->iptun_enter_cv);
	mutex_exit(&iptun->iptun_lock);

	/*
	 * Note that there is no danger in calling iptun_free() after having
	 * dropped the iptun_lock since callers of iptun_enter() at this point
	 * are doing so from iptun_enter_by_linkid() (mac_disable() got rid of
	 * threads entering from mac callbacks which call iptun_enter()
	 * directly) which holds iptun_hash_lock, and iptun_free() grabs this
	 * lock in order to remove the iptun_t from the hash table.
	 */
	if (err == 0)
		iptun_free(iptun);

	return (err);
}

/*
 * Modify the tunnel identified by ik->iptun_kparam_linkid using the
 * parameters flagged in ik.  Returns 0 on success or an errno value (EACCES
 * if the tunnel belongs to another zone, EINVAL if a type change is
 * requested).
 */
int
iptun_modify(const iptun_kparams_t *ik, cred_t *credp)
{
	iptun_t		*iptun;
	boolean_t	laddr_change = B_FALSE, raddr_change = B_FALSE;
	int		err;

	if ((err = iptun_enter_by_linkid(ik->iptun_kparam_linkid, &iptun)) != 0)
		return (err);

	/* One cannot modify a tunnel that belongs to another zone. */
	if (iptun->iptun_zoneid != crgetzoneid(credp)) {
		err = EACCES;
		goto done;
	}

	/* The tunnel type cannot be changed */
	if (ik->iptun_kparam_flags & IPTUN_KPARAM_TYPE) {
		err = EINVAL;
		goto done;
	}

	if ((err = iptun_setparams(iptun, ik)) != 0)
		goto done;
	iptun_headergen(iptun, B_FALSE);

	/*
	 * If any of the tunnel's addresses has been modified and the tunnel
	 * has the necessary addresses assigned to it, we need to try to bind
	 * with ip underneath us.  If we're not ready to bind yet, then we'll
	 * try again when the addresses are modified later.
	 */
	laddr_change = (ik->iptun_kparam_flags & IPTUN_KPARAM_LADDR);
	raddr_change = (ik->iptun_kparam_flags & IPTUN_KPARAM_RADDR);
	if (laddr_change || raddr_change) {
		if (iptun->iptun_flags & IPTUN_BOUND)
			iptun_unbind(iptun);
		if (iptun_canbind(iptun) && (err = iptun_bind(iptun)) != 0) {
			/* The bind failed; clear the flags of changed addrs. */
			if (laddr_change)
				iptun->iptun_flags &= ~IPTUN_LADDR;
			if (raddr_change)
				iptun->iptun_flags &= ~IPTUN_RADDR;
			goto done;
		}
	}

	if (laddr_change)
		iptun_task_dispatch(iptun, IPTUN_TASK_LADDR_UPDATE);
	if (raddr_change)
		iptun_task_dispatch(iptun, IPTUN_TASK_RADDR_UPDATE);

done:
	iptun_exit(iptun);
	return (err);
}

/*
 * Given an IP tunnel's datalink id, fill in its parameters.  ik is zeroed
 * and then repopulated, so only iptun_kparam_linkid is used as input.
 * Returns ENOENT if the link is not visible from the caller's zone.
 */
int
iptun_info(iptun_kparams_t *ik, cred_t *credp)
{
	iptun_t	*iptun;
	int	err;

	/* Is the tunnel link visible from the caller's zone? */
	if (!dls_devnet_islinkvisible(ik->iptun_kparam_linkid,
	    crgetzoneid(credp)))
		return (ENOENT);

	if ((err = iptun_enter_by_linkid(ik->iptun_kparam_linkid, &iptun)) != 0)
		return (err);

	bzero(ik, sizeof (iptun_kparams_t));

	ik->iptun_kparam_linkid = iptun->iptun_linkid;
	ik->iptun_kparam_type = iptun->iptun_typeinfo->iti_type;
	ik->iptun_kparam_flags |= IPTUN_KPARAM_TYPE;

	if (iptun->iptun_flags & IPTUN_LADDR) {
		iptun_getaddr(&iptun->iptun_laddr, &ik->iptun_kparam_laddr);
		ik->iptun_kparam_flags |= IPTUN_KPARAM_LADDR;
	}
	if (iptun->iptun_flags & IPTUN_RADDR) {
		iptun_getaddr(&iptun->iptun_raddr, &ik->iptun_kparam_raddr);
		ik->iptun_kparam_flags |= IPTUN_KPARAM_RADDR;
	}

	if (iptun->iptun_flags & IPTUN_IMPLICIT)
		ik->iptun_kparam_flags |= IPTUN_KPARAM_IMPLICIT;

	if (iptun->iptun_itp != NULL) {
		mutex_enter(&iptun->iptun_itp->itp_lock);
		if (iptun->iptun_itp->itp_flags & ITPF_P_ACTIVE) {
			ik->iptun_kparam_flags |= IPTUN_KPARAM_IPSECPOL;
			if (iptun->iptun_flags & IPTUN_SIMPLE_POLICY) {
				ik->iptun_kparam_flags |= IPTUN_KPARAM_SECINFO;
				ik->iptun_kparam_secinfo =
				    iptun->iptun_simple_policy;
			}
		}
		mutex_exit(&iptun->iptun_itp->itp_lock);
	}

done:
	iptun_exit(iptun);
	return (err);
}

/*
 * Set the 6to4 relay router address for the given netstack.  Returns
 * EADDRNOTAVAIL if relay_addr is the broadcast address or a class D address.
 */
int
iptun_set_6to4relay(netstack_t *ns, ipaddr_t relay_addr)
{
	if (relay_addr == INADDR_BROADCAST || CLASSD(relay_addr))
		return (EADDRNOTAVAIL);
	ns->netstack_iptun->iptuns_relay_rtr_addr = relay_addr;
	return (0);
}

/* Return the netstack's 6to4 relay router address in *relay_addr. */
void
iptun_get_6to4relay(netstack_t *ns, ipaddr_t *relay_addr)
{
	*relay_addr = ns->netstack_iptun->iptuns_relay_rtr_addr;
}

/*
 * Associate the tunnel policy itp with the tunnel identified by linkid,
 * taking a reference on itp.  Does nothing if the tunnel cannot be entered
 * or already refers to itp.
 */
void
iptun_set_policy(datalink_id_t linkid, ipsec_tun_pol_t *itp)
{
	iptun_t	*iptun;

	if (iptun_enter_by_linkid(linkid, &iptun) != 0)
		return;
	if (iptun->iptun_itp != itp) {
		ASSERT(iptun->iptun_itp == NULL);
		ITP_REFHOLD(itp);
		iptun->iptun_itp = itp;
		/* IPsec policy means IPsec overhead, which means lower MTU. */
		(void) iptun_update_mtu(iptun, NULL, 0);
	}
	iptun_exit(iptun);
}

/*
 * Obtain the path MTU to the tunnel destination.
 * Can return zero in some cases.
 *
 * If ixa is NULL, the conn_t's ixa is obtained with conn_get_ixa() and
 * released again before returning.
 */
static uint32_t
iptun_get_dst_pmtu(iptun_t *iptun, ip_xmit_attr_t *ixa)
{
	uint32_t	pmtu = 0;
	conn_t		*connp = iptun->iptun_connp;
	boolean_t	need_rele = B_FALSE;

	/*
	 * We only obtain the pmtu for tunnels that have a remote tunnel
	 * address.
	 */
	if (!(iptun->iptun_flags & IPTUN_RADDR))
		return (0);

	if (ixa == NULL) {
		ixa = conn_get_ixa(connp, B_FALSE);
		if (ixa == NULL)
			return (0);
		need_rele = B_TRUE;
	}
	/*
	 * Guard against ICMP errors before we have sent, as well as against
	 * a thread which held conn_ixa.
	 */
	if (ixa->ixa_ire != NULL) {
		pmtu = ip_get_pmtu(ixa);

		/*
		 * For both IPv4 and IPv6 we can have indication that the outer
		 * header needs fragmentation.
		 */
		if (ixa->ixa_flags & IXAF_PMTU_TOO_SMALL) {
			/* Must allow fragmentation in ip_output */
			ixa->ixa_flags &= ~IXAF_DONTFRAG;
		} else if (iptun->iptun_typeinfo->iti_type != IPTUN_TYPE_6TO4) {
			ixa->ixa_flags |= IXAF_DONTFRAG;
		} else {
			/* ip_get_pmtu might have set this - we don't want it */
			ixa->ixa_flags &= ~IXAF_PMTU_IPV4_DF;
		}
	}

	if (need_rele)
		ixa_refrele(ixa);
	return (pmtu);
}

/*
 * Update the ip_xmit_attr_t to capture the current lower path mtu as known
 * by ip.
 *
 * If ixa is NULL, the conn_t's ixa is obtained with conn_get_ixa() and
 * released again before returning.
 */
static void
iptun_update_dst_pmtu(iptun_t *iptun, ip_xmit_attr_t *ixa)
{
	uint32_t	pmtu;
	conn_t		*connp = iptun->iptun_connp;
	boolean_t	need_rele = B_FALSE;

	/* IXAF_VERIFY_PMTU is not set if we don't have a fixed destination */
	if (!(iptun->iptun_flags & IPTUN_RADDR))
		return;

	if (ixa == NULL) {
		ixa = conn_get_ixa(connp, B_FALSE);
		if (ixa == NULL)
			return;
		need_rele = B_TRUE;
	}
	/*
	 * Guard against ICMP errors before we have sent, as well as against
	 * a thread which held conn_ixa.
	 */
	if (ixa->ixa_ire != NULL) {
		pmtu = ip_get_pmtu(ixa);
		/*
		 * Update ixa_fragsize and ixa_pmtu.
		 */
		ixa->ixa_fragsize = ixa->ixa_pmtu = pmtu;

		/*
		 * For both IPv4 and IPv6 we can have indication that the outer
		 * header needs fragmentation.
		 */
		if (ixa->ixa_flags & IXAF_PMTU_TOO_SMALL) {
			/* Must allow fragmentation in ip_output */
			ixa->ixa_flags &= ~IXAF_DONTFRAG;
		} else if (iptun->iptun_typeinfo->iti_type != IPTUN_TYPE_6TO4) {
			ixa->ixa_flags |= IXAF_DONTFRAG;
		} else {
			/* ip_get_pmtu might have set this - we don't want it */
			ixa->ixa_flags &= ~IXAF_PMTU_IPV4_DF;
		}
	}

	if (need_rele)
		ixa_refrele(ixa);
}

/*
 * There is nothing that iptun can verify in addition to IP having
 * verified the IP addresses in the fanout.
 */
/* ARGSUSED */
static boolean_t
iptun_verifyicmp(conn_t *connp, void *arg2, icmph_t *icmph, icmp6_t *icmp6,
    ip_recv_attr_t *ira)
{
	return (B_TRUE);
}

/*
 * Notify function registered with ip_xmit_attr_t.  Only IXAN_PMTU is
 * handled, by recalculating the tunnel MTU with narg as the new path MTU;
 * other notification types are ignored.
 */
static void
iptun_notify(void *arg, ip_xmit_attr_t *ixa, ixa_notify_type_t ntype,
    ixa_notify_arg_t narg)
{
	iptun_t		*iptun = (iptun_t *)arg;

	switch (ntype) {
	case IXAN_PMTU:
		(void) iptun_update_mtu(iptun, ixa, narg);
		break;
	}
}

/*
 * Returns the max of old_ovhd and the overhead associated with pol.  pol is
 * the head of a hash chain; every policy linked through ipsp_hash.hash_next
 * is considered.
 */
static uint32_t
iptun_max_policy_overhead(ipsec_policy_t *pol, uint32_t old_ovhd)
{
	uint32_t new_ovhd = old_ovhd;

	while (pol != NULL) {
		new_ovhd = max(new_ovhd,
		    ipsec_act_ovhd(&pol->ipsp_act->ipa_act));
		pol = pol->ipsp_hash.hash_next;
	}
	return (new_ovhd);
}

/*
 * Return the largest IPsec overhead that may apply to packets sent over the
 * tunnel.  The tunnel's own policy is used when it is active; otherwise the
 * system-wide policy is consulted.
 */
static uint32_t
iptun_get_ipsec_overhead(iptun_t *iptun)
{
	ipsec_policy_root_t	*ipr;
	ipsec_policy_head_t	*iph;
	ipsec_policy_t		*pol;
	ipsec_selector_t	sel;
	int			i;
	uint32_t		ipsec_ovhd = 0;
	ipsec_tun_pol_t		*itp = iptun->iptun_itp;
	netstack_t		*ns = iptun->iptun_ns;

	if (itp == NULL || !(itp->itp_flags & ITPF_P_ACTIVE)) {
		/*
		 * Consult global policy, just in case.  This will only work
		 * if we have both source and destination addresses to work
		 * with.
		 */
		if ((iptun->iptun_flags & (IPTUN_LADDR|IPTUN_RADDR)) !=
		    (IPTUN_LADDR|IPTUN_RADDR))
			return (0);

		iph = ipsec_system_policy(ns);
		bzero(&sel, sizeof (sel));
		sel.ips_isv4 =
		    (iptun->iptun_typeinfo->iti_ipvers == IPV4_VERSION);
		switch (iptun->iptun_typeinfo->iti_ipvers) {
		case IPV4_VERSION:
			sel.ips_local_addr_v4 = iptun->iptun_laddr4;
			sel.ips_remote_addr_v4 = iptun->iptun_raddr4;
			break;
		case IPV6_VERSION:
			sel.ips_local_addr_v6 = iptun->iptun_laddr6;
			sel.ips_remote_addr_v6 = iptun->iptun_raddr6;
			break;
		}
		/* Check for both IPv4 and IPv6. */
		sel.ips_protocol = IPPROTO_ENCAP;
		pol = ipsec_find_policy_head(NULL, iph, IPSEC_TYPE_OUTBOUND,
		    &sel);
		if (pol != NULL) {
			ipsec_ovhd = ipsec_act_ovhd(&pol->ipsp_act->ipa_act);
			IPPOL_REFRELE(pol);
		}
		sel.ips_protocol = IPPROTO_IPV6;
		pol = ipsec_find_policy_head(NULL, iph, IPSEC_TYPE_OUTBOUND,
		    &sel);
		if (pol != NULL) {
			ipsec_ovhd = max(ipsec_ovhd,
			    ipsec_act_ovhd(&pol->ipsp_act->ipa_act));
			IPPOL_REFRELE(pol);
		}
		IPPH_REFRELE(iph, ns);
	} else {
		/*
		 * Look through all of the possible IPsec actions for the
		 * tunnel, and find the largest potential IPsec overhead.
1948 */ 1949 iph = itp->itp_policy; 1950 rw_enter(&iph->iph_lock, RW_READER); 1951 ipr = &(iph->iph_root[IPSEC_TYPE_OUTBOUND]); 1952 ipsec_ovhd = iptun_max_policy_overhead( 1953 ipr->ipr_nonhash[IPSEC_AF_V4], 0); 1954 ipsec_ovhd = iptun_max_policy_overhead( 1955 ipr->ipr_nonhash[IPSEC_AF_V6], ipsec_ovhd); 1956 for (i = 0; i < ipr->ipr_nchains; i++) { 1957 ipsec_ovhd = iptun_max_policy_overhead( 1958 ipr->ipr_hash[i].hash_head, ipsec_ovhd); 1959 } 1960 rw_exit(&iph->iph_lock); 1961 } 1962 1963 return (ipsec_ovhd); 1964 } 1965 1966 /* 1967 * Calculate and return the maximum possible upper MTU for the given tunnel. 1968 * 1969 * If new_pmtu is set then we also need to update the lower path MTU information 1970 * in the ip_xmit_attr_t. That is needed since we set IXAF_VERIFY_PMTU so that 1971 * we are notified by conn_ip_output() when the path MTU increases. 1972 */ 1973 static uint32_t 1974 iptun_get_maxmtu(iptun_t *iptun, ip_xmit_attr_t *ixa, uint32_t new_pmtu) 1975 { 1976 size_t header_size, ipsec_overhead; 1977 uint32_t maxmtu, pmtu; 1978 1979 /* 1980 * Start with the path-MTU to the remote address, which is either 1981 * provided as the new_pmtu argument, or obtained using 1982 * iptun_get_dst_pmtu(). 1983 */ 1984 if (new_pmtu != 0) { 1985 if (iptun->iptun_flags & IPTUN_RADDR) 1986 iptun->iptun_dpmtu = new_pmtu; 1987 pmtu = new_pmtu; 1988 } else if (iptun->iptun_flags & IPTUN_RADDR) { 1989 if ((pmtu = iptun_get_dst_pmtu(iptun, ixa)) == 0) { 1990 /* 1991 * We weren't able to obtain the path-MTU of the 1992 * destination. Use the previous value. 1993 */ 1994 pmtu = iptun->iptun_dpmtu; 1995 } else { 1996 iptun->iptun_dpmtu = pmtu; 1997 } 1998 } else { 1999 /* 2000 * We have no path-MTU information to go on, use the maximum 2001 * possible value. 2002 */ 2003 pmtu = iptun->iptun_typeinfo->iti_maxmtu; 2004 } 2005 2006 /* 2007 * Now calculate tunneling overhead and subtract that from the 2008 * path-MTU information obtained above. 
2009 */ 2010 if (iptun->iptun_header_size != 0) { 2011 header_size = iptun->iptun_header_size; 2012 } else { 2013 switch (iptun->iptun_typeinfo->iti_ipvers) { 2014 case IPV4_VERSION: 2015 header_size = sizeof (ipha_t); 2016 if (is_system_labeled()) 2017 header_size += IP_MAX_OPT_LENGTH; 2018 break; 2019 case IPV6_VERSION: 2020 header_size = sizeof (iptun_ipv6hdrs_t); 2021 break; 2022 } 2023 } 2024 2025 ipsec_overhead = iptun_get_ipsec_overhead(iptun); 2026 2027 maxmtu = pmtu - (header_size + ipsec_overhead); 2028 return (max(maxmtu, iptun->iptun_typeinfo->iti_minmtu)); 2029 } 2030 2031 /* 2032 * Re-calculate the tunnel's MTU as seen from above and notify the MAC layer 2033 * of any change in MTU. The new_pmtu argument is the new lower path MTU to 2034 * the tunnel destination to be used in the tunnel MTU calculation. Passing 2035 * in 0 for new_pmtu causes the lower path MTU to be dynamically updated using 2036 * ip_get_pmtu(). 2037 * 2038 * If the calculated tunnel MTU is different than its previous value, then we 2039 * notify the MAC layer above us of this change using mac_maxsdu_update(). 2040 */ 2041 static uint32_t 2042 iptun_update_mtu(iptun_t *iptun, ip_xmit_attr_t *ixa, uint32_t new_pmtu) 2043 { 2044 uint32_t newmtu; 2045 2046 /* We always update the ixa since we might have set IXAF_VERIFY_PMTU */ 2047 iptun_update_dst_pmtu(iptun, ixa); 2048 2049 /* 2050 * We return the current MTU without updating it if it was pegged to a 2051 * static value using the MAC_PROP_MTU link property. 2052 */ 2053 if (iptun->iptun_flags & IPTUN_FIXED_MTU) 2054 return (iptun->iptun_mtu); 2055 2056 /* If the MTU isn't fixed, then use the maximum possible value. */ 2057 newmtu = iptun_get_maxmtu(iptun, ixa, new_pmtu); 2058 /* 2059 * We only dynamically adjust the tunnel MTU for tunnels with 2060 * destinations because dynamic MTU calculations are based on the 2061 * destination path-MTU. 
	 */
	if ((iptun->iptun_flags & IPTUN_RADDR) && newmtu != iptun->iptun_mtu) {
		iptun->iptun_mtu = newmtu;
		/* The MAC layer is notified from a dispatched task. */
		if (iptun->iptun_flags & IPTUN_MAC_REGISTERED)
			iptun_task_dispatch(iptun, IPTUN_TASK_MTU_UPDATE);
	}

	return (newmtu);
}

/*
 * Frees a packet or packet chain and bumps stat for each freed packet.
 * stat may be NULL, in which case no counter is updated.
 */
static void
iptun_drop_pkt(mblk_t *mp, uint64_t *stat)
{
	mblk_t *pktmp;

	/* mp is advanced first so each packet can be unlinked and freed. */
	for (pktmp = mp; pktmp != NULL; pktmp = mp) {
		mp = mp->b_next;
		pktmp->b_next = NULL;
		if (stat != NULL)
			atomic_inc_64(stat);
		freemsg(pktmp);
	}
}

/*
 * Allocate and return a new mblk to hold an IP and ICMP header, and chain the
 * original packet to its b_cont.  Returns NULL on failure.
 */
static mblk_t *
iptun_build_icmperr(size_t hdrs_size, mblk_t *orig_pkt)
{
	mblk_t *icmperr_mp;

	if ((icmperr_mp = allocb(hdrs_size, BPRI_MED)) != NULL) {
		icmperr_mp->b_wptr += hdrs_size;
		/* tack on the offending packet */
		icmperr_mp->b_cont = orig_pkt;
	}
	return (icmperr_mp);
}

/*
 * Transmit an ICMP error.  mp->b_rptr points at the packet to be included in
 * the ICMP error.
 *
 * The error is addressed from orig_ipha's destination to its source.  If
 * the header mblk cannot be allocated, mp is dropped and counted in
 * iptun_noxmtbuf.
 */
static void
iptun_sendicmp_v4(iptun_t *iptun, icmph_t *icmp, ipha_t *orig_ipha, mblk_t *mp,
    ts_label_t *tsl)
{
	size_t	orig_pktsize, hdrs_size;
	mblk_t	*icmperr_mp;
	ipha_t	*new_ipha;
	icmph_t	*new_icmp;
	ip_xmit_attr_t	ixas;
	conn_t	*connp = iptun->iptun_connp;

	orig_pktsize = msgdsize(mp);
	hdrs_size = sizeof (ipha_t) + sizeof (icmph_t);
	if ((icmperr_mp = iptun_build_icmperr(hdrs_size, mp)) == NULL) {
		iptun_drop_pkt(mp, &iptun->iptun_noxmtbuf);
		return;
	}

	new_ipha = (ipha_t *)icmperr_mp->b_rptr;
	new_icmp = (icmph_t *)(new_ipha + 1);

	new_ipha->ipha_version_and_hdr_length = IP_SIMPLE_HDR_VERSION;
	new_ipha->ipha_type_of_service = 0;
	new_ipha->ipha_ident = 0;
	new_ipha->ipha_fragment_offset_and_flags = 0;
	new_ipha->ipha_ttl = orig_ipha->ipha_ttl;
	new_ipha->ipha_protocol = IPPROTO_ICMP;
	new_ipha->ipha_src = orig_ipha->ipha_dst;
	new_ipha->ipha_dst = orig_ipha->ipha_src;
	new_ipha->ipha_hdr_checksum = 0; /* will be computed by ip */
	new_ipha->ipha_length = htons(hdrs_size + orig_pktsize);

	*new_icmp = *icmp;
	/* The checksum field must be zero while the checksum is computed. */
	new_icmp->icmph_checksum = 0;
	new_icmp->icmph_checksum = IP_CSUM(icmperr_mp, sizeof (ipha_t), 0);

	bzero(&ixas, sizeof (ixas));
	ixas.ixa_flags = IXAF_BASIC_SIMPLE_V4;
	if (new_ipha->ipha_src == INADDR_ANY)
		ixas.ixa_flags |= IXAF_SET_SOURCE;

	ixas.ixa_zoneid = IPCL_ZONEID(connp);
	ixas.ixa_ipst = connp->conn_netstack->netstack_ip;
	ixas.ixa_cred = connp->conn_cred;
	ixas.ixa_cpid = NOPID;
	if (is_system_labeled())
		ixas.ixa_tsl = tsl;

	ixas.ixa_ifindex = 0;
	ixas.ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;

	(void) ip_output_simple(icmperr_mp, &ixas);
	ixa_cleanup(&ixas);
}

/*
 * IPv6 counterpart of iptun_sendicmp_v4(): transmit an ICMPv6 error that
 * includes the packet at mp, addressed from orig_ip6h's destination to its
 * source.  If the header mblk cannot be allocated, mp is dropped and counted
 * in iptun_noxmtbuf.
 */
static void
iptun_sendicmp_v6(iptun_t *iptun, icmp6_t *icmp6, ip6_t *orig_ip6h, mblk_t *mp,
    ts_label_t *tsl)
{
	size_t	orig_pktsize, hdrs_size;
	mblk_t	*icmp6err_mp;
	ip6_t	*new_ip6h;
	icmp6_t	*new_icmp6;
	ip_xmit_attr_t	ixas;
	conn_t	*connp = iptun->iptun_connp;

	orig_pktsize = msgdsize(mp);
	hdrs_size = sizeof (ip6_t) + sizeof (icmp6_t);
	if ((icmp6err_mp = iptun_build_icmperr(hdrs_size, mp)) == NULL) {
		iptun_drop_pkt(mp, &iptun->iptun_noxmtbuf);
		return;
	}

	new_ip6h = (ip6_t *)icmp6err_mp->b_rptr;
	new_icmp6 = (icmp6_t *)(new_ip6h + 1);

	new_ip6h->ip6_vcf = orig_ip6h->ip6_vcf;
	new_ip6h->ip6_plen = htons(sizeof (icmp6_t) + orig_pktsize);
	new_ip6h->ip6_hops = orig_ip6h->ip6_hops;
	new_ip6h->ip6_nxt = IPPROTO_ICMPV6;
	new_ip6h->ip6_src = orig_ip6h->ip6_dst;
	new_ip6h->ip6_dst = orig_ip6h->ip6_src;

	*new_icmp6 = *icmp6;
	/* The checksum is calculated in ip_output_simple and friends. */
	new_icmp6->icmp6_cksum = new_ip6h->ip6_plen;

	bzero(&ixas, sizeof (ixas));
	ixas.ixa_flags = IXAF_BASIC_SIMPLE_V6;
	if (IN6_IS_ADDR_UNSPECIFIED(&new_ip6h->ip6_src))
		ixas.ixa_flags |= IXAF_SET_SOURCE;

	ixas.ixa_zoneid = IPCL_ZONEID(connp);
	ixas.ixa_ipst = connp->conn_netstack->netstack_ip;
	ixas.ixa_cred = connp->conn_cred;
	ixas.ixa_cpid = NOPID;
	if (is_system_labeled())
		ixas.ixa_tsl = tsl;

	ixas.ixa_ifindex = 0;
	ixas.ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;

	(void) ip_output_simple(icmp6err_mp, &ixas);
	ixa_cleanup(&ixas);
}

/*
 * Send an ICMPv4 error of the given type and code about the packet at mp.
 */
static void
iptun_icmp_error_v4(iptun_t *iptun, ipha_t *orig_ipha, mblk_t *mp,
    uint8_t type, uint8_t code, ts_label_t *tsl)
{
	icmph_t icmp;

	bzero(&icmp, sizeof (icmp));
	icmp.icmph_type = type;
	icmp.icmph_code = code;

	iptun_sendicmp_v4(iptun, &icmp, orig_ipha, mp, tsl);
}

/*
 * Send an ICMPv4 "fragmentation needed" destination-unreachable error
 * advertising newmtu about the packet at mp.
 */
static void
iptun_icmp_fragneeded_v4(iptun_t *iptun, uint32_t newmtu, ipha_t *orig_ipha,
    mblk_t *mp, ts_label_t *tsl)
{
	icmph_t	icmp;

	/* icmph_checksum is not set here; iptun_sendicmp_v4() fills it in. */
	icmp.icmph_type = ICMP_DEST_UNREACHABLE;
	icmp.icmph_code = ICMP_FRAGMENTATION_NEEDED;
	icmp.icmph_du_zero = 0;
	icmp.icmph_du_mtu = htons(newmtu);

	iptun_sendicmp_v4(iptun, &icmp, orig_ipha, mp, tsl);
}

/*
 * Send an ICMPv6 error of the given type and code about the packet at mp.
 * offset is only used for ICMP6_PARAM_PROB, where it is stored as the
 * parameter-problem pointer.
 */
static void
iptun_icmp_error_v6(iptun_t *iptun, ip6_t *orig_ip6h, mblk_t *mp,
    uint8_t type, uint8_t code, uint32_t offset, ts_label_t *tsl)
{
	icmp6_t icmp6;

	bzero(&icmp6, sizeof (icmp6));
	icmp6.icmp6_type = type;
	icmp6.icmp6_code = code;
	if (type == ICMP6_PARAM_PROB)
		icmp6.icmp6_pptr = htonl(offset);

	iptun_sendicmp_v6(iptun, &icmp6, orig_ip6h, mp, tsl);
}

/*
 * Send an ICMPv6 "packet too big" error advertising newmtu about the packet
 * at mp.
 */
static void
iptun_icmp_toobig_v6(iptun_t *iptun, uint32_t newmtu, ip6_t *orig_ip6h,
    mblk_t *mp, ts_label_t *tsl)
{
	icmp6_t icmp6;

	/* icmp6_cksum is not set here; iptun_sendicmp_v6() fills it in. */
	icmp6.icmp6_type = ICMP6_PACKET_TOO_BIG;
	icmp6.icmp6_code = 0;
	icmp6.icmp6_mtu = htonl(newmtu);

	iptun_sendicmp_v6(iptun, &icmp6, orig_ip6h, mp, tsl);
}

/*
 * Determines if the packet pointed to by ipha or ip6h is an ICMP error.  The
 * mp argument is only used to do bounds checking.
2274 */ 2275 static boolean_t 2276 is_icmp_error(mblk_t *mp, ipha_t *ipha, ip6_t *ip6h) 2277 { 2278 uint16_t hlen; 2279 2280 if (ipha != NULL) { 2281 icmph_t *icmph; 2282 2283 ASSERT(ip6h == NULL); 2284 if (ipha->ipha_protocol != IPPROTO_ICMP) 2285 return (B_FALSE); 2286 2287 hlen = IPH_HDR_LENGTH(ipha); 2288 icmph = (icmph_t *)((uint8_t *)ipha + hlen); 2289 return (ICMP_IS_ERROR(icmph->icmph_type) || 2290 icmph->icmph_type == ICMP_REDIRECT); 2291 } else { 2292 icmp6_t *icmp6; 2293 uint8_t *nexthdrp; 2294 2295 ASSERT(ip6h != NULL); 2296 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hlen, &nexthdrp) || 2297 *nexthdrp != IPPROTO_ICMPV6) { 2298 return (B_FALSE); 2299 } 2300 2301 icmp6 = (icmp6_t *)((uint8_t *)ip6h + hlen); 2302 return (ICMP6_IS_ERROR(icmp6->icmp6_type) || 2303 icmp6->icmp6_type == ND_REDIRECT); 2304 } 2305 } 2306 2307 /* 2308 * Find inner and outer IP headers from a tunneled packet as setup for calls 2309 * into ipsec_tun_{in,out}bound(). 2310 * Note that we need to allow the outer header to be in a separate mblk from 2311 * the inner header. 2312 * If the caller knows the outer_hlen, the caller passes it in. Otherwise zero. 2313 */ 2314 static size_t 2315 iptun_find_headers(mblk_t *mp, size_t outer_hlen, ipha_t **outer4, 2316 ipha_t **inner4, ip6_t **outer6, ip6_t **inner6) 2317 { 2318 ipha_t *ipha; 2319 size_t first_mblkl = MBLKL(mp); 2320 mblk_t *inner_mp; 2321 2322 /* 2323 * Don't bother handling packets that don't have a full IP header in 2324 * the fist mblk. For the input path, the ip module ensures that this 2325 * won't happen, and on the output path, the IP tunneling MAC-type 2326 * plugins ensure that this also won't happen. 
2327 */ 2328 if (first_mblkl < sizeof (ipha_t)) 2329 return (0); 2330 ipha = (ipha_t *)(mp->b_rptr); 2331 switch (IPH_HDR_VERSION(ipha)) { 2332 case IPV4_VERSION: 2333 *outer4 = ipha; 2334 *outer6 = NULL; 2335 if (outer_hlen == 0) 2336 outer_hlen = IPH_HDR_LENGTH(ipha); 2337 break; 2338 case IPV6_VERSION: 2339 *outer4 = NULL; 2340 *outer6 = (ip6_t *)ipha; 2341 if (outer_hlen == 0) 2342 outer_hlen = ip_hdr_length_v6(mp, (ip6_t *)ipha); 2343 break; 2344 default: 2345 return (0); 2346 } 2347 2348 if (first_mblkl < outer_hlen || 2349 (first_mblkl == outer_hlen && mp->b_cont == NULL)) 2350 return (0); 2351 2352 /* 2353 * We don't bother doing a pullup here since the outer header will 2354 * just get stripped off soon on input anyway. We just want to ensure 2355 * that the inner* pointer points to a full header. 2356 */ 2357 if (first_mblkl == outer_hlen) { 2358 inner_mp = mp->b_cont; 2359 ipha = (ipha_t *)inner_mp->b_rptr; 2360 } else { 2361 inner_mp = mp; 2362 ipha = (ipha_t *)(mp->b_rptr + outer_hlen); 2363 } 2364 switch (IPH_HDR_VERSION(ipha)) { 2365 case IPV4_VERSION: 2366 if (inner_mp->b_wptr - (uint8_t *)ipha < sizeof (ipha_t)) 2367 return (0); 2368 *inner4 = ipha; 2369 *inner6 = NULL; 2370 break; 2371 case IPV6_VERSION: 2372 if (inner_mp->b_wptr - (uint8_t *)ipha < sizeof (ip6_t)) 2373 return (0); 2374 *inner4 = NULL; 2375 *inner6 = (ip6_t *)ipha; 2376 break; 2377 default: 2378 return (0); 2379 } 2380 2381 return (outer_hlen); 2382 } 2383 2384 /* 2385 * Received ICMP error in response to an X over IPv4 packet that we 2386 * transmitted. 2387 * 2388 * NOTE: "outer" refers to what's inside the ICMP payload. We will get one of 2389 * the following: 2390 * 2391 * [IPv4(0)][ICMPv4][IPv4(1)][IPv4(2)][ULP] 2392 * 2393 * or 2394 * 2395 * [IPv4(0)][ICMPv4][IPv4(1)][IPv6][ULP] 2396 * 2397 * And "outer4" will get set to IPv4(1), and inner[46] will correspond to 2398 * whatever the very-inner packet is (IPv4(2) or IPv6). 
2399 */ 2400 static void 2401 iptun_input_icmp_v4(iptun_t *iptun, mblk_t *data_mp, icmph_t *icmph, 2402 ip_recv_attr_t *ira) 2403 { 2404 uint8_t *orig; 2405 ipha_t *outer4, *inner4; 2406 ip6_t *outer6, *inner6; 2407 int outer_hlen; 2408 uint8_t type, code; 2409 2410 ASSERT(data_mp->b_cont == NULL); 2411 /* 2412 * Temporarily move b_rptr forward so that iptun_find_headers() can 2413 * find headers in the ICMP packet payload. 2414 */ 2415 orig = data_mp->b_rptr; 2416 data_mp->b_rptr = (uint8_t *)(icmph + 1); 2417 /* 2418 * The ip module ensures that ICMP errors contain at least the 2419 * original IP header (otherwise, the error would never have made it 2420 * here). 2421 */ 2422 ASSERT(MBLKL(data_mp) >= 0); 2423 outer_hlen = iptun_find_headers(data_mp, 0, &outer4, &inner4, &outer6, 2424 &inner6); 2425 ASSERT(outer6 == NULL); 2426 data_mp->b_rptr = orig; 2427 if (outer_hlen == 0) { 2428 iptun_drop_pkt(data_mp, &iptun->iptun_ierrors); 2429 return; 2430 } 2431 2432 /* Only ICMP errors due to tunneled packets should reach here. */ 2433 ASSERT(outer4->ipha_protocol == IPPROTO_ENCAP || 2434 outer4->ipha_protocol == IPPROTO_IPV6); 2435 2436 data_mp = ipsec_tun_inbound(ira, data_mp, iptun->iptun_itp, 2437 inner4, inner6, outer4, outer6, -outer_hlen, iptun->iptun_ns); 2438 if (data_mp == NULL) { 2439 /* Callee did all of the freeing. */ 2440 atomic_inc_64(&iptun->iptun_ierrors); 2441 return; 2442 } 2443 /* We should never see reassembled fragment here. */ 2444 ASSERT(data_mp->b_next == NULL); 2445 2446 data_mp->b_rptr = (uint8_t *)outer4 + outer_hlen; 2447 2448 /* 2449 * If the original packet being transmitted was itself an ICMP error, 2450 * then drop this packet. We don't want to generate an ICMP error in 2451 * response to an ICMP error. 2452 */ 2453 if (is_icmp_error(data_mp, inner4, inner6)) { 2454 iptun_drop_pkt(data_mp, &iptun->iptun_norcvbuf); 2455 return; 2456 } 2457 2458 switch (icmph->icmph_type) { 2459 case ICMP_DEST_UNREACHABLE: 2460 type = (inner4 != NULL ? 
icmph->icmph_type : ICMP6_DST_UNREACH); 2461 switch (icmph->icmph_code) { 2462 case ICMP_FRAGMENTATION_NEEDED: { 2463 uint32_t newmtu; 2464 2465 /* 2466 * We reconcile this with the fact that the tunnel may 2467 * also have IPsec policy by letting iptun_update_mtu 2468 * take care of it. 2469 */ 2470 newmtu = iptun_update_mtu(iptun, NULL, 2471 ntohs(icmph->icmph_du_mtu)); 2472 2473 if (inner4 != NULL) { 2474 iptun_icmp_fragneeded_v4(iptun, newmtu, inner4, 2475 data_mp, ira->ira_tsl); 2476 } else { 2477 iptun_icmp_toobig_v6(iptun, newmtu, inner6, 2478 data_mp, ira->ira_tsl); 2479 } 2480 return; 2481 } 2482 case ICMP_DEST_NET_UNREACH_ADMIN: 2483 case ICMP_DEST_HOST_UNREACH_ADMIN: 2484 code = (inner4 != NULL ? ICMP_DEST_NET_UNREACH_ADMIN : 2485 ICMP6_DST_UNREACH_ADMIN); 2486 break; 2487 default: 2488 code = (inner4 != NULL ? ICMP_HOST_UNREACHABLE : 2489 ICMP6_DST_UNREACH_ADDR); 2490 break; 2491 } 2492 break; 2493 case ICMP_TIME_EXCEEDED: 2494 if (inner6 != NULL) { 2495 type = ICMP6_TIME_EXCEEDED; 2496 code = 0; 2497 } /* else we're already set. */ 2498 break; 2499 case ICMP_PARAM_PROBLEM: 2500 /* 2501 * This is a problem with the outer header we transmitted. 2502 * Treat this as an output error. 2503 */ 2504 iptun_drop_pkt(data_mp, &iptun->iptun_oerrors); 2505 return; 2506 default: 2507 iptun_drop_pkt(data_mp, &iptun->iptun_norcvbuf); 2508 return; 2509 } 2510 2511 if (inner4 != NULL) { 2512 iptun_icmp_error_v4(iptun, inner4, data_mp, type, code, 2513 ira->ira_tsl); 2514 } else { 2515 iptun_icmp_error_v6(iptun, inner6, data_mp, type, code, 0, 2516 ira->ira_tsl); 2517 } 2518 } 2519 2520 /* 2521 * Return B_TRUE if the IPv6 packet pointed to by ip6h contains a Tunnel 2522 * Encapsulation Limit destination option. If there is one, set encaplim_ptr 2523 * to point to the option value. 
2524 */ 2525 static boolean_t 2526 iptun_find_encaplimit(mblk_t *mp, ip6_t *ip6h, uint8_t **encaplim_ptr) 2527 { 2528 ip_pkt_t pkt; 2529 uint8_t *endptr; 2530 ip6_dest_t *destp; 2531 struct ip6_opt *optp; 2532 2533 pkt.ipp_fields = 0; /* must be initialized */ 2534 (void) ip_find_hdr_v6(mp, ip6h, B_FALSE, &pkt, NULL); 2535 if ((pkt.ipp_fields & IPPF_DSTOPTS) != 0) { 2536 destp = pkt.ipp_dstopts; 2537 } else if ((pkt.ipp_fields & IPPF_RTHDRDSTOPTS) != 0) { 2538 destp = pkt.ipp_rthdrdstopts; 2539 } else { 2540 return (B_FALSE); 2541 } 2542 2543 endptr = (uint8_t *)destp + 8 * (destp->ip6d_len + 1); 2544 optp = (struct ip6_opt *)(destp + 1); 2545 while (endptr - (uint8_t *)optp > sizeof (*optp)) { 2546 if (optp->ip6o_type == IP6OPT_TUNNEL_LIMIT) { 2547 if ((uint8_t *)(optp + 1) >= endptr) 2548 return (B_FALSE); 2549 *encaplim_ptr = (uint8_t *)&optp[1]; 2550 return (B_TRUE); 2551 } 2552 optp = (struct ip6_opt *)((uint8_t *)optp + optp->ip6o_len + 2); 2553 } 2554 return (B_FALSE); 2555 } 2556 2557 /* 2558 * Received ICMPv6 error in response to an X over IPv6 packet that we 2559 * transmitted. 2560 * 2561 * NOTE: "outer" refers to what's inside the ICMP payload. We will get one of 2562 * the following: 2563 * 2564 * [IPv6(0)][ICMPv6][IPv6(1)][IPv4][ULP] 2565 * 2566 * or 2567 * 2568 * [IPv6(0)][ICMPv6][IPv6(1)][IPv6(2)][ULP] 2569 * 2570 * And "outer6" will get set to IPv6(1), and inner[46] will correspond to 2571 * whatever the very-inner packet is (IPv4 or IPv6(2)). 2572 */ 2573 static void 2574 iptun_input_icmp_v6(iptun_t *iptun, mblk_t *data_mp, icmp6_t *icmp6h, 2575 ip_recv_attr_t *ira) 2576 { 2577 uint8_t *orig; 2578 ipha_t *outer4, *inner4; 2579 ip6_t *outer6, *inner6; 2580 int outer_hlen; 2581 uint8_t type, code; 2582 2583 ASSERT(data_mp->b_cont == NULL); 2584 2585 /* 2586 * Temporarily move b_rptr forward so that iptun_find_headers() can 2587 * find IP headers in the ICMP packet payload. 
2588 */ 2589 orig = data_mp->b_rptr; 2590 data_mp->b_rptr = (uint8_t *)(icmp6h + 1); 2591 /* 2592 * The ip module ensures that ICMP errors contain at least the 2593 * original IP header (otherwise, the error would never have made it 2594 * here). 2595 */ 2596 ASSERT(MBLKL(data_mp) >= 0); 2597 outer_hlen = iptun_find_headers(data_mp, 0, &outer4, &inner4, &outer6, 2598 &inner6); 2599 ASSERT(outer4 == NULL); 2600 data_mp->b_rptr = orig; /* Restore r_ptr */ 2601 if (outer_hlen == 0) { 2602 iptun_drop_pkt(data_mp, &iptun->iptun_ierrors); 2603 return; 2604 } 2605 2606 data_mp = ipsec_tun_inbound(ira, data_mp, iptun->iptun_itp, 2607 inner4, inner6, outer4, outer6, -outer_hlen, iptun->iptun_ns); 2608 if (data_mp == NULL) { 2609 /* Callee did all of the freeing. */ 2610 atomic_inc_64(&iptun->iptun_ierrors); 2611 return; 2612 } 2613 /* We should never see reassembled fragment here. */ 2614 ASSERT(data_mp->b_next == NULL); 2615 2616 data_mp->b_rptr = (uint8_t *)outer6 + outer_hlen; 2617 2618 /* 2619 * If the original packet being transmitted was itself an ICMP error, 2620 * then drop this packet. We don't want to generate an ICMP error in 2621 * response to an ICMP error. 2622 */ 2623 if (is_icmp_error(data_mp, inner4, inner6)) { 2624 iptun_drop_pkt(data_mp, &iptun->iptun_norcvbuf); 2625 return; 2626 } 2627 2628 switch (icmp6h->icmp6_type) { 2629 case ICMP6_PARAM_PROB: { 2630 uint8_t *encaplim_ptr; 2631 2632 /* 2633 * If the ICMPv6 error points to a valid Tunnel Encapsulation 2634 * Limit option and the limit value is 0, then fall through 2635 * and send a host unreachable message. Otherwise, treat the 2636 * error as an output error, as there must have been a problem 2637 * with a packet we sent. 
2638 */ 2639 if (!iptun_find_encaplimit(data_mp, outer6, &encaplim_ptr) || 2640 (icmp6h->icmp6_pptr != 2641 ((ptrdiff_t)encaplim_ptr - (ptrdiff_t)outer6)) || 2642 *encaplim_ptr != 0) { 2643 iptun_drop_pkt(data_mp, &iptun->iptun_oerrors); 2644 return; 2645 } 2646 /* FALLTHRU */ 2647 } 2648 case ICMP6_TIME_EXCEEDED: 2649 case ICMP6_DST_UNREACH: 2650 type = (inner4 != NULL ? ICMP_DEST_UNREACHABLE : 2651 ICMP6_DST_UNREACH); 2652 code = (inner4 != NULL ? ICMP_HOST_UNREACHABLE : 2653 ICMP6_DST_UNREACH_ADDR); 2654 break; 2655 case ICMP6_PACKET_TOO_BIG: { 2656 uint32_t newmtu; 2657 2658 /* 2659 * We reconcile this with the fact that the tunnel may also 2660 * have IPsec policy by letting iptun_update_mtu take care of 2661 * it. 2662 */ 2663 newmtu = iptun_update_mtu(iptun, NULL, 2664 ntohl(icmp6h->icmp6_mtu)); 2665 2666 if (inner4 != NULL) { 2667 iptun_icmp_fragneeded_v4(iptun, newmtu, inner4, 2668 data_mp, ira->ira_tsl); 2669 } else { 2670 iptun_icmp_toobig_v6(iptun, newmtu, inner6, data_mp, 2671 ira->ira_tsl); 2672 } 2673 return; 2674 } 2675 default: 2676 iptun_drop_pkt(data_mp, &iptun->iptun_norcvbuf); 2677 return; 2678 } 2679 2680 if (inner4 != NULL) { 2681 iptun_icmp_error_v4(iptun, inner4, data_mp, type, code, 2682 ira->ira_tsl); 2683 } else { 2684 iptun_icmp_error_v6(iptun, inner6, data_mp, type, code, 0, 2685 ira->ira_tsl); 2686 } 2687 } 2688 2689 /* 2690 * Called as conn_recvicmp from IP for ICMP errors. 2691 */ 2692 /* ARGSUSED2 */ 2693 static void 2694 iptun_input_icmp(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *ira) 2695 { 2696 conn_t *connp = arg; 2697 iptun_t *iptun = connp->conn_iptun; 2698 mblk_t *tmpmp; 2699 size_t hlen; 2700 2701 ASSERT(IPCL_IS_IPTUN(connp)); 2702 2703 if (mp->b_cont != NULL) { 2704 /* 2705 * Since ICMP error processing necessitates access to bits 2706 * that are within the ICMP error payload (the original packet 2707 * that caused the error), pull everything up into a single 2708 * block for convenience. 
2709 */ 2710 if ((tmpmp = msgpullup(mp, -1)) == NULL) { 2711 iptun_drop_pkt(mp, &iptun->iptun_norcvbuf); 2712 return; 2713 } 2714 freemsg(mp); 2715 mp = tmpmp; 2716 } 2717 2718 hlen = ira->ira_ip_hdr_length; 2719 switch (iptun->iptun_typeinfo->iti_ipvers) { 2720 case IPV4_VERSION: 2721 /* 2722 * The outer IP header coming up from IP is always ipha_t 2723 * alligned (otherwise, we would have crashed in ip). 2724 */ 2725 iptun_input_icmp_v4(iptun, mp, (icmph_t *)(mp->b_rptr + hlen), 2726 ira); 2727 break; 2728 case IPV6_VERSION: 2729 iptun_input_icmp_v6(iptun, mp, (icmp6_t *)(mp->b_rptr + hlen), 2730 ira); 2731 break; 2732 } 2733 } 2734 2735 static boolean_t 2736 iptun_in_6to4_ok(iptun_t *iptun, ipha_t *outer4, ip6_t *inner6) 2737 { 2738 ipaddr_t v4addr; 2739 2740 /* 2741 * It's possible that someone sent us an IPv4-in-IPv4 packet with the 2742 * IPv4 address of a 6to4 tunnel as the destination. 2743 */ 2744 if (inner6 == NULL) 2745 return (B_FALSE); 2746 2747 /* 2748 * Make sure that the IPv6 destination is within the site that this 2749 * 6to4 tunnel is routing for. We don't want people bouncing random 2750 * tunneled IPv6 packets through this 6to4 router. 2751 */ 2752 IN6_6TO4_TO_V4ADDR(&inner6->ip6_dst, (struct in_addr *)&v4addr); 2753 if (outer4->ipha_dst != v4addr) 2754 return (B_FALSE); 2755 2756 if (IN6_IS_ADDR_6TO4(&inner6->ip6_src)) { 2757 /* 2758 * Section 9 of RFC 3056 (security considerations) suggests 2759 * that when a packet is from a 6to4 site (i.e., it's not a 2760 * global address being forwarded froma relay router), make 2761 * sure that the packet was tunneled by that site's 6to4 2762 * router. 2763 */ 2764 IN6_6TO4_TO_V4ADDR(&inner6->ip6_src, (struct in_addr *)&v4addr); 2765 if (outer4->ipha_src != v4addr) 2766 return (B_FALSE); 2767 } else { 2768 /* 2769 * Only accept packets from a relay router if we've configured 2770 * outbound relay router functionality. 
2771 */ 2772 if (iptun->iptun_iptuns->iptuns_relay_rtr_addr == INADDR_ANY) 2773 return (B_FALSE); 2774 } 2775 2776 return (B_TRUE); 2777 } 2778 2779 /* 2780 * Input function for everything that comes up from the ip module below us. 2781 * This is called directly from the ip module via connp->conn_recv(). 2782 * 2783 * We receive M_DATA messages with IP-in-IP tunneled packets. 2784 */ 2785 /* ARGSUSED2 */ 2786 static void 2787 iptun_input(void *arg, mblk_t *data_mp, void *arg2, ip_recv_attr_t *ira) 2788 { 2789 conn_t *connp = arg; 2790 iptun_t *iptun = connp->conn_iptun; 2791 int outer_hlen; 2792 ipha_t *outer4, *inner4; 2793 ip6_t *outer6, *inner6; 2794 2795 ASSERT(IPCL_IS_IPTUN(connp)); 2796 ASSERT(DB_TYPE(data_mp) == M_DATA); 2797 2798 outer_hlen = iptun_find_headers(data_mp, ira->ira_ip_hdr_length, 2799 &outer4, &inner4, &outer6, &inner6); 2800 if (outer_hlen == 0) 2801 goto drop; 2802 2803 /* 2804 * If the system is labeled, we call tsol_check_dest() on the packet 2805 * destination (our local tunnel address) to ensure that the packet as 2806 * labeled should be allowed to be sent to us. We don't need to call 2807 * the more involved tsol_receive_local() since the tunnel link itself 2808 * cannot be assigned to shared-stack non-global zones. 2809 */ 2810 if (ira->ira_flags & IRAF_SYSTEM_LABELED) { 2811 if (ira->ira_tsl == NULL) 2812 goto drop; 2813 if (tsol_check_dest(ira->ira_tsl, (outer4 != NULL ? 2814 (void *)&outer4->ipha_dst : (void *)&outer6->ip6_dst), 2815 (outer4 != NULL ? IPV4_VERSION : IPV6_VERSION), 2816 CONN_MAC_DEFAULT, B_FALSE, NULL) != 0) 2817 goto drop; 2818 } 2819 2820 data_mp = ipsec_tun_inbound(ira, data_mp, iptun->iptun_itp, 2821 inner4, inner6, outer4, outer6, outer_hlen, iptun->iptun_ns); 2822 if (data_mp == NULL) { 2823 /* Callee did all of the freeing. 
*/ 2824 return; 2825 } 2826 2827 if (iptun->iptun_typeinfo->iti_type == IPTUN_TYPE_6TO4 && 2828 !iptun_in_6to4_ok(iptun, outer4, inner6)) 2829 goto drop; 2830 2831 /* 2832 * We need to statistically account for each packet individually, so 2833 * we might as well split up any b_next chains here. 2834 */ 2835 do { 2836 mblk_t *mp; 2837 2838 mp = data_mp->b_next; 2839 data_mp->b_next = NULL; 2840 2841 atomic_inc_64(&iptun->iptun_ipackets); 2842 atomic_add_64(&iptun->iptun_rbytes, msgdsize(data_mp)); 2843 mac_rx(iptun->iptun_mh, NULL, data_mp); 2844 2845 data_mp = mp; 2846 } while (data_mp != NULL); 2847 return; 2848 drop: 2849 iptun_drop_pkt(data_mp, &iptun->iptun_ierrors); 2850 } 2851 2852 /* 2853 * Do 6to4-specific header-processing on output. Return B_TRUE if the packet 2854 * was processed without issue, or B_FALSE if the packet had issues and should 2855 * be dropped. 2856 */ 2857 static boolean_t 2858 iptun_out_process_6to4(iptun_t *iptun, ipha_t *outer4, ip6_t *inner6) 2859 { 2860 ipaddr_t v4addr; 2861 2862 /* 2863 * IPv6 source must be a 6to4 address. This is because a conscious 2864 * decision was made to not allow a Solaris system to be used as a 2865 * relay router (for security reasons) when 6to4 was initially 2866 * integrated. If this decision is ever reversed, the following check 2867 * can be removed. 2868 */ 2869 if (!IN6_IS_ADDR_6TO4(&inner6->ip6_src)) 2870 return (B_FALSE); 2871 2872 /* 2873 * RFC3056 mandates that the IPv4 source MUST be set to the IPv4 2874 * portion of the 6to4 IPv6 source address. In other words, make sure 2875 * that we're tunneling packets from our own 6to4 site. 2876 */ 2877 IN6_6TO4_TO_V4ADDR(&inner6->ip6_src, (struct in_addr *)&v4addr); 2878 if (outer4->ipha_src != v4addr) 2879 return (B_FALSE); 2880 2881 /* 2882 * Automatically set the destination of the outer IPv4 header as 2883 * described in RFC3056. There are two possibilities: 2884 * 2885 * a. 
If the IPv6 destination is a 6to4 address, set the IPv4 address 2886 * to the IPv4 portion of the 6to4 address. 2887 * b. If the IPv6 destination is a native IPv6 address, set the IPv4 2888 * destination to the address of a relay router. 2889 * 2890 * Design Note: b shouldn't be necessary here, and this is a flaw in 2891 * the design of the 6to4relay command. Instead of setting a 6to4 2892 * relay address in this module via an ioctl, the 6to4relay command 2893 * could simply add a IPv6 route for native IPv6 addresses (such as a 2894 * default route) in the forwarding table that uses a 6to4 destination 2895 * as its next hop, and the IPv4 portion of that address could be a 2896 * 6to4 relay address. In order for this to work, IP would have to 2897 * resolve the next hop address, which would necessitate a link-layer 2898 * address resolver for 6to4 links, which doesn't exist today. 2899 * 2900 * In fact, if a resolver existed for 6to4 links, then setting the 2901 * IPv4 destination in the outer header could be done as part of 2902 * link-layer address resolution and fast-path header generation, and 2903 * not here. 2904 */ 2905 if (IN6_IS_ADDR_6TO4(&inner6->ip6_dst)) { 2906 /* destination is a 6to4 router */ 2907 IN6_6TO4_TO_V4ADDR(&inner6->ip6_dst, 2908 (struct in_addr *)&outer4->ipha_dst); 2909 2910 /* Reject attempts to send to INADDR_ANY */ 2911 if (outer4->ipha_dst == INADDR_ANY) 2912 return (B_FALSE); 2913 } else { 2914 /* 2915 * The destination is a native IPv6 address. If output to a 2916 * relay-router is enabled, use the relay-router's IPv4 2917 * address as the destination. 
2918 */ 2919 if (iptun->iptun_iptuns->iptuns_relay_rtr_addr == INADDR_ANY) 2920 return (B_FALSE); 2921 outer4->ipha_dst = iptun->iptun_iptuns->iptuns_relay_rtr_addr; 2922 } 2923 2924 /* 2925 * If the outer source and destination are equal, this means that the 2926 * 6to4 router somehow forwarded an IPv6 packet destined for its own 2927 * 6to4 site to its 6to4 tunnel interface, which will result in this 2928 * packet infinitely bouncing between ip and iptun. 2929 */ 2930 return (outer4->ipha_src != outer4->ipha_dst); 2931 } 2932 2933 /* 2934 * Process output packets with outer IPv4 headers. Frees mp and bumps stat on 2935 * error. 2936 */ 2937 static mblk_t * 2938 iptun_out_process_ipv4(iptun_t *iptun, mblk_t *mp, ipha_t *outer4, 2939 ipha_t *inner4, ip6_t *inner6, ip_xmit_attr_t *ixa) 2940 { 2941 uint8_t *innerptr = (inner4 != NULL ? 2942 (uint8_t *)inner4 : (uint8_t *)inner6); 2943 size_t minmtu = iptun->iptun_typeinfo->iti_minmtu; 2944 2945 if (inner4 != NULL) { 2946 ASSERT(outer4->ipha_protocol == IPPROTO_ENCAP); 2947 /* 2948 * Copy the tos from the inner IPv4 header. We mask off ECN 2949 * bits (bits 6 and 7) because there is currently no 2950 * tunnel-tunnel communication to determine if both sides 2951 * support ECN. We opt for the safe choice: don't copy the 2952 * ECN bits when doing encapsulation. 2953 */ 2954 outer4->ipha_type_of_service = 2955 inner4->ipha_type_of_service & ~0x03; 2956 } else { 2957 ASSERT(outer4->ipha_protocol == IPPROTO_IPV6 && 2958 inner6 != NULL); 2959 } 2960 if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF) 2961 outer4->ipha_fragment_offset_and_flags |= IPH_DF_HTONS; 2962 else 2963 outer4->ipha_fragment_offset_and_flags &= ~IPH_DF_HTONS; 2964 2965 /* 2966 * As described in section 3.2.2 of RFC4213, if the packet payload is 2967 * less than or equal to the minimum MTU size, then we need to allow 2968 * IPv4 to fragment the packet. 
The reason is that even if we end up 2969 * receiving an ICMP frag-needed, the interface above this tunnel 2970 * won't be allowed to drop its MTU as a result, since the packet was 2971 * already smaller than the smallest allowable MTU for that interface. 2972 */ 2973 if (mp->b_wptr - innerptr <= minmtu) { 2974 outer4->ipha_fragment_offset_and_flags = 0; 2975 ixa->ixa_flags &= ~IXAF_DONTFRAG; 2976 } else if (!(ixa->ixa_flags & IXAF_PMTU_TOO_SMALL) && 2977 (iptun->iptun_typeinfo->iti_type != IPTUN_TYPE_6TO4)) { 2978 ixa->ixa_flags |= IXAF_DONTFRAG; 2979 } 2980 2981 ixa->ixa_ip_hdr_length = IPH_HDR_LENGTH(outer4); 2982 ixa->ixa_pktlen = msgdsize(mp); 2983 ixa->ixa_protocol = outer4->ipha_protocol; 2984 2985 outer4->ipha_length = htons(ixa->ixa_pktlen); 2986 return (mp); 2987 } 2988 2989 /* 2990 * Insert an encapsulation limit destination option in the packet provided. 2991 * Always consumes the mp argument and returns a new mblk pointer. 2992 */ 2993 static mblk_t * 2994 iptun_insert_encaplimit(iptun_t *iptun, mblk_t *mp, ip6_t *outer6, 2995 uint8_t limit) 2996 { 2997 mblk_t *newmp; 2998 iptun_ipv6hdrs_t *newouter6; 2999 3000 ASSERT(outer6->ip6_nxt == IPPROTO_IPV6); 3001 ASSERT(mp->b_cont == NULL); 3002 3003 mp->b_rptr += sizeof (ip6_t); 3004 newmp = allocb(sizeof (iptun_ipv6hdrs_t) + MBLKL(mp), BPRI_MED); 3005 if (newmp == NULL) { 3006 iptun_drop_pkt(mp, &iptun->iptun_noxmtbuf); 3007 return (NULL); 3008 } 3009 newmp->b_wptr += sizeof (iptun_ipv6hdrs_t); 3010 /* Copy the payload (Starting with the inner IPv6 header). */ 3011 bcopy(mp->b_rptr, newmp->b_wptr, MBLKL(mp)); 3012 newmp->b_wptr += MBLKL(mp); 3013 newouter6 = (iptun_ipv6hdrs_t *)newmp->b_rptr; 3014 /* Now copy the outer IPv6 header. 
*/ 3015 bcopy(outer6, &newouter6->it6h_ip6h, sizeof (ip6_t)); 3016 newouter6->it6h_ip6h.ip6_nxt = IPPROTO_DSTOPTS; 3017 newouter6->it6h_encaplim = iptun_encaplim_init; 3018 newouter6->it6h_encaplim.iel_destopt.ip6d_nxt = outer6->ip6_nxt; 3019 newouter6->it6h_encaplim.iel_telopt.ip6ot_encap_limit = limit; 3020 3021 /* 3022 * The payload length will be set at the end of 3023 * iptun_out_process_ipv6(). 3024 */ 3025 3026 freemsg(mp); 3027 return (newmp); 3028 } 3029 3030 /* 3031 * Process output packets with outer IPv6 headers. Frees mp and bumps stats 3032 * on error. 3033 */ 3034 static mblk_t * 3035 iptun_out_process_ipv6(iptun_t *iptun, mblk_t *mp, ip6_t *outer6, 3036 ipha_t *inner4, ip6_t *inner6, ip_xmit_attr_t *ixa) 3037 { 3038 uint8_t *innerptr = (inner4 != NULL ? 3039 (uint8_t *)inner4 : (uint8_t *)inner6); 3040 size_t minmtu = iptun->iptun_typeinfo->iti_minmtu; 3041 uint8_t *limit, *configlimit; 3042 uint32_t offset; 3043 iptun_ipv6hdrs_t *v6hdrs; 3044 3045 if (inner6 != NULL && iptun_find_encaplimit(mp, inner6, &limit)) { 3046 /* 3047 * The inner packet is an IPv6 packet which itself contains an 3048 * encapsulation limit option. The limit variable points to 3049 * the value in the embedded option. Process the 3050 * encapsulation limit option as specified in RFC 2473. 3051 * 3052 * If limit is 0, then we've exceeded the limit and we need to 3053 * send back an ICMPv6 parameter problem message. 3054 * 3055 * If limit is > 0, then we decrement it by 1 and make sure 3056 * that the encapsulation limit option in the outer header 3057 * reflects that (adding an option if one isn't already 3058 * there). 
3059 */ 3060 ASSERT(limit > mp->b_rptr && limit < mp->b_wptr); 3061 if (*limit == 0) { 3062 mp->b_rptr = (uint8_t *)inner6; 3063 offset = limit - mp->b_rptr; 3064 iptun_icmp_error_v6(iptun, inner6, mp, ICMP6_PARAM_PROB, 3065 0, offset, ixa->ixa_tsl); 3066 atomic_inc_64(&iptun->iptun_noxmtbuf); 3067 return (NULL); 3068 } 3069 3070 /* 3071 * The outer header requires an encapsulation limit option. 3072 * If there isn't one already, add one. 3073 */ 3074 if (iptun->iptun_encaplimit == 0) { 3075 if ((mp = iptun_insert_encaplimit(iptun, mp, outer6, 3076 (*limit - 1))) == NULL) 3077 return (NULL); 3078 v6hdrs = (iptun_ipv6hdrs_t *)mp->b_rptr; 3079 } else { 3080 /* 3081 * There is an existing encapsulation limit option in 3082 * the outer header. If the inner encapsulation limit 3083 * is less than the configured encapsulation limit, 3084 * update the outer encapsulation limit to reflect 3085 * this lesser value. 3086 */ 3087 v6hdrs = (iptun_ipv6hdrs_t *)mp->b_rptr; 3088 configlimit = 3089 &v6hdrs->it6h_encaplim.iel_telopt.ip6ot_encap_limit; 3090 if ((*limit - 1) < *configlimit) 3091 *configlimit = (*limit - 1); 3092 } 3093 ixa->ixa_ip_hdr_length = sizeof (iptun_ipv6hdrs_t); 3094 ixa->ixa_protocol = v6hdrs->it6h_encaplim.iel_destopt.ip6d_nxt; 3095 } else { 3096 ixa->ixa_ip_hdr_length = sizeof (ip6_t); 3097 ixa->ixa_protocol = outer6->ip6_nxt; 3098 } 3099 /* 3100 * See iptun_output_process_ipv4() why we allow fragmentation for 3101 * small packets 3102 */ 3103 if (mp->b_wptr - innerptr <= minmtu) 3104 ixa->ixa_flags &= ~IXAF_DONTFRAG; 3105 else if (!(ixa->ixa_flags & IXAF_PMTU_TOO_SMALL)) 3106 ixa->ixa_flags |= IXAF_DONTFRAG; 3107 3108 ixa->ixa_pktlen = msgdsize(mp); 3109 outer6->ip6_plen = htons(ixa->ixa_pktlen - sizeof (ip6_t)); 3110 return (mp); 3111 } 3112 3113 /* 3114 * The IP tunneling MAC-type plugins have already done most of the header 3115 * processing and validity checks. We are simply responsible for multiplexing 3116 * down to the ip module below us. 
3117 */ 3118 static void 3119 iptun_output(iptun_t *iptun, mblk_t *mp) 3120 { 3121 conn_t *connp = iptun->iptun_connp; 3122 mblk_t *newmp; 3123 int error; 3124 ip_xmit_attr_t *ixa; 3125 3126 ASSERT(mp->b_datap->db_type == M_DATA); 3127 3128 if (mp->b_cont != NULL) { 3129 if ((newmp = msgpullup(mp, -1)) == NULL) { 3130 iptun_drop_pkt(mp, &iptun->iptun_noxmtbuf); 3131 return; 3132 } 3133 freemsg(mp); 3134 mp = newmp; 3135 } 3136 3137 if (iptun->iptun_typeinfo->iti_type == IPTUN_TYPE_6TO4) { 3138 iptun_output_6to4(iptun, mp); 3139 return; 3140 } 3141 3142 if (is_system_labeled()) { 3143 /* 3144 * Since the label can be different meaning a potentially 3145 * different IRE,we always use a unique ip_xmit_attr_t. 3146 */ 3147 ixa = conn_get_ixa_exclusive(connp); 3148 } else { 3149 /* 3150 * If no other thread is using conn_ixa this just gets a 3151 * reference to conn_ixa. Otherwise we get a safe copy of 3152 * conn_ixa. 3153 */ 3154 ixa = conn_get_ixa(connp, B_FALSE); 3155 } 3156 if (ixa == NULL) { 3157 iptun_drop_pkt(mp, &iptun->iptun_oerrors); 3158 return; 3159 } 3160 3161 /* 3162 * In case we got a safe copy of conn_ixa, then we need 3163 * to fill in any pointers in it. 3164 */ 3165 if (ixa->ixa_ire == NULL) { 3166 error = ip_attr_connect(connp, ixa, &connp->conn_saddr_v6, 3167 &connp->conn_faddr_v6, &connp->conn_faddr_v6, 0, 3168 NULL, NULL, 0); 3169 if (error != 0) { 3170 if (ixa->ixa_ire != NULL && 3171 (error == EHOSTUNREACH || error == ENETUNREACH)) { 3172 /* 3173 * Let conn_ip_output/ire_send_noroute return 3174 * the error and send any local ICMP error. 3175 */ 3176 error = 0; 3177 } else { 3178 ixa_refrele(ixa); 3179 iptun_drop_pkt(mp, &iptun->iptun_oerrors); 3180 return; 3181 } 3182 } 3183 } 3184 3185 iptun_output_common(iptun, ixa, mp); 3186 ixa_refrele(ixa); 3187 } 3188 3189 /* 3190 * We use an ixa based on the last destination. 
3191 */ 3192 static void 3193 iptun_output_6to4(iptun_t *iptun, mblk_t *mp) 3194 { 3195 conn_t *connp = iptun->iptun_connp; 3196 ipha_t *outer4, *inner4; 3197 ip6_t *outer6, *inner6; 3198 ip_xmit_attr_t *ixa; 3199 ip_xmit_attr_t *oldixa; 3200 int error; 3201 boolean_t need_connect; 3202 in6_addr_t v6dst; 3203 3204 ASSERT(mp->b_cont == NULL); /* Verified by iptun_output */ 3205 3206 /* Make sure we set ipha_dst before we look at ipha_dst */ 3207 3208 (void) iptun_find_headers(mp, 0, &outer4, &inner4, &outer6, &inner6); 3209 ASSERT(outer4 != NULL); 3210 if (!iptun_out_process_6to4(iptun, outer4, inner6)) { 3211 iptun_drop_pkt(mp, &iptun->iptun_oerrors); 3212 return; 3213 } 3214 3215 if (is_system_labeled()) { 3216 /* 3217 * Since the label can be different meaning a potentially 3218 * different IRE,we always use a unique ip_xmit_attr_t. 3219 */ 3220 ixa = conn_get_ixa_exclusive(connp); 3221 } else { 3222 /* 3223 * If no other thread is using conn_ixa this just gets a 3224 * reference to conn_ixa. Otherwise we get a safe copy of 3225 * conn_ixa. 3226 */ 3227 ixa = conn_get_ixa(connp, B_FALSE); 3228 } 3229 if (ixa == NULL) { 3230 iptun_drop_pkt(mp, &iptun->iptun_oerrors); 3231 return; 3232 } 3233 3234 mutex_enter(&connp->conn_lock); 3235 if (connp->conn_v4lastdst == outer4->ipha_dst) { 3236 need_connect = (ixa->ixa_ire == NULL); 3237 } else { 3238 /* In case previous destination was multirt */ 3239 ip_attr_newdst(ixa); 3240 3241 /* 3242 * We later update conn_ixa when we update conn_v4lastdst 3243 * which enables subsequent packets to avoid redoing 3244 * ip_attr_connect 3245 */ 3246 need_connect = B_TRUE; 3247 } 3248 mutex_exit(&connp->conn_lock); 3249 3250 /* 3251 * In case we got a safe copy of conn_ixa, or otherwise we don't 3252 * have a current ixa_ire, then we need to fill in any pointers in 3253 * the ixa. 
3254 */ 3255 if (need_connect) { 3256 IN6_IPADDR_TO_V4MAPPED(outer4->ipha_dst, &v6dst); 3257 3258 /* We handle IPsec in iptun_output_common */ 3259 error = ip_attr_connect(connp, ixa, &connp->conn_saddr_v6, 3260 &v6dst, &v6dst, 0, NULL, NULL, 0); 3261 if (error != 0) { 3262 if (ixa->ixa_ire != NULL && 3263 (error == EHOSTUNREACH || error == ENETUNREACH)) { 3264 /* 3265 * Let conn_ip_output/ire_send_noroute return 3266 * the error and send any local ICMP error. 3267 */ 3268 error = 0; 3269 } else { 3270 ixa_refrele(ixa); 3271 iptun_drop_pkt(mp, &iptun->iptun_oerrors); 3272 return; 3273 } 3274 } 3275 } 3276 3277 iptun_output_common(iptun, ixa, mp); 3278 3279 /* Atomically replace conn_ixa and conn_v4lastdst */ 3280 mutex_enter(&connp->conn_lock); 3281 if (connp->conn_v4lastdst != outer4->ipha_dst) { 3282 /* Remember the dst which corresponds to conn_ixa */ 3283 connp->conn_v6lastdst = v6dst; 3284 oldixa = conn_replace_ixa(connp, ixa); 3285 } else { 3286 oldixa = NULL; 3287 } 3288 mutex_exit(&connp->conn_lock); 3289 ixa_refrele(ixa); 3290 if (oldixa != NULL) 3291 ixa_refrele(oldixa); 3292 } 3293 3294 /* 3295 * Check the destination/label. Modifies *mpp by adding/removing CIPSO. 3296 * 3297 * We get the label from the message in order to honor the 3298 * ULPs/IPs choice of label. This will be NULL for forwarded 3299 * packets, neighbor discovery packets and some others. 3300 */ 3301 static int 3302 iptun_output_check_label(mblk_t **mpp, ip_xmit_attr_t *ixa) 3303 { 3304 cred_t *cr; 3305 int adjust; 3306 int iplen; 3307 int err; 3308 ts_label_t *effective_tsl = NULL; 3309 3310 3311 ASSERT(is_system_labeled()); 3312 3313 cr = msg_getcred(*mpp, NULL); 3314 if (cr == NULL) 3315 return (0); 3316 3317 /* 3318 * We need to start with a label based on the IP/ULP above us 3319 */ 3320 ip_xmit_attr_restore_tsl(ixa, cr); 3321 3322 /* 3323 * Need to update packet with any CIPSO option since 3324 * conn_ip_output doesn't do that. 
3325 */ 3326 if (ixa->ixa_flags & IXAF_IS_IPV4) { 3327 ipha_t *ipha; 3328 3329 ipha = (ipha_t *)(*mpp)->b_rptr; 3330 iplen = ntohs(ipha->ipha_length); 3331 err = tsol_check_label_v4(ixa->ixa_tsl, 3332 ixa->ixa_zoneid, mpp, CONN_MAC_DEFAULT, B_FALSE, 3333 ixa->ixa_ipst, &effective_tsl); 3334 if (err != 0) 3335 return (err); 3336 3337 ipha = (ipha_t *)(*mpp)->b_rptr; 3338 adjust = (int)ntohs(ipha->ipha_length) - iplen; 3339 } else { 3340 ip6_t *ip6h; 3341 3342 ip6h = (ip6_t *)(*mpp)->b_rptr; 3343 iplen = ntohs(ip6h->ip6_plen); 3344 3345 err = tsol_check_label_v6(ixa->ixa_tsl, 3346 ixa->ixa_zoneid, mpp, CONN_MAC_DEFAULT, B_FALSE, 3347 ixa->ixa_ipst, &effective_tsl); 3348 if (err != 0) 3349 return (err); 3350 3351 ip6h = (ip6_t *)(*mpp)->b_rptr; 3352 adjust = (int)ntohs(ip6h->ip6_plen) - iplen; 3353 } 3354 3355 if (effective_tsl != NULL) { 3356 /* Update the label */ 3357 ip_xmit_attr_replace_tsl(ixa, effective_tsl); 3358 } 3359 ixa->ixa_pktlen += adjust; 3360 ixa->ixa_ip_hdr_length += adjust; 3361 return (0); 3362 } 3363 3364 3365 static void 3366 iptun_output_common(iptun_t *iptun, ip_xmit_attr_t *ixa, mblk_t *mp) 3367 { 3368 ipsec_tun_pol_t *itp = iptun->iptun_itp; 3369 int outer_hlen; 3370 mblk_t *newmp; 3371 ipha_t *outer4, *inner4; 3372 ip6_t *outer6, *inner6; 3373 int error; 3374 boolean_t update_pktlen; 3375 3376 ASSERT(ixa->ixa_ire != NULL); 3377 3378 outer_hlen = iptun_find_headers(mp, 0, &outer4, &inner4, &outer6, 3379 &inner6); 3380 if (outer_hlen == 0) { 3381 iptun_drop_pkt(mp, &iptun->iptun_oerrors); 3382 return; 3383 } 3384 3385 /* Perform header processing. */ 3386 if (outer4 != NULL) { 3387 mp = iptun_out_process_ipv4(iptun, mp, outer4, inner4, inner6, 3388 ixa); 3389 } else { 3390 mp = iptun_out_process_ipv6(iptun, mp, outer6, inner4, inner6, 3391 ixa); 3392 } 3393 if (mp == NULL) 3394 return; 3395 3396 /* 3397 * Let's hope the compiler optimizes this with "branch taken". 
3398 */ 3399 if (itp != NULL && (itp->itp_flags & ITPF_P_ACTIVE)) { 3400 /* This updates the ip_xmit_attr_t */ 3401 mp = ipsec_tun_outbound(mp, iptun, inner4, inner6, outer4, 3402 outer6, outer_hlen, ixa); 3403 if (mp == NULL) { 3404 atomic_inc_64(&iptun->iptun_oerrors); 3405 return; 3406 } 3407 if (is_system_labeled()) { 3408 /* 3409 * Might change the packet by adding/removing CIPSO. 3410 * After this caller inner* and outer* and outer_hlen 3411 * might be invalid. 3412 */ 3413 error = iptun_output_check_label(&mp, ixa); 3414 if (error != 0) { 3415 ip2dbg(("label check failed (%d)\n", error)); 3416 iptun_drop_pkt(mp, &iptun->iptun_oerrors); 3417 return; 3418 } 3419 } 3420 3421 /* 3422 * ipsec_tun_outbound() returns a chain of tunneled IP 3423 * fragments linked with b_next (or a single message if the 3424 * tunneled packet wasn't a fragment). 3425 * If fragcache returned a list then we need to update 3426 * ixa_pktlen for all packets in the list. 3427 */ 3428 update_pktlen = (mp->b_next != NULL); 3429 3430 /* 3431 * Otherwise, we're good to go. The ixa has been updated with 3432 * instructions for outbound IPsec processing. 3433 */ 3434 for (newmp = mp; newmp != NULL; newmp = mp) { 3435 atomic_inc_64(&iptun->iptun_opackets); 3436 atomic_add_64(&iptun->iptun_obytes, ixa->ixa_pktlen); 3437 mp = mp->b_next; 3438 newmp->b_next = NULL; 3439 3440 if (update_pktlen) 3441 ixa->ixa_pktlen = msgdsize(mp); 3442 3443 atomic_inc_64(&iptun->iptun_opackets); 3444 atomic_add_64(&iptun->iptun_obytes, ixa->ixa_pktlen); 3445 3446 error = conn_ip_output(newmp, ixa); 3447 if (error == EMSGSIZE) { 3448 /* IPsec policy might have changed */ 3449 (void) iptun_update_mtu(iptun, ixa, 0); 3450 } 3451 } 3452 } else { 3453 /* 3454 * The ip module will potentially apply global policy to the 3455 * packet in its output path if there's no active tunnel 3456 * policy. 
3457 */ 3458 ASSERT(ixa->ixa_ipsec_policy == NULL); 3459 mp = ip_output_attach_policy(mp, outer4, outer6, NULL, ixa); 3460 if (mp == NULL) { 3461 atomic_inc_64(&iptun->iptun_oerrors); 3462 return; 3463 } 3464 if (is_system_labeled()) { 3465 /* 3466 * Might change the packet by adding/removing CIPSO. 3467 * After this caller inner* and outer* and outer_hlen 3468 * might be invalid. 3469 */ 3470 error = iptun_output_check_label(&mp, ixa); 3471 if (error != 0) { 3472 ip2dbg(("label check failed (%d)\n", error)); 3473 iptun_drop_pkt(mp, &iptun->iptun_oerrors); 3474 return; 3475 } 3476 } 3477 3478 atomic_inc_64(&iptun->iptun_opackets); 3479 atomic_add_64(&iptun->iptun_obytes, ixa->ixa_pktlen); 3480 3481 error = conn_ip_output(mp, ixa); 3482 if (error == EMSGSIZE) { 3483 /* IPsec policy might have changed */ 3484 (void) iptun_update_mtu(iptun, ixa, 0); 3485 } 3486 } 3487 if (ixa->ixa_flags & IXAF_IPSEC_SECURE) 3488 ipsec_out_release_refs(ixa); 3489 } 3490 3491 static mac_callbacks_t iptun_m_callbacks = { 3492 .mc_callbacks = (MC_SETPROP | MC_GETPROP), 3493 .mc_getstat = iptun_m_getstat, 3494 .mc_start = iptun_m_start, 3495 .mc_stop = iptun_m_stop, 3496 .mc_setpromisc = iptun_m_setpromisc, 3497 .mc_multicst = iptun_m_multicst, 3498 .mc_unicst = iptun_m_unicst, 3499 .mc_tx = iptun_m_tx, 3500 .mc_setprop = iptun_m_setprop, 3501 .mc_getprop = iptun_m_getprop 3502 }; 3503