1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * iptun - IP Tunneling Driver 28 * 29 * This module is a GLDv3 driver that implements virtual datalinks over IP 30 * (a.k.a, IP tunneling). The datalinks are managed through a dld ioctl 31 * interface (see iptun_ctl.c), and registered with GLDv3 using 32 * mac_register(). It implements the logic for various forms of IP (IPv4 or 33 * IPv6) encapsulation within IP (IPv4 or IPv6) by interacting with the ip 34 * module below it. Each virtual IP tunnel datalink has a conn_t associated 35 * with it representing the "outer" IP connection. 36 * 37 * The module implements the following locking semantics: 38 * 39 * Lookups and deletions in iptun_hash are synchronized using iptun_hash_lock. 40 * See comments above iptun_hash_lock for details. 41 * 42 * No locks are ever held while calling up to GLDv3. The general architecture 43 * of GLDv3 requires this, as the mac perimeter (essentially a lock) for a 44 * given link will be held while making downcalls (iptun_m_*() callbacks). 
45 * Because we need to hold locks while handling downcalls, holding these locks 46 * while issuing upcalls results in deadlock scenarios. See the block comment 47 * above iptun_task_cb() for details on how we safely issue upcalls without 48 * holding any locks. 49 * 50 * The contents of each iptun_t is protected by an iptun_mutex which is held 51 * in iptun_enter() (called by iptun_enter_by_linkid()), and exited in 52 * iptun_exit(). 53 * 54 * See comments in iptun_delete() and iptun_free() for details on how the 55 * iptun_t is deleted safely. 56 */ 57 58 #include <sys/types.h> 59 #include <sys/kmem.h> 60 #include <sys/errno.h> 61 #include <sys/modhash.h> 62 #include <sys/list.h> 63 #include <sys/strsun.h> 64 #include <sys/file.h> 65 #include <sys/systm.h> 66 #include <sys/tihdr.h> 67 #include <sys/param.h> 68 #include <sys/mac_provider.h> 69 #include <sys/mac_ipv4.h> 70 #include <sys/mac_ipv6.h> 71 #include <sys/mac_6to4.h> 72 #include <sys/tsol/tnet.h> 73 #include <sys/sunldi.h> 74 #include <netinet/in.h> 75 #include <netinet/ip6.h> 76 #include <inet/ip.h> 77 #include <inet/ip_ire.h> 78 #include <inet/ipsec_impl.h> 79 #include <sys/tsol/label.h> 80 #include <sys/tsol/tnet.h> 81 #include <inet/iptun.h> 82 #include "iptun_impl.h" 83 84 /* Do the tunnel type and address family match? */ 85 #define IPTUN_ADDR_MATCH(iptun_type, family) \ 86 ((iptun_type == IPTUN_TYPE_IPV4 && family == AF_INET) || \ 87 (iptun_type == IPTUN_TYPE_IPV6 && family == AF_INET6) || \ 88 (iptun_type == IPTUN_TYPE_6TO4 && family == AF_INET)) 89 90 #define IPTUN_HASH_KEY(key) ((mod_hash_key_t)(uintptr_t)(key)) 91 92 #define IPTUN_MIN_IPV4_MTU 576 /* ip.h still uses 68 (!) 
*/ 93 #define IPTUN_MIN_IPV6_MTU IPV6_MIN_MTU 94 #define IPTUN_MAX_IPV4_MTU (IP_MAXPACKET - sizeof (ipha_t)) 95 #define IPTUN_MAX_IPV6_MTU (IP_MAXPACKET - sizeof (ip6_t) - \ 96 sizeof (iptun_encaplim_t)) 97 98 #define IPTUN_MIN_HOPLIMIT 1 99 #define IPTUN_MAX_HOPLIMIT UINT8_MAX 100 101 #define IPTUN_MIN_ENCAPLIMIT 0 102 #define IPTUN_MAX_ENCAPLIMIT UINT8_MAX 103 104 #define IPTUN_IPSEC_REQ_MASK (IPSEC_PREF_REQUIRED | IPSEC_PREF_NEVER) 105 106 static iptun_encaplim_t iptun_encaplim_init = { 107 { IPPROTO_NONE, 0 }, 108 IP6OPT_TUNNEL_LIMIT, 109 1, 110 IPTUN_DEFAULT_ENCAPLIMIT, /* filled in with actual value later */ 111 IP6OPT_PADN, 112 1, 113 0 114 }; 115 116 /* 117 * Table containing per-iptun-type information. 118 * Since IPv6 can run over all of these we have the IPv6 min as the min MTU. 119 */ 120 static iptun_typeinfo_t iptun_type_table[] = { 121 { IPTUN_TYPE_IPV4, MAC_PLUGIN_IDENT_IPV4, IPV4_VERSION, 122 IPTUN_MIN_IPV6_MTU, IPTUN_MAX_IPV4_MTU, B_TRUE }, 123 { IPTUN_TYPE_IPV6, MAC_PLUGIN_IDENT_IPV6, IPV6_VERSION, 124 IPTUN_MIN_IPV6_MTU, IPTUN_MAX_IPV6_MTU, B_TRUE }, 125 { IPTUN_TYPE_6TO4, MAC_PLUGIN_IDENT_6TO4, IPV4_VERSION, 126 IPTUN_MIN_IPV6_MTU, IPTUN_MAX_IPV4_MTU, B_FALSE }, 127 { IPTUN_TYPE_UNKNOWN, NULL, 0, 0, 0, B_FALSE } 128 }; 129 130 /* 131 * iptun_hash is an iptun_t lookup table by link ID protected by 132 * iptun_hash_lock. While the hash table's integrity is maintained via 133 * internal locking in the mod_hash_*() functions, we need additional locking 134 * so that an iptun_t cannot be deleted after a hash lookup has returned an 135 * iptun_t and before iptun_lock has been entered. As such, we use 136 * iptun_hash_lock when doing lookups and removals from iptun_hash. 
 */
mod_hash_t	*iptun_hash;
static kmutex_t	iptun_hash_lock;

static uint_t	iptun_tunnelcount;	/* total for all stacks */
kmem_cache_t	*iptun_cache;
ddi_taskq_t	*iptun_taskq;

/*
 * Deferred-work identifiers.  Each value tells iptun_task_cb() which mac
 * upcall to issue on behalf of a tunnel; see the block comment above
 * iptun_task_cb() for why these upcalls must be issued asynchronously
 * through iptun_taskq rather than directly.
 */
typedef enum {
	IPTUN_TASK_MTU_UPDATE,	/* tell mac about new tunnel link MTU */
	IPTUN_TASK_LADDR_UPDATE, /* tell mac about new local address */
	IPTUN_TASK_RADDR_UPDATE, /* tell mac about new remote address */
	IPTUN_TASK_LINK_UPDATE,	/* tell mac about new link state */
	IPTUN_TASK_PDATA_UPDATE	/* tell mac about updated plugin data */
} iptun_task_t;

/*
 * Argument for a dispatched iptun_task_cb() invocation.  Allocated in
 * iptun_task_dispatch() and freed by iptun_task_cb().  The tunnel is
 * identified by link ID (not pointer) so that the callback re-looks it up
 * and handles the tunnel having been deleted in the meantime.
 */
typedef struct iptun_task_data_s {
	iptun_task_t	itd_task;
	datalink_id_t	itd_linkid;
} iptun_task_data_t;

static void iptun_task_dispatch(iptun_t *, iptun_task_t);
static int iptun_enter(iptun_t *);
static void iptun_exit(iptun_t *);
static void iptun_headergen(iptun_t *, boolean_t);
static void iptun_drop_pkt(mblk_t *, uint64_t *);
static void iptun_input(void *, mblk_t *, void *, ip_recv_attr_t *);
static void iptun_input_icmp(void *, mblk_t *, void *, ip_recv_attr_t *);
static void iptun_output(iptun_t *, mblk_t *);
static uint32_t iptun_get_maxmtu(iptun_t *, ip_xmit_attr_t *, uint32_t);
static uint32_t iptun_update_mtu(iptun_t *, ip_xmit_attr_t *, uint32_t);
static uint32_t iptun_get_dst_pmtu(iptun_t *, ip_xmit_attr_t *);
static void iptun_update_dst_pmtu(iptun_t *, ip_xmit_attr_t *);
static int iptun_setladdr(iptun_t *, const struct sockaddr_storage *);

static void iptun_output_6to4(iptun_t *, mblk_t *);
static void iptun_output_common(iptun_t *, ip_xmit_attr_t *, mblk_t *);
static boolean_t iptun_verifyicmp(conn_t *, void *, icmph_t *, icmp6_t *,
    ip_recv_attr_t *);

static void iptun_notify(void *, ip_xmit_attr_t *, ixa_notify_type_t,
    ixa_notify_arg_t);

static mac_callbacks_t iptun_m_callbacks;

/*
 * GLDv3 mc_getstat(9E) callback: copy the requested counter out of the
 * iptun_t.  The counters themselves are updated without holding
 * iptun_lock, so a reader may observe a slightly stale value; that is
 * acceptable for statistics.
 */
static int
iptun_m_getstat(void *arg, uint_t stat, uint64_t *val)
{
	iptun_t	*iptun = arg;
	int	err = 0;

	switch (stat) {
	case MAC_STAT_IERRORS:
		*val = iptun->iptun_ierrors;
		break;
	case MAC_STAT_OERRORS:
		*val = iptun->iptun_oerrors;
		break;
	case MAC_STAT_RBYTES:
		*val = iptun->iptun_rbytes;
		break;
	case MAC_STAT_IPACKETS:
		*val = iptun->iptun_ipackets;
		break;
	case MAC_STAT_OBYTES:
		*val = iptun->iptun_obytes;
		break;
	case MAC_STAT_OPACKETS:
		*val = iptun->iptun_opackets;
		break;
	case MAC_STAT_NORCVBUF:
		*val = iptun->iptun_norcvbuf;
		break;
	case MAC_STAT_NOXMTBUF:
		*val = iptun->iptun_noxmtbuf;
		break;
	default:
		err = ENOTSUP;
	}

	return (err);
}

/*
 * GLDv3 mc_start(9E) callback: mark the datalink as started and
 * asynchronously notify mac of the resulting link state.  Fails (ENOENT)
 * only if the tunnel is being deleted.
 */
static int
iptun_m_start(void *arg)
{
	iptun_t	*iptun = arg;
	int	err;

	if ((err = iptun_enter(iptun)) == 0) {
		iptun->iptun_flags |= IPTUN_MAC_STARTED;
		iptun_task_dispatch(iptun, IPTUN_TASK_LINK_UPDATE);
		iptun_exit(iptun);
	}
	return (err);
}

/*
 * GLDv3 mc_stop(9E) callback: clear the started flag and asynchronously
 * notify mac of the (now down) link state.  A failure to enter the iptun_t
 * means it is being deleted, in which case there is nothing to do.
 */
static void
iptun_m_stop(void *arg)
{
	iptun_t	*iptun = arg;

	if (iptun_enter(iptun) == 0) {
		iptun->iptun_flags &= ~IPTUN_MAC_STARTED;
		iptun_task_dispatch(iptun, IPTUN_TASK_LINK_UPDATE);
		iptun_exit(iptun);
	}
}

/*
 * iptun_m_setpromisc() does nothing and always succeeds.  This is because a
 * tunnel data-link only ever receives packets that are destined exclusively
 * for the local address of the tunnel.
 */
/* ARGSUSED */
static int
iptun_m_setpromisc(void *arg, boolean_t on)
{
	return (0);
}

/* Multicast membership is meaningless on a point-to-point IP tunnel. */
/* ARGSUSED */
static int
iptun_m_multicst(void *arg, boolean_t add, const uint8_t *addrp)
{
	return (ENOTSUP);
}

/*
 * iptun_m_unicst() sets the local address.  Note that only the family and
 * address fields of ss are initialized here; presumably iptun_setladdr()
 * consults only those fields.
 */
/* ARGSUSED */
static int
iptun_m_unicst(void *arg, const uint8_t *addrp)
{
	iptun_t	*iptun = arg;
	int	err;
	struct sockaddr_storage	ss;
	struct sockaddr_in	*sin;
	struct sockaddr_in6	*sin6;

	if ((err = iptun_enter(iptun)) == 0) {
		switch (iptun->iptun_typeinfo->iti_ipvers) {
		case IPV4_VERSION:
			sin = (struct sockaddr_in *)&ss;
			sin->sin_family = AF_INET;
			bcopy(addrp, &sin->sin_addr, sizeof (in_addr_t));
			break;
		case IPV6_VERSION:
			sin6 = (struct sockaddr_in6 *)&ss;
			sin6->sin6_family = AF_INET6;
			bcopy(addrp, &sin6->sin6_addr, sizeof (in6_addr_t));
			break;
		default:
			ASSERT(0);
		}
		err = iptun_setladdr(iptun, &ss);
		iptun_exit(iptun);
	}
	return (err);
}

/*
 * GLDv3 mc_tx(9E) callback.  If the tunnel is not fully up, the entire
 * chain is dropped and counted against noxmtbuf; otherwise each mblk is
 * unlinked from the chain and transmitted individually.  Always returns
 * NULL (no flow-control push-back).
 */
static mblk_t *
iptun_m_tx(void *arg, mblk_t *mpchain)
{
	mblk_t	*mp, *nmp;
	iptun_t	*iptun = arg;

	if (!IS_IPTUN_RUNNING(iptun)) {
		iptun_drop_pkt(mpchain, &iptun->iptun_noxmtbuf);
		return (NULL);
	}

	for (mp = mpchain; mp != NULL; mp = nmp) {
		nmp = mp->b_next;
		mp->b_next = NULL;
		iptun_output(iptun, mp);
	}

	return (NULL);
}

/*
 * GLDv3 mc_setprop(9E) callback: set hoplimit, encapsulation limit, or a
 * fixed MTU.  All supported properties are uint32_t-valued.
 * NOTE(review): pr_val is dereferenced before pr_valsize is examined —
 * presumably the mac framework guarantees at least sizeof (uint32_t) here;
 * confirm against the mac property framework.
 */
/* ARGSUSED */
static int
iptun_m_setprop(void *barg, const char *pr_name, mac_prop_id_t pr_num,
    uint_t pr_valsize, const void *pr_val)
{
	iptun_t		*iptun = barg;
	uint32_t	value = *(uint32_t *)pr_val;
	int		err;

	/*
	 * We need to enter this iptun_t since we'll be modifying the outer
	 * header.
	 */
	if ((err = iptun_enter(iptun)) != 0)
		return (err);

	switch (pr_num) {
	case MAC_PROP_IPTUN_HOPLIMIT:
		if (value < IPTUN_MIN_HOPLIMIT || value > IPTUN_MAX_HOPLIMIT) {
			err = EINVAL;
			break;
		}
		if (value != iptun->iptun_hoplimit) {
			iptun->iptun_hoplimit = (uint8_t)value;
			/* B_TRUE: push the new header template up to mac */
			iptun_headergen(iptun, B_TRUE);
		}
		break;
	case MAC_PROP_IPTUN_ENCAPLIMIT:
		/* Encapsulation limits only exist for IPv6-encap tunnels. */
		if (iptun->iptun_typeinfo->iti_type != IPTUN_TYPE_IPV6 ||
		    value > IPTUN_MAX_ENCAPLIMIT) {
			err = EINVAL;
			break;
		}
		if (value != iptun->iptun_encaplimit) {
			iptun->iptun_encaplimit = (uint8_t)value;
			iptun_headergen(iptun, B_TRUE);
		}
		break;
	case MAC_PROP_MTU: {
		uint32_t maxmtu = iptun_get_maxmtu(iptun, NULL, 0);

		if (value < iptun->iptun_typeinfo->iti_minmtu ||
		    value > maxmtu) {
			err = EINVAL;
			break;
		}
		/*
		 * An administratively set MTU disables automatic MTU
		 * recalculation; note the flag is set even if the value is
		 * unchanged.
		 */
		iptun->iptun_flags |= IPTUN_FIXED_MTU;
		if (value != iptun->iptun_mtu) {
			iptun->iptun_mtu = value;
			iptun_task_dispatch(iptun, IPTUN_TASK_MTU_UPDATE);
		}
		break;
	}
	default:
		err = EINVAL;
	}
	iptun_exit(iptun);
	return (err);
}

/*
 * GLDv3 mc_getprop(9E) callback: report the current, default, or possible
 * range (depending on pr_flags) of the tunnel's settable properties.  When
 * MAC_PROP_POSSIBLE is requested, the answer is built in `range' and copied
 * out at the end.
 */
/* ARGSUSED */
static int
iptun_m_getprop(void *barg, const char *pr_name, mac_prop_id_t pr_num,
    uint_t pr_flags, uint_t pr_valsize, void *pr_val, uint_t *perm)
{
	iptun_t			*iptun = barg;
	mac_propval_range_t	range;
	boolean_t		is_default = (pr_flags & MAC_PROP_DEFAULT);
	boolean_t		is_possible = (pr_flags & MAC_PROP_POSSIBLE);
	int			err;

	if ((err = iptun_enter(iptun)) != 0)
		return (err);

	/* Only the DEFAULT and POSSIBLE flavors are supported. */
	if ((pr_flags & ~(MAC_PROP_DEFAULT | MAC_PROP_POSSIBLE)) != 0) {
		err = ENOTSUP;
		goto done;
	}
	if (is_default && is_possible) {
		err = EINVAL;
		goto done;
	}

	*perm = MAC_PROP_PERM_RW;

	/* Validate the output buffer size up front. */
	if (is_possible) {
		if (pr_valsize < sizeof (mac_propval_range_t)) {
			err = EINVAL;
			goto done;
		}
		range.mpr_count = 1;
		range.mpr_type = MAC_PROPVAL_UINT32;
	} else if (pr_valsize < sizeof (uint32_t)) {
		err = EINVAL;
		goto done;
	}

	switch (pr_num) {
	case MAC_PROP_IPTUN_HOPLIMIT:
		if (is_possible) {
			range.range_uint32[0].mpur_min = IPTUN_MIN_HOPLIMIT;
			range.range_uint32[0].mpur_max = IPTUN_MAX_HOPLIMIT;
		} else if (is_default) {
			*(uint32_t *)pr_val = IPTUN_DEFAULT_HOPLIMIT;
		} else {
			*(uint32_t *)pr_val = iptun->iptun_hoplimit;
		}
		break;
	case MAC_PROP_IPTUN_ENCAPLIMIT:
		if (iptun->iptun_typeinfo->iti_type != IPTUN_TYPE_IPV6) {
			err = ENOTSUP;
			goto done;
		}
		if (is_possible) {
			range.range_uint32[0].mpur_min = IPTUN_MIN_ENCAPLIMIT;
			range.range_uint32[0].mpur_max = IPTUN_MAX_ENCAPLIMIT;
		} else if (is_default) {
			*(uint32_t *)pr_val = IPTUN_DEFAULT_ENCAPLIMIT;
		} else {
			*(uint32_t *)pr_val = iptun->iptun_encaplimit;
		}
		break;
	case MAC_PROP_MTU: {
		uint32_t maxmtu = iptun_get_maxmtu(iptun, NULL, 0);

		if (is_possible) {
			range.range_uint32[0].mpur_min =
			    iptun->iptun_typeinfo->iti_minmtu;
			range.range_uint32[0].mpur_max = maxmtu;
		} else {
			/*
			 * The MAC module knows the current value and should
			 * never call us for it.  There is also no default
			 * MTU, as by default, it is a dynamic property.
			 */
			err = ENOTSUP;
			goto done;
		}
		break;
	}
	default:
		err = EINVAL;
		goto done;
	}
	if (is_possible)
		bcopy(&range, pr_val, sizeof (range));
done:
	iptun_exit(iptun);
	return (err);
}

/*
 * Return the total number of tunnels across all netstacks.  Read without
 * synchronization; callers get a snapshot only.
 */
uint_t
iptun_count(void)
{
	return (iptun_tunnelcount);
}

/*
 * Enter an iptun_t exclusively.  This is essentially just a mutex, but we
 * don't allow iptun_enter() to succeed on a tunnel if it's in the process of
 * being deleted.
 */
static int
iptun_enter(iptun_t *iptun)
{
	mutex_enter(&iptun->iptun_lock);
	/*
	 * While a deletion is pending, wait on iptun_enter_cv for it to
	 * either complete (leaving IPTUN_CONDEMNED set) or be resolved.
	 */
	while (iptun->iptun_flags & IPTUN_DELETE_PENDING)
		cv_wait(&iptun->iptun_enter_cv, &iptun->iptun_lock);
	if (iptun->iptun_flags & IPTUN_CONDEMNED) {
		mutex_exit(&iptun->iptun_lock);
		return (ENOENT);
	}
	/* Success: return with iptun_lock held; released in iptun_exit(). */
	return (0);
}

/*
 * Exit the tunnel entered in iptun_enter().
 */
static void
iptun_exit(iptun_t *iptun)
{
	mutex_exit(&iptun->iptun_lock);
}

/*
 * Enter the IP tunnel instance by datalink ID.  iptun_hash_lock is held
 * across both the lookup and iptun_enter() so that the iptun_t cannot be
 * deleted between the two steps (see the comment above iptun_hash).  On
 * success, *iptun is entered; on failure *iptun is set to NULL.
 */
static int
iptun_enter_by_linkid(datalink_id_t linkid, iptun_t **iptun)
{
	int err;

	mutex_enter(&iptun_hash_lock);
	if (mod_hash_find(iptun_hash, IPTUN_HASH_KEY(linkid),
	    (mod_hash_val_t *)iptun) == 0)
		err = iptun_enter(*iptun);
	else
		err = ENOENT;
	if (err != 0)
		*iptun = NULL;
	mutex_exit(&iptun_hash_lock);
	return (err);
}

/*
 * Handle tasks that were deferred through the iptun_taskq because they require
 * calling up to the mac module, and we can't call up to the mac module while
 * holding locks.
 *
 * This is tricky to get right without introducing race conditions and
 * deadlocks with the mac module, as we cannot issue an upcall while in the
 * iptun_t.  The reason is that upcalls may try and enter the mac perimeter,
 * while iptun callbacks (such as iptun_m_setprop()) called from the mac
 * module will already have the perimeter held, and will then try and enter
 * the iptun_t.  You can see the lock ordering problem with this; this will
 * deadlock.
 *
 * The safe way to do this is to enter the iptun_t in question and copy the
 * information we need out of it so that we can exit it and know that the
 * information being passed up to the upcalls won't be subject to modification
 * by other threads.  The problem now is that we need to exit it prior to
 * issuing the upcall, but once we do this, a thread could come along and
 * delete the iptun_t and thus the mac handle required to issue the upcall.
 * To prevent this, we set the IPTUN_UPCALL_PENDING flag prior to exiting the
 * iptun_t.  This flag is the condition associated with iptun_upcall_cv, which
 * iptun_delete() will cv_wait() on.  When the upcall completes, we clear
 * IPTUN_UPCALL_PENDING and cv_signal() any potentially waiting
 * iptun_delete().  We can thus still safely use iptun->iptun_mh after having
 * exited the iptun_t.
 */
static void
iptun_task_cb(void *arg)
{
	iptun_task_data_t	*itd = arg;
	iptun_task_t		task = itd->itd_task;
	datalink_id_t		linkid = itd->itd_linkid;
	iptun_t			*iptun;
	uint32_t		mtu;
	iptun_addr_t		addr;
	link_state_t		linkstate;
	size_t			header_size;
	iptun_header_t		header;

	/* The task data was allocated by iptun_task_dispatch(); free it. */
	kmem_free(itd, sizeof (*itd));

	/*
	 * Note that if the lookup fails, it's because the tunnel was deleted
	 * between the time the task was dispatched and now.  That isn't an
	 * error.
	 */
	if (iptun_enter_by_linkid(linkid, &iptun) != 0)
		return;

	/* Keeps iptun_mh valid after we drop the lock; see comment above. */
	iptun->iptun_flags |= IPTUN_UPCALL_PENDING;

	/* Snapshot the state needed by the upcall while inside the iptun_t. */
	switch (task) {
	case IPTUN_TASK_MTU_UPDATE:
		mtu = iptun->iptun_mtu;
		break;
	case IPTUN_TASK_LADDR_UPDATE:
		addr = iptun->iptun_laddr;
		break;
	case IPTUN_TASK_RADDR_UPDATE:
		addr = iptun->iptun_raddr;
		break;
	case IPTUN_TASK_LINK_UPDATE:
		linkstate = IS_IPTUN_RUNNING(iptun) ?
		    LINK_STATE_UP : LINK_STATE_DOWN;
		break;
	case IPTUN_TASK_PDATA_UPDATE:
		header_size = iptun->iptun_header_size;
		header = iptun->iptun_header;
		break;
	default:
		ASSERT(0);
	}

	iptun_exit(iptun);

	/* Issue the upcall using only the snapshot taken above. */
	switch (task) {
	case IPTUN_TASK_MTU_UPDATE:
		(void) mac_maxsdu_update(iptun->iptun_mh, mtu);
		break;
	case IPTUN_TASK_LADDR_UPDATE:
		mac_unicst_update(iptun->iptun_mh, (uint8_t *)&addr.ia_addr);
		break;
	case IPTUN_TASK_RADDR_UPDATE:
		mac_dst_update(iptun->iptun_mh, (uint8_t *)&addr.ia_addr);
		break;
	case IPTUN_TASK_LINK_UPDATE:
		mac_link_update(iptun->iptun_mh, linkstate);
		break;
	case IPTUN_TASK_PDATA_UPDATE:
		/* A zero header_size means "no plugin header template". */
		if (mac_pdata_update(iptun->iptun_mh,
		    header_size == 0 ? NULL : &header, header_size) != 0)
			atomic_inc_64(&iptun->iptun_taskq_fail);
		break;
	}

	/* Wake any iptun_delete() waiting for this upcall to finish. */
	mutex_enter(&iptun->iptun_lock);
	iptun->iptun_flags &= ~IPTUN_UPCALL_PENDING;
	cv_signal(&iptun->iptun_upcall_cv);
	mutex_exit(&iptun->iptun_lock);
}

/*
 * Queue an iptun_task_cb() invocation for the given tunnel.  Called with
 * locks held, so allocation and dispatch are both non-blocking; on either
 * failure the update is silently lost and only iptun_taskq_fail is bumped.
 */
static void
iptun_task_dispatch(iptun_t *iptun, iptun_task_t iptun_task)
{
	iptun_task_data_t	*itd;

	itd = kmem_alloc(sizeof (*itd), KM_NOSLEEP);
	if (itd == NULL) {
		atomic_inc_64(&iptun->iptun_taskq_fail);
		return;
	}
	itd->itd_task = iptun_task;
	itd->itd_linkid = iptun->iptun_linkid;
	if (ddi_taskq_dispatch(iptun_taskq, iptun_task_cb, itd, DDI_NOSLEEP)) {
		atomic_inc_64(&iptun->iptun_taskq_fail);
		kmem_free(itd, sizeof (*itd));
	}
}

/*
 * Convert an iptun_addr_t to sockaddr_storage.
 */
static void
iptun_getaddr(iptun_addr_t *iptun_addr, struct sockaddr_storage *ss)
{
	struct sockaddr_in	*sin;
	struct sockaddr_in6	*sin6;

	bzero(ss, sizeof (*ss));
	switch (iptun_addr->ia_family) {
	case AF_INET:
		sin = (struct sockaddr_in *)ss;
		sin->sin_addr.s_addr = iptun_addr->ia_addr.iau_addr4;
		break;
	case AF_INET6:
		sin6 = (struct sockaddr_in6 *)ss;
		sin6->sin6_addr = iptun_addr->ia_addr.iau_addr6;
		break;
	default:
		ASSERT(0);
	}
	ss->ss_family = iptun_addr->ia_family;
}

/*
 * General purpose function to set an IP tunnel source or destination address.
 * Rejects addresses whose family doesn't match the tunnel type, and
 * addresses that cannot be valid tunnel endpoints (IPv4 any/broadcast/
 * multicast; IPv6 unspecified/multicast/v4-mapped).
 */
static int
iptun_setaddr(iptun_type_t iptun_type, iptun_addr_t *iptun_addr,
    const struct sockaddr_storage *ss)
{
	if (!IPTUN_ADDR_MATCH(iptun_type, ss->ss_family))
		return (EINVAL);

	switch (ss->ss_family) {
	case AF_INET: {
		struct sockaddr_in *sin = (struct sockaddr_in *)ss;

		if ((sin->sin_addr.s_addr == INADDR_ANY) ||
		    (sin->sin_addr.s_addr == INADDR_BROADCAST) ||
		    CLASSD(sin->sin_addr.s_addr)) {
			return (EADDRNOTAVAIL);
		}
		iptun_addr->ia_addr.iau_addr4 = sin->sin_addr.s_addr;
		break;
	}
	case AF_INET6: {
		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)ss;

		if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) ||
		    IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr) ||
		    IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
			return (EADDRNOTAVAIL);
		}
		iptun_addr->ia_addr.iau_addr6 = sin6->sin6_addr;
		break;
	}
	default:
		return (EAFNOSUPPORT);
	}
	iptun_addr->ia_family = ss->ss_family;
	return (0);
}

/* Set the tunnel's local (source) address. */
static int
iptun_setladdr(iptun_t *iptun, const struct sockaddr_storage *laddr)
{
	return (iptun_setaddr(iptun->iptun_typeinfo->iti_type,
	    &iptun->iptun_laddr, laddr));
}

/*
 * Set the tunnel's remote (destination) address.  Invalid for tunnel types
 * (e.g. 6to4) that have no fixed remote endpoint.
 */
static int
iptun_setraddr(iptun_t *iptun, const struct sockaddr_storage *raddr)
{
	if (!(iptun->iptun_typeinfo->iti_hasraddr))
		return (EINVAL);
	return (iptun_setaddr(iptun->iptun_typeinfo->iti_type,
	    &iptun->iptun_raddr, raddr));
}

static boolean_t
iptun_canbind(iptun_t *iptun)
{
	/*
	 * A tunnel may bind when its source address has been set, and if its
	 * tunnel type requires one, also its destination address.
	 */
	return ((iptun->iptun_flags & IPTUN_LADDR) &&
	    ((iptun->iptun_flags & IPTUN_RADDR) ||
	    !(iptun->iptun_typeinfo->iti_hasraddr)));
}

/*
 * Verify that the local address is valid, and insert in the fanout
 */
static int
iptun_bind(iptun_t *iptun)
{
	conn_t			*connp = iptun->iptun_connp;
	int			error = 0;
	ip_xmit_attr_t		*ixa;
	iulp_t			uinfo;
	ip_stack_t		*ipst = connp->conn_netstack->netstack_ip;

	/* Get an exclusive ixa for this thread, and replace conn_ixa */
	ixa = conn_get_ixa(connp, B_TRUE);
	if (ixa == NULL)
		return (ENOMEM);
	ASSERT(ixa->ixa_refcnt >= 2);
	ASSERT(ixa == connp->conn_ixa);

	/* We create PMTU state including for 6to4 */
	ixa->ixa_flags |= IXAF_PMTU_DISCOVERY;

	ASSERT(iptun_canbind(iptun));

	mutex_enter(&connp->conn_lock);
	/*
	 * Note that conn_proto can't be set since the upper protocol
	 * can be both 41 and 4 when IPv6 and IPv4 are over the same tunnel.
	 * ipcl_iptun_classify doesn't use conn_proto.
	 */
	connp->conn_ipversion = iptun->iptun_typeinfo->iti_ipvers;

	switch (iptun->iptun_typeinfo->iti_type) {
	case IPTUN_TYPE_IPV4:
		IN6_IPADDR_TO_V4MAPPED(iptun->iptun_laddr4,
		    &connp->conn_laddr_v6);
		IN6_IPADDR_TO_V4MAPPED(iptun->iptun_raddr4,
		    &connp->conn_faddr_v6);
		ixa->ixa_flags |= IXAF_IS_IPV4;
		/* The local address must be a usable (up) unicast address. */
		if (ip_laddr_verify_v4(iptun->iptun_laddr4, IPCL_ZONEID(connp),
		    ipst, B_FALSE) != IPVL_UNICAST_UP) {
			mutex_exit(&connp->conn_lock);
			error = EADDRNOTAVAIL;
			goto done;
		}
		break;
	case IPTUN_TYPE_IPV6:
		connp->conn_laddr_v6 = iptun->iptun_laddr6;
		connp->conn_faddr_v6 = iptun->iptun_raddr6;
		ixa->ixa_flags &= ~IXAF_IS_IPV4;
		/* We use a zero scopeid for now */
		if (ip_laddr_verify_v6(&iptun->iptun_laddr6, IPCL_ZONEID(connp),
		    ipst, B_FALSE, 0) != IPVL_UNICAST_UP) {
			mutex_exit(&connp->conn_lock);
			error = EADDRNOTAVAIL;
			goto done;
		}
		break;
	case IPTUN_TYPE_6TO4:
		/* 6to4 has no fixed destination; bind with faddr ANY. */
		IN6_IPADDR_TO_V4MAPPED(iptun->iptun_laddr4,
		    &connp->conn_laddr_v6);
		IN6_IPADDR_TO_V4MAPPED(INADDR_ANY, &connp->conn_faddr_v6);
		ixa->ixa_flags |= IXAF_IS_IPV4;
		mutex_exit(&connp->conn_lock);

		/* A "down" local address is acceptable for 6to4. */
		switch (ip_laddr_verify_v4(iptun->iptun_laddr4,
		    IPCL_ZONEID(connp), ipst, B_FALSE)) {
		case IPVL_UNICAST_UP:
		case IPVL_UNICAST_DOWN:
			break;
		default:
			error = EADDRNOTAVAIL;
			goto done;
		}
		/* No fixed destination, so skip the connect step below. */
		goto insert;
	}

	/* In case previous destination was multirt */
	ip_attr_newdst(ixa);

	/*
	 * When we set a tunnel's destination address, we do not
	 * care if the destination is reachable.  Transient routing
	 * issues should not inhibit the creation of a tunnel
	 * interface, for example.  Thus we pass B_FALSE here.
	 */
	connp->conn_saddr_v6 = connp->conn_laddr_v6;
	mutex_exit(&connp->conn_lock);

	/* As long as the MTU is large we avoid fragmentation */
	ixa->ixa_flags |= IXAF_DONTFRAG | IXAF_PMTU_IPV4_DF;

	/* We handle IPsec in iptun_output_common */
	error = ip_attr_connect(connp, ixa, &connp->conn_saddr_v6,
	    &connp->conn_faddr_v6, &connp->conn_faddr_v6, 0,
	    &connp->conn_saddr_v6, &uinfo, 0);

	if (error != 0)
		goto done;

	/* saddr shouldn't change since it was already set */
	ASSERT(IN6_ARE_ADDR_EQUAL(&connp->conn_laddr_v6,
	    &connp->conn_saddr_v6));

	/* We set IXAF_VERIFY_PMTU to catch PMTU increases */
	ixa->ixa_flags |= IXAF_VERIFY_PMTU;
	ASSERT(uinfo.iulp_mtu != 0);

	/*
	 * Allow setting new policies.
	 * The addresses/ports are already set, thus the IPsec policy calls
	 * can handle their passed-in conn's.
	 */
	connp->conn_policy_cached = B_FALSE;

insert:
	error = ipcl_conn_insert(connp);
	if (error != 0)
		goto done;

	/* Record this as the "last" send even though we haven't sent any */
	connp->conn_v6lastdst = connp->conn_faddr_v6;

	iptun->iptun_flags |= IPTUN_BOUND;
	/*
	 * Now that we're bound with ip below us, this is a good
	 * time to initialize the destination path MTU and to
	 * re-calculate the tunnel's link MTU.
	 */
	(void) iptun_update_mtu(iptun, ixa, 0);

	if (IS_IPTUN_RUNNING(iptun))
		iptun_task_dispatch(iptun, IPTUN_TASK_LINK_UPDATE);

done:
	ixa_refrele(ixa);
	return (error);
}

/*
 * Undo iptun_bind(): remove the conn from the fanout and clear IPTUN_BOUND.
 * Callable either while the iptun_t is entered, or lock-free during
 * deletion once IPTUN_CONDEMNED guarantees exclusivity.
 */
static void
iptun_unbind(iptun_t *iptun)
{
	ASSERT(iptun->iptun_flags & IPTUN_BOUND);
	ASSERT(mutex_owned(&iptun->iptun_lock) ||
	    (iptun->iptun_flags & IPTUN_CONDEMNED));
	ip_unbind(iptun->iptun_connp);
	iptun->iptun_flags &= ~IPTUN_BOUND;
	/* A condemned tunnel's mac handle is going away; skip the update. */
	if (!(iptun->iptun_flags & IPTUN_CONDEMNED))
		iptun_task_dispatch(iptun, IPTUN_TASK_LINK_UPDATE);
}

/*
 * Re-generate the template data-link header for a given IP tunnel given the
 * tunnel's current parameters.  A header_size of zero means "no custom
 * header needed" (all-default parameters).  If update_mac is set, the new
 * template is pushed up to mac via IPTUN_TASK_PDATA_UPDATE.
 */
static void
iptun_headergen(iptun_t *iptun, boolean_t update_mac)
{
	switch (iptun->iptun_typeinfo->iti_ipvers) {
	case IPV4_VERSION:
		/*
		 * We only need to use a custom IP header if the administrator
		 * has supplied a non-default hoplimit.
		 */
		if (iptun->iptun_hoplimit == IPTUN_DEFAULT_HOPLIMIT) {
			iptun->iptun_header_size = 0;
			break;
		}
		iptun->iptun_header_size = sizeof (ipha_t);
		iptun->iptun_header4.ipha_version_and_hdr_length =
		    IP_SIMPLE_HDR_VERSION;
		iptun->iptun_header4.ipha_fragment_offset_and_flags =
		    htons(IPH_DF);
		iptun->iptun_header4.ipha_ttl = iptun->iptun_hoplimit;
		break;
	case IPV6_VERSION: {
		ip6_t	*ip6hp = &iptun->iptun_header6.it6h_ip6h;

		/*
		 * We only need to use a custom IPv6 header if either the
		 * administrator has supplied a non-default hoplimit, or we
		 * need to include an encapsulation limit option in the outer
		 * header.
		 */
		if (iptun->iptun_hoplimit == IPTUN_DEFAULT_HOPLIMIT &&
		    iptun->iptun_encaplimit == 0) {
			iptun->iptun_header_size = 0;
			break;
		}

		(void) memset(ip6hp, 0, sizeof (*ip6hp));
		if (iptun->iptun_encaplimit == 0) {
			iptun->iptun_header_size = sizeof (ip6_t);
			ip6hp->ip6_nxt = IPPROTO_NONE;
		} else {
			iptun_encaplim_t	*iel;

			iptun->iptun_header_size = sizeof (iptun_ipv6hdrs_t);
			/*
			 * The mac_ipv6 plugin requires ip6_plen to be in host
			 * byte order and reflect the extension headers
			 * present in the template.  The actual network byte
			 * order ip6_plen will be set on a per-packet basis on
			 * transmit.
			 */
			ip6hp->ip6_plen = sizeof (*iel);
			ip6hp->ip6_nxt = IPPROTO_DSTOPTS;
			iel = &iptun->iptun_header6.it6h_encaplim;
			*iel = iptun_encaplim_init;
			iel->iel_telopt.ip6ot_encap_limit =
			    iptun->iptun_encaplimit;
		}

		ip6hp->ip6_hlim = iptun->iptun_hoplimit;
		break;
	}
	}

	if (update_mac)
		iptun_task_dispatch(iptun, IPTUN_TASK_PDATA_UPDATE);
}

/*
 * Insert inbound and outbound IPv4 and IPv6 policy into the given policy
 * head.  Returns B_FALSE on the first insertion failure (earlier
 * insertions are not rolled back here; the caller recovers).
 */
static boolean_t
iptun_insert_simple_policies(ipsec_policy_head_t *ph, ipsec_act_t *actp,
    uint_t n, netstack_t *ns)
{
	int f = IPSEC_AF_V4;

	if (!ipsec_polhead_insert(ph, actp, n, f, IPSEC_TYPE_INBOUND, ns) ||
	    !ipsec_polhead_insert(ph, actp, n, f, IPSEC_TYPE_OUTBOUND, ns))
		return (B_FALSE);

	f = IPSEC_AF_V6;
	return (ipsec_polhead_insert(ph, actp, n, f, IPSEC_TYPE_INBOUND, ns) &&
	    ipsec_polhead_insert(ph, actp, n, f, IPSEC_TYPE_OUTBOUND, ns));
}

/*
 * Used to set IPsec policy when policy is set through the IPTUN_CREATE or
 * IPTUN_MODIFY ioctls.
 */
static int
iptun_set_sec_simple(iptun_t *iptun, const ipsec_req_t *ipsr)
{
	int		rc = 0;
	uint_t		nact;
	ipsec_act_t	*actp = NULL;
	boolean_t	clear_all, old_policy = B_FALSE;
	ipsec_tun_pol_t	*itp;
	char		name[MAXLINKNAMELEN];
	uint64_t	gen;
	netstack_t	*ns = iptun->iptun_ns;

	/* Can't specify self-encap on a tunnel. */
	if (ipsr->ipsr_self_encap_req != 0)
		return (EINVAL);

	/*
	 * If it's a "clear-all" entry, unset the security flags and resume
	 * normal cleartext (or inherit-from-global) policy.
	 */
	clear_all = ((ipsr->ipsr_ah_req & IPTUN_IPSEC_REQ_MASK) == 0 &&
	    (ipsr->ipsr_esp_req & IPTUN_IPSEC_REQ_MASK) == 0);

	/* The caller must already hold this iptun (via iptun_enter()). */
	ASSERT(mutex_owned(&iptun->iptun_lock));
	itp = iptun->iptun_itp;
	if (itp == NULL) {
		/*
		 * No tunnel policy yet.  Clearing nothing is a no-op;
		 * otherwise create a fresh ITP named after the datalink.
		 */
		if (clear_all)
			goto bail;
		if ((rc = dls_mgmt_get_linkinfo(iptun->iptun_linkid, name, NULL,
		    NULL, NULL)) != 0)
			goto bail;
		ASSERT(name[0] != '\0');
		if ((itp = create_tunnel_policy(name, &rc, &gen, ns)) == NULL)
			goto bail;
		iptun->iptun_itp = itp;
	}

	/* Allocate the actvec now, before holding itp or polhead locks. */
	ipsec_actvec_from_req(ipsr, &actp, &nact, ns);
	if (actp == NULL) {
		rc = ENOMEM;
		goto bail;
	}

	/*
	 * Just write on the active polhead.  Save the primary/secondary stuff
	 * for spdsock operations.
	 *
	 * Mutex because we need to write to the polhead AND flags atomically.
	 * Other threads will acquire the polhead lock as a reader if the
	 * (unprotected) flag is set.
	 */
	mutex_enter(&itp->itp_lock);
	if (itp->itp_flags & ITPF_P_TUNNEL) {
		/* Oops, we lost a race.  Let's get out of here. */
		rc = EBUSY;
		goto mutex_bail;
	}
	old_policy = ((itp->itp_flags & ITPF_P_ACTIVE) != 0);

	if (old_policy) {
		/*
		 * Preserve the existing active policy in the inactive
		 * polhead so we can roll back if the update fails below.
		 */
		ITPF_CLONE(itp->itp_flags);
		rc = ipsec_copy_polhead(itp->itp_policy, itp->itp_inactive, ns);
		if (rc != 0) {
			/* inactive has already been cleared. */
			itp->itp_flags &= ~ITPF_IFLAGS;
			goto mutex_bail;
		}
		rw_enter(&itp->itp_policy->iph_lock, RW_WRITER);
		ipsec_polhead_flush(itp->itp_policy, ns);
	} else {
		/* Else assume itp->itp_policy is already flushed. */
		rw_enter(&itp->itp_policy->iph_lock, RW_WRITER);
	}

	if (clear_all) {
		/* Restore normal policy (active polhead already empty). */
		ASSERT(avl_numnodes(&itp->itp_policy->iph_rulebyid) == 0);
		itp->itp_flags &= ~ITPF_PFLAGS;
		rw_exit(&itp->itp_policy->iph_lock);
		old_policy = B_FALSE;	/* Clear out the inactive one too. */
		goto recover_bail;
	}

	if (iptun_insert_simple_policies(itp->itp_policy, actp, nact, ns)) {
		rw_exit(&itp->itp_policy->iph_lock);
		/*
		 * Adjust MTU and make sure the DL side knows what's up.
		 */
		itp->itp_flags = ITPF_P_ACTIVE;
		(void) iptun_update_mtu(iptun, NULL, 0);
		old_policy = B_FALSE;	/* Blank out inactive - we succeeded */
	} else {
		rw_exit(&itp->itp_policy->iph_lock);
		rc = ENOMEM;
	}

recover_bail:
	if (old_policy) {
		/* Recover the saved policy in the active polhead. */
		ipsec_swap_policy(itp->itp_policy, itp->itp_inactive, ns);
		ITPF_SWAP(itp->itp_flags);
	}

	/* Clear policy in inactive polhead. */
	itp->itp_flags &= ~ITPF_IFLAGS;
	rw_enter(&itp->itp_inactive->iph_lock, RW_WRITER);
	ipsec_polhead_flush(itp->itp_inactive, ns);
	rw_exit(&itp->itp_inactive->iph_lock);

mutex_bail:
	mutex_exit(&itp->itp_lock);

bail:
	if (actp != NULL)
		ipsec_actvec_free(actp, nact);

	return (rc);
}

/*
 * Look up the type information for the given tunnel type.  If the type is
 * not found, this returns the table's IPTUN_TYPE_UNKNOWN sentinel entry;
 * callers check iti_type to detect that case.
 */
static iptun_typeinfo_t *
iptun_gettypeinfo(iptun_type_t type)
{
	int i;

	for (i = 0; iptun_type_table[i].iti_type != IPTUN_TYPE_UNKNOWN; i++) {
		if (iptun_type_table[i].iti_type == type)
			break;
	}
	return (&iptun_type_table[i]);
}

/*
 * Set the parameters included in ik on the tunnel iptun.  Parameters that can
 * only be set at creation time are set in iptun_create().  On failure, any
 * address changes made here are rolled back (see the done: label below).
 */
static int
iptun_setparams(iptun_t *iptun, const iptun_kparams_t *ik)
{
	int		err = 0;
	netstack_t	*ns = iptun->iptun_ns;
	iptun_addr_t	orig_laddr, orig_raddr;
	uint_t		orig_flags = iptun->iptun_flags;

	if (ik->iptun_kparam_flags & IPTUN_KPARAM_LADDR) {
		/* Remember the old local address for rollback on error. */
		if (orig_flags & IPTUN_LADDR)
			orig_laddr = iptun->iptun_laddr;
		if ((err = iptun_setladdr(iptun, &ik->iptun_kparam_laddr)) != 0)
			return (err);
		iptun->iptun_flags |= IPTUN_LADDR;
	}

	if (ik->iptun_kparam_flags & IPTUN_KPARAM_RADDR) {
		/* Remember the old remote address for rollback on error. */
		if (orig_flags & IPTUN_RADDR)
			orig_raddr = iptun->iptun_raddr;
		if ((err = iptun_setraddr(iptun, &ik->iptun_kparam_raddr)) != 0)
			goto done;
		iptun->iptun_flags |= IPTUN_RADDR;
	}

	if (ik->iptun_kparam_flags & IPTUN_KPARAM_SECINFO) {
		/*
		 * Set IPsec policy originating from the ifconfig(1M) command
		 * line.
		 * This is traditionally called "simple" policy because
		 * the ipsec_req_t (iptun_kparam_secinfo) can only describe a
		 * simple policy of "do ESP on everything" and/or "do AH on
		 * everything" (as opposed to the rich policy that can be
		 * defined with ipsecconf(1M)).
		 */
		if (iptun->iptun_typeinfo->iti_type == IPTUN_TYPE_6TO4) {
			/*
			 * Can't set security properties for automatic
			 * tunnels.
			 */
			err = EINVAL;
			goto done;
		}

		if (!ipsec_loaded(ns->netstack_ipsec)) {
			/* If IPsec can be loaded, try and load it now. */
			if (ipsec_failed(ns->netstack_ipsec)) {
				err = EPROTONOSUPPORT;
				goto done;
			}
			ipsec_loader_loadnow(ns->netstack_ipsec);
			/*
			 * ipsec_loader_loadnow() returns while IPsec is
			 * loaded asynchronously.  While a method exists to
			 * wait for IPsec to load (ipsec_loader_wait()), it
			 * requires use of a STREAMS queue to do a qwait().
			 * We're not in STREAMS context here, and so we can't
			 * use it.  This is not a problem in practice because
			 * in the vast majority of cases, key management and
			 * global policy will have loaded before any tunnels
			 * are plumbed, and so IPsec will already have been
			 * loaded.
			 */
			err = EAGAIN;
			goto done;
		}

		err = iptun_set_sec_simple(iptun, &ik->iptun_kparam_secinfo);
		if (err == 0) {
			iptun->iptun_flags |= IPTUN_SIMPLE_POLICY;
			iptun->iptun_simple_policy = ik->iptun_kparam_secinfo;
		}
	}
done:
	if (err != 0) {
		/* Restore original source and destination. */
		if (ik->iptun_kparam_flags & IPTUN_KPARAM_LADDR &&
		    (orig_flags & IPTUN_LADDR))
			iptun->iptun_laddr = orig_laddr;
		if ((ik->iptun_kparam_flags & IPTUN_KPARAM_RADDR) &&
		    (orig_flags & IPTUN_RADDR))
			iptun->iptun_raddr = orig_raddr;
		iptun->iptun_flags = orig_flags;
	}
	return (err);
}

/*
 * Register this tunnel with GLDv3 (mac_register()).  On success the
 * IPTUN_MAC_REGISTERED flag is set and iptun->iptun_mh holds the mac handle.
 */
static int
iptun_register(iptun_t *iptun)
{
	mac_register_t	*mac;
	int		err;

	ASSERT(!(iptun->iptun_flags & IPTUN_MAC_REGISTERED));

	if ((mac = mac_alloc(MAC_VERSION)) == NULL)
		return (EINVAL);

	mac->m_type_ident = iptun->iptun_typeinfo->iti_ident;
	mac->m_driver = iptun;
	mac->m_dip = iptun_dip;
	/* Let the mac module choose the instance number. */
	mac->m_instance = (uint_t)-1;
	mac->m_src_addr = (uint8_t *)&iptun->iptun_laddr.ia_addr;
	/* Only point-to-point tunnel types carry a destination address. */
	mac->m_dst_addr = iptun->iptun_typeinfo->iti_hasraddr ?
	    (uint8_t *)&iptun->iptun_raddr.ia_addr : NULL;
	mac->m_callbacks = &iptun_m_callbacks;
	mac->m_min_sdu = iptun->iptun_typeinfo->iti_minmtu;
	mac->m_max_sdu = iptun->iptun_mtu;
	if (iptun->iptun_header_size != 0) {
		/* Publish the precomputed encapsulation header, if any. */
		mac->m_pdata = &iptun->iptun_header;
		mac->m_pdata_size = iptun->iptun_header_size;
	}
	if ((err = mac_register(mac, &iptun->iptun_mh)) == 0)
		iptun->iptun_flags |= IPTUN_MAC_REGISTERED;
	mac_free(mac);
	return (err);
}

/*
 * Undo iptun_register().  Clears IPTUN_MAC_REGISTERED only if
 * mac_unregister() succeeds (it can fail, e.g. while references remain).
 */
static int
iptun_unregister(iptun_t *iptun)
{
	int err;

	ASSERT(iptun->iptun_flags & IPTUN_MAC_REGISTERED);
	if ((err = mac_unregister(iptun->iptun_mh)) == 0)
		iptun->iptun_flags &= ~IPTUN_MAC_REGISTERED;
	return (err);
}

/*
 * Create and initialize the conn_t representing this tunnel's "outer" IP
 * connection, wiring up the receive and ICMP upcalls into this driver.
 * Returns NULL on allocation failure.
 */
static conn_t *
iptun_conn_create(iptun_t *iptun, netstack_t *ns, cred_t *credp)
{
	conn_t *connp;

	if ((connp = ipcl_conn_create(IPCL_IPCCONN, KM_NOSLEEP, ns)) == NULL)
		return (NULL);

	connp->conn_flags |= IPCL_IPTUN;
	connp->conn_iptun = iptun;
	connp->conn_recv = iptun_input;
	connp->conn_recvicmp = iptun_input_icmp;
1254 connp->conn_verifyicmp = iptun_verifyicmp; 1255 1256 /* 1257 * Register iptun_notify to listen to capability changes detected by IP. 1258 * This upcall is made in the context of the call to conn_ip_output. 1259 */ 1260 connp->conn_ixa->ixa_notify = iptun_notify; 1261 connp->conn_ixa->ixa_notify_cookie = iptun; 1262 1263 /* 1264 * For exclusive stacks we set conn_zoneid to GLOBAL_ZONEID as is done 1265 * for all other conn_t's. 1266 * 1267 * Note that there's an important distinction between iptun_zoneid and 1268 * conn_zoneid. The conn_zoneid is set to GLOBAL_ZONEID in non-global 1269 * exclusive stack zones to make the ip module believe that the 1270 * non-global zone is actually a global zone. Therefore, when 1271 * interacting with the ip module, we must always use conn_zoneid. 1272 */ 1273 connp->conn_zoneid = (ns->netstack_stackid == GLOBAL_NETSTACKID) ? 1274 crgetzoneid(credp) : GLOBAL_ZONEID; 1275 connp->conn_cred = credp; 1276 /* crfree() is done in ipcl_conn_destroy(), called by CONN_DEC_REF() */ 1277 crhold(connp->conn_cred); 1278 connp->conn_cpid = NOPID; 1279 1280 /* conn_allzones can not be set this early, hence no IPCL_ZONEID */ 1281 connp->conn_ixa->ixa_zoneid = connp->conn_zoneid; 1282 ASSERT(connp->conn_ref == 1); 1283 1284 /* Cache things in ixa without an extra refhold */ 1285 connp->conn_ixa->ixa_cred = connp->conn_cred; 1286 connp->conn_ixa->ixa_cpid = connp->conn_cpid; 1287 if (is_system_labeled()) 1288 connp->conn_ixa->ixa_tsl = crgetlabel(connp->conn_cred); 1289 1290 /* 1291 * Have conn_ip_output drop packets should our outer source 1292 * go invalid 1293 */ 1294 connp->conn_ixa->ixa_flags |= IXAF_VERIFY_SOURCE; 1295 1296 switch (iptun->iptun_typeinfo->iti_ipvers) { 1297 case IPV4_VERSION: 1298 connp->conn_family = AF_INET6; 1299 break; 1300 case IPV6_VERSION: 1301 connp->conn_family = AF_INET; 1302 break; 1303 } 1304 mutex_enter(&connp->conn_lock); 1305 connp->conn_state_flags &= ~CONN_INCIPIENT; 1306 mutex_exit(&connp->conn_lock); 
1307 return (connp); 1308 } 1309 1310 static void 1311 iptun_conn_destroy(conn_t *connp) 1312 { 1313 ip_quiesce_conn(connp); 1314 connp->conn_iptun = NULL; 1315 ASSERT(connp->conn_ref == 1); 1316 CONN_DEC_REF(connp); 1317 } 1318 1319 static iptun_t * 1320 iptun_alloc(void) 1321 { 1322 iptun_t *iptun; 1323 1324 if ((iptun = kmem_cache_alloc(iptun_cache, KM_NOSLEEP)) != NULL) { 1325 bzero(iptun, sizeof (*iptun)); 1326 atomic_inc_32(&iptun_tunnelcount); 1327 } 1328 return (iptun); 1329 } 1330 1331 static void 1332 iptun_free(iptun_t *iptun) 1333 { 1334 ASSERT(iptun->iptun_flags & IPTUN_CONDEMNED); 1335 1336 if (iptun->iptun_flags & IPTUN_HASH_INSERTED) { 1337 iptun_stack_t *iptuns = iptun->iptun_iptuns; 1338 1339 mutex_enter(&iptun_hash_lock); 1340 VERIFY(mod_hash_remove(iptun_hash, 1341 IPTUN_HASH_KEY(iptun->iptun_linkid), 1342 (mod_hash_val_t *)&iptun) == 0); 1343 mutex_exit(&iptun_hash_lock); 1344 iptun->iptun_flags &= ~IPTUN_HASH_INSERTED; 1345 mutex_enter(&iptuns->iptuns_lock); 1346 list_remove(&iptuns->iptuns_iptunlist, iptun); 1347 mutex_exit(&iptuns->iptuns_lock); 1348 } 1349 1350 if (iptun->iptun_flags & IPTUN_BOUND) 1351 iptun_unbind(iptun); 1352 1353 /* 1354 * After iptun_unregister(), there will be no threads executing a 1355 * downcall from the mac module, including in the tx datapath. 1356 */ 1357 if (iptun->iptun_flags & IPTUN_MAC_REGISTERED) 1358 VERIFY(iptun_unregister(iptun) == 0); 1359 1360 if (iptun->iptun_itp != NULL) { 1361 /* 1362 * Remove from the AVL tree, AND release the reference iptun_t 1363 * itself holds on the ITP. 1364 */ 1365 itp_unlink(iptun->iptun_itp, iptun->iptun_ns); 1366 ITP_REFRELE(iptun->iptun_itp, iptun->iptun_ns); 1367 iptun->iptun_itp = NULL; 1368 iptun->iptun_flags &= ~IPTUN_SIMPLE_POLICY; 1369 } 1370 1371 /* 1372 * After ipcl_conn_destroy(), there will be no threads executing an 1373 * upcall from ip (i.e., iptun_input()), and it is then safe to free 1374 * the iptun_t. 
	 */
	if (iptun->iptun_connp != NULL) {
		iptun_conn_destroy(iptun->iptun_connp);
		iptun->iptun_connp = NULL;
	}

	kmem_cache_free(iptun_cache, iptun);
	atomic_dec_32(&iptun_tunnelcount);
}

/*
 * Create a new IP tunnel datalink from the parameters in ik on behalf of
 * credp's zone, register it with GLDv3 and dls, and insert it into the
 * global hash.  On any failure after allocation, the partially-constructed
 * tunnel is condemned and torn down via iptun_free().
 */
int
iptun_create(iptun_kparams_t *ik, cred_t *credp)
{
	iptun_t		*iptun = NULL;
	int		err = 0, mherr;
	char		linkname[MAXLINKNAMELEN];
	ipsec_tun_pol_t	*itp;
	netstack_t	*ns = NULL;
	iptun_stack_t	*iptuns;
	datalink_id_t	tmpid;
	zoneid_t	zoneid = crgetzoneid(credp);
	boolean_t	link_created = B_FALSE;

	/* The tunnel type is mandatory */
	if (!(ik->iptun_kparam_flags & IPTUN_KPARAM_TYPE))
		return (EINVAL);

	/*
	 * Is the linkid that the caller wishes to associate with this new
	 * tunnel assigned to this zone?
	 */
	if (zone_check_datalink(&zoneid, ik->iptun_kparam_linkid) != 0) {
		if (zoneid != GLOBAL_ZONEID)
			return (EINVAL);
	} else if (zoneid == GLOBAL_ZONEID) {
		return (EINVAL);
	}

	/*
	 * Make sure that we're not trying to create a tunnel that has already
	 * been created.
	 */
	if (iptun_enter_by_linkid(ik->iptun_kparam_linkid, &iptun) == 0) {
		iptun_exit(iptun);
		iptun = NULL;
		err = EEXIST;
		goto done;
	}

	/* This netstack hold is handed off to the iptun (iptun_ns). */
	ns = netstack_find_by_cred(credp);
	iptuns = ns->netstack_iptun;

	if ((iptun = iptun_alloc()) == NULL) {
		err = ENOMEM;
		goto done;
	}

	iptun->iptun_linkid = ik->iptun_kparam_linkid;
	iptun->iptun_zoneid = zoneid;
	iptun->iptun_ns = ns;

	iptun->iptun_typeinfo = iptun_gettypeinfo(ik->iptun_kparam_type);
	if (iptun->iptun_typeinfo->iti_type == IPTUN_TYPE_UNKNOWN) {
		err = EINVAL;
		goto done;
	}

	if (ik->iptun_kparam_flags & IPTUN_KPARAM_IMPLICIT)
		iptun->iptun_flags |= IPTUN_IMPLICIT;

	/* Apply the modifiable parameters (addresses, IPsec policy). */
	if ((err = iptun_setparams(iptun, ik)) != 0)
		goto done;

	iptun->iptun_hoplimit = IPTUN_DEFAULT_HOPLIMIT;
	if (iptun->iptun_typeinfo->iti_type == IPTUN_TYPE_IPV6)
		iptun->iptun_encaplimit = IPTUN_DEFAULT_ENCAPLIMIT;

	iptun_headergen(iptun, B_FALSE);

	iptun->iptun_connp = iptun_conn_create(iptun, ns, credp);
	if (iptun->iptun_connp == NULL) {
		err = ENOMEM;
		goto done;
	}

	/* Start with the type's maximum; path-MTU updates will adjust it. */
	iptun->iptun_mtu = iptun->iptun_typeinfo->iti_maxmtu;
	iptun->iptun_dpmtu = iptun->iptun_mtu;

	/*
	 * Find an ITP based on linkname.  If we have parms already set via
	 * the iptun_setparams() call above, it may have created an ITP for
	 * us.  We always try get_tunnel_policy() for DEBUG correctness
	 * checks, and we may wish to refactor this to only check when
	 * iptun_itp is NULL.
	 */
	if ((err = dls_mgmt_get_linkinfo(iptun->iptun_linkid, linkname, NULL,
	    NULL, NULL)) != 0)
		goto done;
	if ((itp = get_tunnel_policy(linkname, ns)) != NULL)
		iptun->iptun_itp = itp;

	/*
	 * See if we have the necessary IP addresses assigned to this tunnel
	 * to try and bind them with ip underneath us.  If we're not ready to
	 * bind yet, then we'll defer the bind operation until the addresses
	 * are modified.
	 */
	if (iptun_canbind(iptun) && ((err = iptun_bind(iptun)) != 0))
		goto done;

	if ((err = iptun_register(iptun)) != 0)
		goto done;

	err = dls_devnet_create(iptun->iptun_mh, iptun->iptun_linkid,
	    iptun->iptun_zoneid);
	if (err != 0)
		goto done;
	link_created = B_TRUE;

	/*
	 * We hash by link-id as that is the key used by all other iptun
	 * interfaces (modify, delete, etc.).
	 */
	if ((mherr = mod_hash_insert(iptun_hash,
	    IPTUN_HASH_KEY(iptun->iptun_linkid), (mod_hash_val_t)iptun)) == 0) {
		mutex_enter(&iptuns->iptuns_lock);
		list_insert_head(&iptuns->iptuns_iptunlist, iptun);
		mutex_exit(&iptuns->iptuns_lock);
		iptun->iptun_flags |= IPTUN_HASH_INSERTED;
	} else if (mherr == MH_ERR_NOMEM) {
		err = ENOMEM;
	} else if (mherr == MH_ERR_DUPLICATE) {
		err = EEXIST;
	} else {
		err = EINVAL;
	}

done:
	/* Only release ns if it was never handed off to an iptun_t. */
	if (iptun == NULL && ns != NULL)
		netstack_rele(ns);
	if (err != 0 && iptun != NULL) {
		if (link_created) {
			(void) dls_devnet_destroy(iptun->iptun_mh, &tmpid,
			    B_TRUE);
		}
		iptun->iptun_flags |= IPTUN_CONDEMNED;
		iptun_free(iptun);
	}
	return (err);
}

/*
 * Delete the IP tunnel datalink identified by linkid.  Fails with EACCES if
 * the tunnel belongs to a different zone, and with EBUSY (via mac_disable())
 * if the MAC is still referenced.
 */
int
iptun_delete(datalink_id_t linkid, cred_t *credp)
{
	int	err;
	iptun_t	*iptun = NULL;

	if ((err = iptun_enter_by_linkid(linkid, &iptun)) != 0)
		return (err);

	/* One cannot delete a tunnel that belongs to another zone. */
	if (iptun->iptun_zoneid != crgetzoneid(credp)) {
		iptun_exit(iptun);
		return (EACCES);
	}

	/*
	 * We need to exit iptun in order to issue calls up the stack such as
	 * dls_devnet_destroy().  If we call up while still in iptun, deadlock
	 * with calls coming down the stack is possible.
	 * We prevent other
	 * threads from entering this iptun after we've exited it by setting
	 * the IPTUN_DELETE_PENDING flag.  This will cause callers of
	 * iptun_enter() to block waiting on iptun_enter_cv.  The assumption
	 * here is that the functions we're calling while IPTUN_DELETE_PENDING
	 * is set don't result in an iptun_enter() call, as that would result
	 * in deadlock.
	 */
	iptun->iptun_flags |= IPTUN_DELETE_PENDING;

	/* Wait for any pending upcall to the mac module to complete. */
	while (iptun->iptun_flags & IPTUN_UPCALL_PENDING)
		cv_wait(&iptun->iptun_upcall_cv, &iptun->iptun_lock);

	iptun_exit(iptun);

	if ((err = dls_devnet_destroy(iptun->iptun_mh, &linkid, B_TRUE)) == 0) {
		/*
		 * mac_disable() will fail with EBUSY if there are references
		 * to the iptun MAC.  If there are none, then mac_disable()
		 * will assure that none can be acquired until the MAC is
		 * unregistered.
		 *
		 * XXX CR 6791335 prevents us from calling mac_disable() prior
		 * to dls_devnet_destroy(), so we unfortunately need to
		 * attempt to re-create the devnet node if mac_disable()
		 * fails.
		 */
		if ((err = mac_disable(iptun->iptun_mh)) != 0) {
			(void) dls_devnet_create(iptun->iptun_mh, linkid,
			    iptun->iptun_zoneid);
		}
	}

	/*
	 * Now that we know the fate of this iptun_t, we need to clear
	 * IPTUN_DELETE_PENDING, and set IPTUN_CONDEMNED if the iptun_t is
	 * slated to be freed.  Either way, we need to signal the threads
	 * waiting in iptun_enter() so that they can either fail if
	 * IPTUN_CONDEMNED is set, or continue if it's not.
	 */
	mutex_enter(&iptun->iptun_lock);
	iptun->iptun_flags &= ~IPTUN_DELETE_PENDING;
	if (err == 0)
		iptun->iptun_flags |= IPTUN_CONDEMNED;
	cv_broadcast(&iptun->iptun_enter_cv);
	mutex_exit(&iptun->iptun_lock);

	/*
	 * Note that there is no danger in calling iptun_free() after having
	 * dropped the iptun_lock since callers of iptun_enter() at this point
	 * are doing so from iptun_enter_by_linkid() (mac_disable() got rid of
	 * threads entering from mac callbacks which call iptun_enter()
	 * directly) which holds iptun_hash_lock, and iptun_free() grabs this
	 * lock in order to remove the iptun_t from the hash table.
	 */
	if (err == 0)
		iptun_free(iptun);

	return (err);
}

/*
 * Modify the tunnel identified by ik->iptun_kparam_linkid using the
 * parameters in ik.  The tunnel type is immutable; address changes trigger
 * a rebind with ip and notify the MAC layer via taskq dispatch.
 */
int
iptun_modify(const iptun_kparams_t *ik, cred_t *credp)
{
	iptun_t		*iptun;
	boolean_t	laddr_change = B_FALSE, raddr_change = B_FALSE;
	int		err;

	if ((err = iptun_enter_by_linkid(ik->iptun_kparam_linkid, &iptun)) != 0)
		return (err);

	/* One cannot modify a tunnel that belongs to another zone. */
	if (iptun->iptun_zoneid != crgetzoneid(credp)) {
		err = EACCES;
		goto done;
	}

	/* The tunnel type cannot be changed */
	if (ik->iptun_kparam_flags & IPTUN_KPARAM_TYPE) {
		err = EINVAL;
		goto done;
	}

	if ((err = iptun_setparams(iptun, ik)) != 0)
		goto done;
	iptun_headergen(iptun, B_FALSE);

	/*
	 * If any of the tunnel's addresses has been modified and the tunnel
	 * has the necessary addresses assigned to it, we need to try to bind
	 * with ip underneath us.  If we're not ready to bind yet, then we'll
	 * try again when the addresses are modified later.
	 */
	laddr_change = (ik->iptun_kparam_flags & IPTUN_KPARAM_LADDR);
	raddr_change = (ik->iptun_kparam_flags & IPTUN_KPARAM_RADDR);
	if (laddr_change || raddr_change) {
		if (iptun->iptun_flags & IPTUN_BOUND)
			iptun_unbind(iptun);
		if (iptun_canbind(iptun) && (err = iptun_bind(iptun)) != 0) {
			/* Bind failed: drop the flags for whatever changed. */
			if (laddr_change)
				iptun->iptun_flags &= ~IPTUN_LADDR;
			if (raddr_change)
				iptun->iptun_flags &= ~IPTUN_RADDR;
			goto done;
		}
	}

	/* Tell the MAC layer (via taskq, without holding locks) what moved. */
	if (laddr_change)
		iptun_task_dispatch(iptun, IPTUN_TASK_LADDR_UPDATE);
	if (raddr_change)
		iptun_task_dispatch(iptun, IPTUN_TASK_RADDR_UPDATE);

done:
	iptun_exit(iptun);
	return (err);
}

/* Given an IP tunnel's datalink id, fill in its parameters. */
int
iptun_info(iptun_kparams_t *ik, cred_t *credp)
{
	iptun_t	*iptun;
	int	err;

	/* Is the tunnel link visible from the caller's zone? */
	if (!dls_devnet_islinkvisible(ik->iptun_kparam_linkid,
	    crgetzoneid(credp)))
		return (ENOENT);

	if ((err = iptun_enter_by_linkid(ik->iptun_kparam_linkid, &iptun)) != 0)
		return (err);

	/* ik is an in/out parameter; reset it before filling it in. */
	bzero(ik, sizeof (iptun_kparams_t));

	ik->iptun_kparam_linkid = iptun->iptun_linkid;
	ik->iptun_kparam_type = iptun->iptun_typeinfo->iti_type;
	ik->iptun_kparam_flags |= IPTUN_KPARAM_TYPE;

	if (iptun->iptun_flags & IPTUN_LADDR) {
		iptun_getaddr(&iptun->iptun_laddr, &ik->iptun_kparam_laddr);
		ik->iptun_kparam_flags |= IPTUN_KPARAM_LADDR;
	}
	if (iptun->iptun_flags & IPTUN_RADDR) {
		iptun_getaddr(&iptun->iptun_raddr, &ik->iptun_kparam_raddr);
		ik->iptun_kparam_flags |= IPTUN_KPARAM_RADDR;
	}

	if (iptun->iptun_flags & IPTUN_IMPLICIT)
		ik->iptun_kparam_flags |= IPTUN_KPARAM_IMPLICIT;

	if (iptun->iptun_itp != NULL) {
		mutex_enter(&iptun->iptun_itp->itp_lock);
		if (iptun->iptun_itp->itp_flags & ITPF_P_ACTIVE) {
			ik->iptun_kparam_flags |= IPTUN_KPARAM_IPSECPOL;
			/* Only "simple" policy is representable in ik. */
			if (iptun->iptun_flags & IPTUN_SIMPLE_POLICY) {
				ik->iptun_kparam_flags |= IPTUN_KPARAM_SECINFO;
				ik->iptun_kparam_secinfo =
				    iptun->iptun_simple_policy;
			}
		}
		mutex_exit(&iptun->iptun_itp->itp_lock);
	}

done:
	iptun_exit(iptun);
	return (err);
}

/*
 * Set the per-netstack 6to4 relay router address.  Broadcast and multicast
 * destinations are rejected.
 */
int
iptun_set_6to4relay(netstack_t *ns, ipaddr_t relay_addr)
{
	if (relay_addr == INADDR_BROADCAST || CLASSD(relay_addr))
		return (EADDRNOTAVAIL);
	ns->netstack_iptun->iptuns_relay_rtr_addr = relay_addr;
	return (0);
}

/* Retrieve the per-netstack 6to4 relay router address. */
void
iptun_get_6to4relay(netstack_t *ns, ipaddr_t *relay_addr)
{
	*relay_addr = ns->netstack_iptun->iptuns_relay_rtr_addr;
}

/*
 * Attach the given IPsec tunnel policy to the tunnel identified by linkid,
 * taking a hold on itp, and refresh the tunnel MTU to account for the
 * added IPsec overhead.  Silently does nothing if the link can't be entered.
 */
void
iptun_set_policy(datalink_id_t linkid, ipsec_tun_pol_t *itp)
{
	iptun_t *iptun;

	if (iptun_enter_by_linkid(linkid, &iptun) != 0)
		return;
	if (iptun->iptun_itp != itp) {
		ASSERT(iptun->iptun_itp == NULL);
		ITP_REFHOLD(itp);
		iptun->iptun_itp = itp;
	}
	/*
	 * IPsec policy means IPsec overhead, which means lower MTU.
	 * Refresh the MTU for this tunnel.
	 */
	(void) iptun_update_mtu(iptun, NULL, 0);
	iptun_exit(iptun);
}

/*
 * Obtain the path MTU to the tunnel destination.
 * Can return zero in some cases.
 */
static uint32_t
iptun_get_dst_pmtu(iptun_t *iptun, ip_xmit_attr_t *ixa)
{
	uint32_t	pmtu = 0;
	conn_t		*connp = iptun->iptun_connp;
	boolean_t	need_rele = B_FALSE;

	/*
	 * We only obtain the pmtu for tunnels that have a remote tunnel
	 * address.
	 */
	if (!(iptun->iptun_flags & IPTUN_RADDR))
		return (0);

	if (ixa == NULL) {
		/* No transmit attributes supplied; borrow the conn's. */
		ixa = conn_get_ixa(connp, B_FALSE);
		if (ixa == NULL)
			return (0);
		need_rele = B_TRUE;
	}
	/*
	 * Guard against ICMP errors before we have sent, as well as against
	 * a thread which held conn_ixa.
	 */
	if (ixa->ixa_ire != NULL) {
		pmtu = ip_get_pmtu(ixa);

		/*
		 * For both IPv4 and IPv6 we can have indication that the outer
		 * header needs fragmentation.
		 */
		if (ixa->ixa_flags & IXAF_PMTU_TOO_SMALL) {
			/* Must allow fragmentation in ip_output */
			ixa->ixa_flags &= ~IXAF_DONTFRAG;
		} else if (iptun->iptun_typeinfo->iti_type != IPTUN_TYPE_6TO4) {
			ixa->ixa_flags |= IXAF_DONTFRAG;
		} else {
			/* ip_get_pmtu might have set this - we don't want it */
			ixa->ixa_flags &= ~IXAF_PMTU_IPV4_DF;
		}
	}

	if (need_rele)
		ixa_refrele(ixa);
	return (pmtu);
}

/*
 * Update the ip_xmit_attr_t to capture the current lower path mtu as known
 * by ip.
 */
static void
iptun_update_dst_pmtu(iptun_t *iptun, ip_xmit_attr_t *ixa)
{
	uint32_t	pmtu;
	conn_t		*connp = iptun->iptun_connp;
	boolean_t	need_rele = B_FALSE;

	/* IXAF_VERIFY_PMTU is not set if we don't have a fixed destination */
	if (!(iptun->iptun_flags & IPTUN_RADDR))
		return;

	if (ixa == NULL) {
		/* No transmit attributes supplied; borrow the conn's. */
		ixa = conn_get_ixa(connp, B_FALSE);
		if (ixa == NULL)
			return;
		need_rele = B_TRUE;
	}
	/*
	 * Guard against ICMP errors before we have sent, as well as against
	 * a thread which held conn_ixa.
	 */
	if (ixa->ixa_ire != NULL) {
		pmtu = ip_get_pmtu(ixa);
		/*
		 * Update ixa_fragsize and ixa_pmtu.
		 */
		ixa->ixa_fragsize = ixa->ixa_pmtu = pmtu;

		/*
		 * For both IPv4 and IPv6 we can have indication that the outer
		 * header needs fragmentation.
		 */
		if (ixa->ixa_flags & IXAF_PMTU_TOO_SMALL) {
			/* Must allow fragmentation in ip_output */
			ixa->ixa_flags &= ~IXAF_DONTFRAG;
		} else if (iptun->iptun_typeinfo->iti_type != IPTUN_TYPE_6TO4) {
			ixa->ixa_flags |= IXAF_DONTFRAG;
		} else {
			/* ip_get_pmtu might have set this - we don't want it */
			ixa->ixa_flags &= ~IXAF_PMTU_IPV4_DF;
		}
	}

	if (need_rele)
		ixa_refrele(ixa);
}

/*
 * There is nothing that iptun can verify in addition to IP having
 * verified the IP addresses in the fanout.
 */
/* ARGSUSED */
static boolean_t
iptun_verifyicmp(conn_t *connp, void *arg2, icmph_t *icmph, icmp6_t *icmp6,
    ip_recv_attr_t *ira)
{
	return (B_TRUE);
}

/*
 * Notify function registered with ip_xmit_attr_t.  Currently only path-MTU
 * change notifications (IXAN_PMTU) are acted upon.
 */
static void
iptun_notify(void *arg, ip_xmit_attr_t *ixa, ixa_notify_type_t ntype,
    ixa_notify_arg_t narg)
{
	iptun_t *iptun = (iptun_t *)arg;

	switch (ntype) {
	case IXAN_PMTU:
		/* narg carries the new path-MTU. */
		(void) iptun_update_mtu(iptun, ixa, narg);
		break;
	}
}

/*
 * Returns the max of old_ovhd and the overhead associated with pol.
 * Walks the whole hash chain starting at pol.
 */
static uint32_t
iptun_max_policy_overhead(ipsec_policy_t *pol, uint32_t old_ovhd)
{
	uint32_t new_ovhd = old_ovhd;

	while (pol != NULL) {
		new_ovhd = max(new_ovhd,
		    ipsec_act_ovhd(&pol->ipsp_act->ipa_act));
		pol = pol->ipsp_hash.hash_next;
	}
	return (new_ovhd);
}

/*
 * Compute the worst-case per-packet IPsec overhead (in bytes) for this
 * tunnel, consulting either its own tunnel policy or, failing that, the
 * global system policy.
 */
static uint32_t
iptun_get_ipsec_overhead(iptun_t *iptun)
{
	ipsec_policy_root_t	*ipr;
	ipsec_policy_head_t	*iph;
	ipsec_policy_t		*pol;
	ipsec_selector_t	sel;
	int			i;
	uint32_t		ipsec_ovhd = 0;
	ipsec_tun_pol_t		*itp = iptun->iptun_itp;
	netstack_t		*ns = iptun->iptun_ns;

	if (itp == NULL || !(itp->itp_flags & ITPF_P_ACTIVE)) {
		/*
		 * Consult global policy, just in case.
		 * This will only work
		 * if we have both source and destination addresses to work
		 * with.
		 */
		if ((iptun->iptun_flags & (IPTUN_LADDR|IPTUN_RADDR)) !=
		    (IPTUN_LADDR|IPTUN_RADDR))
			return (0);

		iph = ipsec_system_policy(ns);
		bzero(&sel, sizeof (sel));
		sel.ips_isv4 =
		    (iptun->iptun_typeinfo->iti_ipvers == IPV4_VERSION);
		switch (iptun->iptun_typeinfo->iti_ipvers) {
		case IPV4_VERSION:
			sel.ips_local_addr_v4 = iptun->iptun_laddr4;
			sel.ips_remote_addr_v4 = iptun->iptun_raddr4;
			break;
		case IPV6_VERSION:
			sel.ips_local_addr_v6 = iptun->iptun_laddr6;
			sel.ips_remote_addr_v6 = iptun->iptun_raddr6;
			break;
		}
		/* Check for both IPv4 and IPv6. */
		sel.ips_protocol = IPPROTO_ENCAP;
		pol = ipsec_find_policy_head(NULL, iph, IPSEC_TYPE_OUTBOUND,
		    &sel);
		if (pol != NULL) {
			ipsec_ovhd = ipsec_act_ovhd(&pol->ipsp_act->ipa_act);
			IPPOL_REFRELE(pol);
		}
		sel.ips_protocol = IPPROTO_IPV6;
		pol = ipsec_find_policy_head(NULL, iph, IPSEC_TYPE_OUTBOUND,
		    &sel);
		if (pol != NULL) {
			/* Keep the larger of the two overheads. */
			ipsec_ovhd = max(ipsec_ovhd,
			    ipsec_act_ovhd(&pol->ipsp_act->ipa_act));
			IPPOL_REFRELE(pol);
		}
		IPPH_REFRELE(iph, ns);
	} else {
		/*
		 * Look through all of the possible IPsec actions for the
		 * tunnel, and find the largest potential IPsec overhead.
		 */
		iph = itp->itp_policy;
		rw_enter(&iph->iph_lock, RW_READER);
		ipr = &(iph->iph_root[IPSEC_TYPE_OUTBOUND]);
		ipsec_ovhd = iptun_max_policy_overhead(
		    ipr->ipr_nonhash[IPSEC_AF_V4], 0);
		ipsec_ovhd = iptun_max_policy_overhead(
		    ipr->ipr_nonhash[IPSEC_AF_V6], ipsec_ovhd);
		for (i = 0; i < ipr->ipr_nchains; i++) {
			ipsec_ovhd = iptun_max_policy_overhead(
			    ipr->ipr_hash[i].hash_head, ipsec_ovhd);
		}
		rw_exit(&iph->iph_lock);
	}

	return (ipsec_ovhd);
}

/*
 * Calculate and return the maximum possible upper MTU for the given tunnel.
 *
 * If new_pmtu is set then we also need to update the lower path MTU information
 * in the ip_xmit_attr_t. That is needed since we set IXAF_VERIFY_PMTU so that
 * we are notified by conn_ip_output() when the path MTU increases.
 */
static uint32_t
iptun_get_maxmtu(iptun_t *iptun, ip_xmit_attr_t *ixa, uint32_t new_pmtu)
{
	size_t		header_size, ipsec_overhead;
	uint32_t	maxmtu, pmtu;

	/*
	 * Start with the path-MTU to the remote address, which is either
	 * provided as the new_pmtu argument, or obtained using
	 * iptun_get_dst_pmtu().
	 */
	if (new_pmtu != 0) {
		if (iptun->iptun_flags & IPTUN_RADDR)
			iptun->iptun_dpmtu = new_pmtu;
		pmtu = new_pmtu;
	} else if (iptun->iptun_flags & IPTUN_RADDR) {
		if ((pmtu = iptun_get_dst_pmtu(iptun, ixa)) == 0) {
			/*
			 * We weren't able to obtain the path-MTU of the
			 * destination.  Use the previous value.
			 */
			pmtu = iptun->iptun_dpmtu;
		} else {
			/* Cache the freshly obtained path-MTU. */
			iptun->iptun_dpmtu = pmtu;
		}
	} else {
		/*
		 * We have no path-MTU information to go on, use the maximum
		 * possible value.
		 */
		pmtu = iptun->iptun_typeinfo->iti_maxmtu;
	}

	/*
	 * Now calculate tunneling overhead and subtract that from the
	 * path-MTU information obtained above.
	 */
	if (iptun->iptun_header_size != 0) {
		header_size = iptun->iptun_header_size;
	} else {
		switch (iptun->iptun_typeinfo->iti_ipvers) {
		case IPV4_VERSION:
			header_size = sizeof (ipha_t);
			/* Labeled systems may add IP options to the header. */
			if (is_system_labeled())
				header_size += IP_MAX_OPT_LENGTH;
			break;
		case IPV6_VERSION:
			header_size = sizeof (iptun_ipv6hdrs_t);
			break;
		}
	}

	ipsec_overhead = iptun_get_ipsec_overhead(iptun);

	/* Never report less than the type's minimum MTU. */
	maxmtu = pmtu - (header_size + ipsec_overhead);
	return (max(maxmtu, iptun->iptun_typeinfo->iti_minmtu));
}

/*
 * Re-calculate the tunnel's MTU as seen from above and notify the MAC layer
 * of any change in MTU.  The new_pmtu argument is the new lower path MTU to
 * the tunnel destination to be used in the tunnel MTU calculation.  Passing
 * in 0 for new_pmtu causes the lower path MTU to be dynamically updated using
 * ip_get_pmtu().
 *
 * If the calculated tunnel MTU is different than its previous value, then we
 * notify the MAC layer above us of this change using mac_maxsdu_update().
 */
static uint32_t
iptun_update_mtu(iptun_t *iptun, ip_xmit_attr_t *ixa, uint32_t new_pmtu)
{
	uint32_t newmtu;

	/* We always update the ixa since we might have set IXAF_VERIFY_PMTU */
	iptun_update_dst_pmtu(iptun, ixa);

	/*
	 * We return the current MTU without updating it if it was pegged to a
	 * static value using the MAC_PROP_MTU link property.
	 */
	if (iptun->iptun_flags & IPTUN_FIXED_MTU)
		return (iptun->iptun_mtu);

	/* If the MTU isn't fixed, then use the maximum possible value. */
	newmtu = iptun_get_maxmtu(iptun, ixa, new_pmtu);
	/*
	 * We only dynamically adjust the tunnel MTU for tunnels with
	 * destinations because dynamic MTU calculations are based on the
	 * destination path-MTU.
	 */
	if ((iptun->iptun_flags & IPTUN_RADDR) && newmtu != iptun->iptun_mtu) {
		iptun->iptun_mtu = newmtu;
		/* Tell the MAC layer via taskq (no locks held across upcall) */
		if (iptun->iptun_flags & IPTUN_MAC_REGISTERED)
			iptun_task_dispatch(iptun, IPTUN_TASK_MTU_UPDATE);
	}

	return (newmtu);
}

/*
 * Frees a packet or packet chain and bumps stat for each freed packet.
 * stat may be NULL if no statistic should be updated.
 */
static void
iptun_drop_pkt(mblk_t *mp, uint64_t *stat)
{
	mblk_t *pktmp;

	/* Detach each packet from the b_next chain before freeing it. */
	for (pktmp = mp; pktmp != NULL; pktmp = mp) {
		mp = mp->b_next;
		pktmp->b_next = NULL;
		if (stat != NULL)
			atomic_inc_64(stat);
		freemsg(pktmp);
	}
}

/*
 * Allocate and return a new mblk to hold an IP and ICMP header, and chain the
 * original packet to its b_cont.  Returns NULL on failure.
 */
static mblk_t *
iptun_build_icmperr(size_t hdrs_size, mblk_t *orig_pkt)
{
	mblk_t *icmperr_mp;

	if ((icmperr_mp = allocb(hdrs_size, BPRI_MED)) != NULL) {
		icmperr_mp->b_wptr += hdrs_size;
		/* tack on the offending packet */
		icmperr_mp->b_cont = orig_pkt;
	}
	return (icmperr_mp);
}

/*
 * Transmit an ICMP error.  mp->b_rptr points at the packet to be included in
 * the ICMP error.
 */
static void
iptun_sendicmp_v4(iptun_t *iptun, icmph_t *icmp, ipha_t *orig_ipha, mblk_t *mp,
    ts_label_t *tsl)
{
	size_t		orig_pktsize, hdrs_size;
	mblk_t		*icmperr_mp;
	ipha_t		*new_ipha;
	icmph_t		*new_icmp;
	ip_xmit_attr_t	ixas;
	conn_t		*connp = iptun->iptun_connp;

	orig_pktsize = msgdsize(mp);
	hdrs_size = sizeof (ipha_t) + sizeof (icmph_t);
	if ((icmperr_mp = iptun_build_icmperr(hdrs_size, mp)) == NULL) {
		iptun_drop_pkt(mp, &iptun->iptun_noxmtbuf);
		return;
	}

	new_ipha = (ipha_t *)icmperr_mp->b_rptr;
	new_icmp = (icmph_t *)(new_ipha + 1);

	/* Build the outer IPv4 header, reflecting the offending packet. */
	new_ipha->ipha_version_and_hdr_length = IP_SIMPLE_HDR_VERSION;
	new_ipha->ipha_type_of_service = 0;
	new_ipha->ipha_ident = 0;
	new_ipha->ipha_fragment_offset_and_flags = 0;
	new_ipha->ipha_ttl = orig_ipha->ipha_ttl;
	new_ipha->ipha_protocol = IPPROTO_ICMP;
	/* Error goes back to the offending packet's source. */
	new_ipha->ipha_src = orig_ipha->ipha_dst;
	new_ipha->ipha_dst = orig_ipha->ipha_src;
	new_ipha->ipha_hdr_checksum = 0;	/* will be computed by ip */
	new_ipha->ipha_length = htons(hdrs_size + orig_pktsize);

	*new_icmp = *icmp;
	/* Zero the checksum field before computing over the message. */
	new_icmp->icmph_checksum = 0;
	new_icmp->icmph_checksum = IP_CSUM(icmperr_mp, sizeof (ipha_t), 0);

	bzero(&ixas, sizeof (ixas));
	ixas.ixa_flags = IXAF_BASIC_SIMPLE_V4;
	if (new_ipha->ipha_src == INADDR_ANY)
		ixas.ixa_flags |= IXAF_SET_SOURCE;

	ixas.ixa_zoneid = IPCL_ZONEID(connp);
	ixas.ixa_ipst = connp->conn_netstack->netstack_ip;
	ixas.ixa_cred = connp->conn_cred;
	ixas.ixa_cpid = NOPID;
	if (is_system_labeled())
		ixas.ixa_tsl = tsl;

	ixas.ixa_ifindex = 0;
	ixas.ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;

	(void) ip_output_simple(icmperr_mp, &ixas);
	ixa_cleanup(&ixas);
}

/*
 * IPv6 counterpart of iptun_sendicmp_v4(): transmit an ICMPv6 error with
 * the offending packet chained on.
 */
static void
iptun_sendicmp_v6(iptun_t *iptun, icmp6_t *icmp6, ip6_t *orig_ip6h, mblk_t *mp,
    ts_label_t *tsl)
{
size_t orig_pktsize, hdrs_size; 2173 mblk_t *icmp6err_mp; 2174 ip6_t *new_ip6h; 2175 icmp6_t *new_icmp6; 2176 ip_xmit_attr_t ixas; 2177 conn_t *connp = iptun->iptun_connp; 2178 2179 orig_pktsize = msgdsize(mp); 2180 hdrs_size = sizeof (ip6_t) + sizeof (icmp6_t); 2181 if ((icmp6err_mp = iptun_build_icmperr(hdrs_size, mp)) == NULL) { 2182 iptun_drop_pkt(mp, &iptun->iptun_noxmtbuf); 2183 return; 2184 } 2185 2186 new_ip6h = (ip6_t *)icmp6err_mp->b_rptr; 2187 new_icmp6 = (icmp6_t *)(new_ip6h + 1); 2188 2189 new_ip6h->ip6_vcf = orig_ip6h->ip6_vcf; 2190 new_ip6h->ip6_plen = htons(sizeof (icmp6_t) + orig_pktsize); 2191 new_ip6h->ip6_hops = orig_ip6h->ip6_hops; 2192 new_ip6h->ip6_nxt = IPPROTO_ICMPV6; 2193 new_ip6h->ip6_src = orig_ip6h->ip6_dst; 2194 new_ip6h->ip6_dst = orig_ip6h->ip6_src; 2195 2196 *new_icmp6 = *icmp6; 2197 /* The checksum is calculated in ip_output_simple and friends. */ 2198 new_icmp6->icmp6_cksum = new_ip6h->ip6_plen; 2199 2200 bzero(&ixas, sizeof (ixas)); 2201 ixas.ixa_flags = IXAF_BASIC_SIMPLE_V6; 2202 if (IN6_IS_ADDR_UNSPECIFIED(&new_ip6h->ip6_src)) 2203 ixas.ixa_flags |= IXAF_SET_SOURCE; 2204 2205 ixas.ixa_zoneid = IPCL_ZONEID(connp); 2206 ixas.ixa_ipst = connp->conn_netstack->netstack_ip; 2207 ixas.ixa_cred = connp->conn_cred; 2208 ixas.ixa_cpid = NOPID; 2209 if (is_system_labeled()) 2210 ixas.ixa_tsl = tsl; 2211 2212 ixas.ixa_ifindex = 0; 2213 ixas.ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 2214 2215 (void) ip_output_simple(icmp6err_mp, &ixas); 2216 ixa_cleanup(&ixas); 2217 } 2218 2219 static void 2220 iptun_icmp_error_v4(iptun_t *iptun, ipha_t *orig_ipha, mblk_t *mp, 2221 uint8_t type, uint8_t code, ts_label_t *tsl) 2222 { 2223 icmph_t icmp; 2224 2225 bzero(&icmp, sizeof (icmp)); 2226 icmp.icmph_type = type; 2227 icmp.icmph_code = code; 2228 2229 iptun_sendicmp_v4(iptun, &icmp, orig_ipha, mp, tsl); 2230 } 2231 2232 static void 2233 iptun_icmp_fragneeded_v4(iptun_t *iptun, uint32_t newmtu, ipha_t *orig_ipha, 2234 mblk_t *mp, ts_label_t *tsl) 
2235 { 2236 icmph_t icmp; 2237 2238 icmp.icmph_type = ICMP_DEST_UNREACHABLE; 2239 icmp.icmph_code = ICMP_FRAGMENTATION_NEEDED; 2240 icmp.icmph_du_zero = 0; 2241 icmp.icmph_du_mtu = htons(newmtu); 2242 2243 iptun_sendicmp_v4(iptun, &icmp, orig_ipha, mp, tsl); 2244 } 2245 2246 static void 2247 iptun_icmp_error_v6(iptun_t *iptun, ip6_t *orig_ip6h, mblk_t *mp, 2248 uint8_t type, uint8_t code, uint32_t offset, ts_label_t *tsl) 2249 { 2250 icmp6_t icmp6; 2251 2252 bzero(&icmp6, sizeof (icmp6)); 2253 icmp6.icmp6_type = type; 2254 icmp6.icmp6_code = code; 2255 if (type == ICMP6_PARAM_PROB) 2256 icmp6.icmp6_pptr = htonl(offset); 2257 2258 iptun_sendicmp_v6(iptun, &icmp6, orig_ip6h, mp, tsl); 2259 } 2260 2261 static void 2262 iptun_icmp_toobig_v6(iptun_t *iptun, uint32_t newmtu, ip6_t *orig_ip6h, 2263 mblk_t *mp, ts_label_t *tsl) 2264 { 2265 icmp6_t icmp6; 2266 2267 icmp6.icmp6_type = ICMP6_PACKET_TOO_BIG; 2268 icmp6.icmp6_code = 0; 2269 icmp6.icmp6_mtu = htonl(newmtu); 2270 2271 iptun_sendicmp_v6(iptun, &icmp6, orig_ip6h, mp, tsl); 2272 } 2273 2274 /* 2275 * Determines if the packet pointed to by ipha or ip6h is an ICMP error. The 2276 * mp argument is only used to do bounds checking. 
 */
static boolean_t
is_icmp_error(mblk_t *mp, ipha_t *ipha, ip6_t *ip6h)
{
	uint16_t hlen;

	/* Exactly one of ipha/ip6h is non-NULL and selects the IP version. */
	if (ipha != NULL) {
		icmph_t	*icmph;

		ASSERT(ip6h == NULL);
		if (ipha->ipha_protocol != IPPROTO_ICMP)
			return (B_FALSE);

		hlen = IPH_HDR_LENGTH(ipha);
		icmph = (icmph_t *)((uint8_t *)ipha + hlen);
		/* Redirects are treated like errors for our purposes. */
		return (ICMP_IS_ERROR(icmph->icmph_type) ||
		    icmph->icmph_type == ICMP_REDIRECT);
	} else {
		icmp6_t	*icmp6;
		uint8_t	*nexthdrp;

		ASSERT(ip6h != NULL);
		/*
		 * Walk the IPv6 extension headers (bounds-checked against
		 * mp) to find the transport protocol.
		 */
		if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hlen, &nexthdrp) ||
		    *nexthdrp != IPPROTO_ICMPV6) {
			return (B_FALSE);
		}

		icmp6 = (icmp6_t *)((uint8_t *)ip6h + hlen);
		return (ICMP6_IS_ERROR(icmp6->icmp6_type) ||
		    icmp6->icmp6_type == ND_REDIRECT);
	}
}

/*
 * Find inner and outer IP headers from a tunneled packet as setup for calls
 * into ipsec_tun_{in,out}bound().
 * Note that we need to allow the outer header to be in a separate mblk from
 * the inner header.
 * If the caller knows the outer_hlen, the caller passes it in.  Otherwise zero.
 *
 * On success, exactly one of *outer4/*outer6 and one of *inner4/*inner6 is
 * set non-NULL and the outer header length is returned.  Returns 0 on any
 * parse failure; in that case the output pointers must not be trusted.
 */
static size_t
iptun_find_headers(mblk_t *mp, size_t outer_hlen, ipha_t **outer4,
    ipha_t **inner4, ip6_t **outer6, ip6_t **inner6)
{
	ipha_t	*ipha;
	size_t	first_mblkl = MBLKL(mp);
	mblk_t	*inner_mp;

	/*
	 * Don't bother handling packets that don't have a full IP header in
	 * the first mblk.  For the input path, the ip module ensures that
	 * this won't happen, and on the output path, the IP tunneling
	 * MAC-type plugins ensure that this also won't happen.
	 */
	if (first_mblkl < sizeof (ipha_t))
		return (0);
	ipha = (ipha_t *)(mp->b_rptr);
	switch (IPH_HDR_VERSION(ipha)) {
	case IPV4_VERSION:
		*outer4 = ipha;
		*outer6 = NULL;
		if (outer_hlen == 0)
			outer_hlen = IPH_HDR_LENGTH(ipha);
		break;
	case IPV6_VERSION:
		*outer4 = NULL;
		*outer6 = (ip6_t *)ipha;
		if (outer_hlen == 0)
			outer_hlen = ip_hdr_length_v6(mp, (ip6_t *)ipha);
		break;
	default:
		return (0);
	}

	/*
	 * Bail if the outer header is truncated, or if it exactly fills the
	 * first mblk and there's no continuation holding the inner header.
	 */
	if (first_mblkl < outer_hlen ||
	    (first_mblkl == outer_hlen && mp->b_cont == NULL))
		return (0);

	/*
	 * We don't bother doing a pullup here since the outer header will
	 * just get stripped off soon on input anyway.  We just want to ensure
	 * that the inner* pointer points to a full header.
	 */
	if (first_mblkl == outer_hlen) {
		inner_mp = mp->b_cont;
		ipha = (ipha_t *)inner_mp->b_rptr;
	} else {
		inner_mp = mp;
		ipha = (ipha_t *)(mp->b_rptr + outer_hlen);
	}
	switch (IPH_HDR_VERSION(ipha)) {
	case IPV4_VERSION:
		if (inner_mp->b_wptr - (uint8_t *)ipha < sizeof (ipha_t))
			return (0);
		*inner4 = ipha;
		*inner6 = NULL;
		break;
	case IPV6_VERSION:
		if (inner_mp->b_wptr - (uint8_t *)ipha < sizeof (ip6_t))
			return (0);
		*inner4 = NULL;
		*inner6 = (ip6_t *)ipha;
		break;
	default:
		return (0);
	}

	return (outer_hlen);
}

/*
 * Received ICMP error in response to an X over IPv4 packet that we
 * transmitted.
 *
 * NOTE: "outer" refers to what's inside the ICMP payload.  We will get one of
 * the following:
 *
 *	[IPv4(0)][ICMPv4][IPv4(1)][IPv4(2)][ULP]
 *
 * or
 *
 *	[IPv4(0)][ICMPv4][IPv4(1)][IPv6][ULP]
 *
 * And "outer4" will get set to IPv4(1), and inner[46] will correspond to
 * whatever the very-inner packet is (IPv4(2) or IPv6).
 */
static void
iptun_input_icmp_v4(iptun_t *iptun, mblk_t *data_mp, icmph_t *icmph,
    ip_recv_attr_t *ira)
{
	uint8_t	*orig;
	ipha_t	*outer4, *inner4;
	ip6_t	*outer6, *inner6;
	int	outer_hlen;
	uint8_t	type, code;

	ASSERT(data_mp->b_cont == NULL);
	/*
	 * Temporarily move b_rptr forward so that iptun_find_headers() can
	 * find headers in the ICMP packet payload.
	 */
	orig = data_mp->b_rptr;
	data_mp->b_rptr = (uint8_t *)(icmph + 1);
	/*
	 * The ip module ensures that ICMP errors contain at least the
	 * original IP header (otherwise, the error would never have made it
	 * here).
	 */
	ASSERT(MBLKL(data_mp) >= 0);
	outer_hlen = iptun_find_headers(data_mp, 0, &outer4, &inner4, &outer6,
	    &inner6);
	ASSERT(outer6 == NULL);
	data_mp->b_rptr = orig;
	if (outer_hlen == 0) {
		iptun_drop_pkt(data_mp, &iptun->iptun_ierrors);
		return;
	}

	/* Only ICMP errors due to tunneled packets should reach here. */
	ASSERT(outer4->ipha_protocol == IPPROTO_ENCAP ||
	    outer4->ipha_protocol == IPPROTO_IPV6);

	/* Negative outer_hlen tells the callee this is an ICMP payload. */
	data_mp = ipsec_tun_inbound(ira, data_mp, iptun->iptun_itp,
	    inner4, inner6, outer4, outer6, -outer_hlen, iptun->iptun_ns);
	if (data_mp == NULL) {
		/* Callee did all of the freeing. */
		atomic_inc_64(&iptun->iptun_ierrors);
		return;
	}
	/* We should never see reassembled fragment here. */
	ASSERT(data_mp->b_next == NULL);

	/* Point at the inner packet so it can be included in our own error. */
	data_mp->b_rptr = (uint8_t *)outer4 + outer_hlen;

	/*
	 * If the original packet being transmitted was itself an ICMP error,
	 * then drop this packet.  We don't want to generate an ICMP error in
	 * response to an ICMP error.
	 */
	if (is_icmp_error(data_mp, inner4, inner6)) {
		iptun_drop_pkt(data_mp, &iptun->iptun_norcvbuf);
		return;
	}

	/*
	 * Translate the outer ICMPv4 error into an equivalent error for the
	 * inner packet's address family and reflect it to the inner source.
	 */
	switch (icmph->icmph_type) {
	case ICMP_DEST_UNREACHABLE:
		type = (inner4 != NULL ?
		    icmph->icmph_type : ICMP6_DST_UNREACH);
		switch (icmph->icmph_code) {
		case ICMP_FRAGMENTATION_NEEDED: {
			uint32_t newmtu;

			/*
			 * We reconcile this with the fact that the tunnel may
			 * also have IPsec policy by letting iptun_update_mtu
			 * take care of it.
			 */
			newmtu = iptun_update_mtu(iptun, NULL,
			    ntohs(icmph->icmph_du_mtu));

			if (inner4 != NULL) {
				iptun_icmp_fragneeded_v4(iptun, newmtu, inner4,
				    data_mp, ira->ira_tsl);
			} else {
				iptun_icmp_toobig_v6(iptun, newmtu, inner6,
				    data_mp, ira->ira_tsl);
			}
			return;
		}
		case ICMP_DEST_NET_UNREACH_ADMIN:
		case ICMP_DEST_HOST_UNREACH_ADMIN:
			code = (inner4 != NULL ? ICMP_DEST_NET_UNREACH_ADMIN :
			    ICMP6_DST_UNREACH_ADMIN);
			break;
		default:
			code = (inner4 != NULL ? ICMP_HOST_UNREACHABLE :
			    ICMP6_DST_UNREACH_ADDR);
			break;
		}
		break;
	case ICMP_TIME_EXCEEDED:
		if (inner6 != NULL) {
			type = ICMP6_TIME_EXCEEDED;
			code = 0;
		} /* else we're already set. */
		break;
	case ICMP_PARAM_PROBLEM:
		/*
		 * This is a problem with the outer header we transmitted.
		 * Treat this as an output error.
		 */
		iptun_drop_pkt(data_mp, &iptun->iptun_oerrors);
		return;
	default:
		iptun_drop_pkt(data_mp, &iptun->iptun_norcvbuf);
		return;
	}

	if (inner4 != NULL) {
		iptun_icmp_error_v4(iptun, inner4, data_mp, type, code,
		    ira->ira_tsl);
	} else {
		iptun_icmp_error_v6(iptun, inner6, data_mp, type, code, 0,
		    ira->ira_tsl);
	}
}

/*
 * Return B_TRUE if the IPv6 packet pointed to by ip6h contains a Tunnel
 * Encapsulation Limit destination option.  If there is one, set encaplim_ptr
 * to point to the option value.
 */
static boolean_t
iptun_find_encaplimit(mblk_t *mp, ip6_t *ip6h, uint8_t **encaplim_ptr)
{
	ip_pkt_t	pkt;
	uint8_t		*endptr;
	ip6_dest_t	*destp;
	struct ip6_opt	*optp;

	pkt.ipp_fields = 0;	/* must be initialized */
	(void) ip_find_hdr_v6(mp, ip6h, B_FALSE, &pkt, NULL);
	/* Check both possible destination-option header locations. */
	if ((pkt.ipp_fields & IPPF_DSTOPTS) != 0) {
		destp = pkt.ipp_dstopts;
	} else if ((pkt.ipp_fields & IPPF_RTHDRDSTOPTS) != 0) {
		destp = pkt.ipp_rthdrdstopts;
	} else {
		return (B_FALSE);
	}

	/* ip6d_len counts 8-octet units, excluding the first 8 octets. */
	endptr = (uint8_t *)destp + 8 * (destp->ip6d_len + 1);
	optp = (struct ip6_opt *)(destp + 1);
	while (endptr - (uint8_t *)optp > sizeof (*optp)) {
		if (optp->ip6o_type == IP6OPT_TUNNEL_LIMIT) {
			if ((uint8_t *)(optp + 1) >= endptr)
				return (B_FALSE);
			*encaplim_ptr = (uint8_t *)&optp[1];
			return (B_TRUE);
		}
		optp = (struct ip6_opt *)((uint8_t *)optp + optp->ip6o_len + 2);
	}
	return (B_FALSE);
}

/*
 * Received ICMPv6 error in response to an X over IPv6 packet that we
 * transmitted.
 *
 * NOTE: "outer" refers to what's inside the ICMP payload.  We will get one of
 * the following:
 *
 *	[IPv6(0)][ICMPv6][IPv6(1)][IPv4][ULP]
 *
 * or
 *
 *	[IPv6(0)][ICMPv6][IPv6(1)][IPv6(2)][ULP]
 *
 * And "outer6" will get set to IPv6(1), and inner[46] will correspond to
 * whatever the very-inner packet is (IPv4 or IPv6(2)).
 */
static void
iptun_input_icmp_v6(iptun_t *iptun, mblk_t *data_mp, icmp6_t *icmp6h,
    ip_recv_attr_t *ira)
{
	uint8_t	*orig;
	ipha_t	*outer4, *inner4;
	ip6_t	*outer6, *inner6;
	int	outer_hlen;
	uint8_t	type, code;

	ASSERT(data_mp->b_cont == NULL);

	/*
	 * Temporarily move b_rptr forward so that iptun_find_headers() can
	 * find IP headers in the ICMP packet payload.
	 */
	orig = data_mp->b_rptr;
	data_mp->b_rptr = (uint8_t *)(icmp6h + 1);
	/*
	 * The ip module ensures that ICMP errors contain at least the
	 * original IP header (otherwise, the error would never have made it
	 * here).
	 */
	ASSERT(MBLKL(data_mp) >= 0);
	outer_hlen = iptun_find_headers(data_mp, 0, &outer4, &inner4, &outer6,
	    &inner6);
	ASSERT(outer4 == NULL);
	data_mp->b_rptr = orig;	/* Restore r_ptr */
	if (outer_hlen == 0) {
		iptun_drop_pkt(data_mp, &iptun->iptun_ierrors);
		return;
	}

	/* Negative outer_hlen tells the callee this is an ICMP payload. */
	data_mp = ipsec_tun_inbound(ira, data_mp, iptun->iptun_itp,
	    inner4, inner6, outer4, outer6, -outer_hlen, iptun->iptun_ns);
	if (data_mp == NULL) {
		/* Callee did all of the freeing. */
		atomic_inc_64(&iptun->iptun_ierrors);
		return;
	}
	/* We should never see reassembled fragment here. */
	ASSERT(data_mp->b_next == NULL);

	data_mp->b_rptr = (uint8_t *)outer6 + outer_hlen;

	/*
	 * If the original packet being transmitted was itself an ICMP error,
	 * then drop this packet.  We don't want to generate an ICMP error in
	 * response to an ICMP error.
	 */
	if (is_icmp_error(data_mp, inner4, inner6)) {
		iptun_drop_pkt(data_mp, &iptun->iptun_norcvbuf);
		return;
	}

	switch (icmp6h->icmp6_type) {
	case ICMP6_PARAM_PROB: {
		uint8_t *encaplim_ptr;

		/*
		 * If the ICMPv6 error points to a valid Tunnel Encapsulation
		 * Limit option and the limit value is 0, then fall through
		 * and send a host unreachable message.  Otherwise, treat the
		 * error as an output error, as there must have been a problem
		 * with a packet we sent.
		 */
		if (!iptun_find_encaplimit(data_mp, outer6, &encaplim_ptr) ||
		    (icmp6h->icmp6_pptr !=
		    ((ptrdiff_t)encaplim_ptr - (ptrdiff_t)outer6)) ||
		    *encaplim_ptr != 0) {
			iptun_drop_pkt(data_mp, &iptun->iptun_oerrors);
			return;
		}
		/* FALLTHRU */
	}
	case ICMP6_TIME_EXCEEDED:
	case ICMP6_DST_UNREACH:
		type = (inner4 != NULL ? ICMP_DEST_UNREACHABLE :
		    ICMP6_DST_UNREACH);
		code = (inner4 != NULL ? ICMP_HOST_UNREACHABLE :
		    ICMP6_DST_UNREACH_ADDR);
		break;
	case ICMP6_PACKET_TOO_BIG: {
		uint32_t newmtu;

		/*
		 * We reconcile this with the fact that the tunnel may also
		 * have IPsec policy by letting iptun_update_mtu take care of
		 * it.
		 */
		newmtu = iptun_update_mtu(iptun, NULL,
		    ntohl(icmp6h->icmp6_mtu));

		if (inner4 != NULL) {
			iptun_icmp_fragneeded_v4(iptun, newmtu, inner4,
			    data_mp, ira->ira_tsl);
		} else {
			iptun_icmp_toobig_v6(iptun, newmtu, inner6, data_mp,
			    ira->ira_tsl);
		}
		return;
	}
	default:
		iptun_drop_pkt(data_mp, &iptun->iptun_norcvbuf);
		return;
	}

	if (inner4 != NULL) {
		iptun_icmp_error_v4(iptun, inner4, data_mp, type, code,
		    ira->ira_tsl);
	} else {
		iptun_icmp_error_v6(iptun, inner6, data_mp, type, code, 0,
		    ira->ira_tsl);
	}
}

/*
 * Called as conn_recvicmp from IP for ICMP errors.
 */
/* ARGSUSED2 */
static void
iptun_input_icmp(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *ira)
{
	conn_t	*connp = arg;
	iptun_t	*iptun = connp->conn_iptun;
	mblk_t	*tmpmp;
	size_t	hlen;

	ASSERT(IPCL_IS_IPTUN(connp));

	if (mp->b_cont != NULL) {
		/*
		 * Since ICMP error processing necessitates access to bits
		 * that are within the ICMP error payload (the original packet
		 * that caused the error), pull everything up into a single
		 * block for convenience.
		 */
		if ((tmpmp = msgpullup(mp, -1)) == NULL) {
			iptun_drop_pkt(mp, &iptun->iptun_norcvbuf);
			return;
		}
		freemsg(mp);
		mp = tmpmp;
	}

	hlen = ira->ira_ip_hdr_length;
	switch (iptun->iptun_typeinfo->iti_ipvers) {
	case IPV4_VERSION:
		/*
		 * The outer IP header coming up from IP is always ipha_t
		 * aligned (otherwise, we would have crashed in ip).
		 */
		iptun_input_icmp_v4(iptun, mp, (icmph_t *)(mp->b_rptr + hlen),
		    ira);
		break;
	case IPV6_VERSION:
		iptun_input_icmp_v6(iptun, mp, (icmp6_t *)(mp->b_rptr + hlen),
		    ira);
		break;
	}
}

/*
 * Validate a received 6to4-tunneled packet per RFC 3056.  Returns B_TRUE if
 * the packet should be accepted, B_FALSE if it should be dropped.
 */
static boolean_t
iptun_in_6to4_ok(iptun_t *iptun, ipha_t *outer4, ip6_t *inner6)
{
	ipaddr_t v4addr;

	/*
	 * It's possible that someone sent us an IPv4-in-IPv4 packet with the
	 * IPv4 address of a 6to4 tunnel as the destination.
	 */
	if (inner6 == NULL)
		return (B_FALSE);

	/*
	 * Make sure that the IPv6 destination is within the site that this
	 * 6to4 tunnel is routing for.  We don't want people bouncing random
	 * tunneled IPv6 packets through this 6to4 router.
	 */
	IN6_6TO4_TO_V4ADDR(&inner6->ip6_dst, (struct in_addr *)&v4addr);
	if (outer4->ipha_dst != v4addr)
		return (B_FALSE);

	if (IN6_IS_ADDR_6TO4(&inner6->ip6_src)) {
		/*
		 * Section 9 of RFC 3056 (security considerations) suggests
		 * that when a packet is from a 6to4 site (i.e., it's not a
		 * global address being forwarded from a relay router), make
		 * sure that the packet was tunneled by that site's 6to4
		 * router.
		 */
		IN6_6TO4_TO_V4ADDR(&inner6->ip6_src, (struct in_addr *)&v4addr);
		if (outer4->ipha_src != v4addr)
			return (B_FALSE);
	} else {
		/*
		 * Only accept packets from a relay router if we've configured
		 * outbound relay router functionality.
		 */
		if (iptun->iptun_iptuns->iptuns_relay_rtr_addr == INADDR_ANY)
			return (B_FALSE);
	}

	return (B_TRUE);
}

/*
 * Input function for everything that comes up from the ip module below us.
 * This is called directly from the ip module via connp->conn_recv().
 *
 * We receive M_DATA messages with IP-in-IP tunneled packets.
 */
/* ARGSUSED2 */
static void
iptun_input(void *arg, mblk_t *data_mp, void *arg2, ip_recv_attr_t *ira)
{
	conn_t	*connp = arg;
	iptun_t	*iptun = connp->conn_iptun;
	int	outer_hlen;
	ipha_t	*outer4, *inner4;
	ip6_t	*outer6, *inner6;

	ASSERT(IPCL_IS_IPTUN(connp));
	ASSERT(DB_TYPE(data_mp) == M_DATA);

	outer_hlen = iptun_find_headers(data_mp, ira->ira_ip_hdr_length,
	    &outer4, &inner4, &outer6, &inner6);
	if (outer_hlen == 0)
		goto drop;

	/*
	 * If the system is labeled, we call tsol_check_dest() on the packet
	 * destination (our local tunnel address) to ensure that the packet as
	 * labeled should be allowed to be sent to us.  We don't need to call
	 * the more involved tsol_receive_local() since the tunnel link itself
	 * cannot be assigned to shared-stack non-global zones.
	 */
	if (ira->ira_flags & IRAF_SYSTEM_LABELED) {
		if (ira->ira_tsl == NULL)
			goto drop;
		if (tsol_check_dest(ira->ira_tsl, (outer4 != NULL ?
		    (void *)&outer4->ipha_dst : (void *)&outer6->ip6_dst),
		    (outer4 != NULL ? IPV4_VERSION : IPV6_VERSION),
		    CONN_MAC_DEFAULT, B_FALSE, NULL) != 0)
			goto drop;
	}

	data_mp = ipsec_tun_inbound(ira, data_mp, iptun->iptun_itp,
	    inner4, inner6, outer4, outer6, outer_hlen, iptun->iptun_ns);
	if (data_mp == NULL) {
		/* Callee did all of the freeing. */
		return;
	}

	if (iptun->iptun_typeinfo->iti_type == IPTUN_TYPE_6TO4 &&
	    !iptun_in_6to4_ok(iptun, outer4, inner6))
		goto drop;

	/*
	 * We need to statistically account for each packet individually, so
	 * we might as well split up any b_next chains here.
	 */
	do {
		mblk_t	*mp;

		mp = data_mp->b_next;
		data_mp->b_next = NULL;

		atomic_inc_64(&iptun->iptun_ipackets);
		atomic_add_64(&iptun->iptun_rbytes, msgdsize(data_mp));
		mac_rx(iptun->iptun_mh, NULL, data_mp);

		data_mp = mp;
	} while (data_mp != NULL);
	return;
drop:
	iptun_drop_pkt(data_mp, &iptun->iptun_ierrors);
}

/*
 * Do 6to4-specific header-processing on output.  Return B_TRUE if the packet
 * was processed without issue, or B_FALSE if the packet had issues and should
 * be dropped.
 */
static boolean_t
iptun_out_process_6to4(iptun_t *iptun, ipha_t *outer4, ip6_t *inner6)
{
	ipaddr_t v4addr;

	/*
	 * IPv6 source must be a 6to4 address.  This is because a conscious
	 * decision was made to not allow a Solaris system to be used as a
	 * relay router (for security reasons) when 6to4 was initially
	 * integrated.  If this decision is ever reversed, the following check
	 * can be removed.
	 */
	if (!IN6_IS_ADDR_6TO4(&inner6->ip6_src))
		return (B_FALSE);

	/*
	 * RFC3056 mandates that the IPv4 source MUST be set to the IPv4
	 * portion of the 6to4 IPv6 source address.  In other words, make sure
	 * that we're tunneling packets from our own 6to4 site.
	 */
	IN6_6TO4_TO_V4ADDR(&inner6->ip6_src, (struct in_addr *)&v4addr);
	if (outer4->ipha_src != v4addr)
		return (B_FALSE);

	/*
	 * Automatically set the destination of the outer IPv4 header as
	 * described in RFC3056.  There are two possibilities:
	 *
	 * a. If the IPv6 destination is a 6to4 address, set the IPv4 address
	 *    to the IPv4 portion of the 6to4 address.
	 * b. If the IPv6 destination is a native IPv6 address, set the IPv4
	 *    destination to the address of a relay router.
	 *
	 * Design Note: b shouldn't be necessary here, and this is a flaw in
	 * the design of the 6to4relay command.  Instead of setting a 6to4
	 * relay address in this module via an ioctl, the 6to4relay command
	 * could simply add a IPv6 route for native IPv6 addresses (such as a
	 * default route) in the forwarding table that uses a 6to4 destination
	 * as its next hop, and the IPv4 portion of that address could be a
	 * 6to4 relay address.  In order for this to work, IP would have to
	 * resolve the next hop address, which would necessitate a link-layer
	 * address resolver for 6to4 links, which doesn't exist today.
	 *
	 * In fact, if a resolver existed for 6to4 links, then setting the
	 * IPv4 destination in the outer header could be done as part of
	 * link-layer address resolution and fast-path header generation, and
	 * not here.
	 */
	if (IN6_IS_ADDR_6TO4(&inner6->ip6_dst)) {
		/* destination is a 6to4 router */
		IN6_6TO4_TO_V4ADDR(&inner6->ip6_dst,
		    (struct in_addr *)&outer4->ipha_dst);

		/* Reject attempts to send to INADDR_ANY */
		if (outer4->ipha_dst == INADDR_ANY)
			return (B_FALSE);
	} else {
		/*
		 * The destination is a native IPv6 address.  If output to a
		 * relay-router is enabled, use the relay-router's IPv4
		 * address as the destination.
		 */
		if (iptun->iptun_iptuns->iptuns_relay_rtr_addr == INADDR_ANY)
			return (B_FALSE);
		outer4->ipha_dst = iptun->iptun_iptuns->iptuns_relay_rtr_addr;
	}

	/*
	 * If the outer source and destination are equal, this means that the
	 * 6to4 router somehow forwarded an IPv6 packet destined for its own
	 * 6to4 site to its 6to4 tunnel interface, which will result in this
	 * packet infinitely bouncing between ip and iptun.
	 */
	return (outer4->ipha_src != outer4->ipha_dst);
}

/*
 * Process output packets with outer IPv4 headers.  Frees mp and bumps stat on
 * error.
 */
static mblk_t *
iptun_out_process_ipv4(iptun_t *iptun, mblk_t *mp, ipha_t *outer4,
    ipha_t *inner4, ip6_t *inner6, ip_xmit_attr_t *ixa)
{
	uint8_t	*innerptr = (inner4 != NULL ?
	    (uint8_t *)inner4 : (uint8_t *)inner6);
	size_t	minmtu = iptun->iptun_typeinfo->iti_minmtu;

	if (inner4 != NULL) {
		ASSERT(outer4->ipha_protocol == IPPROTO_ENCAP);
		/*
		 * Copy the tos from the inner IPv4 header. We mask off ECN
		 * bits (bits 6 and 7) because there is currently no
		 * tunnel-tunnel communication to determine if both sides
		 * support ECN.  We opt for the safe choice: don't copy the
		 * ECN bits when doing encapsulation.
		 */
		outer4->ipha_type_of_service =
		    inner4->ipha_type_of_service & ~0x03;
	} else {
		ASSERT(outer4->ipha_protocol == IPPROTO_IPV6 &&
		    inner6 != NULL);
	}
	if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF)
		outer4->ipha_fragment_offset_and_flags |= IPH_DF_HTONS;
	else
		outer4->ipha_fragment_offset_and_flags &= ~IPH_DF_HTONS;

	/*
	 * As described in section 3.2.2 of RFC4213, if the packet payload is
	 * less than or equal to the minimum MTU size, then we need to allow
	 * IPv4 to fragment the packet.  The reason is that even if we end up
	 * receiving an ICMP frag-needed, the interface above this tunnel
	 * won't be allowed to drop its MTU as a result, since the packet was
	 * already smaller than the smallest allowable MTU for that interface.
	 */
	if (mp->b_wptr - innerptr <= minmtu) {
		outer4->ipha_fragment_offset_and_flags = 0;
		ixa->ixa_flags &= ~IXAF_DONTFRAG;
	} else if (!(ixa->ixa_flags & IXAF_PMTU_TOO_SMALL) &&
	    (iptun->iptun_typeinfo->iti_type != IPTUN_TYPE_6TO4)) {
		ixa->ixa_flags |= IXAF_DONTFRAG;
	}

	ixa->ixa_ip_hdr_length = IPH_HDR_LENGTH(outer4);
	ixa->ixa_pktlen = msgdsize(mp);
	ixa->ixa_protocol = outer4->ipha_protocol;

	outer4->ipha_length = htons(ixa->ixa_pktlen);
	return (mp);
}

/*
 * Insert an encapsulation limit destination option in the packet provided.
 * Always consumes the mp argument and returns a new mblk pointer.
 */
static mblk_t *
iptun_insert_encaplimit(iptun_t *iptun, mblk_t *mp, ip6_t *outer6,
    uint8_t limit)
{
	mblk_t			*newmp;
	iptun_ipv6hdrs_t	*newouter6;

	ASSERT(outer6->ip6_nxt == IPPROTO_IPV6);
	ASSERT(mp->b_cont == NULL);

	/* Skip past the old outer header; it is rebuilt in the new mblk. */
	mp->b_rptr += sizeof (ip6_t);
	newmp = allocb(sizeof (iptun_ipv6hdrs_t) + MBLKL(mp), BPRI_MED);
	if (newmp == NULL) {
		iptun_drop_pkt(mp, &iptun->iptun_noxmtbuf);
		return (NULL);
	}
	newmp->b_wptr += sizeof (iptun_ipv6hdrs_t);
	/* Copy the payload (Starting with the inner IPv6 header). */
	bcopy(mp->b_rptr, newmp->b_wptr, MBLKL(mp));
	newmp->b_wptr += MBLKL(mp);
	newouter6 = (iptun_ipv6hdrs_t *)newmp->b_rptr;
	/* Now copy the outer IPv6 header. */
	bcopy(outer6, &newouter6->it6h_ip6h, sizeof (ip6_t));
	newouter6->it6h_ip6h.ip6_nxt = IPPROTO_DSTOPTS;
	newouter6->it6h_encaplim = iptun_encaplim_init;
	newouter6->it6h_encaplim.iel_destopt.ip6d_nxt = outer6->ip6_nxt;
	newouter6->it6h_encaplim.iel_telopt.ip6ot_encap_limit = limit;

	/*
	 * The payload length will be set at the end of
	 * iptun_out_process_ipv6().
	 */

	freemsg(mp);
	return (newmp);
}

/*
 * Process output packets with outer IPv6 headers.  Frees mp and bumps stats
 * on error.
 */
static mblk_t *
iptun_out_process_ipv6(iptun_t *iptun, mblk_t *mp, ip6_t *outer6,
    ipha_t *inner4, ip6_t *inner6, ip_xmit_attr_t *ixa)
{
	uint8_t			*innerptr = (inner4 != NULL ?
	    (uint8_t *)inner4 : (uint8_t *)inner6);
	size_t			minmtu = iptun->iptun_typeinfo->iti_minmtu;
	uint8_t			*limit, *configlimit;
	uint32_t		offset;
	iptun_ipv6hdrs_t	*v6hdrs;

	if (inner6 != NULL && iptun_find_encaplimit(mp, inner6, &limit)) {
		/*
		 * The inner packet is an IPv6 packet which itself contains an
		 * encapsulation limit option.  The limit variable points to
		 * the value in the embedded option.  Process the
		 * encapsulation limit option as specified in RFC 2473.
		 *
		 * If limit is 0, then we've exceeded the limit and we need to
		 * send back an ICMPv6 parameter problem message.
		 *
		 * If limit is > 0, then we decrement it by 1 and make sure
		 * that the encapsulation limit option in the outer header
		 * reflects that (adding an option if one isn't already
		 * there).
		 */
		ASSERT(limit > mp->b_rptr && limit < mp->b_wptr);
		if (*limit == 0) {
			mp->b_rptr = (uint8_t *)inner6;
			offset = limit - mp->b_rptr;
			iptun_icmp_error_v6(iptun, inner6, mp, ICMP6_PARAM_PROB,
			    0, offset, ixa->ixa_tsl);
			atomic_inc_64(&iptun->iptun_noxmtbuf);
			return (NULL);
		}

		/*
		 * The outer header requires an encapsulation limit option.
		 * If there isn't one already, add one.
		 */
		if (iptun->iptun_encaplimit == 0) {
			if ((mp = iptun_insert_encaplimit(iptun, mp, outer6,
			    (*limit - 1))) == NULL)
				return (NULL);
			v6hdrs = (iptun_ipv6hdrs_t *)mp->b_rptr;
		} else {
			/*
			 * There is an existing encapsulation limit option in
			 * the outer header.  If the inner encapsulation limit
			 * is less than the configured encapsulation limit,
			 * update the outer encapsulation limit to reflect
			 * this lesser value.
			 */
			v6hdrs = (iptun_ipv6hdrs_t *)mp->b_rptr;
			configlimit =
			    &v6hdrs->it6h_encaplim.iel_telopt.ip6ot_encap_limit;
			if ((*limit - 1) < *configlimit)
				*configlimit = (*limit - 1);
		}
		ixa->ixa_ip_hdr_length = sizeof (iptun_ipv6hdrs_t);
		ixa->ixa_protocol = v6hdrs->it6h_encaplim.iel_destopt.ip6d_nxt;
	} else {
		ixa->ixa_ip_hdr_length = sizeof (ip6_t);
		ixa->ixa_protocol = outer6->ip6_nxt;
	}
	/*
	 * See iptun_output_process_ipv4() why we allow fragmentation for
	 * small packets
	 */
	if (mp->b_wptr - innerptr <= minmtu)
		ixa->ixa_flags &= ~IXAF_DONTFRAG;
	else if (!(ixa->ixa_flags & IXAF_PMTU_TOO_SMALL))
		ixa->ixa_flags |= IXAF_DONTFRAG;

	ixa->ixa_pktlen = msgdsize(mp);
	outer6->ip6_plen = htons(ixa->ixa_pktlen - sizeof (ip6_t));
	return (mp);
}

/*
 * The IP tunneling MAC-type plugins have already done most of the header
 * processing and validity checks.  We are simply responsible for multiplexing
 * down to the ip module below us.
 */
static void
iptun_output(iptun_t *iptun, mblk_t *mp)
{
	conn_t	*connp = iptun->iptun_connp;
	mblk_t	*newmp;
	int	error;
	ip_xmit_attr_t	*ixa;

	ASSERT(mp->b_datap->db_type == M_DATA);

	/* Header processing below assumes a single contiguous mblk. */
	if (mp->b_cont != NULL) {
		if ((newmp = msgpullup(mp, -1)) == NULL) {
			iptun_drop_pkt(mp, &iptun->iptun_noxmtbuf);
			return;
		}
		freemsg(mp);
		mp = newmp;
	}

	/* 6to4 tunnels have their own destination-selection path. */
	if (iptun->iptun_typeinfo->iti_type == IPTUN_TYPE_6TO4) {
		iptun_output_6to4(iptun, mp);
		return;
	}

	if (is_system_labeled()) {
		/*
		 * Since the label can be different meaning a potentially
		 * different IRE,we always use a unique ip_xmit_attr_t.
		 */
		ixa = conn_get_ixa_exclusive(connp);
	} else {
		/*
		 * If no other thread is using conn_ixa this just gets a
		 * reference to conn_ixa. Otherwise we get a safe copy of
		 * conn_ixa.
		 */
		ixa = conn_get_ixa(connp, B_FALSE);
	}
	if (ixa == NULL) {
		iptun_drop_pkt(mp, &iptun->iptun_oerrors);
		return;
	}

	/*
	 * In case we got a safe copy of conn_ixa, then we need
	 * to fill in any pointers in it.
	 */
	if (ixa->ixa_ire == NULL) {
		error = ip_attr_connect(connp, ixa, &connp->conn_saddr_v6,
		    &connp->conn_faddr_v6, &connp->conn_faddr_v6, 0,
		    NULL, NULL, 0);
		if (error != 0) {
			if (ixa->ixa_ire != NULL &&
			    (error == EHOSTUNREACH || error == ENETUNREACH)) {
				/*
				 * Let conn_ip_output/ire_send_noroute return
				 * the error and send any local ICMP error.
				 */
				error = 0;
			} else {
				ixa_refrele(ixa);
				iptun_drop_pkt(mp, &iptun->iptun_oerrors);
				return;
			}
		}
	}

	iptun_output_common(iptun, ixa, mp);
	ixa_refrele(ixa);
}

/*
 * We use an ixa based on the last destination.
3194 */ 3195 static void 3196 iptun_output_6to4(iptun_t *iptun, mblk_t *mp) 3197 { 3198 conn_t *connp = iptun->iptun_connp; 3199 ipha_t *outer4, *inner4; 3200 ip6_t *outer6, *inner6; 3201 ip_xmit_attr_t *ixa; 3202 ip_xmit_attr_t *oldixa; 3203 int error; 3204 boolean_t need_connect; 3205 in6_addr_t v6dst; 3206 3207 ASSERT(mp->b_cont == NULL); /* Verified by iptun_output */ 3208 3209 /* Make sure we set ipha_dst before we look at ipha_dst */ 3210 3211 (void) iptun_find_headers(mp, 0, &outer4, &inner4, &outer6, &inner6); 3212 ASSERT(outer4 != NULL); 3213 if (!iptun_out_process_6to4(iptun, outer4, inner6)) { 3214 iptun_drop_pkt(mp, &iptun->iptun_oerrors); 3215 return; 3216 } 3217 3218 if (is_system_labeled()) { 3219 /* 3220 * Since the label can be different meaning a potentially 3221 * different IRE,we always use a unique ip_xmit_attr_t. 3222 */ 3223 ixa = conn_get_ixa_exclusive(connp); 3224 } else { 3225 /* 3226 * If no other thread is using conn_ixa this just gets a 3227 * reference to conn_ixa. Otherwise we get a safe copy of 3228 * conn_ixa. 3229 */ 3230 ixa = conn_get_ixa(connp, B_FALSE); 3231 } 3232 if (ixa == NULL) { 3233 iptun_drop_pkt(mp, &iptun->iptun_oerrors); 3234 return; 3235 } 3236 3237 mutex_enter(&connp->conn_lock); 3238 if (connp->conn_v4lastdst == outer4->ipha_dst) { 3239 need_connect = (ixa->ixa_ire == NULL); 3240 } else { 3241 /* In case previous destination was multirt */ 3242 ip_attr_newdst(ixa); 3243 3244 /* 3245 * We later update conn_ixa when we update conn_v4lastdst 3246 * which enables subsequent packets to avoid redoing 3247 * ip_attr_connect 3248 */ 3249 need_connect = B_TRUE; 3250 } 3251 mutex_exit(&connp->conn_lock); 3252 3253 /* 3254 * In case we got a safe copy of conn_ixa, or otherwise we don't 3255 * have a current ixa_ire, then we need to fill in any pointers in 3256 * the ixa. 
3257 */ 3258 if (need_connect) { 3259 IN6_IPADDR_TO_V4MAPPED(outer4->ipha_dst, &v6dst); 3260 3261 /* We handle IPsec in iptun_output_common */ 3262 error = ip_attr_connect(connp, ixa, &connp->conn_saddr_v6, 3263 &v6dst, &v6dst, 0, NULL, NULL, 0); 3264 if (error != 0) { 3265 if (ixa->ixa_ire != NULL && 3266 (error == EHOSTUNREACH || error == ENETUNREACH)) { 3267 /* 3268 * Let conn_ip_output/ire_send_noroute return 3269 * the error and send any local ICMP error. 3270 */ 3271 error = 0; 3272 } else { 3273 ixa_refrele(ixa); 3274 iptun_drop_pkt(mp, &iptun->iptun_oerrors); 3275 return; 3276 } 3277 } 3278 } 3279 3280 iptun_output_common(iptun, ixa, mp); 3281 3282 /* Atomically replace conn_ixa and conn_v4lastdst */ 3283 mutex_enter(&connp->conn_lock); 3284 if (connp->conn_v4lastdst != outer4->ipha_dst) { 3285 /* Remember the dst which corresponds to conn_ixa */ 3286 connp->conn_v6lastdst = v6dst; 3287 oldixa = conn_replace_ixa(connp, ixa); 3288 } else { 3289 oldixa = NULL; 3290 } 3291 mutex_exit(&connp->conn_lock); 3292 ixa_refrele(ixa); 3293 if (oldixa != NULL) 3294 ixa_refrele(oldixa); 3295 } 3296 3297 /* 3298 * Check the destination/label. Modifies *mpp by adding/removing CIPSO. 3299 * 3300 * We get the label from the message in order to honor the 3301 * ULPs/IPs choice of label. This will be NULL for forwarded 3302 * packets, neighbor discovery packets and some others. 3303 */ 3304 static int 3305 iptun_output_check_label(mblk_t **mpp, ip_xmit_attr_t *ixa) 3306 { 3307 cred_t *cr; 3308 int adjust; 3309 int iplen; 3310 int err; 3311 ts_label_t *effective_tsl = NULL; 3312 3313 3314 ASSERT(is_system_labeled()); 3315 3316 cr = msg_getcred(*mpp, NULL); 3317 if (cr == NULL) 3318 return (0); 3319 3320 /* 3321 * We need to start with a label based on the IP/ULP above us 3322 */ 3323 ip_xmit_attr_restore_tsl(ixa, cr); 3324 3325 /* 3326 * Need to update packet with any CIPSO option since 3327 * conn_ip_output doesn't do that. 
3328 */ 3329 if (ixa->ixa_flags & IXAF_IS_IPV4) { 3330 ipha_t *ipha; 3331 3332 ipha = (ipha_t *)(*mpp)->b_rptr; 3333 iplen = ntohs(ipha->ipha_length); 3334 err = tsol_check_label_v4(ixa->ixa_tsl, 3335 ixa->ixa_zoneid, mpp, CONN_MAC_DEFAULT, B_FALSE, 3336 ixa->ixa_ipst, &effective_tsl); 3337 if (err != 0) 3338 return (err); 3339 3340 ipha = (ipha_t *)(*mpp)->b_rptr; 3341 adjust = (int)ntohs(ipha->ipha_length) - iplen; 3342 } else { 3343 ip6_t *ip6h; 3344 3345 ip6h = (ip6_t *)(*mpp)->b_rptr; 3346 iplen = ntohs(ip6h->ip6_plen); 3347 3348 err = tsol_check_label_v6(ixa->ixa_tsl, 3349 ixa->ixa_zoneid, mpp, CONN_MAC_DEFAULT, B_FALSE, 3350 ixa->ixa_ipst, &effective_tsl); 3351 if (err != 0) 3352 return (err); 3353 3354 ip6h = (ip6_t *)(*mpp)->b_rptr; 3355 adjust = (int)ntohs(ip6h->ip6_plen) - iplen; 3356 } 3357 3358 if (effective_tsl != NULL) { 3359 /* Update the label */ 3360 ip_xmit_attr_replace_tsl(ixa, effective_tsl); 3361 } 3362 ixa->ixa_pktlen += adjust; 3363 ixa->ixa_ip_hdr_length += adjust; 3364 return (0); 3365 } 3366 3367 3368 static void 3369 iptun_output_common(iptun_t *iptun, ip_xmit_attr_t *ixa, mblk_t *mp) 3370 { 3371 ipsec_tun_pol_t *itp = iptun->iptun_itp; 3372 int outer_hlen; 3373 mblk_t *newmp; 3374 ipha_t *outer4, *inner4; 3375 ip6_t *outer6, *inner6; 3376 int error; 3377 boolean_t update_pktlen; 3378 3379 ASSERT(ixa->ixa_ire != NULL); 3380 3381 outer_hlen = iptun_find_headers(mp, 0, &outer4, &inner4, &outer6, 3382 &inner6); 3383 if (outer_hlen == 0) { 3384 iptun_drop_pkt(mp, &iptun->iptun_oerrors); 3385 return; 3386 } 3387 3388 /* Save IXAF_DONTFRAG value */ 3389 iaflags_t dontfrag = ixa->ixa_flags & IXAF_DONTFRAG; 3390 3391 /* Perform header processing. 
*/ 3392 if (outer4 != NULL) { 3393 mp = iptun_out_process_ipv4(iptun, mp, outer4, inner4, inner6, 3394 ixa); 3395 } else { 3396 mp = iptun_out_process_ipv6(iptun, mp, outer6, inner4, inner6, 3397 ixa); 3398 } 3399 if (mp == NULL) 3400 return; 3401 3402 /* 3403 * Let's hope the compiler optimizes this with "branch taken". 3404 */ 3405 if (itp != NULL && (itp->itp_flags & ITPF_P_ACTIVE)) { 3406 /* This updates the ip_xmit_attr_t */ 3407 mp = ipsec_tun_outbound(mp, iptun, inner4, inner6, outer4, 3408 outer6, outer_hlen, ixa); 3409 if (mp == NULL) { 3410 atomic_inc_64(&iptun->iptun_oerrors); 3411 return; 3412 } 3413 if (is_system_labeled()) { 3414 /* 3415 * Might change the packet by adding/removing CIPSO. 3416 * After this caller inner* and outer* and outer_hlen 3417 * might be invalid. 3418 */ 3419 error = iptun_output_check_label(&mp, ixa); 3420 if (error != 0) { 3421 ip2dbg(("label check failed (%d)\n", error)); 3422 iptun_drop_pkt(mp, &iptun->iptun_oerrors); 3423 return; 3424 } 3425 } 3426 3427 /* 3428 * ipsec_tun_outbound() returns a chain of tunneled IP 3429 * fragments linked with b_next (or a single message if the 3430 * tunneled packet wasn't a fragment). 3431 * If fragcache returned a list then we need to update 3432 * ixa_pktlen for all packets in the list. 3433 */ 3434 update_pktlen = (mp->b_next != NULL); 3435 3436 /* 3437 * Otherwise, we're good to go. The ixa has been updated with 3438 * instructions for outbound IPsec processing. 3439 */ 3440 for (newmp = mp; newmp != NULL; newmp = mp) { 3441 size_t minmtu = iptun->iptun_typeinfo->iti_minmtu; 3442 3443 atomic_inc_64(&iptun->iptun_opackets); 3444 atomic_add_64(&iptun->iptun_obytes, ixa->ixa_pktlen); 3445 mp = mp->b_next; 3446 newmp->b_next = NULL; 3447 3448 /* 3449 * The IXAF_DONTFRAG flag is global, but there is 3450 * a chain here. Check if we're really already 3451 * smaller than the minimum allowed MTU and reset here 3452 * appropriately. 
Otherwise one small packet can kill 3453 * the whole chain's path mtu discovery. 3454 * In addition, update the pktlen to the length of 3455 * the actual packet being processed. 3456 */ 3457 if (update_pktlen) { 3458 ixa->ixa_pktlen = msgdsize(newmp); 3459 if (ixa->ixa_pktlen <= minmtu) 3460 ixa->ixa_flags &= ~IXAF_DONTFRAG; 3461 } 3462 3463 atomic_inc_64(&iptun->iptun_opackets); 3464 atomic_add_64(&iptun->iptun_obytes, ixa->ixa_pktlen); 3465 3466 error = conn_ip_output(newmp, ixa); 3467 3468 /* Restore IXAF_DONTFRAG value */ 3469 ixa->ixa_flags |= dontfrag; 3470 3471 if (error == EMSGSIZE) { 3472 /* IPsec policy might have changed */ 3473 (void) iptun_update_mtu(iptun, ixa, 0); 3474 } 3475 } 3476 } else { 3477 /* 3478 * The ip module will potentially apply global policy to the 3479 * packet in its output path if there's no active tunnel 3480 * policy. 3481 */ 3482 ASSERT(ixa->ixa_ipsec_policy == NULL); 3483 mp = ip_output_attach_policy(mp, outer4, outer6, NULL, ixa); 3484 if (mp == NULL) { 3485 atomic_inc_64(&iptun->iptun_oerrors); 3486 return; 3487 } 3488 if (is_system_labeled()) { 3489 /* 3490 * Might change the packet by adding/removing CIPSO. 3491 * After this caller inner* and outer* and outer_hlen 3492 * might be invalid. 
3493 */ 3494 error = iptun_output_check_label(&mp, ixa); 3495 if (error != 0) { 3496 ip2dbg(("label check failed (%d)\n", error)); 3497 iptun_drop_pkt(mp, &iptun->iptun_oerrors); 3498 return; 3499 } 3500 } 3501 3502 atomic_inc_64(&iptun->iptun_opackets); 3503 atomic_add_64(&iptun->iptun_obytes, ixa->ixa_pktlen); 3504 3505 error = conn_ip_output(mp, ixa); 3506 if (error == EMSGSIZE) { 3507 /* IPsec policy might have changed */ 3508 (void) iptun_update_mtu(iptun, ixa, 0); 3509 } 3510 } 3511 if (ixa->ixa_flags & IXAF_IPSEC_SECURE) 3512 ipsec_out_release_refs(ixa); 3513 } 3514 3515 static mac_callbacks_t iptun_m_callbacks = { 3516 .mc_callbacks = (MC_SETPROP | MC_GETPROP), 3517 .mc_getstat = iptun_m_getstat, 3518 .mc_start = iptun_m_start, 3519 .mc_stop = iptun_m_stop, 3520 .mc_setpromisc = iptun_m_setpromisc, 3521 .mc_multicst = iptun_m_multicst, 3522 .mc_unicst = iptun_m_unicst, 3523 .mc_tx = iptun_m_tx, 3524 .mc_setprop = iptun_m_setprop, 3525 .mc_getprop = iptun_m_getprop 3526 }; 3527