1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * iptun - IP Tunneling Driver 28 * 29 * This module is a GLDv3 driver that implements virtual datalinks over IP 30 * (a.k.a, IP tunneling). The datalinks are managed through a dld ioctl 31 * interface (see iptun_ctl.c), and registered with GLDv3 using 32 * mac_register(). It implements the logic for various forms of IP (IPv4 or 33 * IPv6) encapsulation within IP (IPv4 or IPv6) by interacting with the ip 34 * module below it. Each virtual IP tunnel datalink has a conn_t associated 35 * with it representing the "outer" IP connection. 36 * 37 * The module implements the following locking semantics: 38 * 39 * Lookups and deletions in iptun_hash are synchronized using iptun_hash_lock. 40 * See comments above iptun_hash_lock for details. 41 * 42 * No locks are ever held while calling up to GLDv3. The general architecture 43 * of GLDv3 requires this, as the mac perimeter (essentially a lock) for a 44 * given link will be held while making downcalls (iptun_m_*() callbacks). 45 * Because we need to hold locks while handling downcalls, holding these locks 46 * while issuing upcalls results in deadlock scenarios. See the block comment 47 * above iptun_task_cb() for details on how we safely issue upcalls without 48 * holding any locks. 49 * 50 * The contents of each iptun_t is protected by an iptun_mutex which is held 51 * in iptun_enter() (called by iptun_enter_by_linkid()), and exited in 52 * iptun_exit(). 53 * 54 * See comments in iptun_delete() and iptun_free() for details on how the 55 * iptun_t is deleted safely. 56 */ 57 58 #include <sys/types.h> 59 #include <sys/kmem.h> 60 #include <sys/errno.h> 61 #include <sys/modhash.h> 62 #include <sys/list.h> 63 #include <sys/strsun.h> 64 #include <sys/file.h> 65 #include <sys/systm.h> 66 #include <sys/tihdr.h> 67 #include <sys/param.h> 68 #include <sys/mac_provider.h> 69 #include <sys/mac_ipv4.h> 70 #include <sys/mac_ipv6.h> 71 #include <sys/mac_6to4.h> 72 #include <sys/tsol/tnet.h> 73 #include <sys/sunldi.h> 74 #include <netinet/in.h> 75 #include <netinet/ip6.h> 76 #include <inet/ip.h> 77 #include <inet/ip_ire.h> 78 #include <inet/ipsec_impl.h> 79 #include <inet/iptun.h> 80 #include "iptun_impl.h" 81 82 /* Do the tunnel type and address family match? */ 83 #define IPTUN_ADDR_MATCH(iptun_type, family) \ 84 ((iptun_type == IPTUN_TYPE_IPV4 && family == AF_INET) || \ 85 (iptun_type == IPTUN_TYPE_IPV6 && family == AF_INET6) || \ 86 (iptun_type == IPTUN_TYPE_6TO4 && family == AF_INET)) 87 88 #define IPTUN_HASH_KEY(key) ((mod_hash_key_t)(uintptr_t)(key)) 89 90 #define IPTUNQ_DEV "/dev/iptunq" 91 92 #define IPTUN_MIN_IPV4_MTU 576 /* ip.h still uses 68 (!) */ 93 #define IPTUN_MIN_IPV6_MTU IPV6_MIN_MTU 94 #define IPTUN_MAX_IPV4_MTU (IP_MAXPACKET - sizeof (ipha_t)) 95 #define IPTUN_MAX_IPV6_MTU (IP_MAXPACKET - sizeof (ip6_t) - \ 96 sizeof (iptun_encaplim_t)) 97 98 #define IPTUN_MIN_HOPLIMIT 1 99 #define IPTUN_MAX_HOPLIMIT UINT8_MAX 100 101 #define IPTUN_MIN_ENCAPLIMIT 0 102 #define IPTUN_MAX_ENCAPLIMIT UINT8_MAX 103 104 #define IPTUN_IPSEC_REQ_MASK (IPSEC_PREF_REQUIRED | IPSEC_PREF_NEVER) 105 106 static iptun_encaplim_t iptun_encaplim_init = { 107 { IPPROTO_NONE, 0 }, 108 IP6OPT_TUNNEL_LIMIT, 109 1, 110 IPTUN_DEFAULT_ENCAPLIMIT, /* filled in with actual value later */ 111 IP6OPT_PADN, 112 1, 113 0 114 }; 115 116 /* Table containing per-iptun-type information. */ 117 static iptun_typeinfo_t iptun_type_table[] = { 118 { IPTUN_TYPE_IPV4, MAC_PLUGIN_IDENT_IPV4, IPV4_VERSION, ip_output, 119 IPTUN_MIN_IPV4_MTU, IPTUN_MAX_IPV4_MTU, B_TRUE }, 120 { IPTUN_TYPE_IPV6, MAC_PLUGIN_IDENT_IPV6, IPV6_VERSION, ip_output_v6, 121 IPTUN_MIN_IPV6_MTU, IPTUN_MAX_IPV6_MTU, B_TRUE }, 122 { IPTUN_TYPE_6TO4, MAC_PLUGIN_IDENT_6TO4, IPV4_VERSION, ip_output, 123 IPTUN_MIN_IPV4_MTU, IPTUN_MAX_IPV4_MTU, B_FALSE }, 124 { IPTUN_TYPE_UNKNOWN, NULL, 0, NULL, 0, 0, B_FALSE } 125 }; 126 127 /* 128 * iptun_hash is an iptun_t lookup table by link ID protected by 129 * iptun_hash_lock. While the hash table's integrity is maintained via 130 * internal locking in the mod_hash_*() functions, we need additional locking 131 * so that an iptun_t cannot be deleted after a hash lookup has returned an 132 * iptun_t and before iptun_lock has been entered. As such, we use 133 * iptun_hash_lock when doing lookups and removals from iptun_hash. 134 */ 135 mod_hash_t *iptun_hash; 136 static kmutex_t iptun_hash_lock; 137 138 static uint_t iptun_tunnelcount; /* total for all stacks */ 139 kmem_cache_t *iptun_cache; 140 ddi_taskq_t *iptun_taskq; 141 142 typedef enum { 143 IPTUN_TASK_PMTU_UPDATE, /* obtain new destination path-MTU */ 144 IPTUN_TASK_MTU_UPDATE, /* tell mac about new tunnel link MTU */ 145 IPTUN_TASK_LADDR_UPDATE, /* tell mac about new local address */ 146 IPTUN_TASK_RADDR_UPDATE, /* tell mac about new remote address */ 147 IPTUN_TASK_LINK_UPDATE, /* tell mac about new link state */ 148 IPTUN_TASK_PDATA_UPDATE /* tell mac about updated plugin data */ 149 } iptun_task_t; 150 151 typedef struct iptun_task_data_s { 152 iptun_task_t itd_task; 153 datalink_id_t itd_linkid; 154 } iptun_task_data_t; 155 156 static void iptun_task_dispatch(iptun_t *, iptun_task_t); 157 static int iptun_enter(iptun_t *); 158 static void iptun_exit(iptun_t *); 159 static void iptun_headergen(iptun_t *, boolean_t); 160 static void iptun_drop_pkt(mblk_t *, uint64_t *); 161 static void iptun_input(void *, mblk_t *, void *); 162 static void iptun_output(iptun_t *, mblk_t *); 163 static uint32_t iptun_get_maxmtu(iptun_t *, uint32_t); 164 static uint32_t iptun_update_mtu(iptun_t *, uint32_t); 165 static uint32_t iptun_get_dst_pmtu(iptun_t *); 166 static int iptun_setladdr(iptun_t *, const struct sockaddr_storage *); 167 168 static mac_callbacks_t iptun_m_callbacks; 169 170 static int 171 iptun_m_getstat(void *arg, uint_t stat, uint64_t *val) 172 { 173 iptun_t *iptun = arg; 174 int err = 0; 175 176 switch (stat) { 177 case MAC_STAT_IERRORS: 178 *val = iptun->iptun_ierrors; 179 break; 180 case MAC_STAT_OERRORS: 181 *val = iptun->iptun_oerrors; 182 break; 183 case MAC_STAT_RBYTES: 184 *val = iptun->iptun_rbytes; 185 break; 186 case MAC_STAT_IPACKETS: 187 *val = iptun->iptun_ipackets; 188 break; 189 case MAC_STAT_OBYTES: 190 *val = iptun->iptun_obytes; 191 break; 192 case MAC_STAT_OPACKETS: 193 *val = iptun->iptun_opackets; 194 break; 195 case MAC_STAT_NORCVBUF: 196 *val = iptun->iptun_norcvbuf; 197 break; 198 case MAC_STAT_NOXMTBUF: 199 *val = iptun->iptun_noxmtbuf; 200 break; 201 default: 202 err = ENOTSUP; 203 } 204 205 return (err); 206 } 207 208 static int 209 iptun_m_start(void *arg) 210 { 211 iptun_t *iptun = arg; 212 int err; 213 214 if ((err = iptun_enter(iptun)) == 0) { 215 iptun->iptun_flags |= IPTUN_MAC_STARTED; 216 iptun_task_dispatch(iptun, IPTUN_TASK_LINK_UPDATE); 217 iptun_exit(iptun); 218 } 219 return (err); 220 } 221 222 static void 223 iptun_m_stop(void *arg) 224 { 225 iptun_t *iptun = arg; 226 227 if (iptun_enter(iptun) == 0) { 228 iptun->iptun_flags &= ~IPTUN_MAC_STARTED; 229 iptun_task_dispatch(iptun, IPTUN_TASK_LINK_UPDATE); 230 iptun_exit(iptun); 231 } 232 } 233 234 /* 235 * iptun_m_setpromisc() does nothing and always succeeds. This is because a 236 * tunnel data-link only ever receives packets that are destined exclusively 237 * for the local address of the tunnel. 238 */ 239 /* ARGSUSED */ 240 static int 241 iptun_m_setpromisc(void *arg, boolean_t on) 242 { 243 return (0); 244 } 245 246 /* ARGSUSED */ 247 static int 248 iptun_m_multicst(void *arg, boolean_t add, const uint8_t *addrp) 249 { 250 return (ENOTSUP); 251 } 252 253 /* 254 * iptun_m_unicst() sets the local address. 255 */ 256 /* ARGSUSED */ 257 static int 258 iptun_m_unicst(void *arg, const uint8_t *addrp) 259 { 260 iptun_t *iptun = arg; 261 int err; 262 struct sockaddr_storage ss; 263 struct sockaddr_in *sin; 264 struct sockaddr_in6 *sin6; 265 266 if ((err = iptun_enter(iptun)) == 0) { 267 switch (iptun->iptun_typeinfo->iti_ipvers) { 268 case IPV4_VERSION: 269 sin = (struct sockaddr_in *)&ss; 270 sin->sin_family = AF_INET; 271 bcopy(addrp, &sin->sin_addr, sizeof (in_addr_t)); 272 break; 273 case IPV6_VERSION: 274 sin6 = (struct sockaddr_in6 *)&ss; 275 sin6->sin6_family = AF_INET6; 276 bcopy(addrp, &sin6->sin6_addr, sizeof (in6_addr_t)); 277 break; 278 default: 279 ASSERT(0); 280 } 281 err = iptun_setladdr(iptun, &ss); 282 iptun_exit(iptun); 283 } 284 return (err); 285 } 286 287 static mblk_t * 288 iptun_m_tx(void *arg, mblk_t *mpchain) 289 { 290 mblk_t *mp, *nmp; 291 iptun_t *iptun = arg; 292 293 if (!IS_IPTUN_RUNNING(iptun)) { 294 iptun_drop_pkt(mpchain, &iptun->iptun_noxmtbuf); 295 return (NULL); 296 } 297 298 /* 299 * Request the destination's path MTU information regularly in case 300 * path MTU has increased. 301 */ 302 if (IPTUN_PMTU_TOO_OLD(iptun)) 303 iptun_task_dispatch(iptun, IPTUN_TASK_PMTU_UPDATE); 304 305 for (mp = mpchain; mp != NULL; mp = nmp) { 306 nmp = mp->b_next; 307 mp->b_next = NULL; 308 iptun_output(iptun, mp); 309 } 310 311 return (NULL); 312 } 313 314 /* ARGSUSED */ 315 static int 316 iptun_m_setprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, 317 uint_t pr_valsize, const void *pr_val) 318 { 319 iptun_t *iptun = barg; 320 uint32_t value = *(uint32_t *)pr_val; 321 int err; 322 323 /* 324 * We need to enter this iptun_t since we'll be modifying the outer 325 * header. 326 */ 327 if ((err = iptun_enter(iptun)) != 0) 328 return (err); 329 330 switch (pr_num) { 331 case MAC_PROP_IPTUN_HOPLIMIT: 332 if (value < IPTUN_MIN_HOPLIMIT || value > IPTUN_MAX_HOPLIMIT) { 333 err = EINVAL; 334 break; 335 } 336 if (value != iptun->iptun_hoplimit) { 337 iptun->iptun_hoplimit = (uint8_t)value; 338 iptun_headergen(iptun, B_TRUE); 339 } 340 break; 341 case MAC_PROP_IPTUN_ENCAPLIMIT: 342 if (iptun->iptun_typeinfo->iti_type != IPTUN_TYPE_IPV6 || 343 value > IPTUN_MAX_ENCAPLIMIT) { 344 err = EINVAL; 345 break; 346 } 347 if (value != iptun->iptun_encaplimit) { 348 iptun->iptun_encaplimit = (uint8_t)value; 349 iptun_headergen(iptun, B_TRUE); 350 } 351 break; 352 case MAC_PROP_MTU: { 353 uint32_t maxmtu = iptun_get_maxmtu(iptun, 0); 354 355 if (value < iptun->iptun_typeinfo->iti_minmtu || 356 value > maxmtu) { 357 err = EINVAL; 358 break; 359 } 360 iptun->iptun_flags |= IPTUN_FIXED_MTU; 361 if (value != iptun->iptun_mtu) { 362 iptun->iptun_mtu = value; 363 iptun_task_dispatch(iptun, IPTUN_TASK_MTU_UPDATE); 364 } 365 break; 366 } 367 default: 368 err = EINVAL; 369 } 370 iptun_exit(iptun); 371 return (err); 372 } 373 374 /* ARGSUSED */ 375 static int 376 iptun_m_getprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, 377 uint_t pr_flags, uint_t pr_valsize, void *pr_val, uint_t *perm) 378 { 379 iptun_t *iptun = barg; 380 mac_propval_range_t range; 381 boolean_t is_default = (pr_flags & MAC_PROP_DEFAULT); 382 boolean_t is_possible = (pr_flags & MAC_PROP_POSSIBLE); 383 int err; 384 385 if ((err = iptun_enter(iptun)) != 0) 386 return (err); 387 388 if ((pr_flags & ~(MAC_PROP_DEFAULT | MAC_PROP_POSSIBLE)) != 0) { 389 err = ENOTSUP; 390 goto done; 391 } 392 if (is_default && is_possible) { 393 err = EINVAL; 394 goto done; 395 } 396 397 *perm = MAC_PROP_PERM_RW; 398 399 if (is_possible) { 400 if (pr_valsize < sizeof (mac_propval_range_t)) { 401 err = EINVAL; 402 goto done; 403 } 404 range.mpr_count = 1; 405 range.mpr_type = MAC_PROPVAL_UINT32; 406 } else if (pr_valsize < sizeof (uint32_t)) { 407 err = EINVAL; 408 goto done; 409 } 410 411 switch (pr_num) { 412 case MAC_PROP_IPTUN_HOPLIMIT: 413 if (is_possible) { 414 range.range_uint32[0].mpur_min = IPTUN_MIN_HOPLIMIT; 415 range.range_uint32[0].mpur_max = IPTUN_MAX_HOPLIMIT; 416 } else if (is_default) { 417 *(uint32_t *)pr_val = IPTUN_DEFAULT_HOPLIMIT; 418 } else { 419 *(uint32_t *)pr_val = iptun->iptun_hoplimit; 420 } 421 break; 422 case MAC_PROP_IPTUN_ENCAPLIMIT: 423 if (iptun->iptun_typeinfo->iti_type != IPTUN_TYPE_IPV6) { 424 err = ENOTSUP; 425 goto done; 426 } 427 if (is_possible) { 428 range.range_uint32[0].mpur_min = IPTUN_MIN_ENCAPLIMIT; 429 range.range_uint32[0].mpur_max = IPTUN_MAX_ENCAPLIMIT; 430 } else if (is_default) { 431 *(uint32_t *)pr_val = IPTUN_DEFAULT_ENCAPLIMIT; 432 } else { 433 *(uint32_t *)pr_val = iptun->iptun_encaplimit; 434 } 435 break; 436 case MAC_PROP_MTU: { 437 uint32_t maxmtu = iptun_get_maxmtu(iptun, 0); 438 439 if (is_possible) { 440 range.range_uint32[0].mpur_min = 441 iptun->iptun_typeinfo->iti_minmtu; 442 range.range_uint32[0].mpur_max = maxmtu; 443 } else { 444 /* 445 * The MAC module knows the current value and should 446 * never call us for it. There is also no default 447 * MTU, as by default, it is a dynamic property. 448 */ 449 err = ENOTSUP; 450 goto done; 451 } 452 break; 453 } 454 default: 455 err = EINVAL; 456 goto done; 457 } 458 if (is_possible) 459 bcopy(&range, pr_val, sizeof (range)); 460 done: 461 iptun_exit(iptun); 462 return (err); 463 } 464 465 uint_t 466 iptun_count(void) 467 { 468 return (iptun_tunnelcount); 469 } 470 471 /* 472 * Enter an iptun_t exclusively. This is essentially just a mutex, but we 473 * don't allow iptun_enter() to succeed on a tunnel if it's in the process of 474 * being deleted. 475 */ 476 static int 477 iptun_enter(iptun_t *iptun) 478 { 479 mutex_enter(&iptun->iptun_lock); 480 while (iptun->iptun_flags & IPTUN_DELETE_PENDING) 481 cv_wait(&iptun->iptun_enter_cv, &iptun->iptun_lock); 482 if (iptun->iptun_flags & IPTUN_CONDEMNED) { 483 mutex_exit(&iptun->iptun_lock); 484 return (ENOENT); 485 } 486 return (0); 487 } 488 489 /* 490 * Exit the tunnel entered in iptun_enter(). 491 */ 492 static void 493 iptun_exit(iptun_t *iptun) 494 { 495 mutex_exit(&iptun->iptun_lock); 496 } 497 498 /* 499 * Enter the IP tunnel instance by datalink ID. 500 */ 501 static int 502 iptun_enter_by_linkid(datalink_id_t linkid, iptun_t **iptun) 503 { 504 int err; 505 506 mutex_enter(&iptun_hash_lock); 507 if (mod_hash_find(iptun_hash, IPTUN_HASH_KEY(linkid), 508 (mod_hash_val_t *)iptun) == 0) 509 err = iptun_enter(*iptun); 510 else 511 err = ENOENT; 512 if (err != 0) 513 *iptun = NULL; 514 mutex_exit(&iptun_hash_lock); 515 return (err); 516 } 517 518 /* 519 * Handle tasks that were deferred through the iptun_taskq. These fall into 520 * two categories: 521 * 522 * 1. Tasks that were defered because we didn't want to spend time doing them 523 * while in the data path. Only IPTUN_TASK_PMTU_UPDATE falls into this 524 * category. 525 * 526 * 2. Tasks that were defered because they require calling up to the mac 527 * module, and we can't call up to the mac module while holding locks. 528 * 529 * Handling 1 is easy; we just lookup the iptun_t, perform the task, exit the 530 * tunnel, and we're done. 531 * 532 * Handling 2 is tricky to get right without introducing race conditions and 533 * deadlocks with the mac module, as we cannot issue an upcall while in the 534 * iptun_t. The reason is that upcalls may try and enter the mac perimeter, 535 * while iptun callbacks (such as iptun_m_setprop()) called from the mac 536 * module will already have the perimeter held, and will then try and enter 537 * the iptun_t. You can see the lock ordering problem with this; this will 538 * deadlock. 539 * 540 * The safe way to do this is to enter the iptun_t in question and copy the 541 * information we need out of it so that we can exit it and know that the 542 * information being passed up to the upcalls won't be subject to modification 543 * by other threads. The problem now is that we need to exit it prior to 544 * issuing the upcall, but once we do this, a thread could come along and 545 * delete the iptun_t and thus the mac handle required to issue the upcall. 546 * To prevent this, we set the IPTUN_UPCALL_PENDING flag prior to exiting the 547 * iptun_t. This flag is the condition associated with iptun_upcall_cv, which 548 * iptun_delete() will cv_wait() on. When the upcall completes, we clear 549 * IPTUN_UPCALL_PENDING and cv_signal() any potentially waiting 550 * iptun_delete(). We can thus still safely use iptun->iptun_mh after having 551 * exited the iptun_t. 552 */ 553 static void 554 iptun_task_cb(void *arg) 555 { 556 iptun_task_data_t *itd = arg; 557 iptun_task_t task = itd->itd_task; 558 datalink_id_t linkid = itd->itd_linkid; 559 iptun_t *iptun; 560 uint32_t mtu; 561 iptun_addr_t addr; 562 link_state_t linkstate; 563 size_t header_size; 564 iptun_header_t header; 565 566 kmem_free(itd, sizeof (*itd)); 567 568 /* 569 * Note that if the lookup fails, it's because the tunnel was deleted 570 * between the time the task was dispatched and now. That isn't an 571 * error. 572 */ 573 if (iptun_enter_by_linkid(linkid, &iptun) != 0) 574 return; 575 576 if (task == IPTUN_TASK_PMTU_UPDATE) { 577 (void) iptun_update_mtu(iptun, 0); 578 iptun_exit(iptun); 579 return; 580 } 581 582 iptun->iptun_flags |= IPTUN_UPCALL_PENDING; 583 584 switch (task) { 585 case IPTUN_TASK_MTU_UPDATE: 586 mtu = iptun->iptun_mtu; 587 break; 588 case IPTUN_TASK_LADDR_UPDATE: 589 addr = iptun->iptun_laddr; 590 break; 591 case IPTUN_TASK_RADDR_UPDATE: 592 addr = iptun->iptun_raddr; 593 break; 594 case IPTUN_TASK_LINK_UPDATE: 595 linkstate = IS_IPTUN_RUNNING(iptun) ? 596 LINK_STATE_UP : LINK_STATE_DOWN; 597 break; 598 case IPTUN_TASK_PDATA_UPDATE: 599 header_size = iptun->iptun_header_size; 600 header = iptun->iptun_header; 601 break; 602 default: 603 ASSERT(0); 604 } 605 606 iptun_exit(iptun); 607 608 switch (task) { 609 case IPTUN_TASK_MTU_UPDATE: 610 (void) mac_maxsdu_update(iptun->iptun_mh, mtu); 611 break; 612 case IPTUN_TASK_LADDR_UPDATE: 613 mac_unicst_update(iptun->iptun_mh, (uint8_t *)&addr.ia_addr); 614 break; 615 case IPTUN_TASK_RADDR_UPDATE: 616 mac_dst_update(iptun->iptun_mh, (uint8_t *)&addr.ia_addr); 617 break; 618 case IPTUN_TASK_LINK_UPDATE: 619 mac_link_update(iptun->iptun_mh, linkstate); 620 break; 621 case IPTUN_TASK_PDATA_UPDATE: 622 if (mac_pdata_update(iptun->iptun_mh, 623 header_size == 0 ? NULL : &header, header_size) != 0) 624 atomic_inc_64(&iptun->iptun_taskq_fail); 625 break; 626 } 627 628 mutex_enter(&iptun->iptun_lock); 629 iptun->iptun_flags &= ~IPTUN_UPCALL_PENDING; 630 cv_signal(&iptun->iptun_upcall_cv); 631 mutex_exit(&iptun->iptun_lock); 632 } 633 634 static void 635 iptun_task_dispatch(iptun_t *iptun, iptun_task_t iptun_task) 636 { 637 iptun_task_data_t *itd; 638 639 itd = kmem_alloc(sizeof (*itd), KM_NOSLEEP); 640 if (itd == NULL) { 641 atomic_inc_64(&iptun->iptun_taskq_fail); 642 return; 643 } 644 itd->itd_task = iptun_task; 645 itd->itd_linkid = iptun->iptun_linkid; 646 if (ddi_taskq_dispatch(iptun_taskq, iptun_task_cb, itd, DDI_NOSLEEP)) { 647 atomic_inc_64(&iptun->iptun_taskq_fail); 648 kmem_free(itd, sizeof (*itd)); 649 } 650 } 651 652 /* 653 * Convert an iptun_addr_t to sockaddr_storage. 654 */ 655 static void 656 iptun_getaddr(iptun_addr_t *iptun_addr, struct sockaddr_storage *ss) 657 { 658 struct sockaddr_in *sin; 659 struct sockaddr_in6 *sin6; 660 661 bzero(ss, sizeof (*ss)); 662 switch (iptun_addr->ia_family) { 663 case AF_INET: 664 sin = (struct sockaddr_in *)ss; 665 sin->sin_addr.s_addr = iptun_addr->ia_addr.iau_addr4; 666 break; 667 case AF_INET6: 668 sin6 = (struct sockaddr_in6 *)ss; 669 sin6->sin6_addr = iptun_addr->ia_addr.iau_addr6; 670 break; 671 default: 672 ASSERT(0); 673 } 674 ss->ss_family = iptun_addr->ia_family; 675 } 676 677 /* 678 * General purpose function to set an IP tunnel source or destination address. 679 */ 680 static int 681 iptun_setaddr(iptun_type_t iptun_type, iptun_addr_t *iptun_addr, 682 const struct sockaddr_storage *ss) 683 { 684 if (!IPTUN_ADDR_MATCH(iptun_type, ss->ss_family)) 685 return (EINVAL); 686 687 switch (ss->ss_family) { 688 case AF_INET: { 689 struct sockaddr_in *sin = (struct sockaddr_in *)ss; 690 691 if ((sin->sin_addr.s_addr == INADDR_ANY) || 692 (sin->sin_addr.s_addr == INADDR_BROADCAST) || 693 CLASSD(sin->sin_addr.s_addr)) { 694 return (EADDRNOTAVAIL); 695 } 696 iptun_addr->ia_addr.iau_addr4 = sin->sin_addr.s_addr; 697 break; 698 } 699 case AF_INET6: { 700 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)ss; 701 702 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) || 703 IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr) || 704 IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 705 return (EADDRNOTAVAIL); 706 } 707 iptun_addr->ia_addr.iau_addr6 = sin6->sin6_addr; 708 break; 709 } 710 default: 711 return (EAFNOSUPPORT); 712 } 713 iptun_addr->ia_family = ss->ss_family; 714 return (0); 715 } 716 717 static int 718 iptun_setladdr(iptun_t *iptun, const struct sockaddr_storage *laddr) 719 { 720 return (iptun_setaddr(iptun->iptun_typeinfo->iti_type, 721 &iptun->iptun_laddr, laddr)); 722 } 723 724 static int 725 iptun_setraddr(iptun_t *iptun, const struct sockaddr_storage *raddr) 726 { 727 if (!(iptun->iptun_typeinfo->iti_hasraddr)) 728 return (EINVAL); 729 return (iptun_setaddr(iptun->iptun_typeinfo->iti_type, 730 &iptun->iptun_raddr, raddr)); 731 } 732 733 static boolean_t 734 iptun_canbind(iptun_t *iptun) 735 { 736 /* 737 * A tunnel may bind when its source address has been set, and if its 738 * tunnel type requires one, also its destination address. 739 */ 740 return ((iptun->iptun_flags & IPTUN_LADDR) && 741 ((iptun->iptun_flags & IPTUN_RADDR) || 742 !(iptun->iptun_typeinfo->iti_hasraddr))); 743 } 744 745 static int 746 iptun_bind(iptun_t *iptun) 747 { 748 conn_t *connp = iptun->iptun_connp; 749 int err; 750 751 ASSERT(iptun_canbind(iptun)); 752 753 switch (iptun->iptun_typeinfo->iti_type) { 754 case IPTUN_TYPE_IPV4: 755 /* 756 * When we set a tunnel's destination address, we do not care 757 * if the destination is reachable. Transient routing issues 758 * should not inhibit the creation of a tunnel interface, for 759 * example. For that reason, we pass in B_FALSE for the 760 * verify_dst argument of ip_proto_bind_connected_v4() (and 761 * similarly for IPv6 tunnels below). 762 */ 763 err = ip_proto_bind_connected_v4(connp, NULL, IPPROTO_ENCAP, 764 &iptun->iptun_laddr4, 0, iptun->iptun_raddr4, 0, B_TRUE, 765 B_FALSE, iptun->iptun_cred); 766 break; 767 case IPTUN_TYPE_IPV6: 768 err = ip_proto_bind_connected_v6(connp, NULL, IPPROTO_IPV6, 769 &iptun->iptun_laddr6, 0, &iptun->iptun_raddr6, NULL, 0, 770 B_TRUE, B_FALSE, iptun->iptun_cred); 771 break; 772 case IPTUN_TYPE_6TO4: 773 err = ip_proto_bind_laddr_v4(connp, NULL, IPPROTO_IPV6, 774 iptun->iptun_laddr4, 0, B_TRUE); 775 break; 776 } 777 778 if (err == 0) { 779 iptun->iptun_flags |= IPTUN_BOUND; 780 781 /* 782 * Now that we're bound with ip below us, this is a good time 783 * to initialize the destination path MTU and to re-calculate 784 * the tunnel's link MTU. 785 */ 786 (void) iptun_update_mtu(iptun, 0); 787 788 if (IS_IPTUN_RUNNING(iptun)) 789 iptun_task_dispatch(iptun, IPTUN_TASK_LINK_UPDATE); 790 } 791 return (err); 792 } 793 794 static void 795 iptun_unbind(iptun_t *iptun) 796 { 797 ASSERT(iptun->iptun_flags & IPTUN_BOUND); 798 ASSERT(mutex_owned(&iptun->iptun_lock) || 799 (iptun->iptun_flags & IPTUN_CONDEMNED)); 800 ip_unbind(iptun->iptun_connp); 801 iptun->iptun_flags &= ~IPTUN_BOUND; 802 if (!(iptun->iptun_flags & IPTUN_CONDEMNED)) 803 iptun_task_dispatch(iptun, IPTUN_TASK_LINK_UPDATE); 804 } 805 806 /* 807 * Re-generate the template data-link header for a given IP tunnel given the 808 * tunnel's current parameters. 809 */ 810 static void 811 iptun_headergen(iptun_t *iptun, boolean_t update_mac) 812 { 813 switch (iptun->iptun_typeinfo->iti_ipvers) { 814 case IPV4_VERSION: 815 /* 816 * We only need to use a custom IP header if the administrator 817 * has supplied a non-default hoplimit. 818 */ 819 if (iptun->iptun_hoplimit == IPTUN_DEFAULT_HOPLIMIT) { 820 iptun->iptun_header_size = 0; 821 break; 822 } 823 iptun->iptun_header_size = sizeof (ipha_t); 824 iptun->iptun_header4.ipha_version_and_hdr_length = 825 IP_SIMPLE_HDR_VERSION; 826 iptun->iptun_header4.ipha_fragment_offset_and_flags = 827 htons(IPH_DF); 828 iptun->iptun_header4.ipha_ttl = iptun->iptun_hoplimit; 829 break; 830 case IPV6_VERSION: { 831 ip6_t *ip6hp = &iptun->iptun_header6.it6h_ip6h; 832 833 /* 834 * We only need to use a custom IPv6 header if either the 835 * administrator has supplied a non-default hoplimit, or we 836 * need to include an encapsulation limit option in the outer 837 * header. 838 */ 839 if (iptun->iptun_hoplimit == IPTUN_DEFAULT_HOPLIMIT && 840 iptun->iptun_encaplimit == 0) { 841 iptun->iptun_header_size = 0; 842 break; 843 } 844 845 (void) memset(ip6hp, 0, sizeof (*ip6hp)); 846 if (iptun->iptun_encaplimit == 0) { 847 iptun->iptun_header_size = sizeof (ip6_t); 848 ip6hp->ip6_nxt = IPPROTO_NONE; 849 } else { 850 iptun_encaplim_t *iel; 851 852 iptun->iptun_header_size = sizeof (iptun_ipv6hdrs_t); 853 /* 854 * The mac_ipv6 plugin requires ip6_plen to be in host 855 * byte order and reflect the extension headers 856 * present in the template. The actual network byte 857 * order ip6_plen will be set on a per-packet basis on 858 * transmit. 859 */ 860 ip6hp->ip6_plen = sizeof (*iel); 861 ip6hp->ip6_nxt = IPPROTO_DSTOPTS; 862 iel = &iptun->iptun_header6.it6h_encaplim; 863 *iel = iptun_encaplim_init; 864 iel->iel_telopt.ip6ot_encap_limit = 865 iptun->iptun_encaplimit; 866 } 867 868 ip6hp->ip6_hlim = iptun->iptun_hoplimit; 869 break; 870 } 871 } 872 873 if (update_mac) 874 iptun_task_dispatch(iptun, IPTUN_TASK_PDATA_UPDATE); 875 } 876 877 /* 878 * Insert inbound and outbound IPv4 and IPv6 policy into the given policy 879 * head. 880 */ 881 static boolean_t 882 iptun_insert_simple_policies(ipsec_policy_head_t *ph, ipsec_act_t *actp, 883 uint_t n, netstack_t *ns) 884 { 885 int f = IPSEC_AF_V4; 886 887 if (!ipsec_polhead_insert(ph, actp, n, f, IPSEC_TYPE_INBOUND, ns) || 888 !ipsec_polhead_insert(ph, actp, n, f, IPSEC_TYPE_OUTBOUND, ns)) 889 return (B_FALSE); 890 891 f = IPSEC_AF_V6; 892 return (ipsec_polhead_insert(ph, actp, n, f, IPSEC_TYPE_INBOUND, ns) && 893 ipsec_polhead_insert(ph, actp, n, f, IPSEC_TYPE_OUTBOUND, ns)); 894 } 895 896 /* 897 * Used to set IPsec policy when policy is set through the IPTUN_CREATE or 898 * IPTUN_MODIFY ioctls. 899 */ 900 static int 901 iptun_set_sec_simple(iptun_t *iptun, const ipsec_req_t *ipsr) 902 { 903 int rc = 0; 904 uint_t nact; 905 ipsec_act_t *actp = NULL; 906 boolean_t clear_all, old_policy = B_FALSE; 907 ipsec_tun_pol_t *itp; 908 char name[MAXLINKNAMELEN]; 909 uint64_t gen; 910 netstack_t *ns = iptun->iptun_ns; 911 912 /* Can't specify self-encap on a tunnel. */ 913 if (ipsr->ipsr_self_encap_req != 0) 914 return (EINVAL); 915 916 /* 917 * If it's a "clear-all" entry, unset the security flags and resume 918 * normal cleartext (or inherit-from-global) policy. 919 */ 920 clear_all = ((ipsr->ipsr_ah_req & IPTUN_IPSEC_REQ_MASK) == 0 && 921 (ipsr->ipsr_esp_req & IPTUN_IPSEC_REQ_MASK) == 0); 922 923 ASSERT(mutex_owned(&iptun->iptun_lock)); 924 itp = iptun->iptun_itp; 925 if (itp == NULL) { 926 if (clear_all) 927 goto bail; 928 if ((rc = dls_mgmt_get_linkinfo(iptun->iptun_linkid, name, NULL, 929 NULL, NULL)) != 0) 930 goto bail; 931 ASSERT(name[0] != '\0'); 932 if ((itp = create_tunnel_policy(name, &rc, &gen, ns)) == NULL) 933 goto bail; 934 iptun->iptun_itp = itp; 935 } 936 937 /* Allocate the actvec now, before holding itp or polhead locks. */ 938 ipsec_actvec_from_req(ipsr, &actp, &nact, ns); 939 if (actp == NULL) { 940 rc = ENOMEM; 941 goto bail; 942 } 943 944 /* 945 * Just write on the active polhead. Save the primary/secondary stuff 946 * for spdsock operations. 947 * 948 * Mutex because we need to write to the polhead AND flags atomically. 949 * Other threads will acquire the polhead lock as a reader if the 950 * (unprotected) flag is set. 951 */ 952 mutex_enter(&itp->itp_lock); 953 if (itp->itp_flags & ITPF_P_TUNNEL) { 954 /* Oops, we lost a race. Let's get out of here. */ 955 rc = EBUSY; 956 goto mutex_bail; 957 } 958 old_policy = ((itp->itp_flags & ITPF_P_ACTIVE) != 0); 959 960 if (old_policy) { 961 ITPF_CLONE(itp->itp_flags); 962 rc = ipsec_copy_polhead(itp->itp_policy, itp->itp_inactive, ns); 963 if (rc != 0) { 964 /* inactive has already been cleared. */ 965 itp->itp_flags &= ~ITPF_IFLAGS; 966 goto mutex_bail; 967 } 968 rw_enter(&itp->itp_policy->iph_lock, RW_WRITER); 969 ipsec_polhead_flush(itp->itp_policy, ns); 970 } else { 971 /* Else assume itp->itp_policy is already flushed. */ 972 rw_enter(&itp->itp_policy->iph_lock, RW_WRITER); 973 } 974 975 if (clear_all) { 976 ASSERT(avl_numnodes(&itp->itp_policy->iph_rulebyid) == 0); 977 itp->itp_flags &= ~ITPF_PFLAGS; 978 rw_exit(&itp->itp_policy->iph_lock); 979 old_policy = B_FALSE; /* Clear out the inactive one too. */ 980 goto recover_bail; 981 } 982 983 if (iptun_insert_simple_policies(itp->itp_policy, actp, nact, ns)) { 984 rw_exit(&itp->itp_policy->iph_lock); 985 /* 986 * Adjust MTU and make sure the DL side knows what's up. 987 */ 988 itp->itp_flags = ITPF_P_ACTIVE; 989 (void) iptun_update_mtu(iptun, 0); 990 old_policy = B_FALSE; /* Blank out inactive - we succeeded */ 991 } else { 992 rw_exit(&itp->itp_policy->iph_lock); 993 rc = ENOMEM; 994 } 995 996 recover_bail: 997 if (old_policy) { 998 /* Recover policy in in active polhead. */ 999 ipsec_swap_policy(itp->itp_policy, itp->itp_inactive, ns); 1000 ITPF_SWAP(itp->itp_flags); 1001 } 1002 1003 /* Clear policy in inactive polhead. */ 1004 itp->itp_flags &= ~ITPF_IFLAGS; 1005 rw_enter(&itp->itp_inactive->iph_lock, RW_WRITER); 1006 ipsec_polhead_flush(itp->itp_inactive, ns); 1007 rw_exit(&itp->itp_inactive->iph_lock); 1008 1009 mutex_bail: 1010 mutex_exit(&itp->itp_lock); 1011 1012 bail: 1013 if (actp != NULL) 1014 ipsec_actvec_free(actp, nact); 1015 1016 return (rc); 1017 } 1018 1019 static iptun_typeinfo_t * 1020 iptun_gettypeinfo(iptun_type_t type) 1021 { 1022 int i; 1023 1024 for (i = 0; iptun_type_table[i].iti_type != IPTUN_TYPE_UNKNOWN; i++) { 1025 if (iptun_type_table[i].iti_type == type) 1026 break; 1027 } 1028 return (&iptun_type_table[i]); 1029 } 1030 1031 /* 1032 * Set the parameters included in ik on the tunnel iptun. Parameters that can 1033 * only be set at creation time are set in iptun_create(). 1034 */ 1035 static int 1036 iptun_setparams(iptun_t *iptun, const iptun_kparams_t *ik) 1037 { 1038 int err = 0; 1039 netstack_t *ns = iptun->iptun_ns; 1040 iptun_addr_t orig_laddr, orig_raddr; 1041 uint_t orig_flags = iptun->iptun_flags; 1042 1043 if (ik->iptun_kparam_flags & IPTUN_KPARAM_LADDR) { 1044 if (orig_flags & IPTUN_LADDR) 1045 orig_laddr = iptun->iptun_laddr; 1046 if ((err = iptun_setladdr(iptun, &ik->iptun_kparam_laddr)) != 0) 1047 return (err); 1048 iptun->iptun_flags |= IPTUN_LADDR; 1049 } 1050 1051 if (ik->iptun_kparam_flags & IPTUN_KPARAM_RADDR) { 1052 if (orig_flags & IPTUN_RADDR) 1053 orig_raddr = iptun->iptun_raddr; 1054 if ((err = iptun_setraddr(iptun, &ik->iptun_kparam_raddr)) != 0) 1055 goto done; 1056 iptun->iptun_flags |= IPTUN_RADDR; 1057 } 1058 1059 if (ik->iptun_kparam_flags & IPTUN_KPARAM_SECINFO) { 1060 /* 1061 * Set IPsec policy originating from the ifconfig(1M) command 1062 * line. This is traditionally called "simple" policy because 1063 * the ipsec_req_t (iptun_kparam_secinfo) can only describe a 1064 * simple policy of "do ESP on everything" and/or "do AH on 1065 * everything" (as opposed to the rich policy that can be 1066 * defined with ipsecconf(1M)). 1067 */ 1068 if (iptun->iptun_typeinfo->iti_type == IPTUN_TYPE_6TO4) { 1069 /* 1070 * Can't set security properties for automatic 1071 * tunnels. 1072 */ 1073 err = EINVAL; 1074 goto done; 1075 } 1076 1077 if (!ipsec_loaded(ns->netstack_ipsec)) { 1078 /* If IPsec can be loaded, try and load it now. */ 1079 if (ipsec_failed(ns->netstack_ipsec)) { 1080 err = EPROTONOSUPPORT; 1081 goto done; 1082 } 1083 ipsec_loader_loadnow(ns->netstack_ipsec); 1084 /* 1085 * ipsec_loader_loadnow() returns while IPsec is 1086 * loaded asynchronously. While a method exists to 1087 * wait for IPsec to load (ipsec_loader_wait()), it 1088 * requires use of a STREAMS queue to do a qwait(). 1089 * We're not in STREAMS context here, and so we can't 1090 * use it. This is not a problem in practice because 1091 * in the vast majority of cases, key management and 1092 * global policy will have loaded before any tunnels 1093 * are plumbed, and so IPsec will already have been 1094 * loaded. 1095 */ 1096 err = EAGAIN; 1097 goto done; 1098 } 1099 1100 err = iptun_set_sec_simple(iptun, &ik->iptun_kparam_secinfo); 1101 if (err == 0) { 1102 iptun->iptun_flags |= IPTUN_SIMPLE_POLICY; 1103 iptun->iptun_simple_policy = ik->iptun_kparam_secinfo; 1104 } 1105 } 1106 done: 1107 if (err != 0) { 1108 /* Restore original source and destination. */ 1109 if (ik->iptun_kparam_flags & IPTUN_KPARAM_LADDR && 1110 (orig_flags & IPTUN_LADDR)) 1111 iptun->iptun_laddr = orig_laddr; 1112 if ((ik->iptun_kparam_flags & IPTUN_KPARAM_RADDR) && 1113 (orig_flags & IPTUN_RADDR)) 1114 iptun->iptun_raddr = orig_raddr; 1115 iptun->iptun_flags = orig_flags; 1116 } 1117 return (err); 1118 } 1119 1120 static int 1121 iptun_register(iptun_t *iptun) 1122 { 1123 mac_register_t *mac; 1124 int err; 1125 1126 ASSERT(!(iptun->iptun_flags & IPTUN_MAC_REGISTERED)); 1127 1128 if ((mac = mac_alloc(MAC_VERSION)) == NULL) 1129 return (EINVAL); 1130 1131 mac->m_type_ident = iptun->iptun_typeinfo->iti_ident; 1132 mac->m_driver = iptun; 1133 mac->m_dip = iptun_dip; 1134 mac->m_instance = (uint_t)-1; 1135 mac->m_src_addr = (uint8_t *)&iptun->iptun_laddr.ia_addr; 1136 mac->m_dst_addr = iptun->iptun_typeinfo->iti_hasraddr ? 1137 (uint8_t *)&iptun->iptun_raddr.ia_addr : NULL; 1138 mac->m_callbacks = &iptun_m_callbacks; 1139 mac->m_min_sdu = iptun->iptun_typeinfo->iti_minmtu; 1140 mac->m_max_sdu = iptun->iptun_mtu; 1141 if (iptun->iptun_header_size != 0) { 1142 mac->m_pdata = &iptun->iptun_header; 1143 mac->m_pdata_size = iptun->iptun_header_size; 1144 } 1145 if ((err = mac_register(mac, &iptun->iptun_mh)) == 0) 1146 iptun->iptun_flags |= IPTUN_MAC_REGISTERED; 1147 mac_free(mac); 1148 return (err); 1149 } 1150 1151 static int 1152 iptun_unregister(iptun_t *iptun) 1153 { 1154 int err; 1155 1156 ASSERT(iptun->iptun_flags & IPTUN_MAC_REGISTERED); 1157 if ((err = mac_unregister(iptun->iptun_mh)) == 0) 1158 iptun->iptun_flags &= ~IPTUN_MAC_REGISTERED; 1159 return (err); 1160 } 1161 1162 static conn_t * 1163 iptun_conn_create(iptun_t *iptun, netstack_t *ns, cred_t *credp) 1164 { 1165 conn_t *connp; 1166 1167 if ((connp = ipcl_conn_create(IPCL_IPCCONN, KM_NOSLEEP, ns)) == NULL) 1168 return (NULL); 1169 1170 connp->conn_flags |= IPCL_IPTUN; 1171 connp->conn_iptun = iptun; 1172 connp->conn_recv = iptun_input; 1173 connp->conn_rq = ns->netstack_iptun->iptuns_g_q; 1174 connp->conn_wq = WR(connp->conn_rq); 1175 /* 1176 * For exclusive stacks we set conn_zoneid to GLOBAL_ZONEID as is done 1177 * for all other conn_t's. 1178 * 1179 * Note that there's an important distinction between iptun_zoneid and 1180 * conn_zoneid. The conn_zoneid is set to GLOBAL_ZONEID in non-global 1181 * exclusive stack zones to make the ip module believe that the 1182 * non-global zone is actually a global zone. Therefore, when 1183 * interacting with the ip module, we must always use conn_zoneid. 1184 */ 1185 connp->conn_zoneid = (ns->netstack_stackid == GLOBAL_NETSTACKID) ? 1186 crgetzoneid(credp) : GLOBAL_ZONEID; 1187 connp->conn_cred = credp; 1188 /* crfree() is done in ipcl_conn_destroy(), called by CONN_DEC_REF() */ 1189 crhold(connp->conn_cred); 1190 1191 connp->conn_send = iptun->iptun_typeinfo->iti_txfunc; 1192 connp->conn_af_isv6 = iptun->iptun_typeinfo->iti_ipvers == IPV6_VERSION; 1193 ASSERT(connp->conn_ref == 1); 1194 1195 mutex_enter(&connp->conn_lock); 1196 connp->conn_state_flags &= ~CONN_INCIPIENT; 1197 mutex_exit(&connp->conn_lock); 1198 return (connp); 1199 } 1200 1201 static void 1202 iptun_conn_destroy(conn_t *connp) 1203 { 1204 ip_quiesce_conn(connp); 1205 connp->conn_iptun = NULL; 1206 ASSERT(connp->conn_ref == 1); 1207 CONN_DEC_REF(connp); 1208 } 1209 1210 static int 1211 iptun_create_g_q(iptun_stack_t *iptuns, cred_t *credp) 1212 { 1213 int err; 1214 conn_t *connp; 1215 1216 ASSERT(iptuns->iptuns_g_q == NULL); 1217 /* 1218 * The global queue for this stack is set when iptunq_open() calls 1219 * iptun_set_g_q(). 1220 */ 1221 err = ldi_open_by_name(IPTUNQ_DEV, FWRITE|FREAD, credp, 1222 &iptuns->iptuns_g_q_lh, iptun_ldi_ident); 1223 if (err == 0) { 1224 connp = iptuns->iptuns_g_q->q_ptr; 1225 connp->conn_recv = iptun_input; 1226 } 1227 return (err); 1228 } 1229 1230 static iptun_t * 1231 iptun_alloc(void) 1232 { 1233 iptun_t *iptun; 1234 1235 if ((iptun = kmem_cache_alloc(iptun_cache, KM_NOSLEEP)) != NULL) { 1236 bzero(iptun, sizeof (*iptun)); 1237 atomic_inc_32(&iptun_tunnelcount); 1238 } 1239 return (iptun); 1240 } 1241 1242 static void 1243 iptun_free(iptun_t *iptun) 1244 { 1245 ASSERT(iptun->iptun_flags & IPTUN_CONDEMNED); 1246 1247 if (iptun->iptun_flags & IPTUN_HASH_INSERTED) { 1248 iptun_stack_t *iptuns = iptun->iptun_iptuns; 1249 1250 mutex_enter(&iptun_hash_lock); 1251 VERIFY(mod_hash_remove(iptun_hash, 1252 IPTUN_HASH_KEY(iptun->iptun_linkid), 1253 (mod_hash_val_t *)&iptun) == 0); 1254 mutex_exit(&iptun_hash_lock); 1255 iptun->iptun_flags &= ~IPTUN_HASH_INSERTED; 1256 mutex_enter(&iptuns->iptuns_lock); 1257 list_remove(&iptuns->iptuns_iptunlist, iptun); 1258 mutex_exit(&iptuns->iptuns_lock); 1259 } 1260 1261 if (iptun->iptun_flags & IPTUN_BOUND) 1262 iptun_unbind(iptun); 1263 1264 /* 1265 * After iptun_unregister(), there will be no threads executing a 1266 * downcall from the mac module, including in the tx datapath. 1267 */ 1268 if (iptun->iptun_flags & IPTUN_MAC_REGISTERED) 1269 VERIFY(iptun_unregister(iptun) == 0); 1270 1271 if (iptun->iptun_itp != NULL) { 1272 /* 1273 * Remove from the AVL tree, AND release the reference iptun_t 1274 * itself holds on the ITP. 1275 */ 1276 itp_unlink(iptun->iptun_itp, iptun->iptun_ns); 1277 ITP_REFRELE(iptun->iptun_itp, iptun->iptun_ns); 1278 iptun->iptun_itp = NULL; 1279 iptun->iptun_flags &= ~IPTUN_SIMPLE_POLICY; 1280 } 1281 1282 /* 1283 * After ipcl_conn_destroy(), there will be no threads executing an 1284 * upcall from ip (i.e., iptun_input()), and it is then safe to free 1285 * the iptun_t. 1286 */ 1287 if (iptun->iptun_connp != NULL) { 1288 iptun_conn_destroy(iptun->iptun_connp); 1289 iptun->iptun_connp = NULL; 1290 } 1291 1292 netstack_rele(iptun->iptun_ns); 1293 iptun->iptun_ns = NULL; 1294 crfree(iptun->iptun_cred); 1295 iptun->iptun_cred = NULL; 1296 1297 kmem_cache_free(iptun_cache, iptun); 1298 atomic_dec_32(&iptun_tunnelcount); 1299 } 1300 1301 int 1302 iptun_create(iptun_kparams_t *ik, cred_t *credp) 1303 { 1304 iptun_t *iptun = NULL; 1305 int err = 0, mherr; 1306 char linkname[MAXLINKNAMELEN]; 1307 ipsec_tun_pol_t *itp; 1308 netstack_t *ns = NULL; 1309 iptun_stack_t *iptuns; 1310 datalink_id_t tmpid; 1311 zoneid_t zoneid = crgetzoneid(credp); 1312 boolean_t link_created = B_FALSE; 1313 1314 /* The tunnel type is mandatory */ 1315 if (!(ik->iptun_kparam_flags & IPTUN_KPARAM_TYPE)) 1316 return (EINVAL); 1317 1318 /* 1319 * Is the linkid that the caller wishes to associate with this new 1320 * tunnel assigned to this zone? 1321 */ 1322 if (zone_check_datalink(&zoneid, ik->iptun_kparam_linkid) != 0) { 1323 if (zoneid != GLOBAL_ZONEID) 1324 return (EINVAL); 1325 } else if (zoneid == GLOBAL_ZONEID) { 1326 return (EINVAL); 1327 } 1328 1329 /* 1330 * Make sure that we're not trying to create a tunnel that has already 1331 * been created. 1332 */ 1333 if (iptun_enter_by_linkid(ik->iptun_kparam_linkid, &iptun) == 0) { 1334 iptun_exit(iptun); 1335 iptun = NULL; 1336 err = EEXIST; 1337 goto done; 1338 } 1339 1340 ns = netstack_find_by_cred(credp); 1341 iptuns = ns->netstack_iptun; 1342 1343 /* 1344 * Before we create any tunnel, we need to ensure that the default 1345 * STREAMS queue (used to satisfy the ip module's requirement for one) 1346 * is created. We only do this once per stack. The stream is closed 1347 * when the stack is destroyed in iptun_stack_fni(). 1348 */ 1349 mutex_enter(&iptuns->iptuns_lock); 1350 if (iptuns->iptuns_g_q == NULL) 1351 err = iptun_create_g_q(iptuns, zone_kcred()); 1352 mutex_exit(&iptuns->iptuns_lock); 1353 if (err != 0) 1354 goto done; 1355 1356 if ((iptun = iptun_alloc()) == NULL) { 1357 err = ENOMEM; 1358 goto done; 1359 } 1360 1361 iptun->iptun_linkid = ik->iptun_kparam_linkid; 1362 iptun->iptun_zoneid = zoneid; 1363 crhold(credp); 1364 iptun->iptun_cred = credp; 1365 iptun->iptun_ns = ns; 1366 1367 iptun->iptun_typeinfo = iptun_gettypeinfo(ik->iptun_kparam_type); 1368 if (iptun->iptun_typeinfo->iti_type == IPTUN_TYPE_UNKNOWN) { 1369 err = EINVAL; 1370 goto done; 1371 } 1372 1373 if (ik->iptun_kparam_flags & IPTUN_KPARAM_IMPLICIT) 1374 iptun->iptun_flags |= IPTUN_IMPLICIT; 1375 1376 if ((err = iptun_setparams(iptun, ik)) != 0) 1377 goto done; 1378 1379 iptun->iptun_hoplimit = IPTUN_DEFAULT_HOPLIMIT; 1380 if (iptun->iptun_typeinfo->iti_type == IPTUN_TYPE_IPV6) 1381 iptun->iptun_encaplimit = IPTUN_DEFAULT_ENCAPLIMIT; 1382 1383 iptun_headergen(iptun, B_FALSE); 1384 1385 iptun->iptun_connp = iptun_conn_create(iptun, ns, credp); 1386 if (iptun->iptun_connp == NULL) { 1387 err = ENOMEM; 1388 goto done; 1389 } 1390 1391 iptun->iptun_mtu = iptun->iptun_typeinfo->iti_maxmtu; 1392 iptun->iptun_dpmtu = iptun->iptun_mtu; 1393 1394 /* 1395 * Find an ITP based on linkname. If we have parms already set via 1396 * the iptun_setparams() call above, it may have created an ITP for 1397 * us. We always try get_tunnel_policy() for DEBUG correctness 1398 * checks, and we may wish to refactor this to only check when 1399 * iptun_itp is NULL. 1400 */ 1401 if ((err = dls_mgmt_get_linkinfo(iptun->iptun_linkid, linkname, NULL, 1402 NULL, NULL)) != 0) 1403 goto done; 1404 if ((itp = get_tunnel_policy(linkname, ns)) != NULL) 1405 iptun->iptun_itp = itp; 1406 1407 /* 1408 * See if we have the necessary IP addresses assigned to this tunnel 1409 * to try and bind them with ip underneath us. If we're not ready to 1410 * bind yet, then we'll defer the bind operation until the addresses 1411 * are modified. 1412 */ 1413 if (iptun_canbind(iptun) && ((err = iptun_bind(iptun)) != 0)) 1414 goto done; 1415 1416 if ((err = iptun_register(iptun)) != 0) 1417 goto done; 1418 1419 err = dls_devnet_create(iptun->iptun_mh, iptun->iptun_linkid, 1420 iptun->iptun_zoneid); 1421 if (err != 0) 1422 goto done; 1423 link_created = B_TRUE; 1424 1425 /* 1426 * We hash by link-id as that is the key used by all other iptun 1427 * interfaces (modify, delete, etc.). 1428 */ 1429 if ((mherr = mod_hash_insert(iptun_hash, 1430 IPTUN_HASH_KEY(iptun->iptun_linkid), (mod_hash_val_t)iptun)) == 0) { 1431 mutex_enter(&iptuns->iptuns_lock); 1432 list_insert_head(&iptuns->iptuns_iptunlist, iptun); 1433 mutex_exit(&iptuns->iptuns_lock); 1434 iptun->iptun_flags |= IPTUN_HASH_INSERTED; 1435 } else if (mherr == MH_ERR_NOMEM) { 1436 err = ENOMEM; 1437 } else if (mherr == MH_ERR_DUPLICATE) { 1438 err = EEXIST; 1439 } else { 1440 err = EINVAL; 1441 } 1442 1443 done: 1444 if (iptun == NULL && ns != NULL) 1445 netstack_rele(ns); 1446 if (err != 0 && iptun != NULL) { 1447 if (link_created) { 1448 (void) dls_devnet_destroy(iptun->iptun_mh, &tmpid, 1449 B_TRUE); 1450 } 1451 iptun->iptun_flags |= IPTUN_CONDEMNED; 1452 iptun_free(iptun); 1453 } 1454 return (err); 1455 } 1456 1457 int 1458 iptun_delete(datalink_id_t linkid, cred_t *credp) 1459 { 1460 int err; 1461 iptun_t *iptun = NULL; 1462 1463 if ((err = iptun_enter_by_linkid(linkid, &iptun)) != 0) 1464 return (err); 1465 1466 /* One cannot delete a tunnel that belongs to another zone. */ 1467 if (iptun->iptun_zoneid != crgetzoneid(credp)) { 1468 iptun_exit(iptun); 1469 return (EACCES); 1470 } 1471 1472 /* 1473 * We need to exit iptun in order to issue calls up the stack such as 1474 * dls_devnet_destroy(). If we call up while still in iptun, deadlock 1475 * with calls coming down the stack is possible. We prevent other 1476 * threads from entering this iptun after we've exited it by setting 1477 * the IPTUN_DELETE_PENDING flag. This will cause callers of 1478 * iptun_enter() to block waiting on iptun_enter_cv. The assumption 1479 * here is that the functions we're calling while IPTUN_DELETE_PENDING 1480 * is set dont resuult in an iptun_enter() call, as that would result 1481 * in deadlock. 1482 */ 1483 iptun->iptun_flags |= IPTUN_DELETE_PENDING; 1484 1485 /* Wait for any pending upcall to the mac module to complete. */ 1486 while (iptun->iptun_flags & IPTUN_UPCALL_PENDING) 1487 cv_wait(&iptun->iptun_upcall_cv, &iptun->iptun_lock); 1488 1489 iptun_exit(iptun); 1490 1491 if ((err = dls_devnet_destroy(iptun->iptun_mh, &linkid, B_TRUE)) == 0) { 1492 /* 1493 * mac_disable() will fail with EBUSY if there are references 1494 * to the iptun MAC. If there are none, then mac_disable() 1495 * will assure that none can be acquired until the MAC is 1496 * unregistered. 1497 * 1498 * XXX CR 6791335 prevents us from calling mac_disable() prior 1499 * to dls_devnet_destroy(), so we unfortunately need to 1500 * attempt to re-create the devnet node if mac_disable() 1501 * fails. 1502 */ 1503 if ((err = mac_disable(iptun->iptun_mh)) != 0) { 1504 (void) dls_devnet_create(iptun->iptun_mh, linkid, 1505 iptun->iptun_zoneid); 1506 } 1507 } 1508 1509 /* 1510 * Now that we know the fate of this iptun_t, we need to clear 1511 * IPTUN_DELETE_PENDING, and set IPTUN_CONDEMNED if the iptun_t is 1512 * slated to be freed. Either way, we need to signal the threads 1513 * waiting in iptun_enter() so that they can either fail if 1514 * IPTUN_CONDEMNED is set, or continue if it's not. 1515 */ 1516 mutex_enter(&iptun->iptun_lock); 1517 iptun->iptun_flags &= ~IPTUN_DELETE_PENDING; 1518 if (err == 0) 1519 iptun->iptun_flags |= IPTUN_CONDEMNED; 1520 cv_broadcast(&iptun->iptun_enter_cv); 1521 mutex_exit(&iptun->iptun_lock); 1522 1523 /* 1524 * Note that there is no danger in calling iptun_free() after having 1525 * dropped the iptun_lock since callers of iptun_enter() at this point 1526 * are doing so from iptun_enter_by_linkid() (mac_disable() got rid of 1527 * threads entering from mac callbacks which call iptun_enter() 1528 * directly) which holds iptun_hash_lock, and iptun_free() grabs this 1529 * lock in order to remove the iptun_t from the hash table. 1530 */ 1531 if (err == 0) 1532 iptun_free(iptun); 1533 1534 return (err); 1535 } 1536 1537 int 1538 iptun_modify(const iptun_kparams_t *ik, cred_t *credp) 1539 { 1540 iptun_t *iptun; 1541 boolean_t laddr_change = B_FALSE, raddr_change = B_FALSE; 1542 int err; 1543 1544 if ((err = iptun_enter_by_linkid(ik->iptun_kparam_linkid, &iptun)) != 0) 1545 return (err); 1546 1547 /* One cannot modify a tunnel that belongs to another zone. */ 1548 if (iptun->iptun_zoneid != crgetzoneid(credp)) { 1549 err = EACCES; 1550 goto done; 1551 } 1552 1553 /* The tunnel type cannot be changed */ 1554 if (ik->iptun_kparam_flags & IPTUN_KPARAM_TYPE) { 1555 err = EINVAL; 1556 goto done; 1557 } 1558 1559 if ((err = iptun_setparams(iptun, ik)) != 0) 1560 goto done; 1561 iptun_headergen(iptun, B_FALSE); 1562 1563 /* 1564 * If any of the tunnel's addresses has been modified and the tunnel 1565 * has the necessary addresses assigned to it, we need to try to bind 1566 * with ip underneath us. If we're not ready to bind yet, then we'll 1567 * try again when the addresses are modified later. 1568 */ 1569 laddr_change = (ik->iptun_kparam_flags & IPTUN_KPARAM_LADDR); 1570 raddr_change = (ik->iptun_kparam_flags & IPTUN_KPARAM_RADDR); 1571 if (laddr_change || raddr_change) { 1572 if (iptun->iptun_flags & IPTUN_BOUND) 1573 iptun_unbind(iptun); 1574 if (iptun_canbind(iptun) && (err = iptun_bind(iptun)) != 0) { 1575 if (laddr_change) 1576 iptun->iptun_flags &= ~IPTUN_LADDR; 1577 if (raddr_change) 1578 iptun->iptun_flags &= ~IPTUN_RADDR; 1579 goto done; 1580 } 1581 } 1582 1583 if (laddr_change) 1584 iptun_task_dispatch(iptun, IPTUN_TASK_LADDR_UPDATE); 1585 if (raddr_change) 1586 iptun_task_dispatch(iptun, IPTUN_TASK_RADDR_UPDATE); 1587 1588 done: 1589 iptun_exit(iptun); 1590 return (err); 1591 } 1592 1593 /* Given an IP tunnel's datalink id, fill in its parameters. */ 1594 int 1595 iptun_info(iptun_kparams_t *ik, cred_t *credp) 1596 { 1597 iptun_t *iptun; 1598 int err; 1599 1600 /* Is the tunnel link visible from the caller's zone? */ 1601 if (!dls_devnet_islinkvisible(ik->iptun_kparam_linkid, 1602 crgetzoneid(credp))) 1603 return (ENOENT); 1604 1605 if ((err = iptun_enter_by_linkid(ik->iptun_kparam_linkid, &iptun)) != 0) 1606 return (err); 1607 1608 bzero(ik, sizeof (iptun_kparams_t)); 1609 1610 ik->iptun_kparam_linkid = iptun->iptun_linkid; 1611 ik->iptun_kparam_type = iptun->iptun_typeinfo->iti_type; 1612 ik->iptun_kparam_flags |= IPTUN_KPARAM_TYPE; 1613 1614 if (iptun->iptun_flags & IPTUN_LADDR) { 1615 iptun_getaddr(&iptun->iptun_laddr, &ik->iptun_kparam_laddr); 1616 ik->iptun_kparam_flags |= IPTUN_KPARAM_LADDR; 1617 } 1618 if (iptun->iptun_flags & IPTUN_RADDR) { 1619 iptun_getaddr(&iptun->iptun_raddr, &ik->iptun_kparam_raddr); 1620 ik->iptun_kparam_flags |= IPTUN_KPARAM_RADDR; 1621 } 1622 1623 if (iptun->iptun_flags & IPTUN_IMPLICIT) 1624 ik->iptun_kparam_flags |= IPTUN_KPARAM_IMPLICIT; 1625 1626 if (iptun->iptun_itp != NULL) { 1627 mutex_enter(&iptun->iptun_itp->itp_lock); 1628 if (iptun->iptun_itp->itp_flags & ITPF_P_ACTIVE) { 1629 ik->iptun_kparam_flags |= IPTUN_KPARAM_IPSECPOL; 1630 if (iptun->iptun_flags & IPTUN_SIMPLE_POLICY) { 1631 ik->iptun_kparam_flags |= IPTUN_KPARAM_SECINFO; 1632 ik->iptun_kparam_secinfo = 1633 iptun->iptun_simple_policy; 1634 } 1635 } 1636 mutex_exit(&iptun->iptun_itp->itp_lock); 1637 } 1638 1639 done: 1640 iptun_exit(iptun); 1641 return (err); 1642 } 1643 1644 int 1645 iptun_set_6to4relay(netstack_t *ns, ipaddr_t relay_addr) 1646 { 1647 if (relay_addr == INADDR_BROADCAST || CLASSD(relay_addr)) 1648 return (EADDRNOTAVAIL); 1649 ns->netstack_iptun->iptuns_relay_rtr_addr = relay_addr; 1650 return (0); 1651 } 1652 1653 void 1654 iptun_get_6to4relay(netstack_t *ns, ipaddr_t *relay_addr) 1655 { 1656 *relay_addr = ns->netstack_iptun->iptuns_relay_rtr_addr; 1657 } 1658 1659 void 1660 iptun_set_policy(datalink_id_t linkid, ipsec_tun_pol_t *itp) 1661 { 1662 iptun_t *iptun; 1663 1664 if (iptun_enter_by_linkid(linkid, &iptun) != 0) 1665 return; 1666 if (iptun->iptun_itp != itp) { 1667 ASSERT(iptun->iptun_itp == NULL); 1668 ITP_REFHOLD(itp); 1669 iptun->iptun_itp = itp; 1670 /* IPsec policy means IPsec overhead, which means lower MTU. */ 1671 (void) iptun_update_mtu(iptun, 0); 1672 } 1673 iptun_exit(iptun); 1674 } 1675 1676 /* 1677 * Obtain the path MTU to the tunnel destination. 1678 */ 1679 static uint32_t 1680 iptun_get_dst_pmtu(iptun_t *iptun) 1681 { 1682 ire_t *ire = NULL; 1683 ip_stack_t *ipst = iptun->iptun_ns->netstack_ip; 1684 uint32_t pmtu = 0; 1685 1686 /* 1687 * We only obtain the destination IRE for tunnels that have a remote 1688 * tunnel address. 1689 */ 1690 if (!(iptun->iptun_flags & IPTUN_RADDR)) 1691 return (0); 1692 1693 switch (iptun->iptun_typeinfo->iti_ipvers) { 1694 case IPV4_VERSION: 1695 ire = ire_route_lookup(iptun->iptun_raddr4, INADDR_ANY, 1696 INADDR_ANY, 0, NULL, NULL, iptun->iptun_connp->conn_zoneid, 1697 NULL, (MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT), ipst); 1698 break; 1699 case IPV6_VERSION: 1700 ire = ire_route_lookup_v6(&iptun->iptun_raddr6, NULL, NULL, 0, 1701 NULL, NULL, iptun->iptun_connp->conn_zoneid, NULL, 1702 (MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT), ipst); 1703 break; 1704 } 1705 1706 if (ire != NULL) { 1707 pmtu = ire->ire_max_frag; 1708 ire_refrele(ire); 1709 } 1710 return (pmtu); 1711 } 1712 1713 /* 1714 * Returns the max of old_ovhd and the overhead associated with pol. 1715 */ 1716 static uint32_t 1717 iptun_max_policy_overhead(ipsec_policy_t *pol, uint32_t old_ovhd) 1718 { 1719 uint32_t new_ovhd = old_ovhd; 1720 1721 while (pol != NULL) { 1722 new_ovhd = max(new_ovhd, 1723 ipsec_act_ovhd(&pol->ipsp_act->ipa_act)); 1724 pol = pol->ipsp_hash.hash_next; 1725 } 1726 return (new_ovhd); 1727 } 1728 1729 static uint32_t 1730 iptun_get_ipsec_overhead(iptun_t *iptun) 1731 { 1732 ipsec_policy_root_t *ipr; 1733 ipsec_policy_head_t *iph; 1734 ipsec_policy_t *pol; 1735 ipsec_selector_t sel; 1736 int i; 1737 uint32_t ipsec_ovhd = 0; 1738 ipsec_tun_pol_t *itp = iptun->iptun_itp; 1739 netstack_t *ns = iptun->iptun_ns; 1740 1741 if (itp == NULL || !(itp->itp_flags & ITPF_P_ACTIVE)) { 1742 /* 1743 * Consult global policy, just in case. This will only work 1744 * if we have both source and destination addresses to work 1745 * with. 1746 */ 1747 if ((iptun->iptun_flags & (IPTUN_LADDR|IPTUN_RADDR)) != 1748 (IPTUN_LADDR|IPTUN_RADDR)) 1749 return (0); 1750 1751 iph = ipsec_system_policy(ns); 1752 bzero(&sel, sizeof (sel)); 1753 sel.ips_isv4 = 1754 (iptun->iptun_typeinfo->iti_ipvers == IPV4_VERSION); 1755 switch (iptun->iptun_typeinfo->iti_ipvers) { 1756 case IPV4_VERSION: 1757 sel.ips_local_addr_v4 = iptun->iptun_laddr4; 1758 sel.ips_remote_addr_v4 = iptun->iptun_raddr4; 1759 break; 1760 case IPV6_VERSION: 1761 sel.ips_local_addr_v6 = iptun->iptun_laddr6; 1762 sel.ips_remote_addr_v6 = iptun->iptun_raddr6; 1763 break; 1764 } 1765 /* Check for both IPv4 and IPv6. */ 1766 sel.ips_protocol = IPPROTO_ENCAP; 1767 pol = ipsec_find_policy_head(NULL, iph, IPSEC_TYPE_OUTBOUND, 1768 &sel, ns); 1769 if (pol != NULL) { 1770 ipsec_ovhd = ipsec_act_ovhd(&pol->ipsp_act->ipa_act); 1771 IPPOL_REFRELE(pol, ns); 1772 } 1773 sel.ips_protocol = IPPROTO_IPV6; 1774 pol = ipsec_find_policy_head(NULL, iph, IPSEC_TYPE_OUTBOUND, 1775 &sel, ns); 1776 if (pol != NULL) { 1777 ipsec_ovhd = max(ipsec_ovhd, 1778 ipsec_act_ovhd(&pol->ipsp_act->ipa_act)); 1779 IPPOL_REFRELE(pol, ns); 1780 } 1781 IPPH_REFRELE(iph, ns); 1782 } else { 1783 /* 1784 * Look through all of the possible IPsec actions for the 1785 * tunnel, and find the largest potential IPsec overhead. 1786 */ 1787 iph = itp->itp_policy; 1788 rw_enter(&iph->iph_lock, RW_READER); 1789 ipr = &(iph->iph_root[IPSEC_TYPE_OUTBOUND]); 1790 ipsec_ovhd = iptun_max_policy_overhead( 1791 ipr->ipr_nonhash[IPSEC_AF_V4], 0); 1792 ipsec_ovhd = iptun_max_policy_overhead( 1793 ipr->ipr_nonhash[IPSEC_AF_V6], ipsec_ovhd); 1794 for (i = 0; i < ipr->ipr_nchains; i++) { 1795 ipsec_ovhd = iptun_max_policy_overhead( 1796 ipr->ipr_hash[i].hash_head, ipsec_ovhd); 1797 } 1798 rw_exit(&iph->iph_lock); 1799 } 1800 1801 return (ipsec_ovhd); 1802 } 1803 1804 /* 1805 * Calculate and return the maximum possible MTU for the given tunnel. 1806 */ 1807 static uint32_t 1808 iptun_get_maxmtu(iptun_t *iptun, uint32_t new_pmtu) 1809 { 1810 size_t header_size, ipsec_overhead; 1811 uint32_t maxmtu, pmtu; 1812 1813 /* 1814 * Start with the path-MTU to the remote address, which is either 1815 * provided as the new_pmtu argument, or obtained using 1816 * iptun_get_dst_pmtu(). 1817 */ 1818 if (new_pmtu != 0) { 1819 if (iptun->iptun_flags & IPTUN_RADDR) { 1820 iptun->iptun_dpmtu = new_pmtu; 1821 iptun->iptun_dpmtu_lastupdate = ddi_get_lbolt(); 1822 } 1823 pmtu = new_pmtu; 1824 } else if (iptun->iptun_flags & IPTUN_RADDR) { 1825 if ((pmtu = iptun_get_dst_pmtu(iptun)) == 0) { 1826 /* 1827 * We weren't able to obtain the path-MTU of the 1828 * destination. Use the previous value. 1829 */ 1830 pmtu = iptun->iptun_dpmtu; 1831 } else { 1832 iptun->iptun_dpmtu = pmtu; 1833 iptun->iptun_dpmtu_lastupdate = ddi_get_lbolt(); 1834 } 1835 } else { 1836 /* 1837 * We have no path-MTU information to go on, use the maximum 1838 * possible value. 1839 */ 1840 pmtu = iptun->iptun_typeinfo->iti_maxmtu; 1841 } 1842 1843 /* 1844 * Now calculate tunneling overhead and subtract that from the 1845 * path-MTU information obtained above. 1846 */ 1847 if (iptun->iptun_header_size != 0) { 1848 header_size = iptun->iptun_header_size; 1849 } else { 1850 switch (iptun->iptun_typeinfo->iti_ipvers) { 1851 case IPV4_VERSION: 1852 header_size = sizeof (ipha_t); 1853 break; 1854 case IPV6_VERSION: 1855 header_size = sizeof (iptun_ipv6hdrs_t); 1856 break; 1857 } 1858 } 1859 1860 ipsec_overhead = iptun_get_ipsec_overhead(iptun); 1861 1862 maxmtu = pmtu - (header_size + ipsec_overhead); 1863 return (max(maxmtu, iptun->iptun_typeinfo->iti_minmtu)); 1864 } 1865 1866 /* 1867 * Re-calculate the tunnel's MTU and notify the MAC layer of any change in 1868 * MTU. The new_pmtu argument is the new path MTU to the tunnel destination 1869 * to be used in the tunnel MTU calculation. Passing in 0 for new_pmtu causes 1870 * the path MTU to be dynamically updated using iptun_update_pmtu(). 1871 * 1872 * If the calculated tunnel MTU is different than its previous value, then we 1873 * notify the MAC layer above us of this change using mac_maxsdu_update(). 1874 */ 1875 static uint32_t 1876 iptun_update_mtu(iptun_t *iptun, uint32_t new_pmtu) 1877 { 1878 uint32_t newmtu; 1879 1880 /* 1881 * We return the current MTU without updating it if it was pegged to a 1882 * static value using the MAC_PROP_MTU link property. 1883 */ 1884 if (iptun->iptun_flags & IPTUN_FIXED_MTU) 1885 return (iptun->iptun_mtu); 1886 1887 /* If the MTU isn't fixed, then use the maximum possible value. */ 1888 newmtu = iptun_get_maxmtu(iptun, new_pmtu); 1889 1890 /* 1891 * We only dynamically adjust the tunnel MTU for tunnels with 1892 * destinations because dynamic MTU calculations are based on the 1893 * destination path-MTU. 1894 */ 1895 if ((iptun->iptun_flags & IPTUN_RADDR) && newmtu != iptun->iptun_mtu) { 1896 iptun->iptun_mtu = newmtu; 1897 if (iptun->iptun_flags & IPTUN_MAC_REGISTERED) 1898 iptun_task_dispatch(iptun, IPTUN_TASK_MTU_UPDATE); 1899 } 1900 1901 return (newmtu); 1902 } 1903 1904 /* 1905 * Frees a packet or packet chain and bumps stat for each freed packet. 1906 */ 1907 static void 1908 iptun_drop_pkt(mblk_t *mp, uint64_t *stat) 1909 { 1910 mblk_t *pktmp; 1911 1912 for (pktmp = mp; pktmp != NULL; pktmp = mp) { 1913 mp = mp->b_next; 1914 pktmp->b_next = NULL; 1915 if (stat != NULL) 1916 atomic_inc_64(stat); 1917 freemsg(pktmp); 1918 } 1919 } 1920 1921 /* 1922 * Allocate and return a new mblk to hold an IP and ICMP header, and chain the 1923 * original packet to its b_cont. Returns NULL on failure. 1924 */ 1925 static mblk_t * 1926 iptun_build_icmperr(size_t hdrs_size, mblk_t *orig_pkt) 1927 { 1928 mblk_t *icmperr_mp; 1929 1930 if ((icmperr_mp = allocb_tmpl(hdrs_size, orig_pkt)) != NULL) { 1931 icmperr_mp->b_wptr += hdrs_size; 1932 /* tack on the offending packet */ 1933 icmperr_mp->b_cont = orig_pkt; 1934 } 1935 return (icmperr_mp); 1936 } 1937 1938 /* 1939 * Transmit an ICMP error. mp->b_rptr points at the packet to be included in 1940 * the ICMP error. 1941 */ 1942 static void 1943 iptun_sendicmp_v4(iptun_t *iptun, icmph_t *icmp, ipha_t *orig_ipha, mblk_t *mp) 1944 { 1945 size_t orig_pktsize, hdrs_size; 1946 mblk_t *icmperr_mp; 1947 ipha_t *new_ipha; 1948 icmph_t *new_icmp; 1949 1950 orig_pktsize = msgdsize(mp); 1951 hdrs_size = sizeof (ipha_t) + sizeof (icmph_t); 1952 if ((icmperr_mp = iptun_build_icmperr(hdrs_size, mp)) == NULL) { 1953 iptun_drop_pkt(mp, &iptun->iptun_noxmtbuf); 1954 return; 1955 } 1956 1957 new_ipha = (ipha_t *)icmperr_mp->b_rptr; 1958 new_icmp = (icmph_t *)(new_ipha + 1); 1959 1960 new_ipha->ipha_version_and_hdr_length = IP_SIMPLE_HDR_VERSION; 1961 new_ipha->ipha_type_of_service = 0; 1962 new_ipha->ipha_ident = 0; 1963 new_ipha->ipha_fragment_offset_and_flags = 0; 1964 new_ipha->ipha_ttl = orig_ipha->ipha_ttl; 1965 new_ipha->ipha_protocol = IPPROTO_ICMP; 1966 new_ipha->ipha_src = orig_ipha->ipha_dst; 1967 new_ipha->ipha_dst = orig_ipha->ipha_src; 1968 new_ipha->ipha_hdr_checksum = 0; /* will be computed by ip */ 1969 new_ipha->ipha_length = htons(hdrs_size + orig_pktsize); 1970 1971 *new_icmp = *icmp; 1972 new_icmp->icmph_checksum = 0; 1973 new_icmp->icmph_checksum = IP_CSUM(icmperr_mp, sizeof (ipha_t), 0); 1974 1975 ip_output(iptun->iptun_connp, icmperr_mp, iptun->iptun_connp->conn_wq, 1976 IP_WPUT); 1977 } 1978 1979 static void 1980 iptun_sendicmp_v6(iptun_t *iptun, icmp6_t *icmp6, ip6_t *orig_ip6h, mblk_t *mp) 1981 { 1982 size_t orig_pktsize, hdrs_size; 1983 mblk_t *icmp6err_mp; 1984 ip6_t *new_ip6h; 1985 icmp6_t *new_icmp6; 1986 1987 orig_pktsize = msgdsize(mp); 1988 hdrs_size = sizeof (ip6_t) + sizeof (icmp6_t); 1989 if ((icmp6err_mp = iptun_build_icmperr(hdrs_size, mp)) == NULL) { 1990 iptun_drop_pkt(mp, &iptun->iptun_noxmtbuf); 1991 return; 1992 } 1993 1994 new_ip6h = (ip6_t *)icmp6err_mp->b_rptr; 1995 new_icmp6 = (icmp6_t *)(new_ip6h + 1); 1996 1997 new_ip6h->ip6_vcf = orig_ip6h->ip6_vcf; 1998 new_ip6h->ip6_plen = htons(sizeof (icmp6_t) + orig_pktsize); 1999 new_ip6h->ip6_hops = orig_ip6h->ip6_hops; 2000 new_ip6h->ip6_nxt = IPPROTO_ICMPV6; 2001 new_ip6h->ip6_src = orig_ip6h->ip6_dst; 2002 new_ip6h->ip6_dst = orig_ip6h->ip6_src; 2003 2004 *new_icmp6 = *icmp6; 2005 /* The checksum is calculated in ip_wput_ire_v6(). */ 2006 new_icmp6->icmp6_cksum = new_ip6h->ip6_plen; 2007 2008 ip_output_v6(iptun->iptun_connp, icmp6err_mp, 2009 iptun->iptun_connp->conn_wq, IP_WPUT); 2010 } 2011 2012 static void 2013 iptun_icmp_error_v4(iptun_t *iptun, ipha_t *orig_ipha, mblk_t *mp, 2014 uint8_t type, uint8_t code) 2015 { 2016 icmph_t icmp; 2017 2018 bzero(&icmp, sizeof (icmp)); 2019 icmp.icmph_type = type; 2020 icmp.icmph_code = code; 2021 2022 iptun_sendicmp_v4(iptun, &icmp, orig_ipha, mp); 2023 } 2024 2025 static void 2026 iptun_icmp_fragneeded_v4(iptun_t *iptun, uint32_t newmtu, ipha_t *orig_ipha, 2027 mblk_t *mp) 2028 { 2029 icmph_t icmp; 2030 2031 icmp.icmph_type = ICMP_DEST_UNREACHABLE; 2032 icmp.icmph_code = ICMP_FRAGMENTATION_NEEDED; 2033 icmp.icmph_du_zero = 0; 2034 icmp.icmph_du_mtu = htons(newmtu); 2035 2036 iptun_sendicmp_v4(iptun, &icmp, orig_ipha, mp); 2037 } 2038 2039 static void 2040 iptun_icmp_error_v6(iptun_t *iptun, ip6_t *orig_ip6h, mblk_t *mp, 2041 uint8_t type, uint8_t code, uint32_t offset) 2042 { 2043 icmp6_t icmp6; 2044 2045 bzero(&icmp6, sizeof (icmp6)); 2046 icmp6.icmp6_type = type; 2047 icmp6.icmp6_code = code; 2048 if (type == ICMP6_PARAM_PROB) 2049 icmp6.icmp6_pptr = htonl(offset); 2050 2051 iptun_sendicmp_v6(iptun, &icmp6, orig_ip6h, mp); 2052 } 2053 2054 static void 2055 iptun_icmp_toobig_v6(iptun_t *iptun, uint32_t newmtu, ip6_t *orig_ip6h, 2056 mblk_t *mp) 2057 { 2058 icmp6_t icmp6; 2059 2060 icmp6.icmp6_type = ICMP6_PACKET_TOO_BIG; 2061 icmp6.icmp6_code = 0; 2062 icmp6.icmp6_mtu = htonl(newmtu); 2063 2064 iptun_sendicmp_v6(iptun, &icmp6, orig_ip6h, mp); 2065 } 2066 2067 /* 2068 * Determines if the packet pointed to by ipha or ip6h is an ICMP error. The 2069 * mp argument is only used to do bounds checking. 2070 */ 2071 static boolean_t 2072 is_icmp_error(mblk_t *mp, ipha_t *ipha, ip6_t *ip6h) 2073 { 2074 uint16_t hlen; 2075 2076 if (ipha != NULL) { 2077 icmph_t *icmph; 2078 2079 ASSERT(ip6h == NULL); 2080 if (ipha->ipha_protocol != IPPROTO_ICMP) 2081 return (B_FALSE); 2082 2083 hlen = IPH_HDR_LENGTH(ipha); 2084 icmph = (icmph_t *)((uint8_t *)ipha + hlen); 2085 return (ICMP_IS_ERROR(icmph->icmph_type) || 2086 icmph->icmph_type == ICMP_REDIRECT); 2087 } else { 2088 icmp6_t *icmp6; 2089 uint8_t *nexthdrp; 2090 2091 ASSERT(ip6h != NULL); 2092 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hlen, &nexthdrp) || 2093 *nexthdrp != IPPROTO_ICMPV6) { 2094 return (B_FALSE); 2095 } 2096 2097 icmp6 = (icmp6_t *)((uint8_t *)ip6h + hlen); 2098 return (ICMP6_IS_ERROR(icmp6->icmp6_type) || 2099 icmp6->icmp6_type == ND_REDIRECT); 2100 } 2101 } 2102 2103 /* 2104 * Find inner and outer IP headers from a tunneled packet as setup for calls 2105 * into ipsec_tun_{in,out}bound(). 2106 */ 2107 static size_t 2108 iptun_find_headers(mblk_t *mp, ipha_t **outer4, ipha_t **inner4, ip6_t **outer6, 2109 ip6_t **inner6) 2110 { 2111 ipha_t *ipha; 2112 size_t outer_hlen; 2113 size_t first_mblkl = MBLKL(mp); 2114 mblk_t *inner_mp; 2115 2116 /* 2117 * Don't bother handling packets that don't have a full IP header in 2118 * the fist mblk. For the input path, the ip module ensures that this 2119 * won't happen, and on the output path, the IP tunneling MAC-type 2120 * plugins ensure that this also won't happen. 2121 */ 2122 if (first_mblkl < sizeof (ipha_t)) 2123 return (0); 2124 ipha = (ipha_t *)(mp->b_rptr); 2125 switch (IPH_HDR_VERSION(ipha)) { 2126 case IPV4_VERSION: 2127 *outer4 = ipha; 2128 *outer6 = NULL; 2129 outer_hlen = IPH_HDR_LENGTH(ipha); 2130 break; 2131 case IPV6_VERSION: 2132 *outer4 = NULL; 2133 *outer6 = (ip6_t *)ipha; 2134 outer_hlen = ip_hdr_length_v6(mp, (ip6_t *)ipha); 2135 break; 2136 default: 2137 return (0); 2138 } 2139 2140 if (first_mblkl < outer_hlen || 2141 (first_mblkl == outer_hlen && mp->b_cont == NULL)) 2142 return (0); 2143 2144 /* 2145 * We don't bother doing a pullup here since the outer header will 2146 * just get stripped off soon on input anyway. We just want to ensure 2147 * that the inner* pointer points to a full header. 2148 */ 2149 if (first_mblkl == outer_hlen) { 2150 inner_mp = mp->b_cont; 2151 ipha = (ipha_t *)inner_mp->b_rptr; 2152 } else { 2153 inner_mp = mp; 2154 ipha = (ipha_t *)(mp->b_rptr + outer_hlen); 2155 } 2156 switch (IPH_HDR_VERSION(ipha)) { 2157 case IPV4_VERSION: 2158 if (inner_mp->b_wptr - (uint8_t *)ipha < sizeof (ipha_t)) 2159 return (0); 2160 *inner4 = ipha; 2161 *inner6 = NULL; 2162 break; 2163 case IPV6_VERSION: 2164 if (inner_mp->b_wptr - (uint8_t *)ipha < sizeof (ip6_t)) 2165 return (0); 2166 *inner4 = NULL; 2167 *inner6 = (ip6_t *)ipha; 2168 break; 2169 default: 2170 return (0); 2171 } 2172 2173 return (outer_hlen); 2174 } 2175 2176 /* 2177 * Received ICMP error in response to an X over IPv4 packet that we 2178 * transmitted. 2179 * 2180 * NOTE: "outer" refers to what's inside the ICMP payload. We will get one of 2181 * the following: 2182 * 2183 * [IPv4(0)][ICMPv4][IPv4(1)][IPv4(2)][ULP] 2184 * 2185 * or 2186 * 2187 * [IPv4(0)][ICMPv4][IPv4(1)][IPv6][ULP] 2188 * 2189 * And "outer4" will get set to IPv4(1), and inner[46] will correspond to 2190 * whatever the very-inner packet is (IPv4(2) or IPv6). 2191 */ 2192 static void 2193 iptun_input_icmp_v4(iptun_t *iptun, mblk_t *ipsec_mp, mblk_t *data_mp, 2194 icmph_t *icmph) 2195 { 2196 uint8_t *orig; 2197 ipha_t *outer4, *inner4; 2198 ip6_t *outer6, *inner6; 2199 int outer_hlen; 2200 uint8_t type, code; 2201 2202 /* 2203 * Change the db_type to M_DATA because subsequent operations assume 2204 * the ICMP packet is M_DATA again (i.e. calls to msgdsize()). 2205 */ 2206 data_mp->b_datap->db_type = M_DATA; 2207 2208 ASSERT(data_mp->b_cont == NULL); 2209 /* 2210 * Temporarily move b_rptr forward so that iptun_find_headers() can 2211 * find headers in the ICMP packet payload. 2212 */ 2213 orig = data_mp->b_rptr; 2214 data_mp->b_rptr = (uint8_t *)(icmph + 1); 2215 /* 2216 * The ip module ensures that ICMP errors contain at least the 2217 * original IP header (otherwise, the error would never have made it 2218 * here). 2219 */ 2220 ASSERT(MBLKL(data_mp) >= 0); 2221 outer_hlen = iptun_find_headers(data_mp, &outer4, &inner4, &outer6, 2222 &inner6); 2223 ASSERT(outer6 == NULL); 2224 data_mp->b_rptr = orig; 2225 if (outer_hlen == 0) { 2226 iptun_drop_pkt((ipsec_mp != NULL ? ipsec_mp : data_mp), 2227 &iptun->iptun_ierrors); 2228 return; 2229 } 2230 2231 /* Only ICMP errors due to tunneled packets should reach here. */ 2232 ASSERT(outer4->ipha_protocol == IPPROTO_ENCAP || 2233 outer4->ipha_protocol == IPPROTO_IPV6); 2234 2235 /* ipsec_tun_inbound() always frees ipsec_mp. */ 2236 if (!ipsec_tun_inbound(ipsec_mp, &data_mp, iptun->iptun_itp, 2237 inner4, inner6, outer4, outer6, -outer_hlen, 2238 iptun->iptun_ns)) { 2239 /* Callee did all of the freeing. */ 2240 atomic_inc_64(&iptun->iptun_ierrors); 2241 return; 2242 } 2243 /* We should never see reassembled fragment here. */ 2244 ASSERT(data_mp->b_next == NULL); 2245 2246 data_mp->b_rptr = (uint8_t *)outer4 + outer_hlen; 2247 2248 /* 2249 * If the original packet being transmitted was itself an ICMP error, 2250 * then drop this packet. We don't want to generate an ICMP error in 2251 * response to an ICMP error. 2252 */ 2253 if (is_icmp_error(data_mp, inner4, inner6)) { 2254 iptun_drop_pkt(data_mp, &iptun->iptun_norcvbuf); 2255 return; 2256 } 2257 2258 switch (icmph->icmph_type) { 2259 case ICMP_DEST_UNREACHABLE: 2260 type = (inner4 != NULL ? icmph->icmph_type : ICMP6_DST_UNREACH); 2261 switch (icmph->icmph_code) { 2262 case ICMP_FRAGMENTATION_NEEDED: { 2263 uint32_t newmtu; 2264 2265 /* 2266 * We reconcile this with the fact that the tunnel may 2267 * also have IPsec policy by letting iptun_update_mtu 2268 * take care of it. 2269 */ 2270 newmtu = 2271 iptun_update_mtu(iptun, ntohs(icmph->icmph_du_mtu)); 2272 2273 if (inner4 != NULL) { 2274 iptun_icmp_fragneeded_v4(iptun, newmtu, inner4, 2275 data_mp); 2276 } else { 2277 iptun_icmp_toobig_v6(iptun, newmtu, inner6, 2278 data_mp); 2279 } 2280 return; 2281 } 2282 case ICMP_DEST_NET_UNREACH_ADMIN: 2283 case ICMP_DEST_HOST_UNREACH_ADMIN: 2284 code = (inner4 != NULL ? ICMP_DEST_NET_UNREACH_ADMIN : 2285 ICMP6_DST_UNREACH_ADMIN); 2286 break; 2287 default: 2288 code = (inner4 != NULL ? ICMP_HOST_UNREACHABLE : 2289 ICMP6_DST_UNREACH_ADDR); 2290 break; 2291 } 2292 break; 2293 case ICMP_TIME_EXCEEDED: 2294 if (inner6 != NULL) { 2295 type = ICMP6_TIME_EXCEEDED; 2296 code = 0; 2297 } /* else we're already set. */ 2298 break; 2299 case ICMP_PARAM_PROBLEM: 2300 /* 2301 * This is a problem with the outer header we transmitted. 2302 * Treat this as an output error. 2303 */ 2304 iptun_drop_pkt(data_mp, &iptun->iptun_oerrors); 2305 return; 2306 default: 2307 iptun_drop_pkt(data_mp, &iptun->iptun_norcvbuf); 2308 return; 2309 } 2310 2311 if (inner4 != NULL) 2312 iptun_icmp_error_v4(iptun, inner4, data_mp, type, code); 2313 else 2314 iptun_icmp_error_v6(iptun, inner6, data_mp, type, code, 0); 2315 } 2316 2317 /* 2318 * Return B_TRUE if the IPv6 packet pointed to by ip6h contains a Tunnel 2319 * Encapsulation Limit destination option. If there is one, set encaplim_ptr 2320 * to point to the option value. 2321 */ 2322 static boolean_t 2323 iptun_find_encaplimit(mblk_t *mp, ip6_t *ip6h, uint8_t **encaplim_ptr) 2324 { 2325 ip6_pkt_t pkt; 2326 uint8_t *endptr; 2327 ip6_dest_t *destp; 2328 struct ip6_opt *optp; 2329 2330 pkt.ipp_fields = 0; /* must be initialized */ 2331 (void) ip_find_hdr_v6(mp, ip6h, &pkt, NULL); 2332 if ((pkt.ipp_fields & IPPF_DSTOPTS) != 0) { 2333 destp = pkt.ipp_dstopts; 2334 } else if ((pkt.ipp_fields & IPPF_RTDSTOPTS) != 0) { 2335 destp = pkt.ipp_rtdstopts; 2336 } else { 2337 return (B_FALSE); 2338 } 2339 2340 endptr = (uint8_t *)destp + 8 * (destp->ip6d_len + 1); 2341 optp = (struct ip6_opt *)(destp + 1); 2342 while (endptr - (uint8_t *)optp > sizeof (*optp)) { 2343 if (optp->ip6o_type == IP6OPT_TUNNEL_LIMIT) { 2344 if ((uint8_t *)(optp + 1) >= endptr) 2345 return (B_FALSE); 2346 *encaplim_ptr = (uint8_t *)&optp[1]; 2347 return (B_TRUE); 2348 } 2349 optp = (struct ip6_opt *)((uint8_t *)optp + optp->ip6o_len + 2); 2350 } 2351 return (B_FALSE); 2352 } 2353 2354 /* 2355 * Received ICMPv6 error in response to an X over IPv6 packet that we 2356 * transmitted. 2357 * 2358 * NOTE: "outer" refers to what's inside the ICMP payload. We will get one of 2359 * the following: 2360 * 2361 * [IPv6(0)][ICMPv6][IPv6(1)][IPv4][ULP] 2362 * 2363 * or 2364 * 2365 * [IPv6(0)][ICMPv6][IPv6(1)][IPv6(2)][ULP] 2366 * 2367 * And "outer6" will get set to IPv6(1), and inner[46] will correspond to 2368 * whatever the very-inner packet is (IPv4 or IPv6(2)). 2369 */ 2370 static void 2371 iptun_input_icmp_v6(iptun_t *iptun, mblk_t *ipsec_mp, mblk_t *data_mp, 2372 icmp6_t *icmp6h) 2373 { 2374 uint8_t *orig; 2375 ipha_t *outer4, *inner4; 2376 ip6_t *outer6, *inner6; 2377 int outer_hlen; 2378 uint8_t type, code; 2379 2380 /* 2381 * Change the db_type to M_DATA because subsequent operations assume 2382 * the ICMP packet is M_DATA again (i.e. calls to msgdsize().) 2383 */ 2384 data_mp->b_datap->db_type = M_DATA; 2385 2386 ASSERT(data_mp->b_cont == NULL); 2387 2388 /* 2389 * Temporarily move b_rptr forward so that iptun_find_headers() can 2390 * find IP headers in the ICMP packet payload. 2391 */ 2392 orig = data_mp->b_rptr; 2393 data_mp->b_rptr = (uint8_t *)(icmp6h + 1); 2394 /* 2395 * The ip module ensures that ICMP errors contain at least the 2396 * original IP header (otherwise, the error would never have made it 2397 * here). 2398 */ 2399 ASSERT(MBLKL(data_mp) >= 0); 2400 outer_hlen = iptun_find_headers(data_mp, &outer4, &inner4, &outer6, 2401 &inner6); 2402 ASSERT(outer4 == NULL); 2403 data_mp->b_rptr = orig; /* Restore r_ptr */ 2404 if (outer_hlen == 0) { 2405 iptun_drop_pkt((ipsec_mp != NULL ? ipsec_mp : data_mp), 2406 &iptun->iptun_ierrors); 2407 return; 2408 } 2409 2410 if (!ipsec_tun_inbound(ipsec_mp, &data_mp, iptun->iptun_itp, 2411 inner4, inner6, outer4, outer6, -outer_hlen, 2412 iptun->iptun_ns)) { 2413 /* Callee did all of the freeing. */ 2414 atomic_inc_64(&iptun->iptun_ierrors); 2415 return; 2416 } 2417 /* We should never see reassembled fragment here. */ 2418 ASSERT(data_mp->b_next == NULL); 2419 2420 data_mp->b_rptr = (uint8_t *)outer6 + outer_hlen; 2421 2422 /* 2423 * If the original packet being transmitted was itself an ICMP error, 2424 * then drop this packet. We don't want to generate an ICMP error in 2425 * response to an ICMP error. 2426 */ 2427 if (is_icmp_error(data_mp, inner4, inner6)) { 2428 iptun_drop_pkt(data_mp, &iptun->iptun_norcvbuf); 2429 return; 2430 } 2431 2432 switch (icmp6h->icmp6_type) { 2433 case ICMP6_PARAM_PROB: { 2434 uint8_t *encaplim_ptr; 2435 2436 /* 2437 * If the ICMPv6 error points to a valid Tunnel Encapsulation 2438 * Limit option and the limit value is 0, then fall through 2439 * and send a host unreachable message. Otherwise, treat the 2440 * error as an output error, as there must have been a problem 2441 * with a packet we sent. 2442 */ 2443 if (!iptun_find_encaplimit(data_mp, outer6, &encaplim_ptr) || 2444 (icmp6h->icmp6_pptr != 2445 ((ptrdiff_t)encaplim_ptr - (ptrdiff_t)outer6)) || 2446 *encaplim_ptr != 0) { 2447 iptun_drop_pkt(data_mp, &iptun->iptun_oerrors); 2448 return; 2449 } 2450 /* FALLTHRU */ 2451 } 2452 case ICMP6_TIME_EXCEEDED: 2453 case ICMP6_DST_UNREACH: 2454 type = (inner4 != NULL ? ICMP_DEST_UNREACHABLE : 2455 ICMP6_DST_UNREACH); 2456 code = (inner4 != NULL ? ICMP_HOST_UNREACHABLE : 2457 ICMP6_DST_UNREACH_ADDR); 2458 break; 2459 case ICMP6_PACKET_TOO_BIG: { 2460 uint32_t newmtu; 2461 2462 /* 2463 * We reconcile this with the fact that the tunnel may also 2464 * have IPsec policy by letting iptun_update_mtu take care of 2465 * it. 2466 */ 2467 newmtu = iptun_update_mtu(iptun, ntohl(icmp6h->icmp6_mtu)); 2468 2469 if (inner4 != NULL) { 2470 iptun_icmp_fragneeded_v4(iptun, newmtu, inner4, 2471 data_mp); 2472 } else { 2473 iptun_icmp_toobig_v6(iptun, newmtu, inner6, data_mp); 2474 } 2475 return; 2476 } 2477 default: 2478 iptun_drop_pkt(data_mp, &iptun->iptun_norcvbuf); 2479 return; 2480 } 2481 2482 if (inner4 != NULL) 2483 iptun_icmp_error_v4(iptun, inner4, data_mp, type, code); 2484 else 2485 iptun_icmp_error_v6(iptun, inner6, data_mp, type, code, 0); 2486 } 2487 2488 static void 2489 iptun_input_icmp(iptun_t *iptun, mblk_t *ipsec_mp, mblk_t *data_mp) 2490 { 2491 mblk_t *tmpmp; 2492 size_t hlen; 2493 2494 if (data_mp->b_cont != NULL) { 2495 /* 2496 * Since ICMP error processing necessitates access to bits 2497 * that are within the ICMP error payload (the original packet 2498 * that caused the error), pull everything up into a single 2499 * block for convenience. 2500 */ 2501 data_mp->b_datap->db_type = M_DATA; 2502 if ((tmpmp = msgpullup(data_mp, -1)) == NULL) { 2503 iptun_drop_pkt((ipsec_mp != NULL ? ipsec_mp : data_mp), 2504 &iptun->iptun_norcvbuf); 2505 return; 2506 } 2507 freemsg(data_mp); 2508 data_mp = tmpmp; 2509 if (ipsec_mp != NULL) 2510 ipsec_mp->b_cont = data_mp; 2511 } 2512 2513 switch (iptun->iptun_typeinfo->iti_ipvers) { 2514 case IPV4_VERSION: 2515 /* 2516 * The outer IP header coming up from IP is always ipha_t 2517 * alligned (otherwise, we would have crashed in ip). 2518 */ 2519 hlen = IPH_HDR_LENGTH((ipha_t *)data_mp->b_rptr); 2520 iptun_input_icmp_v4(iptun, ipsec_mp, data_mp, 2521 (icmph_t *)(data_mp->b_rptr + hlen)); 2522 break; 2523 case IPV6_VERSION: 2524 hlen = ip_hdr_length_v6(data_mp, (ip6_t *)data_mp->b_rptr); 2525 iptun_input_icmp_v6(iptun, ipsec_mp, data_mp, 2526 (icmp6_t *)(data_mp->b_rptr + hlen)); 2527 break; 2528 } 2529 } 2530 2531 static boolean_t 2532 iptun_in_6to4_ok(iptun_t *iptun, ipha_t *outer4, ip6_t *inner6) 2533 { 2534 ipaddr_t v4addr; 2535 2536 /* 2537 * Make sure that the IPv6 destination is within the site that this 2538 * 6to4 tunnel is routing for. We don't want people bouncing random 2539 * tunneled IPv6 packets through this 6to4 router. 2540 */ 2541 IN6_6TO4_TO_V4ADDR(&inner6->ip6_dst, (struct in_addr *)&v4addr); 2542 if (outer4->ipha_dst != v4addr) 2543 return (B_FALSE); 2544 2545 if (IN6_IS_ADDR_6TO4(&inner6->ip6_src)) { 2546 /* 2547 * Section 9 of RFC 3056 (security considerations) suggests 2548 * that when a packet is from a 6to4 site (i.e., it's not a 2549 * global address being forwarded froma relay router), make 2550 * sure that the packet was tunneled by that site's 6to4 2551 * router. 2552 */ 2553 IN6_6TO4_TO_V4ADDR(&inner6->ip6_src, (struct in_addr *)&v4addr); 2554 if (outer4->ipha_src != v4addr) 2555 return (B_FALSE); 2556 } else { 2557 /* 2558 * Only accept packets from a relay router if we've configured 2559 * outbound relay router functionality. 2560 */ 2561 if (iptun->iptun_iptuns->iptuns_relay_rtr_addr == INADDR_ANY) 2562 return (B_FALSE); 2563 } 2564 2565 return (B_TRUE); 2566 } 2567 2568 /* 2569 * Input function for everything that comes up from the ip module below us. 2570 * This is called directly from the ip module via connp->conn_recv(). 2571 * 2572 * There are two kinds of packets that can arrive here: (1) IP-in-IP tunneled 2573 * packets and (2) ICMP errors containing IP-in-IP packets transmitted by us. 2574 * They have the following structure: 2575 * 2576 * 1) M_DATA 2577 * 2) M_CTL[->M_DATA] 2578 * 2579 * (2) Is an M_CTL optionally followed by M_DATA, where the M_CTL block is the 2580 * start of the actual ICMP packet (it doesn't contain any special control 2581 * information). 2582 * 2583 * Either (1) or (2) can be IPsec-protected, in which case an M_CTL block 2584 * containing an ipsec_in_t will have been prepended to either (1) or (2), 2585 * making a total of four combinations of possible mblk chains: 2586 * 2587 * A) (1) 2588 * B) (2) 2589 * C) M_CTL(ipsec_in_t)->(1) 2590 * D) M_CTL(ipsec_in_t)->(2) 2591 */ 2592 /* ARGSUSED */ 2593 static void 2594 iptun_input(void *arg, mblk_t *mp, void *arg2) 2595 { 2596 conn_t *connp = arg; 2597 iptun_t *iptun = connp->conn_iptun; 2598 int outer_hlen; 2599 ipha_t *outer4, *inner4; 2600 ip6_t *outer6, *inner6; 2601 mblk_t *data_mp = mp; 2602 2603 ASSERT(IPCL_IS_IPTUN(connp)); 2604 ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_CTL); 2605 2606 if (DB_TYPE(mp) == M_CTL) { 2607 if (((ipsec_in_t *)(mp->b_rptr))->ipsec_in_type != IPSEC_IN) { 2608 iptun_input_icmp(iptun, NULL, mp); 2609 return; 2610 } 2611 2612 data_mp = mp->b_cont; 2613 if (DB_TYPE(data_mp) == M_CTL) { 2614 /* Protected ICMP packet. */ 2615 iptun_input_icmp(iptun, mp, data_mp); 2616 return; 2617 } 2618 } 2619 2620 /* 2621 * Request the destination's path MTU information regularly in case 2622 * path MTU has increased. 2623 */ 2624 if (IPTUN_PMTU_TOO_OLD(iptun)) 2625 iptun_task_dispatch(iptun, IPTUN_TASK_PMTU_UPDATE); 2626 2627 if ((outer_hlen = iptun_find_headers(data_mp, &outer4, &inner4, &outer6, 2628 &inner6)) == 0) 2629 goto drop; 2630 2631 /* 2632 * If the system is labeled, we call tsol_check_dest() on the packet 2633 * destination (our local tunnel address) to ensure that the packet as 2634 * labeled should be allowed to be sent to us. We don't need to call 2635 * the more involved tsol_receive_local() since the tunnel link itself 2636 * cannot be assigned to shared-stack non-global zones. 2637 */ 2638 if (is_system_labeled()) { 2639 cred_t *msg_cred; 2640 2641 if ((msg_cred = msg_getcred(data_mp, NULL)) == NULL) 2642 goto drop; 2643 if (tsol_check_dest(msg_cred, (outer4 != NULL ? 2644 (void *)&outer4->ipha_dst : (void *)&outer6->ip6_dst), 2645 (outer4 != NULL ? IPV4_VERSION : IPV6_VERSION), 2646 B_FALSE, NULL) != 0) 2647 goto drop; 2648 } 2649 2650 if (!ipsec_tun_inbound((mp == data_mp ? NULL : mp), &data_mp, 2651 iptun->iptun_itp, inner4, inner6, outer4, outer6, outer_hlen, 2652 iptun->iptun_ns)) { 2653 /* Callee did all of the freeing. */ 2654 return; 2655 } 2656 mp = data_mp; 2657 2658 if (iptun->iptun_typeinfo->iti_type == IPTUN_TYPE_6TO4 && 2659 !iptun_in_6to4_ok(iptun, outer4, inner6)) 2660 goto drop; 2661 2662 /* 2663 * We need to statistically account for each packet individually, so 2664 * we might as well split up any b_next chains here. 2665 */ 2666 do { 2667 mp = data_mp->b_next; 2668 data_mp->b_next = NULL; 2669 2670 atomic_inc_64(&iptun->iptun_ipackets); 2671 atomic_add_64(&iptun->iptun_rbytes, msgdsize(data_mp)); 2672 mac_rx(iptun->iptun_mh, NULL, data_mp); 2673 2674 data_mp = mp; 2675 } while (data_mp != NULL); 2676 return; 2677 drop: 2678 iptun_drop_pkt(mp, &iptun->iptun_ierrors); 2679 } 2680 2681 /* 2682 * Do 6to4-specific header-processing on output. Return B_TRUE if the packet 2683 * was processed without issue, or B_FALSE if the packet had issues and should 2684 * be dropped. 2685 */ 2686 static boolean_t 2687 iptun_out_process_6to4(iptun_t *iptun, ipha_t *outer4, ip6_t *inner6) 2688 { 2689 ipaddr_t v4addr; 2690 2691 /* 2692 * IPv6 source must be a 6to4 address. This is because a conscious 2693 * decision was made to not allow a Solaris system to be used as a 2694 * relay router (for security reasons) when 6to4 was initially 2695 * integrated. If this decision is ever reversed, the following check 2696 * can be removed. 2697 */ 2698 if (!IN6_IS_ADDR_6TO4(&inner6->ip6_src)) 2699 return (B_FALSE); 2700 2701 /* 2702 * RFC3056 mandates that the IPv4 source MUST be set to the IPv4 2703 * portion of the 6to4 IPv6 source address. In other words, make sure 2704 * that we're tunneling packets from our own 6to4 site. 2705 */ 2706 IN6_6TO4_TO_V4ADDR(&inner6->ip6_src, (struct in_addr *)&v4addr); 2707 if (outer4->ipha_src != v4addr) 2708 return (B_FALSE); 2709 2710 /* 2711 * Automatically set the destination of the outer IPv4 header as 2712 * described in RFC3056. There are two possibilities: 2713 * 2714 * a. If the IPv6 destination is a 6to4 address, set the IPv4 address 2715 * to the IPv4 portion of the 6to4 address. 2716 * b. If the IPv6 destination is a native IPv6 address, set the IPv4 2717 * destination to the address of a relay router. 2718 * 2719 * Design Note: b shouldn't be necessary here, and this is a flaw in 2720 * the design of the 6to4relay command. Instead of setting a 6to4 2721 * relay address in this module via an ioctl, the 6to4relay command 2722 * could simply add a IPv6 route for native IPv6 addresses (such as a 2723 * default route) in the forwarding table that uses a 6to4 destination 2724 * as its next hop, and the IPv4 portion of that address could be a 2725 * 6to4 relay address. In order for this to work, IP would have to 2726 * resolve the next hop address, which would necessitate a link-layer 2727 * address resolver for 6to4 links, which doesn't exist today. 2728 * 2729 * In fact, if a resolver existed for 6to4 links, then setting the 2730 * IPv4 destination in the outer header could be done as part of 2731 * link-layer address resolution and fast-path header generation, and 2732 * not here. 2733 */ 2734 if (IN6_IS_ADDR_6TO4(&inner6->ip6_dst)) { 2735 /* destination is a 6to4 router */ 2736 IN6_6TO4_TO_V4ADDR(&inner6->ip6_dst, 2737 (struct in_addr *)&outer4->ipha_dst); 2738 } else { 2739 /* 2740 * The destination is a native IPv6 address. If output to a 2741 * relay-router is enabled, use the relay-router's IPv4 2742 * address as the destination. 2743 */ 2744 if (iptun->iptun_iptuns->iptuns_relay_rtr_addr == INADDR_ANY) 2745 return (B_FALSE); 2746 outer4->ipha_dst = iptun->iptun_iptuns->iptuns_relay_rtr_addr; 2747 } 2748 2749 /* 2750 * If the outer source and destination are equal, this means that the 2751 * 6to4 router somehow forwarded an IPv6 packet destined for its own 2752 * 6to4 site to its 6to4 tunnel interface, which will result in this 2753 * packet infinitely bouncing between ip and iptun. 2754 */ 2755 return (outer4->ipha_src != outer4->ipha_dst); 2756 } 2757 2758 /* 2759 * Process output packets with outer IPv4 headers. Frees mp and bumps stat on 2760 * error. 2761 */ 2762 static mblk_t * 2763 iptun_out_process_ipv4(iptun_t *iptun, mblk_t *mp, ipha_t *outer4, 2764 ipha_t *inner4, ip6_t *inner6) 2765 { 2766 uint8_t *innerptr = (inner4 != NULL ? 2767 (uint8_t *)inner4 : (uint8_t *)inner6); 2768 size_t minmtu = (inner4 != NULL ? 2769 IPTUN_MIN_IPV4_MTU : IPTUN_MIN_IPV6_MTU); 2770 2771 if (inner4 != NULL) { 2772 ASSERT(outer4->ipha_protocol == IPPROTO_ENCAP); 2773 /* 2774 * Copy the tos from the inner IPv4 header. We mask off ECN 2775 * bits (bits 6 and 7) because there is currently no 2776 * tunnel-tunnel communication to determine if both sides 2777 * support ECN. We opt for the safe choice: don't copy the 2778 * ECN bits when doing encapsulation. 2779 */ 2780 outer4->ipha_type_of_service = 2781 inner4->ipha_type_of_service & ~0x03; 2782 } else { 2783 ASSERT(outer4->ipha_protocol == IPPROTO_IPV6 && 2784 inner6 != NULL); 2785 2786 if (iptun->iptun_typeinfo->iti_type == IPTUN_TYPE_6TO4 && 2787 !iptun_out_process_6to4(iptun, outer4, inner6)) { 2788 iptun_drop_pkt(mp, &iptun->iptun_oerrors); 2789 return (NULL); 2790 } 2791 } 2792 2793 /* 2794 * As described in section 3.2.2 of RFC4213, if the packet payload is 2795 * less than or equal to the minimum MTU size, then we need to allow 2796 * IPv4 to fragment the packet. The reason is that even if we end up 2797 * receiving an ICMP frag-needed, the interface above this tunnel 2798 * won't be allowed to drop its MTU as a result, since the packet was 2799 * already smaller than the smallest allowable MTU for that interface. 2800 */ 2801 if (mp->b_wptr - innerptr <= minmtu) 2802 outer4->ipha_fragment_offset_and_flags = 0; 2803 2804 outer4->ipha_length = htons(msgdsize(mp)); 2805 2806 return (mp); 2807 } 2808 2809 /* 2810 * Insert an encapsulation limit destination option in the packet provided. 2811 * Always consumes the mp argument and returns a new mblk pointer. 2812 */ 2813 static mblk_t * 2814 iptun_insert_encaplimit(iptun_t *iptun, mblk_t *mp, ip6_t *outer6, 2815 uint8_t limit) 2816 { 2817 mblk_t *newmp; 2818 iptun_ipv6hdrs_t *newouter6; 2819 2820 ASSERT(outer6->ip6_nxt == IPPROTO_IPV6); 2821 ASSERT(mp->b_cont == NULL); 2822 2823 mp->b_rptr += sizeof (ip6_t); 2824 newmp = allocb_tmpl(sizeof (iptun_ipv6hdrs_t) + MBLKL(mp), mp); 2825 if (newmp == NULL) { 2826 iptun_drop_pkt(mp, &iptun->iptun_noxmtbuf); 2827 return (NULL); 2828 } 2829 newmp->b_wptr += sizeof (iptun_ipv6hdrs_t); 2830 /* Copy the payload (Starting with the inner IPv6 header). */ 2831 bcopy(mp->b_rptr, newmp->b_wptr, MBLKL(mp)); 2832 newmp->b_wptr += MBLKL(mp); 2833 newouter6 = (iptun_ipv6hdrs_t *)newmp->b_rptr; 2834 /* Now copy the outer IPv6 header. */ 2835 bcopy(outer6, &newouter6->it6h_ip6h, sizeof (ip6_t)); 2836 newouter6->it6h_ip6h.ip6_nxt = IPPROTO_DSTOPTS; 2837 newouter6->it6h_encaplim = iptun_encaplim_init; 2838 newouter6->it6h_encaplim.iel_destopt.ip6d_nxt = outer6->ip6_nxt; 2839 newouter6->it6h_encaplim.iel_telopt.ip6ot_encap_limit = limit; 2840 2841 /* 2842 * The payload length will be set at the end of 2843 * iptun_out_process_ipv6(). 2844 */ 2845 2846 freemsg(mp); 2847 return (newmp); 2848 } 2849 2850 /* 2851 * Process output packets with outer IPv6 headers. Frees mp and bumps stats 2852 * on error. 2853 */ 2854 static mblk_t * 2855 iptun_out_process_ipv6(iptun_t *iptun, mblk_t *mp, ip6_t *outer6, ip6_t *inner6) 2856 { 2857 uint8_t *limit, *configlimit; 2858 uint32_t offset; 2859 iptun_ipv6hdrs_t *v6hdrs; 2860 2861 if (inner6 != NULL && iptun_find_encaplimit(mp, inner6, &limit)) { 2862 /* 2863 * The inner packet is an IPv6 packet which itself contains an 2864 * encapsulation limit option. The limit variable points to 2865 * the value in the embedded option. Process the 2866 * encapsulation limit option as specified in RFC 2473. 2867 * 2868 * If limit is 0, then we've exceeded the limit and we need to 2869 * send back an ICMPv6 parameter problem message. 2870 * 2871 * If limit is > 0, then we decrement it by 1 and make sure 2872 * that the encapsulation limit option in the outer header 2873 * reflects that (adding an option if one isn't already 2874 * there). 2875 */ 2876 ASSERT(limit > mp->b_rptr && limit < mp->b_wptr); 2877 if (*limit == 0) { 2878 mp->b_rptr = (uint8_t *)inner6; 2879 offset = limit - mp->b_rptr; 2880 iptun_icmp_error_v6(iptun, inner6, mp, ICMP6_PARAM_PROB, 2881 0, offset); 2882 atomic_inc_64(&iptun->iptun_noxmtbuf); 2883 return (NULL); 2884 } 2885 2886 /* 2887 * The outer header requires an encapsulation limit option. 2888 * If there isn't one already, add one. 2889 */ 2890 if (iptun->iptun_encaplimit == 0) { 2891 if ((mp = iptun_insert_encaplimit(iptun, mp, outer6, 2892 (*limit - 1))) == NULL) 2893 return (NULL); 2894 } else { 2895 /* 2896 * There is an existing encapsulation limit option in 2897 * the outer header. If the inner encapsulation limit 2898 * is less than the configured encapsulation limit, 2899 * update the outer encapsulation limit to reflect 2900 * this lesser value. 2901 */ 2902 v6hdrs = (iptun_ipv6hdrs_t *)mp->b_rptr; 2903 configlimit = 2904 &v6hdrs->it6h_encaplim.iel_telopt.ip6ot_encap_limit; 2905 if ((*limit - 1) < *configlimit) 2906 *configlimit = (*limit - 1); 2907 } 2908 } 2909 2910 outer6->ip6_plen = htons(msgdsize(mp) - sizeof (ip6_t)); 2911 return (mp); 2912 } 2913 2914 /* 2915 * The IP tunneling MAC-type plugins have already done most of the header 2916 * processing and validity checks. We are simply responsible for multiplexing 2917 * down to the ip module below us. 2918 */ 2919 static void 2920 iptun_output(iptun_t *iptun, mblk_t *mp) 2921 { 2922 conn_t *connp = iptun->iptun_connp; 2923 int outer_hlen; 2924 mblk_t *newmp; 2925 ipha_t *outer4, *inner4; 2926 ip6_t *outer6, *inner6; 2927 ipsec_tun_pol_t *itp = iptun->iptun_itp; 2928 2929 ASSERT(mp->b_datap->db_type == M_DATA); 2930 2931 if (mp->b_cont != NULL) { 2932 if ((newmp = msgpullup(mp, -1)) == NULL) { 2933 iptun_drop_pkt(mp, &iptun->iptun_noxmtbuf); 2934 return; 2935 } 2936 freemsg(mp); 2937 mp = newmp; 2938 } 2939 2940 outer_hlen = iptun_find_headers(mp, &outer4, &inner4, &outer6, &inner6); 2941 if (outer_hlen == 0) { 2942 iptun_drop_pkt(mp, &iptun->iptun_oerrors); 2943 return; 2944 } 2945 2946 /* Perform header processing. */ 2947 if (outer4 != NULL) 2948 mp = iptun_out_process_ipv4(iptun, mp, outer4, inner4, inner6); 2949 else 2950 mp = iptun_out_process_ipv6(iptun, mp, outer6, inner6); 2951 if (mp == NULL) 2952 return; 2953 2954 /* 2955 * Let's hope the compiler optimizes this with "branch taken". 2956 */ 2957 if (itp != NULL && (itp->itp_flags & ITPF_P_ACTIVE)) { 2958 if ((mp = ipsec_tun_outbound(mp, iptun, inner4, inner6, outer4, 2959 outer6, outer_hlen)) == NULL) { 2960 /* ipsec_tun_outbound() frees mp on error. */ 2961 atomic_inc_64(&iptun->iptun_oerrors); 2962 return; 2963 } 2964 /* 2965 * ipsec_tun_outbound() returns a chain of tunneled IP 2966 * fragments linked with b_next (or a single message if the 2967 * tunneled packet wasn't a fragment). Each message in the 2968 * chain is prepended by an IPSEC_OUT M_CTL block with 2969 * instructions for outbound IPsec processing. 2970 */ 2971 for (newmp = mp; newmp != NULL; newmp = mp) { 2972 ASSERT(newmp->b_datap->db_type == M_CTL); 2973 atomic_inc_64(&iptun->iptun_opackets); 2974 atomic_add_64(&iptun->iptun_obytes, 2975 msgdsize(newmp->b_cont)); 2976 mp = mp->b_next; 2977 newmp->b_next = NULL; 2978 connp->conn_send(connp, newmp, connp->conn_wq, IP_WPUT); 2979 } 2980 } else { 2981 /* 2982 * The ip module will potentially apply global policy to the 2983 * packet in its output path if there's no active tunnel 2984 * policy. 2985 */ 2986 atomic_inc_64(&iptun->iptun_opackets); 2987 atomic_add_64(&iptun->iptun_obytes, msgdsize(mp)); 2988 connp->conn_send(connp, mp, connp->conn_wq, IP_WPUT); 2989 } 2990 } 2991 2992 /* 2993 * Note that the setting or clearing iptun_{set,get}_g_q() is serialized via 2994 * iptuns_lock and iptunq_open(), so we must never be in a situation where 2995 * iptun_set_g_q() is called if the queue has already been set or vice versa 2996 * (hence the ASSERT()s.) 2997 */ 2998 void 2999 iptun_set_g_q(netstack_t *ns, queue_t *q) 3000 { 3001 ASSERT(ns->netstack_iptun->iptuns_g_q == NULL); 3002 ns->netstack_iptun->iptuns_g_q = q; 3003 } 3004 3005 void 3006 iptun_clear_g_q(netstack_t *ns) 3007 { 3008 ASSERT(ns->netstack_iptun->iptuns_g_q != NULL); 3009 ns->netstack_iptun->iptuns_g_q = NULL; 3010 } 3011 3012 static mac_callbacks_t iptun_m_callbacks = { 3013 .mc_callbacks = (MC_SETPROP | MC_GETPROP), 3014 .mc_getstat = iptun_m_getstat, 3015 .mc_start = iptun_m_start, 3016 .mc_stop = iptun_m_stop, 3017 .mc_setpromisc = iptun_m_setpromisc, 3018 .mc_multicst = iptun_m_multicst, 3019 .mc_unicst = iptun_m_unicst, 3020 .mc_tx = iptun_m_tx, 3021 .mc_setprop = iptun_m_setprop, 3022 .mc_getprop = iptun_m_getprop 3023 }; 3024