1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * iptun - IP Tunneling Driver 28 * 29 * This module is a GLDv3 driver that implements virtual datalinks over IP 30 * (a.k.a, IP tunneling). The datalinks are managed through a dld ioctl 31 * interface (see iptun_ctl.c), and registered with GLDv3 using 32 * mac_register(). It implements the logic for various forms of IP (IPv4 or 33 * IPv6) encapsulation within IP (IPv4 or IPv6) by interacting with the ip 34 * module below it. Each virtual IP tunnel datalink has a conn_t associated 35 * with it representing the "outer" IP connection. 36 * 37 * The module implements the following locking semantics: 38 * 39 * Lookups and deletions in iptun_hash are synchronized using iptun_hash_lock. 40 * See comments above iptun_hash_lock for details. 41 * 42 * No locks are ever held while calling up to GLDv3. The general architecture 43 * of GLDv3 requires this, as the mac perimeter (essentially a lock) for a 44 * given link will be held while making downcalls (iptun_m_*() callbacks). 
45 * Because we need to hold locks while handling downcalls, holding these locks 46 * while issuing upcalls results in deadlock scenarios. See the block comment 47 * above iptun_task_cb() for details on how we safely issue upcalls without 48 * holding any locks. 49 * 50 * The contents of each iptun_t is protected by an iptun_mutex which is held 51 * in iptun_enter() (called by iptun_enter_by_linkid()), and exited in 52 * iptun_exit(). 53 * 54 * See comments in iptun_delete() and iptun_free() for details on how the 55 * iptun_t is deleted safely. 56 */ 57 58 #include <sys/types.h> 59 #include <sys/kmem.h> 60 #include <sys/errno.h> 61 #include <sys/modhash.h> 62 #include <sys/list.h> 63 #include <sys/strsun.h> 64 #include <sys/file.h> 65 #include <sys/systm.h> 66 #include <sys/tihdr.h> 67 #include <sys/param.h> 68 #include <sys/mac_provider.h> 69 #include <sys/mac_ipv4.h> 70 #include <sys/mac_ipv6.h> 71 #include <sys/mac_6to4.h> 72 #include <sys/tsol/tnet.h> 73 #include <sys/sunldi.h> 74 #include <netinet/in.h> 75 #include <netinet/ip6.h> 76 #include <inet/ip.h> 77 #include <inet/ip_ire.h> 78 #include <inet/ipsec_impl.h> 79 #include <inet/iptun.h> 80 #include "iptun_impl.h" 81 82 /* Do the tunnel type and address family match? */ 83 #define IPTUN_ADDR_MATCH(iptun_type, family) \ 84 ((iptun_type == IPTUN_TYPE_IPV4 && family == AF_INET) || \ 85 (iptun_type == IPTUN_TYPE_IPV6 && family == AF_INET6) || \ 86 (iptun_type == IPTUN_TYPE_6TO4 && family == AF_INET)) 87 88 #define IPTUN_HASH_KEY(key) ((mod_hash_key_t)(uintptr_t)(key)) 89 90 #define IPTUNQ_DEV "/dev/iptunq" 91 92 #define IPTUN_MIN_IPV4_MTU 576 /* ip.h still uses 68 (!) 
 */
#define	IPTUN_MIN_IPV6_MTU	IPV6_MIN_MTU
/* The largest link MTUs leave room for the outer IP header(s). */
#define	IPTUN_MAX_IPV4_MTU	(IP_MAXPACKET - sizeof (ipha_t))
#define	IPTUN_MAX_IPV6_MTU	(IP_MAXPACKET - sizeof (ip6_t) -	\
				    sizeof (iptun_encaplim_t))

/* Bounds enforced on the hoplimit link property. */
#define	IPTUN_MIN_HOPLIMIT	1
#define	IPTUN_MAX_HOPLIMIT	UINT8_MAX

/* Bounds enforced on the encaplimit link property. */
#define	IPTUN_MIN_ENCAPLIMIT	0
#define	IPTUN_MAX_ENCAPLIMIT	UINT8_MAX

/* The ipsec_req_t preference bits examined by iptun_set_sec_simple(). */
#define	IPTUN_IPSEC_REQ_MASK	(IPSEC_PREF_REQUIRED | IPSEC_PREF_NEVER)

/*
 * Template for the tunnel encapsulation limit destination option.
 * iptun_headergen() copies this into the tunnel's IPv6 header template and
 * then overwrites the encapsulation limit with the tunnel's own value.
 */
static iptun_encaplim_t iptun_encaplim_init = {
	{ IPPROTO_NONE, 0 },
	IP6OPT_TUNNEL_LIMIT,
	1,
	IPTUN_DEFAULT_ENCAPLIMIT,	/* filled in with actual value later */
	IP6OPT_PADN,
	1,
	0
};

/*
 * Table containing per-iptun-type information.  The table is terminated by
 * an IPTUN_TYPE_UNKNOWN entry, which is what iptun_gettypeinfo() returns when
 * no entry matches.  The columns appear to be: tunnel type, mac plugin
 * identifier, outer IP version, output function, minimum MTU, maximum MTU,
 * and whether the type has a remote address (NOTE(review): confirm the
 * column order against iptun_typeinfo_t in iptun_impl.h).
 */
static iptun_typeinfo_t	iptun_type_table[] = {
	{ IPTUN_TYPE_IPV4, MAC_PLUGIN_IDENT_IPV4, IPV4_VERSION, ip_output,
	    IPTUN_MIN_IPV4_MTU, IPTUN_MAX_IPV4_MTU,	B_TRUE },
	{ IPTUN_TYPE_IPV6, MAC_PLUGIN_IDENT_IPV6, IPV6_VERSION, ip_output_v6,
	    IPTUN_MIN_IPV6_MTU, IPTUN_MAX_IPV6_MTU,	B_TRUE },
	{ IPTUN_TYPE_6TO4, MAC_PLUGIN_IDENT_6TO4, IPV4_VERSION, ip_output,
	    IPTUN_MIN_IPV4_MTU, IPTUN_MAX_IPV4_MTU,	B_FALSE },
	{ IPTUN_TYPE_UNKNOWN, NULL,		0, NULL, 0, 0, B_FALSE }
};

/*
 * iptun_hash is an iptun_t lookup table by link ID protected by
 * iptun_hash_lock.  While the hash table's integrity is maintained via
 * internal locking in the mod_hash_*() functions, we need additional locking
 * so that an iptun_t cannot be deleted after a hash lookup has returned an
 * iptun_t and before iptun_lock has been entered.  As such, we use
 * iptun_hash_lock when doing lookups and removals from iptun_hash.
134 */ 135 mod_hash_t *iptun_hash; 136 static kmutex_t iptun_hash_lock; 137 138 static uint_t iptun_tunnelcount; /* total for all stacks */ 139 kmem_cache_t *iptun_cache; 140 ddi_taskq_t *iptun_taskq; 141 142 typedef enum { 143 IPTUN_TASK_PMTU_UPDATE, /* obtain new destination path-MTU */ 144 IPTUN_TASK_MTU_UPDATE, /* tell mac about new tunnel link MTU */ 145 IPTUN_TASK_LADDR_UPDATE, /* tell mac about new local address */ 146 IPTUN_TASK_RADDR_UPDATE, /* tell mac about new remote address */ 147 IPTUN_TASK_LINK_UPDATE, /* tell mac about new link state */ 148 IPTUN_TASK_PDATA_UPDATE /* tell mac about updated plugin data */ 149 } iptun_task_t; 150 151 typedef struct iptun_task_data_s { 152 iptun_task_t itd_task; 153 datalink_id_t itd_linkid; 154 } iptun_task_data_t; 155 156 static void iptun_task_dispatch(iptun_t *, iptun_task_t); 157 static int iptun_enter(iptun_t *); 158 static void iptun_exit(iptun_t *); 159 static void iptun_headergen(iptun_t *, boolean_t); 160 static void iptun_drop_pkt(mblk_t *, uint64_t *); 161 static void iptun_input(void *, mblk_t *, void *); 162 static void iptun_output(iptun_t *, mblk_t *); 163 static uint32_t iptun_get_maxmtu(iptun_t *, uint32_t); 164 static uint32_t iptun_update_mtu(iptun_t *, uint32_t); 165 static uint32_t iptun_get_dst_pmtu(iptun_t *); 166 static int iptun_setladdr(iptun_t *, const struct sockaddr_storage *); 167 168 static mac_callbacks_t iptun_m_callbacks; 169 170 static int 171 iptun_m_getstat(void *arg, uint_t stat, uint64_t *val) 172 { 173 iptun_t *iptun = arg; 174 int err = 0; 175 176 switch (stat) { 177 case MAC_STAT_IERRORS: 178 *val = iptun->iptun_ierrors; 179 break; 180 case MAC_STAT_OERRORS: 181 *val = iptun->iptun_oerrors; 182 break; 183 case MAC_STAT_RBYTES: 184 *val = iptun->iptun_rbytes; 185 break; 186 case MAC_STAT_IPACKETS: 187 *val = iptun->iptun_ipackets; 188 break; 189 case MAC_STAT_OBYTES: 190 *val = iptun->iptun_obytes; 191 break; 192 case MAC_STAT_OPACKETS: 193 *val = iptun->iptun_opackets; 194 
break; 195 case MAC_STAT_NORCVBUF: 196 *val = iptun->iptun_norcvbuf; 197 break; 198 case MAC_STAT_NOXMTBUF: 199 *val = iptun->iptun_noxmtbuf; 200 break; 201 default: 202 err = ENOTSUP; 203 } 204 205 return (err); 206 } 207 208 static int 209 iptun_m_start(void *arg) 210 { 211 iptun_t *iptun = arg; 212 int err; 213 214 if ((err = iptun_enter(iptun)) == 0) { 215 iptun->iptun_flags |= IPTUN_MAC_STARTED; 216 iptun_task_dispatch(iptun, IPTUN_TASK_LINK_UPDATE); 217 iptun_exit(iptun); 218 } 219 return (err); 220 } 221 222 static void 223 iptun_m_stop(void *arg) 224 { 225 iptun_t *iptun = arg; 226 227 if (iptun_enter(iptun) == 0) { 228 iptun->iptun_flags &= ~IPTUN_MAC_STARTED; 229 iptun_task_dispatch(iptun, IPTUN_TASK_LINK_UPDATE); 230 iptun_exit(iptun); 231 } 232 } 233 234 /* 235 * iptun_m_setpromisc() does nothing and always succeeds. This is because a 236 * tunnel data-link only ever receives packets that are destined exclusively 237 * for the local address of the tunnel. 238 */ 239 /* ARGSUSED */ 240 static int 241 iptun_m_setpromisc(void *arg, boolean_t on) 242 { 243 return (0); 244 } 245 246 /* ARGSUSED */ 247 static int 248 iptun_m_multicst(void *arg, boolean_t add, const uint8_t *addrp) 249 { 250 return (ENOTSUP); 251 } 252 253 /* 254 * iptun_m_unicst() sets the local address. 
255 */ 256 /* ARGSUSED */ 257 static int 258 iptun_m_unicst(void *arg, const uint8_t *addrp) 259 { 260 iptun_t *iptun = arg; 261 int err; 262 struct sockaddr_storage ss; 263 struct sockaddr_in *sin; 264 struct sockaddr_in6 *sin6; 265 266 if ((err = iptun_enter(iptun)) == 0) { 267 switch (iptun->iptun_typeinfo->iti_ipvers) { 268 case IPV4_VERSION: 269 sin = (struct sockaddr_in *)&ss; 270 sin->sin_family = AF_INET; 271 bcopy(addrp, &sin->sin_addr, sizeof (in_addr_t)); 272 break; 273 case IPV6_VERSION: 274 sin6 = (struct sockaddr_in6 *)&ss; 275 sin6->sin6_family = AF_INET6; 276 bcopy(addrp, &sin6->sin6_addr, sizeof (in6_addr_t)); 277 break; 278 default: 279 ASSERT(0); 280 } 281 err = iptun_setladdr(iptun, &ss); 282 iptun_exit(iptun); 283 } 284 return (err); 285 } 286 287 static mblk_t * 288 iptun_m_tx(void *arg, mblk_t *mpchain) 289 { 290 mblk_t *mp, *nmp; 291 iptun_t *iptun = arg; 292 293 if (!IS_IPTUN_RUNNING(iptun)) { 294 iptun_drop_pkt(mpchain, &iptun->iptun_noxmtbuf); 295 return (NULL); 296 } 297 298 /* 299 * Request the destination's path MTU information regularly in case 300 * path MTU has increased. 301 */ 302 if (IPTUN_PMTU_TOO_OLD(iptun)) 303 iptun_task_dispatch(iptun, IPTUN_TASK_PMTU_UPDATE); 304 305 for (mp = mpchain; mp != NULL; mp = nmp) { 306 nmp = mp->b_next; 307 mp->b_next = NULL; 308 iptun_output(iptun, mp); 309 } 310 311 return (NULL); 312 } 313 314 /* ARGSUSED */ 315 static int 316 iptun_m_setprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, 317 uint_t pr_valsize, const void *pr_val) 318 { 319 iptun_t *iptun = barg; 320 uint32_t value = *(uint32_t *)pr_val; 321 int err; 322 323 /* 324 * We need to enter this iptun_t since we'll be modifying the outer 325 * header. 
326 */ 327 if ((err = iptun_enter(iptun)) != 0) 328 return (err); 329 330 switch (pr_num) { 331 case MAC_PROP_IPTUN_HOPLIMIT: 332 if (value < IPTUN_MIN_HOPLIMIT || value > IPTUN_MAX_HOPLIMIT) { 333 err = EINVAL; 334 break; 335 } 336 if (value != iptun->iptun_hoplimit) { 337 iptun->iptun_hoplimit = (uint8_t)value; 338 iptun_headergen(iptun, B_TRUE); 339 } 340 break; 341 case MAC_PROP_IPTUN_ENCAPLIMIT: 342 if (iptun->iptun_typeinfo->iti_type != IPTUN_TYPE_IPV6 || 343 value > IPTUN_MAX_ENCAPLIMIT) { 344 err = EINVAL; 345 break; 346 } 347 if (value != iptun->iptun_encaplimit) { 348 iptun->iptun_encaplimit = (uint8_t)value; 349 iptun_headergen(iptun, B_TRUE); 350 } 351 break; 352 case MAC_PROP_MTU: { 353 uint32_t maxmtu = iptun_get_maxmtu(iptun, 0); 354 355 if (value < iptun->iptun_typeinfo->iti_minmtu || 356 value > maxmtu) { 357 err = EINVAL; 358 break; 359 } 360 iptun->iptun_flags |= IPTUN_FIXED_MTU; 361 if (value != iptun->iptun_mtu) { 362 iptun->iptun_mtu = value; 363 iptun_task_dispatch(iptun, IPTUN_TASK_MTU_UPDATE); 364 } 365 break; 366 } 367 default: 368 err = EINVAL; 369 } 370 iptun_exit(iptun); 371 return (err); 372 } 373 374 /* ARGSUSED */ 375 static int 376 iptun_m_getprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, 377 uint_t pr_flags, uint_t pr_valsize, void *pr_val, uint_t *perm) 378 { 379 iptun_t *iptun = barg; 380 mac_propval_range_t range; 381 boolean_t is_default = (pr_flags & MAC_PROP_DEFAULT); 382 boolean_t is_possible = (pr_flags & MAC_PROP_POSSIBLE); 383 int err; 384 385 if ((err = iptun_enter(iptun)) != 0) 386 return (err); 387 388 if ((pr_flags & ~(MAC_PROP_DEFAULT | MAC_PROP_POSSIBLE)) != 0) { 389 err = ENOTSUP; 390 goto done; 391 } 392 if (is_default && is_possible) { 393 err = EINVAL; 394 goto done; 395 } 396 397 *perm = MAC_PROP_PERM_RW; 398 399 if (is_possible) { 400 if (pr_valsize < sizeof (mac_propval_range_t)) { 401 err = EINVAL; 402 goto done; 403 } 404 range.mpr_count = 1; 405 range.mpr_type = MAC_PROPVAL_UINT32; 406 } else 
if (pr_valsize < sizeof (uint32_t)) { 407 err = EINVAL; 408 goto done; 409 } 410 411 switch (pr_num) { 412 case MAC_PROP_IPTUN_HOPLIMIT: 413 if (is_possible) { 414 range.range_uint32[0].mpur_min = IPTUN_MIN_HOPLIMIT; 415 range.range_uint32[0].mpur_max = IPTUN_MAX_HOPLIMIT; 416 } else if (is_default) { 417 *(uint32_t *)pr_val = IPTUN_DEFAULT_HOPLIMIT; 418 } else { 419 *(uint32_t *)pr_val = iptun->iptun_hoplimit; 420 } 421 break; 422 case MAC_PROP_IPTUN_ENCAPLIMIT: 423 if (iptun->iptun_typeinfo->iti_type != IPTUN_TYPE_IPV6) { 424 err = ENOTSUP; 425 goto done; 426 } 427 if (is_possible) { 428 range.range_uint32[0].mpur_min = IPTUN_MIN_ENCAPLIMIT; 429 range.range_uint32[0].mpur_max = IPTUN_MAX_ENCAPLIMIT; 430 } else if (is_default) { 431 *(uint32_t *)pr_val = IPTUN_DEFAULT_ENCAPLIMIT; 432 } else { 433 *(uint32_t *)pr_val = iptun->iptun_encaplimit; 434 } 435 break; 436 case MAC_PROP_MTU: { 437 uint32_t maxmtu = iptun_get_maxmtu(iptun, 0); 438 439 if (is_possible) { 440 range.range_uint32[0].mpur_min = 441 iptun->iptun_typeinfo->iti_minmtu; 442 range.range_uint32[0].mpur_max = maxmtu; 443 } else { 444 /* 445 * The MAC module knows the current value and should 446 * never call us for it. There is also no default 447 * MTU, as by default, it is a dynamic property. 448 */ 449 err = ENOTSUP; 450 goto done; 451 } 452 break; 453 } 454 default: 455 err = EINVAL; 456 goto done; 457 } 458 if (is_possible) 459 bcopy(&range, pr_val, sizeof (range)); 460 done: 461 iptun_exit(iptun); 462 return (err); 463 } 464 465 uint_t 466 iptun_count(void) 467 { 468 return (iptun_tunnelcount); 469 } 470 471 /* 472 * Enter an iptun_t exclusively. This is essentially just a mutex, but we 473 * don't allow iptun_enter() to succeed on a tunnel if it's in the process of 474 * being deleted. 
475 */ 476 static int 477 iptun_enter(iptun_t *iptun) 478 { 479 mutex_enter(&iptun->iptun_lock); 480 while (iptun->iptun_flags & IPTUN_DELETE_PENDING) 481 cv_wait(&iptun->iptun_enter_cv, &iptun->iptun_lock); 482 if (iptun->iptun_flags & IPTUN_CONDEMNED) { 483 mutex_exit(&iptun->iptun_lock); 484 return (ENOENT); 485 } 486 return (0); 487 } 488 489 /* 490 * Exit the tunnel entered in iptun_enter(). 491 */ 492 static void 493 iptun_exit(iptun_t *iptun) 494 { 495 mutex_exit(&iptun->iptun_lock); 496 } 497 498 /* 499 * Enter the IP tunnel instance by datalink ID. 500 */ 501 static int 502 iptun_enter_by_linkid(datalink_id_t linkid, iptun_t **iptun) 503 { 504 int err; 505 506 mutex_enter(&iptun_hash_lock); 507 if (mod_hash_find(iptun_hash, IPTUN_HASH_KEY(linkid), 508 (mod_hash_val_t *)iptun) == 0) 509 err = iptun_enter(*iptun); 510 else 511 err = ENOENT; 512 if (err != 0) 513 *iptun = NULL; 514 mutex_exit(&iptun_hash_lock); 515 return (err); 516 } 517 518 /* 519 * Handle tasks that were deferred through the iptun_taskq. These fall into 520 * two categories: 521 * 522 * 1. Tasks that were defered because we didn't want to spend time doing them 523 * while in the data path. Only IPTUN_TASK_PMTU_UPDATE falls into this 524 * category. 525 * 526 * 2. Tasks that were defered because they require calling up to the mac 527 * module, and we can't call up to the mac module while holding locks. 528 * 529 * Handling 1 is easy; we just lookup the iptun_t, perform the task, exit the 530 * tunnel, and we're done. 531 * 532 * Handling 2 is tricky to get right without introducing race conditions and 533 * deadlocks with the mac module, as we cannot issue an upcall while in the 534 * iptun_t. The reason is that upcalls may try and enter the mac perimeter, 535 * while iptun callbacks (such as iptun_m_setprop()) called from the mac 536 * module will already have the perimeter held, and will then try and enter 537 * the iptun_t. 
 * You can see the lock ordering problem with this; this will deadlock.
 *
 * The safe way to do this is to enter the iptun_t in question and copy the
 * information we need out of it so that we can exit it and know that the
 * information being passed up to the upcalls won't be subject to modification
 * by other threads.  The problem now is that we need to exit it prior to
 * issuing the upcall, but once we do this, a thread could come along and
 * delete the iptun_t and thus the mac handle required to issue the upcall.
 * To prevent this, we set the IPTUN_UPCALL_PENDING flag prior to exiting the
 * iptun_t.  This flag is the condition associated with iptun_upcall_cv, which
 * iptun_delete() will cv_wait() on.  When the upcall completes, we clear
 * IPTUN_UPCALL_PENDING and cv_signal() any potentially waiting
 * iptun_delete().  We can thus still safely use iptun->iptun_mh after having
 * exited the iptun_t.
 */
static void
iptun_task_cb(void *arg)
{
	iptun_task_data_t	*itd = arg;
	iptun_task_t		task = itd->itd_task;
	datalink_id_t		linkid = itd->itd_linkid;
	iptun_t			*iptun;
	uint32_t		mtu;
	iptun_addr_t		addr;
	link_state_t		linkstate;
	size_t			header_size;
	iptun_header_t		header;

	/* The request has been copied into locals; release it right away. */
	kmem_free(itd, sizeof (*itd));

	/*
	 * Note that if the lookup fails, it's because the tunnel was deleted
	 * between the time the task was dispatched and now.  That isn't an
	 * error.
	 */
	if (iptun_enter_by_linkid(linkid, &iptun) != 0)
		return;

	/* Category 1: no upcall needed, so do the work while entered. */
	if (task == IPTUN_TASK_PMTU_UPDATE) {
		(void) iptun_update_mtu(iptun, 0);
		iptun_exit(iptun);
		return;
	}

	/* Category 2: flag the upcall before we exit the tunnel. */
	iptun->iptun_flags |= IPTUN_UPCALL_PENDING;

	/* Snapshot whatever the upcall needs while the tunnel is entered. */
	switch (task) {
	case IPTUN_TASK_MTU_UPDATE:
		mtu = iptun->iptun_mtu;
		break;
	case IPTUN_TASK_LADDR_UPDATE:
		addr = iptun->iptun_laddr;
		break;
	case IPTUN_TASK_RADDR_UPDATE:
		addr = iptun->iptun_raddr;
		break;
	case IPTUN_TASK_LINK_UPDATE:
		linkstate = IS_IPTUN_RUNNING(iptun) ?
		    LINK_STATE_UP : LINK_STATE_DOWN;
		break;
	case IPTUN_TASK_PDATA_UPDATE:
		header_size = iptun->iptun_header_size;
		header = iptun->iptun_header;
		break;
	default:
		ASSERT(0);
	}

	iptun_exit(iptun);

	/* Issue the upcall using the snapshot, with no iptun locks held. */
	switch (task) {
	case IPTUN_TASK_MTU_UPDATE:
		(void) mac_maxsdu_update(iptun->iptun_mh, mtu);
		break;
	case IPTUN_TASK_LADDR_UPDATE:
		mac_unicst_update(iptun->iptun_mh, (uint8_t *)&addr.ia_addr);
		break;
	case IPTUN_TASK_RADDR_UPDATE:
		mac_dst_update(iptun->iptun_mh, (uint8_t *)&addr.ia_addr);
		break;
	case IPTUN_TASK_LINK_UPDATE:
		mac_link_update(iptun->iptun_mh, linkstate);
		break;
	case IPTUN_TASK_PDATA_UPDATE:
		/* A header size of 0 means there is no plugin data. */
		if (mac_pdata_update(iptun->iptun_mh,
		    header_size == 0 ? NULL : &header, header_size) != 0)
			atomic_inc_64(&iptun->iptun_taskq_fail);
		break;
	}

	/*
	 * The upcall is done.  The lock is taken directly rather than via
	 * iptun_enter(), clearing the flag and signalling iptun_upcall_cv.
	 */
	mutex_enter(&iptun->iptun_lock);
	iptun->iptun_flags &= ~IPTUN_UPCALL_PENDING;
	cv_signal(&iptun->iptun_upcall_cv);
	mutex_exit(&iptun->iptun_lock);
}

/*
 * Queue a task for the given tunnel on iptun_taskq.  Neither the allocation
 * nor the dispatch sleeps; if either fails, the task is not queued and the
 * tunnel's iptun_taskq_fail counter is incremented.
 */
static void
iptun_task_dispatch(iptun_t *iptun, iptun_task_t iptun_task)
{
	iptun_task_data_t *itd;

	itd = kmem_alloc(sizeof (*itd), KM_NOSLEEP);
	if (itd == NULL) {
		atomic_inc_64(&iptun->iptun_taskq_fail);
		return;
	}
	/*
	 * The task carries the link ID rather than the iptun_t pointer;
	 * iptun_task_cb() looks the tunnel up again when it runs.
	 */
	itd->itd_task = iptun_task;
	itd->itd_linkid = iptun->iptun_linkid;
	if (ddi_taskq_dispatch(iptun_taskq, iptun_task_cb, itd, DDI_NOSLEEP)) {
		/* Dispatch failed, so iptun_task_cb() will never free itd. */
		atomic_inc_64(&iptun->iptun_taskq_fail);
		kmem_free(itd, sizeof (*itd));
	}
}

/*
 * Convert an iptun_addr_t to sockaddr_storage.  The sockaddr_storage is
 * zeroed first, so only the family and address are ever non-zero.
 */
static void
iptun_getaddr(iptun_addr_t *iptun_addr, struct sockaddr_storage *ss)
{
	struct sockaddr_in	*sin;
	struct sockaddr_in6	*sin6;

	bzero(ss, sizeof (*ss));
	switch (iptun_addr->ia_family) {
	case AF_INET:
		sin = (struct sockaddr_in *)ss;
		sin->sin_addr.s_addr = iptun_addr->ia_addr.iau_addr4;
		break;
	case AF_INET6:
		sin6 = (struct sockaddr_in6 *)ss;
		sin6->sin6_addr = iptun_addr->ia_addr.iau_addr6;
		break;
	default:
		ASSERT(0);
	}
	ss->ss_family = iptun_addr->ia_family;
}

/*
 * General purpose function to set an IP tunnel source or destination address.
679 */ 680 static int 681 iptun_setaddr(iptun_type_t iptun_type, iptun_addr_t *iptun_addr, 682 const struct sockaddr_storage *ss) 683 { 684 if (!IPTUN_ADDR_MATCH(iptun_type, ss->ss_family)) 685 return (EINVAL); 686 687 switch (ss->ss_family) { 688 case AF_INET: { 689 struct sockaddr_in *sin = (struct sockaddr_in *)ss; 690 691 if ((sin->sin_addr.s_addr == INADDR_ANY) || 692 (sin->sin_addr.s_addr == INADDR_BROADCAST) || 693 CLASSD(sin->sin_addr.s_addr)) { 694 return (EADDRNOTAVAIL); 695 } 696 iptun_addr->ia_addr.iau_addr4 = sin->sin_addr.s_addr; 697 break; 698 } 699 case AF_INET6: { 700 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)ss; 701 702 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) || 703 IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr) || 704 IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 705 return (EADDRNOTAVAIL); 706 } 707 iptun_addr->ia_addr.iau_addr6 = sin6->sin6_addr; 708 break; 709 } 710 default: 711 return (EAFNOSUPPORT); 712 } 713 iptun_addr->ia_family = ss->ss_family; 714 return (0); 715 } 716 717 static int 718 iptun_setladdr(iptun_t *iptun, const struct sockaddr_storage *laddr) 719 { 720 return (iptun_setaddr(iptun->iptun_typeinfo->iti_type, 721 &iptun->iptun_laddr, laddr)); 722 } 723 724 static int 725 iptun_setraddr(iptun_t *iptun, const struct sockaddr_storage *raddr) 726 { 727 if (!(iptun->iptun_typeinfo->iti_hasraddr)) 728 return (EINVAL); 729 return (iptun_setaddr(iptun->iptun_typeinfo->iti_type, 730 &iptun->iptun_raddr, raddr)); 731 } 732 733 static boolean_t 734 iptun_canbind(iptun_t *iptun) 735 { 736 /* 737 * A tunnel may bind when its source address has been set, and if its 738 * tunnel type requires one, also its destination address. 
739 */ 740 return ((iptun->iptun_flags & IPTUN_LADDR) && 741 ((iptun->iptun_flags & IPTUN_RADDR) || 742 !(iptun->iptun_typeinfo->iti_hasraddr))); 743 } 744 745 static int 746 iptun_bind(iptun_t *iptun) 747 { 748 conn_t *connp = iptun->iptun_connp; 749 int err; 750 751 ASSERT(iptun_canbind(iptun)); 752 753 switch (iptun->iptun_typeinfo->iti_type) { 754 case IPTUN_TYPE_IPV4: 755 /* 756 * When we set a tunnel's destination address, we do not care 757 * if the destination is reachable. Transient routing issues 758 * should not inhibit the creation of a tunnel interface, for 759 * example. For that reason, we pass in B_FALSE for the 760 * verify_dst argument of ip_proto_bind_connected_v4() (and 761 * similarly for IPv6 tunnels below). 762 */ 763 err = ip_proto_bind_connected_v4(connp, NULL, IPPROTO_ENCAP, 764 &iptun->iptun_laddr4, 0, iptun->iptun_raddr4, 0, B_TRUE, 765 B_FALSE, iptun->iptun_cred); 766 break; 767 case IPTUN_TYPE_IPV6: 768 err = ip_proto_bind_connected_v6(connp, NULL, IPPROTO_IPV6, 769 &iptun->iptun_laddr6, 0, &iptun->iptun_raddr6, NULL, 0, 770 B_TRUE, B_FALSE, iptun->iptun_cred); 771 break; 772 case IPTUN_TYPE_6TO4: 773 err = ip_proto_bind_laddr_v4(connp, NULL, IPPROTO_IPV6, 774 iptun->iptun_laddr4, 0, B_TRUE); 775 break; 776 } 777 778 if (err == 0) { 779 iptun->iptun_flags |= IPTUN_BOUND; 780 781 /* 782 * Now that we're bound with ip below us, this is a good time 783 * to initialize the destination path MTU and to re-calculate 784 * the tunnel's link MTU. 
785 */ 786 (void) iptun_update_mtu(iptun, 0); 787 788 if (IS_IPTUN_RUNNING(iptun)) 789 iptun_task_dispatch(iptun, IPTUN_TASK_LINK_UPDATE); 790 } 791 return (err); 792 } 793 794 static void 795 iptun_unbind(iptun_t *iptun) 796 { 797 ASSERT(iptun->iptun_flags & IPTUN_BOUND); 798 ASSERT(mutex_owned(&iptun->iptun_lock) || 799 (iptun->iptun_flags & IPTUN_CONDEMNED)); 800 ip_unbind(iptun->iptun_connp); 801 iptun->iptun_flags &= ~IPTUN_BOUND; 802 if (!(iptun->iptun_flags & IPTUN_CONDEMNED)) 803 iptun_task_dispatch(iptun, IPTUN_TASK_LINK_UPDATE); 804 } 805 806 /* 807 * Re-generate the template data-link header for a given IP tunnel given the 808 * tunnel's current parameters. 809 */ 810 static void 811 iptun_headergen(iptun_t *iptun, boolean_t update_mac) 812 { 813 switch (iptun->iptun_typeinfo->iti_ipvers) { 814 case IPV4_VERSION: 815 /* 816 * We only need to use a custom IP header if the administrator 817 * has supplied a non-default hoplimit. 818 */ 819 if (iptun->iptun_hoplimit == IPTUN_DEFAULT_HOPLIMIT) { 820 iptun->iptun_header_size = 0; 821 break; 822 } 823 iptun->iptun_header_size = sizeof (ipha_t); 824 iptun->iptun_header4.ipha_version_and_hdr_length = 825 IP_SIMPLE_HDR_VERSION; 826 iptun->iptun_header4.ipha_fragment_offset_and_flags = 827 htons(IPH_DF); 828 iptun->iptun_header4.ipha_ttl = iptun->iptun_hoplimit; 829 break; 830 case IPV6_VERSION: { 831 ip6_t *ip6hp = &iptun->iptun_header6.it6h_ip6h; 832 833 /* 834 * We only need to use a custom IPv6 header if either the 835 * administrator has supplied a non-default hoplimit, or we 836 * need to include an encapsulation limit option in the outer 837 * header. 
838 */ 839 if (iptun->iptun_hoplimit == IPTUN_DEFAULT_HOPLIMIT && 840 iptun->iptun_encaplimit == 0) { 841 iptun->iptun_header_size = 0; 842 break; 843 } 844 845 (void) memset(ip6hp, 0, sizeof (*ip6hp)); 846 if (iptun->iptun_encaplimit == 0) { 847 iptun->iptun_header_size = sizeof (ip6_t); 848 ip6hp->ip6_nxt = IPPROTO_NONE; 849 } else { 850 iptun_encaplim_t *iel; 851 852 iptun->iptun_header_size = sizeof (iptun_ipv6hdrs_t); 853 /* 854 * The mac_ipv6 plugin requires ip6_plen to be in host 855 * byte order and reflect the extension headers 856 * present in the template. The actual network byte 857 * order ip6_plen will be set on a per-packet basis on 858 * transmit. 859 */ 860 ip6hp->ip6_plen = sizeof (*iel); 861 ip6hp->ip6_nxt = IPPROTO_DSTOPTS; 862 iel = &iptun->iptun_header6.it6h_encaplim; 863 *iel = iptun_encaplim_init; 864 iel->iel_telopt.ip6ot_encap_limit = 865 iptun->iptun_encaplimit; 866 } 867 868 ip6hp->ip6_hlim = iptun->iptun_hoplimit; 869 break; 870 } 871 } 872 873 if (update_mac) 874 iptun_task_dispatch(iptun, IPTUN_TASK_PDATA_UPDATE); 875 } 876 877 /* 878 * Insert inbound and outbound IPv4 and IPv6 policy into the given policy 879 * head. 880 */ 881 static boolean_t 882 iptun_insert_simple_policies(ipsec_policy_head_t *ph, ipsec_act_t *actp, 883 uint_t n, netstack_t *ns) 884 { 885 int f = IPSEC_AF_V4; 886 887 if (!ipsec_polhead_insert(ph, actp, n, f, IPSEC_TYPE_INBOUND, ns) || 888 !ipsec_polhead_insert(ph, actp, n, f, IPSEC_TYPE_OUTBOUND, ns)) 889 return (B_FALSE); 890 891 f = IPSEC_AF_V6; 892 return (ipsec_polhead_insert(ph, actp, n, f, IPSEC_TYPE_INBOUND, ns) && 893 ipsec_polhead_insert(ph, actp, n, f, IPSEC_TYPE_OUTBOUND, ns)); 894 } 895 896 /* 897 * Used to set IPsec policy when policy is set through the IPTUN_CREATE or 898 * IPTUN_MODIFY ioctls. 
 *
 * The caller must hold the tunnel's iptun_lock.  Returns 0 on success, EINVAL
 * if self-encapsulation was requested, ENOMEM if the action vector or policy
 * entries could not be allocated, EBUSY if the tunnel policy has ITPF_P_TUNNEL
 * set, or an error from dls_mgmt_get_linkinfo(), create_tunnel_policy() or
 * ipsec_copy_polhead().
 */
static int
iptun_set_sec_simple(iptun_t *iptun, const ipsec_req_t *ipsr)
{
	int		rc = 0;
	uint_t		nact;
	ipsec_act_t	*actp = NULL;
	boolean_t	clear_all, old_policy = B_FALSE;
	ipsec_tun_pol_t	*itp;
	char		name[MAXLINKNAMELEN];
	uint64_t	gen;
	netstack_t	*ns = iptun->iptun_ns;

	/* Can't specify self-encap on a tunnel. */
	if (ipsr->ipsr_self_encap_req != 0)
		return (EINVAL);

	/*
	 * If it's a "clear-all" entry, unset the security flags and resume
	 * normal cleartext (or inherit-from-global) policy.
	 */
	clear_all = ((ipsr->ipsr_ah_req & IPTUN_IPSEC_REQ_MASK) == 0 &&
	    (ipsr->ipsr_esp_req & IPTUN_IPSEC_REQ_MASK) == 0);

	ASSERT(mutex_owned(&iptun->iptun_lock));
	itp = iptun->iptun_itp;
	if (itp == NULL) {
		/* Nothing to clear if there was never any tunnel policy. */
		if (clear_all)
			goto bail;
		if ((rc = dls_mgmt_get_linkinfo(iptun->iptun_linkid, name, NULL,
		    NULL, NULL)) != 0)
			goto bail;
		ASSERT(name[0] != '\0');
		if ((itp = create_tunnel_policy(name, &rc, &gen, ns)) == NULL)
			goto bail;
		iptun->iptun_itp = itp;
	}

	/* Allocate the actvec now, before holding itp or polhead locks. */
	ipsec_actvec_from_req(ipsr, &actp, &nact, ns);
	if (actp == NULL) {
		rc = ENOMEM;
		goto bail;
	}

	/*
	 * Just write on the active polhead.  Save the primary/secondary stuff
	 * for spdsock operations.
	 *
	 * Mutex because we need to write to the polhead AND flags atomically.
	 * Other threads will acquire the polhead lock as a reader if the
	 * (unprotected) flag is set.
	 */
	mutex_enter(&itp->itp_lock);
	if (itp->itp_flags & ITPF_P_TUNNEL) {
		/* Oops, we lost a race.  Let's get out of here. */
		rc = EBUSY;
		goto mutex_bail;
	}
	old_policy = ((itp->itp_flags & ITPF_P_ACTIVE) != 0);

	if (old_policy) {
		/*
		 * Keep a copy of the existing policy in the inactive polhead
		 * so that it can be restored if the update below fails.
		 */
		ITPF_CLONE(itp->itp_flags);
		rc = ipsec_copy_polhead(itp->itp_policy, itp->itp_inactive, ns);
		if (rc != 0) {
			/* inactive has already been cleared. */
			itp->itp_flags &= ~ITPF_IFLAGS;
			goto mutex_bail;
		}
		rw_enter(&itp->itp_policy->iph_lock, RW_WRITER);
		ipsec_polhead_flush(itp->itp_policy, ns);
	} else {
		/* Else assume itp->itp_policy is already flushed. */
		rw_enter(&itp->itp_policy->iph_lock, RW_WRITER);
	}

	if (clear_all) {
		ASSERT(avl_numnodes(&itp->itp_policy->iph_rulebyid) == 0);
		itp->itp_flags &= ~ITPF_PFLAGS;
		rw_exit(&itp->itp_policy->iph_lock);
		old_policy = B_FALSE;	/* Clear out the inactive one too. */
		goto recover_bail;
	}

	if (iptun_insert_simple_policies(itp->itp_policy, actp, nact, ns)) {
		rw_exit(&itp->itp_policy->iph_lock);
		/*
		 * Adjust MTU and make sure the DL side knows what's up.
		 */
		itp->itp_flags = ITPF_P_ACTIVE;
		(void) iptun_update_mtu(iptun, 0);
		old_policy = B_FALSE;	/* Blank out inactive - we succeeded */
	} else {
		rw_exit(&itp->itp_policy->iph_lock);
		rc = ENOMEM;
	}

recover_bail:
	if (old_policy) {
		/* Recover the saved policy into the active polhead. */
		ipsec_swap_policy(itp->itp_policy, itp->itp_inactive, ns);
		ITPF_SWAP(itp->itp_flags);
	}

	/* Clear policy in inactive polhead. */
	itp->itp_flags &= ~ITPF_IFLAGS;
	rw_enter(&itp->itp_inactive->iph_lock, RW_WRITER);
	ipsec_polhead_flush(itp->itp_inactive, ns);
	rw_exit(&itp->itp_inactive->iph_lock);

mutex_bail:
	mutex_exit(&itp->itp_lock);

bail:
	/* actp is still NULL if we bailed before allocating the actvec. */
	if (actp != NULL)
		ipsec_actvec_free(actp, nact);

	return (rc);
}

/*
 * Look up the iptun_type_table entry for the given tunnel type.  If no entry
 * matches, the table's terminating IPTUN_TYPE_UNKNOWN entry is returned, so
 * the result is never NULL.
 */
static iptun_typeinfo_t *
iptun_gettypeinfo(iptun_type_t type)
{
	int i;

	for (i = 0; iptun_type_table[i].iti_type != IPTUN_TYPE_UNKNOWN; i++) {
		if (iptun_type_table[i].iti_type == type)
			break;
	}
	return (&iptun_type_table[i]);
}

/*
 * Set the parameters included in ik on the tunnel iptun.  Parameters that can
 * only be set at creation time are set in iptun_create().
 *
 * If an error occurs after an address has been changed, the tunnel's
 * original addresses and flags are restored before returning the error.
 */
static int
iptun_setparams(iptun_t *iptun, const iptun_kparams_t *ik)
{
	int		err = 0;
	netstack_t	*ns = iptun->iptun_ns;
	iptun_addr_t	orig_laddr, orig_raddr;
	uint_t		orig_flags = iptun->iptun_flags;

	if (ik->iptun_kparam_flags & IPTUN_KPARAM_LADDR) {
		/* Only save the old address if one had actually been set. */
		if (orig_flags & IPTUN_LADDR)
			orig_laddr = iptun->iptun_laddr;
		/* Nothing has been modified yet, so there's nothing to undo. */
		if ((err = iptun_setladdr(iptun, &ik->iptun_kparam_laddr)) != 0)
			return (err);
		iptun->iptun_flags |= IPTUN_LADDR;
	}

	if (ik->iptun_kparam_flags & IPTUN_KPARAM_RADDR) {
		if (orig_flags & IPTUN_RADDR)
			orig_raddr = iptun->iptun_raddr;
		if ((err = iptun_setraddr(iptun, &ik->iptun_kparam_raddr)) != 0)
			goto done;
		iptun->iptun_flags |= IPTUN_RADDR;
	}

	if (ik->iptun_kparam_flags & IPTUN_KPARAM_SECINFO) {
		/*
		 * Set IPsec policy originating from the ifconfig(1M) command
		 * line.  This is traditionally called "simple" policy because
		 * the ipsec_req_t (iptun_kparam_secinfo) can only describe a
		 * simple policy of "do ESP on everything" and/or "do AH on
		 * everything" (as opposed to the rich policy that can be
		 * defined with ipsecconf(1M)).
		 */
		if (iptun->iptun_typeinfo->iti_type == IPTUN_TYPE_6TO4) {
			/*
			 * Can't set security properties for automatic
			 * tunnels.
			 */
			err = EINVAL;
			goto done;
		}

		if (!ipsec_loaded(ns->netstack_ipsec)) {
			/* If IPsec can be loaded, try and load it now. */
			if (ipsec_failed(ns->netstack_ipsec)) {
				err = EPROTONOSUPPORT;
				goto done;
			}
			ipsec_loader_loadnow(ns->netstack_ipsec);
			/*
			 * ipsec_loader_loadnow() returns while IPsec is
			 * loaded asynchronously.  While a method exists to
			 * wait for IPsec to load (ipsec_loader_wait()), it
			 * requires use of a STREAMS queue to do a qwait().
			 * We're not in STREAMS context here, and so we can't
			 * use it.  This is not a problem in practice because
			 * in the vast majority of cases, key management and
			 * global policy will have loaded before any tunnels
			 * are plumbed, and so IPsec will already have been
			 * loaded.
			 */
			err = EAGAIN;
			goto done;
		}

		err = iptun_set_sec_simple(iptun, &ik->iptun_kparam_secinfo);
		if (err == 0) {
			/* Remember the simple policy that is now in effect. */
			iptun->iptun_flags |= IPTUN_SIMPLE_POLICY;
			iptun->iptun_simple_policy = ik->iptun_kparam_secinfo;
		}
	}
done:
	if (err != 0) {
		/* Restore original source and destination. */
		if (ik->iptun_kparam_flags & IPTUN_KPARAM_LADDR &&
		    (orig_flags & IPTUN_LADDR))
			iptun->iptun_laddr = orig_laddr;
		if ((ik->iptun_kparam_flags & IPTUN_KPARAM_RADDR) &&
		    (orig_flags & IPTUN_RADDR))
			iptun->iptun_raddr = orig_raddr;
		iptun->iptun_flags = orig_flags;
	}
	return (err);
}

/*
 * Register the tunnel with the mac module as a new MAC instance, using the
 * tunnel's current addresses, MTU and (if any) header template.  On success
 * the tunnel is flagged IPTUN_MAC_REGISTERED.  Returns 0 on success, EINVAL
 * if the registration structure could not be allocated, or the error from
 * mac_register().
 */
static int
iptun_register(iptun_t *iptun)
{
	mac_register_t	*mac;
	int		err;

	ASSERT(!(iptun->iptun_flags & IPTUN_MAC_REGISTERED));

	if ((mac = mac_alloc(MAC_VERSION)) == NULL)
		return (EINVAL);

	mac->m_type_ident = iptun->iptun_typeinfo->iti_ident;
	mac->m_driver = iptun;
	mac->m_dip = iptun_dip;
	mac->m_instance = (uint_t)-1;
	mac->m_src_addr = (uint8_t *)&iptun->iptun_laddr.ia_addr;
	/* Tunnel types without a remote address have no destination. */
	mac->m_dst_addr = iptun->iptun_typeinfo->iti_hasraddr ?
	    (uint8_t *)&iptun->iptun_raddr.ia_addr : NULL;
	mac->m_callbacks = &iptun_m_callbacks;
	mac->m_min_sdu = iptun->iptun_typeinfo->iti_minmtu;
	mac->m_max_sdu = iptun->iptun_mtu;
	/* Plugin data is only supplied when a header template is in use. */
	if (iptun->iptun_header_size != 0) {
		mac->m_pdata = &iptun->iptun_header;
		mac->m_pdata_size = iptun->iptun_header_size;
	}
	if ((err = mac_register(mac, &iptun->iptun_mh)) == 0)
		iptun->iptun_flags |= IPTUN_MAC_REGISTERED;
	/* The registration structure is freed whether or not we succeeded. */
	mac_free(mac);
	return (err);
}

/*
 * Unregister the tunnel from the mac module.  IPTUN_MAC_REGISTERED is only
 * cleared if mac_unregister() succeeds; its error is returned otherwise.
 */
static int
iptun_unregister(iptun_t *iptun)
{
	int err;

	ASSERT(iptun->iptun_flags & IPTUN_MAC_REGISTERED);
	if ((err = mac_unregister(iptun->iptun_mh)) == 0)
		iptun->iptun_flags &= ~IPTUN_MAC_REGISTERED;
	return (err);
}

static conn_t *
iptun_conn_create(iptun_t *iptun, netstack_t *ns, cred_t *credp)
{
	conn_t *connp;

	if ((connp = ipcl_conn_create(IPCL_IPCCONN, KM_NOSLEEP, ns)) == NULL)
		return (NULL);

	connp->conn_flags |= IPCL_IPTUN;
	connp->conn_iptun = iptun;
	connp->conn_recv = iptun_input;
	connp->conn_rq = ns->netstack_iptun->iptuns_g_q;
	connp->conn_wq = WR(connp->conn_rq);
	/*
	 * For exclusive stacks we set conn_zoneid to GLOBAL_ZONEID as is done
	 * for all other conn_t's.
	 *
	 * Note that there's an important distinction between iptun_zoneid and
	 * conn_zoneid.  The conn_zoneid is set to GLOBAL_ZONEID in non-global
	 * exclusive stack zones to make the ip module believe that the
	 * non-global zone is actually a global zone.  Therefore, when
	 * interacting with the ip module, we must always use conn_zoneid.
	 */
	connp->conn_zoneid = (ns->netstack_stackid == GLOBAL_NETSTACKID) ?
1186 crgetzoneid(credp) : GLOBAL_ZONEID; 1187 connp->conn_cred = credp; 1188 /* crfree() is done in ipcl_conn_destroy(), called by CONN_DEC_REF() */ 1189 crhold(connp->conn_cred); 1190 1191 connp->conn_send = iptun->iptun_typeinfo->iti_txfunc; 1192 connp->conn_af_isv6 = iptun->iptun_typeinfo->iti_ipvers == IPV6_VERSION; 1193 ASSERT(connp->conn_ref == 1); 1194 1195 mutex_enter(&connp->conn_lock); 1196 connp->conn_state_flags &= ~CONN_INCIPIENT; 1197 mutex_exit(&connp->conn_lock); 1198 return (connp); 1199 } 1200 1201 static void 1202 iptun_conn_destroy(conn_t *connp) 1203 { 1204 ip_quiesce_conn(connp); 1205 connp->conn_iptun = NULL; 1206 ASSERT(connp->conn_ref == 1); 1207 CONN_DEC_REF(connp); 1208 } 1209 1210 static int 1211 iptun_create_g_q(iptun_stack_t *iptuns, cred_t *credp) 1212 { 1213 int err; 1214 conn_t *connp; 1215 1216 ASSERT(iptuns->iptuns_g_q == NULL); 1217 /* 1218 * The global queue for this stack is set when iptunq_open() calls 1219 * iptun_set_g_q(). 1220 */ 1221 err = ldi_open_by_name(IPTUNQ_DEV, FWRITE|FREAD, credp, 1222 &iptuns->iptuns_g_q_lh, iptun_ldi_ident); 1223 if (err == 0) { 1224 connp = iptuns->iptuns_g_q->q_ptr; 1225 connp->conn_recv = iptun_input; 1226 } 1227 return (err); 1228 } 1229 1230 static iptun_t * 1231 iptun_alloc(void) 1232 { 1233 iptun_t *iptun; 1234 1235 if ((iptun = kmem_cache_alloc(iptun_cache, KM_NOSLEEP)) != NULL) { 1236 bzero(iptun, sizeof (*iptun)); 1237 atomic_inc_32(&iptun_tunnelcount); 1238 } 1239 return (iptun); 1240 } 1241 1242 static void 1243 iptun_free(iptun_t *iptun) 1244 { 1245 ASSERT(iptun->iptun_flags & IPTUN_CONDEMNED); 1246 1247 if (iptun->iptun_flags & IPTUN_HASH_INSERTED) { 1248 iptun_stack_t *iptuns = iptun->iptun_iptuns; 1249 1250 mutex_enter(&iptun_hash_lock); 1251 VERIFY(mod_hash_remove(iptun_hash, 1252 IPTUN_HASH_KEY(iptun->iptun_linkid), 1253 (mod_hash_val_t *)&iptun) == 0); 1254 mutex_exit(&iptun_hash_lock); 1255 iptun->iptun_flags &= ~IPTUN_HASH_INSERTED; 1256 mutex_enter(&iptuns->iptuns_lock); 
1257 list_remove(&iptuns->iptuns_iptunlist, iptun); 1258 mutex_exit(&iptuns->iptuns_lock); 1259 } 1260 1261 if (iptun->iptun_flags & IPTUN_BOUND) 1262 iptun_unbind(iptun); 1263 1264 /* 1265 * After iptun_unregister(), there will be no threads executing a 1266 * downcall from the mac module, including in the tx datapath. 1267 */ 1268 if (iptun->iptun_flags & IPTUN_MAC_REGISTERED) 1269 VERIFY(iptun_unregister(iptun) == 0); 1270 1271 if (iptun->iptun_itp != NULL) { 1272 /* 1273 * Remove from the AVL tree, AND release the reference iptun_t 1274 * itself holds on the ITP. 1275 */ 1276 itp_unlink(iptun->iptun_itp, iptun->iptun_ns); 1277 ITP_REFRELE(iptun->iptun_itp, iptun->iptun_ns); 1278 iptun->iptun_itp = NULL; 1279 iptun->iptun_flags &= ~IPTUN_SIMPLE_POLICY; 1280 } 1281 1282 /* 1283 * After ipcl_conn_destroy(), there will be no threads executing an 1284 * upcall from ip (i.e., iptun_input()), and it is then safe to free 1285 * the iptun_t. 1286 */ 1287 if (iptun->iptun_connp != NULL) { 1288 iptun_conn_destroy(iptun->iptun_connp); 1289 iptun->iptun_connp = NULL; 1290 } 1291 1292 netstack_rele(iptun->iptun_ns); 1293 iptun->iptun_ns = NULL; 1294 crfree(iptun->iptun_cred); 1295 iptun->iptun_cred = NULL; 1296 1297 kmem_cache_free(iptun_cache, iptun); 1298 atomic_dec_32(&iptun_tunnelcount); 1299 } 1300 1301 int 1302 iptun_create(iptun_kparams_t *ik, cred_t *credp) 1303 { 1304 iptun_t *iptun = NULL; 1305 int err = 0, mherr; 1306 char linkname[MAXLINKNAMELEN]; 1307 ipsec_tun_pol_t *itp; 1308 netstack_t *ns = NULL; 1309 iptun_stack_t *iptuns; 1310 datalink_id_t tmpid; 1311 zoneid_t zoneid = crgetzoneid(credp); 1312 boolean_t link_created = B_FALSE; 1313 1314 /* The tunnel type is mandatory */ 1315 if (!(ik->iptun_kparam_flags & IPTUN_KPARAM_TYPE)) 1316 return (EINVAL); 1317 1318 /* 1319 * Is the linkid that the caller wishes to associate with this new 1320 * tunnel assigned to this zone? 
1321 */ 1322 if (zone_check_datalink(&zoneid, ik->iptun_kparam_linkid) != 0) { 1323 if (zoneid != GLOBAL_ZONEID) 1324 return (EINVAL); 1325 } else if (zoneid == GLOBAL_ZONEID) { 1326 return (EINVAL); 1327 } 1328 1329 /* 1330 * Make sure that we're not trying to create a tunnel that has already 1331 * been created. 1332 */ 1333 if (iptun_enter_by_linkid(ik->iptun_kparam_linkid, &iptun) == 0) { 1334 iptun_exit(iptun); 1335 iptun = NULL; 1336 err = EEXIST; 1337 goto done; 1338 } 1339 1340 ns = netstack_find_by_cred(credp); 1341 iptuns = ns->netstack_iptun; 1342 1343 /* 1344 * Before we create any tunnel, we need to ensure that the default 1345 * STREAMS queue (used to satisfy the ip module's requirement for one) 1346 * is created. We only do this once per stack. The stream is closed 1347 * when the stack is destroyed in iptun_stack_fni(). 1348 */ 1349 mutex_enter(&iptuns->iptuns_lock); 1350 if (iptuns->iptuns_g_q == NULL) 1351 err = iptun_create_g_q(iptuns, zone_kcred()); 1352 mutex_exit(&iptuns->iptuns_lock); 1353 if (err != 0) 1354 goto done; 1355 1356 if ((iptun = iptun_alloc()) == NULL) { 1357 err = ENOMEM; 1358 goto done; 1359 } 1360 1361 iptun->iptun_linkid = ik->iptun_kparam_linkid; 1362 iptun->iptun_zoneid = zoneid; 1363 crhold(credp); 1364 iptun->iptun_cred = credp; 1365 iptun->iptun_ns = ns; 1366 1367 iptun->iptun_typeinfo = iptun_gettypeinfo(ik->iptun_kparam_type); 1368 if (iptun->iptun_typeinfo->iti_type == IPTUN_TYPE_UNKNOWN) { 1369 err = EINVAL; 1370 goto done; 1371 } 1372 1373 if (ik->iptun_kparam_flags & IPTUN_KPARAM_IMPLICIT) 1374 iptun->iptun_flags |= IPTUN_IMPLICIT; 1375 1376 if ((err = iptun_setparams(iptun, ik)) != 0) 1377 goto done; 1378 1379 iptun->iptun_hoplimit = IPTUN_DEFAULT_HOPLIMIT; 1380 if (iptun->iptun_typeinfo->iti_type == IPTUN_TYPE_IPV6) 1381 iptun->iptun_encaplimit = IPTUN_DEFAULT_ENCAPLIMIT; 1382 1383 iptun_headergen(iptun, B_FALSE); 1384 1385 iptun->iptun_connp = iptun_conn_create(iptun, ns, credp); 1386 if (iptun->iptun_connp == 
NULL) { 1387 err = ENOMEM; 1388 goto done; 1389 } 1390 1391 iptun->iptun_mtu = iptun->iptun_typeinfo->iti_maxmtu; 1392 iptun->iptun_dpmtu = iptun->iptun_mtu; 1393 1394 /* 1395 * Find an ITP based on linkname. If we have parms already set via 1396 * the iptun_setparams() call above, it may have created an ITP for 1397 * us. We always try get_tunnel_policy() for DEBUG correctness 1398 * checks, and we may wish to refactor this to only check when 1399 * iptun_itp is NULL. 1400 */ 1401 if ((err = dls_mgmt_get_linkinfo(iptun->iptun_linkid, linkname, NULL, 1402 NULL, NULL)) != 0) 1403 goto done; 1404 if ((itp = get_tunnel_policy(linkname, ns)) != NULL) 1405 iptun->iptun_itp = itp; 1406 1407 /* 1408 * See if we have the necessary IP addresses assigned to this tunnel 1409 * to try and bind them with ip underneath us. If we're not ready to 1410 * bind yet, then we'll defer the bind operation until the addresses 1411 * are modified. 1412 */ 1413 if (iptun_canbind(iptun) && ((err = iptun_bind(iptun)) != 0)) 1414 goto done; 1415 1416 if ((err = iptun_register(iptun)) != 0) 1417 goto done; 1418 1419 err = dls_devnet_create(iptun->iptun_mh, iptun->iptun_linkid, 1420 iptun->iptun_zoneid); 1421 if (err != 0) 1422 goto done; 1423 link_created = B_TRUE; 1424 1425 /* 1426 * We hash by link-id as that is the key used by all other iptun 1427 * interfaces (modify, delete, etc.). 
1428 */ 1429 if ((mherr = mod_hash_insert(iptun_hash, 1430 IPTUN_HASH_KEY(iptun->iptun_linkid), (mod_hash_val_t)iptun)) == 0) { 1431 mutex_enter(&iptuns->iptuns_lock); 1432 list_insert_head(&iptuns->iptuns_iptunlist, iptun); 1433 mutex_exit(&iptuns->iptuns_lock); 1434 iptun->iptun_flags |= IPTUN_HASH_INSERTED; 1435 } else if (mherr == MH_ERR_NOMEM) { 1436 err = ENOMEM; 1437 } else if (mherr == MH_ERR_DUPLICATE) { 1438 err = EEXIST; 1439 } else { 1440 err = EINVAL; 1441 } 1442 1443 done: 1444 if (iptun == NULL && ns != NULL) 1445 netstack_rele(ns); 1446 if (err != 0 && iptun != NULL) { 1447 if (link_created) { 1448 (void) dls_devnet_destroy(iptun->iptun_mh, &tmpid, 1449 B_TRUE); 1450 } 1451 iptun->iptun_flags |= IPTUN_CONDEMNED; 1452 iptun_free(iptun); 1453 } 1454 return (err); 1455 } 1456 1457 int 1458 iptun_delete(datalink_id_t linkid, cred_t *credp) 1459 { 1460 int err; 1461 iptun_t *iptun = NULL; 1462 1463 if ((err = iptun_enter_by_linkid(linkid, &iptun)) != 0) 1464 return (err); 1465 1466 /* One cannot delete a tunnel that belongs to another zone. */ 1467 if (iptun->iptun_zoneid != crgetzoneid(credp)) { 1468 iptun_exit(iptun); 1469 return (EACCES); 1470 } 1471 1472 /* 1473 * We need to exit iptun in order to issue calls up the stack such as 1474 * dls_devnet_destroy(). If we call up while still in iptun, deadlock 1475 * with calls coming down the stack is possible. We prevent other 1476 * threads from entering this iptun after we've exited it by setting 1477 * the IPTUN_DELETE_PENDING flag. This will cause callers of 1478 * iptun_enter() to block waiting on iptun_enter_cv. The assumption 1479 * here is that the functions we're calling while IPTUN_DELETE_PENDING 1480 * is set dont resuult in an iptun_enter() call, as that would result 1481 * in deadlock. 1482 */ 1483 iptun->iptun_flags |= IPTUN_DELETE_PENDING; 1484 1485 /* Wait for any pending upcall to the mac module to complete. 
*/ 1486 while (iptun->iptun_flags & IPTUN_UPCALL_PENDING) 1487 cv_wait(&iptun->iptun_upcall_cv, &iptun->iptun_lock); 1488 1489 iptun_exit(iptun); 1490 1491 if ((err = dls_devnet_destroy(iptun->iptun_mh, &linkid, B_TRUE)) == 0) { 1492 /* 1493 * mac_disable() will fail with EBUSY if there are references 1494 * to the iptun MAC. If there are none, then mac_disable() 1495 * will assure that none can be acquired until the MAC is 1496 * unregistered. 1497 * 1498 * XXX CR 6791335 prevents us from calling mac_disable() prior 1499 * to dls_devnet_destroy(), so we unfortunately need to 1500 * attempt to re-create the devnet node if mac_disable() 1501 * fails. 1502 */ 1503 if ((err = mac_disable(iptun->iptun_mh)) != 0) { 1504 (void) dls_devnet_create(iptun->iptun_mh, linkid, 1505 iptun->iptun_zoneid); 1506 } 1507 } 1508 1509 /* 1510 * Now that we know the fate of this iptun_t, we need to clear 1511 * IPTUN_DELETE_PENDING, and set IPTUN_CONDEMNED if the iptun_t is 1512 * slated to be freed. Either way, we need to signal the threads 1513 * waiting in iptun_enter() so that they can either fail if 1514 * IPTUN_CONDEMNED is set, or continue if it's not. 1515 */ 1516 mutex_enter(&iptun->iptun_lock); 1517 iptun->iptun_flags &= ~IPTUN_DELETE_PENDING; 1518 if (err == 0) 1519 iptun->iptun_flags |= IPTUN_CONDEMNED; 1520 cv_broadcast(&iptun->iptun_enter_cv); 1521 mutex_exit(&iptun->iptun_lock); 1522 1523 /* 1524 * Note that there is no danger in calling iptun_free() after having 1525 * dropped the iptun_lock since callers of iptun_enter() at this point 1526 * are doing so from iptun_enter_by_linkid() (mac_disable() got rid of 1527 * threads entering from mac callbacks which call iptun_enter() 1528 * directly) which holds iptun_hash_lock, and iptun_free() grabs this 1529 * lock in order to remove the iptun_t from the hash table. 
1530 */ 1531 if (err == 0) 1532 iptun_free(iptun); 1533 1534 return (err); 1535 } 1536 1537 int 1538 iptun_modify(const iptun_kparams_t *ik, cred_t *credp) 1539 { 1540 iptun_t *iptun; 1541 boolean_t laddr_change = B_FALSE, raddr_change = B_FALSE; 1542 int err; 1543 1544 if ((err = iptun_enter_by_linkid(ik->iptun_kparam_linkid, &iptun)) != 0) 1545 return (err); 1546 1547 /* One cannot modify a tunnel that belongs to another zone. */ 1548 if (iptun->iptun_zoneid != crgetzoneid(credp)) { 1549 err = EACCES; 1550 goto done; 1551 } 1552 1553 /* The tunnel type cannot be changed */ 1554 if (ik->iptun_kparam_flags & IPTUN_KPARAM_TYPE) { 1555 err = EINVAL; 1556 goto done; 1557 } 1558 1559 if ((err = iptun_setparams(iptun, ik)) != 0) 1560 goto done; 1561 iptun_headergen(iptun, B_FALSE); 1562 1563 /* 1564 * If any of the tunnel's addresses has been modified and the tunnel 1565 * has the necessary addresses assigned to it, we need to try to bind 1566 * with ip underneath us. If we're not ready to bind yet, then we'll 1567 * try again when the addresses are modified later. 1568 */ 1569 laddr_change = (ik->iptun_kparam_flags & IPTUN_KPARAM_LADDR); 1570 raddr_change = (ik->iptun_kparam_flags & IPTUN_KPARAM_RADDR); 1571 if (laddr_change || raddr_change) { 1572 if (iptun->iptun_flags & IPTUN_BOUND) 1573 iptun_unbind(iptun); 1574 if (iptun_canbind(iptun) && (err = iptun_bind(iptun)) != 0) { 1575 if (laddr_change) 1576 iptun->iptun_flags &= ~IPTUN_LADDR; 1577 if (raddr_change) 1578 iptun->iptun_flags &= ~IPTUN_RADDR; 1579 goto done; 1580 } 1581 } 1582 1583 if (laddr_change) 1584 iptun_task_dispatch(iptun, IPTUN_TASK_LADDR_UPDATE); 1585 if (raddr_change) 1586 iptun_task_dispatch(iptun, IPTUN_TASK_RADDR_UPDATE); 1587 1588 done: 1589 iptun_exit(iptun); 1590 return (err); 1591 } 1592 1593 /* Given an IP tunnel's datalink id, fill in its parameters. 
*/ 1594 int 1595 iptun_info(iptun_kparams_t *ik, cred_t *credp) 1596 { 1597 iptun_t *iptun; 1598 int err; 1599 1600 /* Is the tunnel link visible from the caller's zone? */ 1601 if (!dls_devnet_islinkvisible(ik->iptun_kparam_linkid, 1602 crgetzoneid(credp))) 1603 return (ENOENT); 1604 1605 if ((err = iptun_enter_by_linkid(ik->iptun_kparam_linkid, &iptun)) != 0) 1606 return (err); 1607 1608 bzero(ik, sizeof (iptun_kparams_t)); 1609 1610 ik->iptun_kparam_linkid = iptun->iptun_linkid; 1611 ik->iptun_kparam_type = iptun->iptun_typeinfo->iti_type; 1612 ik->iptun_kparam_flags |= IPTUN_KPARAM_TYPE; 1613 1614 if (iptun->iptun_flags & IPTUN_LADDR) { 1615 iptun_getaddr(&iptun->iptun_laddr, &ik->iptun_kparam_laddr); 1616 ik->iptun_kparam_flags |= IPTUN_KPARAM_LADDR; 1617 } 1618 if (iptun->iptun_flags & IPTUN_RADDR) { 1619 iptun_getaddr(&iptun->iptun_raddr, &ik->iptun_kparam_raddr); 1620 ik->iptun_kparam_flags |= IPTUN_KPARAM_RADDR; 1621 } 1622 1623 if (iptun->iptun_flags & IPTUN_IMPLICIT) 1624 ik->iptun_kparam_flags |= IPTUN_KPARAM_IMPLICIT; 1625 1626 if (iptun->iptun_itp != NULL) { 1627 mutex_enter(&iptun->iptun_itp->itp_lock); 1628 if (iptun->iptun_itp->itp_flags & ITPF_P_ACTIVE) { 1629 ik->iptun_kparam_flags |= IPTUN_KPARAM_IPSECPOL; 1630 if (iptun->iptun_flags & IPTUN_SIMPLE_POLICY) { 1631 ik->iptun_kparam_flags |= IPTUN_KPARAM_SECINFO; 1632 ik->iptun_kparam_secinfo = 1633 iptun->iptun_simple_policy; 1634 } 1635 } 1636 mutex_exit(&iptun->iptun_itp->itp_lock); 1637 } 1638 1639 done: 1640 iptun_exit(iptun); 1641 return (err); 1642 } 1643 1644 int 1645 iptun_set_6to4relay(netstack_t *ns, ipaddr_t relay_addr) 1646 { 1647 if (relay_addr == INADDR_BROADCAST || CLASSD(relay_addr)) 1648 return (EADDRNOTAVAIL); 1649 ns->netstack_iptun->iptuns_relay_rtr_addr = relay_addr; 1650 return (0); 1651 } 1652 1653 void 1654 iptun_get_6to4relay(netstack_t *ns, ipaddr_t *relay_addr) 1655 { 1656 *relay_addr = ns->netstack_iptun->iptuns_relay_rtr_addr; 1657 } 1658 1659 void 1660 
iptun_set_policy(datalink_id_t linkid, ipsec_tun_pol_t *itp) 1661 { 1662 iptun_t *iptun; 1663 1664 if (iptun_enter_by_linkid(linkid, &iptun) != 0) 1665 return; 1666 if (iptun->iptun_itp != itp) { 1667 ASSERT(iptun->iptun_itp == NULL); 1668 ITP_REFHOLD(itp); 1669 iptun->iptun_itp = itp; 1670 /* IPsec policy means IPsec overhead, which means lower MTU. */ 1671 (void) iptun_update_mtu(iptun, 0); 1672 } 1673 iptun_exit(iptun); 1674 } 1675 1676 /* 1677 * Obtain the path MTU to the tunnel destination. 1678 */ 1679 static uint32_t 1680 iptun_get_dst_pmtu(iptun_t *iptun) 1681 { 1682 ire_t *ire = NULL; 1683 ip_stack_t *ipst = iptun->iptun_ns->netstack_ip; 1684 uint32_t pmtu = 0; 1685 1686 /* 1687 * We only obtain the destination IRE for tunnels that have a remote 1688 * tunnel address. 1689 */ 1690 if (!(iptun->iptun_flags & IPTUN_RADDR)) 1691 return (0); 1692 1693 switch (iptun->iptun_typeinfo->iti_ipvers) { 1694 case IPV4_VERSION: 1695 ire = ire_route_lookup(iptun->iptun_raddr4, INADDR_ANY, 1696 INADDR_ANY, 0, NULL, NULL, iptun->iptun_connp->conn_zoneid, 1697 NULL, (MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT), ipst); 1698 break; 1699 case IPV6_VERSION: 1700 ire = ire_route_lookup_v6(&iptun->iptun_raddr6, NULL, NULL, 0, 1701 NULL, NULL, iptun->iptun_connp->conn_zoneid, NULL, 1702 (MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT), ipst); 1703 break; 1704 } 1705 1706 if (ire != NULL) { 1707 pmtu = ire->ire_max_frag; 1708 ire_refrele(ire); 1709 } 1710 return (pmtu); 1711 } 1712 1713 /* 1714 * Returns the max of old_ovhd and the overhead associated with pol. 
1715 */ 1716 static uint32_t 1717 iptun_max_policy_overhead(ipsec_policy_t *pol, uint32_t old_ovhd) 1718 { 1719 uint32_t new_ovhd = old_ovhd; 1720 1721 while (pol != NULL) { 1722 new_ovhd = max(new_ovhd, 1723 ipsec_act_ovhd(&pol->ipsp_act->ipa_act)); 1724 pol = pol->ipsp_hash.hash_next; 1725 } 1726 return (new_ovhd); 1727 } 1728 1729 static uint32_t 1730 iptun_get_ipsec_overhead(iptun_t *iptun) 1731 { 1732 ipsec_policy_root_t *ipr; 1733 ipsec_policy_head_t *iph; 1734 ipsec_policy_t *pol; 1735 ipsec_selector_t sel; 1736 int i; 1737 uint32_t ipsec_ovhd = 0; 1738 ipsec_tun_pol_t *itp = iptun->iptun_itp; 1739 netstack_t *ns = iptun->iptun_ns; 1740 1741 if (itp == NULL || !(itp->itp_flags & ITPF_P_ACTIVE)) { 1742 /* 1743 * Consult global policy, just in case. This will only work 1744 * if we have both source and destination addresses to work 1745 * with. 1746 */ 1747 if ((iptun->iptun_flags & (IPTUN_LADDR|IPTUN_RADDR)) != 1748 (IPTUN_LADDR|IPTUN_RADDR)) 1749 return (0); 1750 1751 iph = ipsec_system_policy(ns); 1752 bzero(&sel, sizeof (sel)); 1753 sel.ips_isv4 = 1754 (iptun->iptun_typeinfo->iti_ipvers == IPV4_VERSION); 1755 switch (iptun->iptun_typeinfo->iti_ipvers) { 1756 case IPV4_VERSION: 1757 sel.ips_local_addr_v4 = iptun->iptun_laddr4; 1758 sel.ips_remote_addr_v4 = iptun->iptun_raddr4; 1759 break; 1760 case IPV6_VERSION: 1761 sel.ips_local_addr_v6 = iptun->iptun_laddr6; 1762 sel.ips_remote_addr_v6 = iptun->iptun_raddr6; 1763 break; 1764 } 1765 /* Check for both IPv4 and IPv6. 
*/ 1766 sel.ips_protocol = IPPROTO_ENCAP; 1767 pol = ipsec_find_policy_head(NULL, iph, IPSEC_TYPE_OUTBOUND, 1768 &sel, ns); 1769 if (pol != NULL) { 1770 ipsec_ovhd = ipsec_act_ovhd(&pol->ipsp_act->ipa_act); 1771 IPPOL_REFRELE(pol, ns); 1772 } 1773 sel.ips_protocol = IPPROTO_IPV6; 1774 pol = ipsec_find_policy_head(NULL, iph, IPSEC_TYPE_OUTBOUND, 1775 &sel, ns); 1776 if (pol != NULL) { 1777 ipsec_ovhd = max(ipsec_ovhd, 1778 ipsec_act_ovhd(&pol->ipsp_act->ipa_act)); 1779 IPPOL_REFRELE(pol, ns); 1780 } 1781 IPPH_REFRELE(iph, ns); 1782 } else { 1783 /* 1784 * Look through all of the possible IPsec actions for the 1785 * tunnel, and find the largest potential IPsec overhead. 1786 */ 1787 iph = itp->itp_policy; 1788 rw_enter(&iph->iph_lock, RW_READER); 1789 ipr = &(iph->iph_root[IPSEC_TYPE_OUTBOUND]); 1790 ipsec_ovhd = iptun_max_policy_overhead( 1791 ipr->ipr_nonhash[IPSEC_AF_V4], 0); 1792 ipsec_ovhd = iptun_max_policy_overhead( 1793 ipr->ipr_nonhash[IPSEC_AF_V6], ipsec_ovhd); 1794 for (i = 0; i < ipr->ipr_nchains; i++) { 1795 ipsec_ovhd = iptun_max_policy_overhead( 1796 ipr->ipr_hash[i].hash_head, ipsec_ovhd); 1797 } 1798 rw_exit(&iph->iph_lock); 1799 } 1800 1801 return (ipsec_ovhd); 1802 } 1803 1804 /* 1805 * Calculate and return the maximum possible MTU for the given tunnel. 1806 */ 1807 static uint32_t 1808 iptun_get_maxmtu(iptun_t *iptun, uint32_t new_pmtu) 1809 { 1810 size_t header_size, ipsec_overhead; 1811 uint32_t maxmtu, pmtu; 1812 1813 /* 1814 * Start with the path-MTU to the remote address, which is either 1815 * provided as the new_pmtu argument, or obtained using 1816 * iptun_get_dst_pmtu(). 
1817 */ 1818 if (new_pmtu != 0) { 1819 if (iptun->iptun_flags & IPTUN_RADDR) { 1820 iptun->iptun_dpmtu = new_pmtu; 1821 iptun->iptun_dpmtu_lastupdate = ddi_get_lbolt(); 1822 } 1823 pmtu = new_pmtu; 1824 } else if (iptun->iptun_flags & IPTUN_RADDR) { 1825 if ((pmtu = iptun_get_dst_pmtu(iptun)) == 0) { 1826 /* 1827 * We weren't able to obtain the path-MTU of the 1828 * destination. Use the previous value. 1829 */ 1830 pmtu = iptun->iptun_dpmtu; 1831 } else { 1832 iptun->iptun_dpmtu = pmtu; 1833 iptun->iptun_dpmtu_lastupdate = ddi_get_lbolt(); 1834 } 1835 } else { 1836 /* 1837 * We have no path-MTU information to go on, use the maximum 1838 * possible value. 1839 */ 1840 pmtu = iptun->iptun_typeinfo->iti_maxmtu; 1841 } 1842 1843 /* 1844 * Now calculate tunneling overhead and subtract that from the 1845 * path-MTU information obtained above. 1846 */ 1847 if (iptun->iptun_header_size != 0) { 1848 header_size = iptun->iptun_header_size; 1849 } else { 1850 switch (iptun->iptun_typeinfo->iti_ipvers) { 1851 case IPV4_VERSION: 1852 header_size = sizeof (ipha_t); 1853 if (is_system_labeled()) 1854 header_size += IP_MAX_OPT_LENGTH; 1855 break; 1856 case IPV6_VERSION: 1857 header_size = sizeof (iptun_ipv6hdrs_t); 1858 break; 1859 } 1860 } 1861 1862 ipsec_overhead = iptun_get_ipsec_overhead(iptun); 1863 1864 maxmtu = pmtu - (header_size + ipsec_overhead); 1865 return (max(maxmtu, iptun->iptun_typeinfo->iti_minmtu)); 1866 } 1867 1868 /* 1869 * Re-calculate the tunnel's MTU and notify the MAC layer of any change in 1870 * MTU. The new_pmtu argument is the new path MTU to the tunnel destination 1871 * to be used in the tunnel MTU calculation. Passing in 0 for new_pmtu causes 1872 * the path MTU to be dynamically updated using iptun_update_pmtu(). 1873 * 1874 * If the calculated tunnel MTU is different than its previous value, then we 1875 * notify the MAC layer above us of this change using mac_maxsdu_update(). 
1876 */ 1877 static uint32_t 1878 iptun_update_mtu(iptun_t *iptun, uint32_t new_pmtu) 1879 { 1880 uint32_t newmtu; 1881 1882 /* 1883 * We return the current MTU without updating it if it was pegged to a 1884 * static value using the MAC_PROP_MTU link property. 1885 */ 1886 if (iptun->iptun_flags & IPTUN_FIXED_MTU) 1887 return (iptun->iptun_mtu); 1888 1889 /* If the MTU isn't fixed, then use the maximum possible value. */ 1890 newmtu = iptun_get_maxmtu(iptun, new_pmtu); 1891 1892 /* 1893 * We only dynamically adjust the tunnel MTU for tunnels with 1894 * destinations because dynamic MTU calculations are based on the 1895 * destination path-MTU. 1896 */ 1897 if ((iptun->iptun_flags & IPTUN_RADDR) && newmtu != iptun->iptun_mtu) { 1898 iptun->iptun_mtu = newmtu; 1899 if (iptun->iptun_flags & IPTUN_MAC_REGISTERED) 1900 iptun_task_dispatch(iptun, IPTUN_TASK_MTU_UPDATE); 1901 } 1902 1903 return (newmtu); 1904 } 1905 1906 /* 1907 * Frees a packet or packet chain and bumps stat for each freed packet. 1908 */ 1909 static void 1910 iptun_drop_pkt(mblk_t *mp, uint64_t *stat) 1911 { 1912 mblk_t *pktmp; 1913 1914 for (pktmp = mp; pktmp != NULL; pktmp = mp) { 1915 mp = mp->b_next; 1916 pktmp->b_next = NULL; 1917 if (stat != NULL) 1918 atomic_inc_64(stat); 1919 freemsg(pktmp); 1920 } 1921 } 1922 1923 /* 1924 * Allocate and return a new mblk to hold an IP and ICMP header, and chain the 1925 * original packet to its b_cont. Returns NULL on failure. 1926 */ 1927 static mblk_t * 1928 iptun_build_icmperr(size_t hdrs_size, mblk_t *orig_pkt) 1929 { 1930 mblk_t *icmperr_mp; 1931 1932 if ((icmperr_mp = allocb_tmpl(hdrs_size, orig_pkt)) != NULL) { 1933 icmperr_mp->b_wptr += hdrs_size; 1934 /* tack on the offending packet */ 1935 icmperr_mp->b_cont = orig_pkt; 1936 } 1937 return (icmperr_mp); 1938 } 1939 1940 /* 1941 * Transmit an ICMP error. mp->b_rptr points at the packet to be included in 1942 * the ICMP error. 
1943 */ 1944 static void 1945 iptun_sendicmp_v4(iptun_t *iptun, icmph_t *icmp, ipha_t *orig_ipha, mblk_t *mp) 1946 { 1947 size_t orig_pktsize, hdrs_size; 1948 mblk_t *icmperr_mp; 1949 ipha_t *new_ipha; 1950 icmph_t *new_icmp; 1951 1952 orig_pktsize = msgdsize(mp); 1953 hdrs_size = sizeof (ipha_t) + sizeof (icmph_t); 1954 if ((icmperr_mp = iptun_build_icmperr(hdrs_size, mp)) == NULL) { 1955 iptun_drop_pkt(mp, &iptun->iptun_noxmtbuf); 1956 return; 1957 } 1958 1959 new_ipha = (ipha_t *)icmperr_mp->b_rptr; 1960 new_icmp = (icmph_t *)(new_ipha + 1); 1961 1962 new_ipha->ipha_version_and_hdr_length = IP_SIMPLE_HDR_VERSION; 1963 new_ipha->ipha_type_of_service = 0; 1964 new_ipha->ipha_ident = 0; 1965 new_ipha->ipha_fragment_offset_and_flags = 0; 1966 new_ipha->ipha_ttl = orig_ipha->ipha_ttl; 1967 new_ipha->ipha_protocol = IPPROTO_ICMP; 1968 new_ipha->ipha_src = orig_ipha->ipha_dst; 1969 new_ipha->ipha_dst = orig_ipha->ipha_src; 1970 new_ipha->ipha_hdr_checksum = 0; /* will be computed by ip */ 1971 new_ipha->ipha_length = htons(hdrs_size + orig_pktsize); 1972 1973 *new_icmp = *icmp; 1974 new_icmp->icmph_checksum = 0; 1975 new_icmp->icmph_checksum = IP_CSUM(icmperr_mp, sizeof (ipha_t), 0); 1976 1977 ip_output(iptun->iptun_connp, icmperr_mp, iptun->iptun_connp->conn_wq, 1978 IP_WPUT); 1979 } 1980 1981 static void 1982 iptun_sendicmp_v6(iptun_t *iptun, icmp6_t *icmp6, ip6_t *orig_ip6h, mblk_t *mp) 1983 { 1984 size_t orig_pktsize, hdrs_size; 1985 mblk_t *icmp6err_mp; 1986 ip6_t *new_ip6h; 1987 icmp6_t *new_icmp6; 1988 1989 orig_pktsize = msgdsize(mp); 1990 hdrs_size = sizeof (ip6_t) + sizeof (icmp6_t); 1991 if ((icmp6err_mp = iptun_build_icmperr(hdrs_size, mp)) == NULL) { 1992 iptun_drop_pkt(mp, &iptun->iptun_noxmtbuf); 1993 return; 1994 } 1995 1996 new_ip6h = (ip6_t *)icmp6err_mp->b_rptr; 1997 new_icmp6 = (icmp6_t *)(new_ip6h + 1); 1998 1999 new_ip6h->ip6_vcf = orig_ip6h->ip6_vcf; 2000 new_ip6h->ip6_plen = htons(sizeof (icmp6_t) + orig_pktsize); 2001 new_ip6h->ip6_hops = 
orig_ip6h->ip6_hops; 2002 new_ip6h->ip6_nxt = IPPROTO_ICMPV6; 2003 new_ip6h->ip6_src = orig_ip6h->ip6_dst; 2004 new_ip6h->ip6_dst = orig_ip6h->ip6_src; 2005 2006 *new_icmp6 = *icmp6; 2007 /* The checksum is calculated in ip_wput_ire_v6(). */ 2008 new_icmp6->icmp6_cksum = new_ip6h->ip6_plen; 2009 2010 ip_output_v6(iptun->iptun_connp, icmp6err_mp, 2011 iptun->iptun_connp->conn_wq, IP_WPUT); 2012 } 2013 2014 static void 2015 iptun_icmp_error_v4(iptun_t *iptun, ipha_t *orig_ipha, mblk_t *mp, 2016 uint8_t type, uint8_t code) 2017 { 2018 icmph_t icmp; 2019 2020 bzero(&icmp, sizeof (icmp)); 2021 icmp.icmph_type = type; 2022 icmp.icmph_code = code; 2023 2024 iptun_sendicmp_v4(iptun, &icmp, orig_ipha, mp); 2025 } 2026 2027 static void 2028 iptun_icmp_fragneeded_v4(iptun_t *iptun, uint32_t newmtu, ipha_t *orig_ipha, 2029 mblk_t *mp) 2030 { 2031 icmph_t icmp; 2032 2033 icmp.icmph_type = ICMP_DEST_UNREACHABLE; 2034 icmp.icmph_code = ICMP_FRAGMENTATION_NEEDED; 2035 icmp.icmph_du_zero = 0; 2036 icmp.icmph_du_mtu = htons(newmtu); 2037 2038 iptun_sendicmp_v4(iptun, &icmp, orig_ipha, mp); 2039 } 2040 2041 static void 2042 iptun_icmp_error_v6(iptun_t *iptun, ip6_t *orig_ip6h, mblk_t *mp, 2043 uint8_t type, uint8_t code, uint32_t offset) 2044 { 2045 icmp6_t icmp6; 2046 2047 bzero(&icmp6, sizeof (icmp6)); 2048 icmp6.icmp6_type = type; 2049 icmp6.icmp6_code = code; 2050 if (type == ICMP6_PARAM_PROB) 2051 icmp6.icmp6_pptr = htonl(offset); 2052 2053 iptun_sendicmp_v6(iptun, &icmp6, orig_ip6h, mp); 2054 } 2055 2056 static void 2057 iptun_icmp_toobig_v6(iptun_t *iptun, uint32_t newmtu, ip6_t *orig_ip6h, 2058 mblk_t *mp) 2059 { 2060 icmp6_t icmp6; 2061 2062 icmp6.icmp6_type = ICMP6_PACKET_TOO_BIG; 2063 icmp6.icmp6_code = 0; 2064 icmp6.icmp6_mtu = htonl(newmtu); 2065 2066 iptun_sendicmp_v6(iptun, &icmp6, orig_ip6h, mp); 2067 } 2068 2069 /* 2070 * Determines if the packet pointed to by ipha or ip6h is an ICMP error. The 2071 * mp argument is only used to do bounds checking. 
2072 */ 2073 static boolean_t 2074 is_icmp_error(mblk_t *mp, ipha_t *ipha, ip6_t *ip6h) 2075 { 2076 uint16_t hlen; 2077 2078 if (ipha != NULL) { 2079 icmph_t *icmph; 2080 2081 ASSERT(ip6h == NULL); 2082 if (ipha->ipha_protocol != IPPROTO_ICMP) 2083 return (B_FALSE); 2084 2085 hlen = IPH_HDR_LENGTH(ipha); 2086 icmph = (icmph_t *)((uint8_t *)ipha + hlen); 2087 return (ICMP_IS_ERROR(icmph->icmph_type) || 2088 icmph->icmph_type == ICMP_REDIRECT); 2089 } else { 2090 icmp6_t *icmp6; 2091 uint8_t *nexthdrp; 2092 2093 ASSERT(ip6h != NULL); 2094 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hlen, &nexthdrp) || 2095 *nexthdrp != IPPROTO_ICMPV6) { 2096 return (B_FALSE); 2097 } 2098 2099 icmp6 = (icmp6_t *)((uint8_t *)ip6h + hlen); 2100 return (ICMP6_IS_ERROR(icmp6->icmp6_type) || 2101 icmp6->icmp6_type == ND_REDIRECT); 2102 } 2103 } 2104 2105 /* 2106 * Find inner and outer IP headers from a tunneled packet as setup for calls 2107 * into ipsec_tun_{in,out}bound(). 2108 */ 2109 static size_t 2110 iptun_find_headers(mblk_t *mp, ipha_t **outer4, ipha_t **inner4, ip6_t **outer6, 2111 ip6_t **inner6) 2112 { 2113 ipha_t *ipha; 2114 size_t outer_hlen; 2115 size_t first_mblkl = MBLKL(mp); 2116 mblk_t *inner_mp; 2117 2118 /* 2119 * Don't bother handling packets that don't have a full IP header in 2120 * the fist mblk. For the input path, the ip module ensures that this 2121 * won't happen, and on the output path, the IP tunneling MAC-type 2122 * plugins ensure that this also won't happen. 
2123 */ 2124 if (first_mblkl < sizeof (ipha_t)) 2125 return (0); 2126 ipha = (ipha_t *)(mp->b_rptr); 2127 switch (IPH_HDR_VERSION(ipha)) { 2128 case IPV4_VERSION: 2129 *outer4 = ipha; 2130 *outer6 = NULL; 2131 outer_hlen = IPH_HDR_LENGTH(ipha); 2132 break; 2133 case IPV6_VERSION: 2134 *outer4 = NULL; 2135 *outer6 = (ip6_t *)ipha; 2136 outer_hlen = ip_hdr_length_v6(mp, (ip6_t *)ipha); 2137 break; 2138 default: 2139 return (0); 2140 } 2141 2142 if (first_mblkl < outer_hlen || 2143 (first_mblkl == outer_hlen && mp->b_cont == NULL)) 2144 return (0); 2145 2146 /* 2147 * We don't bother doing a pullup here since the outer header will 2148 * just get stripped off soon on input anyway. We just want to ensure 2149 * that the inner* pointer points to a full header. 2150 */ 2151 if (first_mblkl == outer_hlen) { 2152 inner_mp = mp->b_cont; 2153 ipha = (ipha_t *)inner_mp->b_rptr; 2154 } else { 2155 inner_mp = mp; 2156 ipha = (ipha_t *)(mp->b_rptr + outer_hlen); 2157 } 2158 switch (IPH_HDR_VERSION(ipha)) { 2159 case IPV4_VERSION: 2160 if (inner_mp->b_wptr - (uint8_t *)ipha < sizeof (ipha_t)) 2161 return (0); 2162 *inner4 = ipha; 2163 *inner6 = NULL; 2164 break; 2165 case IPV6_VERSION: 2166 if (inner_mp->b_wptr - (uint8_t *)ipha < sizeof (ip6_t)) 2167 return (0); 2168 *inner4 = NULL; 2169 *inner6 = (ip6_t *)ipha; 2170 break; 2171 default: 2172 return (0); 2173 } 2174 2175 return (outer_hlen); 2176 } 2177 2178 /* 2179 * Received ICMP error in response to an X over IPv4 packet that we 2180 * transmitted. 2181 * 2182 * NOTE: "outer" refers to what's inside the ICMP payload. We will get one of 2183 * the following: 2184 * 2185 * [IPv4(0)][ICMPv4][IPv4(1)][IPv4(2)][ULP] 2186 * 2187 * or 2188 * 2189 * [IPv4(0)][ICMPv4][IPv4(1)][IPv6][ULP] 2190 * 2191 * And "outer4" will get set to IPv4(1), and inner[46] will correspond to 2192 * whatever the very-inner packet is (IPv4(2) or IPv6). 
2193 */ 2194 static void 2195 iptun_input_icmp_v4(iptun_t *iptun, mblk_t *ipsec_mp, mblk_t *data_mp, 2196 icmph_t *icmph) 2197 { 2198 uint8_t *orig; 2199 ipha_t *outer4, *inner4; 2200 ip6_t *outer6, *inner6; 2201 int outer_hlen; 2202 uint8_t type, code; 2203 2204 /* 2205 * Change the db_type to M_DATA because subsequent operations assume 2206 * the ICMP packet is M_DATA again (i.e. calls to msgdsize()). 2207 */ 2208 data_mp->b_datap->db_type = M_DATA; 2209 2210 ASSERT(data_mp->b_cont == NULL); 2211 /* 2212 * Temporarily move b_rptr forward so that iptun_find_headers() can 2213 * find headers in the ICMP packet payload. 2214 */ 2215 orig = data_mp->b_rptr; 2216 data_mp->b_rptr = (uint8_t *)(icmph + 1); 2217 /* 2218 * The ip module ensures that ICMP errors contain at least the 2219 * original IP header (otherwise, the error would never have made it 2220 * here). 2221 */ 2222 ASSERT(MBLKL(data_mp) >= 0); 2223 outer_hlen = iptun_find_headers(data_mp, &outer4, &inner4, &outer6, 2224 &inner6); 2225 ASSERT(outer6 == NULL); 2226 data_mp->b_rptr = orig; 2227 if (outer_hlen == 0) { 2228 iptun_drop_pkt((ipsec_mp != NULL ? ipsec_mp : data_mp), 2229 &iptun->iptun_ierrors); 2230 return; 2231 } 2232 2233 /* Only ICMP errors due to tunneled packets should reach here. */ 2234 ASSERT(outer4->ipha_protocol == IPPROTO_ENCAP || 2235 outer4->ipha_protocol == IPPROTO_IPV6); 2236 2237 /* ipsec_tun_inbound() always frees ipsec_mp. */ 2238 if (!ipsec_tun_inbound(ipsec_mp, &data_mp, iptun->iptun_itp, 2239 inner4, inner6, outer4, outer6, -outer_hlen, 2240 iptun->iptun_ns)) { 2241 /* Callee did all of the freeing. */ 2242 atomic_inc_64(&iptun->iptun_ierrors); 2243 return; 2244 } 2245 /* We should never see reassembled fragment here. */ 2246 ASSERT(data_mp->b_next == NULL); 2247 2248 data_mp->b_rptr = (uint8_t *)outer4 + outer_hlen; 2249 2250 /* 2251 * If the original packet being transmitted was itself an ICMP error, 2252 * then drop this packet. 
We don't want to generate an ICMP error in 2253 * response to an ICMP error. 2254 */ 2255 if (is_icmp_error(data_mp, inner4, inner6)) { 2256 iptun_drop_pkt(data_mp, &iptun->iptun_norcvbuf); 2257 return; 2258 } 2259 2260 switch (icmph->icmph_type) { 2261 case ICMP_DEST_UNREACHABLE: 2262 type = (inner4 != NULL ? icmph->icmph_type : ICMP6_DST_UNREACH); 2263 switch (icmph->icmph_code) { 2264 case ICMP_FRAGMENTATION_NEEDED: { 2265 uint32_t newmtu; 2266 2267 /* 2268 * We reconcile this with the fact that the tunnel may 2269 * also have IPsec policy by letting iptun_update_mtu 2270 * take care of it. 2271 */ 2272 newmtu = 2273 iptun_update_mtu(iptun, ntohs(icmph->icmph_du_mtu)); 2274 2275 if (inner4 != NULL) { 2276 iptun_icmp_fragneeded_v4(iptun, newmtu, inner4, 2277 data_mp); 2278 } else { 2279 iptun_icmp_toobig_v6(iptun, newmtu, inner6, 2280 data_mp); 2281 } 2282 return; 2283 } 2284 case ICMP_DEST_NET_UNREACH_ADMIN: 2285 case ICMP_DEST_HOST_UNREACH_ADMIN: 2286 code = (inner4 != NULL ? ICMP_DEST_NET_UNREACH_ADMIN : 2287 ICMP6_DST_UNREACH_ADMIN); 2288 break; 2289 default: 2290 code = (inner4 != NULL ? ICMP_HOST_UNREACHABLE : 2291 ICMP6_DST_UNREACH_ADDR); 2292 break; 2293 } 2294 break; 2295 case ICMP_TIME_EXCEEDED: 2296 if (inner6 != NULL) { 2297 type = ICMP6_TIME_EXCEEDED; 2298 code = 0; 2299 } /* else we're already set. */ 2300 break; 2301 case ICMP_PARAM_PROBLEM: 2302 /* 2303 * This is a problem with the outer header we transmitted. 2304 * Treat this as an output error. 2305 */ 2306 iptun_drop_pkt(data_mp, &iptun->iptun_oerrors); 2307 return; 2308 default: 2309 iptun_drop_pkt(data_mp, &iptun->iptun_norcvbuf); 2310 return; 2311 } 2312 2313 if (inner4 != NULL) 2314 iptun_icmp_error_v4(iptun, inner4, data_mp, type, code); 2315 else 2316 iptun_icmp_error_v6(iptun, inner6, data_mp, type, code, 0); 2317 } 2318 2319 /* 2320 * Return B_TRUE if the IPv6 packet pointed to by ip6h contains a Tunnel 2321 * Encapsulation Limit destination option. 
If there is one, set encaplim_ptr 2322 * to point to the option value. 2323 */ 2324 static boolean_t 2325 iptun_find_encaplimit(mblk_t *mp, ip6_t *ip6h, uint8_t **encaplim_ptr) 2326 { 2327 ip6_pkt_t pkt; 2328 uint8_t *endptr; 2329 ip6_dest_t *destp; 2330 struct ip6_opt *optp; 2331 2332 pkt.ipp_fields = 0; /* must be initialized */ 2333 (void) ip_find_hdr_v6(mp, ip6h, &pkt, NULL); 2334 if ((pkt.ipp_fields & IPPF_DSTOPTS) != 0) { 2335 destp = pkt.ipp_dstopts; 2336 } else if ((pkt.ipp_fields & IPPF_RTDSTOPTS) != 0) { 2337 destp = pkt.ipp_rtdstopts; 2338 } else { 2339 return (B_FALSE); 2340 } 2341 2342 endptr = (uint8_t *)destp + 8 * (destp->ip6d_len + 1); 2343 optp = (struct ip6_opt *)(destp + 1); 2344 while (endptr - (uint8_t *)optp > sizeof (*optp)) { 2345 if (optp->ip6o_type == IP6OPT_TUNNEL_LIMIT) { 2346 if ((uint8_t *)(optp + 1) >= endptr) 2347 return (B_FALSE); 2348 *encaplim_ptr = (uint8_t *)&optp[1]; 2349 return (B_TRUE); 2350 } 2351 optp = (struct ip6_opt *)((uint8_t *)optp + optp->ip6o_len + 2); 2352 } 2353 return (B_FALSE); 2354 } 2355 2356 /* 2357 * Received ICMPv6 error in response to an X over IPv6 packet that we 2358 * transmitted. 2359 * 2360 * NOTE: "outer" refers to what's inside the ICMP payload. We will get one of 2361 * the following: 2362 * 2363 * [IPv6(0)][ICMPv6][IPv6(1)][IPv4][ULP] 2364 * 2365 * or 2366 * 2367 * [IPv6(0)][ICMPv6][IPv6(1)][IPv6(2)][ULP] 2368 * 2369 * And "outer6" will get set to IPv6(1), and inner[46] will correspond to 2370 * whatever the very-inner packet is (IPv4 or IPv6(2)). 2371 */ 2372 static void 2373 iptun_input_icmp_v6(iptun_t *iptun, mblk_t *ipsec_mp, mblk_t *data_mp, 2374 icmp6_t *icmp6h) 2375 { 2376 uint8_t *orig; 2377 ipha_t *outer4, *inner4; 2378 ip6_t *outer6, *inner6; 2379 int outer_hlen; 2380 uint8_t type, code; 2381 2382 /* 2383 * Change the db_type to M_DATA because subsequent operations assume 2384 * the ICMP packet is M_DATA again (i.e. calls to msgdsize().) 
2385 */ 2386 data_mp->b_datap->db_type = M_DATA; 2387 2388 ASSERT(data_mp->b_cont == NULL); 2389 2390 /* 2391 * Temporarily move b_rptr forward so that iptun_find_headers() can 2392 * find IP headers in the ICMP packet payload. 2393 */ 2394 orig = data_mp->b_rptr; 2395 data_mp->b_rptr = (uint8_t *)(icmp6h + 1); 2396 /* 2397 * The ip module ensures that ICMP errors contain at least the 2398 * original IP header (otherwise, the error would never have made it 2399 * here). 2400 */ 2401 ASSERT(MBLKL(data_mp) >= 0); 2402 outer_hlen = iptun_find_headers(data_mp, &outer4, &inner4, &outer6, 2403 &inner6); 2404 ASSERT(outer4 == NULL); 2405 data_mp->b_rptr = orig; /* Restore r_ptr */ 2406 if (outer_hlen == 0) { 2407 iptun_drop_pkt((ipsec_mp != NULL ? ipsec_mp : data_mp), 2408 &iptun->iptun_ierrors); 2409 return; 2410 } 2411 2412 if (!ipsec_tun_inbound(ipsec_mp, &data_mp, iptun->iptun_itp, 2413 inner4, inner6, outer4, outer6, -outer_hlen, 2414 iptun->iptun_ns)) { 2415 /* Callee did all of the freeing. */ 2416 atomic_inc_64(&iptun->iptun_ierrors); 2417 return; 2418 } 2419 /* We should never see reassembled fragment here. */ 2420 ASSERT(data_mp->b_next == NULL); 2421 2422 data_mp->b_rptr = (uint8_t *)outer6 + outer_hlen; 2423 2424 /* 2425 * If the original packet being transmitted was itself an ICMP error, 2426 * then drop this packet. We don't want to generate an ICMP error in 2427 * response to an ICMP error. 2428 */ 2429 if (is_icmp_error(data_mp, inner4, inner6)) { 2430 iptun_drop_pkt(data_mp, &iptun->iptun_norcvbuf); 2431 return; 2432 } 2433 2434 switch (icmp6h->icmp6_type) { 2435 case ICMP6_PARAM_PROB: { 2436 uint8_t *encaplim_ptr; 2437 2438 /* 2439 * If the ICMPv6 error points to a valid Tunnel Encapsulation 2440 * Limit option and the limit value is 0, then fall through 2441 * and send a host unreachable message. Otherwise, treat the 2442 * error as an output error, as there must have been a problem 2443 * with a packet we sent. 
2444 */ 2445 if (!iptun_find_encaplimit(data_mp, outer6, &encaplim_ptr) || 2446 (icmp6h->icmp6_pptr != 2447 ((ptrdiff_t)encaplim_ptr - (ptrdiff_t)outer6)) || 2448 *encaplim_ptr != 0) { 2449 iptun_drop_pkt(data_mp, &iptun->iptun_oerrors); 2450 return; 2451 } 2452 /* FALLTHRU */ 2453 } 2454 case ICMP6_TIME_EXCEEDED: 2455 case ICMP6_DST_UNREACH: 2456 type = (inner4 != NULL ? ICMP_DEST_UNREACHABLE : 2457 ICMP6_DST_UNREACH); 2458 code = (inner4 != NULL ? ICMP_HOST_UNREACHABLE : 2459 ICMP6_DST_UNREACH_ADDR); 2460 break; 2461 case ICMP6_PACKET_TOO_BIG: { 2462 uint32_t newmtu; 2463 2464 /* 2465 * We reconcile this with the fact that the tunnel may also 2466 * have IPsec policy by letting iptun_update_mtu take care of 2467 * it. 2468 */ 2469 newmtu = iptun_update_mtu(iptun, ntohl(icmp6h->icmp6_mtu)); 2470 2471 if (inner4 != NULL) { 2472 iptun_icmp_fragneeded_v4(iptun, newmtu, inner4, 2473 data_mp); 2474 } else { 2475 iptun_icmp_toobig_v6(iptun, newmtu, inner6, data_mp); 2476 } 2477 return; 2478 } 2479 default: 2480 iptun_drop_pkt(data_mp, &iptun->iptun_norcvbuf); 2481 return; 2482 } 2483 2484 if (inner4 != NULL) 2485 iptun_icmp_error_v4(iptun, inner4, data_mp, type, code); 2486 else 2487 iptun_icmp_error_v6(iptun, inner6, data_mp, type, code, 0); 2488 } 2489 2490 static void 2491 iptun_input_icmp(iptun_t *iptun, mblk_t *ipsec_mp, mblk_t *data_mp) 2492 { 2493 mblk_t *tmpmp; 2494 size_t hlen; 2495 2496 if (data_mp->b_cont != NULL) { 2497 /* 2498 * Since ICMP error processing necessitates access to bits 2499 * that are within the ICMP error payload (the original packet 2500 * that caused the error), pull everything up into a single 2501 * block for convenience. 2502 */ 2503 data_mp->b_datap->db_type = M_DATA; 2504 if ((tmpmp = msgpullup(data_mp, -1)) == NULL) { 2505 iptun_drop_pkt((ipsec_mp != NULL ? 
ipsec_mp : data_mp), 2506 &iptun->iptun_norcvbuf); 2507 return; 2508 } 2509 freemsg(data_mp); 2510 data_mp = tmpmp; 2511 if (ipsec_mp != NULL) 2512 ipsec_mp->b_cont = data_mp; 2513 } 2514 2515 switch (iptun->iptun_typeinfo->iti_ipvers) { 2516 case IPV4_VERSION: 2517 /* 2518 * The outer IP header coming up from IP is always ipha_t 2519 * alligned (otherwise, we would have crashed in ip). 2520 */ 2521 hlen = IPH_HDR_LENGTH((ipha_t *)data_mp->b_rptr); 2522 iptun_input_icmp_v4(iptun, ipsec_mp, data_mp, 2523 (icmph_t *)(data_mp->b_rptr + hlen)); 2524 break; 2525 case IPV6_VERSION: 2526 hlen = ip_hdr_length_v6(data_mp, (ip6_t *)data_mp->b_rptr); 2527 iptun_input_icmp_v6(iptun, ipsec_mp, data_mp, 2528 (icmp6_t *)(data_mp->b_rptr + hlen)); 2529 break; 2530 } 2531 } 2532 2533 static boolean_t 2534 iptun_in_6to4_ok(iptun_t *iptun, ipha_t *outer4, ip6_t *inner6) 2535 { 2536 ipaddr_t v4addr; 2537 2538 /* 2539 * It's possible that someone sent us an IPv4-in-IPv4 packet with the 2540 * IPv4 address of a 6to4 tunnel as the destination. 2541 */ 2542 if (inner6 == NULL) 2543 return (B_FALSE); 2544 2545 /* 2546 * Make sure that the IPv6 destination is within the site that this 2547 * 6to4 tunnel is routing for. We don't want people bouncing random 2548 * tunneled IPv6 packets through this 6to4 router. 2549 */ 2550 IN6_6TO4_TO_V4ADDR(&inner6->ip6_dst, (struct in_addr *)&v4addr); 2551 if (outer4->ipha_dst != v4addr) 2552 return (B_FALSE); 2553 2554 if (IN6_IS_ADDR_6TO4(&inner6->ip6_src)) { 2555 /* 2556 * Section 9 of RFC 3056 (security considerations) suggests 2557 * that when a packet is from a 6to4 site (i.e., it's not a 2558 * global address being forwarded froma relay router), make 2559 * sure that the packet was tunneled by that site's 6to4 2560 * router. 
2561 */ 2562 IN6_6TO4_TO_V4ADDR(&inner6->ip6_src, (struct in_addr *)&v4addr); 2563 if (outer4->ipha_src != v4addr) 2564 return (B_FALSE); 2565 } else { 2566 /* 2567 * Only accept packets from a relay router if we've configured 2568 * outbound relay router functionality. 2569 */ 2570 if (iptun->iptun_iptuns->iptuns_relay_rtr_addr == INADDR_ANY) 2571 return (B_FALSE); 2572 } 2573 2574 return (B_TRUE); 2575 } 2576 2577 /* 2578 * Input function for everything that comes up from the ip module below us. 2579 * This is called directly from the ip module via connp->conn_recv(). 2580 * 2581 * There are two kinds of packets that can arrive here: (1) IP-in-IP tunneled 2582 * packets and (2) ICMP errors containing IP-in-IP packets transmitted by us. 2583 * They have the following structure: 2584 * 2585 * 1) M_DATA 2586 * 2) M_CTL[->M_DATA] 2587 * 2588 * (2) Is an M_CTL optionally followed by M_DATA, where the M_CTL block is the 2589 * start of the actual ICMP packet (it doesn't contain any special control 2590 * information). 
2591 * 2592 * Either (1) or (2) can be IPsec-protected, in which case an M_CTL block 2593 * containing an ipsec_in_t will have been prepended to either (1) or (2), 2594 * making a total of four combinations of possible mblk chains: 2595 * 2596 * A) (1) 2597 * B) (2) 2598 * C) M_CTL(ipsec_in_t)->(1) 2599 * D) M_CTL(ipsec_in_t)->(2) 2600 */ 2601 /* ARGSUSED */ 2602 static void 2603 iptun_input(void *arg, mblk_t *mp, void *arg2) 2604 { 2605 conn_t *connp = arg; 2606 iptun_t *iptun = connp->conn_iptun; 2607 int outer_hlen; 2608 ipha_t *outer4, *inner4; 2609 ip6_t *outer6, *inner6; 2610 mblk_t *data_mp = mp; 2611 2612 ASSERT(IPCL_IS_IPTUN(connp)); 2613 ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_CTL); 2614 2615 if (DB_TYPE(mp) == M_CTL) { 2616 if (((ipsec_in_t *)(mp->b_rptr))->ipsec_in_type != IPSEC_IN) { 2617 iptun_input_icmp(iptun, NULL, mp); 2618 return; 2619 } 2620 2621 data_mp = mp->b_cont; 2622 if (DB_TYPE(data_mp) == M_CTL) { 2623 /* Protected ICMP packet. */ 2624 iptun_input_icmp(iptun, mp, data_mp); 2625 return; 2626 } 2627 } 2628 2629 /* 2630 * Request the destination's path MTU information regularly in case 2631 * path MTU has increased. 2632 */ 2633 if (IPTUN_PMTU_TOO_OLD(iptun)) 2634 iptun_task_dispatch(iptun, IPTUN_TASK_PMTU_UPDATE); 2635 2636 if ((outer_hlen = iptun_find_headers(data_mp, &outer4, &inner4, &outer6, 2637 &inner6)) == 0) 2638 goto drop; 2639 2640 /* 2641 * If the system is labeled, we call tsol_check_dest() on the packet 2642 * destination (our local tunnel address) to ensure that the packet as 2643 * labeled should be allowed to be sent to us. We don't need to call 2644 * the more involved tsol_receive_local() since the tunnel link itself 2645 * cannot be assigned to shared-stack non-global zones. 2646 */ 2647 if (is_system_labeled()) { 2648 cred_t *msg_cred; 2649 2650 if ((msg_cred = msg_getcred(data_mp, NULL)) == NULL) 2651 goto drop; 2652 if (tsol_check_dest(msg_cred, (outer4 != NULL ? 
2653 (void *)&outer4->ipha_dst : (void *)&outer6->ip6_dst), 2654 (outer4 != NULL ? IPV4_VERSION : IPV6_VERSION), 2655 CONN_MAC_DEFAULT, NULL) != 0) 2656 goto drop; 2657 } 2658 2659 if (!ipsec_tun_inbound((mp == data_mp ? NULL : mp), &data_mp, 2660 iptun->iptun_itp, inner4, inner6, outer4, outer6, outer_hlen, 2661 iptun->iptun_ns)) { 2662 /* Callee did all of the freeing. */ 2663 return; 2664 } 2665 mp = data_mp; 2666 2667 if (iptun->iptun_typeinfo->iti_type == IPTUN_TYPE_6TO4 && 2668 !iptun_in_6to4_ok(iptun, outer4, inner6)) 2669 goto drop; 2670 2671 /* 2672 * We need to statistically account for each packet individually, so 2673 * we might as well split up any b_next chains here. 2674 */ 2675 do { 2676 mp = data_mp->b_next; 2677 data_mp->b_next = NULL; 2678 2679 atomic_inc_64(&iptun->iptun_ipackets); 2680 atomic_add_64(&iptun->iptun_rbytes, msgdsize(data_mp)); 2681 mac_rx(iptun->iptun_mh, NULL, data_mp); 2682 2683 data_mp = mp; 2684 } while (data_mp != NULL); 2685 return; 2686 drop: 2687 iptun_drop_pkt(mp, &iptun->iptun_ierrors); 2688 } 2689 2690 /* 2691 * Do 6to4-specific header-processing on output. Return B_TRUE if the packet 2692 * was processed without issue, or B_FALSE if the packet had issues and should 2693 * be dropped. 2694 */ 2695 static boolean_t 2696 iptun_out_process_6to4(iptun_t *iptun, ipha_t *outer4, ip6_t *inner6) 2697 { 2698 ipaddr_t v4addr; 2699 2700 /* 2701 * IPv6 source must be a 6to4 address. This is because a conscious 2702 * decision was made to not allow a Solaris system to be used as a 2703 * relay router (for security reasons) when 6to4 was initially 2704 * integrated. If this decision is ever reversed, the following check 2705 * can be removed. 2706 */ 2707 if (!IN6_IS_ADDR_6TO4(&inner6->ip6_src)) 2708 return (B_FALSE); 2709 2710 /* 2711 * RFC3056 mandates that the IPv4 source MUST be set to the IPv4 2712 * portion of the 6to4 IPv6 source address. In other words, make sure 2713 * that we're tunneling packets from our own 6to4 site. 
2714 */ 2715 IN6_6TO4_TO_V4ADDR(&inner6->ip6_src, (struct in_addr *)&v4addr); 2716 if (outer4->ipha_src != v4addr) 2717 return (B_FALSE); 2718 2719 /* 2720 * Automatically set the destination of the outer IPv4 header as 2721 * described in RFC3056. There are two possibilities: 2722 * 2723 * a. If the IPv6 destination is a 6to4 address, set the IPv4 address 2724 * to the IPv4 portion of the 6to4 address. 2725 * b. If the IPv6 destination is a native IPv6 address, set the IPv4 2726 * destination to the address of a relay router. 2727 * 2728 * Design Note: b shouldn't be necessary here, and this is a flaw in 2729 * the design of the 6to4relay command. Instead of setting a 6to4 2730 * relay address in this module via an ioctl, the 6to4relay command 2731 * could simply add a IPv6 route for native IPv6 addresses (such as a 2732 * default route) in the forwarding table that uses a 6to4 destination 2733 * as its next hop, and the IPv4 portion of that address could be a 2734 * 6to4 relay address. In order for this to work, IP would have to 2735 * resolve the next hop address, which would necessitate a link-layer 2736 * address resolver for 6to4 links, which doesn't exist today. 2737 * 2738 * In fact, if a resolver existed for 6to4 links, then setting the 2739 * IPv4 destination in the outer header could be done as part of 2740 * link-layer address resolution and fast-path header generation, and 2741 * not here. 2742 */ 2743 if (IN6_IS_ADDR_6TO4(&inner6->ip6_dst)) { 2744 /* destination is a 6to4 router */ 2745 IN6_6TO4_TO_V4ADDR(&inner6->ip6_dst, 2746 (struct in_addr *)&outer4->ipha_dst); 2747 } else { 2748 /* 2749 * The destination is a native IPv6 address. If output to a 2750 * relay-router is enabled, use the relay-router's IPv4 2751 * address as the destination. 
2752 */ 2753 if (iptun->iptun_iptuns->iptuns_relay_rtr_addr == INADDR_ANY) 2754 return (B_FALSE); 2755 outer4->ipha_dst = iptun->iptun_iptuns->iptuns_relay_rtr_addr; 2756 } 2757 2758 /* 2759 * If the outer source and destination are equal, this means that the 2760 * 6to4 router somehow forwarded an IPv6 packet destined for its own 2761 * 6to4 site to its 6to4 tunnel interface, which will result in this 2762 * packet infinitely bouncing between ip and iptun. 2763 */ 2764 return (outer4->ipha_src != outer4->ipha_dst); 2765 } 2766 2767 /* 2768 * Process output packets with outer IPv4 headers. Frees mp and bumps stat on 2769 * error. 2770 */ 2771 static mblk_t * 2772 iptun_out_process_ipv4(iptun_t *iptun, mblk_t *mp, ipha_t *outer4, 2773 ipha_t *inner4, ip6_t *inner6) 2774 { 2775 uint8_t *innerptr = (inner4 != NULL ? 2776 (uint8_t *)inner4 : (uint8_t *)inner6); 2777 size_t minmtu = (inner4 != NULL ? 2778 IPTUN_MIN_IPV4_MTU : IPTUN_MIN_IPV6_MTU); 2779 2780 if (inner4 != NULL) { 2781 ASSERT(outer4->ipha_protocol == IPPROTO_ENCAP); 2782 /* 2783 * Copy the tos from the inner IPv4 header. We mask off ECN 2784 * bits (bits 6 and 7) because there is currently no 2785 * tunnel-tunnel communication to determine if both sides 2786 * support ECN. We opt for the safe choice: don't copy the 2787 * ECN bits when doing encapsulation. 2788 */ 2789 outer4->ipha_type_of_service = 2790 inner4->ipha_type_of_service & ~0x03; 2791 } else { 2792 ASSERT(outer4->ipha_protocol == IPPROTO_IPV6 && 2793 inner6 != NULL); 2794 2795 if (iptun->iptun_typeinfo->iti_type == IPTUN_TYPE_6TO4 && 2796 !iptun_out_process_6to4(iptun, outer4, inner6)) { 2797 iptun_drop_pkt(mp, &iptun->iptun_oerrors); 2798 return (NULL); 2799 } 2800 } 2801 2802 /* 2803 * As described in section 3.2.2 of RFC4213, if the packet payload is 2804 * less than or equal to the minimum MTU size, then we need to allow 2805 * IPv4 to fragment the packet. 
The reason is that even if we end up 2806 * receiving an ICMP frag-needed, the interface above this tunnel 2807 * won't be allowed to drop its MTU as a result, since the packet was 2808 * already smaller than the smallest allowable MTU for that interface. 2809 */ 2810 if (mp->b_wptr - innerptr <= minmtu) 2811 outer4->ipha_fragment_offset_and_flags = 0; 2812 2813 outer4->ipha_length = htons(msgdsize(mp)); 2814 2815 return (mp); 2816 } 2817 2818 /* 2819 * Insert an encapsulation limit destination option in the packet provided. 2820 * Always consumes the mp argument and returns a new mblk pointer. 2821 */ 2822 static mblk_t * 2823 iptun_insert_encaplimit(iptun_t *iptun, mblk_t *mp, ip6_t *outer6, 2824 uint8_t limit) 2825 { 2826 mblk_t *newmp; 2827 iptun_ipv6hdrs_t *newouter6; 2828 2829 ASSERT(outer6->ip6_nxt == IPPROTO_IPV6); 2830 ASSERT(mp->b_cont == NULL); 2831 2832 mp->b_rptr += sizeof (ip6_t); 2833 newmp = allocb_tmpl(sizeof (iptun_ipv6hdrs_t) + MBLKL(mp), mp); 2834 if (newmp == NULL) { 2835 iptun_drop_pkt(mp, &iptun->iptun_noxmtbuf); 2836 return (NULL); 2837 } 2838 newmp->b_wptr += sizeof (iptun_ipv6hdrs_t); 2839 /* Copy the payload (Starting with the inner IPv6 header). */ 2840 bcopy(mp->b_rptr, newmp->b_wptr, MBLKL(mp)); 2841 newmp->b_wptr += MBLKL(mp); 2842 newouter6 = (iptun_ipv6hdrs_t *)newmp->b_rptr; 2843 /* Now copy the outer IPv6 header. */ 2844 bcopy(outer6, &newouter6->it6h_ip6h, sizeof (ip6_t)); 2845 newouter6->it6h_ip6h.ip6_nxt = IPPROTO_DSTOPTS; 2846 newouter6->it6h_encaplim = iptun_encaplim_init; 2847 newouter6->it6h_encaplim.iel_destopt.ip6d_nxt = outer6->ip6_nxt; 2848 newouter6->it6h_encaplim.iel_telopt.ip6ot_encap_limit = limit; 2849 2850 /* 2851 * The payload length will be set at the end of 2852 * iptun_out_process_ipv6(). 2853 */ 2854 2855 freemsg(mp); 2856 return (newmp); 2857 } 2858 2859 /* 2860 * Process output packets with outer IPv6 headers. Frees mp and bumps stats 2861 * on error. 
2862 */ 2863 static mblk_t * 2864 iptun_out_process_ipv6(iptun_t *iptun, mblk_t *mp, ip6_t *outer6, ip6_t *inner6) 2865 { 2866 uint8_t *limit, *configlimit; 2867 uint32_t offset; 2868 iptun_ipv6hdrs_t *v6hdrs; 2869 2870 if (inner6 != NULL && iptun_find_encaplimit(mp, inner6, &limit)) { 2871 /* 2872 * The inner packet is an IPv6 packet which itself contains an 2873 * encapsulation limit option. The limit variable points to 2874 * the value in the embedded option. Process the 2875 * encapsulation limit option as specified in RFC 2473. 2876 * 2877 * If limit is 0, then we've exceeded the limit and we need to 2878 * send back an ICMPv6 parameter problem message. 2879 * 2880 * If limit is > 0, then we decrement it by 1 and make sure 2881 * that the encapsulation limit option in the outer header 2882 * reflects that (adding an option if one isn't already 2883 * there). 2884 */ 2885 ASSERT(limit > mp->b_rptr && limit < mp->b_wptr); 2886 if (*limit == 0) { 2887 mp->b_rptr = (uint8_t *)inner6; 2888 offset = limit - mp->b_rptr; 2889 iptun_icmp_error_v6(iptun, inner6, mp, ICMP6_PARAM_PROB, 2890 0, offset); 2891 atomic_inc_64(&iptun->iptun_noxmtbuf); 2892 return (NULL); 2893 } 2894 2895 /* 2896 * The outer header requires an encapsulation limit option. 2897 * If there isn't one already, add one. 2898 */ 2899 if (iptun->iptun_encaplimit == 0) { 2900 if ((mp = iptun_insert_encaplimit(iptun, mp, outer6, 2901 (*limit - 1))) == NULL) 2902 return (NULL); 2903 } else { 2904 /* 2905 * There is an existing encapsulation limit option in 2906 * the outer header. If the inner encapsulation limit 2907 * is less than the configured encapsulation limit, 2908 * update the outer encapsulation limit to reflect 2909 * this lesser value. 
2910 */ 2911 v6hdrs = (iptun_ipv6hdrs_t *)mp->b_rptr; 2912 configlimit = 2913 &v6hdrs->it6h_encaplim.iel_telopt.ip6ot_encap_limit; 2914 if ((*limit - 1) < *configlimit) 2915 *configlimit = (*limit - 1); 2916 } 2917 } 2918 2919 outer6->ip6_plen = htons(msgdsize(mp) - sizeof (ip6_t)); 2920 return (mp); 2921 } 2922 2923 /* 2924 * The IP tunneling MAC-type plugins have already done most of the header 2925 * processing and validity checks. We are simply responsible for multiplexing 2926 * down to the ip module below us. 2927 */ 2928 static void 2929 iptun_output(iptun_t *iptun, mblk_t *mp) 2930 { 2931 conn_t *connp = iptun->iptun_connp; 2932 int outer_hlen; 2933 mblk_t *newmp; 2934 ipha_t *outer4, *inner4; 2935 ip6_t *outer6, *inner6; 2936 ipsec_tun_pol_t *itp = iptun->iptun_itp; 2937 2938 ASSERT(mp->b_datap->db_type == M_DATA); 2939 2940 if (mp->b_cont != NULL) { 2941 if ((newmp = msgpullup(mp, -1)) == NULL) { 2942 iptun_drop_pkt(mp, &iptun->iptun_noxmtbuf); 2943 return; 2944 } 2945 freemsg(mp); 2946 mp = newmp; 2947 } 2948 2949 outer_hlen = iptun_find_headers(mp, &outer4, &inner4, &outer6, &inner6); 2950 if (outer_hlen == 0) { 2951 iptun_drop_pkt(mp, &iptun->iptun_oerrors); 2952 return; 2953 } 2954 2955 /* Perform header processing. */ 2956 if (outer4 != NULL) 2957 mp = iptun_out_process_ipv4(iptun, mp, outer4, inner4, inner6); 2958 else 2959 mp = iptun_out_process_ipv6(iptun, mp, outer6, inner6); 2960 if (mp == NULL) 2961 return; 2962 2963 /* 2964 * Let's hope the compiler optimizes this with "branch taken". 2965 */ 2966 if (itp != NULL && (itp->itp_flags & ITPF_P_ACTIVE)) { 2967 if ((mp = ipsec_tun_outbound(mp, iptun, inner4, inner6, outer4, 2968 outer6, outer_hlen)) == NULL) { 2969 /* ipsec_tun_outbound() frees mp on error. */ 2970 atomic_inc_64(&iptun->iptun_oerrors); 2971 return; 2972 } 2973 /* 2974 * ipsec_tun_outbound() returns a chain of tunneled IP 2975 * fragments linked with b_next (or a single message if the 2976 * tunneled packet wasn't a fragment). 
Each message in the 2977 * chain is prepended by an IPSEC_OUT M_CTL block with 2978 * instructions for outbound IPsec processing. 2979 */ 2980 for (newmp = mp; newmp != NULL; newmp = mp) { 2981 ASSERT(newmp->b_datap->db_type == M_CTL); 2982 atomic_inc_64(&iptun->iptun_opackets); 2983 atomic_add_64(&iptun->iptun_obytes, 2984 msgdsize(newmp->b_cont)); 2985 mp = mp->b_next; 2986 newmp->b_next = NULL; 2987 connp->conn_send(connp, newmp, connp->conn_wq, IP_WPUT); 2988 } 2989 } else { 2990 /* 2991 * The ip module will potentially apply global policy to the 2992 * packet in its output path if there's no active tunnel 2993 * policy. 2994 */ 2995 atomic_inc_64(&iptun->iptun_opackets); 2996 atomic_add_64(&iptun->iptun_obytes, msgdsize(mp)); 2997 connp->conn_send(connp, mp, connp->conn_wq, IP_WPUT); 2998 } 2999 } 3000 3001 /* 3002 * Note that the setting or clearing iptun_{set,get}_g_q() is serialized via 3003 * iptuns_lock and iptunq_open(), so we must never be in a situation where 3004 * iptun_set_g_q() is called if the queue has already been set or vice versa 3005 * (hence the ASSERT()s.) 3006 */ 3007 void 3008 iptun_set_g_q(netstack_t *ns, queue_t *q) 3009 { 3010 ASSERT(ns->netstack_iptun->iptuns_g_q == NULL); 3011 ns->netstack_iptun->iptuns_g_q = q; 3012 } 3013 3014 void 3015 iptun_clear_g_q(netstack_t *ns) 3016 { 3017 ASSERT(ns->netstack_iptun->iptuns_g_q != NULL); 3018 ns->netstack_iptun->iptuns_g_q = NULL; 3019 } 3020 3021 static mac_callbacks_t iptun_m_callbacks = { 3022 .mc_callbacks = (MC_SETPROP | MC_GETPROP), 3023 .mc_getstat = iptun_m_getstat, 3024 .mc_start = iptun_m_start, 3025 .mc_stop = iptun_m_stop, 3026 .mc_setpromisc = iptun_m_setpromisc, 3027 .mc_multicst = iptun_m_multicst, 3028 .mc_unicst = iptun_m_unicst, 3029 .mc_tx = iptun_m_tx, 3030 .mc_setprop = iptun_m_setprop, 3031 .mc_getprop = iptun_m_getprop 3032 }; 3033