/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * iptun - IP Tunneling Driver
 *
 * This module is a GLDv3 driver that implements virtual datalinks over IP
 * (a.k.a, IP tunneling).  The datalinks are managed through a dld ioctl
 * interface (see iptun_ctl.c), and registered with GLDv3 using
 * mac_register().  It implements the logic for various forms of IP (IPv4 or
 * IPv6) encapsulation within IP (IPv4 or IPv6) by interacting with the ip
 * module below it.  Each virtual IP tunnel datalink has a conn_t associated
 * with it representing the "outer" IP connection.
 *
 * The module implements the following locking semantics:
 *
 * Lookups and deletions in iptun_hash are synchronized using iptun_hash_lock.
 * See comments above iptun_hash_lock for details.
 *
 * No locks are ever held while calling up to GLDv3.  The general architecture
 * of GLDv3 requires this, as the mac perimeter (essentially a lock) for a
 * given link will be held while making downcalls (iptun_m_*() callbacks).
45 * Because we need to hold locks while handling downcalls, holding these locks 46 * while issuing upcalls results in deadlock scenarios. See the block comment 47 * above iptun_task_cb() for details on how we safely issue upcalls without 48 * holding any locks. 49 * 50 * The contents of each iptun_t is protected by an iptun_mutex which is held 51 * in iptun_enter() (called by iptun_enter_by_linkid()), and exited in 52 * iptun_exit(). 53 * 54 * See comments in iptun_delete() and iptun_free() for details on how the 55 * iptun_t is deleted safely. 56 */ 57 58 #include <sys/types.h> 59 #include <sys/kmem.h> 60 #include <sys/errno.h> 61 #include <sys/modhash.h> 62 #include <sys/list.h> 63 #include <sys/strsun.h> 64 #include <sys/file.h> 65 #include <sys/systm.h> 66 #include <sys/tihdr.h> 67 #include <sys/param.h> 68 #include <sys/mac_provider.h> 69 #include <sys/mac_ipv4.h> 70 #include <sys/mac_ipv6.h> 71 #include <sys/mac_6to4.h> 72 #include <sys/tsol/tnet.h> 73 #include <sys/sunldi.h> 74 #include <netinet/in.h> 75 #include <netinet/ip6.h> 76 #include <inet/ip.h> 77 #include <inet/ip_ire.h> 78 #include <inet/ipsec_impl.h> 79 #include <sys/tsol/label.h> 80 #include <sys/tsol/tnet.h> 81 #include <inet/iptun.h> 82 #include "iptun_impl.h" 83 84 /* Do the tunnel type and address family match? */ 85 #define IPTUN_ADDR_MATCH(iptun_type, family) \ 86 ((iptun_type == IPTUN_TYPE_IPV4 && family == AF_INET) || \ 87 (iptun_type == IPTUN_TYPE_IPV6 && family == AF_INET6) || \ 88 (iptun_type == IPTUN_TYPE_6TO4 && family == AF_INET)) 89 90 #define IPTUN_HASH_KEY(key) ((mod_hash_key_t)(uintptr_t)(key)) 91 92 #define IPTUN_MIN_IPV4_MTU 576 /* ip.h still uses 68 (!) 
*/ 93 #define IPTUN_MIN_IPV6_MTU IPV6_MIN_MTU 94 #define IPTUN_MAX_IPV4_MTU (IP_MAXPACKET - sizeof (ipha_t)) 95 #define IPTUN_MAX_IPV6_MTU (IP_MAXPACKET - sizeof (ip6_t) - \ 96 sizeof (iptun_encaplim_t)) 97 98 #define IPTUN_MIN_HOPLIMIT 1 99 #define IPTUN_MAX_HOPLIMIT UINT8_MAX 100 101 #define IPTUN_MIN_ENCAPLIMIT 0 102 #define IPTUN_MAX_ENCAPLIMIT UINT8_MAX 103 104 #define IPTUN_IPSEC_REQ_MASK (IPSEC_PREF_REQUIRED | IPSEC_PREF_NEVER) 105 106 static iptun_encaplim_t iptun_encaplim_init = { 107 { IPPROTO_NONE, 0 }, 108 IP6OPT_TUNNEL_LIMIT, 109 1, 110 IPTUN_DEFAULT_ENCAPLIMIT, /* filled in with actual value later */ 111 IP6OPT_PADN, 112 1, 113 0 114 }; 115 116 /* 117 * Table containing per-iptun-type information. 118 * Since IPv6 can run over all of these we have the IPv6 min as the min MTU. 119 */ 120 static iptun_typeinfo_t iptun_type_table[] = { 121 { IPTUN_TYPE_IPV4, MAC_PLUGIN_IDENT_IPV4, IPV4_VERSION, 122 IPTUN_MIN_IPV6_MTU, IPTUN_MAX_IPV4_MTU, B_TRUE }, 123 { IPTUN_TYPE_IPV6, MAC_PLUGIN_IDENT_IPV6, IPV6_VERSION, 124 IPTUN_MIN_IPV6_MTU, IPTUN_MAX_IPV6_MTU, B_TRUE }, 125 { IPTUN_TYPE_6TO4, MAC_PLUGIN_IDENT_6TO4, IPV4_VERSION, 126 IPTUN_MIN_IPV6_MTU, IPTUN_MAX_IPV4_MTU, B_FALSE }, 127 { IPTUN_TYPE_UNKNOWN, NULL, 0, 0, 0, B_FALSE } 128 }; 129 130 /* 131 * iptun_hash is an iptun_t lookup table by link ID protected by 132 * iptun_hash_lock. While the hash table's integrity is maintained via 133 * internal locking in the mod_hash_*() functions, we need additional locking 134 * so that an iptun_t cannot be deleted after a hash lookup has returned an 135 * iptun_t and before iptun_lock has been entered. As such, we use 136 * iptun_hash_lock when doing lookups and removals from iptun_hash. 
137 */ 138 mod_hash_t *iptun_hash; 139 static kmutex_t iptun_hash_lock; 140 141 static uint_t iptun_tunnelcount; /* total for all stacks */ 142 kmem_cache_t *iptun_cache; 143 ddi_taskq_t *iptun_taskq; 144 145 typedef enum { 146 IPTUN_TASK_MTU_UPDATE, /* tell mac about new tunnel link MTU */ 147 IPTUN_TASK_LADDR_UPDATE, /* tell mac about new local address */ 148 IPTUN_TASK_RADDR_UPDATE, /* tell mac about new remote address */ 149 IPTUN_TASK_LINK_UPDATE, /* tell mac about new link state */ 150 IPTUN_TASK_PDATA_UPDATE /* tell mac about updated plugin data */ 151 } iptun_task_t; 152 153 typedef struct iptun_task_data_s { 154 iptun_task_t itd_task; 155 datalink_id_t itd_linkid; 156 } iptun_task_data_t; 157 158 static void iptun_task_dispatch(iptun_t *, iptun_task_t); 159 static int iptun_enter(iptun_t *); 160 static void iptun_exit(iptun_t *); 161 static void iptun_headergen(iptun_t *, boolean_t); 162 static void iptun_drop_pkt(mblk_t *, uint64_t *); 163 static void iptun_input(void *, mblk_t *, void *, ip_recv_attr_t *); 164 static void iptun_input_icmp(void *, mblk_t *, void *, ip_recv_attr_t *); 165 static void iptun_output(iptun_t *, mblk_t *); 166 static uint32_t iptun_get_maxmtu(iptun_t *, ip_xmit_attr_t *, uint32_t); 167 static uint32_t iptun_update_mtu(iptun_t *, ip_xmit_attr_t *, uint32_t); 168 static uint32_t iptun_get_dst_pmtu(iptun_t *, ip_xmit_attr_t *); 169 static void iptun_update_dst_pmtu(iptun_t *, ip_xmit_attr_t *); 170 static int iptun_setladdr(iptun_t *, const struct sockaddr_storage *); 171 172 static void iptun_output_6to4(iptun_t *, mblk_t *); 173 static void iptun_output_common(iptun_t *, ip_xmit_attr_t *, mblk_t *); 174 static boolean_t iptun_verifyicmp(conn_t *, void *, icmph_t *, icmp6_t *, 175 ip_recv_attr_t *); 176 177 static void iptun_notify(void *, ip_xmit_attr_t *, ixa_notify_type_t, 178 ixa_notify_arg_t); 179 180 static mac_callbacks_t iptun_m_callbacks; 181 182 static int 183 iptun_m_getstat(void *arg, uint_t stat, uint64_t *val) 184 { 
185 iptun_t *iptun = arg; 186 int err = 0; 187 188 switch (stat) { 189 case MAC_STAT_IERRORS: 190 *val = iptun->iptun_ierrors; 191 break; 192 case MAC_STAT_OERRORS: 193 *val = iptun->iptun_oerrors; 194 break; 195 case MAC_STAT_RBYTES: 196 *val = iptun->iptun_rbytes; 197 break; 198 case MAC_STAT_IPACKETS: 199 *val = iptun->iptun_ipackets; 200 break; 201 case MAC_STAT_OBYTES: 202 *val = iptun->iptun_obytes; 203 break; 204 case MAC_STAT_OPACKETS: 205 *val = iptun->iptun_opackets; 206 break; 207 case MAC_STAT_NORCVBUF: 208 *val = iptun->iptun_norcvbuf; 209 break; 210 case MAC_STAT_NOXMTBUF: 211 *val = iptun->iptun_noxmtbuf; 212 break; 213 default: 214 err = ENOTSUP; 215 } 216 217 return (err); 218 } 219 220 static int 221 iptun_m_start(void *arg) 222 { 223 iptun_t *iptun = arg; 224 int err; 225 226 if ((err = iptun_enter(iptun)) == 0) { 227 iptun->iptun_flags |= IPTUN_MAC_STARTED; 228 iptun_task_dispatch(iptun, IPTUN_TASK_LINK_UPDATE); 229 iptun_exit(iptun); 230 } 231 return (err); 232 } 233 234 static void 235 iptun_m_stop(void *arg) 236 { 237 iptun_t *iptun = arg; 238 239 if (iptun_enter(iptun) == 0) { 240 iptun->iptun_flags &= ~IPTUN_MAC_STARTED; 241 iptun_task_dispatch(iptun, IPTUN_TASK_LINK_UPDATE); 242 iptun_exit(iptun); 243 } 244 } 245 246 /* 247 * iptun_m_setpromisc() does nothing and always succeeds. This is because a 248 * tunnel data-link only ever receives packets that are destined exclusively 249 * for the local address of the tunnel. 250 */ 251 /* ARGSUSED */ 252 static int 253 iptun_m_setpromisc(void *arg, boolean_t on) 254 { 255 return (0); 256 } 257 258 /* ARGSUSED */ 259 static int 260 iptun_m_multicst(void *arg, boolean_t add, const uint8_t *addrp) 261 { 262 return (ENOTSUP); 263 } 264 265 /* 266 * iptun_m_unicst() sets the local address. 
267 */ 268 /* ARGSUSED */ 269 static int 270 iptun_m_unicst(void *arg, const uint8_t *addrp) 271 { 272 iptun_t *iptun = arg; 273 int err; 274 struct sockaddr_storage ss; 275 struct sockaddr_in *sin; 276 struct sockaddr_in6 *sin6; 277 278 if ((err = iptun_enter(iptun)) == 0) { 279 switch (iptun->iptun_typeinfo->iti_ipvers) { 280 case IPV4_VERSION: 281 sin = (struct sockaddr_in *)&ss; 282 sin->sin_family = AF_INET; 283 bcopy(addrp, &sin->sin_addr, sizeof (in_addr_t)); 284 break; 285 case IPV6_VERSION: 286 sin6 = (struct sockaddr_in6 *)&ss; 287 sin6->sin6_family = AF_INET6; 288 bcopy(addrp, &sin6->sin6_addr, sizeof (in6_addr_t)); 289 break; 290 default: 291 ASSERT(0); 292 } 293 err = iptun_setladdr(iptun, &ss); 294 iptun_exit(iptun); 295 } 296 return (err); 297 } 298 299 static mblk_t * 300 iptun_m_tx(void *arg, mblk_t *mpchain) 301 { 302 mblk_t *mp, *nmp; 303 iptun_t *iptun = arg; 304 305 if (!IS_IPTUN_RUNNING(iptun)) { 306 iptun_drop_pkt(mpchain, &iptun->iptun_noxmtbuf); 307 return (NULL); 308 } 309 310 for (mp = mpchain; mp != NULL; mp = nmp) { 311 nmp = mp->b_next; 312 mp->b_next = NULL; 313 iptun_output(iptun, mp); 314 } 315 316 return (NULL); 317 } 318 319 /* ARGSUSED */ 320 static int 321 iptun_m_setprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, 322 uint_t pr_valsize, const void *pr_val) 323 { 324 iptun_t *iptun = barg; 325 uint32_t value = *(uint32_t *)pr_val; 326 int err; 327 328 /* 329 * We need to enter this iptun_t since we'll be modifying the outer 330 * header. 
331 */ 332 if ((err = iptun_enter(iptun)) != 0) 333 return (err); 334 335 switch (pr_num) { 336 case MAC_PROP_IPTUN_HOPLIMIT: 337 if (value < IPTUN_MIN_HOPLIMIT || value > IPTUN_MAX_HOPLIMIT) { 338 err = EINVAL; 339 break; 340 } 341 if (value != iptun->iptun_hoplimit) { 342 iptun->iptun_hoplimit = (uint8_t)value; 343 iptun_headergen(iptun, B_TRUE); 344 } 345 break; 346 case MAC_PROP_IPTUN_ENCAPLIMIT: 347 if (iptun->iptun_typeinfo->iti_type != IPTUN_TYPE_IPV6 || 348 value > IPTUN_MAX_ENCAPLIMIT) { 349 err = EINVAL; 350 break; 351 } 352 if (value != iptun->iptun_encaplimit) { 353 iptun->iptun_encaplimit = (uint8_t)value; 354 iptun_headergen(iptun, B_TRUE); 355 } 356 break; 357 case MAC_PROP_MTU: { 358 uint32_t maxmtu = iptun_get_maxmtu(iptun, NULL, 0); 359 360 if (value < iptun->iptun_typeinfo->iti_minmtu || 361 value > maxmtu) { 362 err = EINVAL; 363 break; 364 } 365 iptun->iptun_flags |= IPTUN_FIXED_MTU; 366 if (value != iptun->iptun_mtu) { 367 iptun->iptun_mtu = value; 368 iptun_task_dispatch(iptun, IPTUN_TASK_MTU_UPDATE); 369 } 370 break; 371 } 372 default: 373 err = EINVAL; 374 } 375 iptun_exit(iptun); 376 return (err); 377 } 378 379 /* ARGSUSED */ 380 static int 381 iptun_m_getprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, 382 uint_t pr_flags, uint_t pr_valsize, void *pr_val, uint_t *perm) 383 { 384 iptun_t *iptun = barg; 385 mac_propval_range_t range; 386 boolean_t is_default = (pr_flags & MAC_PROP_DEFAULT); 387 boolean_t is_possible = (pr_flags & MAC_PROP_POSSIBLE); 388 int err; 389 390 if ((err = iptun_enter(iptun)) != 0) 391 return (err); 392 393 if ((pr_flags & ~(MAC_PROP_DEFAULT | MAC_PROP_POSSIBLE)) != 0) { 394 err = ENOTSUP; 395 goto done; 396 } 397 if (is_default && is_possible) { 398 err = EINVAL; 399 goto done; 400 } 401 402 *perm = MAC_PROP_PERM_RW; 403 404 if (is_possible) { 405 if (pr_valsize < sizeof (mac_propval_range_t)) { 406 err = EINVAL; 407 goto done; 408 } 409 range.mpr_count = 1; 410 range.mpr_type = MAC_PROPVAL_UINT32; 411 } 
else if (pr_valsize < sizeof (uint32_t)) { 412 err = EINVAL; 413 goto done; 414 } 415 416 switch (pr_num) { 417 case MAC_PROP_IPTUN_HOPLIMIT: 418 if (is_possible) { 419 range.range_uint32[0].mpur_min = IPTUN_MIN_HOPLIMIT; 420 range.range_uint32[0].mpur_max = IPTUN_MAX_HOPLIMIT; 421 } else if (is_default) { 422 *(uint32_t *)pr_val = IPTUN_DEFAULT_HOPLIMIT; 423 } else { 424 *(uint32_t *)pr_val = iptun->iptun_hoplimit; 425 } 426 break; 427 case MAC_PROP_IPTUN_ENCAPLIMIT: 428 if (iptun->iptun_typeinfo->iti_type != IPTUN_TYPE_IPV6) { 429 err = ENOTSUP; 430 goto done; 431 } 432 if (is_possible) { 433 range.range_uint32[0].mpur_min = IPTUN_MIN_ENCAPLIMIT; 434 range.range_uint32[0].mpur_max = IPTUN_MAX_ENCAPLIMIT; 435 } else if (is_default) { 436 *(uint32_t *)pr_val = IPTUN_DEFAULT_ENCAPLIMIT; 437 } else { 438 *(uint32_t *)pr_val = iptun->iptun_encaplimit; 439 } 440 break; 441 case MAC_PROP_MTU: { 442 uint32_t maxmtu = iptun_get_maxmtu(iptun, NULL, 0); 443 444 if (is_possible) { 445 range.range_uint32[0].mpur_min = 446 iptun->iptun_typeinfo->iti_minmtu; 447 range.range_uint32[0].mpur_max = maxmtu; 448 } else { 449 /* 450 * The MAC module knows the current value and should 451 * never call us for it. There is also no default 452 * MTU, as by default, it is a dynamic property. 453 */ 454 err = ENOTSUP; 455 goto done; 456 } 457 break; 458 } 459 default: 460 err = EINVAL; 461 goto done; 462 } 463 if (is_possible) 464 bcopy(&range, pr_val, sizeof (range)); 465 done: 466 iptun_exit(iptun); 467 return (err); 468 } 469 470 uint_t 471 iptun_count(void) 472 { 473 return (iptun_tunnelcount); 474 } 475 476 /* 477 * Enter an iptun_t exclusively. This is essentially just a mutex, but we 478 * don't allow iptun_enter() to succeed on a tunnel if it's in the process of 479 * being deleted. 
480 */ 481 static int 482 iptun_enter(iptun_t *iptun) 483 { 484 mutex_enter(&iptun->iptun_lock); 485 while (iptun->iptun_flags & IPTUN_DELETE_PENDING) 486 cv_wait(&iptun->iptun_enter_cv, &iptun->iptun_lock); 487 if (iptun->iptun_flags & IPTUN_CONDEMNED) { 488 mutex_exit(&iptun->iptun_lock); 489 return (ENOENT); 490 } 491 return (0); 492 } 493 494 /* 495 * Exit the tunnel entered in iptun_enter(). 496 */ 497 static void 498 iptun_exit(iptun_t *iptun) 499 { 500 mutex_exit(&iptun->iptun_lock); 501 } 502 503 /* 504 * Enter the IP tunnel instance by datalink ID. 505 */ 506 static int 507 iptun_enter_by_linkid(datalink_id_t linkid, iptun_t **iptun) 508 { 509 int err; 510 511 mutex_enter(&iptun_hash_lock); 512 if (mod_hash_find(iptun_hash, IPTUN_HASH_KEY(linkid), 513 (mod_hash_val_t *)iptun) == 0) 514 err = iptun_enter(*iptun); 515 else 516 err = ENOENT; 517 if (err != 0) 518 *iptun = NULL; 519 mutex_exit(&iptun_hash_lock); 520 return (err); 521 } 522 523 /* 524 * Handle tasks that were deferred through the iptun_taskq because they require 525 * calling up to the mac module, and we can't call up to the mac module while 526 * holding locks. 527 * 528 * This is tricky to get right without introducing race conditions and 529 * deadlocks with the mac module, as we cannot issue an upcall while in the 530 * iptun_t. The reason is that upcalls may try and enter the mac perimeter, 531 * while iptun callbacks (such as iptun_m_setprop()) called from the mac 532 * module will already have the perimeter held, and will then try and enter 533 * the iptun_t. You can see the lock ordering problem with this; this will 534 * deadlock. 535 * 536 * The safe way to do this is to enter the iptun_t in question and copy the 537 * information we need out of it so that we can exit it and know that the 538 * information being passed up to the upcalls won't be subject to modification 539 * by other threads. 
The problem now is that we need to exit it prior to 540 * issuing the upcall, but once we do this, a thread could come along and 541 * delete the iptun_t and thus the mac handle required to issue the upcall. 542 * To prevent this, we set the IPTUN_UPCALL_PENDING flag prior to exiting the 543 * iptun_t. This flag is the condition associated with iptun_upcall_cv, which 544 * iptun_delete() will cv_wait() on. When the upcall completes, we clear 545 * IPTUN_UPCALL_PENDING and cv_signal() any potentially waiting 546 * iptun_delete(). We can thus still safely use iptun->iptun_mh after having 547 * exited the iptun_t. 548 */ 549 static void 550 iptun_task_cb(void *arg) 551 { 552 iptun_task_data_t *itd = arg; 553 iptun_task_t task = itd->itd_task; 554 datalink_id_t linkid = itd->itd_linkid; 555 iptun_t *iptun; 556 uint32_t mtu; 557 iptun_addr_t addr; 558 link_state_t linkstate; 559 size_t header_size; 560 iptun_header_t header; 561 562 kmem_free(itd, sizeof (*itd)); 563 564 /* 565 * Note that if the lookup fails, it's because the tunnel was deleted 566 * between the time the task was dispatched and now. That isn't an 567 * error. 568 */ 569 if (iptun_enter_by_linkid(linkid, &iptun) != 0) 570 return; 571 572 iptun->iptun_flags |= IPTUN_UPCALL_PENDING; 573 574 switch (task) { 575 case IPTUN_TASK_MTU_UPDATE: 576 mtu = iptun->iptun_mtu; 577 break; 578 case IPTUN_TASK_LADDR_UPDATE: 579 addr = iptun->iptun_laddr; 580 break; 581 case IPTUN_TASK_RADDR_UPDATE: 582 addr = iptun->iptun_raddr; 583 break; 584 case IPTUN_TASK_LINK_UPDATE: 585 linkstate = IS_IPTUN_RUNNING(iptun) ? 
586 LINK_STATE_UP : LINK_STATE_DOWN; 587 break; 588 case IPTUN_TASK_PDATA_UPDATE: 589 header_size = iptun->iptun_header_size; 590 header = iptun->iptun_header; 591 break; 592 default: 593 ASSERT(0); 594 } 595 596 iptun_exit(iptun); 597 598 switch (task) { 599 case IPTUN_TASK_MTU_UPDATE: 600 (void) mac_maxsdu_update(iptun->iptun_mh, mtu); 601 break; 602 case IPTUN_TASK_LADDR_UPDATE: 603 mac_unicst_update(iptun->iptun_mh, (uint8_t *)&addr.ia_addr); 604 break; 605 case IPTUN_TASK_RADDR_UPDATE: 606 mac_dst_update(iptun->iptun_mh, (uint8_t *)&addr.ia_addr); 607 break; 608 case IPTUN_TASK_LINK_UPDATE: 609 mac_link_update(iptun->iptun_mh, linkstate); 610 break; 611 case IPTUN_TASK_PDATA_UPDATE: 612 if (mac_pdata_update(iptun->iptun_mh, 613 header_size == 0 ? NULL : &header, header_size) != 0) 614 atomic_inc_64(&iptun->iptun_taskq_fail); 615 break; 616 } 617 618 mutex_enter(&iptun->iptun_lock); 619 iptun->iptun_flags &= ~IPTUN_UPCALL_PENDING; 620 cv_signal(&iptun->iptun_upcall_cv); 621 mutex_exit(&iptun->iptun_lock); 622 } 623 624 static void 625 iptun_task_dispatch(iptun_t *iptun, iptun_task_t iptun_task) 626 { 627 iptun_task_data_t *itd; 628 629 itd = kmem_alloc(sizeof (*itd), KM_NOSLEEP); 630 if (itd == NULL) { 631 atomic_inc_64(&iptun->iptun_taskq_fail); 632 return; 633 } 634 itd->itd_task = iptun_task; 635 itd->itd_linkid = iptun->iptun_linkid; 636 if (ddi_taskq_dispatch(iptun_taskq, iptun_task_cb, itd, DDI_NOSLEEP)) { 637 atomic_inc_64(&iptun->iptun_taskq_fail); 638 kmem_free(itd, sizeof (*itd)); 639 } 640 } 641 642 /* 643 * Convert an iptun_addr_t to sockaddr_storage. 
644 */ 645 static void 646 iptun_getaddr(iptun_addr_t *iptun_addr, struct sockaddr_storage *ss) 647 { 648 struct sockaddr_in *sin; 649 struct sockaddr_in6 *sin6; 650 651 bzero(ss, sizeof (*ss)); 652 switch (iptun_addr->ia_family) { 653 case AF_INET: 654 sin = (struct sockaddr_in *)ss; 655 sin->sin_addr.s_addr = iptun_addr->ia_addr.iau_addr4; 656 break; 657 case AF_INET6: 658 sin6 = (struct sockaddr_in6 *)ss; 659 sin6->sin6_addr = iptun_addr->ia_addr.iau_addr6; 660 break; 661 default: 662 ASSERT(0); 663 } 664 ss->ss_family = iptun_addr->ia_family; 665 } 666 667 /* 668 * General purpose function to set an IP tunnel source or destination address. 669 */ 670 static int 671 iptun_setaddr(iptun_type_t iptun_type, iptun_addr_t *iptun_addr, 672 const struct sockaddr_storage *ss) 673 { 674 if (!IPTUN_ADDR_MATCH(iptun_type, ss->ss_family)) 675 return (EINVAL); 676 677 switch (ss->ss_family) { 678 case AF_INET: { 679 struct sockaddr_in *sin = (struct sockaddr_in *)ss; 680 681 if ((sin->sin_addr.s_addr == INADDR_ANY) || 682 (sin->sin_addr.s_addr == INADDR_BROADCAST) || 683 CLASSD(sin->sin_addr.s_addr)) { 684 return (EADDRNOTAVAIL); 685 } 686 iptun_addr->ia_addr.iau_addr4 = sin->sin_addr.s_addr; 687 break; 688 } 689 case AF_INET6: { 690 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)ss; 691 692 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) || 693 IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr) || 694 IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 695 return (EADDRNOTAVAIL); 696 } 697 iptun_addr->ia_addr.iau_addr6 = sin6->sin6_addr; 698 break; 699 } 700 default: 701 return (EAFNOSUPPORT); 702 } 703 iptun_addr->ia_family = ss->ss_family; 704 return (0); 705 } 706 707 static int 708 iptun_setladdr(iptun_t *iptun, const struct sockaddr_storage *laddr) 709 { 710 return (iptun_setaddr(iptun->iptun_typeinfo->iti_type, 711 &iptun->iptun_laddr, laddr)); 712 } 713 714 static int 715 iptun_setraddr(iptun_t *iptun, const struct sockaddr_storage *raddr) 716 { 717 if 
(!(iptun->iptun_typeinfo->iti_hasraddr)) 718 return (EINVAL); 719 return (iptun_setaddr(iptun->iptun_typeinfo->iti_type, 720 &iptun->iptun_raddr, raddr)); 721 } 722 723 static boolean_t 724 iptun_canbind(iptun_t *iptun) 725 { 726 /* 727 * A tunnel may bind when its source address has been set, and if its 728 * tunnel type requires one, also its destination address. 729 */ 730 return ((iptun->iptun_flags & IPTUN_LADDR) && 731 ((iptun->iptun_flags & IPTUN_RADDR) || 732 !(iptun->iptun_typeinfo->iti_hasraddr))); 733 } 734 735 /* 736 * Verify that the local address is valid, and insert in the fanout 737 */ 738 static int 739 iptun_bind(iptun_t *iptun) 740 { 741 conn_t *connp = iptun->iptun_connp; 742 int error = 0; 743 ip_xmit_attr_t *ixa; 744 iulp_t uinfo; 745 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 746 747 /* Get an exclusive ixa for this thread, and replace conn_ixa */ 748 ixa = conn_get_ixa(connp, B_TRUE); 749 if (ixa == NULL) 750 return (ENOMEM); 751 ASSERT(ixa->ixa_refcnt >= 2); 752 ASSERT(ixa == connp->conn_ixa); 753 754 /* We create PMTU state including for 6to4 */ 755 ixa->ixa_flags |= IXAF_PMTU_DISCOVERY; 756 757 ASSERT(iptun_canbind(iptun)); 758 759 mutex_enter(&connp->conn_lock); 760 /* 761 * Note that conn_proto can't be set since the upper protocol 762 * can be both 41 and 4 when IPv6 and IPv4 are over the same tunnel. 763 * ipcl_iptun_classify doesn't use conn_proto. 
764 */ 765 connp->conn_ipversion = iptun->iptun_typeinfo->iti_ipvers; 766 767 switch (iptun->iptun_typeinfo->iti_type) { 768 case IPTUN_TYPE_IPV4: 769 IN6_IPADDR_TO_V4MAPPED(iptun->iptun_laddr4, 770 &connp->conn_laddr_v6); 771 IN6_IPADDR_TO_V4MAPPED(iptun->iptun_raddr4, 772 &connp->conn_faddr_v6); 773 ixa->ixa_flags |= IXAF_IS_IPV4; 774 if (ip_laddr_verify_v4(iptun->iptun_laddr4, IPCL_ZONEID(connp), 775 ipst, B_FALSE) != IPVL_UNICAST_UP) { 776 mutex_exit(&connp->conn_lock); 777 error = EADDRNOTAVAIL; 778 goto done; 779 } 780 break; 781 case IPTUN_TYPE_IPV6: 782 connp->conn_laddr_v6 = iptun->iptun_laddr6; 783 connp->conn_faddr_v6 = iptun->iptun_raddr6; 784 ixa->ixa_flags &= ~IXAF_IS_IPV4; 785 /* We use a zero scopeid for now */ 786 if (ip_laddr_verify_v6(&iptun->iptun_laddr6, IPCL_ZONEID(connp), 787 ipst, B_FALSE, 0) != IPVL_UNICAST_UP) { 788 mutex_exit(&connp->conn_lock); 789 error = EADDRNOTAVAIL; 790 goto done; 791 } 792 break; 793 case IPTUN_TYPE_6TO4: 794 IN6_IPADDR_TO_V4MAPPED(iptun->iptun_laddr4, 795 &connp->conn_laddr_v6); 796 IN6_IPADDR_TO_V4MAPPED(INADDR_ANY, &connp->conn_faddr_v6); 797 ixa->ixa_flags |= IXAF_IS_IPV4; 798 mutex_exit(&connp->conn_lock); 799 800 switch (ip_laddr_verify_v4(iptun->iptun_laddr4, 801 IPCL_ZONEID(connp), ipst, B_FALSE)) { 802 case IPVL_UNICAST_UP: 803 case IPVL_UNICAST_DOWN: 804 break; 805 default: 806 error = EADDRNOTAVAIL; 807 goto done; 808 } 809 goto insert; 810 } 811 812 /* In case previous destination was multirt */ 813 ip_attr_newdst(ixa); 814 815 /* 816 * When we set a tunnel's destination address, we do not 817 * care if the destination is reachable. Transient routing 818 * issues should not inhibit the creation of a tunnel 819 * interface, for example. Thus we pass B_FALSE here. 
820 */ 821 connp->conn_saddr_v6 = connp->conn_laddr_v6; 822 mutex_exit(&connp->conn_lock); 823 824 /* As long as the MTU is large we avoid fragmentation */ 825 ixa->ixa_flags |= IXAF_DONTFRAG | IXAF_PMTU_IPV4_DF; 826 827 /* We handle IPsec in iptun_output_common */ 828 error = ip_attr_connect(connp, ixa, &connp->conn_saddr_v6, 829 &connp->conn_faddr_v6, &connp->conn_faddr_v6, 0, 830 &connp->conn_saddr_v6, &uinfo, 0); 831 832 if (error != 0) 833 goto done; 834 835 /* saddr shouldn't change since it was already set */ 836 ASSERT(IN6_ARE_ADDR_EQUAL(&connp->conn_laddr_v6, 837 &connp->conn_saddr_v6)); 838 839 /* We set IXAF_VERIFY_PMTU to catch PMTU increases */ 840 ixa->ixa_flags |= IXAF_VERIFY_PMTU; 841 ASSERT(uinfo.iulp_mtu != 0); 842 843 /* 844 * Allow setting new policies. 845 * The addresses/ports are already set, thus the IPsec policy calls 846 * can handle their passed-in conn's. 847 */ 848 connp->conn_policy_cached = B_FALSE; 849 850 insert: 851 error = ipcl_conn_insert(connp); 852 if (error != 0) 853 goto done; 854 855 /* Record this as the "last" send even though we haven't sent any */ 856 connp->conn_v6lastdst = connp->conn_faddr_v6; 857 858 iptun->iptun_flags |= IPTUN_BOUND; 859 /* 860 * Now that we're bound with ip below us, this is a good 861 * time to initialize the destination path MTU and to 862 * re-calculate the tunnel's link MTU. 
863 */ 864 (void) iptun_update_mtu(iptun, ixa, 0); 865 866 if (IS_IPTUN_RUNNING(iptun)) 867 iptun_task_dispatch(iptun, IPTUN_TASK_LINK_UPDATE); 868 869 done: 870 ixa_refrele(ixa); 871 return (error); 872 } 873 874 static void 875 iptun_unbind(iptun_t *iptun) 876 { 877 ASSERT(iptun->iptun_flags & IPTUN_BOUND); 878 ASSERT(mutex_owned(&iptun->iptun_lock) || 879 (iptun->iptun_flags & IPTUN_CONDEMNED)); 880 ip_unbind(iptun->iptun_connp); 881 iptun->iptun_flags &= ~IPTUN_BOUND; 882 if (!(iptun->iptun_flags & IPTUN_CONDEMNED)) 883 iptun_task_dispatch(iptun, IPTUN_TASK_LINK_UPDATE); 884 } 885 886 /* 887 * Re-generate the template data-link header for a given IP tunnel given the 888 * tunnel's current parameters. 889 */ 890 static void 891 iptun_headergen(iptun_t *iptun, boolean_t update_mac) 892 { 893 switch (iptun->iptun_typeinfo->iti_ipvers) { 894 case IPV4_VERSION: 895 /* 896 * We only need to use a custom IP header if the administrator 897 * has supplied a non-default hoplimit. 898 */ 899 if (iptun->iptun_hoplimit == IPTUN_DEFAULT_HOPLIMIT) { 900 iptun->iptun_header_size = 0; 901 break; 902 } 903 iptun->iptun_header_size = sizeof (ipha_t); 904 iptun->iptun_header4.ipha_version_and_hdr_length = 905 IP_SIMPLE_HDR_VERSION; 906 iptun->iptun_header4.ipha_fragment_offset_and_flags = 907 htons(IPH_DF); 908 iptun->iptun_header4.ipha_ttl = iptun->iptun_hoplimit; 909 break; 910 case IPV6_VERSION: { 911 ip6_t *ip6hp = &iptun->iptun_header6.it6h_ip6h; 912 913 /* 914 * We only need to use a custom IPv6 header if either the 915 * administrator has supplied a non-default hoplimit, or we 916 * need to include an encapsulation limit option in the outer 917 * header. 
918 */ 919 if (iptun->iptun_hoplimit == IPTUN_DEFAULT_HOPLIMIT && 920 iptun->iptun_encaplimit == 0) { 921 iptun->iptun_header_size = 0; 922 break; 923 } 924 925 (void) memset(ip6hp, 0, sizeof (*ip6hp)); 926 if (iptun->iptun_encaplimit == 0) { 927 iptun->iptun_header_size = sizeof (ip6_t); 928 ip6hp->ip6_nxt = IPPROTO_NONE; 929 } else { 930 iptun_encaplim_t *iel; 931 932 iptun->iptun_header_size = sizeof (iptun_ipv6hdrs_t); 933 /* 934 * The mac_ipv6 plugin requires ip6_plen to be in host 935 * byte order and reflect the extension headers 936 * present in the template. The actual network byte 937 * order ip6_plen will be set on a per-packet basis on 938 * transmit. 939 */ 940 ip6hp->ip6_plen = sizeof (*iel); 941 ip6hp->ip6_nxt = IPPROTO_DSTOPTS; 942 iel = &iptun->iptun_header6.it6h_encaplim; 943 *iel = iptun_encaplim_init; 944 iel->iel_telopt.ip6ot_encap_limit = 945 iptun->iptun_encaplimit; 946 } 947 948 ip6hp->ip6_hlim = iptun->iptun_hoplimit; 949 break; 950 } 951 } 952 953 if (update_mac) 954 iptun_task_dispatch(iptun, IPTUN_TASK_PDATA_UPDATE); 955 } 956 957 /* 958 * Insert inbound and outbound IPv4 and IPv6 policy into the given policy 959 * head. 960 */ 961 static boolean_t 962 iptun_insert_simple_policies(ipsec_policy_head_t *ph, ipsec_act_t *actp, 963 uint_t n, netstack_t *ns) 964 { 965 int f = IPSEC_AF_V4; 966 967 if (!ipsec_polhead_insert(ph, actp, n, f, IPSEC_TYPE_INBOUND, ns) || 968 !ipsec_polhead_insert(ph, actp, n, f, IPSEC_TYPE_OUTBOUND, ns)) 969 return (B_FALSE); 970 971 f = IPSEC_AF_V6; 972 return (ipsec_polhead_insert(ph, actp, n, f, IPSEC_TYPE_INBOUND, ns) && 973 ipsec_polhead_insert(ph, actp, n, f, IPSEC_TYPE_OUTBOUND, ns)); 974 } 975 976 /* 977 * Used to set IPsec policy when policy is set through the IPTUN_CREATE or 978 * IPTUN_MODIFY ioctls. 
979 */ 980 static int 981 iptun_set_sec_simple(iptun_t *iptun, const ipsec_req_t *ipsr) 982 { 983 int rc = 0; 984 uint_t nact; 985 ipsec_act_t *actp = NULL; 986 boolean_t clear_all, old_policy = B_FALSE; 987 ipsec_tun_pol_t *itp; 988 char name[MAXLINKNAMELEN]; 989 uint64_t gen; 990 netstack_t *ns = iptun->iptun_ns; 991 992 /* Can't specify self-encap on a tunnel. */ 993 if (ipsr->ipsr_self_encap_req != 0) 994 return (EINVAL); 995 996 /* 997 * If it's a "clear-all" entry, unset the security flags and resume 998 * normal cleartext (or inherit-from-global) policy. 999 */ 1000 clear_all = ((ipsr->ipsr_ah_req & IPTUN_IPSEC_REQ_MASK) == 0 && 1001 (ipsr->ipsr_esp_req & IPTUN_IPSEC_REQ_MASK) == 0); 1002 1003 ASSERT(mutex_owned(&iptun->iptun_lock)); 1004 itp = iptun->iptun_itp; 1005 if (itp == NULL) { 1006 if (clear_all) 1007 goto bail; 1008 if ((rc = dls_mgmt_get_linkinfo(iptun->iptun_linkid, name, NULL, 1009 NULL, NULL)) != 0) 1010 goto bail; 1011 ASSERT(name[0] != '\0'); 1012 if ((itp = create_tunnel_policy(name, &rc, &gen, ns)) == NULL) 1013 goto bail; 1014 iptun->iptun_itp = itp; 1015 } 1016 1017 /* Allocate the actvec now, before holding itp or polhead locks. */ 1018 ipsec_actvec_from_req(ipsr, &actp, &nact, ns); 1019 if (actp == NULL) { 1020 rc = ENOMEM; 1021 goto bail; 1022 } 1023 1024 /* 1025 * Just write on the active polhead. Save the primary/secondary stuff 1026 * for spdsock operations. 1027 * 1028 * Mutex because we need to write to the polhead AND flags atomically. 1029 * Other threads will acquire the polhead lock as a reader if the 1030 * (unprotected) flag is set. 1031 */ 1032 mutex_enter(&itp->itp_lock); 1033 if (itp->itp_flags & ITPF_P_TUNNEL) { 1034 /* Oops, we lost a race. Let's get out of here. 
*/ 1035 rc = EBUSY; 1036 goto mutex_bail; 1037 } 1038 old_policy = ((itp->itp_flags & ITPF_P_ACTIVE) != 0); 1039 1040 if (old_policy) { 1041 ITPF_CLONE(itp->itp_flags); 1042 rc = ipsec_copy_polhead(itp->itp_policy, itp->itp_inactive, ns); 1043 if (rc != 0) { 1044 /* inactive has already been cleared. */ 1045 itp->itp_flags &= ~ITPF_IFLAGS; 1046 goto mutex_bail; 1047 } 1048 rw_enter(&itp->itp_policy->iph_lock, RW_WRITER); 1049 ipsec_polhead_flush(itp->itp_policy, ns); 1050 } else { 1051 /* Else assume itp->itp_policy is already flushed. */ 1052 rw_enter(&itp->itp_policy->iph_lock, RW_WRITER); 1053 } 1054 1055 if (clear_all) { 1056 ASSERT(avl_numnodes(&itp->itp_policy->iph_rulebyid) == 0); 1057 itp->itp_flags &= ~ITPF_PFLAGS; 1058 rw_exit(&itp->itp_policy->iph_lock); 1059 old_policy = B_FALSE; /* Clear out the inactive one too. */ 1060 goto recover_bail; 1061 } 1062 1063 if (iptun_insert_simple_policies(itp->itp_policy, actp, nact, ns)) { 1064 rw_exit(&itp->itp_policy->iph_lock); 1065 /* 1066 * Adjust MTU and make sure the DL side knows what's up. 1067 */ 1068 itp->itp_flags = ITPF_P_ACTIVE; 1069 (void) iptun_update_mtu(iptun, NULL, 0); 1070 old_policy = B_FALSE; /* Blank out inactive - we succeeded */ 1071 } else { 1072 rw_exit(&itp->itp_policy->iph_lock); 1073 rc = ENOMEM; 1074 } 1075 1076 recover_bail: 1077 if (old_policy) { 1078 /* Recover policy in in active polhead. */ 1079 ipsec_swap_policy(itp->itp_policy, itp->itp_inactive, ns); 1080 ITPF_SWAP(itp->itp_flags); 1081 } 1082 1083 /* Clear policy in inactive polhead. 
*/ 1084 itp->itp_flags &= ~ITPF_IFLAGS; 1085 rw_enter(&itp->itp_inactive->iph_lock, RW_WRITER); 1086 ipsec_polhead_flush(itp->itp_inactive, ns); 1087 rw_exit(&itp->itp_inactive->iph_lock); 1088 1089 mutex_bail: 1090 mutex_exit(&itp->itp_lock); 1091 1092 bail: 1093 if (actp != NULL) 1094 ipsec_actvec_free(actp, nact); 1095 1096 return (rc); 1097 } 1098 1099 static iptun_typeinfo_t * 1100 iptun_gettypeinfo(iptun_type_t type) 1101 { 1102 int i; 1103 1104 for (i = 0; iptun_type_table[i].iti_type != IPTUN_TYPE_UNKNOWN; i++) { 1105 if (iptun_type_table[i].iti_type == type) 1106 break; 1107 } 1108 return (&iptun_type_table[i]); 1109 } 1110 1111 /* 1112 * Set the parameters included in ik on the tunnel iptun. Parameters that can 1113 * only be set at creation time are set in iptun_create(). 1114 */ 1115 static int 1116 iptun_setparams(iptun_t *iptun, const iptun_kparams_t *ik) 1117 { 1118 int err = 0; 1119 netstack_t *ns = iptun->iptun_ns; 1120 iptun_addr_t orig_laddr, orig_raddr; 1121 uint_t orig_flags = iptun->iptun_flags; 1122 1123 if (ik->iptun_kparam_flags & IPTUN_KPARAM_LADDR) { 1124 if (orig_flags & IPTUN_LADDR) 1125 orig_laddr = iptun->iptun_laddr; 1126 if ((err = iptun_setladdr(iptun, &ik->iptun_kparam_laddr)) != 0) 1127 return (err); 1128 iptun->iptun_flags |= IPTUN_LADDR; 1129 } 1130 1131 if (ik->iptun_kparam_flags & IPTUN_KPARAM_RADDR) { 1132 if (orig_flags & IPTUN_RADDR) 1133 orig_raddr = iptun->iptun_raddr; 1134 if ((err = iptun_setraddr(iptun, &ik->iptun_kparam_raddr)) != 0) 1135 goto done; 1136 iptun->iptun_flags |= IPTUN_RADDR; 1137 } 1138 1139 if (ik->iptun_kparam_flags & IPTUN_KPARAM_SECINFO) { 1140 /* 1141 * Set IPsec policy originating from the ifconfig(1M) command 1142 * line. 
This is traditionally called "simple" policy because 1143 * the ipsec_req_t (iptun_kparam_secinfo) can only describe a 1144 * simple policy of "do ESP on everything" and/or "do AH on 1145 * everything" (as opposed to the rich policy that can be 1146 * defined with ipsecconf(1M)). 1147 */ 1148 if (iptun->iptun_typeinfo->iti_type == IPTUN_TYPE_6TO4) { 1149 /* 1150 * Can't set security properties for automatic 1151 * tunnels. 1152 */ 1153 err = EINVAL; 1154 goto done; 1155 } 1156 1157 if (!ipsec_loaded(ns->netstack_ipsec)) { 1158 /* If IPsec can be loaded, try and load it now. */ 1159 if (ipsec_failed(ns->netstack_ipsec)) { 1160 err = EPROTONOSUPPORT; 1161 goto done; 1162 } 1163 ipsec_loader_loadnow(ns->netstack_ipsec); 1164 /* 1165 * ipsec_loader_loadnow() returns while IPsec is 1166 * loaded asynchronously. While a method exists to 1167 * wait for IPsec to load (ipsec_loader_wait()), it 1168 * requires use of a STREAMS queue to do a qwait(). 1169 * We're not in STREAMS context here, and so we can't 1170 * use it. This is not a problem in practice because 1171 * in the vast majority of cases, key management and 1172 * global policy will have loaded before any tunnels 1173 * are plumbed, and so IPsec will already have been 1174 * loaded. 1175 */ 1176 err = EAGAIN; 1177 goto done; 1178 } 1179 1180 err = iptun_set_sec_simple(iptun, &ik->iptun_kparam_secinfo); 1181 if (err == 0) { 1182 iptun->iptun_flags |= IPTUN_SIMPLE_POLICY; 1183 iptun->iptun_simple_policy = ik->iptun_kparam_secinfo; 1184 } 1185 } 1186 done: 1187 if (err != 0) { 1188 /* Restore original source and destination. 
*/ 1189 if (ik->iptun_kparam_flags & IPTUN_KPARAM_LADDR && 1190 (orig_flags & IPTUN_LADDR)) 1191 iptun->iptun_laddr = orig_laddr; 1192 if ((ik->iptun_kparam_flags & IPTUN_KPARAM_RADDR) && 1193 (orig_flags & IPTUN_RADDR)) 1194 iptun->iptun_raddr = orig_raddr; 1195 iptun->iptun_flags = orig_flags; 1196 } 1197 return (err); 1198 } 1199 1200 static int 1201 iptun_register(iptun_t *iptun) 1202 { 1203 mac_register_t *mac; 1204 int err; 1205 1206 ASSERT(!(iptun->iptun_flags & IPTUN_MAC_REGISTERED)); 1207 1208 if ((mac = mac_alloc(MAC_VERSION)) == NULL) 1209 return (EINVAL); 1210 1211 mac->m_type_ident = iptun->iptun_typeinfo->iti_ident; 1212 mac->m_driver = iptun; 1213 mac->m_dip = iptun_dip; 1214 mac->m_instance = (uint_t)-1; 1215 mac->m_src_addr = (uint8_t *)&iptun->iptun_laddr.ia_addr; 1216 mac->m_dst_addr = iptun->iptun_typeinfo->iti_hasraddr ? 1217 (uint8_t *)&iptun->iptun_raddr.ia_addr : NULL; 1218 mac->m_callbacks = &iptun_m_callbacks; 1219 mac->m_min_sdu = iptun->iptun_typeinfo->iti_minmtu; 1220 mac->m_max_sdu = iptun->iptun_mtu; 1221 if (iptun->iptun_header_size != 0) { 1222 mac->m_pdata = &iptun->iptun_header; 1223 mac->m_pdata_size = iptun->iptun_header_size; 1224 } 1225 if ((err = mac_register(mac, &iptun->iptun_mh)) == 0) 1226 iptun->iptun_flags |= IPTUN_MAC_REGISTERED; 1227 mac_free(mac); 1228 return (err); 1229 } 1230 1231 static int 1232 iptun_unregister(iptun_t *iptun) 1233 { 1234 int err; 1235 1236 ASSERT(iptun->iptun_flags & IPTUN_MAC_REGISTERED); 1237 if ((err = mac_unregister(iptun->iptun_mh)) == 0) 1238 iptun->iptun_flags &= ~IPTUN_MAC_REGISTERED; 1239 return (err); 1240 } 1241 1242 static conn_t * 1243 iptun_conn_create(iptun_t *iptun, netstack_t *ns, cred_t *credp) 1244 { 1245 conn_t *connp; 1246 1247 if ((connp = ipcl_conn_create(IPCL_IPCCONN, KM_NOSLEEP, ns)) == NULL) 1248 return (NULL); 1249 1250 connp->conn_flags |= IPCL_IPTUN; 1251 connp->conn_iptun = iptun; 1252 connp->conn_recv = iptun_input; 1253 connp->conn_recvicmp = iptun_input_icmp; 
1254 connp->conn_verifyicmp = iptun_verifyicmp; 1255 1256 /* 1257 * Register iptun_notify to listen to capability changes detected by IP. 1258 * This upcall is made in the context of the call to conn_ip_output. 1259 */ 1260 connp->conn_ixa->ixa_notify = iptun_notify; 1261 connp->conn_ixa->ixa_notify_cookie = iptun; 1262 1263 /* 1264 * For exclusive stacks we set conn_zoneid to GLOBAL_ZONEID as is done 1265 * for all other conn_t's. 1266 * 1267 * Note that there's an important distinction between iptun_zoneid and 1268 * conn_zoneid. The conn_zoneid is set to GLOBAL_ZONEID in non-global 1269 * exclusive stack zones to make the ip module believe that the 1270 * non-global zone is actually a global zone. Therefore, when 1271 * interacting with the ip module, we must always use conn_zoneid. 1272 */ 1273 connp->conn_zoneid = (ns->netstack_stackid == GLOBAL_NETSTACKID) ? 1274 crgetzoneid(credp) : GLOBAL_ZONEID; 1275 connp->conn_cred = credp; 1276 /* crfree() is done in ipcl_conn_destroy(), called by CONN_DEC_REF() */ 1277 crhold(connp->conn_cred); 1278 connp->conn_cpid = NOPID; 1279 1280 /* conn_allzones can not be set this early, hence no IPCL_ZONEID */ 1281 connp->conn_ixa->ixa_zoneid = connp->conn_zoneid; 1282 ASSERT(connp->conn_ref == 1); 1283 1284 /* Cache things in ixa without an extra refhold */ 1285 ASSERT(!(connp->conn_ixa->ixa_free_flags & IXA_FREE_CRED)); 1286 connp->conn_ixa->ixa_cred = connp->conn_cred; 1287 connp->conn_ixa->ixa_cpid = connp->conn_cpid; 1288 if (is_system_labeled()) 1289 connp->conn_ixa->ixa_tsl = crgetlabel(connp->conn_cred); 1290 1291 /* 1292 * Have conn_ip_output drop packets should our outer source 1293 * go invalid 1294 */ 1295 connp->conn_ixa->ixa_flags |= IXAF_VERIFY_SOURCE; 1296 1297 switch (iptun->iptun_typeinfo->iti_ipvers) { 1298 case IPV4_VERSION: 1299 connp->conn_family = AF_INET6; 1300 break; 1301 case IPV6_VERSION: 1302 connp->conn_family = AF_INET; 1303 break; 1304 } 1305 mutex_enter(&connp->conn_lock); 1306 
connp->conn_state_flags &= ~CONN_INCIPIENT; 1307 mutex_exit(&connp->conn_lock); 1308 return (connp); 1309 } 1310 1311 static void 1312 iptun_conn_destroy(conn_t *connp) 1313 { 1314 ip_quiesce_conn(connp); 1315 connp->conn_iptun = NULL; 1316 ASSERT(connp->conn_ref == 1); 1317 CONN_DEC_REF(connp); 1318 } 1319 1320 static iptun_t * 1321 iptun_alloc(void) 1322 { 1323 iptun_t *iptun; 1324 1325 if ((iptun = kmem_cache_alloc(iptun_cache, KM_NOSLEEP)) != NULL) { 1326 bzero(iptun, sizeof (*iptun)); 1327 atomic_inc_32(&iptun_tunnelcount); 1328 } 1329 return (iptun); 1330 } 1331 1332 static void 1333 iptun_free(iptun_t *iptun) 1334 { 1335 ASSERT(iptun->iptun_flags & IPTUN_CONDEMNED); 1336 1337 if (iptun->iptun_flags & IPTUN_HASH_INSERTED) { 1338 iptun_stack_t *iptuns = iptun->iptun_iptuns; 1339 1340 mutex_enter(&iptun_hash_lock); 1341 VERIFY(mod_hash_remove(iptun_hash, 1342 IPTUN_HASH_KEY(iptun->iptun_linkid), 1343 (mod_hash_val_t *)&iptun) == 0); 1344 mutex_exit(&iptun_hash_lock); 1345 iptun->iptun_flags &= ~IPTUN_HASH_INSERTED; 1346 mutex_enter(&iptuns->iptuns_lock); 1347 list_remove(&iptuns->iptuns_iptunlist, iptun); 1348 mutex_exit(&iptuns->iptuns_lock); 1349 } 1350 1351 if (iptun->iptun_flags & IPTUN_BOUND) 1352 iptun_unbind(iptun); 1353 1354 /* 1355 * After iptun_unregister(), there will be no threads executing a 1356 * downcall from the mac module, including in the tx datapath. 1357 */ 1358 if (iptun->iptun_flags & IPTUN_MAC_REGISTERED) 1359 VERIFY(iptun_unregister(iptun) == 0); 1360 1361 if (iptun->iptun_itp != NULL) { 1362 /* 1363 * Remove from the AVL tree, AND release the reference iptun_t 1364 * itself holds on the ITP. 
1365 */ 1366 itp_unlink(iptun->iptun_itp, iptun->iptun_ns); 1367 ITP_REFRELE(iptun->iptun_itp, iptun->iptun_ns); 1368 iptun->iptun_itp = NULL; 1369 iptun->iptun_flags &= ~IPTUN_SIMPLE_POLICY; 1370 } 1371 1372 /* 1373 * After ipcl_conn_destroy(), there will be no threads executing an 1374 * upcall from ip (i.e., iptun_input()), and it is then safe to free 1375 * the iptun_t. 1376 */ 1377 if (iptun->iptun_connp != NULL) { 1378 iptun_conn_destroy(iptun->iptun_connp); 1379 iptun->iptun_connp = NULL; 1380 } 1381 1382 kmem_cache_free(iptun_cache, iptun); 1383 atomic_dec_32(&iptun_tunnelcount); 1384 } 1385 1386 int 1387 iptun_create(iptun_kparams_t *ik, cred_t *credp) 1388 { 1389 iptun_t *iptun = NULL; 1390 int err = 0, mherr; 1391 char linkname[MAXLINKNAMELEN]; 1392 ipsec_tun_pol_t *itp; 1393 netstack_t *ns = NULL; 1394 iptun_stack_t *iptuns; 1395 datalink_id_t tmpid; 1396 zoneid_t zoneid = crgetzoneid(credp); 1397 boolean_t link_created = B_FALSE; 1398 1399 /* The tunnel type is mandatory */ 1400 if (!(ik->iptun_kparam_flags & IPTUN_KPARAM_TYPE)) 1401 return (EINVAL); 1402 1403 /* 1404 * Is the linkid that the caller wishes to associate with this new 1405 * tunnel assigned to this zone? 1406 */ 1407 if (zone_check_datalink(&zoneid, ik->iptun_kparam_linkid) != 0) { 1408 if (zoneid != GLOBAL_ZONEID) 1409 return (EINVAL); 1410 } else if (zoneid == GLOBAL_ZONEID) { 1411 return (EINVAL); 1412 } 1413 1414 /* 1415 * Make sure that we're not trying to create a tunnel that has already 1416 * been created. 
1417 */ 1418 if (iptun_enter_by_linkid(ik->iptun_kparam_linkid, &iptun) == 0) { 1419 iptun_exit(iptun); 1420 iptun = NULL; 1421 err = EEXIST; 1422 goto done; 1423 } 1424 1425 ns = netstack_find_by_cred(credp); 1426 iptuns = ns->netstack_iptun; 1427 1428 if ((iptun = iptun_alloc()) == NULL) { 1429 err = ENOMEM; 1430 goto done; 1431 } 1432 1433 iptun->iptun_linkid = ik->iptun_kparam_linkid; 1434 iptun->iptun_zoneid = zoneid; 1435 iptun->iptun_ns = ns; 1436 1437 iptun->iptun_typeinfo = iptun_gettypeinfo(ik->iptun_kparam_type); 1438 if (iptun->iptun_typeinfo->iti_type == IPTUN_TYPE_UNKNOWN) { 1439 err = EINVAL; 1440 goto done; 1441 } 1442 1443 if (ik->iptun_kparam_flags & IPTUN_KPARAM_IMPLICIT) 1444 iptun->iptun_flags |= IPTUN_IMPLICIT; 1445 1446 if ((err = iptun_setparams(iptun, ik)) != 0) 1447 goto done; 1448 1449 iptun->iptun_hoplimit = IPTUN_DEFAULT_HOPLIMIT; 1450 if (iptun->iptun_typeinfo->iti_type == IPTUN_TYPE_IPV6) 1451 iptun->iptun_encaplimit = IPTUN_DEFAULT_ENCAPLIMIT; 1452 1453 iptun_headergen(iptun, B_FALSE); 1454 1455 iptun->iptun_connp = iptun_conn_create(iptun, ns, credp); 1456 if (iptun->iptun_connp == NULL) { 1457 err = ENOMEM; 1458 goto done; 1459 } 1460 1461 iptun->iptun_mtu = iptun->iptun_typeinfo->iti_maxmtu; 1462 iptun->iptun_dpmtu = iptun->iptun_mtu; 1463 1464 /* 1465 * Find an ITP based on linkname. If we have parms already set via 1466 * the iptun_setparams() call above, it may have created an ITP for 1467 * us. We always try get_tunnel_policy() for DEBUG correctness 1468 * checks, and we may wish to refactor this to only check when 1469 * iptun_itp is NULL. 1470 */ 1471 if ((err = dls_mgmt_get_linkinfo(iptun->iptun_linkid, linkname, NULL, 1472 NULL, NULL)) != 0) 1473 goto done; 1474 if ((itp = get_tunnel_policy(linkname, ns)) != NULL) 1475 iptun->iptun_itp = itp; 1476 1477 /* 1478 * See if we have the necessary IP addresses assigned to this tunnel 1479 * to try and bind them with ip underneath us. 
If we're not ready to 1480 * bind yet, then we'll defer the bind operation until the addresses 1481 * are modified. 1482 */ 1483 if (iptun_canbind(iptun) && ((err = iptun_bind(iptun)) != 0)) 1484 goto done; 1485 1486 if ((err = iptun_register(iptun)) != 0) 1487 goto done; 1488 1489 err = dls_devnet_create(iptun->iptun_mh, iptun->iptun_linkid, 1490 iptun->iptun_zoneid); 1491 if (err != 0) 1492 goto done; 1493 link_created = B_TRUE; 1494 1495 /* 1496 * We hash by link-id as that is the key used by all other iptun 1497 * interfaces (modify, delete, etc.). 1498 */ 1499 if ((mherr = mod_hash_insert(iptun_hash, 1500 IPTUN_HASH_KEY(iptun->iptun_linkid), (mod_hash_val_t)iptun)) == 0) { 1501 mutex_enter(&iptuns->iptuns_lock); 1502 list_insert_head(&iptuns->iptuns_iptunlist, iptun); 1503 mutex_exit(&iptuns->iptuns_lock); 1504 iptun->iptun_flags |= IPTUN_HASH_INSERTED; 1505 } else if (mherr == MH_ERR_NOMEM) { 1506 err = ENOMEM; 1507 } else if (mherr == MH_ERR_DUPLICATE) { 1508 err = EEXIST; 1509 } else { 1510 err = EINVAL; 1511 } 1512 1513 done: 1514 if (iptun == NULL && ns != NULL) 1515 netstack_rele(ns); 1516 if (err != 0 && iptun != NULL) { 1517 if (link_created) { 1518 (void) dls_devnet_destroy(iptun->iptun_mh, &tmpid, 1519 B_TRUE); 1520 } 1521 iptun->iptun_flags |= IPTUN_CONDEMNED; 1522 iptun_free(iptun); 1523 } 1524 return (err); 1525 } 1526 1527 int 1528 iptun_delete(datalink_id_t linkid, cred_t *credp) 1529 { 1530 int err; 1531 iptun_t *iptun = NULL; 1532 1533 if ((err = iptun_enter_by_linkid(linkid, &iptun)) != 0) 1534 return (err); 1535 1536 /* One cannot delete a tunnel that belongs to another zone. */ 1537 if (iptun->iptun_zoneid != crgetzoneid(credp)) { 1538 iptun_exit(iptun); 1539 return (EACCES); 1540 } 1541 1542 /* 1543 * We need to exit iptun in order to issue calls up the stack such as 1544 * dls_devnet_destroy(). If we call up while still in iptun, deadlock 1545 * with calls coming down the stack is possible. 
We prevent other 1546 * threads from entering this iptun after we've exited it by setting 1547 * the IPTUN_DELETE_PENDING flag. This will cause callers of 1548 * iptun_enter() to block waiting on iptun_enter_cv. The assumption 1549 * here is that the functions we're calling while IPTUN_DELETE_PENDING 1550 * is set dont resuult in an iptun_enter() call, as that would result 1551 * in deadlock. 1552 */ 1553 iptun->iptun_flags |= IPTUN_DELETE_PENDING; 1554 1555 /* Wait for any pending upcall to the mac module to complete. */ 1556 while (iptun->iptun_flags & IPTUN_UPCALL_PENDING) 1557 cv_wait(&iptun->iptun_upcall_cv, &iptun->iptun_lock); 1558 1559 iptun_exit(iptun); 1560 1561 if ((err = dls_devnet_destroy(iptun->iptun_mh, &linkid, B_TRUE)) == 0) { 1562 /* 1563 * mac_disable() will fail with EBUSY if there are references 1564 * to the iptun MAC. If there are none, then mac_disable() 1565 * will assure that none can be acquired until the MAC is 1566 * unregistered. 1567 * 1568 * XXX CR 6791335 prevents us from calling mac_disable() prior 1569 * to dls_devnet_destroy(), so we unfortunately need to 1570 * attempt to re-create the devnet node if mac_disable() 1571 * fails. 1572 */ 1573 if ((err = mac_disable(iptun->iptun_mh)) != 0) { 1574 (void) dls_devnet_create(iptun->iptun_mh, linkid, 1575 iptun->iptun_zoneid); 1576 } 1577 } 1578 1579 /* 1580 * Now that we know the fate of this iptun_t, we need to clear 1581 * IPTUN_DELETE_PENDING, and set IPTUN_CONDEMNED if the iptun_t is 1582 * slated to be freed. Either way, we need to signal the threads 1583 * waiting in iptun_enter() so that they can either fail if 1584 * IPTUN_CONDEMNED is set, or continue if it's not. 
1585 */ 1586 mutex_enter(&iptun->iptun_lock); 1587 iptun->iptun_flags &= ~IPTUN_DELETE_PENDING; 1588 if (err == 0) 1589 iptun->iptun_flags |= IPTUN_CONDEMNED; 1590 cv_broadcast(&iptun->iptun_enter_cv); 1591 mutex_exit(&iptun->iptun_lock); 1592 1593 /* 1594 * Note that there is no danger in calling iptun_free() after having 1595 * dropped the iptun_lock since callers of iptun_enter() at this point 1596 * are doing so from iptun_enter_by_linkid() (mac_disable() got rid of 1597 * threads entering from mac callbacks which call iptun_enter() 1598 * directly) which holds iptun_hash_lock, and iptun_free() grabs this 1599 * lock in order to remove the iptun_t from the hash table. 1600 */ 1601 if (err == 0) 1602 iptun_free(iptun); 1603 1604 return (err); 1605 } 1606 1607 int 1608 iptun_modify(const iptun_kparams_t *ik, cred_t *credp) 1609 { 1610 iptun_t *iptun; 1611 boolean_t laddr_change = B_FALSE, raddr_change = B_FALSE; 1612 int err; 1613 1614 if ((err = iptun_enter_by_linkid(ik->iptun_kparam_linkid, &iptun)) != 0) 1615 return (err); 1616 1617 /* One cannot modify a tunnel that belongs to another zone. */ 1618 if (iptun->iptun_zoneid != crgetzoneid(credp)) { 1619 err = EACCES; 1620 goto done; 1621 } 1622 1623 /* The tunnel type cannot be changed */ 1624 if (ik->iptun_kparam_flags & IPTUN_KPARAM_TYPE) { 1625 err = EINVAL; 1626 goto done; 1627 } 1628 1629 if ((err = iptun_setparams(iptun, ik)) != 0) 1630 goto done; 1631 iptun_headergen(iptun, B_FALSE); 1632 1633 /* 1634 * If any of the tunnel's addresses has been modified and the tunnel 1635 * has the necessary addresses assigned to it, we need to try to bind 1636 * with ip underneath us. If we're not ready to bind yet, then we'll 1637 * try again when the addresses are modified later. 
1638 */ 1639 laddr_change = (ik->iptun_kparam_flags & IPTUN_KPARAM_LADDR); 1640 raddr_change = (ik->iptun_kparam_flags & IPTUN_KPARAM_RADDR); 1641 if (laddr_change || raddr_change) { 1642 if (iptun->iptun_flags & IPTUN_BOUND) 1643 iptun_unbind(iptun); 1644 if (iptun_canbind(iptun) && (err = iptun_bind(iptun)) != 0) { 1645 if (laddr_change) 1646 iptun->iptun_flags &= ~IPTUN_LADDR; 1647 if (raddr_change) 1648 iptun->iptun_flags &= ~IPTUN_RADDR; 1649 goto done; 1650 } 1651 } 1652 1653 if (laddr_change) 1654 iptun_task_dispatch(iptun, IPTUN_TASK_LADDR_UPDATE); 1655 if (raddr_change) 1656 iptun_task_dispatch(iptun, IPTUN_TASK_RADDR_UPDATE); 1657 1658 done: 1659 iptun_exit(iptun); 1660 return (err); 1661 } 1662 1663 /* Given an IP tunnel's datalink id, fill in its parameters. */ 1664 int 1665 iptun_info(iptun_kparams_t *ik, cred_t *credp) 1666 { 1667 iptun_t *iptun; 1668 int err; 1669 1670 /* Is the tunnel link visible from the caller's zone? */ 1671 if (!dls_devnet_islinkvisible(ik->iptun_kparam_linkid, 1672 crgetzoneid(credp))) 1673 return (ENOENT); 1674 1675 if ((err = iptun_enter_by_linkid(ik->iptun_kparam_linkid, &iptun)) != 0) 1676 return (err); 1677 1678 bzero(ik, sizeof (iptun_kparams_t)); 1679 1680 ik->iptun_kparam_linkid = iptun->iptun_linkid; 1681 ik->iptun_kparam_type = iptun->iptun_typeinfo->iti_type; 1682 ik->iptun_kparam_flags |= IPTUN_KPARAM_TYPE; 1683 1684 if (iptun->iptun_flags & IPTUN_LADDR) { 1685 iptun_getaddr(&iptun->iptun_laddr, &ik->iptun_kparam_laddr); 1686 ik->iptun_kparam_flags |= IPTUN_KPARAM_LADDR; 1687 } 1688 if (iptun->iptun_flags & IPTUN_RADDR) { 1689 iptun_getaddr(&iptun->iptun_raddr, &ik->iptun_kparam_raddr); 1690 ik->iptun_kparam_flags |= IPTUN_KPARAM_RADDR; 1691 } 1692 1693 if (iptun->iptun_flags & IPTUN_IMPLICIT) 1694 ik->iptun_kparam_flags |= IPTUN_KPARAM_IMPLICIT; 1695 1696 if (iptun->iptun_itp != NULL) { 1697 mutex_enter(&iptun->iptun_itp->itp_lock); 1698 if (iptun->iptun_itp->itp_flags & ITPF_P_ACTIVE) { 1699 
ik->iptun_kparam_flags |= IPTUN_KPARAM_IPSECPOL; 1700 if (iptun->iptun_flags & IPTUN_SIMPLE_POLICY) { 1701 ik->iptun_kparam_flags |= IPTUN_KPARAM_SECINFO; 1702 ik->iptun_kparam_secinfo = 1703 iptun->iptun_simple_policy; 1704 } 1705 } 1706 mutex_exit(&iptun->iptun_itp->itp_lock); 1707 } 1708 1709 done: 1710 iptun_exit(iptun); 1711 return (err); 1712 } 1713 1714 int 1715 iptun_set_6to4relay(netstack_t *ns, ipaddr_t relay_addr) 1716 { 1717 if (relay_addr == INADDR_BROADCAST || CLASSD(relay_addr)) 1718 return (EADDRNOTAVAIL); 1719 ns->netstack_iptun->iptuns_relay_rtr_addr = relay_addr; 1720 return (0); 1721 } 1722 1723 void 1724 iptun_get_6to4relay(netstack_t *ns, ipaddr_t *relay_addr) 1725 { 1726 *relay_addr = ns->netstack_iptun->iptuns_relay_rtr_addr; 1727 } 1728 1729 void 1730 iptun_set_policy(datalink_id_t linkid, ipsec_tun_pol_t *itp) 1731 { 1732 iptun_t *iptun; 1733 1734 if (iptun_enter_by_linkid(linkid, &iptun) != 0) 1735 return; 1736 if (iptun->iptun_itp != itp) { 1737 ASSERT(iptun->iptun_itp == NULL); 1738 ITP_REFHOLD(itp); 1739 iptun->iptun_itp = itp; 1740 } 1741 /* 1742 * IPsec policy means IPsec overhead, which means lower MTU. 1743 * Refresh the MTU for this tunnel. 1744 */ 1745 (void) iptun_update_mtu(iptun, NULL, 0); 1746 iptun_exit(iptun); 1747 } 1748 1749 /* 1750 * Obtain the path MTU to the tunnel destination. 1751 * Can return zero in some cases. 1752 */ 1753 static uint32_t 1754 iptun_get_dst_pmtu(iptun_t *iptun, ip_xmit_attr_t *ixa) 1755 { 1756 uint32_t pmtu = 0; 1757 conn_t *connp = iptun->iptun_connp; 1758 boolean_t need_rele = B_FALSE; 1759 1760 /* 1761 * We only obtain the pmtu for tunnels that have a remote tunnel 1762 * address. 
1763 */ 1764 if (!(iptun->iptun_flags & IPTUN_RADDR)) 1765 return (0); 1766 1767 if (ixa == NULL) { 1768 ixa = conn_get_ixa(connp, B_FALSE); 1769 if (ixa == NULL) 1770 return (0); 1771 need_rele = B_TRUE; 1772 } 1773 /* 1774 * Guard against ICMP errors before we have sent, as well as against 1775 * and a thread which held conn_ixa. 1776 */ 1777 if (ixa->ixa_ire != NULL) { 1778 pmtu = ip_get_pmtu(ixa); 1779 1780 /* 1781 * For both IPv4 and IPv6 we can have indication that the outer 1782 * header needs fragmentation. 1783 */ 1784 if (ixa->ixa_flags & IXAF_PMTU_TOO_SMALL) { 1785 /* Must allow fragmentation in ip_output */ 1786 ixa->ixa_flags &= ~IXAF_DONTFRAG; 1787 } else if (iptun->iptun_typeinfo->iti_type != IPTUN_TYPE_6TO4) { 1788 ixa->ixa_flags |= IXAF_DONTFRAG; 1789 } else { 1790 /* ip_get_pmtu might have set this - we don't want it */ 1791 ixa->ixa_flags &= ~IXAF_PMTU_IPV4_DF; 1792 } 1793 } 1794 1795 if (need_rele) 1796 ixa_refrele(ixa); 1797 return (pmtu); 1798 } 1799 1800 /* 1801 * Update the ip_xmit_attr_t to capture the current lower path mtu as known 1802 * by ip. 1803 */ 1804 static void 1805 iptun_update_dst_pmtu(iptun_t *iptun, ip_xmit_attr_t *ixa) 1806 { 1807 uint32_t pmtu; 1808 conn_t *connp = iptun->iptun_connp; 1809 boolean_t need_rele = B_FALSE; 1810 1811 /* IXAF_VERIFY_PMTU is not set if we don't have a fixed destination */ 1812 if (!(iptun->iptun_flags & IPTUN_RADDR)) 1813 return; 1814 1815 if (ixa == NULL) { 1816 ixa = conn_get_ixa(connp, B_FALSE); 1817 if (ixa == NULL) 1818 return; 1819 need_rele = B_TRUE; 1820 } 1821 /* 1822 * Guard against ICMP errors before we have sent, as well as against 1823 * and a thread which held conn_ixa. 1824 */ 1825 if (ixa->ixa_ire != NULL) { 1826 pmtu = ip_get_pmtu(ixa); 1827 /* 1828 * Update ixa_fragsize and ixa_pmtu. 1829 */ 1830 ixa->ixa_fragsize = ixa->ixa_pmtu = pmtu; 1831 1832 /* 1833 * For both IPv4 and IPv6 we can have indication that the outer 1834 * header needs fragmentation. 
1835 */ 1836 if (ixa->ixa_flags & IXAF_PMTU_TOO_SMALL) { 1837 /* Must allow fragmentation in ip_output */ 1838 ixa->ixa_flags &= ~IXAF_DONTFRAG; 1839 } else if (iptun->iptun_typeinfo->iti_type != IPTUN_TYPE_6TO4) { 1840 ixa->ixa_flags |= IXAF_DONTFRAG; 1841 } else { 1842 /* ip_get_pmtu might have set this - we don't want it */ 1843 ixa->ixa_flags &= ~IXAF_PMTU_IPV4_DF; 1844 } 1845 } 1846 1847 if (need_rele) 1848 ixa_refrele(ixa); 1849 } 1850 1851 /* 1852 * There is nothing that iptun can verify in addition to IP having 1853 * verified the IP addresses in the fanout. 1854 */ 1855 /* ARGSUSED */ 1856 static boolean_t 1857 iptun_verifyicmp(conn_t *connp, void *arg2, icmph_t *icmph, icmp6_t *icmp6, 1858 ip_recv_attr_t *ira) 1859 { 1860 return (B_TRUE); 1861 } 1862 1863 /* 1864 * Notify function registered with ip_xmit_attr_t. 1865 */ 1866 static void 1867 iptun_notify(void *arg, ip_xmit_attr_t *ixa, ixa_notify_type_t ntype, 1868 ixa_notify_arg_t narg) 1869 { 1870 iptun_t *iptun = (iptun_t *)arg; 1871 1872 switch (ntype) { 1873 case IXAN_PMTU: 1874 (void) iptun_update_mtu(iptun, ixa, narg); 1875 break; 1876 } 1877 } 1878 1879 /* 1880 * Returns the max of old_ovhd and the overhead associated with pol. 1881 */ 1882 static uint32_t 1883 iptun_max_policy_overhead(ipsec_policy_t *pol, uint32_t old_ovhd) 1884 { 1885 uint32_t new_ovhd = old_ovhd; 1886 1887 while (pol != NULL) { 1888 new_ovhd = max(new_ovhd, 1889 ipsec_act_ovhd(&pol->ipsp_act->ipa_act)); 1890 pol = pol->ipsp_hash.hash_next; 1891 } 1892 return (new_ovhd); 1893 } 1894 1895 static uint32_t 1896 iptun_get_ipsec_overhead(iptun_t *iptun) 1897 { 1898 ipsec_policy_root_t *ipr; 1899 ipsec_policy_head_t *iph; 1900 ipsec_policy_t *pol; 1901 ipsec_selector_t sel; 1902 int i; 1903 uint32_t ipsec_ovhd = 0; 1904 ipsec_tun_pol_t *itp = iptun->iptun_itp; 1905 netstack_t *ns = iptun->iptun_ns; 1906 1907 if (itp == NULL || !(itp->itp_flags & ITPF_P_ACTIVE)) { 1908 /* 1909 * Consult global policy, just in case. 
This will only work 1910 * if we have both source and destination addresses to work 1911 * with. 1912 */ 1913 if ((iptun->iptun_flags & (IPTUN_LADDR|IPTUN_RADDR)) != 1914 (IPTUN_LADDR|IPTUN_RADDR)) 1915 return (0); 1916 1917 iph = ipsec_system_policy(ns); 1918 bzero(&sel, sizeof (sel)); 1919 sel.ips_isv4 = 1920 (iptun->iptun_typeinfo->iti_ipvers == IPV4_VERSION); 1921 switch (iptun->iptun_typeinfo->iti_ipvers) { 1922 case IPV4_VERSION: 1923 sel.ips_local_addr_v4 = iptun->iptun_laddr4; 1924 sel.ips_remote_addr_v4 = iptun->iptun_raddr4; 1925 break; 1926 case IPV6_VERSION: 1927 sel.ips_local_addr_v6 = iptun->iptun_laddr6; 1928 sel.ips_remote_addr_v6 = iptun->iptun_raddr6; 1929 break; 1930 } 1931 /* Check for both IPv4 and IPv6. */ 1932 sel.ips_protocol = IPPROTO_ENCAP; 1933 pol = ipsec_find_policy_head(NULL, iph, IPSEC_TYPE_OUTBOUND, 1934 &sel); 1935 if (pol != NULL) { 1936 ipsec_ovhd = ipsec_act_ovhd(&pol->ipsp_act->ipa_act); 1937 IPPOL_REFRELE(pol); 1938 } 1939 sel.ips_protocol = IPPROTO_IPV6; 1940 pol = ipsec_find_policy_head(NULL, iph, IPSEC_TYPE_OUTBOUND, 1941 &sel); 1942 if (pol != NULL) { 1943 ipsec_ovhd = max(ipsec_ovhd, 1944 ipsec_act_ovhd(&pol->ipsp_act->ipa_act)); 1945 IPPOL_REFRELE(pol); 1946 } 1947 IPPH_REFRELE(iph, ns); 1948 } else { 1949 /* 1950 * Look through all of the possible IPsec actions for the 1951 * tunnel, and find the largest potential IPsec overhead. 
1952 */ 1953 iph = itp->itp_policy; 1954 rw_enter(&iph->iph_lock, RW_READER); 1955 ipr = &(iph->iph_root[IPSEC_TYPE_OUTBOUND]); 1956 ipsec_ovhd = iptun_max_policy_overhead( 1957 ipr->ipr_nonhash[IPSEC_AF_V4], 0); 1958 ipsec_ovhd = iptun_max_policy_overhead( 1959 ipr->ipr_nonhash[IPSEC_AF_V6], ipsec_ovhd); 1960 for (i = 0; i < ipr->ipr_nchains; i++) { 1961 ipsec_ovhd = iptun_max_policy_overhead( 1962 ipr->ipr_hash[i].hash_head, ipsec_ovhd); 1963 } 1964 rw_exit(&iph->iph_lock); 1965 } 1966 1967 return (ipsec_ovhd); 1968 } 1969 1970 /* 1971 * Calculate and return the maximum possible upper MTU for the given tunnel. 1972 * 1973 * If new_pmtu is set then we also need to update the lower path MTU information 1974 * in the ip_xmit_attr_t. That is needed since we set IXAF_VERIFY_PMTU so that 1975 * we are notified by conn_ip_output() when the path MTU increases. 1976 */ 1977 static uint32_t 1978 iptun_get_maxmtu(iptun_t *iptun, ip_xmit_attr_t *ixa, uint32_t new_pmtu) 1979 { 1980 size_t header_size, ipsec_overhead; 1981 uint32_t maxmtu, pmtu; 1982 1983 /* 1984 * Start with the path-MTU to the remote address, which is either 1985 * provided as the new_pmtu argument, or obtained using 1986 * iptun_get_dst_pmtu(). 1987 */ 1988 if (new_pmtu != 0) { 1989 if (iptun->iptun_flags & IPTUN_RADDR) 1990 iptun->iptun_dpmtu = new_pmtu; 1991 pmtu = new_pmtu; 1992 } else if (iptun->iptun_flags & IPTUN_RADDR) { 1993 if ((pmtu = iptun_get_dst_pmtu(iptun, ixa)) == 0) { 1994 /* 1995 * We weren't able to obtain the path-MTU of the 1996 * destination. Use the previous value. 1997 */ 1998 pmtu = iptun->iptun_dpmtu; 1999 } else { 2000 iptun->iptun_dpmtu = pmtu; 2001 } 2002 } else { 2003 /* 2004 * We have no path-MTU information to go on, use the maximum 2005 * possible value. 2006 */ 2007 pmtu = iptun->iptun_typeinfo->iti_maxmtu; 2008 } 2009 2010 /* 2011 * Now calculate tunneling overhead and subtract that from the 2012 * path-MTU information obtained above. 
2013 */ 2014 if (iptun->iptun_header_size != 0) { 2015 header_size = iptun->iptun_header_size; 2016 } else { 2017 switch (iptun->iptun_typeinfo->iti_ipvers) { 2018 case IPV4_VERSION: 2019 header_size = sizeof (ipha_t); 2020 if (is_system_labeled()) 2021 header_size += IP_MAX_OPT_LENGTH; 2022 break; 2023 case IPV6_VERSION: 2024 header_size = sizeof (iptun_ipv6hdrs_t); 2025 break; 2026 } 2027 } 2028 2029 ipsec_overhead = iptun_get_ipsec_overhead(iptun); 2030 2031 maxmtu = pmtu - (header_size + ipsec_overhead); 2032 return (max(maxmtu, iptun->iptun_typeinfo->iti_minmtu)); 2033 } 2034 2035 /* 2036 * Re-calculate the tunnel's MTU as seen from above and notify the MAC layer 2037 * of any change in MTU. The new_pmtu argument is the new lower path MTU to 2038 * the tunnel destination to be used in the tunnel MTU calculation. Passing 2039 * in 0 for new_pmtu causes the lower path MTU to be dynamically updated using 2040 * ip_get_pmtu(). 2041 * 2042 * If the calculated tunnel MTU is different than its previous value, then we 2043 * notify the MAC layer above us of this change using mac_maxsdu_update(). 2044 */ 2045 static uint32_t 2046 iptun_update_mtu(iptun_t *iptun, ip_xmit_attr_t *ixa, uint32_t new_pmtu) 2047 { 2048 uint32_t newmtu; 2049 2050 /* We always update the ixa since we might have set IXAF_VERIFY_PMTU */ 2051 iptun_update_dst_pmtu(iptun, ixa); 2052 2053 /* 2054 * We return the current MTU without updating it if it was pegged to a 2055 * static value using the MAC_PROP_MTU link property. 2056 */ 2057 if (iptun->iptun_flags & IPTUN_FIXED_MTU) 2058 return (iptun->iptun_mtu); 2059 2060 /* If the MTU isn't fixed, then use the maximum possible value. */ 2061 newmtu = iptun_get_maxmtu(iptun, ixa, new_pmtu); 2062 /* 2063 * We only dynamically adjust the tunnel MTU for tunnels with 2064 * destinations because dynamic MTU calculations are based on the 2065 * destination path-MTU. 
2066 */ 2067 if ((iptun->iptun_flags & IPTUN_RADDR) && newmtu != iptun->iptun_mtu) { 2068 iptun->iptun_mtu = newmtu; 2069 if (iptun->iptun_flags & IPTUN_MAC_REGISTERED) 2070 iptun_task_dispatch(iptun, IPTUN_TASK_MTU_UPDATE); 2071 } 2072 2073 return (newmtu); 2074 } 2075 2076 /* 2077 * Frees a packet or packet chain and bumps stat for each freed packet. 2078 */ 2079 static void 2080 iptun_drop_pkt(mblk_t *mp, uint64_t *stat) 2081 { 2082 mblk_t *pktmp; 2083 2084 for (pktmp = mp; pktmp != NULL; pktmp = mp) { 2085 mp = mp->b_next; 2086 pktmp->b_next = NULL; 2087 if (stat != NULL) 2088 atomic_inc_64(stat); 2089 freemsg(pktmp); 2090 } 2091 } 2092 2093 /* 2094 * Allocate and return a new mblk to hold an IP and ICMP header, and chain the 2095 * original packet to its b_cont. Returns NULL on failure. 2096 */ 2097 static mblk_t * 2098 iptun_build_icmperr(size_t hdrs_size, mblk_t *orig_pkt) 2099 { 2100 mblk_t *icmperr_mp; 2101 2102 if ((icmperr_mp = allocb(hdrs_size, BPRI_MED)) != NULL) { 2103 icmperr_mp->b_wptr += hdrs_size; 2104 /* tack on the offending packet */ 2105 icmperr_mp->b_cont = orig_pkt; 2106 } 2107 return (icmperr_mp); 2108 } 2109 2110 /* 2111 * Transmit an ICMP error. mp->b_rptr points at the packet to be included in 2112 * the ICMP error. 
2113 */ 2114 static void 2115 iptun_sendicmp_v4(iptun_t *iptun, icmph_t *icmp, ipha_t *orig_ipha, mblk_t *mp, 2116 ts_label_t *tsl) 2117 { 2118 size_t orig_pktsize, hdrs_size; 2119 mblk_t *icmperr_mp; 2120 ipha_t *new_ipha; 2121 icmph_t *new_icmp; 2122 ip_xmit_attr_t ixas; 2123 conn_t *connp = iptun->iptun_connp; 2124 2125 orig_pktsize = msgdsize(mp); 2126 hdrs_size = sizeof (ipha_t) + sizeof (icmph_t); 2127 if ((icmperr_mp = iptun_build_icmperr(hdrs_size, mp)) == NULL) { 2128 iptun_drop_pkt(mp, &iptun->iptun_noxmtbuf); 2129 return; 2130 } 2131 2132 new_ipha = (ipha_t *)icmperr_mp->b_rptr; 2133 new_icmp = (icmph_t *)(new_ipha + 1); 2134 2135 new_ipha->ipha_version_and_hdr_length = IP_SIMPLE_HDR_VERSION; 2136 new_ipha->ipha_type_of_service = 0; 2137 new_ipha->ipha_ident = 0; 2138 new_ipha->ipha_fragment_offset_and_flags = 0; 2139 new_ipha->ipha_ttl = orig_ipha->ipha_ttl; 2140 new_ipha->ipha_protocol = IPPROTO_ICMP; 2141 new_ipha->ipha_src = orig_ipha->ipha_dst; 2142 new_ipha->ipha_dst = orig_ipha->ipha_src; 2143 new_ipha->ipha_hdr_checksum = 0; /* will be computed by ip */ 2144 new_ipha->ipha_length = htons(hdrs_size + orig_pktsize); 2145 2146 *new_icmp = *icmp; 2147 new_icmp->icmph_checksum = 0; 2148 new_icmp->icmph_checksum = IP_CSUM(icmperr_mp, sizeof (ipha_t), 0); 2149 2150 bzero(&ixas, sizeof (ixas)); 2151 ixas.ixa_flags = IXAF_BASIC_SIMPLE_V4; 2152 if (new_ipha->ipha_src == INADDR_ANY) 2153 ixas.ixa_flags |= IXAF_SET_SOURCE; 2154 2155 ixas.ixa_zoneid = IPCL_ZONEID(connp); 2156 ixas.ixa_ipst = connp->conn_netstack->netstack_ip; 2157 ixas.ixa_cred = connp->conn_cred; 2158 ixas.ixa_cpid = NOPID; 2159 if (is_system_labeled()) 2160 ixas.ixa_tsl = tsl; 2161 2162 ixas.ixa_ifindex = 0; 2163 ixas.ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 2164 2165 (void) ip_output_simple(icmperr_mp, &ixas); 2166 ixa_cleanup(&ixas); 2167 } 2168 2169 static void 2170 iptun_sendicmp_v6(iptun_t *iptun, icmp6_t *icmp6, ip6_t *orig_ip6h, mblk_t *mp, 2171 ts_label_t *tsl) 2172 { 2173 
size_t orig_pktsize, hdrs_size; 2174 mblk_t *icmp6err_mp; 2175 ip6_t *new_ip6h; 2176 icmp6_t *new_icmp6; 2177 ip_xmit_attr_t ixas; 2178 conn_t *connp = iptun->iptun_connp; 2179 2180 orig_pktsize = msgdsize(mp); 2181 hdrs_size = sizeof (ip6_t) + sizeof (icmp6_t); 2182 if ((icmp6err_mp = iptun_build_icmperr(hdrs_size, mp)) == NULL) { 2183 iptun_drop_pkt(mp, &iptun->iptun_noxmtbuf); 2184 return; 2185 } 2186 2187 new_ip6h = (ip6_t *)icmp6err_mp->b_rptr; 2188 new_icmp6 = (icmp6_t *)(new_ip6h + 1); 2189 2190 new_ip6h->ip6_vcf = orig_ip6h->ip6_vcf; 2191 new_ip6h->ip6_plen = htons(sizeof (icmp6_t) + orig_pktsize); 2192 new_ip6h->ip6_hops = orig_ip6h->ip6_hops; 2193 new_ip6h->ip6_nxt = IPPROTO_ICMPV6; 2194 new_ip6h->ip6_src = orig_ip6h->ip6_dst; 2195 new_ip6h->ip6_dst = orig_ip6h->ip6_src; 2196 2197 *new_icmp6 = *icmp6; 2198 /* The checksum is calculated in ip_output_simple and friends. */ 2199 new_icmp6->icmp6_cksum = new_ip6h->ip6_plen; 2200 2201 bzero(&ixas, sizeof (ixas)); 2202 ixas.ixa_flags = IXAF_BASIC_SIMPLE_V6; 2203 if (IN6_IS_ADDR_UNSPECIFIED(&new_ip6h->ip6_src)) 2204 ixas.ixa_flags |= IXAF_SET_SOURCE; 2205 2206 ixas.ixa_zoneid = IPCL_ZONEID(connp); 2207 ixas.ixa_ipst = connp->conn_netstack->netstack_ip; 2208 ixas.ixa_cred = connp->conn_cred; 2209 ixas.ixa_cpid = NOPID; 2210 if (is_system_labeled()) 2211 ixas.ixa_tsl = tsl; 2212 2213 ixas.ixa_ifindex = 0; 2214 ixas.ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 2215 2216 (void) ip_output_simple(icmp6err_mp, &ixas); 2217 ixa_cleanup(&ixas); 2218 } 2219 2220 static void 2221 iptun_icmp_error_v4(iptun_t *iptun, ipha_t *orig_ipha, mblk_t *mp, 2222 uint8_t type, uint8_t code, ts_label_t *tsl) 2223 { 2224 icmph_t icmp; 2225 2226 bzero(&icmp, sizeof (icmp)); 2227 icmp.icmph_type = type; 2228 icmp.icmph_code = code; 2229 2230 iptun_sendicmp_v4(iptun, &icmp, orig_ipha, mp, tsl); 2231 } 2232 2233 static void 2234 iptun_icmp_fragneeded_v4(iptun_t *iptun, uint32_t newmtu, ipha_t *orig_ipha, 2235 mblk_t *mp, ts_label_t *tsl) 
2236 { 2237 icmph_t icmp; 2238 2239 icmp.icmph_type = ICMP_DEST_UNREACHABLE; 2240 icmp.icmph_code = ICMP_FRAGMENTATION_NEEDED; 2241 icmp.icmph_du_zero = 0; 2242 icmp.icmph_du_mtu = htons(newmtu); 2243 2244 iptun_sendicmp_v4(iptun, &icmp, orig_ipha, mp, tsl); 2245 } 2246 2247 static void 2248 iptun_icmp_error_v6(iptun_t *iptun, ip6_t *orig_ip6h, mblk_t *mp, 2249 uint8_t type, uint8_t code, uint32_t offset, ts_label_t *tsl) 2250 { 2251 icmp6_t icmp6; 2252 2253 bzero(&icmp6, sizeof (icmp6)); 2254 icmp6.icmp6_type = type; 2255 icmp6.icmp6_code = code; 2256 if (type == ICMP6_PARAM_PROB) 2257 icmp6.icmp6_pptr = htonl(offset); 2258 2259 iptun_sendicmp_v6(iptun, &icmp6, orig_ip6h, mp, tsl); 2260 } 2261 2262 static void 2263 iptun_icmp_toobig_v6(iptun_t *iptun, uint32_t newmtu, ip6_t *orig_ip6h, 2264 mblk_t *mp, ts_label_t *tsl) 2265 { 2266 icmp6_t icmp6; 2267 2268 icmp6.icmp6_type = ICMP6_PACKET_TOO_BIG; 2269 icmp6.icmp6_code = 0; 2270 icmp6.icmp6_mtu = htonl(newmtu); 2271 2272 iptun_sendicmp_v6(iptun, &icmp6, orig_ip6h, mp, tsl); 2273 } 2274 2275 /* 2276 * Determines if the packet pointed to by ipha or ip6h is an ICMP error. The 2277 * mp argument is only used to do bounds checking. 
2278 */ 2279 static boolean_t 2280 is_icmp_error(mblk_t *mp, ipha_t *ipha, ip6_t *ip6h) 2281 { 2282 uint16_t hlen; 2283 2284 if (ipha != NULL) { 2285 icmph_t *icmph; 2286 2287 ASSERT(ip6h == NULL); 2288 if (ipha->ipha_protocol != IPPROTO_ICMP) 2289 return (B_FALSE); 2290 2291 hlen = IPH_HDR_LENGTH(ipha); 2292 icmph = (icmph_t *)((uint8_t *)ipha + hlen); 2293 return (ICMP_IS_ERROR(icmph->icmph_type) || 2294 icmph->icmph_type == ICMP_REDIRECT); 2295 } else { 2296 icmp6_t *icmp6; 2297 uint8_t *nexthdrp; 2298 2299 ASSERT(ip6h != NULL); 2300 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hlen, &nexthdrp) || 2301 *nexthdrp != IPPROTO_ICMPV6) { 2302 return (B_FALSE); 2303 } 2304 2305 icmp6 = (icmp6_t *)((uint8_t *)ip6h + hlen); 2306 return (ICMP6_IS_ERROR(icmp6->icmp6_type) || 2307 icmp6->icmp6_type == ND_REDIRECT); 2308 } 2309 } 2310 2311 /* 2312 * Find inner and outer IP headers from a tunneled packet as setup for calls 2313 * into ipsec_tun_{in,out}bound(). 2314 * Note that we need to allow the outer header to be in a separate mblk from 2315 * the inner header. 2316 * If the caller knows the outer_hlen, the caller passes it in. Otherwise zero. 2317 */ 2318 static size_t 2319 iptun_find_headers(mblk_t *mp, size_t outer_hlen, ipha_t **outer4, 2320 ipha_t **inner4, ip6_t **outer6, ip6_t **inner6) 2321 { 2322 ipha_t *ipha; 2323 size_t first_mblkl = MBLKL(mp); 2324 mblk_t *inner_mp; 2325 2326 /* 2327 * Don't bother handling packets that don't have a full IP header in 2328 * the fist mblk. For the input path, the ip module ensures that this 2329 * won't happen, and on the output path, the IP tunneling MAC-type 2330 * plugins ensure that this also won't happen. 
2331 */ 2332 if (first_mblkl < sizeof (ipha_t)) 2333 return (0); 2334 ipha = (ipha_t *)(mp->b_rptr); 2335 switch (IPH_HDR_VERSION(ipha)) { 2336 case IPV4_VERSION: 2337 *outer4 = ipha; 2338 *outer6 = NULL; 2339 if (outer_hlen == 0) 2340 outer_hlen = IPH_HDR_LENGTH(ipha); 2341 break; 2342 case IPV6_VERSION: 2343 *outer4 = NULL; 2344 *outer6 = (ip6_t *)ipha; 2345 if (outer_hlen == 0) 2346 outer_hlen = ip_hdr_length_v6(mp, (ip6_t *)ipha); 2347 break; 2348 default: 2349 return (0); 2350 } 2351 2352 if (first_mblkl < outer_hlen || 2353 (first_mblkl == outer_hlen && mp->b_cont == NULL)) 2354 return (0); 2355 2356 /* 2357 * We don't bother doing a pullup here since the outer header will 2358 * just get stripped off soon on input anyway. We just want to ensure 2359 * that the inner* pointer points to a full header. 2360 */ 2361 if (first_mblkl == outer_hlen) { 2362 inner_mp = mp->b_cont; 2363 ipha = (ipha_t *)inner_mp->b_rptr; 2364 } else { 2365 inner_mp = mp; 2366 ipha = (ipha_t *)(mp->b_rptr + outer_hlen); 2367 } 2368 switch (IPH_HDR_VERSION(ipha)) { 2369 case IPV4_VERSION: 2370 if (inner_mp->b_wptr - (uint8_t *)ipha < sizeof (ipha_t)) 2371 return (0); 2372 *inner4 = ipha; 2373 *inner6 = NULL; 2374 break; 2375 case IPV6_VERSION: 2376 if (inner_mp->b_wptr - (uint8_t *)ipha < sizeof (ip6_t)) 2377 return (0); 2378 *inner4 = NULL; 2379 *inner6 = (ip6_t *)ipha; 2380 break; 2381 default: 2382 return (0); 2383 } 2384 2385 return (outer_hlen); 2386 } 2387 2388 /* 2389 * Received ICMP error in response to an X over IPv4 packet that we 2390 * transmitted. 2391 * 2392 * NOTE: "outer" refers to what's inside the ICMP payload. We will get one of 2393 * the following: 2394 * 2395 * [IPv4(0)][ICMPv4][IPv4(1)][IPv4(2)][ULP] 2396 * 2397 * or 2398 * 2399 * [IPv4(0)][ICMPv4][IPv4(1)][IPv6][ULP] 2400 * 2401 * And "outer4" will get set to IPv4(1), and inner[46] will correspond to 2402 * whatever the very-inner packet is (IPv4(2) or IPv6). 
2403 */ 2404 static void 2405 iptun_input_icmp_v4(iptun_t *iptun, mblk_t *data_mp, icmph_t *icmph, 2406 ip_recv_attr_t *ira) 2407 { 2408 uint8_t *orig; 2409 ipha_t *outer4, *inner4; 2410 ip6_t *outer6, *inner6; 2411 int outer_hlen; 2412 uint8_t type, code; 2413 2414 ASSERT(data_mp->b_cont == NULL); 2415 /* 2416 * Temporarily move b_rptr forward so that iptun_find_headers() can 2417 * find headers in the ICMP packet payload. 2418 */ 2419 orig = data_mp->b_rptr; 2420 data_mp->b_rptr = (uint8_t *)(icmph + 1); 2421 /* 2422 * The ip module ensures that ICMP errors contain at least the 2423 * original IP header (otherwise, the error would never have made it 2424 * here). 2425 */ 2426 ASSERT(MBLKL(data_mp) >= 0); 2427 outer_hlen = iptun_find_headers(data_mp, 0, &outer4, &inner4, &outer6, 2428 &inner6); 2429 ASSERT(outer6 == NULL); 2430 data_mp->b_rptr = orig; 2431 if (outer_hlen == 0) { 2432 iptun_drop_pkt(data_mp, &iptun->iptun_ierrors); 2433 return; 2434 } 2435 2436 /* Only ICMP errors due to tunneled packets should reach here. */ 2437 ASSERT(outer4->ipha_protocol == IPPROTO_ENCAP || 2438 outer4->ipha_protocol == IPPROTO_IPV6); 2439 2440 data_mp = ipsec_tun_inbound(ira, data_mp, iptun->iptun_itp, 2441 inner4, inner6, outer4, outer6, -outer_hlen, iptun->iptun_ns); 2442 if (data_mp == NULL) { 2443 /* Callee did all of the freeing. */ 2444 atomic_inc_64(&iptun->iptun_ierrors); 2445 return; 2446 } 2447 /* We should never see reassembled fragment here. */ 2448 ASSERT(data_mp->b_next == NULL); 2449 2450 data_mp->b_rptr = (uint8_t *)outer4 + outer_hlen; 2451 2452 /* 2453 * If the original packet being transmitted was itself an ICMP error, 2454 * then drop this packet. We don't want to generate an ICMP error in 2455 * response to an ICMP error. 2456 */ 2457 if (is_icmp_error(data_mp, inner4, inner6)) { 2458 iptun_drop_pkt(data_mp, &iptun->iptun_norcvbuf); 2459 return; 2460 } 2461 2462 switch (icmph->icmph_type) { 2463 case ICMP_DEST_UNREACHABLE: 2464 type = (inner4 != NULL ? 
icmph->icmph_type : ICMP6_DST_UNREACH); 2465 switch (icmph->icmph_code) { 2466 case ICMP_FRAGMENTATION_NEEDED: { 2467 uint32_t newmtu; 2468 2469 /* 2470 * We reconcile this with the fact that the tunnel may 2471 * also have IPsec policy by letting iptun_update_mtu 2472 * take care of it. 2473 */ 2474 newmtu = iptun_update_mtu(iptun, NULL, 2475 ntohs(icmph->icmph_du_mtu)); 2476 2477 if (inner4 != NULL) { 2478 iptun_icmp_fragneeded_v4(iptun, newmtu, inner4, 2479 data_mp, ira->ira_tsl); 2480 } else { 2481 iptun_icmp_toobig_v6(iptun, newmtu, inner6, 2482 data_mp, ira->ira_tsl); 2483 } 2484 return; 2485 } 2486 case ICMP_DEST_NET_UNREACH_ADMIN: 2487 case ICMP_DEST_HOST_UNREACH_ADMIN: 2488 code = (inner4 != NULL ? ICMP_DEST_NET_UNREACH_ADMIN : 2489 ICMP6_DST_UNREACH_ADMIN); 2490 break; 2491 default: 2492 code = (inner4 != NULL ? ICMP_HOST_UNREACHABLE : 2493 ICMP6_DST_UNREACH_ADDR); 2494 break; 2495 } 2496 break; 2497 case ICMP_TIME_EXCEEDED: 2498 if (inner6 != NULL) { 2499 type = ICMP6_TIME_EXCEEDED; 2500 code = 0; 2501 } /* else we're already set. */ 2502 break; 2503 case ICMP_PARAM_PROBLEM: 2504 /* 2505 * This is a problem with the outer header we transmitted. 2506 * Treat this as an output error. 2507 */ 2508 iptun_drop_pkt(data_mp, &iptun->iptun_oerrors); 2509 return; 2510 default: 2511 iptun_drop_pkt(data_mp, &iptun->iptun_norcvbuf); 2512 return; 2513 } 2514 2515 if (inner4 != NULL) { 2516 iptun_icmp_error_v4(iptun, inner4, data_mp, type, code, 2517 ira->ira_tsl); 2518 } else { 2519 iptun_icmp_error_v6(iptun, inner6, data_mp, type, code, 0, 2520 ira->ira_tsl); 2521 } 2522 } 2523 2524 /* 2525 * Return B_TRUE if the IPv6 packet pointed to by ip6h contains a Tunnel 2526 * Encapsulation Limit destination option. If there is one, set encaplim_ptr 2527 * to point to the option value. 
2528 */ 2529 static boolean_t 2530 iptun_find_encaplimit(mblk_t *mp, ip6_t *ip6h, uint8_t **encaplim_ptr) 2531 { 2532 ip_pkt_t pkt; 2533 uint8_t *endptr; 2534 ip6_dest_t *destp; 2535 struct ip6_opt *optp; 2536 2537 pkt.ipp_fields = 0; /* must be initialized */ 2538 (void) ip_find_hdr_v6(mp, ip6h, B_FALSE, &pkt, NULL); 2539 if ((pkt.ipp_fields & IPPF_DSTOPTS) != 0) { 2540 destp = pkt.ipp_dstopts; 2541 } else if ((pkt.ipp_fields & IPPF_RTHDRDSTOPTS) != 0) { 2542 destp = pkt.ipp_rthdrdstopts; 2543 } else { 2544 return (B_FALSE); 2545 } 2546 2547 endptr = (uint8_t *)destp + 8 * (destp->ip6d_len + 1); 2548 optp = (struct ip6_opt *)(destp + 1); 2549 while (endptr - (uint8_t *)optp > sizeof (*optp)) { 2550 if (optp->ip6o_type == IP6OPT_TUNNEL_LIMIT) { 2551 if ((uint8_t *)(optp + 1) >= endptr) 2552 return (B_FALSE); 2553 *encaplim_ptr = (uint8_t *)&optp[1]; 2554 return (B_TRUE); 2555 } 2556 optp = (struct ip6_opt *)((uint8_t *)optp + optp->ip6o_len + 2); 2557 } 2558 return (B_FALSE); 2559 } 2560 2561 /* 2562 * Received ICMPv6 error in response to an X over IPv6 packet that we 2563 * transmitted. 2564 * 2565 * NOTE: "outer" refers to what's inside the ICMP payload. We will get one of 2566 * the following: 2567 * 2568 * [IPv6(0)][ICMPv6][IPv6(1)][IPv4][ULP] 2569 * 2570 * or 2571 * 2572 * [IPv6(0)][ICMPv6][IPv6(1)][IPv6(2)][ULP] 2573 * 2574 * And "outer6" will get set to IPv6(1), and inner[46] will correspond to 2575 * whatever the very-inner packet is (IPv4 or IPv6(2)). 2576 */ 2577 static void 2578 iptun_input_icmp_v6(iptun_t *iptun, mblk_t *data_mp, icmp6_t *icmp6h, 2579 ip_recv_attr_t *ira) 2580 { 2581 uint8_t *orig; 2582 ipha_t *outer4, *inner4; 2583 ip6_t *outer6, *inner6; 2584 int outer_hlen; 2585 uint8_t type, code; 2586 2587 ASSERT(data_mp->b_cont == NULL); 2588 2589 /* 2590 * Temporarily move b_rptr forward so that iptun_find_headers() can 2591 * find IP headers in the ICMP packet payload. 
2592 */ 2593 orig = data_mp->b_rptr; 2594 data_mp->b_rptr = (uint8_t *)(icmp6h + 1); 2595 /* 2596 * The ip module ensures that ICMP errors contain at least the 2597 * original IP header (otherwise, the error would never have made it 2598 * here). 2599 */ 2600 ASSERT(MBLKL(data_mp) >= 0); 2601 outer_hlen = iptun_find_headers(data_mp, 0, &outer4, &inner4, &outer6, 2602 &inner6); 2603 ASSERT(outer4 == NULL); 2604 data_mp->b_rptr = orig; /* Restore r_ptr */ 2605 if (outer_hlen == 0) { 2606 iptun_drop_pkt(data_mp, &iptun->iptun_ierrors); 2607 return; 2608 } 2609 2610 data_mp = ipsec_tun_inbound(ira, data_mp, iptun->iptun_itp, 2611 inner4, inner6, outer4, outer6, -outer_hlen, iptun->iptun_ns); 2612 if (data_mp == NULL) { 2613 /* Callee did all of the freeing. */ 2614 atomic_inc_64(&iptun->iptun_ierrors); 2615 return; 2616 } 2617 /* We should never see reassembled fragment here. */ 2618 ASSERT(data_mp->b_next == NULL); 2619 2620 data_mp->b_rptr = (uint8_t *)outer6 + outer_hlen; 2621 2622 /* 2623 * If the original packet being transmitted was itself an ICMP error, 2624 * then drop this packet. We don't want to generate an ICMP error in 2625 * response to an ICMP error. 2626 */ 2627 if (is_icmp_error(data_mp, inner4, inner6)) { 2628 iptun_drop_pkt(data_mp, &iptun->iptun_norcvbuf); 2629 return; 2630 } 2631 2632 switch (icmp6h->icmp6_type) { 2633 case ICMP6_PARAM_PROB: { 2634 uint8_t *encaplim_ptr; 2635 2636 /* 2637 * If the ICMPv6 error points to a valid Tunnel Encapsulation 2638 * Limit option and the limit value is 0, then fall through 2639 * and send a host unreachable message. Otherwise, treat the 2640 * error as an output error, as there must have been a problem 2641 * with a packet we sent. 
2642 */ 2643 if (!iptun_find_encaplimit(data_mp, outer6, &encaplim_ptr) || 2644 (icmp6h->icmp6_pptr != 2645 ((ptrdiff_t)encaplim_ptr - (ptrdiff_t)outer6)) || 2646 *encaplim_ptr != 0) { 2647 iptun_drop_pkt(data_mp, &iptun->iptun_oerrors); 2648 return; 2649 } 2650 /* FALLTHRU */ 2651 } 2652 case ICMP6_TIME_EXCEEDED: 2653 case ICMP6_DST_UNREACH: 2654 type = (inner4 != NULL ? ICMP_DEST_UNREACHABLE : 2655 ICMP6_DST_UNREACH); 2656 code = (inner4 != NULL ? ICMP_HOST_UNREACHABLE : 2657 ICMP6_DST_UNREACH_ADDR); 2658 break; 2659 case ICMP6_PACKET_TOO_BIG: { 2660 uint32_t newmtu; 2661 2662 /* 2663 * We reconcile this with the fact that the tunnel may also 2664 * have IPsec policy by letting iptun_update_mtu take care of 2665 * it. 2666 */ 2667 newmtu = iptun_update_mtu(iptun, NULL, 2668 ntohl(icmp6h->icmp6_mtu)); 2669 2670 if (inner4 != NULL) { 2671 iptun_icmp_fragneeded_v4(iptun, newmtu, inner4, 2672 data_mp, ira->ira_tsl); 2673 } else { 2674 iptun_icmp_toobig_v6(iptun, newmtu, inner6, data_mp, 2675 ira->ira_tsl); 2676 } 2677 return; 2678 } 2679 default: 2680 iptun_drop_pkt(data_mp, &iptun->iptun_norcvbuf); 2681 return; 2682 } 2683 2684 if (inner4 != NULL) { 2685 iptun_icmp_error_v4(iptun, inner4, data_mp, type, code, 2686 ira->ira_tsl); 2687 } else { 2688 iptun_icmp_error_v6(iptun, inner6, data_mp, type, code, 0, 2689 ira->ira_tsl); 2690 } 2691 } 2692 2693 /* 2694 * Called as conn_recvicmp from IP for ICMP errors. 2695 */ 2696 /* ARGSUSED2 */ 2697 static void 2698 iptun_input_icmp(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *ira) 2699 { 2700 conn_t *connp = arg; 2701 iptun_t *iptun = connp->conn_iptun; 2702 mblk_t *tmpmp; 2703 size_t hlen; 2704 2705 ASSERT(IPCL_IS_IPTUN(connp)); 2706 2707 if (mp->b_cont != NULL) { 2708 /* 2709 * Since ICMP error processing necessitates access to bits 2710 * that are within the ICMP error payload (the original packet 2711 * that caused the error), pull everything up into a single 2712 * block for convenience. 
2713 */ 2714 if ((tmpmp = msgpullup(mp, -1)) == NULL) { 2715 iptun_drop_pkt(mp, &iptun->iptun_norcvbuf); 2716 return; 2717 } 2718 freemsg(mp); 2719 mp = tmpmp; 2720 } 2721 2722 hlen = ira->ira_ip_hdr_length; 2723 switch (iptun->iptun_typeinfo->iti_ipvers) { 2724 case IPV4_VERSION: 2725 /* 2726 * The outer IP header coming up from IP is always ipha_t 2727 * alligned (otherwise, we would have crashed in ip). 2728 */ 2729 iptun_input_icmp_v4(iptun, mp, (icmph_t *)(mp->b_rptr + hlen), 2730 ira); 2731 break; 2732 case IPV6_VERSION: 2733 iptun_input_icmp_v6(iptun, mp, (icmp6_t *)(mp->b_rptr + hlen), 2734 ira); 2735 break; 2736 } 2737 } 2738 2739 static boolean_t 2740 iptun_in_6to4_ok(iptun_t *iptun, ipha_t *outer4, ip6_t *inner6) 2741 { 2742 ipaddr_t v4addr; 2743 2744 /* 2745 * It's possible that someone sent us an IPv4-in-IPv4 packet with the 2746 * IPv4 address of a 6to4 tunnel as the destination. 2747 */ 2748 if (inner6 == NULL) 2749 return (B_FALSE); 2750 2751 /* 2752 * Make sure that the IPv6 destination is within the site that this 2753 * 6to4 tunnel is routing for. We don't want people bouncing random 2754 * tunneled IPv6 packets through this 6to4 router. 2755 */ 2756 IN6_6TO4_TO_V4ADDR(&inner6->ip6_dst, (struct in_addr *)&v4addr); 2757 if (outer4->ipha_dst != v4addr) 2758 return (B_FALSE); 2759 2760 if (IN6_IS_ADDR_6TO4(&inner6->ip6_src)) { 2761 /* 2762 * Section 9 of RFC 3056 (security considerations) suggests 2763 * that when a packet is from a 6to4 site (i.e., it's not a 2764 * global address being forwarded froma relay router), make 2765 * sure that the packet was tunneled by that site's 6to4 2766 * router. 2767 */ 2768 IN6_6TO4_TO_V4ADDR(&inner6->ip6_src, (struct in_addr *)&v4addr); 2769 if (outer4->ipha_src != v4addr) 2770 return (B_FALSE); 2771 } else { 2772 /* 2773 * Only accept packets from a relay router if we've configured 2774 * outbound relay router functionality. 
2775 */ 2776 if (iptun->iptun_iptuns->iptuns_relay_rtr_addr == INADDR_ANY) 2777 return (B_FALSE); 2778 } 2779 2780 return (B_TRUE); 2781 } 2782 2783 /* 2784 * Input function for everything that comes up from the ip module below us. 2785 * This is called directly from the ip module via connp->conn_recv(). 2786 * 2787 * We receive M_DATA messages with IP-in-IP tunneled packets. 2788 */ 2789 /* ARGSUSED2 */ 2790 static void 2791 iptun_input(void *arg, mblk_t *data_mp, void *arg2, ip_recv_attr_t *ira) 2792 { 2793 conn_t *connp = arg; 2794 iptun_t *iptun = connp->conn_iptun; 2795 int outer_hlen; 2796 ipha_t *outer4, *inner4; 2797 ip6_t *outer6, *inner6; 2798 2799 ASSERT(IPCL_IS_IPTUN(connp)); 2800 ASSERT(DB_TYPE(data_mp) == M_DATA); 2801 2802 outer_hlen = iptun_find_headers(data_mp, ira->ira_ip_hdr_length, 2803 &outer4, &inner4, &outer6, &inner6); 2804 if (outer_hlen == 0) 2805 goto drop; 2806 2807 /* 2808 * If the system is labeled, we call tsol_check_dest() on the packet 2809 * destination (our local tunnel address) to ensure that the packet as 2810 * labeled should be allowed to be sent to us. We don't need to call 2811 * the more involved tsol_receive_local() since the tunnel link itself 2812 * cannot be assigned to shared-stack non-global zones. 2813 */ 2814 if (ira->ira_flags & IRAF_SYSTEM_LABELED) { 2815 if (ira->ira_tsl == NULL) 2816 goto drop; 2817 if (tsol_check_dest(ira->ira_tsl, (outer4 != NULL ? 2818 (void *)&outer4->ipha_dst : (void *)&outer6->ip6_dst), 2819 (outer4 != NULL ? IPV4_VERSION : IPV6_VERSION), 2820 CONN_MAC_DEFAULT, B_FALSE, NULL) != 0) 2821 goto drop; 2822 } 2823 2824 data_mp = ipsec_tun_inbound(ira, data_mp, iptun->iptun_itp, 2825 inner4, inner6, outer4, outer6, outer_hlen, iptun->iptun_ns); 2826 if (data_mp == NULL) { 2827 /* Callee did all of the freeing. 
*/ 2828 return; 2829 } 2830 2831 if (iptun->iptun_typeinfo->iti_type == IPTUN_TYPE_6TO4 && 2832 !iptun_in_6to4_ok(iptun, outer4, inner6)) 2833 goto drop; 2834 2835 /* 2836 * We need to statistically account for each packet individually, so 2837 * we might as well split up any b_next chains here. 2838 */ 2839 do { 2840 mblk_t *mp; 2841 2842 mp = data_mp->b_next; 2843 data_mp->b_next = NULL; 2844 2845 atomic_inc_64(&iptun->iptun_ipackets); 2846 atomic_add_64(&iptun->iptun_rbytes, msgdsize(data_mp)); 2847 mac_rx(iptun->iptun_mh, NULL, data_mp); 2848 2849 data_mp = mp; 2850 } while (data_mp != NULL); 2851 return; 2852 drop: 2853 iptun_drop_pkt(data_mp, &iptun->iptun_ierrors); 2854 } 2855 2856 /* 2857 * Do 6to4-specific header-processing on output. Return B_TRUE if the packet 2858 * was processed without issue, or B_FALSE if the packet had issues and should 2859 * be dropped. 2860 */ 2861 static boolean_t 2862 iptun_out_process_6to4(iptun_t *iptun, ipha_t *outer4, ip6_t *inner6) 2863 { 2864 ipaddr_t v4addr; 2865 2866 /* 2867 * IPv6 source must be a 6to4 address. This is because a conscious 2868 * decision was made to not allow a Solaris system to be used as a 2869 * relay router (for security reasons) when 6to4 was initially 2870 * integrated. If this decision is ever reversed, the following check 2871 * can be removed. 2872 */ 2873 if (!IN6_IS_ADDR_6TO4(&inner6->ip6_src)) 2874 return (B_FALSE); 2875 2876 /* 2877 * RFC3056 mandates that the IPv4 source MUST be set to the IPv4 2878 * portion of the 6to4 IPv6 source address. In other words, make sure 2879 * that we're tunneling packets from our own 6to4 site. 2880 */ 2881 IN6_6TO4_TO_V4ADDR(&inner6->ip6_src, (struct in_addr *)&v4addr); 2882 if (outer4->ipha_src != v4addr) 2883 return (B_FALSE); 2884 2885 /* 2886 * Automatically set the destination of the outer IPv4 header as 2887 * described in RFC3056. There are two possibilities: 2888 * 2889 * a. 
If the IPv6 destination is a 6to4 address, set the IPv4 address 2890 * to the IPv4 portion of the 6to4 address. 2891 * b. If the IPv6 destination is a native IPv6 address, set the IPv4 2892 * destination to the address of a relay router. 2893 * 2894 * Design Note: b shouldn't be necessary here, and this is a flaw in 2895 * the design of the 6to4relay command. Instead of setting a 6to4 2896 * relay address in this module via an ioctl, the 6to4relay command 2897 * could simply add a IPv6 route for native IPv6 addresses (such as a 2898 * default route) in the forwarding table that uses a 6to4 destination 2899 * as its next hop, and the IPv4 portion of that address could be a 2900 * 6to4 relay address. In order for this to work, IP would have to 2901 * resolve the next hop address, which would necessitate a link-layer 2902 * address resolver for 6to4 links, which doesn't exist today. 2903 * 2904 * In fact, if a resolver existed for 6to4 links, then setting the 2905 * IPv4 destination in the outer header could be done as part of 2906 * link-layer address resolution and fast-path header generation, and 2907 * not here. 2908 */ 2909 if (IN6_IS_ADDR_6TO4(&inner6->ip6_dst)) { 2910 /* destination is a 6to4 router */ 2911 IN6_6TO4_TO_V4ADDR(&inner6->ip6_dst, 2912 (struct in_addr *)&outer4->ipha_dst); 2913 2914 /* Reject attempts to send to INADDR_ANY */ 2915 if (outer4->ipha_dst == INADDR_ANY) 2916 return (B_FALSE); 2917 } else { 2918 /* 2919 * The destination is a native IPv6 address. If output to a 2920 * relay-router is enabled, use the relay-router's IPv4 2921 * address as the destination. 
2922 */ 2923 if (iptun->iptun_iptuns->iptuns_relay_rtr_addr == INADDR_ANY) 2924 return (B_FALSE); 2925 outer4->ipha_dst = iptun->iptun_iptuns->iptuns_relay_rtr_addr; 2926 } 2927 2928 /* 2929 * If the outer source and destination are equal, this means that the 2930 * 6to4 router somehow forwarded an IPv6 packet destined for its own 2931 * 6to4 site to its 6to4 tunnel interface, which will result in this 2932 * packet infinitely bouncing between ip and iptun. 2933 */ 2934 return (outer4->ipha_src != outer4->ipha_dst); 2935 } 2936 2937 /* 2938 * Process output packets with outer IPv4 headers. Frees mp and bumps stat on 2939 * error. 2940 */ 2941 static mblk_t * 2942 iptun_out_process_ipv4(iptun_t *iptun, mblk_t *mp, ipha_t *outer4, 2943 ipha_t *inner4, ip6_t *inner6, ip_xmit_attr_t *ixa) 2944 { 2945 uint8_t *innerptr = (inner4 != NULL ? 2946 (uint8_t *)inner4 : (uint8_t *)inner6); 2947 size_t minmtu = iptun->iptun_typeinfo->iti_minmtu; 2948 2949 if (inner4 != NULL) { 2950 ASSERT(outer4->ipha_protocol == IPPROTO_ENCAP); 2951 /* 2952 * Copy the tos from the inner IPv4 header. We mask off ECN 2953 * bits (bits 6 and 7) because there is currently no 2954 * tunnel-tunnel communication to determine if both sides 2955 * support ECN. We opt for the safe choice: don't copy the 2956 * ECN bits when doing encapsulation. 2957 */ 2958 outer4->ipha_type_of_service = 2959 inner4->ipha_type_of_service & ~0x03; 2960 } else { 2961 ASSERT(outer4->ipha_protocol == IPPROTO_IPV6 && 2962 inner6 != NULL); 2963 } 2964 if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF) 2965 outer4->ipha_fragment_offset_and_flags |= IPH_DF_HTONS; 2966 else 2967 outer4->ipha_fragment_offset_and_flags &= ~IPH_DF_HTONS; 2968 2969 /* 2970 * As described in section 3.2.2 of RFC4213, if the packet payload is 2971 * less than or equal to the minimum MTU size, then we need to allow 2972 * IPv4 to fragment the packet. 
The reason is that even if we end up 2973 * receiving an ICMP frag-needed, the interface above this tunnel 2974 * won't be allowed to drop its MTU as a result, since the packet was 2975 * already smaller than the smallest allowable MTU for that interface. 2976 */ 2977 if (mp->b_wptr - innerptr <= minmtu) { 2978 outer4->ipha_fragment_offset_and_flags = 0; 2979 ixa->ixa_flags &= ~IXAF_DONTFRAG; 2980 } else if (!(ixa->ixa_flags & IXAF_PMTU_TOO_SMALL) && 2981 (iptun->iptun_typeinfo->iti_type != IPTUN_TYPE_6TO4)) { 2982 ixa->ixa_flags |= IXAF_DONTFRAG; 2983 } 2984 2985 ixa->ixa_ip_hdr_length = IPH_HDR_LENGTH(outer4); 2986 ixa->ixa_pktlen = msgdsize(mp); 2987 ixa->ixa_protocol = outer4->ipha_protocol; 2988 2989 outer4->ipha_length = htons(ixa->ixa_pktlen); 2990 return (mp); 2991 } 2992 2993 /* 2994 * Insert an encapsulation limit destination option in the packet provided. 2995 * Always consumes the mp argument and returns a new mblk pointer. 2996 */ 2997 static mblk_t * 2998 iptun_insert_encaplimit(iptun_t *iptun, mblk_t *mp, ip6_t *outer6, 2999 uint8_t limit) 3000 { 3001 mblk_t *newmp; 3002 iptun_ipv6hdrs_t *newouter6; 3003 3004 ASSERT(outer6->ip6_nxt == IPPROTO_IPV6); 3005 ASSERT(mp->b_cont == NULL); 3006 3007 mp->b_rptr += sizeof (ip6_t); 3008 newmp = allocb(sizeof (iptun_ipv6hdrs_t) + MBLKL(mp), BPRI_MED); 3009 if (newmp == NULL) { 3010 iptun_drop_pkt(mp, &iptun->iptun_noxmtbuf); 3011 return (NULL); 3012 } 3013 newmp->b_wptr += sizeof (iptun_ipv6hdrs_t); 3014 /* Copy the payload (Starting with the inner IPv6 header). */ 3015 bcopy(mp->b_rptr, newmp->b_wptr, MBLKL(mp)); 3016 newmp->b_wptr += MBLKL(mp); 3017 newouter6 = (iptun_ipv6hdrs_t *)newmp->b_rptr; 3018 /* Now copy the outer IPv6 header. 
*/ 3019 bcopy(outer6, &newouter6->it6h_ip6h, sizeof (ip6_t)); 3020 newouter6->it6h_ip6h.ip6_nxt = IPPROTO_DSTOPTS; 3021 newouter6->it6h_encaplim = iptun_encaplim_init; 3022 newouter6->it6h_encaplim.iel_destopt.ip6d_nxt = outer6->ip6_nxt; 3023 newouter6->it6h_encaplim.iel_telopt.ip6ot_encap_limit = limit; 3024 3025 /* 3026 * The payload length will be set at the end of 3027 * iptun_out_process_ipv6(). 3028 */ 3029 3030 freemsg(mp); 3031 return (newmp); 3032 } 3033 3034 /* 3035 * Process output packets with outer IPv6 headers. Frees mp and bumps stats 3036 * on error. 3037 */ 3038 static mblk_t * 3039 iptun_out_process_ipv6(iptun_t *iptun, mblk_t *mp, ip6_t *outer6, 3040 ipha_t *inner4, ip6_t *inner6, ip_xmit_attr_t *ixa) 3041 { 3042 uint8_t *innerptr = (inner4 != NULL ? 3043 (uint8_t *)inner4 : (uint8_t *)inner6); 3044 size_t minmtu = iptun->iptun_typeinfo->iti_minmtu; 3045 uint8_t *limit, *configlimit; 3046 uint32_t offset; 3047 iptun_ipv6hdrs_t *v6hdrs; 3048 3049 if (inner6 != NULL && iptun_find_encaplimit(mp, inner6, &limit)) { 3050 /* 3051 * The inner packet is an IPv6 packet which itself contains an 3052 * encapsulation limit option. The limit variable points to 3053 * the value in the embedded option. Process the 3054 * encapsulation limit option as specified in RFC 2473. 3055 * 3056 * If limit is 0, then we've exceeded the limit and we need to 3057 * send back an ICMPv6 parameter problem message. 3058 * 3059 * If limit is > 0, then we decrement it by 1 and make sure 3060 * that the encapsulation limit option in the outer header 3061 * reflects that (adding an option if one isn't already 3062 * there). 
3063 */ 3064 ASSERT(limit > mp->b_rptr && limit < mp->b_wptr); 3065 if (*limit == 0) { 3066 mp->b_rptr = (uint8_t *)inner6; 3067 offset = limit - mp->b_rptr; 3068 iptun_icmp_error_v6(iptun, inner6, mp, ICMP6_PARAM_PROB, 3069 0, offset, ixa->ixa_tsl); 3070 atomic_inc_64(&iptun->iptun_noxmtbuf); 3071 return (NULL); 3072 } 3073 3074 /* 3075 * The outer header requires an encapsulation limit option. 3076 * If there isn't one already, add one. 3077 */ 3078 if (iptun->iptun_encaplimit == 0) { 3079 if ((mp = iptun_insert_encaplimit(iptun, mp, outer6, 3080 (*limit - 1))) == NULL) 3081 return (NULL); 3082 v6hdrs = (iptun_ipv6hdrs_t *)mp->b_rptr; 3083 } else { 3084 /* 3085 * There is an existing encapsulation limit option in 3086 * the outer header. If the inner encapsulation limit 3087 * is less than the configured encapsulation limit, 3088 * update the outer encapsulation limit to reflect 3089 * this lesser value. 3090 */ 3091 v6hdrs = (iptun_ipv6hdrs_t *)mp->b_rptr; 3092 configlimit = 3093 &v6hdrs->it6h_encaplim.iel_telopt.ip6ot_encap_limit; 3094 if ((*limit - 1) < *configlimit) 3095 *configlimit = (*limit - 1); 3096 } 3097 ixa->ixa_ip_hdr_length = sizeof (iptun_ipv6hdrs_t); 3098 ixa->ixa_protocol = v6hdrs->it6h_encaplim.iel_destopt.ip6d_nxt; 3099 } else { 3100 ixa->ixa_ip_hdr_length = sizeof (ip6_t); 3101 ixa->ixa_protocol = outer6->ip6_nxt; 3102 } 3103 /* 3104 * See iptun_output_process_ipv4() why we allow fragmentation for 3105 * small packets 3106 */ 3107 if (mp->b_wptr - innerptr <= minmtu) 3108 ixa->ixa_flags &= ~IXAF_DONTFRAG; 3109 else if (!(ixa->ixa_flags & IXAF_PMTU_TOO_SMALL)) 3110 ixa->ixa_flags |= IXAF_DONTFRAG; 3111 3112 ixa->ixa_pktlen = msgdsize(mp); 3113 outer6->ip6_plen = htons(ixa->ixa_pktlen - sizeof (ip6_t)); 3114 return (mp); 3115 } 3116 3117 /* 3118 * The IP tunneling MAC-type plugins have already done most of the header 3119 * processing and validity checks. We are simply responsible for multiplexing 3120 * down to the ip module below us. 
3121 */ 3122 static void 3123 iptun_output(iptun_t *iptun, mblk_t *mp) 3124 { 3125 conn_t *connp = iptun->iptun_connp; 3126 mblk_t *newmp; 3127 int error; 3128 ip_xmit_attr_t *ixa; 3129 3130 ASSERT(mp->b_datap->db_type == M_DATA); 3131 3132 if (mp->b_cont != NULL) { 3133 if ((newmp = msgpullup(mp, -1)) == NULL) { 3134 iptun_drop_pkt(mp, &iptun->iptun_noxmtbuf); 3135 return; 3136 } 3137 freemsg(mp); 3138 mp = newmp; 3139 } 3140 3141 if (iptun->iptun_typeinfo->iti_type == IPTUN_TYPE_6TO4) { 3142 iptun_output_6to4(iptun, mp); 3143 return; 3144 } 3145 3146 if (is_system_labeled()) { 3147 /* 3148 * Since the label can be different meaning a potentially 3149 * different IRE,we always use a unique ip_xmit_attr_t. 3150 */ 3151 ixa = conn_get_ixa_exclusive(connp); 3152 } else { 3153 /* 3154 * If no other thread is using conn_ixa this just gets a 3155 * reference to conn_ixa. Otherwise we get a safe copy of 3156 * conn_ixa. 3157 */ 3158 ixa = conn_get_ixa(connp, B_FALSE); 3159 } 3160 if (ixa == NULL) { 3161 iptun_drop_pkt(mp, &iptun->iptun_oerrors); 3162 return; 3163 } 3164 3165 /* 3166 * In case we got a safe copy of conn_ixa, then we need 3167 * to fill in any pointers in it. 3168 */ 3169 if (ixa->ixa_ire == NULL) { 3170 error = ip_attr_connect(connp, ixa, &connp->conn_saddr_v6, 3171 &connp->conn_faddr_v6, &connp->conn_faddr_v6, 0, 3172 NULL, NULL, 0); 3173 if (error != 0) { 3174 if (ixa->ixa_ire != NULL && 3175 (error == EHOSTUNREACH || error == ENETUNREACH)) { 3176 /* 3177 * Let conn_ip_output/ire_send_noroute return 3178 * the error and send any local ICMP error. 3179 */ 3180 error = 0; 3181 } else { 3182 ixa_refrele(ixa); 3183 iptun_drop_pkt(mp, &iptun->iptun_oerrors); 3184 return; 3185 } 3186 } 3187 } 3188 3189 iptun_output_common(iptun, ixa, mp); 3190 ixa_refrele(ixa); 3191 } 3192 3193 /* 3194 * We use an ixa based on the last destination. 
3195 */ 3196 static void 3197 iptun_output_6to4(iptun_t *iptun, mblk_t *mp) 3198 { 3199 conn_t *connp = iptun->iptun_connp; 3200 ipha_t *outer4, *inner4; 3201 ip6_t *outer6, *inner6; 3202 ip_xmit_attr_t *ixa; 3203 ip_xmit_attr_t *oldixa; 3204 int error; 3205 boolean_t need_connect; 3206 in6_addr_t v6dst; 3207 3208 ASSERT(mp->b_cont == NULL); /* Verified by iptun_output */ 3209 3210 /* Make sure we set ipha_dst before we look at ipha_dst */ 3211 3212 (void) iptun_find_headers(mp, 0, &outer4, &inner4, &outer6, &inner6); 3213 ASSERT(outer4 != NULL); 3214 if (!iptun_out_process_6to4(iptun, outer4, inner6)) { 3215 iptun_drop_pkt(mp, &iptun->iptun_oerrors); 3216 return; 3217 } 3218 3219 if (is_system_labeled()) { 3220 /* 3221 * Since the label can be different meaning a potentially 3222 * different IRE,we always use a unique ip_xmit_attr_t. 3223 */ 3224 ixa = conn_get_ixa_exclusive(connp); 3225 } else { 3226 /* 3227 * If no other thread is using conn_ixa this just gets a 3228 * reference to conn_ixa. Otherwise we get a safe copy of 3229 * conn_ixa. 3230 */ 3231 ixa = conn_get_ixa(connp, B_FALSE); 3232 } 3233 if (ixa == NULL) { 3234 iptun_drop_pkt(mp, &iptun->iptun_oerrors); 3235 return; 3236 } 3237 3238 mutex_enter(&connp->conn_lock); 3239 if (connp->conn_v4lastdst == outer4->ipha_dst) { 3240 need_connect = (ixa->ixa_ire == NULL); 3241 } else { 3242 /* In case previous destination was multirt */ 3243 ip_attr_newdst(ixa); 3244 3245 /* 3246 * We later update conn_ixa when we update conn_v4lastdst 3247 * which enables subsequent packets to avoid redoing 3248 * ip_attr_connect 3249 */ 3250 need_connect = B_TRUE; 3251 } 3252 mutex_exit(&connp->conn_lock); 3253 3254 /* 3255 * In case we got a safe copy of conn_ixa, or otherwise we don't 3256 * have a current ixa_ire, then we need to fill in any pointers in 3257 * the ixa. 
3258 */ 3259 if (need_connect) { 3260 IN6_IPADDR_TO_V4MAPPED(outer4->ipha_dst, &v6dst); 3261 3262 /* We handle IPsec in iptun_output_common */ 3263 error = ip_attr_connect(connp, ixa, &connp->conn_saddr_v6, 3264 &v6dst, &v6dst, 0, NULL, NULL, 0); 3265 if (error != 0) { 3266 if (ixa->ixa_ire != NULL && 3267 (error == EHOSTUNREACH || error == ENETUNREACH)) { 3268 /* 3269 * Let conn_ip_output/ire_send_noroute return 3270 * the error and send any local ICMP error. 3271 */ 3272 error = 0; 3273 } else { 3274 ixa_refrele(ixa); 3275 iptun_drop_pkt(mp, &iptun->iptun_oerrors); 3276 return; 3277 } 3278 } 3279 } 3280 3281 iptun_output_common(iptun, ixa, mp); 3282 3283 /* Atomically replace conn_ixa and conn_v4lastdst */ 3284 mutex_enter(&connp->conn_lock); 3285 if (connp->conn_v4lastdst != outer4->ipha_dst) { 3286 /* Remember the dst which corresponds to conn_ixa */ 3287 connp->conn_v6lastdst = v6dst; 3288 oldixa = conn_replace_ixa(connp, ixa); 3289 } else { 3290 oldixa = NULL; 3291 } 3292 mutex_exit(&connp->conn_lock); 3293 ixa_refrele(ixa); 3294 if (oldixa != NULL) 3295 ixa_refrele(oldixa); 3296 } 3297 3298 /* 3299 * Check the destination/label. Modifies *mpp by adding/removing CIPSO. 3300 * 3301 * We get the label from the message in order to honor the 3302 * ULPs/IPs choice of label. This will be NULL for forwarded 3303 * packets, neighbor discovery packets and some others. 3304 */ 3305 static int 3306 iptun_output_check_label(mblk_t **mpp, ip_xmit_attr_t *ixa) 3307 { 3308 cred_t *cr; 3309 int adjust; 3310 int iplen; 3311 int err; 3312 ts_label_t *effective_tsl = NULL; 3313 3314 3315 ASSERT(is_system_labeled()); 3316 3317 cr = msg_getcred(*mpp, NULL); 3318 if (cr == NULL) 3319 return (0); 3320 3321 /* 3322 * We need to start with a label based on the IP/ULP above us 3323 */ 3324 ip_xmit_attr_restore_tsl(ixa, cr); 3325 3326 /* 3327 * Need to update packet with any CIPSO option since 3328 * conn_ip_output doesn't do that. 
3329 */ 3330 if (ixa->ixa_flags & IXAF_IS_IPV4) { 3331 ipha_t *ipha; 3332 3333 ipha = (ipha_t *)(*mpp)->b_rptr; 3334 iplen = ntohs(ipha->ipha_length); 3335 err = tsol_check_label_v4(ixa->ixa_tsl, 3336 ixa->ixa_zoneid, mpp, CONN_MAC_DEFAULT, B_FALSE, 3337 ixa->ixa_ipst, &effective_tsl); 3338 if (err != 0) 3339 return (err); 3340 3341 ipha = (ipha_t *)(*mpp)->b_rptr; 3342 adjust = (int)ntohs(ipha->ipha_length) - iplen; 3343 } else { 3344 ip6_t *ip6h; 3345 3346 ip6h = (ip6_t *)(*mpp)->b_rptr; 3347 iplen = ntohs(ip6h->ip6_plen); 3348 3349 err = tsol_check_label_v6(ixa->ixa_tsl, 3350 ixa->ixa_zoneid, mpp, CONN_MAC_DEFAULT, B_FALSE, 3351 ixa->ixa_ipst, &effective_tsl); 3352 if (err != 0) 3353 return (err); 3354 3355 ip6h = (ip6_t *)(*mpp)->b_rptr; 3356 adjust = (int)ntohs(ip6h->ip6_plen) - iplen; 3357 } 3358 3359 if (effective_tsl != NULL) { 3360 /* Update the label */ 3361 ip_xmit_attr_replace_tsl(ixa, effective_tsl); 3362 } 3363 ixa->ixa_pktlen += adjust; 3364 ixa->ixa_ip_hdr_length += adjust; 3365 return (0); 3366 } 3367 3368 3369 static void 3370 iptun_output_common(iptun_t *iptun, ip_xmit_attr_t *ixa, mblk_t *mp) 3371 { 3372 ipsec_tun_pol_t *itp = iptun->iptun_itp; 3373 int outer_hlen; 3374 mblk_t *newmp; 3375 ipha_t *outer4, *inner4; 3376 ip6_t *outer6, *inner6; 3377 int error; 3378 boolean_t update_pktlen; 3379 3380 ASSERT(ixa->ixa_ire != NULL); 3381 3382 outer_hlen = iptun_find_headers(mp, 0, &outer4, &inner4, &outer6, 3383 &inner6); 3384 if (outer_hlen == 0) { 3385 iptun_drop_pkt(mp, &iptun->iptun_oerrors); 3386 return; 3387 } 3388 3389 /* Save IXAF_DONTFRAG value */ 3390 iaflags_t dontfrag = ixa->ixa_flags & IXAF_DONTFRAG; 3391 3392 /* Perform header processing. 
*/ 3393 if (outer4 != NULL) { 3394 mp = iptun_out_process_ipv4(iptun, mp, outer4, inner4, inner6, 3395 ixa); 3396 } else { 3397 mp = iptun_out_process_ipv6(iptun, mp, outer6, inner4, inner6, 3398 ixa); 3399 } 3400 if (mp == NULL) 3401 return; 3402 3403 /* 3404 * Let's hope the compiler optimizes this with "branch taken". 3405 */ 3406 if (itp != NULL && (itp->itp_flags & ITPF_P_ACTIVE)) { 3407 /* This updates the ip_xmit_attr_t */ 3408 mp = ipsec_tun_outbound(mp, iptun, inner4, inner6, outer4, 3409 outer6, outer_hlen, ixa); 3410 if (mp == NULL) { 3411 atomic_inc_64(&iptun->iptun_oerrors); 3412 return; 3413 } 3414 if (is_system_labeled()) { 3415 /* 3416 * Might change the packet by adding/removing CIPSO. 3417 * After this caller inner* and outer* and outer_hlen 3418 * might be invalid. 3419 */ 3420 error = iptun_output_check_label(&mp, ixa); 3421 if (error != 0) { 3422 ip2dbg(("label check failed (%d)\n", error)); 3423 iptun_drop_pkt(mp, &iptun->iptun_oerrors); 3424 return; 3425 } 3426 } 3427 3428 /* 3429 * ipsec_tun_outbound() returns a chain of tunneled IP 3430 * fragments linked with b_next (or a single message if the 3431 * tunneled packet wasn't a fragment). 3432 * If fragcache returned a list then we need to update 3433 * ixa_pktlen for all packets in the list. 3434 */ 3435 update_pktlen = (mp->b_next != NULL); 3436 3437 /* 3438 * Otherwise, we're good to go. The ixa has been updated with 3439 * instructions for outbound IPsec processing. 3440 */ 3441 for (newmp = mp; newmp != NULL; newmp = mp) { 3442 size_t minmtu = iptun->iptun_typeinfo->iti_minmtu; 3443 3444 atomic_inc_64(&iptun->iptun_opackets); 3445 atomic_add_64(&iptun->iptun_obytes, ixa->ixa_pktlen); 3446 mp = mp->b_next; 3447 newmp->b_next = NULL; 3448 3449 /* 3450 * The IXAF_DONTFRAG flag is global, but there is 3451 * a chain here. Check if we're really already 3452 * smaller than the minimum allowed MTU and reset here 3453 * appropriately. 
Otherwise one small packet can kill 3454 * the whole chain's path mtu discovery. 3455 * In addition, update the pktlen to the length of 3456 * the actual packet being processed. 3457 */ 3458 if (update_pktlen) { 3459 ixa->ixa_pktlen = msgdsize(newmp); 3460 if (ixa->ixa_pktlen <= minmtu) 3461 ixa->ixa_flags &= ~IXAF_DONTFRAG; 3462 } 3463 3464 atomic_inc_64(&iptun->iptun_opackets); 3465 atomic_add_64(&iptun->iptun_obytes, ixa->ixa_pktlen); 3466 3467 error = conn_ip_output(newmp, ixa); 3468 3469 /* Restore IXAF_DONTFRAG value */ 3470 ixa->ixa_flags |= dontfrag; 3471 3472 if (error == EMSGSIZE) { 3473 /* IPsec policy might have changed */ 3474 (void) iptun_update_mtu(iptun, ixa, 0); 3475 } 3476 } 3477 } else { 3478 /* 3479 * The ip module will potentially apply global policy to the 3480 * packet in its output path if there's no active tunnel 3481 * policy. 3482 */ 3483 ASSERT(ixa->ixa_ipsec_policy == NULL); 3484 mp = ip_output_attach_policy(mp, outer4, outer6, NULL, ixa); 3485 if (mp == NULL) { 3486 atomic_inc_64(&iptun->iptun_oerrors); 3487 return; 3488 } 3489 if (is_system_labeled()) { 3490 /* 3491 * Might change the packet by adding/removing CIPSO. 3492 * After this caller inner* and outer* and outer_hlen 3493 * might be invalid. 
3494 */ 3495 error = iptun_output_check_label(&mp, ixa); 3496 if (error != 0) { 3497 ip2dbg(("label check failed (%d)\n", error)); 3498 iptun_drop_pkt(mp, &iptun->iptun_oerrors); 3499 return; 3500 } 3501 } 3502 3503 atomic_inc_64(&iptun->iptun_opackets); 3504 atomic_add_64(&iptun->iptun_obytes, ixa->ixa_pktlen); 3505 3506 error = conn_ip_output(mp, ixa); 3507 if (error == EMSGSIZE) { 3508 /* IPsec policy might have changed */ 3509 (void) iptun_update_mtu(iptun, ixa, 0); 3510 } 3511 } 3512 if (ixa->ixa_flags & IXAF_IPSEC_SECURE) 3513 ipsec_out_release_refs(ixa); 3514 } 3515 3516 static mac_callbacks_t iptun_m_callbacks = { 3517 .mc_callbacks = (MC_SETPROP | MC_GETPROP), 3518 .mc_getstat = iptun_m_getstat, 3519 .mc_start = iptun_m_start, 3520 .mc_stop = iptun_m_stop, 3521 .mc_setpromisc = iptun_m_setpromisc, 3522 .mc_multicst = iptun_m_multicst, 3523 .mc_unicst = iptun_m_unicst, 3524 .mc_tx = iptun_m_tx, 3525 .mc_setprop = iptun_m_setprop, 3526 .mc_getprop = iptun_m_getprop 3527 }; 3528