1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright (c) 2016, Joyent, Inc. All rights reserved. 24 */ 25 26 /* 27 * iptun - IP Tunneling Driver 28 * 29 * This module is a GLDv3 driver that implements virtual datalinks over IP 30 * (a.k.a, IP tunneling). The datalinks are managed through a dld ioctl 31 * interface (see iptun_ctl.c), and registered with GLDv3 using 32 * mac_register(). It implements the logic for various forms of IP (IPv4 or 33 * IPv6) encapsulation within IP (IPv4 or IPv6) by interacting with the ip 34 * module below it. Each virtual IP tunnel datalink has a conn_t associated 35 * with it representing the "outer" IP connection. 36 * 37 * The module implements the following locking semantics: 38 * 39 * Lookups and deletions in iptun_hash are synchronized using iptun_hash_lock. 40 * See comments above iptun_hash_lock for details. 41 * 42 * No locks are ever held while calling up to GLDv3. The general architecture 43 * of GLDv3 requires this, as the mac perimeter (essentially a lock) for a 44 * given link will be held while making downcalls (iptun_m_*() callbacks). 
45 * Because we need to hold locks while handling downcalls, holding these locks 46 * while issuing upcalls results in deadlock scenarios. See the block comment 47 * above iptun_task_cb() for details on how we safely issue upcalls without 48 * holding any locks. 49 * 50 * The contents of each iptun_t is protected by an iptun_mutex which is held 51 * in iptun_enter() (called by iptun_enter_by_linkid()), and exited in 52 * iptun_exit(). 53 * 54 * See comments in iptun_delete() and iptun_free() for details on how the 55 * iptun_t is deleted safely. 56 */ 57 58 #include <sys/types.h> 59 #include <sys/kmem.h> 60 #include <sys/errno.h> 61 #include <sys/modhash.h> 62 #include <sys/list.h> 63 #include <sys/strsun.h> 64 #include <sys/file.h> 65 #include <sys/systm.h> 66 #include <sys/tihdr.h> 67 #include <sys/param.h> 68 #include <sys/mac_provider.h> 69 #include <sys/mac_ipv4.h> 70 #include <sys/mac_ipv6.h> 71 #include <sys/mac_6to4.h> 72 #include <sys/tsol/tnet.h> 73 #include <sys/sunldi.h> 74 #include <netinet/in.h> 75 #include <netinet/ip6.h> 76 #include <inet/ip.h> 77 #include <inet/ip_ire.h> 78 #include <inet/ipsec_impl.h> 79 #include <sys/tsol/label.h> 80 #include <sys/tsol/tnet.h> 81 #include <inet/iptun.h> 82 #include "iptun_impl.h" 83 84 /* Do the tunnel type and address family match? */ 85 #define IPTUN_ADDR_MATCH(iptun_type, family) \ 86 ((iptun_type == IPTUN_TYPE_IPV4 && family == AF_INET) || \ 87 (iptun_type == IPTUN_TYPE_IPV6 && family == AF_INET6) || \ 88 (iptun_type == IPTUN_TYPE_6TO4 && family == AF_INET)) 89 90 #define IPTUN_HASH_KEY(key) ((mod_hash_key_t)(uintptr_t)(key)) 91 92 #define IPTUN_MIN_IPV4_MTU 576 /* ip.h still uses 68 (!) 
*/ 93 #define IPTUN_MIN_IPV6_MTU IPV6_MIN_MTU 94 #define IPTUN_MAX_IPV4_MTU (IP_MAXPACKET - sizeof (ipha_t)) 95 #define IPTUN_MAX_IPV6_MTU (IP_MAXPACKET - sizeof (ip6_t) - \ 96 sizeof (iptun_encaplim_t)) 97 98 #define IPTUN_MIN_HOPLIMIT 1 99 #define IPTUN_MAX_HOPLIMIT UINT8_MAX 100 101 #define IPTUN_MIN_ENCAPLIMIT 0 102 #define IPTUN_MAX_ENCAPLIMIT UINT8_MAX 103 104 #define IPTUN_IPSEC_REQ_MASK (IPSEC_PREF_REQUIRED | IPSEC_PREF_NEVER) 105 106 static iptun_encaplim_t iptun_encaplim_init = { 107 { IPPROTO_NONE, 0 }, 108 IP6OPT_TUNNEL_LIMIT, 109 1, 110 IPTUN_DEFAULT_ENCAPLIMIT, /* filled in with actual value later */ 111 IP6OPT_PADN, 112 1, 113 0 114 }; 115 116 /* 117 * Table containing per-iptun-type information. 118 * Since IPv6 can run over all of these we have the IPv6 min as the min MTU. 119 */ 120 static iptun_typeinfo_t iptun_type_table[] = { 121 { IPTUN_TYPE_IPV4, MAC_PLUGIN_IDENT_IPV4, IPV4_VERSION, 122 IPTUN_MIN_IPV6_MTU, IPTUN_MAX_IPV4_MTU, B_TRUE }, 123 { IPTUN_TYPE_IPV6, MAC_PLUGIN_IDENT_IPV6, IPV6_VERSION, 124 IPTUN_MIN_IPV6_MTU, IPTUN_MAX_IPV6_MTU, B_TRUE }, 125 { IPTUN_TYPE_6TO4, MAC_PLUGIN_IDENT_6TO4, IPV4_VERSION, 126 IPTUN_MIN_IPV6_MTU, IPTUN_MAX_IPV4_MTU, B_FALSE }, 127 { IPTUN_TYPE_UNKNOWN, NULL, 0, 0, 0, B_FALSE } 128 }; 129 130 /* 131 * iptun_hash is an iptun_t lookup table by link ID protected by 132 * iptun_hash_lock. While the hash table's integrity is maintained via 133 * internal locking in the mod_hash_*() functions, we need additional locking 134 * so that an iptun_t cannot be deleted after a hash lookup has returned an 135 * iptun_t and before iptun_lock has been entered. As such, we use 136 * iptun_hash_lock when doing lookups and removals from iptun_hash. 
 */
mod_hash_t *iptun_hash;
static kmutex_t iptun_hash_lock;

static uint_t iptun_tunnelcount;	/* total for all stacks */
kmem_cache_t *iptun_cache;
ddi_taskq_t *iptun_taskq;

/*
 * Work items deferred to iptun_taskq; each one results in a single upcall
 * to the mac module.  See the block comment above iptun_task_cb() for why
 * these upcalls cannot be issued directly.
 */
typedef enum {
	IPTUN_TASK_MTU_UPDATE,	/* tell mac about new tunnel link MTU */
	IPTUN_TASK_LADDR_UPDATE, /* tell mac about new local address */
	IPTUN_TASK_RADDR_UPDATE, /* tell mac about new remote address */
	IPTUN_TASK_LINK_UPDATE,	/* tell mac about new link state */
	IPTUN_TASK_PDATA_UPDATE	/* tell mac about updated plugin data */
} iptun_task_t;

/* Argument passed to iptun_task_cb(); freed there after being decoded. */
typedef struct iptun_task_data_s {
	iptun_task_t	itd_task;
	datalink_id_t	itd_linkid;
} iptun_task_data_t;

static void	iptun_task_dispatch(iptun_t *, iptun_task_t);
static int	iptun_enter(iptun_t *);
static void	iptun_exit(iptun_t *);
static void	iptun_headergen(iptun_t *, boolean_t);
static void	iptun_drop_pkt(mblk_t *, uint64_t *);
static void	iptun_input(void *, mblk_t *, void *, ip_recv_attr_t *);
static void	iptun_input_icmp(void *, mblk_t *, void *, ip_recv_attr_t *);
static void	iptun_output(iptun_t *, mblk_t *);
static uint32_t	iptun_get_maxmtu(iptun_t *, ip_xmit_attr_t *, uint32_t);
static uint32_t	iptun_update_mtu(iptun_t *, ip_xmit_attr_t *, uint32_t);
static uint32_t	iptun_get_dst_pmtu(iptun_t *, ip_xmit_attr_t *);
static void	iptun_update_dst_pmtu(iptun_t *, ip_xmit_attr_t *);
static int	iptun_setladdr(iptun_t *, const struct sockaddr_storage *);

static void	iptun_output_6to4(iptun_t *, mblk_t *);
static void	iptun_output_common(iptun_t *, ip_xmit_attr_t *, mblk_t *);
static boolean_t	iptun_verifyicmp(conn_t *, void *, icmph_t *, icmp6_t *,
    ip_recv_attr_t *);

static void	iptun_notify(void *, ip_xmit_attr_t *, ixa_notify_type_t,
    ixa_notify_arg_t);

static mac_callbacks_t iptun_m_callbacks;

/*
 * GLDv3 mc_getstat(9E) entry point: return the requested 64-bit link
 * statistic in *val, or ENOTSUP for statistics this driver does not keep.
 */
static int
iptun_m_getstat(void *arg, uint_t stat, uint64_t *val)
{
	iptun_t	*iptun = arg;
	int	err = 0;

	switch (stat) {
	case MAC_STAT_IERRORS:
		*val = iptun->iptun_ierrors;
		break;
	case MAC_STAT_OERRORS:
		*val = iptun->iptun_oerrors;
		break;
	case MAC_STAT_RBYTES:
		*val = iptun->iptun_rbytes;
		break;
	case MAC_STAT_IPACKETS:
		*val = iptun->iptun_ipackets;
		break;
	case MAC_STAT_OBYTES:
		*val = iptun->iptun_obytes;
		break;
	case MAC_STAT_OPACKETS:
		*val = iptun->iptun_opackets;
		break;
	case MAC_STAT_NORCVBUF:
		*val = iptun->iptun_norcvbuf;
		break;
	case MAC_STAT_NOXMTBUF:
		*val = iptun->iptun_noxmtbuf;
		break;
	default:
		err = ENOTSUP;
	}

	return (err);
}

/*
 * GLDv3 mc_start(9E) entry point: mark the link as started and push the
 * resulting link state up to mac via the taskq.
 */
static int
iptun_m_start(void *arg)
{
	iptun_t	*iptun = arg;
	int	err;

	if ((err = iptun_enter(iptun)) == 0) {
		iptun->iptun_flags |= IPTUN_MAC_STARTED;
		iptun_task_dispatch(iptun, IPTUN_TASK_LINK_UPDATE);
		iptun_exit(iptun);
	}
	return (err);
}

/*
 * GLDv3 mc_stop(9E) entry point.  If iptun_enter() fails, the tunnel is
 * being deleted and there is nothing left to stop.
 */
static void
iptun_m_stop(void *arg)
{
	iptun_t	*iptun = arg;

	if (iptun_enter(iptun) == 0) {
		iptun->iptun_flags &= ~IPTUN_MAC_STARTED;
		iptun_task_dispatch(iptun, IPTUN_TASK_LINK_UPDATE);
		iptun_exit(iptun);
	}
}

/*
 * iptun_m_setpromisc() does nothing and always succeeds. This is because a
 * tunnel data-link only ever receives packets that are destined exclusively
 * for the local address of the tunnel.
 */
/* ARGSUSED */
static int
iptun_m_setpromisc(void *arg, boolean_t on)
{
	return (0);
}

/* Multicast membership is not supported on IP tunnel links. */
/* ARGSUSED */
static int
iptun_m_multicst(void *arg, boolean_t add, const uint8_t *addrp)
{
	return (ENOTSUP);
}

/*
 * iptun_m_unicst() sets the local address.
267 */ 268 /* ARGSUSED */ 269 static int 270 iptun_m_unicst(void *arg, const uint8_t *addrp) 271 { 272 iptun_t *iptun = arg; 273 int err; 274 struct sockaddr_storage ss; 275 struct sockaddr_in *sin; 276 struct sockaddr_in6 *sin6; 277 278 if ((err = iptun_enter(iptun)) == 0) { 279 switch (iptun->iptun_typeinfo->iti_ipvers) { 280 case IPV4_VERSION: 281 sin = (struct sockaddr_in *)&ss; 282 sin->sin_family = AF_INET; 283 bcopy(addrp, &sin->sin_addr, sizeof (in_addr_t)); 284 break; 285 case IPV6_VERSION: 286 sin6 = (struct sockaddr_in6 *)&ss; 287 sin6->sin6_family = AF_INET6; 288 bcopy(addrp, &sin6->sin6_addr, sizeof (in6_addr_t)); 289 break; 290 default: 291 ASSERT(0); 292 } 293 err = iptun_setladdr(iptun, &ss); 294 iptun_exit(iptun); 295 } 296 return (err); 297 } 298 299 static mblk_t * 300 iptun_m_tx(void *arg, mblk_t *mpchain) 301 { 302 mblk_t *mp, *nmp; 303 iptun_t *iptun = arg; 304 305 if (!IS_IPTUN_RUNNING(iptun)) { 306 iptun_drop_pkt(mpchain, &iptun->iptun_noxmtbuf); 307 return (NULL); 308 } 309 310 for (mp = mpchain; mp != NULL; mp = nmp) { 311 nmp = mp->b_next; 312 mp->b_next = NULL; 313 iptun_output(iptun, mp); 314 } 315 316 return (NULL); 317 } 318 319 /* ARGSUSED */ 320 static int 321 iptun_m_setprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, 322 uint_t pr_valsize, const void *pr_val) 323 { 324 iptun_t *iptun = barg; 325 uint32_t value = *(uint32_t *)pr_val; 326 int err; 327 328 /* 329 * We need to enter this iptun_t since we'll be modifying the outer 330 * header. 
331 */ 332 if ((err = iptun_enter(iptun)) != 0) 333 return (err); 334 335 switch (pr_num) { 336 case MAC_PROP_IPTUN_HOPLIMIT: 337 if (value < IPTUN_MIN_HOPLIMIT || value > IPTUN_MAX_HOPLIMIT) { 338 err = EINVAL; 339 break; 340 } 341 if (value != iptun->iptun_hoplimit) { 342 iptun->iptun_hoplimit = (uint8_t)value; 343 iptun_headergen(iptun, B_TRUE); 344 } 345 break; 346 case MAC_PROP_IPTUN_ENCAPLIMIT: 347 if (iptun->iptun_typeinfo->iti_type != IPTUN_TYPE_IPV6 || 348 value > IPTUN_MAX_ENCAPLIMIT) { 349 err = EINVAL; 350 break; 351 } 352 if (value != iptun->iptun_encaplimit) { 353 iptun->iptun_encaplimit = (uint8_t)value; 354 iptun_headergen(iptun, B_TRUE); 355 } 356 break; 357 case MAC_PROP_MTU: { 358 uint32_t maxmtu = iptun_get_maxmtu(iptun, NULL, 0); 359 360 if (value < iptun->iptun_typeinfo->iti_minmtu || 361 value > maxmtu) { 362 err = EINVAL; 363 break; 364 } 365 iptun->iptun_flags |= IPTUN_FIXED_MTU; 366 if (value != iptun->iptun_mtu) { 367 iptun->iptun_mtu = value; 368 iptun_task_dispatch(iptun, IPTUN_TASK_MTU_UPDATE); 369 } 370 break; 371 } 372 default: 373 err = EINVAL; 374 } 375 iptun_exit(iptun); 376 return (err); 377 } 378 379 /* ARGSUSED */ 380 static int 381 iptun_m_getprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, 382 uint_t pr_valsize, void *pr_val) 383 { 384 iptun_t *iptun = barg; 385 int err; 386 387 if ((err = iptun_enter(iptun)) != 0) 388 return (err); 389 390 switch (pr_num) { 391 case MAC_PROP_IPTUN_HOPLIMIT: 392 ASSERT(pr_valsize >= sizeof (uint32_t)); 393 *(uint32_t *)pr_val = iptun->iptun_hoplimit; 394 break; 395 396 case MAC_PROP_IPTUN_ENCAPLIMIT: 397 *(uint32_t *)pr_val = iptun->iptun_encaplimit; 398 break; 399 default: 400 err = ENOTSUP; 401 } 402 done: 403 iptun_exit(iptun); 404 return (err); 405 } 406 407 /* ARGSUSED */ 408 static void 409 iptun_m_propinfo(void *barg, const char *pr_name, mac_prop_id_t pr_num, 410 mac_prop_info_handle_t prh) 411 { 412 iptun_t *iptun = barg; 413 414 switch (pr_num) { 415 case 
MAC_PROP_IPTUN_HOPLIMIT: 416 mac_prop_info_set_range_uint32(prh, 417 IPTUN_MIN_HOPLIMIT, IPTUN_MAX_HOPLIMIT); 418 mac_prop_info_set_default_uint32(prh, IPTUN_DEFAULT_HOPLIMIT); 419 break; 420 421 case MAC_PROP_IPTUN_ENCAPLIMIT: 422 if (iptun->iptun_typeinfo->iti_type != IPTUN_TYPE_IPV6) 423 break; 424 mac_prop_info_set_range_uint32(prh, 425 IPTUN_MIN_ENCAPLIMIT, IPTUN_MAX_ENCAPLIMIT); 426 mac_prop_info_set_default_uint32(prh, IPTUN_DEFAULT_ENCAPLIMIT); 427 break; 428 case MAC_PROP_MTU: 429 mac_prop_info_set_range_uint32(prh, 430 iptun->iptun_typeinfo->iti_minmtu, 431 iptun_get_maxmtu(iptun, NULL, 0)); 432 break; 433 } 434 } 435 436 uint_t 437 iptun_count(void) 438 { 439 return (iptun_tunnelcount); 440 } 441 442 /* 443 * Enter an iptun_t exclusively. This is essentially just a mutex, but we 444 * don't allow iptun_enter() to succeed on a tunnel if it's in the process of 445 * being deleted. 446 */ 447 static int 448 iptun_enter(iptun_t *iptun) 449 { 450 mutex_enter(&iptun->iptun_lock); 451 while (iptun->iptun_flags & IPTUN_DELETE_PENDING) 452 cv_wait(&iptun->iptun_enter_cv, &iptun->iptun_lock); 453 if (iptun->iptun_flags & IPTUN_CONDEMNED) { 454 mutex_exit(&iptun->iptun_lock); 455 return (ENOENT); 456 } 457 return (0); 458 } 459 460 /* 461 * Exit the tunnel entered in iptun_enter(). 462 */ 463 static void 464 iptun_exit(iptun_t *iptun) 465 { 466 mutex_exit(&iptun->iptun_lock); 467 } 468 469 /* 470 * Enter the IP tunnel instance by datalink ID. 
471 */ 472 static int 473 iptun_enter_by_linkid(datalink_id_t linkid, iptun_t **iptun) 474 { 475 int err; 476 477 mutex_enter(&iptun_hash_lock); 478 if (mod_hash_find(iptun_hash, IPTUN_HASH_KEY(linkid), 479 (mod_hash_val_t *)iptun) == 0) 480 err = iptun_enter(*iptun); 481 else 482 err = ENOENT; 483 if (err != 0) 484 *iptun = NULL; 485 mutex_exit(&iptun_hash_lock); 486 return (err); 487 } 488 489 /* 490 * Handle tasks that were deferred through the iptun_taskq because they require 491 * calling up to the mac module, and we can't call up to the mac module while 492 * holding locks. 493 * 494 * This is tricky to get right without introducing race conditions and 495 * deadlocks with the mac module, as we cannot issue an upcall while in the 496 * iptun_t. The reason is that upcalls may try and enter the mac perimeter, 497 * while iptun callbacks (such as iptun_m_setprop()) called from the mac 498 * module will already have the perimeter held, and will then try and enter 499 * the iptun_t. You can see the lock ordering problem with this; this will 500 * deadlock. 501 * 502 * The safe way to do this is to enter the iptun_t in question and copy the 503 * information we need out of it so that we can exit it and know that the 504 * information being passed up to the upcalls won't be subject to modification 505 * by other threads. The problem now is that we need to exit it prior to 506 * issuing the upcall, but once we do this, a thread could come along and 507 * delete the iptun_t and thus the mac handle required to issue the upcall. 508 * To prevent this, we set the IPTUN_UPCALL_PENDING flag prior to exiting the 509 * iptun_t. This flag is the condition associated with iptun_upcall_cv, which 510 * iptun_delete() will cv_wait() on. When the upcall completes, we clear 511 * IPTUN_UPCALL_PENDING and cv_signal() any potentially waiting 512 * iptun_delete(). We can thus still safely use iptun->iptun_mh after having 513 * exited the iptun_t. 
514 */ 515 static void 516 iptun_task_cb(void *arg) 517 { 518 iptun_task_data_t *itd = arg; 519 iptun_task_t task = itd->itd_task; 520 datalink_id_t linkid = itd->itd_linkid; 521 iptun_t *iptun; 522 uint32_t mtu; 523 iptun_addr_t addr; 524 link_state_t linkstate; 525 size_t header_size; 526 iptun_header_t header; 527 528 kmem_free(itd, sizeof (*itd)); 529 530 /* 531 * Note that if the lookup fails, it's because the tunnel was deleted 532 * between the time the task was dispatched and now. That isn't an 533 * error. 534 */ 535 if (iptun_enter_by_linkid(linkid, &iptun) != 0) 536 return; 537 538 iptun->iptun_flags |= IPTUN_UPCALL_PENDING; 539 540 switch (task) { 541 case IPTUN_TASK_MTU_UPDATE: 542 mtu = iptun->iptun_mtu; 543 break; 544 case IPTUN_TASK_LADDR_UPDATE: 545 addr = iptun->iptun_laddr; 546 break; 547 case IPTUN_TASK_RADDR_UPDATE: 548 addr = iptun->iptun_raddr; 549 break; 550 case IPTUN_TASK_LINK_UPDATE: 551 linkstate = IS_IPTUN_RUNNING(iptun) ? 552 LINK_STATE_UP : LINK_STATE_DOWN; 553 break; 554 case IPTUN_TASK_PDATA_UPDATE: 555 header_size = iptun->iptun_header_size; 556 header = iptun->iptun_header; 557 break; 558 default: 559 ASSERT(0); 560 } 561 562 iptun_exit(iptun); 563 564 switch (task) { 565 case IPTUN_TASK_MTU_UPDATE: 566 (void) mac_maxsdu_update(iptun->iptun_mh, mtu); 567 break; 568 case IPTUN_TASK_LADDR_UPDATE: 569 mac_unicst_update(iptun->iptun_mh, (uint8_t *)&addr.ia_addr); 570 break; 571 case IPTUN_TASK_RADDR_UPDATE: 572 mac_dst_update(iptun->iptun_mh, (uint8_t *)&addr.ia_addr); 573 break; 574 case IPTUN_TASK_LINK_UPDATE: 575 mac_link_update(iptun->iptun_mh, linkstate); 576 break; 577 case IPTUN_TASK_PDATA_UPDATE: 578 if (mac_pdata_update(iptun->iptun_mh, 579 header_size == 0 ? 
NULL : &header, header_size) != 0) 580 atomic_inc_64(&iptun->iptun_taskq_fail); 581 break; 582 } 583 584 mutex_enter(&iptun->iptun_lock); 585 iptun->iptun_flags &= ~IPTUN_UPCALL_PENDING; 586 cv_signal(&iptun->iptun_upcall_cv); 587 mutex_exit(&iptun->iptun_lock); 588 } 589 590 static void 591 iptun_task_dispatch(iptun_t *iptun, iptun_task_t iptun_task) 592 { 593 iptun_task_data_t *itd; 594 595 itd = kmem_alloc(sizeof (*itd), KM_NOSLEEP); 596 if (itd == NULL) { 597 atomic_inc_64(&iptun->iptun_taskq_fail); 598 return; 599 } 600 itd->itd_task = iptun_task; 601 itd->itd_linkid = iptun->iptun_linkid; 602 if (ddi_taskq_dispatch(iptun_taskq, iptun_task_cb, itd, DDI_NOSLEEP)) { 603 atomic_inc_64(&iptun->iptun_taskq_fail); 604 kmem_free(itd, sizeof (*itd)); 605 } 606 } 607 608 /* 609 * Convert an iptun_addr_t to sockaddr_storage. 610 */ 611 static void 612 iptun_getaddr(iptun_addr_t *iptun_addr, struct sockaddr_storage *ss) 613 { 614 struct sockaddr_in *sin; 615 struct sockaddr_in6 *sin6; 616 617 bzero(ss, sizeof (*ss)); 618 switch (iptun_addr->ia_family) { 619 case AF_INET: 620 sin = (struct sockaddr_in *)ss; 621 sin->sin_addr.s_addr = iptun_addr->ia_addr.iau_addr4; 622 break; 623 case AF_INET6: 624 sin6 = (struct sockaddr_in6 *)ss; 625 sin6->sin6_addr = iptun_addr->ia_addr.iau_addr6; 626 break; 627 default: 628 ASSERT(0); 629 } 630 ss->ss_family = iptun_addr->ia_family; 631 } 632 633 /* 634 * General purpose function to set an IP tunnel source or destination address. 
635 */ 636 static int 637 iptun_setaddr(iptun_type_t iptun_type, iptun_addr_t *iptun_addr, 638 const struct sockaddr_storage *ss) 639 { 640 if (!IPTUN_ADDR_MATCH(iptun_type, ss->ss_family)) 641 return (EINVAL); 642 643 switch (ss->ss_family) { 644 case AF_INET: { 645 struct sockaddr_in *sin = (struct sockaddr_in *)ss; 646 647 if ((sin->sin_addr.s_addr == INADDR_ANY) || 648 (sin->sin_addr.s_addr == INADDR_BROADCAST) || 649 CLASSD(sin->sin_addr.s_addr)) { 650 return (EADDRNOTAVAIL); 651 } 652 iptun_addr->ia_addr.iau_addr4 = sin->sin_addr.s_addr; 653 break; 654 } 655 case AF_INET6: { 656 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)ss; 657 658 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) || 659 IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr) || 660 IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 661 return (EADDRNOTAVAIL); 662 } 663 iptun_addr->ia_addr.iau_addr6 = sin6->sin6_addr; 664 break; 665 } 666 default: 667 return (EAFNOSUPPORT); 668 } 669 iptun_addr->ia_family = ss->ss_family; 670 return (0); 671 } 672 673 static int 674 iptun_setladdr(iptun_t *iptun, const struct sockaddr_storage *laddr) 675 { 676 return (iptun_setaddr(iptun->iptun_typeinfo->iti_type, 677 &iptun->iptun_laddr, laddr)); 678 } 679 680 static int 681 iptun_setraddr(iptun_t *iptun, const struct sockaddr_storage *raddr) 682 { 683 if (!(iptun->iptun_typeinfo->iti_hasraddr)) 684 return (EINVAL); 685 return (iptun_setaddr(iptun->iptun_typeinfo->iti_type, 686 &iptun->iptun_raddr, raddr)); 687 } 688 689 static boolean_t 690 iptun_canbind(iptun_t *iptun) 691 { 692 /* 693 * A tunnel may bind when its source address has been set, and if its 694 * tunnel type requires one, also its destination address. 
695 */ 696 return ((iptun->iptun_flags & IPTUN_LADDR) && 697 ((iptun->iptun_flags & IPTUN_RADDR) || 698 !(iptun->iptun_typeinfo->iti_hasraddr))); 699 } 700 701 /* 702 * Verify that the local address is valid, and insert in the fanout 703 */ 704 static int 705 iptun_bind(iptun_t *iptun) 706 { 707 conn_t *connp = iptun->iptun_connp; 708 int error = 0; 709 ip_xmit_attr_t *ixa; 710 ip_xmit_attr_t *oldixa; 711 iulp_t uinfo; 712 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 713 714 /* 715 * Get an exclusive ixa for this thread. 716 * We defer updating conn_ixa until later to handle any concurrent 717 * conn_ixa_cleanup thread. 718 */ 719 ixa = conn_get_ixa(connp, B_FALSE); 720 if (ixa == NULL) 721 return (ENOMEM); 722 723 /* We create PMTU state including for 6to4 */ 724 ixa->ixa_flags |= IXAF_PMTU_DISCOVERY; 725 726 ASSERT(iptun_canbind(iptun)); 727 728 mutex_enter(&connp->conn_lock); 729 /* 730 * Note that conn_proto can't be set since the upper protocol 731 * can be both 41 and 4 when IPv6 and IPv4 are over the same tunnel. 732 * ipcl_iptun_classify doesn't use conn_proto. 
733 */ 734 connp->conn_ipversion = iptun->iptun_typeinfo->iti_ipvers; 735 736 switch (iptun->iptun_typeinfo->iti_type) { 737 case IPTUN_TYPE_IPV4: 738 IN6_IPADDR_TO_V4MAPPED(iptun->iptun_laddr4, 739 &connp->conn_laddr_v6); 740 IN6_IPADDR_TO_V4MAPPED(iptun->iptun_raddr4, 741 &connp->conn_faddr_v6); 742 ixa->ixa_flags |= IXAF_IS_IPV4; 743 if (ip_laddr_verify_v4(iptun->iptun_laddr4, IPCL_ZONEID(connp), 744 ipst, B_FALSE) != IPVL_UNICAST_UP) { 745 mutex_exit(&connp->conn_lock); 746 error = EADDRNOTAVAIL; 747 goto done; 748 } 749 break; 750 case IPTUN_TYPE_IPV6: 751 connp->conn_laddr_v6 = iptun->iptun_laddr6; 752 connp->conn_faddr_v6 = iptun->iptun_raddr6; 753 ixa->ixa_flags &= ~IXAF_IS_IPV4; 754 /* We use a zero scopeid for now */ 755 if (ip_laddr_verify_v6(&iptun->iptun_laddr6, IPCL_ZONEID(connp), 756 ipst, B_FALSE, 0) != IPVL_UNICAST_UP) { 757 mutex_exit(&connp->conn_lock); 758 error = EADDRNOTAVAIL; 759 goto done; 760 } 761 break; 762 case IPTUN_TYPE_6TO4: 763 IN6_IPADDR_TO_V4MAPPED(iptun->iptun_laddr4, 764 &connp->conn_laddr_v6); 765 IN6_IPADDR_TO_V4MAPPED(INADDR_ANY, &connp->conn_faddr_v6); 766 ixa->ixa_flags |= IXAF_IS_IPV4; 767 mutex_exit(&connp->conn_lock); 768 769 switch (ip_laddr_verify_v4(iptun->iptun_laddr4, 770 IPCL_ZONEID(connp), ipst, B_FALSE)) { 771 case IPVL_UNICAST_UP: 772 case IPVL_UNICAST_DOWN: 773 break; 774 default: 775 error = EADDRNOTAVAIL; 776 goto done; 777 } 778 goto insert; 779 } 780 781 /* In case previous destination was multirt */ 782 ip_attr_newdst(ixa); 783 784 /* 785 * When we set a tunnel's destination address, we do not 786 * care if the destination is reachable. Transient routing 787 * issues should not inhibit the creation of a tunnel 788 * interface, for example. Thus we pass B_FALSE here. 
789 */ 790 connp->conn_saddr_v6 = connp->conn_laddr_v6; 791 mutex_exit(&connp->conn_lock); 792 793 /* As long as the MTU is large we avoid fragmentation */ 794 ixa->ixa_flags |= IXAF_DONTFRAG | IXAF_PMTU_IPV4_DF; 795 796 /* We handle IPsec in iptun_output_common */ 797 error = ip_attr_connect(connp, ixa, &connp->conn_saddr_v6, 798 &connp->conn_faddr_v6, &connp->conn_faddr_v6, 0, 799 &connp->conn_saddr_v6, &uinfo, 0); 800 801 if (error != 0) 802 goto done; 803 804 /* saddr shouldn't change since it was already set */ 805 ASSERT(IN6_ARE_ADDR_EQUAL(&connp->conn_laddr_v6, 806 &connp->conn_saddr_v6)); 807 808 /* We set IXAF_VERIFY_PMTU to catch PMTU increases */ 809 ixa->ixa_flags |= IXAF_VERIFY_PMTU; 810 ASSERT(uinfo.iulp_mtu != 0); 811 812 /* 813 * Allow setting new policies. 814 * The addresses/ports are already set, thus the IPsec policy calls 815 * can handle their passed-in conn's. 816 */ 817 connp->conn_policy_cached = B_FALSE; 818 819 insert: 820 error = ipcl_conn_insert(connp); 821 if (error != 0) 822 goto done; 823 824 /* Atomically update v6lastdst and conn_ixa */ 825 mutex_enter(&connp->conn_lock); 826 /* Record this as the "last" send even though we haven't sent any */ 827 connp->conn_v6lastdst = connp->conn_faddr_v6; 828 829 iptun->iptun_flags |= IPTUN_BOUND; 830 831 oldixa = conn_replace_ixa(connp, ixa); 832 /* Done with conn_t */ 833 mutex_exit(&connp->conn_lock); 834 ixa_refrele(oldixa); 835 836 /* 837 * Now that we're bound with ip below us, this is a good 838 * time to initialize the destination path MTU and to 839 * re-calculate the tunnel's link MTU. 
840 */ 841 (void) iptun_update_mtu(iptun, ixa, 0); 842 843 if (IS_IPTUN_RUNNING(iptun)) 844 iptun_task_dispatch(iptun, IPTUN_TASK_LINK_UPDATE); 845 846 done: 847 ixa_refrele(ixa); 848 return (error); 849 } 850 851 static void 852 iptun_unbind(iptun_t *iptun) 853 { 854 ASSERT(iptun->iptun_flags & IPTUN_BOUND); 855 ASSERT(mutex_owned(&iptun->iptun_lock) || 856 (iptun->iptun_flags & IPTUN_CONDEMNED)); 857 ip_unbind(iptun->iptun_connp); 858 iptun->iptun_flags &= ~IPTUN_BOUND; 859 if (!(iptun->iptun_flags & IPTUN_CONDEMNED)) 860 iptun_task_dispatch(iptun, IPTUN_TASK_LINK_UPDATE); 861 } 862 863 /* 864 * Re-generate the template data-link header for a given IP tunnel given the 865 * tunnel's current parameters. 866 */ 867 static void 868 iptun_headergen(iptun_t *iptun, boolean_t update_mac) 869 { 870 switch (iptun->iptun_typeinfo->iti_ipvers) { 871 case IPV4_VERSION: 872 /* 873 * We only need to use a custom IP header if the administrator 874 * has supplied a non-default hoplimit. 875 */ 876 if (iptun->iptun_hoplimit == IPTUN_DEFAULT_HOPLIMIT) { 877 iptun->iptun_header_size = 0; 878 break; 879 } 880 iptun->iptun_header_size = sizeof (ipha_t); 881 iptun->iptun_header4.ipha_version_and_hdr_length = 882 IP_SIMPLE_HDR_VERSION; 883 iptun->iptun_header4.ipha_fragment_offset_and_flags = 884 htons(IPH_DF); 885 iptun->iptun_header4.ipha_ttl = iptun->iptun_hoplimit; 886 break; 887 case IPV6_VERSION: { 888 ip6_t *ip6hp = &iptun->iptun_header6.it6h_ip6h; 889 890 /* 891 * We only need to use a custom IPv6 header if either the 892 * administrator has supplied a non-default hoplimit, or we 893 * need to include an encapsulation limit option in the outer 894 * header. 
895 */ 896 if (iptun->iptun_hoplimit == IPTUN_DEFAULT_HOPLIMIT && 897 iptun->iptun_encaplimit == 0) { 898 iptun->iptun_header_size = 0; 899 break; 900 } 901 902 (void) memset(ip6hp, 0, sizeof (*ip6hp)); 903 if (iptun->iptun_encaplimit == 0) { 904 iptun->iptun_header_size = sizeof (ip6_t); 905 ip6hp->ip6_nxt = IPPROTO_NONE; 906 } else { 907 iptun_encaplim_t *iel; 908 909 iptun->iptun_header_size = sizeof (iptun_ipv6hdrs_t); 910 /* 911 * The mac_ipv6 plugin requires ip6_plen to be in host 912 * byte order and reflect the extension headers 913 * present in the template. The actual network byte 914 * order ip6_plen will be set on a per-packet basis on 915 * transmit. 916 */ 917 ip6hp->ip6_plen = sizeof (*iel); 918 ip6hp->ip6_nxt = IPPROTO_DSTOPTS; 919 iel = &iptun->iptun_header6.it6h_encaplim; 920 *iel = iptun_encaplim_init; 921 iel->iel_telopt.ip6ot_encap_limit = 922 iptun->iptun_encaplimit; 923 } 924 925 ip6hp->ip6_hlim = iptun->iptun_hoplimit; 926 break; 927 } 928 } 929 930 if (update_mac) 931 iptun_task_dispatch(iptun, IPTUN_TASK_PDATA_UPDATE); 932 } 933 934 /* 935 * Insert inbound and outbound IPv4 and IPv6 policy into the given policy 936 * head. 937 */ 938 static boolean_t 939 iptun_insert_simple_policies(ipsec_policy_head_t *ph, ipsec_act_t *actp, 940 uint_t n, netstack_t *ns) 941 { 942 int f = IPSEC_AF_V4; 943 944 if (!ipsec_polhead_insert(ph, actp, n, f, IPSEC_TYPE_INBOUND, ns) || 945 !ipsec_polhead_insert(ph, actp, n, f, IPSEC_TYPE_OUTBOUND, ns)) 946 return (B_FALSE); 947 948 f = IPSEC_AF_V6; 949 return (ipsec_polhead_insert(ph, actp, n, f, IPSEC_TYPE_INBOUND, ns) && 950 ipsec_polhead_insert(ph, actp, n, f, IPSEC_TYPE_OUTBOUND, ns)); 951 } 952 953 /* 954 * Used to set IPsec policy when policy is set through the IPTUN_CREATE or 955 * IPTUN_MODIFY ioctls. 
956 */ 957 static int 958 iptun_set_sec_simple(iptun_t *iptun, const ipsec_req_t *ipsr) 959 { 960 int rc = 0; 961 uint_t nact; 962 ipsec_act_t *actp = NULL; 963 boolean_t clear_all, old_policy = B_FALSE; 964 ipsec_tun_pol_t *itp; 965 char name[MAXLINKNAMELEN]; 966 uint64_t gen; 967 netstack_t *ns = iptun->iptun_ns; 968 969 /* Can't specify self-encap on a tunnel. */ 970 if (ipsr->ipsr_self_encap_req != 0) 971 return (EINVAL); 972 973 /* 974 * If it's a "clear-all" entry, unset the security flags and resume 975 * normal cleartext (or inherit-from-global) policy. 976 */ 977 clear_all = ((ipsr->ipsr_ah_req & IPTUN_IPSEC_REQ_MASK) == 0 && 978 (ipsr->ipsr_esp_req & IPTUN_IPSEC_REQ_MASK) == 0); 979 980 ASSERT(mutex_owned(&iptun->iptun_lock)); 981 itp = iptun->iptun_itp; 982 if (itp == NULL) { 983 if (clear_all) 984 goto bail; 985 if ((rc = dls_mgmt_get_linkinfo(iptun->iptun_linkid, name, NULL, 986 NULL, NULL)) != 0) 987 goto bail; 988 ASSERT(name[0] != '\0'); 989 if ((itp = create_tunnel_policy(name, &rc, &gen, ns)) == NULL) 990 goto bail; 991 iptun->iptun_itp = itp; 992 } 993 994 /* Allocate the actvec now, before holding itp or polhead locks. */ 995 ipsec_actvec_from_req(ipsr, &actp, &nact, ns); 996 if (actp == NULL) { 997 rc = ENOMEM; 998 goto bail; 999 } 1000 1001 /* 1002 * Just write on the active polhead. Save the primary/secondary stuff 1003 * for spdsock operations. 1004 * 1005 * Mutex because we need to write to the polhead AND flags atomically. 1006 * Other threads will acquire the polhead lock as a reader if the 1007 * (unprotected) flag is set. 1008 */ 1009 mutex_enter(&itp->itp_lock); 1010 if (itp->itp_flags & ITPF_P_TUNNEL) { 1011 /* Oops, we lost a race. Let's get out of here. 
*/ 1012 rc = EBUSY; 1013 goto mutex_bail; 1014 } 1015 old_policy = ((itp->itp_flags & ITPF_P_ACTIVE) != 0); 1016 1017 if (old_policy) { 1018 ITPF_CLONE(itp->itp_flags); 1019 rc = ipsec_copy_polhead(itp->itp_policy, itp->itp_inactive, ns); 1020 if (rc != 0) { 1021 /* inactive has already been cleared. */ 1022 itp->itp_flags &= ~ITPF_IFLAGS; 1023 goto mutex_bail; 1024 } 1025 rw_enter(&itp->itp_policy->iph_lock, RW_WRITER); 1026 ipsec_polhead_flush(itp->itp_policy, ns); 1027 } else { 1028 /* Else assume itp->itp_policy is already flushed. */ 1029 rw_enter(&itp->itp_policy->iph_lock, RW_WRITER); 1030 } 1031 1032 if (clear_all) { 1033 ASSERT(avl_numnodes(&itp->itp_policy->iph_rulebyid) == 0); 1034 itp->itp_flags &= ~ITPF_PFLAGS; 1035 rw_exit(&itp->itp_policy->iph_lock); 1036 old_policy = B_FALSE; /* Clear out the inactive one too. */ 1037 goto recover_bail; 1038 } 1039 1040 if (iptun_insert_simple_policies(itp->itp_policy, actp, nact, ns)) { 1041 rw_exit(&itp->itp_policy->iph_lock); 1042 /* 1043 * Adjust MTU and make sure the DL side knows what's up. 1044 */ 1045 itp->itp_flags = ITPF_P_ACTIVE; 1046 (void) iptun_update_mtu(iptun, NULL, 0); 1047 old_policy = B_FALSE; /* Blank out inactive - we succeeded */ 1048 } else { 1049 rw_exit(&itp->itp_policy->iph_lock); 1050 rc = ENOMEM; 1051 } 1052 1053 recover_bail: 1054 if (old_policy) { 1055 /* Recover policy in in active polhead. */ 1056 ipsec_swap_policy(itp->itp_policy, itp->itp_inactive, ns); 1057 ITPF_SWAP(itp->itp_flags); 1058 } 1059 1060 /* Clear policy in inactive polhead. 
*/ 1061 itp->itp_flags &= ~ITPF_IFLAGS; 1062 rw_enter(&itp->itp_inactive->iph_lock, RW_WRITER); 1063 ipsec_polhead_flush(itp->itp_inactive, ns); 1064 rw_exit(&itp->itp_inactive->iph_lock); 1065 1066 mutex_bail: 1067 mutex_exit(&itp->itp_lock); 1068 1069 bail: 1070 if (actp != NULL) 1071 ipsec_actvec_free(actp, nact); 1072 1073 return (rc); 1074 } 1075 1076 static iptun_typeinfo_t * 1077 iptun_gettypeinfo(iptun_type_t type) 1078 { 1079 int i; 1080 1081 for (i = 0; iptun_type_table[i].iti_type != IPTUN_TYPE_UNKNOWN; i++) { 1082 if (iptun_type_table[i].iti_type == type) 1083 break; 1084 } 1085 return (&iptun_type_table[i]); 1086 } 1087 1088 /* 1089 * Set the parameters included in ik on the tunnel iptun. Parameters that can 1090 * only be set at creation time are set in iptun_create(). 1091 */ 1092 static int 1093 iptun_setparams(iptun_t *iptun, const iptun_kparams_t *ik) 1094 { 1095 int err = 0; 1096 netstack_t *ns = iptun->iptun_ns; 1097 iptun_addr_t orig_laddr, orig_raddr; 1098 uint_t orig_flags = iptun->iptun_flags; 1099 1100 if (ik->iptun_kparam_flags & IPTUN_KPARAM_LADDR) { 1101 if (orig_flags & IPTUN_LADDR) 1102 orig_laddr = iptun->iptun_laddr; 1103 if ((err = iptun_setladdr(iptun, &ik->iptun_kparam_laddr)) != 0) 1104 return (err); 1105 iptun->iptun_flags |= IPTUN_LADDR; 1106 } 1107 1108 if (ik->iptun_kparam_flags & IPTUN_KPARAM_RADDR) { 1109 if (orig_flags & IPTUN_RADDR) 1110 orig_raddr = iptun->iptun_raddr; 1111 if ((err = iptun_setraddr(iptun, &ik->iptun_kparam_raddr)) != 0) 1112 goto done; 1113 iptun->iptun_flags |= IPTUN_RADDR; 1114 } 1115 1116 if (ik->iptun_kparam_flags & IPTUN_KPARAM_SECINFO) { 1117 /* 1118 * Set IPsec policy originating from the ifconfig(8) command 1119 * line. This is traditionally called "simple" policy because 1120 * the ipsec_req_t (iptun_kparam_secinfo) can only describe a 1121 * simple policy of "do ESP on everything" and/or "do AH on 1122 * everything" (as opposed to the rich policy that can be 1123 * defined with ipsecconf(8)). 
1124 */ 1125 if (iptun->iptun_typeinfo->iti_type == IPTUN_TYPE_6TO4) { 1126 /* 1127 * Can't set security properties for automatic 1128 * tunnels. 1129 */ 1130 err = EINVAL; 1131 goto done; 1132 } 1133 1134 if (!ipsec_loaded(ns->netstack_ipsec)) { 1135 /* If IPsec can be loaded, try and load it now. */ 1136 if (ipsec_failed(ns->netstack_ipsec)) { 1137 err = EPROTONOSUPPORT; 1138 goto done; 1139 } 1140 ipsec_loader_loadnow(ns->netstack_ipsec); 1141 /* 1142 * ipsec_loader_loadnow() returns while IPsec is 1143 * loaded asynchronously. While a method exists to 1144 * wait for IPsec to load (ipsec_loader_wait()), it 1145 * requires use of a STREAMS queue to do a qwait(). 1146 * We're not in STREAMS context here, and so we can't 1147 * use it. This is not a problem in practice because 1148 * in the vast majority of cases, key management and 1149 * global policy will have loaded before any tunnels 1150 * are plumbed, and so IPsec will already have been 1151 * loaded. 1152 */ 1153 err = EAGAIN; 1154 goto done; 1155 } 1156 1157 err = iptun_set_sec_simple(iptun, &ik->iptun_kparam_secinfo); 1158 if (err == 0) { 1159 iptun->iptun_flags |= IPTUN_SIMPLE_POLICY; 1160 iptun->iptun_simple_policy = ik->iptun_kparam_secinfo; 1161 } 1162 } 1163 done: 1164 if (err != 0) { 1165 /* Restore original source and destination. 
*/ 1166 if (ik->iptun_kparam_flags & IPTUN_KPARAM_LADDR && 1167 (orig_flags & IPTUN_LADDR)) 1168 iptun->iptun_laddr = orig_laddr; 1169 if ((ik->iptun_kparam_flags & IPTUN_KPARAM_RADDR) && 1170 (orig_flags & IPTUN_RADDR)) 1171 iptun->iptun_raddr = orig_raddr; 1172 iptun->iptun_flags = orig_flags; 1173 } 1174 return (err); 1175 } 1176 1177 static int 1178 iptun_register(iptun_t *iptun) 1179 { 1180 mac_register_t *mac; 1181 int err; 1182 1183 ASSERT(!(iptun->iptun_flags & IPTUN_MAC_REGISTERED)); 1184 1185 if ((mac = mac_alloc(MAC_VERSION)) == NULL) 1186 return (EINVAL); 1187 1188 mac->m_type_ident = iptun->iptun_typeinfo->iti_ident; 1189 mac->m_driver = iptun; 1190 mac->m_dip = iptun_dip; 1191 mac->m_instance = (uint_t)-1; 1192 mac->m_src_addr = (uint8_t *)&iptun->iptun_laddr.ia_addr; 1193 mac->m_dst_addr = iptun->iptun_typeinfo->iti_hasraddr ? 1194 (uint8_t *)&iptun->iptun_raddr.ia_addr : NULL; 1195 mac->m_callbacks = &iptun_m_callbacks; 1196 mac->m_min_sdu = iptun->iptun_typeinfo->iti_minmtu; 1197 mac->m_max_sdu = iptun->iptun_mtu; 1198 if (iptun->iptun_header_size != 0) { 1199 mac->m_pdata = &iptun->iptun_header; 1200 mac->m_pdata_size = iptun->iptun_header_size; 1201 } 1202 if ((err = mac_register(mac, &iptun->iptun_mh)) == 0) 1203 iptun->iptun_flags |= IPTUN_MAC_REGISTERED; 1204 mac_free(mac); 1205 return (err); 1206 } 1207 1208 static int 1209 iptun_unregister(iptun_t *iptun) 1210 { 1211 int err; 1212 1213 ASSERT(iptun->iptun_flags & IPTUN_MAC_REGISTERED); 1214 if ((err = mac_unregister(iptun->iptun_mh)) == 0) 1215 iptun->iptun_flags &= ~IPTUN_MAC_REGISTERED; 1216 return (err); 1217 } 1218 1219 static conn_t * 1220 iptun_conn_create(iptun_t *iptun, netstack_t *ns, cred_t *credp) 1221 { 1222 conn_t *connp; 1223 1224 if ((connp = ipcl_conn_create(IPCL_IPCCONN, KM_NOSLEEP, ns)) == NULL) 1225 return (NULL); 1226 1227 connp->conn_flags |= IPCL_IPTUN; 1228 connp->conn_iptun = iptun; 1229 connp->conn_recv = iptun_input; 1230 connp->conn_recvicmp = iptun_input_icmp; 
1231 connp->conn_verifyicmp = iptun_verifyicmp; 1232 1233 /* 1234 * Register iptun_notify to listen to capability changes detected by IP. 1235 * This upcall is made in the context of the call to conn_ip_output. 1236 */ 1237 connp->conn_ixa->ixa_notify = iptun_notify; 1238 connp->conn_ixa->ixa_notify_cookie = iptun; 1239 1240 /* 1241 * For exclusive stacks we set conn_zoneid to GLOBAL_ZONEID as is done 1242 * for all other conn_t's. 1243 * 1244 * Note that there's an important distinction between iptun_zoneid and 1245 * conn_zoneid. The conn_zoneid is set to GLOBAL_ZONEID in non-global 1246 * exclusive stack zones to make the ip module believe that the 1247 * non-global zone is actually a global zone. Therefore, when 1248 * interacting with the ip module, we must always use conn_zoneid. 1249 */ 1250 connp->conn_zoneid = (ns->netstack_stackid == GLOBAL_NETSTACKID) ? 1251 crgetzoneid(credp) : GLOBAL_ZONEID; 1252 connp->conn_cred = credp; 1253 /* crfree() is done in ipcl_conn_destroy(), called by CONN_DEC_REF() */ 1254 crhold(connp->conn_cred); 1255 connp->conn_cpid = NOPID; 1256 1257 /* conn_allzones can not be set this early, hence no IPCL_ZONEID */ 1258 connp->conn_ixa->ixa_zoneid = connp->conn_zoneid; 1259 ASSERT(connp->conn_ref == 1); 1260 1261 /* Cache things in ixa without an extra refhold */ 1262 ASSERT(!(connp->conn_ixa->ixa_free_flags & IXA_FREE_CRED)); 1263 connp->conn_ixa->ixa_cred = connp->conn_cred; 1264 connp->conn_ixa->ixa_cpid = connp->conn_cpid; 1265 if (is_system_labeled()) 1266 connp->conn_ixa->ixa_tsl = crgetlabel(connp->conn_cred); 1267 1268 /* 1269 * Have conn_ip_output drop packets should our outer source 1270 * go invalid 1271 */ 1272 connp->conn_ixa->ixa_flags |= IXAF_VERIFY_SOURCE; 1273 1274 switch (iptun->iptun_typeinfo->iti_ipvers) { 1275 case IPV4_VERSION: 1276 connp->conn_family = AF_INET6; 1277 break; 1278 case IPV6_VERSION: 1279 connp->conn_family = AF_INET; 1280 break; 1281 } 1282 mutex_enter(&connp->conn_lock); 1283 
connp->conn_state_flags &= ~CONN_INCIPIENT; 1284 mutex_exit(&connp->conn_lock); 1285 return (connp); 1286 } 1287 1288 static void 1289 iptun_conn_destroy(conn_t *connp) 1290 { 1291 ip_quiesce_conn(connp); 1292 connp->conn_iptun = NULL; 1293 ASSERT(connp->conn_ref == 1); 1294 CONN_DEC_REF(connp); 1295 } 1296 1297 static iptun_t * 1298 iptun_alloc(void) 1299 { 1300 iptun_t *iptun; 1301 1302 if ((iptun = kmem_cache_alloc(iptun_cache, KM_NOSLEEP)) != NULL) { 1303 bzero(iptun, sizeof (*iptun)); 1304 atomic_inc_32(&iptun_tunnelcount); 1305 } 1306 return (iptun); 1307 } 1308 1309 static void 1310 iptun_free(iptun_t *iptun) 1311 { 1312 ASSERT(iptun->iptun_flags & IPTUN_CONDEMNED); 1313 1314 if (iptun->iptun_flags & IPTUN_HASH_INSERTED) { 1315 iptun_stack_t *iptuns = iptun->iptun_iptuns; 1316 1317 mutex_enter(&iptun_hash_lock); 1318 VERIFY(mod_hash_remove(iptun_hash, 1319 IPTUN_HASH_KEY(iptun->iptun_linkid), 1320 (mod_hash_val_t *)&iptun) == 0); 1321 mutex_exit(&iptun_hash_lock); 1322 iptun->iptun_flags &= ~IPTUN_HASH_INSERTED; 1323 mutex_enter(&iptuns->iptuns_lock); 1324 list_remove(&iptuns->iptuns_iptunlist, iptun); 1325 mutex_exit(&iptuns->iptuns_lock); 1326 } 1327 1328 if (iptun->iptun_flags & IPTUN_BOUND) 1329 iptun_unbind(iptun); 1330 1331 /* 1332 * After iptun_unregister(), there will be no threads executing a 1333 * downcall from the mac module, including in the tx datapath. 1334 */ 1335 if (iptun->iptun_flags & IPTUN_MAC_REGISTERED) 1336 VERIFY(iptun_unregister(iptun) == 0); 1337 1338 if (iptun->iptun_itp != NULL) { 1339 /* 1340 * Remove from the AVL tree, AND release the reference iptun_t 1341 * itself holds on the ITP. 
1342 */ 1343 itp_unlink(iptun->iptun_itp, iptun->iptun_ns); 1344 ITP_REFRELE(iptun->iptun_itp, iptun->iptun_ns); 1345 iptun->iptun_itp = NULL; 1346 iptun->iptun_flags &= ~IPTUN_SIMPLE_POLICY; 1347 } 1348 1349 /* 1350 * After ipcl_conn_destroy(), there will be no threads executing an 1351 * upcall from ip (i.e., iptun_input()), and it is then safe to free 1352 * the iptun_t. 1353 */ 1354 if (iptun->iptun_connp != NULL) { 1355 iptun_conn_destroy(iptun->iptun_connp); 1356 iptun->iptun_connp = NULL; 1357 } 1358 1359 netstack_rele(iptun->iptun_ns); 1360 kmem_cache_free(iptun_cache, iptun); 1361 atomic_dec_32(&iptun_tunnelcount); 1362 } 1363 1364 int 1365 iptun_create(iptun_kparams_t *ik, cred_t *credp) 1366 { 1367 iptun_t *iptun = NULL; 1368 int err = 0, mherr; 1369 char linkname[MAXLINKNAMELEN]; 1370 ipsec_tun_pol_t *itp; 1371 netstack_t *ns = NULL; 1372 iptun_stack_t *iptuns; 1373 datalink_id_t tmpid; 1374 zoneid_t zoneid = crgetzoneid(credp); 1375 boolean_t link_created = B_FALSE; 1376 1377 /* The tunnel type is mandatory */ 1378 if (!(ik->iptun_kparam_flags & IPTUN_KPARAM_TYPE)) 1379 return (EINVAL); 1380 1381 /* 1382 * Is the linkid that the caller wishes to associate with this new 1383 * tunnel assigned to this zone? 1384 */ 1385 if (zone_check_datalink(&zoneid, ik->iptun_kparam_linkid) != 0) { 1386 if (zoneid != GLOBAL_ZONEID) 1387 return (EINVAL); 1388 } else if (zoneid == GLOBAL_ZONEID) { 1389 return (EINVAL); 1390 } 1391 1392 /* 1393 * Make sure that we're not trying to create a tunnel that has already 1394 * been created. 
1395 */ 1396 if (iptun_enter_by_linkid(ik->iptun_kparam_linkid, &iptun) == 0) { 1397 iptun_exit(iptun); 1398 iptun = NULL; 1399 err = EEXIST; 1400 goto done; 1401 } 1402 1403 ns = netstack_find_by_cred(credp); 1404 iptuns = ns->netstack_iptun; 1405 1406 if ((iptun = iptun_alloc()) == NULL) { 1407 err = ENOMEM; 1408 goto done; 1409 } 1410 1411 iptun->iptun_linkid = ik->iptun_kparam_linkid; 1412 iptun->iptun_zoneid = zoneid; 1413 iptun->iptun_ns = ns; 1414 1415 iptun->iptun_typeinfo = iptun_gettypeinfo(ik->iptun_kparam_type); 1416 if (iptun->iptun_typeinfo->iti_type == IPTUN_TYPE_UNKNOWN) { 1417 err = EINVAL; 1418 goto done; 1419 } 1420 1421 if (ik->iptun_kparam_flags & IPTUN_KPARAM_IMPLICIT) 1422 iptun->iptun_flags |= IPTUN_IMPLICIT; 1423 1424 if ((err = iptun_setparams(iptun, ik)) != 0) 1425 goto done; 1426 1427 iptun->iptun_hoplimit = IPTUN_DEFAULT_HOPLIMIT; 1428 if (iptun->iptun_typeinfo->iti_type == IPTUN_TYPE_IPV6) 1429 iptun->iptun_encaplimit = IPTUN_DEFAULT_ENCAPLIMIT; 1430 1431 iptun_headergen(iptun, B_FALSE); 1432 1433 iptun->iptun_connp = iptun_conn_create(iptun, ns, credp); 1434 if (iptun->iptun_connp == NULL) { 1435 err = ENOMEM; 1436 goto done; 1437 } 1438 1439 iptun->iptun_mtu = iptun->iptun_typeinfo->iti_maxmtu; 1440 iptun->iptun_dpmtu = iptun->iptun_mtu; 1441 1442 /* 1443 * Find an ITP based on linkname. If we have parms already set via 1444 * the iptun_setparams() call above, it may have created an ITP for 1445 * us. We always try get_tunnel_policy() for DEBUG correctness 1446 * checks, and we may wish to refactor this to only check when 1447 * iptun_itp is NULL. 1448 */ 1449 if ((err = dls_mgmt_get_linkinfo(iptun->iptun_linkid, linkname, NULL, 1450 NULL, NULL)) != 0) 1451 goto done; 1452 if ((itp = get_tunnel_policy(linkname, ns)) != NULL) 1453 iptun->iptun_itp = itp; 1454 1455 /* 1456 * See if we have the necessary IP addresses assigned to this tunnel 1457 * to try and bind them with ip underneath us. 
If we're not ready to 1458 * bind yet, then we'll defer the bind operation until the addresses 1459 * are modified. 1460 */ 1461 if (iptun_canbind(iptun) && ((err = iptun_bind(iptun)) != 0)) 1462 goto done; 1463 1464 if ((err = iptun_register(iptun)) != 0) 1465 goto done; 1466 1467 err = dls_devnet_create(iptun->iptun_mh, iptun->iptun_linkid, 1468 iptun->iptun_zoneid); 1469 if (err != 0) 1470 goto done; 1471 link_created = B_TRUE; 1472 1473 /* 1474 * We hash by link-id as that is the key used by all other iptun 1475 * interfaces (modify, delete, etc.). 1476 */ 1477 if ((mherr = mod_hash_insert(iptun_hash, 1478 IPTUN_HASH_KEY(iptun->iptun_linkid), (mod_hash_val_t)iptun)) == 0) { 1479 mutex_enter(&iptuns->iptuns_lock); 1480 list_insert_head(&iptuns->iptuns_iptunlist, iptun); 1481 mutex_exit(&iptuns->iptuns_lock); 1482 iptun->iptun_flags |= IPTUN_HASH_INSERTED; 1483 } else if (mherr == MH_ERR_NOMEM) { 1484 err = ENOMEM; 1485 } else if (mherr == MH_ERR_DUPLICATE) { 1486 err = EEXIST; 1487 } else { 1488 err = EINVAL; 1489 } 1490 1491 done: 1492 if (iptun == NULL && ns != NULL) 1493 netstack_rele(ns); 1494 if (err != 0 && iptun != NULL) { 1495 if (link_created) { 1496 (void) dls_devnet_destroy(iptun->iptun_mh, &tmpid, 1497 B_TRUE); 1498 } 1499 iptun->iptun_flags |= IPTUN_CONDEMNED; 1500 iptun_free(iptun); 1501 } 1502 return (err); 1503 } 1504 1505 int 1506 iptun_delete(datalink_id_t linkid, cred_t *credp) 1507 { 1508 int err; 1509 iptun_t *iptun = NULL; 1510 1511 if ((err = iptun_enter_by_linkid(linkid, &iptun)) != 0) 1512 return (err); 1513 1514 /* One cannot delete a tunnel that belongs to another zone. */ 1515 if (iptun->iptun_zoneid != crgetzoneid(credp)) { 1516 iptun_exit(iptun); 1517 return (EACCES); 1518 } 1519 1520 /* 1521 * We need to exit iptun in order to issue calls up the stack such as 1522 * dls_devnet_destroy(). If we call up while still in iptun, deadlock 1523 * with calls coming down the stack is possible. 
We prevent other 1524 * threads from entering this iptun after we've exited it by setting 1525 * the IPTUN_DELETE_PENDING flag. This will cause callers of 1526 * iptun_enter() to block waiting on iptun_enter_cv. The assumption 1527 * here is that the functions we're calling while IPTUN_DELETE_PENDING 1528 * is set dont resuult in an iptun_enter() call, as that would result 1529 * in deadlock. 1530 */ 1531 iptun->iptun_flags |= IPTUN_DELETE_PENDING; 1532 1533 /* Wait for any pending upcall to the mac module to complete. */ 1534 while (iptun->iptun_flags & IPTUN_UPCALL_PENDING) 1535 cv_wait(&iptun->iptun_upcall_cv, &iptun->iptun_lock); 1536 1537 iptun_exit(iptun); 1538 1539 if ((err = dls_devnet_destroy(iptun->iptun_mh, &linkid, B_TRUE)) == 0) { 1540 /* 1541 * mac_disable() will fail with EBUSY if there are references 1542 * to the iptun MAC. If there are none, then mac_disable() 1543 * will assure that none can be acquired until the MAC is 1544 * unregistered. 1545 * 1546 * XXX CR 6791335 prevents us from calling mac_disable() prior 1547 * to dls_devnet_destroy(), so we unfortunately need to 1548 * attempt to re-create the devnet node if mac_disable() 1549 * fails. 1550 */ 1551 if ((err = mac_disable(iptun->iptun_mh)) != 0) { 1552 (void) dls_devnet_create(iptun->iptun_mh, linkid, 1553 iptun->iptun_zoneid); 1554 } 1555 } 1556 1557 /* 1558 * Now that we know the fate of this iptun_t, we need to clear 1559 * IPTUN_DELETE_PENDING, and set IPTUN_CONDEMNED if the iptun_t is 1560 * slated to be freed. Either way, we need to signal the threads 1561 * waiting in iptun_enter() so that they can either fail if 1562 * IPTUN_CONDEMNED is set, or continue if it's not. 
1563 */ 1564 mutex_enter(&iptun->iptun_lock); 1565 iptun->iptun_flags &= ~IPTUN_DELETE_PENDING; 1566 if (err == 0) 1567 iptun->iptun_flags |= IPTUN_CONDEMNED; 1568 cv_broadcast(&iptun->iptun_enter_cv); 1569 mutex_exit(&iptun->iptun_lock); 1570 1571 /* 1572 * Note that there is no danger in calling iptun_free() after having 1573 * dropped the iptun_lock since callers of iptun_enter() at this point 1574 * are doing so from iptun_enter_by_linkid() (mac_disable() got rid of 1575 * threads entering from mac callbacks which call iptun_enter() 1576 * directly) which holds iptun_hash_lock, and iptun_free() grabs this 1577 * lock in order to remove the iptun_t from the hash table. 1578 */ 1579 if (err == 0) 1580 iptun_free(iptun); 1581 1582 return (err); 1583 } 1584 1585 int 1586 iptun_modify(const iptun_kparams_t *ik, cred_t *credp) 1587 { 1588 iptun_t *iptun; 1589 boolean_t laddr_change = B_FALSE, raddr_change = B_FALSE; 1590 int err; 1591 1592 if ((err = iptun_enter_by_linkid(ik->iptun_kparam_linkid, &iptun)) != 0) 1593 return (err); 1594 1595 /* One cannot modify a tunnel that belongs to another zone. */ 1596 if (iptun->iptun_zoneid != crgetzoneid(credp)) { 1597 err = EACCES; 1598 goto done; 1599 } 1600 1601 /* The tunnel type cannot be changed */ 1602 if (ik->iptun_kparam_flags & IPTUN_KPARAM_TYPE) { 1603 err = EINVAL; 1604 goto done; 1605 } 1606 1607 if ((err = iptun_setparams(iptun, ik)) != 0) 1608 goto done; 1609 iptun_headergen(iptun, B_FALSE); 1610 1611 /* 1612 * If any of the tunnel's addresses has been modified and the tunnel 1613 * has the necessary addresses assigned to it, we need to try to bind 1614 * with ip underneath us. If we're not ready to bind yet, then we'll 1615 * try again when the addresses are modified later. 
1616 */ 1617 laddr_change = (ik->iptun_kparam_flags & IPTUN_KPARAM_LADDR); 1618 raddr_change = (ik->iptun_kparam_flags & IPTUN_KPARAM_RADDR); 1619 if (laddr_change || raddr_change) { 1620 if (iptun->iptun_flags & IPTUN_BOUND) 1621 iptun_unbind(iptun); 1622 if (iptun_canbind(iptun) && (err = iptun_bind(iptun)) != 0) { 1623 if (laddr_change) 1624 iptun->iptun_flags &= ~IPTUN_LADDR; 1625 if (raddr_change) 1626 iptun->iptun_flags &= ~IPTUN_RADDR; 1627 goto done; 1628 } 1629 } 1630 1631 if (laddr_change) 1632 iptun_task_dispatch(iptun, IPTUN_TASK_LADDR_UPDATE); 1633 if (raddr_change) 1634 iptun_task_dispatch(iptun, IPTUN_TASK_RADDR_UPDATE); 1635 1636 done: 1637 iptun_exit(iptun); 1638 return (err); 1639 } 1640 1641 /* Given an IP tunnel's datalink id, fill in its parameters. */ 1642 int 1643 iptun_info(iptun_kparams_t *ik, cred_t *credp) 1644 { 1645 iptun_t *iptun; 1646 int err; 1647 1648 /* Is the tunnel link visible from the caller's zone? */ 1649 if (!dls_devnet_islinkvisible(ik->iptun_kparam_linkid, 1650 crgetzoneid(credp))) 1651 return (ENOENT); 1652 1653 if ((err = iptun_enter_by_linkid(ik->iptun_kparam_linkid, &iptun)) != 0) 1654 return (err); 1655 1656 bzero(ik, sizeof (iptun_kparams_t)); 1657 1658 ik->iptun_kparam_linkid = iptun->iptun_linkid; 1659 ik->iptun_kparam_type = iptun->iptun_typeinfo->iti_type; 1660 ik->iptun_kparam_flags |= IPTUN_KPARAM_TYPE; 1661 1662 if (iptun->iptun_flags & IPTUN_LADDR) { 1663 iptun_getaddr(&iptun->iptun_laddr, &ik->iptun_kparam_laddr); 1664 ik->iptun_kparam_flags |= IPTUN_KPARAM_LADDR; 1665 } 1666 if (iptun->iptun_flags & IPTUN_RADDR) { 1667 iptun_getaddr(&iptun->iptun_raddr, &ik->iptun_kparam_raddr); 1668 ik->iptun_kparam_flags |= IPTUN_KPARAM_RADDR; 1669 } 1670 1671 if (iptun->iptun_flags & IPTUN_IMPLICIT) 1672 ik->iptun_kparam_flags |= IPTUN_KPARAM_IMPLICIT; 1673 1674 if (iptun->iptun_itp != NULL) { 1675 mutex_enter(&iptun->iptun_itp->itp_lock); 1676 if (iptun->iptun_itp->itp_flags & ITPF_P_ACTIVE) { 1677 
ik->iptun_kparam_flags |= IPTUN_KPARAM_IPSECPOL; 1678 if (iptun->iptun_flags & IPTUN_SIMPLE_POLICY) { 1679 ik->iptun_kparam_flags |= IPTUN_KPARAM_SECINFO; 1680 ik->iptun_kparam_secinfo = 1681 iptun->iptun_simple_policy; 1682 } 1683 } 1684 mutex_exit(&iptun->iptun_itp->itp_lock); 1685 } 1686 1687 done: 1688 iptun_exit(iptun); 1689 return (err); 1690 } 1691 1692 int 1693 iptun_set_6to4relay(netstack_t *ns, ipaddr_t relay_addr) 1694 { 1695 if (relay_addr == INADDR_BROADCAST || CLASSD(relay_addr)) 1696 return (EADDRNOTAVAIL); 1697 ns->netstack_iptun->iptuns_relay_rtr_addr = relay_addr; 1698 return (0); 1699 } 1700 1701 void 1702 iptun_get_6to4relay(netstack_t *ns, ipaddr_t *relay_addr) 1703 { 1704 *relay_addr = ns->netstack_iptun->iptuns_relay_rtr_addr; 1705 } 1706 1707 void 1708 iptun_set_policy(datalink_id_t linkid, ipsec_tun_pol_t *itp) 1709 { 1710 iptun_t *iptun; 1711 1712 if (iptun_enter_by_linkid(linkid, &iptun) != 0) 1713 return; 1714 if (iptun->iptun_itp != itp) { 1715 ASSERT(iptun->iptun_itp == NULL); 1716 ITP_REFHOLD(itp); 1717 iptun->iptun_itp = itp; 1718 } 1719 /* 1720 * IPsec policy means IPsec overhead, which means lower MTU. 1721 * Refresh the MTU for this tunnel. 1722 */ 1723 (void) iptun_update_mtu(iptun, NULL, 0); 1724 iptun_exit(iptun); 1725 } 1726 1727 /* 1728 * Obtain the path MTU to the tunnel destination. 1729 * Can return zero in some cases. 1730 */ 1731 static uint32_t 1732 iptun_get_dst_pmtu(iptun_t *iptun, ip_xmit_attr_t *ixa) 1733 { 1734 uint32_t pmtu = 0; 1735 conn_t *connp = iptun->iptun_connp; 1736 boolean_t need_rele = B_FALSE; 1737 1738 /* 1739 * We only obtain the pmtu for tunnels that have a remote tunnel 1740 * address. 
1741 */ 1742 if (!(iptun->iptun_flags & IPTUN_RADDR)) 1743 return (0); 1744 1745 if (ixa == NULL) { 1746 ixa = conn_get_ixa(connp, B_FALSE); 1747 if (ixa == NULL) 1748 return (0); 1749 need_rele = B_TRUE; 1750 } 1751 /* 1752 * Guard against ICMP errors before we have sent, as well as against 1753 * and a thread which held conn_ixa. 1754 */ 1755 if (ixa->ixa_ire != NULL) { 1756 pmtu = ip_get_pmtu(ixa); 1757 1758 /* 1759 * For both IPv4 and IPv6 we can have indication that the outer 1760 * header needs fragmentation. 1761 */ 1762 if (ixa->ixa_flags & IXAF_PMTU_TOO_SMALL) { 1763 /* Must allow fragmentation in ip_output */ 1764 ixa->ixa_flags &= ~IXAF_DONTFRAG; 1765 } else if (iptun->iptun_typeinfo->iti_type != IPTUN_TYPE_6TO4) { 1766 ixa->ixa_flags |= IXAF_DONTFRAG; 1767 } else { 1768 /* ip_get_pmtu might have set this - we don't want it */ 1769 ixa->ixa_flags &= ~IXAF_PMTU_IPV4_DF; 1770 } 1771 } 1772 1773 if (need_rele) 1774 ixa_refrele(ixa); 1775 return (pmtu); 1776 } 1777 1778 /* 1779 * Update the ip_xmit_attr_t to capture the current lower path mtu as known 1780 * by ip. 1781 */ 1782 static void 1783 iptun_update_dst_pmtu(iptun_t *iptun, ip_xmit_attr_t *ixa) 1784 { 1785 uint32_t pmtu; 1786 conn_t *connp = iptun->iptun_connp; 1787 boolean_t need_rele = B_FALSE; 1788 1789 /* IXAF_VERIFY_PMTU is not set if we don't have a fixed destination */ 1790 if (!(iptun->iptun_flags & IPTUN_RADDR)) 1791 return; 1792 1793 if (ixa == NULL) { 1794 ixa = conn_get_ixa(connp, B_FALSE); 1795 if (ixa == NULL) 1796 return; 1797 need_rele = B_TRUE; 1798 } 1799 /* 1800 * Guard against ICMP errors before we have sent, as well as against 1801 * and a thread which held conn_ixa. 1802 */ 1803 if (ixa->ixa_ire != NULL) { 1804 pmtu = ip_get_pmtu(ixa); 1805 /* 1806 * Update ixa_fragsize and ixa_pmtu. 1807 */ 1808 ixa->ixa_fragsize = ixa->ixa_pmtu = pmtu; 1809 1810 /* 1811 * For both IPv4 and IPv6 we can have indication that the outer 1812 * header needs fragmentation. 
1813 */ 1814 if (ixa->ixa_flags & IXAF_PMTU_TOO_SMALL) { 1815 /* Must allow fragmentation in ip_output */ 1816 ixa->ixa_flags &= ~IXAF_DONTFRAG; 1817 } else if (iptun->iptun_typeinfo->iti_type != IPTUN_TYPE_6TO4) { 1818 ixa->ixa_flags |= IXAF_DONTFRAG; 1819 } else { 1820 /* ip_get_pmtu might have set this - we don't want it */ 1821 ixa->ixa_flags &= ~IXAF_PMTU_IPV4_DF; 1822 } 1823 } 1824 1825 if (need_rele) 1826 ixa_refrele(ixa); 1827 } 1828 1829 /* 1830 * There is nothing that iptun can verify in addition to IP having 1831 * verified the IP addresses in the fanout. 1832 */ 1833 /* ARGSUSED */ 1834 static boolean_t 1835 iptun_verifyicmp(conn_t *connp, void *arg2, icmph_t *icmph, icmp6_t *icmp6, 1836 ip_recv_attr_t *ira) 1837 { 1838 return (B_TRUE); 1839 } 1840 1841 /* 1842 * Notify function registered with ip_xmit_attr_t. 1843 */ 1844 static void 1845 iptun_notify(void *arg, ip_xmit_attr_t *ixa, ixa_notify_type_t ntype, 1846 ixa_notify_arg_t narg) 1847 { 1848 iptun_t *iptun = (iptun_t *)arg; 1849 1850 switch (ntype) { 1851 case IXAN_PMTU: 1852 (void) iptun_update_mtu(iptun, ixa, narg); 1853 break; 1854 } 1855 } 1856 1857 /* 1858 * Returns the max of old_ovhd and the overhead associated with pol. 1859 */ 1860 static uint32_t 1861 iptun_max_policy_overhead(ipsec_policy_t *pol, uint32_t old_ovhd) 1862 { 1863 uint32_t new_ovhd = old_ovhd; 1864 1865 while (pol != NULL) { 1866 new_ovhd = max(new_ovhd, 1867 ipsec_act_ovhd(&pol->ipsp_act->ipa_act)); 1868 pol = pol->ipsp_hash.hash_next; 1869 } 1870 return (new_ovhd); 1871 } 1872 1873 static uint32_t 1874 iptun_get_ipsec_overhead(iptun_t *iptun) 1875 { 1876 ipsec_policy_root_t *ipr; 1877 ipsec_policy_head_t *iph; 1878 ipsec_policy_t *pol; 1879 ipsec_selector_t sel; 1880 int i; 1881 uint32_t ipsec_ovhd = 0; 1882 ipsec_tun_pol_t *itp = iptun->iptun_itp; 1883 netstack_t *ns = iptun->iptun_ns; 1884 1885 if (itp == NULL || !(itp->itp_flags & ITPF_P_ACTIVE)) { 1886 /* 1887 * Consult global policy, just in case. 
This will only work 1888 * if we have both source and destination addresses to work 1889 * with. 1890 */ 1891 if ((iptun->iptun_flags & (IPTUN_LADDR|IPTUN_RADDR)) != 1892 (IPTUN_LADDR|IPTUN_RADDR)) 1893 return (0); 1894 1895 iph = ipsec_system_policy(ns); 1896 bzero(&sel, sizeof (sel)); 1897 sel.ips_isv4 = 1898 (iptun->iptun_typeinfo->iti_ipvers == IPV4_VERSION); 1899 switch (iptun->iptun_typeinfo->iti_ipvers) { 1900 case IPV4_VERSION: 1901 sel.ips_local_addr_v4 = iptun->iptun_laddr4; 1902 sel.ips_remote_addr_v4 = iptun->iptun_raddr4; 1903 break; 1904 case IPV6_VERSION: 1905 sel.ips_local_addr_v6 = iptun->iptun_laddr6; 1906 sel.ips_remote_addr_v6 = iptun->iptun_raddr6; 1907 break; 1908 } 1909 /* Check for both IPv4 and IPv6. */ 1910 sel.ips_protocol = IPPROTO_ENCAP; 1911 pol = ipsec_find_policy_head(NULL, iph, IPSEC_TYPE_OUTBOUND, 1912 &sel); 1913 if (pol != NULL) { 1914 ipsec_ovhd = ipsec_act_ovhd(&pol->ipsp_act->ipa_act); 1915 IPPOL_REFRELE(pol); 1916 } 1917 sel.ips_protocol = IPPROTO_IPV6; 1918 pol = ipsec_find_policy_head(NULL, iph, IPSEC_TYPE_OUTBOUND, 1919 &sel); 1920 if (pol != NULL) { 1921 ipsec_ovhd = max(ipsec_ovhd, 1922 ipsec_act_ovhd(&pol->ipsp_act->ipa_act)); 1923 IPPOL_REFRELE(pol); 1924 } 1925 IPPH_REFRELE(iph, ns); 1926 } else { 1927 /* 1928 * Look through all of the possible IPsec actions for the 1929 * tunnel, and find the largest potential IPsec overhead. 
1930 */ 1931 iph = itp->itp_policy; 1932 rw_enter(&iph->iph_lock, RW_READER); 1933 ipr = &(iph->iph_root[IPSEC_TYPE_OUTBOUND]); 1934 ipsec_ovhd = iptun_max_policy_overhead( 1935 ipr->ipr_nonhash[IPSEC_AF_V4], 0); 1936 ipsec_ovhd = iptun_max_policy_overhead( 1937 ipr->ipr_nonhash[IPSEC_AF_V6], ipsec_ovhd); 1938 for (i = 0; i < ipr->ipr_nchains; i++) { 1939 ipsec_ovhd = iptun_max_policy_overhead( 1940 ipr->ipr_hash[i].hash_head, ipsec_ovhd); 1941 } 1942 rw_exit(&iph->iph_lock); 1943 } 1944 1945 return (ipsec_ovhd); 1946 } 1947 1948 /* 1949 * Calculate and return the maximum possible upper MTU for the given tunnel. 1950 * 1951 * If new_pmtu is set then we also need to update the lower path MTU information 1952 * in the ip_xmit_attr_t. That is needed since we set IXAF_VERIFY_PMTU so that 1953 * we are notified by conn_ip_output() when the path MTU increases. 1954 */ 1955 static uint32_t 1956 iptun_get_maxmtu(iptun_t *iptun, ip_xmit_attr_t *ixa, uint32_t new_pmtu) 1957 { 1958 size_t header_size, ipsec_overhead; 1959 uint32_t maxmtu, pmtu; 1960 1961 /* 1962 * Start with the path-MTU to the remote address, which is either 1963 * provided as the new_pmtu argument, or obtained using 1964 * iptun_get_dst_pmtu(). 1965 */ 1966 if (new_pmtu != 0) { 1967 if (iptun->iptun_flags & IPTUN_RADDR) 1968 iptun->iptun_dpmtu = new_pmtu; 1969 pmtu = new_pmtu; 1970 } else if (iptun->iptun_flags & IPTUN_RADDR) { 1971 if ((pmtu = iptun_get_dst_pmtu(iptun, ixa)) == 0) { 1972 /* 1973 * We weren't able to obtain the path-MTU of the 1974 * destination. Use the previous value. 1975 */ 1976 pmtu = iptun->iptun_dpmtu; 1977 } else { 1978 iptun->iptun_dpmtu = pmtu; 1979 } 1980 } else { 1981 /* 1982 * We have no path-MTU information to go on, use the maximum 1983 * possible value. 1984 */ 1985 pmtu = iptun->iptun_typeinfo->iti_maxmtu; 1986 } 1987 1988 /* 1989 * Now calculate tunneling overhead and subtract that from the 1990 * path-MTU information obtained above. 
1991 */ 1992 if (iptun->iptun_header_size != 0) { 1993 header_size = iptun->iptun_header_size; 1994 } else { 1995 switch (iptun->iptun_typeinfo->iti_ipvers) { 1996 case IPV4_VERSION: 1997 header_size = sizeof (ipha_t); 1998 if (is_system_labeled()) 1999 header_size += IP_MAX_OPT_LENGTH; 2000 break; 2001 case IPV6_VERSION: 2002 header_size = sizeof (iptun_ipv6hdrs_t); 2003 break; 2004 } 2005 } 2006 2007 ipsec_overhead = iptun_get_ipsec_overhead(iptun); 2008 2009 maxmtu = pmtu - (header_size + ipsec_overhead); 2010 return (max(maxmtu, iptun->iptun_typeinfo->iti_minmtu)); 2011 } 2012 2013 /* 2014 * Re-calculate the tunnel's MTU as seen from above and notify the MAC layer 2015 * of any change in MTU. The new_pmtu argument is the new lower path MTU to 2016 * the tunnel destination to be used in the tunnel MTU calculation. Passing 2017 * in 0 for new_pmtu causes the lower path MTU to be dynamically updated using 2018 * ip_get_pmtu(). 2019 * 2020 * If the calculated tunnel MTU is different than its previous value, then we 2021 * notify the MAC layer above us of this change using mac_maxsdu_update(). 2022 */ 2023 static uint32_t 2024 iptun_update_mtu(iptun_t *iptun, ip_xmit_attr_t *ixa, uint32_t new_pmtu) 2025 { 2026 uint32_t newmtu; 2027 2028 /* We always update the ixa since we might have set IXAF_VERIFY_PMTU */ 2029 iptun_update_dst_pmtu(iptun, ixa); 2030 2031 /* 2032 * We return the current MTU without updating it if it was pegged to a 2033 * static value using the MAC_PROP_MTU link property. 2034 */ 2035 if (iptun->iptun_flags & IPTUN_FIXED_MTU) 2036 return (iptun->iptun_mtu); 2037 2038 /* If the MTU isn't fixed, then use the maximum possible value. */ 2039 newmtu = iptun_get_maxmtu(iptun, ixa, new_pmtu); 2040 /* 2041 * We only dynamically adjust the tunnel MTU for tunnels with 2042 * destinations because dynamic MTU calculations are based on the 2043 * destination path-MTU. 
 */
	if ((iptun->iptun_flags & IPTUN_RADDR) && newmtu != iptun->iptun_mtu) {
		iptun->iptun_mtu = newmtu;
		if (iptun->iptun_flags & IPTUN_MAC_REGISTERED)
			iptun_task_dispatch(iptun, IPTUN_TASK_MTU_UPDATE);
	}

	return (newmtu);
}

/*
 * Frees a packet or packet chain and bumps stat for each freed packet.
 */
static void
iptun_drop_pkt(mblk_t *mp, uint64_t *stat)
{
	mblk_t *pktmp;

	/* Unlink each message from the b_next chain before freeing it. */
	for (pktmp = mp; pktmp != NULL; pktmp = mp) {
		mp = mp->b_next;
		pktmp->b_next = NULL;
		if (stat != NULL)
			atomic_inc_64(stat);
		freemsg(pktmp);
	}
}

/*
 * Allocate and return a new mblk to hold an IP and ICMP header, and chain the
 * original packet to its b_cont.  Returns NULL on failure.
 */
static mblk_t *
iptun_build_icmperr(size_t hdrs_size, mblk_t *orig_pkt)
{
	mblk_t *icmperr_mp;

	if ((icmperr_mp = allocb(hdrs_size, BPRI_MED)) != NULL) {
		icmperr_mp->b_wptr += hdrs_size;
		/* tack on the offending packet */
		icmperr_mp->b_cont = orig_pkt;
	}
	return (icmperr_mp);
}

/*
 * Transmit an ICMP error.  mp->b_rptr points at the packet to be included in
 * the ICMP error.  Consumes mp (either transmitted or dropped with a stat
 * bump on allocation failure).
 */
static void
iptun_sendicmp_v4(iptun_t *iptun, icmph_t *icmp, ipha_t *orig_ipha, mblk_t *mp,
    ts_label_t *tsl)
{
	size_t orig_pktsize, hdrs_size;
	mblk_t *icmperr_mp;
	ipha_t *new_ipha;
	icmph_t *new_icmp;
	ip_xmit_attr_t ixas;
	conn_t *connp = iptun->iptun_connp;

	orig_pktsize = msgdsize(mp);
	hdrs_size = sizeof (ipha_t) + sizeof (icmph_t);
	if ((icmperr_mp = iptun_build_icmperr(hdrs_size, mp)) == NULL) {
		iptun_drop_pkt(mp, &iptun->iptun_noxmtbuf);
		return;
	}

	new_ipha = (ipha_t *)icmperr_mp->b_rptr;
	new_icmp = (icmph_t *)(new_ipha + 1);

	/*
	 * Build the outer IPv4 header, swapping the source and destination
	 * of the offending packet so the error goes back to its sender.
	 */
	new_ipha->ipha_version_and_hdr_length = IP_SIMPLE_HDR_VERSION;
	new_ipha->ipha_type_of_service = 0;
	new_ipha->ipha_ident = 0;
	new_ipha->ipha_fragment_offset_and_flags = 0;
	new_ipha->ipha_ttl = orig_ipha->ipha_ttl;
	new_ipha->ipha_protocol = IPPROTO_ICMP;
	new_ipha->ipha_src = orig_ipha->ipha_dst;
	new_ipha->ipha_dst = orig_ipha->ipha_src;
	new_ipha->ipha_hdr_checksum = 0;	/* will be computed by ip */
	new_ipha->ipha_length = htons(hdrs_size + orig_pktsize);

	/* Copy in the caller-supplied ICMP header and checksum it. */
	*new_icmp = *icmp;
	new_icmp->icmph_checksum = 0;
	new_icmp->icmph_checksum = IP_CSUM(icmperr_mp, sizeof (ipha_t), 0);

	bzero(&ixas, sizeof (ixas));
	ixas.ixa_flags = IXAF_BASIC_SIMPLE_V4;
	if (new_ipha->ipha_src == INADDR_ANY) {
		/* Let ip select and fill in the source address. */
		ixas.ixa_flags &= ~IXAF_VERIFY_SOURCE;
		ixas.ixa_flags |= IXAF_SET_SOURCE;
	}

	ixas.ixa_zoneid = IPCL_ZONEID(connp);
	ixas.ixa_ipst = connp->conn_netstack->netstack_ip;
	ixas.ixa_cred = connp->conn_cred;
	ixas.ixa_cpid = NOPID;
	if (is_system_labeled())
		ixas.ixa_tsl = tsl;

	ixas.ixa_ifindex = 0;
	ixas.ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;

	(void) ip_output_simple(icmperr_mp, &ixas);
	ixa_cleanup(&ixas);
}

/*
 * IPv6 counterpart of iptun_sendicmp_v4(): transmit an ICMPv6 error for the
 * packet at mp->b_rptr.  Consumes mp.
 */
static void
iptun_sendicmp_v6(iptun_t *iptun, icmp6_t *icmp6, ip6_t *orig_ip6h, mblk_t *mp,
    ts_label_t *tsl)
{
	size_t orig_pktsize, hdrs_size;
	mblk_t *icmp6err_mp;
	ip6_t *new_ip6h;
	icmp6_t *new_icmp6;
	ip_xmit_attr_t ixas;
	conn_t *connp = iptun->iptun_connp;

	orig_pktsize = msgdsize(mp);
	hdrs_size = sizeof (ip6_t) + sizeof (icmp6_t);
	if ((icmp6err_mp = iptun_build_icmperr(hdrs_size, mp)) == NULL) {
		iptun_drop_pkt(mp, &iptun->iptun_noxmtbuf);
		return;
	}

	new_ip6h = (ip6_t *)icmp6err_mp->b_rptr;
	new_icmp6 = (icmp6_t *)(new_ip6h + 1);

	/* Swap source and destination so the error returns to the sender. */
	new_ip6h->ip6_vcf = orig_ip6h->ip6_vcf;
	new_ip6h->ip6_plen = htons(sizeof (icmp6_t) + orig_pktsize);
	new_ip6h->ip6_hops = orig_ip6h->ip6_hops;
	new_ip6h->ip6_nxt = IPPROTO_ICMPV6;
	new_ip6h->ip6_src = orig_ip6h->ip6_dst;
	new_ip6h->ip6_dst = orig_ip6h->ip6_src;

	*new_icmp6 = *icmp6;
	/* The checksum is calculated in ip_output_simple and friends. */
	new_icmp6->icmp6_cksum = new_ip6h->ip6_plen;

	bzero(&ixas, sizeof (ixas));
	ixas.ixa_flags = IXAF_BASIC_SIMPLE_V6;
	if (IN6_IS_ADDR_UNSPECIFIED(&new_ip6h->ip6_src)) {
		/* Let ip select and fill in the source address. */
		ixas.ixa_flags &= ~IXAF_VERIFY_SOURCE;
		ixas.ixa_flags |= IXAF_SET_SOURCE;
	}

	ixas.ixa_zoneid = IPCL_ZONEID(connp);
	ixas.ixa_ipst = connp->conn_netstack->netstack_ip;
	ixas.ixa_cred = connp->conn_cred;
	ixas.ixa_cpid = NOPID;
	if (is_system_labeled())
		ixas.ixa_tsl = tsl;

	ixas.ixa_ifindex = 0;
	ixas.ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;

	(void) ip_output_simple(icmp6err_mp, &ixas);
	ixa_cleanup(&ixas);
}

/*
 * Build and transmit an ICMPv4 error of the given type and code in response
 * to the IPv4 packet in mp.  Consumes mp.
 */
static void
iptun_icmp_error_v4(iptun_t *iptun, ipha_t *orig_ipha, mblk_t *mp,
    uint8_t type, uint8_t code, ts_label_t *tsl)
{
	icmph_t icmp;

	bzero(&icmp, sizeof (icmp));
	icmp.icmph_type = type;
	icmp.icmph_code = code;

	iptun_sendicmp_v4(iptun, &icmp, orig_ipha, mp, tsl);
}

static void
iptun_icmp_fragneeded_v4(iptun_t *iptun, uint32_t newmtu, ipha_t *orig_ipha,
    mblk_t *mp, ts_label_t *tsl)
{
	icmph_t icmp;

	/*
	 * Build a "fragmentation needed" unreachable.  Every wire-visible
	 * field is assigned here except the checksum, which
	 * iptun_sendicmp_v4() computes after copying this header in.
	 */
	icmp.icmph_type = ICMP_DEST_UNREACHABLE;
	icmp.icmph_code = ICMP_FRAGMENTATION_NEEDED;
	icmp.icmph_du_zero = 0;
	icmp.icmph_du_mtu = htons(newmtu);

	iptun_sendicmp_v4(iptun, &icmp, orig_ipha, mp, tsl);
}

/*
 * Build and transmit an ICMPv6 error of the given type and code in response
 * to the IPv6 packet in mp.  For ICMP6_PARAM_PROB, offset is the offset of
 * the offending byte within the packet.  Consumes mp.
 */
static void
iptun_icmp_error_v6(iptun_t *iptun, ip6_t *orig_ip6h, mblk_t *mp,
    uint8_t type, uint8_t code, uint32_t offset, ts_label_t *tsl)
{
	icmp6_t icmp6;

	bzero(&icmp6, sizeof (icmp6));
	icmp6.icmp6_type = type;
	icmp6.icmp6_code = code;
	/* The pointer field is only meaningful for parameter problems. */
	if (type == ICMP6_PARAM_PROB)
		icmp6.icmp6_pptr = htonl(offset);

	iptun_sendicmp_v6(iptun, &icmp6, orig_ip6h, mp, tsl);
}

/*
 * Build and transmit an ICMPv6 "packet too big" carrying newmtu in response
 * to the IPv6 packet in mp.  Consumes mp.
 */
static void
iptun_icmp_toobig_v6(iptun_t *iptun, uint32_t newmtu, ip6_t *orig_ip6h,
    mblk_t *mp, ts_label_t *tsl)
{
	icmp6_t icmp6;

	/*
	 * type/code/mtu cover the whole header except the checksum, which
	 * iptun_sendicmp_v6() fills in.
	 */
	icmp6.icmp6_type = ICMP6_PACKET_TOO_BIG;
	icmp6.icmp6_code = 0;
	icmp6.icmp6_mtu = htonl(newmtu);

	iptun_sendicmp_v6(iptun, &icmp6, orig_ip6h, mp, tsl);
}

/*
 * Determines if the packet pointed to by ipha or ip6h is an ICMP error.  The
 * mp argument is only used to do bounds checking.
 */
static boolean_t
is_icmp_error(mblk_t *mp, ipha_t *ipha, ip6_t *ip6h)
{
	uint16_t hlen;

	if (ipha != NULL) {
		/* IPv4: exactly one of ipha/ip6h may be non-NULL. */
		icmph_t *icmph;

		ASSERT(ip6h == NULL);
		if (ipha->ipha_protocol != IPPROTO_ICMP)
			return (B_FALSE);

		hlen = IPH_HDR_LENGTH(ipha);
		icmph = (icmph_t *)((uint8_t *)ipha + hlen);
		/* Redirects are treated like errors for our purposes. */
		return (ICMP_IS_ERROR(icmph->icmph_type) ||
		    icmph->icmph_type == ICMP_REDIRECT);
	} else {
		icmp6_t *icmp6;
		uint8_t *nexthdrp;

		ASSERT(ip6h != NULL);
		/* Walk any extension headers to find the upper-layer type. */
		if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hlen, &nexthdrp) ||
		    *nexthdrp != IPPROTO_ICMPV6) {
			return (B_FALSE);
		}

		icmp6 = (icmp6_t *)((uint8_t *)ip6h + hlen);
		return (ICMP6_IS_ERROR(icmp6->icmp6_type) ||
		    icmp6->icmp6_type == ND_REDIRECT);
	}
}

/*
 * Find inner and outer IP headers from a tunneled packet as setup for calls
 * into ipsec_tun_{in,out}bound().
 * Note that we need to allow the outer header to be in a separate mblk from
 * the inner header.
 * If the caller knows the outer_hlen, the caller passes it in.  Otherwise
 * zero.  Returns the outer header length on success, or 0 if the headers
 * could not be located (the out-parameters are not all valid in that case).
 */
static size_t
iptun_find_headers(mblk_t *mp, size_t outer_hlen, ipha_t **outer4,
    ipha_t **inner4, ip6_t **outer6, ip6_t **inner6)
{
	ipha_t *ipha;
	size_t first_mblkl = MBLKL(mp);
	mblk_t *inner_mp;

	/*
	 * Don't bother handling packets that don't have a full IP header in
	 * the first mblk.  For the input path, the ip module ensures that
	 * this won't happen, and on the output path, the IP tunneling
	 * MAC-type plugins ensure that this also won't happen.
	 */
	if (first_mblkl < sizeof (ipha_t))
		return (0);
	ipha = (ipha_t *)(mp->b_rptr);
	switch (IPH_HDR_VERSION(ipha)) {
	case IPV4_VERSION:
		*outer4 = ipha;
		*outer6 = NULL;
		if (outer_hlen == 0)
			outer_hlen = IPH_HDR_LENGTH(ipha);
		break;
	case IPV6_VERSION:
		*outer4 = NULL;
		*outer6 = (ip6_t *)ipha;
		if (outer_hlen == 0)
			outer_hlen = ip_hdr_length_v6(mp, (ip6_t *)ipha);
		break;
	default:
		return (0);
	}

	/*
	 * Fail if the outer header isn't contained in the first mblk, or if
	 * the first mblk ends exactly at the outer header with no
	 * continuation holding the inner packet.
	 */
	if (first_mblkl < outer_hlen ||
	    (first_mblkl == outer_hlen && mp->b_cont == NULL))
		return (0);

	/*
	 * We don't bother doing a pullup here since the outer header will
	 * just get stripped off soon on input anyway.  We just want to ensure
	 * that the inner* pointer points to a full header.
	 */
	if (first_mblkl == outer_hlen) {
		inner_mp = mp->b_cont;
		ipha = (ipha_t *)inner_mp->b_rptr;
	} else {
		inner_mp = mp;
		ipha = (ipha_t *)(mp->b_rptr + outer_hlen);
	}
	switch (IPH_HDR_VERSION(ipha)) {
	case IPV4_VERSION:
		if (inner_mp->b_wptr - (uint8_t *)ipha < sizeof (ipha_t))
			return (0);
		*inner4 = ipha;
		*inner6 = NULL;
		break;
	case IPV6_VERSION:
		if (inner_mp->b_wptr - (uint8_t *)ipha < sizeof (ip6_t))
			return (0);
		*inner4 = NULL;
		*inner6 = (ip6_t *)ipha;
		break;
	default:
		return (0);
	}

	return (outer_hlen);
}

/*
 * Received ICMP error in response to an X over IPv4 packet that we
 * transmitted.
 *
 * NOTE: "outer" refers to what's inside the ICMP payload.  We will get one of
 * the following:
 *
 * [IPv4(0)][ICMPv4][IPv4(1)][IPv4(2)][ULP]
 *
 * or
 *
 * [IPv4(0)][ICMPv4][IPv4(1)][IPv6][ULP]
 *
 * And "outer4" will get set to IPv4(1), and inner[46] will correspond to
 * whatever the very-inner packet is (IPv4(2) or IPv6).
2385 */ 2386 static void 2387 iptun_input_icmp_v4(iptun_t *iptun, mblk_t *data_mp, icmph_t *icmph, 2388 ip_recv_attr_t *ira) 2389 { 2390 uint8_t *orig; 2391 ipha_t *outer4, *inner4; 2392 ip6_t *outer6, *inner6; 2393 int outer_hlen; 2394 uint8_t type, code; 2395 2396 ASSERT(data_mp->b_cont == NULL); 2397 /* 2398 * Temporarily move b_rptr forward so that iptun_find_headers() can 2399 * find headers in the ICMP packet payload. 2400 */ 2401 orig = data_mp->b_rptr; 2402 data_mp->b_rptr = (uint8_t *)(icmph + 1); 2403 /* 2404 * The ip module ensures that ICMP errors contain at least the 2405 * original IP header (otherwise, the error would never have made it 2406 * here). 2407 */ 2408 ASSERT(MBLKL(data_mp) >= 0); 2409 outer_hlen = iptun_find_headers(data_mp, 0, &outer4, &inner4, &outer6, 2410 &inner6); 2411 ASSERT(outer6 == NULL); 2412 data_mp->b_rptr = orig; 2413 if (outer_hlen == 0) { 2414 iptun_drop_pkt(data_mp, &iptun->iptun_ierrors); 2415 return; 2416 } 2417 2418 /* Only ICMP errors due to tunneled packets should reach here. */ 2419 ASSERT(outer4->ipha_protocol == IPPROTO_ENCAP || 2420 outer4->ipha_protocol == IPPROTO_IPV6); 2421 2422 data_mp = ipsec_tun_inbound(ira, data_mp, iptun->iptun_itp, 2423 inner4, inner6, outer4, outer6, -outer_hlen, iptun->iptun_ns); 2424 if (data_mp == NULL) { 2425 /* Callee did all of the freeing. */ 2426 atomic_inc_64(&iptun->iptun_ierrors); 2427 return; 2428 } 2429 /* We should never see reassembled fragment here. */ 2430 ASSERT(data_mp->b_next == NULL); 2431 2432 data_mp->b_rptr = (uint8_t *)outer4 + outer_hlen; 2433 2434 /* 2435 * If the original packet being transmitted was itself an ICMP error, 2436 * then drop this packet. We don't want to generate an ICMP error in 2437 * response to an ICMP error. 2438 */ 2439 if (is_icmp_error(data_mp, inner4, inner6)) { 2440 iptun_drop_pkt(data_mp, &iptun->iptun_norcvbuf); 2441 return; 2442 } 2443 2444 switch (icmph->icmph_type) { 2445 case ICMP_DEST_UNREACHABLE: 2446 type = (inner4 != NULL ? 
icmph->icmph_type : ICMP6_DST_UNREACH); 2447 switch (icmph->icmph_code) { 2448 case ICMP_FRAGMENTATION_NEEDED: { 2449 uint32_t newmtu; 2450 2451 /* 2452 * We reconcile this with the fact that the tunnel may 2453 * also have IPsec policy by letting iptun_update_mtu 2454 * take care of it. 2455 */ 2456 newmtu = iptun_update_mtu(iptun, NULL, 2457 ntohs(icmph->icmph_du_mtu)); 2458 2459 if (inner4 != NULL) { 2460 iptun_icmp_fragneeded_v4(iptun, newmtu, inner4, 2461 data_mp, ira->ira_tsl); 2462 } else { 2463 iptun_icmp_toobig_v6(iptun, newmtu, inner6, 2464 data_mp, ira->ira_tsl); 2465 } 2466 return; 2467 } 2468 case ICMP_DEST_NET_UNREACH_ADMIN: 2469 case ICMP_DEST_HOST_UNREACH_ADMIN: 2470 code = (inner4 != NULL ? ICMP_DEST_NET_UNREACH_ADMIN : 2471 ICMP6_DST_UNREACH_ADMIN); 2472 break; 2473 default: 2474 code = (inner4 != NULL ? ICMP_HOST_UNREACHABLE : 2475 ICMP6_DST_UNREACH_ADDR); 2476 break; 2477 } 2478 break; 2479 case ICMP_TIME_EXCEEDED: 2480 if (inner6 != NULL) { 2481 type = ICMP6_TIME_EXCEEDED; 2482 code = 0; 2483 } /* else we're already set. */ 2484 break; 2485 case ICMP_PARAM_PROBLEM: 2486 /* 2487 * This is a problem with the outer header we transmitted. 2488 * Treat this as an output error. 2489 */ 2490 iptun_drop_pkt(data_mp, &iptun->iptun_oerrors); 2491 return; 2492 default: 2493 iptun_drop_pkt(data_mp, &iptun->iptun_norcvbuf); 2494 return; 2495 } 2496 2497 if (inner4 != NULL) { 2498 iptun_icmp_error_v4(iptun, inner4, data_mp, type, code, 2499 ira->ira_tsl); 2500 } else { 2501 iptun_icmp_error_v6(iptun, inner6, data_mp, type, code, 0, 2502 ira->ira_tsl); 2503 } 2504 } 2505 2506 /* 2507 * Return B_TRUE if the IPv6 packet pointed to by ip6h contains a Tunnel 2508 * Encapsulation Limit destination option. If there is one, set encaplim_ptr 2509 * to point to the option value. 
 */
static boolean_t
iptun_find_encaplimit(mblk_t *mp, ip6_t *ip6h, uint8_t **encaplim_ptr)
{
	ip_pkt_t pkt;
	uint8_t *endptr;
	ip6_dest_t *destp;
	struct ip6_opt *optp;

	pkt.ipp_fields = 0;	/* must be initialized */
	(void) ip_find_hdr_v6(mp, ip6h, B_FALSE, &pkt, NULL);
	/* Destination options can appear alone or before a routing header. */
	if ((pkt.ipp_fields & IPPF_DSTOPTS) != 0) {
		destp = pkt.ipp_dstopts;
	} else if ((pkt.ipp_fields & IPPF_RTHDRDSTOPTS) != 0) {
		destp = pkt.ipp_rthdrdstopts;
	} else {
		return (B_FALSE);
	}

	/* ip6d_len counts 8-byte units, excluding the first 8 bytes. */
	endptr = (uint8_t *)destp + 8 * (destp->ip6d_len + 1);
	optp = (struct ip6_opt *)(destp + 1);
	/*
	 * NOTE(review): this walk advances by ip6o_len + 2 for every option;
	 * a Pad1 option (a lone zero byte with no length field) would be
	 * misparsed — confirm whether inputs here can contain Pad1.
	 */
	while (endptr - (uint8_t *)optp > sizeof (*optp)) {
		if (optp->ip6o_type == IP6OPT_TUNNEL_LIMIT) {
			if ((uint8_t *)(optp + 1) >= endptr)
				return (B_FALSE);
			*encaplim_ptr = (uint8_t *)&optp[1];
			return (B_TRUE);
		}
		optp = (struct ip6_opt *)((uint8_t *)optp + optp->ip6o_len + 2);
	}
	return (B_FALSE);
}

/*
 * Received ICMPv6 error in response to an X over IPv6 packet that we
 * transmitted.
 *
 * NOTE: "outer" refers to what's inside the ICMP payload.  We will get one of
 * the following:
 *
 * [IPv6(0)][ICMPv6][IPv6(1)][IPv4][ULP]
 *
 * or
 *
 * [IPv6(0)][ICMPv6][IPv6(1)][IPv6(2)][ULP]
 *
 * And "outer6" will get set to IPv6(1), and inner[46] will correspond to
 * whatever the very-inner packet is (IPv4 or IPv6(2)).
 */
static void
iptun_input_icmp_v6(iptun_t *iptun, mblk_t *data_mp, icmp6_t *icmp6h,
    ip_recv_attr_t *ira)
{
	uint8_t *orig;
	ipha_t *outer4, *inner4;
	ip6_t *outer6, *inner6;
	int outer_hlen;
	uint8_t type, code;

	ASSERT(data_mp->b_cont == NULL);

	/*
	 * Temporarily move b_rptr forward so that iptun_find_headers() can
	 * find IP headers in the ICMP packet payload.
	 */
	orig = data_mp->b_rptr;
	data_mp->b_rptr = (uint8_t *)(icmp6h + 1);
	/*
	 * The ip module ensures that ICMP errors contain at least the
	 * original IP header (otherwise, the error would never have made it
	 * here).
	 */
	ASSERT(MBLKL(data_mp) >= 0);
	outer_hlen = iptun_find_headers(data_mp, 0, &outer4, &inner4, &outer6,
	    &inner6);
	ASSERT(outer4 == NULL);
	data_mp->b_rptr = orig;	/* Restore r_ptr */
	if (outer_hlen == 0) {
		iptun_drop_pkt(data_mp, &iptun->iptun_ierrors);
		return;
	}

	data_mp = ipsec_tun_inbound(ira, data_mp, iptun->iptun_itp,
	    inner4, inner6, outer4, outer6, -outer_hlen, iptun->iptun_ns);
	if (data_mp == NULL) {
		/* Callee did all of the freeing. */
		atomic_inc_64(&iptun->iptun_ierrors);
		return;
	}
	/* We should never see reassembled fragment here. */
	ASSERT(data_mp->b_next == NULL);

	/* Advance to the inner packet; that's what we relay the error for. */
	data_mp->b_rptr = (uint8_t *)outer6 + outer_hlen;

	/*
	 * If the original packet being transmitted was itself an ICMP error,
	 * then drop this packet.  We don't want to generate an ICMP error in
	 * response to an ICMP error.
	 */
	if (is_icmp_error(data_mp, inner4, inner6)) {
		iptun_drop_pkt(data_mp, &iptun->iptun_norcvbuf);
		return;
	}

	switch (icmp6h->icmp6_type) {
	case ICMP6_PARAM_PROB: {
		uint8_t *encaplim_ptr;

		/*
		 * If the ICMPv6 error points to a valid Tunnel Encapsulation
		 * Limit option and the limit value is 0, then fall through
		 * and send a host unreachable message.  Otherwise, treat the
		 * error as an output error, as there must have been a problem
		 * with a packet we sent.
		 */
		if (!iptun_find_encaplimit(data_mp, outer6, &encaplim_ptr) ||
		    (icmp6h->icmp6_pptr !=
		    ((ptrdiff_t)encaplim_ptr - (ptrdiff_t)outer6)) ||
		    *encaplim_ptr != 0) {
			iptun_drop_pkt(data_mp, &iptun->iptun_oerrors);
			return;
		}
	}
	/* FALLTHROUGH */
	case ICMP6_TIME_EXCEEDED:
	case ICMP6_DST_UNREACH:
		/* Map to the inner packet's address family. */
		type = (inner4 != NULL ? ICMP_DEST_UNREACHABLE :
		    ICMP6_DST_UNREACH);
		code = (inner4 != NULL ? ICMP_HOST_UNREACHABLE :
		    ICMP6_DST_UNREACH_ADDR);
		break;
	case ICMP6_PACKET_TOO_BIG: {
		uint32_t newmtu;

		/*
		 * We reconcile this with the fact that the tunnel may also
		 * have IPsec policy by letting iptun_update_mtu take care of
		 * it.
		 */
		newmtu = iptun_update_mtu(iptun, NULL,
		    ntohl(icmp6h->icmp6_mtu));

		if (inner4 != NULL) {
			iptun_icmp_fragneeded_v4(iptun, newmtu, inner4,
			    data_mp, ira->ira_tsl);
		} else {
			iptun_icmp_toobig_v6(iptun, newmtu, data_mp == NULL ?
			    NULL : inner6, data_mp, ira->ira_tsl);
		}
		return;
	}
	default:
		iptun_drop_pkt(data_mp, &iptun->iptun_norcvbuf);
		return;
	}

	if (inner4 != NULL) {
		iptun_icmp_error_v4(iptun, inner4, data_mp, type, code,
		    ira->ira_tsl);
	} else {
		iptun_icmp_error_v6(iptun, inner6, data_mp, type, code, 0,
		    ira->ira_tsl);
	}
}

/*
 * Called as conn_recvicmp from IP for ICMP errors.
 */
/* ARGSUSED2 */
static void
iptun_input_icmp(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *ira)
{
	conn_t *connp = arg;
	iptun_t *iptun = connp->conn_iptun;
	mblk_t *tmpmp;
	size_t hlen;

	ASSERT(IPCL_IS_IPTUN(connp));

	if (mp->b_cont != NULL) {
		/*
		 * Since ICMP error processing necessitates access to bits
		 * that are within the ICMP error payload (the original packet
		 * that caused the error), pull everything up into a single
		 * block for convenience.
		 */
		if ((tmpmp = msgpullup(mp, -1)) == NULL) {
			iptun_drop_pkt(mp, &iptun->iptun_norcvbuf);
			return;
		}
		freemsg(mp);
		mp = tmpmp;
	}

	hlen = ira->ira_ip_hdr_length;
	switch (iptun->iptun_typeinfo->iti_ipvers) {
	case IPV4_VERSION:
		/*
		 * The outer IP header coming up from IP is always ipha_t
		 * aligned (otherwise, we would have crashed in ip).
		 */
		iptun_input_icmp_v4(iptun, mp, (icmph_t *)(mp->b_rptr + hlen),
		    ira);
		break;
	case IPV6_VERSION:
		iptun_input_icmp_v6(iptun, mp, (icmp6_t *)(mp->b_rptr + hlen),
		    ira);
		break;
	}
}

/*
 * Validate an inbound packet received on a 6to4 tunnel, per the security
 * considerations of RFC 3056.  Returns B_TRUE if the packet is acceptable.
 */
static boolean_t
iptun_in_6to4_ok(iptun_t *iptun, ipha_t *outer4, ip6_t *inner6)
{
	ipaddr_t v4addr;

	/*
	 * It's possible that someone sent us an IPv4-in-IPv4 packet with the
	 * IPv4 address of a 6to4 tunnel as the destination.
	 */
	if (inner6 == NULL)
		return (B_FALSE);

	/*
	 * Make sure that the IPv6 destination is within the site that this
	 * 6to4 tunnel is routing for.  We don't want people bouncing random
	 * tunneled IPv6 packets through this 6to4 router.
	 */
	IN6_6TO4_TO_V4ADDR(&inner6->ip6_dst, (struct in_addr *)&v4addr);
	if (outer4->ipha_dst != v4addr)
		return (B_FALSE);

	if (IN6_IS_ADDR_6TO4(&inner6->ip6_src)) {
		/*
		 * Section 9 of RFC 3056 (security considerations) suggests
		 * that when a packet is from a 6to4 site (i.e., it's not a
		 * global address being forwarded from a relay router), make
		 * sure that the packet was tunneled by that site's 6to4
		 * router.
		 */
		IN6_6TO4_TO_V4ADDR(&inner6->ip6_src, (struct in_addr *)&v4addr);
		if (outer4->ipha_src != v4addr)
			return (B_FALSE);
	} else {
		/*
		 * Only accept packets from a relay router if we've configured
		 * outbound relay router functionality.
		 */
		if (iptun->iptun_iptuns->iptuns_relay_rtr_addr == INADDR_ANY)
			return (B_FALSE);
	}

	return (B_TRUE);
}

/*
 * Input function for everything that comes up from the ip module below us.
 * This is called directly from the ip module via connp->conn_recv().
 *
 * We receive M_DATA messages with IP-in-IP tunneled packets.
 */
/* ARGSUSED2 */
static void
iptun_input(void *arg, mblk_t *data_mp, void *arg2, ip_recv_attr_t *ira)
{
	conn_t *connp = arg;
	iptun_t *iptun = connp->conn_iptun;
	int outer_hlen;
	ipha_t *outer4, *inner4;
	ip6_t *outer6, *inner6;

	ASSERT(IPCL_IS_IPTUN(connp));
	ASSERT(DB_TYPE(data_mp) == M_DATA);

	outer_hlen = iptun_find_headers(data_mp, ira->ira_ip_hdr_length,
	    &outer4, &inner4, &outer6, &inner6);
	if (outer_hlen == 0)
		goto drop;

	/*
	 * If the system is labeled, we call tsol_check_dest() on the packet
	 * destination (our local tunnel address) to ensure that the packet as
	 * labeled should be allowed to be sent to us.  We don't need to call
	 * the more involved tsol_receive_local() since the tunnel link itself
	 * cannot be assigned to shared-stack non-global zones.
	 */
	if (ira->ira_flags & IRAF_SYSTEM_LABELED) {
		if (ira->ira_tsl == NULL)
			goto drop;
		if (tsol_check_dest(ira->ira_tsl, (outer4 != NULL ?
		    (void *)&outer4->ipha_dst : (void *)&outer6->ip6_dst),
		    (outer4 != NULL ? IPV4_VERSION : IPV6_VERSION),
		    CONN_MAC_DEFAULT, B_FALSE, NULL) != 0)
			goto drop;
	}

	data_mp = ipsec_tun_inbound(ira, data_mp, iptun->iptun_itp,
	    inner4, inner6, outer4, outer6, outer_hlen, iptun->iptun_ns);
	if (data_mp == NULL) {
		/* Callee did all of the freeing. */
		return;
	}

	if (iptun->iptun_typeinfo->iti_type == IPTUN_TYPE_6TO4 &&
	    !iptun_in_6to4_ok(iptun, outer4, inner6))
		goto drop;

	/*
	 * We need to statistically account for each packet individually, so
	 * we might as well split up any b_next chains here.
	 */
	do {
		mblk_t *mp;

		mp = data_mp->b_next;
		data_mp->b_next = NULL;

		atomic_inc_64(&iptun->iptun_ipackets);
		atomic_add_64(&iptun->iptun_rbytes, msgdsize(data_mp));
		/* Hand the decapsulated packet up to GLDv3. */
		mac_rx(iptun->iptun_mh, NULL, data_mp);

		data_mp = mp;
	} while (data_mp != NULL);
	return;
drop:
	iptun_drop_pkt(data_mp, &iptun->iptun_ierrors);
}

/*
 * Do 6to4-specific header-processing on output.  Return B_TRUE if the packet
 * was processed without issue, or B_FALSE if the packet had issues and should
 * be dropped.
 */
static boolean_t
iptun_out_process_6to4(iptun_t *iptun, ipha_t *outer4, ip6_t *inner6)
{
	ipaddr_t v4addr;

	/*
	 * IPv6 source must be a 6to4 address.  This is because a conscious
	 * decision was made to not allow a Solaris system to be used as a
	 * relay router (for security reasons) when 6to4 was initially
	 * integrated.  If this decision is ever reversed, the following check
	 * can be removed.
	 */
	if (!IN6_IS_ADDR_6TO4(&inner6->ip6_src))
		return (B_FALSE);

	/*
	 * RFC3056 mandates that the IPv4 source MUST be set to the IPv4
	 * portion of the 6to4 IPv6 source address.  In other words, make sure
	 * that we're tunneling packets from our own 6to4 site.
	 */
	IN6_6TO4_TO_V4ADDR(&inner6->ip6_src, (struct in_addr *)&v4addr);
	if (outer4->ipha_src != v4addr)
		return (B_FALSE);

	/*
	 * Automatically set the destination of the outer IPv4 header as
	 * described in RFC3056.  There are two possibilities:
	 *
	 * a.
If the IPv6 destination is a 6to4 address, set the IPv4 address
	 *    to the IPv4 portion of the 6to4 address.
	 * b. If the IPv6 destination is a native IPv6 address, set the IPv4
	 *    destination to the address of a relay router.
	 *
	 * Design Note: b shouldn't be necessary here, and this is a flaw in
	 * the design of the 6to4relay command.  Instead of setting a 6to4
	 * relay address in this module via an ioctl, the 6to4relay command
	 * could simply add a IPv6 route for native IPv6 addresses (such as a
	 * default route) in the forwarding table that uses a 6to4 destination
	 * as its next hop, and the IPv4 portion of that address could be a
	 * 6to4 relay address.  In order for this to work, IP would have to
	 * resolve the next hop address, which would necessitate a link-layer
	 * address resolver for 6to4 links, which doesn't exist today.
	 *
	 * In fact, if a resolver existed for 6to4 links, then setting the
	 * IPv4 destination in the outer header could be done as part of
	 * link-layer address resolution and fast-path header generation, and
	 * not here.
	 */
	if (IN6_IS_ADDR_6TO4(&inner6->ip6_dst)) {
		/* destination is a 6to4 router */
		IN6_6TO4_TO_V4ADDR(&inner6->ip6_dst,
		    (struct in_addr *)&outer4->ipha_dst);

		/* Reject attempts to send to INADDR_ANY */
		if (outer4->ipha_dst == INADDR_ANY)
			return (B_FALSE);
	} else {
		/*
		 * The destination is a native IPv6 address.  If output to a
		 * relay-router is enabled, use the relay-router's IPv4
		 * address as the destination.
		 */
		if (iptun->iptun_iptuns->iptuns_relay_rtr_addr == INADDR_ANY)
			return (B_FALSE);
		outer4->ipha_dst = iptun->iptun_iptuns->iptuns_relay_rtr_addr;
	}

	/*
	 * If the outer source and destination are equal, this means that the
	 * 6to4 router somehow forwarded an IPv6 packet destined for its own
	 * 6to4 site to its 6to4 tunnel interface, which will result in this
	 * packet infinitely bouncing between ip and iptun.
	 */
	return (outer4->ipha_src != outer4->ipha_dst);
}

/*
 * Process output packets with outer IPv4 headers.  Frees mp and bumps stat on
 * error.
 */
static mblk_t *
iptun_out_process_ipv4(iptun_t *iptun, mblk_t *mp, ipha_t *outer4,
    ipha_t *inner4, ip6_t *inner6, ip_xmit_attr_t *ixa)
{
	uint8_t *innerptr = (inner4 != NULL ?
	    (uint8_t *)inner4 : (uint8_t *)inner6);
	size_t minmtu = iptun->iptun_typeinfo->iti_minmtu;

	if (inner4 != NULL) {
		ASSERT(outer4->ipha_protocol == IPPROTO_ENCAP);
		/*
		 * Copy the tos from the inner IPv4 header.  We mask off ECN
		 * bits (bits 6 and 7) because there is currently no
		 * tunnel-tunnel communication to determine if both sides
		 * support ECN.  We opt for the safe choice: don't copy the
		 * ECN bits when doing encapsulation.
		 */
		outer4->ipha_type_of_service =
		    inner4->ipha_type_of_service & ~0x03;
	} else {
		ASSERT(outer4->ipha_protocol == IPPROTO_IPV6 &&
		    inner6 != NULL);
	}
	/* Propagate path-MTU discovery's view of the DF bit. */
	if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF)
		outer4->ipha_fragment_offset_and_flags |= IPH_DF_HTONS;
	else
		outer4->ipha_fragment_offset_and_flags &= ~IPH_DF_HTONS;

	/*
	 * As described in section 3.2.2 of RFC4213, if the packet payload is
	 * less than or equal to the minimum MTU size, then we need to allow
	 * IPv4 to fragment the packet.  The reason is that even if we end up
	 * receiving an ICMP frag-needed, the interface above this tunnel
	 * won't be allowed to drop its MTU as a result, since the packet was
	 * already smaller than the smallest allowable MTU for that interface.
	 */
	if (mp->b_wptr - innerptr <= minmtu) {
		outer4->ipha_fragment_offset_and_flags = 0;
		ixa->ixa_flags &= ~IXAF_DONTFRAG;
	} else if (!(ixa->ixa_flags & IXAF_PMTU_TOO_SMALL) &&
	    (iptun->iptun_typeinfo->iti_type != IPTUN_TYPE_6TO4)) {
		ixa->ixa_flags |= IXAF_DONTFRAG;
	}

	ixa->ixa_ip_hdr_length = IPH_HDR_LENGTH(outer4);
	ixa->ixa_pktlen = msgdsize(mp);
	ixa->ixa_protocol = outer4->ipha_protocol;

	outer4->ipha_length = htons(ixa->ixa_pktlen);
	return (mp);
}

/*
 * Insert an encapsulation limit destination option in the packet provided.
 * Always consumes the mp argument and returns a new mblk pointer.
 */
static mblk_t *
iptun_insert_encaplimit(iptun_t *iptun, mblk_t *mp, ip6_t *outer6,
    uint8_t limit)
{
	mblk_t *newmp;
	iptun_ipv6hdrs_t *newouter6;

	ASSERT(outer6->ip6_nxt == IPPROTO_IPV6);
	ASSERT(mp->b_cont == NULL);

	/* Skip past the outer IPv6 header; we rebuild it in the new mblk. */
	mp->b_rptr += sizeof (ip6_t);
	newmp = allocb(sizeof (iptun_ipv6hdrs_t) + MBLKL(mp), BPRI_MED);
	if (newmp == NULL) {
		iptun_drop_pkt(mp, &iptun->iptun_noxmtbuf);
		return (NULL);
	}
	newmp->b_wptr += sizeof (iptun_ipv6hdrs_t);
	/* Copy the payload (Starting with the inner IPv6 header). */
	bcopy(mp->b_rptr, newmp->b_wptr, MBLKL(mp));
	newmp->b_wptr += MBLKL(mp);
	newouter6 = (iptun_ipv6hdrs_t *)newmp->b_rptr;
	/* Now copy the outer IPv6 header.
	 */
	bcopy(outer6, &newouter6->it6h_ip6h, sizeof (ip6_t));
	/* Splice the dest-opts header between the outer header and payload. */
	newouter6->it6h_ip6h.ip6_nxt = IPPROTO_DSTOPTS;
	newouter6->it6h_encaplim = iptun_encaplim_init;
	newouter6->it6h_encaplim.iel_destopt.ip6d_nxt = outer6->ip6_nxt;
	newouter6->it6h_encaplim.iel_telopt.ip6ot_encap_limit = limit;

	/*
	 * The payload length will be set at the end of
	 * iptun_out_process_ipv6().
	 */

	freemsg(mp);
	return (newmp);
}

/*
 * Process output packets with outer IPv6 headers.  Frees mp and bumps stats
 * on error.
 */
static mblk_t *
iptun_out_process_ipv6(iptun_t *iptun, mblk_t *mp, ip6_t *outer6,
    ipha_t *inner4, ip6_t *inner6, ip_xmit_attr_t *ixa)
{
	uint8_t *innerptr = (inner4 != NULL ?
	    (uint8_t *)inner4 : (uint8_t *)inner6);
	size_t minmtu = iptun->iptun_typeinfo->iti_minmtu;
	uint8_t *limit, *configlimit;
	uint32_t offset;
	iptun_ipv6hdrs_t *v6hdrs;

	if (inner6 != NULL && iptun_find_encaplimit(mp, inner6, &limit)) {
		/*
		 * The inner packet is an IPv6 packet which itself contains an
		 * encapsulation limit option.  The limit variable points to
		 * the value in the embedded option.  Process the
		 * encapsulation limit option as specified in RFC 2473.
		 *
		 * If limit is 0, then we've exceeded the limit and we need to
		 * send back an ICMPv6 parameter problem message.
		 *
		 * If limit is > 0, then we decrement it by 1 and make sure
		 * that the encapsulation limit option in the outer header
		 * reflects that (adding an option if one isn't already
		 * there).
		 */
		ASSERT(limit > mp->b_rptr && limit < mp->b_wptr);
		if (*limit == 0) {
			mp->b_rptr = (uint8_t *)inner6;
			offset = limit - mp->b_rptr;
			iptun_icmp_error_v6(iptun, inner6, mp, ICMP6_PARAM_PROB,
			    0, offset, ixa->ixa_tsl);
			atomic_inc_64(&iptun->iptun_noxmtbuf);
			return (NULL);
		}

		/*
		 * The outer header requires an encapsulation limit option.
		 * If there isn't one already, add one.
		 */
		if (iptun->iptun_encaplimit == 0) {
			if ((mp = iptun_insert_encaplimit(iptun, mp, outer6,
			    (*limit - 1))) == NULL)
				return (NULL);
			v6hdrs = (iptun_ipv6hdrs_t *)mp->b_rptr;
		} else {
			/*
			 * There is an existing encapsulation limit option in
			 * the outer header.  If the inner encapsulation limit
			 * is less than the configured encapsulation limit,
			 * update the outer encapsulation limit to reflect
			 * this lesser value.
			 */
			v6hdrs = (iptun_ipv6hdrs_t *)mp->b_rptr;
			configlimit =
			    &v6hdrs->it6h_encaplim.iel_telopt.ip6ot_encap_limit;
			if ((*limit - 1) < *configlimit)
				*configlimit = (*limit - 1);
		}
		ixa->ixa_ip_hdr_length = sizeof (iptun_ipv6hdrs_t);
		ixa->ixa_protocol = v6hdrs->it6h_encaplim.iel_destopt.ip6d_nxt;
	} else {
		ixa->ixa_ip_hdr_length = sizeof (ip6_t);
		ixa->ixa_protocol = outer6->ip6_nxt;
	}
	/*
	 * See iptun_output_process_ipv4() why we allow fragmentation for
	 * small packets
	 */
	if (mp->b_wptr - innerptr <= minmtu)
		ixa->ixa_flags &= ~IXAF_DONTFRAG;
	else if (!(ixa->ixa_flags & IXAF_PMTU_TOO_SMALL))
		ixa->ixa_flags |= IXAF_DONTFRAG;

	ixa->ixa_pktlen = msgdsize(mp);
	outer6->ip6_plen = htons(ixa->ixa_pktlen - sizeof (ip6_t));
	return (mp);
}

/*
 * The IP tunneling MAC-type plugins have already done most of the header
 * processing and validity checks.  We are simply responsible for multiplexing
 * down to the ip module below us.
3103 */ 3104 static void 3105 iptun_output(iptun_t *iptun, mblk_t *mp) 3106 { 3107 conn_t *connp = iptun->iptun_connp; 3108 mblk_t *newmp; 3109 int error; 3110 ip_xmit_attr_t *ixa; 3111 3112 ASSERT(mp->b_datap->db_type == M_DATA); 3113 3114 if (mp->b_cont != NULL) { 3115 if ((newmp = msgpullup(mp, -1)) == NULL) { 3116 iptun_drop_pkt(mp, &iptun->iptun_noxmtbuf); 3117 return; 3118 } 3119 freemsg(mp); 3120 mp = newmp; 3121 } 3122 3123 if (iptun->iptun_typeinfo->iti_type == IPTUN_TYPE_6TO4) { 3124 iptun_output_6to4(iptun, mp); 3125 return; 3126 } 3127 3128 if (is_system_labeled()) { 3129 /* 3130 * Since the label can be different meaning a potentially 3131 * different IRE,we always use a unique ip_xmit_attr_t. 3132 */ 3133 ixa = conn_get_ixa_exclusive(connp); 3134 } else { 3135 /* 3136 * If no other thread is using conn_ixa this just gets a 3137 * reference to conn_ixa. Otherwise we get a safe copy of 3138 * conn_ixa. 3139 */ 3140 ixa = conn_get_ixa(connp, B_FALSE); 3141 } 3142 if (ixa == NULL) { 3143 iptun_drop_pkt(mp, &iptun->iptun_oerrors); 3144 return; 3145 } 3146 3147 /* 3148 * In case we got a safe copy of conn_ixa, then we need 3149 * to fill in any pointers in it. 3150 */ 3151 if (ixa->ixa_ire == NULL) { 3152 error = ip_attr_connect(connp, ixa, &connp->conn_saddr_v6, 3153 &connp->conn_faddr_v6, &connp->conn_faddr_v6, 0, 3154 NULL, NULL, 0); 3155 if (error != 0) { 3156 if (ixa->ixa_ire != NULL && 3157 (error == EHOSTUNREACH || error == ENETUNREACH)) { 3158 /* 3159 * Let conn_ip_output/ire_send_noroute return 3160 * the error and send any local ICMP error. 3161 */ 3162 error = 0; 3163 } else { 3164 ixa_refrele(ixa); 3165 iptun_drop_pkt(mp, &iptun->iptun_oerrors); 3166 return; 3167 } 3168 } 3169 } 3170 3171 iptun_output_common(iptun, ixa, mp); 3172 ixa_refrele(ixa); 3173 } 3174 3175 /* 3176 * We use an ixa based on the last destination. 
 */
static void
iptun_output_6to4(iptun_t *iptun, mblk_t *mp)
{
	conn_t		*connp = iptun->iptun_connp;
	ipha_t		*outer4, *inner4;
	ip6_t		*outer6, *inner6;
	ip_xmit_attr_t	*ixa;
	ip_xmit_attr_t	*oldixa;
	int		error;
	boolean_t	need_connect;
	in6_addr_t	v6dst;

	ASSERT(mp->b_cont == NULL);	/* Verified by iptun_output */

	/*
	 * Make sure ipha_dst has been set (by iptun_out_process_6to4 below,
	 * which derives it from the embedded 6to4 address) before we look at
	 * ipha_dst for the conn_v4lastdst comparison.
	 */
	(void) iptun_find_headers(mp, 0, &outer4, &inner4, &outer6, &inner6);
	ASSERT(outer4 != NULL);
	if (!iptun_out_process_6to4(iptun, outer4, inner6)) {
		iptun_drop_pkt(mp, &iptun->iptun_oerrors);
		return;
	}

	if (is_system_labeled()) {
		/*
		 * Since the label can be different, meaning a potentially
		 * different IRE, we always use a unique ip_xmit_attr_t.
		 */
		ixa = conn_get_ixa_exclusive(connp);
	} else {
		/*
		 * If no other thread is using conn_ixa this just gets a
		 * reference to conn_ixa.  Otherwise we get a safe copy of
		 * conn_ixa.
		 */
		ixa = conn_get_ixa(connp, B_FALSE);
	}
	if (ixa == NULL) {
		iptun_drop_pkt(mp, &iptun->iptun_oerrors);
		return;
	}

	mutex_enter(&connp->conn_lock);
	if (connp->conn_v4lastdst == outer4->ipha_dst) {
		/* Same destination as last time; reconnect only if no IRE. */
		need_connect = (ixa->ixa_ire == NULL);
	} else {
		/* In case previous destination was multirt */
		ip_attr_newdst(ixa);

		/*
		 * We later update conn_ixa when we update conn_v4lastdst
		 * which enables subsequent packets to avoid redoing
		 * ip_attr_connect
		 */
		need_connect = B_TRUE;
	}
	mutex_exit(&connp->conn_lock);

	/*
	 * In case we got a safe copy of conn_ixa, or otherwise we don't
	 * have a current ixa_ire, then we need to fill in any pointers in
	 * the ixa.
	 */
	if (need_connect) {
		IN6_IPADDR_TO_V4MAPPED(outer4->ipha_dst, &v6dst);

		/* We handle IPsec in iptun_output_common */
		error = ip_attr_connect(connp, ixa, &connp->conn_saddr_v6,
		    &v6dst, &v6dst, 0, NULL, NULL, 0);
		if (error != 0) {
			if (ixa->ixa_ire != NULL &&
			    (error == EHOSTUNREACH || error == ENETUNREACH)) {
				/*
				 * Let conn_ip_output/ire_send_noroute return
				 * the error and send any local ICMP error.
				 */
				error = 0;
			} else {
				ixa_refrele(ixa);
				iptun_drop_pkt(mp, &iptun->iptun_oerrors);
				return;
			}
		}
	}

	iptun_output_common(iptun, ixa, mp);

	/* Atomically replace conn_ixa and conn_v4lastdst */
	mutex_enter(&connp->conn_lock);
	if (connp->conn_v4lastdst != outer4->ipha_dst) {
		/* Remember the dst which corresponds to conn_ixa */
		connp->conn_v6lastdst = v6dst;
		oldixa = conn_replace_ixa(connp, ixa);
	} else {
		oldixa = NULL;
	}
	mutex_exit(&connp->conn_lock);
	ixa_refrele(ixa);
	/* Drop the reference held by conn_ixa on the ixa we displaced. */
	if (oldixa != NULL)
		ixa_refrele(oldixa);
}

/*
 * Check the destination/label. Modifies *mpp by adding/removing CIPSO.
 *
 * We get the label from the message in order to honor the
 * ULPs/IPs choice of label. This will be NULL for forwarded
 * packets, neighbor discovery packets and some others.
 */
static int
iptun_output_check_label(mblk_t **mpp, ip_xmit_attr_t *ixa)
{
	cred_t		*cr;
	int		adjust;
	int		iplen;
	int		err;
	ts_label_t	*effective_tsl = NULL;


	ASSERT(is_system_labeled());

	/* No credential on the message means nothing to check. */
	cr = msg_getcred(*mpp, NULL);
	if (cr == NULL)
		return (0);

	/*
	 * We need to start with a label based on the IP/ULP above us
	 */
	ip_xmit_attr_restore_tsl(ixa, cr);

	/*
	 * Need to update packet with any CIPSO option since
	 * conn_ip_output doesn't do that.
3311 */ 3312 if (ixa->ixa_flags & IXAF_IS_IPV4) { 3313 ipha_t *ipha; 3314 3315 ipha = (ipha_t *)(*mpp)->b_rptr; 3316 iplen = ntohs(ipha->ipha_length); 3317 err = tsol_check_label_v4(ixa->ixa_tsl, 3318 ixa->ixa_zoneid, mpp, CONN_MAC_DEFAULT, B_FALSE, 3319 ixa->ixa_ipst, &effective_tsl); 3320 if (err != 0) 3321 return (err); 3322 3323 ipha = (ipha_t *)(*mpp)->b_rptr; 3324 adjust = (int)ntohs(ipha->ipha_length) - iplen; 3325 } else { 3326 ip6_t *ip6h; 3327 3328 ip6h = (ip6_t *)(*mpp)->b_rptr; 3329 iplen = ntohs(ip6h->ip6_plen); 3330 3331 err = tsol_check_label_v6(ixa->ixa_tsl, 3332 ixa->ixa_zoneid, mpp, CONN_MAC_DEFAULT, B_FALSE, 3333 ixa->ixa_ipst, &effective_tsl); 3334 if (err != 0) 3335 return (err); 3336 3337 ip6h = (ip6_t *)(*mpp)->b_rptr; 3338 adjust = (int)ntohs(ip6h->ip6_plen) - iplen; 3339 } 3340 3341 if (effective_tsl != NULL) { 3342 /* Update the label */ 3343 ip_xmit_attr_replace_tsl(ixa, effective_tsl); 3344 } 3345 ixa->ixa_pktlen += adjust; 3346 ixa->ixa_ip_hdr_length += adjust; 3347 return (0); 3348 } 3349 3350 3351 static void 3352 iptun_output_common(iptun_t *iptun, ip_xmit_attr_t *ixa, mblk_t *mp) 3353 { 3354 ipsec_tun_pol_t *itp = iptun->iptun_itp; 3355 int outer_hlen; 3356 mblk_t *newmp; 3357 ipha_t *outer4, *inner4; 3358 ip6_t *outer6, *inner6; 3359 int error; 3360 boolean_t update_pktlen; 3361 3362 ASSERT(ixa->ixa_ire != NULL); 3363 3364 outer_hlen = iptun_find_headers(mp, 0, &outer4, &inner4, &outer6, 3365 &inner6); 3366 if (outer_hlen == 0) { 3367 iptun_drop_pkt(mp, &iptun->iptun_oerrors); 3368 return; 3369 } 3370 3371 /* Save IXAF_DONTFRAG value */ 3372 iaflags_t dontfrag = ixa->ixa_flags & IXAF_DONTFRAG; 3373 3374 /* Perform header processing. 
*/ 3375 if (outer4 != NULL) { 3376 mp = iptun_out_process_ipv4(iptun, mp, outer4, inner4, inner6, 3377 ixa); 3378 } else { 3379 mp = iptun_out_process_ipv6(iptun, mp, outer6, inner4, inner6, 3380 ixa); 3381 } 3382 if (mp == NULL) 3383 return; 3384 3385 /* 3386 * Let's hope the compiler optimizes this with "branch taken". 3387 */ 3388 if (itp != NULL && (itp->itp_flags & ITPF_P_ACTIVE)) { 3389 /* This updates the ip_xmit_attr_t */ 3390 mp = ipsec_tun_outbound(mp, iptun, inner4, inner6, outer4, 3391 outer6, outer_hlen, ixa); 3392 if (mp == NULL) { 3393 atomic_inc_64(&iptun->iptun_oerrors); 3394 return; 3395 } 3396 if (is_system_labeled()) { 3397 /* 3398 * Might change the packet by adding/removing CIPSO. 3399 * After this caller inner* and outer* and outer_hlen 3400 * might be invalid. 3401 */ 3402 error = iptun_output_check_label(&mp, ixa); 3403 if (error != 0) { 3404 ip2dbg(("label check failed (%d)\n", error)); 3405 iptun_drop_pkt(mp, &iptun->iptun_oerrors); 3406 return; 3407 } 3408 } 3409 3410 /* 3411 * ipsec_tun_outbound() returns a chain of tunneled IP 3412 * fragments linked with b_next (or a single message if the 3413 * tunneled packet wasn't a fragment). 3414 * If fragcache returned a list then we need to update 3415 * ixa_pktlen for all packets in the list. 3416 */ 3417 update_pktlen = (mp->b_next != NULL); 3418 3419 /* 3420 * Otherwise, we're good to go. The ixa has been updated with 3421 * instructions for outbound IPsec processing. 3422 */ 3423 for (newmp = mp; newmp != NULL; newmp = mp) { 3424 size_t minmtu = iptun->iptun_typeinfo->iti_minmtu; 3425 3426 atomic_inc_64(&iptun->iptun_opackets); 3427 atomic_add_64(&iptun->iptun_obytes, ixa->ixa_pktlen); 3428 mp = mp->b_next; 3429 newmp->b_next = NULL; 3430 3431 /* 3432 * The IXAF_DONTFRAG flag is global, but there is 3433 * a chain here. Check if we're really already 3434 * smaller than the minimum allowed MTU and reset here 3435 * appropriately. 
Otherwise one small packet can kill 3436 * the whole chain's path mtu discovery. 3437 * In addition, update the pktlen to the length of 3438 * the actual packet being processed. 3439 */ 3440 if (update_pktlen) { 3441 ixa->ixa_pktlen = msgdsize(newmp); 3442 if (ixa->ixa_pktlen <= minmtu) 3443 ixa->ixa_flags &= ~IXAF_DONTFRAG; 3444 } 3445 3446 atomic_inc_64(&iptun->iptun_opackets); 3447 atomic_add_64(&iptun->iptun_obytes, ixa->ixa_pktlen); 3448 3449 error = conn_ip_output(newmp, ixa); 3450 3451 /* Restore IXAF_DONTFRAG value */ 3452 ixa->ixa_flags |= dontfrag; 3453 3454 if (error == EMSGSIZE) { 3455 /* IPsec policy might have changed */ 3456 (void) iptun_update_mtu(iptun, ixa, 0); 3457 } 3458 } 3459 } else { 3460 /* 3461 * The ip module will potentially apply global policy to the 3462 * packet in its output path if there's no active tunnel 3463 * policy. 3464 */ 3465 ASSERT(ixa->ixa_ipsec_policy == NULL); 3466 mp = ip_output_attach_policy(mp, outer4, outer6, NULL, ixa); 3467 if (mp == NULL) { 3468 atomic_inc_64(&iptun->iptun_oerrors); 3469 return; 3470 } 3471 if (is_system_labeled()) { 3472 /* 3473 * Might change the packet by adding/removing CIPSO. 3474 * After this caller inner* and outer* and outer_hlen 3475 * might be invalid. 
3476 */ 3477 error = iptun_output_check_label(&mp, ixa); 3478 if (error != 0) { 3479 ip2dbg(("label check failed (%d)\n", error)); 3480 iptun_drop_pkt(mp, &iptun->iptun_oerrors); 3481 return; 3482 } 3483 } 3484 3485 atomic_inc_64(&iptun->iptun_opackets); 3486 atomic_add_64(&iptun->iptun_obytes, ixa->ixa_pktlen); 3487 3488 error = conn_ip_output(mp, ixa); 3489 if (error == EMSGSIZE) { 3490 /* IPsec policy might have changed */ 3491 (void) iptun_update_mtu(iptun, ixa, 0); 3492 } 3493 } 3494 if (ixa->ixa_flags & IXAF_IPSEC_SECURE) 3495 ipsec_out_release_refs(ixa); 3496 } 3497 3498 static mac_callbacks_t iptun_m_callbacks = { 3499 .mc_callbacks = (MC_SETPROP | MC_GETPROP | MC_PROPINFO), 3500 .mc_getstat = iptun_m_getstat, 3501 .mc_start = iptun_m_start, 3502 .mc_stop = iptun_m_stop, 3503 .mc_setpromisc = iptun_m_setpromisc, 3504 .mc_multicst = iptun_m_multicst, 3505 .mc_unicst = iptun_m_unicst, 3506 .mc_tx = iptun_m_tx, 3507 .mc_reserved = NULL, 3508 .mc_setprop = iptun_m_setprop, 3509 .mc_getprop = iptun_m_getprop, 3510 .mc_propinfo = iptun_m_propinfo 3511 }; 3512