1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23 */ 24 25 /* 26 * iptun - IP Tunneling Driver 27 * 28 * This module is a GLDv3 driver that implements virtual datalinks over IP 29 * (a.k.a, IP tunneling). The datalinks are managed through a dld ioctl 30 * interface (see iptun_ctl.c), and registered with GLDv3 using 31 * mac_register(). It implements the logic for various forms of IP (IPv4 or 32 * IPv6) encapsulation within IP (IPv4 or IPv6) by interacting with the ip 33 * module below it. Each virtual IP tunnel datalink has a conn_t associated 34 * with it representing the "outer" IP connection. 35 * 36 * The module implements the following locking semantics: 37 * 38 * Lookups and deletions in iptun_hash are synchronized using iptun_hash_lock. 39 * See comments above iptun_hash_lock for details. 40 * 41 * No locks are ever held while calling up to GLDv3. The general architecture 42 * of GLDv3 requires this, as the mac perimeter (essentially a lock) for a 43 * given link will be held while making downcalls (iptun_m_*() callbacks). 44 * Because we need to hold locks while handling downcalls, holding these locks 45 * while issuing upcalls results in deadlock scenarios. See the block comment 46 * above iptun_task_cb() for details on how we safely issue upcalls without 47 * holding any locks. 48 * 49 * The contents of each iptun_t is protected by an iptun_mutex which is held 50 * in iptun_enter() (called by iptun_enter_by_linkid()), and exited in 51 * iptun_exit(). 52 * 53 * See comments in iptun_delete() and iptun_free() for details on how the 54 * iptun_t is deleted safely. 55 */ 56 57 #include <sys/types.h> 58 #include <sys/kmem.h> 59 #include <sys/errno.h> 60 #include <sys/modhash.h> 61 #include <sys/list.h> 62 #include <sys/strsun.h> 63 #include <sys/file.h> 64 #include <sys/systm.h> 65 #include <sys/tihdr.h> 66 #include <sys/param.h> 67 #include <sys/mac_provider.h> 68 #include <sys/mac_ipv4.h> 69 #include <sys/mac_ipv6.h> 70 #include <sys/mac_6to4.h> 71 #include <sys/tsol/tnet.h> 72 #include <sys/sunldi.h> 73 #include <netinet/in.h> 74 #include <netinet/ip6.h> 75 #include <inet/ip.h> 76 #include <inet/ip_ire.h> 77 #include <inet/ipsec_impl.h> 78 #include <sys/tsol/label.h> 79 #include <sys/tsol/tnet.h> 80 #include <inet/iptun.h> 81 #include "iptun_impl.h" 82 83 /* Do the tunnel type and address family match? */ 84 #define IPTUN_ADDR_MATCH(iptun_type, family) \ 85 ((iptun_type == IPTUN_TYPE_IPV4 && family == AF_INET) || \ 86 (iptun_type == IPTUN_TYPE_IPV6 && family == AF_INET6) || \ 87 (iptun_type == IPTUN_TYPE_6TO4 && family == AF_INET)) 88 89 #define IPTUN_HASH_KEY(key) ((mod_hash_key_t)(uintptr_t)(key)) 90 91 #define IPTUN_MIN_IPV4_MTU 576 /* ip.h still uses 68 (!) */ 92 #define IPTUN_MIN_IPV6_MTU IPV6_MIN_MTU 93 #define IPTUN_MAX_IPV4_MTU (IP_MAXPACKET - sizeof (ipha_t)) 94 #define IPTUN_MAX_IPV6_MTU (IP_MAXPACKET - sizeof (ip6_t) - \ 95 sizeof (iptun_encaplim_t)) 96 97 #define IPTUN_MIN_HOPLIMIT 1 98 #define IPTUN_MAX_HOPLIMIT UINT8_MAX 99 100 #define IPTUN_MIN_ENCAPLIMIT 0 101 #define IPTUN_MAX_ENCAPLIMIT UINT8_MAX 102 103 #define IPTUN_IPSEC_REQ_MASK (IPSEC_PREF_REQUIRED | IPSEC_PREF_NEVER) 104 105 static iptun_encaplim_t iptun_encaplim_init = { 106 { IPPROTO_NONE, 0 }, 107 IP6OPT_TUNNEL_LIMIT, 108 1, 109 IPTUN_DEFAULT_ENCAPLIMIT, /* filled in with actual value later */ 110 IP6OPT_PADN, 111 1, 112 0 113 }; 114 115 /* 116 * Table containing per-iptun-type information. 117 * Since IPv6 can run over all of these we have the IPv6 min as the min MTU. 118 */ 119 static iptun_typeinfo_t iptun_type_table[] = { 120 { IPTUN_TYPE_IPV4, MAC_PLUGIN_IDENT_IPV4, IPV4_VERSION, 121 IPTUN_MIN_IPV6_MTU, IPTUN_MAX_IPV4_MTU, B_TRUE }, 122 { IPTUN_TYPE_IPV6, MAC_PLUGIN_IDENT_IPV6, IPV6_VERSION, 123 IPTUN_MIN_IPV6_MTU, IPTUN_MAX_IPV6_MTU, B_TRUE }, 124 { IPTUN_TYPE_6TO4, MAC_PLUGIN_IDENT_6TO4, IPV4_VERSION, 125 IPTUN_MIN_IPV6_MTU, IPTUN_MAX_IPV4_MTU, B_FALSE }, 126 { IPTUN_TYPE_UNKNOWN, NULL, 0, 0, 0, B_FALSE } 127 }; 128 129 /* 130 * iptun_hash is an iptun_t lookup table by link ID protected by 131 * iptun_hash_lock. While the hash table's integrity is maintained via 132 * internal locking in the mod_hash_*() functions, we need additional locking 133 * so that an iptun_t cannot be deleted after a hash lookup has returned an 134 * iptun_t and before iptun_lock has been entered. As such, we use 135 * iptun_hash_lock when doing lookups and removals from iptun_hash. 136 */ 137 mod_hash_t *iptun_hash; 138 static kmutex_t iptun_hash_lock; 139 140 static uint_t iptun_tunnelcount; /* total for all stacks */ 141 kmem_cache_t *iptun_cache; 142 ddi_taskq_t *iptun_taskq; 143 144 typedef enum { 145 IPTUN_TASK_MTU_UPDATE, /* tell mac about new tunnel link MTU */ 146 IPTUN_TASK_LADDR_UPDATE, /* tell mac about new local address */ 147 IPTUN_TASK_RADDR_UPDATE, /* tell mac about new remote address */ 148 IPTUN_TASK_LINK_UPDATE, /* tell mac about new link state */ 149 IPTUN_TASK_PDATA_UPDATE /* tell mac about updated plugin data */ 150 } iptun_task_t; 151 152 typedef struct iptun_task_data_s { 153 iptun_task_t itd_task; 154 datalink_id_t itd_linkid; 155 } iptun_task_data_t; 156 157 static void iptun_task_dispatch(iptun_t *, iptun_task_t); 158 static int iptun_enter(iptun_t *); 159 static void iptun_exit(iptun_t *); 160 static void iptun_headergen(iptun_t *, boolean_t); 161 static void iptun_drop_pkt(mblk_t *, uint64_t *); 162 static void iptun_input(void *, mblk_t *, void *, ip_recv_attr_t *); 163 static void iptun_input_icmp(void *, mblk_t *, void *, ip_recv_attr_t *); 164 static void iptun_output(iptun_t *, mblk_t *); 165 static uint32_t iptun_get_maxmtu(iptun_t *, ip_xmit_attr_t *, uint32_t); 166 static uint32_t iptun_update_mtu(iptun_t *, ip_xmit_attr_t *, uint32_t); 167 static uint32_t iptun_get_dst_pmtu(iptun_t *, ip_xmit_attr_t *); 168 static void iptun_update_dst_pmtu(iptun_t *, ip_xmit_attr_t *); 169 static int iptun_setladdr(iptun_t *, const struct sockaddr_storage *); 170 171 static void iptun_output_6to4(iptun_t *, mblk_t *); 172 static void iptun_output_common(iptun_t *, ip_xmit_attr_t *, mblk_t *); 173 static boolean_t iptun_verifyicmp(conn_t *, void *, icmph_t *, icmp6_t *, 174 ip_recv_attr_t *); 175 176 static void iptun_notify(void *, ip_xmit_attr_t *, ixa_notify_type_t, 177 ixa_notify_arg_t); 178 179 static mac_callbacks_t iptun_m_callbacks; 180 181 static int 182 iptun_m_getstat(void *arg, uint_t stat, uint64_t *val) 183 { 184 iptun_t *iptun = arg; 185 int err = 0; 186 187 switch (stat) { 188 case MAC_STAT_IERRORS: 189 *val = iptun->iptun_ierrors; 190 break; 191 case MAC_STAT_OERRORS: 192 *val = iptun->iptun_oerrors; 193 break; 194 case MAC_STAT_RBYTES: 195 *val = iptun->iptun_rbytes; 196 break; 197 case MAC_STAT_IPACKETS: 198 *val = iptun->iptun_ipackets; 199 break; 200 case MAC_STAT_OBYTES: 201 *val = iptun->iptun_obytes; 202 break; 203 case MAC_STAT_OPACKETS: 204 *val = iptun->iptun_opackets; 205 break; 206 case MAC_STAT_NORCVBUF: 207 *val = iptun->iptun_norcvbuf; 208 break; 209 case MAC_STAT_NOXMTBUF: 210 *val = iptun->iptun_noxmtbuf; 211 break; 212 default: 213 err = ENOTSUP; 214 } 215 216 return (err); 217 } 218 219 static int 220 iptun_m_start(void *arg) 221 { 222 iptun_t *iptun = arg; 223 int err; 224 225 if ((err = iptun_enter(iptun)) == 0) { 226 iptun->iptun_flags |= IPTUN_MAC_STARTED; 227 iptun_task_dispatch(iptun, IPTUN_TASK_LINK_UPDATE); 228 iptun_exit(iptun); 229 } 230 return (err); 231 } 232 233 static void 234 iptun_m_stop(void *arg) 235 { 236 iptun_t *iptun = arg; 237 238 if (iptun_enter(iptun) == 0) { 239 iptun->iptun_flags &= ~IPTUN_MAC_STARTED; 240 iptun_task_dispatch(iptun, IPTUN_TASK_LINK_UPDATE); 241 iptun_exit(iptun); 242 } 243 } 244 245 /* 246 * iptun_m_setpromisc() does nothing and always succeeds. This is because a 247 * tunnel data-link only ever receives packets that are destined exclusively 248 * for the local address of the tunnel. 249 */ 250 /* ARGSUSED */ 251 static int 252 iptun_m_setpromisc(void *arg, boolean_t on) 253 { 254 return (0); 255 } 256 257 /* ARGSUSED */ 258 static int 259 iptun_m_multicst(void *arg, boolean_t add, const uint8_t *addrp) 260 { 261 return (ENOTSUP); 262 } 263 264 /* 265 * iptun_m_unicst() sets the local address. 266 */ 267 /* ARGSUSED */ 268 static int 269 iptun_m_unicst(void *arg, const uint8_t *addrp) 270 { 271 iptun_t *iptun = arg; 272 int err; 273 struct sockaddr_storage ss; 274 struct sockaddr_in *sin; 275 struct sockaddr_in6 *sin6; 276 277 if ((err = iptun_enter(iptun)) == 0) { 278 switch (iptun->iptun_typeinfo->iti_ipvers) { 279 case IPV4_VERSION: 280 sin = (struct sockaddr_in *)&ss; 281 sin->sin_family = AF_INET; 282 bcopy(addrp, &sin->sin_addr, sizeof (in_addr_t)); 283 break; 284 case IPV6_VERSION: 285 sin6 = (struct sockaddr_in6 *)&ss; 286 sin6->sin6_family = AF_INET6; 287 bcopy(addrp, &sin6->sin6_addr, sizeof (in6_addr_t)); 288 break; 289 default: 290 ASSERT(0); 291 } 292 err = iptun_setladdr(iptun, &ss); 293 iptun_exit(iptun); 294 } 295 return (err); 296 } 297 298 static mblk_t * 299 iptun_m_tx(void *arg, mblk_t *mpchain) 300 { 301 mblk_t *mp, *nmp; 302 iptun_t *iptun = arg; 303 304 if (!IS_IPTUN_RUNNING(iptun)) { 305 iptun_drop_pkt(mpchain, &iptun->iptun_noxmtbuf); 306 return (NULL); 307 } 308 309 for (mp = mpchain; mp != NULL; mp = nmp) { 310 nmp = mp->b_next; 311 mp->b_next = NULL; 312 iptun_output(iptun, mp); 313 } 314 315 return (NULL); 316 } 317 318 /* ARGSUSED */ 319 static int 320 iptun_m_setprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, 321 uint_t pr_valsize, const void *pr_val) 322 { 323 iptun_t *iptun = barg; 324 uint32_t value = *(uint32_t *)pr_val; 325 int err; 326 327 /* 328 * We need to enter this iptun_t since we'll be modifying the outer 329 * header. 330 */ 331 if ((err = iptun_enter(iptun)) != 0) 332 return (err); 333 334 switch (pr_num) { 335 case MAC_PROP_IPTUN_HOPLIMIT: 336 if (value < IPTUN_MIN_HOPLIMIT || value > IPTUN_MAX_HOPLIMIT) { 337 err = EINVAL; 338 break; 339 } 340 if (value != iptun->iptun_hoplimit) { 341 iptun->iptun_hoplimit = (uint8_t)value; 342 iptun_headergen(iptun, B_TRUE); 343 } 344 break; 345 case MAC_PROP_IPTUN_ENCAPLIMIT: 346 if (iptun->iptun_typeinfo->iti_type != IPTUN_TYPE_IPV6 || 347 value > IPTUN_MAX_ENCAPLIMIT) { 348 err = EINVAL; 349 break; 350 } 351 if (value != iptun->iptun_encaplimit) { 352 iptun->iptun_encaplimit = (uint8_t)value; 353 iptun_headergen(iptun, B_TRUE); 354 } 355 break; 356 case MAC_PROP_MTU: { 357 uint32_t maxmtu = iptun_get_maxmtu(iptun, NULL, 0); 358 359 if (value < iptun->iptun_typeinfo->iti_minmtu || 360 value > maxmtu) { 361 err = EINVAL; 362 break; 363 } 364 iptun->iptun_flags |= IPTUN_FIXED_MTU; 365 if (value != iptun->iptun_mtu) { 366 iptun->iptun_mtu = value; 367 iptun_task_dispatch(iptun, IPTUN_TASK_MTU_UPDATE); 368 } 369 break; 370 } 371 default: 372 err = EINVAL; 373 } 374 iptun_exit(iptun); 375 return (err); 376 } 377 378 /* ARGSUSED */ 379 static int 380 iptun_m_getprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, 381 uint_t pr_valsize, void *pr_val) 382 { 383 iptun_t *iptun = barg; 384 int err; 385 386 if ((err = iptun_enter(iptun)) != 0) 387 return (err); 388 389 switch (pr_num) { 390 case MAC_PROP_IPTUN_HOPLIMIT: 391 ASSERT(pr_valsize >= sizeof (uint32_t)); 392 *(uint32_t *)pr_val = iptun->iptun_hoplimit; 393 break; 394 395 case MAC_PROP_IPTUN_ENCAPLIMIT: 396 *(uint32_t *)pr_val = iptun->iptun_encaplimit; 397 break; 398 default: 399 err = ENOTSUP; 400 } 401 done: 402 iptun_exit(iptun); 403 return (err); 404 } 405 406 /* ARGSUSED */ 407 static void 408 iptun_m_propinfo(void *barg, const char *pr_name, mac_prop_id_t pr_num, 409 mac_prop_info_handle_t prh) 410 { 411 iptun_t *iptun = barg; 412 413 switch (pr_num) { 414 case MAC_PROP_IPTUN_HOPLIMIT: 415 mac_prop_info_set_range_uint32(prh, 416 IPTUN_MIN_HOPLIMIT, IPTUN_MAX_HOPLIMIT); 417 mac_prop_info_set_default_uint32(prh, IPTUN_DEFAULT_HOPLIMIT); 418 break; 419 420 case MAC_PROP_IPTUN_ENCAPLIMIT: 421 if (iptun->iptun_typeinfo->iti_type != IPTUN_TYPE_IPV6) 422 break; 423 mac_prop_info_set_range_uint32(prh, 424 IPTUN_MIN_ENCAPLIMIT, IPTUN_MAX_ENCAPLIMIT); 425 mac_prop_info_set_default_uint32(prh, IPTUN_DEFAULT_ENCAPLIMIT); 426 break; 427 case MAC_PROP_MTU: 428 mac_prop_info_set_range_uint32(prh, 429 iptun->iptun_typeinfo->iti_minmtu, 430 iptun_get_maxmtu(iptun, NULL, 0)); 431 break; 432 } 433 } 434 435 uint_t 436 iptun_count(void) 437 { 438 return (iptun_tunnelcount); 439 } 440 441 /* 442 * Enter an iptun_t exclusively. This is essentially just a mutex, but we 443 * don't allow iptun_enter() to succeed on a tunnel if it's in the process of 444 * being deleted. 445 */ 446 static int 447 iptun_enter(iptun_t *iptun) 448 { 449 mutex_enter(&iptun->iptun_lock); 450 while (iptun->iptun_flags & IPTUN_DELETE_PENDING) 451 cv_wait(&iptun->iptun_enter_cv, &iptun->iptun_lock); 452 if (iptun->iptun_flags & IPTUN_CONDEMNED) { 453 mutex_exit(&iptun->iptun_lock); 454 return (ENOENT); 455 } 456 return (0); 457 } 458 459 /* 460 * Exit the tunnel entered in iptun_enter(). 461 */ 462 static void 463 iptun_exit(iptun_t *iptun) 464 { 465 mutex_exit(&iptun->iptun_lock); 466 } 467 468 /* 469 * Enter the IP tunnel instance by datalink ID. 470 */ 471 static int 472 iptun_enter_by_linkid(datalink_id_t linkid, iptun_t **iptun) 473 { 474 int err; 475 476 mutex_enter(&iptun_hash_lock); 477 if (mod_hash_find(iptun_hash, IPTUN_HASH_KEY(linkid), 478 (mod_hash_val_t *)iptun) == 0) 479 err = iptun_enter(*iptun); 480 else 481 err = ENOENT; 482 if (err != 0) 483 *iptun = NULL; 484 mutex_exit(&iptun_hash_lock); 485 return (err); 486 } 487 488 /* 489 * Handle tasks that were deferred through the iptun_taskq because they require 490 * calling up to the mac module, and we can't call up to the mac module while 491 * holding locks. 492 * 493 * This is tricky to get right without introducing race conditions and 494 * deadlocks with the mac module, as we cannot issue an upcall while in the 495 * iptun_t. The reason is that upcalls may try and enter the mac perimeter, 496 * while iptun callbacks (such as iptun_m_setprop()) called from the mac 497 * module will already have the perimeter held, and will then try and enter 498 * the iptun_t. You can see the lock ordering problem with this; this will 499 * deadlock. 500 * 501 * The safe way to do this is to enter the iptun_t in question and copy the 502 * information we need out of it so that we can exit it and know that the 503 * information being passed up to the upcalls won't be subject to modification 504 * by other threads. The problem now is that we need to exit it prior to 505 * issuing the upcall, but once we do this, a thread could come along and 506 * delete the iptun_t and thus the mac handle required to issue the upcall. 507 * To prevent this, we set the IPTUN_UPCALL_PENDING flag prior to exiting the 508 * iptun_t. This flag is the condition associated with iptun_upcall_cv, which 509 * iptun_delete() will cv_wait() on. When the upcall completes, we clear 510 * IPTUN_UPCALL_PENDING and cv_signal() any potentially waiting 511 * iptun_delete(). We can thus still safely use iptun->iptun_mh after having 512 * exited the iptun_t. 513 */ 514 static void 515 iptun_task_cb(void *arg) 516 { 517 iptun_task_data_t *itd = arg; 518 iptun_task_t task = itd->itd_task; 519 datalink_id_t linkid = itd->itd_linkid; 520 iptun_t *iptun; 521 uint32_t mtu; 522 iptun_addr_t addr; 523 link_state_t linkstate; 524 size_t header_size; 525 iptun_header_t header; 526 527 kmem_free(itd, sizeof (*itd)); 528 529 /* 530 * Note that if the lookup fails, it's because the tunnel was deleted 531 * between the time the task was dispatched and now. That isn't an 532 * error. 533 */ 534 if (iptun_enter_by_linkid(linkid, &iptun) != 0) 535 return; 536 537 iptun->iptun_flags |= IPTUN_UPCALL_PENDING; 538 539 switch (task) { 540 case IPTUN_TASK_MTU_UPDATE: 541 mtu = iptun->iptun_mtu; 542 break; 543 case IPTUN_TASK_LADDR_UPDATE: 544 addr = iptun->iptun_laddr; 545 break; 546 case IPTUN_TASK_RADDR_UPDATE: 547 addr = iptun->iptun_raddr; 548 break; 549 case IPTUN_TASK_LINK_UPDATE: 550 linkstate = IS_IPTUN_RUNNING(iptun) ? 551 LINK_STATE_UP : LINK_STATE_DOWN; 552 break; 553 case IPTUN_TASK_PDATA_UPDATE: 554 header_size = iptun->iptun_header_size; 555 header = iptun->iptun_header; 556 break; 557 default: 558 ASSERT(0); 559 } 560 561 iptun_exit(iptun); 562 563 switch (task) { 564 case IPTUN_TASK_MTU_UPDATE: 565 (void) mac_maxsdu_update(iptun->iptun_mh, mtu); 566 break; 567 case IPTUN_TASK_LADDR_UPDATE: 568 mac_unicst_update(iptun->iptun_mh, (uint8_t *)&addr.ia_addr); 569 break; 570 case IPTUN_TASK_RADDR_UPDATE: 571 mac_dst_update(iptun->iptun_mh, (uint8_t *)&addr.ia_addr); 572 break; 573 case IPTUN_TASK_LINK_UPDATE: 574 mac_link_update(iptun->iptun_mh, linkstate); 575 break; 576 case IPTUN_TASK_PDATA_UPDATE: 577 if (mac_pdata_update(iptun->iptun_mh, 578 header_size == 0 ? NULL : &header, header_size) != 0) 579 atomic_inc_64(&iptun->iptun_taskq_fail); 580 break; 581 } 582 583 mutex_enter(&iptun->iptun_lock); 584 iptun->iptun_flags &= ~IPTUN_UPCALL_PENDING; 585 cv_signal(&iptun->iptun_upcall_cv); 586 mutex_exit(&iptun->iptun_lock); 587 } 588 589 static void 590 iptun_task_dispatch(iptun_t *iptun, iptun_task_t iptun_task) 591 { 592 iptun_task_data_t *itd; 593 594 itd = kmem_alloc(sizeof (*itd), KM_NOSLEEP); 595 if (itd == NULL) { 596 atomic_inc_64(&iptun->iptun_taskq_fail); 597 return; 598 } 599 itd->itd_task = iptun_task; 600 itd->itd_linkid = iptun->iptun_linkid; 601 if (ddi_taskq_dispatch(iptun_taskq, iptun_task_cb, itd, DDI_NOSLEEP)) { 602 atomic_inc_64(&iptun->iptun_taskq_fail); 603 kmem_free(itd, sizeof (*itd)); 604 } 605 } 606 607 /* 608 * Convert an iptun_addr_t to sockaddr_storage. 609 */ 610 static void 611 iptun_getaddr(iptun_addr_t *iptun_addr, struct sockaddr_storage *ss) 612 { 613 struct sockaddr_in *sin; 614 struct sockaddr_in6 *sin6; 615 616 bzero(ss, sizeof (*ss)); 617 switch (iptun_addr->ia_family) { 618 case AF_INET: 619 sin = (struct sockaddr_in *)ss; 620 sin->sin_addr.s_addr = iptun_addr->ia_addr.iau_addr4; 621 break; 622 case AF_INET6: 623 sin6 = (struct sockaddr_in6 *)ss; 624 sin6->sin6_addr = iptun_addr->ia_addr.iau_addr6; 625 break; 626 default: 627 ASSERT(0); 628 } 629 ss->ss_family = iptun_addr->ia_family; 630 } 631 632 /* 633 * General purpose function to set an IP tunnel source or destination address. 634 */ 635 static int 636 iptun_setaddr(iptun_type_t iptun_type, iptun_addr_t *iptun_addr, 637 const struct sockaddr_storage *ss) 638 { 639 if (!IPTUN_ADDR_MATCH(iptun_type, ss->ss_family)) 640 return (EINVAL); 641 642 switch (ss->ss_family) { 643 case AF_INET: { 644 struct sockaddr_in *sin = (struct sockaddr_in *)ss; 645 646 if ((sin->sin_addr.s_addr == INADDR_ANY) || 647 (sin->sin_addr.s_addr == INADDR_BROADCAST) || 648 CLASSD(sin->sin_addr.s_addr)) { 649 return (EADDRNOTAVAIL); 650 } 651 iptun_addr->ia_addr.iau_addr4 = sin->sin_addr.s_addr; 652 break; 653 } 654 case AF_INET6: { 655 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)ss; 656 657 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) || 658 IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr) || 659 IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 660 return (EADDRNOTAVAIL); 661 } 662 iptun_addr->ia_addr.iau_addr6 = sin6->sin6_addr; 663 break; 664 } 665 default: 666 return (EAFNOSUPPORT); 667 } 668 iptun_addr->ia_family = ss->ss_family; 669 return (0); 670 } 671 672 static int 673 iptun_setladdr(iptun_t *iptun, const struct sockaddr_storage *laddr) 674 { 675 return (iptun_setaddr(iptun->iptun_typeinfo->iti_type, 676 &iptun->iptun_laddr, laddr)); 677 } 678 679 static int 680 iptun_setraddr(iptun_t *iptun, const struct sockaddr_storage *raddr) 681 { 682 if (!(iptun->iptun_typeinfo->iti_hasraddr)) 683 return (EINVAL); 684 return (iptun_setaddr(iptun->iptun_typeinfo->iti_type, 685 &iptun->iptun_raddr, raddr)); 686 } 687 688 static boolean_t 689 iptun_canbind(iptun_t *iptun) 690 { 691 /* 692 * A tunnel may bind when its source address has been set, and if its 693 * tunnel type requires one, also its destination address. 694 */ 695 return ((iptun->iptun_flags & IPTUN_LADDR) && 696 ((iptun->iptun_flags & IPTUN_RADDR) || 697 !(iptun->iptun_typeinfo->iti_hasraddr))); 698 } 699 700 /* 701 * Verify that the local address is valid, and insert in the fanout 702 */ 703 static int 704 iptun_bind(iptun_t *iptun) 705 { 706 conn_t *connp = iptun->iptun_connp; 707 int error = 0; 708 ip_xmit_attr_t *ixa; 709 ip_xmit_attr_t *oldixa; 710 iulp_t uinfo; 711 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 712 713 /* 714 * Get an exclusive ixa for this thread. 715 * We defer updating conn_ixa until later to handle any concurrent 716 * conn_ixa_cleanup thread. 717 */ 718 ixa = conn_get_ixa(connp, B_FALSE); 719 if (ixa == NULL) 720 return (ENOMEM); 721 ASSERT(ixa->ixa_refcnt >= 2); 722 ASSERT(ixa == connp->conn_ixa); 723 724 /* We create PMTU state including for 6to4 */ 725 ixa->ixa_flags |= IXAF_PMTU_DISCOVERY; 726 727 ASSERT(iptun_canbind(iptun)); 728 729 mutex_enter(&connp->conn_lock); 730 /* 731 * Note that conn_proto can't be set since the upper protocol 732 * can be both 41 and 4 when IPv6 and IPv4 are over the same tunnel. 733 * ipcl_iptun_classify doesn't use conn_proto. 734 */ 735 connp->conn_ipversion = iptun->iptun_typeinfo->iti_ipvers; 736 737 switch (iptun->iptun_typeinfo->iti_type) { 738 case IPTUN_TYPE_IPV4: 739 IN6_IPADDR_TO_V4MAPPED(iptun->iptun_laddr4, 740 &connp->conn_laddr_v6); 741 IN6_IPADDR_TO_V4MAPPED(iptun->iptun_raddr4, 742 &connp->conn_faddr_v6); 743 ixa->ixa_flags |= IXAF_IS_IPV4; 744 if (ip_laddr_verify_v4(iptun->iptun_laddr4, IPCL_ZONEID(connp), 745 ipst, B_FALSE) != IPVL_UNICAST_UP) { 746 mutex_exit(&connp->conn_lock); 747 error = EADDRNOTAVAIL; 748 goto done; 749 } 750 break; 751 case IPTUN_TYPE_IPV6: 752 connp->conn_laddr_v6 = iptun->iptun_laddr6; 753 connp->conn_faddr_v6 = iptun->iptun_raddr6; 754 ixa->ixa_flags &= ~IXAF_IS_IPV4; 755 /* We use a zero scopeid for now */ 756 if (ip_laddr_verify_v6(&iptun->iptun_laddr6, IPCL_ZONEID(connp), 757 ipst, B_FALSE, 0) != IPVL_UNICAST_UP) { 758 mutex_exit(&connp->conn_lock); 759 error = EADDRNOTAVAIL; 760 goto done; 761 } 762 break; 763 case IPTUN_TYPE_6TO4: 764 IN6_IPADDR_TO_V4MAPPED(iptun->iptun_laddr4, 765 &connp->conn_laddr_v6); 766 IN6_IPADDR_TO_V4MAPPED(INADDR_ANY, &connp->conn_faddr_v6); 767 ixa->ixa_flags |= IXAF_IS_IPV4; 768 mutex_exit(&connp->conn_lock); 769 770 switch (ip_laddr_verify_v4(iptun->iptun_laddr4, 771 IPCL_ZONEID(connp), ipst, B_FALSE)) { 772 case IPVL_UNICAST_UP: 773 case IPVL_UNICAST_DOWN: 774 break; 775 default: 776 error = EADDRNOTAVAIL; 777 goto done; 778 } 779 goto insert; 780 } 781 782 /* In case previous destination was multirt */ 783 ip_attr_newdst(ixa); 784 785 /* 786 * When we set a tunnel's destination address, we do not 787 * care if the destination is reachable. Transient routing 788 * issues should not inhibit the creation of a tunnel 789 * interface, for example. Thus we pass B_FALSE here. 790 */ 791 connp->conn_saddr_v6 = connp->conn_laddr_v6; 792 mutex_exit(&connp->conn_lock); 793 794 /* As long as the MTU is large we avoid fragmentation */ 795 ixa->ixa_flags |= IXAF_DONTFRAG | IXAF_PMTU_IPV4_DF; 796 797 /* We handle IPsec in iptun_output_common */ 798 error = ip_attr_connect(connp, ixa, &connp->conn_saddr_v6, 799 &connp->conn_faddr_v6, &connp->conn_faddr_v6, 0, 800 &connp->conn_saddr_v6, &uinfo, 0); 801 802 if (error != 0) 803 goto done; 804 805 /* saddr shouldn't change since it was already set */ 806 ASSERT(IN6_ARE_ADDR_EQUAL(&connp->conn_laddr_v6, 807 &connp->conn_saddr_v6)); 808 809 /* We set IXAF_VERIFY_PMTU to catch PMTU increases */ 810 ixa->ixa_flags |= IXAF_VERIFY_PMTU; 811 ASSERT(uinfo.iulp_mtu != 0); 812 813 /* 814 * Allow setting new policies. 815 * The addresses/ports are already set, thus the IPsec policy calls 816 * can handle their passed-in conn's. 817 */ 818 connp->conn_policy_cached = B_FALSE; 819 820 insert: 821 error = ipcl_conn_insert(connp); 822 if (error != 0) 823 goto done; 824 825 /* Atomically update v6lastdst and conn_ixa */ 826 mutex_enter(&connp->conn_lock); 827 /* Record this as the "last" send even though we haven't sent any */ 828 connp->conn_v6lastdst = connp->conn_faddr_v6; 829 830 iptun->iptun_flags |= IPTUN_BOUND; 831 832 oldixa = conn_replace_ixa(connp, ixa); 833 /* Done with conn_t */ 834 mutex_exit(&connp->conn_lock); 835 ixa_refrele(oldixa); 836 837 /* 838 * Now that we're bound with ip below us, this is a good 839 * time to initialize the destination path MTU and to 840 * re-calculate the tunnel's link MTU. 841 */ 842 (void) iptun_update_mtu(iptun, ixa, 0); 843 844 if (IS_IPTUN_RUNNING(iptun)) 845 iptun_task_dispatch(iptun, IPTUN_TASK_LINK_UPDATE); 846 847 done: 848 ixa_refrele(ixa); 849 return (error); 850 } 851 852 static void 853 iptun_unbind(iptun_t *iptun) 854 { 855 ASSERT(iptun->iptun_flags & IPTUN_BOUND); 856 ASSERT(mutex_owned(&iptun->iptun_lock) || 857 (iptun->iptun_flags & IPTUN_CONDEMNED)); 858 ip_unbind(iptun->iptun_connp); 859 iptun->iptun_flags &= ~IPTUN_BOUND; 860 if (!(iptun->iptun_flags & IPTUN_CONDEMNED)) 861 iptun_task_dispatch(iptun, IPTUN_TASK_LINK_UPDATE); 862 } 863 864 /* 865 * Re-generate the template data-link header for a given IP tunnel given the 866 * tunnel's current parameters. 867 */ 868 static void 869 iptun_headergen(iptun_t *iptun, boolean_t update_mac) 870 { 871 switch (iptun->iptun_typeinfo->iti_ipvers) { 872 case IPV4_VERSION: 873 /* 874 * We only need to use a custom IP header if the administrator 875 * has supplied a non-default hoplimit. 876 */ 877 if (iptun->iptun_hoplimit == IPTUN_DEFAULT_HOPLIMIT) { 878 iptun->iptun_header_size = 0; 879 break; 880 } 881 iptun->iptun_header_size = sizeof (ipha_t); 882 iptun->iptun_header4.ipha_version_and_hdr_length = 883 IP_SIMPLE_HDR_VERSION; 884 iptun->iptun_header4.ipha_fragment_offset_and_flags = 885 htons(IPH_DF); 886 iptun->iptun_header4.ipha_ttl = iptun->iptun_hoplimit; 887 break; 888 case IPV6_VERSION: { 889 ip6_t *ip6hp = &iptun->iptun_header6.it6h_ip6h; 890 891 /* 892 * We only need to use a custom IPv6 header if either the 893 * administrator has supplied a non-default hoplimit, or we 894 * need to include an encapsulation limit option in the outer 895 * header. 896 */ 897 if (iptun->iptun_hoplimit == IPTUN_DEFAULT_HOPLIMIT && 898 iptun->iptun_encaplimit == 0) { 899 iptun->iptun_header_size = 0; 900 break; 901 } 902 903 (void) memset(ip6hp, 0, sizeof (*ip6hp)); 904 if (iptun->iptun_encaplimit == 0) { 905 iptun->iptun_header_size = sizeof (ip6_t); 906 ip6hp->ip6_nxt = IPPROTO_NONE; 907 } else { 908 iptun_encaplim_t *iel; 909 910 iptun->iptun_header_size = sizeof (iptun_ipv6hdrs_t); 911 /* 912 * The mac_ipv6 plugin requires ip6_plen to be in host 913 * byte order and reflect the extension headers 914 * present in the template. The actual network byte 915 * order ip6_plen will be set on a per-packet basis on 916 * transmit. 917 */ 918 ip6hp->ip6_plen = sizeof (*iel); 919 ip6hp->ip6_nxt = IPPROTO_DSTOPTS; 920 iel = &iptun->iptun_header6.it6h_encaplim; 921 *iel = iptun_encaplim_init; 922 iel->iel_telopt.ip6ot_encap_limit = 923 iptun->iptun_encaplimit; 924 } 925 926 ip6hp->ip6_hlim = iptun->iptun_hoplimit; 927 break; 928 } 929 } 930 931 if (update_mac) 932 iptun_task_dispatch(iptun, IPTUN_TASK_PDATA_UPDATE); 933 } 934 935 /* 936 * Insert inbound and outbound IPv4 and IPv6 policy into the given policy 937 * head. 938 */ 939 static boolean_t 940 iptun_insert_simple_policies(ipsec_policy_head_t *ph, ipsec_act_t *actp, 941 uint_t n, netstack_t *ns) 942 { 943 int f = IPSEC_AF_V4; 944 945 if (!ipsec_polhead_insert(ph, actp, n, f, IPSEC_TYPE_INBOUND, ns) || 946 !ipsec_polhead_insert(ph, actp, n, f, IPSEC_TYPE_OUTBOUND, ns)) 947 return (B_FALSE); 948 949 f = IPSEC_AF_V6; 950 return (ipsec_polhead_insert(ph, actp, n, f, IPSEC_TYPE_INBOUND, ns) && 951 ipsec_polhead_insert(ph, actp, n, f, IPSEC_TYPE_OUTBOUND, ns)); 952 } 953 954 /* 955 * Used to set IPsec policy when policy is set through the IPTUN_CREATE or 956 * IPTUN_MODIFY ioctls. 957 */ 958 static int 959 iptun_set_sec_simple(iptun_t *iptun, const ipsec_req_t *ipsr) 960 { 961 int rc = 0; 962 uint_t nact; 963 ipsec_act_t *actp = NULL; 964 boolean_t clear_all, old_policy = B_FALSE; 965 ipsec_tun_pol_t *itp; 966 char name[MAXLINKNAMELEN]; 967 uint64_t gen; 968 netstack_t *ns = iptun->iptun_ns; 969 970 /* Can't specify self-encap on a tunnel. */ 971 if (ipsr->ipsr_self_encap_req != 0) 972 return (EINVAL); 973 974 /* 975 * If it's a "clear-all" entry, unset the security flags and resume 976 * normal cleartext (or inherit-from-global) policy. 977 */ 978 clear_all = ((ipsr->ipsr_ah_req & IPTUN_IPSEC_REQ_MASK) == 0 && 979 (ipsr->ipsr_esp_req & IPTUN_IPSEC_REQ_MASK) == 0); 980 981 ASSERT(mutex_owned(&iptun->iptun_lock)); 982 itp = iptun->iptun_itp; 983 if (itp == NULL) { 984 if (clear_all) 985 goto bail; 986 if ((rc = dls_mgmt_get_linkinfo(iptun->iptun_linkid, name, NULL, 987 NULL, NULL)) != 0) 988 goto bail; 989 ASSERT(name[0] != '\0'); 990 if ((itp = create_tunnel_policy(name, &rc, &gen, ns)) == NULL) 991 goto bail; 992 iptun->iptun_itp = itp; 993 } 994 995 /* Allocate the actvec now, before holding itp or polhead locks. */ 996 ipsec_actvec_from_req(ipsr, &actp, &nact, ns); 997 if (actp == NULL) { 998 rc = ENOMEM; 999 goto bail; 1000 } 1001 1002 /* 1003 * Just write on the active polhead. Save the primary/secondary stuff 1004 * for spdsock operations. 1005 * 1006 * Mutex because we need to write to the polhead AND flags atomically. 1007 * Other threads will acquire the polhead lock as a reader if the 1008 * (unprotected) flag is set. 1009 */ 1010 mutex_enter(&itp->itp_lock); 1011 if (itp->itp_flags & ITPF_P_TUNNEL) { 1012 /* Oops, we lost a race. Let's get out of here. */ 1013 rc = EBUSY; 1014 goto mutex_bail; 1015 } 1016 old_policy = ((itp->itp_flags & ITPF_P_ACTIVE) != 0); 1017 1018 if (old_policy) { 1019 ITPF_CLONE(itp->itp_flags); 1020 rc = ipsec_copy_polhead(itp->itp_policy, itp->itp_inactive, ns); 1021 if (rc != 0) { 1022 /* inactive has already been cleared. */ 1023 itp->itp_flags &= ~ITPF_IFLAGS; 1024 goto mutex_bail; 1025 } 1026 rw_enter(&itp->itp_policy->iph_lock, RW_WRITER); 1027 ipsec_polhead_flush(itp->itp_policy, ns); 1028 } else { 1029 /* Else assume itp->itp_policy is already flushed. */ 1030 rw_enter(&itp->itp_policy->iph_lock, RW_WRITER); 1031 } 1032 1033 if (clear_all) { 1034 ASSERT(avl_numnodes(&itp->itp_policy->iph_rulebyid) == 0); 1035 itp->itp_flags &= ~ITPF_PFLAGS; 1036 rw_exit(&itp->itp_policy->iph_lock); 1037 old_policy = B_FALSE; /* Clear out the inactive one too. */ 1038 goto recover_bail; 1039 } 1040 1041 if (iptun_insert_simple_policies(itp->itp_policy, actp, nact, ns)) { 1042 rw_exit(&itp->itp_policy->iph_lock); 1043 /* 1044 * Adjust MTU and make sure the DL side knows what's up. 1045 */ 1046 itp->itp_flags = ITPF_P_ACTIVE; 1047 (void) iptun_update_mtu(iptun, NULL, 0); 1048 old_policy = B_FALSE; /* Blank out inactive - we succeeded */ 1049 } else { 1050 rw_exit(&itp->itp_policy->iph_lock); 1051 rc = ENOMEM; 1052 } 1053 1054 recover_bail: 1055 if (old_policy) { 1056 /* Recover policy in in active polhead. */ 1057 ipsec_swap_policy(itp->itp_policy, itp->itp_inactive, ns); 1058 ITPF_SWAP(itp->itp_flags); 1059 } 1060 1061 /* Clear policy in inactive polhead. */ 1062 itp->itp_flags &= ~ITPF_IFLAGS; 1063 rw_enter(&itp->itp_inactive->iph_lock, RW_WRITER); 1064 ipsec_polhead_flush(itp->itp_inactive, ns); 1065 rw_exit(&itp->itp_inactive->iph_lock); 1066 1067 mutex_bail: 1068 mutex_exit(&itp->itp_lock); 1069 1070 bail: 1071 if (actp != NULL) 1072 ipsec_actvec_free(actp, nact); 1073 1074 return (rc); 1075 } 1076 1077 static iptun_typeinfo_t * 1078 iptun_gettypeinfo(iptun_type_t type) 1079 { 1080 int i; 1081 1082 for (i = 0; iptun_type_table[i].iti_type != IPTUN_TYPE_UNKNOWN; i++) { 1083 if (iptun_type_table[i].iti_type == type) 1084 break; 1085 } 1086 return (&iptun_type_table[i]); 1087 } 1088 1089 /* 1090 * Set the parameters included in ik on the tunnel iptun. Parameters that can 1091 * only be set at creation time are set in iptun_create(). 1092 */ 1093 static int 1094 iptun_setparams(iptun_t *iptun, const iptun_kparams_t *ik) 1095 { 1096 int err = 0; 1097 netstack_t *ns = iptun->iptun_ns; 1098 iptun_addr_t orig_laddr, orig_raddr; 1099 uint_t orig_flags = iptun->iptun_flags; 1100 1101 if (ik->iptun_kparam_flags & IPTUN_KPARAM_LADDR) { 1102 if (orig_flags & IPTUN_LADDR) 1103 orig_laddr = iptun->iptun_laddr; 1104 if ((err = iptun_setladdr(iptun, &ik->iptun_kparam_laddr)) != 0) 1105 return (err); 1106 iptun->iptun_flags |= IPTUN_LADDR; 1107 } 1108 1109 if (ik->iptun_kparam_flags & IPTUN_KPARAM_RADDR) { 1110 if (orig_flags & IPTUN_RADDR) 1111 orig_raddr = iptun->iptun_raddr; 1112 if ((err = iptun_setraddr(iptun, &ik->iptun_kparam_raddr)) != 0) 1113 goto done; 1114 iptun->iptun_flags |= IPTUN_RADDR; 1115 } 1116 1117 if (ik->iptun_kparam_flags & IPTUN_KPARAM_SECINFO) { 1118 /* 1119 * Set IPsec policy originating from the ifconfig(1M) command 1120 * line. This is traditionally called "simple" policy because 1121 * the ipsec_req_t (iptun_kparam_secinfo) can only describe a 1122 * simple policy of "do ESP on everything" and/or "do AH on 1123 * everything" (as opposed to the rich policy that can be 1124 * defined with ipsecconf(1M)). 1125 */ 1126 if (iptun->iptun_typeinfo->iti_type == IPTUN_TYPE_6TO4) { 1127 /* 1128 * Can't set security properties for automatic 1129 * tunnels. 1130 */ 1131 err = EINVAL; 1132 goto done; 1133 } 1134 1135 if (!ipsec_loaded(ns->netstack_ipsec)) { 1136 /* If IPsec can be loaded, try and load it now. */ 1137 if (ipsec_failed(ns->netstack_ipsec)) { 1138 err = EPROTONOSUPPORT; 1139 goto done; 1140 } 1141 ipsec_loader_loadnow(ns->netstack_ipsec); 1142 /* 1143 * ipsec_loader_loadnow() returns while IPsec is 1144 * loaded asynchronously. While a method exists to 1145 * wait for IPsec to load (ipsec_loader_wait()), it 1146 * requires use of a STREAMS queue to do a qwait(). 1147 * We're not in STREAMS context here, and so we can't 1148 * use it. This is not a problem in practice because 1149 * in the vast majority of cases, key management and 1150 * global policy will have loaded before any tunnels 1151 * are plumbed, and so IPsec will already have been 1152 * loaded. 1153 */ 1154 err = EAGAIN; 1155 goto done; 1156 } 1157 1158 err = iptun_set_sec_simple(iptun, &ik->iptun_kparam_secinfo); 1159 if (err == 0) { 1160 iptun->iptun_flags |= IPTUN_SIMPLE_POLICY; 1161 iptun->iptun_simple_policy = ik->iptun_kparam_secinfo; 1162 } 1163 } 1164 done: 1165 if (err != 0) { 1166 /* Restore original source and destination. */ 1167 if (ik->iptun_kparam_flags & IPTUN_KPARAM_LADDR && 1168 (orig_flags & IPTUN_LADDR)) 1169 iptun->iptun_laddr = orig_laddr; 1170 if ((ik->iptun_kparam_flags & IPTUN_KPARAM_RADDR) && 1171 (orig_flags & IPTUN_RADDR)) 1172 iptun->iptun_raddr = orig_raddr; 1173 iptun->iptun_flags = orig_flags; 1174 } 1175 return (err); 1176 } 1177 1178 static int 1179 iptun_register(iptun_t *iptun) 1180 { 1181 mac_register_t *mac; 1182 int err; 1183 1184 ASSERT(!(iptun->iptun_flags & IPTUN_MAC_REGISTERED)); 1185 1186 if ((mac = mac_alloc(MAC_VERSION)) == NULL) 1187 return (EINVAL); 1188 1189 mac->m_type_ident = iptun->iptun_typeinfo->iti_ident; 1190 mac->m_driver = iptun; 1191 mac->m_dip = iptun_dip; 1192 mac->m_instance = (uint_t)-1; 1193 mac->m_src_addr = (uint8_t *)&iptun->iptun_laddr.ia_addr; 1194 mac->m_dst_addr = iptun->iptun_typeinfo->iti_hasraddr ? 1195 (uint8_t *)&iptun->iptun_raddr.ia_addr : NULL; 1196 mac->m_callbacks = &iptun_m_callbacks; 1197 mac->m_min_sdu = iptun->iptun_typeinfo->iti_minmtu; 1198 mac->m_max_sdu = iptun->iptun_mtu; 1199 if (iptun->iptun_header_size != 0) { 1200 mac->m_pdata = &iptun->iptun_header; 1201 mac->m_pdata_size = iptun->iptun_header_size; 1202 } 1203 if ((err = mac_register(mac, &iptun->iptun_mh)) == 0) 1204 iptun->iptun_flags |= IPTUN_MAC_REGISTERED; 1205 mac_free(mac); 1206 return (err); 1207 } 1208 1209 static int 1210 iptun_unregister(iptun_t *iptun) 1211 { 1212 int err; 1213 1214 ASSERT(iptun->iptun_flags & IPTUN_MAC_REGISTERED); 1215 if ((err = mac_unregister(iptun->iptun_mh)) == 0) 1216 iptun->iptun_flags &= ~IPTUN_MAC_REGISTERED; 1217 return (err); 1218 } 1219 1220 static conn_t * 1221 iptun_conn_create(iptun_t *iptun, netstack_t *ns, cred_t *credp) 1222 { 1223 conn_t *connp; 1224 1225 if ((connp = ipcl_conn_create(IPCL_IPCCONN, KM_NOSLEEP, ns)) == NULL) 1226 return (NULL); 1227 1228 connp->conn_flags |= IPCL_IPTUN; 1229 connp->conn_iptun = iptun; 1230 connp->conn_recv = iptun_input; 1231 connp->conn_recvicmp = iptun_input_icmp; 1232 connp->conn_verifyicmp = iptun_verifyicmp; 1233 1234 /* 1235 * Register iptun_notify to listen to capability changes detected by IP. 1236 * This upcall is made in the context of the call to conn_ip_output. 1237 */ 1238 connp->conn_ixa->ixa_notify = iptun_notify; 1239 connp->conn_ixa->ixa_notify_cookie = iptun; 1240 1241 /* 1242 * For exclusive stacks we set conn_zoneid to GLOBAL_ZONEID as is done 1243 * for all other conn_t's. 1244 * 1245 * Note that there's an important distinction between iptun_zoneid and 1246 * conn_zoneid. The conn_zoneid is set to GLOBAL_ZONEID in non-global 1247 * exclusive stack zones to make the ip module believe that the 1248 * non-global zone is actually a global zone. Therefore, when 1249 * interacting with the ip module, we must always use conn_zoneid. 1250 */ 1251 connp->conn_zoneid = (ns->netstack_stackid == GLOBAL_NETSTACKID) ? 1252 crgetzoneid(credp) : GLOBAL_ZONEID; 1253 connp->conn_cred = credp; 1254 /* crfree() is done in ipcl_conn_destroy(), called by CONN_DEC_REF() */ 1255 crhold(connp->conn_cred); 1256 connp->conn_cpid = NOPID; 1257 1258 /* conn_allzones can not be set this early, hence no IPCL_ZONEID */ 1259 connp->conn_ixa->ixa_zoneid = connp->conn_zoneid; 1260 ASSERT(connp->conn_ref == 1); 1261 1262 /* Cache things in ixa without an extra refhold */ 1263 ASSERT(!(connp->conn_ixa->ixa_free_flags & IXA_FREE_CRED)); 1264 connp->conn_ixa->ixa_cred = connp->conn_cred; 1265 connp->conn_ixa->ixa_cpid = connp->conn_cpid; 1266 if (is_system_labeled()) 1267 connp->conn_ixa->ixa_tsl = crgetlabel(connp->conn_cred); 1268 1269 /* 1270 * Have conn_ip_output drop packets should our outer source 1271 * go invalid 1272 */ 1273 connp->conn_ixa->ixa_flags |= IXAF_VERIFY_SOURCE; 1274 1275 switch (iptun->iptun_typeinfo->iti_ipvers) { 1276 case IPV4_VERSION: 1277 connp->conn_family = AF_INET6; 1278 break; 1279 case IPV6_VERSION: 1280 connp->conn_family = AF_INET; 1281 break; 1282 } 1283 mutex_enter(&connp->conn_lock); 1284 connp->conn_state_flags &= ~CONN_INCIPIENT; 1285 mutex_exit(&connp->conn_lock); 1286 return (connp); 1287 } 1288 1289 static void 1290 iptun_conn_destroy(conn_t *connp) 1291 { 1292 ip_quiesce_conn(connp); 1293 connp->conn_iptun = NULL; 1294 ASSERT(connp->conn_ref == 1); 1295 CONN_DEC_REF(connp); 1296 } 1297 1298 static iptun_t * 1299 iptun_alloc(void) 1300 { 1301 iptun_t *iptun; 1302 1303 if ((iptun = kmem_cache_alloc(iptun_cache, KM_NOSLEEP)) != NULL) { 1304 bzero(iptun, sizeof (*iptun)); 1305 atomic_inc_32(&iptun_tunnelcount); 1306 } 1307 return (iptun); 1308 } 1309 1310 static void 1311 iptun_free(iptun_t *iptun) 1312 { 1313 ASSERT(iptun->iptun_flags & IPTUN_CONDEMNED); 1314 1315 if (iptun->iptun_flags & IPTUN_HASH_INSERTED) { 1316 iptun_stack_t *iptuns = iptun->iptun_iptuns; 1317 1318 mutex_enter(&iptun_hash_lock); 1319 VERIFY(mod_hash_remove(iptun_hash, 1320 IPTUN_HASH_KEY(iptun->iptun_linkid), 1321 (mod_hash_val_t *)&iptun) == 0); 1322 mutex_exit(&iptun_hash_lock); 1323 iptun->iptun_flags &= ~IPTUN_HASH_INSERTED; 1324 mutex_enter(&iptuns->iptuns_lock); 1325 list_remove(&iptuns->iptuns_iptunlist, iptun); 1326 mutex_exit(&iptuns->iptuns_lock); 1327 } 1328 1329 if (iptun->iptun_flags & IPTUN_BOUND) 1330 iptun_unbind(iptun); 1331 1332 /* 1333 * After iptun_unregister(), there will be no threads executing a 1334 * downcall from the mac module, including in the tx datapath. 1335 */ 1336 if (iptun->iptun_flags & IPTUN_MAC_REGISTERED) 1337 VERIFY(iptun_unregister(iptun) == 0); 1338 1339 if (iptun->iptun_itp != NULL) { 1340 /* 1341 * Remove from the AVL tree, AND release the reference iptun_t 1342 * itself holds on the ITP. 1343 */ 1344 itp_unlink(iptun->iptun_itp, iptun->iptun_ns); 1345 ITP_REFRELE(iptun->iptun_itp, iptun->iptun_ns); 1346 iptun->iptun_itp = NULL; 1347 iptun->iptun_flags &= ~IPTUN_SIMPLE_POLICY; 1348 } 1349 1350 /* 1351 * After ipcl_conn_destroy(), there will be no threads executing an 1352 * upcall from ip (i.e., iptun_input()), and it is then safe to free 1353 * the iptun_t. 1354 */ 1355 if (iptun->iptun_connp != NULL) { 1356 iptun_conn_destroy(iptun->iptun_connp); 1357 iptun->iptun_connp = NULL; 1358 } 1359 1360 kmem_cache_free(iptun_cache, iptun); 1361 atomic_dec_32(&iptun_tunnelcount); 1362 } 1363 1364 int 1365 iptun_create(iptun_kparams_t *ik, cred_t *credp) 1366 { 1367 iptun_t *iptun = NULL; 1368 int err = 0, mherr; 1369 char linkname[MAXLINKNAMELEN]; 1370 ipsec_tun_pol_t *itp; 1371 netstack_t *ns = NULL; 1372 iptun_stack_t *iptuns; 1373 datalink_id_t tmpid; 1374 zoneid_t zoneid = crgetzoneid(credp); 1375 boolean_t link_created = B_FALSE; 1376 1377 /* The tunnel type is mandatory */ 1378 if (!(ik->iptun_kparam_flags & IPTUN_KPARAM_TYPE)) 1379 return (EINVAL); 1380 1381 /* 1382 * Is the linkid that the caller wishes to associate with this new 1383 * tunnel assigned to this zone? 1384 */ 1385 if (zone_check_datalink(&zoneid, ik->iptun_kparam_linkid) != 0) { 1386 if (zoneid != GLOBAL_ZONEID) 1387 return (EINVAL); 1388 } else if (zoneid == GLOBAL_ZONEID) { 1389 return (EINVAL); 1390 } 1391 1392 /* 1393 * Make sure that we're not trying to create a tunnel that has already 1394 * been created. 1395 */ 1396 if (iptun_enter_by_linkid(ik->iptun_kparam_linkid, &iptun) == 0) { 1397 iptun_exit(iptun); 1398 iptun = NULL; 1399 err = EEXIST; 1400 goto done; 1401 } 1402 1403 ns = netstack_find_by_cred(credp); 1404 iptuns = ns->netstack_iptun; 1405 1406 if ((iptun = iptun_alloc()) == NULL) { 1407 err = ENOMEM; 1408 goto done; 1409 } 1410 1411 iptun->iptun_linkid = ik->iptun_kparam_linkid; 1412 iptun->iptun_zoneid = zoneid; 1413 iptun->iptun_ns = ns; 1414 1415 iptun->iptun_typeinfo = iptun_gettypeinfo(ik->iptun_kparam_type); 1416 if (iptun->iptun_typeinfo->iti_type == IPTUN_TYPE_UNKNOWN) { 1417 err = EINVAL; 1418 goto done; 1419 } 1420 1421 if (ik->iptun_kparam_flags & IPTUN_KPARAM_IMPLICIT) 1422 iptun->iptun_flags |= IPTUN_IMPLICIT; 1423 1424 if ((err = iptun_setparams(iptun, ik)) != 0) 1425 goto done; 1426 1427 iptun->iptun_hoplimit = IPTUN_DEFAULT_HOPLIMIT; 1428 if (iptun->iptun_typeinfo->iti_type == IPTUN_TYPE_IPV6) 1429 iptun->iptun_encaplimit = IPTUN_DEFAULT_ENCAPLIMIT; 1430 1431 iptun_headergen(iptun, B_FALSE); 1432 1433 iptun->iptun_connp = iptun_conn_create(iptun, ns, credp); 1434 if (iptun->iptun_connp == NULL) { 1435 err = ENOMEM; 1436 goto done; 1437 } 1438 1439 iptun->iptun_mtu = iptun->iptun_typeinfo->iti_maxmtu; 1440 iptun->iptun_dpmtu = iptun->iptun_mtu; 1441 1442 /* 1443 * Find an ITP based on linkname. If we have parms already set via 1444 * the iptun_setparams() call above, it may have created an ITP for 1445 * us. We always try get_tunnel_policy() for DEBUG correctness 1446 * checks, and we may wish to refactor this to only check when 1447 * iptun_itp is NULL. 1448 */ 1449 if ((err = dls_mgmt_get_linkinfo(iptun->iptun_linkid, linkname, NULL, 1450 NULL, NULL)) != 0) 1451 goto done; 1452 if ((itp = get_tunnel_policy(linkname, ns)) != NULL) 1453 iptun->iptun_itp = itp; 1454 1455 /* 1456 * See if we have the necessary IP addresses assigned to this tunnel 1457 * to try and bind them with ip underneath us. If we're not ready to 1458 * bind yet, then we'll defer the bind operation until the addresses 1459 * are modified. 1460 */ 1461 if (iptun_canbind(iptun) && ((err = iptun_bind(iptun)) != 0)) 1462 goto done; 1463 1464 if ((err = iptun_register(iptun)) != 0) 1465 goto done; 1466 1467 err = dls_devnet_create(iptun->iptun_mh, iptun->iptun_linkid, 1468 iptun->iptun_zoneid); 1469 if (err != 0) 1470 goto done; 1471 link_created = B_TRUE; 1472 1473 /* 1474 * We hash by link-id as that is the key used by all other iptun 1475 * interfaces (modify, delete, etc.). 1476 */ 1477 if ((mherr = mod_hash_insert(iptun_hash, 1478 IPTUN_HASH_KEY(iptun->iptun_linkid), (mod_hash_val_t)iptun)) == 0) { 1479 mutex_enter(&iptuns->iptuns_lock); 1480 list_insert_head(&iptuns->iptuns_iptunlist, iptun); 1481 mutex_exit(&iptuns->iptuns_lock); 1482 iptun->iptun_flags |= IPTUN_HASH_INSERTED; 1483 } else if (mherr == MH_ERR_NOMEM) { 1484 err = ENOMEM; 1485 } else if (mherr == MH_ERR_DUPLICATE) { 1486 err = EEXIST; 1487 } else { 1488 err = EINVAL; 1489 } 1490 1491 done: 1492 if (iptun == NULL && ns != NULL) 1493 netstack_rele(ns); 1494 if (err != 0 && iptun != NULL) { 1495 if (link_created) { 1496 (void) dls_devnet_destroy(iptun->iptun_mh, &tmpid, 1497 B_TRUE); 1498 } 1499 iptun->iptun_flags |= IPTUN_CONDEMNED; 1500 iptun_free(iptun); 1501 } 1502 return (err); 1503 } 1504 1505 int 1506 iptun_delete(datalink_id_t linkid, cred_t *credp) 1507 { 1508 int err; 1509 iptun_t *iptun = NULL; 1510 1511 if ((err = iptun_enter_by_linkid(linkid, &iptun)) != 0) 1512 return (err); 1513 1514 /* One cannot delete a tunnel that belongs to another zone. */ 1515 if (iptun->iptun_zoneid != crgetzoneid(credp)) { 1516 iptun_exit(iptun); 1517 return (EACCES); 1518 } 1519 1520 /* 1521 * We need to exit iptun in order to issue calls up the stack such as 1522 * dls_devnet_destroy(). If we call up while still in iptun, deadlock 1523 * with calls coming down the stack is possible. We prevent other 1524 * threads from entering this iptun after we've exited it by setting 1525 * the IPTUN_DELETE_PENDING flag. This will cause callers of 1526 * iptun_enter() to block waiting on iptun_enter_cv. The assumption 1527 * here is that the functions we're calling while IPTUN_DELETE_PENDING 1528 * is set dont resuult in an iptun_enter() call, as that would result 1529 * in deadlock. 1530 */ 1531 iptun->iptun_flags |= IPTUN_DELETE_PENDING; 1532 1533 /* Wait for any pending upcall to the mac module to complete. */ 1534 while (iptun->iptun_flags & IPTUN_UPCALL_PENDING) 1535 cv_wait(&iptun->iptun_upcall_cv, &iptun->iptun_lock); 1536 1537 iptun_exit(iptun); 1538 1539 if ((err = dls_devnet_destroy(iptun->iptun_mh, &linkid, B_TRUE)) == 0) { 1540 /* 1541 * mac_disable() will fail with EBUSY if there are references 1542 * to the iptun MAC. If there are none, then mac_disable() 1543 * will assure that none can be acquired until the MAC is 1544 * unregistered. 1545 * 1546 * XXX CR 6791335 prevents us from calling mac_disable() prior 1547 * to dls_devnet_destroy(), so we unfortunately need to 1548 * attempt to re-create the devnet node if mac_disable() 1549 * fails. 1550 */ 1551 if ((err = mac_disable(iptun->iptun_mh)) != 0) { 1552 (void) dls_devnet_create(iptun->iptun_mh, linkid, 1553 iptun->iptun_zoneid); 1554 } 1555 } 1556 1557 /* 1558 * Now that we know the fate of this iptun_t, we need to clear 1559 * IPTUN_DELETE_PENDING, and set IPTUN_CONDEMNED if the iptun_t is 1560 * slated to be freed. Either way, we need to signal the threads 1561 * waiting in iptun_enter() so that they can either fail if 1562 * IPTUN_CONDEMNED is set, or continue if it's not. 1563 */ 1564 mutex_enter(&iptun->iptun_lock); 1565 iptun->iptun_flags &= ~IPTUN_DELETE_PENDING; 1566 if (err == 0) 1567 iptun->iptun_flags |= IPTUN_CONDEMNED; 1568 cv_broadcast(&iptun->iptun_enter_cv); 1569 mutex_exit(&iptun->iptun_lock); 1570 1571 /* 1572 * Note that there is no danger in calling iptun_free() after having 1573 * dropped the iptun_lock since callers of iptun_enter() at this point 1574 * are doing so from iptun_enter_by_linkid() (mac_disable() got rid of 1575 * threads entering from mac callbacks which call iptun_enter() 1576 * directly) which holds iptun_hash_lock, and iptun_free() grabs this 1577 * lock in order to remove the iptun_t from the hash table. 1578 */ 1579 if (err == 0) 1580 iptun_free(iptun); 1581 1582 return (err); 1583 } 1584 1585 int 1586 iptun_modify(const iptun_kparams_t *ik, cred_t *credp) 1587 { 1588 iptun_t *iptun; 1589 boolean_t laddr_change = B_FALSE, raddr_change = B_FALSE; 1590 int err; 1591 1592 if ((err = iptun_enter_by_linkid(ik->iptun_kparam_linkid, &iptun)) != 0) 1593 return (err); 1594 1595 /* One cannot modify a tunnel that belongs to another zone. */ 1596 if (iptun->iptun_zoneid != crgetzoneid(credp)) { 1597 err = EACCES; 1598 goto done; 1599 } 1600 1601 /* The tunnel type cannot be changed */ 1602 if (ik->iptun_kparam_flags & IPTUN_KPARAM_TYPE) { 1603 err = EINVAL; 1604 goto done; 1605 } 1606 1607 if ((err = iptun_setparams(iptun, ik)) != 0) 1608 goto done; 1609 iptun_headergen(iptun, B_FALSE); 1610 1611 /* 1612 * If any of the tunnel's addresses has been modified and the tunnel 1613 * has the necessary addresses assigned to it, we need to try to bind 1614 * with ip underneath us. If we're not ready to bind yet, then we'll 1615 * try again when the addresses are modified later. 1616 */ 1617 laddr_change = (ik->iptun_kparam_flags & IPTUN_KPARAM_LADDR); 1618 raddr_change = (ik->iptun_kparam_flags & IPTUN_KPARAM_RADDR); 1619 if (laddr_change || raddr_change) { 1620 if (iptun->iptun_flags & IPTUN_BOUND) 1621 iptun_unbind(iptun); 1622 if (iptun_canbind(iptun) && (err = iptun_bind(iptun)) != 0) { 1623 if (laddr_change) 1624 iptun->iptun_flags &= ~IPTUN_LADDR; 1625 if (raddr_change) 1626 iptun->iptun_flags &= ~IPTUN_RADDR; 1627 goto done; 1628 } 1629 } 1630 1631 if (laddr_change) 1632 iptun_task_dispatch(iptun, IPTUN_TASK_LADDR_UPDATE); 1633 if (raddr_change) 1634 iptun_task_dispatch(iptun, IPTUN_TASK_RADDR_UPDATE); 1635 1636 done: 1637 iptun_exit(iptun); 1638 return (err); 1639 } 1640 1641 /* Given an IP tunnel's datalink id, fill in its parameters. */ 1642 int 1643 iptun_info(iptun_kparams_t *ik, cred_t *credp) 1644 { 1645 iptun_t *iptun; 1646 int err; 1647 1648 /* Is the tunnel link visible from the caller's zone? */ 1649 if (!dls_devnet_islinkvisible(ik->iptun_kparam_linkid, 1650 crgetzoneid(credp))) 1651 return (ENOENT); 1652 1653 if ((err = iptun_enter_by_linkid(ik->iptun_kparam_linkid, &iptun)) != 0) 1654 return (err); 1655 1656 bzero(ik, sizeof (iptun_kparams_t)); 1657 1658 ik->iptun_kparam_linkid = iptun->iptun_linkid; 1659 ik->iptun_kparam_type = iptun->iptun_typeinfo->iti_type; 1660 ik->iptun_kparam_flags |= IPTUN_KPARAM_TYPE; 1661 1662 if (iptun->iptun_flags & IPTUN_LADDR) { 1663 iptun_getaddr(&iptun->iptun_laddr, &ik->iptun_kparam_laddr); 1664 ik->iptun_kparam_flags |= IPTUN_KPARAM_LADDR; 1665 } 1666 if (iptun->iptun_flags & IPTUN_RADDR) { 1667 iptun_getaddr(&iptun->iptun_raddr, &ik->iptun_kparam_raddr); 1668 ik->iptun_kparam_flags |= IPTUN_KPARAM_RADDR; 1669 } 1670 1671 if (iptun->iptun_flags & IPTUN_IMPLICIT) 1672 ik->iptun_kparam_flags |= IPTUN_KPARAM_IMPLICIT; 1673 1674 if (iptun->iptun_itp != NULL) { 1675 mutex_enter(&iptun->iptun_itp->itp_lock); 1676 if (iptun->iptun_itp->itp_flags & ITPF_P_ACTIVE) { 1677 ik->iptun_kparam_flags |= IPTUN_KPARAM_IPSECPOL; 1678 if (iptun->iptun_flags & IPTUN_SIMPLE_POLICY) { 1679 ik->iptun_kparam_flags |= IPTUN_KPARAM_SECINFO; 1680 ik->iptun_kparam_secinfo = 1681 iptun->iptun_simple_policy; 1682 } 1683 } 1684 mutex_exit(&iptun->iptun_itp->itp_lock); 1685 } 1686 1687 done: 1688 iptun_exit(iptun); 1689 return (err); 1690 } 1691 1692 int 1693 iptun_set_6to4relay(netstack_t *ns, ipaddr_t relay_addr) 1694 { 1695 if (relay_addr == INADDR_BROADCAST || CLASSD(relay_addr)) 1696 return (EADDRNOTAVAIL); 1697 ns->netstack_iptun->iptuns_relay_rtr_addr = relay_addr; 1698 return (0); 1699 } 1700 1701 void 1702 iptun_get_6to4relay(netstack_t *ns, ipaddr_t *relay_addr) 1703 { 1704 *relay_addr = ns->netstack_iptun->iptuns_relay_rtr_addr; 1705 } 1706 1707 void 1708 iptun_set_policy(datalink_id_t linkid, ipsec_tun_pol_t *itp) 1709 { 1710 iptun_t *iptun; 1711 1712 if (iptun_enter_by_linkid(linkid, &iptun) != 0) 1713 return; 1714 if (iptun->iptun_itp != itp) { 1715 ASSERT(iptun->iptun_itp == NULL); 1716 ITP_REFHOLD(itp); 1717 iptun->iptun_itp = itp; 1718 } 1719 /* 1720 * IPsec policy means IPsec overhead, which means lower MTU. 1721 * Refresh the MTU for this tunnel. 1722 */ 1723 (void) iptun_update_mtu(iptun, NULL, 0); 1724 iptun_exit(iptun); 1725 } 1726 1727 /* 1728 * Obtain the path MTU to the tunnel destination. 1729 * Can return zero in some cases. 1730 */ 1731 static uint32_t 1732 iptun_get_dst_pmtu(iptun_t *iptun, ip_xmit_attr_t *ixa) 1733 { 1734 uint32_t pmtu = 0; 1735 conn_t *connp = iptun->iptun_connp; 1736 boolean_t need_rele = B_FALSE; 1737 1738 /* 1739 * We only obtain the pmtu for tunnels that have a remote tunnel 1740 * address. 1741 */ 1742 if (!(iptun->iptun_flags & IPTUN_RADDR)) 1743 return (0); 1744 1745 if (ixa == NULL) { 1746 ixa = conn_get_ixa(connp, B_FALSE); 1747 if (ixa == NULL) 1748 return (0); 1749 need_rele = B_TRUE; 1750 } 1751 /* 1752 * Guard against ICMP errors before we have sent, as well as against 1753 * and a thread which held conn_ixa. 1754 */ 1755 if (ixa->ixa_ire != NULL) { 1756 pmtu = ip_get_pmtu(ixa); 1757 1758 /* 1759 * For both IPv4 and IPv6 we can have indication that the outer 1760 * header needs fragmentation. 1761 */ 1762 if (ixa->ixa_flags & IXAF_PMTU_TOO_SMALL) { 1763 /* Must allow fragmentation in ip_output */ 1764 ixa->ixa_flags &= ~IXAF_DONTFRAG; 1765 } else if (iptun->iptun_typeinfo->iti_type != IPTUN_TYPE_6TO4) { 1766 ixa->ixa_flags |= IXAF_DONTFRAG; 1767 } else { 1768 /* ip_get_pmtu might have set this - we don't want it */ 1769 ixa->ixa_flags &= ~IXAF_PMTU_IPV4_DF; 1770 } 1771 } 1772 1773 if (need_rele) 1774 ixa_refrele(ixa); 1775 return (pmtu); 1776 } 1777 1778 /* 1779 * Update the ip_xmit_attr_t to capture the current lower path mtu as known 1780 * by ip. 1781 */ 1782 static void 1783 iptun_update_dst_pmtu(iptun_t *iptun, ip_xmit_attr_t *ixa) 1784 { 1785 uint32_t pmtu; 1786 conn_t *connp = iptun->iptun_connp; 1787 boolean_t need_rele = B_FALSE; 1788 1789 /* IXAF_VERIFY_PMTU is not set if we don't have a fixed destination */ 1790 if (!(iptun->iptun_flags & IPTUN_RADDR)) 1791 return; 1792 1793 if (ixa == NULL) { 1794 ixa = conn_get_ixa(connp, B_FALSE); 1795 if (ixa == NULL) 1796 return; 1797 need_rele = B_TRUE; 1798 } 1799 /* 1800 * Guard against ICMP errors before we have sent, as well as against 1801 * and a thread which held conn_ixa. 1802 */ 1803 if (ixa->ixa_ire != NULL) { 1804 pmtu = ip_get_pmtu(ixa); 1805 /* 1806 * Update ixa_fragsize and ixa_pmtu. 1807 */ 1808 ixa->ixa_fragsize = ixa->ixa_pmtu = pmtu; 1809 1810 /* 1811 * For both IPv4 and IPv6 we can have indication that the outer 1812 * header needs fragmentation. 1813 */ 1814 if (ixa->ixa_flags & IXAF_PMTU_TOO_SMALL) { 1815 /* Must allow fragmentation in ip_output */ 1816 ixa->ixa_flags &= ~IXAF_DONTFRAG; 1817 } else if (iptun->iptun_typeinfo->iti_type != IPTUN_TYPE_6TO4) { 1818 ixa->ixa_flags |= IXAF_DONTFRAG; 1819 } else { 1820 /* ip_get_pmtu might have set this - we don't want it */ 1821 ixa->ixa_flags &= ~IXAF_PMTU_IPV4_DF; 1822 } 1823 } 1824 1825 if (need_rele) 1826 ixa_refrele(ixa); 1827 } 1828 1829 /* 1830 * There is nothing that iptun can verify in addition to IP having 1831 * verified the IP addresses in the fanout. 1832 */ 1833 /* ARGSUSED */ 1834 static boolean_t 1835 iptun_verifyicmp(conn_t *connp, void *arg2, icmph_t *icmph, icmp6_t *icmp6, 1836 ip_recv_attr_t *ira) 1837 { 1838 return (B_TRUE); 1839 } 1840 1841 /* 1842 * Notify function registered with ip_xmit_attr_t. 1843 */ 1844 static void 1845 iptun_notify(void *arg, ip_xmit_attr_t *ixa, ixa_notify_type_t ntype, 1846 ixa_notify_arg_t narg) 1847 { 1848 iptun_t *iptun = (iptun_t *)arg; 1849 1850 switch (ntype) { 1851 case IXAN_PMTU: 1852 (void) iptun_update_mtu(iptun, ixa, narg); 1853 break; 1854 } 1855 } 1856 1857 /* 1858 * Returns the max of old_ovhd and the overhead associated with pol. 1859 */ 1860 static uint32_t 1861 iptun_max_policy_overhead(ipsec_policy_t *pol, uint32_t old_ovhd) 1862 { 1863 uint32_t new_ovhd = old_ovhd; 1864 1865 while (pol != NULL) { 1866 new_ovhd = max(new_ovhd, 1867 ipsec_act_ovhd(&pol->ipsp_act->ipa_act)); 1868 pol = pol->ipsp_hash.hash_next; 1869 } 1870 return (new_ovhd); 1871 } 1872 1873 static uint32_t 1874 iptun_get_ipsec_overhead(iptun_t *iptun) 1875 { 1876 ipsec_policy_root_t *ipr; 1877 ipsec_policy_head_t *iph; 1878 ipsec_policy_t *pol; 1879 ipsec_selector_t sel; 1880 int i; 1881 uint32_t ipsec_ovhd = 0; 1882 ipsec_tun_pol_t *itp = iptun->iptun_itp; 1883 netstack_t *ns = iptun->iptun_ns; 1884 1885 if (itp == NULL || !(itp->itp_flags & ITPF_P_ACTIVE)) { 1886 /* 1887 * Consult global policy, just in case. This will only work 1888 * if we have both source and destination addresses to work 1889 * with. 1890 */ 1891 if ((iptun->iptun_flags & (IPTUN_LADDR|IPTUN_RADDR)) != 1892 (IPTUN_LADDR|IPTUN_RADDR)) 1893 return (0); 1894 1895 iph = ipsec_system_policy(ns); 1896 bzero(&sel, sizeof (sel)); 1897 sel.ips_isv4 = 1898 (iptun->iptun_typeinfo->iti_ipvers == IPV4_VERSION); 1899 switch (iptun->iptun_typeinfo->iti_ipvers) { 1900 case IPV4_VERSION: 1901 sel.ips_local_addr_v4 = iptun->iptun_laddr4; 1902 sel.ips_remote_addr_v4 = iptun->iptun_raddr4; 1903 break; 1904 case IPV6_VERSION: 1905 sel.ips_local_addr_v6 = iptun->iptun_laddr6; 1906 sel.ips_remote_addr_v6 = iptun->iptun_raddr6; 1907 break; 1908 } 1909 /* Check for both IPv4 and IPv6. */ 1910 sel.ips_protocol = IPPROTO_ENCAP; 1911 pol = ipsec_find_policy_head(NULL, iph, IPSEC_TYPE_OUTBOUND, 1912 &sel); 1913 if (pol != NULL) { 1914 ipsec_ovhd = ipsec_act_ovhd(&pol->ipsp_act->ipa_act); 1915 IPPOL_REFRELE(pol); 1916 } 1917 sel.ips_protocol = IPPROTO_IPV6; 1918 pol = ipsec_find_policy_head(NULL, iph, IPSEC_TYPE_OUTBOUND, 1919 &sel); 1920 if (pol != NULL) { 1921 ipsec_ovhd = max(ipsec_ovhd, 1922 ipsec_act_ovhd(&pol->ipsp_act->ipa_act)); 1923 IPPOL_REFRELE(pol); 1924 } 1925 IPPH_REFRELE(iph, ns); 1926 } else { 1927 /* 1928 * Look through all of the possible IPsec actions for the 1929 * tunnel, and find the largest potential IPsec overhead. 1930 */ 1931 iph = itp->itp_policy; 1932 rw_enter(&iph->iph_lock, RW_READER); 1933 ipr = &(iph->iph_root[IPSEC_TYPE_OUTBOUND]); 1934 ipsec_ovhd = iptun_max_policy_overhead( 1935 ipr->ipr_nonhash[IPSEC_AF_V4], 0); 1936 ipsec_ovhd = iptun_max_policy_overhead( 1937 ipr->ipr_nonhash[IPSEC_AF_V6], ipsec_ovhd); 1938 for (i = 0; i < ipr->ipr_nchains; i++) { 1939 ipsec_ovhd = iptun_max_policy_overhead( 1940 ipr->ipr_hash[i].hash_head, ipsec_ovhd); 1941 } 1942 rw_exit(&iph->iph_lock); 1943 } 1944 1945 return (ipsec_ovhd); 1946 } 1947 1948 /* 1949 * Calculate and return the maximum possible upper MTU for the given tunnel. 1950 * 1951 * If new_pmtu is set then we also need to update the lower path MTU information 1952 * in the ip_xmit_attr_t. That is needed since we set IXAF_VERIFY_PMTU so that 1953 * we are notified by conn_ip_output() when the path MTU increases. 1954 */ 1955 static uint32_t 1956 iptun_get_maxmtu(iptun_t *iptun, ip_xmit_attr_t *ixa, uint32_t new_pmtu) 1957 { 1958 size_t header_size, ipsec_overhead; 1959 uint32_t maxmtu, pmtu; 1960 1961 /* 1962 * Start with the path-MTU to the remote address, which is either 1963 * provided as the new_pmtu argument, or obtained using 1964 * iptun_get_dst_pmtu(). 1965 */ 1966 if (new_pmtu != 0) { 1967 if (iptun->iptun_flags & IPTUN_RADDR) 1968 iptun->iptun_dpmtu = new_pmtu; 1969 pmtu = new_pmtu; 1970 } else if (iptun->iptun_flags & IPTUN_RADDR) { 1971 if ((pmtu = iptun_get_dst_pmtu(iptun, ixa)) == 0) { 1972 /* 1973 * We weren't able to obtain the path-MTU of the 1974 * destination. Use the previous value. 1975 */ 1976 pmtu = iptun->iptun_dpmtu; 1977 } else { 1978 iptun->iptun_dpmtu = pmtu; 1979 } 1980 } else { 1981 /* 1982 * We have no path-MTU information to go on, use the maximum 1983 * possible value. 1984 */ 1985 pmtu = iptun->iptun_typeinfo->iti_maxmtu; 1986 } 1987 1988 /* 1989 * Now calculate tunneling overhead and subtract that from the 1990 * path-MTU information obtained above. 1991 */ 1992 if (iptun->iptun_header_size != 0) { 1993 header_size = iptun->iptun_header_size; 1994 } else { 1995 switch (iptun->iptun_typeinfo->iti_ipvers) { 1996 case IPV4_VERSION: 1997 header_size = sizeof (ipha_t); 1998 if (is_system_labeled()) 1999 header_size += IP_MAX_OPT_LENGTH; 2000 break; 2001 case IPV6_VERSION: 2002 header_size = sizeof (iptun_ipv6hdrs_t); 2003 break; 2004 } 2005 } 2006 2007 ipsec_overhead = iptun_get_ipsec_overhead(iptun); 2008 2009 maxmtu = pmtu - (header_size + ipsec_overhead); 2010 return (max(maxmtu, iptun->iptun_typeinfo->iti_minmtu)); 2011 } 2012 2013 /* 2014 * Re-calculate the tunnel's MTU as seen from above and notify the MAC layer 2015 * of any change in MTU. The new_pmtu argument is the new lower path MTU to 2016 * the tunnel destination to be used in the tunnel MTU calculation. Passing 2017 * in 0 for new_pmtu causes the lower path MTU to be dynamically updated using 2018 * ip_get_pmtu(). 2019 * 2020 * If the calculated tunnel MTU is different than its previous value, then we 2021 * notify the MAC layer above us of this change using mac_maxsdu_update(). 2022 */ 2023 static uint32_t 2024 iptun_update_mtu(iptun_t *iptun, ip_xmit_attr_t *ixa, uint32_t new_pmtu) 2025 { 2026 uint32_t newmtu; 2027 2028 /* We always update the ixa since we might have set IXAF_VERIFY_PMTU */ 2029 iptun_update_dst_pmtu(iptun, ixa); 2030 2031 /* 2032 * We return the current MTU without updating it if it was pegged to a 2033 * static value using the MAC_PROP_MTU link property. 2034 */ 2035 if (iptun->iptun_flags & IPTUN_FIXED_MTU) 2036 return (iptun->iptun_mtu); 2037 2038 /* If the MTU isn't fixed, then use the maximum possible value. */ 2039 newmtu = iptun_get_maxmtu(iptun, ixa, new_pmtu); 2040 /* 2041 * We only dynamically adjust the tunnel MTU for tunnels with 2042 * destinations because dynamic MTU calculations are based on the 2043 * destination path-MTU. 2044 */ 2045 if ((iptun->iptun_flags & IPTUN_RADDR) && newmtu != iptun->iptun_mtu) { 2046 iptun->iptun_mtu = newmtu; 2047 if (iptun->iptun_flags & IPTUN_MAC_REGISTERED) 2048 iptun_task_dispatch(iptun, IPTUN_TASK_MTU_UPDATE); 2049 } 2050 2051 return (newmtu); 2052 } 2053 2054 /* 2055 * Frees a packet or packet chain and bumps stat for each freed packet. 2056 */ 2057 static void 2058 iptun_drop_pkt(mblk_t *mp, uint64_t *stat) 2059 { 2060 mblk_t *pktmp; 2061 2062 for (pktmp = mp; pktmp != NULL; pktmp = mp) { 2063 mp = mp->b_next; 2064 pktmp->b_next = NULL; 2065 if (stat != NULL) 2066 atomic_inc_64(stat); 2067 freemsg(pktmp); 2068 } 2069 } 2070 2071 /* 2072 * Allocate and return a new mblk to hold an IP and ICMP header, and chain the 2073 * original packet to its b_cont. Returns NULL on failure. 2074 */ 2075 static mblk_t * 2076 iptun_build_icmperr(size_t hdrs_size, mblk_t *orig_pkt) 2077 { 2078 mblk_t *icmperr_mp; 2079 2080 if ((icmperr_mp = allocb(hdrs_size, BPRI_MED)) != NULL) { 2081 icmperr_mp->b_wptr += hdrs_size; 2082 /* tack on the offending packet */ 2083 icmperr_mp->b_cont = orig_pkt; 2084 } 2085 return (icmperr_mp); 2086 } 2087 2088 /* 2089 * Transmit an ICMP error. mp->b_rptr points at the packet to be included in 2090 * the ICMP error. 2091 */ 2092 static void 2093 iptun_sendicmp_v4(iptun_t *iptun, icmph_t *icmp, ipha_t *orig_ipha, mblk_t *mp, 2094 ts_label_t *tsl) 2095 { 2096 size_t orig_pktsize, hdrs_size; 2097 mblk_t *icmperr_mp; 2098 ipha_t *new_ipha; 2099 icmph_t *new_icmp; 2100 ip_xmit_attr_t ixas; 2101 conn_t *connp = iptun->iptun_connp; 2102 2103 orig_pktsize = msgdsize(mp); 2104 hdrs_size = sizeof (ipha_t) + sizeof (icmph_t); 2105 if ((icmperr_mp = iptun_build_icmperr(hdrs_size, mp)) == NULL) { 2106 iptun_drop_pkt(mp, &iptun->iptun_noxmtbuf); 2107 return; 2108 } 2109 2110 new_ipha = (ipha_t *)icmperr_mp->b_rptr; 2111 new_icmp = (icmph_t *)(new_ipha + 1); 2112 2113 new_ipha->ipha_version_and_hdr_length = IP_SIMPLE_HDR_VERSION; 2114 new_ipha->ipha_type_of_service = 0; 2115 new_ipha->ipha_ident = 0; 2116 new_ipha->ipha_fragment_offset_and_flags = 0; 2117 new_ipha->ipha_ttl = orig_ipha->ipha_ttl; 2118 new_ipha->ipha_protocol = IPPROTO_ICMP; 2119 new_ipha->ipha_src = orig_ipha->ipha_dst; 2120 new_ipha->ipha_dst = orig_ipha->ipha_src; 2121 new_ipha->ipha_hdr_checksum = 0; /* will be computed by ip */ 2122 new_ipha->ipha_length = htons(hdrs_size + orig_pktsize); 2123 2124 *new_icmp = *icmp; 2125 new_icmp->icmph_checksum = 0; 2126 new_icmp->icmph_checksum = IP_CSUM(icmperr_mp, sizeof (ipha_t), 0); 2127 2128 bzero(&ixas, sizeof (ixas)); 2129 ixas.ixa_flags = IXAF_BASIC_SIMPLE_V4; 2130 if (new_ipha->ipha_src == INADDR_ANY) { 2131 ixas.ixa_flags &= ~IXAF_VERIFY_SOURCE; 2132 ixas.ixa_flags |= IXAF_SET_SOURCE; 2133 } 2134 2135 ixas.ixa_zoneid = IPCL_ZONEID(connp); 2136 ixas.ixa_ipst = connp->conn_netstack->netstack_ip; 2137 ixas.ixa_cred = connp->conn_cred; 2138 ixas.ixa_cpid = NOPID; 2139 if (is_system_labeled()) 2140 ixas.ixa_tsl = tsl; 2141 2142 ixas.ixa_ifindex = 0; 2143 ixas.ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 2144 2145 (void) ip_output_simple(icmperr_mp, &ixas); 2146 ixa_cleanup(&ixas); 2147 } 2148 2149 static void 2150 iptun_sendicmp_v6(iptun_t *iptun, icmp6_t *icmp6, ip6_t *orig_ip6h, mblk_t *mp, 2151 ts_label_t *tsl) 2152 { 2153 size_t orig_pktsize, hdrs_size; 2154 mblk_t *icmp6err_mp; 2155 ip6_t *new_ip6h; 2156 icmp6_t *new_icmp6; 2157 ip_xmit_attr_t ixas; 2158 conn_t *connp = iptun->iptun_connp; 2159 2160 orig_pktsize = msgdsize(mp); 2161 hdrs_size = sizeof (ip6_t) + sizeof (icmp6_t); 2162 if ((icmp6err_mp = iptun_build_icmperr(hdrs_size, mp)) == NULL) { 2163 iptun_drop_pkt(mp, &iptun->iptun_noxmtbuf); 2164 return; 2165 } 2166 2167 new_ip6h = (ip6_t *)icmp6err_mp->b_rptr; 2168 new_icmp6 = (icmp6_t *)(new_ip6h + 1); 2169 2170 new_ip6h->ip6_vcf = orig_ip6h->ip6_vcf; 2171 new_ip6h->ip6_plen = htons(sizeof (icmp6_t) + orig_pktsize); 2172 new_ip6h->ip6_hops = orig_ip6h->ip6_hops; 2173 new_ip6h->ip6_nxt = IPPROTO_ICMPV6; 2174 new_ip6h->ip6_src = orig_ip6h->ip6_dst; 2175 new_ip6h->ip6_dst = orig_ip6h->ip6_src; 2176 2177 *new_icmp6 = *icmp6; 2178 /* The checksum is calculated in ip_output_simple and friends. */ 2179 new_icmp6->icmp6_cksum = new_ip6h->ip6_plen; 2180 2181 bzero(&ixas, sizeof (ixas)); 2182 ixas.ixa_flags = IXAF_BASIC_SIMPLE_V6; 2183 if (IN6_IS_ADDR_UNSPECIFIED(&new_ip6h->ip6_src)) { 2184 ixas.ixa_flags &= ~IXAF_VERIFY_SOURCE; 2185 ixas.ixa_flags |= IXAF_SET_SOURCE; 2186 } 2187 2188 ixas.ixa_zoneid = IPCL_ZONEID(connp); 2189 ixas.ixa_ipst = connp->conn_netstack->netstack_ip; 2190 ixas.ixa_cred = connp->conn_cred; 2191 ixas.ixa_cpid = NOPID; 2192 if (is_system_labeled()) 2193 ixas.ixa_tsl = tsl; 2194 2195 ixas.ixa_ifindex = 0; 2196 ixas.ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 2197 2198 (void) ip_output_simple(icmp6err_mp, &ixas); 2199 ixa_cleanup(&ixas); 2200 } 2201 2202 static void 2203 iptun_icmp_error_v4(iptun_t *iptun, ipha_t *orig_ipha, mblk_t *mp, 2204 uint8_t type, uint8_t code, ts_label_t *tsl) 2205 { 2206 icmph_t icmp; 2207 2208 bzero(&icmp, sizeof (icmp)); 2209 icmp.icmph_type = type; 2210 icmp.icmph_code = code; 2211 2212 iptun_sendicmp_v4(iptun, &icmp, orig_ipha, mp, tsl); 2213 } 2214 2215 static void 2216 iptun_icmp_fragneeded_v4(iptun_t *iptun, uint32_t newmtu, ipha_t *orig_ipha, 2217 mblk_t *mp, ts_label_t *tsl) 2218 { 2219 icmph_t icmp; 2220 2221 icmp.icmph_type = ICMP_DEST_UNREACHABLE; 2222 icmp.icmph_code = ICMP_FRAGMENTATION_NEEDED; 2223 icmp.icmph_du_zero = 0; 2224 icmp.icmph_du_mtu = htons(newmtu); 2225 2226 iptun_sendicmp_v4(iptun, &icmp, orig_ipha, mp, tsl); 2227 } 2228 2229 static void 2230 iptun_icmp_error_v6(iptun_t *iptun, ip6_t *orig_ip6h, mblk_t *mp, 2231 uint8_t type, uint8_t code, uint32_t offset, ts_label_t *tsl) 2232 { 2233 icmp6_t icmp6; 2234 2235 bzero(&icmp6, sizeof (icmp6)); 2236 icmp6.icmp6_type = type; 2237 icmp6.icmp6_code = code; 2238 if (type == ICMP6_PARAM_PROB) 2239 icmp6.icmp6_pptr = htonl(offset); 2240 2241 iptun_sendicmp_v6(iptun, &icmp6, orig_ip6h, mp, tsl); 2242 } 2243 2244 static void 2245 iptun_icmp_toobig_v6(iptun_t *iptun, uint32_t newmtu, ip6_t *orig_ip6h, 2246 mblk_t *mp, ts_label_t *tsl) 2247 { 2248 icmp6_t icmp6; 2249 2250 icmp6.icmp6_type = ICMP6_PACKET_TOO_BIG; 2251 icmp6.icmp6_code = 0; 2252 icmp6.icmp6_mtu = htonl(newmtu); 2253 2254 iptun_sendicmp_v6(iptun, &icmp6, orig_ip6h, mp, tsl); 2255 } 2256 2257 /* 2258 * Determines if the packet pointed to by ipha or ip6h is an ICMP error. The 2259 * mp argument is only used to do bounds checking. 2260 */ 2261 static boolean_t 2262 is_icmp_error(mblk_t *mp, ipha_t *ipha, ip6_t *ip6h) 2263 { 2264 uint16_t hlen; 2265 2266 if (ipha != NULL) { 2267 icmph_t *icmph; 2268 2269 ASSERT(ip6h == NULL); 2270 if (ipha->ipha_protocol != IPPROTO_ICMP) 2271 return (B_FALSE); 2272 2273 hlen = IPH_HDR_LENGTH(ipha); 2274 icmph = (icmph_t *)((uint8_t *)ipha + hlen); 2275 return (ICMP_IS_ERROR(icmph->icmph_type) || 2276 icmph->icmph_type == ICMP_REDIRECT); 2277 } else { 2278 icmp6_t *icmp6; 2279 uint8_t *nexthdrp; 2280 2281 ASSERT(ip6h != NULL); 2282 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hlen, &nexthdrp) || 2283 *nexthdrp != IPPROTO_ICMPV6) { 2284 return (B_FALSE); 2285 } 2286 2287 icmp6 = (icmp6_t *)((uint8_t *)ip6h + hlen); 2288 return (ICMP6_IS_ERROR(icmp6->icmp6_type) || 2289 icmp6->icmp6_type == ND_REDIRECT); 2290 } 2291 } 2292 2293 /* 2294 * Find inner and outer IP headers from a tunneled packet as setup for calls 2295 * into ipsec_tun_{in,out}bound(). 2296 * Note that we need to allow the outer header to be in a separate mblk from 2297 * the inner header. 2298 * If the caller knows the outer_hlen, the caller passes it in. Otherwise zero. 2299 */ 2300 static size_t 2301 iptun_find_headers(mblk_t *mp, size_t outer_hlen, ipha_t **outer4, 2302 ipha_t **inner4, ip6_t **outer6, ip6_t **inner6) 2303 { 2304 ipha_t *ipha; 2305 size_t first_mblkl = MBLKL(mp); 2306 mblk_t *inner_mp; 2307 2308 /* 2309 * Don't bother handling packets that don't have a full IP header in 2310 * the fist mblk. For the input path, the ip module ensures that this 2311 * won't happen, and on the output path, the IP tunneling MAC-type 2312 * plugins ensure that this also won't happen. 2313 */ 2314 if (first_mblkl < sizeof (ipha_t)) 2315 return (0); 2316 ipha = (ipha_t *)(mp->b_rptr); 2317 switch (IPH_HDR_VERSION(ipha)) { 2318 case IPV4_VERSION: 2319 *outer4 = ipha; 2320 *outer6 = NULL; 2321 if (outer_hlen == 0) 2322 outer_hlen = IPH_HDR_LENGTH(ipha); 2323 break; 2324 case IPV6_VERSION: 2325 *outer4 = NULL; 2326 *outer6 = (ip6_t *)ipha; 2327 if (outer_hlen == 0) 2328 outer_hlen = ip_hdr_length_v6(mp, (ip6_t *)ipha); 2329 break; 2330 default: 2331 return (0); 2332 } 2333 2334 if (first_mblkl < outer_hlen || 2335 (first_mblkl == outer_hlen && mp->b_cont == NULL)) 2336 return (0); 2337 2338 /* 2339 * We don't bother doing a pullup here since the outer header will 2340 * just get stripped off soon on input anyway. We just want to ensure 2341 * that the inner* pointer points to a full header. 2342 */ 2343 if (first_mblkl == outer_hlen) { 2344 inner_mp = mp->b_cont; 2345 ipha = (ipha_t *)inner_mp->b_rptr; 2346 } else { 2347 inner_mp = mp; 2348 ipha = (ipha_t *)(mp->b_rptr + outer_hlen); 2349 } 2350 switch (IPH_HDR_VERSION(ipha)) { 2351 case IPV4_VERSION: 2352 if (inner_mp->b_wptr - (uint8_t *)ipha < sizeof (ipha_t)) 2353 return (0); 2354 *inner4 = ipha; 2355 *inner6 = NULL; 2356 break; 2357 case IPV6_VERSION: 2358 if (inner_mp->b_wptr - (uint8_t *)ipha < sizeof (ip6_t)) 2359 return (0); 2360 *inner4 = NULL; 2361 *inner6 = (ip6_t *)ipha; 2362 break; 2363 default: 2364 return (0); 2365 } 2366 2367 return (outer_hlen); 2368 } 2369 2370 /* 2371 * Received ICMP error in response to an X over IPv4 packet that we 2372 * transmitted. 2373 * 2374 * NOTE: "outer" refers to what's inside the ICMP payload. We will get one of 2375 * the following: 2376 * 2377 * [IPv4(0)][ICMPv4][IPv4(1)][IPv4(2)][ULP] 2378 * 2379 * or 2380 * 2381 * [IPv4(0)][ICMPv4][IPv4(1)][IPv6][ULP] 2382 * 2383 * And "outer4" will get set to IPv4(1), and inner[46] will correspond to 2384 * whatever the very-inner packet is (IPv4(2) or IPv6). 2385 */ 2386 static void 2387 iptun_input_icmp_v4(iptun_t *iptun, mblk_t *data_mp, icmph_t *icmph, 2388 ip_recv_attr_t *ira) 2389 { 2390 uint8_t *orig; 2391 ipha_t *outer4, *inner4; 2392 ip6_t *outer6, *inner6; 2393 int outer_hlen; 2394 uint8_t type, code; 2395 2396 ASSERT(data_mp->b_cont == NULL); 2397 /* 2398 * Temporarily move b_rptr forward so that iptun_find_headers() can 2399 * find headers in the ICMP packet payload. 2400 */ 2401 orig = data_mp->b_rptr; 2402 data_mp->b_rptr = (uint8_t *)(icmph + 1); 2403 /* 2404 * The ip module ensures that ICMP errors contain at least the 2405 * original IP header (otherwise, the error would never have made it 2406 * here). 2407 */ 2408 ASSERT(MBLKL(data_mp) >= 0); 2409 outer_hlen = iptun_find_headers(data_mp, 0, &outer4, &inner4, &outer6, 2410 &inner6); 2411 ASSERT(outer6 == NULL); 2412 data_mp->b_rptr = orig; 2413 if (outer_hlen == 0) { 2414 iptun_drop_pkt(data_mp, &iptun->iptun_ierrors); 2415 return; 2416 } 2417 2418 /* Only ICMP errors due to tunneled packets should reach here. */ 2419 ASSERT(outer4->ipha_protocol == IPPROTO_ENCAP || 2420 outer4->ipha_protocol == IPPROTO_IPV6); 2421 2422 data_mp = ipsec_tun_inbound(ira, data_mp, iptun->iptun_itp, 2423 inner4, inner6, outer4, outer6, -outer_hlen, iptun->iptun_ns); 2424 if (data_mp == NULL) { 2425 /* Callee did all of the freeing. */ 2426 atomic_inc_64(&iptun->iptun_ierrors); 2427 return; 2428 } 2429 /* We should never see reassembled fragment here. */ 2430 ASSERT(data_mp->b_next == NULL); 2431 2432 data_mp->b_rptr = (uint8_t *)outer4 + outer_hlen; 2433 2434 /* 2435 * If the original packet being transmitted was itself an ICMP error, 2436 * then drop this packet. We don't want to generate an ICMP error in 2437 * response to an ICMP error. 2438 */ 2439 if (is_icmp_error(data_mp, inner4, inner6)) { 2440 iptun_drop_pkt(data_mp, &iptun->iptun_norcvbuf); 2441 return; 2442 } 2443 2444 switch (icmph->icmph_type) { 2445 case ICMP_DEST_UNREACHABLE: 2446 type = (inner4 != NULL ? icmph->icmph_type : ICMP6_DST_UNREACH); 2447 switch (icmph->icmph_code) { 2448 case ICMP_FRAGMENTATION_NEEDED: { 2449 uint32_t newmtu; 2450 2451 /* 2452 * We reconcile this with the fact that the tunnel may 2453 * also have IPsec policy by letting iptun_update_mtu 2454 * take care of it. 2455 */ 2456 newmtu = iptun_update_mtu(iptun, NULL, 2457 ntohs(icmph->icmph_du_mtu)); 2458 2459 if (inner4 != NULL) { 2460 iptun_icmp_fragneeded_v4(iptun, newmtu, inner4, 2461 data_mp, ira->ira_tsl); 2462 } else { 2463 iptun_icmp_toobig_v6(iptun, newmtu, inner6, 2464 data_mp, ira->ira_tsl); 2465 } 2466 return; 2467 } 2468 case ICMP_DEST_NET_UNREACH_ADMIN: 2469 case ICMP_DEST_HOST_UNREACH_ADMIN: 2470 code = (inner4 != NULL ? ICMP_DEST_NET_UNREACH_ADMIN : 2471 ICMP6_DST_UNREACH_ADMIN); 2472 break; 2473 default: 2474 code = (inner4 != NULL ? ICMP_HOST_UNREACHABLE : 2475 ICMP6_DST_UNREACH_ADDR); 2476 break; 2477 } 2478 break; 2479 case ICMP_TIME_EXCEEDED: 2480 if (inner6 != NULL) { 2481 type = ICMP6_TIME_EXCEEDED; 2482 code = 0; 2483 } /* else we're already set. */ 2484 break; 2485 case ICMP_PARAM_PROBLEM: 2486 /* 2487 * This is a problem with the outer header we transmitted. 2488 * Treat this as an output error. 2489 */ 2490 iptun_drop_pkt(data_mp, &iptun->iptun_oerrors); 2491 return; 2492 default: 2493 iptun_drop_pkt(data_mp, &iptun->iptun_norcvbuf); 2494 return; 2495 } 2496 2497 if (inner4 != NULL) { 2498 iptun_icmp_error_v4(iptun, inner4, data_mp, type, code, 2499 ira->ira_tsl); 2500 } else { 2501 iptun_icmp_error_v6(iptun, inner6, data_mp, type, code, 0, 2502 ira->ira_tsl); 2503 } 2504 } 2505 2506 /* 2507 * Return B_TRUE if the IPv6 packet pointed to by ip6h contains a Tunnel 2508 * Encapsulation Limit destination option. If there is one, set encaplim_ptr 2509 * to point to the option value. 2510 */ 2511 static boolean_t 2512 iptun_find_encaplimit(mblk_t *mp, ip6_t *ip6h, uint8_t **encaplim_ptr) 2513 { 2514 ip_pkt_t pkt; 2515 uint8_t *endptr; 2516 ip6_dest_t *destp; 2517 struct ip6_opt *optp; 2518 2519 pkt.ipp_fields = 0; /* must be initialized */ 2520 (void) ip_find_hdr_v6(mp, ip6h, B_FALSE, &pkt, NULL); 2521 if ((pkt.ipp_fields & IPPF_DSTOPTS) != 0) { 2522 destp = pkt.ipp_dstopts; 2523 } else if ((pkt.ipp_fields & IPPF_RTHDRDSTOPTS) != 0) { 2524 destp = pkt.ipp_rthdrdstopts; 2525 } else { 2526 return (B_FALSE); 2527 } 2528 2529 endptr = (uint8_t *)destp + 8 * (destp->ip6d_len + 1); 2530 optp = (struct ip6_opt *)(destp + 1); 2531 while (endptr - (uint8_t *)optp > sizeof (*optp)) { 2532 if (optp->ip6o_type == IP6OPT_TUNNEL_LIMIT) { 2533 if ((uint8_t *)(optp + 1) >= endptr) 2534 return (B_FALSE); 2535 *encaplim_ptr = (uint8_t *)&optp[1]; 2536 return (B_TRUE); 2537 } 2538 optp = (struct ip6_opt *)((uint8_t *)optp + optp->ip6o_len + 2); 2539 } 2540 return (B_FALSE); 2541 } 2542 2543 /* 2544 * Received ICMPv6 error in response to an X over IPv6 packet that we 2545 * transmitted. 2546 * 2547 * NOTE: "outer" refers to what's inside the ICMP payload. We will get one of 2548 * the following: 2549 * 2550 * [IPv6(0)][ICMPv6][IPv6(1)][IPv4][ULP] 2551 * 2552 * or 2553 * 2554 * [IPv6(0)][ICMPv6][IPv6(1)][IPv6(2)][ULP] 2555 * 2556 * And "outer6" will get set to IPv6(1), and inner[46] will correspond to 2557 * whatever the very-inner packet is (IPv4 or IPv6(2)). 2558 */ 2559 static void 2560 iptun_input_icmp_v6(iptun_t *iptun, mblk_t *data_mp, icmp6_t *icmp6h, 2561 ip_recv_attr_t *ira) 2562 { 2563 uint8_t *orig; 2564 ipha_t *outer4, *inner4; 2565 ip6_t *outer6, *inner6; 2566 int outer_hlen; 2567 uint8_t type, code; 2568 2569 ASSERT(data_mp->b_cont == NULL); 2570 2571 /* 2572 * Temporarily move b_rptr forward so that iptun_find_headers() can 2573 * find IP headers in the ICMP packet payload. 2574 */ 2575 orig = data_mp->b_rptr; 2576 data_mp->b_rptr = (uint8_t *)(icmp6h + 1); 2577 /* 2578 * The ip module ensures that ICMP errors contain at least the 2579 * original IP header (otherwise, the error would never have made it 2580 * here). 2581 */ 2582 ASSERT(MBLKL(data_mp) >= 0); 2583 outer_hlen = iptun_find_headers(data_mp, 0, &outer4, &inner4, &outer6, 2584 &inner6); 2585 ASSERT(outer4 == NULL); 2586 data_mp->b_rptr = orig; /* Restore r_ptr */ 2587 if (outer_hlen == 0) { 2588 iptun_drop_pkt(data_mp, &iptun->iptun_ierrors); 2589 return; 2590 } 2591 2592 data_mp = ipsec_tun_inbound(ira, data_mp, iptun->iptun_itp, 2593 inner4, inner6, outer4, outer6, -outer_hlen, iptun->iptun_ns); 2594 if (data_mp == NULL) { 2595 /* Callee did all of the freeing. */ 2596 atomic_inc_64(&iptun->iptun_ierrors); 2597 return; 2598 } 2599 /* We should never see reassembled fragment here. */ 2600 ASSERT(data_mp->b_next == NULL); 2601 2602 data_mp->b_rptr = (uint8_t *)outer6 + outer_hlen; 2603 2604 /* 2605 * If the original packet being transmitted was itself an ICMP error, 2606 * then drop this packet. We don't want to generate an ICMP error in 2607 * response to an ICMP error. 2608 */ 2609 if (is_icmp_error(data_mp, inner4, inner6)) { 2610 iptun_drop_pkt(data_mp, &iptun->iptun_norcvbuf); 2611 return; 2612 } 2613 2614 switch (icmp6h->icmp6_type) { 2615 case ICMP6_PARAM_PROB: { 2616 uint8_t *encaplim_ptr; 2617 2618 /* 2619 * If the ICMPv6 error points to a valid Tunnel Encapsulation 2620 * Limit option and the limit value is 0, then fall through 2621 * and send a host unreachable message. Otherwise, treat the 2622 * error as an output error, as there must have been a problem 2623 * with a packet we sent. 2624 */ 2625 if (!iptun_find_encaplimit(data_mp, outer6, &encaplim_ptr) || 2626 (icmp6h->icmp6_pptr != 2627 ((ptrdiff_t)encaplim_ptr - (ptrdiff_t)outer6)) || 2628 *encaplim_ptr != 0) { 2629 iptun_drop_pkt(data_mp, &iptun->iptun_oerrors); 2630 return; 2631 } 2632 /* FALLTHRU */ 2633 } 2634 case ICMP6_TIME_EXCEEDED: 2635 case ICMP6_DST_UNREACH: 2636 type = (inner4 != NULL ? ICMP_DEST_UNREACHABLE : 2637 ICMP6_DST_UNREACH); 2638 code = (inner4 != NULL ? ICMP_HOST_UNREACHABLE : 2639 ICMP6_DST_UNREACH_ADDR); 2640 break; 2641 case ICMP6_PACKET_TOO_BIG: { 2642 uint32_t newmtu; 2643 2644 /* 2645 * We reconcile this with the fact that the tunnel may also 2646 * have IPsec policy by letting iptun_update_mtu take care of 2647 * it. 2648 */ 2649 newmtu = iptun_update_mtu(iptun, NULL, 2650 ntohl(icmp6h->icmp6_mtu)); 2651 2652 if (inner4 != NULL) { 2653 iptun_icmp_fragneeded_v4(iptun, newmtu, inner4, 2654 data_mp, ira->ira_tsl); 2655 } else { 2656 iptun_icmp_toobig_v6(iptun, newmtu, inner6, data_mp, 2657 ira->ira_tsl); 2658 } 2659 return; 2660 } 2661 default: 2662 iptun_drop_pkt(data_mp, &iptun->iptun_norcvbuf); 2663 return; 2664 } 2665 2666 if (inner4 != NULL) { 2667 iptun_icmp_error_v4(iptun, inner4, data_mp, type, code, 2668 ira->ira_tsl); 2669 } else { 2670 iptun_icmp_error_v6(iptun, inner6, data_mp, type, code, 0, 2671 ira->ira_tsl); 2672 } 2673 } 2674 2675 /* 2676 * Called as conn_recvicmp from IP for ICMP errors. 2677 */ 2678 /* ARGSUSED2 */ 2679 static void 2680 iptun_input_icmp(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *ira) 2681 { 2682 conn_t *connp = arg; 2683 iptun_t *iptun = connp->conn_iptun; 2684 mblk_t *tmpmp; 2685 size_t hlen; 2686 2687 ASSERT(IPCL_IS_IPTUN(connp)); 2688 2689 if (mp->b_cont != NULL) { 2690 /* 2691 * Since ICMP error processing necessitates access to bits 2692 * that are within the ICMP error payload (the original packet 2693 * that caused the error), pull everything up into a single 2694 * block for convenience. 2695 */ 2696 if ((tmpmp = msgpullup(mp, -1)) == NULL) { 2697 iptun_drop_pkt(mp, &iptun->iptun_norcvbuf); 2698 return; 2699 } 2700 freemsg(mp); 2701 mp = tmpmp; 2702 } 2703 2704 hlen = ira->ira_ip_hdr_length; 2705 switch (iptun->iptun_typeinfo->iti_ipvers) { 2706 case IPV4_VERSION: 2707 /* 2708 * The outer IP header coming up from IP is always ipha_t 2709 * alligned (otherwise, we would have crashed in ip). 2710 */ 2711 iptun_input_icmp_v4(iptun, mp, (icmph_t *)(mp->b_rptr + hlen), 2712 ira); 2713 break; 2714 case IPV6_VERSION: 2715 iptun_input_icmp_v6(iptun, mp, (icmp6_t *)(mp->b_rptr + hlen), 2716 ira); 2717 break; 2718 } 2719 } 2720 2721 static boolean_t 2722 iptun_in_6to4_ok(iptun_t *iptun, ipha_t *outer4, ip6_t *inner6) 2723 { 2724 ipaddr_t v4addr; 2725 2726 /* 2727 * It's possible that someone sent us an IPv4-in-IPv4 packet with the 2728 * IPv4 address of a 6to4 tunnel as the destination. 2729 */ 2730 if (inner6 == NULL) 2731 return (B_FALSE); 2732 2733 /* 2734 * Make sure that the IPv6 destination is within the site that this 2735 * 6to4 tunnel is routing for. We don't want people bouncing random 2736 * tunneled IPv6 packets through this 6to4 router. 2737 */ 2738 IN6_6TO4_TO_V4ADDR(&inner6->ip6_dst, (struct in_addr *)&v4addr); 2739 if (outer4->ipha_dst != v4addr) 2740 return (B_FALSE); 2741 2742 if (IN6_IS_ADDR_6TO4(&inner6->ip6_src)) { 2743 /* 2744 * Section 9 of RFC 3056 (security considerations) suggests 2745 * that when a packet is from a 6to4 site (i.e., it's not a 2746 * global address being forwarded froma relay router), make 2747 * sure that the packet was tunneled by that site's 6to4 2748 * router. 2749 */ 2750 IN6_6TO4_TO_V4ADDR(&inner6->ip6_src, (struct in_addr *)&v4addr); 2751 if (outer4->ipha_src != v4addr) 2752 return (B_FALSE); 2753 } else { 2754 /* 2755 * Only accept packets from a relay router if we've configured 2756 * outbound relay router functionality. 2757 */ 2758 if (iptun->iptun_iptuns->iptuns_relay_rtr_addr == INADDR_ANY) 2759 return (B_FALSE); 2760 } 2761 2762 return (B_TRUE); 2763 } 2764 2765 /* 2766 * Input function for everything that comes up from the ip module below us. 2767 * This is called directly from the ip module via connp->conn_recv(). 2768 * 2769 * We receive M_DATA messages with IP-in-IP tunneled packets. 2770 */ 2771 /* ARGSUSED2 */ 2772 static void 2773 iptun_input(void *arg, mblk_t *data_mp, void *arg2, ip_recv_attr_t *ira) 2774 { 2775 conn_t *connp = arg; 2776 iptun_t *iptun = connp->conn_iptun; 2777 int outer_hlen; 2778 ipha_t *outer4, *inner4; 2779 ip6_t *outer6, *inner6; 2780 2781 ASSERT(IPCL_IS_IPTUN(connp)); 2782 ASSERT(DB_TYPE(data_mp) == M_DATA); 2783 2784 outer_hlen = iptun_find_headers(data_mp, ira->ira_ip_hdr_length, 2785 &outer4, &inner4, &outer6, &inner6); 2786 if (outer_hlen == 0) 2787 goto drop; 2788 2789 /* 2790 * If the system is labeled, we call tsol_check_dest() on the packet 2791 * destination (our local tunnel address) to ensure that the packet as 2792 * labeled should be allowed to be sent to us. We don't need to call 2793 * the more involved tsol_receive_local() since the tunnel link itself 2794 * cannot be assigned to shared-stack non-global zones. 2795 */ 2796 if (ira->ira_flags & IRAF_SYSTEM_LABELED) { 2797 if (ira->ira_tsl == NULL) 2798 goto drop; 2799 if (tsol_check_dest(ira->ira_tsl, (outer4 != NULL ? 2800 (void *)&outer4->ipha_dst : (void *)&outer6->ip6_dst), 2801 (outer4 != NULL ? IPV4_VERSION : IPV6_VERSION), 2802 CONN_MAC_DEFAULT, B_FALSE, NULL) != 0) 2803 goto drop; 2804 } 2805 2806 data_mp = ipsec_tun_inbound(ira, data_mp, iptun->iptun_itp, 2807 inner4, inner6, outer4, outer6, outer_hlen, iptun->iptun_ns); 2808 if (data_mp == NULL) { 2809 /* Callee did all of the freeing. */ 2810 return; 2811 } 2812 2813 if (iptun->iptun_typeinfo->iti_type == IPTUN_TYPE_6TO4 && 2814 !iptun_in_6to4_ok(iptun, outer4, inner6)) 2815 goto drop; 2816 2817 /* 2818 * We need to statistically account for each packet individually, so 2819 * we might as well split up any b_next chains here. 2820 */ 2821 do { 2822 mblk_t *mp; 2823 2824 mp = data_mp->b_next; 2825 data_mp->b_next = NULL; 2826 2827 atomic_inc_64(&iptun->iptun_ipackets); 2828 atomic_add_64(&iptun->iptun_rbytes, msgdsize(data_mp)); 2829 mac_rx(iptun->iptun_mh, NULL, data_mp); 2830 2831 data_mp = mp; 2832 } while (data_mp != NULL); 2833 return; 2834 drop: 2835 iptun_drop_pkt(data_mp, &iptun->iptun_ierrors); 2836 } 2837 2838 /* 2839 * Do 6to4-specific header-processing on output. Return B_TRUE if the packet 2840 * was processed without issue, or B_FALSE if the packet had issues and should 2841 * be dropped. 2842 */ 2843 static boolean_t 2844 iptun_out_process_6to4(iptun_t *iptun, ipha_t *outer4, ip6_t *inner6) 2845 { 2846 ipaddr_t v4addr; 2847 2848 /* 2849 * IPv6 source must be a 6to4 address. This is because a conscious 2850 * decision was made to not allow a Solaris system to be used as a 2851 * relay router (for security reasons) when 6to4 was initially 2852 * integrated. If this decision is ever reversed, the following check 2853 * can be removed. 2854 */ 2855 if (!IN6_IS_ADDR_6TO4(&inner6->ip6_src)) 2856 return (B_FALSE); 2857 2858 /* 2859 * RFC3056 mandates that the IPv4 source MUST be set to the IPv4 2860 * portion of the 6to4 IPv6 source address. In other words, make sure 2861 * that we're tunneling packets from our own 6to4 site. 2862 */ 2863 IN6_6TO4_TO_V4ADDR(&inner6->ip6_src, (struct in_addr *)&v4addr); 2864 if (outer4->ipha_src != v4addr) 2865 return (B_FALSE); 2866 2867 /* 2868 * Automatically set the destination of the outer IPv4 header as 2869 * described in RFC3056. There are two possibilities: 2870 * 2871 * a. If the IPv6 destination is a 6to4 address, set the IPv4 address 2872 * to the IPv4 portion of the 6to4 address. 2873 * b. If the IPv6 destination is a native IPv6 address, set the IPv4 2874 * destination to the address of a relay router. 2875 * 2876 * Design Note: b shouldn't be necessary here, and this is a flaw in 2877 * the design of the 6to4relay command. Instead of setting a 6to4 2878 * relay address in this module via an ioctl, the 6to4relay command 2879 * could simply add a IPv6 route for native IPv6 addresses (such as a 2880 * default route) in the forwarding table that uses a 6to4 destination 2881 * as its next hop, and the IPv4 portion of that address could be a 2882 * 6to4 relay address. In order for this to work, IP would have to 2883 * resolve the next hop address, which would necessitate a link-layer 2884 * address resolver for 6to4 links, which doesn't exist today. 2885 * 2886 * In fact, if a resolver existed for 6to4 links, then setting the 2887 * IPv4 destination in the outer header could be done as part of 2888 * link-layer address resolution and fast-path header generation, and 2889 * not here. 2890 */ 2891 if (IN6_IS_ADDR_6TO4(&inner6->ip6_dst)) { 2892 /* destination is a 6to4 router */ 2893 IN6_6TO4_TO_V4ADDR(&inner6->ip6_dst, 2894 (struct in_addr *)&outer4->ipha_dst); 2895 2896 /* Reject attempts to send to INADDR_ANY */ 2897 if (outer4->ipha_dst == INADDR_ANY) 2898 return (B_FALSE); 2899 } else { 2900 /* 2901 * The destination is a native IPv6 address. If output to a 2902 * relay-router is enabled, use the relay-router's IPv4 2903 * address as the destination. 2904 */ 2905 if (iptun->iptun_iptuns->iptuns_relay_rtr_addr == INADDR_ANY) 2906 return (B_FALSE); 2907 outer4->ipha_dst = iptun->iptun_iptuns->iptuns_relay_rtr_addr; 2908 } 2909 2910 /* 2911 * If the outer source and destination are equal, this means that the 2912 * 6to4 router somehow forwarded an IPv6 packet destined for its own 2913 * 6to4 site to its 6to4 tunnel interface, which will result in this 2914 * packet infinitely bouncing between ip and iptun. 2915 */ 2916 return (outer4->ipha_src != outer4->ipha_dst); 2917 } 2918 2919 /* 2920 * Process output packets with outer IPv4 headers. Frees mp and bumps stat on 2921 * error. 2922 */ 2923 static mblk_t * 2924 iptun_out_process_ipv4(iptun_t *iptun, mblk_t *mp, ipha_t *outer4, 2925 ipha_t *inner4, ip6_t *inner6, ip_xmit_attr_t *ixa) 2926 { 2927 uint8_t *innerptr = (inner4 != NULL ? 2928 (uint8_t *)inner4 : (uint8_t *)inner6); 2929 size_t minmtu = iptun->iptun_typeinfo->iti_minmtu; 2930 2931 if (inner4 != NULL) { 2932 ASSERT(outer4->ipha_protocol == IPPROTO_ENCAP); 2933 /* 2934 * Copy the tos from the inner IPv4 header. We mask off ECN 2935 * bits (bits 6 and 7) because there is currently no 2936 * tunnel-tunnel communication to determine if both sides 2937 * support ECN. We opt for the safe choice: don't copy the 2938 * ECN bits when doing encapsulation. 2939 */ 2940 outer4->ipha_type_of_service = 2941 inner4->ipha_type_of_service & ~0x03; 2942 } else { 2943 ASSERT(outer4->ipha_protocol == IPPROTO_IPV6 && 2944 inner6 != NULL); 2945 } 2946 if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF) 2947 outer4->ipha_fragment_offset_and_flags |= IPH_DF_HTONS; 2948 else 2949 outer4->ipha_fragment_offset_and_flags &= ~IPH_DF_HTONS; 2950 2951 /* 2952 * As described in section 3.2.2 of RFC4213, if the packet payload is 2953 * less than or equal to the minimum MTU size, then we need to allow 2954 * IPv4 to fragment the packet. The reason is that even if we end up 2955 * receiving an ICMP frag-needed, the interface above this tunnel 2956 * won't be allowed to drop its MTU as a result, since the packet was 2957 * already smaller than the smallest allowable MTU for that interface. 2958 */ 2959 if (mp->b_wptr - innerptr <= minmtu) { 2960 outer4->ipha_fragment_offset_and_flags = 0; 2961 ixa->ixa_flags &= ~IXAF_DONTFRAG; 2962 } else if (!(ixa->ixa_flags & IXAF_PMTU_TOO_SMALL) && 2963 (iptun->iptun_typeinfo->iti_type != IPTUN_TYPE_6TO4)) { 2964 ixa->ixa_flags |= IXAF_DONTFRAG; 2965 } 2966 2967 ixa->ixa_ip_hdr_length = IPH_HDR_LENGTH(outer4); 2968 ixa->ixa_pktlen = msgdsize(mp); 2969 ixa->ixa_protocol = outer4->ipha_protocol; 2970 2971 outer4->ipha_length = htons(ixa->ixa_pktlen); 2972 return (mp); 2973 } 2974 2975 /* 2976 * Insert an encapsulation limit destination option in the packet provided. 2977 * Always consumes the mp argument and returns a new mblk pointer. 2978 */ 2979 static mblk_t * 2980 iptun_insert_encaplimit(iptun_t *iptun, mblk_t *mp, ip6_t *outer6, 2981 uint8_t limit) 2982 { 2983 mblk_t *newmp; 2984 iptun_ipv6hdrs_t *newouter6; 2985 2986 ASSERT(outer6->ip6_nxt == IPPROTO_IPV6); 2987 ASSERT(mp->b_cont == NULL); 2988 2989 mp->b_rptr += sizeof (ip6_t); 2990 newmp = allocb(sizeof (iptun_ipv6hdrs_t) + MBLKL(mp), BPRI_MED); 2991 if (newmp == NULL) { 2992 iptun_drop_pkt(mp, &iptun->iptun_noxmtbuf); 2993 return (NULL); 2994 } 2995 newmp->b_wptr += sizeof (iptun_ipv6hdrs_t); 2996 /* Copy the payload (Starting with the inner IPv6 header). */ 2997 bcopy(mp->b_rptr, newmp->b_wptr, MBLKL(mp)); 2998 newmp->b_wptr += MBLKL(mp); 2999 newouter6 = (iptun_ipv6hdrs_t *)newmp->b_rptr; 3000 /* Now copy the outer IPv6 header. */ 3001 bcopy(outer6, &newouter6->it6h_ip6h, sizeof (ip6_t)); 3002 newouter6->it6h_ip6h.ip6_nxt = IPPROTO_DSTOPTS; 3003 newouter6->it6h_encaplim = iptun_encaplim_init; 3004 newouter6->it6h_encaplim.iel_destopt.ip6d_nxt = outer6->ip6_nxt; 3005 newouter6->it6h_encaplim.iel_telopt.ip6ot_encap_limit = limit; 3006 3007 /* 3008 * The payload length will be set at the end of 3009 * iptun_out_process_ipv6(). 3010 */ 3011 3012 freemsg(mp); 3013 return (newmp); 3014 } 3015 3016 /* 3017 * Process output packets with outer IPv6 headers. Frees mp and bumps stats 3018 * on error. 3019 */ 3020 static mblk_t * 3021 iptun_out_process_ipv6(iptun_t *iptun, mblk_t *mp, ip6_t *outer6, 3022 ipha_t *inner4, ip6_t *inner6, ip_xmit_attr_t *ixa) 3023 { 3024 uint8_t *innerptr = (inner4 != NULL ? 3025 (uint8_t *)inner4 : (uint8_t *)inner6); 3026 size_t minmtu = iptun->iptun_typeinfo->iti_minmtu; 3027 uint8_t *limit, *configlimit; 3028 uint32_t offset; 3029 iptun_ipv6hdrs_t *v6hdrs; 3030 3031 if (inner6 != NULL && iptun_find_encaplimit(mp, inner6, &limit)) { 3032 /* 3033 * The inner packet is an IPv6 packet which itself contains an 3034 * encapsulation limit option. The limit variable points to 3035 * the value in the embedded option. Process the 3036 * encapsulation limit option as specified in RFC 2473. 3037 * 3038 * If limit is 0, then we've exceeded the limit and we need to 3039 * send back an ICMPv6 parameter problem message. 3040 * 3041 * If limit is > 0, then we decrement it by 1 and make sure 3042 * that the encapsulation limit option in the outer header 3043 * reflects that (adding an option if one isn't already 3044 * there). 3045 */ 3046 ASSERT(limit > mp->b_rptr && limit < mp->b_wptr); 3047 if (*limit == 0) { 3048 mp->b_rptr = (uint8_t *)inner6; 3049 offset = limit - mp->b_rptr; 3050 iptun_icmp_error_v6(iptun, inner6, mp, ICMP6_PARAM_PROB, 3051 0, offset, ixa->ixa_tsl); 3052 atomic_inc_64(&iptun->iptun_noxmtbuf); 3053 return (NULL); 3054 } 3055 3056 /* 3057 * The outer header requires an encapsulation limit option. 3058 * If there isn't one already, add one. 3059 */ 3060 if (iptun->iptun_encaplimit == 0) { 3061 if ((mp = iptun_insert_encaplimit(iptun, mp, outer6, 3062 (*limit - 1))) == NULL) 3063 return (NULL); 3064 v6hdrs = (iptun_ipv6hdrs_t *)mp->b_rptr; 3065 } else { 3066 /* 3067 * There is an existing encapsulation limit option in 3068 * the outer header. If the inner encapsulation limit 3069 * is less than the configured encapsulation limit, 3070 * update the outer encapsulation limit to reflect 3071 * this lesser value. 3072 */ 3073 v6hdrs = (iptun_ipv6hdrs_t *)mp->b_rptr; 3074 configlimit = 3075 &v6hdrs->it6h_encaplim.iel_telopt.ip6ot_encap_limit; 3076 if ((*limit - 1) < *configlimit) 3077 *configlimit = (*limit - 1); 3078 } 3079 ixa->ixa_ip_hdr_length = sizeof (iptun_ipv6hdrs_t); 3080 ixa->ixa_protocol = v6hdrs->it6h_encaplim.iel_destopt.ip6d_nxt; 3081 } else { 3082 ixa->ixa_ip_hdr_length = sizeof (ip6_t); 3083 ixa->ixa_protocol = outer6->ip6_nxt; 3084 } 3085 /* 3086 * See iptun_output_process_ipv4() why we allow fragmentation for 3087 * small packets 3088 */ 3089 if (mp->b_wptr - innerptr <= minmtu) 3090 ixa->ixa_flags &= ~IXAF_DONTFRAG; 3091 else if (!(ixa->ixa_flags & IXAF_PMTU_TOO_SMALL)) 3092 ixa->ixa_flags |= IXAF_DONTFRAG; 3093 3094 ixa->ixa_pktlen = msgdsize(mp); 3095 outer6->ip6_plen = htons(ixa->ixa_pktlen - sizeof (ip6_t)); 3096 return (mp); 3097 } 3098 3099 /* 3100 * The IP tunneling MAC-type plugins have already done most of the header 3101 * processing and validity checks. We are simply responsible for multiplexing 3102 * down to the ip module below us. 3103 */ 3104 static void 3105 iptun_output(iptun_t *iptun, mblk_t *mp) 3106 { 3107 conn_t *connp = iptun->iptun_connp; 3108 mblk_t *newmp; 3109 int error; 3110 ip_xmit_attr_t *ixa; 3111 3112 ASSERT(mp->b_datap->db_type == M_DATA); 3113 3114 if (mp->b_cont != NULL) { 3115 if ((newmp = msgpullup(mp, -1)) == NULL) { 3116 iptun_drop_pkt(mp, &iptun->iptun_noxmtbuf); 3117 return; 3118 } 3119 freemsg(mp); 3120 mp = newmp; 3121 } 3122 3123 if (iptun->iptun_typeinfo->iti_type == IPTUN_TYPE_6TO4) { 3124 iptun_output_6to4(iptun, mp); 3125 return; 3126 } 3127 3128 if (is_system_labeled()) { 3129 /* 3130 * Since the label can be different meaning a potentially 3131 * different IRE,we always use a unique ip_xmit_attr_t. 3132 */ 3133 ixa = conn_get_ixa_exclusive(connp); 3134 } else { 3135 /* 3136 * If no other thread is using conn_ixa this just gets a 3137 * reference to conn_ixa. Otherwise we get a safe copy of 3138 * conn_ixa. 3139 */ 3140 ixa = conn_get_ixa(connp, B_FALSE); 3141 } 3142 if (ixa == NULL) { 3143 iptun_drop_pkt(mp, &iptun->iptun_oerrors); 3144 return; 3145 } 3146 3147 /* 3148 * In case we got a safe copy of conn_ixa, then we need 3149 * to fill in any pointers in it. 3150 */ 3151 if (ixa->ixa_ire == NULL) { 3152 error = ip_attr_connect(connp, ixa, &connp->conn_saddr_v6, 3153 &connp->conn_faddr_v6, &connp->conn_faddr_v6, 0, 3154 NULL, NULL, 0); 3155 if (error != 0) { 3156 if (ixa->ixa_ire != NULL && 3157 (error == EHOSTUNREACH || error == ENETUNREACH)) { 3158 /* 3159 * Let conn_ip_output/ire_send_noroute return 3160 * the error and send any local ICMP error. 3161 */ 3162 error = 0; 3163 } else { 3164 ixa_refrele(ixa); 3165 iptun_drop_pkt(mp, &iptun->iptun_oerrors); 3166 return; 3167 } 3168 } 3169 } 3170 3171 iptun_output_common(iptun, ixa, mp); 3172 ixa_refrele(ixa); 3173 } 3174 3175 /* 3176 * We use an ixa based on the last destination. 3177 */ 3178 static void 3179 iptun_output_6to4(iptun_t *iptun, mblk_t *mp) 3180 { 3181 conn_t *connp = iptun->iptun_connp; 3182 ipha_t *outer4, *inner4; 3183 ip6_t *outer6, *inner6; 3184 ip_xmit_attr_t *ixa; 3185 ip_xmit_attr_t *oldixa; 3186 int error; 3187 boolean_t need_connect; 3188 in6_addr_t v6dst; 3189 3190 ASSERT(mp->b_cont == NULL); /* Verified by iptun_output */ 3191 3192 /* Make sure we set ipha_dst before we look at ipha_dst */ 3193 3194 (void) iptun_find_headers(mp, 0, &outer4, &inner4, &outer6, &inner6); 3195 ASSERT(outer4 != NULL); 3196 if (!iptun_out_process_6to4(iptun, outer4, inner6)) { 3197 iptun_drop_pkt(mp, &iptun->iptun_oerrors); 3198 return; 3199 } 3200 3201 if (is_system_labeled()) { 3202 /* 3203 * Since the label can be different meaning a potentially 3204 * different IRE,we always use a unique ip_xmit_attr_t. 3205 */ 3206 ixa = conn_get_ixa_exclusive(connp); 3207 } else { 3208 /* 3209 * If no other thread is using conn_ixa this just gets a 3210 * reference to conn_ixa. Otherwise we get a safe copy of 3211 * conn_ixa. 3212 */ 3213 ixa = conn_get_ixa(connp, B_FALSE); 3214 } 3215 if (ixa == NULL) { 3216 iptun_drop_pkt(mp, &iptun->iptun_oerrors); 3217 return; 3218 } 3219 3220 mutex_enter(&connp->conn_lock); 3221 if (connp->conn_v4lastdst == outer4->ipha_dst) { 3222 need_connect = (ixa->ixa_ire == NULL); 3223 } else { 3224 /* In case previous destination was multirt */ 3225 ip_attr_newdst(ixa); 3226 3227 /* 3228 * We later update conn_ixa when we update conn_v4lastdst 3229 * which enables subsequent packets to avoid redoing 3230 * ip_attr_connect 3231 */ 3232 need_connect = B_TRUE; 3233 } 3234 mutex_exit(&connp->conn_lock); 3235 3236 /* 3237 * In case we got a safe copy of conn_ixa, or otherwise we don't 3238 * have a current ixa_ire, then we need to fill in any pointers in 3239 * the ixa. 3240 */ 3241 if (need_connect) { 3242 IN6_IPADDR_TO_V4MAPPED(outer4->ipha_dst, &v6dst); 3243 3244 /* We handle IPsec in iptun_output_common */ 3245 error = ip_attr_connect(connp, ixa, &connp->conn_saddr_v6, 3246 &v6dst, &v6dst, 0, NULL, NULL, 0); 3247 if (error != 0) { 3248 if (ixa->ixa_ire != NULL && 3249 (error == EHOSTUNREACH || error == ENETUNREACH)) { 3250 /* 3251 * Let conn_ip_output/ire_send_noroute return 3252 * the error and send any local ICMP error. 3253 */ 3254 error = 0; 3255 } else { 3256 ixa_refrele(ixa); 3257 iptun_drop_pkt(mp, &iptun->iptun_oerrors); 3258 return; 3259 } 3260 } 3261 } 3262 3263 iptun_output_common(iptun, ixa, mp); 3264 3265 /* Atomically replace conn_ixa and conn_v4lastdst */ 3266 mutex_enter(&connp->conn_lock); 3267 if (connp->conn_v4lastdst != outer4->ipha_dst) { 3268 /* Remember the dst which corresponds to conn_ixa */ 3269 connp->conn_v6lastdst = v6dst; 3270 oldixa = conn_replace_ixa(connp, ixa); 3271 } else { 3272 oldixa = NULL; 3273 } 3274 mutex_exit(&connp->conn_lock); 3275 ixa_refrele(ixa); 3276 if (oldixa != NULL) 3277 ixa_refrele(oldixa); 3278 } 3279 3280 /* 3281 * Check the destination/label. Modifies *mpp by adding/removing CIPSO. 3282 * 3283 * We get the label from the message in order to honor the 3284 * ULPs/IPs choice of label. This will be NULL for forwarded 3285 * packets, neighbor discovery packets and some others. 3286 */ 3287 static int 3288 iptun_output_check_label(mblk_t **mpp, ip_xmit_attr_t *ixa) 3289 { 3290 cred_t *cr; 3291 int adjust; 3292 int iplen; 3293 int err; 3294 ts_label_t *effective_tsl = NULL; 3295 3296 3297 ASSERT(is_system_labeled()); 3298 3299 cr = msg_getcred(*mpp, NULL); 3300 if (cr == NULL) 3301 return (0); 3302 3303 /* 3304 * We need to start with a label based on the IP/ULP above us 3305 */ 3306 ip_xmit_attr_restore_tsl(ixa, cr); 3307 3308 /* 3309 * Need to update packet with any CIPSO option since 3310 * conn_ip_output doesn't do that. 3311 */ 3312 if (ixa->ixa_flags & IXAF_IS_IPV4) { 3313 ipha_t *ipha; 3314 3315 ipha = (ipha_t *)(*mpp)->b_rptr; 3316 iplen = ntohs(ipha->ipha_length); 3317 err = tsol_check_label_v4(ixa->ixa_tsl, 3318 ixa->ixa_zoneid, mpp, CONN_MAC_DEFAULT, B_FALSE, 3319 ixa->ixa_ipst, &effective_tsl); 3320 if (err != 0) 3321 return (err); 3322 3323 ipha = (ipha_t *)(*mpp)->b_rptr; 3324 adjust = (int)ntohs(ipha->ipha_length) - iplen; 3325 } else { 3326 ip6_t *ip6h; 3327 3328 ip6h = (ip6_t *)(*mpp)->b_rptr; 3329 iplen = ntohs(ip6h->ip6_plen); 3330 3331 err = tsol_check_label_v6(ixa->ixa_tsl, 3332 ixa->ixa_zoneid, mpp, CONN_MAC_DEFAULT, B_FALSE, 3333 ixa->ixa_ipst, &effective_tsl); 3334 if (err != 0) 3335 return (err); 3336 3337 ip6h = (ip6_t *)(*mpp)->b_rptr; 3338 adjust = (int)ntohs(ip6h->ip6_plen) - iplen; 3339 } 3340 3341 if (effective_tsl != NULL) { 3342 /* Update the label */ 3343 ip_xmit_attr_replace_tsl(ixa, effective_tsl); 3344 } 3345 ixa->ixa_pktlen += adjust; 3346 ixa->ixa_ip_hdr_length += adjust; 3347 return (0); 3348 } 3349 3350 3351 static void 3352 iptun_output_common(iptun_t *iptun, ip_xmit_attr_t *ixa, mblk_t *mp) 3353 { 3354 ipsec_tun_pol_t *itp = iptun->iptun_itp; 3355 int outer_hlen; 3356 mblk_t *newmp; 3357 ipha_t *outer4, *inner4; 3358 ip6_t *outer6, *inner6; 3359 int error; 3360 boolean_t update_pktlen; 3361 3362 ASSERT(ixa->ixa_ire != NULL); 3363 3364 outer_hlen = iptun_find_headers(mp, 0, &outer4, &inner4, &outer6, 3365 &inner6); 3366 if (outer_hlen == 0) { 3367 iptun_drop_pkt(mp, &iptun->iptun_oerrors); 3368 return; 3369 } 3370 3371 /* Save IXAF_DONTFRAG value */ 3372 iaflags_t dontfrag = ixa->ixa_flags & IXAF_DONTFRAG; 3373 3374 /* Perform header processing. */ 3375 if (outer4 != NULL) { 3376 mp = iptun_out_process_ipv4(iptun, mp, outer4, inner4, inner6, 3377 ixa); 3378 } else { 3379 mp = iptun_out_process_ipv6(iptun, mp, outer6, inner4, inner6, 3380 ixa); 3381 } 3382 if (mp == NULL) 3383 return; 3384 3385 /* 3386 * Let's hope the compiler optimizes this with "branch taken". 3387 */ 3388 if (itp != NULL && (itp->itp_flags & ITPF_P_ACTIVE)) { 3389 /* This updates the ip_xmit_attr_t */ 3390 mp = ipsec_tun_outbound(mp, iptun, inner4, inner6, outer4, 3391 outer6, outer_hlen, ixa); 3392 if (mp == NULL) { 3393 atomic_inc_64(&iptun->iptun_oerrors); 3394 return; 3395 } 3396 if (is_system_labeled()) { 3397 /* 3398 * Might change the packet by adding/removing CIPSO. 3399 * After this caller inner* and outer* and outer_hlen 3400 * might be invalid. 3401 */ 3402 error = iptun_output_check_label(&mp, ixa); 3403 if (error != 0) { 3404 ip2dbg(("label check failed (%d)\n", error)); 3405 iptun_drop_pkt(mp, &iptun->iptun_oerrors); 3406 return; 3407 } 3408 } 3409 3410 /* 3411 * ipsec_tun_outbound() returns a chain of tunneled IP 3412 * fragments linked with b_next (or a single message if the 3413 * tunneled packet wasn't a fragment). 3414 * If fragcache returned a list then we need to update 3415 * ixa_pktlen for all packets in the list. 3416 */ 3417 update_pktlen = (mp->b_next != NULL); 3418 3419 /* 3420 * Otherwise, we're good to go. The ixa has been updated with 3421 * instructions for outbound IPsec processing. 3422 */ 3423 for (newmp = mp; newmp != NULL; newmp = mp) { 3424 size_t minmtu = iptun->iptun_typeinfo->iti_minmtu; 3425 3426 atomic_inc_64(&iptun->iptun_opackets); 3427 atomic_add_64(&iptun->iptun_obytes, ixa->ixa_pktlen); 3428 mp = mp->b_next; 3429 newmp->b_next = NULL; 3430 3431 /* 3432 * The IXAF_DONTFRAG flag is global, but there is 3433 * a chain here. Check if we're really already 3434 * smaller than the minimum allowed MTU and reset here 3435 * appropriately. Otherwise one small packet can kill 3436 * the whole chain's path mtu discovery. 3437 * In addition, update the pktlen to the length of 3438 * the actual packet being processed. 3439 */ 3440 if (update_pktlen) { 3441 ixa->ixa_pktlen = msgdsize(newmp); 3442 if (ixa->ixa_pktlen <= minmtu) 3443 ixa->ixa_flags &= ~IXAF_DONTFRAG; 3444 } 3445 3446 atomic_inc_64(&iptun->iptun_opackets); 3447 atomic_add_64(&iptun->iptun_obytes, ixa->ixa_pktlen); 3448 3449 error = conn_ip_output(newmp, ixa); 3450 3451 /* Restore IXAF_DONTFRAG value */ 3452 ixa->ixa_flags |= dontfrag; 3453 3454 if (error == EMSGSIZE) { 3455 /* IPsec policy might have changed */ 3456 (void) iptun_update_mtu(iptun, ixa, 0); 3457 } 3458 } 3459 } else { 3460 /* 3461 * The ip module will potentially apply global policy to the 3462 * packet in its output path if there's no active tunnel 3463 * policy. 3464 */ 3465 ASSERT(ixa->ixa_ipsec_policy == NULL); 3466 mp = ip_output_attach_policy(mp, outer4, outer6, NULL, ixa); 3467 if (mp == NULL) { 3468 atomic_inc_64(&iptun->iptun_oerrors); 3469 return; 3470 } 3471 if (is_system_labeled()) { 3472 /* 3473 * Might change the packet by adding/removing CIPSO. 3474 * After this caller inner* and outer* and outer_hlen 3475 * might be invalid. 3476 */ 3477 error = iptun_output_check_label(&mp, ixa); 3478 if (error != 0) { 3479 ip2dbg(("label check failed (%d)\n", error)); 3480 iptun_drop_pkt(mp, &iptun->iptun_oerrors); 3481 return; 3482 } 3483 } 3484 3485 atomic_inc_64(&iptun->iptun_opackets); 3486 atomic_add_64(&iptun->iptun_obytes, ixa->ixa_pktlen); 3487 3488 error = conn_ip_output(mp, ixa); 3489 if (error == EMSGSIZE) { 3490 /* IPsec policy might have changed */ 3491 (void) iptun_update_mtu(iptun, ixa, 0); 3492 } 3493 } 3494 if (ixa->ixa_flags & IXAF_IPSEC_SECURE) 3495 ipsec_out_release_refs(ixa); 3496 } 3497 3498 static mac_callbacks_t iptun_m_callbacks = { 3499 .mc_callbacks = (MC_SETPROP | MC_GETPROP | MC_PROPINFO), 3500 .mc_getstat = iptun_m_getstat, 3501 .mc_start = iptun_m_start, 3502 .mc_stop = iptun_m_stop, 3503 .mc_setpromisc = iptun_m_setpromisc, 3504 .mc_multicst = iptun_m_multicst, 3505 .mc_unicst = iptun_m_unicst, 3506 .mc_tx = iptun_m_tx, 3507 .mc_reserved = NULL, 3508 .mc_setprop = iptun_m_setprop, 3509 .mc_getprop = iptun_m_getprop, 3510 .mc_propinfo = iptun_m_propinfo 3511 }; 3512