1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * iptun - IP Tunneling Driver 28 * 29 * This module is a GLDv3 driver that implements virtual datalinks over IP 30 * (a.k.a, IP tunneling). The datalinks are managed through a dld ioctl 31 * interface (see iptun_ctl.c), and registered with GLDv3 using 32 * mac_register(). It implements the logic for various forms of IP (IPv4 or 33 * IPv6) encapsulation within IP (IPv4 or IPv6) by interacting with the ip 34 * module below it. Each virtual IP tunnel datalink has a conn_t associated 35 * with it representing the "outer" IP connection. 36 * 37 * The module implements the following locking semantics: 38 * 39 * Lookups and deletions in iptun_hash are synchronized using iptun_hash_lock. 40 * See comments above iptun_hash_lock for details. 41 * 42 * No locks are ever held while calling up to GLDv3. The general architecture 43 * of GLDv3 requires this, as the mac perimeter (essentially a lock) for a 44 * given link will be held while making downcalls (iptun_m_*() callbacks). 45 * Because we need to hold locks while handling downcalls, holding these locks 46 * while issuing upcalls results in deadlock scenarios. See the block comment 47 * above iptun_task_cb() for details on how we safely issue upcalls without 48 * holding any locks. 49 * 50 * The contents of each iptun_t is protected by an iptun_mutex which is held 51 * in iptun_enter() (called by iptun_enter_by_linkid()), and exited in 52 * iptun_exit(). 53 * 54 * See comments in iptun_delete() and iptun_free() for details on how the 55 * iptun_t is deleted safely. 56 */ 57 58 #include <sys/types.h> 59 #include <sys/kmem.h> 60 #include <sys/errno.h> 61 #include <sys/modhash.h> 62 #include <sys/list.h> 63 #include <sys/strsun.h> 64 #include <sys/file.h> 65 #include <sys/systm.h> 66 #include <sys/tihdr.h> 67 #include <sys/param.h> 68 #include <sys/mac_provider.h> 69 #include <sys/mac_ipv4.h> 70 #include <sys/mac_ipv6.h> 71 #include <sys/mac_6to4.h> 72 #include <sys/tsol/tnet.h> 73 #include <sys/sunldi.h> 74 #include <netinet/in.h> 75 #include <netinet/ip6.h> 76 #include <inet/ip.h> 77 #include <inet/ip_ire.h> 78 #include <inet/ipsec_impl.h> 79 #include <sys/tsol/label.h> 80 #include <sys/tsol/tnet.h> 81 #include <inet/iptun.h> 82 #include "iptun_impl.h" 83 84 /* Do the tunnel type and address family match? */ 85 #define IPTUN_ADDR_MATCH(iptun_type, family) \ 86 ((iptun_type == IPTUN_TYPE_IPV4 && family == AF_INET) || \ 87 (iptun_type == IPTUN_TYPE_IPV6 && family == AF_INET6) || \ 88 (iptun_type == IPTUN_TYPE_6TO4 && family == AF_INET)) 89 90 #define IPTUN_HASH_KEY(key) ((mod_hash_key_t)(uintptr_t)(key)) 91 92 #define IPTUN_MIN_IPV4_MTU 576 /* ip.h still uses 68 (!) */ 93 #define IPTUN_MIN_IPV6_MTU IPV6_MIN_MTU 94 #define IPTUN_MAX_IPV4_MTU (IP_MAXPACKET - sizeof (ipha_t)) 95 #define IPTUN_MAX_IPV6_MTU (IP_MAXPACKET - sizeof (ip6_t) - \ 96 sizeof (iptun_encaplim_t)) 97 98 #define IPTUN_MIN_HOPLIMIT 1 99 #define IPTUN_MAX_HOPLIMIT UINT8_MAX 100 101 #define IPTUN_MIN_ENCAPLIMIT 0 102 #define IPTUN_MAX_ENCAPLIMIT UINT8_MAX 103 104 #define IPTUN_IPSEC_REQ_MASK (IPSEC_PREF_REQUIRED | IPSEC_PREF_NEVER) 105 106 static iptun_encaplim_t iptun_encaplim_init = { 107 { IPPROTO_NONE, 0 }, 108 IP6OPT_TUNNEL_LIMIT, 109 1, 110 IPTUN_DEFAULT_ENCAPLIMIT, /* filled in with actual value later */ 111 IP6OPT_PADN, 112 1, 113 0 114 }; 115 116 /* 117 * Table containing per-iptun-type information. 118 * Since IPv6 can run over all of these we have the IPv6 min as the min MTU. 119 */ 120 static iptun_typeinfo_t iptun_type_table[] = { 121 { IPTUN_TYPE_IPV4, MAC_PLUGIN_IDENT_IPV4, IPV4_VERSION, 122 IPTUN_MIN_IPV6_MTU, IPTUN_MAX_IPV4_MTU, B_TRUE }, 123 { IPTUN_TYPE_IPV6, MAC_PLUGIN_IDENT_IPV6, IPV6_VERSION, 124 IPTUN_MIN_IPV6_MTU, IPTUN_MAX_IPV6_MTU, B_TRUE }, 125 { IPTUN_TYPE_6TO4, MAC_PLUGIN_IDENT_6TO4, IPV4_VERSION, 126 IPTUN_MIN_IPV6_MTU, IPTUN_MAX_IPV4_MTU, B_FALSE }, 127 { IPTUN_TYPE_UNKNOWN, NULL, 0, 0, 0, B_FALSE } 128 }; 129 130 /* 131 * iptun_hash is an iptun_t lookup table by link ID protected by 132 * iptun_hash_lock. While the hash table's integrity is maintained via 133 * internal locking in the mod_hash_*() functions, we need additional locking 134 * so that an iptun_t cannot be deleted after a hash lookup has returned an 135 * iptun_t and before iptun_lock has been entered. As such, we use 136 * iptun_hash_lock when doing lookups and removals from iptun_hash. 137 */ 138 mod_hash_t *iptun_hash; 139 static kmutex_t iptun_hash_lock; 140 141 static uint_t iptun_tunnelcount; /* total for all stacks */ 142 kmem_cache_t *iptun_cache; 143 ddi_taskq_t *iptun_taskq; 144 145 typedef enum { 146 IPTUN_TASK_MTU_UPDATE, /* tell mac about new tunnel link MTU */ 147 IPTUN_TASK_LADDR_UPDATE, /* tell mac about new local address */ 148 IPTUN_TASK_RADDR_UPDATE, /* tell mac about new remote address */ 149 IPTUN_TASK_LINK_UPDATE, /* tell mac about new link state */ 150 IPTUN_TASK_PDATA_UPDATE /* tell mac about updated plugin data */ 151 } iptun_task_t; 152 153 typedef struct iptun_task_data_s { 154 iptun_task_t itd_task; 155 datalink_id_t itd_linkid; 156 } iptun_task_data_t; 157 158 static void iptun_task_dispatch(iptun_t *, iptun_task_t); 159 static int iptun_enter(iptun_t *); 160 static void iptun_exit(iptun_t *); 161 static void iptun_headergen(iptun_t *, boolean_t); 162 static void iptun_drop_pkt(mblk_t *, uint64_t *); 163 static void iptun_input(void *, mblk_t *, void *, ip_recv_attr_t *); 164 static void iptun_input_icmp(void *, mblk_t *, void *, ip_recv_attr_t *); 165 static void iptun_output(iptun_t *, mblk_t *); 166 static uint32_t iptun_get_maxmtu(iptun_t *, ip_xmit_attr_t *, uint32_t); 167 static uint32_t iptun_update_mtu(iptun_t *, ip_xmit_attr_t *, uint32_t); 168 static uint32_t iptun_get_dst_pmtu(iptun_t *, ip_xmit_attr_t *); 169 static void iptun_update_dst_pmtu(iptun_t *, ip_xmit_attr_t *); 170 static int iptun_setladdr(iptun_t *, const struct sockaddr_storage *); 171 172 static void iptun_output_6to4(iptun_t *, mblk_t *); 173 static void iptun_output_common(iptun_t *, ip_xmit_attr_t *, mblk_t *); 174 static boolean_t iptun_verifyicmp(conn_t *, void *, icmph_t *, icmp6_t *, 175 ip_recv_attr_t *); 176 177 static void iptun_notify(void *, ip_xmit_attr_t *, ixa_notify_type_t, 178 ixa_notify_arg_t); 179 180 static mac_callbacks_t iptun_m_callbacks; 181 182 static int 183 iptun_m_getstat(void *arg, uint_t stat, uint64_t *val) 184 { 185 iptun_t *iptun = arg; 186 int err = 0; 187 188 switch (stat) { 189 case MAC_STAT_IERRORS: 190 *val = iptun->iptun_ierrors; 191 break; 192 case MAC_STAT_OERRORS: 193 *val = iptun->iptun_oerrors; 194 break; 195 case MAC_STAT_RBYTES: 196 *val = iptun->iptun_rbytes; 197 break; 198 case MAC_STAT_IPACKETS: 199 *val = iptun->iptun_ipackets; 200 break; 201 case MAC_STAT_OBYTES: 202 *val = iptun->iptun_obytes; 203 break; 204 case MAC_STAT_OPACKETS: 205 *val = iptun->iptun_opackets; 206 break; 207 case MAC_STAT_NORCVBUF: 208 *val = iptun->iptun_norcvbuf; 209 break; 210 case MAC_STAT_NOXMTBUF: 211 *val = iptun->iptun_noxmtbuf; 212 break; 213 default: 214 err = ENOTSUP; 215 } 216 217 return (err); 218 } 219 220 static int 221 iptun_m_start(void *arg) 222 { 223 iptun_t *iptun = arg; 224 int err; 225 226 if ((err = iptun_enter(iptun)) == 0) { 227 iptun->iptun_flags |= IPTUN_MAC_STARTED; 228 iptun_task_dispatch(iptun, IPTUN_TASK_LINK_UPDATE); 229 iptun_exit(iptun); 230 } 231 return (err); 232 } 233 234 static void 235 iptun_m_stop(void *arg) 236 { 237 iptun_t *iptun = arg; 238 239 if (iptun_enter(iptun) == 0) { 240 iptun->iptun_flags &= ~IPTUN_MAC_STARTED; 241 iptun_task_dispatch(iptun, IPTUN_TASK_LINK_UPDATE); 242 iptun_exit(iptun); 243 } 244 } 245 246 /* 247 * iptun_m_setpromisc() does nothing and always succeeds. This is because a 248 * tunnel data-link only ever receives packets that are destined exclusively 249 * for the local address of the tunnel. 250 */ 251 /* ARGSUSED */ 252 static int 253 iptun_m_setpromisc(void *arg, boolean_t on) 254 { 255 return (0); 256 } 257 258 /* ARGSUSED */ 259 static int 260 iptun_m_multicst(void *arg, boolean_t add, const uint8_t *addrp) 261 { 262 return (ENOTSUP); 263 } 264 265 /* 266 * iptun_m_unicst() sets the local address. 267 */ 268 /* ARGSUSED */ 269 static int 270 iptun_m_unicst(void *arg, const uint8_t *addrp) 271 { 272 iptun_t *iptun = arg; 273 int err; 274 struct sockaddr_storage ss; 275 struct sockaddr_in *sin; 276 struct sockaddr_in6 *sin6; 277 278 if ((err = iptun_enter(iptun)) == 0) { 279 switch (iptun->iptun_typeinfo->iti_ipvers) { 280 case IPV4_VERSION: 281 sin = (struct sockaddr_in *)&ss; 282 sin->sin_family = AF_INET; 283 bcopy(addrp, &sin->sin_addr, sizeof (in_addr_t)); 284 break; 285 case IPV6_VERSION: 286 sin6 = (struct sockaddr_in6 *)&ss; 287 sin6->sin6_family = AF_INET6; 288 bcopy(addrp, &sin6->sin6_addr, sizeof (in6_addr_t)); 289 break; 290 default: 291 ASSERT(0); 292 } 293 err = iptun_setladdr(iptun, &ss); 294 iptun_exit(iptun); 295 } 296 return (err); 297 } 298 299 static mblk_t * 300 iptun_m_tx(void *arg, mblk_t *mpchain) 301 { 302 mblk_t *mp, *nmp; 303 iptun_t *iptun = arg; 304 305 if (!IS_IPTUN_RUNNING(iptun)) { 306 iptun_drop_pkt(mpchain, &iptun->iptun_noxmtbuf); 307 return (NULL); 308 } 309 310 for (mp = mpchain; mp != NULL; mp = nmp) { 311 nmp = mp->b_next; 312 mp->b_next = NULL; 313 iptun_output(iptun, mp); 314 } 315 316 return (NULL); 317 } 318 319 /* ARGSUSED */ 320 static int 321 iptun_m_setprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, 322 uint_t pr_valsize, const void *pr_val) 323 { 324 iptun_t *iptun = barg; 325 uint32_t value = *(uint32_t *)pr_val; 326 int err; 327 328 /* 329 * We need to enter this iptun_t since we'll be modifying the outer 330 * header. 331 */ 332 if ((err = iptun_enter(iptun)) != 0) 333 return (err); 334 335 switch (pr_num) { 336 case MAC_PROP_IPTUN_HOPLIMIT: 337 if (value < IPTUN_MIN_HOPLIMIT || value > IPTUN_MAX_HOPLIMIT) { 338 err = EINVAL; 339 break; 340 } 341 if (value != iptun->iptun_hoplimit) { 342 iptun->iptun_hoplimit = (uint8_t)value; 343 iptun_headergen(iptun, B_TRUE); 344 } 345 break; 346 case MAC_PROP_IPTUN_ENCAPLIMIT: 347 if (iptun->iptun_typeinfo->iti_type != IPTUN_TYPE_IPV6 || 348 value > IPTUN_MAX_ENCAPLIMIT) { 349 err = EINVAL; 350 break; 351 } 352 if (value != iptun->iptun_encaplimit) { 353 iptun->iptun_encaplimit = (uint8_t)value; 354 iptun_headergen(iptun, B_TRUE); 355 } 356 break; 357 case MAC_PROP_MTU: { 358 uint32_t maxmtu = iptun_get_maxmtu(iptun, NULL, 0); 359 360 if (value < iptun->iptun_typeinfo->iti_minmtu || 361 value > maxmtu) { 362 err = EINVAL; 363 break; 364 } 365 iptun->iptun_flags |= IPTUN_FIXED_MTU; 366 if (value != iptun->iptun_mtu) { 367 iptun->iptun_mtu = value; 368 iptun_task_dispatch(iptun, IPTUN_TASK_MTU_UPDATE); 369 } 370 break; 371 } 372 default: 373 err = EINVAL; 374 } 375 iptun_exit(iptun); 376 return (err); 377 } 378 379 /* ARGSUSED */ 380 static int 381 iptun_m_getprop(void *barg, const char *pr_name, mac_prop_id_t pr_num, 382 uint_t pr_valsize, void *pr_val) 383 { 384 iptun_t *iptun = barg; 385 int err; 386 387 if ((err = iptun_enter(iptun)) != 0) 388 return (err); 389 390 switch (pr_num) { 391 case MAC_PROP_IPTUN_HOPLIMIT: 392 ASSERT(pr_valsize >= sizeof (uint32_t)); 393 *(uint32_t *)pr_val = iptun->iptun_hoplimit; 394 break; 395 396 case MAC_PROP_IPTUN_ENCAPLIMIT: 397 *(uint32_t *)pr_val = iptun->iptun_encaplimit; 398 break; 399 default: 400 err = ENOTSUP; 401 } 402 done: 403 iptun_exit(iptun); 404 return (err); 405 } 406 407 /* ARGSUSED */ 408 static void 409 iptun_m_propinfo(void *barg, const char *pr_name, mac_prop_id_t pr_num, 410 mac_prop_info_handle_t prh) 411 { 412 iptun_t *iptun = barg; 413 414 switch (pr_num) { 415 case MAC_PROP_IPTUN_HOPLIMIT: 416 mac_prop_info_set_range_uint32(prh, 417 IPTUN_MIN_HOPLIMIT, IPTUN_MAX_HOPLIMIT); 418 mac_prop_info_set_default_uint32(prh, IPTUN_DEFAULT_HOPLIMIT); 419 break; 420 421 case MAC_PROP_IPTUN_ENCAPLIMIT: 422 if (iptun->iptun_typeinfo->iti_type != IPTUN_TYPE_IPV6) 423 break; 424 mac_prop_info_set_range_uint32(prh, 425 IPTUN_MIN_ENCAPLIMIT, IPTUN_MAX_ENCAPLIMIT); 426 mac_prop_info_set_default_uint32(prh, IPTUN_DEFAULT_ENCAPLIMIT); 427 break; 428 case MAC_PROP_MTU: 429 mac_prop_info_set_range_uint32(prh, 430 iptun->iptun_typeinfo->iti_minmtu, 431 iptun_get_maxmtu(iptun, NULL, 0)); 432 break; 433 } 434 } 435 436 uint_t 437 iptun_count(void) 438 { 439 return (iptun_tunnelcount); 440 } 441 442 /* 443 * Enter an iptun_t exclusively. This is essentially just a mutex, but we 444 * don't allow iptun_enter() to succeed on a tunnel if it's in the process of 445 * being deleted. 446 */ 447 static int 448 iptun_enter(iptun_t *iptun) 449 { 450 mutex_enter(&iptun->iptun_lock); 451 while (iptun->iptun_flags & IPTUN_DELETE_PENDING) 452 cv_wait(&iptun->iptun_enter_cv, &iptun->iptun_lock); 453 if (iptun->iptun_flags & IPTUN_CONDEMNED) { 454 mutex_exit(&iptun->iptun_lock); 455 return (ENOENT); 456 } 457 return (0); 458 } 459 460 /* 461 * Exit the tunnel entered in iptun_enter(). 462 */ 463 static void 464 iptun_exit(iptun_t *iptun) 465 { 466 mutex_exit(&iptun->iptun_lock); 467 } 468 469 /* 470 * Enter the IP tunnel instance by datalink ID. 471 */ 472 static int 473 iptun_enter_by_linkid(datalink_id_t linkid, iptun_t **iptun) 474 { 475 int err; 476 477 mutex_enter(&iptun_hash_lock); 478 if (mod_hash_find(iptun_hash, IPTUN_HASH_KEY(linkid), 479 (mod_hash_val_t *)iptun) == 0) 480 err = iptun_enter(*iptun); 481 else 482 err = ENOENT; 483 if (err != 0) 484 *iptun = NULL; 485 mutex_exit(&iptun_hash_lock); 486 return (err); 487 } 488 489 /* 490 * Handle tasks that were deferred through the iptun_taskq because they require 491 * calling up to the mac module, and we can't call up to the mac module while 492 * holding locks. 493 * 494 * This is tricky to get right without introducing race conditions and 495 * deadlocks with the mac module, as we cannot issue an upcall while in the 496 * iptun_t. The reason is that upcalls may try and enter the mac perimeter, 497 * while iptun callbacks (such as iptun_m_setprop()) called from the mac 498 * module will already have the perimeter held, and will then try and enter 499 * the iptun_t. You can see the lock ordering problem with this; this will 500 * deadlock. 501 * 502 * The safe way to do this is to enter the iptun_t in question and copy the 503 * information we need out of it so that we can exit it and know that the 504 * information being passed up to the upcalls won't be subject to modification 505 * by other threads. The problem now is that we need to exit it prior to 506 * issuing the upcall, but once we do this, a thread could come along and 507 * delete the iptun_t and thus the mac handle required to issue the upcall. 508 * To prevent this, we set the IPTUN_UPCALL_PENDING flag prior to exiting the 509 * iptun_t. This flag is the condition associated with iptun_upcall_cv, which 510 * iptun_delete() will cv_wait() on. When the upcall completes, we clear 511 * IPTUN_UPCALL_PENDING and cv_signal() any potentially waiting 512 * iptun_delete(). We can thus still safely use iptun->iptun_mh after having 513 * exited the iptun_t. 514 */ 515 static void 516 iptun_task_cb(void *arg) 517 { 518 iptun_task_data_t *itd = arg; 519 iptun_task_t task = itd->itd_task; 520 datalink_id_t linkid = itd->itd_linkid; 521 iptun_t *iptun; 522 uint32_t mtu; 523 iptun_addr_t addr; 524 link_state_t linkstate; 525 size_t header_size; 526 iptun_header_t header; 527 528 kmem_free(itd, sizeof (*itd)); 529 530 /* 531 * Note that if the lookup fails, it's because the tunnel was deleted 532 * between the time the task was dispatched and now. That isn't an 533 * error. 534 */ 535 if (iptun_enter_by_linkid(linkid, &iptun) != 0) 536 return; 537 538 iptun->iptun_flags |= IPTUN_UPCALL_PENDING; 539 540 switch (task) { 541 case IPTUN_TASK_MTU_UPDATE: 542 mtu = iptun->iptun_mtu; 543 break; 544 case IPTUN_TASK_LADDR_UPDATE: 545 addr = iptun->iptun_laddr; 546 break; 547 case IPTUN_TASK_RADDR_UPDATE: 548 addr = iptun->iptun_raddr; 549 break; 550 case IPTUN_TASK_LINK_UPDATE: 551 linkstate = IS_IPTUN_RUNNING(iptun) ? 552 LINK_STATE_UP : LINK_STATE_DOWN; 553 break; 554 case IPTUN_TASK_PDATA_UPDATE: 555 header_size = iptun->iptun_header_size; 556 header = iptun->iptun_header; 557 break; 558 default: 559 ASSERT(0); 560 } 561 562 iptun_exit(iptun); 563 564 switch (task) { 565 case IPTUN_TASK_MTU_UPDATE: 566 (void) mac_maxsdu_update(iptun->iptun_mh, mtu); 567 break; 568 case IPTUN_TASK_LADDR_UPDATE: 569 mac_unicst_update(iptun->iptun_mh, (uint8_t *)&addr.ia_addr); 570 break; 571 case IPTUN_TASK_RADDR_UPDATE: 572 mac_dst_update(iptun->iptun_mh, (uint8_t *)&addr.ia_addr); 573 break; 574 case IPTUN_TASK_LINK_UPDATE: 575 mac_link_update(iptun->iptun_mh, linkstate); 576 break; 577 case IPTUN_TASK_PDATA_UPDATE: 578 if (mac_pdata_update(iptun->iptun_mh, 579 header_size == 0 ? NULL : &header, header_size) != 0) 580 atomic_inc_64(&iptun->iptun_taskq_fail); 581 break; 582 } 583 584 mutex_enter(&iptun->iptun_lock); 585 iptun->iptun_flags &= ~IPTUN_UPCALL_PENDING; 586 cv_signal(&iptun->iptun_upcall_cv); 587 mutex_exit(&iptun->iptun_lock); 588 } 589 590 static void 591 iptun_task_dispatch(iptun_t *iptun, iptun_task_t iptun_task) 592 { 593 iptun_task_data_t *itd; 594 595 itd = kmem_alloc(sizeof (*itd), KM_NOSLEEP); 596 if (itd == NULL) { 597 atomic_inc_64(&iptun->iptun_taskq_fail); 598 return; 599 } 600 itd->itd_task = iptun_task; 601 itd->itd_linkid = iptun->iptun_linkid; 602 if (ddi_taskq_dispatch(iptun_taskq, iptun_task_cb, itd, DDI_NOSLEEP)) { 603 atomic_inc_64(&iptun->iptun_taskq_fail); 604 kmem_free(itd, sizeof (*itd)); 605 } 606 } 607 608 /* 609 * Convert an iptun_addr_t to sockaddr_storage. 610 */ 611 static void 612 iptun_getaddr(iptun_addr_t *iptun_addr, struct sockaddr_storage *ss) 613 { 614 struct sockaddr_in *sin; 615 struct sockaddr_in6 *sin6; 616 617 bzero(ss, sizeof (*ss)); 618 switch (iptun_addr->ia_family) { 619 case AF_INET: 620 sin = (struct sockaddr_in *)ss; 621 sin->sin_addr.s_addr = iptun_addr->ia_addr.iau_addr4; 622 break; 623 case AF_INET6: 624 sin6 = (struct sockaddr_in6 *)ss; 625 sin6->sin6_addr = iptun_addr->ia_addr.iau_addr6; 626 break; 627 default: 628 ASSERT(0); 629 } 630 ss->ss_family = iptun_addr->ia_family; 631 } 632 633 /* 634 * General purpose function to set an IP tunnel source or destination address. 635 */ 636 static int 637 iptun_setaddr(iptun_type_t iptun_type, iptun_addr_t *iptun_addr, 638 const struct sockaddr_storage *ss) 639 { 640 if (!IPTUN_ADDR_MATCH(iptun_type, ss->ss_family)) 641 return (EINVAL); 642 643 switch (ss->ss_family) { 644 case AF_INET: { 645 struct sockaddr_in *sin = (struct sockaddr_in *)ss; 646 647 if ((sin->sin_addr.s_addr == INADDR_ANY) || 648 (sin->sin_addr.s_addr == INADDR_BROADCAST) || 649 CLASSD(sin->sin_addr.s_addr)) { 650 return (EADDRNOTAVAIL); 651 } 652 iptun_addr->ia_addr.iau_addr4 = sin->sin_addr.s_addr; 653 break; 654 } 655 case AF_INET6: { 656 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)ss; 657 658 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) || 659 IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr) || 660 IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 661 return (EADDRNOTAVAIL); 662 } 663 iptun_addr->ia_addr.iau_addr6 = sin6->sin6_addr; 664 break; 665 } 666 default: 667 return (EAFNOSUPPORT); 668 } 669 iptun_addr->ia_family = ss->ss_family; 670 return (0); 671 } 672 673 static int 674 iptun_setladdr(iptun_t *iptun, const struct sockaddr_storage *laddr) 675 { 676 return (iptun_setaddr(iptun->iptun_typeinfo->iti_type, 677 &iptun->iptun_laddr, laddr)); 678 } 679 680 static int 681 iptun_setraddr(iptun_t *iptun, const struct sockaddr_storage *raddr) 682 { 683 if (!(iptun->iptun_typeinfo->iti_hasraddr)) 684 return (EINVAL); 685 return (iptun_setaddr(iptun->iptun_typeinfo->iti_type, 686 &iptun->iptun_raddr, raddr)); 687 } 688 689 static boolean_t 690 iptun_canbind(iptun_t *iptun) 691 { 692 /* 693 * A tunnel may bind when its source address has been set, and if its 694 * tunnel type requires one, also its destination address. 695 */ 696 return ((iptun->iptun_flags & IPTUN_LADDR) && 697 ((iptun->iptun_flags & IPTUN_RADDR) || 698 !(iptun->iptun_typeinfo->iti_hasraddr))); 699 } 700 701 /* 702 * Verify that the local address is valid, and insert in the fanout 703 */ 704 static int 705 iptun_bind(iptun_t *iptun) 706 { 707 conn_t *connp = iptun->iptun_connp; 708 int error = 0; 709 ip_xmit_attr_t *ixa; 710 iulp_t uinfo; 711 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 712 713 /* Get an exclusive ixa for this thread, and replace conn_ixa */ 714 ixa = conn_get_ixa(connp, B_TRUE); 715 if (ixa == NULL) 716 return (ENOMEM); 717 ASSERT(ixa->ixa_refcnt >= 2); 718 ASSERT(ixa == connp->conn_ixa); 719 720 /* We create PMTU state including for 6to4 */ 721 ixa->ixa_flags |= IXAF_PMTU_DISCOVERY; 722 723 ASSERT(iptun_canbind(iptun)); 724 725 mutex_enter(&connp->conn_lock); 726 /* 727 * Note that conn_proto can't be set since the upper protocol 728 * can be both 41 and 4 when IPv6 and IPv4 are over the same tunnel. 729 * ipcl_iptun_classify doesn't use conn_proto. 730 */ 731 connp->conn_ipversion = iptun->iptun_typeinfo->iti_ipvers; 732 733 switch (iptun->iptun_typeinfo->iti_type) { 734 case IPTUN_TYPE_IPV4: 735 IN6_IPADDR_TO_V4MAPPED(iptun->iptun_laddr4, 736 &connp->conn_laddr_v6); 737 IN6_IPADDR_TO_V4MAPPED(iptun->iptun_raddr4, 738 &connp->conn_faddr_v6); 739 ixa->ixa_flags |= IXAF_IS_IPV4; 740 if (ip_laddr_verify_v4(iptun->iptun_laddr4, IPCL_ZONEID(connp), 741 ipst, B_FALSE) != IPVL_UNICAST_UP) { 742 mutex_exit(&connp->conn_lock); 743 error = EADDRNOTAVAIL; 744 goto done; 745 } 746 break; 747 case IPTUN_TYPE_IPV6: 748 connp->conn_laddr_v6 = iptun->iptun_laddr6; 749 connp->conn_faddr_v6 = iptun->iptun_raddr6; 750 ixa->ixa_flags &= ~IXAF_IS_IPV4; 751 /* We use a zero scopeid for now */ 752 if (ip_laddr_verify_v6(&iptun->iptun_laddr6, IPCL_ZONEID(connp), 753 ipst, B_FALSE, 0) != IPVL_UNICAST_UP) { 754 mutex_exit(&connp->conn_lock); 755 error = EADDRNOTAVAIL; 756 goto done; 757 } 758 break; 759 case IPTUN_TYPE_6TO4: 760 IN6_IPADDR_TO_V4MAPPED(iptun->iptun_laddr4, 761 &connp->conn_laddr_v6); 762 IN6_IPADDR_TO_V4MAPPED(INADDR_ANY, &connp->conn_faddr_v6); 763 ixa->ixa_flags |= IXAF_IS_IPV4; 764 mutex_exit(&connp->conn_lock); 765 766 switch (ip_laddr_verify_v4(iptun->iptun_laddr4, 767 IPCL_ZONEID(connp), ipst, B_FALSE)) { 768 case IPVL_UNICAST_UP: 769 case IPVL_UNICAST_DOWN: 770 break; 771 default: 772 error = EADDRNOTAVAIL; 773 goto done; 774 } 775 goto insert; 776 } 777 778 /* In case previous destination was multirt */ 779 ip_attr_newdst(ixa); 780 781 /* 782 * When we set a tunnel's destination address, we do not 783 * care if the destination is reachable. Transient routing 784 * issues should not inhibit the creation of a tunnel 785 * interface, for example. Thus we pass B_FALSE here. 786 */ 787 connp->conn_saddr_v6 = connp->conn_laddr_v6; 788 mutex_exit(&connp->conn_lock); 789 790 /* As long as the MTU is large we avoid fragmentation */ 791 ixa->ixa_flags |= IXAF_DONTFRAG | IXAF_PMTU_IPV4_DF; 792 793 /* We handle IPsec in iptun_output_common */ 794 error = ip_attr_connect(connp, ixa, &connp->conn_saddr_v6, 795 &connp->conn_faddr_v6, &connp->conn_faddr_v6, 0, 796 &connp->conn_saddr_v6, &uinfo, 0); 797 798 if (error != 0) 799 goto done; 800 801 /* saddr shouldn't change since it was already set */ 802 ASSERT(IN6_ARE_ADDR_EQUAL(&connp->conn_laddr_v6, 803 &connp->conn_saddr_v6)); 804 805 /* We set IXAF_VERIFY_PMTU to catch PMTU increases */ 806 ixa->ixa_flags |= IXAF_VERIFY_PMTU; 807 ASSERT(uinfo.iulp_mtu != 0); 808 809 /* 810 * Allow setting new policies. 811 * The addresses/ports are already set, thus the IPsec policy calls 812 * can handle their passed-in conn's. 813 */ 814 connp->conn_policy_cached = B_FALSE; 815 816 insert: 817 error = ipcl_conn_insert(connp); 818 if (error != 0) 819 goto done; 820 821 /* Record this as the "last" send even though we haven't sent any */ 822 connp->conn_v6lastdst = connp->conn_faddr_v6; 823 824 iptun->iptun_flags |= IPTUN_BOUND; 825 /* 826 * Now that we're bound with ip below us, this is a good 827 * time to initialize the destination path MTU and to 828 * re-calculate the tunnel's link MTU. 829 */ 830 (void) iptun_update_mtu(iptun, ixa, 0); 831 832 if (IS_IPTUN_RUNNING(iptun)) 833 iptun_task_dispatch(iptun, IPTUN_TASK_LINK_UPDATE); 834 835 done: 836 ixa_refrele(ixa); 837 return (error); 838 } 839 840 static void 841 iptun_unbind(iptun_t *iptun) 842 { 843 ASSERT(iptun->iptun_flags & IPTUN_BOUND); 844 ASSERT(mutex_owned(&iptun->iptun_lock) || 845 (iptun->iptun_flags & IPTUN_CONDEMNED)); 846 ip_unbind(iptun->iptun_connp); 847 iptun->iptun_flags &= ~IPTUN_BOUND; 848 if (!(iptun->iptun_flags & IPTUN_CONDEMNED)) 849 iptun_task_dispatch(iptun, IPTUN_TASK_LINK_UPDATE); 850 } 851 852 /* 853 * Re-generate the template data-link header for a given IP tunnel given the 854 * tunnel's current parameters. 855 */ 856 static void 857 iptun_headergen(iptun_t *iptun, boolean_t update_mac) 858 { 859 switch (iptun->iptun_typeinfo->iti_ipvers) { 860 case IPV4_VERSION: 861 /* 862 * We only need to use a custom IP header if the administrator 863 * has supplied a non-default hoplimit. 864 */ 865 if (iptun->iptun_hoplimit == IPTUN_DEFAULT_HOPLIMIT) { 866 iptun->iptun_header_size = 0; 867 break; 868 } 869 iptun->iptun_header_size = sizeof (ipha_t); 870 iptun->iptun_header4.ipha_version_and_hdr_length = 871 IP_SIMPLE_HDR_VERSION; 872 iptun->iptun_header4.ipha_fragment_offset_and_flags = 873 htons(IPH_DF); 874 iptun->iptun_header4.ipha_ttl = iptun->iptun_hoplimit; 875 break; 876 case IPV6_VERSION: { 877 ip6_t *ip6hp = &iptun->iptun_header6.it6h_ip6h; 878 879 /* 880 * We only need to use a custom IPv6 header if either the 881 * administrator has supplied a non-default hoplimit, or we 882 * need to include an encapsulation limit option in the outer 883 * header. 884 */ 885 if (iptun->iptun_hoplimit == IPTUN_DEFAULT_HOPLIMIT && 886 iptun->iptun_encaplimit == 0) { 887 iptun->iptun_header_size = 0; 888 break; 889 } 890 891 (void) memset(ip6hp, 0, sizeof (*ip6hp)); 892 if (iptun->iptun_encaplimit == 0) { 893 iptun->iptun_header_size = sizeof (ip6_t); 894 ip6hp->ip6_nxt = IPPROTO_NONE; 895 } else { 896 iptun_encaplim_t *iel; 897 898 iptun->iptun_header_size = sizeof (iptun_ipv6hdrs_t); 899 /* 900 * The mac_ipv6 plugin requires ip6_plen to be in host 901 * byte order and reflect the extension headers 902 * present in the template. The actual network byte 903 * order ip6_plen will be set on a per-packet basis on 904 * transmit. 905 */ 906 ip6hp->ip6_plen = sizeof (*iel); 907 ip6hp->ip6_nxt = IPPROTO_DSTOPTS; 908 iel = &iptun->iptun_header6.it6h_encaplim; 909 *iel = iptun_encaplim_init; 910 iel->iel_telopt.ip6ot_encap_limit = 911 iptun->iptun_encaplimit; 912 } 913 914 ip6hp->ip6_hlim = iptun->iptun_hoplimit; 915 break; 916 } 917 } 918 919 if (update_mac) 920 iptun_task_dispatch(iptun, IPTUN_TASK_PDATA_UPDATE); 921 } 922 923 /* 924 * Insert inbound and outbound IPv4 and IPv6 policy into the given policy 925 * head. 926 */ 927 static boolean_t 928 iptun_insert_simple_policies(ipsec_policy_head_t *ph, ipsec_act_t *actp, 929 uint_t n, netstack_t *ns) 930 { 931 int f = IPSEC_AF_V4; 932 933 if (!ipsec_polhead_insert(ph, actp, n, f, IPSEC_TYPE_INBOUND, ns) || 934 !ipsec_polhead_insert(ph, actp, n, f, IPSEC_TYPE_OUTBOUND, ns)) 935 return (B_FALSE); 936 937 f = IPSEC_AF_V6; 938 return (ipsec_polhead_insert(ph, actp, n, f, IPSEC_TYPE_INBOUND, ns) && 939 ipsec_polhead_insert(ph, actp, n, f, IPSEC_TYPE_OUTBOUND, ns)); 940 } 941 942 /* 943 * Used to set IPsec policy when policy is set through the IPTUN_CREATE or 944 * IPTUN_MODIFY ioctls. 945 */ 946 static int 947 iptun_set_sec_simple(iptun_t *iptun, const ipsec_req_t *ipsr) 948 { 949 int rc = 0; 950 uint_t nact; 951 ipsec_act_t *actp = NULL; 952 boolean_t clear_all, old_policy = B_FALSE; 953 ipsec_tun_pol_t *itp; 954 char name[MAXLINKNAMELEN]; 955 uint64_t gen; 956 netstack_t *ns = iptun->iptun_ns; 957 958 /* Can't specify self-encap on a tunnel. */ 959 if (ipsr->ipsr_self_encap_req != 0) 960 return (EINVAL); 961 962 /* 963 * If it's a "clear-all" entry, unset the security flags and resume 964 * normal cleartext (or inherit-from-global) policy. 965 */ 966 clear_all = ((ipsr->ipsr_ah_req & IPTUN_IPSEC_REQ_MASK) == 0 && 967 (ipsr->ipsr_esp_req & IPTUN_IPSEC_REQ_MASK) == 0); 968 969 ASSERT(mutex_owned(&iptun->iptun_lock)); 970 itp = iptun->iptun_itp; 971 if (itp == NULL) { 972 if (clear_all) 973 goto bail; 974 if ((rc = dls_mgmt_get_linkinfo(iptun->iptun_linkid, name, NULL, 975 NULL, NULL)) != 0) 976 goto bail; 977 ASSERT(name[0] != '\0'); 978 if ((itp = create_tunnel_policy(name, &rc, &gen, ns)) == NULL) 979 goto bail; 980 iptun->iptun_itp = itp; 981 } 982 983 /* Allocate the actvec now, before holding itp or polhead locks. */ 984 ipsec_actvec_from_req(ipsr, &actp, &nact, ns); 985 if (actp == NULL) { 986 rc = ENOMEM; 987 goto bail; 988 } 989 990 /* 991 * Just write on the active polhead. Save the primary/secondary stuff 992 * for spdsock operations. 993 * 994 * Mutex because we need to write to the polhead AND flags atomically. 995 * Other threads will acquire the polhead lock as a reader if the 996 * (unprotected) flag is set. 997 */ 998 mutex_enter(&itp->itp_lock); 999 if (itp->itp_flags & ITPF_P_TUNNEL) { 1000 /* Oops, we lost a race. Let's get out of here. */ 1001 rc = EBUSY; 1002 goto mutex_bail; 1003 } 1004 old_policy = ((itp->itp_flags & ITPF_P_ACTIVE) != 0); 1005 1006 if (old_policy) { 1007 ITPF_CLONE(itp->itp_flags); 1008 rc = ipsec_copy_polhead(itp->itp_policy, itp->itp_inactive, ns); 1009 if (rc != 0) { 1010 /* inactive has already been cleared. */ 1011 itp->itp_flags &= ~ITPF_IFLAGS; 1012 goto mutex_bail; 1013 } 1014 rw_enter(&itp->itp_policy->iph_lock, RW_WRITER); 1015 ipsec_polhead_flush(itp->itp_policy, ns); 1016 } else { 1017 /* Else assume itp->itp_policy is already flushed. */ 1018 rw_enter(&itp->itp_policy->iph_lock, RW_WRITER); 1019 } 1020 1021 if (clear_all) { 1022 ASSERT(avl_numnodes(&itp->itp_policy->iph_rulebyid) == 0); 1023 itp->itp_flags &= ~ITPF_PFLAGS; 1024 rw_exit(&itp->itp_policy->iph_lock); 1025 old_policy = B_FALSE; /* Clear out the inactive one too. */ 1026 goto recover_bail; 1027 } 1028 1029 if (iptun_insert_simple_policies(itp->itp_policy, actp, nact, ns)) { 1030 rw_exit(&itp->itp_policy->iph_lock); 1031 /* 1032 * Adjust MTU and make sure the DL side knows what's up. 1033 */ 1034 itp->itp_flags = ITPF_P_ACTIVE; 1035 (void) iptun_update_mtu(iptun, NULL, 0); 1036 old_policy = B_FALSE; /* Blank out inactive - we succeeded */ 1037 } else { 1038 rw_exit(&itp->itp_policy->iph_lock); 1039 rc = ENOMEM; 1040 } 1041 1042 recover_bail: 1043 if (old_policy) { 1044 /* Recover policy in in active polhead. */ 1045 ipsec_swap_policy(itp->itp_policy, itp->itp_inactive, ns); 1046 ITPF_SWAP(itp->itp_flags); 1047 } 1048 1049 /* Clear policy in inactive polhead. */ 1050 itp->itp_flags &= ~ITPF_IFLAGS; 1051 rw_enter(&itp->itp_inactive->iph_lock, RW_WRITER); 1052 ipsec_polhead_flush(itp->itp_inactive, ns); 1053 rw_exit(&itp->itp_inactive->iph_lock); 1054 1055 mutex_bail: 1056 mutex_exit(&itp->itp_lock); 1057 1058 bail: 1059 if (actp != NULL) 1060 ipsec_actvec_free(actp, nact); 1061 1062 return (rc); 1063 } 1064 1065 static iptun_typeinfo_t * 1066 iptun_gettypeinfo(iptun_type_t type) 1067 { 1068 int i; 1069 1070 for (i = 0; iptun_type_table[i].iti_type != IPTUN_TYPE_UNKNOWN; i++) { 1071 if (iptun_type_table[i].iti_type == type) 1072 break; 1073 } 1074 return (&iptun_type_table[i]); 1075 } 1076 1077 /* 1078 * Set the parameters included in ik on the tunnel iptun. Parameters that can 1079 * only be set at creation time are set in iptun_create(). 1080 */ 1081 static int 1082 iptun_setparams(iptun_t *iptun, const iptun_kparams_t *ik) 1083 { 1084 int err = 0; 1085 netstack_t *ns = iptun->iptun_ns; 1086 iptun_addr_t orig_laddr, orig_raddr; 1087 uint_t orig_flags = iptun->iptun_flags; 1088 1089 if (ik->iptun_kparam_flags & IPTUN_KPARAM_LADDR) { 1090 if (orig_flags & IPTUN_LADDR) 1091 orig_laddr = iptun->iptun_laddr; 1092 if ((err = iptun_setladdr(iptun, &ik->iptun_kparam_laddr)) != 0) 1093 return (err); 1094 iptun->iptun_flags |= IPTUN_LADDR; 1095 } 1096 1097 if (ik->iptun_kparam_flags & IPTUN_KPARAM_RADDR) { 1098 if (orig_flags & IPTUN_RADDR) 1099 orig_raddr = iptun->iptun_raddr; 1100 if ((err = iptun_setraddr(iptun, &ik->iptun_kparam_raddr)) != 0) 1101 goto done; 1102 iptun->iptun_flags |= IPTUN_RADDR; 1103 } 1104 1105 if (ik->iptun_kparam_flags & IPTUN_KPARAM_SECINFO) { 1106 /* 1107 * Set IPsec policy originating from the ifconfig(1M) command 1108 * line. This is traditionally called "simple" policy because 1109 * the ipsec_req_t (iptun_kparam_secinfo) can only describe a 1110 * simple policy of "do ESP on everything" and/or "do AH on 1111 * everything" (as opposed to the rich policy that can be 1112 * defined with ipsecconf(1M)). 1113 */ 1114 if (iptun->iptun_typeinfo->iti_type == IPTUN_TYPE_6TO4) { 1115 /* 1116 * Can't set security properties for automatic 1117 * tunnels. 1118 */ 1119 err = EINVAL; 1120 goto done; 1121 } 1122 1123 if (!ipsec_loaded(ns->netstack_ipsec)) { 1124 /* If IPsec can be loaded, try and load it now. */ 1125 if (ipsec_failed(ns->netstack_ipsec)) { 1126 err = EPROTONOSUPPORT; 1127 goto done; 1128 } 1129 ipsec_loader_loadnow(ns->netstack_ipsec); 1130 /* 1131 * ipsec_loader_loadnow() returns while IPsec is 1132 * loaded asynchronously. While a method exists to 1133 * wait for IPsec to load (ipsec_loader_wait()), it 1134 * requires use of a STREAMS queue to do a qwait(). 1135 * We're not in STREAMS context here, and so we can't 1136 * use it. This is not a problem in practice because 1137 * in the vast majority of cases, key management and 1138 * global policy will have loaded before any tunnels 1139 * are plumbed, and so IPsec will already have been 1140 * loaded. 1141 */ 1142 err = EAGAIN; 1143 goto done; 1144 } 1145 1146 err = iptun_set_sec_simple(iptun, &ik->iptun_kparam_secinfo); 1147 if (err == 0) { 1148 iptun->iptun_flags |= IPTUN_SIMPLE_POLICY; 1149 iptun->iptun_simple_policy = ik->iptun_kparam_secinfo; 1150 } 1151 } 1152 done: 1153 if (err != 0) { 1154 /* Restore original source and destination. */ 1155 if (ik->iptun_kparam_flags & IPTUN_KPARAM_LADDR && 1156 (orig_flags & IPTUN_LADDR)) 1157 iptun->iptun_laddr = orig_laddr; 1158 if ((ik->iptun_kparam_flags & IPTUN_KPARAM_RADDR) && 1159 (orig_flags & IPTUN_RADDR)) 1160 iptun->iptun_raddr = orig_raddr; 1161 iptun->iptun_flags = orig_flags; 1162 } 1163 return (err); 1164 } 1165 1166 static int 1167 iptun_register(iptun_t *iptun) 1168 { 1169 mac_register_t *mac; 1170 int err; 1171 1172 ASSERT(!(iptun->iptun_flags & IPTUN_MAC_REGISTERED)); 1173 1174 if ((mac = mac_alloc(MAC_VERSION)) == NULL) 1175 return (EINVAL); 1176 1177 mac->m_type_ident = iptun->iptun_typeinfo->iti_ident; 1178 mac->m_driver = iptun; 1179 mac->m_dip = iptun_dip; 1180 mac->m_instance = (uint_t)-1; 1181 mac->m_src_addr = (uint8_t *)&iptun->iptun_laddr.ia_addr; 1182 mac->m_dst_addr = iptun->iptun_typeinfo->iti_hasraddr ? 1183 (uint8_t *)&iptun->iptun_raddr.ia_addr : NULL; 1184 mac->m_callbacks = &iptun_m_callbacks; 1185 mac->m_min_sdu = iptun->iptun_typeinfo->iti_minmtu; 1186 mac->m_max_sdu = iptun->iptun_mtu; 1187 if (iptun->iptun_header_size != 0) { 1188 mac->m_pdata = &iptun->iptun_header; 1189 mac->m_pdata_size = iptun->iptun_header_size; 1190 } 1191 if ((err = mac_register(mac, &iptun->iptun_mh)) == 0) 1192 iptun->iptun_flags |= IPTUN_MAC_REGISTERED; 1193 mac_free(mac); 1194 return (err); 1195 } 1196 1197 static int 1198 iptun_unregister(iptun_t *iptun) 1199 { 1200 int err; 1201 1202 ASSERT(iptun->iptun_flags & IPTUN_MAC_REGISTERED); 1203 if ((err = mac_unregister(iptun->iptun_mh)) == 0) 1204 iptun->iptun_flags &= ~IPTUN_MAC_REGISTERED; 1205 return (err); 1206 } 1207 1208 static conn_t * 1209 iptun_conn_create(iptun_t *iptun, netstack_t *ns, cred_t *credp) 1210 { 1211 conn_t *connp; 1212 1213 if ((connp = ipcl_conn_create(IPCL_IPCCONN, KM_NOSLEEP, ns)) == NULL) 1214 return (NULL); 1215 1216 connp->conn_flags |= IPCL_IPTUN; 1217 connp->conn_iptun = iptun; 1218 connp->conn_recv = iptun_input; 1219 connp->conn_recvicmp = iptun_input_icmp; 1220 connp->conn_verifyicmp = iptun_verifyicmp; 1221 1222 /* 1223 * Register iptun_notify to listen to capability changes detected by IP. 1224 * This upcall is made in the context of the call to conn_ip_output. 1225 */ 1226 connp->conn_ixa->ixa_notify = iptun_notify; 1227 connp->conn_ixa->ixa_notify_cookie = iptun; 1228 1229 /* 1230 * For exclusive stacks we set conn_zoneid to GLOBAL_ZONEID as is done 1231 * for all other conn_t's. 1232 * 1233 * Note that there's an important distinction between iptun_zoneid and 1234 * conn_zoneid. The conn_zoneid is set to GLOBAL_ZONEID in non-global 1235 * exclusive stack zones to make the ip module believe that the 1236 * non-global zone is actually a global zone. Therefore, when 1237 * interacting with the ip module, we must always use conn_zoneid. 1238 */ 1239 connp->conn_zoneid = (ns->netstack_stackid == GLOBAL_NETSTACKID) ? 1240 crgetzoneid(credp) : GLOBAL_ZONEID; 1241 connp->conn_cred = credp; 1242 /* crfree() is done in ipcl_conn_destroy(), called by CONN_DEC_REF() */ 1243 crhold(connp->conn_cred); 1244 connp->conn_cpid = NOPID; 1245 1246 /* conn_allzones can not be set this early, hence no IPCL_ZONEID */ 1247 connp->conn_ixa->ixa_zoneid = connp->conn_zoneid; 1248 ASSERT(connp->conn_ref == 1); 1249 1250 /* Cache things in ixa without an extra refhold */ 1251 ASSERT(!(connp->conn_ixa->ixa_free_flags & IXA_FREE_CRED)); 1252 connp->conn_ixa->ixa_cred = connp->conn_cred; 1253 connp->conn_ixa->ixa_cpid = connp->conn_cpid; 1254 if (is_system_labeled()) 1255 connp->conn_ixa->ixa_tsl = crgetlabel(connp->conn_cred); 1256 1257 /* 1258 * Have conn_ip_output drop packets should our outer source 1259 * go invalid 1260 */ 1261 connp->conn_ixa->ixa_flags |= IXAF_VERIFY_SOURCE; 1262 1263 switch (iptun->iptun_typeinfo->iti_ipvers) { 1264 case IPV4_VERSION: 1265 connp->conn_family = AF_INET6; 1266 break; 1267 case IPV6_VERSION: 1268 connp->conn_family = AF_INET; 1269 break; 1270 } 1271 mutex_enter(&connp->conn_lock); 1272 connp->conn_state_flags &= ~CONN_INCIPIENT; 1273 mutex_exit(&connp->conn_lock); 1274 return (connp); 1275 } 1276 1277 static void 1278 iptun_conn_destroy(conn_t *connp) 1279 { 1280 ip_quiesce_conn(connp); 1281 connp->conn_iptun = NULL; 1282 ASSERT(connp->conn_ref == 1); 1283 CONN_DEC_REF(connp); 1284 } 1285 1286 static iptun_t * 1287 iptun_alloc(void) 1288 { 1289 iptun_t *iptun; 1290 1291 if ((iptun = kmem_cache_alloc(iptun_cache, KM_NOSLEEP)) != NULL) { 1292 bzero(iptun, sizeof (*iptun)); 1293 atomic_inc_32(&iptun_tunnelcount); 1294 } 1295 return (iptun); 1296 } 1297 1298 static void 1299 iptun_free(iptun_t *iptun) 1300 { 1301 ASSERT(iptun->iptun_flags & IPTUN_CONDEMNED); 1302 1303 if (iptun->iptun_flags & IPTUN_HASH_INSERTED) { 1304 iptun_stack_t *iptuns = iptun->iptun_iptuns; 1305 1306 mutex_enter(&iptun_hash_lock); 1307 VERIFY(mod_hash_remove(iptun_hash, 1308 IPTUN_HASH_KEY(iptun->iptun_linkid), 1309 (mod_hash_val_t *)&iptun) == 0); 1310 mutex_exit(&iptun_hash_lock); 1311 iptun->iptun_flags &= ~IPTUN_HASH_INSERTED; 1312 mutex_enter(&iptuns->iptuns_lock); 1313 list_remove(&iptuns->iptuns_iptunlist, iptun); 1314 mutex_exit(&iptuns->iptuns_lock); 1315 } 1316 1317 if (iptun->iptun_flags & IPTUN_BOUND) 1318 iptun_unbind(iptun); 1319 1320 /* 1321 * After iptun_unregister(), there will be no threads executing a 1322 * downcall from the mac module, including in the tx datapath. 1323 */ 1324 if (iptun->iptun_flags & IPTUN_MAC_REGISTERED) 1325 VERIFY(iptun_unregister(iptun) == 0); 1326 1327 if (iptun->iptun_itp != NULL) { 1328 /* 1329 * Remove from the AVL tree, AND release the reference iptun_t 1330 * itself holds on the ITP. 1331 */ 1332 itp_unlink(iptun->iptun_itp, iptun->iptun_ns); 1333 ITP_REFRELE(iptun->iptun_itp, iptun->iptun_ns); 1334 iptun->iptun_itp = NULL; 1335 iptun->iptun_flags &= ~IPTUN_SIMPLE_POLICY; 1336 } 1337 1338 /* 1339 * After ipcl_conn_destroy(), there will be no threads executing an 1340 * upcall from ip (i.e., iptun_input()), and it is then safe to free 1341 * the iptun_t. 1342 */ 1343 if (iptun->iptun_connp != NULL) { 1344 iptun_conn_destroy(iptun->iptun_connp); 1345 iptun->iptun_connp = NULL; 1346 } 1347 1348 kmem_cache_free(iptun_cache, iptun); 1349 atomic_dec_32(&iptun_tunnelcount); 1350 } 1351 1352 int 1353 iptun_create(iptun_kparams_t *ik, cred_t *credp) 1354 { 1355 iptun_t *iptun = NULL; 1356 int err = 0, mherr; 1357 char linkname[MAXLINKNAMELEN]; 1358 ipsec_tun_pol_t *itp; 1359 netstack_t *ns = NULL; 1360 iptun_stack_t *iptuns; 1361 datalink_id_t tmpid; 1362 zoneid_t zoneid = crgetzoneid(credp); 1363 boolean_t link_created = B_FALSE; 1364 1365 /* The tunnel type is mandatory */ 1366 if (!(ik->iptun_kparam_flags & IPTUN_KPARAM_TYPE)) 1367 return (EINVAL); 1368 1369 /* 1370 * Is the linkid that the caller wishes to associate with this new 1371 * tunnel assigned to this zone? 1372 */ 1373 if (zone_check_datalink(&zoneid, ik->iptun_kparam_linkid) != 0) { 1374 if (zoneid != GLOBAL_ZONEID) 1375 return (EINVAL); 1376 } else if (zoneid == GLOBAL_ZONEID) { 1377 return (EINVAL); 1378 } 1379 1380 /* 1381 * Make sure that we're not trying to create a tunnel that has already 1382 * been created. 1383 */ 1384 if (iptun_enter_by_linkid(ik->iptun_kparam_linkid, &iptun) == 0) { 1385 iptun_exit(iptun); 1386 iptun = NULL; 1387 err = EEXIST; 1388 goto done; 1389 } 1390 1391 ns = netstack_find_by_cred(credp); 1392 iptuns = ns->netstack_iptun; 1393 1394 if ((iptun = iptun_alloc()) == NULL) { 1395 err = ENOMEM; 1396 goto done; 1397 } 1398 1399 iptun->iptun_linkid = ik->iptun_kparam_linkid; 1400 iptun->iptun_zoneid = zoneid; 1401 iptun->iptun_ns = ns; 1402 1403 iptun->iptun_typeinfo = iptun_gettypeinfo(ik->iptun_kparam_type); 1404 if (iptun->iptun_typeinfo->iti_type == IPTUN_TYPE_UNKNOWN) { 1405 err = EINVAL; 1406 goto done; 1407 } 1408 1409 if (ik->iptun_kparam_flags & IPTUN_KPARAM_IMPLICIT) 1410 iptun->iptun_flags |= IPTUN_IMPLICIT; 1411 1412 if ((err = iptun_setparams(iptun, ik)) != 0) 1413 goto done; 1414 1415 iptun->iptun_hoplimit = IPTUN_DEFAULT_HOPLIMIT; 1416 if (iptun->iptun_typeinfo->iti_type == IPTUN_TYPE_IPV6) 1417 iptun->iptun_encaplimit = IPTUN_DEFAULT_ENCAPLIMIT; 1418 1419 iptun_headergen(iptun, B_FALSE); 1420 1421 iptun->iptun_connp = iptun_conn_create(iptun, ns, credp); 1422 if (iptun->iptun_connp == NULL) { 1423 err = ENOMEM; 1424 goto done; 1425 } 1426 1427 iptun->iptun_mtu = iptun->iptun_typeinfo->iti_maxmtu; 1428 iptun->iptun_dpmtu = iptun->iptun_mtu; 1429 1430 /* 1431 * Find an ITP based on linkname. If we have parms already set via 1432 * the iptun_setparams() call above, it may have created an ITP for 1433 * us. We always try get_tunnel_policy() for DEBUG correctness 1434 * checks, and we may wish to refactor this to only check when 1435 * iptun_itp is NULL. 1436 */ 1437 if ((err = dls_mgmt_get_linkinfo(iptun->iptun_linkid, linkname, NULL, 1438 NULL, NULL)) != 0) 1439 goto done; 1440 if ((itp = get_tunnel_policy(linkname, ns)) != NULL) 1441 iptun->iptun_itp = itp; 1442 1443 /* 1444 * See if we have the necessary IP addresses assigned to this tunnel 1445 * to try and bind them with ip underneath us. If we're not ready to 1446 * bind yet, then we'll defer the bind operation until the addresses 1447 * are modified. 1448 */ 1449 if (iptun_canbind(iptun) && ((err = iptun_bind(iptun)) != 0)) 1450 goto done; 1451 1452 if ((err = iptun_register(iptun)) != 0) 1453 goto done; 1454 1455 err = dls_devnet_create(iptun->iptun_mh, iptun->iptun_linkid, 1456 iptun->iptun_zoneid); 1457 if (err != 0) 1458 goto done; 1459 link_created = B_TRUE; 1460 1461 /* 1462 * We hash by link-id as that is the key used by all other iptun 1463 * interfaces (modify, delete, etc.). 1464 */ 1465 if ((mherr = mod_hash_insert(iptun_hash, 1466 IPTUN_HASH_KEY(iptun->iptun_linkid), (mod_hash_val_t)iptun)) == 0) { 1467 mutex_enter(&iptuns->iptuns_lock); 1468 list_insert_head(&iptuns->iptuns_iptunlist, iptun); 1469 mutex_exit(&iptuns->iptuns_lock); 1470 iptun->iptun_flags |= IPTUN_HASH_INSERTED; 1471 } else if (mherr == MH_ERR_NOMEM) { 1472 err = ENOMEM; 1473 } else if (mherr == MH_ERR_DUPLICATE) { 1474 err = EEXIST; 1475 } else { 1476 err = EINVAL; 1477 } 1478 1479 done: 1480 if (iptun == NULL && ns != NULL) 1481 netstack_rele(ns); 1482 if (err != 0 && iptun != NULL) { 1483 if (link_created) { 1484 (void) dls_devnet_destroy(iptun->iptun_mh, &tmpid, 1485 B_TRUE); 1486 } 1487 iptun->iptun_flags |= IPTUN_CONDEMNED; 1488 iptun_free(iptun); 1489 } 1490 return (err); 1491 } 1492 1493 int 1494 iptun_delete(datalink_id_t linkid, cred_t *credp) 1495 { 1496 int err; 1497 iptun_t *iptun = NULL; 1498 1499 if ((err = iptun_enter_by_linkid(linkid, &iptun)) != 0) 1500 return (err); 1501 1502 /* One cannot delete a tunnel that belongs to another zone. */ 1503 if (iptun->iptun_zoneid != crgetzoneid(credp)) { 1504 iptun_exit(iptun); 1505 return (EACCES); 1506 } 1507 1508 /* 1509 * We need to exit iptun in order to issue calls up the stack such as 1510 * dls_devnet_destroy(). If we call up while still in iptun, deadlock 1511 * with calls coming down the stack is possible. We prevent other 1512 * threads from entering this iptun after we've exited it by setting 1513 * the IPTUN_DELETE_PENDING flag. This will cause callers of 1514 * iptun_enter() to block waiting on iptun_enter_cv. The assumption 1515 * here is that the functions we're calling while IPTUN_DELETE_PENDING 1516 * is set dont resuult in an iptun_enter() call, as that would result 1517 * in deadlock. 1518 */ 1519 iptun->iptun_flags |= IPTUN_DELETE_PENDING; 1520 1521 /* Wait for any pending upcall to the mac module to complete. */ 1522 while (iptun->iptun_flags & IPTUN_UPCALL_PENDING) 1523 cv_wait(&iptun->iptun_upcall_cv, &iptun->iptun_lock); 1524 1525 iptun_exit(iptun); 1526 1527 if ((err = dls_devnet_destroy(iptun->iptun_mh, &linkid, B_TRUE)) == 0) { 1528 /* 1529 * mac_disable() will fail with EBUSY if there are references 1530 * to the iptun MAC. If there are none, then mac_disable() 1531 * will assure that none can be acquired until the MAC is 1532 * unregistered. 1533 * 1534 * XXX CR 6791335 prevents us from calling mac_disable() prior 1535 * to dls_devnet_destroy(), so we unfortunately need to 1536 * attempt to re-create the devnet node if mac_disable() 1537 * fails. 1538 */ 1539 if ((err = mac_disable(iptun->iptun_mh)) != 0) { 1540 (void) dls_devnet_create(iptun->iptun_mh, linkid, 1541 iptun->iptun_zoneid); 1542 } 1543 } 1544 1545 /* 1546 * Now that we know the fate of this iptun_t, we need to clear 1547 * IPTUN_DELETE_PENDING, and set IPTUN_CONDEMNED if the iptun_t is 1548 * slated to be freed. Either way, we need to signal the threads 1549 * waiting in iptun_enter() so that they can either fail if 1550 * IPTUN_CONDEMNED is set, or continue if it's not. 1551 */ 1552 mutex_enter(&iptun->iptun_lock); 1553 iptun->iptun_flags &= ~IPTUN_DELETE_PENDING; 1554 if (err == 0) 1555 iptun->iptun_flags |= IPTUN_CONDEMNED; 1556 cv_broadcast(&iptun->iptun_enter_cv); 1557 mutex_exit(&iptun->iptun_lock); 1558 1559 /* 1560 * Note that there is no danger in calling iptun_free() after having 1561 * dropped the iptun_lock since callers of iptun_enter() at this point 1562 * are doing so from iptun_enter_by_linkid() (mac_disable() got rid of 1563 * threads entering from mac callbacks which call iptun_enter() 1564 * directly) which holds iptun_hash_lock, and iptun_free() grabs this 1565 * lock in order to remove the iptun_t from the hash table. 1566 */ 1567 if (err == 0) 1568 iptun_free(iptun); 1569 1570 return (err); 1571 } 1572 1573 int 1574 iptun_modify(const iptun_kparams_t *ik, cred_t *credp) 1575 { 1576 iptun_t *iptun; 1577 boolean_t laddr_change = B_FALSE, raddr_change = B_FALSE; 1578 int err; 1579 1580 if ((err = iptun_enter_by_linkid(ik->iptun_kparam_linkid, &iptun)) != 0) 1581 return (err); 1582 1583 /* One cannot modify a tunnel that belongs to another zone. */ 1584 if (iptun->iptun_zoneid != crgetzoneid(credp)) { 1585 err = EACCES; 1586 goto done; 1587 } 1588 1589 /* The tunnel type cannot be changed */ 1590 if (ik->iptun_kparam_flags & IPTUN_KPARAM_TYPE) { 1591 err = EINVAL; 1592 goto done; 1593 } 1594 1595 if ((err = iptun_setparams(iptun, ik)) != 0) 1596 goto done; 1597 iptun_headergen(iptun, B_FALSE); 1598 1599 /* 1600 * If any of the tunnel's addresses has been modified and the tunnel 1601 * has the necessary addresses assigned to it, we need to try to bind 1602 * with ip underneath us. If we're not ready to bind yet, then we'll 1603 * try again when the addresses are modified later. 1604 */ 1605 laddr_change = (ik->iptun_kparam_flags & IPTUN_KPARAM_LADDR); 1606 raddr_change = (ik->iptun_kparam_flags & IPTUN_KPARAM_RADDR); 1607 if (laddr_change || raddr_change) { 1608 if (iptun->iptun_flags & IPTUN_BOUND) 1609 iptun_unbind(iptun); 1610 if (iptun_canbind(iptun) && (err = iptun_bind(iptun)) != 0) { 1611 if (laddr_change) 1612 iptun->iptun_flags &= ~IPTUN_LADDR; 1613 if (raddr_change) 1614 iptun->iptun_flags &= ~IPTUN_RADDR; 1615 goto done; 1616 } 1617 } 1618 1619 if (laddr_change) 1620 iptun_task_dispatch(iptun, IPTUN_TASK_LADDR_UPDATE); 1621 if (raddr_change) 1622 iptun_task_dispatch(iptun, IPTUN_TASK_RADDR_UPDATE); 1623 1624 done: 1625 iptun_exit(iptun); 1626 return (err); 1627 } 1628 1629 /* Given an IP tunnel's datalink id, fill in its parameters. */ 1630 int 1631 iptun_info(iptun_kparams_t *ik, cred_t *credp) 1632 { 1633 iptun_t *iptun; 1634 int err; 1635 1636 /* Is the tunnel link visible from the caller's zone? */ 1637 if (!dls_devnet_islinkvisible(ik->iptun_kparam_linkid, 1638 crgetzoneid(credp))) 1639 return (ENOENT); 1640 1641 if ((err = iptun_enter_by_linkid(ik->iptun_kparam_linkid, &iptun)) != 0) 1642 return (err); 1643 1644 bzero(ik, sizeof (iptun_kparams_t)); 1645 1646 ik->iptun_kparam_linkid = iptun->iptun_linkid; 1647 ik->iptun_kparam_type = iptun->iptun_typeinfo->iti_type; 1648 ik->iptun_kparam_flags |= IPTUN_KPARAM_TYPE; 1649 1650 if (iptun->iptun_flags & IPTUN_LADDR) { 1651 iptun_getaddr(&iptun->iptun_laddr, &ik->iptun_kparam_laddr); 1652 ik->iptun_kparam_flags |= IPTUN_KPARAM_LADDR; 1653 } 1654 if (iptun->iptun_flags & IPTUN_RADDR) { 1655 iptun_getaddr(&iptun->iptun_raddr, &ik->iptun_kparam_raddr); 1656 ik->iptun_kparam_flags |= IPTUN_KPARAM_RADDR; 1657 } 1658 1659 if (iptun->iptun_flags & IPTUN_IMPLICIT) 1660 ik->iptun_kparam_flags |= IPTUN_KPARAM_IMPLICIT; 1661 1662 if (iptun->iptun_itp != NULL) { 1663 mutex_enter(&iptun->iptun_itp->itp_lock); 1664 if (iptun->iptun_itp->itp_flags & ITPF_P_ACTIVE) { 1665 ik->iptun_kparam_flags |= IPTUN_KPARAM_IPSECPOL; 1666 if (iptun->iptun_flags & IPTUN_SIMPLE_POLICY) { 1667 ik->iptun_kparam_flags |= IPTUN_KPARAM_SECINFO; 1668 ik->iptun_kparam_secinfo = 1669 iptun->iptun_simple_policy; 1670 } 1671 } 1672 mutex_exit(&iptun->iptun_itp->itp_lock); 1673 } 1674 1675 done: 1676 iptun_exit(iptun); 1677 return (err); 1678 } 1679 1680 int 1681 iptun_set_6to4relay(netstack_t *ns, ipaddr_t relay_addr) 1682 { 1683 if (relay_addr == INADDR_BROADCAST || CLASSD(relay_addr)) 1684 return (EADDRNOTAVAIL); 1685 ns->netstack_iptun->iptuns_relay_rtr_addr = relay_addr; 1686 return (0); 1687 } 1688 1689 void 1690 iptun_get_6to4relay(netstack_t *ns, ipaddr_t *relay_addr) 1691 { 1692 *relay_addr = ns->netstack_iptun->iptuns_relay_rtr_addr; 1693 } 1694 1695 void 1696 iptun_set_policy(datalink_id_t linkid, ipsec_tun_pol_t *itp) 1697 { 1698 iptun_t *iptun; 1699 1700 if (iptun_enter_by_linkid(linkid, &iptun) != 0) 1701 return; 1702 if (iptun->iptun_itp != itp) { 1703 ASSERT(iptun->iptun_itp == NULL); 1704 ITP_REFHOLD(itp); 1705 iptun->iptun_itp = itp; 1706 } 1707 /* 1708 * IPsec policy means IPsec overhead, which means lower MTU. 1709 * Refresh the MTU for this tunnel. 1710 */ 1711 (void) iptun_update_mtu(iptun, NULL, 0); 1712 iptun_exit(iptun); 1713 } 1714 1715 /* 1716 * Obtain the path MTU to the tunnel destination. 1717 * Can return zero in some cases. 1718 */ 1719 static uint32_t 1720 iptun_get_dst_pmtu(iptun_t *iptun, ip_xmit_attr_t *ixa) 1721 { 1722 uint32_t pmtu = 0; 1723 conn_t *connp = iptun->iptun_connp; 1724 boolean_t need_rele = B_FALSE; 1725 1726 /* 1727 * We only obtain the pmtu for tunnels that have a remote tunnel 1728 * address. 1729 */ 1730 if (!(iptun->iptun_flags & IPTUN_RADDR)) 1731 return (0); 1732 1733 if (ixa == NULL) { 1734 ixa = conn_get_ixa(connp, B_FALSE); 1735 if (ixa == NULL) 1736 return (0); 1737 need_rele = B_TRUE; 1738 } 1739 /* 1740 * Guard against ICMP errors before we have sent, as well as against 1741 * and a thread which held conn_ixa. 1742 */ 1743 if (ixa->ixa_ire != NULL) { 1744 pmtu = ip_get_pmtu(ixa); 1745 1746 /* 1747 * For both IPv4 and IPv6 we can have indication that the outer 1748 * header needs fragmentation. 1749 */ 1750 if (ixa->ixa_flags & IXAF_PMTU_TOO_SMALL) { 1751 /* Must allow fragmentation in ip_output */ 1752 ixa->ixa_flags &= ~IXAF_DONTFRAG; 1753 } else if (iptun->iptun_typeinfo->iti_type != IPTUN_TYPE_6TO4) { 1754 ixa->ixa_flags |= IXAF_DONTFRAG; 1755 } else { 1756 /* ip_get_pmtu might have set this - we don't want it */ 1757 ixa->ixa_flags &= ~IXAF_PMTU_IPV4_DF; 1758 } 1759 } 1760 1761 if (need_rele) 1762 ixa_refrele(ixa); 1763 return (pmtu); 1764 } 1765 1766 /* 1767 * Update the ip_xmit_attr_t to capture the current lower path mtu as known 1768 * by ip. 1769 */ 1770 static void 1771 iptun_update_dst_pmtu(iptun_t *iptun, ip_xmit_attr_t *ixa) 1772 { 1773 uint32_t pmtu; 1774 conn_t *connp = iptun->iptun_connp; 1775 boolean_t need_rele = B_FALSE; 1776 1777 /* IXAF_VERIFY_PMTU is not set if we don't have a fixed destination */ 1778 if (!(iptun->iptun_flags & IPTUN_RADDR)) 1779 return; 1780 1781 if (ixa == NULL) { 1782 ixa = conn_get_ixa(connp, B_FALSE); 1783 if (ixa == NULL) 1784 return; 1785 need_rele = B_TRUE; 1786 } 1787 /* 1788 * Guard against ICMP errors before we have sent, as well as against 1789 * and a thread which held conn_ixa. 1790 */ 1791 if (ixa->ixa_ire != NULL) { 1792 pmtu = ip_get_pmtu(ixa); 1793 /* 1794 * Update ixa_fragsize and ixa_pmtu. 1795 */ 1796 ixa->ixa_fragsize = ixa->ixa_pmtu = pmtu; 1797 1798 /* 1799 * For both IPv4 and IPv6 we can have indication that the outer 1800 * header needs fragmentation. 1801 */ 1802 if (ixa->ixa_flags & IXAF_PMTU_TOO_SMALL) { 1803 /* Must allow fragmentation in ip_output */ 1804 ixa->ixa_flags &= ~IXAF_DONTFRAG; 1805 } else if (iptun->iptun_typeinfo->iti_type != IPTUN_TYPE_6TO4) { 1806 ixa->ixa_flags |= IXAF_DONTFRAG; 1807 } else { 1808 /* ip_get_pmtu might have set this - we don't want it */ 1809 ixa->ixa_flags &= ~IXAF_PMTU_IPV4_DF; 1810 } 1811 } 1812 1813 if (need_rele) 1814 ixa_refrele(ixa); 1815 } 1816 1817 /* 1818 * There is nothing that iptun can verify in addition to IP having 1819 * verified the IP addresses in the fanout. 1820 */ 1821 /* ARGSUSED */ 1822 static boolean_t 1823 iptun_verifyicmp(conn_t *connp, void *arg2, icmph_t *icmph, icmp6_t *icmp6, 1824 ip_recv_attr_t *ira) 1825 { 1826 return (B_TRUE); 1827 } 1828 1829 /* 1830 * Notify function registered with ip_xmit_attr_t. 1831 */ 1832 static void 1833 iptun_notify(void *arg, ip_xmit_attr_t *ixa, ixa_notify_type_t ntype, 1834 ixa_notify_arg_t narg) 1835 { 1836 iptun_t *iptun = (iptun_t *)arg; 1837 1838 switch (ntype) { 1839 case IXAN_PMTU: 1840 (void) iptun_update_mtu(iptun, ixa, narg); 1841 break; 1842 } 1843 } 1844 1845 /* 1846 * Returns the max of old_ovhd and the overhead associated with pol. 1847 */ 1848 static uint32_t 1849 iptun_max_policy_overhead(ipsec_policy_t *pol, uint32_t old_ovhd) 1850 { 1851 uint32_t new_ovhd = old_ovhd; 1852 1853 while (pol != NULL) { 1854 new_ovhd = max(new_ovhd, 1855 ipsec_act_ovhd(&pol->ipsp_act->ipa_act)); 1856 pol = pol->ipsp_hash.hash_next; 1857 } 1858 return (new_ovhd); 1859 } 1860 1861 static uint32_t 1862 iptun_get_ipsec_overhead(iptun_t *iptun) 1863 { 1864 ipsec_policy_root_t *ipr; 1865 ipsec_policy_head_t *iph; 1866 ipsec_policy_t *pol; 1867 ipsec_selector_t sel; 1868 int i; 1869 uint32_t ipsec_ovhd = 0; 1870 ipsec_tun_pol_t *itp = iptun->iptun_itp; 1871 netstack_t *ns = iptun->iptun_ns; 1872 1873 if (itp == NULL || !(itp->itp_flags & ITPF_P_ACTIVE)) { 1874 /* 1875 * Consult global policy, just in case. This will only work 1876 * if we have both source and destination addresses to work 1877 * with. 1878 */ 1879 if ((iptun->iptun_flags & (IPTUN_LADDR|IPTUN_RADDR)) != 1880 (IPTUN_LADDR|IPTUN_RADDR)) 1881 return (0); 1882 1883 iph = ipsec_system_policy(ns); 1884 bzero(&sel, sizeof (sel)); 1885 sel.ips_isv4 = 1886 (iptun->iptun_typeinfo->iti_ipvers == IPV4_VERSION); 1887 switch (iptun->iptun_typeinfo->iti_ipvers) { 1888 case IPV4_VERSION: 1889 sel.ips_local_addr_v4 = iptun->iptun_laddr4; 1890 sel.ips_remote_addr_v4 = iptun->iptun_raddr4; 1891 break; 1892 case IPV6_VERSION: 1893 sel.ips_local_addr_v6 = iptun->iptun_laddr6; 1894 sel.ips_remote_addr_v6 = iptun->iptun_raddr6; 1895 break; 1896 } 1897 /* Check for both IPv4 and IPv6. */ 1898 sel.ips_protocol = IPPROTO_ENCAP; 1899 pol = ipsec_find_policy_head(NULL, iph, IPSEC_TYPE_OUTBOUND, 1900 &sel); 1901 if (pol != NULL) { 1902 ipsec_ovhd = ipsec_act_ovhd(&pol->ipsp_act->ipa_act); 1903 IPPOL_REFRELE(pol); 1904 } 1905 sel.ips_protocol = IPPROTO_IPV6; 1906 pol = ipsec_find_policy_head(NULL, iph, IPSEC_TYPE_OUTBOUND, 1907 &sel); 1908 if (pol != NULL) { 1909 ipsec_ovhd = max(ipsec_ovhd, 1910 ipsec_act_ovhd(&pol->ipsp_act->ipa_act)); 1911 IPPOL_REFRELE(pol); 1912 } 1913 IPPH_REFRELE(iph, ns); 1914 } else { 1915 /* 1916 * Look through all of the possible IPsec actions for the 1917 * tunnel, and find the largest potential IPsec overhead. 1918 */ 1919 iph = itp->itp_policy; 1920 rw_enter(&iph->iph_lock, RW_READER); 1921 ipr = &(iph->iph_root[IPSEC_TYPE_OUTBOUND]); 1922 ipsec_ovhd = iptun_max_policy_overhead( 1923 ipr->ipr_nonhash[IPSEC_AF_V4], 0); 1924 ipsec_ovhd = iptun_max_policy_overhead( 1925 ipr->ipr_nonhash[IPSEC_AF_V6], ipsec_ovhd); 1926 for (i = 0; i < ipr->ipr_nchains; i++) { 1927 ipsec_ovhd = iptun_max_policy_overhead( 1928 ipr->ipr_hash[i].hash_head, ipsec_ovhd); 1929 } 1930 rw_exit(&iph->iph_lock); 1931 } 1932 1933 return (ipsec_ovhd); 1934 } 1935 1936 /* 1937 * Calculate and return the maximum possible upper MTU for the given tunnel. 1938 * 1939 * If new_pmtu is set then we also need to update the lower path MTU information 1940 * in the ip_xmit_attr_t. That is needed since we set IXAF_VERIFY_PMTU so that 1941 * we are notified by conn_ip_output() when the path MTU increases. 1942 */ 1943 static uint32_t 1944 iptun_get_maxmtu(iptun_t *iptun, ip_xmit_attr_t *ixa, uint32_t new_pmtu) 1945 { 1946 size_t header_size, ipsec_overhead; 1947 uint32_t maxmtu, pmtu; 1948 1949 /* 1950 * Start with the path-MTU to the remote address, which is either 1951 * provided as the new_pmtu argument, or obtained using 1952 * iptun_get_dst_pmtu(). 1953 */ 1954 if (new_pmtu != 0) { 1955 if (iptun->iptun_flags & IPTUN_RADDR) 1956 iptun->iptun_dpmtu = new_pmtu; 1957 pmtu = new_pmtu; 1958 } else if (iptun->iptun_flags & IPTUN_RADDR) { 1959 if ((pmtu = iptun_get_dst_pmtu(iptun, ixa)) == 0) { 1960 /* 1961 * We weren't able to obtain the path-MTU of the 1962 * destination. Use the previous value. 1963 */ 1964 pmtu = iptun->iptun_dpmtu; 1965 } else { 1966 iptun->iptun_dpmtu = pmtu; 1967 } 1968 } else { 1969 /* 1970 * We have no path-MTU information to go on, use the maximum 1971 * possible value. 1972 */ 1973 pmtu = iptun->iptun_typeinfo->iti_maxmtu; 1974 } 1975 1976 /* 1977 * Now calculate tunneling overhead and subtract that from the 1978 * path-MTU information obtained above. 1979 */ 1980 if (iptun->iptun_header_size != 0) { 1981 header_size = iptun->iptun_header_size; 1982 } else { 1983 switch (iptun->iptun_typeinfo->iti_ipvers) { 1984 case IPV4_VERSION: 1985 header_size = sizeof (ipha_t); 1986 if (is_system_labeled()) 1987 header_size += IP_MAX_OPT_LENGTH; 1988 break; 1989 case IPV6_VERSION: 1990 header_size = sizeof (iptun_ipv6hdrs_t); 1991 break; 1992 } 1993 } 1994 1995 ipsec_overhead = iptun_get_ipsec_overhead(iptun); 1996 1997 maxmtu = pmtu - (header_size + ipsec_overhead); 1998 return (max(maxmtu, iptun->iptun_typeinfo->iti_minmtu)); 1999 } 2000 2001 /* 2002 * Re-calculate the tunnel's MTU as seen from above and notify the MAC layer 2003 * of any change in MTU. The new_pmtu argument is the new lower path MTU to 2004 * the tunnel destination to be used in the tunnel MTU calculation. Passing 2005 * in 0 for new_pmtu causes the lower path MTU to be dynamically updated using 2006 * ip_get_pmtu(). 2007 * 2008 * If the calculated tunnel MTU is different than its previous value, then we 2009 * notify the MAC layer above us of this change using mac_maxsdu_update(). 2010 */ 2011 static uint32_t 2012 iptun_update_mtu(iptun_t *iptun, ip_xmit_attr_t *ixa, uint32_t new_pmtu) 2013 { 2014 uint32_t newmtu; 2015 2016 /* We always update the ixa since we might have set IXAF_VERIFY_PMTU */ 2017 iptun_update_dst_pmtu(iptun, ixa); 2018 2019 /* 2020 * We return the current MTU without updating it if it was pegged to a 2021 * static value using the MAC_PROP_MTU link property. 2022 */ 2023 if (iptun->iptun_flags & IPTUN_FIXED_MTU) 2024 return (iptun->iptun_mtu); 2025 2026 /* If the MTU isn't fixed, then use the maximum possible value. */ 2027 newmtu = iptun_get_maxmtu(iptun, ixa, new_pmtu); 2028 /* 2029 * We only dynamically adjust the tunnel MTU for tunnels with 2030 * destinations because dynamic MTU calculations are based on the 2031 * destination path-MTU. 2032 */ 2033 if ((iptun->iptun_flags & IPTUN_RADDR) && newmtu != iptun->iptun_mtu) { 2034 iptun->iptun_mtu = newmtu; 2035 if (iptun->iptun_flags & IPTUN_MAC_REGISTERED) 2036 iptun_task_dispatch(iptun, IPTUN_TASK_MTU_UPDATE); 2037 } 2038 2039 return (newmtu); 2040 } 2041 2042 /* 2043 * Frees a packet or packet chain and bumps stat for each freed packet. 2044 */ 2045 static void 2046 iptun_drop_pkt(mblk_t *mp, uint64_t *stat) 2047 { 2048 mblk_t *pktmp; 2049 2050 for (pktmp = mp; pktmp != NULL; pktmp = mp) { 2051 mp = mp->b_next; 2052 pktmp->b_next = NULL; 2053 if (stat != NULL) 2054 atomic_inc_64(stat); 2055 freemsg(pktmp); 2056 } 2057 } 2058 2059 /* 2060 * Allocate and return a new mblk to hold an IP and ICMP header, and chain the 2061 * original packet to its b_cont. Returns NULL on failure. 2062 */ 2063 static mblk_t * 2064 iptun_build_icmperr(size_t hdrs_size, mblk_t *orig_pkt) 2065 { 2066 mblk_t *icmperr_mp; 2067 2068 if ((icmperr_mp = allocb(hdrs_size, BPRI_MED)) != NULL) { 2069 icmperr_mp->b_wptr += hdrs_size; 2070 /* tack on the offending packet */ 2071 icmperr_mp->b_cont = orig_pkt; 2072 } 2073 return (icmperr_mp); 2074 } 2075 2076 /* 2077 * Transmit an ICMP error. mp->b_rptr points at the packet to be included in 2078 * the ICMP error. 2079 */ 2080 static void 2081 iptun_sendicmp_v4(iptun_t *iptun, icmph_t *icmp, ipha_t *orig_ipha, mblk_t *mp, 2082 ts_label_t *tsl) 2083 { 2084 size_t orig_pktsize, hdrs_size; 2085 mblk_t *icmperr_mp; 2086 ipha_t *new_ipha; 2087 icmph_t *new_icmp; 2088 ip_xmit_attr_t ixas; 2089 conn_t *connp = iptun->iptun_connp; 2090 2091 orig_pktsize = msgdsize(mp); 2092 hdrs_size = sizeof (ipha_t) + sizeof (icmph_t); 2093 if ((icmperr_mp = iptun_build_icmperr(hdrs_size, mp)) == NULL) { 2094 iptun_drop_pkt(mp, &iptun->iptun_noxmtbuf); 2095 return; 2096 } 2097 2098 new_ipha = (ipha_t *)icmperr_mp->b_rptr; 2099 new_icmp = (icmph_t *)(new_ipha + 1); 2100 2101 new_ipha->ipha_version_and_hdr_length = IP_SIMPLE_HDR_VERSION; 2102 new_ipha->ipha_type_of_service = 0; 2103 new_ipha->ipha_ident = 0; 2104 new_ipha->ipha_fragment_offset_and_flags = 0; 2105 new_ipha->ipha_ttl = orig_ipha->ipha_ttl; 2106 new_ipha->ipha_protocol = IPPROTO_ICMP; 2107 new_ipha->ipha_src = orig_ipha->ipha_dst; 2108 new_ipha->ipha_dst = orig_ipha->ipha_src; 2109 new_ipha->ipha_hdr_checksum = 0; /* will be computed by ip */ 2110 new_ipha->ipha_length = htons(hdrs_size + orig_pktsize); 2111 2112 *new_icmp = *icmp; 2113 new_icmp->icmph_checksum = 0; 2114 new_icmp->icmph_checksum = IP_CSUM(icmperr_mp, sizeof (ipha_t), 0); 2115 2116 bzero(&ixas, sizeof (ixas)); 2117 ixas.ixa_flags = IXAF_BASIC_SIMPLE_V4; 2118 if (new_ipha->ipha_src == INADDR_ANY) 2119 ixas.ixa_flags |= IXAF_SET_SOURCE; 2120 2121 ixas.ixa_zoneid = IPCL_ZONEID(connp); 2122 ixas.ixa_ipst = connp->conn_netstack->netstack_ip; 2123 ixas.ixa_cred = connp->conn_cred; 2124 ixas.ixa_cpid = NOPID; 2125 if (is_system_labeled()) 2126 ixas.ixa_tsl = tsl; 2127 2128 ixas.ixa_ifindex = 0; 2129 ixas.ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 2130 2131 (void) ip_output_simple(icmperr_mp, &ixas); 2132 ixa_cleanup(&ixas); 2133 } 2134 2135 static void 2136 iptun_sendicmp_v6(iptun_t *iptun, icmp6_t *icmp6, ip6_t *orig_ip6h, mblk_t *mp, 2137 ts_label_t *tsl) 2138 { 2139 size_t orig_pktsize, hdrs_size; 2140 mblk_t *icmp6err_mp; 2141 ip6_t *new_ip6h; 2142 icmp6_t *new_icmp6; 2143 ip_xmit_attr_t ixas; 2144 conn_t *connp = iptun->iptun_connp; 2145 2146 orig_pktsize = msgdsize(mp); 2147 hdrs_size = sizeof (ip6_t) + sizeof (icmp6_t); 2148 if ((icmp6err_mp = iptun_build_icmperr(hdrs_size, mp)) == NULL) { 2149 iptun_drop_pkt(mp, &iptun->iptun_noxmtbuf); 2150 return; 2151 } 2152 2153 new_ip6h = (ip6_t *)icmp6err_mp->b_rptr; 2154 new_icmp6 = (icmp6_t *)(new_ip6h + 1); 2155 2156 new_ip6h->ip6_vcf = orig_ip6h->ip6_vcf; 2157 new_ip6h->ip6_plen = htons(sizeof (icmp6_t) + orig_pktsize); 2158 new_ip6h->ip6_hops = orig_ip6h->ip6_hops; 2159 new_ip6h->ip6_nxt = IPPROTO_ICMPV6; 2160 new_ip6h->ip6_src = orig_ip6h->ip6_dst; 2161 new_ip6h->ip6_dst = orig_ip6h->ip6_src; 2162 2163 *new_icmp6 = *icmp6; 2164 /* The checksum is calculated in ip_output_simple and friends. */ 2165 new_icmp6->icmp6_cksum = new_ip6h->ip6_plen; 2166 2167 bzero(&ixas, sizeof (ixas)); 2168 ixas.ixa_flags = IXAF_BASIC_SIMPLE_V6; 2169 if (IN6_IS_ADDR_UNSPECIFIED(&new_ip6h->ip6_src)) 2170 ixas.ixa_flags |= IXAF_SET_SOURCE; 2171 2172 ixas.ixa_zoneid = IPCL_ZONEID(connp); 2173 ixas.ixa_ipst = connp->conn_netstack->netstack_ip; 2174 ixas.ixa_cred = connp->conn_cred; 2175 ixas.ixa_cpid = NOPID; 2176 if (is_system_labeled()) 2177 ixas.ixa_tsl = tsl; 2178 2179 ixas.ixa_ifindex = 0; 2180 ixas.ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 2181 2182 (void) ip_output_simple(icmp6err_mp, &ixas); 2183 ixa_cleanup(&ixas); 2184 } 2185 2186 static void 2187 iptun_icmp_error_v4(iptun_t *iptun, ipha_t *orig_ipha, mblk_t *mp, 2188 uint8_t type, uint8_t code, ts_label_t *tsl) 2189 { 2190 icmph_t icmp; 2191 2192 bzero(&icmp, sizeof (icmp)); 2193 icmp.icmph_type = type; 2194 icmp.icmph_code = code; 2195 2196 iptun_sendicmp_v4(iptun, &icmp, orig_ipha, mp, tsl); 2197 } 2198 2199 static void 2200 iptun_icmp_fragneeded_v4(iptun_t *iptun, uint32_t newmtu, ipha_t *orig_ipha, 2201 mblk_t *mp, ts_label_t *tsl) 2202 { 2203 icmph_t icmp; 2204 2205 icmp.icmph_type = ICMP_DEST_UNREACHABLE; 2206 icmp.icmph_code = ICMP_FRAGMENTATION_NEEDED; 2207 icmp.icmph_du_zero = 0; 2208 icmp.icmph_du_mtu = htons(newmtu); 2209 2210 iptun_sendicmp_v4(iptun, &icmp, orig_ipha, mp, tsl); 2211 } 2212 2213 static void 2214 iptun_icmp_error_v6(iptun_t *iptun, ip6_t *orig_ip6h, mblk_t *mp, 2215 uint8_t type, uint8_t code, uint32_t offset, ts_label_t *tsl) 2216 { 2217 icmp6_t icmp6; 2218 2219 bzero(&icmp6, sizeof (icmp6)); 2220 icmp6.icmp6_type = type; 2221 icmp6.icmp6_code = code; 2222 if (type == ICMP6_PARAM_PROB) 2223 icmp6.icmp6_pptr = htonl(offset); 2224 2225 iptun_sendicmp_v6(iptun, &icmp6, orig_ip6h, mp, tsl); 2226 } 2227 2228 static void 2229 iptun_icmp_toobig_v6(iptun_t *iptun, uint32_t newmtu, ip6_t *orig_ip6h, 2230 mblk_t *mp, ts_label_t *tsl) 2231 { 2232 icmp6_t icmp6; 2233 2234 icmp6.icmp6_type = ICMP6_PACKET_TOO_BIG; 2235 icmp6.icmp6_code = 0; 2236 icmp6.icmp6_mtu = htonl(newmtu); 2237 2238 iptun_sendicmp_v6(iptun, &icmp6, orig_ip6h, mp, tsl); 2239 } 2240 2241 /* 2242 * Determines if the packet pointed to by ipha or ip6h is an ICMP error. The 2243 * mp argument is only used to do bounds checking. 2244 */ 2245 static boolean_t 2246 is_icmp_error(mblk_t *mp, ipha_t *ipha, ip6_t *ip6h) 2247 { 2248 uint16_t hlen; 2249 2250 if (ipha != NULL) { 2251 icmph_t *icmph; 2252 2253 ASSERT(ip6h == NULL); 2254 if (ipha->ipha_protocol != IPPROTO_ICMP) 2255 return (B_FALSE); 2256 2257 hlen = IPH_HDR_LENGTH(ipha); 2258 icmph = (icmph_t *)((uint8_t *)ipha + hlen); 2259 return (ICMP_IS_ERROR(icmph->icmph_type) || 2260 icmph->icmph_type == ICMP_REDIRECT); 2261 } else { 2262 icmp6_t *icmp6; 2263 uint8_t *nexthdrp; 2264 2265 ASSERT(ip6h != NULL); 2266 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hlen, &nexthdrp) || 2267 *nexthdrp != IPPROTO_ICMPV6) { 2268 return (B_FALSE); 2269 } 2270 2271 icmp6 = (icmp6_t *)((uint8_t *)ip6h + hlen); 2272 return (ICMP6_IS_ERROR(icmp6->icmp6_type) || 2273 icmp6->icmp6_type == ND_REDIRECT); 2274 } 2275 } 2276 2277 /* 2278 * Find inner and outer IP headers from a tunneled packet as setup for calls 2279 * into ipsec_tun_{in,out}bound(). 2280 * Note that we need to allow the outer header to be in a separate mblk from 2281 * the inner header. 2282 * If the caller knows the outer_hlen, the caller passes it in. Otherwise zero. 2283 */ 2284 static size_t 2285 iptun_find_headers(mblk_t *mp, size_t outer_hlen, ipha_t **outer4, 2286 ipha_t **inner4, ip6_t **outer6, ip6_t **inner6) 2287 { 2288 ipha_t *ipha; 2289 size_t first_mblkl = MBLKL(mp); 2290 mblk_t *inner_mp; 2291 2292 /* 2293 * Don't bother handling packets that don't have a full IP header in 2294 * the fist mblk. For the input path, the ip module ensures that this 2295 * won't happen, and on the output path, the IP tunneling MAC-type 2296 * plugins ensure that this also won't happen. 2297 */ 2298 if (first_mblkl < sizeof (ipha_t)) 2299 return (0); 2300 ipha = (ipha_t *)(mp->b_rptr); 2301 switch (IPH_HDR_VERSION(ipha)) { 2302 case IPV4_VERSION: 2303 *outer4 = ipha; 2304 *outer6 = NULL; 2305 if (outer_hlen == 0) 2306 outer_hlen = IPH_HDR_LENGTH(ipha); 2307 break; 2308 case IPV6_VERSION: 2309 *outer4 = NULL; 2310 *outer6 = (ip6_t *)ipha; 2311 if (outer_hlen == 0) 2312 outer_hlen = ip_hdr_length_v6(mp, (ip6_t *)ipha); 2313 break; 2314 default: 2315 return (0); 2316 } 2317 2318 if (first_mblkl < outer_hlen || 2319 (first_mblkl == outer_hlen && mp->b_cont == NULL)) 2320 return (0); 2321 2322 /* 2323 * We don't bother doing a pullup here since the outer header will 2324 * just get stripped off soon on input anyway. We just want to ensure 2325 * that the inner* pointer points to a full header. 2326 */ 2327 if (first_mblkl == outer_hlen) { 2328 inner_mp = mp->b_cont; 2329 ipha = (ipha_t *)inner_mp->b_rptr; 2330 } else { 2331 inner_mp = mp; 2332 ipha = (ipha_t *)(mp->b_rptr + outer_hlen); 2333 } 2334 switch (IPH_HDR_VERSION(ipha)) { 2335 case IPV4_VERSION: 2336 if (inner_mp->b_wptr - (uint8_t *)ipha < sizeof (ipha_t)) 2337 return (0); 2338 *inner4 = ipha; 2339 *inner6 = NULL; 2340 break; 2341 case IPV6_VERSION: 2342 if (inner_mp->b_wptr - (uint8_t *)ipha < sizeof (ip6_t)) 2343 return (0); 2344 *inner4 = NULL; 2345 *inner6 = (ip6_t *)ipha; 2346 break; 2347 default: 2348 return (0); 2349 } 2350 2351 return (outer_hlen); 2352 } 2353 2354 /* 2355 * Received ICMP error in response to an X over IPv4 packet that we 2356 * transmitted. 2357 * 2358 * NOTE: "outer" refers to what's inside the ICMP payload. We will get one of 2359 * the following: 2360 * 2361 * [IPv4(0)][ICMPv4][IPv4(1)][IPv4(2)][ULP] 2362 * 2363 * or 2364 * 2365 * [IPv4(0)][ICMPv4][IPv4(1)][IPv6][ULP] 2366 * 2367 * And "outer4" will get set to IPv4(1), and inner[46] will correspond to 2368 * whatever the very-inner packet is (IPv4(2) or IPv6). 2369 */ 2370 static void 2371 iptun_input_icmp_v4(iptun_t *iptun, mblk_t *data_mp, icmph_t *icmph, 2372 ip_recv_attr_t *ira) 2373 { 2374 uint8_t *orig; 2375 ipha_t *outer4, *inner4; 2376 ip6_t *outer6, *inner6; 2377 int outer_hlen; 2378 uint8_t type, code; 2379 2380 ASSERT(data_mp->b_cont == NULL); 2381 /* 2382 * Temporarily move b_rptr forward so that iptun_find_headers() can 2383 * find headers in the ICMP packet payload. 2384 */ 2385 orig = data_mp->b_rptr; 2386 data_mp->b_rptr = (uint8_t *)(icmph + 1); 2387 /* 2388 * The ip module ensures that ICMP errors contain at least the 2389 * original IP header (otherwise, the error would never have made it 2390 * here). 2391 */ 2392 ASSERT(MBLKL(data_mp) >= 0); 2393 outer_hlen = iptun_find_headers(data_mp, 0, &outer4, &inner4, &outer6, 2394 &inner6); 2395 ASSERT(outer6 == NULL); 2396 data_mp->b_rptr = orig; 2397 if (outer_hlen == 0) { 2398 iptun_drop_pkt(data_mp, &iptun->iptun_ierrors); 2399 return; 2400 } 2401 2402 /* Only ICMP errors due to tunneled packets should reach here. */ 2403 ASSERT(outer4->ipha_protocol == IPPROTO_ENCAP || 2404 outer4->ipha_protocol == IPPROTO_IPV6); 2405 2406 data_mp = ipsec_tun_inbound(ira, data_mp, iptun->iptun_itp, 2407 inner4, inner6, outer4, outer6, -outer_hlen, iptun->iptun_ns); 2408 if (data_mp == NULL) { 2409 /* Callee did all of the freeing. */ 2410 atomic_inc_64(&iptun->iptun_ierrors); 2411 return; 2412 } 2413 /* We should never see reassembled fragment here. */ 2414 ASSERT(data_mp->b_next == NULL); 2415 2416 data_mp->b_rptr = (uint8_t *)outer4 + outer_hlen; 2417 2418 /* 2419 * If the original packet being transmitted was itself an ICMP error, 2420 * then drop this packet. We don't want to generate an ICMP error in 2421 * response to an ICMP error. 2422 */ 2423 if (is_icmp_error(data_mp, inner4, inner6)) { 2424 iptun_drop_pkt(data_mp, &iptun->iptun_norcvbuf); 2425 return; 2426 } 2427 2428 switch (icmph->icmph_type) { 2429 case ICMP_DEST_UNREACHABLE: 2430 type = (inner4 != NULL ? icmph->icmph_type : ICMP6_DST_UNREACH); 2431 switch (icmph->icmph_code) { 2432 case ICMP_FRAGMENTATION_NEEDED: { 2433 uint32_t newmtu; 2434 2435 /* 2436 * We reconcile this with the fact that the tunnel may 2437 * also have IPsec policy by letting iptun_update_mtu 2438 * take care of it. 2439 */ 2440 newmtu = iptun_update_mtu(iptun, NULL, 2441 ntohs(icmph->icmph_du_mtu)); 2442 2443 if (inner4 != NULL) { 2444 iptun_icmp_fragneeded_v4(iptun, newmtu, inner4, 2445 data_mp, ira->ira_tsl); 2446 } else { 2447 iptun_icmp_toobig_v6(iptun, newmtu, inner6, 2448 data_mp, ira->ira_tsl); 2449 } 2450 return; 2451 } 2452 case ICMP_DEST_NET_UNREACH_ADMIN: 2453 case ICMP_DEST_HOST_UNREACH_ADMIN: 2454 code = (inner4 != NULL ? ICMP_DEST_NET_UNREACH_ADMIN : 2455 ICMP6_DST_UNREACH_ADMIN); 2456 break; 2457 default: 2458 code = (inner4 != NULL ? ICMP_HOST_UNREACHABLE : 2459 ICMP6_DST_UNREACH_ADDR); 2460 break; 2461 } 2462 break; 2463 case ICMP_TIME_EXCEEDED: 2464 if (inner6 != NULL) { 2465 type = ICMP6_TIME_EXCEEDED; 2466 code = 0; 2467 } /* else we're already set. */ 2468 break; 2469 case ICMP_PARAM_PROBLEM: 2470 /* 2471 * This is a problem with the outer header we transmitted. 2472 * Treat this as an output error. 2473 */ 2474 iptun_drop_pkt(data_mp, &iptun->iptun_oerrors); 2475 return; 2476 default: 2477 iptun_drop_pkt(data_mp, &iptun->iptun_norcvbuf); 2478 return; 2479 } 2480 2481 if (inner4 != NULL) { 2482 iptun_icmp_error_v4(iptun, inner4, data_mp, type, code, 2483 ira->ira_tsl); 2484 } else { 2485 iptun_icmp_error_v6(iptun, inner6, data_mp, type, code, 0, 2486 ira->ira_tsl); 2487 } 2488 } 2489 2490 /* 2491 * Return B_TRUE if the IPv6 packet pointed to by ip6h contains a Tunnel 2492 * Encapsulation Limit destination option. If there is one, set encaplim_ptr 2493 * to point to the option value. 2494 */ 2495 static boolean_t 2496 iptun_find_encaplimit(mblk_t *mp, ip6_t *ip6h, uint8_t **encaplim_ptr) 2497 { 2498 ip_pkt_t pkt; 2499 uint8_t *endptr; 2500 ip6_dest_t *destp; 2501 struct ip6_opt *optp; 2502 2503 pkt.ipp_fields = 0; /* must be initialized */ 2504 (void) ip_find_hdr_v6(mp, ip6h, B_FALSE, &pkt, NULL); 2505 if ((pkt.ipp_fields & IPPF_DSTOPTS) != 0) { 2506 destp = pkt.ipp_dstopts; 2507 } else if ((pkt.ipp_fields & IPPF_RTHDRDSTOPTS) != 0) { 2508 destp = pkt.ipp_rthdrdstopts; 2509 } else { 2510 return (B_FALSE); 2511 } 2512 2513 endptr = (uint8_t *)destp + 8 * (destp->ip6d_len + 1); 2514 optp = (struct ip6_opt *)(destp + 1); 2515 while (endptr - (uint8_t *)optp > sizeof (*optp)) { 2516 if (optp->ip6o_type == IP6OPT_TUNNEL_LIMIT) { 2517 if ((uint8_t *)(optp + 1) >= endptr) 2518 return (B_FALSE); 2519 *encaplim_ptr = (uint8_t *)&optp[1]; 2520 return (B_TRUE); 2521 } 2522 optp = (struct ip6_opt *)((uint8_t *)optp + optp->ip6o_len + 2); 2523 } 2524 return (B_FALSE); 2525 } 2526 2527 /* 2528 * Received ICMPv6 error in response to an X over IPv6 packet that we 2529 * transmitted. 2530 * 2531 * NOTE: "outer" refers to what's inside the ICMP payload. We will get one of 2532 * the following: 2533 * 2534 * [IPv6(0)][ICMPv6][IPv6(1)][IPv4][ULP] 2535 * 2536 * or 2537 * 2538 * [IPv6(0)][ICMPv6][IPv6(1)][IPv6(2)][ULP] 2539 * 2540 * And "outer6" will get set to IPv6(1), and inner[46] will correspond to 2541 * whatever the very-inner packet is (IPv4 or IPv6(2)). 2542 */ 2543 static void 2544 iptun_input_icmp_v6(iptun_t *iptun, mblk_t *data_mp, icmp6_t *icmp6h, 2545 ip_recv_attr_t *ira) 2546 { 2547 uint8_t *orig; 2548 ipha_t *outer4, *inner4; 2549 ip6_t *outer6, *inner6; 2550 int outer_hlen; 2551 uint8_t type, code; 2552 2553 ASSERT(data_mp->b_cont == NULL); 2554 2555 /* 2556 * Temporarily move b_rptr forward so that iptun_find_headers() can 2557 * find IP headers in the ICMP packet payload. 2558 */ 2559 orig = data_mp->b_rptr; 2560 data_mp->b_rptr = (uint8_t *)(icmp6h + 1); 2561 /* 2562 * The ip module ensures that ICMP errors contain at least the 2563 * original IP header (otherwise, the error would never have made it 2564 * here). 2565 */ 2566 ASSERT(MBLKL(data_mp) >= 0); 2567 outer_hlen = iptun_find_headers(data_mp, 0, &outer4, &inner4, &outer6, 2568 &inner6); 2569 ASSERT(outer4 == NULL); 2570 data_mp->b_rptr = orig; /* Restore r_ptr */ 2571 if (outer_hlen == 0) { 2572 iptun_drop_pkt(data_mp, &iptun->iptun_ierrors); 2573 return; 2574 } 2575 2576 data_mp = ipsec_tun_inbound(ira, data_mp, iptun->iptun_itp, 2577 inner4, inner6, outer4, outer6, -outer_hlen, iptun->iptun_ns); 2578 if (data_mp == NULL) { 2579 /* Callee did all of the freeing. */ 2580 atomic_inc_64(&iptun->iptun_ierrors); 2581 return; 2582 } 2583 /* We should never see reassembled fragment here. */ 2584 ASSERT(data_mp->b_next == NULL); 2585 2586 data_mp->b_rptr = (uint8_t *)outer6 + outer_hlen; 2587 2588 /* 2589 * If the original packet being transmitted was itself an ICMP error, 2590 * then drop this packet. We don't want to generate an ICMP error in 2591 * response to an ICMP error. 2592 */ 2593 if (is_icmp_error(data_mp, inner4, inner6)) { 2594 iptun_drop_pkt(data_mp, &iptun->iptun_norcvbuf); 2595 return; 2596 } 2597 2598 switch (icmp6h->icmp6_type) { 2599 case ICMP6_PARAM_PROB: { 2600 uint8_t *encaplim_ptr; 2601 2602 /* 2603 * If the ICMPv6 error points to a valid Tunnel Encapsulation 2604 * Limit option and the limit value is 0, then fall through 2605 * and send a host unreachable message. Otherwise, treat the 2606 * error as an output error, as there must have been a problem 2607 * with a packet we sent. 2608 */ 2609 if (!iptun_find_encaplimit(data_mp, outer6, &encaplim_ptr) || 2610 (icmp6h->icmp6_pptr != 2611 ((ptrdiff_t)encaplim_ptr - (ptrdiff_t)outer6)) || 2612 *encaplim_ptr != 0) { 2613 iptun_drop_pkt(data_mp, &iptun->iptun_oerrors); 2614 return; 2615 } 2616 /* FALLTHRU */ 2617 } 2618 case ICMP6_TIME_EXCEEDED: 2619 case ICMP6_DST_UNREACH: 2620 type = (inner4 != NULL ? ICMP_DEST_UNREACHABLE : 2621 ICMP6_DST_UNREACH); 2622 code = (inner4 != NULL ? ICMP_HOST_UNREACHABLE : 2623 ICMP6_DST_UNREACH_ADDR); 2624 break; 2625 case ICMP6_PACKET_TOO_BIG: { 2626 uint32_t newmtu; 2627 2628 /* 2629 * We reconcile this with the fact that the tunnel may also 2630 * have IPsec policy by letting iptun_update_mtu take care of 2631 * it. 2632 */ 2633 newmtu = iptun_update_mtu(iptun, NULL, 2634 ntohl(icmp6h->icmp6_mtu)); 2635 2636 if (inner4 != NULL) { 2637 iptun_icmp_fragneeded_v4(iptun, newmtu, inner4, 2638 data_mp, ira->ira_tsl); 2639 } else { 2640 iptun_icmp_toobig_v6(iptun, newmtu, inner6, data_mp, 2641 ira->ira_tsl); 2642 } 2643 return; 2644 } 2645 default: 2646 iptun_drop_pkt(data_mp, &iptun->iptun_norcvbuf); 2647 return; 2648 } 2649 2650 if (inner4 != NULL) { 2651 iptun_icmp_error_v4(iptun, inner4, data_mp, type, code, 2652 ira->ira_tsl); 2653 } else { 2654 iptun_icmp_error_v6(iptun, inner6, data_mp, type, code, 0, 2655 ira->ira_tsl); 2656 } 2657 } 2658 2659 /* 2660 * Called as conn_recvicmp from IP for ICMP errors. 2661 */ 2662 /* ARGSUSED2 */ 2663 static void 2664 iptun_input_icmp(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *ira) 2665 { 2666 conn_t *connp = arg; 2667 iptun_t *iptun = connp->conn_iptun; 2668 mblk_t *tmpmp; 2669 size_t hlen; 2670 2671 ASSERT(IPCL_IS_IPTUN(connp)); 2672 2673 if (mp->b_cont != NULL) { 2674 /* 2675 * Since ICMP error processing necessitates access to bits 2676 * that are within the ICMP error payload (the original packet 2677 * that caused the error), pull everything up into a single 2678 * block for convenience. 2679 */ 2680 if ((tmpmp = msgpullup(mp, -1)) == NULL) { 2681 iptun_drop_pkt(mp, &iptun->iptun_norcvbuf); 2682 return; 2683 } 2684 freemsg(mp); 2685 mp = tmpmp; 2686 } 2687 2688 hlen = ira->ira_ip_hdr_length; 2689 switch (iptun->iptun_typeinfo->iti_ipvers) { 2690 case IPV4_VERSION: 2691 /* 2692 * The outer IP header coming up from IP is always ipha_t 2693 * alligned (otherwise, we would have crashed in ip). 2694 */ 2695 iptun_input_icmp_v4(iptun, mp, (icmph_t *)(mp->b_rptr + hlen), 2696 ira); 2697 break; 2698 case IPV6_VERSION: 2699 iptun_input_icmp_v6(iptun, mp, (icmp6_t *)(mp->b_rptr + hlen), 2700 ira); 2701 break; 2702 } 2703 } 2704 2705 static boolean_t 2706 iptun_in_6to4_ok(iptun_t *iptun, ipha_t *outer4, ip6_t *inner6) 2707 { 2708 ipaddr_t v4addr; 2709 2710 /* 2711 * It's possible that someone sent us an IPv4-in-IPv4 packet with the 2712 * IPv4 address of a 6to4 tunnel as the destination. 2713 */ 2714 if (inner6 == NULL) 2715 return (B_FALSE); 2716 2717 /* 2718 * Make sure that the IPv6 destination is within the site that this 2719 * 6to4 tunnel is routing for. We don't want people bouncing random 2720 * tunneled IPv6 packets through this 6to4 router. 2721 */ 2722 IN6_6TO4_TO_V4ADDR(&inner6->ip6_dst, (struct in_addr *)&v4addr); 2723 if (outer4->ipha_dst != v4addr) 2724 return (B_FALSE); 2725 2726 if (IN6_IS_ADDR_6TO4(&inner6->ip6_src)) { 2727 /* 2728 * Section 9 of RFC 3056 (security considerations) suggests 2729 * that when a packet is from a 6to4 site (i.e., it's not a 2730 * global address being forwarded froma relay router), make 2731 * sure that the packet was tunneled by that site's 6to4 2732 * router. 2733 */ 2734 IN6_6TO4_TO_V4ADDR(&inner6->ip6_src, (struct in_addr *)&v4addr); 2735 if (outer4->ipha_src != v4addr) 2736 return (B_FALSE); 2737 } else { 2738 /* 2739 * Only accept packets from a relay router if we've configured 2740 * outbound relay router functionality. 2741 */ 2742 if (iptun->iptun_iptuns->iptuns_relay_rtr_addr == INADDR_ANY) 2743 return (B_FALSE); 2744 } 2745 2746 return (B_TRUE); 2747 } 2748 2749 /* 2750 * Input function for everything that comes up from the ip module below us. 2751 * This is called directly from the ip module via connp->conn_recv(). 2752 * 2753 * We receive M_DATA messages with IP-in-IP tunneled packets. 2754 */ 2755 /* ARGSUSED2 */ 2756 static void 2757 iptun_input(void *arg, mblk_t *data_mp, void *arg2, ip_recv_attr_t *ira) 2758 { 2759 conn_t *connp = arg; 2760 iptun_t *iptun = connp->conn_iptun; 2761 int outer_hlen; 2762 ipha_t *outer4, *inner4; 2763 ip6_t *outer6, *inner6; 2764 2765 ASSERT(IPCL_IS_IPTUN(connp)); 2766 ASSERT(DB_TYPE(data_mp) == M_DATA); 2767 2768 outer_hlen = iptun_find_headers(data_mp, ira->ira_ip_hdr_length, 2769 &outer4, &inner4, &outer6, &inner6); 2770 if (outer_hlen == 0) 2771 goto drop; 2772 2773 /* 2774 * If the system is labeled, we call tsol_check_dest() on the packet 2775 * destination (our local tunnel address) to ensure that the packet as 2776 * labeled should be allowed to be sent to us. We don't need to call 2777 * the more involved tsol_receive_local() since the tunnel link itself 2778 * cannot be assigned to shared-stack non-global zones. 2779 */ 2780 if (ira->ira_flags & IRAF_SYSTEM_LABELED) { 2781 if (ira->ira_tsl == NULL) 2782 goto drop; 2783 if (tsol_check_dest(ira->ira_tsl, (outer4 != NULL ? 2784 (void *)&outer4->ipha_dst : (void *)&outer6->ip6_dst), 2785 (outer4 != NULL ? IPV4_VERSION : IPV6_VERSION), 2786 CONN_MAC_DEFAULT, B_FALSE, NULL) != 0) 2787 goto drop; 2788 } 2789 2790 data_mp = ipsec_tun_inbound(ira, data_mp, iptun->iptun_itp, 2791 inner4, inner6, outer4, outer6, outer_hlen, iptun->iptun_ns); 2792 if (data_mp == NULL) { 2793 /* Callee did all of the freeing. */ 2794 return; 2795 } 2796 2797 if (iptun->iptun_typeinfo->iti_type == IPTUN_TYPE_6TO4 && 2798 !iptun_in_6to4_ok(iptun, outer4, inner6)) 2799 goto drop; 2800 2801 /* 2802 * We need to statistically account for each packet individually, so 2803 * we might as well split up any b_next chains here. 2804 */ 2805 do { 2806 mblk_t *mp; 2807 2808 mp = data_mp->b_next; 2809 data_mp->b_next = NULL; 2810 2811 atomic_inc_64(&iptun->iptun_ipackets); 2812 atomic_add_64(&iptun->iptun_rbytes, msgdsize(data_mp)); 2813 mac_rx(iptun->iptun_mh, NULL, data_mp); 2814 2815 data_mp = mp; 2816 } while (data_mp != NULL); 2817 return; 2818 drop: 2819 iptun_drop_pkt(data_mp, &iptun->iptun_ierrors); 2820 } 2821 2822 /* 2823 * Do 6to4-specific header-processing on output. Return B_TRUE if the packet 2824 * was processed without issue, or B_FALSE if the packet had issues and should 2825 * be dropped. 2826 */ 2827 static boolean_t 2828 iptun_out_process_6to4(iptun_t *iptun, ipha_t *outer4, ip6_t *inner6) 2829 { 2830 ipaddr_t v4addr; 2831 2832 /* 2833 * IPv6 source must be a 6to4 address. This is because a conscious 2834 * decision was made to not allow a Solaris system to be used as a 2835 * relay router (for security reasons) when 6to4 was initially 2836 * integrated. If this decision is ever reversed, the following check 2837 * can be removed. 2838 */ 2839 if (!IN6_IS_ADDR_6TO4(&inner6->ip6_src)) 2840 return (B_FALSE); 2841 2842 /* 2843 * RFC3056 mandates that the IPv4 source MUST be set to the IPv4 2844 * portion of the 6to4 IPv6 source address. In other words, make sure 2845 * that we're tunneling packets from our own 6to4 site. 2846 */ 2847 IN6_6TO4_TO_V4ADDR(&inner6->ip6_src, (struct in_addr *)&v4addr); 2848 if (outer4->ipha_src != v4addr) 2849 return (B_FALSE); 2850 2851 /* 2852 * Automatically set the destination of the outer IPv4 header as 2853 * described in RFC3056. There are two possibilities: 2854 * 2855 * a. If the IPv6 destination is a 6to4 address, set the IPv4 address 2856 * to the IPv4 portion of the 6to4 address. 2857 * b. If the IPv6 destination is a native IPv6 address, set the IPv4 2858 * destination to the address of a relay router. 2859 * 2860 * Design Note: b shouldn't be necessary here, and this is a flaw in 2861 * the design of the 6to4relay command. Instead of setting a 6to4 2862 * relay address in this module via an ioctl, the 6to4relay command 2863 * could simply add a IPv6 route for native IPv6 addresses (such as a 2864 * default route) in the forwarding table that uses a 6to4 destination 2865 * as its next hop, and the IPv4 portion of that address could be a 2866 * 6to4 relay address. In order for this to work, IP would have to 2867 * resolve the next hop address, which would necessitate a link-layer 2868 * address resolver for 6to4 links, which doesn't exist today. 2869 * 2870 * In fact, if a resolver existed for 6to4 links, then setting the 2871 * IPv4 destination in the outer header could be done as part of 2872 * link-layer address resolution and fast-path header generation, and 2873 * not here. 2874 */ 2875 if (IN6_IS_ADDR_6TO4(&inner6->ip6_dst)) { 2876 /* destination is a 6to4 router */ 2877 IN6_6TO4_TO_V4ADDR(&inner6->ip6_dst, 2878 (struct in_addr *)&outer4->ipha_dst); 2879 2880 /* Reject attempts to send to INADDR_ANY */ 2881 if (outer4->ipha_dst == INADDR_ANY) 2882 return (B_FALSE); 2883 } else { 2884 /* 2885 * The destination is a native IPv6 address. If output to a 2886 * relay-router is enabled, use the relay-router's IPv4 2887 * address as the destination. 2888 */ 2889 if (iptun->iptun_iptuns->iptuns_relay_rtr_addr == INADDR_ANY) 2890 return (B_FALSE); 2891 outer4->ipha_dst = iptun->iptun_iptuns->iptuns_relay_rtr_addr; 2892 } 2893 2894 /* 2895 * If the outer source and destination are equal, this means that the 2896 * 6to4 router somehow forwarded an IPv6 packet destined for its own 2897 * 6to4 site to its 6to4 tunnel interface, which will result in this 2898 * packet infinitely bouncing between ip and iptun. 2899 */ 2900 return (outer4->ipha_src != outer4->ipha_dst); 2901 } 2902 2903 /* 2904 * Process output packets with outer IPv4 headers. Frees mp and bumps stat on 2905 * error. 2906 */ 2907 static mblk_t * 2908 iptun_out_process_ipv4(iptun_t *iptun, mblk_t *mp, ipha_t *outer4, 2909 ipha_t *inner4, ip6_t *inner6, ip_xmit_attr_t *ixa) 2910 { 2911 uint8_t *innerptr = (inner4 != NULL ? 2912 (uint8_t *)inner4 : (uint8_t *)inner6); 2913 size_t minmtu = iptun->iptun_typeinfo->iti_minmtu; 2914 2915 if (inner4 != NULL) { 2916 ASSERT(outer4->ipha_protocol == IPPROTO_ENCAP); 2917 /* 2918 * Copy the tos from the inner IPv4 header. We mask off ECN 2919 * bits (bits 6 and 7) because there is currently no 2920 * tunnel-tunnel communication to determine if both sides 2921 * support ECN. We opt for the safe choice: don't copy the 2922 * ECN bits when doing encapsulation. 2923 */ 2924 outer4->ipha_type_of_service = 2925 inner4->ipha_type_of_service & ~0x03; 2926 } else { 2927 ASSERT(outer4->ipha_protocol == IPPROTO_IPV6 && 2928 inner6 != NULL); 2929 } 2930 if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF) 2931 outer4->ipha_fragment_offset_and_flags |= IPH_DF_HTONS; 2932 else 2933 outer4->ipha_fragment_offset_and_flags &= ~IPH_DF_HTONS; 2934 2935 /* 2936 * As described in section 3.2.2 of RFC4213, if the packet payload is 2937 * less than or equal to the minimum MTU size, then we need to allow 2938 * IPv4 to fragment the packet. The reason is that even if we end up 2939 * receiving an ICMP frag-needed, the interface above this tunnel 2940 * won't be allowed to drop its MTU as a result, since the packet was 2941 * already smaller than the smallest allowable MTU for that interface. 2942 */ 2943 if (mp->b_wptr - innerptr <= minmtu) { 2944 outer4->ipha_fragment_offset_and_flags = 0; 2945 ixa->ixa_flags &= ~IXAF_DONTFRAG; 2946 } else if (!(ixa->ixa_flags & IXAF_PMTU_TOO_SMALL) && 2947 (iptun->iptun_typeinfo->iti_type != IPTUN_TYPE_6TO4)) { 2948 ixa->ixa_flags |= IXAF_DONTFRAG; 2949 } 2950 2951 ixa->ixa_ip_hdr_length = IPH_HDR_LENGTH(outer4); 2952 ixa->ixa_pktlen = msgdsize(mp); 2953 ixa->ixa_protocol = outer4->ipha_protocol; 2954 2955 outer4->ipha_length = htons(ixa->ixa_pktlen); 2956 return (mp); 2957 } 2958 2959 /* 2960 * Insert an encapsulation limit destination option in the packet provided. 2961 * Always consumes the mp argument and returns a new mblk pointer. 2962 */ 2963 static mblk_t * 2964 iptun_insert_encaplimit(iptun_t *iptun, mblk_t *mp, ip6_t *outer6, 2965 uint8_t limit) 2966 { 2967 mblk_t *newmp; 2968 iptun_ipv6hdrs_t *newouter6; 2969 2970 ASSERT(outer6->ip6_nxt == IPPROTO_IPV6); 2971 ASSERT(mp->b_cont == NULL); 2972 2973 mp->b_rptr += sizeof (ip6_t); 2974 newmp = allocb(sizeof (iptun_ipv6hdrs_t) + MBLKL(mp), BPRI_MED); 2975 if (newmp == NULL) { 2976 iptun_drop_pkt(mp, &iptun->iptun_noxmtbuf); 2977 return (NULL); 2978 } 2979 newmp->b_wptr += sizeof (iptun_ipv6hdrs_t); 2980 /* Copy the payload (Starting with the inner IPv6 header). */ 2981 bcopy(mp->b_rptr, newmp->b_wptr, MBLKL(mp)); 2982 newmp->b_wptr += MBLKL(mp); 2983 newouter6 = (iptun_ipv6hdrs_t *)newmp->b_rptr; 2984 /* Now copy the outer IPv6 header. */ 2985 bcopy(outer6, &newouter6->it6h_ip6h, sizeof (ip6_t)); 2986 newouter6->it6h_ip6h.ip6_nxt = IPPROTO_DSTOPTS; 2987 newouter6->it6h_encaplim = iptun_encaplim_init; 2988 newouter6->it6h_encaplim.iel_destopt.ip6d_nxt = outer6->ip6_nxt; 2989 newouter6->it6h_encaplim.iel_telopt.ip6ot_encap_limit = limit; 2990 2991 /* 2992 * The payload length will be set at the end of 2993 * iptun_out_process_ipv6(). 2994 */ 2995 2996 freemsg(mp); 2997 return (newmp); 2998 } 2999 3000 /* 3001 * Process output packets with outer IPv6 headers. Frees mp and bumps stats 3002 * on error. 3003 */ 3004 static mblk_t * 3005 iptun_out_process_ipv6(iptun_t *iptun, mblk_t *mp, ip6_t *outer6, 3006 ipha_t *inner4, ip6_t *inner6, ip_xmit_attr_t *ixa) 3007 { 3008 uint8_t *innerptr = (inner4 != NULL ? 3009 (uint8_t *)inner4 : (uint8_t *)inner6); 3010 size_t minmtu = iptun->iptun_typeinfo->iti_minmtu; 3011 uint8_t *limit, *configlimit; 3012 uint32_t offset; 3013 iptun_ipv6hdrs_t *v6hdrs; 3014 3015 if (inner6 != NULL && iptun_find_encaplimit(mp, inner6, &limit)) { 3016 /* 3017 * The inner packet is an IPv6 packet which itself contains an 3018 * encapsulation limit option. The limit variable points to 3019 * the value in the embedded option. Process the 3020 * encapsulation limit option as specified in RFC 2473. 3021 * 3022 * If limit is 0, then we've exceeded the limit and we need to 3023 * send back an ICMPv6 parameter problem message. 3024 * 3025 * If limit is > 0, then we decrement it by 1 and make sure 3026 * that the encapsulation limit option in the outer header 3027 * reflects that (adding an option if one isn't already 3028 * there). 3029 */ 3030 ASSERT(limit > mp->b_rptr && limit < mp->b_wptr); 3031 if (*limit == 0) { 3032 mp->b_rptr = (uint8_t *)inner6; 3033 offset = limit - mp->b_rptr; 3034 iptun_icmp_error_v6(iptun, inner6, mp, ICMP6_PARAM_PROB, 3035 0, offset, ixa->ixa_tsl); 3036 atomic_inc_64(&iptun->iptun_noxmtbuf); 3037 return (NULL); 3038 } 3039 3040 /* 3041 * The outer header requires an encapsulation limit option. 3042 * If there isn't one already, add one. 3043 */ 3044 if (iptun->iptun_encaplimit == 0) { 3045 if ((mp = iptun_insert_encaplimit(iptun, mp, outer6, 3046 (*limit - 1))) == NULL) 3047 return (NULL); 3048 v6hdrs = (iptun_ipv6hdrs_t *)mp->b_rptr; 3049 } else { 3050 /* 3051 * There is an existing encapsulation limit option in 3052 * the outer header. If the inner encapsulation limit 3053 * is less than the configured encapsulation limit, 3054 * update the outer encapsulation limit to reflect 3055 * this lesser value. 3056 */ 3057 v6hdrs = (iptun_ipv6hdrs_t *)mp->b_rptr; 3058 configlimit = 3059 &v6hdrs->it6h_encaplim.iel_telopt.ip6ot_encap_limit; 3060 if ((*limit - 1) < *configlimit) 3061 *configlimit = (*limit - 1); 3062 } 3063 ixa->ixa_ip_hdr_length = sizeof (iptun_ipv6hdrs_t); 3064 ixa->ixa_protocol = v6hdrs->it6h_encaplim.iel_destopt.ip6d_nxt; 3065 } else { 3066 ixa->ixa_ip_hdr_length = sizeof (ip6_t); 3067 ixa->ixa_protocol = outer6->ip6_nxt; 3068 } 3069 /* 3070 * See iptun_output_process_ipv4() why we allow fragmentation for 3071 * small packets 3072 */ 3073 if (mp->b_wptr - innerptr <= minmtu) 3074 ixa->ixa_flags &= ~IXAF_DONTFRAG; 3075 else if (!(ixa->ixa_flags & IXAF_PMTU_TOO_SMALL)) 3076 ixa->ixa_flags |= IXAF_DONTFRAG; 3077 3078 ixa->ixa_pktlen = msgdsize(mp); 3079 outer6->ip6_plen = htons(ixa->ixa_pktlen - sizeof (ip6_t)); 3080 return (mp); 3081 } 3082 3083 /* 3084 * The IP tunneling MAC-type plugins have already done most of the header 3085 * processing and validity checks. We are simply responsible for multiplexing 3086 * down to the ip module below us. 3087 */ 3088 static void 3089 iptun_output(iptun_t *iptun, mblk_t *mp) 3090 { 3091 conn_t *connp = iptun->iptun_connp; 3092 mblk_t *newmp; 3093 int error; 3094 ip_xmit_attr_t *ixa; 3095 3096 ASSERT(mp->b_datap->db_type == M_DATA); 3097 3098 if (mp->b_cont != NULL) { 3099 if ((newmp = msgpullup(mp, -1)) == NULL) { 3100 iptun_drop_pkt(mp, &iptun->iptun_noxmtbuf); 3101 return; 3102 } 3103 freemsg(mp); 3104 mp = newmp; 3105 } 3106 3107 if (iptun->iptun_typeinfo->iti_type == IPTUN_TYPE_6TO4) { 3108 iptun_output_6to4(iptun, mp); 3109 return; 3110 } 3111 3112 if (is_system_labeled()) { 3113 /* 3114 * Since the label can be different meaning a potentially 3115 * different IRE,we always use a unique ip_xmit_attr_t. 3116 */ 3117 ixa = conn_get_ixa_exclusive(connp); 3118 } else { 3119 /* 3120 * If no other thread is using conn_ixa this just gets a 3121 * reference to conn_ixa. Otherwise we get a safe copy of 3122 * conn_ixa. 3123 */ 3124 ixa = conn_get_ixa(connp, B_FALSE); 3125 } 3126 if (ixa == NULL) { 3127 iptun_drop_pkt(mp, &iptun->iptun_oerrors); 3128 return; 3129 } 3130 3131 /* 3132 * In case we got a safe copy of conn_ixa, then we need 3133 * to fill in any pointers in it. 3134 */ 3135 if (ixa->ixa_ire == NULL) { 3136 error = ip_attr_connect(connp, ixa, &connp->conn_saddr_v6, 3137 &connp->conn_faddr_v6, &connp->conn_faddr_v6, 0, 3138 NULL, NULL, 0); 3139 if (error != 0) { 3140 if (ixa->ixa_ire != NULL && 3141 (error == EHOSTUNREACH || error == ENETUNREACH)) { 3142 /* 3143 * Let conn_ip_output/ire_send_noroute return 3144 * the error and send any local ICMP error. 3145 */ 3146 error = 0; 3147 } else { 3148 ixa_refrele(ixa); 3149 iptun_drop_pkt(mp, &iptun->iptun_oerrors); 3150 return; 3151 } 3152 } 3153 } 3154 3155 iptun_output_common(iptun, ixa, mp); 3156 ixa_refrele(ixa); 3157 } 3158 3159 /* 3160 * We use an ixa based on the last destination. 3161 */ 3162 static void 3163 iptun_output_6to4(iptun_t *iptun, mblk_t *mp) 3164 { 3165 conn_t *connp = iptun->iptun_connp; 3166 ipha_t *outer4, *inner4; 3167 ip6_t *outer6, *inner6; 3168 ip_xmit_attr_t *ixa; 3169 ip_xmit_attr_t *oldixa; 3170 int error; 3171 boolean_t need_connect; 3172 in6_addr_t v6dst; 3173 3174 ASSERT(mp->b_cont == NULL); /* Verified by iptun_output */ 3175 3176 /* Make sure we set ipha_dst before we look at ipha_dst */ 3177 3178 (void) iptun_find_headers(mp, 0, &outer4, &inner4, &outer6, &inner6); 3179 ASSERT(outer4 != NULL); 3180 if (!iptun_out_process_6to4(iptun, outer4, inner6)) { 3181 iptun_drop_pkt(mp, &iptun->iptun_oerrors); 3182 return; 3183 } 3184 3185 if (is_system_labeled()) { 3186 /* 3187 * Since the label can be different meaning a potentially 3188 * different IRE,we always use a unique ip_xmit_attr_t. 3189 */ 3190 ixa = conn_get_ixa_exclusive(connp); 3191 } else { 3192 /* 3193 * If no other thread is using conn_ixa this just gets a 3194 * reference to conn_ixa. Otherwise we get a safe copy of 3195 * conn_ixa. 3196 */ 3197 ixa = conn_get_ixa(connp, B_FALSE); 3198 } 3199 if (ixa == NULL) { 3200 iptun_drop_pkt(mp, &iptun->iptun_oerrors); 3201 return; 3202 } 3203 3204 mutex_enter(&connp->conn_lock); 3205 if (connp->conn_v4lastdst == outer4->ipha_dst) { 3206 need_connect = (ixa->ixa_ire == NULL); 3207 } else { 3208 /* In case previous destination was multirt */ 3209 ip_attr_newdst(ixa); 3210 3211 /* 3212 * We later update conn_ixa when we update conn_v4lastdst 3213 * which enables subsequent packets to avoid redoing 3214 * ip_attr_connect 3215 */ 3216 need_connect = B_TRUE; 3217 } 3218 mutex_exit(&connp->conn_lock); 3219 3220 /* 3221 * In case we got a safe copy of conn_ixa, or otherwise we don't 3222 * have a current ixa_ire, then we need to fill in any pointers in 3223 * the ixa. 3224 */ 3225 if (need_connect) { 3226 IN6_IPADDR_TO_V4MAPPED(outer4->ipha_dst, &v6dst); 3227 3228 /* We handle IPsec in iptun_output_common */ 3229 error = ip_attr_connect(connp, ixa, &connp->conn_saddr_v6, 3230 &v6dst, &v6dst, 0, NULL, NULL, 0); 3231 if (error != 0) { 3232 if (ixa->ixa_ire != NULL && 3233 (error == EHOSTUNREACH || error == ENETUNREACH)) { 3234 /* 3235 * Let conn_ip_output/ire_send_noroute return 3236 * the error and send any local ICMP error. 3237 */ 3238 error = 0; 3239 } else { 3240 ixa_refrele(ixa); 3241 iptun_drop_pkt(mp, &iptun->iptun_oerrors); 3242 return; 3243 } 3244 } 3245 } 3246 3247 iptun_output_common(iptun, ixa, mp); 3248 3249 /* Atomically replace conn_ixa and conn_v4lastdst */ 3250 mutex_enter(&connp->conn_lock); 3251 if (connp->conn_v4lastdst != outer4->ipha_dst) { 3252 /* Remember the dst which corresponds to conn_ixa */ 3253 connp->conn_v6lastdst = v6dst; 3254 oldixa = conn_replace_ixa(connp, ixa); 3255 } else { 3256 oldixa = NULL; 3257 } 3258 mutex_exit(&connp->conn_lock); 3259 ixa_refrele(ixa); 3260 if (oldixa != NULL) 3261 ixa_refrele(oldixa); 3262 } 3263 3264 /* 3265 * Check the destination/label. Modifies *mpp by adding/removing CIPSO. 3266 * 3267 * We get the label from the message in order to honor the 3268 * ULPs/IPs choice of label. This will be NULL for forwarded 3269 * packets, neighbor discovery packets and some others. 3270 */ 3271 static int 3272 iptun_output_check_label(mblk_t **mpp, ip_xmit_attr_t *ixa) 3273 { 3274 cred_t *cr; 3275 int adjust; 3276 int iplen; 3277 int err; 3278 ts_label_t *effective_tsl = NULL; 3279 3280 3281 ASSERT(is_system_labeled()); 3282 3283 cr = msg_getcred(*mpp, NULL); 3284 if (cr == NULL) 3285 return (0); 3286 3287 /* 3288 * We need to start with a label based on the IP/ULP above us 3289 */ 3290 ip_xmit_attr_restore_tsl(ixa, cr); 3291 3292 /* 3293 * Need to update packet with any CIPSO option since 3294 * conn_ip_output doesn't do that. 3295 */ 3296 if (ixa->ixa_flags & IXAF_IS_IPV4) { 3297 ipha_t *ipha; 3298 3299 ipha = (ipha_t *)(*mpp)->b_rptr; 3300 iplen = ntohs(ipha->ipha_length); 3301 err = tsol_check_label_v4(ixa->ixa_tsl, 3302 ixa->ixa_zoneid, mpp, CONN_MAC_DEFAULT, B_FALSE, 3303 ixa->ixa_ipst, &effective_tsl); 3304 if (err != 0) 3305 return (err); 3306 3307 ipha = (ipha_t *)(*mpp)->b_rptr; 3308 adjust = (int)ntohs(ipha->ipha_length) - iplen; 3309 } else { 3310 ip6_t *ip6h; 3311 3312 ip6h = (ip6_t *)(*mpp)->b_rptr; 3313 iplen = ntohs(ip6h->ip6_plen); 3314 3315 err = tsol_check_label_v6(ixa->ixa_tsl, 3316 ixa->ixa_zoneid, mpp, CONN_MAC_DEFAULT, B_FALSE, 3317 ixa->ixa_ipst, &effective_tsl); 3318 if (err != 0) 3319 return (err); 3320 3321 ip6h = (ip6_t *)(*mpp)->b_rptr; 3322 adjust = (int)ntohs(ip6h->ip6_plen) - iplen; 3323 } 3324 3325 if (effective_tsl != NULL) { 3326 /* Update the label */ 3327 ip_xmit_attr_replace_tsl(ixa, effective_tsl); 3328 } 3329 ixa->ixa_pktlen += adjust; 3330 ixa->ixa_ip_hdr_length += adjust; 3331 return (0); 3332 } 3333 3334 3335 static void 3336 iptun_output_common(iptun_t *iptun, ip_xmit_attr_t *ixa, mblk_t *mp) 3337 { 3338 ipsec_tun_pol_t *itp = iptun->iptun_itp; 3339 int outer_hlen; 3340 mblk_t *newmp; 3341 ipha_t *outer4, *inner4; 3342 ip6_t *outer6, *inner6; 3343 int error; 3344 boolean_t update_pktlen; 3345 3346 ASSERT(ixa->ixa_ire != NULL); 3347 3348 outer_hlen = iptun_find_headers(mp, 0, &outer4, &inner4, &outer6, 3349 &inner6); 3350 if (outer_hlen == 0) { 3351 iptun_drop_pkt(mp, &iptun->iptun_oerrors); 3352 return; 3353 } 3354 3355 /* Save IXAF_DONTFRAG value */ 3356 iaflags_t dontfrag = ixa->ixa_flags & IXAF_DONTFRAG; 3357 3358 /* Perform header processing. */ 3359 if (outer4 != NULL) { 3360 mp = iptun_out_process_ipv4(iptun, mp, outer4, inner4, inner6, 3361 ixa); 3362 } else { 3363 mp = iptun_out_process_ipv6(iptun, mp, outer6, inner4, inner6, 3364 ixa); 3365 } 3366 if (mp == NULL) 3367 return; 3368 3369 /* 3370 * Let's hope the compiler optimizes this with "branch taken". 3371 */ 3372 if (itp != NULL && (itp->itp_flags & ITPF_P_ACTIVE)) { 3373 /* This updates the ip_xmit_attr_t */ 3374 mp = ipsec_tun_outbound(mp, iptun, inner4, inner6, outer4, 3375 outer6, outer_hlen, ixa); 3376 if (mp == NULL) { 3377 atomic_inc_64(&iptun->iptun_oerrors); 3378 return; 3379 } 3380 if (is_system_labeled()) { 3381 /* 3382 * Might change the packet by adding/removing CIPSO. 3383 * After this caller inner* and outer* and outer_hlen 3384 * might be invalid. 3385 */ 3386 error = iptun_output_check_label(&mp, ixa); 3387 if (error != 0) { 3388 ip2dbg(("label check failed (%d)\n", error)); 3389 iptun_drop_pkt(mp, &iptun->iptun_oerrors); 3390 return; 3391 } 3392 } 3393 3394 /* 3395 * ipsec_tun_outbound() returns a chain of tunneled IP 3396 * fragments linked with b_next (or a single message if the 3397 * tunneled packet wasn't a fragment). 3398 * If fragcache returned a list then we need to update 3399 * ixa_pktlen for all packets in the list. 3400 */ 3401 update_pktlen = (mp->b_next != NULL); 3402 3403 /* 3404 * Otherwise, we're good to go. The ixa has been updated with 3405 * instructions for outbound IPsec processing. 3406 */ 3407 for (newmp = mp; newmp != NULL; newmp = mp) { 3408 size_t minmtu = iptun->iptun_typeinfo->iti_minmtu; 3409 3410 atomic_inc_64(&iptun->iptun_opackets); 3411 atomic_add_64(&iptun->iptun_obytes, ixa->ixa_pktlen); 3412 mp = mp->b_next; 3413 newmp->b_next = NULL; 3414 3415 /* 3416 * The IXAF_DONTFRAG flag is global, but there is 3417 * a chain here. Check if we're really already 3418 * smaller than the minimum allowed MTU and reset here 3419 * appropriately. Otherwise one small packet can kill 3420 * the whole chain's path mtu discovery. 3421 * In addition, update the pktlen to the length of 3422 * the actual packet being processed. 3423 */ 3424 if (update_pktlen) { 3425 ixa->ixa_pktlen = msgdsize(newmp); 3426 if (ixa->ixa_pktlen <= minmtu) 3427 ixa->ixa_flags &= ~IXAF_DONTFRAG; 3428 } 3429 3430 atomic_inc_64(&iptun->iptun_opackets); 3431 atomic_add_64(&iptun->iptun_obytes, ixa->ixa_pktlen); 3432 3433 error = conn_ip_output(newmp, ixa); 3434 3435 /* Restore IXAF_DONTFRAG value */ 3436 ixa->ixa_flags |= dontfrag; 3437 3438 if (error == EMSGSIZE) { 3439 /* IPsec policy might have changed */ 3440 (void) iptun_update_mtu(iptun, ixa, 0); 3441 } 3442 } 3443 } else { 3444 /* 3445 * The ip module will potentially apply global policy to the 3446 * packet in its output path if there's no active tunnel 3447 * policy. 3448 */ 3449 ASSERT(ixa->ixa_ipsec_policy == NULL); 3450 mp = ip_output_attach_policy(mp, outer4, outer6, NULL, ixa); 3451 if (mp == NULL) { 3452 atomic_inc_64(&iptun->iptun_oerrors); 3453 return; 3454 } 3455 if (is_system_labeled()) { 3456 /* 3457 * Might change the packet by adding/removing CIPSO. 3458 * After this caller inner* and outer* and outer_hlen 3459 * might be invalid. 3460 */ 3461 error = iptun_output_check_label(&mp, ixa); 3462 if (error != 0) { 3463 ip2dbg(("label check failed (%d)\n", error)); 3464 iptun_drop_pkt(mp, &iptun->iptun_oerrors); 3465 return; 3466 } 3467 } 3468 3469 atomic_inc_64(&iptun->iptun_opackets); 3470 atomic_add_64(&iptun->iptun_obytes, ixa->ixa_pktlen); 3471 3472 error = conn_ip_output(mp, ixa); 3473 if (error == EMSGSIZE) { 3474 /* IPsec policy might have changed */ 3475 (void) iptun_update_mtu(iptun, ixa, 0); 3476 } 3477 } 3478 if (ixa->ixa_flags & IXAF_IPSEC_SECURE) 3479 ipsec_out_release_refs(ixa); 3480 } 3481 3482 static mac_callbacks_t iptun_m_callbacks = { 3483 .mc_callbacks = (MC_SETPROP | MC_GETPROP | MC_PROPINFO), 3484 .mc_getstat = iptun_m_getstat, 3485 .mc_start = iptun_m_start, 3486 .mc_stop = iptun_m_stop, 3487 .mc_setpromisc = iptun_m_setpromisc, 3488 .mc_multicst = iptun_m_multicst, 3489 .mc_unicst = iptun_m_unicst, 3490 .mc_tx = iptun_m_tx, 3491 .mc_reserved = NULL, 3492 .mc_setprop = iptun_m_setprop, 3493 .mc_getprop = iptun_m_getprop, 3494 .mc_propinfo = iptun_m_propinfo 3495 }; 3496