/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */
/*
 * Copyright (c) 2012, Joyent, Inc. All rights reserved.
 */

/*
 * ipd: Internet packet disturber
 *
 * The purpose of ipd is to simulate congested and lossy networks when they
 * don't actually exist. The features of these congested and lossy networks
 * are events that end up leading to retransmits and thus kicking us out of
 * the TCP/IP fastpath. Since normally this would require us to have an
 * actually congested network, which can be problematic, we instead simulate
 * this behavior.
 *
 * 1. ipd's operations and restrictions
 *
 * ipd currently has facilities to cause IP traffic to be:
 *
 *   - Corrupted with some probability.
 *   - Delayed for a set number of microseconds.
 *   - Dropped with some probability.
 *
 * Each of these features is enabled on a per-zone basis. The current
 * implementation restricts this specifically to exclusive stack zones.
 * Enabling ipd on a given zone causes pfhooks to be installed for that zone's
 * netstack. Because of the nature of ipd, it currently only supports
 * exclusive stack zones and, as a further restriction, it only allows the
 * global zone administrative access. ipd can be enabled for the global zone,
 * but doing so will cause all shared-stack zones to also be affected.
 *
 * 2. General architecture and locking
 *
 * ipd consists of a few components. There is a per-netstack data structure
 * that is created and destroyed with the creation and destruction of each
 * exclusive stack zone. Each of these netstacks is stored in a global list
 * which is accessed for control of ipd via ioctls. The following diagram
 * touches on the data structures that are used throughout ipd.
 *
 *      ADMINISTRATIVE                        DATA PATH
 *
 *      +--------+                    +------+     +------+
 *      | ipdadm |                    |  ip  |     | nics |
 *      +--------+                    +------+     +------+
 *         | ^                           |            |
 *         | | ioctl(2)                  |            |
 *         V |                           V            V
 *      +----------+            +-------------------------+
 *      | /dev/ipd |            | pfhooks packet callback |  == ipd_hook()
 *      +----------+            +-------------------------+
 *           |                               |
 *           |                               |
 *           V                               |
 *      +----------------+                   |
 *      | list_t ipd_nsl |------+            |
 *      +----------------+      |            |
 *                              |            |
 *               per netstack   V            V
 *              +----------------------------+
 *              |       ipd_netstack_t       |
 *              +----------------------------+
 *
 * ipd has two different entry points: one is administrative, the other is the
 * data path. The administrative path is accessed by a userland component
 * called ipdadm(1M). It communicates to the kernel component via ioctls to
 * /dev/ipd. If the administrative path enables a specific zone, then the data
 * path will become active for that zone. Any packet that leaves that zone's
 * IP stack, or is going to enter it, comes through the callback specified in
 * the hook_t(9S) structure. This causes each packet to go through
 * ipd_hook().
 *
 * While the locking inside of ipd should be straightforward, unfortunately,
 * the pfhooks subsystem necessarily complicates this a little bit. There are
 * currently three different sets of locks in ipd:
 *
 *   - Global lock N on the netstack list.
 *   - Global lock A on the active count.
 *   - Per-netstack data structure lock Z.
 *
 * # Locking rules
 *
 * L.1a N must always be acquired first and released last
 *
 * If you need to acquire the netstack list lock, either for reading or
 * writing, then N must be acquired first and before any other locks. It may
 * not be dropped before any other lock.
 *
 * L.1b N must only be acquired from the administrative path and the zone
 *      creation, shutdown, and destroy callbacks.
 *
 * The data path, e.g. receiving the per-packet callbacks, should never grab
 * the list lock. If it does, then the architecture here needs to be
 * reconsidered.
 *
 * L.2 Z cannot be held across calls to the pfhooks subsystem if packet hooks
 *     are active.
 *
 * The way the pfhooks subsystem is designed is that a reference count is
 * present on the hook_t while it is active. As long as that reference count
 * is non-zero, a call to net_hook_unregister will block until it is lowered.
 * Because the callbacks want the same lock for the netstack that is held by
 * the administrative path calling into net_hook_unregister, we deadlock.
 *
 *  ioctl from ipdadm remove      hook_t cb (from nic)     hook_t cb (from IP)
 *  ------------------------      --------------------     -------------------
 *             |                           |                         |
 *             |                  bump hook_t refcount               |
 *  mutex_enter(ipd_nsl_lock);      enter ipd_hook()       bump hook_t refcount
 *  mutex acquired           mutex_enter(ins->ipdn_lock);            |
 *             |                    mutex acquired           enter ipd_hook()
 *  mutex_enter(ins->ipdn_lock);           |       mutex_enter(ins->ipdn_lock);
 *             |                           |                         |
 *             |                           |                         |
 *             |             mutex_exit(ins->ipdn_lock);             |
 *             |                           |                         |
 *  mutex acquired                 leave ipd_hook()                  |
 *             |             decrement hook_t refcount               |
 *             |                           |                         |
 *  ipd_teardown_hooks()                   |                         |
 *  net_hook_unregister()                  |                         |
 *  cv_wait() if refcount                  |                         |
 *             |                           |                         |
 * ---------------------------------------------------------------------------
 *
 * At this point, we can see that the second hook callback still doesn't have
 * the mutex, but it has bumped the hook_t refcount. However, it will never
 * acquire the mutex that it needs to finish its operation and decrement the
 * refcount.
 *
 * Obviously, deadlocking is not acceptable, thus the following corollary to
 * the second locking rule:
 *
 * L.2 Corollary: If Z is being released across a call to the pfhooks
 *     subsystem, N must be held.
 *
 * There is currently only one path where we have to worry about this: when we
 * are removing a hook while the zone is not being shut down, i.e. hooks are
 * currently active. The only place that this currently happens is in
 * ipd_check_hooks().
 */
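
/*
 * To make rule L.2 and its corollary concrete, the teardown path in
 * ipd_check_hooks() proceeds as follows (a sketch of the locking order, not
 * additional code):
 *
 *      mutex_enter(&ipd_nsl_lock);     N held: admin path serialized
 *      mutex_enter(&ins->ipdn_lock);   Z held: update ipdn_enabled
 *      ...
 *      mutex_exit(&ins->ipdn_lock);    drop Z before calling pfhooks
 *      ipd_teardown_hooks(ins);        may cv_wait() on hook_t refcounts
 *      mutex_enter(&ins->ipdn_lock);   safe: N still excludes other admins
 *
 * Because N is held throughout, no other administrative operation can observe
 * the window where Z is dropped, and the in-flight hook callbacks that hold
 * the hook_t refcount only need Z, which is now free.
 */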

#include <sys/types.h>
#include <sys/file.h>
#include <sys/errno.h>
#include <sys/open.h>
#include <sys/cred.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/kmem.h>
#include <sys/conf.h>
#include <sys/stat.h>
#include <sys/cmn_err.h>
#include <sys/modctl.h>
#include <sys/kstat.h>
#include <sys/neti.h>
#include <sys/list.h>
#include <sys/ksynch.h>
#include <sys/sysmacros.h>
#include <sys/policy.h>
#include <sys/atomic.h>
#include <sys/model.h>
#include <sys/strsun.h>

#include <sys/netstack.h>
#include <sys/hook.h>
#include <sys/hook_event.h>

#include <sys/ipd.h>

#define IPDN_STATUS_DISABLED    0x1
#define IPDN_STATUS_ENABLED     0x2
#define IPDN_STATUS_CONDEMNED   0x4

/*
 * These flags are used to determine whether or not the hooks are registered.
 */
#define IPDN_HOOK_NONE          0x0
#define IPDN_HOOK_V4IN          0x1
#define IPDN_HOOK_V4OUT         0x2
#define IPDN_HOOK_V6IN          0x4
#define IPDN_HOOK_V6OUT         0x8
#define IPDN_HOOK_ALL           0xf

/*
 * Per-netstack kstats.
 */
typedef struct ipd_nskstat {
        kstat_named_t   ink_ndrops;
        kstat_named_t   ink_ncorrupts;
        kstat_named_t   ink_ndelays;
} ipd_nskstat_t;

/*
 * Different parts of this structure have different locking semantics. The
 * list node is not normally referenced; if it is, one has to hold the
 * ipd_nsl_lock. The following members are read-only: ipdn_netid and
 * ipdn_zoneid. The members of the kstat structure are always accessible in
 * the data path, but the counters must be bumped with atomic operations. The
 * ipdn_lock protects every other aspect of this structure. Please see the big
 * theory statement on the requirements for lock ordering.
 */
typedef struct ipd_netstack {
        list_node_t     ipdn_link;      /* link on ipd_nsl */
        netid_t         ipdn_netid;     /* netstack id */
        zoneid_t        ipdn_zoneid;    /* zone id */
        kstat_t         *ipdn_kstat;    /* kstat_t ptr */
        ipd_nskstat_t   ipdn_ksdata;    /* kstat data */
        kmutex_t        ipdn_lock;      /* protects following members */
        int             ipdn_status;    /* status flags */
        net_handle_t    ipdn_v4hdl;     /* IPv4 net handle */
        net_handle_t    ipdn_v6hdl;     /* IPv6 net handle */
        int             ipdn_hooked;    /* are hooks registered */
        hook_t          *ipdn_v4in;     /* IPv4 traffic in hook */
        hook_t          *ipdn_v4out;    /* IPv4 traffic out hook */
        hook_t          *ipdn_v6in;     /* IPv6 traffic in hook */
        hook_t          *ipdn_v6out;    /* IPv6 traffic out hook */
        int             ipdn_enabled;   /* which perturbations are on */
        int             ipdn_corrupt;   /* corrupt percentage */
        int             ipdn_drop;      /* drop percentage */
        uint_t          ipdn_delay;     /* delay us */
        long            ipdn_rand;      /* random seed */
} ipd_netstack_t;

/*
 * ipd internal variables
 */
static dev_info_t       *ipd_devi;              /* device info */
static net_instance_t   *ipd_neti;              /* net_instance for hooks */
static unsigned int     ipd_max_delay = IPD_MAX_DELAY;  /* max delay in us */
static kmutex_t         ipd_nsl_lock;           /* lock for the netstack list */
static list_t           ipd_nsl;                /* list of netstacks */
static kmutex_t         ipd_nactive_lock;       /* lock for nactive */
static unsigned int     ipd_nactive;            /* number of active netstacks */
static int              ipd_nactive_fudge = 4;  /* amount to fudge by in list */

/*
 * Note that this random number implementation is based upon the old BSD 4.1
 * rand. It's good enough for us!
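 *
 * The generator is the linear congruential recurrence
 *
 *      X[n+1] = 1103515245 * X[n] + 12345
 *
 * where only the low 31 bits of the new state are returned to the caller.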
 */
static int
ipd_nextrand(ipd_netstack_t *ins)
{
        ins->ipdn_rand = ins->ipdn_rand * 1103515245L + 12345;
        return (ins->ipdn_rand & 0x7fffffff);
}

static void
ipd_ksbump(kstat_named_t *nkp)
{
        atomic_inc_64(&nkp->value.ui64);
}

/*
 * This is where all the magic actually happens. The way that this works is we
 * grab the ins lock to get a copy of all the data that we need to do our job
 * and then let it go to minimize contention. In terms of actual work on the
 * packet, we do it in the following order:
 *
 *   - drop
 *   - delay
 *   - corrupt
 */
/*ARGSUSED*/
static int
ipd_hook(hook_event_token_t event, hook_data_t data, void *arg)
{
        unsigned char *crp;
        int dwait, corrupt, drop, rand, off, status;
        mblk_t *mbp;
        ipd_netstack_t *ins = arg;
        hook_pkt_event_t *pkt = (hook_pkt_event_t *)data;

        mutex_enter(&ins->ipdn_lock);
        status = ins->ipdn_status;
        dwait = ins->ipdn_delay;
        corrupt = ins->ipdn_corrupt;
        drop = ins->ipdn_drop;
        rand = ipd_nextrand(ins);
        mutex_exit(&ins->ipdn_lock);

        /*
         * This probably cannot happen, but we'll do an extra guard just in
         * case.
         */
        if (status & IPDN_STATUS_CONDEMNED)
                return (0);

        if (drop != 0 && rand % 100 < drop) {
                freemsg(*pkt->hpe_mp);
                *pkt->hpe_mp = NULL;
                pkt->hpe_mb = NULL;
                pkt->hpe_hdr = NULL;
                ipd_ksbump(&ins->ipdn_ksdata.ink_ndrops);

                return (1);
        }

        if (dwait != 0) {
                /*
                 * Sub-tick delays have to busy-wait; anything longer can be
                 * handed off to the clock via delay().
                 */
                if (dwait < TICK_TO_USEC(1))
                        drv_usecwait(dwait);
                else
                        delay(drv_usectohz(dwait));
                ipd_ksbump(&ins->ipdn_ksdata.ink_ndelays);
        }

        if (corrupt != 0 && rand % 100 < corrupt) {
                /*
                 * Since we're corrupting the mblk, just corrupt everything in
                 * the chain. While we could corrupt the entire packet, that's
                 * a little strong. Instead we're going to just change one of
                 * the bytes in each mblock.
                 */
                mbp = *pkt->hpe_mp;
                while (mbp != NULL) {
                        if (mbp->b_wptr == mbp->b_rptr) {
                                mbp = mbp->b_cont;
                                continue;
                        }

                        /*
                         * While pfhooks probably won't send us anything else,
                         * let's just be extra careful. The stack probably
                         * isn't as resilient to corruption of control
                         * messages.
                         */
                        if (DB_TYPE(mbp) != M_DATA) {
                                mbp = mbp->b_cont;
                                continue;
                        }

                        off = rand % ((uintptr_t)mbp->b_wptr -
                            (uintptr_t)mbp->b_rptr);
                        crp = mbp->b_rptr + off;
                        off = rand % 8;
                        *crp = *crp ^ (1 << off);

                        mbp = mbp->b_cont;
                }
                ipd_ksbump(&ins->ipdn_ksdata.ink_ncorrupts);
        }

        return (0);
}

/*
 * Sets up and registers all the proper hooks needed for the netstack to
 * capture packets. Callers are assumed to already be holding the
 * ipd_netstack_t's lock. If there is a failure in setting something up, it is
 * the responsibility of this function to clean it up. Once this function has
 * been called, it should not be called again until a corresponding call to
 * tear down the hooks has been done.
 */
static int
ipd_setup_hooks(ipd_netstack_t *ins)
{
        ASSERT(MUTEX_HELD(&ins->ipdn_lock));
        ins->ipdn_v4hdl = net_protocol_lookup(ins->ipdn_netid, NHF_INET);
        if (ins->ipdn_v4hdl == NULL)
                goto cleanup;

        ins->ipdn_v6hdl = net_protocol_lookup(ins->ipdn_netid, NHF_INET6);
        if (ins->ipdn_v6hdl == NULL)
                goto cleanup;

        ins->ipdn_v4in = hook_alloc(HOOK_VERSION);
        if (ins->ipdn_v4in == NULL)
                goto cleanup;

        ins->ipdn_v4in->h_flags = 0;
        ins->ipdn_v4in->h_hint = HH_NONE;
        ins->ipdn_v4in->h_hintvalue = 0;
        ins->ipdn_v4in->h_func = ipd_hook;
        ins->ipdn_v4in->h_arg = ins;
        ins->ipdn_v4in->h_name = "ipd IPv4 in";

        if (net_hook_register(ins->ipdn_v4hdl, NH_PHYSICAL_IN,
            ins->ipdn_v4in) != 0)
                goto cleanup;
        ins->ipdn_hooked |= IPDN_HOOK_V4IN;

        ins->ipdn_v4out = hook_alloc(HOOK_VERSION);
        if (ins->ipdn_v4out == NULL)
                goto cleanup;
        ins->ipdn_v4out->h_flags = 0;
        ins->ipdn_v4out->h_hint = HH_NONE;
        ins->ipdn_v4out->h_hintvalue = 0;
        ins->ipdn_v4out->h_func = ipd_hook;
        ins->ipdn_v4out->h_arg = ins;
        ins->ipdn_v4out->h_name = "ipd IPv4 out";

        if (net_hook_register(ins->ipdn_v4hdl, NH_PHYSICAL_OUT,
            ins->ipdn_v4out) != 0)
                goto cleanup;
        ins->ipdn_hooked |= IPDN_HOOK_V4OUT;

        ins->ipdn_v6in = hook_alloc(HOOK_VERSION);
        if (ins->ipdn_v6in == NULL)
                goto cleanup;
        ins->ipdn_v6in->h_flags = 0;
        ins->ipdn_v6in->h_hint = HH_NONE;
        ins->ipdn_v6in->h_hintvalue = 0;
        ins->ipdn_v6in->h_func = ipd_hook;
        ins->ipdn_v6in->h_arg = ins;
        ins->ipdn_v6in->h_name = "ipd IPv6 in";

        if (net_hook_register(ins->ipdn_v6hdl, NH_PHYSICAL_IN,
            ins->ipdn_v6in) != 0)
                goto cleanup;
        ins->ipdn_hooked |= IPDN_HOOK_V6IN;

        ins->ipdn_v6out = hook_alloc(HOOK_VERSION);
        if (ins->ipdn_v6out == NULL)
                goto cleanup;
        ins->ipdn_v6out->h_flags = 0;
        ins->ipdn_v6out->h_hint = HH_NONE;
        ins->ipdn_v6out->h_hintvalue = 0;
        ins->ipdn_v6out->h_func = ipd_hook;
        ins->ipdn_v6out->h_arg = ins;
        ins->ipdn_v6out->h_name = "ipd IPv6 out";

        if (net_hook_register(ins->ipdn_v6hdl, NH_PHYSICAL_OUT,
            ins->ipdn_v6out) != 0)
                goto cleanup;
        ins->ipdn_hooked |= IPDN_HOOK_V6OUT;
        mutex_enter(&ipd_nactive_lock);
        ipd_nactive++;
        mutex_exit(&ipd_nactive_lock);

        return (0);

cleanup:
        if (ins->ipdn_hooked & IPDN_HOOK_V6OUT)
                (void) net_hook_unregister(ins->ipdn_v6hdl, NH_PHYSICAL_OUT,
                    ins->ipdn_v6out);

        if (ins->ipdn_hooked & IPDN_HOOK_V6IN)
                (void) net_hook_unregister(ins->ipdn_v6hdl, NH_PHYSICAL_IN,
                    ins->ipdn_v6in);

        if (ins->ipdn_hooked & IPDN_HOOK_V4OUT)
                (void) net_hook_unregister(ins->ipdn_v4hdl, NH_PHYSICAL_OUT,
                    ins->ipdn_v4out);

        if (ins->ipdn_hooked & IPDN_HOOK_V4IN)
                (void) net_hook_unregister(ins->ipdn_v4hdl, NH_PHYSICAL_IN,
                    ins->ipdn_v4in);

        ins->ipdn_hooked = IPDN_HOOK_NONE;

        if (ins->ipdn_v6out != NULL)
                hook_free(ins->ipdn_v6out);

        if (ins->ipdn_v6in != NULL)
                hook_free(ins->ipdn_v6in);

        if (ins->ipdn_v4out != NULL)
                hook_free(ins->ipdn_v4out);

        if (ins->ipdn_v4in != NULL)
                hook_free(ins->ipdn_v4in);

        if (ins->ipdn_v6hdl != NULL)
                (void) net_protocol_release(ins->ipdn_v6hdl);

        if (ins->ipdn_v4hdl != NULL)
                (void) net_protocol_release(ins->ipdn_v4hdl);

        return (1);
}
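
/*
 * The four hook registrations above are intentionally spelled out. A
 * hypothetical helper (not part of this driver) that captured the shared
 * initialization pattern would look something like:
 *
 *      static hook_t *
 *      ipd_hook_init(ipd_netstack_t *ins, char *name)
 *      {
 *              hook_t *h = hook_alloc(HOOK_VERSION);
 *
 *              if (h != NULL) {
 *                      h->h_flags = 0;
 *                      h->h_hint = HH_NONE;
 *                      h->h_hintvalue = 0;
 *                      h->h_func = ipd_hook;
 *                      h->h_arg = ins;
 *                      h->h_name = name;
 *              }
 *              return (h);
 *      }
 *
 * Keeping the code unrolled instead makes the per-hook error handling and
 * the ipdn_hooked flag updates explicit at each step.
 */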

static void
ipd_teardown_hooks(ipd_netstack_t *ins)
{
        ASSERT(ins->ipdn_hooked == IPDN_HOOK_ALL);
        VERIFY(net_hook_unregister(ins->ipdn_v6hdl, NH_PHYSICAL_OUT,
            ins->ipdn_v6out) == 0);
        VERIFY(net_hook_unregister(ins->ipdn_v6hdl, NH_PHYSICAL_IN,
            ins->ipdn_v6in) == 0);
        VERIFY(net_hook_unregister(ins->ipdn_v4hdl, NH_PHYSICAL_OUT,
            ins->ipdn_v4out) == 0);
        VERIFY(net_hook_unregister(ins->ipdn_v4hdl, NH_PHYSICAL_IN,
            ins->ipdn_v4in) == 0);

        ins->ipdn_hooked = IPDN_HOOK_NONE;

        hook_free(ins->ipdn_v6out);
        hook_free(ins->ipdn_v6in);
        hook_free(ins->ipdn_v4out);
        hook_free(ins->ipdn_v4in);

        VERIFY(net_protocol_release(ins->ipdn_v6hdl) == 0);
        VERIFY(net_protocol_release(ins->ipdn_v4hdl) == 0);

        mutex_enter(&ipd_nactive_lock);
        ipd_nactive--;
        mutex_exit(&ipd_nactive_lock);
}

static int
ipd_check_hooks(ipd_netstack_t *ins, int type, boolean_t enable)
{
        int olden, rval;
        olden = ins->ipdn_enabled;

        if (enable)
                ins->ipdn_enabled |= type;
        else
                ins->ipdn_enabled &= ~type;

        /*
         * If hooks were previously disabled and are now needed, set them up.
         */
        if (olden == 0 && ins->ipdn_enabled != 0) {
                rval = ipd_setup_hooks(ins);
                if (rval != 0) {
                        ins->ipdn_enabled &= ~type;
                        ASSERT(ins->ipdn_enabled == 0);
                        return (rval);
                }

                return (0);
        }

        if (olden != 0 && ins->ipdn_enabled == 0) {
                ASSERT(olden != 0);

                /*
                 * We have to drop the lock here, lest we cause a deadlock.
                 * Unfortunately, there may be hooks that are running and are
                 * actively in flight and we have to call the unregister
                 * function. Due to the hooks framework, if there is an
                 * in-flight hook (most likely right now), and we are holding
                 * the netstack's lock, those hooks will never return. This is
                 * unfortunate.
                 *
                 * Because we only come into this path holding the list lock,
                 * we know that the only way someone else can come in and get
                 * to this structure is via the hook callbacks, which are only
                 * going to be doing reads. They'll also see that everything
                 * has been disabled and return. So while this is unfortunate,
                 * it should be relatively safe.
                 */
                mutex_exit(&ins->ipdn_lock);
                ipd_teardown_hooks(ins);
                mutex_enter(&ins->ipdn_lock);
                return (0);
        }

        /*
         * Otherwise, nothing should have changed here.
         */
        ASSERT((olden == 0) == (ins->ipdn_enabled == 0));
        return (0);
}

static int
ipd_toggle_corrupt(ipd_netstack_t *ins, int percent)
{
        int rval;

        ASSERT(MUTEX_HELD(&ins->ipdn_lock));

        if (percent < 0 || percent > 100)
                return (ERANGE);

        /*
         * If we've been asked to set the value to a value that we already
         * have, great, then we're done.
         */
        if (percent == ins->ipdn_corrupt)
                return (0);

        ins->ipdn_corrupt = percent;
        rval = ipd_check_hooks(ins, IPD_CORRUPT, percent != 0);

        /*
         * If ipd_check_hooks failed, that must mean that we failed to set up
         * the hooks, so we are going to effectively zero out and fail the
         * request to enable corruption.
         */
        if (rval != 0)
                ins->ipdn_corrupt = 0;

        return (rval);
}

static int
ipd_toggle_delay(ipd_netstack_t *ins, uint32_t delay)
{
        int rval;

        ASSERT(MUTEX_HELD(&ins->ipdn_lock));

        if (delay > ipd_max_delay)
                return (ERANGE);

        /*
         * If we've been asked to set the value to a value that we already
         * have, great, then we're done.
         */
        if (delay == ins->ipdn_delay)
                return (0);

        ins->ipdn_delay = delay;
        rval = ipd_check_hooks(ins, IPD_DELAY, delay != 0);

        /*
         * If ipd_check_hooks failed, that must mean that we failed to set up
         * the hooks, so we are going to effectively zero out and fail the
         * request to enable delays.
         */
        if (rval != 0)
                ins->ipdn_delay = 0;

        return (rval);
}

static int
ipd_toggle_drop(ipd_netstack_t *ins, int percent)
{
        int rval;

        ASSERT(MUTEX_HELD(&ins->ipdn_lock));

        if (percent < 0 || percent > 100)
                return (ERANGE);

        /*
         * If we've been asked to set the value to a value that we already
         * have, great, then we're done.
         */
        if (percent == ins->ipdn_drop)
                return (0);

        ins->ipdn_drop = percent;
        rval = ipd_check_hooks(ins, IPD_DROP, percent != 0);

        /*
         * If ipd_check_hooks failed, that must mean that we failed to set up
         * the hooks, so we are going to effectively zero out and fail the
         * request to enable dropping.
         */
        if (rval != 0)
                ins->ipdn_drop = 0;

        return (rval);
}

static int
ipd_ioctl_perturb(ipd_ioc_perturb_t *ipi, cred_t *cr, intptr_t cmd)
{
        zoneid_t zid;
        ipd_netstack_t *ins;
        int rval = 0;

        /*
         * If the zone that we're coming from is not the GZ, then we ignore
         * the requested zone completely and instead just set the zoneid to
         * be that of the caller. If the zoneid is that of the GZ, then we
         * don't touch this value.
         */
        zid = crgetzoneid(cr);
        if (zid != GLOBAL_ZONEID)
                ipi->ipip_zoneid = zid;

        if (zoneid_to_netstackid(ipi->ipip_zoneid) == GLOBAL_NETSTACKID &&
            zid != GLOBAL_ZONEID)
                return (EPERM);

        /*
         * We need to hold the ipd_nsl_lock throughout the entire operation,
         * otherwise someone else could come in and remove us from the list
         * and free us, e.g. the netstack destroy handler. By holding the
         * lock, we stop it from being able to do anything wrong.
         */
        mutex_enter(&ipd_nsl_lock);
        for (ins = list_head(&ipd_nsl); ins != NULL;
            ins = list_next(&ipd_nsl, ins)) {
                if (ins->ipdn_zoneid == ipi->ipip_zoneid)
                        break;
        }

        if (ins == NULL) {
                mutex_exit(&ipd_nsl_lock);
                return (EINVAL);
        }

        mutex_enter(&ins->ipdn_lock);

        if (ins->ipdn_status & IPDN_STATUS_CONDEMNED) {
                rval = ESHUTDOWN;
                goto cleanup;
        }

        switch (cmd) {
        case IPDIOC_CORRUPT:
                rval = ipd_toggle_corrupt(ins, ipi->ipip_arg);
                break;
        case IPDIOC_DELAY:
                rval = ipd_toggle_delay(ins, ipi->ipip_arg);
                break;
        case IPDIOC_DROP:
                rval = ipd_toggle_drop(ins, ipi->ipip_arg);
                break;
        }

cleanup:
        mutex_exit(&ins->ipdn_lock);
        mutex_exit(&ipd_nsl_lock);
        return (rval);
}

static int
ipd_ioctl_remove(ipd_ioc_perturb_t *ipi, cred_t *cr)
{
        zoneid_t zid;
        ipd_netstack_t *ins;
        int rval = 0;

        /*
         * See ipd_ioctl_perturb for the rationale here.
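         * For example (hypothetical zone ids): a caller in zone 3 issuing
         * IPDIOC_REMOVE always operates on zone 3, no matter what
         * ipip_zoneid it passed in, while the global zone may name any zone.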
         */
        zid = crgetzoneid(cr);
        if (zid != GLOBAL_ZONEID)
                ipi->ipip_zoneid = zid;

        if (zoneid_to_netstackid(ipi->ipip_zoneid) == GLOBAL_NETSTACKID &&
            zid != GLOBAL_ZONEID)
                return (EPERM);

        mutex_enter(&ipd_nsl_lock);
        for (ins = list_head(&ipd_nsl); ins != NULL;
            ins = list_next(&ipd_nsl, ins)) {
                if (ins->ipdn_zoneid == ipi->ipip_zoneid)
                        break;
        }

        if (ins == NULL) {
                mutex_exit(&ipd_nsl_lock);
                return (EINVAL);
        }

        mutex_enter(&ins->ipdn_lock);

        /*
         * If this is condemned, that means it's very shortly going to be torn
         * down. In that case, there's no reason to actually do anything here,
         * as it will all be done rather shortly in the destroy function.
         * Furthermore, because condemned corresponds with it having hit
         * shutdown, we know that no more packets can be received by this
         * netstack. All this translates to a no-op.
         */
        if (ins->ipdn_status & IPDN_STATUS_CONDEMNED) {
                rval = 0;
                goto cleanup;
        }

        rval = EINVAL;
        /*
         * Go through and disable the requested pieces. We can safely ignore
         * the return value of ipd_check_hooks because the removal case should
         * never fail; we verify that in the hook teardown case.
         */
        if (ipi->ipip_arg & IPD_CORRUPT) {
                ins->ipdn_corrupt = 0;
                (void) ipd_check_hooks(ins, IPD_CORRUPT, B_FALSE);
                rval = 0;
        }

        if (ipi->ipip_arg & IPD_DELAY) {
                ins->ipdn_delay = 0;
                (void) ipd_check_hooks(ins, IPD_DELAY, B_FALSE);
                rval = 0;
        }

        if (ipi->ipip_arg & IPD_DROP) {
                ins->ipdn_drop = 0;
                (void) ipd_check_hooks(ins, IPD_DROP, B_FALSE);
                rval = 0;
        }

cleanup:
        mutex_exit(&ins->ipdn_lock);
        mutex_exit(&ipd_nsl_lock);
        return (rval);
}

/*
 * When this function is called, the value of the ipil_nzones argument
 * controls how this function works. When called with a value of zero, we
 * treat that as the caller asking us what's a reasonable number of entries
 * for it to allocate memory for. If the zone is the global zone, then we tell
 * it how many netstacks are currently active, plus a fudge factor. Otherwise
 * the answer is always one.
 *
 * In the non-zero case, we give the caller that number of zone ids. While
 * this isn't quite ideal, as it might mean that someone misses something,
 * this generally won't be an issue, as it involves a rather tight race
 * condition in the current ipdadm implementation.
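 *
 * A consumer is thus expected to make two passes over IPDIOC_LIST; a sketch
 * of the expected userland flow (not code in this driver):
 *
 *      ioc.ipil_nzones = 0;
 *      ioctl(fd, IPDIOC_LIST, &ioc);           (learn a buffer size hint)
 *      ioc.ipil_info = malloc(ioc.ipil_nzones * sizeof (ipd_ioc_info_t));
 *      ioctl(fd, IPDIOC_LIST, &ioc);           (fetch the active entries)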
 */
static int
ipd_ioctl_list(intptr_t arg, cred_t *cr)
{
        zoneid_t zid;
        ipd_ioc_info_t *configs;
        ipd_netstack_t *ins;
        uint_t azones, rzones, nzones, cur;
        int rval = 0;
        STRUCT_DECL(ipd_ioc_list, h);

        STRUCT_INIT(h, get_udatamodel());
        if (ddi_copyin((void *)arg, STRUCT_BUF(h),
            STRUCT_SIZE(h), 0) != 0)
                return (EFAULT);

        zid = crgetzoneid(cr);

        rzones = STRUCT_FGET(h, ipil_nzones);
        if (rzones == 0) {
                if (zid == GLOBAL_ZONEID) {
                        mutex_enter(&ipd_nactive_lock);
                        rzones = ipd_nactive + ipd_nactive_fudge;
                        mutex_exit(&ipd_nactive_lock);
                } else {
                        rzones = 1;
                }
                STRUCT_FSET(h, ipil_nzones, rzones);
                if (ddi_copyout(STRUCT_BUF(h), (void *)arg,
                    STRUCT_SIZE(h), 0) != 0)
                        return (EFAULT);

                return (0);
        }

        mutex_enter(&ipd_nsl_lock);
        if (zid == GLOBAL_ZONEID) {
                azones = ipd_nactive;
        } else {
                azones = 1;
        }

        configs = kmem_alloc(sizeof (ipd_ioc_info_t) * azones, KM_SLEEP);
        cur = 0;
        for (ins = list_head(&ipd_nsl); ins != NULL;
            ins = list_next(&ipd_nsl, ins)) {
                if (ins->ipdn_enabled == 0)
                        continue;

                ASSERT(cur < azones);

                if (zid == GLOBAL_ZONEID || zid == ins->ipdn_zoneid) {
                        configs[cur].ipii_zoneid = ins->ipdn_zoneid;

                        mutex_enter(&ins->ipdn_lock);
                        configs[cur].ipii_corrupt = ins->ipdn_corrupt;
                        configs[cur].ipii_delay = ins->ipdn_delay;
                        configs[cur].ipii_drop = ins->ipdn_drop;
                        mutex_exit(&ins->ipdn_lock);

                        ++cur;
                }

                if (zid != GLOBAL_ZONEID && zid == ins->ipdn_zoneid)
                        break;
        }
        mutex_exit(&ipd_nsl_lock);

        ASSERT(zid != GLOBAL_ZONEID || cur == azones);

        if (cur == 0)
                STRUCT_FSET(h, ipil_nzones, 0);
        else
                STRUCT_FSET(h, ipil_nzones, cur);

        nzones = MIN(cur, rzones);
        if (nzones > 0) {
                if (ddi_copyout(configs, STRUCT_FGETP(h, ipil_info),
                    nzones * sizeof (ipd_ioc_info_t), 0) != 0)
                        rval = EFAULT;
        }

        kmem_free(configs, sizeof (ipd_ioc_info_t) * azones);
        if (ddi_copyout(STRUCT_BUF(h), (void *)arg, STRUCT_SIZE(h), 0) != 0)
                return (EFAULT);

        return (rval);
}

static void *
ipd_nin_create(const netid_t id)
{
        ipd_netstack_t *ins;
        ipd_nskstat_t *ink;

        ins = kmem_zalloc(sizeof (ipd_netstack_t), KM_SLEEP);
        ins->ipdn_status = IPDN_STATUS_DISABLED;
        ins->ipdn_netid = id;
        ins->ipdn_zoneid = netstackid_to_zoneid(id);
        ins->ipdn_rand = gethrtime();
        mutex_init(&ins->ipdn_lock, NULL, MUTEX_DRIVER, NULL);

        ins->ipdn_kstat = net_kstat_create(id, "ipd", ins->ipdn_zoneid,
            "ipd", "net", KSTAT_TYPE_NAMED,
            sizeof (ipd_nskstat_t) / sizeof (kstat_named_t),
            KSTAT_FLAG_VIRTUAL);

        if (ins->ipdn_kstat != NULL) {
                if (ins->ipdn_zoneid != GLOBAL_ZONEID)
                        kstat_zone_add(ins->ipdn_kstat, GLOBAL_ZONEID);

                ink = &ins->ipdn_ksdata;
                ins->ipdn_kstat->ks_data = ink;
                kstat_named_init(&ink->ink_ncorrupts, "corrupts",
                    KSTAT_DATA_UINT64);
                kstat_named_init(&ink->ink_ndrops, "drops", KSTAT_DATA_UINT64);
                kstat_named_init(&ink->ink_ndelays, "delays",
                    KSTAT_DATA_UINT64);
                kstat_install(ins->ipdn_kstat);
        }

        mutex_enter(&ipd_nsl_lock);
        list_insert_tail(&ipd_nsl, ins);
        mutex_exit(&ipd_nsl_lock);

        return (ins);
}
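
/*
 * The net_instance callbacks fire in a fixed order for each exclusive
 * netstack: ipd_nin_create() at zone creation, ipd_nin_shutdown() once the
 * zone can no longer move packets, and finally ipd_nin_destroy(). Shutdown
 * only condemns the netstack and removes its kstats; the hooks and the
 * structure itself are reclaimed in the destroy callback below.
 */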
static void
ipd_nin_shutdown(const netid_t id, void *arg)
{
        ipd_netstack_t *ins = arg;

        VERIFY(id == ins->ipdn_netid);
        mutex_enter(&ins->ipdn_lock);
        ASSERT(ins->ipdn_status == IPDN_STATUS_DISABLED ||
            ins->ipdn_status == IPDN_STATUS_ENABLED);
        ins->ipdn_status |= IPDN_STATUS_CONDEMNED;
        if (ins->ipdn_kstat != NULL)
                net_kstat_delete(id, ins->ipdn_kstat);
        mutex_exit(&ins->ipdn_lock);
}

/*ARGSUSED*/
static void
ipd_nin_destroy(const netid_t id, void *arg)
{
        ipd_netstack_t *ins = arg;

        /*
         * At this point none of the hooks should be able to fire because the
         * zone has been shut down and we are in the process of destroying it.
         * Thus it should not be possible for someone else to come in and grab
         * our ipd_netstack_t for this zone. Because of that, we know that we
         * are the only ones who could be running here.
         */
        mutex_enter(&ipd_nsl_lock);
        list_remove(&ipd_nsl, ins);
        mutex_exit(&ipd_nsl_lock);

        if (ins->ipdn_hooked)
                ipd_teardown_hooks(ins);
        mutex_destroy(&ins->ipdn_lock);
        kmem_free(ins, sizeof (ipd_netstack_t));
}

/*ARGSUSED*/
static int
ipd_open(dev_t *devp, int flag, int otype, cred_t *credp)
{
        if (flag & FEXCL || flag & FNDELAY)
                return (EINVAL);

        if (otype != OTYP_CHR)
                return (EINVAL);

        if (!(flag & FREAD && flag & FWRITE))
                return (EINVAL);

        if (secpolicy_ip_config(credp, B_FALSE) != 0)
                return (EPERM);

        return (0);
}

/*ARGSUSED*/
static int
ipd_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv)
{
        int rval;
        ipd_ioc_perturb_t ipip;

        switch (cmd) {
        case IPDIOC_CORRUPT:
        case IPDIOC_DELAY:
        case IPDIOC_DROP:
                if (ddi_copyin((void *)arg, &ipip, sizeof (ipd_ioc_perturb_t),
                    0) != 0)
                        return (EFAULT);
                rval = ipd_ioctl_perturb(&ipip, cr, cmd);
                return (rval);
        case IPDIOC_REMOVE:
                if (ddi_copyin((void *)arg, &ipip, sizeof (ipd_ioc_perturb_t),
                    0) != 0)
                        return (EFAULT);
                rval = ipd_ioctl_remove(&ipip, cr);
                return (rval);
        case IPDIOC_LIST:
                /*
                 * Because the list ioctl doesn't have a fixed-size struct due
                 * to needing to pass around a pointer, we instead delegate
                 * the copyin logic to the list code.
                 */
                return (ipd_ioctl_list(arg, cr));
        default:
                break;
        }
        return (ENOTTY);
}

/*ARGSUSED*/
static int
ipd_close(dev_t dev, int flag, int otype, cred_t *credp)
{
        return (0);
}

static int
ipd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
        minor_t instance;

        if (cmd != DDI_ATTACH)
                return (DDI_FAILURE);

        if (ipd_devi != NULL)
                return (DDI_FAILURE);

        instance = ddi_get_instance(dip);
        if (ddi_create_minor_node(dip, "ipd", S_IFCHR, instance,
            DDI_PSEUDO, 0) == DDI_FAILURE)
                return (DDI_FAILURE);

        ipd_neti = net_instance_alloc(NETINFO_VERSION);
        if (ipd_neti == NULL) {
                ddi_remove_minor_node(dip, NULL);
                return (DDI_FAILURE);
        }

        /*
         * Note that these global structures MUST be initialized before we
         * call net_instance_register, as that will instantly cause us to
         * drive into the ipd_nin_create callbacks.
         */
        list_create(&ipd_nsl, sizeof (ipd_netstack_t),
            offsetof(ipd_netstack_t, ipdn_link));
        mutex_init(&ipd_nsl_lock, NULL, MUTEX_DRIVER, NULL);
        mutex_init(&ipd_nactive_lock, NULL, MUTEX_DRIVER, NULL);

        /* Note, net_instance_alloc sets the version. */
        ipd_neti->nin_name = "ipd";
        ipd_neti->nin_create = ipd_nin_create;
        ipd_neti->nin_destroy = ipd_nin_destroy;
        ipd_neti->nin_shutdown = ipd_nin_shutdown;
        if (net_instance_register(ipd_neti) == DDI_FAILURE) {
                net_instance_free(ipd_neti);
                ddi_remove_minor_node(dip, NULL);
                return (DDI_FAILURE);
        }

        ddi_report_dev(dip);
        ipd_devi = dip;

        return (DDI_SUCCESS);
}

/*ARGSUSED*/
static int
ipd_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
        int error;

        switch (infocmd) {
        case DDI_INFO_DEVT2DEVINFO:
                *result = ipd_devi;
                error = DDI_SUCCESS;
                break;
        case DDI_INFO_DEVT2INSTANCE:
                *result = (void *)(uintptr_t)getminor((dev_t)arg);
                error = DDI_SUCCESS;
                break;
        default:
                error = DDI_FAILURE;
                break;
        }

        return (error);
}

static int
ipd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
        if (cmd != DDI_DETACH)
                return (DDI_FAILURE);

        mutex_enter(&ipd_nactive_lock);
        if (ipd_nactive > 0) {
                mutex_exit(&ipd_nactive_lock);
                return (EBUSY);
        }
        mutex_exit(&ipd_nactive_lock);
        ASSERT(dip == ipd_devi);
        ddi_remove_minor_node(dip, NULL);
        ipd_devi = NULL;

        if (ipd_neti != NULL) {
                VERIFY(net_instance_unregister(ipd_neti) == 0);
                net_instance_free(ipd_neti);
        }

        mutex_destroy(&ipd_nsl_lock);
        mutex_destroy(&ipd_nactive_lock);
        list_destroy(&ipd_nsl);

        return (DDI_SUCCESS);
}

static struct cb_ops ipd_cb_ops = {
        ipd_open,       /* open */
        ipd_close,      /* close */
        nodev,          /* strategy */
        nodev,          /* print */
        nodev,          /* dump */
        nodev,          /* read */
        nodev,          /* write */
        ipd_ioctl,      /* ioctl */
        nodev,          /* devmap */
        nodev,          /* mmap */
        nodev,          /* segmap */
        nochpoll,       /* poll */
        ddi_prop_op,    /* cb_prop_op */
        NULL,           /* streamtab */
        D_NEW | D_MP,   /* Driver compatibility flag */
        CB_REV,         /* rev */
        nodev,          /* aread */
        nodev           /* awrite */
};

static struct dev_ops ipd_ops = {
        DEVO_REV,               /* devo_rev */
        0,                      /* refcnt */
        ipd_getinfo,            /* get_dev_info */
        nulldev,                /* identify */
        nulldev,                /* probe */
        ipd_attach,             /* attach */
        ipd_detach,             /* detach */
        nodev,                  /* reset */
        &ipd_cb_ops,            /* driver operations */
        NULL,                   /* bus operations */
        nodev,                  /* dev power */
        ddi_quiesce_not_needed  /* quiesce */
};

static struct modldrv modldrv = {
        &mod_driverops,
        "Internet packet disturber",
        &ipd_ops
};

static struct modlinkage modlinkage = {
        MODREV_1,
        { (void *)&modldrv, NULL }
};

int
_init(void)
{
        return (mod_install(&modlinkage));
}

int
_info(struct modinfo *modinfop)
{
        return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
        return (mod_remove(&modlinkage));
}
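
/*
 * For reference, the expected userland flow against this driver (a sketch
 * based on the structures in <sys/ipd.h>; ipdadm(1M) is the real consumer):
 *
 *      int fd = open("/dev/ipd", O_RDWR);
 *      ipd_ioc_perturb_t p;
 *
 *      p.ipip_zoneid = zid;            (zone to perturb; GZ may name any)
 *      p.ipip_arg = 25;
 *      ioctl(fd, IPDIOC_DROP, &p);     (drop 25% of that zone's packets)
 *      ...
 *      p.ipip_arg = IPD_DROP;
 *      ioctl(fd, IPDIOC_REMOVE, &p);   (clear the drop perturbation)
 */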