1 /*- 2 * Copyright (c) 1982, 1986, 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Mike Karels at Berkeley Software Design, Inc. 7 * 8 * Quite extensively rewritten by Poul-Henning Kamp of the FreeBSD 9 * project, to make these variables more userfriendly. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 4. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 * 35 * @(#)kern_sysctl.c 8.4 (Berkeley) 4/14/94 36 */ 37 38 #include <sys/cdefs.h> 39 __FBSDID("$FreeBSD$"); 40 41 #include "opt_capsicum.h" 42 #include "opt_compat.h" 43 #include "opt_ktrace.h" 44 45 #include <sys/param.h> 46 #include <sys/fail.h> 47 #include <sys/systm.h> 48 #include <sys/capsicum.h> 49 #include <sys/kernel.h> 50 #include <sys/sysctl.h> 51 #include <sys/malloc.h> 52 #include <sys/priv.h> 53 #include <sys/proc.h> 54 #include <sys/jail.h> 55 #include <sys/lock.h> 56 #include <sys/mutex.h> 57 #include <sys/rmlock.h> 58 #include <sys/sbuf.h> 59 #include <sys/sx.h> 60 #include <sys/sysproto.h> 61 #include <sys/uio.h> 62 #ifdef KTRACE 63 #include <sys/ktrace.h> 64 #endif 65 66 #include <net/vnet.h> 67 68 #include <security/mac/mac_framework.h> 69 70 #include <vm/vm.h> 71 #include <vm/vm_extern.h> 72 73 static MALLOC_DEFINE(M_SYSCTL, "sysctl", "sysctl internal magic"); 74 static MALLOC_DEFINE(M_SYSCTLOID, "sysctloid", "sysctl dynamic oids"); 75 static MALLOC_DEFINE(M_SYSCTLTMP, "sysctltmp", "sysctl temp output buffer"); 76 77 /* 78 * The sysctllock protects the MIB tree. It also protects sysctl 79 * contexts used with dynamic sysctls. The sysctl_register_oid() and 80 * sysctl_unregister_oid() routines require the sysctllock to already 81 * be held, so the sysctl_wlock() and sysctl_wunlock() routines are 82 * provided for the few places in the kernel which need to use that 83 * API rather than using the dynamic API. Use of the dynamic API is 84 * strongly encouraged for most code. 85 * 86 * The sysctlmemlock is used to limit the amount of user memory wired for 87 * sysctl requests. This is implemented by serializing any userland 88 * sysctl requests larger than a single page via an exclusive lock. 89 */ 90 static struct rmlock sysctllock; 91 static struct sx sysctlmemlock; 92 93 #define SYSCTL_WLOCK() rm_wlock(&sysctllock) 94 #define SYSCTL_WUNLOCK() rm_wunlock(&sysctllock) 95 #define SYSCTL_RLOCK(tracker) rm_rlock(&sysctllock, (tracker)) 96 #define SYSCTL_RUNLOCK(tracker) rm_runlock(&sysctllock, (tracker)) 97 #define SYSCTL_WLOCKED() rm_wowned(&sysctllock) 98 #define SYSCTL_ASSERT_LOCKED() rm_assert(&sysctllock, RA_LOCKED) 99 #define SYSCTL_ASSERT_WLOCKED() rm_assert(&sysctllock, RA_WLOCKED) 100 #define SYSCTL_ASSERT_RLOCKED() rm_assert(&sysctllock, RA_RLOCKED) 101 #define SYSCTL_INIT() rm_init_flags(&sysctllock, "sysctl lock", \ 102 RM_SLEEPABLE) 103 #define SYSCTL_SLEEP(ch, wmesg, timo) \ 104 rm_sleep(ch, &sysctllock, 0, wmesg, timo) 105 106 static int sysctl_root(SYSCTL_HANDLER_ARGS); 107 108 /* Root list */ 109 struct sysctl_oid_list sysctl__children = SLIST_HEAD_INITIALIZER(&sysctl__children); 110 111 static int sysctl_remove_oid_locked(struct sysctl_oid *oidp, int del, 112 int recurse); 113 static int sysctl_old_kernel(struct sysctl_req *, const void *, size_t); 114 static int sysctl_new_kernel(struct sysctl_req *, void *, size_t); 115 116 static struct sysctl_oid * 117 sysctl_find_oidname(const char *name, struct sysctl_oid_list *list) 118 { 119 struct sysctl_oid *oidp; 120 121 SYSCTL_ASSERT_LOCKED(); 122 SLIST_FOREACH(oidp, list, oid_link) { 123 if (strcmp(oidp->oid_name, name) == 0) { 124 return (oidp); 125 } 126 } 127 return (NULL); 128 } 129 130 /* 131 * Initialization of the MIB tree. 132 * 133 * Order by number in each list. 134 */ 135 void 136 sysctl_wlock(void) 137 { 138 139 SYSCTL_WLOCK(); 140 } 141 142 void 143 sysctl_wunlock(void) 144 { 145 146 SYSCTL_WUNLOCK(); 147 } 148 149 static int 150 sysctl_root_handler_locked(struct sysctl_oid *oid, void *arg1, intmax_t arg2, 151 struct sysctl_req *req, struct rm_priotracker *tracker) 152 { 153 int error; 154 155 if (oid->oid_kind & CTLFLAG_DYN) 156 atomic_add_int(&oid->oid_running, 1); 157 158 if (tracker != NULL) 159 SYSCTL_RUNLOCK(tracker); 160 else 161 SYSCTL_WUNLOCK(); 162 163 if (!(oid->oid_kind & CTLFLAG_MPSAFE)) 164 mtx_lock(&Giant); 165 error = oid->oid_handler(oid, arg1, arg2, req); 166 if (!(oid->oid_kind & CTLFLAG_MPSAFE)) 167 mtx_unlock(&Giant); 168 169 if (tracker != NULL) 170 SYSCTL_RLOCK(tracker); 171 else 172 SYSCTL_WLOCK(); 173 174 if (oid->oid_kind & CTLFLAG_DYN) { 175 if (atomic_fetchadd_int(&oid->oid_running, -1) == 1 && 176 (oid->oid_kind & CTLFLAG_DYING) != 0) 177 wakeup(&oid->oid_running); 178 } 179 180 return (error); 181 } 182 183 static void 184 sysctl_load_tunable_by_oid_locked(struct sysctl_oid *oidp) 185 { 186 struct sysctl_req req; 187 struct sysctl_oid *curr; 188 char *penv = NULL; 189 char path[64]; 190 ssize_t rem = sizeof(path); 191 ssize_t len; 192 uint8_t val_8; 193 uint16_t val_16; 194 uint32_t val_32; 195 int val_int; 196 long val_long; 197 int64_t val_64; 198 quad_t val_quad; 199 int error; 200 201 path[--rem] = 0; 202 203 for (curr = oidp; curr != NULL; curr = SYSCTL_PARENT(curr)) { 204 len = strlen(curr->oid_name); 205 rem -= len; 206 if (curr != oidp) 207 rem -= 1; 208 if (rem < 0) { 209 printf("OID path exceeds %d bytes\n", (int)sizeof(path)); 210 return; 211 } 212 memcpy(path + rem, curr->oid_name, len); 213 if (curr != oidp) 214 path[rem + len] = '.'; 215 } 216 217 memset(&req, 0, sizeof(req)); 218 219 req.td = curthread; 220 req.oldfunc = sysctl_old_kernel; 221 req.newfunc = sysctl_new_kernel; 222 req.lock = REQ_UNWIRED; 223 224 switch (oidp->oid_kind & CTLTYPE) { 225 case CTLTYPE_INT: 226 if (getenv_int(path + rem, &val_int) == 0) 227 return; 228 req.newlen = sizeof(val_int); 229 req.newptr = &val_int; 230 break; 231 case CTLTYPE_UINT: 232 if (getenv_uint(path + rem, (unsigned int *)&val_int) == 0) 233 return; 234 req.newlen = sizeof(val_int); 235 req.newptr = &val_int; 236 break; 237 case CTLTYPE_LONG: 238 if (getenv_long(path + rem, &val_long) == 0) 239 return; 240 req.newlen = sizeof(val_long); 241 req.newptr = &val_long; 242 break; 243 case CTLTYPE_ULONG: 244 if (getenv_ulong(path + rem, (unsigned long *)&val_long) == 0) 245 return; 246 req.newlen = sizeof(val_long); 247 req.newptr = &val_long; 248 break; 249 case CTLTYPE_S8: 250 if (getenv_int(path + rem, &val_int) == 0) 251 return; 252 val_8 = val_int; 253 req.newlen = sizeof(val_8); 254 req.newptr = &val_8; 255 break; 256 case CTLTYPE_S16: 257 if (getenv_int(path + rem, &val_int) == 0) 258 return; 259 val_16 = val_int; 260 req.newlen = sizeof(val_16); 261 req.newptr = &val_16; 262 break; 263 case CTLTYPE_S32: 264 if (getenv_long(path + rem, &val_long) == 0) 265 return; 266 val_32 = val_long; 267 req.newlen = sizeof(val_32); 268 req.newptr = &val_32; 269 break; 270 case CTLTYPE_S64: 271 if (getenv_quad(path + rem, &val_quad) == 0) 272 return; 273 val_64 = val_quad; 274 req.newlen = sizeof(val_64); 275 req.newptr = &val_64; 276 break; 277 case CTLTYPE_U8: 278 if (getenv_uint(path + rem, (unsigned int *)&val_int) == 0) 279 return; 280 val_8 = val_int; 281 req.newlen = sizeof(val_8); 282 req.newptr = &val_8; 283 break; 284 case CTLTYPE_U16: 285 if (getenv_uint(path + rem, (unsigned int *)&val_int) == 0) 286 return; 287 val_16 = val_int; 288 req.newlen = sizeof(val_16); 289 req.newptr = &val_16; 290 break; 291 case CTLTYPE_U32: 292 if (getenv_ulong(path + rem, (unsigned long *)&val_long) == 0) 293 return; 294 val_32 = val_long; 295 req.newlen = sizeof(val_32); 296 req.newptr = &val_32; 297 break; 298 case CTLTYPE_U64: 299 /* XXX there is no getenv_uquad() */ 300 if (getenv_quad(path + rem, &val_quad) == 0) 301 return; 302 val_64 = val_quad; 303 req.newlen = sizeof(val_64); 304 req.newptr = &val_64; 305 break; 306 case CTLTYPE_STRING: 307 penv = kern_getenv(path + rem); 308 if (penv == NULL) 309 return; 310 req.newlen = strlen(penv); 311 req.newptr = penv; 312 break; 313 default: 314 return; 315 } 316 error = sysctl_root_handler_locked(oidp, oidp->oid_arg1, 317 oidp->oid_arg2, &req, NULL); 318 if (error != 0) 319 printf("Setting sysctl %s failed: %d\n", path + rem, error); 320 if (penv != NULL) 321 freeenv(penv); 322 } 323 324 void 325 sysctl_register_oid(struct sysctl_oid *oidp) 326 { 327 struct sysctl_oid_list *parent = oidp->oid_parent; 328 struct sysctl_oid *p; 329 struct sysctl_oid *q; 330 int oid_number; 331 int timeout = 2; 332 333 /* 334 * First check if another oid with the same name already 335 * exists in the parent's list. 336 */ 337 SYSCTL_ASSERT_WLOCKED(); 338 p = sysctl_find_oidname(oidp->oid_name, parent); 339 if (p != NULL) { 340 if ((p->oid_kind & CTLTYPE) == CTLTYPE_NODE) { 341 p->oid_refcnt++; 342 return; 343 } else { 344 printf("can't re-use a leaf (%s)!\n", p->oid_name); 345 return; 346 } 347 } 348 /* get current OID number */ 349 oid_number = oidp->oid_number; 350 351 #if (OID_AUTO >= 0) 352 #error "OID_AUTO is expected to be a negative value" 353 #endif 354 /* 355 * Any negative OID number qualifies as OID_AUTO. Valid OID 356 * numbers should always be positive. 357 * 358 * NOTE: DO NOT change the starting value here, change it in 359 * <sys/sysctl.h>, and make sure it is at least 256 to 360 * accomodate e.g. net.inet.raw as a static sysctl node. 361 */ 362 if (oid_number < 0) { 363 static int newoid; 364 365 /* 366 * By decrementing the next OID number we spend less 367 * time inserting the OIDs into a sorted list. 368 */ 369 if (--newoid < CTL_AUTO_START) 370 newoid = 0x7fffffff; 371 372 oid_number = newoid; 373 } 374 375 /* 376 * Insert the OID into the parent's list sorted by OID number. 377 */ 378 retry: 379 q = NULL; 380 SLIST_FOREACH(p, parent, oid_link) { 381 /* check if the current OID number is in use */ 382 if (oid_number == p->oid_number) { 383 /* get the next valid OID number */ 384 if (oid_number < CTL_AUTO_START || 385 oid_number == 0x7fffffff) { 386 /* wraparound - restart */ 387 oid_number = CTL_AUTO_START; 388 /* don't loop forever */ 389 if (!timeout--) 390 panic("sysctl: Out of OID numbers\n"); 391 goto retry; 392 } else { 393 oid_number++; 394 } 395 } else if (oid_number < p->oid_number) 396 break; 397 q = p; 398 } 399 /* check for non-auto OID number collision */ 400 if (oidp->oid_number >= 0 && oidp->oid_number < CTL_AUTO_START && 401 oid_number >= CTL_AUTO_START) { 402 printf("sysctl: OID number(%d) is already in use for '%s'\n", 403 oidp->oid_number, oidp->oid_name); 404 } 405 /* update the OID number, if any */ 406 oidp->oid_number = oid_number; 407 if (q != NULL) 408 SLIST_INSERT_AFTER(q, oidp, oid_link); 409 else 410 SLIST_INSERT_HEAD(parent, oidp, oid_link); 411 412 if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE && 413 #ifdef VIMAGE 414 (oidp->oid_kind & CTLFLAG_VNET) == 0 && 415 #endif 416 (oidp->oid_kind & CTLFLAG_TUN) != 0 && 417 (oidp->oid_kind & CTLFLAG_NOFETCH) == 0) { 418 /* only fetch value once */ 419 oidp->oid_kind |= CTLFLAG_NOFETCH; 420 /* try to fetch value from kernel environment */ 421 sysctl_load_tunable_by_oid_locked(oidp); 422 } 423 } 424 425 void 426 sysctl_unregister_oid(struct sysctl_oid *oidp) 427 { 428 struct sysctl_oid *p; 429 int error; 430 431 SYSCTL_ASSERT_WLOCKED(); 432 error = ENOENT; 433 if (oidp->oid_number == OID_AUTO) { 434 error = EINVAL; 435 } else { 436 SLIST_FOREACH(p, oidp->oid_parent, oid_link) { 437 if (p == oidp) { 438 SLIST_REMOVE(oidp->oid_parent, oidp, 439 sysctl_oid, oid_link); 440 error = 0; 441 break; 442 } 443 } 444 } 445 446 /* 447 * This can happen when a module fails to register and is 448 * being unloaded afterwards. It should not be a panic() 449 * for normal use. 450 */ 451 if (error) 452 printf("%s: failed to unregister sysctl\n", __func__); 453 } 454 455 /* Initialize a new context to keep track of dynamically added sysctls. */ 456 int 457 sysctl_ctx_init(struct sysctl_ctx_list *c) 458 { 459 460 if (c == NULL) { 461 return (EINVAL); 462 } 463 464 /* 465 * No locking here, the caller is responsible for not adding 466 * new nodes to a context until after this function has 467 * returned. 468 */ 469 TAILQ_INIT(c); 470 return (0); 471 } 472 473 /* Free the context, and destroy all dynamic oids registered in this context */ 474 int 475 sysctl_ctx_free(struct sysctl_ctx_list *clist) 476 { 477 struct sysctl_ctx_entry *e, *e1; 478 int error; 479 480 error = 0; 481 /* 482 * First perform a "dry run" to check if it's ok to remove oids. 483 * XXX FIXME 484 * XXX This algorithm is a hack. But I don't know any 485 * XXX better solution for now... 486 */ 487 SYSCTL_WLOCK(); 488 TAILQ_FOREACH(e, clist, link) { 489 error = sysctl_remove_oid_locked(e->entry, 0, 0); 490 if (error) 491 break; 492 } 493 /* 494 * Restore deregistered entries, either from the end, 495 * or from the place where error occured. 496 * e contains the entry that was not unregistered 497 */ 498 if (error) 499 e1 = TAILQ_PREV(e, sysctl_ctx_list, link); 500 else 501 e1 = TAILQ_LAST(clist, sysctl_ctx_list); 502 while (e1 != NULL) { 503 sysctl_register_oid(e1->entry); 504 e1 = TAILQ_PREV(e1, sysctl_ctx_list, link); 505 } 506 if (error) { 507 SYSCTL_WUNLOCK(); 508 return(EBUSY); 509 } 510 /* Now really delete the entries */ 511 e = TAILQ_FIRST(clist); 512 while (e != NULL) { 513 e1 = TAILQ_NEXT(e, link); 514 error = sysctl_remove_oid_locked(e->entry, 1, 0); 515 if (error) 516 panic("sysctl_remove_oid: corrupt tree, entry: %s", 517 e->entry->oid_name); 518 free(e, M_SYSCTLOID); 519 e = e1; 520 } 521 SYSCTL_WUNLOCK(); 522 return (error); 523 } 524 525 /* Add an entry to the context */ 526 struct sysctl_ctx_entry * 527 sysctl_ctx_entry_add(struct sysctl_ctx_list *clist, struct sysctl_oid *oidp) 528 { 529 struct sysctl_ctx_entry *e; 530 531 SYSCTL_ASSERT_WLOCKED(); 532 if (clist == NULL || oidp == NULL) 533 return(NULL); 534 e = malloc(sizeof(struct sysctl_ctx_entry), M_SYSCTLOID, M_WAITOK); 535 e->entry = oidp; 536 TAILQ_INSERT_HEAD(clist, e, link); 537 return (e); 538 } 539 540 /* Find an entry in the context */ 541 struct sysctl_ctx_entry * 542 sysctl_ctx_entry_find(struct sysctl_ctx_list *clist, struct sysctl_oid *oidp) 543 { 544 struct sysctl_ctx_entry *e; 545 546 SYSCTL_ASSERT_WLOCKED(); 547 if (clist == NULL || oidp == NULL) 548 return(NULL); 549 TAILQ_FOREACH(e, clist, link) { 550 if(e->entry == oidp) 551 return(e); 552 } 553 return (e); 554 } 555 556 /* 557 * Delete an entry from the context. 558 * NOTE: this function doesn't free oidp! You have to remove it 559 * with sysctl_remove_oid(). 560 */ 561 int 562 sysctl_ctx_entry_del(struct sysctl_ctx_list *clist, struct sysctl_oid *oidp) 563 { 564 struct sysctl_ctx_entry *e; 565 566 if (clist == NULL || oidp == NULL) 567 return (EINVAL); 568 SYSCTL_WLOCK(); 569 e = sysctl_ctx_entry_find(clist, oidp); 570 if (e != NULL) { 571 TAILQ_REMOVE(clist, e, link); 572 SYSCTL_WUNLOCK(); 573 free(e, M_SYSCTLOID); 574 return (0); 575 } else { 576 SYSCTL_WUNLOCK(); 577 return (ENOENT); 578 } 579 } 580 581 /* 582 * Remove dynamically created sysctl trees. 583 * oidp - top of the tree to be removed 584 * del - if 0 - just deregister, otherwise free up entries as well 585 * recurse - if != 0 traverse the subtree to be deleted 586 */ 587 int 588 sysctl_remove_oid(struct sysctl_oid *oidp, int del, int recurse) 589 { 590 int error; 591 592 SYSCTL_WLOCK(); 593 error = sysctl_remove_oid_locked(oidp, del, recurse); 594 SYSCTL_WUNLOCK(); 595 return (error); 596 } 597 598 int 599 sysctl_remove_name(struct sysctl_oid *parent, const char *name, 600 int del, int recurse) 601 { 602 struct sysctl_oid *p, *tmp; 603 int error; 604 605 error = ENOENT; 606 SYSCTL_WLOCK(); 607 SLIST_FOREACH_SAFE(p, SYSCTL_CHILDREN(parent), oid_link, tmp) { 608 if (strcmp(p->oid_name, name) == 0) { 609 error = sysctl_remove_oid_locked(p, del, recurse); 610 break; 611 } 612 } 613 SYSCTL_WUNLOCK(); 614 615 return (error); 616 } 617 618 619 static int 620 sysctl_remove_oid_locked(struct sysctl_oid *oidp, int del, int recurse) 621 { 622 struct sysctl_oid *p, *tmp; 623 int error; 624 625 SYSCTL_ASSERT_WLOCKED(); 626 if (oidp == NULL) 627 return(EINVAL); 628 if ((oidp->oid_kind & CTLFLAG_DYN) == 0) { 629 printf("can't remove non-dynamic nodes!\n"); 630 return (EINVAL); 631 } 632 /* 633 * WARNING: normal method to do this should be through 634 * sysctl_ctx_free(). Use recursing as the last resort 635 * method to purge your sysctl tree of leftovers... 636 * However, if some other code still references these nodes, 637 * it will panic. 638 */ 639 if ((oidp->oid_kind & CTLTYPE) == CTLTYPE_NODE) { 640 if (oidp->oid_refcnt == 1) { 641 SLIST_FOREACH_SAFE(p, 642 SYSCTL_CHILDREN(oidp), oid_link, tmp) { 643 if (!recurse) { 644 printf("Warning: failed attempt to " 645 "remove oid %s with child %s\n", 646 oidp->oid_name, p->oid_name); 647 return (ENOTEMPTY); 648 } 649 error = sysctl_remove_oid_locked(p, del, 650 recurse); 651 if (error) 652 return (error); 653 } 654 } 655 } 656 if (oidp->oid_refcnt > 1 ) { 657 oidp->oid_refcnt--; 658 } else { 659 if (oidp->oid_refcnt == 0) { 660 printf("Warning: bad oid_refcnt=%u (%s)!\n", 661 oidp->oid_refcnt, oidp->oid_name); 662 return (EINVAL); 663 } 664 sysctl_unregister_oid(oidp); 665 if (del) { 666 /* 667 * Wait for all threads running the handler to drain. 668 * This preserves the previous behavior when the 669 * sysctl lock was held across a handler invocation, 670 * and is necessary for module unload correctness. 671 */ 672 while (oidp->oid_running > 0) { 673 oidp->oid_kind |= CTLFLAG_DYING; 674 SYSCTL_SLEEP(&oidp->oid_running, "oidrm", 0); 675 } 676 if (oidp->oid_descr) 677 free(__DECONST(char *, oidp->oid_descr), 678 M_SYSCTLOID); 679 free(__DECONST(char *, oidp->oid_name), M_SYSCTLOID); 680 free(oidp, M_SYSCTLOID); 681 } 682 } 683 return (0); 684 } 685 /* 686 * Create new sysctls at run time. 687 * clist may point to a valid context initialized with sysctl_ctx_init(). 688 */ 689 struct sysctl_oid * 690 sysctl_add_oid(struct sysctl_ctx_list *clist, struct sysctl_oid_list *parent, 691 int number, const char *name, int kind, void *arg1, intmax_t arg2, 692 int (*handler)(SYSCTL_HANDLER_ARGS), const char *fmt, const char *descr) 693 { 694 struct sysctl_oid *oidp; 695 696 /* You have to hook up somewhere.. */ 697 if (parent == NULL) 698 return(NULL); 699 /* Check if the node already exists, otherwise create it */ 700 SYSCTL_WLOCK(); 701 oidp = sysctl_find_oidname(name, parent); 702 if (oidp != NULL) { 703 if ((oidp->oid_kind & CTLTYPE) == CTLTYPE_NODE) { 704 oidp->oid_refcnt++; 705 /* Update the context */ 706 if (clist != NULL) 707 sysctl_ctx_entry_add(clist, oidp); 708 SYSCTL_WUNLOCK(); 709 return (oidp); 710 } else { 711 SYSCTL_WUNLOCK(); 712 printf("can't re-use a leaf (%s)!\n", name); 713 return (NULL); 714 } 715 } 716 oidp = malloc(sizeof(struct sysctl_oid), M_SYSCTLOID, M_WAITOK|M_ZERO); 717 oidp->oid_parent = parent; 718 SLIST_INIT(&oidp->oid_children); 719 oidp->oid_number = number; 720 oidp->oid_refcnt = 1; 721 oidp->oid_name = strdup(name, M_SYSCTLOID); 722 oidp->oid_handler = handler; 723 oidp->oid_kind = CTLFLAG_DYN | kind; 724 oidp->oid_arg1 = arg1; 725 oidp->oid_arg2 = arg2; 726 oidp->oid_fmt = fmt; 727 if (descr != NULL) 728 oidp->oid_descr = strdup(descr, M_SYSCTLOID); 729 /* Update the context, if used */ 730 if (clist != NULL) 731 sysctl_ctx_entry_add(clist, oidp); 732 /* Register this oid */ 733 sysctl_register_oid(oidp); 734 SYSCTL_WUNLOCK(); 735 return (oidp); 736 } 737 738 /* 739 * Rename an existing oid. 740 */ 741 void 742 sysctl_rename_oid(struct sysctl_oid *oidp, const char *name) 743 { 744 char *newname; 745 char *oldname; 746 747 newname = strdup(name, M_SYSCTLOID); 748 SYSCTL_WLOCK(); 749 oldname = __DECONST(char *, oidp->oid_name); 750 oidp->oid_name = newname; 751 SYSCTL_WUNLOCK(); 752 free(oldname, M_SYSCTLOID); 753 } 754 755 /* 756 * Reparent an existing oid. 757 */ 758 int 759 sysctl_move_oid(struct sysctl_oid *oid, struct sysctl_oid_list *parent) 760 { 761 struct sysctl_oid *oidp; 762 763 SYSCTL_WLOCK(); 764 if (oid->oid_parent == parent) { 765 SYSCTL_WUNLOCK(); 766 return (0); 767 } 768 oidp = sysctl_find_oidname(oid->oid_name, parent); 769 if (oidp != NULL) { 770 SYSCTL_WUNLOCK(); 771 return (EEXIST); 772 } 773 sysctl_unregister_oid(oid); 774 oid->oid_parent = parent; 775 oid->oid_number = OID_AUTO; 776 sysctl_register_oid(oid); 777 SYSCTL_WUNLOCK(); 778 return (0); 779 } 780 781 /* 782 * Register the kernel's oids on startup. 783 */ 784 SET_DECLARE(sysctl_set, struct sysctl_oid); 785 786 static void 787 sysctl_register_all(void *arg) 788 { 789 struct sysctl_oid **oidp; 790 791 sx_init(&sysctlmemlock, "sysctl mem"); 792 SYSCTL_INIT(); 793 SYSCTL_WLOCK(); 794 SET_FOREACH(oidp, sysctl_set) 795 sysctl_register_oid(*oidp); 796 SYSCTL_WUNLOCK(); 797 } 798 SYSINIT(sysctl, SI_SUB_KMEM, SI_ORDER_FIRST, sysctl_register_all, 0); 799 800 /* 801 * "Staff-functions" 802 * 803 * These functions implement a presently undocumented interface 804 * used by the sysctl program to walk the tree, and get the type 805 * so it can print the value. 806 * This interface is under work and consideration, and should probably 807 * be killed with a big axe by the first person who can find the time. 808 * (be aware though, that the proper interface isn't as obvious as it 809 * may seem, there are various conflicting requirements. 810 * 811 * {0,0} printf the entire MIB-tree. 812 * {0,1,...} return the name of the "..." OID. 813 * {0,2,...} return the next OID. 814 * {0,3} return the OID of the name in "new" 815 * {0,4,...} return the kind & format info for the "..." OID. 816 * {0,5,...} return the description the "..." OID. 817 */ 818 819 #ifdef SYSCTL_DEBUG 820 static void 821 sysctl_sysctl_debug_dump_node(struct sysctl_oid_list *l, int i) 822 { 823 int k; 824 struct sysctl_oid *oidp; 825 826 SYSCTL_ASSERT_LOCKED(); 827 SLIST_FOREACH(oidp, l, oid_link) { 828 829 for (k=0; k<i; k++) 830 printf(" "); 831 832 printf("%d %s ", oidp->oid_number, oidp->oid_name); 833 834 printf("%c%c", 835 oidp->oid_kind & CTLFLAG_RD ? 'R':' ', 836 oidp->oid_kind & CTLFLAG_WR ? 'W':' '); 837 838 if (oidp->oid_handler) 839 printf(" *Handler"); 840 841 switch (oidp->oid_kind & CTLTYPE) { 842 case CTLTYPE_NODE: 843 printf(" Node\n"); 844 if (!oidp->oid_handler) { 845 sysctl_sysctl_debug_dump_node( 846 SYSCTL_CHILDREN(oidp), i + 2); 847 } 848 break; 849 case CTLTYPE_INT: printf(" Int\n"); break; 850 case CTLTYPE_UINT: printf(" u_int\n"); break; 851 case CTLTYPE_LONG: printf(" Long\n"); break; 852 case CTLTYPE_ULONG: printf(" u_long\n"); break; 853 case CTLTYPE_STRING: printf(" String\n"); break; 854 case CTLTYPE_S8: printf(" int8_t\n"); break; 855 case CTLTYPE_S16: printf(" int16_t\n"); break; 856 case CTLTYPE_S32: printf(" int32_t\n"); break; 857 case CTLTYPE_S64: printf(" int64_t\n"); break; 858 case CTLTYPE_U8: printf(" uint8_t\n"); break; 859 case CTLTYPE_U16: printf(" uint16_t\n"); break; 860 case CTLTYPE_U32: printf(" uint32_t\n"); break; 861 case CTLTYPE_U64: printf(" uint64_t\n"); break; 862 case CTLTYPE_OPAQUE: printf(" Opaque/struct\n"); break; 863 default: printf("\n"); 864 } 865 866 } 867 } 868 869 static int 870 sysctl_sysctl_debug(SYSCTL_HANDLER_ARGS) 871 { 872 struct rm_priotracker tracker; 873 int error; 874 875 error = priv_check(req->td, PRIV_SYSCTL_DEBUG); 876 if (error) 877 return (error); 878 SYSCTL_RLOCK(&tracker); 879 sysctl_sysctl_debug_dump_node(&sysctl__children, 0); 880 SYSCTL_RUNLOCK(&tracker); 881 return (ENOENT); 882 } 883 884 SYSCTL_PROC(_sysctl, 0, debug, CTLTYPE_STRING|CTLFLAG_RD|CTLFLAG_MPSAFE, 885 0, 0, sysctl_sysctl_debug, "-", ""); 886 #endif 887 888 static int 889 sysctl_sysctl_name(SYSCTL_HANDLER_ARGS) 890 { 891 int *name = (int *) arg1; 892 u_int namelen = arg2; 893 int error = 0; 894 struct sysctl_oid *oid; 895 struct sysctl_oid_list *lsp = &sysctl__children, *lsp2; 896 struct rm_priotracker tracker; 897 char buf[10]; 898 899 SYSCTL_RLOCK(&tracker); 900 while (namelen) { 901 if (!lsp) { 902 snprintf(buf,sizeof(buf),"%d",*name); 903 if (req->oldidx) 904 error = SYSCTL_OUT(req, ".", 1); 905 if (!error) 906 error = SYSCTL_OUT(req, buf, strlen(buf)); 907 if (error) 908 goto out; 909 namelen--; 910 name++; 911 continue; 912 } 913 lsp2 = 0; 914 SLIST_FOREACH(oid, lsp, oid_link) { 915 if (oid->oid_number != *name) 916 continue; 917 918 if (req->oldidx) 919 error = SYSCTL_OUT(req, ".", 1); 920 if (!error) 921 error = SYSCTL_OUT(req, oid->oid_name, 922 strlen(oid->oid_name)); 923 if (error) 924 goto out; 925 926 namelen--; 927 name++; 928 929 if ((oid->oid_kind & CTLTYPE) != CTLTYPE_NODE) 930 break; 931 932 if (oid->oid_handler) 933 break; 934 935 lsp2 = SYSCTL_CHILDREN(oid); 936 break; 937 } 938 lsp = lsp2; 939 } 940 error = SYSCTL_OUT(req, "", 1); 941 out: 942 SYSCTL_RUNLOCK(&tracker); 943 return (error); 944 } 945 946 /* 947 * XXXRW/JA: Shouldn't return name data for nodes that we don't permit in 948 * capability mode. 949 */ 950 static SYSCTL_NODE(_sysctl, 1, name, CTLFLAG_RD | CTLFLAG_MPSAFE | CTLFLAG_CAPRD, 951 sysctl_sysctl_name, ""); 952 953 static int 954 sysctl_sysctl_next_ls(struct sysctl_oid_list *lsp, int *name, u_int namelen, 955 int *next, int *len, int level, struct sysctl_oid **oidpp) 956 { 957 struct sysctl_oid *oidp; 958 959 SYSCTL_ASSERT_LOCKED(); 960 *len = level; 961 SLIST_FOREACH(oidp, lsp, oid_link) { 962 *next = oidp->oid_number; 963 *oidpp = oidp; 964 965 if (oidp->oid_kind & CTLFLAG_SKIP) 966 continue; 967 968 if (!namelen) { 969 if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE) 970 return (0); 971 if (oidp->oid_handler) 972 /* We really should call the handler here...*/ 973 return (0); 974 lsp = SYSCTL_CHILDREN(oidp); 975 if (!sysctl_sysctl_next_ls(lsp, 0, 0, next+1, 976 len, level+1, oidpp)) 977 return (0); 978 goto emptynode; 979 } 980 981 if (oidp->oid_number < *name) 982 continue; 983 984 if (oidp->oid_number > *name) { 985 if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE) 986 return (0); 987 if (oidp->oid_handler) 988 return (0); 989 lsp = SYSCTL_CHILDREN(oidp); 990 if (!sysctl_sysctl_next_ls(lsp, name+1, namelen-1, 991 next+1, len, level+1, oidpp)) 992 return (0); 993 goto next; 994 } 995 if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE) 996 continue; 997 998 if (oidp->oid_handler) 999 continue; 1000 1001 lsp = SYSCTL_CHILDREN(oidp); 1002 if (!sysctl_sysctl_next_ls(lsp, name+1, namelen-1, next+1, 1003 len, level+1, oidpp)) 1004 return (0); 1005 next: 1006 namelen = 1; 1007 emptynode: 1008 *len = level; 1009 } 1010 return (1); 1011 } 1012 1013 static int 1014 sysctl_sysctl_next(SYSCTL_HANDLER_ARGS) 1015 { 1016 int *name = (int *) arg1; 1017 u_int namelen = arg2; 1018 int i, j, error; 1019 struct sysctl_oid *oid; 1020 struct sysctl_oid_list *lsp = &sysctl__children; 1021 struct rm_priotracker tracker; 1022 int newoid[CTL_MAXNAME]; 1023 1024 SYSCTL_RLOCK(&tracker); 1025 i = sysctl_sysctl_next_ls(lsp, name, namelen, newoid, &j, 1, &oid); 1026 SYSCTL_RUNLOCK(&tracker); 1027 if (i) 1028 return (ENOENT); 1029 error = SYSCTL_OUT(req, newoid, j * sizeof (int)); 1030 return (error); 1031 } 1032 1033 /* 1034 * XXXRW/JA: Shouldn't return next data for nodes that we don't permit in 1035 * capability mode. 1036 */ 1037 static SYSCTL_NODE(_sysctl, 2, next, CTLFLAG_RD | CTLFLAG_MPSAFE | CTLFLAG_CAPRD, 1038 sysctl_sysctl_next, ""); 1039 1040 static int 1041 name2oid(char *name, int *oid, int *len, struct sysctl_oid **oidpp) 1042 { 1043 struct sysctl_oid *oidp; 1044 struct sysctl_oid_list *lsp = &sysctl__children; 1045 char *p; 1046 1047 SYSCTL_ASSERT_LOCKED(); 1048 1049 for (*len = 0; *len < CTL_MAXNAME;) { 1050 p = strsep(&name, "."); 1051 1052 oidp = SLIST_FIRST(lsp); 1053 for (;; oidp = SLIST_NEXT(oidp, oid_link)) { 1054 if (oidp == NULL) 1055 return (ENOENT); 1056 if (strcmp(p, oidp->oid_name) == 0) 1057 break; 1058 } 1059 *oid++ = oidp->oid_number; 1060 (*len)++; 1061 1062 if (name == NULL || *name == '\0') { 1063 if (oidpp) 1064 *oidpp = oidp; 1065 return (0); 1066 } 1067 1068 if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE) 1069 break; 1070 1071 if (oidp->oid_handler) 1072 break; 1073 1074 lsp = SYSCTL_CHILDREN(oidp); 1075 } 1076 return (ENOENT); 1077 } 1078 1079 static int 1080 sysctl_sysctl_name2oid(SYSCTL_HANDLER_ARGS) 1081 { 1082 char *p; 1083 int error, oid[CTL_MAXNAME], len = 0; 1084 struct sysctl_oid *op = 0; 1085 struct rm_priotracker tracker; 1086 1087 if (!req->newlen) 1088 return (ENOENT); 1089 if (req->newlen >= MAXPATHLEN) /* XXX arbitrary, undocumented */ 1090 return (ENAMETOOLONG); 1091 1092 p = malloc(req->newlen+1, M_SYSCTL, M_WAITOK); 1093 1094 error = SYSCTL_IN(req, p, req->newlen); 1095 if (error) { 1096 free(p, M_SYSCTL); 1097 return (error); 1098 } 1099 1100 p [req->newlen] = '\0'; 1101 1102 SYSCTL_RLOCK(&tracker); 1103 error = name2oid(p, oid, &len, &op); 1104 SYSCTL_RUNLOCK(&tracker); 1105 1106 free(p, M_SYSCTL); 1107 1108 if (error) 1109 return (error); 1110 1111 error = SYSCTL_OUT(req, oid, len * sizeof *oid); 1112 return (error); 1113 } 1114 1115 /* 1116 * XXXRW/JA: Shouldn't return name2oid data for nodes that we don't permit in 1117 * capability mode. 1118 */ 1119 SYSCTL_PROC(_sysctl, 3, name2oid, 1120 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_MPSAFE 1121 | CTLFLAG_CAPRW, 0, 0, sysctl_sysctl_name2oid, "I", ""); 1122 1123 static int 1124 sysctl_sysctl_oidfmt(SYSCTL_HANDLER_ARGS) 1125 { 1126 struct sysctl_oid *oid; 1127 struct rm_priotracker tracker; 1128 int error; 1129 1130 SYSCTL_RLOCK(&tracker); 1131 error = sysctl_find_oid(arg1, arg2, &oid, NULL, req); 1132 if (error) 1133 goto out; 1134 1135 if (oid->oid_fmt == NULL) { 1136 error = ENOENT; 1137 goto out; 1138 } 1139 error = SYSCTL_OUT(req, &oid->oid_kind, sizeof(oid->oid_kind)); 1140 if (error) 1141 goto out; 1142 error = SYSCTL_OUT(req, oid->oid_fmt, strlen(oid->oid_fmt) + 1); 1143 out: 1144 SYSCTL_RUNLOCK(&tracker); 1145 return (error); 1146 } 1147 1148 1149 static SYSCTL_NODE(_sysctl, 4, oidfmt, CTLFLAG_RD|CTLFLAG_MPSAFE|CTLFLAG_CAPRD, 1150 sysctl_sysctl_oidfmt, ""); 1151 1152 static int 1153 sysctl_sysctl_oiddescr(SYSCTL_HANDLER_ARGS) 1154 { 1155 struct sysctl_oid *oid; 1156 struct rm_priotracker tracker; 1157 int error; 1158 1159 SYSCTL_RLOCK(&tracker); 1160 error = sysctl_find_oid(arg1, arg2, &oid, NULL, req); 1161 if (error) 1162 goto out; 1163 1164 if (oid->oid_descr == NULL) { 1165 error = ENOENT; 1166 goto out; 1167 } 1168 error = SYSCTL_OUT(req, oid->oid_descr, strlen(oid->oid_descr) + 1); 1169 out: 1170 SYSCTL_RUNLOCK(&tracker); 1171 return (error); 1172 } 1173 1174 static SYSCTL_NODE(_sysctl, 5, oiddescr, CTLFLAG_RD|CTLFLAG_MPSAFE|CTLFLAG_CAPRD, 1175 sysctl_sysctl_oiddescr, ""); 1176 1177 /* 1178 * Default "handler" functions. 1179 */ 1180 1181 /* 1182 * Handle an int8_t, signed or unsigned. 1183 * Two cases: 1184 * a variable: point arg1 at it. 1185 * a constant: pass it in arg2. 1186 */ 1187 1188 int 1189 sysctl_handle_8(SYSCTL_HANDLER_ARGS) 1190 { 1191 int8_t tmpout; 1192 int error = 0; 1193 1194 /* 1195 * Attempt to get a coherent snapshot by making a copy of the data. 1196 */ 1197 if (arg1) 1198 tmpout = *(int8_t *)arg1; 1199 else 1200 tmpout = arg2; 1201 error = SYSCTL_OUT(req, &tmpout, sizeof(tmpout)); 1202 1203 if (error || !req->newptr) 1204 return (error); 1205 1206 if (!arg1) 1207 error = EPERM; 1208 else 1209 error = SYSCTL_IN(req, arg1, sizeof(tmpout)); 1210 return (error); 1211 } 1212 1213 /* 1214 * Handle an int16_t, signed or unsigned. 1215 * Two cases: 1216 * a variable: point arg1 at it. 1217 * a constant: pass it in arg2. 1218 */ 1219 1220 int 1221 sysctl_handle_16(SYSCTL_HANDLER_ARGS) 1222 { 1223 int16_t tmpout; 1224 int error = 0; 1225 1226 /* 1227 * Attempt to get a coherent snapshot by making a copy of the data. 1228 */ 1229 if (arg1) 1230 tmpout = *(int16_t *)arg1; 1231 else 1232 tmpout = arg2; 1233 error = SYSCTL_OUT(req, &tmpout, sizeof(tmpout)); 1234 1235 if (error || !req->newptr) 1236 return (error); 1237 1238 if (!arg1) 1239 error = EPERM; 1240 else 1241 error = SYSCTL_IN(req, arg1, sizeof(tmpout)); 1242 return (error); 1243 } 1244 1245 /* 1246 * Handle an int32_t, signed or unsigned. 1247 * Two cases: 1248 * a variable: point arg1 at it. 1249 * a constant: pass it in arg2. 1250 */ 1251 1252 int 1253 sysctl_handle_32(SYSCTL_HANDLER_ARGS) 1254 { 1255 int32_t tmpout; 1256 int error = 0; 1257 1258 /* 1259 * Attempt to get a coherent snapshot by making a copy of the data. 1260 */ 1261 if (arg1) 1262 tmpout = *(int32_t *)arg1; 1263 else 1264 tmpout = arg2; 1265 error = SYSCTL_OUT(req, &tmpout, sizeof(tmpout)); 1266 1267 if (error || !req->newptr) 1268 return (error); 1269 1270 if (!arg1) 1271 error = EPERM; 1272 else 1273 error = SYSCTL_IN(req, arg1, sizeof(tmpout)); 1274 return (error); 1275 } 1276 1277 /* 1278 * Handle an int, signed or unsigned. 1279 * Two cases: 1280 * a variable: point arg1 at it. 1281 * a constant: pass it in arg2. 1282 */ 1283 1284 int 1285 sysctl_handle_int(SYSCTL_HANDLER_ARGS) 1286 { 1287 int tmpout, error = 0; 1288 1289 /* 1290 * Attempt to get a coherent snapshot by making a copy of the data. 1291 */ 1292 if (arg1) 1293 tmpout = *(int *)arg1; 1294 else 1295 tmpout = arg2; 1296 error = SYSCTL_OUT(req, &tmpout, sizeof(int)); 1297 1298 if (error || !req->newptr) 1299 return (error); 1300 1301 if (!arg1) 1302 error = EPERM; 1303 else 1304 error = SYSCTL_IN(req, arg1, sizeof(int)); 1305 return (error); 1306 } 1307 1308 /* 1309 * Based on on sysctl_handle_int() convert milliseconds into ticks. 1310 * Note: this is used by TCP. 1311 */ 1312 1313 int 1314 sysctl_msec_to_ticks(SYSCTL_HANDLER_ARGS) 1315 { 1316 int error, s, tt; 1317 1318 tt = *(int *)arg1; 1319 s = (int)((int64_t)tt * 1000 / hz); 1320 1321 error = sysctl_handle_int(oidp, &s, 0, req); 1322 if (error || !req->newptr) 1323 return (error); 1324 1325 tt = (int)((int64_t)s * hz / 1000); 1326 if (tt < 1) 1327 return (EINVAL); 1328 1329 *(int *)arg1 = tt; 1330 return (0); 1331 } 1332 1333 1334 /* 1335 * Handle a long, signed or unsigned. 1336 * Two cases: 1337 * a variable: point arg1 at it. 1338 * a constant: pass it in arg2. 1339 */ 1340 1341 int 1342 sysctl_handle_long(SYSCTL_HANDLER_ARGS) 1343 { 1344 int error = 0; 1345 long tmplong; 1346 #ifdef SCTL_MASK32 1347 int tmpint; 1348 #endif 1349 1350 /* 1351 * Attempt to get a coherent snapshot by making a copy of the data. 1352 */ 1353 if (arg1) 1354 tmplong = *(long *)arg1; 1355 else 1356 tmplong = arg2; 1357 #ifdef SCTL_MASK32 1358 if (req->flags & SCTL_MASK32) { 1359 tmpint = tmplong; 1360 error = SYSCTL_OUT(req, &tmpint, sizeof(int)); 1361 } else 1362 #endif 1363 error = SYSCTL_OUT(req, &tmplong, sizeof(long)); 1364 1365 if (error || !req->newptr) 1366 return (error); 1367 1368 if (!arg1) 1369 error = EPERM; 1370 #ifdef SCTL_MASK32 1371 else if (req->flags & SCTL_MASK32) { 1372 error = SYSCTL_IN(req, &tmpint, sizeof(int)); 1373 *(long *)arg1 = (long)tmpint; 1374 } 1375 #endif 1376 else 1377 error = SYSCTL_IN(req, arg1, sizeof(long)); 1378 return (error); 1379 } 1380 1381 /* 1382 * Handle a 64 bit int, signed or unsigned. 1383 * Two cases: 1384 * a variable: point arg1 at it. 1385 * a constant: pass it in arg2. 1386 */ 1387 int 1388 sysctl_handle_64(SYSCTL_HANDLER_ARGS) 1389 { 1390 int error = 0; 1391 uint64_t tmpout; 1392 1393 /* 1394 * Attempt to get a coherent snapshot by making a copy of the data. 1395 */ 1396 if (arg1) 1397 tmpout = *(uint64_t *)arg1; 1398 else 1399 tmpout = arg2; 1400 error = SYSCTL_OUT(req, &tmpout, sizeof(uint64_t)); 1401 1402 if (error || !req->newptr) 1403 return (error); 1404 1405 if (!arg1) 1406 error = EPERM; 1407 else 1408 error = SYSCTL_IN(req, arg1, sizeof(uint64_t)); 1409 return (error); 1410 } 1411 1412 /* 1413 * Handle our generic '\0' terminated 'C' string. 1414 * Two cases: 1415 * a variable string: point arg1 at it, arg2 is max length. 1416 * a constant string: point arg1 at it, arg2 is zero. 1417 */ 1418 1419 int 1420 sysctl_handle_string(SYSCTL_HANDLER_ARGS) 1421 { 1422 size_t outlen; 1423 int error = 0, ro_string = 0; 1424 1425 /* 1426 * A zero-length buffer indicates a fixed size read-only 1427 * string: 1428 */ 1429 if (arg2 == 0) { 1430 arg2 = strlen((char *)arg1) + 1; 1431 ro_string = 1; 1432 } 1433 1434 if (req->oldptr != NULL) { 1435 char *tmparg; 1436 1437 if (ro_string) { 1438 tmparg = arg1; 1439 } else { 1440 /* try to make a coherent snapshot of the string */ 1441 tmparg = malloc(arg2, M_SYSCTLTMP, M_WAITOK); 1442 memcpy(tmparg, arg1, arg2); 1443 } 1444 1445 outlen = strnlen(tmparg, arg2 - 1) + 1; 1446 error = SYSCTL_OUT(req, tmparg, outlen); 1447 1448 if (!ro_string) 1449 free(tmparg, M_SYSCTLTMP); 1450 } else { 1451 outlen = strnlen((char *)arg1, arg2 - 1) + 1; 1452 error = SYSCTL_OUT(req, NULL, outlen); 1453 } 1454 if (error || !req->newptr) 1455 return (error); 1456 1457 if ((req->newlen - req->newidx) >= arg2) { 1458 error = EINVAL; 1459 } else { 1460 arg2 = (req->newlen - req->newidx); 1461 error = SYSCTL_IN(req, arg1, arg2); 1462 ((char *)arg1)[arg2] = '\0'; 1463 } 1464 return (error); 1465 } 1466 1467 /* 1468 * Handle any kind of opaque data. 1469 * arg1 points to it, arg2 is the size. 1470 */ 1471 1472 int 1473 sysctl_handle_opaque(SYSCTL_HANDLER_ARGS) 1474 { 1475 int error, tries; 1476 u_int generation; 1477 struct sysctl_req req2; 1478 1479 /* 1480 * Attempt to get a coherent snapshot, by using the thread 1481 * pre-emption counter updated from within mi_switch() to 1482 * determine if we were pre-empted during a bcopy() or 1483 * copyout(). Make 3 attempts at doing this before giving up. 1484 * If we encounter an error, stop immediately. 1485 */ 1486 tries = 0; 1487 req2 = *req; 1488 retry: 1489 generation = curthread->td_generation; 1490 error = SYSCTL_OUT(req, arg1, arg2); 1491 if (error) 1492 return (error); 1493 tries++; 1494 if (generation != curthread->td_generation && tries < 3) { 1495 *req = req2; 1496 goto retry; 1497 } 1498 1499 error = SYSCTL_IN(req, arg1, arg2); 1500 1501 return (error); 1502 } 1503 1504 /* 1505 * Transfer functions to/from kernel space. 1506 * XXX: rather untested at this point 1507 */ 1508 static int 1509 sysctl_old_kernel(struct sysctl_req *req, const void *p, size_t l) 1510 { 1511 size_t i = 0; 1512 1513 if (req->oldptr) { 1514 i = l; 1515 if (req->oldlen <= req->oldidx) 1516 i = 0; 1517 else 1518 if (i > req->oldlen - req->oldidx) 1519 i = req->oldlen - req->oldidx; 1520 if (i > 0) 1521 bcopy(p, (char *)req->oldptr + req->oldidx, i); 1522 } 1523 req->oldidx += l; 1524 if (req->oldptr && i != l) 1525 return (ENOMEM); 1526 return (0); 1527 } 1528 1529 static int 1530 sysctl_new_kernel(struct sysctl_req *req, void *p, size_t l) 1531 { 1532 if (!req->newptr) 1533 return (0); 1534 if (req->newlen - req->newidx < l) 1535 return (EINVAL); 1536 bcopy((char *)req->newptr + req->newidx, p, l); 1537 req->newidx += l; 1538 return (0); 1539 } 1540 1541 int 1542 kernel_sysctl(struct thread *td, int *name, u_int namelen, void *old, 1543 size_t *oldlenp, void *new, size_t newlen, size_t *retval, int flags) 1544 { 1545 int error = 0; 1546 struct sysctl_req req; 1547 1548 bzero(&req, sizeof req); 1549 1550 req.td = td; 1551 req.flags = flags; 1552 1553 if (oldlenp) { 1554 req.oldlen = *oldlenp; 1555 } 1556 req.validlen = req.oldlen; 1557 1558 if (old) { 1559 req.oldptr= old; 1560 } 1561 1562 if (new != NULL) { 1563 req.newlen = newlen; 1564 req.newptr = new; 1565 } 1566 1567 req.oldfunc = sysctl_old_kernel; 1568 req.newfunc = sysctl_new_kernel; 1569 req.lock = REQ_UNWIRED; 1570 1571 error = sysctl_root(0, name, namelen, &req); 1572 1573 if (req.lock == REQ_WIRED && req.validlen > 0) 1574 vsunlock(req.oldptr, req.validlen); 1575 1576 if (error && error != ENOMEM) 1577 return (error); 1578 1579 if (retval) { 1580 if (req.oldptr && req.oldidx > req.validlen) 1581 *retval = req.validlen; 1582 else 1583 *retval = req.oldidx; 1584 } 1585 return (error); 1586 } 1587 1588 int 1589 kernel_sysctlbyname(struct thread *td, char *name, void *old, size_t *oldlenp, 1590 void *new, size_t newlen, size_t *retval, int flags) 1591 { 1592 int oid[CTL_MAXNAME]; 1593 size_t oidlen, plen; 1594 int error; 1595 1596 oid[0] = 0; /* sysctl internal magic */ 1597 oid[1] = 3; /* name2oid */ 1598 oidlen = sizeof(oid); 1599 1600 error = kernel_sysctl(td, oid, 2, oid, &oidlen, 1601 (void *)name, strlen(name), &plen, flags); 1602 if (error) 1603 return (error); 1604 1605 error = kernel_sysctl(td, oid, plen / sizeof(int), old, oldlenp, 1606 new, newlen, retval, flags); 1607 return (error); 1608 } 1609 1610 /* 1611 * Transfer function to/from user space. 1612 */ 1613 static int 1614 sysctl_old_user(struct sysctl_req *req, const void *p, size_t l) 1615 { 1616 size_t i, len, origidx; 1617 int error; 1618 1619 origidx = req->oldidx; 1620 req->oldidx += l; 1621 if (req->oldptr == NULL) 1622 return (0); 1623 /* 1624 * If we have not wired the user supplied buffer and we are currently 1625 * holding locks, drop a witness warning, as it's possible that 1626 * write operations to the user page can sleep. 1627 */ 1628 if (req->lock != REQ_WIRED) 1629 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, 1630 "sysctl_old_user()"); 1631 i = l; 1632 len = req->validlen; 1633 if (len <= origidx) 1634 i = 0; 1635 else { 1636 if (i > len - origidx) 1637 i = len - origidx; 1638 if (req->lock == REQ_WIRED) { 1639 error = copyout_nofault(p, (char *)req->oldptr + 1640 origidx, i); 1641 } else 1642 error = copyout(p, (char *)req->oldptr + origidx, i); 1643 if (error != 0) 1644 return (error); 1645 } 1646 if (i < l) 1647 return (ENOMEM); 1648 return (0); 1649 } 1650 1651 static int 1652 sysctl_new_user(struct sysctl_req *req, void *p, size_t l) 1653 { 1654 int error; 1655 1656 if (!req->newptr) 1657 return (0); 1658 if (req->newlen - req->newidx < l) 1659 return (EINVAL); 1660 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, 1661 "sysctl_new_user()"); 1662 error = copyin((char *)req->newptr + req->newidx, p, l); 1663 req->newidx += l; 1664 return (error); 1665 } 1666 1667 /* 1668 * Wire the user space destination buffer. If set to a value greater than 1669 * zero, the len parameter limits the maximum amount of wired memory. 1670 */ 1671 int 1672 sysctl_wire_old_buffer(struct sysctl_req *req, size_t len) 1673 { 1674 int ret; 1675 size_t wiredlen; 1676 1677 wiredlen = (len > 0 && len < req->oldlen) ? len : req->oldlen; 1678 ret = 0; 1679 if (req->lock != REQ_WIRED && req->oldptr && 1680 req->oldfunc == sysctl_old_user) { 1681 if (wiredlen != 0) { 1682 ret = vslock(req->oldptr, wiredlen); 1683 if (ret != 0) { 1684 if (ret != ENOMEM) 1685 return (ret); 1686 wiredlen = 0; 1687 } 1688 } 1689 req->lock = REQ_WIRED; 1690 req->validlen = wiredlen; 1691 } 1692 return (0); 1693 } 1694 1695 int 1696 sysctl_find_oid(int *name, u_int namelen, struct sysctl_oid **noid, 1697 int *nindx, struct sysctl_req *req) 1698 { 1699 struct sysctl_oid_list *lsp; 1700 struct sysctl_oid *oid; 1701 int indx; 1702 1703 SYSCTL_ASSERT_LOCKED(); 1704 lsp = &sysctl__children; 1705 indx = 0; 1706 while (indx < CTL_MAXNAME) { 1707 SLIST_FOREACH(oid, lsp, oid_link) { 1708 if (oid->oid_number == name[indx]) 1709 break; 1710 } 1711 if (oid == NULL) 1712 return (ENOENT); 1713 1714 indx++; 1715 if ((oid->oid_kind & CTLTYPE) == CTLTYPE_NODE) { 1716 if (oid->oid_handler != NULL || indx == namelen) { 1717 *noid = oid; 1718 if (nindx != NULL) 1719 *nindx = indx; 1720 KASSERT((oid->oid_kind & CTLFLAG_DYING) == 0, 1721 ("%s found DYING node %p", __func__, oid)); 1722 return (0); 1723 } 1724 lsp = SYSCTL_CHILDREN(oid); 1725 } else if (indx == namelen) { 1726 *noid = oid; 1727 if (nindx != NULL) 1728 *nindx = indx; 1729 KASSERT((oid->oid_kind & CTLFLAG_DYING) == 0, 1730 ("%s found DYING node %p", __func__, oid)); 1731 return (0); 1732 } else { 1733 return (ENOTDIR); 1734 } 1735 } 1736 return (ENOENT); 1737 } 1738 1739 /* 1740 * Traverse our tree, and find the right node, execute whatever it points 1741 * to, and return the resulting error code. 1742 */ 1743 1744 static int 1745 sysctl_root(SYSCTL_HANDLER_ARGS) 1746 { 1747 struct sysctl_oid *oid; 1748 struct rm_priotracker tracker; 1749 int error, indx, lvl; 1750 1751 SYSCTL_RLOCK(&tracker); 1752 1753 error = sysctl_find_oid(arg1, arg2, &oid, &indx, req); 1754 if (error) 1755 goto out; 1756 1757 if ((oid->oid_kind & CTLTYPE) == CTLTYPE_NODE) { 1758 /* 1759 * You can't call a sysctl when it's a node, but has 1760 * no handler. Inform the user that it's a node. 1761 * The indx may or may not be the same as namelen. 1762 */ 1763 if (oid->oid_handler == NULL) { 1764 error = EISDIR; 1765 goto out; 1766 } 1767 } 1768 1769 /* Is this sysctl writable? */ 1770 if (req->newptr && !(oid->oid_kind & CTLFLAG_WR)) { 1771 error = EPERM; 1772 goto out; 1773 } 1774 1775 KASSERT(req->td != NULL, ("sysctl_root(): req->td == NULL")); 1776 1777 #ifdef CAPABILITY_MODE 1778 /* 1779 * If the process is in capability mode, then don't permit reading or 1780 * writing unless specifically granted for the node. 1781 */ 1782 if (IN_CAPABILITY_MODE(req->td)) { 1783 if ((req->oldptr && !(oid->oid_kind & CTLFLAG_CAPRD)) || 1784 (req->newptr && !(oid->oid_kind & CTLFLAG_CAPWR))) { 1785 error = EPERM; 1786 goto out; 1787 } 1788 } 1789 #endif 1790 1791 /* Is this sysctl sensitive to securelevels? */ 1792 if (req->newptr && (oid->oid_kind & CTLFLAG_SECURE)) { 1793 lvl = (oid->oid_kind & CTLMASK_SECURE) >> CTLSHIFT_SECURE; 1794 error = securelevel_gt(req->td->td_ucred, lvl); 1795 if (error) 1796 goto out; 1797 } 1798 1799 /* Is this sysctl writable by only privileged users? */ 1800 if (req->newptr && !(oid->oid_kind & CTLFLAG_ANYBODY)) { 1801 int priv; 1802 1803 if (oid->oid_kind & CTLFLAG_PRISON) 1804 priv = PRIV_SYSCTL_WRITEJAIL; 1805 #ifdef VIMAGE 1806 else if ((oid->oid_kind & CTLFLAG_VNET) && 1807 prison_owns_vnet(req->td->td_ucred)) 1808 priv = PRIV_SYSCTL_WRITEJAIL; 1809 #endif 1810 else 1811 priv = PRIV_SYSCTL_WRITE; 1812 error = priv_check(req->td, priv); 1813 if (error) 1814 goto out; 1815 } 1816 1817 if (!oid->oid_handler) { 1818 error = EINVAL; 1819 goto out; 1820 } 1821 1822 if ((oid->oid_kind & CTLTYPE) == CTLTYPE_NODE) { 1823 arg1 = (int *)arg1 + indx; 1824 arg2 -= indx; 1825 } else { 1826 arg1 = oid->oid_arg1; 1827 arg2 = oid->oid_arg2; 1828 } 1829 #ifdef MAC 1830 error = mac_system_check_sysctl(req->td->td_ucred, oid, arg1, arg2, 1831 req); 1832 if (error != 0) 1833 goto out; 1834 #endif 1835 #ifdef VIMAGE 1836 if ((oid->oid_kind & CTLFLAG_VNET) && arg1 != NULL) 1837 arg1 = (void *)(curvnet->vnet_data_base + (uintptr_t)arg1); 1838 #endif 1839 error = sysctl_root_handler_locked(oid, arg1, arg2, req, &tracker); 1840 1841 KFAIL_POINT_ERROR(_debug_fail_point, sysctl_running, error); 1842 1843 out: 1844 SYSCTL_RUNLOCK(&tracker); 1845 return (error); 1846 } 1847 1848 #ifndef _SYS_SYSPROTO_H_ 1849 struct sysctl_args { 1850 int *name; 1851 u_int namelen; 1852 void *old; 1853 size_t *oldlenp; 1854 void *new; 1855 size_t newlen; 1856 }; 1857 #endif 1858 int 1859 sys___sysctl(struct thread *td, struct sysctl_args *uap) 1860 { 1861 int error, i, name[CTL_MAXNAME]; 1862 size_t j; 1863 1864 if (uap->namelen > CTL_MAXNAME || uap->namelen < 2) 1865 return (EINVAL); 1866 1867 error = copyin(uap->name, &name, uap->namelen * sizeof(int)); 1868 if (error) 1869 return (error); 1870 1871 error = userland_sysctl(td, name, uap->namelen, 1872 uap->old, uap->oldlenp, 0, 1873 uap->new, uap->newlen, &j, 0); 1874 if (error && error != ENOMEM) 1875 return (error); 1876 if (uap->oldlenp) { 1877 i = copyout(&j, uap->oldlenp, sizeof(j)); 1878 if (i) 1879 return (i); 1880 } 1881 return (error); 1882 } 1883 1884 /* 1885 * This is used from various compatibility syscalls too. That's why name 1886 * must be in kernel space. 1887 */ 1888 int 1889 userland_sysctl(struct thread *td, int *name, u_int namelen, void *old, 1890 size_t *oldlenp, int inkernel, void *new, size_t newlen, size_t *retval, 1891 int flags) 1892 { 1893 int error = 0, memlocked; 1894 struct sysctl_req req; 1895 1896 bzero(&req, sizeof req); 1897 1898 req.td = td; 1899 req.flags = flags; 1900 1901 if (oldlenp) { 1902 if (inkernel) { 1903 req.oldlen = *oldlenp; 1904 } else { 1905 error = copyin(oldlenp, &req.oldlen, sizeof(*oldlenp)); 1906 if (error) 1907 return (error); 1908 } 1909 } 1910 req.validlen = req.oldlen; 1911 1912 if (old) { 1913 if (!useracc(old, req.oldlen, VM_PROT_WRITE)) 1914 return (EFAULT); 1915 req.oldptr= old; 1916 } 1917 1918 if (new != NULL) { 1919 if (!useracc(new, newlen, VM_PROT_READ)) 1920 return (EFAULT); 1921 req.newlen = newlen; 1922 req.newptr = new; 1923 } 1924 1925 req.oldfunc = sysctl_old_user; 1926 req.newfunc = sysctl_new_user; 1927 req.lock = REQ_UNWIRED; 1928 1929 #ifdef KTRACE 1930 if (KTRPOINT(curthread, KTR_SYSCTL)) 1931 ktrsysctl(name, namelen); 1932 #endif 1933 1934 if (req.oldptr && req.oldlen > PAGE_SIZE) { 1935 memlocked = 1; 1936 sx_xlock(&sysctlmemlock); 1937 } else 1938 memlocked = 0; 1939 CURVNET_SET(TD_TO_VNET(td)); 1940 1941 for (;;) { 1942 req.oldidx = 0; 1943 req.newidx = 0; 1944 error = sysctl_root(0, name, namelen, &req); 1945 if (error != EAGAIN) 1946 break; 1947 kern_yield(PRI_USER); 1948 } 1949 1950 CURVNET_RESTORE(); 1951 1952 if (req.lock == REQ_WIRED && req.validlen > 0) 1953 vsunlock(req.oldptr, req.validlen); 1954 if (memlocked) 1955 sx_xunlock(&sysctlmemlock); 1956 1957 if (error && error != ENOMEM) 1958 return (error); 1959 1960 if (retval) { 1961 if (req.oldptr && req.oldidx > req.validlen) 1962 *retval = req.validlen; 1963 else 1964 *retval = req.oldidx; 1965 } 1966 return (error); 1967 } 1968 1969 /* 1970 * Drain into a sysctl struct. The user buffer should be wired if a page 1971 * fault would cause issue. 1972 */ 1973 static int 1974 sbuf_sysctl_drain(void *arg, const char *data, int len) 1975 { 1976 struct sysctl_req *req = arg; 1977 int error; 1978 1979 error = SYSCTL_OUT(req, data, len); 1980 KASSERT(error >= 0, ("Got unexpected negative value %d", error)); 1981 return (error == 0 ? len : -error); 1982 } 1983 1984 struct sbuf * 1985 sbuf_new_for_sysctl(struct sbuf *s, char *buf, int length, 1986 struct sysctl_req *req) 1987 { 1988 1989 /* Supply a default buffer size if none given. */ 1990 if (buf == NULL && length == 0) 1991 length = 64; 1992 s = sbuf_new(s, buf, length, SBUF_FIXEDLEN | SBUF_INCLUDENUL); 1993 sbuf_set_drain(s, sbuf_sysctl_drain, req); 1994 return (s); 1995 } 1996