1 /*- 2 * Copyright (c) 1982, 1986, 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Mike Karels at Berkeley Software Design, Inc. 7 * 8 * Quite extensively rewritten by Poul-Henning Kamp of the FreeBSD 9 * project, to make these variables more userfriendly. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 4. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 * 35 * @(#)kern_sysctl.c 8.4 (Berkeley) 4/14/94 36 */ 37 38 #include <sys/cdefs.h> 39 __FBSDID("$FreeBSD$"); 40 41 #include "opt_capsicum.h" 42 #include "opt_compat.h" 43 #include "opt_ktrace.h" 44 45 #include <sys/param.h> 46 #include <sys/fail.h> 47 #include <sys/systm.h> 48 #include <sys/capsicum.h> 49 #include <sys/kernel.h> 50 #include <sys/sysctl.h> 51 #include <sys/malloc.h> 52 #include <sys/priv.h> 53 #include <sys/proc.h> 54 #include <sys/jail.h> 55 #include <sys/lock.h> 56 #include <sys/mutex.h> 57 #include <sys/sbuf.h> 58 #include <sys/sx.h> 59 #include <sys/sysproto.h> 60 #include <sys/uio.h> 61 #ifdef KTRACE 62 #include <sys/ktrace.h> 63 #endif 64 65 #include <net/vnet.h> 66 67 #include <security/mac/mac_framework.h> 68 69 #include <vm/vm.h> 70 #include <vm/vm_extern.h> 71 72 static MALLOC_DEFINE(M_SYSCTL, "sysctl", "sysctl internal magic"); 73 static MALLOC_DEFINE(M_SYSCTLOID, "sysctloid", "sysctl dynamic oids"); 74 static MALLOC_DEFINE(M_SYSCTLTMP, "sysctltmp", "sysctl temp output buffer"); 75 76 /* 77 * The sysctllock protects the MIB tree. It also protects sysctl 78 * contexts used with dynamic sysctls. The sysctl_register_oid() and 79 * sysctl_unregister_oid() routines require the sysctllock to already 80 * be held, so the sysctl_xlock() and sysctl_xunlock() routines are 81 * provided for the few places in the kernel which need to use that 82 * API rather than using the dynamic API. Use of the dynamic API is 83 * strongly encouraged for most code. 84 * 85 * The sysctlmemlock is used to limit the amount of user memory wired for 86 * sysctl requests. This is implemented by serializing any userland 87 * sysctl requests larger than a single page via an exclusive lock. 88 */ 89 static struct sx sysctllock; 90 static struct sx sysctlmemlock; 91 92 #define SYSCTL_XLOCK() sx_xlock(&sysctllock) 93 #define SYSCTL_XUNLOCK() sx_xunlock(&sysctllock) 94 #define SYSCTL_SLOCK() sx_slock(&sysctllock) 95 #define SYSCTL_SUNLOCK() sx_sunlock(&sysctllock) 96 #define SYSCTL_XLOCKED() sx_xlocked(&sysctllock) 97 #define SYSCTL_ASSERT_LOCKED() sx_assert(&sysctllock, SA_LOCKED) 98 #define SYSCTL_ASSERT_XLOCKED() sx_assert(&sysctllock, SA_XLOCKED) 99 #define SYSCTL_ASSERT_SLOCKED() sx_assert(&sysctllock, SA_SLOCKED) 100 #define SYSCTL_INIT() sx_init(&sysctllock, "sysctl lock") 101 #define SYSCTL_SLEEP(ch, wmesg, timo) \ 102 sx_sleep(ch, &sysctllock, 0, wmesg, timo) 103 104 static int sysctl_root(SYSCTL_HANDLER_ARGS); 105 106 /* Root list */ 107 struct sysctl_oid_list sysctl__children = SLIST_HEAD_INITIALIZER(&sysctl__children); 108 109 static int sysctl_remove_oid_locked(struct sysctl_oid *oidp, int del, 110 int recurse); 111 static int sysctl_old_kernel(struct sysctl_req *, const void *, size_t); 112 static int sysctl_new_kernel(struct sysctl_req *, void *, size_t); 113 114 static void 115 sysctl_lock(bool xlock) 116 { 117 118 if (xlock) 119 SYSCTL_XLOCK(); 120 else 121 SYSCTL_SLOCK(); 122 } 123 124 static bool 125 sysctl_unlock(void) 126 { 127 bool xlocked; 128 129 xlocked = SYSCTL_XLOCKED(); 130 if (xlocked) 131 SYSCTL_XUNLOCK(); 132 else 133 SYSCTL_SUNLOCK(); 134 return (xlocked); 135 } 136 137 static struct sysctl_oid * 138 sysctl_find_oidname(const char *name, struct sysctl_oid_list *list) 139 { 140 struct sysctl_oid *oidp; 141 142 SYSCTL_ASSERT_LOCKED(); 143 SLIST_FOREACH(oidp, list, oid_link) { 144 if (strcmp(oidp->oid_name, name) == 0) { 145 return (oidp); 146 } 147 } 148 return (NULL); 149 } 150 151 /* 152 * Initialization of the MIB tree. 153 * 154 * Order by number in each list. 155 */ 156 void 157 sysctl_xlock(void) 158 { 159 160 SYSCTL_XLOCK(); 161 } 162 163 void 164 sysctl_xunlock(void) 165 { 166 167 SYSCTL_XUNLOCK(); 168 } 169 170 static int 171 sysctl_root_handler_locked(struct sysctl_oid *oid, void *arg1, intptr_t arg2, 172 struct sysctl_req *req) 173 { 174 int error; 175 bool xlocked; 176 177 if (oid->oid_kind & CTLFLAG_DYN) 178 atomic_add_int(&oid->oid_running, 1); 179 xlocked = sysctl_unlock(); 180 181 if (!(oid->oid_kind & CTLFLAG_MPSAFE)) 182 mtx_lock(&Giant); 183 error = oid->oid_handler(oid, arg1, arg2, req); 184 if (!(oid->oid_kind & CTLFLAG_MPSAFE)) 185 mtx_unlock(&Giant); 186 187 sysctl_lock(xlocked); 188 if (oid->oid_kind & CTLFLAG_DYN) { 189 if (atomic_fetchadd_int(&oid->oid_running, -1) == 1 && 190 (oid->oid_kind & CTLFLAG_DYING) != 0) 191 wakeup(&oid->oid_running); 192 } 193 194 return (error); 195 } 196 197 static void 198 sysctl_load_tunable_by_oid_locked(struct sysctl_oid *oidp) 199 { 200 struct sysctl_req req; 201 struct sysctl_oid *curr; 202 char *penv = NULL; 203 char path[64]; 204 ssize_t rem = sizeof(path); 205 ssize_t len; 206 int val_int; 207 long val_long; 208 int64_t val_64; 209 quad_t val_quad; 210 int error; 211 212 path[--rem] = 0; 213 214 for (curr = oidp; curr != NULL; curr = SYSCTL_PARENT(curr)) { 215 len = strlen(curr->oid_name); 216 rem -= len; 217 if (curr != oidp) 218 rem -= 1; 219 if (rem < 0) { 220 printf("OID path exceeds %d bytes\n", (int)sizeof(path)); 221 return; 222 } 223 memcpy(path + rem, curr->oid_name, len); 224 if (curr != oidp) 225 path[rem + len] = '.'; 226 } 227 228 memset(&req, 0, sizeof(req)); 229 230 req.td = curthread; 231 req.oldfunc = sysctl_old_kernel; 232 req.newfunc = sysctl_new_kernel; 233 req.lock = REQ_UNWIRED; 234 235 switch (oidp->oid_kind & CTLTYPE) { 236 case CTLTYPE_INT: 237 if (getenv_int(path + rem, &val_int) == 0) 238 return; 239 req.newlen = sizeof(val_int); 240 req.newptr = &val_int; 241 break; 242 case CTLTYPE_UINT: 243 if (getenv_uint(path + rem, (unsigned int *)&val_int) == 0) 244 return; 245 req.newlen = sizeof(val_int); 246 req.newptr = &val_int; 247 break; 248 case CTLTYPE_LONG: 249 if (getenv_long(path + rem, &val_long) == 0) 250 return; 251 req.newlen = sizeof(val_long); 252 req.newptr = &val_long; 253 break; 254 case CTLTYPE_ULONG: 255 if (getenv_ulong(path + rem, (unsigned long *)&val_long) == 0) 256 return; 257 req.newlen = sizeof(val_long); 258 req.newptr = &val_long; 259 break; 260 case CTLTYPE_S64: 261 if (getenv_quad(path + rem, &val_quad) == 0) 262 return; 263 val_64 = val_quad; 264 req.newlen = sizeof(val_64); 265 req.newptr = &val_64; 266 break; 267 case CTLTYPE_U64: 268 /* XXX there is no getenv_uquad() */ 269 if (getenv_quad(path + rem, &val_quad) == 0) 270 return; 271 val_64 = val_quad; 272 req.newlen = sizeof(val_64); 273 req.newptr = &val_64; 274 break; 275 case CTLTYPE_STRING: 276 penv = kern_getenv(path + rem); 277 if (penv == NULL) 278 return; 279 req.newlen = strlen(penv); 280 req.newptr = penv; 281 break; 282 default: 283 return; 284 } 285 error = sysctl_root_handler_locked(oidp, oidp->oid_arg1, 286 oidp->oid_arg2, &req); 287 if (error != 0) 288 printf("Setting sysctl %s failed: %d\n", path, error); 289 if (penv != NULL) 290 freeenv(penv); 291 } 292 293 void 294 sysctl_register_oid(struct sysctl_oid *oidp) 295 { 296 struct sysctl_oid_list *parent = oidp->oid_parent; 297 struct sysctl_oid *p; 298 struct sysctl_oid *q; 299 300 /* 301 * First check if another oid with the same name already 302 * exists in the parent's list. 303 */ 304 SYSCTL_ASSERT_XLOCKED(); 305 p = sysctl_find_oidname(oidp->oid_name, parent); 306 if (p != NULL) { 307 if ((p->oid_kind & CTLTYPE) == CTLTYPE_NODE) { 308 p->oid_refcnt++; 309 return; 310 } else { 311 printf("can't re-use a leaf (%s)!\n", p->oid_name); 312 return; 313 } 314 } 315 /* 316 * If this oid has a number OID_AUTO, give it a number which 317 * is greater than any current oid. 318 * NOTE: DO NOT change the starting value here, change it in 319 * <sys/sysctl.h>, and make sure it is at least 256 to 320 * accomodate e.g. net.inet.raw as a static sysctl node. 321 */ 322 if (oidp->oid_number == OID_AUTO) { 323 static int newoid = CTL_AUTO_START; 324 325 oidp->oid_number = newoid++; 326 if (newoid == 0x7fffffff) 327 panic("out of oids"); 328 } 329 #if 0 330 else if (oidp->oid_number >= CTL_AUTO_START) { 331 /* do not panic; this happens when unregistering sysctl sets */ 332 printf("static sysctl oid too high: %d", oidp->oid_number); 333 } 334 #endif 335 336 /* 337 * Insert the oid into the parent's list in order. 338 */ 339 q = NULL; 340 SLIST_FOREACH(p, parent, oid_link) { 341 if (oidp->oid_number < p->oid_number) 342 break; 343 q = p; 344 } 345 if (q) 346 SLIST_INSERT_AFTER(q, oidp, oid_link); 347 else 348 SLIST_INSERT_HEAD(parent, oidp, oid_link); 349 350 if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE && 351 #ifdef VIMAGE 352 (oidp->oid_kind & CTLFLAG_VNET) == 0 && 353 #endif 354 (oidp->oid_kind & CTLFLAG_TUN) != 0 && 355 (oidp->oid_kind & CTLFLAG_NOFETCH) == 0) { 356 sysctl_load_tunable_by_oid_locked(oidp); 357 } 358 } 359 360 void 361 sysctl_unregister_oid(struct sysctl_oid *oidp) 362 { 363 struct sysctl_oid *p; 364 int error; 365 366 SYSCTL_ASSERT_XLOCKED(); 367 error = ENOENT; 368 if (oidp->oid_number == OID_AUTO) { 369 error = EINVAL; 370 } else { 371 SLIST_FOREACH(p, oidp->oid_parent, oid_link) { 372 if (p == oidp) { 373 SLIST_REMOVE(oidp->oid_parent, oidp, 374 sysctl_oid, oid_link); 375 error = 0; 376 break; 377 } 378 } 379 } 380 381 /* 382 * This can happen when a module fails to register and is 383 * being unloaded afterwards. It should not be a panic() 384 * for normal use. 385 */ 386 if (error) 387 printf("%s: failed to unregister sysctl\n", __func__); 388 } 389 390 /* Initialize a new context to keep track of dynamically added sysctls. */ 391 int 392 sysctl_ctx_init(struct sysctl_ctx_list *c) 393 { 394 395 if (c == NULL) { 396 return (EINVAL); 397 } 398 399 /* 400 * No locking here, the caller is responsible for not adding 401 * new nodes to a context until after this function has 402 * returned. 403 */ 404 TAILQ_INIT(c); 405 return (0); 406 } 407 408 /* Free the context, and destroy all dynamic oids registered in this context */ 409 int 410 sysctl_ctx_free(struct sysctl_ctx_list *clist) 411 { 412 struct sysctl_ctx_entry *e, *e1; 413 int error; 414 415 error = 0; 416 /* 417 * First perform a "dry run" to check if it's ok to remove oids. 418 * XXX FIXME 419 * XXX This algorithm is a hack. But I don't know any 420 * XXX better solution for now... 421 */ 422 SYSCTL_XLOCK(); 423 TAILQ_FOREACH(e, clist, link) { 424 error = sysctl_remove_oid_locked(e->entry, 0, 0); 425 if (error) 426 break; 427 } 428 /* 429 * Restore deregistered entries, either from the end, 430 * or from the place where error occured. 431 * e contains the entry that was not unregistered 432 */ 433 if (error) 434 e1 = TAILQ_PREV(e, sysctl_ctx_list, link); 435 else 436 e1 = TAILQ_LAST(clist, sysctl_ctx_list); 437 while (e1 != NULL) { 438 sysctl_register_oid(e1->entry); 439 e1 = TAILQ_PREV(e1, sysctl_ctx_list, link); 440 } 441 if (error) { 442 SYSCTL_XUNLOCK(); 443 return(EBUSY); 444 } 445 /* Now really delete the entries */ 446 e = TAILQ_FIRST(clist); 447 while (e != NULL) { 448 e1 = TAILQ_NEXT(e, link); 449 error = sysctl_remove_oid_locked(e->entry, 1, 0); 450 if (error) 451 panic("sysctl_remove_oid: corrupt tree, entry: %s", 452 e->entry->oid_name); 453 free(e, M_SYSCTLOID); 454 e = e1; 455 } 456 SYSCTL_XUNLOCK(); 457 return (error); 458 } 459 460 /* Add an entry to the context */ 461 struct sysctl_ctx_entry * 462 sysctl_ctx_entry_add(struct sysctl_ctx_list *clist, struct sysctl_oid *oidp) 463 { 464 struct sysctl_ctx_entry *e; 465 466 SYSCTL_ASSERT_XLOCKED(); 467 if (clist == NULL || oidp == NULL) 468 return(NULL); 469 e = malloc(sizeof(struct sysctl_ctx_entry), M_SYSCTLOID, M_WAITOK); 470 e->entry = oidp; 471 TAILQ_INSERT_HEAD(clist, e, link); 472 return (e); 473 } 474 475 /* Find an entry in the context */ 476 struct sysctl_ctx_entry * 477 sysctl_ctx_entry_find(struct sysctl_ctx_list *clist, struct sysctl_oid *oidp) 478 { 479 struct sysctl_ctx_entry *e; 480 481 SYSCTL_ASSERT_XLOCKED(); 482 if (clist == NULL || oidp == NULL) 483 return(NULL); 484 TAILQ_FOREACH(e, clist, link) { 485 if(e->entry == oidp) 486 return(e); 487 } 488 return (e); 489 } 490 491 /* 492 * Delete an entry from the context. 493 * NOTE: this function doesn't free oidp! You have to remove it 494 * with sysctl_remove_oid(). 495 */ 496 int 497 sysctl_ctx_entry_del(struct sysctl_ctx_list *clist, struct sysctl_oid *oidp) 498 { 499 struct sysctl_ctx_entry *e; 500 501 if (clist == NULL || oidp == NULL) 502 return (EINVAL); 503 SYSCTL_XLOCK(); 504 e = sysctl_ctx_entry_find(clist, oidp); 505 if (e != NULL) { 506 TAILQ_REMOVE(clist, e, link); 507 SYSCTL_XUNLOCK(); 508 free(e, M_SYSCTLOID); 509 return (0); 510 } else { 511 SYSCTL_XUNLOCK(); 512 return (ENOENT); 513 } 514 } 515 516 /* 517 * Remove dynamically created sysctl trees. 518 * oidp - top of the tree to be removed 519 * del - if 0 - just deregister, otherwise free up entries as well 520 * recurse - if != 0 traverse the subtree to be deleted 521 */ 522 int 523 sysctl_remove_oid(struct sysctl_oid *oidp, int del, int recurse) 524 { 525 int error; 526 527 SYSCTL_XLOCK(); 528 error = sysctl_remove_oid_locked(oidp, del, recurse); 529 SYSCTL_XUNLOCK(); 530 return (error); 531 } 532 533 int 534 sysctl_remove_name(struct sysctl_oid *parent, const char *name, 535 int del, int recurse) 536 { 537 struct sysctl_oid *p, *tmp; 538 int error; 539 540 error = ENOENT; 541 SYSCTL_XLOCK(); 542 SLIST_FOREACH_SAFE(p, SYSCTL_CHILDREN(parent), oid_link, tmp) { 543 if (strcmp(p->oid_name, name) == 0) { 544 error = sysctl_remove_oid_locked(p, del, recurse); 545 break; 546 } 547 } 548 SYSCTL_XUNLOCK(); 549 550 return (error); 551 } 552 553 554 static int 555 sysctl_remove_oid_locked(struct sysctl_oid *oidp, int del, int recurse) 556 { 557 struct sysctl_oid *p, *tmp; 558 int error; 559 560 SYSCTL_ASSERT_XLOCKED(); 561 if (oidp == NULL) 562 return(EINVAL); 563 if ((oidp->oid_kind & CTLFLAG_DYN) == 0) { 564 printf("can't remove non-dynamic nodes!\n"); 565 return (EINVAL); 566 } 567 /* 568 * WARNING: normal method to do this should be through 569 * sysctl_ctx_free(). Use recursing as the last resort 570 * method to purge your sysctl tree of leftovers... 571 * However, if some other code still references these nodes, 572 * it will panic. 573 */ 574 if ((oidp->oid_kind & CTLTYPE) == CTLTYPE_NODE) { 575 if (oidp->oid_refcnt == 1) { 576 SLIST_FOREACH_SAFE(p, 577 SYSCTL_CHILDREN(oidp), oid_link, tmp) { 578 if (!recurse) { 579 printf("Warning: failed attempt to " 580 "remove oid %s with child %s\n", 581 oidp->oid_name, p->oid_name); 582 return (ENOTEMPTY); 583 } 584 error = sysctl_remove_oid_locked(p, del, 585 recurse); 586 if (error) 587 return (error); 588 } 589 } 590 } 591 if (oidp->oid_refcnt > 1 ) { 592 oidp->oid_refcnt--; 593 } else { 594 if (oidp->oid_refcnt == 0) { 595 printf("Warning: bad oid_refcnt=%u (%s)!\n", 596 oidp->oid_refcnt, oidp->oid_name); 597 return (EINVAL); 598 } 599 sysctl_unregister_oid(oidp); 600 if (del) { 601 /* 602 * Wait for all threads running the handler to drain. 603 * This preserves the previous behavior when the 604 * sysctl lock was held across a handler invocation, 605 * and is necessary for module unload correctness. 606 */ 607 while (oidp->oid_running > 0) { 608 oidp->oid_kind |= CTLFLAG_DYING; 609 SYSCTL_SLEEP(&oidp->oid_running, "oidrm", 0); 610 } 611 if (oidp->oid_descr) 612 free(__DECONST(char *, oidp->oid_descr), 613 M_SYSCTLOID); 614 free(__DECONST(char *, oidp->oid_name), M_SYSCTLOID); 615 free(oidp, M_SYSCTLOID); 616 } 617 } 618 return (0); 619 } 620 /* 621 * Create new sysctls at run time. 622 * clist may point to a valid context initialized with sysctl_ctx_init(). 623 */ 624 struct sysctl_oid * 625 sysctl_add_oid(struct sysctl_ctx_list *clist, struct sysctl_oid_list *parent, 626 int number, const char *name, int kind, void *arg1, intptr_t arg2, 627 int (*handler)(SYSCTL_HANDLER_ARGS), const char *fmt, const char *descr) 628 { 629 struct sysctl_oid *oidp; 630 631 /* You have to hook up somewhere.. */ 632 if (parent == NULL) 633 return(NULL); 634 /* Check if the node already exists, otherwise create it */ 635 SYSCTL_XLOCK(); 636 oidp = sysctl_find_oidname(name, parent); 637 if (oidp != NULL) { 638 if ((oidp->oid_kind & CTLTYPE) == CTLTYPE_NODE) { 639 oidp->oid_refcnt++; 640 /* Update the context */ 641 if (clist != NULL) 642 sysctl_ctx_entry_add(clist, oidp); 643 SYSCTL_XUNLOCK(); 644 return (oidp); 645 } else { 646 SYSCTL_XUNLOCK(); 647 printf("can't re-use a leaf (%s)!\n", name); 648 return (NULL); 649 } 650 } 651 oidp = malloc(sizeof(struct sysctl_oid), M_SYSCTLOID, M_WAITOK|M_ZERO); 652 oidp->oid_parent = parent; 653 SLIST_INIT(&oidp->oid_children); 654 oidp->oid_number = number; 655 oidp->oid_refcnt = 1; 656 oidp->oid_name = strdup(name, M_SYSCTLOID); 657 oidp->oid_handler = handler; 658 oidp->oid_kind = CTLFLAG_DYN | kind; 659 oidp->oid_arg1 = arg1; 660 oidp->oid_arg2 = arg2; 661 oidp->oid_fmt = fmt; 662 if (descr != NULL) 663 oidp->oid_descr = strdup(descr, M_SYSCTLOID); 664 /* Update the context, if used */ 665 if (clist != NULL) 666 sysctl_ctx_entry_add(clist, oidp); 667 /* Register this oid */ 668 sysctl_register_oid(oidp); 669 SYSCTL_XUNLOCK(); 670 return (oidp); 671 } 672 673 /* 674 * Rename an existing oid. 675 */ 676 void 677 sysctl_rename_oid(struct sysctl_oid *oidp, const char *name) 678 { 679 char *newname; 680 char *oldname; 681 682 newname = strdup(name, M_SYSCTLOID); 683 SYSCTL_XLOCK(); 684 oldname = __DECONST(char *, oidp->oid_name); 685 oidp->oid_name = newname; 686 SYSCTL_XUNLOCK(); 687 free(oldname, M_SYSCTLOID); 688 } 689 690 /* 691 * Reparent an existing oid. 692 */ 693 int 694 sysctl_move_oid(struct sysctl_oid *oid, struct sysctl_oid_list *parent) 695 { 696 struct sysctl_oid *oidp; 697 698 SYSCTL_XLOCK(); 699 if (oid->oid_parent == parent) { 700 SYSCTL_XUNLOCK(); 701 return (0); 702 } 703 oidp = sysctl_find_oidname(oid->oid_name, parent); 704 if (oidp != NULL) { 705 SYSCTL_XUNLOCK(); 706 return (EEXIST); 707 } 708 sysctl_unregister_oid(oid); 709 oid->oid_parent = parent; 710 oid->oid_number = OID_AUTO; 711 sysctl_register_oid(oid); 712 SYSCTL_XUNLOCK(); 713 return (0); 714 } 715 716 /* 717 * Register the kernel's oids on startup. 718 */ 719 SET_DECLARE(sysctl_set, struct sysctl_oid); 720 721 static void 722 sysctl_register_all(void *arg) 723 { 724 struct sysctl_oid **oidp; 725 726 sx_init(&sysctlmemlock, "sysctl mem"); 727 SYSCTL_INIT(); 728 SYSCTL_XLOCK(); 729 SET_FOREACH(oidp, sysctl_set) 730 sysctl_register_oid(*oidp); 731 SYSCTL_XUNLOCK(); 732 } 733 SYSINIT(sysctl, SI_SUB_KMEM, SI_ORDER_FIRST, sysctl_register_all, 0); 734 735 /* 736 * "Staff-functions" 737 * 738 * These functions implement a presently undocumented interface 739 * used by the sysctl program to walk the tree, and get the type 740 * so it can print the value. 741 * This interface is under work and consideration, and should probably 742 * be killed with a big axe by the first person who can find the time. 743 * (be aware though, that the proper interface isn't as obvious as it 744 * may seem, there are various conflicting requirements. 745 * 746 * {0,0} printf the entire MIB-tree. 747 * {0,1,...} return the name of the "..." OID. 748 * {0,2,...} return the next OID. 749 * {0,3} return the OID of the name in "new" 750 * {0,4,...} return the kind & format info for the "..." OID. 751 * {0,5,...} return the description the "..." OID. 752 */ 753 754 #ifdef SYSCTL_DEBUG 755 static void 756 sysctl_sysctl_debug_dump_node(struct sysctl_oid_list *l, int i) 757 { 758 int k; 759 struct sysctl_oid *oidp; 760 761 SYSCTL_ASSERT_LOCKED(); 762 SLIST_FOREACH(oidp, l, oid_link) { 763 764 for (k=0; k<i; k++) 765 printf(" "); 766 767 printf("%d %s ", oidp->oid_number, oidp->oid_name); 768 769 printf("%c%c", 770 oidp->oid_kind & CTLFLAG_RD ? 'R':' ', 771 oidp->oid_kind & CTLFLAG_WR ? 'W':' '); 772 773 if (oidp->oid_handler) 774 printf(" *Handler"); 775 776 switch (oidp->oid_kind & CTLTYPE) { 777 case CTLTYPE_NODE: 778 printf(" Node\n"); 779 if (!oidp->oid_handler) { 780 sysctl_sysctl_debug_dump_node( 781 SYSCTL_CHILDREN(oidp), i + 2); 782 } 783 break; 784 case CTLTYPE_INT: printf(" Int\n"); break; 785 case CTLTYPE_UINT: printf(" u_int\n"); break; 786 case CTLTYPE_LONG: printf(" Long\n"); break; 787 case CTLTYPE_ULONG: printf(" u_long\n"); break; 788 case CTLTYPE_STRING: printf(" String\n"); break; 789 case CTLTYPE_U64: printf(" uint64_t\n"); break; 790 case CTLTYPE_S64: printf(" int64_t\n"); break; 791 case CTLTYPE_OPAQUE: printf(" Opaque/struct\n"); break; 792 default: printf("\n"); 793 } 794 795 } 796 } 797 798 static int 799 sysctl_sysctl_debug(SYSCTL_HANDLER_ARGS) 800 { 801 int error; 802 803 error = priv_check(req->td, PRIV_SYSCTL_DEBUG); 804 if (error) 805 return (error); 806 SYSCTL_SLOCK(); 807 sysctl_sysctl_debug_dump_node(&sysctl__children, 0); 808 SYSCTL_SUNLOCK(); 809 return (ENOENT); 810 } 811 812 SYSCTL_PROC(_sysctl, 0, debug, CTLTYPE_STRING|CTLFLAG_RD|CTLFLAG_MPSAFE, 813 0, 0, sysctl_sysctl_debug, "-", ""); 814 #endif 815 816 static int 817 sysctl_sysctl_name(SYSCTL_HANDLER_ARGS) 818 { 819 int *name = (int *) arg1; 820 u_int namelen = arg2; 821 int error = 0; 822 struct sysctl_oid *oid; 823 struct sysctl_oid_list *lsp = &sysctl__children, *lsp2; 824 char buf[10]; 825 826 SYSCTL_SLOCK(); 827 while (namelen) { 828 if (!lsp) { 829 snprintf(buf,sizeof(buf),"%d",*name); 830 if (req->oldidx) 831 error = SYSCTL_OUT(req, ".", 1); 832 if (!error) 833 error = SYSCTL_OUT(req, buf, strlen(buf)); 834 if (error) 835 goto out; 836 namelen--; 837 name++; 838 continue; 839 } 840 lsp2 = 0; 841 SLIST_FOREACH(oid, lsp, oid_link) { 842 if (oid->oid_number != *name) 843 continue; 844 845 if (req->oldidx) 846 error = SYSCTL_OUT(req, ".", 1); 847 if (!error) 848 error = SYSCTL_OUT(req, oid->oid_name, 849 strlen(oid->oid_name)); 850 if (error) 851 goto out; 852 853 namelen--; 854 name++; 855 856 if ((oid->oid_kind & CTLTYPE) != CTLTYPE_NODE) 857 break; 858 859 if (oid->oid_handler) 860 break; 861 862 lsp2 = SYSCTL_CHILDREN(oid); 863 break; 864 } 865 lsp = lsp2; 866 } 867 error = SYSCTL_OUT(req, "", 1); 868 out: 869 SYSCTL_SUNLOCK(); 870 return (error); 871 } 872 873 /* 874 * XXXRW/JA: Shouldn't return name data for nodes that we don't permit in 875 * capability mode. 876 */ 877 static SYSCTL_NODE(_sysctl, 1, name, CTLFLAG_RD | CTLFLAG_MPSAFE | CTLFLAG_CAPRD, 878 sysctl_sysctl_name, ""); 879 880 static int 881 sysctl_sysctl_next_ls(struct sysctl_oid_list *lsp, int *name, u_int namelen, 882 int *next, int *len, int level, struct sysctl_oid **oidpp) 883 { 884 struct sysctl_oid *oidp; 885 886 SYSCTL_ASSERT_LOCKED(); 887 *len = level; 888 SLIST_FOREACH(oidp, lsp, oid_link) { 889 *next = oidp->oid_number; 890 *oidpp = oidp; 891 892 if (oidp->oid_kind & CTLFLAG_SKIP) 893 continue; 894 895 if (!namelen) { 896 if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE) 897 return (0); 898 if (oidp->oid_handler) 899 /* We really should call the handler here...*/ 900 return (0); 901 lsp = SYSCTL_CHILDREN(oidp); 902 if (!sysctl_sysctl_next_ls(lsp, 0, 0, next+1, 903 len, level+1, oidpp)) 904 return (0); 905 goto emptynode; 906 } 907 908 if (oidp->oid_number < *name) 909 continue; 910 911 if (oidp->oid_number > *name) { 912 if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE) 913 return (0); 914 if (oidp->oid_handler) 915 return (0); 916 lsp = SYSCTL_CHILDREN(oidp); 917 if (!sysctl_sysctl_next_ls(lsp, name+1, namelen-1, 918 next+1, len, level+1, oidpp)) 919 return (0); 920 goto next; 921 } 922 if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE) 923 continue; 924 925 if (oidp->oid_handler) 926 continue; 927 928 lsp = SYSCTL_CHILDREN(oidp); 929 if (!sysctl_sysctl_next_ls(lsp, name+1, namelen-1, next+1, 930 len, level+1, oidpp)) 931 return (0); 932 next: 933 namelen = 1; 934 emptynode: 935 *len = level; 936 } 937 return (1); 938 } 939 940 static int 941 sysctl_sysctl_next(SYSCTL_HANDLER_ARGS) 942 { 943 int *name = (int *) arg1; 944 u_int namelen = arg2; 945 int i, j, error; 946 struct sysctl_oid *oid; 947 struct sysctl_oid_list *lsp = &sysctl__children; 948 int newoid[CTL_MAXNAME]; 949 950 SYSCTL_SLOCK(); 951 i = sysctl_sysctl_next_ls(lsp, name, namelen, newoid, &j, 1, &oid); 952 SYSCTL_SUNLOCK(); 953 if (i) 954 return (ENOENT); 955 error = SYSCTL_OUT(req, newoid, j * sizeof (int)); 956 return (error); 957 } 958 959 /* 960 * XXXRW/JA: Shouldn't return next data for nodes that we don't permit in 961 * capability mode. 962 */ 963 static SYSCTL_NODE(_sysctl, 2, next, CTLFLAG_RD | CTLFLAG_MPSAFE | CTLFLAG_CAPRD, 964 sysctl_sysctl_next, ""); 965 966 static int 967 name2oid(char *name, int *oid, int *len, struct sysctl_oid **oidpp) 968 { 969 struct sysctl_oid *oidp; 970 struct sysctl_oid_list *lsp = &sysctl__children; 971 char *p; 972 973 SYSCTL_ASSERT_LOCKED(); 974 975 for (*len = 0; *len < CTL_MAXNAME;) { 976 p = strsep(&name, "."); 977 978 oidp = SLIST_FIRST(lsp); 979 for (;; oidp = SLIST_NEXT(oidp, oid_link)) { 980 if (oidp == NULL) 981 return (ENOENT); 982 if (strcmp(p, oidp->oid_name) == 0) 983 break; 984 } 985 *oid++ = oidp->oid_number; 986 (*len)++; 987 988 if (name == NULL || *name == '\0') { 989 if (oidpp) 990 *oidpp = oidp; 991 return (0); 992 } 993 994 if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE) 995 break; 996 997 if (oidp->oid_handler) 998 break; 999 1000 lsp = SYSCTL_CHILDREN(oidp); 1001 } 1002 return (ENOENT); 1003 } 1004 1005 static int 1006 sysctl_sysctl_name2oid(SYSCTL_HANDLER_ARGS) 1007 { 1008 char *p; 1009 int error, oid[CTL_MAXNAME], len = 0; 1010 struct sysctl_oid *op = 0; 1011 1012 if (!req->newlen) 1013 return (ENOENT); 1014 if (req->newlen >= MAXPATHLEN) /* XXX arbitrary, undocumented */ 1015 return (ENAMETOOLONG); 1016 1017 p = malloc(req->newlen+1, M_SYSCTL, M_WAITOK); 1018 1019 error = SYSCTL_IN(req, p, req->newlen); 1020 if (error) { 1021 free(p, M_SYSCTL); 1022 return (error); 1023 } 1024 1025 p [req->newlen] = '\0'; 1026 1027 SYSCTL_SLOCK(); 1028 error = name2oid(p, oid, &len, &op); 1029 SYSCTL_SUNLOCK(); 1030 1031 free(p, M_SYSCTL); 1032 1033 if (error) 1034 return (error); 1035 1036 error = SYSCTL_OUT(req, oid, len * sizeof *oid); 1037 return (error); 1038 } 1039 1040 /* 1041 * XXXRW/JA: Shouldn't return name2oid data for nodes that we don't permit in 1042 * capability mode. 1043 */ 1044 SYSCTL_PROC(_sysctl, 3, name2oid, 1045 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_MPSAFE 1046 | CTLFLAG_CAPRW, 0, 0, sysctl_sysctl_name2oid, "I", ""); 1047 1048 static int 1049 sysctl_sysctl_oidfmt(SYSCTL_HANDLER_ARGS) 1050 { 1051 struct sysctl_oid *oid; 1052 int error; 1053 1054 SYSCTL_SLOCK(); 1055 error = sysctl_find_oid(arg1, arg2, &oid, NULL, req); 1056 if (error) 1057 goto out; 1058 1059 if (oid->oid_fmt == NULL) { 1060 error = ENOENT; 1061 goto out; 1062 } 1063 error = SYSCTL_OUT(req, &oid->oid_kind, sizeof(oid->oid_kind)); 1064 if (error) 1065 goto out; 1066 error = SYSCTL_OUT(req, oid->oid_fmt, strlen(oid->oid_fmt) + 1); 1067 out: 1068 SYSCTL_SUNLOCK(); 1069 return (error); 1070 } 1071 1072 1073 static SYSCTL_NODE(_sysctl, 4, oidfmt, CTLFLAG_RD|CTLFLAG_MPSAFE|CTLFLAG_CAPRD, 1074 sysctl_sysctl_oidfmt, ""); 1075 1076 static int 1077 sysctl_sysctl_oiddescr(SYSCTL_HANDLER_ARGS) 1078 { 1079 struct sysctl_oid *oid; 1080 int error; 1081 1082 SYSCTL_SLOCK(); 1083 error = sysctl_find_oid(arg1, arg2, &oid, NULL, req); 1084 if (error) 1085 goto out; 1086 1087 if (oid->oid_descr == NULL) { 1088 error = ENOENT; 1089 goto out; 1090 } 1091 error = SYSCTL_OUT(req, oid->oid_descr, strlen(oid->oid_descr) + 1); 1092 out: 1093 SYSCTL_SUNLOCK(); 1094 return (error); 1095 } 1096 1097 static SYSCTL_NODE(_sysctl, 5, oiddescr, CTLFLAG_RD|CTLFLAG_MPSAFE|CTLFLAG_CAPRD, 1098 sysctl_sysctl_oiddescr, ""); 1099 1100 /* 1101 * Default "handler" functions. 1102 */ 1103 1104 /* 1105 * Handle an int, signed or unsigned. 1106 * Two cases: 1107 * a variable: point arg1 at it. 1108 * a constant: pass it in arg2. 1109 */ 1110 1111 int 1112 sysctl_handle_int(SYSCTL_HANDLER_ARGS) 1113 { 1114 int tmpout, error = 0; 1115 1116 /* 1117 * Attempt to get a coherent snapshot by making a copy of the data. 1118 */ 1119 if (arg1) 1120 tmpout = *(int *)arg1; 1121 else 1122 tmpout = arg2; 1123 error = SYSCTL_OUT(req, &tmpout, sizeof(int)); 1124 1125 if (error || !req->newptr) 1126 return (error); 1127 1128 if (!arg1) 1129 error = EPERM; 1130 else 1131 error = SYSCTL_IN(req, arg1, sizeof(int)); 1132 return (error); 1133 } 1134 1135 /* 1136 * Based on on sysctl_handle_int() convert milliseconds into ticks. 1137 * Note: this is used by TCP. 1138 */ 1139 1140 int 1141 sysctl_msec_to_ticks(SYSCTL_HANDLER_ARGS) 1142 { 1143 int error, s, tt; 1144 1145 tt = *(int *)arg1; 1146 s = (int)((int64_t)tt * 1000 / hz); 1147 1148 error = sysctl_handle_int(oidp, &s, 0, req); 1149 if (error || !req->newptr) 1150 return (error); 1151 1152 tt = (int)((int64_t)s * hz / 1000); 1153 if (tt < 1) 1154 return (EINVAL); 1155 1156 *(int *)arg1 = tt; 1157 return (0); 1158 } 1159 1160 1161 /* 1162 * Handle a long, signed or unsigned. 1163 * Two cases: 1164 * a variable: point arg1 at it. 1165 * a constant: pass it in arg2. 1166 */ 1167 1168 int 1169 sysctl_handle_long(SYSCTL_HANDLER_ARGS) 1170 { 1171 int error = 0; 1172 long tmplong; 1173 #ifdef SCTL_MASK32 1174 int tmpint; 1175 #endif 1176 1177 /* 1178 * Attempt to get a coherent snapshot by making a copy of the data. 1179 */ 1180 if (arg1) 1181 tmplong = *(long *)arg1; 1182 else 1183 tmplong = arg2; 1184 #ifdef SCTL_MASK32 1185 if (req->flags & SCTL_MASK32) { 1186 tmpint = tmplong; 1187 error = SYSCTL_OUT(req, &tmpint, sizeof(int)); 1188 } else 1189 #endif 1190 error = SYSCTL_OUT(req, &tmplong, sizeof(long)); 1191 1192 if (error || !req->newptr) 1193 return (error); 1194 1195 if (!arg1) 1196 error = EPERM; 1197 #ifdef SCTL_MASK32 1198 else if (req->flags & SCTL_MASK32) { 1199 error = SYSCTL_IN(req, &tmpint, sizeof(int)); 1200 *(long *)arg1 = (long)tmpint; 1201 } 1202 #endif 1203 else 1204 error = SYSCTL_IN(req, arg1, sizeof(long)); 1205 return (error); 1206 } 1207 1208 /* 1209 * Handle a 64 bit int, signed or unsigned. 1210 * Two cases: 1211 * a variable: point arg1 at it. 1212 * a constant: pass it in arg2. 1213 */ 1214 int 1215 sysctl_handle_64(SYSCTL_HANDLER_ARGS) 1216 { 1217 int error = 0; 1218 uint64_t tmpout; 1219 1220 /* 1221 * Attempt to get a coherent snapshot by making a copy of the data. 1222 */ 1223 if (arg1) 1224 tmpout = *(uint64_t *)arg1; 1225 else 1226 tmpout = arg2; 1227 error = SYSCTL_OUT(req, &tmpout, sizeof(uint64_t)); 1228 1229 if (error || !req->newptr) 1230 return (error); 1231 1232 if (!arg1) 1233 error = EPERM; 1234 else 1235 error = SYSCTL_IN(req, arg1, sizeof(uint64_t)); 1236 return (error); 1237 } 1238 1239 /* 1240 * Handle our generic '\0' terminated 'C' string. 1241 * Two cases: 1242 * a variable string: point arg1 at it, arg2 is max length. 1243 * a constant string: point arg1 at it, arg2 is zero. 1244 */ 1245 1246 int 1247 sysctl_handle_string(SYSCTL_HANDLER_ARGS) 1248 { 1249 size_t outlen; 1250 int error = 0, ro_string = 0; 1251 1252 /* 1253 * A zero-length buffer indicates a fixed size read-only 1254 * string: 1255 */ 1256 if (arg2 == 0) { 1257 arg2 = strlen((char *)arg1) + 1; 1258 ro_string = 1; 1259 } 1260 1261 if (req->oldptr != NULL) { 1262 char *tmparg; 1263 1264 if (ro_string) { 1265 tmparg = arg1; 1266 } else { 1267 /* try to make a coherent snapshot of the string */ 1268 tmparg = malloc(arg2, M_SYSCTLTMP, M_WAITOK); 1269 memcpy(tmparg, arg1, arg2); 1270 } 1271 1272 outlen = strnlen(tmparg, arg2 - 1) + 1; 1273 error = SYSCTL_OUT(req, tmparg, outlen); 1274 1275 if (!ro_string) 1276 free(tmparg, M_SYSCTLTMP); 1277 } else { 1278 outlen = strnlen((char *)arg1, arg2 - 1) + 1; 1279 error = SYSCTL_OUT(req, NULL, outlen); 1280 } 1281 if (error || !req->newptr) 1282 return (error); 1283 1284 if ((req->newlen - req->newidx) >= arg2) { 1285 error = EINVAL; 1286 } else { 1287 arg2 = (req->newlen - req->newidx); 1288 error = SYSCTL_IN(req, arg1, arg2); 1289 ((char *)arg1)[arg2] = '\0'; 1290 } 1291 return (error); 1292 } 1293 1294 /* 1295 * Handle any kind of opaque data. 1296 * arg1 points to it, arg2 is the size. 1297 */ 1298 1299 int 1300 sysctl_handle_opaque(SYSCTL_HANDLER_ARGS) 1301 { 1302 int error, tries; 1303 u_int generation; 1304 struct sysctl_req req2; 1305 1306 /* 1307 * Attempt to get a coherent snapshot, by using the thread 1308 * pre-emption counter updated from within mi_switch() to 1309 * determine if we were pre-empted during a bcopy() or 1310 * copyout(). Make 3 attempts at doing this before giving up. 1311 * If we encounter an error, stop immediately. 1312 */ 1313 tries = 0; 1314 req2 = *req; 1315 retry: 1316 generation = curthread->td_generation; 1317 error = SYSCTL_OUT(req, arg1, arg2); 1318 if (error) 1319 return (error); 1320 tries++; 1321 if (generation != curthread->td_generation && tries < 3) { 1322 *req = req2; 1323 goto retry; 1324 } 1325 1326 error = SYSCTL_IN(req, arg1, arg2); 1327 1328 return (error); 1329 } 1330 1331 /* 1332 * Transfer functions to/from kernel space. 1333 * XXX: rather untested at this point 1334 */ 1335 static int 1336 sysctl_old_kernel(struct sysctl_req *req, const void *p, size_t l) 1337 { 1338 size_t i = 0; 1339 1340 if (req->oldptr) { 1341 i = l; 1342 if (req->oldlen <= req->oldidx) 1343 i = 0; 1344 else 1345 if (i > req->oldlen - req->oldidx) 1346 i = req->oldlen - req->oldidx; 1347 if (i > 0) 1348 bcopy(p, (char *)req->oldptr + req->oldidx, i); 1349 } 1350 req->oldidx += l; 1351 if (req->oldptr && i != l) 1352 return (ENOMEM); 1353 return (0); 1354 } 1355 1356 static int 1357 sysctl_new_kernel(struct sysctl_req *req, void *p, size_t l) 1358 { 1359 if (!req->newptr) 1360 return (0); 1361 if (req->newlen - req->newidx < l) 1362 return (EINVAL); 1363 bcopy((char *)req->newptr + req->newidx, p, l); 1364 req->newidx += l; 1365 return (0); 1366 } 1367 1368 int 1369 kernel_sysctl(struct thread *td, int *name, u_int namelen, void *old, 1370 size_t *oldlenp, void *new, size_t newlen, size_t *retval, int flags) 1371 { 1372 int error = 0; 1373 struct sysctl_req req; 1374 1375 bzero(&req, sizeof req); 1376 1377 req.td = td; 1378 req.flags = flags; 1379 1380 if (oldlenp) { 1381 req.oldlen = *oldlenp; 1382 } 1383 req.validlen = req.oldlen; 1384 1385 if (old) { 1386 req.oldptr= old; 1387 } 1388 1389 if (new != NULL) { 1390 req.newlen = newlen; 1391 req.newptr = new; 1392 } 1393 1394 req.oldfunc = sysctl_old_kernel; 1395 req.newfunc = sysctl_new_kernel; 1396 req.lock = REQ_UNWIRED; 1397 1398 SYSCTL_SLOCK(); 1399 error = sysctl_root(0, name, namelen, &req); 1400 SYSCTL_SUNLOCK(); 1401 1402 if (req.lock == REQ_WIRED && req.validlen > 0) 1403 vsunlock(req.oldptr, req.validlen); 1404 1405 if (error && error != ENOMEM) 1406 return (error); 1407 1408 if (retval) { 1409 if (req.oldptr && req.oldidx > req.validlen) 1410 *retval = req.validlen; 1411 else 1412 *retval = req.oldidx; 1413 } 1414 return (error); 1415 } 1416 1417 int 1418 kernel_sysctlbyname(struct thread *td, char *name, void *old, size_t *oldlenp, 1419 void *new, size_t newlen, size_t *retval, int flags) 1420 { 1421 int oid[CTL_MAXNAME]; 1422 size_t oidlen, plen; 1423 int error; 1424 1425 oid[0] = 0; /* sysctl internal magic */ 1426 oid[1] = 3; /* name2oid */ 1427 oidlen = sizeof(oid); 1428 1429 error = kernel_sysctl(td, oid, 2, oid, &oidlen, 1430 (void *)name, strlen(name), &plen, flags); 1431 if (error) 1432 return (error); 1433 1434 error = kernel_sysctl(td, oid, plen / sizeof(int), old, oldlenp, 1435 new, newlen, retval, flags); 1436 return (error); 1437 } 1438 1439 /* 1440 * Transfer function to/from user space. 1441 */ 1442 static int 1443 sysctl_old_user(struct sysctl_req *req, const void *p, size_t l) 1444 { 1445 size_t i, len, origidx; 1446 int error; 1447 1448 origidx = req->oldidx; 1449 req->oldidx += l; 1450 if (req->oldptr == NULL) 1451 return (0); 1452 /* 1453 * If we have not wired the user supplied buffer and we are currently 1454 * holding locks, drop a witness warning, as it's possible that 1455 * write operations to the user page can sleep. 1456 */ 1457 if (req->lock != REQ_WIRED) 1458 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, 1459 "sysctl_old_user()"); 1460 i = l; 1461 len = req->validlen; 1462 if (len <= origidx) 1463 i = 0; 1464 else { 1465 if (i > len - origidx) 1466 i = len - origidx; 1467 if (req->lock == REQ_WIRED) { 1468 error = copyout_nofault(p, (char *)req->oldptr + 1469 origidx, i); 1470 } else 1471 error = copyout(p, (char *)req->oldptr + origidx, i); 1472 if (error != 0) 1473 return (error); 1474 } 1475 if (i < l) 1476 return (ENOMEM); 1477 return (0); 1478 } 1479 1480 static int 1481 sysctl_new_user(struct sysctl_req *req, void *p, size_t l) 1482 { 1483 int error; 1484 1485 if (!req->newptr) 1486 return (0); 1487 if (req->newlen - req->newidx < l) 1488 return (EINVAL); 1489 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, 1490 "sysctl_new_user()"); 1491 error = copyin((char *)req->newptr + req->newidx, p, l); 1492 req->newidx += l; 1493 return (error); 1494 } 1495 1496 /* 1497 * Wire the user space destination buffer. If set to a value greater than 1498 * zero, the len parameter limits the maximum amount of wired memory. 1499 */ 1500 int 1501 sysctl_wire_old_buffer(struct sysctl_req *req, size_t len) 1502 { 1503 int ret; 1504 size_t wiredlen; 1505 1506 wiredlen = (len > 0 && len < req->oldlen) ? len : req->oldlen; 1507 ret = 0; 1508 if (req->lock != REQ_WIRED && req->oldptr && 1509 req->oldfunc == sysctl_old_user) { 1510 if (wiredlen != 0) { 1511 ret = vslock(req->oldptr, wiredlen); 1512 if (ret != 0) { 1513 if (ret != ENOMEM) 1514 return (ret); 1515 wiredlen = 0; 1516 } 1517 } 1518 req->lock = REQ_WIRED; 1519 req->validlen = wiredlen; 1520 } 1521 return (0); 1522 } 1523 1524 int 1525 sysctl_find_oid(int *name, u_int namelen, struct sysctl_oid **noid, 1526 int *nindx, struct sysctl_req *req) 1527 { 1528 struct sysctl_oid_list *lsp; 1529 struct sysctl_oid *oid; 1530 int indx; 1531 1532 SYSCTL_ASSERT_LOCKED(); 1533 lsp = &sysctl__children; 1534 indx = 0; 1535 while (indx < CTL_MAXNAME) { 1536 SLIST_FOREACH(oid, lsp, oid_link) { 1537 if (oid->oid_number == name[indx]) 1538 break; 1539 } 1540 if (oid == NULL) 1541 return (ENOENT); 1542 1543 indx++; 1544 if ((oid->oid_kind & CTLTYPE) == CTLTYPE_NODE) { 1545 if (oid->oid_handler != NULL || indx == namelen) { 1546 *noid = oid; 1547 if (nindx != NULL) 1548 *nindx = indx; 1549 KASSERT((oid->oid_kind & CTLFLAG_DYING) == 0, 1550 ("%s found DYING node %p", __func__, oid)); 1551 return (0); 1552 } 1553 lsp = SYSCTL_CHILDREN(oid); 1554 } else if (indx == namelen) { 1555 *noid = oid; 1556 if (nindx != NULL) 1557 *nindx = indx; 1558 KASSERT((oid->oid_kind & CTLFLAG_DYING) == 0, 1559 ("%s found DYING node %p", __func__, oid)); 1560 return (0); 1561 } else { 1562 return (ENOTDIR); 1563 } 1564 } 1565 return (ENOENT); 1566 } 1567 1568 /* 1569 * Traverse our tree, and find the right node, execute whatever it points 1570 * to, and return the resulting error code. 1571 */ 1572 1573 static int 1574 sysctl_root(SYSCTL_HANDLER_ARGS) 1575 { 1576 struct sysctl_oid *oid; 1577 int error, indx, lvl; 1578 1579 SYSCTL_ASSERT_SLOCKED(); 1580 1581 error = sysctl_find_oid(arg1, arg2, &oid, &indx, req); 1582 if (error) 1583 return (error); 1584 1585 if ((oid->oid_kind & CTLTYPE) == CTLTYPE_NODE) { 1586 /* 1587 * You can't call a sysctl when it's a node, but has 1588 * no handler. Inform the user that it's a node. 1589 * The indx may or may not be the same as namelen. 1590 */ 1591 if (oid->oid_handler == NULL) 1592 return (EISDIR); 1593 } 1594 1595 /* Is this sysctl writable? */ 1596 if (req->newptr && !(oid->oid_kind & CTLFLAG_WR)) 1597 return (EPERM); 1598 1599 KASSERT(req->td != NULL, ("sysctl_root(): req->td == NULL")); 1600 1601 #ifdef CAPABILITY_MODE 1602 /* 1603 * If the process is in capability mode, then don't permit reading or 1604 * writing unless specifically granted for the node. 1605 */ 1606 if (IN_CAPABILITY_MODE(req->td)) { 1607 if (req->oldptr && !(oid->oid_kind & CTLFLAG_CAPRD)) 1608 return (EPERM); 1609 if (req->newptr && !(oid->oid_kind & CTLFLAG_CAPWR)) 1610 return (EPERM); 1611 } 1612 #endif 1613 1614 /* Is this sysctl sensitive to securelevels? */ 1615 if (req->newptr && (oid->oid_kind & CTLFLAG_SECURE)) { 1616 lvl = (oid->oid_kind & CTLMASK_SECURE) >> CTLSHIFT_SECURE; 1617 error = securelevel_gt(req->td->td_ucred, lvl); 1618 if (error) 1619 return (error); 1620 } 1621 1622 /* Is this sysctl writable by only privileged users? */ 1623 if (req->newptr && !(oid->oid_kind & CTLFLAG_ANYBODY)) { 1624 int priv; 1625 1626 if (oid->oid_kind & CTLFLAG_PRISON) 1627 priv = PRIV_SYSCTL_WRITEJAIL; 1628 #ifdef VIMAGE 1629 else if ((oid->oid_kind & CTLFLAG_VNET) && 1630 prison_owns_vnet(req->td->td_ucred)) 1631 priv = PRIV_SYSCTL_WRITEJAIL; 1632 #endif 1633 else 1634 priv = PRIV_SYSCTL_WRITE; 1635 error = priv_check(req->td, priv); 1636 if (error) 1637 return (error); 1638 } 1639 1640 if (!oid->oid_handler) 1641 return (EINVAL); 1642 1643 if ((oid->oid_kind & CTLTYPE) == CTLTYPE_NODE) { 1644 arg1 = (int *)arg1 + indx; 1645 arg2 -= indx; 1646 } else { 1647 arg1 = oid->oid_arg1; 1648 arg2 = oid->oid_arg2; 1649 } 1650 #ifdef MAC 1651 error = mac_system_check_sysctl(req->td->td_ucred, oid, arg1, arg2, 1652 req); 1653 if (error != 0) 1654 return (error); 1655 #endif 1656 #ifdef VIMAGE 1657 if ((oid->oid_kind & CTLFLAG_VNET) && arg1 != NULL) 1658 arg1 = (void *)(curvnet->vnet_data_base + (uintptr_t)arg1); 1659 #endif 1660 error = sysctl_root_handler_locked(oid, arg1, arg2, req); 1661 1662 KFAIL_POINT_ERROR(_debug_fail_point, sysctl_running, error); 1663 1664 return (error); 1665 } 1666 1667 #ifndef _SYS_SYSPROTO_H_ 1668 struct sysctl_args { 1669 int *name; 1670 u_int namelen; 1671 void *old; 1672 size_t *oldlenp; 1673 void *new; 1674 size_t newlen; 1675 }; 1676 #endif 1677 int 1678 sys___sysctl(struct thread *td, struct sysctl_args *uap) 1679 { 1680 int error, i, name[CTL_MAXNAME]; 1681 size_t j; 1682 1683 if (uap->namelen > CTL_MAXNAME || uap->namelen < 2) 1684 return (EINVAL); 1685 1686 error = copyin(uap->name, &name, uap->namelen * sizeof(int)); 1687 if (error) 1688 return (error); 1689 1690 error = userland_sysctl(td, name, uap->namelen, 1691 uap->old, uap->oldlenp, 0, 1692 uap->new, uap->newlen, &j, 0); 1693 if (error && error != ENOMEM) 1694 return (error); 1695 if (uap->oldlenp) { 1696 i = copyout(&j, uap->oldlenp, sizeof(j)); 1697 if (i) 1698 return (i); 1699 } 1700 return (error); 1701 } 1702 1703 /* 1704 * This is used from various compatibility syscalls too. That's why name 1705 * must be in kernel space. 1706 */ 1707 int 1708 userland_sysctl(struct thread *td, int *name, u_int namelen, void *old, 1709 size_t *oldlenp, int inkernel, void *new, size_t newlen, size_t *retval, 1710 int flags) 1711 { 1712 int error = 0, memlocked; 1713 struct sysctl_req req; 1714 1715 bzero(&req, sizeof req); 1716 1717 req.td = td; 1718 req.flags = flags; 1719 1720 if (oldlenp) { 1721 if (inkernel) { 1722 req.oldlen = *oldlenp; 1723 } else { 1724 error = copyin(oldlenp, &req.oldlen, sizeof(*oldlenp)); 1725 if (error) 1726 return (error); 1727 } 1728 } 1729 req.validlen = req.oldlen; 1730 1731 if (old) { 1732 if (!useracc(old, req.oldlen, VM_PROT_WRITE)) 1733 return (EFAULT); 1734 req.oldptr= old; 1735 } 1736 1737 if (new != NULL) { 1738 if (!useracc(new, newlen, VM_PROT_READ)) 1739 return (EFAULT); 1740 req.newlen = newlen; 1741 req.newptr = new; 1742 } 1743 1744 req.oldfunc = sysctl_old_user; 1745 req.newfunc = sysctl_new_user; 1746 req.lock = REQ_UNWIRED; 1747 1748 #ifdef KTRACE 1749 if (KTRPOINT(curthread, KTR_SYSCTL)) 1750 ktrsysctl(name, namelen); 1751 #endif 1752 1753 if (req.oldlen > PAGE_SIZE) { 1754 memlocked = 1; 1755 sx_xlock(&sysctlmemlock); 1756 } else 1757 memlocked = 0; 1758 CURVNET_SET(TD_TO_VNET(td)); 1759 1760 for (;;) { 1761 req.oldidx = 0; 1762 req.newidx = 0; 1763 SYSCTL_SLOCK(); 1764 error = sysctl_root(0, name, namelen, &req); 1765 SYSCTL_SUNLOCK(); 1766 if (error != EAGAIN) 1767 break; 1768 kern_yield(PRI_USER); 1769 } 1770 1771 CURVNET_RESTORE(); 1772 1773 if (req.lock == REQ_WIRED && req.validlen > 0) 1774 vsunlock(req.oldptr, req.validlen); 1775 if (memlocked) 1776 sx_xunlock(&sysctlmemlock); 1777 1778 if (error && error != ENOMEM) 1779 return (error); 1780 1781 if (retval) { 1782 if (req.oldptr && req.oldidx > req.validlen) 1783 *retval = req.validlen; 1784 else 1785 *retval = req.oldidx; 1786 } 1787 return (error); 1788 } 1789 1790 /* 1791 * Drain into a sysctl struct. The user buffer should be wired if a page 1792 * fault would cause issue. 1793 */ 1794 static int 1795 sbuf_sysctl_drain(void *arg, const char *data, int len) 1796 { 1797 struct sysctl_req *req = arg; 1798 int error; 1799 1800 error = SYSCTL_OUT(req, data, len); 1801 KASSERT(error >= 0, ("Got unexpected negative value %d", error)); 1802 return (error == 0 ? len : -error); 1803 } 1804 1805 struct sbuf * 1806 sbuf_new_for_sysctl(struct sbuf *s, char *buf, int length, 1807 struct sysctl_req *req) 1808 { 1809 1810 s = sbuf_new(s, buf, length, SBUF_FIXEDLEN); 1811 sbuf_set_drain(s, sbuf_sysctl_drain, req); 1812 return (s); 1813 } 1814