1 /*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 1982, 1986, 1989, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Mike Karels at Berkeley Software Design, Inc.
9 *
10 * Quite extensively rewritten by Poul-Henning Kamp of the FreeBSD
11 * project, to make these variables more userfriendly.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution.
21 * 3. Neither the name of the University nor the names of its contributors
22 * may be used to endorse or promote products derived from this software
23 * without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 */
37
38 #include <sys/cdefs.h>
39 #include "opt_capsicum.h"
40 #include "opt_ddb.h"
41 #include "opt_ktrace.h"
42 #include "opt_sysctl.h"
43
44 #include <sys/param.h>
45 #include <sys/fail.h>
46 #include <sys/systm.h>
47 #include <sys/capsicum.h>
48 #include <sys/kernel.h>
49 #include <sys/limits.h>
50 #include <sys/sysctl.h>
51 #include <sys/malloc.h>
52 #include <sys/priv.h>
53 #include <sys/proc.h>
54 #include <sys/jail.h>
55 #include <sys/kdb.h>
56 #include <sys/lock.h>
57 #include <sys/mutex.h>
58 #include <sys/rmlock.h>
59 #include <sys/sbuf.h>
60 #include <sys/sx.h>
61 #include <sys/sysproto.h>
62 #include <sys/uio.h>
63 #ifdef KTRACE
64 #include <sys/ktrace.h>
65 #endif
66
67 #ifdef DDB
68 #include <ddb/ddb.h>
69 #include <ddb/db_lex.h>
70 #endif
71
72 #include <net/vnet.h>
73
74 #include <security/mac/mac_framework.h>
75
76 #include <vm/vm.h>
77 #include <vm/vm_extern.h>
78
/* Malloc types used by the sysctl code in this file. */
static MALLOC_DEFINE(M_SYSCTL, "sysctl", "sysctl internal magic");
static MALLOC_DEFINE(M_SYSCTLOID, "sysctloid", "sysctl dynamic oids");
static MALLOC_DEFINE(M_SYSCTLTMP, "sysctltmp", "sysctl temp output buffer");

/*
 * Generate the red-black tree routines for lists of oids; entries are
 * linked through oid_link and ordered by cmp_sysctl_oid.
 */
RB_GENERATE(sysctl_oid_list, sysctl_oid, oid_link, cmp_sysctl_oid);
84
85 /*
86 * The sysctllock protects the MIB tree. It also protects sysctl
87 * contexts used with dynamic sysctls. The sysctl_register_oid() and
88 * sysctl_unregister_oid() routines require the sysctllock to already
89 * be held, so the sysctl_wlock() and sysctl_wunlock() routines are
90 * provided for the few places in the kernel which need to use that
91 * API rather than using the dynamic API. Use of the dynamic API is
92 * strongly encouraged for most code.
93 *
94 * The sysctlmemlock is used to limit the amount of user memory wired for
95 * sysctl requests. This is implemented by serializing any userland
96 * sysctl requests larger than a single page via an exclusive lock.
97 *
98 * The sysctlstringlock is used to protect concurrent access to writable
99 * string nodes in sysctl_handle_string().
100 */
static struct rmlock sysctllock;
static struct sx __exclusive_cache_line sysctlmemlock;
static struct sx sysctlstringlock;

/*
 * Thin wrappers around the rmlock(9) primitives operating on sysctllock.
 * Read-side callers supply their own struct rm_priotracker.
 */
#define	SYSCTL_WLOCK()		rm_wlock(&sysctllock)
#define	SYSCTL_WUNLOCK()	rm_wunlock(&sysctllock)
#define	SYSCTL_RLOCK(tracker)	rm_rlock(&sysctllock, (tracker))
#define	SYSCTL_RUNLOCK(tracker)	rm_runlock(&sysctllock, (tracker))
#define	SYSCTL_WLOCKED()	rm_wowned(&sysctllock)
#define	SYSCTL_ASSERT_LOCKED()	rm_assert(&sysctllock, RA_LOCKED)
#define	SYSCTL_ASSERT_WLOCKED()	rm_assert(&sysctllock, RA_WLOCKED)
#define	SYSCTL_ASSERT_RLOCKED()	rm_assert(&sysctllock, RA_RLOCKED)
/* The lock is created sleepable (RM_SLEEPABLE). */
#define	SYSCTL_INIT()		rm_init_flags(&sysctllock, "sysctl lock", \
				    RM_SLEEPABLE)
/* Sleep on 'ch' with sysctllock passed to rm_sleep() as the interlock. */
#define	SYSCTL_SLEEP(ch, wmesg, timo)					\
				rm_sleep(ch, &sysctllock, 0, wmesg, timo)
117
/* Forward declaration; the definition is not part of this section. */
static int sysctl_root(SYSCTL_HANDLER_ARGS);

/* Root list */
struct sysctl_oid_list sysctl__children = RB_INITIALIZER(&sysctl__children);

/* Forward declarations of helpers that are used before they are defined. */
static char*	sysctl_escape_name(const char*);
static int	sysctl_remove_oid_locked(struct sysctl_oid *oidp, int del,
		    int recurse);
static int	sysctl_old_kernel(struct sysctl_req *, const void *, size_t);
static int	sysctl_new_kernel(struct sysctl_req *, void *, size_t);
static int	name2oid(const char *, int *, int *, struct sysctl_oid **);
129
130 static struct sysctl_oid *
sysctl_find_oidname(const char * name,struct sysctl_oid_list * list)131 sysctl_find_oidname(const char *name, struct sysctl_oid_list *list)
132 {
133 struct sysctl_oid *oidp;
134
135 SYSCTL_ASSERT_LOCKED();
136 SYSCTL_FOREACH(oidp, list) {
137 if (strcmp(oidp->oid_name, name) == 0) {
138 return (oidp);
139 }
140 }
141 return (NULL);
142 }
143
144 static struct sysctl_oid *
sysctl_find_oidnamelen(const char * name,size_t len,struct sysctl_oid_list * list)145 sysctl_find_oidnamelen(const char *name, size_t len,
146 struct sysctl_oid_list *list)
147 {
148 struct sysctl_oid *oidp;
149
150 SYSCTL_ASSERT_LOCKED();
151 SYSCTL_FOREACH(oidp, list) {
152 if (strncmp(oidp->oid_name, name, len) == 0 &&
153 oidp->oid_name[len] == '\0')
154 return (oidp);
155 }
156 return (NULL);
157 }
158
159 /*
160 * Initialization of the MIB tree.
161 *
162 * Order by number in each list.
163 */
/*
 * Acquire the sysctl lock exclusively on behalf of code outside this file
 * that needs to call sysctl_register_oid()/sysctl_unregister_oid().
 */
void
sysctl_wlock(void)
{
	SYSCTL_WLOCK();
}
170
/*
 * Release the exclusive sysctl lock taken with sysctl_wlock().
 */
void
sysctl_wunlock(void)
{
	SYSCTL_WUNLOCK();
}
177
178 static int
sysctl_root_handler_locked(struct sysctl_oid * oid,void * arg1,intmax_t arg2,struct sysctl_req * req,struct rm_priotracker * tracker)179 sysctl_root_handler_locked(struct sysctl_oid *oid, void *arg1, intmax_t arg2,
180 struct sysctl_req *req, struct rm_priotracker *tracker)
181 {
182 int error;
183
184 if (oid->oid_kind & CTLFLAG_DYN)
185 atomic_add_int(&oid->oid_running, 1);
186
187 if (tracker != NULL)
188 SYSCTL_RUNLOCK(tracker);
189 else
190 SYSCTL_WUNLOCK();
191
192 /*
193 * Treat set CTLFLAG_NEEDGIANT and unset CTLFLAG_MPSAFE flags the same,
194 * untill we're ready to remove all traces of Giant from sysctl(9).
195 */
196 if ((oid->oid_kind & CTLFLAG_NEEDGIANT) ||
197 (!(oid->oid_kind & CTLFLAG_MPSAFE)))
198 mtx_lock(&Giant);
199 error = oid->oid_handler(oid, arg1, arg2, req);
200 if ((oid->oid_kind & CTLFLAG_NEEDGIANT) ||
201 (!(oid->oid_kind & CTLFLAG_MPSAFE)))
202 mtx_unlock(&Giant);
203
204 KFAIL_POINT_ERROR(_debug_fail_point, sysctl_running, error);
205
206 if (tracker != NULL)
207 SYSCTL_RLOCK(tracker);
208 else
209 SYSCTL_WLOCK();
210
211 if (oid->oid_kind & CTLFLAG_DYN) {
212 if (atomic_fetchadd_int(&oid->oid_running, -1) == 1 &&
213 (oid->oid_kind & CTLFLAG_DYING) != 0)
214 wakeup(&oid->oid_running);
215 }
216
217 return (error);
218 }
219
220 static void
sysctl_load_tunable_by_oid_locked(struct sysctl_oid * oidp)221 sysctl_load_tunable_by_oid_locked(struct sysctl_oid *oidp)
222 {
223 struct sysctl_req req;
224 struct sysctl_oid *curr;
225 char *penv = NULL;
226 char path[96];
227 ssize_t rem = sizeof(path);
228 ssize_t len;
229 uint8_t data[512] __aligned(sizeof(uint64_t));
230 int size;
231 int error;
232
233 path[--rem] = 0;
234
235 for (curr = oidp; curr != NULL; curr = SYSCTL_PARENT(curr)) {
236 len = strlen(curr->oid_name);
237 rem -= len;
238 if (curr != oidp)
239 rem -= 1;
240 if (rem < 0) {
241 printf("OID path exceeds %d bytes\n", (int)sizeof(path));
242 return;
243 }
244 memcpy(path + rem, curr->oid_name, len);
245 if (curr != oidp)
246 path[rem + len] = '.';
247 }
248
249 memset(&req, 0, sizeof(req));
250
251 req.td = curthread;
252 req.oldfunc = sysctl_old_kernel;
253 req.newfunc = sysctl_new_kernel;
254 req.lock = REQ_UNWIRED;
255
256 switch (oidp->oid_kind & CTLTYPE) {
257 case CTLTYPE_INT:
258 if (getenv_array(path + rem, data, sizeof(data), &size,
259 sizeof(int), GETENV_SIGNED) == 0)
260 return;
261 req.newlen = size;
262 req.newptr = data;
263 break;
264 case CTLTYPE_UINT:
265 if (getenv_array(path + rem, data, sizeof(data), &size,
266 sizeof(int), GETENV_UNSIGNED) == 0)
267 return;
268 req.newlen = size;
269 req.newptr = data;
270 break;
271 case CTLTYPE_LONG:
272 if (getenv_array(path + rem, data, sizeof(data), &size,
273 sizeof(long), GETENV_SIGNED) == 0)
274 return;
275 req.newlen = size;
276 req.newptr = data;
277 break;
278 case CTLTYPE_ULONG:
279 if (getenv_array(path + rem, data, sizeof(data), &size,
280 sizeof(long), GETENV_UNSIGNED) == 0)
281 return;
282 req.newlen = size;
283 req.newptr = data;
284 break;
285 case CTLTYPE_S8:
286 if (getenv_array(path + rem, data, sizeof(data), &size,
287 sizeof(int8_t), GETENV_SIGNED) == 0)
288 return;
289 req.newlen = size;
290 req.newptr = data;
291 break;
292 case CTLTYPE_S16:
293 if (getenv_array(path + rem, data, sizeof(data), &size,
294 sizeof(int16_t), GETENV_SIGNED) == 0)
295 return;
296 req.newlen = size;
297 req.newptr = data;
298 break;
299 case CTLTYPE_S32:
300 if (getenv_array(path + rem, data, sizeof(data), &size,
301 sizeof(int32_t), GETENV_SIGNED) == 0)
302 return;
303 req.newlen = size;
304 req.newptr = data;
305 break;
306 case CTLTYPE_S64:
307 if (getenv_array(path + rem, data, sizeof(data), &size,
308 sizeof(int64_t), GETENV_SIGNED) == 0)
309 return;
310 req.newlen = size;
311 req.newptr = data;
312 break;
313 case CTLTYPE_U8:
314 if (getenv_array(path + rem, data, sizeof(data), &size,
315 sizeof(uint8_t), GETENV_UNSIGNED) == 0)
316 return;
317 req.newlen = size;
318 req.newptr = data;
319 break;
320 case CTLTYPE_U16:
321 if (getenv_array(path + rem, data, sizeof(data), &size,
322 sizeof(uint16_t), GETENV_UNSIGNED) == 0)
323 return;
324 req.newlen = size;
325 req.newptr = data;
326 break;
327 case CTLTYPE_U32:
328 if (getenv_array(path + rem, data, sizeof(data), &size,
329 sizeof(uint32_t), GETENV_UNSIGNED) == 0)
330 return;
331 req.newlen = size;
332 req.newptr = data;
333 break;
334 case CTLTYPE_U64:
335 if (getenv_array(path + rem, data, sizeof(data), &size,
336 sizeof(uint64_t), GETENV_UNSIGNED) == 0)
337 return;
338 req.newlen = size;
339 req.newptr = data;
340 break;
341 case CTLTYPE_STRING:
342 penv = kern_getenv(path + rem);
343 if (penv == NULL)
344 return;
345 req.newlen = strlen(penv);
346 req.newptr = penv;
347 break;
348 default:
349 return;
350 }
351 error = sysctl_root_handler_locked(oidp, oidp->oid_arg1,
352 oidp->oid_arg2, &req, NULL);
353 if (error != 0)
354 printf("Setting sysctl %s failed: %d\n", path + rem, error);
355 if (penv != NULL)
356 freeenv(penv);
357 }
358
359 /*
360 * Locate the path to a given oid. Returns the length of the resulting path,
361 * or -1 if the oid was not found. nodes must have room for CTL_MAXNAME
362 * elements.
363 */
364 static int
sysctl_search_oid(struct sysctl_oid ** nodes,struct sysctl_oid * needle)365 sysctl_search_oid(struct sysctl_oid **nodes, struct sysctl_oid *needle)
366 {
367 int indx;
368
369 SYSCTL_ASSERT_LOCKED();
370 indx = 0;
371 /*
372 * Do a depth-first search of the oid tree, looking for 'needle'. Start
373 * with the first child of the root.
374 */
375 nodes[indx] = RB_MIN(sysctl_oid_list, &sysctl__children);
376 for (;;) {
377 if (nodes[indx] == needle)
378 return (indx + 1);
379
380 if (nodes[indx] == NULL) {
381 /* Node has no more siblings, so back up to parent. */
382 if (indx-- == 0) {
383 /* Retreat to root, so give up. */
384 break;
385 }
386 } else if ((nodes[indx]->oid_kind & CTLTYPE) == CTLTYPE_NODE) {
387 /* Node has children. */
388 if (++indx == CTL_MAXNAME) {
389 /* Max search depth reached, so give up. */
390 break;
391 }
392 /* Start with the first child. */
393 nodes[indx] = RB_MIN(sysctl_oid_list,
394 &nodes[indx - 1]->oid_children);
395 continue;
396 }
397 /* Consider next sibling. */
398 nodes[indx] = RB_NEXT(sysctl_oid_list, NULL, nodes[indx]);
399 }
400 return (-1);
401 }
402
403 static void
sysctl_warn_reuse(const char * func,struct sysctl_oid * leaf)404 sysctl_warn_reuse(const char *func, struct sysctl_oid *leaf)
405 {
406 struct sysctl_oid *nodes[CTL_MAXNAME];
407 char buf[128];
408 struct sbuf sb;
409 int rc, i;
410
411 (void)sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN | SBUF_INCLUDENUL);
412 sbuf_set_drain(&sb, sbuf_printf_drain, NULL);
413
414 sbuf_printf(&sb, "%s: can't re-use a leaf (", func);
415
416 rc = sysctl_search_oid(nodes, leaf);
417 if (rc > 0) {
418 for (i = 0; i < rc; i++)
419 sbuf_printf(&sb, "%s%.*s", nodes[i]->oid_name,
420 i != (rc - 1), ".");
421 } else {
422 sbuf_cat(&sb, leaf->oid_name);
423 }
424 sbuf_cat(&sb, ")!\n");
425
426 (void)sbuf_finish(&sb);
427 }
428
#ifdef SYSCTL_DEBUG
/*
 * Debug-only handler: prints the leaf re-use warning for the oid being
 * accessed, with the sysctl lock held as a reader.  Always returns 0.
 */
static int
sysctl_reuse_test(SYSCTL_HANDLER_ARGS)
{
	struct rm_priotracker rd_tracker;

	SYSCTL_RLOCK(&rd_tracker);
	sysctl_warn_reuse(__func__, oidp);
	SYSCTL_RUNLOCK(&rd_tracker);

	return (0);
}
SYSCTL_PROC(_sysctl, OID_AUTO, reuse_test,
    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0, sysctl_reuse_test, "-",
    "");
#endif
444
445 void
sysctl_register_oid(struct sysctl_oid * oidp)446 sysctl_register_oid(struct sysctl_oid *oidp)
447 {
448 struct sysctl_oid_list *parent = oidp->oid_parent;
449 struct sysctl_oid *p, key;
450 int oid_number;
451 int timeout = 2;
452
453 /*
454 * First check if another oid with the same name already
455 * exists in the parent's list.
456 */
457 SYSCTL_ASSERT_WLOCKED();
458 p = sysctl_find_oidname(oidp->oid_name, parent);
459 if (p != NULL) {
460 if ((p->oid_kind & CTLTYPE) == CTLTYPE_NODE) {
461 p->oid_refcnt++;
462 return;
463 } else {
464 sysctl_warn_reuse(__func__, p);
465 return;
466 }
467 }
468 /* get current OID number */
469 oid_number = oidp->oid_number;
470
471 #if (OID_AUTO >= 0)
472 #error "OID_AUTO is expected to be a negative value"
473 #endif
474 /*
475 * Any negative OID number qualifies as OID_AUTO. Valid OID
476 * numbers should always be positive.
477 *
478 * NOTE: DO NOT change the starting value here, change it in
479 * <sys/sysctl.h>, and make sure it is at least 256 to
480 * accommodate e.g. net.inet.raw as a static sysctl node.
481 */
482 if (oid_number < 0) {
483 static int newoid;
484
485 /*
486 * By decrementing the next OID number we spend less
487 * time inserting the OIDs into a sorted list.
488 */
489 if (--newoid < CTL_AUTO_START)
490 newoid = 0x7fffffff;
491
492 oid_number = newoid;
493 }
494
495 /*
496 * Insert the OID into the parent's list sorted by OID number.
497 */
498 key.oid_number = oid_number;
499 p = RB_NFIND(sysctl_oid_list, parent, &key);
500 while (p != NULL && oid_number == p->oid_number) {
501 /* get the next valid OID number */
502 if (oid_number < CTL_AUTO_START ||
503 oid_number == 0x7fffffff) {
504 /* wraparound - restart */
505 oid_number = CTL_AUTO_START;
506 /* don't loop forever */
507 if (!timeout--)
508 panic("sysctl: Out of OID numbers\n");
509 key.oid_number = oid_number;
510 p = RB_NFIND(sysctl_oid_list, parent, &key);
511 continue;
512 }
513 p = RB_NEXT(sysctl_oid_list, NULL, p);
514 oid_number++;
515 }
516 /* check for non-auto OID number collision */
517 if (oidp->oid_number >= 0 && oidp->oid_number < CTL_AUTO_START &&
518 oid_number >= CTL_AUTO_START) {
519 panic("sysctl: OID number(%d) is already in use for '%s'\n",
520 oidp->oid_number, oidp->oid_name);
521 }
522 /* update the OID number, if any */
523 oidp->oid_number = oid_number;
524 RB_INSERT(sysctl_oid_list, parent, oidp);
525
526 if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE &&
527 (oidp->oid_kind & CTLFLAG_TUN) != 0 &&
528 (oidp->oid_kind & CTLFLAG_NOFETCH) == 0) {
529 #ifdef VIMAGE
530 /*
531 * Can fetch value multiple times for VNET loader tunables.
532 * Only fetch once for non-VNET loader tunables.
533 */
534 if ((oidp->oid_kind & CTLFLAG_VNET) == 0)
535 #endif
536 oidp->oid_kind |= CTLFLAG_NOFETCH;
537 /* try to fetch value from kernel environment */
538 sysctl_load_tunable_by_oid_locked(oidp);
539 }
540 }
541
542 void
sysctl_register_disabled_oid(struct sysctl_oid * oidp)543 sysctl_register_disabled_oid(struct sysctl_oid *oidp)
544 {
545
546 /*
547 * Mark the leaf as dormant if it's not to be immediately enabled.
548 * We do not disable nodes as they can be shared between modules
549 * and it is always safe to access a node.
550 */
551 KASSERT((oidp->oid_kind & CTLFLAG_DORMANT) == 0,
552 ("internal flag is set in oid_kind"));
553 if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE)
554 oidp->oid_kind |= CTLFLAG_DORMANT;
555 sysctl_register_oid(oidp);
556 }
557
558 void
sysctl_enable_oid(struct sysctl_oid * oidp)559 sysctl_enable_oid(struct sysctl_oid *oidp)
560 {
561
562 SYSCTL_ASSERT_WLOCKED();
563 if ((oidp->oid_kind & CTLTYPE) == CTLTYPE_NODE) {
564 KASSERT((oidp->oid_kind & CTLFLAG_DORMANT) == 0,
565 ("sysctl node is marked as dormant"));
566 return;
567 }
568 KASSERT((oidp->oid_kind & CTLFLAG_DORMANT) != 0,
569 ("enabling already enabled sysctl oid"));
570 oidp->oid_kind &= ~CTLFLAG_DORMANT;
571 }
572
573 void
sysctl_unregister_oid(struct sysctl_oid * oidp)574 sysctl_unregister_oid(struct sysctl_oid *oidp)
575 {
576 int error;
577
578 SYSCTL_ASSERT_WLOCKED();
579 if (oidp->oid_number == OID_AUTO) {
580 error = EINVAL;
581 } else {
582 error = ENOENT;
583 if (RB_REMOVE(sysctl_oid_list, oidp->oid_parent, oidp))
584 error = 0;
585 }
586
587 /*
588 * This can happen when a module fails to register and is
589 * being unloaded afterwards. It should not be a panic()
590 * for normal use.
591 */
592 if (error) {
593 printf("%s: failed(%d) to unregister sysctl(%s)\n",
594 __func__, error, oidp->oid_name);
595 }
596 }
597
/*
 * Initialize a new context to keep track of dynamically added sysctls.
 *
 * Returns 0 on success, or EINVAL when 'c' is NULL.  No locking is done
 * here; the caller is responsible for not adding new nodes to the context
 * until after this function has returned.
 */
int
sysctl_ctx_init(struct sysctl_ctx_list *c)
{

	if (c != NULL) {
		TAILQ_INIT(c);
		return (0);
	}
	return (EINVAL);
}
615
616 /* Free the context, and destroy all dynamic oids registered in this context */
617 int
sysctl_ctx_free(struct sysctl_ctx_list * clist)618 sysctl_ctx_free(struct sysctl_ctx_list *clist)
619 {
620 struct sysctl_ctx_entry *e, *e1;
621 int error;
622
623 error = 0;
624 /*
625 * First perform a "dry run" to check if it's ok to remove oids.
626 * XXX FIXME
627 * XXX This algorithm is a hack. But I don't know any
628 * XXX better solution for now...
629 */
630 SYSCTL_WLOCK();
631 TAILQ_FOREACH(e, clist, link) {
632 error = sysctl_remove_oid_locked(e->entry, 0, 0);
633 if (error)
634 break;
635 }
636 /*
637 * Restore deregistered entries, either from the end,
638 * or from the place where error occurred.
639 * e contains the entry that was not unregistered
640 */
641 if (error)
642 e1 = TAILQ_PREV(e, sysctl_ctx_list, link);
643 else
644 e1 = TAILQ_LAST(clist, sysctl_ctx_list);
645 while (e1 != NULL) {
646 sysctl_register_oid(e1->entry);
647 e1 = TAILQ_PREV(e1, sysctl_ctx_list, link);
648 }
649 if (error) {
650 SYSCTL_WUNLOCK();
651 return(EBUSY);
652 }
653 /* Now really delete the entries */
654 TAILQ_FOREACH_SAFE(e, clist, link, e1) {
655 error = sysctl_remove_oid_locked(e->entry, 1, 0);
656 if (error)
657 panic("sysctl_remove_oid: corrupt tree, entry: %s",
658 e->entry->oid_name);
659 free(e, M_SYSCTLOID);
660 }
661 SYSCTL_WUNLOCK();
662 TAILQ_INIT(clist);
663 return (error);
664 }
665
666 /* Add an entry to the context */
667 struct sysctl_ctx_entry *
sysctl_ctx_entry_add(struct sysctl_ctx_list * clist,struct sysctl_oid * oidp)668 sysctl_ctx_entry_add(struct sysctl_ctx_list *clist, struct sysctl_oid *oidp)
669 {
670 struct sysctl_ctx_entry *e;
671
672 SYSCTL_ASSERT_WLOCKED();
673 if (clist == NULL || oidp == NULL)
674 return(NULL);
675 e = malloc(sizeof(struct sysctl_ctx_entry), M_SYSCTLOID, M_WAITOK);
676 e->entry = oidp;
677 TAILQ_INSERT_HEAD(clist, e, link);
678 return (e);
679 }
680
681 /* Find an entry in the context */
682 struct sysctl_ctx_entry *
sysctl_ctx_entry_find(struct sysctl_ctx_list * clist,struct sysctl_oid * oidp)683 sysctl_ctx_entry_find(struct sysctl_ctx_list *clist, struct sysctl_oid *oidp)
684 {
685 struct sysctl_ctx_entry *e;
686
687 SYSCTL_ASSERT_WLOCKED();
688 if (clist == NULL || oidp == NULL)
689 return(NULL);
690 TAILQ_FOREACH(e, clist, link) {
691 if (e->entry == oidp)
692 return(e);
693 }
694 return (e);
695 }
696
697 /*
698 * Delete an entry from the context.
699 * NOTE: this function doesn't free oidp! You have to remove it
700 * with sysctl_remove_oid().
701 */
702 int
sysctl_ctx_entry_del(struct sysctl_ctx_list * clist,struct sysctl_oid * oidp)703 sysctl_ctx_entry_del(struct sysctl_ctx_list *clist, struct sysctl_oid *oidp)
704 {
705 struct sysctl_ctx_entry *e;
706
707 if (clist == NULL || oidp == NULL)
708 return (EINVAL);
709 SYSCTL_WLOCK();
710 e = sysctl_ctx_entry_find(clist, oidp);
711 if (e != NULL) {
712 TAILQ_REMOVE(clist, e, link);
713 SYSCTL_WUNLOCK();
714 free(e, M_SYSCTLOID);
715 return (0);
716 } else {
717 SYSCTL_WUNLOCK();
718 return (ENOENT);
719 }
720 }
721
/*
 * Remove dynamically created sysctl trees.
 * oidp - top of the tree to be removed
 * del - if 0 - just deregister, otherwise free up entries as well
 * recurse - if != 0 traverse the subtree to be deleted
 *
 * Takes the sysctl lock exclusively around sysctl_remove_oid_locked() and
 * returns that function's result.
 */
int
sysctl_remove_oid(struct sysctl_oid *oidp, int del, int recurse)
{
	int rc;

	SYSCTL_WLOCK();
	rc = sysctl_remove_oid_locked(oidp, del, recurse);
	SYSCTL_WUNLOCK();

	return (rc);
}
738
739 int
sysctl_remove_name(struct sysctl_oid * parent,const char * name,int del,int recurse)740 sysctl_remove_name(struct sysctl_oid *parent, const char *name,
741 int del, int recurse)
742 {
743 struct sysctl_oid *p;
744 int error;
745
746 error = ENOENT;
747 SYSCTL_WLOCK();
748 p = sysctl_find_oidname(name, &parent->oid_children);
749 if (p)
750 error = sysctl_remove_oid_locked(p, del, recurse);
751 SYSCTL_WUNLOCK();
752
753 return (error);
754 }
755
756 /*
757 * Duplicate the provided string, escaping any illegal characters. The result
758 * must be freed when no longer in use.
759 *
760 * The list of illegal characters is ".".
761 */
762 static char*
sysctl_escape_name(const char * orig)763 sysctl_escape_name(const char* orig)
764 {
765 int i, s = 0, d = 0, nillegals = 0;
766 char *new;
767
768 /* First count the number of illegal characters */
769 for (i = 0; orig[i] != '\0'; i++) {
770 if (orig[i] == '.')
771 nillegals++;
772 }
773
774 /* Allocate storage for new string */
775 new = malloc(i + 2 * nillegals + 1, M_SYSCTLOID, M_WAITOK);
776
777 /* Copy the name, escaping characters as we go */
778 while (orig[s] != '\0') {
779 if (orig[s] == '.') {
780 /* %25 is the hexadecimal representation of '.' */
781 new[d++] = '%';
782 new[d++] = '2';
783 new[d++] = '5';
784 s++;
785 } else {
786 new[d++] = orig[s++];
787 }
788 }
789
790 /* Finally, nul-terminate */
791 new[d] = '\0';
792
793 return (new);
794 }
795
796 static int
sysctl_remove_oid_locked(struct sysctl_oid * oidp,int del,int recurse)797 sysctl_remove_oid_locked(struct sysctl_oid *oidp, int del, int recurse)
798 {
799 struct sysctl_oid *p, *tmp;
800 int error;
801
802 SYSCTL_ASSERT_WLOCKED();
803 if (oidp == NULL)
804 return(EINVAL);
805 if ((oidp->oid_kind & CTLFLAG_DYN) == 0) {
806 printf("Warning: can't remove non-dynamic nodes (%s)!\n",
807 oidp->oid_name);
808 return (EINVAL);
809 }
810 /*
811 * WARNING: normal method to do this should be through
812 * sysctl_ctx_free(). Use recursing as the last resort
813 * method to purge your sysctl tree of leftovers...
814 * However, if some other code still references these nodes,
815 * it will panic.
816 */
817 if ((oidp->oid_kind & CTLTYPE) == CTLTYPE_NODE) {
818 if (oidp->oid_refcnt == 1) {
819 for(p = RB_MIN(sysctl_oid_list, &oidp->oid_children);
820 p != NULL; p = tmp) {
821 if (!recurse) {
822 printf("Warning: failed attempt to "
823 "remove oid %s with child %s\n",
824 oidp->oid_name, p->oid_name);
825 return (ENOTEMPTY);
826 }
827 tmp = RB_NEXT(sysctl_oid_list,
828 &oidp->oid_children, p);
829 error = sysctl_remove_oid_locked(p, del,
830 recurse);
831 if (error)
832 return (error);
833 }
834 }
835 }
836 if (oidp->oid_refcnt > 1 ) {
837 oidp->oid_refcnt--;
838 } else {
839 if (oidp->oid_refcnt == 0) {
840 printf("Warning: bad oid_refcnt=%u (%s)!\n",
841 oidp->oid_refcnt, oidp->oid_name);
842 return (EINVAL);
843 }
844 sysctl_unregister_oid(oidp);
845 if (del) {
846 /*
847 * Wait for all threads running the handler to drain.
848 * This preserves the previous behavior when the
849 * sysctl lock was held across a handler invocation,
850 * and is necessary for module unload correctness.
851 */
852 while (oidp->oid_running > 0) {
853 oidp->oid_kind |= CTLFLAG_DYING;
854 SYSCTL_SLEEP(&oidp->oid_running, "oidrm", 0);
855 }
856 if (oidp->oid_descr)
857 free(__DECONST(char *, oidp->oid_descr),
858 M_SYSCTLOID);
859 if (oidp->oid_label)
860 free(__DECONST(char *, oidp->oid_label),
861 M_SYSCTLOID);
862 free(__DECONST(char *, oidp->oid_name), M_SYSCTLOID);
863 free(oidp, M_SYSCTLOID);
864 }
865 }
866 return (0);
867 }
868 /*
869 * Create new sysctls at run time.
870 * clist may point to a valid context initialized with sysctl_ctx_init().
871 */
872 struct sysctl_oid *
sysctl_add_oid(struct sysctl_ctx_list * clist,struct sysctl_oid_list * parent,int number,const char * name,int kind,void * arg1,intmax_t arg2,int (* handler)(SYSCTL_HANDLER_ARGS),const char * fmt,const char * descr,const char * label)873 sysctl_add_oid(struct sysctl_ctx_list *clist, struct sysctl_oid_list *parent,
874 int number, const char *name, int kind, void *arg1, intmax_t arg2,
875 int (*handler)(SYSCTL_HANDLER_ARGS), const char *fmt, const char *descr,
876 const char *label)
877 {
878 struct sysctl_oid *oidp;
879 char *escaped;
880
881 /* You have to hook up somewhere.. */
882 if (parent == NULL)
883 return(NULL);
884 escaped = sysctl_escape_name(name);
885 /* Check if the node already exists, otherwise create it */
886 SYSCTL_WLOCK();
887 oidp = sysctl_find_oidname(escaped, parent);
888 if (oidp != NULL) {
889 free(escaped, M_SYSCTLOID);
890 if ((oidp->oid_kind & CTLTYPE) == CTLTYPE_NODE) {
891 oidp->oid_refcnt++;
892 /* Update the context */
893 if (clist != NULL)
894 sysctl_ctx_entry_add(clist, oidp);
895 SYSCTL_WUNLOCK();
896 return (oidp);
897 } else {
898 sysctl_warn_reuse(__func__, oidp);
899 SYSCTL_WUNLOCK();
900 return (NULL);
901 }
902 }
903 oidp = malloc(sizeof(struct sysctl_oid), M_SYSCTLOID, M_WAITOK|M_ZERO);
904 oidp->oid_parent = parent;
905 RB_INIT(&oidp->oid_children);
906 oidp->oid_number = number;
907 oidp->oid_refcnt = 1;
908 oidp->oid_name = escaped;
909 oidp->oid_handler = handler;
910 oidp->oid_kind = CTLFLAG_DYN | kind;
911 oidp->oid_arg1 = arg1;
912 oidp->oid_arg2 = arg2;
913 oidp->oid_fmt = fmt;
914 if (descr != NULL)
915 oidp->oid_descr = strdup(descr, M_SYSCTLOID);
916 if (label != NULL)
917 oidp->oid_label = strdup(label, M_SYSCTLOID);
918 /* Update the context, if used */
919 if (clist != NULL)
920 sysctl_ctx_entry_add(clist, oidp);
921 /* Register this oid */
922 sysctl_register_oid(oidp);
923 SYSCTL_WUNLOCK();
924 return (oidp);
925 }
926
927 /*
928 * Rename an existing oid.
929 */
930 void
sysctl_rename_oid(struct sysctl_oid * oidp,const char * name)931 sysctl_rename_oid(struct sysctl_oid *oidp, const char *name)
932 {
933 char *newname;
934 char *oldname;
935
936 newname = strdup(name, M_SYSCTLOID);
937 SYSCTL_WLOCK();
938 oldname = __DECONST(char *, oidp->oid_name);
939 oidp->oid_name = newname;
940 SYSCTL_WUNLOCK();
941 free(oldname, M_SYSCTLOID);
942 }
943
944 /*
945 * Reparent an existing oid.
946 */
947 int
sysctl_move_oid(struct sysctl_oid * oid,struct sysctl_oid_list * parent)948 sysctl_move_oid(struct sysctl_oid *oid, struct sysctl_oid_list *parent)
949 {
950 struct sysctl_oid *oidp;
951
952 SYSCTL_WLOCK();
953 if (oid->oid_parent == parent) {
954 SYSCTL_WUNLOCK();
955 return (0);
956 }
957 oidp = sysctl_find_oidname(oid->oid_name, parent);
958 if (oidp != NULL) {
959 SYSCTL_WUNLOCK();
960 return (EEXIST);
961 }
962 sysctl_unregister_oid(oid);
963 oid->oid_parent = parent;
964 oid->oid_number = OID_AUTO;
965 sysctl_register_oid(oid);
966 SYSCTL_WUNLOCK();
967 return (0);
968 }
969
970 /*
971 * Register the kernel's oids on startup.
972 */
973 SET_DECLARE(sysctl_set, struct sysctl_oid);
974
975 static void
sysctl_register_all(void * arg)976 sysctl_register_all(void *arg)
977 {
978 struct sysctl_oid **oidp;
979
980 sx_init(&sysctlmemlock, "sysctl mem");
981 sx_init(&sysctlstringlock, "sysctl string handler");
982 SYSCTL_INIT();
983 SYSCTL_WLOCK();
984 SET_FOREACH(oidp, sysctl_set)
985 sysctl_register_oid(*oidp);
986 SYSCTL_WUNLOCK();
987 }
988 SYSINIT(sysctl, SI_SUB_KMEM, SI_ORDER_FIRST, sysctl_register_all, NULL);
989
990 #ifdef VIMAGE
991 static void
sysctl_setenv_vnet(void * arg __unused,const char * name)992 sysctl_setenv_vnet(void *arg __unused, const char *name)
993 {
994 struct sysctl_oid *oidp;
995 int oid[CTL_MAXNAME];
996 int error, nlen;
997
998 SYSCTL_WLOCK();
999 error = name2oid(name, oid, &nlen, &oidp);
1000 if (error)
1001 goto out;
1002
1003 if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE &&
1004 (oidp->oid_kind & CTLFLAG_VNET) != 0 &&
1005 (oidp->oid_kind & CTLFLAG_TUN) != 0 &&
1006 (oidp->oid_kind & CTLFLAG_NOFETCH) == 0) {
1007 /* Update value from kernel environment */
1008 sysctl_load_tunable_by_oid_locked(oidp);
1009 }
1010 out:
1011 SYSCTL_WUNLOCK();
1012 }
1013
1014 static void
sysctl_unsetenv_vnet(void * arg __unused,const char * name)1015 sysctl_unsetenv_vnet(void *arg __unused, const char *name)
1016 {
1017 struct sysctl_oid *oidp;
1018 int oid[CTL_MAXNAME];
1019 int error, nlen;
1020
1021 SYSCTL_WLOCK();
1022 /*
1023 * The setenv / unsetenv event handlers are invoked by kern_setenv() /
1024 * kern_unsetenv() without exclusive locks. It is rare but still possible
1025 * that the invoke order of event handlers is different from that of
1026 * kern_setenv() and kern_unsetenv().
1027 * Re-check environment variable string to make sure it is unset.
1028 */
1029 if (testenv(name))
1030 goto out;
1031 error = name2oid(name, oid, &nlen, &oidp);
1032 if (error)
1033 goto out;
1034
1035 if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE &&
1036 (oidp->oid_kind & CTLFLAG_VNET) != 0 &&
1037 (oidp->oid_kind & CTLFLAG_TUN) != 0 &&
1038 (oidp->oid_kind & CTLFLAG_NOFETCH) == 0) {
1039 size_t size;
1040
1041 switch (oidp->oid_kind & CTLTYPE) {
1042 case CTLTYPE_INT:
1043 case CTLTYPE_UINT:
1044 size = sizeof(int);
1045 break;
1046 case CTLTYPE_LONG:
1047 case CTLTYPE_ULONG:
1048 size = sizeof(long);
1049 break;
1050 case CTLTYPE_S8:
1051 case CTLTYPE_U8:
1052 size = sizeof(int8_t);
1053 break;
1054 case CTLTYPE_S16:
1055 case CTLTYPE_U16:
1056 size = sizeof(int16_t);
1057 break;
1058 case CTLTYPE_S32:
1059 case CTLTYPE_U32:
1060 size = sizeof(int32_t);
1061 break;
1062 case CTLTYPE_S64:
1063 case CTLTYPE_U64:
1064 size = sizeof(int64_t);
1065 break;
1066 case CTLTYPE_STRING:
1067 MPASS(oidp->oid_arg2 > 0);
1068 size = oidp->oid_arg2;
1069 break;
1070 default:
1071 goto out;
1072 }
1073 vnet_restore_init(oidp->oid_arg1, size);
1074 }
1075 out:
1076 SYSCTL_WUNLOCK();
1077 }
1078
/*
 * Register the kernel's setenv / unsetenv events: sysctl_setenv_vnet() is
 * attached to the "setenv" event and sysctl_unsetenv_vnet() to the
 * "unsetenv" event, both with a NULL argument and EVENTHANDLER_PRI_ANY
 * priority.
 */
EVENTHANDLER_DEFINE(setenv, sysctl_setenv_vnet, NULL, EVENTHANDLER_PRI_ANY);
EVENTHANDLER_DEFINE(unsetenv, sysctl_unsetenv_vnet, NULL, EVENTHANDLER_PRI_ANY);
1084 #endif
1085
1086 /*
1087 * "Staff-functions"
1088 *
1089 * These functions implement a presently undocumented interface
1090 * used by the sysctl program to walk the tree, and get the type
1091 * so it can print the value.
1092 * This interface is under work and consideration, and should probably
1093 * be killed with a big axe by the first person who can find the time.
 * (Be aware, though, that the proper interface isn't as obvious as it
 * may seem; there are various conflicting requirements.)
1096 *
1097 * {CTL_SYSCTL, CTL_SYSCTL_DEBUG} printf the entire MIB-tree.
1098 * {CTL_SYSCTL, CTL_SYSCTL_NAME, ...} return the name of the "..."
1099 * OID.
1100 * {CTL_SYSCTL, CTL_SYSCTL_NEXT, ...} return the next OID, honoring
1101 * CTLFLAG_SKIP.
1102 * {CTL_SYSCTL, CTL_SYSCTL_NAME2OID} return the OID of the name in
1103 * "new"
1104 * {CTL_SYSCTL, CTL_SYSCTL_OIDFMT, ...} return the kind & format info
1105 * for the "..." OID.
1106 * {CTL_SYSCTL, CTL_SYSCTL_OIDDESCR, ...} return the description of the
1107 * "..." OID.
1108 * {CTL_SYSCTL, CTL_SYSCTL_OIDLABEL, ...} return the aggregation label of
1109 * the "..." OID.
1110 * {CTL_SYSCTL, CTL_SYSCTL_NEXTNOSKIP, ...} return the next OID, ignoring
1111 * CTLFLAG_SKIP.
1112 */
1113
1114 #ifdef SYSCTL_DEBUG
1115 static void
sysctl_sysctl_debug_dump_node(struct sysctl_oid_list * l,int i)1116 sysctl_sysctl_debug_dump_node(struct sysctl_oid_list *l, int i)
1117 {
1118 int k;
1119 struct sysctl_oid *oidp;
1120
1121 SYSCTL_ASSERT_LOCKED();
1122 SYSCTL_FOREACH(oidp, l) {
1123 for (k=0; k<i; k++)
1124 printf(" ");
1125
1126 printf("%d %s ", oidp->oid_number, oidp->oid_name);
1127
1128 printf("%c%c",
1129 oidp->oid_kind & CTLFLAG_RD ? 'R':' ',
1130 oidp->oid_kind & CTLFLAG_WR ? 'W':' ');
1131
1132 if (oidp->oid_handler)
1133 printf(" *Handler");
1134
1135 switch (oidp->oid_kind & CTLTYPE) {
1136 case CTLTYPE_NODE:
1137 printf(" Node\n");
1138 if (!oidp->oid_handler) {
1139 sysctl_sysctl_debug_dump_node(
1140 SYSCTL_CHILDREN(oidp), i + 2);
1141 }
1142 break;
1143 case CTLTYPE_INT: printf(" Int\n"); break;
1144 case CTLTYPE_UINT: printf(" u_int\n"); break;
1145 case CTLTYPE_LONG: printf(" Long\n"); break;
1146 case CTLTYPE_ULONG: printf(" u_long\n"); break;
1147 case CTLTYPE_STRING: printf(" String\n"); break;
1148 case CTLTYPE_S8: printf(" int8_t\n"); break;
1149 case CTLTYPE_S16: printf(" int16_t\n"); break;
1150 case CTLTYPE_S32: printf(" int32_t\n"); break;
1151 case CTLTYPE_S64: printf(" int64_t\n"); break;
1152 case CTLTYPE_U8: printf(" uint8_t\n"); break;
1153 case CTLTYPE_U16: printf(" uint16_t\n"); break;
1154 case CTLTYPE_U32: printf(" uint32_t\n"); break;
1155 case CTLTYPE_U64: printf(" uint64_t\n"); break;
1156 case CTLTYPE_OPAQUE: printf(" Opaque/struct\n"); break;
1157 default: printf("\n");
1158 }
1159 }
1160 }
1161
1162 static int
sysctl_sysctl_debug(SYSCTL_HANDLER_ARGS)1163 sysctl_sysctl_debug(SYSCTL_HANDLER_ARGS)
1164 {
1165 struct rm_priotracker tracker;
1166 int error;
1167
1168 error = priv_check(req->td, PRIV_SYSCTL_DEBUG);
1169 if (error)
1170 return (error);
1171 SYSCTL_RLOCK(&tracker);
1172 sysctl_sysctl_debug_dump_node(&sysctl__children, 0);
1173 SYSCTL_RUNLOCK(&tracker);
1174 return (ENOENT);
1175 }
1176
/*
 * Declare the "debug" OID under _sysctl, numbered CTL_SYSCTL_DEBUG, as a
 * read-only, MP-safe procedure OID served by sysctl_sysctl_debug().
 */
SYSCTL_PROC(_sysctl, CTL_SYSCTL_DEBUG, debug, CTLTYPE_STRING | CTLFLAG_RD |
    CTLFLAG_MPSAFE, 0, 0, sysctl_sysctl_debug, "-", "");
1179 #endif
1180
1181 static int
sysctl_sysctl_name(SYSCTL_HANDLER_ARGS)1182 sysctl_sysctl_name(SYSCTL_HANDLER_ARGS)
1183 {
1184 int *name = (int *) arg1;
1185 u_int namelen = arg2;
1186 int error;
1187 struct sysctl_oid *oid, key;
1188 struct sysctl_oid_list *lsp = &sysctl__children, *lsp2;
1189 struct rm_priotracker tracker;
1190 char buf[10];
1191
1192 error = sysctl_wire_old_buffer(req, 0);
1193 if (error)
1194 return (error);
1195
1196 SYSCTL_RLOCK(&tracker);
1197 while (namelen) {
1198 if (!lsp) {
1199 snprintf(buf,sizeof(buf),"%d",*name);
1200 if (req->oldidx)
1201 error = SYSCTL_OUT(req, ".", 1);
1202 if (!error)
1203 error = SYSCTL_OUT(req, buf, strlen(buf));
1204 if (error)
1205 goto out;
1206 namelen--;
1207 name++;
1208 continue;
1209 }
1210 lsp2 = NULL;
1211 key.oid_number = *name;
1212 oid = RB_FIND(sysctl_oid_list, lsp, &key);
1213 if (oid) {
1214 if (req->oldidx)
1215 error = SYSCTL_OUT(req, ".", 1);
1216 if (!error)
1217 error = SYSCTL_OUT(req, oid->oid_name,
1218 strlen(oid->oid_name));
1219 if (error)
1220 goto out;
1221
1222 namelen--;
1223 name++;
1224
1225 if ((oid->oid_kind & CTLTYPE) == CTLTYPE_NODE &&
1226 !oid->oid_handler)
1227 lsp2 = SYSCTL_CHILDREN(oid);
1228 }
1229 lsp = lsp2;
1230 }
1231 error = SYSCTL_OUT(req, "", 1);
1232 out:
1233 SYSCTL_RUNLOCK(&tracker);
1234 return (error);
1235 }
1236
/*
 * Declare the "name" node under _sysctl, numbered CTL_SYSCTL_NAME, as a
 * read-only, MP-safe node served by sysctl_sysctl_name().
 *
 * XXXRW/JA: Shouldn't return name data for nodes that we don't permit in
 * capability mode.
 */
static SYSCTL_NODE(_sysctl, CTL_SYSCTL_NAME, name, CTLFLAG_RD |
    CTLFLAG_MPSAFE | CTLFLAG_CAPRD, sysctl_sysctl_name, "");
1243
/*
 * Verdict returned by sysctl_sysctl_next_node() for a single OID, telling
 * the tree walker what to do next.
 */
enum sysctl_iter_action {
	ITER_SIBLINGS,	/* Not matched; continue iterating over siblings */
	ITER_CHILDREN,	/* Node has children that must be iterated over */
	ITER_FOUND,	/* Matching node was found */
};
1249
1250 /*
1251 * Tries to find the next node for @name and @namelen.
1252 *
1253 * Returns next action to take.
1254 */
1255 static enum sysctl_iter_action
sysctl_sysctl_next_node(struct sysctl_oid * oidp,int * name,unsigned int namelen,bool honor_skip)1256 sysctl_sysctl_next_node(struct sysctl_oid *oidp, int *name, unsigned int namelen,
1257 bool honor_skip)
1258 {
1259
1260 if ((oidp->oid_kind & CTLFLAG_DORMANT) != 0)
1261 return (ITER_SIBLINGS);
1262
1263 if (honor_skip && (oidp->oid_kind & CTLFLAG_SKIP) != 0)
1264 return (ITER_SIBLINGS);
1265
1266 if (namelen == 0) {
1267 /*
1268 * We have reached a node with a full name match and are
1269 * looking for the next oid in its children.
1270 *
1271 * For CTL_SYSCTL_NEXTNOSKIP we are done.
1272 *
1273 * For CTL_SYSCTL_NEXT we skip CTLTYPE_NODE (unless it
1274 * has a handler) and move on to the children.
1275 */
1276 if (!honor_skip)
1277 return (ITER_FOUND);
1278 if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE)
1279 return (ITER_FOUND);
1280 /* If node does not have an iterator, treat it as leaf */
1281 if (oidp->oid_handler)
1282 return (ITER_FOUND);
1283
1284 /* Report oid as a node to iterate */
1285 return (ITER_CHILDREN);
1286 }
1287
1288 /*
1289 * No match yet. Continue seeking the given name.
1290 *
1291 * We are iterating in order by oid_number, so skip oids lower
1292 * than the one we are looking for.
1293 *
1294 * When the current oid_number is higher than the one we seek,
1295 * that means we have reached the next oid in the sequence and
1296 * should return it.
1297 *
1298 * If the oid_number matches the name at this level then we
1299 * have to find a node to continue searching at the next level.
1300 */
1301 if (oidp->oid_number < *name)
1302 return (ITER_SIBLINGS);
1303 if (oidp->oid_number > *name) {
1304 /*
1305 * We have reached the next oid.
1306 *
1307 * For CTL_SYSCTL_NEXTNOSKIP we are done.
1308 *
1309 * For CTL_SYSCTL_NEXT we skip CTLTYPE_NODE (unless it
1310 * has a handler) and move on to the children.
1311 */
1312 if (!honor_skip)
1313 return (ITER_FOUND);
1314 if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE)
1315 return (ITER_FOUND);
1316 /* If node does not have an iterator, treat it as leaf */
1317 if (oidp->oid_handler)
1318 return (ITER_FOUND);
1319 return (ITER_CHILDREN);
1320 }
1321
1322 /* match at a current level */
1323 if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE)
1324 return (ITER_SIBLINGS);
1325 if (oidp->oid_handler)
1326 return (ITER_SIBLINGS);
1327
1328 return (ITER_CHILDREN);
1329 }
1330
/*
 * Recursively walk the sysctl subtree at @lsp looking for the OID that
 * follows the one described by @name / @namelen.
 *
 * @next points at the slot for the current level in the caller's result
 * array; deeper levels are written at @next + 1 and so on.  @level is the
 * 1-based depth of @lsp and @len tracks the deepest level written so far,
 * i.e. the length of the resulting OID.  @honor_skip is passed through to
 * sysctl_sysctl_next_node().
 *
 * Returns true and fills in next oid data in @next and @len if oid is found.
 * The sysctl lock must be held by the caller.
 */
static bool
sysctl_sysctl_next_action(struct sysctl_oid_list *lsp, int *name, u_int namelen,
    int *next, int *len, int level, bool honor_skip)
{
	struct sysctl_oid_list *next_lsp;
	struct sysctl_oid *oidp = NULL, key;
	bool success = false;
	enum sysctl_iter_action action;

	SYSCTL_ASSERT_LOCKED();
	/*
	 * Start the search at the requested oid.  But if not found, then scan
	 * through all children.
	 */
	if (namelen > 0) {
		key.oid_number = *name;
		oidp = RB_FIND(sysctl_oid_list, lsp, &key);
	}
	if (!oidp)
		oidp = RB_MIN(sysctl_oid_list, lsp);
	for(; oidp != NULL; oidp = RB_NEXT(sysctl_oid_list, lsp, oidp)) {
		action = sysctl_sysctl_next_node(oidp, name, namelen,
		    honor_skip);
		if (action == ITER_SIBLINGS)
			continue;
		if (action == ITER_FOUND) {
			success = true;
			break;
		}
		KASSERT((action== ITER_CHILDREN), ("ret(%d)!=ITER_CHILDREN", action));

		next_lsp = SYSCTL_CHILDREN(oidp);
		if (namelen == 0) {
			/* Name exhausted: take the first usable descendant. */
			success = sysctl_sysctl_next_action(next_lsp, NULL, 0,
			    next + 1, len, level + 1, honor_skip);
		} else {
			/* Keep matching the rest of the name one level down. */
			success = sysctl_sysctl_next_action(next_lsp, name + 1,
			    namelen - 1, next + 1, len, level + 1, honor_skip);
			if (!success) {

				/*
				 * We maintain the invariant that current node oid
				 * is >= the oid provided in @name.
				 * As there are no usable children at this node,
				 * current node oid is strictly > than the requested
				 * oid.
				 * Hence, reduce namelen to 0 to allow for picking first
				 * nodes/leafs in the next node in list.
				 */
				namelen = 0;
			}
		}
		if (success)
			break;
	}

	if (success) {
		/* Record this level's component and the overall depth. */
		*next = oidp->oid_number;
		if (level > *len)
			*len = level;
	}

	return (success);
}
1399
1400 static int
sysctl_sysctl_next(SYSCTL_HANDLER_ARGS)1401 sysctl_sysctl_next(SYSCTL_HANDLER_ARGS)
1402 {
1403 int *name = (int *) arg1;
1404 u_int namelen = arg2;
1405 int len, error;
1406 bool success;
1407 struct sysctl_oid_list *lsp = &sysctl__children;
1408 struct rm_priotracker tracker;
1409 int next[CTL_MAXNAME];
1410
1411 len = 0;
1412 SYSCTL_RLOCK(&tracker);
1413 success = sysctl_sysctl_next_action(lsp, name, namelen, next, &len, 1,
1414 oidp->oid_number == CTL_SYSCTL_NEXT);
1415 SYSCTL_RUNLOCK(&tracker);
1416 if (!success)
1417 return (ENOENT);
1418 error = SYSCTL_OUT(req, next, len * sizeof (int));
1419 return (error);
1420 }
1421
/*
 * Declare the "next" and "nextnoskip" nodes under _sysctl, numbered
 * CTL_SYSCTL_NEXT and CTL_SYSCTL_NEXTNOSKIP.  Both are read-only, MP-safe
 * and served by sysctl_sysctl_next().
 *
 * XXXRW/JA: Shouldn't return next data for nodes that we don't permit in
 * capability mode.
 */
static SYSCTL_NODE(_sysctl, CTL_SYSCTL_NEXT, next, CTLFLAG_RD |
    CTLFLAG_MPSAFE | CTLFLAG_CAPRD, sysctl_sysctl_next, "");

static SYSCTL_NODE(_sysctl, CTL_SYSCTL_NEXTNOSKIP, nextnoskip, CTLFLAG_RD |
    CTLFLAG_MPSAFE | CTLFLAG_CAPRD, sysctl_sysctl_next, "");
1431
1432 static int
name2oid(const char * name,int * oid,int * len,struct sysctl_oid ** oidpp)1433 name2oid(const char *name, int *oid, int *len, struct sysctl_oid **oidpp)
1434 {
1435 struct sysctl_oid *oidp;
1436 struct sysctl_oid_list *lsp = &sysctl__children;
1437 const char *n;
1438
1439 SYSCTL_ASSERT_LOCKED();
1440
1441 for (*len = 0; *len < CTL_MAXNAME;) {
1442 n = strchrnul(name, '.');
1443 oidp = sysctl_find_oidnamelen(name, n - name, lsp);
1444 if (oidp == NULL)
1445 return (ENOENT);
1446 *oid++ = oidp->oid_number;
1447 (*len)++;
1448
1449 name = n;
1450 if (*name == '.')
1451 name++;
1452 if (*name == '\0') {
1453 if (oidpp)
1454 *oidpp = oidp;
1455 return (0);
1456 }
1457
1458 if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE)
1459 break;
1460
1461 if (oidp->oid_handler)
1462 break;
1463
1464 lsp = SYSCTL_CHILDREN(oidp);
1465 }
1466 return (ENOENT);
1467 }
1468
1469 static int
sysctl_sysctl_name2oid(SYSCTL_HANDLER_ARGS)1470 sysctl_sysctl_name2oid(SYSCTL_HANDLER_ARGS)
1471 {
1472 char *p;
1473 int error, oid[CTL_MAXNAME], len = 0;
1474 struct sysctl_oid *op = NULL;
1475 struct rm_priotracker tracker;
1476 char buf[32];
1477
1478 if (!req->newlen)
1479 return (ENOENT);
1480 if (req->newlen >= MAXPATHLEN) /* XXX arbitrary, undocumented */
1481 return (ENAMETOOLONG);
1482
1483 p = buf;
1484 if (req->newlen >= sizeof(buf))
1485 p = malloc(req->newlen+1, M_SYSCTL, M_WAITOK);
1486
1487 error = SYSCTL_IN(req, p, req->newlen);
1488 if (error) {
1489 if (p != buf)
1490 free(p, M_SYSCTL);
1491 return (error);
1492 }
1493
1494 p [req->newlen] = '\0';
1495
1496 SYSCTL_RLOCK(&tracker);
1497 error = name2oid(p, oid, &len, &op);
1498 SYSCTL_RUNLOCK(&tracker);
1499
1500 if (p != buf)
1501 free(p, M_SYSCTL);
1502
1503 if (error)
1504 return (error);
1505
1506 error = SYSCTL_OUT(req, oid, len * sizeof *oid);
1507 return (error);
1508 }
1509
/*
 * Declare the "name2oid" OID under _sysctl, numbered CTL_SYSCTL_NAME2OID,
 * as a read-write, MP-safe procedure OID served by
 * sysctl_sysctl_name2oid().  CTLFLAG_ANYBODY is set on it.
 *
 * XXXRW/JA: Shouldn't return name2oid data for nodes that we don't permit in
 * capability mode.
 */
SYSCTL_PROC(_sysctl, CTL_SYSCTL_NAME2OID, name2oid, CTLTYPE_INT | CTLFLAG_RW |
    CTLFLAG_ANYBODY | CTLFLAG_MPSAFE | CTLFLAG_CAPRW, 0, 0,
    sysctl_sysctl_name2oid, "I", "");
1517
1518 static int
sysctl_sysctl_oidfmt(SYSCTL_HANDLER_ARGS)1519 sysctl_sysctl_oidfmt(SYSCTL_HANDLER_ARGS)
1520 {
1521 struct sysctl_oid *oid;
1522 struct rm_priotracker tracker;
1523 int error;
1524
1525 error = sysctl_wire_old_buffer(req, 0);
1526 if (error)
1527 return (error);
1528
1529 SYSCTL_RLOCK(&tracker);
1530 error = sysctl_find_oid(arg1, arg2, &oid, NULL, req);
1531 if (error)
1532 goto out;
1533
1534 if (oid->oid_fmt == NULL) {
1535 error = ENOENT;
1536 goto out;
1537 }
1538 error = SYSCTL_OUT(req, &oid->oid_kind, sizeof(oid->oid_kind));
1539 if (error)
1540 goto out;
1541 error = SYSCTL_OUT(req, oid->oid_fmt, strlen(oid->oid_fmt) + 1);
1542 out:
1543 SYSCTL_RUNLOCK(&tracker);
1544 return (error);
1545 }
1546
/*
 * Declare the "oidfmt" node under _sysctl, numbered CTL_SYSCTL_OIDFMT, as a
 * read-only, MP-safe node served by sysctl_sysctl_oidfmt().
 */
static SYSCTL_NODE(_sysctl, CTL_SYSCTL_OIDFMT, oidfmt, CTLFLAG_RD |
    CTLFLAG_MPSAFE | CTLFLAG_CAPRD, sysctl_sysctl_oidfmt, "");
1549
1550 static int
sysctl_sysctl_oiddescr(SYSCTL_HANDLER_ARGS)1551 sysctl_sysctl_oiddescr(SYSCTL_HANDLER_ARGS)
1552 {
1553 struct sysctl_oid *oid;
1554 struct rm_priotracker tracker;
1555 int error;
1556
1557 error = sysctl_wire_old_buffer(req, 0);
1558 if (error)
1559 return (error);
1560
1561 SYSCTL_RLOCK(&tracker);
1562 error = sysctl_find_oid(arg1, arg2, &oid, NULL, req);
1563 if (error)
1564 goto out;
1565
1566 if (oid->oid_descr == NULL) {
1567 error = ENOENT;
1568 goto out;
1569 }
1570 error = SYSCTL_OUT(req, oid->oid_descr, strlen(oid->oid_descr) + 1);
1571 out:
1572 SYSCTL_RUNLOCK(&tracker);
1573 return (error);
1574 }
1575
/*
 * Declare the "oiddescr" node under _sysctl, numbered CTL_SYSCTL_OIDDESCR,
 * as a read-only, MP-safe node served by sysctl_sysctl_oiddescr().
 */
static SYSCTL_NODE(_sysctl, CTL_SYSCTL_OIDDESCR, oiddescr, CTLFLAG_RD |
    CTLFLAG_MPSAFE|CTLFLAG_CAPRD, sysctl_sysctl_oiddescr, "");
1578
1579 static int
sysctl_sysctl_oidlabel(SYSCTL_HANDLER_ARGS)1580 sysctl_sysctl_oidlabel(SYSCTL_HANDLER_ARGS)
1581 {
1582 struct sysctl_oid *oid;
1583 struct rm_priotracker tracker;
1584 int error;
1585
1586 error = sysctl_wire_old_buffer(req, 0);
1587 if (error)
1588 return (error);
1589
1590 SYSCTL_RLOCK(&tracker);
1591 error = sysctl_find_oid(arg1, arg2, &oid, NULL, req);
1592 if (error)
1593 goto out;
1594
1595 if (oid->oid_label == NULL) {
1596 error = ENOENT;
1597 goto out;
1598 }
1599 error = SYSCTL_OUT(req, oid->oid_label, strlen(oid->oid_label) + 1);
1600 out:
1601 SYSCTL_RUNLOCK(&tracker);
1602 return (error);
1603 }
1604
/*
 * Declare the "oidlabel" node under _sysctl, numbered CTL_SYSCTL_OIDLABEL,
 * as a read-only, MP-safe node served by sysctl_sysctl_oidlabel().
 */
static SYSCTL_NODE(_sysctl, CTL_SYSCTL_OIDLABEL, oidlabel, CTLFLAG_RD |
    CTLFLAG_MPSAFE | CTLFLAG_CAPRD, sysctl_sysctl_oidlabel, "");
1607
1608 /*
1609 * Default "handler" functions.
1610 */
1611
1612 /*
1613 * Handle a bool.
1614 * Two cases:
1615 * a variable: point arg1 at it.
1616 * a constant: pass it in arg2.
1617 */
1618
1619 int
sysctl_handle_bool(SYSCTL_HANDLER_ARGS)1620 sysctl_handle_bool(SYSCTL_HANDLER_ARGS)
1621 {
1622 int error;
1623 uint8_t temp;
1624
1625 /*
1626 * Attempt to get a coherent snapshot by making a copy of the data.
1627 */
1628 if (arg1)
1629 temp = *(bool *)arg1 ? 1 : 0;
1630 else
1631 temp = arg2 ? 1 : 0;
1632
1633 /*
1634 * In order to support backwards-compatible conversion of integer knobs
1635 * that are used as booleans to true boolean knobs, whose internal state
1636 * is stored as a 'bool' and not an 'int', if exactly 4 bytes remain in
1637 * the output buffer, we assume that the caller expected an 'int'
1638 * instead of a 'uint8_t'.
1639 */
1640 if (req->oldlen - req->oldidx == sizeof(int)) {
1641 int temp_int = temp;
1642
1643 error = SYSCTL_OUT(req, &temp_int, sizeof(temp_int));
1644 } else
1645 error = SYSCTL_OUT(req, &temp, sizeof(temp));
1646 if (error || !req->newptr)
1647 return (error);
1648
1649 if (!arg1)
1650 error = EPERM;
1651 else {
1652 /*
1653 * Conversely, if the input buffer has exactly 4 bytes to read,
1654 * use them all to produce a bool.
1655 */
1656 if (req->newlen - req->newidx == sizeof(int)) {
1657 int temp_int;
1658
1659 error = SYSCTL_IN(req, &temp_int, sizeof(temp_int));
1660 if (error == 0)
1661 *(bool *)arg1 = temp_int != 0 ? 1 : 0;
1662 } else {
1663 error = SYSCTL_IN(req, &temp, sizeof(temp));
1664 if (error == 0)
1665 *(bool *)arg1 = temp != 0 ? 1 : 0;
1666 }
1667 }
1668 return (error);
1669 }
1670
1671 /*
1672 * Handle an int8_t, signed or unsigned.
1673 * Two cases:
1674 * a variable: point arg1 at it.
1675 * a constant: pass it in arg2.
1676 */
1677
1678 int
sysctl_handle_8(SYSCTL_HANDLER_ARGS)1679 sysctl_handle_8(SYSCTL_HANDLER_ARGS)
1680 {
1681 int8_t tmpout;
1682 int error = 0;
1683
1684 /*
1685 * Attempt to get a coherent snapshot by making a copy of the data.
1686 */
1687 if (arg1)
1688 tmpout = *(int8_t *)arg1;
1689 else
1690 tmpout = arg2;
1691 error = SYSCTL_OUT(req, &tmpout, sizeof(tmpout));
1692
1693 if (error || !req->newptr)
1694 return (error);
1695
1696 if (!arg1)
1697 error = EPERM;
1698 else
1699 error = SYSCTL_IN(req, arg1, sizeof(tmpout));
1700 return (error);
1701 }
1702
1703 /*
1704 * Handle an int16_t, signed or unsigned.
1705 * Two cases:
1706 * a variable: point arg1 at it.
1707 * a constant: pass it in arg2.
1708 */
1709
1710 int
sysctl_handle_16(SYSCTL_HANDLER_ARGS)1711 sysctl_handle_16(SYSCTL_HANDLER_ARGS)
1712 {
1713 int16_t tmpout;
1714 int error = 0;
1715
1716 /*
1717 * Attempt to get a coherent snapshot by making a copy of the data.
1718 */
1719 if (arg1)
1720 tmpout = *(int16_t *)arg1;
1721 else
1722 tmpout = arg2;
1723 error = SYSCTL_OUT(req, &tmpout, sizeof(tmpout));
1724
1725 if (error || !req->newptr)
1726 return (error);
1727
1728 if (!arg1)
1729 error = EPERM;
1730 else
1731 error = SYSCTL_IN(req, arg1, sizeof(tmpout));
1732 return (error);
1733 }
1734
1735 /*
1736 * Handle an int32_t, signed or unsigned.
1737 * Two cases:
1738 * a variable: point arg1 at it.
1739 * a constant: pass it in arg2.
1740 */
1741
1742 int
sysctl_handle_32(SYSCTL_HANDLER_ARGS)1743 sysctl_handle_32(SYSCTL_HANDLER_ARGS)
1744 {
1745 int32_t tmpout;
1746 int error = 0;
1747
1748 /*
1749 * Attempt to get a coherent snapshot by making a copy of the data.
1750 */
1751 if (arg1)
1752 tmpout = *(int32_t *)arg1;
1753 else
1754 tmpout = arg2;
1755 error = SYSCTL_OUT(req, &tmpout, sizeof(tmpout));
1756
1757 if (error || !req->newptr)
1758 return (error);
1759
1760 if (!arg1)
1761 error = EPERM;
1762 else
1763 error = SYSCTL_IN(req, arg1, sizeof(tmpout));
1764 return (error);
1765 }
1766
1767 /*
1768 * Handle an int, signed or unsigned.
1769 * Two cases:
1770 * a variable: point arg1 at it.
1771 * a constant: pass it in arg2.
1772 */
1773
1774 int
sysctl_handle_int(SYSCTL_HANDLER_ARGS)1775 sysctl_handle_int(SYSCTL_HANDLER_ARGS)
1776 {
1777 int tmpout, error = 0;
1778
1779 /*
1780 * Attempt to get a coherent snapshot by making a copy of the data.
1781 */
1782 if (arg1)
1783 tmpout = *(int *)arg1;
1784 else
1785 tmpout = arg2;
1786 error = SYSCTL_OUT(req, &tmpout, sizeof(int));
1787
1788 if (error || !req->newptr)
1789 return (error);
1790
1791 if (!arg1)
1792 error = EPERM;
1793 else
1794 error = SYSCTL_IN(req, arg1, sizeof(int));
1795 return (error);
1796 }
1797
1798 /*
1799 * Based on sysctl_handle_int() convert milliseconds into ticks.
1800 * Note: this is used by TCP.
1801 */
1802
1803 int
sysctl_msec_to_ticks(SYSCTL_HANDLER_ARGS)1804 sysctl_msec_to_ticks(SYSCTL_HANDLER_ARGS)
1805 {
1806 int error, s, tt;
1807
1808 tt = *(int *)arg1;
1809 s = (int)((int64_t)tt * 1000 / hz);
1810
1811 error = sysctl_handle_int(oidp, &s, 0, req);
1812 if (error || !req->newptr)
1813 return (error);
1814
1815 tt = (int)((int64_t)s * hz / 1000);
1816 if (tt < 1)
1817 return (EINVAL);
1818
1819 *(int *)arg1 = tt;
1820 return (0);
1821 }
1822
1823 /*
1824 * Handle a long, signed or unsigned.
1825 * Two cases:
1826 * a variable: point arg1 at it.
1827 * a constant: pass it in arg2.
1828 */
1829
1830 int
sysctl_handle_long(SYSCTL_HANDLER_ARGS)1831 sysctl_handle_long(SYSCTL_HANDLER_ARGS)
1832 {
1833 int error = 0;
1834 long tmplong;
1835 #ifdef SCTL_MASK32
1836 int tmpint;
1837 #endif
1838
1839 /*
1840 * Attempt to get a coherent snapshot by making a copy of the data.
1841 */
1842 if (arg1)
1843 tmplong = *(long *)arg1;
1844 else
1845 tmplong = arg2;
1846 #ifdef SCTL_MASK32
1847 if (req->flags & SCTL_MASK32) {
1848 tmpint = tmplong;
1849 error = SYSCTL_OUT(req, &tmpint, sizeof(int));
1850 } else
1851 #endif
1852 error = SYSCTL_OUT(req, &tmplong, sizeof(long));
1853
1854 if (error || !req->newptr)
1855 return (error);
1856
1857 if (!arg1)
1858 error = EPERM;
1859 #ifdef SCTL_MASK32
1860 else if (req->flags & SCTL_MASK32) {
1861 error = SYSCTL_IN(req, &tmpint, sizeof(int));
1862 *(long *)arg1 = (long)tmpint;
1863 }
1864 #endif
1865 else
1866 error = SYSCTL_IN(req, arg1, sizeof(long));
1867 return (error);
1868 }
1869
1870 /*
1871 * Handle a 64 bit int, signed or unsigned.
1872 * Two cases:
1873 * a variable: point arg1 at it.
1874 * a constant: pass it in arg2.
1875 */
1876 int
sysctl_handle_64(SYSCTL_HANDLER_ARGS)1877 sysctl_handle_64(SYSCTL_HANDLER_ARGS)
1878 {
1879 int error = 0;
1880 uint64_t tmpout;
1881
1882 /*
1883 * Attempt to get a coherent snapshot by making a copy of the data.
1884 */
1885 if (arg1)
1886 tmpout = *(uint64_t *)arg1;
1887 else
1888 tmpout = arg2;
1889 error = SYSCTL_OUT(req, &tmpout, sizeof(uint64_t));
1890
1891 if (error || !req->newptr)
1892 return (error);
1893
1894 if (!arg1)
1895 error = EPERM;
1896 else
1897 error = SYSCTL_IN(req, arg1, sizeof(uint64_t));
1898 return (error);
1899 }
1900
/*
 * Handle our generic '\0' terminated 'C' string.
 * Two cases:
 * 	a variable string:  point arg1 at it, arg2 is max length.
 * 	a constant string:  point arg1 at it, arg2 is zero.
 *
 * The current string (including its terminating NUL) is copied out first.
 * If new data is supplied, it must be shorter than arg2 bytes; it then
 * replaces the string at arg1 and is NUL-terminated there.  An empty new
 * value truncates the string to "".  Returns EINVAL if the new value does
 * not fit, otherwise the error from SYSCTL_OUT() / SYSCTL_IN().
 */

int
sysctl_handle_string(SYSCTL_HANDLER_ARGS)
{
	char *tmparg;
	int error = 0;

	/*
	 * If the sysctl isn't writable and isn't a preallocated tunable that
	 * can be modified by kenv(2), microoptimise and treat it as a
	 * read-only string.
	 * A zero-length buffer indicates a fixed size read-only
	 * string.  In ddb, don't worry about trying to make a malloced
	 * snapshot.
	 */
	if ((oidp->oid_kind & (CTLFLAG_WR | CTLFLAG_TUN)) == 0 ||
	    arg2 == 0 || kdb_active) {
		size_t outlen;

		/* Output length always includes the terminating NUL. */
		if (arg2 == 0)
			outlen = arg2 = strlen(arg1) + 1;
		else
			outlen = strnlen(arg1, arg2 - 1) + 1;

		/* With no old buffer only the length is accounted for. */
		tmparg = req->oldptr != NULL ? arg1 : NULL;
		error = SYSCTL_OUT(req, tmparg, outlen);
	} else {
		size_t outlen;

		if (req->oldptr != NULL) {
			/*
			 * Copy the string into a temporary buffer under
			 * sysctlstringlock, then copy it out from the
			 * temporary after the lock has been dropped.
			 */
			tmparg = malloc(arg2, M_SYSCTLTMP, M_WAITOK);
			sx_slock(&sysctlstringlock);
			memcpy(tmparg, arg1, arg2);
			sx_sunlock(&sysctlstringlock);
			outlen = strnlen(tmparg, arg2 - 1) + 1;
		} else {
			tmparg = NULL;
			sx_slock(&sysctlstringlock);
			outlen = strnlen(arg1, arg2 - 1) + 1;
			sx_sunlock(&sysctlstringlock);
		}
		error = SYSCTL_OUT(req, tmparg, outlen);
		free(tmparg, M_SYSCTLTMP);
	}
	if (error || !req->newptr)
		return (error);

	/*
	 * The new value plus its terminating NUL must fit in arg2 bytes.
	 * NOTE(review): if newlen / newidx are unsigned (size_t), the "< 0"
	 * test below can never be true -- confirm against struct sysctl_req.
	 */
	if (req->newlen - req->newidx >= arg2 ||
	    req->newlen - req->newidx < 0) {
		error = EINVAL;
	} else if (req->newlen - req->newidx == 0) {
		/* Empty new value: truncate the string. */
		sx_xlock(&sysctlstringlock);
		((char *)arg1)[0] = '\0';
		sx_xunlock(&sysctlstringlock);
	} else if (req->newfunc == sysctl_new_kernel) {
		/*
		 * Kernel-space source: read straight into the destination
		 * while holding the lock.
		 * NOTE(review): sysctl_new_kernel() already advances
		 * req->newidx, so the extra "+= arg2" below appears to
		 * advance it a second time -- confirm whether any caller
		 * relies on newidx afterwards.
		 */
		arg2 = req->newlen - req->newidx;
		sx_xlock(&sysctlstringlock);
		error = SYSCTL_IN(req, arg1, arg2);
		if (error == 0) {
			((char *)arg1)[arg2] = '\0';
			req->newidx += arg2;
		}
		sx_xunlock(&sysctlstringlock);
	} else {
		/*
		 * Other sources: read into a temporary buffer first, then
		 * copy into the destination under the lock.
		 */
		arg2 = req->newlen - req->newidx;
		tmparg = malloc(arg2, M_SYSCTLTMP, M_WAITOK);

		error = SYSCTL_IN(req, tmparg, arg2);
		if (error) {
			free(tmparg, M_SYSCTLTMP);
			return (error);
		}

		sx_xlock(&sysctlstringlock);
		memcpy(arg1, tmparg, arg2);
		((char *)arg1)[arg2] = '\0';
		sx_xunlock(&sysctlstringlock);
		free(tmparg, M_SYSCTLTMP);
		req->newidx += arg2;
	}
	return (error);
}
1989
1990 /*
1991 * Handle any kind of opaque data.
1992 * arg1 points to it, arg2 is the size.
1993 */
1994
1995 int
sysctl_handle_opaque(SYSCTL_HANDLER_ARGS)1996 sysctl_handle_opaque(SYSCTL_HANDLER_ARGS)
1997 {
1998 int error, tries;
1999 u_int generation;
2000 struct sysctl_req req2;
2001
2002 /*
2003 * Attempt to get a coherent snapshot, by using the thread
2004 * pre-emption counter updated from within mi_switch() to
2005 * determine if we were pre-empted during a bcopy() or
2006 * copyout(). Make 3 attempts at doing this before giving up.
2007 * If we encounter an error, stop immediately.
2008 */
2009 tries = 0;
2010 req2 = *req;
2011 retry:
2012 generation = curthread->td_generation;
2013 error = SYSCTL_OUT(req, arg1, arg2);
2014 if (error)
2015 return (error);
2016 tries++;
2017 if (generation != curthread->td_generation && tries < 3) {
2018 *req = req2;
2019 goto retry;
2020 }
2021
2022 error = SYSCTL_IN(req, arg1, arg2);
2023
2024 return (error);
2025 }
2026
2027 /*
2028 * Based on sysctl_handle_64() convert microseconds to a sbintime.
2029 */
2030 int
sysctl_usec_to_sbintime(SYSCTL_HANDLER_ARGS)2031 sysctl_usec_to_sbintime(SYSCTL_HANDLER_ARGS)
2032 {
2033 int error;
2034 int64_t usec;
2035
2036 usec = sbttous(*(sbintime_t *)arg1);
2037
2038 error = sysctl_handle_64(oidp, &usec, 0, req);
2039 if (error || !req->newptr)
2040 return (error);
2041
2042 *(sbintime_t *)arg1 = ustosbt(usec);
2043
2044 return (0);
2045 }
2046
2047 /*
2048 * Based on sysctl_handle_64() convert milliseconds to a sbintime.
2049 */
2050 int
sysctl_msec_to_sbintime(SYSCTL_HANDLER_ARGS)2051 sysctl_msec_to_sbintime(SYSCTL_HANDLER_ARGS)
2052 {
2053 int error;
2054 int64_t msec;
2055
2056 msec = sbttoms(*(sbintime_t *)arg1);
2057
2058 error = sysctl_handle_64(oidp, &msec, 0, req);
2059 if (error || !req->newptr)
2060 return (error);
2061
2062 *(sbintime_t *)arg1 = mstosbt(msec);
2063
2064 return (0);
2065 }
2066
2067 /*
2068 * Convert seconds to a struct timeval. Intended for use with
2069 * intervals and thus does not permit negative seconds.
2070 */
2071 int
sysctl_sec_to_timeval(SYSCTL_HANDLER_ARGS)2072 sysctl_sec_to_timeval(SYSCTL_HANDLER_ARGS)
2073 {
2074 struct timeval *tv;
2075 int error, secs;
2076
2077 tv = arg1;
2078 secs = tv->tv_sec;
2079
2080 error = sysctl_handle_int(oidp, &secs, 0, req);
2081 if (error || req->newptr == NULL)
2082 return (error);
2083
2084 if (secs < 0)
2085 return (EINVAL);
2086 tv->tv_sec = secs;
2087
2088 return (0);
2089 }
2090
2091 /*
2092 * Transfer functions to/from kernel space.
2093 * XXX: rather untested at this point
2094 */
2095 static int
sysctl_old_kernel(struct sysctl_req * req,const void * p,size_t l)2096 sysctl_old_kernel(struct sysctl_req *req, const void *p, size_t l)
2097 {
2098 size_t i = 0;
2099
2100 if (req->oldptr) {
2101 i = l;
2102 if (req->oldlen <= req->oldidx)
2103 i = 0;
2104 else
2105 if (i > req->oldlen - req->oldidx)
2106 i = req->oldlen - req->oldidx;
2107 if (i > 0)
2108 bcopy(p, (char *)req->oldptr + req->oldidx, i);
2109 }
2110 req->oldidx += l;
2111 if (req->oldptr && i != l)
2112 return (ENOMEM);
2113 return (0);
2114 }
2115
2116 static int
sysctl_new_kernel(struct sysctl_req * req,void * p,size_t l)2117 sysctl_new_kernel(struct sysctl_req *req, void *p, size_t l)
2118 {
2119 if (!req->newptr)
2120 return (0);
2121 if (req->newlen - req->newidx < l)
2122 return (EINVAL);
2123 bcopy((const char *)req->newptr + req->newidx, p, l);
2124 req->newidx += l;
2125 return (0);
2126 }
2127
2128 int
kernel_sysctl(struct thread * td,int * name,u_int namelen,void * old,size_t * oldlenp,void * new,size_t newlen,size_t * retval,int flags)2129 kernel_sysctl(struct thread *td, int *name, u_int namelen, void *old,
2130 size_t *oldlenp, void *new, size_t newlen, size_t *retval, int flags)
2131 {
2132 int error = 0;
2133 struct sysctl_req req;
2134
2135 bzero(&req, sizeof req);
2136
2137 req.td = td;
2138 req.flags = flags;
2139
2140 if (oldlenp) {
2141 req.oldlen = *oldlenp;
2142 }
2143 req.validlen = req.oldlen;
2144
2145 if (old) {
2146 req.oldptr= old;
2147 }
2148
2149 if (new != NULL) {
2150 req.newlen = newlen;
2151 req.newptr = new;
2152 }
2153
2154 req.oldfunc = sysctl_old_kernel;
2155 req.newfunc = sysctl_new_kernel;
2156 req.lock = REQ_UNWIRED;
2157
2158 error = sysctl_root(0, name, namelen, &req);
2159
2160 if (req.lock == REQ_WIRED && req.validlen > 0)
2161 vsunlock(req.oldptr, req.validlen);
2162
2163 if (error && error != ENOMEM)
2164 return (error);
2165
2166 if (retval) {
2167 if (req.oldptr && req.oldidx > req.validlen)
2168 *retval = req.validlen;
2169 else
2170 *retval = req.oldidx;
2171 }
2172 return (error);
2173 }
2174
2175 int
kernel_sysctlbyname(struct thread * td,char * name,void * old,size_t * oldlenp,void * new,size_t newlen,size_t * retval,int flags)2176 kernel_sysctlbyname(struct thread *td, char *name, void *old, size_t *oldlenp,
2177 void *new, size_t newlen, size_t *retval, int flags)
2178 {
2179 int oid[CTL_MAXNAME];
2180 size_t oidlen, plen;
2181 int error;
2182
2183 oid[0] = CTL_SYSCTL;
2184 oid[1] = CTL_SYSCTL_NAME2OID;
2185 oidlen = sizeof(oid);
2186
2187 error = kernel_sysctl(td, oid, 2, oid, &oidlen,
2188 (void *)name, strlen(name), &plen, flags);
2189 if (error)
2190 return (error);
2191
2192 error = kernel_sysctl(td, oid, plen / sizeof(int), old, oldlenp,
2193 new, newlen, retval, flags);
2194 return (error);
2195 }
2196
2197 /*
2198 * Transfer function to/from user space.
2199 */
2200 static int
sysctl_old_user(struct sysctl_req * req,const void * p,size_t l)2201 sysctl_old_user(struct sysctl_req *req, const void *p, size_t l)
2202 {
2203 size_t i, len, origidx;
2204 int error;
2205
2206 origidx = req->oldidx;
2207 req->oldidx += l;
2208 if (req->oldptr == NULL)
2209 return (0);
2210 /*
2211 * If we have not wired the user supplied buffer and we are currently
2212 * holding locks, drop a witness warning, as it's possible that
2213 * write operations to the user page can sleep.
2214 */
2215 if (req->lock != REQ_WIRED)
2216 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
2217 "sysctl_old_user()");
2218 i = l;
2219 len = req->validlen;
2220 if (len <= origidx)
2221 i = 0;
2222 else {
2223 if (i > len - origidx)
2224 i = len - origidx;
2225 if (req->lock == REQ_WIRED) {
2226 error = copyout_nofault(p, (char *)req->oldptr +
2227 origidx, i);
2228 } else
2229 error = copyout(p, (char *)req->oldptr + origidx, i);
2230 if (error != 0)
2231 return (error);
2232 }
2233 if (i < l)
2234 return (ENOMEM);
2235 return (0);
2236 }
2237
2238 static int
sysctl_new_user(struct sysctl_req * req,void * p,size_t l)2239 sysctl_new_user(struct sysctl_req *req, void *p, size_t l)
2240 {
2241 int error;
2242
2243 if (!req->newptr)
2244 return (0);
2245 if (req->newlen - req->newidx < l)
2246 return (EINVAL);
2247 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
2248 "sysctl_new_user()");
2249 error = copyin((const char *)req->newptr + req->newidx, p, l);
2250 req->newidx += l;
2251 return (error);
2252 }
2253
2254 /*
2255 * Wire the user space destination buffer. If set to a value greater than
2256 * zero, the len parameter limits the maximum amount of wired memory.
2257 */
2258 int
sysctl_wire_old_buffer(struct sysctl_req * req,size_t len)2259 sysctl_wire_old_buffer(struct sysctl_req *req, size_t len)
2260 {
2261 int ret;
2262 size_t wiredlen;
2263
2264 wiredlen = (len > 0 && len < req->oldlen) ? len : req->oldlen;
2265 ret = 0;
2266 if (req->lock != REQ_WIRED && req->oldptr &&
2267 req->oldfunc == sysctl_old_user) {
2268 if (wiredlen != 0) {
2269 ret = vslock(req->oldptr, wiredlen);
2270 if (ret != 0) {
2271 if (ret != ENOMEM)
2272 return (ret);
2273 wiredlen = 0;
2274 }
2275 }
2276 req->lock = REQ_WIRED;
2277 req->validlen = wiredlen;
2278 }
2279 return (0);
2280 }
2281
2282 int
sysctl_find_oid(int * name,u_int namelen,struct sysctl_oid ** noid,int * nindx,struct sysctl_req * req)2283 sysctl_find_oid(int *name, u_int namelen, struct sysctl_oid **noid,
2284 int *nindx, struct sysctl_req *req)
2285 {
2286 struct sysctl_oid_list *lsp;
2287 struct sysctl_oid *oid;
2288 struct sysctl_oid key;
2289 int indx;
2290
2291 SYSCTL_ASSERT_LOCKED();
2292 lsp = &sysctl__children;
2293 indx = 0;
2294 while (indx < CTL_MAXNAME) {
2295 key.oid_number = name[indx];
2296 oid = RB_FIND(sysctl_oid_list, lsp, &key);
2297 if (oid == NULL)
2298 return (ENOENT);
2299
2300 indx++;
2301 if ((oid->oid_kind & CTLTYPE) == CTLTYPE_NODE) {
2302 if (oid->oid_handler != NULL || indx == namelen) {
2303 *noid = oid;
2304 if (nindx != NULL)
2305 *nindx = indx;
2306 KASSERT((oid->oid_kind & CTLFLAG_DYING) == 0,
2307 ("%s found DYING node %p", __func__, oid));
2308 return (0);
2309 }
2310 lsp = SYSCTL_CHILDREN(oid);
2311 } else if (indx == namelen) {
2312 if ((oid->oid_kind & CTLFLAG_DORMANT) != 0)
2313 return (ENOENT);
2314 *noid = oid;
2315 if (nindx != NULL)
2316 *nindx = indx;
2317 KASSERT((oid->oid_kind & CTLFLAG_DYING) == 0,
2318 ("%s found DYING node %p", __func__, oid));
2319 return (0);
2320 } else {
2321 return (ENOTDIR);
2322 }
2323 }
2324 return (ENOENT);
2325 }
2326
2327 /*
2328 * Traverse our tree, and find the right node, execute whatever it points
2329 * to, and return the resulting error code.
2330 */
2331
2332 static int
sysctl_root(SYSCTL_HANDLER_ARGS)2333 sysctl_root(SYSCTL_HANDLER_ARGS)
2334 {
2335 struct sysctl_oid *oid;
2336 struct rm_priotracker tracker;
2337 int error, indx, lvl;
2338
2339 SYSCTL_RLOCK(&tracker);
2340
2341 error = sysctl_find_oid(arg1, arg2, &oid, &indx, req);
2342 if (error)
2343 goto out;
2344
2345 if ((oid->oid_kind & CTLTYPE) == CTLTYPE_NODE) {
2346 /*
2347 * You can't call a sysctl when it's a node, but has
2348 * no handler. Inform the user that it's a node.
2349 * The indx may or may not be the same as namelen.
2350 */
2351 if (oid->oid_handler == NULL) {
2352 error = EISDIR;
2353 goto out;
2354 }
2355 }
2356
2357 /* Is this sysctl writable? */
2358 if (req->newptr && !(oid->oid_kind & CTLFLAG_WR)) {
2359 error = EPERM;
2360 goto out;
2361 }
2362
2363 KASSERT(req->td != NULL, ("sysctl_root(): req->td == NULL"));
2364
2365 #ifdef CAPABILITY_MODE
2366 /*
2367 * If the process is in capability mode, then don't permit reading or
2368 * writing unless specifically granted for the node.
2369 */
2370 if (IN_CAPABILITY_MODE(req->td)) {
2371 if ((req->oldptr && !(oid->oid_kind & CTLFLAG_CAPRD)) ||
2372 (req->newptr && !(oid->oid_kind & CTLFLAG_CAPWR))) {
2373 error = EPERM;
2374 goto out;
2375 }
2376 }
2377 #endif
2378
2379 /* Is this sysctl sensitive to securelevels? */
2380 if (req->newptr && (oid->oid_kind & CTLFLAG_SECURE)) {
2381 lvl = (oid->oid_kind & CTLMASK_SECURE) >> CTLSHIFT_SECURE;
2382 error = securelevel_gt(req->td->td_ucred, lvl);
2383 if (error)
2384 goto out;
2385 }
2386
2387 /* Is this sysctl writable by only privileged users? */
2388 if (req->newptr && !(oid->oid_kind & CTLFLAG_ANYBODY)) {
2389 int priv;
2390
2391 if (oid->oid_kind & CTLFLAG_PRISON)
2392 priv = PRIV_SYSCTL_WRITEJAIL;
2393 #ifdef VIMAGE
2394 else if ((oid->oid_kind & CTLFLAG_VNET) &&
2395 prison_owns_vnet(req->td->td_ucred->cr_prison))
2396 priv = PRIV_SYSCTL_WRITEJAIL;
2397 #endif
2398 else
2399 priv = PRIV_SYSCTL_WRITE;
2400 error = priv_check(req->td, priv);
2401 if (error)
2402 goto out;
2403 }
2404
2405 if (!oid->oid_handler) {
2406 error = EINVAL;
2407 goto out;
2408 }
2409
2410 if ((oid->oid_kind & CTLTYPE) == CTLTYPE_NODE) {
2411 arg1 = (int *)arg1 + indx;
2412 arg2 -= indx;
2413 } else {
2414 arg1 = oid->oid_arg1;
2415 arg2 = oid->oid_arg2;
2416 }
2417 #ifdef MAC
2418 error = mac_system_check_sysctl(req->td->td_ucred, oid, arg1, arg2,
2419 req);
2420 if (error != 0)
2421 goto out;
2422 #endif
2423 #ifdef VIMAGE
2424 if ((oid->oid_kind & CTLFLAG_VNET) && arg1 != NULL)
2425 arg1 = (void *)(curvnet->vnet_data_base + (uintptr_t)arg1);
2426 #endif
2427 error = sysctl_root_handler_locked(oid, arg1, arg2, req, &tracker);
2428
2429 out:
2430 SYSCTL_RUNLOCK(&tracker);
2431 return (error);
2432 }
2433
2434 #ifndef _SYS_SYSPROTO_H_
2435 struct __sysctl_args {
2436 int *name;
2437 u_int namelen;
2438 void *old;
2439 size_t *oldlenp;
2440 void *new;
2441 size_t newlen;
2442 };
2443 #endif
2444 int
sys___sysctl(struct thread * td,struct __sysctl_args * uap)2445 sys___sysctl(struct thread *td, struct __sysctl_args *uap)
2446 {
2447 int error, i, name[CTL_MAXNAME];
2448 size_t j;
2449
2450 if (uap->namelen > CTL_MAXNAME || uap->namelen < 2)
2451 return (EINVAL);
2452
2453 error = copyin(uap->name, &name, uap->namelen * sizeof(int));
2454 if (error)
2455 return (error);
2456
2457 error = userland_sysctl(td, name, uap->namelen,
2458 uap->old, uap->oldlenp, 0,
2459 uap->new, uap->newlen, &j, 0);
2460 if (error && error != ENOMEM)
2461 return (error);
2462 if (uap->oldlenp) {
2463 i = copyout(&j, uap->oldlenp, sizeof(j));
2464 if (i)
2465 return (i);
2466 }
2467 return (error);
2468 }
2469
2470 int
kern___sysctlbyname(struct thread * td,const char * oname,size_t namelen,void * old,size_t * oldlenp,void * new,size_t newlen,size_t * retval,int flags,bool inkernel)2471 kern___sysctlbyname(struct thread *td, const char *oname, size_t namelen,
2472 void *old, size_t *oldlenp, void *new, size_t newlen, size_t *retval,
2473 int flags, bool inkernel)
2474 {
2475 int oid[CTL_MAXNAME];
2476 char namebuf[16];
2477 char *name;
2478 size_t oidlen;
2479 int error;
2480
2481 if (namelen > MAXPATHLEN || namelen == 0)
2482 return (EINVAL);
2483 name = namebuf;
2484 if (namelen > sizeof(namebuf))
2485 name = malloc(namelen, M_SYSCTL, M_WAITOK);
2486 error = copyin(oname, name, namelen);
2487 if (error != 0)
2488 goto out;
2489
2490 oid[0] = CTL_SYSCTL;
2491 oid[1] = CTL_SYSCTL_NAME2OID;
2492 oidlen = sizeof(oid);
2493 error = kernel_sysctl(td, oid, 2, oid, &oidlen, (void *)name, namelen,
2494 retval, flags);
2495 if (error != 0)
2496 goto out;
2497 error = userland_sysctl(td, oid, *retval / sizeof(int), old, oldlenp,
2498 inkernel, new, newlen, retval, flags);
2499
2500 out:
2501 if (namelen > sizeof(namebuf))
2502 free(name, M_SYSCTL);
2503 return (error);
2504 }
2505
#ifndef _SYS_SYSPROTO_H_
struct __sysctlbyname_args {
	const char	*name;
	size_t	namelen;
	void	*old;
	size_t	*oldlenp;
	void	*new;
	size_t	newlen;
};
#endif
/*
 * Entry point for the __sysctlbyname system call.
 *
 * Forwards the request to kern___sysctlbyname() and, only when that
 * succeeds, copies the resulting length out through uap->oldlenp if the
 * caller supplied it.
 */
int
sys___sysctlbyname(struct thread *td, struct __sysctlbyname_args *uap)
{
	size_t outlen;
	int error;

	error = kern___sysctlbyname(td, uap->name, uap->namelen, uap->old,
	    uap->oldlenp, uap->new, uap->newlen, &outlen, 0, 0);
	if (error == 0 && uap->oldlenp != NULL)
		error = copyout(&outlen, uap->oldlenp, sizeof(outlen));
	return (error);
}
2531
2532 /*
2533 * This is used from various compatibility syscalls too. That's why name
2534 * must be in kernel space.
2535 */
2536 int
userland_sysctl(struct thread * td,int * name,u_int namelen,void * old,size_t * oldlenp,int inkernel,const void * new,size_t newlen,size_t * retval,int flags)2537 userland_sysctl(struct thread *td, int *name, u_int namelen, void *old,
2538 size_t *oldlenp, int inkernel, const void *new, size_t newlen,
2539 size_t *retval, int flags)
2540 {
2541 struct sysctl_req req;
2542 int error = 0;
2543 bool memlocked;
2544
2545 bzero(&req, sizeof req);
2546
2547 req.td = td;
2548 req.flags = flags;
2549
2550 if (oldlenp) {
2551 if (inkernel) {
2552 req.oldlen = *oldlenp;
2553 } else {
2554 error = copyin(oldlenp, &req.oldlen, sizeof(*oldlenp));
2555 if (error)
2556 return (error);
2557 }
2558 }
2559 req.validlen = req.oldlen;
2560 req.oldptr = old;
2561
2562 if (new != NULL) {
2563 req.newlen = newlen;
2564 req.newptr = new;
2565 }
2566
2567 req.oldfunc = sysctl_old_user;
2568 req.newfunc = sysctl_new_user;
2569 req.lock = REQ_UNWIRED;
2570
2571 #ifdef KTRACE
2572 if (KTRPOINT(curthread, KTR_SYSCTL))
2573 ktrsysctl(name, namelen);
2574 #endif
2575 memlocked = false;
2576 if (priv_check(td, PRIV_SYSCTL_MEMLOCK) != 0 &&
2577 req.oldptr != NULL && req.oldlen > 4 * PAGE_SIZE) {
2578 memlocked = true;
2579 sx_xlock(&sysctlmemlock);
2580 }
2581 CURVNET_SET(TD_TO_VNET(td));
2582
2583 for (;;) {
2584 req.oldidx = 0;
2585 req.newidx = 0;
2586 error = sysctl_root(0, name, namelen, &req);
2587 if (error != EAGAIN)
2588 break;
2589 kern_yield(PRI_USER);
2590 }
2591
2592 CURVNET_RESTORE();
2593
2594 if (req.lock == REQ_WIRED && req.validlen > 0)
2595 vsunlock(req.oldptr, req.validlen);
2596 if (memlocked)
2597 sx_xunlock(&sysctlmemlock);
2598
2599 if (error && error != ENOMEM)
2600 return (error);
2601
2602 if (retval) {
2603 if (req.oldptr && req.oldidx > req.validlen)
2604 *retval = req.validlen;
2605 else
2606 *retval = req.oldidx;
2607 }
2608 return (error);
2609 }
2610
/*
 * Drain into a sysctl struct.  The user buffer should be wired if a page
 * fault would cause issue.
 *
 * arg is the sysctl request to write to.  Returns len when SYSCTL_OUT()
 * succeeds and the negated error code when it fails.
 */
static int
sbuf_sysctl_drain(void *arg, const char *data, int len)
{
	struct sysctl_req *req;
	int error;

	req = arg;
	error = SYSCTL_OUT(req, data, len);
	KASSERT(error >= 0, ("Got unexpected negative value %d", error));
	if (error != 0)
		return (-error);
	return (len);
}
2625
2626 struct sbuf *
sbuf_new_for_sysctl(struct sbuf * s,char * buf,int length,struct sysctl_req * req)2627 sbuf_new_for_sysctl(struct sbuf *s, char *buf, int length,
2628 struct sysctl_req *req)
2629 {
2630
2631 /* Supply a default buffer size if none given. */
2632 if (buf == NULL && length == 0)
2633 length = 64;
2634 s = sbuf_new(s, buf, length, SBUF_FIXEDLEN | SBUF_INCLUDENUL);
2635 sbuf_set_drain(s, sbuf_sysctl_drain, req);
2636 return (s);
2637 }
2638
2639 #ifdef DDB
2640
/*
 * State shared between db_sysctl() and the DDB transfer functions.  It is
 * set up by db_sysctl() right before the handler runs.
 */

/* The current OID the debugger is working with */
static struct sysctl_oid *g_ddb_oid;

/* The current flags specified by the user */
static int g_ddb_sysctl_flags;

/* Check to see if the last sysctl printed */
static int g_ddb_sysctl_printed;

/*
 * Non-zero for the integer CTLTYPE_* values that are printed as signed
 * numbers; indexed by (oid_kind & CTLTYPE).  Types not listed are zero.
 */
static const int ctl_sign[CTLTYPE+1] = {
	[CTLTYPE_INT] = 1,
	[CTLTYPE_LONG] = 1,
	[CTLTYPE_S8] = 1,
	[CTLTYPE_S16] = 1,
	[CTLTYPE_S32] = 1,
	[CTLTYPE_S64] = 1,
};

/*
 * Size in bytes of one element for each integer CTLTYPE_* value; indexed
 * by (oid_kind & CTLTYPE).  Types not listed are zero.
 */
static const int ctl_size[CTLTYPE+1] = {
	[CTLTYPE_INT] = sizeof(int),
	[CTLTYPE_UINT] = sizeof(u_int),
	[CTLTYPE_LONG] = sizeof(long),
	[CTLTYPE_ULONG] = sizeof(u_long),
	[CTLTYPE_S8] = sizeof(int8_t),
	[CTLTYPE_S16] = sizeof(int16_t),
	[CTLTYPE_S32] = sizeof(int32_t),
	[CTLTYPE_S64] = sizeof(int64_t),
	[CTLTYPE_U8] = sizeof(uint8_t),
	[CTLTYPE_U16] = sizeof(uint16_t),
	[CTLTYPE_U32] = sizeof(uint32_t),
	[CTLTYPE_U64] = sizeof(uint64_t),
};

/* Flags selected by the modifiers of the ddb "sysctl" command. */
#define DB_SYSCTL_NAME_ONLY 0x001 /* Compare with -N */
#define DB_SYSCTL_VALUE_ONLY 0x002 /* Compare with -n */
#define DB_SYSCTL_OPAQUE 0x004 /* Compare with -o */
#define DB_SYSCTL_HEX 0x008 /* Compare with -x */

/* Internal flag, added by db_show_sysctl_all() while walking a subtree. */
#define DB_SYSCTL_SAFE_ONLY 0x100 /* Only simple types */

/*
 * Modifier characters and the flag each one selects.  The two arrays are
 * parallel: entry i of db_sysctl_modifs maps to entry i of
 * db_sysctl_modif_values, so they must be kept in the same order.
 */
static const char db_sysctl_modifs[] = {
	'N', 'n', 'o', 'x',
};

static const int db_sysctl_modif_values[] = {
	DB_SYSCTL_NAME_ONLY, DB_SYSCTL_VALUE_ONLY,
	DB_SYSCTL_OPAQUE, DB_SYSCTL_HEX,
};

/* Handlers considered safe to print while recursing */
static int (* const db_safe_handlers[])(SYSCTL_HANDLER_ARGS) = {
	sysctl_handle_bool,
	sysctl_handle_8,
	sysctl_handle_16,
	sysctl_handle_32,
	sysctl_handle_64,
	sysctl_handle_int,
	sysctl_handle_long,
	sysctl_handle_string,
	sysctl_handle_opaque,
};
2702
2703 /*
2704 * Use in place of sysctl_old_kernel to print sysctl values.
2705 *
2706 * Compare to the output handling in show_var from sbin/sysctl/sysctl.c
2707 */
2708 static int
sysctl_old_ddb(struct sysctl_req * req,const void * ptr,size_t len)2709 sysctl_old_ddb(struct sysctl_req *req, const void *ptr, size_t len)
2710 {
2711 const u_char *val, *p;
2712 const char *sep1;
2713 size_t intlen, slen;
2714 uintmax_t umv;
2715 intmax_t mv;
2716 int sign, ctltype, hexlen, xflag, error;
2717
2718 /* Suppress false-positive GCC uninitialized variable warnings */
2719 mv = 0;
2720 umv = 0;
2721
2722 slen = len;
2723 val = p = ptr;
2724
2725 if (ptr == NULL) {
2726 error = 0;
2727 goto out;
2728 }
2729
2730 /* We are going to print */
2731 g_ddb_sysctl_printed = 1;
2732
2733 xflag = g_ddb_sysctl_flags & DB_SYSCTL_HEX;
2734
2735 ctltype = (g_ddb_oid->oid_kind & CTLTYPE);
2736 sign = ctl_sign[ctltype];
2737 intlen = ctl_size[ctltype];
2738
2739 switch (ctltype) {
2740 case CTLTYPE_NODE:
2741 case CTLTYPE_STRING:
2742 db_printf("%.*s", (int) len, (const char *) p);
2743 error = 0;
2744 goto out;
2745
2746 case CTLTYPE_INT:
2747 case CTLTYPE_UINT:
2748 case CTLTYPE_LONG:
2749 case CTLTYPE_ULONG:
2750 case CTLTYPE_S8:
2751 case CTLTYPE_S16:
2752 case CTLTYPE_S32:
2753 case CTLTYPE_S64:
2754 case CTLTYPE_U8:
2755 case CTLTYPE_U16:
2756 case CTLTYPE_U32:
2757 case CTLTYPE_U64:
2758 hexlen = 2 + (intlen * CHAR_BIT + 3) / 4;
2759 sep1 = "";
2760 while (len >= intlen) {
2761 switch (ctltype) {
2762 case CTLTYPE_INT:
2763 case CTLTYPE_UINT:
2764 umv = *(const u_int *)p;
2765 mv = *(const int *)p;
2766 break;
2767 case CTLTYPE_LONG:
2768 case CTLTYPE_ULONG:
2769 umv = *(const u_long *)p;
2770 mv = *(const long *)p;
2771 break;
2772 case CTLTYPE_S8:
2773 case CTLTYPE_U8:
2774 umv = *(const uint8_t *)p;
2775 mv = *(const int8_t *)p;
2776 break;
2777 case CTLTYPE_S16:
2778 case CTLTYPE_U16:
2779 umv = *(const uint16_t *)p;
2780 mv = *(const int16_t *)p;
2781 break;
2782 case CTLTYPE_S32:
2783 case CTLTYPE_U32:
2784 umv = *(const uint32_t *)p;
2785 mv = *(const int32_t *)p;
2786 break;
2787 case CTLTYPE_S64:
2788 case CTLTYPE_U64:
2789 umv = *(const uint64_t *)p;
2790 mv = *(const int64_t *)p;
2791 break;
2792 }
2793
2794 db_printf("%s", sep1);
2795 if (xflag)
2796 db_printf("%#0*jx", hexlen, umv);
2797 else if (!sign)
2798 db_printf("%ju", umv);
2799 else if (g_ddb_oid->oid_fmt[1] == 'K') {
2800 /* Kelvins are currently unsupported. */
2801 error = EOPNOTSUPP;
2802 goto out;
2803 } else
2804 db_printf("%jd", mv);
2805
2806 sep1 = " ";
2807 len -= intlen;
2808 p += intlen;
2809 }
2810 error = 0;
2811 goto out;
2812
2813 case CTLTYPE_OPAQUE:
2814 /* TODO: Support struct functions. */
2815
2816 /* FALLTHROUGH */
2817 default:
2818 db_printf("Format:%s Length:%zu Dump:0x",
2819 g_ddb_oid->oid_fmt, len);
2820 while (len-- && (xflag || p < val + 16))
2821 db_printf("%02x", *p++);
2822 if (!xflag && len > 16)
2823 db_printf("...");
2824 error = 0;
2825 goto out;
2826 }
2827
2828 out:
2829 req->oldidx += slen;
2830 return (error);
2831 }
2832
2833 /*
2834 * Avoid setting new sysctl values from the debugger
2835 */
2836 static int
sysctl_new_ddb(struct sysctl_req * req,void * p,size_t l)2837 sysctl_new_ddb(struct sysctl_req *req, void *p, size_t l)
2838 {
2839
2840 if (!req->newptr)
2841 return (0);
2842
2843 /* Changing sysctls from the debugger is currently unsupported */
2844 return (EPERM);
2845 }
2846
2847 /*
2848 * Run a sysctl handler with the DDB oldfunc and newfunc attached.
2849 * Instead of copying any output to a buffer we'll dump it right to
2850 * the console.
2851 */
2852 static int
db_sysctl(struct sysctl_oid * oidp,int * name,u_int namelen,void * old,size_t * oldlenp,size_t * retval,int flags)2853 db_sysctl(struct sysctl_oid *oidp, int *name, u_int namelen,
2854 void *old, size_t *oldlenp, size_t *retval, int flags)
2855 {
2856 struct sysctl_req req;
2857 int error;
2858
2859 /* Setup the request */
2860 bzero(&req, sizeof req);
2861 req.td = kdb_thread;
2862 req.oldfunc = sysctl_old_ddb;
2863 req.newfunc = sysctl_new_ddb;
2864 req.lock = REQ_UNWIRED;
2865 if (oldlenp) {
2866 req.oldlen = *oldlenp;
2867 }
2868 req.validlen = req.oldlen;
2869 if (old) {
2870 req.oldptr = old;
2871 }
2872
2873 /* Setup our globals for sysctl_old_ddb */
2874 g_ddb_oid = oidp;
2875 g_ddb_sysctl_flags = flags;
2876 g_ddb_sysctl_printed = 0;
2877
2878 error = sysctl_root(0, name, namelen, &req);
2879
2880 /* Reset globals */
2881 g_ddb_oid = NULL;
2882 g_ddb_sysctl_flags = 0;
2883
2884 if (retval) {
2885 if (req.oldptr && req.oldidx > req.validlen)
2886 *retval = req.validlen;
2887 else
2888 *retval = req.oldidx;
2889 }
2890 return (error);
2891 }
2892
2893 /*
2894 * Show a sysctl's name
2895 */
2896 static void
db_show_oid_name(int * oid,size_t nlen)2897 db_show_oid_name(int *oid, size_t nlen)
2898 {
2899 struct sysctl_oid *oidp;
2900 int qoid[CTL_MAXNAME + 2];
2901 int error;
2902
2903 qoid[0] = CTL_SYSCTL;
2904 qoid[1] = CTL_SYSCTL_NAME;
2905 memcpy(qoid + 2, oid, nlen * sizeof(int));
2906
2907 error = sysctl_find_oid(qoid, nlen + 2, &oidp, NULL, NULL);
2908 if (error)
2909 db_error("sysctl name oid");
2910
2911 error = db_sysctl(oidp, qoid, nlen + 2, NULL, NULL, NULL, 0);
2912 if (error)
2913 db_error("sysctl name");
2914 }
2915
2916 /*
2917 * Check to see if an OID is safe to print from ddb.
2918 */
2919 static bool
db_oid_safe(const struct sysctl_oid * oidp)2920 db_oid_safe(const struct sysctl_oid *oidp)
2921 {
2922 for (unsigned int i = 0; i < nitems(db_safe_handlers); ++i) {
2923 if (oidp->oid_handler == db_safe_handlers[i])
2924 return (true);
2925 }
2926
2927 return (false);
2928 }
2929
2930 /*
2931 * Show a sysctl at a specific OID
2932 * Compare to the input handling in show_var from sbin/sysctl/sysctl.c
2933 */
2934 static int
db_show_oid(struct sysctl_oid * oidp,int * oid,size_t nlen,int flags)2935 db_show_oid(struct sysctl_oid *oidp, int *oid, size_t nlen, int flags)
2936 {
2937 int error, xflag, oflag, Nflag, nflag;
2938 size_t len;
2939
2940 xflag = flags & DB_SYSCTL_HEX;
2941 oflag = flags & DB_SYSCTL_OPAQUE;
2942 nflag = flags & DB_SYSCTL_VALUE_ONLY;
2943 Nflag = flags & DB_SYSCTL_NAME_ONLY;
2944
2945 if ((oidp->oid_kind & CTLTYPE) == CTLTYPE_OPAQUE &&
2946 (!xflag && !oflag))
2947 return (0);
2948
2949 if (Nflag) {
2950 db_show_oid_name(oid, nlen);
2951 error = 0;
2952 goto out;
2953 }
2954
2955 if (!nflag) {
2956 db_show_oid_name(oid, nlen);
2957 db_printf(": ");
2958 }
2959
2960 if ((flags & DB_SYSCTL_SAFE_ONLY) && !db_oid_safe(oidp)) {
2961 db_printf("Skipping, unsafe to print while recursing.");
2962 error = 0;
2963 goto out;
2964 }
2965
2966 /* Try once, and ask about the size */
2967 len = 0;
2968 error = db_sysctl(oidp, oid, nlen,
2969 NULL, NULL, &len, flags);
2970 if (error)
2971 goto out;
2972
2973 if (!g_ddb_sysctl_printed)
2974 /* Lie about the size */
2975 error = db_sysctl(oidp, oid, nlen,
2976 (void *) 1, &len, NULL, flags);
2977
2978 out:
2979 db_printf("\n");
2980 return (error);
2981 }
2982
2983 /*
2984 * Show all sysctls under a specific OID
2985 * Compare to sysctl_all from sbin/sysctl/sysctl.c
2986 */
2987 static int
db_show_sysctl_all(int * oid,size_t len,int flags)2988 db_show_sysctl_all(int *oid, size_t len, int flags)
2989 {
2990 struct sysctl_oid *oidp;
2991 int qoid[CTL_MAXNAME + 2], next[CTL_MAXNAME];
2992 size_t nlen;
2993
2994 qoid[0] = CTL_SYSCTL;
2995 qoid[1] = CTL_SYSCTL_NEXT;
2996 if (len) {
2997 nlen = len;
2998 memcpy(&qoid[2], oid, nlen * sizeof(int));
2999 } else {
3000 nlen = 1;
3001 qoid[2] = CTL_KERN;
3002 }
3003 for (;;) {
3004 int error;
3005 size_t nextsize = sizeof(next);
3006
3007 error = kernel_sysctl(kdb_thread, qoid, nlen + 2,
3008 next, &nextsize, NULL, 0, &nlen, 0);
3009 if (error != 0) {
3010 if (error == ENOENT)
3011 return (0);
3012 else
3013 db_error("sysctl(next)");
3014 }
3015
3016 nlen /= sizeof(int);
3017
3018 if (nlen < (unsigned int)len)
3019 return (0);
3020
3021 if (memcmp(&oid[0], &next[0], len * sizeof(int)) != 0)
3022 return (0);
3023
3024 /* Find the OID in question */
3025 error = sysctl_find_oid(next, nlen, &oidp, NULL, NULL);
3026 if (error)
3027 return (error);
3028
3029 (void)db_show_oid(oidp, next, nlen, flags | DB_SYSCTL_SAFE_ONLY);
3030
3031 if (db_pager_quit)
3032 return (0);
3033
3034 memcpy(&qoid[2 + len], &next[len], (nlen - len) * sizeof(int));
3035 }
3036 }
3037
3038 /*
3039 * Show a sysctl by its user facing string
3040 */
3041 static int
db_sysctlbyname(const char * name,int flags)3042 db_sysctlbyname(const char *name, int flags)
3043 {
3044 struct sysctl_oid *oidp;
3045 int oid[CTL_MAXNAME];
3046 int error, nlen;
3047
3048 error = name2oid(name, oid, &nlen, &oidp);
3049 if (error) {
3050 return (error);
3051 }
3052
3053 if ((oidp->oid_kind & CTLTYPE) == CTLTYPE_NODE) {
3054 db_show_sysctl_all(oid, nlen, flags);
3055 } else {
3056 error = db_show_oid(oidp, oid, nlen, flags);
3057 }
3058
3059 return (error);
3060 }
3061
/*
 * Print the usage text of the ddb "sysctl" command on the debugger
 * console.
 */
static void
db_sysctl_cmd_usage(void)
{
	db_printf(
	    " sysctl [/Nnox] <sysctl> \n"
	    " \n"
	    " <sysctl> The name of the sysctl to show. \n"
	    " \n"
	    " Show a sysctl by hooking into SYSCTL_IN and SYSCTL_OUT. \n"
	    " This will work for most sysctls, but should not be used \n"
	    " with sysctls that are known to malloc. \n"
	    " \n"
	    " While recursing any \"unsafe\" sysctls will be skipped. \n"
	    " Call sysctl directly on the sysctl to try printing the \n"
	    " skipped sysctl. This is unsafe and may make the ddb \n"
	    " session unusable. \n"
	    " \n"
	    " Arguments: \n"
	    " /N Display only the name of the sysctl. \n"
	    " /n Display only the value of the sysctl. \n"
	    " /o Display opaque values. \n"
	    " /x Display the sysctl in hex. \n"
	    " \n"
	    "For example: \n"
	    "sysctl vm.v_free_min \n"
	    /* The sample output must name the sysctl that was queried. */
	    "vm.v_free_min: 12669 \n"
	);
}
3090
3091 /*
3092 * Show a specific sysctl similar to sysctl (8).
3093 */
DB_COMMAND_FLAGS(sysctl,db_sysctl_cmd,CS_OWN)3094 DB_COMMAND_FLAGS(sysctl, db_sysctl_cmd, CS_OWN)
3095 {
3096 char name[TOK_STRING_SIZE];
3097 int error, i, t, flags;
3098
3099 /* Parse the modifiers */
3100 t = db_read_token();
3101 if (t == tSLASH || t == tMINUS) {
3102 t = db_read_token();
3103 if (t != tIDENT) {
3104 db_printf("Bad modifier\n");
3105 error = EINVAL;
3106 goto out;
3107 }
3108 db_strcpy(modif, db_tok_string);
3109 }
3110 else {
3111 db_unread_token(t);
3112 modif[0] = '\0';
3113 }
3114
3115 flags = 0;
3116 for (i = 0; i < nitems(db_sysctl_modifs); i++) {
3117 if (strchr(modif, db_sysctl_modifs[i])) {
3118 flags |= db_sysctl_modif_values[i];
3119 }
3120 }
3121
3122 /* Parse the sysctl names */
3123 t = db_read_token();
3124 if (t != tIDENT) {
3125 db_printf("Need sysctl name\n");
3126 error = EINVAL;
3127 goto out;
3128 }
3129
3130 /* Copy the name into a temporary buffer */
3131 db_strcpy(name, db_tok_string);
3132
3133 /* Ensure there is no trailing cruft */
3134 t = db_read_token();
3135 if (t != tEOL) {
3136 db_printf("Unexpected sysctl argument\n");
3137 error = EINVAL;
3138 goto out;
3139 }
3140
3141 error = db_sysctlbyname(name, flags);
3142 if (error == ENOENT) {
3143 db_printf("unknown oid: '%s'\n", db_tok_string);
3144 goto out;
3145 } else if (error) {
3146 db_printf("%s: error: %d\n", db_tok_string, error);
3147 goto out;
3148 }
3149
3150 out:
3151 /* Ensure we eat all of our text */
3152 db_flush_lex();
3153
3154 if (error == EINVAL) {
3155 db_sysctl_cmd_usage();
3156 }
3157 }
3158
3159 #endif /* DDB */
3160