kern_sysctl.c revision 1.255 1 /* $NetBSD: kern_sysctl.c,v 1.255 2015/04/14 06:08:03 nat Exp $ */
2
3 /*-
4 * Copyright (c) 2003, 2007, 2008 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Andrew Brown.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*-
33 * Copyright (c) 1982, 1986, 1989, 1993
34 * The Regents of the University of California. All rights reserved.
35 *
36 * This code is derived from software contributed to Berkeley by
37 * Mike Karels at Berkeley Software Design, Inc.
38 *
39 * Redistribution and use in source and binary forms, with or without
40 * modification, are permitted provided that the following conditions
41 * are met:
42 * 1. Redistributions of source code must retain the above copyright
43 * notice, this list of conditions and the following disclaimer.
44 * 2. Redistributions in binary form must reproduce the above copyright
45 * notice, this list of conditions and the following disclaimer in the
46 * documentation and/or other materials provided with the distribution.
47 * 3. Neither the name of the University nor the names of its contributors
48 * may be used to endorse or promote products derived from this software
49 * without specific prior written permission.
50 *
51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 * SUCH DAMAGE.
62 *
63 * @(#)kern_sysctl.c 8.9 (Berkeley) 5/20/95
64 */
65
66 /*
67 * sysctl system call.
68 */
69
70 #include <sys/cdefs.h>
71 __KERNEL_RCSID(0, "$NetBSD: kern_sysctl.c,v 1.255 2015/04/14 06:08:03 nat Exp $");
72
73 #include "opt_defcorename.h"
74 #include "ksyms.h"
75
76 #include <sys/param.h>
77 #define __COMPAT_SYSCTL
78 #include <sys/sysctl.h>
79 #include <sys/systm.h>
80 #include <sys/buf.h>
81 #include <sys/ksyms.h>
82 #include <sys/malloc.h>
83 #include <sys/mount.h>
84 #include <sys/syscallargs.h>
85 #include <sys/kauth.h>
86 #include <sys/ktrace.h>
87 #include <sys/rnd.h>
88 #include <sys/rndsource.h>
89
90 #define MAXDESCLEN 1024
91 MALLOC_DEFINE(M_SYSCTLNODE, "sysctlnode", "sysctl node structures");
92 MALLOC_DEFINE(M_SYSCTLDATA, "sysctldata", "misc sysctl data");
93
94 static int sysctl_mmap(SYSCTLFN_PROTO);
95 static int sysctl_alloc(struct sysctlnode *, int);
96 static int sysctl_realloc(struct sysctlnode *);
97
98 static int sysctl_cvt_in(struct lwp *, int *, const void *, size_t,
99 struct sysctlnode *);
100 static int sysctl_cvt_out(struct lwp *, int, const struct sysctlnode *,
101 void *, size_t, size_t *);
102
103 static int sysctl_log_add(struct sysctllog **, const struct sysctlnode *);
104 static int sysctl_log_realloc(struct sysctllog *);
105
106 typedef void sysctl_setup_func(struct sysctllog **);
107
108 #ifdef SYSCTL_DEBUG
109 #define DPRINTF(a) printf a
110 #else
111 #define DPRINTF(a)
112 #endif
113
/*
 * Log of nodes, handled by sysctl_log_add() and sysctl_log_realloc().
 * NOTE(review): the member descriptions below are inferred from the
 * member names only; the code that uses them is outside this part of
 * the file -- confirm.
 */
struct sysctllog {
	const struct sysctlnode	*log_root;	/* presumably the tree logged */
	int			*log_num;	/* presumably the logged mib numbers */
	int			log_size;	/* presumably the capacity of log_num */
	int			log_left;	/* presumably the unused part of log_num */
};
119
120 /*
121 * the "root" of the new sysctl tree
122 */
123 struct sysctlnode sysctl_root = {
124 .sysctl_flags = SYSCTL_VERSION|
125 CTLFLAG_ROOT|CTLFLAG_READWRITE|
126 CTLTYPE_NODE,
127 .sysctl_num = 0,
128 .sysctl_size = sizeof(struct sysctlnode),
129 .sysctl_name = "(root)",
130 };
131
132 /*
133 * link set of functions that add nodes at boot time (see also
134 * sysctl_buildtree())
135 */
136 __link_set_decl(sysctl_funcs, sysctl_setup_func);
137
138 /*
139 * The `sysctl_treelock' is intended to serialize access to the sysctl
140 * tree. XXX This has serious problems; allocating memory and
141 * copying data out with the lock held is insane.
142 */
143 krwlock_t sysctl_treelock;
144
145 kmutex_t sysctl_file_marker_lock;
146
147 /*
148 * Attributes stored in the kernel.
149 */
150 char hostname[MAXHOSTNAMELEN];
151 int hostnamelen;
152
153 char domainname[MAXHOSTNAMELEN];
154 int domainnamelen;
155
156 long hostid;
157
158 #ifndef DEFCORENAME
159 #define DEFCORENAME "%n.core"
160 #endif
161 char defcorename[MAXPATHLEN] = DEFCORENAME;
162
163 /*
164 * ********************************************************************
165 * Section 0: Some simple glue
166 * ********************************************************************
167 * By wrapping copyin(), copyout(), and copyinstr() like this, we can
168 * stop caring about who's calling us and simplify some code a bunch.
169 * ********************************************************************
170 */
171 int
172 sysctl_copyin(struct lwp *l, const void *uaddr, void *kaddr, size_t len)
173 {
174 int error;
175
176 if (l != NULL) {
177 error = copyin(uaddr, kaddr, len);
178 ktrmibio(-1, UIO_WRITE, uaddr, len, error);
179 } else {
180 error = kcopy(uaddr, kaddr, len);
181 }
182
183 return error;
184 }
185
186 int
187 sysctl_copyout(struct lwp *l, const void *kaddr, void *uaddr, size_t len)
188 {
189 int error;
190
191 if (l != NULL) {
192 error = copyout(kaddr, uaddr, len);
193 ktrmibio(-1, UIO_READ, uaddr, len, error);
194 } else {
195 error = kcopy(kaddr, uaddr, len);
196 }
197
198 return error;
199 }
200
201 int
202 sysctl_copyinstr(struct lwp *l, const void *uaddr, void *kaddr,
203 size_t len, size_t *done)
204 {
205 int error;
206
207 if (l != NULL) {
208 error = copyinstr(uaddr, kaddr, len, done);
209 ktrmibio(-1, UIO_WRITE, uaddr, len, error);
210 } else {
211 error = copystr(uaddr, kaddr, len, done);
212 }
213
214 return error;
215 }
216
217 /*
218 * ********************************************************************
219 * Initialize sysctl subsystem.
220 * ********************************************************************
221 */
222 void
223 sysctl_init(void)
224 {
225 sysctl_setup_func *const *sysctl_setup;
226
227 rw_init(&sysctl_treelock);
228
229 /*
230 * dynamic mib numbers start here
231 */
232 sysctl_root.sysctl_num = CREATE_BASE;
233 sysctl_basenode_init();
234
235 __link_set_foreach(sysctl_setup, sysctl_funcs) {
236 (**sysctl_setup)(NULL);
237 }
238
239 mutex_init(&sysctl_file_marker_lock, MUTEX_DEFAULT, IPL_NONE);
240 }
241
242 /*
243 * Setting this means no more permanent nodes can be added,
244 * trees that claim to be readonly at the root now are, and if
245 * the main tree is readonly, *everything* is.
246 *
247 * Also starts up the PRNG used for the "random" sysctl: it's
248 * better to start it later than sooner.
249 *
250 * Call this at the end of kernel init.
251 */
252 void
253 sysctl_finalize(void)
254 {
255
256 sysctl_root.sysctl_flags |= CTLFLAG_PERMANENT;
257 }
258
259 /*
260 * ********************************************************************
261 * The main native sysctl system call itself.
262 * ********************************************************************
263 */
264 int
265 sys___sysctl(struct lwp *l, const struct sys___sysctl_args *uap, register_t *retval)
266 {
267 /* {
268 syscallarg(const int *) name;
269 syscallarg(u_int) namelen;
270 syscallarg(void *) old;
271 syscallarg(size_t *) oldlenp;
272 syscallarg(const void *) new;
273 syscallarg(size_t) newlen;
274 } */
275 int error, nerror, name[CTL_MAXNAME];
276 size_t oldlen, savelen, *oldlenp;
277
278 /*
279 * get oldlen
280 */
281 oldlen = 0;
282 oldlenp = SCARG(uap, oldlenp);
283 if (oldlenp != NULL) {
284 error = copyin(oldlenp, &oldlen, sizeof(oldlen));
285 if (error)
286 return (error);
287 }
288 savelen = oldlen;
289
290 /*
291 * top-level sysctl names may or may not be non-terminal, but
292 * we don't care
293 */
294 if (SCARG(uap, namelen) > CTL_MAXNAME || SCARG(uap, namelen) < 1)
295 return (EINVAL);
296 error = copyin(SCARG(uap, name), &name,
297 SCARG(uap, namelen) * sizeof(int));
298 if (error)
299 return (error);
300
301 ktrmib(name, SCARG(uap, namelen));
302
303 sysctl_lock(SCARG(uap, newv) != NULL);
304
305 /*
306 * do sysctl work (NULL means main built-in default tree)
307 */
308 error = sysctl_dispatch(&name[0], SCARG(uap, namelen),
309 SCARG(uap, oldv), &oldlen,
310 SCARG(uap, newv), SCARG(uap, newlen),
311 &name[0], l, NULL);
312
313 /*
314 * release the sysctl lock
315 */
316 sysctl_unlock();
317
318 /*
319 * set caller's oldlen to new value even in the face of an
320 * error (if this gets an error and they didn't have one, they
321 * get this one)
322 */
323 if (oldlenp) {
324 nerror = copyout(&oldlen, oldlenp, sizeof(oldlen));
325 if (error == 0)
326 error = nerror;
327 }
328
329 /*
330 * if the only problem is that we weren't given enough space,
331 * that's an ENOMEM error
332 */
333 if (error == 0 && SCARG(uap, oldv) != NULL && savelen < oldlen)
334 error = ENOMEM;
335
336 return (error);
337 }
338
339 /*
340 * ********************************************************************
341 * Section 1: How the tree is used
342 * ********************************************************************
343 * Implementations of sysctl for emulations should typically need only
344 * these three functions in this order: lock the tree, dispatch
345 * request into it, unlock the tree.
346 * ********************************************************************
347 */
348 void
349 sysctl_lock(bool write)
350 {
351
352 if (write) {
353 rw_enter(&sysctl_treelock, RW_WRITER);
354 curlwp->l_pflag |= LP_SYSCTLWRITE;
355 } else {
356 rw_enter(&sysctl_treelock, RW_READER);
357 curlwp->l_pflag &= ~LP_SYSCTLWRITE;
358 }
359 }
360
361 void
362 sysctl_relock(void)
363 {
364
365 if ((curlwp->l_pflag & LP_SYSCTLWRITE) != 0) {
366 rw_enter(&sysctl_treelock, RW_WRITER);
367 } else {
368 rw_enter(&sysctl_treelock, RW_READER);
369 }
370 }
371
372 /*
373 * ********************************************************************
374 * the main sysctl dispatch routine. scans the given tree and picks a
375 * function to call based on what it finds.
376 * ********************************************************************
377 */
378 int
379 sysctl_dispatch(SYSCTLFN_ARGS)
380 {
381 int error;
382 sysctlfn fn;
383 int ni;
384
385 KASSERT(rw_lock_held(&sysctl_treelock));
386
387 if (rnode && SYSCTL_VERS(rnode->sysctl_flags) != SYSCTL_VERSION) {
388 printf("sysctl_dispatch: rnode %p wrong version\n", rnode);
389 error = EINVAL;
390 goto out;
391 }
392
393 fn = NULL;
394 error = sysctl_locate(l, name, namelen, &rnode, &ni);
395
396 if (rnode->sysctl_func != NULL) {
397 /*
398 * the node we ended up at has a function, so call it. it can
399 * hand off to query or create if it wants to.
400 */
401 fn = rnode->sysctl_func;
402 } else if (error == 0) {
403 /*
404 * we found the node they were looking for, so do a lookup.
405 */
406 fn = (sysctlfn)sysctl_lookup; /* XXX may write to rnode */
407 } else if (error == ENOENT && (ni + 1) == namelen && name[ni] < 0) {
408 /*
409 * prospective parent node found, but the terminal node was
410 * not. generic operations associate with the parent.
411 */
412 switch (name[ni]) {
413 case CTL_QUERY:
414 fn = sysctl_query;
415 break;
416 case CTL_CREATE:
417 #if NKSYMS > 0
418 case CTL_CREATESYM:
419 #endif /* NKSYMS > 0 */
420 if (newp == NULL) {
421 error = EINVAL;
422 break;
423 }
424 KASSERT(rw_write_held(&sysctl_treelock));
425 fn = (sysctlfn)sysctl_create; /* we own the rnode */
426 break;
427 case CTL_DESTROY:
428 if (newp == NULL) {
429 error = EINVAL;
430 break;
431 }
432 KASSERT(rw_write_held(&sysctl_treelock));
433 fn = (sysctlfn)sysctl_destroy; /* we own the rnode */
434 break;
435 case CTL_MMAP:
436 fn = (sysctlfn)sysctl_mmap; /* we own the rnode */
437 break;
438 case CTL_DESCRIBE:
439 fn = sysctl_describe;
440 break;
441 default:
442 error = EOPNOTSUPP;
443 break;
444 }
445 }
446
447 /*
448 * after all of that, maybe we found someone who knows how to
449 * get us what we want?
450 */
451 if (fn != NULL)
452 error = (*fn)(name + ni, namelen - ni, oldp, oldlenp,
453 newp, newlen, name, l, rnode);
454 else if (error == 0)
455 error = EOPNOTSUPP;
456
457 out:
458 return (error);
459 }
460
461 /*
462 * ********************************************************************
463 * Releases the tree lock.
464 * ********************************************************************
465 */
466 void
467 sysctl_unlock(void)
468 {
469
470 rw_exit(&sysctl_treelock);
471 }
472
473 /*
474 * ********************************************************************
475 * Section 2: The main tree interfaces
476 * ********************************************************************
477 * This is how sysctl_dispatch() does its work, and you can too, by
478 * calling these routines from helpers (though typically only
479 * sysctl_lookup() will be used). The tree MUST BE LOCKED when these
480 * are called.
481 * ********************************************************************
482 */
483
484 /*
485 * sysctl_locate -- Finds the node matching the given mib under the
486 * given tree (via rv). If no tree is given, we fall back to the
487 * native tree. The current process (via l) is used for access
488 * control on the tree (some nodes may be traversable only by root) and
489 * on return, nip will show how many numbers in the mib were consumed.
490 */
491 int
492 sysctl_locate(struct lwp *l, const int *name, u_int namelen,
493 const struct sysctlnode **rnode, int *nip)
494 {
495 const struct sysctlnode *node, *pnode;
496 int tn, si, ni, error, alias;
497
498 KASSERT(rw_lock_held(&sysctl_treelock));
499
500 /*
501 * basic checks and setup
502 */
503 if (*rnode == NULL)
504 *rnode = &sysctl_root;
505 if (nip)
506 *nip = 0;
507 if (namelen == 0)
508 return (0);
509
510 /*
511 * search starts from "root"
512 */
513 pnode = *rnode;
514 if (SYSCTL_VERS(pnode->sysctl_flags) != SYSCTL_VERSION) {
515 printf("sysctl_locate: pnode %p wrong version\n", pnode);
516 return (EINVAL);
517 }
518 node = pnode->sysctl_child;
519 error = 0;
520
521 /*
522 * scan for node to which new node should be attached
523 */
524 for (ni = 0; ni < namelen; ni++) {
525 /*
526 * walked off bottom of tree
527 */
528 if (node == NULL) {
529 if (SYSCTL_TYPE(pnode->sysctl_flags) == CTLTYPE_NODE)
530 error = ENOENT;
531 else
532 error = ENOTDIR;
533 break;
534 }
535 /*
536 * can anyone traverse this node or only root?
537 */
538 if (l != NULL && (pnode->sysctl_flags & CTLFLAG_PRIVATE) &&
539 (error = kauth_authorize_system(l->l_cred,
540 KAUTH_SYSTEM_SYSCTL, KAUTH_REQ_SYSTEM_SYSCTL_PRVT,
541 NULL, NULL, NULL)) != 0)
542 return (error);
543 /*
544 * find a child node with the right number
545 */
546 tn = name[ni];
547 alias = 0;
548
549 si = 0;
550 /*
551 * Note: ANYNUMBER only matches positive integers.
552 * Since ANYNUMBER is only permitted on single-node
553 * sub-trees (eg proc), check before the loop and skip
554 * it if we can.
555 */
556 if ((node[si].sysctl_flags & CTLFLAG_ANYNUMBER) && (tn >= 0))
557 goto foundit;
558 for (; si < pnode->sysctl_clen; si++) {
559 if (node[si].sysctl_num == tn) {
560 if (node[si].sysctl_flags & CTLFLAG_ALIAS) {
561 if (alias++ == 4)
562 break;
563 else {
564 tn = node[si].sysctl_alias;
565 si = -1;
566 }
567 } else
568 goto foundit;
569 }
570 }
571 /*
572 * if we ran off the end, it obviously doesn't exist
573 */
574 error = ENOENT;
575 break;
576
577 /*
578 * so far so good, move on down the line
579 */
580 foundit:
581 pnode = &node[si];
582 if (SYSCTL_TYPE(pnode->sysctl_flags) == CTLTYPE_NODE)
583 node = node[si].sysctl_child;
584 else
585 node = NULL;
586 }
587
588 *rnode = pnode;
589 if (nip)
590 *nip = ni;
591
592 return (error);
593 }
594
595 /*
596 * sysctl_query -- The auto-discovery engine. Copies out the structs
597 * describing nodes under the given node and handles overlay trees.
598 */
599 int
600 sysctl_query(SYSCTLFN_ARGS)
601 {
602 int error, ni, elim, v;
603 size_t out, left, t;
604 const struct sysctlnode *enode, *onode;
605 struct sysctlnode qnode;
606
607 KASSERT(rw_lock_held(&sysctl_treelock));
608
609 if (SYSCTL_VERS(rnode->sysctl_flags) != SYSCTL_VERSION) {
610 printf("sysctl_query: rnode %p wrong version\n", rnode);
611 return (EINVAL);
612 }
613
614 if (SYSCTL_TYPE(rnode->sysctl_flags) != CTLTYPE_NODE)
615 return (ENOTDIR);
616 if (namelen != 1 || name[0] != CTL_QUERY)
617 return (EINVAL);
618
619 error = 0;
620 out = 0;
621 left = *oldlenp;
622 elim = 0;
623 enode = NULL;
624
625 /*
626 * translate the given request to a current node
627 */
628 error = sysctl_cvt_in(l, &v, newp, newlen, &qnode);
629 if (error)
630 return (error);
631
632 /*
633 * if the request specifies a version, check it
634 */
635 if (qnode.sysctl_ver != 0) {
636 enode = rnode;
637 if (qnode.sysctl_ver != enode->sysctl_ver &&
638 qnode.sysctl_ver != sysctl_rootof(enode)->sysctl_ver)
639 return (EINVAL);
640 }
641
642 /*
643 * process has overlay tree
644 */
645 if (l && l->l_proc->p_emul->e_sysctlovly) {
646 enode = l->l_proc->p_emul->e_sysctlovly;
647 elim = (name - oname);
648 error = sysctl_locate(l, oname, elim, &enode, NULL);
649 if (error == 0) {
650 /* ah, found parent in overlay */
651 elim = enode->sysctl_clen;
652 enode = enode->sysctl_child;
653 } else {
654 error = 0;
655 elim = 0;
656 enode = NULL;
657 }
658 }
659
660 for (ni = 0; ni < rnode->sysctl_clen; ni++) {
661 onode = &rnode->sysctl_child[ni];
662 if (enode && enode->sysctl_num == onode->sysctl_num) {
663 if (SYSCTL_TYPE(enode->sysctl_flags) != CTLTYPE_NODE)
664 onode = enode;
665 if (--elim > 0)
666 enode++;
667 else
668 enode = NULL;
669 }
670 error = sysctl_cvt_out(l, v, onode, oldp, left, &t);
671 if (error)
672 return (error);
673 if (oldp != NULL)
674 oldp = (char*)oldp + t;
675 out += t;
676 left -= MIN(left, t);
677 }
678
679 /*
680 * overlay trees *MUST* be entirely consumed
681 */
682 KASSERT(enode == NULL);
683
684 *oldlenp = out;
685
686 return (error);
687 }
688
689 /*
690 * sysctl_create -- Adds a node (the description of which is taken
691 * from newp) to the tree, returning a copy of it in the space pointed
692 * to by oldp. In the event that the requested slot is already taken
693 * (either by name or by number), the offending node is returned
694 * instead. Yes, this is complex, but we want to make sure everything
695 * is proper.
696 */
697 #ifdef SYSCTL_DEBUG_CREATE
698 int _sysctl_create(SYSCTLFN_ARGS);
699 int
700 _sysctl_create(SYSCTLFN_ARGS)
701 #else
702 int
703 sysctl_create(SYSCTLFN_ARGS)
704 #endif
705 {
706 struct sysctlnode nnode, *node, *pnode;
707 int error, ni, at, nm, type, nsz, sz, flags, anum, v;
708 void *own;
709
710 KASSERT(rw_write_held(&sysctl_treelock));
711
712 error = 0;
713 own = NULL;
714 anum = -1;
715
716 if (SYSCTL_VERS(rnode->sysctl_flags) != SYSCTL_VERSION) {
717 printf("sysctl_create: rnode %p wrong version\n", rnode);
718 return (EINVAL);
719 }
720
721 if (namelen != 1 || (name[namelen - 1] != CTL_CREATE
722 #if NKSYMS > 0
723 && name[namelen - 1] != CTL_CREATESYM
724 #endif /* NKSYMS > 0 */
725 ))
726 return (EINVAL);
727
728 /*
729 * processes can only add nodes at securelevel 0, must be
730 * root, and can't add nodes to a parent that's not writeable
731 */
732 if (l != NULL) {
733 #ifndef SYSCTL_DISALLOW_CREATE
734 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_SYSCTL,
735 KAUTH_REQ_SYSTEM_SYSCTL_ADD, NULL, NULL, NULL);
736 if (error)
737 return (error);
738 if (!(rnode->sysctl_flags & CTLFLAG_READWRITE))
739 #endif /* SYSCTL_DISALLOW_CREATE */
740 return (EPERM);
741 }
742
743 /*
744 * nothing can add a node if:
745 * we've finished initial set up of this tree and
746 * (the tree itself is not writeable or
747 * the entire sysctl system is not writeable)
748 */
749 if ((sysctl_rootof(rnode)->sysctl_flags & CTLFLAG_PERMANENT) &&
750 (!(sysctl_rootof(rnode)->sysctl_flags & CTLFLAG_READWRITE) ||
751 !(sysctl_root.sysctl_flags & CTLFLAG_READWRITE)))
752 return (EPERM);
753
754 /*
755 * it must be a "node", not a "int" or something
756 */
757 if (SYSCTL_TYPE(rnode->sysctl_flags) != CTLTYPE_NODE)
758 return (ENOTDIR);
759 if (rnode->sysctl_flags & CTLFLAG_ALIAS) {
760 printf("sysctl_create: attempt to add node to aliased "
761 "node %p\n", rnode);
762 return (EINVAL);
763 }
764 pnode = __UNCONST(rnode); /* we are adding children to this node */
765
766 if (newp == NULL)
767 return (EINVAL);
768 error = sysctl_cvt_in(l, &v, newp, newlen, &nnode);
769 if (error)
770 return (error);
771
772 /*
773 * nodes passed in don't *have* parents
774 */
775 if (nnode.sysctl_parent != NULL)
776 return (EINVAL);
777
778 /*
779 * if we are indeed adding it, it should be a "good" name and
780 * number
781 */
782 nm = nnode.sysctl_num;
783 #if NKSYMS > 0
784 if (nm == CTL_CREATESYM)
785 nm = CTL_CREATE;
786 #endif /* NKSYMS > 0 */
787 if (nm < 0 && nm != CTL_CREATE)
788 return (EINVAL);
789
790 /*
791 * the name can't start with a digit
792 */
793 if (nnode.sysctl_name[0] >= '0' &&
794 nnode.sysctl_name[0] <= '9')
795 return (EINVAL);
796
797 /*
798 * the name must be only alphanumerics or - or _, longer than
799 * 0 bytes and less that SYSCTL_NAMELEN
800 */
801 nsz = 0;
802 while (nsz < SYSCTL_NAMELEN && nnode.sysctl_name[nsz] != '\0') {
803 if ((nnode.sysctl_name[nsz] >= '0' &&
804 nnode.sysctl_name[nsz] <= '9') ||
805 (nnode.sysctl_name[nsz] >= 'A' &&
806 nnode.sysctl_name[nsz] <= 'Z') ||
807 (nnode.sysctl_name[nsz] >= 'a' &&
808 nnode.sysctl_name[nsz] <= 'z') ||
809 nnode.sysctl_name[nsz] == '-' ||
810 nnode.sysctl_name[nsz] == '_')
811 nsz++;
812 else
813 return (EINVAL);
814 }
815 if (nsz == 0 || nsz == SYSCTL_NAMELEN)
816 return (EINVAL);
817
818 /*
819 * various checks revolve around size vs type, etc
820 */
821 type = SYSCTL_TYPE(nnode.sysctl_flags);
822 flags = SYSCTL_FLAGS(nnode.sysctl_flags);
823 sz = nnode.sysctl_size;
824
825 /*
826 * find out if there's a collision, and if so, let the caller
827 * know what they collided with
828 */
829 node = pnode->sysctl_child;
830 at = 0;
831 if (node) {
832 if ((flags | node->sysctl_flags) & CTLFLAG_ANYNUMBER)
833 /* No siblings for a CTLFLAG_ANYNUMBER node */
834 return EINVAL;
835 for (ni = 0; ni < pnode->sysctl_clen; ni++) {
836 if (nm == node[ni].sysctl_num ||
837 strcmp(nnode.sysctl_name, node[ni].sysctl_name) == 0) {
838 /*
839 * ignore error here, since we
840 * are already fixed on EEXIST
841 */
842 (void)sysctl_cvt_out(l, v, &node[ni], oldp,
843 *oldlenp, oldlenp);
844 return (EEXIST);
845 }
846 if (nm > node[ni].sysctl_num)
847 at++;
848 }
849 }
850
851 /*
852 * use sysctl_ver to add to the tree iff it hasn't changed
853 */
854 if (nnode.sysctl_ver != 0) {
855 /*
856 * a specified value must match either the parent
857 * node's version or the root node's version
858 */
859 if (nnode.sysctl_ver != sysctl_rootof(rnode)->sysctl_ver &&
860 nnode.sysctl_ver != rnode->sysctl_ver) {
861 return (EINVAL);
862 }
863 }
864
865 /*
866 * only the kernel can assign functions to entries
867 */
868 if (l != NULL && nnode.sysctl_func != NULL)
869 return (EPERM);
870
871 /*
872 * only the kernel can create permanent entries, and only then
873 * before the kernel is finished setting itself up
874 */
875 if (l != NULL && (flags & ~SYSCTL_USERFLAGS))
876 return (EPERM);
877 if ((flags & CTLFLAG_PERMANENT) &
878 (sysctl_root.sysctl_flags & CTLFLAG_PERMANENT))
879 return (EPERM);
880 if ((flags & (CTLFLAG_OWNDATA | CTLFLAG_IMMEDIATE)) ==
881 (CTLFLAG_OWNDATA | CTLFLAG_IMMEDIATE))
882 return (EINVAL);
883 if ((flags & CTLFLAG_IMMEDIATE) &&
884 type != CTLTYPE_INT && type != CTLTYPE_QUAD && type != CTLTYPE_BOOL)
885 return (EINVAL);
886
887 /*
888 * check size, or set it if unset and we can figure it out.
889 * kernel created nodes are allowed to have a function instead
890 * of a size (or a data pointer).
891 */
892 switch (type) {
893 case CTLTYPE_NODE:
894 /*
895 * only *i* can assert the size of a node
896 */
897 if (flags & CTLFLAG_ALIAS) {
898 anum = nnode.sysctl_alias;
899 if (anum < 0)
900 return (EINVAL);
901 nnode.sysctl_alias = 0;
902 }
903 if (sz != 0 || nnode.sysctl_data != NULL)
904 return (EINVAL);
905 if (nnode.sysctl_csize != 0 ||
906 nnode.sysctl_clen != 0 ||
907 nnode.sysctl_child != 0)
908 return (EINVAL);
909 if (flags & CTLFLAG_OWNDATA)
910 return (EINVAL);
911 sz = sizeof(struct sysctlnode);
912 break;
913 case CTLTYPE_INT:
914 /*
915 * since an int is an int, if the size is not given or
916 * is wrong, we can "int-uit" it.
917 */
918 if (sz != 0 && sz != sizeof(int))
919 return (EINVAL);
920 sz = sizeof(int);
921 break;
922 case CTLTYPE_STRING:
923 /*
924 * strings are a little more tricky
925 */
926 if (sz == 0) {
927 if (l == NULL) {
928 if (nnode.sysctl_func == NULL) {
929 if (nnode.sysctl_data == NULL)
930 return (EINVAL);
931 else
932 sz = strlen(nnode.sysctl_data) +
933 1;
934 }
935 } else if (nnode.sysctl_data == NULL &&
936 flags & CTLFLAG_OWNDATA) {
937 return (EINVAL);
938 } else {
939 char *vp, *e;
940 size_t s;
941
942 /*
943 * we want a rough idea of what the
944 * size is now
945 */
946 vp = malloc(PAGE_SIZE, M_SYSCTLDATA,
947 M_WAITOK|M_CANFAIL);
948 if (vp == NULL)
949 return (ENOMEM);
950 e = nnode.sysctl_data;
951 do {
952 error = copyinstr(e, vp, PAGE_SIZE, &s);
953 if (error) {
954 if (error != ENAMETOOLONG) {
955 free(vp, M_SYSCTLDATA);
956 return (error);
957 }
958 e += PAGE_SIZE;
959 if ((e - 32 * PAGE_SIZE) >
960 (char*)nnode.sysctl_data) {
961 free(vp, M_SYSCTLDATA);
962 return (ERANGE);
963 }
964 }
965 } while (error != 0);
966 sz = s + (e - (char*)nnode.sysctl_data);
967 free(vp, M_SYSCTLDATA);
968 }
969 }
970 break;
971 case CTLTYPE_QUAD:
972 if (sz != 0 && sz != sizeof(u_quad_t))
973 return (EINVAL);
974 sz = sizeof(u_quad_t);
975 break;
976 case CTLTYPE_BOOL:
977 /*
978 * since an bool is an bool, if the size is not given or
979 * is wrong, we can "intuit" it.
980 */
981 if (sz != 0 && sz != sizeof(bool))
982 return (EINVAL);
983 sz = sizeof(bool);
984 break;
985 case CTLTYPE_STRUCT:
986 if (sz == 0) {
987 if (l != NULL || nnode.sysctl_func == NULL)
988 return (EINVAL);
989 if (flags & CTLFLAG_OWNDATA)
990 return (EINVAL);
991 }
992 break;
993 default:
994 return (EINVAL);
995 }
996
997 /*
998 * at this point, if sz is zero, we *must* have a
999 * function to go with it and we can't own it.
1000 */
1001
1002 /*
1003 * l ptr own
1004 * 0 0 0 -> EINVAL (if no func)
1005 * 0 0 1 -> own
1006 * 0 1 0 -> kptr
1007 * 0 1 1 -> kptr
1008 * 1 0 0 -> EINVAL
1009 * 1 0 1 -> own
1010 * 1 1 0 -> kptr, no own (fault on lookup)
1011 * 1 1 1 -> uptr, own
1012 */
1013 if (type != CTLTYPE_NODE) {
1014 if (sz != 0) {
1015 if (flags & CTLFLAG_OWNDATA) {
1016 own = malloc(sz, M_SYSCTLDATA,
1017 M_WAITOK|M_CANFAIL);
1018 if (own == NULL)
1019 return ENOMEM;
1020 if (nnode.sysctl_data == NULL)
1021 memset(own, 0, sz);
1022 else {
1023 error = sysctl_copyin(l,
1024 nnode.sysctl_data, own, sz);
1025 if (error != 0) {
1026 free(own, M_SYSCTLDATA);
1027 return (error);
1028 }
1029 }
1030 } else if ((nnode.sysctl_data != NULL) &&
1031 !(flags & CTLFLAG_IMMEDIATE)) {
1032 #if NKSYMS > 0
1033 if (name[namelen - 1] == CTL_CREATESYM) {
1034 char symname[128]; /* XXX enough? */
1035 u_long symaddr;
1036 size_t symlen;
1037
1038 error = sysctl_copyinstr(l,
1039 nnode.sysctl_data, symname,
1040 sizeof(symname), &symlen);
1041 if (error)
1042 return (error);
1043 error = ksyms_getval(NULL, symname,
1044 &symaddr, KSYMS_EXTERN);
1045 if (error)
1046 return (error); /* EINVAL? */
1047 nnode.sysctl_data = (void*)symaddr;
1048 }
1049 #endif /* NKSYMS > 0 */
1050 /*
1051 * Ideally, we'd like to verify here
1052 * that this address is acceptable,
1053 * but...
1054 *
1055 * - it might be valid now, only to
1056 * become invalid later
1057 *
1058 * - it might be invalid only for the
1059 * moment and valid later
1060 *
1061 * - or something else.
1062 *
1063 * Since we can't get a good answer,
1064 * we'll just accept the address as
1065 * given, and fault on individual
1066 * lookups.
1067 */
1068 }
1069 } else if (nnode.sysctl_func == NULL)
1070 return (EINVAL);
1071 }
1072
1073 /*
1074 * a process can't assign a function to a node, and the kernel
1075 * can't create a node that has no function or data.
1076 * (XXX somewhat redundant check)
1077 */
1078 if (l != NULL || nnode.sysctl_func == NULL) {
1079 if (type != CTLTYPE_NODE &&
1080 nnode.sysctl_data == NULL &&
1081 !(flags & CTLFLAG_IMMEDIATE) &&
1082 own == NULL)
1083 return (EINVAL);
1084 }
1085
1086 #ifdef SYSCTL_DISALLOW_KWRITE
1087 /*
1088 * a process can't create a writable node unless it refers to
1089 * new data.
1090 */
1091 if (l != NULL && own == NULL && type != CTLTYPE_NODE &&
1092 (flags & CTLFLAG_READWRITE) != CTLFLAG_READONLY &&
1093 !(flags & CTLFLAG_IMMEDIATE))
1094 return (EPERM);
1095 #endif /* SYSCTL_DISALLOW_KWRITE */
1096
1097 /*
1098 * make sure there's somewhere to put the new stuff.
1099 */
1100 if (pnode->sysctl_child == NULL) {
1101 if (flags & CTLFLAG_ANYNUMBER)
1102 error = sysctl_alloc(pnode, 1);
1103 else
1104 error = sysctl_alloc(pnode, 0);
1105 if (error) {
1106 if (own != NULL)
1107 free(own, M_SYSCTLDATA);
1108 return (error);
1109 }
1110 }
1111 node = pnode->sysctl_child;
1112
1113 /*
1114 * no collisions, so pick a good dynamic number if we need to.
1115 */
1116 if (nm == CTL_CREATE) {
1117 nm = ++sysctl_root.sysctl_num;
1118 for (ni = 0; ni < pnode->sysctl_clen; ni++) {
1119 if (nm == node[ni].sysctl_num) {
1120 nm++;
1121 ni = -1;
1122 } else if (nm > node[ni].sysctl_num)
1123 at = ni + 1;
1124 }
1125 }
1126
1127 /*
1128 * oops...ran out of space
1129 */
1130 if (pnode->sysctl_clen == pnode->sysctl_csize) {
1131 error = sysctl_realloc(pnode);
1132 if (error) {
1133 if (own != NULL)
1134 free(own, M_SYSCTLDATA);
1135 return (error);
1136 }
1137 node = pnode->sysctl_child;
1138 }
1139
1140 /*
1141 * insert new node data
1142 */
1143 if (at < pnode->sysctl_clen) {
1144 int t;
1145
1146 /*
1147 * move the nodes that should come after the new one
1148 */
1149 memmove(&node[at + 1], &node[at],
1150 (pnode->sysctl_clen - at) * sizeof(struct sysctlnode));
1151 memset(&node[at], 0, sizeof(struct sysctlnode));
1152 node[at].sysctl_parent = pnode;
1153 /*
1154 * and...reparent any children of any moved nodes
1155 */
1156 for (ni = at; ni <= pnode->sysctl_clen; ni++)
1157 if (node[ni].sysctl_child != NULL)
1158 for (t = 0; t < node[ni].sysctl_csize; t++)
1159 node[ni].sysctl_child[t].sysctl_parent =
1160 &node[ni];
1161 }
1162 node = &node[at];
1163 pnode->sysctl_clen++;
1164
1165 strlcpy(node->sysctl_name, nnode.sysctl_name,
1166 sizeof(node->sysctl_name));
1167 node->sysctl_num = nm;
1168 node->sysctl_size = sz;
1169 node->sysctl_flags = SYSCTL_VERSION|type|flags; /* XXX other trees */
1170 node->sysctl_csize = 0;
1171 node->sysctl_clen = 0;
1172 if (own) {
1173 node->sysctl_data = own;
1174 node->sysctl_flags |= CTLFLAG_OWNDATA;
1175 } else if (flags & CTLFLAG_ALIAS) {
1176 node->sysctl_alias = anum;
1177 } else if (flags & CTLFLAG_IMMEDIATE) {
1178 switch (type) {
1179 case CTLTYPE_BOOL:
1180 node->sysctl_bdata = nnode.sysctl_bdata;
1181 break;
1182 case CTLTYPE_INT:
1183 node->sysctl_idata = nnode.sysctl_idata;
1184 break;
1185 case CTLTYPE_QUAD:
1186 node->sysctl_qdata = nnode.sysctl_qdata;
1187 break;
1188 }
1189 } else {
1190 node->sysctl_data = nnode.sysctl_data;
1191 node->sysctl_flags &= ~CTLFLAG_OWNDATA;
1192 }
1193 node->sysctl_func = nnode.sysctl_func;
1194 node->sysctl_child = NULL;
1195 /* node->sysctl_parent should already be done */
1196
1197 /*
1198 * update "version" on path to "root"
1199 */
1200 for (; rnode->sysctl_parent != NULL; rnode = rnode->sysctl_parent)
1201 ;
1202 pnode = node;
1203 for (nm = rnode->sysctl_ver + 1; pnode != NULL;
1204 pnode = pnode->sysctl_parent)
1205 pnode->sysctl_ver = nm;
1206
1207 /* If this fails, the node is already added - the user won't know! */
1208 error = sysctl_cvt_out(l, v, node, oldp, *oldlenp, oldlenp);
1209
1210 return (error);
1211 }
1212
/*
 * ********************************************************************
 * Debugging front end for sysctl_create().  It dumps the request (the
 * full mib name, plus the name, flags and size of the node described
 * by newp) to the console, passes the request on to _sysctl_create()
 * untouched, and then prints the value that came back.  Only built
 * when SYSCTL_DEBUG_CREATE is defined.
 * ********************************************************************
 */
#ifdef SYSCTL_DEBUG_CREATE
int
sysctl_create(SYSCTLFN_ARGS)
{
	const struct sysctlnode *req = newp;
	int depth = namelen + (name - oname);
	int num, result, i;

	/*
	 * the leading components come from the original name; the
	 * last slot shown is the number the new node is asking for
	 */
	num = req->sysctl_num;
	printf("namelen %d (", depth);
	for (i = 0; i < depth - 1; i++)
		printf(" %d", oname[i]);
	printf(" %d )\t[%s]\tflags %08x (%08x %d %zu)\n",
	    num,
	    req->sysctl_name,
	    req->sysctl_flags,
	    SYSCTL_FLAGS(req->sysctl_flags),
	    SYSCTL_TYPE(req->sysctl_flags),
	    req->sysctl_size);

	result = _sysctl_create(SYSCTLFN_CALL(rnode));

	printf("sysctl_create(");
	for (i = 0; i < depth - 1; i++)
		printf(" %d", oname[i]);
	printf(" %d ) returned %d\n", num, result);

	return (result);
}
#endif /* SYSCTL_DEBUG_CREATE */
1250
1251 /*
1252 * sysctl_destroy -- Removes a node (as described by newp) from the
1253 * given tree, returning (if successful) a copy of the dead node in
1254 * oldp. Since we're removing stuff, there's not much to check.
1255 */
1256 int
1257 sysctl_destroy(SYSCTLFN_ARGS)
1258 {
1259 struct sysctlnode *node, *pnode, onode, nnode;
1260 int ni, error, v;
1261
1262 KASSERT(rw_write_held(&sysctl_treelock));
1263
1264 if (SYSCTL_VERS(rnode->sysctl_flags) != SYSCTL_VERSION) {
1265 printf("sysctl_destroy: rnode %p wrong version\n", rnode);
1266 return (EINVAL);
1267 }
1268
1269 error = 0;
1270
1271 if (namelen != 1 || name[namelen - 1] != CTL_DESTROY)
1272 return (EINVAL);
1273
1274 /*
1275 * processes can only destroy nodes at securelevel 0, must be
1276 * root, and can't remove nodes from a parent that's not
1277 * writeable
1278 */
1279 if (l != NULL) {
1280 #ifndef SYSCTL_DISALLOW_CREATE
1281 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_SYSCTL,
1282 KAUTH_REQ_SYSTEM_SYSCTL_DELETE, NULL, NULL, NULL);
1283 if (error)
1284 return (error);
1285 if (!(rnode->sysctl_flags & CTLFLAG_READWRITE))
1286 #endif /* SYSCTL_DISALLOW_CREATE */
1287 return (EPERM);
1288 }
1289
1290 /*
1291 * nothing can remove a node if:
1292 * the node is permanent (checked later) or
1293 * the tree itself is not writeable or
1294 * the entire sysctl system is not writeable
1295 *
1296 * note that we ignore whether setup is complete or not,
1297 * because these rules always apply.
1298 */
1299 if (!(sysctl_rootof(rnode)->sysctl_flags & CTLFLAG_READWRITE) ||
1300 !(sysctl_root.sysctl_flags & CTLFLAG_READWRITE))
1301 return (EPERM);
1302
1303 if (newp == NULL)
1304 return (EINVAL);
1305 error = sysctl_cvt_in(l, &v, newp, newlen, &nnode);
1306 if (error)
1307 return (error);
1308 memset(&onode, 0, sizeof(struct sysctlnode));
1309
1310 node = rnode->sysctl_child;
1311 for (ni = 0; ni < rnode->sysctl_clen; ni++) {
1312 if (nnode.sysctl_num == node[ni].sysctl_num) {
1313 /*
1314 * if name specified, must match
1315 */
1316 if (nnode.sysctl_name[0] != '\0' &&
1317 strcmp(nnode.sysctl_name, node[ni].sysctl_name))
1318 continue;
1319 /*
1320 * if version specified, must match
1321 */
1322 if (nnode.sysctl_ver != 0 &&
1323 nnode.sysctl_ver != node[ni].sysctl_ver)
1324 continue;
1325 /*
1326 * this must be the one
1327 */
1328 break;
1329 }
1330 }
1331 if (ni == rnode->sysctl_clen)
1332 return (ENOENT);
1333 node = &node[ni];
1334 pnode = node->sysctl_parent;
1335
1336 /*
1337 * if the kernel says permanent, it is, so there. nyah.
1338 */
1339 if (SYSCTL_FLAGS(node->sysctl_flags) & CTLFLAG_PERMANENT)
1340 return (EPERM);
1341
1342 /*
1343 * can't delete non-empty nodes
1344 */
1345 if (SYSCTL_TYPE(node->sysctl_flags) == CTLTYPE_NODE &&
1346 node->sysctl_clen != 0)
1347 return (ENOTEMPTY);
1348
1349 /*
1350 * if the node "owns" data, release it now
1351 */
1352 if (node->sysctl_flags & CTLFLAG_OWNDATA) {
1353 if (node->sysctl_data != NULL)
1354 free(node->sysctl_data, M_SYSCTLDATA);
1355 node->sysctl_data = NULL;
1356 }
1357 if (node->sysctl_flags & CTLFLAG_OWNDESC) {
1358 if (node->sysctl_desc != NULL)
1359 /*XXXUNCONST*/
1360 free(__UNCONST(node->sysctl_desc), M_SYSCTLDATA);
1361 node->sysctl_desc = NULL;
1362 }
1363
1364 /*
1365 * if the node to be removed is not the last one on the list,
1366 * move the remaining nodes up, and reparent any grandchildren
1367 */
1368 onode = *node;
1369 if (ni < pnode->sysctl_clen - 1) {
1370 int t;
1371
1372 memmove(&pnode->sysctl_child[ni], &pnode->sysctl_child[ni + 1],
1373 (pnode->sysctl_clen - ni - 1) *
1374 sizeof(struct sysctlnode));
1375 for (; ni < pnode->sysctl_clen - 1; ni++)
1376 if (SYSCTL_TYPE(pnode->sysctl_child[ni].sysctl_flags) ==
1377 CTLTYPE_NODE)
1378 for (t = 0;
1379 t < pnode->sysctl_child[ni].sysctl_clen;
1380 t++)
1381 pnode->sysctl_child[ni].sysctl_child[t].
1382 sysctl_parent =
1383 &pnode->sysctl_child[ni];
1384 ni = pnode->sysctl_clen - 1;
1385 node = &pnode->sysctl_child[ni];
1386 }
1387
1388 /*
1389 * reset the space we just vacated
1390 */
1391 memset(node, 0, sizeof(struct sysctlnode));
1392 node->sysctl_parent = pnode;
1393 pnode->sysctl_clen--;
1394
1395 /*
1396 * if this parent just lost its last child, nuke the creche
1397 */
1398 if (pnode->sysctl_clen == 0) {
1399 free(pnode->sysctl_child, M_SYSCTLNODE);
1400 pnode->sysctl_csize = 0;
1401 pnode->sysctl_child = NULL;
1402 }
1403
1404 /*
1405 * update "version" on path to "root"
1406 */
1407 for (; rnode->sysctl_parent != NULL; rnode = rnode->sysctl_parent)
1408 ;
1409 for (ni = rnode->sysctl_ver + 1; pnode != NULL;
1410 pnode = pnode->sysctl_parent)
1411 pnode->sysctl_ver = ni;
1412
1413 error = sysctl_cvt_out(l, v, &onode, oldp, *oldlenp, oldlenp);
1414
1415 return (error);
1416 }
1417
1418 /*
1419 * sysctl_lookup -- Handles copyin/copyout of new and old values.
1420 * Partial reads are globally allowed. Only root can write to things
1421 * unless the node says otherwise.
1422 */
1423 int
1424 sysctl_lookup(SYSCTLFN_ARGS)
1425 {
1426 int error, rw;
1427 size_t sz, len;
1428 void *d;
1429
1430 KASSERT(rw_lock_held(&sysctl_treelock));
1431
1432 if (SYSCTL_VERS(rnode->sysctl_flags) != SYSCTL_VERSION) {
1433 printf("%s: rnode %p wrong version\n", __func__, rnode);
1434 return EINVAL;
1435 }
1436
1437 if (newlen == 0)
1438 newp = NULL;
1439
1440 error = 0;
1441
1442 /*
1443 * you can't "look up" a node. you can "query" it, but you
1444 * can't "look it up".
1445 */
1446 if (SYSCTL_TYPE(rnode->sysctl_flags) == CTLTYPE_NODE || namelen != 0) {
1447 DPRINTF(("%s: can't lookup a node\n", __func__));
1448 return EINVAL;
1449 }
1450
1451 /*
1452 * some nodes are private, so only root can look into them.
1453 */
1454 if (l != NULL && (rnode->sysctl_flags & CTLFLAG_PRIVATE) &&
1455 (error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_SYSCTL,
1456 KAUTH_REQ_SYSTEM_SYSCTL_PRVT, NULL, NULL, NULL)) != 0) {
1457 DPRINTF(("%s: private node\n", __func__));
1458 return error;
1459 }
1460
1461 /*
1462 * if a node wants to be writable according to different rules
1463 * other than "only root can write to stuff unless a flag is
1464 * set", then it needs its own function which should have been
1465 * called and not us.
1466 */
1467 if (l != NULL && newp != NULL &&
1468 !(rnode->sysctl_flags & CTLFLAG_ANYWRITE) &&
1469 (error = kauth_authorize_system(l->l_cred,
1470 KAUTH_SYSTEM_SYSCTL, KAUTH_REQ_SYSTEM_SYSCTL_MODIFY, NULL, NULL,
1471 NULL)) != 0) {
1472 DPRINTF(("%s: can't modify\n", __func__));
1473 return error;
1474 }
1475
1476 /*
1477 * is this node supposedly writable?
1478 */
1479 rw = (rnode->sysctl_flags & CTLFLAG_READWRITE) ? 1 : 0;
1480
1481 /*
1482 * it appears not to be writable at this time, so if someone
1483 * tried to write to it, we must tell them to go away
1484 */
1485 if (!rw && newp != NULL) {
1486 DPRINTF(("%s: not writable\n", __func__));
1487 return EPERM;
1488 }
1489
1490 /*
1491 * step one, copy out the stuff we have presently
1492 */
1493 if (rnode->sysctl_flags & CTLFLAG_IMMEDIATE) {
1494 /*
1495 * note that we discard const here because we are
1496 * modifying the contents of the node (which is okay
1497 * because it's ours)
1498 *
1499 * It also doesn't matter which field of the union we pick.
1500 */
1501 d = __UNCONST(&rnode->sysctl_qdata);
1502 } else
1503 d = rnode->sysctl_data;
1504
1505 if (SYSCTL_TYPE(rnode->sysctl_flags) == CTLTYPE_STRING)
1506 sz = strlen(d) + 1; /* XXX@@@ possible fault here */
1507 else
1508 sz = rnode->sysctl_size;
1509 if (oldp != NULL) {
1510 error = sysctl_copyout(l, d, oldp, MIN(sz, *oldlenp));
1511 if (error) {
1512 DPRINTF(("%s: bad copyout %d\n", __func__, error));
1513 return error;
1514 }
1515 }
1516 *oldlenp = sz;
1517
1518 /*
1519 * are we done?
1520 */
1521 if (newp == NULL)
1522 return 0;
1523
1524 /*
1525 * hmm...not done. must now "copy in" new value. re-adjust
1526 * sz to maximum value (strings are "weird").
1527 */
1528 sz = rnode->sysctl_size;
1529 switch (SYSCTL_TYPE(rnode->sysctl_flags)) {
1530 case CTLTYPE_BOOL: {
1531 bool tmp;
1532 /*
1533 * these data must be *exactly* the same size coming
1534 * in. bool may only be true or false.
1535 */
1536 if (newlen != sz) {
1537 DPRINTF(("%s: bad size %zu != %zu\n", __func__, newlen,
1538 sz));
1539 return EINVAL;
1540 }
1541 error = sysctl_copyin(l, newp, &tmp, sz);
1542 if (error)
1543 break;
1544 if (tmp != true && tmp != false) {
1545 DPRINTF(("%s: tmp %d\n", __func__, tmp));
1546 return EINVAL;
1547 }
1548 *(bool *)d = tmp;
1549 break;
1550 }
1551 case CTLTYPE_INT:
1552 case CTLTYPE_QUAD:
1553 case CTLTYPE_STRUCT:
1554 /*
1555 * these data must be *exactly* the same size coming
1556 * in.
1557 */
1558 if (newlen != sz)
1559 goto bad_size;
1560 error = sysctl_copyin(l, newp, d, sz);
1561 rnd_add_data(NULL, d, sz, 0);
1562 break;
1563 case CTLTYPE_STRING: {
1564 /*
1565 * strings, on the other hand, can be shorter, and we
1566 * let userland be sloppy about the trailing nul.
1567 */
1568 char *newbuf;
1569
1570 /*
1571 * too much new string?
1572 */
1573 if (newlen > sz)
1574 goto bad_size;
1575
1576 /*
1577 * temporary copy of new inbound string
1578 */
1579 len = MIN(sz, newlen);
1580 newbuf = malloc(len, M_SYSCTLDATA, M_WAITOK|M_CANFAIL);
1581 if (newbuf == NULL) {
1582 DPRINTF(("%s: oomem %zu\n", __func__, len));
1583 return ENOMEM;
1584 }
1585 error = sysctl_copyin(l, newp, newbuf, len);
1586 if (error) {
1587 free(newbuf, M_SYSCTLDATA);
1588 DPRINTF(("%s: copyin %d\n", __func__, error));
1589 return error;
1590 }
1591
1592 /*
1593 * did they NUL terminate it, or do we have space
1594 * left to do it ourselves?
1595 */
1596 if (newbuf[len - 1] != '\0' && len == sz) {
1597 free(newbuf, M_SYSCTLDATA);
1598 DPRINTF(("%s: string too long\n", __func__));
1599 return EINVAL;
1600 }
1601
1602 /*
1603 * looks good, so pop it into place and zero the rest.
1604 */
1605 if (len > 0) {
1606 memcpy(d, newbuf, len);
1607 rnd_add_data(NULL, d, len, 0);
1608 }
1609 if (sz != len)
1610 memset((char*)d + len, 0, sz - len);
1611 free(newbuf, M_SYSCTLDATA);
1612 break;
1613 }
1614 default:
1615 DPRINTF(("%s: bad type\n", __func__));
1616 return EINVAL;
1617 }
1618 if (error) {
1619 DPRINTF(("%s: copyin %d\n", __func__, error));
1620 }
1621
1622 return error;
1623
1624 bad_size:
1625 DPRINTF(("%s: bad size %zu > %zu\n", __func__, newlen, sz));
1626 return EINVAL;
1627 }
1628
1629 /*
1630 * sysctl_mmap -- Dispatches sysctl mmap requests to those nodes that
1631 * purport to handle it. This interface isn't fully fleshed out yet,
1632 * unfortunately.
1633 */
1634 static int
1635 sysctl_mmap(SYSCTLFN_ARGS)
1636 {
1637 const struct sysctlnode *node;
1638 struct sysctlnode nnode;
1639 int error;
1640 int sysctl_num;
1641
1642 if (SYSCTL_VERS(rnode->sysctl_flags) != SYSCTL_VERSION) {
1643 printf("sysctl_mmap: rnode %p wrong version\n", rnode);
1644 return (EINVAL);
1645 }
1646
1647 /*
1648 * let's just pretend that didn't happen, m'kay?
1649 */
1650 if (l == NULL)
1651 return (EPERM);
1652
1653 /*
1654 * is this a sysctlnode description of an mmap request?
1655 */
1656 if (newp == NULL || newlen != sizeof(struct sysctlnode))
1657 return (EINVAL);
1658 error = sysctl_copyin(l, newp, &nnode, sizeof(nnode));
1659 if (error)
1660 return (error);
1661
1662 /*
1663 * does the node they asked for exist?
1664 */
1665 if (namelen != 1)
1666 return (EOPNOTSUPP);
1667 node = rnode;
1668 sysctl_num = nnode.sysctl_num;
1669 error = sysctl_locate(l, &sysctl_num, 1, &node, NULL);
1670 if (error)
1671 return (error);
1672
1673 /*
1674 * does this node that we have found purport to handle mmap?
1675 */
1676 if (node->sysctl_func == NULL ||
1677 !(node->sysctl_flags & CTLFLAG_MMAP))
1678 return (EOPNOTSUPP);
1679
1680 /*
1681 * well...okay, they asked for it.
1682 */
1683 return ((*node->sysctl_func)(SYSCTLFN_CALL(node)));
1684 }
1685
1686 int
1687 sysctl_describe(SYSCTLFN_ARGS)
1688 {
1689 struct sysctldesc *d;
1690 void *bf;
1691 size_t sz, left, tot;
1692 int i, error, v = -1;
1693 struct sysctlnode *node;
1694 struct sysctlnode dnode;
1695
1696 if (SYSCTL_VERS(rnode->sysctl_flags) != SYSCTL_VERSION) {
1697 printf("sysctl_query: rnode %p wrong version\n", rnode);
1698 return (EINVAL);
1699 }
1700
1701 if (SYSCTL_TYPE(rnode->sysctl_flags) != CTLTYPE_NODE)
1702 return (ENOTDIR);
1703 if (namelen != 1 || name[0] != CTL_DESCRIBE)
1704 return (EINVAL);
1705
1706 /*
1707 * get ready...
1708 */
1709 error = 0;
1710 d = bf = malloc(MAXDESCLEN, M_TEMP, M_WAITOK|M_CANFAIL);
1711 if (bf == NULL)
1712 return ENOMEM;
1713 tot = 0;
1714 node = rnode->sysctl_child;
1715 left = *oldlenp;
1716
1717 /*
1718 * no request -> all descriptions at this level
1719 * request with desc unset -> just this node
1720 * request with desc set -> set descr for this node
1721 */
1722 if (newp != NULL) {
1723 error = sysctl_cvt_in(l, &v, newp, newlen, &dnode);
1724 if (error)
1725 goto out;
1726 if (dnode.sysctl_desc != NULL) {
1727 /*
1728 * processes cannot set descriptions above
1729 * securelevel 0. and must be root. blah
1730 * blah blah. a couple more checks are made
1731 * once we find the node we want.
1732 */
1733 if (l != NULL) {
1734 #ifndef SYSCTL_DISALLOW_CREATE
1735 error = kauth_authorize_system(l->l_cred,
1736 KAUTH_SYSTEM_SYSCTL,
1737 KAUTH_REQ_SYSTEM_SYSCTL_DESC, NULL,
1738 NULL, NULL);
1739 if (error)
1740 goto out;
1741 #else /* SYSCTL_DISALLOW_CREATE */
1742 error = EPERM;
1743 goto out;
1744 #endif /* SYSCTL_DISALLOW_CREATE */
1745 }
1746
1747 /*
1748 * find node and try to set the description on it
1749 */
1750 for (i = 0; i < rnode->sysctl_clen; i++)
1751 if (node[i].sysctl_num == dnode.sysctl_num)
1752 break;
1753 if (i == rnode->sysctl_clen) {
1754 error = ENOENT;
1755 goto out;
1756 }
1757 node = &node[i];
1758
1759 /*
1760 * did the caller specify a node version?
1761 */
1762 if (dnode.sysctl_ver != 0 &&
1763 dnode.sysctl_ver != node->sysctl_ver) {
1764 error = EINVAL;
1765 goto out;
1766 }
1767
1768 /*
1769 * okay...some rules:
1770 * (1) if setup is done and the tree is
1771 * read-only or the whole system is
1772 * read-only
1773 * (2) no one can set a description on a
1774 * permanent node (it must be set when
1775 * using createv)
1776 * (3) processes cannot *change* a description
1777 * (4) processes *can*, however, set a
1778 * description on a read-only node so that
1779 * one can be created and then described
1780 * in two steps
1781 * anything else come to mind?
1782 */
1783 if ((sysctl_root.sysctl_flags & CTLFLAG_PERMANENT) &&
1784 (!(sysctl_rootof(node)->sysctl_flags &
1785 CTLFLAG_READWRITE) ||
1786 !(sysctl_root.sysctl_flags & CTLFLAG_READWRITE))) {
1787 error = EPERM;
1788 goto out;
1789 }
1790 if (node->sysctl_flags & CTLFLAG_PERMANENT) {
1791 error = EPERM;
1792 goto out;
1793 }
1794 if (l != NULL && node->sysctl_desc != NULL) {
1795 error = EPERM;
1796 goto out;
1797 }
1798
1799 /*
1800 * right, let's go ahead. the first step is
1801 * making the description into something the
1802 * node can "own", if need be.
1803 */
1804 if (l != NULL ||
1805 dnode.sysctl_flags & CTLFLAG_OWNDESC) {
1806 char *nd, *k;
1807
1808 k = malloc(MAXDESCLEN, M_TEMP,
1809 M_WAITOK|M_CANFAIL);
1810 if (k == NULL) {
1811 error = ENOMEM;
1812 goto out;
1813 }
1814 error = sysctl_copyinstr(l, dnode.sysctl_desc,
1815 k, MAXDESCLEN, &sz);
1816 if (error) {
1817 free(k, M_TEMP);
1818 goto out;
1819 }
1820 nd = malloc(sz, M_SYSCTLDATA,
1821 M_WAITOK|M_CANFAIL);
1822 if (nd == NULL) {
1823 free(k, M_TEMP);
1824 error = ENOMEM;
1825 goto out;
1826 }
1827 memcpy(nd, k, sz);
1828 dnode.sysctl_flags |= CTLFLAG_OWNDESC;
1829 dnode.sysctl_desc = nd;
1830 free(k, M_TEMP);
1831 }
1832
1833 /*
1834 * now "release" the old description and
1835 * attach the new one. ta-da.
1836 */
1837 if ((node->sysctl_flags & CTLFLAG_OWNDESC) &&
1838 node->sysctl_desc != NULL)
1839 /*XXXUNCONST*/
1840 free(__UNCONST(node->sysctl_desc), M_SYSCTLDATA);
1841 node->sysctl_desc = dnode.sysctl_desc;
1842 node->sysctl_flags |=
1843 (dnode.sysctl_flags & CTLFLAG_OWNDESC);
1844
1845 /*
1846 * now we "fall out" and into the loop which
1847 * will copy the new description back out for
1848 * those interested parties
1849 */
1850 }
1851 }
1852
1853 /*
1854 * scan for one description or just retrieve all descriptions
1855 */
1856 for (i = 0; i < rnode->sysctl_clen; i++) {
1857 /*
1858 * did they ask for the description of only one node?
1859 */
1860 if (v != -1 && node[i].sysctl_num != dnode.sysctl_num)
1861 continue;
1862
1863 /*
1864 * don't describe "private" nodes to non-suser users
1865 */
1866 if ((node[i].sysctl_flags & CTLFLAG_PRIVATE) && (l != NULL) &&
1867 !(kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_SYSCTL,
1868 KAUTH_REQ_SYSTEM_SYSCTL_PRVT, NULL, NULL, NULL)))
1869 continue;
1870
1871 /*
1872 * is this description "valid"?
1873 */
1874 memset(bf, 0, MAXDESCLEN);
1875 if (node[i].sysctl_desc == NULL)
1876 sz = 1;
1877 else if (copystr(node[i].sysctl_desc, &d->descr_str[0],
1878 MAXDESCLEN - sizeof(*d), &sz) != 0) {
1879 /*
1880 * erase possible partial description
1881 */
1882 memset(bf, 0, MAXDESCLEN);
1883 sz = 1;
1884 }
1885
1886 /*
1887 * we've got it, stuff it into the caller's buffer
1888 */
1889 d->descr_num = node[i].sysctl_num;
1890 d->descr_ver = node[i].sysctl_ver;
1891 d->descr_len = sz; /* includes trailing nul */
1892 sz = (char *)NEXT_DESCR(d) - (char *)d;
1893 if (oldp != NULL && left >= sz) {
1894 error = sysctl_copyout(l, d, oldp, sz);
1895 if (error)
1896 goto out;
1897 left -= sz;
1898 oldp = (void *)__sysc_desc_adv(oldp, d->descr_len);
1899 }
1900 tot += sz;
1901
1902 /*
1903 * if we get this far with v not "unset", they asked
1904 * for a specific node and we found it
1905 */
1906 if (v != -1)
1907 break;
1908 }
1909
1910 /*
1911 * did we find it after all?
1912 */
1913 if (v != -1 && tot == 0)
1914 error = ENOENT;
1915 else
1916 *oldlenp = tot;
1917
1918 out:
1919 free(bf, M_TEMP);
1920 return (error);
1921 }
1922
1923 /*
1924 * ********************************************************************
1925 * Section 3: Create and destroy from inside the kernel
1926 * ********************************************************************
1927 * sysctl_createv() and sysctl_destroyv() are simpler-to-use
1928 * interfaces for the kernel to fling new entries into the mib and rip
1929 * them out later. In the case of sysctl_createv(), the returned copy
1930 * of the node (see sysctl_create()) will be translated back into a
1931 * pointer to the actual node.
1932 *
1933 * Note that sysctl_createv() will return 0 if the create request
1934 * matches an existing node (ala mkdir -p), and that sysctl_destroyv()
1935 * will return 0 if the node to be destroyed already does not exist
1936 * (aka rm -f) or if it is a parent of other nodes.
1937 *
1938 * This allows two (or more) different subsystems to assert sub-tree
1939 * existence before populating their own nodes, and to remove their
1940 * own nodes without orphaning the others when they are done.
1941 * ********************************************************************
1942 */
1943 #undef sysctl_createv
1944 int
1945 sysctl_createv(struct sysctllog **log, int cflags,
1946 const struct sysctlnode **rnode, const struct sysctlnode **cnode,
1947 int flags, int type, const char *namep, const char *descr,
1948 sysctlfn func, u_quad_t qv, void *newp, size_t newlen,
1949 ...)
1950 {
1951 va_list ap;
1952 int error, ni, namelen, name[CTL_MAXNAME];
1953 const struct sysctlnode *root, *pnode;
1954 struct sysctlnode nnode, onode, *dnode;
1955 size_t sz;
1956
1957 /*
1958 * where are we putting this?
1959 */
1960 if (rnode != NULL && *rnode == NULL) {
1961 printf("sysctl_createv: rnode NULL\n");
1962 return (EINVAL);
1963 }
1964 root = rnode ? *rnode : NULL;
1965 if (cnode != NULL)
1966 *cnode = NULL;
1967 if (cflags != 0)
1968 return (EINVAL);
1969
1970 /*
1971 * what is it?
1972 */
1973 flags = SYSCTL_VERSION|SYSCTL_TYPE(type)|SYSCTL_FLAGS(flags);
1974 if (log != NULL)
1975 flags &= ~CTLFLAG_PERMANENT;
1976
1977 /*
1978 * where do we put it?
1979 */
1980 va_start(ap, newlen);
1981 namelen = 0;
1982 error = 0;
1983 ni = -1;
1984 do {
1985 if (++ni == CTL_MAXNAME) {
1986 error = ENAMETOOLONG;
1987 break;
1988 }
1989 name[ni] = va_arg(ap, int);
1990 /*
1991 * sorry, this is not supported from here
1992 */
1993 if (name[ni] == CTL_CREATESYM) {
1994 error = EINVAL;
1995 break;
1996 }
1997 } while (name[ni] != CTL_EOL && name[ni] != CTL_CREATE);
1998 va_end(ap);
1999 if (error)
2000 return error;
2001 namelen = ni + (name[ni] == CTL_CREATE ? 1 : 0);
2002
2003 /*
2004 * what's it called
2005 */
2006 if (strlcpy(nnode.sysctl_name, namep, sizeof(nnode.sysctl_name)) >=
2007 sizeof(nnode.sysctl_name))
2008 return (ENAMETOOLONG);
2009
2010 /*
2011 * cons up the description of the new node
2012 */
2013 nnode.sysctl_num = name[namelen - 1];
2014 name[namelen - 1] = CTL_CREATE;
2015 nnode.sysctl_size = newlen;
2016 nnode.sysctl_flags = flags;
2017 if (type == CTLTYPE_NODE) {
2018 nnode.sysctl_csize = 0;
2019 nnode.sysctl_clen = 0;
2020 nnode.sysctl_child = NULL;
2021 if (flags & CTLFLAG_ALIAS)
2022 nnode.sysctl_alias = qv;
2023 } else if (flags & CTLFLAG_IMMEDIATE) {
2024 switch (type) {
2025 case CTLTYPE_BOOL:
2026 nnode.sysctl_bdata = qv;
2027 break;
2028 case CTLTYPE_INT:
2029 nnode.sysctl_idata = qv;
2030 break;
2031 case CTLTYPE_QUAD:
2032 nnode.sysctl_qdata = qv;
2033 break;
2034 default:
2035 return (EINVAL);
2036 }
2037 } else {
2038 nnode.sysctl_data = newp;
2039 }
2040 nnode.sysctl_func = func;
2041 nnode.sysctl_parent = NULL;
2042 nnode.sysctl_ver = 0;
2043
2044 /*
2045 * initialize lock state -- we need locks if the main tree has
2046 * been marked as complete, but since we could be called from
2047 * either there, or from a device driver (say, at device
2048 * insertion), or from a module (at module load time, say), we
2049 * don't really want to "wait"...
2050 */
2051 sysctl_lock(true);
2052
2053 /*
2054 * locate the prospective parent of the new node, and if we
2055 * find it, add the new node.
2056 */
2057 sz = sizeof(onode);
2058 pnode = root;
2059 error = sysctl_locate(NULL, &name[0], namelen - 1, &pnode, &ni);
2060 if (error) {
2061 /*
2062 * XXX: If you are seeing this printf in early bringup
2063 * stages, perhaps your setfault is not functioning and
2064 * thus kcopy() is mis-behaving.
2065 */
2066 printf("sysctl_createv: sysctl_locate(%s) returned %d\n",
2067 nnode.sysctl_name, error);
2068 sysctl_unlock();
2069 return (error);
2070 }
2071 error = sysctl_create(&name[ni], namelen - ni, &onode, &sz,
2072 &nnode, sizeof(nnode), &name[0], NULL,
2073 pnode);
2074
2075 /*
2076 * unfortunately the node we wanted to create is already
2077 * there. if the node that's already there is a reasonable
2078 * facsimile of the node we wanted to create, just pretend
2079 * (for the caller's benefit) that we managed to create the
2080 * node they wanted.
2081 */
2082 if (error == EEXIST) {
2083 /* name is the same as requested... */
2084 if (strcmp(nnode.sysctl_name, onode.sysctl_name) == 0 &&
2085 /* they want the same function... */
2086 nnode.sysctl_func == onode.sysctl_func &&
2087 /* number is the same as requested, or... */
2088 (nnode.sysctl_num == onode.sysctl_num ||
2089 /* they didn't pick a number... */
2090 nnode.sysctl_num == CTL_CREATE)) {
2091 /*
2092 * collision here from trying to create
2093 * something that already existed; let's give
2094 * our customers a hand and tell them they got
2095 * what they wanted.
2096 */
2097 #ifdef SYSCTL_DEBUG_CREATE
2098 printf("cleared\n");
2099 #endif /* SYSCTL_DEBUG_CREATE */
2100 error = 0;
2101 }
2102 }
2103
2104 if (error == 0 &&
2105 (cnode != NULL || log != NULL || descr != NULL)) {
2106 /*
2107 * sysctl_create() gave us back a copy of the node,
2108 * but we need to know where it actually is...
2109 */
2110 pnode = root;
2111 error = sysctl_locate(NULL, &name[0], namelen - 1, &pnode, &ni);
2112
2113 /*
2114 * manual scan of last layer so that aliased nodes
2115 * aren't followed.
2116 */
2117 if (error == 0) {
2118 for (ni = 0; ni < pnode->sysctl_clen; ni++)
2119 if (pnode->sysctl_child[ni].sysctl_num ==
2120 onode.sysctl_num)
2121 break;
2122 if (ni < pnode->sysctl_clen)
2123 pnode = &pnode->sysctl_child[ni];
2124 else
2125 error = ENOENT;
2126 }
2127
2128 /*
2129 * not expecting an error here, but...
2130 */
2131 if (error == 0) {
2132 if (log != NULL)
2133 sysctl_log_add(log, pnode);
2134 if (cnode != NULL)
2135 *cnode = pnode;
2136 if (descr != NULL) {
2137 /*
2138 * allow first caller to *set* a
2139 * description actually to set it
2140 *
2141 * discard const here so we can attach
2142 * the description
2143 */
2144 dnode = __UNCONST(pnode);
2145 if (pnode->sysctl_desc != NULL)
2146 /* skip it...we've got one */;
2147 else if (flags & CTLFLAG_OWNDESC) {
2148 size_t l = strlen(descr) + 1;
2149 char *d = malloc(l, M_SYSCTLDATA,
2150 M_WAITOK|M_CANFAIL);
2151 if (d != NULL) {
2152 memcpy(d, descr, l);
2153 dnode->sysctl_desc = d;
2154 dnode->sysctl_flags |=
2155 CTLFLAG_OWNDESC;
2156 }
2157 } else
2158 dnode->sysctl_desc = descr;
2159 }
2160 } else {
2161 printf("sysctl_create succeeded but node not found?!\n");
2162 /*
2163 * confusing, but the create said it
2164 * succeeded, so...
2165 */
2166 error = 0;
2167 }
2168 }
2169
2170 /*
2171 * now it should be safe to release the lock state. note that
2172 * the pointer to the newly created node being passed back may
2173 * not be "good" for very long.
2174 */
2175 sysctl_unlock();
2176
2177 if (error != 0) {
2178 printf("sysctl_createv: sysctl_create(%s) returned %d\n",
2179 nnode.sysctl_name, error);
2180 #if 0
2181 if (error != ENOENT)
2182 sysctl_dump(&onode);
2183 #endif
2184 }
2185
2186 return (error);
2187 }
2188
2189 int
2190 sysctl_destroyv(struct sysctlnode *rnode, ...)
2191 {
2192 va_list ap;
2193 int error, name[CTL_MAXNAME], namelen, ni;
2194 const struct sysctlnode *pnode, *node;
2195 struct sysctlnode dnode, *onode;
2196 size_t sz;
2197
2198 va_start(ap, rnode);
2199 namelen = 0;
2200 ni = 0;
2201 do {
2202 if (ni == CTL_MAXNAME) {
2203 va_end(ap);
2204 return (ENAMETOOLONG);
2205 }
2206 name[ni] = va_arg(ap, int);
2207 } while (name[ni++] != CTL_EOL);
2208 namelen = ni - 1;
2209 va_end(ap);
2210
2211 /*
2212 * i can't imagine why we'd be destroying a node when the tree
2213 * wasn't complete, but who knows?
2214 */
2215 sysctl_lock(true);
2216
2217 /*
2218 * where is it?
2219 */
2220 node = rnode;
2221 error = sysctl_locate(NULL, &name[0], namelen - 1, &node, &ni);
2222 if (error) {
2223 /* they want it gone and it's not there, so... */
2224 sysctl_unlock();
2225 return (error == ENOENT ? 0 : error);
2226 }
2227
2228 /*
2229 * set up the deletion
2230 */
2231 pnode = node;
2232 node = &dnode;
2233 memset(&dnode, 0, sizeof(dnode));
2234 dnode.sysctl_flags = SYSCTL_VERSION;
2235 dnode.sysctl_num = name[namelen - 1];
2236
2237 /*
2238 * we found it, now let's nuke it
2239 */
2240 name[namelen - 1] = CTL_DESTROY;
2241 sz = 0;
2242 error = sysctl_destroy(&name[namelen - 1], 1, NULL, &sz,
2243 node, sizeof(*node), &name[0], NULL,
2244 pnode);
2245 if (error == ENOTEMPTY) {
2246 /*
2247 * think of trying to delete "foo" when "foo.bar"
2248 * (which someone else put there) is still in
2249 * existence
2250 */
2251 error = 0;
2252
2253 /*
2254 * dunno who put the description there, but if this
2255 * node can ever be removed, we need to make sure the
2256 * string doesn't go out of context. that means we
2257 * need to find the node that's still there (don't use
2258 * sysctl_locate() because that follows aliasing).
2259 */
2260 node = pnode->sysctl_child;
2261 for (ni = 0; ni < pnode->sysctl_clen; ni++)
2262 if (node[ni].sysctl_num == dnode.sysctl_num)
2263 break;
2264 node = (ni < pnode->sysctl_clen) ? &node[ni] : NULL;
2265
2266 /*
2267 * if we found it, and this node has a description,
2268 * and this node can be released, and it doesn't
2269 * already own its own description...sigh. :)
2270 */
2271 if (node != NULL && node->sysctl_desc != NULL &&
2272 !(node->sysctl_flags & CTLFLAG_PERMANENT) &&
2273 !(node->sysctl_flags & CTLFLAG_OWNDESC)) {
2274 char *d;
2275
2276 sz = strlen(node->sysctl_desc) + 1;
2277 d = malloc(sz, M_SYSCTLDATA, M_WAITOK|M_CANFAIL);
2278 if (d != NULL) {
2279 /*
2280 * discard const so that we can
2281 * re-attach the description
2282 */
2283 memcpy(d, node->sysctl_desc, sz);
2284 onode = __UNCONST(node);
2285 onode->sysctl_desc = d;
2286 onode->sysctl_flags |= CTLFLAG_OWNDESC;
2287 } else {
2288 /*
2289 * XXX drop the description? be
2290 * afraid? don't care?
2291 */
2292 }
2293 }
2294 }
2295
2296 sysctl_unlock();
2297
2298 return (error);
2299 }
2300
2301 /*
2302 * ********************************************************************
2303 * Deletes an entire n-ary tree. Not recommended unless you know why
2304 * you're doing it. Personally, I don't know why you'd even think
2305 * about it.
2306 * ********************************************************************
2307 */
2308 void
2309 sysctl_free(struct sysctlnode *rnode)
2310 {
2311 struct sysctlnode *node, *pnode;
2312
2313 rw_enter(&sysctl_treelock, RW_WRITER);
2314
2315 if (rnode == NULL)
2316 rnode = &sysctl_root;
2317
2318 if (SYSCTL_VERS(rnode->sysctl_flags) != SYSCTL_VERSION) {
2319 printf("sysctl_free: rnode %p wrong version\n", rnode);
2320 rw_exit(&sysctl_treelock);
2321 return;
2322 }
2323
2324 pnode = rnode;
2325
2326 node = pnode->sysctl_child;
2327 do {
2328 while (node != NULL && pnode->sysctl_csize > 0) {
2329 while (node <
2330 &pnode->sysctl_child[pnode->sysctl_clen] &&
2331 (SYSCTL_TYPE(node->sysctl_flags) !=
2332 CTLTYPE_NODE ||
2333 node->sysctl_csize == 0)) {
2334 if (SYSCTL_FLAGS(node->sysctl_flags) &
2335 CTLFLAG_OWNDATA) {
2336 if (node->sysctl_data != NULL) {
2337 free(node->sysctl_data,
2338 M_SYSCTLDATA);
2339 node->sysctl_data = NULL;
2340 }
2341 }
2342 if (SYSCTL_FLAGS(node->sysctl_flags) &
2343 CTLFLAG_OWNDESC) {
2344 if (node->sysctl_desc != NULL) {
2345 /*XXXUNCONST*/
2346 free(__UNCONST(node->sysctl_desc),
2347 M_SYSCTLDATA);
2348 node->sysctl_desc = NULL;
2349 }
2350 }
2351 node++;
2352 }
2353 if (node < &pnode->sysctl_child[pnode->sysctl_clen]) {
2354 pnode = node;
2355 node = node->sysctl_child;
2356 } else
2357 break;
2358 }
2359 if (pnode->sysctl_child != NULL)
2360 free(pnode->sysctl_child, M_SYSCTLNODE);
2361 pnode->sysctl_clen = 0;
2362 pnode->sysctl_csize = 0;
2363 pnode->sysctl_child = NULL;
2364 node = pnode;
2365 pnode = node->sysctl_parent;
2366 } while (pnode != NULL && node != rnode);
2367
2368 rw_exit(&sysctl_treelock);
2369 }
2370
2371 void
2372 sysctl_log_print(const struct sysctllog *slog)
2373 {
2374 int i, len;
2375
2376 printf("root %p left %d size %d content", (const void *)slog->log_root,
2377 slog->log_left, slog->log_size);
2378
2379 for (len = 0, i = slog->log_left; i < slog->log_size; i++) {
2380 switch (len) {
2381 case 0:
2382 len = -1;
2383 printf(" version %d", slog->log_num[i]);
2384 break;
2385 case -1:
2386 len = -2;
2387 printf(" type %d", slog->log_num[i]);
2388 break;
2389 case -2:
2390 len = slog->log_num[i];
2391 printf(" len %d:", slog->log_num[i]);
2392 if (len <= 0)
2393 len = -1;
2394 break;
2395 default:
2396 len--;
2397 printf(" %d", slog->log_num[i]);
2398 break;
2399 }
2400 }
2401 printf(" end\n");
2402 }
2403
2404 int
2405 sysctl_log_add(struct sysctllog **logp, const struct sysctlnode *node)
2406 {
2407 const int size0 = 16;
2408 int name[CTL_MAXNAME], namelen, i;
2409 const struct sysctlnode *pnode;
2410 struct sysctllog *log;
2411
2412 if (node->sysctl_flags & CTLFLAG_PERMANENT)
2413 return (0);
2414
2415 if (logp == NULL)
2416 return (0);
2417
2418 if (*logp == NULL) {
2419 log = malloc(sizeof(struct sysctllog),
2420 M_SYSCTLDATA, M_WAITOK|M_CANFAIL);
2421 if (log == NULL) {
2422 /* XXX print error message? */
2423 return (-1);
2424 }
2425 log->log_num = malloc(size0 * sizeof(int),
2426 M_SYSCTLDATA, M_WAITOK|M_CANFAIL);
2427 if (log->log_num == NULL) {
2428 /* XXX print error message? */
2429 free(log, M_SYSCTLDATA);
2430 return (-1);
2431 }
2432 memset(log->log_num, 0, size0 * sizeof(int));
2433 log->log_root = NULL;
2434 log->log_size = size0;
2435 log->log_left = size0;
2436 *logp = log;
2437 } else
2438 log = *logp;
2439
2440 /*
2441 * check that the root is proper. it's okay to record the
2442 * address of the root of a tree. it's the only thing that's
2443 * guaranteed not to shift around as nodes come and go.
2444 */
2445 if (log->log_root == NULL)
2446 log->log_root = sysctl_rootof(node);
2447 else if (log->log_root != sysctl_rootof(node)) {
2448 printf("sysctl: log %p root mismatch (%p)\n",
2449 log->log_root, sysctl_rootof(node));
2450 return (-1);
2451 }
2452
2453 /*
2454 * we will copy out name in reverse order
2455 */
2456 for (pnode = node, namelen = 0;
2457 pnode != NULL && !(pnode->sysctl_flags & CTLFLAG_ROOT);
2458 pnode = pnode->sysctl_parent)
2459 name[namelen++] = pnode->sysctl_num;
2460
2461 /*
2462 * do we have space?
2463 */
2464 if (log->log_left < (namelen + 3))
2465 sysctl_log_realloc(log);
2466 if (log->log_left < (namelen + 3))
2467 return (-1);
2468
2469 /*
2470 * stuff name in, then namelen, then node type, and finally,
2471 * the version for non-node nodes.
2472 */
2473 for (i = 0; i < namelen; i++)
2474 log->log_num[--log->log_left] = name[i];
2475 log->log_num[--log->log_left] = namelen;
2476 log->log_num[--log->log_left] = SYSCTL_TYPE(node->sysctl_flags);
2477 if (log->log_num[log->log_left] != CTLTYPE_NODE)
2478 log->log_num[--log->log_left] = node->sysctl_ver;
2479 else
2480 log->log_num[--log->log_left] = 0;
2481
2482 return (0);
2483 }
2484
2485 void
2486 sysctl_teardown(struct sysctllog **logp)
2487 {
2488 const struct sysctlnode *rnode;
2489 struct sysctlnode node;
2490 struct sysctllog *log;
2491 uint namelen;
2492 int *name, t, v, error, ni;
2493 size_t sz;
2494
2495 if (logp == NULL || *logp == NULL)
2496 return;
2497 log = *logp;
2498
2499 rw_enter(&sysctl_treelock, RW_WRITER);
2500 memset(&node, 0, sizeof(node));
2501
2502 while (log->log_left < log->log_size) {
2503 KASSERT((log->log_left + 3 < log->log_size) &&
2504 (log->log_left + log->log_num[log->log_left + 2] <=
2505 log->log_size));
2506 v = log->log_num[log->log_left++];
2507 t = log->log_num[log->log_left++];
2508 namelen = log->log_num[log->log_left++];
2509 name = &log->log_num[log->log_left];
2510
2511 node.sysctl_num = name[namelen - 1];
2512 node.sysctl_flags = SYSCTL_VERSION|t;
2513 node.sysctl_ver = v;
2514
2515 rnode = log->log_root;
2516 error = sysctl_locate(NULL, &name[0], namelen, &rnode, &ni);
2517 if (error == 0) {
2518 name[namelen - 1] = CTL_DESTROY;
2519 rnode = rnode->sysctl_parent;
2520 sz = 0;
2521 (void)sysctl_destroy(&name[namelen - 1], 1, NULL,
2522 &sz, &node, sizeof(node),
2523 &name[0], NULL, rnode);
2524 }
2525
2526 log->log_left += namelen;
2527 }
2528
2529 KASSERT(log->log_size == log->log_left);
2530 free(log->log_num, M_SYSCTLDATA);
2531 free(log, M_SYSCTLDATA);
2532 *logp = NULL;
2533
2534 rw_exit(&sysctl_treelock);
2535 }
2536
2537 /*
2538 * ********************************************************************
2539 * old_sysctl -- A routine to bridge old-style internal calls to the
2540 * new infrastructure.
2541 * ********************************************************************
2542 */
2543 int
2544 old_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp,
2545 void *newp, size_t newlen, struct lwp *l)
2546 {
2547 int error;
2548 size_t oldlen = 0;
2549 size_t savelen;
2550
2551 if (oldlenp) {
2552 oldlen = *oldlenp;
2553 }
2554 savelen = oldlen;
2555
2556 sysctl_lock(newp != NULL);
2557 error = sysctl_dispatch(name, namelen, oldp, &oldlen,
2558 newp, newlen, name, l, NULL);
2559 sysctl_unlock();
2560 if (error == 0 && oldp != NULL && savelen < oldlen)
2561 error = ENOMEM;
2562 if (oldlenp) {
2563 *oldlenp = oldlen;
2564 }
2565
2566 return (error);
2567 }
2568
2569 /*
2570 * ********************************************************************
2571 * Section 4: Generic helper routines
2572 * ********************************************************************
2573 * "helper" routines that can do more finely grained access control,
2574 * construct structures from disparate information, create the
2575 * appearance of more nodes and sub-trees, etc. for example, if
2576 * CTL_PROC wanted a helper function, it could respond to a CTL_QUERY
2577 * with a dynamically created list of nodes that represented the
2578 * currently running processes at that instant.
2579 * ********************************************************************
2580 */
2581
2582 /*
2583 * first, a few generic helpers that provide:
2584 *
2585 * sysctl_needfunc() a readonly interface that emits a warning
2586 * sysctl_notavail() returns EOPNOTSUPP (generic error)
2587 * sysctl_null() an empty return buffer with no error
2588 */
2589 int
2590 sysctl_needfunc(SYSCTLFN_ARGS)
2591 {
2592 int error;
2593
2594 printf("!!SYSCTL_NEEDFUNC!!\n");
2595
2596 if (newp != NULL || namelen != 0)
2597 return (EOPNOTSUPP);
2598
2599 error = 0;
2600 if (oldp != NULL)
2601 error = sysctl_copyout(l, rnode->sysctl_data, oldp,
2602 MIN(rnode->sysctl_size, *oldlenp));
2603 *oldlenp = rnode->sysctl_size;
2604
2605 return (error);
2606 }
2607
2608 int
2609 sysctl_notavail(SYSCTLFN_ARGS)
2610 {
2611
2612 if (namelen == 1 && name[0] == CTL_QUERY)
2613 return (sysctl_query(SYSCTLFN_CALL(rnode)));
2614
2615 return (EOPNOTSUPP);
2616 }
2617
2618 int
2619 sysctl_null(SYSCTLFN_ARGS)
2620 {
2621
2622 *oldlenp = 0;
2623
2624 return (0);
2625 }
2626
/*
 * sysctl_map_flags -- translate a flag word through a lookup table.
 *
 * map		array of (input mask, output bits) pairs; the table ends
 *		at the first pair whose input mask is zero
 * word		flag word to translate
 *
 * Returns the OR of the output bits of every pair whose input mask has
 * at least one bit in common with word.
 */
u_int
sysctl_map_flags(const u_int *map, u_int word)
{
	const u_int *pair = map;
	u_int out = 0;

	while (pair[0] != 0) {
		if (word & pair[0])
			out |= pair[1];
		pair += 2;
	}

	return out;
}
2638
2639 /*
2640 * ********************************************************************
2641 * Section 5: The machinery that makes it all go
2642 * ********************************************************************
2643 * Memory "manglement" routines. Not much to this, eh?
2644 * ********************************************************************
2645 */
2646 static int
2647 sysctl_alloc(struct sysctlnode *p, int x)
2648 {
2649 int i;
2650 struct sysctlnode *n;
2651
2652 assert(p->sysctl_child == NULL);
2653
2654 if (x == 1)
2655 n = malloc(sizeof(struct sysctlnode),
2656 M_SYSCTLNODE, M_WAITOK|M_CANFAIL);
2657 else
2658 n = malloc(SYSCTL_DEFSIZE * sizeof(struct sysctlnode),
2659 M_SYSCTLNODE, M_WAITOK|M_CANFAIL);
2660 if (n == NULL)
2661 return (ENOMEM);
2662
2663 if (x == 1) {
2664 memset(n, 0, sizeof(struct sysctlnode));
2665 p->sysctl_csize = 1;
2666 } else {
2667 memset(n, 0, SYSCTL_DEFSIZE * sizeof(struct sysctlnode));
2668 p->sysctl_csize = SYSCTL_DEFSIZE;
2669 }
2670 p->sysctl_clen = 0;
2671
2672 for (i = 0; i < p->sysctl_csize; i++)
2673 n[i].sysctl_parent = p;
2674
2675 p->sysctl_child = n;
2676 return (0);
2677 }
2678
2679 static int
2680 sysctl_realloc(struct sysctlnode *p)
2681 {
2682 int i, j, olen;
2683 struct sysctlnode *n;
2684
2685 assert(p->sysctl_csize == p->sysctl_clen);
2686
2687 /*
2688 * how many do we have...how many should we make?
2689 */
2690 olen = p->sysctl_clen;
2691 n = malloc(2 * olen * sizeof(struct sysctlnode), M_SYSCTLNODE,
2692 M_WAITOK|M_CANFAIL);
2693 if (n == NULL)
2694 return (ENOMEM);
2695
2696 /*
2697 * move old children over...initialize new children
2698 */
2699 memcpy(n, p->sysctl_child, olen * sizeof(struct sysctlnode));
2700 memset(&n[olen], 0, olen * sizeof(struct sysctlnode));
2701 p->sysctl_csize = 2 * olen;
2702
2703 /*
2704 * reattach moved (and new) children to parent; if a moved
2705 * child node has children, reattach the parent pointers of
2706 * grandchildren
2707 */
2708 for (i = 0; i < p->sysctl_csize; i++) {
2709 n[i].sysctl_parent = p;
2710 if (n[i].sysctl_child != NULL) {
2711 for (j = 0; j < n[i].sysctl_csize; j++)
2712 n[i].sysctl_child[j].sysctl_parent = &n[i];
2713 }
2714 }
2715
2716 /*
2717 * get out with the old and in with the new
2718 */
2719 free(p->sysctl_child, M_SYSCTLNODE);
2720 p->sysctl_child = n;
2721
2722 return (0);
2723 }
2724
2725 static int
2726 sysctl_log_realloc(struct sysctllog *log)
2727 {
2728 int *n, s, d;
2729
2730 s = log->log_size * 2;
2731 d = log->log_size;
2732
2733 n = malloc(s * sizeof(int), M_SYSCTLDATA, M_WAITOK|M_CANFAIL);
2734 if (n == NULL)
2735 return (-1);
2736
2737 memset(n, 0, s * sizeof(int));
2738 memcpy(&n[d], log->log_num, d * sizeof(int));
2739 free(log->log_num, M_SYSCTLDATA);
2740 log->log_num = n;
2741 if (d)
2742 log->log_left += d;
2743 else
2744 log->log_left = s;
2745 log->log_size = s;
2746
2747 return (0);
2748 }
2749
2750 /*
2751 * ********************************************************************
2752 * Section 6: Conversion between API versions wrt the sysctlnode
2753 * ********************************************************************
2754 */
2755 static int
2756 sysctl_cvt_in(struct lwp *l, int *vp, const void *i, size_t sz,
2757 struct sysctlnode *node)
2758 {
2759 int error, flags;
2760
2761 if (i == NULL || sz < sizeof(flags))
2762 return (EINVAL);
2763
2764 error = sysctl_copyin(l, i, &flags, sizeof(flags));
2765 if (error)
2766 return (error);
2767
2768 #if (SYSCTL_VERSION != SYSCTL_VERS_1)
2769 #error sysctl_cvt_in: no support for SYSCTL_VERSION
2770 #endif /* (SYSCTL_VERSION != SYSCTL_VERS_1) */
2771
2772 if (sz == sizeof(*node) &&
2773 SYSCTL_VERS(flags) == SYSCTL_VERSION) {
2774 error = sysctl_copyin(l, i, node, sizeof(*node));
2775 if (error)
2776 return (error);
2777 *vp = SYSCTL_VERSION;
2778 return (0);
2779 }
2780
2781 return (EINVAL);
2782 }
2783
2784 static int
2785 sysctl_cvt_out(struct lwp *l, int v, const struct sysctlnode *i,
2786 void *ovp, size_t left, size_t *szp)
2787 {
2788 size_t sz = sizeof(*i);
2789 const void *src = i;
2790 int error;
2791
2792 switch (v) {
2793 case SYSCTL_VERS_0:
2794 return (EINVAL);
2795
2796 #if (SYSCTL_VERSION != SYSCTL_VERS_1)
2797 #error sysctl_cvt_out: no support for SYSCTL_VERSION
2798 #endif /* (SYSCTL_VERSION != SYSCTL_VERS_1) */
2799
2800 case SYSCTL_VERSION:
2801 /* nothing more to do here */
2802 break;
2803 }
2804
2805 if (ovp != NULL && left >= sz) {
2806 error = sysctl_copyout(l, src, ovp, sz);
2807 if (error)
2808 return (error);
2809 }
2810
2811 if (szp != NULL)
2812 *szp = sz;
2813
2814 return (0);
2815 }
2816