1 /*- 2 * SPDX-License-Identifier: BSD-4-Clause 3 * 4 * Copyright (c) 1995 Terrence R. Lambert 5 * All rights reserved. 6 * 7 * Copyright (c) 1982, 1986, 1989, 1991, 1992, 1993 8 * The Regents of the University of California. All rights reserved. 9 * (c) UNIX System Laboratories, Inc. 10 * All or some portions of this file are derived from material licensed 11 * to the University of California by American Telephone and Telegraph 12 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 13 * the permission of UNIX System Laboratories, Inc. 14 * 15 * Redistribution and use in source and binary forms, with or without 16 * modification, are permitted provided that the following conditions 17 * are met: 18 * 1. Redistributions of source code must retain the above copyright 19 * notice, this list of conditions and the following disclaimer. 20 * 2. Redistributions in binary form must reproduce the above copyright 21 * notice, this list of conditions and the following disclaimer in the 22 * documentation and/or other materials provided with the distribution. 23 * 3. All advertising materials mentioning features or use of this software 24 * must display the following acknowledgement: 25 * This product includes software developed by the University of 26 * California, Berkeley and its contributors. 27 * 4. Neither the name of the University nor the names of its contributors 28 * may be used to endorse or promote products derived from this software 29 * without specific prior written permission. 30 * 31 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 32 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 33 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 34 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 35 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 36 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 37 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 38 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 39 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 40 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 41 * SUCH DAMAGE. 42 */ 43 44 #include "opt_ddb.h" 45 #include "opt_kdb.h" 46 #include "opt_init_path.h" 47 #include "opt_verbose_sysinit.h" 48 49 #include <sys/param.h> 50 #include <sys/systm.h> 51 #include <sys/boottrace.h> 52 #include <sys/conf.h> 53 #include <sys/cpuset.h> 54 #include <sys/dtrace_bsd.h> 55 #include <sys/epoch.h> 56 #include <sys/eventhandler.h> 57 #include <sys/exec.h> 58 #include <sys/file.h> 59 #include <sys/filedesc.h> 60 #include <sys/imgact.h> 61 #include <sys/jail.h> 62 #include <sys/kernel.h> 63 #include <sys/ktr.h> 64 #include <sys/lock.h> 65 #include <sys/loginclass.h> 66 #include <sys/malloc.h> 67 #include <sys/mount.h> 68 #include <sys/mutex.h> 69 #include <sys/proc.h> 70 #include <sys/racct.h> 71 #include <sys/reboot.h> 72 #include <sys/resourcevar.h> 73 #include <sys/queue.h> 74 #include <sys/queue_mergesort.h> 75 #include <sys/sched.h> 76 #include <sys/signalvar.h> 77 #include <sys/sx.h> 78 #include <sys/syscallsubr.h> 79 #include <sys/sysctl.h> 80 #include <sys/sysent.h> 81 #include <sys/sysproto.h> 82 #include <sys/unistd.h> 83 #include <sys/vmmeter.h> 84 #include <sys/vnode.h> 85 86 #include <machine/cpu.h> 87 88 #include <security/audit/audit.h> 89 #include <security/mac/mac_framework.h> 90 91 #include <vm/vm.h> 92 #include <vm/vm_param.h> 93 #include <vm/vm_extern.h> 94 #include <vm/pmap.h> 95 #include <vm/vm_map.h> 96 #include <sys/copyright.h> 97 98 #include <ddb/ddb.h> 99 #include <ddb/db_sym.h> 100 101 void mi_startup(void); /* Should be elsewhere */ 102 103 /* Components of the first process -- never freed. */ 104 static struct session session0; 105 static struct pgrp pgrp0; 106 struct proc proc0; 107 struct thread0_storage thread0_st __aligned(32) = { 108 .t0st_thread = { 109 /* 110 * thread0.td_pflags is set with TDP_NOFAULTING to 111 * short-cut the vm page fault handler until it is 112 * ready. It is cleared in vm_init() after VM 113 * initialization. 114 */ 115 .td_pflags = TDP_NOFAULTING, 116 }, 117 }; 118 struct vmspace vmspace0; 119 struct proc *initproc; 120 121 int 122 linux_alloc_current_noop(struct thread *td __unused, int flags __unused) 123 { 124 return (0); 125 } 126 int (*lkpi_alloc_current)(struct thread *, int) = linux_alloc_current_noop; 127 128 #ifndef BOOTHOWTO 129 #define BOOTHOWTO 0 130 #endif 131 int boothowto = BOOTHOWTO; /* initialized so that it can be patched */ 132 SYSCTL_INT(_debug, OID_AUTO, boothowto, CTLFLAG_RD, &boothowto, 0, 133 "Boot control flags, passed from loader"); 134 135 #ifndef BOOTVERBOSE 136 #define BOOTVERBOSE 0 137 #endif 138 int bootverbose = BOOTVERBOSE; 139 SYSCTL_INT(_debug, OID_AUTO, bootverbose, CTLFLAG_RW, &bootverbose, 0, 140 "Control the output of verbose kernel messages"); 141 142 #ifdef VERBOSE_SYSINIT 143 /* 144 * We'll use the defined value of VERBOSE_SYSINIT from the kernel config to 145 * dictate the default VERBOSE_SYSINIT behavior. Significant values for this 146 * option and associated tunable are: 147 * - 0, 'compiled in but silent by default' 148 * - 1, 'compiled in but verbose by default' (default) 149 */ 150 int verbose_sysinit = VERBOSE_SYSINIT; 151 #endif 152 153 #ifdef INVARIANTS 154 FEATURE(invariants, "Kernel compiled with INVARIANTS, may affect performance"); 155 #endif 156 157 /* 158 * The sysinit linker set compiled into the kernel. These are placed onto the 159 * sysinit list by mi_startup; sysinit_add can add (e.g., from klds) additional 160 * sysinits to the linked list but the linker set here does not change. 161 */ 162 SET_DECLARE(sysinit_set, struct sysinit); 163 164 /* 165 * The sysinit lists. Items are moved to sysinit_done_list when done. 166 */ 167 static STAILQ_HEAD(sysinitlist, sysinit) sysinit_list; 168 static struct sysinitlist sysinit_done_list = 169 STAILQ_HEAD_INITIALIZER(sysinit_done_list); 170 171 /* 172 * Compare two sysinits; return -1, 0, or 1 if a comes before, at the same time 173 * as, or after b. 174 */ 175 static int 176 sysinit_compar(struct sysinit *a, struct sysinit *b, void *thunk __unused) 177 { 178 179 if (a->subsystem < b->subsystem) 180 return (-1); 181 if (a->subsystem > b->subsystem) 182 return (1); 183 if (a->order < b->order) 184 return (-1); 185 if (a->order > b->order) 186 return (1); 187 return (0); 188 } 189 190 static void 191 sysinit_mklist(struct sysinitlist *list, struct sysinit **set, 192 struct sysinit **set_end) 193 { 194 struct sysinit **sipp; 195 196 TSENTER(); 197 TSENTER2("listify"); 198 STAILQ_INIT(list); 199 for (sipp = set; sipp < set_end; sipp++) 200 STAILQ_INSERT_TAIL(list, *sipp, next); 201 TSEXIT2("listify"); 202 TSENTER2("mergesort"); 203 STAILQ_MERGESORT(list, NULL, sysinit_compar, sysinit, next); 204 TSEXIT2("mergesort"); 205 TSEXIT(); 206 } 207 208 /* 209 * Merge a new sysinit set into the sysinit list. 210 */ 211 void 212 sysinit_add(struct sysinit **set, struct sysinit **set_end) 213 { 214 struct sysinitlist new_list; 215 216 TSENTER(); 217 218 /* Construct a sorted list from the new sysinits. */ 219 sysinit_mklist(&new_list, set, set_end); 220 221 /* Merge the new list into the existing one. */ 222 TSENTER2("STAILQ_MERGE"); 223 STAILQ_MERGE(&sysinit_list, &new_list, NULL, sysinit_compar, sysinit, next); 224 TSEXIT2("STAILQ_MERGE"); 225 226 TSEXIT(); 227 } 228 229 #if defined (DDB) && defined(VERBOSE_SYSINIT) 230 static const char * 231 symbol_name(vm_offset_t va, db_strategy_t strategy) 232 { 233 const char *name; 234 c_db_sym_t sym; 235 db_expr_t offset; 236 237 if (va == 0) 238 return (NULL); 239 sym = db_search_symbol(va, strategy, &offset); 240 if (offset != 0) 241 return (NULL); 242 db_symbol_values(sym, &name, NULL); 243 return (name); 244 } 245 #endif 246 247 /* 248 * System startup; initialize the world, create process 0, mount root 249 * filesystem, and fork to create init and pagedaemon. Most of the 250 * hard work is done in the lower-level initialization routines including 251 * startup(), which does memory initialization and autoconfiguration. 252 * 253 * This allows simple addition of new kernel subsystems that require 254 * boot time initialization. It also allows substitution of subsystem 255 * (for instance, a scheduler, kernel profiler, or VM system) by object 256 * module. Finally, it allows for optional "kernel threads". 257 */ 258 void 259 mi_startup(void) 260 { 261 struct sysinit *sip; 262 int last; 263 #if defined(VERBOSE_SYSINIT) 264 int verbose; 265 #endif 266 267 TSENTER(); 268 269 if (boothowto & RB_VERBOSE) 270 bootverbose++; 271 272 /* Construct and sort sysinit list. */ 273 sysinit_mklist(&sysinit_list, SET_BEGIN(sysinit_set), SET_LIMIT(sysinit_set)); 274 275 last = SI_SUB_DUMMY; 276 #if defined(VERBOSE_SYSINIT) 277 TUNABLE_INT_FETCH("debug.verbose_sysinit", &verbose_sysinit); 278 verbose = 0; 279 #if !defined(DDB) 280 printf("VERBOSE_SYSINIT: DDB not enabled, symbol lookups disabled.\n"); 281 #endif 282 #endif 283 284 /* 285 * Perform each system initialization task from the ordered list. Note 286 * that if sysinit_list is modified (e.g. by a KLD) we will nonetheless 287 * always perform the earlist-sorted sysinit at each step; using the 288 * STAILQ_FOREACH macro would result in items being skipped if inserted 289 * earlier than the "current item". 290 */ 291 while ((sip = STAILQ_FIRST(&sysinit_list)) != NULL) { 292 STAILQ_REMOVE_HEAD(&sysinit_list, next); 293 STAILQ_INSERT_TAIL(&sysinit_done_list, sip, next); 294 295 if (sip->subsystem == SI_SUB_DUMMY) 296 continue; /* skip dummy task(s)*/ 297 298 if (sip->subsystem > last) 299 BOOTTRACE_INIT("sysinit 0x%7x", sip->subsystem); 300 301 #if defined(VERBOSE_SYSINIT) 302 if (sip->subsystem != last && verbose_sysinit != 0) { 303 verbose = 1; 304 printf("subsystem %x\n", sip->subsystem); 305 } 306 if (verbose) { 307 #if defined(DDB) 308 const char *func, *data; 309 310 func = symbol_name((vm_offset_t)sip->func, 311 DB_STGY_PROC); 312 data = symbol_name((vm_offset_t)sip->udata, 313 DB_STGY_ANY); 314 if (func != NULL && data != NULL) 315 printf(" %s(&%s)... ", func, data); 316 else if (func != NULL) 317 printf(" %s(%p)... ", func, sip->udata); 318 else 319 #endif 320 printf(" %p(%p)... ", sip->func, 321 sip->udata); 322 } 323 #endif 324 325 /* Call function */ 326 (*(sip->func))(sip->udata); 327 328 #if defined(VERBOSE_SYSINIT) 329 if (verbose) 330 printf("done.\n"); 331 #endif 332 333 /* Check off the one we're just done */ 334 last = sip->subsystem; 335 } 336 337 TSEXIT(); /* Here so we don't overlap with start_init. */ 338 BOOTTRACE("mi_startup done"); 339 340 mtx_assert(&Giant, MA_OWNED | MA_NOTRECURSED); 341 mtx_unlock(&Giant); 342 343 /* 344 * We can't free our thread structure since it is statically allocated. 345 * Just sleep forever. This thread could be repurposed for something if 346 * the need arises. 347 */ 348 for (;;) 349 tsleep(__builtin_frame_address(0), PNOLOCK, "-", 0); 350 } 351 352 static void 353 print_caddr_t(const void *data) 354 { 355 printf("%s", (const char *)data); 356 } 357 358 static void 359 print_version(const void *data __unused) 360 { 361 int len; 362 363 /* Strip a trailing newline from version. */ 364 len = strlen(version); 365 while (len > 0 && version[len - 1] == '\n') 366 len--; 367 printf("%.*s %s\n", len, version, machine); 368 printf("%s\n", compiler_version); 369 } 370 371 C_SYSINIT(announce, SI_SUB_COPYRIGHT, SI_ORDER_FIRST, print_caddr_t, 372 copyright); 373 C_SYSINIT(trademark, SI_SUB_COPYRIGHT, SI_ORDER_SECOND, print_caddr_t, 374 trademark); 375 C_SYSINIT(version, SI_SUB_COPYRIGHT, SI_ORDER_THIRD, print_version, NULL); 376 377 #ifdef WITNESS 378 static const char wit_warn[] = 379 "WARNING: WITNESS option enabled, expect reduced performance.\n"; 380 C_SYSINIT(witwarn, SI_SUB_COPYRIGHT, SI_ORDER_FOURTH, 381 print_caddr_t, wit_warn); 382 C_SYSINIT(witwarn2, SI_SUB_LAST, SI_ORDER_FOURTH, 383 print_caddr_t, wit_warn); 384 #endif 385 386 #ifdef DIAGNOSTIC 387 static const char diag_warn[] = 388 "WARNING: DIAGNOSTIC option enabled, expect reduced performance.\n"; 389 C_SYSINIT(diagwarn, SI_SUB_COPYRIGHT, SI_ORDER_FIFTH, 390 print_caddr_t, diag_warn); 391 C_SYSINIT(diagwarn2, SI_SUB_LAST, SI_ORDER_FIFTH, 392 print_caddr_t, diag_warn); 393 #endif 394 395 #if __SIZEOF_LONG__ == 4 396 static const char ilp32_warn[] = 397 "WARNING: 32-bit kernels are deprecated and may be removed in FreeBSD 16.0.\n"; 398 C_SYSINIT(ilp32warn, SI_SUB_COPYRIGHT, SI_ORDER_FIFTH, 399 print_caddr_t, ilp32_warn); 400 C_SYSINIT(ilp32warn2, SI_SUB_LAST, SI_ORDER_FIFTH, 401 print_caddr_t, ilp32_warn); 402 #endif 403 404 static int 405 null_fetch_syscall_args(struct thread *td __unused) 406 { 407 408 panic("null_fetch_syscall_args"); 409 } 410 411 static void 412 null_set_syscall_retval(struct thread *td __unused, int error __unused) 413 { 414 415 panic("null_set_syscall_retval"); 416 } 417 418 static void 419 null_set_fork_retval(struct thread *td __unused) 420 { 421 422 } 423 424 struct sysentvec null_sysvec = { 425 .sv_size = 0, 426 .sv_table = NULL, 427 .sv_fixup = NULL, 428 .sv_sendsig = NULL, 429 .sv_sigcode = NULL, 430 .sv_szsigcode = NULL, 431 .sv_name = "null", 432 .sv_coredump = NULL, 433 .sv_minsigstksz = 0, 434 .sv_minuser = VM_MIN_ADDRESS, 435 .sv_maxuser = VM_MAXUSER_ADDRESS, 436 .sv_usrstack = USRSTACK, 437 .sv_psstrings = PS_STRINGS, 438 .sv_psstringssz = sizeof(struct ps_strings), 439 .sv_stackprot = VM_PROT_ALL, 440 .sv_copyout_strings = NULL, 441 .sv_setregs = NULL, 442 .sv_fixlimit = NULL, 443 .sv_maxssiz = NULL, 444 .sv_flags = 0, 445 .sv_set_syscall_retval = null_set_syscall_retval, 446 .sv_fetch_syscall_args = null_fetch_syscall_args, 447 .sv_syscallnames = NULL, 448 .sv_schedtail = NULL, 449 .sv_thread_detach = NULL, 450 .sv_trap = NULL, 451 .sv_set_fork_retval = null_set_fork_retval, 452 .sv_regset_begin = NULL, 453 .sv_regset_end = NULL, 454 }; 455 456 /* 457 * The two following SYSINIT's are proc0 specific glue code. I am not 458 * convinced that they can not be safely combined, but their order of 459 * operation has been maintained as the same as the original init_main.c 460 * for right now. 461 */ 462 /* ARGSUSED*/ 463 static void 464 proc0_init(void *dummy __unused) 465 { 466 struct proc *p; 467 struct thread *td; 468 struct ucred *newcred; 469 struct uidinfo tmpuinfo; 470 struct loginclass tmplc = { 471 .lc_name = "", 472 }; 473 vm_paddr_t pageablemem; 474 int i; 475 476 GIANT_REQUIRED; 477 p = &proc0; 478 td = &thread0; 479 480 /* 481 * Initialize magic number and osrel. 482 */ 483 p->p_magic = P_MAGIC; 484 p->p_osrel = osreldate; 485 486 /* 487 * Initialize thread and process structures. 488 */ 489 procinit(); /* set up proc zone */ 490 threadinit(); /* set up UMA zones */ 491 492 /* 493 * Initialise scheduler resources. 494 * Add scheduler specific parts to proc, thread as needed. 495 */ 496 schedinit(); /* scheduler gets its house in order */ 497 498 /* 499 * Create process 0. 500 */ 501 LIST_INSERT_HEAD(&allproc, p, p_list); 502 LIST_INSERT_HEAD(PIDHASH(0), p, p_hash); 503 mtx_init(&pgrp0.pg_mtx, "process group", NULL, MTX_DEF | MTX_DUPOK); 504 sx_init(&pgrp0.pg_killsx, "killpg racer"); 505 p->p_pgrp = &pgrp0; 506 LIST_INSERT_HEAD(PGRPHASH(0), &pgrp0, pg_hash); 507 LIST_INIT(&pgrp0.pg_members); 508 LIST_INSERT_HEAD(&pgrp0.pg_members, p, p_pglist); 509 510 pgrp0.pg_session = &session0; 511 mtx_init(&session0.s_mtx, "session", NULL, MTX_DEF); 512 refcount_init(&session0.s_count, 1); 513 session0.s_leader = p; 514 515 p->p_sysent = &null_sysvec; 516 p->p_flag = P_SYSTEM | P_INMEM | P_KPROC; 517 p->p_flag2 = 0; 518 p->p_state = PRS_NORMAL; 519 p->p_klist = knlist_alloc(&p->p_mtx); 520 STAILQ_INIT(&p->p_ktr); 521 p->p_nice = NZERO; 522 td->td_tid = THREAD0_TID; 523 tidhash_add(td); 524 TD_SET_STATE(td, TDS_RUNNING); 525 td->td_pri_class = PRI_TIMESHARE; 526 td->td_user_pri = PUSER; 527 td->td_base_user_pri = PUSER; 528 td->td_lend_user_pri = PRI_MAX; 529 td->td_priority = PVM; 530 td->td_base_pri = PVM; 531 td->td_oncpu = curcpu; 532 td->td_flags = TDF_INMEM; 533 td->td_pflags = TDP_KTHREAD; 534 td->td_cpuset = cpuset_thread0(); 535 td->td_domain.dr_policy = td->td_cpuset->cs_domain; 536 prison0_init(); 537 p->p_peers = 0; 538 p->p_leader = p; 539 p->p_reaper = p; 540 p->p_treeflag |= P_TREE_REAPER; 541 LIST_INIT(&p->p_reaplist); 542 543 strncpy(p->p_comm, "kernel", sizeof (p->p_comm)); 544 strncpy(td->td_name, "kernel", sizeof (td->td_name)); 545 546 callout_init_mtx(&p->p_itcallout, &p->p_mtx, 0); 547 callout_init_mtx(&p->p_limco, &p->p_mtx, 0); 548 callout_init(&td->td_slpcallout, 1); 549 TAILQ_INIT(&p->p_kqtim_stop); 550 551 /* Create credentials. */ 552 newcred = crget(); 553 newcred->cr_ngroups = 1; /* group 0 */ 554 /* A hack to prevent uifind from tripping over NULL pointers. */ 555 curthread->td_ucred = newcred; 556 tmpuinfo.ui_uid = 1; 557 newcred->cr_uidinfo = newcred->cr_ruidinfo = &tmpuinfo; 558 newcred->cr_uidinfo = uifind(0); 559 newcred->cr_ruidinfo = uifind(0); 560 newcred->cr_loginclass = &tmplc; 561 newcred->cr_loginclass = loginclass_find("default"); 562 /* End hack. creds get properly set later with thread_cow_get_proc */ 563 curthread->td_ucred = NULL; 564 newcred->cr_prison = &prison0; 565 newcred->cr_users++; /* avoid assertion failure */ 566 p->p_ucred = crcowget(newcred); 567 newcred->cr_users--; 568 crfree(newcred); 569 #ifdef AUDIT 570 audit_cred_kproc0(newcred); 571 #endif 572 #ifdef MAC 573 mac_cred_create_kproc0(newcred); 574 #endif 575 /* Create sigacts. */ 576 p->p_sigacts = sigacts_alloc(); 577 578 /* Initialize signal state for process 0. */ 579 siginit(&proc0); 580 581 /* Create the file descriptor table. */ 582 p->p_pd = pdinit(NULL, false); 583 p->p_fd = fdinit(); 584 p->p_fdtol = NULL; 585 586 /* Create the limits structures. */ 587 p->p_limit = lim_alloc(); 588 for (i = 0; i < RLIM_NLIMITS; i++) 589 p->p_limit->pl_rlimit[i].rlim_cur = 590 p->p_limit->pl_rlimit[i].rlim_max = RLIM_INFINITY; 591 p->p_limit->pl_rlimit[RLIMIT_NOFILE].rlim_cur = 592 p->p_limit->pl_rlimit[RLIMIT_NOFILE].rlim_max = maxfiles; 593 p->p_limit->pl_rlimit[RLIMIT_NPROC].rlim_cur = 594 p->p_limit->pl_rlimit[RLIMIT_NPROC].rlim_max = maxproc; 595 p->p_limit->pl_rlimit[RLIMIT_DATA].rlim_cur = dfldsiz; 596 p->p_limit->pl_rlimit[RLIMIT_DATA].rlim_max = maxdsiz; 597 p->p_limit->pl_rlimit[RLIMIT_STACK].rlim_cur = dflssiz; 598 p->p_limit->pl_rlimit[RLIMIT_STACK].rlim_max = maxssiz; 599 /* Cast to avoid overflow on i386/PAE. */ 600 pageablemem = ptoa((vm_paddr_t)vm_free_count()); 601 p->p_limit->pl_rlimit[RLIMIT_RSS].rlim_cur = 602 p->p_limit->pl_rlimit[RLIMIT_RSS].rlim_max = pageablemem; 603 p->p_limit->pl_rlimit[RLIMIT_MEMLOCK].rlim_cur = pageablemem / 3; 604 p->p_limit->pl_rlimit[RLIMIT_MEMLOCK].rlim_max = pageablemem; 605 p->p_cpulimit = RLIM_INFINITY; 606 607 PROC_LOCK(p); 608 thread_cow_get_proc(td, p); 609 PROC_UNLOCK(p); 610 611 /* Initialize resource accounting structures. */ 612 racct_create(&p->p_racct); 613 614 p->p_stats = pstats_alloc(); 615 616 /* Allocate a prototype map so we have something to fork. */ 617 p->p_vmspace = &vmspace0; 618 refcount_init(&vmspace0.vm_refcnt, 1); 619 pmap_pinit0(vmspace_pmap(&vmspace0)); 620 621 /* 622 * proc0 is not expected to enter usermode, so there is no special 623 * handling for sv_minuser here, like is done for exec_new_vmspace(). 624 */ 625 vm_map_init(&vmspace0.vm_map, vmspace_pmap(&vmspace0), 626 p->p_sysent->sv_minuser, p->p_sysent->sv_maxuser); 627 628 /* 629 * Call the init and ctor for the new thread and proc. We wait 630 * to do this until all other structures are fairly sane. 631 */ 632 EVENTHANDLER_DIRECT_INVOKE(process_init, p); 633 EVENTHANDLER_DIRECT_INVOKE(thread_init, td); 634 #ifdef KDTRACE_HOOKS 635 kdtrace_proc_ctor(p); 636 kdtrace_thread_ctor(td); 637 #endif 638 EVENTHANDLER_DIRECT_INVOKE(process_ctor, p); 639 EVENTHANDLER_DIRECT_INVOKE(thread_ctor, td); 640 641 /* 642 * Charge root for one process. 643 */ 644 (void)chgproccnt(p->p_ucred->cr_ruidinfo, 1, 0); 645 PROC_LOCK(p); 646 racct_add_force(p, RACCT_NPROC, 1); 647 PROC_UNLOCK(p); 648 } 649 SYSINIT(p0init, SI_SUB_INTRINSIC, SI_ORDER_FIRST, proc0_init, NULL); 650 651 /* ARGSUSED*/ 652 static void 653 proc0_post(void *dummy __unused) 654 { 655 struct proc *p; 656 struct thread *td; 657 658 /* 659 * Now we can look at the time, having had a chance to verify the 660 * time from the filesystem. Pretend that all current threads 661 * started now. 662 */ 663 sx_slock(&allproc_lock); 664 FOREACH_PROC_IN_SYSTEM(p) { 665 PROC_LOCK(p); 666 if (p->p_state == PRS_NEW) { 667 PROC_UNLOCK(p); 668 continue; 669 } 670 microuptime(&p->p_stats->p_start); 671 PROC_STATLOCK(p); 672 ruxreset(&p->p_rux); 673 FOREACH_THREAD_IN_PROC(p, td) { 674 thread_lock(td); 675 td->td_incruntime = 0; 676 td->td_runtime = 0; 677 td->td_pticks = 0; 678 td->td_sticks = 0; 679 td->td_iticks = 0; 680 td->td_uticks = 0; 681 ruxreset(&td->td_rux); 682 thread_unlock(td); 683 } 684 PROC_STATUNLOCK(p); 685 PROC_UNLOCK(p); 686 } 687 sx_sunlock(&allproc_lock); 688 PCPU_SET(switchtime, cpu_ticks()); 689 PCPU_SET(switchticks, ticks); 690 } 691 SYSINIT(p0post, SI_SUB_INTRINSIC_POST, SI_ORDER_FIRST, proc0_post, NULL); 692 693 /* 694 *************************************************************************** 695 **** 696 **** The following SYSINIT's and glue code should be moved to the 697 **** respective files on a per subsystem basis. 698 **** 699 *************************************************************************** 700 */ 701 702 /* 703 * List of paths to try when searching for "init". 704 */ 705 static char init_path[MAXPATHLEN] = 706 #ifdef INIT_PATH 707 __XSTRING(INIT_PATH); 708 #else 709 "/sbin/init:/sbin/oinit:/sbin/init.bak:/rescue/init"; 710 #endif 711 SYSCTL_STRING(_kern, OID_AUTO, init_path, CTLFLAG_RD, init_path, 0, 712 "Path used to search the init process"); 713 714 /* 715 * Shutdown timeout of init(8). 716 * Unused within kernel, but used to control init(8), hence do not remove. 717 */ 718 #ifndef INIT_SHUTDOWN_TIMEOUT 719 #define INIT_SHUTDOWN_TIMEOUT 120 720 #endif 721 static int init_shutdown_timeout = INIT_SHUTDOWN_TIMEOUT; 722 SYSCTL_INT(_kern, OID_AUTO, init_shutdown_timeout, 723 CTLFLAG_RW, &init_shutdown_timeout, 0, "Shutdown timeout of init(8). " 724 "Unused within kernel, but used to control init(8)"); 725 726 /* 727 * Start the initial user process; try exec'ing each pathname in init_path. 728 * The program is invoked with one argument containing the boot flags. 729 */ 730 static void 731 start_init(void *dummy) 732 { 733 struct image_args args; 734 int error; 735 char *var, *path; 736 char *free_init_path, *tmp_init_path; 737 struct thread *td; 738 struct proc *p; 739 struct vmspace *oldvmspace; 740 741 TSENTER(); /* Here so we don't overlap with mi_startup. */ 742 743 td = curthread; 744 p = td->td_proc; 745 746 vfs_mountroot(); 747 748 /* Wipe GELI passphrase from the environment. */ 749 kern_unsetenv("kern.geom.eli.passphrase"); 750 751 /* For Multicons, report which console is primary to both */ 752 if (boothowto & RB_MULTIPLE) { 753 if (boothowto & RB_SERIAL) 754 printf("Dual Console: Serial Primary, Video Secondary\n"); 755 else 756 printf("Dual Console: Video Primary, Serial Secondary\n"); 757 } 758 759 if ((var = kern_getenv("init_path")) != NULL) { 760 strlcpy(init_path, var, sizeof(init_path)); 761 freeenv(var); 762 } 763 free_init_path = tmp_init_path = strdup(init_path, M_TEMP); 764 765 while ((path = strsep(&tmp_init_path, ":")) != NULL) { 766 if (bootverbose) 767 printf("start_init: trying %s\n", path); 768 769 memset(&args, 0, sizeof(args)); 770 error = exec_alloc_args(&args); 771 if (error != 0) 772 panic("%s: Can't allocate space for init arguments %d", 773 __func__, error); 774 775 error = exec_args_add_fname(&args, path, UIO_SYSSPACE); 776 if (error != 0) 777 panic("%s: Can't add fname %d", __func__, error); 778 error = exec_args_add_arg(&args, path, UIO_SYSSPACE); 779 if (error != 0) 780 panic("%s: Can't add argv[0] %d", __func__, error); 781 if (boothowto & RB_SINGLE) 782 error = exec_args_add_arg(&args, "-s", UIO_SYSSPACE); 783 if (error != 0) 784 panic("%s: Can't add argv[0] %d", __func__, error); 785 786 /* 787 * Now try to exec the program. If can't for any reason 788 * other than it doesn't exist, complain. 789 * 790 * Otherwise, return via fork_trampoline() all the way 791 * to user mode as init! 792 */ 793 KASSERT((td->td_pflags & TDP_EXECVMSPC) == 0, 794 ("nested execve")); 795 memset(td->td_frame, 0, sizeof(*td->td_frame)); 796 oldvmspace = p->p_vmspace; 797 error = kern_execve(td, &args, NULL, oldvmspace); 798 KASSERT(error != 0, 799 ("kern_execve returned success, not EJUSTRETURN")); 800 if (error == EJUSTRETURN) { 801 exec_cleanup(td, oldvmspace); 802 free(free_init_path, M_TEMP); 803 TSEXIT(); 804 return; 805 } 806 if (error != ENOENT) 807 printf("exec %s: error %d\n", path, error); 808 } 809 free(free_init_path, M_TEMP); 810 printf("init: not found in path %s\n", init_path); 811 panic("no init"); 812 } 813 814 /* 815 * Like kproc_create(), but runs in its own address space. We do this 816 * early to reserve pid 1. Note special case - do not make it 817 * runnable yet, init execution is started when userspace can be served. 818 */ 819 static void 820 create_init(const void *udata __unused) 821 { 822 struct fork_req fr; 823 struct ucred *newcred, *oldcred; 824 struct thread *td; 825 int error; 826 827 bzero(&fr, sizeof(fr)); 828 fr.fr_flags = RFFDG | RFPROC | RFSTOPPED; 829 fr.fr_procp = &initproc; 830 error = fork1(&thread0, &fr); 831 if (error) 832 panic("cannot fork init: %d\n", error); 833 KASSERT(initproc->p_pid == 1, ("create_init: initproc->p_pid != 1")); 834 /* divorce init's credentials from the kernel's */ 835 newcred = crget(); 836 sx_xlock(&proctree_lock); 837 PROC_LOCK(initproc); 838 initproc->p_flag |= P_SYSTEM | P_INMEM; 839 initproc->p_treeflag |= P_TREE_REAPER; 840 oldcred = initproc->p_ucred; 841 crcopy(newcred, oldcred); 842 #ifdef MAC 843 mac_cred_create_init(newcred); 844 #endif 845 #ifdef AUDIT 846 audit_cred_proc1(newcred); 847 #endif 848 proc_set_cred(initproc, newcred); 849 td = FIRST_THREAD_IN_PROC(initproc); 850 crcowfree(td); 851 td->td_realucred = crcowget(initproc->p_ucred); 852 td->td_ucred = td->td_realucred; 853 PROC_UNLOCK(initproc); 854 sx_xunlock(&proctree_lock); 855 crfree(oldcred); 856 cpu_fork_kthread_handler(FIRST_THREAD_IN_PROC(initproc), 857 start_init, NULL); 858 } 859 SYSINIT(init, SI_SUB_CREATE_INIT, SI_ORDER_FIRST, create_init, NULL); 860 861 /* 862 * Make it runnable now. 863 */ 864 static void 865 kick_init(const void *udata __unused) 866 { 867 struct thread *td; 868 869 td = FIRST_THREAD_IN_PROC(initproc); 870 thread_lock(td); 871 TD_SET_CAN_RUN(td); 872 sched_add(td, SRQ_BORING); 873 } 874 SYSINIT(kickinit, SI_SUB_KTHREAD_INIT, SI_ORDER_MIDDLE, kick_init, NULL); 875 876 /* 877 * DDB(4). 878 */ 879 #ifdef DDB 880 static void 881 db_show_print_syinit(struct sysinit *sip, bool ddb) 882 { 883 const char *sname, *funcname; 884 c_db_sym_t sym; 885 db_expr_t offset; 886 887 #define xprint(...) \ 888 if (ddb) \ 889 db_printf(__VA_ARGS__); \ 890 else \ 891 printf(__VA_ARGS__) 892 893 if (sip == NULL) { 894 xprint("%s: no sysinit * given\n", __func__); 895 return; 896 } 897 898 sym = db_search_symbol((vm_offset_t)sip, DB_STGY_ANY, &offset); 899 db_symbol_values(sym, &sname, NULL); 900 sym = db_search_symbol((vm_offset_t)sip->func, DB_STGY_PROC, &offset); 901 db_symbol_values(sym, &funcname, NULL); 902 xprint("%s(%p)\n", (sname != NULL) ? sname : "", sip); 903 xprint(" %#08x %#08x\n", sip->subsystem, sip->order); 904 xprint(" %p(%s)(%p)\n", 905 sip->func, (funcname != NULL) ? funcname : "", sip->udata); 906 #undef xprint 907 } 908 909 DB_SHOW_COMMAND_FLAGS(sysinit, db_show_sysinit, DB_CMD_MEMSAFE) 910 { 911 struct sysinit *sip; 912 913 db_printf("SYSINIT vs Name(Ptr)\n"); 914 db_printf(" Subsystem Order\n"); 915 db_printf(" Function(Name)(Arg)\n"); 916 STAILQ_FOREACH(sip, &sysinit_done_list, next) { 917 db_show_print_syinit(sip, true); 918 if (db_pager_quit) 919 return; 920 } 921 STAILQ_FOREACH(sip, &sysinit_list, next) { 922 db_show_print_syinit(sip, true); 923 if (db_pager_quit) 924 break; 925 } 926 } 927 #endif /* DDB */ 928