xref: /freebsd/sys/kern/init_main.c (revision f856af0466c076beef4ea9b15d088e1119a945b8)
1 /*-
2  * Copyright (c) 1995 Terrence R. Lambert
3  * All rights reserved.
4  *
5  * Copyright (c) 1982, 1986, 1989, 1991, 1992, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  * (c) UNIX System Laboratories, Inc.
8  * All or some portions of this file are derived from material licensed
9  * to the University of California by American Telephone and Telegraph
10  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
11  * the permission of UNIX System Laboratories, Inc.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 3. All advertising materials mentioning features or use of this software
22  *    must display the following acknowledgement:
23  *	This product includes software developed by the University of
24  *	California, Berkeley and its contributors.
25  * 4. Neither the name of the University nor the names of its contributors
26  *    may be used to endorse or promote products derived from this software
27  *    without specific prior written permission.
28  *
29  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39  * SUCH DAMAGE.
40  *
41  *	@(#)init_main.c	8.9 (Berkeley) 1/21/94
42  */
43 
44 #include <sys/cdefs.h>
45 __FBSDID("$FreeBSD$");
46 
47 #include "opt_ddb.h"
48 #include "opt_init_path.h"
49 #include "opt_mac.h"
50 
51 #include <sys/param.h>
52 #include <sys/kernel.h>
53 #include <sys/exec.h>
54 #include <sys/file.h>
55 #include <sys/filedesc.h>
56 #include <sys/ktr.h>
57 #include <sys/lock.h>
58 #include <sys/mount.h>
59 #include <sys/mutex.h>
60 #include <sys/syscallsubr.h>
61 #include <sys/sysctl.h>
62 #include <sys/proc.h>
63 #include <sys/resourcevar.h>
64 #include <sys/systm.h>
65 #include <sys/signalvar.h>
66 #include <sys/vnode.h>
67 #include <sys/sysent.h>
68 #include <sys/reboot.h>
69 #include <sys/sched.h>
70 #include <sys/sx.h>
71 #include <sys/sysproto.h>
72 #include <sys/vmmeter.h>
73 #include <sys/unistd.h>
74 #include <sys/malloc.h>
75 #include <sys/conf.h>
76 
77 #include <machine/cpu.h>
78 
79 #include <security/audit/audit.h>
80 #include <security/mac/mac_framework.h>
81 
82 #include <vm/vm.h>
83 #include <vm/vm_param.h>
84 #include <vm/pmap.h>
85 #include <vm/vm_map.h>
86 #include <sys/copyright.h>
87 
88 #include <ddb/ddb.h>
89 #include <ddb/db_sym.h>
90 
91 void mi_startup(void);				/* Should be elsewhere */
92 
93 /* Components of the first process -- never freed. */
94 static struct session session0;
95 static struct pgrp pgrp0;
96 struct	proc proc0;
97 struct	thread thread0 __aligned(8);
98 struct	vmspace vmspace0;
99 struct	proc *initproc;
100 
101 int	boothowto = 0;		/* initialized so that it can be patched */
102 SYSCTL_INT(_debug, OID_AUTO, boothowto, CTLFLAG_RD, &boothowto, 0, "");
103 int	bootverbose;
104 SYSCTL_INT(_debug, OID_AUTO, bootverbose, CTLFLAG_RW, &bootverbose, 0, "");
105 
106 /*
107  * This ensures that there is at least one entry so that the sysinit_set
108  * symbol is not undefined.  A sybsystem ID of SI_SUB_DUMMY is never
109  * executed.
110  */
111 SYSINIT(placeholder, SI_SUB_DUMMY, SI_ORDER_ANY, NULL, NULL)
112 
113 /*
114  * The sysinit table itself.  Items are checked off as the are run.
115  * If we want to register new sysinit types, add them to newsysinit.
116  */
117 SET_DECLARE(sysinit_set, struct sysinit);
118 struct sysinit **sysinit, **sysinit_end;
119 struct sysinit **newsysinit, **newsysinit_end;
120 
121 /*
122  * Merge a new sysinit set into the current set, reallocating it if
123  * necessary.  This can only be called after malloc is running.
124  */
125 void
126 sysinit_add(struct sysinit **set, struct sysinit **set_end)
127 {
128 	struct sysinit **newset;
129 	struct sysinit **sipp;
130 	struct sysinit **xipp;
131 	int count;
132 
133 	count = set_end - set;
134 	if (newsysinit)
135 		count += newsysinit_end - newsysinit;
136 	else
137 		count += sysinit_end - sysinit;
138 	newset = malloc(count * sizeof(*sipp), M_TEMP, M_NOWAIT);
139 	if (newset == NULL)
140 		panic("cannot malloc for sysinit");
141 	xipp = newset;
142 	if (newsysinit)
143 		for (sipp = newsysinit; sipp < newsysinit_end; sipp++)
144 			*xipp++ = *sipp;
145 	else
146 		for (sipp = sysinit; sipp < sysinit_end; sipp++)
147 			*xipp++ = *sipp;
148 	for (sipp = set; sipp < set_end; sipp++)
149 		*xipp++ = *sipp;
150 	if (newsysinit)
151 		free(newsysinit, M_TEMP);
152 	newsysinit = newset;
153 	newsysinit_end = newset + count;
154 }
155 
156 /*
157  * System startup; initialize the world, create process 0, mount root
158  * filesystem, and fork to create init and pagedaemon.  Most of the
159  * hard work is done in the lower-level initialization routines including
160  * startup(), which does memory initialization and autoconfiguration.
161  *
162  * This allows simple addition of new kernel subsystems that require
163  * boot time initialization.  It also allows substitution of subsystem
164  * (for instance, a scheduler, kernel profiler, or VM system) by object
165  * module.  Finally, it allows for optional "kernel threads".
166  */
167 void
168 mi_startup(void)
169 {
170 
171 	register struct sysinit **sipp;		/* system initialization*/
172 	register struct sysinit **xipp;		/* interior loop of sort*/
173 	register struct sysinit *save;		/* bubble*/
174 
175 #if defined(VERBOSE_SYSINIT)
176 	int last;
177 	int verbose;
178 #endif
179 
180 	if (sysinit == NULL) {
181 		sysinit = SET_BEGIN(sysinit_set);
182 		sysinit_end = SET_LIMIT(sysinit_set);
183 	}
184 
185 restart:
186 	/*
187 	 * Perform a bubble sort of the system initialization objects by
188 	 * their subsystem (primary key) and order (secondary key).
189 	 */
190 	for (sipp = sysinit; sipp < sysinit_end; sipp++) {
191 		for (xipp = sipp + 1; xipp < sysinit_end; xipp++) {
192 			if ((*sipp)->subsystem < (*xipp)->subsystem ||
193 			     ((*sipp)->subsystem == (*xipp)->subsystem &&
194 			      (*sipp)->order <= (*xipp)->order))
195 				continue;	/* skip*/
196 			save = *sipp;
197 			*sipp = *xipp;
198 			*xipp = save;
199 		}
200 	}
201 
202 #if defined(VERBOSE_SYSINIT)
203 	last = SI_SUB_COPYRIGHT;
204 	verbose = 0;
205 #if !defined(DDB)
206 	printf("VERBOSE_SYSINIT: DDB not enabled, symbol lookups disabled.\n");
207 #endif
208 #endif
209 
210 	/*
211 	 * Traverse the (now) ordered list of system initialization tasks.
212 	 * Perform each task, and continue on to the next task.
213 	 *
214 	 * The last item on the list is expected to be the scheduler,
215 	 * which will not return.
216 	 */
217 	for (sipp = sysinit; sipp < sysinit_end; sipp++) {
218 
219 		if ((*sipp)->subsystem == SI_SUB_DUMMY)
220 			continue;	/* skip dummy task(s)*/
221 
222 		if ((*sipp)->subsystem == SI_SUB_DONE)
223 			continue;
224 
225 #if defined(VERBOSE_SYSINIT)
226 		if ((*sipp)->subsystem > last) {
227 			verbose = 1;
228 			last = (*sipp)->subsystem;
229 			printf("subsystem %x\n", last);
230 		}
231 		if (verbose) {
232 #if defined(DDB)
233 			const char *name;
234 			c_db_sym_t sym;
235 			db_expr_t  offset;
236 
237 			sym = db_search_symbol((vm_offset_t)(*sipp)->func,
238 			    DB_STGY_PROC, &offset);
239 			db_symbol_values(sym, &name, NULL);
240 			if (name != NULL)
241 				printf("   %s(%p)... ", name, (*sipp)->udata);
242 			else
243 #endif
244 				printf("   %p(%p)... ", (*sipp)->func,
245 				    (*sipp)->udata);
246 		}
247 #endif
248 
249 		/* Call function */
250 		(*((*sipp)->func))((*sipp)->udata);
251 
252 #if defined(VERBOSE_SYSINIT)
253 		if (verbose)
254 			printf("done.\n");
255 #endif
256 
257 		/* Check off the one we're just done */
258 		(*sipp)->subsystem = SI_SUB_DONE;
259 
260 		/* Check if we've installed more sysinit items via KLD */
261 		if (newsysinit != NULL) {
262 			if (sysinit != SET_BEGIN(sysinit_set))
263 				free(sysinit, M_TEMP);
264 			sysinit = newsysinit;
265 			sysinit_end = newsysinit_end;
266 			newsysinit = NULL;
267 			newsysinit_end = NULL;
268 			goto restart;
269 		}
270 	}
271 
272 	panic("Shouldn't get here!");
273 	/* NOTREACHED*/
274 }
275 
276 
277 /*
278  ***************************************************************************
279  ****
280  **** The following SYSINIT's belong elsewhere, but have not yet
281  **** been moved.
282  ****
283  ***************************************************************************
284  */
/*
 * SYSINIT callback: print the NUL-terminated string that 'data' points
 * to.  The argument is used, so it must not carry the __unused marker.
 */
static void
print_caddr_t(void *data)
{
	printf("%s", (char *)data);
}
290 SYSINIT(announce, SI_SUB_COPYRIGHT, SI_ORDER_FIRST, print_caddr_t, copyright)
291 SYSINIT(trademark, SI_SUB_COPYRIGHT, SI_ORDER_SECOND, print_caddr_t, trademark)
292 SYSINIT(version, SI_SUB_COPYRIGHT, SI_ORDER_THIRD, print_caddr_t, version)
293 
294 #ifdef WITNESS
295 static char wit_warn[] =
296      "WARNING: WITNESS option enabled, expect reduced performance.\n";
297 SYSINIT(witwarn, SI_SUB_COPYRIGHT, SI_ORDER_THIRD + 1,
298    print_caddr_t, wit_warn)
299 #endif
300 
301 #ifdef DIAGNOSTIC
302 static char diag_warn[] =
303      "WARNING: DIAGNOSTIC option enabled, expect reduced performance.\n";
304 SYSINIT(diagwarn, SI_SUB_COPYRIGHT, SI_ORDER_THIRD + 2,
305     print_caddr_t, diag_warn)
306 #endif
307 
308 static void
309 set_boot_verbose(void *data __unused)
310 {
311 
312 	if (boothowto & RB_VERBOSE)
313 		bootverbose++;
314 }
315 SYSINIT(boot_verbose, SI_SUB_TUNABLES, SI_ORDER_ANY, set_boot_verbose, NULL)
316 
317 struct sysentvec null_sysvec = {
318 	0,
319 	NULL,
320 	0,
321 	0,
322 	NULL,
323 	0,
324 	NULL,
325 	NULL,
326 	NULL,
327 	NULL,
328 	NULL,
329 	NULL,
330 	NULL,
331 	"null",
332 	NULL,
333 	NULL,
334 	0,
335 	PAGE_SIZE,
336 	VM_MIN_ADDRESS,
337 	VM_MAXUSER_ADDRESS,
338 	USRSTACK,
339 	PS_STRINGS,
340 	VM_PROT_ALL,
341 	NULL,
342 	NULL,
343 	NULL
344 };
345 
346 /*
347  ***************************************************************************
348  ****
349  **** The two following SYSINIT's are proc0 specific glue code.  I am not
350  **** convinced that they can not be safely combined, but their order of
351  **** operation has been maintained as the same as the original init_main.c
352  **** for right now.
353  ****
354  **** These probably belong in init_proc.c or kern_proc.c, since they
355  **** deal with proc0 (the fork template process).
356  ****
357  ***************************************************************************
358  */
359 /* ARGSUSED*/
360 static void
361 proc0_init(void *dummy __unused)
362 {
363 	struct proc *p;
364 	unsigned i;
365 	struct thread *td;
366 
367 	GIANT_REQUIRED;
368 	p = &proc0;
369 	td = &thread0;
370 
371 	/*
372 	 * Initialize magic number.
373 	 */
374 	p->p_magic = P_MAGIC;
375 
376 	/*
377 	 * Initialize thread and process structures.
378 	 */
379 	procinit();	/* set up proc zone */
380 	threadinit();	/* set up UMA zones */
381 
382 	/*
383 	 * Initialise scheduler resources.
384 	 * Add scheduler specific parts to proc, thread as needed.
385 	 */
386 	schedinit();	/* scheduler gets its house in order */
387 	/*
388 	 * Initialize sleep queue hash table
389 	 */
390 	sleepinit();
391 
392 	/*
393 	 * additional VM structures
394 	 */
395 	vm_init2();
396 
397 	/*
398 	 * Create process 0 (the swapper).
399 	 */
400 	LIST_INSERT_HEAD(&allproc, p, p_list);
401 	LIST_INSERT_HEAD(PIDHASH(0), p, p_hash);
402 	mtx_init(&pgrp0.pg_mtx, "process group", NULL, MTX_DEF | MTX_DUPOK);
403 	p->p_pgrp = &pgrp0;
404 	LIST_INSERT_HEAD(PGRPHASH(0), &pgrp0, pg_hash);
405 	LIST_INIT(&pgrp0.pg_members);
406 	LIST_INSERT_HEAD(&pgrp0.pg_members, p, p_pglist);
407 
408 	pgrp0.pg_session = &session0;
409 	mtx_init(&session0.s_mtx, "session", NULL, MTX_DEF);
410 	session0.s_count = 1;
411 	session0.s_leader = p;
412 
413 	p->p_sysent = &null_sysvec;
414 	p->p_flag = P_SYSTEM;
415 	p->p_sflag = PS_INMEM;
416 	p->p_state = PRS_NORMAL;
417 	knlist_init(&p->p_klist, &p->p_mtx, NULL, NULL, NULL);
418 	STAILQ_INIT(&p->p_ktr);
419 	p->p_nice = NZERO;
420 	td->td_state = TDS_RUNNING;
421 	td->td_pri_class = PRI_TIMESHARE;
422 	td->td_user_pri = PUSER;
423 	td->td_base_user_pri = PUSER;
424 	td->td_priority = PVM;
425 	td->td_base_pri = PUSER;
426 	td->td_oncpu = 0;
427 	p->p_peers = 0;
428 	p->p_leader = p;
429 
430 
431 	bcopy("swapper", p->p_comm, sizeof ("swapper"));
432 
433 	callout_init(&p->p_itcallout, CALLOUT_MPSAFE);
434 	callout_init(&td->td_slpcallout, CALLOUT_MPSAFE);
435 
436 	/* Create credentials. */
437 	p->p_ucred = crget();
438 	p->p_ucred->cr_ngroups = 1;	/* group 0 */
439 	p->p_ucred->cr_uidinfo = uifind(0);
440 	p->p_ucred->cr_ruidinfo = uifind(0);
441 	p->p_ucred->cr_prison = NULL;	/* Don't jail it. */
442 #ifdef AUDIT
443 	audit_proc_alloc(p);
444 	audit_proc_kproc0(p);
445 #endif
446 #ifdef MAC
447 	mac_create_proc0(p->p_ucred);
448 #endif
449 	td->td_ucred = crhold(p->p_ucred);
450 
451 	/* Create sigacts. */
452 	p->p_sigacts = sigacts_alloc();
453 
454 	/* Initialize signal state for process 0. */
455 	siginit(&proc0);
456 
457 	/* Create the file descriptor table. */
458 	p->p_fd = fdinit(NULL);
459 	p->p_fdtol = NULL;
460 
461 	/* Create the limits structures. */
462 	p->p_limit = lim_alloc();
463 	for (i = 0; i < RLIM_NLIMITS; i++)
464 		p->p_limit->pl_rlimit[i].rlim_cur =
465 		    p->p_limit->pl_rlimit[i].rlim_max = RLIM_INFINITY;
466 	p->p_limit->pl_rlimit[RLIMIT_NOFILE].rlim_cur =
467 	    p->p_limit->pl_rlimit[RLIMIT_NOFILE].rlim_max = maxfiles;
468 	p->p_limit->pl_rlimit[RLIMIT_NPROC].rlim_cur =
469 	    p->p_limit->pl_rlimit[RLIMIT_NPROC].rlim_max = maxproc;
470 	i = ptoa(cnt.v_free_count);
471 	p->p_limit->pl_rlimit[RLIMIT_RSS].rlim_max = i;
472 	p->p_limit->pl_rlimit[RLIMIT_MEMLOCK].rlim_max = i;
473 	p->p_limit->pl_rlimit[RLIMIT_MEMLOCK].rlim_cur = i / 3;
474 	p->p_cpulimit = RLIM_INFINITY;
475 
476 	p->p_stats = pstats_alloc();
477 
478 	/* Allocate a prototype map so we have something to fork. */
479 	pmap_pinit0(vmspace_pmap(&vmspace0));
480 	p->p_vmspace = &vmspace0;
481 	vmspace0.vm_refcnt = 1;
482 	vm_map_init(&vmspace0.vm_map, p->p_sysent->sv_minuser,
483 	    p->p_sysent->sv_maxuser);
484 	vmspace0.vm_map.pmap = vmspace_pmap(&vmspace0);
485 
486 	/*
487 	 * Charge root for one process.
488 	 */
489 	(void)chgproccnt(p->p_ucred->cr_ruidinfo, 1, 0);
490 }
491 SYSINIT(p0init, SI_SUB_INTRINSIC, SI_ORDER_FIRST, proc0_init, NULL)
492 
493 /* ARGSUSED*/
494 static void
495 proc0_post(void *dummy __unused)
496 {
497 	struct timespec ts;
498 	struct proc *p;
499 
500 	/*
501 	 * Now we can look at the time, having had a chance to verify the
502 	 * time from the filesystem.  Pretend that proc0 started now.
503 	 */
504 	sx_slock(&allproc_lock);
505 	LIST_FOREACH(p, &allproc, p_list) {
506 		microuptime(&p->p_stats->p_start);
507 		p->p_rux.rux_runtime = 0;
508 	}
509 	sx_sunlock(&allproc_lock);
510 	PCPU_SET(switchtime, cpu_ticks());
511 	PCPU_SET(switchticks, ticks);
512 
513 	/*
514 	 * Give the ``random'' number generator a thump.
515 	 */
516 	nanotime(&ts);
517 	srandom(ts.tv_sec ^ ts.tv_nsec);
518 }
519 SYSINIT(p0post, SI_SUB_INTRINSIC_POST, SI_ORDER_FIRST, proc0_post, NULL)
520 
521 /*
522  ***************************************************************************
523  ****
524  **** The following SYSINIT's and glue code should be moved to the
525  **** respective files on a per subsystem basis.
526  ****
527  ***************************************************************************
528  */
529 
530 
531 /*
532  ***************************************************************************
533  ****
534  **** The following code probably belongs in another file, like
535  **** kern/init_init.c.
536  ****
537  ***************************************************************************
538  */
539 
540 /*
541  * List of paths to try when searching for "init".
542  */
543 static char init_path[MAXPATHLEN] =
544 #ifdef	INIT_PATH
545     __XSTRING(INIT_PATH);
546 #else
547     "/sbin/init:/sbin/oinit:/sbin/init.bak:/rescue/init:/stand/sysinstall";
548 #endif
549 SYSCTL_STRING(_kern, OID_AUTO, init_path, CTLFLAG_RD, init_path, 0,
550 	"Path used to search the init process");
551 
552 /*
553  * Shutdown timeout of init(8).
554  * Unused within kernel, but used to control init(8), hence do not remove.
555  */
556 #ifndef INIT_SHUTDOWN_TIMEOUT
557 #define INIT_SHUTDOWN_TIMEOUT 120
558 #endif
559 static int init_shutdown_timeout = INIT_SHUTDOWN_TIMEOUT;
560 SYSCTL_INT(_kern, OID_AUTO, init_shutdown_timeout,
561 	CTLFLAG_RW, &init_shutdown_timeout, 0, "");
562 
563 /*
564  * Start the initial user process; try exec'ing each pathname in init_path.
565  * The program is invoked with one argument containing the boot flags.
566  */
567 static void
568 start_init(void *dummy)
569 {
570 	vm_offset_t addr;
571 	struct execve_args args;
572 	int options, error;
573 	char *var, *path, *next, *s;
574 	char *ucp, **uap, *arg0, *arg1;
575 	struct thread *td;
576 	struct proc *p;
577 
578 	mtx_lock(&Giant);
579 
580 	GIANT_REQUIRED;
581 
582 	td = curthread;
583 	p = td->td_proc;
584 
585 	vfs_mountroot();
586 
587 	/*
588 	 * Need just enough stack to hold the faked-up "execve()" arguments.
589 	 */
590 	addr = p->p_sysent->sv_usrstack - PAGE_SIZE;
591 	if (vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &addr, PAGE_SIZE,
592 			FALSE, VM_PROT_ALL, VM_PROT_ALL, 0) != 0)
593 		panic("init: couldn't allocate argument space");
594 	p->p_vmspace->vm_maxsaddr = (caddr_t)addr;
595 	p->p_vmspace->vm_ssize = 1;
596 
597 	if ((var = getenv("init_path")) != NULL) {
598 		strlcpy(init_path, var, sizeof(init_path));
599 		freeenv(var);
600 	}
601 
602 	for (path = init_path; *path != '\0'; path = next) {
603 		while (*path == ':')
604 			path++;
605 		if (*path == '\0')
606 			break;
607 		for (next = path; *next != '\0' && *next != ':'; next++)
608 			/* nothing */ ;
609 		if (bootverbose)
610 			printf("start_init: trying %.*s\n", (int)(next - path),
611 			    path);
612 
613 		/*
614 		 * Move out the boot flag argument.
615 		 */
616 		options = 0;
617 		ucp = (char *)p->p_sysent->sv_usrstack;
618 		(void)subyte(--ucp, 0);		/* trailing zero */
619 		if (boothowto & RB_SINGLE) {
620 			(void)subyte(--ucp, 's');
621 			options = 1;
622 		}
623 #ifdef notyet
624                 if (boothowto & RB_FASTBOOT) {
625 			(void)subyte(--ucp, 'f');
626 			options = 1;
627 		}
628 #endif
629 
630 #ifdef BOOTCDROM
631 		(void)subyte(--ucp, 'C');
632 		options = 1;
633 #endif
634 
635 		if (options == 0)
636 			(void)subyte(--ucp, '-');
637 		(void)subyte(--ucp, '-');		/* leading hyphen */
638 		arg1 = ucp;
639 
640 		/*
641 		 * Move out the file name (also arg 0).
642 		 */
643 		(void)subyte(--ucp, 0);
644 		for (s = next - 1; s >= path; s--)
645 			(void)subyte(--ucp, *s);
646 		arg0 = ucp;
647 
648 		/*
649 		 * Move out the arg pointers.
650 		 */
651 		uap = (char **)((intptr_t)ucp & ~(sizeof(intptr_t)-1));
652 		(void)suword((caddr_t)--uap, (long)0);	/* terminator */
653 		(void)suword((caddr_t)--uap, (long)(intptr_t)arg1);
654 		(void)suword((caddr_t)--uap, (long)(intptr_t)arg0);
655 
656 		/*
657 		 * Point at the arguments.
658 		 */
659 		args.fname = arg0;
660 		args.argv = uap;
661 		args.envv = NULL;
662 
663 		/*
664 		 * Now try to exec the program.  If can't for any reason
665 		 * other than it doesn't exist, complain.
666 		 *
667 		 * Otherwise, return via fork_trampoline() all the way
668 		 * to user mode as init!
669 		 */
670 		if ((error = execve(td, &args)) == 0) {
671 			mtx_unlock(&Giant);
672 			return;
673 		}
674 		if (error != ENOENT)
675 			printf("exec %.*s: error %d\n", (int)(next - path),
676 			    path, error);
677 	}
678 	printf("init: not found in path %s\n", init_path);
679 	panic("no init");
680 }
681 
682 /*
683  * Like kthread_create(), but runs in it's own address space.
684  * We do this early to reserve pid 1.
685  *
686  * Note special case - do not make it runnable yet.  Other work
687  * in progress will change this more.
688  */
689 static void
690 create_init(const void *udata __unused)
691 {
692 	struct ucred *newcred, *oldcred;
693 	int error;
694 
695 	error = fork1(&thread0, RFFDG | RFPROC | RFSTOPPED, 0, &initproc);
696 	if (error)
697 		panic("cannot fork init: %d\n", error);
698 	KASSERT(initproc->p_pid == 1, ("create_init: initproc->p_pid != 1"));
699 	/* divorce init's credentials from the kernel's */
700 	newcred = crget();
701 	PROC_LOCK(initproc);
702 	initproc->p_flag |= P_SYSTEM;
703 	oldcred = initproc->p_ucred;
704 	crcopy(newcred, oldcred);
705 #ifdef MAC
706 	mac_create_proc1(newcred);
707 #endif
708 #ifdef AUDIT
709 	audit_proc_init(initproc);
710 #endif
711 	initproc->p_ucred = newcred;
712 	PROC_UNLOCK(initproc);
713 	crfree(oldcred);
714 	cred_update_thread(FIRST_THREAD_IN_PROC(initproc));
715 	mtx_lock_spin(&sched_lock);
716 	initproc->p_sflag |= PS_INMEM;
717 	mtx_unlock_spin(&sched_lock);
718 	cpu_set_fork_handler(FIRST_THREAD_IN_PROC(initproc), start_init, NULL);
719 }
720 SYSINIT(init, SI_SUB_CREATE_INIT, SI_ORDER_FIRST, create_init, NULL)
721 
722 /*
723  * Make it runnable now.
724  */
725 static void
726 kick_init(const void *udata __unused)
727 {
728 	struct thread *td;
729 
730 	td = FIRST_THREAD_IN_PROC(initproc);
731 	mtx_lock_spin(&sched_lock);
732 	TD_SET_CAN_RUN(td);
733 	setrunqueue(td, SRQ_BORING);
734 	mtx_unlock_spin(&sched_lock);
735 }
736 SYSINIT(kickinit, SI_SUB_KTHREAD_INIT, SI_ORDER_FIRST, kick_init, NULL)
737