xref: /freebsd/sys/kern/init_main.c (revision f0adf7f5cdd241db2f2c817683191a6ef64a4e95)
1 /*
2  * Copyright (c) 1995 Terrence R. Lambert
3  * All rights reserved.
4  *
5  * Copyright (c) 1982, 1986, 1989, 1991, 1992, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  * (c) UNIX System Laboratories, Inc.
8  * All or some portions of this file are derived from material licensed
9  * to the University of California by American Telephone and Telegraph
10  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
11  * the permission of UNIX System Laboratories, Inc.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 3. All advertising materials mentioning features or use of this software
22  *    must display the following acknowledgement:
23  *	This product includes software developed by the University of
24  *	California, Berkeley and its contributors.
25  * 4. Neither the name of the University nor the names of its contributors
26  *    may be used to endorse or promote products derived from this software
27  *    without specific prior written permission.
28  *
29  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39  * SUCH DAMAGE.
40  *
41  *	@(#)init_main.c	8.9 (Berkeley) 1/21/94
42  */
43 
44 #include <sys/cdefs.h>
45 __FBSDID("$FreeBSD$");
46 
47 #include "opt_init_path.h"
48 #include "opt_mac.h"
49 
50 #include <sys/param.h>
51 #include <sys/kernel.h>
52 #include <sys/exec.h>
53 #include <sys/file.h>
54 #include <sys/filedesc.h>
55 #include <sys/ktr.h>
56 #include <sys/lock.h>
57 #include <sys/mac.h>
58 #include <sys/mount.h>
59 #include <sys/mutex.h>
60 #include <sys/syscallsubr.h>
61 #include <sys/sysctl.h>
62 #include <sys/proc.h>
63 #include <sys/resourcevar.h>
64 #include <sys/systm.h>
65 #include <sys/signalvar.h>
66 #include <sys/vnode.h>
67 #include <sys/sysent.h>
68 #include <sys/reboot.h>
69 #include <sys/sched.h>
70 #include <sys/sx.h>
71 #include <sys/sysproto.h>
72 #include <sys/vmmeter.h>
73 #include <sys/unistd.h>
74 #include <sys/malloc.h>
75 #include <sys/conf.h>
76 
77 #include <machine/cpu.h>
78 
79 #include <vm/vm.h>
80 #include <vm/vm_param.h>
81 #include <vm/pmap.h>
82 #include <vm/vm_map.h>
83 #include <sys/user.h>
84 #include <sys/copyright.h>
85 
86 void mi_startup(void);				/* Should be elsewhere */
87 
88 /* Components of the first process -- never freed. */
89 static struct session session0;
90 static struct pgrp pgrp0;
91 struct	proc proc0;
92 struct	thread thread0;
93 struct	kse kse0;
94 struct	ksegrp ksegrp0;
95 static struct filedesc0 filedesc0;
96 struct	vmspace vmspace0;
97 struct	proc *initproc;
98 
99 struct	vnode *rootvp;
100 int	boothowto = 0;		/* initialized so that it can be patched */
101 SYSCTL_INT(_debug, OID_AUTO, boothowto, CTLFLAG_RD, &boothowto, 0, "");
102 int	bootverbose;
103 SYSCTL_INT(_debug, OID_AUTO, bootverbose, CTLFLAG_RW, &bootverbose, 0, "");
104 
105 /*
106  * This ensures that there is at least one entry so that the sysinit_set
107  * symbol is not undefined.  A subsystem ID of SI_SUB_DUMMY is never
108  * executed.
109  */
110 SYSINIT(placeholder, SI_SUB_DUMMY, SI_ORDER_ANY, NULL, NULL)
111 
112 /*
113  * The sysinit table itself.  Items are checked off as the are run.
114  * If we want to register new sysinit types, add them to newsysinit.
115  */
116 SET_DECLARE(sysinit_set, struct sysinit);
117 struct sysinit **sysinit, **sysinit_end;
118 struct sysinit **newsysinit, **newsysinit_end;
119 
120 /*
121  * Merge a new sysinit set into the current set, reallocating it if
122  * necessary.  This can only be called after malloc is running.
123  */
124 void
125 sysinit_add(struct sysinit **set, struct sysinit **set_end)
126 {
127 	struct sysinit **newset;
128 	struct sysinit **sipp;
129 	struct sysinit **xipp;
130 	int count;
131 
132 	count = set_end - set;
133 	if (newsysinit)
134 		count += newsysinit_end - newsysinit;
135 	else
136 		count += sysinit_end - sysinit;
137 	newset = malloc(count * sizeof(*sipp), M_TEMP, M_NOWAIT);
138 	if (newset == NULL)
139 		panic("cannot malloc for sysinit");
140 	xipp = newset;
141 	if (newsysinit)
142 		for (sipp = newsysinit; sipp < newsysinit_end; sipp++)
143 			*xipp++ = *sipp;
144 	else
145 		for (sipp = sysinit; sipp < sysinit_end; sipp++)
146 			*xipp++ = *sipp;
147 	for (sipp = set; sipp < set_end; sipp++)
148 		*xipp++ = *sipp;
149 	if (newsysinit)
150 		free(newsysinit, M_TEMP);
151 	newsysinit = newset;
152 	newsysinit_end = newset + count;
153 }
154 
155 /*
156  * System startup; initialize the world, create process 0, mount root
157  * filesystem, and fork to create init and pagedaemon.  Most of the
158  * hard work is done in the lower-level initialization routines including
159  * startup(), which does memory initialization and autoconfiguration.
160  *
161  * This allows simple addition of new kernel subsystems that require
162  * boot time initialization.  It also allows substitution of subsystem
163  * (for instance, a scheduler, kernel profiler, or VM system) by object
164  * module.  Finally, it allows for optional "kernel threads".
165  */
166 void
167 mi_startup(void)
168 {
169 
170 	register struct sysinit **sipp;		/* system initialization*/
171 	register struct sysinit **xipp;		/* interior loop of sort*/
172 	register struct sysinit *save;		/* bubble*/
173 
174 	if (sysinit == NULL) {
175 		sysinit = SET_BEGIN(sysinit_set);
176 		sysinit_end = SET_LIMIT(sysinit_set);
177 	}
178 
179 restart:
180 	/*
181 	 * Perform a bubble sort of the system initialization objects by
182 	 * their subsystem (primary key) and order (secondary key).
183 	 */
184 	for (sipp = sysinit; sipp < sysinit_end; sipp++) {
185 		for (xipp = sipp + 1; xipp < sysinit_end; xipp++) {
186 			if ((*sipp)->subsystem < (*xipp)->subsystem ||
187 			     ((*sipp)->subsystem == (*xipp)->subsystem &&
188 			      (*sipp)->order <= (*xipp)->order))
189 				continue;	/* skip*/
190 			save = *sipp;
191 			*sipp = *xipp;
192 			*xipp = save;
193 		}
194 	}
195 
196 	/*
197 	 * Traverse the (now) ordered list of system initialization tasks.
198 	 * Perform each task, and continue on to the next task.
199 	 *
200 	 * The last item on the list is expected to be the scheduler,
201 	 * which will not return.
202 	 */
203 	for (sipp = sysinit; sipp < sysinit_end; sipp++) {
204 
205 		if ((*sipp)->subsystem == SI_SUB_DUMMY)
206 			continue;	/* skip dummy task(s)*/
207 
208 		if ((*sipp)->subsystem == SI_SUB_DONE)
209 			continue;
210 
211 		/* Call function */
212 		(*((*sipp)->func))((*sipp)->udata);
213 
214 		/* Check off the one we're just done */
215 		(*sipp)->subsystem = SI_SUB_DONE;
216 
217 		/* Check if we've installed more sysinit items via KLD */
218 		if (newsysinit != NULL) {
219 			if (sysinit != SET_BEGIN(sysinit_set))
220 				free(sysinit, M_TEMP);
221 			sysinit = newsysinit;
222 			sysinit_end = newsysinit_end;
223 			newsysinit = NULL;
224 			newsysinit_end = NULL;
225 			goto restart;
226 		}
227 	}
228 
229 	panic("Shouldn't get here!");
230 	/* NOTREACHED*/
231 }
232 
233 
234 /*
235  ***************************************************************************
236  ****
237  **** The following SYSINIT's belong elsewhere, but have not yet
238  **** been moved.
239  ****
240  ***************************************************************************
241  */
/*
 * SYSINIT helper: print the NUL-terminated string passed as the sysinit
 * argument on the console.
 *
 * data is dereferenced, so it must not be annotated __unused; it must
 * point to a valid C string.
 */
static void
print_caddr_t(void *data)
{
	printf("%s", (char *)data);
}
247 SYSINIT(announce, SI_SUB_COPYRIGHT, SI_ORDER_FIRST, print_caddr_t, copyright)
248 SYSINIT(version, SI_SUB_COPYRIGHT, SI_ORDER_SECOND, print_caddr_t, version)
249 
250 #ifdef WITNESS
251 static char wit_warn[] =
252      "WARNING: WITNESS option enabled, expect reduced performance.\n";
253 SYSINIT(witwarn, SI_SUB_COPYRIGHT, SI_ORDER_SECOND + 1,
254    print_caddr_t, wit_warn)
255 #endif
256 
257 #ifdef DIAGNOSTIC
258 static char diag_warn[] =
259      "WARNING: DIAGNOSTIC option enabled, expect reduced performance.\n";
260 SYSINIT(diagwarn, SI_SUB_COPYRIGHT, SI_ORDER_SECOND + 2,
261     print_caddr_t, diag_warn)
262 #endif
263 
264 static void
265 set_boot_verbose(void *data __unused)
266 {
267 
268 	if (boothowto & RB_VERBOSE)
269 		bootverbose++;
270 }
271 SYSINIT(boot_verbose, SI_SUB_TUNABLES, SI_ORDER_ANY, set_boot_verbose, NULL)
272 
273 struct sysentvec null_sysvec = {
274 	0,
275 	NULL,
276 	0,
277 	0,
278 	NULL,
279 	0,
280 	NULL,
281 	NULL,
282 	NULL,
283 	NULL,
284 	NULL,
285 	NULL,
286 	NULL,
287 	"null",
288 	NULL,
289 	NULL,
290 	0,
291 	PAGE_SIZE,
292 	VM_MIN_ADDRESS,
293 	VM_MAXUSER_ADDRESS,
294 	USRSTACK,
295 	PS_STRINGS,
296 	VM_PROT_ALL,
297 	NULL,
298 	NULL,
299 	NULL
300 };
301 
302 /*
303  ***************************************************************************
304  ****
305  **** The two following SYSINIT's are proc0 specific glue code.  I am not
306  **** convinced that they can not be safely combined, but their order of
307  **** operation has been maintained as the same as the original init_main.c
308  **** for right now.
309  ****
310  **** These probably belong in init_proc.c or kern_proc.c, since they
311  **** deal with proc0 (the fork template process).
312  ****
313  ***************************************************************************
314  */
315 /* ARGSUSED*/
316 static void
317 proc0_init(void *dummy __unused)
318 {
319 	register struct proc		*p;
320 	register struct filedesc0	*fdp;
321 	register unsigned i;
322 	struct thread *td;
323 	struct ksegrp *kg;
324 	struct kse *ke;
325 
326 	GIANT_REQUIRED;
327 	p = &proc0;
328 	td = &thread0;
329 	ke = &kse0;
330 	kg = &ksegrp0;
331 
332 	ke->ke_sched = kse0_sched;
333 	kg->kg_sched = ksegrp0_sched;
334 	p->p_sched = proc0_sched;
335 	td->td_sched = thread0_sched;
336 
337 	/*
338 	 * Initialize magic number.
339 	 */
340 	p->p_magic = P_MAGIC;
341 
342 	/*
343 	 * Initialize thread, process and pgrp structures.
344 	 */
345 	procinit();
346 	threadinit();
347 
348 	/*
349 	 * Initialize sleep queue hash table
350 	 */
351 	sleepinit();
352 
353 	/*
354 	 * additional VM structures
355 	 */
356 	vm_init2();
357 
358 	/*
359 	 * Create process 0 (the swapper).
360 	 */
361 	LIST_INSERT_HEAD(&allproc, p, p_list);
362 	LIST_INSERT_HEAD(PIDHASH(0), p, p_hash);
363 	mtx_init(&pgrp0.pg_mtx, "process group", NULL, MTX_DEF | MTX_DUPOK);
364 	p->p_pgrp = &pgrp0;
365 	LIST_INSERT_HEAD(PGRPHASH(0), &pgrp0, pg_hash);
366 	LIST_INIT(&pgrp0.pg_members);
367 	LIST_INSERT_HEAD(&pgrp0.pg_members, p, p_pglist);
368 
369 	pgrp0.pg_session = &session0;
370 	mtx_init(&session0.s_mtx, "session", NULL, MTX_DEF);
371 	session0.s_count = 1;
372 	session0.s_leader = p;
373 
374 	p->p_sysent = &null_sysvec;
375 
376 	/*
377 	 * proc_linkup was already done in init_i386() or alphainit() etc.
378 	 * because the earlier code needed to follow td->td_proc. Otherwise
379 	 * I would have done it here.. maybe this means this should be
380 	 * done earlier too.
381 	 */
382 	p->p_flag = P_SYSTEM;
383 	p->p_sflag = PS_INMEM;
384 	p->p_state = PRS_NORMAL;
385 	p->p_nice = NZERO;
386 	td->td_state = TDS_RUNNING;
387 	kg->kg_pri_class = PRI_TIMESHARE;
388 	kg->kg_user_pri = PUSER;
389 	td->td_priority = PVM;
390 	td->td_base_pri = PUSER;
391 	td->td_kse = ke; /* XXXKSE */
392 	td->td_oncpu = 0;
393 	ke->ke_state = KES_THREAD;
394 	ke->ke_thread = td;
395 	p->p_peers = 0;
396 	p->p_leader = p;
397 
398 
399 	bcopy("swapper", p->p_comm, sizeof ("swapper"));
400 
401 	callout_init(&p->p_itcallout, CALLOUT_MPSAFE);
402 	callout_init(&td->td_slpcallout, CALLOUT_MPSAFE);
403 
404 	/* Create credentials. */
405 	p->p_ucred = crget();
406 	p->p_ucred->cr_ngroups = 1;	/* group 0 */
407 	p->p_ucred->cr_uidinfo = uifind(0);
408 	p->p_ucred->cr_ruidinfo = uifind(0);
409 	p->p_ucred->cr_prison = NULL;	/* Don't jail it. */
410 #ifdef MAC
411 	mac_create_proc0(p->p_ucred);
412 #endif
413 	td->td_ucred = crhold(p->p_ucred);
414 
415 	/* Create sigacts. */
416 	p->p_sigacts = sigacts_alloc();
417 
418 	/* Initialize signal state for process 0. */
419 	siginit(&proc0);
420 
421 	/* Create the file descriptor table. */
422 	/* XXX this duplicates part of fdinit() */
423 	fdp = &filedesc0;
424 	p->p_fd = &fdp->fd_fd;
425 	p->p_fdtol = NULL;
426 	mtx_init(&fdp->fd_fd.fd_mtx, FILEDESC_LOCK_DESC, NULL, MTX_DEF);
427 	fdp->fd_fd.fd_refcnt = 1;
428 	fdp->fd_fd.fd_cmask = CMASK;
429 	fdp->fd_fd.fd_ofiles = fdp->fd_dfiles;
430 	fdp->fd_fd.fd_ofileflags = fdp->fd_dfileflags;
431 	fdp->fd_fd.fd_nfiles = NDFILE;
432 	fdp->fd_fd.fd_map = fdp->fd_dmap;
433 
434 	/* Create the limits structures. */
435 	p->p_limit = lim_alloc();
436 	for (i = 0; i < RLIM_NLIMITS; i++)
437 		p->p_limit->pl_rlimit[i].rlim_cur =
438 		    p->p_limit->pl_rlimit[i].rlim_max = RLIM_INFINITY;
439 	p->p_limit->pl_rlimit[RLIMIT_NOFILE].rlim_cur =
440 	    p->p_limit->pl_rlimit[RLIMIT_NOFILE].rlim_max = maxfiles;
441 	p->p_limit->pl_rlimit[RLIMIT_NPROC].rlim_cur =
442 	    p->p_limit->pl_rlimit[RLIMIT_NPROC].rlim_max = maxproc;
443 	i = ptoa(cnt.v_free_count);
444 	p->p_limit->pl_rlimit[RLIMIT_RSS].rlim_max = i;
445 	p->p_limit->pl_rlimit[RLIMIT_MEMLOCK].rlim_max = i;
446 	p->p_limit->pl_rlimit[RLIMIT_MEMLOCK].rlim_cur = i / 3;
447 	p->p_cpulimit = RLIM_INFINITY;
448 
449 	/* Allocate a prototype map so we have something to fork. */
450 	pmap_pinit0(vmspace_pmap(&vmspace0));
451 	p->p_vmspace = &vmspace0;
452 	vmspace0.vm_refcnt = 1;
453 	vm_map_init(&vmspace0.vm_map, p->p_sysent->sv_minuser,
454 	    p->p_sysent->sv_maxuser);
455 	vmspace0.vm_map.pmap = vmspace_pmap(&vmspace0);
456 
457 	/*
458 	 * We continue to place resource usage info
459 	 * in the user struct so that it's pageable.
460 	 */
461 	p->p_stats = &p->p_uarea->u_stats;
462 
463 	/*
464 	 * Charge root for one process.
465 	 */
466 	(void)chgproccnt(p->p_ucred->cr_ruidinfo, 1, 0);
467 }
468 SYSINIT(p0init, SI_SUB_INTRINSIC, SI_ORDER_FIRST, proc0_init, NULL)
469 
470 /* ARGSUSED*/
471 static void
472 proc0_post(void *dummy __unused)
473 {
474 	struct timespec ts;
475 	struct proc *p;
476 
477 	/*
478 	 * Now we can look at the time, having had a chance to verify the
479 	 * time from the filesystem.  Pretend that proc0 started now.
480 	 */
481 	sx_slock(&allproc_lock);
482 	LIST_FOREACH(p, &allproc, p_list) {
483 		microuptime(&p->p_stats->p_start);
484 		p->p_runtime.sec = 0;
485 		p->p_runtime.frac = 0;
486 	}
487 	sx_sunlock(&allproc_lock);
488 	binuptime(PCPU_PTR(switchtime));
489 	PCPU_SET(switchticks, ticks);
490 
491 	/*
492 	 * Give the ``random'' number generator a thump.
493 	 */
494 	nanotime(&ts);
495 	srandom(ts.tv_sec ^ ts.tv_nsec);
496 }
497 SYSINIT(p0post, SI_SUB_INTRINSIC_POST, SI_ORDER_FIRST, proc0_post, NULL)
498 
499 /*
500  ***************************************************************************
501  ****
502  **** The following SYSINIT's and glue code should be moved to the
503  **** respective files on a per subsystem basis.
504  ****
505  ***************************************************************************
506  */
507 
508 
509 /*
510  ***************************************************************************
511  ****
512  **** The following code probably belongs in another file, like
513  **** kern/init_init.c.
514  ****
515  ***************************************************************************
516  */
517 
518 /*
519  * List of paths to try when searching for "init".
520  */
521 static char init_path[MAXPATHLEN] =
522 #ifdef	INIT_PATH
523     __XSTRING(INIT_PATH);
524 #else
525     "/sbin/init:/sbin/oinit:/sbin/init.bak:/stand/sysinstall";
526 #endif
527 SYSCTL_STRING(_kern, OID_AUTO, init_path, CTLFLAG_RD, init_path, 0,
528 	"Path used to search the init process");
529 
530 /*
531  * Start the initial user process; try exec'ing each pathname in init_path.
532  * The program is invoked with one argument containing the boot flags.
533  */
534 static void
535 start_init(void *dummy)
536 {
537 	vm_offset_t addr;
538 	struct execve_args args;
539 	int options, error;
540 	char *var, *path, *next, *s;
541 	char *ucp, **uap, *arg0, *arg1;
542 	struct thread *td;
543 	struct proc *p;
544 	int init_does_devfs = 0;
545 
546 	mtx_lock(&Giant);
547 
548 	GIANT_REQUIRED;
549 
550 	td = curthread;
551 	p = td->td_proc;
552 
553 	vfs_mountroot();
554 
555 	/* Get the vnode for '/'.  Set p->p_fd->fd_cdir to reference it. */
556 	if (VFS_ROOT(TAILQ_FIRST(&mountlist), &rootvnode, td))
557 		panic("cannot find root vnode");
558 	FILEDESC_LOCK(p->p_fd);
559 	p->p_fd->fd_cdir = rootvnode;
560 	VREF(p->p_fd->fd_cdir);
561 	p->p_fd->fd_rdir = rootvnode;
562 	VREF(p->p_fd->fd_rdir);
563 	FILEDESC_UNLOCK(p->p_fd);
564 	VOP_UNLOCK(rootvnode, 0, td);
565 #ifdef MAC
566 	mac_create_root_mount(td->td_ucred, TAILQ_FIRST(&mountlist));
567 #endif
568 
569 	/*
570 	 * For disk based systems, we probably cannot do this yet
571 	 * since the fs will be read-only.  But a NFS root
572 	 * might be ok.  It is worth a shot.
573 	 */
574 	error = kern_mkdir(td, "/dev", UIO_SYSSPACE, 0700);
575 	if (error == EEXIST)
576 		error = 0;
577 	if (error == 0)
578 		error = kernel_vmount(0, "fstype", "devfs",
579 		    "fspath", "/dev", NULL);
580 	if (error != 0)
581 		init_does_devfs = 1;
582 
583 	/*
584 	 * Need just enough stack to hold the faked-up "execve()" arguments.
585 	 */
586 	addr = p->p_sysent->sv_usrstack - PAGE_SIZE;
587 	if (vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &addr, PAGE_SIZE,
588 			FALSE, VM_PROT_ALL, VM_PROT_ALL, 0) != 0)
589 		panic("init: couldn't allocate argument space");
590 	p->p_vmspace->vm_maxsaddr = (caddr_t)addr;
591 	p->p_vmspace->vm_ssize = 1;
592 
593 	if ((var = getenv("init_path")) != NULL) {
594 		strlcpy(init_path, var, sizeof(init_path));
595 		freeenv(var);
596 	}
597 
598 	for (path = init_path; *path != '\0'; path = next) {
599 		while (*path == ':')
600 			path++;
601 		if (*path == '\0')
602 			break;
603 		for (next = path; *next != '\0' && *next != ':'; next++)
604 			/* nothing */ ;
605 		if (bootverbose)
606 			printf("start_init: trying %.*s\n", (int)(next - path),
607 			    path);
608 
609 		/*
610 		 * Move out the boot flag argument.
611 		 */
612 		options = 0;
613 		ucp = (char *)p->p_sysent->sv_usrstack;
614 		(void)subyte(--ucp, 0);		/* trailing zero */
615 		if (boothowto & RB_SINGLE) {
616 			(void)subyte(--ucp, 's');
617 			options = 1;
618 		}
619 #ifdef notyet
620                 if (boothowto & RB_FASTBOOT) {
621 			(void)subyte(--ucp, 'f');
622 			options = 1;
623 		}
624 #endif
625 
626 #ifdef BOOTCDROM
627 		(void)subyte(--ucp, 'C');
628 		options = 1;
629 #endif
630 		if (init_does_devfs) {
631 			(void)subyte(--ucp, 'd');
632 			options = 1;
633 		}
634 
635 		if (options == 0)
636 			(void)subyte(--ucp, '-');
637 		(void)subyte(--ucp, '-');		/* leading hyphen */
638 		arg1 = ucp;
639 
640 		/*
641 		 * Move out the file name (also arg 0).
642 		 */
643 		(void)subyte(--ucp, 0);
644 		for (s = next - 1; s >= path; s--)
645 			(void)subyte(--ucp, *s);
646 		arg0 = ucp;
647 
648 		/*
649 		 * Move out the arg pointers.
650 		 */
651 		uap = (char **)((intptr_t)ucp & ~(sizeof(intptr_t)-1));
652 		(void)suword((caddr_t)--uap, (long)0);	/* terminator */
653 		(void)suword((caddr_t)--uap, (long)(intptr_t)arg1);
654 		(void)suword((caddr_t)--uap, (long)(intptr_t)arg0);
655 
656 		/*
657 		 * Point at the arguments.
658 		 */
659 		args.fname = arg0;
660 		args.argv = uap;
661 		args.envv = NULL;
662 
663 		/*
664 		 * Now try to exec the program.  If can't for any reason
665 		 * other than it doesn't exist, complain.
666 		 *
667 		 * Otherwise, return via fork_trampoline() all the way
668 		 * to user mode as init!
669 		 */
670 		if ((error = execve(td, &args)) == 0) {
671 			mtx_unlock(&Giant);
672 			return;
673 		}
674 		if (error != ENOENT)
675 			printf("exec %.*s: error %d\n", (int)(next - path),
676 			    path, error);
677 	}
678 	printf("init: not found in path %s\n", init_path);
679 	panic("no init");
680 }
681 
682 /*
683  * Like kthread_create(), but runs in it's own address space.
684  * We do this early to reserve pid 1.
685  *
686  * Note special case - do not make it runnable yet.  Other work
687  * in progress will change this more.
688  */
689 static void
690 create_init(const void *udata __unused)
691 {
692 	struct ucred *newcred, *oldcred;
693 	int error;
694 
695 	error = fork1(&thread0, RFFDG | RFPROC | RFSTOPPED, 0, &initproc);
696 	if (error)
697 		panic("cannot fork init: %d\n", error);
698 	KASSERT(initproc->p_pid == 1, ("create_init: initproc->p_pid != 1"));
699 	/* divorce init's credentials from the kernel's */
700 	newcred = crget();
701 	PROC_LOCK(initproc);
702 	initproc->p_flag |= P_SYSTEM;
703 	oldcred = initproc->p_ucred;
704 	crcopy(newcred, oldcred);
705 #ifdef MAC
706 	mac_create_proc1(newcred);
707 #endif
708 	initproc->p_ucred = newcred;
709 	PROC_UNLOCK(initproc);
710 	crfree(oldcred);
711 	cred_update_thread(FIRST_THREAD_IN_PROC(initproc));
712 	mtx_lock_spin(&sched_lock);
713 	initproc->p_sflag |= PS_INMEM;
714 	mtx_unlock_spin(&sched_lock);
715 	cpu_set_fork_handler(FIRST_THREAD_IN_PROC(initproc), start_init, NULL);
716 }
717 SYSINIT(init, SI_SUB_CREATE_INIT, SI_ORDER_FIRST, create_init, NULL)
718 
719 /*
720  * Make it runnable now.
721  */
722 static void
723 kick_init(const void *udata __unused)
724 {
725 	struct thread *td;
726 
727 	td = FIRST_THREAD_IN_PROC(initproc);
728 	mtx_lock_spin(&sched_lock);
729 	TD_SET_CAN_RUN(td);
730 	setrunqueue(td);	/* XXXKSE */
731 	mtx_unlock_spin(&sched_lock);
732 }
733 SYSINIT(kickinit, SI_SUB_KTHREAD_INIT, SI_ORDER_FIRST, kick_init, NULL)
734