xref: /freebsd/sys/kern/init_main.c (revision 2546665afcaf0d53dc2c7058fee96354b3680f5a)
1 /*
2  * Copyright (c) 1995 Terrence R. Lambert
3  * All rights reserved.
4  *
5  * Copyright (c) 1982, 1986, 1989, 1991, 1992, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  * (c) UNIX System Laboratories, Inc.
8  * All or some portions of this file are derived from material licensed
9  * to the University of California by American Telephone and Telegraph
10  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
11  * the permission of UNIX System Laboratories, Inc.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 3. All advertising materials mentioning features or use of this software
22  *    must display the following acknowledgement:
23  *	This product includes software developed by the University of
24  *	California, Berkeley and its contributors.
25  * 4. Neither the name of the University nor the names of its contributors
26  *    may be used to endorse or promote products derived from this software
27  *    without specific prior written permission.
28  *
29  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39  * SUCH DAMAGE.
40  *
41  *	@(#)init_main.c	8.9 (Berkeley) 1/21/94
42  */
43 
44 #include <sys/cdefs.h>
45 __FBSDID("$FreeBSD$");
46 
47 #include "opt_init_path.h"
48 #include "opt_mac.h"
49 
50 #include <sys/param.h>
51 #include <sys/kernel.h>
52 #include <sys/exec.h>
53 #include <sys/file.h>
54 #include <sys/filedesc.h>
55 #include <sys/ktr.h>
56 #include <sys/lock.h>
57 #include <sys/mac.h>
58 #include <sys/mount.h>
59 #include <sys/mutex.h>
60 #include <sys/syscallsubr.h>
61 #include <sys/sysctl.h>
62 #include <sys/proc.h>
63 #include <sys/resourcevar.h>
64 #include <sys/systm.h>
65 #include <sys/signalvar.h>
66 #include <sys/vnode.h>
67 #include <sys/sysent.h>
68 #include <sys/reboot.h>
69 #include <sys/sched.h>
70 #include <sys/sx.h>
71 #include <sys/sysproto.h>
72 #include <sys/vmmeter.h>
73 #include <sys/unistd.h>
74 #include <sys/malloc.h>
75 #include <sys/conf.h>
76 
77 #include <machine/cpu.h>
78 
79 #include <vm/vm.h>
80 #include <vm/vm_param.h>
81 #include <vm/pmap.h>
82 #include <vm/vm_map.h>
83 #include <sys/user.h>
84 #include <sys/copyright.h>
85 
/*
 * Prototype for the system startup routine defined later in this file.
 */
void mi_startup(void);				/* Should be elsewhere */

/*
 * Components of the first process -- never freed.  These are statically
 * allocated and wired together by proc0_init() below.
 */
static struct session session0;
static struct pgrp pgrp0;
struct	proc proc0;
struct	thread thread0;
struct	kse kse0;
struct	ksegrp ksegrp0;
static struct filedesc0 filedesc0;
struct	vmspace vmspace0;
struct	proc *initproc;		/* filled in by fork1() in create_init() */
98 
/*
 * Boot flags.  This file tests the RB_VERBOSE and RB_SINGLE bits; the
 * value is exported read-only through the _debug sysctl node.
 */
int	boothowto = 0;		/* initialized so that it can be patched */
SYSCTL_INT(_debug, OID_AUTO, boothowto, CTLFLAG_RD, &boothowto, 0, "");
/*
 * Verbosity counter, incremented by set_boot_verbose() when RB_VERBOSE is
 * set in boothowto; exported read-write through the _debug sysctl node.
 */
int	bootverbose;
SYSCTL_INT(_debug, OID_AUTO, bootverbose, CTLFLAG_RW, &bootverbose, 0, "");
103 
/*
 * This ensures that there is at least one entry so that the sysinit_set
 * symbol is not undefined.  A subsystem ID of SI_SUB_DUMMY is never
 * executed: mi_startup() skips such entries.
 */
SYSINIT(placeholder, SI_SUB_DUMMY, SI_ORDER_ANY, NULL, NULL)
110 
/*
 * The sysinit table itself.  Items are checked off as they are run
 * (mi_startup() marks each one SI_SUB_DONE after calling it).
 * If we want to register new sysinit types, add them to newsysinit;
 * sysinit_add() builds that table and mi_startup() switches over to it.
 */
SET_DECLARE(sysinit_set, struct sysinit);
struct sysinit **sysinit, **sysinit_end;
struct sysinit **newsysinit, **newsysinit_end;
118 
119 /*
120  * Merge a new sysinit set into the current set, reallocating it if
121  * necessary.  This can only be called after malloc is running.
122  */
123 void
124 sysinit_add(struct sysinit **set, struct sysinit **set_end)
125 {
126 	struct sysinit **newset;
127 	struct sysinit **sipp;
128 	struct sysinit **xipp;
129 	int count;
130 
131 	count = set_end - set;
132 	if (newsysinit)
133 		count += newsysinit_end - newsysinit;
134 	else
135 		count += sysinit_end - sysinit;
136 	newset = malloc(count * sizeof(*sipp), M_TEMP, M_NOWAIT);
137 	if (newset == NULL)
138 		panic("cannot malloc for sysinit");
139 	xipp = newset;
140 	if (newsysinit)
141 		for (sipp = newsysinit; sipp < newsysinit_end; sipp++)
142 			*xipp++ = *sipp;
143 	else
144 		for (sipp = sysinit; sipp < sysinit_end; sipp++)
145 			*xipp++ = *sipp;
146 	for (sipp = set; sipp < set_end; sipp++)
147 		*xipp++ = *sipp;
148 	if (newsysinit)
149 		free(newsysinit, M_TEMP);
150 	newsysinit = newset;
151 	newsysinit_end = newset + count;
152 }
153 
154 /*
155  * System startup; initialize the world, create process 0, mount root
156  * filesystem, and fork to create init and pagedaemon.  Most of the
157  * hard work is done in the lower-level initialization routines including
158  * startup(), which does memory initialization and autoconfiguration.
159  *
160  * This allows simple addition of new kernel subsystems that require
161  * boot time initialization.  It also allows substitution of subsystem
162  * (for instance, a scheduler, kernel profiler, or VM system) by object
163  * module.  Finally, it allows for optional "kernel threads".
164  */
165 void
166 mi_startup(void)
167 {
168 
169 	register struct sysinit **sipp;		/* system initialization*/
170 	register struct sysinit **xipp;		/* interior loop of sort*/
171 	register struct sysinit *save;		/* bubble*/
172 
173 	if (sysinit == NULL) {
174 		sysinit = SET_BEGIN(sysinit_set);
175 		sysinit_end = SET_LIMIT(sysinit_set);
176 	}
177 
178 restart:
179 	/*
180 	 * Perform a bubble sort of the system initialization objects by
181 	 * their subsystem (primary key) and order (secondary key).
182 	 */
183 	for (sipp = sysinit; sipp < sysinit_end; sipp++) {
184 		for (xipp = sipp + 1; xipp < sysinit_end; xipp++) {
185 			if ((*sipp)->subsystem < (*xipp)->subsystem ||
186 			     ((*sipp)->subsystem == (*xipp)->subsystem &&
187 			      (*sipp)->order <= (*xipp)->order))
188 				continue;	/* skip*/
189 			save = *sipp;
190 			*sipp = *xipp;
191 			*xipp = save;
192 		}
193 	}
194 
195 	/*
196 	 * Traverse the (now) ordered list of system initialization tasks.
197 	 * Perform each task, and continue on to the next task.
198 	 *
199 	 * The last item on the list is expected to be the scheduler,
200 	 * which will not return.
201 	 */
202 	for (sipp = sysinit; sipp < sysinit_end; sipp++) {
203 
204 		if ((*sipp)->subsystem == SI_SUB_DUMMY)
205 			continue;	/* skip dummy task(s)*/
206 
207 		if ((*sipp)->subsystem == SI_SUB_DONE)
208 			continue;
209 
210 		/* Call function */
211 		(*((*sipp)->func))((*sipp)->udata);
212 
213 		/* Check off the one we're just done */
214 		(*sipp)->subsystem = SI_SUB_DONE;
215 
216 		/* Check if we've installed more sysinit items via KLD */
217 		if (newsysinit != NULL) {
218 			if (sysinit != SET_BEGIN(sysinit_set))
219 				free(sysinit, M_TEMP);
220 			sysinit = newsysinit;
221 			sysinit_end = newsysinit_end;
222 			newsysinit = NULL;
223 			newsysinit_end = NULL;
224 			goto restart;
225 		}
226 	}
227 
228 	panic("Shouldn't get here!");
229 	/* NOTREACHED*/
230 }
231 
232 
233 /*
234  ***************************************************************************
235  ****
236  **** The following SYSINIT's belong elsewhere, but have not yet
237  **** been moved.
238  ****
239  ***************************************************************************
240  */
/*
 * SYSINIT callback that prints a string.
 *
 * data: pointer to the NUL-terminated string to print (the udata given
 *       in the SYSINIT registration).  It is dereferenced here, so it
 *       must not carry the __unused annotation.
 */
static void
print_caddr_t(void *data)
{
	printf("%s", (char *)data);
}
/*
 * Boot banner: print the copyright string first and the version string
 * second, both at SI_SUB_COPYRIGHT, using print_caddr_t().
 */
SYSINIT(announce, SI_SUB_COPYRIGHT, SI_ORDER_FIRST, print_caddr_t, copyright)
SYSINIT(version, SI_SUB_COPYRIGHT, SI_ORDER_SECOND, print_caddr_t, version)

/*
 * When the kernel is built with WITNESS, print a performance warning
 * right after the version string (order SI_ORDER_SECOND + 1).
 */
#ifdef WITNESS
static char wit_warn[] =
     "WARNING: WITNESS option enabled, expect reduced performance.\n";
SYSINIT(witwarn, SI_SUB_COPYRIGHT, SI_ORDER_SECOND + 1,
   print_caddr_t, wit_warn)
#endif

/*
 * Likewise for DIAGNOSTIC, ordered after the WITNESS warning
 * (SI_ORDER_SECOND + 2).
 */
#ifdef DIAGNOSTIC
static char diag_warn[] =
     "WARNING: DIAGNOSTIC option enabled, expect reduced performance.\n";
SYSINIT(diagwarn, SI_SUB_COPYRIGHT, SI_ORDER_SECOND + 2,
    print_caddr_t, diag_warn)
#endif
262 
263 static void
264 set_boot_verbose(void *data __unused)
265 {
266 
267 	if (boothowto & RB_VERBOSE)
268 		bootverbose++;
269 }
270 SYSINIT(boot_verbose, SI_SUB_TUNABLES, SI_ORDER_ANY, set_boot_verbose, NULL)
271 
/*
 * System entry vector installed on proc0 by proc0_init()
 * (p->p_sysent = &null_sysvec).
 *
 * The initializers are positional, so their order must match the member
 * order of struct sysentvec, which is declared outside this file.  Within
 * this file only sv_minuser and sv_maxuser (proc0_init) and sv_usrstack
 * (start_init) are read back.
 *
 * NOTE(review): presumably VM_MIN_ADDRESS, VM_MAXUSER_ADDRESS and USRSTACK
 * below initialize those three members, and "null" is the vector's name --
 * confirm against the struct sysentvec declaration.
 */
struct sysentvec null_sysvec = {
	0,
	NULL,
	0,
	0,
	NULL,
	0,
	NULL,
	NULL,
	NULL,
	NULL,
	NULL,
	NULL,
	NULL,
	"null",
	NULL,
	NULL,
	0,
	PAGE_SIZE,
	VM_MIN_ADDRESS,
	VM_MAXUSER_ADDRESS,
	USRSTACK,
	PS_STRINGS,
	VM_PROT_ALL,
	NULL,
	NULL,
	NULL
};
300 
301 /*
302  ***************************************************************************
303  ****
304  **** The two following SYSINIT's are proc0 specific glue code.  I am not
305  **** convinced that they can not be safely combined, but their order of
306  **** operation has been maintained as the same as the original init_main.c
307  **** for right now.
308  ****
309  **** These probably belong in init_proc.c or kern_proc.c, since they
310  **** deal with proc0 (the fork template process).
311  ****
312  ***************************************************************************
313  */
314 /* ARGSUSED*/
315 static void
316 proc0_init(void *dummy __unused)
317 {
318 	register struct proc		*p;
319 	register struct filedesc0	*fdp;
320 	register unsigned i;
321 	struct thread *td;
322 	struct ksegrp *kg;
323 	struct kse *ke;
324 
325 	GIANT_REQUIRED;
326 	p = &proc0;
327 	td = &thread0;
328 	ke = &kse0;
329 	kg = &ksegrp0;
330 
331 	ke->ke_sched = kse0_sched;
332 	kg->kg_sched = ksegrp0_sched;
333 	p->p_sched = proc0_sched;
334 	td->td_sched = thread0_sched;
335 
336 	/*
337 	 * Initialize magic number.
338 	 */
339 	p->p_magic = P_MAGIC;
340 
341 	/*
342 	 * Initialize thread, process and pgrp structures.
343 	 */
344 	procinit();
345 	threadinit();
346 
347 	/*
348 	 * Initialize sleep queue hash table
349 	 */
350 	sleepinit();
351 
352 	/*
353 	 * additional VM structures
354 	 */
355 	vm_init2();
356 
357 	/*
358 	 * Create process 0 (the swapper).
359 	 */
360 	LIST_INSERT_HEAD(&allproc, p, p_list);
361 	LIST_INSERT_HEAD(PIDHASH(0), p, p_hash);
362 	mtx_init(&pgrp0.pg_mtx, "process group", NULL, MTX_DEF | MTX_DUPOK);
363 	p->p_pgrp = &pgrp0;
364 	LIST_INSERT_HEAD(PGRPHASH(0), &pgrp0, pg_hash);
365 	LIST_INIT(&pgrp0.pg_members);
366 	LIST_INSERT_HEAD(&pgrp0.pg_members, p, p_pglist);
367 
368 	pgrp0.pg_session = &session0;
369 	mtx_init(&session0.s_mtx, "session", NULL, MTX_DEF);
370 	session0.s_count = 1;
371 	session0.s_leader = p;
372 
373 	p->p_sysent = &null_sysvec;
374 
375 	/*
376 	 * proc_linkup was already done in init_i386() or alphainit() etc.
377 	 * because the earlier code needed to follow td->td_proc. Otherwise
378 	 * I would have done it here.. maybe this means this should be
379 	 * done earlier too.
380 	 */
381 	p->p_flag = P_SYSTEM;
382 	p->p_sflag = PS_INMEM;
383 	p->p_state = PRS_NORMAL;
384 	p->p_nice = NZERO;
385 	td->td_state = TDS_RUNNING;
386 	kg->kg_pri_class = PRI_TIMESHARE;
387 	kg->kg_user_pri = PUSER;
388 	td->td_priority = PVM;
389 	td->td_base_pri = PUSER;
390 	td->td_kse = ke; /* XXXKSE */
391 	td->td_oncpu = 0;
392 	ke->ke_state = KES_THREAD;
393 	ke->ke_thread = td;
394 	p->p_peers = 0;
395 	p->p_leader = p;
396 
397 
398 	bcopy("swapper", p->p_comm, sizeof ("swapper"));
399 
400 	callout_init(&p->p_itcallout, CALLOUT_MPSAFE);
401 	callout_init(&td->td_slpcallout, CALLOUT_MPSAFE);
402 
403 	/* Create credentials. */
404 	p->p_ucred = crget();
405 	p->p_ucred->cr_ngroups = 1;	/* group 0 */
406 	p->p_ucred->cr_uidinfo = uifind(0);
407 	p->p_ucred->cr_ruidinfo = uifind(0);
408 	p->p_ucred->cr_prison = NULL;	/* Don't jail it. */
409 #ifdef MAC
410 	mac_create_proc0(p->p_ucred);
411 #endif
412 	td->td_ucred = crhold(p->p_ucred);
413 
414 	/* Create sigacts. */
415 	p->p_sigacts = sigacts_alloc();
416 
417 	/* Initialize signal state for process 0. */
418 	siginit(&proc0);
419 
420 	/* Create the file descriptor table. */
421 	/* XXX this duplicates part of fdinit() */
422 	fdp = &filedesc0;
423 	p->p_fd = &fdp->fd_fd;
424 	p->p_fdtol = NULL;
425 	mtx_init(&fdp->fd_fd.fd_mtx, FILEDESC_LOCK_DESC, NULL, MTX_DEF);
426 	fdp->fd_fd.fd_refcnt = 1;
427 	fdp->fd_fd.fd_cmask = CMASK;
428 	fdp->fd_fd.fd_ofiles = fdp->fd_dfiles;
429 	fdp->fd_fd.fd_ofileflags = fdp->fd_dfileflags;
430 	fdp->fd_fd.fd_nfiles = NDFILE;
431 	fdp->fd_fd.fd_map = fdp->fd_dmap;
432 
433 	/* Create the limits structures. */
434 	p->p_limit = lim_alloc();
435 	for (i = 0; i < RLIM_NLIMITS; i++)
436 		p->p_limit->pl_rlimit[i].rlim_cur =
437 		    p->p_limit->pl_rlimit[i].rlim_max = RLIM_INFINITY;
438 	p->p_limit->pl_rlimit[RLIMIT_NOFILE].rlim_cur =
439 	    p->p_limit->pl_rlimit[RLIMIT_NOFILE].rlim_max = maxfiles;
440 	p->p_limit->pl_rlimit[RLIMIT_NPROC].rlim_cur =
441 	    p->p_limit->pl_rlimit[RLIMIT_NPROC].rlim_max = maxproc;
442 	i = ptoa(cnt.v_free_count);
443 	p->p_limit->pl_rlimit[RLIMIT_RSS].rlim_max = i;
444 	p->p_limit->pl_rlimit[RLIMIT_MEMLOCK].rlim_max = i;
445 	p->p_limit->pl_rlimit[RLIMIT_MEMLOCK].rlim_cur = i / 3;
446 	p->p_cpulimit = RLIM_INFINITY;
447 
448 	/* Allocate a prototype map so we have something to fork. */
449 	pmap_pinit0(vmspace_pmap(&vmspace0));
450 	p->p_vmspace = &vmspace0;
451 	vmspace0.vm_refcnt = 1;
452 	vm_map_init(&vmspace0.vm_map, p->p_sysent->sv_minuser,
453 	    p->p_sysent->sv_maxuser);
454 	vmspace0.vm_map.pmap = vmspace_pmap(&vmspace0);
455 
456 	/*
457 	 * We continue to place resource usage info
458 	 * in the user struct so that it's pageable.
459 	 */
460 	p->p_stats = &p->p_uarea->u_stats;
461 
462 	/*
463 	 * Charge root for one process.
464 	 */
465 	(void)chgproccnt(p->p_ucred->cr_ruidinfo, 1, 0);
466 }
467 SYSINIT(p0init, SI_SUB_INTRINSIC, SI_ORDER_FIRST, proc0_init, NULL)
468 
469 /* ARGSUSED*/
470 static void
471 proc0_post(void *dummy __unused)
472 {
473 	struct timespec ts;
474 	struct proc *p;
475 
476 	/*
477 	 * Now we can look at the time, having had a chance to verify the
478 	 * time from the filesystem.  Pretend that proc0 started now.
479 	 */
480 	sx_slock(&allproc_lock);
481 	LIST_FOREACH(p, &allproc, p_list) {
482 		microuptime(&p->p_stats->p_start);
483 		p->p_runtime.sec = 0;
484 		p->p_runtime.frac = 0;
485 	}
486 	sx_sunlock(&allproc_lock);
487 	binuptime(PCPU_PTR(switchtime));
488 	PCPU_SET(switchticks, ticks);
489 
490 	/*
491 	 * Give the ``random'' number generator a thump.
492 	 */
493 	nanotime(&ts);
494 	srandom(ts.tv_sec ^ ts.tv_nsec);
495 }
496 SYSINIT(p0post, SI_SUB_INTRINSIC_POST, SI_ORDER_FIRST, proc0_post, NULL)
497 
498 /*
499  ***************************************************************************
500  ****
501  **** The following SYSINIT's and glue code should be moved to the
502  **** respective files on a per subsystem basis.
503  ****
504  ***************************************************************************
505  */
506 
507 
508 /*
509  ***************************************************************************
510  ****
511  **** The following code probably belongs in another file, like
512  **** kern/init_init.c.
513  ****
514  ***************************************************************************
515  */
516 
/*
 * List of paths to try when searching for "init".
 *
 * The list is colon-separated and tried in order by start_init().  The
 * compiled-in default can be replaced at build time by defining INIT_PATH,
 * and start_init() overwrites it from the "init_path" kernel environment
 * variable when that is set.  Exported read-only through the _kern sysctl
 * node.
 */
static char init_path[MAXPATHLEN] =
#ifdef	INIT_PATH
    __XSTRING(INIT_PATH);
#else
    "/sbin/init:/sbin/oinit:/sbin/init.bak:/stand/sysinstall";
#endif
SYSCTL_STRING(_kern, OID_AUTO, init_path, CTLFLAG_RD, init_path, 0,
	"Path used to search the init process");
528 
/*
 * Start the initial user process; try exec'ing each pathname in init_path.
 * The program is invoked with one argument containing the boot flags.
 *
 * Installed by create_init() as the fork handler of init's first thread.
 * Mounts the root filesystem, points the process's current and root
 * directories at it, tries to mount devfs on /dev, maps one page of user
 * stack and then walks init_path.  Returns (with Giant released) as soon
 * as one execve() succeeds; panics with "no init" if none does.
 *
 * dummy: unused.
 */
static void
start_init(void *dummy)
{
	vm_offset_t addr;
	struct execve_args args;
	int options, error;
	char *var, *path, *next, *s;
	char *ucp, **uap, *arg0, *arg1;
	struct thread *td;
	struct proc *p;
	int init_does_devfs = 0;	/* set if init must mount devfs itself */

	mtx_lock(&Giant);

	GIANT_REQUIRED;

	td = curthread;
	p = td->td_proc;

	vfs_mountroot();

	/* Get the vnode for '/'.  Set p->p_fd->fd_cdir to reference it. */
	if (VFS_ROOT(TAILQ_FIRST(&mountlist), &rootvnode, td))
		panic("cannot find root vnode");
	FILEDESC_LOCK(p->p_fd);
	/* One reference is taken for each of the two directory pointers. */
	p->p_fd->fd_cdir = rootvnode;
	VREF(p->p_fd->fd_cdir);
	p->p_fd->fd_rdir = rootvnode;
	VREF(p->p_fd->fd_rdir);
	FILEDESC_UNLOCK(p->p_fd);
	VOP_UNLOCK(rootvnode, 0, td);
#ifdef MAC
	mac_create_root_mount(td->td_ucred, TAILQ_FIRST(&mountlist));
#endif

	/*
	 * For disk based systems, we probably cannot do this yet
	 * since the fs will be read-only.  But a NFS root
	 * might be ok.  It is worth a shot.
	 *
	 * An already existing /dev is fine.  If either the mkdir or the
	 * devfs mount fails, remember it so that init is told (via the
	 * 'd' flag below) to do the job itself.
	 */
	error = kern_mkdir(td, "/dev", UIO_SYSSPACE, 0700);
	if (error == EEXIST)
		error = 0;
	if (error == 0)
		error = kernel_vmount(0, "fstype", "devfs",
		    "fspath", "/dev", NULL);
	if (error != 0)
		init_does_devfs = 1;

	/*
	 * Need just enough stack to hold the faked-up "execve()" arguments:
	 * map the single page that ends at sv_usrstack.
	 */
	addr = p->p_sysent->sv_usrstack - PAGE_SIZE;
	if (vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &addr, PAGE_SIZE,
			FALSE, VM_PROT_ALL, VM_PROT_ALL, 0) != 0)
		panic("init: couldn't allocate argument space");
	p->p_vmspace->vm_maxsaddr = (caddr_t)addr;
	p->p_vmspace->vm_ssize = 1;

	/* The kernel environment may override the compiled-in path list. */
	if ((var = getenv("init_path")) != NULL) {
		strlcpy(init_path, var, sizeof(init_path));
		freeenv(var);
	}

	/* Try each colon-separated element of init_path in turn. */
	for (path = init_path; *path != '\0'; path = next) {
		/* Skip separators; stop if nothing but separators is left. */
		while (*path == ':')
			path++;
		if (*path == '\0')
			break;
		/* Leave next at the ':' or NUL that ends this element. */
		for (next = path; *next != '\0' && *next != ':'; next++)
			/* nothing */ ;
		if (bootverbose)
			printf("start_init: trying %.*s\n", (int)(next - path),
			    path);

		/*
		 * Move out the boot flag argument.
		 *
		 * The strings are built from the top of the user stack
		 * downwards, so each one is stored last byte first,
		 * beginning with its terminating NUL.
		 */
		options = 0;
		ucp = (char *)p->p_sysent->sv_usrstack;
		(void)subyte(--ucp, 0);		/* trailing zero */
		if (boothowto & RB_SINGLE) {
			(void)subyte(--ucp, 's');
			options = 1;
		}
#ifdef notyet
                if (boothowto & RB_FASTBOOT) {
			(void)subyte(--ucp, 'f');
			options = 1;
		}
#endif

#ifdef BOOTCDROM
		(void)subyte(--ucp, 'C');
		options = 1;
#endif
		if (init_does_devfs) {
			(void)subyte(--ucp, 'd');
			options = 1;
		}

		/* With no flag letters the argument becomes "--". */
		if (options == 0)
			(void)subyte(--ucp, '-');
		(void)subyte(--ucp, '-');		/* leading hyphen */
		arg1 = ucp;

		/*
		 * Move out the file name (also arg 0), again from its last
		 * character back to its first.
		 */
		(void)subyte(--ucp, 0);
		for (s = next - 1; s >= path; s--)
			(void)subyte(--ucp, *s);
		arg0 = ucp;

		/*
		 * Move out the arg pointers.  The vector is placed below
		 * the strings, starting from ucp rounded down to a
		 * multiple of sizeof(intptr_t), and is likewise filled in
		 * from its NULL terminator back to argv[0].
		 */
		uap = (char **)((intptr_t)ucp & ~(sizeof(intptr_t)-1));
		(void)suword((caddr_t)--uap, (long)0);	/* terminator */
		(void)suword((caddr_t)--uap, (long)(intptr_t)arg1);
		(void)suword((caddr_t)--uap, (long)(intptr_t)arg0);

		/*
		 * Point at the arguments.
		 */
		args.fname = arg0;
		args.argv = uap;
		args.envv = NULL;

		/*
		 * Now try to exec the program.  If can't for any reason
		 * other than it doesn't exist, complain.
		 *
		 * Otherwise, return via fork_trampoline() all the way
		 * to user mode as init!
		 */
		if ((error = execve(td, &args)) == 0) {
			mtx_unlock(&Giant);
			return;
		}
		if (error != ENOENT)
			printf("exec %.*s: error %d\n", (int)(next - path),
			    path, error);
	}
	/* Every candidate failed (or the list was empty). */
	printf("init: not found in path %s\n", init_path);
	panic("no init");
}
680 
681 /*
682  * Like kthread_create(), but runs in it's own address space.
683  * We do this early to reserve pid 1.
684  *
685  * Note special case - do not make it runnable yet.  Other work
686  * in progress will change this more.
687  */
688 static void
689 create_init(const void *udata __unused)
690 {
691 	struct ucred *newcred, *oldcred;
692 	int error;
693 
694 	error = fork1(&thread0, RFFDG | RFPROC | RFSTOPPED, 0, &initproc);
695 	if (error)
696 		panic("cannot fork init: %d\n", error);
697 	KASSERT(initproc->p_pid == 1, ("create_init: initproc->p_pid != 1"));
698 	/* divorce init's credentials from the kernel's */
699 	newcred = crget();
700 	PROC_LOCK(initproc);
701 	initproc->p_flag |= P_SYSTEM;
702 	oldcred = initproc->p_ucred;
703 	crcopy(newcred, oldcred);
704 #ifdef MAC
705 	mac_create_proc1(newcred);
706 #endif
707 	initproc->p_ucred = newcred;
708 	PROC_UNLOCK(initproc);
709 	crfree(oldcred);
710 	cred_update_thread(FIRST_THREAD_IN_PROC(initproc));
711 	mtx_lock_spin(&sched_lock);
712 	initproc->p_sflag |= PS_INMEM;
713 	mtx_unlock_spin(&sched_lock);
714 	cpu_set_fork_handler(FIRST_THREAD_IN_PROC(initproc), start_init, NULL);
715 }
716 SYSINIT(init, SI_SUB_CREATE_INIT, SI_ORDER_FIRST, create_init, NULL)
717 
718 /*
719  * Make it runnable now.
720  */
721 static void
722 kick_init(const void *udata __unused)
723 {
724 	struct thread *td;
725 
726 	td = FIRST_THREAD_IN_PROC(initproc);
727 	mtx_lock_spin(&sched_lock);
728 	TD_SET_CAN_RUN(td);
729 	setrunqueue(td);	/* XXXKSE */
730 	mtx_unlock_spin(&sched_lock);
731 }
732 SYSINIT(kickinit, SI_SUB_KTHREAD_INIT, SI_ORDER_FIRST, kick_init, NULL)
733