xref: /freebsd/sys/kern/init_main.c (revision 7562eaabc01a48e6b11d5b558c41e3b92dae5c2d)
1 /*
2  * Copyright (c) 1995 Terrence R. Lambert
3  * All rights reserved.
4  *
5  * Copyright (c) 1982, 1986, 1989, 1991, 1992, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  * (c) UNIX System Laboratories, Inc.
8  * All or some portions of this file are derived from material licensed
9  * to the University of California by American Telephone and Telegraph
10  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
11  * the permission of UNIX System Laboratories, Inc.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 3. All advertising materials mentioning features or use of this software
22  *    must display the following acknowledgement:
23  *	This product includes software developed by the University of
24  *	California, Berkeley and its contributors.
25  * 4. Neither the name of the University nor the names of its contributors
26  *    may be used to endorse or promote products derived from this software
27  *    without specific prior written permission.
28  *
29  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39  * SUCH DAMAGE.
40  *
41  *	@(#)init_main.c	8.9 (Berkeley) 1/21/94
42  */
43 
44 #include <sys/cdefs.h>
45 __FBSDID("$FreeBSD$");
46 
47 #include "opt_init_path.h"
48 #include "opt_mac.h"
49 
50 #include <sys/param.h>
51 #include <sys/kernel.h>
52 #include <sys/exec.h>
53 #include <sys/file.h>
54 #include <sys/filedesc.h>
55 #include <sys/ktr.h>
56 #include <sys/lock.h>
57 #include <sys/mac.h>
58 #include <sys/mount.h>
59 #include <sys/mutex.h>
60 #include <sys/syscallsubr.h>
61 #include <sys/sysctl.h>
62 #include <sys/proc.h>
63 #include <sys/resourcevar.h>
64 #include <sys/systm.h>
65 #include <sys/signalvar.h>
66 #include <sys/vnode.h>
67 #include <sys/sysent.h>
68 #include <sys/reboot.h>
69 #include <sys/sched.h>
70 #include <sys/sx.h>
71 #include <sys/sysproto.h>
72 #include <sys/vmmeter.h>
73 #include <sys/unistd.h>
74 #include <sys/malloc.h>
75 #include <sys/conf.h>
76 
77 #include <machine/cpu.h>
78 
79 #include <vm/vm.h>
80 #include <vm/vm_param.h>
81 #include <vm/pmap.h>
82 #include <vm/vm_map.h>
83 #include <sys/user.h>
84 #include <sys/copyright.h>
85 
/*
 * Entry point of the machine-independent startup sequence; defined
 * later in this file.
 */
void mi_startup(void);				/* Should be elsewhere */

/*
 * Components of the first process -- never freed.
 *
 * These are statically allocated and wired together by proc0_init().
 */
static struct session session0;
static struct pgrp pgrp0;
struct	proc proc0;
struct	thread thread0;
struct	ksegrp ksegrp0;
static struct filedesc0 filedesc0;
struct	vmspace vmspace0;
/* The init process; filled in by fork1() in create_init(). */
struct	proc *initproc;

/*
 * Boot flags.  This file tests RB_VERBOSE (set_boot_verbose()) and
 * RB_SINGLE (start_init()) in it.
 */
int	boothowto = 0;		/* initialized so that it can be patched */
SYSCTL_INT(_debug, OID_AUTO, boothowto, CTLFLAG_RD, &boothowto, 0, "");
/* Verbosity counter; incremented by set_boot_verbose(). */
int	bootverbose;
SYSCTL_INT(_debug, OID_AUTO, bootverbose, CTLFLAG_RW, &bootverbose, 0, "");
102 
/*
 * This ensures that there is at least one entry so that the sysinit_set
 * symbol is not undefined.  A subsystem ID of SI_SUB_DUMMY is never
 * executed: mi_startup() skips such entries, which is why a NULL
 * function pointer is acceptable here.
 */
SYSINIT(placeholder, SI_SUB_DUMMY, SI_ORDER_ANY, NULL, NULL)
109 
/*
 * The sysinit table itself.  Items are checked off as they are run.
 * If we want to register new sysinit types, add them to newsysinit.
 *
 * sysinit/sysinit_end bound the table mi_startup() is currently
 * walking; newsysinit/newsysinit_end bound a pending replacement table
 * built by sysinit_add(), which mi_startup() adopts after the current
 * entry returns.
 */
SET_DECLARE(sysinit_set, struct sysinit);
struct sysinit **sysinit, **sysinit_end;
struct sysinit **newsysinit, **newsysinit_end;
117 
118 /*
119  * Merge a new sysinit set into the current set, reallocating it if
120  * necessary.  This can only be called after malloc is running.
121  */
122 void
123 sysinit_add(struct sysinit **set, struct sysinit **set_end)
124 {
125 	struct sysinit **newset;
126 	struct sysinit **sipp;
127 	struct sysinit **xipp;
128 	int count;
129 
130 	count = set_end - set;
131 	if (newsysinit)
132 		count += newsysinit_end - newsysinit;
133 	else
134 		count += sysinit_end - sysinit;
135 	newset = malloc(count * sizeof(*sipp), M_TEMP, M_NOWAIT);
136 	if (newset == NULL)
137 		panic("cannot malloc for sysinit");
138 	xipp = newset;
139 	if (newsysinit)
140 		for (sipp = newsysinit; sipp < newsysinit_end; sipp++)
141 			*xipp++ = *sipp;
142 	else
143 		for (sipp = sysinit; sipp < sysinit_end; sipp++)
144 			*xipp++ = *sipp;
145 	for (sipp = set; sipp < set_end; sipp++)
146 		*xipp++ = *sipp;
147 	if (newsysinit)
148 		free(newsysinit, M_TEMP);
149 	newsysinit = newset;
150 	newsysinit_end = newset + count;
151 }
152 
153 /*
154  * System startup; initialize the world, create process 0, mount root
155  * filesystem, and fork to create init and pagedaemon.  Most of the
156  * hard work is done in the lower-level initialization routines including
157  * startup(), which does memory initialization and autoconfiguration.
158  *
159  * This allows simple addition of new kernel subsystems that require
160  * boot time initialization.  It also allows substitution of subsystem
161  * (for instance, a scheduler, kernel profiler, or VM system) by object
162  * module.  Finally, it allows for optional "kernel threads".
163  */
164 void
165 mi_startup(void)
166 {
167 
168 	register struct sysinit **sipp;		/* system initialization*/
169 	register struct sysinit **xipp;		/* interior loop of sort*/
170 	register struct sysinit *save;		/* bubble*/
171 
172 	if (sysinit == NULL) {
173 		sysinit = SET_BEGIN(sysinit_set);
174 		sysinit_end = SET_LIMIT(sysinit_set);
175 	}
176 
177 restart:
178 	/*
179 	 * Perform a bubble sort of the system initialization objects by
180 	 * their subsystem (primary key) and order (secondary key).
181 	 */
182 	for (sipp = sysinit; sipp < sysinit_end; sipp++) {
183 		for (xipp = sipp + 1; xipp < sysinit_end; xipp++) {
184 			if ((*sipp)->subsystem < (*xipp)->subsystem ||
185 			     ((*sipp)->subsystem == (*xipp)->subsystem &&
186 			      (*sipp)->order <= (*xipp)->order))
187 				continue;	/* skip*/
188 			save = *sipp;
189 			*sipp = *xipp;
190 			*xipp = save;
191 		}
192 	}
193 
194 	/*
195 	 * Traverse the (now) ordered list of system initialization tasks.
196 	 * Perform each task, and continue on to the next task.
197 	 *
198 	 * The last item on the list is expected to be the scheduler,
199 	 * which will not return.
200 	 */
201 	for (sipp = sysinit; sipp < sysinit_end; sipp++) {
202 
203 		if ((*sipp)->subsystem == SI_SUB_DUMMY)
204 			continue;	/* skip dummy task(s)*/
205 
206 		if ((*sipp)->subsystem == SI_SUB_DONE)
207 			continue;
208 
209 		/* Call function */
210 		(*((*sipp)->func))((*sipp)->udata);
211 
212 		/* Check off the one we're just done */
213 		(*sipp)->subsystem = SI_SUB_DONE;
214 
215 		/* Check if we've installed more sysinit items via KLD */
216 		if (newsysinit != NULL) {
217 			if (sysinit != SET_BEGIN(sysinit_set))
218 				free(sysinit, M_TEMP);
219 			sysinit = newsysinit;
220 			sysinit_end = newsysinit_end;
221 			newsysinit = NULL;
222 			newsysinit_end = NULL;
223 			goto restart;
224 		}
225 	}
226 
227 	panic("Shouldn't get here!");
228 	/* NOTREACHED*/
229 }
230 
231 
232 /*
233  ***************************************************************************
234  ****
235  **** The following SYSINIT's belong elsewhere, but have not yet
236  **** been moved.
237  ****
238  ***************************************************************************
239  */
/*
 * SYSINIT helper: print the NUL-terminated string passed as the
 * sysinit argument.
 *
 * data: pointer to the string to print.  It is dereferenced, so it is
 *       deliberately not annotated as unused.
 */
static void
print_caddr_t(void *data)
{
	printf("%s", (char *)data);
}
/*
 * Boot banner: these entries all use print_caddr_t() to print a string
 * at SI_SUB_COPYRIGHT.  The order values place the copyright first,
 * then the version, then any option warnings below.
 */
SYSINIT(announce, SI_SUB_COPYRIGHT, SI_ORDER_FIRST, print_caddr_t, copyright)
SYSINIT(version, SI_SUB_COPYRIGHT, SI_ORDER_SECOND, print_caddr_t, version)

#ifdef WITNESS
/* Printed after the version string when built with WITNESS. */
static char wit_warn[] =
     "WARNING: WITNESS option enabled, expect reduced performance.\n";
SYSINIT(witwarn, SI_SUB_COPYRIGHT, SI_ORDER_SECOND + 1,
   print_caddr_t, wit_warn)
#endif

#ifdef DIAGNOSTIC
/* Printed after the WITNESS slot when built with DIAGNOSTIC. */
static char diag_warn[] =
     "WARNING: DIAGNOSTIC option enabled, expect reduced performance.\n";
SYSINIT(diagwarn, SI_SUB_COPYRIGHT, SI_ORDER_SECOND + 2,
    print_caddr_t, diag_warn)
#endif
261 
262 static void
263 set_boot_verbose(void *data __unused)
264 {
265 
266 	if (boothowto & RB_VERBOSE)
267 		bootverbose++;
268 }
269 SYSINIT(boot_verbose, SI_SUB_TUNABLES, SI_ORDER_ANY, set_boot_verbose, NULL)
270 
/*
 * System-call vector installed as proc0's p_sysent by proc0_init().
 *
 * The initializer is positional, so each value must stay in the
 * declaration order of struct sysentvec, which is declared outside this
 * file.  NOTE(review): confirm the field order against <sys/sysent.h>
 * before adding, removing or reordering entries.
 *
 * Everything is zero/NULL except the "null" name string and the page
 * size, address-range, stack, ps_strings and protection constants near
 * the end.  Code in this file reads sv_minuser, sv_maxuser and
 * sv_usrstack through p_sysent; presumably those are the
 * VM_MIN_ADDRESS, VM_MAXUSER_ADDRESS and USRSTACK entries -- TODO
 * confirm.
 */
struct sysentvec null_sysvec = {
	0,
	NULL,
	0,
	0,
	NULL,
	0,
	NULL,
	NULL,
	NULL,
	NULL,
	NULL,
	NULL,
	NULL,
	"null",
	NULL,
	NULL,
	0,
	PAGE_SIZE,
	VM_MIN_ADDRESS,
	VM_MAXUSER_ADDRESS,
	USRSTACK,
	PS_STRINGS,
	VM_PROT_ALL,
	NULL,
	NULL,
	NULL
};
299 
300 /*
301  ***************************************************************************
302  ****
303  **** The two following SYSINIT's are proc0 specific glue code.  I am not
304  **** convinced that they can not be safely combined, but their order of
305  **** operation has been maintained as the same as the original init_main.c
306  **** for right now.
307  ****
308  **** These probably belong in init_proc.c or kern_proc.c, since they
309  **** deal with proc0 (the fork template process).
310  ****
311  ***************************************************************************
312  */
313 /* ARGSUSED*/
314 static void
315 proc0_init(void *dummy __unused)
316 {
317 	register struct proc		*p;
318 	register struct filedesc0	*fdp;
319 	register unsigned i;
320 	struct thread *td;
321 	struct ksegrp *kg;
322 
323 	GIANT_REQUIRED;
324 	p = &proc0;
325 	td = &thread0;
326 	kg = &ksegrp0;
327 
328 	/*
329 	 * Initialize magic number.
330 	 */
331 	p->p_magic = P_MAGIC;
332 
333 	/*
334 	 * Initialize thread, process and ksegrp structures.
335 	 */
336 	procinit();	/* set up proc zone */
337 	threadinit();	/* set up thead, upcall and KSEGRP zones */
338 
339 	/*
340 	 * Initialise scheduler resources.
341 	 * Add scheduler specific parts to proc, ksegrp, thread as needed.
342 	 */
343 	schedinit();	/* scheduler gets its house in order */
344 	/*
345 	 * Initialize sleep queue hash table
346 	 */
347 	sleepinit();
348 
349 	/*
350 	 * additional VM structures
351 	 */
352 	vm_init2();
353 
354 	/*
355 	 * Create process 0 (the swapper).
356 	 */
357 	LIST_INSERT_HEAD(&allproc, p, p_list);
358 	LIST_INSERT_HEAD(PIDHASH(0), p, p_hash);
359 	mtx_init(&pgrp0.pg_mtx, "process group", NULL, MTX_DEF | MTX_DUPOK);
360 	p->p_pgrp = &pgrp0;
361 	LIST_INSERT_HEAD(PGRPHASH(0), &pgrp0, pg_hash);
362 	LIST_INIT(&pgrp0.pg_members);
363 	LIST_INSERT_HEAD(&pgrp0.pg_members, p, p_pglist);
364 
365 	pgrp0.pg_session = &session0;
366 	mtx_init(&session0.s_mtx, "session", NULL, MTX_DEF);
367 	session0.s_count = 1;
368 	session0.s_leader = p;
369 
370 	p->p_sysent = &null_sysvec;
371 	p->p_flag = P_SYSTEM;
372 	p->p_sflag = PS_INMEM;
373 	p->p_state = PRS_NORMAL;
374 	knlist_init(&p->p_klist, &p->p_mtx);
375 	p->p_nice = NZERO;
376 	td->td_state = TDS_RUNNING;
377 	kg->kg_pri_class = PRI_TIMESHARE;
378 	kg->kg_user_pri = PUSER;
379 	td->td_priority = PVM;
380 	td->td_base_pri = PUSER;
381 	td->td_oncpu = 0;
382 	p->p_peers = 0;
383 	p->p_leader = p;
384 
385 
386 	bcopy("swapper", p->p_comm, sizeof ("swapper"));
387 
388 	callout_init(&p->p_itcallout, CALLOUT_MPSAFE);
389 	callout_init(&td->td_slpcallout, CALLOUT_MPSAFE);
390 
391 	/* Create credentials. */
392 	p->p_ucred = crget();
393 	p->p_ucred->cr_ngroups = 1;	/* group 0 */
394 	p->p_ucred->cr_uidinfo = uifind(0);
395 	p->p_ucred->cr_ruidinfo = uifind(0);
396 	p->p_ucred->cr_prison = NULL;	/* Don't jail it. */
397 #ifdef MAC
398 	mac_create_proc0(p->p_ucred);
399 #endif
400 	td->td_ucred = crhold(p->p_ucred);
401 
402 	/* Create sigacts. */
403 	p->p_sigacts = sigacts_alloc();
404 
405 	/* Initialize signal state for process 0. */
406 	siginit(&proc0);
407 
408 	/* Create the file descriptor table. */
409 	/* XXX this duplicates part of fdinit() */
410 	fdp = &filedesc0;
411 	p->p_fd = &fdp->fd_fd;
412 	p->p_fdtol = NULL;
413 	mtx_init(&fdp->fd_fd.fd_mtx, FILEDESC_LOCK_DESC, NULL, MTX_DEF);
414 	fdp->fd_fd.fd_refcnt = 1;
415 	fdp->fd_fd.fd_cmask = CMASK;
416 	fdp->fd_fd.fd_ofiles = fdp->fd_dfiles;
417 	fdp->fd_fd.fd_ofileflags = fdp->fd_dfileflags;
418 	fdp->fd_fd.fd_nfiles = NDFILE;
419 	fdp->fd_fd.fd_map = fdp->fd_dmap;
420 
421 	/* Create the limits structures. */
422 	p->p_limit = lim_alloc();
423 	for (i = 0; i < RLIM_NLIMITS; i++)
424 		p->p_limit->pl_rlimit[i].rlim_cur =
425 		    p->p_limit->pl_rlimit[i].rlim_max = RLIM_INFINITY;
426 	p->p_limit->pl_rlimit[RLIMIT_NOFILE].rlim_cur =
427 	    p->p_limit->pl_rlimit[RLIMIT_NOFILE].rlim_max = maxfiles;
428 	p->p_limit->pl_rlimit[RLIMIT_NPROC].rlim_cur =
429 	    p->p_limit->pl_rlimit[RLIMIT_NPROC].rlim_max = maxproc;
430 	i = ptoa(cnt.v_free_count);
431 	p->p_limit->pl_rlimit[RLIMIT_RSS].rlim_max = i;
432 	p->p_limit->pl_rlimit[RLIMIT_MEMLOCK].rlim_max = i;
433 	p->p_limit->pl_rlimit[RLIMIT_MEMLOCK].rlim_cur = i / 3;
434 	p->p_cpulimit = RLIM_INFINITY;
435 
436 	/* Allocate a prototype map so we have something to fork. */
437 	pmap_pinit0(vmspace_pmap(&vmspace0));
438 	p->p_vmspace = &vmspace0;
439 	vmspace0.vm_refcnt = 1;
440 	vm_map_init(&vmspace0.vm_map, p->p_sysent->sv_minuser,
441 	    p->p_sysent->sv_maxuser);
442 	vmspace0.vm_map.pmap = vmspace_pmap(&vmspace0);
443 
444 	/*
445 	 * We continue to place resource usage info
446 	 * in the user struct so that it's pageable.
447 	 */
448 	p->p_stats = &p->p_uarea->u_stats;
449 
450 	/*
451 	 * Charge root for one process.
452 	 */
453 	(void)chgproccnt(p->p_ucred->cr_ruidinfo, 1, 0);
454 }
455 SYSINIT(p0init, SI_SUB_INTRINSIC, SI_ORDER_FIRST, proc0_init, NULL)
456 
457 /* ARGSUSED*/
458 static void
459 proc0_post(void *dummy __unused)
460 {
461 	struct timespec ts;
462 	struct proc *p;
463 
464 	/*
465 	 * Now we can look at the time, having had a chance to verify the
466 	 * time from the filesystem.  Pretend that proc0 started now.
467 	 */
468 	sx_slock(&allproc_lock);
469 	LIST_FOREACH(p, &allproc, p_list) {
470 		microuptime(&p->p_stats->p_start);
471 		p->p_rux.rux_runtime.sec = 0;
472 		p->p_rux.rux_runtime.frac = 0;
473 	}
474 	sx_sunlock(&allproc_lock);
475 	binuptime(PCPU_PTR(switchtime));
476 	PCPU_SET(switchticks, ticks);
477 
478 	/*
479 	 * Give the ``random'' number generator a thump.
480 	 */
481 	nanotime(&ts);
482 	srandom(ts.tv_sec ^ ts.tv_nsec);
483 }
484 SYSINIT(p0post, SI_SUB_INTRINSIC_POST, SI_ORDER_FIRST, proc0_post, NULL)
485 
486 /*
487  ***************************************************************************
488  ****
489  **** The following SYSINIT's and glue code should be moved to the
490  **** respective files on a per subsystem basis.
491  ****
492  ***************************************************************************
493  */
494 
495 
496 /*
497  ***************************************************************************
498  ****
499  **** The following code probably belongs in another file, like
500  **** kern/init_init.c.
501  ****
502  ***************************************************************************
503  */
504 
/*
 * List of paths to try when searching for "init".
 *
 * The list is colon-separated; start_init() tries each component in
 * order.  The compile-time default is the INIT_PATH option when that is
 * defined, otherwise the built-in list below.  start_init() overwrites
 * this buffer from the "init_path" kernel environment variable when one
 * is set.  The current value is exported read-only through the sysctl
 * declared below.
 */
static char init_path[MAXPATHLEN] =
#ifdef	INIT_PATH
    __XSTRING(INIT_PATH);
#else
    "/sbin/init:/sbin/oinit:/sbin/init.bak:/stand/sysinstall";
#endif
SYSCTL_STRING(_kern, OID_AUTO, init_path, CTLFLAG_RD, init_path, 0,
	"Path used to search the init process");
516 
517 /*
518  * Start the initial user process; try exec'ing each pathname in init_path.
519  * The program is invoked with one argument containing the boot flags.
520  */
521 static void
522 start_init(void *dummy)
523 {
524 	vm_offset_t addr;
525 	struct execve_args args;
526 	int options, error;
527 	char *var, *path, *next, *s;
528 	char *ucp, **uap, *arg0, *arg1;
529 	struct thread *td;
530 	struct proc *p;
531 	int init_does_devfs = 0;
532 
533 	mtx_lock(&Giant);
534 
535 	GIANT_REQUIRED;
536 
537 	td = curthread;
538 	p = td->td_proc;
539 
540 	vfs_mountroot();
541 
542 	/* Get the vnode for '/'.  Set p->p_fd->fd_cdir to reference it. */
543 	if (VFS_ROOT(TAILQ_FIRST(&mountlist), &rootvnode, td))
544 		panic("cannot find root vnode");
545 	FILEDESC_LOCK(p->p_fd);
546 	p->p_fd->fd_cdir = rootvnode;
547 	VREF(p->p_fd->fd_cdir);
548 	p->p_fd->fd_rdir = rootvnode;
549 	VREF(p->p_fd->fd_rdir);
550 	FILEDESC_UNLOCK(p->p_fd);
551 	VOP_UNLOCK(rootvnode, 0, td);
552 #ifdef MAC
553 	mac_create_root_mount(td->td_ucred, TAILQ_FIRST(&mountlist));
554 #endif
555 
556 	/*
557 	 * For disk based systems, we probably cannot do this yet
558 	 * since the fs will be read-only.  But a NFS root
559 	 * might be ok.  It is worth a shot.
560 	 */
561 	error = kern_mkdir(td, "/dev", UIO_SYSSPACE, 0700);
562 	if (error == EEXIST)
563 		error = 0;
564 	if (error == 0)
565 		error = kernel_vmount(0, "fstype", "devfs",
566 		    "fspath", "/dev", NULL);
567 	if (error != 0)
568 		init_does_devfs = 1;
569 
570 	/*
571 	 * Need just enough stack to hold the faked-up "execve()" arguments.
572 	 */
573 	addr = p->p_sysent->sv_usrstack - PAGE_SIZE;
574 	if (vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &addr, PAGE_SIZE,
575 			FALSE, VM_PROT_ALL, VM_PROT_ALL, 0) != 0)
576 		panic("init: couldn't allocate argument space");
577 	p->p_vmspace->vm_maxsaddr = (caddr_t)addr;
578 	p->p_vmspace->vm_ssize = 1;
579 
580 	if ((var = getenv("init_path")) != NULL) {
581 		strlcpy(init_path, var, sizeof(init_path));
582 		freeenv(var);
583 	}
584 
585 	for (path = init_path; *path != '\0'; path = next) {
586 		while (*path == ':')
587 			path++;
588 		if (*path == '\0')
589 			break;
590 		for (next = path; *next != '\0' && *next != ':'; next++)
591 			/* nothing */ ;
592 		if (bootverbose)
593 			printf("start_init: trying %.*s\n", (int)(next - path),
594 			    path);
595 
596 		/*
597 		 * Move out the boot flag argument.
598 		 */
599 		options = 0;
600 		ucp = (char *)p->p_sysent->sv_usrstack;
601 		(void)subyte(--ucp, 0);		/* trailing zero */
602 		if (boothowto & RB_SINGLE) {
603 			(void)subyte(--ucp, 's');
604 			options = 1;
605 		}
606 #ifdef notyet
607                 if (boothowto & RB_FASTBOOT) {
608 			(void)subyte(--ucp, 'f');
609 			options = 1;
610 		}
611 #endif
612 
613 #ifdef BOOTCDROM
614 		(void)subyte(--ucp, 'C');
615 		options = 1;
616 #endif
617 		if (init_does_devfs) {
618 			(void)subyte(--ucp, 'd');
619 			options = 1;
620 		}
621 
622 		if (options == 0)
623 			(void)subyte(--ucp, '-');
624 		(void)subyte(--ucp, '-');		/* leading hyphen */
625 		arg1 = ucp;
626 
627 		/*
628 		 * Move out the file name (also arg 0).
629 		 */
630 		(void)subyte(--ucp, 0);
631 		for (s = next - 1; s >= path; s--)
632 			(void)subyte(--ucp, *s);
633 		arg0 = ucp;
634 
635 		/*
636 		 * Move out the arg pointers.
637 		 */
638 		uap = (char **)((intptr_t)ucp & ~(sizeof(intptr_t)-1));
639 		(void)suword((caddr_t)--uap, (long)0);	/* terminator */
640 		(void)suword((caddr_t)--uap, (long)(intptr_t)arg1);
641 		(void)suword((caddr_t)--uap, (long)(intptr_t)arg0);
642 
643 		/*
644 		 * Point at the arguments.
645 		 */
646 		args.fname = arg0;
647 		args.argv = uap;
648 		args.envv = NULL;
649 
650 		/*
651 		 * Now try to exec the program.  If can't for any reason
652 		 * other than it doesn't exist, complain.
653 		 *
654 		 * Otherwise, return via fork_trampoline() all the way
655 		 * to user mode as init!
656 		 */
657 		if ((error = execve(td, &args)) == 0) {
658 			mtx_unlock(&Giant);
659 			return;
660 		}
661 		if (error != ENOENT)
662 			printf("exec %.*s: error %d\n", (int)(next - path),
663 			    path, error);
664 	}
665 	printf("init: not found in path %s\n", init_path);
666 	panic("no init");
667 }
668 
669 /*
670  * Like kthread_create(), but runs in it's own address space.
671  * We do this early to reserve pid 1.
672  *
673  * Note special case - do not make it runnable yet.  Other work
674  * in progress will change this more.
675  */
676 static void
677 create_init(const void *udata __unused)
678 {
679 	struct ucred *newcred, *oldcred;
680 	int error;
681 
682 	error = fork1(&thread0, RFFDG | RFPROC | RFSTOPPED, 0, &initproc);
683 	if (error)
684 		panic("cannot fork init: %d\n", error);
685 	KASSERT(initproc->p_pid == 1, ("create_init: initproc->p_pid != 1"));
686 	/* divorce init's credentials from the kernel's */
687 	newcred = crget();
688 	PROC_LOCK(initproc);
689 	initproc->p_flag |= P_SYSTEM;
690 	oldcred = initproc->p_ucred;
691 	crcopy(newcred, oldcred);
692 #ifdef MAC
693 	mac_create_proc1(newcred);
694 #endif
695 	initproc->p_ucred = newcred;
696 	PROC_UNLOCK(initproc);
697 	crfree(oldcred);
698 	cred_update_thread(FIRST_THREAD_IN_PROC(initproc));
699 	mtx_lock_spin(&sched_lock);
700 	initproc->p_sflag |= PS_INMEM;
701 	mtx_unlock_spin(&sched_lock);
702 	cpu_set_fork_handler(FIRST_THREAD_IN_PROC(initproc), start_init, NULL);
703 }
704 SYSINIT(init, SI_SUB_CREATE_INIT, SI_ORDER_FIRST, create_init, NULL)
705 
706 /*
707  * Make it runnable now.
708  */
709 static void
710 kick_init(const void *udata __unused)
711 {
712 	struct thread *td;
713 
714 	td = FIRST_THREAD_IN_PROC(initproc);
715 	mtx_lock_spin(&sched_lock);
716 	TD_SET_CAN_RUN(td);
717 	setrunqueue(td, SRQ_BORING);	/* XXXKSE */
718 	mtx_unlock_spin(&sched_lock);
719 }
720 SYSINIT(kickinit, SI_SUB_KTHREAD_INIT, SI_ORDER_FIRST, kick_init, NULL)
721