xref: /freebsd/sys/kern/init_main.c (revision 5773cccf19ef7b97e56c1101aa481c43149224da)
1 /*
2  * Copyright (c) 1995 Terrence R. Lambert
3  * All rights reserved.
4  *
5  * Copyright (c) 1982, 1986, 1989, 1991, 1992, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  * (c) UNIX System Laboratories, Inc.
8  * All or some portions of this file are derived from material licensed
9  * to the University of California by American Telephone and Telegraph
10  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
11  * the permission of UNIX System Laboratories, Inc.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 3. All advertising materials mentioning features or use of this software
22  *    must display the following acknowledgement:
23  *	This product includes software developed by the University of
24  *	California, Berkeley and its contributors.
25  * 4. Neither the name of the University nor the names of its contributors
26  *    may be used to endorse or promote products derived from this software
27  *    without specific prior written permission.
28  *
29  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39  * SUCH DAMAGE.
40  *
41  *	@(#)init_main.c	8.9 (Berkeley) 1/21/94
42  * $FreeBSD$
43  */
44 
45 #include "opt_init_path.h"
46 #include "opt_mac.h"
47 
48 #include <sys/param.h>
49 #include <sys/kernel.h>
50 #include <sys/exec.h>
51 #include <sys/file.h>
52 #include <sys/filedesc.h>
53 #include <sys/ktr.h>
54 #include <sys/lock.h>
55 #include <sys/mac.h>
56 #include <sys/mount.h>
57 #include <sys/mutex.h>
58 #include <sys/syscallsubr.h>
59 #include <sys/sysctl.h>
60 #include <sys/proc.h>
61 #include <sys/resourcevar.h>
62 #include <sys/systm.h>
63 #include <sys/signalvar.h>
64 #include <sys/vnode.h>
65 #include <sys/sysent.h>
66 #include <sys/reboot.h>
67 #include <sys/sched.h>
68 #include <sys/sx.h>
69 #include <sys/sysproto.h>
70 #include <sys/vmmeter.h>
71 #include <sys/unistd.h>
72 #include <sys/malloc.h>
73 #include <sys/conf.h>
74 
75 #include <machine/cpu.h>
76 
77 #include <vm/vm.h>
78 #include <vm/vm_param.h>
79 #include <vm/pmap.h>
80 #include <vm/vm_map.h>
81 #include <sys/user.h>
82 #include <sys/copyright.h>
83 
void mi_startup(void);				/* Should be elsewhere */

/*
 * Components of the first process -- never freed.
 * They are statically allocated and wired together by proc0_init().
 */
static struct session session0;
static struct pgrp pgrp0;
struct	proc proc0;
struct	thread thread0;
struct	kse kse0;
struct	ksegrp ksegrp0;
static struct procsig procsig0;
static struct filedesc0 filedesc0;
static struct plimit limit0;
static struct vmspace vmspace0;
/* The init process; filled in by fork1() in create_init(). */
struct	proc *initproc;

/* Copied into proc0's descriptor table as fd_cmask by proc0_init(). */
int cmask = CMASK;
/* Defined elsewhere; start_init() may override it from the kernel env. */
extern int fallback_elf_brand;

struct	vnode *rootvp;
/*
 * Boot flags.  This file tests RB_VERBOSE (set_boot_verbose) and
 * RB_SINGLE (start_init).  Exported read-only under the _debug sysctl node.
 */
int	boothowto = 0;		/* initialized so that it can be patched */
SYSCTL_INT(_debug, OID_AUTO, boothowto, CTLFLAG_RD, &boothowto, 0, "");
/*
 * Incremented by set_boot_verbose() when RB_VERBOSE is set; exported
 * read-write under the _debug sysctl node.
 */
int	bootverbose;
SYSCTL_INT(_debug, OID_AUTO, bootverbose, CTLFLAG_RW, &bootverbose, 0, "");
107 
/*
 * This ensures that there is at least one entry so that the sysinit_set
 * symbol is not undefined.  A subsystem ID of SI_SUB_DUMMY is never
 * executed: mi_startup() skips every entry carrying it.
 */
SYSINIT(placeholder, SI_SUB_DUMMY, SI_ORDER_ANY, NULL, NULL)
114 
/*
 * The sysinit table itself.  Items are checked off as they are run.
 * If we want to register new sysinit types, add them to newsysinit.
 *
 * sysinit/sysinit_end delimit the table mi_startup() is currently
 * running.  newsysinit/newsysinit_end delimit a merged replacement table
 * built by sysinit_add(); mi_startup() adopts it and resets both
 * newsysinit pointers to NULL.
 */
SET_DECLARE(sysinit_set, struct sysinit);
struct sysinit **sysinit, **sysinit_end;
struct sysinit **newsysinit, **newsysinit_end;
122 
123 /*
124  * Merge a new sysinit set into the current set, reallocating it if
125  * necessary.  This can only be called after malloc is running.
126  */
127 void
128 sysinit_add(struct sysinit **set, struct sysinit **set_end)
129 {
130 	struct sysinit **newset;
131 	struct sysinit **sipp;
132 	struct sysinit **xipp;
133 	int count;
134 
135 	count = set_end - set;
136 	if (newsysinit)
137 		count += newsysinit_end - newsysinit;
138 	else
139 		count += sysinit_end - sysinit;
140 	newset = malloc(count * sizeof(*sipp), M_TEMP, M_NOWAIT);
141 	if (newset == NULL)
142 		panic("cannot malloc for sysinit");
143 	xipp = newset;
144 	if (newsysinit)
145 		for (sipp = newsysinit; sipp < newsysinit_end; sipp++)
146 			*xipp++ = *sipp;
147 	else
148 		for (sipp = sysinit; sipp < sysinit_end; sipp++)
149 			*xipp++ = *sipp;
150 	for (sipp = set; sipp < set_end; sipp++)
151 		*xipp++ = *sipp;
152 	if (newsysinit)
153 		free(newsysinit, M_TEMP);
154 	newsysinit = newset;
155 	newsysinit_end = newset + count;
156 }
157 
158 /*
159  * System startup; initialize the world, create process 0, mount root
160  * filesystem, and fork to create init and pagedaemon.  Most of the
161  * hard work is done in the lower-level initialization routines including
162  * startup(), which does memory initialization and autoconfiguration.
163  *
164  * This allows simple addition of new kernel subsystems that require
165  * boot time initialization.  It also allows substitution of subsystem
166  * (for instance, a scheduler, kernel profiler, or VM system) by object
167  * module.  Finally, it allows for optional "kernel threads".
168  */
169 void
170 mi_startup(void)
171 {
172 
173 	register struct sysinit **sipp;		/* system initialization*/
174 	register struct sysinit **xipp;		/* interior loop of sort*/
175 	register struct sysinit *save;		/* bubble*/
176 
177 	if (sysinit == NULL) {
178 		sysinit = SET_BEGIN(sysinit_set);
179 		sysinit_end = SET_LIMIT(sysinit_set);
180 	}
181 
182 restart:
183 	/*
184 	 * Perform a bubble sort of the system initialization objects by
185 	 * their subsystem (primary key) and order (secondary key).
186 	 */
187 	for (sipp = sysinit; sipp < sysinit_end; sipp++) {
188 		for (xipp = sipp + 1; xipp < sysinit_end; xipp++) {
189 			if ((*sipp)->subsystem < (*xipp)->subsystem ||
190 			     ((*sipp)->subsystem == (*xipp)->subsystem &&
191 			      (*sipp)->order <= (*xipp)->order))
192 				continue;	/* skip*/
193 			save = *sipp;
194 			*sipp = *xipp;
195 			*xipp = save;
196 		}
197 	}
198 
199 	/*
200 	 * Traverse the (now) ordered list of system initialization tasks.
201 	 * Perform each task, and continue on to the next task.
202 	 *
203 	 * The last item on the list is expected to be the scheduler,
204 	 * which will not return.
205 	 */
206 	for (sipp = sysinit; sipp < sysinit_end; sipp++) {
207 
208 		if ((*sipp)->subsystem == SI_SUB_DUMMY)
209 			continue;	/* skip dummy task(s)*/
210 
211 		if ((*sipp)->subsystem == SI_SUB_DONE)
212 			continue;
213 
214 		/* Call function */
215 		(*((*sipp)->func))((*sipp)->udata);
216 
217 		/* Check off the one we're just done */
218 		(*sipp)->subsystem = SI_SUB_DONE;
219 
220 		/* Check if we've installed more sysinit items via KLD */
221 		if (newsysinit != NULL) {
222 			if (sysinit != SET_BEGIN(sysinit_set))
223 				free(sysinit, M_TEMP);
224 			sysinit = newsysinit;
225 			sysinit_end = newsysinit_end;
226 			newsysinit = NULL;
227 			newsysinit_end = NULL;
228 			goto restart;
229 		}
230 	}
231 
232 	panic("Shouldn't get here!");
233 	/* NOTREACHED*/
234 }
235 
236 
237 /*
238  ***************************************************************************
239  ****
240  **** The following SYSINIT's belong elsewhere, but have not yet
241  **** been moved.
242  ****
243  ***************************************************************************
244  */
245 static void
246 print_caddr_t(void *data __unused)
247 {
248 	printf("%s", (char *)data);
249 }
250 SYSINIT(announce, SI_SUB_COPYRIGHT, SI_ORDER_FIRST, print_caddr_t, copyright)
251 SYSINIT(version, SI_SUB_COPYRIGHT, SI_ORDER_SECOND, print_caddr_t, version)
252 
253 static void
254 set_boot_verbose(void *data __unused)
255 {
256 
257 	if (boothowto & RB_VERBOSE)
258 		bootverbose++;
259 }
260 SYSINIT(boot_verbose, SI_SUB_TUNABLES, SI_ORDER_ANY, set_boot_verbose, NULL)
261 
262 struct sysentvec null_sysvec = {
263 	0,
264 	NULL,
265 	0,
266 	0,
267 	NULL,
268 	0,
269 	NULL,
270 	NULL,
271 	NULL,
272 	NULL,
273 	NULL,
274 	NULL,
275 	NULL,
276 	"null",
277 	NULL,
278 	NULL,
279 	0,
280 	PAGE_SIZE,
281 	VM_MIN_ADDRESS,
282 	VM_MAXUSER_ADDRESS,
283 	USRSTACK,
284 	PS_STRINGS,
285 	VM_PROT_ALL,
286 	NULL,
287 	NULL
288 };
289 
/*
 ***************************************************************************
 ****
 **** The two following SYSINITs are proc0 specific glue code.  I am not
 **** convinced that they cannot be safely combined, but their order of
 **** operation has been maintained as the same as the original init_main.c
 **** for right now.
 ****
 **** These probably belong in init_proc.c or kern_proc.c, since they
 **** deal with proc0 (the fork template process).
 ****
 ***************************************************************************
 */
303 /* ARGSUSED*/
304 static void
305 proc0_init(void *dummy __unused)
306 {
307 	register struct proc		*p;
308 	register struct filedesc0	*fdp;
309 	register unsigned i;
310 	struct thread *td;
311 	struct ksegrp *kg;
312 	struct kse *ke;
313 
314 	GIANT_REQUIRED;
315 	p = &proc0;
316 	td = &thread0;
317 	ke = &kse0;
318 	kg = &ksegrp0;
319 
320 	ke->ke_sched = kse0_sched;
321 	kg->kg_sched = ksegrp0_sched;
322 	p->p_sched = proc0_sched;
323 	td->td_sched = thread0_sched;
324 
325 	/*
326 	 * Initialize magic number.
327 	 */
328 	p->p_magic = P_MAGIC;
329 
330 	/*
331 	 * Initialize thread, process and pgrp structures.
332 	 */
333 	procinit();
334 	threadinit();
335 
336 	/*
337 	 * Initialize sleep queue hash table
338 	 */
339 	sleepinit();
340 
341 	/*
342 	 * additional VM structures
343 	 */
344 	vm_init2();
345 
346 	/*
347 	 * Create process 0 (the swapper).
348 	 */
349 	LIST_INSERT_HEAD(&allproc, p, p_list);
350 	LIST_INSERT_HEAD(PIDHASH(0), p, p_hash);
351 	mtx_init(&pgrp0.pg_mtx, "process group", NULL, MTX_DEF | MTX_DUPOK);
352 	p->p_pgrp = &pgrp0;
353 	LIST_INSERT_HEAD(PGRPHASH(0), &pgrp0, pg_hash);
354 	LIST_INIT(&pgrp0.pg_members);
355 	LIST_INSERT_HEAD(&pgrp0.pg_members, p, p_pglist);
356 
357 	pgrp0.pg_session = &session0;
358 	mtx_init(&session0.s_mtx, "session", NULL, MTX_DEF);
359 	session0.s_count = 1;
360 	session0.s_leader = p;
361 
362 	p->p_sysent = &null_sysvec;
363 
364 	/*
365 	 * proc_linkup was already done in init_i386() or alphainit() etc.
366 	 * because the earlier code needed to follow td->td_proc. Otherwise
367 	 * I would have done it here.. maybe this means this should be
368 	 * done earlier too.
369 	 */
370 	p->p_flag = P_SYSTEM;
371 	p->p_sflag = PS_INMEM;
372 	p->p_state = PRS_NORMAL;
373 	td->td_state = TDS_RUNNING;
374 	kg->kg_nice = NZERO;
375 	kg->kg_pri_class = PRI_TIMESHARE;
376 	kg->kg_user_pri = PUSER;
377 	td->td_priority = PVM;
378 	td->td_base_pri = PUSER;
379 	td->td_kse = ke; /* XXXKSE */
380 	ke->ke_oncpu = 0;
381 	ke->ke_state = KES_THREAD;
382 	ke->ke_thread = td;
383 	p->p_peers = 0;
384 	p->p_leader = p;
385 
386 
387 	bcopy("swapper", p->p_comm, sizeof ("swapper"));
388 
389 	callout_init(&p->p_itcallout, 0);
390 	callout_init(&td->td_slpcallout, 1);
391 
392 	/* Create credentials. */
393 	p->p_ucred = crget();
394 	p->p_ucred->cr_ngroups = 1;	/* group 0 */
395 	p->p_ucred->cr_uidinfo = uifind(0);
396 	p->p_ucred->cr_ruidinfo = uifind(0);
397 	p->p_ucred->cr_prison = NULL;	/* Don't jail it. */
398 #ifdef MAC
399 	mac_create_proc0(p->p_ucred);
400 #endif
401 	td->td_ucred = crhold(p->p_ucred);
402 
403 	/* Create procsig. */
404 	p->p_procsig = &procsig0;
405 	p->p_procsig->ps_refcnt = 1;
406 
407 	/* Initialize signal state for process 0. */
408 	siginit(&proc0);
409 
410 	/* Create the file descriptor table. */
411 	fdp = &filedesc0;
412 	p->p_fd = &fdp->fd_fd;
413 	mtx_init(&fdp->fd_fd.fd_mtx, FILEDESC_LOCK_DESC, NULL, MTX_DEF);
414 	fdp->fd_fd.fd_refcnt = 1;
415 	fdp->fd_fd.fd_cmask = cmask;
416 	fdp->fd_fd.fd_ofiles = fdp->fd_dfiles;
417 	fdp->fd_fd.fd_ofileflags = fdp->fd_dfileflags;
418 	fdp->fd_fd.fd_nfiles = NDFILE;
419 
420 	/* Create the limits structures. */
421 	p->p_limit = &limit0;
422 	for (i = 0; i < sizeof(p->p_rlimit)/sizeof(p->p_rlimit[0]); i++)
423 		limit0.pl_rlimit[i].rlim_cur =
424 		    limit0.pl_rlimit[i].rlim_max = RLIM_INFINITY;
425 	limit0.pl_rlimit[RLIMIT_NOFILE].rlim_cur =
426 	    limit0.pl_rlimit[RLIMIT_NOFILE].rlim_max = maxfiles;
427 	limit0.pl_rlimit[RLIMIT_NPROC].rlim_cur =
428 	    limit0.pl_rlimit[RLIMIT_NPROC].rlim_max = maxproc;
429 	i = ptoa(cnt.v_free_count);
430 	limit0.pl_rlimit[RLIMIT_RSS].rlim_max = i;
431 	limit0.pl_rlimit[RLIMIT_MEMLOCK].rlim_max = i;
432 	limit0.pl_rlimit[RLIMIT_MEMLOCK].rlim_cur = i / 3;
433 	limit0.p_refcnt = 1;
434 	p->p_cpulimit = RLIM_INFINITY;
435 
436 	/* Allocate a prototype map so we have something to fork. */
437 	pmap_pinit0(vmspace_pmap(&vmspace0));
438 	p->p_vmspace = &vmspace0;
439 	vmspace0.vm_refcnt = 1;
440 	vm_map_init(&vmspace0.vm_map, p->p_sysent->sv_minuser,
441 	    p->p_sysent->sv_maxuser);
442 	vmspace0.vm_map.pmap = vmspace_pmap(&vmspace0);
443 
444 	/*
445 	 * We continue to place resource usage info and signal
446 	 * actions in the user struct so they're pageable.
447 	 */
448 	p->p_stats = &p->p_uarea->u_stats;
449 	p->p_sigacts = &p->p_uarea->u_sigacts;
450 
451 	/*
452 	 * Charge root for one process.
453 	 */
454 	(void)chgproccnt(p->p_ucred->cr_ruidinfo, 1, 0);
455 }
456 SYSINIT(p0init, SI_SUB_INTRINSIC, SI_ORDER_FIRST, proc0_init, NULL)
457 
458 /* ARGSUSED*/
459 static void
460 proc0_post(void *dummy __unused)
461 {
462 	struct timespec ts;
463 	struct proc *p;
464 
465 	/*
466 	 * Now we can look at the time, having had a chance to verify the
467 	 * time from the filesystem.  Pretend that proc0 started now.
468 	 */
469 	sx_slock(&allproc_lock);
470 	LIST_FOREACH(p, &allproc, p_list) {
471 		microtime(&p->p_stats->p_start);
472 		p->p_runtime.sec = 0;
473 		p->p_runtime.frac = 0;
474 	}
475 	sx_sunlock(&allproc_lock);
476 	binuptime(PCPU_PTR(switchtime));
477 	PCPU_SET(switchticks, ticks);
478 
479 	/*
480 	 * Give the ``random'' number generator a thump.
481 	 */
482 	nanotime(&ts);
483 	srandom(ts.tv_sec ^ ts.tv_nsec);
484 }
485 SYSINIT(p0post, SI_SUB_INTRINSIC_POST, SI_ORDER_FIRST, proc0_post, NULL)
486 
487 /*
488  ***************************************************************************
489  ****
490  **** The following SYSINIT's and glue code should be moved to the
491  **** respective files on a per subsystem basis.
492  ****
493  ***************************************************************************
494  */
495 
496 
497 /*
498  ***************************************************************************
499  ****
500  **** The following code probably belongs in another file, like
501  **** kern/init_init.c.
502  ****
503  ***************************************************************************
504  */
505 
/*
 * List of paths to try when searching for "init".
 *
 * The list is colon-separated; start_init() tries the entries from left
 * to right.  The compiled-in default comes from the INIT_PATH macro when
 * it is defined.  start_init() overwrites the whole list with the value
 * of the "init_path" kernel environment variable when that is set.
 * Exported as a read-only string sysctl under the _kern node.
 */
static char init_path[MAXPATHLEN] =
#ifdef	INIT_PATH
    __XSTRING(INIT_PATH);
#else
    "/sbin/init:/sbin/oinit:/sbin/init.bak:/stand/sysinstall";
#endif
SYSCTL_STRING(_kern, OID_AUTO, init_path, CTLFLAG_RD, init_path, 0,
	"Path used to search the init process");
517 
518 /*
519  * Start the initial user process; try exec'ing each pathname in init_path.
520  * The program is invoked with one argument containing the boot flags.
521  */
522 static void
523 start_init(void *dummy)
524 {
525 	vm_offset_t addr;
526 	struct execve_args args;
527 	int options, error;
528 	char *var, *path, *next, *s;
529 	char *ucp, **uap, *arg0, *arg1;
530 	struct thread *td;
531 	struct proc *p;
532 	int init_does_devfs = 0;
533 
534 	mtx_lock(&Giant);
535 
536 	GIANT_REQUIRED;
537 
538 	td = curthread;
539 	p = td->td_proc;
540 
541 	vfs_mountroot();
542 
543 	/* Get the vnode for '/'.  Set p->p_fd->fd_cdir to reference it. */
544 	if (VFS_ROOT(TAILQ_FIRST(&mountlist), &rootvnode))
545 		panic("cannot find root vnode");
546 	FILEDESC_LOCK(p->p_fd);
547 	p->p_fd->fd_cdir = rootvnode;
548 	VREF(p->p_fd->fd_cdir);
549 	p->p_fd->fd_rdir = rootvnode;
550 	VREF(p->p_fd->fd_rdir);
551 	FILEDESC_UNLOCK(p->p_fd);
552 	VOP_UNLOCK(rootvnode, 0, td);
553 #ifdef MAC
554 	mac_create_root_mount(td->td_ucred, TAILQ_FIRST(&mountlist));
555 #endif
556 
557 	if (devfs_present) {
558 		/*
559 		 * For disk based systems, we probably cannot do this yet
560 		 * since the fs will be read-only.  But a NFS root
561 		 * might be ok.  It is worth a shot.
562 		 */
563 		error = kern_mkdir(td, "/dev", UIO_SYSSPACE, 0700);
564 		if (error == EEXIST)
565 			error = 0;
566 		if (error == 0)
567 			error = kernel_vmount(0, "fstype", "devfs",
568 			    "fspath", "/dev", NULL);
569 		if (error != 0)
570 			init_does_devfs = 1;
571 	}
572 
573 	/*
574 	 * Need just enough stack to hold the faked-up "execve()" arguments.
575 	 */
576 	addr = p->p_sysent->sv_usrstack - PAGE_SIZE;
577 	if (vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &addr, PAGE_SIZE,
578 			FALSE, VM_PROT_ALL, VM_PROT_ALL, 0) != 0)
579 		panic("init: couldn't allocate argument space");
580 	p->p_vmspace->vm_maxsaddr = (caddr_t)addr;
581 	p->p_vmspace->vm_ssize = 1;
582 
583 	if ((var = getenv("init_path")) != NULL) {
584 		strlcpy(init_path, var, sizeof(init_path));
585 		freeenv(var);
586 	}
587 	if ((var = getenv("kern.fallback_elf_brand")) != NULL) {
588 		fallback_elf_brand = strtol(var, NULL, 0);
589 		freeenv(var);
590 	}
591 
592 	for (path = init_path; *path != '\0'; path = next) {
593 		while (*path == ':')
594 			path++;
595 		if (*path == '\0')
596 			break;
597 		for (next = path; *next != '\0' && *next != ':'; next++)
598 			/* nothing */ ;
599 		if (bootverbose)
600 			printf("start_init: trying %.*s\n", (int)(next - path),
601 			    path);
602 
603 		/*
604 		 * Move out the boot flag argument.
605 		 */
606 		options = 0;
607 		ucp = (char *)p->p_sysent->sv_usrstack;
608 		(void)subyte(--ucp, 0);		/* trailing zero */
609 		if (boothowto & RB_SINGLE) {
610 			(void)subyte(--ucp, 's');
611 			options = 1;
612 		}
613 #ifdef notyet
614                 if (boothowto & RB_FASTBOOT) {
615 			(void)subyte(--ucp, 'f');
616 			options = 1;
617 		}
618 #endif
619 
620 #ifdef BOOTCDROM
621 		(void)subyte(--ucp, 'C');
622 		options = 1;
623 #endif
624 		if (init_does_devfs) {
625 			(void)subyte(--ucp, 'd');
626 			options = 1;
627 		}
628 
629 		if (options == 0)
630 			(void)subyte(--ucp, '-');
631 		(void)subyte(--ucp, '-');		/* leading hyphen */
632 		arg1 = ucp;
633 
634 		/*
635 		 * Move out the file name (also arg 0).
636 		 */
637 		(void)subyte(--ucp, 0);
638 		for (s = next - 1; s >= path; s--)
639 			(void)subyte(--ucp, *s);
640 		arg0 = ucp;
641 
642 		/*
643 		 * Move out the arg pointers.
644 		 */
645 		uap = (char **)((intptr_t)ucp & ~(sizeof(intptr_t)-1));
646 		(void)suword((caddr_t)--uap, (long)0);	/* terminator */
647 		(void)suword((caddr_t)--uap, (long)(intptr_t)arg1);
648 		(void)suword((caddr_t)--uap, (long)(intptr_t)arg0);
649 
650 		/*
651 		 * Point at the arguments.
652 		 */
653 		args.fname = arg0;
654 		args.argv = uap;
655 		args.envv = NULL;
656 
657 		/*
658 		 * Now try to exec the program.  If can't for any reason
659 		 * other than it doesn't exist, complain.
660 		 *
661 		 * Otherwise, return via fork_trampoline() all the way
662 		 * to user mode as init!
663 		 */
664 		if ((error = execve(td, &args)) == 0) {
665 			mtx_unlock(&Giant);
666 			return;
667 		}
668 		if (error != ENOENT)
669 			printf("exec %.*s: error %d\n", (int)(next - path),
670 			    path, error);
671 	}
672 	printf("init: not found in path %s\n", init_path);
673 	panic("no init");
674 }
675 
676 /*
677  * Like kthread_create(), but runs in it's own address space.
678  * We do this early to reserve pid 1.
679  *
680  * Note special case - do not make it runnable yet.  Other work
681  * in progress will change this more.
682  */
683 static void
684 create_init(const void *udata __unused)
685 {
686 	struct ucred *newcred, *oldcred;
687 	int error;
688 
689 	error = fork1(&thread0, RFFDG | RFPROC | RFSTOPPED, 0, &initproc);
690 	if (error)
691 		panic("cannot fork init: %d\n", error);
692 	/* divorce init's credentials from the kernel's */
693 	newcred = crget();
694 	PROC_LOCK(initproc);
695 	initproc->p_flag |= P_SYSTEM;
696 	oldcred = initproc->p_ucred;
697 	crcopy(newcred, oldcred);
698 #ifdef MAC
699 	mac_create_proc1(newcred);
700 #endif
701 	initproc->p_ucred = newcred;
702 	PROC_UNLOCK(initproc);
703 	crfree(oldcred);
704 	cred_update_thread(FIRST_THREAD_IN_PROC(initproc));
705 	mtx_lock_spin(&sched_lock);
706 	initproc->p_sflag |= PS_INMEM;
707 	mtx_unlock_spin(&sched_lock);
708 	cpu_set_fork_handler(FIRST_THREAD_IN_PROC(initproc), start_init, NULL);
709 }
710 SYSINIT(init, SI_SUB_CREATE_INIT, SI_ORDER_FIRST, create_init, NULL)
711 
712 /*
713  * Make it runnable now.
714  */
715 static void
716 kick_init(const void *udata __unused)
717 {
718 	struct thread *td;
719 
720 	td = FIRST_THREAD_IN_PROC(initproc);
721 	mtx_lock_spin(&sched_lock);
722 	TD_SET_CAN_RUN(td);
723 	setrunqueue(td);	/* XXXKSE */
724 	mtx_unlock_spin(&sched_lock);
725 }
726 SYSINIT(kickinit, SI_SUB_KTHREAD_INIT, SI_ORDER_FIRST, kick_init, NULL)
727