xref: /freebsd/sys/kern/init_main.c (revision a3e8fd0b7f663db7eafff527d5c3ca3bcfa8a537)
1 /*
2  * Copyright (c) 1995 Terrence R. Lambert
3  * All rights reserved.
4  *
5  * Copyright (c) 1982, 1986, 1989, 1991, 1992, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  * (c) UNIX System Laboratories, Inc.
8  * All or some portions of this file are derived from material licensed
9  * to the University of California by American Telephone and Telegraph
10  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
11  * the permission of UNIX System Laboratories, Inc.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 3. All advertising materials mentioning features or use of this software
22  *    must display the following acknowledgement:
23  *	This product includes software developed by the University of
24  *	California, Berkeley and its contributors.
25  * 4. Neither the name of the University nor the names of its contributors
26  *    may be used to endorse or promote products derived from this software
27  *    without specific prior written permission.
28  *
29  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39  * SUCH DAMAGE.
40  *
41  *	@(#)init_main.c	8.9 (Berkeley) 1/21/94
42  * $FreeBSD$
43  */
44 
45 #include "opt_init_path.h"
46 #include "opt_mac.h"
47 
48 #include <sys/param.h>
49 #include <sys/kernel.h>
50 #include <sys/exec.h>
51 #include <sys/file.h>
52 #include <sys/filedesc.h>
53 #include <sys/ktr.h>
54 #include <sys/lock.h>
55 #include <sys/mac.h>
56 #include <sys/mount.h>
57 #include <sys/mutex.h>
58 #include <sys/syscallsubr.h>
59 #include <sys/sysctl.h>
60 #include <sys/proc.h>
61 #include <sys/resourcevar.h>
62 #include <sys/systm.h>
63 #include <sys/signalvar.h>
64 #include <sys/vnode.h>
65 #include <sys/sysent.h>
66 #include <sys/reboot.h>
67 #include <sys/sx.h>
68 #include <sys/sysproto.h>
69 #include <sys/vmmeter.h>
70 #include <sys/unistd.h>
71 #include <sys/malloc.h>
72 #include <sys/conf.h>
73 
74 #include <machine/cpu.h>
75 
76 #include <vm/vm.h>
77 #include <vm/vm_param.h>
78 #include <vm/pmap.h>
79 #include <vm/vm_map.h>
80 #include <sys/user.h>
81 #include <sys/copyright.h>
82 
83 void mi_startup(void);				/* Should be elsewhere */
84 
85 /* Components of the first process -- never freed. */
86 static struct session session0;
87 static struct pgrp pgrp0;
88 struct	proc proc0;
89 struct	thread thread0;
90 struct	kse kse0;
91 struct	ksegrp ksegrp0;
92 static struct procsig procsig0;
93 static struct filedesc0 filedesc0;
94 static struct plimit limit0;
95 static struct vmspace vmspace0;
96 struct	proc *initproc;
97 
98 int cmask = CMASK;
99 extern int fallback_elf_brand;
100 
101 struct	vnode *rootvp;
102 int	boothowto = 0;		/* initialized so that it can be patched */
103 SYSCTL_INT(_debug, OID_AUTO, boothowto, CTLFLAG_RD, &boothowto, 0, "");
104 int	bootverbose;
105 SYSCTL_INT(_debug, OID_AUTO, bootverbose, CTLFLAG_RW, &bootverbose, 0, "");
106 
107 /*
108  * This ensures that there is at least one entry so that the sysinit_set
109  * symbol is not undefined.  A sybsystem ID of SI_SUB_DUMMY is never
110  * executed.
111  */
112 SYSINIT(placeholder, SI_SUB_DUMMY, SI_ORDER_ANY, NULL, NULL)
113 
114 /*
115  * The sysinit table itself.  Items are checked off as the are run.
116  * If we want to register new sysinit types, add them to newsysinit.
117  */
118 SET_DECLARE(sysinit_set, struct sysinit);
119 struct sysinit **sysinit, **sysinit_end;
120 struct sysinit **newsysinit, **newsysinit_end;
121 
122 /*
123  * Merge a new sysinit set into the current set, reallocating it if
124  * necessary.  This can only be called after malloc is running.
125  */
126 void
127 sysinit_add(struct sysinit **set, struct sysinit **set_end)
128 {
129 	struct sysinit **newset;
130 	struct sysinit **sipp;
131 	struct sysinit **xipp;
132 	int count;
133 
134 	count = set_end - set;
135 	if (newsysinit)
136 		count += newsysinit_end - newsysinit;
137 	else
138 		count += sysinit_end - sysinit;
139 	newset = malloc(count * sizeof(*sipp), M_TEMP, M_NOWAIT);
140 	if (newset == NULL)
141 		panic("cannot malloc for sysinit");
142 	xipp = newset;
143 	if (newsysinit)
144 		for (sipp = newsysinit; sipp < newsysinit_end; sipp++)
145 			*xipp++ = *sipp;
146 	else
147 		for (sipp = sysinit; sipp < sysinit_end; sipp++)
148 			*xipp++ = *sipp;
149 	for (sipp = set; sipp < set_end; sipp++)
150 		*xipp++ = *sipp;
151 	if (newsysinit)
152 		free(newsysinit, M_TEMP);
153 	newsysinit = newset;
154 	newsysinit_end = newset + count;
155 }
156 
157 /*
158  * System startup; initialize the world, create process 0, mount root
159  * filesystem, and fork to create init and pagedaemon.  Most of the
160  * hard work is done in the lower-level initialization routines including
161  * startup(), which does memory initialization and autoconfiguration.
162  *
163  * This allows simple addition of new kernel subsystems that require
164  * boot time initialization.  It also allows substitution of subsystem
165  * (for instance, a scheduler, kernel profiler, or VM system) by object
166  * module.  Finally, it allows for optional "kernel threads".
167  */
168 void
169 mi_startup(void)
170 {
171 
172 	register struct sysinit **sipp;		/* system initialization*/
173 	register struct sysinit **xipp;		/* interior loop of sort*/
174 	register struct sysinit *save;		/* bubble*/
175 
176 	if (sysinit == NULL) {
177 		sysinit = SET_BEGIN(sysinit_set);
178 		sysinit_end = SET_LIMIT(sysinit_set);
179 	}
180 
181 restart:
182 	/*
183 	 * Perform a bubble sort of the system initialization objects by
184 	 * their subsystem (primary key) and order (secondary key).
185 	 */
186 	for (sipp = sysinit; sipp < sysinit_end; sipp++) {
187 		for (xipp = sipp + 1; xipp < sysinit_end; xipp++) {
188 			if ((*sipp)->subsystem < (*xipp)->subsystem ||
189 			     ((*sipp)->subsystem == (*xipp)->subsystem &&
190 			      (*sipp)->order <= (*xipp)->order))
191 				continue;	/* skip*/
192 			save = *sipp;
193 			*sipp = *xipp;
194 			*xipp = save;
195 		}
196 	}
197 
198 	/*
199 	 * Traverse the (now) ordered list of system initialization tasks.
200 	 * Perform each task, and continue on to the next task.
201 	 *
202 	 * The last item on the list is expected to be the scheduler,
203 	 * which will not return.
204 	 */
205 	for (sipp = sysinit; sipp < sysinit_end; sipp++) {
206 
207 		if ((*sipp)->subsystem == SI_SUB_DUMMY)
208 			continue;	/* skip dummy task(s)*/
209 
210 		if ((*sipp)->subsystem == SI_SUB_DONE)
211 			continue;
212 
213 		/* Call function */
214 		(*((*sipp)->func))((*sipp)->udata);
215 
216 		/* Check off the one we're just done */
217 		(*sipp)->subsystem = SI_SUB_DONE;
218 
219 		/* Check if we've installed more sysinit items via KLD */
220 		if (newsysinit != NULL) {
221 			if (sysinit != SET_BEGIN(sysinit_set))
222 				free(sysinit, M_TEMP);
223 			sysinit = newsysinit;
224 			sysinit_end = newsysinit_end;
225 			newsysinit = NULL;
226 			newsysinit_end = NULL;
227 			goto restart;
228 		}
229 	}
230 
231 	panic("Shouldn't get here!");
232 	/* NOTREACHED*/
233 }
234 
235 
236 /*
237  ***************************************************************************
238  ****
239  **** The following SYSINIT's belong elsewhere, but have not yet
240  **** been moved.
241  ****
242  ***************************************************************************
243  */
/*
 * SYSINIT callback: print the NUL-terminated string passed as the
 * sysinit argument.
 *
 * The parameter is deliberately not marked __unused: it is the string
 * being printed.
 */
static void
print_caddr_t(void *data)
{
	printf("%s", (char *)data);
}
249 SYSINIT(announce, SI_SUB_COPYRIGHT, SI_ORDER_FIRST, print_caddr_t, copyright)
250 SYSINIT(version, SI_SUB_COPYRIGHT, SI_ORDER_SECOND, print_caddr_t, version)
251 
252 static void
253 set_boot_verbose(void *data __unused)
254 {
255 
256 	if (boothowto & RB_VERBOSE)
257 		bootverbose++;
258 }
259 SYSINIT(boot_verbose, SI_SUB_TUNABLES, SI_ORDER_ANY, set_boot_verbose, NULL)
260 
261 struct sysentvec null_sysvec = {
262 	0,
263 	NULL,
264 	0,
265 	0,
266 	NULL,
267 	0,
268 	NULL,
269 	NULL,
270 	NULL,
271 	NULL,
272 	NULL,
273 	NULL,
274 	NULL,
275 	"null",
276 	NULL,
277 	NULL,
278 	0,
279 	PAGE_SIZE,
280 	VM_MIN_ADDRESS,
281 	VM_MAXUSER_ADDRESS,
282 	USRSTACK,
283 	PS_STRINGS,
284 	VM_PROT_ALL,
285 	NULL,
286 	NULL
287 };
288 
289 /*
290  ***************************************************************************
291  ****
292  **** The two following SYSINT's are proc0 specific glue code.  I am not
293  **** convinced that they can not be safely combined, but their order of
294  **** operation has been maintained as the same as the original init_main.c
295  **** for right now.
296  ****
297  **** These probably belong in init_proc.c or kern_proc.c, since they
298  **** deal with proc0 (the fork template process).
299  ****
300  ***************************************************************************
301  */
302 /* ARGSUSED*/
303 static void
304 proc0_init(void *dummy __unused)
305 {
306 	register struct proc		*p;
307 	register struct filedesc0	*fdp;
308 	register unsigned i;
309 	struct thread *td;
310 	struct ksegrp *kg;
311 	struct kse *ke;
312 
313 	GIANT_REQUIRED;
314 	p = &proc0;
315 	td = &thread0;
316 	ke = &kse0;
317 	kg = &ksegrp0;
318 
319 	/*
320 	 * Initialize magic number.
321 	 */
322 	p->p_magic = P_MAGIC;
323 
324 	/*
325 	 * Initialize thread, process and pgrp structures.
326 	 */
327 	procinit();
328 	threadinit();
329 
330 	/*
331 	 * Initialize sleep queue hash table
332 	 */
333 	sleepinit();
334 
335 	/*
336 	 * additional VM structures
337 	 */
338 	vm_init2();
339 
340 	/*
341 	 * Create process 0 (the swapper).
342 	 */
343 	LIST_INSERT_HEAD(&allproc, p, p_list);
344 	LIST_INSERT_HEAD(PIDHASH(0), p, p_hash);
345 	mtx_init(&pgrp0.pg_mtx, "process group", NULL, MTX_DEF | MTX_DUPOK);
346 	p->p_pgrp = &pgrp0;
347 	LIST_INSERT_HEAD(PGRPHASH(0), &pgrp0, pg_hash);
348 	LIST_INIT(&pgrp0.pg_members);
349 	LIST_INSERT_HEAD(&pgrp0.pg_members, p, p_pglist);
350 
351 	pgrp0.pg_session = &session0;
352 	mtx_init(&session0.s_mtx, "session", NULL, MTX_DEF);
353 	session0.s_count = 1;
354 	session0.s_leader = p;
355 
356 	p->p_sysent = &null_sysvec;
357 
358 	/*
359 	 * proc_linkup was already done in init_i386() or alphainit() etc.
360 	 * because the earlier code needed to follow td->td_proc. Otherwise
361 	 * I would have done it here.. maybe this means this should be
362 	 * done earlier too.
363 	 */
364 	p->p_flag = P_SYSTEM;
365 	p->p_sflag = PS_INMEM;
366 	p->p_state = PRS_NORMAL;
367 	td->td_state = TDS_RUNNING;
368 	kg->kg_nice = NZERO;
369 	kg->kg_pri_class = PRI_TIMESHARE;
370 	kg->kg_user_pri = PUSER;
371 	td->td_priority = PVM;
372 	td->td_base_pri = PUSER;
373 	td->td_kse = ke; /* XXXKSE */
374 	ke->ke_oncpu = 0;
375 	ke->ke_state = KES_THREAD;
376 	ke->ke_thread = td;
377 	p->p_peers = 0;
378 	p->p_leader = p;
379 
380 
381 	bcopy("swapper", p->p_comm, sizeof ("swapper"));
382 
383 	callout_init(&p->p_itcallout, 0);
384 	callout_init(&td->td_slpcallout, 1);
385 
386 	/* Create credentials. */
387 	p->p_ucred = crget();
388 	p->p_ucred->cr_ngroups = 1;	/* group 0 */
389 	p->p_ucred->cr_uidinfo = uifind(0);
390 	p->p_ucred->cr_ruidinfo = uifind(0);
391 	p->p_ucred->cr_prison = NULL;	/* Don't jail it. */
392 #ifdef MAC
393 	mac_create_proc0(p->p_ucred);
394 #endif
395 	td->td_ucred = crhold(p->p_ucred);
396 
397 	/* Create procsig. */
398 	p->p_procsig = &procsig0;
399 	p->p_procsig->ps_refcnt = 1;
400 
401 	/* Initialize signal state for process 0. */
402 	siginit(&proc0);
403 
404 	/* Create the file descriptor table. */
405 	fdp = &filedesc0;
406 	p->p_fd = &fdp->fd_fd;
407 	mtx_init(&fdp->fd_fd.fd_mtx, FILEDESC_LOCK_DESC, NULL, MTX_DEF);
408 	fdp->fd_fd.fd_refcnt = 1;
409 	fdp->fd_fd.fd_cmask = cmask;
410 	fdp->fd_fd.fd_ofiles = fdp->fd_dfiles;
411 	fdp->fd_fd.fd_ofileflags = fdp->fd_dfileflags;
412 	fdp->fd_fd.fd_nfiles = NDFILE;
413 
414 	/* Create the limits structures. */
415 	p->p_limit = &limit0;
416 	for (i = 0; i < sizeof(p->p_rlimit)/sizeof(p->p_rlimit[0]); i++)
417 		limit0.pl_rlimit[i].rlim_cur =
418 		    limit0.pl_rlimit[i].rlim_max = RLIM_INFINITY;
419 	limit0.pl_rlimit[RLIMIT_NOFILE].rlim_cur =
420 	    limit0.pl_rlimit[RLIMIT_NOFILE].rlim_max = maxfiles;
421 	limit0.pl_rlimit[RLIMIT_NPROC].rlim_cur =
422 	    limit0.pl_rlimit[RLIMIT_NPROC].rlim_max = maxproc;
423 	i = ptoa(cnt.v_free_count);
424 	limit0.pl_rlimit[RLIMIT_RSS].rlim_max = i;
425 	limit0.pl_rlimit[RLIMIT_MEMLOCK].rlim_max = i;
426 	limit0.pl_rlimit[RLIMIT_MEMLOCK].rlim_cur = i / 3;
427 	limit0.p_refcnt = 1;
428 	p->p_cpulimit = RLIM_INFINITY;
429 
430 	/* Allocate a prototype map so we have something to fork. */
431 	pmap_pinit0(vmspace_pmap(&vmspace0));
432 	p->p_vmspace = &vmspace0;
433 	vmspace0.vm_refcnt = 1;
434 	vm_map_init(&vmspace0.vm_map, p->p_sysent->sv_minuser,
435 	    p->p_sysent->sv_maxuser);
436 	vmspace0.vm_map.pmap = vmspace_pmap(&vmspace0);
437 
438 	/*
439 	 * We continue to place resource usage info and signal
440 	 * actions in the user struct so they're pageable.
441 	 */
442 	p->p_stats = &p->p_uarea->u_stats;
443 	p->p_sigacts = &p->p_uarea->u_sigacts;
444 
445 	/*
446 	 * Charge root for one process.
447 	 */
448 	(void)chgproccnt(p->p_ucred->cr_ruidinfo, 1, 0);
449 }
450 SYSINIT(p0init, SI_SUB_INTRINSIC, SI_ORDER_FIRST, proc0_init, NULL)
451 
452 /* ARGSUSED*/
453 static void
454 proc0_post(void *dummy __unused)
455 {
456 	struct timespec ts;
457 	struct proc *p;
458 
459 	/*
460 	 * Now we can look at the time, having had a chance to verify the
461 	 * time from the filesystem.  Pretend that proc0 started now.
462 	 */
463 	sx_slock(&allproc_lock);
464 	LIST_FOREACH(p, &allproc, p_list) {
465 		microtime(&p->p_stats->p_start);
466 		p->p_runtime.sec = 0;
467 		p->p_runtime.frac = 0;
468 	}
469 	sx_sunlock(&allproc_lock);
470 	binuptime(PCPU_PTR(switchtime));
471 	PCPU_SET(switchticks, ticks);
472 
473 	/*
474 	 * Give the ``random'' number generator a thump.
475 	 */
476 	nanotime(&ts);
477 	srandom(ts.tv_sec ^ ts.tv_nsec);
478 }
479 SYSINIT(p0post, SI_SUB_INTRINSIC_POST, SI_ORDER_FIRST, proc0_post, NULL)
480 
481 /*
482  ***************************************************************************
483  ****
484  **** The following SYSINIT's and glue code should be moved to the
485  **** respective files on a per subsystem basis.
486  ****
487  ***************************************************************************
488  */
489 
490 
491 /*
492  ***************************************************************************
493  ****
494  **** The following code probably belongs in another file, like
495  **** kern/init_init.c.
496  ****
497  ***************************************************************************
498  */
499 
500 /*
501  * List of paths to try when searching for "init".
502  */
503 static char init_path[MAXPATHLEN] =
504 #ifdef	INIT_PATH
505     __XSTRING(INIT_PATH);
506 #else
507     "/sbin/init:/sbin/oinit:/sbin/init.bak:/stand/sysinstall";
508 #endif
509 SYSCTL_STRING(_kern, OID_AUTO, init_path, CTLFLAG_RD, init_path, 0,
510 	"Path used to search the init process");
511 
512 /*
513  * Start the initial user process; try exec'ing each pathname in init_path.
514  * The program is invoked with one argument containing the boot flags.
515  */
516 static void
517 start_init(void *dummy)
518 {
519 	vm_offset_t addr;
520 	struct execve_args args;
521 	int options, error;
522 	char *var, *path, *next, *s;
523 	char *ucp, **uap, *arg0, *arg1;
524 	struct thread *td;
525 	struct proc *p;
526 	int init_does_devfs = 0;
527 
528 	mtx_lock(&Giant);
529 
530 	GIANT_REQUIRED;
531 
532 	td = curthread;
533 	p = td->td_proc;
534 
535 	vfs_mountroot();
536 
537 	/* Get the vnode for '/'.  Set p->p_fd->fd_cdir to reference it. */
538 	if (VFS_ROOT(TAILQ_FIRST(&mountlist), &rootvnode))
539 		panic("cannot find root vnode");
540 	FILEDESC_LOCK(p->p_fd);
541 	p->p_fd->fd_cdir = rootvnode;
542 	VREF(p->p_fd->fd_cdir);
543 	p->p_fd->fd_rdir = rootvnode;
544 	VREF(p->p_fd->fd_rdir);
545 	FILEDESC_UNLOCK(p->p_fd);
546 	VOP_UNLOCK(rootvnode, 0, td);
547 #ifdef MAC
548 	mac_create_root_mount(td->td_ucred, TAILQ_FIRST(&mountlist));
549 #endif
550 
551 	if (devfs_present) {
552 		/*
553 		 * For disk based systems, we probably cannot do this yet
554 		 * since the fs will be read-only.  But a NFS root
555 		 * might be ok.  It is worth a shot.
556 		 */
557 		error = kern_mkdir(td, "/dev", UIO_SYSSPACE, 0700);
558 		if (error == EEXIST)
559 			error = 0;
560 		if (error == 0)
561 			error = kernel_vmount(0, "fstype", "devfs",
562 			    "fspath", "/dev", NULL);
563 		if (error != 0)
564 			init_does_devfs = 1;
565 	}
566 
567 	/*
568 	 * Need just enough stack to hold the faked-up "execve()" arguments.
569 	 */
570 	addr = p->p_sysent->sv_usrstack - PAGE_SIZE;
571 	if (vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &addr, PAGE_SIZE,
572 			FALSE, VM_PROT_ALL, VM_PROT_ALL, 0) != 0)
573 		panic("init: couldn't allocate argument space");
574 	p->p_vmspace->vm_maxsaddr = (caddr_t)addr;
575 	p->p_vmspace->vm_ssize = 1;
576 
577 	if ((var = getenv("init_path")) != NULL) {
578 		strlcpy(init_path, var, sizeof(init_path));
579 		freeenv(var);
580 	}
581 	if ((var = getenv("kern.fallback_elf_brand")) != NULL) {
582 		fallback_elf_brand = strtol(var, NULL, 0);
583 		freeenv(var);
584 	}
585 
586 	for (path = init_path; *path != '\0'; path = next) {
587 		while (*path == ':')
588 			path++;
589 		if (*path == '\0')
590 			break;
591 		for (next = path; *next != '\0' && *next != ':'; next++)
592 			/* nothing */ ;
593 		if (bootverbose)
594 			printf("start_init: trying %.*s\n", (int)(next - path),
595 			    path);
596 
597 		/*
598 		 * Move out the boot flag argument.
599 		 */
600 		options = 0;
601 		ucp = (char *)p->p_sysent->sv_usrstack;
602 		(void)subyte(--ucp, 0);		/* trailing zero */
603 		if (boothowto & RB_SINGLE) {
604 			(void)subyte(--ucp, 's');
605 			options = 1;
606 		}
607 #ifdef notyet
608                 if (boothowto & RB_FASTBOOT) {
609 			(void)subyte(--ucp, 'f');
610 			options = 1;
611 		}
612 #endif
613 
614 #ifdef BOOTCDROM
615 		(void)subyte(--ucp, 'C');
616 		options = 1;
617 #endif
618 		if (init_does_devfs) {
619 			(void)subyte(--ucp, 'd');
620 			options = 1;
621 		}
622 
623 		if (options == 0)
624 			(void)subyte(--ucp, '-');
625 		(void)subyte(--ucp, '-');		/* leading hyphen */
626 		arg1 = ucp;
627 
628 		/*
629 		 * Move out the file name (also arg 0).
630 		 */
631 		(void)subyte(--ucp, 0);
632 		for (s = next - 1; s >= path; s--)
633 			(void)subyte(--ucp, *s);
634 		arg0 = ucp;
635 
636 		/*
637 		 * Move out the arg pointers.
638 		 */
639 		uap = (char **)((intptr_t)ucp & ~(sizeof(intptr_t)-1));
640 		(void)suword((caddr_t)--uap, (long)0);	/* terminator */
641 		(void)suword((caddr_t)--uap, (long)(intptr_t)arg1);
642 		(void)suword((caddr_t)--uap, (long)(intptr_t)arg0);
643 
644 		/*
645 		 * Point at the arguments.
646 		 */
647 		args.fname = arg0;
648 		args.argv = uap;
649 		args.envv = NULL;
650 
651 		/*
652 		 * Now try to exec the program.  If can't for any reason
653 		 * other than it doesn't exist, complain.
654 		 *
655 		 * Otherwise, return via fork_trampoline() all the way
656 		 * to user mode as init!
657 		 */
658 		if ((error = execve(td, &args)) == 0) {
659 			mtx_unlock(&Giant);
660 			return;
661 		}
662 		if (error != ENOENT)
663 			printf("exec %.*s: error %d\n", (int)(next - path),
664 			    path, error);
665 	}
666 	printf("init: not found in path %s\n", init_path);
667 	panic("no init");
668 }
669 
670 /*
671  * Like kthread_create(), but runs in it's own address space.
672  * We do this early to reserve pid 1.
673  *
674  * Note special case - do not make it runnable yet.  Other work
675  * in progress will change this more.
676  */
677 static void
678 create_init(const void *udata __unused)
679 {
680 	struct ucred *newcred, *oldcred;
681 	int error;
682 
683 	error = fork1(&thread0, RFFDG | RFPROC | RFSTOPPED, 0, &initproc);
684 	if (error)
685 		panic("cannot fork init: %d\n", error);
686 	/* divorce init's credentials from the kernel's */
687 	newcred = crget();
688 	PROC_LOCK(initproc);
689 	initproc->p_flag |= P_SYSTEM;
690 	oldcred = initproc->p_ucred;
691 	crcopy(newcred, oldcred);
692 #ifdef MAC
693 	mac_create_proc1(newcred);
694 #endif
695 	initproc->p_ucred = newcred;
696 	PROC_UNLOCK(initproc);
697 	crfree(oldcred);
698 	cred_update_thread(FIRST_THREAD_IN_PROC(initproc));
699 	mtx_lock_spin(&sched_lock);
700 	initproc->p_sflag |= PS_INMEM;
701 	mtx_unlock_spin(&sched_lock);
702 	cpu_set_fork_handler(FIRST_THREAD_IN_PROC(initproc), start_init, NULL);
703 }
704 SYSINIT(init, SI_SUB_CREATE_INIT, SI_ORDER_FIRST, create_init, NULL)
705 
706 /*
707  * Make it runnable now.
708  */
709 static void
710 kick_init(const void *udata __unused)
711 {
712 	struct thread *td;
713 
714 	td = FIRST_THREAD_IN_PROC(initproc);
715 	mtx_lock_spin(&sched_lock);
716 	TD_SET_CAN_RUN(td);
717 	setrunqueue(td);	/* XXXKSE */
718 	mtx_unlock_spin(&sched_lock);
719 }
720 SYSINIT(kickinit, SI_SUB_KTHREAD_INIT, SI_ORDER_FIRST, kick_init, NULL)
721