xref: /freebsd/sys/kern/init_main.c (revision 74bf4e164ba5851606a27d4feff27717452583e5)
1 /*
2  * Copyright (c) 1995 Terrence R. Lambert
3  * All rights reserved.
4  *
5  * Copyright (c) 1982, 1986, 1989, 1991, 1992, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  * (c) UNIX System Laboratories, Inc.
8  * All or some portions of this file are derived from material licensed
9  * to the University of California by American Telephone and Telegraph
10  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
11  * the permission of UNIX System Laboratories, Inc.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 3. All advertising materials mentioning features or use of this software
22  *    must display the following acknowledgement:
23  *	This product includes software developed by the University of
24  *	California, Berkeley and its contributors.
25  * 4. Neither the name of the University nor the names of its contributors
26  *    may be used to endorse or promote products derived from this software
27  *    without specific prior written permission.
28  *
29  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39  * SUCH DAMAGE.
40  *
41  *	@(#)init_main.c	8.9 (Berkeley) 1/21/94
42  */
43 
44 #include <sys/cdefs.h>
45 __FBSDID("$FreeBSD$");
46 
47 #include "opt_init_path.h"
48 #include "opt_mac.h"
49 
50 #include <sys/param.h>
51 #include <sys/kernel.h>
52 #include <sys/exec.h>
53 #include <sys/file.h>
54 #include <sys/filedesc.h>
55 #include <sys/ktr.h>
56 #include <sys/lock.h>
57 #include <sys/mac.h>
58 #include <sys/mount.h>
59 #include <sys/mutex.h>
60 #include <sys/syscallsubr.h>
61 #include <sys/sysctl.h>
62 #include <sys/proc.h>
63 #include <sys/resourcevar.h>
64 #include <sys/systm.h>
65 #include <sys/signalvar.h>
66 #include <sys/vnode.h>
67 #include <sys/sysent.h>
68 #include <sys/reboot.h>
69 #include <sys/sched.h>
70 #include <sys/sx.h>
71 #include <sys/sysproto.h>
72 #include <sys/vmmeter.h>
73 #include <sys/unistd.h>
74 #include <sys/malloc.h>
75 #include <sys/conf.h>
76 
77 #include <machine/cpu.h>
78 
79 #include <vm/vm.h>
80 #include <vm/vm_param.h>
81 #include <vm/pmap.h>
82 #include <vm/vm_map.h>
83 #include <sys/user.h>
84 #include <sys/copyright.h>
85 
86 void mi_startup(void);				/* Should be elsewhere */
87 
88 /* Components of the first process -- never freed. */
89 static struct session session0;
90 static struct pgrp pgrp0;
91 struct	proc proc0;
92 struct	thread thread0;
93 struct	kse kse0;
94 struct	ksegrp ksegrp0;
95 static struct filedesc0 filedesc0;
96 struct	vmspace vmspace0;
97 struct	proc *initproc;
98 
99 int	boothowto = 0;		/* initialized so that it can be patched */
100 SYSCTL_INT(_debug, OID_AUTO, boothowto, CTLFLAG_RD, &boothowto, 0, "");
101 int	bootverbose;
102 SYSCTL_INT(_debug, OID_AUTO, bootverbose, CTLFLAG_RW, &bootverbose, 0, "");
103 
104 /*
105  * This ensures that there is at least one entry so that the sysinit_set
106  * symbol is not undefined.  A sybsystem ID of SI_SUB_DUMMY is never
107  * executed.
108  */
109 SYSINIT(placeholder, SI_SUB_DUMMY, SI_ORDER_ANY, NULL, NULL)
110 
111 /*
112  * The sysinit table itself.  Items are checked off as the are run.
113  * If we want to register new sysinit types, add them to newsysinit.
114  */
115 SET_DECLARE(sysinit_set, struct sysinit);
116 struct sysinit **sysinit, **sysinit_end;
117 struct sysinit **newsysinit, **newsysinit_end;
118 
119 /*
120  * Merge a new sysinit set into the current set, reallocating it if
121  * necessary.  This can only be called after malloc is running.
122  */
123 void
124 sysinit_add(struct sysinit **set, struct sysinit **set_end)
125 {
126 	struct sysinit **newset;
127 	struct sysinit **sipp;
128 	struct sysinit **xipp;
129 	int count;
130 
131 	count = set_end - set;
132 	if (newsysinit)
133 		count += newsysinit_end - newsysinit;
134 	else
135 		count += sysinit_end - sysinit;
136 	newset = malloc(count * sizeof(*sipp), M_TEMP, M_NOWAIT);
137 	if (newset == NULL)
138 		panic("cannot malloc for sysinit");
139 	xipp = newset;
140 	if (newsysinit)
141 		for (sipp = newsysinit; sipp < newsysinit_end; sipp++)
142 			*xipp++ = *sipp;
143 	else
144 		for (sipp = sysinit; sipp < sysinit_end; sipp++)
145 			*xipp++ = *sipp;
146 	for (sipp = set; sipp < set_end; sipp++)
147 		*xipp++ = *sipp;
148 	if (newsysinit)
149 		free(newsysinit, M_TEMP);
150 	newsysinit = newset;
151 	newsysinit_end = newset + count;
152 }
153 
154 /*
155  * System startup; initialize the world, create process 0, mount root
156  * filesystem, and fork to create init and pagedaemon.  Most of the
157  * hard work is done in the lower-level initialization routines including
158  * startup(), which does memory initialization and autoconfiguration.
159  *
160  * This allows simple addition of new kernel subsystems that require
161  * boot time initialization.  It also allows substitution of subsystem
162  * (for instance, a scheduler, kernel profiler, or VM system) by object
163  * module.  Finally, it allows for optional "kernel threads".
164  */
165 void
166 mi_startup(void)
167 {
168 
169 	register struct sysinit **sipp;		/* system initialization*/
170 	register struct sysinit **xipp;		/* interior loop of sort*/
171 	register struct sysinit *save;		/* bubble*/
172 
173 	if (sysinit == NULL) {
174 		sysinit = SET_BEGIN(sysinit_set);
175 		sysinit_end = SET_LIMIT(sysinit_set);
176 	}
177 
178 restart:
179 	/*
180 	 * Perform a bubble sort of the system initialization objects by
181 	 * their subsystem (primary key) and order (secondary key).
182 	 */
183 	for (sipp = sysinit; sipp < sysinit_end; sipp++) {
184 		for (xipp = sipp + 1; xipp < sysinit_end; xipp++) {
185 			if ((*sipp)->subsystem < (*xipp)->subsystem ||
186 			     ((*sipp)->subsystem == (*xipp)->subsystem &&
187 			      (*sipp)->order <= (*xipp)->order))
188 				continue;	/* skip*/
189 			save = *sipp;
190 			*sipp = *xipp;
191 			*xipp = save;
192 		}
193 	}
194 
195 	/*
196 	 * Traverse the (now) ordered list of system initialization tasks.
197 	 * Perform each task, and continue on to the next task.
198 	 *
199 	 * The last item on the list is expected to be the scheduler,
200 	 * which will not return.
201 	 */
202 	for (sipp = sysinit; sipp < sysinit_end; sipp++) {
203 
204 		if ((*sipp)->subsystem == SI_SUB_DUMMY)
205 			continue;	/* skip dummy task(s)*/
206 
207 		if ((*sipp)->subsystem == SI_SUB_DONE)
208 			continue;
209 
210 		/* Call function */
211 		(*((*sipp)->func))((*sipp)->udata);
212 
213 		/* Check off the one we're just done */
214 		(*sipp)->subsystem = SI_SUB_DONE;
215 
216 		/* Check if we've installed more sysinit items via KLD */
217 		if (newsysinit != NULL) {
218 			if (sysinit != SET_BEGIN(sysinit_set))
219 				free(sysinit, M_TEMP);
220 			sysinit = newsysinit;
221 			sysinit_end = newsysinit_end;
222 			newsysinit = NULL;
223 			newsysinit_end = NULL;
224 			goto restart;
225 		}
226 	}
227 
228 	panic("Shouldn't get here!");
229 	/* NOTREACHED*/
230 }
231 
232 
233 /*
234  ***************************************************************************
235  ****
236  **** The following SYSINIT's belong elsewhere, but have not yet
237  **** been moved.
238  ****
239  ***************************************************************************
240  */
/*
 * SYSINIT callback that prints a message on the console.
 *
 * data: pointer to the NUL-terminated string to print; it is emitted
 *       through a "%s" format, so the text itself is never interpreted
 *       as a format string.
 *
 * The argument is used, so it must not carry the __unused annotation.
 */
static void
print_caddr_t(void *data)
{
	printf("%s", (char *)data);
}
246 SYSINIT(announce, SI_SUB_COPYRIGHT, SI_ORDER_FIRST, print_caddr_t, copyright)
247 SYSINIT(version, SI_SUB_COPYRIGHT, SI_ORDER_SECOND, print_caddr_t, version)
248 
249 #ifdef WITNESS
250 static char wit_warn[] =
251      "WARNING: WITNESS option enabled, expect reduced performance.\n";
252 SYSINIT(witwarn, SI_SUB_COPYRIGHT, SI_ORDER_SECOND + 1,
253    print_caddr_t, wit_warn)
254 #endif
255 
256 #ifdef DIAGNOSTIC
257 static char diag_warn[] =
258      "WARNING: DIAGNOSTIC option enabled, expect reduced performance.\n";
259 SYSINIT(diagwarn, SI_SUB_COPYRIGHT, SI_ORDER_SECOND + 2,
260     print_caddr_t, diag_warn)
261 #endif
262 
263 static void
264 set_boot_verbose(void *data __unused)
265 {
266 
267 	if (boothowto & RB_VERBOSE)
268 		bootverbose++;
269 }
270 SYSINIT(boot_verbose, SI_SUB_TUNABLES, SI_ORDER_ANY, set_boot_verbose, NULL)
271 
272 struct sysentvec null_sysvec = {
273 	0,
274 	NULL,
275 	0,
276 	0,
277 	NULL,
278 	0,
279 	NULL,
280 	NULL,
281 	NULL,
282 	NULL,
283 	NULL,
284 	NULL,
285 	NULL,
286 	"null",
287 	NULL,
288 	NULL,
289 	0,
290 	PAGE_SIZE,
291 	VM_MIN_ADDRESS,
292 	VM_MAXUSER_ADDRESS,
293 	USRSTACK,
294 	PS_STRINGS,
295 	VM_PROT_ALL,
296 	NULL,
297 	NULL,
298 	NULL
299 };
300 
301 /*
302  ***************************************************************************
303  ****
304  **** The two following SYSINIT's are proc0 specific glue code.  I am not
305  **** convinced that they can not be safely combined, but their order of
306  **** operation has been maintained as the same as the original init_main.c
307  **** for right now.
308  ****
309  **** These probably belong in init_proc.c or kern_proc.c, since they
310  **** deal with proc0 (the fork template process).
311  ****
312  ***************************************************************************
313  */
314 /* ARGSUSED*/
315 static void
316 proc0_init(void *dummy __unused)
317 {
318 	register struct proc		*p;
319 	register struct filedesc0	*fdp;
320 	register unsigned i;
321 	struct thread *td;
322 	struct ksegrp *kg;
323 	struct kse *ke;
324 
325 	GIANT_REQUIRED;
326 	p = &proc0;
327 	td = &thread0;
328 	ke = &kse0;
329 	kg = &ksegrp0;
330 
331 	ke->ke_sched = kse0_sched;
332 	kg->kg_sched = ksegrp0_sched;
333 	p->p_sched = proc0_sched;
334 	td->td_sched = thread0_sched;
335 
336 	/*
337 	 * Initialize magic number.
338 	 */
339 	p->p_magic = P_MAGIC;
340 
341 	/*
342 	 * Initialize thread, process and pgrp structures.
343 	 */
344 	procinit();
345 	threadinit();
346 
347 	/*
348 	 * Initialize sleep queue hash table
349 	 */
350 	sleepinit();
351 
352 	/*
353 	 * additional VM structures
354 	 */
355 	vm_init2();
356 
357 	/*
358 	 * Create process 0 (the swapper).
359 	 */
360 	LIST_INSERT_HEAD(&allproc, p, p_list);
361 	LIST_INSERT_HEAD(PIDHASH(0), p, p_hash);
362 	mtx_init(&pgrp0.pg_mtx, "process group", NULL, MTX_DEF | MTX_DUPOK);
363 	p->p_pgrp = &pgrp0;
364 	LIST_INSERT_HEAD(PGRPHASH(0), &pgrp0, pg_hash);
365 	LIST_INIT(&pgrp0.pg_members);
366 	LIST_INSERT_HEAD(&pgrp0.pg_members, p, p_pglist);
367 
368 	pgrp0.pg_session = &session0;
369 	mtx_init(&session0.s_mtx, "session", NULL, MTX_DEF);
370 	session0.s_count = 1;
371 	session0.s_leader = p;
372 
373 	p->p_sysent = &null_sysvec;
374 
375 	/*
376 	 * proc_linkup was already done in init_i386() or alphainit() etc.
377 	 * because the earlier code needed to follow td->td_proc. Otherwise
378 	 * I would have done it here.. maybe this means this should be
379 	 * done earlier too.
380 	 */
381 	p->p_flag = P_SYSTEM;
382 	p->p_sflag = PS_INMEM;
383 	p->p_state = PRS_NORMAL;
384 	knlist_init(&p->p_klist, &p->p_mtx);
385 	p->p_nice = NZERO;
386 	td->td_state = TDS_RUNNING;
387 	kg->kg_pri_class = PRI_TIMESHARE;
388 	kg->kg_user_pri = PUSER;
389 	td->td_priority = PVM;
390 	td->td_base_pri = PUSER;
391 	td->td_kse = ke; /* XXXKSE */
392 	td->td_oncpu = 0;
393 	ke->ke_state = KES_THREAD;
394 	ke->ke_thread = td;
395 	p->p_peers = 0;
396 	p->p_leader = p;
397 
398 
399 	bcopy("swapper", p->p_comm, sizeof ("swapper"));
400 
401 	callout_init(&p->p_itcallout, CALLOUT_MPSAFE);
402 	callout_init(&td->td_slpcallout, CALLOUT_MPSAFE);
403 
404 	/* Create credentials. */
405 	p->p_ucred = crget();
406 	p->p_ucred->cr_ngroups = 1;	/* group 0 */
407 	p->p_ucred->cr_uidinfo = uifind(0);
408 	p->p_ucred->cr_ruidinfo = uifind(0);
409 	p->p_ucred->cr_prison = NULL;	/* Don't jail it. */
410 #ifdef MAC
411 	mac_create_proc0(p->p_ucred);
412 #endif
413 	td->td_ucred = crhold(p->p_ucred);
414 
415 	/* Create sigacts. */
416 	p->p_sigacts = sigacts_alloc();
417 
418 	/* Initialize signal state for process 0. */
419 	siginit(&proc0);
420 
421 	/* Create the file descriptor table. */
422 	/* XXX this duplicates part of fdinit() */
423 	fdp = &filedesc0;
424 	p->p_fd = &fdp->fd_fd;
425 	p->p_fdtol = NULL;
426 	mtx_init(&fdp->fd_fd.fd_mtx, FILEDESC_LOCK_DESC, NULL, MTX_DEF);
427 	fdp->fd_fd.fd_refcnt = 1;
428 	fdp->fd_fd.fd_cmask = CMASK;
429 	fdp->fd_fd.fd_ofiles = fdp->fd_dfiles;
430 	fdp->fd_fd.fd_ofileflags = fdp->fd_dfileflags;
431 	fdp->fd_fd.fd_nfiles = NDFILE;
432 	fdp->fd_fd.fd_map = fdp->fd_dmap;
433 
434 	/* Create the limits structures. */
435 	p->p_limit = lim_alloc();
436 	for (i = 0; i < RLIM_NLIMITS; i++)
437 		p->p_limit->pl_rlimit[i].rlim_cur =
438 		    p->p_limit->pl_rlimit[i].rlim_max = RLIM_INFINITY;
439 	p->p_limit->pl_rlimit[RLIMIT_NOFILE].rlim_cur =
440 	    p->p_limit->pl_rlimit[RLIMIT_NOFILE].rlim_max = maxfiles;
441 	p->p_limit->pl_rlimit[RLIMIT_NPROC].rlim_cur =
442 	    p->p_limit->pl_rlimit[RLIMIT_NPROC].rlim_max = maxproc;
443 	i = ptoa(cnt.v_free_count);
444 	p->p_limit->pl_rlimit[RLIMIT_RSS].rlim_max = i;
445 	p->p_limit->pl_rlimit[RLIMIT_MEMLOCK].rlim_max = i;
446 	p->p_limit->pl_rlimit[RLIMIT_MEMLOCK].rlim_cur = i / 3;
447 	p->p_cpulimit = RLIM_INFINITY;
448 
449 	/* Allocate a prototype map so we have something to fork. */
450 	pmap_pinit0(vmspace_pmap(&vmspace0));
451 	p->p_vmspace = &vmspace0;
452 	vmspace0.vm_refcnt = 1;
453 	vm_map_init(&vmspace0.vm_map, p->p_sysent->sv_minuser,
454 	    p->p_sysent->sv_maxuser);
455 	vmspace0.vm_map.pmap = vmspace_pmap(&vmspace0);
456 
457 	/*
458 	 * We continue to place resource usage info
459 	 * in the user struct so that it's pageable.
460 	 */
461 	p->p_stats = &p->p_uarea->u_stats;
462 
463 	/*
464 	 * Charge root for one process.
465 	 */
466 	(void)chgproccnt(p->p_ucred->cr_ruidinfo, 1, 0);
467 }
468 SYSINIT(p0init, SI_SUB_INTRINSIC, SI_ORDER_FIRST, proc0_init, NULL)
469 
470 /* ARGSUSED*/
471 static void
472 proc0_post(void *dummy __unused)
473 {
474 	struct timespec ts;
475 	struct proc *p;
476 
477 	/*
478 	 * Now we can look at the time, having had a chance to verify the
479 	 * time from the filesystem.  Pretend that proc0 started now.
480 	 */
481 	sx_slock(&allproc_lock);
482 	LIST_FOREACH(p, &allproc, p_list) {
483 		microuptime(&p->p_stats->p_start);
484 		p->p_runtime.sec = 0;
485 		p->p_runtime.frac = 0;
486 	}
487 	sx_sunlock(&allproc_lock);
488 	binuptime(PCPU_PTR(switchtime));
489 	PCPU_SET(switchticks, ticks);
490 
491 	/*
492 	 * Give the ``random'' number generator a thump.
493 	 */
494 	nanotime(&ts);
495 	srandom(ts.tv_sec ^ ts.tv_nsec);
496 }
497 SYSINIT(p0post, SI_SUB_INTRINSIC_POST, SI_ORDER_FIRST, proc0_post, NULL)
498 
499 /*
500  ***************************************************************************
501  ****
502  **** The following SYSINIT's and glue code should be moved to the
503  **** respective files on a per subsystem basis.
504  ****
505  ***************************************************************************
506  */
507 
508 
509 /*
510  ***************************************************************************
511  ****
512  **** The following code probably belongs in another file, like
513  **** kern/init_init.c.
514  ****
515  ***************************************************************************
516  */
517 
518 /*
519  * List of paths to try when searching for "init".
520  */
521 static char init_path[MAXPATHLEN] =
522 #ifdef	INIT_PATH
523     __XSTRING(INIT_PATH);
524 #else
525     "/sbin/init:/sbin/oinit:/sbin/init.bak:/stand/sysinstall";
526 #endif
527 SYSCTL_STRING(_kern, OID_AUTO, init_path, CTLFLAG_RD, init_path, 0,
528 	"Path used to search the init process");
529 
530 /*
531  * Start the initial user process; try exec'ing each pathname in init_path.
532  * The program is invoked with one argument containing the boot flags.
533  */
534 static void
535 start_init(void *dummy)
536 {
537 	vm_offset_t addr;
538 	struct execve_args args;
539 	int options, error;
540 	char *var, *path, *next, *s;
541 	char *ucp, **uap, *arg0, *arg1;
542 	struct thread *td;
543 	struct proc *p;
544 	int init_does_devfs = 0;
545 
546 	mtx_lock(&Giant);
547 
548 	GIANT_REQUIRED;
549 
550 	td = curthread;
551 	p = td->td_proc;
552 
553 	vfs_mountroot();
554 
555 	/* Get the vnode for '/'.  Set p->p_fd->fd_cdir to reference it. */
556 	if (VFS_ROOT(TAILQ_FIRST(&mountlist), &rootvnode, td))
557 		panic("cannot find root vnode");
558 	FILEDESC_LOCK(p->p_fd);
559 	p->p_fd->fd_cdir = rootvnode;
560 	VREF(p->p_fd->fd_cdir);
561 	p->p_fd->fd_rdir = rootvnode;
562 	VREF(p->p_fd->fd_rdir);
563 	FILEDESC_UNLOCK(p->p_fd);
564 	VOP_UNLOCK(rootvnode, 0, td);
565 #ifdef MAC
566 	mac_create_root_mount(td->td_ucred, TAILQ_FIRST(&mountlist));
567 #endif
568 
569 	/*
570 	 * For disk based systems, we probably cannot do this yet
571 	 * since the fs will be read-only.  But a NFS root
572 	 * might be ok.  It is worth a shot.
573 	 */
574 	error = kern_mkdir(td, "/dev", UIO_SYSSPACE, 0700);
575 	if (error == EEXIST)
576 		error = 0;
577 	if (error == 0)
578 		error = kernel_vmount(0, "fstype", "devfs",
579 		    "fspath", "/dev", NULL);
580 	if (error != 0)
581 		init_does_devfs = 1;
582 
583 	/*
584 	 * Need just enough stack to hold the faked-up "execve()" arguments.
585 	 */
586 	addr = p->p_sysent->sv_usrstack - PAGE_SIZE;
587 	if (vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &addr, PAGE_SIZE,
588 			FALSE, VM_PROT_ALL, VM_PROT_ALL, 0) != 0)
589 		panic("init: couldn't allocate argument space");
590 	p->p_vmspace->vm_maxsaddr = (caddr_t)addr;
591 	p->p_vmspace->vm_ssize = 1;
592 
593 	if ((var = getenv("init_path")) != NULL) {
594 		strlcpy(init_path, var, sizeof(init_path));
595 		freeenv(var);
596 	}
597 
598 	for (path = init_path; *path != '\0'; path = next) {
599 		while (*path == ':')
600 			path++;
601 		if (*path == '\0')
602 			break;
603 		for (next = path; *next != '\0' && *next != ':'; next++)
604 			/* nothing */ ;
605 		if (bootverbose)
606 			printf("start_init: trying %.*s\n", (int)(next - path),
607 			    path);
608 
609 		/*
610 		 * Move out the boot flag argument.
611 		 */
612 		options = 0;
613 		ucp = (char *)p->p_sysent->sv_usrstack;
614 		(void)subyte(--ucp, 0);		/* trailing zero */
615 		if (boothowto & RB_SINGLE) {
616 			(void)subyte(--ucp, 's');
617 			options = 1;
618 		}
619 #ifdef notyet
620                 if (boothowto & RB_FASTBOOT) {
621 			(void)subyte(--ucp, 'f');
622 			options = 1;
623 		}
624 #endif
625 
626 #ifdef BOOTCDROM
627 		(void)subyte(--ucp, 'C');
628 		options = 1;
629 #endif
630 		if (init_does_devfs) {
631 			(void)subyte(--ucp, 'd');
632 			options = 1;
633 		}
634 
635 		if (options == 0)
636 			(void)subyte(--ucp, '-');
637 		(void)subyte(--ucp, '-');		/* leading hyphen */
638 		arg1 = ucp;
639 
640 		/*
641 		 * Move out the file name (also arg 0).
642 		 */
643 		(void)subyte(--ucp, 0);
644 		for (s = next - 1; s >= path; s--)
645 			(void)subyte(--ucp, *s);
646 		arg0 = ucp;
647 
648 		/*
649 		 * Move out the arg pointers.
650 		 */
651 		uap = (char **)((intptr_t)ucp & ~(sizeof(intptr_t)-1));
652 		(void)suword((caddr_t)--uap, (long)0);	/* terminator */
653 		(void)suword((caddr_t)--uap, (long)(intptr_t)arg1);
654 		(void)suword((caddr_t)--uap, (long)(intptr_t)arg0);
655 
656 		/*
657 		 * Point at the arguments.
658 		 */
659 		args.fname = arg0;
660 		args.argv = uap;
661 		args.envv = NULL;
662 
663 		/*
664 		 * Now try to exec the program.  If can't for any reason
665 		 * other than it doesn't exist, complain.
666 		 *
667 		 * Otherwise, return via fork_trampoline() all the way
668 		 * to user mode as init!
669 		 */
670 		if ((error = execve(td, &args)) == 0) {
671 			mtx_unlock(&Giant);
672 			return;
673 		}
674 		if (error != ENOENT)
675 			printf("exec %.*s: error %d\n", (int)(next - path),
676 			    path, error);
677 	}
678 	printf("init: not found in path %s\n", init_path);
679 	panic("no init");
680 }
681 
682 /*
683  * Like kthread_create(), but runs in it's own address space.
684  * We do this early to reserve pid 1.
685  *
686  * Note special case - do not make it runnable yet.  Other work
687  * in progress will change this more.
688  */
689 static void
690 create_init(const void *udata __unused)
691 {
692 	struct ucred *newcred, *oldcred;
693 	int error;
694 
695 	error = fork1(&thread0, RFFDG | RFPROC | RFSTOPPED, 0, &initproc);
696 	if (error)
697 		panic("cannot fork init: %d\n", error);
698 	KASSERT(initproc->p_pid == 1, ("create_init: initproc->p_pid != 1"));
699 	/* divorce init's credentials from the kernel's */
700 	newcred = crget();
701 	PROC_LOCK(initproc);
702 	initproc->p_flag |= P_SYSTEM;
703 	oldcred = initproc->p_ucred;
704 	crcopy(newcred, oldcred);
705 #ifdef MAC
706 	mac_create_proc1(newcred);
707 #endif
708 	initproc->p_ucred = newcred;
709 	PROC_UNLOCK(initproc);
710 	crfree(oldcred);
711 	cred_update_thread(FIRST_THREAD_IN_PROC(initproc));
712 	mtx_lock_spin(&sched_lock);
713 	initproc->p_sflag |= PS_INMEM;
714 	mtx_unlock_spin(&sched_lock);
715 	cpu_set_fork_handler(FIRST_THREAD_IN_PROC(initproc), start_init, NULL);
716 }
717 SYSINIT(init, SI_SUB_CREATE_INIT, SI_ORDER_FIRST, create_init, NULL)
718 
719 /*
720  * Make it runnable now.
721  */
722 static void
723 kick_init(const void *udata __unused)
724 {
725 	struct thread *td;
726 
727 	td = FIRST_THREAD_IN_PROC(initproc);
728 	mtx_lock_spin(&sched_lock);
729 	TD_SET_CAN_RUN(td);
730 	setrunqueue(td);	/* XXXKSE */
731 	mtx_unlock_spin(&sched_lock);
732 }
733 SYSINIT(kickinit, SI_SUB_KTHREAD_INIT, SI_ORDER_FIRST, kick_init, NULL)
734