xref: /freebsd/sys/kern/init_main.c (revision d056fa046c6a91b90cd98165face0e42a33a5173)
1 /*-
2  * Copyright (c) 1995 Terrence R. Lambert
3  * All rights reserved.
4  *
5  * Copyright (c) 1982, 1986, 1989, 1991, 1992, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  * (c) UNIX System Laboratories, Inc.
8  * All or some portions of this file are derived from material licensed
9  * to the University of California by American Telephone and Telegraph
10  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
11  * the permission of UNIX System Laboratories, Inc.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 3. All advertising materials mentioning features or use of this software
22  *    must display the following acknowledgement:
23  *	This product includes software developed by the University of
24  *	California, Berkeley and its contributors.
25  * 4. Neither the name of the University nor the names of its contributors
26  *    may be used to endorse or promote products derived from this software
27  *    without specific prior written permission.
28  *
29  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39  * SUCH DAMAGE.
40  *
41  *	@(#)init_main.c	8.9 (Berkeley) 1/21/94
42  */
43 
44 #include <sys/cdefs.h>
45 __FBSDID("$FreeBSD$");
46 
47 #include "opt_ddb.h"
48 #include "opt_init_path.h"
49 #include "opt_mac.h"
50 
51 #include <sys/param.h>
52 #include <sys/kernel.h>
53 #include <sys/exec.h>
54 #include <sys/file.h>
55 #include <sys/filedesc.h>
56 #include <sys/ktr.h>
57 #include <sys/lock.h>
58 #include <sys/mac.h>
59 #include <sys/mount.h>
60 #include <sys/mutex.h>
61 #include <sys/syscallsubr.h>
62 #include <sys/sysctl.h>
63 #include <sys/proc.h>
64 #include <sys/resourcevar.h>
65 #include <sys/systm.h>
66 #include <sys/signalvar.h>
67 #include <sys/vnode.h>
68 #include <sys/sysent.h>
69 #include <sys/reboot.h>
70 #include <sys/sched.h>
71 #include <sys/sx.h>
72 #include <sys/sysproto.h>
73 #include <sys/vmmeter.h>
74 #include <sys/unistd.h>
75 #include <sys/malloc.h>
76 #include <sys/conf.h>
77 
78 #include <machine/cpu.h>
79 
80 #include <security/audit/audit.h>
81 
82 #include <vm/vm.h>
83 #include <vm/vm_param.h>
84 #include <vm/pmap.h>
85 #include <vm/vm_map.h>
86 #include <sys/copyright.h>
87 
88 #include <ddb/ddb.h>
89 #include <ddb/db_sym.h>
90 
/*
 * Entry point of machine-independent startup; defined later in this file.
 */
void mi_startup(void);				/* Should be elsewhere */

/*
 * Components of the first process -- never freed.
 *
 * These are statically allocated and wired together by proc0_init():
 * proc0 is placed in pgrp0, pgrp0 in session0, and thread0, ksegrp0 and
 * vmspace0 are the thread, ksegrp and address space used for proc0.
 */
static struct session session0;
static struct pgrp pgrp0;
struct	proc proc0;
struct	thread thread0 __aligned(8);
struct	ksegrp ksegrp0;
struct	vmspace vmspace0;
/* Filled in by fork1() in create_init(); asserted there to have pid 1. */
struct	proc *initproc;
101 
/*
 * Boot flags.  Tested in this file against RB_VERBOSE (set_boot_verbose())
 * and RB_SINGLE (start_init()).  Registered with CTLFLAG_RD under the
 * _debug sysctl node.
 */
int	boothowto = 0;		/* initialized so that it can be patched */
SYSCTL_INT(_debug, OID_AUTO, boothowto, CTLFLAG_RD, &boothowto, 0, "");
/*
 * Verbosity level; incremented by set_boot_verbose() when RB_VERBOSE is
 * set in boothowto.  Registered with CTLFLAG_RW under the _debug sysctl
 * node.
 */
int	bootverbose;
SYSCTL_INT(_debug, OID_AUTO, bootverbose, CTLFLAG_RW, &bootverbose, 0, "");
106 
/*
 * This ensures that there is at least one entry so that the sysinit_set
 * symbol is not undefined.  A subsystem ID of SI_SUB_DUMMY is never
 * executed: mi_startup() skips such entries, so the NULL function pointer
 * registered here is never called.
 */
SYSINIT(placeholder, SI_SUB_DUMMY, SI_ORDER_ANY, NULL, NULL)
113 
/*
 * The sysinit table itself.  Items are checked off as they are run.
 * If we want to register new sysinit types, add them to newsysinit.
 *
 * sysinit/sysinit_end bound the table mi_startup() is currently walking;
 * they start out NULL and are pointed at the sysinit_set linker set on
 * first use.  newsysinit/newsysinit_end bound a pending replacement table
 * built by sysinit_add(); mi_startup() adopts it after the current entry
 * finishes and then resets both pointers to NULL.
 */
SET_DECLARE(sysinit_set, struct sysinit);
struct sysinit **sysinit, **sysinit_end;
struct sysinit **newsysinit, **newsysinit_end;
121 
122 /*
123  * Merge a new sysinit set into the current set, reallocating it if
124  * necessary.  This can only be called after malloc is running.
125  */
126 void
127 sysinit_add(struct sysinit **set, struct sysinit **set_end)
128 {
129 	struct sysinit **newset;
130 	struct sysinit **sipp;
131 	struct sysinit **xipp;
132 	int count;
133 
134 	count = set_end - set;
135 	if (newsysinit)
136 		count += newsysinit_end - newsysinit;
137 	else
138 		count += sysinit_end - sysinit;
139 	newset = malloc(count * sizeof(*sipp), M_TEMP, M_NOWAIT);
140 	if (newset == NULL)
141 		panic("cannot malloc for sysinit");
142 	xipp = newset;
143 	if (newsysinit)
144 		for (sipp = newsysinit; sipp < newsysinit_end; sipp++)
145 			*xipp++ = *sipp;
146 	else
147 		for (sipp = sysinit; sipp < sysinit_end; sipp++)
148 			*xipp++ = *sipp;
149 	for (sipp = set; sipp < set_end; sipp++)
150 		*xipp++ = *sipp;
151 	if (newsysinit)
152 		free(newsysinit, M_TEMP);
153 	newsysinit = newset;
154 	newsysinit_end = newset + count;
155 }
156 
157 /*
158  * System startup; initialize the world, create process 0, mount root
159  * filesystem, and fork to create init and pagedaemon.  Most of the
160  * hard work is done in the lower-level initialization routines including
161  * startup(), which does memory initialization and autoconfiguration.
162  *
163  * This allows simple addition of new kernel subsystems that require
164  * boot time initialization.  It also allows substitution of subsystem
165  * (for instance, a scheduler, kernel profiler, or VM system) by object
166  * module.  Finally, it allows for optional "kernel threads".
167  */
168 void
169 mi_startup(void)
170 {
171 
172 	register struct sysinit **sipp;		/* system initialization*/
173 	register struct sysinit **xipp;		/* interior loop of sort*/
174 	register struct sysinit *save;		/* bubble*/
175 
176 #if defined(VERBOSE_SYSINIT)
177 	int last;
178 	int verbose;
179 #endif
180 
181 	if (sysinit == NULL) {
182 		sysinit = SET_BEGIN(sysinit_set);
183 		sysinit_end = SET_LIMIT(sysinit_set);
184 	}
185 
186 restart:
187 	/*
188 	 * Perform a bubble sort of the system initialization objects by
189 	 * their subsystem (primary key) and order (secondary key).
190 	 */
191 	for (sipp = sysinit; sipp < sysinit_end; sipp++) {
192 		for (xipp = sipp + 1; xipp < sysinit_end; xipp++) {
193 			if ((*sipp)->subsystem < (*xipp)->subsystem ||
194 			     ((*sipp)->subsystem == (*xipp)->subsystem &&
195 			      (*sipp)->order <= (*xipp)->order))
196 				continue;	/* skip*/
197 			save = *sipp;
198 			*sipp = *xipp;
199 			*xipp = save;
200 		}
201 	}
202 
203 #if defined(VERBOSE_SYSINIT)
204 	last = SI_SUB_COPYRIGHT;
205 	verbose = 0;
206 #if !defined(DDB)
207 	printf("VERBOSE_SYSINIT: DDB not enabled, symbol lookups disabled.\n");
208 #endif
209 #endif
210 
211 	/*
212 	 * Traverse the (now) ordered list of system initialization tasks.
213 	 * Perform each task, and continue on to the next task.
214 	 *
215 	 * The last item on the list is expected to be the scheduler,
216 	 * which will not return.
217 	 */
218 	for (sipp = sysinit; sipp < sysinit_end; sipp++) {
219 
220 		if ((*sipp)->subsystem == SI_SUB_DUMMY)
221 			continue;	/* skip dummy task(s)*/
222 
223 		if ((*sipp)->subsystem == SI_SUB_DONE)
224 			continue;
225 
226 #if defined(VERBOSE_SYSINIT)
227 		if ((*sipp)->subsystem > last) {
228 			verbose = 1;
229 			last = (*sipp)->subsystem;
230 			printf("subsystem %x\n", last);
231 		}
232 		if (verbose) {
233 #if defined(DDB)
234 			const char *name;
235 			c_db_sym_t sym;
236 			db_expr_t  offset;
237 
238 			sym = db_search_symbol((vm_offset_t)(*sipp)->func,
239 			    DB_STGY_PROC, &offset);
240 			db_symbol_values(sym, &name, NULL);
241 			if (name != NULL)
242 				printf("   %s(%p)... ", name, (*sipp)->udata);
243 			else
244 #endif
245 				printf("   %p(%p)... ", (*sipp)->func,
246 				    (*sipp)->udata);
247 		}
248 #endif
249 
250 		/* Call function */
251 		(*((*sipp)->func))((*sipp)->udata);
252 
253 #if defined(VERBOSE_SYSINIT)
254 		if (verbose)
255 			printf("done.\n");
256 #endif
257 
258 		/* Check off the one we're just done */
259 		(*sipp)->subsystem = SI_SUB_DONE;
260 
261 		/* Check if we've installed more sysinit items via KLD */
262 		if (newsysinit != NULL) {
263 			if (sysinit != SET_BEGIN(sysinit_set))
264 				free(sysinit, M_TEMP);
265 			sysinit = newsysinit;
266 			sysinit_end = newsysinit_end;
267 			newsysinit = NULL;
268 			newsysinit_end = NULL;
269 			goto restart;
270 		}
271 	}
272 
273 	panic("Shouldn't get here!");
274 	/* NOTREACHED*/
275 }
276 
277 
278 /*
279  ***************************************************************************
280  ****
281  **** The following SYSINIT's belong elsewhere, but have not yet
282  **** been moved.
283  ****
284  ***************************************************************************
285  */
286 static void
287 print_caddr_t(void *data __unused)
288 {
289 	printf("%s", (char *)data);
290 }
291 SYSINIT(announce, SI_SUB_COPYRIGHT, SI_ORDER_FIRST, print_caddr_t, copyright)
292 SYSINIT(version, SI_SUB_COPYRIGHT, SI_ORDER_SECOND, print_caddr_t, version)
293 
294 #ifdef WITNESS
295 static char wit_warn[] =
296      "WARNING: WITNESS option enabled, expect reduced performance.\n";
297 SYSINIT(witwarn, SI_SUB_COPYRIGHT, SI_ORDER_SECOND + 1,
298    print_caddr_t, wit_warn)
299 #endif
300 
301 #ifdef DIAGNOSTIC
302 static char diag_warn[] =
303      "WARNING: DIAGNOSTIC option enabled, expect reduced performance.\n";
304 SYSINIT(diagwarn, SI_SUB_COPYRIGHT, SI_ORDER_SECOND + 2,
305     print_caddr_t, diag_warn)
306 #endif
307 
308 static void
309 set_boot_verbose(void *data __unused)
310 {
311 
312 	if (boothowto & RB_VERBOSE)
313 		bootverbose++;
314 }
315 SYSINIT(boot_verbose, SI_SUB_TUNABLES, SI_ORDER_ANY, set_boot_verbose, NULL)
316 
/*
 * Placeholder system-entry vector; proc0_init() installs it as proc0's
 * p_sysent.
 *
 * The initializer is positional, so the entries must stay in the same
 * order as the members of struct sysentvec (declared elsewhere).  Every
 * table, hook and size entry is zero/NULL; the only non-zero entries are
 * the name string, the page size, the user address range, the stack top,
 * PS_STRINGS and the protection value.
 *
 * NOTE(review): proc0_init() reads sv_minuser and sv_maxuser from this
 * vector and start_init() reads sv_usrstack from its process's p_sysent;
 * presumably those are the VM_MIN_ADDRESS, VM_MAXUSER_ADDRESS and USRSTACK
 * entries below -- confirm against the struct sysentvec declaration.
 */
struct sysentvec null_sysvec = {
	0,
	NULL,
	0,
	0,
	NULL,
	0,
	NULL,
	NULL,
	NULL,
	NULL,
	NULL,
	NULL,
	NULL,
	"null",			/* the only string entry: the vector's name */
	NULL,
	NULL,
	0,
	PAGE_SIZE,
	VM_MIN_ADDRESS,
	VM_MAXUSER_ADDRESS,
	USRSTACK,
	PS_STRINGS,
	VM_PROT_ALL,
	NULL,
	NULL,
	NULL
};
345 
346 /*
347  ***************************************************************************
348  ****
349  **** The two following SYSINIT's are proc0 specific glue code.  I am not
350  **** convinced that they can not be safely combined, but their order of
351  **** operation has been maintained as the same as the original init_main.c
352  **** for right now.
353  ****
354  **** These probably belong in init_proc.c or kern_proc.c, since they
355  **** deal with proc0 (the fork template process).
356  ****
357  ***************************************************************************
358  */
359 /* ARGSUSED*/
360 static void
361 proc0_init(void *dummy __unused)
362 {
363 	struct proc *p;
364 	unsigned i;
365 	struct thread *td;
366 	struct ksegrp *kg;
367 
368 	GIANT_REQUIRED;
369 	p = &proc0;
370 	td = &thread0;
371 	kg = &ksegrp0;
372 
373 	/*
374 	 * Initialize magic number.
375 	 */
376 	p->p_magic = P_MAGIC;
377 
378 	/*
379 	 * Initialize thread, process and ksegrp structures.
380 	 */
381 	procinit();	/* set up proc zone */
382 	threadinit();	/* set up thead, upcall and KSEGRP zones */
383 
384 	/*
385 	 * Initialise scheduler resources.
386 	 * Add scheduler specific parts to proc, ksegrp, thread as needed.
387 	 */
388 	schedinit();	/* scheduler gets its house in order */
389 	/*
390 	 * Initialize sleep queue hash table
391 	 */
392 	sleepinit();
393 
394 	/*
395 	 * additional VM structures
396 	 */
397 	vm_init2();
398 
399 	/*
400 	 * Create process 0 (the swapper).
401 	 */
402 	LIST_INSERT_HEAD(&allproc, p, p_list);
403 	LIST_INSERT_HEAD(PIDHASH(0), p, p_hash);
404 	mtx_init(&pgrp0.pg_mtx, "process group", NULL, MTX_DEF | MTX_DUPOK);
405 	p->p_pgrp = &pgrp0;
406 	LIST_INSERT_HEAD(PGRPHASH(0), &pgrp0, pg_hash);
407 	LIST_INIT(&pgrp0.pg_members);
408 	LIST_INSERT_HEAD(&pgrp0.pg_members, p, p_pglist);
409 
410 	pgrp0.pg_session = &session0;
411 	mtx_init(&session0.s_mtx, "session", NULL, MTX_DEF);
412 	session0.s_count = 1;
413 	session0.s_leader = p;
414 
415 	p->p_sysent = &null_sysvec;
416 	p->p_flag = P_SYSTEM;
417 	p->p_sflag = PS_INMEM;
418 	p->p_state = PRS_NORMAL;
419 	knlist_init(&p->p_klist, &p->p_mtx, NULL, NULL, NULL);
420 	STAILQ_INIT(&p->p_ktr);
421 	p->p_nice = NZERO;
422 	td->td_state = TDS_RUNNING;
423 	kg->kg_pri_class = PRI_TIMESHARE;
424 	kg->kg_user_pri = PUSER;
425 	td->td_priority = PVM;
426 	td->td_base_pri = PUSER;
427 	td->td_oncpu = 0;
428 	p->p_peers = 0;
429 	p->p_leader = p;
430 
431 
432 	bcopy("swapper", p->p_comm, sizeof ("swapper"));
433 
434 	callout_init(&p->p_itcallout, CALLOUT_MPSAFE);
435 	callout_init(&td->td_slpcallout, CALLOUT_MPSAFE);
436 
437 	/* Create credentials. */
438 	p->p_ucred = crget();
439 	p->p_ucred->cr_ngroups = 1;	/* group 0 */
440 	p->p_ucred->cr_uidinfo = uifind(0);
441 	p->p_ucred->cr_ruidinfo = uifind(0);
442 	p->p_ucred->cr_prison = NULL;	/* Don't jail it. */
443 #ifdef AUDIT
444 	audit_proc_alloc(p);
445 	audit_proc_kproc0(p);
446 #endif
447 #ifdef MAC
448 	mac_create_proc0(p->p_ucred);
449 #endif
450 	td->td_ucred = crhold(p->p_ucred);
451 
452 	/* Create sigacts. */
453 	p->p_sigacts = sigacts_alloc();
454 
455 	/* Initialize signal state for process 0. */
456 	siginit(&proc0);
457 
458 	/* Create the file descriptor table. */
459 	p->p_fd = fdinit(NULL);
460 	p->p_fdtol = NULL;
461 
462 	/* Create the limits structures. */
463 	p->p_limit = lim_alloc();
464 	for (i = 0; i < RLIM_NLIMITS; i++)
465 		p->p_limit->pl_rlimit[i].rlim_cur =
466 		    p->p_limit->pl_rlimit[i].rlim_max = RLIM_INFINITY;
467 	p->p_limit->pl_rlimit[RLIMIT_NOFILE].rlim_cur =
468 	    p->p_limit->pl_rlimit[RLIMIT_NOFILE].rlim_max = maxfiles;
469 	p->p_limit->pl_rlimit[RLIMIT_NPROC].rlim_cur =
470 	    p->p_limit->pl_rlimit[RLIMIT_NPROC].rlim_max = maxproc;
471 	i = ptoa(cnt.v_free_count);
472 	p->p_limit->pl_rlimit[RLIMIT_RSS].rlim_max = i;
473 	p->p_limit->pl_rlimit[RLIMIT_MEMLOCK].rlim_max = i;
474 	p->p_limit->pl_rlimit[RLIMIT_MEMLOCK].rlim_cur = i / 3;
475 	p->p_cpulimit = RLIM_INFINITY;
476 
477 	p->p_stats = pstats_alloc();
478 
479 	/* Allocate a prototype map so we have something to fork. */
480 	pmap_pinit0(vmspace_pmap(&vmspace0));
481 	p->p_vmspace = &vmspace0;
482 	vmspace0.vm_refcnt = 1;
483 	vm_map_init(&vmspace0.vm_map, p->p_sysent->sv_minuser,
484 	    p->p_sysent->sv_maxuser);
485 	vmspace0.vm_map.pmap = vmspace_pmap(&vmspace0);
486 
487 	/*
488 	 * Charge root for one process.
489 	 */
490 	(void)chgproccnt(p->p_ucred->cr_ruidinfo, 1, 0);
491 }
492 SYSINIT(p0init, SI_SUB_INTRINSIC, SI_ORDER_FIRST, proc0_init, NULL)
493 
494 /* ARGSUSED*/
495 static void
496 proc0_post(void *dummy __unused)
497 {
498 	struct timespec ts;
499 	struct proc *p;
500 
501 	/*
502 	 * Now we can look at the time, having had a chance to verify the
503 	 * time from the filesystem.  Pretend that proc0 started now.
504 	 */
505 	sx_slock(&allproc_lock);
506 	LIST_FOREACH(p, &allproc, p_list) {
507 		microuptime(&p->p_stats->p_start);
508 		p->p_rux.rux_runtime = 0;
509 	}
510 	sx_sunlock(&allproc_lock);
511 	PCPU_SET(switchtime, cpu_ticks());
512 	PCPU_SET(switchticks, ticks);
513 
514 	/*
515 	 * Give the ``random'' number generator a thump.
516 	 */
517 	nanotime(&ts);
518 	srandom(ts.tv_sec ^ ts.tv_nsec);
519 }
520 SYSINIT(p0post, SI_SUB_INTRINSIC_POST, SI_ORDER_FIRST, proc0_post, NULL)
521 
522 /*
523  ***************************************************************************
524  ****
525  **** The following SYSINIT's and glue code should be moved to the
526  **** respective files on a per subsystem basis.
527  ****
528  ***************************************************************************
529  */
530 
531 
532 /*
533  ***************************************************************************
534  ****
535  **** The following code probably belongs in another file, like
536  **** kern/init_init.c.
537  ****
538  ***************************************************************************
539  */
540 
/*
 * List of paths to try when searching for "init".
 *
 * The entries are separated by ':' and are tried in order by start_init().
 * The compiled-in default comes from the INIT_PATH build option when it is
 * defined; start_init() overwrites the buffer with the "init_path" kernel
 * environment variable when that is set.  Registered with CTLFLAG_RD
 * under the _kern sysctl node.
 */
static char init_path[MAXPATHLEN] =
#ifdef	INIT_PATH
    __XSTRING(INIT_PATH);
#else
    "/sbin/init:/sbin/oinit:/sbin/init.bak:/rescue/init:/stand/sysinstall";
#endif
SYSCTL_STRING(_kern, OID_AUTO, init_path, CTLFLAG_RD, init_path, 0,
	"Path used to search the init process");
552 
/*
 * Shutdown timeout of init(8).
 * Unused within kernel, but used to control init(8), hence do not remove.
 *
 * Defaults to 120 unless INIT_SHUTDOWN_TIMEOUT is defined at build time;
 * registered with CTLFLAG_RW under the _kern sysctl node.
 * NOTE(review): the unit is presumably seconds -- confirm against init(8).
 */
#ifndef INIT_SHUTDOWN_TIMEOUT
#define INIT_SHUTDOWN_TIMEOUT 120
#endif
static int init_shutdown_timeout = INIT_SHUTDOWN_TIMEOUT;
SYSCTL_INT(_kern, OID_AUTO, init_shutdown_timeout,
	CTLFLAG_RW, &init_shutdown_timeout, 0, "");
563 
/*
 * Start the initial user process; try exec'ing each pathname in init_path.
 * The program is invoked with one argument containing the boot flags.
 *
 * create_init() installs this function as the fork handler of initproc's
 * first thread, so it runs in that thread.  It takes Giant, mounts the
 * root filesystem, maps one page just below the top of the user stack,
 * and then, for each ':'-separated entry of init_path, builds an argument
 * vector in that page and calls execve().  On the first successful exec
 * it drops Giant and returns; if no entry can be exec'ed it panics.
 *
 * The argument is not used.
 */
static void
start_init(void *dummy)
{
	vm_offset_t addr;
	struct execve_args args;
	int options, error;
	char *var, *path, *next, *s;
	char *ucp, **uap, *arg0, *arg1;
	struct thread *td;
	struct proc *p;

	mtx_lock(&Giant);

	GIANT_REQUIRED;

	td = curthread;
	p = td->td_proc;

	vfs_mountroot();

	/*
	 * Need just enough stack to hold the faked-up "execve()" arguments.
	 * One page ending at sv_usrstack is mapped and recorded as the
	 * process's stack (vm_maxsaddr / vm_ssize).
	 */
	addr = p->p_sysent->sv_usrstack - PAGE_SIZE;
	if (vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &addr, PAGE_SIZE,
			FALSE, VM_PROT_ALL, VM_PROT_ALL, 0) != 0)
		panic("init: couldn't allocate argument space");
	p->p_vmspace->vm_maxsaddr = (caddr_t)addr;
	p->p_vmspace->vm_ssize = 1;

	/* The kernel environment may override the compiled-in search path. */
	if ((var = getenv("init_path")) != NULL) {
		strlcpy(init_path, var, sizeof(init_path));
		freeenv(var);
	}

	/*
	 * Walk the ':'-separated list.  On each pass [path, next) delimits
	 * the current candidate; empty components are skipped.
	 */
	for (path = init_path; *path != '\0'; path = next) {
		while (*path == ':')
			path++;
		if (*path == '\0')
			break;
		for (next = path; *next != '\0' && *next != ':'; next++)
			/* nothing */ ;
		if (bootverbose)
			printf("start_init: trying %.*s\n", (int)(next - path),
			    path);

		/*
		 * Move out the boot flag argument.
		 *
		 * The strings are written byte by byte from the top of the
		 * stack downwards, i.e. in reverse.  The argument reads "-s"
		 * for a single-user boot and "--" when no option is set.
		 * The stack is rebuilt from the top for every candidate.
		 */
		options = 0;
		ucp = (char *)p->p_sysent->sv_usrstack;
		(void)subyte(--ucp, 0);		/* trailing zero */
		if (boothowto & RB_SINGLE) {
			(void)subyte(--ucp, 's');
			options = 1;
		}
#ifdef notyet
                if (boothowto & RB_FASTBOOT) {
			(void)subyte(--ucp, 'f');
			options = 1;
		}
#endif

#ifdef BOOTCDROM
		(void)subyte(--ucp, 'C');
		options = 1;
#endif

		if (options == 0)
			(void)subyte(--ucp, '-');
		(void)subyte(--ucp, '-');		/* leading hyphen */
		arg1 = ucp;

		/*
		 * Move out the file name (also arg 0).
		 * Copied last character first, below the flag string.
		 */
		(void)subyte(--ucp, 0);
		for (s = next - 1; s >= path; s--)
			(void)subyte(--ucp, *s);
		arg0 = ucp;

		/*
		 * Move out the arg pointers.
		 * ucp is first rounded down to a pointer-sized boundary; the
		 * vector is then written downwards: NULL, arg1, arg0.
		 */
		uap = (char **)((intptr_t)ucp & ~(sizeof(intptr_t)-1));
		(void)suword((caddr_t)--uap, (long)0);	/* terminator */
		(void)suword((caddr_t)--uap, (long)(intptr_t)arg1);
		(void)suword((caddr_t)--uap, (long)(intptr_t)arg0);

		/*
		 * Point at the arguments.
		 */
		args.fname = arg0;
		args.argv = uap;
		args.envv = NULL;

		/*
		 * Now try to exec the program.  If can't for any reason
		 * other than it doesn't exist, complain.
		 *
		 * Otherwise, return via fork_trampoline() all the way
		 * to user mode as init!
		 */
		if ((error = execve(td, &args)) == 0) {
			mtx_unlock(&Giant);
			return;
		}
		if (error != ENOENT)
			printf("exec %.*s: error %d\n", (int)(next - path),
			    path, error);
	}
	/* Every candidate failed: there is no init to run. */
	printf("init: not found in path %s\n", init_path);
	panic("no init");
}
682 
683 /*
684  * Like kthread_create(), but runs in it's own address space.
685  * We do this early to reserve pid 1.
686  *
687  * Note special case - do not make it runnable yet.  Other work
688  * in progress will change this more.
689  */
690 static void
691 create_init(const void *udata __unused)
692 {
693 	struct ucred *newcred, *oldcred;
694 	int error;
695 
696 	error = fork1(&thread0, RFFDG | RFPROC | RFSTOPPED, 0, &initproc);
697 	if (error)
698 		panic("cannot fork init: %d\n", error);
699 	KASSERT(initproc->p_pid == 1, ("create_init: initproc->p_pid != 1"));
700 	/* divorce init's credentials from the kernel's */
701 	newcred = crget();
702 	PROC_LOCK(initproc);
703 	initproc->p_flag |= P_SYSTEM;
704 	oldcred = initproc->p_ucred;
705 	crcopy(newcred, oldcred);
706 #ifdef MAC
707 	mac_create_proc1(newcred);
708 #endif
709 #ifdef AUDIT
710 	audit_proc_init(initproc);
711 #endif
712 	initproc->p_ucred = newcred;
713 	PROC_UNLOCK(initproc);
714 	crfree(oldcred);
715 	cred_update_thread(FIRST_THREAD_IN_PROC(initproc));
716 	mtx_lock_spin(&sched_lock);
717 	initproc->p_sflag |= PS_INMEM;
718 	mtx_unlock_spin(&sched_lock);
719 	cpu_set_fork_handler(FIRST_THREAD_IN_PROC(initproc), start_init, NULL);
720 }
721 SYSINIT(init, SI_SUB_CREATE_INIT, SI_ORDER_FIRST, create_init, NULL)
722 
723 /*
724  * Make it runnable now.
725  */
726 static void
727 kick_init(const void *udata __unused)
728 {
729 	struct thread *td;
730 
731 	td = FIRST_THREAD_IN_PROC(initproc);
732 	mtx_lock_spin(&sched_lock);
733 	TD_SET_CAN_RUN(td);
734 	setrunqueue(td, SRQ_BORING);	/* XXXKSE */
735 	mtx_unlock_spin(&sched_lock);
736 }
737 SYSINIT(kickinit, SI_SUB_KTHREAD_INIT, SI_ORDER_FIRST, kick_init, NULL)
738