xref: /freebsd/sys/kern/init_main.c (revision a1a4f1a0d87b594d3f17a97dc0127eec1417e6f6)
1 /*
2  * Copyright (c) 1995 Terrence R. Lambert
3  * All rights reserved.
4  *
5  * Copyright (c) 1982, 1986, 1989, 1991, 1992, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  * (c) UNIX System Laboratories, Inc.
8  * All or some portions of this file are derived from material licensed
9  * to the University of California by American Telephone and Telegraph
10  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
11  * the permission of UNIX System Laboratories, Inc.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 3. All advertising materials mentioning features or use of this software
22  *    must display the following acknowledgement:
23  *	This product includes software developed by the University of
24  *	California, Berkeley and its contributors.
25  * 4. Neither the name of the University nor the names of its contributors
26  *    may be used to endorse or promote products derived from this software
27  *    without specific prior written permission.
28  *
29  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39  * SUCH DAMAGE.
40  *
41  *	@(#)init_main.c	8.9 (Berkeley) 1/21/94
42  * $FreeBSD$
43  */
44 
45 #include "opt_init_path.h"
46 
47 #include <sys/param.h>
48 #include <sys/file.h>
49 #include <sys/filedesc.h>
50 #include <sys/kernel.h>
51 #include <sys/mount.h>
52 #include <sys/sysctl.h>
53 #include <sys/proc.h>
54 #include <sys/kthread.h>
55 #include <sys/resourcevar.h>
56 #include <sys/signalvar.h>
57 #include <sys/systm.h>
58 #include <sys/vnode.h>
59 #include <sys/sysent.h>
60 #include <sys/reboot.h>
61 #include <sys/sysproto.h>
62 #include <sys/vmmeter.h>
63 #include <sys/unistd.h>
64 #include <sys/malloc.h>
65 
66 #include <machine/cpu.h>
67 
68 #include <vm/vm.h>
69 #include <vm/vm_param.h>
70 #include <vm/vm_prot.h>
71 #include <sys/lock.h>
72 #include <vm/pmap.h>
73 #include <vm/vm_map.h>
74 #include <sys/user.h>
75 #include <sys/copyright.h>
76 
77 extern struct linker_set	sysinit_set;	/* XXX */
78 
79 extern void mi_startup __P((void *framep));
80 
81 /* Components of the first process -- never freed. */
82 static struct session session0;
83 static struct pgrp pgrp0;
84 struct	proc proc0;
85 static struct pcred cred0;
86 static struct procsig procsig0;
87 static struct filedesc0 filedesc0;
88 static struct plimit limit0;
89 static struct vmspace vmspace0;
90 struct	proc *initproc;
91 
92 int cmask = CMASK;
93 extern	struct user *proc0paddr;
94 
95 struct	vnode *rootvp;
96 int	boothowto = 0;		/* initialized so that it can be patched */
97 
98 struct	timeval boottime;
99 SYSCTL_STRUCT(_kern, KERN_BOOTTIME, boottime, CTLFLAG_RD,
100     &boottime, timeval, "System boottime");
101 
102 /*
103  * Promiscuous argument pass for start_init()
104  *
105  * This is a kludge because we use a return from mi_startup() rather than a call
106  * to a new routine in locore.s to kick the kernel alive from locore.s.
107  */
108 static void	*init_framep;
109 
110 /*
111  * This ensures that there is at least one entry so that the sysinit_set
112  * symbol is not undefined.  A sybsystem ID of SI_SUB_DUMMY is never
113  * executed.
114  */
115 SYSINIT(placeholder, SI_SUB_DUMMY,SI_ORDER_ANY, NULL, NULL)
116 
117 /*
118  * The sysinit table itself.  Items are checked off as the are run.
119  * If we want to register new sysinit types, add them to newsysinit.
120  */
121 struct sysinit **sysinit = (struct sysinit **)sysinit_set.ls_items;
122 struct sysinit **newsysinit;
123 
124 /*
125  * Merge a new sysinit set into the current set, reallocating it if
126  * necessary.  This can only be called after malloc is running.
127  */
128 void
129 sysinit_add(set)
130 	struct sysinit **set;
131 {
132 	struct sysinit **newset;
133 	struct sysinit **sipp;
134 	struct sysinit **xipp;
135 	int count = 0;
136 
137 	if (newsysinit)
138 		for (sipp = newsysinit; *sipp; sipp++)
139 			count++;
140 	else
141 		for (sipp = sysinit; *sipp; sipp++)
142 			count++;
143 	for (sipp = set; *sipp; sipp++)
144 		count++;
145 	count++;		/* Trailing NULL */
146 	newset = malloc(count * sizeof(*sipp), M_TEMP, M_NOWAIT);
147 	if (newset == NULL)
148 		panic("cannot malloc for sysinit");
149 	xipp = newset;
150 	if (newsysinit)
151 		for (sipp = newsysinit; *sipp; sipp++)
152 			*xipp++ = *sipp;
153 	else
154 		for (sipp = sysinit; *sipp; sipp++)
155 			*xipp++ = *sipp;
156 	for (sipp = set; *sipp; sipp++)
157 		*xipp++ = *sipp;
158 	*xipp = NULL;
159 	if (newsysinit)
160 		free(newsysinit, M_TEMP);
161 	newsysinit = newset;
162 }
163 
164 /*
165  * System startup; initialize the world, create process 0, mount root
166  * filesystem, and fork to create init and pagedaemon.  Most of the
167  * hard work is done in the lower-level initialization routines including
168  * startup(), which does memory initialization and autoconfiguration.
169  *
170  * This allows simple addition of new kernel subsystems that require
171  * boot time initialization.  It also allows substitution of subsystem
172  * (for instance, a scheduler, kernel profiler, or VM system) by object
173  * module.  Finally, it allows for optional "kernel threads".
174  */
175 void
176 mi_startup(framep)
177 	void *framep;
178 {
179 
180 	register struct sysinit **sipp;		/* system initialization*/
181 	register struct sysinit **xipp;		/* interior loop of sort*/
182 	register struct sysinit *save;		/* bubble*/
183 
184 	/*
185 	 * Copy the locore.s frame pointer for proc0, this is forked into
186 	 * all other processes.
187 	 */
188 	init_framep = framep;
189 
190 restart:
191 	/*
192 	 * Perform a bubble sort of the system initialization objects by
193 	 * their subsystem (primary key) and order (secondary key).
194 	 */
195 	for (sipp = sysinit; *sipp; sipp++) {
196 		for (xipp = sipp + 1; *xipp; xipp++) {
197 			if ((*sipp)->subsystem < (*xipp)->subsystem ||
198 			     ((*sipp)->subsystem == (*xipp)->subsystem &&
199 			      (*sipp)->order < (*xipp)->order))
200 				continue;	/* skip*/
201 			save = *sipp;
202 			*sipp = *xipp;
203 			*xipp = save;
204 		}
205 	}
206 
207 	/*
208 	 * Traverse the (now) ordered list of system initialization tasks.
209 	 * Perform each task, and continue on to the next task.
210 	 *
211 	 * The last item on the list is expected to be the scheduler,
212 	 * which will not return.
213 	 */
214 	for (sipp = sysinit; *sipp; sipp++) {
215 
216 		if ((*sipp)->subsystem == SI_SUB_DUMMY)
217 			continue;	/* skip dummy task(s)*/
218 
219 		if ((*sipp)->subsystem == SI_SUB_DONE)
220 			continue;
221 
222 		/* Call function */
223 		(*((*sipp)->func))((*sipp)->udata);
224 
225 		/* Check off the one we're just done */
226 		(*sipp)->subsystem = SI_SUB_DONE;
227 
228 		/* Check if we've installed more sysinit items via KLD */
229 		if (newsysinit != NULL) {
230 			if (sysinit != (struct sysinit **)sysinit_set.ls_items)
231 				free(sysinit, M_TEMP);
232 			sysinit = newsysinit;
233 			newsysinit = NULL;
234 			goto restart;
235 		}
236 	}
237 
238 	panic("Shouldn't get here!");
239 	/* NOTREACHED*/
240 }
241 
242 
243 /*
244  ***************************************************************************
245  ****
246  **** The following SYSINIT's belong elsewhere, but have not yet
247  **** been moved.
248  ****
249  ***************************************************************************
250  */
#if defined(OMIT)
/*
 * Disabled: swap initialization is currently handled by vfs_mountroot
 * (bad idea)... it should be done the same way as in 4.4Lite2.
 */
SYSINIT(swapinit, SI_SUB_SWAP, SI_ORDER_FIRST, swapinit, NULL)
#endif	/* OMIT */
258 
259 static void print_caddr_t __P((void *data));
260 static void
261 print_caddr_t(data)
262 	void *data;
263 {
264 	printf("%s", (char *)data);
265 }
266 SYSINIT(announce, SI_SUB_COPYRIGHT, SI_ORDER_FIRST, print_caddr_t, copyright)
267 
268 
269 /*
270  ***************************************************************************
271  ****
272  **** The two following SYSINITs are proc0 specific glue code.  I am not
273  **** convinced that they can not be safely combined, but their order of
274  **** operation has been maintained as the same as the original init_main.c
275  **** for right now.
276  ****
277  **** These probably belong in init_proc.c or kern_proc.c, since they
278  **** deal with proc0 (the fork template process).
279  ****
280  ***************************************************************************
281  */
282 /* ARGSUSED*/
283 static void proc0_init __P((void *dummy));
284 static void
285 proc0_init(dummy)
286 	void *dummy;
287 {
288 	register struct proc		*p;
289 	register struct filedesc0	*fdp;
290 	register unsigned i;
291 
292 	p = &proc0;
293 
294 	/*
295 	 * Initialize process and pgrp structures.
296 	 */
297 	procinit();
298 
299 	/*
300 	 * Initialize sleep queue hash table
301 	 */
302 	sleepinit();
303 
304 	/*
305 	 * additional VM structures
306 	 */
307 	vm_init2();
308 
309 	/*
310 	 * Create process 0 (the swapper).
311 	 */
312 	LIST_INSERT_HEAD(&allproc, p, p_list);
313 	p->p_pgrp = &pgrp0;
314 	LIST_INSERT_HEAD(PGRPHASH(0), &pgrp0, pg_hash);
315 	LIST_INIT(&pgrp0.pg_members);
316 	LIST_INSERT_HEAD(&pgrp0.pg_members, p, p_pglist);
317 
318 	pgrp0.pg_session = &session0;
319 	session0.s_count = 1;
320 	session0.s_leader = p;
321 
322 	p->p_sysent = &aout_sysvec;
323 
324 	p->p_flag = P_INMEM | P_SYSTEM;
325 	p->p_stat = SRUN;
326 	p->p_nice = NZERO;
327 	p->p_rtprio.type = RTP_PRIO_NORMAL;
328 	p->p_rtprio.prio = 0;
329 
330 /*
331  * Link for kernel based threads
332  */
333 	p->p_peers = 0;
334 	p->p_leader = p;
335 
336 	bcopy("swapper", p->p_comm, sizeof ("swapper"));
337 
338 	/* Create credentials. */
339 	cred0.p_refcnt = 1;
340 	p->p_cred = &cred0;
341 	p->p_ucred = crget();
342 	p->p_ucred->cr_ngroups = 1;	/* group 0 */
343 
344 	/* Don't jail it */
345 	p->p_prison = 0;
346 
347 	/* Create procsig. */
348 	p->p_procsig = &procsig0;
349 	p->p_procsig->ps_refcnt = 1;
350 
351 	/* Create the file descriptor table. */
352 	fdp = &filedesc0;
353 	p->p_fd = &fdp->fd_fd;
354 	fdp->fd_fd.fd_refcnt = 1;
355 	fdp->fd_fd.fd_cmask = cmask;
356 	fdp->fd_fd.fd_ofiles = fdp->fd_dfiles;
357 	fdp->fd_fd.fd_ofileflags = fdp->fd_dfileflags;
358 	fdp->fd_fd.fd_nfiles = NDFILE;
359 
360 	/* Create the limits structures. */
361 	p->p_limit = &limit0;
362 	for (i = 0; i < sizeof(p->p_rlimit)/sizeof(p->p_rlimit[0]); i++)
363 		limit0.pl_rlimit[i].rlim_cur =
364 		    limit0.pl_rlimit[i].rlim_max = RLIM_INFINITY;
365 	limit0.pl_rlimit[RLIMIT_NOFILE].rlim_cur =
366 	    limit0.pl_rlimit[RLIMIT_NOFILE].rlim_max = maxfiles;
367 	limit0.pl_rlimit[RLIMIT_NPROC].rlim_cur =
368 	    limit0.pl_rlimit[RLIMIT_NPROC].rlim_max = maxproc;
369 	i = ptoa(cnt.v_free_count);
370 	limit0.pl_rlimit[RLIMIT_RSS].rlim_max = i;
371 	limit0.pl_rlimit[RLIMIT_MEMLOCK].rlim_max = i;
372 	limit0.pl_rlimit[RLIMIT_MEMLOCK].rlim_cur = i / 3;
373 	limit0.p_cpulimit = RLIM_INFINITY;
374 	limit0.p_refcnt = 1;
375 
376 
377 	/* Allocate a prototype map so we have something to fork. */
378 	pmap_pinit0(vmspace_pmap(&vmspace0));
379 	p->p_vmspace = &vmspace0;
380 	vmspace0.vm_refcnt = 1;
381 	vm_map_init(&vmspace0.vm_map, round_page(VM_MIN_ADDRESS),
382 	    trunc_page(VM_MAXUSER_ADDRESS));
383 	vmspace0.vm_map.pmap = vmspace_pmap(&vmspace0);
384 	p->p_addr = proc0paddr;				/* XXX */
385 
386 #ifdef cpu_set_init_frame
387 	/*
388 	 * proc0 needs to have a coherent frame base in its stack.
389 	 */
390 	cpu_set_init_frame(p, init_framep);			/* XXX! */
391 #endif
392 
393 	/*
394 	 * We continue to place resource usage info and signal
395 	 * actions in the user struct so they're pageable.
396 	 */
397 	p->p_stats = &p->p_addr->u_stats;
398 	p->p_sigacts = &p->p_addr->u_sigacts;
399 
400 	/*
401 	 * Charge root for one process.
402 	 */
403 	(void)chgproccnt(0, 1);
404 
405 	/*
406 	 * Initialize the current process pointer (curproc) before
407 	 * any possible traps/probes to simplify trap processing.
408 	 */
409 	SET_CURPROC(p);
410 
411 }
412 SYSINIT(p0init, SI_SUB_INTRINSIC, SI_ORDER_FIRST, proc0_init, NULL)
413 
414 /* ARGSUSED*/
415 static void proc0_post __P((void *dummy));
416 static void
417 proc0_post(dummy)
418 	void *dummy;
419 {
420 	struct timespec ts;
421 
422 	/*
423 	 * Now we can look at the time, having had a chance to verify the
424 	 * time from the file system.  Pretend that proc0 started now.
425 	 */
426 	microtime(&proc0.p_stats->p_start);
427 	proc0.p_runtime = 0;
428 	microuptime(&switchtime);
429 	switchticks = ticks;
430 
431 	/*
432 	 * Give the ``random'' number generator a thump.
433 	 * XXX: Does read_random() contain enough bits to be used here ?
434 	 */
435 	nanotime(&ts);
436 	srandom(ts.tv_sec ^ ts.tv_nsec);
437 
438 	/* Initialize signal state for process 0. */
439 	siginit(&proc0);
440 }
441 SYSINIT(p0post, SI_SUB_INTRINSIC_POST, SI_ORDER_FIRST, proc0_post, NULL)
442 
443 
444 
445 
446 /*
447  ***************************************************************************
448  ****
449  **** The following SYSINIT's and glue code should be moved to the
450  **** respective files on a per subsystem basis.
451  ****
452  ***************************************************************************
453  */
454 
455 /* ARGSUSED */
456 static void root_conf __P((void *dummy));
457 static void
458 root_conf(dummy)
459 	void *dummy;
460 {
461 	cpu_rootconf();
462 }
463 SYSINIT(root_conf, SI_SUB_ROOT_CONF, SI_ORDER_FIRST, root_conf, NULL)
464 
465 /* ARGSUSED*/
466 static void xxx_vfs_root_fdtab __P((void *dummy));
467 static void
468 xxx_vfs_root_fdtab(dummy)
469 	void *dummy;
470 {
471 	register struct filedesc0	*fdp = &filedesc0;
472 
473 	/* Get the vnode for '/'.  Set fdp->fd_fd.fd_cdir to reference it. */
474 	if (VFS_ROOT(mountlist.cqh_first, &rootvnode))
475 		panic("cannot find root vnode");
476 	fdp->fd_fd.fd_cdir = rootvnode;
477 	VREF(fdp->fd_fd.fd_cdir);
478 	VOP_UNLOCK(rootvnode, 0, &proc0);
479 	fdp->fd_fd.fd_rdir = rootvnode;
480 }
481 SYSINIT(retrofit, SI_SUB_ROOT_FDTAB, SI_ORDER_FIRST, xxx_vfs_root_fdtab, NULL)
482 
483 
484 /*
485  ***************************************************************************
486  ****
487  **** The following code probably belongs in another file, like
488  **** kern/init_init.c.  It is here for two reasons only:
489  ****
490  ****	1)	This code returns to startup the system; this is
491  ****		abnormal for a kernel thread.
492  ****	2)	This code promiscuously uses init_frame
493  ****
494  ***************************************************************************
495  */
496 
497 extern void prepare_usermode __P((void));
498 static void create_init __P((const void *dummy));
499 static void start_init __P((void *dummy));
500 SYSINIT(init,SI_SUB_KTHREAD_INIT, SI_ORDER_FIRST, create_init, NULL)
501 
502 /*
503  * Like kthread_create(), but runs in it's own address space.
504  */
505 static void
506 create_init(udata)
507 	const void *udata;
508 {
509 	int error;
510 
511 	error = fork1(&proc0, RFFDG | RFPROC, &initproc);
512 	if (error)
513 		panic("cannot fork init: %d\n", error);
514 	initproc->p_flag |= P_INMEM | P_SYSTEM;
515 	cpu_set_fork_handler(initproc, start_init, NULL);
516 }
517 
518 /*
519  * List of paths to try when searching for "init".
520  */
521 static char init_path[MAXPATHLEN] =
522 #ifdef	INIT_PATH
523     __XSTRING(INIT_PATH);
524 #else
525     "/sbin/init:/sbin/oinit:/sbin/init.bak:/stand/sysinstall";
526 #endif
527 SYSCTL_STRING(_kern, OID_AUTO, init_path, CTLFLAG_RD, init_path, 0, "");
528 
529 /*
530  * Start the initial user process; try exec'ing each pathname in init_path.
531  * The program is invoked with one argument containing the boot flags.
532  */
533 static void
534 start_init(dummy)
535 	void *dummy;
536 {
537 	vm_offset_t addr;
538 	struct execve_args args;
539 	int options, error;
540 	char *var, *path, *next, *s;
541 	char *ucp, **uap, *arg0, *arg1;
542 	struct proc *p;
543 
544 	p = curproc;
545 
546 	/*
547 	 * Need just enough stack to hold the faked-up "execve()" arguments.
548 	 */
549 	addr = trunc_page(USRSTACK - PAGE_SIZE);
550 	if (vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &addr, PAGE_SIZE,
551 			FALSE, VM_PROT_ALL, VM_PROT_ALL, 0) != 0)
552 		panic("init: couldn't allocate argument space");
553 	p->p_vmspace->vm_maxsaddr = (caddr_t)addr;
554 	p->p_vmspace->vm_ssize = 1;
555 
556 	if ((var = getenv("init_path")) != NULL) {
557 		strncpy(init_path, var, sizeof init_path);
558 		init_path[sizeof init_path - 1] = 0;
559 	}
560 
561 	for (path = init_path; *path != '\0'; path = next) {
562 		while (*path == ':')
563 			path++;
564 		if (*path == '\0')
565 			break;
566 		for (next = path; *next != '\0' && *next != ':'; next++)
567 			/* nothing */ ;
568 		if (bootverbose)
569 			printf("start_init: trying %.*s\n", (int)(next - path),
570 			    path);
571 
572 		/*
573 		 * Move out the boot flag argument.
574 		 */
575 		options = 0;
576 		ucp = (char *)USRSTACK;
577 		(void)subyte(--ucp, 0);		/* trailing zero */
578 		if (boothowto & RB_SINGLE) {
579 			(void)subyte(--ucp, 's');
580 			options = 1;
581 		}
582 #ifdef notyet
583                 if (boothowto & RB_FASTBOOT) {
584 			(void)subyte(--ucp, 'f');
585 			options = 1;
586 		}
587 #endif
588 
589 #ifdef BOOTCDROM
590 		(void)subyte(--ucp, 'C');
591 		options = 1;
592 #endif
593 		if (options == 0)
594 			(void)subyte(--ucp, '-');
595 		(void)subyte(--ucp, '-');		/* leading hyphen */
596 		arg1 = ucp;
597 
598 		/*
599 		 * Move out the file name (also arg 0).
600 		 */
601 		(void)subyte(--ucp, 0);
602 		for (s = next - 1; s >= path; s--)
603 			(void)subyte(--ucp, *s);
604 		arg0 = ucp;
605 
606 		/*
607 		 * Move out the arg pointers.
608 		 */
609 		uap = (char **)((intptr_t)ucp & ~(sizeof(intptr_t)-1));
610 		(void)suword((caddr_t)--uap, (long)0);	/* terminator */
611 		(void)suword((caddr_t)--uap, (long)(intptr_t)arg1);
612 		(void)suword((caddr_t)--uap, (long)(intptr_t)arg0);
613 
614 		/*
615 		 * Point at the arguments.
616 		 */
617 		args.fname = arg0;
618 		args.argv = uap;
619 		args.envv = NULL;
620 
621 		/*
622 		 * Now try to exec the program.  If can't for any reason
623 		 * other than it doesn't exist, complain.
624 		 *
625 		 * Otherwise, return via the fork trampoline all the way
626 		 * to user mode as init!
627 		 */
628 		if ((error = execve(p, &args)) == 0) {
629 			prepare_usermode();
630 			return;
631 		}
632 		if (error != ENOENT)
633 			printf("exec %.*s: error %d\n", (int)(next - path),
634 			    path, error);
635 	}
636 	printf("init: not found\n");
637 	panic("no init");
638 }
639