1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* Copyright (c) 1988 AT&T */ 27 /* All Rights Reserved */ 28 29 30 #pragma ident "%Z%%M% %I% %E% SMI" /* from SVr4.0 1.31 */ 31 32 #include <sys/types.h> 33 #include <sys/param.h> 34 #include <sys/sysmacros.h> 35 #include <sys/pcb.h> 36 #include <sys/systm.h> 37 #include <sys/signal.h> 38 #include <sys/cred.h> 39 #include <sys/user.h> 40 #include <sys/vfs.h> 41 #include <sys/vnode.h> 42 #include <sys/proc.h> 43 #include <sys/time.h> 44 #include <sys/file.h> 45 #include <sys/priocntl.h> 46 #include <sys/procset.h> 47 #include <sys/disp.h> 48 #include <sys/callo.h> 49 #include <sys/callb.h> 50 #include <sys/debug.h> 51 #include <sys/conf.h> 52 #include <sys/bootconf.h> 53 #include <sys/utsname.h> 54 #include <sys/cmn_err.h> 55 #include <sys/vmparam.h> 56 #include <sys/modctl.h> 57 #include <sys/vm.h> 58 #include <sys/callb.h> 59 #include <sys/kmem.h> 60 #include <sys/vmem.h> 61 #include <sys/cpuvar.h> 62 #include <sys/cladm.h> 63 #include <sys/corectl.h> 64 #include <sys/exec.h> 65 #include <sys/syscall.h> 66 #include <sys/reboot.h> 67 #include <sys/task.h> 68 #include <sys/exacct.h> 69 #include <sys/autoconf.h> 70 #include <sys/errorq.h> 71 #include <sys/class.h> 72 #include <sys/stack.h> 73 74 #include <vm/as.h> 75 #include <vm/seg_kmem.h> 76 #include <sys/dc_ki.h> 77 78 #include <c2/audit.h> 79 80 /* well known processes */ 81 proc_t *proc_sched; /* memory scheduler */ 82 proc_t *proc_init; /* init */ 83 proc_t *proc_pageout; /* pageout daemon */ 84 proc_t *proc_fsflush; /* fsflush daemon */ 85 86 pgcnt_t maxmem; /* Maximum available memory in pages. */ 87 pgcnt_t freemem; /* Current available memory in pages. */ 88 int audit_active; 89 int interrupts_unleashed; /* set when we do the first spl0() */ 90 91 kmem_cache_t *process_cache; /* kmem cache for proc structures */ 92 93 /* 94 * Process 0's lwp directory and lwpid hash table. 95 */ 96 lwpdir_t p0_lwpdir[2]; 97 lwpdir_t *p0_tidhash[2]; 98 lwpent_t p0_lep; 99 100 /* 101 * Machine-independent initialization code 102 * Called from cold start routine as 103 * soon as a stack and segmentation 104 * have been established. 105 * Functions: 106 * clear and free user core 107 * turn on clock 108 * hand craft 0th process 109 * call all initialization routines 110 * fork - process 0 to schedule 111 * - process 1 execute bootstrap 112 * - process 2 to page out 113 * create system threads 114 */ 115 116 int cluster_bootflags = 0; 117 118 void 119 cluster_wrapper(void) 120 { 121 cluster(); 122 panic("cluster() returned"); 123 } 124 125 char initname[INITNAME_SZ] = "/sbin/init"; /* also referenced by zone0 */ 126 char initargs[BOOTARGS_MAX] = ""; /* also referenced by zone0 */ 127 128 /* 129 * Construct a stack for init containing the arguments to it, then 130 * pass control to exec_common. 131 */ 132 int 133 exec_init(const char *initpath, const char *args) 134 { 135 caddr32_t ucp; 136 caddr32_t *uap; 137 caddr32_t *argv; 138 caddr32_t exec_fnamep; 139 char *scratchargs; 140 int i, sarg; 141 size_t argvlen, alen; 142 boolean_t in_arg; 143 int argc = 0; 144 int error = 0, count = 0; 145 proc_t *p = ttoproc(curthread); 146 klwp_t *lwp = ttolwp(curthread); 147 148 if (args == NULL) 149 args = ""; 150 151 alen = strlen(initpath) + 1 + strlen(args) + 1; 152 scratchargs = kmem_alloc(alen, KM_SLEEP); 153 (void) snprintf(scratchargs, alen, "%s %s", initpath, args); 154 155 /* 156 * We do a quick two state parse of the string to sort out how big 157 * argc should be. 158 */ 159 in_arg = B_FALSE; 160 for (i = 0; i < strlen(scratchargs); i++) { 161 if (scratchargs[i] == ' ' || scratchargs[i] == '\0') { 162 if (in_arg) { 163 in_arg = B_FALSE; 164 argc++; 165 } 166 } else { 167 in_arg = B_TRUE; 168 } 169 } 170 argvlen = sizeof (caddr32_t) * (argc + 1); 171 argv = kmem_zalloc(argvlen, KM_SLEEP); 172 173 /* 174 * We pull off a bit of a hack here. We work our way through the 175 * args string, putting nulls at the ends of space delimited tokens 176 * (boot args don't support quoting at this time). Then we just 177 * copy the whole mess to userland in one go. In other words, we 178 * transform this: "init -s -r\0" into this on the stack: 179 * 180 * -0x00 \0 181 * -0x01 r 182 * -0x02 - <--------. 183 * -0x03 \0 | 184 * -0x04 s | 185 * -0x05 - <------. | 186 * -0x06 \0 | | 187 * -0x07 t | | 188 * -0x08 i | | 189 * -0x09 n | | 190 * -0x0a i <---. | | 191 * -0x10 NULL | | | (argv[3]) 192 * -0x14 -----|--|-' (argv[2]) 193 * -0x18 ------|--' (argv[1]) 194 * -0x1c -------' (argv[0]) 195 * 196 * Since we know the value of ucp at the beginning of this process, 197 * we can trivially compute the argv[] array which we also need to 198 * place in userland: argv[i] = ucp - sarg(i), where ucp is the 199 * stack ptr, and sarg is the string index of the start of the 200 * argument. 201 */ 202 ucp = (caddr32_t)(uintptr_t)p->p_usrstack; 203 204 argc = 0; 205 in_arg = B_FALSE; 206 sarg = 0; 207 208 for (i = 0; i < alen; i++) { 209 if (scratchargs[i] == ' ' || scratchargs[i] == '\0') { 210 if (in_arg == B_TRUE) { 211 in_arg = B_FALSE; 212 scratchargs[i] = '\0'; 213 argv[argc++] = ucp - (alen - sarg); 214 } 215 } else if (in_arg == B_FALSE) { 216 in_arg = B_TRUE; 217 sarg = i; 218 } 219 } 220 ucp -= alen; 221 error |= copyout(scratchargs, (caddr_t)(uintptr_t)ucp, alen); 222 223 uap = (caddr32_t *)P2ALIGN((uintptr_t)ucp, sizeof (caddr32_t)); 224 uap--; /* advance to be below the word we're in */ 225 uap -= (argc + 1); /* advance argc words down, plus one for NULL */ 226 error |= copyout(argv, uap, argvlen); 227 228 if (error != 0) { 229 zcmn_err(p->p_zone->zone_id, CE_WARN, 230 "Could not construct stack for init.\n"); 231 kmem_free(argv, argvlen); 232 kmem_free(scratchargs, alen); 233 return (EFAULT); 234 } 235 236 exec_fnamep = argv[0]; 237 kmem_free(argv, argvlen); 238 kmem_free(scratchargs, alen); 239 240 /* 241 * Point at the arguments. 242 */ 243 lwp->lwp_ap = lwp->lwp_arg; 244 lwp->lwp_arg[0] = (uintptr_t)exec_fnamep; 245 lwp->lwp_arg[1] = (uintptr_t)uap; 246 lwp->lwp_arg[2] = NULL; 247 curthread->t_post_sys = 1; 248 curthread->t_sysnum = SYS_execve; 249 250 again: 251 error = exec_common((const char *)(uintptr_t)exec_fnamep, 252 (const char **)(uintptr_t)uap, NULL); 253 254 /* 255 * Normally we would just set lwp_argsaved and t_post_sys and 256 * let post_syscall reset lwp_ap for us. Unfortunately, 257 * exec_init isn't always called from a system call. Instead 258 * of making a mess of trap_cleanup, we just reset the args 259 * pointer here. 260 */ 261 reset_syscall_args(); 262 263 switch (error) { 264 case 0: 265 return (0); 266 267 case ENOENT: 268 zcmn_err(p->p_zone->zone_id, CE_WARN, 269 "exec(%s) failed (file not found).\n", initpath); 270 return (ENOENT); 271 272 case EAGAIN: 273 case EINTR: 274 ++count; 275 if (count < 5) { 276 zcmn_err(p->p_zone->zone_id, CE_WARN, 277 "exec(%s) failed with errno %d. Retrying...\n", 278 initpath, error); 279 goto again; 280 } 281 } 282 283 zcmn_err(p->p_zone->zone_id, CE_WARN, 284 "exec(%s) failed with errno %d.", initpath, error); 285 return (error); 286 } 287 288 /* 289 * This routine does all of the common setup for invoking init; global 290 * and non-global zones employ this routine for the functionality which is 291 * in common. 292 * 293 * This program (init, presumably) must be a 32-bit process. 294 */ 295 int 296 start_init_common() 297 { 298 proc_t *p = curproc; 299 ASSERT_STACK_ALIGNED(); 300 p->p_zone->zone_proc_initpid = p->p_pid; 301 302 p->p_cstime = p->p_stime = p->p_cutime = p->p_utime = 0; 303 p->p_usrstack = (caddr_t)USRSTACK32; 304 p->p_model = DATAMODEL_ILP32; 305 p->p_stkprot = PROT_ZFOD & ~PROT_EXEC; 306 p->p_datprot = PROT_ZFOD & ~PROT_EXEC; 307 p->p_stk_ctl = INT32_MAX; 308 309 p->p_as = as_alloc(); 310 p->p_as->a_userlimit = (caddr_t)USERLIMIT32; 311 (void) hat_setup(p->p_as->a_hat, HAT_INIT); 312 313 init_core(); 314 315 init_mstate(curthread, LMS_SYSTEM); 316 return (exec_init(p->p_zone->zone_initname, p->p_zone->zone_bootargs)); 317 } 318 319 /* 320 * Start the initial user process for the global zone; once running, if 321 * init should subsequently fail, it will be automatically be caught in the 322 * exit(2) path, and restarted by restart_init(). 323 */ 324 static void 325 start_init(void) 326 { 327 proc_init = curproc; 328 329 ASSERT(curproc->p_zone->zone_initname != NULL); 330 331 if (start_init_common() != 0) 332 halt("unix: Could not start init"); 333 lwp_rtt(); 334 } 335 336 void 337 main(void) 338 { 339 proc_t *p = ttoproc(curthread); /* &p0 */ 340 int (**initptr)(); 341 extern void sched(); 342 extern void fsflush(); 343 extern void thread_reaper(); 344 extern int (*init_tbl[])(); 345 extern int (*mp_init_tbl[])(); 346 extern id_t syscid, defaultcid; 347 extern int swaploaded; 348 extern int netboot; 349 extern void vm_init(void); 350 extern void cbe_init(void); 351 extern void clock_init(void); 352 extern void physio_bufs_init(void); 353 extern void pm_cfb_setup_intr(void); 354 extern int pm_adjust_timestamps(dev_info_t *, void *); 355 extern void start_other_cpus(int); 356 extern void sysevent_evc_thrinit(); 357 extern void lgrp_main_init(void); 358 extern void lgrp_main_mp_init(void); 359 360 /* 361 * In the horrible world of x86 in-lines, you can't get symbolic 362 * structure offsets a la genassym. This assertion is here so 363 * that the next poor slob who innocently changes the offset of 364 * cpu_thread doesn't waste as much time as I just did finding 365 * out that it's hard-coded in i86/ml/i86.il. Similarly for 366 * curcpup. You're welcome. 367 */ 368 ASSERT(CPU == CPU->cpu_self); 369 ASSERT(curthread == CPU->cpu_thread); 370 ASSERT_STACK_ALIGNED(); 371 372 /* 373 * Setup the first lgroup, and home t0 374 */ 375 lgrp_setup(); 376 377 startup(); 378 segkmem_gc(); 379 callb_init(); 380 callout_init(); /* callout table MUST be init'd before clock starts */ 381 cbe_init(); 382 clock_init(); 383 384 /* 385 * May need to probe to determine latencies from CPU 0 after 386 * gethrtime() comes alive in cbe_init() and before enabling interrupts 387 */ 388 lgrp_plat_probe(); 389 390 /* 391 * Call all system initialization functions. 392 */ 393 for (initptr = &init_tbl[0]; *initptr; initptr++) 394 (**initptr)(); 395 396 /* 397 * initialize vm related stuff. 398 */ 399 vm_init(); 400 401 /* 402 * initialize buffer pool for raw I/O requests 403 */ 404 physio_bufs_init(); 405 406 ttolwp(curthread)->lwp_error = 0; /* XXX kludge for SCSI driver */ 407 408 /* 409 * Drop the interrupt level and allow interrupts. At this point 410 * the DDI guarantees that interrupts are enabled. 411 */ 412 (void) spl0(); 413 interrupts_unleashed = 1; 414 415 vfs_mountroot(); /* Mount the root file system */ 416 errorq_init(); /* after vfs_mountroot() so DDI root is ready */ 417 cpu_kstat_init(CPU); /* after vfs_mountroot() so TOD is valid */ 418 ddi_walk_devs(ddi_root_node(), pm_adjust_timestamps, NULL); 419 /* after vfs_mountroot() so hrestime is valid */ 420 421 post_startup(); 422 swaploaded = 1; 423 424 /* 425 * Initial C2 audit system 426 */ 427 #ifdef C2_AUDIT 428 audit_init(); /* C2 hook */ 429 #endif 430 431 /* 432 * Plumb the protocol modules and drivers only if we are not 433 * networked booted, in this case we already did it in rootconf(). 434 */ 435 if (netboot == 0) 436 (void) strplumb(); 437 438 gethrestime(&u.u_start); 439 curthread->t_start = u.u_start.tv_sec; 440 p->p_mstart = gethrtime(); 441 442 /* 443 * Perform setup functions that can only be done after root 444 * and swap have been set up. 445 */ 446 consconfig(); 447 #if defined(__i386) || defined(__amd64) 448 release_bootstrap(); 449 #endif 450 /* 451 * attach drivers with ddi-forceattach prop 452 * This must be done after consconfig() to prevent usb key/mouse 453 * from attaching before the upper console stream is plumbed. 454 * It must be done early enough to load hotplug drivers (e.g. 455 * pcmcia nexus) so that devices enumerated via hotplug is 456 * available before I/O subsystem is fully initialized. 457 */ 458 i_ddi_forceattach_drivers(); 459 460 /* 461 * Set the scan rate and other parameters of the paging subsystem. 462 */ 463 setupclock(0); 464 465 /* 466 * Create kmem cache for proc structures 467 */ 468 process_cache = kmem_cache_create("process_cache", sizeof (proc_t), 469 0, NULL, NULL, NULL, NULL, NULL, 0); 470 471 /* 472 * Initialize process 0's lwp directory and lwpid hash table. 473 */ 474 p->p_lwpdir = p->p_lwpfree = p0_lwpdir; 475 p->p_lwpdir->ld_next = p->p_lwpdir + 1; 476 p->p_lwpdir_sz = 2; 477 p->p_tidhash = p0_tidhash; 478 p->p_tidhash_sz = 2; 479 p0_lep.le_thread = curthread; 480 p0_lep.le_lwpid = curthread->t_tid; 481 p0_lep.le_start = curthread->t_start; 482 lwp_hash_in(p, &p0_lep); 483 484 /* 485 * Initialize extended accounting. 486 */ 487 exacct_init(); 488 489 /* 490 * Initialize threads of sysevent event channels 491 */ 492 sysevent_evc_thrinit(); 493 494 /* 495 * main lgroup initialization 496 * This must be done after post_startup(), but before 497 * start_other_cpus() 498 */ 499 lgrp_main_init(); 500 501 /* 502 * Perform MP initialization, if any. 503 */ 504 start_other_cpus(0); 505 506 /* 507 * Finish lgrp initialization after all CPUS are brought online. 508 */ 509 lgrp_main_mp_init(); 510 511 /* 512 * After mp_init(), number of cpus are known (this is 513 * true for the time being, when there are actually 514 * hot pluggable cpus then this scheme would not do). 515 * Any per cpu initialization is done here. 516 */ 517 kmem_mp_init(); 518 vmem_update(NULL); 519 520 for (initptr = &mp_init_tbl[0]; *initptr; initptr++) 521 (**initptr)(); 522 523 /* 524 * This must be called after start_other_cpus 525 */ 526 pm_cfb_setup_intr(); 527 528 /* 529 * Make init process; enter scheduling loop with system process. 530 */ 531 532 /* create init process */ 533 if (newproc(start_init, NULL, defaultcid, 59, NULL)) 534 panic("main: unable to fork init."); 535 536 /* create pageout daemon */ 537 if (newproc(pageout, NULL, syscid, maxclsyspri - 1, NULL)) 538 panic("main: unable to fork pageout()"); 539 540 /* create fsflush daemon */ 541 if (newproc(fsflush, NULL, syscid, minclsyspri, NULL)) 542 panic("main: unable to fork fsflush()"); 543 544 /* create cluster process if we're a member of one */ 545 if (cluster_bootflags & CLUSTER_BOOTED) { 546 if (newproc(cluster_wrapper, NULL, syscid, minclsyspri, NULL)) 547 panic("main: unable to fork cluster()"); 548 } 549 550 /* 551 * Create system threads (threads are associated with p0) 552 */ 553 554 /* create thread_reaper daemon */ 555 (void) thread_create(NULL, 0, (void (*)())thread_reaper, 556 NULL, 0, &p0, TS_RUN, minclsyspri); 557 558 /* create module uninstall daemon */ 559 /* BugID 1132273. If swapping over NFS need a bigger stack */ 560 (void) thread_create(NULL, 0, (void (*)())mod_uninstall_daemon, 561 NULL, 0, &p0, TS_RUN, minclsyspri); 562 563 (void) thread_create(NULL, 0, seg_pasync_thread, 564 NULL, 0, &p0, TS_RUN, minclsyspri); 565 566 pid_setmin(); 567 568 bcopy("sched", u.u_psargs, 6); 569 bcopy("sched", u.u_comm, 5); 570 sched(); 571 /* NOTREACHED */ 572 } 573