1*52c653b3SJeff Dike /* 2*52c653b3SJeff Dike * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com) 3*52c653b3SJeff Dike * Licensed under the GPL 4*52c653b3SJeff Dike */ 5*52c653b3SJeff Dike 6*52c653b3SJeff Dike #include <unistd.h> 7*52c653b3SJeff Dike #include <stdio.h> 8*52c653b3SJeff Dike #include <stdlib.h> 9*52c653b3SJeff Dike #include <string.h> 10*52c653b3SJeff Dike #include <signal.h> 11*52c653b3SJeff Dike #include <errno.h> 12*52c653b3SJeff Dike #include <sys/resource.h> 13*52c653b3SJeff Dike #include <sys/mman.h> 14*52c653b3SJeff Dike #include <sys/user.h> 15*52c653b3SJeff Dike #include <asm/page.h> 16*52c653b3SJeff Dike #include "user_util.h" 17*52c653b3SJeff Dike #include "kern_util.h" 18*52c653b3SJeff Dike #include "mem_user.h" 19*52c653b3SJeff Dike #include "signal_user.h" 20*52c653b3SJeff Dike #include "time_user.h" 21*52c653b3SJeff Dike #include "irq_user.h" 22*52c653b3SJeff Dike #include "user.h" 23*52c653b3SJeff Dike #include "init.h" 24*52c653b3SJeff Dike #include "mode.h" 25*52c653b3SJeff Dike #include "choose-mode.h" 26*52c653b3SJeff Dike #include "uml-config.h" 27*52c653b3SJeff Dike #include "os.h" 28*52c653b3SJeff Dike 29*52c653b3SJeff Dike /* Set in set_stklim, which is called from main and __wrap_malloc. 30*52c653b3SJeff Dike * __wrap_malloc only calls it if main hasn't started. 31*52c653b3SJeff Dike */ 32*52c653b3SJeff Dike unsigned long stacksizelim; 33*52c653b3SJeff Dike 34*52c653b3SJeff Dike /* Set in main */ 35*52c653b3SJeff Dike char *linux_prog; 36*52c653b3SJeff Dike 37*52c653b3SJeff Dike #define PGD_BOUND (4 * 1024 * 1024) 38*52c653b3SJeff Dike #define STACKSIZE (8 * 1024 * 1024) 39*52c653b3SJeff Dike #define THREAD_NAME_LEN (256) 40*52c653b3SJeff Dike 41*52c653b3SJeff Dike static void set_stklim(void) 42*52c653b3SJeff Dike { 43*52c653b3SJeff Dike struct rlimit lim; 44*52c653b3SJeff Dike 45*52c653b3SJeff Dike if(getrlimit(RLIMIT_STACK, &lim) < 0){ 46*52c653b3SJeff Dike perror("getrlimit"); 47*52c653b3SJeff Dike exit(1); 48*52c653b3SJeff Dike } 49*52c653b3SJeff Dike if((lim.rlim_cur == RLIM_INFINITY) || (lim.rlim_cur > STACKSIZE)){ 50*52c653b3SJeff Dike lim.rlim_cur = STACKSIZE; 51*52c653b3SJeff Dike if(setrlimit(RLIMIT_STACK, &lim) < 0){ 52*52c653b3SJeff Dike perror("setrlimit"); 53*52c653b3SJeff Dike exit(1); 54*52c653b3SJeff Dike } 55*52c653b3SJeff Dike } 56*52c653b3SJeff Dike stacksizelim = (lim.rlim_cur + PGD_BOUND - 1) & ~(PGD_BOUND - 1); 57*52c653b3SJeff Dike } 58*52c653b3SJeff Dike 59*52c653b3SJeff Dike static __init void do_uml_initcalls(void) 60*52c653b3SJeff Dike { 61*52c653b3SJeff Dike initcall_t *call; 62*52c653b3SJeff Dike 63*52c653b3SJeff Dike call = &__uml_initcall_start; 64*52c653b3SJeff Dike while (call < &__uml_initcall_end){; 65*52c653b3SJeff Dike (*call)(); 66*52c653b3SJeff Dike call++; 67*52c653b3SJeff Dike } 68*52c653b3SJeff Dike } 69*52c653b3SJeff Dike 70*52c653b3SJeff Dike static void last_ditch_exit(int sig) 71*52c653b3SJeff Dike { 72*52c653b3SJeff Dike signal(SIGINT, SIG_DFL); 73*52c653b3SJeff Dike signal(SIGTERM, SIG_DFL); 74*52c653b3SJeff Dike signal(SIGHUP, SIG_DFL); 75*52c653b3SJeff Dike uml_cleanup(); 76*52c653b3SJeff Dike exit(1); 77*52c653b3SJeff Dike } 78*52c653b3SJeff Dike 79*52c653b3SJeff Dike extern int uml_exitcode; 80*52c653b3SJeff Dike 81*52c653b3SJeff Dike extern void scan_elf_aux( char **envp); 82*52c653b3SJeff Dike 83*52c653b3SJeff Dike int main(int argc, char **argv, char **envp) 84*52c653b3SJeff Dike { 85*52c653b3SJeff Dike char **new_argv; 86*52c653b3SJeff Dike sigset_t mask; 87*52c653b3SJeff Dike int ret, i, err; 88*52c653b3SJeff Dike 89*52c653b3SJeff Dike /* Enable all signals except SIGIO - in some environments, we can 90*52c653b3SJeff Dike * enter with some signals blocked 91*52c653b3SJeff Dike */ 92*52c653b3SJeff Dike 93*52c653b3SJeff Dike sigemptyset(&mask); 94*52c653b3SJeff Dike sigaddset(&mask, SIGIO); 95*52c653b3SJeff Dike if(sigprocmask(SIG_SETMASK, &mask, NULL) < 0){ 96*52c653b3SJeff Dike perror("sigprocmask"); 97*52c653b3SJeff Dike exit(1); 98*52c653b3SJeff Dike } 99*52c653b3SJeff Dike 100*52c653b3SJeff Dike #ifdef UML_CONFIG_CMDLINE_ON_HOST 101*52c653b3SJeff Dike /* Allocate memory for thread command lines */ 102*52c653b3SJeff Dike if(argc < 2 || strlen(argv[1]) < THREAD_NAME_LEN - 1){ 103*52c653b3SJeff Dike 104*52c653b3SJeff Dike char padding[THREAD_NAME_LEN] = { 105*52c653b3SJeff Dike [ 0 ... THREAD_NAME_LEN - 2] = ' ', '\0' 106*52c653b3SJeff Dike }; 107*52c653b3SJeff Dike 108*52c653b3SJeff Dike new_argv = malloc((argc + 2) * sizeof(char*)); 109*52c653b3SJeff Dike if(!new_argv) { 110*52c653b3SJeff Dike perror("Allocating extended argv"); 111*52c653b3SJeff Dike exit(1); 112*52c653b3SJeff Dike } 113*52c653b3SJeff Dike 114*52c653b3SJeff Dike new_argv[0] = argv[0]; 115*52c653b3SJeff Dike new_argv[1] = padding; 116*52c653b3SJeff Dike 117*52c653b3SJeff Dike for(i = 2; i <= argc; i++) 118*52c653b3SJeff Dike new_argv[i] = argv[i - 1]; 119*52c653b3SJeff Dike new_argv[argc + 1] = NULL; 120*52c653b3SJeff Dike 121*52c653b3SJeff Dike execvp(new_argv[0], new_argv); 122*52c653b3SJeff Dike perror("execing with extended args"); 123*52c653b3SJeff Dike exit(1); 124*52c653b3SJeff Dike } 125*52c653b3SJeff Dike #endif 126*52c653b3SJeff Dike 127*52c653b3SJeff Dike linux_prog = argv[0]; 128*52c653b3SJeff Dike 129*52c653b3SJeff Dike set_stklim(); 130*52c653b3SJeff Dike 131*52c653b3SJeff Dike new_argv = malloc((argc + 1) * sizeof(char *)); 132*52c653b3SJeff Dike if(new_argv == NULL){ 133*52c653b3SJeff Dike perror("Mallocing argv"); 134*52c653b3SJeff Dike exit(1); 135*52c653b3SJeff Dike } 136*52c653b3SJeff Dike for(i=0;i<argc;i++){ 137*52c653b3SJeff Dike new_argv[i] = strdup(argv[i]); 138*52c653b3SJeff Dike if(new_argv[i] == NULL){ 139*52c653b3SJeff Dike perror("Mallocing an arg"); 140*52c653b3SJeff Dike exit(1); 141*52c653b3SJeff Dike } 142*52c653b3SJeff Dike } 143*52c653b3SJeff Dike new_argv[argc] = NULL; 144*52c653b3SJeff Dike 145*52c653b3SJeff Dike set_handler(SIGINT, last_ditch_exit, SA_ONESHOT | SA_NODEFER, -1); 146*52c653b3SJeff Dike set_handler(SIGTERM, last_ditch_exit, SA_ONESHOT | SA_NODEFER, -1); 147*52c653b3SJeff Dike set_handler(SIGHUP, last_ditch_exit, SA_ONESHOT | SA_NODEFER, -1); 148*52c653b3SJeff Dike 149*52c653b3SJeff Dike scan_elf_aux( envp); 150*52c653b3SJeff Dike 151*52c653b3SJeff Dike do_uml_initcalls(); 152*52c653b3SJeff Dike ret = linux_main(argc, argv); 153*52c653b3SJeff Dike 154*52c653b3SJeff Dike /* Disable SIGPROF - I have no idea why libc doesn't do this or turn 155*52c653b3SJeff Dike * off the profiling time, but UML dies with a SIGPROF just before 156*52c653b3SJeff Dike * exiting when profiling is active. 157*52c653b3SJeff Dike */ 158*52c653b3SJeff Dike change_sig(SIGPROF, 0); 159*52c653b3SJeff Dike 160*52c653b3SJeff Dike /* This signal stuff used to be in the reboot case. However, 161*52c653b3SJeff Dike * sometimes a SIGVTALRM can come in when we're halting (reproducably 162*52c653b3SJeff Dike * when writing out gcov information, presumably because that takes 163*52c653b3SJeff Dike * some time) and cause a segfault. 164*52c653b3SJeff Dike */ 165*52c653b3SJeff Dike 166*52c653b3SJeff Dike /* stop timers and set SIG*ALRM to be ignored */ 167*52c653b3SJeff Dike disable_timer(); 168*52c653b3SJeff Dike 169*52c653b3SJeff Dike /* disable SIGIO for the fds and set SIGIO to be ignored */ 170*52c653b3SJeff Dike err = deactivate_all_fds(); 171*52c653b3SJeff Dike if(err) 172*52c653b3SJeff Dike printf("deactivate_all_fds failed, errno = %d\n", -err); 173*52c653b3SJeff Dike 174*52c653b3SJeff Dike /* Let any pending signals fire now. This ensures 175*52c653b3SJeff Dike * that they won't be delivered after the exec, when 176*52c653b3SJeff Dike * they are definitely not expected. 177*52c653b3SJeff Dike */ 178*52c653b3SJeff Dike unblock_signals(); 179*52c653b3SJeff Dike 180*52c653b3SJeff Dike /* Reboot */ 181*52c653b3SJeff Dike if(ret){ 182*52c653b3SJeff Dike printf("\n"); 183*52c653b3SJeff Dike execvp(new_argv[0], new_argv); 184*52c653b3SJeff Dike perror("Failed to exec kernel"); 185*52c653b3SJeff Dike ret = 1; 186*52c653b3SJeff Dike } 187*52c653b3SJeff Dike printf("\n"); 188*52c653b3SJeff Dike return(uml_exitcode); 189*52c653b3SJeff Dike } 190*52c653b3SJeff Dike 191*52c653b3SJeff Dike #define CAN_KMALLOC() \ 192*52c653b3SJeff Dike (kmalloc_ok && CHOOSE_MODE((os_getpid() != tracing_pid), 1)) 193*52c653b3SJeff Dike 194*52c653b3SJeff Dike extern void *__real_malloc(int); 195*52c653b3SJeff Dike 196*52c653b3SJeff Dike void *__wrap_malloc(int size) 197*52c653b3SJeff Dike { 198*52c653b3SJeff Dike void *ret; 199*52c653b3SJeff Dike 200*52c653b3SJeff Dike if(!CAN_KMALLOC()) 201*52c653b3SJeff Dike return(__real_malloc(size)); 202*52c653b3SJeff Dike else if(size <= PAGE_SIZE) /* finding contiguos pages can be hard*/ 203*52c653b3SJeff Dike ret = um_kmalloc(size); 204*52c653b3SJeff Dike else ret = um_vmalloc(size); 205*52c653b3SJeff Dike 206*52c653b3SJeff Dike /* glibc people insist that if malloc fails, errno should be 207*52c653b3SJeff Dike * set by malloc as well. So we do. 208*52c653b3SJeff Dike */ 209*52c653b3SJeff Dike if(ret == NULL) 210*52c653b3SJeff Dike errno = ENOMEM; 211*52c653b3SJeff Dike 212*52c653b3SJeff Dike return(ret); 213*52c653b3SJeff Dike } 214*52c653b3SJeff Dike 215*52c653b3SJeff Dike void *__wrap_calloc(int n, int size) 216*52c653b3SJeff Dike { 217*52c653b3SJeff Dike void *ptr = __wrap_malloc(n * size); 218*52c653b3SJeff Dike 219*52c653b3SJeff Dike if(ptr == NULL) return(NULL); 220*52c653b3SJeff Dike memset(ptr, 0, n * size); 221*52c653b3SJeff Dike return(ptr); 222*52c653b3SJeff Dike } 223*52c653b3SJeff Dike 224*52c653b3SJeff Dike extern void __real_free(void *); 225*52c653b3SJeff Dike 226*52c653b3SJeff Dike extern unsigned long high_physmem; 227*52c653b3SJeff Dike 228*52c653b3SJeff Dike void __wrap_free(void *ptr) 229*52c653b3SJeff Dike { 230*52c653b3SJeff Dike unsigned long addr = (unsigned long) ptr; 231*52c653b3SJeff Dike 232*52c653b3SJeff Dike /* We need to know how the allocation happened, so it can be correctly 233*52c653b3SJeff Dike * freed. This is done by seeing what region of memory the pointer is 234*52c653b3SJeff Dike * in - 235*52c653b3SJeff Dike * physical memory - kmalloc/kfree 236*52c653b3SJeff Dike * kernel virtual memory - vmalloc/vfree 237*52c653b3SJeff Dike * anywhere else - malloc/free 238*52c653b3SJeff Dike * If kmalloc is not yet possible, then either high_physmem and/or 239*52c653b3SJeff Dike * end_vm are still 0 (as at startup), in which case we call free, or 240*52c653b3SJeff Dike * we have set them, but anyway addr has not been allocated from those 241*52c653b3SJeff Dike * areas. So, in both cases __real_free is called. 242*52c653b3SJeff Dike * 243*52c653b3SJeff Dike * CAN_KMALLOC is checked because it would be bad to free a buffer 244*52c653b3SJeff Dike * with kmalloc/vmalloc after they have been turned off during 245*52c653b3SJeff Dike * shutdown. 246*52c653b3SJeff Dike * XXX: However, we sometimes shutdown CAN_KMALLOC temporarily, so 247*52c653b3SJeff Dike * there is a possibility for memory leaks. 248*52c653b3SJeff Dike */ 249*52c653b3SJeff Dike 250*52c653b3SJeff Dike if((addr >= uml_physmem) && (addr < high_physmem)){ 251*52c653b3SJeff Dike if(CAN_KMALLOC()) 252*52c653b3SJeff Dike kfree(ptr); 253*52c653b3SJeff Dike } 254*52c653b3SJeff Dike else if((addr >= start_vm) && (addr < end_vm)){ 255*52c653b3SJeff Dike if(CAN_KMALLOC()) 256*52c653b3SJeff Dike vfree(ptr); 257*52c653b3SJeff Dike } 258*52c653b3SJeff Dike else __real_free(ptr); 259*52c653b3SJeff Dike } 260