/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/kmem.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/mman.h>
#include <sys/cmn_err.h>
#include <sys/cred.h>
#include <sys/vmsystm.h>
#include <sys/debug.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <sys/vmparam.h>
#include <sys/vfs.h>
#include <sys/elf.h>
#include <sys/machelf.h>
#include <sys/corectl.h>
#include <sys/exec.h>
#include <sys/exechdr.h>
#include <sys/autoconf.h>
#include <sys/mem.h>
#include <vm/seg_dev.h>
#include <sys/mmapobj.h>
#include <sys/atomic.h>

/*
 * Theory statement:
 *
 * The main driving force behind mmapobj is to interpret and map ELF files
 * inside of the kernel instead of having the linker be responsible for this.
 *
 * mmapobj also supports the AOUT 4.x binary format as well as flat files in
 * a read only manner.
 *
 * When interpreting and mapping an ELF file, mmapobj will map each PT_LOAD
 * or PT_SUNWBSS segment according to the ELF standard.  Refer to the "Linker
 * and Libraries Guide" for more information about the standard and mapping
 * rules.
 *
 * Having mmapobj interpret and map objects will allow the kernel to make the
 * best decision for where to place the mappings for said objects.  Thus, we
 * can make optimizations inside of the kernel for specific platforms or
 * cache mapping information to make mapping objects faster.
 *
 * The lib_va_hash will be one such optimization.  For each ELF object that
 * mmapobj is asked to interpret, we will attempt to cache the information
 * about the PT_LOAD and PT_SUNWBSS sections to speed up future mappings of
 * the same objects.  We will cache up to LIBVA_CACHED_SEGS (see below) program
 * headers which should cover a majority of the libraries out there without
 * wasting space.  In order to make sure that the cached information is valid,
 * we check the passed in vnode's mtime and ctime to make sure the vnode
 * has not been modified since the last time we used it.
 *
 * In addition, the lib_va_hash may contain a preferred starting VA for the
 * object which can be useful for platforms which support a shared context.
 * This will increase the likelihood that library text can be shared among
 * many different processes.  We limit the reserved VA space for 32 bit objects
 * in order to minimize fragmenting the process's address space.
 *
 * In addition to the above, the mmapobj interface allows for padding to be
 * requested before the first mapping and after the last mapping created.
 * When padding is requested, no additional optimizations will be made for
 * that request.
 */
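
/*
 * To make the above concrete, this is roughly how a consumer sees the
 * results this file produces (a hypothetical user-level sketch of the
 * mmapobj(2) entry point; see <sys/mmapobj.h> for the definitive flags
 * and types):
 *
 *	mmapobj_result_t mrp[16];
 *	uint_t nelms = 16;
 *	error = mmapobj(fd, MMOBJ_INTERPRET, mrp, &nelms, NULL);
 *
 * On success, each of the nelms entries in mrp[] describes one mapping
 * that was created: mr_addr/mr_msize give the range, mr_prot the
 * protections, and mr_flags attributes such as MR_PADDING.
 */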

/*
 * Threshold to prevent allocating too much kernel memory to read in the
 * program headers for an object.  If it requires more than below,
 * we will use a KM_NOSLEEP allocation to allocate memory to hold all of the
 * program headers which could possibly fail.  If less memory than below is
 * needed, then we use a KM_SLEEP allocation and are willing to wait for the
 * memory if we need to.
 */
size_t mmapobj_alloc_threshold = 65536;

/* Debug stats for test coverage */
#ifdef DEBUG
struct mobj_stats {
	uint_t	mobjs_unmap_called;
	uint_t	mobjs_remap_devnull;
	uint_t	mobjs_lookup_start;
	uint_t	mobjs_alloc_start;
	uint_t	mobjs_alloc_vmem;
	uint_t	mobjs_add_collision;
	uint_t	mobjs_get_addr;
	uint_t	mobjs_map_flat_no_padding;
	uint_t	mobjs_map_flat_padding;
	uint_t	mobjs_map_ptload_text;
	uint_t	mobjs_map_ptload_initdata;
	uint_t	mobjs_map_ptload_preread;
	uint_t	mobjs_map_ptload_unaligned_text;
	uint_t	mobjs_map_ptload_unaligned_map_fail;
	uint_t	mobjs_map_ptload_unaligned_read_fail;
	uint_t	mobjs_zfoddiff;
	uint_t	mobjs_zfoddiff_nowrite;
	uint_t	mobjs_zfodextra;
	uint_t	mobjs_ptload_failed;
	uint_t	mobjs_map_elf_no_holes;
	uint_t	mobjs_unmap_hole;
	uint_t	mobjs_nomem_header;
	uint_t	mobjs_overlap_header;
	uint_t	mobjs_np2_align;
	uint_t	mobjs_np2_align_overflow;
	uint_t	mobjs_exec_padding;
	uint_t	mobjs_exec_addr_mapped;
	uint_t	mobjs_exec_addr_devnull;
	uint_t	mobjs_exec_addr_in_use;
	uint_t	mobjs_lvp_found;
	uint_t	mobjs_no_loadable_yet;
	uint_t	mobjs_nothing_to_map;
	uint_t	mobjs_e2big;
	uint_t	mobjs_dyn_pad_align;
	uint_t	mobjs_dyn_pad_noalign;
	uint_t	mobjs_alloc_start_fail;
	uint_t	mobjs_lvp_nocache;
	uint_t	mobjs_extra_padding;
	uint_t	mobjs_lvp_not_needed;
	uint_t	mobjs_no_mem_map_sz;
	uint_t	mobjs_check_exec_failed;
	uint_t	mobjs_lvp_used;
	uint_t	mobjs_wrong_model;
	uint_t	mobjs_noexec_fs;
	uint_t	mobjs_e2big_et_rel;
	uint_t	mobjs_et_rel_mapped;
	uint_t	mobjs_unknown_elf_type;
	uint_t	mobjs_phent32_too_small;
	uint_t	mobjs_phent64_too_small;
	uint_t	mobjs_inval_elf_class;
	uint_t	mobjs_too_many_phdrs;
	uint_t	mobjs_no_phsize;
	uint_t	mobjs_phsize_large;
	uint_t	mobjs_phsize_xtralarge;
	uint_t	mobjs_fast_wrong_model;
	uint_t	mobjs_fast_e2big;
	uint_t	mobjs_fast;
	uint_t	mobjs_fast_success;
	uint_t	mobjs_fast_not_now;
	uint_t	mobjs_small_file;
	uint_t	mobjs_read_error;
	uint_t	mobjs_unsupported;
	uint_t	mobjs_flat_e2big;
	uint_t	mobjs_phent_align32;
	uint_t	mobjs_phent_align64;
	uint_t	mobjs_lib_va_find_hit;
	uint_t	mobjs_lib_va_find_delay_delete;
	uint_t	mobjs_lib_va_find_delete;
	uint_t	mobjs_lib_va_add_delay_delete;
	uint_t	mobjs_lib_va_add_delete;
#if defined(__sparc)
	uint_t	mobjs_vac_align;
	uint_t	mobjs_aout_uzero_fault;
	uint_t	mobjs_aout_64bit_try;
	uint_t	mobjs_aout_noexec;
	uint_t	mobjs_aout_e2big;
	uint_t	mobjs_aout_lib;
	uint_t	mobjs_aout_fixed;
	uint_t	mobjs_aout_zfoddiff;
	uint_t	mobjs_aout_map_bss;
	uint_t	mobjs_aout_bss_fail;
	uint_t	mobjs_aout_nlist;
	uint_t	mobjs_aout_addr_in_use;
#endif
} mobj_stats;

#define	MOBJ_STAT_ADD(stat)		((mobj_stats.mobjs_##stat)++)
#else
#define	MOBJ_STAT_ADD(stat)
#endif

/* lv_flags values - bitmap */
#define	LV_ELF32	0x1		/* 32 bit ELF file */
#define	LV_ELF64	0x2		/* 64 bit ELF file */
#define	LV_DEL		0x4		/* delete when lv_refcnt hits zero */

/*
 * Note: lv_num_segs will denote how many segments this file has and will
 * only be set after the lv_mps array has been filled out.
 * lv_mps can only be valid if lv_num_segs is non-zero.
 */
struct lib_va {
	struct lib_va		*lv_next;
	caddr_t			lv_base_va;	/* start va for library */
	ssize_t			lv_len;		/* total va span of library */
	size_t			lv_align;	/* minimum alignment */
	uint64_t		lv_nodeid;	/* filesystem node id */
	uint64_t		lv_fsid;	/* filesystem id */
	timestruc_t		lv_ctime;	/* last time file was changed */
	timestruc_t		lv_mtime;	/* or modified */
	mmapobj_result_t	lv_mps[LIBVA_CACHED_SEGS]; /* cached pheaders */
	int			lv_num_segs;	/* # segs for this file */
	int			lv_flags;
	uint_t			lv_refcnt;	/* number of holds on struct */
};

#define	LIB_VA_SIZE	1024
#define	LIB_VA_MASK	(LIB_VA_SIZE - 1)
#define	LIB_VA_MUTEX_SHIFT	3

#if (LIB_VA_SIZE & (LIB_VA_SIZE - 1))
#error	"LIB_VA_SIZE is not a power of 2"
#endif

static struct lib_va *lib_va_hash[LIB_VA_SIZE];
static kmutex_t lib_va_hash_mutex[LIB_VA_SIZE >> LIB_VA_MUTEX_SHIFT];

#define	LIB_VA_HASH_MUTEX(index)					\
	(&lib_va_hash_mutex[index >> LIB_VA_MUTEX_SHIFT])

#define	LIB_VA_HASH(nodeid)						\
	(((nodeid) ^ ((nodeid) << 7) ^ ((nodeid) << 13)) & LIB_VA_MASK)

#define	LIB_VA_MATCH_ID(arg1, arg2)					\
	((arg1)->lv_nodeid == (arg2)->va_nodeid &&			\
	(arg1)->lv_fsid == (arg2)->va_fsid)

#define	LIB_VA_MATCH_TIME(arg1, arg2)					\
	((arg1)->lv_ctime.tv_sec == (arg2)->va_ctime.tv_sec &&		\
	(arg1)->lv_mtime.tv_sec == (arg2)->va_mtime.tv_sec &&		\
	(arg1)->lv_ctime.tv_nsec == (arg2)->va_ctime.tv_nsec &&	\
	(arg1)->lv_mtime.tv_nsec == (arg2)->va_mtime.tv_nsec)

#define	LIB_VA_MATCH(arg1, arg2)					\
	(LIB_VA_MATCH_ID(arg1, arg2) && LIB_VA_MATCH_TIME(arg1, arg2))
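
/*
 * For clarity, the lookup pattern these macros are built for (a sketch of
 * what lib_va_find() below actually does):
 *
 *	index = LIB_VA_HASH(vap->va_nodeid);
 *	mutex_enter(LIB_VA_HASH_MUTEX(index));
 *	for (lvp = lib_va_hash[index]; lvp != NULL; lvp = lvp->lv_next) {
 *		if (LIB_VA_MATCH(lvp, vap))
 *			break;
 *	}
 *	mutex_exit(LIB_VA_HASH_MUTEX(index));
 *
 * Note that a single mutex covers 2^LIB_VA_MUTEX_SHIFT (8) adjacent hash
 * chains, which keeps the lock array small at the cost of some contention
 * between neighboring chains.
 */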

/*
 * In order to map libraries at the same VA in many processes, we need to carve
 * out our own address space for them which is unique across many processes.
 * We use different arenas for 32 bit and 64 bit libraries.
 *
 * Since the 32 bit address space is relatively small, we limit the number of
 * libraries which try to use consistent virtual addresses to lib_threshold.
 * For 64 bit libraries there is no such limit since the address space is large.
 */
static vmem_t *lib_va_32_arena;
static vmem_t *lib_va_64_arena;
uint_t lib_threshold = 20;	/* modifiable via /etc/system */

/*
 * Number of 32 bit and 64 bit libraries in lib_va hash.
 */
static uint_t libs_mapped_32 = 0;
static uint_t libs_mapped_64 = 0;

/*
 * Initialize the VA span of the lib_va arenas to about half of the VA space
 * of a user process.  These VAs will be used for optimized allocation of
 * libraries, such that subsequent mappings of the same library will attempt
 * to use the same VA as previous mappings of that library.
 */
void
lib_va_init(void)
{
	size_t start;
	size_t end;
	size_t len;
	/*
	 * On 32 bit sparc, the user stack and /lib/ld.so.1 will both live
	 * above the end address that we choose.  On 32 bit x86 only
	 * /lib/ld.so.1 will live above the end address that we choose
	 * because the user stack is at the bottom of the address space.
	 *
	 * We estimate the size of ld.so.1 to be 512K which leaves significant
	 * room for growth without needing to change this value.  Thus it is
	 * safe for libraries to be mapped up to that address.
	 *
	 * If the length of ld.so.1 were to grow beyond 512K then
	 * a library that has a reserved address in that range would always
	 * fail to get that address and would have to call map_addr
	 * to get an unused address range.  On DEBUG kernels, we will check
	 * on the first use of lib_va that our address does not overlap
	 * ld.so.1, and if it does, then we'll print a cmn_err message.
	 */
#if defined(__sparc)
	end = _userlimit32 - DFLSSIZ - (512 * 1024);
#elif defined(__i386) || defined(__amd64)
	end = _userlimit32 - (512 * 1024);
#else
#error "no recognized machine type is defined"
#endif
	len = end >> 1;
	len = P2ROUNDUP(len, PAGESIZE);
	start = end - len;
	lib_va_32_arena = vmem_create("lib_va_32", (void *)start, len,
	    PAGESIZE, NULL, NULL, NULL, 0, VM_NOSLEEP | VMC_IDENTIFIER);

#if defined(_LP64)
	/*
	 * The user stack and /lib/ld.so.1 will both live above the end address
	 * that we choose.  We estimate the size of a mapped ld.so.1 to be 2M
	 * which leaves significant room for growth without needing to change
	 * this value.  Thus it is safe for libraries to be mapped up to
	 * that address.  The same considerations for the size of ld.so.1 that
	 * were mentioned above also apply here.
	 */
	end = _userlimit - DFLSSIZ - (2 * 1024 * 1024);
	len = end >> 1;
	len = P2ROUNDUP(len, PAGESIZE);
	start = end - len;
	lib_va_64_arena = vmem_create("lib_va_64", (void *)start, len,
	    PAGESIZE, NULL, NULL, NULL, 0, VM_NOSLEEP | VMC_IDENTIFIER);
#endif
}
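
/*
 * The net effect of the sizing arithmetic above: each arena spans
 * [end - len, end), i.e. roughly the upper half of the usable address
 * range, ending just below the space set aside for ld.so.1 (and, on
 * platforms where the stack lives up there, the stack).  For example,
 * with an end of 0xA0000000 the 32 bit arena would cover approximately
 * 0x50000000-0xA0000000 (illustrative numbers only; the real bounds
 * depend on _userlimit32, DFLSSIZ and PAGESIZE).
 */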

/*
 * Free up the resources associated with lvp as well as lvp itself.
 * We also decrement the number of libraries mapped via a lib_va
 * cached virtual address.
 */
void
lib_va_free(struct lib_va *lvp)
{
	int is_64bit = lvp->lv_flags & LV_ELF64;
	ASSERT(lvp->lv_refcnt == 0);

	if (lvp->lv_base_va != NULL) {
		vmem_xfree(is_64bit ? lib_va_64_arena : lib_va_32_arena,
		    lvp->lv_base_va, lvp->lv_len);
		if (is_64bit) {
			atomic_add_32(&libs_mapped_64, -1);
		} else {
			atomic_add_32(&libs_mapped_32, -1);
		}
	}
	kmem_free(lvp, sizeof (struct lib_va));
}

/*
 * See if the file associated with the vap passed in is in the lib_va hash.
 * If it is and the file has not been modified since last use, then
 * return a pointer to that data.  Otherwise, return NULL if the file has
 * changed or the file was not found in the hash.
 */
static struct lib_va *
lib_va_find(vattr_t *vap)
{
	struct lib_va *lvp;
	struct lib_va *del = NULL;
	struct lib_va **tmp;
	uint_t index;
	index = LIB_VA_HASH(vap->va_nodeid);

	mutex_enter(LIB_VA_HASH_MUTEX(index));
	tmp = &lib_va_hash[index];
	while (*tmp != NULL) {
		lvp = *tmp;
		if (LIB_VA_MATCH_ID(lvp, vap)) {
			if (LIB_VA_MATCH_TIME(lvp, vap)) {
				ASSERT((lvp->lv_flags & LV_DEL) == 0);
				lvp->lv_refcnt++;
				MOBJ_STAT_ADD(lib_va_find_hit);
			} else {
				/*
				 * file was updated since last use.
				 * need to remove it from list.
				 */
				del = lvp;
				*tmp = del->lv_next;
				del->lv_next = NULL;
				/*
				 * If we can't delete it now, mark it for later
				 */
				if (del->lv_refcnt) {
					MOBJ_STAT_ADD(lib_va_find_delay_delete);
					del->lv_flags |= LV_DEL;
					del = NULL;
				}
				lvp = NULL;
			}
			mutex_exit(LIB_VA_HASH_MUTEX(index));
			if (del) {
				ASSERT(del->lv_refcnt == 0);
				MOBJ_STAT_ADD(lib_va_find_delete);
				lib_va_free(del);
			}
			return (lvp);
		}
		tmp = &lvp->lv_next;
	}
	mutex_exit(LIB_VA_HASH_MUTEX(index));
	return (NULL);
}
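
/*
 * Summary of the delayed deletion protocol used above and in
 * lib_va_add_hash() below (descriptive only; no additional API):
 *
 *	lv_refcnt == 0:	the stale entry is unlinked from its hash chain
 *			and freed immediately via lib_va_free().
 *	lv_refcnt > 0:	the entry is unlinked and LV_DEL is set; the
 *			final lib_va_release() performs the free.
 *
 * In both cases the entry is off the hash chain before the mutex is
 * dropped, so later lookups can not return stale data.
 */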

/*
 * Add a new entry to the lib_va hash.
 * Search the hash while holding the appropriate mutex to make sure that the
 * data is not already in the cache.  If we find data that is in the cache
 * already and has not been modified since last use, we return NULL.  If it
 * has been modified since last use, we will remove that entry from
 * the hash and it will be deleted once its reference count reaches zero.
 * If there is no current entry in the hash we will add the new entry and
 * return it to the caller who is responsible for calling lib_va_release to
 * drop their reference count on it.
 *
 * lv_num_segs will be set to zero since the caller needs to add that
 * information to the data structure.
 */
static struct lib_va *
lib_va_add_hash(caddr_t base_va, ssize_t len, size_t align, vattr_t *vap)
{
	struct lib_va *lvp;
	uint_t index;
	model_t model;
	struct lib_va **tmp;
	struct lib_va *del = NULL;

	model = get_udatamodel();
	index = LIB_VA_HASH(vap->va_nodeid);

	lvp = kmem_alloc(sizeof (struct lib_va), KM_SLEEP);

	mutex_enter(LIB_VA_HASH_MUTEX(index));

	/*
	 * Make sure we are not adding the same data a second time.
	 * The hash chains should be relatively short and adding
	 * is a relatively rare event, so it's worth the check.
	 */
	tmp = &lib_va_hash[index];
	while (*tmp != NULL) {
		if (LIB_VA_MATCH_ID(*tmp, vap)) {
			if (LIB_VA_MATCH_TIME(*tmp, vap)) {
				mutex_exit(LIB_VA_HASH_MUTEX(index));
				kmem_free(lvp, sizeof (struct lib_va));
				return (NULL);
			}

			/*
			 * We have the same nodeid and fsid but the file has
			 * been modified since we last saw it.
			 * Need to remove the old node and add this new
			 * one.
			 * Could probably use a callback mechanism to make
			 * this cleaner.
			 */
			ASSERT(del == NULL);
			del = *tmp;
			*tmp = del->lv_next;
			del->lv_next = NULL;

			/*
			 * Check to see if we can free it.  If lv_refcnt
			 * is greater than zero, then some other thread
			 * has a reference to the one we want to delete
			 * and we can not delete it.  All of this is done
			 * under the lib_va_hash_mutex lock so it is atomic.
			 */
			if (del->lv_refcnt) {
				MOBJ_STAT_ADD(lib_va_add_delay_delete);
				del->lv_flags |= LV_DEL;
				del = NULL;
			}
			/* tmp is already advanced */
			continue;
		}
		tmp = &((*tmp)->lv_next);
	}

	lvp->lv_base_va = base_va;
	lvp->lv_len = len;
	lvp->lv_align = align;
	lvp->lv_nodeid = vap->va_nodeid;
	lvp->lv_fsid = vap->va_fsid;
	lvp->lv_ctime.tv_sec = vap->va_ctime.tv_sec;
	lvp->lv_ctime.tv_nsec = vap->va_ctime.tv_nsec;
	lvp->lv_mtime.tv_sec = vap->va_mtime.tv_sec;
	lvp->lv_mtime.tv_nsec = vap->va_mtime.tv_nsec;
	lvp->lv_next = NULL;
	lvp->lv_refcnt = 1;

	/* Caller responsible for filling this and lv_mps out */
	lvp->lv_num_segs = 0;

	if (model == DATAMODEL_LP64) {
		lvp->lv_flags = LV_ELF64;
	} else {
		ASSERT(model == DATAMODEL_ILP32);
		lvp->lv_flags = LV_ELF32;
	}

	if (base_va != NULL) {
		if (model == DATAMODEL_LP64) {
			atomic_add_32(&libs_mapped_64, 1);
		} else {
			ASSERT(model == DATAMODEL_ILP32);
			atomic_add_32(&libs_mapped_32, 1);
		}
	}
	ASSERT(*tmp == NULL);
	*tmp = lvp;
	mutex_exit(LIB_VA_HASH_MUTEX(index));
	if (del) {
		ASSERT(del->lv_refcnt == 0);
		MOBJ_STAT_ADD(lib_va_add_delete);
		lib_va_free(del);
	}
	return (lvp);
}

/*
 * Release the hold on lvp which was acquired by lib_va_find or lib_va_add_hash.
 * In addition, if this is the last hold and lvp is marked for deletion,
 * free up its reserved address space and free the structure.
 */
static void
lib_va_release(struct lib_va *lvp)
{
	uint_t index;
	int to_del = 0;

	ASSERT(lvp->lv_refcnt > 0);

	index = LIB_VA_HASH(lvp->lv_nodeid);
	mutex_enter(LIB_VA_HASH_MUTEX(index));
	if (--lvp->lv_refcnt == 0 && (lvp->lv_flags & LV_DEL)) {
		to_del = 1;
	}
	mutex_exit(LIB_VA_HASH_MUTEX(index));
	if (to_del) {
		ASSERT(lvp->lv_next == 0);
		lib_va_free(lvp);
	}
}
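
/*
 * Putting the three routines above together, the typical lifecycle of a
 * cache entry is (a sketch; mmapobj_alloc_start_addr() below and the ELF
 * mapping code later in this file are the real users):
 *
 *	lvp = lib_va_find(&vattr);
 *	if (lvp == NULL)
 *		lvp = lib_va_add_hash(base, len, align, &vattr);
 *	...use lvp->lv_mps once lvp->lv_num_segs is non-zero...
 *	lib_va_release(lvp);
 */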

/*
 * Dummy function for mapping through /dev/null
 * Normally I would have used mmmmap in common/io/mem.c
 * but that is a static function, and for /dev/null, it
 * just returns -1.
 */
/* ARGSUSED */
static int
mmapobj_dummy(dev_t dev, off_t off, int prot)
{
	return (-1);
}

/*
 * Called when an error occurs which requires mmapobj to return failure.
 * All mapped objects will be unmapped and /dev/null mappings will be
 * reclaimed if necessary.
 * num_mapped is the number of elements of mrp which have been mapped, and
 * num_segs is the total number of elements in mrp.
 * For e_type ET_EXEC, we need to unmap all of the elements in mrp since
 * we had already made reservations for them.
 * If num_mapped equals num_segs, then we know that we had fully mapped
 * the file and only need to clean up the segments described.
 * If they are not equal, then for ET_DYN we will unmap the range from the
 * end of the last mapped segment to the end of the last segment in mrp
 * since we would have made a reservation for that memory earlier.
 * If e_type is passed in as zero, num_mapped must equal num_segs.
 */
void
mmapobj_unmap(mmapobj_result_t *mrp, int num_mapped, int num_segs,
    ushort_t e_type)
{
	int i;
	struct as *as = curproc->p_as;
	caddr_t addr;
	size_t size;

	if (e_type == ET_EXEC) {
		num_mapped = num_segs;
	}
#ifdef DEBUG
	if (e_type == 0) {
		ASSERT(num_mapped == num_segs);
	}
#endif

	MOBJ_STAT_ADD(unmap_called);
	for (i = 0; i < num_mapped; i++) {

		/*
		 * If we are going to have to create a mapping we need to
		 * make sure that no one else will use the address we
		 * need to remap between the time it is unmapped and
		 * mapped below.
		 */
		if (mrp[i].mr_flags & MR_RESV) {
			as_rangelock(as);
		}
		/* Always need to unmap what we mapped */
		(void) as_unmap(as, mrp[i].mr_addr, mrp[i].mr_msize);

		/* Need to reclaim /dev/null reservation from earlier */
		if (mrp[i].mr_flags & MR_RESV) {
			struct segdev_crargs dev_a;

			ASSERT(e_type != ET_DYN);
			/*
			 * Use seg_dev segment driver for /dev/null mapping.
			 */
			dev_a.mapfunc = mmapobj_dummy;
			dev_a.dev = makedevice(mm_major, M_NULL);
			dev_a.offset = 0;
			dev_a.type = 0;		/* neither PRIVATE nor SHARED */
			dev_a.prot = dev_a.maxprot = (uchar_t)PROT_NONE;
			dev_a.hat_attr = 0;
			dev_a.hat_flags = 0;

			(void) as_map(as, mrp[i].mr_addr, mrp[i].mr_msize,
			    segdev_create, &dev_a);
			MOBJ_STAT_ADD(remap_devnull);
			as_rangeunlock(as);
		}
	}

	if (num_mapped != num_segs) {
		ASSERT(e_type == ET_DYN);
		/* Need to unmap any reservation made after last mapped seg */
		if (num_mapped == 0) {
			addr = mrp[0].mr_addr;
		} else {
			addr = mrp[num_mapped - 1].mr_addr +
			    mrp[num_mapped - 1].mr_msize;
		}
		size = (size_t)mrp[num_segs - 1].mr_addr +
		    mrp[num_segs - 1].mr_msize - (size_t)addr;
		(void) as_unmap(as, addr, size);

		/*
		 * Now we need to unmap the holes between mapped segs.
		 * Note that we have not mapped all of the segments and thus
		 * the holes between segments would not have been unmapped
		 * yet.  If num_mapped == num_segs, then all of the holes
		 * between segments would have already been unmapped.
		 */

		for (i = 1; i < num_mapped; i++) {
			addr = mrp[i - 1].mr_addr + mrp[i - 1].mr_msize;
			size = mrp[i].mr_addr - addr;
			(void) as_unmap(as, addr, size);
		}
	}
}
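
/*
 * Geometry of the ET_DYN cleanup above for a four segment file of which
 * only the first two were mapped (a descriptive sketch):
 *
 *	  mrp[0]   hole   mrp[1]        remaining reservation
 *	|========|......|========|...............................|
 *	                          ^addr                           ^
 *	                           end of mrp[num_segs - 1] ------+
 *
 * The tail of the reservation is unmapped first, and then the holes
 * between the segments that actually were mapped.
 */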

/*
 * We need to add the start address into mrp so that the unmap function
 * has absolute addresses to use.
 */
static void
mmapobj_unmap_exec(mmapobj_result_t *mrp, int num_mapped, caddr_t start_addr)
{
	int i;

	for (i = 0; i < num_mapped; i++) {
		mrp[i].mr_addr += (size_t)start_addr;
	}
	mmapobj_unmap(mrp, num_mapped, num_mapped, ET_EXEC);
}

static caddr_t
mmapobj_lookup_start_addr(struct lib_va *lvp)
{
	struct as *as = curproc->p_as;
	struct segvn_crargs crargs = SEGVN_ZFOD_ARGS(PROT_USER, PROT_ALL);
	int error;
	uint_t ma_flags = _MAP_LOW32;
	caddr_t base = NULL;
	size_t len;
	size_t align;

	ASSERT(lvp != NULL);
	MOBJ_STAT_ADD(lookup_start);

	as_rangelock(as);

	base = lvp->lv_base_va;
	len = lvp->lv_len;

	/*
	 * If we don't have an expected base address, or the one that we want
	 * to use is not available, go get an acceptable address range.
	 */
	if (base == NULL || as_gap(as, len, &base, &len, 0, NULL)) {

		if (lvp->lv_flags & LV_ELF64) {
			ma_flags = 0;
		}

		align = lvp->lv_align;
		if (align > 1) {
			ma_flags |= MAP_ALIGN;
		}

		base = (caddr_t)align;
		map_addr(&base, len, 0, 1, ma_flags);
	}

	/*
	 * Need to reserve the address space we're going to use.
	 * Don't reserve swap space since we'll be mapping over this.
	 */
	if (base != NULL) {
		crargs.flags |= MAP_NORESERVE;
		error = as_map(as, base, len, segvn_create, &crargs);
		if (error) {
			base = NULL;
		}
	}

	as_rangeunlock(as);
	return (base);
}
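
/*
 * A note on the map_addr() idiom above (also used in
 * mmapobj_alloc_start_addr() below): when MAP_ALIGN is set, map_addr()
 * treats the address passed in as the requested alignment rather than
 * as a placement hint, which is why base is loaded with the alignment
 * value immediately before the call.
 */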

/*
 * Get the starting address for a given file to be mapped and return it
 * to the caller.  If we're using lib_va and we need to allocate an address,
 * we will attempt to allocate it from the global reserved pool such that the
 * same address can be used in the future for this file.  If we can't use the
 * reserved address then we just get one that will fit in our address space.
 *
 * Returns the starting virtual address for the range to be mapped or NULL
 * if an error is encountered.  If we successfully insert the requested info
 * into the lib_va hash, then *lvpp will be set to point to this lib_va
 * structure.  The structure will have a hold on it and thus lib_va_release
 * needs to be called on it by the caller.  This function will not fill out
 * lv_mps or lv_num_segs since it does not have enough information to do so.
 * The caller is responsible for doing this and for making sure that any
 * modifications to lv_mps are visible before setting lv_num_segs.
 */
static caddr_t
mmapobj_alloc_start_addr(struct lib_va **lvpp, size_t len, int use_lib_va,
    size_t align, vattr_t *vap)
{
	struct as *as = curproc->p_as;
	struct segvn_crargs crargs = SEGVN_ZFOD_ARGS(PROT_USER, PROT_ALL);
	int error;
	model_t model;
	uint_t ma_flags = _MAP_LOW32;
	caddr_t base = NULL;
	vmem_t *model_vmem;

	ASSERT(lvpp != NULL);

	MOBJ_STAT_ADD(alloc_start);
	model = get_udatamodel();

	if (model == DATAMODEL_LP64) {
		ma_flags = 0;
		model_vmem = lib_va_64_arena;
	} else {
		ASSERT(model == DATAMODEL_ILP32);
		model_vmem = lib_va_32_arena;
	}

	if (align > 1) {
		ma_flags |= MAP_ALIGN;
	}
	if (use_lib_va) {
		if (model == DATAMODEL_LP64 || libs_mapped_32 < lib_threshold) {
			base = vmem_xalloc(model_vmem, len, align, 0, 0, NULL,
			    NULL, VM_NOSLEEP | VM_ENDALLOC);
			MOBJ_STAT_ADD(alloc_vmem);
		}
#ifdef DEBUG
		/*
		 * Check to see if we've run into ld.so.1.
		 * If this is the first library we've mapped and we can not
		 * use our reserved address space, then it's likely that
		 * ld.so.1 is occupying some of this space and the
		 * model_vmem arena bounds need to be changed.  If we've run
		 * into something else besides ld.so.1 we'll see this message
		 * on the first use of mmapobj and should ignore the message.
		 */
		if (base != NULL && libs_mapped_32 == 0 &&
		    model == DATAMODEL_ILP32 &&
		    as_gap(as, len, &base, &len, 0, NULL)) {
			cmn_err(CE_NOTE,
			    "lib_va_32_arena may not be optimized");
		} else if (base != NULL && libs_mapped_64 == 0 &&
		    model == DATAMODEL_LP64 &&
		    as_gap(as, len, &base, &len, 0, NULL)) {
			cmn_err(CE_NOTE,
			    "lib_va_64_arena may not be optimized");
		}
#endif
		/*
		 * Even if the address fails to fit in our address space,
		 * or we can't use a reserved address,
		 * we should still save it off in lib_va_hash.
		 */
		*lvpp = lib_va_add_hash(base, len, align, vap);

		/*
		 * Check for collision on insertion and free up our VA space.
		 * This is expected to be rare, so we'll just reset base to
		 * NULL instead of looking it up in the lib_va hash.
		 */
		if (*lvpp == NULL) {
			if (base != NULL) {
				vmem_xfree(model_vmem, base, len);
				base = NULL;
				MOBJ_STAT_ADD(add_collision);
			}
		}
	}

	as_rangelock(as);

	/*
	 * If we don't have an expected base address, or the one that we want
	 * to use is not available, go get an acceptable address range.
	 */
	if (base == NULL || as_gap(as, len, &base, &len, 0, NULL)) {
		MOBJ_STAT_ADD(get_addr);
		base = (caddr_t)align;
		map_addr(&base, len, 0, 1, ma_flags);
	}

	/*
	 * Need to reserve the address space we're going to use.
	 * Don't reserve swap space since we'll be mapping over this.
	 */
	if (base != NULL) {
		crargs.flags |= MAP_NORESERVE;
		error = as_map(as, base, len, segvn_create, &crargs);
		if (error) {
			base = NULL;
		}
	}

	as_rangeunlock(as);
	return (base);
}
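
/*
 * To summarize the address selection ladder above: (1) try the cached
 * lib_va reservation carved out of the per-model vmem arena; (2) fall
 * back to map_addr() when there is no reservation or it collides with
 * an existing mapping; and (3) whatever range wins is pinned down with
 * a MAP_NORESERVE segvn mapping so that nothing else can claim it
 * before the real mappings replace it.
 */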

/*
 * Map the file associated with vp into the address space as a single
 * read only private mapping.
 * Returns 0 for success, and non-zero for failure to map the file.
 */
static int
mmapobj_map_flat(vnode_t *vp, mmapobj_result_t *mrp, size_t padding,
    cred_t *fcred)
{
	int error = 0;
	struct as *as = curproc->p_as;
	caddr_t addr = NULL;
	caddr_t start_addr;
	size_t len;
	size_t pad_len;
	int prot = PROT_USER | PROT_READ;
	uint_t ma_flags = _MAP_LOW32;
	vattr_t vattr;
	struct segvn_crargs crargs = SEGVN_ZFOD_ARGS(PROT_USER, PROT_ALL);

	if (get_udatamodel() == DATAMODEL_LP64) {
		ma_flags = 0;
	}

	vattr.va_mask = AT_SIZE;
	error = VOP_GETATTR(vp, &vattr, 0, fcred, NULL);
	if (error) {
		return (error);
	}

	len = vattr.va_size;

	ma_flags |= MAP_PRIVATE;
	if (padding == 0) {
		MOBJ_STAT_ADD(map_flat_no_padding);
		error = VOP_MAP(vp, 0, as, &addr, len, prot, PROT_ALL,
		    ma_flags, fcred, NULL);
		if (error == 0) {
			mrp[0].mr_addr = addr;
			mrp[0].mr_msize = len;
			mrp[0].mr_fsize = len;
			mrp[0].mr_offset = 0;
			mrp[0].mr_prot = prot;
			mrp[0].mr_flags = 0;
		}
		return (error);
	}

	/* padding was requested so there's more work to be done */
	MOBJ_STAT_ADD(map_flat_padding);

	/* No need to reserve swap space now since it will be reserved later */
	crargs.flags |= MAP_NORESERVE;

	/* Need to set up padding, which can only be in PAGESIZE increments. */
	ASSERT((padding & PAGEOFFSET) == 0);
	pad_len = len + (2 * padding);

	as_rangelock(as);
	map_addr(&addr, pad_len, 0, 1, ma_flags);
	error = as_map(as, addr, pad_len, segvn_create, &crargs);
	as_rangeunlock(as);
	if (error) {
		return (error);
	}
	start_addr = addr;
	addr += padding;
	ma_flags |= MAP_FIXED;
	error = VOP_MAP(vp, 0, as, &addr, len, prot, PROT_ALL, ma_flags,
	    fcred, NULL);
	if (error == 0) {
		mrp[0].mr_addr = start_addr;
		mrp[0].mr_msize = padding;
		mrp[0].mr_fsize = 0;
		mrp[0].mr_offset = 0;
		mrp[0].mr_prot = 0;
		mrp[0].mr_flags = MR_PADDING;

		mrp[1].mr_addr = addr;
		mrp[1].mr_msize = len;
		mrp[1].mr_fsize = len;
		mrp[1].mr_offset = 0;
		mrp[1].mr_prot = prot;
		mrp[1].mr_flags = 0;

		mrp[2].mr_addr = addr + P2ROUNDUP(len, PAGESIZE);
		mrp[2].mr_msize = padding;
		mrp[2].mr_fsize = 0;
		mrp[2].mr_offset = 0;
		mrp[2].mr_prot = 0;
		mrp[2].mr_flags = MR_PADDING;
	} else {
		/* Need to clean up the as_map from earlier */
		(void) as_unmap(as, start_addr, pad_len);
	}
	return (error);
}
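
/*
 * Layout produced by the three mrp[] entries built above when padding is
 * requested (a descriptive sketch):
 *
 *	start_addr                                    start_addr + pad_len
 *	|-- padding --|--------- file mapping ---------|-- padding --|
 *	  MR_PADDING       P2ROUNDUP(len, PAGESIZE)       MR_PADDING
 */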

/*
 * Map a PT_LOAD or PT_SUNWBSS section of an executable file into the user's
 * address space.
 * vp - vnode to be mapped in
 * addr - start address
 * len - length of vp to be mapped
 * zfodlen - length of zero filled memory after len above
 * offset - offset into file where mapping should start
 * prot - protections for this mapping
 * fcred - credentials for the file associated with vp at open time.
 */
static int
mmapobj_map_ptload(struct vnode *vp, caddr_t addr, size_t len, size_t zfodlen,
    off_t offset, int prot, cred_t *fcred)
{
	int error = 0;
	caddr_t zfodbase, oldaddr;
	size_t oldlen;
	size_t end;
	size_t zfoddiff;
	label_t ljb;
	struct as *as = curproc->p_as;
	model_t model;
	int full_page;

	/*
	 * See if addr and offset are aligned such that we can map in
	 * full pages instead of partial pages.
	 */
	full_page = (((uintptr_t)addr & PAGEOFFSET) ==
	    ((uintptr_t)offset & PAGEOFFSET));

	model = get_udatamodel();

	oldaddr = addr;
	addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
	if (len) {
		spgcnt_t availm, npages;
		int preread;
		uint_t mflag = MAP_PRIVATE | MAP_FIXED;

		if (model == DATAMODEL_ILP32) {
			mflag |= _MAP_LOW32;
		}
		/* We may need to map in extra bytes */
		oldlen = len;
		len += ((size_t)oldaddr & PAGEOFFSET);

		if (full_page) {
			offset = (off_t)((uintptr_t)offset & PAGEMASK);
			if ((prot & (PROT_WRITE | PROT_EXEC)) == PROT_EXEC) {
				mflag |= MAP_TEXT;
				MOBJ_STAT_ADD(map_ptload_text);
			} else {
				mflag |= MAP_INITDATA;
				MOBJ_STAT_ADD(map_ptload_initdata);
			}

			/*
			 * maxprot is passed as PROT_ALL so that mdb can
			 * write to this segment.
			 */
			if (error = VOP_MAP(vp, (offset_t)offset, as, &addr,
			    len, prot, PROT_ALL, mflag, fcred, NULL)) {
				return (error);
			}

			/*
			 * If the segment can fit and is relatively small, then
			 * we prefault the entire segment in.  This is based
			 * on the model that says the best working set of a
			 * small program is all of its pages.
			 * We only do this if freemem will not drop below
			 * lotsfree since we don't want to induce paging.
1027*0616c1c3SMichael Corcoran */ 1028*0616c1c3SMichael Corcoran npages = (spgcnt_t)btopr(len); 1029*0616c1c3SMichael Corcoran availm = freemem - lotsfree; 1030*0616c1c3SMichael Corcoran preread = (npages < availm && len < PGTHRESH) ? 1 : 0; 1031*0616c1c3SMichael Corcoran 1032*0616c1c3SMichael Corcoran /* 1033*0616c1c3SMichael Corcoran * If we aren't prefaulting the segment, 1034*0616c1c3SMichael Corcoran * increment "deficit", if necessary to ensure 1035*0616c1c3SMichael Corcoran * that pages will become available when this 1036*0616c1c3SMichael Corcoran * process starts executing. 1037*0616c1c3SMichael Corcoran */ 1038*0616c1c3SMichael Corcoran if (preread == 0 && npages > availm && 1039*0616c1c3SMichael Corcoran deficit < lotsfree) { 1040*0616c1c3SMichael Corcoran deficit += MIN((pgcnt_t)(npages - availm), 1041*0616c1c3SMichael Corcoran lotsfree - deficit); 1042*0616c1c3SMichael Corcoran } 1043*0616c1c3SMichael Corcoran 1044*0616c1c3SMichael Corcoran if (preread) { 1045*0616c1c3SMichael Corcoran (void) as_faulta(as, addr, len); 1046*0616c1c3SMichael Corcoran MOBJ_STAT_ADD(map_ptload_preread); 1047*0616c1c3SMichael Corcoran } 1048*0616c1c3SMichael Corcoran } else { 1049*0616c1c3SMichael Corcoran /* 1050*0616c1c3SMichael Corcoran * addr and offset were not aligned such that we could 1051*0616c1c3SMichael Corcoran * use VOP_MAP, thus we need to as_map the memory we 1052*0616c1c3SMichael Corcoran * need and then read the data in from disk. 1053*0616c1c3SMichael Corcoran * This code path is a corner case which should never 1054*0616c1c3SMichael Corcoran * be taken, but hand crafted binaries could trigger 1055*0616c1c3SMichael Corcoran * this logic and it needs to work correctly. 1056*0616c1c3SMichael Corcoran */ 1057*0616c1c3SMichael Corcoran MOBJ_STAT_ADD(map_ptload_unaligned_text); 1058*0616c1c3SMichael Corcoran as_rangelock(as); 1059*0616c1c3SMichael Corcoran (void) as_unmap(as, addr, len); 1060*0616c1c3SMichael Corcoran 1061*0616c1c3SMichael Corcoran /* 1062*0616c1c3SMichael Corcoran * We use zfod_argsp because we need to be able to 1063*0616c1c3SMichael Corcoran * write to the mapping and then we'll change the 1064*0616c1c3SMichael Corcoran * protections later if they are incorrect. 1065*0616c1c3SMichael Corcoran */ 1066*0616c1c3SMichael Corcoran error = as_map(as, addr, len, segvn_create, zfod_argsp); 1067*0616c1c3SMichael Corcoran as_rangeunlock(as); 1068*0616c1c3SMichael Corcoran if (error) { 1069*0616c1c3SMichael Corcoran MOBJ_STAT_ADD(map_ptload_unaligned_map_fail); 1070*0616c1c3SMichael Corcoran return (error); 1071*0616c1c3SMichael Corcoran } 1072*0616c1c3SMichael Corcoran 1073*0616c1c3SMichael Corcoran /* Now read in the data from disk */ 1074*0616c1c3SMichael Corcoran error = vn_rdwr(UIO_READ, vp, oldaddr, oldlen, offset, 1075*0616c1c3SMichael Corcoran UIO_USERSPACE, 0, (rlim64_t)0, fcred, NULL); 1076*0616c1c3SMichael Corcoran if (error) { 1077*0616c1c3SMichael Corcoran MOBJ_STAT_ADD(map_ptload_unaligned_read_fail); 1078*0616c1c3SMichael Corcoran return (error); 1079*0616c1c3SMichael Corcoran } 1080*0616c1c3SMichael Corcoran 1081*0616c1c3SMichael Corcoran /* 1082*0616c1c3SMichael Corcoran * Now set protections. 
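 * The pages were mapped writable via zfod_argsp above so the file
 * data could be copied in; tighten them back to the protections the
 * segment actually requested.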
1083*0616c1c3SMichael Corcoran */ 1084*0616c1c3SMichael Corcoran if (prot != PROT_ZFOD) { 1085*0616c1c3SMichael Corcoran (void) as_setprot(as, addr, len, prot); 1086*0616c1c3SMichael Corcoran } 1087*0616c1c3SMichael Corcoran } 1088*0616c1c3SMichael Corcoran } 1089*0616c1c3SMichael Corcoran 1090*0616c1c3SMichael Corcoran if (zfodlen) { 1091*0616c1c3SMichael Corcoran end = (size_t)addr + len; 1092*0616c1c3SMichael Corcoran zfodbase = (caddr_t)P2ROUNDUP(end, PAGESIZE); 1093*0616c1c3SMichael Corcoran zfoddiff = (uintptr_t)zfodbase - end; 1094*0616c1c3SMichael Corcoran if (zfoddiff) { 1095*0616c1c3SMichael Corcoran MOBJ_STAT_ADD(zfoddiff); 1096*0616c1c3SMichael Corcoran if ((prot & PROT_WRITE) == 0) { 1097*0616c1c3SMichael Corcoran (void) as_setprot(as, (caddr_t)end, 1098*0616c1c3SMichael Corcoran zfoddiff, prot | PROT_WRITE); 1099*0616c1c3SMichael Corcoran MOBJ_STAT_ADD(zfoddiff_nowrite); 1100*0616c1c3SMichael Corcoran } 1101*0616c1c3SMichael Corcoran if (on_fault(&ljb)) { 1102*0616c1c3SMichael Corcoran no_fault(); 1103*0616c1c3SMichael Corcoran if ((prot & PROT_WRITE) == 0) { 1104*0616c1c3SMichael Corcoran (void) as_setprot(as, (caddr_t)end, 1105*0616c1c3SMichael Corcoran zfoddiff, prot); 1106*0616c1c3SMichael Corcoran } 1107*0616c1c3SMichael Corcoran return (EFAULT); 1108*0616c1c3SMichael Corcoran } 1109*0616c1c3SMichael Corcoran uzero((void *)end, zfoddiff); 1110*0616c1c3SMichael Corcoran no_fault(); 1111*0616c1c3SMichael Corcoran 1112*0616c1c3SMichael Corcoran /* 1113*0616c1c3SMichael Corcoran * Remove write protection to return to original state 1114*0616c1c3SMichael Corcoran */ 1115*0616c1c3SMichael Corcoran if ((prot & PROT_WRITE) == 0) { 1116*0616c1c3SMichael Corcoran (void) as_setprot(as, (caddr_t)end, 1117*0616c1c3SMichael Corcoran zfoddiff, prot); 1118*0616c1c3SMichael Corcoran } 1119*0616c1c3SMichael Corcoran } 1120*0616c1c3SMichael Corcoran if (zfodlen > zfoddiff) { 1121*0616c1c3SMichael Corcoran struct segvn_crargs crargs = 1122*0616c1c3SMichael Corcoran SEGVN_ZFOD_ARGS(prot, PROT_ALL); 1123*0616c1c3SMichael Corcoran 1124*0616c1c3SMichael Corcoran MOBJ_STAT_ADD(zfodextra); 1125*0616c1c3SMichael Corcoran zfodlen -= zfoddiff; 1126*0616c1c3SMichael Corcoran crargs.szc = AS_MAP_NO_LPOOB; 1127*0616c1c3SMichael Corcoran 1128*0616c1c3SMichael Corcoran 1129*0616c1c3SMichael Corcoran as_rangelock(as); 1130*0616c1c3SMichael Corcoran (void) as_unmap(as, (caddr_t)zfodbase, zfodlen); 1131*0616c1c3SMichael Corcoran error = as_map(as, (caddr_t)zfodbase, 1132*0616c1c3SMichael Corcoran zfodlen, segvn_create, &crargs); 1133*0616c1c3SMichael Corcoran as_rangeunlock(as); 1134*0616c1c3SMichael Corcoran if (error) { 1135*0616c1c3SMichael Corcoran return (error); 1136*0616c1c3SMichael Corcoran } 1137*0616c1c3SMichael Corcoran } 1138*0616c1c3SMichael Corcoran } 1139*0616c1c3SMichael Corcoran return (0); 1140*0616c1c3SMichael Corcoran } 1141*0616c1c3SMichael Corcoran 1142*0616c1c3SMichael Corcoran /* 1143*0616c1c3SMichael Corcoran * Map the ELF file represented by vp into the user's address space. The 1144*0616c1c3SMichael Corcoran * first mapping will start at start_addr and there will be num_elements 1145*0616c1c3SMichael Corcoran * mappings. The mappings are described by the data in mrp which may be 1146*0616c1c3SMichael Corcoran * modified upon returning from this function. 1147*0616c1c3SMichael Corcoran * Returns 0 for success or errno for failure.
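 *
 * A sketch of the expected call, as issued by process_phdr() below:
 *
 *	ret = mmapobj_map_elf(vp, start_addr, mrp, loadable, fcred, e_type);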
1148*0616c1c3SMichael Corcoran */ 1149*0616c1c3SMichael Corcoran static int 1150*0616c1c3SMichael Corcoran mmapobj_map_elf(struct vnode *vp, caddr_t start_addr, mmapobj_result_t *mrp, 1151*0616c1c3SMichael Corcoran int num_elements, cred_t *fcred, ushort_t e_type) 1152*0616c1c3SMichael Corcoran { 1153*0616c1c3SMichael Corcoran int i; 1154*0616c1c3SMichael Corcoran int ret; 1155*0616c1c3SMichael Corcoran caddr_t lo; 1156*0616c1c3SMichael Corcoran caddr_t hi; 1157*0616c1c3SMichael Corcoran struct as *as = curproc->p_as; 1158*0616c1c3SMichael Corcoran 1159*0616c1c3SMichael Corcoran for (i = 0; i < num_elements; i++) { 1160*0616c1c3SMichael Corcoran caddr_t addr; 1161*0616c1c3SMichael Corcoran size_t p_memsz; 1162*0616c1c3SMichael Corcoran size_t p_filesz; 1163*0616c1c3SMichael Corcoran size_t zfodlen; 1164*0616c1c3SMichael Corcoran offset_t p_offset; 1165*0616c1c3SMichael Corcoran size_t dif; 1166*0616c1c3SMichael Corcoran int prot; 1167*0616c1c3SMichael Corcoran 1168*0616c1c3SMichael Corcoran /* Always need to adjust mr_addr */ 1169*0616c1c3SMichael Corcoran addr = start_addr + (size_t)(mrp[i].mr_addr); 1170*0616c1c3SMichael Corcoran mrp[i].mr_addr = 1171*0616c1c3SMichael Corcoran (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK); 1172*0616c1c3SMichael Corcoran 1173*0616c1c3SMichael Corcoran /* Padding has already been mapped */ 1174*0616c1c3SMichael Corcoran if (MR_GET_TYPE(mrp[i].mr_flags) == MR_PADDING) { 1175*0616c1c3SMichael Corcoran continue; 1176*0616c1c3SMichael Corcoran } 1177*0616c1c3SMichael Corcoran p_memsz = mrp[i].mr_msize; 1178*0616c1c3SMichael Corcoran p_filesz = mrp[i].mr_fsize; 1179*0616c1c3SMichael Corcoran zfodlen = p_memsz - p_filesz; 1180*0616c1c3SMichael Corcoran p_offset = mrp[i].mr_offset; 1181*0616c1c3SMichael Corcoran dif = (uintptr_t)(addr) & PAGEOFFSET; 1182*0616c1c3SMichael Corcoran prot = mrp[i].mr_prot | PROT_USER; 1183*0616c1c3SMichael Corcoran ret = mmapobj_map_ptload(vp, addr, p_filesz, zfodlen, 1184*0616c1c3SMichael Corcoran p_offset, prot, fcred); 1185*0616c1c3SMichael Corcoran if (ret != 0) { 1186*0616c1c3SMichael Corcoran MOBJ_STAT_ADD(ptload_failed); 1187*0616c1c3SMichael Corcoran mmapobj_unmap(mrp, i, num_elements, e_type); 1188*0616c1c3SMichael Corcoran return (ret); 1189*0616c1c3SMichael Corcoran } 1190*0616c1c3SMichael Corcoran 1191*0616c1c3SMichael Corcoran /* Need to cleanup mrp to reflect the actual values used */ 1192*0616c1c3SMichael Corcoran mrp[i].mr_msize += dif; 1193*0616c1c3SMichael Corcoran mrp[i].mr_offset = (size_t)addr & PAGEOFFSET; 1194*0616c1c3SMichael Corcoran } 1195*0616c1c3SMichael Corcoran 1196*0616c1c3SMichael Corcoran /* Also need to unmap any holes created above */ 1197*0616c1c3SMichael Corcoran if (num_elements == 1) { 1198*0616c1c3SMichael Corcoran MOBJ_STAT_ADD(map_elf_no_holes); 1199*0616c1c3SMichael Corcoran return (0); 1200*0616c1c3SMichael Corcoran } 1201*0616c1c3SMichael Corcoran if (e_type == ET_EXEC) { 1202*0616c1c3SMichael Corcoran return (0); 1203*0616c1c3SMichael Corcoran } 1204*0616c1c3SMichael Corcoran 1205*0616c1c3SMichael Corcoran as_rangelock(as); 1206*0616c1c3SMichael Corcoran lo = start_addr; 1207*0616c1c3SMichael Corcoran hi = mrp[0].mr_addr; 1208*0616c1c3SMichael Corcoran 1209*0616c1c3SMichael Corcoran /* Remove holes made by the rest of the segments */ 1210*0616c1c3SMichael Corcoran for (i = 0; i < num_elements - 1; i++) { 1211*0616c1c3SMichael Corcoran lo = (caddr_t)P2ROUNDUP((size_t)(mrp[i].mr_addr) + 1212*0616c1c3SMichael Corcoran mrp[i].mr_msize, PAGESIZE); 1213*0616c1c3SMichael Corcoran hi = mrp[i + 
1].mr_addr; 1214*0616c1c3SMichael Corcoran if (lo < hi) { 1215*0616c1c3SMichael Corcoran /* 1216*0616c1c3SMichael Corcoran * If as_unmap fails we just use up a bit of extra 1217*0616c1c3SMichael Corcoran * space 1218*0616c1c3SMichael Corcoran */ 1219*0616c1c3SMichael Corcoran (void) as_unmap(as, (caddr_t)lo, 1220*0616c1c3SMichael Corcoran (size_t)hi - (size_t)lo); 1221*0616c1c3SMichael Corcoran MOBJ_STAT_ADD(unmap_hole); 1222*0616c1c3SMichael Corcoran } 1223*0616c1c3SMichael Corcoran } 1224*0616c1c3SMichael Corcoran as_rangeunlock(as); 1225*0616c1c3SMichael Corcoran 1226*0616c1c3SMichael Corcoran return (0); 1227*0616c1c3SMichael Corcoran } 1228*0616c1c3SMichael Corcoran 1229*0616c1c3SMichael Corcoran /* Ugly hack to get STRUCT_* macros to work below */ 1230*0616c1c3SMichael Corcoran struct myphdr { 1231*0616c1c3SMichael Corcoran Phdr x; /* native version */ 1232*0616c1c3SMichael Corcoran }; 1233*0616c1c3SMichael Corcoran 1234*0616c1c3SMichael Corcoran struct myphdr32 { 1235*0616c1c3SMichael Corcoran Elf32_Phdr x; 1236*0616c1c3SMichael Corcoran }; 1237*0616c1c3SMichael Corcoran 1238*0616c1c3SMichael Corcoran /* 1239*0616c1c3SMichael Corcoran * Calculate and return the number of loadable segments in the ELF Phdr 1240*0616c1c3SMichael Corcoran * represented by phdrbase as well as the len of the total mapping and 1241*0616c1c3SMichael Corcoran * the max alignment that is needed for a given segment. On success, 1242*0616c1c3SMichael Corcoran * 0 is returned, and *len, *loadable and *align have been filled out. 1243*0616c1c3SMichael Corcoran * On failure, errno will be returned, which in this case is ENOTSUP 1244*0616c1c3SMichael Corcoran * if we were passed an ELF file with overlapping segments. 1245*0616c1c3SMichael Corcoran */ 1246*0616c1c3SMichael Corcoran static int 1247*0616c1c3SMichael Corcoran calc_loadable(Ehdr *ehdrp, caddr_t phdrbase, int nphdrs, size_t *len, 1248*0616c1c3SMichael Corcoran int *loadable, size_t *align) 1249*0616c1c3SMichael Corcoran { 1250*0616c1c3SMichael Corcoran int i; 1251*0616c1c3SMichael Corcoran int hsize; 1252*0616c1c3SMichael Corcoran model_t model; 1253*0616c1c3SMichael Corcoran uint_t p_type; 1254*0616c1c3SMichael Corcoran offset_t p_offset; 1255*0616c1c3SMichael Corcoran size_t p_memsz; 1256*0616c1c3SMichael Corcoran size_t p_align; 1257*0616c1c3SMichael Corcoran caddr_t vaddr; 1258*0616c1c3SMichael Corcoran int num_segs = 0; 1259*0616c1c3SMichael Corcoran caddr_t start_addr = NULL; 1260*0616c1c3SMichael Corcoran caddr_t p_end = NULL; 1261*0616c1c3SMichael Corcoran size_t max_align = 0; 1262*0616c1c3SMichael Corcoran STRUCT_HANDLE(myphdr, mph); 1263*0616c1c3SMichael Corcoran #if defined(__sparc) 1264*0616c1c3SMichael Corcoran extern int vac_size; 1265*0616c1c3SMichael Corcoran #endif 1266*0616c1c3SMichael Corcoran 1267*0616c1c3SMichael Corcoran model = get_udatamodel(); 1268*0616c1c3SMichael Corcoran STRUCT_SET_HANDLE(mph, model, (struct myphdr *)phdrbase); 1269*0616c1c3SMichael Corcoran 1270*0616c1c3SMichael Corcoran /* hsize alignment should have been checked before calling this func */ 1271*0616c1c3SMichael Corcoran if (model == DATAMODEL_LP64) { 1272*0616c1c3SMichael Corcoran hsize = ehdrp->e_phentsize; 1273*0616c1c3SMichael Corcoran if (hsize & 7) { 1274*0616c1c3SMichael Corcoran return (ENOTSUP); 1275*0616c1c3SMichael Corcoran } 1276*0616c1c3SMichael Corcoran } else { 1277*0616c1c3SMichael Corcoran ASSERT(model == DATAMODEL_ILP32); 1278*0616c1c3SMichael Corcoran hsize = ((Elf32_Ehdr *)ehdrp)->e_phentsize; 1279*0616c1c3SMichael Corcoran if (hsize & 
3) { 1280*0616c1c3SMichael Corcoran return (ENOTSUP); 1281*0616c1c3SMichael Corcoran } 1282*0616c1c3SMichael Corcoran } 1283*0616c1c3SMichael Corcoran 1284*0616c1c3SMichael Corcoran /* 1285*0616c1c3SMichael Corcoran * Determine the span of all loadable segments and calculate the 1286*0616c1c3SMichael Corcoran * number of loadable segments. 1287*0616c1c3SMichael Corcoran */ 1288*0616c1c3SMichael Corcoran for (i = 0; i < nphdrs; i++) { 1289*0616c1c3SMichael Corcoran p_type = STRUCT_FGET(mph, x.p_type); 1290*0616c1c3SMichael Corcoran if (p_type == PT_LOAD || p_type == PT_SUNWBSS) { 1291*0616c1c3SMichael Corcoran vaddr = (caddr_t)(uintptr_t)STRUCT_FGET(mph, x.p_vaddr); 1292*0616c1c3SMichael Corcoran p_memsz = STRUCT_FGET(mph, x.p_memsz); 1293*0616c1c3SMichael Corcoran 1294*0616c1c3SMichael Corcoran /* 1295*0616c1c3SMichael Corcoran * Skip this header if it requests no memory to be 1296*0616c1c3SMichael Corcoran * mapped. 1297*0616c1c3SMichael Corcoran */ 1298*0616c1c3SMichael Corcoran if (p_memsz == 0) { 1299*0616c1c3SMichael Corcoran STRUCT_SET_HANDLE(mph, model, 1300*0616c1c3SMichael Corcoran (struct myphdr *)((size_t)STRUCT_BUF(mph) + 1301*0616c1c3SMichael Corcoran hsize)); 1302*0616c1c3SMichael Corcoran MOBJ_STAT_ADD(nomem_header); 1303*0616c1c3SMichael Corcoran continue; 1304*0616c1c3SMichael Corcoran } 1305*0616c1c3SMichael Corcoran if (num_segs++ == 0) { 1306*0616c1c3SMichael Corcoran start_addr = vaddr; 1307*0616c1c3SMichael Corcoran /* 1308*0616c1c3SMichael Corcoran * For the first segment, we need to map from 1309*0616c1c3SMichael Corcoran * the beginning of the file, so we will 1310*0616c1c3SMichael Corcoran * adjust the size of the mapping to include 1311*0616c1c3SMichael Corcoran * this memory. 1312*0616c1c3SMichael Corcoran */ 1313*0616c1c3SMichael Corcoran p_offset = STRUCT_FGET(mph, x.p_offset); 1314*0616c1c3SMichael Corcoran } else { 1315*0616c1c3SMichael Corcoran p_offset = 0; 1316*0616c1c3SMichael Corcoran } 1317*0616c1c3SMichael Corcoran /* 1318*0616c1c3SMichael Corcoran * Check to make sure that this mapping wouldn't 1319*0616c1c3SMichael Corcoran * overlap a previous mapping. 1320*0616c1c3SMichael Corcoran */ 1321*0616c1c3SMichael Corcoran if (vaddr < p_end) { 1322*0616c1c3SMichael Corcoran MOBJ_STAT_ADD(overlap_header); 1323*0616c1c3SMichael Corcoran return (ENOTSUP); 1324*0616c1c3SMichael Corcoran } 1325*0616c1c3SMichael Corcoran 1326*0616c1c3SMichael Corcoran p_end = vaddr + p_memsz + p_offset; 1327*0616c1c3SMichael Corcoran p_end = (caddr_t)P2ROUNDUP((size_t)p_end, PAGESIZE); 1328*0616c1c3SMichael Corcoran 1329*0616c1c3SMichael Corcoran p_align = STRUCT_FGET(mph, x.p_align); 1330*0616c1c3SMichael Corcoran if (p_align > 1 && p_align > max_align) { 1331*0616c1c3SMichael Corcoran max_align = p_align; 1332*0616c1c3SMichael Corcoran #if defined(__sparc) 1333*0616c1c3SMichael Corcoran /* 1334*0616c1c3SMichael Corcoran * Want to prevent aliasing by making the start 1335*0616c1c3SMichael Corcoran * address be aligned to vac_size. 
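 * (Otherwise two mappings of the same library text could land at
 * virtual addresses with different cache coloring and alias in the
 * virtually addressed cache.)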
1336*0616c1c3SMichael Corcoran */ 1337*0616c1c3SMichael Corcoran if (max_align < vac_size) { 1338*0616c1c3SMichael Corcoran max_align = vac_size; 1339*0616c1c3SMichael Corcoran MOBJ_STAT_ADD(vac_align); 1340*0616c1c3SMichael Corcoran } 1341*0616c1c3SMichael Corcoran #endif 1342*0616c1c3SMichael Corcoran } 1343*0616c1c3SMichael Corcoran } 1344*0616c1c3SMichael Corcoran STRUCT_SET_HANDLE(mph, model, 1345*0616c1c3SMichael Corcoran (struct myphdr *)((size_t)STRUCT_BUF(mph) + hsize)); 1346*0616c1c3SMichael Corcoran } 1347*0616c1c3SMichael Corcoran 1348*0616c1c3SMichael Corcoran /* 1349*0616c1c3SMichael Corcoran * The alignment should be a power of 2; if it isn't, we forgive it 1350*0616c1c3SMichael Corcoran * and round up. On overflow, we'll set the alignment to max_align 1351*0616c1c3SMichael Corcoran * rounded down to the nearest power of 2. 1352*0616c1c3SMichael Corcoran */ 1353*0616c1c3SMichael Corcoran if (max_align > 0 && !ISP2(max_align)) { 1354*0616c1c3SMichael Corcoran MOBJ_STAT_ADD(np2_align); 1355*0616c1c3SMichael Corcoran *align = 2 * (1L << (highbit(max_align) - 1)); 1356*0616c1c3SMichael Corcoran if (*align < max_align || 1357*0616c1c3SMichael Corcoran (*align > UINT_MAX && model == DATAMODEL_ILP32)) { 1358*0616c1c3SMichael Corcoran MOBJ_STAT_ADD(np2_align_overflow); 1359*0616c1c3SMichael Corcoran *align = 1L << (highbit(max_align) - 1); 1360*0616c1c3SMichael Corcoran } 1361*0616c1c3SMichael Corcoran } else { 1362*0616c1c3SMichael Corcoran *align = max_align; 1363*0616c1c3SMichael Corcoran } 1364*0616c1c3SMichael Corcoran 1365*0616c1c3SMichael Corcoran *loadable = num_segs; 1366*0616c1c3SMichael Corcoran *len = p_end - start_addr; 1367*0616c1c3SMichael Corcoran return (0); 1368*0616c1c3SMichael Corcoran } 1369*0616c1c3SMichael Corcoran 1370*0616c1c3SMichael Corcoran /* 1371*0616c1c3SMichael Corcoran * Check the address space to see if the virtual addresses to be used are 1372*0616c1c3SMichael Corcoran * available. If they are not, return errno for failure. On success, 0 1373*0616c1c3SMichael Corcoran * will be returned, and the virtual addresses for each mmapobj_result_t 1374*0616c1c3SMichael Corcoran * will be reserved. Note that a reservation could have earlier been made 1375*0616c1c3SMichael Corcoran * for a given segment via a /dev/null mapping. If that is the case, then 1376*0616c1c3SMichael Corcoran * we can use that VA space for our mappings. 1377*0616c1c3SMichael Corcoran * Note: this function will only be used for ET_EXEC binaries.
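 * (A /dev/null reservation is recognized in the body below as a
 * segdev segment whose type is neither MAP_SHARED nor MAP_PRIVATE.)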
1378*0616c1c3SMichael Corcoran */ 1379*0616c1c3SMichael Corcoran int 1380*0616c1c3SMichael Corcoran check_exec_addrs(int loadable, mmapobj_result_t *mrp, caddr_t start_addr) 1381*0616c1c3SMichael Corcoran { 1382*0616c1c3SMichael Corcoran int i; 1383*0616c1c3SMichael Corcoran struct as *as = curproc->p_as; 1384*0616c1c3SMichael Corcoran struct segvn_crargs crargs = SEGVN_ZFOD_ARGS(PROT_ZFOD, PROT_ALL); 1385*0616c1c3SMichael Corcoran int ret; 1386*0616c1c3SMichael Corcoran caddr_t myaddr; 1387*0616c1c3SMichael Corcoran size_t mylen; 1388*0616c1c3SMichael Corcoran struct seg *seg; 1389*0616c1c3SMichael Corcoran 1390*0616c1c3SMichael Corcoran /* No need to reserve swap space now since it will be reserved later */ 1391*0616c1c3SMichael Corcoran crargs.flags |= MAP_NORESERVE; 1392*0616c1c3SMichael Corcoran as_rangelock(as); 1393*0616c1c3SMichael Corcoran for (i = 0; i < loadable; i++) { 1394*0616c1c3SMichael Corcoran 1395*0616c1c3SMichael Corcoran myaddr = start_addr + (size_t)mrp[i].mr_addr; 1396*0616c1c3SMichael Corcoran mylen = mrp[i].mr_msize; 1397*0616c1c3SMichael Corcoran 1398*0616c1c3SMichael Corcoran /* See if there is a hole in the as for this range */ 1399*0616c1c3SMichael Corcoran if (as_gap(as, mylen, &myaddr, &mylen, 0, NULL) == 0) { 1400*0616c1c3SMichael Corcoran ASSERT(myaddr == start_addr + (size_t)mrp[i].mr_addr); 1401*0616c1c3SMichael Corcoran ASSERT(mylen == mrp[i].mr_msize); 1402*0616c1c3SMichael Corcoran 1403*0616c1c3SMichael Corcoran #ifdef DEBUG 1404*0616c1c3SMichael Corcoran if (MR_GET_TYPE(mrp[i].mr_flags) == MR_PADDING) { 1405*0616c1c3SMichael Corcoran MOBJ_STAT_ADD(exec_padding); 1406*0616c1c3SMichael Corcoran } 1407*0616c1c3SMichael Corcoran #endif 1408*0616c1c3SMichael Corcoran ret = as_map(as, myaddr, mylen, segvn_create, &crargs); 1409*0616c1c3SMichael Corcoran if (ret) { 1410*0616c1c3SMichael Corcoran as_rangeunlock(as); 1411*0616c1c3SMichael Corcoran mmapobj_unmap_exec(mrp, i, start_addr); 1412*0616c1c3SMichael Corcoran return (ret); 1413*0616c1c3SMichael Corcoran } 1414*0616c1c3SMichael Corcoran } else { 1415*0616c1c3SMichael Corcoran /* 1416*0616c1c3SMichael Corcoran * There is a mapping that exists in the range 1417*0616c1c3SMichael Corcoran * so check to see if it was a "reservation" 1418*0616c1c3SMichael Corcoran * from /dev/null. The mapping is from 1419*0616c1c3SMichael Corcoran * /dev/null if the mapping comes from 1420*0616c1c3SMichael Corcoran * segdev and the type is neither MAP_SHARED 1421*0616c1c3SMichael Corcoran * nor MAP_PRIVATE. 
1422*0616c1c3SMichael Corcoran */ 1423*0616c1c3SMichael Corcoran AS_LOCK_ENTER(as, &as->a_lock, RW_READER); 1424*0616c1c3SMichael Corcoran seg = as_findseg(as, myaddr, 0); 1425*0616c1c3SMichael Corcoran MOBJ_STAT_ADD(exec_addr_mapped); 1426*0616c1c3SMichael Corcoran if (seg && seg->s_ops == &segdev_ops && 1427*0616c1c3SMichael Corcoran ((SEGOP_GETTYPE(seg, myaddr) & 1428*0616c1c3SMichael Corcoran (MAP_SHARED | MAP_PRIVATE)) == 0) && 1429*0616c1c3SMichael Corcoran myaddr >= seg->s_base && 1430*0616c1c3SMichael Corcoran myaddr + mylen <= 1431*0616c1c3SMichael Corcoran seg->s_base + seg->s_size) { 1432*0616c1c3SMichael Corcoran MOBJ_STAT_ADD(exec_addr_devnull); 1433*0616c1c3SMichael Corcoran AS_LOCK_EXIT(as, &as->a_lock); 1434*0616c1c3SMichael Corcoran (void) as_unmap(as, myaddr, mylen); 1435*0616c1c3SMichael Corcoran ret = as_map(as, myaddr, mylen, segvn_create, 1436*0616c1c3SMichael Corcoran &crargs); 1437*0616c1c3SMichael Corcoran mrp[i].mr_flags |= MR_RESV; 1438*0616c1c3SMichael Corcoran if (ret) { 1439*0616c1c3SMichael Corcoran as_rangeunlock(as); 1440*0616c1c3SMichael Corcoran /* Need to remap what we unmapped */ 1441*0616c1c3SMichael Corcoran mmapobj_unmap_exec(mrp, i + 1, 1442*0616c1c3SMichael Corcoran start_addr); 1443*0616c1c3SMichael Corcoran return (ret); 1444*0616c1c3SMichael Corcoran } 1445*0616c1c3SMichael Corcoran } else { 1446*0616c1c3SMichael Corcoran AS_LOCK_EXIT(as, &as->a_lock); 1447*0616c1c3SMichael Corcoran as_rangeunlock(as); 1448*0616c1c3SMichael Corcoran mmapobj_unmap_exec(mrp, i, start_addr); 1449*0616c1c3SMichael Corcoran MOBJ_STAT_ADD(exec_addr_in_use); 1450*0616c1c3SMichael Corcoran return (EADDRINUSE); 1451*0616c1c3SMichael Corcoran } 1452*0616c1c3SMichael Corcoran } 1453*0616c1c3SMichael Corcoran } 1454*0616c1c3SMichael Corcoran as_rangeunlock(as); 1455*0616c1c3SMichael Corcoran return (0); 1456*0616c1c3SMichael Corcoran } 1457*0616c1c3SMichael Corcoran 1458*0616c1c3SMichael Corcoran /* 1459*0616c1c3SMichael Corcoran * Walk through the ELF program headers and extract all useful information 1460*0616c1c3SMichael Corcoran * for PT_LOAD and PT_SUNWBSS segments into mrp. 1461*0616c1c3SMichael Corcoran * Return 0 on success or error on failure. 
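 * num_mapped in - # elements available in mrp
 * num_mapped out - # elements of mrp filled in if no errors (or the
 * # needed, if E2BIG is returned)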
1462*0616c1c3SMichael Corcoran */ 1463*0616c1c3SMichael Corcoran static int 1464*0616c1c3SMichael Corcoran process_phdr(Ehdr *ehdrp, caddr_t phdrbase, int nphdrs, mmapobj_result_t *mrp, 1465*0616c1c3SMichael Corcoran vnode_t *vp, uint_t *num_mapped, size_t padding, cred_t *fcred) 1466*0616c1c3SMichael Corcoran { 1467*0616c1c3SMichael Corcoran int i; 1468*0616c1c3SMichael Corcoran caddr_t start_addr = NULL; 1469*0616c1c3SMichael Corcoran caddr_t vaddr; 1470*0616c1c3SMichael Corcoran size_t len = 0; 1471*0616c1c3SMichael Corcoran size_t lib_len = 0; 1472*0616c1c3SMichael Corcoran int ret; 1473*0616c1c3SMichael Corcoran int prot; 1474*0616c1c3SMichael Corcoran struct lib_va *lvp = NULL; 1475*0616c1c3SMichael Corcoran vattr_t vattr; 1476*0616c1c3SMichael Corcoran struct as *as = curproc->p_as; 1477*0616c1c3SMichael Corcoran int error; 1478*0616c1c3SMichael Corcoran int loadable = 0; 1479*0616c1c3SMichael Corcoran int current = 0; 1480*0616c1c3SMichael Corcoran int use_lib_va = 1; 1481*0616c1c3SMichael Corcoran size_t align = 0; 1482*0616c1c3SMichael Corcoran size_t add_pad = 0; 1483*0616c1c3SMichael Corcoran int hdr_seen = 0; 1484*0616c1c3SMichael Corcoran ushort_t e_type = ehdrp->e_type; /* same offset 32 and 64 bit */ 1485*0616c1c3SMichael Corcoran uint_t p_type; 1486*0616c1c3SMichael Corcoran offset_t p_offset; 1487*0616c1c3SMichael Corcoran size_t p_memsz; 1488*0616c1c3SMichael Corcoran size_t p_filesz; 1489*0616c1c3SMichael Corcoran uint_t p_flags; 1490*0616c1c3SMichael Corcoran int hsize; 1491*0616c1c3SMichael Corcoran model_t model; 1492*0616c1c3SMichael Corcoran STRUCT_HANDLE(myphdr, mph); 1493*0616c1c3SMichael Corcoran 1494*0616c1c3SMichael Corcoran model = get_udatamodel(); 1495*0616c1c3SMichael Corcoran STRUCT_SET_HANDLE(mph, model, (struct myphdr *)phdrbase); 1496*0616c1c3SMichael Corcoran 1497*0616c1c3SMichael Corcoran /* 1498*0616c1c3SMichael Corcoran * Need to make sure that hsize is aligned properly. 1499*0616c1c3SMichael Corcoran * For 32bit processes, 4 byte alignment is required. 1500*0616c1c3SMichael Corcoran * For 64bit processes, 8 byte alignment is required. 1501*0616c1c3SMichael Corcoran * If the alignment isn't correct, we need to return failure 1502*0616c1c3SMichael Corcoran * since it could cause an alignment error panic while walking 1503*0616c1c3SMichael Corcoran * the phdr array. 
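 * (For example, an ILP32 object with an e_phentsize of 34 would leave
 * every other phdr half-word aligned, so it is rejected with ENOTSUP.)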
1504*0616c1c3SMichael Corcoran */ 1505*0616c1c3SMichael Corcoran if (model == DATAMODEL_LP64) { 1506*0616c1c3SMichael Corcoran hsize = ehdrp->e_phentsize; 1507*0616c1c3SMichael Corcoran if (hsize & 7) { 1508*0616c1c3SMichael Corcoran MOBJ_STAT_ADD(phent_align64); 1509*0616c1c3SMichael Corcoran return (ENOTSUP); 1510*0616c1c3SMichael Corcoran } 1511*0616c1c3SMichael Corcoran } else { 1512*0616c1c3SMichael Corcoran ASSERT(model == DATAMODEL_ILP32); 1513*0616c1c3SMichael Corcoran hsize = ((Elf32_Ehdr *)ehdrp)->e_phentsize; 1514*0616c1c3SMichael Corcoran if (hsize & 3) { 1515*0616c1c3SMichael Corcoran MOBJ_STAT_ADD(phent_align32); 1516*0616c1c3SMichael Corcoran return (ENOTSUP); 1517*0616c1c3SMichael Corcoran } 1518*0616c1c3SMichael Corcoran } 1519*0616c1c3SMichael Corcoran 1520*0616c1c3SMichael Corcoran if (padding != 0) { 1521*0616c1c3SMichael Corcoran use_lib_va = 0; 1522*0616c1c3SMichael Corcoran } 1523*0616c1c3SMichael Corcoran if (e_type == ET_DYN) { 1524*0616c1c3SMichael Corcoran vattr.va_mask = AT_FSID | AT_NODEID | AT_CTIME | AT_MTIME; 1525*0616c1c3SMichael Corcoran error = VOP_GETATTR(vp, &vattr, 0, fcred, NULL); 1526*0616c1c3SMichael Corcoran if (error) { 1527*0616c1c3SMichael Corcoran return (error); 1528*0616c1c3SMichael Corcoran } 1529*0616c1c3SMichael Corcoran /* Check to see if we already have a description for this lib */ 1530*0616c1c3SMichael Corcoran lvp = lib_va_find(&vattr); 1531*0616c1c3SMichael Corcoran 1532*0616c1c3SMichael Corcoran if (lvp != NULL) { 1533*0616c1c3SMichael Corcoran MOBJ_STAT_ADD(lvp_found); 1534*0616c1c3SMichael Corcoran if (use_lib_va) { 1535*0616c1c3SMichael Corcoran start_addr = mmapobj_lookup_start_addr(lvp); 1536*0616c1c3SMichael Corcoran if (start_addr == NULL) { 1537*0616c1c3SMichael Corcoran lib_va_release(lvp); 1538*0616c1c3SMichael Corcoran return (ENOMEM); 1539*0616c1c3SMichael Corcoran } 1540*0616c1c3SMichael Corcoran } 1541*0616c1c3SMichael Corcoran 1542*0616c1c3SMichael Corcoran /* 1543*0616c1c3SMichael Corcoran * loadable may be zero if the original allocator 1544*0616c1c3SMichael Corcoran * of lvp hasn't finished setting it up but the rest 1545*0616c1c3SMichael Corcoran * of the fields will be accurate. 1546*0616c1c3SMichael Corcoran */ 1547*0616c1c3SMichael Corcoran loadable = lvp->lv_num_segs; 1548*0616c1c3SMichael Corcoran len = lvp->lv_len; 1549*0616c1c3SMichael Corcoran align = lvp->lv_align; 1550*0616c1c3SMichael Corcoran } 1551*0616c1c3SMichael Corcoran } 1552*0616c1c3SMichael Corcoran 1553*0616c1c3SMichael Corcoran /* 1554*0616c1c3SMichael Corcoran * Determine the span of all loadable segments and calculate the 1555*0616c1c3SMichael Corcoran * number of loadable segments, the total len spanned by the mappings 1556*0616c1c3SMichael Corcoran * and the max alignment, if we didn't get them above. 1557*0616c1c3SMichael Corcoran */ 1558*0616c1c3SMichael Corcoran if (loadable == 0) { 1559*0616c1c3SMichael Corcoran MOBJ_STAT_ADD(no_loadable_yet); 1560*0616c1c3SMichael Corcoran ret = calc_loadable(ehdrp, phdrbase, nphdrs, &len, 1561*0616c1c3SMichael Corcoran &loadable, &align); 1562*0616c1c3SMichael Corcoran if (ret != 0) { 1563*0616c1c3SMichael Corcoran /* 1564*0616c1c3SMichael Corcoran * Since it'd be an invalid file, we shouldn't have 1565*0616c1c3SMichael Corcoran * cached it previously. 
1566*0616c1c3SMichael Corcoran */ 1567*0616c1c3SMichael Corcoran ASSERT(lvp == NULL); 1568*0616c1c3SMichael Corcoran return (ret); 1569*0616c1c3SMichael Corcoran } 1570*0616c1c3SMichael Corcoran #ifdef DEBUG 1571*0616c1c3SMichael Corcoran if (lvp) { 1572*0616c1c3SMichael Corcoran ASSERT(len == lvp->lv_len); 1573*0616c1c3SMichael Corcoran ASSERT(align == lvp->lv_align); 1574*0616c1c3SMichael Corcoran } 1575*0616c1c3SMichael Corcoran #endif 1576*0616c1c3SMichael Corcoran } 1577*0616c1c3SMichael Corcoran 1578*0616c1c3SMichael Corcoran /* Make sure there's something to map. */ 1579*0616c1c3SMichael Corcoran if (len == 0 || loadable == 0) { 1580*0616c1c3SMichael Corcoran /* 1581*0616c1c3SMichael Corcoran * Since it'd be an invalid file, we shouldn't have 1582*0616c1c3SMichael Corcoran * cached it previously. 1583*0616c1c3SMichael Corcoran */ 1584*0616c1c3SMichael Corcoran ASSERT(lvp == NULL); 1585*0616c1c3SMichael Corcoran MOBJ_STAT_ADD(nothing_to_map); 1586*0616c1c3SMichael Corcoran return (ENOTSUP); 1587*0616c1c3SMichael Corcoran } 1588*0616c1c3SMichael Corcoran 1589*0616c1c3SMichael Corcoran lib_len = len; 1590*0616c1c3SMichael Corcoran if (padding != 0) { 1591*0616c1c3SMichael Corcoran loadable += 2; 1592*0616c1c3SMichael Corcoran } 1593*0616c1c3SMichael Corcoran if (loadable > *num_mapped) { 1594*0616c1c3SMichael Corcoran *num_mapped = loadable; 1595*0616c1c3SMichael Corcoran /* cleanup previous reservation */ 1596*0616c1c3SMichael Corcoran if (start_addr) { 1597*0616c1c3SMichael Corcoran (void) as_unmap(as, start_addr, lib_len); 1598*0616c1c3SMichael Corcoran } 1599*0616c1c3SMichael Corcoran MOBJ_STAT_ADD(e2big); 1600*0616c1c3SMichael Corcoran if (lvp) { 1601*0616c1c3SMichael Corcoran lib_va_release(lvp); 1602*0616c1c3SMichael Corcoran } 1603*0616c1c3SMichael Corcoran return (E2BIG); 1604*0616c1c3SMichael Corcoran } 1605*0616c1c3SMichael Corcoran 1606*0616c1c3SMichael Corcoran /* 1607*0616c1c3SMichael Corcoran * We now know the size of the object to map and now we need to 1608*0616c1c3SMichael Corcoran * get the start address to map it at. It's possible we already 1609*0616c1c3SMichael Corcoran * have it if we found all the info we need in the lib_va cache. 1610*0616c1c3SMichael Corcoran */ 1611*0616c1c3SMichael Corcoran if (e_type == ET_DYN && start_addr == NULL) { 1612*0616c1c3SMichael Corcoran /* 1613*0616c1c3SMichael Corcoran * Need to make sure padding does not throw off 1614*0616c1c3SMichael Corcoran * required alignment. We can only specify an 1615*0616c1c3SMichael Corcoran * alignment for the starting address to be mapped, 1616*0616c1c3SMichael Corcoran * so we round padding up to the alignment and map 1617*0616c1c3SMichael Corcoran * from there and then throw out the extra later. 
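 * For example (illustrative sizes): one page (0x1000) of padding with
 * a required alignment of 0x10000 reserves add_pad = 0x10000 of
 * lead-in, and the leading 0xf000 bytes are unmapped again further
 * below once the object has been placed.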
1618*0616c1c3SMichael Corcoran */ 1619*0616c1c3SMichael Corcoran if (padding != 0) { 1620*0616c1c3SMichael Corcoran if (align > 1) { 1621*0616c1c3SMichael Corcoran add_pad = P2ROUNDUP(padding, align); 1622*0616c1c3SMichael Corcoran len += add_pad; 1623*0616c1c3SMichael Corcoran MOBJ_STAT_ADD(dyn_pad_align); 1624*0616c1c3SMichael Corcoran } else { 1625*0616c1c3SMichael Corcoran MOBJ_STAT_ADD(dyn_pad_noalign); 1626*0616c1c3SMichael Corcoran len += padding; /* at beginning */ 1627*0616c1c3SMichael Corcoran } 1628*0616c1c3SMichael Corcoran len += padding; /* at end of mapping */ 1629*0616c1c3SMichael Corcoran } 1630*0616c1c3SMichael Corcoran /* 1631*0616c1c3SMichael Corcoran * At this point, if lvp is non-NULL, then above we 1632*0616c1c3SMichael Corcoran * already found it in the cache but did not get 1633*0616c1c3SMichael Corcoran * the start address since we were not going to use lib_va. 1634*0616c1c3SMichael Corcoran * Since we know that lib_va will not be used, it's safe 1635*0616c1c3SMichael Corcoran * to call mmapobj_alloc_start_addr and know that lvp 1636*0616c1c3SMichael Corcoran * will not be modified. 1637*0616c1c3SMichael Corcoran */ 1638*0616c1c3SMichael Corcoran ASSERT(lvp ? use_lib_va == 0 : 1); 1639*0616c1c3SMichael Corcoran start_addr = mmapobj_alloc_start_addr(&lvp, len, 1640*0616c1c3SMichael Corcoran use_lib_va, align, &vattr); 1641*0616c1c3SMichael Corcoran if (start_addr == NULL) { 1642*0616c1c3SMichael Corcoran if (lvp) { 1643*0616c1c3SMichael Corcoran lib_va_release(lvp); 1644*0616c1c3SMichael Corcoran } 1645*0616c1c3SMichael Corcoran MOBJ_STAT_ADD(alloc_start_fail); 1646*0616c1c3SMichael Corcoran return (ENOMEM); 1647*0616c1c3SMichael Corcoran } 1648*0616c1c3SMichael Corcoran /* 1649*0616c1c3SMichael Corcoran * If we can't cache it, no need to hang on to it. 1650*0616c1c3SMichael Corcoran * Setting lv_num_segs to non-zero will make that 1651*0616c1c3SMichael Corcoran * field active and since there are too many segments 1652*0616c1c3SMichael Corcoran * to cache, all future users will not try to use lv_mps. 1653*0616c1c3SMichael Corcoran */ 1654*0616c1c3SMichael Corcoran if (lvp != NULL && loadable > LIBVA_CACHED_SEGS && use_lib_va) { 1655*0616c1c3SMichael Corcoran lvp->lv_num_segs = loadable; 1656*0616c1c3SMichael Corcoran lib_va_release(lvp); 1657*0616c1c3SMichael Corcoran lvp = NULL; 1658*0616c1c3SMichael Corcoran MOBJ_STAT_ADD(lvp_nocache); 1659*0616c1c3SMichael Corcoran } 1660*0616c1c3SMichael Corcoran /* 1661*0616c1c3SMichael Corcoran * Free the beginning of the mapping if the padding 1662*0616c1c3SMichael Corcoran * was not aligned correctly. 1663*0616c1c3SMichael Corcoran */ 1664*0616c1c3SMichael Corcoran if (padding != 0 && add_pad != padding) { 1665*0616c1c3SMichael Corcoran (void) as_unmap(as, start_addr, 1666*0616c1c3SMichael Corcoran add_pad - padding); 1667*0616c1c3SMichael Corcoran start_addr += (add_pad - padding); 1668*0616c1c3SMichael Corcoran MOBJ_STAT_ADD(extra_padding); 1669*0616c1c3SMichael Corcoran } 1670*0616c1c3SMichael Corcoran } 1671*0616c1c3SMichael Corcoran 1672*0616c1c3SMichael Corcoran /* 1673*0616c1c3SMichael Corcoran * At this point, we have reserved the virtual address space 1674*0616c1c3SMichael Corcoran * for our mappings. Now we need to start filling out the mrp 1675*0616c1c3SMichael Corcoran * array to describe all of the individual mappings we are going 1676*0616c1c3SMichael Corcoran * to return. 
1677*0616c1c3SMichael Corcoran * For ET_EXEC there has been no memory reservation since we are 1678*0616c1c3SMichael Corcoran * using fixed addresses. While filling in the mrp array below, 1679*0616c1c3SMichael Corcoran * we will have the first segment biased to start at addr 0 1680*0616c1c3SMichael Corcoran * and the rest will be biased by this same amount. Thus if there 1681*0616c1c3SMichael Corcoran * is padding, the first padding will start at addr 0, and the next 1682*0616c1c3SMichael Corcoran * segment will start at the value of padding. 1683*0616c1c3SMichael Corcoran */ 1684*0616c1c3SMichael Corcoran 1685*0616c1c3SMichael Corcoran /* We'll fill out padding later, so start filling in mrp at index 1 */ 1686*0616c1c3SMichael Corcoran if (padding != 0) { 1687*0616c1c3SMichael Corcoran current = 1; 1688*0616c1c3SMichael Corcoran } 1689*0616c1c3SMichael Corcoran 1690*0616c1c3SMichael Corcoran /* If we have no more need for lvp let it go now */ 1691*0616c1c3SMichael Corcoran if (lvp != NULL && use_lib_va == 0) { 1692*0616c1c3SMichael Corcoran lib_va_release(lvp); 1693*0616c1c3SMichael Corcoran MOBJ_STAT_ADD(lvp_not_needed); 1694*0616c1c3SMichael Corcoran lvp = NULL; 1695*0616c1c3SMichael Corcoran } 1696*0616c1c3SMichael Corcoran 1697*0616c1c3SMichael Corcoran /* Now fill out the mrp structs from the program headers */ 1698*0616c1c3SMichael Corcoran STRUCT_SET_HANDLE(mph, model, (struct myphdr *)phdrbase); 1699*0616c1c3SMichael Corcoran for (i = 0; i < nphdrs; i++) { 1700*0616c1c3SMichael Corcoran p_type = STRUCT_FGET(mph, x.p_type); 1701*0616c1c3SMichael Corcoran if (p_type == PT_LOAD || p_type == PT_SUNWBSS) { 1702*0616c1c3SMichael Corcoran vaddr = (caddr_t)(uintptr_t)STRUCT_FGET(mph, x.p_vaddr); 1703*0616c1c3SMichael Corcoran p_memsz = STRUCT_FGET(mph, x.p_memsz); 1704*0616c1c3SMichael Corcoran p_filesz = STRUCT_FGET(mph, x.p_filesz); 1705*0616c1c3SMichael Corcoran p_offset = STRUCT_FGET(mph, x.p_offset); 1706*0616c1c3SMichael Corcoran p_flags = STRUCT_FGET(mph, x.p_flags); 1707*0616c1c3SMichael Corcoran 1708*0616c1c3SMichael Corcoran /* 1709*0616c1c3SMichael Corcoran * Skip this header if it requests no memory to be 1710*0616c1c3SMichael Corcoran * mapped. 
1711*0616c1c3SMichael Corcoran */ 1712*0616c1c3SMichael Corcoran if (p_memsz == 0) { 1713*0616c1c3SMichael Corcoran STRUCT_SET_HANDLE(mph, model, 1714*0616c1c3SMichael Corcoran (struct myphdr *)((size_t)STRUCT_BUF(mph) + 1715*0616c1c3SMichael Corcoran hsize)); 1716*0616c1c3SMichael Corcoran MOBJ_STAT_ADD(no_mem_map_sz); 1717*0616c1c3SMichael Corcoran continue; 1718*0616c1c3SMichael Corcoran } 1719*0616c1c3SMichael Corcoran 1720*0616c1c3SMichael Corcoran prot = 0; 1721*0616c1c3SMichael Corcoran if (p_flags & PF_R) 1722*0616c1c3SMichael Corcoran prot |= PROT_READ; 1723*0616c1c3SMichael Corcoran if (p_flags & PF_W) 1724*0616c1c3SMichael Corcoran prot |= PROT_WRITE; 1725*0616c1c3SMichael Corcoran if (p_flags & PF_X) 1726*0616c1c3SMichael Corcoran prot |= PROT_EXEC; 1727*0616c1c3SMichael Corcoran 1728*0616c1c3SMichael Corcoran ASSERT(current < loadable); 1729*0616c1c3SMichael Corcoran mrp[current].mr_msize = p_memsz; 1730*0616c1c3SMichael Corcoran mrp[current].mr_fsize = p_filesz; 1731*0616c1c3SMichael Corcoran mrp[current].mr_offset = p_offset; 1732*0616c1c3SMichael Corcoran mrp[current].mr_prot = prot; 1733*0616c1c3SMichael Corcoran 1734*0616c1c3SMichael Corcoran if (hdr_seen == 0 && p_filesz != 0) { 1735*0616c1c3SMichael Corcoran mrp[current].mr_flags = MR_HDR_ELF; 1736*0616c1c3SMichael Corcoran /* 1737*0616c1c3SMichael Corcoran * We modify mr_addr and mr_offset because we 1738*0616c1c3SMichael Corcoran * need to map the ELF header as well, and if 1739*0616c1c3SMichael Corcoran * we didn't then the header could be left out 1740*0616c1c3SMichael Corcoran * of the mapping that we will create later. 1741*0616c1c3SMichael Corcoran * Since we're removing the offset, we need to 1742*0616c1c3SMichael Corcoran * account for that in the other fields as well 1743*0616c1c3SMichael Corcoran * since we will be mapping the memory from 0 1744*0616c1c3SMichael Corcoran * to p_offset. 1745*0616c1c3SMichael Corcoran */ 1746*0616c1c3SMichael Corcoran if (e_type == ET_DYN) { 1747*0616c1c3SMichael Corcoran mrp[current].mr_offset = 0; 1748*0616c1c3SMichael Corcoran mrp[current].mr_msize += p_offset; 1749*0616c1c3SMichael Corcoran mrp[current].mr_fsize += p_offset; 1750*0616c1c3SMichael Corcoran } else { 1751*0616c1c3SMichael Corcoran ASSERT(e_type == ET_EXEC); 1752*0616c1c3SMichael Corcoran /* 1753*0616c1c3SMichael Corcoran * Save off the start addr which will be 1754*0616c1c3SMichael Corcoran * our bias for the rest of the 1755*0616c1c3SMichael Corcoran * ET_EXEC mappings. 
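 * With no padding, start_addr is simply the first segment's
 * p_vaddr, so mr_addr becomes 0 here and a start_addr-relative
 * offset for every segment that follows.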
1756*0616c1c3SMichael Corcoran */ 1757*0616c1c3SMichael Corcoran start_addr = vaddr - padding; 1758*0616c1c3SMichael Corcoran } 1759*0616c1c3SMichael Corcoran mrp[current].mr_addr = (caddr_t)padding; 1760*0616c1c3SMichael Corcoran hdr_seen = 1; 1761*0616c1c3SMichael Corcoran } else { 1762*0616c1c3SMichael Corcoran if (e_type == ET_EXEC) { 1763*0616c1c3SMichael Corcoran /* bias mr_addr */ 1764*0616c1c3SMichael Corcoran mrp[current].mr_addr = 1765*0616c1c3SMichael Corcoran vaddr - (size_t)start_addr; 1766*0616c1c3SMichael Corcoran } else { 1767*0616c1c3SMichael Corcoran mrp[current].mr_addr = vaddr + padding; 1768*0616c1c3SMichael Corcoran } 1769*0616c1c3SMichael Corcoran mrp[current].mr_flags = 0; 1770*0616c1c3SMichael Corcoran } 1771*0616c1c3SMichael Corcoran current++; 1772*0616c1c3SMichael Corcoran } 1773*0616c1c3SMichael Corcoran 1774*0616c1c3SMichael Corcoran /* Move to next phdr */ 1775*0616c1c3SMichael Corcoran STRUCT_SET_HANDLE(mph, model, 1776*0616c1c3SMichael Corcoran (struct myphdr *)((size_t)STRUCT_BUF(mph) + 1777*0616c1c3SMichael Corcoran hsize)); 1778*0616c1c3SMichael Corcoran } 1779*0616c1c3SMichael Corcoran 1780*0616c1c3SMichael Corcoran /* Now fill out the padding segments */ 1781*0616c1c3SMichael Corcoran if (padding != 0) { 1782*0616c1c3SMichael Corcoran mrp[0].mr_addr = NULL; 1783*0616c1c3SMichael Corcoran mrp[0].mr_msize = padding; 1784*0616c1c3SMichael Corcoran mrp[0].mr_fsize = 0; 1785*0616c1c3SMichael Corcoran mrp[0].mr_offset = 0; 1786*0616c1c3SMichael Corcoran mrp[0].mr_prot = 0; 1787*0616c1c3SMichael Corcoran mrp[0].mr_flags = MR_PADDING; 1788*0616c1c3SMichael Corcoran 1789*0616c1c3SMichael Corcoran /* Setup padding for the last segment */ 1790*0616c1c3SMichael Corcoran ASSERT(current == loadable - 1); 1791*0616c1c3SMichael Corcoran mrp[current].mr_addr = (caddr_t)lib_len + padding; 1792*0616c1c3SMichael Corcoran mrp[current].mr_msize = padding; 1793*0616c1c3SMichael Corcoran mrp[current].mr_fsize = 0; 1794*0616c1c3SMichael Corcoran mrp[current].mr_offset = 0; 1795*0616c1c3SMichael Corcoran mrp[current].mr_prot = 0; 1796*0616c1c3SMichael Corcoran mrp[current].mr_flags = MR_PADDING; 1797*0616c1c3SMichael Corcoran } 1798*0616c1c3SMichael Corcoran 1799*0616c1c3SMichael Corcoran /* 1800*0616c1c3SMichael Corcoran * Need to make sure address ranges desired are not in use or 1801*0616c1c3SMichael Corcoran * are previously allocated reservations from /dev/null. For 1802*0616c1c3SMichael Corcoran * ET_DYN, we already made sure our address range was free. 1803*0616c1c3SMichael Corcoran */ 1804*0616c1c3SMichael Corcoran if (e_type == ET_EXEC) { 1805*0616c1c3SMichael Corcoran ret = check_exec_addrs(loadable, mrp, start_addr); 1806*0616c1c3SMichael Corcoran if (ret != 0) { 1807*0616c1c3SMichael Corcoran ASSERT(lvp == NULL); 1808*0616c1c3SMichael Corcoran MOBJ_STAT_ADD(check_exec_failed); 1809*0616c1c3SMichael Corcoran return (ret); 1810*0616c1c3SMichael Corcoran } 1811*0616c1c3SMichael Corcoran } 1812*0616c1c3SMichael Corcoran 1813*0616c1c3SMichael Corcoran /* Finish up our business with lvp. 
*/ 1814*0616c1c3SMichael Corcoran if (lvp) { 1815*0616c1c3SMichael Corcoran ASSERT(e_type == ET_DYN); 1816*0616c1c3SMichael Corcoran if (lvp->lv_num_segs == 0 && loadable <= LIBVA_CACHED_SEGS) { 1817*0616c1c3SMichael Corcoran bcopy(mrp, lvp->lv_mps, 1818*0616c1c3SMichael Corcoran loadable * sizeof (mmapobj_result_t)); 1819*0616c1c3SMichael Corcoran membar_producer(); 1820*0616c1c3SMichael Corcoran } 1821*0616c1c3SMichael Corcoran /* 1822*0616c1c3SMichael Corcoran * Setting lv_num_segs to a non-zero value indicates that 1823*0616c1c3SMichael Corcoran * lv_mps is now valid and can be used by other threads. 1824*0616c1c3SMichael Corcoran * So, the above stores need to finish before lv_num_segs 1825*0616c1c3SMichael Corcoran * is updated. lv_mps is only valid if lv_num_segs is 1826*0616c1c3SMichael Corcoran * non-zero and no greater than LIBVA_CACHED_SEGS. 1827*0616c1c3SMichael Corcoran */ 1828*0616c1c3SMichael Corcoran lvp->lv_num_segs = loadable; 1829*0616c1c3SMichael Corcoran lib_va_release(lvp); 1830*0616c1c3SMichael Corcoran MOBJ_STAT_ADD(lvp_used); 1831*0616c1c3SMichael Corcoran } 1832*0616c1c3SMichael Corcoran 1833*0616c1c3SMichael Corcoran /* Now that we have mrp completely filled out, go map it */ 1834*0616c1c3SMichael Corcoran ret = mmapobj_map_elf(vp, start_addr, mrp, loadable, fcred, e_type); 1835*0616c1c3SMichael Corcoran if (ret == 0) { 1836*0616c1c3SMichael Corcoran *num_mapped = loadable; 1837*0616c1c3SMichael Corcoran } 1838*0616c1c3SMichael Corcoran 1839*0616c1c3SMichael Corcoran return (ret); 1840*0616c1c3SMichael Corcoran } 1841*0616c1c3SMichael Corcoran 1842*0616c1c3SMichael Corcoran /* 1843*0616c1c3SMichael Corcoran * Take the ELF file passed in, and do the work of mapping it. 1844*0616c1c3SMichael Corcoran * num_mapped in - # elements in user buffer 1845*0616c1c3SMichael Corcoran * num_mapped out - # sections mapped and length of mrp array if 1846*0616c1c3SMichael Corcoran * no errors. 1847*0616c1c3SMichael Corcoran */ 1848*0616c1c3SMichael Corcoran static int 1849*0616c1c3SMichael Corcoran doelfwork(Ehdr *ehdrp, vnode_t *vp, mmapobj_result_t *mrp, 1850*0616c1c3SMichael Corcoran uint_t *num_mapped, size_t padding, cred_t *fcred) 1851*0616c1c3SMichael Corcoran { 1852*0616c1c3SMichael Corcoran int error; 1853*0616c1c3SMichael Corcoran offset_t phoff; 1854*0616c1c3SMichael Corcoran int nphdrs; 1855*0616c1c3SMichael Corcoran unsigned char ei_class; 1856*0616c1c3SMichael Corcoran unsigned short phentsize; 1857*0616c1c3SMichael Corcoran ssize_t phsizep; 1858*0616c1c3SMichael Corcoran caddr_t phbasep; 1859*0616c1c3SMichael Corcoran int to_map; 1860*0616c1c3SMichael Corcoran model_t model; 1861*0616c1c3SMichael Corcoran 1862*0616c1c3SMichael Corcoran ei_class = ehdrp->e_ident[EI_CLASS]; 1863*0616c1c3SMichael Corcoran model = get_udatamodel(); 1864*0616c1c3SMichael Corcoran if ((model == DATAMODEL_ILP32 && ei_class == ELFCLASS64) || 1865*0616c1c3SMichael Corcoran (model == DATAMODEL_LP64 && ei_class == ELFCLASS32)) { 1866*0616c1c3SMichael Corcoran MOBJ_STAT_ADD(wrong_model); 1867*0616c1c3SMichael Corcoran return (ENOTSUP); 1868*0616c1c3SMichael Corcoran } 1869*0616c1c3SMichael Corcoran 1870*0616c1c3SMichael Corcoran /* Can't execute code from "noexec" mounted filesystem.
*/ 1871*0616c1c3SMichael Corcoran if (ehdrp->e_type == ET_EXEC && 1872*0616c1c3SMichael Corcoran (vp->v_vfsp->vfs_flag & VFS_NOEXEC) != 0) { 1873*0616c1c3SMichael Corcoran MOBJ_STAT_ADD(noexec_fs); 1874*0616c1c3SMichael Corcoran return (EACCES); 1875*0616c1c3SMichael Corcoran } 1876*0616c1c3SMichael Corcoran 1877*0616c1c3SMichael Corcoran /* 1878*0616c1c3SMichael Corcoran * Relocatable and core files are mapped as a single flat file 1879*0616c1c3SMichael Corcoran * since no interpretation is done on them by mmapobj. 1880*0616c1c3SMichael Corcoran */ 1881*0616c1c3SMichael Corcoran if (ehdrp->e_type == ET_REL || ehdrp->e_type == ET_CORE) { 1882*0616c1c3SMichael Corcoran to_map = padding ? 3 : 1; 1883*0616c1c3SMichael Corcoran if (*num_mapped < to_map) { 1884*0616c1c3SMichael Corcoran *num_mapped = to_map; 1885*0616c1c3SMichael Corcoran MOBJ_STAT_ADD(e2big_et_rel); 1886*0616c1c3SMichael Corcoran return (E2BIG); 1887*0616c1c3SMichael Corcoran } 1888*0616c1c3SMichael Corcoran error = mmapobj_map_flat(vp, mrp, padding, fcred); 1889*0616c1c3SMichael Corcoran if (error == 0) { 1890*0616c1c3SMichael Corcoran *num_mapped = to_map; 1891*0616c1c3SMichael Corcoran mrp[padding ? 1 : 0].mr_flags = MR_HDR_ELF; 1892*0616c1c3SMichael Corcoran MOBJ_STAT_ADD(et_rel_mapped); 1893*0616c1c3SMichael Corcoran } 1894*0616c1c3SMichael Corcoran return (error); 1895*0616c1c3SMichael Corcoran } 1896*0616c1c3SMichael Corcoran 1897*0616c1c3SMichael Corcoran /* Check for an unknown ELF type */ 1898*0616c1c3SMichael Corcoran if (ehdrp->e_type != ET_EXEC && ehdrp->e_type != ET_DYN) { 1899*0616c1c3SMichael Corcoran MOBJ_STAT_ADD(unknown_elf_type); 1900*0616c1c3SMichael Corcoran return (ENOTSUP); 1901*0616c1c3SMichael Corcoran } 1902*0616c1c3SMichael Corcoran 1903*0616c1c3SMichael Corcoran if (ei_class == ELFCLASS32) { 1904*0616c1c3SMichael Corcoran Elf32_Ehdr *e32hdr = (Elf32_Ehdr *)ehdrp; 1905*0616c1c3SMichael Corcoran ASSERT(model == DATAMODEL_ILP32); 1906*0616c1c3SMichael Corcoran nphdrs = e32hdr->e_phnum; 1907*0616c1c3SMichael Corcoran phentsize = e32hdr->e_phentsize; 1908*0616c1c3SMichael Corcoran if (phentsize < sizeof (Elf32_Phdr)) { 1909*0616c1c3SMichael Corcoran MOBJ_STAT_ADD(phent32_too_small); 1910*0616c1c3SMichael Corcoran return (ENOTSUP); 1911*0616c1c3SMichael Corcoran } 1912*0616c1c3SMichael Corcoran phoff = e32hdr->e_phoff; 1913*0616c1c3SMichael Corcoran } else if (ei_class == ELFCLASS64) { 1914*0616c1c3SMichael Corcoran Elf64_Ehdr *e64hdr = (Elf64_Ehdr *)ehdrp; 1915*0616c1c3SMichael Corcoran ASSERT(model == DATAMODEL_LP64); 1916*0616c1c3SMichael Corcoran nphdrs = e64hdr->e_phnum; 1917*0616c1c3SMichael Corcoran phentsize = e64hdr->e_phentsize; 1918*0616c1c3SMichael Corcoran if (phentsize < sizeof (Elf64_Phdr)) { 1919*0616c1c3SMichael Corcoran MOBJ_STAT_ADD(phent64_too_small); 1920*0616c1c3SMichael Corcoran return (ENOTSUP); 1921*0616c1c3SMichael Corcoran } 1922*0616c1c3SMichael Corcoran phoff = e64hdr->e_phoff; 1923*0616c1c3SMichael Corcoran } else { 1924*0616c1c3SMichael Corcoran /* fallthrough case for an invalid ELF class */ 1925*0616c1c3SMichael Corcoran MOBJ_STAT_ADD(inval_elf_class); 1926*0616c1c3SMichael Corcoran return (ENOTSUP); 1927*0616c1c3SMichael Corcoran } 1928*0616c1c3SMichael Corcoran 1929*0616c1c3SMichael Corcoran /* 1930*0616c1c3SMichael Corcoran * nphdrs should only have this value for core files which are handled 1931*0616c1c3SMichael Corcoran * above as a single mapping. 
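 * (PN_XNUM, 0xffff, is the sentinel meaning the real phdr count is
 * kept in the first section header.)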
If other file types ever use this 1932*0616c1c3SMichael Corcoran * sentinel, then we'll add the support needed to handle this here. 1933*0616c1c3SMichael Corcoran */ 1934*0616c1c3SMichael Corcoran if (nphdrs == PN_XNUM) { 1935*0616c1c3SMichael Corcoran MOBJ_STAT_ADD(too_many_phdrs); 1936*0616c1c3SMichael Corcoran return (ENOTSUP); 1937*0616c1c3SMichael Corcoran } 1938*0616c1c3SMichael Corcoran 1939*0616c1c3SMichael Corcoran phsizep = nphdrs * phentsize; 1940*0616c1c3SMichael Corcoran 1941*0616c1c3SMichael Corcoran if (phsizep == 0) { 1942*0616c1c3SMichael Corcoran MOBJ_STAT_ADD(no_phsize); 1943*0616c1c3SMichael Corcoran return (ENOTSUP); 1944*0616c1c3SMichael Corcoran } 1945*0616c1c3SMichael Corcoran 1946*0616c1c3SMichael Corcoran /* Make sure we only wait for memory if it's a reasonable request */ 1947*0616c1c3SMichael Corcoran if (phsizep > mmapobj_alloc_threshold) { 1948*0616c1c3SMichael Corcoran MOBJ_STAT_ADD(phsize_large); 1949*0616c1c3SMichael Corcoran if ((phbasep = kmem_alloc(phsizep, KM_NOSLEEP)) == NULL) { 1950*0616c1c3SMichael Corcoran MOBJ_STAT_ADD(phsize_xtralarge); 1951*0616c1c3SMichael Corcoran return (ENOMEM); 1952*0616c1c3SMichael Corcoran } 1953*0616c1c3SMichael Corcoran } else { 1954*0616c1c3SMichael Corcoran phbasep = kmem_alloc(phsizep, KM_SLEEP); 1955*0616c1c3SMichael Corcoran } 1956*0616c1c3SMichael Corcoran 1957*0616c1c3SMichael Corcoran if ((error = vn_rdwr(UIO_READ, vp, phbasep, phsizep, 1958*0616c1c3SMichael Corcoran (offset_t)phoff, UIO_SYSSPACE, 0, (rlim64_t)0, 1959*0616c1c3SMichael Corcoran fcred, NULL)) != 0) { 1960*0616c1c3SMichael Corcoran kmem_free(phbasep, phsizep); 1961*0616c1c3SMichael Corcoran return (error); 1962*0616c1c3SMichael Corcoran } 1963*0616c1c3SMichael Corcoran 1964*0616c1c3SMichael Corcoran /* Now process the phdr's */ 1965*0616c1c3SMichael Corcoran error = process_phdr(ehdrp, phbasep, nphdrs, mrp, vp, num_mapped, 1966*0616c1c3SMichael Corcoran padding, fcred); 1967*0616c1c3SMichael Corcoran kmem_free(phbasep, phsizep); 1968*0616c1c3SMichael Corcoran return (error); 1969*0616c1c3SMichael Corcoran } 1970*0616c1c3SMichael Corcoran 1971*0616c1c3SMichael Corcoran #if defined(__sparc) 1972*0616c1c3SMichael Corcoran /* 1973*0616c1c3SMichael Corcoran * Hack to support 64 bit kernels running AOUT 4.x programs. 1974*0616c1c3SMichael Corcoran * This is the sizeof (struct nlist) for a 32 bit kernel. 1975*0616c1c3SMichael Corcoran * Since AOUT programs are 32 bit only, they will never use the 64 bit 1976*0616c1c3SMichael Corcoran * sizeof (struct nlist) and thus creating a #define is the simplest 1977*0616c1c3SMichael Corcoran * way around this since this is a format which is not being updated. 1978*0616c1c3SMichael Corcoran * This will be used in the place of sizeof (struct nlist) below. 
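 * (0xC is 12 bytes: presumably the 4 byte n_name pointer, the
 * type/other/desc fields, and the 4 byte n_value of the 32 bit nlist.)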
*/ 1980*0616c1c3SMichael Corcoran #define NLIST_SIZE (0xC) 1981*0616c1c3SMichael Corcoran 1982*0616c1c3SMichael Corcoran static int 1983*0616c1c3SMichael Corcoran doaoutwork(vnode_t *vp, mmapobj_result_t *mrp, 1984*0616c1c3SMichael Corcoran uint_t *num_mapped, struct exec *hdr, cred_t *fcred) 1985*0616c1c3SMichael Corcoran { 1986*0616c1c3SMichael Corcoran int error; 1987*0616c1c3SMichael Corcoran size_t size; 1988*0616c1c3SMichael Corcoran size_t osize; 1989*0616c1c3SMichael Corcoran size_t nsize; /* nlist size */ 1990*0616c1c3SMichael Corcoran size_t msize; 1991*0616c1c3SMichael Corcoran size_t zfoddiff; 1992*0616c1c3SMichael Corcoran caddr_t addr; 1993*0616c1c3SMichael Corcoran caddr_t start_addr; 1994*0616c1c3SMichael Corcoran struct as *as = curproc->p_as; 1995*0616c1c3SMichael Corcoran int prot = PROT_USER | PROT_READ | PROT_EXEC; 1996*0616c1c3SMichael Corcoran uint_t mflag = MAP_PRIVATE | _MAP_LOW32; 1997*0616c1c3SMichael Corcoran offset_t off = 0; 1998*0616c1c3SMichael Corcoran int segnum = 0; 1999*0616c1c3SMichael Corcoran uint_t to_map; 2000*0616c1c3SMichael Corcoran int is_library = 0; 2001*0616c1c3SMichael Corcoran struct segvn_crargs crargs = SEGVN_ZFOD_ARGS(PROT_ZFOD, PROT_ALL); 2002*0616c1c3SMichael Corcoran 2003*0616c1c3SMichael Corcoran /* Only 32bit apps supported by this file format */ 2004*0616c1c3SMichael Corcoran if (get_udatamodel() != DATAMODEL_ILP32) { 2005*0616c1c3SMichael Corcoran MOBJ_STAT_ADD(aout_64bit_try); 2006*0616c1c3SMichael Corcoran return (ENOTSUP); 2007*0616c1c3SMichael Corcoran } 2008*0616c1c3SMichael Corcoran 2009*0616c1c3SMichael Corcoran /* Check to see if this is a library */ 2010*0616c1c3SMichael Corcoran if (hdr->a_magic == ZMAGIC && hdr->a_entry < PAGESIZE) { 2011*0616c1c3SMichael Corcoran is_library = 1; 2012*0616c1c3SMichael Corcoran } 2013*0616c1c3SMichael Corcoran 2014*0616c1c3SMichael Corcoran /* Can't execute code from "noexec" mounted filesystem. */ 2015*0616c1c3SMichael Corcoran if (((vp->v_vfsp->vfs_flag & VFS_NOEXEC) != 0) && (is_library == 0)) { 2016*0616c1c3SMichael Corcoran MOBJ_STAT_ADD(aout_noexec); 2017*0616c1c3SMichael Corcoran return (EACCES); 2018*0616c1c3SMichael Corcoran } 2019*0616c1c3SMichael Corcoran 2020*0616c1c3SMichael Corcoran /* 2021*0616c1c3SMichael Corcoran * There are 2 ways to calculate the mapped size of the executable: 2022*0616c1c3SMichael Corcoran * 1) rounded text size + data size + bss size. 2023*0616c1c3SMichael Corcoran * 2) starting offset for text + text size + data size + text relocation 2024*0616c1c3SMichael Corcoran * size + data relocation size + room for the nlist data structure. 2025*0616c1c3SMichael Corcoran * 2026*0616c1c3SMichael Corcoran * The larger of the two sizes will be used to map this binary. 2027*0616c1c3SMichael Corcoran */ 2028*0616c1c3SMichael Corcoran osize = P2ROUNDUP(hdr->a_text, PAGESIZE) + hdr->a_data + hdr->a_bss; 2029*0616c1c3SMichael Corcoran 2030*0616c1c3SMichael Corcoran off = hdr->a_magic == ZMAGIC ?
0 : sizeof (struct exec); 2031*0616c1c3SMichael Corcoran 2032*0616c1c3SMichael Corcoran nsize = off + hdr->a_text + hdr->a_data + hdr->a_trsize + 2033*0616c1c3SMichael Corcoran hdr->a_drsize + NLIST_SIZE; 2034*0616c1c3SMichael Corcoran 2035*0616c1c3SMichael Corcoran size = MAX(osize, nsize); 2036*0616c1c3SMichael Corcoran if (size != nsize) { 2037*0616c1c3SMichael Corcoran nsize = 0; 2038*0616c1c3SMichael Corcoran } 2039*0616c1c3SMichael Corcoran 2040*0616c1c3SMichael Corcoran /* 2041*0616c1c3SMichael Corcoran * 1 seg for text and 1 seg for initialized data. 2042*0616c1c3SMichael Corcoran * 1 seg for bss (if can't fit in leftover space of init data) 2043*0616c1c3SMichael Corcoran * 1 seg for nlist if needed. 2044*0616c1c3SMichael Corcoran */ 2045*0616c1c3SMichael Corcoran to_map = 2 + (nsize ? 1 : 0) + 2046*0616c1c3SMichael Corcoran (hdr->a_bss > PAGESIZE - P2PHASE(hdr->a_data, PAGESIZE) ? 1 : 0); 2047*0616c1c3SMichael Corcoran if (*num_mapped < to_map) { 2048*0616c1c3SMichael Corcoran *num_mapped = to_map; 2049*0616c1c3SMichael Corcoran MOBJ_STAT_ADD(aout_e2big); 2050*0616c1c3SMichael Corcoran return (E2BIG); 2051*0616c1c3SMichael Corcoran } 2052*0616c1c3SMichael Corcoran 2053*0616c1c3SMichael Corcoran /* Reserve address space for the whole mapping */ 2054*0616c1c3SMichael Corcoran if (is_library) { 2055*0616c1c3SMichael Corcoran /* We'll let VOP_MAP below pick our address for us */ 2056*0616c1c3SMichael Corcoran addr = NULL; 2057*0616c1c3SMichael Corcoran MOBJ_STAT_ADD(aout_lib); 2058*0616c1c3SMichael Corcoran } else { 2059*0616c1c3SMichael Corcoran /* 2060*0616c1c3SMichael Corcoran * default start address for fixed binaries from AOUT 4.x 2061*0616c1c3SMichael Corcoran * standard. 2062*0616c1c3SMichael Corcoran */ 2063*0616c1c3SMichael Corcoran MOBJ_STAT_ADD(aout_fixed); 2064*0616c1c3SMichael Corcoran mflag |= MAP_FIXED; 2065*0616c1c3SMichael Corcoran addr = (caddr_t)0x2000; 2066*0616c1c3SMichael Corcoran as_rangelock(as); 2067*0616c1c3SMichael Corcoran if (as_gap(as, size, &addr, &size, 0, NULL) != 0) { 2068*0616c1c3SMichael Corcoran as_rangeunlock(as); 2069*0616c1c3SMichael Corcoran MOBJ_STAT_ADD(aout_addr_in_use); 2070*0616c1c3SMichael Corcoran return (EADDRINUSE); 2071*0616c1c3SMichael Corcoran } 2072*0616c1c3SMichael Corcoran crargs.flags |= MAP_NORESERVE; 2073*0616c1c3SMichael Corcoran error = as_map(as, addr, size, segvn_create, &crargs); 2074*0616c1c3SMichael Corcoran ASSERT(addr == (caddr_t)0x2000); 2075*0616c1c3SMichael Corcoran as_rangeunlock(as); 2076*0616c1c3SMichael Corcoran } 2077*0616c1c3SMichael Corcoran 2078*0616c1c3SMichael Corcoran start_addr = addr; 2079*0616c1c3SMichael Corcoran osize = size; 2080*0616c1c3SMichael Corcoran 2081*0616c1c3SMichael Corcoran /* 2082*0616c1c3SMichael Corcoran * Map as large as we need, backed by file, this will be text, and 2083*0616c1c3SMichael Corcoran * possibly the nlist segment. We map over this mapping for bss and 2084*0616c1c3SMichael Corcoran * initialized data segments. 
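 *
 * Rough layout once all of the mappings below are in place:
 *
 *	start_addr                                     start_addr + osize
 *	[ text | initialized data | bss | nlist tail (if needed) ]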
        /*
         * Map as large as we need, backed by the file; this will be the
         * text and possibly the nlist segment. We map over this mapping
         * for the bss and initialized data segments below.
         */
        error = VOP_MAP(vp, off, as, &addr, size, prot, PROT_ALL,
            mflag, fcred, NULL);
        if (error) {
                if (!is_library) {
                        (void) as_unmap(as, start_addr, osize);
                }
                return (error);
        }

        /* Pick up the values of start_addr and osize for libraries */
        start_addr = addr;
        osize = size;

        /*
         * We have our initial reservation/allocation so we need to use
         * fixed addresses from now on.
         */
        mflag |= MAP_FIXED;

        mrp[0].mr_addr = addr;
        mrp[0].mr_msize = hdr->a_text;
        mrp[0].mr_fsize = hdr->a_text;
        mrp[0].mr_offset = 0;
        mrp[0].mr_prot = PROT_READ | PROT_EXEC;
        mrp[0].mr_flags = MR_HDR_AOUT;

        /*
         * Map initialized data. We are mapping over a portion of the
         * previous mapping which will be unmapped in VOP_MAP below.
         */
        off = P2ROUNDUP((offset_t)(hdr->a_text), PAGESIZE);
        msize = off;
        addr += off;
        size = hdr->a_data;
        error = VOP_MAP(vp, off, as, &addr, size, PROT_ALL, PROT_ALL,
            mflag, fcred, NULL);
        if (error) {
                (void) as_unmap(as, start_addr, osize);
                return (error);
        }
        msize += size;
        mrp[1].mr_addr = addr;
        mrp[1].mr_msize = size;
        mrp[1].mr_fsize = size;
        mrp[1].mr_offset = 0;
        mrp[1].mr_prot = PROT_READ | PROT_WRITE | PROT_EXEC;
        mrp[1].mr_flags = 0;

        /* Need to zero out the remainder of the page */
        addr += hdr->a_data;
        zfoddiff = P2PHASE((size_t)addr, PAGESIZE);
        if (zfoddiff) {
                label_t ljb;

                MOBJ_STAT_ADD(aout_zfoddiff);
                zfoddiff = PAGESIZE - zfoddiff;
                if (on_fault(&ljb)) {
                        no_fault();
                        MOBJ_STAT_ADD(aout_uzero_fault);
                        (void) as_unmap(as, start_addr, osize);
                        return (EFAULT);
                }
                uzero(addr, zfoddiff);
                no_fault();
        }
        msize += zfoddiff;
        segnum = 2;
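        /*
         * Illustrative example (hypothetical values, 8K pages): if the
         * initialized data ends 0x1200 bytes into its last page, then
         * zfoddiff = 0x2000 - 0x1200 = 0xe00, and those 0xe00 bytes are
         * zeroed so the leading portion of bss in that page starts clean.
         */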
        /* Map bss */
        if (hdr->a_bss > zfoddiff) {
                struct segvn_crargs crargs =
                    SEGVN_ZFOD_ARGS(PROT_ZFOD, PROT_ALL);
                MOBJ_STAT_ADD(aout_map_bss);
                addr += zfoddiff;
                size = hdr->a_bss - zfoddiff;
                as_rangelock(as);
                (void) as_unmap(as, addr, size);
                error = as_map(as, addr, size, segvn_create, &crargs);
                as_rangeunlock(as);
                msize += size;

                if (error) {
                        MOBJ_STAT_ADD(aout_bss_fail);
                        (void) as_unmap(as, start_addr, osize);
                        return (error);
                }
                mrp[2].mr_addr = addr;
                mrp[2].mr_msize = size;
                mrp[2].mr_fsize = 0;
                mrp[2].mr_offset = 0;
                mrp[2].mr_prot = PROT_READ | PROT_WRITE | PROT_EXEC;
                mrp[2].mr_flags = 0;

                addr += size;
                segnum = 3;
        }

        /*
         * If we have extra bits left over, we need to include them in how
         * much we mapped to make sure the nlist logic is correct.
         */
        msize = P2ROUNDUP(msize, PAGESIZE);

        if (nsize && msize < nsize) {
                MOBJ_STAT_ADD(aout_nlist);
                mrp[segnum].mr_addr = addr;
                mrp[segnum].mr_msize = nsize - msize;
                mrp[segnum].mr_fsize = 0;
                mrp[segnum].mr_offset = 0;
                mrp[segnum].mr_prot = PROT_READ | PROT_EXEC;
                mrp[segnum].mr_flags = 0;
        }

        *num_mapped = to_map;
        return (0);
}
#endif

/*
 * These are the two types of files that we can interpret and we want to
 * read in enough info to cover both types when looking at the initial
 * header.
 */
#define MAX_HEADER_SIZE (MAX(sizeof (Ehdr), sizeof (struct exec)))
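/*
 * For reference (sizes are standard but not spelled out in this file, so
 * treat them as assumptions): Elf32_Ehdr is 52 bytes, Elf64_Ehdr is 64
 * bytes, and the 4.x a.out struct exec is 32 bytes, so MAX_HEADER_SIZE
 * works out to the size of the native Ehdr on both 32-bit and 64-bit
 * kernels.
 */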
/*
 * Map the vnode passed in in an interpreted manner. ELF and AOUT files
 * will be interpreted and mapped appropriately for execution.
 * num_mapped in - # elements in the mrp array
 * num_mapped out - # sections mapped and length of the mrp array if no
 *                  errors occurred, or # of sections needed if E2BIG is
 *                  returned.
 *
 * Returns 0 on success, errno value on failure.
 */
static int
mmapobj_map_interpret(vnode_t *vp, mmapobj_result_t *mrp,
    uint_t *num_mapped, size_t padding, cred_t *fcred)
{
        int error = 0;
        vattr_t vattr;
        struct lib_va *lvp;
        caddr_t start_addr;
        model_t model;

        /*
         * The header has to be aligned to the native size of ulong_t in
         * order to avoid an unaligned access when dereferencing it as a
         * ulong_t. Thus we allocate our array on the stack as type ulong_t
         * and then have header, which we later dereference as a char array,
         * point at lheader.
         */
        ulong_t lheader[(MAX_HEADER_SIZE / (sizeof (ulong_t))) + 1];
        caddr_t header = (caddr_t)&lheader;
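        /*
         * Illustrative arithmetic (assuming a 64-bit kernel where
         * MAX_HEADER_SIZE is 64 and sizeof (ulong_t) is 8): lheader is an
         * array of 64 / 8 + 1 = 9 ulong_t's, i.e. 72 bytes, naturally
         * aligned on an 8-byte boundary; the extra element covers header
         * sizes that are not a multiple of sizeof (ulong_t), since the
         * integer division rounds down.
         */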
        vattr.va_mask = AT_FSID | AT_NODEID | AT_CTIME | AT_MTIME | AT_SIZE;
        error = VOP_GETATTR(vp, &vattr, 0, fcred, NULL);
        if (error) {
                return (error);
        }

        /*
         * Check lib_va to see if we already have a full description
         * for this library. This is the fast path and is only used for
         * ET_DYN ELF files (dynamic libraries).
         */
        if (padding == 0 && (lvp = lib_va_find(&vattr)) != NULL) {
                int num_segs;

                model = get_udatamodel();
                if ((model == DATAMODEL_ILP32 &&
                    lvp->lv_flags & LV_ELF64) ||
                    (model == DATAMODEL_LP64 &&
                    lvp->lv_flags & LV_ELF32)) {
                        lib_va_release(lvp);
                        MOBJ_STAT_ADD(fast_wrong_model);
                        return (ENOTSUP);
                }
                num_segs = lvp->lv_num_segs;
                if (*num_mapped < num_segs) {
                        *num_mapped = num_segs;
                        lib_va_release(lvp);
                        MOBJ_STAT_ADD(fast_e2big);
                        return (E2BIG);
                }

                /*
                 * Check to see if we have all the mappable program headers
                 * cached.
                 */
                if (num_segs <= LIBVA_CACHED_SEGS && num_segs != 0) {
                        MOBJ_STAT_ADD(fast);
                        start_addr = mmapobj_lookup_start_addr(lvp);
                        if (start_addr == NULL) {
                                lib_va_release(lvp);
                                return (ENOMEM);
                        }

                        bcopy(lvp->lv_mps, mrp,
                            num_segs * sizeof (mmapobj_result_t));

                        error = mmapobj_map_elf(vp, start_addr, mrp,
                            num_segs, fcred, ET_DYN);

                        lib_va_release(lvp);
                        if (error == 0) {
                                *num_mapped = num_segs;
                                MOBJ_STAT_ADD(fast_success);
                        }
                        return (error);
                }
                MOBJ_STAT_ADD(fast_not_now);

                /* Release it for now since we'll look it up again below */
                lib_va_release(lvp);
        }

        /*
         * Time to see if this is a file we can interpret. If the file is
         * smaller than MAX_HEADER_SIZE, we can't interpret it.
         */
        if (vattr.va_size < MAX_HEADER_SIZE) {
                MOBJ_STAT_ADD(small_file);
                return (ENOTSUP);
        }

        if ((error = vn_rdwr(UIO_READ, vp, header, MAX_HEADER_SIZE, 0,
            UIO_SYSSPACE, 0, (rlim64_t)0, fcred, NULL)) != 0) {
                MOBJ_STAT_ADD(read_error);
                return (error);
        }

        /* Verify file type */
        if (header[EI_MAG0] == ELFMAG0 && header[EI_MAG1] == ELFMAG1 &&
            header[EI_MAG2] == ELFMAG2 && header[EI_MAG3] == ELFMAG3) {
                return (doelfwork((Ehdr *)lheader, vp, mrp, num_mapped,
                    padding, fcred));
        }

#if defined(__sparc)
        /* On sparc, check for 4.X AOUT format */
        switch (((struct exec *)header)->a_magic) {
        case OMAGIC:
        case ZMAGIC:
        case NMAGIC:
                return (doaoutwork(vp, mrp, num_mapped,
                    (struct exec *)lheader, fcred));
        }
#endif
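        /*
         * For reference (the constant values are standard but defined
         * elsewhere, so treat them as assumptions): the ELF check above
         * matches the magic bytes 0x7f 'E' 'L' 'F', while the a.out check
         * matches OMAGIC (0407, impure text), NMAGIC (0410, read-only
         * text) and ZMAGIC (0413, demand paged).
         */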
        /* Unsupported type */
        MOBJ_STAT_ADD(unsupported);
        return (ENOTSUP);
}

/*
 * Given a vnode, map it as either a flat file or interpret it and map
 * it according to the rules of the file type.
 * On entry, *num_mapped contains the number of elements in the
 * mmapobj_result_t array passed in.
 * If padding is non-zero, the mappings will be padded by that amount
 * rounded up to the nearest pagesize.
 * If the mapping is successful, *num_mapped will contain the number of
 * distinct mappings created, and mrp will point to the array of
 * mmapobj_result_t's which describe these mappings.
 *
 * Returns 0 on success and an errno value on failure. A special error
 * case is E2BIG, which is returned when there are more than *num_mapped
 * mappings to be created; *num_mapped is then set to the number of
 * mappings needed.
 */
int
mmapobj(vnode_t *vp, uint_t flags, mmapobj_result_t *mrp,
    uint_t *num_mapped, size_t padding, cred_t *fcred)
{
        int to_map;
        int error = 0;

        ASSERT((padding & PAGEOFFSET) == 0);
        ASSERT((flags & ~MMOBJ_ALL_FLAGS) == 0);
        ASSERT(num_mapped != NULL);
        ASSERT((flags & MMOBJ_PADDING) ? padding != 0 : padding == 0);

        if ((flags & MMOBJ_INTERPRET) == 0) {
                to_map = padding ? 3 : 1;
                if (*num_mapped < to_map) {
                        *num_mapped = to_map;
                        MOBJ_STAT_ADD(flat_e2big);
                        return (E2BIG);
                }
                error = mmapobj_map_flat(vp, mrp, padding, fcred);

                if (error) {
                        return (error);
                }
                *num_mapped = to_map;
                return (0);
        }

        error = mmapobj_map_interpret(vp, mrp, num_mapped, padding, fcred);
        return (error);
}
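/*
 * A minimal sketch of the caller-side E2BIG retry protocol described
 * above. This is illustration only and compiled out: the helper name
 * and the initial guess of 16 mappings are hypothetical, not part of
 * this file.
 */
#ifdef MMAPOBJ_USAGE_EXAMPLE
static int
mmapobj_with_retry(vnode_t *vp, cred_t *cr)
{
        mmapobj_result_t smrp[16];      /* hypothetical first guess */
        mmapobj_result_t *mrp = smrp;
        uint_t num_mapped = 16;
        int error;

        error = mmapobj(vp, MMOBJ_INTERPRET, mrp, &num_mapped, 0, cr);
        if (error == E2BIG) {
                /* mmapobj told us how many mappings it really needs */
                uint_t needed = num_mapped;

                mrp = kmem_alloc(needed * sizeof (mmapobj_result_t),
                    KM_SLEEP);
                num_mapped = needed;
                error = mmapobj(vp, MMOBJ_INTERPRET, mrp, &num_mapped, 0, cr);
                /* ... on success, consume mrp[0 .. num_mapped - 1] ... */
                kmem_free(mrp, needed * sizeof (mmapobj_result_t));
        }
        return (error);
}
#endif /* MMAPOBJ_USAGE_EXAMPLE */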