1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24
25 #include <sys/kmem.h>
26 #include <sys/errno.h>
27 #include <sys/systm.h>
28 #include <sys/cmn_err.h>
29 #include <sys/brand.h>
30 #include <sys/machbrand.h>
31 #include <sys/modctl.h>
32 #include <sys/rwlock.h>
33 #include <sys/zone.h>
34 #include <sys/pathname.h>
35
/*
 * The single brand interface version accepted by brand_register(); a brand
 * whose b_version differs from it is rejected with EINVAL.
 */
#define	SUPPORTED_BRAND_VERSION	BRAND_VER_1
37
38 #if defined(__sparcv9)
39 /* sparcv9 uses system wide brand interposition hooks */
40 static void brand_plat_interposition_enable(void);
41 static void brand_plat_interposition_disable(void);
42
43 struct brand_mach_ops native_mach_ops = {
44 NULL, NULL
45 };
46 #else /* !__sparcv9 */
47 struct brand_mach_ops native_mach_ops = {
48 NULL, NULL, NULL, NULL
49 };
50 #endif /* !__sparcv9 */
51
52 brand_t native_brand = {
53 BRAND_VER_1,
54 "native",
55 NULL,
56 &native_mach_ops
57 };
58
59 /*
60 * Used to maintain a list of all the brands currently loaded into the
61 * kernel.
62 */
63 struct brand_list {
64 int bl_refcnt;
65 struct brand_list *bl_next;
66 brand_t *bl_brand;
67 };
68
69 static struct brand_list *brand_list = NULL;
70
71 /*
72 * This lock protects the integrity of the brand list.
73 */
74 static kmutex_t brand_list_lock;
75
76 void
brand_init()77 brand_init()
78 {
79 mutex_init(&brand_list_lock, NULL, MUTEX_DEFAULT, NULL);
80 p0.p_brand = &native_brand;
81 }
82
83 int
brand_register(brand_t * brand)84 brand_register(brand_t *brand)
85 {
86 struct brand_list *list, *scan;
87
88 if (brand == NULL)
89 return (EINVAL);
90
91 if (brand->b_version != SUPPORTED_BRAND_VERSION) {
92 if (brand->b_version < SUPPORTED_BRAND_VERSION) {
93 cmn_err(CE_WARN,
94 "brand '%s' was built to run on older versions "
95 "of Solaris.",
96 brand->b_name);
97 } else {
98 cmn_err(CE_WARN,
99 "brand '%s' was built to run on a newer version "
100 "of Solaris.",
101 brand->b_name);
102 }
103 return (EINVAL);
104 }
105
106 /* Sanity checks */
107 if (brand->b_name == NULL || brand->b_ops == NULL ||
108 brand->b_ops->b_brandsys == NULL) {
109 cmn_err(CE_WARN, "Malformed brand");
110 return (EINVAL);
111 }
112
113 list = kmem_alloc(sizeof (struct brand_list), KM_SLEEP);
114
115 /* Add the brand to the list of loaded brands. */
116 mutex_enter(&brand_list_lock);
117
118 /*
119 * Check to be sure we haven't already registered this brand.
120 */
121 for (scan = brand_list; scan != NULL; scan = scan->bl_next) {
122 if (strcmp(brand->b_name, scan->bl_brand->b_name) == 0) {
123 cmn_err(CE_WARN,
124 "Invalid attempt to load a second instance of "
125 "brand %s", brand->b_name);
126 mutex_exit(&brand_list_lock);
127 kmem_free(list, sizeof (struct brand_list));
128 return (EINVAL);
129 }
130 }
131
132 #if defined(__sparcv9)
133 /* sparcv9 uses system wide brand interposition hooks */
134 if (brand_list == NULL)
135 brand_plat_interposition_enable();
136 #endif /* __sparcv9 */
137
138 list->bl_brand = brand;
139 list->bl_refcnt = 0;
140 list->bl_next = brand_list;
141 brand_list = list;
142
143 mutex_exit(&brand_list_lock);
144
145 return (0);
146 }
147
148 /*
149 * The kernel module implementing this brand is being unloaded, so remove
150 * it from the list of active brands.
151 */
152 int
brand_unregister(brand_t * brand)153 brand_unregister(brand_t *brand)
154 {
155 struct brand_list *list, *prev;
156
157 /* Sanity checks */
158 if (brand == NULL || brand->b_name == NULL) {
159 cmn_err(CE_WARN, "Malformed brand");
160 return (EINVAL);
161 }
162
163 prev = NULL;
164 mutex_enter(&brand_list_lock);
165
166 for (list = brand_list; list != NULL; list = list->bl_next) {
167 if (list->bl_brand == brand)
168 break;
169 prev = list;
170 }
171
172 if (list == NULL) {
173 cmn_err(CE_WARN, "Brand %s wasn't registered", brand->b_name);
174 mutex_exit(&brand_list_lock);
175 return (EINVAL);
176 }
177
178 if (list->bl_refcnt > 0) {
179 cmn_err(CE_WARN, "Unregistering brand %s which is still in use",
180 brand->b_name);
181 mutex_exit(&brand_list_lock);
182 return (EBUSY);
183 }
184
185 /* Remove brand from the list */
186 if (prev != NULL)
187 prev->bl_next = list->bl_next;
188 else
189 brand_list = list->bl_next;
190
191 #if defined(__sparcv9)
192 /* sparcv9 uses system wide brand interposition hooks */
193 if (brand_list == NULL)
194 brand_plat_interposition_disable();
195 #endif /* __sparcv9 */
196
197 mutex_exit(&brand_list_lock);
198
199 kmem_free(list, sizeof (struct brand_list));
200
201 return (0);
202 }
203
204 /*
205 * Record that a zone of this brand has been instantiated. If the kernel
206 * module implementing this brand's functionality is not present, this
207 * routine attempts to load the module as a side effect.
208 */
209 brand_t *
brand_register_zone(struct brand_attr * attr)210 brand_register_zone(struct brand_attr *attr)
211 {
212 struct brand_list *l = NULL;
213 ddi_modhandle_t hdl = NULL;
214 char *modname;
215 int err = 0;
216
217 if (is_system_labeled()) {
218 cmn_err(CE_WARN,
219 "Branded zones are not allowed on labeled systems.");
220 return (NULL);
221 }
222
223 /*
224 * We make at most two passes through this loop. The first time
225 * through, we're looking to see if this is a new user of an
226 * already loaded brand. If the brand hasn't been loaded, we
227 * call ddi_modopen() to force it to be loaded and then make a
228 * second pass through the list of brands. If we don't find the
229 * brand the second time through it means that the modname
230 * specified in the brand_attr structure doesn't provide the brand
231 * specified in the brandname field. This would suggest a bug in
232 * the brand's config.xml file. We close the module and return
233 * 'NULL' to the caller.
234 */
235 for (;;) {
236 /*
237 * Search list of loaded brands
238 */
239 mutex_enter(&brand_list_lock);
240 for (l = brand_list; l != NULL; l = l->bl_next)
241 if (strcmp(attr->ba_brandname,
242 l->bl_brand->b_name) == 0)
243 break;
244 if ((l != NULL) || (hdl != NULL))
245 break;
246 mutex_exit(&brand_list_lock);
247
248 /*
249 * We didn't find that the requested brand has been loaded
250 * yet, so we trigger the load of the appropriate kernel
251 * module and search the list again.
252 */
253 modname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
254 (void) strcpy(modname, "brand/");
255 (void) strcat(modname, attr->ba_modname);
256 hdl = ddi_modopen(modname, KRTLD_MODE_FIRST, &err);
257 kmem_free(modname, MAXPATHLEN);
258
259 if (err != 0)
260 return (NULL);
261 }
262
263 /*
264 * If we found the matching brand, bump its reference count.
265 */
266 if (l != NULL)
267 l->bl_refcnt++;
268
269 mutex_exit(&brand_list_lock);
270
271 if (hdl != NULL)
272 (void) ddi_modclose(hdl);
273
274 return ((l != NULL) ? l->bl_brand : NULL);
275 }
276
277 /*
278 * Return the number of zones currently using this brand.
279 */
280 int
brand_zone_count(struct brand * bp)281 brand_zone_count(struct brand *bp)
282 {
283 struct brand_list *l;
284 int cnt = 0;
285
286 mutex_enter(&brand_list_lock);
287 for (l = brand_list; l != NULL; l = l->bl_next)
288 if (l->bl_brand == bp) {
289 cnt = l->bl_refcnt;
290 break;
291 }
292 mutex_exit(&brand_list_lock);
293
294 return (cnt);
295 }
296
297 void
brand_unregister_zone(struct brand * bp)298 brand_unregister_zone(struct brand *bp)
299 {
300 struct brand_list *list;
301
302 mutex_enter(&brand_list_lock);
303 for (list = brand_list; list != NULL; list = list->bl_next) {
304 if (list->bl_brand == bp) {
305 ASSERT(list->bl_refcnt > 0);
306 list->bl_refcnt--;
307 break;
308 }
309 }
310 mutex_exit(&brand_list_lock);
311 }
312
313 void
brand_setbrand(proc_t * p)314 brand_setbrand(proc_t *p)
315 {
316 brand_t *bp = p->p_zone->zone_brand;
317
318 ASSERT(bp != NULL);
319 ASSERT(p->p_brand == &native_brand);
320
321 /*
322 * We should only be called from exec(), when we know the process
323 * is single-threaded.
324 */
325 ASSERT(p->p_tlist == p->p_tlist->t_forw);
326
327 p->p_brand = bp;
328 ASSERT(PROC_IS_BRANDED(p));
329 BROP(p)->b_setbrand(p);
330 }
331
332 void
brand_clearbrand(proc_t * p,boolean_t no_lwps)333 brand_clearbrand(proc_t *p, boolean_t no_lwps)
334 {
335 brand_t *bp = p->p_zone->zone_brand;
336 klwp_t *lwp = NULL;
337 ASSERT(bp != NULL);
338 ASSERT(!no_lwps || (p->p_tlist == NULL));
339
340 /*
341 * If called from exec_common() or proc_exit(),
342 * we know the process is single-threaded.
343 * If called from fork_fail, p_tlist is NULL.
344 */
345 if (!no_lwps) {
346 ASSERT(p->p_tlist == p->p_tlist->t_forw);
347 lwp = p->p_tlist->t_lwp;
348 }
349
350 ASSERT(PROC_IS_BRANDED(p));
351 BROP(p)->b_proc_exit(p, lwp);
352 p->p_brand = &native_brand;
353 }
354
355 #if defined(__sparcv9)
356 /*
357 * Currently, only sparc has system level brand syscall interposition.
358 * On x86 we're able to enable syscall interposition on a per-cpu basis
359 * when a branded thread is scheduled to run on a cpu.
360 */
361
362 /* Local variables needed for dynamic syscall interposition support */
363 static uint32_t syscall_trap_patch_instr_orig;
364 static uint32_t syscall_trap32_patch_instr_orig;
365
366 /* Trap Table syscall entry hot patch points */
367 extern void syscall_trap_patch_point(void);
368 extern void syscall_trap32_patch_point(void);
369
370 /* Alternate syscall entry handlers used when branded zones are running */
371 extern void syscall_wrapper(void);
372 extern void syscall_wrapper32(void);
373
374 /* Macros used to facilitate sparcv9 instruction generation */
375 #define BA_A_INSTR 0x30800000 /* ba,a addr */
376 #define DISP22(from, to) \
377 ((((uintptr_t)(to) - (uintptr_t)(from)) >> 2) & 0x3fffff)
378
379 /*ARGSUSED*/
380 static void
brand_plat_interposition_enable(void)381 brand_plat_interposition_enable(void)
382 {
383 ASSERT(MUTEX_HELD(&brand_list_lock));
384
385 /*
386 * Before we hot patch the kernel save the current instructions
387 * so that we can restore them later.
388 */
389 syscall_trap_patch_instr_orig =
390 *(uint32_t *)syscall_trap_patch_point;
391 syscall_trap32_patch_instr_orig =
392 *(uint32_t *)syscall_trap32_patch_point;
393
394 /*
395 * Modify the trap table at the patch points.
396 *
397 * We basically replace the first instruction at the patch
398 * point with a ba,a instruction that will transfer control
399 * to syscall_wrapper or syscall_wrapper32 for 64-bit and
400 * 32-bit syscalls respectively. It's important to note that
401 * the annul bit is set in the branch so we don't execute
402 * the instruction directly following the one we're patching
403 * during the branch's delay slot.
404 *
405 * It also doesn't matter that we're not atomically updating both
406 * the 64 and 32 bit syscall paths at the same time since there's
407 * no actual branded processes running on the system yet.
408 */
409 hot_patch_kernel_text((caddr_t)syscall_trap_patch_point,
410 BA_A_INSTR | DISP22(syscall_trap_patch_point, syscall_wrapper),
411 4);
412 hot_patch_kernel_text((caddr_t)syscall_trap32_patch_point,
413 BA_A_INSTR | DISP22(syscall_trap32_patch_point, syscall_wrapper32),
414 4);
415 }
416
417 /*ARGSUSED*/
418 static void
brand_plat_interposition_disable(void)419 brand_plat_interposition_disable(void)
420 {
421 ASSERT(MUTEX_HELD(&brand_list_lock));
422
423 /*
424 * Restore the original instructions at the trap table syscall
425 * patch points to disable the brand syscall interposition
426 * mechanism.
427 */
428 hot_patch_kernel_text((caddr_t)syscall_trap_patch_point,
429 syscall_trap_patch_instr_orig, 4);
430 hot_patch_kernel_text((caddr_t)syscall_trap32_patch_point,
431 syscall_trap32_patch_instr_orig, 4);
432 }
433 #endif /* __sparcv9 */
434
435 /*
436 * The following functions can be shared among kernel brand modules which
437 * implement Solaris-derived brands, all of which need to do similar tasks
438 * to manage the brand.
439 */
440
441 #if defined(_LP64)
442 static void
Ehdr32to64(Elf32_Ehdr * src,Ehdr * dst)443 Ehdr32to64(Elf32_Ehdr *src, Ehdr *dst)
444 {
445 bcopy(src->e_ident, dst->e_ident, sizeof (src->e_ident));
446 dst->e_type = src->e_type;
447 dst->e_machine = src->e_machine;
448 dst->e_version = src->e_version;
449 dst->e_entry = src->e_entry;
450 dst->e_phoff = src->e_phoff;
451 dst->e_shoff = src->e_shoff;
452 dst->e_flags = src->e_flags;
453 dst->e_ehsize = src->e_ehsize;
454 dst->e_phentsize = src->e_phentsize;
455 dst->e_phnum = src->e_phnum;
456 dst->e_shentsize = src->e_shentsize;
457 dst->e_shnum = src->e_shnum;
458 dst->e_shstrndx = src->e_shstrndx;
459 }
460 #endif /* _LP64 */
461
462 /*
463 * Return -1 if the cmd was not handled by this function.
464 */
465 /*ARGSUSED*/
466 int
brand_solaris_cmd(int cmd,uintptr_t arg1,uintptr_t arg2,uintptr_t arg3,struct brand * pbrand,int brandvers)467 brand_solaris_cmd(int cmd, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3,
468 struct brand *pbrand, int brandvers)
469 {
470 brand_proc_data_t *spd;
471 brand_proc_reg_t reg;
472 proc_t *p = curproc;
473 int err;
474
475 /*
476 * There is one operation that is supported for a native
477 * process; B_EXEC_BRAND. This brand operaion is redundant
478 * since the kernel assumes a native process doing an exec
479 * in a branded zone is going to run a branded processes.
480 * hence we don't support this operation.
481 */
482 if (cmd == B_EXEC_BRAND)
483 return (ENOSYS);
484
485 /* For all other operations this must be a branded process. */
486 if (p->p_brand == &native_brand)
487 return (ENOSYS);
488
489 ASSERT(p->p_brand == pbrand);
490 ASSERT(p->p_brand_data != NULL);
491
492 spd = (brand_proc_data_t *)p->p_brand_data;
493
494 switch ((cmd)) {
495 case B_EXEC_NATIVE:
496 err = exec_common((char *)arg1, (const char **)arg2,
497 (const char **)arg3, EBA_NATIVE);
498 return (err);
499
500 /*
501 * Get the address of the user-space system call handler from
502 * the user process and attach it to the proc structure.
503 */
504 case B_REGISTER:
505 if (p->p_model == DATAMODEL_NATIVE) {
506 if (copyin((void *)arg1, ®, sizeof (reg)) != 0)
507 return (EFAULT);
508 }
509 #if defined(_LP64)
510 else {
511 brand_common_reg32_t reg32;
512
513 if (copyin((void *)arg1, ®32, sizeof (reg32)) != 0)
514 return (EFAULT);
515 reg.sbr_version = reg32.sbr_version;
516 reg.sbr_handler = (caddr_t)(uintptr_t)reg32.sbr_handler;
517 }
518 #endif /* _LP64 */
519
520 if (reg.sbr_version != brandvers)
521 return (ENOTSUP);
522 spd->spd_handler = reg.sbr_handler;
523 return (0);
524
525 case B_ELFDATA:
526 if (p->p_model == DATAMODEL_NATIVE) {
527 if (copyout(&spd->spd_elf_data, (void *)arg1,
528 sizeof (brand_elf_data_t)) != 0)
529 return (EFAULT);
530 }
531 #if defined(_LP64)
532 else {
533 brand_elf_data32_t sed32;
534
535 sed32.sed_phdr = spd->spd_elf_data.sed_phdr;
536 sed32.sed_phent = spd->spd_elf_data.sed_phent;
537 sed32.sed_phnum = spd->spd_elf_data.sed_phnum;
538 sed32.sed_entry = spd->spd_elf_data.sed_entry;
539 sed32.sed_base = spd->spd_elf_data.sed_base;
540 sed32.sed_ldentry = spd->spd_elf_data.sed_ldentry;
541 sed32.sed_lddata = spd->spd_elf_data.sed_lddata;
542 if (copyout(&sed32, (void *)arg1, sizeof (sed32))
543 != 0)
544 return (EFAULT);
545 }
546 #endif /* _LP64 */
547 return (0);
548
549 /*
550 * The B_TRUSS_POINT subcommand exists so that we can see
551 * truss output from interposed system calls that return
552 * without first calling any other system call, meaning they
553 * would be invisible to truss(1).
554 * If the second argument is set non-zero, set errno to that
555 * value as well.
556 *
557 * Common arguments seen with truss are:
558 *
559 * arg1: syscall number
560 * arg2: errno
561 */
562 case B_TRUSS_POINT:
563 return ((arg2 == 0) ? 0 : set_errno((uint_t)arg2));
564 }
565
566 return (-1);
567 }
568
569 /*ARGSUSED*/
570 void
brand_solaris_copy_procdata(proc_t * child,proc_t * parent,struct brand * pbrand)571 brand_solaris_copy_procdata(proc_t *child, proc_t *parent, struct brand *pbrand)
572 {
573 brand_proc_data_t *spd;
574
575 ASSERT(parent->p_brand == pbrand);
576 ASSERT(child->p_brand == pbrand);
577 ASSERT(parent->p_brand_data != NULL);
578 ASSERT(child->p_brand_data == NULL);
579
580 /*
581 * Just duplicate all the proc data of the parent for the
582 * child
583 */
584 spd = kmem_alloc(sizeof (brand_proc_data_t), KM_SLEEP);
585 bcopy(parent->p_brand_data, spd, sizeof (brand_proc_data_t));
586 child->p_brand_data = spd;
587 }
588
589 static void
restoreexecenv(struct execenv * ep,stack_t * sp)590 restoreexecenv(struct execenv *ep, stack_t *sp)
591 {
592 klwp_t *lwp = ttolwp(curthread);
593
594 setexecenv(ep);
595 lwp->lwp_sigaltstack.ss_sp = sp->ss_sp;
596 lwp->lwp_sigaltstack.ss_size = sp->ss_size;
597 lwp->lwp_sigaltstack.ss_flags = sp->ss_flags;
598 }
599
600 /*ARGSUSED*/
601 int
brand_solaris_elfexec(vnode_t * vp,execa_t * uap,uarg_t * args,intpdata_t * idatap,int level,long * execsz,int setid,caddr_t exec_file,cred_t * cred,int brand_action,struct brand * pbrand,char * bname,char * brandlib,char * brandlib32,char * brandlinker,char * brandlinker32)602 brand_solaris_elfexec(vnode_t *vp, execa_t *uap, uarg_t *args,
603 intpdata_t *idatap, int level, long *execsz, int setid, caddr_t exec_file,
604 cred_t *cred, int brand_action, struct brand *pbrand, char *bname,
605 char *brandlib, char *brandlib32, char *brandlinker, char *brandlinker32)
606 {
607
608 vnode_t *nvp;
609 Ehdr ehdr;
610 Addr uphdr_vaddr;
611 intptr_t voffset;
612 int interp;
613 int i, err;
614 struct execenv env;
615 struct execenv origenv;
616 stack_t orig_sigaltstack;
617 struct user *up = PTOU(curproc);
618 proc_t *p = ttoproc(curthread);
619 klwp_t *lwp = ttolwp(curthread);
620 brand_proc_data_t *spd;
621 brand_elf_data_t sed, *sedp;
622 char *linker;
623 uintptr_t lddata; /* lddata of executable's linker */
624
625 ASSERT(curproc->p_brand == pbrand);
626 ASSERT(curproc->p_brand_data != NULL);
627
628 spd = (brand_proc_data_t *)curproc->p_brand_data;
629 sedp = &spd->spd_elf_data;
630
631 args->brandname = bname;
632
633 /*
634 * We will exec the brand library and then map in the target
635 * application and (optionally) the brand's default linker.
636 */
637 if (args->to_model == DATAMODEL_NATIVE) {
638 args->emulator = brandlib;
639 linker = brandlinker;
640 }
641 #if defined(_LP64)
642 else {
643 args->emulator = brandlib32;
644 linker = brandlinker32;
645 }
646 #endif /* _LP64 */
647
648 if ((err = lookupname(args->emulator, UIO_SYSSPACE, FOLLOW,
649 NULLVPP, &nvp)) != 0) {
650 uprintf("%s: not found.", args->emulator);
651 return (err);
652 }
653
654 /*
655 * The following elf{32}exec call changes the execenv in the proc
656 * struct which includes changing the p_exec member to be the vnode
657 * for the brand library (e.g. /.SUNWnative/usr/lib/s10_brand.so.1).
658 * We will eventually set the p_exec member to be the vnode for the new
659 * executable when we call setexecenv(). However, if we get an error
660 * before that call we need to restore the execenv to its original
661 * values so that when we return to the caller fop_close() works
662 * properly while cleaning up from the failed exec(). Restoring the
663 * original value will also properly decrement the 2nd VN_RELE that we
664 * took on the brand library.
665 */
666 origenv.ex_bssbase = p->p_bssbase;
667 origenv.ex_brkbase = p->p_brkbase;
668 origenv.ex_brksize = p->p_brksize;
669 origenv.ex_vp = p->p_exec;
670 orig_sigaltstack.ss_sp = lwp->lwp_sigaltstack.ss_sp;
671 orig_sigaltstack.ss_size = lwp->lwp_sigaltstack.ss_size;
672 orig_sigaltstack.ss_flags = lwp->lwp_sigaltstack.ss_flags;
673
674 if (args->to_model == DATAMODEL_NATIVE) {
675 err = elfexec(nvp, uap, args, idatap, level + 1, execsz,
676 setid, exec_file, cred, brand_action);
677 }
678 #if defined(_LP64)
679 else {
680 err = elf32exec(nvp, uap, args, idatap, level + 1, execsz,
681 setid, exec_file, cred, brand_action);
682 }
683 #endif /* _LP64 */
684 VN_RELE(nvp);
685 if (err != 0) {
686 restoreexecenv(&origenv, &orig_sigaltstack);
687 return (err);
688 }
689
690 /*
691 * The u_auxv veCTors are set up by elfexec to point to the
692 * brand emulation library and linker. Save these so they can
693 * be copied to the specific brand aux vectors.
694 */
695 bzero(&sed, sizeof (sed));
696 for (i = 0; i < __KERN_NAUXV_IMPL; i++) {
697 switch (up->u_auxv[i].a_type) {
698 case AT_SUN_LDDATA:
699 sed.sed_lddata = up->u_auxv[i].a_un.a_val;
700 break;
701 case AT_BASE:
702 sed.sed_base = up->u_auxv[i].a_un.a_val;
703 break;
704 case AT_ENTRY:
705 sed.sed_entry = up->u_auxv[i].a_un.a_val;
706 break;
707 case AT_PHDR:
708 sed.sed_phdr = up->u_auxv[i].a_un.a_val;
709 break;
710 case AT_PHENT:
711 sed.sed_phent = up->u_auxv[i].a_un.a_val;
712 break;
713 case AT_PHNUM:
714 sed.sed_phnum = up->u_auxv[i].a_un.a_val;
715 break;
716 default:
717 break;
718 }
719 }
720 /* Make sure the emulator has an entry point */
721 ASSERT(sed.sed_entry != NULL);
722 ASSERT(sed.sed_phdr != NULL);
723
724 bzero(&env, sizeof (env));
725 if (args->to_model == DATAMODEL_NATIVE) {
726 err = mapexec_brand(vp, args, &ehdr, &uphdr_vaddr,
727 &voffset, exec_file, &interp, &env.ex_bssbase,
728 &env.ex_brkbase, &env.ex_brksize, NULL);
729 }
730 #if defined(_LP64)
731 else {
732 Elf32_Ehdr ehdr32;
733 Elf32_Addr uphdr_vaddr32;
734 err = mapexec32_brand(vp, args, &ehdr32, &uphdr_vaddr32,
735 &voffset, exec_file, &interp, &env.ex_bssbase,
736 &env.ex_brkbase, &env.ex_brksize, NULL);
737 Ehdr32to64(&ehdr32, &ehdr);
738
739 if (uphdr_vaddr32 == (Elf32_Addr)-1)
740 uphdr_vaddr = (Addr)-1;
741 else
742 uphdr_vaddr = uphdr_vaddr32;
743 }
744 #endif /* _LP64 */
745 if (err != 0) {
746 restoreexecenv(&origenv, &orig_sigaltstack);
747 return (err);
748 }
749
750 /*
751 * Save off the important properties of the executable. The
752 * brand library will ask us for this data later, when it is
753 * initializing and getting ready to transfer control to the
754 * brand application.
755 */
756 if (uphdr_vaddr == (Addr)-1)
757 sedp->sed_phdr = voffset + ehdr.e_phoff;
758 else
759 sedp->sed_phdr = voffset + uphdr_vaddr;
760 sedp->sed_entry = voffset + ehdr.e_entry;
761 sedp->sed_phent = ehdr.e_phentsize;
762 sedp->sed_phnum = ehdr.e_phnum;
763
764 if (interp) {
765 if (ehdr.e_type == ET_DYN) {
766 /*
767 * This is a shared object executable, so we
768 * need to pick a reasonable place to put the
769 * heap. Just don't use the first page.
770 */
771 env.ex_brkbase = (caddr_t)PAGESIZE;
772 env.ex_bssbase = (caddr_t)PAGESIZE;
773 }
774
775 /*
776 * If the program needs an interpreter (most do), map
777 * it in and store relevant information about it in the
778 * aux vector, where the brand library can find it.
779 */
780 if ((err = lookupname(linker, UIO_SYSSPACE,
781 FOLLOW, NULLVPP, &nvp)) != 0) {
782 uprintf("%s: not found.", brandlinker);
783 restoreexecenv(&origenv, &orig_sigaltstack);
784 return (err);
785 }
786 if (args->to_model == DATAMODEL_NATIVE) {
787 err = mapexec_brand(nvp, args, &ehdr,
788 &uphdr_vaddr, &voffset, exec_file, &interp,
789 NULL, NULL, NULL, &lddata);
790 }
791 #if defined(_LP64)
792 else {
793 Elf32_Ehdr ehdr32;
794 Elf32_Addr uphdr_vaddr32;
795 err = mapexec32_brand(nvp, args, &ehdr32,
796 &uphdr_vaddr32, &voffset, exec_file, &interp,
797 NULL, NULL, NULL, &lddata);
798 Ehdr32to64(&ehdr32, &ehdr);
799
800 if (uphdr_vaddr32 == (Elf32_Addr)-1)
801 uphdr_vaddr = (Addr)-1;
802 else
803 uphdr_vaddr = uphdr_vaddr32;
804 }
805 #endif /* _LP64 */
806 VN_RELE(nvp);
807 if (err != 0) {
808 restoreexecenv(&origenv, &orig_sigaltstack);
809 return (err);
810 }
811
812 /*
813 * Now that we know the base address of the brand's
814 * linker, place it in the aux vector.
815 */
816 sedp->sed_base = voffset;
817 sedp->sed_ldentry = voffset + ehdr.e_entry;
818 sedp->sed_lddata = voffset + lddata;
819 } else {
820 /*
821 * This program has no interpreter. The brand library
822 * will jump to the address in the AT_SUN_BRAND_LDENTRY
823 * aux vector, so in this case, put the entry point of
824 * the main executable there.
825 */
826 if (ehdr.e_type == ET_EXEC) {
827 /*
828 * An executable with no interpreter, this must
829 * be a statically linked executable, which
830 * means we loaded it at the address specified
831 * in the elf header, in which case the e_entry
832 * field of the elf header is an absolute
833 * address.
834 */
835 sedp->sed_ldentry = ehdr.e_entry;
836 sedp->sed_entry = ehdr.e_entry;
837 sedp->sed_lddata = NULL;
838 sedp->sed_base = NULL;
839 } else {
840 /*
841 * A shared object with no interpreter, we use
842 * the calculated address from above.
843 */
844 sedp->sed_ldentry = sedp->sed_entry;
845 sedp->sed_entry = NULL;
846 sedp->sed_phdr = NULL;
847 sedp->sed_phent = NULL;
848 sedp->sed_phnum = NULL;
849 sedp->sed_lddata = NULL;
850 sedp->sed_base = voffset;
851
852 if (ehdr.e_type == ET_DYN) {
853 /*
854 * Delay setting the brkbase until the
855 * first call to brk(); see elfexec()
856 * for details.
857 */
858 env.ex_bssbase = (caddr_t)0;
859 env.ex_brkbase = (caddr_t)0;
860 env.ex_brksize = 0;
861 }
862 }
863 }
864
865 env.ex_magic = elfmagic;
866 env.ex_vp = vp;
867 setexecenv(&env);
868
869 /*
870 * It's time to manipulate the process aux vectors. First
871 * we need to update the AT_SUN_AUXFLAGS aux vector to set
872 * the AF_SUN_NOPLM flag.
873 */
874 if (args->to_model == DATAMODEL_NATIVE) {
875 auxv_t auxflags_auxv;
876
877 if (copyin(args->auxp_auxflags, &auxflags_auxv,
878 sizeof (auxflags_auxv)) != 0)
879 return (EFAULT);
880
881 ASSERT(auxflags_auxv.a_type == AT_SUN_AUXFLAGS);
882 auxflags_auxv.a_un.a_val |= AF_SUN_NOPLM;
883 if (copyout(&auxflags_auxv, args->auxp_auxflags,
884 sizeof (auxflags_auxv)) != 0)
885 return (EFAULT);
886 }
887 #if defined(_LP64)
888 else {
889 auxv32_t auxflags_auxv32;
890
891 if (copyin(args->auxp_auxflags, &auxflags_auxv32,
892 sizeof (auxflags_auxv32)) != 0)
893 return (EFAULT);
894
895 ASSERT(auxflags_auxv32.a_type == AT_SUN_AUXFLAGS);
896 auxflags_auxv32.a_un.a_val |= AF_SUN_NOPLM;
897 if (copyout(&auxflags_auxv32, args->auxp_auxflags,
898 sizeof (auxflags_auxv32)) != 0)
899 return (EFAULT);
900 }
901 #endif /* _LP64 */
902
903 /* Second, copy out the brand specific aux vectors. */
904 if (args->to_model == DATAMODEL_NATIVE) {
905 auxv_t brand_auxv[] = {
906 { AT_SUN_BRAND_AUX1, 0 },
907 { AT_SUN_BRAND_AUX2, 0 },
908 { AT_SUN_BRAND_AUX3, 0 }
909 };
910
911 ASSERT(brand_auxv[0].a_type ==
912 AT_SUN_BRAND_COMMON_LDDATA);
913 brand_auxv[0].a_un.a_val = sed.sed_lddata;
914
915 if (copyout(&brand_auxv, args->auxp_brand,
916 sizeof (brand_auxv)) != 0)
917 return (EFAULT);
918 }
919 #if defined(_LP64)
920 else {
921 auxv32_t brand_auxv32[] = {
922 { AT_SUN_BRAND_AUX1, 0 },
923 { AT_SUN_BRAND_AUX2, 0 },
924 { AT_SUN_BRAND_AUX3, 0 }
925 };
926
927 ASSERT(brand_auxv32[0].a_type == AT_SUN_BRAND_COMMON_LDDATA);
928 brand_auxv32[0].a_un.a_val = (uint32_t)sed.sed_lddata;
929 if (copyout(&brand_auxv32, args->auxp_brand,
930 sizeof (brand_auxv32)) != 0)
931 return (EFAULT);
932 }
933 #endif /* _LP64 */
934
935 /*
936 * Third, the /proc aux vectors set up by elfexec() point to
937 * brand emulation library and it's linker. Copy these to the
938 * /proc brand specific aux vector, and update the regular
939 * /proc aux vectors to point to the executable (and it's
940 * linker). This will enable debuggers to access the
941 * executable via the usual /proc or elf notes aux vectors.
942 *
943 * The brand emulation library's linker will get it's aux
944 * vectors off the stack, and then update the stack with the
945 * executable's aux vectors before jumping to the executable's
946 * linker.
947 *
948 * Debugging the brand emulation library must be done from
949 * the global zone, where the librtld_db module knows how to
950 * fetch the brand specific aux vectors to access the brand
951 * emulation libraries linker.
952 */
953 for (i = 0; i < __KERN_NAUXV_IMPL; i++) {
954 ulong_t val;
955
956 switch (up->u_auxv[i].a_type) {
957 case AT_SUN_BRAND_COMMON_LDDATA:
958 up->u_auxv[i].a_un.a_val = sed.sed_lddata;
959 continue;
960 case AT_BASE:
961 val = sedp->sed_base;
962 break;
963 case AT_ENTRY:
964 val = sedp->sed_entry;
965 break;
966 case AT_PHDR:
967 val = sedp->sed_phdr;
968 break;
969 case AT_PHENT:
970 val = sedp->sed_phent;
971 break;
972 case AT_PHNUM:
973 val = sedp->sed_phnum;
974 break;
975 case AT_SUN_LDDATA:
976 val = sedp->sed_lddata;
977 break;
978 default:
979 continue;
980 }
981
982 up->u_auxv[i].a_un.a_val = val;
983 if (val == NULL) {
984 /* Hide the entry for static binaries */
985 up->u_auxv[i].a_type = AT_IGNORE;
986 }
987 }
988
989 /*
990 * The last thing we do here is clear spd->spd_handler. This
991 * is important because if we're already a branded process and
992 * if this exec succeeds, there is a window between when the
993 * exec() first returns to the userland of the new process and
994 * when our brand library get's initialized, during which we
995 * don't want system calls to be re-directed to our brand
996 * library since it hasn't been initialized yet.
997 */
998 spd->spd_handler = NULL;
999
1000 return (0);
1001 }
1002
1003 void
brand_solaris_exec(struct brand * pbrand)1004 brand_solaris_exec(struct brand *pbrand)
1005 {
1006 brand_proc_data_t *spd = curproc->p_brand_data;
1007
1008 ASSERT(curproc->p_brand == pbrand);
1009 ASSERT(curproc->p_brand_data != NULL);
1010 ASSERT(ttolwp(curthread)->lwp_brand != NULL);
1011
1012 /*
1013 * We should only be called from exec(), when we know the process
1014 * is single-threaded.
1015 */
1016 ASSERT(curproc->p_tlist == curproc->p_tlist->t_forw);
1017
1018 /* Upon exec, reset our lwp brand data. */
1019 (void) brand_solaris_freelwp(ttolwp(curthread), pbrand);
1020 (void) brand_solaris_initlwp(ttolwp(curthread), pbrand);
1021
1022 /*
1023 * Upon exec, reset all the proc brand data, except for the elf
1024 * data associated with the executable we are exec'ing.
1025 */
1026 spd->spd_handler = NULL;
1027 }
1028
1029 int
brand_solaris_fini(char ** emul_table,struct modlinkage * modlinkage,struct brand * pbrand)1030 brand_solaris_fini(char **emul_table, struct modlinkage *modlinkage,
1031 struct brand *pbrand)
1032 {
1033 int err;
1034
1035 /*
1036 * If there are any zones using this brand, we can't allow it
1037 * to be unloaded.
1038 */
1039 if (brand_zone_count(pbrand))
1040 return (EBUSY);
1041
1042 kmem_free(*emul_table, NSYSCALL);
1043 *emul_table = NULL;
1044
1045 err = mod_remove(modlinkage);
1046 if (err)
1047 cmn_err(CE_WARN, "Couldn't unload brand module");
1048
1049 return (err);
1050 }
1051
1052 /*ARGSUSED*/
1053 void
brand_solaris_forklwp(klwp_t * p,klwp_t * c,struct brand * pbrand)1054 brand_solaris_forklwp(klwp_t *p, klwp_t *c, struct brand *pbrand)
1055 {
1056 ASSERT(p->lwp_procp->p_brand == pbrand);
1057 ASSERT(c->lwp_procp->p_brand == pbrand);
1058
1059 ASSERT(p->lwp_procp->p_brand_data != NULL);
1060 ASSERT(c->lwp_procp->p_brand_data != NULL);
1061
1062 /*
1063 * Both LWPs have already had been initialized via
1064 * brand_solaris_initlwp().
1065 */
1066 ASSERT(p->lwp_brand != NULL);
1067 ASSERT(c->lwp_brand != NULL);
1068 }
1069
1070 /*ARGSUSED*/
1071 void
brand_solaris_freelwp(klwp_t * l,struct brand * pbrand)1072 brand_solaris_freelwp(klwp_t *l, struct brand *pbrand)
1073 {
1074 ASSERT(l->lwp_procp->p_brand == pbrand);
1075 ASSERT(l->lwp_procp->p_brand_data != NULL);
1076 ASSERT(l->lwp_brand != NULL);
1077 l->lwp_brand = NULL;
1078 }
1079
1080 /*ARGSUSED*/
1081 int
brand_solaris_initlwp(klwp_t * l,struct brand * pbrand)1082 brand_solaris_initlwp(klwp_t *l, struct brand *pbrand)
1083 {
1084 ASSERT(l->lwp_procp->p_brand == pbrand);
1085 ASSERT(l->lwp_procp->p_brand_data != NULL);
1086 ASSERT(l->lwp_brand == NULL);
1087 l->lwp_brand = (void *)-1;
1088 return (0);
1089 }
1090
1091 /*ARGSUSED*/
1092 void
brand_solaris_lwpexit(klwp_t * l,struct brand * pbrand)1093 brand_solaris_lwpexit(klwp_t *l, struct brand *pbrand)
1094 {
1095 proc_t *p = l->lwp_procp;
1096
1097 ASSERT(l->lwp_procp->p_brand == pbrand);
1098 ASSERT(l->lwp_procp->p_brand_data != NULL);
1099 ASSERT(l->lwp_brand != NULL);
1100
1101 /*
1102 * We should never be called for the last thread in a process.
1103 * (That case is handled by brand_solaris_proc_exit().)
1104 * Therefore this lwp must be exiting from a multi-threaded
1105 * process.
1106 */
1107 ASSERT(p->p_tlist != p->p_tlist->t_forw);
1108
1109 l->lwp_brand = NULL;
1110 }
1111
1112 /*ARGSUSED*/
1113 void
brand_solaris_proc_exit(struct proc * p,klwp_t * l,struct brand * pbrand)1114 brand_solaris_proc_exit(struct proc *p, klwp_t *l, struct brand *pbrand)
1115 {
1116 ASSERT(p->p_brand == pbrand);
1117 ASSERT(p->p_brand_data != NULL);
1118
1119 /*
1120 * When called from proc_exit(), we know that process is
1121 * single-threaded and free our lwp brand data.
1122 * otherwise just free p_brand_data and return.
1123 */
1124 if (l != NULL) {
1125 ASSERT(p->p_tlist == p->p_tlist->t_forw);
1126 ASSERT(p->p_tlist->t_lwp == l);
1127 (void) brand_solaris_freelwp(l, pbrand);
1128 }
1129
1130 /* upon exit, free our proc brand data */
1131 kmem_free(p->p_brand_data, sizeof (brand_proc_data_t));
1132 p->p_brand_data = NULL;
1133 }
1134
1135 void
brand_solaris_setbrand(proc_t * p,struct brand * pbrand)1136 brand_solaris_setbrand(proc_t *p, struct brand *pbrand)
1137 {
1138 ASSERT(p->p_brand == pbrand);
1139 ASSERT(p->p_brand_data == NULL);
1140
1141 /*
1142 * We should only be called from exec(), when we know the process
1143 * is single-threaded.
1144 */
1145 ASSERT(p->p_tlist == p->p_tlist->t_forw);
1146
1147 p->p_brand_data = kmem_zalloc(sizeof (brand_proc_data_t), KM_SLEEP);
1148 (void) brand_solaris_initlwp(p->p_tlist->t_lwp, pbrand);
1149 }
1150