xref: /illumos-gate/usr/src/uts/common/os/brand.c (revision 33c72b7598992897b94815b1f47b7b8077e53808)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 
25 #include <sys/kmem.h>
26 #include <sys/errno.h>
27 #include <sys/systm.h>
28 #include <sys/cmn_err.h>
29 #include <sys/brand.h>
30 #include <sys/machbrand.h>
31 #include <sys/modctl.h>
32 #include <sys/rwlock.h>
33 #include <sys/zone.h>
34 #include <sys/pathname.h>
35 
/* The only brand interface version that brand_register() will accept. */
#define	SUPPORTED_BRAND_VERSION BRAND_VER_1

#if defined(__sparcv9)
/* sparcv9 uses system wide brand interposition hooks */
static void brand_plat_interposition_enable(void);
static void brand_plat_interposition_disable(void);

/* Machine-dependent ops of the native brand; every entry is NULL. */
struct brand_mach_ops native_mach_ops  = {
		NULL, NULL
};
#else /* !__sparcv9 */
/* Machine-dependent ops of the native brand; every entry is NULL. */
struct brand_mach_ops native_mach_ops  = {
		NULL, NULL, NULL, NULL
};
#endif /* !__sparcv9 */

/*
 * The brand assigned to p0 by brand_init() and restored on a process by
 * brand_clearbrand().  It is never placed on the list of loaded brands.
 */
brand_t native_brand = {
		BRAND_VER_1,
		"native",
		NULL,
		&native_mach_ops
};
58 
59 /*
60  * Used to maintain a list of all the brands currently loaded into the
61  * kernel.
62  */
struct brand_list {
	int			bl_refcnt;	/* zones using the brand */
	struct brand_list	*bl_next;	/* next entry; NULL at tail */
	brand_t			*bl_brand;	/* the registered brand */
};

/* Head of the singly linked list of registered brands; NULL when empty. */
static struct brand_list *brand_list = NULL;

/*
 * This lock protects the integrity of the brand list.
 */
static kmutex_t brand_list_lock;
75 
76 void
77 brand_init()
78 {
79 	mutex_init(&brand_list_lock, NULL, MUTEX_DEFAULT, NULL);
80 	p0.p_brand = &native_brand;
81 }
82 
83 int
84 brand_register(brand_t *brand)
85 {
86 	struct brand_list *list, *scan;
87 
88 	if (brand == NULL)
89 		return (EINVAL);
90 
91 	if (brand->b_version != SUPPORTED_BRAND_VERSION) {
92 		if (brand->b_version < SUPPORTED_BRAND_VERSION) {
93 			cmn_err(CE_WARN,
94 			    "brand '%s' was built to run on older versions "
95 			    "of Solaris.",
96 			    brand->b_name);
97 		} else {
98 			cmn_err(CE_WARN,
99 			    "brand '%s' was built to run on a newer version "
100 			    "of Solaris.",
101 			    brand->b_name);
102 		}
103 		return (EINVAL);
104 	}
105 
106 	/* Sanity checks */
107 	if (brand->b_name == NULL || brand->b_ops == NULL ||
108 	    brand->b_ops->b_brandsys == NULL) {
109 		cmn_err(CE_WARN, "Malformed brand");
110 		return (EINVAL);
111 	}
112 
113 	list = kmem_alloc(sizeof (struct brand_list), KM_SLEEP);
114 
115 	/* Add the brand to the list of loaded brands. */
116 	mutex_enter(&brand_list_lock);
117 
118 	/*
119 	 * Check to be sure we haven't already registered this brand.
120 	 */
121 	for (scan = brand_list; scan != NULL; scan = scan->bl_next) {
122 		if (strcmp(brand->b_name, scan->bl_brand->b_name) == 0) {
123 			cmn_err(CE_WARN,
124 			    "Invalid attempt to load a second instance of "
125 			    "brand %s", brand->b_name);
126 			mutex_exit(&brand_list_lock);
127 			kmem_free(list, sizeof (struct brand_list));
128 			return (EINVAL);
129 		}
130 	}
131 
132 #if defined(__sparcv9)
133 	/* sparcv9 uses system wide brand interposition hooks */
134 	if (brand_list == NULL)
135 		brand_plat_interposition_enable();
136 #endif /* __sparcv9 */
137 
138 	list->bl_brand = brand;
139 	list->bl_refcnt = 0;
140 	list->bl_next = brand_list;
141 	brand_list = list;
142 
143 	mutex_exit(&brand_list_lock);
144 
145 	return (0);
146 }
147 
148 /*
149  * The kernel module implementing this brand is being unloaded, so remove
150  * it from the list of active brands.
151  */
152 int
153 brand_unregister(brand_t *brand)
154 {
155 	struct brand_list *list, *prev;
156 
157 	/* Sanity checks */
158 	if (brand == NULL || brand->b_name == NULL) {
159 		cmn_err(CE_WARN, "Malformed brand");
160 		return (EINVAL);
161 	}
162 
163 	prev = NULL;
164 	mutex_enter(&brand_list_lock);
165 
166 	for (list = brand_list; list != NULL; list = list->bl_next) {
167 		if (list->bl_brand == brand)
168 			break;
169 		prev = list;
170 	}
171 
172 	if (list == NULL) {
173 		cmn_err(CE_WARN, "Brand %s wasn't registered", brand->b_name);
174 		mutex_exit(&brand_list_lock);
175 		return (EINVAL);
176 	}
177 
178 	if (list->bl_refcnt > 0) {
179 		cmn_err(CE_WARN, "Unregistering brand %s which is still in use",
180 		    brand->b_name);
181 		mutex_exit(&brand_list_lock);
182 		return (EBUSY);
183 	}
184 
185 	/* Remove brand from the list */
186 	if (prev != NULL)
187 		prev->bl_next = list->bl_next;
188 	else
189 		brand_list = list->bl_next;
190 
191 #if defined(__sparcv9)
192 	/* sparcv9 uses system wide brand interposition hooks */
193 	if (brand_list == NULL)
194 		brand_plat_interposition_disable();
195 #endif /* __sparcv9 */
196 
197 	mutex_exit(&brand_list_lock);
198 
199 	kmem_free(list, sizeof (struct brand_list));
200 
201 	return (0);
202 }
203 
204 /*
205  * Record that a zone of this brand has been instantiated.  If the kernel
206  * module implementing this brand's functionality is not present, this
207  * routine attempts to load the module as a side effect.
208  */
209 brand_t *
210 brand_register_zone(struct brand_attr *attr)
211 {
212 	struct brand_list *l = NULL;
213 	ddi_modhandle_t	hdl = NULL;
214 	char *modname;
215 	int err = 0;
216 
217 	if (is_system_labeled()) {
218 		cmn_err(CE_WARN,
219 		    "Branded zones are not allowed on labeled systems.");
220 		return (NULL);
221 	}
222 
223 	/*
224 	 * We make at most two passes through this loop.  The first time
225 	 * through, we're looking to see if this is a new user of an
226 	 * already loaded brand.  If the brand hasn't been loaded, we
227 	 * call ddi_modopen() to force it to be loaded and then make a
228 	 * second pass through the list of brands.  If we don't find the
229 	 * brand the second time through it means that the modname
230 	 * specified in the brand_attr structure doesn't provide the brand
231 	 * specified in the brandname field.  This would suggest a bug in
232 	 * the brand's config.xml file.  We close the module and return
233 	 * 'NULL' to the caller.
234 	 */
235 	for (;;) {
236 		/*
237 		 * Search list of loaded brands
238 		 */
239 		mutex_enter(&brand_list_lock);
240 		for (l = brand_list; l != NULL; l = l->bl_next)
241 			if (strcmp(attr->ba_brandname,
242 			    l->bl_brand->b_name) == 0)
243 				break;
244 		if ((l != NULL) || (hdl != NULL))
245 			break;
246 		mutex_exit(&brand_list_lock);
247 
248 		/*
249 		 * We didn't find that the requested brand has been loaded
250 		 * yet, so we trigger the load of the appropriate kernel
251 		 * module and search the list again.
252 		 */
253 		modname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
254 		(void) strcpy(modname, "brand/");
255 		(void) strcat(modname, attr->ba_modname);
256 		hdl = ddi_modopen(modname, KRTLD_MODE_FIRST, &err);
257 		kmem_free(modname, MAXPATHLEN);
258 
259 		if (err != 0)
260 			return (NULL);
261 	}
262 
263 	/*
264 	 * If we found the matching brand, bump its reference count.
265 	 */
266 	if (l != NULL)
267 		l->bl_refcnt++;
268 
269 	mutex_exit(&brand_list_lock);
270 
271 	if (hdl != NULL)
272 		(void) ddi_modclose(hdl);
273 
274 	return ((l != NULL) ? l->bl_brand : NULL);
275 }
276 
277 /*
278  * Return the number of zones currently using this brand.
279  */
280 int
281 brand_zone_count(struct brand *bp)
282 {
283 	struct brand_list *l;
284 	int cnt = 0;
285 
286 	mutex_enter(&brand_list_lock);
287 	for (l = brand_list; l != NULL; l = l->bl_next)
288 		if (l->bl_brand == bp) {
289 			cnt = l->bl_refcnt;
290 			break;
291 		}
292 	mutex_exit(&brand_list_lock);
293 
294 	return (cnt);
295 }
296 
297 void
298 brand_unregister_zone(struct brand *bp)
299 {
300 	struct brand_list *list;
301 
302 	mutex_enter(&brand_list_lock);
303 	for (list = brand_list; list != NULL; list = list->bl_next) {
304 		if (list->bl_brand == bp) {
305 			ASSERT(list->bl_refcnt > 0);
306 			list->bl_refcnt--;
307 			break;
308 		}
309 	}
310 	mutex_exit(&brand_list_lock);
311 }
312 
/*
 * Switch the process 'p' from the native brand to the brand of the zone
 * it lives in, then invoke that brand's b_setbrand hook on it.
 */
void
brand_setbrand(proc_t *p)
{
	brand_t *bp = p->p_zone->zone_brand;

	ASSERT(bp != NULL);
	ASSERT(p->p_brand == &native_brand);

	/*
	 * We should only be called from exec(), when we know the process
	 * is single-threaded.
	 */
	ASSERT(p->p_tlist == p->p_tlist->t_forw);

	/* The brand must be set before BROP(p) is used to reach its ops. */
	p->p_brand = bp;
	ASSERT(PROC_IS_BRANDED(p));
	BROP(p)->b_setbrand(p);
}
331 
332 void
333 brand_clearbrand(proc_t *p, boolean_t no_lwps)
334 {
335 	brand_t *bp = p->p_zone->zone_brand;
336 	klwp_t *lwp = NULL;
337 	ASSERT(bp != NULL);
338 	ASSERT(!no_lwps || (p->p_tlist == NULL));
339 
340 	/*
341 	 * If called from exec_common() or proc_exit(),
342 	 * we know the process is single-threaded.
343 	 * If called from fork_fail, p_tlist is NULL.
344 	 */
345 	if (!no_lwps) {
346 		ASSERT(p->p_tlist == p->p_tlist->t_forw);
347 		lwp = p->p_tlist->t_lwp;
348 	}
349 
350 	ASSERT(PROC_IS_BRANDED(p));
351 	BROP(p)->b_proc_exit(p, lwp);
352 	p->p_brand = &native_brand;
353 }
354 
355 #if defined(__sparcv9)
356 /*
357  * Currently, only sparc has system level brand syscall interposition.
358  * On x86 we're able to enable syscall interposition on a per-cpu basis
359  * when a branded thread is scheduled to run on a cpu.
360  */
361 
/*
 * Local variables needed for dynamic syscall interposition support.
 * They hold the instructions found at the two patch points when
 * brand_plat_interposition_enable() ran, so that
 * brand_plat_interposition_disable() can put them back.
 */
static uint32_t	syscall_trap_patch_instr_orig;
static uint32_t	syscall_trap32_patch_instr_orig;

/* Trap Table syscall entry hot patch points */
extern void	syscall_trap_patch_point(void);
extern void	syscall_trap32_patch_point(void);

/* Alternate syscall entry handlers used when branded zones are running */
extern void	syscall_wrapper(void);
extern void	syscall_wrapper32(void);

/*
 * Macros used to facilitate sparcv9 instruction generation.
 * DISP22() yields the byte distance from 'from' to 'to', shifted right
 * by two and masked to 22 bits, ready to be or'ed into BA_A_INSTR.
 */
#define	BA_A_INSTR	0x30800000	/* ba,a addr */
#define	DISP22(from, to) \
	((((uintptr_t)(to) - (uintptr_t)(from)) >> 2) & 0x3fffff)
378 
379 /*ARGSUSED*/
380 static void
381 brand_plat_interposition_enable(void)
382 {
383 	ASSERT(MUTEX_HELD(&brand_list_lock));
384 
385 	/*
386 	 * Before we hot patch the kernel save the current instructions
387 	 * so that we can restore them later.
388 	 */
389 	syscall_trap_patch_instr_orig =
390 	    *(uint32_t *)syscall_trap_patch_point;
391 	syscall_trap32_patch_instr_orig =
392 	    *(uint32_t *)syscall_trap32_patch_point;
393 
394 	/*
395 	 * Modify the trap table at the patch points.
396 	 *
397 	 * We basically replace the first instruction at the patch
398 	 * point with a ba,a instruction that will transfer control
399 	 * to syscall_wrapper or syscall_wrapper32 for 64-bit and
400 	 * 32-bit syscalls respectively.  It's important to note that
401 	 * the annul bit is set in the branch so we don't execute
402 	 * the instruction directly following the one we're patching
403 	 * during the branch's delay slot.
404 	 *
405 	 * It also doesn't matter that we're not atomically updating both
406 	 * the 64 and 32 bit syscall paths at the same time since there's
407 	 * no actual branded processes running on the system yet.
408 	 */
409 	hot_patch_kernel_text((caddr_t)syscall_trap_patch_point,
410 	    BA_A_INSTR | DISP22(syscall_trap_patch_point, syscall_wrapper),
411 	    4);
412 	hot_patch_kernel_text((caddr_t)syscall_trap32_patch_point,
413 	    BA_A_INSTR | DISP22(syscall_trap32_patch_point, syscall_wrapper32),
414 	    4);
415 }
416 
417 /*ARGSUSED*/
418 static void
419 brand_plat_interposition_disable(void)
420 {
421 	ASSERT(MUTEX_HELD(&brand_list_lock));
422 
423 	/*
424 	 * Restore the original instructions at the trap table syscall
425 	 * patch points to disable the brand syscall interposition
426 	 * mechanism.
427 	 */
428 	hot_patch_kernel_text((caddr_t)syscall_trap_patch_point,
429 	    syscall_trap_patch_instr_orig, 4);
430 	hot_patch_kernel_text((caddr_t)syscall_trap32_patch_point,
431 	    syscall_trap32_patch_instr_orig, 4);
432 }
433 #endif /* __sparcv9 */
434 
435 /*
436  * The following functions can be shared among kernel brand modules which
437  * implement Solaris-derived brands, all of which need to do similar tasks
438  * to manage the brand.
439  */
440 
441 #if defined(_LP64)
442 static void
443 Ehdr32to64(Elf32_Ehdr *src, Ehdr *dst)
444 {
445 	bcopy(src->e_ident, dst->e_ident, sizeof (src->e_ident));
446 	dst->e_type =		src->e_type;
447 	dst->e_machine =	src->e_machine;
448 	dst->e_version =	src->e_version;
449 	dst->e_entry =		src->e_entry;
450 	dst->e_phoff =		src->e_phoff;
451 	dst->e_shoff =		src->e_shoff;
452 	dst->e_flags =		src->e_flags;
453 	dst->e_ehsize =		src->e_ehsize;
454 	dst->e_phentsize =	src->e_phentsize;
455 	dst->e_phnum =		src->e_phnum;
456 	dst->e_shentsize =	src->e_shentsize;
457 	dst->e_shnum =		src->e_shnum;
458 	dst->e_shstrndx =	src->e_shstrndx;
459 }
460 #endif /* _LP64 */
461 
462 /*
463  * Return -1 if the cmd was not handled by this function.
464  */
465 /*ARGSUSED*/
466 int
467 brand_solaris_cmd(int cmd, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3,
468     struct brand *pbrand, int brandvers)
469 {
470 	brand_proc_data_t	*spd;
471 	brand_proc_reg_t	reg;
472 	proc_t			*p = curproc;
473 	int			err;
474 
475 	/*
476 	 * There is one operation that is supported for a native
477 	 * process; B_EXEC_BRAND.  This brand operaion is redundant
478 	 * since the kernel assumes a native process doing an exec
479 	 * in a branded zone is going to run a branded processes.
480 	 * hence we don't support this operation.
481 	 */
482 	if (cmd == B_EXEC_BRAND)
483 		return (ENOSYS);
484 
485 	/* For all other operations this must be a branded process. */
486 	if (p->p_brand == &native_brand)
487 		return (ENOSYS);
488 
489 	ASSERT(p->p_brand == pbrand);
490 	ASSERT(p->p_brand_data != NULL);
491 
492 	spd = (brand_proc_data_t *)p->p_brand_data;
493 
494 	switch ((cmd)) {
495 	case B_EXEC_NATIVE:
496 		err = exec_common((char *)arg1, (const char **)arg2,
497 		    (const char **)arg3, EBA_NATIVE);
498 		return (err);
499 
500 	/*
501 	 * Get the address of the user-space system call handler from
502 	 * the user process and attach it to the proc structure.
503 	 */
504 	case B_REGISTER:
505 		if (p->p_model == DATAMODEL_NATIVE) {
506 			if (copyin((void *)arg1, &reg, sizeof (reg)) != 0)
507 				return (EFAULT);
508 		}
509 #if defined(_LP64)
510 		else {
511 			brand_common_reg32_t reg32;
512 
513 			if (copyin((void *)arg1, &reg32, sizeof (reg32)) != 0)
514 				return (EFAULT);
515 			reg.sbr_version = reg32.sbr_version;
516 			reg.sbr_handler = (caddr_t)(uintptr_t)reg32.sbr_handler;
517 		}
518 #endif /* _LP64 */
519 
520 		if (reg.sbr_version != brandvers)
521 			return (ENOTSUP);
522 		spd->spd_handler = reg.sbr_handler;
523 		return (0);
524 
525 	case B_ELFDATA:
526 		if (p->p_model == DATAMODEL_NATIVE) {
527 			if (copyout(&spd->spd_elf_data, (void *)arg1,
528 			    sizeof (brand_elf_data_t)) != 0)
529 				return (EFAULT);
530 		}
531 #if defined(_LP64)
532 		else {
533 			brand_elf_data32_t sed32;
534 
535 			sed32.sed_phdr = spd->spd_elf_data.sed_phdr;
536 			sed32.sed_phent = spd->spd_elf_data.sed_phent;
537 			sed32.sed_phnum = spd->spd_elf_data.sed_phnum;
538 			sed32.sed_entry = spd->spd_elf_data.sed_entry;
539 			sed32.sed_base = spd->spd_elf_data.sed_base;
540 			sed32.sed_ldentry = spd->spd_elf_data.sed_ldentry;
541 			sed32.sed_lddata = spd->spd_elf_data.sed_lddata;
542 			if (copyout(&sed32, (void *)arg1, sizeof (sed32))
543 			    != 0)
544 				return (EFAULT);
545 		}
546 #endif /* _LP64 */
547 		return (0);
548 
549 	/*
550 	 * The B_TRUSS_POINT subcommand exists so that we can see
551 	 * truss output from interposed system calls that return
552 	 * without first calling any other system call, meaning they
553 	 * would be invisible to truss(1).
554 	 * If the second argument is set non-zero, set errno to that
555 	 * value as well.
556 	 *
557 	 * Common arguments seen with truss are:
558 	 *
559 	 *	arg1: syscall number
560 	 *	arg2: errno
561 	 */
562 	case B_TRUSS_POINT:
563 		return ((arg2 == 0) ? 0 : set_errno((uint_t)arg2));
564 	}
565 
566 	return (-1);
567 }
568 
569 /*ARGSUSED*/
570 void
571 brand_solaris_copy_procdata(proc_t *child, proc_t *parent, struct brand *pbrand)
572 {
573 	brand_proc_data_t	*spd;
574 
575 	ASSERT(parent->p_brand == pbrand);
576 	ASSERT(child->p_brand == pbrand);
577 	ASSERT(parent->p_brand_data != NULL);
578 	ASSERT(child->p_brand_data == NULL);
579 
580 	/*
581 	 * Just duplicate all the proc data of the parent for the
582 	 * child
583 	 */
584 	spd = kmem_alloc(sizeof (brand_proc_data_t), KM_SLEEP);
585 	bcopy(parent->p_brand_data, spd, sizeof (brand_proc_data_t));
586 	child->p_brand_data = spd;
587 }
588 
589 static void
590 restoreexecenv(struct execenv *ep, stack_t *sp)
591 {
592 	klwp_t *lwp = ttolwp(curthread);
593 
594 	setexecenv(ep);
595 	lwp->lwp_sigaltstack.ss_sp = sp->ss_sp;
596 	lwp->lwp_sigaltstack.ss_size = sp->ss_size;
597 	lwp->lwp_sigaltstack.ss_flags = sp->ss_flags;
598 }
599 
600 /*ARGSUSED*/
601 int
602 brand_solaris_elfexec(vnode_t *vp, execa_t *uap, uarg_t *args,
603     intpdata_t *idatap, int level, long *execsz, int setid, caddr_t exec_file,
604     cred_t *cred, int brand_action, struct brand *pbrand, char *bname,
605     char *brandlib, char *brandlib32, char *brandlinker, char *brandlinker32)
606 {
607 
608 	vnode_t		*nvp;
609 	Ehdr		ehdr;
610 	Addr		uphdr_vaddr;
611 	intptr_t	voffset;
612 	int		interp;
613 	int		i, err;
614 	struct execenv	env;
615 	struct execenv	origenv;
616 	stack_t		orig_sigaltstack;
617 	struct user	*up = PTOU(curproc);
618 	proc_t		*p = ttoproc(curthread);
619 	klwp_t		*lwp = ttolwp(curthread);
620 	brand_proc_data_t	*spd;
621 	brand_elf_data_t sed, *sedp;
622 	char		*linker;
623 	uintptr_t	lddata; /* lddata of executable's linker */
624 
625 	ASSERT(curproc->p_brand == pbrand);
626 	ASSERT(curproc->p_brand_data != NULL);
627 
628 	spd = (brand_proc_data_t *)curproc->p_brand_data;
629 	sedp = &spd->spd_elf_data;
630 
631 	args->brandname = bname;
632 
633 	/*
634 	 * We will exec the brand library and then map in the target
635 	 * application and (optionally) the brand's default linker.
636 	 */
637 	if (args->to_model == DATAMODEL_NATIVE) {
638 		args->emulator = brandlib;
639 		linker = brandlinker;
640 	}
641 #if defined(_LP64)
642 	else {
643 		args->emulator = brandlib32;
644 		linker = brandlinker32;
645 	}
646 #endif  /* _LP64 */
647 
648 	if ((err = lookupname(args->emulator, UIO_SYSSPACE, FOLLOW,
649 	    NULLVPP, &nvp)) != 0) {
650 		uprintf("%s: not found.", args->emulator);
651 		return (err);
652 	}
653 
654 	/*
655 	 * The following elf{32}exec call changes the execenv in the proc
656 	 * struct which includes changing the p_exec member to be the vnode
657 	 * for the brand library (e.g. /.SUNWnative/usr/lib/s10_brand.so.1).
658 	 * We will eventually set the p_exec member to be the vnode for the new
659 	 * executable when we call setexecenv().  However, if we get an error
660 	 * before that call we need to restore the execenv to its original
661 	 * values so that when we return to the caller fop_close() works
662 	 * properly while cleaning up from the failed exec().  Restoring the
663 	 * original value will also properly decrement the 2nd VN_RELE that we
664 	 * took on the brand library.
665 	 */
666 	origenv.ex_bssbase = p->p_bssbase;
667 	origenv.ex_brkbase = p->p_brkbase;
668 	origenv.ex_brksize = p->p_brksize;
669 	origenv.ex_vp = p->p_exec;
670 	orig_sigaltstack.ss_sp = lwp->lwp_sigaltstack.ss_sp;
671 	orig_sigaltstack.ss_size = lwp->lwp_sigaltstack.ss_size;
672 	orig_sigaltstack.ss_flags = lwp->lwp_sigaltstack.ss_flags;
673 
674 	if (args->to_model == DATAMODEL_NATIVE) {
675 		err = elfexec(nvp, uap, args, idatap, INTP_MAXDEPTH + 1, execsz,
676 		    setid, exec_file, cred, brand_action);
677 	}
678 #if defined(_LP64)
679 	else {
680 		err = elf32exec(nvp, uap, args, idatap, INTP_MAXDEPTH + 1,
681 		    execsz, setid, exec_file, cred, brand_action);
682 	}
683 #endif  /* _LP64 */
684 	VN_RELE(nvp);
685 	if (err != 0) {
686 		restoreexecenv(&origenv, &orig_sigaltstack);
687 		return (err);
688 	}
689 
690 	/*
691 	 * The u_auxv veCTors are set up by elfexec to point to the
692 	 * brand emulation library and linker.  Save these so they can
693 	 * be copied to the specific brand aux vectors.
694 	 */
695 	bzero(&sed, sizeof (sed));
696 	for (i = 0; i < __KERN_NAUXV_IMPL; i++) {
697 		switch (up->u_auxv[i].a_type) {
698 		case AT_SUN_LDDATA:
699 			sed.sed_lddata = up->u_auxv[i].a_un.a_val;
700 			break;
701 		case AT_BASE:
702 			sed.sed_base = up->u_auxv[i].a_un.a_val;
703 			break;
704 		case AT_ENTRY:
705 			sed.sed_entry = up->u_auxv[i].a_un.a_val;
706 			break;
707 		case AT_PHDR:
708 			sed.sed_phdr = up->u_auxv[i].a_un.a_val;
709 			break;
710 		case AT_PHENT:
711 			sed.sed_phent = up->u_auxv[i].a_un.a_val;
712 			break;
713 		case AT_PHNUM:
714 			sed.sed_phnum = up->u_auxv[i].a_un.a_val;
715 			break;
716 		default:
717 			break;
718 		}
719 	}
720 	/* Make sure the emulator has an entry point */
721 	ASSERT(sed.sed_entry != 0);
722 	ASSERT(sed.sed_phdr != 0);
723 
724 	bzero(&env, sizeof (env));
725 	if (args->to_model == DATAMODEL_NATIVE) {
726 		err = mapexec_brand(vp, args, &ehdr, &uphdr_vaddr,
727 		    &voffset, exec_file, &interp, &env.ex_bssbase,
728 		    &env.ex_brkbase, &env.ex_brksize, NULL);
729 	}
730 #if defined(_LP64)
731 	else {
732 		Elf32_Ehdr ehdr32;
733 		Elf32_Addr uphdr_vaddr32;
734 		err = mapexec32_brand(vp, args, &ehdr32, &uphdr_vaddr32,
735 		    &voffset, exec_file, &interp, &env.ex_bssbase,
736 		    &env.ex_brkbase, &env.ex_brksize, NULL);
737 		Ehdr32to64(&ehdr32, &ehdr);
738 
739 		if (uphdr_vaddr32 == (Elf32_Addr)-1)
740 			uphdr_vaddr = (Addr)-1;
741 		else
742 			uphdr_vaddr = uphdr_vaddr32;
743 	}
744 #endif  /* _LP64 */
745 	if (err != 0) {
746 		restoreexecenv(&origenv, &orig_sigaltstack);
747 		return (err);
748 	}
749 
750 	/*
751 	 * Save off the important properties of the executable. The
752 	 * brand library will ask us for this data later, when it is
753 	 * initializing and getting ready to transfer control to the
754 	 * brand application.
755 	 */
756 	if (uphdr_vaddr == (Addr)-1)
757 		sedp->sed_phdr = voffset + ehdr.e_phoff;
758 	else
759 		sedp->sed_phdr = voffset + uphdr_vaddr;
760 	sedp->sed_entry = voffset + ehdr.e_entry;
761 	sedp->sed_phent = ehdr.e_phentsize;
762 	sedp->sed_phnum = ehdr.e_phnum;
763 
764 	if (interp) {
765 		if (ehdr.e_type == ET_DYN) {
766 			/*
767 			 * This is a shared object executable, so we
768 			 * need to pick a reasonable place to put the
769 			 * heap. Just don't use the first page.
770 			 */
771 			env.ex_brkbase = (caddr_t)PAGESIZE;
772 			env.ex_bssbase = (caddr_t)PAGESIZE;
773 		}
774 
775 		/*
776 		 * If the program needs an interpreter (most do), map
777 		 * it in and store relevant information about it in the
778 		 * aux vector, where the brand library can find it.
779 		 */
780 		if ((err = lookupname(linker, UIO_SYSSPACE,
781 		    FOLLOW, NULLVPP, &nvp)) != 0) {
782 			uprintf("%s: not found.", brandlinker);
783 			restoreexecenv(&origenv, &orig_sigaltstack);
784 			return (err);
785 		}
786 		if (args->to_model == DATAMODEL_NATIVE) {
787 			err = mapexec_brand(nvp, args, &ehdr,
788 			    &uphdr_vaddr, &voffset, exec_file, &interp,
789 			    NULL, NULL, NULL, &lddata);
790 		}
791 #if defined(_LP64)
792 		else {
793 			Elf32_Ehdr ehdr32;
794 			Elf32_Addr uphdr_vaddr32;
795 			err = mapexec32_brand(nvp, args, &ehdr32,
796 			    &uphdr_vaddr32, &voffset, exec_file, &interp,
797 			    NULL, NULL, NULL, &lddata);
798 			Ehdr32to64(&ehdr32, &ehdr);
799 
800 			if (uphdr_vaddr32 == (Elf32_Addr)-1)
801 				uphdr_vaddr = (Addr)-1;
802 			else
803 				uphdr_vaddr = uphdr_vaddr32;
804 		}
805 #endif  /* _LP64 */
806 		VN_RELE(nvp);
807 		if (err != 0) {
808 			restoreexecenv(&origenv, &orig_sigaltstack);
809 			return (err);
810 		}
811 
812 		/*
813 		 * Now that we know the base address of the brand's
814 		 * linker, place it in the aux vector.
815 		 */
816 		sedp->sed_base = voffset;
817 		sedp->sed_ldentry = voffset + ehdr.e_entry;
818 		sedp->sed_lddata = voffset + lddata;
819 	} else {
820 		/*
821 		 * This program has no interpreter. The brand library
822 		 * will jump to the address in the AT_SUN_BRAND_LDENTRY
823 		 * aux vector, so in this case, put the entry point of
824 		 * the main executable there.
825 		 */
826 		if (ehdr.e_type == ET_EXEC) {
827 			/*
828 			 * An executable with no interpreter, this must
829 			 * be a statically linked executable, which
830 			 * means we loaded it at the address specified
831 			 * in the elf header, in which case the e_entry
832 			 * field of the elf header is an absolute
833 			 * address.
834 			 */
835 			sedp->sed_ldentry = ehdr.e_entry;
836 			sedp->sed_entry = ehdr.e_entry;
837 			sedp->sed_lddata = 0;
838 			sedp->sed_base = 0;
839 		} else {
840 			/*
841 			 * A shared object with no interpreter, we use
842 			 * the calculated address from above.
843 			 */
844 			sedp->sed_ldentry = sedp->sed_entry;
845 			sedp->sed_entry = 0;
846 			sedp->sed_phdr = 0;
847 			sedp->sed_phent = 0;
848 			sedp->sed_phnum = 0;
849 			sedp->sed_lddata = 0;
850 			sedp->sed_base = voffset;
851 
852 			if (ehdr.e_type == ET_DYN) {
853 				/*
854 				 * Delay setting the brkbase until the
855 				 * first call to brk(); see elfexec()
856 				 * for details.
857 				 */
858 				env.ex_bssbase = (caddr_t)0;
859 				env.ex_brkbase = (caddr_t)0;
860 				env.ex_brksize = 0;
861 			}
862 		}
863 	}
864 
865 	env.ex_magic = elfmagic;
866 	env.ex_vp = vp;
867 	setexecenv(&env);
868 
869 	/*
870 	 * It's time to manipulate the process aux vectors.  First
871 	 * we need to update the AT_SUN_AUXFLAGS aux vector to set
872 	 * the AF_SUN_NOPLM flag.
873 	 */
874 	if (args->to_model == DATAMODEL_NATIVE) {
875 		auxv_t		auxflags_auxv;
876 
877 		if (copyin(args->auxp_auxflags, &auxflags_auxv,
878 		    sizeof (auxflags_auxv)) != 0)
879 			return (EFAULT);
880 
881 		ASSERT(auxflags_auxv.a_type == AT_SUN_AUXFLAGS);
882 		auxflags_auxv.a_un.a_val |= AF_SUN_NOPLM;
883 		if (copyout(&auxflags_auxv, args->auxp_auxflags,
884 		    sizeof (auxflags_auxv)) != 0)
885 			return (EFAULT);
886 	}
887 #if defined(_LP64)
888 	else {
889 		auxv32_t	auxflags_auxv32;
890 
891 		if (copyin(args->auxp_auxflags, &auxflags_auxv32,
892 		    sizeof (auxflags_auxv32)) != 0)
893 			return (EFAULT);
894 
895 		ASSERT(auxflags_auxv32.a_type == AT_SUN_AUXFLAGS);
896 		auxflags_auxv32.a_un.a_val |= AF_SUN_NOPLM;
897 		if (copyout(&auxflags_auxv32, args->auxp_auxflags,
898 		    sizeof (auxflags_auxv32)) != 0)
899 			return (EFAULT);
900 	}
901 #endif  /* _LP64 */
902 
903 	/* Second, copy out the brand specific aux vectors. */
904 	if (args->to_model == DATAMODEL_NATIVE) {
905 		auxv_t brand_auxv[] = {
906 		    { AT_SUN_BRAND_AUX1, 0 },
907 		    { AT_SUN_BRAND_AUX2, 0 },
908 		    { AT_SUN_BRAND_AUX3, 0 }
909 		};
910 
911 		ASSERT(brand_auxv[0].a_type ==
912 		    AT_SUN_BRAND_COMMON_LDDATA);
913 		brand_auxv[0].a_un.a_val = sed.sed_lddata;
914 
915 		if (copyout(&brand_auxv, args->auxp_brand,
916 		    sizeof (brand_auxv)) != 0)
917 			return (EFAULT);
918 	}
919 #if defined(_LP64)
920 	else {
921 		auxv32_t brand_auxv32[] = {
922 		    { AT_SUN_BRAND_AUX1, 0 },
923 		    { AT_SUN_BRAND_AUX2, 0 },
924 		    { AT_SUN_BRAND_AUX3, 0 }
925 		};
926 
927 		ASSERT(brand_auxv32[0].a_type == AT_SUN_BRAND_COMMON_LDDATA);
928 		brand_auxv32[0].a_un.a_val = (uint32_t)sed.sed_lddata;
929 		if (copyout(&brand_auxv32, args->auxp_brand,
930 		    sizeof (brand_auxv32)) != 0)
931 			return (EFAULT);
932 	}
933 #endif  /* _LP64 */
934 
935 	/*
936 	 * Third, the /proc aux vectors set up by elfexec() point to
937 	 * brand emulation library and it's linker.  Copy these to the
938 	 * /proc brand specific aux vector, and update the regular
939 	 * /proc aux vectors to point to the executable (and it's
940 	 * linker).  This will enable debuggers to access the
941 	 * executable via the usual /proc or elf notes aux vectors.
942 	 *
943 	 * The brand emulation library's linker will get it's aux
944 	 * vectors off the stack, and then update the stack with the
945 	 * executable's aux vectors before jumping to the executable's
946 	 * linker.
947 	 *
948 	 * Debugging the brand emulation library must be done from
949 	 * the global zone, where the librtld_db module knows how to
950 	 * fetch the brand specific aux vectors to access the brand
951 	 * emulation libraries linker.
952 	 */
953 	for (i = 0; i < __KERN_NAUXV_IMPL; i++) {
954 		ulong_t val;
955 
956 		switch (up->u_auxv[i].a_type) {
957 		case AT_SUN_BRAND_COMMON_LDDATA:
958 			up->u_auxv[i].a_un.a_val = sed.sed_lddata;
959 			continue;
960 		case AT_BASE:
961 			val = sedp->sed_base;
962 			break;
963 		case AT_ENTRY:
964 			val = sedp->sed_entry;
965 			break;
966 		case AT_PHDR:
967 			val = sedp->sed_phdr;
968 			break;
969 		case AT_PHENT:
970 			val = sedp->sed_phent;
971 			break;
972 		case AT_PHNUM:
973 			val = sedp->sed_phnum;
974 			break;
975 		case AT_SUN_LDDATA:
976 			val = sedp->sed_lddata;
977 			break;
978 		default:
979 			continue;
980 		}
981 
982 		up->u_auxv[i].a_un.a_val = val;
983 		if (val == 0) {
984 			/* Hide the entry for static binaries */
985 			up->u_auxv[i].a_type = AT_IGNORE;
986 		}
987 	}
988 
989 	/*
990 	 * The last thing we do here is clear spd->spd_handler.  This
991 	 * is important because if we're already a branded process and
992 	 * if this exec succeeds, there is a window between when the
993 	 * exec() first returns to the userland of the new process and
994 	 * when our brand library get's initialized, during which we
995 	 * don't want system calls to be re-directed to our brand
996 	 * library since it hasn't been initialized yet.
997 	 */
998 	spd->spd_handler = NULL;
999 
1000 	return (0);
1001 }
1002 
1003 void
1004 brand_solaris_exec(struct brand *pbrand)
1005 {
1006 	brand_proc_data_t	*spd = curproc->p_brand_data;
1007 
1008 	ASSERT(curproc->p_brand == pbrand);
1009 	ASSERT(curproc->p_brand_data != NULL);
1010 	ASSERT(ttolwp(curthread)->lwp_brand != NULL);
1011 
1012 	/*
1013 	 * We should only be called from exec(), when we know the process
1014 	 * is single-threaded.
1015 	 */
1016 	ASSERT(curproc->p_tlist == curproc->p_tlist->t_forw);
1017 
1018 	/* Upon exec, reset our lwp brand data. */
1019 	(void) brand_solaris_freelwp(ttolwp(curthread), pbrand);
1020 	(void) brand_solaris_initlwp(ttolwp(curthread), pbrand);
1021 
1022 	/*
1023 	 * Upon exec, reset all the proc brand data, except for the elf
1024 	 * data associated with the executable we are exec'ing.
1025 	 */
1026 	spd->spd_handler = NULL;
1027 }
1028 
1029 int
1030 brand_solaris_fini(char **emul_table, struct modlinkage *modlinkage,
1031     struct brand *pbrand)
1032 {
1033 	int err;
1034 
1035 	/*
1036 	 * If there are any zones using this brand, we can't allow it
1037 	 * to be unloaded.
1038 	 */
1039 	if (brand_zone_count(pbrand))
1040 		return (EBUSY);
1041 
1042 	kmem_free(*emul_table, NSYSCALL);
1043 	*emul_table = NULL;
1044 
1045 	err = mod_remove(modlinkage);
1046 	if (err)
1047 		cmn_err(CE_WARN, "Couldn't unload brand module");
1048 
1049 	return (err);
1050 }
1051 
/*
 * Fork hook for an lwp of a Solaris-derived brand.  Nothing is copied
 * from the parent lwp 'p' to the child lwp 'c'; the routine only
 * verifies, on DEBUG kernels, that both are in the expected state.
 */
/*ARGSUSED*/
void
brand_solaris_forklwp(klwp_t *p, klwp_t *c, struct brand *pbrand)
{
	ASSERT(p->lwp_procp->p_brand == pbrand);
	ASSERT(c->lwp_procp->p_brand == pbrand);

	ASSERT(p->lwp_procp->p_brand_data != NULL);
	ASSERT(c->lwp_procp->p_brand_data != NULL);

	/*
	 * Both LWPs have already been initialized via
	 * brand_solaris_initlwp().
	 */
	ASSERT(p->lwp_brand != NULL);
	ASSERT(c->lwp_brand != NULL);
}
1069 
/*
 * Discard the brand data of lwp 'l'.  The lwp_brand field only ever
 * holds a marker set by brand_solaris_initlwp(), so there is nothing to
 * free; the field is simply reset to NULL.
 */
/*ARGSUSED*/
void
brand_solaris_freelwp(klwp_t *l, struct brand *pbrand)
{
	ASSERT(l->lwp_procp->p_brand == pbrand);
	ASSERT(l->lwp_procp->p_brand_data != NULL);
	ASSERT(l->lwp_brand != NULL);
	l->lwp_brand = NULL;
}
1079 
/*
 * Mark lwp 'l' as initialized for the brand.  No per-lwp data is
 * allocated; lwp_brand is set to a non-NULL marker value.  Always
 * returns 0.
 */
/*ARGSUSED*/
int
brand_solaris_initlwp(klwp_t *l, struct brand *pbrand)
{
	ASSERT(l->lwp_procp->p_brand == pbrand);
	ASSERT(l->lwp_procp->p_brand_data != NULL);
	ASSERT(l->lwp_brand == NULL);
	/* Any non-NULL value will do; the other hooks only test for NULL. */
	l->lwp_brand = (void *)-1;
	return (0);
}
1090 
/*
 * Exit hook for an lwp of a multi-threaded branded process: reset the
 * lwp's brand data to NULL.
 */
/*ARGSUSED*/
void
brand_solaris_lwpexit(klwp_t *l, struct brand *pbrand)
{
	proc_t  *p = l->lwp_procp;

	ASSERT(l->lwp_procp->p_brand == pbrand);
	ASSERT(l->lwp_procp->p_brand_data != NULL);
	ASSERT(l->lwp_brand != NULL);

	/*
	 * We should never be called for the last thread in a process.
	 * (That case is handled by brand_solaris_proc_exit().)
	 * Therefore this lwp must be exiting from a multi-threaded
	 * process.
	 */
	ASSERT(p->p_tlist != p->p_tlist->t_forw);

	l->lwp_brand = NULL;
}
1111 
/*
 * Process exit hook for Solaris-derived brands.  Resets the brand data
 * of the remaining lwp 'l', if one is supplied, and frees the
 * per-process brand data of 'p'.
 */
/*ARGSUSED*/
void
brand_solaris_proc_exit(struct proc *p, klwp_t *l, struct brand *pbrand)
{
	ASSERT(p->p_brand == pbrand);
	ASSERT(p->p_brand_data != NULL);

	/*
	 * When called from proc_exit(), we know that the process is
	 * single-threaded and free our lwp brand data.
	 * Otherwise just free p_brand_data and return.
	 */
	if (l != NULL) {
		ASSERT(p->p_tlist == p->p_tlist->t_forw);
		ASSERT(p->p_tlist->t_lwp == l);
		(void) brand_solaris_freelwp(l, pbrand);
	}

	/* upon exit, free our proc brand data */
	kmem_free(p->p_brand_data, sizeof (brand_proc_data_t));
	p->p_brand_data = NULL;
}
1134 
/*
 * Set up the brand state of a process that has just been branded:
 * allocate zero-filled per-process brand data for 'p' and initialize the
 * brand data of its only lwp.
 */
void
brand_solaris_setbrand(proc_t *p, struct brand *pbrand)
{
	ASSERT(p->p_brand == pbrand);
	ASSERT(p->p_brand_data == NULL);

	/*
	 * We should only be called from exec(), when we know the process
	 * is single-threaded.
	 */
	ASSERT(p->p_tlist == p->p_tlist->t_forw);

	/* The proc data must exist before the lwp is initialized. */
	p->p_brand_data = kmem_zalloc(sizeof (brand_proc_data_t), KM_SLEEP);
	(void) brand_solaris_initlwp(p->p_tlist->t_lwp, pbrand);
}
1150