xref: /illumos-gate/usr/src/uts/common/os/brand.c (revision 2b395c3c2a39cdc00f9fe7ac497795bd112f7663)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright 2019 Joyent, Inc.
24  */
25 
26 #include <sys/kmem.h>
27 #include <sys/errno.h>
28 #include <sys/systm.h>
29 #include <sys/cmn_err.h>
30 #include <sys/brand.h>
31 #include <sys/machbrand.h>
32 #include <sys/modctl.h>
33 #include <sys/rwlock.h>
34 #include <sys/zone.h>
35 #include <sys/pathname.h>
36 
37 #define	SUPPORTED_BRAND_VERSION BRAND_VER_1
38 
39 #if defined(__sparcv9)
40 /* sparcv9 uses system wide brand interposition hooks */
41 static void brand_plat_interposition_enable(void);
42 static void brand_plat_interposition_disable(void);
43 
44 struct brand_mach_ops native_mach_ops  = {
45 		NULL, NULL
46 };
47 #else /* !__sparcv9 */
48 struct brand_mach_ops native_mach_ops  = {
49 		NULL, NULL, NULL, NULL
50 };
51 #endif /* !__sparcv9 */
52 
53 brand_t native_brand = {
54 		BRAND_VER_1,
55 		"native",
56 		NULL,
57 		&native_mach_ops
58 };
59 
60 /*
61  * Used to maintain a list of all the brands currently loaded into the
62  * kernel.
63  */
64 struct brand_list {
65 	int			bl_refcnt;
66 	struct brand_list	*bl_next;
67 	brand_t			*bl_brand;
68 };
69 
70 static struct brand_list *brand_list = NULL;
71 
72 /*
73  * This lock protects the integrity of the brand list.
74  */
75 static kmutex_t brand_list_lock;
76 
77 void
brand_init()78 brand_init()
79 {
80 	mutex_init(&brand_list_lock, NULL, MUTEX_DEFAULT, NULL);
81 	p0.p_brand = &native_brand;
82 }
83 
84 int
brand_register(brand_t * brand)85 brand_register(brand_t *brand)
86 {
87 	struct brand_list *list, *scan;
88 
89 	if (brand == NULL)
90 		return (EINVAL);
91 
92 	if (brand->b_version != SUPPORTED_BRAND_VERSION) {
93 		if (brand->b_version < SUPPORTED_BRAND_VERSION) {
94 			cmn_err(CE_WARN,
95 			    "brand '%s' was built to run on older versions "
96 			    "of Solaris.",
97 			    brand->b_name);
98 		} else {
99 			cmn_err(CE_WARN,
100 			    "brand '%s' was built to run on a newer version "
101 			    "of Solaris.",
102 			    brand->b_name);
103 		}
104 		return (EINVAL);
105 	}
106 
107 	/* Sanity checks */
108 	if (brand->b_name == NULL || brand->b_ops == NULL ||
109 	    brand->b_ops->b_brandsys == NULL) {
110 		cmn_err(CE_WARN, "Malformed brand");
111 		return (EINVAL);
112 	}
113 
114 	list = kmem_alloc(sizeof (struct brand_list), KM_SLEEP);
115 
116 	/* Add the brand to the list of loaded brands. */
117 	mutex_enter(&brand_list_lock);
118 
119 	/*
120 	 * Check to be sure we haven't already registered this brand.
121 	 */
122 	for (scan = brand_list; scan != NULL; scan = scan->bl_next) {
123 		if (strcmp(brand->b_name, scan->bl_brand->b_name) == 0) {
124 			cmn_err(CE_WARN,
125 			    "Invalid attempt to load a second instance of "
126 			    "brand %s", brand->b_name);
127 			mutex_exit(&brand_list_lock);
128 			kmem_free(list, sizeof (struct brand_list));
129 			return (EINVAL);
130 		}
131 	}
132 
133 #if defined(__sparcv9)
134 	/* sparcv9 uses system wide brand interposition hooks */
135 	if (brand_list == NULL)
136 		brand_plat_interposition_enable();
137 #endif /* __sparcv9 */
138 
139 	list->bl_brand = brand;
140 	list->bl_refcnt = 0;
141 	list->bl_next = brand_list;
142 	brand_list = list;
143 
144 	mutex_exit(&brand_list_lock);
145 
146 	return (0);
147 }
148 
149 /*
150  * The kernel module implementing this brand is being unloaded, so remove
151  * it from the list of active brands.
152  */
153 int
brand_unregister(brand_t * brand)154 brand_unregister(brand_t *brand)
155 {
156 	struct brand_list *list, *prev;
157 
158 	/* Sanity checks */
159 	if (brand == NULL || brand->b_name == NULL) {
160 		cmn_err(CE_WARN, "Malformed brand");
161 		return (EINVAL);
162 	}
163 
164 	prev = NULL;
165 	mutex_enter(&brand_list_lock);
166 
167 	for (list = brand_list; list != NULL; list = list->bl_next) {
168 		if (list->bl_brand == brand)
169 			break;
170 		prev = list;
171 	}
172 
173 	if (list == NULL) {
174 		cmn_err(CE_WARN, "Brand %s wasn't registered", brand->b_name);
175 		mutex_exit(&brand_list_lock);
176 		return (EINVAL);
177 	}
178 
179 	if (list->bl_refcnt > 0) {
180 		cmn_err(CE_WARN, "Unregistering brand %s which is still in use",
181 		    brand->b_name);
182 		mutex_exit(&brand_list_lock);
183 		return (EBUSY);
184 	}
185 
186 	/* Remove brand from the list */
187 	if (prev != NULL)
188 		prev->bl_next = list->bl_next;
189 	else
190 		brand_list = list->bl_next;
191 
192 #if defined(__sparcv9)
193 	/* sparcv9 uses system wide brand interposition hooks */
194 	if (brand_list == NULL)
195 		brand_plat_interposition_disable();
196 #endif /* __sparcv9 */
197 
198 	mutex_exit(&brand_list_lock);
199 
200 	kmem_free(list, sizeof (struct brand_list));
201 
202 	return (0);
203 }
204 
205 /*
206  * Record that a zone of this brand has been instantiated.  If the kernel
207  * module implementing this brand's functionality is not present, this
208  * routine attempts to load the module as a side effect.
209  */
210 brand_t *
brand_register_zone(struct brand_attr * attr)211 brand_register_zone(struct brand_attr *attr)
212 {
213 	struct brand_list *l = NULL;
214 	ddi_modhandle_t	hdl = NULL;
215 	char *modname;
216 	int err = 0;
217 
218 	if (is_system_labeled()) {
219 		cmn_err(CE_WARN,
220 		    "Branded zones are not allowed on labeled systems.");
221 		return (NULL);
222 	}
223 
224 	/*
225 	 * We make at most two passes through this loop.  The first time
226 	 * through, we're looking to see if this is a new user of an
227 	 * already loaded brand.  If the brand hasn't been loaded, we
228 	 * call ddi_modopen() to force it to be loaded and then make a
229 	 * second pass through the list of brands.  If we don't find the
230 	 * brand the second time through it means that the modname
231 	 * specified in the brand_attr structure doesn't provide the brand
232 	 * specified in the brandname field.  This would suggest a bug in
233 	 * the brand's config.xml file.  We close the module and return
234 	 * 'NULL' to the caller.
235 	 */
236 	for (;;) {
237 		/*
238 		 * Search list of loaded brands
239 		 */
240 		mutex_enter(&brand_list_lock);
241 		for (l = brand_list; l != NULL; l = l->bl_next)
242 			if (strcmp(attr->ba_brandname,
243 			    l->bl_brand->b_name) == 0)
244 				break;
245 		if ((l != NULL) || (hdl != NULL))
246 			break;
247 		mutex_exit(&brand_list_lock);
248 
249 		/*
250 		 * We didn't find that the requested brand has been loaded
251 		 * yet, so we trigger the load of the appropriate kernel
252 		 * module and search the list again.
253 		 */
254 		modname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
255 		(void) strcpy(modname, "brand/");
256 		(void) strcat(modname, attr->ba_modname);
257 		hdl = ddi_modopen(modname, KRTLD_MODE_FIRST, &err);
258 		kmem_free(modname, MAXPATHLEN);
259 
260 		if (err != 0)
261 			return (NULL);
262 	}
263 
264 	/*
265 	 * If we found the matching brand, bump its reference count.
266 	 */
267 	if (l != NULL)
268 		l->bl_refcnt++;
269 
270 	mutex_exit(&brand_list_lock);
271 
272 	if (hdl != NULL)
273 		(void) ddi_modclose(hdl);
274 
275 	return ((l != NULL) ? l->bl_brand : NULL);
276 }
277 
278 /*
279  * Return the number of zones currently using this brand.
280  */
281 int
brand_zone_count(struct brand * bp)282 brand_zone_count(struct brand *bp)
283 {
284 	struct brand_list *l;
285 	int cnt = 0;
286 
287 	mutex_enter(&brand_list_lock);
288 	for (l = brand_list; l != NULL; l = l->bl_next)
289 		if (l->bl_brand == bp) {
290 			cnt = l->bl_refcnt;
291 			break;
292 		}
293 	mutex_exit(&brand_list_lock);
294 
295 	return (cnt);
296 }
297 
298 void
brand_unregister_zone(struct brand * bp)299 brand_unregister_zone(struct brand *bp)
300 {
301 	struct brand_list *list;
302 
303 	mutex_enter(&brand_list_lock);
304 	for (list = brand_list; list != NULL; list = list->bl_next) {
305 		if (list->bl_brand == bp) {
306 			ASSERT(list->bl_refcnt > 0);
307 			list->bl_refcnt--;
308 			break;
309 		}
310 	}
311 	mutex_exit(&brand_list_lock);
312 }
313 
314 void
brand_setbrand(proc_t * p)315 brand_setbrand(proc_t *p)
316 {
317 	brand_t *bp = p->p_zone->zone_brand;
318 
319 	ASSERT(bp != NULL);
320 	ASSERT(p->p_brand == &native_brand);
321 
322 	/*
323 	 * We should only be called from exec(), when we know the process
324 	 * is single-threaded.
325 	 */
326 	ASSERT(p->p_tlist == p->p_tlist->t_forw);
327 
328 	p->p_brand = bp;
329 	ASSERT(PROC_IS_BRANDED(p));
330 	BROP(p)->b_setbrand(p);
331 }
332 
333 void
brand_clearbrand(proc_t * p,boolean_t no_lwps)334 brand_clearbrand(proc_t *p, boolean_t no_lwps)
335 {
336 	brand_t *bp = p->p_zone->zone_brand;
337 	klwp_t *lwp = NULL;
338 	ASSERT(bp != NULL);
339 	ASSERT(!no_lwps || (p->p_tlist == NULL));
340 
341 	/*
342 	 * If called from exec_common() or proc_exit(),
343 	 * we know the process is single-threaded.
344 	 * If called from fork_fail, p_tlist is NULL.
345 	 */
346 	if (!no_lwps) {
347 		ASSERT(p->p_tlist == p->p_tlist->t_forw);
348 		lwp = p->p_tlist->t_lwp;
349 	}
350 
351 	ASSERT(PROC_IS_BRANDED(p));
352 	BROP(p)->b_proc_exit(p, lwp);
353 	p->p_brand = &native_brand;
354 }
355 
356 #if defined(__sparcv9)
357 /*
358  * Currently, only sparc has system level brand syscall interposition.
359  * On x86 we're able to enable syscall interposition on a per-cpu basis
360  * when a branded thread is scheduled to run on a cpu.
361  */
362 
363 /* Local variables needed for dynamic syscall interposition support */
364 static uint32_t	syscall_trap_patch_instr_orig;
365 static uint32_t	syscall_trap32_patch_instr_orig;
366 
367 /* Trap Table syscall entry hot patch points */
368 extern void	syscall_trap_patch_point(void);
369 extern void	syscall_trap32_patch_point(void);
370 
371 /* Alternate syscall entry handlers used when branded zones are running */
372 extern void	syscall_wrapper(void);
373 extern void	syscall_wrapper32(void);
374 
375 /* Macros used to facilitate sparcv9 instruction generation */
376 #define	BA_A_INSTR	0x30800000	/* ba,a addr */
377 #define	DISP22(from, to) \
378 	((((uintptr_t)(to) - (uintptr_t)(from)) >> 2) & 0x3fffff)
379 
380 /*ARGSUSED*/
381 static void
brand_plat_interposition_enable(void)382 brand_plat_interposition_enable(void)
383 {
384 	ASSERT(MUTEX_HELD(&brand_list_lock));
385 
386 	/*
387 	 * Before we hot patch the kernel save the current instructions
388 	 * so that we can restore them later.
389 	 */
390 	syscall_trap_patch_instr_orig =
391 	    *(uint32_t *)syscall_trap_patch_point;
392 	syscall_trap32_patch_instr_orig =
393 	    *(uint32_t *)syscall_trap32_patch_point;
394 
395 	/*
396 	 * Modify the trap table at the patch points.
397 	 *
398 	 * We basically replace the first instruction at the patch
399 	 * point with a ba,a instruction that will transfer control
400 	 * to syscall_wrapper or syscall_wrapper32 for 64-bit and
401 	 * 32-bit syscalls respectively.  It's important to note that
402 	 * the annul bit is set in the branch so we don't execute
403 	 * the instruction directly following the one we're patching
404 	 * during the branch's delay slot.
405 	 *
406 	 * It also doesn't matter that we're not atomically updating both
407 	 * the 64 and 32 bit syscall paths at the same time since there's
408 	 * no actual branded processes running on the system yet.
409 	 */
410 	hot_patch_kernel_text((caddr_t)syscall_trap_patch_point,
411 	    BA_A_INSTR | DISP22(syscall_trap_patch_point, syscall_wrapper),
412 	    4);
413 	hot_patch_kernel_text((caddr_t)syscall_trap32_patch_point,
414 	    BA_A_INSTR | DISP22(syscall_trap32_patch_point, syscall_wrapper32),
415 	    4);
416 }
417 
418 /*ARGSUSED*/
419 static void
brand_plat_interposition_disable(void)420 brand_plat_interposition_disable(void)
421 {
422 	ASSERT(MUTEX_HELD(&brand_list_lock));
423 
424 	/*
425 	 * Restore the original instructions at the trap table syscall
426 	 * patch points to disable the brand syscall interposition
427 	 * mechanism.
428 	 */
429 	hot_patch_kernel_text((caddr_t)syscall_trap_patch_point,
430 	    syscall_trap_patch_instr_orig, 4);
431 	hot_patch_kernel_text((caddr_t)syscall_trap32_patch_point,
432 	    syscall_trap32_patch_instr_orig, 4);
433 }
434 #endif /* __sparcv9 */
435 
436 /*
437  * The following functions can be shared among kernel brand modules which
438  * implement Solaris-derived brands, all of which need to do similar tasks
439  * to manage the brand.
440  */
441 
442 #if defined(_LP64)
443 static void
Ehdr32to64(Elf32_Ehdr * src,Ehdr * dst)444 Ehdr32to64(Elf32_Ehdr *src, Ehdr *dst)
445 {
446 	bcopy(src->e_ident, dst->e_ident, sizeof (src->e_ident));
447 	dst->e_type =		src->e_type;
448 	dst->e_machine =	src->e_machine;
449 	dst->e_version =	src->e_version;
450 	dst->e_entry =		src->e_entry;
451 	dst->e_phoff =		src->e_phoff;
452 	dst->e_shoff =		src->e_shoff;
453 	dst->e_flags =		src->e_flags;
454 	dst->e_ehsize =		src->e_ehsize;
455 	dst->e_phentsize =	src->e_phentsize;
456 	dst->e_phnum =		src->e_phnum;
457 	dst->e_shentsize =	src->e_shentsize;
458 	dst->e_shnum =		src->e_shnum;
459 	dst->e_shstrndx =	src->e_shstrndx;
460 }
461 #endif /* _LP64 */
462 
463 /*
464  * Return -1 if the cmd was not handled by this function.
465  */
466 /*ARGSUSED*/
467 int
brand_solaris_cmd(int cmd,uintptr_t arg1,uintptr_t arg2,uintptr_t arg3,struct brand * pbrand,int brandvers)468 brand_solaris_cmd(int cmd, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3,
469     struct brand *pbrand, int brandvers)
470 {
471 	brand_proc_data_t	*spd;
472 	brand_proc_reg_t	reg;
473 	proc_t			*p = curproc;
474 	int			err;
475 
476 	/*
477 	 * There is one operation that is supported for a native
478 	 * process; B_EXEC_BRAND.  This brand operaion is redundant
479 	 * since the kernel assumes a native process doing an exec
480 	 * in a branded zone is going to run a branded processes.
481 	 * hence we don't support this operation.
482 	 */
483 	if (cmd == B_EXEC_BRAND)
484 		return (ENOSYS);
485 
486 	/* For all other operations this must be a branded process. */
487 	if (p->p_brand == &native_brand)
488 		return (ENOSYS);
489 
490 	ASSERT(p->p_brand == pbrand);
491 	ASSERT(p->p_brand_data != NULL);
492 
493 	spd = (brand_proc_data_t *)p->p_brand_data;
494 
495 	switch ((cmd)) {
496 	case B_EXEC_NATIVE:
497 		err = exec_common((char *)arg1, (const char **)arg2,
498 		    (const char **)arg3, NULL, EBA_NATIVE);
499 		return (err);
500 
501 	/*
502 	 * Get the address of the user-space system call handler from
503 	 * the user process and attach it to the proc structure.
504 	 */
505 	case B_REGISTER:
506 		if (p->p_model == DATAMODEL_NATIVE) {
507 			if (copyin((void *)arg1, &reg, sizeof (reg)) != 0)
508 				return (EFAULT);
509 		}
510 #if defined(_LP64)
511 		else {
512 			brand_common_reg32_t reg32;
513 
514 			if (copyin((void *)arg1, &reg32, sizeof (reg32)) != 0)
515 				return (EFAULT);
516 			reg.sbr_version = reg32.sbr_version;
517 			reg.sbr_handler = (caddr_t)(uintptr_t)reg32.sbr_handler;
518 		}
519 #endif /* _LP64 */
520 
521 		if (reg.sbr_version != brandvers)
522 			return (ENOTSUP);
523 		spd->spd_handler = reg.sbr_handler;
524 		return (0);
525 
526 	case B_ELFDATA:
527 		if (p->p_model == DATAMODEL_NATIVE) {
528 			if (copyout(&spd->spd_elf_data, (void *)arg1,
529 			    sizeof (brand_elf_data_t)) != 0)
530 				return (EFAULT);
531 		}
532 #if defined(_LP64)
533 		else {
534 			brand_elf_data32_t sed32;
535 
536 			sed32.sed_phdr = spd->spd_elf_data.sed_phdr;
537 			sed32.sed_phent = spd->spd_elf_data.sed_phent;
538 			sed32.sed_phnum = spd->spd_elf_data.sed_phnum;
539 			sed32.sed_entry = spd->spd_elf_data.sed_entry;
540 			sed32.sed_base = spd->spd_elf_data.sed_base;
541 			sed32.sed_ldentry = spd->spd_elf_data.sed_ldentry;
542 			sed32.sed_lddata = spd->spd_elf_data.sed_lddata;
543 			if (copyout(&sed32, (void *)arg1, sizeof (sed32))
544 			    != 0)
545 				return (EFAULT);
546 		}
547 #endif /* _LP64 */
548 		return (0);
549 
550 	/*
551 	 * The B_TRUSS_POINT subcommand exists so that we can see
552 	 * truss output from interposed system calls that return
553 	 * without first calling any other system call, meaning they
554 	 * would be invisible to truss(1).
555 	 * If the second argument is set non-zero, set errno to that
556 	 * value as well.
557 	 *
558 	 * Common arguments seen with truss are:
559 	 *
560 	 *	arg1: syscall number
561 	 *	arg2: errno
562 	 */
563 	case B_TRUSS_POINT:
564 		return ((arg2 == 0) ? 0 : set_errno((uint_t)arg2));
565 	}
566 
567 	return (-1);
568 }
569 
570 /*ARGSUSED*/
571 void
brand_solaris_copy_procdata(proc_t * child,proc_t * parent,struct brand * pbrand)572 brand_solaris_copy_procdata(proc_t *child, proc_t *parent, struct brand *pbrand)
573 {
574 	brand_proc_data_t	*spd;
575 
576 	ASSERT(parent->p_brand == pbrand);
577 	ASSERT(child->p_brand == pbrand);
578 	ASSERT(parent->p_brand_data != NULL);
579 	ASSERT(child->p_brand_data == NULL);
580 
581 	/*
582 	 * Just duplicate all the proc data of the parent for the
583 	 * child
584 	 */
585 	spd = kmem_alloc(sizeof (brand_proc_data_t), KM_SLEEP);
586 	bcopy(parent->p_brand_data, spd, sizeof (brand_proc_data_t));
587 	child->p_brand_data = spd;
588 }
589 
590 static void
restoreexecenv(struct execenv * ep,stack_t * sp)591 restoreexecenv(struct execenv *ep, stack_t *sp)
592 {
593 	klwp_t *lwp = ttolwp(curthread);
594 
595 	setexecenv(ep);
596 	lwp->lwp_sigaltstack.ss_sp = sp->ss_sp;
597 	lwp->lwp_sigaltstack.ss_size = sp->ss_size;
598 	lwp->lwp_sigaltstack.ss_flags = sp->ss_flags;
599 }
600 
601 /*ARGSUSED*/
602 int
brand_solaris_elfexec(vnode_t * vp,execa_t * uap,uarg_t * args,intpdata_t * idatap,int level,size_t * execsz,int setid,caddr_t exec_file,cred_t * cred,int brand_action,struct brand * pbrand,char * bname,char * brandlib,char * brandlib32,char * brandlinker,char * brandlinker32)603 brand_solaris_elfexec(vnode_t *vp, execa_t *uap, uarg_t *args,
604     intpdata_t *idatap, int level, size_t *execsz, int setid, caddr_t exec_file,
605     cred_t *cred, int brand_action, struct brand *pbrand, char *bname,
606     char *brandlib, char *brandlib32, char *brandlinker, char *brandlinker32)
607 {
608 
609 	vnode_t		*nvp;
610 	Ehdr		ehdr;
611 	Addr		uphdr_vaddr;
612 	intptr_t	voffset;
613 	int		interp;
614 	int		i, err;
615 	struct execenv	env;
616 	struct execenv	origenv;
617 	stack_t		orig_sigaltstack;
618 	struct user	*up = PTOU(curproc);
619 	proc_t		*p = ttoproc(curthread);
620 	klwp_t		*lwp = ttolwp(curthread);
621 	brand_proc_data_t	*spd;
622 	brand_elf_data_t sed, *sedp;
623 	char		*linker;
624 	uintptr_t	lddata; /* lddata of executable's linker */
625 
626 	ASSERT(curproc->p_brand == pbrand);
627 	ASSERT(curproc->p_brand_data != NULL);
628 
629 	spd = (brand_proc_data_t *)curproc->p_brand_data;
630 	sedp = &spd->spd_elf_data;
631 
632 	args->brandname = bname;
633 
634 	/*
635 	 * We will exec the brand library and then map in the target
636 	 * application and (optionally) the brand's default linker.
637 	 */
638 	if (args->to_model == DATAMODEL_NATIVE) {
639 		args->emulator = brandlib;
640 		linker = brandlinker;
641 	}
642 #if defined(_LP64)
643 	else {
644 		args->emulator = brandlib32;
645 		linker = brandlinker32;
646 	}
647 #endif  /* _LP64 */
648 
649 	if ((err = lookupname(args->emulator, UIO_SYSSPACE, FOLLOW,
650 	    NULLVPP, &nvp)) != 0) {
651 		uprintf("%s: not found.", args->emulator);
652 		return (err);
653 	}
654 
655 	/*
656 	 * The following elf{32}exec call changes the execenv in the proc
657 	 * struct which includes changing the p_exec member to be the vnode
658 	 * for the brand library (e.g. /.SUNWnative/usr/lib/s10_brand.so.1).
659 	 * We will eventually set the p_exec member to be the vnode for the new
660 	 * executable when we call setexecenv().  However, if we get an error
661 	 * before that call we need to restore the execenv to its original
662 	 * values so that when we return to the caller fop_close() works
663 	 * properly while cleaning up from the failed exec().  Restoring the
664 	 * original value will also properly decrement the 2nd VN_RELE that we
665 	 * took on the brand library.
666 	 */
667 	origenv.ex_bssbase = p->p_bssbase;
668 	origenv.ex_brkbase = p->p_brkbase;
669 	origenv.ex_brksize = p->p_brksize;
670 	origenv.ex_vp = p->p_exec;
671 	orig_sigaltstack.ss_sp = lwp->lwp_sigaltstack.ss_sp;
672 	orig_sigaltstack.ss_size = lwp->lwp_sigaltstack.ss_size;
673 	orig_sigaltstack.ss_flags = lwp->lwp_sigaltstack.ss_flags;
674 
675 	if (args->to_model == DATAMODEL_NATIVE) {
676 		err = elfexec(nvp, uap, args, idatap, INTP_MAXDEPTH + 1, execsz,
677 		    setid, exec_file, cred, brand_action);
678 	}
679 #if defined(_LP64)
680 	else {
681 		err = elf32exec(nvp, uap, args, idatap, INTP_MAXDEPTH + 1,
682 		    execsz, setid, exec_file, cred, brand_action);
683 	}
684 #endif  /* _LP64 */
685 	VN_RELE(nvp);
686 	if (err != 0) {
687 		restoreexecenv(&origenv, &orig_sigaltstack);
688 		return (err);
689 	}
690 
691 	/*
692 	 * The u_auxv veCTors are set up by elfexec to point to the
693 	 * brand emulation library and linker.  Save these so they can
694 	 * be copied to the specific brand aux vectors.
695 	 */
696 	bzero(&sed, sizeof (sed));
697 	for (i = 0; i < __KERN_NAUXV_IMPL; i++) {
698 		switch (up->u_auxv[i].a_type) {
699 		case AT_SUN_LDDATA:
700 			sed.sed_lddata = up->u_auxv[i].a_un.a_val;
701 			break;
702 		case AT_BASE:
703 			sed.sed_base = up->u_auxv[i].a_un.a_val;
704 			break;
705 		case AT_ENTRY:
706 			sed.sed_entry = up->u_auxv[i].a_un.a_val;
707 			break;
708 		case AT_PHDR:
709 			sed.sed_phdr = up->u_auxv[i].a_un.a_val;
710 			break;
711 		case AT_PHENT:
712 			sed.sed_phent = up->u_auxv[i].a_un.a_val;
713 			break;
714 		case AT_PHNUM:
715 			sed.sed_phnum = up->u_auxv[i].a_un.a_val;
716 			break;
717 		default:
718 			break;
719 		}
720 	}
721 	/* Make sure the emulator has an entry point */
722 	ASSERT(sed.sed_entry != 0);
723 	ASSERT(sed.sed_phdr != 0);
724 
725 	bzero(&env, sizeof (env));
726 	if (args->to_model == DATAMODEL_NATIVE) {
727 		err = mapexec_brand(vp, args, &ehdr, &uphdr_vaddr,
728 		    &voffset, exec_file, &interp, &env.ex_bssbase,
729 		    &env.ex_brkbase, &env.ex_brksize, NULL);
730 	}
731 #if defined(_LP64)
732 	else {
733 		Elf32_Ehdr ehdr32;
734 		Elf32_Addr uphdr_vaddr32;
735 		err = mapexec32_brand(vp, args, &ehdr32, &uphdr_vaddr32,
736 		    &voffset, exec_file, &interp, &env.ex_bssbase,
737 		    &env.ex_brkbase, &env.ex_brksize, NULL);
738 		Ehdr32to64(&ehdr32, &ehdr);
739 
740 		if (uphdr_vaddr32 == (Elf32_Addr)-1)
741 			uphdr_vaddr = (Addr)-1;
742 		else
743 			uphdr_vaddr = uphdr_vaddr32;
744 	}
745 #endif  /* _LP64 */
746 	if (err != 0) {
747 		restoreexecenv(&origenv, &orig_sigaltstack);
748 		return (err);
749 	}
750 
751 	/*
752 	 * Save off the important properties of the executable. The
753 	 * brand library will ask us for this data later, when it is
754 	 * initializing and getting ready to transfer control to the
755 	 * brand application.
756 	 */
757 	if (uphdr_vaddr == (Addr)-1)
758 		sedp->sed_phdr = voffset + ehdr.e_phoff;
759 	else
760 		sedp->sed_phdr = voffset + uphdr_vaddr;
761 	sedp->sed_entry = voffset + ehdr.e_entry;
762 	sedp->sed_phent = ehdr.e_phentsize;
763 	sedp->sed_phnum = ehdr.e_phnum;
764 
765 	if (interp) {
766 		if (ehdr.e_type == ET_DYN) {
767 			/*
768 			 * This is a shared object executable, so we
769 			 * need to pick a reasonable place to put the
770 			 * heap. Just don't use the first page.
771 			 */
772 			env.ex_brkbase = (caddr_t)PAGESIZE;
773 			env.ex_bssbase = (caddr_t)PAGESIZE;
774 		}
775 
776 		/*
777 		 * If the program needs an interpreter (most do), map
778 		 * it in and store relevant information about it in the
779 		 * aux vector, where the brand library can find it.
780 		 */
781 		if ((err = lookupname(linker, UIO_SYSSPACE,
782 		    FOLLOW, NULLVPP, &nvp)) != 0) {
783 			uprintf("%s: not found.", brandlinker);
784 			restoreexecenv(&origenv, &orig_sigaltstack);
785 			return (err);
786 		}
787 		if (args->to_model == DATAMODEL_NATIVE) {
788 			err = mapexec_brand(nvp, args, &ehdr,
789 			    &uphdr_vaddr, &voffset, exec_file, &interp,
790 			    NULL, NULL, NULL, &lddata);
791 		}
792 #if defined(_LP64)
793 		else {
794 			Elf32_Ehdr ehdr32;
795 			Elf32_Addr uphdr_vaddr32;
796 			err = mapexec32_brand(nvp, args, &ehdr32,
797 			    &uphdr_vaddr32, &voffset, exec_file, &interp,
798 			    NULL, NULL, NULL, &lddata);
799 			Ehdr32to64(&ehdr32, &ehdr);
800 
801 			if (uphdr_vaddr32 == (Elf32_Addr)-1)
802 				uphdr_vaddr = (Addr)-1;
803 			else
804 				uphdr_vaddr = uphdr_vaddr32;
805 		}
806 #endif  /* _LP64 */
807 		VN_RELE(nvp);
808 		if (err != 0) {
809 			restoreexecenv(&origenv, &orig_sigaltstack);
810 			return (err);
811 		}
812 
813 		/*
814 		 * Now that we know the base address of the brand's
815 		 * linker, place it in the aux vector.
816 		 */
817 		sedp->sed_base = voffset;
818 		sedp->sed_ldentry = voffset + ehdr.e_entry;
819 		sedp->sed_lddata = voffset + lddata;
820 	} else {
821 		/*
822 		 * This program has no interpreter. The brand library
823 		 * will jump to the address in the AT_SUN_BRAND_LDENTRY
824 		 * aux vector, so in this case, put the entry point of
825 		 * the main executable there.
826 		 */
827 		if (ehdr.e_type == ET_EXEC) {
828 			/*
829 			 * An executable with no interpreter, this must
830 			 * be a statically linked executable, which
831 			 * means we loaded it at the address specified
832 			 * in the elf header, in which case the e_entry
833 			 * field of the elf header is an absolute
834 			 * address.
835 			 */
836 			sedp->sed_ldentry = ehdr.e_entry;
837 			sedp->sed_entry = ehdr.e_entry;
838 			sedp->sed_lddata = 0;
839 			sedp->sed_base = 0;
840 		} else {
841 			/*
842 			 * A shared object with no interpreter, we use
843 			 * the calculated address from above.
844 			 */
845 			sedp->sed_ldentry = sedp->sed_entry;
846 			sedp->sed_entry = 0;
847 			sedp->sed_phdr = 0;
848 			sedp->sed_phent = 0;
849 			sedp->sed_phnum = 0;
850 			sedp->sed_lddata = 0;
851 			sedp->sed_base = voffset;
852 
853 			if (ehdr.e_type == ET_DYN) {
854 				/*
855 				 * Delay setting the brkbase until the
856 				 * first call to brk(); see elfexec()
857 				 * for details.
858 				 */
859 				env.ex_bssbase = (caddr_t)0;
860 				env.ex_brkbase = (caddr_t)0;
861 				env.ex_brksize = 0;
862 			}
863 		}
864 	}
865 
866 	env.ex_magic = elfmagic;
867 	env.ex_vp = vp;
868 	setexecenv(&env);
869 
870 	/*
871 	 * It's time to manipulate the process aux vectors.  First
872 	 * we need to update the AT_SUN_AUXFLAGS aux vector to set
873 	 * the AF_SUN_NOPLM flag.
874 	 */
875 	if (args->to_model == DATAMODEL_NATIVE) {
876 		auxv_t		auxflags_auxv;
877 
878 		if (copyin(args->auxp_auxflags, &auxflags_auxv,
879 		    sizeof (auxflags_auxv)) != 0)
880 			return (EFAULT);
881 
882 		ASSERT(auxflags_auxv.a_type == AT_SUN_AUXFLAGS);
883 		auxflags_auxv.a_un.a_val |= AF_SUN_NOPLM;
884 		if (copyout(&auxflags_auxv, args->auxp_auxflags,
885 		    sizeof (auxflags_auxv)) != 0)
886 			return (EFAULT);
887 	}
888 #if defined(_LP64)
889 	else {
890 		auxv32_t	auxflags_auxv32;
891 
892 		if (copyin(args->auxp_auxflags, &auxflags_auxv32,
893 		    sizeof (auxflags_auxv32)) != 0)
894 			return (EFAULT);
895 
896 		ASSERT(auxflags_auxv32.a_type == AT_SUN_AUXFLAGS);
897 		auxflags_auxv32.a_un.a_val |= AF_SUN_NOPLM;
898 		if (copyout(&auxflags_auxv32, args->auxp_auxflags,
899 		    sizeof (auxflags_auxv32)) != 0)
900 			return (EFAULT);
901 	}
902 #endif  /* _LP64 */
903 
904 	/* Second, copy out the brand specific aux vectors. */
905 	if (args->to_model == DATAMODEL_NATIVE) {
906 		auxv_t brand_auxv[] = {
907 		    { AT_SUN_BRAND_AUX1, 0 },
908 		    { AT_SUN_BRAND_AUX2, 0 },
909 		    { AT_SUN_BRAND_AUX3, 0 }
910 		};
911 
912 		ASSERT(brand_auxv[0].a_type ==
913 		    AT_SUN_BRAND_COMMON_LDDATA);
914 		brand_auxv[0].a_un.a_val = sed.sed_lddata;
915 
916 		if (copyout(&brand_auxv, args->auxp_brand,
917 		    sizeof (brand_auxv)) != 0)
918 			return (EFAULT);
919 	}
920 #if defined(_LP64)
921 	else {
922 		auxv32_t brand_auxv32[] = {
923 		    { AT_SUN_BRAND_AUX1, 0 },
924 		    { AT_SUN_BRAND_AUX2, 0 },
925 		    { AT_SUN_BRAND_AUX3, 0 }
926 		};
927 
928 		ASSERT(brand_auxv32[0].a_type == AT_SUN_BRAND_COMMON_LDDATA);
929 		brand_auxv32[0].a_un.a_val = (uint32_t)sed.sed_lddata;
930 		if (copyout(&brand_auxv32, args->auxp_brand,
931 		    sizeof (brand_auxv32)) != 0)
932 			return (EFAULT);
933 	}
934 #endif  /* _LP64 */
935 
936 	/*
937 	 * Third, the /proc aux vectors set up by elfexec() point to
938 	 * brand emulation library and it's linker.  Copy these to the
939 	 * /proc brand specific aux vector, and update the regular
940 	 * /proc aux vectors to point to the executable (and it's
941 	 * linker).  This will enable debuggers to access the
942 	 * executable via the usual /proc or elf notes aux vectors.
943 	 *
944 	 * The brand emulation library's linker will get it's aux
945 	 * vectors off the stack, and then update the stack with the
946 	 * executable's aux vectors before jumping to the executable's
947 	 * linker.
948 	 *
949 	 * Debugging the brand emulation library must be done from
950 	 * the global zone, where the librtld_db module knows how to
951 	 * fetch the brand specific aux vectors to access the brand
952 	 * emulation libraries linker.
953 	 */
954 	for (i = 0; i < __KERN_NAUXV_IMPL; i++) {
955 		ulong_t val;
956 
957 		switch (up->u_auxv[i].a_type) {
958 		case AT_SUN_BRAND_COMMON_LDDATA:
959 			up->u_auxv[i].a_un.a_val = sed.sed_lddata;
960 			continue;
961 		case AT_BASE:
962 			val = sedp->sed_base;
963 			break;
964 		case AT_ENTRY:
965 			val = sedp->sed_entry;
966 			break;
967 		case AT_PHDR:
968 			val = sedp->sed_phdr;
969 			break;
970 		case AT_PHENT:
971 			val = sedp->sed_phent;
972 			break;
973 		case AT_PHNUM:
974 			val = sedp->sed_phnum;
975 			break;
976 		case AT_SUN_LDDATA:
977 			val = sedp->sed_lddata;
978 			break;
979 		default:
980 			continue;
981 		}
982 
983 		up->u_auxv[i].a_un.a_val = val;
984 		if (val == 0) {
985 			/* Hide the entry for static binaries */
986 			up->u_auxv[i].a_type = AT_IGNORE;
987 		}
988 	}
989 
990 	/*
991 	 * The last thing we do here is clear spd->spd_handler.  This
992 	 * is important because if we're already a branded process and
993 	 * if this exec succeeds, there is a window between when the
994 	 * exec() first returns to the userland of the new process and
995 	 * when our brand library get's initialized, during which we
996 	 * don't want system calls to be re-directed to our brand
997 	 * library since it hasn't been initialized yet.
998 	 */
999 	spd->spd_handler = NULL;
1000 
1001 	return (0);
1002 }
1003 
1004 void
brand_solaris_exec(struct brand * pbrand)1005 brand_solaris_exec(struct brand *pbrand)
1006 {
1007 	brand_proc_data_t	*spd = curproc->p_brand_data;
1008 
1009 	ASSERT(curproc->p_brand == pbrand);
1010 	ASSERT(curproc->p_brand_data != NULL);
1011 	ASSERT(ttolwp(curthread)->lwp_brand != NULL);
1012 
1013 	/*
1014 	 * We should only be called from exec(), when we know the process
1015 	 * is single-threaded.
1016 	 */
1017 	ASSERT(curproc->p_tlist == curproc->p_tlist->t_forw);
1018 
1019 	/* Upon exec, reset our lwp brand data. */
1020 	(void) brand_solaris_freelwp(ttolwp(curthread), pbrand);
1021 	(void) brand_solaris_initlwp(ttolwp(curthread), pbrand);
1022 
1023 	/*
1024 	 * Upon exec, reset all the proc brand data, except for the elf
1025 	 * data associated with the executable we are exec'ing.
1026 	 */
1027 	spd->spd_handler = NULL;
1028 }
1029 
1030 int
brand_solaris_fini(char ** emul_table,struct modlinkage * modlinkage,struct brand * pbrand)1031 brand_solaris_fini(char **emul_table, struct modlinkage *modlinkage,
1032     struct brand *pbrand)
1033 {
1034 	int err;
1035 
1036 	/*
1037 	 * If there are any zones using this brand, we can't allow it
1038 	 * to be unloaded.
1039 	 */
1040 	if (brand_zone_count(pbrand))
1041 		return (EBUSY);
1042 
1043 	kmem_free(*emul_table, NSYSCALL);
1044 	*emul_table = NULL;
1045 
1046 	err = mod_remove(modlinkage);
1047 	if (err)
1048 		cmn_err(CE_WARN, "Couldn't unload brand module");
1049 
1050 	return (err);
1051 }
1052 
/*
 * Fork-time lwp hook for solaris-style brands.  No state is copied or
 * modified here; the body consists solely of ASSERT sanity checks on the
 * two lwps (p and c -- presumably the parent and child lwp; confirm
 * against the caller) and the processes they belong to.
 */
/*ARGSUSED*/
void
brand_solaris_forklwp(klwp_t *p, klwp_t *c, struct brand *pbrand)
{
	/* Both lwps must belong to processes of this brand ... */
	ASSERT(p->lwp_procp->p_brand == pbrand);
	ASSERT(c->lwp_procp->p_brand == pbrand);

	/* ... and those processes must already have brand data. */
	ASSERT(p->lwp_procp->p_brand_data != NULL);
	ASSERT(c->lwp_procp->p_brand_data != NULL);

	/*
	 * Both LWPs have already been initialized via
	 * brand_solaris_initlwp().
	 */
	ASSERT(p->lwp_brand != NULL);
	ASSERT(c->lwp_brand != NULL);
}
1070 
/*
 * Discard an lwp's brand state for a solaris-style brand.  After checking
 * that the lwp's process belongs to this brand, has brand data, and that
 * the lwp itself is currently marked as branded, lwp_brand is reset to
 * NULL.  No memory is released by this function.
 */
/*ARGSUSED*/
void
brand_solaris_freelwp(klwp_t *l, struct brand *pbrand)
{
	ASSERT(l->lwp_procp->p_brand == pbrand);
	ASSERT(l->lwp_procp->p_brand_data != NULL);
	ASSERT(l->lwp_brand != NULL);
	/* Mark the lwp as no longer carrying brand data. */
	l->lwp_brand = NULL;
}
1080 
/*
 * Mark an lwp as initialized for a solaris-style brand.  The lwp must
 * not already carry brand data (lwp_brand == NULL), and its process must
 * belong to this brand and have brand data of its own.
 *
 * Always returns 0.
 */
/*ARGSUSED*/
int
brand_solaris_initlwp(klwp_t *l, struct brand *pbrand)
{
	ASSERT(l->lwp_procp->p_brand == pbrand);
	ASSERT(l->lwp_procp->p_brand_data != NULL);
	ASSERT(l->lwp_brand == NULL);
	/*
	 * Nothing is allocated per lwp; a non-NULL placeholder value is
	 * stored so that lwp_brand != NULL checks succeed.
	 */
	l->lwp_brand = (void *)-1;
	return (0);
}
1091 
1092 /*ARGSUSED*/
1093 void
brand_solaris_lwpexit(klwp_t * l,struct brand * pbrand)1094 brand_solaris_lwpexit(klwp_t *l, struct brand *pbrand)
1095 {
1096 	proc_t  *p = l->lwp_procp;
1097 
1098 	ASSERT(l->lwp_procp->p_brand == pbrand);
1099 	ASSERT(l->lwp_procp->p_brand_data != NULL);
1100 	ASSERT(l->lwp_brand != NULL);
1101 
1102 	/*
1103 	 * We should never be called for the last thread in a process.
1104 	 * (That case is handled by brand_solaris_proc_exit().)
1105 	 * Therefore this lwp must be exiting from a multi-threaded
1106 	 * process.
1107 	 */
1108 	ASSERT(p->p_tlist != p->p_tlist->t_forw);
1109 
1110 	l->lwp_brand = NULL;
1111 }
1112 
1113 /*ARGSUSED*/
1114 void
brand_solaris_proc_exit(struct proc * p,klwp_t * l,struct brand * pbrand)1115 brand_solaris_proc_exit(struct proc *p, klwp_t *l, struct brand *pbrand)
1116 {
1117 	ASSERT(p->p_brand == pbrand);
1118 	ASSERT(p->p_brand_data != NULL);
1119 
1120 	/*
1121 	 * When called from proc_exit(), we know that process is
1122 	 * single-threaded and free our lwp brand data.
1123 	 * otherwise just free p_brand_data and return.
1124 	 */
1125 	if (l != NULL) {
1126 		ASSERT(p->p_tlist == p->p_tlist->t_forw);
1127 		ASSERT(p->p_tlist->t_lwp == l);
1128 		(void) brand_solaris_freelwp(l, pbrand);
1129 	}
1130 
1131 	/* upon exit, free our proc brand data */
1132 	kmem_free(p->p_brand_data, sizeof (brand_proc_data_t));
1133 	p->p_brand_data = NULL;
1134 }
1135 
1136 void
brand_solaris_setbrand(proc_t * p,struct brand * pbrand)1137 brand_solaris_setbrand(proc_t *p, struct brand *pbrand)
1138 {
1139 	ASSERT(p->p_brand == pbrand);
1140 	ASSERT(p->p_brand_data == NULL);
1141 
1142 	/*
1143 	 * We should only be called from exec(), when we know the process
1144 	 * is single-threaded.
1145 	 */
1146 	ASSERT(p->p_tlist == p->p_tlist->t_forw);
1147 
1148 	p->p_brand_data = kmem_zalloc(sizeof (brand_proc_data_t), KM_SLEEP);
1149 	(void) brand_solaris_initlwp(p->p_tlist->t_lwp, pbrand);
1150 }
1151