xref: /illumos-gate/usr/src/uts/common/os/brand.c (revision d88e498a7e760a60ae266eb725566f1f7ed86ad5)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/kmem.h>
27 #include <sys/errno.h>
28 #include <sys/systm.h>
29 #include <sys/cmn_err.h>
30 #include <sys/brand.h>
31 #include <sys/machbrand.h>
32 #include <sys/modctl.h>
33 #include <sys/rwlock.h>
34 #include <sys/zone.h>
35 
36 #define	SUPPORTED_BRAND_VERSION BRAND_VER_1
37 
38 #if defined(__sparcv9)
39 /* sparcv9 uses system wide brand interposition hooks */
40 static void brand_plat_interposition_enable(void);
41 static void brand_plat_interposition_disable(void);
42 
43 struct brand_mach_ops native_mach_ops  = {
44 		NULL, NULL
45 };
46 #else /* !__sparcv9 */
47 struct brand_mach_ops native_mach_ops  = {
48 		NULL, NULL, NULL, NULL, NULL, NULL
49 };
50 #endif /* !__sparcv9 */
51 
52 brand_t native_brand = {
53 		BRAND_VER_1,
54 		"native",
55 		NULL,
56 		&native_mach_ops
57 };
58 
59 /*
60  * Used to maintain a list of all the brands currently loaded into the
61  * kernel.
62  */
63 struct brand_list {
64 	int			bl_refcnt;
65 	struct brand_list	*bl_next;
66 	brand_t			*bl_brand;
67 };
68 
69 static struct brand_list *brand_list = NULL;
70 
71 /*
72  * This lock protects the integrity of the brand list.
73  */
74 static kmutex_t brand_list_lock;
75 
76 void
77 brand_init()
78 {
79 	mutex_init(&brand_list_lock, NULL, MUTEX_DEFAULT, NULL);
80 	p0.p_brand = &native_brand;
81 }
82 
83 int
84 brand_register(brand_t *brand)
85 {
86 	struct brand_list *list, *scan;
87 
88 	if (brand == NULL)
89 		return (EINVAL);
90 
91 	if (brand->b_version != SUPPORTED_BRAND_VERSION) {
92 		if (brand->b_version < SUPPORTED_BRAND_VERSION) {
93 			cmn_err(CE_WARN,
94 			    "brand '%s' was built to run on older versions "
95 			    "of Solaris.",
96 			    brand->b_name);
97 		} else {
98 			cmn_err(CE_WARN,
99 			    "brand '%s' was built to run on a newer version "
100 			    "of Solaris.",
101 			    brand->b_name);
102 		}
103 		return (EINVAL);
104 	}
105 
106 	/* Sanity checks */
107 	if (brand->b_name == NULL || brand->b_ops == NULL ||
108 	    brand->b_ops->b_brandsys == NULL) {
109 		cmn_err(CE_WARN, "Malformed brand");
110 		return (EINVAL);
111 	}
112 
113 	list = kmem_alloc(sizeof (struct brand_list), KM_SLEEP);
114 
115 	/* Add the brand to the list of loaded brands. */
116 	mutex_enter(&brand_list_lock);
117 
118 	/*
119 	 * Check to be sure we haven't already registered this brand.
120 	 */
121 	for (scan = brand_list; scan != NULL; scan = scan->bl_next) {
122 		if (strcmp(brand->b_name, scan->bl_brand->b_name) == 0) {
123 			cmn_err(CE_WARN,
124 			    "Invalid attempt to load a second instance of "
125 			    "brand %s", brand->b_name);
126 			mutex_exit(&brand_list_lock);
127 			kmem_free(list, sizeof (struct brand_list));
128 			return (EINVAL);
129 		}
130 	}
131 
132 #if defined(__sparcv9)
133 	/* sparcv9 uses system wide brand interposition hooks */
134 	if (brand_list == NULL)
135 		brand_plat_interposition_enable();
136 #endif /* __sparcv9 */
137 
138 	list->bl_brand = brand;
139 	list->bl_refcnt = 0;
140 	list->bl_next = brand_list;
141 	brand_list = list;
142 
143 	mutex_exit(&brand_list_lock);
144 
145 	return (0);
146 }
147 
148 /*
149  * The kernel module implementing this brand is being unloaded, so remove
150  * it from the list of active brands.
151  */
152 int
153 brand_unregister(brand_t *brand)
154 {
155 	struct brand_list *list, *prev;
156 
157 	/* Sanity checks */
158 	if (brand == NULL || brand->b_name == NULL) {
159 		cmn_err(CE_WARN, "Malformed brand");
160 		return (EINVAL);
161 	}
162 
163 	prev = NULL;
164 	mutex_enter(&brand_list_lock);
165 
166 	for (list = brand_list; list != NULL; list = list->bl_next) {
167 		if (list->bl_brand == brand)
168 			break;
169 		prev = list;
170 	}
171 
172 	if (list == NULL) {
173 		cmn_err(CE_WARN, "Brand %s wasn't registered", brand->b_name);
174 		mutex_exit(&brand_list_lock);
175 		return (EINVAL);
176 	}
177 
178 	if (list->bl_refcnt > 0) {
179 		cmn_err(CE_WARN, "Unregistering brand %s which is still in use",
180 		    brand->b_name);
181 		mutex_exit(&brand_list_lock);
182 		return (EBUSY);
183 	}
184 
185 	/* Remove brand from the list */
186 	if (prev != NULL)
187 		prev->bl_next = list->bl_next;
188 	else
189 		brand_list = list->bl_next;
190 
191 #if defined(__sparcv9)
192 	/* sparcv9 uses system wide brand interposition hooks */
193 	if (brand_list == NULL)
194 		brand_plat_interposition_disable();
195 #endif /* __sparcv9 */
196 
197 	mutex_exit(&brand_list_lock);
198 
199 	kmem_free(list, sizeof (struct brand_list));
200 
201 	return (0);
202 }
203 
204 /*
205  * Record that a zone of this brand has been instantiated.  If the kernel
206  * module implementing this brand's functionality is not present, this
207  * routine attempts to load the module as a side effect.
208  */
209 brand_t *
210 brand_register_zone(struct brand_attr *attr)
211 {
212 	struct brand_list *l = NULL;
213 	ddi_modhandle_t	hdl = NULL;
214 	char *modname;
215 	int err = 0;
216 
217 	if (is_system_labeled()) {
218 		cmn_err(CE_WARN,
219 		    "Branded zones are not allowed on labeled systems.");
220 		return (NULL);
221 	}
222 
223 	/*
224 	 * We make at most two passes through this loop.  The first time
225 	 * through, we're looking to see if this is a new user of an
226 	 * already loaded brand.  If the brand hasn't been loaded, we
227 	 * call ddi_modopen() to force it to be loaded and then make a
228 	 * second pass through the list of brands.  If we don't find the
229 	 * brand the second time through it means that the modname
230 	 * specified in the brand_attr structure doesn't provide the brand
231 	 * specified in the brandname field.  This would suggest a bug in
232 	 * the brand's config.xml file.  We close the module and return
233 	 * 'NULL' to the caller.
234 	 */
235 	for (;;) {
236 		/*
237 		 * Search list of loaded brands
238 		 */
239 		mutex_enter(&brand_list_lock);
240 		for (l = brand_list; l != NULL; l = l->bl_next)
241 			if (strcmp(attr->ba_brandname,
242 			    l->bl_brand->b_name) == 0)
243 				break;
244 		if ((l != NULL) || (hdl != NULL))
245 			break;
246 		mutex_exit(&brand_list_lock);
247 
248 		/*
249 		 * We didn't find that the requested brand has been loaded
250 		 * yet, so we trigger the load of the appropriate kernel
251 		 * module and search the list again.
252 		 */
253 		modname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
254 		(void) strcpy(modname, "brand/");
255 		(void) strcat(modname, attr->ba_modname);
256 		hdl = ddi_modopen(modname, KRTLD_MODE_FIRST, &err);
257 		kmem_free(modname, MAXPATHLEN);
258 
259 		if (err != 0)
260 			return (NULL);
261 	}
262 
263 	/*
264 	 * If we found the matching brand, bump its reference count.
265 	 */
266 	if (l != NULL)
267 		l->bl_refcnt++;
268 
269 	mutex_exit(&brand_list_lock);
270 
271 	if (hdl != NULL)
272 		(void) ddi_modclose(hdl);
273 
274 	return ((l != NULL) ? l->bl_brand : NULL);
275 }
276 
277 /*
278  * Return the number of zones currently using this brand.
279  */
280 int
281 brand_zone_count(struct brand *bp)
282 {
283 	struct brand_list *l;
284 	int cnt = 0;
285 
286 	mutex_enter(&brand_list_lock);
287 	for (l = brand_list; l != NULL; l = l->bl_next)
288 		if (l->bl_brand == bp) {
289 			cnt = l->bl_refcnt;
290 			break;
291 		}
292 	mutex_exit(&brand_list_lock);
293 
294 	return (cnt);
295 }
296 
297 void
298 brand_unregister_zone(struct brand *bp)
299 {
300 	struct brand_list *list;
301 
302 	mutex_enter(&brand_list_lock);
303 	for (list = brand_list; list != NULL; list = list->bl_next) {
304 		if (list->bl_brand == bp) {
305 			ASSERT(list->bl_refcnt > 0);
306 			list->bl_refcnt--;
307 			break;
308 		}
309 	}
310 	mutex_exit(&brand_list_lock);
311 }
312 
313 void
314 brand_setbrand(proc_t *p)
315 {
316 	brand_t *bp = p->p_zone->zone_brand;
317 
318 	ASSERT(bp != NULL);
319 	ASSERT(p->p_brand == &native_brand);
320 
321 	/*
322 	 * We should only be called from exec(), when we know the process
323 	 * is single-threaded.
324 	 */
325 	ASSERT(p->p_tlist == p->p_tlist->t_forw);
326 
327 	p->p_brand = bp;
328 	ASSERT(PROC_IS_BRANDED(p));
329 	BROP(p)->b_setbrand(p);
330 }
331 
332 void
333 brand_clearbrand(proc_t *p)
334 {
335 	brand_t *bp = p->p_zone->zone_brand;
336 	ASSERT(bp != NULL);
337 
338 	/*
339 	 * We should only be called from exec_common() or proc_exit(),
340 	 * when we know the process is single-threaded.
341 	 */
342 	ASSERT(p->p_tlist == p->p_tlist->t_forw);
343 
344 	ASSERT(PROC_IS_BRANDED(p));
345 	BROP(p)->b_proc_exit(p, p->p_tlist->t_lwp);
346 	p->p_brand = &native_brand;
347 }
348 
349 #if defined(__sparcv9)
350 /*
351  * Currently, only sparc has system level brand syscall interposition.
352  * On x86 we're able to enable syscall interposition on a per-cpu basis
353  * when a branded thread is scheduled to run on a cpu.
354  */
355 
/* Instructions displaced by the hot patches, kept so they can be restored */
static uint32_t	syscall_trap_patch_instr_orig;
static uint32_t	syscall_trap32_patch_instr_orig;

/* Hot patch points of the syscall entries in the trap table */
extern void	syscall_trap_patch_point(void);
extern void	syscall_trap32_patch_point(void);

/* Replacement syscall entry handlers used while brands are registered */
extern void	syscall_wrapper(void);
extern void	syscall_wrapper32(void);

/*
 * Helpers for generating sparcv9 branch instructions.
 *
 * BA_A_INSTR is the "ba,a" encoding with an all-zero displacement field.
 * DISP22 turns the byte distance from 'from' to 'to' into a count of
 * 4-byte words and keeps its low 22 bits, ready to be OR-ed into
 * BA_A_INSTR.
 */
#define	BA_A_INSTR	0x30800000	/* ba,a addr */
#define	DISP22(from, to)					\
	((((uintptr_t)(to) - (uintptr_t)(from)) >> 2) & 0x3fffff)
372 
373 /*ARGSUSED*/
374 static void
375 brand_plat_interposition_enable(void)
376 {
377 	ASSERT(MUTEX_HELD(&brand_list_lock));
378 
379 	/*
380 	 * Before we hot patch the kernel save the current instructions
381 	 * so that we can restore them later.
382 	 */
383 	syscall_trap_patch_instr_orig =
384 	    *(uint32_t *)syscall_trap_patch_point;
385 	syscall_trap32_patch_instr_orig =
386 	    *(uint32_t *)syscall_trap32_patch_point;
387 
388 	/*
389 	 * Modify the trap table at the patch points.
390 	 *
391 	 * We basically replace the first instruction at the patch
392 	 * point with a ba,a instruction that will transfer control
393 	 * to syscall_wrapper or syscall_wrapper32 for 64-bit and
394 	 * 32-bit syscalls respectively.  It's important to note that
395 	 * the annul bit is set in the branch so we don't execute
396 	 * the instruction directly following the one we're patching
397 	 * during the branch's delay slot.
398 	 *
399 	 * It also doesn't matter that we're not atomically updating both
400 	 * the 64 and 32 bit syscall paths at the same time since there's
401 	 * no actual branded processes running on the system yet.
402 	 */
403 	hot_patch_kernel_text((caddr_t)syscall_trap_patch_point,
404 	    BA_A_INSTR | DISP22(syscall_trap_patch_point, syscall_wrapper),
405 	    4);
406 	hot_patch_kernel_text((caddr_t)syscall_trap32_patch_point,
407 	    BA_A_INSTR | DISP22(syscall_trap32_patch_point, syscall_wrapper32),
408 	    4);
409 }
410 
411 /*ARGSUSED*/
412 static void
413 brand_plat_interposition_disable(void)
414 {
415 	ASSERT(MUTEX_HELD(&brand_list_lock));
416 
417 	/*
418 	 * Restore the original instructions at the trap table syscall
419 	 * patch points to disable the brand syscall interposition
420 	 * mechanism.
421 	 */
422 	hot_patch_kernel_text((caddr_t)syscall_trap_patch_point,
423 	    syscall_trap_patch_instr_orig, 4);
424 	hot_patch_kernel_text((caddr_t)syscall_trap32_patch_point,
425 	    syscall_trap32_patch_instr_orig, 4);
426 }
427 #endif /* __sparcv9 */
428