xref: /titanic_50/usr/src/uts/common/os/brand.c (revision f3324781c875e2f9865c291e43f86ee710b0c145)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/kmem.h>
29 #include <sys/errno.h>
30 #include <sys/systm.h>
31 #include <sys/cmn_err.h>
32 #include <sys/brand.h>
33 #include <sys/machbrand.h>
34 #include <sys/modctl.h>
35 #include <sys/rwlock.h>
36 #include <sys/zone.h>
37 
/* The only brand interface version that brand_register() accepts. */
38 #define	SUPPORTED_BRAND_VERSION BRAND_VER_1
39 
40 #if defined(__sparcv9)
41 /* sparcv9 uses system wide brand interposition hooks */
42 static void brand_plat_interposition_enable(void);
43 static void brand_plat_interposition_disable(void);
44 
/* Machine-specific ops table of the native brand; every entry is NULL. */
45 struct brand_mach_ops native_mach_ops  = {
46 		NULL, NULL
47 };
48 #else /* !__sparcv9 */
/* Machine-specific ops table of the native brand; every entry is NULL. */
49 struct brand_mach_ops native_mach_ops  = {
50 		NULL, NULL, NULL, NULL, NULL, NULL
51 };
52 #endif /* !__sparcv9 */
53 
/*
 * The brand that p0 is given in brand_init() and that brand_setbrand()
 * expects a process to have before it is branded.
 * NOTE(review): the initializers presumably line up with the brand_t
 * members version, name, ops and machine ops -- confirm against
 * sys/brand.h.
 */
54 brand_t native_brand = {
55 		BRAND_VER_1,
56 		"native",
57 		NULL,
58 		&native_mach_ops
59 };
60 
61 /*
62  * Used to maintain a list of all the brands currently loaded into the
63  * kernel.
64  */
65 struct brand_list {
66 	int			bl_refcnt;	/* zones registered against this brand */
67 	struct brand_list	*bl_next;	/* next entry; NULL ends the list */
68 	brand_t			*bl_brand;	/* brand passed to brand_register() */
69 };
70 
/* Head of the singly-linked list of registered brands; NULL when empty. */
71 static struct brand_list *brand_list = NULL;
72 
73 /*
74  * This lock protects the integrity of the brand list.
75  */
76 static kmutex_t brand_list_lock;
77 
78 void
79 brand_init()
80 {
81 	mutex_init(&brand_list_lock, NULL, MUTEX_DEFAULT, NULL);
82 	p0.p_brand = &native_brand;
83 }
84 
85 int
86 brand_register(brand_t *brand)
87 {
88 	struct brand_list *list, *scan;
89 
90 	if (brand == NULL)
91 		return (EINVAL);
92 
93 	if (is_system_labeled()) {
94 		cmn_err(CE_WARN,
95 		    "Branded zones are not allowed on labeled systems.");
96 		return (EINVAL);
97 	}
98 
99 	if (brand->b_version != SUPPORTED_BRAND_VERSION) {
100 		if (brand->b_version < SUPPORTED_BRAND_VERSION) {
101 			cmn_err(CE_WARN,
102 			    "brand '%s' was built to run on older versions "
103 			    "of Solaris.",
104 			    brand->b_name);
105 		} else {
106 			cmn_err(CE_WARN,
107 			    "brand '%s' was built to run on a newer version "
108 			    "of Solaris.",
109 			    brand->b_name);
110 		}
111 		return (EINVAL);
112 	}
113 
114 	/* Sanity checks */
115 	if (brand->b_name == NULL || brand->b_ops == NULL ||
116 	    brand->b_ops->b_brandsys == NULL) {
117 		cmn_err(CE_WARN, "Malformed brand");
118 		return (EINVAL);
119 	}
120 
121 	list = kmem_alloc(sizeof (struct brand_list), KM_SLEEP);
122 
123 	/* Add the brand to the list of loaded brands. */
124 	mutex_enter(&brand_list_lock);
125 
126 	/*
127 	 * Check to be sure we haven't already registered this brand.
128 	 */
129 	for (scan = brand_list; scan != NULL; scan = scan->bl_next) {
130 		if (strcmp(brand->b_name, scan->bl_brand->b_name) == 0) {
131 			cmn_err(CE_WARN,
132 			    "Invalid attempt to load a second instance of "
133 			    "brand %s", brand->b_name);
134 			mutex_exit(&brand_list_lock);
135 			kmem_free(list, sizeof (struct brand_list));
136 			return (EINVAL);
137 		}
138 	}
139 
140 #if defined(__sparcv9)
141 	/* sparcv9 uses system wide brand interposition hooks */
142 	if (brand_list == NULL)
143 		brand_plat_interposition_enable();
144 #endif /* __sparcv9 */
145 
146 	list->bl_brand = brand;
147 	list->bl_refcnt = 0;
148 	list->bl_next = brand_list;
149 	brand_list = list;
150 
151 	mutex_exit(&brand_list_lock);
152 
153 	return (0);
154 }
155 
156 /*
157  * The kernel module implementing this brand is being unloaded, so remove
158  * it from the list of active brands.
159  */
160 int
161 brand_unregister(brand_t *brand)
162 {
163 	struct brand_list *list, *prev;
164 
165 	/* Sanity checks */
166 	if (brand == NULL || brand->b_name == NULL) {
167 		cmn_err(CE_WARN, "Malformed brand");
168 		return (EINVAL);
169 	}
170 
171 	prev = NULL;
172 	mutex_enter(&brand_list_lock);
173 
174 	for (list = brand_list; list != NULL; list = list->bl_next) {
175 		if (list->bl_brand == brand)
176 			break;
177 		prev = list;
178 	}
179 
180 	if (list == NULL) {
181 		cmn_err(CE_WARN, "Brand %s wasn't registered", brand->b_name);
182 		mutex_exit(&brand_list_lock);
183 		return (EINVAL);
184 	}
185 
186 	if (list->bl_refcnt > 0) {
187 		cmn_err(CE_WARN, "Unregistering brand %s which is still in use",
188 		    brand->b_name);
189 		mutex_exit(&brand_list_lock);
190 		return (EBUSY);
191 	}
192 
193 	/* Remove brand from the list */
194 	if (prev != NULL)
195 		prev->bl_next = list->bl_next;
196 	else
197 		brand_list = list->bl_next;
198 
199 #if defined(__sparcv9)
200 	/* sparcv9 uses system wide brand interposition hooks */
201 	if (brand_list == NULL)
202 		brand_plat_interposition_disable();
203 #endif /* __sparcv9 */
204 
205 	mutex_exit(&brand_list_lock);
206 
207 	kmem_free(list, sizeof (struct brand_list));
208 
209 	return (0);
210 }
211 
212 /*
213  * Record that a zone of this brand has been instantiated.  If the kernel
214  * module implementing this brand's functionality is not present, this
215  * routine attempts to load the module as a side effect.
216  */
217 brand_t *
218 brand_register_zone(struct brand_attr *attr)
219 {
220 	struct brand_list *l = NULL;
221 	ddi_modhandle_t	hdl = NULL;
222 	char *modname;
223 	int err = 0;
224 
225 	if (is_system_labeled()) {
226 		cmn_err(CE_WARN,
227 		    "Branded zones are not allowed on labeled systems.");
228 		return (NULL);
229 	}
230 
231 	/*
232 	 * We make at most two passes through this loop.  The first time
233 	 * through, we're looking to see if this is a new user of an
234 	 * already loaded brand.  If the brand hasn't been loaded, we
235 	 * call ddi_modopen() to force it to be loaded and then make a
236 	 * second pass through the list of brands.  If we don't find the
237 	 * brand the second time through it means that the modname
238 	 * specified in the brand_attr structure doesn't provide the brand
239 	 * specified in the brandname field.  This would suggest a bug in
240 	 * the brand's config.xml file.  We close the module and return
241 	 * 'NULL' to the caller.
242 	 */
243 	for (;;) {
244 		/*
245 		 * Search list of loaded brands
246 		 */
247 		mutex_enter(&brand_list_lock);
248 		for (l = brand_list; l != NULL; l = l->bl_next)
249 			if (strcmp(attr->ba_brandname,
250 			    l->bl_brand->b_name) == 0)
251 				break;
252 		if ((l != NULL) || (hdl != NULL))
253 			break;
254 		mutex_exit(&brand_list_lock);
255 
256 		/*
257 		 * We didn't find that the requested brand has been loaded
258 		 * yet, so we trigger the load of the appropriate kernel
259 		 * module and search the list again.
260 		 */
261 		modname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
262 		(void) strcpy(modname, "brand/");
263 		(void) strcat(modname, attr->ba_modname);
264 		hdl = ddi_modopen(modname, KRTLD_MODE_FIRST, &err);
265 		kmem_free(modname, MAXPATHLEN);
266 
267 		if (err != 0)
268 			return (NULL);
269 	}
270 
271 	/*
272 	 * If we found the matching brand, bump its reference count.
273 	 */
274 	if (l != NULL)
275 		l->bl_refcnt++;
276 
277 	mutex_exit(&brand_list_lock);
278 
279 	if (hdl != NULL)
280 		(void) ddi_modclose(hdl);
281 
282 	return ((l != NULL) ? l->bl_brand : NULL);
283 }
284 
285 /*
286  * Return the number of zones currently using this brand.
287  */
288 int
289 brand_zone_count(struct brand *bp)
290 {
291 	struct brand_list *l;
292 	int cnt = 0;
293 
294 	mutex_enter(&brand_list_lock);
295 	for (l = brand_list; l != NULL; l = l->bl_next)
296 		if (l->bl_brand == bp) {
297 			cnt = l->bl_refcnt;
298 			break;
299 		}
300 	mutex_exit(&brand_list_lock);
301 
302 	return (cnt);
303 }
304 
305 void
306 brand_unregister_zone(struct brand *bp)
307 {
308 	struct brand_list *list;
309 
310 	mutex_enter(&brand_list_lock);
311 	for (list = brand_list; list != NULL; list = list->bl_next) {
312 		if (list->bl_brand == bp) {
313 			ASSERT(list->bl_refcnt > 0);
314 			list->bl_refcnt--;
315 			break;
316 		}
317 	}
318 	mutex_exit(&brand_list_lock);
319 }
320 
321 void
322 brand_setbrand(proc_t *p)
323 {
324 	brand_t *bp = p->p_zone->zone_brand;
325 
326 	ASSERT(bp != NULL);
327 	ASSERT(p->p_brand == &native_brand);
328 
329 	/*
330 	 * We should only be called from exec(), when we know the process
331 	 * is single-threaded.
332 	 */
333 	ASSERT(p->p_tlist == p->p_tlist->t_forw);
334 
335 	p->p_brand = bp;
336 	if (PROC_IS_BRANDED(p)) {
337 		BROP(p)->b_setbrand(p);
338 		lwp_attach_brand_hdlrs(p->p_tlist->t_lwp);
339 	}
340 }
341 
342 #if defined(__sparcv9)
343 /*
344  * Currently, only sparc has system level brand syscall interposition.
345  * On x86 we're able to enable syscall interposition on a per-cpu basis
346  * when a branded thread is scheduled to run on a cpu.
347  */
348 
349 /* Local variables needed for dynamic syscall interposition support */
/*
 * Instruction words found at the two patch points, saved by
 * brand_plat_interposition_enable() and written back by
 * brand_plat_interposition_disable().
 */
350 static uint32_t	syscall_trap_patch_instr_orig;
351 static uint32_t	syscall_trap32_patch_instr_orig;
352 
353 /* Trap Table syscall entry hot patch points */
354 extern void	syscall_trap_patch_point(void);
355 extern void	syscall_trap32_patch_point(void);
356 
357 /* Alternate syscall entry handlers used when branded zones are running */
358 extern void	syscall_wrapper(void);
359 extern void	syscall_wrapper32(void);
360 
361 /* Macros used to facilitate sparcv9 instruction generation */
362 #define	BA_A_INSTR	0x30800000	/* ba,a addr */
/*
 * DISP22(from, to): byte distance from 'from' to 'to', divided by four
 * and truncated to 22 bits.  The result is OR-ed into BA_A_INSTR to form
 * the branch that is patched in at 'from'.
 */
363 #define	DISP22(from, to) \
364 	((((uintptr_t)(to) - (uintptr_t)(from)) >> 2) & 0x3fffff)
365 
366 /*ARGSUSED*/
367 static void
368 brand_plat_interposition_enable(void)
369 {
370 	ASSERT(MUTEX_HELD(&brand_list_lock));
371 
372 	/*
373 	 * Before we hot patch the kernel save the current instructions
374 	 * so that we can restore them later.
375 	 */
376 	syscall_trap_patch_instr_orig =
377 	    *(uint32_t *)syscall_trap_patch_point;
378 	syscall_trap32_patch_instr_orig =
379 	    *(uint32_t *)syscall_trap32_patch_point;
380 
381 	/*
382 	 * Modify the trap table at the patch points.
383 	 *
384 	 * We basically replace the first instruction at the patch
385 	 * point with a ba,a instruction that will transfer control
386 	 * to syscall_wrapper or syscall_wrapper32 for 64-bit and
387 	 * 32-bit syscalls respectively.  It's important to note that
388 	 * the annul bit is set in the branch so we don't execute
389 	 * the instruction directly following the one we're patching
390 	 * during the branch's delay slot.
391 	 *
392 	 * It also doesn't matter that we're not atomically updating both
393 	 * the 64 and 32 bit syscall paths at the same time since there's
394 	 * no actual branded processes running on the system yet.
395 	 */
396 	hot_patch_kernel_text((caddr_t)syscall_trap_patch_point,
397 	    BA_A_INSTR | DISP22(syscall_trap_patch_point, syscall_wrapper),
398 	    4);
399 	hot_patch_kernel_text((caddr_t)syscall_trap32_patch_point,
400 	    BA_A_INSTR | DISP22(syscall_trap32_patch_point, syscall_wrapper32),
401 	    4);
402 }
403 
404 /*ARGSUSED*/
405 static void
406 brand_plat_interposition_disable(void)
407 {
408 	ASSERT(MUTEX_HELD(&brand_list_lock));
409 
410 	/*
411 	 * Restore the original instructions at the trap table syscall
412 	 * patch points to disable the brand syscall interposition
413 	 * mechanism.
414 	 */
415 	hot_patch_kernel_text((caddr_t)syscall_trap_patch_point,
416 	    syscall_trap_patch_instr_orig, 4);
417 	hot_patch_kernel_text((caddr_t)syscall_trap32_patch_point,
418 	    syscall_trap32_patch_instr_orig, 4);
419 }
420 #endif /* __sparcv9 */
421