1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/kmem.h> 29 #include <sys/errno.h> 30 #include <sys/systm.h> 31 #include <sys/cmn_err.h> 32 #include <sys/brand.h> 33 #include <sys/machbrand.h> 34 #include <sys/modctl.h> 35 #include <sys/rwlock.h> 36 #include <sys/zone.h> 37 38 #define SUPPORTED_BRAND_VERSION BRAND_VER_1 39 40 #if defined(__sparcv9) 41 /* sparcv9 uses system wide brand interposition hooks */ 42 static void brand_plat_interposition_enable(void); 43 static void brand_plat_interposition_disable(void); 44 45 struct brand_mach_ops native_mach_ops = { 46 NULL, NULL 47 }; 48 #else /* !__sparcv9 */ 49 struct brand_mach_ops native_mach_ops = { 50 NULL, NULL, NULL, NULL, NULL, NULL 51 }; 52 #endif /* !__sparcv9 */ 53 54 brand_t native_brand = { 55 BRAND_VER_1, 56 "native", 57 NULL, 58 &native_mach_ops 59 }; 60 61 /* 62 * Used to maintain a list of all the brands currently loaded into the 63 * kernel. 64 */ 65 struct brand_list { 66 int bl_refcnt; 67 struct brand_list *bl_next; 68 brand_t *bl_brand; 69 }; 70 71 static struct brand_list *brand_list = NULL; 72 73 /* 74 * This lock protects the integrity of the brand list. 75 */ 76 static kmutex_t brand_list_lock; 77 78 void 79 brand_init() 80 { 81 mutex_init(&brand_list_lock, NULL, MUTEX_DEFAULT, NULL); 82 p0.p_brand = &native_brand; 83 } 84 85 int 86 brand_register(brand_t *brand) 87 { 88 struct brand_list *list, *scan; 89 90 if (brand == NULL) 91 return (EINVAL); 92 93 if (is_system_labeled()) { 94 cmn_err(CE_WARN, 95 "Branded zones are not allowed on labeled systems."); 96 return (EINVAL); 97 } 98 99 if (brand->b_version != SUPPORTED_BRAND_VERSION) { 100 if (brand->b_version < SUPPORTED_BRAND_VERSION) { 101 cmn_err(CE_WARN, 102 "brand '%s' was built to run on older versions " 103 "of Solaris.", 104 brand->b_name); 105 } else { 106 cmn_err(CE_WARN, 107 "brand '%s' was built to run on a newer version " 108 "of Solaris.", 109 brand->b_name); 110 } 111 return (EINVAL); 112 } 113 114 /* Sanity checks */ 115 if (brand->b_name == NULL || brand->b_ops == NULL || 116 brand->b_ops->b_brandsys == NULL) { 117 cmn_err(CE_WARN, "Malformed brand"); 118 return (EINVAL); 119 } 120 121 list = kmem_alloc(sizeof (struct brand_list), KM_SLEEP); 122 123 /* Add the brand to the list of loaded brands. */ 124 mutex_enter(&brand_list_lock); 125 126 /* 127 * Check to be sure we haven't already registered this brand. 128 */ 129 for (scan = brand_list; scan != NULL; scan = scan->bl_next) { 130 if (strcmp(brand->b_name, scan->bl_brand->b_name) == 0) { 131 cmn_err(CE_WARN, 132 "Invalid attempt to load a second instance of " 133 "brand %s", brand->b_name); 134 mutex_exit(&brand_list_lock); 135 kmem_free(list, sizeof (struct brand_list)); 136 return (EINVAL); 137 } 138 } 139 140 #if defined(__sparcv9) 141 /* sparcv9 uses system wide brand interposition hooks */ 142 if (brand_list == NULL) 143 brand_plat_interposition_enable(); 144 #endif /* __sparcv9 */ 145 146 list->bl_brand = brand; 147 list->bl_refcnt = 0; 148 list->bl_next = brand_list; 149 brand_list = list; 150 151 mutex_exit(&brand_list_lock); 152 153 return (0); 154 } 155 156 /* 157 * The kernel module implementing this brand is being unloaded, so remove 158 * it from the list of active brands. 159 */ 160 int 161 brand_unregister(brand_t *brand) 162 { 163 struct brand_list *list, *prev; 164 165 /* Sanity checks */ 166 if (brand == NULL || brand->b_name == NULL) { 167 cmn_err(CE_WARN, "Malformed brand"); 168 return (EINVAL); 169 } 170 171 prev = NULL; 172 mutex_enter(&brand_list_lock); 173 174 for (list = brand_list; list != NULL; list = list->bl_next) { 175 if (list->bl_brand == brand) 176 break; 177 prev = list; 178 } 179 180 if (list == NULL) { 181 cmn_err(CE_WARN, "Brand %s wasn't registered", brand->b_name); 182 mutex_exit(&brand_list_lock); 183 return (EINVAL); 184 } 185 186 if (list->bl_refcnt > 0) { 187 cmn_err(CE_WARN, "Unregistering brand %s which is still in use", 188 brand->b_name); 189 mutex_exit(&brand_list_lock); 190 return (EBUSY); 191 } 192 193 /* Remove brand from the list */ 194 if (prev != NULL) 195 prev->bl_next = list->bl_next; 196 else 197 brand_list = list->bl_next; 198 199 #if defined(__sparcv9) 200 /* sparcv9 uses system wide brand interposition hooks */ 201 if (brand_list == NULL) 202 brand_plat_interposition_disable(); 203 #endif /* __sparcv9 */ 204 205 mutex_exit(&brand_list_lock); 206 207 kmem_free(list, sizeof (struct brand_list)); 208 209 return (0); 210 } 211 212 /* 213 * Record that a zone of this brand has been instantiated. If the kernel 214 * module implementing this brand's functionality is not present, this 215 * routine attempts to load the module as a side effect. 216 */ 217 brand_t * 218 brand_register_zone(struct brand_attr *attr) 219 { 220 struct brand_list *l = NULL; 221 ddi_modhandle_t hdl = NULL; 222 char *modname; 223 int err = 0; 224 225 if (is_system_labeled()) { 226 cmn_err(CE_WARN, 227 "Branded zones are not allowed on labeled systems."); 228 return (NULL); 229 } 230 231 /* 232 * We make at most two passes through this loop. The first time 233 * through, we're looking to see if this is a new user of an 234 * already loaded brand. If the brand hasn't been loaded, we 235 * call ddi_modopen() to force it to be loaded and then make a 236 * second pass through the list of brands. If we don't find the 237 * brand the second time through it means that the modname 238 * specified in the brand_attr structure doesn't provide the brand 239 * specified in the brandname field. This would suggest a bug in 240 * the brand's config.xml file. We close the module and return 241 * 'NULL' to the caller. 242 */ 243 for (;;) { 244 /* 245 * Search list of loaded brands 246 */ 247 mutex_enter(&brand_list_lock); 248 for (l = brand_list; l != NULL; l = l->bl_next) 249 if (strcmp(attr->ba_brandname, 250 l->bl_brand->b_name) == 0) 251 break; 252 if ((l != NULL) || (hdl != NULL)) 253 break; 254 mutex_exit(&brand_list_lock); 255 256 /* 257 * We didn't find that the requested brand has been loaded 258 * yet, so we trigger the load of the appropriate kernel 259 * module and search the list again. 260 */ 261 modname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 262 (void) strcpy(modname, "brand/"); 263 (void) strcat(modname, attr->ba_modname); 264 hdl = ddi_modopen(modname, KRTLD_MODE_FIRST, &err); 265 kmem_free(modname, MAXPATHLEN); 266 267 if (err != 0) 268 return (NULL); 269 } 270 271 /* 272 * If we found the matching brand, bump its reference count. 273 */ 274 if (l != NULL) 275 l->bl_refcnt++; 276 277 mutex_exit(&brand_list_lock); 278 279 if (hdl != NULL) 280 (void) ddi_modclose(hdl); 281 282 return ((l != NULL) ? l->bl_brand : NULL); 283 } 284 285 /* 286 * Return the number of zones currently using this brand. 287 */ 288 int 289 brand_zone_count(struct brand *bp) 290 { 291 struct brand_list *l; 292 int cnt = 0; 293 294 mutex_enter(&brand_list_lock); 295 for (l = brand_list; l != NULL; l = l->bl_next) 296 if (l->bl_brand == bp) { 297 cnt = l->bl_refcnt; 298 break; 299 } 300 mutex_exit(&brand_list_lock); 301 302 return (cnt); 303 } 304 305 void 306 brand_unregister_zone(struct brand *bp) 307 { 308 struct brand_list *list; 309 310 mutex_enter(&brand_list_lock); 311 for (list = brand_list; list != NULL; list = list->bl_next) { 312 if (list->bl_brand == bp) { 313 ASSERT(list->bl_refcnt > 0); 314 list->bl_refcnt--; 315 break; 316 } 317 } 318 mutex_exit(&brand_list_lock); 319 } 320 321 void 322 brand_setbrand(proc_t *p) 323 { 324 brand_t *bp = p->p_zone->zone_brand; 325 326 ASSERT(bp != NULL); 327 ASSERT(p->p_brand == &native_brand); 328 329 /* 330 * We should only be called from exec(), when we know the process 331 * is single-threaded. 332 */ 333 ASSERT(p->p_tlist == p->p_tlist->t_forw); 334 335 p->p_brand = bp; 336 if (PROC_IS_BRANDED(p)) { 337 BROP(p)->b_setbrand(p); 338 lwp_attach_brand_hdlrs(p->p_tlist->t_lwp); 339 } 340 } 341 342 #if defined(__sparcv9) 343 /* 344 * Currently, only sparc has system level brand syscall interposition. 345 * On x86 we're able to enable syscall interposition on a per-cpu basis 346 * when a branded thread is scheduled to run on a cpu. 347 */ 348 349 /* Local variables needed for dynamic syscall interposition support */ 350 static uint32_t syscall_trap_patch_instr_orig; 351 static uint32_t syscall_trap32_patch_instr_orig; 352 353 /* Trap Table syscall entry hot patch points */ 354 extern void syscall_trap_patch_point(void); 355 extern void syscall_trap32_patch_point(void); 356 357 /* Alternate syscall entry handlers used when branded zones are running */ 358 extern void syscall_wrapper(void); 359 extern void syscall_wrapper32(void); 360 361 /* Macros used to facilitate sparcv9 instruction generation */ 362 #define BA_A_INSTR 0x30800000 /* ba,a addr */ 363 #define DISP22(from, to) \ 364 ((((uintptr_t)(to) - (uintptr_t)(from)) >> 2) & 0x3fffff) 365 366 /*ARGSUSED*/ 367 static void 368 brand_plat_interposition_enable(void) 369 { 370 ASSERT(MUTEX_HELD(&brand_list_lock)); 371 372 /* 373 * Before we hot patch the kernel save the current instructions 374 * so that we can restore them later. 375 */ 376 syscall_trap_patch_instr_orig = 377 *(uint32_t *)syscall_trap_patch_point; 378 syscall_trap32_patch_instr_orig = 379 *(uint32_t *)syscall_trap32_patch_point; 380 381 /* 382 * Modify the trap table at the patch points. 383 * 384 * We basically replace the first instruction at the patch 385 * point with a ba,a instruction that will transfer control 386 * to syscall_wrapper or syscall_wrapper32 for 64-bit and 387 * 32-bit syscalls respectively. It's important to note that 388 * the annul bit is set in the branch so we don't execute 389 * the instruction directly following the one we're patching 390 * during the branch's delay slot. 391 * 392 * It also doesn't matter that we're not atomically updating both 393 * the 64 and 32 bit syscall paths at the same time since there's 394 * no actual branded processes running on the system yet. 395 */ 396 hot_patch_kernel_text((caddr_t)syscall_trap_patch_point, 397 BA_A_INSTR | DISP22(syscall_trap_patch_point, syscall_wrapper), 398 4); 399 hot_patch_kernel_text((caddr_t)syscall_trap32_patch_point, 400 BA_A_INSTR | DISP22(syscall_trap32_patch_point, syscall_wrapper32), 401 4); 402 } 403 404 /*ARGSUSED*/ 405 static void 406 brand_plat_interposition_disable(void) 407 { 408 ASSERT(MUTEX_HELD(&brand_list_lock)); 409 410 /* 411 * Restore the original instructions at the trap table syscall 412 * patch points to disable the brand syscall interposition 413 * mechanism. 414 */ 415 hot_patch_kernel_text((caddr_t)syscall_trap_patch_point, 416 syscall_trap_patch_instr_orig, 4); 417 hot_patch_kernel_text((caddr_t)syscall_trap32_patch_point, 418 syscall_trap32_patch_instr_orig, 4); 419 } 420 #endif /* __sparcv9 */ 421