1 /* 2 * dev_cgroup.c - device cgroup subsystem 3 * 4 * Copyright 2007 IBM Corp 5 */ 6 7 #include <linux/device_cgroup.h> 8 #include <linux/cgroup.h> 9 #include <linux/ctype.h> 10 #include <linux/list.h> 11 #include <linux/uaccess.h> 12 #include <linux/seq_file.h> 13 14 #define ACC_MKNOD 1 15 #define ACC_READ 2 16 #define ACC_WRITE 4 17 #define ACC_MASK (ACC_MKNOD | ACC_READ | ACC_WRITE) 18 19 #define DEV_BLOCK 1 20 #define DEV_CHAR 2 21 #define DEV_ALL 4 /* this represents all devices */ 22 23 /* 24 * whitelist locking rules: 25 * cgroup_lock() cannot be taken under dev_cgroup->lock. 26 * dev_cgroup->lock can be taken with or without cgroup_lock(). 27 * 28 * modifications always require cgroup_lock 29 * modifications to a list which is visible require the 30 * dev_cgroup->lock *and* cgroup_lock() 31 * walking the list requires dev_cgroup->lock or cgroup_lock(). 32 * 33 * reasoning: dev_whitelist_copy() needs to kmalloc, so needs 34 * a mutex, which the cgroup_lock() is. Since modifying 35 * a visible list requires both locks, either lock can be 36 * taken for walking the list. 37 */ 38 39 struct dev_whitelist_item { 40 u32 major, minor; 41 short type; 42 short access; 43 struct list_head list; 44 }; 45 46 struct dev_cgroup { 47 struct cgroup_subsys_state css; 48 struct list_head whitelist; 49 spinlock_t lock; 50 }; 51 52 static inline struct dev_cgroup *css_to_devcgroup(struct cgroup_subsys_state *s) 53 { 54 return container_of(s, struct dev_cgroup, css); 55 } 56 57 static inline struct dev_cgroup *cgroup_to_devcgroup(struct cgroup *cgroup) 58 { 59 return css_to_devcgroup(cgroup_subsys_state(cgroup, devices_subsys_id)); 60 } 61 62 struct cgroup_subsys devices_subsys; 63 64 static int devcgroup_can_attach(struct cgroup_subsys *ss, 65 struct cgroup *new_cgroup, struct task_struct *task) 66 { 67 if (current != task && !capable(CAP_SYS_ADMIN)) 68 return -EPERM; 69 70 return 0; 71 } 72 73 /* 74 * called under cgroup_lock() 75 */ 76 static int dev_whitelist_copy(struct list_head *dest, struct list_head *orig) 77 { 78 struct dev_whitelist_item *wh, *tmp, *new; 79 80 list_for_each_entry(wh, orig, list) { 81 new = kmalloc(sizeof(*wh), GFP_KERNEL); 82 if (!new) 83 goto free_and_exit; 84 new->major = wh->major; 85 new->minor = wh->minor; 86 new->type = wh->type; 87 new->access = wh->access; 88 list_add_tail(&new->list, dest); 89 } 90 91 return 0; 92 93 free_and_exit: 94 list_for_each_entry_safe(wh, tmp, dest, list) { 95 list_del(&wh->list); 96 kfree(wh); 97 } 98 return -ENOMEM; 99 } 100 101 /* Stupid prototype - don't bother combining existing entries */ 102 /* 103 * called under cgroup_lock() 104 * since the list is visible to other tasks, we need the spinlock also 105 */ 106 static int dev_whitelist_add(struct dev_cgroup *dev_cgroup, 107 struct dev_whitelist_item *wh) 108 { 109 struct dev_whitelist_item *whcopy, *walk; 110 111 whcopy = kmalloc(sizeof(*whcopy), GFP_KERNEL); 112 if (!whcopy) 113 return -ENOMEM; 114 115 memcpy(whcopy, wh, sizeof(*whcopy)); 116 spin_lock(&dev_cgroup->lock); 117 list_for_each_entry(walk, &dev_cgroup->whitelist, list) { 118 if (walk->type != wh->type) 119 continue; 120 if (walk->major != wh->major) 121 continue; 122 if (walk->minor != wh->minor) 123 continue; 124 125 walk->access |= wh->access; 126 kfree(whcopy); 127 whcopy = NULL; 128 } 129 130 if (whcopy != NULL) 131 list_add_tail(&whcopy->list, &dev_cgroup->whitelist); 132 spin_unlock(&dev_cgroup->lock); 133 return 0; 134 } 135 136 /* 137 * called under cgroup_lock() 138 * since the list is visible to other tasks, we need the spinlock also 139 */ 140 static void dev_whitelist_rm(struct dev_cgroup *dev_cgroup, 141 struct dev_whitelist_item *wh) 142 { 143 struct dev_whitelist_item *walk, *tmp; 144 145 spin_lock(&dev_cgroup->lock); 146 list_for_each_entry_safe(walk, tmp, &dev_cgroup->whitelist, list) { 147 if (walk->type == DEV_ALL) 148 goto remove; 149 if (walk->type != wh->type) 150 continue; 151 if (walk->major != ~0 && walk->major != wh->major) 152 continue; 153 if (walk->minor != ~0 && walk->minor != wh->minor) 154 continue; 155 156 remove: 157 walk->access &= ~wh->access; 158 if (!walk->access) { 159 list_del(&walk->list); 160 kfree(walk); 161 } 162 } 163 spin_unlock(&dev_cgroup->lock); 164 } 165 166 /* 167 * called from kernel/cgroup.c with cgroup_lock() held. 168 */ 169 static struct cgroup_subsys_state *devcgroup_create(struct cgroup_subsys *ss, 170 struct cgroup *cgroup) 171 { 172 struct dev_cgroup *dev_cgroup, *parent_dev_cgroup; 173 struct cgroup *parent_cgroup; 174 int ret; 175 176 dev_cgroup = kzalloc(sizeof(*dev_cgroup), GFP_KERNEL); 177 if (!dev_cgroup) 178 return ERR_PTR(-ENOMEM); 179 INIT_LIST_HEAD(&dev_cgroup->whitelist); 180 parent_cgroup = cgroup->parent; 181 182 if (parent_cgroup == NULL) { 183 struct dev_whitelist_item *wh; 184 wh = kmalloc(sizeof(*wh), GFP_KERNEL); 185 if (!wh) { 186 kfree(dev_cgroup); 187 return ERR_PTR(-ENOMEM); 188 } 189 wh->minor = wh->major = ~0; 190 wh->type = DEV_ALL; 191 wh->access = ACC_MKNOD | ACC_READ | ACC_WRITE; 192 list_add(&wh->list, &dev_cgroup->whitelist); 193 } else { 194 parent_dev_cgroup = cgroup_to_devcgroup(parent_cgroup); 195 ret = dev_whitelist_copy(&dev_cgroup->whitelist, 196 &parent_dev_cgroup->whitelist); 197 if (ret) { 198 kfree(dev_cgroup); 199 return ERR_PTR(ret); 200 } 201 } 202 203 spin_lock_init(&dev_cgroup->lock); 204 return &dev_cgroup->css; 205 } 206 207 static void devcgroup_destroy(struct cgroup_subsys *ss, 208 struct cgroup *cgroup) 209 { 210 struct dev_cgroup *dev_cgroup; 211 struct dev_whitelist_item *wh, *tmp; 212 213 dev_cgroup = cgroup_to_devcgroup(cgroup); 214 list_for_each_entry_safe(wh, tmp, &dev_cgroup->whitelist, list) { 215 list_del(&wh->list); 216 kfree(wh); 217 } 218 kfree(dev_cgroup); 219 } 220 221 #define DEVCG_ALLOW 1 222 #define DEVCG_DENY 2 223 #define DEVCG_LIST 3 224 225 #define MAJMINLEN 13 226 #define ACCLEN 4 227 228 static void set_access(char *acc, short access) 229 { 230 int idx = 0; 231 memset(acc, 0, ACCLEN); 232 if (access & ACC_READ) 233 acc[idx++] = 'r'; 234 if (access & ACC_WRITE) 235 acc[idx++] = 'w'; 236 if (access & ACC_MKNOD) 237 acc[idx++] = 'm'; 238 } 239 240 static char type_to_char(short type) 241 { 242 if (type == DEV_ALL) 243 return 'a'; 244 if (type == DEV_CHAR) 245 return 'c'; 246 if (type == DEV_BLOCK) 247 return 'b'; 248 return 'X'; 249 } 250 251 static void set_majmin(char *str, unsigned m) 252 { 253 memset(str, 0, MAJMINLEN); 254 if (m == ~0) 255 sprintf(str, "*"); 256 else 257 snprintf(str, MAJMINLEN, "%u", m); 258 } 259 260 static int devcgroup_seq_read(struct cgroup *cgroup, struct cftype *cft, 261 struct seq_file *m) 262 { 263 struct dev_cgroup *devcgroup = cgroup_to_devcgroup(cgroup); 264 struct dev_whitelist_item *wh; 265 char maj[MAJMINLEN], min[MAJMINLEN], acc[ACCLEN]; 266 267 spin_lock(&devcgroup->lock); 268 list_for_each_entry(wh, &devcgroup->whitelist, list) { 269 set_access(acc, wh->access); 270 set_majmin(maj, wh->major); 271 set_majmin(min, wh->minor); 272 seq_printf(m, "%c %s:%s %s\n", type_to_char(wh->type), 273 maj, min, acc); 274 } 275 spin_unlock(&devcgroup->lock); 276 277 return 0; 278 } 279 280 /* 281 * may_access_whitelist: 282 * does the access granted to dev_cgroup c contain the access 283 * requested in whitelist item refwh. 284 * return 1 if yes, 0 if no. 285 * call with c->lock held 286 */ 287 static int may_access_whitelist(struct dev_cgroup *c, 288 struct dev_whitelist_item *refwh) 289 { 290 struct dev_whitelist_item *whitem; 291 292 list_for_each_entry(whitem, &c->whitelist, list) { 293 if (whitem->type & DEV_ALL) 294 return 1; 295 if ((refwh->type & DEV_BLOCK) && !(whitem->type & DEV_BLOCK)) 296 continue; 297 if ((refwh->type & DEV_CHAR) && !(whitem->type & DEV_CHAR)) 298 continue; 299 if (whitem->major != ~0 && whitem->major != refwh->major) 300 continue; 301 if (whitem->minor != ~0 && whitem->minor != refwh->minor) 302 continue; 303 if (refwh->access & (~whitem->access)) 304 continue; 305 return 1; 306 } 307 return 0; 308 } 309 310 /* 311 * parent_has_perm: 312 * when adding a new allow rule to a device whitelist, the rule 313 * must be allowed in the parent device 314 */ 315 static int parent_has_perm(struct cgroup *childcg, 316 struct dev_whitelist_item *wh) 317 { 318 struct cgroup *pcg = childcg->parent; 319 struct dev_cgroup *parent; 320 int ret; 321 322 if (!pcg) 323 return 1; 324 parent = cgroup_to_devcgroup(pcg); 325 spin_lock(&parent->lock); 326 ret = may_access_whitelist(parent, wh); 327 spin_unlock(&parent->lock); 328 return ret; 329 } 330 331 /* 332 * Modify the whitelist using allow/deny rules. 333 * CAP_SYS_ADMIN is needed for this. It's at least separate from CAP_MKNOD 334 * so we can give a container CAP_MKNOD to let it create devices but not 335 * modify the whitelist. 336 * It seems likely we'll want to add a CAP_CONTAINER capability to allow 337 * us to also grant CAP_SYS_ADMIN to containers without giving away the 338 * device whitelist controls, but for now we'll stick with CAP_SYS_ADMIN 339 * 340 * Taking rules away is always allowed (given CAP_SYS_ADMIN). Granting 341 * new access is only allowed if you're in the top-level cgroup, or your 342 * parent cgroup has the access you're asking for. 343 */ 344 static ssize_t devcgroup_access_write(struct cgroup *cgroup, struct cftype *cft, 345 struct file *file, const char __user *userbuf, 346 size_t nbytes, loff_t *ppos) 347 { 348 struct cgroup *cur_cgroup; 349 struct dev_cgroup *devcgroup, *cur_devcgroup; 350 int filetype = cft->private; 351 char *buffer, *b; 352 int retval = 0, count; 353 struct dev_whitelist_item wh; 354 355 if (!capable(CAP_SYS_ADMIN)) 356 return -EPERM; 357 358 devcgroup = cgroup_to_devcgroup(cgroup); 359 cur_cgroup = task_cgroup(current, devices_subsys.subsys_id); 360 cur_devcgroup = cgroup_to_devcgroup(cur_cgroup); 361 362 buffer = kmalloc(nbytes+1, GFP_KERNEL); 363 if (!buffer) 364 return -ENOMEM; 365 366 if (copy_from_user(buffer, userbuf, nbytes)) { 367 retval = -EFAULT; 368 goto out1; 369 } 370 buffer[nbytes] = 0; /* nul-terminate */ 371 372 cgroup_lock(); 373 if (cgroup_is_removed(cgroup)) { 374 retval = -ENODEV; 375 goto out2; 376 } 377 378 memset(&wh, 0, sizeof(wh)); 379 b = buffer; 380 381 switch (*b) { 382 case 'a': 383 wh.type = DEV_ALL; 384 wh.access = ACC_MASK; 385 wh.major = ~0; 386 wh.minor = ~0; 387 goto handle; 388 case 'b': 389 wh.type = DEV_BLOCK; 390 break; 391 case 'c': 392 wh.type = DEV_CHAR; 393 break; 394 default: 395 retval = -EINVAL; 396 goto out2; 397 } 398 b++; 399 if (!isspace(*b)) { 400 retval = -EINVAL; 401 goto out2; 402 } 403 b++; 404 if (*b == '*') { 405 wh.major = ~0; 406 b++; 407 } else if (isdigit(*b)) { 408 wh.major = 0; 409 while (isdigit(*b)) { 410 wh.major = wh.major*10+(*b-'0'); 411 b++; 412 } 413 } else { 414 retval = -EINVAL; 415 goto out2; 416 } 417 if (*b != ':') { 418 retval = -EINVAL; 419 goto out2; 420 } 421 b++; 422 423 /* read minor */ 424 if (*b == '*') { 425 wh.minor = ~0; 426 b++; 427 } else if (isdigit(*b)) { 428 wh.minor = 0; 429 while (isdigit(*b)) { 430 wh.minor = wh.minor*10+(*b-'0'); 431 b++; 432 } 433 } else { 434 retval = -EINVAL; 435 goto out2; 436 } 437 if (!isspace(*b)) { 438 retval = -EINVAL; 439 goto out2; 440 } 441 for (b++, count = 0; count < 3; count++, b++) { 442 switch (*b) { 443 case 'r': 444 wh.access |= ACC_READ; 445 break; 446 case 'w': 447 wh.access |= ACC_WRITE; 448 break; 449 case 'm': 450 wh.access |= ACC_MKNOD; 451 break; 452 case '\n': 453 case '\0': 454 count = 3; 455 break; 456 default: 457 retval = -EINVAL; 458 goto out2; 459 } 460 } 461 462 handle: 463 retval = 0; 464 switch (filetype) { 465 case DEVCG_ALLOW: 466 if (!parent_has_perm(cgroup, &wh)) 467 retval = -EPERM; 468 else 469 retval = dev_whitelist_add(devcgroup, &wh); 470 break; 471 case DEVCG_DENY: 472 dev_whitelist_rm(devcgroup, &wh); 473 break; 474 default: 475 retval = -EINVAL; 476 goto out2; 477 } 478 479 if (retval == 0) 480 retval = nbytes; 481 482 out2: 483 cgroup_unlock(); 484 out1: 485 kfree(buffer); 486 return retval; 487 } 488 489 static struct cftype dev_cgroup_files[] = { 490 { 491 .name = "allow", 492 .write = devcgroup_access_write, 493 .private = DEVCG_ALLOW, 494 }, 495 { 496 .name = "deny", 497 .write = devcgroup_access_write, 498 .private = DEVCG_DENY, 499 }, 500 { 501 .name = "list", 502 .read_seq_string = devcgroup_seq_read, 503 .private = DEVCG_LIST, 504 }, 505 }; 506 507 static int devcgroup_populate(struct cgroup_subsys *ss, 508 struct cgroup *cgroup) 509 { 510 return cgroup_add_files(cgroup, ss, dev_cgroup_files, 511 ARRAY_SIZE(dev_cgroup_files)); 512 } 513 514 struct cgroup_subsys devices_subsys = { 515 .name = "devices", 516 .can_attach = devcgroup_can_attach, 517 .create = devcgroup_create, 518 .destroy = devcgroup_destroy, 519 .populate = devcgroup_populate, 520 .subsys_id = devices_subsys_id, 521 }; 522 523 int devcgroup_inode_permission(struct inode *inode, int mask) 524 { 525 struct dev_cgroup *dev_cgroup; 526 struct dev_whitelist_item *wh; 527 528 dev_t device = inode->i_rdev; 529 if (!device) 530 return 0; 531 if (!S_ISBLK(inode->i_mode) && !S_ISCHR(inode->i_mode)) 532 return 0; 533 dev_cgroup = css_to_devcgroup(task_subsys_state(current, 534 devices_subsys_id)); 535 if (!dev_cgroup) 536 return 0; 537 538 spin_lock(&dev_cgroup->lock); 539 list_for_each_entry(wh, &dev_cgroup->whitelist, list) { 540 if (wh->type & DEV_ALL) 541 goto acc_check; 542 if ((wh->type & DEV_BLOCK) && !S_ISBLK(inode->i_mode)) 543 continue; 544 if ((wh->type & DEV_CHAR) && !S_ISCHR(inode->i_mode)) 545 continue; 546 if (wh->major != ~0 && wh->major != imajor(inode)) 547 continue; 548 if (wh->minor != ~0 && wh->minor != iminor(inode)) 549 continue; 550 acc_check: 551 if ((mask & MAY_WRITE) && !(wh->access & ACC_WRITE)) 552 continue; 553 if ((mask & MAY_READ) && !(wh->access & ACC_READ)) 554 continue; 555 spin_unlock(&dev_cgroup->lock); 556 return 0; 557 } 558 spin_unlock(&dev_cgroup->lock); 559 560 return -EPERM; 561 } 562 563 int devcgroup_inode_mknod(int mode, dev_t dev) 564 { 565 struct dev_cgroup *dev_cgroup; 566 struct dev_whitelist_item *wh; 567 568 dev_cgroup = css_to_devcgroup(task_subsys_state(current, 569 devices_subsys_id)); 570 if (!dev_cgroup) 571 return 0; 572 573 spin_lock(&dev_cgroup->lock); 574 list_for_each_entry(wh, &dev_cgroup->whitelist, list) { 575 if (wh->type & DEV_ALL) 576 goto acc_check; 577 if ((wh->type & DEV_BLOCK) && !S_ISBLK(mode)) 578 continue; 579 if ((wh->type & DEV_CHAR) && !S_ISCHR(mode)) 580 continue; 581 if (wh->major != ~0 && wh->major != MAJOR(dev)) 582 continue; 583 if (wh->minor != ~0 && wh->minor != MINOR(dev)) 584 continue; 585 acc_check: 586 if (!(wh->access & ACC_MKNOD)) 587 continue; 588 spin_unlock(&dev_cgroup->lock); 589 return 0; 590 } 591 spin_unlock(&dev_cgroup->lock); 592 return -EPERM; 593 } 594