1 /* 2 * device_cgroup.c - device cgroup subsystem 3 * 4 * Copyright 2007 IBM Corp 5 */ 6 7 #include <linux/device_cgroup.h> 8 #include <linux/cgroup.h> 9 #include <linux/ctype.h> 10 #include <linux/list.h> 11 #include <linux/uaccess.h> 12 #include <linux/seq_file.h> 13 #include <linux/rcupdate.h> 14 15 #define ACC_MKNOD 1 16 #define ACC_READ 2 17 #define ACC_WRITE 4 18 #define ACC_MASK (ACC_MKNOD | ACC_READ | ACC_WRITE) 19 20 #define DEV_BLOCK 1 21 #define DEV_CHAR 2 22 #define DEV_ALL 4 /* this represents all devices */ 23 24 /* 25 * whitelist locking rules: 26 * hold cgroup_lock() for update/read. 27 * hold rcu_read_lock() for read. 28 */ 29 30 struct dev_whitelist_item { 31 u32 major, minor; 32 short type; 33 short access; 34 struct list_head list; 35 struct rcu_head rcu; 36 }; 37 38 struct dev_cgroup { 39 struct cgroup_subsys_state css; 40 struct list_head whitelist; 41 }; 42 43 static inline struct dev_cgroup *css_to_devcgroup(struct cgroup_subsys_state *s) 44 { 45 return container_of(s, struct dev_cgroup, css); 46 } 47 48 static inline struct dev_cgroup *cgroup_to_devcgroup(struct cgroup *cgroup) 49 { 50 return css_to_devcgroup(cgroup_subsys_state(cgroup, devices_subsys_id)); 51 } 52 53 static inline struct dev_cgroup *task_devcgroup(struct task_struct *task) 54 { 55 return css_to_devcgroup(task_subsys_state(task, devices_subsys_id)); 56 } 57 58 struct cgroup_subsys devices_subsys; 59 60 static int devcgroup_can_attach(struct cgroup_subsys *ss, 61 struct cgroup *new_cgroup, struct task_struct *task) 62 { 63 if (current != task && !capable(CAP_SYS_ADMIN)) 64 return -EPERM; 65 66 return 0; 67 } 68 69 /* 70 * called under cgroup_lock() 71 */ 72 static int dev_whitelist_copy(struct list_head *dest, struct list_head *orig) 73 { 74 struct dev_whitelist_item *wh, *tmp, *new; 75 76 list_for_each_entry(wh, orig, list) { 77 new = kmemdup(wh, sizeof(*wh), GFP_KERNEL); 78 if (!new) 79 goto free_and_exit; 80 list_add_tail(&new->list, dest); 81 } 82 83 return 0; 84 85 free_and_exit: 86 list_for_each_entry_safe(wh, tmp, dest, list) { 87 list_del(&wh->list); 88 kfree(wh); 89 } 90 return -ENOMEM; 91 } 92 93 /* Stupid prototype - don't bother combining existing entries */ 94 /* 95 * called under cgroup_lock() 96 */ 97 static int dev_whitelist_add(struct dev_cgroup *dev_cgroup, 98 struct dev_whitelist_item *wh) 99 { 100 struct dev_whitelist_item *whcopy, *walk; 101 102 whcopy = kmemdup(wh, sizeof(*wh), GFP_KERNEL); 103 if (!whcopy) 104 return -ENOMEM; 105 106 list_for_each_entry(walk, &dev_cgroup->whitelist, list) { 107 if (walk->type != wh->type) 108 continue; 109 if (walk->major != wh->major) 110 continue; 111 if (walk->minor != wh->minor) 112 continue; 113 114 walk->access |= wh->access; 115 kfree(whcopy); 116 whcopy = NULL; 117 } 118 119 if (whcopy != NULL) 120 list_add_tail_rcu(&whcopy->list, &dev_cgroup->whitelist); 121 return 0; 122 } 123 124 static void whitelist_item_free(struct rcu_head *rcu) 125 { 126 struct dev_whitelist_item *item; 127 128 item = container_of(rcu, struct dev_whitelist_item, rcu); 129 kfree(item); 130 } 131 132 /* 133 * called under cgroup_lock() 134 */ 135 static void dev_whitelist_rm(struct dev_cgroup *dev_cgroup, 136 struct dev_whitelist_item *wh) 137 { 138 struct dev_whitelist_item *walk, *tmp; 139 140 list_for_each_entry_safe(walk, tmp, &dev_cgroup->whitelist, list) { 141 if (walk->type == DEV_ALL) 142 goto remove; 143 if (walk->type != wh->type) 144 continue; 145 if (walk->major != ~0 && walk->major != wh->major) 146 continue; 147 if (walk->minor != ~0 && walk->minor != wh->minor) 148 continue; 149 150 remove: 151 walk->access &= ~wh->access; 152 if (!walk->access) { 153 list_del_rcu(&walk->list); 154 call_rcu(&walk->rcu, whitelist_item_free); 155 } 156 } 157 } 158 159 /* 160 * called from kernel/cgroup.c with cgroup_lock() held. 161 */ 162 static struct cgroup_subsys_state *devcgroup_create(struct cgroup_subsys *ss, 163 struct cgroup *cgroup) 164 { 165 struct dev_cgroup *dev_cgroup, *parent_dev_cgroup; 166 struct cgroup *parent_cgroup; 167 int ret; 168 169 dev_cgroup = kzalloc(sizeof(*dev_cgroup), GFP_KERNEL); 170 if (!dev_cgroup) 171 return ERR_PTR(-ENOMEM); 172 INIT_LIST_HEAD(&dev_cgroup->whitelist); 173 parent_cgroup = cgroup->parent; 174 175 if (parent_cgroup == NULL) { 176 struct dev_whitelist_item *wh; 177 wh = kmalloc(sizeof(*wh), GFP_KERNEL); 178 if (!wh) { 179 kfree(dev_cgroup); 180 return ERR_PTR(-ENOMEM); 181 } 182 wh->minor = wh->major = ~0; 183 wh->type = DEV_ALL; 184 wh->access = ACC_MASK; 185 list_add(&wh->list, &dev_cgroup->whitelist); 186 } else { 187 parent_dev_cgroup = cgroup_to_devcgroup(parent_cgroup); 188 ret = dev_whitelist_copy(&dev_cgroup->whitelist, 189 &parent_dev_cgroup->whitelist); 190 if (ret) { 191 kfree(dev_cgroup); 192 return ERR_PTR(ret); 193 } 194 } 195 196 return &dev_cgroup->css; 197 } 198 199 static void devcgroup_destroy(struct cgroup_subsys *ss, 200 struct cgroup *cgroup) 201 { 202 struct dev_cgroup *dev_cgroup; 203 struct dev_whitelist_item *wh, *tmp; 204 205 dev_cgroup = cgroup_to_devcgroup(cgroup); 206 list_for_each_entry_safe(wh, tmp, &dev_cgroup->whitelist, list) { 207 list_del(&wh->list); 208 kfree(wh); 209 } 210 kfree(dev_cgroup); 211 } 212 213 #define DEVCG_ALLOW 1 214 #define DEVCG_DENY 2 215 #define DEVCG_LIST 3 216 217 #define MAJMINLEN 13 218 #define ACCLEN 4 219 220 static void set_access(char *acc, short access) 221 { 222 int idx = 0; 223 memset(acc, 0, ACCLEN); 224 if (access & ACC_READ) 225 acc[idx++] = 'r'; 226 if (access & ACC_WRITE) 227 acc[idx++] = 'w'; 228 if (access & ACC_MKNOD) 229 acc[idx++] = 'm'; 230 } 231 232 static char type_to_char(short type) 233 { 234 if (type == DEV_ALL) 235 return 'a'; 236 if (type == DEV_CHAR) 237 return 'c'; 238 if (type == DEV_BLOCK) 239 return 'b'; 240 return 'X'; 241 } 242 243 static void set_majmin(char *str, unsigned m) 244 { 245 if (m == ~0) 246 strcpy(str, "*"); 247 else 248 sprintf(str, "%u", m); 249 } 250 251 static int devcgroup_seq_read(struct cgroup *cgroup, struct cftype *cft, 252 struct seq_file *m) 253 { 254 struct dev_cgroup *devcgroup = cgroup_to_devcgroup(cgroup); 255 struct dev_whitelist_item *wh; 256 char maj[MAJMINLEN], min[MAJMINLEN], acc[ACCLEN]; 257 258 rcu_read_lock(); 259 list_for_each_entry_rcu(wh, &devcgroup->whitelist, list) { 260 set_access(acc, wh->access); 261 set_majmin(maj, wh->major); 262 set_majmin(min, wh->minor); 263 seq_printf(m, "%c %s:%s %s\n", type_to_char(wh->type), 264 maj, min, acc); 265 } 266 rcu_read_unlock(); 267 268 return 0; 269 } 270 271 /* 272 * may_access_whitelist: 273 * does the access granted to dev_cgroup c contain the access 274 * requested in whitelist item refwh. 275 * return 1 if yes, 0 if no. 276 * call with c->lock held 277 */ 278 static int may_access_whitelist(struct dev_cgroup *c, 279 struct dev_whitelist_item *refwh) 280 { 281 struct dev_whitelist_item *whitem; 282 283 list_for_each_entry(whitem, &c->whitelist, list) { 284 if (whitem->type & DEV_ALL) 285 return 1; 286 if ((refwh->type & DEV_BLOCK) && !(whitem->type & DEV_BLOCK)) 287 continue; 288 if ((refwh->type & DEV_CHAR) && !(whitem->type & DEV_CHAR)) 289 continue; 290 if (whitem->major != ~0 && whitem->major != refwh->major) 291 continue; 292 if (whitem->minor != ~0 && whitem->minor != refwh->minor) 293 continue; 294 if (refwh->access & (~whitem->access)) 295 continue; 296 return 1; 297 } 298 return 0; 299 } 300 301 /* 302 * parent_has_perm: 303 * when adding a new allow rule to a device whitelist, the rule 304 * must be allowed in the parent device 305 */ 306 static int parent_has_perm(struct dev_cgroup *childcg, 307 struct dev_whitelist_item *wh) 308 { 309 struct cgroup *pcg = childcg->css.cgroup->parent; 310 struct dev_cgroup *parent; 311 312 if (!pcg) 313 return 1; 314 parent = cgroup_to_devcgroup(pcg); 315 return may_access_whitelist(parent, wh); 316 } 317 318 /* 319 * Modify the whitelist using allow/deny rules. 320 * CAP_SYS_ADMIN is needed for this. It's at least separate from CAP_MKNOD 321 * so we can give a container CAP_MKNOD to let it create devices but not 322 * modify the whitelist. 323 * It seems likely we'll want to add a CAP_CONTAINER capability to allow 324 * us to also grant CAP_SYS_ADMIN to containers without giving away the 325 * device whitelist controls, but for now we'll stick with CAP_SYS_ADMIN 326 * 327 * Taking rules away is always allowed (given CAP_SYS_ADMIN). Granting 328 * new access is only allowed if you're in the top-level cgroup, or your 329 * parent cgroup has the access you're asking for. 330 */ 331 static int devcgroup_update_access(struct dev_cgroup *devcgroup, 332 int filetype, const char *buffer) 333 { 334 const char *b; 335 char *endp; 336 int count; 337 struct dev_whitelist_item wh; 338 339 if (!capable(CAP_SYS_ADMIN)) 340 return -EPERM; 341 342 memset(&wh, 0, sizeof(wh)); 343 b = buffer; 344 345 switch (*b) { 346 case 'a': 347 wh.type = DEV_ALL; 348 wh.access = ACC_MASK; 349 wh.major = ~0; 350 wh.minor = ~0; 351 goto handle; 352 case 'b': 353 wh.type = DEV_BLOCK; 354 break; 355 case 'c': 356 wh.type = DEV_CHAR; 357 break; 358 default: 359 return -EINVAL; 360 } 361 b++; 362 if (!isspace(*b)) 363 return -EINVAL; 364 b++; 365 if (*b == '*') { 366 wh.major = ~0; 367 b++; 368 } else if (isdigit(*b)) { 369 wh.major = simple_strtoul(b, &endp, 10); 370 b = endp; 371 } else { 372 return -EINVAL; 373 } 374 if (*b != ':') 375 return -EINVAL; 376 b++; 377 378 /* read minor */ 379 if (*b == '*') { 380 wh.minor = ~0; 381 b++; 382 } else if (isdigit(*b)) { 383 wh.minor = simple_strtoul(b, &endp, 10); 384 b = endp; 385 } else { 386 return -EINVAL; 387 } 388 if (!isspace(*b)) 389 return -EINVAL; 390 for (b++, count = 0; count < 3; count++, b++) { 391 switch (*b) { 392 case 'r': 393 wh.access |= ACC_READ; 394 break; 395 case 'w': 396 wh.access |= ACC_WRITE; 397 break; 398 case 'm': 399 wh.access |= ACC_MKNOD; 400 break; 401 case '\n': 402 case '\0': 403 count = 3; 404 break; 405 default: 406 return -EINVAL; 407 } 408 } 409 410 handle: 411 switch (filetype) { 412 case DEVCG_ALLOW: 413 if (!parent_has_perm(devcgroup, &wh)) 414 return -EPERM; 415 return dev_whitelist_add(devcgroup, &wh); 416 case DEVCG_DENY: 417 dev_whitelist_rm(devcgroup, &wh); 418 break; 419 default: 420 return -EINVAL; 421 } 422 return 0; 423 } 424 425 static int devcgroup_access_write(struct cgroup *cgrp, struct cftype *cft, 426 const char *buffer) 427 { 428 int retval; 429 if (!cgroup_lock_live_group(cgrp)) 430 return -ENODEV; 431 retval = devcgroup_update_access(cgroup_to_devcgroup(cgrp), 432 cft->private, buffer); 433 cgroup_unlock(); 434 return retval; 435 } 436 437 static struct cftype dev_cgroup_files[] = { 438 { 439 .name = "allow", 440 .write_string = devcgroup_access_write, 441 .private = DEVCG_ALLOW, 442 }, 443 { 444 .name = "deny", 445 .write_string = devcgroup_access_write, 446 .private = DEVCG_DENY, 447 }, 448 { 449 .name = "list", 450 .read_seq_string = devcgroup_seq_read, 451 .private = DEVCG_LIST, 452 }, 453 }; 454 455 static int devcgroup_populate(struct cgroup_subsys *ss, 456 struct cgroup *cgroup) 457 { 458 return cgroup_add_files(cgroup, ss, dev_cgroup_files, 459 ARRAY_SIZE(dev_cgroup_files)); 460 } 461 462 struct cgroup_subsys devices_subsys = { 463 .name = "devices", 464 .can_attach = devcgroup_can_attach, 465 .create = devcgroup_create, 466 .destroy = devcgroup_destroy, 467 .populate = devcgroup_populate, 468 .subsys_id = devices_subsys_id, 469 }; 470 471 int devcgroup_inode_permission(struct inode *inode, int mask) 472 { 473 struct dev_cgroup *dev_cgroup; 474 struct dev_whitelist_item *wh; 475 476 dev_t device = inode->i_rdev; 477 if (!device) 478 return 0; 479 if (!S_ISBLK(inode->i_mode) && !S_ISCHR(inode->i_mode)) 480 return 0; 481 482 rcu_read_lock(); 483 484 dev_cgroup = task_devcgroup(current); 485 486 list_for_each_entry_rcu(wh, &dev_cgroup->whitelist, list) { 487 if (wh->type & DEV_ALL) 488 goto acc_check; 489 if ((wh->type & DEV_BLOCK) && !S_ISBLK(inode->i_mode)) 490 continue; 491 if ((wh->type & DEV_CHAR) && !S_ISCHR(inode->i_mode)) 492 continue; 493 if (wh->major != ~0 && wh->major != imajor(inode)) 494 continue; 495 if (wh->minor != ~0 && wh->minor != iminor(inode)) 496 continue; 497 acc_check: 498 if ((mask & MAY_WRITE) && !(wh->access & ACC_WRITE)) 499 continue; 500 if ((mask & MAY_READ) && !(wh->access & ACC_READ)) 501 continue; 502 rcu_read_unlock(); 503 return 0; 504 } 505 506 rcu_read_unlock(); 507 508 return -EPERM; 509 } 510 511 int devcgroup_inode_mknod(int mode, dev_t dev) 512 { 513 struct dev_cgroup *dev_cgroup; 514 struct dev_whitelist_item *wh; 515 516 rcu_read_lock(); 517 518 dev_cgroup = task_devcgroup(current); 519 520 list_for_each_entry(wh, &dev_cgroup->whitelist, list) { 521 if (wh->type & DEV_ALL) 522 goto acc_check; 523 if ((wh->type & DEV_BLOCK) && !S_ISBLK(mode)) 524 continue; 525 if ((wh->type & DEV_CHAR) && !S_ISCHR(mode)) 526 continue; 527 if (wh->major != ~0 && wh->major != MAJOR(dev)) 528 continue; 529 if (wh->minor != ~0 && wh->minor != MINOR(dev)) 530 continue; 531 acc_check: 532 if (!(wh->access & ACC_MKNOD)) 533 continue; 534 rcu_read_unlock(); 535 return 0; 536 } 537 538 rcu_read_unlock(); 539 540 return -EPERM; 541 } 542