1 /* 2 * fs/kernfs/file.c - kernfs file implementation 3 * 4 * Copyright (c) 2001-3 Patrick Mochel 5 * Copyright (c) 2007 SUSE Linux Products GmbH 6 * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org> 7 * 8 * This file is released under the GPLv2. 9 */ 10 11 #include <linux/fs.h> 12 #include <linux/seq_file.h> 13 #include <linux/slab.h> 14 #include <linux/poll.h> 15 #include <linux/pagemap.h> 16 #include <linux/sched.h> 17 #include <linux/fsnotify.h> 18 19 #include "kernfs-internal.h" 20 21 /* 22 * There's one kernfs_open_file for each open file and one kernfs_open_node 23 * for each kernfs_node with one or more open files. 24 * 25 * kernfs_node->attr.open points to kernfs_open_node. attr.open is 26 * protected by kernfs_open_node_lock. 27 * 28 * filp->private_data points to seq_file whose ->private points to 29 * kernfs_open_file. kernfs_open_files are chained at 30 * kernfs_open_node->files, which is protected by kernfs_open_file_mutex. 31 */ 32 static DEFINE_SPINLOCK(kernfs_open_node_lock); 33 static DEFINE_MUTEX(kernfs_open_file_mutex); 34 35 struct kernfs_open_node { 36 atomic_t refcnt; 37 atomic_t event; 38 wait_queue_head_t poll; 39 struct list_head files; /* goes through kernfs_open_file.list */ 40 }; 41 42 /* 43 * kernfs_notify() may be called from any context and bounces notifications 44 * through a work item. To minimize space overhead in kernfs_node, the 45 * pending queue is implemented as a singly linked list of kernfs_nodes. 46 * The list is terminated with the self pointer so that whether a 47 * kernfs_node is on the list or not can be determined by testing the next 48 * pointer for NULL. 49 */ 50 #define KERNFS_NOTIFY_EOL ((void *)&kernfs_notify_list) 51 52 static DEFINE_SPINLOCK(kernfs_notify_lock); 53 static struct kernfs_node *kernfs_notify_list = KERNFS_NOTIFY_EOL; 54 55 static struct kernfs_open_file *kernfs_of(struct file *file) 56 { 57 return ((struct seq_file *)file->private_data)->private; 58 } 59 60 /* 61 * Determine the kernfs_ops for the given kernfs_node. This function must 62 * be called while holding an active reference. 63 */ 64 static const struct kernfs_ops *kernfs_ops(struct kernfs_node *kn) 65 { 66 if (kn->flags & KERNFS_LOCKDEP) 67 lockdep_assert_held(kn); 68 return kn->attr.ops; 69 } 70 71 /* 72 * As kernfs_seq_stop() is also called after kernfs_seq_start() or 73 * kernfs_seq_next() failure, it needs to distinguish whether it's stopping 74 * a seq_file iteration which is fully initialized with an active reference 75 * or an aborted kernfs_seq_start() due to get_active failure. The 76 * position pointer is the only context for each seq_file iteration and 77 * thus the stop condition should be encoded in it. As the return value is 78 * directly visible to userland, ERR_PTR(-ENODEV) is the only acceptable 79 * choice to indicate get_active failure. 80 * 81 * Unfortunately, this is complicated due to the optional custom seq_file 82 * operations which may return ERR_PTR(-ENODEV) too. kernfs_seq_stop() 83 * can't distinguish whether ERR_PTR(-ENODEV) is from get_active failure or 84 * custom seq_file operations and thus can't decide whether put_active 85 * should be performed or not only on ERR_PTR(-ENODEV). 86 * 87 * This is worked around by factoring out the custom seq_stop() and 88 * put_active part into kernfs_seq_stop_active(), skipping it from 89 * kernfs_seq_stop() if ERR_PTR(-ENODEV) while invoking it directly after 90 * custom seq_file operations fail with ERR_PTR(-ENODEV) - this ensures 91 * that kernfs_seq_stop_active() is skipped only after get_active failure. 92 */ 93 static void kernfs_seq_stop_active(struct seq_file *sf, void *v) 94 { 95 struct kernfs_open_file *of = sf->private; 96 const struct kernfs_ops *ops = kernfs_ops(of->kn); 97 98 if (ops->seq_stop) 99 ops->seq_stop(sf, v); 100 kernfs_put_active(of->kn); 101 } 102 103 static void *kernfs_seq_start(struct seq_file *sf, loff_t *ppos) 104 { 105 struct kernfs_open_file *of = sf->private; 106 const struct kernfs_ops *ops; 107 108 /* 109 * @of->mutex nests outside active ref and is primarily to ensure that 110 * the ops aren't called concurrently for the same open file. 111 */ 112 mutex_lock(&of->mutex); 113 if (!kernfs_get_active(of->kn)) 114 return ERR_PTR(-ENODEV); 115 116 ops = kernfs_ops(of->kn); 117 if (ops->seq_start) { 118 void *next = ops->seq_start(sf, ppos); 119 /* see the comment above kernfs_seq_stop_active() */ 120 if (next == ERR_PTR(-ENODEV)) 121 kernfs_seq_stop_active(sf, next); 122 return next; 123 } else { 124 /* 125 * The same behavior and code as single_open(). Returns 126 * !NULL if pos is at the beginning; otherwise, NULL. 127 */ 128 return NULL + !*ppos; 129 } 130 } 131 132 static void *kernfs_seq_next(struct seq_file *sf, void *v, loff_t *ppos) 133 { 134 struct kernfs_open_file *of = sf->private; 135 const struct kernfs_ops *ops = kernfs_ops(of->kn); 136 137 if (ops->seq_next) { 138 void *next = ops->seq_next(sf, v, ppos); 139 /* see the comment above kernfs_seq_stop_active() */ 140 if (next == ERR_PTR(-ENODEV)) 141 kernfs_seq_stop_active(sf, next); 142 return next; 143 } else { 144 /* 145 * The same behavior and code as single_open(), always 146 * terminate after the initial read. 147 */ 148 ++*ppos; 149 return NULL; 150 } 151 } 152 153 static void kernfs_seq_stop(struct seq_file *sf, void *v) 154 { 155 struct kernfs_open_file *of = sf->private; 156 157 if (v != ERR_PTR(-ENODEV)) 158 kernfs_seq_stop_active(sf, v); 159 mutex_unlock(&of->mutex); 160 } 161 162 static int kernfs_seq_show(struct seq_file *sf, void *v) 163 { 164 struct kernfs_open_file *of = sf->private; 165 166 of->event = atomic_read(&of->kn->attr.open->event); 167 168 return of->kn->attr.ops->seq_show(sf, v); 169 } 170 171 static const struct seq_operations kernfs_seq_ops = { 172 .start = kernfs_seq_start, 173 .next = kernfs_seq_next, 174 .stop = kernfs_seq_stop, 175 .show = kernfs_seq_show, 176 }; 177 178 /* 179 * As reading a bin file can have side-effects, the exact offset and bytes 180 * specified in read(2) call should be passed to the read callback making 181 * it difficult to use seq_file. Implement simplistic custom buffering for 182 * bin files. 183 */ 184 static ssize_t kernfs_file_direct_read(struct kernfs_open_file *of, 185 char __user *user_buf, size_t count, 186 loff_t *ppos) 187 { 188 ssize_t len = min_t(size_t, count, PAGE_SIZE); 189 const struct kernfs_ops *ops; 190 char *buf; 191 192 buf = of->prealloc_buf; 193 if (!buf) 194 buf = kmalloc(len, GFP_KERNEL); 195 if (!buf) 196 return -ENOMEM; 197 198 /* 199 * @of->mutex nests outside active ref and is used both to ensure that 200 * the ops aren't called concurrently for the same open file, and 201 * to provide exclusive access to ->prealloc_buf (when that exists). 202 */ 203 mutex_lock(&of->mutex); 204 if (!kernfs_get_active(of->kn)) { 205 len = -ENODEV; 206 mutex_unlock(&of->mutex); 207 goto out_free; 208 } 209 210 ops = kernfs_ops(of->kn); 211 if (ops->read) 212 len = ops->read(of, buf, len, *ppos); 213 else 214 len = -EINVAL; 215 216 if (len < 0) 217 goto out_unlock; 218 219 if (copy_to_user(user_buf, buf, len)) { 220 len = -EFAULT; 221 goto out_unlock; 222 } 223 224 *ppos += len; 225 226 out_unlock: 227 kernfs_put_active(of->kn); 228 mutex_unlock(&of->mutex); 229 out_free: 230 if (buf != of->prealloc_buf) 231 kfree(buf); 232 return len; 233 } 234 235 /** 236 * kernfs_fop_read - kernfs vfs read callback 237 * @file: file pointer 238 * @user_buf: data to write 239 * @count: number of bytes 240 * @ppos: starting offset 241 */ 242 static ssize_t kernfs_fop_read(struct file *file, char __user *user_buf, 243 size_t count, loff_t *ppos) 244 { 245 struct kernfs_open_file *of = kernfs_of(file); 246 247 if (of->kn->flags & KERNFS_HAS_SEQ_SHOW) 248 return seq_read(file, user_buf, count, ppos); 249 else 250 return kernfs_file_direct_read(of, user_buf, count, ppos); 251 } 252 253 /** 254 * kernfs_fop_write - kernfs vfs write callback 255 * @file: file pointer 256 * @user_buf: data to write 257 * @count: number of bytes 258 * @ppos: starting offset 259 * 260 * Copy data in from userland and pass it to the matching kernfs write 261 * operation. 262 * 263 * There is no easy way for us to know if userspace is only doing a partial 264 * write, so we don't support them. We expect the entire buffer to come on 265 * the first write. Hint: if you're writing a value, first read the file, 266 * modify only the the value you're changing, then write entire buffer 267 * back. 268 */ 269 static ssize_t kernfs_fop_write(struct file *file, const char __user *user_buf, 270 size_t count, loff_t *ppos) 271 { 272 struct kernfs_open_file *of = kernfs_of(file); 273 const struct kernfs_ops *ops; 274 size_t len; 275 char *buf; 276 277 if (of->atomic_write_len) { 278 len = count; 279 if (len > of->atomic_write_len) 280 return -E2BIG; 281 } else { 282 len = min_t(size_t, count, PAGE_SIZE); 283 } 284 285 buf = of->prealloc_buf; 286 if (!buf) 287 buf = kmalloc(len + 1, GFP_KERNEL); 288 if (!buf) 289 return -ENOMEM; 290 291 /* 292 * @of->mutex nests outside active ref and is used both to ensure that 293 * the ops aren't called concurrently for the same open file, and 294 * to provide exclusive access to ->prealloc_buf (when that exists). 295 */ 296 mutex_lock(&of->mutex); 297 if (!kernfs_get_active(of->kn)) { 298 mutex_unlock(&of->mutex); 299 len = -ENODEV; 300 goto out_free; 301 } 302 303 if (copy_from_user(buf, user_buf, len)) { 304 len = -EFAULT; 305 goto out_unlock; 306 } 307 buf[len] = '\0'; /* guarantee string termination */ 308 309 ops = kernfs_ops(of->kn); 310 if (ops->write) 311 len = ops->write(of, buf, len, *ppos); 312 else 313 len = -EINVAL; 314 315 if (len > 0) 316 *ppos += len; 317 318 out_unlock: 319 kernfs_put_active(of->kn); 320 mutex_unlock(&of->mutex); 321 out_free: 322 if (buf != of->prealloc_buf) 323 kfree(buf); 324 return len; 325 } 326 327 static void kernfs_vma_open(struct vm_area_struct *vma) 328 { 329 struct file *file = vma->vm_file; 330 struct kernfs_open_file *of = kernfs_of(file); 331 332 if (!of->vm_ops) 333 return; 334 335 if (!kernfs_get_active(of->kn)) 336 return; 337 338 if (of->vm_ops->open) 339 of->vm_ops->open(vma); 340 341 kernfs_put_active(of->kn); 342 } 343 344 static int kernfs_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf) 345 { 346 struct file *file = vma->vm_file; 347 struct kernfs_open_file *of = kernfs_of(file); 348 int ret; 349 350 if (!of->vm_ops) 351 return VM_FAULT_SIGBUS; 352 353 if (!kernfs_get_active(of->kn)) 354 return VM_FAULT_SIGBUS; 355 356 ret = VM_FAULT_SIGBUS; 357 if (of->vm_ops->fault) 358 ret = of->vm_ops->fault(vma, vmf); 359 360 kernfs_put_active(of->kn); 361 return ret; 362 } 363 364 static int kernfs_vma_page_mkwrite(struct vm_area_struct *vma, 365 struct vm_fault *vmf) 366 { 367 struct file *file = vma->vm_file; 368 struct kernfs_open_file *of = kernfs_of(file); 369 int ret; 370 371 if (!of->vm_ops) 372 return VM_FAULT_SIGBUS; 373 374 if (!kernfs_get_active(of->kn)) 375 return VM_FAULT_SIGBUS; 376 377 ret = 0; 378 if (of->vm_ops->page_mkwrite) 379 ret = of->vm_ops->page_mkwrite(vma, vmf); 380 else 381 file_update_time(file); 382 383 kernfs_put_active(of->kn); 384 return ret; 385 } 386 387 static int kernfs_vma_access(struct vm_area_struct *vma, unsigned long addr, 388 void *buf, int len, int write) 389 { 390 struct file *file = vma->vm_file; 391 struct kernfs_open_file *of = kernfs_of(file); 392 int ret; 393 394 if (!of->vm_ops) 395 return -EINVAL; 396 397 if (!kernfs_get_active(of->kn)) 398 return -EINVAL; 399 400 ret = -EINVAL; 401 if (of->vm_ops->access) 402 ret = of->vm_ops->access(vma, addr, buf, len, write); 403 404 kernfs_put_active(of->kn); 405 return ret; 406 } 407 408 #ifdef CONFIG_NUMA 409 static int kernfs_vma_set_policy(struct vm_area_struct *vma, 410 struct mempolicy *new) 411 { 412 struct file *file = vma->vm_file; 413 struct kernfs_open_file *of = kernfs_of(file); 414 int ret; 415 416 if (!of->vm_ops) 417 return 0; 418 419 if (!kernfs_get_active(of->kn)) 420 return -EINVAL; 421 422 ret = 0; 423 if (of->vm_ops->set_policy) 424 ret = of->vm_ops->set_policy(vma, new); 425 426 kernfs_put_active(of->kn); 427 return ret; 428 } 429 430 static struct mempolicy *kernfs_vma_get_policy(struct vm_area_struct *vma, 431 unsigned long addr) 432 { 433 struct file *file = vma->vm_file; 434 struct kernfs_open_file *of = kernfs_of(file); 435 struct mempolicy *pol; 436 437 if (!of->vm_ops) 438 return vma->vm_policy; 439 440 if (!kernfs_get_active(of->kn)) 441 return vma->vm_policy; 442 443 pol = vma->vm_policy; 444 if (of->vm_ops->get_policy) 445 pol = of->vm_ops->get_policy(vma, addr); 446 447 kernfs_put_active(of->kn); 448 return pol; 449 } 450 451 static int kernfs_vma_migrate(struct vm_area_struct *vma, 452 const nodemask_t *from, const nodemask_t *to, 453 unsigned long flags) 454 { 455 struct file *file = vma->vm_file; 456 struct kernfs_open_file *of = kernfs_of(file); 457 int ret; 458 459 if (!of->vm_ops) 460 return 0; 461 462 if (!kernfs_get_active(of->kn)) 463 return 0; 464 465 ret = 0; 466 if (of->vm_ops->migrate) 467 ret = of->vm_ops->migrate(vma, from, to, flags); 468 469 kernfs_put_active(of->kn); 470 return ret; 471 } 472 #endif 473 474 static const struct vm_operations_struct kernfs_vm_ops = { 475 .open = kernfs_vma_open, 476 .fault = kernfs_vma_fault, 477 .page_mkwrite = kernfs_vma_page_mkwrite, 478 .access = kernfs_vma_access, 479 #ifdef CONFIG_NUMA 480 .set_policy = kernfs_vma_set_policy, 481 .get_policy = kernfs_vma_get_policy, 482 .migrate = kernfs_vma_migrate, 483 #endif 484 }; 485 486 static int kernfs_fop_mmap(struct file *file, struct vm_area_struct *vma) 487 { 488 struct kernfs_open_file *of = kernfs_of(file); 489 const struct kernfs_ops *ops; 490 int rc; 491 492 /* 493 * mmap path and of->mutex are prone to triggering spurious lockdep 494 * warnings and we don't want to add spurious locking dependency 495 * between the two. Check whether mmap is actually implemented 496 * without grabbing @of->mutex by testing HAS_MMAP flag. See the 497 * comment in kernfs_file_open() for more details. 498 */ 499 if (!(of->kn->flags & KERNFS_HAS_MMAP)) 500 return -ENODEV; 501 502 mutex_lock(&of->mutex); 503 504 rc = -ENODEV; 505 if (!kernfs_get_active(of->kn)) 506 goto out_unlock; 507 508 ops = kernfs_ops(of->kn); 509 rc = ops->mmap(of, vma); 510 if (rc) 511 goto out_put; 512 513 /* 514 * PowerPC's pci_mmap of legacy_mem uses shmem_zero_setup() 515 * to satisfy versions of X which crash if the mmap fails: that 516 * substitutes a new vm_file, and we don't then want bin_vm_ops. 517 */ 518 if (vma->vm_file != file) 519 goto out_put; 520 521 rc = -EINVAL; 522 if (of->mmapped && of->vm_ops != vma->vm_ops) 523 goto out_put; 524 525 /* 526 * It is not possible to successfully wrap close. 527 * So error if someone is trying to use close. 528 */ 529 rc = -EINVAL; 530 if (vma->vm_ops && vma->vm_ops->close) 531 goto out_put; 532 533 rc = 0; 534 of->mmapped = 1; 535 of->vm_ops = vma->vm_ops; 536 vma->vm_ops = &kernfs_vm_ops; 537 out_put: 538 kernfs_put_active(of->kn); 539 out_unlock: 540 mutex_unlock(&of->mutex); 541 542 return rc; 543 } 544 545 /** 546 * kernfs_get_open_node - get or create kernfs_open_node 547 * @kn: target kernfs_node 548 * @of: kernfs_open_file for this instance of open 549 * 550 * If @kn->attr.open exists, increment its reference count; otherwise, 551 * create one. @of is chained to the files list. 552 * 553 * LOCKING: 554 * Kernel thread context (may sleep). 555 * 556 * RETURNS: 557 * 0 on success, -errno on failure. 558 */ 559 static int kernfs_get_open_node(struct kernfs_node *kn, 560 struct kernfs_open_file *of) 561 { 562 struct kernfs_open_node *on, *new_on = NULL; 563 564 retry: 565 mutex_lock(&kernfs_open_file_mutex); 566 spin_lock_irq(&kernfs_open_node_lock); 567 568 if (!kn->attr.open && new_on) { 569 kn->attr.open = new_on; 570 new_on = NULL; 571 } 572 573 on = kn->attr.open; 574 if (on) { 575 atomic_inc(&on->refcnt); 576 list_add_tail(&of->list, &on->files); 577 } 578 579 spin_unlock_irq(&kernfs_open_node_lock); 580 mutex_unlock(&kernfs_open_file_mutex); 581 582 if (on) { 583 kfree(new_on); 584 return 0; 585 } 586 587 /* not there, initialize a new one and retry */ 588 new_on = kmalloc(sizeof(*new_on), GFP_KERNEL); 589 if (!new_on) 590 return -ENOMEM; 591 592 atomic_set(&new_on->refcnt, 0); 593 atomic_set(&new_on->event, 1); 594 init_waitqueue_head(&new_on->poll); 595 INIT_LIST_HEAD(&new_on->files); 596 goto retry; 597 } 598 599 /** 600 * kernfs_put_open_node - put kernfs_open_node 601 * @kn: target kernfs_nodet 602 * @of: associated kernfs_open_file 603 * 604 * Put @kn->attr.open and unlink @of from the files list. If 605 * reference count reaches zero, disassociate and free it. 606 * 607 * LOCKING: 608 * None. 609 */ 610 static void kernfs_put_open_node(struct kernfs_node *kn, 611 struct kernfs_open_file *of) 612 { 613 struct kernfs_open_node *on = kn->attr.open; 614 unsigned long flags; 615 616 mutex_lock(&kernfs_open_file_mutex); 617 spin_lock_irqsave(&kernfs_open_node_lock, flags); 618 619 if (of) 620 list_del(&of->list); 621 622 if (atomic_dec_and_test(&on->refcnt)) 623 kn->attr.open = NULL; 624 else 625 on = NULL; 626 627 spin_unlock_irqrestore(&kernfs_open_node_lock, flags); 628 mutex_unlock(&kernfs_open_file_mutex); 629 630 kfree(on); 631 } 632 633 static int kernfs_fop_open(struct inode *inode, struct file *file) 634 { 635 struct kernfs_node *kn = file->f_path.dentry->d_fsdata; 636 struct kernfs_root *root = kernfs_root(kn); 637 const struct kernfs_ops *ops; 638 struct kernfs_open_file *of; 639 bool has_read, has_write, has_mmap; 640 int error = -EACCES; 641 642 if (!kernfs_get_active(kn)) 643 return -ENODEV; 644 645 ops = kernfs_ops(kn); 646 647 has_read = ops->seq_show || ops->read || ops->mmap; 648 has_write = ops->write || ops->mmap; 649 has_mmap = ops->mmap; 650 651 /* see the flag definition for details */ 652 if (root->flags & KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK) { 653 if ((file->f_mode & FMODE_WRITE) && 654 (!(inode->i_mode & S_IWUGO) || !has_write)) 655 goto err_out; 656 657 if ((file->f_mode & FMODE_READ) && 658 (!(inode->i_mode & S_IRUGO) || !has_read)) 659 goto err_out; 660 } 661 662 /* allocate a kernfs_open_file for the file */ 663 error = -ENOMEM; 664 of = kzalloc(sizeof(struct kernfs_open_file), GFP_KERNEL); 665 if (!of) 666 goto err_out; 667 668 /* 669 * The following is done to give a different lockdep key to 670 * @of->mutex for files which implement mmap. This is a rather 671 * crude way to avoid false positive lockdep warning around 672 * mm->mmap_sem - mmap nests @of->mutex under mm->mmap_sem and 673 * reading /sys/block/sda/trace/act_mask grabs sr_mutex, under 674 * which mm->mmap_sem nests, while holding @of->mutex. As each 675 * open file has a separate mutex, it's okay as long as those don't 676 * happen on the same file. At this point, we can't easily give 677 * each file a separate locking class. Let's differentiate on 678 * whether the file has mmap or not for now. 679 * 680 * Both paths of the branch look the same. They're supposed to 681 * look that way and give @of->mutex different static lockdep keys. 682 */ 683 if (has_mmap) 684 mutex_init(&of->mutex); 685 else 686 mutex_init(&of->mutex); 687 688 of->kn = kn; 689 of->file = file; 690 691 /* 692 * Write path needs to atomic_write_len outside active reference. 693 * Cache it in open_file. See kernfs_fop_write() for details. 694 */ 695 of->atomic_write_len = ops->atomic_write_len; 696 697 error = -EINVAL; 698 /* 699 * ->seq_show is incompatible with ->prealloc, 700 * as seq_read does its own allocation. 701 * ->read must be used instead. 702 */ 703 if (ops->prealloc && ops->seq_show) 704 goto err_free; 705 if (ops->prealloc) { 706 int len = of->atomic_write_len ?: PAGE_SIZE; 707 of->prealloc_buf = kmalloc(len + 1, GFP_KERNEL); 708 error = -ENOMEM; 709 if (!of->prealloc_buf) 710 goto err_free; 711 } 712 713 /* 714 * Always instantiate seq_file even if read access doesn't use 715 * seq_file or is not requested. This unifies private data access 716 * and readable regular files are the vast majority anyway. 717 */ 718 if (ops->seq_show) 719 error = seq_open(file, &kernfs_seq_ops); 720 else 721 error = seq_open(file, NULL); 722 if (error) 723 goto err_free; 724 725 ((struct seq_file *)file->private_data)->private = of; 726 727 /* seq_file clears PWRITE unconditionally, restore it if WRITE */ 728 if (file->f_mode & FMODE_WRITE) 729 file->f_mode |= FMODE_PWRITE; 730 731 /* make sure we have open node struct */ 732 error = kernfs_get_open_node(kn, of); 733 if (error) 734 goto err_close; 735 736 /* open succeeded, put active references */ 737 kernfs_put_active(kn); 738 return 0; 739 740 err_close: 741 seq_release(inode, file); 742 err_free: 743 kfree(of->prealloc_buf); 744 kfree(of); 745 err_out: 746 kernfs_put_active(kn); 747 return error; 748 } 749 750 static int kernfs_fop_release(struct inode *inode, struct file *filp) 751 { 752 struct kernfs_node *kn = filp->f_path.dentry->d_fsdata; 753 struct kernfs_open_file *of = kernfs_of(filp); 754 755 kernfs_put_open_node(kn, of); 756 seq_release(inode, filp); 757 kfree(of->prealloc_buf); 758 kfree(of); 759 760 return 0; 761 } 762 763 void kernfs_unmap_bin_file(struct kernfs_node *kn) 764 { 765 struct kernfs_open_node *on; 766 struct kernfs_open_file *of; 767 768 if (!(kn->flags & KERNFS_HAS_MMAP)) 769 return; 770 771 spin_lock_irq(&kernfs_open_node_lock); 772 on = kn->attr.open; 773 if (on) 774 atomic_inc(&on->refcnt); 775 spin_unlock_irq(&kernfs_open_node_lock); 776 if (!on) 777 return; 778 779 mutex_lock(&kernfs_open_file_mutex); 780 list_for_each_entry(of, &on->files, list) { 781 struct inode *inode = file_inode(of->file); 782 unmap_mapping_range(inode->i_mapping, 0, 0, 1); 783 } 784 mutex_unlock(&kernfs_open_file_mutex); 785 786 kernfs_put_open_node(kn, NULL); 787 } 788 789 /* 790 * Kernfs attribute files are pollable. The idea is that you read 791 * the content and then you use 'poll' or 'select' to wait for 792 * the content to change. When the content changes (assuming the 793 * manager for the kobject supports notification), poll will 794 * return POLLERR|POLLPRI, and select will return the fd whether 795 * it is waiting for read, write, or exceptions. 796 * Once poll/select indicates that the value has changed, you 797 * need to close and re-open the file, or seek to 0 and read again. 798 * Reminder: this only works for attributes which actively support 799 * it, and it is not possible to test an attribute from userspace 800 * to see if it supports poll (Neither 'poll' nor 'select' return 801 * an appropriate error code). When in doubt, set a suitable timeout value. 802 */ 803 static unsigned int kernfs_fop_poll(struct file *filp, poll_table *wait) 804 { 805 struct kernfs_open_file *of = kernfs_of(filp); 806 struct kernfs_node *kn = filp->f_path.dentry->d_fsdata; 807 struct kernfs_open_node *on = kn->attr.open; 808 809 /* need parent for the kobj, grab both */ 810 if (!kernfs_get_active(kn)) 811 goto trigger; 812 813 poll_wait(filp, &on->poll, wait); 814 815 kernfs_put_active(kn); 816 817 if (of->event != atomic_read(&on->event)) 818 goto trigger; 819 820 return DEFAULT_POLLMASK; 821 822 trigger: 823 return DEFAULT_POLLMASK|POLLERR|POLLPRI; 824 } 825 826 static void kernfs_notify_workfn(struct work_struct *work) 827 { 828 struct kernfs_node *kn; 829 struct kernfs_open_node *on; 830 struct kernfs_super_info *info; 831 repeat: 832 /* pop one off the notify_list */ 833 spin_lock_irq(&kernfs_notify_lock); 834 kn = kernfs_notify_list; 835 if (kn == KERNFS_NOTIFY_EOL) { 836 spin_unlock_irq(&kernfs_notify_lock); 837 return; 838 } 839 kernfs_notify_list = kn->attr.notify_next; 840 kn->attr.notify_next = NULL; 841 spin_unlock_irq(&kernfs_notify_lock); 842 843 /* kick poll */ 844 spin_lock_irq(&kernfs_open_node_lock); 845 846 on = kn->attr.open; 847 if (on) { 848 atomic_inc(&on->event); 849 wake_up_interruptible(&on->poll); 850 } 851 852 spin_unlock_irq(&kernfs_open_node_lock); 853 854 /* kick fsnotify */ 855 mutex_lock(&kernfs_mutex); 856 857 list_for_each_entry(info, &kernfs_root(kn)->supers, node) { 858 struct inode *inode; 859 struct dentry *dentry; 860 861 inode = ilookup(info->sb, kn->ino); 862 if (!inode) 863 continue; 864 865 dentry = d_find_any_alias(inode); 866 if (dentry) { 867 fsnotify_parent(NULL, dentry, FS_MODIFY); 868 fsnotify(inode, FS_MODIFY, inode, FSNOTIFY_EVENT_INODE, 869 NULL, 0); 870 dput(dentry); 871 } 872 873 iput(inode); 874 } 875 876 mutex_unlock(&kernfs_mutex); 877 kernfs_put(kn); 878 goto repeat; 879 } 880 881 /** 882 * kernfs_notify - notify a kernfs file 883 * @kn: file to notify 884 * 885 * Notify @kn such that poll(2) on @kn wakes up. Maybe be called from any 886 * context. 887 */ 888 void kernfs_notify(struct kernfs_node *kn) 889 { 890 static DECLARE_WORK(kernfs_notify_work, kernfs_notify_workfn); 891 unsigned long flags; 892 893 if (WARN_ON(kernfs_type(kn) != KERNFS_FILE)) 894 return; 895 896 spin_lock_irqsave(&kernfs_notify_lock, flags); 897 if (!kn->attr.notify_next) { 898 kernfs_get(kn); 899 kn->attr.notify_next = kernfs_notify_list; 900 kernfs_notify_list = kn; 901 schedule_work(&kernfs_notify_work); 902 } 903 spin_unlock_irqrestore(&kernfs_notify_lock, flags); 904 } 905 EXPORT_SYMBOL_GPL(kernfs_notify); 906 907 const struct file_operations kernfs_file_fops = { 908 .read = kernfs_fop_read, 909 .write = kernfs_fop_write, 910 .llseek = generic_file_llseek, 911 .mmap = kernfs_fop_mmap, 912 .open = kernfs_fop_open, 913 .release = kernfs_fop_release, 914 .poll = kernfs_fop_poll, 915 }; 916 917 /** 918 * __kernfs_create_file - kernfs internal function to create a file 919 * @parent: directory to create the file in 920 * @name: name of the file 921 * @mode: mode of the file 922 * @size: size of the file 923 * @ops: kernfs operations for the file 924 * @priv: private data for the file 925 * @ns: optional namespace tag of the file 926 * @name_is_static: don't copy file name 927 * @key: lockdep key for the file's active_ref, %NULL to disable lockdep 928 * 929 * Returns the created node on success, ERR_PTR() value on error. 930 */ 931 struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent, 932 const char *name, 933 umode_t mode, loff_t size, 934 const struct kernfs_ops *ops, 935 void *priv, const void *ns, 936 bool name_is_static, 937 struct lock_class_key *key) 938 { 939 struct kernfs_node *kn; 940 unsigned flags; 941 int rc; 942 943 flags = KERNFS_FILE; 944 if (name_is_static) 945 flags |= KERNFS_STATIC_NAME; 946 947 kn = kernfs_new_node(parent, name, (mode & S_IALLUGO) | S_IFREG, flags); 948 if (!kn) 949 return ERR_PTR(-ENOMEM); 950 951 kn->attr.ops = ops; 952 kn->attr.size = size; 953 kn->ns = ns; 954 kn->priv = priv; 955 956 #ifdef CONFIG_DEBUG_LOCK_ALLOC 957 if (key) { 958 lockdep_init_map(&kn->dep_map, "s_active", key, 0); 959 kn->flags |= KERNFS_LOCKDEP; 960 } 961 #endif 962 963 /* 964 * kn->attr.ops is accesible only while holding active ref. We 965 * need to know whether some ops are implemented outside active 966 * ref. Cache their existence in flags. 967 */ 968 if (ops->seq_show) 969 kn->flags |= KERNFS_HAS_SEQ_SHOW; 970 if (ops->mmap) 971 kn->flags |= KERNFS_HAS_MMAP; 972 973 rc = kernfs_add_one(kn); 974 if (rc) { 975 kernfs_put(kn); 976 return ERR_PTR(rc); 977 } 978 return kn; 979 } 980