1 /* 2 * fs/kernfs/file.c - kernfs file implementation 3 * 4 * Copyright (c) 2001-3 Patrick Mochel 5 * Copyright (c) 2007 SUSE Linux Products GmbH 6 * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org> 7 * 8 * This file is released under the GPLv2. 9 */ 10 11 #include <linux/fs.h> 12 #include <linux/seq_file.h> 13 #include <linux/slab.h> 14 #include <linux/poll.h> 15 #include <linux/pagemap.h> 16 #include <linux/sched/mm.h> 17 #include <linux/fsnotify.h> 18 19 #include "kernfs-internal.h" 20 21 /* 22 * There's one kernfs_open_file for each open file and one kernfs_open_node 23 * for each kernfs_node with one or more open files. 24 * 25 * kernfs_node->attr.open points to kernfs_open_node. attr.open is 26 * protected by kernfs_open_node_lock. 27 * 28 * filp->private_data points to seq_file whose ->private points to 29 * kernfs_open_file. kernfs_open_files are chained at 30 * kernfs_open_node->files, which is protected by kernfs_open_file_mutex. 31 */ 32 static DEFINE_SPINLOCK(kernfs_open_node_lock); 33 static DEFINE_MUTEX(kernfs_open_file_mutex); 34 35 struct kernfs_open_node { 36 atomic_t refcnt; 37 atomic_t event; 38 wait_queue_head_t poll; 39 struct list_head files; /* goes through kernfs_open_file.list */ 40 }; 41 42 /* 43 * kernfs_notify() may be called from any context and bounces notifications 44 * through a work item. To minimize space overhead in kernfs_node, the 45 * pending queue is implemented as a singly linked list of kernfs_nodes. 46 * The list is terminated with the self pointer so that whether a 47 * kernfs_node is on the list or not can be determined by testing the next 48 * pointer for NULL. 49 */ 50 #define KERNFS_NOTIFY_EOL ((void *)&kernfs_notify_list) 51 52 static DEFINE_SPINLOCK(kernfs_notify_lock); 53 static struct kernfs_node *kernfs_notify_list = KERNFS_NOTIFY_EOL; 54 55 static struct kernfs_open_file *kernfs_of(struct file *file) 56 { 57 return ((struct seq_file *)file->private_data)->private; 58 } 59 60 /* 61 * Determine the kernfs_ops for the given kernfs_node. This function must 62 * be called while holding an active reference. 63 */ 64 static const struct kernfs_ops *kernfs_ops(struct kernfs_node *kn) 65 { 66 if (kn->flags & KERNFS_LOCKDEP) 67 lockdep_assert_held(kn); 68 return kn->attr.ops; 69 } 70 71 /* 72 * As kernfs_seq_stop() is also called after kernfs_seq_start() or 73 * kernfs_seq_next() failure, it needs to distinguish whether it's stopping 74 * a seq_file iteration which is fully initialized with an active reference 75 * or an aborted kernfs_seq_start() due to get_active failure. The 76 * position pointer is the only context for each seq_file iteration and 77 * thus the stop condition should be encoded in it. As the return value is 78 * directly visible to userland, ERR_PTR(-ENODEV) is the only acceptable 79 * choice to indicate get_active failure. 80 * 81 * Unfortunately, this is complicated due to the optional custom seq_file 82 * operations which may return ERR_PTR(-ENODEV) too. kernfs_seq_stop() 83 * can't distinguish whether ERR_PTR(-ENODEV) is from get_active failure or 84 * custom seq_file operations and thus can't decide whether put_active 85 * should be performed or not only on ERR_PTR(-ENODEV). 86 * 87 * This is worked around by factoring out the custom seq_stop() and 88 * put_active part into kernfs_seq_stop_active(), skipping it from 89 * kernfs_seq_stop() if ERR_PTR(-ENODEV) while invoking it directly after 90 * custom seq_file operations fail with ERR_PTR(-ENODEV) - this ensures 91 * that kernfs_seq_stop_active() is skipped only after get_active failure. 92 */ 93 static void kernfs_seq_stop_active(struct seq_file *sf, void *v) 94 { 95 struct kernfs_open_file *of = sf->private; 96 const struct kernfs_ops *ops = kernfs_ops(of->kn); 97 98 if (ops->seq_stop) 99 ops->seq_stop(sf, v); 100 kernfs_put_active(of->kn); 101 } 102 103 static void *kernfs_seq_start(struct seq_file *sf, loff_t *ppos) 104 { 105 struct kernfs_open_file *of = sf->private; 106 const struct kernfs_ops *ops; 107 108 /* 109 * @of->mutex nests outside active ref and is primarily to ensure that 110 * the ops aren't called concurrently for the same open file. 111 */ 112 mutex_lock(&of->mutex); 113 if (!kernfs_get_active(of->kn)) 114 return ERR_PTR(-ENODEV); 115 116 ops = kernfs_ops(of->kn); 117 if (ops->seq_start) { 118 void *next = ops->seq_start(sf, ppos); 119 /* see the comment above kernfs_seq_stop_active() */ 120 if (next == ERR_PTR(-ENODEV)) 121 kernfs_seq_stop_active(sf, next); 122 return next; 123 } else { 124 /* 125 * The same behavior and code as single_open(). Returns 126 * !NULL if pos is at the beginning; otherwise, NULL. 127 */ 128 return NULL + !*ppos; 129 } 130 } 131 132 static void *kernfs_seq_next(struct seq_file *sf, void *v, loff_t *ppos) 133 { 134 struct kernfs_open_file *of = sf->private; 135 const struct kernfs_ops *ops = kernfs_ops(of->kn); 136 137 if (ops->seq_next) { 138 void *next = ops->seq_next(sf, v, ppos); 139 /* see the comment above kernfs_seq_stop_active() */ 140 if (next == ERR_PTR(-ENODEV)) 141 kernfs_seq_stop_active(sf, next); 142 return next; 143 } else { 144 /* 145 * The same behavior and code as single_open(), always 146 * terminate after the initial read. 147 */ 148 ++*ppos; 149 return NULL; 150 } 151 } 152 153 static void kernfs_seq_stop(struct seq_file *sf, void *v) 154 { 155 struct kernfs_open_file *of = sf->private; 156 157 if (v != ERR_PTR(-ENODEV)) 158 kernfs_seq_stop_active(sf, v); 159 mutex_unlock(&of->mutex); 160 } 161 162 static int kernfs_seq_show(struct seq_file *sf, void *v) 163 { 164 struct kernfs_open_file *of = sf->private; 165 166 of->event = atomic_read(&of->kn->attr.open->event); 167 168 return of->kn->attr.ops->seq_show(sf, v); 169 } 170 171 static const struct seq_operations kernfs_seq_ops = { 172 .start = kernfs_seq_start, 173 .next = kernfs_seq_next, 174 .stop = kernfs_seq_stop, 175 .show = kernfs_seq_show, 176 }; 177 178 /* 179 * As reading a bin file can have side-effects, the exact offset and bytes 180 * specified in read(2) call should be passed to the read callback making 181 * it difficult to use seq_file. Implement simplistic custom buffering for 182 * bin files. 183 */ 184 static ssize_t kernfs_file_direct_read(struct kernfs_open_file *of, 185 char __user *user_buf, size_t count, 186 loff_t *ppos) 187 { 188 ssize_t len = min_t(size_t, count, PAGE_SIZE); 189 const struct kernfs_ops *ops; 190 char *buf; 191 192 buf = of->prealloc_buf; 193 if (buf) 194 mutex_lock(&of->prealloc_mutex); 195 else 196 buf = kmalloc(len, GFP_KERNEL); 197 if (!buf) 198 return -ENOMEM; 199 200 /* 201 * @of->mutex nests outside active ref and is used both to ensure that 202 * the ops aren't called concurrently for the same open file. 203 */ 204 mutex_lock(&of->mutex); 205 if (!kernfs_get_active(of->kn)) { 206 len = -ENODEV; 207 mutex_unlock(&of->mutex); 208 goto out_free; 209 } 210 211 of->event = atomic_read(&of->kn->attr.open->event); 212 ops = kernfs_ops(of->kn); 213 if (ops->read) 214 len = ops->read(of, buf, len, *ppos); 215 else 216 len = -EINVAL; 217 218 kernfs_put_active(of->kn); 219 mutex_unlock(&of->mutex); 220 221 if (len < 0) 222 goto out_free; 223 224 if (copy_to_user(user_buf, buf, len)) { 225 len = -EFAULT; 226 goto out_free; 227 } 228 229 *ppos += len; 230 231 out_free: 232 if (buf == of->prealloc_buf) 233 mutex_unlock(&of->prealloc_mutex); 234 else 235 kfree(buf); 236 return len; 237 } 238 239 /** 240 * kernfs_fop_read - kernfs vfs read callback 241 * @file: file pointer 242 * @user_buf: data to write 243 * @count: number of bytes 244 * @ppos: starting offset 245 */ 246 static ssize_t kernfs_fop_read(struct file *file, char __user *user_buf, 247 size_t count, loff_t *ppos) 248 { 249 struct kernfs_open_file *of = kernfs_of(file); 250 251 if (of->kn->flags & KERNFS_HAS_SEQ_SHOW) 252 return seq_read(file, user_buf, count, ppos); 253 else 254 return kernfs_file_direct_read(of, user_buf, count, ppos); 255 } 256 257 /** 258 * kernfs_fop_write - kernfs vfs write callback 259 * @file: file pointer 260 * @user_buf: data to write 261 * @count: number of bytes 262 * @ppos: starting offset 263 * 264 * Copy data in from userland and pass it to the matching kernfs write 265 * operation. 266 * 267 * There is no easy way for us to know if userspace is only doing a partial 268 * write, so we don't support them. We expect the entire buffer to come on 269 * the first write. Hint: if you're writing a value, first read the file, 270 * modify only the the value you're changing, then write entire buffer 271 * back. 272 */ 273 static ssize_t kernfs_fop_write(struct file *file, const char __user *user_buf, 274 size_t count, loff_t *ppos) 275 { 276 struct kernfs_open_file *of = kernfs_of(file); 277 const struct kernfs_ops *ops; 278 ssize_t len; 279 char *buf; 280 281 if (of->atomic_write_len) { 282 len = count; 283 if (len > of->atomic_write_len) 284 return -E2BIG; 285 } else { 286 len = min_t(size_t, count, PAGE_SIZE); 287 } 288 289 buf = of->prealloc_buf; 290 if (buf) 291 mutex_lock(&of->prealloc_mutex); 292 else 293 buf = kmalloc(len + 1, GFP_KERNEL); 294 if (!buf) 295 return -ENOMEM; 296 297 if (copy_from_user(buf, user_buf, len)) { 298 len = -EFAULT; 299 goto out_free; 300 } 301 buf[len] = '\0'; /* guarantee string termination */ 302 303 /* 304 * @of->mutex nests outside active ref and is used both to ensure that 305 * the ops aren't called concurrently for the same open file. 306 */ 307 mutex_lock(&of->mutex); 308 if (!kernfs_get_active(of->kn)) { 309 mutex_unlock(&of->mutex); 310 len = -ENODEV; 311 goto out_free; 312 } 313 314 ops = kernfs_ops(of->kn); 315 if (ops->write) 316 len = ops->write(of, buf, len, *ppos); 317 else 318 len = -EINVAL; 319 320 kernfs_put_active(of->kn); 321 mutex_unlock(&of->mutex); 322 323 if (len > 0) 324 *ppos += len; 325 326 out_free: 327 if (buf == of->prealloc_buf) 328 mutex_unlock(&of->prealloc_mutex); 329 else 330 kfree(buf); 331 return len; 332 } 333 334 static void kernfs_vma_open(struct vm_area_struct *vma) 335 { 336 struct file *file = vma->vm_file; 337 struct kernfs_open_file *of = kernfs_of(file); 338 339 if (!of->vm_ops) 340 return; 341 342 if (!kernfs_get_active(of->kn)) 343 return; 344 345 if (of->vm_ops->open) 346 of->vm_ops->open(vma); 347 348 kernfs_put_active(of->kn); 349 } 350 351 static vm_fault_t kernfs_vma_fault(struct vm_fault *vmf) 352 { 353 struct file *file = vmf->vma->vm_file; 354 struct kernfs_open_file *of = kernfs_of(file); 355 vm_fault_t ret; 356 357 if (!of->vm_ops) 358 return VM_FAULT_SIGBUS; 359 360 if (!kernfs_get_active(of->kn)) 361 return VM_FAULT_SIGBUS; 362 363 ret = VM_FAULT_SIGBUS; 364 if (of->vm_ops->fault) 365 ret = of->vm_ops->fault(vmf); 366 367 kernfs_put_active(of->kn); 368 return ret; 369 } 370 371 static vm_fault_t kernfs_vma_page_mkwrite(struct vm_fault *vmf) 372 { 373 struct file *file = vmf->vma->vm_file; 374 struct kernfs_open_file *of = kernfs_of(file); 375 vm_fault_t ret; 376 377 if (!of->vm_ops) 378 return VM_FAULT_SIGBUS; 379 380 if (!kernfs_get_active(of->kn)) 381 return VM_FAULT_SIGBUS; 382 383 ret = 0; 384 if (of->vm_ops->page_mkwrite) 385 ret = of->vm_ops->page_mkwrite(vmf); 386 else 387 file_update_time(file); 388 389 kernfs_put_active(of->kn); 390 return ret; 391 } 392 393 static int kernfs_vma_access(struct vm_area_struct *vma, unsigned long addr, 394 void *buf, int len, int write) 395 { 396 struct file *file = vma->vm_file; 397 struct kernfs_open_file *of = kernfs_of(file); 398 int ret; 399 400 if (!of->vm_ops) 401 return -EINVAL; 402 403 if (!kernfs_get_active(of->kn)) 404 return -EINVAL; 405 406 ret = -EINVAL; 407 if (of->vm_ops->access) 408 ret = of->vm_ops->access(vma, addr, buf, len, write); 409 410 kernfs_put_active(of->kn); 411 return ret; 412 } 413 414 #ifdef CONFIG_NUMA 415 static int kernfs_vma_set_policy(struct vm_area_struct *vma, 416 struct mempolicy *new) 417 { 418 struct file *file = vma->vm_file; 419 struct kernfs_open_file *of = kernfs_of(file); 420 int ret; 421 422 if (!of->vm_ops) 423 return 0; 424 425 if (!kernfs_get_active(of->kn)) 426 return -EINVAL; 427 428 ret = 0; 429 if (of->vm_ops->set_policy) 430 ret = of->vm_ops->set_policy(vma, new); 431 432 kernfs_put_active(of->kn); 433 return ret; 434 } 435 436 static struct mempolicy *kernfs_vma_get_policy(struct vm_area_struct *vma, 437 unsigned long addr) 438 { 439 struct file *file = vma->vm_file; 440 struct kernfs_open_file *of = kernfs_of(file); 441 struct mempolicy *pol; 442 443 if (!of->vm_ops) 444 return vma->vm_policy; 445 446 if (!kernfs_get_active(of->kn)) 447 return vma->vm_policy; 448 449 pol = vma->vm_policy; 450 if (of->vm_ops->get_policy) 451 pol = of->vm_ops->get_policy(vma, addr); 452 453 kernfs_put_active(of->kn); 454 return pol; 455 } 456 457 #endif 458 459 static const struct vm_operations_struct kernfs_vm_ops = { 460 .open = kernfs_vma_open, 461 .fault = kernfs_vma_fault, 462 .page_mkwrite = kernfs_vma_page_mkwrite, 463 .access = kernfs_vma_access, 464 #ifdef CONFIG_NUMA 465 .set_policy = kernfs_vma_set_policy, 466 .get_policy = kernfs_vma_get_policy, 467 #endif 468 }; 469 470 static int kernfs_fop_mmap(struct file *file, struct vm_area_struct *vma) 471 { 472 struct kernfs_open_file *of = kernfs_of(file); 473 const struct kernfs_ops *ops; 474 int rc; 475 476 /* 477 * mmap path and of->mutex are prone to triggering spurious lockdep 478 * warnings and we don't want to add spurious locking dependency 479 * between the two. Check whether mmap is actually implemented 480 * without grabbing @of->mutex by testing HAS_MMAP flag. See the 481 * comment in kernfs_file_open() for more details. 482 */ 483 if (!(of->kn->flags & KERNFS_HAS_MMAP)) 484 return -ENODEV; 485 486 mutex_lock(&of->mutex); 487 488 rc = -ENODEV; 489 if (!kernfs_get_active(of->kn)) 490 goto out_unlock; 491 492 ops = kernfs_ops(of->kn); 493 rc = ops->mmap(of, vma); 494 if (rc) 495 goto out_put; 496 497 /* 498 * PowerPC's pci_mmap of legacy_mem uses shmem_zero_setup() 499 * to satisfy versions of X which crash if the mmap fails: that 500 * substitutes a new vm_file, and we don't then want bin_vm_ops. 501 */ 502 if (vma->vm_file != file) 503 goto out_put; 504 505 rc = -EINVAL; 506 if (of->mmapped && of->vm_ops != vma->vm_ops) 507 goto out_put; 508 509 /* 510 * It is not possible to successfully wrap close. 511 * So error if someone is trying to use close. 512 */ 513 rc = -EINVAL; 514 if (vma->vm_ops && vma->vm_ops->close) 515 goto out_put; 516 517 rc = 0; 518 of->mmapped = true; 519 of->vm_ops = vma->vm_ops; 520 vma->vm_ops = &kernfs_vm_ops; 521 out_put: 522 kernfs_put_active(of->kn); 523 out_unlock: 524 mutex_unlock(&of->mutex); 525 526 return rc; 527 } 528 529 /** 530 * kernfs_get_open_node - get or create kernfs_open_node 531 * @kn: target kernfs_node 532 * @of: kernfs_open_file for this instance of open 533 * 534 * If @kn->attr.open exists, increment its reference count; otherwise, 535 * create one. @of is chained to the files list. 536 * 537 * LOCKING: 538 * Kernel thread context (may sleep). 539 * 540 * RETURNS: 541 * 0 on success, -errno on failure. 542 */ 543 static int kernfs_get_open_node(struct kernfs_node *kn, 544 struct kernfs_open_file *of) 545 { 546 struct kernfs_open_node *on, *new_on = NULL; 547 548 retry: 549 mutex_lock(&kernfs_open_file_mutex); 550 spin_lock_irq(&kernfs_open_node_lock); 551 552 if (!kn->attr.open && new_on) { 553 kn->attr.open = new_on; 554 new_on = NULL; 555 } 556 557 on = kn->attr.open; 558 if (on) { 559 atomic_inc(&on->refcnt); 560 list_add_tail(&of->list, &on->files); 561 } 562 563 spin_unlock_irq(&kernfs_open_node_lock); 564 mutex_unlock(&kernfs_open_file_mutex); 565 566 if (on) { 567 kfree(new_on); 568 return 0; 569 } 570 571 /* not there, initialize a new one and retry */ 572 new_on = kmalloc(sizeof(*new_on), GFP_KERNEL); 573 if (!new_on) 574 return -ENOMEM; 575 576 atomic_set(&new_on->refcnt, 0); 577 atomic_set(&new_on->event, 1); 578 init_waitqueue_head(&new_on->poll); 579 INIT_LIST_HEAD(&new_on->files); 580 goto retry; 581 } 582 583 /** 584 * kernfs_put_open_node - put kernfs_open_node 585 * @kn: target kernfs_nodet 586 * @of: associated kernfs_open_file 587 * 588 * Put @kn->attr.open and unlink @of from the files list. If 589 * reference count reaches zero, disassociate and free it. 590 * 591 * LOCKING: 592 * None. 593 */ 594 static void kernfs_put_open_node(struct kernfs_node *kn, 595 struct kernfs_open_file *of) 596 { 597 struct kernfs_open_node *on = kn->attr.open; 598 unsigned long flags; 599 600 mutex_lock(&kernfs_open_file_mutex); 601 spin_lock_irqsave(&kernfs_open_node_lock, flags); 602 603 if (of) 604 list_del(&of->list); 605 606 if (atomic_dec_and_test(&on->refcnt)) 607 kn->attr.open = NULL; 608 else 609 on = NULL; 610 611 spin_unlock_irqrestore(&kernfs_open_node_lock, flags); 612 mutex_unlock(&kernfs_open_file_mutex); 613 614 kfree(on); 615 } 616 617 static int kernfs_fop_open(struct inode *inode, struct file *file) 618 { 619 struct kernfs_node *kn = inode->i_private; 620 struct kernfs_root *root = kernfs_root(kn); 621 const struct kernfs_ops *ops; 622 struct kernfs_open_file *of; 623 bool has_read, has_write, has_mmap; 624 int error = -EACCES; 625 626 if (!kernfs_get_active(kn)) 627 return -ENODEV; 628 629 ops = kernfs_ops(kn); 630 631 has_read = ops->seq_show || ops->read || ops->mmap; 632 has_write = ops->write || ops->mmap; 633 has_mmap = ops->mmap; 634 635 /* see the flag definition for details */ 636 if (root->flags & KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK) { 637 if ((file->f_mode & FMODE_WRITE) && 638 (!(inode->i_mode & S_IWUGO) || !has_write)) 639 goto err_out; 640 641 if ((file->f_mode & FMODE_READ) && 642 (!(inode->i_mode & S_IRUGO) || !has_read)) 643 goto err_out; 644 } 645 646 /* allocate a kernfs_open_file for the file */ 647 error = -ENOMEM; 648 of = kzalloc(sizeof(struct kernfs_open_file), GFP_KERNEL); 649 if (!of) 650 goto err_out; 651 652 /* 653 * The following is done to give a different lockdep key to 654 * @of->mutex for files which implement mmap. This is a rather 655 * crude way to avoid false positive lockdep warning around 656 * mm->mmap_sem - mmap nests @of->mutex under mm->mmap_sem and 657 * reading /sys/block/sda/trace/act_mask grabs sr_mutex, under 658 * which mm->mmap_sem nests, while holding @of->mutex. As each 659 * open file has a separate mutex, it's okay as long as those don't 660 * happen on the same file. At this point, we can't easily give 661 * each file a separate locking class. Let's differentiate on 662 * whether the file has mmap or not for now. 663 * 664 * Both paths of the branch look the same. They're supposed to 665 * look that way and give @of->mutex different static lockdep keys. 666 */ 667 if (has_mmap) 668 mutex_init(&of->mutex); 669 else 670 mutex_init(&of->mutex); 671 672 of->kn = kn; 673 of->file = file; 674 675 /* 676 * Write path needs to atomic_write_len outside active reference. 677 * Cache it in open_file. See kernfs_fop_write() for details. 678 */ 679 of->atomic_write_len = ops->atomic_write_len; 680 681 error = -EINVAL; 682 /* 683 * ->seq_show is incompatible with ->prealloc, 684 * as seq_read does its own allocation. 685 * ->read must be used instead. 686 */ 687 if (ops->prealloc && ops->seq_show) 688 goto err_free; 689 if (ops->prealloc) { 690 int len = of->atomic_write_len ?: PAGE_SIZE; 691 of->prealloc_buf = kmalloc(len + 1, GFP_KERNEL); 692 error = -ENOMEM; 693 if (!of->prealloc_buf) 694 goto err_free; 695 mutex_init(&of->prealloc_mutex); 696 } 697 698 /* 699 * Always instantiate seq_file even if read access doesn't use 700 * seq_file or is not requested. This unifies private data access 701 * and readable regular files are the vast majority anyway. 702 */ 703 if (ops->seq_show) 704 error = seq_open(file, &kernfs_seq_ops); 705 else 706 error = seq_open(file, NULL); 707 if (error) 708 goto err_free; 709 710 of->seq_file = file->private_data; 711 of->seq_file->private = of; 712 713 /* seq_file clears PWRITE unconditionally, restore it if WRITE */ 714 if (file->f_mode & FMODE_WRITE) 715 file->f_mode |= FMODE_PWRITE; 716 717 /* make sure we have open node struct */ 718 error = kernfs_get_open_node(kn, of); 719 if (error) 720 goto err_seq_release; 721 722 if (ops->open) { 723 /* nobody has access to @of yet, skip @of->mutex */ 724 error = ops->open(of); 725 if (error) 726 goto err_put_node; 727 } 728 729 /* open succeeded, put active references */ 730 kernfs_put_active(kn); 731 return 0; 732 733 err_put_node: 734 kernfs_put_open_node(kn, of); 735 err_seq_release: 736 seq_release(inode, file); 737 err_free: 738 kfree(of->prealloc_buf); 739 kfree(of); 740 err_out: 741 kernfs_put_active(kn); 742 return error; 743 } 744 745 /* used from release/drain to ensure that ->release() is called exactly once */ 746 static void kernfs_release_file(struct kernfs_node *kn, 747 struct kernfs_open_file *of) 748 { 749 /* 750 * @of is guaranteed to have no other file operations in flight and 751 * we just want to synchronize release and drain paths. 752 * @kernfs_open_file_mutex is enough. @of->mutex can't be used 753 * here because drain path may be called from places which can 754 * cause circular dependency. 755 */ 756 lockdep_assert_held(&kernfs_open_file_mutex); 757 758 if (!of->released) { 759 /* 760 * A file is never detached without being released and we 761 * need to be able to release files which are deactivated 762 * and being drained. Don't use kernfs_ops(). 763 */ 764 kn->attr.ops->release(of); 765 of->released = true; 766 } 767 } 768 769 static int kernfs_fop_release(struct inode *inode, struct file *filp) 770 { 771 struct kernfs_node *kn = inode->i_private; 772 struct kernfs_open_file *of = kernfs_of(filp); 773 774 if (kn->flags & KERNFS_HAS_RELEASE) { 775 mutex_lock(&kernfs_open_file_mutex); 776 kernfs_release_file(kn, of); 777 mutex_unlock(&kernfs_open_file_mutex); 778 } 779 780 kernfs_put_open_node(kn, of); 781 seq_release(inode, filp); 782 kfree(of->prealloc_buf); 783 kfree(of); 784 785 return 0; 786 } 787 788 void kernfs_drain_open_files(struct kernfs_node *kn) 789 { 790 struct kernfs_open_node *on; 791 struct kernfs_open_file *of; 792 793 if (!(kn->flags & (KERNFS_HAS_MMAP | KERNFS_HAS_RELEASE))) 794 return; 795 796 spin_lock_irq(&kernfs_open_node_lock); 797 on = kn->attr.open; 798 if (on) 799 atomic_inc(&on->refcnt); 800 spin_unlock_irq(&kernfs_open_node_lock); 801 if (!on) 802 return; 803 804 mutex_lock(&kernfs_open_file_mutex); 805 806 list_for_each_entry(of, &on->files, list) { 807 struct inode *inode = file_inode(of->file); 808 809 if (kn->flags & KERNFS_HAS_MMAP) 810 unmap_mapping_range(inode->i_mapping, 0, 0, 1); 811 812 if (kn->flags & KERNFS_HAS_RELEASE) 813 kernfs_release_file(kn, of); 814 } 815 816 mutex_unlock(&kernfs_open_file_mutex); 817 818 kernfs_put_open_node(kn, NULL); 819 } 820 821 /* 822 * Kernfs attribute files are pollable. The idea is that you read 823 * the content and then you use 'poll' or 'select' to wait for 824 * the content to change. When the content changes (assuming the 825 * manager for the kobject supports notification), poll will 826 * return EPOLLERR|EPOLLPRI, and select will return the fd whether 827 * it is waiting for read, write, or exceptions. 828 * Once poll/select indicates that the value has changed, you 829 * need to close and re-open the file, or seek to 0 and read again. 830 * Reminder: this only works for attributes which actively support 831 * it, and it is not possible to test an attribute from userspace 832 * to see if it supports poll (Neither 'poll' nor 'select' return 833 * an appropriate error code). When in doubt, set a suitable timeout value. 834 */ 835 __poll_t kernfs_generic_poll(struct kernfs_open_file *of, poll_table *wait) 836 { 837 struct kernfs_node *kn = kernfs_dentry_node(of->file->f_path.dentry); 838 struct kernfs_open_node *on = kn->attr.open; 839 840 poll_wait(of->file, &on->poll, wait); 841 842 if (of->event != atomic_read(&on->event)) 843 return DEFAULT_POLLMASK|EPOLLERR|EPOLLPRI; 844 845 return DEFAULT_POLLMASK; 846 } 847 848 static __poll_t kernfs_fop_poll(struct file *filp, poll_table *wait) 849 { 850 struct kernfs_open_file *of = kernfs_of(filp); 851 struct kernfs_node *kn = kernfs_dentry_node(filp->f_path.dentry); 852 __poll_t ret; 853 854 if (!kernfs_get_active(kn)) 855 return DEFAULT_POLLMASK|EPOLLERR|EPOLLPRI; 856 857 if (kn->attr.ops->poll) 858 ret = kn->attr.ops->poll(of, wait); 859 else 860 ret = kernfs_generic_poll(of, wait); 861 862 kernfs_put_active(kn); 863 return ret; 864 } 865 866 static void kernfs_notify_workfn(struct work_struct *work) 867 { 868 struct kernfs_node *kn; 869 struct kernfs_super_info *info; 870 repeat: 871 /* pop one off the notify_list */ 872 spin_lock_irq(&kernfs_notify_lock); 873 kn = kernfs_notify_list; 874 if (kn == KERNFS_NOTIFY_EOL) { 875 spin_unlock_irq(&kernfs_notify_lock); 876 return; 877 } 878 kernfs_notify_list = kn->attr.notify_next; 879 kn->attr.notify_next = NULL; 880 spin_unlock_irq(&kernfs_notify_lock); 881 882 /* kick fsnotify */ 883 mutex_lock(&kernfs_mutex); 884 885 list_for_each_entry(info, &kernfs_root(kn)->supers, node) { 886 struct kernfs_node *parent; 887 struct inode *inode; 888 889 /* 890 * We want fsnotify_modify() on @kn but as the 891 * modifications aren't originating from userland don't 892 * have the matching @file available. Look up the inodes 893 * and generate the events manually. 894 */ 895 inode = ilookup(info->sb, kn->id.ino); 896 if (!inode) 897 continue; 898 899 parent = kernfs_get_parent(kn); 900 if (parent) { 901 struct inode *p_inode; 902 903 p_inode = ilookup(info->sb, parent->id.ino); 904 if (p_inode) { 905 fsnotify(p_inode, FS_MODIFY | FS_EVENT_ON_CHILD, 906 inode, FSNOTIFY_EVENT_INODE, kn->name, 0); 907 iput(p_inode); 908 } 909 910 kernfs_put(parent); 911 } 912 913 fsnotify(inode, FS_MODIFY, inode, FSNOTIFY_EVENT_INODE, 914 kn->name, 0); 915 iput(inode); 916 } 917 918 mutex_unlock(&kernfs_mutex); 919 kernfs_put(kn); 920 goto repeat; 921 } 922 923 /** 924 * kernfs_notify - notify a kernfs file 925 * @kn: file to notify 926 * 927 * Notify @kn such that poll(2) on @kn wakes up. Maybe be called from any 928 * context. 929 */ 930 void kernfs_notify(struct kernfs_node *kn) 931 { 932 static DECLARE_WORK(kernfs_notify_work, kernfs_notify_workfn); 933 unsigned long flags; 934 struct kernfs_open_node *on; 935 936 if (WARN_ON(kernfs_type(kn) != KERNFS_FILE)) 937 return; 938 939 /* kick poll immediately */ 940 spin_lock_irqsave(&kernfs_open_node_lock, flags); 941 on = kn->attr.open; 942 if (on) { 943 atomic_inc(&on->event); 944 wake_up_interruptible(&on->poll); 945 } 946 spin_unlock_irqrestore(&kernfs_open_node_lock, flags); 947 948 /* schedule work to kick fsnotify */ 949 spin_lock_irqsave(&kernfs_notify_lock, flags); 950 if (!kn->attr.notify_next) { 951 kernfs_get(kn); 952 kn->attr.notify_next = kernfs_notify_list; 953 kernfs_notify_list = kn; 954 schedule_work(&kernfs_notify_work); 955 } 956 spin_unlock_irqrestore(&kernfs_notify_lock, flags); 957 } 958 EXPORT_SYMBOL_GPL(kernfs_notify); 959 960 const struct file_operations kernfs_file_fops = { 961 .read = kernfs_fop_read, 962 .write = kernfs_fop_write, 963 .llseek = generic_file_llseek, 964 .mmap = kernfs_fop_mmap, 965 .open = kernfs_fop_open, 966 .release = kernfs_fop_release, 967 .poll = kernfs_fop_poll, 968 .fsync = noop_fsync, 969 }; 970 971 /** 972 * __kernfs_create_file - kernfs internal function to create a file 973 * @parent: directory to create the file in 974 * @name: name of the file 975 * @mode: mode of the file 976 * @uid: uid of the file 977 * @gid: gid of the file 978 * @size: size of the file 979 * @ops: kernfs operations for the file 980 * @priv: private data for the file 981 * @ns: optional namespace tag of the file 982 * @key: lockdep key for the file's active_ref, %NULL to disable lockdep 983 * 984 * Returns the created node on success, ERR_PTR() value on error. 985 */ 986 struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent, 987 const char *name, 988 umode_t mode, kuid_t uid, kgid_t gid, 989 loff_t size, 990 const struct kernfs_ops *ops, 991 void *priv, const void *ns, 992 struct lock_class_key *key) 993 { 994 struct kernfs_node *kn; 995 unsigned flags; 996 int rc; 997 998 flags = KERNFS_FILE; 999 1000 kn = kernfs_new_node(parent, name, (mode & S_IALLUGO) | S_IFREG, 1001 uid, gid, flags); 1002 if (!kn) 1003 return ERR_PTR(-ENOMEM); 1004 1005 kn->attr.ops = ops; 1006 kn->attr.size = size; 1007 kn->ns = ns; 1008 kn->priv = priv; 1009 1010 #ifdef CONFIG_DEBUG_LOCK_ALLOC 1011 if (key) { 1012 lockdep_init_map(&kn->dep_map, "kn->count", key, 0); 1013 kn->flags |= KERNFS_LOCKDEP; 1014 } 1015 #endif 1016 1017 /* 1018 * kn->attr.ops is accesible only while holding active ref. We 1019 * need to know whether some ops are implemented outside active 1020 * ref. Cache their existence in flags. 1021 */ 1022 if (ops->seq_show) 1023 kn->flags |= KERNFS_HAS_SEQ_SHOW; 1024 if (ops->mmap) 1025 kn->flags |= KERNFS_HAS_MMAP; 1026 if (ops->release) 1027 kn->flags |= KERNFS_HAS_RELEASE; 1028 1029 rc = kernfs_add_one(kn); 1030 if (rc) { 1031 kernfs_put(kn); 1032 return ERR_PTR(rc); 1033 } 1034 return kn; 1035 } 1036