// SPDX-License-Identifier: GPL-2.0-only
/*
 * fs/kernfs/file.c - kernfs file implementation
 *
 * Copyright (c) 2001-3 Patrick Mochel
 * Copyright (c) 2007 SUSE Linux Products GmbH
 * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org>
 */

#include <linux/fs.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/poll.h>
#include <linux/pagemap.h>
#include <linux/sched/mm.h>
#include <linux/fsnotify.h>
#include <linux/uio.h>

#include "kernfs-internal.h"

/*
 * There's one kernfs_open_file for each open file and one kernfs_open_node
 * for each kernfs_node with one or more open files.
 *
 * kernfs_node->attr.open points to kernfs_open_node. attr.open is
 * protected by kernfs_open_node_lock.
 *
 * filp->private_data points to seq_file whose ->private points to
 * kernfs_open_file. kernfs_open_files are chained at
 * kernfs_open_node->files, which is protected by kernfs_open_file_mutex.
 */
static DEFINE_SPINLOCK(kernfs_open_node_lock);
static DEFINE_MUTEX(kernfs_open_file_mutex);

struct kernfs_open_node {
	atomic_t		event;
	wait_queue_head_t	poll;
	struct list_head	files; /* goes through kernfs_open_file.list */
};

/*
 * kernfs_notify() may be called from any context and bounces notifications
 * through a work item. To minimize space overhead in kernfs_node, the
 * pending queue is implemented as a singly linked list of kernfs_nodes.
 * The list is terminated with the self pointer so that whether a
 * kernfs_node is on the list or not can be determined by testing the next
 * pointer for NULL.
 */
#define KERNFS_NOTIFY_EOL			((void *)&kernfs_notify_list)

static DEFINE_SPINLOCK(kernfs_notify_lock);
static struct kernfs_node *kernfs_notify_list = KERNFS_NOTIFY_EOL;

static struct kernfs_open_file *kernfs_of(struct file *file)
{
	return ((struct seq_file *)file->private_data)->private;
}

/*
 * Determine the kernfs_ops for the given kernfs_node. This function must
 * be called while holding an active reference.
 */
static const struct kernfs_ops *kernfs_ops(struct kernfs_node *kn)
{
	if (kn->flags & KERNFS_LOCKDEP)
		lockdep_assert_held(kn);
	return kn->attr.ops;
}

/*
 * As kernfs_seq_stop() is also called after kernfs_seq_start() or
 * kernfs_seq_next() failure, it needs to distinguish whether it's stopping
 * a seq_file iteration which is fully initialized with an active reference
 * or an aborted kernfs_seq_start() due to get_active failure. The
 * position pointer is the only context for each seq_file iteration and
 * thus the stop condition should be encoded in it. As the return value is
 * directly visible to userland, ERR_PTR(-ENODEV) is the only acceptable
 * choice to indicate get_active failure.
 *
 * Unfortunately, this is complicated due to the optional custom seq_file
 * operations which may return ERR_PTR(-ENODEV) too. kernfs_seq_stop()
 * can't distinguish whether ERR_PTR(-ENODEV) is from get_active failure or
 * custom seq_file operations and thus can't decide whether put_active
 * should be performed or not only on ERR_PTR(-ENODEV).
 *
 * This is worked around by factoring out the custom seq_stop() and
 * put_active part into kernfs_seq_stop_active(), skipping it from
 * kernfs_seq_stop() if ERR_PTR(-ENODEV) while invoking it directly after
 * custom seq_file operations fail with ERR_PTR(-ENODEV) - this ensures
 * that kernfs_seq_stop_active() is skipped only after get_active failure.
 */
static void kernfs_seq_stop_active(struct seq_file *sf, void *v)
{
	struct kernfs_open_file *of = sf->private;
	const struct kernfs_ops *ops = kernfs_ops(of->kn);

	if (ops->seq_stop)
		ops->seq_stop(sf, v);
	kernfs_put_active(of->kn);
}

static void *kernfs_seq_start(struct seq_file *sf, loff_t *ppos)
{
	struct kernfs_open_file *of = sf->private;
	const struct kernfs_ops *ops;

	/*
	 * @of->mutex nests outside active ref and is primarily to ensure that
	 * the ops aren't called concurrently for the same open file.
	 */
	mutex_lock(&of->mutex);
	if (!kernfs_get_active(of->kn))
		return ERR_PTR(-ENODEV);

	ops = kernfs_ops(of->kn);
	if (ops->seq_start) {
		void *next = ops->seq_start(sf, ppos);
		/* see the comment above kernfs_seq_stop_active() */
		if (next == ERR_PTR(-ENODEV))
			kernfs_seq_stop_active(sf, next);
		return next;
	}
	return single_start(sf, ppos);
}

static void *kernfs_seq_next(struct seq_file *sf, void *v, loff_t *ppos)
{
	struct kernfs_open_file *of = sf->private;
	const struct kernfs_ops *ops = kernfs_ops(of->kn);

	if (ops->seq_next) {
		void *next = ops->seq_next(sf, v, ppos);
		/* see the comment above kernfs_seq_stop_active() */
		if (next == ERR_PTR(-ENODEV))
			kernfs_seq_stop_active(sf, next);
		return next;
	} else {
		/*
		 * The same behavior and code as single_open(), always
		 * terminate after the initial read.
		 */
		++*ppos;
		return NULL;
	}
}

static void kernfs_seq_stop(struct seq_file *sf, void *v)
{
	struct kernfs_open_file *of = sf->private;

	if (v != ERR_PTR(-ENODEV))
		kernfs_seq_stop_active(sf, v);
	mutex_unlock(&of->mutex);
}

static int kernfs_seq_show(struct seq_file *sf, void *v)
{
	struct kernfs_open_file *of = sf->private;

	of->event = atomic_read(&of->kn->attr.open->event);

	return of->kn->attr.ops->seq_show(sf, v);
}

static const struct seq_operations kernfs_seq_ops = {
	.start = kernfs_seq_start,
	.next = kernfs_seq_next,
	.stop = kernfs_seq_stop,
	.show = kernfs_seq_show,
};
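
/*
 * For illustration only (not part of this file): the common way to expose
 * a read-only text attribute through kernfs is to supply just ->seq_show
 * and let kernfs_seq_ops above drive the iteration. Everything named
 * foo_* below is hypothetical.
 *
 *	static int foo_seq_show(struct seq_file *sf, void *v)
 *	{
 *		struct kernfs_open_file *of = sf->private;
 *		struct foo_dev *foo = of->kn->priv;
 *
 *		seq_printf(sf, "%d\n", foo->value);
 *		return 0;
 *	}
 *
 *	static const struct kernfs_ops foo_ro_ops = {
 *		.seq_show = foo_seq_show,
 *	};
 */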

/*
 * As reading a bin file can have side-effects, the exact offset and bytes
 * specified in read(2) call should be passed to the read callback making
 * it difficult to use seq_file. Implement simplistic custom buffering for
 * bin files.
 */
static ssize_t kernfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
{
	struct kernfs_open_file *of = kernfs_of(iocb->ki_filp);
	ssize_t len = min_t(size_t, iov_iter_count(iter), PAGE_SIZE);
	const struct kernfs_ops *ops;
	char *buf;

	buf = of->prealloc_buf;
	if (buf)
		mutex_lock(&of->prealloc_mutex);
	else
		buf = kmalloc(len, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	/*
	 * @of->mutex nests outside active ref and is primarily to ensure that
	 * the ops aren't called concurrently for the same open file.
	 */
	mutex_lock(&of->mutex);
	if (!kernfs_get_active(of->kn)) {
		len = -ENODEV;
		mutex_unlock(&of->mutex);
		goto out_free;
	}

	of->event = atomic_read(&of->kn->attr.open->event);
	ops = kernfs_ops(of->kn);
	if (ops->read)
		len = ops->read(of, buf, len, iocb->ki_pos);
	else
		len = -EINVAL;

	kernfs_put_active(of->kn);
	mutex_unlock(&of->mutex);

	if (len < 0)
		goto out_free;

	if (copy_to_iter(buf, len, iter) != len) {
		len = -EFAULT;
		goto out_free;
	}

	iocb->ki_pos += len;

out_free:
	if (buf == of->prealloc_buf)
		mutex_unlock(&of->prealloc_mutex);
	else
		kfree(buf);
	return len;
}

static ssize_t kernfs_fop_read_iter(struct kiocb *iocb, struct iov_iter *iter)
{
	if (kernfs_of(iocb->ki_filp)->kn->flags & KERNFS_HAS_SEQ_SHOW)
		return seq_read_iter(iocb, iter);
	return kernfs_file_read_iter(iocb, iter);
}
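
/*
 * For illustration only: a hypothetical ->read handler for a bin-style
 * file. kernfs_file_read_iter() above has already bounced the user buffer
 * into @buf, so the handler just fills it, honoring @off and @bytes.
 * foo_dev and its fields are made up.
 *
 *	static ssize_t foo_read(struct kernfs_open_file *of, char *buf,
 *				size_t bytes, loff_t off)
 *	{
 *		struct foo_dev *foo = of->kn->priv;
 *
 *		if (off >= foo->size)
 *			return 0;
 *		bytes = min_t(size_t, bytes, foo->size - off);
 *		memcpy(buf, foo->data + off, bytes);
 *		return bytes;
 *	}
 */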

/*
 * Copy data in from userland and pass it to the matching kernfs write
 * operation.
 *
 * There is no easy way for us to know if userspace is only doing a partial
 * write, so we don't support them. We expect the entire buffer to come on
 * the first write. Hint: if you're writing a value, first read the file,
 * modify only the value you're changing, then write the entire buffer
 * back.
 */
static ssize_t kernfs_fop_write_iter(struct kiocb *iocb, struct iov_iter *iter)
{
	struct kernfs_open_file *of = kernfs_of(iocb->ki_filp);
	ssize_t len = iov_iter_count(iter);
	const struct kernfs_ops *ops;
	char *buf;

	if (of->atomic_write_len) {
		if (len > of->atomic_write_len)
			return -E2BIG;
	} else {
		len = min_t(size_t, len, PAGE_SIZE);
	}

	buf = of->prealloc_buf;
	if (buf)
		mutex_lock(&of->prealloc_mutex);
	else
		buf = kmalloc(len + 1, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	if (copy_from_iter(buf, len, iter) != len) {
		len = -EFAULT;
		goto out_free;
	}
	buf[len] = '\0';	/* guarantee string termination */

	/*
	 * @of->mutex nests outside active ref and is primarily to ensure that
	 * the ops aren't called concurrently for the same open file.
	 */
	mutex_lock(&of->mutex);
	if (!kernfs_get_active(of->kn)) {
		mutex_unlock(&of->mutex);
		len = -ENODEV;
		goto out_free;
	}

	ops = kernfs_ops(of->kn);
	if (ops->write)
		len = ops->write(of, buf, len, iocb->ki_pos);
	else
		len = -EINVAL;

	kernfs_put_active(of->kn);
	mutex_unlock(&of->mutex);

	if (len > 0)
		iocb->ki_pos += len;

out_free:
	if (buf == of->prealloc_buf)
		mutex_unlock(&of->prealloc_mutex);
	else
		kfree(buf);
	return len;
}
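
/*
 * For illustration only: a hypothetical ->write handler. Because the whole
 * buffer arrives in one call and is NUL-terminated by the code above, the
 * handler can parse it as a plain string. foo_dev is made up.
 *
 *	static ssize_t foo_write(struct kernfs_open_file *of, char *buf,
 *				 size_t bytes, loff_t off)
 *	{
 *		struct foo_dev *foo = of->kn->priv;
 *		int val, ret;
 *
 *		ret = kstrtoint(strim(buf), 0, &val);
 *		if (ret)
 *			return ret;
 *		foo->value = val;
 *		return bytes;	// consume the entire write
 *	}
 */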

static void kernfs_vma_open(struct vm_area_struct *vma)
{
	struct file *file = vma->vm_file;
	struct kernfs_open_file *of = kernfs_of(file);

	if (!of->vm_ops)
		return;

	if (!kernfs_get_active(of->kn))
		return;

	if (of->vm_ops->open)
		of->vm_ops->open(vma);

	kernfs_put_active(of->kn);
}

static vm_fault_t kernfs_vma_fault(struct vm_fault *vmf)
{
	struct file *file = vmf->vma->vm_file;
	struct kernfs_open_file *of = kernfs_of(file);
	vm_fault_t ret;

	if (!of->vm_ops)
		return VM_FAULT_SIGBUS;

	if (!kernfs_get_active(of->kn))
		return VM_FAULT_SIGBUS;

	ret = VM_FAULT_SIGBUS;
	if (of->vm_ops->fault)
		ret = of->vm_ops->fault(vmf);

	kernfs_put_active(of->kn);
	return ret;
}

static vm_fault_t kernfs_vma_page_mkwrite(struct vm_fault *vmf)
{
	struct file *file = vmf->vma->vm_file;
	struct kernfs_open_file *of = kernfs_of(file);
	vm_fault_t ret;

	if (!of->vm_ops)
		return VM_FAULT_SIGBUS;

	if (!kernfs_get_active(of->kn))
		return VM_FAULT_SIGBUS;

	ret = 0;
	if (of->vm_ops->page_mkwrite)
		ret = of->vm_ops->page_mkwrite(vmf);
	else
		file_update_time(file);

	kernfs_put_active(of->kn);
	return ret;
}

static int kernfs_vma_access(struct vm_area_struct *vma, unsigned long addr,
			     void *buf, int len, int write)
{
	struct file *file = vma->vm_file;
	struct kernfs_open_file *of = kernfs_of(file);
	int ret;

	if (!of->vm_ops)
		return -EINVAL;

	if (!kernfs_get_active(of->kn))
		return -EINVAL;

	ret = -EINVAL;
	if (of->vm_ops->access)
		ret = of->vm_ops->access(vma, addr, buf, len, write);

	kernfs_put_active(of->kn);
	return ret;
}

#ifdef CONFIG_NUMA
static int kernfs_vma_set_policy(struct vm_area_struct *vma,
				 struct mempolicy *new)
{
	struct file *file = vma->vm_file;
	struct kernfs_open_file *of = kernfs_of(file);
	int ret;

	if (!of->vm_ops)
		return 0;

	if (!kernfs_get_active(of->kn))
		return -EINVAL;

	ret = 0;
	if (of->vm_ops->set_policy)
		ret = of->vm_ops->set_policy(vma, new);

	kernfs_put_active(of->kn);
	return ret;
}

static struct mempolicy *kernfs_vma_get_policy(struct vm_area_struct *vma,
					       unsigned long addr)
{
	struct file *file = vma->vm_file;
	struct kernfs_open_file *of = kernfs_of(file);
	struct mempolicy *pol;

	if (!of->vm_ops)
		return vma->vm_policy;

	if (!kernfs_get_active(of->kn))
		return vma->vm_policy;

	pol = vma->vm_policy;
	if (of->vm_ops->get_policy)
		pol = of->vm_ops->get_policy(vma, addr);

	kernfs_put_active(of->kn);
	return pol;
}
#endif

static const struct vm_operations_struct kernfs_vm_ops = {
	.open		= kernfs_vma_open,
	.fault		= kernfs_vma_fault,
	.page_mkwrite	= kernfs_vma_page_mkwrite,
	.access		= kernfs_vma_access,
#ifdef CONFIG_NUMA
	.set_policy	= kernfs_vma_set_policy,
	.get_policy	= kernfs_vma_get_policy,
#endif
};

static int kernfs_fop_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct kernfs_open_file *of = kernfs_of(file);
	const struct kernfs_ops *ops;
	int rc;

	/*
	 * mmap path and of->mutex are prone to triggering spurious lockdep
	 * warnings and we don't want to add spurious locking dependency
	 * between the two. Check whether mmap is actually implemented
	 * without grabbing @of->mutex by testing HAS_MMAP flag. See the
	 * comment in kernfs_fop_open() for more details.
	 */
	if (!(of->kn->flags & KERNFS_HAS_MMAP))
		return -ENODEV;

	mutex_lock(&of->mutex);

	rc = -ENODEV;
	if (!kernfs_get_active(of->kn))
		goto out_unlock;

	ops = kernfs_ops(of->kn);
	rc = ops->mmap(of, vma);
	if (rc)
		goto out_put;

	/*
	 * PowerPC's pci_mmap of legacy_mem uses shmem_zero_setup()
	 * to satisfy versions of X which crash if the mmap fails: that
	 * substitutes a new vm_file, and we don't then want bin_vm_ops.
	 */
	if (vma->vm_file != file)
		goto out_put;

	rc = -EINVAL;
	if (of->mmapped && of->vm_ops != vma->vm_ops)
		goto out_put;

	/*
	 * It is not possible to successfully wrap close.
	 * So error if someone is trying to use close.
	 */
	rc = -EINVAL;
	if (vma->vm_ops && vma->vm_ops->close)
		goto out_put;

	rc = 0;
	of->mmapped = true;
	of->vm_ops = vma->vm_ops;
	vma->vm_ops = &kernfs_vm_ops;
out_put:
	kernfs_put_active(of->kn);
out_unlock:
	mutex_unlock(&of->mutex);

	return rc;
}
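
/*
 * For illustration only: a hypothetical ->mmap implementation. The wrapper
 * above takes the active reference and substitutes kernfs_vm_ops, so a
 * typical handler only has to establish the mapping. foo_dev and its
 * region_phys/region_size fields are made up.
 *
 *	static int foo_mmap(struct kernfs_open_file *of,
 *			    struct vm_area_struct *vma)
 *	{
 *		struct foo_dev *foo = of->kn->priv;
 *		unsigned long size = vma->vm_end - vma->vm_start;
 *
 *		if (size > foo->region_size)
 *			return -EINVAL;
 *		return remap_pfn_range(vma, vma->vm_start,
 *				       foo->region_phys >> PAGE_SHIFT,
 *				       size, vma->vm_page_prot);
 *	}
 */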

/**
 * kernfs_get_open_node - get or create kernfs_open_node
 * @kn: target kernfs_node
 * @of: kernfs_open_file for this instance of open
 *
 * If @kn->attr.open exists, increment its reference count; otherwise,
 * create one. @of is chained to the files list.
 *
 * LOCKING:
 * Kernel thread context (may sleep).
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */
static int kernfs_get_open_node(struct kernfs_node *kn,
				struct kernfs_open_file *of)
{
	struct kernfs_open_node *on, *new_on = NULL;

 retry:
	mutex_lock(&kernfs_open_file_mutex);
	spin_lock_irq(&kernfs_open_node_lock);

	if (!kn->attr.open && new_on) {
		kn->attr.open = new_on;
		new_on = NULL;
	}

	on = kn->attr.open;
	if (on)
		list_add_tail(&of->list, &on->files);

	spin_unlock_irq(&kernfs_open_node_lock);
	mutex_unlock(&kernfs_open_file_mutex);

	if (on) {
		kfree(new_on);
		return 0;
	}

	/* not there, initialize a new one and retry */
	new_on = kmalloc(sizeof(*new_on), GFP_KERNEL);
	if (!new_on)
		return -ENOMEM;

	atomic_set(&new_on->event, 1);
	init_waitqueue_head(&new_on->poll);
	INIT_LIST_HEAD(&new_on->files);
	goto retry;
}

/**
 * kernfs_unlink_open_file - Unlink @of from @kn.
 *
 * @kn: target kernfs_node
 * @of: associated kernfs_open_file
 *
 * Unlink @of from list of @kn's associated open files. If list of
 * associated open files becomes empty, disassociate and free
 * kernfs_open_node.
 *
 * LOCKING:
 * None.
 */
static void kernfs_unlink_open_file(struct kernfs_node *kn,
				    struct kernfs_open_file *of)
{
	struct kernfs_open_node *on = kn->attr.open;
	unsigned long flags;

	mutex_lock(&kernfs_open_file_mutex);
	spin_lock_irqsave(&kernfs_open_node_lock, flags);

	if (of)
		list_del(&of->list);

	if (list_empty(&on->files))
		kn->attr.open = NULL;
	else
		on = NULL;

	spin_unlock_irqrestore(&kernfs_open_node_lock, flags);
	mutex_unlock(&kernfs_open_file_mutex);

	kfree(on);
}

static int kernfs_fop_open(struct inode *inode, struct file *file)
{
	struct kernfs_node *kn = inode->i_private;
	struct kernfs_root *root = kernfs_root(kn);
	const struct kernfs_ops *ops;
	struct kernfs_open_file *of;
	bool has_read, has_write, has_mmap;
	int error = -EACCES;

	if (!kernfs_get_active(kn))
		return -ENODEV;

	ops = kernfs_ops(kn);

	has_read = ops->seq_show || ops->read || ops->mmap;
	has_write = ops->write || ops->mmap;
	has_mmap = ops->mmap;

	/* see the flag definition for details */
	if (root->flags & KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK) {
		if ((file->f_mode & FMODE_WRITE) &&
		    (!(inode->i_mode & S_IWUGO) || !has_write))
			goto err_out;

		if ((file->f_mode & FMODE_READ) &&
		    (!(inode->i_mode & S_IRUGO) || !has_read))
			goto err_out;
	}

	/* allocate a kernfs_open_file for the file */
	error = -ENOMEM;
	of = kzalloc(sizeof(struct kernfs_open_file), GFP_KERNEL);
	if (!of)
		goto err_out;

	/*
	 * The following is done to give a different lockdep key to
	 * @of->mutex for files which implement mmap. This is a rather
	 * crude way to avoid false positive lockdep warning around
	 * mm->mmap_lock - mmap nests @of->mutex under mm->mmap_lock and
	 * reading /sys/block/sda/trace/act_mask grabs sr_mutex, under
	 * which mm->mmap_lock nests, while holding @of->mutex. As each
	 * open file has a separate mutex, it's okay as long as those don't
	 * happen on the same file. At this point, we can't easily give
	 * each file a separate locking class. Let's differentiate on
	 * whether the file has mmap or not for now.
	 *
	 * Both paths of the branch look the same. They're supposed to
	 * look that way and give @of->mutex different static lockdep keys.
	 */
	if (has_mmap)
		mutex_init(&of->mutex);
	else
		mutex_init(&of->mutex);

	of->kn = kn;
	of->file = file;

	/*
	 * Write path needs to check atomic_write_len outside the active
	 * reference. Cache it in open_file. See kernfs_fop_write_iter()
	 * for details.
	 */
	of->atomic_write_len = ops->atomic_write_len;

	error = -EINVAL;
	/*
	 * ->seq_show is incompatible with ->prealloc,
	 * as seq_read does its own allocation.
	 * ->read must be used instead.
	 */
	if (ops->prealloc && ops->seq_show)
		goto err_free;
	if (ops->prealloc) {
		int len = of->atomic_write_len ?: PAGE_SIZE;
		of->prealloc_buf = kmalloc(len + 1, GFP_KERNEL);
		error = -ENOMEM;
		if (!of->prealloc_buf)
			goto err_free;
		mutex_init(&of->prealloc_mutex);
	}

	/*
	 * Always instantiate seq_file even if read access doesn't use
	 * seq_file or is not requested. This unifies private data access
	 * and readable regular files are the vast majority anyway.
	 */
	if (ops->seq_show)
		error = seq_open(file, &kernfs_seq_ops);
	else
		error = seq_open(file, NULL);
	if (error)
		goto err_free;

	of->seq_file = file->private_data;
	of->seq_file->private = of;

	/* seq_file clears PWRITE unconditionally, restore it if WRITE */
	if (file->f_mode & FMODE_WRITE)
		file->f_mode |= FMODE_PWRITE;

	/* make sure we have open node struct */
	error = kernfs_get_open_node(kn, of);
	if (error)
		goto err_seq_release;

	if (ops->open) {
		/* nobody has access to @of yet, skip @of->mutex */
		error = ops->open(of);
		if (error)
			goto err_put_node;
	}

	/* open succeeded, put active references */
	kernfs_put_active(kn);
	return 0;

err_put_node:
	kernfs_unlink_open_file(kn, of);
err_seq_release:
	seq_release(inode, file);
err_free:
	kfree(of->prealloc_buf);
	kfree(of);
err_out:
	kernfs_put_active(kn);
	return error;
}

/* used from release/drain to ensure that ->release() is called exactly once */
static void kernfs_release_file(struct kernfs_node *kn,
				struct kernfs_open_file *of)
{
	/*
	 * @of is guaranteed to have no other file operations in flight and
	 * we just want to synchronize release and drain paths.
	 * @kernfs_open_file_mutex is enough. @of->mutex can't be used
	 * here because drain path may be called from places which can
	 * cause circular dependency.
	 */
	lockdep_assert_held(&kernfs_open_file_mutex);

	if (!of->released) {
		/*
		 * A file is never detached without being released and we
		 * need to be able to release files which are deactivated
		 * and being drained. Don't use kernfs_ops().
		 */
		kn->attr.ops->release(of);
		of->released = true;
	}
}

static int kernfs_fop_release(struct inode *inode, struct file *filp)
{
	struct kernfs_node *kn = inode->i_private;
	struct kernfs_open_file *of = kernfs_of(filp);

	if (kn->flags & KERNFS_HAS_RELEASE) {
		mutex_lock(&kernfs_open_file_mutex);
		kernfs_release_file(kn, of);
		mutex_unlock(&kernfs_open_file_mutex);
	}

	kernfs_unlink_open_file(kn, of);
	seq_release(inode, filp);
	kfree(of->prealloc_buf);
	kfree(of);

	return 0;
}

void kernfs_drain_open_files(struct kernfs_node *kn)
{
	struct kernfs_open_node *on;
	struct kernfs_open_file *of;

	if (!(kn->flags & (KERNFS_HAS_MMAP | KERNFS_HAS_RELEASE)))
		return;

	/*
	 * The lockless opportunistic check below is safe because no one is
	 * adding to ->attr.open at this point in time. It allows an early
	 * bail-out if ->attr.open is already NULL. kernfs_unlink_open_file()
	 * makes ->attr.open NULL only while holding kernfs_open_file_mutex,
	 * so the recheck under kernfs_open_file_mutex ensures we bail out if
	 * ->attr.open became NULL while we were waiting for the mutex.
	 */
	if (!kn->attr.open)
		return;

	mutex_lock(&kernfs_open_file_mutex);
	if (!kn->attr.open) {
		mutex_unlock(&kernfs_open_file_mutex);
		return;
	}

	on = kn->attr.open;

	list_for_each_entry(of, &on->files, list) {
		struct inode *inode = file_inode(of->file);

		if (kn->flags & KERNFS_HAS_MMAP)
			unmap_mapping_range(inode->i_mapping, 0, 0, 1);

		if (kn->flags & KERNFS_HAS_RELEASE)
			kernfs_release_file(kn, of);
	}

	mutex_unlock(&kernfs_open_file_mutex);
}
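
/*
 * For illustration only: ->open and ->release pair up to manage per-open
 * state. The machinery above guarantees that ->release() runs exactly once
 * per open, whether from the release or the drain path. foo_ctx is made up.
 *
 *	static int foo_open(struct kernfs_open_file *of)
 *	{
 *		of->priv = kzalloc(sizeof(struct foo_ctx), GFP_KERNEL);
 *		return of->priv ? 0 : -ENOMEM;
 *	}
 *
 *	static void foo_release(struct kernfs_open_file *of)
 *	{
 *		kfree(of->priv);
 *	}
 */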

/*
 * Kernfs attribute files are pollable. The idea is that you read
 * the content and then you use 'poll' or 'select' to wait for
 * the content to change. When the content changes (assuming the
 * manager for the kobject supports notification), poll will
 * return EPOLLERR|EPOLLPRI, and select will return the fd whether
 * it is waiting for read, write, or exceptions.
 * Once poll/select indicates that the value has changed, you
 * need to close and re-open the file, or seek to 0 and read again.
 * Reminder: this only works for attributes which actively support
 * it, and it is not possible to test an attribute from userspace
 * to see if it supports poll (neither 'poll' nor 'select' returns
 * an appropriate error code). When in doubt, set a suitable timeout value.
 */
__poll_t kernfs_generic_poll(struct kernfs_open_file *of, poll_table *wait)
{
	struct kernfs_node *kn = kernfs_dentry_node(of->file->f_path.dentry);
	struct kernfs_open_node *on = kn->attr.open;

	poll_wait(of->file, &on->poll, wait);

	if (of->event != atomic_read(&on->event))
		return DEFAULT_POLLMASK|EPOLLERR|EPOLLPRI;

	return DEFAULT_POLLMASK;
}

static __poll_t kernfs_fop_poll(struct file *filp, poll_table *wait)
{
	struct kernfs_open_file *of = kernfs_of(filp);
	struct kernfs_node *kn = kernfs_dentry_node(filp->f_path.dentry);
	__poll_t ret;

	if (!kernfs_get_active(kn))
		return DEFAULT_POLLMASK|EPOLLERR|EPOLLPRI;

	if (kn->attr.ops->poll)
		ret = kn->attr.ops->poll(of, wait);
	else
		ret = kernfs_generic_poll(of, wait);

	kernfs_put_active(kn);
	return ret;
}
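
/*
 * For illustration only: the userspace side of the protocol described
 * above, polling a sysfs attribute (the path is hypothetical).
 *
 *	int fd = open("/sys/class/foo/bar/value", O_RDONLY);
 *	char buf[64];
 *
 *	read(fd, buf, sizeof(buf));	// consume the current value
 *	struct pollfd pfd = { .fd = fd, .events = POLLERR | POLLPRI };
 *	if (poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLPRI)) {
 *		lseek(fd, 0, SEEK_SET);	// rewind ...
 *		read(fd, buf, sizeof(buf));	// ... and re-read the new value
 *	}
 */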

static void kernfs_notify_workfn(struct work_struct *work)
{
	struct kernfs_node *kn;
	struct kernfs_super_info *info;
	struct kernfs_root *root;
repeat:
	/* pop one off the notify_list */
	spin_lock_irq(&kernfs_notify_lock);
	kn = kernfs_notify_list;
	if (kn == KERNFS_NOTIFY_EOL) {
		spin_unlock_irq(&kernfs_notify_lock);
		return;
	}
	kernfs_notify_list = kn->attr.notify_next;
	kn->attr.notify_next = NULL;
	spin_unlock_irq(&kernfs_notify_lock);

	root = kernfs_root(kn);
	/* kick fsnotify */
	down_write(&root->kernfs_rwsem);

	list_for_each_entry(info, &kernfs_root(kn)->supers, node) {
		struct kernfs_node *parent;
		struct inode *p_inode = NULL;
		struct inode *inode;
		struct qstr name;

		/*
		 * We want fsnotify_modify() on @kn but as the
		 * modifications aren't originating from userland don't
		 * have the matching @file available. Look up the inodes
		 * and generate the events manually.
		 */
		inode = ilookup(info->sb, kernfs_ino(kn));
		if (!inode)
			continue;

		name = (struct qstr)QSTR_INIT(kn->name, strlen(kn->name));
		parent = kernfs_get_parent(kn);
		if (parent) {
			p_inode = ilookup(info->sb, kernfs_ino(parent));
			if (p_inode) {
				fsnotify(FS_MODIFY | FS_EVENT_ON_CHILD,
					 inode, FSNOTIFY_EVENT_INODE,
					 p_inode, &name, inode, 0);
				iput(p_inode);
			}

			kernfs_put(parent);
		}

		if (!p_inode)
			fsnotify_inode(inode, FS_MODIFY);

		iput(inode);
	}

	up_write(&root->kernfs_rwsem);
	kernfs_put(kn);
	goto repeat;
}

/**
 * kernfs_notify - notify a kernfs file
 * @kn: file to notify
 *
 * Notify @kn such that poll(2) on @kn wakes up. May be called from any
 * context.
 */
void kernfs_notify(struct kernfs_node *kn)
{
	static DECLARE_WORK(kernfs_notify_work, kernfs_notify_workfn);
	unsigned long flags;
	struct kernfs_open_node *on;

	if (WARN_ON(kernfs_type(kn) != KERNFS_FILE))
		return;

	/* kick poll immediately */
	spin_lock_irqsave(&kernfs_open_node_lock, flags);
	on = kn->attr.open;
	if (on) {
		atomic_inc(&on->event);
		wake_up_interruptible(&on->poll);
	}
	spin_unlock_irqrestore(&kernfs_open_node_lock, flags);

	/* schedule work to kick fsnotify */
	spin_lock_irqsave(&kernfs_notify_lock, flags);
	if (!kn->attr.notify_next) {
		kernfs_get(kn);
		kn->attr.notify_next = kernfs_notify_list;
		kernfs_notify_list = kn;
		schedule_work(&kernfs_notify_work);
	}
	spin_unlock_irqrestore(&kernfs_notify_lock, flags);
}
EXPORT_SYMBOL_GPL(kernfs_notify);

const struct file_operations kernfs_file_fops = {
	.read_iter	= kernfs_fop_read_iter,
	.write_iter	= kernfs_fop_write_iter,
	.llseek		= generic_file_llseek,
	.mmap		= kernfs_fop_mmap,
	.open		= kernfs_fop_open,
	.release	= kernfs_fop_release,
	.poll		= kernfs_fop_poll,
	.fsync		= noop_fsync,
	.splice_read	= generic_file_splice_read,
	.splice_write	= iter_file_splice_write,
};

/**
 * __kernfs_create_file - kernfs internal function to create a file
 * @parent: directory to create the file in
 * @name: name of the file
 * @mode: mode of the file
 * @uid: uid of the file
 * @gid: gid of the file
 * @size: size of the file
 * @ops: kernfs operations for the file
 * @priv: private data for the file
 * @ns: optional namespace tag of the file
 * @key: lockdep key for the file's active_ref, %NULL to disable lockdep
 *
 * Returns the created node on success, ERR_PTR() value on error.
 */
struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent,
					 const char *name,
					 umode_t mode, kuid_t uid, kgid_t gid,
					 loff_t size,
					 const struct kernfs_ops *ops,
					 void *priv, const void *ns,
					 struct lock_class_key *key)
{
	struct kernfs_node *kn;
	unsigned flags;
	int rc;

	flags = KERNFS_FILE;

	kn = kernfs_new_node(parent, name, (mode & S_IALLUGO) | S_IFREG,
			     uid, gid, flags);
	if (!kn)
		return ERR_PTR(-ENOMEM);

	kn->attr.ops = ops;
	kn->attr.size = size;
	kn->ns = ns;
	kn->priv = priv;

#ifdef CONFIG_DEBUG_LOCK_ALLOC
	if (key) {
		lockdep_init_map(&kn->dep_map, "kn->active", key, 0);
		kn->flags |= KERNFS_LOCKDEP;
	}
#endif

	/*
	 * kn->attr.ops is accessible only while holding active ref. We
	 * need to know whether some ops are implemented outside active
	 * ref. Cache their existence in flags.
	 */
	if (ops->seq_show)
		kn->flags |= KERNFS_HAS_SEQ_SHOW;
	if (ops->mmap)
		kn->flags |= KERNFS_HAS_MMAP;
	if (ops->release)
		kn->flags |= KERNFS_HAS_RELEASE;

	rc = kernfs_add_one(kn);
	if (rc) {
		kernfs_put(kn);
		return ERR_PTR(rc);
	}
	return kn;
}
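
/*
 * For illustration only, tying it together: a hypothetical caller creates a
 * file backed by foo_ro_ops (sketched near kernfs_seq_ops above) and calls
 * kernfs_notify() whenever the value changes so that pollers wake up. All
 * foo_* names are made up.
 *
 *	struct kernfs_node *kn;
 *
 *	kn = __kernfs_create_file(parent, "value", 0444,
 *				  GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
 *				  0, &foo_ro_ops, foo, NULL, NULL);
 *	if (IS_ERR(kn))
 *		return PTR_ERR(kn);
 *
 *	// later, whenever foo->value changes:
 *	kernfs_notify(kn);
 */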