// SPDX-License-Identifier: GPL-2.0-only
/*
 * fs/kernfs/file.c - kernfs file implementation
 *
 * Copyright (c) 2001-3 Patrick Mochel
 * Copyright (c) 2007 SUSE Linux Products GmbH
 * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org>
 */

#include <linux/fs.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/poll.h>
#include <linux/pagemap.h>
#include <linux/sched/mm.h>
#include <linux/fsnotify.h>
#include <linux/uio.h>

#include "kernfs-internal.h"

/*
 * Per-node state shared by all open files of one kernfs_node.  It is
 * published through kn->attr.open by kernfs_get_open_node() when the first
 * file is linked and freed with kfree_rcu() by kernfs_unlink_open_file()
 * once the ->files list becomes empty.
 */
struct kernfs_open_node {
	/* used by kfree_rcu() when the open node is torn down */
	struct rcu_head		rcu_head;
	/*
	 * Notification generation: initialized to 1, bumped by
	 * kernfs_notify(), snapshotted into of->event by the read paths and
	 * compared against that snapshot in kernfs_generic_poll().
	 */
	atomic_t		event;
	/* pollers wait here; woken by kernfs_notify() */
	wait_queue_head_t	poll;
	struct list_head	files; /* goes through kernfs_open_file.list */
	/* number of linked open files that currently have ->mmapped set */
	unsigned int		nr_mmapped;
	/*
	 * number of linked open files of a KERNFS_HAS_RELEASE node whose
	 * ->release() has not been accounted for yet
	 */
	unsigned int		nr_to_release;
};

/*
 * kernfs_notify() may be called from any context and bounces notifications
 * through a work item.  To minimize space overhead in kernfs_node, the
 * pending queue is implemented as a singly linked list of kernfs_nodes.
 * The list is terminated with the self pointer so that whether a
 * kernfs_node is on the list or not can be determined by testing the next
 * pointer for %NULL.
37 */ 38 #define KERNFS_NOTIFY_EOL ((void *)&kernfs_notify_list) 39 40 static DEFINE_SPINLOCK(kernfs_notify_lock); 41 static struct kernfs_node *kernfs_notify_list = KERNFS_NOTIFY_EOL; 42 43 /* Compatibility wrappers - use the common hashed node lock */ 44 static inline struct mutex *kernfs_open_file_mutex_ptr(struct kernfs_node *kn) 45 { 46 return kernfs_node_lock_ptr(kn); 47 } 48 49 static inline struct mutex *kernfs_open_file_mutex_lock(struct kernfs_node *kn) 50 { 51 return kernfs_node_lock(kn); 52 } 53 54 /** 55 * of_on - Get the kernfs_open_node of the specified kernfs_open_file 56 * @of: target kernfs_open_file 57 * 58 * Return: the kernfs_open_node of the kernfs_open_file 59 */ 60 static struct kernfs_open_node *of_on(struct kernfs_open_file *of) 61 { 62 return rcu_dereference_protected(of->kn->attr.open, 63 !list_empty(&of->list)); 64 } 65 66 /* Get active reference to kernfs node for an open file */ 67 static struct kernfs_open_file *kernfs_get_active_of(struct kernfs_open_file *of) 68 { 69 /* Skip if file was already released */ 70 if (unlikely(of->released)) 71 return NULL; 72 73 if (!kernfs_get_active(of->kn)) 74 return NULL; 75 76 return of; 77 } 78 79 static void kernfs_put_active_of(struct kernfs_open_file *of) 80 { 81 return kernfs_put_active(of->kn); 82 } 83 84 /** 85 * kernfs_deref_open_node_locked - Get kernfs_open_node corresponding to @kn 86 * 87 * @kn: target kernfs_node. 88 * 89 * Fetch and return ->attr.open of @kn when caller holds the 90 * kernfs_open_file_mutex_ptr(kn). 91 * 92 * Update of ->attr.open happens under kernfs_open_file_mutex_ptr(kn). So when 93 * the caller guarantees that this mutex is being held, other updaters can't 94 * change ->attr.open and this means that we can safely deref ->attr.open 95 * outside RCU read-side critical section. 96 * 97 * The caller needs to make sure that kernfs_open_file_mutex is held. 98 * 99 * Return: @kn->attr.open when kernfs_open_file_mutex is held. 
100 */ 101 static struct kernfs_open_node * 102 kernfs_deref_open_node_locked(struct kernfs_node *kn) 103 { 104 return rcu_dereference_protected(kn->attr.open, 105 lockdep_is_held(kernfs_open_file_mutex_ptr(kn))); 106 } 107 108 static struct kernfs_open_file *kernfs_of(struct file *file) 109 { 110 return ((struct seq_file *)file->private_data)->private; 111 } 112 113 /* 114 * Determine the kernfs_ops for the given kernfs_node. This function must 115 * be called while holding an active reference. 116 */ 117 static const struct kernfs_ops *kernfs_ops(struct kernfs_node *kn) 118 { 119 if (kn->flags & KERNFS_LOCKDEP) 120 lockdep_assert_held(kn); 121 return kn->attr.ops; 122 } 123 124 /* 125 * As kernfs_seq_stop() is also called after kernfs_seq_start() or 126 * kernfs_seq_next() failure, it needs to distinguish whether it's stopping 127 * a seq_file iteration which is fully initialized with an active reference 128 * or an aborted kernfs_seq_start() due to get_active failure. The 129 * position pointer is the only context for each seq_file iteration and 130 * thus the stop condition should be encoded in it. As the return value is 131 * directly visible to userland, ERR_PTR(-ENODEV) is the only acceptable 132 * choice to indicate get_active failure. 133 * 134 * Unfortunately, this is complicated due to the optional custom seq_file 135 * operations which may return ERR_PTR(-ENODEV) too. kernfs_seq_stop() 136 * can't distinguish whether ERR_PTR(-ENODEV) is from get_active failure or 137 * custom seq_file operations and thus can't decide whether put_active 138 * should be performed or not only on ERR_PTR(-ENODEV). 
139 * 140 * This is worked around by factoring out the custom seq_stop() and 141 * put_active part into kernfs_seq_stop_active(), skipping it from 142 * kernfs_seq_stop() if ERR_PTR(-ENODEV) while invoking it directly after 143 * custom seq_file operations fail with ERR_PTR(-ENODEV) - this ensures 144 * that kernfs_seq_stop_active() is skipped only after get_active failure. 145 */ 146 static void kernfs_seq_stop_active(struct seq_file *sf, void *v) 147 { 148 struct kernfs_open_file *of = sf->private; 149 const struct kernfs_ops *ops = kernfs_ops(of->kn); 150 151 if (ops->seq_stop) 152 ops->seq_stop(sf, v); 153 kernfs_put_active_of(of); 154 } 155 156 static void *kernfs_seq_start(struct seq_file *sf, loff_t *ppos) 157 { 158 struct kernfs_open_file *of = sf->private; 159 const struct kernfs_ops *ops; 160 161 /* 162 * @of->mutex nests outside active ref and is primarily to ensure that 163 * the ops aren't called concurrently for the same open file. 164 */ 165 mutex_lock(&of->mutex); 166 if (!kernfs_get_active_of(of)) 167 return ERR_PTR(-ENODEV); 168 169 ops = kernfs_ops(of->kn); 170 if (ops->seq_start) { 171 void *next = ops->seq_start(sf, ppos); 172 /* see the comment above kernfs_seq_stop_active() */ 173 if (next == ERR_PTR(-ENODEV)) 174 kernfs_seq_stop_active(sf, next); 175 return next; 176 } 177 return single_start(sf, ppos); 178 } 179 180 static void *kernfs_seq_next(struct seq_file *sf, void *v, loff_t *ppos) 181 { 182 struct kernfs_open_file *of = sf->private; 183 const struct kernfs_ops *ops = kernfs_ops(of->kn); 184 185 if (ops->seq_next) { 186 void *next = ops->seq_next(sf, v, ppos); 187 /* see the comment above kernfs_seq_stop_active() */ 188 if (next == ERR_PTR(-ENODEV)) 189 kernfs_seq_stop_active(sf, next); 190 return next; 191 } else { 192 /* 193 * The same behavior and code as single_open(), always 194 * terminate after the initial read. 
195 */ 196 ++*ppos; 197 return NULL; 198 } 199 } 200 201 static void kernfs_seq_stop(struct seq_file *sf, void *v) 202 { 203 struct kernfs_open_file *of = sf->private; 204 205 if (v != ERR_PTR(-ENODEV)) 206 kernfs_seq_stop_active(sf, v); 207 mutex_unlock(&of->mutex); 208 } 209 210 static int kernfs_seq_show(struct seq_file *sf, void *v) 211 { 212 struct kernfs_open_file *of = sf->private; 213 214 of->event = atomic_read(&of_on(of)->event); 215 216 return of->kn->attr.ops->seq_show(sf, v); 217 } 218 219 static const struct seq_operations kernfs_seq_ops = { 220 .start = kernfs_seq_start, 221 .next = kernfs_seq_next, 222 .stop = kernfs_seq_stop, 223 .show = kernfs_seq_show, 224 }; 225 226 /* 227 * As reading a bin file can have side-effects, the exact offset and bytes 228 * specified in read(2) call should be passed to the read callback making 229 * it difficult to use seq_file. Implement simplistic custom buffering for 230 * bin files. 231 */ 232 static ssize_t kernfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) 233 { 234 struct kernfs_open_file *of = kernfs_of(iocb->ki_filp); 235 ssize_t len = min_t(size_t, iov_iter_count(iter), PAGE_SIZE); 236 const struct kernfs_ops *ops; 237 char *buf; 238 239 buf = of->prealloc_buf; 240 if (buf) 241 mutex_lock(&of->prealloc_mutex); 242 else 243 buf = kmalloc(len, GFP_KERNEL); 244 if (!buf) 245 return -ENOMEM; 246 247 /* 248 * @of->mutex nests outside active ref and is used both to ensure that 249 * the ops aren't called concurrently for the same open file. 
250 */ 251 mutex_lock(&of->mutex); 252 if (!kernfs_get_active_of(of)) { 253 len = -ENODEV; 254 mutex_unlock(&of->mutex); 255 goto out_free; 256 } 257 258 of->event = atomic_read(&of_on(of)->event); 259 260 ops = kernfs_ops(of->kn); 261 if (ops->read) 262 len = ops->read(of, buf, len, iocb->ki_pos); 263 else 264 len = -EINVAL; 265 266 kernfs_put_active_of(of); 267 mutex_unlock(&of->mutex); 268 269 if (len < 0) 270 goto out_free; 271 272 if (copy_to_iter(buf, len, iter) != len) { 273 len = -EFAULT; 274 goto out_free; 275 } 276 277 iocb->ki_pos += len; 278 279 out_free: 280 if (buf == of->prealloc_buf) 281 mutex_unlock(&of->prealloc_mutex); 282 else 283 kfree(buf); 284 return len; 285 } 286 287 static ssize_t kernfs_fop_read_iter(struct kiocb *iocb, struct iov_iter *iter) 288 { 289 if (kernfs_of(iocb->ki_filp)->kn->flags & KERNFS_HAS_SEQ_SHOW) 290 return seq_read_iter(iocb, iter); 291 return kernfs_file_read_iter(iocb, iter); 292 } 293 294 /* 295 * Copy data in from userland and pass it to the matching kernfs write 296 * operation. 297 * 298 * There is no easy way for us to know if userspace is only doing a partial 299 * write, so we don't support them. We expect the entire buffer to come on 300 * the first write. Hint: if you're writing a value, first read the file, 301 * modify only the value you're changing, then write entire buffer 302 * back. 
303 */ 304 static ssize_t kernfs_fop_write_iter(struct kiocb *iocb, struct iov_iter *iter) 305 { 306 struct kernfs_open_file *of = kernfs_of(iocb->ki_filp); 307 ssize_t len = iov_iter_count(iter); 308 const struct kernfs_ops *ops; 309 char *buf; 310 311 if (of->atomic_write_len) { 312 if (len > of->atomic_write_len) 313 return -E2BIG; 314 } else { 315 len = min_t(size_t, len, PAGE_SIZE); 316 } 317 318 buf = of->prealloc_buf; 319 if (buf) 320 mutex_lock(&of->prealloc_mutex); 321 else 322 buf = kmalloc(len + 1, GFP_KERNEL); 323 if (!buf) 324 return -ENOMEM; 325 326 if (copy_from_iter(buf, len, iter) != len) { 327 len = -EFAULT; 328 goto out_free; 329 } 330 buf[len] = '\0'; /* guarantee string termination */ 331 332 /* 333 * @of->mutex nests outside active ref and is used both to ensure that 334 * the ops aren't called concurrently for the same open file. 335 */ 336 mutex_lock(&of->mutex); 337 if (!kernfs_get_active_of(of)) { 338 mutex_unlock(&of->mutex); 339 len = -ENODEV; 340 goto out_free; 341 } 342 343 ops = kernfs_ops(of->kn); 344 if (ops->write) 345 len = ops->write(of, buf, len, iocb->ki_pos); 346 else 347 len = -EINVAL; 348 349 kernfs_put_active_of(of); 350 mutex_unlock(&of->mutex); 351 352 if (len > 0) 353 iocb->ki_pos += len; 354 355 out_free: 356 if (buf == of->prealloc_buf) 357 mutex_unlock(&of->prealloc_mutex); 358 else 359 kfree(buf); 360 return len; 361 } 362 363 static void kernfs_vma_open(struct vm_area_struct *vma) 364 { 365 struct file *file = vma->vm_file; 366 struct kernfs_open_file *of = kernfs_of(file); 367 368 if (!of->vm_ops) 369 return; 370 371 if (!kernfs_get_active_of(of)) 372 return; 373 374 if (of->vm_ops->open) 375 of->vm_ops->open(vma); 376 377 kernfs_put_active_of(of); 378 } 379 380 static vm_fault_t kernfs_vma_fault(struct vm_fault *vmf) 381 { 382 struct file *file = vmf->vma->vm_file; 383 struct kernfs_open_file *of = kernfs_of(file); 384 vm_fault_t ret; 385 386 if (!of->vm_ops) 387 return VM_FAULT_SIGBUS; 388 389 if 
(!kernfs_get_active_of(of)) 390 return VM_FAULT_SIGBUS; 391 392 ret = VM_FAULT_SIGBUS; 393 if (of->vm_ops->fault) 394 ret = of->vm_ops->fault(vmf); 395 396 kernfs_put_active_of(of); 397 return ret; 398 } 399 400 static vm_fault_t kernfs_vma_page_mkwrite(struct vm_fault *vmf) 401 { 402 struct file *file = vmf->vma->vm_file; 403 struct kernfs_open_file *of = kernfs_of(file); 404 vm_fault_t ret; 405 406 if (!of->vm_ops) 407 return VM_FAULT_SIGBUS; 408 409 if (!kernfs_get_active_of(of)) 410 return VM_FAULT_SIGBUS; 411 412 ret = 0; 413 if (of->vm_ops->page_mkwrite) 414 ret = of->vm_ops->page_mkwrite(vmf); 415 else 416 file_update_time(file); 417 418 kernfs_put_active_of(of); 419 return ret; 420 } 421 422 static int kernfs_vma_access(struct vm_area_struct *vma, unsigned long addr, 423 void *buf, int len, int write) 424 { 425 struct file *file = vma->vm_file; 426 struct kernfs_open_file *of = kernfs_of(file); 427 int ret; 428 429 if (!of->vm_ops) 430 return -EINVAL; 431 432 if (!kernfs_get_active_of(of)) 433 return -EINVAL; 434 435 ret = -EINVAL; 436 if (of->vm_ops->access) 437 ret = of->vm_ops->access(vma, addr, buf, len, write); 438 439 kernfs_put_active_of(of); 440 return ret; 441 } 442 443 static const struct vm_operations_struct kernfs_vm_ops = { 444 .open = kernfs_vma_open, 445 .fault = kernfs_vma_fault, 446 .page_mkwrite = kernfs_vma_page_mkwrite, 447 .access = kernfs_vma_access, 448 }; 449 450 static int kernfs_fop_mmap(struct file *file, struct vm_area_struct *vma) 451 { 452 struct kernfs_open_file *of = kernfs_of(file); 453 const struct kernfs_ops *ops; 454 int rc; 455 456 /* 457 * mmap path and of->mutex are prone to triggering spurious lockdep 458 * warnings and we don't want to add spurious locking dependency 459 * between the two. Check whether mmap is actually implemented 460 * without grabbing @of->mutex by testing HAS_MMAP flag. See the 461 * comment in kernfs_fop_open() for more details. 
462 */ 463 if (!(of->kn->flags & KERNFS_HAS_MMAP)) 464 return -ENODEV; 465 466 mutex_lock(&of->mutex); 467 468 rc = -ENODEV; 469 if (!kernfs_get_active_of(of)) 470 goto out_unlock; 471 472 ops = kernfs_ops(of->kn); 473 rc = ops->mmap(of, vma); 474 if (rc) 475 goto out_put; 476 477 /* 478 * PowerPC's pci_mmap of legacy_mem uses shmem_zero_setup() 479 * to satisfy versions of X which crash if the mmap fails: that 480 * substitutes a new vm_file, and we don't then want bin_vm_ops. 481 */ 482 if (vma->vm_file != file) 483 goto out_put; 484 485 rc = -EINVAL; 486 if (of->mmapped && of->vm_ops != vma->vm_ops) 487 goto out_put; 488 489 /* 490 * It is not possible to successfully wrap close. 491 * So error if someone is trying to use close. 492 */ 493 if (vma->vm_ops && vma->vm_ops->close) 494 goto out_put; 495 496 rc = 0; 497 if (!of->mmapped) { 498 of->mmapped = true; 499 of_on(of)->nr_mmapped++; 500 of->vm_ops = vma->vm_ops; 501 } 502 vma->vm_ops = &kernfs_vm_ops; 503 out_put: 504 kernfs_put_active_of(of); 505 out_unlock: 506 mutex_unlock(&of->mutex); 507 508 return rc; 509 } 510 511 /** 512 * kernfs_get_open_node - get or create kernfs_open_node 513 * @kn: target kernfs_node 514 * @of: kernfs_open_file for this instance of open 515 * 516 * If @kn->attr.open exists, increment its reference count; otherwise, 517 * create one. @of is chained to the files list. 518 * 519 * Locking: 520 * Kernel thread context (may sleep). 521 * 522 * Return: 523 * %0 on success, -errno on failure. 
524 */ 525 static int kernfs_get_open_node(struct kernfs_node *kn, 526 struct kernfs_open_file *of) 527 { 528 struct kernfs_open_node *on; 529 struct mutex *mutex; 530 531 mutex = kernfs_open_file_mutex_lock(kn); 532 on = kernfs_deref_open_node_locked(kn); 533 534 if (!on) { 535 /* not there, initialize a new one */ 536 on = kzalloc_obj(*on); 537 if (!on) { 538 mutex_unlock(mutex); 539 return -ENOMEM; 540 } 541 atomic_set(&on->event, 1); 542 init_waitqueue_head(&on->poll); 543 INIT_LIST_HEAD(&on->files); 544 rcu_assign_pointer(kn->attr.open, on); 545 } 546 547 list_add_tail(&of->list, &on->files); 548 if (kn->flags & KERNFS_HAS_RELEASE) 549 on->nr_to_release++; 550 551 mutex_unlock(mutex); 552 return 0; 553 } 554 555 /** 556 * kernfs_unlink_open_file - Unlink @of from @kn. 557 * 558 * @kn: target kernfs_node 559 * @of: associated kernfs_open_file 560 * @open_failed: ->open() failed, cancel ->release() 561 * 562 * Unlink @of from list of @kn's associated open files. If list of 563 * associated open files becomes empty, disassociate and free 564 * kernfs_open_node. 565 * 566 * LOCKING: 567 * None. 
 */
static void kernfs_unlink_open_file(struct kernfs_node *kn,
				    struct kernfs_open_file *of,
				    bool open_failed)
{
	struct kernfs_open_node *on;
	struct mutex *mutex;

	mutex = kernfs_open_file_mutex_lock(kn);

	on = kernfs_deref_open_node_locked(kn);
	if (!on) {
		mutex_unlock(mutex);
		return;
	}

	if (of) {
		if (kn->flags & KERNFS_HAS_RELEASE) {
			/*
			 * Either ->open() failed and @of was never released,
			 * or it succeeded and @of must have been released
			 * already; anything else is a bug.
			 */
			WARN_ON_ONCE(of->released == open_failed);
			/* ->release() will never run, undo the accounting */
			if (open_failed)
				on->nr_to_release--;
		}
		if (of->mmapped)
			on->nr_mmapped--;
		list_del(&of->list);
	}

	/* last open file gone: unpublish the open node and free it after RCU */
	if (list_empty(&on->files)) {
		rcu_assign_pointer(kn->attr.open, NULL);
		kfree_rcu(on, rcu_head);
	}

	mutex_unlock(mutex);
}

/*
 * ->open for kernfs files: allocate and initialize the kernfs_open_file,
 * attach a seq_file to @file, link the open file to @kn's open node and
 * call the node's ->open() if it has one.
 *
 * An active reference on @kn is held for the duration of the call and
 * dropped on every exit path.
 *
 * Return: 0 on success; -ENODEV if no active reference could be taken,
 * -EACCES if the extra permission check fails, -EINVAL for the unsupported
 * ->prealloc + ->seq_show combination, -ENOMEM on allocation failure, or
 * the error returned by seq_open(), kernfs_get_open_node() or ->open().
 */
static int kernfs_fop_open(struct inode *inode, struct file *file)
{
	struct kernfs_node *kn = inode->i_private;
	struct kernfs_root *root = kernfs_root(kn);
	const struct kernfs_ops *ops;
	struct kernfs_open_file *of;
	bool has_read, has_write, has_mmap;
	int error = -EACCES;

	if (!kernfs_get_active(kn))
		return -ENODEV;

	ops = kernfs_ops(kn);

	/* a node with ->mmap counts as both readable and writable */
	has_read = ops->seq_show || ops->read || ops->mmap;
	has_write = ops->write || ops->mmap;
	has_mmap = ops->mmap;

	/* see the flag definition for details */
	if (root->flags & KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK) {
		if ((file->f_mode & FMODE_WRITE) &&
		    (!(inode->i_mode & S_IWUGO) || !has_write))
			goto err_out;

		if ((file->f_mode & FMODE_READ) &&
		    (!(inode->i_mode & S_IRUGO) || !has_read))
			goto err_out;
	}

	/* allocate a kernfs_open_file for the file */
	error = -ENOMEM;
	of = kzalloc_obj(struct kernfs_open_file);
	if (!of)
		goto err_out;

	/*
	 * The following is done to give a different lockdep key to
	 * @of->mutex for files which implement mmap.  This is a rather
	 * crude way to avoid false positive lockdep warning around
	 * mm->mmap_lock - mmap nests @of->mutex under mm->mmap_lock and
	 * reading /sys/block/sda/trace/act_mask grabs sr_mutex, under
	 * which mm->mmap_lock nests, while holding @of->mutex.  As each
	 * open file has a separate mutex, it's okay as long as those don't
	 * happen on the same file.  At this point, we can't easily give
	 * each file a separate locking class.  Let's differentiate on
	 * whether the file has mmap or not for now.
	 *
	 * For similar reasons, writable and readonly files are given different
	 * lockdep key, because the writable file /sys/power/resume may call vfs
	 * lookup helpers for arbitrary paths and readonly files can be read by
	 * overlayfs from vfs helpers when sysfs is a lower layer of overlayfs.
	 *
	 * All three cases look the same.  They're supposed to
	 * look that way and give @of->mutex different static lockdep keys.
	 * Do NOT merge the three mutex_init() calls below into one.
	 */
	if (has_mmap)
		mutex_init(&of->mutex);
	else if (file->f_mode & FMODE_WRITE)
		mutex_init(&of->mutex);
	else
		mutex_init(&of->mutex);

	of->kn = kn;
	of->file = file;

	/*
	 * Write path needs to access atomic_write_len outside active
	 * reference.  Cache it in open_file.  See kernfs_fop_write_iter()
	 * for details.
	 */
	of->atomic_write_len = ops->atomic_write_len;

	error = -EINVAL;
	/*
	 * ->seq_show is incompatible with ->prealloc,
	 * as seq_read does its own allocation.
	 * ->read must be used instead.
	 */
	if (ops->prealloc && ops->seq_show)
		goto err_free;
	if (ops->prealloc) {
		/*
		 * One extra byte for the terminating NUL the write path
		 * appends.
		 * NOTE(review): @len is an int - confirm atomic_write_len
		 * always fits.
		 */
		int len = of->atomic_write_len ?: PAGE_SIZE;
		of->prealloc_buf = kmalloc(len + 1, GFP_KERNEL);
		error = -ENOMEM;
		if (!of->prealloc_buf)
			goto err_free;
		mutex_init(&of->prealloc_mutex);
	}

	/*
	 * Always instantiate seq_file even if read access doesn't use
	 * seq_file or is not requested.  This unifies private data access
	 * and readable regular files are the vast majority anyway.
	 */
	if (ops->seq_show)
		error = seq_open(file, &kernfs_seq_ops);
	else
		error = seq_open(file, NULL);
	if (error)
		goto err_free;

	/* seq_open() stored the seq_file in ->private_data; hang @of off it */
	of->seq_file = file->private_data;
	of->seq_file->private = of;

	/* seq_file clears PWRITE unconditionally, restore it if WRITE */
	if (file->f_mode & FMODE_WRITE)
		file->f_mode |= FMODE_PWRITE;

	/* make sure we have open node struct */
	error = kernfs_get_open_node(kn, of);
	if (error)
		goto err_seq_release;

	if (ops->open) {
		/* nobody has access to @of yet, skip @of->mutex */
		error = ops->open(of);
		if (error)
			goto err_put_node;
	}

	/* open succeeded, put active references */
	kernfs_put_active(kn);
	return 0;

	/* error unwinding, in reverse order of setup */
err_put_node:
	kernfs_unlink_open_file(kn, of, true);
err_seq_release:
	seq_release(inode, file);
err_free:
	kfree(of->prealloc_buf);
	kfree(of);
err_out:
	kernfs_put_active(kn);
	return error;
}

/* used from release/drain to ensure that ->release() is called exactly once */
static void kernfs_release_file(struct kernfs_node *kn,
				struct kernfs_open_file *of)
{
	/*
	 * @of is guaranteed to have no other file operations in flight and
	 * we just want to synchronize release and drain paths.
	 * @kernfs_open_file_mutex_ptr(kn) is enough. @of->mutex can't be used
	 * here because drain path may be called from places which can
	 * cause circular dependency.
	 */
	lockdep_assert_held(kernfs_open_file_mutex_ptr(kn));

	if (!of->released) {
		/*
		 * A file is never detached without being released and we
		 * need to be able to release files which are deactivated
		 * and being drained.  Don't use kernfs_ops().
		 */
		kn->attr.ops->release(of);
		of->released = true;
		of_on(of)->nr_to_release--;
	}
}

/*
 * ->release for kernfs files: run the node's ->release() if that has not
 * happened yet, unlink @of from the open node and free everything that
 * kernfs_fop_open() set up.  Always returns 0.
 */
static int kernfs_fop_release(struct inode *inode, struct file *filp)
{
	struct kernfs_node *kn = inode->i_private;
	struct kernfs_open_file *of = kernfs_of(filp);

	if (kn->flags & KERNFS_HAS_RELEASE) {
		struct mutex *mutex;

		mutex = kernfs_open_file_mutex_lock(kn);
		kernfs_release_file(kn, of);
		mutex_unlock(mutex);
	}

	kernfs_unlink_open_file(kn, of, false);
	seq_release(inode, filp);
	kfree(of->prealloc_buf);
	kfree(of);

	return 0;
}

/*
 * Report whether kernfs_drain_open_files() has any work to do for @kn,
 * i.e. whether some open file is still mmapped or still awaiting
 * ->release().
 */
bool kernfs_should_drain_open_files(struct kernfs_node *kn)
{
	struct kernfs_open_node *on;
	bool ret;

	/*
	 * @kn being deactivated guarantees that @kn->attr.open can't change
	 * beneath us making the lockless test below safe.
	 * Callers post kernfs_unbreak_active_protection may be counted in
	 * kn->active by now, do not WARN_ON because of them.
	 */

	rcu_read_lock();
	on = rcu_dereference(kn->attr.open);
	ret = on && (on->nr_mmapped || on->nr_to_release);
	rcu_read_unlock();

	return ret;
}

/*
 * Walk every open file of @kn, tearing down its mappings and, for
 * KERNFS_HAS_RELEASE nodes, invoking ->release() through
 * kernfs_release_file().  Both counters are expected to be zero afterwards.
 */
void kernfs_drain_open_files(struct kernfs_node *kn)
{
	struct kernfs_open_node *on;
	struct kernfs_open_file *of;
	struct mutex *mutex;

	mutex = kernfs_open_file_mutex_lock(kn);
	on = kernfs_deref_open_node_locked(kn);
	if (!on) {
		mutex_unlock(mutex);
		return;
	}

	list_for_each_entry(of, &on->files, list) {
		struct inode *inode = file_inode(of->file);

		if (of->mmapped) {
			/* zap the whole range (start 0, length 0) */
			unmap_mapping_range(inode->i_mapping, 0, 0, 1);
			of->mmapped = false;
			on->nr_mmapped--;
		}

		if (kn->flags & KERNFS_HAS_RELEASE)
			kernfs_release_file(kn, of);
	}

	WARN_ON_ONCE(on->nr_mmapped || on->nr_to_release);
	mutex_unlock(mutex);
}

/*
 * Kernfs attribute files are pollable.
 * The idea is that you read
 * the content and then you use 'poll' or 'select' to wait for
 * the content to change.  When the content changes (assuming the
 * manager for the kobject supports notification), poll will
 * return EPOLLERR|EPOLLPRI, and select will return the fd whether
 * it is waiting for read, write, or exceptions.
 * Once poll/select indicates that the value has changed, you
 * need to close and re-open the file, or seek to 0 and read again.
 * Reminder: this only works for attributes which actively support
 * it, and it is not possible to test an attribute from userspace
 * to see if it supports poll (Neither 'poll' nor 'select' return
 * an appropriate error code).  When in doubt, set a suitable timeout value.
 */
__poll_t kernfs_generic_poll(struct kernfs_open_file *of, poll_table *wait)
{
	struct kernfs_open_node *on = of_on(of);

	poll_wait(of->file, &on->poll, wait);

	/* the node was notified since the snapshot taken by the last read */
	if (of->event != atomic_read(&on->event))
		return DEFAULT_POLLMASK|EPOLLERR|EPOLLPRI;

	return DEFAULT_POLLMASK;
}

/*
 * ->poll: use the node's ->poll() when it has one, kernfs_generic_poll()
 * otherwise.  When no active reference can be taken the file is reported
 * as changed/in error.
 */
static __poll_t kernfs_fop_poll(struct file *filp, poll_table *wait)
{
	struct kernfs_open_file *of = kernfs_of(filp);
	struct kernfs_node *kn = kernfs_dentry_node(filp->f_path.dentry);
	__poll_t ret;

	if (!kernfs_get_active_of(of))
		return DEFAULT_POLLMASK|EPOLLERR|EPOLLPRI;

	if (kn->attr.ops->poll)
		ret = kn->attr.ops->poll(of, wait);
	else
		ret = kernfs_generic_poll(of, wait);

	kernfs_put_active_of(of);
	return ret;
}

/*
 * ->llseek: use the node's ->llseek() when it has one,
 * generic_file_llseek() otherwise.  Returns -ENODEV when no active
 * reference can be taken.
 */
static loff_t kernfs_fop_llseek(struct file *file, loff_t offset, int whence)
{
	struct kernfs_open_file *of = kernfs_of(file);
	const struct kernfs_ops *ops;
	loff_t ret;

	/*
	 * @of->mutex nests outside active ref and is primarily to ensure that
	 * the ops aren't called concurrently for the same open file.
	 */
	mutex_lock(&of->mutex);
	if (!kernfs_get_active_of(of)) {
		mutex_unlock(&of->mutex);
		return -ENODEV;
	}

	ops = kernfs_ops(of->kn);
	if (ops->llseek)
		ret = ops->llseek(of, offset, whence);
	else
		ret = generic_file_llseek(file, offset, whence);

	kernfs_put_active_of(of);
	mutex_unlock(&of->mutex);
	return ret;
}

/*
 * Work item behind kernfs_notify(): pop nodes off kernfs_notify_list one at
 * a time and generate FS_MODIFY fsnotify events for each of them on every
 * superblock of the node's root.  Returns once the list is empty.
 */
static void kernfs_notify_workfn(struct work_struct *work)
{
	struct kernfs_node *kn;
	struct kernfs_super_info *info;
	struct kernfs_root *root;
repeat:
	/* pop one off the notify_list */
	spin_lock_irq(&kernfs_notify_lock);
	kn = kernfs_notify_list;
	if (kn == KERNFS_NOTIFY_EOL) {
		spin_unlock_irq(&kernfs_notify_lock);
		return;
	}
	kernfs_notify_list = kn->attr.notify_next;
	/* NULL marks @kn as off the list so kernfs_notify() can requeue it */
	kn->attr.notify_next = NULL;
	spin_unlock_irq(&kernfs_notify_lock);

	root = kernfs_root(kn);
	/* kick fsnotify */

	down_read(&root->kernfs_supers_rwsem);
	down_read(&root->kernfs_rwsem);
	list_for_each_entry(info, &kernfs_root(kn)->supers, node) {
		struct kernfs_node *parent;
		struct inode *p_inode = NULL;
		const char *kn_name;
		struct inode *inode;
		struct qstr name;

		/*
		 * We want fsnotify_modify() on @kn but as the
		 * modifications aren't originating from userland don't
		 * have the matching @file available.  Look up the inodes
		 * and generate the events manually.
		 */
		inode = ilookup(info->sb, kernfs_ino(kn));
		if (!inode)
			continue;

		kn_name = kernfs_rcu_name(kn);
		name = QSTR(kn_name);
		parent = kernfs_get_parent(kn);
		if (parent) {
			p_inode = ilookup(info->sb, kernfs_ino(parent));
			if (p_inode) {
				fsnotify(FS_MODIFY | FS_EVENT_ON_CHILD,
					 inode, FSNOTIFY_EVENT_INODE,
					 p_inode, &name, inode, 0);
				iput(p_inode);
			}

			kernfs_put(parent);
		}

		/*
		 * No parent inode to report through: notify on the inode
		 * itself.  @p_inode is only tested for NULL here, never
		 * dereferenced after the iput() above.
		 */
		if (!p_inode)
			fsnotify_inode(inode, FS_MODIFY);

		iput(inode);
	}

	up_read(&root->kernfs_rwsem);
	up_read(&root->kernfs_supers_rwsem);
	/* drop the reference kernfs_notify() took when queueing @kn */
	kernfs_put(kn);
	goto repeat;
}

/**
 * kernfs_notify - notify a kernfs file
 * @kn: file to notify
 *
 * Notify @kn such that poll(2) on @kn wakes up.  May be called from any
 * context.
 */
void kernfs_notify(struct kernfs_node *kn)
{
	static DECLARE_WORK(kernfs_notify_work, kernfs_notify_workfn);
	unsigned long flags;
	struct kernfs_open_node *on;

	if (WARN_ON(kernfs_type(kn) != KERNFS_FILE))
		return;

	/* kick poll immediately */
	rcu_read_lock();
	on = rcu_dereference(kn->attr.open);
	if (on) {
		atomic_inc(&on->event);
		wake_up_interruptible(&on->poll);
	}
	rcu_read_unlock();

	/* schedule work to kick fsnotify */
	spin_lock_irqsave(&kernfs_notify_lock, flags);
	/* a non-NULL ->notify_next means @kn is already queued */
	if (!kn->attr.notify_next) {
		/* the work function drops this reference */
		kernfs_get(kn);
		kn->attr.notify_next = kernfs_notify_list;
		kernfs_notify_list = kn;
		schedule_work(&kernfs_notify_work);
	}
	spin_unlock_irqrestore(&kernfs_notify_lock, flags);
}
EXPORT_SYMBOL_GPL(kernfs_notify);

/* file_operations installed on kernfs file inodes */
const struct file_operations kernfs_file_fops = {
	.read_iter	= kernfs_fop_read_iter,
	.write_iter	= kernfs_fop_write_iter,
	.llseek		= kernfs_fop_llseek,
	.mmap		= kernfs_fop_mmap,
	.open		= kernfs_fop_open,
	.release	= kernfs_fop_release,
	.poll		= kernfs_fop_poll,
	.fsync		= noop_fsync,
	.splice_read	= copy_splice_read,
	.splice_write	= iter_file_splice_write,
};

/**
 * __kernfs_create_file - kernfs internal function to create a file
 * @parent: directory to create the file in
 * @name: name of the file
 * @mode: mode of the file
 * @uid: uid of the file
 * @gid: gid of the file
 * @size: size of the file
 * @ops: kernfs operations for the file
 * @priv: private data for the file
 * @ns: optional namespace tag of the file
 * @key: lockdep key for the file's active_ref, %NULL to disable lockdep
 *
 * Return: the created node on success, ERR_PTR() value on error.
 */
struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent,
					 const char *name,
					 umode_t mode, kuid_t uid, kgid_t gid,
					 loff_t size,
					 const struct kernfs_ops *ops,
					 void *priv, const struct ns_common *ns,
					 struct lock_class_key *key)
{
	struct kernfs_node *kn;
	unsigned flags;
	int rc;

	flags = KERNFS_FILE;

	/* only the permission bits of @mode are kept; the type is forced to S_IFREG */
	kn = kernfs_new_node(parent, name, (mode & S_IALLUGO) | S_IFREG,
			     uid, gid, flags);
	if (!kn)
		return ERR_PTR(-ENOMEM);

	kn->attr.ops = ops;
	kn->attr.size = size;
	kn->ns = ns;
	kn->priv = priv;

#ifdef CONFIG_DEBUG_LOCK_ALLOC
	if (key) {
		lockdep_init_map(&kn->dep_map, "kn->active", key, 0);
		kn->flags |= KERNFS_LOCKDEP;
	}
#endif

	/*
	 * kn->attr.ops is accessible only while holding active ref.  We
	 * need to know whether some ops are implemented outside active
	 * ref.  Cache their existence in flags.
	 */
	if (ops->seq_show)
		kn->flags |= KERNFS_HAS_SEQ_SHOW;
	if (ops->mmap)
		kn->flags |= KERNFS_HAS_MMAP;
	if (ops->release)
		kn->flags |= KERNFS_HAS_RELEASE;

	rc = kernfs_add_one(kn);
	if (rc) {
		/* adding failed: drop the reference on the new node */
		kernfs_put(kn);
		return ERR_PTR(rc);
	}
	return kn;
}