1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * fs/libfs.c 4 * Library for filesystems writers. 5 */ 6 7 #include <linux/blkdev.h> 8 #include <linux/export.h> 9 #include <linux/pagemap.h> 10 #include <linux/slab.h> 11 #include <linux/cred.h> 12 #include <linux/mount.h> 13 #include <linux/vfs.h> 14 #include <linux/quotaops.h> 15 #include <linux/mutex.h> 16 #include <linux/namei.h> 17 #include <linux/exportfs.h> 18 #include <linux/iversion.h> 19 #include <linux/writeback.h> 20 #include <linux/buffer_head.h> /* sync_mapping_buffers */ 21 #include <linux/fs_context.h> 22 #include <linux/pseudo_fs.h> 23 #include <linux/fsnotify.h> 24 #include <linux/unicode.h> 25 #include <linux/fscrypt.h> 26 27 #include <linux/uaccess.h> 28 29 #include "internal.h" 30 31 int simple_getattr(struct mnt_idmap *idmap, const struct path *path, 32 struct kstat *stat, u32 request_mask, 33 unsigned int query_flags) 34 { 35 struct inode *inode = d_inode(path->dentry); 36 generic_fillattr(&nop_mnt_idmap, inode, stat); 37 stat->blocks = inode->i_mapping->nrpages << (PAGE_SHIFT - 9); 38 return 0; 39 } 40 EXPORT_SYMBOL(simple_getattr); 41 42 int simple_statfs(struct dentry *dentry, struct kstatfs *buf) 43 { 44 buf->f_type = dentry->d_sb->s_magic; 45 buf->f_bsize = PAGE_SIZE; 46 buf->f_namelen = NAME_MAX; 47 return 0; 48 } 49 EXPORT_SYMBOL(simple_statfs); 50 51 /* 52 * Retaining negative dentries for an in-memory filesystem just wastes 53 * memory and lookup time: arrange for them to be deleted immediately. 54 */ 55 int always_delete_dentry(const struct dentry *dentry) 56 { 57 return 1; 58 } 59 EXPORT_SYMBOL(always_delete_dentry); 60 61 const struct dentry_operations simple_dentry_operations = { 62 .d_delete = always_delete_dentry, 63 }; 64 EXPORT_SYMBOL(simple_dentry_operations); 65 66 /* 67 * Lookup the data. This is trivial - if the dentry didn't already 68 * exist, we know it is negative. Set d_op to delete negative dentries. 
 */
struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
{
	/* Names longer than NAME_MAX are refused outright. */
	if (dentry->d_name.len > NAME_MAX)
		return ERR_PTR(-ENAMETOOLONG);
	/*
	 * Only install simple_dentry_operations when the superblock does
	 * not provide dentry operations of its own.
	 */
	if (!dentry->d_sb->s_d_op)
		d_set_d_op(dentry, &simple_dentry_operations);
	/* No inode is attached: the dentry is added with a NULL inode. */
	d_add(dentry, NULL);
	return NULL;
}
EXPORT_SYMBOL(simple_lookup);

/*
 * ->open for dcache-backed directories.  The dentry returned by
 * d_alloc_cursor() is kept in file->private_data; dcache_dir_lseek()
 * and dcache_readdir() use it as the position marker.
 * Returns 0, or -ENOMEM when no cursor could be allocated.
 */
int dcache_dir_open(struct inode *inode, struct file *file)
{
	file->private_data = d_alloc_cursor(file->f_path.dentry);

	return file->private_data ? 0 : -ENOMEM;
}
EXPORT_SYMBOL(dcache_dir_open);

/*
 * ->release counterpart of dcache_dir_open(): drops the reference on the
 * cursor dentry stored in file->private_data.  Always returns 0.
 */
int dcache_dir_close(struct inode *inode, struct file *file)
{
	dput(file->private_data);
	return 0;
}
EXPORT_SYMBOL(dcache_dir_close);

/* parent is locked at least shared */
/*
 * Returns an element of siblings' list.
 * We are looking for <count>th positive after <p>; if
 * found, dentry is grabbed and returned to caller.
 * If no such element exists, NULL is returned.
 *
 * @cursor: per-file cursor dentry; its d_parent is the directory scanned
 * @p:      list position to start after
 * @count:  how many positive entries to advance by
 * @last:   reference that is dropped (dput) before returning
 */
static struct dentry *scan_positives(struct dentry *cursor,
					struct list_head *p,
					loff_t count,
					struct dentry *last)
{
	struct dentry *dentry = cursor->d_parent, *found = NULL;

	spin_lock(&dentry->d_lock);
	while ((p = p->next) != &dentry->d_subdirs) {
		struct dentry *d = list_entry(p, struct dentry, d_child);
		// we must at least skip cursors, to avoid livelocks
		if (d->d_flags & DCACHE_DENTRY_CURSOR)
			continue;
		if (simple_positive(d) && !--count) {
			/* re-check positivity with the child's own lock held */
			spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED);
			if (simple_positive(d))
				found = dget_dlock(d);
			spin_unlock(&d->d_lock);
			if (likely(found))
				break;
			/* the candidate stopped being positive: take the next one */
			count = 1;
		}
		if (need_resched()) {
			/*
			 * Park the cursor at the current position so the walk
			 * can resume from it after the parent lock has been
			 * dropped for rescheduling.
			 */
			list_move(&cursor->d_child, p);
			p = &cursor->d_child;
			spin_unlock(&dentry->d_lock);
			cond_resched();
			spin_lock(&dentry->d_lock);
		}
	}
	spin_unlock(&dentry->d_lock);
	dput(last);
	return found;
}

/*
 * ->llseek for dcache-backed directories.
 *
 * Accepts whence 0 (absolute offset) and 1 (offset relative to
 * file->f_pos); anything else, or a negative resulting offset, yields
 * -EINVAL.  When the position actually changes, the cursor is moved
 * right after the (offset - 2)th positive child, or unlinked from the
 * list when no such child exists or offset <= 2.  Returns the new offset.
 */
loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence)
{
	struct dentry *dentry = file->f_path.dentry;
	switch (whence) {
		case 1:
			offset += file->f_pos;
			fallthrough;
		case 0:
			if (offset >= 0)
				break;
			fallthrough;
		default:
			return -EINVAL;
	}
	if (offset != file->f_pos) {
		struct dentry *cursor = file->private_data;
		struct dentry *to = NULL;

		inode_lock_shared(dentry->d_inode);

		/*
		 * The first two positions are skipped here; dcache_readdir()
		 * starts walking the children at pos == 2.
		 */
		if (offset > 2)
			to = scan_positives(cursor, &dentry->d_subdirs,
					    offset - 2, NULL);
		spin_lock(&dentry->d_lock);
		if (to)
			list_move(&cursor->d_child, &to->d_child);
		else
			list_del_init(&cursor->d_child);
		spin_unlock(&dentry->d_lock);
		/* drop the reference taken by scan_positives() */
		dput(to);

		file->f_pos = offset;

		inode_unlock_shared(dentry->d_inode);
	}
	return offset;
}
EXPORT_SYMBOL(dcache_dir_lseek);

/* Relationship between i_mode and the DT_xxx types */
static inline unsigned char dt_type(struct inode *inode)
{
	/* bits 12..15 of i_mode, as a 4-bit value */
	return (inode->i_mode >> 12) & 15;
}

/*
 * Directory is locked and all positive dentries in it are safe, since
 * for ramfs-type trees they can't go away without unlink() or rmdir(),
 * both impossible due to the lock on directory.
 */

/*
 * ->iterate_shared for dcache-backed directories.  After the dot
 * entries, every positive child following the current position is
 * emitted.  The position is remembered by where the per-file cursor
 * dentry (file->private_data) sits in the parent's d_subdirs list.
 * Always returns 0.
 */
int dcache_readdir(struct file *file, struct dir_context *ctx)
{
	struct dentry *dentry = file->f_path.dentry;
	struct dentry *cursor = file->private_data;
	struct list_head *anchor = &dentry->d_subdirs;
	struct dentry *next = NULL;
	struct list_head *p;

	if (!dir_emit_dots(file, ctx))
		return 0;

	/*
	 * pos == 2: start from the head of the children list.  Otherwise
	 * resume after the cursor; a cursor that is not linked into the
	 * list means there is nothing (more) to emit.
	 */
	if (ctx->pos == 2)
		p = anchor;
	else if (!list_empty(&cursor->d_child))
		p = &cursor->d_child;
	else
		return 0;

	/* scan_positives() drops the reference on the previous 'next'. */
	while ((next = scan_positives(cursor, p, 1, next)) != NULL) {
		if (!dir_emit(ctx, next->d_name.name, next->d_name.len,
			      d_inode(next)->i_ino, dt_type(d_inode(next))))
			break;
		ctx->pos++;
		p = &next->d_child;
	}
	spin_lock(&dentry->d_lock);
	/*
	 * If an entry could not be emitted, place the cursor in front of
	 * it so it is found again next time; otherwise unlink the cursor.
	 */
	if (next)
		list_move_tail(&cursor->d_child, &next->d_child);
	else
		list_del_init(&cursor->d_child);
	spin_unlock(&dentry->d_lock);
	dput(next);

	return 0;
}
EXPORT_SYMBOL(dcache_readdir);

/* ->read for directories: always fails with -EISDIR. */
ssize_t generic_read_dir(struct file *filp, char __user *buf, size_t siz, loff_t *ppos)
{
	return -EISDIR;
}
EXPORT_SYMBOL(generic_read_dir);

/* File operations for directories that live purely in the dcache. */
const struct file_operations simple_dir_operations = {
	.open		= dcache_dir_open,
	.release	= dcache_dir_close,
	.llseek		= dcache_dir_lseek,
	.read		= generic_read_dir,
	.iterate_shared	= dcache_readdir,
	.fsync		= noop_fsync,
};
EXPORT_SYMBOL(simple_dir_operations);

/* Inode operations offering nothing but simple_lookup(). */
const struct inode_operations simple_dir_inode_operations = {
	.lookup		= simple_lookup,
};
EXPORT_SYMBOL(simple_dir_inode_operations);

/*
 * Return, with a reference held, the first positive child of @parent
 * found after @prev in the d_subdirs list (or from the start of the list
 * when @prev is NULL).  The reference on @prev is dropped.  Returns NULL
 * when no positive child remains.
 */
static struct dentry *find_next_child(struct dentry *parent, struct dentry *prev)
{
	struct dentry *child = NULL;
	struct list_head *p = prev ? &prev->d_child : &parent->d_subdirs;

	spin_lock(&parent->d_lock);
	while ((p = p->next) != &parent->d_subdirs) {
		struct dentry *d = container_of(p, struct dentry, d_child);
		if (simple_positive(d)) {
			/* re-check with the child's own lock held */
			spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED);
			if (simple_positive(d))
				child = dget_dlock(d);
			spin_unlock(&d->d_lock);
			if (likely(child))
				break;
		}
	}
	spin_unlock(&parent->d_lock);
	dput(prev);
	return child;
}

/*
 * Remove @dentry together with everything below it.
 *
 * The tree is walked depth-first without recursion: descend into the
 * next positive child while there is one; once a node has no children
 * left, clear its link count, step up to its parent and, if the node is
 * still positive, invalidate it, send the fsnotify event, invoke
 * @callback (when non-NULL) and drop a reference on it.  The walk ends
 * after @dentry itself has been handled, at which point the timestamps
 * (and, for a directory, the link count) of its parent are updated.
 */
void simple_recursive_removal(struct dentry *dentry,
                              void (*callback)(struct dentry *))
{
	struct dentry *this = dget(dentry);
	while (true) {
		struct dentry *victim = NULL, *child;
		struct inode *inode = this->d_inode;

		inode_lock(inode);
		if (d_is_dir(this))
			inode->i_flags |= S_DEAD;
		while ((child = find_next_child(this, victim)) == NULL) {
			// kill and ascend
			// update metadata while it's still locked
			inode->i_ctime = current_time(inode);
			clear_nlink(inode);
			inode_unlock(inode);
			victim = this;
			this = this->d_parent;
			inode = this->d_inode;
			/* from here on 'inode' is the parent's inode */
			inode_lock(inode);
			if (simple_positive(victim)) {
				d_invalidate(victim);	// avoid lost mounts
				if (d_is_dir(victim))
					fsnotify_rmdir(inode, victim);
				else
					fsnotify_unlink(inode, victim);
				if (callback)
					callback(victim);
				dput(victim);		// unpin it
			}
			if (victim == dentry) {
				inode->i_ctime = inode->i_mtime =
					current_time(inode);
				if (d_is_dir(dentry))
					drop_nlink(inode);
				inode_unlock(inode);
				/* pairs with the dget() at the top */
				dput(dentry);
				return;
			}
		}
		inode_unlock(inode);
		this = child;
	}
}
EXPORT_SYMBOL(simple_recursive_removal);

/* Default super operations: only ->statfs is provided. */
static const struct super_operations simple_super_operations = {
	.statfs		= simple_statfs,
};

/*
 * fill_super callback used by pseudo_fs_get_tree().  Sets up the
 * superblock from the pseudo_fs_context in fc->fs_private and creates a
 * root directory inode.  Returns 0, or -ENOMEM if the root inode or the
 * root dentry cannot be allocated.
 */
static int pseudo_fs_fill_super(struct super_block *s, struct fs_context *fc)
{
	struct pseudo_fs_context *ctx = fc->fs_private;
	struct inode *root;

	s->s_maxbytes = MAX_LFS_FILESIZE;
	s->s_blocksize = PAGE_SIZE;
	s->s_blocksize_bits = PAGE_SHIFT;
	s->s_magic = ctx->magic;
	/* fall back to the default super operations when none were given */
	s->s_op = ctx->ops ?: &simple_super_operations;
	s->s_xattr = ctx->xattr;
	s->s_time_gran = 1;
	root = new_inode(s);
	if (!root)
		return -ENOMEM;

	/*
	 * since this is the first inode, make it number 1. New inodes created
	 * after this must take care not to collide with it (by passing
	 * max_reserved of 1 to iunique).
	 */
	root->i_ino = 1;
	root->i_mode = S_IFDIR | S_IRUSR | S_IWUSR;
	root->i_atime = root->i_mtime = root->i_ctime = current_time(root);
	s->s_root = d_make_root(root);
	if (!s->s_root)
		return -ENOMEM;
	s->s_d_op = ctx->dops;
	return 0;
}

/* ->get_tree: delegate to get_tree_nodev() with pseudo_fs_fill_super(). */
static int pseudo_fs_get_tree(struct fs_context *fc)
{
	return get_tree_nodev(fc, pseudo_fs_fill_super);
}

/* ->free: release the pseudo_fs_context allocated by init_pseudo(). */
static void pseudo_fs_free(struct fs_context *fc)
{
	kfree(fc->fs_private);
}

static const struct fs_context_operations pseudo_fs_context_ops = {
	.free		= pseudo_fs_free,
	.get_tree	= pseudo_fs_get_tree,
};

/*
 * Common helper for pseudo-filesystems (sockfs, pipefs, bdev - stuff that
 * will never be mountable)
 *
 * Allocates a zeroed pseudo_fs_context, records @magic in it and hooks
 * it up to @fc (fs_private, ops, SB_NOUSER, global).  Returns the new
 * context, or NULL if the allocation failed, in which case @fc is left
 * untouched.
 */
struct pseudo_fs_context *init_pseudo(struct fs_context *fc,
					unsigned long magic)
{
	struct pseudo_fs_context *ctx;

	ctx = kzalloc(sizeof(struct pseudo_fs_context), GFP_KERNEL);
	if (likely(ctx)) {
		ctx->magic = magic;
		fc->fs_private = ctx;
		fc->ops = &pseudo_fs_context_ops;
		fc->sb_flags |= SB_NOUSER;
		fc->global = true;
	}
	return ctx;
}
EXPORT_SYMBOL(init_pseudo);

/*
 * ->open helper: copy inode->i_private to file->private_data when it is
 * non-NULL (private_data is left alone otherwise).  Always returns 0.
 */
int simple_open(struct inode *inode, struct file *file)
{
	if (inode->i_private)
		file->private_data = inode->i_private;
	return 0;
}
EXPORT_SYMBOL(simple_open);

/*
 * ->link helper: make @dentry (in @dir) another name for the inode of
 * @old_dentry.  Updates the timestamps, bumps the link count and takes
 * an inode reference for the new name.  Always returns 0.
 */
int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
{
	struct inode *inode = d_inode(old_dentry);

	inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode);
	inc_nlink(inode);
	ihold(inode);
	/* extra dentry reference; simple_unlink() drops one with dput() */
	dget(dentry);
	d_instantiate(dentry, inode);
	return 0;
}
EXPORT_SYMBOL(simple_link);

/*
 * Returns 1 when @dentry has no positive child in its d_subdirs list,
 * 0 as soon as one is found.
 */
int simple_empty(struct dentry *dentry)
{
	struct dentry *child;
	int ret = 0;

	spin_lock(&dentry->d_lock);
	list_for_each_entry(child, &dentry->d_subdirs, d_child) {
		spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED);
		if (simple_positive(child)) {
			spin_unlock(&child->d_lock);
			goto out;
		}
		spin_unlock(&child->d_lock);
	}
	ret = 1;
out:
	spin_unlock(&dentry->d_lock);
	return ret;
}
EXPORT_SYMBOL(simple_empty);

/*
 * ->unlink helper: update the timestamps of the inode and of @dir, drop
 * one link and one dentry reference.  Always returns 0.
 */
int simple_unlink(struct inode *dir, struct dentry *dentry)
{
	struct inode *inode = d_inode(dentry);

	inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode);
	drop_nlink(inode);
	dput(dentry);
	return 0;
}
EXPORT_SYMBOL(simple_unlink);

/*
 * ->rmdir helper.  Returns -ENOTEMPTY if @dentry still has positive
 * children; otherwise drops two links on the directory itself (one here,
 * one inside simple_unlink()) and one on the parent @dir, and returns 0.
 */
int simple_rmdir(struct inode *dir, struct dentry *dentry)
{
	if (!simple_empty(dentry))
		return -ENOTEMPTY;

	drop_nlink(d_inode(dentry));
	simple_unlink(dir, dentry);
	drop_nlink(dir);
	return 0;
}
EXPORT_SYMBOL(simple_rmdir);

/*
 * Bookkeeping for exchanging @old_dentry and @new_dentry: adjusts the
 * parents' link counts when exactly one of the two is a directory and
 * they live in different directories, then stamps both parents and both
 * inodes with the current time.  Always returns 0.
 */
int simple_rename_exchange(struct inode *old_dir, struct dentry *old_dentry,
			   struct inode *new_dir, struct dentry *new_dentry)
{
	bool old_is_dir = d_is_dir(old_dentry);
	bool new_is_dir = d_is_dir(new_dentry);

	if (old_dir != new_dir && old_is_dir != new_is_dir) {
		/* a directory changes parent: move one link between them */
		if (old_is_dir) {
			drop_nlink(old_dir);
			inc_nlink(new_dir);
		} else {
			drop_nlink(new_dir);
			inc_nlink(old_dir);
		}
	}
	old_dir->i_ctime = old_dir->i_mtime =
	new_dir->i_ctime = new_dir->i_mtime =
	d_inode(old_dentry)->i_ctime =
	d_inode(new_dentry)->i_ctime = current_time(old_dir);

	return 0;
}
EXPORT_SYMBOL_GPL(simple_rename_exchange);

/*
 * ->rename helper.
 *
 * Only RENAME_NOREPLACE and RENAME_EXCHANGE are accepted in @flags
 * (-EINVAL otherwise); RENAME_EXCHANGE is handed to
 * simple_rename_exchange().  Returns -ENOTEMPTY when the target has
 * positive children, 0 on success.  @idmap is not consulted.
 */
int simple_rename(struct mnt_idmap *idmap, struct inode *old_dir,
		  struct dentry *old_dentry, struct inode *new_dir,
		  struct dentry *new_dentry, unsigned int flags)
{
	struct inode *inode = d_inode(old_dentry);
	int they_are_dirs = d_is_dir(old_dentry);

	if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE))
		return -EINVAL;

	if (flags & RENAME_EXCHANGE)
		return simple_rename_exchange(old_dir, old_dentry, new_dir, new_dentry);

	if (!simple_empty(new_dentry))
		return -ENOTEMPTY;

	if (d_really_is_positive(new_dentry)) {
		/* an existing target is replaced: unlink it first */
		simple_unlink(new_dir, new_dentry);
		if (they_are_dirs) {
			drop_nlink(d_inode(new_dentry));
			drop_nlink(old_dir);
		}
	} else if (they_are_dirs) {
		/* a directory moves: one link travels from old to new parent */
		drop_nlink(old_dir);
		inc_nlink(new_dir);
	}

	old_dir->i_ctime = old_dir->i_mtime = new_dir->i_ctime =
		new_dir->i_mtime = inode->i_ctime = current_time(old_dir);

	return 0;
}
EXPORT_SYMBOL(simple_rename);

/**
 * simple_setattr - setattr for simple filesystem
 * @idmap:	idmap of the target mount
 * @dentry:	dentry
 * @iattr:	iattr structure
 *
 * Returns 0 on success, -error on failure.
 *
 * simple_setattr is a simple ->setattr implementation without a proper
 * implementation of size changes.
 *
 * It can either be used for in-memory filesystems or special files
 * on simple regular filesystems.  Anything that needs to change on-disk
 * or wire state on size changes needs its own setattr method.
524 */ 525 int simple_setattr(struct mnt_idmap *idmap, struct dentry *dentry, 526 struct iattr *iattr) 527 { 528 struct inode *inode = d_inode(dentry); 529 int error; 530 531 error = setattr_prepare(idmap, dentry, iattr); 532 if (error) 533 return error; 534 535 if (iattr->ia_valid & ATTR_SIZE) 536 truncate_setsize(inode, iattr->ia_size); 537 setattr_copy(idmap, inode, iattr); 538 mark_inode_dirty(inode); 539 return 0; 540 } 541 EXPORT_SYMBOL(simple_setattr); 542 543 static int simple_read_folio(struct file *file, struct folio *folio) 544 { 545 folio_zero_range(folio, 0, folio_size(folio)); 546 flush_dcache_folio(folio); 547 folio_mark_uptodate(folio); 548 folio_unlock(folio); 549 return 0; 550 } 551 552 int simple_write_begin(struct file *file, struct address_space *mapping, 553 loff_t pos, unsigned len, 554 struct page **pagep, void **fsdata) 555 { 556 struct page *page; 557 pgoff_t index; 558 559 index = pos >> PAGE_SHIFT; 560 561 page = grab_cache_page_write_begin(mapping, index); 562 if (!page) 563 return -ENOMEM; 564 565 *pagep = page; 566 567 if (!PageUptodate(page) && (len != PAGE_SIZE)) { 568 unsigned from = pos & (PAGE_SIZE - 1); 569 570 zero_user_segments(page, 0, from, from + len, PAGE_SIZE); 571 } 572 return 0; 573 } 574 EXPORT_SYMBOL(simple_write_begin); 575 576 /** 577 * simple_write_end - .write_end helper for non-block-device FSes 578 * @file: See .write_end of address_space_operations 579 * @mapping: " 580 * @pos: " 581 * @len: " 582 * @copied: " 583 * @page: " 584 * @fsdata: " 585 * 586 * simple_write_end does the minimum needed for updating a page after writing is 587 * done. It has the same API signature as the .write_end of 588 * address_space_operations vector. So it can just be set onto .write_end for 589 * FSes that don't need any other processing. i_mutex is assumed to be held. 590 * Block based filesystems should use generic_write_end(). 
591 * NOTE: Even though i_size might get updated by this function, mark_inode_dirty 592 * is not called, so a filesystem that actually does store data in .write_inode 593 * should extend on what's done here with a call to mark_inode_dirty() in the 594 * case that i_size has changed. 595 * 596 * Use *ONLY* with simple_read_folio() 597 */ 598 static int simple_write_end(struct file *file, struct address_space *mapping, 599 loff_t pos, unsigned len, unsigned copied, 600 struct page *page, void *fsdata) 601 { 602 struct inode *inode = page->mapping->host; 603 loff_t last_pos = pos + copied; 604 605 /* zero the stale part of the page if we did a short copy */ 606 if (!PageUptodate(page)) { 607 if (copied < len) { 608 unsigned from = pos & (PAGE_SIZE - 1); 609 610 zero_user(page, from + copied, len - copied); 611 } 612 SetPageUptodate(page); 613 } 614 /* 615 * No need to use i_size_read() here, the i_size 616 * cannot change under us because we hold the i_mutex. 617 */ 618 if (last_pos > inode->i_size) 619 i_size_write(inode, last_pos); 620 621 set_page_dirty(page); 622 unlock_page(page); 623 put_page(page); 624 625 return copied; 626 } 627 628 /* 629 * Provides ramfs-style behavior: data in the pagecache, but no writeback. 630 */ 631 const struct address_space_operations ram_aops = { 632 .read_folio = simple_read_folio, 633 .write_begin = simple_write_begin, 634 .write_end = simple_write_end, 635 .dirty_folio = noop_dirty_folio, 636 }; 637 EXPORT_SYMBOL(ram_aops); 638 639 /* 640 * the inodes created here are not hashed. If you use iunique to generate 641 * unique inode values later for this filesystem, then you must take care 642 * to pass it an appropriate max_reserved value to avoid collisions. 
643 */ 644 int simple_fill_super(struct super_block *s, unsigned long magic, 645 const struct tree_descr *files) 646 { 647 struct inode *inode; 648 struct dentry *root; 649 struct dentry *dentry; 650 int i; 651 652 s->s_blocksize = PAGE_SIZE; 653 s->s_blocksize_bits = PAGE_SHIFT; 654 s->s_magic = magic; 655 s->s_op = &simple_super_operations; 656 s->s_time_gran = 1; 657 658 inode = new_inode(s); 659 if (!inode) 660 return -ENOMEM; 661 /* 662 * because the root inode is 1, the files array must not contain an 663 * entry at index 1 664 */ 665 inode->i_ino = 1; 666 inode->i_mode = S_IFDIR | 0755; 667 inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); 668 inode->i_op = &simple_dir_inode_operations; 669 inode->i_fop = &simple_dir_operations; 670 set_nlink(inode, 2); 671 root = d_make_root(inode); 672 if (!root) 673 return -ENOMEM; 674 for (i = 0; !files->name || files->name[0]; i++, files++) { 675 if (!files->name) 676 continue; 677 678 /* warn if it tries to conflict with the root inode */ 679 if (unlikely(i == 1)) 680 printk(KERN_WARNING "%s: %s passed in a files array" 681 "with an index of 1!\n", __func__, 682 s->s_type->name); 683 684 dentry = d_alloc_name(root, files->name); 685 if (!dentry) 686 goto out; 687 inode = new_inode(s); 688 if (!inode) { 689 dput(dentry); 690 goto out; 691 } 692 inode->i_mode = S_IFREG | files->mode; 693 inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); 694 inode->i_fop = files->ops; 695 inode->i_ino = i; 696 d_add(dentry, inode); 697 } 698 s->s_root = root; 699 return 0; 700 out: 701 d_genocide(root); 702 shrink_dcache_parent(root); 703 dput(root); 704 return -ENOMEM; 705 } 706 EXPORT_SYMBOL(simple_fill_super); 707 708 static DEFINE_SPINLOCK(pin_fs_lock); 709 710 int simple_pin_fs(struct file_system_type *type, struct vfsmount **mount, int *count) 711 { 712 struct vfsmount *mnt = NULL; 713 spin_lock(&pin_fs_lock); 714 if (unlikely(!*mount)) { 715 spin_unlock(&pin_fs_lock); 716 mnt = 
vfs_kern_mount(type, SB_KERNMOUNT, type->name, NULL); 717 if (IS_ERR(mnt)) 718 return PTR_ERR(mnt); 719 spin_lock(&pin_fs_lock); 720 if (!*mount) 721 *mount = mnt; 722 } 723 mntget(*mount); 724 ++*count; 725 spin_unlock(&pin_fs_lock); 726 mntput(mnt); 727 return 0; 728 } 729 EXPORT_SYMBOL(simple_pin_fs); 730 731 void simple_release_fs(struct vfsmount **mount, int *count) 732 { 733 struct vfsmount *mnt; 734 spin_lock(&pin_fs_lock); 735 mnt = *mount; 736 if (!--*count) 737 *mount = NULL; 738 spin_unlock(&pin_fs_lock); 739 mntput(mnt); 740 } 741 EXPORT_SYMBOL(simple_release_fs); 742 743 /** 744 * simple_read_from_buffer - copy data from the buffer to user space 745 * @to: the user space buffer to read to 746 * @count: the maximum number of bytes to read 747 * @ppos: the current position in the buffer 748 * @from: the buffer to read from 749 * @available: the size of the buffer 750 * 751 * The simple_read_from_buffer() function reads up to @count bytes from the 752 * buffer @from at offset @ppos into the user space address starting at @to. 753 * 754 * On success, the number of bytes read is returned and the offset @ppos is 755 * advanced by this number, or negative value is returned on error. 
756 **/ 757 ssize_t simple_read_from_buffer(void __user *to, size_t count, loff_t *ppos, 758 const void *from, size_t available) 759 { 760 loff_t pos = *ppos; 761 size_t ret; 762 763 if (pos < 0) 764 return -EINVAL; 765 if (pos >= available || !count) 766 return 0; 767 if (count > available - pos) 768 count = available - pos; 769 ret = copy_to_user(to, from + pos, count); 770 if (ret == count) 771 return -EFAULT; 772 count -= ret; 773 *ppos = pos + count; 774 return count; 775 } 776 EXPORT_SYMBOL(simple_read_from_buffer); 777 778 /** 779 * simple_write_to_buffer - copy data from user space to the buffer 780 * @to: the buffer to write to 781 * @available: the size of the buffer 782 * @ppos: the current position in the buffer 783 * @from: the user space buffer to read from 784 * @count: the maximum number of bytes to read 785 * 786 * The simple_write_to_buffer() function reads up to @count bytes from the user 787 * space address starting at @from into the buffer @to at offset @ppos. 788 * 789 * On success, the number of bytes written is returned and the offset @ppos is 790 * advanced by this number, or negative value is returned on error. 
791 **/ 792 ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos, 793 const void __user *from, size_t count) 794 { 795 loff_t pos = *ppos; 796 size_t res; 797 798 if (pos < 0) 799 return -EINVAL; 800 if (pos >= available || !count) 801 return 0; 802 if (count > available - pos) 803 count = available - pos; 804 res = copy_from_user(to + pos, from, count); 805 if (res == count) 806 return -EFAULT; 807 count -= res; 808 *ppos = pos + count; 809 return count; 810 } 811 EXPORT_SYMBOL(simple_write_to_buffer); 812 813 /** 814 * memory_read_from_buffer - copy data from the buffer 815 * @to: the kernel space buffer to read to 816 * @count: the maximum number of bytes to read 817 * @ppos: the current position in the buffer 818 * @from: the buffer to read from 819 * @available: the size of the buffer 820 * 821 * The memory_read_from_buffer() function reads up to @count bytes from the 822 * buffer @from at offset @ppos into the kernel space address starting at @to. 823 * 824 * On success, the number of bytes read is returned and the offset @ppos is 825 * advanced by this number, or negative value is returned on error. 826 **/ 827 ssize_t memory_read_from_buffer(void *to, size_t count, loff_t *ppos, 828 const void *from, size_t available) 829 { 830 loff_t pos = *ppos; 831 832 if (pos < 0) 833 return -EINVAL; 834 if (pos >= available) 835 return 0; 836 if (count > available - pos) 837 count = available - pos; 838 memcpy(to, from + pos, count); 839 *ppos = pos + count; 840 841 return count; 842 } 843 EXPORT_SYMBOL(memory_read_from_buffer); 844 845 /* 846 * Transaction based IO. 847 * The file expects a single write which triggers the transaction, and then 848 * possibly a read which collects the result - which is stored in a 849 * file-local buffer. 
850 */ 851 852 void simple_transaction_set(struct file *file, size_t n) 853 { 854 struct simple_transaction_argresp *ar = file->private_data; 855 856 BUG_ON(n > SIMPLE_TRANSACTION_LIMIT); 857 858 /* 859 * The barrier ensures that ar->size will really remain zero until 860 * ar->data is ready for reading. 861 */ 862 smp_mb(); 863 ar->size = n; 864 } 865 EXPORT_SYMBOL(simple_transaction_set); 866 867 char *simple_transaction_get(struct file *file, const char __user *buf, size_t size) 868 { 869 struct simple_transaction_argresp *ar; 870 static DEFINE_SPINLOCK(simple_transaction_lock); 871 872 if (size > SIMPLE_TRANSACTION_LIMIT - 1) 873 return ERR_PTR(-EFBIG); 874 875 ar = (struct simple_transaction_argresp *)get_zeroed_page(GFP_KERNEL); 876 if (!ar) 877 return ERR_PTR(-ENOMEM); 878 879 spin_lock(&simple_transaction_lock); 880 881 /* only one write allowed per open */ 882 if (file->private_data) { 883 spin_unlock(&simple_transaction_lock); 884 free_page((unsigned long)ar); 885 return ERR_PTR(-EBUSY); 886 } 887 888 file->private_data = ar; 889 890 spin_unlock(&simple_transaction_lock); 891 892 if (copy_from_user(ar->data, buf, size)) 893 return ERR_PTR(-EFAULT); 894 895 return ar->data; 896 } 897 EXPORT_SYMBOL(simple_transaction_get); 898 899 ssize_t simple_transaction_read(struct file *file, char __user *buf, size_t size, loff_t *pos) 900 { 901 struct simple_transaction_argresp *ar = file->private_data; 902 903 if (!ar) 904 return 0; 905 return simple_read_from_buffer(buf, size, pos, ar->data, ar->size); 906 } 907 EXPORT_SYMBOL(simple_transaction_read); 908 909 int simple_transaction_release(struct inode *inode, struct file *file) 910 { 911 free_page((unsigned long)file->private_data); 912 return 0; 913 } 914 EXPORT_SYMBOL(simple_transaction_release); 915 916 /* Simple attribute files */ 917 918 struct simple_attr { 919 int (*get)(void *, u64 *); 920 int (*set)(void *, u64); 921 char get_buf[24]; /* enough to store a u64 and "\n\0" */ 922 char set_buf[24]; 923 void 
*data; 924 const char *fmt; /* format for read operation */ 925 struct mutex mutex; /* protects access to these buffers */ 926 }; 927 928 /* simple_attr_open is called by an actual attribute open file operation 929 * to set the attribute specific access operations. */ 930 int simple_attr_open(struct inode *inode, struct file *file, 931 int (*get)(void *, u64 *), int (*set)(void *, u64), 932 const char *fmt) 933 { 934 struct simple_attr *attr; 935 936 attr = kzalloc(sizeof(*attr), GFP_KERNEL); 937 if (!attr) 938 return -ENOMEM; 939 940 attr->get = get; 941 attr->set = set; 942 attr->data = inode->i_private; 943 attr->fmt = fmt; 944 mutex_init(&attr->mutex); 945 946 file->private_data = attr; 947 948 return nonseekable_open(inode, file); 949 } 950 EXPORT_SYMBOL_GPL(simple_attr_open); 951 952 int simple_attr_release(struct inode *inode, struct file *file) 953 { 954 kfree(file->private_data); 955 return 0; 956 } 957 EXPORT_SYMBOL_GPL(simple_attr_release); /* GPL-only? This? Really? */ 958 959 /* read from the buffer that is filled with the get function */ 960 ssize_t simple_attr_read(struct file *file, char __user *buf, 961 size_t len, loff_t *ppos) 962 { 963 struct simple_attr *attr; 964 size_t size; 965 ssize_t ret; 966 967 attr = file->private_data; 968 969 if (!attr->get) 970 return -EACCES; 971 972 ret = mutex_lock_interruptible(&attr->mutex); 973 if (ret) 974 return ret; 975 976 if (*ppos && attr->get_buf[0]) { 977 /* continued read */ 978 size = strlen(attr->get_buf); 979 } else { 980 /* first read */ 981 u64 val; 982 ret = attr->get(attr->data, &val); 983 if (ret) 984 goto out; 985 986 size = scnprintf(attr->get_buf, sizeof(attr->get_buf), 987 attr->fmt, (unsigned long long)val); 988 } 989 990 ret = simple_read_from_buffer(buf, len, ppos, attr->get_buf, size); 991 out: 992 mutex_unlock(&attr->mutex); 993 return ret; 994 } 995 EXPORT_SYMBOL_GPL(simple_attr_read); 996 997 /* interpret the buffer as a number to call the set function with */ 998 static ssize_t 
simple_attr_write_xsigned(struct file *file, const char __user *buf, 999 size_t len, loff_t *ppos, bool is_signed) 1000 { 1001 struct simple_attr *attr; 1002 unsigned long long val; 1003 size_t size; 1004 ssize_t ret; 1005 1006 attr = file->private_data; 1007 if (!attr->set) 1008 return -EACCES; 1009 1010 ret = mutex_lock_interruptible(&attr->mutex); 1011 if (ret) 1012 return ret; 1013 1014 ret = -EFAULT; 1015 size = min(sizeof(attr->set_buf) - 1, len); 1016 if (copy_from_user(attr->set_buf, buf, size)) 1017 goto out; 1018 1019 attr->set_buf[size] = '\0'; 1020 if (is_signed) 1021 ret = kstrtoll(attr->set_buf, 0, &val); 1022 else 1023 ret = kstrtoull(attr->set_buf, 0, &val); 1024 if (ret) 1025 goto out; 1026 ret = attr->set(attr->data, val); 1027 if (ret == 0) 1028 ret = len; /* on success, claim we got the whole input */ 1029 out: 1030 mutex_unlock(&attr->mutex); 1031 return ret; 1032 } 1033 1034 ssize_t simple_attr_write(struct file *file, const char __user *buf, 1035 size_t len, loff_t *ppos) 1036 { 1037 return simple_attr_write_xsigned(file, buf, len, ppos, false); 1038 } 1039 EXPORT_SYMBOL_GPL(simple_attr_write); 1040 1041 ssize_t simple_attr_write_signed(struct file *file, const char __user *buf, 1042 size_t len, loff_t *ppos) 1043 { 1044 return simple_attr_write_xsigned(file, buf, len, ppos, true); 1045 } 1046 EXPORT_SYMBOL_GPL(simple_attr_write_signed); 1047 1048 /** 1049 * generic_fh_to_dentry - generic helper for the fh_to_dentry export operation 1050 * @sb: filesystem to do the file handle conversion on 1051 * @fid: file handle to convert 1052 * @fh_len: length of the file handle in bytes 1053 * @fh_type: type of file handle 1054 * @get_inode: filesystem callback to retrieve inode 1055 * 1056 * This function decodes @fid as long as it has one of the well-known 1057 * Linux filehandle types and calls @get_inode on it to retrieve the 1058 * inode for the object specified in the file handle. 
1059 */ 1060 struct dentry *generic_fh_to_dentry(struct super_block *sb, struct fid *fid, 1061 int fh_len, int fh_type, struct inode *(*get_inode) 1062 (struct super_block *sb, u64 ino, u32 gen)) 1063 { 1064 struct inode *inode = NULL; 1065 1066 if (fh_len < 2) 1067 return NULL; 1068 1069 switch (fh_type) { 1070 case FILEID_INO32_GEN: 1071 case FILEID_INO32_GEN_PARENT: 1072 inode = get_inode(sb, fid->i32.ino, fid->i32.gen); 1073 break; 1074 } 1075 1076 return d_obtain_alias(inode); 1077 } 1078 EXPORT_SYMBOL_GPL(generic_fh_to_dentry); 1079 1080 /** 1081 * generic_fh_to_parent - generic helper for the fh_to_parent export operation 1082 * @sb: filesystem to do the file handle conversion on 1083 * @fid: file handle to convert 1084 * @fh_len: length of the file handle in bytes 1085 * @fh_type: type of file handle 1086 * @get_inode: filesystem callback to retrieve inode 1087 * 1088 * This function decodes @fid as long as it has one of the well-known 1089 * Linux filehandle types and calls @get_inode on it to retrieve the 1090 * inode for the _parent_ object specified in the file handle if it 1091 * is specified in the file handle, or NULL otherwise. 1092 */ 1093 struct dentry *generic_fh_to_parent(struct super_block *sb, struct fid *fid, 1094 int fh_len, int fh_type, struct inode *(*get_inode) 1095 (struct super_block *sb, u64 ino, u32 gen)) 1096 { 1097 struct inode *inode = NULL; 1098 1099 if (fh_len <= 2) 1100 return NULL; 1101 1102 switch (fh_type) { 1103 case FILEID_INO32_GEN_PARENT: 1104 inode = get_inode(sb, fid->i32.parent_ino, 1105 (fh_len > 3 ? 
fid->i32.parent_gen : 0)); 1106 break; 1107 } 1108 1109 return d_obtain_alias(inode); 1110 } 1111 EXPORT_SYMBOL_GPL(generic_fh_to_parent); 1112 1113 /** 1114 * __generic_file_fsync - generic fsync implementation for simple filesystems 1115 * 1116 * @file: file to synchronize 1117 * @start: start offset in bytes 1118 * @end: end offset in bytes (inclusive) 1119 * @datasync: only synchronize essential metadata if true 1120 * 1121 * This is a generic implementation of the fsync method for simple 1122 * filesystems which track all non-inode metadata in the buffers list 1123 * hanging off the address_space structure. 1124 */ 1125 int __generic_file_fsync(struct file *file, loff_t start, loff_t end, 1126 int datasync) 1127 { 1128 struct inode *inode = file->f_mapping->host; 1129 int err; 1130 int ret; 1131 1132 err = file_write_and_wait_range(file, start, end); 1133 if (err) 1134 return err; 1135 1136 inode_lock(inode); 1137 ret = sync_mapping_buffers(inode->i_mapping); 1138 if (!(inode->i_state & I_DIRTY_ALL)) 1139 goto out; 1140 if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) 1141 goto out; 1142 1143 err = sync_inode_metadata(inode, 1); 1144 if (ret == 0) 1145 ret = err; 1146 1147 out: 1148 inode_unlock(inode); 1149 /* check and advance again to catch errors after syncing out buffers */ 1150 err = file_check_and_advance_wb_err(file); 1151 if (ret == 0) 1152 ret = err; 1153 return ret; 1154 } 1155 EXPORT_SYMBOL(__generic_file_fsync); 1156 1157 /** 1158 * generic_file_fsync - generic fsync implementation for simple filesystems 1159 * with flush 1160 * @file: file to synchronize 1161 * @start: start offset in bytes 1162 * @end: end offset in bytes (inclusive) 1163 * @datasync: only synchronize essential metadata if true 1164 * 1165 */ 1166 1167 int generic_file_fsync(struct file *file, loff_t start, loff_t end, 1168 int datasync) 1169 { 1170 struct inode *inode = file->f_mapping->host; 1171 int err; 1172 1173 err = __generic_file_fsync(file, start, end, datasync); 
1174 if (err) 1175 return err; 1176 return blkdev_issue_flush(inode->i_sb->s_bdev); 1177 } 1178 EXPORT_SYMBOL(generic_file_fsync); 1179 1180 /** 1181 * generic_check_addressable - Check addressability of file system 1182 * @blocksize_bits: log of file system block size 1183 * @num_blocks: number of blocks in file system 1184 * 1185 * Determine whether a file system with @num_blocks blocks (and a 1186 * block size of 2**@blocksize_bits) is addressable by the sector_t 1187 * and page cache of the system. Return 0 if so and -EFBIG otherwise. 1188 */ 1189 int generic_check_addressable(unsigned blocksize_bits, u64 num_blocks) 1190 { 1191 u64 last_fs_block = num_blocks - 1; 1192 u64 last_fs_page = 1193 last_fs_block >> (PAGE_SHIFT - blocksize_bits); 1194 1195 if (unlikely(num_blocks == 0)) 1196 return 0; 1197 1198 if ((blocksize_bits < 9) || (blocksize_bits > PAGE_SHIFT)) 1199 return -EINVAL; 1200 1201 if ((last_fs_block > (sector_t)(~0ULL) >> (blocksize_bits - 9)) || 1202 (last_fs_page > (pgoff_t)(~0ULL))) { 1203 return -EFBIG; 1204 } 1205 return 0; 1206 } 1207 EXPORT_SYMBOL(generic_check_addressable); 1208 1209 /* 1210 * No-op implementation of ->fsync for in-memory filesystems. 1211 */ 1212 int noop_fsync(struct file *file, loff_t start, loff_t end, int datasync) 1213 { 1214 return 0; 1215 } 1216 EXPORT_SYMBOL(noop_fsync); 1217 1218 ssize_t noop_direct_IO(struct kiocb *iocb, struct iov_iter *iter) 1219 { 1220 /* 1221 * iomap based filesystems support direct I/O without need for 1222 * this callback. However, it still needs to be set in 1223 * inode->a_ops so that open/fcntl know that direct I/O is 1224 * generally supported. 
1225 */ 1226 return -EINVAL; 1227 } 1228 EXPORT_SYMBOL_GPL(noop_direct_IO); 1229 1230 /* Because kfree isn't assignment-compatible with void(void*) ;-/ */ 1231 void kfree_link(void *p) 1232 { 1233 kfree(p); 1234 } 1235 EXPORT_SYMBOL(kfree_link); 1236 1237 struct inode *alloc_anon_inode(struct super_block *s) 1238 { 1239 static const struct address_space_operations anon_aops = { 1240 .dirty_folio = noop_dirty_folio, 1241 }; 1242 struct inode *inode = new_inode_pseudo(s); 1243 1244 if (!inode) 1245 return ERR_PTR(-ENOMEM); 1246 1247 inode->i_ino = get_next_ino(); 1248 inode->i_mapping->a_ops = &anon_aops; 1249 1250 /* 1251 * Mark the inode dirty from the very beginning, 1252 * that way it will never be moved to the dirty 1253 * list because mark_inode_dirty() will think 1254 * that it already _is_ on the dirty list. 1255 */ 1256 inode->i_state = I_DIRTY; 1257 inode->i_mode = S_IRUSR | S_IWUSR; 1258 inode->i_uid = current_fsuid(); 1259 inode->i_gid = current_fsgid(); 1260 inode->i_flags |= S_PRIVATE; 1261 inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); 1262 return inode; 1263 } 1264 EXPORT_SYMBOL(alloc_anon_inode); 1265 1266 /** 1267 * simple_nosetlease - generic helper for prohibiting leases 1268 * @filp: file pointer 1269 * @arg: type of lease to obtain 1270 * @flp: new lease supplied for insertion 1271 * @priv: private data for lm_setup operation 1272 * 1273 * Generic helper for filesystems that do not wish to allow leases to be set. 1274 * All arguments are ignored and it just returns -EINVAL. 
1275 */ 1276 int 1277 simple_nosetlease(struct file *filp, long arg, struct file_lock **flp, 1278 void **priv) 1279 { 1280 return -EINVAL; 1281 } 1282 EXPORT_SYMBOL(simple_nosetlease); 1283 1284 /** 1285 * simple_get_link - generic helper to get the target of "fast" symlinks 1286 * @dentry: not used here 1287 * @inode: the symlink inode 1288 * @done: not used here 1289 * 1290 * Generic helper for filesystems to use for symlink inodes where a pointer to 1291 * the symlink target is stored in ->i_link. NOTE: this isn't normally called, 1292 * since as an optimization the path lookup code uses any non-NULL ->i_link 1293 * directly, without calling ->get_link(). But ->get_link() still must be set, 1294 * to mark the inode_operations as being for a symlink. 1295 * 1296 * Return: the symlink target 1297 */ 1298 const char *simple_get_link(struct dentry *dentry, struct inode *inode, 1299 struct delayed_call *done) 1300 { 1301 return inode->i_link; 1302 } 1303 EXPORT_SYMBOL(simple_get_link); 1304 1305 const struct inode_operations simple_symlink_inode_operations = { 1306 .get_link = simple_get_link, 1307 }; 1308 EXPORT_SYMBOL(simple_symlink_inode_operations); 1309 1310 /* 1311 * Operations for a permanently empty directory. 
1312 */ 1313 static struct dentry *empty_dir_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) 1314 { 1315 return ERR_PTR(-ENOENT); 1316 } 1317 1318 static int empty_dir_getattr(struct mnt_idmap *idmap, 1319 const struct path *path, struct kstat *stat, 1320 u32 request_mask, unsigned int query_flags) 1321 { 1322 struct inode *inode = d_inode(path->dentry); 1323 generic_fillattr(&nop_mnt_idmap, inode, stat); 1324 return 0; 1325 } 1326 1327 static int empty_dir_setattr(struct mnt_idmap *idmap, 1328 struct dentry *dentry, struct iattr *attr) 1329 { 1330 return -EPERM; 1331 } 1332 1333 static ssize_t empty_dir_listxattr(struct dentry *dentry, char *list, size_t size) 1334 { 1335 return -EOPNOTSUPP; 1336 } 1337 1338 static const struct inode_operations empty_dir_inode_operations = { 1339 .lookup = empty_dir_lookup, 1340 .permission = generic_permission, 1341 .setattr = empty_dir_setattr, 1342 .getattr = empty_dir_getattr, 1343 .listxattr = empty_dir_listxattr, 1344 }; 1345 1346 static loff_t empty_dir_llseek(struct file *file, loff_t offset, int whence) 1347 { 1348 /* An empty directory has two entries . and .. 
at offsets 0 and 1 */ 1349 return generic_file_llseek_size(file, offset, whence, 2, 2); 1350 } 1351 1352 static int empty_dir_readdir(struct file *file, struct dir_context *ctx) 1353 { 1354 dir_emit_dots(file, ctx); 1355 return 0; 1356 } 1357 1358 static const struct file_operations empty_dir_operations = { 1359 .llseek = empty_dir_llseek, 1360 .read = generic_read_dir, 1361 .iterate_shared = empty_dir_readdir, 1362 .fsync = noop_fsync, 1363 }; 1364 1365 1366 void make_empty_dir_inode(struct inode *inode) 1367 { 1368 set_nlink(inode, 2); 1369 inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO; 1370 inode->i_uid = GLOBAL_ROOT_UID; 1371 inode->i_gid = GLOBAL_ROOT_GID; 1372 inode->i_rdev = 0; 1373 inode->i_size = 0; 1374 inode->i_blkbits = PAGE_SHIFT; 1375 inode->i_blocks = 0; 1376 1377 inode->i_op = &empty_dir_inode_operations; 1378 inode->i_opflags &= ~IOP_XATTR; 1379 inode->i_fop = &empty_dir_operations; 1380 } 1381 1382 bool is_empty_dir_inode(struct inode *inode) 1383 { 1384 return (inode->i_fop == &empty_dir_operations) && 1385 (inode->i_op == &empty_dir_inode_operations); 1386 } 1387 1388 #if IS_ENABLED(CONFIG_UNICODE) 1389 /* 1390 * Determine if the name of a dentry should be casefolded. 
1391 * 1392 * Return: if names will need casefolding 1393 */ 1394 static bool needs_casefold(const struct inode *dir) 1395 { 1396 return IS_CASEFOLDED(dir) && dir->i_sb->s_encoding; 1397 } 1398 1399 /** 1400 * generic_ci_d_compare - generic d_compare implementation for casefolding filesystems 1401 * @dentry: dentry whose name we are checking against 1402 * @len: len of name of dentry 1403 * @str: str pointer to name of dentry 1404 * @name: Name to compare against 1405 * 1406 * Return: 0 if names match, 1 if mismatch, or -ERRNO 1407 */ 1408 static int generic_ci_d_compare(const struct dentry *dentry, unsigned int len, 1409 const char *str, const struct qstr *name) 1410 { 1411 const struct dentry *parent = READ_ONCE(dentry->d_parent); 1412 const struct inode *dir = READ_ONCE(parent->d_inode); 1413 const struct super_block *sb = dentry->d_sb; 1414 const struct unicode_map *um = sb->s_encoding; 1415 struct qstr qstr = QSTR_INIT(str, len); 1416 char strbuf[DNAME_INLINE_LEN]; 1417 int ret; 1418 1419 if (!dir || !needs_casefold(dir)) 1420 goto fallback; 1421 /* 1422 * If the dentry name is stored in-line, then it may be concurrently 1423 * modified by a rename. If this happens, the VFS will eventually retry 1424 * the lookup, so it doesn't matter what ->d_compare() returns. 1425 * However, it's unsafe to call utf8_strncasecmp() with an unstable 1426 * string. Therefore, we have to copy the name into a temporary buffer. 
1427 */ 1428 if (len <= DNAME_INLINE_LEN - 1) { 1429 memcpy(strbuf, str, len); 1430 strbuf[len] = 0; 1431 qstr.name = strbuf; 1432 /* prevent compiler from optimizing out the temporary buffer */ 1433 barrier(); 1434 } 1435 ret = utf8_strncasecmp(um, name, &qstr); 1436 if (ret >= 0) 1437 return ret; 1438 1439 if (sb_has_strict_encoding(sb)) 1440 return -EINVAL; 1441 fallback: 1442 if (len != name->len) 1443 return 1; 1444 return !!memcmp(str, name->name, len); 1445 } 1446 1447 /** 1448 * generic_ci_d_hash - generic d_hash implementation for casefolding filesystems 1449 * @dentry: dentry of the parent directory 1450 * @str: qstr of name whose hash we should fill in 1451 * 1452 * Return: 0 if hash was successful or unchanged, and -EINVAL on error 1453 */ 1454 static int generic_ci_d_hash(const struct dentry *dentry, struct qstr *str) 1455 { 1456 const struct inode *dir = READ_ONCE(dentry->d_inode); 1457 struct super_block *sb = dentry->d_sb; 1458 const struct unicode_map *um = sb->s_encoding; 1459 int ret = 0; 1460 1461 if (!dir || !needs_casefold(dir)) 1462 return 0; 1463 1464 ret = utf8_casefold_hash(um, dentry, str); 1465 if (ret < 0 && sb_has_strict_encoding(sb)) 1466 return -EINVAL; 1467 return 0; 1468 } 1469 1470 static const struct dentry_operations generic_ci_dentry_ops = { 1471 .d_hash = generic_ci_d_hash, 1472 .d_compare = generic_ci_d_compare, 1473 }; 1474 #endif 1475 1476 #ifdef CONFIG_FS_ENCRYPTION 1477 static const struct dentry_operations generic_encrypted_dentry_ops = { 1478 .d_revalidate = fscrypt_d_revalidate, 1479 }; 1480 #endif 1481 1482 #if defined(CONFIG_FS_ENCRYPTION) && IS_ENABLED(CONFIG_UNICODE) 1483 static const struct dentry_operations generic_encrypted_ci_dentry_ops = { 1484 .d_hash = generic_ci_d_hash, 1485 .d_compare = generic_ci_d_compare, 1486 .d_revalidate = fscrypt_d_revalidate, 1487 }; 1488 #endif 1489 1490 /** 1491 * generic_set_encrypted_ci_d_ops - helper for setting d_ops for given dentry 1492 * @dentry: dentry to set ops on 1493 
* 1494 * Casefolded directories need d_hash and d_compare set, so that the dentries 1495 * contained in them are handled case-insensitively. Note that these operations 1496 * are needed on the parent directory rather than on the dentries in it, and 1497 * while the casefolding flag can be toggled on and off on an empty directory, 1498 * dentry_operations can't be changed later. As a result, if the filesystem has 1499 * casefolding support enabled at all, we have to give all dentries the 1500 * casefolding operations even if their inode doesn't have the casefolding flag 1501 * currently (and thus the casefolding ops would be no-ops for now). 1502 * 1503 * Encryption works differently in that the only dentry operation it needs is 1504 * d_revalidate, which it only needs on dentries that have the no-key name flag. 1505 * The no-key flag can't be set "later", so we don't have to worry about that. 1506 * 1507 * Finally, to maximize compatibility with overlayfs (which isn't compatible 1508 * with certain dentry operations) and to avoid taking an unnecessary 1509 * performance hit, we use custom dentry_operations for each possible 1510 * combination rather than always installing all operations. 
1511 */ 1512 void generic_set_encrypted_ci_d_ops(struct dentry *dentry) 1513 { 1514 #ifdef CONFIG_FS_ENCRYPTION 1515 bool needs_encrypt_ops = dentry->d_flags & DCACHE_NOKEY_NAME; 1516 #endif 1517 #if IS_ENABLED(CONFIG_UNICODE) 1518 bool needs_ci_ops = dentry->d_sb->s_encoding; 1519 #endif 1520 #if defined(CONFIG_FS_ENCRYPTION) && IS_ENABLED(CONFIG_UNICODE) 1521 if (needs_encrypt_ops && needs_ci_ops) { 1522 d_set_d_op(dentry, &generic_encrypted_ci_dentry_ops); 1523 return; 1524 } 1525 #endif 1526 #ifdef CONFIG_FS_ENCRYPTION 1527 if (needs_encrypt_ops) { 1528 d_set_d_op(dentry, &generic_encrypted_dentry_ops); 1529 return; 1530 } 1531 #endif 1532 #if IS_ENABLED(CONFIG_UNICODE) 1533 if (needs_ci_ops) { 1534 d_set_d_op(dentry, &generic_ci_dentry_ops); 1535 return; 1536 } 1537 #endif 1538 } 1539 EXPORT_SYMBOL(generic_set_encrypted_ci_d_ops); 1540 1541 /** 1542 * inode_maybe_inc_iversion - increments i_version 1543 * @inode: inode with the i_version that should be updated 1544 * @force: increment the counter even if it's not necessary? 1545 * 1546 * Every time the inode is modified, the i_version field must be seen to have 1547 * changed by any observer. 1548 * 1549 * If "force" is set or the QUERIED flag is set, then ensure that we increment 1550 * the value, and clear the queried flag. 1551 * 1552 * In the common case where neither is set, then we can return "false" without 1553 * updating i_version. 1554 * 1555 * If this function returns false, and no other metadata has changed, then we 1556 * can avoid logging the metadata. 1557 */ 1558 bool inode_maybe_inc_iversion(struct inode *inode, bool force) 1559 { 1560 u64 cur, new; 1561 1562 /* 1563 * The i_version field is not strictly ordered with any other inode 1564 * information, but the legacy inode_inc_iversion code used a spinlock 1565 * to serialize increments. 1566 * 1567 * Here, we add full memory barriers to ensure that any de-facto 1568 * ordering with other info is preserved. 
1569 * 1570 * This barrier pairs with the barrier in inode_query_iversion() 1571 */ 1572 smp_mb(); 1573 cur = inode_peek_iversion_raw(inode); 1574 do { 1575 /* If flag is clear then we needn't do anything */ 1576 if (!force && !(cur & I_VERSION_QUERIED)) 1577 return false; 1578 1579 /* Since lowest bit is flag, add 2 to avoid it */ 1580 new = (cur & ~I_VERSION_QUERIED) + I_VERSION_INCREMENT; 1581 } while (!atomic64_try_cmpxchg(&inode->i_version, &cur, new)); 1582 return true; 1583 } 1584 EXPORT_SYMBOL(inode_maybe_inc_iversion); 1585 1586 /** 1587 * inode_query_iversion - read i_version for later use 1588 * @inode: inode from which i_version should be read 1589 * 1590 * Read the inode i_version counter. This should be used by callers that wish 1591 * to store the returned i_version for later comparison. This will guarantee 1592 * that a later query of the i_version will result in a different value if 1593 * anything has changed. 1594 * 1595 * In this implementation, we fetch the current value, set the QUERIED flag and 1596 * then try to swap it into place with a cmpxchg, if it wasn't already set. If 1597 * that fails, we try again with the newly fetched value from the cmpxchg. 1598 */ 1599 u64 inode_query_iversion(struct inode *inode) 1600 { 1601 u64 cur, new; 1602 1603 cur = inode_peek_iversion_raw(inode); 1604 do { 1605 /* If flag is already set, then no need to swap */ 1606 if (cur & I_VERSION_QUERIED) { 1607 /* 1608 * This barrier (and the implicit barrier in the 1609 * cmpxchg below) pairs with the barrier in 1610 * inode_maybe_inc_iversion(). 1611 */ 1612 smp_mb(); 1613 break; 1614 } 1615 1616 new = cur | I_VERSION_QUERIED; 1617 } while (!atomic64_try_cmpxchg(&inode->i_version, &cur, new)); 1618 return cur >> I_VERSION_QUERIED_SHIFT; 1619 } 1620 EXPORT_SYMBOL(inode_query_iversion); 1621