1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * fs/kernfs/dir.c - kernfs directory implementation 4 * 5 * Copyright (c) 2001-3 Patrick Mochel 6 * Copyright (c) 2007 SUSE Linux Products GmbH 7 * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org> 8 */ 9 10 #include <linux/sched.h> 11 #include <linux/fs.h> 12 #include <linux/namei.h> 13 #include <linux/idr.h> 14 #include <linux/slab.h> 15 #include <linux/security.h> 16 #include <linux/hash.h> 17 18 #include "kernfs-internal.h" 19 20 static DEFINE_SPINLOCK(kernfs_rename_lock); /* kn->parent and ->name */ 21 /* 22 * Don't use rename_lock to piggy back on pr_cont_buf. We don't want to 23 * call pr_cont() while holding rename_lock. Because sometimes pr_cont() 24 * will perform wakeups when releasing console_sem. Holding rename_lock 25 * will introduce deadlock if the scheduler reads the kernfs_name in the 26 * wakeup path. 27 */ 28 static DEFINE_SPINLOCK(kernfs_pr_cont_lock); 29 static char kernfs_pr_cont_buf[PATH_MAX]; /* protected by pr_cont_lock */ 30 static DEFINE_SPINLOCK(kernfs_idr_lock); /* root->ino_idr */ 31 32 #define rb_to_kn(X) rb_entry((X), struct kernfs_node, rb) 33 34 static bool kernfs_active(struct kernfs_node *kn) 35 { 36 lockdep_assert_held(&kernfs_root(kn)->kernfs_rwsem); 37 return atomic_read(&kn->active) >= 0; 38 } 39 40 static bool kernfs_lockdep(struct kernfs_node *kn) 41 { 42 #ifdef CONFIG_DEBUG_LOCK_ALLOC 43 return kn->flags & KERNFS_LOCKDEP; 44 #else 45 return false; 46 #endif 47 } 48 49 static int kernfs_name_locked(struct kernfs_node *kn, char *buf, size_t buflen) 50 { 51 if (!kn) 52 return strlcpy(buf, "(null)", buflen); 53 54 return strlcpy(buf, kn->parent ? kn->name : "/", buflen); 55 } 56 57 /* kernfs_node_depth - compute depth from @from to @to */ 58 static size_t kernfs_depth(struct kernfs_node *from, struct kernfs_node *to) 59 { 60 size_t depth = 0; 61 62 while (to->parent && to != from) { 63 depth++; 64 to = to->parent; 65 } 66 return depth; 67 } 68 69 static struct kernfs_node *kernfs_common_ancestor(struct kernfs_node *a, 70 struct kernfs_node *b) 71 { 72 size_t da, db; 73 struct kernfs_root *ra = kernfs_root(a), *rb = kernfs_root(b); 74 75 if (ra != rb) 76 return NULL; 77 78 da = kernfs_depth(ra->kn, a); 79 db = kernfs_depth(rb->kn, b); 80 81 while (da > db) { 82 a = a->parent; 83 da--; 84 } 85 while (db > da) { 86 b = b->parent; 87 db--; 88 } 89 90 /* worst case b and a will be the same at root */ 91 while (b != a) { 92 b = b->parent; 93 a = a->parent; 94 } 95 96 return a; 97 } 98 99 /** 100 * kernfs_path_from_node_locked - find a pseudo-absolute path to @kn_to, 101 * where kn_from is treated as root of the path. 102 * @kn_from: kernfs node which should be treated as root for the path 103 * @kn_to: kernfs node to which path is needed 104 * @buf: buffer to copy the path into 105 * @buflen: size of @buf 106 * 107 * We need to handle couple of scenarios here: 108 * [1] when @kn_from is an ancestor of @kn_to at some level 109 * kn_from: /n1/n2/n3 110 * kn_to: /n1/n2/n3/n4/n5 111 * result: /n4/n5 112 * 113 * [2] when @kn_from is on a different hierarchy and we need to find common 114 * ancestor between @kn_from and @kn_to. 115 * kn_from: /n1/n2/n3/n4 116 * kn_to: /n1/n2/n5 117 * result: /../../n5 118 * OR 119 * kn_from: /n1/n2/n3/n4/n5 [depth=5] 120 * kn_to: /n1/n2/n3 [depth=3] 121 * result: /../.. 122 * 123 * [3] when @kn_to is NULL result will be "(null)" 124 * 125 * Returns the length of the full path. 
If the full length is equal to or 126 * greater than @buflen, @buf contains the truncated path with the trailing 127 * '\0'. On error, -errno is returned. 128 */ 129 static int kernfs_path_from_node_locked(struct kernfs_node *kn_to, 130 struct kernfs_node *kn_from, 131 char *buf, size_t buflen) 132 { 133 struct kernfs_node *kn, *common; 134 const char parent_str[] = "/.."; 135 size_t depth_from, depth_to, len = 0; 136 int i, j; 137 138 if (!kn_to) 139 return strlcpy(buf, "(null)", buflen); 140 141 if (!kn_from) 142 kn_from = kernfs_root(kn_to)->kn; 143 144 if (kn_from == kn_to) 145 return strlcpy(buf, "/", buflen); 146 147 if (!buf) 148 return -EINVAL; 149 150 common = kernfs_common_ancestor(kn_from, kn_to); 151 if (WARN_ON(!common)) 152 return -EINVAL; 153 154 depth_to = kernfs_depth(common, kn_to); 155 depth_from = kernfs_depth(common, kn_from); 156 157 buf[0] = '\0'; 158 159 for (i = 0; i < depth_from; i++) 160 len += strlcpy(buf + len, parent_str, 161 len < buflen ? buflen - len : 0); 162 163 /* Calculate how many bytes we need for the rest */ 164 for (i = depth_to - 1; i >= 0; i--) { 165 for (kn = kn_to, j = 0; j < i; j++) 166 kn = kn->parent; 167 len += strlcpy(buf + len, "/", 168 len < buflen ? buflen - len : 0); 169 len += strlcpy(buf + len, kn->name, 170 len < buflen ? buflen - len : 0); 171 } 172 173 return len; 174 } 175 176 /** 177 * kernfs_name - obtain the name of a given node 178 * @kn: kernfs_node of interest 179 * @buf: buffer to copy @kn's name into 180 * @buflen: size of @buf 181 * 182 * Copies the name of @kn into @buf of @buflen bytes. The behavior is 183 * similar to strlcpy(). It returns the length of @kn's name and if @buf 184 * isn't long enough, it's filled upto @buflen-1 and nul terminated. 185 * 186 * Fills buffer with "(null)" if @kn is NULL. 187 * 188 * This function can be called from any context. 189 */ 190 int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen) 191 { 192 unsigned long flags; 193 int ret; 194 195 spin_lock_irqsave(&kernfs_rename_lock, flags); 196 ret = kernfs_name_locked(kn, buf, buflen); 197 spin_unlock_irqrestore(&kernfs_rename_lock, flags); 198 return ret; 199 } 200 201 /** 202 * kernfs_path_from_node - build path of node @to relative to @from. 203 * @from: parent kernfs_node relative to which we need to build the path 204 * @to: kernfs_node of interest 205 * @buf: buffer to copy @to's path into 206 * @buflen: size of @buf 207 * 208 * Builds @to's path relative to @from in @buf. @from and @to must 209 * be on the same kernfs-root. If @from is not parent of @to, then a relative 210 * path (which includes '..'s) as needed to reach from @from to @to is 211 * returned. 212 * 213 * Returns the length of the full path. If the full length is equal to or 214 * greater than @buflen, @buf contains the truncated path with the trailing 215 * '\0'. On error, -errno is returned. 216 */ 217 int kernfs_path_from_node(struct kernfs_node *to, struct kernfs_node *from, 218 char *buf, size_t buflen) 219 { 220 unsigned long flags; 221 int ret; 222 223 spin_lock_irqsave(&kernfs_rename_lock, flags); 224 ret = kernfs_path_from_node_locked(to, from, buf, buflen); 225 spin_unlock_irqrestore(&kernfs_rename_lock, flags); 226 return ret; 227 } 228 EXPORT_SYMBOL_GPL(kernfs_path_from_node); 229 230 /** 231 * pr_cont_kernfs_name - pr_cont name of a kernfs_node 232 * @kn: kernfs_node of interest 233 * 234 * This function can be called from any context. 
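 *
 * A minimal usage sketch (the surrounding pr_info()/pr_cont() calls are a
 * hypothetical caller, not part of kernfs):
 *
 *	pr_info("kernfs: draining ");
 *	pr_cont_kernfs_name(kn);
 *	pr_cont("\n");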
 */
void pr_cont_kernfs_name(struct kernfs_node *kn)
{
	unsigned long flags;

	spin_lock_irqsave(&kernfs_pr_cont_lock, flags);

	kernfs_name(kn, kernfs_pr_cont_buf, sizeof(kernfs_pr_cont_buf));
	pr_cont("%s", kernfs_pr_cont_buf);

	spin_unlock_irqrestore(&kernfs_pr_cont_lock, flags);
}

/**
 * pr_cont_kernfs_path - pr_cont path of a kernfs_node
 * @kn: kernfs_node of interest
 *
 * This function can be called from any context.
 */
void pr_cont_kernfs_path(struct kernfs_node *kn)
{
	unsigned long flags;
	int sz;

	spin_lock_irqsave(&kernfs_pr_cont_lock, flags);

	sz = kernfs_path_from_node(kn, NULL, kernfs_pr_cont_buf,
				   sizeof(kernfs_pr_cont_buf));
	if (sz < 0) {
		pr_cont("(error)");
		goto out;
	}

	if (sz >= sizeof(kernfs_pr_cont_buf)) {
		pr_cont("(name too long)");
		goto out;
	}

	pr_cont("%s", kernfs_pr_cont_buf);

out:
	spin_unlock_irqrestore(&kernfs_pr_cont_lock, flags);
}

/**
 * kernfs_get_parent - determine the parent node and pin it
 * @kn: kernfs_node of interest
 *
 * Determines @kn's parent, pins and returns it. This function can be
 * called from any context.
 */
struct kernfs_node *kernfs_get_parent(struct kernfs_node *kn)
{
	struct kernfs_node *parent;
	unsigned long flags;

	spin_lock_irqsave(&kernfs_rename_lock, flags);
	parent = kn->parent;
	kernfs_get(parent);
	spin_unlock_irqrestore(&kernfs_rename_lock, flags);

	return parent;
}

/**
 * kernfs_name_hash - calculate the hash of a name/namespace pair
 * @name: Null terminated string to hash
 * @ns: Namespace tag to hash
 *
 * Returns 31 bit hash of ns + name (so it fits in an off_t)
 */
static unsigned int kernfs_name_hash(const char *name, const void *ns)
{
	unsigned long hash = init_name_hash(ns);
	unsigned int len = strlen(name);
	while (len--)
		hash = partial_name_hash(*name++, hash);
	hash = end_name_hash(hash);
	hash &= 0x7fffffffU;
	/* Reserve hash numbers 0, 1 and INT_MAX for magic directory entries */
	if (hash < 2)
		hash += 2;
	if (hash >= INT_MAX)
		hash = INT_MAX - 1;
	return hash;
}

static int kernfs_name_compare(unsigned int hash, const char *name,
			       const void *ns, const struct kernfs_node *kn)
{
	if (hash < kn->hash)
		return -1;
	if (hash > kn->hash)
		return 1;
	if (ns < kn->ns)
		return -1;
	if (ns > kn->ns)
		return 1;
	return strcmp(name, kn->name);
}

static int kernfs_sd_compare(const struct kernfs_node *left,
			     const struct kernfs_node *right)
{
	return kernfs_name_compare(left->hash, left->name, left->ns, right);
}

/**
 * kernfs_link_sibling - link kernfs_node into sibling rbtree
 * @kn: kernfs_node of interest
 *
 * Link @kn into its sibling rbtree which starts from
 * @kn->parent->dir.children.
 *
 * Locking:
 * kernfs_rwsem held exclusive
 *
 * RETURNS:
 * 0 on success, -EEXIST on failure.
 */
static int kernfs_link_sibling(struct kernfs_node *kn)
{
	struct rb_node **node = &kn->parent->dir.children.rb_node;
	struct rb_node *parent = NULL;

	while (*node) {
		struct kernfs_node *pos;
		int result;

		pos = rb_to_kn(*node);
		parent = *node;
		result = kernfs_sd_compare(kn, pos);
		if (result < 0)
			node = &pos->rb.rb_left;
		else if (result > 0)
			node = &pos->rb.rb_right;
		else
			return -EEXIST;
	}

	/* add new node and rebalance the tree */
	rb_link_node(&kn->rb, parent, node);
	rb_insert_color(&kn->rb, &kn->parent->dir.children);

	/* successfully added, account subdir number */
	if (kernfs_type(kn) == KERNFS_DIR)
		kn->parent->dir.subdirs++;
	kernfs_inc_rev(kn->parent);

	return 0;
}

/**
 * kernfs_unlink_sibling - unlink kernfs_node from sibling rbtree
 * @kn: kernfs_node of interest
 *
 * Try to unlink @kn from its sibling rbtree which starts from
 * kn->parent->dir.children. Returns %true if @kn was actually
 * removed, %false if @kn wasn't on the rbtree.
 *
 * Locking:
 * kernfs_rwsem held exclusive
 */
static bool kernfs_unlink_sibling(struct kernfs_node *kn)
{
	if (RB_EMPTY_NODE(&kn->rb))
		return false;

	if (kernfs_type(kn) == KERNFS_DIR)
		kn->parent->dir.subdirs--;
	kernfs_inc_rev(kn->parent);

	rb_erase(&kn->rb, &kn->parent->dir.children);
	RB_CLEAR_NODE(&kn->rb);
	return true;
}

/**
 * kernfs_get_active - get an active reference to kernfs_node
 * @kn: kernfs_node to get an active reference to
 *
 * Get an active reference of @kn. This function is a noop if @kn
 * is NULL.
 *
 * RETURNS:
 * Pointer to @kn on success, NULL on failure.
 */
struct kernfs_node *kernfs_get_active(struct kernfs_node *kn)
{
	if (unlikely(!kn))
		return NULL;

	if (!atomic_inc_unless_negative(&kn->active))
		return NULL;

	if (kernfs_lockdep(kn))
		rwsem_acquire_read(&kn->dep_map, 0, 1, _RET_IP_);
	return kn;
}

/**
 * kernfs_put_active - put an active reference to kernfs_node
 * @kn: kernfs_node to put an active reference to
 *
 * Put an active reference to @kn. This function is a noop if @kn
 * is NULL.
 */
void kernfs_put_active(struct kernfs_node *kn)
{
	int v;

	if (unlikely(!kn))
		return;

	if (kernfs_lockdep(kn))
		rwsem_release(&kn->dep_map, _RET_IP_);
	v = atomic_dec_return(&kn->active);
	if (likely(v != KN_DEACTIVATED_BIAS))
		return;

	wake_up_all(&kernfs_root(kn)->deactivate_waitq);
}

/**
 * kernfs_drain - drain kernfs_node
 * @kn: kernfs_node to drain
 *
 * Drain existing usages and nuke all existing mmaps of @kn. Multiple
 * removers may invoke this function concurrently on @kn and all will
 * return after draining is complete.
 */
static void kernfs_drain(struct kernfs_node *kn)
	__releases(&kernfs_root(kn)->kernfs_rwsem)
	__acquires(&kernfs_root(kn)->kernfs_rwsem)
{
	struct kernfs_root *root = kernfs_root(kn);

	lockdep_assert_held_write(&root->kernfs_rwsem);
	WARN_ON_ONCE(kernfs_active(kn));

	/*
	 * Skip draining if already fully drained. This avoids draining and its
	 * lockdep annotations for nodes which have never been activated,
	 * allowing embedding kernfs_remove() in create error paths without
	 * worrying about draining.
480 */ 481 if (atomic_read(&kn->active) == KN_DEACTIVATED_BIAS && 482 !kernfs_should_drain_open_files(kn)) 483 return; 484 485 up_write(&root->kernfs_rwsem); 486 487 if (kernfs_lockdep(kn)) { 488 rwsem_acquire(&kn->dep_map, 0, 0, _RET_IP_); 489 if (atomic_read(&kn->active) != KN_DEACTIVATED_BIAS) 490 lock_contended(&kn->dep_map, _RET_IP_); 491 } 492 493 wait_event(root->deactivate_waitq, 494 atomic_read(&kn->active) == KN_DEACTIVATED_BIAS); 495 496 if (kernfs_lockdep(kn)) { 497 lock_acquired(&kn->dep_map, _RET_IP_); 498 rwsem_release(&kn->dep_map, _RET_IP_); 499 } 500 501 if (kernfs_should_drain_open_files(kn)) 502 kernfs_drain_open_files(kn); 503 504 down_write(&root->kernfs_rwsem); 505 } 506 507 /** 508 * kernfs_get - get a reference count on a kernfs_node 509 * @kn: the target kernfs_node 510 */ 511 void kernfs_get(struct kernfs_node *kn) 512 { 513 if (kn) { 514 WARN_ON(!atomic_read(&kn->count)); 515 atomic_inc(&kn->count); 516 } 517 } 518 EXPORT_SYMBOL_GPL(kernfs_get); 519 520 /** 521 * kernfs_put - put a reference count on a kernfs_node 522 * @kn: the target kernfs_node 523 * 524 * Put a reference count of @kn and destroy it if it reached zero. 525 */ 526 void kernfs_put(struct kernfs_node *kn) 527 { 528 struct kernfs_node *parent; 529 struct kernfs_root *root; 530 531 if (!kn || !atomic_dec_and_test(&kn->count)) 532 return; 533 root = kernfs_root(kn); 534 repeat: 535 /* 536 * Moving/renaming is always done while holding reference. 537 * kn->parent won't change beneath us. 538 */ 539 parent = kn->parent; 540 541 WARN_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS, 542 "kernfs_put: %s/%s: released with incorrect active_ref %d\n", 543 parent ? parent->name : "", kn->name, atomic_read(&kn->active)); 544 545 if (kernfs_type(kn) == KERNFS_LINK) 546 kernfs_put(kn->symlink.target_kn); 547 548 kfree_const(kn->name); 549 550 if (kn->iattr) { 551 simple_xattrs_free(&kn->iattr->xattrs); 552 kmem_cache_free(kernfs_iattrs_cache, kn->iattr); 553 } 554 spin_lock(&kernfs_idr_lock); 555 idr_remove(&root->ino_idr, (u32)kernfs_ino(kn)); 556 spin_unlock(&kernfs_idr_lock); 557 kmem_cache_free(kernfs_node_cache, kn); 558 559 kn = parent; 560 if (kn) { 561 if (atomic_dec_and_test(&kn->count)) 562 goto repeat; 563 } else { 564 /* just released the root kn, free @root too */ 565 idr_destroy(&root->ino_idr); 566 kfree(root); 567 } 568 } 569 EXPORT_SYMBOL_GPL(kernfs_put); 570 571 /** 572 * kernfs_node_from_dentry - determine kernfs_node associated with a dentry 573 * @dentry: the dentry in question 574 * 575 * Return the kernfs_node associated with @dentry. If @dentry is not a 576 * kernfs one, %NULL is returned. 577 * 578 * While the returned kernfs_node will stay accessible as long as @dentry 579 * is accessible, the returned node can be in any state and the caller is 580 * fully responsible for determining what's accessible. 
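 *
 * A minimal sketch of a caller (hypothetical; error handling elided):
 *
 *	struct kernfs_node *kn = kernfs_node_from_dentry(dentry);
 *
 *	if (kn)
 *		kernfs_get(kn);	/* pin it beyond the life of @dentry */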
581 */ 582 struct kernfs_node *kernfs_node_from_dentry(struct dentry *dentry) 583 { 584 if (dentry->d_sb->s_op == &kernfs_sops) 585 return kernfs_dentry_node(dentry); 586 return NULL; 587 } 588 589 static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root, 590 struct kernfs_node *parent, 591 const char *name, umode_t mode, 592 kuid_t uid, kgid_t gid, 593 unsigned flags) 594 { 595 struct kernfs_node *kn; 596 u32 id_highbits; 597 int ret; 598 599 name = kstrdup_const(name, GFP_KERNEL); 600 if (!name) 601 return NULL; 602 603 kn = kmem_cache_zalloc(kernfs_node_cache, GFP_KERNEL); 604 if (!kn) 605 goto err_out1; 606 607 idr_preload(GFP_KERNEL); 608 spin_lock(&kernfs_idr_lock); 609 ret = idr_alloc_cyclic(&root->ino_idr, kn, 1, 0, GFP_ATOMIC); 610 if (ret >= 0 && ret < root->last_id_lowbits) 611 root->id_highbits++; 612 id_highbits = root->id_highbits; 613 root->last_id_lowbits = ret; 614 spin_unlock(&kernfs_idr_lock); 615 idr_preload_end(); 616 if (ret < 0) 617 goto err_out2; 618 619 kn->id = (u64)id_highbits << 32 | ret; 620 621 atomic_set(&kn->count, 1); 622 atomic_set(&kn->active, KN_DEACTIVATED_BIAS); 623 RB_CLEAR_NODE(&kn->rb); 624 625 kn->name = name; 626 kn->mode = mode; 627 kn->flags = flags; 628 629 if (!uid_eq(uid, GLOBAL_ROOT_UID) || !gid_eq(gid, GLOBAL_ROOT_GID)) { 630 struct iattr iattr = { 631 .ia_valid = ATTR_UID | ATTR_GID, 632 .ia_uid = uid, 633 .ia_gid = gid, 634 }; 635 636 ret = __kernfs_setattr(kn, &iattr); 637 if (ret < 0) 638 goto err_out3; 639 } 640 641 if (parent) { 642 ret = security_kernfs_init_security(parent, kn); 643 if (ret) 644 goto err_out3; 645 } 646 647 return kn; 648 649 err_out3: 650 idr_remove(&root->ino_idr, (u32)kernfs_ino(kn)); 651 err_out2: 652 kmem_cache_free(kernfs_node_cache, kn); 653 err_out1: 654 kfree_const(name); 655 return NULL; 656 } 657 658 struct kernfs_node *kernfs_new_node(struct kernfs_node *parent, 659 const char *name, umode_t mode, 660 kuid_t uid, kgid_t gid, 661 unsigned flags) 662 { 663 struct kernfs_node *kn; 664 665 kn = __kernfs_new_node(kernfs_root(parent), parent, 666 name, mode, uid, gid, flags); 667 if (kn) { 668 kernfs_get(parent); 669 kn->parent = parent; 670 } 671 return kn; 672 } 673 674 /* 675 * kernfs_find_and_get_node_by_id - get kernfs_node from node id 676 * @root: the kernfs root 677 * @id: the target node id 678 * 679 * @id's lower 32bits encode ino and upper gen. If the gen portion is 680 * zero, all generations are matched. 681 * 682 * RETURNS: 683 * NULL on failure. 
Return a kernfs node with reference counter incremented 684 */ 685 struct kernfs_node *kernfs_find_and_get_node_by_id(struct kernfs_root *root, 686 u64 id) 687 { 688 struct kernfs_node *kn; 689 ino_t ino = kernfs_id_ino(id); 690 u32 gen = kernfs_id_gen(id); 691 692 spin_lock(&kernfs_idr_lock); 693 694 kn = idr_find(&root->ino_idr, (u32)ino); 695 if (!kn) 696 goto err_unlock; 697 698 if (sizeof(ino_t) >= sizeof(u64)) { 699 /* we looked up with the low 32bits, compare the whole */ 700 if (kernfs_ino(kn) != ino) 701 goto err_unlock; 702 } else { 703 /* 0 matches all generations */ 704 if (unlikely(gen && kernfs_gen(kn) != gen)) 705 goto err_unlock; 706 } 707 708 if (unlikely(!kernfs_active(kn) || !atomic_inc_not_zero(&kn->count))) 709 goto err_unlock; 710 711 spin_unlock(&kernfs_idr_lock); 712 return kn; 713 err_unlock: 714 spin_unlock(&kernfs_idr_lock); 715 return NULL; 716 } 717 718 /** 719 * kernfs_add_one - add kernfs_node to parent without warning 720 * @kn: kernfs_node to be added 721 * 722 * The caller must already have initialized @kn->parent. This 723 * function increments nlink of the parent's inode if @kn is a 724 * directory and link into the children list of the parent. 725 * 726 * RETURNS: 727 * 0 on success, -EEXIST if entry with the given name already 728 * exists. 729 */ 730 int kernfs_add_one(struct kernfs_node *kn) 731 { 732 struct kernfs_node *parent = kn->parent; 733 struct kernfs_root *root = kernfs_root(parent); 734 struct kernfs_iattrs *ps_iattr; 735 bool has_ns; 736 int ret; 737 738 down_write(&root->kernfs_rwsem); 739 740 ret = -EINVAL; 741 has_ns = kernfs_ns_enabled(parent); 742 if (WARN(has_ns != (bool)kn->ns, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n", 743 has_ns ? "required" : "invalid", parent->name, kn->name)) 744 goto out_unlock; 745 746 if (kernfs_type(parent) != KERNFS_DIR) 747 goto out_unlock; 748 749 ret = -ENOENT; 750 if (parent->flags & (KERNFS_REMOVING | KERNFS_EMPTY_DIR)) 751 goto out_unlock; 752 753 kn->hash = kernfs_name_hash(kn->name, kn->ns); 754 755 ret = kernfs_link_sibling(kn); 756 if (ret) 757 goto out_unlock; 758 759 /* Update timestamps on the parent */ 760 ps_iattr = parent->iattr; 761 if (ps_iattr) { 762 ktime_get_real_ts64(&ps_iattr->ia_ctime); 763 ps_iattr->ia_mtime = ps_iattr->ia_ctime; 764 } 765 766 up_write(&root->kernfs_rwsem); 767 768 /* 769 * Activate the new node unless CREATE_DEACTIVATED is requested. 770 * If not activated here, the kernfs user is responsible for 771 * activating the node with kernfs_activate(). A node which hasn't 772 * been activated is not visible to userland and its removal won't 773 * trigger deactivation. 774 */ 775 if (!(kernfs_root(kn)->flags & KERNFS_ROOT_CREATE_DEACTIVATED)) 776 kernfs_activate(kn); 777 return 0; 778 779 out_unlock: 780 up_write(&root->kernfs_rwsem); 781 return ret; 782 } 783 784 /** 785 * kernfs_find_ns - find kernfs_node with the given name 786 * @parent: kernfs_node to search under 787 * @name: name to look for 788 * @ns: the namespace tag to use 789 * 790 * Look for kernfs_node with name @name under @parent. Returns pointer to 791 * the found kernfs_node on success, %NULL on failure. 
792 */ 793 static struct kernfs_node *kernfs_find_ns(struct kernfs_node *parent, 794 const unsigned char *name, 795 const void *ns) 796 { 797 struct rb_node *node = parent->dir.children.rb_node; 798 bool has_ns = kernfs_ns_enabled(parent); 799 unsigned int hash; 800 801 lockdep_assert_held(&kernfs_root(parent)->kernfs_rwsem); 802 803 if (has_ns != (bool)ns) { 804 WARN(1, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n", 805 has_ns ? "required" : "invalid", parent->name, name); 806 return NULL; 807 } 808 809 hash = kernfs_name_hash(name, ns); 810 while (node) { 811 struct kernfs_node *kn; 812 int result; 813 814 kn = rb_to_kn(node); 815 result = kernfs_name_compare(hash, name, ns, kn); 816 if (result < 0) 817 node = node->rb_left; 818 else if (result > 0) 819 node = node->rb_right; 820 else 821 return kn; 822 } 823 return NULL; 824 } 825 826 static struct kernfs_node *kernfs_walk_ns(struct kernfs_node *parent, 827 const unsigned char *path, 828 const void *ns) 829 { 830 size_t len; 831 char *p, *name; 832 833 lockdep_assert_held_read(&kernfs_root(parent)->kernfs_rwsem); 834 835 spin_lock_irq(&kernfs_pr_cont_lock); 836 837 len = strlcpy(kernfs_pr_cont_buf, path, sizeof(kernfs_pr_cont_buf)); 838 839 if (len >= sizeof(kernfs_pr_cont_buf)) { 840 spin_unlock_irq(&kernfs_pr_cont_lock); 841 return NULL; 842 } 843 844 p = kernfs_pr_cont_buf; 845 846 while ((name = strsep(&p, "/")) && parent) { 847 if (*name == '\0') 848 continue; 849 parent = kernfs_find_ns(parent, name, ns); 850 } 851 852 spin_unlock_irq(&kernfs_pr_cont_lock); 853 854 return parent; 855 } 856 857 /** 858 * kernfs_find_and_get_ns - find and get kernfs_node with the given name 859 * @parent: kernfs_node to search under 860 * @name: name to look for 861 * @ns: the namespace tag to use 862 * 863 * Look for kernfs_node with name @name under @parent and get a reference 864 * if found. This function may sleep and returns pointer to the found 865 * kernfs_node on success, %NULL on failure. 866 */ 867 struct kernfs_node *kernfs_find_and_get_ns(struct kernfs_node *parent, 868 const char *name, const void *ns) 869 { 870 struct kernfs_node *kn; 871 struct kernfs_root *root = kernfs_root(parent); 872 873 down_read(&root->kernfs_rwsem); 874 kn = kernfs_find_ns(parent, name, ns); 875 kernfs_get(kn); 876 up_read(&root->kernfs_rwsem); 877 878 return kn; 879 } 880 EXPORT_SYMBOL_GPL(kernfs_find_and_get_ns); 881 882 /** 883 * kernfs_walk_and_get_ns - find and get kernfs_node with the given path 884 * @parent: kernfs_node to search under 885 * @path: path to look for 886 * @ns: the namespace tag to use 887 * 888 * Look for kernfs_node with path @path under @parent and get a reference 889 * if found. This function may sleep and returns pointer to the found 890 * kernfs_node on success, %NULL on failure. 891 */ 892 struct kernfs_node *kernfs_walk_and_get_ns(struct kernfs_node *parent, 893 const char *path, const void *ns) 894 { 895 struct kernfs_node *kn; 896 struct kernfs_root *root = kernfs_root(parent); 897 898 down_read(&root->kernfs_rwsem); 899 kn = kernfs_walk_ns(parent, path, ns); 900 kernfs_get(kn); 901 up_read(&root->kernfs_rwsem); 902 903 return kn; 904 } 905 906 /** 907 * kernfs_create_root - create a new kernfs hierarchy 908 * @scops: optional syscall operations for the hierarchy 909 * @flags: KERNFS_ROOT_* flags 910 * @priv: opaque data associated with the new directory 911 * 912 * Returns the root of the new hierarchy on success, ERR_PTR() value on 913 * failure. 
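 *
 * A minimal usage sketch (error handling trimmed; "state" is an arbitrary
 * example name):
 *
 *	struct kernfs_root *root;
 *	struct kernfs_node *dir;
 *
 *	root = kernfs_create_root(NULL, KERNFS_ROOT_CREATE_DEACTIVATED, NULL);
 *	if (IS_ERR(root))
 *		return PTR_ERR(root);
 *
 *	dir = kernfs_create_dir_ns(kernfs_root_to_node(root), "state", 0755,
 *				   GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
 *				   NULL, NULL);
 *	if (!IS_ERR(dir))
 *		kernfs_activate(dir);	/* deactivated roots need this */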
 */
struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops,
				       unsigned int flags, void *priv)
{
	struct kernfs_root *root;
	struct kernfs_node *kn;

	root = kzalloc(sizeof(*root), GFP_KERNEL);
	if (!root)
		return ERR_PTR(-ENOMEM);

	idr_init(&root->ino_idr);
	init_rwsem(&root->kernfs_rwsem);
	INIT_LIST_HEAD(&root->supers);

	/*
	 * On 64bit ino setups, id is ino. On 32bit, low 32bits are ino.
	 * High bits are used for generation. The starting value for both
	 * ino and generation is 1. Initialize upper 32bit allocation
	 * accordingly.
	 */
	if (sizeof(ino_t) >= sizeof(u64))
		root->id_highbits = 0;
	else
		root->id_highbits = 1;

	kn = __kernfs_new_node(root, NULL, "", S_IFDIR | S_IRUGO | S_IXUGO,
			       GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
			       KERNFS_DIR);
	if (!kn) {
		idr_destroy(&root->ino_idr);
		kfree(root);
		return ERR_PTR(-ENOMEM);
	}

	kn->priv = priv;
	kn->dir.root = root;

	root->syscall_ops = scops;
	root->flags = flags;
	root->kn = kn;
	init_waitqueue_head(&root->deactivate_waitq);

	if (!(root->flags & KERNFS_ROOT_CREATE_DEACTIVATED))
		kernfs_activate(kn);

	return root;
}

/**
 * kernfs_destroy_root - destroy a kernfs hierarchy
 * @root: root of the hierarchy to destroy
 *
 * Destroy the hierarchy anchored at @root by removing all existing
 * directories and destroying @root.
 */
void kernfs_destroy_root(struct kernfs_root *root)
{
	/*
	 * kernfs_remove holds kernfs_rwsem from the root so the root
	 * shouldn't be freed during the operation.
	 */
	kernfs_get(root->kn);
	kernfs_remove(root->kn);
	kernfs_put(root->kn); /* will also free @root */
}

/**
 * kernfs_root_to_node - return the kernfs_node associated with a kernfs_root
 * @root: root to use to lookup
 */
struct kernfs_node *kernfs_root_to_node(struct kernfs_root *root)
{
	return root->kn;
}

/**
 * kernfs_create_dir_ns - create a directory
 * @parent: parent in which to create a new directory
 * @name: name of the new directory
 * @mode: mode of the new directory
 * @uid: uid of the new directory
 * @gid: gid of the new directory
 * @priv: opaque data associated with the new directory
 * @ns: optional namespace tag of the directory
 *
 * Returns the created node on success, ERR_PTR() value on failure.
 */
struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent,
					 const char *name, umode_t mode,
					 kuid_t uid, kgid_t gid,
					 void *priv, const void *ns)
{
	struct kernfs_node *kn;
	int rc;

	/* allocate */
	kn = kernfs_new_node(parent, name, mode | S_IFDIR,
			     uid, gid, KERNFS_DIR);
	if (!kn)
		return ERR_PTR(-ENOMEM);

	kn->dir.root = parent->dir.root;
	kn->ns = ns;
	kn->priv = priv;

	/* link in */
	rc = kernfs_add_one(kn);
	if (!rc)
		return kn;

	kernfs_put(kn);
	return ERR_PTR(rc);
}

/**
 * kernfs_create_empty_dir - create an always empty directory
 * @parent: parent in which to create a new directory
 * @name: name of the new directory
 *
 * Returns the created node on success, ERR_PTR() value on failure.
1035 */ 1036 struct kernfs_node *kernfs_create_empty_dir(struct kernfs_node *parent, 1037 const char *name) 1038 { 1039 struct kernfs_node *kn; 1040 int rc; 1041 1042 /* allocate */ 1043 kn = kernfs_new_node(parent, name, S_IRUGO|S_IXUGO|S_IFDIR, 1044 GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, KERNFS_DIR); 1045 if (!kn) 1046 return ERR_PTR(-ENOMEM); 1047 1048 kn->flags |= KERNFS_EMPTY_DIR; 1049 kn->dir.root = parent->dir.root; 1050 kn->ns = NULL; 1051 kn->priv = NULL; 1052 1053 /* link in */ 1054 rc = kernfs_add_one(kn); 1055 if (!rc) 1056 return kn; 1057 1058 kernfs_put(kn); 1059 return ERR_PTR(rc); 1060 } 1061 1062 static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags) 1063 { 1064 struct kernfs_node *kn; 1065 struct kernfs_root *root; 1066 1067 if (flags & LOOKUP_RCU) 1068 return -ECHILD; 1069 1070 /* Negative hashed dentry? */ 1071 if (d_really_is_negative(dentry)) { 1072 struct kernfs_node *parent; 1073 1074 /* If the kernfs parent node has changed discard and 1075 * proceed to ->lookup. 1076 */ 1077 spin_lock(&dentry->d_lock); 1078 parent = kernfs_dentry_node(dentry->d_parent); 1079 if (parent) { 1080 spin_unlock(&dentry->d_lock); 1081 root = kernfs_root(parent); 1082 down_read(&root->kernfs_rwsem); 1083 if (kernfs_dir_changed(parent, dentry)) { 1084 up_read(&root->kernfs_rwsem); 1085 return 0; 1086 } 1087 up_read(&root->kernfs_rwsem); 1088 } else 1089 spin_unlock(&dentry->d_lock); 1090 1091 /* The kernfs parent node hasn't changed, leave the 1092 * dentry negative and return success. 1093 */ 1094 return 1; 1095 } 1096 1097 kn = kernfs_dentry_node(dentry); 1098 root = kernfs_root(kn); 1099 down_read(&root->kernfs_rwsem); 1100 1101 /* The kernfs node has been deactivated */ 1102 if (!kernfs_active(kn)) 1103 goto out_bad; 1104 1105 /* The kernfs node has been moved? */ 1106 if (kernfs_dentry_node(dentry->d_parent) != kn->parent) 1107 goto out_bad; 1108 1109 /* The kernfs node has been renamed */ 1110 if (strcmp(dentry->d_name.name, kn->name) != 0) 1111 goto out_bad; 1112 1113 /* The kernfs node has been moved to a different namespace */ 1114 if (kn->parent && kernfs_ns_enabled(kn->parent) && 1115 kernfs_info(dentry->d_sb)->ns != kn->ns) 1116 goto out_bad; 1117 1118 up_read(&root->kernfs_rwsem); 1119 return 1; 1120 out_bad: 1121 up_read(&root->kernfs_rwsem); 1122 return 0; 1123 } 1124 1125 const struct dentry_operations kernfs_dops = { 1126 .d_revalidate = kernfs_dop_revalidate, 1127 }; 1128 1129 static struct dentry *kernfs_iop_lookup(struct inode *dir, 1130 struct dentry *dentry, 1131 unsigned int flags) 1132 { 1133 struct kernfs_node *parent = dir->i_private; 1134 struct kernfs_node *kn; 1135 struct kernfs_root *root; 1136 struct inode *inode = NULL; 1137 const void *ns = NULL; 1138 1139 root = kernfs_root(parent); 1140 down_read(&root->kernfs_rwsem); 1141 if (kernfs_ns_enabled(parent)) 1142 ns = kernfs_info(dir->i_sb)->ns; 1143 1144 kn = kernfs_find_ns(parent, dentry->d_name.name, ns); 1145 /* attach dentry and inode */ 1146 if (kn) { 1147 /* Inactive nodes are invisible to the VFS so don't 1148 * create a negative. 1149 */ 1150 if (!kernfs_active(kn)) { 1151 up_read(&root->kernfs_rwsem); 1152 return NULL; 1153 } 1154 inode = kernfs_get_inode(dir->i_sb, kn); 1155 if (!inode) 1156 inode = ERR_PTR(-ENOMEM); 1157 } 1158 /* 1159 * Needed for negative dentry validation. 1160 * The negative dentry can be created in kernfs_iop_lookup() 1161 * or transforms from positive dentry in dentry_unlink_inode() 1162 * called from vfs_rmdir(). 
1163 */ 1164 if (!IS_ERR(inode)) 1165 kernfs_set_rev(parent, dentry); 1166 up_read(&root->kernfs_rwsem); 1167 1168 /* instantiate and hash (possibly negative) dentry */ 1169 return d_splice_alias(inode, dentry); 1170 } 1171 1172 static int kernfs_iop_mkdir(struct user_namespace *mnt_userns, 1173 struct inode *dir, struct dentry *dentry, 1174 umode_t mode) 1175 { 1176 struct kernfs_node *parent = dir->i_private; 1177 struct kernfs_syscall_ops *scops = kernfs_root(parent)->syscall_ops; 1178 int ret; 1179 1180 if (!scops || !scops->mkdir) 1181 return -EPERM; 1182 1183 if (!kernfs_get_active(parent)) 1184 return -ENODEV; 1185 1186 ret = scops->mkdir(parent, dentry->d_name.name, mode); 1187 1188 kernfs_put_active(parent); 1189 return ret; 1190 } 1191 1192 static int kernfs_iop_rmdir(struct inode *dir, struct dentry *dentry) 1193 { 1194 struct kernfs_node *kn = kernfs_dentry_node(dentry); 1195 struct kernfs_syscall_ops *scops = kernfs_root(kn)->syscall_ops; 1196 int ret; 1197 1198 if (!scops || !scops->rmdir) 1199 return -EPERM; 1200 1201 if (!kernfs_get_active(kn)) 1202 return -ENODEV; 1203 1204 ret = scops->rmdir(kn); 1205 1206 kernfs_put_active(kn); 1207 return ret; 1208 } 1209 1210 static int kernfs_iop_rename(struct user_namespace *mnt_userns, 1211 struct inode *old_dir, struct dentry *old_dentry, 1212 struct inode *new_dir, struct dentry *new_dentry, 1213 unsigned int flags) 1214 { 1215 struct kernfs_node *kn = kernfs_dentry_node(old_dentry); 1216 struct kernfs_node *new_parent = new_dir->i_private; 1217 struct kernfs_syscall_ops *scops = kernfs_root(kn)->syscall_ops; 1218 int ret; 1219 1220 if (flags) 1221 return -EINVAL; 1222 1223 if (!scops || !scops->rename) 1224 return -EPERM; 1225 1226 if (!kernfs_get_active(kn)) 1227 return -ENODEV; 1228 1229 if (!kernfs_get_active(new_parent)) { 1230 kernfs_put_active(kn); 1231 return -ENODEV; 1232 } 1233 1234 ret = scops->rename(kn, new_parent, new_dentry->d_name.name); 1235 1236 kernfs_put_active(new_parent); 1237 kernfs_put_active(kn); 1238 return ret; 1239 } 1240 1241 const struct inode_operations kernfs_dir_iops = { 1242 .lookup = kernfs_iop_lookup, 1243 .permission = kernfs_iop_permission, 1244 .setattr = kernfs_iop_setattr, 1245 .getattr = kernfs_iop_getattr, 1246 .listxattr = kernfs_iop_listxattr, 1247 1248 .mkdir = kernfs_iop_mkdir, 1249 .rmdir = kernfs_iop_rmdir, 1250 .rename = kernfs_iop_rename, 1251 }; 1252 1253 static struct kernfs_node *kernfs_leftmost_descendant(struct kernfs_node *pos) 1254 { 1255 struct kernfs_node *last; 1256 1257 while (true) { 1258 struct rb_node *rbn; 1259 1260 last = pos; 1261 1262 if (kernfs_type(pos) != KERNFS_DIR) 1263 break; 1264 1265 rbn = rb_first(&pos->dir.children); 1266 if (!rbn) 1267 break; 1268 1269 pos = rb_to_kn(rbn); 1270 } 1271 1272 return last; 1273 } 1274 1275 /** 1276 * kernfs_next_descendant_post - find the next descendant for post-order walk 1277 * @pos: the current position (%NULL to initiate traversal) 1278 * @root: kernfs_node whose descendants to walk 1279 * 1280 * Find the next descendant to visit for post-order traversal of @root's 1281 * descendants. @root is included in the iteration and the last node to be 1282 * visited. 
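 *
 * The canonical walk is the loop used by kernfs_activate() and
 * __kernfs_remove() below:
 *
 *	pos = NULL;
 *	while ((pos = kernfs_next_descendant_post(pos, root)))
 *		visit(pos);	/* visit() stands in for the per-node work */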
 */
static struct kernfs_node *kernfs_next_descendant_post(struct kernfs_node *pos,
						       struct kernfs_node *root)
{
	struct rb_node *rbn;

	lockdep_assert_held_write(&kernfs_root(root)->kernfs_rwsem);

	/* if first iteration, visit leftmost descendant which may be root */
	if (!pos)
		return kernfs_leftmost_descendant(root);

	/* if we visited @root, we're done */
	if (pos == root)
		return NULL;

	/* if there's an unvisited sibling, visit its leftmost descendant */
	rbn = rb_next(&pos->rb);
	if (rbn)
		return kernfs_leftmost_descendant(rb_to_kn(rbn));

	/* no sibling left, visit parent */
	return pos->parent;
}

static void kernfs_activate_one(struct kernfs_node *kn)
{
	lockdep_assert_held_write(&kernfs_root(kn)->kernfs_rwsem);

	kn->flags |= KERNFS_ACTIVATED;

	if (kernfs_active(kn) || (kn->flags & (KERNFS_HIDDEN | KERNFS_REMOVING)))
		return;

	WARN_ON_ONCE(kn->parent && RB_EMPTY_NODE(&kn->rb));
	WARN_ON_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS);

	atomic_sub(KN_DEACTIVATED_BIAS, &kn->active);
}

/**
 * kernfs_activate - activate a node which started deactivated
 * @kn: kernfs_node whose subtree is to be activated
 *
 * If the root has KERNFS_ROOT_CREATE_DEACTIVATED set, a newly created node
 * needs to be explicitly activated. A node which hasn't been activated
 * isn't visible to userland and deactivation is skipped during its
 * removal. This is useful to construct atomic init sequences where
 * creation of multiple nodes should either succeed or fail atomically.
 *
 * The caller is responsible for ensuring that this function is not called
 * after kernfs_remove*() is invoked on @kn.
 */
void kernfs_activate(struct kernfs_node *kn)
{
	struct kernfs_node *pos;
	struct kernfs_root *root = kernfs_root(kn);

	down_write(&root->kernfs_rwsem);

	pos = NULL;
	while ((pos = kernfs_next_descendant_post(pos, kn)))
		kernfs_activate_one(pos);

	up_write(&root->kernfs_rwsem);
}

/**
 * kernfs_show - show or hide a node
 * @kn: kernfs_node to show or hide
 * @show: whether to show or hide
 *
 * If @show is %false, @kn is marked hidden and deactivated. A hidden node is
 * ignored in future activations. If %true, the mark is removed and activation
 * state is restored. This function won't implicitly activate a new node in a
 * %KERNFS_ROOT_CREATE_DEACTIVATED root which hasn't been activated yet.
 *
 * To avoid recursion complexities, directories aren't supported for now.
 */
void kernfs_show(struct kernfs_node *kn, bool show)
{
	struct kernfs_root *root = kernfs_root(kn);

	if (WARN_ON_ONCE(kernfs_type(kn) == KERNFS_DIR))
		return;

	down_write(&root->kernfs_rwsem);

	if (show) {
		kn->flags &= ~KERNFS_HIDDEN;
		if (kn->flags & KERNFS_ACTIVATED)
			kernfs_activate_one(kn);
	} else {
		kn->flags |= KERNFS_HIDDEN;
		if (kernfs_active(kn))
			atomic_add(KN_DEACTIVATED_BIAS, &kn->active);
		kernfs_drain(kn);
	}

	up_write(&root->kernfs_rwsem);
}

static void __kernfs_remove(struct kernfs_node *kn)
{
	struct kernfs_node *pos;

	/* Short-circuit if non-root @kn has already finished removal. */
	if (!kn)
		return;

	lockdep_assert_held_write(&kernfs_root(kn)->kernfs_rwsem);

	/*
	 * This is for kernfs_remove_self() which plays with active ref
	 * after removal.
	 */
	if (kn->parent && RB_EMPTY_NODE(&kn->rb))
		return;

	pr_debug("kernfs %s: removing\n", kn->name);

	/* prevent new usage by marking all nodes removing and deactivating */
	pos = NULL;
	while ((pos = kernfs_next_descendant_post(pos, kn))) {
		pos->flags |= KERNFS_REMOVING;
		if (kernfs_active(pos))
			atomic_add(KN_DEACTIVATED_BIAS, &pos->active);
	}

	/* deactivate and unlink the subtree node-by-node */
	do {
		pos = kernfs_leftmost_descendant(kn);

		/*
		 * kernfs_drain() may drop kernfs_rwsem temporarily and @pos's
		 * base ref could have been put by someone else by the time
		 * the function returns. Make sure it doesn't go away
		 * underneath us.
		 */
		kernfs_get(pos);

		kernfs_drain(pos);

		/*
		 * kernfs_unlink_sibling() succeeds once per node. Use it
		 * to decide who's responsible for cleanups.
		 */
		if (!pos->parent || kernfs_unlink_sibling(pos)) {
			struct kernfs_iattrs *ps_iattr =
				pos->parent ? pos->parent->iattr : NULL;

			/* update timestamps on the parent */
			if (ps_iattr) {
				ktime_get_real_ts64(&ps_iattr->ia_ctime);
				ps_iattr->ia_mtime = ps_iattr->ia_ctime;
			}

			kernfs_put(pos);
		}

		kernfs_put(pos);
	} while (pos != kn);
}

/**
 * kernfs_remove - remove a kernfs_node recursively
 * @kn: the kernfs_node to remove
 *
 * Remove @kn along with all its subdirectories and files.
 */
void kernfs_remove(struct kernfs_node *kn)
{
	struct kernfs_root *root;

	if (!kn)
		return;

	root = kernfs_root(kn);

	down_write(&root->kernfs_rwsem);
	__kernfs_remove(kn);
	up_write(&root->kernfs_rwsem);
}

/**
 * kernfs_break_active_protection - break out of active protection
 * @kn: the self kernfs_node
 *
 * The caller must be running off of a kernfs operation which is invoked
 * with an active reference - e.g. one of kernfs_ops. Each invocation of
 * this function must also be matched with an invocation of
 * kernfs_unbreak_active_protection().
 *
 * This function releases the active reference of @kn the caller is
 * holding. Once this function is called, @kn may be removed at any point
 * and the caller is solely responsible for ensuring that the objects it
 * dereferences are accessible.
 */
void kernfs_break_active_protection(struct kernfs_node *kn)
{
	/*
	 * Take ourself out of the active ref dependency chain. If
	 * we're called without an active ref, lockdep will complain.
	 */
	kernfs_put_active(kn);
}

/**
 * kernfs_unbreak_active_protection - undo kernfs_break_active_protection()
 * @kn: the self kernfs_node
 *
 * If kernfs_break_active_protection() was called, this function must be
 * invoked before finishing the kernfs operation. Note that while this
 * function restores the active reference, it doesn't and can't actually
 * restore the active protection - @kn may already be removed or in the
 * process of being removed. Once kernfs_break_active_protection() is
 * invoked, that protection is irreversibly gone for the kernfs operation
 * instance.
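 *
 * A sketch of the expected pairing, assuming a kernfs operation that
 * removes its own node (@kn is the node the operation was invoked on):
 *
 *	kernfs_get(kn);
 *	kernfs_break_active_protection(kn);
 *	kernfs_remove(kn);
 *	kernfs_unbreak_active_protection(kn);
 *	kernfs_put(kn);
 *
 * For this common self-removal case, kernfs_remove_self() below packages
 * the same break/remove/unbreak sequence and arbitrates concurrent callers.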
 *
 * While this function may be called at any point after
 * kernfs_break_active_protection() is invoked, its most useful location
 * would be right before the enclosing kernfs operation returns.
 */
void kernfs_unbreak_active_protection(struct kernfs_node *kn)
{
	/*
	 * @kn->active could be in any state; however, the increment we do
	 * here will be undone as soon as the enclosing kernfs operation
	 * finishes and this temporary bump can't break anything. If @kn
	 * is alive, nothing changes. If @kn is being deactivated, the
	 * soon-to-follow put will either finish deactivation or restore
	 * deactivated state. If @kn is already removed, the temporary
	 * bump is guaranteed to be gone before @kn is released.
	 */
	atomic_inc(&kn->active);
	if (kernfs_lockdep(kn))
		rwsem_acquire(&kn->dep_map, 0, 1, _RET_IP_);
}

/**
 * kernfs_remove_self - remove a kernfs_node from its own method
 * @kn: the self kernfs_node to remove
 *
 * The caller must be running off of a kernfs operation which is invoked
 * with an active reference - e.g. one of kernfs_ops. This can be used to
 * implement a file operation which deletes itself.
 *
 * For example, the "delete" file for a sysfs device directory can be
 * implemented by invoking kernfs_remove_self() on the "delete" file
 * itself. This function breaks the circular dependency of trying to
 * deactivate self while holding an active ref itself. It isn't necessary
 * to modify the usual removal path to use kernfs_remove_self(). The
 * "delete" implementation can simply invoke kernfs_remove_self() on self
 * before proceeding with the usual removal path. kernfs will ignore later
 * kernfs_remove() on self.
 *
 * kernfs_remove_self() can be called multiple times concurrently on the
 * same kernfs_node. Only the first one actually performs removal and
 * returns %true. All others will wait until the kernfs operation which
 * won self-removal finishes and return %false. Note that the losers wait
 * for the completion of not only the winning kernfs_remove_self() but also
 * the whole kernfs_ops which won the arbitration. This can be used to
 * guarantee, for example, that all concurrent writes to a "delete" file
 * finish only after the whole operation is complete.
 */
bool kernfs_remove_self(struct kernfs_node *kn)
{
	bool ret;
	struct kernfs_root *root = kernfs_root(kn);

	down_write(&root->kernfs_rwsem);
	kernfs_break_active_protection(kn);

	/*
	 * SUICIDAL is used to arbitrate among competing invocations. Only
	 * the first one will actually perform removal. When the removal
	 * is complete, SUICIDED is set and the active ref is restored
	 * while kernfs_rwsem is held exclusive. The ones which lost
	 * arbitration wait for SUICIDED && drained which can happen only
	 * after the enclosing kernfs operation which executed the winning
	 * instance of kernfs_remove_self() finishes.
	 */
	if (!(kn->flags & KERNFS_SUICIDAL)) {
		kn->flags |= KERNFS_SUICIDAL;
		__kernfs_remove(kn);
		kn->flags |= KERNFS_SUICIDED;
		ret = true;
	} else {
		wait_queue_head_t *waitq = &kernfs_root(kn)->deactivate_waitq;
		DEFINE_WAIT(wait);

		while (true) {
			prepare_to_wait(waitq, &wait, TASK_UNINTERRUPTIBLE);

			if ((kn->flags & KERNFS_SUICIDED) &&
			    atomic_read(&kn->active) == KN_DEACTIVATED_BIAS)
				break;

			up_write(&root->kernfs_rwsem);
			schedule();
			down_write(&root->kernfs_rwsem);
		}
		finish_wait(waitq, &wait);
		WARN_ON_ONCE(!RB_EMPTY_NODE(&kn->rb));
		ret = false;
	}

	/*
	 * This must be done while kernfs_rwsem is held exclusive; otherwise,
	 * waiting for SUICIDED && deactivated could finish prematurely.
	 */
	kernfs_unbreak_active_protection(kn);

	up_write(&root->kernfs_rwsem);
	return ret;
}

/**
 * kernfs_remove_by_name_ns - find a kernfs_node by name and remove it
 * @parent: parent of the target
 * @name: name of the kernfs_node to remove
 * @ns: namespace tag of the kernfs_node to remove
 *
 * Look for the kernfs_node with @name and @ns under @parent and remove it.
 * Returns 0 on success, -ENOENT if such entry doesn't exist.
 */
int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name,
			     const void *ns)
{
	struct kernfs_node *kn;
	struct kernfs_root *root;

	if (!parent) {
		WARN(1, KERN_WARNING "kernfs: can not remove '%s', no directory\n",
			name);
		return -ENOENT;
	}

	root = kernfs_root(parent);
	down_write(&root->kernfs_rwsem);

	kn = kernfs_find_ns(parent, name, ns);
	if (kn) {
		kernfs_get(kn);
		__kernfs_remove(kn);
		kernfs_put(kn);
	}

	up_write(&root->kernfs_rwsem);

	if (kn)
		return 0;
	else
		return -ENOENT;
}

/**
 * kernfs_rename_ns - move and rename a kernfs_node
 * @kn: target node
 * @new_parent: new parent to put @kn under
 * @new_name: new name
 * @new_ns: new namespace tag
 */
int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent,
		     const char *new_name, const void *new_ns)
{
	struct kernfs_node *old_parent;
	struct kernfs_root *root;
	const char *old_name = NULL;
	int error;

	/* can't move or rename root */
	if (!kn->parent)
		return -EINVAL;

	root = kernfs_root(kn);
	down_write(&root->kernfs_rwsem);

	error = -ENOENT;
	if (!kernfs_active(kn) || !kernfs_active(new_parent) ||
	    (new_parent->flags & KERNFS_EMPTY_DIR))
		goto out;

	error = 0;
	if ((kn->parent == new_parent) && (kn->ns == new_ns) &&
	    (strcmp(kn->name, new_name) == 0))
		goto out;	/* nothing to rename */

	error = -EEXIST;
	if (kernfs_find_ns(new_parent, new_name, new_ns))
		goto out;

	/* rename kernfs_node */
	if (strcmp(kn->name, new_name) != 0) {
		error = -ENOMEM;
		new_name = kstrdup_const(new_name, GFP_KERNEL);
		if (!new_name)
			goto out;
	} else {
		new_name = NULL;
	}

	/*
	 * Move to the appropriate place in the appropriate directory's rbtree.
1686 */ 1687 kernfs_unlink_sibling(kn); 1688 kernfs_get(new_parent); 1689 1690 /* rename_lock protects ->parent and ->name accessors */ 1691 spin_lock_irq(&kernfs_rename_lock); 1692 1693 old_parent = kn->parent; 1694 kn->parent = new_parent; 1695 1696 kn->ns = new_ns; 1697 if (new_name) { 1698 old_name = kn->name; 1699 kn->name = new_name; 1700 } 1701 1702 spin_unlock_irq(&kernfs_rename_lock); 1703 1704 kn->hash = kernfs_name_hash(kn->name, kn->ns); 1705 kernfs_link_sibling(kn); 1706 1707 kernfs_put(old_parent); 1708 kfree_const(old_name); 1709 1710 error = 0; 1711 out: 1712 up_write(&root->kernfs_rwsem); 1713 return error; 1714 } 1715 1716 /* Relationship between mode and the DT_xxx types */ 1717 static inline unsigned char dt_type(struct kernfs_node *kn) 1718 { 1719 return (kn->mode >> 12) & 15; 1720 } 1721 1722 static int kernfs_dir_fop_release(struct inode *inode, struct file *filp) 1723 { 1724 kernfs_put(filp->private_data); 1725 return 0; 1726 } 1727 1728 static struct kernfs_node *kernfs_dir_pos(const void *ns, 1729 struct kernfs_node *parent, loff_t hash, struct kernfs_node *pos) 1730 { 1731 if (pos) { 1732 int valid = kernfs_active(pos) && 1733 pos->parent == parent && hash == pos->hash; 1734 kernfs_put(pos); 1735 if (!valid) 1736 pos = NULL; 1737 } 1738 if (!pos && (hash > 1) && (hash < INT_MAX)) { 1739 struct rb_node *node = parent->dir.children.rb_node; 1740 while (node) { 1741 pos = rb_to_kn(node); 1742 1743 if (hash < pos->hash) 1744 node = node->rb_left; 1745 else if (hash > pos->hash) 1746 node = node->rb_right; 1747 else 1748 break; 1749 } 1750 } 1751 /* Skip over entries which are dying/dead or in the wrong namespace */ 1752 while (pos && (!kernfs_active(pos) || pos->ns != ns)) { 1753 struct rb_node *node = rb_next(&pos->rb); 1754 if (!node) 1755 pos = NULL; 1756 else 1757 pos = rb_to_kn(node); 1758 } 1759 return pos; 1760 } 1761 1762 static struct kernfs_node *kernfs_dir_next_pos(const void *ns, 1763 struct kernfs_node *parent, ino_t ino, struct kernfs_node *pos) 1764 { 1765 pos = kernfs_dir_pos(ns, parent, ino, pos); 1766 if (pos) { 1767 do { 1768 struct rb_node *node = rb_next(&pos->rb); 1769 if (!node) 1770 pos = NULL; 1771 else 1772 pos = rb_to_kn(node); 1773 } while (pos && (!kernfs_active(pos) || pos->ns != ns)); 1774 } 1775 return pos; 1776 } 1777 1778 static int kernfs_fop_readdir(struct file *file, struct dir_context *ctx) 1779 { 1780 struct dentry *dentry = file->f_path.dentry; 1781 struct kernfs_node *parent = kernfs_dentry_node(dentry); 1782 struct kernfs_node *pos = file->private_data; 1783 struct kernfs_root *root; 1784 const void *ns = NULL; 1785 1786 if (!dir_emit_dots(file, ctx)) 1787 return 0; 1788 1789 root = kernfs_root(parent); 1790 down_read(&root->kernfs_rwsem); 1791 1792 if (kernfs_ns_enabled(parent)) 1793 ns = kernfs_info(dentry->d_sb)->ns; 1794 1795 for (pos = kernfs_dir_pos(ns, parent, ctx->pos, pos); 1796 pos; 1797 pos = kernfs_dir_next_pos(ns, parent, ctx->pos, pos)) { 1798 const char *name = pos->name; 1799 unsigned int type = dt_type(pos); 1800 int len = strlen(name); 1801 ino_t ino = kernfs_ino(pos); 1802 1803 ctx->pos = pos->hash; 1804 file->private_data = pos; 1805 kernfs_get(pos); 1806 1807 up_read(&root->kernfs_rwsem); 1808 if (!dir_emit(ctx, name, len, ino, type)) 1809 return 0; 1810 down_read(&root->kernfs_rwsem); 1811 } 1812 up_read(&root->kernfs_rwsem); 1813 file->private_data = NULL; 1814 ctx->pos = INT_MAX; 1815 return 0; 1816 } 1817 1818 const struct file_operations kernfs_dir_fops = { 1819 .read = generic_read_dir, 1820 
	.iterate_shared	= kernfs_fop_readdir,
	.release	= kernfs_dir_fop_release,
	.llseek		= generic_file_llseek,
};