1 /* 2 * fs/sysfs/dir.c - sysfs core and dir operation implementation 3 * 4 * Copyright (c) 2001-3 Patrick Mochel 5 * Copyright (c) 2007 SUSE Linux Products GmbH 6 * Copyright (c) 2007 Tejun Heo <teheo@suse.de> 7 * 8 * This file is released under the GPLv2. 9 * 10 * Please see Documentation/filesystems/sysfs.txt for more information. 11 */ 12 13 #undef DEBUG 14 15 #include <linux/fs.h> 16 #include <linux/mount.h> 17 #include <linux/module.h> 18 #include <linux/kobject.h> 19 #include <linux/namei.h> 20 #include <linux/idr.h> 21 #include <linux/completion.h> 22 #include <linux/mutex.h> 23 #include <linux/slab.h> 24 #include <linux/security.h> 25 #include <linux/hash.h> 26 #include "sysfs.h" 27 28 DEFINE_MUTEX(sysfs_mutex); 29 DEFINE_SPINLOCK(sysfs_symlink_target_lock); 30 31 #define to_sysfs_dirent(X) rb_entry((X), struct sysfs_dirent, s_rb) 32 33 static DEFINE_SPINLOCK(sysfs_ino_lock); 34 static DEFINE_IDA(sysfs_ino_ida); 35 36 /** 37 * sysfs_name_hash 38 * @name: Null terminated string to hash 39 * @ns: Namespace tag to hash 40 * 41 * Returns 31 bit hash of ns + name (so it fits in an off_t ) 42 */ 43 static unsigned int sysfs_name_hash(const char *name, const void *ns) 44 { 45 unsigned long hash = init_name_hash(); 46 unsigned int len = strlen(name); 47 while (len--) 48 hash = partial_name_hash(*name++, hash); 49 hash = (end_name_hash(hash) ^ hash_ptr((void *)ns, 31)); 50 hash &= 0x7fffffffU; 51 /* Reserve hash numbers 0, 1 and INT_MAX for magic directory entries */ 52 if (hash < 1) 53 hash += 2; 54 if (hash >= INT_MAX) 55 hash = INT_MAX - 1; 56 return hash; 57 } 58 59 static int sysfs_name_compare(unsigned int hash, const char *name, 60 const void *ns, const struct sysfs_dirent *sd) 61 { 62 if (hash != sd->s_hash) 63 return hash - sd->s_hash; 64 if (ns != sd->s_ns) 65 return ns - sd->s_ns; 66 return strcmp(name, sd->s_name); 67 } 68 69 static int sysfs_sd_compare(const struct sysfs_dirent *left, 70 const struct sysfs_dirent *right) 71 { 72 return sysfs_name_compare(left->s_hash, left->s_name, left->s_ns, 73 right); 74 } 75 76 /** 77 * sysfs_link_sibling - link sysfs_dirent into sibling rbtree 78 * @sd: sysfs_dirent of interest 79 * 80 * Link @sd into its sibling rbtree which starts from 81 * sd->s_parent->s_dir.children. 82 * 83 * Locking: 84 * mutex_lock(sysfs_mutex) 85 * 86 * RETURNS: 87 * 0 on susccess -EEXIST on failure. 88 */ 89 static int sysfs_link_sibling(struct sysfs_dirent *sd) 90 { 91 struct rb_node **node = &sd->s_parent->s_dir.children.rb_node; 92 struct rb_node *parent = NULL; 93 94 if (sysfs_type(sd) == SYSFS_DIR) 95 sd->s_parent->s_dir.subdirs++; 96 97 while (*node) { 98 struct sysfs_dirent *pos; 99 int result; 100 101 pos = to_sysfs_dirent(*node); 102 parent = *node; 103 result = sysfs_sd_compare(sd, pos); 104 if (result < 0) 105 node = &pos->s_rb.rb_left; 106 else if (result > 0) 107 node = &pos->s_rb.rb_right; 108 else 109 return -EEXIST; 110 } 111 /* add new node and rebalance the tree */ 112 rb_link_node(&sd->s_rb, parent, node); 113 rb_insert_color(&sd->s_rb, &sd->s_parent->s_dir.children); 114 return 0; 115 } 116 117 /** 118 * sysfs_unlink_sibling - unlink sysfs_dirent from sibling rbtree 119 * @sd: sysfs_dirent of interest 120 * 121 * Unlink @sd from its sibling rbtree which starts from 122 * sd->s_parent->s_dir.children. 123 * 124 * Locking: 125 * mutex_lock(sysfs_mutex) 126 */ 127 static void sysfs_unlink_sibling(struct sysfs_dirent *sd) 128 { 129 if (sysfs_type(sd) == SYSFS_DIR) 130 sd->s_parent->s_dir.subdirs--; 131 132 rb_erase(&sd->s_rb, &sd->s_parent->s_dir.children); 133 } 134 135 /** 136 * sysfs_get_active - get an active reference to sysfs_dirent 137 * @sd: sysfs_dirent to get an active reference to 138 * 139 * Get an active reference of @sd. This function is noop if @sd 140 * is NULL. 141 * 142 * RETURNS: 143 * Pointer to @sd on success, NULL on failure. 144 */ 145 struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd) 146 { 147 if (unlikely(!sd)) 148 return NULL; 149 150 if (!atomic_inc_unless_negative(&sd->s_active)) 151 return NULL; 152 153 if (sd->s_flags & SYSFS_FLAG_LOCKDEP) 154 rwsem_acquire_read(&sd->dep_map, 0, 1, _RET_IP_); 155 return sd; 156 } 157 158 /** 159 * sysfs_put_active - put an active reference to sysfs_dirent 160 * @sd: sysfs_dirent to put an active reference to 161 * 162 * Put an active reference to @sd. This function is noop if @sd 163 * is NULL. 164 */ 165 void sysfs_put_active(struct sysfs_dirent *sd) 166 { 167 int v; 168 169 if (unlikely(!sd)) 170 return; 171 172 if (sd->s_flags & SYSFS_FLAG_LOCKDEP) 173 rwsem_release(&sd->dep_map, 1, _RET_IP_); 174 v = atomic_dec_return(&sd->s_active); 175 if (likely(v != SD_DEACTIVATED_BIAS)) 176 return; 177 178 /* atomic_dec_return() is a mb(), we'll always see the updated 179 * sd->u.completion. 180 */ 181 complete(sd->u.completion); 182 } 183 184 /** 185 * sysfs_deactivate - deactivate sysfs_dirent 186 * @sd: sysfs_dirent to deactivate 187 * 188 * Deny new active references and drain existing ones. 189 */ 190 static void sysfs_deactivate(struct sysfs_dirent *sd) 191 { 192 DECLARE_COMPLETION_ONSTACK(wait); 193 int v; 194 195 BUG_ON(!(sd->s_flags & SYSFS_FLAG_REMOVED)); 196 197 if (!(sysfs_type(sd) & SYSFS_ACTIVE_REF)) 198 return; 199 200 sd->u.completion = (void *)&wait; 201 202 rwsem_acquire(&sd->dep_map, 0, 0, _RET_IP_); 203 /* atomic_add_return() is a mb(), put_active() will always see 204 * the updated sd->u.completion. 205 */ 206 v = atomic_add_return(SD_DEACTIVATED_BIAS, &sd->s_active); 207 208 if (v != SD_DEACTIVATED_BIAS) { 209 lock_contended(&sd->dep_map, _RET_IP_); 210 wait_for_completion(&wait); 211 } 212 213 lock_acquired(&sd->dep_map, _RET_IP_); 214 rwsem_release(&sd->dep_map, 1, _RET_IP_); 215 } 216 217 static int sysfs_alloc_ino(unsigned int *pino) 218 { 219 int ino, rc; 220 221 retry: 222 spin_lock(&sysfs_ino_lock); 223 rc = ida_get_new_above(&sysfs_ino_ida, 2, &ino); 224 spin_unlock(&sysfs_ino_lock); 225 226 if (rc == -EAGAIN) { 227 if (ida_pre_get(&sysfs_ino_ida, GFP_KERNEL)) 228 goto retry; 229 rc = -ENOMEM; 230 } 231 232 *pino = ino; 233 return rc; 234 } 235 236 static void sysfs_free_ino(unsigned int ino) 237 { 238 spin_lock(&sysfs_ino_lock); 239 ida_remove(&sysfs_ino_ida, ino); 240 spin_unlock(&sysfs_ino_lock); 241 } 242 243 /** 244 * kernfs_get - get a reference count on a sysfs_dirent 245 * @sd: the target sysfs_dirent 246 */ 247 void kernfs_get(struct sysfs_dirent *sd) 248 { 249 if (sd) { 250 WARN_ON(!atomic_read(&sd->s_count)); 251 atomic_inc(&sd->s_count); 252 } 253 } 254 EXPORT_SYMBOL_GPL(kernfs_get); 255 256 /** 257 * kernfs_put - put a reference count on a sysfs_dirent 258 * @sd: the target sysfs_dirent 259 * 260 * Put a reference count of @sd and destroy it if it reached zero. 261 */ 262 void kernfs_put(struct sysfs_dirent *sd) 263 { 264 struct sysfs_dirent *parent_sd; 265 266 if (!sd || !atomic_dec_and_test(&sd->s_count)) 267 return; 268 repeat: 269 /* Moving/renaming is always done while holding reference. 270 * sd->s_parent won't change beneath us. 271 */ 272 parent_sd = sd->s_parent; 273 274 WARN(!(sd->s_flags & SYSFS_FLAG_REMOVED), 275 "sysfs: free using entry: %s/%s\n", 276 parent_sd ? parent_sd->s_name : "", sd->s_name); 277 278 if (sysfs_type(sd) == SYSFS_KOBJ_LINK) 279 kernfs_put(sd->s_symlink.target_sd); 280 if (sysfs_type(sd) & SYSFS_COPY_NAME) 281 kfree(sd->s_name); 282 if (sd->s_iattr && sd->s_iattr->ia_secdata) 283 security_release_secctx(sd->s_iattr->ia_secdata, 284 sd->s_iattr->ia_secdata_len); 285 kfree(sd->s_iattr); 286 sysfs_free_ino(sd->s_ino); 287 kmem_cache_free(sysfs_dir_cachep, sd); 288 289 sd = parent_sd; 290 if (sd && atomic_dec_and_test(&sd->s_count)) 291 goto repeat; 292 } 293 EXPORT_SYMBOL_GPL(kernfs_put); 294 295 static int sysfs_dentry_delete(const struct dentry *dentry) 296 { 297 struct sysfs_dirent *sd = dentry->d_fsdata; 298 return !(sd && !(sd->s_flags & SYSFS_FLAG_REMOVED)); 299 } 300 301 static int sysfs_dentry_revalidate(struct dentry *dentry, unsigned int flags) 302 { 303 struct sysfs_dirent *sd; 304 305 if (flags & LOOKUP_RCU) 306 return -ECHILD; 307 308 sd = dentry->d_fsdata; 309 mutex_lock(&sysfs_mutex); 310 311 /* The sysfs dirent has been deleted */ 312 if (sd->s_flags & SYSFS_FLAG_REMOVED) 313 goto out_bad; 314 315 /* The sysfs dirent has been moved? */ 316 if (dentry->d_parent->d_fsdata != sd->s_parent) 317 goto out_bad; 318 319 /* The sysfs dirent has been renamed */ 320 if (strcmp(dentry->d_name.name, sd->s_name) != 0) 321 goto out_bad; 322 323 /* The sysfs dirent has been moved to a different namespace */ 324 if (sd->s_parent && (sd->s_parent->s_flags & SYSFS_FLAG_NS) && 325 sysfs_info(dentry->d_sb)->ns != sd->s_ns) 326 goto out_bad; 327 328 mutex_unlock(&sysfs_mutex); 329 out_valid: 330 return 1; 331 out_bad: 332 /* Remove the dentry from the dcache hashes. 333 * If this is a deleted dentry we use d_drop instead of d_delete 334 * so sysfs doesn't need to cope with negative dentries. 335 * 336 * If this is a dentry that has simply been renamed we 337 * use d_drop to remove it from the dcache lookup on its 338 * old parent. If this dentry persists later when a lookup 339 * is performed at its new name the dentry will be readded 340 * to the dcache hashes. 341 */ 342 mutex_unlock(&sysfs_mutex); 343 344 /* If we have submounts we must allow the vfs caches 345 * to lie about the state of the filesystem to prevent 346 * leaks and other nasty things. 347 */ 348 if (check_submounts_and_drop(dentry) != 0) 349 goto out_valid; 350 351 return 0; 352 } 353 354 static void sysfs_dentry_release(struct dentry *dentry) 355 { 356 kernfs_put(dentry->d_fsdata); 357 } 358 359 const struct dentry_operations sysfs_dentry_ops = { 360 .d_revalidate = sysfs_dentry_revalidate, 361 .d_delete = sysfs_dentry_delete, 362 .d_release = sysfs_dentry_release, 363 }; 364 365 struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type) 366 { 367 char *dup_name = NULL; 368 struct sysfs_dirent *sd; 369 370 if (type & SYSFS_COPY_NAME) { 371 name = dup_name = kstrdup(name, GFP_KERNEL); 372 if (!name) 373 return NULL; 374 } 375 376 sd = kmem_cache_zalloc(sysfs_dir_cachep, GFP_KERNEL); 377 if (!sd) 378 goto err_out1; 379 380 if (sysfs_alloc_ino(&sd->s_ino)) 381 goto err_out2; 382 383 atomic_set(&sd->s_count, 1); 384 atomic_set(&sd->s_active, 0); 385 386 sd->s_name = name; 387 sd->s_mode = mode; 388 sd->s_flags = type | SYSFS_FLAG_REMOVED; 389 390 return sd; 391 392 err_out2: 393 kmem_cache_free(sysfs_dir_cachep, sd); 394 err_out1: 395 kfree(dup_name); 396 return NULL; 397 } 398 399 /** 400 * sysfs_addrm_start - prepare for sysfs_dirent add/remove 401 * @acxt: pointer to sysfs_addrm_cxt to be used 402 * 403 * This function is called when the caller is about to add or remove 404 * sysfs_dirent. This function acquires sysfs_mutex. @acxt is used 405 * to keep and pass context to other addrm functions. 406 * 407 * LOCKING: 408 * Kernel thread context (may sleep). sysfs_mutex is locked on 409 * return. 410 */ 411 void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt) 412 __acquires(sysfs_mutex) 413 { 414 memset(acxt, 0, sizeof(*acxt)); 415 416 mutex_lock(&sysfs_mutex); 417 } 418 419 /** 420 * sysfs_add_one - add sysfs_dirent to parent without warning 421 * @acxt: addrm context to use 422 * @sd: sysfs_dirent to be added 423 * @parent_sd: the parent sysfs_dirent to add @sd to 424 * 425 * Get @parent_sd and set @sd->s_parent to it and increment nlink of 426 * the parent inode if @sd is a directory and link into the children 427 * list of the parent. 428 * 429 * This function should be called between calls to 430 * sysfs_addrm_start() and sysfs_addrm_finish() and should be 431 * passed the same @acxt as passed to sysfs_addrm_start(). 432 * 433 * LOCKING: 434 * Determined by sysfs_addrm_start(). 435 * 436 * RETURNS: 437 * 0 on success, -EEXIST if entry with the given name already 438 * exists. 439 */ 440 int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd, 441 struct sysfs_dirent *parent_sd) 442 { 443 bool has_ns = parent_sd->s_flags & SYSFS_FLAG_NS; 444 struct sysfs_inode_attrs *ps_iattr; 445 int ret; 446 447 if (has_ns != (bool)sd->s_ns) { 448 WARN(1, KERN_WARNING "sysfs: ns %s in '%s' for '%s'\n", 449 has_ns ? "required" : "invalid", 450 parent_sd->s_name, sd->s_name); 451 return -EINVAL; 452 } 453 454 if (sysfs_type(parent_sd) != SYSFS_DIR) 455 return -EINVAL; 456 457 sd->s_hash = sysfs_name_hash(sd->s_name, sd->s_ns); 458 sd->s_parent = parent_sd; 459 kernfs_get(parent_sd); 460 461 ret = sysfs_link_sibling(sd); 462 if (ret) 463 return ret; 464 465 /* Update timestamps on the parent */ 466 ps_iattr = parent_sd->s_iattr; 467 if (ps_iattr) { 468 struct iattr *ps_iattrs = &ps_iattr->ia_iattr; 469 ps_iattrs->ia_ctime = ps_iattrs->ia_mtime = CURRENT_TIME; 470 } 471 472 /* Mark the entry added into directory tree */ 473 sd->s_flags &= ~SYSFS_FLAG_REMOVED; 474 475 return 0; 476 } 477 478 /** 479 * sysfs_pathname - return full path to sysfs dirent 480 * @sd: sysfs_dirent whose path we want 481 * @path: caller allocated buffer of size PATH_MAX 482 * 483 * Gives the name "/" to the sysfs_root entry; any path returned 484 * is relative to wherever sysfs is mounted. 485 */ 486 static char *sysfs_pathname(struct sysfs_dirent *sd, char *path) 487 { 488 if (sd->s_parent) { 489 sysfs_pathname(sd->s_parent, path); 490 strlcat(path, "/", PATH_MAX); 491 } 492 strlcat(path, sd->s_name, PATH_MAX); 493 return path; 494 } 495 496 void sysfs_warn_dup(struct sysfs_dirent *parent, const char *name) 497 { 498 char *path; 499 500 path = kzalloc(PATH_MAX, GFP_KERNEL); 501 if (path) { 502 sysfs_pathname(parent, path); 503 strlcat(path, "/", PATH_MAX); 504 strlcat(path, name, PATH_MAX); 505 } 506 507 WARN(1, KERN_WARNING "sysfs: cannot create duplicate filename '%s'\n", 508 path ? path : name); 509 510 kfree(path); 511 } 512 513 /** 514 * sysfs_remove_one - remove sysfs_dirent from parent 515 * @acxt: addrm context to use 516 * @sd: sysfs_dirent to be removed 517 * 518 * Mark @sd removed and drop nlink of parent inode if @sd is a 519 * directory. @sd is unlinked from the children list. 520 * 521 * This function should be called between calls to 522 * sysfs_addrm_start() and sysfs_addrm_finish() and should be 523 * passed the same @acxt as passed to sysfs_addrm_start(). 524 * 525 * LOCKING: 526 * Determined by sysfs_addrm_start(). 527 */ 528 static void sysfs_remove_one(struct sysfs_addrm_cxt *acxt, 529 struct sysfs_dirent *sd) 530 { 531 struct sysfs_inode_attrs *ps_iattr; 532 533 /* 534 * Removal can be called multiple times on the same node. Only the 535 * first invocation is effective and puts the base ref. 536 */ 537 if (sd->s_flags & SYSFS_FLAG_REMOVED) 538 return; 539 540 sysfs_unlink_sibling(sd); 541 542 /* Update timestamps on the parent */ 543 ps_iattr = sd->s_parent->s_iattr; 544 if (ps_iattr) { 545 struct iattr *ps_iattrs = &ps_iattr->ia_iattr; 546 ps_iattrs->ia_ctime = ps_iattrs->ia_mtime = CURRENT_TIME; 547 } 548 549 sd->s_flags |= SYSFS_FLAG_REMOVED; 550 sd->u.removed_list = acxt->removed; 551 acxt->removed = sd; 552 } 553 554 /** 555 * sysfs_addrm_finish - finish up sysfs_dirent add/remove 556 * @acxt: addrm context to finish up 557 * 558 * Finish up sysfs_dirent add/remove. Resources acquired by 559 * sysfs_addrm_start() are released and removed sysfs_dirents are 560 * cleaned up. 561 * 562 * LOCKING: 563 * sysfs_mutex is released. 564 */ 565 void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt) 566 __releases(sysfs_mutex) 567 { 568 /* release resources acquired by sysfs_addrm_start() */ 569 mutex_unlock(&sysfs_mutex); 570 571 /* kill removed sysfs_dirents */ 572 while (acxt->removed) { 573 struct sysfs_dirent *sd = acxt->removed; 574 575 acxt->removed = sd->u.removed_list; 576 577 sysfs_deactivate(sd); 578 sysfs_unmap_bin_file(sd); 579 kernfs_put(sd); 580 } 581 } 582 583 /** 584 * kernfs_find_ns - find sysfs_dirent with the given name 585 * @parent: sysfs_dirent to search under 586 * @name: name to look for 587 * @ns: the namespace tag to use 588 * 589 * Look for sysfs_dirent with name @name under @parent. Returns pointer to 590 * the found sysfs_dirent on success, %NULL on failure. 591 */ 592 static struct sysfs_dirent *kernfs_find_ns(struct sysfs_dirent *parent, 593 const unsigned char *name, 594 const void *ns) 595 { 596 struct rb_node *node = parent->s_dir.children.rb_node; 597 bool has_ns = parent->s_flags & SYSFS_FLAG_NS; 598 unsigned int hash; 599 600 lockdep_assert_held(&sysfs_mutex); 601 602 if (has_ns != (bool)ns) { 603 WARN(1, KERN_WARNING "sysfs: ns %s in '%s' for '%s'\n", 604 has_ns ? "required" : "invalid", 605 parent->s_name, name); 606 return NULL; 607 } 608 609 hash = sysfs_name_hash(name, ns); 610 while (node) { 611 struct sysfs_dirent *sd; 612 int result; 613 614 sd = to_sysfs_dirent(node); 615 result = sysfs_name_compare(hash, name, ns, sd); 616 if (result < 0) 617 node = node->rb_left; 618 else if (result > 0) 619 node = node->rb_right; 620 else 621 return sd; 622 } 623 return NULL; 624 } 625 626 /** 627 * kernfs_find_and_get_ns - find and get sysfs_dirent with the given name 628 * @parent: sysfs_dirent to search under 629 * @name: name to look for 630 * @ns: the namespace tag to use 631 * 632 * Look for sysfs_dirent with name @name under @parent and get a reference 633 * if found. This function may sleep and returns pointer to the found 634 * sysfs_dirent on success, %NULL on failure. 635 */ 636 struct sysfs_dirent *kernfs_find_and_get_ns(struct sysfs_dirent *parent, 637 const char *name, const void *ns) 638 { 639 struct sysfs_dirent *sd; 640 641 mutex_lock(&sysfs_mutex); 642 sd = kernfs_find_ns(parent, name, ns); 643 kernfs_get(sd); 644 mutex_unlock(&sysfs_mutex); 645 646 return sd; 647 } 648 EXPORT_SYMBOL_GPL(kernfs_find_and_get_ns); 649 650 /** 651 * kernfs_create_dir_ns - create a directory 652 * @parent: parent in which to create a new directory 653 * @name: name of the new directory 654 * @priv: opaque data associated with the new directory 655 * @ns: optional namespace tag of the directory 656 * 657 * Returns the created node on success, ERR_PTR() value on failure. 658 */ 659 struct sysfs_dirent *kernfs_create_dir_ns(struct sysfs_dirent *parent, 660 const char *name, void *priv, 661 const void *ns) 662 { 663 umode_t mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO; 664 struct sysfs_addrm_cxt acxt; 665 struct sysfs_dirent *sd; 666 int rc; 667 668 /* allocate */ 669 sd = sysfs_new_dirent(name, mode, SYSFS_DIR); 670 if (!sd) 671 return ERR_PTR(-ENOMEM); 672 673 sd->s_ns = ns; 674 sd->priv = priv; 675 676 /* link in */ 677 sysfs_addrm_start(&acxt); 678 rc = sysfs_add_one(&acxt, sd, parent); 679 sysfs_addrm_finish(&acxt); 680 681 if (!rc) 682 return sd; 683 684 kernfs_put(sd); 685 return ERR_PTR(rc); 686 } 687 688 /** 689 * sysfs_create_dir_ns - create a directory for an object with a namespace tag 690 * @kobj: object we're creating directory for 691 * @ns: the namespace tag to use 692 */ 693 int sysfs_create_dir_ns(struct kobject *kobj, const void *ns) 694 { 695 struct sysfs_dirent *parent_sd, *sd; 696 697 BUG_ON(!kobj); 698 699 if (kobj->parent) 700 parent_sd = kobj->parent->sd; 701 else 702 parent_sd = &sysfs_root; 703 704 if (!parent_sd) 705 return -ENOENT; 706 707 sd = kernfs_create_dir_ns(parent_sd, kobject_name(kobj), kobj, ns); 708 if (IS_ERR(sd)) { 709 if (PTR_ERR(sd) == -EEXIST) 710 sysfs_warn_dup(parent_sd, kobject_name(kobj)); 711 return PTR_ERR(sd); 712 } 713 714 kobj->sd = sd; 715 return 0; 716 } 717 718 static struct dentry *sysfs_lookup(struct inode *dir, struct dentry *dentry, 719 unsigned int flags) 720 { 721 struct dentry *ret = NULL; 722 struct dentry *parent = dentry->d_parent; 723 struct sysfs_dirent *parent_sd = parent->d_fsdata; 724 struct sysfs_dirent *sd; 725 struct inode *inode; 726 const void *ns = NULL; 727 728 mutex_lock(&sysfs_mutex); 729 730 if (parent_sd->s_flags & SYSFS_FLAG_NS) 731 ns = sysfs_info(dir->i_sb)->ns; 732 733 sd = kernfs_find_ns(parent_sd, dentry->d_name.name, ns); 734 735 /* no such entry */ 736 if (!sd) { 737 ret = ERR_PTR(-ENOENT); 738 goto out_unlock; 739 } 740 kernfs_get(sd); 741 dentry->d_fsdata = sd; 742 743 /* attach dentry and inode */ 744 inode = sysfs_get_inode(dir->i_sb, sd); 745 if (!inode) { 746 ret = ERR_PTR(-ENOMEM); 747 goto out_unlock; 748 } 749 750 /* instantiate and hash dentry */ 751 ret = d_materialise_unique(dentry, inode); 752 out_unlock: 753 mutex_unlock(&sysfs_mutex); 754 return ret; 755 } 756 757 const struct inode_operations sysfs_dir_inode_operations = { 758 .lookup = sysfs_lookup, 759 .permission = sysfs_permission, 760 .setattr = sysfs_setattr, 761 .getattr = sysfs_getattr, 762 .setxattr = sysfs_setxattr, 763 }; 764 765 static struct sysfs_dirent *sysfs_leftmost_descendant(struct sysfs_dirent *pos) 766 { 767 struct sysfs_dirent *last; 768 769 while (true) { 770 struct rb_node *rbn; 771 772 last = pos; 773 774 if (sysfs_type(pos) != SYSFS_DIR) 775 break; 776 777 rbn = rb_first(&pos->s_dir.children); 778 if (!rbn) 779 break; 780 781 pos = to_sysfs_dirent(rbn); 782 } 783 784 return last; 785 } 786 787 /** 788 * sysfs_next_descendant_post - find the next descendant for post-order walk 789 * @pos: the current position (%NULL to initiate traversal) 790 * @root: sysfs_dirent whose descendants to walk 791 * 792 * Find the next descendant to visit for post-order traversal of @root's 793 * descendants. @root is included in the iteration and the last node to be 794 * visited. 795 */ 796 static struct sysfs_dirent *sysfs_next_descendant_post(struct sysfs_dirent *pos, 797 struct sysfs_dirent *root) 798 { 799 struct rb_node *rbn; 800 801 lockdep_assert_held(&sysfs_mutex); 802 803 /* if first iteration, visit leftmost descendant which may be root */ 804 if (!pos) 805 return sysfs_leftmost_descendant(root); 806 807 /* if we visited @root, we're done */ 808 if (pos == root) 809 return NULL; 810 811 /* if there's an unvisited sibling, visit its leftmost descendant */ 812 rbn = rb_next(&pos->s_rb); 813 if (rbn) 814 return sysfs_leftmost_descendant(to_sysfs_dirent(rbn)); 815 816 /* no sibling left, visit parent */ 817 return pos->s_parent; 818 } 819 820 static void __kernfs_remove(struct sysfs_addrm_cxt *acxt, 821 struct sysfs_dirent *sd) 822 { 823 struct sysfs_dirent *pos, *next; 824 825 if (!sd) 826 return; 827 828 pr_debug("sysfs %s: removing\n", sd->s_name); 829 830 next = NULL; 831 do { 832 pos = next; 833 next = sysfs_next_descendant_post(pos, sd); 834 if (pos) 835 sysfs_remove_one(acxt, pos); 836 } while (next); 837 } 838 839 /** 840 * kernfs_remove - remove a sysfs_dirent recursively 841 * @sd: the sysfs_dirent to remove 842 * 843 * Remove @sd along with all its subdirectories and files. 844 */ 845 void kernfs_remove(struct sysfs_dirent *sd) 846 { 847 struct sysfs_addrm_cxt acxt; 848 849 sysfs_addrm_start(&acxt); 850 __kernfs_remove(&acxt, sd); 851 sysfs_addrm_finish(&acxt); 852 } 853 854 /** 855 * kernfs_remove_by_name_ns - find a sysfs_dirent by name and remove it 856 * @dir_sd: parent of the target 857 * @name: name of the sysfs_dirent to remove 858 * @ns: namespace tag of the sysfs_dirent to remove 859 * 860 * Look for the sysfs_dirent with @name and @ns under @dir_sd and remove 861 * it. Returns 0 on success, -ENOENT if such entry doesn't exist. 862 */ 863 int kernfs_remove_by_name_ns(struct sysfs_dirent *dir_sd, const char *name, 864 const void *ns) 865 { 866 struct sysfs_addrm_cxt acxt; 867 struct sysfs_dirent *sd; 868 869 if (!dir_sd) { 870 WARN(1, KERN_WARNING "sysfs: can not remove '%s', no directory\n", 871 name); 872 return -ENOENT; 873 } 874 875 sysfs_addrm_start(&acxt); 876 877 sd = kernfs_find_ns(dir_sd, name, ns); 878 if (sd) 879 __kernfs_remove(&acxt, sd); 880 881 sysfs_addrm_finish(&acxt); 882 883 if (sd) 884 return 0; 885 else 886 return -ENOENT; 887 } 888 889 /** 890 * sysfs_remove_dir - remove an object's directory. 891 * @kobj: object. 892 * 893 * The only thing special about this is that we remove any files in 894 * the directory before we remove the directory, and we've inlined 895 * what used to be sysfs_rmdir() below, instead of calling separately. 896 */ 897 void sysfs_remove_dir(struct kobject *kobj) 898 { 899 struct sysfs_dirent *sd = kobj->sd; 900 901 /* 902 * In general, kboject owner is responsible for ensuring removal 903 * doesn't race with other operations and sysfs doesn't provide any 904 * protection; however, when @kobj is used as a symlink target, the 905 * symlinking entity usually doesn't own @kobj and thus has no 906 * control over removal. @kobj->sd may be removed anytime and 907 * symlink code may end up dereferencing an already freed sd. 908 * 909 * sysfs_symlink_target_lock synchronizes @kobj->sd disassociation 910 * against symlink operations so that symlink code can safely 911 * dereference @kobj->sd. 912 */ 913 spin_lock(&sysfs_symlink_target_lock); 914 kobj->sd = NULL; 915 spin_unlock(&sysfs_symlink_target_lock); 916 917 if (sd) { 918 WARN_ON_ONCE(sysfs_type(sd) != SYSFS_DIR); 919 kernfs_remove(sd); 920 } 921 } 922 923 /** 924 * kernfs_rename_ns - move and rename a kernfs_node 925 * @sd: target node 926 * @new_parent: new parent to put @sd under 927 * @new_name: new name 928 * @new_ns: new namespace tag 929 */ 930 int kernfs_rename_ns(struct sysfs_dirent *sd, struct sysfs_dirent *new_parent, 931 const char *new_name, const void *new_ns) 932 { 933 int error; 934 935 mutex_lock(&sysfs_mutex); 936 937 error = 0; 938 if ((sd->s_parent == new_parent) && (sd->s_ns == new_ns) && 939 (strcmp(sd->s_name, new_name) == 0)) 940 goto out; /* nothing to rename */ 941 942 error = -EEXIST; 943 if (kernfs_find_ns(new_parent, new_name, new_ns)) 944 goto out; 945 946 /* rename sysfs_dirent */ 947 if (strcmp(sd->s_name, new_name) != 0) { 948 error = -ENOMEM; 949 new_name = kstrdup(new_name, GFP_KERNEL); 950 if (!new_name) 951 goto out; 952 953 kfree(sd->s_name); 954 sd->s_name = new_name; 955 } 956 957 /* 958 * Move to the appropriate place in the appropriate directories rbtree. 959 */ 960 sysfs_unlink_sibling(sd); 961 kernfs_get(new_parent); 962 kernfs_put(sd->s_parent); 963 sd->s_ns = new_ns; 964 sd->s_hash = sysfs_name_hash(sd->s_name, sd->s_ns); 965 sd->s_parent = new_parent; 966 sysfs_link_sibling(sd); 967 968 error = 0; 969 out: 970 mutex_unlock(&sysfs_mutex); 971 return error; 972 } 973 974 int sysfs_rename_dir_ns(struct kobject *kobj, const char *new_name, 975 const void *new_ns) 976 { 977 struct sysfs_dirent *parent_sd = kobj->sd->s_parent; 978 979 return kernfs_rename_ns(kobj->sd, parent_sd, new_name, new_ns); 980 } 981 982 int sysfs_move_dir_ns(struct kobject *kobj, struct kobject *new_parent_kobj, 983 const void *new_ns) 984 { 985 struct sysfs_dirent *sd = kobj->sd; 986 struct sysfs_dirent *new_parent_sd; 987 988 BUG_ON(!sd->s_parent); 989 new_parent_sd = new_parent_kobj && new_parent_kobj->sd ? 990 new_parent_kobj->sd : &sysfs_root; 991 992 return kernfs_rename_ns(sd, new_parent_sd, sd->s_name, new_ns); 993 } 994 995 /** 996 * kernfs_enable_ns - enable namespace under a directory 997 * @sd: directory of interest, should be empty 998 * 999 * This is to be called right after @sd is created to enable namespace 1000 * under it. All children of @sd must have non-NULL namespace tags and 1001 * only the ones which match the super_block's tag will be visible. 1002 */ 1003 void kernfs_enable_ns(struct sysfs_dirent *sd) 1004 { 1005 WARN_ON_ONCE(sysfs_type(sd) != SYSFS_DIR); 1006 WARN_ON_ONCE(!RB_EMPTY_ROOT(&sd->s_dir.children)); 1007 sd->s_flags |= SYSFS_FLAG_NS; 1008 } 1009 1010 /* Relationship between s_mode and the DT_xxx types */ 1011 static inline unsigned char dt_type(struct sysfs_dirent *sd) 1012 { 1013 return (sd->s_mode >> 12) & 15; 1014 } 1015 1016 static int sysfs_dir_release(struct inode *inode, struct file *filp) 1017 { 1018 kernfs_put(filp->private_data); 1019 return 0; 1020 } 1021 1022 static struct sysfs_dirent *sysfs_dir_pos(const void *ns, 1023 struct sysfs_dirent *parent_sd, loff_t hash, struct sysfs_dirent *pos) 1024 { 1025 if (pos) { 1026 int valid = !(pos->s_flags & SYSFS_FLAG_REMOVED) && 1027 pos->s_parent == parent_sd && 1028 hash == pos->s_hash; 1029 kernfs_put(pos); 1030 if (!valid) 1031 pos = NULL; 1032 } 1033 if (!pos && (hash > 1) && (hash < INT_MAX)) { 1034 struct rb_node *node = parent_sd->s_dir.children.rb_node; 1035 while (node) { 1036 pos = to_sysfs_dirent(node); 1037 1038 if (hash < pos->s_hash) 1039 node = node->rb_left; 1040 else if (hash > pos->s_hash) 1041 node = node->rb_right; 1042 else 1043 break; 1044 } 1045 } 1046 /* Skip over entries in the wrong namespace */ 1047 while (pos && pos->s_ns != ns) { 1048 struct rb_node *node = rb_next(&pos->s_rb); 1049 if (!node) 1050 pos = NULL; 1051 else 1052 pos = to_sysfs_dirent(node); 1053 } 1054 return pos; 1055 } 1056 1057 static struct sysfs_dirent *sysfs_dir_next_pos(const void *ns, 1058 struct sysfs_dirent *parent_sd, ino_t ino, struct sysfs_dirent *pos) 1059 { 1060 pos = sysfs_dir_pos(ns, parent_sd, ino, pos); 1061 if (pos) 1062 do { 1063 struct rb_node *node = rb_next(&pos->s_rb); 1064 if (!node) 1065 pos = NULL; 1066 else 1067 pos = to_sysfs_dirent(node); 1068 } while (pos && pos->s_ns != ns); 1069 return pos; 1070 } 1071 1072 static int sysfs_readdir(struct file *file, struct dir_context *ctx) 1073 { 1074 struct dentry *dentry = file->f_path.dentry; 1075 struct sysfs_dirent *parent_sd = dentry->d_fsdata; 1076 struct sysfs_dirent *pos = file->private_data; 1077 const void *ns = NULL; 1078 1079 if (!dir_emit_dots(file, ctx)) 1080 return 0; 1081 mutex_lock(&sysfs_mutex); 1082 1083 if (parent_sd->s_flags & SYSFS_FLAG_NS) 1084 ns = sysfs_info(dentry->d_sb)->ns; 1085 1086 for (pos = sysfs_dir_pos(ns, parent_sd, ctx->pos, pos); 1087 pos; 1088 pos = sysfs_dir_next_pos(ns, parent_sd, ctx->pos, pos)) { 1089 const char *name = pos->s_name; 1090 unsigned int type = dt_type(pos); 1091 int len = strlen(name); 1092 ino_t ino = pos->s_ino; 1093 1094 ctx->pos = pos->s_hash; 1095 file->private_data = pos; 1096 kernfs_get(pos); 1097 1098 mutex_unlock(&sysfs_mutex); 1099 if (!dir_emit(ctx, name, len, ino, type)) 1100 return 0; 1101 mutex_lock(&sysfs_mutex); 1102 } 1103 mutex_unlock(&sysfs_mutex); 1104 file->private_data = NULL; 1105 ctx->pos = INT_MAX; 1106 return 0; 1107 } 1108 1109 static loff_t sysfs_dir_llseek(struct file *file, loff_t offset, int whence) 1110 { 1111 struct inode *inode = file_inode(file); 1112 loff_t ret; 1113 1114 mutex_lock(&inode->i_mutex); 1115 ret = generic_file_llseek(file, offset, whence); 1116 mutex_unlock(&inode->i_mutex); 1117 1118 return ret; 1119 } 1120 1121 const struct file_operations sysfs_dir_operations = { 1122 .read = generic_read_dir, 1123 .iterate = sysfs_readdir, 1124 .release = sysfs_dir_release, 1125 .llseek = sysfs_dir_llseek, 1126 }; 1127