1 /* 2 * hugetlbpage-backed filesystem. Based on ramfs. 3 * 4 * William Irwin, 2002 5 * 6 * Copyright (C) 2002 Linus Torvalds. 7 */ 8 9 #include <linux/module.h> 10 #include <linux/thread_info.h> 11 #include <asm/current.h> 12 #include <linux/sched.h> /* remove ASAP */ 13 #include <linux/fs.h> 14 #include <linux/mount.h> 15 #include <linux/file.h> 16 #include <linux/kernel.h> 17 #include <linux/writeback.h> 18 #include <linux/pagemap.h> 19 #include <linux/highmem.h> 20 #include <linux/init.h> 21 #include <linux/string.h> 22 #include <linux/capability.h> 23 #include <linux/ctype.h> 24 #include <linux/backing-dev.h> 25 #include <linux/hugetlb.h> 26 #include <linux/pagevec.h> 27 #include <linux/parser.h> 28 #include <linux/mman.h> 29 #include <linux/quotaops.h> 30 #include <linux/slab.h> 31 #include <linux/dnotify.h> 32 #include <linux/statfs.h> 33 #include <linux/security.h> 34 35 #include <asm/uaccess.h> 36 37 /* some random number */ 38 #define HUGETLBFS_MAGIC 0x958458f6 39 40 static const struct super_operations hugetlbfs_ops; 41 static const struct address_space_operations hugetlbfs_aops; 42 const struct file_operations hugetlbfs_file_operations; 43 static const struct inode_operations hugetlbfs_dir_inode_operations; 44 static const struct inode_operations hugetlbfs_inode_operations; 45 46 static struct backing_dev_info hugetlbfs_backing_dev_info = { 47 .ra_pages = 0, /* No readahead */ 48 .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK, 49 }; 50 51 int sysctl_hugetlb_shm_group; 52 53 enum { 54 Opt_size, Opt_nr_inodes, 55 Opt_mode, Opt_uid, Opt_gid, 56 Opt_err, 57 }; 58 59 static match_table_t tokens = { 60 {Opt_size, "size=%s"}, 61 {Opt_nr_inodes, "nr_inodes=%s"}, 62 {Opt_mode, "mode=%o"}, 63 {Opt_uid, "uid=%u"}, 64 {Opt_gid, "gid=%u"}, 65 {Opt_err, NULL}, 66 }; 67 68 static void huge_pagevec_release(struct pagevec *pvec) 69 { 70 int i; 71 72 for (i = 0; i < pagevec_count(pvec); ++i) 73 put_page(pvec->pages[i]); 74 75 pagevec_reinit(pvec); 76 } 77 78 static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) 79 { 80 struct inode *inode = file->f_path.dentry->d_inode; 81 loff_t len, vma_len; 82 int ret; 83 84 /* 85 * vma address alignment (but not the pgoff alignment) has 86 * already been checked by prepare_hugepage_range. If you add 87 * any error returns here, do so after setting VM_HUGETLB, so 88 * is_vm_hugetlb_page tests below unmap_region go the right 89 * way when do_mmap_pgoff unwinds (may be important on powerpc 90 * and ia64). 91 */ 92 vma->vm_flags |= VM_HUGETLB | VM_RESERVED; 93 vma->vm_ops = &hugetlb_vm_ops; 94 95 if (vma->vm_pgoff & ~(HPAGE_MASK >> PAGE_SHIFT)) 96 return -EINVAL; 97 98 vma_len = (loff_t)(vma->vm_end - vma->vm_start); 99 100 mutex_lock(&inode->i_mutex); 101 file_accessed(file); 102 103 ret = -ENOMEM; 104 len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT); 105 106 if (vma->vm_flags & VM_MAYSHARE && 107 hugetlb_reserve_pages(inode, vma->vm_pgoff >> (HPAGE_SHIFT-PAGE_SHIFT), 108 len >> HPAGE_SHIFT)) 109 goto out; 110 111 ret = 0; 112 hugetlb_prefault_arch_hook(vma->vm_mm); 113 if (vma->vm_flags & VM_WRITE && inode->i_size < len) 114 inode->i_size = len; 115 out: 116 mutex_unlock(&inode->i_mutex); 117 118 return ret; 119 } 120 121 /* 122 * Called under down_write(mmap_sem). 123 */ 124 125 #ifndef HAVE_ARCH_HUGETLB_UNMAPPED_AREA 126 static unsigned long 127 hugetlb_get_unmapped_area(struct file *file, unsigned long addr, 128 unsigned long len, unsigned long pgoff, unsigned long flags) 129 { 130 struct mm_struct *mm = current->mm; 131 struct vm_area_struct *vma; 132 unsigned long start_addr; 133 134 if (len & ~HPAGE_MASK) 135 return -EINVAL; 136 if (len > TASK_SIZE) 137 return -ENOMEM; 138 139 if (flags & MAP_FIXED) { 140 if (prepare_hugepage_range(addr, len)) 141 return -EINVAL; 142 return addr; 143 } 144 145 if (addr) { 146 addr = ALIGN(addr, HPAGE_SIZE); 147 vma = find_vma(mm, addr); 148 if (TASK_SIZE - len >= addr && 149 (!vma || addr + len <= vma->vm_start)) 150 return addr; 151 } 152 153 start_addr = mm->free_area_cache; 154 155 if (len <= mm->cached_hole_size) 156 start_addr = TASK_UNMAPPED_BASE; 157 158 full_search: 159 addr = ALIGN(start_addr, HPAGE_SIZE); 160 161 for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { 162 /* At this point: (!vma || addr < vma->vm_end). */ 163 if (TASK_SIZE - len < addr) { 164 /* 165 * Start a new search - just in case we missed 166 * some holes. 167 */ 168 if (start_addr != TASK_UNMAPPED_BASE) { 169 start_addr = TASK_UNMAPPED_BASE; 170 goto full_search; 171 } 172 return -ENOMEM; 173 } 174 175 if (!vma || addr + len <= vma->vm_start) 176 return addr; 177 addr = ALIGN(vma->vm_end, HPAGE_SIZE); 178 } 179 } 180 #endif 181 182 /* 183 * Read a page. Again trivial. If it didn't already exist 184 * in the page cache, it is zero-filled. 185 */ 186 static int hugetlbfs_readpage(struct file *file, struct page * page) 187 { 188 unlock_page(page); 189 return -EINVAL; 190 } 191 192 static int hugetlbfs_prepare_write(struct file *file, 193 struct page *page, unsigned offset, unsigned to) 194 { 195 return -EINVAL; 196 } 197 198 static int hugetlbfs_commit_write(struct file *file, 199 struct page *page, unsigned offset, unsigned to) 200 { 201 return -EINVAL; 202 } 203 204 static void truncate_huge_page(struct page *page) 205 { 206 cancel_dirty_page(page, /* No IO accounting for huge pages? */0); 207 ClearPageUptodate(page); 208 remove_from_page_cache(page); 209 put_page(page); 210 } 211 212 static void truncate_hugepages(struct inode *inode, loff_t lstart) 213 { 214 struct address_space *mapping = &inode->i_data; 215 const pgoff_t start = lstart >> HPAGE_SHIFT; 216 struct pagevec pvec; 217 pgoff_t next; 218 int i, freed = 0; 219 220 pagevec_init(&pvec, 0); 221 next = start; 222 while (1) { 223 if (!pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) { 224 if (next == start) 225 break; 226 next = start; 227 continue; 228 } 229 230 for (i = 0; i < pagevec_count(&pvec); ++i) { 231 struct page *page = pvec.pages[i]; 232 233 lock_page(page); 234 if (page->index > next) 235 next = page->index; 236 ++next; 237 truncate_huge_page(page); 238 unlock_page(page); 239 hugetlb_put_quota(mapping); 240 freed++; 241 } 242 huge_pagevec_release(&pvec); 243 } 244 BUG_ON(!lstart && mapping->nrpages); 245 hugetlb_unreserve_pages(inode, start, freed); 246 } 247 248 static void hugetlbfs_delete_inode(struct inode *inode) 249 { 250 truncate_hugepages(inode, 0); 251 clear_inode(inode); 252 } 253 254 static void hugetlbfs_forget_inode(struct inode *inode) __releases(inode_lock) 255 { 256 struct super_block *sb = inode->i_sb; 257 258 if (!hlist_unhashed(&inode->i_hash)) { 259 if (!(inode->i_state & (I_DIRTY|I_LOCK))) 260 list_move(&inode->i_list, &inode_unused); 261 inodes_stat.nr_unused++; 262 if (!sb || (sb->s_flags & MS_ACTIVE)) { 263 spin_unlock(&inode_lock); 264 return; 265 } 266 inode->i_state |= I_WILL_FREE; 267 spin_unlock(&inode_lock); 268 /* 269 * write_inode_now is a noop as we set BDI_CAP_NO_WRITEBACK 270 * in our backing_dev_info. 271 */ 272 write_inode_now(inode, 1); 273 spin_lock(&inode_lock); 274 inode->i_state &= ~I_WILL_FREE; 275 inodes_stat.nr_unused--; 276 hlist_del_init(&inode->i_hash); 277 } 278 list_del_init(&inode->i_list); 279 list_del_init(&inode->i_sb_list); 280 inode->i_state |= I_FREEING; 281 inodes_stat.nr_inodes--; 282 spin_unlock(&inode_lock); 283 truncate_hugepages(inode, 0); 284 clear_inode(inode); 285 destroy_inode(inode); 286 } 287 288 static void hugetlbfs_drop_inode(struct inode *inode) 289 { 290 if (!inode->i_nlink) 291 generic_delete_inode(inode); 292 else 293 hugetlbfs_forget_inode(inode); 294 } 295 296 static inline void 297 hugetlb_vmtruncate_list(struct prio_tree_root *root, pgoff_t pgoff) 298 { 299 struct vm_area_struct *vma; 300 struct prio_tree_iter iter; 301 302 vma_prio_tree_foreach(vma, &iter, root, pgoff, ULONG_MAX) { 303 unsigned long v_offset; 304 305 /* 306 * Can the expression below overflow on 32-bit arches? 307 * No, because the prio_tree returns us only those vmas 308 * which overlap the truncated area starting at pgoff, 309 * and no vma on a 32-bit arch can span beyond the 4GB. 310 */ 311 if (vma->vm_pgoff < pgoff) 312 v_offset = (pgoff - vma->vm_pgoff) << PAGE_SHIFT; 313 else 314 v_offset = 0; 315 316 __unmap_hugepage_range(vma, 317 vma->vm_start + v_offset, vma->vm_end); 318 } 319 } 320 321 /* 322 * Expanding truncates are not allowed. 323 */ 324 static int hugetlb_vmtruncate(struct inode *inode, loff_t offset) 325 { 326 pgoff_t pgoff; 327 struct address_space *mapping = inode->i_mapping; 328 329 if (offset > inode->i_size) 330 return -EINVAL; 331 332 BUG_ON(offset & ~HPAGE_MASK); 333 pgoff = offset >> PAGE_SHIFT; 334 335 inode->i_size = offset; 336 spin_lock(&mapping->i_mmap_lock); 337 if (!prio_tree_empty(&mapping->i_mmap)) 338 hugetlb_vmtruncate_list(&mapping->i_mmap, pgoff); 339 spin_unlock(&mapping->i_mmap_lock); 340 truncate_hugepages(inode, offset); 341 return 0; 342 } 343 344 static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr) 345 { 346 struct inode *inode = dentry->d_inode; 347 int error; 348 unsigned int ia_valid = attr->ia_valid; 349 350 BUG_ON(!inode); 351 352 error = inode_change_ok(inode, attr); 353 if (error) 354 goto out; 355 356 if (ia_valid & ATTR_SIZE) { 357 error = -EINVAL; 358 if (!(attr->ia_size & ~HPAGE_MASK)) 359 error = hugetlb_vmtruncate(inode, attr->ia_size); 360 if (error) 361 goto out; 362 attr->ia_valid &= ~ATTR_SIZE; 363 } 364 error = inode_setattr(inode, attr); 365 out: 366 return error; 367 } 368 369 static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid, 370 gid_t gid, int mode, dev_t dev) 371 { 372 struct inode *inode; 373 374 inode = new_inode(sb); 375 if (inode) { 376 struct hugetlbfs_inode_info *info; 377 inode->i_mode = mode; 378 inode->i_uid = uid; 379 inode->i_gid = gid; 380 inode->i_blocks = 0; 381 inode->i_mapping->a_ops = &hugetlbfs_aops; 382 inode->i_mapping->backing_dev_info =&hugetlbfs_backing_dev_info; 383 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 384 INIT_LIST_HEAD(&inode->i_mapping->private_list); 385 info = HUGETLBFS_I(inode); 386 mpol_shared_policy_init(&info->policy, MPOL_DEFAULT, NULL); 387 switch (mode & S_IFMT) { 388 default: 389 init_special_inode(inode, mode, dev); 390 break; 391 case S_IFREG: 392 inode->i_op = &hugetlbfs_inode_operations; 393 inode->i_fop = &hugetlbfs_file_operations; 394 break; 395 case S_IFDIR: 396 inode->i_op = &hugetlbfs_dir_inode_operations; 397 inode->i_fop = &simple_dir_operations; 398 399 /* directory inodes start off with i_nlink == 2 (for "." entry) */ 400 inc_nlink(inode); 401 break; 402 case S_IFLNK: 403 inode->i_op = &page_symlink_inode_operations; 404 break; 405 } 406 } 407 return inode; 408 } 409 410 /* 411 * File creation. Allocate an inode, and we're done.. 412 */ 413 static int hugetlbfs_mknod(struct inode *dir, 414 struct dentry *dentry, int mode, dev_t dev) 415 { 416 struct inode *inode; 417 int error = -ENOSPC; 418 gid_t gid; 419 420 if (dir->i_mode & S_ISGID) { 421 gid = dir->i_gid; 422 if (S_ISDIR(mode)) 423 mode |= S_ISGID; 424 } else { 425 gid = current->fsgid; 426 } 427 inode = hugetlbfs_get_inode(dir->i_sb, current->fsuid, gid, mode, dev); 428 if (inode) { 429 dir->i_ctime = dir->i_mtime = CURRENT_TIME; 430 d_instantiate(dentry, inode); 431 dget(dentry); /* Extra count - pin the dentry in core */ 432 error = 0; 433 } 434 return error; 435 } 436 437 static int hugetlbfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) 438 { 439 int retval = hugetlbfs_mknod(dir, dentry, mode | S_IFDIR, 0); 440 if (!retval) 441 inc_nlink(dir); 442 return retval; 443 } 444 445 static int hugetlbfs_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd) 446 { 447 return hugetlbfs_mknod(dir, dentry, mode | S_IFREG, 0); 448 } 449 450 static int hugetlbfs_symlink(struct inode *dir, 451 struct dentry *dentry, const char *symname) 452 { 453 struct inode *inode; 454 int error = -ENOSPC; 455 gid_t gid; 456 457 if (dir->i_mode & S_ISGID) 458 gid = dir->i_gid; 459 else 460 gid = current->fsgid; 461 462 inode = hugetlbfs_get_inode(dir->i_sb, current->fsuid, 463 gid, S_IFLNK|S_IRWXUGO, 0); 464 if (inode) { 465 int l = strlen(symname)+1; 466 error = page_symlink(inode, symname, l); 467 if (!error) { 468 d_instantiate(dentry, inode); 469 dget(dentry); 470 } else 471 iput(inode); 472 } 473 dir->i_ctime = dir->i_mtime = CURRENT_TIME; 474 475 return error; 476 } 477 478 /* 479 * mark the head page dirty 480 */ 481 static int hugetlbfs_set_page_dirty(struct page *page) 482 { 483 struct page *head = compound_head(page); 484 485 SetPageDirty(head); 486 return 0; 487 } 488 489 static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf) 490 { 491 struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(dentry->d_sb); 492 493 buf->f_type = HUGETLBFS_MAGIC; 494 buf->f_bsize = HPAGE_SIZE; 495 if (sbinfo) { 496 spin_lock(&sbinfo->stat_lock); 497 /* If no limits set, just report 0 for max/free/used 498 * blocks, like simple_statfs() */ 499 if (sbinfo->max_blocks >= 0) { 500 buf->f_blocks = sbinfo->max_blocks; 501 buf->f_bavail = buf->f_bfree = sbinfo->free_blocks; 502 buf->f_files = sbinfo->max_inodes; 503 buf->f_ffree = sbinfo->free_inodes; 504 } 505 spin_unlock(&sbinfo->stat_lock); 506 } 507 buf->f_namelen = NAME_MAX; 508 return 0; 509 } 510 511 static void hugetlbfs_put_super(struct super_block *sb) 512 { 513 struct hugetlbfs_sb_info *sbi = HUGETLBFS_SB(sb); 514 515 if (sbi) { 516 sb->s_fs_info = NULL; 517 kfree(sbi); 518 } 519 } 520 521 static inline int hugetlbfs_dec_free_inodes(struct hugetlbfs_sb_info *sbinfo) 522 { 523 if (sbinfo->free_inodes >= 0) { 524 spin_lock(&sbinfo->stat_lock); 525 if (unlikely(!sbinfo->free_inodes)) { 526 spin_unlock(&sbinfo->stat_lock); 527 return 0; 528 } 529 sbinfo->free_inodes--; 530 spin_unlock(&sbinfo->stat_lock); 531 } 532 533 return 1; 534 } 535 536 static void hugetlbfs_inc_free_inodes(struct hugetlbfs_sb_info *sbinfo) 537 { 538 if (sbinfo->free_inodes >= 0) { 539 spin_lock(&sbinfo->stat_lock); 540 sbinfo->free_inodes++; 541 spin_unlock(&sbinfo->stat_lock); 542 } 543 } 544 545 546 static struct kmem_cache *hugetlbfs_inode_cachep; 547 548 static struct inode *hugetlbfs_alloc_inode(struct super_block *sb) 549 { 550 struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(sb); 551 struct hugetlbfs_inode_info *p; 552 553 if (unlikely(!hugetlbfs_dec_free_inodes(sbinfo))) 554 return NULL; 555 p = kmem_cache_alloc(hugetlbfs_inode_cachep, GFP_KERNEL); 556 if (unlikely(!p)) { 557 hugetlbfs_inc_free_inodes(sbinfo); 558 return NULL; 559 } 560 return &p->vfs_inode; 561 } 562 563 static void hugetlbfs_destroy_inode(struct inode *inode) 564 { 565 hugetlbfs_inc_free_inodes(HUGETLBFS_SB(inode->i_sb)); 566 mpol_free_shared_policy(&HUGETLBFS_I(inode)->policy); 567 kmem_cache_free(hugetlbfs_inode_cachep, HUGETLBFS_I(inode)); 568 } 569 570 static const struct address_space_operations hugetlbfs_aops = { 571 .readpage = hugetlbfs_readpage, 572 .prepare_write = hugetlbfs_prepare_write, 573 .commit_write = hugetlbfs_commit_write, 574 .set_page_dirty = hugetlbfs_set_page_dirty, 575 }; 576 577 578 static void init_once(void *foo, struct kmem_cache *cachep, unsigned long flags) 579 { 580 struct hugetlbfs_inode_info *ei = (struct hugetlbfs_inode_info *)foo; 581 582 inode_init_once(&ei->vfs_inode); 583 } 584 585 const struct file_operations hugetlbfs_file_operations = { 586 .mmap = hugetlbfs_file_mmap, 587 .fsync = simple_sync_file, 588 .get_unmapped_area = hugetlb_get_unmapped_area, 589 }; 590 591 static const struct inode_operations hugetlbfs_dir_inode_operations = { 592 .create = hugetlbfs_create, 593 .lookup = simple_lookup, 594 .link = simple_link, 595 .unlink = simple_unlink, 596 .symlink = hugetlbfs_symlink, 597 .mkdir = hugetlbfs_mkdir, 598 .rmdir = simple_rmdir, 599 .mknod = hugetlbfs_mknod, 600 .rename = simple_rename, 601 .setattr = hugetlbfs_setattr, 602 }; 603 604 static const struct inode_operations hugetlbfs_inode_operations = { 605 .setattr = hugetlbfs_setattr, 606 }; 607 608 static const struct super_operations hugetlbfs_ops = { 609 .alloc_inode = hugetlbfs_alloc_inode, 610 .destroy_inode = hugetlbfs_destroy_inode, 611 .statfs = hugetlbfs_statfs, 612 .delete_inode = hugetlbfs_delete_inode, 613 .drop_inode = hugetlbfs_drop_inode, 614 .put_super = hugetlbfs_put_super, 615 }; 616 617 static int 618 hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig) 619 { 620 char *p, *rest; 621 substring_t args[MAX_OPT_ARGS]; 622 int option; 623 624 if (!options) 625 return 0; 626 627 while ((p = strsep(&options, ",")) != NULL) { 628 int token; 629 if (!*p) 630 continue; 631 632 token = match_token(p, tokens, args); 633 switch (token) { 634 case Opt_uid: 635 if (match_int(&args[0], &option)) 636 goto bad_val; 637 pconfig->uid = option; 638 break; 639 640 case Opt_gid: 641 if (match_int(&args[0], &option)) 642 goto bad_val; 643 pconfig->gid = option; 644 break; 645 646 case Opt_mode: 647 if (match_octal(&args[0], &option)) 648 goto bad_val; 649 pconfig->mode = option & 0777U; 650 break; 651 652 case Opt_size: { 653 unsigned long long size; 654 /* memparse() will accept a K/M/G without a digit */ 655 if (!isdigit(*args[0].from)) 656 goto bad_val; 657 size = memparse(args[0].from, &rest); 658 if (*rest == '%') { 659 size <<= HPAGE_SHIFT; 660 size *= max_huge_pages; 661 do_div(size, 100); 662 } 663 pconfig->nr_blocks = (size >> HPAGE_SHIFT); 664 break; 665 } 666 667 case Opt_nr_inodes: 668 /* memparse() will accept a K/M/G without a digit */ 669 if (!isdigit(*args[0].from)) 670 goto bad_val; 671 pconfig->nr_inodes = memparse(args[0].from, &rest); 672 break; 673 674 default: 675 printk(KERN_ERR "hugetlbfs: Bad mount option: \"%s\"\n", 676 p); 677 return -EINVAL; 678 break; 679 } 680 } 681 return 0; 682 683 bad_val: 684 printk(KERN_ERR "hugetlbfs: Bad value '%s' for mount option '%s'\n", 685 args[0].from, p); 686 return 1; 687 } 688 689 static int 690 hugetlbfs_fill_super(struct super_block *sb, void *data, int silent) 691 { 692 struct inode * inode; 693 struct dentry * root; 694 int ret; 695 struct hugetlbfs_config config; 696 struct hugetlbfs_sb_info *sbinfo; 697 698 config.nr_blocks = -1; /* No limit on size by default */ 699 config.nr_inodes = -1; /* No limit on number of inodes by default */ 700 config.uid = current->fsuid; 701 config.gid = current->fsgid; 702 config.mode = 0755; 703 ret = hugetlbfs_parse_options(data, &config); 704 if (ret) 705 return ret; 706 707 sbinfo = kmalloc(sizeof(struct hugetlbfs_sb_info), GFP_KERNEL); 708 if (!sbinfo) 709 return -ENOMEM; 710 sb->s_fs_info = sbinfo; 711 spin_lock_init(&sbinfo->stat_lock); 712 sbinfo->max_blocks = config.nr_blocks; 713 sbinfo->free_blocks = config.nr_blocks; 714 sbinfo->max_inodes = config.nr_inodes; 715 sbinfo->free_inodes = config.nr_inodes; 716 sb->s_maxbytes = MAX_LFS_FILESIZE; 717 sb->s_blocksize = HPAGE_SIZE; 718 sb->s_blocksize_bits = HPAGE_SHIFT; 719 sb->s_magic = HUGETLBFS_MAGIC; 720 sb->s_op = &hugetlbfs_ops; 721 sb->s_time_gran = 1; 722 inode = hugetlbfs_get_inode(sb, config.uid, config.gid, 723 S_IFDIR | config.mode, 0); 724 if (!inode) 725 goto out_free; 726 727 root = d_alloc_root(inode); 728 if (!root) { 729 iput(inode); 730 goto out_free; 731 } 732 sb->s_root = root; 733 return 0; 734 out_free: 735 kfree(sbinfo); 736 return -ENOMEM; 737 } 738 739 int hugetlb_get_quota(struct address_space *mapping) 740 { 741 int ret = 0; 742 struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(mapping->host->i_sb); 743 744 if (sbinfo->free_blocks > -1) { 745 spin_lock(&sbinfo->stat_lock); 746 if (sbinfo->free_blocks > 0) 747 sbinfo->free_blocks--; 748 else 749 ret = -ENOMEM; 750 spin_unlock(&sbinfo->stat_lock); 751 } 752 753 return ret; 754 } 755 756 void hugetlb_put_quota(struct address_space *mapping) 757 { 758 struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(mapping->host->i_sb); 759 760 if (sbinfo->free_blocks > -1) { 761 spin_lock(&sbinfo->stat_lock); 762 sbinfo->free_blocks++; 763 spin_unlock(&sbinfo->stat_lock); 764 } 765 } 766 767 static int hugetlbfs_get_sb(struct file_system_type *fs_type, 768 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 769 { 770 return get_sb_nodev(fs_type, flags, data, hugetlbfs_fill_super, mnt); 771 } 772 773 static struct file_system_type hugetlbfs_fs_type = { 774 .name = "hugetlbfs", 775 .get_sb = hugetlbfs_get_sb, 776 .kill_sb = kill_litter_super, 777 }; 778 779 static struct vfsmount *hugetlbfs_vfsmount; 780 781 static int can_do_hugetlb_shm(void) 782 { 783 return likely(capable(CAP_IPC_LOCK) || 784 in_group_p(sysctl_hugetlb_shm_group) || 785 can_do_mlock()); 786 } 787 788 struct file *hugetlb_file_setup(const char *name, size_t size) 789 { 790 int error = -ENOMEM; 791 struct file *file; 792 struct inode *inode; 793 struct dentry *dentry, *root; 794 struct qstr quick_string; 795 796 if (!hugetlbfs_vfsmount) 797 return ERR_PTR(-ENOENT); 798 799 if (!can_do_hugetlb_shm()) 800 return ERR_PTR(-EPERM); 801 802 if (!user_shm_lock(size, current->user)) 803 return ERR_PTR(-ENOMEM); 804 805 root = hugetlbfs_vfsmount->mnt_root; 806 quick_string.name = name; 807 quick_string.len = strlen(quick_string.name); 808 quick_string.hash = 0; 809 dentry = d_alloc(root, &quick_string); 810 if (!dentry) 811 goto out_shm_unlock; 812 813 error = -ENFILE; 814 file = get_empty_filp(); 815 if (!file) 816 goto out_dentry; 817 818 error = -ENOSPC; 819 inode = hugetlbfs_get_inode(root->d_sb, current->fsuid, 820 current->fsgid, S_IFREG | S_IRWXUGO, 0); 821 if (!inode) 822 goto out_file; 823 824 error = -ENOMEM; 825 if (hugetlb_reserve_pages(inode, 0, size >> HPAGE_SHIFT)) 826 goto out_inode; 827 828 d_instantiate(dentry, inode); 829 inode->i_size = size; 830 inode->i_nlink = 0; 831 file->f_path.mnt = mntget(hugetlbfs_vfsmount); 832 file->f_path.dentry = dentry; 833 file->f_mapping = inode->i_mapping; 834 file->f_op = &hugetlbfs_file_operations; 835 file->f_mode = FMODE_WRITE | FMODE_READ; 836 return file; 837 838 out_inode: 839 iput(inode); 840 out_file: 841 put_filp(file); 842 out_dentry: 843 dput(dentry); 844 out_shm_unlock: 845 user_shm_unlock(size, current->user); 846 return ERR_PTR(error); 847 } 848 849 static int __init init_hugetlbfs_fs(void) 850 { 851 int error; 852 struct vfsmount *vfsmount; 853 854 hugetlbfs_inode_cachep = kmem_cache_create("hugetlbfs_inode_cache", 855 sizeof(struct hugetlbfs_inode_info), 856 0, 0, init_once); 857 if (hugetlbfs_inode_cachep == NULL) 858 return -ENOMEM; 859 860 error = register_filesystem(&hugetlbfs_fs_type); 861 if (error) 862 goto out; 863 864 vfsmount = kern_mount(&hugetlbfs_fs_type); 865 866 if (!IS_ERR(vfsmount)) { 867 hugetlbfs_vfsmount = vfsmount; 868 return 0; 869 } 870 871 error = PTR_ERR(vfsmount); 872 873 out: 874 if (error) 875 kmem_cache_destroy(hugetlbfs_inode_cachep); 876 return error; 877 } 878 879 static void __exit exit_hugetlbfs_fs(void) 880 { 881 kmem_cache_destroy(hugetlbfs_inode_cachep); 882 unregister_filesystem(&hugetlbfs_fs_type); 883 } 884 885 module_init(init_hugetlbfs_fs) 886 module_exit(exit_hugetlbfs_fs) 887 888 MODULE_LICENSE("GPL"); 889