1 /* 2 * hugetlbpage-backed filesystem. Based on ramfs. 3 * 4 * William Irwin, 2002 5 * 6 * Copyright (C) 2002 Linus Torvalds. 7 */ 8 9 #include <linux/module.h> 10 #include <linux/thread_info.h> 11 #include <asm/current.h> 12 #include <linux/sched.h> /* remove ASAP */ 13 #include <linux/fs.h> 14 #include <linux/mount.h> 15 #include <linux/file.h> 16 #include <linux/writeback.h> 17 #include <linux/pagemap.h> 18 #include <linux/highmem.h> 19 #include <linux/init.h> 20 #include <linux/string.h> 21 #include <linux/capability.h> 22 #include <linux/backing-dev.h> 23 #include <linux/hugetlb.h> 24 #include <linux/pagevec.h> 25 #include <linux/quotaops.h> 26 #include <linux/slab.h> 27 #include <linux/dnotify.h> 28 #include <linux/statfs.h> 29 #include <linux/security.h> 30 31 #include <asm/uaccess.h> 32 33 /* some random number */ 34 #define HUGETLBFS_MAGIC 0x958458f6 35 36 static const struct super_operations hugetlbfs_ops; 37 static const struct address_space_operations hugetlbfs_aops; 38 const struct file_operations hugetlbfs_file_operations; 39 static const struct inode_operations hugetlbfs_dir_inode_operations; 40 static const struct inode_operations hugetlbfs_inode_operations; 41 42 static struct backing_dev_info hugetlbfs_backing_dev_info = { 43 .ra_pages = 0, /* No readahead */ 44 .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK, 45 }; 46 47 int sysctl_hugetlb_shm_group; 48 49 static void huge_pagevec_release(struct pagevec *pvec) 50 { 51 int i; 52 53 for (i = 0; i < pagevec_count(pvec); ++i) 54 put_page(pvec->pages[i]); 55 56 pagevec_reinit(pvec); 57 } 58 59 static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) 60 { 61 struct inode *inode = file->f_path.dentry->d_inode; 62 loff_t len, vma_len; 63 int ret; 64 65 /* 66 * vma alignment has already been checked by prepare_hugepage_range. 67 * If you add any error returns here, do so after setting VM_HUGETLB, 68 * so is_vm_hugetlb_page tests below unmap_region go the right way 69 * when do_mmap_pgoff unwinds (may be important on powerpc and ia64). 70 */ 71 vma->vm_flags |= VM_HUGETLB | VM_RESERVED; 72 vma->vm_ops = &hugetlb_vm_ops; 73 74 vma_len = (loff_t)(vma->vm_end - vma->vm_start); 75 76 mutex_lock(&inode->i_mutex); 77 file_accessed(file); 78 79 ret = -ENOMEM; 80 len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT); 81 82 if (vma->vm_flags & VM_MAYSHARE && 83 hugetlb_reserve_pages(inode, vma->vm_pgoff >> (HPAGE_SHIFT-PAGE_SHIFT), 84 len >> HPAGE_SHIFT)) 85 goto out; 86 87 ret = 0; 88 hugetlb_prefault_arch_hook(vma->vm_mm); 89 if (vma->vm_flags & VM_WRITE && inode->i_size < len) 90 inode->i_size = len; 91 out: 92 mutex_unlock(&inode->i_mutex); 93 94 return ret; 95 } 96 97 /* 98 * Called under down_write(mmap_sem). 99 */ 100 101 #ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA 102 unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, 103 unsigned long len, unsigned long pgoff, unsigned long flags); 104 #else 105 static unsigned long 106 hugetlb_get_unmapped_area(struct file *file, unsigned long addr, 107 unsigned long len, unsigned long pgoff, unsigned long flags) 108 { 109 struct mm_struct *mm = current->mm; 110 struct vm_area_struct *vma; 111 unsigned long start_addr; 112 113 if (len & ~HPAGE_MASK) 114 return -EINVAL; 115 if (len > TASK_SIZE) 116 return -ENOMEM; 117 118 if (addr) { 119 addr = ALIGN(addr, HPAGE_SIZE); 120 vma = find_vma(mm, addr); 121 if (TASK_SIZE - len >= addr && 122 (!vma || addr + len <= vma->vm_start)) 123 return addr; 124 } 125 126 start_addr = mm->free_area_cache; 127 128 if (len <= mm->cached_hole_size) 129 start_addr = TASK_UNMAPPED_BASE; 130 131 full_search: 132 addr = ALIGN(start_addr, HPAGE_SIZE); 133 134 for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { 135 /* At this point: (!vma || addr < vma->vm_end). */ 136 if (TASK_SIZE - len < addr) { 137 /* 138 * Start a new search - just in case we missed 139 * some holes. 140 */ 141 if (start_addr != TASK_UNMAPPED_BASE) { 142 start_addr = TASK_UNMAPPED_BASE; 143 goto full_search; 144 } 145 return -ENOMEM; 146 } 147 148 if (!vma || addr + len <= vma->vm_start) 149 return addr; 150 addr = ALIGN(vma->vm_end, HPAGE_SIZE); 151 } 152 } 153 #endif 154 155 /* 156 * Read a page. Again trivial. If it didn't already exist 157 * in the page cache, it is zero-filled. 158 */ 159 static int hugetlbfs_readpage(struct file *file, struct page * page) 160 { 161 unlock_page(page); 162 return -EINVAL; 163 } 164 165 static int hugetlbfs_prepare_write(struct file *file, 166 struct page *page, unsigned offset, unsigned to) 167 { 168 return -EINVAL; 169 } 170 171 static int hugetlbfs_commit_write(struct file *file, 172 struct page *page, unsigned offset, unsigned to) 173 { 174 return -EINVAL; 175 } 176 177 static void truncate_huge_page(struct page *page) 178 { 179 cancel_dirty_page(page, /* No IO accounting for huge pages? */0); 180 ClearPageUptodate(page); 181 remove_from_page_cache(page); 182 put_page(page); 183 } 184 185 static void truncate_hugepages(struct inode *inode, loff_t lstart) 186 { 187 struct address_space *mapping = &inode->i_data; 188 const pgoff_t start = lstart >> HPAGE_SHIFT; 189 struct pagevec pvec; 190 pgoff_t next; 191 int i, freed = 0; 192 193 pagevec_init(&pvec, 0); 194 next = start; 195 while (1) { 196 if (!pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) { 197 if (next == start) 198 break; 199 next = start; 200 continue; 201 } 202 203 for (i = 0; i < pagevec_count(&pvec); ++i) { 204 struct page *page = pvec.pages[i]; 205 206 lock_page(page); 207 if (page->index > next) 208 next = page->index; 209 ++next; 210 truncate_huge_page(page); 211 unlock_page(page); 212 hugetlb_put_quota(mapping); 213 freed++; 214 } 215 huge_pagevec_release(&pvec); 216 } 217 BUG_ON(!lstart && mapping->nrpages); 218 hugetlb_unreserve_pages(inode, start, freed); 219 } 220 221 static void hugetlbfs_delete_inode(struct inode *inode) 222 { 223 truncate_hugepages(inode, 0); 224 clear_inode(inode); 225 } 226 227 static void hugetlbfs_forget_inode(struct inode *inode) __releases(inode_lock) 228 { 229 struct super_block *sb = inode->i_sb; 230 231 if (!hlist_unhashed(&inode->i_hash)) { 232 if (!(inode->i_state & (I_DIRTY|I_LOCK))) 233 list_move(&inode->i_list, &inode_unused); 234 inodes_stat.nr_unused++; 235 if (!sb || (sb->s_flags & MS_ACTIVE)) { 236 spin_unlock(&inode_lock); 237 return; 238 } 239 inode->i_state |= I_WILL_FREE; 240 spin_unlock(&inode_lock); 241 /* 242 * write_inode_now is a noop as we set BDI_CAP_NO_WRITEBACK 243 * in our backing_dev_info. 244 */ 245 write_inode_now(inode, 1); 246 spin_lock(&inode_lock); 247 inode->i_state &= ~I_WILL_FREE; 248 inodes_stat.nr_unused--; 249 hlist_del_init(&inode->i_hash); 250 } 251 list_del_init(&inode->i_list); 252 list_del_init(&inode->i_sb_list); 253 inode->i_state |= I_FREEING; 254 inodes_stat.nr_inodes--; 255 spin_unlock(&inode_lock); 256 truncate_hugepages(inode, 0); 257 clear_inode(inode); 258 destroy_inode(inode); 259 } 260 261 static void hugetlbfs_drop_inode(struct inode *inode) 262 { 263 if (!inode->i_nlink) 264 generic_delete_inode(inode); 265 else 266 hugetlbfs_forget_inode(inode); 267 } 268 269 static inline void 270 hugetlb_vmtruncate_list(struct prio_tree_root *root, pgoff_t pgoff) 271 { 272 struct vm_area_struct *vma; 273 struct prio_tree_iter iter; 274 275 vma_prio_tree_foreach(vma, &iter, root, pgoff, ULONG_MAX) { 276 unsigned long v_offset; 277 278 /* 279 * Can the expression below overflow on 32-bit arches? 280 * No, because the prio_tree returns us only those vmas 281 * which overlap the truncated area starting at pgoff, 282 * and no vma on a 32-bit arch can span beyond the 4GB. 283 */ 284 if (vma->vm_pgoff < pgoff) 285 v_offset = (pgoff - vma->vm_pgoff) << PAGE_SHIFT; 286 else 287 v_offset = 0; 288 289 __unmap_hugepage_range(vma, 290 vma->vm_start + v_offset, vma->vm_end); 291 } 292 } 293 294 /* 295 * Expanding truncates are not allowed. 296 */ 297 static int hugetlb_vmtruncate(struct inode *inode, loff_t offset) 298 { 299 pgoff_t pgoff; 300 struct address_space *mapping = inode->i_mapping; 301 302 if (offset > inode->i_size) 303 return -EINVAL; 304 305 BUG_ON(offset & ~HPAGE_MASK); 306 pgoff = offset >> PAGE_SHIFT; 307 308 inode->i_size = offset; 309 spin_lock(&mapping->i_mmap_lock); 310 if (!prio_tree_empty(&mapping->i_mmap)) 311 hugetlb_vmtruncate_list(&mapping->i_mmap, pgoff); 312 spin_unlock(&mapping->i_mmap_lock); 313 truncate_hugepages(inode, offset); 314 return 0; 315 } 316 317 static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr) 318 { 319 struct inode *inode = dentry->d_inode; 320 int error; 321 unsigned int ia_valid = attr->ia_valid; 322 323 BUG_ON(!inode); 324 325 error = inode_change_ok(inode, attr); 326 if (error) 327 goto out; 328 329 if (ia_valid & ATTR_SIZE) { 330 error = -EINVAL; 331 if (!(attr->ia_size & ~HPAGE_MASK)) 332 error = hugetlb_vmtruncate(inode, attr->ia_size); 333 if (error) 334 goto out; 335 attr->ia_valid &= ~ATTR_SIZE; 336 } 337 error = inode_setattr(inode, attr); 338 out: 339 return error; 340 } 341 342 static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid, 343 gid_t gid, int mode, dev_t dev) 344 { 345 struct inode *inode; 346 347 inode = new_inode(sb); 348 if (inode) { 349 struct hugetlbfs_inode_info *info; 350 inode->i_mode = mode; 351 inode->i_uid = uid; 352 inode->i_gid = gid; 353 inode->i_blocks = 0; 354 inode->i_mapping->a_ops = &hugetlbfs_aops; 355 inode->i_mapping->backing_dev_info =&hugetlbfs_backing_dev_info; 356 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 357 INIT_LIST_HEAD(&inode->i_mapping->private_list); 358 info = HUGETLBFS_I(inode); 359 mpol_shared_policy_init(&info->policy, MPOL_DEFAULT, NULL); 360 switch (mode & S_IFMT) { 361 default: 362 init_special_inode(inode, mode, dev); 363 break; 364 case S_IFREG: 365 inode->i_op = &hugetlbfs_inode_operations; 366 inode->i_fop = &hugetlbfs_file_operations; 367 break; 368 case S_IFDIR: 369 inode->i_op = &hugetlbfs_dir_inode_operations; 370 inode->i_fop = &simple_dir_operations; 371 372 /* directory inodes start off with i_nlink == 2 (for "." entry) */ 373 inc_nlink(inode); 374 break; 375 case S_IFLNK: 376 inode->i_op = &page_symlink_inode_operations; 377 break; 378 } 379 } 380 return inode; 381 } 382 383 /* 384 * File creation. Allocate an inode, and we're done.. 385 */ 386 static int hugetlbfs_mknod(struct inode *dir, 387 struct dentry *dentry, int mode, dev_t dev) 388 { 389 struct inode *inode; 390 int error = -ENOSPC; 391 gid_t gid; 392 393 if (dir->i_mode & S_ISGID) { 394 gid = dir->i_gid; 395 if (S_ISDIR(mode)) 396 mode |= S_ISGID; 397 } else { 398 gid = current->fsgid; 399 } 400 inode = hugetlbfs_get_inode(dir->i_sb, current->fsuid, gid, mode, dev); 401 if (inode) { 402 dir->i_ctime = dir->i_mtime = CURRENT_TIME; 403 d_instantiate(dentry, inode); 404 dget(dentry); /* Extra count - pin the dentry in core */ 405 error = 0; 406 } 407 return error; 408 } 409 410 static int hugetlbfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) 411 { 412 int retval = hugetlbfs_mknod(dir, dentry, mode | S_IFDIR, 0); 413 if (!retval) 414 inc_nlink(dir); 415 return retval; 416 } 417 418 static int hugetlbfs_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd) 419 { 420 return hugetlbfs_mknod(dir, dentry, mode | S_IFREG, 0); 421 } 422 423 static int hugetlbfs_symlink(struct inode *dir, 424 struct dentry *dentry, const char *symname) 425 { 426 struct inode *inode; 427 int error = -ENOSPC; 428 gid_t gid; 429 430 if (dir->i_mode & S_ISGID) 431 gid = dir->i_gid; 432 else 433 gid = current->fsgid; 434 435 inode = hugetlbfs_get_inode(dir->i_sb, current->fsuid, 436 gid, S_IFLNK|S_IRWXUGO, 0); 437 if (inode) { 438 int l = strlen(symname)+1; 439 error = page_symlink(inode, symname, l); 440 if (!error) { 441 d_instantiate(dentry, inode); 442 dget(dentry); 443 } else 444 iput(inode); 445 } 446 dir->i_ctime = dir->i_mtime = CURRENT_TIME; 447 448 return error; 449 } 450 451 /* 452 * mark the head page dirty 453 */ 454 static int hugetlbfs_set_page_dirty(struct page *page) 455 { 456 struct page *head = (struct page *)page_private(page); 457 458 SetPageDirty(head); 459 return 0; 460 } 461 462 static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf) 463 { 464 struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(dentry->d_sb); 465 466 buf->f_type = HUGETLBFS_MAGIC; 467 buf->f_bsize = HPAGE_SIZE; 468 if (sbinfo) { 469 spin_lock(&sbinfo->stat_lock); 470 /* If no limits set, just report 0 for max/free/used 471 * blocks, like simple_statfs() */ 472 if (sbinfo->max_blocks >= 0) { 473 buf->f_blocks = sbinfo->max_blocks; 474 buf->f_bavail = buf->f_bfree = sbinfo->free_blocks; 475 buf->f_files = sbinfo->max_inodes; 476 buf->f_ffree = sbinfo->free_inodes; 477 } 478 spin_unlock(&sbinfo->stat_lock); 479 } 480 buf->f_namelen = NAME_MAX; 481 return 0; 482 } 483 484 static void hugetlbfs_put_super(struct super_block *sb) 485 { 486 struct hugetlbfs_sb_info *sbi = HUGETLBFS_SB(sb); 487 488 if (sbi) { 489 sb->s_fs_info = NULL; 490 kfree(sbi); 491 } 492 } 493 494 static inline int hugetlbfs_dec_free_inodes(struct hugetlbfs_sb_info *sbinfo) 495 { 496 if (sbinfo->free_inodes >= 0) { 497 spin_lock(&sbinfo->stat_lock); 498 if (unlikely(!sbinfo->free_inodes)) { 499 spin_unlock(&sbinfo->stat_lock); 500 return 0; 501 } 502 sbinfo->free_inodes--; 503 spin_unlock(&sbinfo->stat_lock); 504 } 505 506 return 1; 507 } 508 509 static void hugetlbfs_inc_free_inodes(struct hugetlbfs_sb_info *sbinfo) 510 { 511 if (sbinfo->free_inodes >= 0) { 512 spin_lock(&sbinfo->stat_lock); 513 sbinfo->free_inodes++; 514 spin_unlock(&sbinfo->stat_lock); 515 } 516 } 517 518 519 static struct kmem_cache *hugetlbfs_inode_cachep; 520 521 static struct inode *hugetlbfs_alloc_inode(struct super_block *sb) 522 { 523 struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(sb); 524 struct hugetlbfs_inode_info *p; 525 526 if (unlikely(!hugetlbfs_dec_free_inodes(sbinfo))) 527 return NULL; 528 p = kmem_cache_alloc(hugetlbfs_inode_cachep, GFP_KERNEL); 529 if (unlikely(!p)) { 530 hugetlbfs_inc_free_inodes(sbinfo); 531 return NULL; 532 } 533 return &p->vfs_inode; 534 } 535 536 static void hugetlbfs_destroy_inode(struct inode *inode) 537 { 538 hugetlbfs_inc_free_inodes(HUGETLBFS_SB(inode->i_sb)); 539 mpol_free_shared_policy(&HUGETLBFS_I(inode)->policy); 540 kmem_cache_free(hugetlbfs_inode_cachep, HUGETLBFS_I(inode)); 541 } 542 543 static const struct address_space_operations hugetlbfs_aops = { 544 .readpage = hugetlbfs_readpage, 545 .prepare_write = hugetlbfs_prepare_write, 546 .commit_write = hugetlbfs_commit_write, 547 .set_page_dirty = hugetlbfs_set_page_dirty, 548 }; 549 550 551 static void init_once(void *foo, struct kmem_cache *cachep, unsigned long flags) 552 { 553 struct hugetlbfs_inode_info *ei = (struct hugetlbfs_inode_info *)foo; 554 555 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == 556 SLAB_CTOR_CONSTRUCTOR) 557 inode_init_once(&ei->vfs_inode); 558 } 559 560 const struct file_operations hugetlbfs_file_operations = { 561 .mmap = hugetlbfs_file_mmap, 562 .fsync = simple_sync_file, 563 .get_unmapped_area = hugetlb_get_unmapped_area, 564 }; 565 566 static const struct inode_operations hugetlbfs_dir_inode_operations = { 567 .create = hugetlbfs_create, 568 .lookup = simple_lookup, 569 .link = simple_link, 570 .unlink = simple_unlink, 571 .symlink = hugetlbfs_symlink, 572 .mkdir = hugetlbfs_mkdir, 573 .rmdir = simple_rmdir, 574 .mknod = hugetlbfs_mknod, 575 .rename = simple_rename, 576 .setattr = hugetlbfs_setattr, 577 }; 578 579 static const struct inode_operations hugetlbfs_inode_operations = { 580 .setattr = hugetlbfs_setattr, 581 }; 582 583 static const struct super_operations hugetlbfs_ops = { 584 .alloc_inode = hugetlbfs_alloc_inode, 585 .destroy_inode = hugetlbfs_destroy_inode, 586 .statfs = hugetlbfs_statfs, 587 .delete_inode = hugetlbfs_delete_inode, 588 .drop_inode = hugetlbfs_drop_inode, 589 .put_super = hugetlbfs_put_super, 590 }; 591 592 static int 593 hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig) 594 { 595 char *opt, *value, *rest; 596 597 if (!options) 598 return 0; 599 while ((opt = strsep(&options, ",")) != NULL) { 600 if (!*opt) 601 continue; 602 603 value = strchr(opt, '='); 604 if (!value || !*value) 605 return -EINVAL; 606 else 607 *value++ = '\0'; 608 609 if (!strcmp(opt, "uid")) 610 pconfig->uid = simple_strtoul(value, &value, 0); 611 else if (!strcmp(opt, "gid")) 612 pconfig->gid = simple_strtoul(value, &value, 0); 613 else if (!strcmp(opt, "mode")) 614 pconfig->mode = simple_strtoul(value,&value,0) & 0777U; 615 else if (!strcmp(opt, "size")) { 616 unsigned long long size = memparse(value, &rest); 617 if (*rest == '%') { 618 size <<= HPAGE_SHIFT; 619 size *= max_huge_pages; 620 do_div(size, 100); 621 rest++; 622 } 623 pconfig->nr_blocks = (size >> HPAGE_SHIFT); 624 value = rest; 625 } else if (!strcmp(opt,"nr_inodes")) { 626 pconfig->nr_inodes = memparse(value, &rest); 627 value = rest; 628 } else 629 return -EINVAL; 630 631 if (*value) 632 return -EINVAL; 633 } 634 return 0; 635 } 636 637 static int 638 hugetlbfs_fill_super(struct super_block *sb, void *data, int silent) 639 { 640 struct inode * inode; 641 struct dentry * root; 642 int ret; 643 struct hugetlbfs_config config; 644 struct hugetlbfs_sb_info *sbinfo; 645 646 config.nr_blocks = -1; /* No limit on size by default */ 647 config.nr_inodes = -1; /* No limit on number of inodes by default */ 648 config.uid = current->fsuid; 649 config.gid = current->fsgid; 650 config.mode = 0755; 651 ret = hugetlbfs_parse_options(data, &config); 652 653 if (ret) 654 return ret; 655 656 sbinfo = kmalloc(sizeof(struct hugetlbfs_sb_info), GFP_KERNEL); 657 if (!sbinfo) 658 return -ENOMEM; 659 sb->s_fs_info = sbinfo; 660 spin_lock_init(&sbinfo->stat_lock); 661 sbinfo->max_blocks = config.nr_blocks; 662 sbinfo->free_blocks = config.nr_blocks; 663 sbinfo->max_inodes = config.nr_inodes; 664 sbinfo->free_inodes = config.nr_inodes; 665 sb->s_maxbytes = MAX_LFS_FILESIZE; 666 sb->s_blocksize = HPAGE_SIZE; 667 sb->s_blocksize_bits = HPAGE_SHIFT; 668 sb->s_magic = HUGETLBFS_MAGIC; 669 sb->s_op = &hugetlbfs_ops; 670 sb->s_time_gran = 1; 671 inode = hugetlbfs_get_inode(sb, config.uid, config.gid, 672 S_IFDIR | config.mode, 0); 673 if (!inode) 674 goto out_free; 675 676 root = d_alloc_root(inode); 677 if (!root) { 678 iput(inode); 679 goto out_free; 680 } 681 sb->s_root = root; 682 return 0; 683 out_free: 684 kfree(sbinfo); 685 return -ENOMEM; 686 } 687 688 int hugetlb_get_quota(struct address_space *mapping) 689 { 690 int ret = 0; 691 struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(mapping->host->i_sb); 692 693 if (sbinfo->free_blocks > -1) { 694 spin_lock(&sbinfo->stat_lock); 695 if (sbinfo->free_blocks > 0) 696 sbinfo->free_blocks--; 697 else 698 ret = -ENOMEM; 699 spin_unlock(&sbinfo->stat_lock); 700 } 701 702 return ret; 703 } 704 705 void hugetlb_put_quota(struct address_space *mapping) 706 { 707 struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(mapping->host->i_sb); 708 709 if (sbinfo->free_blocks > -1) { 710 spin_lock(&sbinfo->stat_lock); 711 sbinfo->free_blocks++; 712 spin_unlock(&sbinfo->stat_lock); 713 } 714 } 715 716 static int hugetlbfs_get_sb(struct file_system_type *fs_type, 717 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 718 { 719 return get_sb_nodev(fs_type, flags, data, hugetlbfs_fill_super, mnt); 720 } 721 722 static struct file_system_type hugetlbfs_fs_type = { 723 .name = "hugetlbfs", 724 .get_sb = hugetlbfs_get_sb, 725 .kill_sb = kill_litter_super, 726 }; 727 728 static struct vfsmount *hugetlbfs_vfsmount; 729 730 static int can_do_hugetlb_shm(void) 731 { 732 return likely(capable(CAP_IPC_LOCK) || 733 in_group_p(sysctl_hugetlb_shm_group) || 734 can_do_mlock()); 735 } 736 737 struct file *hugetlb_zero_setup(size_t size) 738 { 739 int error = -ENOMEM; 740 struct file *file; 741 struct inode *inode; 742 struct dentry *dentry, *root; 743 struct qstr quick_string; 744 char buf[16]; 745 static atomic_t counter; 746 747 if (!can_do_hugetlb_shm()) 748 return ERR_PTR(-EPERM); 749 750 if (!user_shm_lock(size, current->user)) 751 return ERR_PTR(-ENOMEM); 752 753 root = hugetlbfs_vfsmount->mnt_root; 754 snprintf(buf, 16, "%u", atomic_inc_return(&counter)); 755 quick_string.name = buf; 756 quick_string.len = strlen(quick_string.name); 757 quick_string.hash = 0; 758 dentry = d_alloc(root, &quick_string); 759 if (!dentry) 760 goto out_shm_unlock; 761 762 error = -ENFILE; 763 file = get_empty_filp(); 764 if (!file) 765 goto out_dentry; 766 767 error = -ENOSPC; 768 inode = hugetlbfs_get_inode(root->d_sb, current->fsuid, 769 current->fsgid, S_IFREG | S_IRWXUGO, 0); 770 if (!inode) 771 goto out_file; 772 773 error = -ENOMEM; 774 if (hugetlb_reserve_pages(inode, 0, size >> HPAGE_SHIFT)) 775 goto out_inode; 776 777 d_instantiate(dentry, inode); 778 inode->i_size = size; 779 inode->i_nlink = 0; 780 file->f_path.mnt = mntget(hugetlbfs_vfsmount); 781 file->f_path.dentry = dentry; 782 file->f_mapping = inode->i_mapping; 783 file->f_op = &hugetlbfs_file_operations; 784 file->f_mode = FMODE_WRITE | FMODE_READ; 785 return file; 786 787 out_inode: 788 iput(inode); 789 out_file: 790 put_filp(file); 791 out_dentry: 792 dput(dentry); 793 out_shm_unlock: 794 user_shm_unlock(size, current->user); 795 return ERR_PTR(error); 796 } 797 798 static int __init init_hugetlbfs_fs(void) 799 { 800 int error; 801 struct vfsmount *vfsmount; 802 803 hugetlbfs_inode_cachep = kmem_cache_create("hugetlbfs_inode_cache", 804 sizeof(struct hugetlbfs_inode_info), 805 0, 0, init_once, NULL); 806 if (hugetlbfs_inode_cachep == NULL) 807 return -ENOMEM; 808 809 error = register_filesystem(&hugetlbfs_fs_type); 810 if (error) 811 goto out; 812 813 vfsmount = kern_mount(&hugetlbfs_fs_type); 814 815 if (!IS_ERR(vfsmount)) { 816 hugetlbfs_vfsmount = vfsmount; 817 return 0; 818 } 819 820 error = PTR_ERR(vfsmount); 821 822 out: 823 if (error) 824 kmem_cache_destroy(hugetlbfs_inode_cachep); 825 return error; 826 } 827 828 static void __exit exit_hugetlbfs_fs(void) 829 { 830 kmem_cache_destroy(hugetlbfs_inode_cachep); 831 unregister_filesystem(&hugetlbfs_fs_type); 832 } 833 834 module_init(init_hugetlbfs_fs) 835 module_exit(exit_hugetlbfs_fs) 836 837 MODULE_LICENSE("GPL"); 838