/*
 * hugetlbpage-backed filesystem. Based on ramfs.
 *
 * William Irwin, 2002
 *
 * Copyright (C) 2002 Linus Torvalds.
 */

#include <linux/module.h>
#include <linux/thread_info.h>
#include <asm/current.h>
#include <linux/sched.h>		/* remove ASAP */
#include <linux/fs.h>
#include <linux/mount.h>
#include <linux/file.h>
#include <linux/writeback.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/init.h>
#include <linux/string.h>
#include <linux/capability.h>
#include <linux/backing-dev.h>
#include <linux/hugetlb.h>
#include <linux/pagevec.h>
#include <linux/quotaops.h>
#include <linux/slab.h>
#include <linux/dnotify.h>
#include <linux/statfs.h>
#include <linux/security.h>

#include <asm/uaccess.h>

/* some random number */
#define HUGETLBFS_MAGIC	0x958458f6

static struct super_operations hugetlbfs_ops;
static const struct address_space_operations hugetlbfs_aops;
const struct file_operations hugetlbfs_file_operations;
static struct inode_operations hugetlbfs_dir_inode_operations;
static struct inode_operations hugetlbfs_inode_operations;

static struct backing_dev_info hugetlbfs_backing_dev_info = {
	.ra_pages	= 0,	/* No readahead */
	.capabilities	= BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK,
};

int sysctl_hugetlb_shm_group;

static void huge_pagevec_release(struct pagevec *pvec)
{
	int i;

	for (i = 0; i < pagevec_count(pvec); ++i)
		put_page(pvec->pages[i]);

	pagevec_reinit(pvec);
}

static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct inode *inode = file->f_path.dentry->d_inode;
	loff_t len, vma_len;
	int ret;

	/*
	 * vma alignment has already been checked by prepare_hugepage_range.
	 * If you add any error returns here, do so after setting VM_HUGETLB,
	 * so is_vm_hugetlb_page tests below unmap_region go the right way
	 * when do_mmap_pgoff unwinds (may be important on powerpc and ia64).
	 */
	vma->vm_flags |= VM_HUGETLB | VM_RESERVED;
	vma->vm_ops = &hugetlb_vm_ops;

	vma_len = (loff_t)(vma->vm_end - vma->vm_start);

	mutex_lock(&inode->i_mutex);
	file_accessed(file);

	ret = -ENOMEM;
	len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);

	if (vma->vm_flags & VM_MAYSHARE &&
	    hugetlb_reserve_pages(inode, vma->vm_pgoff >> (HPAGE_SHIFT-PAGE_SHIFT),
				  len >> HPAGE_SHIFT))
		goto out;

	ret = 0;
	hugetlb_prefault_arch_hook(vma->vm_mm);
	if (vma->vm_flags & VM_WRITE && inode->i_size < len)
		inode->i_size = len;
out:
	mutex_unlock(&inode->i_mutex);

	return ret;
}
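
/*
 * Userspace reaches hugetlbfs_file_mmap() above by mmap()ing a file on a
 * hugetlbfs mount.  A minimal sketch (error handling omitted; the mount
 * point and the 2MB huge page size are illustrative assumptions):
 *
 *	int fd = open("/mnt/huge/example", O_CREAT | O_RDWR, 0600);
 *	size_t len = 2 * 1024 * 1024;	// one huge page
 *	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *		       MAP_SHARED, fd, 0);
 *
 * Length and offset must be multiples of the huge page size; alignment
 * is checked by prepare_hugepage_range() before this handler runs.
 */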

/*
 * Called under down_write(mmap_sem).
 */

#ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA
unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
		unsigned long len, unsigned long pgoff, unsigned long flags);
#else
static unsigned long
hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
		unsigned long len, unsigned long pgoff, unsigned long flags)
{
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long start_addr;

	if (len & ~HPAGE_MASK)
		return -EINVAL;
	if (len > TASK_SIZE)
		return -ENOMEM;

	if (addr) {
		addr = ALIGN(addr, HPAGE_SIZE);
		vma = find_vma(mm, addr);
		if (TASK_SIZE - len >= addr &&
		    (!vma || addr + len <= vma->vm_start))
			return addr;
	}

	start_addr = mm->free_area_cache;

	if (len <= mm->cached_hole_size)
		start_addr = TASK_UNMAPPED_BASE;

full_search:
	addr = ALIGN(start_addr, HPAGE_SIZE);

	for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
		/* At this point: (!vma || addr < vma->vm_end). */
		if (TASK_SIZE - len < addr) {
			/*
			 * Start a new search - just in case we missed
			 * some holes.
			 */
			if (start_addr != TASK_UNMAPPED_BASE) {
				start_addr = TASK_UNMAPPED_BASE;
				goto full_search;
			}
			return -ENOMEM;
		}

		if (!vma || addr + len <= vma->vm_start)
			return addr;
		addr = ALIGN(vma->vm_end, HPAGE_SIZE);
	}
}
#endif

/*
 * ->readpage is never expected to succeed: huge pages either already
 * exist in the page cache or are instantiated, zero-filled, at fault
 * time rather than read in.
 */
static int hugetlbfs_readpage(struct file *file, struct page *page)
{
	unlock_page(page);
	return -EINVAL;
}

static int hugetlbfs_prepare_write(struct file *file,
			struct page *page, unsigned offset, unsigned to)
{
	return -EINVAL;
}

static int hugetlbfs_commit_write(struct file *file,
			struct page *page, unsigned offset, unsigned to)
{
	return -EINVAL;
}
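
/*
 * With reads and buffered writes ruled out above, and no ->read or
 * ->write in hugetlbfs_file_operations below, mmap() is the only way
 * to get at the contents of a hugetlbfs file.
 */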

static void truncate_huge_page(struct page *page)
{
	cancel_dirty_page(page, /* No IO accounting for huge pages? */ 0);
	ClearPageUptodate(page);
	remove_from_page_cache(page);
	put_page(page);
}

static void truncate_hugepages(struct inode *inode, loff_t lstart)
{
	struct address_space *mapping = &inode->i_data;
	const pgoff_t start = lstart >> HPAGE_SHIFT;
	struct pagevec pvec;
	pgoff_t next;
	int i, freed = 0;

	pagevec_init(&pvec, 0);
	next = start;
	while (1) {
		if (!pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
			if (next == start)
				break;
			next = start;
			continue;
		}

		for (i = 0; i < pagevec_count(&pvec); ++i) {
			struct page *page = pvec.pages[i];

			lock_page(page);
			if (page->index > next)
				next = page->index;
			++next;
			truncate_huge_page(page);
			unlock_page(page);
			hugetlb_put_quota(mapping);
			freed++;
		}
		huge_pagevec_release(&pvec);
	}
	BUG_ON(!lstart && mapping->nrpages);
	hugetlb_unreserve_pages(inode, start, freed);
}

static void hugetlbfs_delete_inode(struct inode *inode)
{
	truncate_hugepages(inode, 0);
	clear_inode(inode);
}

static void hugetlbfs_forget_inode(struct inode *inode) __releases(inode_lock)
{
	struct super_block *sb = inode->i_sb;

	if (!hlist_unhashed(&inode->i_hash)) {
		if (!(inode->i_state & (I_DIRTY|I_LOCK)))
			list_move(&inode->i_list, &inode_unused);
		inodes_stat.nr_unused++;
		if (!sb || (sb->s_flags & MS_ACTIVE)) {
			spin_unlock(&inode_lock);
			return;
		}
		inode->i_state |= I_WILL_FREE;
		spin_unlock(&inode_lock);
		/*
		 * write_inode_now is a noop as we set BDI_CAP_NO_WRITEBACK
		 * in our backing_dev_info.
		 */
		write_inode_now(inode, 1);
		spin_lock(&inode_lock);
		inode->i_state &= ~I_WILL_FREE;
		inodes_stat.nr_unused--;
		hlist_del_init(&inode->i_hash);
	}
	list_del_init(&inode->i_list);
	list_del_init(&inode->i_sb_list);
	inode->i_state |= I_FREEING;
	inodes_stat.nr_inodes--;
	spin_unlock(&inode_lock);
	truncate_hugepages(inode, 0);
	clear_inode(inode);
	destroy_inode(inode);
}

static void hugetlbfs_drop_inode(struct inode *inode)
{
	if (!inode->i_nlink)
		generic_delete_inode(inode);
	else
		hugetlbfs_forget_inode(inode);
}

static inline void
hugetlb_vmtruncate_list(struct prio_tree_root *root, pgoff_t pgoff)
{
	struct vm_area_struct *vma;
	struct prio_tree_iter iter;

	vma_prio_tree_foreach(vma, &iter, root, pgoff, ULONG_MAX) {
		unsigned long v_offset;

		/*
		 * Can the expression below overflow on 32-bit arches?
		 * No, because the prio_tree returns us only those vmas
		 * which overlap the truncated area starting at pgoff,
		 * and no vma on a 32-bit arch can span beyond the 4GB.
		 */
		if (vma->vm_pgoff < pgoff)
			v_offset = (pgoff - vma->vm_pgoff) << PAGE_SHIFT;
		else
			v_offset = 0;

		__unmap_hugepage_range(vma,
				vma->vm_start + v_offset, vma->vm_end);
	}
}
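
/*
 * Note on units: the hugetlbfs page cache is indexed in huge pages
 * (hence the ">> HPAGE_SHIFT" in truncate_hugepages() above), while
 * vm_pgoff of a hugetlb vma - and so the prio tree offsets used by
 * hugetlb_vmtruncate_list() - is kept in small PAGE_SIZE units, which
 * is why hugetlb_vmtruncate() below shifts by PAGE_SHIFT instead.
 */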

/*
 * Expanding truncates are not allowed.
 */
static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
{
	pgoff_t pgoff;
	struct address_space *mapping = inode->i_mapping;

	if (offset > inode->i_size)
		return -EINVAL;

	BUG_ON(offset & ~HPAGE_MASK);
	pgoff = offset >> PAGE_SHIFT;

	inode->i_size = offset;
	spin_lock(&mapping->i_mmap_lock);
	if (!prio_tree_empty(&mapping->i_mmap))
		hugetlb_vmtruncate_list(&mapping->i_mmap, pgoff);
	spin_unlock(&mapping->i_mmap_lock);
	truncate_hugepages(inode, offset);
	return 0;
}

static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr)
{
	struct inode *inode = dentry->d_inode;
	int error;
	unsigned int ia_valid = attr->ia_valid;

	BUG_ON(!inode);

	error = inode_change_ok(inode, attr);
	if (error)
		goto out;

	if (ia_valid & ATTR_SIZE) {
		error = -EINVAL;
		if (!(attr->ia_size & ~HPAGE_MASK))
			error = hugetlb_vmtruncate(inode, attr->ia_size);
		if (error)
			goto out;
		attr->ia_valid &= ~ATTR_SIZE;
	}
	error = inode_setattr(inode, attr);
out:
	return error;
}

static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid,
					gid_t gid, int mode, dev_t dev)
{
	struct inode *inode;

	inode = new_inode(sb);
	if (inode) {
		struct hugetlbfs_inode_info *info;
		inode->i_mode = mode;
		inode->i_uid = uid;
		inode->i_gid = gid;
		inode->i_blocks = 0;
		inode->i_mapping->a_ops = &hugetlbfs_aops;
		inode->i_mapping->backing_dev_info = &hugetlbfs_backing_dev_info;
		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
		INIT_LIST_HEAD(&inode->i_mapping->private_list);
		info = HUGETLBFS_I(inode);
		mpol_shared_policy_init(&info->policy, MPOL_DEFAULT, NULL);
		switch (mode & S_IFMT) {
		default:
			init_special_inode(inode, mode, dev);
			break;
		case S_IFREG:
			inode->i_op = &hugetlbfs_inode_operations;
			inode->i_fop = &hugetlbfs_file_operations;
			break;
		case S_IFDIR:
			inode->i_op = &hugetlbfs_dir_inode_operations;
			inode->i_fop = &simple_dir_operations;

			/* directory inodes start off with i_nlink == 2 (for "." entry) */
			inc_nlink(inode);
			break;
		case S_IFLNK:
			inode->i_op = &page_symlink_inode_operations;
			break;
		}
	}
	return inode;
}
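
/*
 * hugetlbfs_get_inode() above is the single inode factory for this
 * filesystem: besides the mknod/create/mkdir/symlink methods below, it
 * also builds the root directory in hugetlbfs_fill_super() and the
 * anonymous segment files in hugetlb_zero_setup().
 */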

/*
 * File creation. Allocate an inode, and we're done..
 */
static int hugetlbfs_mknod(struct inode *dir,
			struct dentry *dentry, int mode, dev_t dev)
{
	struct inode *inode;
	int error = -ENOSPC;
	gid_t gid;

	if (dir->i_mode & S_ISGID) {
		gid = dir->i_gid;
		if (S_ISDIR(mode))
			mode |= S_ISGID;
	} else {
		gid = current->fsgid;
	}
	inode = hugetlbfs_get_inode(dir->i_sb, current->fsuid, gid, mode, dev);
	if (inode) {
		dir->i_ctime = dir->i_mtime = CURRENT_TIME;
		d_instantiate(dentry, inode);
		dget(dentry);	/* Extra count - pin the dentry in core */
		error = 0;
	}
	return error;
}

static int hugetlbfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
{
	int retval = hugetlbfs_mknod(dir, dentry, mode | S_IFDIR, 0);
	if (!retval)
		inc_nlink(dir);
	return retval;
}

static int hugetlbfs_create(struct inode *dir, struct dentry *dentry,
			int mode, struct nameidata *nd)
{
	return hugetlbfs_mknod(dir, dentry, mode | S_IFREG, 0);
}

static int hugetlbfs_symlink(struct inode *dir,
			struct dentry *dentry, const char *symname)
{
	struct inode *inode;
	int error = -ENOSPC;
	gid_t gid;

	if (dir->i_mode & S_ISGID)
		gid = dir->i_gid;
	else
		gid = current->fsgid;

	inode = hugetlbfs_get_inode(dir->i_sb, current->fsuid,
					gid, S_IFLNK|S_IRWXUGO, 0);
	if (inode) {
		int l = strlen(symname)+1;
		error = page_symlink(inode, symname, l);
		if (!error) {
			d_instantiate(dentry, inode);
			dget(dentry);
		} else
			iput(inode);
	}
	dir->i_ctime = dir->i_mtime = CURRENT_TIME;

	return error;
}

/*
 * For direct-IO reads into hugetlb pages
 */
static int hugetlbfs_set_page_dirty(struct page *page)
{
	return 0;
}

static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
	struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(dentry->d_sb);

	buf->f_type = HUGETLBFS_MAGIC;
	buf->f_bsize = HPAGE_SIZE;
	if (sbinfo) {
		spin_lock(&sbinfo->stat_lock);
		/* If no limits set, just report 0 for max/free/used
		 * blocks, like simple_statfs() */
		if (sbinfo->max_blocks >= 0) {
			buf->f_blocks = sbinfo->max_blocks;
			buf->f_bavail = buf->f_bfree = sbinfo->free_blocks;
			buf->f_files = sbinfo->max_inodes;
			buf->f_ffree = sbinfo->free_inodes;
		}
		spin_unlock(&sbinfo->stat_lock);
	}
	buf->f_namelen = NAME_MAX;
	return 0;
}

static void hugetlbfs_put_super(struct super_block *sb)
{
	struct hugetlbfs_sb_info *sbi = HUGETLBFS_SB(sb);

	if (sbi) {
		sb->s_fs_info = NULL;
		kfree(sbi);
	}
}

static inline int hugetlbfs_dec_free_inodes(struct hugetlbfs_sb_info *sbinfo)
{
	if (sbinfo->free_inodes >= 0) {
		spin_lock(&sbinfo->stat_lock);
		if (unlikely(!sbinfo->free_inodes)) {
			spin_unlock(&sbinfo->stat_lock);
			return 0;
		}
		sbinfo->free_inodes--;
		spin_unlock(&sbinfo->stat_lock);
	}

	return 1;
}

static void hugetlbfs_inc_free_inodes(struct hugetlbfs_sb_info *sbinfo)
{
	if (sbinfo->free_inodes >= 0) {
		spin_lock(&sbinfo->stat_lock);
		sbinfo->free_inodes++;
		spin_unlock(&sbinfo->stat_lock);
	}
}

static struct kmem_cache *hugetlbfs_inode_cachep;
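
/*
 * Inode allocation below charges against the per-superblock nr_inodes
 * limit (a negative free_inodes count means "no limit"), and
 * hugetlbfs_destroy_inode() returns the credit:
 *
 *	alloc_inode:	hugetlbfs_dec_free_inodes() -> kmem_cache_alloc()
 *	destroy_inode:	hugetlbfs_inc_free_inodes() -> kmem_cache_free()
 */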

static struct inode *hugetlbfs_alloc_inode(struct super_block *sb)
{
	struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(sb);
	struct hugetlbfs_inode_info *p;

	if (unlikely(!hugetlbfs_dec_free_inodes(sbinfo)))
		return NULL;
	p = kmem_cache_alloc(hugetlbfs_inode_cachep, GFP_KERNEL);
	if (unlikely(!p)) {
		hugetlbfs_inc_free_inodes(sbinfo);
		return NULL;
	}
	return &p->vfs_inode;
}

static void hugetlbfs_destroy_inode(struct inode *inode)
{
	hugetlbfs_inc_free_inodes(HUGETLBFS_SB(inode->i_sb));
	mpol_free_shared_policy(&HUGETLBFS_I(inode)->policy);
	kmem_cache_free(hugetlbfs_inode_cachep, HUGETLBFS_I(inode));
}

static const struct address_space_operations hugetlbfs_aops = {
	.readpage	= hugetlbfs_readpage,
	.prepare_write	= hugetlbfs_prepare_write,
	.commit_write	= hugetlbfs_commit_write,
	.set_page_dirty	= hugetlbfs_set_page_dirty,
};

static void init_once(void *foo, struct kmem_cache *cachep, unsigned long flags)
{
	struct hugetlbfs_inode_info *ei = (struct hugetlbfs_inode_info *)foo;

	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
	    SLAB_CTOR_CONSTRUCTOR)
		inode_init_once(&ei->vfs_inode);
}

const struct file_operations hugetlbfs_file_operations = {
	.mmap			= hugetlbfs_file_mmap,
	.fsync			= simple_sync_file,
	.get_unmapped_area	= hugetlb_get_unmapped_area,
};

static struct inode_operations hugetlbfs_dir_inode_operations = {
	.create		= hugetlbfs_create,
	.lookup		= simple_lookup,
	.link		= simple_link,
	.unlink		= simple_unlink,
	.symlink	= hugetlbfs_symlink,
	.mkdir		= hugetlbfs_mkdir,
	.rmdir		= simple_rmdir,
	.mknod		= hugetlbfs_mknod,
	.rename		= simple_rename,
	.setattr	= hugetlbfs_setattr,
};

static struct inode_operations hugetlbfs_inode_operations = {
	.setattr	= hugetlbfs_setattr,
};

static struct super_operations hugetlbfs_ops = {
	.alloc_inode	= hugetlbfs_alloc_inode,
	.destroy_inode	= hugetlbfs_destroy_inode,
	.statfs		= hugetlbfs_statfs,
	.delete_inode	= hugetlbfs_delete_inode,
	.drop_inode	= hugetlbfs_drop_inode,
	.put_super	= hugetlbfs_put_super,
};

static int
hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
{
	char *opt, *value, *rest;

	if (!options)
		return 0;
	while ((opt = strsep(&options, ",")) != NULL) {
		if (!*opt)
			continue;

		value = strchr(opt, '=');
		if (!value || !*value)
			return -EINVAL;
		else
			*value++ = '\0';

		if (!strcmp(opt, "uid"))
			pconfig->uid = simple_strtoul(value, &value, 0);
		else if (!strcmp(opt, "gid"))
			pconfig->gid = simple_strtoul(value, &value, 0);
		else if (!strcmp(opt, "mode"))
			pconfig->mode = simple_strtoul(value, &value, 0) & 0777U;
		else if (!strcmp(opt, "size")) {
			unsigned long long size = memparse(value, &rest);
			if (*rest == '%') {
				size <<= HPAGE_SHIFT;
				size *= max_huge_pages;
				do_div(size, 100);
				rest++;
			}
			pconfig->nr_blocks = (size >> HPAGE_SHIFT);
			value = rest;
		} else if (!strcmp(opt, "nr_inodes")) {
			pconfig->nr_inodes = memparse(value, &rest);
			value = rest;
		} else
			return -EINVAL;

		if (*value)
			return -EINVAL;
	}
	return 0;
}
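
/*
 * The options parsed above correspond to a mount invocation such as
 * (values illustrative):
 *
 *	mount -t hugetlbfs -o uid=1000,gid=1000,mode=0700,size=50%,nr_inodes=16 none /mnt/huge
 *
 * "size" takes the k/m/g suffixes understood by memparse(), or a
 * percentage of the huge page pool (max_huge_pages); either way it is
 * rounded down to whole huge pages by the ">> HPAGE_SHIFT" above.
 */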

static int
hugetlbfs_fill_super(struct super_block *sb, void *data, int silent)
{
	struct inode *inode;
	struct dentry *root;
	int ret;
	struct hugetlbfs_config config;
	struct hugetlbfs_sb_info *sbinfo;

	config.nr_blocks = -1;	/* No limit on size by default */
	config.nr_inodes = -1;	/* No limit on number of inodes by default */
	config.uid = current->fsuid;
	config.gid = current->fsgid;
	config.mode = 0755;
	ret = hugetlbfs_parse_options(data, &config);

	if (ret)
		return ret;

	sbinfo = kmalloc(sizeof(struct hugetlbfs_sb_info), GFP_KERNEL);
	if (!sbinfo)
		return -ENOMEM;
	sb->s_fs_info = sbinfo;
	spin_lock_init(&sbinfo->stat_lock);
	sbinfo->max_blocks = config.nr_blocks;
	sbinfo->free_blocks = config.nr_blocks;
	sbinfo->max_inodes = config.nr_inodes;
	sbinfo->free_inodes = config.nr_inodes;
	sb->s_maxbytes = MAX_LFS_FILESIZE;
	sb->s_blocksize = HPAGE_SIZE;
	sb->s_blocksize_bits = HPAGE_SHIFT;
	sb->s_magic = HUGETLBFS_MAGIC;
	sb->s_op = &hugetlbfs_ops;
	sb->s_time_gran = 1;
	inode = hugetlbfs_get_inode(sb, config.uid, config.gid,
					S_IFDIR | config.mode, 0);
	if (!inode)
		goto out_free;

	root = d_alloc_root(inode);
	if (!root) {
		iput(inode);
		goto out_free;
	}
	sb->s_root = root;
	return 0;
out_free:
	kfree(sbinfo);
	return -ENOMEM;
}

int hugetlb_get_quota(struct address_space *mapping)
{
	int ret = 0;
	struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(mapping->host->i_sb);

	if (sbinfo->free_blocks > -1) {
		spin_lock(&sbinfo->stat_lock);
		if (sbinfo->free_blocks > 0)
			sbinfo->free_blocks--;
		else
			ret = -ENOMEM;
		spin_unlock(&sbinfo->stat_lock);
	}

	return ret;
}

void hugetlb_put_quota(struct address_space *mapping)
{
	struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(mapping->host->i_sb);

	if (sbinfo->free_blocks > -1) {
		spin_lock(&sbinfo->stat_lock);
		sbinfo->free_blocks++;
		spin_unlock(&sbinfo->stat_lock);
	}
}

static int hugetlbfs_get_sb(struct file_system_type *fs_type,
	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
{
	return get_sb_nodev(fs_type, flags, data, hugetlbfs_fill_super, mnt);
}

static struct file_system_type hugetlbfs_fs_type = {
	.name		= "hugetlbfs",
	.get_sb		= hugetlbfs_get_sb,
	.kill_sb	= kill_litter_super,
};

static struct vfsmount *hugetlbfs_vfsmount;

static int can_do_hugetlb_shm(void)
{
	return likely(capable(CAP_IPC_LOCK) ||
			in_group_p(sysctl_hugetlb_shm_group) ||
			can_do_mlock());
}
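
/*
 * Create an unlinked hugetlbfs file of the given size on the internal
 * kernel mount.  This is how the SysV shared memory code (shmget() with
 * SHM_HUGETLB) gets huge page backing for a segment: the file has no
 * name and i_nlink == 0, so it vanishes once the last reference to it
 * is dropped.
 */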

struct file *hugetlb_zero_setup(size_t size)
{
	int error = -ENOMEM;
	struct file *file;
	struct inode *inode;
	struct dentry *dentry, *root;
	struct qstr quick_string;
	char buf[16];
	static atomic_t counter;

	if (!can_do_hugetlb_shm())
		return ERR_PTR(-EPERM);

	if (!user_shm_lock(size, current->user))
		return ERR_PTR(-ENOMEM);

	root = hugetlbfs_vfsmount->mnt_root;
	snprintf(buf, 16, "%u", atomic_inc_return(&counter));
	quick_string.name = buf;
	quick_string.len = strlen(quick_string.name);
	quick_string.hash = 0;
	dentry = d_alloc(root, &quick_string);
	if (!dentry)
		goto out_shm_unlock;

	error = -ENFILE;
	file = get_empty_filp();
	if (!file)
		goto out_dentry;

	error = -ENOSPC;
	inode = hugetlbfs_get_inode(root->d_sb, current->fsuid,
				current->fsgid, S_IFREG | S_IRWXUGO, 0);
	if (!inode)
		goto out_file;

	error = -ENOMEM;
	if (hugetlb_reserve_pages(inode, 0, size >> HPAGE_SHIFT))
		goto out_inode;

	d_instantiate(dentry, inode);
	inode->i_size = size;
	inode->i_nlink = 0;
	file->f_path.mnt = mntget(hugetlbfs_vfsmount);
	file->f_path.dentry = dentry;
	file->f_mapping = inode->i_mapping;
	file->f_op = &hugetlbfs_file_operations;
	file->f_mode = FMODE_WRITE | FMODE_READ;
	return file;

out_inode:
	iput(inode);
out_file:
	put_filp(file);
out_dentry:
	dput(dentry);
out_shm_unlock:
	user_shm_unlock(size, current->user);
	return ERR_PTR(error);
}

static int __init init_hugetlbfs_fs(void)
{
	int error;
	struct vfsmount *vfsmount;

	hugetlbfs_inode_cachep = kmem_cache_create("hugetlbfs_inode_cache",
					sizeof(struct hugetlbfs_inode_info),
					0, 0, init_once, NULL);
	if (hugetlbfs_inode_cachep == NULL)
		return -ENOMEM;

	error = register_filesystem(&hugetlbfs_fs_type);
	if (error)
		goto out;

	vfsmount = kern_mount(&hugetlbfs_fs_type);

	if (!IS_ERR(vfsmount)) {
		hugetlbfs_vfsmount = vfsmount;
		return 0;
	}

	error = PTR_ERR(vfsmount);

out:
	if (error)
		kmem_cache_destroy(hugetlbfs_inode_cachep);
	return error;
}

static void __exit exit_hugetlbfs_fs(void)
{
	kmem_cache_destroy(hugetlbfs_inode_cachep);
	unregister_filesystem(&hugetlbfs_fs_type);
}

module_init(init_hugetlbfs_fs)
module_exit(exit_hugetlbfs_fs)

MODULE_LICENSE("GPL");