1 /* 2 * hugetlbpage-backed filesystem. Based on ramfs. 3 * 4 * William Irwin, 2002 5 * 6 * Copyright (C) 2002 Linus Torvalds. 7 */ 8 9 #include <linux/module.h> 10 #include <linux/thread_info.h> 11 #include <asm/current.h> 12 #include <linux/sched.h> /* remove ASAP */ 13 #include <linux/fs.h> 14 #include <linux/mount.h> 15 #include <linux/file.h> 16 #include <linux/writeback.h> 17 #include <linux/pagemap.h> 18 #include <linux/highmem.h> 19 #include <linux/init.h> 20 #include <linux/string.h> 21 #include <linux/capability.h> 22 #include <linux/backing-dev.h> 23 #include <linux/hugetlb.h> 24 #include <linux/pagevec.h> 25 #include <linux/quotaops.h> 26 #include <linux/slab.h> 27 #include <linux/dnotify.h> 28 #include <linux/statfs.h> 29 #include <linux/security.h> 30 31 #include <asm/uaccess.h> 32 33 /* some random number */ 34 #define HUGETLBFS_MAGIC 0x958458f6 35 36 static struct super_operations hugetlbfs_ops; 37 static const struct address_space_operations hugetlbfs_aops; 38 const struct file_operations hugetlbfs_file_operations; 39 static struct inode_operations hugetlbfs_dir_inode_operations; 40 static struct inode_operations hugetlbfs_inode_operations; 41 42 static struct backing_dev_info hugetlbfs_backing_dev_info = { 43 .ra_pages = 0, /* No readahead */ 44 .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK, 45 }; 46 47 int sysctl_hugetlb_shm_group; 48 49 static void huge_pagevec_release(struct pagevec *pvec) 50 { 51 int i; 52 53 for (i = 0; i < pagevec_count(pvec); ++i) 54 put_page(pvec->pages[i]); 55 56 pagevec_reinit(pvec); 57 } 58 59 static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) 60 { 61 struct inode *inode = file->f_dentry->d_inode; 62 loff_t len, vma_len; 63 int ret; 64 65 if (vma->vm_pgoff & (HPAGE_SIZE / PAGE_SIZE - 1)) 66 return -EINVAL; 67 68 if (vma->vm_start & ~HPAGE_MASK) 69 return -EINVAL; 70 71 if (vma->vm_end & ~HPAGE_MASK) 72 return -EINVAL; 73 74 if (vma->vm_end - vma->vm_start < HPAGE_SIZE) 75 return -EINVAL; 76 77 vma_len = (loff_t)(vma->vm_end - vma->vm_start); 78 79 mutex_lock(&inode->i_mutex); 80 file_accessed(file); 81 vma->vm_flags |= VM_HUGETLB | VM_RESERVED; 82 vma->vm_ops = &hugetlb_vm_ops; 83 84 ret = -ENOMEM; 85 len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT); 86 87 if (vma->vm_flags & VM_MAYSHARE && 88 hugetlb_reserve_pages(inode, vma->vm_pgoff >> (HPAGE_SHIFT-PAGE_SHIFT), 89 len >> HPAGE_SHIFT)) 90 goto out; 91 92 ret = 0; 93 hugetlb_prefault_arch_hook(vma->vm_mm); 94 if (vma->vm_flags & VM_WRITE && inode->i_size < len) 95 inode->i_size = len; 96 out: 97 mutex_unlock(&inode->i_mutex); 98 99 return ret; 100 } 101 102 /* 103 * Called under down_write(mmap_sem). 104 */ 105 106 #ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA 107 unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, 108 unsigned long len, unsigned long pgoff, unsigned long flags); 109 #else 110 static unsigned long 111 hugetlb_get_unmapped_area(struct file *file, unsigned long addr, 112 unsigned long len, unsigned long pgoff, unsigned long flags) 113 { 114 struct mm_struct *mm = current->mm; 115 struct vm_area_struct *vma; 116 unsigned long start_addr; 117 118 if (len & ~HPAGE_MASK) 119 return -EINVAL; 120 if (len > TASK_SIZE) 121 return -ENOMEM; 122 123 if (addr) { 124 addr = ALIGN(addr, HPAGE_SIZE); 125 vma = find_vma(mm, addr); 126 if (TASK_SIZE - len >= addr && 127 (!vma || addr + len <= vma->vm_start)) 128 return addr; 129 } 130 131 start_addr = mm->free_area_cache; 132 133 if (len <= mm->cached_hole_size) 134 start_addr = TASK_UNMAPPED_BASE; 135 136 full_search: 137 addr = ALIGN(start_addr, HPAGE_SIZE); 138 139 for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { 140 /* At this point: (!vma || addr < vma->vm_end). */ 141 if (TASK_SIZE - len < addr) { 142 /* 143 * Start a new search - just in case we missed 144 * some holes. 145 */ 146 if (start_addr != TASK_UNMAPPED_BASE) { 147 start_addr = TASK_UNMAPPED_BASE; 148 goto full_search; 149 } 150 return -ENOMEM; 151 } 152 153 if (!vma || addr + len <= vma->vm_start) 154 return addr; 155 addr = ALIGN(vma->vm_end, HPAGE_SIZE); 156 } 157 } 158 #endif 159 160 /* 161 * Read a page. Again trivial. If it didn't already exist 162 * in the page cache, it is zero-filled. 163 */ 164 static int hugetlbfs_readpage(struct file *file, struct page * page) 165 { 166 unlock_page(page); 167 return -EINVAL; 168 } 169 170 static int hugetlbfs_prepare_write(struct file *file, 171 struct page *page, unsigned offset, unsigned to) 172 { 173 return -EINVAL; 174 } 175 176 static int hugetlbfs_commit_write(struct file *file, 177 struct page *page, unsigned offset, unsigned to) 178 { 179 return -EINVAL; 180 } 181 182 static void truncate_huge_page(struct page *page) 183 { 184 clear_page_dirty(page); 185 ClearPageUptodate(page); 186 remove_from_page_cache(page); 187 put_page(page); 188 } 189 190 static void truncate_hugepages(struct inode *inode, loff_t lstart) 191 { 192 struct address_space *mapping = &inode->i_data; 193 const pgoff_t start = lstart >> HPAGE_SHIFT; 194 struct pagevec pvec; 195 pgoff_t next; 196 int i, freed = 0; 197 198 pagevec_init(&pvec, 0); 199 next = start; 200 while (1) { 201 if (!pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) { 202 if (next == start) 203 break; 204 next = start; 205 continue; 206 } 207 208 for (i = 0; i < pagevec_count(&pvec); ++i) { 209 struct page *page = pvec.pages[i]; 210 211 lock_page(page); 212 if (page->index > next) 213 next = page->index; 214 ++next; 215 truncate_huge_page(page); 216 unlock_page(page); 217 hugetlb_put_quota(mapping); 218 freed++; 219 } 220 huge_pagevec_release(&pvec); 221 } 222 BUG_ON(!lstart && mapping->nrpages); 223 hugetlb_unreserve_pages(inode, start, freed); 224 } 225 226 static void hugetlbfs_delete_inode(struct inode *inode) 227 { 228 truncate_hugepages(inode, 0); 229 clear_inode(inode); 230 } 231 232 static void hugetlbfs_forget_inode(struct inode *inode) 233 { 234 struct super_block *sb = inode->i_sb; 235 236 if (!hlist_unhashed(&inode->i_hash)) { 237 if (!(inode->i_state & (I_DIRTY|I_LOCK))) 238 list_move(&inode->i_list, &inode_unused); 239 inodes_stat.nr_unused++; 240 if (!sb || (sb->s_flags & MS_ACTIVE)) { 241 spin_unlock(&inode_lock); 242 return; 243 } 244 inode->i_state |= I_WILL_FREE; 245 spin_unlock(&inode_lock); 246 /* 247 * write_inode_now is a noop as we set BDI_CAP_NO_WRITEBACK 248 * in our backing_dev_info. 249 */ 250 write_inode_now(inode, 1); 251 spin_lock(&inode_lock); 252 inode->i_state &= ~I_WILL_FREE; 253 inodes_stat.nr_unused--; 254 hlist_del_init(&inode->i_hash); 255 } 256 list_del_init(&inode->i_list); 257 list_del_init(&inode->i_sb_list); 258 inode->i_state |= I_FREEING; 259 inodes_stat.nr_inodes--; 260 spin_unlock(&inode_lock); 261 truncate_hugepages(inode, 0); 262 clear_inode(inode); 263 destroy_inode(inode); 264 } 265 266 static void hugetlbfs_drop_inode(struct inode *inode) 267 { 268 if (!inode->i_nlink) 269 generic_delete_inode(inode); 270 else 271 hugetlbfs_forget_inode(inode); 272 } 273 274 /* 275 * h_pgoff is in HPAGE_SIZE units. 276 * vma->vm_pgoff is in PAGE_SIZE units. 277 */ 278 static inline void 279 hugetlb_vmtruncate_list(struct prio_tree_root *root, unsigned long h_pgoff) 280 { 281 struct vm_area_struct *vma; 282 struct prio_tree_iter iter; 283 284 vma_prio_tree_foreach(vma, &iter, root, h_pgoff, ULONG_MAX) { 285 unsigned long h_vm_pgoff; 286 unsigned long v_offset; 287 288 h_vm_pgoff = vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT); 289 v_offset = (h_pgoff - h_vm_pgoff) << HPAGE_SHIFT; 290 /* 291 * Is this VMA fully outside the truncation point? 292 */ 293 if (h_vm_pgoff >= h_pgoff) 294 v_offset = 0; 295 296 unmap_hugepage_range(vma, 297 vma->vm_start + v_offset, vma->vm_end); 298 } 299 } 300 301 /* 302 * Expanding truncates are not allowed. 303 */ 304 static int hugetlb_vmtruncate(struct inode *inode, loff_t offset) 305 { 306 unsigned long pgoff; 307 struct address_space *mapping = inode->i_mapping; 308 309 if (offset > inode->i_size) 310 return -EINVAL; 311 312 BUG_ON(offset & ~HPAGE_MASK); 313 pgoff = offset >> HPAGE_SHIFT; 314 315 inode->i_size = offset; 316 spin_lock(&mapping->i_mmap_lock); 317 if (!prio_tree_empty(&mapping->i_mmap)) 318 hugetlb_vmtruncate_list(&mapping->i_mmap, pgoff); 319 spin_unlock(&mapping->i_mmap_lock); 320 truncate_hugepages(inode, offset); 321 return 0; 322 } 323 324 static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr) 325 { 326 struct inode *inode = dentry->d_inode; 327 int error; 328 unsigned int ia_valid = attr->ia_valid; 329 330 BUG_ON(!inode); 331 332 error = inode_change_ok(inode, attr); 333 if (error) 334 goto out; 335 336 if (ia_valid & ATTR_SIZE) { 337 error = -EINVAL; 338 if (!(attr->ia_size & ~HPAGE_MASK)) 339 error = hugetlb_vmtruncate(inode, attr->ia_size); 340 if (error) 341 goto out; 342 attr->ia_valid &= ~ATTR_SIZE; 343 } 344 error = inode_setattr(inode, attr); 345 out: 346 return error; 347 } 348 349 static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid, 350 gid_t gid, int mode, dev_t dev) 351 { 352 struct inode *inode; 353 354 inode = new_inode(sb); 355 if (inode) { 356 struct hugetlbfs_inode_info *info; 357 inode->i_mode = mode; 358 inode->i_uid = uid; 359 inode->i_gid = gid; 360 inode->i_blksize = HPAGE_SIZE; 361 inode->i_blocks = 0; 362 inode->i_mapping->a_ops = &hugetlbfs_aops; 363 inode->i_mapping->backing_dev_info =&hugetlbfs_backing_dev_info; 364 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 365 INIT_LIST_HEAD(&inode->i_mapping->private_list); 366 info = HUGETLBFS_I(inode); 367 mpol_shared_policy_init(&info->policy, MPOL_DEFAULT, NULL); 368 switch (mode & S_IFMT) { 369 default: 370 init_special_inode(inode, mode, dev); 371 break; 372 case S_IFREG: 373 inode->i_op = &hugetlbfs_inode_operations; 374 inode->i_fop = &hugetlbfs_file_operations; 375 break; 376 case S_IFDIR: 377 inode->i_op = &hugetlbfs_dir_inode_operations; 378 inode->i_fop = &simple_dir_operations; 379 380 /* directory inodes start off with i_nlink == 2 (for "." entry) */ 381 inode->i_nlink++; 382 break; 383 case S_IFLNK: 384 inode->i_op = &page_symlink_inode_operations; 385 break; 386 } 387 } 388 return inode; 389 } 390 391 /* 392 * File creation. Allocate an inode, and we're done.. 393 */ 394 static int hugetlbfs_mknod(struct inode *dir, 395 struct dentry *dentry, int mode, dev_t dev) 396 { 397 struct inode *inode; 398 int error = -ENOSPC; 399 gid_t gid; 400 401 if (dir->i_mode & S_ISGID) { 402 gid = dir->i_gid; 403 if (S_ISDIR(mode)) 404 mode |= S_ISGID; 405 } else { 406 gid = current->fsgid; 407 } 408 inode = hugetlbfs_get_inode(dir->i_sb, current->fsuid, gid, mode, dev); 409 if (inode) { 410 dir->i_ctime = dir->i_mtime = CURRENT_TIME; 411 d_instantiate(dentry, inode); 412 dget(dentry); /* Extra count - pin the dentry in core */ 413 error = 0; 414 } 415 return error; 416 } 417 418 static int hugetlbfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) 419 { 420 int retval = hugetlbfs_mknod(dir, dentry, mode | S_IFDIR, 0); 421 if (!retval) 422 dir->i_nlink++; 423 return retval; 424 } 425 426 static int hugetlbfs_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd) 427 { 428 return hugetlbfs_mknod(dir, dentry, mode | S_IFREG, 0); 429 } 430 431 static int hugetlbfs_symlink(struct inode *dir, 432 struct dentry *dentry, const char *symname) 433 { 434 struct inode *inode; 435 int error = -ENOSPC; 436 gid_t gid; 437 438 if (dir->i_mode & S_ISGID) 439 gid = dir->i_gid; 440 else 441 gid = current->fsgid; 442 443 inode = hugetlbfs_get_inode(dir->i_sb, current->fsuid, 444 gid, S_IFLNK|S_IRWXUGO, 0); 445 if (inode) { 446 int l = strlen(symname)+1; 447 error = page_symlink(inode, symname, l); 448 if (!error) { 449 d_instantiate(dentry, inode); 450 dget(dentry); 451 } else 452 iput(inode); 453 } 454 dir->i_ctime = dir->i_mtime = CURRENT_TIME; 455 456 return error; 457 } 458 459 /* 460 * For direct-IO reads into hugetlb pages 461 */ 462 static int hugetlbfs_set_page_dirty(struct page *page) 463 { 464 return 0; 465 } 466 467 static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf) 468 { 469 struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(dentry->d_sb); 470 471 buf->f_type = HUGETLBFS_MAGIC; 472 buf->f_bsize = HPAGE_SIZE; 473 if (sbinfo) { 474 spin_lock(&sbinfo->stat_lock); 475 /* If no limits set, just report 0 for max/free/used 476 * blocks, like simple_statfs() */ 477 if (sbinfo->max_blocks >= 0) { 478 buf->f_blocks = sbinfo->max_blocks; 479 buf->f_bavail = buf->f_bfree = sbinfo->free_blocks; 480 buf->f_files = sbinfo->max_inodes; 481 buf->f_ffree = sbinfo->free_inodes; 482 } 483 spin_unlock(&sbinfo->stat_lock); 484 } 485 buf->f_namelen = NAME_MAX; 486 return 0; 487 } 488 489 static void hugetlbfs_put_super(struct super_block *sb) 490 { 491 struct hugetlbfs_sb_info *sbi = HUGETLBFS_SB(sb); 492 493 if (sbi) { 494 sb->s_fs_info = NULL; 495 kfree(sbi); 496 } 497 } 498 499 static inline int hugetlbfs_dec_free_inodes(struct hugetlbfs_sb_info *sbinfo) 500 { 501 if (sbinfo->free_inodes >= 0) { 502 spin_lock(&sbinfo->stat_lock); 503 if (unlikely(!sbinfo->free_inodes)) { 504 spin_unlock(&sbinfo->stat_lock); 505 return 0; 506 } 507 sbinfo->free_inodes--; 508 spin_unlock(&sbinfo->stat_lock); 509 } 510 511 return 1; 512 } 513 514 static void hugetlbfs_inc_free_inodes(struct hugetlbfs_sb_info *sbinfo) 515 { 516 if (sbinfo->free_inodes >= 0) { 517 spin_lock(&sbinfo->stat_lock); 518 sbinfo->free_inodes++; 519 spin_unlock(&sbinfo->stat_lock); 520 } 521 } 522 523 524 static kmem_cache_t *hugetlbfs_inode_cachep; 525 526 static struct inode *hugetlbfs_alloc_inode(struct super_block *sb) 527 { 528 struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(sb); 529 struct hugetlbfs_inode_info *p; 530 531 if (unlikely(!hugetlbfs_dec_free_inodes(sbinfo))) 532 return NULL; 533 p = kmem_cache_alloc(hugetlbfs_inode_cachep, SLAB_KERNEL); 534 if (unlikely(!p)) { 535 hugetlbfs_inc_free_inodes(sbinfo); 536 return NULL; 537 } 538 return &p->vfs_inode; 539 } 540 541 static void hugetlbfs_destroy_inode(struct inode *inode) 542 { 543 hugetlbfs_inc_free_inodes(HUGETLBFS_SB(inode->i_sb)); 544 mpol_free_shared_policy(&HUGETLBFS_I(inode)->policy); 545 kmem_cache_free(hugetlbfs_inode_cachep, HUGETLBFS_I(inode)); 546 } 547 548 static const struct address_space_operations hugetlbfs_aops = { 549 .readpage = hugetlbfs_readpage, 550 .prepare_write = hugetlbfs_prepare_write, 551 .commit_write = hugetlbfs_commit_write, 552 .set_page_dirty = hugetlbfs_set_page_dirty, 553 }; 554 555 556 static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags) 557 { 558 struct hugetlbfs_inode_info *ei = (struct hugetlbfs_inode_info *)foo; 559 560 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == 561 SLAB_CTOR_CONSTRUCTOR) 562 inode_init_once(&ei->vfs_inode); 563 } 564 565 const struct file_operations hugetlbfs_file_operations = { 566 .mmap = hugetlbfs_file_mmap, 567 .fsync = simple_sync_file, 568 .get_unmapped_area = hugetlb_get_unmapped_area, 569 }; 570 571 static struct inode_operations hugetlbfs_dir_inode_operations = { 572 .create = hugetlbfs_create, 573 .lookup = simple_lookup, 574 .link = simple_link, 575 .unlink = simple_unlink, 576 .symlink = hugetlbfs_symlink, 577 .mkdir = hugetlbfs_mkdir, 578 .rmdir = simple_rmdir, 579 .mknod = hugetlbfs_mknod, 580 .rename = simple_rename, 581 .setattr = hugetlbfs_setattr, 582 }; 583 584 static struct inode_operations hugetlbfs_inode_operations = { 585 .setattr = hugetlbfs_setattr, 586 }; 587 588 static struct super_operations hugetlbfs_ops = { 589 .alloc_inode = hugetlbfs_alloc_inode, 590 .destroy_inode = hugetlbfs_destroy_inode, 591 .statfs = hugetlbfs_statfs, 592 .delete_inode = hugetlbfs_delete_inode, 593 .drop_inode = hugetlbfs_drop_inode, 594 .put_super = hugetlbfs_put_super, 595 }; 596 597 static int 598 hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig) 599 { 600 char *opt, *value, *rest; 601 602 if (!options) 603 return 0; 604 while ((opt = strsep(&options, ",")) != NULL) { 605 if (!*opt) 606 continue; 607 608 value = strchr(opt, '='); 609 if (!value || !*value) 610 return -EINVAL; 611 else 612 *value++ = '\0'; 613 614 if (!strcmp(opt, "uid")) 615 pconfig->uid = simple_strtoul(value, &value, 0); 616 else if (!strcmp(opt, "gid")) 617 pconfig->gid = simple_strtoul(value, &value, 0); 618 else if (!strcmp(opt, "mode")) 619 pconfig->mode = simple_strtoul(value,&value,0) & 0777U; 620 else if (!strcmp(opt, "size")) { 621 unsigned long long size = memparse(value, &rest); 622 if (*rest == '%') { 623 size <<= HPAGE_SHIFT; 624 size *= max_huge_pages; 625 do_div(size, 100); 626 rest++; 627 } 628 size &= HPAGE_MASK; 629 pconfig->nr_blocks = (size >> HPAGE_SHIFT); 630 value = rest; 631 } else if (!strcmp(opt,"nr_inodes")) { 632 pconfig->nr_inodes = memparse(value, &rest); 633 value = rest; 634 } else 635 return -EINVAL; 636 637 if (*value) 638 return -EINVAL; 639 } 640 return 0; 641 } 642 643 static int 644 hugetlbfs_fill_super(struct super_block *sb, void *data, int silent) 645 { 646 struct inode * inode; 647 struct dentry * root; 648 int ret; 649 struct hugetlbfs_config config; 650 struct hugetlbfs_sb_info *sbinfo; 651 652 config.nr_blocks = -1; /* No limit on size by default */ 653 config.nr_inodes = -1; /* No limit on number of inodes by default */ 654 config.uid = current->fsuid; 655 config.gid = current->fsgid; 656 config.mode = 0755; 657 ret = hugetlbfs_parse_options(data, &config); 658 659 if (ret) 660 return ret; 661 662 sbinfo = kmalloc(sizeof(struct hugetlbfs_sb_info), GFP_KERNEL); 663 if (!sbinfo) 664 return -ENOMEM; 665 sb->s_fs_info = sbinfo; 666 spin_lock_init(&sbinfo->stat_lock); 667 sbinfo->max_blocks = config.nr_blocks; 668 sbinfo->free_blocks = config.nr_blocks; 669 sbinfo->max_inodes = config.nr_inodes; 670 sbinfo->free_inodes = config.nr_inodes; 671 sb->s_maxbytes = MAX_LFS_FILESIZE; 672 sb->s_blocksize = HPAGE_SIZE; 673 sb->s_blocksize_bits = HPAGE_SHIFT; 674 sb->s_magic = HUGETLBFS_MAGIC; 675 sb->s_op = &hugetlbfs_ops; 676 sb->s_time_gran = 1; 677 inode = hugetlbfs_get_inode(sb, config.uid, config.gid, 678 S_IFDIR | config.mode, 0); 679 if (!inode) 680 goto out_free; 681 682 root = d_alloc_root(inode); 683 if (!root) { 684 iput(inode); 685 goto out_free; 686 } 687 sb->s_root = root; 688 return 0; 689 out_free: 690 kfree(sbinfo); 691 return -ENOMEM; 692 } 693 694 int hugetlb_get_quota(struct address_space *mapping) 695 { 696 int ret = 0; 697 struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(mapping->host->i_sb); 698 699 if (sbinfo->free_blocks > -1) { 700 spin_lock(&sbinfo->stat_lock); 701 if (sbinfo->free_blocks > 0) 702 sbinfo->free_blocks--; 703 else 704 ret = -ENOMEM; 705 spin_unlock(&sbinfo->stat_lock); 706 } 707 708 return ret; 709 } 710 711 void hugetlb_put_quota(struct address_space *mapping) 712 { 713 struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(mapping->host->i_sb); 714 715 if (sbinfo->free_blocks > -1) { 716 spin_lock(&sbinfo->stat_lock); 717 sbinfo->free_blocks++; 718 spin_unlock(&sbinfo->stat_lock); 719 } 720 } 721 722 static int hugetlbfs_get_sb(struct file_system_type *fs_type, 723 int flags, const char *dev_name, void *data, struct vfsmount *mnt) 724 { 725 return get_sb_nodev(fs_type, flags, data, hugetlbfs_fill_super, mnt); 726 } 727 728 static struct file_system_type hugetlbfs_fs_type = { 729 .name = "hugetlbfs", 730 .get_sb = hugetlbfs_get_sb, 731 .kill_sb = kill_litter_super, 732 }; 733 734 static struct vfsmount *hugetlbfs_vfsmount; 735 736 static int can_do_hugetlb_shm(void) 737 { 738 return likely(capable(CAP_IPC_LOCK) || 739 in_group_p(sysctl_hugetlb_shm_group) || 740 can_do_mlock()); 741 } 742 743 struct file *hugetlb_zero_setup(size_t size) 744 { 745 int error = -ENOMEM; 746 struct file *file; 747 struct inode *inode; 748 struct dentry *dentry, *root; 749 struct qstr quick_string; 750 char buf[16]; 751 static atomic_t counter; 752 753 if (!can_do_hugetlb_shm()) 754 return ERR_PTR(-EPERM); 755 756 if (!user_shm_lock(size, current->user)) 757 return ERR_PTR(-ENOMEM); 758 759 root = hugetlbfs_vfsmount->mnt_root; 760 snprintf(buf, 16, "%u", atomic_inc_return(&counter)); 761 quick_string.name = buf; 762 quick_string.len = strlen(quick_string.name); 763 quick_string.hash = 0; 764 dentry = d_alloc(root, &quick_string); 765 if (!dentry) 766 goto out_shm_unlock; 767 768 error = -ENFILE; 769 file = get_empty_filp(); 770 if (!file) 771 goto out_dentry; 772 773 error = -ENOSPC; 774 inode = hugetlbfs_get_inode(root->d_sb, current->fsuid, 775 current->fsgid, S_IFREG | S_IRWXUGO, 0); 776 if (!inode) 777 goto out_file; 778 779 error = -ENOMEM; 780 if (hugetlb_reserve_pages(inode, 0, size >> HPAGE_SHIFT)) 781 goto out_inode; 782 783 d_instantiate(dentry, inode); 784 inode->i_size = size; 785 inode->i_nlink = 0; 786 file->f_vfsmnt = mntget(hugetlbfs_vfsmount); 787 file->f_dentry = dentry; 788 file->f_mapping = inode->i_mapping; 789 file->f_op = &hugetlbfs_file_operations; 790 file->f_mode = FMODE_WRITE | FMODE_READ; 791 return file; 792 793 out_inode: 794 iput(inode); 795 out_file: 796 put_filp(file); 797 out_dentry: 798 dput(dentry); 799 out_shm_unlock: 800 user_shm_unlock(size, current->user); 801 return ERR_PTR(error); 802 } 803 804 static int __init init_hugetlbfs_fs(void) 805 { 806 int error; 807 struct vfsmount *vfsmount; 808 809 hugetlbfs_inode_cachep = kmem_cache_create("hugetlbfs_inode_cache", 810 sizeof(struct hugetlbfs_inode_info), 811 0, 0, init_once, NULL); 812 if (hugetlbfs_inode_cachep == NULL) 813 return -ENOMEM; 814 815 error = register_filesystem(&hugetlbfs_fs_type); 816 if (error) 817 goto out; 818 819 vfsmount = kern_mount(&hugetlbfs_fs_type); 820 821 if (!IS_ERR(vfsmount)) { 822 hugetlbfs_vfsmount = vfsmount; 823 return 0; 824 } 825 826 error = PTR_ERR(vfsmount); 827 828 out: 829 if (error) 830 kmem_cache_destroy(hugetlbfs_inode_cachep); 831 return error; 832 } 833 834 static void __exit exit_hugetlbfs_fs(void) 835 { 836 kmem_cache_destroy(hugetlbfs_inode_cachep); 837 unregister_filesystem(&hugetlbfs_fs_type); 838 } 839 840 module_init(init_hugetlbfs_fs) 841 module_exit(exit_hugetlbfs_fs) 842 843 MODULE_LICENSE("GPL"); 844