1 /* 2 * hugetlbpage-backed filesystem. Based on ramfs. 3 * 4 * William Irwin, 2002 5 * 6 * Copyright (C) 2002 Linus Torvalds. 7 */ 8 9 #include <linux/module.h> 10 #include <linux/thread_info.h> 11 #include <asm/current.h> 12 #include <linux/sched.h> /* remove ASAP */ 13 #include <linux/fs.h> 14 #include <linux/mount.h> 15 #include <linux/file.h> 16 #include <linux/writeback.h> 17 #include <linux/pagemap.h> 18 #include <linux/highmem.h> 19 #include <linux/init.h> 20 #include <linux/string.h> 21 #include <linux/capability.h> 22 #include <linux/backing-dev.h> 23 #include <linux/hugetlb.h> 24 #include <linux/pagevec.h> 25 #include <linux/quotaops.h> 26 #include <linux/slab.h> 27 #include <linux/dnotify.h> 28 #include <linux/statfs.h> 29 #include <linux/security.h> 30 31 #include <asm/uaccess.h> 32 33 /* some random number */ 34 #define HUGETLBFS_MAGIC 0x958458f6 35 36 static struct super_operations hugetlbfs_ops; 37 static struct address_space_operations hugetlbfs_aops; 38 struct file_operations hugetlbfs_file_operations; 39 static struct inode_operations hugetlbfs_dir_inode_operations; 40 static struct inode_operations hugetlbfs_inode_operations; 41 42 static struct backing_dev_info hugetlbfs_backing_dev_info = { 43 .ra_pages = 0, /* No readahead */ 44 .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK, 45 }; 46 47 int sysctl_hugetlb_shm_group; 48 49 static void huge_pagevec_release(struct pagevec *pvec) 50 { 51 int i; 52 53 for (i = 0; i < pagevec_count(pvec); ++i) 54 put_page(pvec->pages[i]); 55 56 pagevec_reinit(pvec); 57 } 58 59 /* 60 * huge_pages_needed tries to determine the number of new huge pages that 61 * will be required to fully populate this VMA. This will be equal to 62 * the size of the VMA in huge pages minus the number of huge pages 63 * (covered by this VMA) that are found in the page cache. 64 * 65 * Result is in bytes to be compatible with is_hugepage_mem_enough() 66 */ 67 static unsigned long 68 huge_pages_needed(struct address_space *mapping, struct vm_area_struct *vma) 69 { 70 int i; 71 struct pagevec pvec; 72 unsigned long start = vma->vm_start; 73 unsigned long end = vma->vm_end; 74 unsigned long hugepages = (end - start) >> HPAGE_SHIFT; 75 pgoff_t next = vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT); 76 pgoff_t endpg = next + hugepages; 77 78 pagevec_init(&pvec, 0); 79 while (next < endpg) { 80 if (!pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) 81 break; 82 for (i = 0; i < pagevec_count(&pvec); i++) { 83 struct page *page = pvec.pages[i]; 84 if (page->index > next) 85 next = page->index; 86 if (page->index >= endpg) 87 break; 88 next++; 89 hugepages--; 90 } 91 huge_pagevec_release(&pvec); 92 } 93 return hugepages << HPAGE_SHIFT; 94 } 95 96 static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) 97 { 98 struct inode *inode = file->f_dentry->d_inode; 99 struct address_space *mapping = inode->i_mapping; 100 unsigned long bytes; 101 loff_t len, vma_len; 102 int ret; 103 104 if (vma->vm_pgoff & (HPAGE_SIZE / PAGE_SIZE - 1)) 105 return -EINVAL; 106 107 if (vma->vm_start & ~HPAGE_MASK) 108 return -EINVAL; 109 110 if (vma->vm_end & ~HPAGE_MASK) 111 return -EINVAL; 112 113 if (vma->vm_end - vma->vm_start < HPAGE_SIZE) 114 return -EINVAL; 115 116 bytes = huge_pages_needed(mapping, vma); 117 if (!is_hugepage_mem_enough(bytes)) 118 return -ENOMEM; 119 120 vma_len = (loff_t)(vma->vm_end - vma->vm_start); 121 122 mutex_lock(&inode->i_mutex); 123 file_accessed(file); 124 vma->vm_flags |= VM_HUGETLB | VM_RESERVED; 125 vma->vm_ops = &hugetlb_vm_ops; 126 127 ret = -ENOMEM; 128 len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT); 129 if (!(vma->vm_flags & VM_WRITE) && len > inode->i_size) 130 goto out; 131 132 ret = 0; 133 hugetlb_prefault_arch_hook(vma->vm_mm); 134 if (inode->i_size < len) 135 inode->i_size = len; 136 out: 137 mutex_unlock(&inode->i_mutex); 138 139 return ret; 140 } 141 142 /* 143 * Called under down_write(mmap_sem). 144 */ 145 146 #ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA 147 unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, 148 unsigned long len, unsigned long pgoff, unsigned long flags); 149 #else 150 static unsigned long 151 hugetlb_get_unmapped_area(struct file *file, unsigned long addr, 152 unsigned long len, unsigned long pgoff, unsigned long flags) 153 { 154 struct mm_struct *mm = current->mm; 155 struct vm_area_struct *vma; 156 unsigned long start_addr; 157 158 if (len & ~HPAGE_MASK) 159 return -EINVAL; 160 if (len > TASK_SIZE) 161 return -ENOMEM; 162 163 if (addr) { 164 addr = ALIGN(addr, HPAGE_SIZE); 165 vma = find_vma(mm, addr); 166 if (TASK_SIZE - len >= addr && 167 (!vma || addr + len <= vma->vm_start)) 168 return addr; 169 } 170 171 start_addr = mm->free_area_cache; 172 173 if (len <= mm->cached_hole_size) 174 start_addr = TASK_UNMAPPED_BASE; 175 176 full_search: 177 addr = ALIGN(start_addr, HPAGE_SIZE); 178 179 for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { 180 /* At this point: (!vma || addr < vma->vm_end). */ 181 if (TASK_SIZE - len < addr) { 182 /* 183 * Start a new search - just in case we missed 184 * some holes. 185 */ 186 if (start_addr != TASK_UNMAPPED_BASE) { 187 start_addr = TASK_UNMAPPED_BASE; 188 goto full_search; 189 } 190 return -ENOMEM; 191 } 192 193 if (!vma || addr + len <= vma->vm_start) 194 return addr; 195 addr = ALIGN(vma->vm_end, HPAGE_SIZE); 196 } 197 } 198 #endif 199 200 /* 201 * Read a page. Again trivial. If it didn't already exist 202 * in the page cache, it is zero-filled. 203 */ 204 static int hugetlbfs_readpage(struct file *file, struct page * page) 205 { 206 unlock_page(page); 207 return -EINVAL; 208 } 209 210 static int hugetlbfs_prepare_write(struct file *file, 211 struct page *page, unsigned offset, unsigned to) 212 { 213 return -EINVAL; 214 } 215 216 static int hugetlbfs_commit_write(struct file *file, 217 struct page *page, unsigned offset, unsigned to) 218 { 219 return -EINVAL; 220 } 221 222 static void truncate_huge_page(struct page *page) 223 { 224 clear_page_dirty(page); 225 ClearPageUptodate(page); 226 remove_from_page_cache(page); 227 put_page(page); 228 } 229 230 static void truncate_hugepages(struct address_space *mapping, loff_t lstart) 231 { 232 const pgoff_t start = lstart >> HPAGE_SHIFT; 233 struct pagevec pvec; 234 pgoff_t next; 235 int i; 236 237 pagevec_init(&pvec, 0); 238 next = start; 239 while (1) { 240 if (!pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) { 241 if (next == start) 242 break; 243 next = start; 244 continue; 245 } 246 247 for (i = 0; i < pagevec_count(&pvec); ++i) { 248 struct page *page = pvec.pages[i]; 249 250 lock_page(page); 251 if (page->index > next) 252 next = page->index; 253 ++next; 254 truncate_huge_page(page); 255 unlock_page(page); 256 hugetlb_put_quota(mapping); 257 } 258 huge_pagevec_release(&pvec); 259 } 260 BUG_ON(!lstart && mapping->nrpages); 261 } 262 263 static void hugetlbfs_delete_inode(struct inode *inode) 264 { 265 if (inode->i_data.nrpages) 266 truncate_hugepages(&inode->i_data, 0); 267 clear_inode(inode); 268 } 269 270 static void hugetlbfs_forget_inode(struct inode *inode) 271 { 272 struct super_block *sb = inode->i_sb; 273 274 if (!hlist_unhashed(&inode->i_hash)) { 275 if (!(inode->i_state & (I_DIRTY|I_LOCK))) 276 list_move(&inode->i_list, &inode_unused); 277 inodes_stat.nr_unused++; 278 if (!sb || (sb->s_flags & MS_ACTIVE)) { 279 spin_unlock(&inode_lock); 280 return; 281 } 282 inode->i_state |= I_WILL_FREE; 283 spin_unlock(&inode_lock); 284 /* 285 * write_inode_now is a noop as we set BDI_CAP_NO_WRITEBACK 286 * in our backing_dev_info. 287 */ 288 write_inode_now(inode, 1); 289 spin_lock(&inode_lock); 290 inode->i_state &= ~I_WILL_FREE; 291 inodes_stat.nr_unused--; 292 hlist_del_init(&inode->i_hash); 293 } 294 list_del_init(&inode->i_list); 295 list_del_init(&inode->i_sb_list); 296 inode->i_state |= I_FREEING; 297 inodes_stat.nr_inodes--; 298 spin_unlock(&inode_lock); 299 if (inode->i_data.nrpages) 300 truncate_hugepages(&inode->i_data, 0); 301 clear_inode(inode); 302 destroy_inode(inode); 303 } 304 305 static void hugetlbfs_drop_inode(struct inode *inode) 306 { 307 if (!inode->i_nlink) 308 generic_delete_inode(inode); 309 else 310 hugetlbfs_forget_inode(inode); 311 } 312 313 /* 314 * h_pgoff is in HPAGE_SIZE units. 315 * vma->vm_pgoff is in PAGE_SIZE units. 316 */ 317 static inline void 318 hugetlb_vmtruncate_list(struct prio_tree_root *root, unsigned long h_pgoff) 319 { 320 struct vm_area_struct *vma; 321 struct prio_tree_iter iter; 322 323 vma_prio_tree_foreach(vma, &iter, root, h_pgoff, ULONG_MAX) { 324 unsigned long h_vm_pgoff; 325 unsigned long v_offset; 326 327 h_vm_pgoff = vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT); 328 v_offset = (h_pgoff - h_vm_pgoff) << HPAGE_SHIFT; 329 /* 330 * Is this VMA fully outside the truncation point? 331 */ 332 if (h_vm_pgoff >= h_pgoff) 333 v_offset = 0; 334 335 unmap_hugepage_range(vma, 336 vma->vm_start + v_offset, vma->vm_end); 337 } 338 } 339 340 /* 341 * Expanding truncates are not allowed. 342 */ 343 static int hugetlb_vmtruncate(struct inode *inode, loff_t offset) 344 { 345 unsigned long pgoff; 346 struct address_space *mapping = inode->i_mapping; 347 348 if (offset > inode->i_size) 349 return -EINVAL; 350 351 BUG_ON(offset & ~HPAGE_MASK); 352 pgoff = offset >> HPAGE_SHIFT; 353 354 inode->i_size = offset; 355 spin_lock(&mapping->i_mmap_lock); 356 if (!prio_tree_empty(&mapping->i_mmap)) 357 hugetlb_vmtruncate_list(&mapping->i_mmap, pgoff); 358 spin_unlock(&mapping->i_mmap_lock); 359 truncate_hugepages(mapping, offset); 360 return 0; 361 } 362 363 static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr) 364 { 365 struct inode *inode = dentry->d_inode; 366 int error; 367 unsigned int ia_valid = attr->ia_valid; 368 369 BUG_ON(!inode); 370 371 error = inode_change_ok(inode, attr); 372 if (error) 373 goto out; 374 375 if (ia_valid & ATTR_SIZE) { 376 error = -EINVAL; 377 if (!(attr->ia_size & ~HPAGE_MASK)) 378 error = hugetlb_vmtruncate(inode, attr->ia_size); 379 if (error) 380 goto out; 381 attr->ia_valid &= ~ATTR_SIZE; 382 } 383 error = inode_setattr(inode, attr); 384 out: 385 return error; 386 } 387 388 static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid, 389 gid_t gid, int mode, dev_t dev) 390 { 391 struct inode *inode; 392 393 inode = new_inode(sb); 394 if (inode) { 395 struct hugetlbfs_inode_info *info; 396 inode->i_mode = mode; 397 inode->i_uid = uid; 398 inode->i_gid = gid; 399 inode->i_blksize = HPAGE_SIZE; 400 inode->i_blocks = 0; 401 inode->i_mapping->a_ops = &hugetlbfs_aops; 402 inode->i_mapping->backing_dev_info =&hugetlbfs_backing_dev_info; 403 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 404 info = HUGETLBFS_I(inode); 405 mpol_shared_policy_init(&info->policy, MPOL_DEFAULT, NULL); 406 switch (mode & S_IFMT) { 407 default: 408 init_special_inode(inode, mode, dev); 409 break; 410 case S_IFREG: 411 inode->i_op = &hugetlbfs_inode_operations; 412 inode->i_fop = &hugetlbfs_file_operations; 413 break; 414 case S_IFDIR: 415 inode->i_op = &hugetlbfs_dir_inode_operations; 416 inode->i_fop = &simple_dir_operations; 417 418 /* directory inodes start off with i_nlink == 2 (for "." entry) */ 419 inode->i_nlink++; 420 break; 421 case S_IFLNK: 422 inode->i_op = &page_symlink_inode_operations; 423 break; 424 } 425 } 426 return inode; 427 } 428 429 /* 430 * File creation. Allocate an inode, and we're done.. 431 */ 432 static int hugetlbfs_mknod(struct inode *dir, 433 struct dentry *dentry, int mode, dev_t dev) 434 { 435 struct inode *inode; 436 int error = -ENOSPC; 437 gid_t gid; 438 439 if (dir->i_mode & S_ISGID) { 440 gid = dir->i_gid; 441 if (S_ISDIR(mode)) 442 mode |= S_ISGID; 443 } else { 444 gid = current->fsgid; 445 } 446 inode = hugetlbfs_get_inode(dir->i_sb, current->fsuid, gid, mode, dev); 447 if (inode) { 448 dir->i_ctime = dir->i_mtime = CURRENT_TIME; 449 d_instantiate(dentry, inode); 450 dget(dentry); /* Extra count - pin the dentry in core */ 451 error = 0; 452 } 453 return error; 454 } 455 456 static int hugetlbfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) 457 { 458 int retval = hugetlbfs_mknod(dir, dentry, mode | S_IFDIR, 0); 459 if (!retval) 460 dir->i_nlink++; 461 return retval; 462 } 463 464 static int hugetlbfs_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd) 465 { 466 return hugetlbfs_mknod(dir, dentry, mode | S_IFREG, 0); 467 } 468 469 static int hugetlbfs_symlink(struct inode *dir, 470 struct dentry *dentry, const char *symname) 471 { 472 struct inode *inode; 473 int error = -ENOSPC; 474 gid_t gid; 475 476 if (dir->i_mode & S_ISGID) 477 gid = dir->i_gid; 478 else 479 gid = current->fsgid; 480 481 inode = hugetlbfs_get_inode(dir->i_sb, current->fsuid, 482 gid, S_IFLNK|S_IRWXUGO, 0); 483 if (inode) { 484 int l = strlen(symname)+1; 485 error = page_symlink(inode, symname, l); 486 if (!error) { 487 d_instantiate(dentry, inode); 488 dget(dentry); 489 } else 490 iput(inode); 491 } 492 dir->i_ctime = dir->i_mtime = CURRENT_TIME; 493 494 return error; 495 } 496 497 /* 498 * For direct-IO reads into hugetlb pages 499 */ 500 static int hugetlbfs_set_page_dirty(struct page *page) 501 { 502 return 0; 503 } 504 505 static int hugetlbfs_statfs(struct super_block *sb, struct kstatfs *buf) 506 { 507 struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(sb); 508 509 buf->f_type = HUGETLBFS_MAGIC; 510 buf->f_bsize = HPAGE_SIZE; 511 if (sbinfo) { 512 spin_lock(&sbinfo->stat_lock); 513 /* If no limits set, just report 0 for max/free/used 514 * blocks, like simple_statfs() */ 515 if (sbinfo->max_blocks >= 0) { 516 buf->f_blocks = sbinfo->max_blocks; 517 buf->f_bavail = buf->f_bfree = sbinfo->free_blocks; 518 buf->f_files = sbinfo->max_inodes; 519 buf->f_ffree = sbinfo->free_inodes; 520 } 521 spin_unlock(&sbinfo->stat_lock); 522 } 523 buf->f_namelen = NAME_MAX; 524 return 0; 525 } 526 527 static void hugetlbfs_put_super(struct super_block *sb) 528 { 529 struct hugetlbfs_sb_info *sbi = HUGETLBFS_SB(sb); 530 531 if (sbi) { 532 sb->s_fs_info = NULL; 533 kfree(sbi); 534 } 535 } 536 537 static inline int hugetlbfs_dec_free_inodes(struct hugetlbfs_sb_info *sbinfo) 538 { 539 if (sbinfo->free_inodes >= 0) { 540 spin_lock(&sbinfo->stat_lock); 541 if (unlikely(!sbinfo->free_inodes)) { 542 spin_unlock(&sbinfo->stat_lock); 543 return 0; 544 } 545 sbinfo->free_inodes--; 546 spin_unlock(&sbinfo->stat_lock); 547 } 548 549 return 1; 550 } 551 552 static void hugetlbfs_inc_free_inodes(struct hugetlbfs_sb_info *sbinfo) 553 { 554 if (sbinfo->free_inodes >= 0) { 555 spin_lock(&sbinfo->stat_lock); 556 sbinfo->free_inodes++; 557 spin_unlock(&sbinfo->stat_lock); 558 } 559 } 560 561 562 static kmem_cache_t *hugetlbfs_inode_cachep; 563 564 static struct inode *hugetlbfs_alloc_inode(struct super_block *sb) 565 { 566 struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(sb); 567 struct hugetlbfs_inode_info *p; 568 569 if (unlikely(!hugetlbfs_dec_free_inodes(sbinfo))) 570 return NULL; 571 p = kmem_cache_alloc(hugetlbfs_inode_cachep, SLAB_KERNEL); 572 if (unlikely(!p)) { 573 hugetlbfs_inc_free_inodes(sbinfo); 574 return NULL; 575 } 576 return &p->vfs_inode; 577 } 578 579 static void hugetlbfs_destroy_inode(struct inode *inode) 580 { 581 hugetlbfs_inc_free_inodes(HUGETLBFS_SB(inode->i_sb)); 582 mpol_free_shared_policy(&HUGETLBFS_I(inode)->policy); 583 kmem_cache_free(hugetlbfs_inode_cachep, HUGETLBFS_I(inode)); 584 } 585 586 static struct address_space_operations hugetlbfs_aops = { 587 .readpage = hugetlbfs_readpage, 588 .prepare_write = hugetlbfs_prepare_write, 589 .commit_write = hugetlbfs_commit_write, 590 .set_page_dirty = hugetlbfs_set_page_dirty, 591 }; 592 593 594 static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags) 595 { 596 struct hugetlbfs_inode_info *ei = (struct hugetlbfs_inode_info *)foo; 597 598 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == 599 SLAB_CTOR_CONSTRUCTOR) 600 inode_init_once(&ei->vfs_inode); 601 } 602 603 struct file_operations hugetlbfs_file_operations = { 604 .mmap = hugetlbfs_file_mmap, 605 .fsync = simple_sync_file, 606 .get_unmapped_area = hugetlb_get_unmapped_area, 607 }; 608 609 static struct inode_operations hugetlbfs_dir_inode_operations = { 610 .create = hugetlbfs_create, 611 .lookup = simple_lookup, 612 .link = simple_link, 613 .unlink = simple_unlink, 614 .symlink = hugetlbfs_symlink, 615 .mkdir = hugetlbfs_mkdir, 616 .rmdir = simple_rmdir, 617 .mknod = hugetlbfs_mknod, 618 .rename = simple_rename, 619 .setattr = hugetlbfs_setattr, 620 }; 621 622 static struct inode_operations hugetlbfs_inode_operations = { 623 .setattr = hugetlbfs_setattr, 624 }; 625 626 static struct super_operations hugetlbfs_ops = { 627 .alloc_inode = hugetlbfs_alloc_inode, 628 .destroy_inode = hugetlbfs_destroy_inode, 629 .statfs = hugetlbfs_statfs, 630 .delete_inode = hugetlbfs_delete_inode, 631 .drop_inode = hugetlbfs_drop_inode, 632 .put_super = hugetlbfs_put_super, 633 }; 634 635 static int 636 hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig) 637 { 638 char *opt, *value, *rest; 639 640 if (!options) 641 return 0; 642 while ((opt = strsep(&options, ",")) != NULL) { 643 if (!*opt) 644 continue; 645 646 value = strchr(opt, '='); 647 if (!value || !*value) 648 return -EINVAL; 649 else 650 *value++ = '\0'; 651 652 if (!strcmp(opt, "uid")) 653 pconfig->uid = simple_strtoul(value, &value, 0); 654 else if (!strcmp(opt, "gid")) 655 pconfig->gid = simple_strtoul(value, &value, 0); 656 else if (!strcmp(opt, "mode")) 657 pconfig->mode = simple_strtoul(value,&value,0) & 0777U; 658 else if (!strcmp(opt, "size")) { 659 unsigned long long size = memparse(value, &rest); 660 if (*rest == '%') { 661 size <<= HPAGE_SHIFT; 662 size *= max_huge_pages; 663 do_div(size, 100); 664 rest++; 665 } 666 size &= HPAGE_MASK; 667 pconfig->nr_blocks = (size >> HPAGE_SHIFT); 668 value = rest; 669 } else if (!strcmp(opt,"nr_inodes")) { 670 pconfig->nr_inodes = memparse(value, &rest); 671 value = rest; 672 } else 673 return -EINVAL; 674 675 if (*value) 676 return -EINVAL; 677 } 678 return 0; 679 } 680 681 static int 682 hugetlbfs_fill_super(struct super_block *sb, void *data, int silent) 683 { 684 struct inode * inode; 685 struct dentry * root; 686 int ret; 687 struct hugetlbfs_config config; 688 struct hugetlbfs_sb_info *sbinfo; 689 690 config.nr_blocks = -1; /* No limit on size by default */ 691 config.nr_inodes = -1; /* No limit on number of inodes by default */ 692 config.uid = current->fsuid; 693 config.gid = current->fsgid; 694 config.mode = 0755; 695 ret = hugetlbfs_parse_options(data, &config); 696 697 if (ret) 698 return ret; 699 700 sbinfo = kmalloc(sizeof(struct hugetlbfs_sb_info), GFP_KERNEL); 701 if (!sbinfo) 702 return -ENOMEM; 703 sb->s_fs_info = sbinfo; 704 spin_lock_init(&sbinfo->stat_lock); 705 sbinfo->max_blocks = config.nr_blocks; 706 sbinfo->free_blocks = config.nr_blocks; 707 sbinfo->max_inodes = config.nr_inodes; 708 sbinfo->free_inodes = config.nr_inodes; 709 sb->s_maxbytes = MAX_LFS_FILESIZE; 710 sb->s_blocksize = HPAGE_SIZE; 711 sb->s_blocksize_bits = HPAGE_SHIFT; 712 sb->s_magic = HUGETLBFS_MAGIC; 713 sb->s_op = &hugetlbfs_ops; 714 sb->s_time_gran = 1; 715 inode = hugetlbfs_get_inode(sb, config.uid, config.gid, 716 S_IFDIR | config.mode, 0); 717 if (!inode) 718 goto out_free; 719 720 root = d_alloc_root(inode); 721 if (!root) { 722 iput(inode); 723 goto out_free; 724 } 725 sb->s_root = root; 726 return 0; 727 out_free: 728 kfree(sbinfo); 729 return -ENOMEM; 730 } 731 732 int hugetlb_get_quota(struct address_space *mapping) 733 { 734 int ret = 0; 735 struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(mapping->host->i_sb); 736 737 if (sbinfo->free_blocks > -1) { 738 spin_lock(&sbinfo->stat_lock); 739 if (sbinfo->free_blocks > 0) 740 sbinfo->free_blocks--; 741 else 742 ret = -ENOMEM; 743 spin_unlock(&sbinfo->stat_lock); 744 } 745 746 return ret; 747 } 748 749 void hugetlb_put_quota(struct address_space *mapping) 750 { 751 struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(mapping->host->i_sb); 752 753 if (sbinfo->free_blocks > -1) { 754 spin_lock(&sbinfo->stat_lock); 755 sbinfo->free_blocks++; 756 spin_unlock(&sbinfo->stat_lock); 757 } 758 } 759 760 static struct super_block *hugetlbfs_get_sb(struct file_system_type *fs_type, 761 int flags, const char *dev_name, void *data) 762 { 763 return get_sb_nodev(fs_type, flags, data, hugetlbfs_fill_super); 764 } 765 766 static struct file_system_type hugetlbfs_fs_type = { 767 .name = "hugetlbfs", 768 .get_sb = hugetlbfs_get_sb, 769 .kill_sb = kill_litter_super, 770 }; 771 772 static struct vfsmount *hugetlbfs_vfsmount; 773 774 /* 775 * Return the next identifier for a shm file 776 */ 777 static unsigned long hugetlbfs_counter(void) 778 { 779 static DEFINE_SPINLOCK(lock); 780 static unsigned long counter; 781 unsigned long ret; 782 783 spin_lock(&lock); 784 ret = ++counter; 785 spin_unlock(&lock); 786 return ret; 787 } 788 789 static int can_do_hugetlb_shm(void) 790 { 791 return likely(capable(CAP_IPC_LOCK) || 792 in_group_p(sysctl_hugetlb_shm_group) || 793 can_do_mlock()); 794 } 795 796 struct file *hugetlb_zero_setup(size_t size) 797 { 798 int error = -ENOMEM; 799 struct file *file; 800 struct inode *inode; 801 struct dentry *dentry, *root; 802 struct qstr quick_string; 803 char buf[16]; 804 805 if (!can_do_hugetlb_shm()) 806 return ERR_PTR(-EPERM); 807 808 if (!is_hugepage_mem_enough(size)) 809 return ERR_PTR(-ENOMEM); 810 811 if (!user_shm_lock(size, current->user)) 812 return ERR_PTR(-ENOMEM); 813 814 root = hugetlbfs_vfsmount->mnt_root; 815 snprintf(buf, 16, "%lu", hugetlbfs_counter()); 816 quick_string.name = buf; 817 quick_string.len = strlen(quick_string.name); 818 quick_string.hash = 0; 819 dentry = d_alloc(root, &quick_string); 820 if (!dentry) 821 goto out_shm_unlock; 822 823 error = -ENFILE; 824 file = get_empty_filp(); 825 if (!file) 826 goto out_dentry; 827 828 error = -ENOSPC; 829 inode = hugetlbfs_get_inode(root->d_sb, current->fsuid, 830 current->fsgid, S_IFREG | S_IRWXUGO, 0); 831 if (!inode) 832 goto out_file; 833 834 d_instantiate(dentry, inode); 835 inode->i_size = size; 836 inode->i_nlink = 0; 837 file->f_vfsmnt = mntget(hugetlbfs_vfsmount); 838 file->f_dentry = dentry; 839 file->f_mapping = inode->i_mapping; 840 file->f_op = &hugetlbfs_file_operations; 841 file->f_mode = FMODE_WRITE | FMODE_READ; 842 return file; 843 844 out_file: 845 put_filp(file); 846 out_dentry: 847 dput(dentry); 848 out_shm_unlock: 849 user_shm_unlock(size, current->user); 850 return ERR_PTR(error); 851 } 852 853 static int __init init_hugetlbfs_fs(void) 854 { 855 int error; 856 struct vfsmount *vfsmount; 857 858 hugetlbfs_inode_cachep = kmem_cache_create("hugetlbfs_inode_cache", 859 sizeof(struct hugetlbfs_inode_info), 860 0, 0, init_once, NULL); 861 if (hugetlbfs_inode_cachep == NULL) 862 return -ENOMEM; 863 864 error = register_filesystem(&hugetlbfs_fs_type); 865 if (error) 866 goto out; 867 868 vfsmount = kern_mount(&hugetlbfs_fs_type); 869 870 if (!IS_ERR(vfsmount)) { 871 hugetlbfs_vfsmount = vfsmount; 872 return 0; 873 } 874 875 error = PTR_ERR(vfsmount); 876 877 out: 878 if (error) 879 kmem_cache_destroy(hugetlbfs_inode_cachep); 880 return error; 881 } 882 883 static void __exit exit_hugetlbfs_fs(void) 884 { 885 kmem_cache_destroy(hugetlbfs_inode_cachep); 886 unregister_filesystem(&hugetlbfs_fs_type); 887 } 888 889 module_init(init_hugetlbfs_fs) 890 module_exit(exit_hugetlbfs_fs) 891 892 MODULE_LICENSE("GPL"); 893