/*
 * Generic hugetlb support.
 * (C) William Irwin, April 2004
 */
#include <linux/gfp.h>
#include <linux/list.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/sysctl.h>
#include <linux/highmem.h>
#include <linux/nodemask.h>
#include <linux/pagemap.h>
#include <linux/mempolicy.h>
#include <linux/cpuset.h>
#include <linux/mutex.h>

#include <asm/page.h>
#include <asm/pgtable.h>

#include <linux/hugetlb.h>
#include "internal.h"

const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL;
static unsigned long nr_huge_pages, free_huge_pages, resv_huge_pages;
unsigned long max_huge_pages;
static struct list_head hugepage_freelists[MAX_NUMNODES];
static unsigned int nr_huge_pages_node[MAX_NUMNODES];
static unsigned int free_huge_pages_node[MAX_NUMNODES];
/*
 * Protects updates to hugepage_freelists, nr_huge_pages, and free_huge_pages
 */
static DEFINE_SPINLOCK(hugetlb_lock);

static void clear_huge_page(struct page *page, unsigned long addr)
{
	int i;

	might_sleep();
	for (i = 0; i < (HPAGE_SIZE/PAGE_SIZE); i++) {
		cond_resched();
		clear_user_highpage(page + i, addr);
	}
}

static void copy_huge_page(struct page *dst, struct page *src,
			   unsigned long addr)
{
	int i;

	might_sleep();
	for (i = 0; i < HPAGE_SIZE/PAGE_SIZE; i++) {
		cond_resched();
		copy_user_highpage(dst + i, src + i, addr + i*PAGE_SIZE);
	}
}

static void enqueue_huge_page(struct page *page)
{
	int nid = page_to_nid(page);
	list_add(&page->lru, &hugepage_freelists[nid]);
	free_huge_pages++;
	free_huge_pages_node[nid]++;
}

static struct page *dequeue_huge_page(struct vm_area_struct *vma,
				unsigned long address)
{
	int nid = numa_node_id();
	struct page *page = NULL;
	struct zonelist *zonelist = huge_zonelist(vma, address);
	struct zone **z;

	for (z = zonelist->zones; *z; z++) {
		nid = (*z)->zone_pgdat->node_id;
		if (cpuset_zone_allowed(*z, GFP_HIGHUSER) &&
		    !list_empty(&hugepage_freelists[nid]))
			break;
	}

	if (*z) {
		page = list_entry(hugepage_freelists[nid].next,
				  struct page, lru);
		list_del(&page->lru);
		free_huge_pages--;
		free_huge_pages_node[nid]--;
	}
	return page;
}

static void free_huge_page(struct page *page)
{
	BUG_ON(page_count(page));

	INIT_LIST_HEAD(&page->lru);

	spin_lock(&hugetlb_lock);
	enqueue_huge_page(page);
	spin_unlock(&hugetlb_lock);
}

static int alloc_fresh_huge_page(void)
{
	static int nid = 0;
	struct page *page;
	page = alloc_pages_node(nid, GFP_HIGHUSER|__GFP_COMP|__GFP_NOWARN,
					HUGETLB_PAGE_ORDER);
	nid = next_node(nid, node_online_map);
	if (nid == MAX_NUMNODES)
		nid = first_node(node_online_map);
	if (page) {
		page[1].lru.next = (void *)free_huge_page;	/* dtor */
		spin_lock(&hugetlb_lock);
		nr_huge_pages++;
		nr_huge_pages_node[page_to_nid(page)]++;
		spin_unlock(&hugetlb_lock);
		put_page(page); /* free it into the hugepage allocator */
		return 1;
	}
	return 0;
}

static struct page *alloc_huge_page(struct vm_area_struct *vma,
				    unsigned long addr)
{
	struct page *page;

	spin_lock(&hugetlb_lock);
	if (vma->vm_flags & VM_MAYSHARE)
		resv_huge_pages--;
	else if (free_huge_pages <= resv_huge_pages)
		goto fail;

	page = dequeue_huge_page(vma, addr);
	if (!page)
		goto fail;

	spin_unlock(&hugetlb_lock);
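	/*
	 * Pages sit on the hugepage free lists with a zero reference
	 * count (free_huge_page() BUGs otherwise), so give the caller
	 * the single reference it expects before handing the page out.
	 */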
	set_page_refcounted(page);
	return page;

fail:
	spin_unlock(&hugetlb_lock);
	return NULL;
}

static int __init hugetlb_init(void)
{
	unsigned long i;

	if (HPAGE_SHIFT == 0)
		return 0;

	for (i = 0; i < MAX_NUMNODES; ++i)
		INIT_LIST_HEAD(&hugepage_freelists[i]);

	for (i = 0; i < max_huge_pages; ++i) {
		if (!alloc_fresh_huge_page())
			break;
	}
	max_huge_pages = free_huge_pages = nr_huge_pages = i;
	printk("Total HugeTLB memory allocated, %lu\n", free_huge_pages);
	return 0;
}
module_init(hugetlb_init);

static int __init hugetlb_setup(char *s)
{
	if (sscanf(s, "%lu", &max_huge_pages) <= 0)
		max_huge_pages = 0;
	return 1;
}
__setup("hugepages=", hugetlb_setup);

#ifdef CONFIG_SYSCTL
static void update_and_free_page(struct page *page)
{
	int i;
	nr_huge_pages--;
	nr_huge_pages_node[page_zone(page)->zone_pgdat->node_id]--;
	for (i = 0; i < (HPAGE_SIZE / PAGE_SIZE); i++) {
		page[i].flags &= ~(1 << PG_locked | 1 << PG_error | 1 << PG_referenced |
				1 << PG_dirty | 1 << PG_active | 1 << PG_reserved |
				1 << PG_private | 1 << PG_writeback);
	}
	page[1].lru.next = NULL;
	set_page_refcounted(page);
	__free_pages(page, HUGETLB_PAGE_ORDER);
}

#ifdef CONFIG_HIGHMEM
static void try_to_free_low(unsigned long count)
{
	int i, nid;
	for (i = 0; i < MAX_NUMNODES; ++i) {
		struct page *page, *next;
		list_for_each_entry_safe(page, next, &hugepage_freelists[i], lru) {
			if (PageHighMem(page))
				continue;
			list_del(&page->lru);
			update_and_free_page(page);
			nid = page_zone(page)->zone_pgdat->node_id;
			free_huge_pages--;
			free_huge_pages_node[nid]--;
			if (count >= nr_huge_pages)
				return;
		}
	}
}
#else
static inline void try_to_free_low(unsigned long count)
{
}
#endif

static unsigned long set_max_huge_pages(unsigned long count)
{
	while (count > nr_huge_pages) {
		if (!alloc_fresh_huge_page())
			return nr_huge_pages;
	}
	if (count >= nr_huge_pages)
		return nr_huge_pages;

	spin_lock(&hugetlb_lock);
	count = max(count, resv_huge_pages);
	try_to_free_low(count);
	while (count < nr_huge_pages) {
		struct page *page = dequeue_huge_page(NULL, 0);
		if (!page)
			break;
		update_and_free_page(page);
	}
	spin_unlock(&hugetlb_lock);
	return nr_huge_pages;
}

int hugetlb_sysctl_handler(struct ctl_table *table, int write,
			   struct file *file, void __user *buffer,
			   size_t *length, loff_t *ppos)
{
	proc_doulongvec_minmax(table, write, file, buffer, length, ppos);
	max_huge_pages = set_max_huge_pages(max_huge_pages);
	return 0;
}
#endif /* CONFIG_SYSCTL */
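/*
 * Report helpers: hugetlb_report_meminfo() supplies the HugePages_* and
 * Hugepagesize lines of /proc/meminfo, hugetlb_report_node_meminfo() the
 * per-node counterparts.  All figures are counts of huge pages except
 * Hugepagesize, which is in kB.
 */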
int hugetlb_report_meminfo(char *buf)
{
	return sprintf(buf,
			"HugePages_Total: %5lu\n"
			"HugePages_Free:  %5lu\n"
			"HugePages_Rsvd:  %5lu\n"
			"Hugepagesize:    %5lu kB\n",
			nr_huge_pages,
			free_huge_pages,
			resv_huge_pages,
			HPAGE_SIZE/1024);
}

int hugetlb_report_node_meminfo(int nid, char *buf)
{
	return sprintf(buf,
		"Node %d HugePages_Total: %5u\n"
		"Node %d HugePages_Free:  %5u\n",
		nid, nr_huge_pages_node[nid],
		nid, free_huge_pages_node[nid]);
}

/* Return the number of pages of memory we physically have, in PAGE_SIZE units. */
unsigned long hugetlb_total_pages(void)
{
	return nr_huge_pages * (HPAGE_SIZE / PAGE_SIZE);
}

/*
 * We cannot handle pagefaults against hugetlb pages at all.  They cause
 * handle_mm_fault() to try to instantiate regular-sized pages in the
 * hugepage VMA.  do_page_fault() is supposed to trap this, so BUG if we get
 * this far.
 */
static struct page *hugetlb_nopage(struct vm_area_struct *vma,
				unsigned long address, int *unused)
{
	BUG();
	return NULL;
}

struct vm_operations_struct hugetlb_vm_ops = {
	.nopage = hugetlb_nopage,
};

static pte_t make_huge_pte(struct vm_area_struct *vma, struct page *page,
				int writable)
{
	pte_t entry;

	if (writable) {
		entry =
		    pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
	} else {
		entry = pte_wrprotect(mk_pte(page, vma->vm_page_prot));
	}
	entry = pte_mkyoung(entry);
	entry = pte_mkhuge(entry);

	return entry;
}

static void set_huge_ptep_writable(struct vm_area_struct *vma,
				   unsigned long address, pte_t *ptep)
{
	pte_t entry;

	entry = pte_mkwrite(pte_mkdirty(*ptep));
	ptep_set_access_flags(vma, address, ptep, entry, 1);
	update_mmu_cache(vma, address, entry);
	lazy_mmu_prot_update(entry);
}


int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
			    struct vm_area_struct *vma)
{
	pte_t *src_pte, *dst_pte, entry;
	struct page *ptepage;
	unsigned long addr;
	int cow;

	cow = (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;

	for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) {
		src_pte = huge_pte_offset(src, addr);
		if (!src_pte)
			continue;
		dst_pte = huge_pte_alloc(dst, addr);
		if (!dst_pte)
			goto nomem;
		spin_lock(&dst->page_table_lock);
		spin_lock(&src->page_table_lock);
		if (!pte_none(*src_pte)) {
			if (cow)
				ptep_set_wrprotect(src, addr, src_pte);
			entry = *src_pte;
			ptepage = pte_page(entry);
			get_page(ptepage);
			add_mm_counter(dst, file_rss, HPAGE_SIZE / PAGE_SIZE);
			set_huge_pte_at(dst, addr, dst_pte, entry);
		}
		spin_unlock(&src->page_table_lock);
		spin_unlock(&dst->page_table_lock);
	}
	return 0;

nomem:
	return -ENOMEM;
}

void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
			  unsigned long end)
{
	struct mm_struct *mm = vma->vm_mm;
	unsigned long address;
	pte_t *ptep;
	pte_t pte;
	struct page *page;

	WARN_ON(!is_vm_hugetlb_page(vma));
	BUG_ON(start & ~HPAGE_MASK);
	BUG_ON(end & ~HPAGE_MASK);

	spin_lock(&mm->page_table_lock);

	/* Update high watermark before we lower rss */
	update_hiwater_rss(mm);

	for (address = start; address < end; address += HPAGE_SIZE) {
		ptep = huge_pte_offset(mm, address);
		if (!ptep)
			continue;

		pte = huge_ptep_get_and_clear(mm, address, ptep);
		if (pte_none(pte))
			continue;

		page = pte_page(pte);
		put_page(page);
		add_mm_counter(mm, file_rss, (int) -(HPAGE_SIZE / PAGE_SIZE));
	}

	spin_unlock(&mm->page_table_lock);
	flush_tlb_range(vma, start, end);
}
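/*
 * Handle a huge-page copy-on-write fault.  If nobody else holds a
 * reference to the old page we simply make the existing mapping
 * writable; otherwise a fresh huge page is allocated, the data copied,
 * and the PTE switched over.  Called with mm->page_table_lock held;
 * the lock is dropped around the copy and retaken before the PTE is
 * updated.
 */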
static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
			unsigned long address, pte_t *ptep, pte_t pte)
{
	struct page *old_page, *new_page;
	int avoidcopy;

	old_page = pte_page(pte);

	/* If no-one else is actually using this page, avoid the copy
	 * and just make the page writable */
	avoidcopy = (page_count(old_page) == 1);
	if (avoidcopy) {
		set_huge_ptep_writable(vma, address, ptep);
		return VM_FAULT_MINOR;
	}

	page_cache_get(old_page);
	new_page = alloc_huge_page(vma, address);

	if (!new_page) {
		page_cache_release(old_page);
		return VM_FAULT_OOM;
	}

	spin_unlock(&mm->page_table_lock);
	copy_huge_page(new_page, old_page, address);
	spin_lock(&mm->page_table_lock);

	ptep = huge_pte_offset(mm, address & HPAGE_MASK);
	if (likely(pte_same(*ptep, pte))) {
		/* Break COW */
		set_huge_pte_at(mm, address, ptep,
				make_huge_pte(vma, new_page, 1));
		/* Make the old page be freed below */
		new_page = old_page;
	}
	page_cache_release(new_page);
	page_cache_release(old_page);
	return VM_FAULT_MINOR;
}

int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
			unsigned long address, pte_t *ptep, int write_access)
{
	int ret = VM_FAULT_SIGBUS;
	unsigned long idx;
	unsigned long size;
	struct page *page;
	struct address_space *mapping;
	pte_t new_pte;

	mapping = vma->vm_file->f_mapping;
	idx = ((address - vma->vm_start) >> HPAGE_SHIFT)
		+ (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT));

	/*
	 * Use page lock to guard against racing truncation
	 * before we get page_table_lock.
	 */
retry:
	page = find_lock_page(mapping, idx);
	if (!page) {
		if (hugetlb_get_quota(mapping))
			goto out;
		page = alloc_huge_page(vma, address);
		if (!page) {
			hugetlb_put_quota(mapping);
			ret = VM_FAULT_OOM;
			goto out;
		}
		clear_huge_page(page, address);

		if (vma->vm_flags & VM_SHARED) {
			int err;

			err = add_to_page_cache(page, mapping, idx, GFP_KERNEL);
			if (err) {
				put_page(page);
				hugetlb_put_quota(mapping);
				if (err == -EEXIST)
					goto retry;
				goto out;
			}
		} else
			lock_page(page);
	}

	spin_lock(&mm->page_table_lock);
	size = i_size_read(mapping->host) >> HPAGE_SHIFT;
	if (idx >= size)
		goto backout;

	ret = VM_FAULT_MINOR;
	if (!pte_none(*ptep))
		goto backout;

	add_mm_counter(mm, file_rss, HPAGE_SIZE / PAGE_SIZE);
	new_pte = make_huge_pte(vma, page, ((vma->vm_flags & VM_WRITE)
				&& (vma->vm_flags & VM_SHARED)));
	set_huge_pte_at(mm, address, ptep, new_pte);

	if (write_access && !(vma->vm_flags & VM_SHARED)) {
		/* Optimization, do the COW without a second fault */
		ret = hugetlb_cow(mm, vma, address, ptep, new_pte);
	}

	spin_unlock(&mm->page_table_lock);
	unlock_page(page);
out:
	return ret;

backout:
	spin_unlock(&mm->page_table_lock);
	hugetlb_put_quota(mapping);
	unlock_page(page);
	put_page(page);
	goto out;
}
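/*
 * Top-level hugetlb fault handler: an empty PTE is filled in via
 * hugetlb_no_page(), a write fault on a present read-only PTE is
 * resolved via hugetlb_cow().
 */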
int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
			unsigned long address, int write_access)
{
	pte_t *ptep;
	pte_t entry;
	int ret;
	static DEFINE_MUTEX(hugetlb_instantiation_mutex);

	ptep = huge_pte_alloc(mm, address);
	if (!ptep)
		return VM_FAULT_OOM;

	/*
	 * Serialize hugepage allocation and instantiation, so that we don't
	 * get spurious allocation failures if two CPUs race to instantiate
	 * the same page in the page cache.
	 */
	mutex_lock(&hugetlb_instantiation_mutex);
	entry = *ptep;
	if (pte_none(entry)) {
		ret = hugetlb_no_page(mm, vma, address, ptep, write_access);
		mutex_unlock(&hugetlb_instantiation_mutex);
		return ret;
	}

	ret = VM_FAULT_MINOR;

	spin_lock(&mm->page_table_lock);
	/* Check for a racing update before calling hugetlb_cow */
	if (likely(pte_same(entry, *ptep)))
		if (write_access && !pte_write(entry))
			ret = hugetlb_cow(mm, vma, address, ptep, entry);
	spin_unlock(&mm->page_table_lock);
	mutex_unlock(&hugetlb_instantiation_mutex);

	return ret;
}

int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
			struct page **pages, struct vm_area_struct **vmas,
			unsigned long *position, int *length, int i)
{
	unsigned long pfn_offset;
	unsigned long vaddr = *position;
	int remainder = *length;

	spin_lock(&mm->page_table_lock);
	while (vaddr < vma->vm_end && remainder) {
		pte_t *pte;
		struct page *page;

		/*
		 * Some archs (sparc64, sh*) have multiple pte_ts to
		 * each hugepage.  We have to make sure we get the
		 * first, for the page indexing below to work.
		 */
		pte = huge_pte_offset(mm, vaddr & HPAGE_MASK);

		if (!pte || pte_none(*pte)) {
			int ret;

			spin_unlock(&mm->page_table_lock);
			ret = hugetlb_fault(mm, vma, vaddr, 0);
			spin_lock(&mm->page_table_lock);
			if (ret == VM_FAULT_MINOR)
				continue;

			remainder = 0;
			if (!i)
				i = -EFAULT;
			break;
		}

		pfn_offset = (vaddr & ~HPAGE_MASK) >> PAGE_SHIFT;
		page = pte_page(*pte);
same_page:
		if (pages) {
			get_page(page);
			pages[i] = page + pfn_offset;
		}

		if (vmas)
			vmas[i] = vma;

		vaddr += PAGE_SIZE;
		++pfn_offset;
		--remainder;
		++i;
		if (vaddr < vma->vm_end && remainder &&
				pfn_offset < HPAGE_SIZE/PAGE_SIZE) {
			/*
			 * We use pfn_offset to avoid touching the pageframes
			 * of this compound page.
			 */
			goto same_page;
		}
	}
	spin_unlock(&mm->page_table_lock);
	*length = remainder;
	*position = vaddr;

	return i;
}

void hugetlb_change_protection(struct vm_area_struct *vma,
		unsigned long address, unsigned long end, pgprot_t newprot)
{
	struct mm_struct *mm = vma->vm_mm;
	unsigned long start = address;
	pte_t *ptep;
	pte_t pte;

	BUG_ON(address >= end);
	flush_cache_range(vma, address, end);

	spin_lock(&mm->page_table_lock);
	for (; address < end; address += HPAGE_SIZE) {
		ptep = huge_pte_offset(mm, address);
		if (!ptep)
			continue;
		if (!pte_none(*ptep)) {
			pte = huge_ptep_get_and_clear(mm, address, ptep);
			pte = pte_mkhuge(pte_modify(pte, newprot));
			set_huge_pte_at(mm, address, ptep, pte);
			lazy_mmu_prot_update(pte);
		}
	}
	spin_unlock(&mm->page_table_lock);

	flush_tlb_range(vma, start, end);
}
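/*
 * Region tracking -- the reservation map for a hugetlbfs file is kept
 * on inode->i_mapping->private_list as a sorted list of [from, to)
 * ranges measured in huge pages.  region_chg() returns how many extra
 * pages a proposed range would reserve, region_add() merges the range
 * into the map, and region_truncate() drops everything from a given
 * offset onwards, returning the number of pages released.
 */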
struct file_region {
	struct list_head link;
	long from;
	long to;
};

static long region_add(struct list_head *head, long f, long t)
{
	struct file_region *rg, *nrg, *trg;

	/* Locate the region we are either in or before. */
	list_for_each_entry(rg, head, link)
		if (f <= rg->to)
			break;

	/* Round our left edge to the current segment if it encloses us. */
	if (f > rg->from)
		f = rg->from;

	/* Check for and consume any regions we now overlap with. */
	nrg = rg;
	list_for_each_entry_safe(rg, trg, rg->link.prev, link) {
		if (&rg->link == head)
			break;
		if (rg->from > t)
			break;

		/* If this area reaches higher then extend our area to
		 * include it completely.  If this is not the first area
		 * which we intend to reuse, free it. */
		if (rg->to > t)
			t = rg->to;
		if (rg != nrg) {
			list_del(&rg->link);
			kfree(rg);
		}
	}
	nrg->from = f;
	nrg->to = t;
	return 0;
}

static long region_chg(struct list_head *head, long f, long t)
{
	struct file_region *rg, *nrg;
	long chg = 0;

	/* Locate the region we are before or in. */
	list_for_each_entry(rg, head, link)
		if (f <= rg->to)
			break;

	/* If we are below the current region then a new region is required.
	 * Subtle, allocate a new region at the position but make it zero
	 * size such that we can guarantee to record the reservation. */
	if (&rg->link == head || t < rg->from) {
		nrg = kmalloc(sizeof(*nrg), GFP_KERNEL);
		if (!nrg)
			return -ENOMEM;
		nrg->from = f;
		nrg->to = f;
		INIT_LIST_HEAD(&nrg->link);
		list_add(&nrg->link, rg->link.prev);

		return t - f;
	}

	/* Round our left edge to the current segment if it encloses us. */
	if (f > rg->from)
		f = rg->from;
	chg = t - f;

	/* Check for and consume any regions we now overlap with. */
	list_for_each_entry(rg, rg->link.prev, link) {
		if (&rg->link == head)
			break;
		if (rg->from > t)
			return chg;

		/* We overlap with this area, if it extends further than
		 * us then we must extend ourselves.  Account for its
		 * existing reservation. */
		if (rg->to > t) {
			chg += rg->to - t;
			t = rg->to;
		}
		chg -= rg->to - rg->from;
	}
	return chg;
}

static long region_truncate(struct list_head *head, long end)
{
	struct file_region *rg, *trg;
	long chg = 0;

	/* Locate the region we are either in or before. */
	list_for_each_entry(rg, head, link)
		if (end <= rg->to)
			break;
	if (&rg->link == head)
		return 0;

	/* If we are in the middle of a region then adjust it. */
	if (end > rg->from) {
		chg = rg->to - end;
		rg->to = end;
		rg = list_entry(rg->link.next, typeof(*rg), link);
	}

	/* Drop any remaining regions. */
	list_for_each_entry_safe(rg, trg, rg->link.prev, link) {
		if (&rg->link == head)
			break;
		chg += rg->to - rg->from;
		list_del(&rg->link);
		kfree(rg);
	}
	return chg;
}

static int hugetlb_acct_memory(long delta)
{
	int ret = -ENOMEM;

	spin_lock(&hugetlb_lock);
	if ((delta + resv_huge_pages) <= free_huge_pages) {
		resv_huge_pages += delta;
		ret = 0;
	}
	spin_unlock(&hugetlb_lock);
	return ret;
}

int hugetlb_reserve_pages(struct inode *inode, long from, long to)
{
	long ret, chg;

	chg = region_chg(&inode->i_mapping->private_list, from, to);
	if (chg < 0)
		return chg;
	ret = hugetlb_acct_memory(chg);
	if (ret < 0)
		return ret;
	region_add(&inode->i_mapping->private_list, from, to);
	return 0;
}

void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed)
{
	long chg = region_truncate(&inode->i_mapping->private_list, offset);
	hugetlb_acct_memory(freed - chg);
}