vma.c (6898c9039bc8e3027ae0fcd0f05fc2b82ccc8be0) | vma.c (dba14840905f9ecaad0b3a261e4d7a88120c8c7a) |
---|---|
1// SPDX-License-Identifier: GPL-2.0-or-later 2 3/* 4 * VMA-specific functions. 5 */ 6 7#include "vma_internal.h" 8#include "vma.h" --- 67 unchanged lines hidden --- 76 77 vp->file = vma->vm_file; 78 if (vp->file) 79 vp->mapping = vma->vm_file->f_mapping; 80 81} 82 83/* | 1// SPDX-License-Identifier: GPL-2.0-or-later 2 3/* 4 * VMA-specific functions. 5 */ 6 7#include "vma_internal.h" 8#include "vma.h" --- 67 unchanged lines hidden --- 76 77 vp->file = vma->vm_file; 78 if (vp->file) 79 vp->mapping = vma->vm_file->f_mapping; 80 81} 82 83/* |
| 84 * init_vma_munmap() - Initializer wrapper for vma_munmap_struct 85 * @vms: The vma munmap struct 86 * @vmi: The vma iterator 87 * @vma: The first vm_area_struct to munmap 88 * @start: The aligned start address to munmap 89 * @end: The aligned end address to munmap 90 * @uf: The userfaultfd list_head 91 * @unlock: Unlock after the operation. Only unlocked on success 92 */ 93static inline void init_vma_munmap(struct vma_munmap_struct *vms, 94 struct vma_iterator *vmi, struct vm_area_struct *vma, 95 unsigned long start, unsigned long end, struct list_head *uf, 96 bool unlock) 97{ 98 vms->vmi = vmi; 99 vms->vma = vma; 100 vms->mm = vma->vm_mm; 101 vms->start = start; 102 vms->end = end; 103 vms->unlock = unlock; 104 vms->uf = uf; 105 vms->vma_count = 0; 106 vms->nr_pages = vms->locked_vm = 0; 107} 108 109/* |
84 * Return true if we can merge this (vm_flags,anon_vma,file,vm_pgoff) 85 * in front of (at a lower virtual address and file offset than) the vma. 86 * 87 * We cannot merge two vmas if they have differently assigned (non-NULL) 88 * anon_vmas, nor if same anon_vma is assigned but offsets incompatible. 89 * 90 * We don't check here for the merged mmap wrapping around the end of pagecache 91 * indices (16TB on ia32) because do_mmap() does not permit mmap's which --- 588 unchanged lines hidden --- 680 mas_set(mas_detach, 0); 681 mas_for_each(mas_detach, vma, ULONG_MAX) 682 vma_mark_detached(vma, false); 683 684 __mt_destroy(mas_detach->tree); 685} 686 687/* | 110 * Return true if we can merge this (vm_flags,anon_vma,file,vm_pgoff) 111 * in front of (at a lower virtual address and file offset than) the vma. 112 * 113 * We cannot merge two vmas if they have differently assigned (non-NULL) 114 * anon_vmas, nor if same anon_vma is assigned but offsets incompatible. 115 * 116 * We don't check here for the merged mmap wrapping around the end of pagecache 117 * indices (16TB on ia32) because do_mmap() does not permit mmap's which --- 588 unchanged lines hidden --- 706 mas_set(mas_detach, 0); 707 mas_for_each(mas_detach, vma, ULONG_MAX) 708 vma_mark_detached(vma, false); 709 710 __mt_destroy(mas_detach->tree); 711} 712 713/* |
688 * vmi_complete_munmap_vmas() - Finish the munmap() operation 689 * @vmi: The vma iterator 690 * @vma: The first vma to be munmapped 691 * @mm: The mm struct 692 * @start: The start address 693 * @end: The end address 694 * @unlock: Unlock the mm or not 695 * @mas_detach: them maple state of the detached vma maple tree 696 * @locked_vm: The locked_vm count in the detached vmas | 714 * vms_complete_munmap_vmas() - Finish the munmap() operation 715 * @vms: The vma munmap struct 716 * @mas_detach: The maple state of the detached vmas |
697 * | 717 * |
698 * This function updates the mm_struct, unmaps the region, frees the resources | 718 * This updates the mm_struct, unmaps the region, frees the resources |
699 * used for the munmap() and may downgrade the lock - if requested. Everything 700 * needed to be done once the vma maple tree is updated. 701 */ | 719 * used for the munmap() and may downgrade the lock - if requested. Everything 720 * needed to be done once the vma maple tree is updated. 721 */ |
702static void 703vmi_complete_munmap_vmas(struct vma_iterator *vmi, struct vm_area_struct *vma, 704 struct mm_struct *mm, unsigned long start, unsigned long end, 705 bool unlock, struct ma_state *mas_detach, 706 unsigned long locked_vm) | 722static void vms_complete_munmap_vmas(struct vma_munmap_struct *vms, 723 struct ma_state *mas_detach) |
707{ 708 struct vm_area_struct *prev, *next; | 724{ 725 struct vm_area_struct *prev, *next; |
709 int count; | 726 struct mm_struct *mm; |
710 | 727 |
711 count = mas_detach->index + 1; 712 mm->map_count -= count; 713 mm->locked_vm -= locked_vm; 714 if (unlock) | 728 mm = vms->mm; 729 mm->map_count -= vms->vma_count; 730 mm->locked_vm -= vms->locked_vm; 731 if (vms->unlock) |
715 mmap_write_downgrade(mm); 716 | 732 mmap_write_downgrade(mm); 733 |
717 prev = vma_iter_prev_range(vmi); 718 next = vma_next(vmi); | 734 prev = vma_iter_prev_range(vms->vmi); 735 next = vma_next(vms->vmi); |
719 if (next) | 736 if (next) |
720 vma_iter_prev_range(vmi); | 737 vma_iter_prev_range(vms->vmi); |
721 722 /* 723 * We can free page tables without write-locking mmap_lock because VMAs 724 * were isolated before we downgraded mmap_lock. 725 */ 726 mas_set(mas_detach, 1); | 738 739 /* 740 * We can free page tables without write-locking mmap_lock because VMAs 741 * were isolated before we downgraded mmap_lock. 742 */ 743 mas_set(mas_detach, 1); |
727 unmap_region(mm, mas_detach, vma, prev, next, start, end, count, 728 !unlock); | 744 unmap_region(mm, mas_detach, vms->vma, prev, next, vms->start, vms->end, 745 vms->vma_count, !vms->unlock); |
729 /* Statistics and freeing VMAs */ 730 mas_set(mas_detach, 0); 731 remove_mt(mm, mas_detach); 732 validate_mm(mm); | 746 /* Statistics and freeing VMAs */ 747 mas_set(mas_detach, 0); 748 remove_mt(mm, mas_detach); 749 validate_mm(mm); |
733 if (unlock) | 750 if (vms->unlock) |
734 mmap_read_unlock(mm); 735 736 __mt_destroy(mas_detach->tree); 737} 738 739/* | 751 mmap_read_unlock(mm); 752 753 __mt_destroy(mas_detach->tree); 754} 755 756/* |
740 * vmi_gather_munmap_vmas() - Put all VMAs within a range into a maple tree | 757 * vms_gather_munmap_vmas() - Put all VMAs within a range into a maple tree |
741 * for removal at a later date. Handles splitting first and last if necessary 742 * and marking the vmas as isolated. 743 * | 758 * for removal at a later date. Handles splitting first and last if necessary 759 * and marking the vmas as isolated. 760 * |
744 * @vmi: The vma iterator 745 * @vma: The starting vm_area_struct 746 * @mm: The mm_struct 747 * @start: The aligned start address to munmap. 748 * @end: The aligned end address to munmap. 749 * @uf: The userfaultfd list_head | 761 * @vms: The vma munmap struct |
750 * @mas_detach: The maple state tracking the detached tree | 762 * @mas_detach: The maple state tracking the detached tree |
751 * @locked_vm: a pointer to store the VM_LOCKED pages count. | |
752 * 753 * Return: 0 on success, -EPERM on mseal vmas, -ENOMEM otherwise 754 */ | 763 * 764 * Return: 0 on success, -EPERM on mseal vmas, -ENOMEM otherwise 765 */ |
755static int 756vmi_gather_munmap_vmas(struct vma_iterator *vmi, struct vm_area_struct *vma, 757 struct mm_struct *mm, unsigned long start, 758 unsigned long end, struct list_head *uf, 759 struct ma_state *mas_detach, unsigned long *locked_vm) | 766static int vms_gather_munmap_vmas(struct vma_munmap_struct *vms, 767 struct ma_state *mas_detach) |
760{ 761 struct vm_area_struct *next = NULL; | 768{ 769 struct vm_area_struct *next = NULL; |
762 int count = 0; | |
763 int error = -ENOMEM; 764 765 /* 766 * If we need to split any vma, do it now to save pain later. 767 * 768 * Note: mremap's move_vma VM_ACCOUNT handling assumes a partially 769 * unmapped vm_area_struct will remain in use: so lower split_vma 770 * places tmp vma above, and higher split_vma places tmp vma below. 771 */ 772 773 /* Does it split the first one? */ | 770 int error = -ENOMEM; 771 772 /* 773 * If we need to split any vma, do it now to save pain later. 774 * 775 * Note: mremap's move_vma VM_ACCOUNT handling assumes a partially 776 * unmapped vm_area_struct will remain in use: so lower split_vma 777 * places tmp vma above, and higher split_vma places tmp vma below. 778 */ 779 780 /* Does it split the first one? */ |
774 if (start > vma->vm_start) { | 781 if (vms->start > vms->vma->vm_start) { |
775 776 /* 777 * Make sure that map_count on return from munmap() will 778 * not exceed its limit; but let map_count go just above 779 * its limit temporarily, to help free resources as expected. 780 */ | 782 783 /* 784 * Make sure that map_count on return from munmap() will 785 * not exceed its limit; but let map_count go just above 786 * its limit temporarily, to help free resources as expected. 787 */ |
781 if (end < vma->vm_end && mm->map_count >= sysctl_max_map_count) | 788 if (vms->end < vms->vma->vm_end && 789 vms->mm->map_count >= sysctl_max_map_count) |
782 goto map_count_exceeded; 783 784 /* Don't bother splitting the VMA if we can't unmap it anyway */ | 790 goto map_count_exceeded; 791 792 /* Don't bother splitting the VMA if we can't unmap it anyway */ |
785 if (!can_modify_vma(vma)) { | 793 if (!can_modify_vma(vms->vma)) { |
786 error = -EPERM; 787 goto start_split_failed; 788 } 789 | 794 error = -EPERM; 795 goto start_split_failed; 796 } 797 |
790 if (__split_vma(vmi, vma, start, 1)) | 798 if (__split_vma(vms->vmi, vms->vma, vms->start, 1)) |
791 goto start_split_failed; 792 } 793 794 /* 795 * Detach a range of VMAs from the mm. Using next as a temp variable as 796 * it is always overwritten. 797 */ | 799 goto start_split_failed; 800 } 801 802 /* 803 * Detach a range of VMAs from the mm. Using next as a temp variable as 804 * it is always overwritten. 805 */ |
798 next = vma; | 806 next = vms->vma; |
799 do { 800 if (!can_modify_vma(next)) { 801 error = -EPERM; 802 goto modify_vma_failed; 803 } 804 805 /* Does it split the end? */ | 807 do { 808 if (!can_modify_vma(next)) { 809 error = -EPERM; 810 goto modify_vma_failed; 811 } 812 813 /* Does it split the end? */ |
806 if (next->vm_end > end) { 807 if (__split_vma(vmi, next, end, 0)) | 814 if (next->vm_end > vms->end) { 815 if (__split_vma(vms->vmi, next, vms->end, 0)) |
808 goto end_split_failed; 809 } 810 vma_start_write(next); | 816 goto end_split_failed; 817 } 818 vma_start_write(next); |
811 mas_set(mas_detach, count++); | 819 mas_set(mas_detach, vms->vma_count++); |
812 if (mas_store_gfp(mas_detach, next, GFP_KERNEL)) 813 goto munmap_gather_failed; 814 815 vma_mark_detached(next, true); 816 if (next->vm_flags & VM_LOCKED) | 820 if (mas_store_gfp(mas_detach, next, GFP_KERNEL)) 821 goto munmap_gather_failed; 822 823 vma_mark_detached(next, true); 824 if (next->vm_flags & VM_LOCKED) |
817 *locked_vm += vma_pages(next); | 825 vms->locked_vm += vma_pages(next); |
818 | 826 |
819 if (unlikely(uf)) { | 827 if (unlikely(vms->uf)) { |
820 /* 821 * If userfaultfd_unmap_prep returns an error the vmas 822 * will remain split, but userland will get a 823 * highly unexpected error anyway. This is no 824 * different than the case where the first of the two 825 * __split_vma fails, but we don't undo the first 826 * split, despite we could. This is unlikely enough 827 * failure that it's not worth optimizing it for. 828 */ | 828 /* 829 * If userfaultfd_unmap_prep returns an error the vmas 830 * will remain split, but userland will get a 831 * highly unexpected error anyway. This is no 832 * different than the case where the first of the two 833 * __split_vma fails, but we don't undo the first 834 * split, despite we could. This is unlikely enough 835 * failure that it's not worth optimizing it for. 836 */ |
829 if (userfaultfd_unmap_prep(next, start, end, uf)) | 837 if (userfaultfd_unmap_prep(next, vms->start, vms->end, 838 vms->uf)) |
830 goto userfaultfd_error; 831 } 832#ifdef CONFIG_DEBUG_VM_MAPLE_TREE | 839 goto userfaultfd_error; 840 } 841#ifdef CONFIG_DEBUG_VM_MAPLE_TREE |
833 BUG_ON(next->vm_start < start); 834 BUG_ON(next->vm_start > end); | 842 BUG_ON(next->vm_start < vms->start); 843 BUG_ON(next->vm_start > vms->end); |
835#endif | 844#endif |
836 } for_each_vma_range(*vmi, next, end); | 845 } for_each_vma_range(*(vms->vmi), next, vms->end); |
837 838#if defined(CONFIG_DEBUG_VM_MAPLE_TREE) 839 /* Make sure no VMAs are about to be lost. */ 840 { 841 MA_STATE(test, mas_detach->tree, 0, 0); 842 struct vm_area_struct *vma_mas, *vma_test; 843 int test_count = 0; 844 | 846 847#if defined(CONFIG_DEBUG_VM_MAPLE_TREE) 848 /* Make sure no VMAs are about to be lost. */ 849 { 850 MA_STATE(test, mas_detach->tree, 0, 0); 851 struct vm_area_struct *vma_mas, *vma_test; 852 int test_count = 0; 853 |
845 vma_iter_set(vmi, start); | 854 vma_iter_set(vms->vmi, vms->start); |
846 rcu_read_lock(); | 855 rcu_read_lock(); |
847 vma_test = mas_find(&test, count - 1); 848 for_each_vma_range(*vmi, vma_mas, end) { | 856 vma_test = mas_find(&test, vms->vma_count - 1); 857 for_each_vma_range(*(vms->vmi), vma_mas, vms->end) { |
849 BUG_ON(vma_mas != vma_test); 850 test_count++; | 858 BUG_ON(vma_mas != vma_test); 859 test_count++; |
851 vma_test = mas_next(&test, count - 1); | 860 vma_test = mas_next(&test, vms->vma_count - 1); |
852 } 853 rcu_read_unlock(); | 861 } 862 rcu_read_unlock(); |
854 BUG_ON(count != test_count); | 863 BUG_ON(vms->vma_count != test_count); |
855 } 856#endif 857 | 864 } 865#endif 866 |
858 while (vma_iter_addr(vmi) > start) 859 vma_iter_prev_range(vmi); | 867 while (vma_iter_addr(vms->vmi) > vms->start) 868 vma_iter_prev_range(vms->vmi); |
860 861 return 0; 862 863userfaultfd_error: 864munmap_gather_failed: 865end_split_failed: 866modify_vma_failed: 867 abort_munmap_vmas(mas_detach); --- 19 unchanged lines hidden --- 887int do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, 888 struct mm_struct *mm, unsigned long start, unsigned long end, 889 struct list_head *uf, bool unlock) 890{ 891 struct maple_tree mt_detach; 892 MA_STATE(mas_detach, &mt_detach, 0, 0); 893 mt_init_flags(&mt_detach, vmi->mas.tree->ma_flags & MT_FLAGS_LOCK_MASK); 894 mt_on_stack(mt_detach); | 869 870 return 0; 871 872userfaultfd_error: 873munmap_gather_failed: 874end_split_failed: 875modify_vma_failed: 876 abort_munmap_vmas(mas_detach); --- 19 unchanged lines hidden --- 896int do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma, 897 struct mm_struct *mm, unsigned long start, unsigned long end, 898 struct list_head *uf, bool unlock) 899{ 900 struct maple_tree mt_detach; 901 MA_STATE(mas_detach, &mt_detach, 0, 0); 902 mt_init_flags(&mt_detach, vmi->mas.tree->ma_flags & MT_FLAGS_LOCK_MASK); 903 mt_on_stack(mt_detach); |
| 904 struct vma_munmap_struct vms; |
895 int error; | 905 int error; |
896 unsigned long locked_vm = 0; | |
897 | 906 |
898 error = vmi_gather_munmap_vmas(vmi, vma, mm, start, end, uf, 899 &mas_detach, &locked_vm); | 907 init_vma_munmap(&vms, vmi, vma, start, end, uf, unlock); 908 error = vms_gather_munmap_vmas(&vms, &mas_detach); |
900 if (error) 901 goto gather_failed; 902 903 error = vma_iter_clear_gfp(vmi, start, end, GFP_KERNEL); 904 if (error) 905 goto clear_tree_failed; 906 907 /* Point of no return */ | 909 if (error) 910 goto gather_failed; 911 912 error = vma_iter_clear_gfp(vmi, start, end, GFP_KERNEL); 913 if (error) 914 goto clear_tree_failed; 915 916 /* Point of no return */ |
908 vmi_complete_munmap_vmas(vmi, vma, mm, start, end, unlock, &mas_detach, 909 locked_vm); | 917 vms_complete_munmap_vmas(&vms, &mas_detach); |
910 return 0; 911 912clear_tree_failed: 913 abort_munmap_vmas(&mas_detach); 914gather_failed: 915 validate_mm(mm); 916 return error; 917} --- 924 unchanged lines hidden --- | 918 return 0; 919 920clear_tree_failed: 921 abort_munmap_vmas(&mas_detach); 922gather_failed: 923 validate_mm(mm); 924 return error; 925} --- 924 unchanged lines hidden --- |
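
The right-hand column threads a single `struct vma_munmap_struct` through init_vma_munmap(), vms_gather_munmap_vmas() and vms_complete_munmap_vmas() in place of the long argument lists used by the vmi_* variants on the left. The struct definition itself lives in vma.h and is not shown in this diff; the sketch below is only a reconstruction inferred from the fields this hunk reads and writes — the field names are taken from the diff, while types are inferred from the surrounding code and the layout and comments are assumptions.

```c
/*
 * Inferred sketch of the munmap state bundled by init_vma_munmap().
 * Reconstructed from usage in this diff only; the authoritative
 * definition is in vma.h and may differ.
 */
struct vma_munmap_struct {
	struct vma_iterator *vmi;
	struct mm_struct *mm;
	struct vm_area_struct *vma;	/* First VMA in the unmap range */
	struct list_head *uf;		/* Userfaultfd list_head */
	unsigned long start;		/* Aligned start address */
	unsigned long end;		/* Aligned end address */
	int vma_count;			/* VMAs gathered into mas_detach */
	unsigned long nr_pages;		/* Zeroed here; not yet read in this hunk */
	unsigned long locked_vm;	/* VM_LOCKED pages in the detached VMAs */
	bool unlock;			/* Downgrade and unlock mmap_lock on success */
};
```

With the state carried in one struct, the call chain in do_vmi_align_munmap() reduces to init_vma_munmap(), vms_gather_munmap_vmas(), vma_iter_clear_gfp() and vms_complete_munmap_vmas(), as shown in the final hunks above.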