--- vma.c (6898c9039bc8e3027ae0fcd0f05fc2b82ccc8be0)
+++ vma.c (dba14840905f9ecaad0b3a261e4d7a88120c8c7a)
 // SPDX-License-Identifier: GPL-2.0-or-later
 
 /*
  * VMA-specific functions.
  */
 
 #include "vma_internal.h"
 #include "vma.h"

--- 67 unchanged lines hidden ---

 
 	vp->file = vma->vm_file;
 	if (vp->file)
 		vp->mapping = vma->vm_file->f_mapping;
 
 }
 
 /*
+ * init_vma_munmap() - Initializer wrapper for vma_munmap_struct
+ * @vms: The vma munmap struct
+ * @vmi: The vma iterator
+ * @vma: The first vm_area_struct to munmap
+ * @start: The aligned start address to munmap
+ * @end: The aligned end address to munmap
+ * @uf: The userfaultfd list_head
+ * @unlock: Unlock after the operation. Only unlocked on success
+ */
+static inline void init_vma_munmap(struct vma_munmap_struct *vms,
+		struct vma_iterator *vmi, struct vm_area_struct *vma,
+		unsigned long start, unsigned long end, struct list_head *uf,
+		bool unlock)
+{
+	vms->vmi = vmi;
+	vms->vma = vma;
+	vms->mm = vma->vm_mm;
+	vms->start = start;
+	vms->end = end;
+	vms->unlock = unlock;
+	vms->uf = uf;
+	vms->vma_count = 0;
+	vms->nr_pages = vms->locked_vm = 0;
+}
+
+/*
  * Return true if we can merge this (vm_flags,anon_vma,file,vm_pgoff)
  * in front of (at a lower virtual address and file offset than) the vma.
  *
  * We cannot merge two vmas if they have differently assigned (non-NULL)
  * anon_vmas, nor if same anon_vma is assigned but offsets incompatible.
  *
  * We don't check here for the merged mmap wrapping around the end of pagecache
  * indices (16TB on ia32) because do_mmap() does not permit mmap's which

--- 588 unchanged lines hidden ---

 	mas_set(mas_detach, 0);
 	mas_for_each(mas_detach, vma, ULONG_MAX)
 		vma_mark_detached(vma, false);
 
 	__mt_destroy(mas_detach->tree);
 }
 
 /*
- * vmi_complete_munmap_vmas() - Finish the munmap() operation
- * @vmi: The vma iterator
- * @vma: The first vma to be munmapped
- * @mm: The mm struct
- * @start: The start address
- * @end: The end address
- * @unlock: Unlock the mm or not
- * @mas_detach: them maple state of the detached vma maple tree
- * @locked_vm: The locked_vm count in the detached vmas
+ * vms_complete_munmap_vmas() - Finish the munmap() operation
+ * @vms: The vma munmap struct
+ * @mas_detach: The maple state of the detached vmas
  *
- * This function updates the mm_struct, unmaps the region, frees the resources
+ * This updates the mm_struct, unmaps the region, frees the resources
  * used for the munmap() and may downgrade the lock - if requested. Everything
  * needed to be done once the vma maple tree is updated.
  */
-static void
-vmi_complete_munmap_vmas(struct vma_iterator *vmi, struct vm_area_struct *vma,
-		struct mm_struct *mm, unsigned long start, unsigned long end,
-		bool unlock, struct ma_state *mas_detach,
-		unsigned long locked_vm)
+static void vms_complete_munmap_vmas(struct vma_munmap_struct *vms,
+		struct ma_state *mas_detach)
 {
 	struct vm_area_struct *prev, *next;
-	int count;
+	struct mm_struct *mm;
 
-	count = mas_detach->index + 1;
-	mm->map_count -= count;
-	mm->locked_vm -= locked_vm;
-	if (unlock)
+	mm = vms->mm;
+	mm->map_count -= vms->vma_count;
+	mm->locked_vm -= vms->locked_vm;
+	if (vms->unlock)
 		mmap_write_downgrade(mm);
 
-	prev = vma_iter_prev_range(vmi);
-	next = vma_next(vmi);
+	prev = vma_iter_prev_range(vms->vmi);
+	next = vma_next(vms->vmi);
 	if (next)
-		vma_iter_prev_range(vmi);
+		vma_iter_prev_range(vms->vmi);
 
 	/*
 	 * We can free page tables without write-locking mmap_lock because VMAs
 	 * were isolated before we downgraded mmap_lock.
 	 */
 	mas_set(mas_detach, 1);
-	unmap_region(mm, mas_detach, vma, prev, next, start, end, count,
-		     !unlock);
+	unmap_region(mm, mas_detach, vms->vma, prev, next, vms->start, vms->end,
+		     vms->vma_count, !vms->unlock);
 	/* Statistics and freeing VMAs */
 	mas_set(mas_detach, 0);
 	remove_mt(mm, mas_detach);
 	validate_mm(mm);
-	if (unlock)
+	if (vms->unlock)
 		mmap_read_unlock(mm);
 
 	__mt_destroy(mas_detach->tree);
 }
 
 /*
- * vmi_gather_munmap_vmas() - Put all VMAs within a range into a maple tree
+ * vms_gather_munmap_vmas() - Put all VMAs within a range into a maple tree
  * for removal at a later date. Handles splitting first and last if necessary
  * and marking the vmas as isolated.
  *
- * @vmi: The vma iterator
- * @vma: The starting vm_area_struct
- * @mm: The mm_struct
- * @start: The aligned start address to munmap.
- * @end: The aligned end address to munmap.
- * @uf: The userfaultfd list_head
+ * @vms: The vma munmap struct
  * @mas_detach: The maple state tracking the detached tree
- * @locked_vm: a pointer to store the VM_LOCKED pages count.
  *
  * Return: 0 on success, -EPERM on mseal vmas, -ENOMEM otherwise
  */
-static int
-vmi_gather_munmap_vmas(struct vma_iterator *vmi, struct vm_area_struct *vma,
-		struct mm_struct *mm, unsigned long start,
-		unsigned long end, struct list_head *uf,
-		struct ma_state *mas_detach, unsigned long *locked_vm)
+static int vms_gather_munmap_vmas(struct vma_munmap_struct *vms,
+		struct ma_state *mas_detach)
 {
 	struct vm_area_struct *next = NULL;
-	int count = 0;
 	int error = -ENOMEM;
 
 	/*
 	 * If we need to split any vma, do it now to save pain later.
 	 *
 	 * Note: mremap's move_vma VM_ACCOUNT handling assumes a partially
 	 * unmapped vm_area_struct will remain in use: so lower split_vma
 	 * places tmp vma above, and higher split_vma places tmp vma below.
 	 */
 
 	/* Does it split the first one? */
-	if (start > vma->vm_start) {
+	if (vms->start > vms->vma->vm_start) {
 
 		/*
 		 * Make sure that map_count on return from munmap() will
 		 * not exceed its limit; but let map_count go just above
 		 * its limit temporarily, to help free resources as expected.
 		 */
-		if (end < vma->vm_end && mm->map_count >= sysctl_max_map_count)
+		if (vms->end < vms->vma->vm_end &&
+		    vms->mm->map_count >= sysctl_max_map_count)
 			goto map_count_exceeded;
 
 		/* Don't bother splitting the VMA if we can't unmap it anyway */
-		if (!can_modify_vma(vma)) {
+		if (!can_modify_vma(vms->vma)) {
 			error = -EPERM;
 			goto start_split_failed;
 		}
 
-		if (__split_vma(vmi, vma, start, 1))
+		if (__split_vma(vms->vmi, vms->vma, vms->start, 1))
 			goto start_split_failed;
 	}
 
 	/*
 	 * Detach a range of VMAs from the mm. Using next as a temp variable as
 	 * it is always overwritten.
 	 */
-	next = vma;
+	next = vms->vma;
 	do {
 		if (!can_modify_vma(next)) {
 			error = -EPERM;
 			goto modify_vma_failed;
 		}
 
 		/* Does it split the end? */
-		if (next->vm_end > end) {
-			if (__split_vma(vmi, next, end, 0))
+		if (next->vm_end > vms->end) {
+			if (__split_vma(vms->vmi, next, vms->end, 0))
 				goto end_split_failed;
 		}
 		vma_start_write(next);
-		mas_set(mas_detach, count++);
+		mas_set(mas_detach, vms->vma_count++);
 		if (mas_store_gfp(mas_detach, next, GFP_KERNEL))
 			goto munmap_gather_failed;
 
 		vma_mark_detached(next, true);
 		if (next->vm_flags & VM_LOCKED)
-			*locked_vm += vma_pages(next);
+			vms->locked_vm += vma_pages(next);
 
-		if (unlikely(uf)) {
+		if (unlikely(vms->uf)) {
 			/*
 			 * If userfaultfd_unmap_prep returns an error the vmas
 			 * will remain split, but userland will get a
 			 * highly unexpected error anyway. This is no
 			 * different than the case where the first of the two
 			 * __split_vma fails, but we don't undo the first
 			 * split, despite we could. This is unlikely enough
 			 * failure that it's not worth optimizing it for.
 			 */
-			if (userfaultfd_unmap_prep(next, start, end, uf))
+			if (userfaultfd_unmap_prep(next, vms->start, vms->end,
+						   vms->uf))
 				goto userfaultfd_error;
 		}
 #ifdef CONFIG_DEBUG_VM_MAPLE_TREE
-		BUG_ON(next->vm_start < start);
-		BUG_ON(next->vm_start > end);
+		BUG_ON(next->vm_start < vms->start);
+		BUG_ON(next->vm_start > vms->end);
 #endif
-	} for_each_vma_range(*vmi, next, end);
+	} for_each_vma_range(*(vms->vmi), next, vms->end);
 
 #if defined(CONFIG_DEBUG_VM_MAPLE_TREE)
 	/* Make sure no VMAs are about to be lost. */
 	{
 		MA_STATE(test, mas_detach->tree, 0, 0);
 		struct vm_area_struct *vma_mas, *vma_test;
 		int test_count = 0;
 
-		vma_iter_set(vmi, start);
+		vma_iter_set(vms->vmi, vms->start);
 		rcu_read_lock();
-		vma_test = mas_find(&test, count - 1);
-		for_each_vma_range(*vmi, vma_mas, end) {
+		vma_test = mas_find(&test, vms->vma_count - 1);
+		for_each_vma_range(*(vms->vmi), vma_mas, vms->end) {
 			BUG_ON(vma_mas != vma_test);
 			test_count++;
-			vma_test = mas_next(&test, count - 1);
+			vma_test = mas_next(&test, vms->vma_count - 1);
 		}
 		rcu_read_unlock();
-		BUG_ON(count != test_count);
+		BUG_ON(vms->vma_count != test_count);
 	}
 #endif
 
-	while (vma_iter_addr(vmi) > start)
-		vma_iter_prev_range(vmi);
+	while (vma_iter_addr(vms->vmi) > vms->start)
+		vma_iter_prev_range(vms->vmi);
 
 	return 0;
 
 userfaultfd_error:
 munmap_gather_failed:
 end_split_failed:
 modify_vma_failed:
 	abort_munmap_vmas(mas_detach);

--- 19 unchanged lines hidden ---

 int do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
 		struct mm_struct *mm, unsigned long start, unsigned long end,
 		struct list_head *uf, bool unlock)
 {
 	struct maple_tree mt_detach;
 	MA_STATE(mas_detach, &mt_detach, 0, 0);
 	mt_init_flags(&mt_detach, vmi->mas.tree->ma_flags & MT_FLAGS_LOCK_MASK);
 	mt_on_stack(mt_detach);
+	struct vma_munmap_struct vms;
 	int error;
-	unsigned long locked_vm = 0;
 
-	error = vmi_gather_munmap_vmas(vmi, vma, mm, start, end, uf,
-				       &mas_detach, &locked_vm);
+	init_vma_munmap(&vms, vmi, vma, start, end, uf, unlock);
+	error = vms_gather_munmap_vmas(&vms, &mas_detach);
 	if (error)
 		goto gather_failed;
 
 	error = vma_iter_clear_gfp(vmi, start, end, GFP_KERNEL);
 	if (error)
 		goto clear_tree_failed;
 
 	/* Point of no return */
-	vmi_complete_munmap_vmas(vmi, vma, mm, start, end, unlock, &mas_detach,
-				 locked_vm);
+	vms_complete_munmap_vmas(&vms, &mas_detach);
 	return 0;
 
 clear_tree_failed:
 	abort_munmap_vmas(&mas_detach);
 gather_failed:
 	validate_mm(mm);
 	return error;
 }

--- 924 unchanged lines hidden ---
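
Taken together, the diff replaces the long argument lists of vmi_gather_munmap_vmas() and vmi_complete_munmap_vmas() with a single struct vma_munmap_struct that init_vma_munmap() fills in and the vms_*() helpers consume, as the updated do_vmi_align_munmap() shows. The following is a minimal, self-contained userspace sketch of that parameter-consolidation pattern only; the names in it (struct unmap_state, init_unmap_state(), gather_range(), complete_range()) are invented for illustration and are not kernel APIs.

/*
 * Illustrative sketch: bundle the state of a two-phase unmap operation into
 * one struct that is initialized once and threaded through the gather and
 * complete phases, instead of passing each value as a separate argument.
 */
#include <stdbool.h>
#include <stdio.h>

struct unmap_state {			/* hypothetical analogue of vma_munmap_struct */
	unsigned long start;		/* aligned start of the range */
	unsigned long end;		/* aligned end of the range */
	int range_count;		/* ranges gathered so far */
	unsigned long locked;		/* pages counted as "locked" */
	bool unlock;			/* drop the lock on success */
};

static void init_unmap_state(struct unmap_state *st, unsigned long start,
			     unsigned long end, bool unlock)
{
	st->start = start;
	st->end = end;
	st->unlock = unlock;
	st->range_count = 0;
	st->locked = 0;
}

/* Phase 1: collect work and accumulate statistics into the state struct. */
static int gather_range(struct unmap_state *st)
{
	st->range_count++;
	st->locked += (st->end - st->start) / 4096;
	return 0;
}

/* Phase 2: apply the accumulated state; no long argument list is needed. */
static void complete_range(struct unmap_state *st)
{
	printf("unmapping [%lx, %lx): %d range(s), %lu locked pages, unlock=%d\n",
	       st->start, st->end, st->range_count, st->locked, st->unlock);
}

int main(void)
{
	struct unmap_state st;

	init_unmap_state(&st, 0x1000, 0x9000, true);
	if (gather_range(&st))
		return 1;
	complete_range(&st);
	return 0;
}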