/*
 *	linux/mm/msync.c
 *
 * Copyright (C) 1994-1999  Linus Torvalds
 */

/*
 * The msync() system call.
 */
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/hugetlb.h>
#include <linux/writeback.h>
#include <linux/file.h>
#include <linux/syscalls.h>

#include <asm/pgtable.h>
#include <asm/tlbflush.h>

static unsigned long msync_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
				unsigned long addr, unsigned long end)
{
	pte_t *pte;
	spinlock_t *ptl;
	int progress = 0;
	unsigned long ret = 0;

again:
	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
	do {
		struct page *page;

		/* Periodically drop the page table lock to bound latency. */
		if (progress >= 64) {
			progress = 0;
			if (need_resched() || need_lockbreak(ptl))
				break;
		}
		progress++;
		if (!pte_present(*pte))
			continue;
		if (!pte_maybe_dirty(*pte))
			continue;
		page = vm_normal_page(vma, addr, *pte);
		if (!page)
			continue;
		/* Transfer dirty state from the pte to the struct page. */
		if (ptep_clear_flush_dirty(vma, addr, pte) ||
		    page_test_and_clear_dirty(page))
			ret += set_page_dirty(page);
		progress += 3;
	} while (pte++, addr += PAGE_SIZE, addr != end);
	pte_unmap_unlock(pte - 1, ptl);
	cond_resched();
	if (addr != end)
		goto again;
	return ret;
}

static inline unsigned long msync_pmd_range(struct vm_area_struct *vma,
			pud_t *pud, unsigned long addr, unsigned long end)
{
	pmd_t *pmd;
	unsigned long next;
	unsigned long ret = 0;

	pmd = pmd_offset(pud, addr);
	do {
		next = pmd_addr_end(addr, end);
		if (pmd_none_or_clear_bad(pmd))
			continue;
		ret += msync_pte_range(vma, pmd, addr, next);
	} while (pmd++, addr = next, addr != end);
	return ret;
}

static inline unsigned long msync_pud_range(struct vm_area_struct *vma,
			pgd_t *pgd, unsigned long addr, unsigned long end)
{
	pud_t *pud;
	unsigned long next;
	unsigned long ret = 0;

	pud = pud_offset(pgd, addr);
	do {
		next = pud_addr_end(addr, end);
		if (pud_none_or_clear_bad(pud))
			continue;
		ret += msync_pmd_range(vma, pud, addr, next);
	} while (pud++, addr = next, addr != end);
	return ret;
}

static unsigned long msync_page_range(struct vm_area_struct *vma,
				unsigned long addr, unsigned long end)
{
	pgd_t *pgd;
	unsigned long next;
	unsigned long ret = 0;

	/* For hugepages we can't go walking the page table normally,
	 * but that's ok, hugetlbfs is memory based, so we don't need
	 * to do anything more on an msync().
	 */
	if (vma->vm_flags & VM_HUGETLB)
		return 0;

	BUG_ON(addr >= end);
	pgd = pgd_offset(vma->vm_mm, addr);
	flush_cache_range(vma, addr, end);
	do {
		next = pgd_addr_end(addr, end);
		if (pgd_none_or_clear_bad(pgd))
			continue;
		ret += msync_pud_range(vma, pgd, addr, next);
	} while (pgd++, addr = next, addr != end);
	return ret;
}

/*
 * MS_SYNC syncs the entire file - including mappings.
 *
 * MS_ASYNC does not start I/O (it used to, up to 2.5.67).  Instead, it just
 * marks the relevant pages dirty.  The application may now run fsync() to
 * write out the dirty pages and wait on the writeout and check the result.
 * Or the application may run fadvise(FADV_DONTNEED) against the fd to start
 * async writeout immediately.
 * So by _not_ starting I/O in MS_ASYNC we provide complete flexibility to
 * applications.
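 *
 * For example (an illustrative userspace sketch, not part of this file;
 * "fd", "map", "data" and "len" are assumed to be set up by the caller):
 *
 *	memcpy(map, data, len);		- dirty some MAP_SHARED pages
 *	msync(map, len, MS_ASYNC);	- mark the pages dirty, start no I/O
 *	fsync(fd);			- write out, wait, check the result
 *
 * or replace the fsync() with posix_fadvise(fd, 0, len, POSIX_FADV_DONTNEED)
 * to kick off asynchronous writeout without waiting on it.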
 */
static int msync_interval(struct vm_area_struct *vma, unsigned long addr,
			unsigned long end, int flags,
			unsigned long *nr_pages_dirtied)
{
	struct file *file = vma->vm_file;

	if ((flags & MS_INVALIDATE) && (vma->vm_flags & VM_LOCKED))
		return -EBUSY;

	if (file && (vma->vm_flags & VM_SHARED))
		*nr_pages_dirtied = msync_page_range(vma, addr, end);
	return 0;
}

asmlinkage long sys_msync(unsigned long start, size_t len, int flags)
{
	unsigned long end;
	struct vm_area_struct *vma;
	int unmapped_error = 0;
	int error = -EINVAL;
	int done = 0;

	if (flags & ~(MS_ASYNC | MS_INVALIDATE | MS_SYNC))
		goto out;
	if (start & ~PAGE_MASK)
		goto out;
	if ((flags & MS_ASYNC) && (flags & MS_SYNC))
		goto out;
	error = -ENOMEM;
	len = (len + ~PAGE_MASK) & PAGE_MASK;	/* round len up to a whole page */
	end = start + len;
	if (end < start)
		goto out;
	error = 0;
	if (end == start)
		goto out;
	/*
	 * If the interval [start,end) covers some unmapped address ranges,
	 * just ignore them, but return -ENOMEM at the end.
	 */
	down_read(&current->mm->mmap_sem);
	if (flags & MS_SYNC)
		current->flags |= PF_SYNCWRITE;
	vma = find_vma(current->mm, start);
	if (!vma) {
		error = -ENOMEM;
		goto out_unlock;
	}
	do {
		unsigned long nr_pages_dirtied = 0;
		struct file *file;

		/* Here start < vma->vm_end. */
		if (start < vma->vm_start) {
			unmapped_error = -ENOMEM;
			start = vma->vm_start;
		}
		/* Here vma->vm_start <= start < vma->vm_end. */
		if (end <= vma->vm_end) {
			if (start < end) {
				error = msync_interval(vma, start, end, flags,
							&nr_pages_dirtied);
				if (error)
					goto out_unlock;
			}
			error = unmapped_error;
			done = 1;
		} else {
			/* Here vma->vm_start <= start < vma->vm_end < end. */
			error = msync_interval(vma, start, vma->vm_end, flags,
						&nr_pages_dirtied);
			if (error)
				goto out_unlock;
		}
		file = vma->vm_file;
		start = vma->vm_end;
		/* MS_ASYNC: no I/O, just throttle against the dirty limits. */
		if ((flags & MS_ASYNC) && file && nr_pages_dirtied) {
			get_file(file);
			up_read(&current->mm->mmap_sem);
			balance_dirty_pages_ratelimited_nr(file->f_mapping,
							nr_pages_dirtied);
			fput(file);
			down_read(&current->mm->mmap_sem);
			vma = find_vma(current->mm, start);
		} else if ((flags & MS_SYNC) && file &&
				(vma->vm_flags & VM_SHARED)) {
			get_file(file);
			up_read(&current->mm->mmap_sem);
			error = do_fsync(file, 0);
			fput(file);
			down_read(&current->mm->mmap_sem);
			if (error)
				goto out_unlock;
			vma = find_vma(current->mm, start);
		} else {
			vma = vma->vm_next;
		}
	} while (vma && !done);
out_unlock:
	current->flags &= ~PF_SYNCWRITE;
	up_read(&current->mm->mmap_sem);
out:
	return error;
}
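
/*
 * Illustrative userspace usage of the syscall above (a hypothetical helper,
 * not part of this file, kept under #if 0 so it is never built here).  It
 * exercises the contract sys_msync() implements: the address passed to
 * msync() must be page aligned (mmap() guarantees that), len is rounded up
 * to a whole page by the kernel, and MS_SYNC writes the dirty pages out and
 * waits for the result.
 */
#if 0
#include <string.h>
#include <sys/mman.h>

static int write_and_sync(int fd, const void *buf, size_t len)
{
	char *map = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);

	if (map == MAP_FAILED)
		return -1;
	memcpy(map, buf, len);			/* dirty the shared pages */
	if (msync(map, len, MS_SYNC) < 0) {	/* write out and wait */
		munmap(map, len);
		return -1;
	}
	return munmap(map, len);
}
#endif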