/*
 *  linux/mm/mmu_notifier.c
 *
 *  Copyright (C) 2008  Qumranet, Inc.
 *  Copyright (C) 2008  SGI
 *             Christoph Lameter <clameter@sgi.com>
 *
 *  This work is licensed under the terms of the GNU GPL, version 2. See
 *  the COPYING file in the top-level directory.
 */

#include <linux/rculist.h>
#include <linux/mmu_notifier.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/err.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/slab.h>

/*
 * This function can't run concurrently against mmu_notifier_register
 * because mm->mm_users > 0 during mmu_notifier_register and exit_mmap
 * runs with mm_users == 0. Other tasks may still invoke mmu notifiers
 * in parallel despite there being no task using this mm any more,
 * through the vmas outside of the exit_mmap context, such as with
 * vmtruncate. This serializes against mmu_notifier_unregister with
 * the mmu_notifier_mm->lock in addition to RCU, and it serializes
 * against the other mmu notifiers with RCU. struct mmu_notifier_mm
 * can't go away from under us as exit_mmap holds an mm_count pin
 * itself.
 */
void __mmu_notifier_release(struct mm_struct *mm)
{
	struct mmu_notifier *mn;

	spin_lock(&mm->mmu_notifier_mm->lock);
	while (unlikely(!hlist_empty(&mm->mmu_notifier_mm->list))) {
		mn = hlist_entry(mm->mmu_notifier_mm->list.first,
				 struct mmu_notifier,
				 hlist);
		/*
		 * We arrived before mmu_notifier_unregister, so
		 * mmu_notifier_unregister will do nothing other than
		 * wait for ->release to finish and then return.
		 */
		hlist_del_init_rcu(&mn->hlist);
		/*
		 * RCU here will block mmu_notifier_unregister until
		 * ->release returns.
		 */
		rcu_read_lock();
		spin_unlock(&mm->mmu_notifier_mm->lock);
		/*
		 * If ->release runs before mmu_notifier_unregister it
		 * must be handled, as it's the only way for the driver
		 * to flush all existing sptes and stop the driver
		 * from establishing any more sptes before all the
		 * pages in the mm are freed.
		 */
		if (mn->ops->release)
			mn->ops->release(mn, mm);
		rcu_read_unlock();
		spin_lock(&mm->mmu_notifier_mm->lock);
	}
	spin_unlock(&mm->mmu_notifier_mm->lock);

	/*
	 * synchronize_rcu here prevents mmu_notifier_release from
	 * returning to exit_mmap (which would proceed to free all pages
	 * in the mm) until the ->release method returns, if it was
	 * invoked by mmu_notifier_unregister.
	 *
	 * The mmu_notifier_mm can't go away from under us because one
	 * mm_count is held by exit_mmap.
	 */
	synchronize_rcu();
}
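
/*
 * Illustrative sketch only (not part of this file's API): what a
 * minimal ->release implementation could look like in a hypothetical
 * driver that shadows this mm with secondary page tables ("sptes").
 * Every identifier below (struct example_mmu, example_release,
 * example_flush_all_sptes, the "dead" flag, ...) is invented for the
 * example; a real driver would use its own data structures and
 * hardware-specific flush code.
 */
#if 0
struct example_mmu {
	struct mmu_notifier mn;	/* embedded, passed back to the callbacks */
	spinlock_t lock;	/* protects the fields below */
	bool dead;		/* once set, no new sptes may be established */
	int invalidate_count;	/* nested invalidate_range_start/end pairs */
};

static void example_flush_all_sptes(struct example_mmu *emm)
{
	/* tear down every secondary mapping; hardware-specific in real life */
}

static void example_release(struct mmu_notifier *mn, struct mm_struct *mm)
{
	struct example_mmu *emm = container_of(mn, struct example_mmu, mn);

	/*
	 * As the comment above __mmu_notifier_release explains, ->release
	 * must both drop all existing sptes and prevent new ones from
	 * being created, because exit_mmap will free the underlying pages
	 * as soon as __mmu_notifier_release returns.
	 */
	spin_lock(&emm->lock);
	emm->dead = true;
	example_flush_all_sptes(emm);
	spin_unlock(&emm->lock);
}
#endif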
/*
 * If no young bitflag is supported by the hardware, ->clear_flush_young can
 * unmap the address and return 1 or 0 depending if the mapping previously
 * existed or not.
 */
int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
				     unsigned long address)
{
	struct mmu_notifier *mn;
	struct hlist_node *n;
	int young = 0;

	rcu_read_lock();
	hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) {
		if (mn->ops->clear_flush_young)
			young |= mn->ops->clear_flush_young(mn, mm, address);
	}
	rcu_read_unlock();

	return young;
}

void __mmu_notifier_change_pte(struct mm_struct *mm, unsigned long address,
			       pte_t pte)
{
	struct mmu_notifier *mn;
	struct hlist_node *n;

	rcu_read_lock();
	hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) {
		if (mn->ops->change_pte)
			mn->ops->change_pte(mn, mm, address, pte);
		/*
		 * Some drivers don't have change_pte,
		 * so we must call invalidate_page in that case.
		 */
		else if (mn->ops->invalidate_page)
			mn->ops->invalidate_page(mn, mm, address);
	}
	rcu_read_unlock();
}

void __mmu_notifier_invalidate_page(struct mm_struct *mm,
				    unsigned long address)
{
	struct mmu_notifier *mn;
	struct hlist_node *n;

	rcu_read_lock();
	hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) {
		if (mn->ops->invalidate_page)
			mn->ops->invalidate_page(mn, mm, address);
	}
	rcu_read_unlock();
}

void __mmu_notifier_invalidate_range_start(struct mm_struct *mm,
					   unsigned long start, unsigned long end)
{
	struct mmu_notifier *mn;
	struct hlist_node *n;

	rcu_read_lock();
	hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) {
		if (mn->ops->invalidate_range_start)
			mn->ops->invalidate_range_start(mn, mm, start, end);
	}
	rcu_read_unlock();
}

void __mmu_notifier_invalidate_range_end(struct mm_struct *mm,
					 unsigned long start, unsigned long end)
{
	struct mmu_notifier *mn;
	struct hlist_node *n;

	rcu_read_lock();
	hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) {
		if (mn->ops->invalidate_range_end)
			mn->ops->invalidate_range_end(mn, mm, start, end);
	}
	rcu_read_unlock();
}
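
/*
 * Illustrative sketch only, continuing the invented struct example_mmu
 * from the sketch after __mmu_notifier_release() above: a common way
 * for a hypothetical driver to consume the invalidate_range_start/end
 * pair is to count in-flight invalidations and refuse to establish new
 * sptes for the affected range while the count is non-zero.
 */
#if 0
static void example_invalidate_range_start(struct mmu_notifier *mn,
					   struct mm_struct *mm,
					   unsigned long start,
					   unsigned long end)
{
	struct example_mmu *emm = container_of(mn, struct example_mmu, mn);

	spin_lock(&emm->lock);
	emm->invalidate_count++;
	/* drop any existing sptes covering [start, end) here */
	spin_unlock(&emm->lock);
}

static void example_invalidate_range_end(struct mmu_notifier *mn,
					 struct mm_struct *mm,
					 unsigned long start,
					 unsigned long end)
{
	struct example_mmu *emm = container_of(mn, struct example_mmu, mn);

	spin_lock(&emm->lock);
	emm->invalidate_count--;	/* new sptes may be built again at zero */
	spin_unlock(&emm->lock);
}
#endif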
static int do_mmu_notifier_register(struct mmu_notifier *mn,
				    struct mm_struct *mm,
				    int take_mmap_sem)
{
	struct mmu_notifier_mm *mmu_notifier_mm;
	int ret;

	BUG_ON(atomic_read(&mm->mm_users) <= 0);

	ret = -ENOMEM;
	mmu_notifier_mm = kmalloc(sizeof(struct mmu_notifier_mm), GFP_KERNEL);
	if (unlikely(!mmu_notifier_mm))
		goto out;

	if (take_mmap_sem)
		down_write(&mm->mmap_sem);
	ret = mm_take_all_locks(mm);
	if (unlikely(ret))
		goto out_cleanup;

	if (!mm_has_notifiers(mm)) {
		INIT_HLIST_HEAD(&mmu_notifier_mm->list);
		spin_lock_init(&mmu_notifier_mm->lock);
		mm->mmu_notifier_mm = mmu_notifier_mm;
		mmu_notifier_mm = NULL;
	}
	atomic_inc(&mm->mm_count);

	/*
	 * Serialize the update against mmu_notifier_unregister. A
	 * side note: mmu_notifier_release can't run concurrently with
	 * us because we hold the mm_users pin (either implicitly as
	 * current->mm or explicitly with get_task_mm() or similar).
	 * We can't race against any other mmu notifier method either
	 * thanks to mm_take_all_locks().
	 */
	spin_lock(&mm->mmu_notifier_mm->lock);
	hlist_add_head(&mn->hlist, &mm->mmu_notifier_mm->list);
	spin_unlock(&mm->mmu_notifier_mm->lock);

	mm_drop_all_locks(mm);
out_cleanup:
	if (take_mmap_sem)
		up_write(&mm->mmap_sem);
	/* kfree() does nothing if mmu_notifier_mm is NULL */
	kfree(mmu_notifier_mm);
out:
	BUG_ON(atomic_read(&mm->mm_users) <= 0);
	return ret;
}

/*
 * Must not hold mmap_sem nor any other VM related lock when calling
 * this registration function. Must also ensure mm_users can't go down
 * to zero while this runs to avoid races with mmu_notifier_release,
 * so mm has to be current->mm or the mm should be pinned safely such
 * as with get_task_mm(). If the mm is not current->mm, the mm_users
 * pin should be released by calling mmput after mmu_notifier_register
 * returns. mmu_notifier_unregister must always be called to
 * unregister the notifier. mm_count is automatically pinned to allow
 * mmu_notifier_unregister to safely run at any time later, before or
 * after exit_mmap. ->release will always be called before exit_mmap
 * frees the pages.
 */
int mmu_notifier_register(struct mmu_notifier *mn, struct mm_struct *mm)
{
	return do_mmu_notifier_register(mn, mm, 1);
}
EXPORT_SYMBOL_GPL(mmu_notifier_register);

/*
 * Same as mmu_notifier_register but here the caller must hold the
 * mmap_sem in write mode.
 */
int __mmu_notifier_register(struct mmu_notifier *mn, struct mm_struct *mm)
{
	return do_mmu_notifier_register(mn, mm, 0);
}
EXPORT_SYMBOL_GPL(__mmu_notifier_register);

/* this is called after the last mmu_notifier_unregister() returned */
void __mmu_notifier_mm_destroy(struct mm_struct *mm)
{
	BUG_ON(!hlist_empty(&mm->mmu_notifier_mm->list));
	kfree(mm->mmu_notifier_mm);
	mm->mmu_notifier_mm = LIST_POISON1; /* debug */
}

/*
 * This releases the mm_count pin automatically and frees the mm
 * structure if it was the last user of it. It serializes against
 * running mmu notifiers with RCU and against mmu_notifier_unregister
 * with the unregister lock + RCU. All sptes must be dropped before
 * calling mmu_notifier_unregister. ->release or any other notifier
 * method may be invoked concurrently with mmu_notifier_unregister,
 * and only after mmu_notifier_unregister has returned are we
 * guaranteed that ->release or any other method can't run any more.
 */
void mmu_notifier_unregister(struct mmu_notifier *mn, struct mm_struct *mm)
{
	BUG_ON(atomic_read(&mm->mm_count) <= 0);

	spin_lock(&mm->mmu_notifier_mm->lock);
	if (!hlist_unhashed(&mn->hlist)) {
		hlist_del_rcu(&mn->hlist);

		/*
		 * RCU here will force exit_mmap to wait for ->release
		 * to finish before freeing the pages.
		 */
		rcu_read_lock();
		spin_unlock(&mm->mmu_notifier_mm->lock);
		/*
		 * exit_mmap will block in mmu_notifier_release to
		 * guarantee ->release is called before freeing the
		 * pages.
		 */
		if (mn->ops->release)
			mn->ops->release(mn, mm);
		rcu_read_unlock();
	} else
		spin_unlock(&mm->mmu_notifier_mm->lock);

	/*
	 * Wait for any running method to finish, including ->release
	 * if it was run by mmu_notifier_release instead of us.
	 */
	synchronize_rcu();

	BUG_ON(atomic_read(&mm->mm_count) <= 0);

	mmdrop(mm);
}
EXPORT_SYMBOL_GPL(mmu_notifier_unregister);
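
/*
 * Illustrative sketch only, completing the invented example_mmu driver
 * from the sketches above: wire the callbacks into a mmu_notifier_ops
 * table and register against current->mm, which satisfies the mm_users
 * requirement documented above mmu_notifier_register(). The matching
 * mmu_notifier_unregister() may then be called at any later time,
 * before or after exit_mmap. All example_* names are hypothetical.
 */
#if 0
static const struct mmu_notifier_ops example_mmu_ops = {
	.release		= example_release,
	.invalidate_range_start	= example_invalidate_range_start,
	.invalidate_range_end	= example_invalidate_range_end,
};

static int example_attach(struct example_mmu *emm)
{
	spin_lock_init(&emm->lock);
	emm->dead = false;
	emm->invalidate_count = 0;
	emm->mn.ops = &example_mmu_ops;

	/* must be called without mmap_sem or any other VM lock held */
	return mmu_notifier_register(&emm->mn, current->mm);
}

static void example_detach(struct example_mmu *emm, struct mm_struct *mm)
{
	/* drops the mm_count pin taken by mmu_notifier_register */
	mmu_notifier_unregister(&emm->mn, mm);
}
#endif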