1cddb8a5cSAndrea Arcangeli /* 2cddb8a5cSAndrea Arcangeli * linux/mm/mmu_notifier.c 3cddb8a5cSAndrea Arcangeli * 4cddb8a5cSAndrea Arcangeli * Copyright (C) 2008 Qumranet, Inc. 5cddb8a5cSAndrea Arcangeli * Copyright (C) 2008 SGI 6*93e205a7SChristoph Lameter * Christoph Lameter <cl@linux.com> 7cddb8a5cSAndrea Arcangeli * 8cddb8a5cSAndrea Arcangeli * This work is licensed under the terms of the GNU GPL, version 2. See 9cddb8a5cSAndrea Arcangeli * the COPYING file in the top-level directory. 10cddb8a5cSAndrea Arcangeli */ 11cddb8a5cSAndrea Arcangeli 12cddb8a5cSAndrea Arcangeli #include <linux/rculist.h> 13cddb8a5cSAndrea Arcangeli #include <linux/mmu_notifier.h> 14b95f1b31SPaul Gortmaker #include <linux/export.h> 15cddb8a5cSAndrea Arcangeli #include <linux/mm.h> 16cddb8a5cSAndrea Arcangeli #include <linux/err.h> 1721a92735SSagi Grimberg #include <linux/srcu.h> 18cddb8a5cSAndrea Arcangeli #include <linux/rcupdate.h> 19cddb8a5cSAndrea Arcangeli #include <linux/sched.h> 205a0e3ad6STejun Heo #include <linux/slab.h> 21cddb8a5cSAndrea Arcangeli 2221a92735SSagi Grimberg /* global SRCU for all MMs */ 2370400303SAndrea Arcangeli static struct srcu_struct srcu; 2421a92735SSagi Grimberg 25cddb8a5cSAndrea Arcangeli /* 26b972216eSPeter Zijlstra * This function allows mmu_notifier::release callback to delay a call to 27b972216eSPeter Zijlstra * a function that will free appropriate resources. The function must be 28b972216eSPeter Zijlstra * quick and must not block. 
29b972216eSPeter Zijlstra */ 30b972216eSPeter Zijlstra void mmu_notifier_call_srcu(struct rcu_head *rcu, 31b972216eSPeter Zijlstra void (*func)(struct rcu_head *rcu)) 32b972216eSPeter Zijlstra { 33b972216eSPeter Zijlstra call_srcu(&srcu, rcu, func); 34b972216eSPeter Zijlstra } 35b972216eSPeter Zijlstra EXPORT_SYMBOL_GPL(mmu_notifier_call_srcu); 36b972216eSPeter Zijlstra 37b972216eSPeter Zijlstra void mmu_notifier_synchronize(void) 38b972216eSPeter Zijlstra { 39b972216eSPeter Zijlstra /* Wait for any running method to finish. */ 40b972216eSPeter Zijlstra srcu_barrier(&srcu); 41b972216eSPeter Zijlstra } 42b972216eSPeter Zijlstra EXPORT_SYMBOL_GPL(mmu_notifier_synchronize); 43b972216eSPeter Zijlstra 44b972216eSPeter Zijlstra /* 45cddb8a5cSAndrea Arcangeli * This function can't run concurrently against mmu_notifier_register 46cddb8a5cSAndrea Arcangeli * because mm->mm_users > 0 during mmu_notifier_register and exit_mmap 47cddb8a5cSAndrea Arcangeli * runs with mm_users == 0. Other tasks may still invoke mmu notifiers 48cddb8a5cSAndrea Arcangeli * in parallel despite there being no task using this mm any more, 49cddb8a5cSAndrea Arcangeli * through the vmas outside of the exit_mmap context, such as with 50cddb8a5cSAndrea Arcangeli * vmtruncate. This serializes against mmu_notifier_unregister with 5121a92735SSagi Grimberg * the mmu_notifier_mm->lock in addition to SRCU and it serializes 5221a92735SSagi Grimberg * against the other mmu notifiers with SRCU. struct mmu_notifier_mm 53cddb8a5cSAndrea Arcangeli * can't go away from under us as exit_mmap holds an mm_count pin 54cddb8a5cSAndrea Arcangeli * itself. 
/*
 * This function can't run concurrently against mmu_notifier_register
 * because mm->mm_users > 0 during mmu_notifier_register and exit_mmap
 * runs with mm_users == 0. Other tasks may still invoke mmu notifiers
 * in parallel despite there being no task using this mm any more,
 * through the vmas outside of the exit_mmap context, such as with
 * vmtruncate. This serializes against mmu_notifier_unregister with
 * the mmu_notifier_mm->lock in addition to SRCU and it serializes
 * against the other mmu notifiers with SRCU. struct mmu_notifier_mm
 * can't go away from under us as exit_mmap holds an mm_count pin
 * itself.
 */
void __mmu_notifier_release(struct mm_struct *mm)
{
	struct mmu_notifier *mn;
	int id;		/* SRCU read-side cookie for srcu_read_unlock() */

	/*
	 * SRCU here will block mmu_notifier_unregister until
	 * ->release returns.
	 */
	id = srcu_read_lock(&srcu);
	hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist)
		/*
		 * If ->release runs before mmu_notifier_unregister it must be
		 * handled, as it's the only way for the driver to flush all
		 * existing sptes and stop the driver from establishing any more
		 * sptes before all the pages in the mm are freed.
		 */
		if (mn->ops->release)
			mn->ops->release(mn, mm);

	/* Empty the list under the lock so unregister sees the unhash. */
	spin_lock(&mm->mmu_notifier_mm->lock);
	while (unlikely(!hlist_empty(&mm->mmu_notifier_mm->list))) {
		mn = hlist_entry(mm->mmu_notifier_mm->list.first,
				 struct mmu_notifier,
				 hlist);
		/*
		 * We arrived before mmu_notifier_unregister so
		 * mmu_notifier_unregister will do nothing other than to wait
		 * for ->release to finish and for mmu_notifier_unregister to
		 * return.
		 */
		hlist_del_init_rcu(&mn->hlist);
	}
	spin_unlock(&mm->mmu_notifier_mm->lock);
	srcu_read_unlock(&srcu, id);

	/*
	 * synchronize_srcu here prevents mmu_notifier_release from returning to
	 * exit_mmap (which would proceed with freeing all pages in the mm)
	 * until the ->release method returns, if it was invoked by
	 * mmu_notifier_unregister.
	 *
	 * The mmu_notifier_mm can't go away from under us because one mm_count
	 * is held by exit_mmap.
	 */
	synchronize_srcu(&srcu);
}
108cddb8a5cSAndrea Arcangeli */ 109cddb8a5cSAndrea Arcangeli int __mmu_notifier_clear_flush_young(struct mm_struct *mm, 11057128468SAndres Lagar-Cavilla unsigned long start, 11157128468SAndres Lagar-Cavilla unsigned long end) 112cddb8a5cSAndrea Arcangeli { 113cddb8a5cSAndrea Arcangeli struct mmu_notifier *mn; 11421a92735SSagi Grimberg int young = 0, id; 115cddb8a5cSAndrea Arcangeli 11621a92735SSagi Grimberg id = srcu_read_lock(&srcu); 117b67bfe0dSSasha Levin hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) { 118cddb8a5cSAndrea Arcangeli if (mn->ops->clear_flush_young) 11957128468SAndres Lagar-Cavilla young |= mn->ops->clear_flush_young(mn, mm, start, end); 120cddb8a5cSAndrea Arcangeli } 12121a92735SSagi Grimberg srcu_read_unlock(&srcu, id); 122cddb8a5cSAndrea Arcangeli 123cddb8a5cSAndrea Arcangeli return young; 124cddb8a5cSAndrea Arcangeli } 125cddb8a5cSAndrea Arcangeli 1261d7715c6SVladimir Davydov int __mmu_notifier_clear_young(struct mm_struct *mm, 1271d7715c6SVladimir Davydov unsigned long start, 1281d7715c6SVladimir Davydov unsigned long end) 1291d7715c6SVladimir Davydov { 1301d7715c6SVladimir Davydov struct mmu_notifier *mn; 1311d7715c6SVladimir Davydov int young = 0, id; 1321d7715c6SVladimir Davydov 1331d7715c6SVladimir Davydov id = srcu_read_lock(&srcu); 1341d7715c6SVladimir Davydov hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) { 1351d7715c6SVladimir Davydov if (mn->ops->clear_young) 1361d7715c6SVladimir Davydov young |= mn->ops->clear_young(mn, mm, start, end); 1371d7715c6SVladimir Davydov } 1381d7715c6SVladimir Davydov srcu_read_unlock(&srcu, id); 1391d7715c6SVladimir Davydov 1401d7715c6SVladimir Davydov return young; 1411d7715c6SVladimir Davydov } 1421d7715c6SVladimir Davydov 1438ee53820SAndrea Arcangeli int __mmu_notifier_test_young(struct mm_struct *mm, 1448ee53820SAndrea Arcangeli unsigned long address) 1458ee53820SAndrea Arcangeli { 1468ee53820SAndrea Arcangeli struct mmu_notifier *mn; 14721a92735SSagi Grimberg int 
young = 0, id; 1488ee53820SAndrea Arcangeli 14921a92735SSagi Grimberg id = srcu_read_lock(&srcu); 150b67bfe0dSSasha Levin hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) { 1518ee53820SAndrea Arcangeli if (mn->ops->test_young) { 1528ee53820SAndrea Arcangeli young = mn->ops->test_young(mn, mm, address); 1538ee53820SAndrea Arcangeli if (young) 1548ee53820SAndrea Arcangeli break; 1558ee53820SAndrea Arcangeli } 1568ee53820SAndrea Arcangeli } 15721a92735SSagi Grimberg srcu_read_unlock(&srcu, id); 1588ee53820SAndrea Arcangeli 1598ee53820SAndrea Arcangeli return young; 1608ee53820SAndrea Arcangeli } 1618ee53820SAndrea Arcangeli 162828502d3SIzik Eidus void __mmu_notifier_change_pte(struct mm_struct *mm, unsigned long address, 163828502d3SIzik Eidus pte_t pte) 164828502d3SIzik Eidus { 165828502d3SIzik Eidus struct mmu_notifier *mn; 16621a92735SSagi Grimberg int id; 167828502d3SIzik Eidus 16821a92735SSagi Grimberg id = srcu_read_lock(&srcu); 169b67bfe0dSSasha Levin hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) { 170828502d3SIzik Eidus if (mn->ops->change_pte) 171828502d3SIzik Eidus mn->ops->change_pte(mn, mm, address, pte); 172828502d3SIzik Eidus } 17321a92735SSagi Grimberg srcu_read_unlock(&srcu, id); 174828502d3SIzik Eidus } 175828502d3SIzik Eidus 176cddb8a5cSAndrea Arcangeli void __mmu_notifier_invalidate_page(struct mm_struct *mm, 177cddb8a5cSAndrea Arcangeli unsigned long address) 178cddb8a5cSAndrea Arcangeli { 179cddb8a5cSAndrea Arcangeli struct mmu_notifier *mn; 18021a92735SSagi Grimberg int id; 181cddb8a5cSAndrea Arcangeli 18221a92735SSagi Grimberg id = srcu_read_lock(&srcu); 183b67bfe0dSSasha Levin hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) { 184cddb8a5cSAndrea Arcangeli if (mn->ops->invalidate_page) 185cddb8a5cSAndrea Arcangeli mn->ops->invalidate_page(mn, mm, address); 186cddb8a5cSAndrea Arcangeli } 18721a92735SSagi Grimberg srcu_read_unlock(&srcu, id); 188cddb8a5cSAndrea Arcangeli } 189cddb8a5cSAndrea 
void __mmu_notifier_invalidate_range_start(struct mm_struct *mm,
				  unsigned long start, unsigned long end)
{
	struct mmu_notifier *mn;
	int id;

	/* SRCU read-side section keeps the notifier list stable. */
	id = srcu_read_lock(&srcu);
	hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) {
		if (mn->ops->invalidate_range_start)
			mn->ops->invalidate_range_start(mn, mm, start, end);
	}
	srcu_read_unlock(&srcu, id);
}
EXPORT_SYMBOL_GPL(__mmu_notifier_invalidate_range_start);

void __mmu_notifier_invalidate_range_end(struct mm_struct *mm,
				  unsigned long start, unsigned long end)
{
	struct mmu_notifier *mn;
	int id;

	id = srcu_read_lock(&srcu);
	hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) {
		/*
		 * Call invalidate_range here too to avoid the need for the
		 * subsystem of having to register an invalidate_range_end
		 * call-back when there is invalidate_range already. Usually a
		 * subsystem registers either invalidate_range_start()/end() or
		 * invalidate_range(), so this will be no additional overhead
		 * (besides the pointer check).
		 */
		if (mn->ops->invalidate_range)
			mn->ops->invalidate_range(mn, mm, start, end);
		if (mn->ops->invalidate_range_end)
			mn->ops->invalidate_range_end(mn, mm, start, end);
	}
	srcu_read_unlock(&srcu, id);
}
EXPORT_SYMBOL_GPL(__mmu_notifier_invalidate_range_end);

void __mmu_notifier_invalidate_range(struct mm_struct *mm,
				  unsigned long start, unsigned long end)
{
	struct mmu_notifier *mn;
	int id;

	id = srcu_read_lock(&srcu);
	hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) {
		if (mn->ops->invalidate_range)
			mn->ops->invalidate_range(mn, mm, start, end);
	}
	srcu_read_unlock(&srcu, id);
}
EXPORT_SYMBOL_GPL(__mmu_notifier_invalidate_range);

/*
 * Common body of mmu_notifier_register() and __mmu_notifier_register():
 * allocate mm->mmu_notifier_mm on first use, pin mm_count, and add @mn
 * to the list while holding every VM lock via mm_take_all_locks().
 * @take_mmap_sem selects whether mmap_sem must be taken here or is
 * already held in write mode by the caller.  Returns 0 or -errno.
 */
static int do_mmu_notifier_register(struct mmu_notifier *mn,
				    struct mm_struct *mm,
				    int take_mmap_sem)
{
	struct mmu_notifier_mm *mmu_notifier_mm;
	int ret;

	BUG_ON(atomic_read(&mm->mm_users) <= 0);

	/*
	 * Verify that mmu_notifier_init() already run and the global srcu is
	 * initialized.
	 */
	BUG_ON(!srcu.per_cpu_ref);

	/* Allocate up front, before any lock, in case it is the first user. */
	ret = -ENOMEM;
	mmu_notifier_mm = kmalloc(sizeof(struct mmu_notifier_mm), GFP_KERNEL);
	if (unlikely(!mmu_notifier_mm))
		goto out;

	if (take_mmap_sem)
		down_write(&mm->mmap_sem);
	ret = mm_take_all_locks(mm);
	if (unlikely(ret))
		goto out_clean;

	if (!mm_has_notifiers(mm)) {
		INIT_HLIST_HEAD(&mmu_notifier_mm->list);
		spin_lock_init(&mmu_notifier_mm->lock);

		mm->mmu_notifier_mm = mmu_notifier_mm;
		/* Ownership transferred to mm; NULL makes kfree below a no-op. */
		mmu_notifier_mm = NULL;
	}
	atomic_inc(&mm->mm_count);

	/*
	 * Serialize the update against mmu_notifier_unregister. A
	 * side note: mmu_notifier_release can't run concurrently with
	 * us because we hold the mm_users pin (either implicitly as
	 * current->mm or explicitly with get_task_mm() or similar).
	 * We can't race against any other mmu notifier method either
	 * thanks to mm_take_all_locks().
	 */
	spin_lock(&mm->mmu_notifier_mm->lock);
	hlist_add_head(&mn->hlist, &mm->mmu_notifier_mm->list);
	spin_unlock(&mm->mmu_notifier_mm->lock);

	mm_drop_all_locks(mm);
out_clean:
	if (take_mmap_sem)
		up_write(&mm->mmap_sem);
	/* Frees the allocation only if it was not installed into mm above. */
	kfree(mmu_notifier_mm);
out:
	BUG_ON(atomic_read(&mm->mm_users) <= 0);
	return ret;
}
314cddb8a5cSAndrea Arcangeli */ 315cddb8a5cSAndrea Arcangeli int mmu_notifier_register(struct mmu_notifier *mn, struct mm_struct *mm) 316cddb8a5cSAndrea Arcangeli { 317cddb8a5cSAndrea Arcangeli return do_mmu_notifier_register(mn, mm, 1); 318cddb8a5cSAndrea Arcangeli } 319cddb8a5cSAndrea Arcangeli EXPORT_SYMBOL_GPL(mmu_notifier_register); 320cddb8a5cSAndrea Arcangeli 321cddb8a5cSAndrea Arcangeli /* 322cddb8a5cSAndrea Arcangeli * Same as mmu_notifier_register but here the caller must hold the 323cddb8a5cSAndrea Arcangeli * mmap_sem in write mode. 324cddb8a5cSAndrea Arcangeli */ 325cddb8a5cSAndrea Arcangeli int __mmu_notifier_register(struct mmu_notifier *mn, struct mm_struct *mm) 326cddb8a5cSAndrea Arcangeli { 327cddb8a5cSAndrea Arcangeli return do_mmu_notifier_register(mn, mm, 0); 328cddb8a5cSAndrea Arcangeli } 329cddb8a5cSAndrea Arcangeli EXPORT_SYMBOL_GPL(__mmu_notifier_register); 330cddb8a5cSAndrea Arcangeli 331cddb8a5cSAndrea Arcangeli /* this is called after the last mmu_notifier_unregister() returned */ 332cddb8a5cSAndrea Arcangeli void __mmu_notifier_mm_destroy(struct mm_struct *mm) 333cddb8a5cSAndrea Arcangeli { 334cddb8a5cSAndrea Arcangeli BUG_ON(!hlist_empty(&mm->mmu_notifier_mm->list)); 335cddb8a5cSAndrea Arcangeli kfree(mm->mmu_notifier_mm); 336cddb8a5cSAndrea Arcangeli mm->mmu_notifier_mm = LIST_POISON1; /* debug */ 337cddb8a5cSAndrea Arcangeli } 338cddb8a5cSAndrea Arcangeli 339cddb8a5cSAndrea Arcangeli /* 340cddb8a5cSAndrea Arcangeli * This releases the mm_count pin automatically and frees the mm 341cddb8a5cSAndrea Arcangeli * structure if it was the last user of it. It serializes against 34221a92735SSagi Grimberg * running mmu notifiers with SRCU and against mmu_notifier_unregister 34321a92735SSagi Grimberg * with the unregister lock + SRCU. All sptes must be dropped before 344cddb8a5cSAndrea Arcangeli * calling mmu_notifier_unregister. 
/*
 * This releases the mm_count pin automatically and frees the mm
 * structure if it was the last user of it. It serializes against
 * running mmu notifiers with SRCU and against mmu_notifier_unregister
 * with the unregister lock + SRCU. All sptes must be dropped before
 * calling mmu_notifier_unregister. ->release or any other notifier
 * method may be invoked concurrently with mmu_notifier_unregister,
 * and only after mmu_notifier_unregister returned we're guaranteed
 * that ->release or any other method can't run anymore.
 */
void mmu_notifier_unregister(struct mmu_notifier *mn, struct mm_struct *mm)
{
	BUG_ON(atomic_read(&mm->mm_count) <= 0);

	/* Still hashed => __mmu_notifier_release() has not handled us yet. */
	if (!hlist_unhashed(&mn->hlist)) {
		/*
		 * SRCU here will force exit_mmap to wait for ->release to
		 * finish before freeing the pages.
		 */
		int id;

		id = srcu_read_lock(&srcu);
		/*
		 * exit_mmap will block in mmu_notifier_release to guarantee
		 * that ->release is called before freeing the pages.
		 */
		if (mn->ops->release)
			mn->ops->release(mn, mm);
		srcu_read_unlock(&srcu, id);

		spin_lock(&mm->mmu_notifier_mm->lock);
		/*
		 * Can not use list_del_rcu() since __mmu_notifier_release
		 * can delete it before we hold the lock.
		 */
		hlist_del_init_rcu(&mn->hlist);
		spin_unlock(&mm->mmu_notifier_mm->lock);
	}

	/*
	 * Wait for any running method to finish, of course including
	 * ->release if it was run by mmu_notifier_release instead of us.
	 */
	synchronize_srcu(&srcu);

	BUG_ON(atomic_read(&mm->mm_count) <= 0);

	/* Drop the mm_count pin taken at registration time. */
	mmdrop(mm);
}
EXPORT_SYMBOL_GPL(mmu_notifier_unregister);

/*
 * Same as mmu_notifier_unregister but no callback and no srcu synchronization.
 */
void mmu_notifier_unregister_no_release(struct mmu_notifier *mn,
					struct mm_struct *mm)
{
	spin_lock(&mm->mmu_notifier_mm->lock);
	/*
	 * Can not use list_del_rcu() since __mmu_notifier_release
	 * can delete it before we hold the lock.
	 */
	hlist_del_init_rcu(&mn->hlist);
	spin_unlock(&mm->mmu_notifier_mm->lock);

	BUG_ON(atomic_read(&mm->mm_count) <= 0);
	mmdrop(mm);
}
EXPORT_SYMBOL_GPL(mmu_notifier_unregister_no_release);

/* Initialize the global SRCU domain before any registration can happen. */
static int __init mmu_notifier_init(void)
{
	return init_srcu_struct(&srcu);
}
subsys_initcall(mmu_notifier_init);