1 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
2 /*
3 * Copyright(c) 2020 Cornelis Networks, Inc.
4 * Copyright(c) 2016 - 2017 Intel Corporation.
5 */
6
7 #include <linux/list.h>
8 #include <linux/rculist.h>
9 #include <linux/mmu_notifier.h>
10 #include <linux/interval_tree_generic.h>
11 #include <linux/sched/mm.h>
12
13 #include "mmu_rb.h"
14 #include "trace.h"
15
16 static unsigned long mmu_node_start(struct mmu_rb_node *);
17 static unsigned long mmu_node_last(struct mmu_rb_node *);
18 static int mmu_notifier_range_start(struct mmu_notifier *,
19 const struct mmu_notifier_range *);
20 static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *,
21 unsigned long, unsigned long);
22 static void release_immediate(struct kref *refcount);
23 static void handle_remove(struct work_struct *work);
24
25 static const struct mmu_notifier_ops mn_opts = {
26 .invalidate_range_start = mmu_notifier_range_start,
27 };
28
29 INTERVAL_TREE_DEFINE(struct mmu_rb_node, node, unsigned long, __last,
30 mmu_node_start, mmu_node_last, static, __mmu_int_rb);
31
mmu_node_start(struct mmu_rb_node * node)32 static unsigned long mmu_node_start(struct mmu_rb_node *node)
33 {
34 return node->addr & PAGE_MASK;
35 }
36
mmu_node_last(struct mmu_rb_node * node)37 static unsigned long mmu_node_last(struct mmu_rb_node *node)
38 {
39 return PAGE_ALIGN(node->addr + node->len) - 1;
40 }
41
hfi1_mmu_rb_register(void * ops_arg,const struct mmu_rb_ops * ops,struct workqueue_struct * wq,struct mmu_rb_handler ** handler)42 int hfi1_mmu_rb_register(void *ops_arg,
43 const struct mmu_rb_ops *ops,
44 struct workqueue_struct *wq,
45 struct mmu_rb_handler **handler)
46 {
47 struct mmu_rb_handler *h;
48 void *free_ptr;
49 int ret;
50
51 free_ptr = kzalloc(sizeof(*h) + cache_line_size() - 1, GFP_KERNEL);
52 if (!free_ptr)
53 return -ENOMEM;
54
55 h = PTR_ALIGN(free_ptr, cache_line_size());
56 h->root = RB_ROOT_CACHED;
57 h->ops = ops;
58 h->ops_arg = ops_arg;
59 INIT_HLIST_NODE(&h->mn.hlist);
60 spin_lock_init(&h->lock);
61 h->mn.ops = &mn_opts;
62 INIT_WORK(&h->del_work, handle_remove);
63 INIT_LIST_HEAD(&h->del_list);
64 INIT_LIST_HEAD(&h->lru_list);
65 h->wq = wq;
66 h->free_ptr = free_ptr;
67
68 ret = mmu_notifier_register(&h->mn, current->mm);
69 if (ret) {
70 kfree(free_ptr);
71 return ret;
72 }
73
74 *handler = h;
75 return 0;
76 }
77
hfi1_mmu_rb_unregister(struct mmu_rb_handler * handler)78 void hfi1_mmu_rb_unregister(struct mmu_rb_handler *handler)
79 {
80 struct mmu_rb_node *rbnode;
81 struct rb_node *node;
82 unsigned long flags;
83 struct list_head del_list;
84
85 /* Prevent freeing of mm until we are completely finished. */
86 mmgrab(handler->mn.mm);
87
88 /* Unregister first so we don't get any more notifications. */
89 mmu_notifier_unregister(&handler->mn, handler->mn.mm);
90
91 /*
92 * Make sure the wq delete handler is finished running. It will not
93 * be triggered once the mmu notifiers are unregistered above.
94 */
95 flush_work(&handler->del_work);
96
97 INIT_LIST_HEAD(&del_list);
98
99 spin_lock_irqsave(&handler->lock, flags);
100 while ((node = rb_first_cached(&handler->root))) {
101 rbnode = rb_entry(node, struct mmu_rb_node, node);
102 rb_erase_cached(node, &handler->root);
103 /* move from LRU list to delete list */
104 list_move(&rbnode->list, &del_list);
105 }
106 spin_unlock_irqrestore(&handler->lock, flags);
107
108 while (!list_empty(&del_list)) {
109 rbnode = list_first_entry(&del_list, struct mmu_rb_node, list);
110 list_del(&rbnode->list);
111 kref_put(&rbnode->refcount, release_immediate);
112 }
113
114 /* Now the mm may be freed. */
115 mmdrop(handler->mn.mm);
116
117 kfree(handler->free_ptr);
118 }
119
hfi1_mmu_rb_insert(struct mmu_rb_handler * handler,struct mmu_rb_node * mnode)120 int hfi1_mmu_rb_insert(struct mmu_rb_handler *handler,
121 struct mmu_rb_node *mnode)
122 {
123 struct mmu_rb_node *node;
124 unsigned long flags;
125 int ret = 0;
126
127 trace_hfi1_mmu_rb_insert(mnode);
128
129 if (current->mm != handler->mn.mm)
130 return -EPERM;
131
132 spin_lock_irqsave(&handler->lock, flags);
133 node = __mmu_rb_search(handler, mnode->addr, mnode->len);
134 if (node) {
135 ret = -EEXIST;
136 goto unlock;
137 }
138 __mmu_int_rb_insert(mnode, &handler->root);
139 list_add_tail(&mnode->list, &handler->lru_list);
140 mnode->handler = handler;
141 unlock:
142 spin_unlock_irqrestore(&handler->lock, flags);
143 return ret;
144 }
145
146 /* Caller must hold handler lock */
hfi1_mmu_rb_get_first(struct mmu_rb_handler * handler,unsigned long addr,unsigned long len)147 struct mmu_rb_node *hfi1_mmu_rb_get_first(struct mmu_rb_handler *handler,
148 unsigned long addr, unsigned long len)
149 {
150 struct mmu_rb_node *node;
151
152 trace_hfi1_mmu_rb_search(addr, len);
153 node = __mmu_int_rb_iter_first(&handler->root, addr, (addr + len) - 1);
154 if (node)
155 list_move_tail(&node->list, &handler->lru_list);
156 return node;
157 }
158
159 /* Caller must hold handler lock */
__mmu_rb_search(struct mmu_rb_handler * handler,unsigned long addr,unsigned long len)160 static struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *handler,
161 unsigned long addr,
162 unsigned long len)
163 {
164 struct mmu_rb_node *node = NULL;
165
166 trace_hfi1_mmu_rb_search(addr, len);
167 if (!handler->ops->filter) {
168 node = __mmu_int_rb_iter_first(&handler->root, addr,
169 (addr + len) - 1);
170 } else {
171 for (node = __mmu_int_rb_iter_first(&handler->root, addr,
172 (addr + len) - 1);
173 node;
174 node = __mmu_int_rb_iter_next(node, addr,
175 (addr + len) - 1)) {
176 if (handler->ops->filter(node, addr, len))
177 return node;
178 }
179 }
180 return node;
181 }
182
183 /*
184 * Must NOT call while holding mnode->handler->lock.
185 * mnode->handler->ops->remove() may sleep and mnode->handler->lock is a
186 * spinlock.
187 */
release_immediate(struct kref * refcount)188 static void release_immediate(struct kref *refcount)
189 {
190 struct mmu_rb_node *mnode =
191 container_of(refcount, struct mmu_rb_node, refcount);
192 trace_hfi1_mmu_release_node(mnode);
193 mnode->handler->ops->remove(mnode->handler->ops_arg, mnode);
194 }
195
196 /* Caller must hold mnode->handler->lock */
release_nolock(struct kref * refcount)197 static void release_nolock(struct kref *refcount)
198 {
199 struct mmu_rb_node *mnode =
200 container_of(refcount, struct mmu_rb_node, refcount);
201 list_move(&mnode->list, &mnode->handler->del_list);
202 queue_work(mnode->handler->wq, &mnode->handler->del_work);
203 }
204
205 /*
206 * struct mmu_rb_node->refcount kref_put() callback.
207 * Adds mmu_rb_node to mmu_rb_node->handler->del_list and queues
208 * handler->del_work on handler->wq.
209 * Does not remove mmu_rb_node from handler->lru_list or handler->rb_root.
210 * Acquires mmu_rb_node->handler->lock; do not call while already holding
211 * handler->lock.
212 */
hfi1_mmu_rb_release(struct kref * refcount)213 void hfi1_mmu_rb_release(struct kref *refcount)
214 {
215 struct mmu_rb_node *mnode =
216 container_of(refcount, struct mmu_rb_node, refcount);
217 struct mmu_rb_handler *handler = mnode->handler;
218 unsigned long flags;
219
220 spin_lock_irqsave(&handler->lock, flags);
221 list_move(&mnode->list, &mnode->handler->del_list);
222 spin_unlock_irqrestore(&handler->lock, flags);
223 queue_work(handler->wq, &handler->del_work);
224 }
225
hfi1_mmu_rb_evict(struct mmu_rb_handler * handler,void * evict_arg)226 void hfi1_mmu_rb_evict(struct mmu_rb_handler *handler, void *evict_arg)
227 {
228 struct mmu_rb_node *rbnode, *ptr;
229 struct list_head del_list;
230 unsigned long flags;
231 bool stop = false;
232
233 if (current->mm != handler->mn.mm)
234 return;
235
236 INIT_LIST_HEAD(&del_list);
237
238 spin_lock_irqsave(&handler->lock, flags);
239 list_for_each_entry_safe(rbnode, ptr, &handler->lru_list, list) {
240 /* refcount == 1 implies mmu_rb_handler has only rbnode ref */
241 if (kref_read(&rbnode->refcount) > 1)
242 continue;
243
244 if (handler->ops->evict(handler->ops_arg, rbnode, evict_arg,
245 &stop)) {
246 __mmu_int_rb_remove(rbnode, &handler->root);
247 /* move from LRU list to delete list */
248 list_move(&rbnode->list, &del_list);
249 }
250 if (stop)
251 break;
252 }
253 spin_unlock_irqrestore(&handler->lock, flags);
254
255 list_for_each_entry_safe(rbnode, ptr, &del_list, list) {
256 trace_hfi1_mmu_rb_evict(rbnode);
257 kref_put(&rbnode->refcount, release_immediate);
258 }
259 }
260
mmu_notifier_range_start(struct mmu_notifier * mn,const struct mmu_notifier_range * range)261 static int mmu_notifier_range_start(struct mmu_notifier *mn,
262 const struct mmu_notifier_range *range)
263 {
264 struct mmu_rb_handler *handler =
265 container_of(mn, struct mmu_rb_handler, mn);
266 struct rb_root_cached *root = &handler->root;
267 struct mmu_rb_node *node, *ptr = NULL;
268 unsigned long flags;
269
270 spin_lock_irqsave(&handler->lock, flags);
271 for (node = __mmu_int_rb_iter_first(root, range->start, range->end-1);
272 node; node = ptr) {
273 /* Guard against node removal. */
274 ptr = __mmu_int_rb_iter_next(node, range->start,
275 range->end - 1);
276 trace_hfi1_mmu_mem_invalidate(node);
277 /* Remove from rb tree and lru_list. */
278 __mmu_int_rb_remove(node, root);
279 list_del_init(&node->list);
280 kref_put(&node->refcount, release_nolock);
281 }
282 spin_unlock_irqrestore(&handler->lock, flags);
283
284 return 0;
285 }
286
287 /*
288 * Work queue function to remove all nodes that have been queued up to
289 * be removed. The key feature is that mm->mmap_lock is not being held
290 * and the remove callback can sleep while taking it, if needed.
291 */
handle_remove(struct work_struct * work)292 static void handle_remove(struct work_struct *work)
293 {
294 struct mmu_rb_handler *handler = container_of(work,
295 struct mmu_rb_handler,
296 del_work);
297 struct list_head del_list;
298 unsigned long flags;
299 struct mmu_rb_node *node;
300
301 /* remove anything that is queued to get removed */
302 spin_lock_irqsave(&handler->lock, flags);
303 list_replace_init(&handler->del_list, &del_list);
304 spin_unlock_irqrestore(&handler->lock, flags);
305
306 while (!list_empty(&del_list)) {
307 node = list_first_entry(&del_list, struct mmu_rb_node, list);
308 list_del(&node->list);
309 trace_hfi1_mmu_release_node(node);
310 handler->ops->remove(handler->ops_arg, node);
311 }
312 }
313