xref: /linux/kernel/bpf/bpf_lru_list.c (revision 68f4e480b089abae26fbab0c38c3df3cbac3d79d)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (c) 2016 Facebook
3  */
4 #include <linux/cpumask.h>
5 #include <linux/spinlock.h>
6 #include <linux/percpu.h>
7 
8 #include "bpf_lru_list.h"
9 
10 #define LOCAL_FREE_TARGET		(128)
11 #define LOCAL_NR_SCANS			LOCAL_FREE_TARGET
12 
13 #define PERCPU_FREE_TARGET		(4)
14 #define PERCPU_NR_SCANS			PERCPU_FREE_TARGET
15 
16 #define IS_LOCAL_LIST_TYPE(t)	((t) >= BPF_LOCAL_LIST_T_OFFSET)
17 
18 /* bpf_lru_node helpers */
19 static bool bpf_lru_node_is_ref(const struct bpf_lru_node *node)
20 {
21 	return READ_ONCE(node->ref);
22 }
23 
24 static void bpf_lru_node_clear_ref(struct bpf_lru_node *node)
25 {
26 	WRITE_ONCE(node->ref, 0);
27 }
28 
29 static void bpf_lru_list_count_inc(struct bpf_lru_list *l,
30 				   enum bpf_lru_list_type type)
31 {
32 	if (type < NR_BPF_LRU_LIST_COUNT)
33 		l->counts[type]++;
34 }
35 
36 static void bpf_lru_list_count_dec(struct bpf_lru_list *l,
37 				   enum bpf_lru_list_type type)
38 {
39 	if (type < NR_BPF_LRU_LIST_COUNT)
40 		l->counts[type]--;
41 }
42 
43 static void __bpf_lru_node_move_to_free(struct bpf_lru_list *l,
44 					struct bpf_lru_node *node,
45 					struct list_head *free_list,
46 					enum bpf_lru_list_type tgt_free_type)
47 {
48 	if (WARN_ON_ONCE(IS_LOCAL_LIST_TYPE(node->type)))
49 		return;
50 
51 	/* If the removing node is the next_inactive_rotation candidate,
52 	 * move the next_inactive_rotation pointer also.
53 	 */
54 	if (&node->list == l->next_inactive_rotation)
55 		l->next_inactive_rotation = l->next_inactive_rotation->prev;
56 
57 	bpf_lru_list_count_dec(l, node->type);
58 
59 	node->type = tgt_free_type;
60 	WRITE_ONCE(node->pending_free, 0);
61 	list_move(&node->list, free_list);
62 }
63 
64 /* Move nodes from local list to the LRU list */
65 static void __bpf_lru_node_move_in(struct bpf_lru_list *l,
66 				   struct bpf_lru_node *node,
67 				   enum bpf_lru_list_type tgt_type)
68 {
69 	if (WARN_ON_ONCE(!IS_LOCAL_LIST_TYPE(node->type)) ||
70 	    WARN_ON_ONCE(IS_LOCAL_LIST_TYPE(tgt_type)))
71 		return;
72 
73 	bpf_lru_list_count_inc(l, tgt_type);
74 	node->type = tgt_type;
75 	bpf_lru_node_clear_ref(node);
76 	/* Reset pending_free only when moving to the free list */
77 	if (tgt_type == BPF_LRU_LIST_T_FREE)
78 		WRITE_ONCE(node->pending_free, 0);
79 	list_move(&node->list, &l->lists[tgt_type]);
80 }
81 
82 /* Move nodes between or within active and inactive list (like
83  * active to inactive, inactive to active or tail of active back to
84  * the head of active).
85  */
86 static void __bpf_lru_node_move(struct bpf_lru_list *l,
87 				struct bpf_lru_node *node,
88 				enum bpf_lru_list_type tgt_type)
89 {
90 	if (WARN_ON_ONCE(IS_LOCAL_LIST_TYPE(node->type)) ||
91 	    WARN_ON_ONCE(IS_LOCAL_LIST_TYPE(tgt_type)))
92 		return;
93 
94 	if (node->type != tgt_type) {
95 		bpf_lru_list_count_dec(l, node->type);
96 		bpf_lru_list_count_inc(l, tgt_type);
97 		node->type = tgt_type;
98 	}
99 	bpf_lru_node_clear_ref(node);
100 
101 	/* If the moving node is the next_inactive_rotation candidate,
102 	 * move the next_inactive_rotation pointer also.
103 	 */
104 	if (&node->list == l->next_inactive_rotation)
105 		l->next_inactive_rotation = l->next_inactive_rotation->prev;
106 
107 	list_move(&node->list, &l->lists[tgt_type]);
108 }
109 
110 static bool bpf_lru_list_inactive_low(const struct bpf_lru_list *l)
111 {
112 	return l->counts[BPF_LRU_LIST_T_INACTIVE] <
113 		l->counts[BPF_LRU_LIST_T_ACTIVE];
114 }
115 
116 /* Rotate the active list:
117  * 1. Start from tail
118  * 2. If the node has the ref bit set, it will be rotated
119  *    back to the head of active list with the ref bit cleared.
120  *    Give this node one more chance to survive in the active list.
121  * 3. If the ref bit is not set, move it to the head of the
122  *    inactive list.
123  * 4. It will at most scan nr_scans nodes
124  */
125 static void __bpf_lru_list_rotate_active(struct bpf_lru *lru,
126 					 struct bpf_lru_list *l)
127 {
128 	struct list_head *active = &l->lists[BPF_LRU_LIST_T_ACTIVE];
129 	struct bpf_lru_node *node, *tmp_node, *first_node;
130 	unsigned int i = 0;
131 
132 	first_node = list_first_entry(active, struct bpf_lru_node, list);
133 	list_for_each_entry_safe_reverse(node, tmp_node, active, list) {
134 		if (bpf_lru_node_is_ref(node))
135 			__bpf_lru_node_move(l, node, BPF_LRU_LIST_T_ACTIVE);
136 		else
137 			__bpf_lru_node_move(l, node, BPF_LRU_LIST_T_INACTIVE);
138 
139 		if (++i == lru->nr_scans || node == first_node)
140 			break;
141 	}
142 }
143 
144 /* Rotate the inactive list.  It starts from the next_inactive_rotation
145  * 1. If the node has ref bit set, it will be moved to the head
146  *    of active list with the ref bit cleared.
147  * 2. If the node does not have ref bit set, it will leave it
148  *    at its current location (i.e. do nothing) so that it can
149  *    be considered during the next inactive_shrink.
150  * 3. It will at most scan nr_scans nodes
151  */
152 static void __bpf_lru_list_rotate_inactive(struct bpf_lru *lru,
153 					   struct bpf_lru_list *l)
154 {
155 	struct list_head *inactive = &l->lists[BPF_LRU_LIST_T_INACTIVE];
156 	struct list_head *cur, *last, *next = inactive;
157 	struct bpf_lru_node *node;
158 	unsigned int i = 0;
159 
160 	if (list_empty(inactive))
161 		return;
162 
163 	last = l->next_inactive_rotation->next;
164 	if (last == inactive)
165 		last = last->next;
166 
167 	cur = l->next_inactive_rotation;
168 	while (i < lru->nr_scans) {
169 		if (cur == inactive) {
170 			cur = cur->prev;
171 			continue;
172 		}
173 
174 		node = list_entry(cur, struct bpf_lru_node, list);
175 		next = cur->prev;
176 		if (bpf_lru_node_is_ref(node))
177 			__bpf_lru_node_move(l, node, BPF_LRU_LIST_T_ACTIVE);
178 		if (cur == last)
179 			break;
180 		cur = next;
181 		i++;
182 	}
183 
184 	l->next_inactive_rotation = next;
185 }
186 
187 /* Shrink the inactive list.  It starts from the tail of the
188  * inactive list and only move the nodes without the ref bit
189  * set to the designated free list.
190  */
191 static unsigned int
192 __bpf_lru_list_shrink_inactive(struct bpf_lru *lru,
193 			       struct bpf_lru_list *l,
194 			       unsigned int tgt_nshrink,
195 			       struct list_head *free_list,
196 			       enum bpf_lru_list_type tgt_free_type)
197 {
198 	struct list_head *inactive = &l->lists[BPF_LRU_LIST_T_INACTIVE];
199 	struct bpf_lru_node *node, *tmp_node;
200 	unsigned int nshrinked = 0;
201 	unsigned int i = 0;
202 
203 	list_for_each_entry_safe_reverse(node, tmp_node, inactive, list) {
204 		if (bpf_lru_node_is_ref(node) &&
205 		    !READ_ONCE(node->pending_free)) {
206 			__bpf_lru_node_move(l, node, BPF_LRU_LIST_T_ACTIVE);
207 		} else if (READ_ONCE(node->pending_free) ||
208 			   lru->del_from_htab(lru->del_arg, node)) {
209 			__bpf_lru_node_move_to_free(l, node, free_list,
210 						    tgt_free_type);
211 			if (++nshrinked == tgt_nshrink)
212 				break;
213 		}
214 
215 		if (++i == lru->nr_scans)
216 			break;
217 	}
218 
219 	return nshrinked;
220 }
221 
222 /* 1. Rotate the active list (if needed)
223  * 2. Always rotate the inactive list
224  */
225 static void __bpf_lru_list_rotate(struct bpf_lru *lru, struct bpf_lru_list *l)
226 {
227 	if (bpf_lru_list_inactive_low(l))
228 		__bpf_lru_list_rotate_active(lru, l);
229 
230 	__bpf_lru_list_rotate_inactive(lru, l);
231 }
232 
233 /* Calls __bpf_lru_list_shrink_inactive() to shrink some
234  * ref-bit-cleared nodes and move them to the designated
235  * free list.
236  *
237  * If it cannot get a free node after calling
238  * __bpf_lru_list_shrink_inactive().  It will just remove
239  * one node from either inactive or active list without
240  * honoring the ref-bit.  It prefers inactive list to active
241  * list in this situation.
242  */
243 static unsigned int __bpf_lru_list_shrink(struct bpf_lru *lru,
244 					  struct bpf_lru_list *l,
245 					  unsigned int tgt_nshrink,
246 					  struct list_head *free_list,
247 					  enum bpf_lru_list_type tgt_free_type)
248 
249 {
250 	struct bpf_lru_node *node, *tmp_node;
251 	struct list_head *force_shrink_list;
252 	unsigned int nshrinked;
253 
254 	nshrinked = __bpf_lru_list_shrink_inactive(lru, l, tgt_nshrink,
255 						   free_list, tgt_free_type);
256 	if (nshrinked)
257 		return nshrinked;
258 
259 	/* Do a force shrink by ignoring the reference bit */
260 	if (!list_empty(&l->lists[BPF_LRU_LIST_T_INACTIVE]))
261 		force_shrink_list = &l->lists[BPF_LRU_LIST_T_INACTIVE];
262 	else
263 		force_shrink_list = &l->lists[BPF_LRU_LIST_T_ACTIVE];
264 
265 	list_for_each_entry_safe_reverse(node, tmp_node, force_shrink_list,
266 					 list) {
267 		if (READ_ONCE(node->pending_free) ||
268 		    lru->del_from_htab(lru->del_arg, node)) {
269 			__bpf_lru_node_move_to_free(l, node, free_list,
270 						    tgt_free_type);
271 			return 1;
272 		}
273 	}
274 
275 	return 0;
276 }
277 
278 /* Flush the nodes from the local pending list to the LRU list */
279 static void __local_list_flush(struct bpf_lru_list *l,
280 			       struct bpf_lru_locallist *loc_l)
281 {
282 	struct bpf_lru_node *node, *tmp_node;
283 
284 	list_for_each_entry_safe_reverse(node, tmp_node,
285 					 &loc_l->pending_list, list) {
286 		if (READ_ONCE(node->pending_free))
287 			__bpf_lru_node_move_in(l, node, BPF_LRU_LIST_T_FREE);
288 		else if (bpf_lru_node_is_ref(node))
289 			__bpf_lru_node_move_in(l, node, BPF_LRU_LIST_T_ACTIVE);
290 		else
291 			__bpf_lru_node_move_in(l, node,
292 					       BPF_LRU_LIST_T_INACTIVE);
293 	}
294 }
295 
296 static void bpf_lru_list_push_free(struct bpf_lru_list *l,
297 				   struct bpf_lru_node *node)
298 {
299 	unsigned long flags;
300 
301 	if (WARN_ON_ONCE(IS_LOCAL_LIST_TYPE(node->type)))
302 		return;
303 
304 	if (raw_res_spin_lock_irqsave(&l->lock, flags)) {
305 		WRITE_ONCE(node->pending_free, 1);
306 		return;
307 	}
308 	__bpf_lru_node_move(l, node, BPF_LRU_LIST_T_FREE);
309 	raw_res_spin_unlock_irqrestore(&l->lock, flags);
310 }
311 
312 static void bpf_lru_list_pop_free_to_local(struct bpf_lru *lru,
313 					   struct bpf_lru_locallist *loc_l)
314 {
315 	struct bpf_lru_list *l = &lru->common_lru.lru_list;
316 	struct bpf_lru_node *node, *tmp_node;
317 	unsigned int nfree = 0;
318 	LIST_HEAD(tmp_free);
319 
320 	if (raw_res_spin_lock(&l->lock))
321 		return;
322 
323 	__local_list_flush(l, loc_l);
324 
325 	__bpf_lru_list_rotate(lru, l);
326 
327 	list_for_each_entry_safe(node, tmp_node, &l->lists[BPF_LRU_LIST_T_FREE],
328 				 list) {
329 		__bpf_lru_node_move_to_free(l, node, &tmp_free,
330 					    BPF_LRU_LOCAL_LIST_T_FREE);
331 		if (++nfree == lru->target_free)
332 			break;
333 	}
334 
335 	if (nfree < lru->target_free)
336 		__bpf_lru_list_shrink(lru, l, lru->target_free - nfree,
337 				      &tmp_free,
338 				      BPF_LRU_LOCAL_LIST_T_FREE);
339 
340 	raw_res_spin_unlock(&l->lock);
341 
342 	/*
343 	 * Transfer the harvested nodes from the temporary list_head into
344 	 * the lockless per-CPU free llist.
345 	 */
346 	list_for_each_entry_safe(node, tmp_node, &tmp_free, list) {
347 		list_del(&node->list);
348 		llist_add(&node->llist, &loc_l->free_llist);
349 	}
350 }
351 
352 static void __local_list_add_pending(struct bpf_lru *lru,
353 				     struct bpf_lru_locallist *loc_l,
354 				     int cpu,
355 				     struct bpf_lru_node *node,
356 				     u32 hash)
357 {
358 	*(u32 *)((void *)node + lru->hash_offset) = hash;
359 	node->cpu = cpu;
360 	node->type = BPF_LRU_LOCAL_LIST_T_PENDING;
361 	WRITE_ONCE(node->pending_free, 0);
362 	bpf_lru_node_clear_ref(node);
363 	list_add(&node->list, &loc_l->pending_list);
364 }
365 
366 static struct bpf_lru_node *
367 __local_list_pop_free(struct bpf_lru_locallist *loc_l)
368 {
369 	struct llist_node *llnode;
370 
371 	llnode = llist_del_first(&loc_l->free_llist);
372 	if (!llnode)
373 		return NULL;
374 
375 	return container_of(llnode, struct bpf_lru_node, llist);
376 }
377 
378 static struct bpf_lru_node *
379 __local_list_pop_pending(struct bpf_lru *lru, struct bpf_lru_locallist *loc_l)
380 {
381 	struct bpf_lru_node *node;
382 	bool force = false;
383 
384 ignore_ref:
385 	/* Get from the tail (i.e. older element) of the pending list. */
386 	list_for_each_entry_reverse(node, &loc_l->pending_list, list) {
387 		if ((!bpf_lru_node_is_ref(node) || force) &&
388 		    (READ_ONCE(node->pending_free) ||
389 		     lru->del_from_htab(lru->del_arg, node))) {
390 			list_del(&node->list);
391 			return node;
392 		}
393 	}
394 
395 	if (!force) {
396 		force = true;
397 		goto ignore_ref;
398 	}
399 
400 	return NULL;
401 }
402 
403 static struct bpf_lru_node *bpf_percpu_lru_pop_free(struct bpf_lru *lru,
404 						    u32 hash)
405 {
406 	struct list_head *free_list;
407 	struct bpf_lru_node *node = NULL;
408 	struct bpf_lru_list *l;
409 	unsigned long flags;
410 	int cpu = raw_smp_processor_id();
411 
412 	l = per_cpu_ptr(lru->percpu_lru, cpu);
413 
414 	if (raw_res_spin_lock_irqsave(&l->lock, flags))
415 		return NULL;
416 
417 	__bpf_lru_list_rotate(lru, l);
418 
419 	free_list = &l->lists[BPF_LRU_LIST_T_FREE];
420 	if (list_empty(free_list))
421 		__bpf_lru_list_shrink(lru, l, PERCPU_FREE_TARGET, free_list,
422 				      BPF_LRU_LIST_T_FREE);
423 
424 	if (!list_empty(free_list)) {
425 		node = list_first_entry(free_list, struct bpf_lru_node, list);
426 		*(u32 *)((void *)node + lru->hash_offset) = hash;
427 		bpf_lru_node_clear_ref(node);
428 		__bpf_lru_node_move(l, node, BPF_LRU_LIST_T_INACTIVE);
429 	}
430 
431 	raw_res_spin_unlock_irqrestore(&l->lock, flags);
432 
433 	return node;
434 }
435 
436 static struct bpf_lru_node *bpf_common_lru_pop_free(struct bpf_lru *lru,
437 						    u32 hash)
438 {
439 	struct bpf_lru_locallist *loc_l, *steal_loc_l;
440 	struct bpf_common_lru *clru = &lru->common_lru;
441 	struct bpf_lru_node *node;
442 	int steal, first_steal;
443 	unsigned long flags;
444 	int cpu = raw_smp_processor_id();
445 
446 	loc_l = per_cpu_ptr(clru->local_list, cpu);
447 
448 	if (raw_res_spin_lock_irqsave(&loc_l->lock, flags))
449 		return NULL;
450 
451 	node = __local_list_pop_free(loc_l);
452 	if (!node) {
453 		bpf_lru_list_pop_free_to_local(lru, loc_l);
454 		node = __local_list_pop_free(loc_l);
455 	}
456 
457 	if (node)
458 		__local_list_add_pending(lru, loc_l, cpu, node, hash);
459 
460 	raw_res_spin_unlock_irqrestore(&loc_l->lock, flags);
461 
462 	if (node)
463 		return node;
464 
465 	/*
466 	 * No free nodes found from the local free list and
467 	 * the global LRU list.
468 	 *
469 	 * Steal from the local free/pending list of the
470 	 * current CPU and remote CPU in RR.  It starts
471 	 * with the loc_l->next_steal CPU.
472 	 *
473 	 * Acquire the victim's lock before touching either list. On
474 	 * acquisition failure (rqspinlock AA or timeout) skip the victim
475 	 * and try the next CPU.
476 	 */
477 
478 	first_steal = loc_l->next_steal;
479 	steal = first_steal;
480 	do {
481 		steal_loc_l = per_cpu_ptr(clru->local_list, steal);
482 
483 		if (!raw_res_spin_lock_irqsave(&steal_loc_l->lock, flags)) {
484 			node = __local_list_pop_free(steal_loc_l);
485 			if (!node)
486 				node = __local_list_pop_pending(lru, steal_loc_l);
487 			raw_res_spin_unlock_irqrestore(&steal_loc_l->lock, flags);
488 		}
489 
490 		steal = cpumask_next_wrap(steal, cpu_possible_mask);
491 	} while (!node && steal != first_steal);
492 
493 	loc_l->next_steal = steal;
494 
495 	if (!node)
496 		return NULL;
497 
498 	if (raw_res_spin_lock_irqsave(&loc_l->lock, flags)) {
499 		/*
500 		 * The local pending lock can't be acquired (rqspinlock AA
501 		 * or timeout). Return the stolen node to the per-CPU
502 		 * free_llist instead of orphaning it; the next pop_free on
503 		 * this CPU will pick it up.
504 		 */
505 		node->type = BPF_LRU_LOCAL_LIST_T_FREE;
506 		bpf_lru_node_clear_ref(node);
507 		WRITE_ONCE(node->pending_free, 0);
508 		llist_add(&node->llist, &loc_l->free_llist);
509 		return NULL;
510 	}
511 	__local_list_add_pending(lru, loc_l, cpu, node, hash);
512 	raw_res_spin_unlock_irqrestore(&loc_l->lock, flags);
513 
514 	return node;
515 }
516 
517 struct bpf_lru_node *bpf_lru_pop_free(struct bpf_lru *lru, u32 hash)
518 {
519 	if (lru->percpu)
520 		return bpf_percpu_lru_pop_free(lru, hash);
521 	else
522 		return bpf_common_lru_pop_free(lru, hash);
523 }
524 
525 static void bpf_common_lru_push_free(struct bpf_lru *lru,
526 				     struct bpf_lru_node *node)
527 {
528 	u8 node_type = READ_ONCE(node->type);
529 	unsigned long flags;
530 
531 	if (WARN_ON_ONCE(node_type == BPF_LRU_LIST_T_FREE) ||
532 	    WARN_ON_ONCE(node_type == BPF_LRU_LOCAL_LIST_T_FREE))
533 		return;
534 
535 	if (node_type == BPF_LRU_LOCAL_LIST_T_PENDING) {
536 		struct bpf_lru_locallist *loc_l;
537 
538 		loc_l = per_cpu_ptr(lru->common_lru.local_list, node->cpu);
539 
540 		if (raw_res_spin_lock_irqsave(&loc_l->lock, flags)) {
541 			WRITE_ONCE(node->pending_free, 1);
542 			return;
543 		}
544 
545 		if (unlikely(node->type != BPF_LRU_LOCAL_LIST_T_PENDING)) {
546 			raw_res_spin_unlock_irqrestore(&loc_l->lock,
547 						       flags);
548 			goto check_lru_list;
549 		}
550 
551 		node->type = BPF_LRU_LOCAL_LIST_T_FREE;
552 		bpf_lru_node_clear_ref(node);
553 		list_del(&node->list);
554 
555 		raw_res_spin_unlock_irqrestore(&loc_l->lock, flags);
556 
557 		llist_add(&node->llist, &loc_l->free_llist);
558 		return;
559 	}
560 
561 check_lru_list:
562 	bpf_lru_list_push_free(&lru->common_lru.lru_list, node);
563 }
564 
565 static void bpf_percpu_lru_push_free(struct bpf_lru *lru,
566 				     struct bpf_lru_node *node)
567 {
568 	struct bpf_lru_list *l;
569 	unsigned long flags;
570 
571 	l = per_cpu_ptr(lru->percpu_lru, node->cpu);
572 
573 	if (raw_res_spin_lock_irqsave(&l->lock, flags)) {
574 		WRITE_ONCE(node->pending_free, 1);
575 		return;
576 	}
577 
578 	__bpf_lru_node_move(l, node, BPF_LRU_LIST_T_FREE);
579 
580 	raw_res_spin_unlock_irqrestore(&l->lock, flags);
581 }
582 
583 void bpf_lru_push_free(struct bpf_lru *lru, struct bpf_lru_node *node)
584 {
585 	if (lru->percpu)
586 		bpf_percpu_lru_push_free(lru, node);
587 	else
588 		bpf_common_lru_push_free(lru, node);
589 }
590 
591 static void bpf_common_lru_populate(struct bpf_lru *lru, void *buf,
592 				    u32 node_offset, u32 elem_size,
593 				    u32 nr_elems)
594 {
595 	struct bpf_lru_list *l = &lru->common_lru.lru_list;
596 	u32 i;
597 
598 	for (i = 0; i < nr_elems; i++) {
599 		struct bpf_lru_node *node;
600 
601 		node = (struct bpf_lru_node *)(buf + node_offset);
602 		node->type = BPF_LRU_LIST_T_FREE;
603 		node->pending_free = 0;
604 		bpf_lru_node_clear_ref(node);
605 		list_add(&node->list, &l->lists[BPF_LRU_LIST_T_FREE]);
606 		buf += elem_size;
607 	}
608 
609 	lru->target_free = clamp((nr_elems / num_possible_cpus()) / 2,
610 				 1, LOCAL_FREE_TARGET);
611 }
612 
613 static void bpf_percpu_lru_populate(struct bpf_lru *lru, void *buf,
614 				    u32 node_offset, u32 elem_size,
615 				    u32 nr_elems)
616 {
617 	u32 i, pcpu_entries;
618 	int cpu;
619 	struct bpf_lru_list *l;
620 
621 	pcpu_entries = nr_elems / num_possible_cpus();
622 
623 	i = 0;
624 
625 	for_each_possible_cpu(cpu) {
626 		struct bpf_lru_node *node;
627 
628 		l = per_cpu_ptr(lru->percpu_lru, cpu);
629 again:
630 		node = (struct bpf_lru_node *)(buf + node_offset);
631 		node->cpu = cpu;
632 		node->type = BPF_LRU_LIST_T_FREE;
633 		node->pending_free = 0;
634 		bpf_lru_node_clear_ref(node);
635 		list_add(&node->list, &l->lists[BPF_LRU_LIST_T_FREE]);
636 		i++;
637 		buf += elem_size;
638 		if (i == nr_elems)
639 			break;
640 		if (i % pcpu_entries)
641 			goto again;
642 	}
643 }
644 
645 void bpf_lru_populate(struct bpf_lru *lru, void *buf, u32 node_offset,
646 		      u32 elem_size, u32 nr_elems)
647 {
648 	if (lru->percpu)
649 		bpf_percpu_lru_populate(lru, buf, node_offset, elem_size,
650 					nr_elems);
651 	else
652 		bpf_common_lru_populate(lru, buf, node_offset, elem_size,
653 					nr_elems);
654 }
655 
656 static void bpf_lru_locallist_init(struct bpf_lru_locallist *loc_l, int cpu)
657 {
658 	INIT_LIST_HEAD(&loc_l->pending_list);
659 	init_llist_head(&loc_l->free_llist);
660 
661 	loc_l->next_steal = cpu;
662 
663 	raw_res_spin_lock_init(&loc_l->lock);
664 }
665 
666 static void bpf_lru_list_init(struct bpf_lru_list *l)
667 {
668 	int i;
669 
670 	for (i = 0; i < NR_BPF_LRU_LIST_T; i++)
671 		INIT_LIST_HEAD(&l->lists[i]);
672 
673 	for (i = 0; i < NR_BPF_LRU_LIST_COUNT; i++)
674 		l->counts[i] = 0;
675 
676 	l->next_inactive_rotation = &l->lists[BPF_LRU_LIST_T_INACTIVE];
677 
678 	raw_res_spin_lock_init(&l->lock);
679 }
680 
681 int bpf_lru_init(struct bpf_lru *lru, bool percpu, u32 hash_offset,
682 		 del_from_htab_func del_from_htab, void *del_arg)
683 {
684 	int cpu;
685 
686 	if (percpu) {
687 		lru->percpu_lru = alloc_percpu(struct bpf_lru_list);
688 		if (!lru->percpu_lru)
689 			return -ENOMEM;
690 
691 		for_each_possible_cpu(cpu) {
692 			struct bpf_lru_list *l;
693 
694 			l = per_cpu_ptr(lru->percpu_lru, cpu);
695 			bpf_lru_list_init(l);
696 		}
697 		lru->nr_scans = PERCPU_NR_SCANS;
698 	} else {
699 		struct bpf_common_lru *clru = &lru->common_lru;
700 
701 		clru->local_list = alloc_percpu(struct bpf_lru_locallist);
702 		if (!clru->local_list)
703 			return -ENOMEM;
704 
705 		for_each_possible_cpu(cpu) {
706 			struct bpf_lru_locallist *loc_l;
707 
708 			loc_l = per_cpu_ptr(clru->local_list, cpu);
709 			bpf_lru_locallist_init(loc_l, cpu);
710 		}
711 
712 		bpf_lru_list_init(&clru->lru_list);
713 		lru->nr_scans = LOCAL_NR_SCANS;
714 	}
715 
716 	lru->percpu = percpu;
717 	lru->del_from_htab = del_from_htab;
718 	lru->del_arg = del_arg;
719 	lru->hash_offset = hash_offset;
720 
721 	return 0;
722 }
723 
724 void bpf_lru_destroy(struct bpf_lru *lru)
725 {
726 	if (lru->percpu)
727 		free_percpu(lru->percpu_lru);
728 	else
729 		free_percpu(lru->common_lru.local_list);
730 }
731