xref: /linux/net/sunrpc/cache.c (revision af81cb247ca94e4bcfea31ea862cf3aaf955a503)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * net/sunrpc/cache.c
4  *
5  * Generic code for various authentication-related caches
6  * used by sunrpc clients and servers.
7  *
8  * Copyright (C) 2002 Neil Brown <neilb@cse.unsw.edu.au>
9  */
10 
11 #include <linux/types.h>
12 #include <linux/fs.h>
13 #include <linux/file.h>
14 #include <linux/hex.h>
15 #include <linux/slab.h>
16 #include <linux/signal.h>
17 #include <linux/sched.h>
18 #include <linux/kmod.h>
19 #include <linux/list.h>
20 #include <linux/module.h>
21 #include <linux/ctype.h>
22 #include <linux/string_helpers.h>
23 #include <linux/uaccess.h>
24 #include <linux/poll.h>
25 #include <linux/seq_file.h>
26 #include <linux/proc_fs.h>
27 #include <linux/net.h>
28 #include <linux/workqueue.h>
29 #include <linux/mutex.h>
30 #include <linux/pagemap.h>
31 #include <asm/ioctls.h>
32 #include <linux/sunrpc/types.h>
33 #include <linux/sunrpc/cache.h>
34 #include <linux/sunrpc/stats.h>
35 #include <linux/sunrpc/rpc_pipe_fs.h>
36 #include <net/genetlink.h>
37 #include <trace/events/sunrpc.h>
38 
39 #include "netns.h"
40 #include "netlink.h"
41 #include "fail.h"
42 
43 #define	 RPCDBG_FACILITY RPCDBG_CACHE
44 
45 static bool cache_defer_req(struct cache_req *req, struct cache_head *item);
46 static void cache_revisit_request(struct cache_head *item);
47 
48 static void cache_init(struct cache_head *h, struct cache_detail *detail)
49 {
50 	time64_t now = seconds_since_boot();
51 	INIT_HLIST_NODE(&h->cache_list);
52 	h->flags = 0;
53 	kref_init(&h->ref);
54 	h->expiry_time = now + CACHE_NEW_EXPIRY;
55 	if (now <= detail->flush_time)
56 		/* ensure it isn't already expired */
57 		now = detail->flush_time + 1;
58 	h->last_refresh = now;
59 }
60 
61 static void cache_fresh_unlocked(struct cache_head *head,
62 				struct cache_detail *detail);
63 
64 static struct cache_head *sunrpc_cache_find_rcu(struct cache_detail *detail,
65 						struct cache_head *key,
66 						int hash)
67 {
68 	struct hlist_head *head = &detail->hash_table[hash];
69 	struct cache_head *tmp;
70 
71 	rcu_read_lock();
72 	hlist_for_each_entry_rcu(tmp, head, cache_list) {
73 		if (!detail->match(tmp, key))
74 			continue;
75 		if (test_bit(CACHE_VALID, &tmp->flags) &&
76 		    cache_is_expired(detail, tmp))
77 			continue;
78 		tmp = cache_get_rcu(tmp);
79 		rcu_read_unlock();
80 		return tmp;
81 	}
82 	rcu_read_unlock();
83 	return NULL;
84 }
85 
86 static void sunrpc_begin_cache_remove_entry(struct cache_head *ch,
87 					    struct cache_detail *cd)
88 {
89 	/* Must be called under cd->hash_lock */
90 	hlist_del_init_rcu(&ch->cache_list);
91 	set_bit(CACHE_CLEANED, &ch->flags);
92 	cd->entries --;
93 }
94 
/*
 * Second half of removing @ch from @cd.  Every caller in this file invokes
 * it after cd->hash_lock has been dropped.
 *
 * cache_fresh_unlocked() clears a pending upcall (revisiting deferred
 * requests and dequeuing the upcall request), then a reference on @ch is
 * dropped -- presumably the one that was held on behalf of the hash table.
 */
static void sunrpc_end_cache_remove_entry(struct cache_head *ch,
					  struct cache_detail *cd)
{
	cache_fresh_unlocked(ch, cd);
	cache_put(ch, cd);
}
101 
102 static struct cache_head *sunrpc_cache_add_entry(struct cache_detail *detail,
103 						 struct cache_head *key,
104 						 int hash)
105 {
106 	struct cache_head *new, *tmp, *freeme = NULL;
107 	struct hlist_head *head = &detail->hash_table[hash];
108 
109 	new = detail->alloc();
110 	if (!new)
111 		return NULL;
112 	/* must fully initialise 'new', else
113 	 * we might get lose if we need to
114 	 * cache_put it soon.
115 	 */
116 	cache_init(new, detail);
117 	detail->init(new, key);
118 
119 	spin_lock(&detail->hash_lock);
120 
121 	/* check if entry appeared while we slept */
122 	hlist_for_each_entry_rcu(tmp, head, cache_list,
123 				 lockdep_is_held(&detail->hash_lock)) {
124 		if (!detail->match(tmp, key))
125 			continue;
126 		if (test_bit(CACHE_VALID, &tmp->flags) &&
127 		    cache_is_expired(detail, tmp)) {
128 			sunrpc_begin_cache_remove_entry(tmp, detail);
129 			trace_cache_entry_expired(detail, tmp);
130 			freeme = tmp;
131 			break;
132 		}
133 		cache_get(tmp);
134 		spin_unlock(&detail->hash_lock);
135 		cache_put(new, detail);
136 		return tmp;
137 	}
138 
139 	cache_get(new);
140 	hlist_add_head_rcu(&new->cache_list, head);
141 	detail->entries++;
142 	if (detail->nextcheck > new->expiry_time)
143 		detail->nextcheck = new->expiry_time + 1;
144 	spin_unlock(&detail->hash_lock);
145 
146 	if (freeme)
147 		sunrpc_end_cache_remove_entry(freeme, detail);
148 	return new;
149 }
150 
151 struct cache_head *sunrpc_cache_lookup_rcu(struct cache_detail *detail,
152 					   struct cache_head *key, int hash)
153 {
154 	struct cache_head *ret;
155 
156 	ret = sunrpc_cache_find_rcu(detail, key, hash);
157 	if (ret)
158 		return ret;
159 	/* Didn't find anything, insert an empty entry */
160 	return sunrpc_cache_add_entry(detail, key, hash);
161 }
162 EXPORT_SYMBOL_GPL(sunrpc_cache_lookup_rcu);
163 
164 static void cache_dequeue(struct cache_detail *detail, struct cache_head *ch);
165 
166 static void cache_fresh_locked(struct cache_head *head, time64_t expiry,
167 			       struct cache_detail *detail)
168 {
169 	time64_t now = seconds_since_boot();
170 	if (now <= detail->flush_time)
171 		/* ensure it isn't immediately treated as expired */
172 		now = detail->flush_time + 1;
173 	head->expiry_time = expiry;
174 	head->last_refresh = now;
175 	smp_wmb(); /* paired with smp_rmb() in cache_is_valid() */
176 	set_bit(CACHE_VALID, &head->flags);
177 }
178 
179 static void cache_fresh_unlocked(struct cache_head *head,
180 				 struct cache_detail *detail)
181 {
182 	if (test_and_clear_bit(CACHE_PENDING, &head->flags)) {
183 		cache_revisit_request(head);
184 		cache_dequeue(detail, head);
185 	}
186 }
187 
/*
 * Flag @h as a negative entry (CACHE_NEGATIVE) and emit the matching
 * tracepoint.  CACHE_VALID is not touched here.
 */
static void cache_make_negative(struct cache_detail *detail,
				struct cache_head *h)
{
	set_bit(CACHE_NEGATIVE, &h->flags);
	trace_cache_entry_make_negative(detail, h);
}
194 
195 static void cache_entry_update(struct cache_detail *detail,
196 			       struct cache_head *h,
197 			       struct cache_head *new)
198 {
199 	if (!test_bit(CACHE_NEGATIVE, &new->flags)) {
200 		detail->update(h, new);
201 		trace_cache_entry_update(detail, h);
202 	} else {
203 		cache_make_negative(detail, h);
204 	}
205 }
206 
/*
 * Install the content of @new in the cache in place of @old.
 *
 * Returns the entry that now carries the content: @old itself when it
 * could be updated in place, otherwise a newly allocated entry hashed
 * into bucket @hash.  Returns NULL if that allocation fails.
 *
 * Reference handling: when @old is not returned (replacement or
 * allocation failure) a reference on @old is dropped here.
 */
struct cache_head *sunrpc_cache_update(struct cache_detail *detail,
				       struct cache_head *new, struct cache_head *old, int hash)
{
	/* The 'old' entry is to be replaced by 'new'.
	 * If 'old' is not VALID, we update it directly,
	 * otherwise we need to replace it
	 */
	struct cache_head *tmp;

	/* Unlocked peek first; the test is repeated under hash_lock. */
	if (!test_bit(CACHE_VALID, &old->flags)) {
		spin_lock(&detail->hash_lock);
		if (!test_bit(CACHE_VALID, &old->flags)) {
			cache_entry_update(detail, old, new);
			cache_fresh_locked(old, new->expiry_time, detail);
			spin_unlock(&detail->hash_lock);
			/* wake up anything waiting on the upcall, lock dropped */
			cache_fresh_unlocked(old, detail);
			return old;
		}
		/* became valid while we were taking the lock: replace instead */
		spin_unlock(&detail->hash_lock);
	}
	/* We need to insert a new entry */
	tmp = detail->alloc();
	if (!tmp) {
		cache_put(old, detail);
		return NULL;
	}
	cache_init(tmp, detail);
	/* the replacement takes its key from 'old', its content from 'new' */
	detail->init(tmp, old);

	spin_lock(&detail->hash_lock);
	cache_entry_update(detail, tmp, new);
	/* extra reference taken before 'tmp' is put on the hash chain */
	cache_get(tmp);
	hlist_add_head_rcu(&tmp->cache_list, &detail->hash_table[hash]);
	detail->entries++;
	cache_fresh_locked(tmp, new->expiry_time, detail);
	/* expiry_time 0 for the superseded entry */
	cache_fresh_locked(old, 0, detail);
	spin_unlock(&detail->hash_lock);
	cache_fresh_unlocked(tmp, detail);
	cache_fresh_unlocked(old, detail);
	cache_put(old, detail);
	return tmp;
}
EXPORT_SYMBOL_GPL(sunrpc_cache_update);
250 
251 static inline int cache_is_valid(struct cache_head *h)
252 {
253 	if (!test_bit(CACHE_VALID, &h->flags))
254 		return -EAGAIN;
255 	else {
256 		/* entry is valid */
257 		if (test_bit(CACHE_NEGATIVE, &h->flags))
258 			return -ENOENT;
259 		else {
260 			/*
261 			 * In combination with write barrier in
262 			 * sunrpc_cache_update, ensures that anyone
263 			 * using the cache entry after this sees the
264 			 * updated contents:
265 			 */
266 			smp_rmb();
267 			return 0;
268 		}
269 	}
270 }
271 
272 static int try_to_negate_entry(struct cache_detail *detail, struct cache_head *h)
273 {
274 	int rv;
275 
276 	spin_lock(&detail->hash_lock);
277 	rv = cache_is_valid(h);
278 	if (rv == -EAGAIN) {
279 		cache_make_negative(detail, h);
280 		cache_fresh_locked(h, seconds_since_boot()+CACHE_NEW_EXPIRY,
281 				   detail);
282 		rv = -ENOENT;
283 	}
284 	spin_unlock(&detail->hash_lock);
285 	cache_fresh_unlocked(h, detail);
286 	return rv;
287 }
288 
289 int cache_check_rcu(struct cache_detail *detail,
290 		    struct cache_head *h, struct cache_req *rqstp)
291 {
292 	int rv;
293 	time64_t refresh_age, age;
294 
295 	/* First decide return status as best we can */
296 	rv = cache_is_valid(h);
297 
298 	/* now see if we want to start an upcall */
299 	refresh_age = (h->expiry_time - h->last_refresh);
300 	age = seconds_since_boot() - h->last_refresh;
301 
302 	if (rqstp == NULL) {
303 		if (rv == -EAGAIN)
304 			rv = -ENOENT;
305 	} else if (rv == -EAGAIN ||
306 		   (h->expiry_time != 0 && age > refresh_age/2)) {
307 		dprintk("RPC:       Want update, refage=%lld, age=%lld\n",
308 				refresh_age, age);
309 		switch (detail->cache_upcall(detail, h)) {
310 		case -EINVAL:
311 			rv = try_to_negate_entry(detail, h);
312 			break;
313 		case -EAGAIN:
314 			cache_fresh_unlocked(h, detail);
315 			break;
316 		}
317 	}
318 
319 	if (rv == -EAGAIN) {
320 		if (!cache_defer_req(rqstp, h)) {
321 			/*
322 			 * Request was not deferred; handle it as best
323 			 * we can ourselves:
324 			 */
325 			rv = cache_is_valid(h);
326 			if (rv == -EAGAIN)
327 				rv = -ETIMEDOUT;
328 		}
329 	}
330 
331 	return rv;
332 }
333 EXPORT_SYMBOL_GPL(cache_check_rcu);
334 
335 /*
336  * This is the generic cache management routine for all
337  * the authentication caches.
338  * It checks the currency of a cache item and will (later)
339  * initiate an upcall to fill it if needed.
340  *
341  *
342  * Returns 0 if the cache_head can be used, or cache_puts it and returns
343  * -EAGAIN if upcall is pending and request has been queued
 * -ETIMEDOUT if upcall failed or request could not be queued or
345  *           upcall completed but item is still invalid (implying that
346  *           the cache item has been replaced with a newer one).
347  * -ENOENT if cache entry was negative
348  */
int cache_check(struct cache_detail *detail,
		struct cache_head *h, struct cache_req *rqstp)
{
	int rv = cache_check_rcu(detail, h, rqstp);

	/* on any failure the caller's reference is dropped for them */
	if (rv != 0)
		cache_put(h, detail);
	return rv;
}
EXPORT_SYMBOL_GPL(cache_check);
360 
361 /*
362  * caches need to be periodically cleaned.
363  * For this we maintain a list of cache_detail and
364  * a current pointer into that list and into the table
365  * for that entry.
366  *
367  * Each time cache_clean is called it finds the next non-empty entry
368  * in the current table and walks the list in that entry
369  * looking for entries that can be removed.
370  *
371  * An entry gets removed if:
372  * - The expiry is before current time
373  * - The last_refresh time is before the flush_time for that cache
374  *
375  * later we might drop old entries with non-NEVER expiry if that table
376  * is getting 'full' for some definition of 'full'
377  *
378  * The question of "how often to scan a table" is an interesting one
379  * and is answered in part by the use of the "nextcheck" field in the
380  * cache_detail.
381  * When a scan of a table begins, the nextcheck field is set to a time
382  * that is well into the future.
383  * While scanning, if an expiry time is found that is earlier than the
384  * current nextcheck time, nextcheck is set to that expiry time.
385  * If the flush_time is ever set to a time earlier than the nextcheck
386  * time, the nextcheck time is then set to that flush_time.
387  *
388  * A table is then only scanned if the current time is at least
389  * the nextcheck time.
390  *
391  */
392 
/* Every initialised cache_detail, linked through cache_detail.others. */
static LIST_HEAD(cache_list);
/* Protects cache_list and the scan cursor below. */
static DEFINE_SPINLOCK(cache_list_lock);
/* Scan cursor of cache_clean(): the cache currently being cleaned ... */
static struct cache_detail *current_detail;
/* ... and the next bucket to look at in that cache's hash table. */
static int current_index;

static void do_cache_clean(struct work_struct *work);
/* Delayed work item used to run the periodic cleaner. */
static struct delayed_work cache_cleaner;
400 
/*
 * Initialise the generic parts of @cd (locks, upcall request and reader
 * queues, wait queue, counters and timestamps), add it to the global
 * list of caches and kick the cleaner work.
 */
void sunrpc_init_cache_detail(struct cache_detail *cd)
{
	spin_lock_init(&cd->hash_lock);
	INIT_LIST_HEAD(&cd->requests);
	INIT_LIST_HEAD(&cd->readers);
	spin_lock_init(&cd->queue_lock);
	init_waitqueue_head(&cd->queue_wait);
	/* upcall request sequence numbers start at 1 */
	cd->next_seqno = 1;
	spin_lock(&cache_list_lock);
	/* nextcheck == 0: eligible for scanning straight away */
	cd->nextcheck = 0;
	cd->entries = 0;
	atomic_set(&cd->writers, 0);
	cd->last_close = 0;
	/* differs from last_close, so warn_no_listener() will report once */
	cd->last_warn = -1;
	list_add(&cd->others, &cache_list);
	spin_unlock(&cache_list_lock);

	/* start the cleaning process */
	queue_delayed_work(system_power_efficient_wq, &cache_cleaner, 0);
}
EXPORT_SYMBOL_GPL(sunrpc_init_cache_detail);
422 
/*
 * Tear down the generic parts of @cd: purge all entries, detach the
 * cleaner's cursor from it and take it off the global list of caches.
 * When that leaves no cache registered, the cleaner work is cancelled.
 */
void sunrpc_destroy_cache_detail(struct cache_detail *cd)
{
	cache_purge(cd);
	/* same lock order as cache_clean(): cache_list_lock, then hash_lock */
	spin_lock(&cache_list_lock);
	spin_lock(&cd->hash_lock);
	/* make sure the cleaner does not keep pointing at this cache */
	if (current_detail == cd)
		current_detail = NULL;
	list_del_init(&cd->others);
	spin_unlock(&cd->hash_lock);
	spin_unlock(&cache_list_lock);
	/* NOTE(review): this emptiness test runs without cache_list_lock */
	if (list_empty(&cache_list)) {
		/* module must be being unloaded so its safe to kill the worker */
		cancel_delayed_work_sync(&cache_cleaner);
	}
}
EXPORT_SYMBOL_GPL(sunrpc_destroy_cache_detail);
439 
/* cache_clean tries to find something to clean
 * and cleans it.
 * It returns 1 if it cleaned something,
 *            0 if it didn't find anything this time
 *           -1 if it fell off the end of the list.
 *
 * Each call does a bounded amount of work: it removes at most one entry
 * or finishes with at most one hash chain.  Progress is remembered in
 * current_detail/current_index between calls.
 *
 * Locking: cache_list_lock is taken first, then the hash_lock of the
 * cache being scanned.  The removed entry is released only after both
 * locks have been dropped.
 */
static int cache_clean(void)
{
	int rv = 0;
	struct list_head *next;

	spin_lock(&cache_list_lock);

	/* find a suitable table if we don't already have one */
	while (current_detail == NULL ||
	    current_index >= current_detail->hash_size) {
		if (current_detail)
			next = current_detail->others.next;
		else
			next = cache_list.next;
		if (next == &cache_list) {
			/* wrapped around: the next call starts from the head again */
			current_detail = NULL;
			spin_unlock(&cache_list_lock);
			return -1;
		}
		current_detail = list_entry(next, struct cache_detail, others);
		if (current_detail->nextcheck > seconds_since_boot())
			/* not due yet: pretend this table is already finished */
			current_index = current_detail->hash_size;
		else {
			current_index = 0;
			/* provisional; lowered below as earlier expiries are seen */
			current_detail->nextcheck = seconds_since_boot()+30*60;
		}
	}

	spin_lock(&current_detail->hash_lock);

	/* find a non-empty bucket in the table */
	while (current_index < current_detail->hash_size &&
	       hlist_empty(&current_detail->hash_table[current_index]))
		current_index++;

	/* find a cleanable entry in the bucket and clean it, or set to next bucket */
	if (current_index < current_detail->hash_size) {
		struct cache_head *ch = NULL;
		struct cache_detail *d;
		struct hlist_head *head;
		struct hlist_node *tmp;

		/* Ok, now to clean this strand */
		head = &current_detail->hash_table[current_index];
		hlist_for_each_entry_safe(ch, tmp, head, cache_list) {
			/* remember the earliest expiry seen for the next scan */
			if (current_detail->nextcheck > ch->expiry_time)
				current_detail->nextcheck = ch->expiry_time+1;
			if (!cache_is_expired(current_detail, ch))
				continue;

			sunrpc_begin_cache_remove_entry(ch, current_detail);
			trace_cache_entry_expired(current_detail, ch);
			rv = 1;
			break;
		}

		spin_unlock(&current_detail->hash_lock);
		/* keep our own pointer: current_detail may change once unlocked */
		d = current_detail;
		/* ch is NULL when the whole chain was walked without a removal */
		if (!ch)
			current_index ++;
		spin_unlock(&cache_list_lock);
		if (ch)
			sunrpc_end_cache_remove_entry(ch, d);
	} else {
		spin_unlock(&current_detail->hash_lock);
		spin_unlock(&cache_list_lock);
	}

	return rv;
}
516 
517 /*
518  * We want to regularly clean the cache, so we need to schedule some work ...
519  */
520 static void do_cache_clean(struct work_struct *work)
521 {
522 	int delay;
523 
524 	if (list_empty(&cache_list))
525 		return;
526 
527 	if (cache_clean() == -1)
528 		delay = round_jiffies_relative(30*HZ);
529 	else
530 		delay = 5;
531 
532 	queue_delayed_work(system_power_efficient_wq, &cache_cleaner, delay);
533 }
534 
535 
/*
 * Clean all caches promptly.  cache_clean() is driven to the end of the
 * cache list twice, so that every cache has had a chance to be fully
 * cleaned regardless of where the cleaner's cursor stood on entry.
 */
void cache_flush(void)
{
	int pass;

	for (pass = 0; pass < 2; pass++) {
		while (cache_clean() != -1)
			cond_resched();
	}
}
EXPORT_SYMBOL_GPL(cache_flush);
549 
550 void cache_purge(struct cache_detail *detail)
551 {
552 	struct cache_head *ch = NULL;
553 	struct hlist_head *head = NULL;
554 	int i = 0;
555 
556 	spin_lock(&detail->hash_lock);
557 	if (!detail->entries) {
558 		spin_unlock(&detail->hash_lock);
559 		return;
560 	}
561 
562 	dprintk("RPC: %d entries in %s cache\n", detail->entries, detail->name);
563 	for (i = 0; i < detail->hash_size; i++) {
564 		head = &detail->hash_table[i];
565 		while (!hlist_empty(head)) {
566 			ch = hlist_entry(head->first, struct cache_head,
567 					 cache_list);
568 			sunrpc_begin_cache_remove_entry(ch, detail);
569 			spin_unlock(&detail->hash_lock);
570 			sunrpc_end_cache_remove_entry(ch, detail);
571 			spin_lock(&detail->hash_lock);
572 		}
573 	}
574 	spin_unlock(&detail->hash_lock);
575 }
576 EXPORT_SYMBOL_GPL(cache_purge);
577 
578 
579 /*
580  * Deferral and Revisiting of Requests.
581  *
582  * If a cache lookup finds a pending entry, we
583  * need to defer the request and revisit it later.
584  * All deferred requests are stored in a hash table,
585  * indexed by "struct cache_head *".
586  * As it may be wasteful to store a whole request
587  * structure, we allow the request to provide a
588  * deferred form, which must contain a
589  * 'struct cache_deferred_req'
590  * This cache_deferred_req contains a method to allow
591  * it to be revisited when cache info is available
592  */
593 
/* Number of buckets in the deferred-request hash: one page worth of heads. */
#define	DFR_HASHSIZE	(PAGE_SIZE/sizeof(struct list_head))
/*
 * Bucket index for a cache_head pointer.  The argument is parenthesised
 * before the cast so that an expression such as 'p + 1' is hashed as a
 * whole rather than having only its first operand converted.
 */
#define	DFR_HASH(item)	((((long)(item))>>4 ^ (((long)(item))>>13)) % DFR_HASHSIZE)

/* Limit on the number of counted deferred requests, see cache_limit_defers() */
#define	DFR_MAX	300	/* ??? */
598 
/* Protects the deferred-request hash, list and counter below. */
static DEFINE_SPINLOCK(cache_defer_lock);
/* Deferred requests that count towards DFR_MAX, most recently added first. */
static LIST_HEAD(cache_defer_list);
/* All deferred requests, bucketed by DFR_HASH() of the item they wait for. */
static struct hlist_head cache_defer_hash[DFR_HASHSIZE];
/* Number of requests currently on cache_defer_list. */
static int cache_defer_cnt;
603 
604 static void __unhash_deferred_req(struct cache_deferred_req *dreq)
605 {
606 	hlist_del_init(&dreq->hash);
607 	if (!list_empty(&dreq->recent)) {
608 		list_del_init(&dreq->recent);
609 		cache_defer_cnt--;
610 	}
611 }
612 
613 static void __hash_deferred_req(struct cache_deferred_req *dreq, struct cache_head *item)
614 {
615 	int hash = DFR_HASH(item);
616 
617 	INIT_LIST_HEAD(&dreq->recent);
618 	hlist_add_head(&dreq->hash, &cache_defer_hash[hash]);
619 }
620 
621 static void setup_deferral(struct cache_deferred_req *dreq,
622 			   struct cache_head *item,
623 			   int count_me)
624 {
625 
626 	dreq->item = item;
627 
628 	spin_lock(&cache_defer_lock);
629 
630 	__hash_deferred_req(dreq, item);
631 
632 	if (count_me) {
633 		cache_defer_cnt++;
634 		list_add(&dreq->recent, &cache_defer_list);
635 	}
636 
637 	spin_unlock(&cache_defer_lock);
638 
639 }
640 
/*
 * Deferred request used by a thread that waits in place (see
 * cache_wait_req()) instead of handing the request off.
 */
struct thread_deferred_req {
	struct cache_deferred_req handle;	/* entry in the deferral hash */
	struct completion completion;		/* completed by cache_restart_thread() */
};
645 
646 static void cache_restart_thread(struct cache_deferred_req *dreq, int too_many)
647 {
648 	struct thread_deferred_req *dr =
649 		container_of(dreq, struct thread_deferred_req, handle);
650 	complete(&dr->completion);
651 }
652 
/*
 * Wait in the calling thread, for at most req->thread_wait, until the
 * upcall pending on @item is answered.
 *
 * An on-stack deferred request is hashed for @item; its ->revisit
 * callback completes the on-stack completion.  Before returning, the
 * function makes sure the on-stack request is no longer referenced:
 * either it removes it from the hash itself, or it waits for the
 * in-flight ->revisit call to finish.
 */
static void cache_wait_req(struct cache_req *req, struct cache_head *item)
{
	struct thread_deferred_req sleeper;
	struct cache_deferred_req *dreq = &sleeper.handle;

	sleeper.completion = COMPLETION_INITIALIZER_ONSTACK(sleeper.completion);
	dreq->revisit = cache_restart_thread;

	/* count_me == 0: this request is not subject to the DFR_MAX limit */
	setup_deferral(dreq, item, 0);

	/*
	 * Clean up by hand if the upcall was already answered before we
	 * got hashed, or if the wait timed out or was interrupted.
	 */
	if (!test_bit(CACHE_PENDING, &item->flags) ||
	    wait_for_completion_interruptible_timeout(
		    &sleeper.completion, req->thread_wait) <= 0) {
		/* The completion wasn't completed, so we need
		 * to clean up
		 */
		spin_lock(&cache_defer_lock);
		if (!hlist_unhashed(&sleeper.handle.hash)) {
			__unhash_deferred_req(&sleeper.handle);
			spin_unlock(&cache_defer_lock);
		} else {
			/* cache_revisit_request already removed
			 * this from the hash table, but hasn't
			 * called ->revisit yet.  It will very soon
			 * and we need to wait for it.
			 */
			spin_unlock(&cache_defer_lock);
			wait_for_completion(&sleeper.completion);
		}
	}
}
684 
685 static void cache_limit_defers(void)
686 {
687 	/* Make sure we haven't exceed the limit of allowed deferred
688 	 * requests.
689 	 */
690 	struct cache_deferred_req *discard = NULL;
691 
692 	if (cache_defer_cnt <= DFR_MAX)
693 		return;
694 
695 	spin_lock(&cache_defer_lock);
696 
697 	/* Consider removing either the first or the last */
698 	if (cache_defer_cnt > DFR_MAX) {
699 		if (get_random_u32_below(2))
700 			discard = list_entry(cache_defer_list.next,
701 					     struct cache_deferred_req, recent);
702 		else
703 			discard = list_entry(cache_defer_list.prev,
704 					     struct cache_deferred_req, recent);
705 		__unhash_deferred_req(discard);
706 	}
707 	spin_unlock(&cache_defer_lock);
708 	if (discard)
709 		discard->revisit(discard, 1);
710 }
711 
712 #if IS_ENABLED(CONFIG_FAIL_SUNRPC)
713 static inline bool cache_defer_immediately(void)
714 {
715 	return !fail_sunrpc.ignore_cache_wait &&
716 		should_fail(&fail_sunrpc.attr, 1);
717 }
718 #else
719 static inline bool cache_defer_immediately(void)
720 {
721 	return false;
722 }
723 #endif
724 
725 /* Return true if and only if a deferred request is queued. */
726 static bool cache_defer_req(struct cache_req *req, struct cache_head *item)
727 {
728 	struct cache_deferred_req *dreq;
729 
730 	if (!cache_defer_immediately()) {
731 		cache_wait_req(req, item);
732 		if (!test_bit(CACHE_PENDING, &item->flags))
733 			return false;
734 	}
735 
736 	dreq = req->defer(req);
737 	if (dreq == NULL)
738 		return false;
739 	setup_deferral(dreq, item, 1);
740 	if (!test_bit(CACHE_PENDING, &item->flags))
741 		/* Bit could have been cleared before we managed to
742 		 * set up the deferral, so need to revisit just in case
743 		 */
744 		cache_revisit_request(item);
745 
746 	cache_limit_defers();
747 	return true;
748 }
749 
750 static void cache_revisit_request(struct cache_head *item)
751 {
752 	struct cache_deferred_req *dreq;
753 	struct hlist_node *tmp;
754 	int hash = DFR_HASH(item);
755 	LIST_HEAD(pending);
756 
757 	spin_lock(&cache_defer_lock);
758 
759 	hlist_for_each_entry_safe(dreq, tmp, &cache_defer_hash[hash], hash)
760 		if (dreq->item == item) {
761 			__unhash_deferred_req(dreq);
762 			list_add(&dreq->recent, &pending);
763 		}
764 
765 	spin_unlock(&cache_defer_lock);
766 
767 	while (!list_empty(&pending)) {
768 		dreq = list_entry(pending.next, struct cache_deferred_req, recent);
769 		list_del_init(&dreq->recent);
770 		dreq->revisit(dreq, 0);
771 	}
772 }
773 
774 void cache_clean_deferred(void *owner)
775 {
776 	struct cache_deferred_req *dreq, *tmp;
777 	LIST_HEAD(pending);
778 
779 	spin_lock(&cache_defer_lock);
780 
781 	list_for_each_entry_safe(dreq, tmp, &cache_defer_list, recent) {
782 		if (dreq->owner == owner) {
783 			__unhash_deferred_req(dreq);
784 			list_add(&dreq->recent, &pending);
785 		}
786 	}
787 	spin_unlock(&cache_defer_lock);
788 
789 	while (!list_empty(&pending)) {
790 		dreq = list_entry(pending.next, struct cache_deferred_req, recent);
791 		list_del_init(&dreq->recent);
792 		dreq->revisit(dreq, 1);
793 	}
794 }
795 
796 /*
797  * communicate with user-space
798  *
799  * We have a magic /proc file - /proc/net/rpc/<cachename>/channel.
800  * On read, you get a full request, or block.
801  * On write, an update request is processed.
802  * Poll works if anything to read, and always allows write.
803  */
804 
/* One upcall request queued on a cache_detail for reading by user space. */
struct cache_request {
	struct list_head	list;		/* link in cache_detail.requests */
	struct cache_head	*item;		/* entry the upcall is about; released with cache_put() when the request is freed */
	char			*buf;		/* formatted request text, kfree()d with the request */
	int			len;		/* bytes of text in buf; 0 until formatted on first read */
	int			readers;	/* readers currently part-way through this request */
	u64			seqno;		/* compared against cache_reader.next_seqno */
};
/* Per-open state of a channel file opened for reading. */
struct cache_reader {
	struct list_head	list;		/* link in cache_detail.readers */
	int			offset;	/* if non-0, we have a refcnt on next request */
	u64			next_seqno;	/* requests with a lower seqno are skipped by this reader */
};
818 
819 static int cache_request(struct cache_detail *detail,
820 			       struct cache_request *crq)
821 {
822 	char *bp = crq->buf;
823 	int len = PAGE_SIZE;
824 
825 	detail->cache_request(detail, crq->item, &bp, &len);
826 	if (len < 0)
827 		return -E2BIG;
828 	return PAGE_SIZE - len;
829 }
830 
831 static struct cache_request *
832 cache_next_request(struct cache_detail *cd, u64 seqno)
833 {
834 	struct cache_request *rq;
835 
836 	list_for_each_entry(rq, &cd->requests, list)
837 		if (rq->seqno >= seqno)
838 			return rq;
839 	return NULL;
840 }
841 
/*
 * Read (part of) the next upcall request from the channel.
 *
 * Returns the number of bytes copied, 0 if @count is 0 or no request is
 * available to this reader, or a negative errno.  Requests whose item is
 * no longer pending when the reader reaches them are skipped.
 *
 * While a reader is part-way through a request (rp->offset != 0) it keeps
 * that request pinned through rq->readers.
 */
static ssize_t cache_read(struct file *filp, char __user *buf, size_t count,
			  loff_t *ppos, struct cache_detail *cd)
{
	struct cache_reader *rp = filp->private_data;
	struct cache_request *rq;
	struct inode *inode = file_inode(filp);
	int err;

	if (count == 0)
		return 0;

	inode_lock(inode); /* protect against multiple concurrent
			      * readers on this file */
 again:
	spin_lock(&cd->queue_lock);
	/* need to find next request */
	rq = cache_next_request(cd, rp->next_seqno);
	if (!rq) {
		spin_unlock(&cd->queue_lock);
		inode_unlock(inode);
		/* a partially read request should still have been found */
		WARN_ON_ONCE(rp->offset);
		return 0;
	}
	/* starting a new request: pin it (a partial read already holds a pin) */
	if (rp->offset == 0)
		rq->readers++;
	spin_unlock(&cd->queue_lock);

	/* the request text is formatted lazily, on first read */
	if (rq->len == 0) {
		err = cache_request(cd, rq);
		if (err < 0)
			goto out;
		rq->len = err;
	}

	if (rp->offset == 0 && !test_bit(CACHE_PENDING, &rq->item->flags)) {
		/* no longer pending: skip this request and look for the next */
		err = -EAGAIN;
		rp->next_seqno = rq->seqno + 1;
	} else {
		/* never copy past the end of this request */
		if (rp->offset + count > rq->len)
			count = rq->len - rp->offset;
		err = -EFAULT;
		if (copy_to_user(buf, rq->buf + rp->offset, count))
			goto out;
		rp->offset += count;
		if (rp->offset >= rq->len) {
			/* request fully consumed: move on to the next one */
			rp->offset = 0;
			rp->next_seqno = rq->seqno + 1;
		}
		err = 0;
	}
 out:
	if (rp->offset == 0) {
		/* need to release rq */
		spin_lock(&cd->queue_lock);
		rq->readers--;
		/* last reader of a request that is no longer pending frees it */
		if (rq->readers == 0 &&
		    !test_bit(CACHE_PENDING, &rq->item->flags)) {
			list_del(&rq->list);
			spin_unlock(&cd->queue_lock);
			cache_put(rq->item, cd);
			kfree(rq->buf);
			kfree(rq);
		} else
			spin_unlock(&cd->queue_lock);
	}
	if (err == -EAGAIN)
		goto again;
	inode_unlock(inode);
	return err ? err :  count;
}
912 
913 static ssize_t cache_do_downcall(char *kaddr, const char __user *buf,
914 				 size_t count, struct cache_detail *cd)
915 {
916 	ssize_t ret;
917 
918 	if (count == 0)
919 		return -EINVAL;
920 	if (copy_from_user(kaddr, buf, count))
921 		return -EFAULT;
922 	kaddr[count] = '\0';
923 	ret = cd->cache_parse(cd, kaddr, count);
924 	if (!ret)
925 		ret = count;
926 	return ret;
927 }
928 
929 static ssize_t cache_downcall(struct address_space *mapping,
930 			      const char __user *buf,
931 			      size_t count, struct cache_detail *cd)
932 {
933 	char *write_buf;
934 	ssize_t ret = -ENOMEM;
935 
936 	if (count >= 32768) { /* 32k is max userland buffer, lets check anyway */
937 		ret = -EINVAL;
938 		goto out;
939 	}
940 
941 	write_buf = kvmalloc(count + 1, GFP_KERNEL);
942 	if (!write_buf)
943 		goto out;
944 
945 	ret = cache_do_downcall(write_buf, buf, count, cd);
946 	kvfree(write_buf);
947 out:
948 	return ret;
949 }
950 
951 static ssize_t cache_write(struct file *filp, const char __user *buf,
952 			   size_t count, loff_t *ppos,
953 			   struct cache_detail *cd)
954 {
955 	struct address_space *mapping = filp->f_mapping;
956 	struct inode *inode = file_inode(filp);
957 	ssize_t ret = -EINVAL;
958 
959 	if (!cd->cache_parse)
960 		goto out;
961 
962 	inode_lock(inode);
963 	ret = cache_downcall(mapping, buf, count, cd);
964 	inode_unlock(inode);
965 out:
966 	return ret;
967 }
968 
969 static __poll_t cache_poll(struct file *filp, poll_table *wait,
970 			       struct cache_detail *cd)
971 {
972 	__poll_t mask;
973 	struct cache_reader *rp = filp->private_data;
974 
975 	poll_wait(filp, &cd->queue_wait, wait);
976 
977 	/* alway allow write */
978 	mask = EPOLLOUT | EPOLLWRNORM;
979 
980 	if (!rp)
981 		return mask;
982 
983 	spin_lock(&cd->queue_lock);
984 
985 	if (cache_next_request(cd, rp->next_seqno))
986 		mask |= EPOLLIN | EPOLLRDNORM;
987 	spin_unlock(&cd->queue_lock);
988 	return mask;
989 }
990 
991 static int cache_ioctl(struct inode *ino, struct file *filp,
992 		       unsigned int cmd, unsigned long arg,
993 		       struct cache_detail *cd)
994 {
995 	int len = 0;
996 	struct cache_reader *rp = filp->private_data;
997 	struct cache_request *rq;
998 
999 	if (cmd != FIONREAD || !rp)
1000 		return -EINVAL;
1001 
1002 	spin_lock(&cd->queue_lock);
1003 
1004 	/* only find the length remaining in current request,
1005 	 * or the length of the next request
1006 	 */
1007 	rq = cache_next_request(cd, rp->next_seqno);
1008 	if (rq)
1009 		len = rq->len - rp->offset;
1010 	spin_unlock(&cd->queue_lock);
1011 
1012 	return put_user(len, (int __user *)arg);
1013 }
1014 
1015 static int cache_open(struct inode *inode, struct file *filp,
1016 		      struct cache_detail *cd)
1017 {
1018 	struct cache_reader *rp = NULL;
1019 
1020 	if (!cd || !try_module_get(cd->owner))
1021 		return -EACCES;
1022 	nonseekable_open(inode, filp);
1023 	if (filp->f_mode & FMODE_READ) {
1024 		rp = kmalloc_obj(*rp);
1025 		if (!rp) {
1026 			module_put(cd->owner);
1027 			return -ENOMEM;
1028 		}
1029 		rp->offset = 0;
1030 		rp->next_seqno = 0;
1031 
1032 		spin_lock(&cd->queue_lock);
1033 		list_add(&rp->list, &cd->readers);
1034 		spin_unlock(&cd->queue_lock);
1035 	}
1036 	if (filp->f_mode & FMODE_WRITE)
1037 		atomic_inc(&cd->writers);
1038 	filp->private_data = rp;
1039 	return 0;
1040 }
1041 
1042 static int cache_release(struct inode *inode, struct file *filp,
1043 			 struct cache_detail *cd)
1044 {
1045 	struct cache_reader *rp = filp->private_data;
1046 
1047 	if (rp) {
1048 		struct cache_request *rq = NULL;
1049 
1050 		spin_lock(&cd->queue_lock);
1051 		if (rp->offset) {
1052 			struct cache_request *cr;
1053 
1054 			cr = cache_next_request(cd, rp->next_seqno);
1055 			if (cr) {
1056 				cr->readers--;
1057 				if (cr->readers == 0 &&
1058 				    !test_bit(CACHE_PENDING,
1059 					      &cr->item->flags)) {
1060 					list_del(&cr->list);
1061 					rq = cr;
1062 				}
1063 			}
1064 			rp->offset = 0;
1065 		}
1066 		list_del(&rp->list);
1067 		spin_unlock(&cd->queue_lock);
1068 
1069 		if (rq) {
1070 			cache_put(rq->item, cd);
1071 			kfree(rq->buf);
1072 			kfree(rq);
1073 		}
1074 
1075 		filp->private_data = NULL;
1076 		kfree(rp);
1077 	}
1078 	if (filp->f_mode & FMODE_WRITE) {
1079 		atomic_dec(&cd->writers);
1080 		cd->last_close = seconds_since_boot();
1081 	}
1082 	module_put(cd->owner);
1083 	return 0;
1084 }
1085 
1086 
1087 
1088 static void cache_dequeue(struct cache_detail *detail, struct cache_head *ch)
1089 {
1090 	struct cache_request *cr, *tmp;
1091 	LIST_HEAD(dequeued);
1092 
1093 	spin_lock(&detail->queue_lock);
1094 	list_for_each_entry_safe(cr, tmp, &detail->requests, list) {
1095 		if (cr->item != ch)
1096 			continue;
1097 		if (test_bit(CACHE_PENDING, &ch->flags))
1098 			/* Lost a race and it is pending again */
1099 			break;
1100 		if (cr->readers != 0)
1101 			continue;
1102 		list_move(&cr->list, &dequeued);
1103 	}
1104 	spin_unlock(&detail->queue_lock);
1105 	while (!list_empty(&dequeued)) {
1106 		cr = list_entry(dequeued.next, struct cache_request, list);
1107 		list_del(&cr->list);
1108 		cache_put(cr->item, detail);
1109 		kfree(cr->buf);
1110 		kfree(cr);
1111 	}
1112 }
1113 
1114 /*
1115  * Support routines for text-based upcalls.
1116  * Fields are separated by spaces.
 * Fields are either mangled to quote space, tab, newline and slosh with
 * a slosh, or are hexified with a leading \x.
1119  * Record is terminated with newline.
1120  *
1121  */
1122 
1123 void qword_add(char **bpp, int *lp, char *str)
1124 {
1125 	char *bp = *bpp;
1126 	int len = *lp;
1127 	int ret;
1128 
1129 	if (len < 0) return;
1130 
1131 	ret = string_escape_str(str, bp, len, ESCAPE_OCTAL, "\\ \n\t");
1132 	if (ret >= len) {
1133 		bp += len;
1134 		len = -1;
1135 	} else {
1136 		bp += ret;
1137 		len -= ret;
1138 		*bp++ = ' ';
1139 		len--;
1140 	}
1141 	*bpp = bp;
1142 	*lp = len;
1143 }
1144 EXPORT_SYMBOL_GPL(qword_add);
1145 
/*
 * Append @blen bytes from @buf to the output buffer as a "\x"-prefixed
 * hex field followed by a space.
 *
 * @bpp:  in/out cursor into the output buffer
 * @lp:   in/out count of bytes remaining; negative means an earlier
 *        append already overflowed, in which case nothing is done
 * @buf:  bytes to encode
 * @blen: number of bytes in @buf
 *
 * If not every byte could be written, or no room is left for the
 * trailing space, *lp is set to -1.
 */
void qword_addhex(char **bpp, int *lp, char *buf, int blen)
{
	char *out = *bpp;
	int room = *lp;

	if (room < 0)
		return;

	if (room > 2) {
		*out++ = '\\';
		*out++ = 'x';
		room -= 2;
		/* two output characters per input byte */
		for (; blen && room >= 2; blen--, room -= 2)
			out = hex_byte_pack(out, *buf++);
	}

	if (!blen && room >= 1) {
		*out++ = ' ';
		room--;
	} else {
		room = -1;
	}

	*bpp = out;
	*lp = room;
}
EXPORT_SYMBOL_GPL(qword_addhex);
1172 
1173 static void warn_no_listener(struct cache_detail *detail)
1174 {
1175 	if (detail->last_warn != detail->last_close) {
1176 		detail->last_warn = detail->last_close;
1177 		if (detail->warn_no_listener)
1178 			detail->warn_no_listener(detail, detail->last_close != 0);
1179 	}
1180 }
1181 
1182 static bool cache_listeners_exist(struct cache_detail *detail)
1183 {
1184 	if (atomic_read(&detail->writers))
1185 		return true;
1186 	if (detail->last_close == 0)
1187 		/* This cache was never opened */
1188 		return false;
1189 	if (detail->last_close < seconds_since_boot() - 30)
1190 		/*
1191 		 * We allow for the possibility that someone might
1192 		 * restart a userspace daemon without restarting the
1193 		 * server; but after 30 seconds, we give up.
1194 		 */
1195 		 return false;
1196 	return true;
1197 }
1198 
1199 /*
1200  * register an upcall request to user-space and queue it up to be fetched by
1201  * the upcall daemon.
1202  *
1203  * Each request is at most one page long.
1204  */
1205 static int cache_do_upcall(struct cache_detail *detail, struct cache_head *h)
1206 {
1207 	char *buf;
1208 	struct cache_request *crq;
1209 	int ret = 0;
1210 
1211 	if (test_bit(CACHE_CLEANED, &h->flags))
1212 		/* Too late to make an upcall */
1213 		return -EAGAIN;
1214 
1215 	buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
1216 	if (!buf)
1217 		return -EAGAIN;
1218 
1219 	crq = kmalloc_obj(*crq);
1220 	if (!crq) {
1221 		kfree(buf);
1222 		return -EAGAIN;
1223 	}
1224 
1225 	crq->buf = buf;
1226 	crq->len = 0;
1227 	crq->readers = 0;
1228 	spin_lock(&detail->queue_lock);
1229 	if (test_bit(CACHE_PENDING, &h->flags)) {
1230 		crq->item = cache_get(h);
1231 		crq->seqno = detail->next_seqno++;
1232 		list_add_tail(&crq->list, &detail->requests);
1233 		trace_cache_entry_upcall(detail, h);
1234 	} else
1235 		/* Lost a race, no longer PENDING, so don't enqueue */
1236 		ret = -EAGAIN;
1237 	spin_unlock(&detail->queue_lock);
1238 	if (ret != -EAGAIN && detail->cache_notify)
1239 		detail->cache_notify(detail, h);
1240 	wake_up(&detail->queue_wait);
1241 	if (ret == -EAGAIN) {
1242 		kfree(buf);
1243 		kfree(crq);
1244 	}
1245 	return ret;
1246 }
1247 
1248 int sunrpc_cache_upcall(struct cache_detail *detail, struct cache_head *h)
1249 {
1250 	if (test_and_set_bit(CACHE_PENDING, &h->flags))
1251 		return 0;
1252 	return cache_do_upcall(detail, h);
1253 }
1254 EXPORT_SYMBOL_GPL(sunrpc_cache_upcall);
1255 
1256 int sunrpc_cache_upcall_warn(struct cache_detail *detail,
1257 			     struct cache_head *h)
1258 {
1259 	if (!cache_listeners_exist(detail)) {
1260 		warn_no_listener(detail);
1261 		trace_cache_entry_no_listener(detail, h);
1262 		return -EINVAL;
1263 	}
1264 	return sunrpc_cache_upcall(detail, h);
1265 }
1266 EXPORT_SYMBOL_GPL(sunrpc_cache_upcall_warn);
1267 
1268 /*
1269  * parse a message from user-space and pass it
1270  * to an appropriate cache
 * Messages are, like requests, separated into fields by spaces;
 * each field is dequoted from \xHEXSTRING or embedded \nnn octal form.
1273  *
1274  * Message is
1275  *   reply cachename expiry key ... content....
1276  *
1277  * key and content are both parsed by cache
1278  */
1279 
/*
 * Extract the next field from *@bpp into @dest, undoing the quoting.
 *
 * @bpp:     in/out cursor into the input; on success it is advanced
 *           past the field and any spaces that follow it
 * @dest:    output buffer; always NUL-terminated on success
 * @bufsize: size of @dest; at most @bufsize - 1 bytes are decoded
 *
 * A field starting with "\x" is decoded as pairs of hex digits.  Any
 * other field is copied byte for byte, except that "\nnn" (three octal
 * digits, the first no greater than '3') is replaced by that byte.
 *
 * Returns the number of bytes stored, or -1 if decoding stopped on
 * something other than a space, newline or NUL (in which case *@bpp is
 * left untouched).
 */
int qword_get(char **bpp, char *dest, int bufsize)
{
	char *in = *bpp;
	int copied = 0;

	while (*in == ' ')
		in++;

	if (in[0] == '\\' && in[1] == 'x') {
		/* HEX STRING */
		in += 2;
		while (copied < bufsize - 1) {
			int hi = hex_to_bin(in[0]);
			int lo;

			if (hi < 0)
				break;

			lo = hex_to_bin(in[1]);
			if (lo < 0)
				break;

			*dest++ = (hi << 4) | lo;
			in += 2;
			copied++;
		}
	} else {
		/* text with \nnn octal quoting */
		while (*in != ' ' && *in != '\n' && *in &&
		       copied < bufsize - 1) {
			if (in[0] == '\\' &&
			    isodigit(in[1]) && in[1] <= '3' &&
			    isodigit(in[2]) &&
			    isodigit(in[3])) {
				int byte = in[1] - '0';

				byte = (byte << 3) | (in[2] - '0');
				byte = (byte << 3) | (in[3] - '0');
				*dest++ = byte;
				in += 4;
			} else {
				*dest++ = *in++;
			}
			copied++;
		}
	}

	if (*in != ' ' && *in != '\n' && *in != '\0')
		return -1;
	while (*in == ' ')
		in++;
	*bpp = in;
	*dest = '\0';
	return copied;
}
EXPORT_SYMBOL_GPL(qword_get);
1334 
1335 
1336 /*
1337  * support /proc/net/rpc/$CACHENAME/content
1338  * as a seqfile.
1339  * We call ->cache_show passing NULL for the item to
1340  * get a header, then pass each real item in the cache
1341  */
1342 
1343 static void *__cache_seq_start(struct seq_file *m, loff_t *pos)
1344 {
1345 	loff_t n = *pos;
1346 	unsigned int hash, entry;
1347 	struct cache_head *ch;
1348 	struct cache_detail *cd = m->private;
1349 
1350 	if (!n--)
1351 		return SEQ_START_TOKEN;
1352 	hash = n >> 32;
1353 	entry = n & ((1LL<<32) - 1);
1354 
1355 	if (hash >= cd->hash_size)
1356 		return NULL;
1357 
1358 	hlist_for_each_entry_rcu(ch, &cd->hash_table[hash], cache_list)
1359 		if (!entry--)
1360 			return ch;
1361 	ch = NULL;
1362 	while (!ch && ++hash < cd->hash_size)
1363 		ch = hlist_entry_safe(rcu_dereference(
1364 				hlist_first_rcu(&cd->hash_table[hash])),
1365 				struct cache_head, cache_list);
1366 
1367 	*pos = ((long long)hash << 32) + 1;
1368 	return ch;
1369 }
1370 
1371 static void *cache_seq_next(struct seq_file *m, void *p, loff_t *pos)
1372 {
1373 	struct cache_head *ch = p;
1374 	int hash = (*pos >> 32);
1375 	struct cache_detail *cd = m->private;
1376 
1377 	if (p == SEQ_START_TOKEN) {
1378 		hash = 0;
1379 		ch = NULL;
1380 	}
1381 	while (hash < cd->hash_size) {
1382 		if (ch)
1383 			ch = hlist_entry_safe(
1384 				rcu_dereference(
1385 					hlist_next_rcu(&ch->cache_list)),
1386 				struct cache_head, cache_list);
1387 		else
1388 			ch = hlist_entry_safe(
1389 				rcu_dereference(
1390 					hlist_first_rcu(&cd->hash_table[hash])),
1391 				struct cache_head, cache_list);
1392 		if (ch) {
1393 			++*pos;
1394 			return ch;
1395 		}
1396 		hash++;
1397 		*pos = (long long)hash << 32;
1398 	}
1399 	return NULL;
1400 }
1401 
1402 void *cache_seq_start_rcu(struct seq_file *m, loff_t *pos)
1403 	__acquires(RCU)
1404 {
1405 	rcu_read_lock();
1406 	return __cache_seq_start(m, pos);
1407 }
1408 EXPORT_SYMBOL_GPL(cache_seq_start_rcu);
1409 
1410 void *cache_seq_next_rcu(struct seq_file *file, void *p, loff_t *pos)
1411 {
1412 	return cache_seq_next(file, p, pos);
1413 }
1414 EXPORT_SYMBOL_GPL(cache_seq_next_rcu);
1415 
1416 void cache_seq_stop_rcu(struct seq_file *m, void *p)
1417 	__releases(RCU)
1418 {
1419 	rcu_read_unlock();
1420 }
1421 EXPORT_SYMBOL_GPL(cache_seq_stop_rcu);
1422 
1423 static int c_show(struct seq_file *m, void *p)
1424 {
1425 	struct cache_head *cp = p;
1426 	struct cache_detail *cd = m->private;
1427 
1428 	if (p == SEQ_START_TOKEN)
1429 		return cd->cache_show(m, cd, NULL);
1430 
1431 	ifdebug(CACHE)
1432 		seq_printf(m, "# expiry=%lld refcnt=%d flags=%lx\n",
1433 			   convert_to_wallclock(cp->expiry_time),
1434 			   kref_read(&cp->ref), cp->flags);
1435 
1436 	if (cache_check_rcu(cd, cp, NULL))
1437 		seq_puts(m, "# ");
1438 	else if (cache_is_expired(cd, cp))
1439 		seq_puts(m, "# ");
1440 
1441 	return cd->cache_show(m, cd, cp);
1442 }
1443 
1444 static const struct seq_operations cache_content_op = {
1445 	.start	= cache_seq_start_rcu,
1446 	.next	= cache_seq_next_rcu,
1447 	.stop	= cache_seq_stop_rcu,
1448 	.show	= c_show,
1449 };
1450 
1451 static int content_open(struct inode *inode, struct file *file,
1452 			struct cache_detail *cd)
1453 {
1454 	struct seq_file *seq;
1455 	int err;
1456 
1457 	if (!cd || !try_module_get(cd->owner))
1458 		return -EACCES;
1459 
1460 	err = seq_open(file, &cache_content_op);
1461 	if (err) {
1462 		module_put(cd->owner);
1463 		return err;
1464 	}
1465 
1466 	seq = file->private_data;
1467 	seq->private = cd;
1468 	return 0;
1469 }
1470 
1471 static int content_release(struct inode *inode, struct file *file,
1472 		struct cache_detail *cd)
1473 {
1474 	int ret = seq_release(inode, file);
1475 	module_put(cd->owner);
1476 	return ret;
1477 }
1478 
1479 static int open_flush(struct inode *inode, struct file *file,
1480 			struct cache_detail *cd)
1481 {
1482 	if (!cd || !try_module_get(cd->owner))
1483 		return -EACCES;
1484 	return nonseekable_open(inode, file);
1485 }
1486 
1487 static int release_flush(struct inode *inode, struct file *file,
1488 			struct cache_detail *cd)
1489 {
1490 	module_put(cd->owner);
1491 	return 0;
1492 }
1493 
1494 static ssize_t read_flush(struct file *file, char __user *buf,
1495 			  size_t count, loff_t *ppos,
1496 			  struct cache_detail *cd)
1497 {
1498 	char tbuf[22];
1499 	size_t len;
1500 
1501 	len = snprintf(tbuf, sizeof(tbuf), "%llu\n",
1502 			convert_to_wallclock(cd->flush_time));
1503 	return simple_read_from_buffer(buf, count, ppos, tbuf, len);
1504 }
1505 
1506 static ssize_t write_flush(struct file *file, const char __user *buf,
1507 			   size_t count, loff_t *ppos,
1508 			   struct cache_detail *cd)
1509 {
1510 	char tbuf[20];
1511 	char *ep;
1512 	time64_t now;
1513 
1514 	if (*ppos || count > sizeof(tbuf)-1)
1515 		return -EINVAL;
1516 	if (copy_from_user(tbuf, buf, count))
1517 		return -EFAULT;
1518 	tbuf[count] = 0;
1519 	simple_strtoul(tbuf, &ep, 0);
1520 	if (*ep && *ep != '\n')
1521 		return -EINVAL;
1522 	/* Note that while we check that 'buf' holds a valid number,
1523 	 * we always ignore the value and just flush everything.
1524 	 * Making use of the number leads to races.
1525 	 */
1526 
1527 	now = seconds_since_boot();
1528 	/* Always flush everything, so behave like cache_purge()
1529 	 * Do this by advancing flush_time to the current time,
1530 	 * or by one second if it has already reached the current time.
1531 	 * Newly added cache entries will always have ->last_refresh greater
1532 	 * that ->flush_time, so they don't get flushed prematurely.
1533 	 */
1534 
1535 	if (cd->flush_time >= now)
1536 		now = cd->flush_time + 1;
1537 
1538 	cd->flush_time = now;
1539 	cd->nextcheck = now;
1540 	cache_flush();
1541 
1542 	if (cd->flush)
1543 		cd->flush();
1544 
1545 	*ppos += count;
1546 	return count;
1547 }
1548 
1549 static ssize_t cache_read_procfs(struct file *filp, char __user *buf,
1550 				 size_t count, loff_t *ppos)
1551 {
1552 	struct cache_detail *cd = pde_data(file_inode(filp));
1553 
1554 	return cache_read(filp, buf, count, ppos, cd);
1555 }
1556 
1557 static ssize_t cache_write_procfs(struct file *filp, const char __user *buf,
1558 				  size_t count, loff_t *ppos)
1559 {
1560 	struct cache_detail *cd = pde_data(file_inode(filp));
1561 
1562 	return cache_write(filp, buf, count, ppos, cd);
1563 }
1564 
1565 static __poll_t cache_poll_procfs(struct file *filp, poll_table *wait)
1566 {
1567 	struct cache_detail *cd = pde_data(file_inode(filp));
1568 
1569 	return cache_poll(filp, wait, cd);
1570 }
1571 
/* procfs ->proc_ioctl for "channel": forward to cache_ioctl(). */
static long cache_ioctl_procfs(struct file *filp,
			       unsigned int cmd, unsigned long arg)
{
	struct inode *ino = file_inode(filp);

	return cache_ioctl(ino, filp, cmd, arg, pde_data(ino));
}
1580 
/* procfs ->proc_open for "channel": forward to cache_open(). */
static int cache_open_procfs(struct inode *inode, struct file *filp)
{
	return cache_open(inode, filp, pde_data(inode));
}
1587 
/* procfs ->proc_release for "channel": forward to cache_release(). */
static int cache_release_procfs(struct inode *inode, struct file *filp)
{
	return cache_release(inode, filp, pde_data(inode));
}
1594 
1595 static const struct proc_ops cache_channel_proc_ops = {
1596 	.proc_read	= cache_read_procfs,
1597 	.proc_write	= cache_write_procfs,
1598 	.proc_poll	= cache_poll_procfs,
1599 	.proc_ioctl	= cache_ioctl_procfs, /* for FIONREAD */
1600 	.proc_open	= cache_open_procfs,
1601 	.proc_release	= cache_release_procfs,
1602 };
1603 
/* procfs ->proc_open for "content": forward to content_open(). */
static int content_open_procfs(struct inode *inode, struct file *filp)
{
	return content_open(inode, filp, pde_data(inode));
}
1610 
/* procfs ->proc_release for "content": forward to content_release(). */
static int content_release_procfs(struct inode *inode, struct file *filp)
{
	return content_release(inode, filp, pde_data(inode));
}
1617 
1618 static const struct proc_ops content_proc_ops = {
1619 	.proc_open	= content_open_procfs,
1620 	.proc_read	= seq_read,
1621 	.proc_lseek	= seq_lseek,
1622 	.proc_release	= content_release_procfs,
1623 };
1624 
/* procfs ->proc_open for "flush": forward to open_flush(). */
static int open_flush_procfs(struct inode *inode, struct file *filp)
{
	return open_flush(inode, filp, pde_data(inode));
}
1631 
/* procfs ->proc_release for "flush": forward to release_flush(). */
static int release_flush_procfs(struct inode *inode, struct file *filp)
{
	return release_flush(inode, filp, pde_data(inode));
}
1638 
1639 static ssize_t read_flush_procfs(struct file *filp, char __user *buf,
1640 			    size_t count, loff_t *ppos)
1641 {
1642 	struct cache_detail *cd = pde_data(file_inode(filp));
1643 
1644 	return read_flush(filp, buf, count, ppos, cd);
1645 }
1646 
1647 static ssize_t write_flush_procfs(struct file *filp,
1648 				  const char __user *buf,
1649 				  size_t count, loff_t *ppos)
1650 {
1651 	struct cache_detail *cd = pde_data(file_inode(filp));
1652 
1653 	return write_flush(filp, buf, count, ppos, cd);
1654 }
1655 
1656 static const struct proc_ops cache_flush_proc_ops = {
1657 	.proc_open	= open_flush_procfs,
1658 	.proc_read	= read_flush_procfs,
1659 	.proc_write	= write_flush_procfs,
1660 	.proc_release	= release_flush_procfs,
1661 };
1662 
1663 static void remove_cache_proc_entries(struct cache_detail *cd)
1664 {
1665 	if (cd->procfs) {
1666 		proc_remove(cd->procfs);
1667 		cd->procfs = NULL;
1668 	}
1669 }
1670 
1671 static int create_cache_proc_entries(struct cache_detail *cd, struct net *net)
1672 {
1673 	struct proc_dir_entry *p;
1674 	struct sunrpc_net *sn;
1675 
1676 	if (!IS_ENABLED(CONFIG_PROC_FS))
1677 		return 0;
1678 
1679 	sn = net_generic(net, sunrpc_net_id);
1680 	cd->procfs = proc_mkdir(cd->name, sn->proc_net_rpc);
1681 	if (cd->procfs == NULL)
1682 		goto out_nomem;
1683 
1684 	p = proc_create_data("flush", S_IFREG | 0600,
1685 			     cd->procfs, &cache_flush_proc_ops, cd);
1686 	if (p == NULL)
1687 		goto out_nomem;
1688 
1689 	if (cd->cache_request || cd->cache_parse) {
1690 		p = proc_create_data("channel", S_IFREG | 0600, cd->procfs,
1691 				     &cache_channel_proc_ops, cd);
1692 		if (p == NULL)
1693 			goto out_nomem;
1694 	}
1695 	if (cd->cache_show) {
1696 		p = proc_create_data("content", S_IFREG | 0400, cd->procfs,
1697 				     &content_proc_ops, cd);
1698 		if (p == NULL)
1699 			goto out_nomem;
1700 	}
1701 	return 0;
1702 out_nomem:
1703 	remove_cache_proc_entries(cd);
1704 	return -ENOMEM;
1705 }
1706 
1707 void __init cache_initialize(void)
1708 {
1709 	INIT_DEFERRABLE_WORK(&cache_cleaner, do_cache_clean);
1710 }
1711 
/*
 * Initialise @cd and create its procfs entries in @net.  If the procfs
 * entries cannot be created the cache detail is destroyed again and
 * the error is returned.
 */
int cache_register_net(struct cache_detail *cd, struct net *net)
{
	int err;

	sunrpc_init_cache_detail(cd);

	err = create_cache_proc_entries(cd, net);
	if (!err)
		return 0;

	sunrpc_destroy_cache_detail(cd);
	return err;
}
EXPORT_SYMBOL_GPL(cache_register_net);
1723 
/*
 * Undo cache_register_net(): remove the procfs entries, then destroy
 * the cache detail.  @net is not used.
 */
void cache_unregister_net(struct cache_detail *cd, struct net *net)
{
	remove_cache_proc_entries(cd);

	sunrpc_destroy_cache_detail(cd);
}
EXPORT_SYMBOL_GPL(cache_unregister_net);
1730 
1731 struct cache_detail *cache_create_net(const struct cache_detail *tmpl, struct net *net)
1732 {
1733 	struct cache_detail *cd;
1734 	int i;
1735 
1736 	cd = kmemdup(tmpl, sizeof(struct cache_detail), GFP_KERNEL);
1737 	if (cd == NULL)
1738 		return ERR_PTR(-ENOMEM);
1739 
1740 	cd->hash_table = kzalloc_objs(struct hlist_head, cd->hash_size);
1741 	if (cd->hash_table == NULL) {
1742 		kfree(cd);
1743 		return ERR_PTR(-ENOMEM);
1744 	}
1745 
1746 	for (i = 0; i < cd->hash_size; i++)
1747 		INIT_HLIST_HEAD(&cd->hash_table[i]);
1748 	cd->net = net;
1749 	return cd;
1750 }
1751 EXPORT_SYMBOL_GPL(cache_create_net);
1752 
1753 void cache_destroy_net(struct cache_detail *cd, struct net *net)
1754 {
1755 	kfree(cd->hash_table);
1756 	kfree(cd);
1757 }
1758 EXPORT_SYMBOL_GPL(cache_destroy_net);
1759 
1760 static ssize_t cache_read_pipefs(struct file *filp, char __user *buf,
1761 				 size_t count, loff_t *ppos)
1762 {
1763 	struct cache_detail *cd = RPC_I(file_inode(filp))->private;
1764 
1765 	return cache_read(filp, buf, count, ppos, cd);
1766 }
1767 
1768 static ssize_t cache_write_pipefs(struct file *filp, const char __user *buf,
1769 				  size_t count, loff_t *ppos)
1770 {
1771 	struct cache_detail *cd = RPC_I(file_inode(filp))->private;
1772 
1773 	return cache_write(filp, buf, count, ppos, cd);
1774 }
1775 
1776 static __poll_t cache_poll_pipefs(struct file *filp, poll_table *wait)
1777 {
1778 	struct cache_detail *cd = RPC_I(file_inode(filp))->private;
1779 
1780 	return cache_poll(filp, wait, cd);
1781 }
1782 
1783 static long cache_ioctl_pipefs(struct file *filp,
1784 			      unsigned int cmd, unsigned long arg)
1785 {
1786 	struct inode *inode = file_inode(filp);
1787 	struct cache_detail *cd = RPC_I(inode)->private;
1788 
1789 	return cache_ioctl(inode, filp, cmd, arg, cd);
1790 }
1791 
1792 static int cache_open_pipefs(struct inode *inode, struct file *filp)
1793 {
1794 	struct cache_detail *cd = RPC_I(inode)->private;
1795 
1796 	return cache_open(inode, filp, cd);
1797 }
1798 
1799 static int cache_release_pipefs(struct inode *inode, struct file *filp)
1800 {
1801 	struct cache_detail *cd = RPC_I(inode)->private;
1802 
1803 	return cache_release(inode, filp, cd);
1804 }
1805 
1806 const struct file_operations cache_file_operations_pipefs = {
1807 	.owner		= THIS_MODULE,
1808 	.read		= cache_read_pipefs,
1809 	.write		= cache_write_pipefs,
1810 	.poll		= cache_poll_pipefs,
1811 	.unlocked_ioctl	= cache_ioctl_pipefs, /* for FIONREAD */
1812 	.open		= cache_open_pipefs,
1813 	.release	= cache_release_pipefs,
1814 };
1815 
1816 static int content_open_pipefs(struct inode *inode, struct file *filp)
1817 {
1818 	struct cache_detail *cd = RPC_I(inode)->private;
1819 
1820 	return content_open(inode, filp, cd);
1821 }
1822 
1823 static int content_release_pipefs(struct inode *inode, struct file *filp)
1824 {
1825 	struct cache_detail *cd = RPC_I(inode)->private;
1826 
1827 	return content_release(inode, filp, cd);
1828 }
1829 
1830 const struct file_operations content_file_operations_pipefs = {
1831 	.open		= content_open_pipefs,
1832 	.read		= seq_read,
1833 	.llseek		= seq_lseek,
1834 	.release	= content_release_pipefs,
1835 };
1836 
1837 static int open_flush_pipefs(struct inode *inode, struct file *filp)
1838 {
1839 	struct cache_detail *cd = RPC_I(inode)->private;
1840 
1841 	return open_flush(inode, filp, cd);
1842 }
1843 
1844 static int release_flush_pipefs(struct inode *inode, struct file *filp)
1845 {
1846 	struct cache_detail *cd = RPC_I(inode)->private;
1847 
1848 	return release_flush(inode, filp, cd);
1849 }
1850 
1851 static ssize_t read_flush_pipefs(struct file *filp, char __user *buf,
1852 			    size_t count, loff_t *ppos)
1853 {
1854 	struct cache_detail *cd = RPC_I(file_inode(filp))->private;
1855 
1856 	return read_flush(filp, buf, count, ppos, cd);
1857 }
1858 
1859 static ssize_t write_flush_pipefs(struct file *filp,
1860 				  const char __user *buf,
1861 				  size_t count, loff_t *ppos)
1862 {
1863 	struct cache_detail *cd = RPC_I(file_inode(filp))->private;
1864 
1865 	return write_flush(filp, buf, count, ppos, cd);
1866 }
1867 
1868 const struct file_operations cache_flush_operations_pipefs = {
1869 	.open		= open_flush_pipefs,
1870 	.read		= read_flush_pipefs,
1871 	.write		= write_flush_pipefs,
1872 	.release	= release_flush_pipefs,
1873 };
1874 
1875 int sunrpc_cache_register_pipefs(struct dentry *parent,
1876 				 const char *name, umode_t umode,
1877 				 struct cache_detail *cd)
1878 {
1879 	struct dentry *dir = rpc_create_cache_dir(parent, name, umode, cd);
1880 	if (IS_ERR(dir))
1881 		return PTR_ERR(dir);
1882 	cd->pipefs = dir;
1883 	return 0;
1884 }
1885 EXPORT_SYMBOL_GPL(sunrpc_cache_register_pipefs);
1886 
1887 void sunrpc_cache_unregister_pipefs(struct cache_detail *cd)
1888 {
1889 	if (cd->pipefs) {
1890 		rpc_remove_cache_dir(cd->pipefs);
1891 		cd->pipefs = NULL;
1892 	}
1893 }
1894 EXPORT_SYMBOL_GPL(sunrpc_cache_unregister_pipefs);
1895 
1896 void sunrpc_cache_unhash(struct cache_detail *cd, struct cache_head *h)
1897 {
1898 	spin_lock(&cd->hash_lock);
1899 	if (!hlist_unhashed(&h->cache_list)){
1900 		sunrpc_begin_cache_remove_entry(h, cd);
1901 		spin_unlock(&cd->hash_lock);
1902 		sunrpc_end_cache_remove_entry(h, cd);
1903 	} else
1904 		spin_unlock(&cd->hash_lock);
1905 }
1906 EXPORT_SYMBOL_GPL(sunrpc_cache_unhash);
1907 
1908 /**
1909  * sunrpc_cache_requests_count - count pending upcall requests
1910  * @cd: cache_detail to query
1911  *
1912  * Returns the number of requests on the cache's request list that
1913  * still have CACHE_PENDING set.
1914  */
1915 int sunrpc_cache_requests_count(struct cache_detail *cd)
1916 {
1917 	struct cache_request *crq;
1918 	int cnt = 0;
1919 
1920 	spin_lock(&cd->queue_lock);
1921 	list_for_each_entry(crq, &cd->requests, list) {
1922 		if (test_bit(CACHE_PENDING, &crq->item->flags))
1923 			cnt++;
1924 	}
1925 	spin_unlock(&cd->queue_lock);
1926 	return cnt;
1927 }
1928 EXPORT_SYMBOL_GPL(sunrpc_cache_requests_count);
1929 
1930 /**
1931  * sunrpc_cache_requests_snapshot - snapshot pending upcall requests
1932  * @cd: cache_detail to query
1933  * @items: array to fill with cache_head pointers (caller-allocated)
1934  * @seqnos: array to fill with sequence numbers (caller-allocated)
1935  * @max: size of the arrays
1936  * @min_seqno: only include entries with seqno > min_seqno (0 for all)
1937  *
1938  * Only entries with CACHE_PENDING set are included. Takes a reference
1939  * on each cache_head via cache_get(). Caller must call cache_put()
1940  * on each returned item when done.
1941  *
1942  * Returns the number of entries filled.
1943  */
1944 int sunrpc_cache_requests_snapshot(struct cache_detail *cd,
1945 				   struct cache_head **items,
1946 				   u64 *seqnos, int max,
1947 				   u64 min_seqno)
1948 {
1949 	struct cache_request *crq;
1950 	int i = 0;
1951 
1952 	spin_lock(&cd->queue_lock);
1953 	list_for_each_entry(crq, &cd->requests, list) {
1954 		if (i >= max)
1955 			break;
1956 		if (!test_bit(CACHE_PENDING, &crq->item->flags))
1957 			continue;
1958 		if (crq->seqno <= min_seqno)
1959 			continue;
1960 		items[i] = cache_get(crq->item);
1961 		seqnos[i] = crq->seqno;
1962 		i++;
1963 	}
1964 	spin_unlock(&cd->queue_lock);
1965 	return i;
1966 }
1967 EXPORT_SYMBOL_GPL(sunrpc_cache_requests_snapshot);
1968 
1969 /**
1970  * sunrpc_cache_notify - send a netlink notification for a cache event
1971  * @cd: cache_detail for the cache
1972  * @h: cache_head entry (unused, reserved for future use)
1973  * @cache_type: cache type identifier (e.g. SUNRPC_CACHE_TYPE_UNIX_GID)
1974  *
1975  * Sends a SUNRPC_CMD_CACHE_NOTIFY multicast message on the "exportd"
1976  * group if any listeners are present. Returns 0 on success or a
1977  * negative errno.
1978  */
1979 int sunrpc_cache_notify(struct cache_detail *cd, struct cache_head *h,
1980 			u32 cache_type)
1981 {
1982 	struct genlmsghdr *hdr;
1983 	struct sk_buff *msg;
1984 
1985 	if (!genl_has_listeners(&sunrpc_nl_family, cd->net,
1986 				SUNRPC_NLGRP_EXPORTD))
1987 		return -ENOLINK;
1988 
1989 	msg = genlmsg_new(nla_total_size(sizeof(u32)), GFP_KERNEL);
1990 	if (!msg)
1991 		return -ENOMEM;
1992 
1993 	hdr = genlmsg_put(msg, 0, 0, &sunrpc_nl_family, 0,
1994 			  SUNRPC_CMD_CACHE_NOTIFY);
1995 	if (!hdr) {
1996 		nlmsg_free(msg);
1997 		return -ENOMEM;
1998 	}
1999 
2000 	if (nla_put_u32(msg, SUNRPC_A_CACHE_NOTIFY_CACHE_TYPE, cache_type)) {
2001 		nlmsg_free(msg);
2002 		return -ENOMEM;
2003 	}
2004 
2005 	genlmsg_end(msg, hdr);
2006 	return genlmsg_multicast_netns(&sunrpc_nl_family, cd->net, msg, 0,
2007 				       SUNRPC_NLGRP_EXPORTD, GFP_KERNEL);
2008 }
2009 EXPORT_SYMBOL_GPL(sunrpc_cache_notify);
2010