xref: /linux/net/sunrpc/sched.c (revision 4b8dbdfbc5f650095a8e105998e7a84b4d212495)
1457c8996SThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only
21da177e4SLinus Torvalds /*
31da177e4SLinus Torvalds  * linux/net/sunrpc/sched.c
41da177e4SLinus Torvalds  *
51da177e4SLinus Torvalds  * Scheduling for synchronous and asynchronous RPC requests.
61da177e4SLinus Torvalds  *
71da177e4SLinus Torvalds  * Copyright (C) 1996 Olaf Kirch, <okir@monad.swb.de>
81da177e4SLinus Torvalds  *
91da177e4SLinus Torvalds  * TCP NFS related read + write fixes
101da177e4SLinus Torvalds  * (C) 1999 Dave Airlie, University of Limerick, Ireland <airlied@linux.ie>
111da177e4SLinus Torvalds  */
121da177e4SLinus Torvalds 
131da177e4SLinus Torvalds #include <linux/module.h>
141da177e4SLinus Torvalds 
151da177e4SLinus Torvalds #include <linux/sched.h>
161da177e4SLinus Torvalds #include <linux/interrupt.h>
171da177e4SLinus Torvalds #include <linux/slab.h>
181da177e4SLinus Torvalds #include <linux/mempool.h>
191da177e4SLinus Torvalds #include <linux/smp.h>
201da177e4SLinus Torvalds #include <linux/spinlock.h>
214a3e2f71SArjan van de Ven #include <linux/mutex.h>
22d310310cSJeff Layton #include <linux/freezer.h>
23a1231fdaSTrond Myklebust #include <linux/sched/mm.h>
241da177e4SLinus Torvalds 
251da177e4SLinus Torvalds #include <linux/sunrpc/clnt.h>
269dfe52a9SDave Wysochanski #include <linux/sunrpc/metrics.h>
271da177e4SLinus Torvalds 
286951867bSBenny Halevy #include "sunrpc.h"
296951867bSBenny Halevy 
3082b0a4c3STrond Myklebust #define CREATE_TRACE_POINTS
3182b0a4c3STrond Myklebust #include <trace/events/sunrpc.h>
3282b0a4c3STrond Myklebust 
331da177e4SLinus Torvalds /*
341da177e4SLinus Torvalds  * RPC slabs and memory pools
351da177e4SLinus Torvalds  */
361da177e4SLinus Torvalds #define RPC_BUFFER_MAXSIZE	(2048)
371da177e4SLinus Torvalds #define RPC_BUFFER_POOLSIZE	(8)
381da177e4SLinus Torvalds #define RPC_TASK_POOLSIZE	(8)
39e18b890bSChristoph Lameter static struct kmem_cache	*rpc_task_slabp __read_mostly;
40e18b890bSChristoph Lameter static struct kmem_cache	*rpc_buffer_slabp __read_mostly;
41ba89966cSEric Dumazet static mempool_t	*rpc_task_mempool __read_mostly;
42ba89966cSEric Dumazet static mempool_t	*rpc_buffer_mempool __read_mostly;
431da177e4SLinus Torvalds 
4465f27f38SDavid Howells static void			rpc_async_schedule(struct work_struct *);
45bde8f00cSTrond Myklebust static void			 rpc_release_task(struct rpc_task *task);
467e0a0e38STrond Myklebust static void __rpc_queue_timer_fn(struct work_struct *);
471da177e4SLinus Torvalds 
481da177e4SLinus Torvalds /*
491da177e4SLinus Torvalds  * RPC tasks sit here while waiting for conditions to improve.
501da177e4SLinus Torvalds  */
51a4a87499STrond Myklebust static struct rpc_wait_queue delay_queue;
521da177e4SLinus Torvalds 
531da177e4SLinus Torvalds /*
541da177e4SLinus Torvalds  * rpciod-related stuff
551da177e4SLinus Torvalds  */
5640a5f1b1STrond Myklebust struct workqueue_struct *rpciod_workqueue __read_mostly;
5740a5f1b1STrond Myklebust struct workqueue_struct *xprtiod_workqueue __read_mostly;
58675dd90aSChuck Lever EXPORT_SYMBOL_GPL(xprtiod_workqueue);
591da177e4SLinus Torvalds 
6033e5c765STrond Myklebust gfp_t rpc_task_gfp_mask(void)
6133e5c765STrond Myklebust {
6233e5c765STrond Myklebust 	if (current->flags & PF_WQ_WORKER)
6333e5c765STrond Myklebust 		return GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN;
6433e5c765STrond Myklebust 	return GFP_KERNEL;
6533e5c765STrond Myklebust }
66*4b8dbdfbSTrond Myklebust EXPORT_SYMBOL_GPL(rpc_task_gfp_mask);
6733e5c765STrond Myklebust 
685efd1876STrond Myklebust unsigned long
695efd1876STrond Myklebust rpc_task_timeout(const struct rpc_task *task)
705efd1876STrond Myklebust {
715efd1876STrond Myklebust 	unsigned long timeout = READ_ONCE(task->tk_timeout);
725efd1876STrond Myklebust 
735efd1876STrond Myklebust 	if (timeout != 0) {
745efd1876STrond Myklebust 		unsigned long now = jiffies;
755efd1876STrond Myklebust 		if (time_before(now, timeout))
765efd1876STrond Myklebust 			return timeout - now;
775efd1876STrond Myklebust 	}
785efd1876STrond Myklebust 	return 0;
795efd1876STrond Myklebust }
805efd1876STrond Myklebust EXPORT_SYMBOL_GPL(rpc_task_timeout);
815efd1876STrond Myklebust 
821da177e4SLinus Torvalds /*
831da177e4SLinus Torvalds  * Disable the timer for a given RPC task. Should be called with
841da177e4SLinus Torvalds  * queue->lock and bh_disabled in order to avoid races within
851da177e4SLinus Torvalds  * rpc_run_timer().
861da177e4SLinus Torvalds  */
/*
 * Remove @task from @queue's timer list and clear its deadline.
 * No-op if the task has no armed timer. If this was the last timed
 * waiter, stop the queue's delayed work as well.
 */
 875d00837bSTrond Myklebust static void
 88eb276c0eSTrond Myklebust __rpc_disable_timer(struct rpc_wait_queue *queue, struct rpc_task *task)
 891da177e4SLinus Torvalds {
 906b2e6856STrond Myklebust 	if (list_empty(&task->u.tk_wait.timer_list))
 9136df9aaeSTrond Myklebust 		return;
 921da177e4SLinus Torvalds 	task->tk_timeout = 0;
 9336df9aaeSTrond Myklebust 	list_del(&task->u.tk_wait.timer_list);
 94eb276c0eSTrond Myklebust 	if (list_empty(&queue->timer_list.list))
	/* No more timed waiters: cancel the queue's timer work */
 957e0a0e38STrond Myklebust 		cancel_delayed_work(&queue->timer_list.dwork);
 9636df9aaeSTrond Myklebust }
9736df9aaeSTrond Myklebust 
9836df9aaeSTrond Myklebust static void
9936df9aaeSTrond Myklebust rpc_set_queue_timer(struct rpc_wait_queue *queue, unsigned long expires)
10036df9aaeSTrond Myklebust {
1017e0a0e38STrond Myklebust 	unsigned long now = jiffies;
1027e0a0e38STrond Myklebust 	queue->timer_list.expires = expires;
1037e0a0e38STrond Myklebust 	if (time_before_eq(expires, now))
1047e0a0e38STrond Myklebust 		expires = 0;
1057e0a0e38STrond Myklebust 	else
1067e0a0e38STrond Myklebust 		expires -= now;
1077e0a0e38STrond Myklebust 	mod_delayed_work(rpciod_workqueue, &queue->timer_list.dwork, expires);
1081da177e4SLinus Torvalds }
1091da177e4SLinus Torvalds 
1101da177e4SLinus Torvalds /*
1111da177e4SLinus Torvalds  * Set up a timer for the current task.
1121da177e4SLinus Torvalds  */
/*
 * Record @timeout as @task's deadline and link the task onto the queue's
 * timer list. The queue timer itself only needs (re)arming when the list
 * was empty or the new deadline is earlier than the currently armed one.
 * Caller holds queue->lock.
 */
 1135d00837bSTrond Myklebust static void
 1146b2e6856STrond Myklebust __rpc_add_timer(struct rpc_wait_queue *queue, struct rpc_task *task,
 1156b2e6856STrond Myklebust 		unsigned long timeout)
 1161da177e4SLinus Torvalds {
 1176b2e6856STrond Myklebust 	task->tk_timeout = timeout;
 1187e0a0e38STrond Myklebust 	if (list_empty(&queue->timer_list.list) || time_before(timeout, queue->timer_list.expires))
 1196b2e6856STrond Myklebust 		rpc_set_queue_timer(queue, timeout);
 120eb276c0eSTrond Myklebust 	list_add(&task->u.tk_wait.timer_list, &queue->timer_list.list);
 1211da177e4SLinus Torvalds }
1221da177e4SLinus Torvalds 
123c05eecf6STrond Myklebust static void rpc_set_waitqueue_priority(struct rpc_wait_queue *queue, int priority)
124c05eecf6STrond Myklebust {
125edd2e36fSTrond Myklebust 	if (queue->priority != priority) {
126c05eecf6STrond Myklebust 		queue->priority = priority;
127f42f7c28STrond Myklebust 		queue->nr = 1U << priority;
128c05eecf6STrond Myklebust 	}
129edd2e36fSTrond Myklebust }
130c05eecf6STrond Myklebust 
/* Restart servicing from the queue's highest priority level. */
 131c05eecf6STrond Myklebust static void rpc_reset_waitqueue_priority(struct rpc_wait_queue *queue)
 132c05eecf6STrond Myklebust {
 133c05eecf6STrond Myklebust 	rpc_set_waitqueue_priority(queue, queue->maxpriority);
 134f42f7c28STrond Myklebust }
135f42f7c28STrond Myklebust 
136f42f7c28STrond Myklebust /*
137f42f7c28STrond Myklebust  * Add a request to a queue list
138f42f7c28STrond Myklebust  */
/*
 * Enqueue @task on list @q, grouping tasks that share tk_owner: the
 * first task of an owner sits on @q itself, later ones hang off that
 * task's u.tk_wait.links list. For grouped tasks, u.tk_wait.list is
 * repurposed: .next caches the queue head and .prev == NULL marks the
 * task as "on an owner's links list, not on the queue" — a sentinel
 * that __rpc_list_dequeue_task() relies on. Caller holds queue->lock.
 */
 139f42f7c28STrond Myklebust static void
 140f42f7c28STrond Myklebust __rpc_list_enqueue_task(struct list_head *q, struct rpc_task *task)
 141f42f7c28STrond Myklebust {
 142f42f7c28STrond Myklebust 	struct rpc_task *t;
 143f42f7c28STrond Myklebust 
 144f42f7c28STrond Myklebust 	list_for_each_entry(t, q, u.tk_wait.list) {
 145f42f7c28STrond Myklebust 		if (t->tk_owner == task->tk_owner) {
	/* Same owner already queued: chain behind it */
 146f42f7c28STrond Myklebust 			list_add_tail(&task->u.tk_wait.links,
 147f42f7c28STrond Myklebust 					&t->u.tk_wait.links);
 148f42f7c28STrond Myklebust 			/* Cache the queue head in task->u.tk_wait.list */
 149f42f7c28STrond Myklebust 			task->u.tk_wait.list.next = q;
 150f42f7c28STrond Myklebust 			task->u.tk_wait.list.prev = NULL;
 151f42f7c28STrond Myklebust 			return;
 152f42f7c28STrond Myklebust 		}
 153f42f7c28STrond Myklebust 	}
	/* First task for this owner: put it on the queue proper */
 154f42f7c28STrond Myklebust 	INIT_LIST_HEAD(&task->u.tk_wait.links);
 155f42f7c28STrond Myklebust 	list_add_tail(&task->u.tk_wait.list, q);
 156f42f7c28STrond Myklebust }
157f42f7c28STrond Myklebust 
158f42f7c28STrond Myklebust /*
159f42f7c28STrond Myklebust  * Remove request from a queue list
160f42f7c28STrond Myklebust  */
/*
 * Undo __rpc_list_enqueue_task(). If @task was chained on another
 * task's links list (.prev == NULL sentinel), just unlink it. If @task
 * was the owner's head entry on the queue and has followers, promote
 * the first follower onto the queue (using the queue head cached in
 * its u.tk_wait.list.next) before removing @task. Caller holds
 * queue->lock.
 */
 161f42f7c28STrond Myklebust static void
 162f42f7c28STrond Myklebust __rpc_list_dequeue_task(struct rpc_task *task)
 163f42f7c28STrond Myklebust {
 164f42f7c28STrond Myklebust 	struct list_head *q;
 165f42f7c28STrond Myklebust 	struct rpc_task *t;
 166f42f7c28STrond Myklebust 
 167f42f7c28STrond Myklebust 	if (task->u.tk_wait.list.prev == NULL) {
	/* Chained entry: only on the owner's links list */
 168f42f7c28STrond Myklebust 		list_del(&task->u.tk_wait.links);
 169f42f7c28STrond Myklebust 		return;
 170f42f7c28STrond Myklebust 	}
 171f42f7c28STrond Myklebust 	if (!list_empty(&task->u.tk_wait.links)) {
 172f42f7c28STrond Myklebust 		t = list_first_entry(&task->u.tk_wait.links,
 173f42f7c28STrond Myklebust 				struct rpc_task,
 174f42f7c28STrond Myklebust 				u.tk_wait.links);
 175f42f7c28STrond Myklebust 		/* Assume __rpc_list_enqueue_task() cached the queue head */
 176f42f7c28STrond Myklebust 		q = t->u.tk_wait.list.next;
 177f42f7c28STrond Myklebust 		list_add_tail(&t->u.tk_wait.list, q);
 178f42f7c28STrond Myklebust 		list_del(&task->u.tk_wait.links);
 179f42f7c28STrond Myklebust 	}
 180f42f7c28STrond Myklebust 	list_del(&task->u.tk_wait.list);
 181c05eecf6STrond Myklebust }
182c05eecf6STrond Myklebust 
1831da177e4SLinus Torvalds /*
1841da177e4SLinus Torvalds  * Add new request to a priority queue.
1851da177e4SLinus Torvalds  */
1863b27bad7STrond Myklebust static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue,
1873b27bad7STrond Myklebust 		struct rpc_task *task,
1883b27bad7STrond Myklebust 		unsigned char queue_priority)
1891da177e4SLinus Torvalds {
1903b27bad7STrond Myklebust 	if (unlikely(queue_priority > queue->maxpriority))
191c05eecf6STrond Myklebust 		queue_priority = queue->maxpriority;
192f42f7c28STrond Myklebust 	__rpc_list_enqueue_task(&queue->tasks[queue_priority], task);
1931da177e4SLinus Torvalds }
1941da177e4SLinus Torvalds 
1951da177e4SLinus Torvalds /*
1961da177e4SLinus Torvalds  * Add new request to wait queue.
1971da177e4SLinus Torvalds  */
/*
 * Place @task on @queue (priority or FIFO form) and mark it queued.
 * Caller holds queue->lock. Note the write barrier: tk_waitqueue must
 * be visible before the QUEUED bit, for the lockless readers.
 */
 1983b27bad7STrond Myklebust static void __rpc_add_wait_queue(struct rpc_wait_queue *queue,
 1993b27bad7STrond Myklebust 		struct rpc_task *task,
 2003b27bad7STrond Myklebust 		unsigned char queue_priority)
 2011da177e4SLinus Torvalds {
 2026b2e6856STrond Myklebust 	INIT_LIST_HEAD(&task->u.tk_wait.timer_list);
 2031da177e4SLinus Torvalds 	if (RPC_IS_PRIORITY(queue))
 2043b27bad7STrond Myklebust 		__rpc_add_wait_queue_priority(queue, task, queue_priority);
 2051da177e4SLinus Torvalds 	else
 2061da177e4SLinus Torvalds 		list_add_tail(&task->u.tk_wait.list, &queue->tasks[0]);
 20796ef13b2STrond Myklebust 	task->tk_waitqueue = queue;
 208e19b63daSChuck Lever 	queue->qlen++;
 2091166fde6STrond Myklebust 	/* barrier matches the read in rpc_wake_up_task_queue_locked() */
 2101166fde6STrond Myklebust 	smp_wmb();
 2111da177e4SLinus Torvalds 	rpc_set_queued(task);
 2121da177e4SLinus Torvalds }
2131da177e4SLinus Torvalds 
2141da177e4SLinus Torvalds /*
2151da177e4SLinus Torvalds  * Remove request from a priority queue.
2161da177e4SLinus Torvalds  */
/* Priority-queue removal is the owner-aware list dequeue. */
 2171da177e4SLinus Torvalds static void __rpc_remove_wait_queue_priority(struct rpc_task *task)
 2181da177e4SLinus Torvalds {
 219f42f7c28STrond Myklebust 	__rpc_list_dequeue_task(task);
 2201da177e4SLinus Torvalds }
2211da177e4SLinus Torvalds 
2221da177e4SLinus Torvalds /*
2231da177e4SLinus Torvalds  * Remove request from queue.
2241da177e4SLinus Torvalds  * Note: must be called with spin lock held.
2251da177e4SLinus Torvalds  */
/*
 * Take @task off @queue: first disarm any pending timer, then unlink
 * it from the appropriate list form and drop the queue length.
 */
 22696ef13b2STrond Myklebust static void __rpc_remove_wait_queue(struct rpc_wait_queue *queue, struct rpc_task *task)
 2271da177e4SLinus Torvalds {
 228eb276c0eSTrond Myklebust 	__rpc_disable_timer(queue, task);
 2291da177e4SLinus Torvalds 	if (RPC_IS_PRIORITY(queue))
 2301da177e4SLinus Torvalds 		__rpc_remove_wait_queue_priority(task);
 231f42f7c28STrond Myklebust 	else
 2321da177e4SLinus Torvalds 		list_del(&task->u.tk_wait.list);
 233e19b63daSChuck Lever 	queue->qlen--;
 2341da177e4SLinus Torvalds }
2351da177e4SLinus Torvalds 
/*
 * Common wait-queue initializer: @nr_queues priority levels (1 for a
 * plain FIFO queue). maxpriority must be set before the priority reset,
 * which derives its starting level from it.
 */
 2363ff7576dSTrond Myklebust static void __rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const char *qname, unsigned char nr_queues)
 2371da177e4SLinus Torvalds {
 2381da177e4SLinus Torvalds 	int i;
 2391da177e4SLinus Torvalds 
 2401da177e4SLinus Torvalds 	spin_lock_init(&queue->lock);
 2411da177e4SLinus Torvalds 	for (i = 0; i < ARRAY_SIZE(queue->tasks); i++)
 2421da177e4SLinus Torvalds 		INIT_LIST_HEAD(&queue->tasks[i]);
 2433ff7576dSTrond Myklebust 	queue->maxpriority = nr_queues - 1;
 2441da177e4SLinus Torvalds 	rpc_reset_waitqueue_priority(queue);
 24536df9aaeSTrond Myklebust 	queue->qlen = 0;
 2467e0a0e38STrond Myklebust 	queue->timer_list.expires = 0;
 24766eb3addSTrond Myklebust 	INIT_DELAYED_WORK(&queue->timer_list.dwork, __rpc_queue_timer_fn);
 24836df9aaeSTrond Myklebust 	INIT_LIST_HEAD(&queue->timer_list.list);
 2492f09c242STrond Myklebust 	rpc_assign_waitqueue_name(queue, qname);
 2501da177e4SLinus Torvalds }
2511da177e4SLinus Torvalds 
/* Initialize @queue with the full set of RPC priority levels. */
 2521da177e4SLinus Torvalds void rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const char *qname)
 2531da177e4SLinus Torvalds {
 2543ff7576dSTrond Myklebust 	__rpc_init_priority_wait_queue(queue, qname, RPC_NR_PRIORITY);
 2551da177e4SLinus Torvalds }
 256689cf5c1SAlexandros Batsakis EXPORT_SYMBOL_GPL(rpc_init_priority_wait_queue);
2571da177e4SLinus Torvalds 
/* Initialize @queue as a plain FIFO queue (single priority level). */
 2581da177e4SLinus Torvalds void rpc_init_wait_queue(struct rpc_wait_queue *queue, const char *qname)
 2591da177e4SLinus Torvalds {
 2603ff7576dSTrond Myklebust 	__rpc_init_priority_wait_queue(queue, qname, 1);
 2611da177e4SLinus Torvalds }
 262e8914c65STrond Myklebust EXPORT_SYMBOL_GPL(rpc_init_wait_queue);
2631da177e4SLinus Torvalds 
/*
 * Tear down @queue's timer: waits for any in-flight expiry work to
 * finish, so the queue may be freed afterwards.
 */
 264f6a1cc89STrond Myklebust void rpc_destroy_wait_queue(struct rpc_wait_queue *queue)
 265f6a1cc89STrond Myklebust {
 2667e0a0e38STrond Myklebust 	cancel_delayed_work_sync(&queue->timer_list.dwork);
 267f6a1cc89STrond Myklebust }
 268f6a1cc89STrond Myklebust EXPORT_SYMBOL_GPL(rpc_destroy_wait_queue);
269f6a1cc89STrond Myklebust 
/*
 * wait_bit action used while waiting for RPC_TASK_ACTIVE to clear:
 * sleep (freezer-friendly), then report -ERESTARTSYS if a signal
 * permitted by @mode is pending, else 0 to keep waiting.
 */
 270dfd01f02SPeter Zijlstra static int rpc_wait_bit_killable(struct wait_bit_key *key, int mode)
 27144c28873STrond Myklebust {
 272416ad3c9SColin Cross 	freezable_schedule_unsafe();
 273dfd01f02SPeter Zijlstra 	if (signal_pending_state(mode, current))
 274dfd01f02SPeter Zijlstra 		return -ERESTARTSYS;
 27544c28873STrond Myklebust 	return 0;
 27644c28873STrond Myklebust }
27744c28873STrond Myklebust 
/*
 * Assign a debugging/tracing id (tk_pid) to the task: per-client ids
 * when a client is attached, otherwise ids from a global counter.
 * Compiled out (empty inline) when neither debug nor tracepoints are
 * enabled.
 */
 2781306729bSJeff Layton #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) || IS_ENABLED(CONFIG_TRACEPOINTS)
 279c44fe705STrond Myklebust static void rpc_task_set_debuginfo(struct rpc_task *task)
 280c44fe705STrond Myklebust {
 2810392dd51SChuck Lever 	struct rpc_clnt *clnt = task->tk_client;
 2820392dd51SChuck Lever 
 2830392dd51SChuck Lever 	/* Might be a task carrying a reverse-direction operation */
 2840392dd51SChuck Lever 	if (!clnt) {
 285c44fe705STrond Myklebust 		static atomic_t rpc_pid;
 286c44fe705STrond Myklebust 
 287c44fe705STrond Myklebust 		task->tk_pid = atomic_inc_return(&rpc_pid);
 2880392dd51SChuck Lever 		return;
 2890392dd51SChuck Lever 	}
 2900392dd51SChuck Lever 
 2910392dd51SChuck Lever 	task->tk_pid = atomic_inc_return(&clnt->cl_pid);
 292c44fe705STrond Myklebust }
 293c44fe705STrond Myklebust #else
 294c44fe705STrond Myklebust static inline void rpc_task_set_debuginfo(struct rpc_task *task)
 295c44fe705STrond Myklebust {
 296c44fe705STrond Myklebust }
 297c44fe705STrond Myklebust #endif
298c44fe705STrond Myklebust 
/*
 * Mark @task as active (in execution) and emit the begin tracepoint;
 * also assigns its debug id first so traces carry a valid tk_pid.
 */
 299e6b3c4dbSTrond Myklebust static void rpc_set_active(struct rpc_task *task)
 300e6b3c4dbSTrond Myklebust {
 301c44fe705STrond Myklebust 	rpc_task_set_debuginfo(task);
 30258f9612cSTrond Myklebust 	set_bit(RPC_TASK_ACTIVE, &task->tk_runstate);
 303e671edb9SChuck Lever 	trace_rpc_task_begin(task, NULL);
 304e6b3c4dbSTrond Myklebust }
305e6b3c4dbSTrond Myklebust 
30644c28873STrond Myklebust /*
30744c28873STrond Myklebust  * Mark an RPC call as having completed by clearing the 'active' bit
308bf294b41STrond Myklebust  * and then waking up all tasks that were sleeping.
30944c28873STrond Myklebust  */
/*
 * Clear RPC_TASK_ACTIVE and wake any waiters sleeping on that bit.
 * Both the clear and the reference drop happen under wq->lock so that
 * __rpc_wait_for_completion_task() (which takes the same lock via
 * out_of_line_wait_on_bit()) cannot miss the wakeup.
 *
 * Returns true if this call dropped the last reference to @task.
 */
 310bf294b41STrond Myklebust static int rpc_complete_task(struct rpc_task *task)
 31144c28873STrond Myklebust {
 312bf294b41STrond Myklebust 	void *m = &task->tk_runstate;
 313bf294b41STrond Myklebust 	wait_queue_head_t *wq = bit_waitqueue(m, RPC_TASK_ACTIVE);
 314bf294b41STrond Myklebust 	struct wait_bit_key k = __WAIT_BIT_KEY_INITIALIZER(m, RPC_TASK_ACTIVE);
 315bf294b41STrond Myklebust 	unsigned long flags;
 316bf294b41STrond Myklebust 	int ret;
 317bf294b41STrond Myklebust 
 318e671edb9SChuck Lever 	trace_rpc_task_complete(task, NULL);
 31982b0a4c3STrond Myklebust 
 320bf294b41STrond Myklebust 	spin_lock_irqsave(&wq->lock, flags);
 321e6b3c4dbSTrond Myklebust 	clear_bit(RPC_TASK_ACTIVE, &task->tk_runstate);
 322bf294b41STrond Myklebust 	ret = atomic_dec_and_test(&task->tk_count);
 323bf294b41STrond Myklebust 	if (waitqueue_active(wq))
 324ac5be6b4SAndrea Arcangeli 		__wake_up_locked_key(wq, TASK_NORMAL, &k);
 325bf294b41STrond Myklebust 	spin_unlock_irqrestore(&wq->lock, flags);
 326bf294b41STrond Myklebust 	return ret;
 32744c28873STrond Myklebust }
32844c28873STrond Myklebust 
32944c28873STrond Myklebust /*
33044c28873STrond Myklebust  * Allow callers to wait for completion of an RPC call
331bf294b41STrond Myklebust  *
332bf294b41STrond Myklebust  * Note the use of out_of_line_wait_on_bit() rather than wait_on_bit()
333bf294b41STrond Myklebust  * to enforce taking of the wq->lock and hence avoid races with
334bf294b41STrond Myklebust  * rpc_complete_task().
33544c28873STrond Myklebust  */
/*
 * Block (killable) until @task completes, i.e. until RPC_TASK_ACTIVE
 * clears. @action may override the default sleep behaviour; NULL
 * selects rpc_wait_bit_killable. Returns 0 or a negative errno
 * (e.g. -ERESTARTSYS on a fatal signal).
 */
 336c1221321SNeilBrown int __rpc_wait_for_completion_task(struct rpc_task *task, wait_bit_action_f *action)
 33744c28873STrond Myklebust {
 33844c28873STrond Myklebust 	if (action == NULL)
 339150030b7SMatthew Wilcox 		action = rpc_wait_bit_killable;
 340bf294b41STrond Myklebust 	return out_of_line_wait_on_bit(&task->tk_runstate, RPC_TASK_ACTIVE,
 341150030b7SMatthew Wilcox 			action, TASK_KILLABLE);
 34244c28873STrond Myklebust }
 343e8914c65STrond Myklebust EXPORT_SYMBOL_GPL(__rpc_wait_for_completion_task);
34444c28873STrond Myklebust 
3451da177e4SLinus Torvalds /*
3461da177e4SLinus Torvalds  * Make an RPC task runnable.
3471da177e4SLinus Torvalds  *
348506026c3SJeff Layton  * Note: If the task is ASYNC, and is being made runnable after sitting on an
349506026c3SJeff Layton  * rpc_wait_queue, this must be called with the queue spinlock held to protect
350506026c3SJeff Layton  * the wait queue operation.
351a3c3cac5STrond Myklebust  * Note the ordering of rpc_test_and_set_running() and rpc_clear_queued(),
352a3c3cac5STrond Myklebust  * which is needed to ensure that __rpc_execute() doesn't loop (due to the
353a3c3cac5STrond Myklebust  * lockless RPC_IS_QUEUED() test) before we've had a chance to test
354a3c3cac5STrond Myklebust  * the RPC_TASK_RUNNING flag.
3551da177e4SLinus Torvalds  */
/*
 * Transition @task to RUNNING and kick it: async tasks are handed to
 * @wq, sync tasks get a wake_up_bit on RPC_TASK_QUEUED. RUNNING is
 * set before QUEUED is cleared (see ordering note in the comment
 * above); if the task was already running there is nothing to wake.
 */
 356f1dc237cSTrond Myklebust static void rpc_make_runnable(struct workqueue_struct *wq,
 357f1dc237cSTrond Myklebust 		struct rpc_task *task)
 3581da177e4SLinus Torvalds {
 359a3c3cac5STrond Myklebust 	bool need_wakeup = !rpc_test_and_set_running(task);
 360a3c3cac5STrond Myklebust 
 3611da177e4SLinus Torvalds 	rpc_clear_queued(task);
 362a3c3cac5STrond Myklebust 	if (!need_wakeup)
 3631da177e4SLinus Torvalds 		return;
 3641da177e4SLinus Torvalds 	if (RPC_IS_ASYNC(task)) {
 36565f27f38SDavid Howells 		INIT_WORK(&task->u.tk_work, rpc_async_schedule);
 366f1dc237cSTrond Myklebust 		queue_work(wq, &task->u.tk_work);
 3671da177e4SLinus Torvalds 	} else
 36896651ab3STrond Myklebust 		wake_up_bit(&task->tk_runstate, RPC_TASK_QUEUED);
 3691da177e4SLinus Torvalds }
3701da177e4SLinus Torvalds 
3711da177e4SLinus Torvalds /*
3721da177e4SLinus Torvalds  * Prepare for sleeping on a wait queue.
3731da177e4SLinus Torvalds  * By always appending tasks to the list we ensure FIFO behavior.
3741da177e4SLinus Torvalds  * NB: An RPC task will only receive interrupt-driven events as long
3751da177e4SLinus Torvalds  * as it's on a wait queue.
3761da177e4SLinus Torvalds  */
/*
 * Trace the sleep and put @task on @q at @queue_priority. Caller
 * holds q->lock and has already checked the task is not queued.
 */
 3771fab7dc4STrond Myklebust static void __rpc_do_sleep_on_priority(struct rpc_wait_queue *q,
 3783b27bad7STrond Myklebust 		struct rpc_task *task,
 3793b27bad7STrond Myklebust 		unsigned char queue_priority)
 3801da177e4SLinus Torvalds {
 381e671edb9SChuck Lever 	trace_rpc_task_sleep(task, q);
 38282b0a4c3STrond Myklebust 
 3833b27bad7STrond Myklebust 	__rpc_add_wait_queue(q, task, queue_priority);
 3841da177e4SLinus Torvalds }
3856b2e6856STrond Myklebust 
/*
 * Sleep without a timeout. Refuses (with a one-shot warning) to queue
 * a task that is already on a wait queue. Caller holds q->lock.
 */
 3861fab7dc4STrond Myklebust static void __rpc_sleep_on_priority(struct rpc_wait_queue *q,
 3871fab7dc4STrond Myklebust 		struct rpc_task *task,
 3881fab7dc4STrond Myklebust 		unsigned char queue_priority)
 3891fab7dc4STrond Myklebust {
 3901fab7dc4STrond Myklebust 	if (WARN_ON_ONCE(RPC_IS_QUEUED(task)))
 3911fab7dc4STrond Myklebust 		return;
 3921fab7dc4STrond Myklebust 	__rpc_do_sleep_on_priority(q, task, queue_priority);
 3931fab7dc4STrond Myklebust }
3941fab7dc4STrond Myklebust 
/*
 * Sleep with an absolute deadline (@timeout in jiffies). If the
 * deadline is already in the past the task is not queued at all and
 * its status is set to -ETIMEDOUT instead. Caller holds q->lock.
 */
 3956b2e6856STrond Myklebust static void __rpc_sleep_on_priority_timeout(struct rpc_wait_queue *q,
 3966b2e6856STrond Myklebust 		struct rpc_task *task, unsigned long timeout,
 3976b2e6856STrond Myklebust 		unsigned char queue_priority)
 3986b2e6856STrond Myklebust {
 3991fab7dc4STrond Myklebust 	if (WARN_ON_ONCE(RPC_IS_QUEUED(task)))
 4001fab7dc4STrond Myklebust 		return;
 4016b2e6856STrond Myklebust 	if (time_is_after_jiffies(timeout)) {
 4021fab7dc4STrond Myklebust 		__rpc_do_sleep_on_priority(q, task, queue_priority);
 4036b2e6856STrond Myklebust 		__rpc_add_timer(q, task, timeout);
 4046b2e6856STrond Myklebust 	} else
 4056b2e6856STrond Myklebust 		task->tk_status = -ETIMEDOUT;
 4066b2e6856STrond Myklebust }
4071da177e4SLinus Torvalds 
/*
 * Install @action as the task's wakeup callback. Warns (once) and
 * keeps the old callback if one is already set.
 */
 40887150aaeSTrond Myklebust static void rpc_set_tk_callback(struct rpc_task *task, rpc_action action)
 40987150aaeSTrond Myklebust {
 41087150aaeSTrond Myklebust 	if (action && !WARN_ON_ONCE(task->tk_callback != NULL))
 41187150aaeSTrond Myklebust 		task->tk_callback = action;
 41287150aaeSTrond Myklebust }
41387150aaeSTrond Myklebust 
/*
 * Sanity check before sleeping: an inactive task must not be queued.
 * On violation, fail the task with -EIO, drop its reference
 * asynchronously, and return false so the caller bails out.
 */
 41487150aaeSTrond Myklebust static bool rpc_sleep_check_activated(struct rpc_task *task)
 41587150aaeSTrond Myklebust {
 41687150aaeSTrond Myklebust 	/* We shouldn't ever put an inactive task to sleep */
 41787150aaeSTrond Myklebust 	if (WARN_ON_ONCE(!RPC_IS_ACTIVATED(task))) {
 41887150aaeSTrond Myklebust 		task->tk_status = -EIO;
 41987150aaeSTrond Myklebust 		rpc_put_task_async(task);
 42087150aaeSTrond Myklebust 		return false;
 42187150aaeSTrond Myklebust 	}
 42287150aaeSTrond Myklebust 	return true;
 42387150aaeSTrond Myklebust }
42487150aaeSTrond Myklebust 
/*
 * Put @task to sleep on @q at its current priority with an absolute
 * deadline (@timeout, jiffies), optionally installing @action as the
 * wakeup callback. Takes q->lock around the queue manipulation.
 */
 4256b2e6856STrond Myklebust void rpc_sleep_on_timeout(struct rpc_wait_queue *q, struct rpc_task *task,
 4266b2e6856STrond Myklebust 				rpc_action action, unsigned long timeout)
 4271da177e4SLinus Torvalds {
 42887150aaeSTrond Myklebust 	if (!rpc_sleep_check_activated(task))
 429e454a7a8SWeston Andros Adamson 		return;
 43087150aaeSTrond Myklebust 
 43187150aaeSTrond Myklebust 	rpc_set_tk_callback(task, action);
 432e6b3c4dbSTrond Myklebust 
 4331da177e4SLinus Torvalds 	/*
 4341da177e4SLinus Torvalds 	 * Protect the queue operations.
 4351da177e4SLinus Torvalds 	 */
 436c049f8eaSTrond Myklebust 	spin_lock(&q->lock);
 4376b2e6856STrond Myklebust 	__rpc_sleep_on_priority_timeout(q, task, timeout, task->tk_priority);
 438c049f8eaSTrond Myklebust 	spin_unlock(&q->lock);
 4396b2e6856STrond Myklebust }
 4406b2e6856STrond Myklebust EXPORT_SYMBOL_GPL(rpc_sleep_on_timeout);
4416b2e6856STrond Myklebust 
/*
 * Put @task to sleep on @q at its current priority with no deadline
 * (warns if a stale tk_timeout is still set), optionally installing
 * @action as the wakeup callback.
 */
 4426b2e6856STrond Myklebust void rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
 4436b2e6856STrond Myklebust 				rpc_action action)
 4446b2e6856STrond Myklebust {
 4456b2e6856STrond Myklebust 	if (!rpc_sleep_check_activated(task))
 4466b2e6856STrond Myklebust 		return;
 4476b2e6856STrond Myklebust 
 4486b2e6856STrond Myklebust 	rpc_set_tk_callback(task, action);
 4496b2e6856STrond Myklebust 
 4506b2e6856STrond Myklebust 	WARN_ON_ONCE(task->tk_timeout != 0);
 4516b2e6856STrond Myklebust 	/*
 4526b2e6856STrond Myklebust 	 * Protect the queue operations.
 4536b2e6856STrond Myklebust 	 */
 454c049f8eaSTrond Myklebust 	spin_lock(&q->lock);
 45587150aaeSTrond Myklebust 	__rpc_sleep_on_priority(q, task, task->tk_priority);
 456c049f8eaSTrond Myklebust 	spin_unlock(&q->lock);
 4571da177e4SLinus Torvalds }
 458e8914c65STrond Myklebust EXPORT_SYMBOL_GPL(rpc_sleep_on);
4591da177e4SLinus Torvalds 
/*
 * As rpc_sleep_on_timeout(), but with an explicit @priority instead of
 * the task's own. The public priority value is rebased against
 * RPC_PRIORITY_LOW to get the queue index.
 */
 4606b2e6856STrond Myklebust void rpc_sleep_on_priority_timeout(struct rpc_wait_queue *q,
 4616b2e6856STrond Myklebust 		struct rpc_task *task, unsigned long timeout, int priority)
 4626b2e6856STrond Myklebust {
 4636b2e6856STrond Myklebust 	if (!rpc_sleep_check_activated(task))
 4646b2e6856STrond Myklebust 		return;
 4656b2e6856STrond Myklebust 
 4666b2e6856STrond Myklebust 	priority -= RPC_PRIORITY_LOW;
 4676b2e6856STrond Myklebust 	/*
 4686b2e6856STrond Myklebust 	 * Protect the queue operations.
 4696b2e6856STrond Myklebust 	 */
 470c049f8eaSTrond Myklebust 	spin_lock(&q->lock);
 4716b2e6856STrond Myklebust 	__rpc_sleep_on_priority_timeout(q, task, timeout, priority);
 472c049f8eaSTrond Myklebust 	spin_unlock(&q->lock);
 4736b2e6856STrond Myklebust }
 4746b2e6856STrond Myklebust EXPORT_SYMBOL_GPL(rpc_sleep_on_priority_timeout);
4756b2e6856STrond Myklebust 
/*
 * As rpc_sleep_on(), but with an explicit @priority (rebased against
 * RPC_PRIORITY_LOW) instead of the task's own, and no deadline.
 */
 4763b27bad7STrond Myklebust void rpc_sleep_on_priority(struct rpc_wait_queue *q, struct rpc_task *task,
 4778357a9b6STrond Myklebust 		int priority)
 4783b27bad7STrond Myklebust {
 47987150aaeSTrond Myklebust 	if (!rpc_sleep_check_activated(task))
 480e454a7a8SWeston Andros Adamson 		return;
 48187150aaeSTrond Myklebust 
 4826b2e6856STrond Myklebust 	WARN_ON_ONCE(task->tk_timeout != 0);
 4838357a9b6STrond Myklebust 	priority -= RPC_PRIORITY_LOW;
 4843b27bad7STrond Myklebust 	/*
 4853b27bad7STrond Myklebust 	 * Protect the queue operations.
 4863b27bad7STrond Myklebust 	 */
 487c049f8eaSTrond Myklebust 	spin_lock(&q->lock);
 4888357a9b6STrond Myklebust 	__rpc_sleep_on_priority(q, task, priority);
 489c049f8eaSTrond Myklebust 	spin_unlock(&q->lock);
 4903b27bad7STrond Myklebust }
 4911e1093c7STrond Myklebust EXPORT_SYMBOL_GPL(rpc_sleep_on_priority);
4923b27bad7STrond Myklebust 
4931da177e4SLinus Torvalds /**
494f1dc237cSTrond Myklebust  * __rpc_do_wake_up_task_on_wq - wake up a single rpc_task
495f1dc237cSTrond Myklebust  * @wq: workqueue on which to run task
49696ef13b2STrond Myklebust  * @queue: wait queue
4971da177e4SLinus Torvalds  * @task: task to be woken up
4981da177e4SLinus Torvalds  *
4991da177e4SLinus Torvalds  * Caller must hold queue->lock, and have cleared the task queued flag.
5001da177e4SLinus Torvalds  */
/*
 * Core wakeup: detach @task from @queue and make it runnable on @wq.
 * An inactive task cannot be woken — that is logged and ignored.
 */
 501f1dc237cSTrond Myklebust static void __rpc_do_wake_up_task_on_wq(struct workqueue_struct *wq,
 502f1dc237cSTrond Myklebust 		struct rpc_wait_queue *queue,
 503f1dc237cSTrond Myklebust 		struct rpc_task *task)
 5041da177e4SLinus Torvalds {
 5051da177e4SLinus Torvalds 	/* Has the task been executed yet? If not, we cannot wake it up! */
 5061da177e4SLinus Torvalds 	if (!RPC_IS_ACTIVATED(task)) {
 5071da177e4SLinus Torvalds 		printk(KERN_ERR "RPC: Inactive task (%p) being woken up!\n", task);
 5081da177e4SLinus Torvalds 		return;
 5091da177e4SLinus Torvalds 	}
 5101da177e4SLinus Torvalds 
 511e671edb9SChuck Lever 	trace_rpc_task_wakeup(task, queue);
 51282b0a4c3STrond Myklebust 
 51396ef13b2STrond Myklebust 	__rpc_remove_wait_queue(queue, task);
 5141da177e4SLinus Torvalds 
 515f1dc237cSTrond Myklebust 	rpc_make_runnable(wq, task);
 5161da177e4SLinus Torvalds }
5171da177e4SLinus Torvalds 
5181da177e4SLinus Torvalds /*
51996ef13b2STrond Myklebust  * Wake up a queued task while the queue lock is being held
5201da177e4SLinus Torvalds  */
/*
 * Conditionally wake @task if it is queued on @queue: an optional
 * @action callback may veto the wakeup (or apply a side effect such as
 * setting tk_status). Returns @task if woken, NULL otherwise. Caller
 * holds queue->lock. The read barrier after the QUEUED test pairs with
 * the smp_wmb() in __rpc_add_wait_queue() so tk_waitqueue is valid.
 */
 521359c48c0STrond Myklebust static struct rpc_task *
 522359c48c0STrond Myklebust rpc_wake_up_task_on_wq_queue_action_locked(struct workqueue_struct *wq,
 523359c48c0STrond Myklebust 		struct rpc_wait_queue *queue, struct rpc_task *task,
 524359c48c0STrond Myklebust 		bool (*action)(struct rpc_task *, void *), void *data)
 5251da177e4SLinus Torvalds {
 5261166fde6STrond Myklebust 	if (RPC_IS_QUEUED(task)) {
 5271166fde6STrond Myklebust 		smp_rmb();
 528359c48c0STrond Myklebust 		if (task->tk_waitqueue == queue) {
 529359c48c0STrond Myklebust 			if (action == NULL || action(task, data)) {
 530f1dc237cSTrond Myklebust 				__rpc_do_wake_up_task_on_wq(wq, queue, task);
 531359c48c0STrond Myklebust 				return task;
 5321da177e4SLinus Torvalds 			}
 5331166fde6STrond Myklebust 		}
 534359c48c0STrond Myklebust 	}
 535359c48c0STrond Myklebust 	return NULL;
 536359c48c0STrond Myklebust }
537359c48c0STrond Myklebust 
5381da177e4SLinus Torvalds /*
539f1dc237cSTrond Myklebust  * Wake up a queued task while the queue lock is being held
540f1dc237cSTrond Myklebust  */
/*
 * Unconditionally wake @task from @queue onto the default rpciod
 * workqueue. Caller holds queue->lock.
 */
 541691b45ddSChuck Lever static void rpc_wake_up_task_queue_locked(struct rpc_wait_queue *queue,
 5422275cde4STrond Myklebust 					  struct rpc_task *task)
 5432275cde4STrond Myklebust {
 544691b45ddSChuck Lever 	rpc_wake_up_task_on_wq_queue_action_locked(rpciod_workqueue, queue,
 545691b45ddSChuck Lever 						   task, NULL, NULL);
 5462275cde4STrond Myklebust }
5472275cde4STrond Myklebust 
5482275cde4STrond Myklebust /*
5492275cde4STrond Myklebust  * Wake up a task on a specific queue
5502275cde4STrond Myklebust  */
/*
 * Wake @task if it is sleeping on @queue. The unlocked QUEUED test is
 * a fast path; the queued state is re-checked under queue->lock.
 */
 55196ef13b2STrond Myklebust void rpc_wake_up_queued_task(struct rpc_wait_queue *queue, struct rpc_task *task)
 55296ef13b2STrond Myklebust {
 5535ce97039STrond Myklebust 	if (!RPC_IS_QUEUED(task))
 5545ce97039STrond Myklebust 		return;
 555c049f8eaSTrond Myklebust 	spin_lock(&queue->lock);
 55696ef13b2STrond Myklebust 	rpc_wake_up_task_queue_locked(queue, task);
 557c049f8eaSTrond Myklebust 	spin_unlock(&queue->lock);
 55896ef13b2STrond Myklebust }
 55996ef13b2STrond Myklebust EXPORT_SYMBOL_GPL(rpc_wake_up_queued_task);
56096ef13b2STrond Myklebust 
/*
 * Wakeup action callback: store *(int *)@status into tk_status and
 * always allow the wakeup to proceed.
 */
 561359c48c0STrond Myklebust static bool rpc_task_action_set_status(struct rpc_task *task, void *status)
 562359c48c0STrond Myklebust {
 563359c48c0STrond Myklebust 	task->tk_status = *(int *)status;
 564359c48c0STrond Myklebust 	return true;
 565359c48c0STrond Myklebust }
566359c48c0STrond Myklebust 
/*
 * Wake @task from @queue while atomically setting tk_status to
 * @status (via the action callback). Caller holds queue->lock.
 */
 567359c48c0STrond Myklebust static void
 568359c48c0STrond Myklebust rpc_wake_up_task_queue_set_status_locked(struct rpc_wait_queue *queue,
 569359c48c0STrond Myklebust 		struct rpc_task *task, int status)
 570359c48c0STrond Myklebust {
 571359c48c0STrond Myklebust 	rpc_wake_up_task_on_wq_queue_action_locked(rpciod_workqueue, queue,
 572359c48c0STrond Myklebust 			task, rpc_task_action_set_status, &status);
 573359c48c0STrond Myklebust }
574359c48c0STrond Myklebust 
 575359c48c0STrond Myklebust /**
 576359c48c0STrond Myklebust  * rpc_wake_up_queued_task_set_status - wake up a task and set task->tk_status
 577359c48c0STrond Myklebust  * @queue: pointer to rpc_wait_queue
 578359c48c0STrond Myklebust  * @task: pointer to rpc_task
 579359c48c0STrond Myklebust  * @status: integer error value
 580359c48c0STrond Myklebust  *
 581359c48c0STrond Myklebust  * If @task is queued on @queue, then it is woken up, and @task->tk_status is
 582359c48c0STrond Myklebust  * set to the value of @status.
 583359c48c0STrond Myklebust  */
 584359c48c0STrond Myklebust void
 585359c48c0STrond Myklebust rpc_wake_up_queued_task_set_status(struct rpc_wait_queue *queue,
 586359c48c0STrond Myklebust 		struct rpc_task *task, int status)
 587359c48c0STrond Myklebust {
	/* Lockless fast path; re-checked under queue->lock */
 588359c48c0STrond Myklebust 	if (!RPC_IS_QUEUED(task))
 589359c48c0STrond Myklebust 		return;
 590c049f8eaSTrond Myklebust 	spin_lock(&queue->lock);
 591359c48c0STrond Myklebust 	rpc_wake_up_task_queue_set_status_locked(queue, task, status);
 592c049f8eaSTrond Myklebust 	spin_unlock(&queue->lock);
 593359c48c0STrond Myklebust }
594359c48c0STrond Myklebust 
/*
 * Wake up the next task on a priority queue.
 *
 * Selection order: (1) the privileged level, if present; (2) up to
 * queue->nr more tasks from the current priority level (per-owner
 * batching); (3) a round-robin scan of the remaining levels.
 * Returns NULL if the queue is empty. Caller must hold queue->lock.
 */
static struct rpc_task *__rpc_find_next_queued_priority(struct rpc_wait_queue *queue)
{
	struct list_head *q;
	struct rpc_task *task;

	/*
	 * Service the privileged queue.
	 */
	q = &queue->tasks[RPC_NR_PRIORITY - 1];
	if (queue->maxpriority > RPC_PRIORITY_PRIVILEGED && !list_empty(q)) {
		task = list_first_entry(q, struct rpc_task, u.tk_wait.list);
		goto out;
	}

	/*
	 * Service a batch of tasks from a single owner.
	 */
	q = &queue->tasks[queue->priority];
	if (!list_empty(q) && queue->nr) {
		/* queue->nr counts down the remaining batch quota */
		queue->nr--;
		task = list_first_entry(q, struct rpc_task, u.tk_wait.list);
		goto out;
	}

	/*
	 * Service the next queue.
	 */
	do {
		/* Scan downward through the levels, wrapping at tasks[0] */
		if (q == &queue->tasks[0])
			q = &queue->tasks[queue->maxpriority];
		else
			q = q - 1;
		if (!list_empty(q)) {
			task = list_first_entry(q, struct rpc_task, u.tk_wait.list);
			goto new_queue;
		}
	} while (q != &queue->tasks[queue->priority]);

	rpc_reset_waitqueue_priority(queue);
	return NULL;

new_queue:
	/* Switch the queue's current priority to the level we found */
	rpc_set_waitqueue_priority(queue, (unsigned int)(q - &queue->tasks[0]));
out:
	return task;
}
6441da177e4SLinus Torvalds 
645961a828dSTrond Myklebust static struct rpc_task *__rpc_find_next_queued(struct rpc_wait_queue *queue)
646961a828dSTrond Myklebust {
647961a828dSTrond Myklebust 	if (RPC_IS_PRIORITY(queue))
648961a828dSTrond Myklebust 		return __rpc_find_next_queued_priority(queue);
649961a828dSTrond Myklebust 	if (!list_empty(&queue->tasks[0]))
650961a828dSTrond Myklebust 		return list_first_entry(&queue->tasks[0], struct rpc_task, u.tk_wait.list);
651961a828dSTrond Myklebust 	return NULL;
652961a828dSTrond Myklebust }
653961a828dSTrond Myklebust 
/*
 * Wake up the first task on the wait queue.
 *
 * @wq: workqueue on which any woken async task should run
 * @func: filter callback — the task is only woken if it returns true
 * @data: opaque argument passed to @func
 *
 * Returns the woken task, or NULL if the queue was empty or @func
 * declined the wake-up. Grabs queue->lock.
 */
struct rpc_task *rpc_wake_up_first_on_wq(struct workqueue_struct *wq,
		struct rpc_wait_queue *queue,
		bool (*func)(struct rpc_task *, void *), void *data)
{
	struct rpc_task	*task = NULL;

	spin_lock(&queue->lock);
	task = __rpc_find_next_queued(queue);
	if (task != NULL)
		task = rpc_wake_up_task_on_wq_queue_action_locked(wq, queue,
				task, func, data);
	spin_unlock(&queue->lock);

	return task;
}
672f1dc237cSTrond Myklebust 
/*
 * Wake up the first task on the wait queue.
 *
 * Convenience wrapper around rpc_wake_up_first_on_wq() that schedules
 * any woken async task onto the default rpciod workqueue.
 */
struct rpc_task *rpc_wake_up_first(struct rpc_wait_queue *queue,
		bool (*func)(struct rpc_task *, void *), void *data)
{
	return rpc_wake_up_first_on_wq(rpciod_workqueue, queue, func, data);
}
EXPORT_SYMBOL_GPL(rpc_wake_up_first);
682961a828dSTrond Myklebust 
/* Filter for rpc_wake_up_next(): accept any task unconditionally. */
static bool rpc_wake_up_next_func(struct rpc_task *task, void *data)
{
	return true;
}
687961a828dSTrond Myklebust 
/*
 * Wake up the next task on the wait queue.
 */
struct rpc_task *rpc_wake_up_next(struct rpc_wait_queue *queue)
{
	return rpc_wake_up_first(queue, rpc_wake_up_next_func, NULL);
}
EXPORT_SYMBOL_GPL(rpc_wake_up_next);
6961da177e4SLinus Torvalds 
6971da177e4SLinus Torvalds /**
698e4c72201STrond Myklebust  * rpc_wake_up_locked - wake up all rpc_tasks
699e4c72201STrond Myklebust  * @queue: rpc_wait_queue on which the tasks are sleeping
700e4c72201STrond Myklebust  *
701e4c72201STrond Myklebust  */
702e4c72201STrond Myklebust static void rpc_wake_up_locked(struct rpc_wait_queue *queue)
703e4c72201STrond Myklebust {
704e4c72201STrond Myklebust 	struct rpc_task *task;
705e4c72201STrond Myklebust 
706e4c72201STrond Myklebust 	for (;;) {
707e4c72201STrond Myklebust 		task = __rpc_find_next_queued(queue);
708e4c72201STrond Myklebust 		if (task == NULL)
709e4c72201STrond Myklebust 			break;
710e4c72201STrond Myklebust 		rpc_wake_up_task_queue_locked(queue, task);
711e4c72201STrond Myklebust 	}
712e4c72201STrond Myklebust }
713e4c72201STrond Myklebust 
714e4c72201STrond Myklebust /**
7151da177e4SLinus Torvalds  * rpc_wake_up - wake up all rpc_tasks
7161da177e4SLinus Torvalds  * @queue: rpc_wait_queue on which the tasks are sleeping
7171da177e4SLinus Torvalds  *
7181da177e4SLinus Torvalds  * Grabs queue->lock
7191da177e4SLinus Torvalds  */
7201da177e4SLinus Torvalds void rpc_wake_up(struct rpc_wait_queue *queue)
7211da177e4SLinus Torvalds {
722c049f8eaSTrond Myklebust 	spin_lock(&queue->lock);
723e4c72201STrond Myklebust 	rpc_wake_up_locked(queue);
724c049f8eaSTrond Myklebust 	spin_unlock(&queue->lock);
7251da177e4SLinus Torvalds }
726e8914c65STrond Myklebust EXPORT_SYMBOL_GPL(rpc_wake_up);
7271da177e4SLinus Torvalds 
7281da177e4SLinus Torvalds /**
729e4c72201STrond Myklebust  * rpc_wake_up_status_locked - wake up all rpc_tasks and set their status value.
730e4c72201STrond Myklebust  * @queue: rpc_wait_queue on which the tasks are sleeping
731e4c72201STrond Myklebust  * @status: status value to set
732e4c72201STrond Myklebust  */
733e4c72201STrond Myklebust static void rpc_wake_up_status_locked(struct rpc_wait_queue *queue, int status)
734e4c72201STrond Myklebust {
735e4c72201STrond Myklebust 	struct rpc_task *task;
736e4c72201STrond Myklebust 
737e4c72201STrond Myklebust 	for (;;) {
738e4c72201STrond Myklebust 		task = __rpc_find_next_queued(queue);
739e4c72201STrond Myklebust 		if (task == NULL)
740e4c72201STrond Myklebust 			break;
741e4c72201STrond Myklebust 		rpc_wake_up_task_queue_set_status_locked(queue, task, status);
742e4c72201STrond Myklebust 	}
743e4c72201STrond Myklebust }
744e4c72201STrond Myklebust 
745e4c72201STrond Myklebust /**
7461da177e4SLinus Torvalds  * rpc_wake_up_status - wake up all rpc_tasks and set their status value.
7471da177e4SLinus Torvalds  * @queue: rpc_wait_queue on which the tasks are sleeping
7481da177e4SLinus Torvalds  * @status: status value to set
7491da177e4SLinus Torvalds  *
7501da177e4SLinus Torvalds  * Grabs queue->lock
7511da177e4SLinus Torvalds  */
7521da177e4SLinus Torvalds void rpc_wake_up_status(struct rpc_wait_queue *queue, int status)
7531da177e4SLinus Torvalds {
754c049f8eaSTrond Myklebust 	spin_lock(&queue->lock);
755e4c72201STrond Myklebust 	rpc_wake_up_status_locked(queue, status);
756c049f8eaSTrond Myklebust 	spin_unlock(&queue->lock);
7571da177e4SLinus Torvalds }
758e8914c65STrond Myklebust EXPORT_SYMBOL_GPL(rpc_wake_up_status);
7591da177e4SLinus Torvalds 
/*
 * Per-queue delayed-work handler: wake any queued task whose deadline
 * has passed with tk_status = -ETIMEDOUT, then re-arm the queue timer
 * for the earliest remaining deadline (if any tasks are still waiting).
 */
static void __rpc_queue_timer_fn(struct work_struct *work)
{
	struct rpc_wait_queue *queue = container_of(work,
			struct rpc_wait_queue,
			timer_list.dwork.work);
	struct rpc_task *task, *n;
	unsigned long expires, now, timeo;

	spin_lock(&queue->lock);
	expires = now = jiffies;
	list_for_each_entry_safe(task, n, &queue->timer_list.list, u.tk_wait.timer_list) {
		timeo = task->tk_timeout;
		if (time_after_eq(now, timeo)) {
			trace_rpc_task_timeout(task, task->tk_action);
			task->tk_status = -ETIMEDOUT;
			rpc_wake_up_task_queue_locked(queue, task);
			continue;
		}
		/* Track the earliest not-yet-expired deadline */
		if (expires == now || time_after(expires, timeo))
			expires = timeo;
	}
	if (!list_empty(&queue->timer_list.list))
		rpc_set_queue_timer(queue, expires);
	spin_unlock(&queue->lock);
}
78536df9aaeSTrond Myklebust 
/*
 * Wake-up callback used by rpc_delay(): a timeout is the expected way
 * for the delay to end, so clear the -ETIMEDOUT status the queue timer
 * set. Any other status (e.g. an explicit wake with an error) is kept.
 */
static void __rpc_atrun(struct rpc_task *task)
{
	if (task->tk_status == -ETIMEDOUT)
		task->tk_status = 0;
}
7918014793bSTrond Myklebust 
/*
 * Run a task at a later time
 */
void rpc_delay(struct rpc_task *task, unsigned long delay)
{
	/* Sleep on the global delay_queue for @delay jiffies; __rpc_atrun()
	 * clears the -ETIMEDOUT set when the timer fires. */
	rpc_sleep_on_timeout(&delay_queue, task, __rpc_atrun, jiffies + delay);
}
EXPORT_SYMBOL_GPL(rpc_delay);
8001da177e4SLinus Torvalds 
8011da177e4SLinus Torvalds /*
8024ce70adaSTrond Myklebust  * Helper to call task->tk_ops->rpc_call_prepare
8034ce70adaSTrond Myklebust  */
804aae2006eSAndy Adamson void rpc_prepare_task(struct rpc_task *task)
8054ce70adaSTrond Myklebust {
8064ce70adaSTrond Myklebust 	task->tk_ops->rpc_call_prepare(task, task->tk_calldata);
8074ce70adaSTrond Myklebust }
8084ce70adaSTrond Myklebust 
/* Reset retry counters and record the task's starting timestamp. */
static void
rpc_init_task_statistics(struct rpc_task *task)
{
	/* Initialize retry counters */
	task->tk_garb_retry = 2;
	task->tk_cred_retry = 2;
	task->tk_rebind_retry = 2;

	/* starting timestamp */
	task->tk_start = ktime_get();
}
8207fdcf13bSTrond Myklebust 
/*
 * Re-initialize per-call statistics when a task restarts its RPC
 * (see rpc_exit_task()): clear the timeout count and the per-call
 * MAJORSEEN/SENT flags, then reset counters and the start time.
 */
static void
rpc_reset_task_statistics(struct rpc_task *task)
{
	task->tk_timeouts = 0;
	task->tk_flags &= ~(RPC_CALL_MAJORSEEN|RPC_TASK_SENT);
	rpc_init_task_statistics(task);
}
8287fdcf13bSTrond Myklebust 
8294ce70adaSTrond Myklebust /*
830963d8fe5STrond Myklebust  * Helper that calls task->tk_ops->rpc_call_done if it exists
831d05fdb0cSTrond Myklebust  */
832abbcf28fSTrond Myklebust void rpc_exit_task(struct rpc_task *task)
833d05fdb0cSTrond Myklebust {
834a264abadSChuck Lever 	trace_rpc_task_end(task, task->tk_action);
835abbcf28fSTrond Myklebust 	task->tk_action = NULL;
8369dfe52a9SDave Wysochanski 	if (task->tk_ops->rpc_count_stats)
8379dfe52a9SDave Wysochanski 		task->tk_ops->rpc_count_stats(task, task->tk_calldata);
8389dfe52a9SDave Wysochanski 	else if (task->tk_client)
8399dfe52a9SDave Wysochanski 		rpc_count_iostats(task, task->tk_client->cl_metrics);
840963d8fe5STrond Myklebust 	if (task->tk_ops->rpc_call_done != NULL) {
841b40887e1SChuck Lever 		trace_rpc_task_call_done(task, task->tk_ops->rpc_call_done);
842963d8fe5STrond Myklebust 		task->tk_ops->rpc_call_done(task, task->tk_calldata);
843d05fdb0cSTrond Myklebust 		if (task->tk_action != NULL) {
844abbcf28fSTrond Myklebust 			/* Always release the RPC slot and buffer memory */
845d05fdb0cSTrond Myklebust 			xprt_release(task);
8467fdcf13bSTrond Myklebust 			rpc_reset_task_statistics(task);
847d05fdb0cSTrond Myklebust 		}
848d05fdb0cSTrond Myklebust 	}
849d05fdb0cSTrond Myklebust }
850d9b6cd94STrond Myklebust 
/*
 * Mark @task as signalled and, if it is sleeping on a wait queue, wake
 * it with tk_status = -ERESTARTSYS. No-op for tasks that were never
 * activated.
 */
void rpc_signal_task(struct rpc_task *task)
{
	struct rpc_wait_queue *queue;

	if (!RPC_IS_ACTIVATED(task))
		return;

	trace_rpc_task_signalled(task, task->tk_action);
	set_bit(RPC_TASK_SIGNALLED, &task->tk_runstate);
	/* Order the SIGNALLED bit before reading tk_waitqueue below */
	smp_mb__after_atomic();
	queue = READ_ONCE(task->tk_waitqueue);
	if (queue)
		rpc_wake_up_queued_task_set_status(queue, task, -ERESTARTSYS);
}
865ae67bd38STrond Myklebust 
/*
 * Terminate @task with @status: route its state machine to
 * rpc_exit_task and wake it if it is currently queued.
 */
void rpc_exit(struct rpc_task *task, int status)
{
	task->tk_status = status;
	task->tk_action = rpc_exit_task;
	rpc_wake_up_queued_task(task->tk_waitqueue, task);
}
EXPORT_SYMBOL_GPL(rpc_exit);
873d05fdb0cSTrond Myklebust 
874bbd5a1f9STrond Myklebust void rpc_release_calldata(const struct rpc_call_ops *ops, void *calldata)
875bbd5a1f9STrond Myklebust {
876a86dc496STrond Myklebust 	if (ops->rpc_release != NULL)
877bbd5a1f9STrond Myklebust 		ops->rpc_release(calldata);
878bbd5a1f9STrond Myklebust }
879bbd5a1f9STrond Myklebust 
8808db55a03SNeilBrown static bool xprt_needs_memalloc(struct rpc_xprt *xprt, struct rpc_task *tk)
8818db55a03SNeilBrown {
8828db55a03SNeilBrown 	if (!xprt)
8838db55a03SNeilBrown 		return false;
8848db55a03SNeilBrown 	if (!atomic_read(&xprt->swapper))
8858db55a03SNeilBrown 		return false;
8868db55a03SNeilBrown 	return test_bit(XPRT_LOCKED, &xprt->state) && xprt->snd_task == tk;
8878db55a03SNeilBrown }
8888db55a03SNeilBrown 
/*
 * This is the RPC `scheduler' (or rather, the finite state machine).
 *
 * Repeatedly runs the task's next action (tk_callback takes precedence
 * over tk_action) until the task either completes (no action left) or
 * goes to sleep on a wait queue. Async tasks return to their workqueue
 * when they sleep; sync tasks block here until woken. PF_MEMALLOC is
 * set around actions for swap-related tasks and restored on exit.
 */
static void __rpc_execute(struct rpc_task *task)
{
	struct rpc_wait_queue *queue;
	int task_is_async = RPC_IS_ASYNC(task);
	int status = 0;
	unsigned long pflags = current->flags;

	WARN_ON_ONCE(RPC_IS_QUEUED(task));
	if (RPC_IS_QUEUED(task))
		return;

	for (;;) {
		void (*do_action)(struct rpc_task *);

		/*
		 * Perform the next FSM step or a pending callback.
		 *
		 * tk_action may be NULL if the task has been killed.
		 * In particular, note that rpc_killall_tasks may
		 * do this at any time, so beware when dereferencing.
		 */
		do_action = task->tk_action;
		if (task->tk_callback) {
			/* A one-shot callback overrides tk_action */
			do_action = task->tk_callback;
			task->tk_callback = NULL;
		}
		if (!do_action)
			break;
		if (RPC_IS_SWAPPER(task) ||
		    xprt_needs_memalloc(task->tk_xprt, task))
			current->flags |= PF_MEMALLOC;

		trace_rpc_task_run_action(task, do_action);
		do_action(task);

		/*
		 * Lockless check for whether task is sleeping or not.
		 */
		if (!RPC_IS_QUEUED(task)) {
			cond_resched();
			continue;
		}

		/*
		 * Signalled tasks should exit rather than sleep.
		 */
		if (RPC_SIGNALLED(task)) {
			task->tk_rpc_status = -ERESTARTSYS;
			rpc_exit(task, -ERESTARTSYS);
		}

		/*
		 * The queue->lock protects against races with
		 * rpc_make_runnable().
		 *
		 * Note that once we clear RPC_TASK_RUNNING on an asynchronous
		 * rpc_task, rpc_make_runnable() can assign it to a
		 * different workqueue. We therefore cannot assume that the
		 * rpc_task pointer may still be dereferenced.
		 */
		queue = task->tk_waitqueue;
		spin_lock(&queue->lock);
		if (!RPC_IS_QUEUED(task)) {
			spin_unlock(&queue->lock);
			continue;
		}
		rpc_clear_running(task);
		spin_unlock(&queue->lock);
		if (task_is_async)
			goto out;

		/* sync task: sleep here */
		trace_rpc_task_sync_sleep(task, task->tk_action);
		status = out_of_line_wait_on_bit(&task->tk_runstate,
				RPC_TASK_QUEUED, rpc_wait_bit_killable,
				TASK_KILLABLE);
		if (status < 0) {
			/*
			 * When a sync task receives a signal, it exits with
			 * -ERESTARTSYS. In order to catch any callbacks that
			 * clean up after sleeping on some queue, we don't
			 * break the loop here, but go around once more.
			 */
			trace_rpc_task_signalled(task, task->tk_action);
			set_bit(RPC_TASK_SIGNALLED, &task->tk_runstate);
			task->tk_rpc_status = -ERESTARTSYS;
			rpc_exit(task, -ERESTARTSYS);
		}
		trace_rpc_task_sync_wake(task, task->tk_action);
	}

	/* Release all resources associated with the task */
	rpc_release_task(task);
out:
	current_restore_flags(pflags, PF_MEMALLOC);
}
9881da177e4SLinus Torvalds 
/*
 * User-visible entry point to the scheduler.
 *
 * This may be called recursively if e.g. an async NFS task updates
 * the attributes and finds that dirty pages must be flushed.
 * NOTE: Upon exit of this function the task is guaranteed to be
 *	 released. In particular note that tk_release() will have
 *	 been called, so your task memory may have been freed.
 *
 * Async tasks are only made runnable here (the workqueue runs them);
 * sync tasks run __rpc_execute() inline under a NOFS allocation scope.
 */
void rpc_execute(struct rpc_task *task)
{
	bool is_async = RPC_IS_ASYNC(task);

	rpc_set_active(task);
	rpc_make_runnable(rpciod_workqueue, task);
	if (!is_async) {
		unsigned int pflags = memalloc_nofs_save();
		__rpc_execute(task);
		memalloc_nofs_restore(pflags);
	}
}
10101da177e4SLinus Torvalds 
/*
 * Workqueue entry point for async tasks: run the state machine under a
 * NOFS allocation scope to avoid filesystem reclaim recursion.
 */
static void rpc_async_schedule(struct work_struct *work)
{
	unsigned int pflags = memalloc_nofs_save();

	__rpc_execute(container_of(work, struct rpc_task, u.tk_work));
	memalloc_nofs_restore(pflags);
}
10181da177e4SLinus Torvalds 
/**
 * rpc_malloc - allocate RPC buffer resources
 * @task: RPC task
 *
 * A single memory region is allocated, which is split between the
 * RPC call and RPC reply that this task is being used for. When
 * this RPC is retired, the memory is released by calling rpc_free.
 *
 * To prevent rpciod from hanging, this allocator never sleeps,
 * returning -ENOMEM and suppressing warning if the request cannot
 * be serviced immediately. The caller can arrange to sleep in a
 * way that is safe for rpciod.
 *
 * Most requests are 'small' (under 2KiB) and can be serviced from a
 * mempool, ensuring that NFS reads and writes can always proceed,
 * and that there is good locality of reference for these buffers.
 *
 * Return: 0 on success, -ENOMEM on allocation failure.
 */
int rpc_malloc(struct rpc_task *task)
{
	struct rpc_rqst *rqst = task->tk_rqstp;
	size_t size = rqst->rq_callsize + rqst->rq_rcvsize;
	struct rpc_buffer *buf;
	gfp_t gfp = rpc_task_gfp_mask();

	/* Account for the rpc_buffer header prepended to the region */
	size += sizeof(struct rpc_buffer);
	if (size <= RPC_BUFFER_MAXSIZE) {
		buf = kmem_cache_alloc(rpc_buffer_slabp, gfp);
		/* Reach for the mempool if dynamic allocation fails */
		if (!buf && RPC_IS_ASYNC(task))
			buf = mempool_alloc(rpc_buffer_mempool, GFP_NOWAIT);
	} else
		buf = kmalloc(size, gfp);

	if (!buf)
		return -ENOMEM;

	buf->len = size;
	rqst->rq_buffer = buf->data;
	/* Reply buffer lives immediately after the call buffer */
	rqst->rq_rbuffer = (char *)rqst->rq_buffer + rqst->rq_callsize;
	return 0;
}
EXPORT_SYMBOL_GPL(rpc_malloc);
10611da177e4SLinus Torvalds 
/**
 * rpc_free - free RPC buffer resources allocated via rpc_malloc
 * @task: RPC task
 *
 * Recovers the rpc_buffer header from rq_buffer and returns small
 * buffers through the mempool, large ones through kfree — mirroring
 * the size split in rpc_malloc().
 */
void rpc_free(struct rpc_task *task)
{
	void *buffer = task->tk_rqstp->rq_buffer;
	size_t size;
	struct rpc_buffer *buf;

	buf = container_of(buffer, struct rpc_buffer, data);
	size = buf->len;

	if (size <= RPC_BUFFER_MAXSIZE)
		mempool_free(buf, rpc_buffer_mempool);
	else
		kfree(buf);
}
EXPORT_SYMBOL_GPL(rpc_free);
10821da177e4SLinus Torvalds 
/*
 * Creation and deletion of RPC task structures
 *
 * Initialize @task from @task_setup_data: zero the structure, take the
 * initial reference, copy flags/ops/calldata, set priority and owner,
 * acquire transport and op-cred references, and arm the first state
 * (rpc_prepare_task when the ops provide rpc_call_prepare).
 */
static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *task_setup_data)
{
	memset(task, 0, sizeof(*task));
	atomic_set(&task->tk_count, 1);
	task->tk_flags  = task_setup_data->flags;
	task->tk_ops = task_setup_data->callback_ops;
	task->tk_calldata = task_setup_data->callback_data;
	INIT_LIST_HEAD(&task->tk_task);

	task->tk_priority = task_setup_data->priority - RPC_PRIORITY_LOW;
	task->tk_owner = current->tgid;

	/* Initialize workqueue for async tasks */
	task->tk_workqueue = task_setup_data->workqueue;

	task->tk_xprt = rpc_task_get_xprt(task_setup_data->rpc_client,
			xprt_get(task_setup_data->rpc_xprt));

	task->tk_op_cred = get_rpccred(task_setup_data->rpc_op_cred);

	if (task->tk_ops->rpc_call_prepare != NULL)
		task->tk_action = rpc_prepare_task;

	rpc_init_task_statistics(task);
}
11111da177e4SLinus Torvalds 
1112910ad386STrond Myklebust static struct rpc_task *rpc_alloc_task(void)
11131da177e4SLinus Torvalds {
1114910ad386STrond Myklebust 	struct rpc_task *task;
1115910ad386STrond Myklebust 
1116910ad386STrond Myklebust 	task = kmem_cache_alloc(rpc_task_slabp, rpc_task_gfp_mask());
1117910ad386STrond Myklebust 	if (task)
1118910ad386STrond Myklebust 		return task;
1119910ad386STrond Myklebust 	return mempool_alloc(rpc_task_mempool, GFP_NOWAIT);
11201da177e4SLinus Torvalds }
11211da177e4SLinus Torvalds 
11221da177e4SLinus Torvalds /*
112390c5755fSTrond Myklebust  * Create a new task for the specified client.
11241da177e4SLinus Torvalds  */
112584115e1cSTrond Myklebust struct rpc_task *rpc_new_task(const struct rpc_task_setup *setup_data)
11261da177e4SLinus Torvalds {
1127e8f5d77cSTrond Myklebust 	struct rpc_task	*task = setup_data->task;
1128e8f5d77cSTrond Myklebust 	unsigned short flags = 0;
11291da177e4SLinus Torvalds 
1130e8f5d77cSTrond Myklebust 	if (task == NULL) {
11311da177e4SLinus Torvalds 		task = rpc_alloc_task();
113225cf32adSTrond Myklebust 		if (task == NULL) {
113325cf32adSTrond Myklebust 			rpc_release_calldata(setup_data->callback_ops,
113425cf32adSTrond Myklebust 					     setup_data->callback_data);
113525cf32adSTrond Myklebust 			return ERR_PTR(-ENOMEM);
113625cf32adSTrond Myklebust 		}
1137e8f5d77cSTrond Myklebust 		flags = RPC_TASK_DYNAMIC;
1138e8f5d77cSTrond Myklebust 	}
11391da177e4SLinus Torvalds 
114084115e1cSTrond Myklebust 	rpc_init_task(task, setup_data);
1141e8f5d77cSTrond Myklebust 	task->tk_flags |= flags;
11421da177e4SLinus Torvalds 	return task;
11431da177e4SLinus Torvalds }
11441da177e4SLinus Torvalds 
/*
 * rpc_free_task - release rpc task and perform cleanups
 *
 * Note that we free up the rpc_task _after_ rpc_release_calldata()
 * in order to work around a workqueue dependency issue.
 *
 * Tejun Heo states:
 * "Workqueue currently considers two work items to be the same if they're
 * on the same address and won't execute them concurrently - ie. it
 * makes a work item which is queued again while being executed wait
 * for the previous execution to complete.
 *
 * If a work function frees the work item, and then waits for an event
 * which should be performed by another work item and *that* work item
 * recycles the freed work item, it can create a false dependency loop.
 * There really is no reliable way to detect this short of verifying
 * every memory free."
 *
 */
static void rpc_free_task(struct rpc_task *task)
{
	/* Snapshot flags before the callbacks below may free the task's
	 * backing memory indirectly. */
	unsigned short tk_flags = task->tk_flags;

	put_rpccred(task->tk_op_cred);
	rpc_release_calldata(task->tk_ops, task->tk_calldata);

	/* Only dynamically-allocated tasks are freed here; caller-owned
	 * tasks (no RPC_TASK_DYNAMIC) are left to their owner. */
	if (tk_flags & RPC_TASK_DYNAMIC)
		mempool_free(task, rpc_task_mempool);
}
117432bfb5c0STrond Myklebust 
/*
 * Deferred task destruction, run on a workqueue (see
 * rpc_final_put_task()), under a NOFS allocation scope.
 */
static void rpc_async_release(struct work_struct *work)
{
	unsigned int pflags = memalloc_nofs_save();

	rpc_free_task(container_of(work, struct rpc_task, u.tk_work));
	memalloc_nofs_restore(pflags);
}
118232bfb5c0STrond Myklebust 
/*
 * Drop the resources a task holds besides its own memory: transport
 * slot, message credential (unless RPC_TASK_CRED_NOREF), and the
 * client reference.
 */
static void rpc_release_resources_task(struct rpc_task *task)
{
	xprt_release(task);
	if (task->tk_msg.rpc_cred) {
		if (!(task->tk_flags & RPC_TASK_CRED_NOREF))
			put_cred(task->tk_msg.rpc_cred);
		task->tk_msg.rpc_cred = NULL;
	}
	rpc_task_release_client(task);
}
1193bf294b41STrond Myklebust 
/*
 * Free the task either via workqueue @q (deferred, see
 * rpc_async_release()) or directly when @q is NULL.
 */
static void rpc_final_put_task(struct rpc_task *task,
		struct workqueue_struct *q)
{
	if (q != NULL) {
		INIT_WORK(&task->u.tk_work, rpc_async_release);
		queue_work(q, &task->u.tk_work);
	} else
		rpc_free_task(task);
}
1203bf294b41STrond Myklebust 
1204bf294b41STrond Myklebust static void rpc_do_put_task(struct rpc_task *task, struct workqueue_struct *q)
1205bf294b41STrond Myklebust {
1206bf294b41STrond Myklebust 	if (atomic_dec_and_test(&task->tk_count)) {
1207bf294b41STrond Myklebust 		rpc_release_resources_task(task);
1208bf294b41STrond Myklebust 		rpc_final_put_task(task, q);
1209bf294b41STrond Myklebust 	}
1210bf294b41STrond Myklebust }
1211bf294b41STrond Myklebust 
/**
 * rpc_put_task - release a reference to an rpc_task
 * @task: the task to drop a reference on
 *
 * The final free, if any, happens synchronously in the caller's context.
 */
void rpc_put_task(struct rpc_task *task)
{
	rpc_do_put_task(task, NULL);
}
EXPORT_SYMBOL_GPL(rpc_put_task);
1217e6b3c4dbSTrond Myklebust 
/**
 * rpc_put_task_async - release a reference to an rpc_task
 * @task: the task to drop a reference on
 *
 * Like rpc_put_task(), but any final free is deferred to
 * task->tk_workqueue, for callers that must not free the task
 * in their own context (see the comment above rpc_free_task()).
 */
void rpc_put_task_async(struct rpc_task *task)
{
	rpc_do_put_task(task, task->tk_workqueue);
}
EXPORT_SYMBOL_GPL(rpc_put_task_async);
1223bf294b41STrond Myklebust 
/*
 * Drop the RPC scheduler's own reference to @task once it has finished
 * executing.  The task must no longer be on any wait queue.
 */
static void rpc_release_task(struct rpc_task *task)
{
	WARN_ON_ONCE(RPC_IS_QUEUED(task));

	rpc_release_resources_task(task);

	/*
	 * Note: at this point we have been removed from rpc_clnt->cl_tasks,
	 * so it should be safe to use task->tk_count as a test for whether
	 * or not any other processes still hold references to our rpc_task.
	 */
	if (atomic_read(&task->tk_count) != 1 + !RPC_IS_ASYNC(task)) {
		/* Wake up anyone who may be waiting for task completion */
		if (!rpc_complete_task(task))
			return;
	} else {
		/* No waiters: drop our reference; only free if it was last */
		if (!atomic_dec_and_test(&task->tk_count))
			return;
	}
	rpc_final_put_task(task, task->tk_workqueue);
}
12451da177e4SLinus Torvalds 
/*
 * Pin the sunrpc module while a user needs rpciod.
 * Returns 0 on success, -EINVAL if the module is being unloaded.
 */
int rpciod_up(void)
{
	return try_module_get(THIS_MODULE) ? 0 : -EINVAL;
}
1250b247bbf1STrond Myklebust 
/* Drop the module reference taken by rpciod_up(). */
void rpciod_down(void)
{
	module_put(THIS_MODULE);
}
1255b247bbf1STrond Myklebust 
12561da177e4SLinus Torvalds /*
1257b247bbf1STrond Myklebust  * Start up the rpciod workqueue.
12581da177e4SLinus Torvalds  */
1259b247bbf1STrond Myklebust static int rpciod_start(void)
12601da177e4SLinus Torvalds {
12611da177e4SLinus Torvalds 	struct workqueue_struct *wq;
12621da177e4SLinus Torvalds 
12631da177e4SLinus Torvalds 	/*
12641da177e4SLinus Torvalds 	 * Create the rpciod thread and wait for it to start.
12651da177e4SLinus Torvalds 	 */
1266f515f86bSOlga Kornievskaia 	wq = alloc_workqueue("rpciod", WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
126740a5f1b1STrond Myklebust 	if (!wq)
126840a5f1b1STrond Myklebust 		goto out_failed;
12691da177e4SLinus Torvalds 	rpciod_workqueue = wq;
12706dbcbe3fSTrond Myklebust 	wq = alloc_workqueue("xprtiod", WQ_UNBOUND | WQ_MEM_RECLAIM, 0);
127140a5f1b1STrond Myklebust 	if (!wq)
127240a5f1b1STrond Myklebust 		goto free_rpciod;
127340a5f1b1STrond Myklebust 	xprtiod_workqueue = wq;
127440a5f1b1STrond Myklebust 	return 1;
127540a5f1b1STrond Myklebust free_rpciod:
127640a5f1b1STrond Myklebust 	wq = rpciod_workqueue;
127740a5f1b1STrond Myklebust 	rpciod_workqueue = NULL;
127840a5f1b1STrond Myklebust 	destroy_workqueue(wq);
127940a5f1b1STrond Myklebust out_failed:
128040a5f1b1STrond Myklebust 	return 0;
12811da177e4SLinus Torvalds }
12821da177e4SLinus Torvalds 
/* Tear down the rpciod and xprtiod workqueues (reverse of rpciod_start). */
static void rpciod_stop(void)
{
	struct workqueue_struct *wq = NULL;

	/* Nothing to do if rpciod_start() never succeeded */
	if (rpciod_workqueue == NULL)
		return;

	/* Clear each global before destroying its queue */
	wq = rpciod_workqueue;
	rpciod_workqueue = NULL;
	destroy_workqueue(wq);
	wq = xprtiod_workqueue;
	xprtiod_workqueue = NULL;
	destroy_workqueue(wq);
}
12971da177e4SLinus Torvalds 
/*
 * Undo rpc_init_mempool().  Safe on partially-initialised state:
 * mempool_destroy() and kmem_cache_destroy() both accept NULL.
 */
void
rpc_destroy_mempool(void)
{
	rpciod_stop();
	mempool_destroy(rpc_buffer_mempool);
	mempool_destroy(rpc_task_mempool);
	kmem_cache_destroy(rpc_task_slabp);
	kmem_cache_destroy(rpc_buffer_slabp);
	rpc_destroy_wait_queue(&delay_queue);
}
13081da177e4SLinus Torvalds 
/*
 * Initialise static SUNRPC scheduler state: the global "delayq" wait
 * queue, the rpciod/xprtiod workqueues, and the slab caches plus
 * mempools backing rpc_task and RPC buffer allocations.
 *
 * Returns 0 on success or -ENOMEM after undoing any partial setup.
 */
int
rpc_init_mempool(void)
{
	/*
	 * The following is not strictly a mempool initialisation,
	 * but there is no harm in doing it here
	 */
	rpc_init_wait_queue(&delay_queue, "delayq");
	if (!rpciod_start())
		goto err_nomem;

	rpc_task_slabp = kmem_cache_create("rpc_tasks",
					     sizeof(struct rpc_task),
					     0, SLAB_HWCACHE_ALIGN,
					     NULL);
	if (!rpc_task_slabp)
		goto err_nomem;
	rpc_buffer_slabp = kmem_cache_create("rpc_buffers",
					     RPC_BUFFER_MAXSIZE,
					     0, SLAB_HWCACHE_ALIGN,
					     NULL);
	if (!rpc_buffer_slabp)
		goto err_nomem;
	/* Mempools guarantee a minimum reserve of objects so allocation
	 * can make forward progress under memory pressure. */
	rpc_task_mempool = mempool_create_slab_pool(RPC_TASK_POOLSIZE,
						    rpc_task_slabp);
	if (!rpc_task_mempool)
		goto err_nomem;
	rpc_buffer_mempool = mempool_create_slab_pool(RPC_BUFFER_POOLSIZE,
						      rpc_buffer_slabp);
	if (!rpc_buffer_mempool)
		goto err_nomem;
	return 0;
err_nomem:
	/* rpc_destroy_mempool() tolerates partially-initialised state */
	rpc_destroy_mempool();
	return -ENOMEM;
}
1345