xref: /linux/fs/nfsd/nfs4recover.c (revision ebf68996de0ab250c5d520eb2291ab65643e9a1e)
1 /*
2 *  Copyright (c) 2004 The Regents of the University of Michigan.
3 *  Copyright (c) 2012 Jeff Layton <jlayton@redhat.com>
4 *  All rights reserved.
5 *
6 *  Andy Adamson <andros@citi.umich.edu>
7 *
8 *  Redistribution and use in source and binary forms, with or without
9 *  modification, are permitted provided that the following conditions
10 *  are met:
11 *
12 *  1. Redistributions of source code must retain the above copyright
13 *     notice, this list of conditions and the following disclaimer.
14 *  2. Redistributions in binary form must reproduce the above copyright
15 *     notice, this list of conditions and the following disclaimer in the
16 *     documentation and/or other materials provided with the distribution.
17 *  3. Neither the name of the University nor the names of its
18 *     contributors may be used to endorse or promote products derived
19 *     from this software without specific prior written permission.
20 *
21 *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
22 *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
23 *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24 *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
28 *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29 *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30 *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31 *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 *
33 */
34 
35 #include <crypto/hash.h>
36 #include <linux/file.h>
37 #include <linux/slab.h>
38 #include <linux/namei.h>
39 #include <linux/sched.h>
40 #include <linux/fs.h>
41 #include <linux/module.h>
42 #include <net/net_namespace.h>
43 #include <linux/sunrpc/rpc_pipe_fs.h>
44 #include <linux/sunrpc/clnt.h>
45 #include <linux/nfsd/cld.h>
46 
47 #include "nfsd.h"
48 #include "state.h"
49 #include "vfs.h"
50 #include "netns.h"
51 
52 #define NFSDDBG_FACILITY                NFSDDBG_PROC
53 
54 /* Declarations */
/*
 * Method table a client tracking backend fills in.  The legacy
 * (recovery directory) backend in this file provides one instance.
 */
struct nfsd4_client_tracking_ops {
	/* bring the backend up for @net; returns an int status */
	int (*init)(struct net *net);
	/* tear the backend down for @net */
	void (*exit)(struct net *net);
	/* record @clp */
	void (*create)(struct nfs4_client *clp);
	/* drop the record for @clp */
	void (*remove)(struct nfs4_client *clp);
	/* look @clp up; the implementations in this file return 0 on a hit */
	int (*check)(struct nfs4_client *clp);
	/* invoked with the per-net nfsd state when the grace period is over */
	void (*grace_done)(struct nfsd_net *nn);
};
63 
64 /* Globals */
65 static char user_recovery_dirname[PATH_MAX] = "/var/lib/nfs/v4recovery";
66 
67 static int
68 nfs4_save_creds(const struct cred **original_creds)
69 {
70 	struct cred *new;
71 
72 	new = prepare_creds();
73 	if (!new)
74 		return -ENOMEM;
75 
76 	new->fsuid = GLOBAL_ROOT_UID;
77 	new->fsgid = GLOBAL_ROOT_GID;
78 	*original_creds = override_creds(new);
79 	put_cred(new);
80 	return 0;
81 }
82 
/*
 * Counterpart of nfs4_save_creds(): hand the credentials it saved in
 * @original back to revert_creds().
 */
static void nfs4_reset_creds(const struct cred *original)
{
	revert_creds(original);
}
88 
/*
 * Render the 16 bytes at @md5 as 32 lowercase hexadecimal characters
 * followed by a NUL terminator.  @out must have room for 33 bytes.
 */
static void
md5_to_hex(char *out, char *md5)
{
	static const char hexdigits[] = "0123456789abcdef";
	const unsigned char *byte = (const unsigned char *)md5;
	const unsigned char *end = byte + 16;

	while (byte < end) {
		/* high nibble first, then low nibble */
		*out++ = hexdigits[*byte >> 4];
		*out++ = hexdigits[*byte & 0x0f];
		byte++;
	}
	*out = '\0';
}
102 
103 static int
104 nfs4_make_rec_clidname(char *dname, const struct xdr_netobj *clname)
105 {
106 	struct xdr_netobj cksum;
107 	struct crypto_shash *tfm;
108 	int status;
109 
110 	dprintk("NFSD: nfs4_make_rec_clidname for %.*s\n",
111 			clname->len, clname->data);
112 	tfm = crypto_alloc_shash("md5", 0, 0);
113 	if (IS_ERR(tfm)) {
114 		status = PTR_ERR(tfm);
115 		goto out_no_tfm;
116 	}
117 
118 	cksum.len = crypto_shash_digestsize(tfm);
119 	cksum.data = kmalloc(cksum.len, GFP_KERNEL);
120 	if (cksum.data == NULL) {
121 		status = -ENOMEM;
122  		goto out;
123 	}
124 
125 	{
126 		SHASH_DESC_ON_STACK(desc, tfm);
127 
128 		desc->tfm = tfm;
129 
130 		status = crypto_shash_digest(desc, clname->data, clname->len,
131 					     cksum.data);
132 		shash_desc_zero(desc);
133 	}
134 
135 	if (status)
136 		goto out;
137 
138 	md5_to_hex(dname, cksum.data);
139 
140 	status = 0;
141 out:
142 	kfree(cksum.data);
143 	crypto_free_shash(tfm);
144 out_no_tfm:
145 	return status;
146 }
147 
148 /*
149  * If we had an error generating the recdir name for the legacy tracker
150  * then warn the admin. If the error doesn't appear to be transient,
151  * then disable recovery tracking.
152  */
153 static void
154 legacy_recdir_name_error(struct nfs4_client *clp, int error)
155 {
156 	printk(KERN_ERR "NFSD: unable to generate recoverydir "
157 			"name (%d).\n", error);
158 
159 	/*
160 	 * if the algorithm just doesn't exist, then disable the recovery
161 	 * tracker altogether. The crypto libs will generally return this if
162 	 * FIPS is enabled as well.
163 	 */
164 	if (error == -ENOENT) {
165 		printk(KERN_ERR "NFSD: disabling legacy clientid tracking. "
166 			"Reboot recovery will not function correctly!\n");
167 		nfsd4_client_tracking_exit(clp->net);
168 	}
169 }
170 
171 static void
172 __nfsd4_create_reclaim_record_grace(struct nfs4_client *clp,
173 		const char *dname, int len, struct nfsd_net *nn)
174 {
175 	struct xdr_netobj name;
176 	struct nfs4_client_reclaim *crp;
177 
178 	name.data = kmemdup(dname, len, GFP_KERNEL);
179 	if (!name.data) {
180 		dprintk("%s: failed to allocate memory for name.data!\n",
181 			__func__);
182 		return;
183 	}
184 	name.len = len;
185 	crp = nfs4_client_to_reclaim(name, nn);
186 	if (!crp) {
187 		kfree(name.data);
188 		return;
189 	}
190 	crp->cr_clp = clp;
191 }
192 
193 static void
194 nfsd4_create_clid_dir(struct nfs4_client *clp)
195 {
196 	const struct cred *original_cred;
197 	char dname[HEXDIR_LEN];
198 	struct dentry *dir, *dentry;
199 	int status;
200 	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
201 
202 	if (test_and_set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
203 		return;
204 	if (!nn->rec_file)
205 		return;
206 
207 	status = nfs4_make_rec_clidname(dname, &clp->cl_name);
208 	if (status)
209 		return legacy_recdir_name_error(clp, status);
210 
211 	status = nfs4_save_creds(&original_cred);
212 	if (status < 0)
213 		return;
214 
215 	status = mnt_want_write_file(nn->rec_file);
216 	if (status)
217 		goto out_creds;
218 
219 	dir = nn->rec_file->f_path.dentry;
220 	/* lock the parent */
221 	inode_lock(d_inode(dir));
222 
223 	dentry = lookup_one_len(dname, dir, HEXDIR_LEN-1);
224 	if (IS_ERR(dentry)) {
225 		status = PTR_ERR(dentry);
226 		goto out_unlock;
227 	}
228 	if (d_really_is_positive(dentry))
229 		/*
230 		 * In the 4.1 case, where we're called from
231 		 * reclaim_complete(), records from the previous reboot
232 		 * may still be left, so this is OK.
233 		 *
234 		 * In the 4.0 case, we should never get here; but we may
235 		 * as well be forgiving and just succeed silently.
236 		 */
237 		goto out_put;
238 	status = vfs_mkdir(d_inode(dir), dentry, S_IRWXU);
239 out_put:
240 	dput(dentry);
241 out_unlock:
242 	inode_unlock(d_inode(dir));
243 	if (status == 0) {
244 		if (nn->in_grace)
245 			__nfsd4_create_reclaim_record_grace(clp, dname,
246 					HEXDIR_LEN, nn);
247 		vfs_fsync(nn->rec_file, 0);
248 	} else {
249 		printk(KERN_ERR "NFSD: failed to write recovery record"
250 				" (err %d); please check that %s exists"
251 				" and is writeable", status,
252 				user_recovery_dirname);
253 	}
254 	mnt_drop_write_file(nn->rec_file);
255 out_creds:
256 	nfs4_reset_creds(original_cred);
257 }
258 
/*
 * Callback applied by nfsd4_list_rec_dir() to each entry it looks up:
 * @parent is the recovery directory, @child the entry.
 */
typedef int (recdir_func)(struct dentry *parent, struct dentry *child,
			  struct nfsd_net *nn);
260 
261 struct name_list {
262 	char name[HEXDIR_LEN];
263 	struct list_head list;
264 };
265 
266 struct nfs4_dir_ctx {
267 	struct dir_context ctx;
268 	struct list_head names;
269 };
270 
271 static int
272 nfsd4_build_namelist(struct dir_context *__ctx, const char *name, int namlen,
273 		loff_t offset, u64 ino, unsigned int d_type)
274 {
275 	struct nfs4_dir_ctx *ctx =
276 		container_of(__ctx, struct nfs4_dir_ctx, ctx);
277 	struct name_list *entry;
278 
279 	if (namlen != HEXDIR_LEN - 1)
280 		return 0;
281 	entry = kmalloc(sizeof(struct name_list), GFP_KERNEL);
282 	if (entry == NULL)
283 		return -ENOMEM;
284 	memcpy(entry->name, name, HEXDIR_LEN - 1);
285 	entry->name[HEXDIR_LEN - 1] = '\0';
286 	list_add(&entry->list, &ctx->names);
287 	return 0;
288 }
289 
290 static int
291 nfsd4_list_rec_dir(recdir_func *f, struct nfsd_net *nn)
292 {
293 	const struct cred *original_cred;
294 	struct dentry *dir = nn->rec_file->f_path.dentry;
295 	struct nfs4_dir_ctx ctx = {
296 		.ctx.actor = nfsd4_build_namelist,
297 		.names = LIST_HEAD_INIT(ctx.names)
298 	};
299 	struct name_list *entry, *tmp;
300 	int status;
301 
302 	status = nfs4_save_creds(&original_cred);
303 	if (status < 0)
304 		return status;
305 
306 	status = vfs_llseek(nn->rec_file, 0, SEEK_SET);
307 	if (status < 0) {
308 		nfs4_reset_creds(original_cred);
309 		return status;
310 	}
311 
312 	status = iterate_dir(nn->rec_file, &ctx.ctx);
313 	inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
314 
315 	list_for_each_entry_safe(entry, tmp, &ctx.names, list) {
316 		if (!status) {
317 			struct dentry *dentry;
318 			dentry = lookup_one_len(entry->name, dir, HEXDIR_LEN-1);
319 			if (IS_ERR(dentry)) {
320 				status = PTR_ERR(dentry);
321 				break;
322 			}
323 			status = f(dir, dentry, nn);
324 			dput(dentry);
325 		}
326 		list_del(&entry->list);
327 		kfree(entry);
328 	}
329 	inode_unlock(d_inode(dir));
330 	nfs4_reset_creds(original_cred);
331 
332 	list_for_each_entry_safe(entry, tmp, &ctx.names, list) {
333 		dprintk("NFSD: %s. Left entry %s\n", __func__, entry->name);
334 		list_del(&entry->list);
335 		kfree(entry);
336 	}
337 	return status;
338 }
339 
340 static int
341 nfsd4_unlink_clid_dir(char *name, int namlen, struct nfsd_net *nn)
342 {
343 	struct dentry *dir, *dentry;
344 	int status;
345 
346 	dprintk("NFSD: nfsd4_unlink_clid_dir. name %.*s\n", namlen, name);
347 
348 	dir = nn->rec_file->f_path.dentry;
349 	inode_lock_nested(d_inode(dir), I_MUTEX_PARENT);
350 	dentry = lookup_one_len(name, dir, namlen);
351 	if (IS_ERR(dentry)) {
352 		status = PTR_ERR(dentry);
353 		goto out_unlock;
354 	}
355 	status = -ENOENT;
356 	if (d_really_is_negative(dentry))
357 		goto out;
358 	status = vfs_rmdir(d_inode(dir), dentry);
359 out:
360 	dput(dentry);
361 out_unlock:
362 	inode_unlock(d_inode(dir));
363 	return status;
364 }
365 
366 static void
367 __nfsd4_remove_reclaim_record_grace(const char *dname, int len,
368 		struct nfsd_net *nn)
369 {
370 	struct xdr_netobj name;
371 	struct nfs4_client_reclaim *crp;
372 
373 	name.data = kmemdup(dname, len, GFP_KERNEL);
374 	if (!name.data) {
375 		dprintk("%s: failed to allocate memory for name.data!\n",
376 			__func__);
377 		return;
378 	}
379 	name.len = len;
380 	crp = nfsd4_find_reclaim_client(name, nn);
381 	kfree(name.data);
382 	if (crp)
383 		nfs4_remove_reclaim_record(crp, nn);
384 }
385 
386 static void
387 nfsd4_remove_clid_dir(struct nfs4_client *clp)
388 {
389 	const struct cred *original_cred;
390 	char dname[HEXDIR_LEN];
391 	int status;
392 	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
393 
394 	if (!nn->rec_file || !test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
395 		return;
396 
397 	status = nfs4_make_rec_clidname(dname, &clp->cl_name);
398 	if (status)
399 		return legacy_recdir_name_error(clp, status);
400 
401 	status = mnt_want_write_file(nn->rec_file);
402 	if (status)
403 		goto out;
404 	clear_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
405 
406 	status = nfs4_save_creds(&original_cred);
407 	if (status < 0)
408 		goto out_drop_write;
409 
410 	status = nfsd4_unlink_clid_dir(dname, HEXDIR_LEN-1, nn);
411 	nfs4_reset_creds(original_cred);
412 	if (status == 0) {
413 		vfs_fsync(nn->rec_file, 0);
414 		if (nn->in_grace)
415 			__nfsd4_remove_reclaim_record_grace(dname,
416 					HEXDIR_LEN, nn);
417 	}
418 out_drop_write:
419 	mnt_drop_write_file(nn->rec_file);
420 out:
421 	if (status)
422 		printk("NFSD: Failed to remove expired client state directory"
423 				" %.*s\n", HEXDIR_LEN, dname);
424 }
425 
426 static int
427 purge_old(struct dentry *parent, struct dentry *child, struct nfsd_net *nn)
428 {
429 	int status;
430 	struct xdr_netobj name;
431 
432 	if (child->d_name.len != HEXDIR_LEN - 1) {
433 		printk("%s: illegal name %pd in recovery directory\n",
434 				__func__, child);
435 		/* Keep trying; maybe the others are OK: */
436 		return 0;
437 	}
438 	name.data = kmemdup_nul(child->d_name.name, child->d_name.len, GFP_KERNEL);
439 	if (!name.data) {
440 		dprintk("%s: failed to allocate memory for name.data!\n",
441 			__func__);
442 		goto out;
443 	}
444 	name.len = HEXDIR_LEN;
445 	if (nfs4_has_reclaimed_state(name, nn))
446 		goto out_free;
447 
448 	status = vfs_rmdir(d_inode(parent), child);
449 	if (status)
450 		printk("failed to remove client recovery directory %pd\n",
451 				child);
452 out_free:
453 	kfree(name.data);
454 out:
455 	/* Keep trying, success or failure: */
456 	return 0;
457 }
458 
459 static void
460 nfsd4_recdir_purge_old(struct nfsd_net *nn)
461 {
462 	int status;
463 
464 	nn->in_grace = false;
465 	if (!nn->rec_file)
466 		return;
467 	status = mnt_want_write_file(nn->rec_file);
468 	if (status)
469 		goto out;
470 	status = nfsd4_list_rec_dir(purge_old, nn);
471 	if (status == 0)
472 		vfs_fsync(nn->rec_file, 0);
473 	mnt_drop_write_file(nn->rec_file);
474 out:
475 	nfs4_release_reclaim(nn);
476 	if (status)
477 		printk("nfsd4: failed to purge old clients from recovery"
478 			" directory %pD\n", nn->rec_file);
479 }
480 
481 static int
482 load_recdir(struct dentry *parent, struct dentry *child, struct nfsd_net *nn)
483 {
484 	struct xdr_netobj name;
485 
486 	if (child->d_name.len != HEXDIR_LEN - 1) {
487 		printk("%s: illegal name %pd in recovery directory\n",
488 				__func__, child);
489 		/* Keep trying; maybe the others are OK: */
490 		return 0;
491 	}
492 	name.data = kmemdup_nul(child->d_name.name, child->d_name.len, GFP_KERNEL);
493 	if (!name.data) {
494 		dprintk("%s: failed to allocate memory for name.data!\n",
495 			__func__);
496 		goto out;
497 	}
498 	name.len = HEXDIR_LEN;
499 	if (!nfs4_client_to_reclaim(name, nn))
500 		kfree(name.data);
501 out:
502 	return 0;
503 }
504 
505 static int
506 nfsd4_recdir_load(struct net *net) {
507 	int status;
508 	struct nfsd_net *nn =  net_generic(net, nfsd_net_id);
509 
510 	if (!nn->rec_file)
511 		return 0;
512 
513 	status = nfsd4_list_rec_dir(load_recdir, nn);
514 	if (status)
515 		printk("nfsd4: failed loading clients from recovery"
516 			" directory %pD\n", nn->rec_file);
517 	return status;
518 }
519 
520 /*
521  * Hold reference to the recovery directory.
522  */
523 
524 static int
525 nfsd4_init_recdir(struct net *net)
526 {
527 	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
528 	const struct cred *original_cred;
529 	int status;
530 
531 	printk("NFSD: Using %s as the NFSv4 state recovery directory\n",
532 			user_recovery_dirname);
533 
534 	BUG_ON(nn->rec_file);
535 
536 	status = nfs4_save_creds(&original_cred);
537 	if (status < 0) {
538 		printk("NFSD: Unable to change credentials to find recovery"
539 		       " directory: error %d\n",
540 		       status);
541 		return status;
542 	}
543 
544 	nn->rec_file = filp_open(user_recovery_dirname, O_RDONLY | O_DIRECTORY, 0);
545 	if (IS_ERR(nn->rec_file)) {
546 		printk("NFSD: unable to find recovery directory %s\n",
547 				user_recovery_dirname);
548 		status = PTR_ERR(nn->rec_file);
549 		nn->rec_file = NULL;
550 	}
551 
552 	nfs4_reset_creds(original_cred);
553 	if (!status)
554 		nn->in_grace = true;
555 	return status;
556 }
557 
558 static void
559 nfsd4_shutdown_recdir(struct net *net)
560 {
561 	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
562 
563 	if (!nn->rec_file)
564 		return;
565 	fput(nn->rec_file);
566 	nn->rec_file = NULL;
567 }
568 
569 static int
570 nfs4_legacy_state_init(struct net *net)
571 {
572 	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
573 	int i;
574 
575 	nn->reclaim_str_hashtbl = kmalloc_array(CLIENT_HASH_SIZE,
576 						sizeof(struct list_head),
577 						GFP_KERNEL);
578 	if (!nn->reclaim_str_hashtbl)
579 		return -ENOMEM;
580 
581 	for (i = 0; i < CLIENT_HASH_SIZE; i++)
582 		INIT_LIST_HEAD(&nn->reclaim_str_hashtbl[i]);
583 	nn->reclaim_str_hashtbl_size = 0;
584 
585 	return 0;
586 }
587 
588 static void
589 nfs4_legacy_state_shutdown(struct net *net)
590 {
591 	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
592 
593 	kfree(nn->reclaim_str_hashtbl);
594 }
595 
/*
 * Open the recovery directory and load its entries.  If loading fails
 * the directory is closed again.  Returns 0 or the first error.
 */
static int
nfsd4_load_reboot_recovery_data(struct net *net)
{
	int err = nfsd4_init_recdir(net);

	if (err)
		return err;

	err = nfsd4_recdir_load(net);
	if (!err)
		return 0;

	nfsd4_shutdown_recdir(net);
	return err;
}
611 
612 static int
613 nfsd4_legacy_tracking_init(struct net *net)
614 {
615 	int status;
616 
617 	/* XXX: The legacy code won't work in a container */
618 	if (net != &init_net) {
619 		pr_warn("NFSD: attempt to initialize legacy client tracking in a container ignored.\n");
620 		return -EINVAL;
621 	}
622 
623 	status = nfs4_legacy_state_init(net);
624 	if (status)
625 		return status;
626 
627 	status = nfsd4_load_reboot_recovery_data(net);
628 	if (status)
629 		goto err;
630 	printk("NFSD: Using legacy client tracking operations.\n");
631 	return 0;
632 
633 err:
634 	nfs4_legacy_state_shutdown(net);
635 	return status;
636 }
637 
638 static void
639 nfsd4_legacy_tracking_exit(struct net *net)
640 {
641 	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
642 
643 	nfs4_release_reclaim(nn);
644 	nfsd4_shutdown_recdir(net);
645 	nfs4_legacy_state_shutdown(net);
646 }
647 
648 /*
649  * Change the NFSv4 recovery directory to recdir.
650  */
651 int
652 nfs4_reset_recoverydir(char *recdir)
653 {
654 	int status;
655 	struct path path;
656 
657 	status = kern_path(recdir, LOOKUP_FOLLOW, &path);
658 	if (status)
659 		return status;
660 	status = -ENOTDIR;
661 	if (d_is_dir(path.dentry)) {
662 		strcpy(user_recovery_dirname, recdir);
663 		status = 0;
664 	}
665 	path_put(&path);
666 	return status;
667 }
668 
669 char *
670 nfs4_recoverydir(void)
671 {
672 	return user_recovery_dirname;
673 }
674 
675 static int
676 nfsd4_check_legacy_client(struct nfs4_client *clp)
677 {
678 	int status;
679 	char dname[HEXDIR_LEN];
680 	struct nfs4_client_reclaim *crp;
681 	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
682 	struct xdr_netobj name;
683 
684 	/* did we already find that this client is stable? */
685 	if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
686 		return 0;
687 
688 	status = nfs4_make_rec_clidname(dname, &clp->cl_name);
689 	if (status) {
690 		legacy_recdir_name_error(clp, status);
691 		return status;
692 	}
693 
694 	/* look for it in the reclaim hashtable otherwise */
695 	name.data = kmemdup(dname, HEXDIR_LEN, GFP_KERNEL);
696 	if (!name.data) {
697 		dprintk("%s: failed to allocate memory for name.data!\n",
698 			__func__);
699 		goto out_enoent;
700 	}
701 	name.len = HEXDIR_LEN;
702 	crp = nfsd4_find_reclaim_client(name, nn);
703 	kfree(name.data);
704 	if (crp) {
705 		set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
706 		crp->cr_clp = clp;
707 		return 0;
708 	}
709 
710 out_enoent:
711 	return -ENOENT;
712 }
713 
714 static const struct nfsd4_client_tracking_ops nfsd4_legacy_tracking_ops = {
715 	.init		= nfsd4_legacy_tracking_init,
716 	.exit		= nfsd4_legacy_tracking_exit,
717 	.create		= nfsd4_create_clid_dir,
718 	.remove		= nfsd4_remove_clid_dir,
719 	.check		= nfsd4_check_legacy_client,
720 	.grace_done	= nfsd4_recdir_purge_old,
721 };
722 
/* rpc_pipefs directory and pipe name used for the cld upcall pipe */
#define NFSD_PIPE_DIR "nfsd"
#define NFSD_CLD_PIPE "cld"
726 
727 /* per-net-ns structure for holding cld upcall info */
728 struct cld_net {
729 	struct rpc_pipe		*cn_pipe;
730 	spinlock_t		 cn_lock;
731 	struct list_head	 cn_list;
732 	unsigned int		 cn_xid;
733 	bool			 cn_has_legacy;
734 };
735 
736 struct cld_upcall {
737 	struct list_head	 cu_list;
738 	struct cld_net		*cu_net;
739 	struct completion	 cu_done;
740 	struct cld_msg		 cu_msg;
741 };
742 
743 static int
744 __cld_pipe_upcall(struct rpc_pipe *pipe, struct cld_msg *cmsg)
745 {
746 	int ret;
747 	struct rpc_pipe_msg msg;
748 	struct cld_upcall *cup = container_of(cmsg, struct cld_upcall, cu_msg);
749 
750 	memset(&msg, 0, sizeof(msg));
751 	msg.data = cmsg;
752 	msg.len = sizeof(*cmsg);
753 
754 	ret = rpc_queue_upcall(pipe, &msg);
755 	if (ret < 0) {
756 		goto out;
757 	}
758 
759 	wait_for_completion(&cup->cu_done);
760 
761 	if (msg.errno < 0)
762 		ret = msg.errno;
763 out:
764 	return ret;
765 }
766 
767 static int
768 cld_pipe_upcall(struct rpc_pipe *pipe, struct cld_msg *cmsg)
769 {
770 	int ret;
771 
772 	/*
773 	 * -EAGAIN occurs when pipe is closed and reopened while there are
774 	 *  upcalls queued.
775 	 */
776 	do {
777 		ret = __cld_pipe_upcall(pipe, cmsg);
778 	} while (ret == -EAGAIN);
779 
780 	return ret;
781 }
782 
783 static ssize_t
784 __cld_pipe_inprogress_downcall(const struct cld_msg __user *cmsg,
785 		struct nfsd_net *nn)
786 {
787 	uint8_t cmd;
788 	struct xdr_netobj name;
789 	uint16_t namelen;
790 	struct cld_net *cn = nn->cld_net;
791 
792 	if (get_user(cmd, &cmsg->cm_cmd)) {
793 		dprintk("%s: error when copying cmd from userspace", __func__);
794 		return -EFAULT;
795 	}
796 	if (cmd == Cld_GraceStart) {
797 		if (get_user(namelen, &cmsg->cm_u.cm_name.cn_len))
798 			return -EFAULT;
799 		name.data = memdup_user(&cmsg->cm_u.cm_name.cn_id, namelen);
800 		if (IS_ERR_OR_NULL(name.data))
801 			return -EFAULT;
802 		name.len = namelen;
803 		if (name.len > 5 && memcmp(name.data, "hash:", 5) == 0) {
804 			name.len = name.len - 5;
805 			memmove(name.data, name.data + 5, name.len);
806 			cn->cn_has_legacy = true;
807 		}
808 		if (!nfs4_client_to_reclaim(name, nn)) {
809 			kfree(name.data);
810 			return -EFAULT;
811 		}
812 		return sizeof(*cmsg);
813 	}
814 	return -EFAULT;
815 }
816 
817 static ssize_t
818 cld_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
819 {
820 	struct cld_upcall *tmp, *cup;
821 	struct cld_msg __user *cmsg = (struct cld_msg __user *)src;
822 	uint32_t xid;
823 	struct nfsd_net *nn = net_generic(file_inode(filp)->i_sb->s_fs_info,
824 						nfsd_net_id);
825 	struct cld_net *cn = nn->cld_net;
826 	int16_t status;
827 
828 	if (mlen != sizeof(*cmsg)) {
829 		dprintk("%s: got %zu bytes, expected %zu\n", __func__, mlen,
830 			sizeof(*cmsg));
831 		return -EINVAL;
832 	}
833 
834 	/* copy just the xid so we can try to find that */
835 	if (copy_from_user(&xid, &cmsg->cm_xid, sizeof(xid)) != 0) {
836 		dprintk("%s: error when copying xid from userspace", __func__);
837 		return -EFAULT;
838 	}
839 
840 	/*
841 	 * copy the status so we know whether to remove the upcall from the
842 	 * list (for -EINPROGRESS, we just want to make sure the xid is
843 	 * valid, not remove the upcall from the list)
844 	 */
845 	if (get_user(status, &cmsg->cm_status)) {
846 		dprintk("%s: error when copying status from userspace", __func__);
847 		return -EFAULT;
848 	}
849 
850 	/* walk the list and find corresponding xid */
851 	cup = NULL;
852 	spin_lock(&cn->cn_lock);
853 	list_for_each_entry(tmp, &cn->cn_list, cu_list) {
854 		if (get_unaligned(&tmp->cu_msg.cm_xid) == xid) {
855 			cup = tmp;
856 			if (status != -EINPROGRESS)
857 				list_del_init(&cup->cu_list);
858 			break;
859 		}
860 	}
861 	spin_unlock(&cn->cn_lock);
862 
863 	/* couldn't find upcall? */
864 	if (!cup) {
865 		dprintk("%s: couldn't find upcall -- xid=%u\n", __func__, xid);
866 		return -EINVAL;
867 	}
868 
869 	if (status == -EINPROGRESS)
870 		return __cld_pipe_inprogress_downcall(cmsg, nn);
871 
872 	if (copy_from_user(&cup->cu_msg, src, mlen) != 0)
873 		return -EFAULT;
874 
875 	complete(&cup->cu_done);
876 	return mlen;
877 }
878 
879 static void
880 cld_pipe_destroy_msg(struct rpc_pipe_msg *msg)
881 {
882 	struct cld_msg *cmsg = msg->data;
883 	struct cld_upcall *cup = container_of(cmsg, struct cld_upcall,
884 						 cu_msg);
885 
886 	/* errno >= 0 means we got a downcall */
887 	if (msg->errno >= 0)
888 		return;
889 
890 	complete(&cup->cu_done);
891 }
892 
893 static const struct rpc_pipe_ops cld_upcall_ops = {
894 	.upcall		= rpc_pipe_generic_upcall,
895 	.downcall	= cld_pipe_downcall,
896 	.destroy_msg	= cld_pipe_destroy_msg,
897 };
898 
899 static struct dentry *
900 nfsd4_cld_register_sb(struct super_block *sb, struct rpc_pipe *pipe)
901 {
902 	struct dentry *dir, *dentry;
903 
904 	dir = rpc_d_lookup_sb(sb, NFSD_PIPE_DIR);
905 	if (dir == NULL)
906 		return ERR_PTR(-ENOENT);
907 	dentry = rpc_mkpipe_dentry(dir, NFSD_CLD_PIPE, NULL, pipe);
908 	dput(dir);
909 	return dentry;
910 }
911 
912 static void
913 nfsd4_cld_unregister_sb(struct rpc_pipe *pipe)
914 {
915 	if (pipe->dentry)
916 		rpc_unlink(pipe->dentry);
917 }
918 
919 static struct dentry *
920 nfsd4_cld_register_net(struct net *net, struct rpc_pipe *pipe)
921 {
922 	struct super_block *sb;
923 	struct dentry *dentry;
924 
925 	sb = rpc_get_sb_net(net);
926 	if (!sb)
927 		return NULL;
928 	dentry = nfsd4_cld_register_sb(sb, pipe);
929 	rpc_put_sb_net(net);
930 	return dentry;
931 }
932 
/*
 * Undo nfsd4_cld_register_net().  Nothing is done if rpc_get_sb_net()
 * finds no super block for @net.
 */
static void
nfsd4_cld_unregister_net(struct net *net, struct rpc_pipe *pipe)
{
	struct super_block *sb = rpc_get_sb_net(net);

	if (!sb)
		return;

	nfsd4_cld_unregister_sb(pipe);
	rpc_put_sb_net(net);
}
944 
945 /* Initialize rpc_pipefs pipe for communication with client tracking daemon */
946 static int
947 __nfsd4_init_cld_pipe(struct net *net)
948 {
949 	int ret;
950 	struct dentry *dentry;
951 	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
952 	struct cld_net *cn;
953 
954 	if (nn->cld_net)
955 		return 0;
956 
957 	cn = kzalloc(sizeof(*cn), GFP_KERNEL);
958 	if (!cn) {
959 		ret = -ENOMEM;
960 		goto err;
961 	}
962 
963 	cn->cn_pipe = rpc_mkpipe_data(&cld_upcall_ops, RPC_PIPE_WAIT_FOR_OPEN);
964 	if (IS_ERR(cn->cn_pipe)) {
965 		ret = PTR_ERR(cn->cn_pipe);
966 		goto err;
967 	}
968 	spin_lock_init(&cn->cn_lock);
969 	INIT_LIST_HEAD(&cn->cn_list);
970 
971 	dentry = nfsd4_cld_register_net(net, cn->cn_pipe);
972 	if (IS_ERR(dentry)) {
973 		ret = PTR_ERR(dentry);
974 		goto err_destroy_data;
975 	}
976 
977 	cn->cn_pipe->dentry = dentry;
978 	cn->cn_has_legacy = false;
979 	nn->cld_net = cn;
980 	return 0;
981 
982 err_destroy_data:
983 	rpc_destroy_pipe_data(cn->cn_pipe);
984 err:
985 	kfree(cn);
986 	printk(KERN_ERR "NFSD: unable to create nfsdcld upcall pipe (%d)\n",
987 			ret);
988 	return ret;
989 }
990 
/*
 * Set up the cld pipe for @net and announce that the old nfsdcld
 * tracking operations are in use.  Returns the setup status.
 */
static int
nfsd4_init_cld_pipe(struct net *net)
{
	int err = __nfsd4_init_cld_pipe(net);

	if (err)
		return err;

	printk("NFSD: Using old nfsdcld client tracking operations.\n");
	return err;
}
1001 
1002 static void
1003 nfsd4_remove_cld_pipe(struct net *net)
1004 {
1005 	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
1006 	struct cld_net *cn = nn->cld_net;
1007 
1008 	nfsd4_cld_unregister_net(net, cn->cn_pipe);
1009 	rpc_destroy_pipe_data(cn->cn_pipe);
1010 	kfree(nn->cld_net);
1011 	nn->cld_net = NULL;
1012 }
1013 
1014 static struct cld_upcall *
1015 alloc_cld_upcall(struct cld_net *cn)
1016 {
1017 	struct cld_upcall *new, *tmp;
1018 
1019 	new = kzalloc(sizeof(*new), GFP_KERNEL);
1020 	if (!new)
1021 		return new;
1022 
1023 	/* FIXME: hard cap on number in flight? */
1024 restart_search:
1025 	spin_lock(&cn->cn_lock);
1026 	list_for_each_entry(tmp, &cn->cn_list, cu_list) {
1027 		if (tmp->cu_msg.cm_xid == cn->cn_xid) {
1028 			cn->cn_xid++;
1029 			spin_unlock(&cn->cn_lock);
1030 			goto restart_search;
1031 		}
1032 	}
1033 	init_completion(&new->cu_done);
1034 	new->cu_msg.cm_vers = CLD_UPCALL_VERSION;
1035 	put_unaligned(cn->cn_xid++, &new->cu_msg.cm_xid);
1036 	new->cu_net = cn;
1037 	list_add(&new->cu_list, &cn->cn_list);
1038 	spin_unlock(&cn->cn_lock);
1039 
1040 	dprintk("%s: allocated xid %u\n", __func__, new->cu_msg.cm_xid);
1041 
1042 	return new;
1043 }
1044 
1045 static void
1046 free_cld_upcall(struct cld_upcall *victim)
1047 {
1048 	struct cld_net *cn = victim->cu_net;
1049 
1050 	spin_lock(&cn->cn_lock);
1051 	list_del(&victim->cu_list);
1052 	spin_unlock(&cn->cn_lock);
1053 	kfree(victim);
1054 }
1055 
1056 /* Ask daemon to create a new record */
1057 static void
1058 nfsd4_cld_create(struct nfs4_client *clp)
1059 {
1060 	int ret;
1061 	struct cld_upcall *cup;
1062 	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
1063 	struct cld_net *cn = nn->cld_net;
1064 
1065 	/* Don't upcall if it's already stored */
1066 	if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
1067 		return;
1068 
1069 	cup = alloc_cld_upcall(cn);
1070 	if (!cup) {
1071 		ret = -ENOMEM;
1072 		goto out_err;
1073 	}
1074 
1075 	cup->cu_msg.cm_cmd = Cld_Create;
1076 	cup->cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len;
1077 	memcpy(cup->cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data,
1078 			clp->cl_name.len);
1079 
1080 	ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg);
1081 	if (!ret) {
1082 		ret = cup->cu_msg.cm_status;
1083 		set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
1084 	}
1085 
1086 	free_cld_upcall(cup);
1087 out_err:
1088 	if (ret)
1089 		printk(KERN_ERR "NFSD: Unable to create client "
1090 				"record on stable storage: %d\n", ret);
1091 }
1092 
1093 /* Ask daemon to create a new record */
1094 static void
1095 nfsd4_cld_remove(struct nfs4_client *clp)
1096 {
1097 	int ret;
1098 	struct cld_upcall *cup;
1099 	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
1100 	struct cld_net *cn = nn->cld_net;
1101 
1102 	/* Don't upcall if it's already removed */
1103 	if (!test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
1104 		return;
1105 
1106 	cup = alloc_cld_upcall(cn);
1107 	if (!cup) {
1108 		ret = -ENOMEM;
1109 		goto out_err;
1110 	}
1111 
1112 	cup->cu_msg.cm_cmd = Cld_Remove;
1113 	cup->cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len;
1114 	memcpy(cup->cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data,
1115 			clp->cl_name.len);
1116 
1117 	ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg);
1118 	if (!ret) {
1119 		ret = cup->cu_msg.cm_status;
1120 		clear_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
1121 	}
1122 
1123 	free_cld_upcall(cup);
1124 out_err:
1125 	if (ret)
1126 		printk(KERN_ERR "NFSD: Unable to remove client "
1127 				"record from stable storage: %d\n", ret);
1128 }
1129 
1130 /*
1131  * For older nfsdcld's that do not allow us to "slurp" the clients
1132  * from the tracking database during startup.
1133  *
1134  * Check for presence of a record, and update its timestamp
1135  */
1136 static int
1137 nfsd4_cld_check_v0(struct nfs4_client *clp)
1138 {
1139 	int ret;
1140 	struct cld_upcall *cup;
1141 	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
1142 	struct cld_net *cn = nn->cld_net;
1143 
1144 	/* Don't upcall if one was already stored during this grace pd */
1145 	if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
1146 		return 0;
1147 
1148 	cup = alloc_cld_upcall(cn);
1149 	if (!cup) {
1150 		printk(KERN_ERR "NFSD: Unable to check client record on "
1151 				"stable storage: %d\n", -ENOMEM);
1152 		return -ENOMEM;
1153 	}
1154 
1155 	cup->cu_msg.cm_cmd = Cld_Check;
1156 	cup->cu_msg.cm_u.cm_name.cn_len = clp->cl_name.len;
1157 	memcpy(cup->cu_msg.cm_u.cm_name.cn_id, clp->cl_name.data,
1158 			clp->cl_name.len);
1159 
1160 	ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg);
1161 	if (!ret) {
1162 		ret = cup->cu_msg.cm_status;
1163 		set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
1164 	}
1165 
1166 	free_cld_upcall(cup);
1167 	return ret;
1168 }
1169 
1170 /*
1171  * For newer nfsdcld's that allow us to "slurp" the clients
1172  * from the tracking database during startup.
1173  *
1174  * Check for presence of a record in the reclaim_str_hashtbl
1175  */
1176 static int
1177 nfsd4_cld_check(struct nfs4_client *clp)
1178 {
1179 	struct nfs4_client_reclaim *crp;
1180 	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
1181 	struct cld_net *cn = nn->cld_net;
1182 	int status;
1183 	char dname[HEXDIR_LEN];
1184 	struct xdr_netobj name;
1185 
1186 	/* did we already find that this client is stable? */
1187 	if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
1188 		return 0;
1189 
1190 	/* look for it in the reclaim hashtable otherwise */
1191 	crp = nfsd4_find_reclaim_client(clp->cl_name, nn);
1192 	if (crp)
1193 		goto found;
1194 
1195 	if (cn->cn_has_legacy) {
1196 		status = nfs4_make_rec_clidname(dname, &clp->cl_name);
1197 		if (status)
1198 			return -ENOENT;
1199 
1200 		name.data = kmemdup(dname, HEXDIR_LEN, GFP_KERNEL);
1201 		if (!name.data) {
1202 			dprintk("%s: failed to allocate memory for name.data!\n",
1203 				__func__);
1204 			return -ENOENT;
1205 		}
1206 		name.len = HEXDIR_LEN;
1207 		crp = nfsd4_find_reclaim_client(name, nn);
1208 		kfree(name.data);
1209 		if (crp)
1210 			goto found;
1211 
1212 	}
1213 	return -ENOENT;
1214 found:
1215 	crp->cr_clp = clp;
1216 	return 0;
1217 }
1218 
1219 static int
1220 nfsd4_cld_grace_start(struct nfsd_net *nn)
1221 {
1222 	int ret;
1223 	struct cld_upcall *cup;
1224 	struct cld_net *cn = nn->cld_net;
1225 
1226 	cup = alloc_cld_upcall(cn);
1227 	if (!cup) {
1228 		ret = -ENOMEM;
1229 		goto out_err;
1230 	}
1231 
1232 	cup->cu_msg.cm_cmd = Cld_GraceStart;
1233 	ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg);
1234 	if (!ret)
1235 		ret = cup->cu_msg.cm_status;
1236 
1237 	free_cld_upcall(cup);
1238 out_err:
1239 	if (ret)
1240 		dprintk("%s: Unable to get clients from userspace: %d\n",
1241 			__func__, ret);
1242 	return ret;
1243 }
1244 
1245 /* For older nfsdcld's that need cm_gracetime */
1246 static void
1247 nfsd4_cld_grace_done_v0(struct nfsd_net *nn)
1248 {
1249 	int ret;
1250 	struct cld_upcall *cup;
1251 	struct cld_net *cn = nn->cld_net;
1252 
1253 	cup = alloc_cld_upcall(cn);
1254 	if (!cup) {
1255 		ret = -ENOMEM;
1256 		goto out_err;
1257 	}
1258 
1259 	cup->cu_msg.cm_cmd = Cld_GraceDone;
1260 	cup->cu_msg.cm_u.cm_gracetime = (int64_t)nn->boot_time;
1261 	ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg);
1262 	if (!ret)
1263 		ret = cup->cu_msg.cm_status;
1264 
1265 	free_cld_upcall(cup);
1266 out_err:
1267 	if (ret)
1268 		printk(KERN_ERR "NFSD: Unable to end grace period: %d\n", ret);
1269 }
1270 
1271 /*
1272  * For newer nfsdcld's that do not need cm_gracetime.  We also need to call
1273  * nfs4_release_reclaim() to clear out the reclaim_str_hashtbl.
1274  */
1275 static void
1276 nfsd4_cld_grace_done(struct nfsd_net *nn)
1277 {
1278 	int ret;
1279 	struct cld_upcall *cup;
1280 	struct cld_net *cn = nn->cld_net;
1281 
1282 	cup = alloc_cld_upcall(cn);
1283 	if (!cup) {
1284 		ret = -ENOMEM;
1285 		goto out_err;
1286 	}
1287 
1288 	cup->cu_msg.cm_cmd = Cld_GraceDone;
1289 	ret = cld_pipe_upcall(cn->cn_pipe, &cup->cu_msg);
1290 	if (!ret)
1291 		ret = cup->cu_msg.cm_status;
1292 
1293 	free_cld_upcall(cup);
1294 out_err:
1295 	nfs4_release_reclaim(nn);
1296 	if (ret)
1297 		printk(KERN_ERR "NFSD: Unable to end grace period: %d\n", ret);
1298 }
1299 
1300 static int
1301 nfs4_cld_state_init(struct net *net)
1302 {
1303 	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
1304 	int i;
1305 
1306 	nn->reclaim_str_hashtbl = kmalloc_array(CLIENT_HASH_SIZE,
1307 						sizeof(struct list_head),
1308 						GFP_KERNEL);
1309 	if (!nn->reclaim_str_hashtbl)
1310 		return -ENOMEM;
1311 
1312 	for (i = 0; i < CLIENT_HASH_SIZE; i++)
1313 		INIT_LIST_HEAD(&nn->reclaim_str_hashtbl[i]);
1314 	nn->reclaim_str_hashtbl_size = 0;
1315 	nn->track_reclaim_completes = true;
1316 	atomic_set(&nn->nr_reclaim_complete, 0);
1317 
1318 	return 0;
1319 }
1320 
1321 static void
1322 nfs4_cld_state_shutdown(struct net *net)
1323 {
1324 	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
1325 
1326 	nn->track_reclaim_completes = false;
1327 	kfree(nn->reclaim_str_hashtbl);
1328 }
1329 
1330 static bool
1331 cld_running(struct nfsd_net *nn)
1332 {
1333 	struct cld_net *cn = nn->cld_net;
1334 	struct rpc_pipe *pipe = cn->cn_pipe;
1335 
1336 	return pipe->nreaders || pipe->nwriters;
1337 }
1338 
1339 static int
1340 nfsd4_cld_tracking_init(struct net *net)
1341 {
1342 	int status;
1343 	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
1344 	bool running;
1345 	int retries = 10;
1346 
1347 	status = nfs4_cld_state_init(net);
1348 	if (status)
1349 		return status;
1350 
1351 	status = __nfsd4_init_cld_pipe(net);
1352 	if (status)
1353 		goto err_shutdown;
1354 
1355 	/*
1356 	 * rpc pipe upcalls take 30 seconds to time out, so we don't want to
1357 	 * queue an upcall unless we know that nfsdcld is running (because we
1358 	 * want this to fail fast so that nfsd4_client_tracking_init() can try
1359 	 * the next client tracking method).  nfsdcld should already be running
1360 	 * before nfsd is started, so the wait here is for nfsdcld to open the
1361 	 * pipefs file we just created.
1362 	 */
1363 	while (!(running = cld_running(nn)) && retries--)
1364 		msleep(100);
1365 
1366 	if (!running) {
1367 		status = -ETIMEDOUT;
1368 		goto err_remove;
1369 	}
1370 
1371 	status = nfsd4_cld_grace_start(nn);
1372 	if (status) {
1373 		if (status == -EOPNOTSUPP)
1374 			printk(KERN_WARNING "NFSD: Please upgrade nfsdcld.\n");
1375 		nfs4_release_reclaim(nn);
1376 		goto err_remove;
1377 	} else
1378 		printk("NFSD: Using nfsdcld client tracking operations.\n");
1379 	return 0;
1380 
1381 err_remove:
1382 	nfsd4_remove_cld_pipe(net);
1383 err_shutdown:
1384 	nfs4_cld_state_shutdown(net);
1385 	return status;
1386 }
1387 
1388 static void
1389 nfsd4_cld_tracking_exit(struct net *net)
1390 {
1391 	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
1392 
1393 	nfs4_release_reclaim(nn);
1394 	nfsd4_remove_cld_pipe(net);
1395 	nfs4_cld_state_shutdown(net);
1396 }
1397 
1398 /* For older nfsdcld's */
1399 static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops_v0 = {
1400 	.init		= nfsd4_init_cld_pipe,
1401 	.exit		= nfsd4_remove_cld_pipe,
1402 	.create		= nfsd4_cld_create,
1403 	.remove		= nfsd4_cld_remove,
1404 	.check		= nfsd4_cld_check_v0,
1405 	.grace_done	= nfsd4_cld_grace_done_v0,
1406 };
1407 
1408 /* For newer nfsdcld's */
1409 static const struct nfsd4_client_tracking_ops nfsd4_cld_tracking_ops = {
1410 	.init		= nfsd4_cld_tracking_init,
1411 	.exit		= nfsd4_cld_tracking_exit,
1412 	.create		= nfsd4_cld_create,
1413 	.remove		= nfsd4_cld_remove,
1414 	.check		= nfsd4_cld_check,
1415 	.grace_done	= nfsd4_cld_grace_done,
1416 };
1417 
1418 /* upcall via usermodehelper */
1419 static char cltrack_prog[PATH_MAX] = "/sbin/nfsdcltrack";
1420 module_param_string(cltrack_prog, cltrack_prog, sizeof(cltrack_prog),
1421 			S_IRUGO|S_IWUSR);
1422 MODULE_PARM_DESC(cltrack_prog, "Path to the nfsdcltrack upcall program");
1423 
1424 static bool cltrack_legacy_disable;
1425 module_param(cltrack_legacy_disable, bool, S_IRUGO|S_IWUSR);
1426 MODULE_PARM_DESC(cltrack_legacy_disable,
1427 		"Disable legacy recoverydir conversion. Default: false");
1428 
1429 #define LEGACY_TOPDIR_ENV_PREFIX "NFSDCLTRACK_LEGACY_TOPDIR="
1430 #define LEGACY_RECDIR_ENV_PREFIX "NFSDCLTRACK_LEGACY_RECDIR="
1431 #define HAS_SESSION_ENV_PREFIX "NFSDCLTRACK_CLIENT_HAS_SESSION="
1432 #define GRACE_START_ENV_PREFIX "NFSDCLTRACK_GRACE_START="
1433 
1434 static char *
1435 nfsd4_cltrack_legacy_topdir(void)
1436 {
1437 	int copied;
1438 	size_t len;
1439 	char *result;
1440 
1441 	if (cltrack_legacy_disable)
1442 		return NULL;
1443 
1444 	len = strlen(LEGACY_TOPDIR_ENV_PREFIX) +
1445 		strlen(nfs4_recoverydir()) + 1;
1446 
1447 	result = kmalloc(len, GFP_KERNEL);
1448 	if (!result)
1449 		return result;
1450 
1451 	copied = snprintf(result, len, LEGACY_TOPDIR_ENV_PREFIX "%s",
1452 				nfs4_recoverydir());
1453 	if (copied >= len) {
1454 		/* just return nothing if output was truncated */
1455 		kfree(result);
1456 		return NULL;
1457 	}
1458 
1459 	return result;
1460 }
1461 
1462 static char *
1463 nfsd4_cltrack_legacy_recdir(const struct xdr_netobj *name)
1464 {
1465 	int copied;
1466 	size_t len;
1467 	char *result;
1468 
1469 	if (cltrack_legacy_disable)
1470 		return NULL;
1471 
1472 	/* +1 is for '/' between "topdir" and "recdir" */
1473 	len = strlen(LEGACY_RECDIR_ENV_PREFIX) +
1474 		strlen(nfs4_recoverydir()) + 1 + HEXDIR_LEN;
1475 
1476 	result = kmalloc(len, GFP_KERNEL);
1477 	if (!result)
1478 		return result;
1479 
1480 	copied = snprintf(result, len, LEGACY_RECDIR_ENV_PREFIX "%s/",
1481 				nfs4_recoverydir());
1482 	if (copied > (len - HEXDIR_LEN)) {
1483 		/* just return nothing if output will be truncated */
1484 		kfree(result);
1485 		return NULL;
1486 	}
1487 
1488 	copied = nfs4_make_rec_clidname(result + copied, name);
1489 	if (copied) {
1490 		kfree(result);
1491 		return NULL;
1492 	}
1493 
1494 	return result;
1495 }
1496 
1497 static char *
1498 nfsd4_cltrack_client_has_session(struct nfs4_client *clp)
1499 {
1500 	int copied;
1501 	size_t len;
1502 	char *result;
1503 
1504 	/* prefix + Y/N character + terminating NULL */
1505 	len = strlen(HAS_SESSION_ENV_PREFIX) + 1 + 1;
1506 
1507 	result = kmalloc(len, GFP_KERNEL);
1508 	if (!result)
1509 		return result;
1510 
1511 	copied = snprintf(result, len, HAS_SESSION_ENV_PREFIX "%c",
1512 				clp->cl_minorversion ? 'Y' : 'N');
1513 	if (copied >= len) {
1514 		/* just return nothing if output was truncated */
1515 		kfree(result);
1516 		return NULL;
1517 	}
1518 
1519 	return result;
1520 }
1521 
1522 static char *
1523 nfsd4_cltrack_grace_start(time_t grace_start)
1524 {
1525 	int copied;
1526 	size_t len;
1527 	char *result;
1528 
1529 	/* prefix + max width of int64_t string + terminating NULL */
1530 	len = strlen(GRACE_START_ENV_PREFIX) + 22 + 1;
1531 
1532 	result = kmalloc(len, GFP_KERNEL);
1533 	if (!result)
1534 		return result;
1535 
1536 	copied = snprintf(result, len, GRACE_START_ENV_PREFIX "%ld",
1537 				grace_start);
1538 	if (copied >= len) {
1539 		/* just return nothing if output was truncated */
1540 		kfree(result);
1541 		return NULL;
1542 	}
1543 
1544 	return result;
1545 }
1546 
1547 static int
1548 nfsd4_umh_cltrack_upcall(char *cmd, char *arg, char *env0, char *env1)
1549 {
1550 	char *envp[3];
1551 	char *argv[4];
1552 	int ret;
1553 
1554 	if (unlikely(!cltrack_prog[0])) {
1555 		dprintk("%s: cltrack_prog is disabled\n", __func__);
1556 		return -EACCES;
1557 	}
1558 
1559 	dprintk("%s: cmd: %s\n", __func__, cmd);
1560 	dprintk("%s: arg: %s\n", __func__, arg ? arg : "(null)");
1561 	dprintk("%s: env0: %s\n", __func__, env0 ? env0 : "(null)");
1562 	dprintk("%s: env1: %s\n", __func__, env1 ? env1 : "(null)");
1563 
1564 	envp[0] = env0;
1565 	envp[1] = env1;
1566 	envp[2] = NULL;
1567 
1568 	argv[0] = (char *)cltrack_prog;
1569 	argv[1] = cmd;
1570 	argv[2] = arg;
1571 	argv[3] = NULL;
1572 
1573 	ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
1574 	/*
1575 	 * Disable the upcall mechanism if we're getting an ENOENT or EACCES
1576 	 * error. The admin can re-enable it on the fly by using sysfs
1577 	 * once the problem has been fixed.
1578 	 */
1579 	if (ret == -ENOENT || ret == -EACCES) {
1580 		dprintk("NFSD: %s was not found or isn't executable (%d). "
1581 			"Setting cltrack_prog to blank string!",
1582 			cltrack_prog, ret);
1583 		cltrack_prog[0] = '\0';
1584 	}
1585 	dprintk("%s: %s return value: %d\n", __func__, cltrack_prog, ret);
1586 
1587 	return ret;
1588 }
1589 
1590 static char *
1591 bin_to_hex_dup(const unsigned char *src, int srclen)
1592 {
1593 	int i;
1594 	char *buf, *hex;
1595 
1596 	/* +1 for terminating NULL */
1597 	buf = kmalloc((srclen * 2) + 1, GFP_KERNEL);
1598 	if (!buf)
1599 		return buf;
1600 
1601 	hex = buf;
1602 	for (i = 0; i < srclen; i++) {
1603 		sprintf(hex, "%2.2x", *src++);
1604 		hex += 2;
1605 	}
1606 	return buf;
1607 }
1608 
1609 static int
1610 nfsd4_umh_cltrack_init(struct net *net)
1611 {
1612 	int ret;
1613 	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
1614 	char *grace_start = nfsd4_cltrack_grace_start(nn->boot_time);
1615 
1616 	/* XXX: The usermode helper s not working in container yet. */
1617 	if (net != &init_net) {
1618 		pr_warn("NFSD: attempt to initialize umh client tracking in a container ignored.\n");
1619 		kfree(grace_start);
1620 		return -EINVAL;
1621 	}
1622 
1623 	ret = nfsd4_umh_cltrack_upcall("init", NULL, grace_start, NULL);
1624 	kfree(grace_start);
1625 	if (!ret)
1626 		printk("NFSD: Using UMH upcall client tracking operations.\n");
1627 	return ret;
1628 }
1629 
1630 static void
1631 nfsd4_cltrack_upcall_lock(struct nfs4_client *clp)
1632 {
1633 	wait_on_bit_lock(&clp->cl_flags, NFSD4_CLIENT_UPCALL_LOCK,
1634 			 TASK_UNINTERRUPTIBLE);
1635 }
1636 
1637 static void
1638 nfsd4_cltrack_upcall_unlock(struct nfs4_client *clp)
1639 {
1640 	smp_mb__before_atomic();
1641 	clear_bit(NFSD4_CLIENT_UPCALL_LOCK, &clp->cl_flags);
1642 	smp_mb__after_atomic();
1643 	wake_up_bit(&clp->cl_flags, NFSD4_CLIENT_UPCALL_LOCK);
1644 }
1645 
1646 static void
1647 nfsd4_umh_cltrack_create(struct nfs4_client *clp)
1648 {
1649 	char *hexid, *has_session, *grace_start;
1650 	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
1651 
1652 	/*
1653 	 * With v4.0 clients, there's little difference in outcome between a
1654 	 * create and check operation, and we can end up calling into this
1655 	 * function multiple times per client (once for each openowner). So,
1656 	 * for v4.0 clients skip upcalling once the client has been recorded
1657 	 * on stable storage.
1658 	 *
1659 	 * For v4.1+ clients, the outcome of the two operations is different,
1660 	 * so we must ensure that we upcall for the create operation. v4.1+
1661 	 * clients call this on RECLAIM_COMPLETE though, so we should only end
1662 	 * up doing a single create upcall per client.
1663 	 */
1664 	if (clp->cl_minorversion == 0 &&
1665 	    test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
1666 		return;
1667 
1668 	hexid = bin_to_hex_dup(clp->cl_name.data, clp->cl_name.len);
1669 	if (!hexid) {
1670 		dprintk("%s: can't allocate memory for upcall!\n", __func__);
1671 		return;
1672 	}
1673 
1674 	has_session = nfsd4_cltrack_client_has_session(clp);
1675 	grace_start = nfsd4_cltrack_grace_start(nn->boot_time);
1676 
1677 	nfsd4_cltrack_upcall_lock(clp);
1678 	if (!nfsd4_umh_cltrack_upcall("create", hexid, has_session, grace_start))
1679 		set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
1680 	nfsd4_cltrack_upcall_unlock(clp);
1681 
1682 	kfree(has_session);
1683 	kfree(grace_start);
1684 	kfree(hexid);
1685 }
1686 
1687 static void
1688 nfsd4_umh_cltrack_remove(struct nfs4_client *clp)
1689 {
1690 	char *hexid;
1691 
1692 	if (!test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
1693 		return;
1694 
1695 	hexid = bin_to_hex_dup(clp->cl_name.data, clp->cl_name.len);
1696 	if (!hexid) {
1697 		dprintk("%s: can't allocate memory for upcall!\n", __func__);
1698 		return;
1699 	}
1700 
1701 	nfsd4_cltrack_upcall_lock(clp);
1702 	if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags) &&
1703 	    nfsd4_umh_cltrack_upcall("remove", hexid, NULL, NULL) == 0)
1704 		clear_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
1705 	nfsd4_cltrack_upcall_unlock(clp);
1706 
1707 	kfree(hexid);
1708 }
1709 
1710 static int
1711 nfsd4_umh_cltrack_check(struct nfs4_client *clp)
1712 {
1713 	int ret;
1714 	char *hexid, *has_session, *legacy;
1715 
1716 	if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags))
1717 		return 0;
1718 
1719 	hexid = bin_to_hex_dup(clp->cl_name.data, clp->cl_name.len);
1720 	if (!hexid) {
1721 		dprintk("%s: can't allocate memory for upcall!\n", __func__);
1722 		return -ENOMEM;
1723 	}
1724 
1725 	has_session = nfsd4_cltrack_client_has_session(clp);
1726 	legacy = nfsd4_cltrack_legacy_recdir(&clp->cl_name);
1727 
1728 	nfsd4_cltrack_upcall_lock(clp);
1729 	if (test_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags)) {
1730 		ret = 0;
1731 	} else {
1732 		ret = nfsd4_umh_cltrack_upcall("check", hexid, has_session, legacy);
1733 		if (ret == 0)
1734 			set_bit(NFSD4_CLIENT_STABLE, &clp->cl_flags);
1735 	}
1736 	nfsd4_cltrack_upcall_unlock(clp);
1737 	kfree(has_session);
1738 	kfree(legacy);
1739 	kfree(hexid);
1740 
1741 	return ret;
1742 }
1743 
1744 static void
1745 nfsd4_umh_cltrack_grace_done(struct nfsd_net *nn)
1746 {
1747 	char *legacy;
1748 	char timestr[22]; /* FIXME: better way to determine max size? */
1749 
1750 	sprintf(timestr, "%ld", nn->boot_time);
1751 	legacy = nfsd4_cltrack_legacy_topdir();
1752 	nfsd4_umh_cltrack_upcall("gracedone", timestr, legacy, NULL);
1753 	kfree(legacy);
1754 }
1755 
1756 static const struct nfsd4_client_tracking_ops nfsd4_umh_tracking_ops = {
1757 	.init		= nfsd4_umh_cltrack_init,
1758 	.exit		= NULL,
1759 	.create		= nfsd4_umh_cltrack_create,
1760 	.remove		= nfsd4_umh_cltrack_remove,
1761 	.check		= nfsd4_umh_cltrack_check,
1762 	.grace_done	= nfsd4_umh_cltrack_grace_done,
1763 };
1764 
1765 int
1766 nfsd4_client_tracking_init(struct net *net)
1767 {
1768 	int status;
1769 	struct path path;
1770 	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
1771 
1772 	/* just run the init if it the method is already decided */
1773 	if (nn->client_tracking_ops)
1774 		goto do_init;
1775 
1776 	/* First, try to use nfsdcld */
1777 	nn->client_tracking_ops = &nfsd4_cld_tracking_ops;
1778 	status = nn->client_tracking_ops->init(net);
1779 	if (!status)
1780 		return status;
1781 	if (status != -ETIMEDOUT) {
1782 		nn->client_tracking_ops = &nfsd4_cld_tracking_ops_v0;
1783 		status = nn->client_tracking_ops->init(net);
1784 		if (!status)
1785 			return status;
1786 	}
1787 
1788 	/*
1789 	 * Next, try the UMH upcall.
1790 	 */
1791 	nn->client_tracking_ops = &nfsd4_umh_tracking_ops;
1792 	status = nn->client_tracking_ops->init(net);
1793 	if (!status)
1794 		return status;
1795 
1796 	/*
1797 	 * Finally, See if the recoverydir exists and is a directory.
1798 	 * If it is, then use the legacy ops.
1799 	 */
1800 	nn->client_tracking_ops = &nfsd4_legacy_tracking_ops;
1801 	status = kern_path(nfs4_recoverydir(), LOOKUP_FOLLOW, &path);
1802 	if (!status) {
1803 		status = d_is_dir(path.dentry);
1804 		path_put(&path);
1805 		if (!status) {
1806 			status = -EINVAL;
1807 			goto out;
1808 		}
1809 	}
1810 
1811 do_init:
1812 	status = nn->client_tracking_ops->init(net);
1813 out:
1814 	if (status) {
1815 		printk(KERN_WARNING "NFSD: Unable to initialize client "
1816 				    "recovery tracking! (%d)\n", status);
1817 		nn->client_tracking_ops = NULL;
1818 	}
1819 	return status;
1820 }
1821 
1822 void
1823 nfsd4_client_tracking_exit(struct net *net)
1824 {
1825 	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
1826 
1827 	if (nn->client_tracking_ops) {
1828 		if (nn->client_tracking_ops->exit)
1829 			nn->client_tracking_ops->exit(net);
1830 		nn->client_tracking_ops = NULL;
1831 	}
1832 }
1833 
1834 void
1835 nfsd4_client_record_create(struct nfs4_client *clp)
1836 {
1837 	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
1838 
1839 	if (nn->client_tracking_ops)
1840 		nn->client_tracking_ops->create(clp);
1841 }
1842 
1843 void
1844 nfsd4_client_record_remove(struct nfs4_client *clp)
1845 {
1846 	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
1847 
1848 	if (nn->client_tracking_ops)
1849 		nn->client_tracking_ops->remove(clp);
1850 }
1851 
1852 int
1853 nfsd4_client_record_check(struct nfs4_client *clp)
1854 {
1855 	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
1856 
1857 	if (nn->client_tracking_ops)
1858 		return nn->client_tracking_ops->check(clp);
1859 
1860 	return -EOPNOTSUPP;
1861 }
1862 
1863 void
1864 nfsd4_record_grace_done(struct nfsd_net *nn)
1865 {
1866 	if (nn->client_tracking_ops)
1867 		nn->client_tracking_ops->grace_done(nn);
1868 }
1869 
1870 static int
1871 rpc_pipefs_event(struct notifier_block *nb, unsigned long event, void *ptr)
1872 {
1873 	struct super_block *sb = ptr;
1874 	struct net *net = sb->s_fs_info;
1875 	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
1876 	struct cld_net *cn = nn->cld_net;
1877 	struct dentry *dentry;
1878 	int ret = 0;
1879 
1880 	if (!try_module_get(THIS_MODULE))
1881 		return 0;
1882 
1883 	if (!cn) {
1884 		module_put(THIS_MODULE);
1885 		return 0;
1886 	}
1887 
1888 	switch (event) {
1889 	case RPC_PIPEFS_MOUNT:
1890 		dentry = nfsd4_cld_register_sb(sb, cn->cn_pipe);
1891 		if (IS_ERR(dentry)) {
1892 			ret = PTR_ERR(dentry);
1893 			break;
1894 		}
1895 		cn->cn_pipe->dentry = dentry;
1896 		break;
1897 	case RPC_PIPEFS_UMOUNT:
1898 		if (cn->cn_pipe->dentry)
1899 			nfsd4_cld_unregister_sb(cn->cn_pipe);
1900 		break;
1901 	default:
1902 		ret = -ENOTSUPP;
1903 		break;
1904 	}
1905 	module_put(THIS_MODULE);
1906 	return ret;
1907 }
1908 
1909 static struct notifier_block nfsd4_cld_block = {
1910 	.notifier_call = rpc_pipefs_event,
1911 };
1912 
1913 int
1914 register_cld_notifier(void)
1915 {
1916 	return rpc_pipefs_notifier_register(&nfsd4_cld_block);
1917 }
1918 
1919 void
1920 unregister_cld_notifier(void)
1921 {
1922 	rpc_pipefs_notifier_unregister(&nfsd4_cld_block);
1923 }
1924