xref: /linux/fs/nfs/filelayout/filelayoutdev.c (revision da42b5229b27bb5c0eff3408c92f025e6041dad3)
1  /*
2   *  Device operations for the pnfs nfs4 file layout driver.
3   *
4   *  Copyright (c) 2002
5   *  The Regents of the University of Michigan
6   *  All Rights Reserved
7   *
8   *  Dean Hildebrand <dhildebz@umich.edu>
9   *  Garth Goodson   <Garth.Goodson@netapp.com>
10   *
11   *  Permission is granted to use, copy, create derivative works, and
12   *  redistribute this software and such derivative works for any purpose,
13   *  so long as the name of the University of Michigan is not used in
14   *  any advertising or publicity pertaining to the use or distribution
15   *  of this software without specific, written prior authorization. If
16   *  the above copyright notice or any other identification of the
17   *  University of Michigan is included in any copy of any portion of
18   *  this software, then the disclaimer below must also be included.
19   *
20   *  This software is provided as is, without representation or warranty
21   *  of any kind either express or implied, including without limitation
22   *  the implied warranties of merchantability, fitness for a particular
23   *  purpose, or noninfringement.  The Regents of the University of
24   *  Michigan shall not be liable for any damages, including special,
25   *  indirect, incidental, or consequential damages, with respect to any
26   *  claim arising out of or in connection with the use of the software,
27   *  even if it has been or is hereafter advised of the possibility of
28   *  such damages.
29   */
30  
31  #include <linux/nfs_fs.h>
32  #include <linux/vmalloc.h>
33  #include <linux/module.h>
34  
35  #include "../internal.h"
36  #include "../nfs4session.h"
37  #include "filelayout.h"
38  #include "../nfs4trace.h"
39  
40  #define NFSDBG_FACILITY		NFSDBG_PNFS_LD
41  
/*
 * Tunables handed to nfs4_pnfs_ds_connect() by nfs4_fl_prepare_ds().
 * dataserver_timeo is the time (in tenths of a second) to wait for a data
 * server response before retrying; dataserver_retrans is the number of
 * retries before further recovery action. Both start at the NFS4_DEF_DS_*
 * defaults and are exposed through module_param() further down.
 */
static unsigned int dataserver_timeo = NFS4_DEF_DS_TIMEO;
static unsigned int dataserver_retrans = NFS4_DEF_DS_RETRANS;
44  
45  void
46  nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
47  {
48  	struct nfs4_pnfs_ds *ds;
49  	int i;
50  
51  	nfs4_print_deviceid(&dsaddr->id_node.deviceid);
52  
53  	for (i = 0; i < dsaddr->ds_num; i++) {
54  		ds = dsaddr->ds_list[i];
55  		if (ds != NULL)
56  			nfs4_pnfs_ds_put(ds);
57  	}
58  	kfree(dsaddr->stripe_indices);
59  	kfree_rcu(dsaddr, id_node.rcu);
60  }
61  
62  /* Decode opaque device data and return the result */
63  struct nfs4_file_layout_dsaddr *
64  nfs4_fl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
65  		gfp_t gfp_flags)
66  {
67  	int i;
68  	u32 cnt, num;
69  	u8 *indexp;
70  	__be32 *p;
71  	u8 *stripe_indices;
72  	u8 max_stripe_index;
73  	struct nfs4_file_layout_dsaddr *dsaddr = NULL;
74  	struct xdr_stream stream;
75  	struct xdr_buf buf;
76  	struct page *scratch;
77  	struct list_head dsaddrs;
78  	struct nfs4_pnfs_ds_addr *da;
79  
80  	/* set up xdr stream */
81  	scratch = alloc_page(gfp_flags);
82  	if (!scratch)
83  		goto out_err;
84  
85  	xdr_init_decode_pages(&stream, &buf, pdev->pages, pdev->pglen);
86  	xdr_set_scratch_page(&stream, scratch);
87  
88  	/* Get the stripe count (number of stripe index) */
89  	p = xdr_inline_decode(&stream, 4);
90  	if (unlikely(!p))
91  		goto out_err_free_scratch;
92  
93  	cnt = be32_to_cpup(p);
94  	dprintk("%s stripe count  %d\n", __func__, cnt);
95  	if (cnt > NFS4_PNFS_MAX_STRIPE_CNT) {
96  		printk(KERN_WARNING "NFS: %s: stripe count %d greater than "
97  		       "supported maximum %d\n", __func__,
98  			cnt, NFS4_PNFS_MAX_STRIPE_CNT);
99  		goto out_err_free_scratch;
100  	}
101  
102  	/* read stripe indices */
103  	stripe_indices = kcalloc(cnt, sizeof(u8), gfp_flags);
104  	if (!stripe_indices)
105  		goto out_err_free_scratch;
106  
107  	p = xdr_inline_decode(&stream, cnt << 2);
108  	if (unlikely(!p))
109  		goto out_err_free_stripe_indices;
110  
111  	indexp = &stripe_indices[0];
112  	max_stripe_index = 0;
113  	for (i = 0; i < cnt; i++) {
114  		*indexp = be32_to_cpup(p++);
115  		max_stripe_index = max(max_stripe_index, *indexp);
116  		indexp++;
117  	}
118  
119  	/* Check the multipath list count */
120  	p = xdr_inline_decode(&stream, 4);
121  	if (unlikely(!p))
122  		goto out_err_free_stripe_indices;
123  
124  	num = be32_to_cpup(p);
125  	dprintk("%s ds_num %u\n", __func__, num);
126  	if (num > NFS4_PNFS_MAX_MULTI_CNT) {
127  		printk(KERN_WARNING "NFS: %s: multipath count %d greater than "
128  			"supported maximum %d\n", __func__,
129  			num, NFS4_PNFS_MAX_MULTI_CNT);
130  		goto out_err_free_stripe_indices;
131  	}
132  
133  	/* validate stripe indices are all < num */
134  	if (max_stripe_index >= num) {
135  		printk(KERN_WARNING "NFS: %s: stripe index %u >= num ds %u\n",
136  			__func__, max_stripe_index, num);
137  		goto out_err_free_stripe_indices;
138  	}
139  
140  	dsaddr = kzalloc(struct_size(dsaddr, ds_list, num), gfp_flags);
141  	if (!dsaddr)
142  		goto out_err_free_stripe_indices;
143  
144  	dsaddr->stripe_count = cnt;
145  	dsaddr->stripe_indices = stripe_indices;
146  	stripe_indices = NULL;
147  	dsaddr->ds_num = num;
148  	nfs4_init_deviceid_node(&dsaddr->id_node, server, &pdev->dev_id);
149  
150  	INIT_LIST_HEAD(&dsaddrs);
151  
152  	for (i = 0; i < dsaddr->ds_num; i++) {
153  		int j;
154  		u32 mp_count;
155  
156  		p = xdr_inline_decode(&stream, 4);
157  		if (unlikely(!p))
158  			goto out_err_free_deviceid;
159  
160  		mp_count = be32_to_cpup(p); /* multipath count */
161  		for (j = 0; j < mp_count; j++) {
162  			da = nfs4_decode_mp_ds_addr(server->nfs_client->cl_net,
163  						    &stream, gfp_flags);
164  			if (da)
165  				list_add_tail(&da->da_node, &dsaddrs);
166  		}
167  		if (list_empty(&dsaddrs)) {
168  			dprintk("%s: no suitable DS addresses found\n",
169  				__func__);
170  			goto out_err_free_deviceid;
171  		}
172  
173  		dsaddr->ds_list[i] = nfs4_pnfs_ds_add(&dsaddrs, gfp_flags);
174  		if (!dsaddr->ds_list[i])
175  			goto out_err_drain_dsaddrs;
176  		trace_fl_getdevinfo(server, &pdev->dev_id, dsaddr->ds_list[i]->ds_remotestr);
177  
178  		/* If DS was already in cache, free ds addrs */
179  		while (!list_empty(&dsaddrs)) {
180  			da = list_first_entry(&dsaddrs,
181  					      struct nfs4_pnfs_ds_addr,
182  					      da_node);
183  			list_del_init(&da->da_node);
184  			kfree(da->da_remotestr);
185  			kfree(da);
186  		}
187  	}
188  
189  	__free_page(scratch);
190  	return dsaddr;
191  
192  out_err_drain_dsaddrs:
193  	while (!list_empty(&dsaddrs)) {
194  		da = list_first_entry(&dsaddrs, struct nfs4_pnfs_ds_addr,
195  				      da_node);
196  		list_del_init(&da->da_node);
197  		kfree(da->da_remotestr);
198  		kfree(da);
199  	}
200  out_err_free_deviceid:
201  	nfs4_fl_free_deviceid(dsaddr);
202  	/* stripe_indicies was part of dsaddr */
203  	goto out_err_free_scratch;
204  out_err_free_stripe_indices:
205  	kfree(stripe_indices);
206  out_err_free_scratch:
207  	__free_page(scratch);
208  out_err:
209  	dprintk("%s ERROR: returning NULL\n", __func__);
210  	return NULL;
211  }
212  
213  void
214  nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
215  {
216  	nfs4_put_deviceid_node(&dsaddr->id_node);
217  }
218  
219  /*
220   * Want res = (offset - layout->pattern_offset)/ layout->stripe_unit
221   * Then: ((res + fsi) % dsaddr->stripe_count)
222   */
223  u32
224  nfs4_fl_calc_j_index(struct pnfs_layout_segment *lseg, loff_t offset)
225  {
226  	struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg);
227  	u64 tmp;
228  
229  	tmp = offset - flseg->pattern_offset;
230  	do_div(tmp, flseg->stripe_unit);
231  	tmp += flseg->first_stripe_index;
232  	return do_div(tmp, flseg->dsaddr->stripe_count);
233  }
234  
235  u32
236  nfs4_fl_calc_ds_index(struct pnfs_layout_segment *lseg, u32 j)
237  {
238  	return FILELAYOUT_LSEG(lseg)->dsaddr->stripe_indices[j];
239  }
240  
241  struct nfs_fh *
242  nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j)
243  {
244  	struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg);
245  	u32 i;
246  
247  	if (flseg->stripe_type == STRIPE_SPARSE) {
248  		if (flseg->num_fh == 1)
249  			i = 0;
250  		else if (flseg->num_fh == 0)
251  			/* Use the MDS OPEN fh set in nfs_read_rpcsetup */
252  			return NULL;
253  		else
254  			i = nfs4_fl_calc_ds_index(lseg, j);
255  	} else
256  		i = j;
257  	return flseg->fh_array[i];
258  }
259  
260  /* Upon return, either ds is connected, or ds is NULL */
261  struct nfs4_pnfs_ds *
262  nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx)
263  {
264  	struct nfs4_file_layout_dsaddr *dsaddr = FILELAYOUT_LSEG(lseg)->dsaddr;
265  	struct nfs4_pnfs_ds *ds = dsaddr->ds_list[ds_idx];
266  	struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg);
267  	struct nfs4_pnfs_ds *ret = ds;
268  	struct nfs_server *s = NFS_SERVER(lseg->pls_layout->plh_inode);
269  	int status;
270  
271  	if (ds == NULL) {
272  		printk(KERN_ERR "NFS: %s: No data server for offset index %d\n",
273  			__func__, ds_idx);
274  		pnfs_generic_mark_devid_invalid(devid);
275  		goto out;
276  	}
277  	smp_rmb();
278  	if (ds->ds_clp)
279  		goto out_test_devid;
280  
281  	status = nfs4_pnfs_ds_connect(s, ds, devid, dataserver_timeo,
282  			     dataserver_retrans, 4,
283  			     s->nfs_client->cl_minorversion);
284  	if (status) {
285  		nfs4_mark_deviceid_unavailable(devid);
286  		ret = NULL;
287  		goto out;
288  	}
289  
290  out_test_devid:
291  	if (ret->ds_clp == NULL ||
292  	    filelayout_test_devid_unavailable(devid))
293  		ret = NULL;
294  out:
295  	return ret;
296  }
297  
/*
 * Register the two data server tunables as uint module parameters with
 * mode 0644. nfs4_fl_prepare_ds() passes their current values to
 * nfs4_pnfs_ds_connect().
 */
module_param(dataserver_retrans, uint, 0644);
MODULE_PARM_DESC(dataserver_retrans, "The  number of times the NFSv4.1 client "
			"retries a request before it attempts further "
			" recovery  action.");
module_param(dataserver_timeo, uint, 0644);
MODULE_PARM_DESC(dataserver_timeo, "The time (in tenths of a second) the "
			"NFSv4.1  client  waits for a response from a "
			" data server before it retries an NFS request.");
306