// SPDX-License-Identifier: GPL-2.0
/*
 * Device operations for the pnfs nfs4 flexfile layout driver.
 *
 * Copyright (c) 2014, Primary Data, Inc. All rights reserved.
 *
 * Tao Peng <bergwolf@primarydata.com>
 */

#include <linux/nfs_fs.h>
#include <linux/vmalloc.h>
#include <linux/module.h>
#include <linux/sunrpc/addr.h>

#include "../internal.h"
#include "../nfs4session.h"
#include "flexfilelayout.h"

#define NFSDBG_FACILITY		NFSDBG_PNFS_LD

static unsigned int dataserver_timeo = NFS_DEF_TCP_TIMEO;
static unsigned int dataserver_retrans;

static bool ff_layout_has_available_ds(struct pnfs_layout_segment *lseg);

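/*
 * Reference handling for flexfile deviceid nodes:
 * nfs4_ff_layout_put_deviceid() drops a reference to the deviceid node,
 * while nfs4_ff_layout_free_deviceid() tears the node down, releasing the
 * pNFS data server and the ds_versions array.
 */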
void nfs4_ff_layout_put_deviceid(struct nfs4_ff_layout_ds *mirror_ds)
{
	if (!IS_ERR_OR_NULL(mirror_ds))
		nfs4_put_deviceid_node(&mirror_ds->id_node);
}

void nfs4_ff_layout_free_deviceid(struct nfs4_ff_layout_ds *mirror_ds)
{
	nfs4_print_deviceid(&mirror_ds->id_node.deviceid);
	nfs4_pnfs_ds_put(mirror_ds->ds);
	kfree(mirror_ds->ds_versions);
	kfree_rcu(mirror_ds, id_node.rcu);
}

/* Decode opaque device data and construct new_ds using it */
struct nfs4_ff_layout_ds *
nfs4_ff_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
			    gfp_t gfp_flags)
{
	struct xdr_stream stream;
	struct xdr_buf buf;
	struct folio *scratch;
	struct list_head dsaddrs;
	struct nfs4_pnfs_ds_addr *da;
	struct nfs4_ff_layout_ds *new_ds = NULL;
	struct nfs4_ff_ds_version *ds_versions = NULL;
	struct net *net = server->nfs_client->cl_net;
	u32 mp_count;
	u32 version_count;
	__be32 *p;
	int i, ret = -ENOMEM;

	/* set up xdr stream */
	scratch = folio_alloc(gfp_flags, 0);
	if (!scratch)
		goto out_err;

	new_ds = kzalloc_obj(struct nfs4_ff_layout_ds, gfp_flags);
	if (!new_ds)
		goto out_scratch;

	nfs4_init_deviceid_node(&new_ds->id_node,
				server,
				&pdev->dev_id);
	INIT_LIST_HEAD(&dsaddrs);

	xdr_init_decode_pages(&stream, &buf, pdev->pages, pdev->pglen);
	xdr_set_scratch_folio(&stream, scratch);

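	/*
	 * The opaque device data follows the ff_device_addr4 layout from the
	 * flexfiles spec (RFC 8435): a count of multipath netaddr lists,
	 * followed by an array of supported NFS protocol versions.
	 */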
	/* multipath count */
	p = xdr_inline_decode(&stream, 4);
	if (unlikely(!p))
		goto out_err_drain_dsaddrs;
	mp_count = be32_to_cpup(p);
	dprintk("%s: multipath ds count %d\n", __func__, mp_count);

	for (i = 0; i < mp_count; i++) {
		/* multipath ds */
		da = nfs4_decode_mp_ds_addr(net, &stream, gfp_flags);
		if (da)
			list_add_tail(&da->da_node, &dsaddrs);
	}
	if (list_empty(&dsaddrs)) {
		dprintk("%s: no suitable DS addresses found\n",
			__func__);
		ret = -ENOMEDIUM;
		goto out_err_drain_dsaddrs;
	}

	/* version count */
	p = xdr_inline_decode(&stream, 4);
	if (unlikely(!p))
		goto out_err_drain_dsaddrs;
	version_count = be32_to_cpup(p);
	dprintk("%s: version count %d\n", __func__, version_count);

	ds_versions = kzalloc_objs(struct nfs4_ff_ds_version, version_count,
				   gfp_flags);
	if (!ds_versions)
		goto out_err_drain_dsaddrs;

	for (i = 0; i < version_count; i++) {
		/* 20 = version(4) + minor_version(4) + rsize(4) + wsize(4) +
		 * tightly_coupled(4) */
		p = xdr_inline_decode(&stream, 20);
		if (unlikely(!p))
			goto out_err_drain_dsaddrs;
		ds_versions[i].version = be32_to_cpup(p++);
		ds_versions[i].minor_version = be32_to_cpup(p++);
		ds_versions[i].rsize = nfs_io_size(be32_to_cpup(p++),
						   server->nfs_client->cl_proto);
		ds_versions[i].wsize = nfs_io_size(be32_to_cpup(p++),
						   server->nfs_client->cl_proto);
		ds_versions[i].tightly_coupled = be32_to_cpup(p);

		if (ds_versions[i].rsize > NFS_MAX_FILE_IO_SIZE)
			ds_versions[i].rsize = NFS_MAX_FILE_IO_SIZE;
		if (ds_versions[i].wsize > NFS_MAX_FILE_IO_SIZE)
			ds_versions[i].wsize = NFS_MAX_FILE_IO_SIZE;

		/*
		 * Check for a valid major/minor combination.  Currently we
		 * support data servers that speak v3, v4.0, v4.1, or v4.2.
		 */
		if (!((ds_versions[i].version == 3 && ds_versions[i].minor_version == 0) ||
		      (ds_versions[i].version == 4 && ds_versions[i].minor_version < 3))) {
			dprintk("%s: [%d] unsupported ds version %d-%d\n", __func__,
				i, ds_versions[i].version,
				ds_versions[i].minor_version);
			ret = -EPROTONOSUPPORT;
			goto out_err_drain_dsaddrs;
		}

		dprintk("%s: [%d] vers %u minor_ver %u rsize %u wsize %u coupled %d\n",
			__func__, i, ds_versions[i].version,
			ds_versions[i].minor_version,
			ds_versions[i].rsize,
			ds_versions[i].wsize,
			ds_versions[i].tightly_coupled);
	}

	new_ds->ds_versions = ds_versions;
	new_ds->ds_versions_cnt = version_count;

	new_ds->ds = nfs4_pnfs_ds_add(net, &dsaddrs, gfp_flags);
	if (!new_ds->ds)
		goto out_err_drain_dsaddrs;

	/* If DS was already in cache, free ds addrs */
	while (!list_empty(&dsaddrs)) {
		da = list_first_entry(&dsaddrs,
				      struct nfs4_pnfs_ds_addr,
				      da_node);
		list_del_init(&da->da_node);
		kfree(da->da_remotestr);
		kfree(da);
	}

	folio_put(scratch);
	return new_ds;

out_err_drain_dsaddrs:
	while (!list_empty(&dsaddrs)) {
		da = list_first_entry(&dsaddrs, struct nfs4_pnfs_ds_addr,
				      da_node);
		list_del_init(&da->da_node);
		kfree(da->da_remotestr);
		kfree(da);
	}

	kfree(ds_versions);
out_scratch:
	folio_put(scratch);
out_err:
	kfree(new_ds);

	dprintk("%s ERROR: returning %d\n", __func__, ret);
	return NULL;
}

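/*
 * Extend an existing DS error record so that it also covers the byte
 * range [offset, offset + length).
 */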
static void extend_ds_error(struct nfs4_ff_layout_ds_err *err,
			    u64 offset, u64 length)
{
	u64 end;

	end = max_t(u64, pnfs_end_offset(err->offset, err->length),
		    pnfs_end_offset(offset, length));
	err->offset = min_t(u64, err->offset, offset);
	err->length = end - err->offset;
}

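/*
 * Comparator for DS error records: order by opnum, then status, then
 * stateid, then deviceid, and finally by byte range.  Ranges that overlap
 * or are contiguous compare as equal so that they can be merged.
 */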
static int
ff_ds_error_match(const struct nfs4_ff_layout_ds_err *e1,
		  const struct nfs4_ff_layout_ds_err *e2)
{
	int ret;

	if (e1->opnum != e2->opnum)
		return e1->opnum < e2->opnum ? -1 : 1;
	if (e1->status != e2->status)
		return e1->status < e2->status ? -1 : 1;
	ret = memcmp(e1->stateid.data, e2->stateid.data,
		     sizeof(e1->stateid.data));
	if (ret != 0)
		return ret;
	ret = memcmp(&e1->deviceid, &e2->deviceid, sizeof(e1->deviceid));
	if (ret != 0)
		return ret;
	if (pnfs_end_offset(e1->offset, e1->length) < e2->offset)
		return -1;
	if (e1->offset > pnfs_end_offset(e2->offset, e2->length))
		return 1;
	/* If ranges overlap or are contiguous, they are the same */
	return 0;
}

static void
ff_layout_add_ds_error_locked(struct nfs4_flexfile_layout *flo,
			      struct nfs4_ff_layout_ds_err *dserr)
{
	struct nfs4_ff_layout_ds_err *err, *tmp;
	struct list_head *head = &flo->error_list;
	int match;

	/* Do insertion sort w/ merges */
	list_for_each_entry_safe(err, tmp, &flo->error_list, list) {
		match = ff_ds_error_match(err, dserr);
		if (match < 0)
			continue;
		if (match > 0) {
			/* Add entry "dserr" _before_ entry "err" */
			head = &err->list;
			break;
		}
		/* Entries match, so merge "err" into "dserr" */
		extend_ds_error(dserr, err->offset, err->length);
		list_replace(&err->list, &dserr->list);
		kfree(err);
		return;
	}

	list_add_tail(&dserr->list, head);
}

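/*
 * Record a DS I/O error against the layout so that it can later be
 * reported back to the MDS (e.g. via LAYOUTRETURN).  A zero status is a
 * no-op; errors are merged into the layout's sorted error_list under the
 * inode's i_lock.
 */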
int ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo,
			     struct nfs4_ff_layout_mirror *mirror,
			     u32 dss_id, u64 offset, u64 length, int status,
			     enum nfs_opnum4 opnum, gfp_t gfp_flags)
{
	struct nfs4_ff_layout_ds_err *dserr;

	if (status == 0)
		return 0;

	if (IS_ERR_OR_NULL(mirror->dss[dss_id].mirror_ds))
		return -EINVAL;

	dserr = kmalloc_obj(*dserr, gfp_flags);
	if (!dserr)
		return -ENOMEM;

	INIT_LIST_HEAD(&dserr->list);
	dserr->offset = offset;
	dserr->length = length;
	dserr->status = status;
	dserr->opnum = opnum;
	nfs4_stateid_copy(&dserr->stateid, &mirror->dss[dss_id].stateid);
	memcpy(&dserr->deviceid, &mirror->dss[dss_id].mirror_ds->id_node.deviceid,
	       NFS4_DEVICEID4_SIZE);

	spin_lock(&flo->generic_hdr.plh_inode->i_lock);
	ff_layout_add_ds_error_locked(flo, dserr);
	spin_unlock(&flo->generic_hdr.plh_inode->i_lock);
	return 0;
}

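/*
 * Return the credential to use for I/O to a loosely coupled DS for the
 * given iomode.  get_cred_rcu() fails if the cred is concurrently being
 * torn down, in which case the pointer is re-read and the get is retried.
 */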
static const struct cred *
ff_layout_get_mirror_cred(struct nfs4_ff_layout_mirror *mirror, u32 iomode, u32 dss_id)
{
	const struct cred *cred, __rcu **pcred;

	if (iomode == IOMODE_READ)
		pcred = &mirror->dss[dss_id].ro_cred;
	else
		pcred = &mirror->dss[dss_id].rw_cred;

	rcu_read_lock();
	do {
		cred = rcu_dereference(*pcred);
		if (!cred)
			break;

		cred = get_cred_rcu(cred);
	} while (!cred);
	rcu_read_unlock();
	return cred;
}

struct nfs_fh *
nfs4_ff_layout_select_ds_fh(struct nfs4_ff_layout_mirror *mirror, u32 dss_id)
{
	/* FIXME: For now assume there is only 1 version available for the DS */
	return &mirror->dss[dss_id].fh_versions[0];
}

void
nfs4_ff_layout_select_ds_stateid(const struct nfs4_ff_layout_mirror *mirror,
				 u32 dss_id,
				 nfs4_stateid *stateid)
{
	if (nfs4_ff_layout_ds_version(mirror, dss_id) == 4)
		nfs4_stateid_copy(stateid, &mirror->dss[dss_id].stateid);
}

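/*
 * Lazily look up the deviceid node for a mirror's DS stripe.  The cmpxchg()
 * resolves races between concurrent callers: if somebody else installed the
 * pointer first, drop the reference we just obtained.
 */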
static bool
ff_layout_init_mirror_ds(struct pnfs_layout_hdr *lo,
			 struct nfs4_ff_layout_mirror *mirror,
			 u32 dss_id)
{
	if (mirror == NULL)
		goto outerr;
	if (mirror->dss[dss_id].mirror_ds == NULL) {
		struct nfs4_deviceid_node *node;
		struct nfs4_ff_layout_ds *mirror_ds = ERR_PTR(-ENODEV);

		node = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode),
					      &mirror->dss[dss_id].devid, lo->plh_lc_cred,
					      GFP_KERNEL);
		if (node)
			mirror_ds = FF_LAYOUT_MIRROR_DS(node);

		/* check for race with another call to this function */
		if (cmpxchg(&mirror->dss[dss_id].mirror_ds, NULL, mirror_ds) &&
		    mirror_ds != ERR_PTR(-ENODEV))
			nfs4_put_deviceid_node(node);
	}

	if (IS_ERR(mirror->dss[dss_id].mirror_ds))
		goto outerr;

	return true;
outerr:
	return false;
}

/**
 * nfs4_ff_layout_prepare_ds - prepare a DS connection for an RPC call
 * @lseg: the layout segment we're operating on
 * @mirror: layout mirror describing the DS to use
 * @dss_id: DS stripe id selecting which stripe to use
 * @fail_return: return layout on connect failure?
 *
 * Try to prepare a DS connection to accept an RPC call. This involves
 * selecting a mirror to use and connecting the client to it if it's not
 * already connected.
 *
 * Since we only need a single functioning mirror to satisfy a read, we don't
 * want to return the layout while at least one mirror is still usable. For
 * writes, though, any down mirror should result in a LAYOUTRETURN.
 * @fail_return is how we distinguish between the two cases.
 *
 * Returns a pointer to a connected DS object on success, or an ERR_PTR on
 * failure.
 */
struct nfs4_pnfs_ds *
nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg,
			  struct nfs4_ff_layout_mirror *mirror,
			  u32 dss_id,
			  bool fail_return)
{
	struct nfs4_pnfs_ds *ds;
	struct inode *ino = lseg->pls_layout->plh_inode;
	struct nfs_server *s = NFS_SERVER(ino);
	unsigned int max_payload;
	int status = -EAGAIN;

	if (!ff_layout_init_mirror_ds(lseg->pls_layout, mirror, dss_id))
		goto noconnect;

	ds = mirror->dss[dss_id].mirror_ds->ds;
	if (READ_ONCE(ds->ds_clp))
		goto out;
	/* matching smp_wmb() in _nfs4_pnfs_v3/4_ds_connect */
	smp_rmb();

	/* FIXME: For now we assume the server sent only one version of NFS
	 * to use for the DS.
	 */
	status = nfs4_pnfs_ds_connect(s, ds, &mirror->dss[dss_id].mirror_ds->id_node,
				      dataserver_timeo, dataserver_retrans,
				      mirror->dss[dss_id].mirror_ds->ds_versions[0].version,
				      mirror->dss[dss_id].mirror_ds->ds_versions[0].minor_version);

	/* connect success, check rsize/wsize limit */
	if (!status) {
		/*
		 * ds_clp is put in destroy_ds().
		 * keep ds_clp even if DS is local, so that if local IO cannot
		 * proceed somehow, we can fall back to NFS whenever we want.
		 */
		nfs_local_probe_async(ds->ds_clp);
		max_payload =
			nfs_block_size(rpc_max_payload(ds->ds_clp->cl_rpcclient),
				       NULL);
		if (mirror->dss[dss_id].mirror_ds->ds_versions[0].rsize > max_payload)
			mirror->dss[dss_id].mirror_ds->ds_versions[0].rsize = max_payload;
		if (mirror->dss[dss_id].mirror_ds->ds_versions[0].wsize > max_payload)
			mirror->dss[dss_id].mirror_ds->ds_versions[0].wsize = max_payload;
		goto out;
	}
noconnect:
	ff_layout_track_ds_error(FF_LAYOUT_FROM_HDR(lseg->pls_layout),
				 mirror, dss_id, lseg->pls_range.offset,
				 lseg->pls_range.length, NFS4ERR_NXIO,
				 OP_ILLEGAL, GFP_NOIO);
	ff_layout_send_layouterror(lseg);
	if (fail_return || !ff_layout_has_available_ds(lseg))
		pnfs_error_mark_layout_for_return(ino, lseg);
	ds = ERR_PTR(status);
out:
	return ds;
}

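/*
 * Pick the credential for DS I/O.  Loosely coupled data servers use the
 * per-mirror credentials handed out in the layout; tightly coupled ones
 * (and the no-mirror case) fall back to the MDS credential.
 */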
const struct cred *
ff_layout_get_ds_cred(struct nfs4_ff_layout_mirror *mirror,
		      const struct pnfs_layout_range *range,
		      const struct cred *mdscred,
		      u32 dss_id)
{
	const struct cred *cred;

	if (mirror && !mirror->dss[dss_id].mirror_ds->ds_versions[0].tightly_coupled) {
		cred = ff_layout_get_mirror_cred(mirror, range->iomode, dss_id);
		if (!cred)
			cred = get_cred(mdscred);
	} else {
		cred = get_cred(mdscred);
	}
	return cred;
}

/**
 * nfs4_ff_find_or_create_ds_client - Find or create a DS rpc client
 * @mirror: pointer to the mirror
 * @ds_clp: nfs_client for the DS
 * @inode: pointer to inode
 * @dss_id: DS stripe id
 *
 * Find or create a DS rpc client with the MDS server rpc client auth flavor
 * in the nfs_client cl_ds_clients list.
 */
struct rpc_clnt *
nfs4_ff_find_or_create_ds_client(struct nfs4_ff_layout_mirror *mirror,
				 struct nfs_client *ds_clp, struct inode *inode,
				 u32 dss_id)
{
	switch (mirror->dss[dss_id].mirror_ds->ds_versions[0].version) {
	case 3:
		/* For NFSv3 DS, flavor is set when creating DS connections */
		return ds_clp->cl_rpcclient;
	case 4:
		return nfs4_find_or_create_ds_client(ds_clp, inode);
	default:
		BUG();
	}
}

void ff_layout_free_ds_ioerr(struct list_head *head)
{
	struct nfs4_ff_layout_ds_err *err;

	while (!list_empty(head)) {
		err = list_first_entry(head,
				       struct nfs4_ff_layout_ds_err,
				       list);
		list_del(&err->list);
		kfree(err);
	}
}

/* called with inode i_lock held */
int ff_layout_encode_ds_ioerr(struct xdr_stream *xdr, const struct list_head *head)
{
	struct nfs4_ff_layout_ds_err *err;
	__be32 *p;

	list_for_each_entry(err, head, list) {
		/* offset(8) + length(8) + stateid(NFS4_STATEID_SIZE)
		 * + array length + deviceid(NFS4_DEVICEID4_SIZE)
		 * + status(4) + opnum(4)
		 */
		p = xdr_reserve_space(xdr,
				      28 + NFS4_STATEID_SIZE + NFS4_DEVICEID4_SIZE);
		if (unlikely(!p))
			return -ENOBUFS;
		p = xdr_encode_hyper(p, err->offset);
		p = xdr_encode_hyper(p, err->length);
		p = xdr_encode_opaque_fixed(p, &err->stateid,
					    NFS4_STATEID_SIZE);
		/* Encode 1 error */
		*p++ = cpu_to_be32(1);
		p = xdr_encode_opaque_fixed(p, &err->deviceid,
					    NFS4_DEVICEID4_SIZE);
		*p++ = cpu_to_be32(err->status);
		*p++ = cpu_to_be32(err->opnum);
		dprintk("%s: offset %llu length %llu status %d op %d\n",
			__func__, err->offset, err->length, err->status,
			err->opnum);
	}

	return 0;
}

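/*
 * Move up to @maxnum error records that intersect @range from the layout's
 * error_list onto @head, returning how many were transferred.
 */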
static
unsigned int do_layout_fetch_ds_ioerr(struct pnfs_layout_hdr *lo,
				      const struct pnfs_layout_range *range,
				      struct list_head *head,
				      unsigned int maxnum)
{
	struct nfs4_flexfile_layout *flo = FF_LAYOUT_FROM_HDR(lo);
	struct inode *inode = lo->plh_inode;
	struct nfs4_ff_layout_ds_err *err, *n;
	unsigned int ret = 0;

	spin_lock(&inode->i_lock);
	list_for_each_entry_safe(err, n, &flo->error_list, list) {
		if (!pnfs_is_range_intersecting(err->offset,
						pnfs_end_offset(err->offset, err->length),
						range->offset,
						pnfs_end_offset(range->offset, range->length)))
			continue;
		if (!maxnum)
			break;
		list_move(&err->list, head);
		maxnum--;
		ret++;
	}
	spin_unlock(&inode->i_lock);
	return ret;
}

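/*
 * Fetch error records for encoding.  If @maxnum entries were filled there
 * may be more pending; since they cannot be reported in this pass, pull the
 * remainder off the list and free them.
 */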
unsigned int ff_layout_fetch_ds_ioerr(struct pnfs_layout_hdr *lo,
				      const struct pnfs_layout_range *range,
				      struct list_head *head,
				      unsigned int maxnum)
{
	unsigned int ret;

	ret = do_layout_fetch_ds_ioerr(lo, range, head, maxnum);
	/* If we're over the max, discard all remaining entries */
	if (ret == maxnum) {
		LIST_HEAD(discard);
		do_layout_fetch_ds_ioerr(lo, range, &discard, -1);
		ff_layout_free_ds_ioerr(&discard);
	}
	return ret;
}

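/*
 * For reads a single usable DS in any mirror is enough; for writes every
 * mirror must have a usable DS, otherwise the layout should be returned.
 */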
static bool ff_read_layout_has_available_ds(struct pnfs_layout_segment *lseg)
{
	struct nfs4_ff_layout_mirror *mirror;
	struct nfs4_deviceid_node *devid;
	u32 idx, dss_id;

	for (idx = 0; idx < FF_LAYOUT_MIRROR_COUNT(lseg); idx++) {
		mirror = FF_LAYOUT_COMP(lseg, idx);
		if (!mirror)
			continue;
		for (dss_id = 0; dss_id < mirror->dss_count; dss_id++) {
			if (!mirror->dss[dss_id].mirror_ds)
				return true;
			if (IS_ERR(mirror->dss[dss_id].mirror_ds))
				continue;
			devid = &mirror->dss[dss_id].mirror_ds->id_node;
			if (!nfs4_test_deviceid_unavailable(devid))
				return true;
		}
	}

	return false;
}

static bool ff_rw_layout_has_available_ds(struct pnfs_layout_segment *lseg)
{
	struct nfs4_ff_layout_mirror *mirror;
	struct nfs4_deviceid_node *devid;
	u32 idx, dss_id;

	for (idx = 0; idx < FF_LAYOUT_MIRROR_COUNT(lseg); idx++) {
		mirror = FF_LAYOUT_COMP(lseg, idx);
		if (!mirror)
			return false;
		for (dss_id = 0; dss_id < mirror->dss_count; dss_id++) {
			if (IS_ERR(mirror->dss[dss_id].mirror_ds))
				return false;
			if (!mirror->dss[dss_id].mirror_ds)
				continue;
			devid = &mirror->dss[dss_id].mirror_ds->id_node;
			if (nfs4_test_deviceid_unavailable(devid))
				return false;
		}
	}

	return FF_LAYOUT_MIRROR_COUNT(lseg) != 0;
}

static bool ff_layout_has_available_ds(struct pnfs_layout_segment *lseg)
{
	if (lseg->pls_range.iomode == IOMODE_READ)
		return ff_read_layout_has_available_ds(lseg);
	/* Note: RW layout needs all mirrors available */
	return ff_rw_layout_has_available_ds(lseg);
}

bool ff_layout_avoid_mds_available_ds(struct pnfs_layout_segment *lseg)
{
	return ff_layout_no_fallback_to_mds(lseg) ||
	       ff_layout_has_available_ds(lseg);
}

bool ff_layout_avoid_read_on_rw(struct pnfs_layout_segment *lseg)
{
	return lseg->pls_range.iomode == IOMODE_RW &&
	       ff_layout_no_read_on_rw(lseg);
}

module_param(dataserver_retrans, uint, 0644);
MODULE_PARM_DESC(dataserver_retrans, "The number of times the NFSv4.1 client "
			"retries a request before it attempts further "
			"recovery action.");
module_param(dataserver_timeo, uint, 0644);
MODULE_PARM_DESC(dataserver_timeo, "The time (in tenths of a second) the "
			"NFSv4.1 client waits for a response from a "
			"data server before it retries an NFS request.");