xref: /linux/fs/nfsd/blocklayout.c (revision 81538c8e42806eed71ce125723877a7c2307370c)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (c) 2014-2016 Christoph Hellwig.
4  */
5 #include <linux/exportfs.h>
6 #include <linux/iomap.h>
7 #include <linux/slab.h>
8 #include <linux/pr.h>
9 
10 #include <linux/nfsd/debug.h>
11 
12 #include "blocklayoutxdr.h"
13 #include "pnfs.h"
14 #include "filecache.h"
15 #include "vfs.h"
16 
17 #define NFSDDBG_FACILITY	NFSDDBG_PNFS
18 
19 
20 static __be32
nfsd4_block_proc_layoutget(struct svc_rqst * rqstp,struct inode * inode,const struct svc_fh * fhp,struct nfsd4_layoutget * args)21 nfsd4_block_proc_layoutget(struct svc_rqst *rqstp, struct inode *inode,
22 		const struct svc_fh *fhp, struct nfsd4_layoutget *args)
23 {
24 	struct nfsd4_layout_seg *seg = &args->lg_seg;
25 	struct super_block *sb = inode->i_sb;
26 	u32 block_size = i_blocksize(inode);
27 	struct pnfs_block_extent *bex;
28 	struct iomap iomap;
29 	u32 device_generation = 0;
30 	int error;
31 
32 	if (locks_in_grace(SVC_NET(rqstp)))
33 		return nfserr_grace;
34 
35 	if (seg->offset & (block_size - 1)) {
36 		dprintk("pnfsd: I/O misaligned\n");
37 		goto out_layoutunavailable;
38 	}
39 
40 	/*
41 	 * Some clients barf on non-zero block numbers for NONE or INVALID
42 	 * layouts, so make sure to zero the whole structure.
43 	 */
44 	error = -ENOMEM;
45 	bex = kzalloc(sizeof(*bex), GFP_KERNEL);
46 	if (!bex)
47 		goto out_error;
48 	args->lg_content = bex;
49 
50 	error = sb->s_export_op->map_blocks(inode, seg->offset, seg->length,
51 					    &iomap, seg->iomode != IOMODE_READ,
52 					    &device_generation);
53 	if (error) {
54 		if (error == -ENXIO)
55 			goto out_layoutunavailable;
56 		goto out_error;
57 	}
58 
59 	if (iomap.length < args->lg_minlength) {
60 		dprintk("pnfsd: extent smaller than minlength\n");
61 		goto out_layoutunavailable;
62 	}
63 
64 	switch (iomap.type) {
65 	case IOMAP_MAPPED:
66 		if (seg->iomode == IOMODE_READ)
67 			bex->es = PNFS_BLOCK_READ_DATA;
68 		else
69 			bex->es = PNFS_BLOCK_READWRITE_DATA;
70 		bex->soff = iomap.addr;
71 		break;
72 	case IOMAP_UNWRITTEN:
73 		if (seg->iomode & IOMODE_RW) {
74 			/*
75 			 * Crack monkey special case from section 2.3.1.
76 			 */
77 			if (args->lg_minlength == 0) {
78 				dprintk("pnfsd: no soup for you!\n");
79 				goto out_layoutunavailable;
80 			}
81 
82 			bex->es = PNFS_BLOCK_INVALID_DATA;
83 			bex->soff = iomap.addr;
84 			break;
85 		}
86 		fallthrough;
87 	case IOMAP_HOLE:
88 		if (seg->iomode == IOMODE_READ) {
89 			bex->es = PNFS_BLOCK_NONE_DATA;
90 			break;
91 		}
92 		fallthrough;
93 	case IOMAP_DELALLOC:
94 	default:
95 		WARN(1, "pnfsd: filesystem returned %d extent\n", iomap.type);
96 		goto out_layoutunavailable;
97 	}
98 
99 	error = nfsd4_set_deviceid(&bex->vol_id, fhp, device_generation);
100 	if (error)
101 		goto out_error;
102 	bex->foff = iomap.offset;
103 	bex->len = iomap.length;
104 
105 	seg->offset = iomap.offset;
106 	seg->length = iomap.length;
107 
108 	dprintk("GET: 0x%llx:0x%llx %d\n", bex->foff, bex->len, bex->es);
109 	return 0;
110 
111 out_error:
112 	seg->length = 0;
113 	return nfserrno(error);
114 out_layoutunavailable:
115 	seg->length = 0;
116 	return nfserr_layoutunavailable;
117 }
118 
119 static __be32
nfsd4_block_commit_blocks(struct inode * inode,struct nfsd4_layoutcommit * lcp,struct iomap * iomaps,int nr_iomaps)120 nfsd4_block_commit_blocks(struct inode *inode, struct nfsd4_layoutcommit *lcp,
121 		struct iomap *iomaps, int nr_iomaps)
122 {
123 	struct timespec64 mtime = inode_get_mtime(inode);
124 	struct iattr iattr = { .ia_valid = 0 };
125 	int error;
126 
127 	if (lcp->lc_mtime.tv_nsec == UTIME_NOW ||
128 	    timespec64_compare(&lcp->lc_mtime, &mtime) < 0)
129 		lcp->lc_mtime = current_time(inode);
130 	iattr.ia_valid |= ATTR_ATIME | ATTR_CTIME | ATTR_MTIME;
131 	iattr.ia_atime = iattr.ia_ctime = iattr.ia_mtime = lcp->lc_mtime;
132 
133 	if (lcp->lc_size_chg) {
134 		iattr.ia_valid |= ATTR_SIZE;
135 		iattr.ia_size = lcp->lc_newsize;
136 	}
137 
138 	error = inode->i_sb->s_export_op->commit_blocks(inode, iomaps,
139 			nr_iomaps, &iattr);
140 	kfree(iomaps);
141 	return nfserrno(error);
142 }
143 
144 #ifdef CONFIG_NFSD_BLOCKLAYOUT
145 static int
nfsd4_block_get_device_info_simple(struct super_block * sb,struct nfsd4_getdeviceinfo * gdp)146 nfsd4_block_get_device_info_simple(struct super_block *sb,
147 		struct nfsd4_getdeviceinfo *gdp)
148 {
149 	struct pnfs_block_deviceaddr *dev;
150 	struct pnfs_block_volume *b;
151 
152 	dev = kzalloc(struct_size(dev, volumes, 1), GFP_KERNEL);
153 	if (!dev)
154 		return -ENOMEM;
155 	gdp->gd_device = dev;
156 
157 	dev->nr_volumes = 1;
158 	b = &dev->volumes[0];
159 
160 	b->type = PNFS_BLOCK_VOLUME_SIMPLE;
161 	b->simple.sig_len = PNFS_BLOCK_UUID_LEN;
162 	return sb->s_export_op->get_uuid(sb, b->simple.sig, &b->simple.sig_len,
163 			&b->simple.offset);
164 }
165 
166 static __be32
nfsd4_block_proc_getdeviceinfo(struct super_block * sb,struct svc_rqst * rqstp,struct nfs4_client * clp,struct nfsd4_getdeviceinfo * gdp)167 nfsd4_block_proc_getdeviceinfo(struct super_block *sb,
168 		struct svc_rqst *rqstp,
169 		struct nfs4_client *clp,
170 		struct nfsd4_getdeviceinfo *gdp)
171 {
172 	if (bdev_is_partition(sb->s_bdev))
173 		return nfserr_inval;
174 	return nfserrno(nfsd4_block_get_device_info_simple(sb, gdp));
175 }
176 
177 static __be32
nfsd4_block_proc_layoutcommit(struct inode * inode,struct svc_rqst * rqstp,struct nfsd4_layoutcommit * lcp)178 nfsd4_block_proc_layoutcommit(struct inode *inode, struct svc_rqst *rqstp,
179 		struct nfsd4_layoutcommit *lcp)
180 {
181 	struct iomap *iomaps;
182 	int nr_iomaps;
183 	__be32 nfserr;
184 
185 	rqstp->rq_arg = lcp->lc_up_layout;
186 	svcxdr_init_decode(rqstp);
187 
188 	nfserr = nfsd4_block_decode_layoutupdate(&rqstp->rq_arg_stream,
189 			&iomaps, &nr_iomaps, i_blocksize(inode));
190 	if (nfserr != nfs_ok)
191 		return nfserr;
192 
193 	return nfsd4_block_commit_blocks(inode, lcp, iomaps, nr_iomaps);
194 }
195 
196 const struct nfsd4_layout_ops bl_layout_ops = {
197 	/*
198 	 * Pretend that we send notification to the client.  This is a blatant
199 	 * lie to force recent Linux clients to cache our device IDs.
200 	 * We rarely ever change the device ID, so the harm of leaking deviceids
201 	 * for a while isn't too bad.  Unfortunately RFC5661 is a complete mess
202 	 * in this regard, but I filed errata 4119 for this a while ago, and
203 	 * hopefully the Linux client will eventually start caching deviceids
204 	 * without this again.
205 	 */
206 	.notify_types		=
207 			NOTIFY_DEVICEID4_DELETE | NOTIFY_DEVICEID4_CHANGE,
208 	.proc_getdeviceinfo	= nfsd4_block_proc_getdeviceinfo,
209 	.encode_getdeviceinfo	= nfsd4_block_encode_getdeviceinfo,
210 	.proc_layoutget		= nfsd4_block_proc_layoutget,
211 	.encode_layoutget	= nfsd4_block_encode_layoutget,
212 	.proc_layoutcommit	= nfsd4_block_proc_layoutcommit,
213 };
214 #endif /* CONFIG_NFSD_BLOCKLAYOUT */
215 
216 #ifdef CONFIG_NFSD_SCSILAYOUT
217 #define NFSD_MDS_PR_KEY		0x0100000000000000ULL
218 
219 /*
220  * We use the client ID as a unique key for the reservations.
221  * This allows us to easily fence a client when recalls fail.
222  */
nfsd4_scsi_pr_key(struct nfs4_client * clp)223 static u64 nfsd4_scsi_pr_key(struct nfs4_client *clp)
224 {
225 	return ((u64)clp->cl_clientid.cl_boot << 32) | clp->cl_clientid.cl_id;
226 }
227 
228 static const u8 designator_types[] = {
229 	PS_DESIGNATOR_EUI64,
230 	PS_DESIGNATOR_NAA,
231 };
232 
233 static int
nfsd4_block_get_unique_id(struct gendisk * disk,struct pnfs_block_volume * b)234 nfsd4_block_get_unique_id(struct gendisk *disk, struct pnfs_block_volume *b)
235 {
236 	int ret, i;
237 
238 	for (i = 0; i < ARRAY_SIZE(designator_types); i++) {
239 		u8 type = designator_types[i];
240 
241 		ret = disk->fops->get_unique_id(disk, b->scsi.designator, type);
242 		if (ret > 0) {
243 			b->scsi.code_set = PS_CODE_SET_BINARY;
244 			b->scsi.designator_type = type;
245 			b->scsi.designator_len = ret;
246 			return 0;
247 		}
248 	}
249 
250 	return -EINVAL;
251 }
252 
253 static int
nfsd4_block_get_device_info_scsi(struct super_block * sb,struct nfs4_client * clp,struct nfsd4_getdeviceinfo * gdp)254 nfsd4_block_get_device_info_scsi(struct super_block *sb,
255 		struct nfs4_client *clp,
256 		struct nfsd4_getdeviceinfo *gdp)
257 {
258 	struct pnfs_block_deviceaddr *dev;
259 	struct pnfs_block_volume *b;
260 	const struct pr_ops *ops;
261 	int ret;
262 
263 	dev = kzalloc(struct_size(dev, volumes, 1), GFP_KERNEL);
264 	if (!dev)
265 		return -ENOMEM;
266 	gdp->gd_device = dev;
267 
268 	dev->nr_volumes = 1;
269 	b = &dev->volumes[0];
270 
271 	b->type = PNFS_BLOCK_VOLUME_SCSI;
272 	b->scsi.pr_key = nfsd4_scsi_pr_key(clp);
273 
274 	ret = nfsd4_block_get_unique_id(sb->s_bdev->bd_disk, b);
275 	if (ret < 0)
276 		goto out_free_dev;
277 
278 	ret = -EINVAL;
279 	ops = sb->s_bdev->bd_disk->fops->pr_ops;
280 	if (!ops) {
281 		pr_err("pNFS: device %s does not support PRs.\n",
282 			sb->s_id);
283 		goto out_free_dev;
284 	}
285 
286 	ret = ops->pr_register(sb->s_bdev, 0, NFSD_MDS_PR_KEY, true);
287 	if (ret) {
288 		pr_err("pNFS: failed to register key for device %s.\n",
289 			sb->s_id);
290 		goto out_free_dev;
291 	}
292 
293 	ret = ops->pr_reserve(sb->s_bdev, NFSD_MDS_PR_KEY,
294 			PR_EXCLUSIVE_ACCESS_REG_ONLY, 0);
295 	if (ret) {
296 		pr_err("pNFS: failed to reserve device %s.\n",
297 			sb->s_id);
298 		goto out_free_dev;
299 	}
300 
301 	return 0;
302 
303 out_free_dev:
304 	kfree(dev);
305 	gdp->gd_device = NULL;
306 	return ret;
307 }
308 
309 static __be32
nfsd4_scsi_proc_getdeviceinfo(struct super_block * sb,struct svc_rqst * rqstp,struct nfs4_client * clp,struct nfsd4_getdeviceinfo * gdp)310 nfsd4_scsi_proc_getdeviceinfo(struct super_block *sb,
311 		struct svc_rqst *rqstp,
312 		struct nfs4_client *clp,
313 		struct nfsd4_getdeviceinfo *gdp)
314 {
315 	if (bdev_is_partition(sb->s_bdev))
316 		return nfserr_inval;
317 	return nfserrno(nfsd4_block_get_device_info_scsi(sb, clp, gdp));
318 }
319 static __be32
nfsd4_scsi_proc_layoutcommit(struct inode * inode,struct svc_rqst * rqstp,struct nfsd4_layoutcommit * lcp)320 nfsd4_scsi_proc_layoutcommit(struct inode *inode, struct svc_rqst *rqstp,
321 		struct nfsd4_layoutcommit *lcp)
322 {
323 	struct iomap *iomaps;
324 	int nr_iomaps;
325 	__be32 nfserr;
326 
327 	rqstp->rq_arg = lcp->lc_up_layout;
328 	svcxdr_init_decode(rqstp);
329 
330 	nfserr = nfsd4_scsi_decode_layoutupdate(&rqstp->rq_arg_stream,
331 			&iomaps, &nr_iomaps, i_blocksize(inode));
332 	if (nfserr != nfs_ok)
333 		return nfserr;
334 
335 	return nfsd4_block_commit_blocks(inode, lcp, iomaps, nr_iomaps);
336 }
337 
338 static void
nfsd4_scsi_fence_client(struct nfs4_layout_stateid * ls,struct nfsd_file * file)339 nfsd4_scsi_fence_client(struct nfs4_layout_stateid *ls, struct nfsd_file *file)
340 {
341 	struct nfs4_client *clp = ls->ls_stid.sc_client;
342 	struct block_device *bdev = file->nf_file->f_path.mnt->mnt_sb->s_bdev;
343 
344 	bdev->bd_disk->fops->pr_ops->pr_preempt(bdev, NFSD_MDS_PR_KEY,
345 			nfsd4_scsi_pr_key(clp), 0, true);
346 }
347 
348 const struct nfsd4_layout_ops scsi_layout_ops = {
349 	/*
350 	 * Pretend that we send notification to the client.  This is a blatant
351 	 * lie to force recent Linux clients to cache our device IDs.
352 	 * We rarely ever change the device ID, so the harm of leaking deviceids
353 	 * for a while isn't too bad.  Unfortunately RFC5661 is a complete mess
354 	 * in this regard, but I filed errata 4119 for this a while ago, and
355 	 * hopefully the Linux client will eventually start caching deviceids
356 	 * without this again.
357 	 */
358 	.notify_types		=
359 			NOTIFY_DEVICEID4_DELETE | NOTIFY_DEVICEID4_CHANGE,
360 	.proc_getdeviceinfo	= nfsd4_scsi_proc_getdeviceinfo,
361 	.encode_getdeviceinfo	= nfsd4_block_encode_getdeviceinfo,
362 	.proc_layoutget		= nfsd4_block_proc_layoutget,
363 	.encode_layoutget	= nfsd4_block_encode_layoutget,
364 	.proc_layoutcommit	= nfsd4_scsi_proc_layoutcommit,
365 	.fence_client		= nfsd4_scsi_fence_client,
366 };
367 #endif /* CONFIG_NFSD_SCSILAYOUT */
368