xref: /illumos-gate/usr/src/uts/common/fs/zfs/vdev_file.c (revision da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/zfs_context.h>
29 #include <sys/spa.h>
30 #include <sys/vdev_file.h>
31 #include <sys/vdev_impl.h>
32 #include <sys/zio.h>
33 #include <sys/fs/zfs.h>
34 
35 /*
36  * Virtual device vector for files.
37  */
38 
39 static int
40 vdev_file_open_common(vdev_t *vd)
41 {
42 	vdev_file_t *vf;
43 	vnode_t *vp;
44 	int error;
45 
46 	/*
47 	 * We must have a pathname, and it must be absolute.
48 	 */
49 	if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') {
50 		vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
51 		return (EINVAL);
52 	}
53 
54 	vf = vd->vdev_tsd = kmem_zalloc(sizeof (vdev_file_t), KM_SLEEP);
55 
56 	/*
57 	 * We always open the files from the root of the global zone, even if
58 	 * we're in a local zone.  If the user has gotten to this point, the
59 	 * administrator has already decided that the pool should be available
60 	 * to local zone users, so the underlying devices should be as well.
61 	 */
62 	ASSERT(vd->vdev_path != NULL && vd->vdev_path[0] == '/');
63 	error = vn_openat(vd->vdev_path + 1, UIO_SYSSPACE,
64 	    spa_mode | FOFFMAX, 0, &vp, 0, 0, rootdir, -1);
65 
66 	if (error) {
67 		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
68 		return (error);
69 	}
70 
71 	vf->vf_vnode = vp;
72 
73 #ifdef _KERNEL
74 	/*
75 	 * Make sure it's a regular file.
76 	 */
77 	if (vp->v_type != VREG) {
78 		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
79 		return (ENODEV);
80 	}
81 #endif
82 
83 	return (0);
84 }
85 
86 static int
87 vdev_file_open(vdev_t *vd, uint64_t *psize, uint64_t *ashift)
88 {
89 	vdev_file_t *vf;
90 	vattr_t vattr;
91 	int error;
92 
93 	if ((error = vdev_file_open_common(vd)) != 0)
94 		return (error);
95 
96 	vf = vd->vdev_tsd;
97 
98 	/*
99 	 * Determine the physical size of the file.
100 	 */
101 	vattr.va_mask = AT_SIZE;
102 	error = VOP_GETATTR(vf->vf_vnode, &vattr, 0, kcred, NULL);
103 	if (error) {
104 		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
105 		return (error);
106 	}
107 
108 	*psize = vattr.va_size;
109 	*ashift = SPA_MINBLOCKSHIFT;
110 
111 	return (0);
112 }
113 
114 static void
115 vdev_file_close(vdev_t *vd)
116 {
117 	vdev_file_t *vf = vd->vdev_tsd;
118 
119 	if (vf == NULL)
120 		return;
121 
122 	if (vf->vf_vnode != NULL) {
123 		(void) VOP_PUTPAGE(vf->vf_vnode, 0, 0, B_INVAL, kcred, NULL);
124 		(void) VOP_CLOSE(vf->vf_vnode, spa_mode, 1, 0, kcred, NULL);
125 		VN_RELE(vf->vf_vnode);
126 	}
127 
128 	kmem_free(vf, sizeof (vdev_file_t));
129 	vd->vdev_tsd = NULL;
130 }
131 
132 static int
133 vdev_file_probe_io(vdev_t *vd, caddr_t data, size_t size, uint64_t offset,
134     enum uio_rw rw)
135 {
136 	vdev_file_t *vf = vd->vdev_tsd;
137 	ssize_t resid;
138 	int error = 0;
139 
140 	if (vd == NULL || vf == NULL || vf->vf_vnode == NULL)
141 		return (EINVAL);
142 
143 	ASSERT(rw == UIO_READ || rw ==  UIO_WRITE);
144 
145 	error = vn_rdwr(rw, vf->vf_vnode, data, size, offset, UIO_SYSSPACE,
146 	    0, RLIM64_INFINITY, kcred, &resid);
147 	if (error || resid != 0)
148 		return (EIO);
149 	return (0);
150 }
151 
152 static int
153 vdev_file_probe(vdev_t *vd)
154 {
155 	vdev_t *nvd;
156 	char *vl_boot;
157 	uint64_t offset;
158 	int l, error = 0, retries = 0;
159 
160 	if (vd == NULL)
161 		return (EINVAL);
162 
163 	/* Hijack the current vdev */
164 	nvd = vd;
165 
166 	/*
167 	 * Pick a random label to rewrite.
168 	 */
169 	l = spa_get_random(VDEV_LABELS);
170 	ASSERT(l < VDEV_LABELS);
171 
172 	offset = vdev_label_offset(vd->vdev_psize, l,
173 	    offsetof(vdev_label_t, vl_boot_header));
174 
175 	vl_boot = kmem_alloc(VDEV_BOOT_HEADER_SIZE, KM_SLEEP);
176 
177 	while ((error = vdev_file_probe_io(nvd, vl_boot, VDEV_BOOT_HEADER_SIZE,
178 	    offset, UIO_READ)) != 0 && retries == 0) {
179 
180 		/*
181 		 * If we failed with the vdev that was passed in then
182 		 * try allocating a new one and try again.
183 		 */
184 		nvd = kmem_zalloc(sizeof (vdev_t), KM_SLEEP);
185 		if (vd->vdev_path)
186 			nvd->vdev_path = spa_strdup(vd->vdev_path);
187 		error = vdev_file_open_common(nvd);
188 		if (error) {
189 			vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN,
190 			    nvd->vdev_stat.vs_aux);
191 			break;
192 		}
193 		retries++;
194 	}
195 
196 	if ((spa_mode & FWRITE) && !error) {
197 		error = vdev_file_probe_io(nvd, vl_boot, VDEV_BOOT_HEADER_SIZE,
198 		    offset, UIO_WRITE);
199 	}
200 
201 	if (retries) {
202 		vdev_file_close(nvd);
203 		if (nvd->vdev_path)
204 			spa_strfree(nvd->vdev_path);
205 		kmem_free(nvd, sizeof (vdev_t));
206 	}
207 	kmem_free(vl_boot, VDEV_BOOT_HEADER_SIZE);
208 
209 	if (!error)
210 		vd->vdev_is_failing = B_FALSE;
211 
212 	return (error);
213 }
214 
215 static void
216 vdev_file_io_start(zio_t *zio)
217 {
218 	vdev_t *vd = zio->io_vd;
219 	vdev_file_t *vf = vd->vdev_tsd;
220 	ssize_t resid;
221 	int error;
222 
223 	if (zio->io_type == ZIO_TYPE_IOCTL) {
224 		zio_vdev_io_bypass(zio);
225 
226 		/* XXPOLICY */
227 		if (!vdev_readable(vd)) {
228 			zio->io_error = ENXIO;
229 			zio_next_stage_async(zio);
230 			return;
231 		}
232 
233 		switch (zio->io_cmd) {
234 		case DKIOCFLUSHWRITECACHE:
235 			zio->io_error = VOP_FSYNC(vf->vf_vnode, FSYNC | FDSYNC,
236 			    kcred, NULL);
237 			dprintf("fsync(%s) = %d\n", vdev_description(vd),
238 			    zio->io_error);
239 			break;
240 		default:
241 			zio->io_error = ENOTSUP;
242 		}
243 
244 		zio_next_stage_async(zio);
245 		return;
246 	}
247 
248 	/*
249 	 * In the kernel, don't bother double-caching, but in userland,
250 	 * we want to test the vdev_cache code.
251 	 */
252 #ifndef _KERNEL
253 	if (zio->io_type == ZIO_TYPE_READ && vdev_cache_read(zio) == 0)
254 		return;
255 #endif
256 
257 	if ((zio = vdev_queue_io(zio)) == NULL)
258 		return;
259 
260 	/* XXPOLICY */
261 	if (zio->io_type == ZIO_TYPE_WRITE)
262 		error = vdev_writeable(vd) ? vdev_error_inject(vd, zio) : ENXIO;
263 	else
264 		error = vdev_readable(vd) ? vdev_error_inject(vd, zio) : ENXIO;
265 	error = (vd->vdev_remove_wanted || vd->vdev_is_failing) ? ENXIO : error;
266 	if (error) {
267 		zio->io_error = error;
268 		zio_next_stage_async(zio);
269 		return;
270 	}
271 
272 	zio->io_error = vn_rdwr(zio->io_type == ZIO_TYPE_READ ?
273 	    UIO_READ : UIO_WRITE, vf->vf_vnode, zio->io_data,
274 	    zio->io_size, zio->io_offset, UIO_SYSSPACE,
275 	    0, RLIM64_INFINITY, kcred, &resid);
276 
277 	if (resid != 0 && zio->io_error == 0)
278 		zio->io_error = ENOSPC;
279 
280 	zio_next_stage_async(zio);
281 }
282 
283 static void
284 vdev_file_io_done(zio_t *zio)
285 {
286 
287 	if (zio_injection_enabled && zio->io_error == 0)
288 		zio->io_error = zio_handle_device_injection(zio->io_vd, EIO);
289 
290 	/*
291 	 * If this device is truely gone, then attempt to remove it
292 	 * from the configuration.
293 	 */
294 	if (zio->io_error == EIO) {
295 		vdev_t *vd = zio->io_vd;
296 
297 		if (vdev_probe(vd) != 0)
298 			vd->vdev_is_failing = B_TRUE;
299 	}
300 
301 	vdev_queue_io_done(zio);
302 
303 #ifndef _KERNEL
304 	if (zio->io_type == ZIO_TYPE_WRITE)
305 		vdev_cache_write(zio);
306 #endif
307 
308 	zio_next_stage(zio);
309 }
310 
311 vdev_ops_t vdev_file_ops = {
312 	vdev_file_open,
313 	vdev_file_close,
314 	vdev_file_probe,
315 	vdev_default_asize,
316 	vdev_file_io_start,
317 	vdev_file_io_done,
318 	NULL,
319 	VDEV_TYPE_FILE,		/* name of this vdev type */
320 	B_TRUE			/* leaf vdev */
321 };
322 
323 /*
324  * From userland we access disks just like files.
325  */
326 #ifndef _KERNEL
327 
328 vdev_ops_t vdev_disk_ops = {
329 	vdev_file_open,
330 	vdev_file_close,
331 	vdev_file_probe,
332 	vdev_default_asize,
333 	vdev_file_io_start,
334 	vdev_file_io_done,
335 	NULL,
336 	VDEV_TYPE_DISK,		/* name of this vdev type */
337 	B_TRUE			/* leaf vdev */
338 };
339 
340 #endif
341