xref: /titanic_41/usr/src/uts/common/avs/ns/nsctl/nsc_disk.c (revision fcf3ce441efd61da9bb2884968af01cb7c1452cc)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/types.h>
27 #include <sys/ksynch.h>
28 #include <sys/errno.h>
29 #include <sys/file.h>
30 #include <sys/open.h>
31 #include <sys/cred.h>
32 #include <sys/kmem.h>
33 #include <sys/uio.h>
34 #include <sys/ddi.h>
35 #include <sys/sdt.h>
36 
37 #define	__NSC_GEN__
38 #include "nsc_dev.h"
39 #include "nsc_disk.h"
40 #include "../nsctl.h"
41 
42 
43 #define	_I(x)	(((long)(&((nsc_io_t *)0)->x))/sizeof (long))
44 
45 nsc_def_t _nsc_disk_def[] = {
46 	"UserRead",	(uintptr_t)nsc_ioerr,	_I(uread),
47 	"UserWrite",	(uintptr_t)nsc_ioerr,	_I(uwrite),
48 	"PartSize",	(uintptr_t)nsc_null,	_I(partsize),
49 	"MaxFbas",	(uintptr_t)nsc_null,	_I(maxfbas),
50 	"Control",	(uintptr_t)nsc_ioerr,	_I(control),
51 	0,		0,		0
52 };
53 
54 
55 extern nsc_mem_t *_nsc_local_mem;
56 
57 static int _nsc_uread(dev_t, uio_t *, cred_t *, nsc_fd_t *);
58 static int _nsc_uwrite(dev_t, uio_t *, cred_t *, nsc_fd_t *);
59 static int _nsc_rw_uio(nsc_fd_t *, uio_t *, uio_rw_t);
60 
61 static int _nsc_free_dhandle(nsc_dbuf_t *);
62 static int _nsc_alloc_dbuf(blind_t, nsc_off_t, nsc_size_t, int, nsc_dbuf_t **);
63 static int _nsc_free_dbuf(nsc_dbuf_t *);
64 static void _nsc_wait_dbuf(nsc_dbuf_t *);
65 static int _nsc_read_dbuf(nsc_dbuf_t *, nsc_off_t, nsc_size_t, int);
66 static int _nsc_write_dbuf(nsc_dbuf_t *, nsc_off_t, nsc_size_t, int);
67 static int _nsc_zero_dbuf(nsc_dbuf_t *, nsc_off_t, nsc_size_t, int);
68 static int _nsc_dbuf_io(int (*)(), nsc_dbuf_t *, nsc_off_t, nsc_size_t, int);
69 
70 static nsc_dbuf_t *_nsc_alloc_dhandle(void (*)(), void (*)(), void (*)());
71 
72 
73 /*
74  * void
75  * _nsc_add_disk (nsc_io_t *io)
76  *	Add disk interface functions.
77  *
78  * Calling/Exit State:
79  *	Updates the I/O module with the appropriate
80  *	interface routines.
81  *
82  * Description:
83  *	Add functions to the I/O module to provide a disk
84  *	or cache interface as appropriate.
85  */
86 void
_nsc_add_disk(nsc_io_t * io)87 _nsc_add_disk(nsc_io_t *io)
88 {
89 	if ((io->alloc_buf != nsc_ioerr && io->free_buf != nsc_fatal) ||
90 	    (io->flag & NSC_FILTER)) {
91 		if (io->uread == nsc_ioerr)
92 			io->uread = _nsc_uread;
93 
94 		if (io->uwrite == nsc_ioerr &&
95 		    (io->write != nsc_fatal || (io->flag & NSC_FILTER)))
96 			io->uwrite = _nsc_uwrite;
97 
98 		return;
99 	}
100 
101 	if (io->alloc_h != (nsc_buf_t *(*)())nsc_null ||
102 	    io->free_h != nsc_fatal || io->alloc_buf != nsc_ioerr ||
103 	    io->free_buf != nsc_fatal || io->read != nsc_fatal ||
104 	    io->write != nsc_fatal || io->zero != nsc_fatal)
105 		return;
106 
107 	if (io->uread == nsc_ioerr && io->uwrite == nsc_ioerr)
108 		return;
109 
110 	/*
111 	 * Layer the generic nsc_buf_t provider onto a uio_t provider.
112 	 */
113 
114 	io->alloc_h = (nsc_buf_t *(*)())_nsc_alloc_dhandle;
115 	io->free_h = _nsc_free_dhandle;
116 	io->alloc_buf = _nsc_alloc_dbuf;
117 	io->free_buf = _nsc_free_dbuf;
118 
119 	io->read = _nsc_read_dbuf;
120 	io->write = _nsc_write_dbuf;
121 	io->zero = _nsc_zero_dbuf;
122 
123 	io->provide |= NSC_ANON;
124 }
125 
126 
127 int
nsc_uread(nsc_fd_t * fd,void * uiop,void * crp)128 nsc_uread(nsc_fd_t *fd, void *uiop, void *crp)
129 {
130 	return (*fd->sf_aio->uread)(fd->sf_cd, uiop, crp, fd);
131 }
132 
133 
134 int
nsc_uwrite(nsc_fd_t * fd,void * uiop,void * crp)135 nsc_uwrite(nsc_fd_t *fd, void *uiop, void *crp)
136 {
137 	if ((fd->sf_avail & NSC_WRITE) == 0)
138 		return (EIO);
139 
140 	return (*fd->sf_aio->uwrite)(fd->sf_cd, uiop, crp, fd);
141 }
142 
143 
144 int
nsc_partsize(nsc_fd_t * fd,nsc_size_t * valp)145 nsc_partsize(nsc_fd_t *fd, nsc_size_t *valp)
146 {
147 	*valp = 0;
148 	return (*fd->sf_aio->partsize)(fd->sf_cd, valp);
149 }
150 
151 
152 int
nsc_maxfbas(nsc_fd_t * fd,int flag,nsc_size_t * valp)153 nsc_maxfbas(nsc_fd_t *fd, int flag, nsc_size_t *valp)
154 {
155 	*valp = 0;
156 	return (*fd->sf_aio->maxfbas)(fd->sf_cd, flag, valp);
157 }
158 
159 int
nsc_control(nsc_fd_t * fd,int command,void * argp,int argl)160 nsc_control(nsc_fd_t *fd, int command, void *argp, int argl)
161 {
162 	return (*fd->sf_aio->control)(fd->sf_cd, command, argp, argl);
163 }
164 
165 
166 /* ARGSUSED */
167 
168 static int
_nsc_uread(dev_t dev,uio_t * uiop,cred_t * crp,nsc_fd_t * fd)169 _nsc_uread(dev_t dev, uio_t *uiop, cred_t *crp, nsc_fd_t *fd)
170 {
171 	return (_nsc_rw_uio(fd, uiop, UIO_READ));
172 }
173 
174 
175 /* ARGSUSED */
176 
177 static int
_nsc_uwrite(dev_t dev,uio_t * uiop,cred_t * crp,nsc_fd_t * fd)178 _nsc_uwrite(dev_t dev, uio_t *uiop, cred_t *crp, nsc_fd_t *fd)
179 {
180 	return (_nsc_rw_uio(fd, uiop, UIO_WRITE));
181 }
182 
183 
184 static int
_nsc_rw_uio(nsc_fd_t * fd,uio_t * uiop,uio_rw_t rw)185 _nsc_rw_uio(nsc_fd_t *fd, uio_t *uiop, uio_rw_t rw)
186 {
187 	nsc_size_t buflen, len, limit, chunk;
188 	nsc_off_t pos, off;
189 	nsc_buf_t *buf;
190 	nsc_vec_t *vec;
191 	size_t n;
192 	int rc;
193 
194 	pos = FPOS_TO_FBA(uiop);
195 	off = FPOS_TO_OFF(uiop);
196 	len = FBA_LEN(uiop->uio_resid + off);
197 
198 	DTRACE_PROBE3(_nsc_rw_uio_io,
199 		uint64_t, pos,
200 		uint64_t, off,
201 		uint64_t, len);
202 
203 	/* prevent non-FBA bounded I/O - this is a disk driver! */
204 	if (off != 0 || FBA_OFF(uiop->uio_resid) != 0)
205 		return (EINVAL);
206 
207 	if ((rc = nsc_partsize(fd, &limit)) != 0)
208 		return (rc);
209 
210 	if ((rc = nsc_maxfbas(fd, 0, &chunk)) != 0)
211 		return (rc);
212 
213 	DTRACE_PROBE2(_nsc_rw_uio_limit,
214 		uint64_t, limit,
215 		uint64_t, chunk);
216 
217 	if (limit && pos >= limit) {
218 		if (pos > limit || rw == UIO_WRITE)
219 			return (ENXIO);
220 		return (0);
221 	}
222 
223 	if (limit && pos + len > limit)
224 		len = limit - pos;
225 
226 	while (len > 0) {
227 		buflen = min(len, chunk);
228 
229 		buf = NULL;	/* always use a temporary buffer */
230 		if ((rc = nsc_alloc_buf(fd, pos, buflen,
231 		    (rw == UIO_READ) ? NSC_RDBUF : NSC_WRBUF, &buf)) > 0)
232 			return (rc);
233 
234 		vec = buf->sb_vec;
235 
236 		for (rc = 0;
237 		    !rc && uiop->uio_resid && vec->sv_addr;
238 		    vec++, off = 0) {
239 			n = min(vec->sv_len - off, uiop->uio_resid);
240 			rc = uiomove((char *)vec->sv_addr + off,
241 			    n, rw, uiop);
242 		}
243 
244 		if (rw == UIO_WRITE) {
245 			if (rc) {
246 				(void) nsc_uncommit(buf, pos, buflen, 0);
247 			} else if ((rc = nsc_write(buf, pos, buflen, 0)) < 0) {
248 				rc = 0;
249 			}
250 		}
251 
252 		(void) nsc_free_buf(buf);
253 
254 		len -= buflen;
255 		pos += buflen;
256 	}
257 
258 	return (rc);
259 }
260 
261 
262 /* ARGSUSED */
263 
264 static nsc_dbuf_t *
_nsc_alloc_dhandle(void (* d_cb)(),void (* r_cb)(),void (* w_cb)())265 _nsc_alloc_dhandle(void (*d_cb)(), void (*r_cb)(), void (*w_cb)())
266 {
267 	nsc_dbuf_t *h;
268 
269 	if ((h = nsc_kmem_zalloc(sizeof (nsc_dbuf_t),
270 			KM_SLEEP, _nsc_local_mem)) == NULL)
271 		return (NULL);
272 
273 	h->db_disc = d_cb;
274 	h->db_flag = NSC_HALLOCATED;
275 
276 	return (h);
277 }
278 
279 
280 static int
_nsc_free_dhandle(nsc_dbuf_t * h)281 _nsc_free_dhandle(nsc_dbuf_t *h)
282 {
283 	nsc_kmem_free(h, sizeof (*h));
284 	return (0);
285 }
286 
287 
288 static int
_nsc_alloc_dbuf(blind_t cd,nsc_off_t pos,nsc_size_t len,int flag,nsc_dbuf_t ** hp)289 _nsc_alloc_dbuf(blind_t cd, nsc_off_t pos, nsc_size_t len,
290     int flag, nsc_dbuf_t **hp)
291 {
292 	nsc_dbuf_t *h = *hp;
293 	int rc;
294 
295 	if (cd == NSC_ANON_CD) {
296 		flag &= ~(NSC_READ | NSC_WRITE | NSC_RDAHEAD);
297 	} else {
298 		if (h->db_maxfbas == 0) {
299 			rc = nsc_maxfbas(h->db_fd, 0, &h->db_maxfbas);
300 			if (rc != 0)
301 				return (rc);
302 			else if (h->db_maxfbas == 0)
303 				return (EIO);
304 		}
305 
306 		if (len > h->db_maxfbas)
307 			return (ENOSPC);
308 	}
309 
310 	if (flag & NSC_NODATA) {
311 		ASSERT(!(flag & NSC_RDBUF));
312 		h->db_addr = NULL;
313 	} else {
314 		if (h->db_disc)
315 			(*h->db_disc)(h);
316 
317 		if (!(h->db_addr = nsc_kmem_alloc(FBA_SIZE(len), KM_SLEEP, 0)))
318 			return (ENOMEM);
319 	}
320 
321 	h->db_pos = pos;
322 	h->db_len = len;
323 	h->db_error = 0;
324 	h->db_flag |= flag;
325 
326 	if (flag & NSC_NODATA) {
327 		h->db_vec = NULL;
328 	} else {
329 		h->db_vec = &h->db_bvec[0];
330 		h->db_bvec[0].sv_len = FBA_SIZE(len);
331 		h->db_bvec[0].sv_addr = (void *)h->db_addr;
332 		h->db_bvec[0].sv_vme = 0;
333 
334 		h->db_bvec[1].sv_len = 0;
335 		h->db_bvec[1].sv_addr = 0;
336 		h->db_bvec[1].sv_vme = 0;
337 	}
338 
339 	if ((flag & NSC_RDAHEAD) || (cd == NSC_ANON_CD))
340 		return (NSC_DONE);
341 
342 	_nsc_wait_dbuf(h);
343 
344 	if (flag & NSC_RDBUF) {
345 		if ((rc = _nsc_dbuf_io(nsc_uread, h, pos, len, flag)) != 0) {
346 			(void) _nsc_free_dbuf(h);
347 			return (rc);
348 		}
349 	}
350 
351 	return (NSC_DONE);
352 }
353 
354 
355 static void
_nsc_wait_dbuf(nsc_dbuf_t * h)356 _nsc_wait_dbuf(nsc_dbuf_t *h)
357 {
358 	nsc_iodev_t *iodev = h->db_fd->sf_iodev;
359 	void (*fn)() = h->db_disc;
360 	nsc_dbuf_t *hp;
361 
362 	mutex_enter(&iodev->si_lock);
363 
364 	h->db_next = iodev->si_active;
365 	iodev->si_active = h;
366 
367 	/* CONSTCOND */
368 
369 	while (1) {
370 		for (hp = h->db_next; hp; hp = hp->db_next)
371 			if (h->db_pos + h->db_len > hp->db_pos &&
372 			    h->db_pos < hp->db_pos + hp->db_len) break;
373 
374 		if (!hp)
375 			break;
376 
377 		if (fn)
378 			(*fn)(h), fn = NULL;
379 
380 		cv_wait(&iodev->si_cv, &iodev->si_lock);
381 	}
382 
383 	mutex_exit(&iodev->si_lock);
384 }
385 
386 
387 static int
_nsc_free_dbuf(nsc_dbuf_t * h)388 _nsc_free_dbuf(nsc_dbuf_t *h)
389 {
390 	nsc_dbuf_t **hpp, *hp;
391 	nsc_iodev_t *iodev;
392 	int wake = 0;
393 
394 	if (h->db_fd && !(h->db_flag & NSC_ABUF)) {
395 		iodev = h->db_fd->sf_iodev;
396 
397 		mutex_enter(&iodev->si_lock);
398 
399 		hpp = (nsc_dbuf_t **)&iodev->si_active;
400 
401 		for (; *hpp; hpp = &hp->db_next) {
402 			if ((hp = *hpp) == h) {
403 				*hpp = h->db_next;
404 				break;
405 			}
406 
407 			if (h->db_pos + h->db_len > hp->db_pos &&
408 			    h->db_pos < hp->db_pos + hp->db_len) wake = 1;
409 
410 		}
411 		if (wake)
412 			cv_broadcast(&iodev->si_cv);
413 
414 		mutex_exit(&iodev->si_lock);
415 	}
416 
417 	if (!(h->db_flag & NSC_NODATA) && h->db_addr)
418 		nsc_kmem_free(h->db_addr, FBA_SIZE(h->db_len));
419 
420 	h->db_addr = NULL;
421 	h->db_flag &= NSC_HALLOCATED; /* clear flags, preserve NSC_HALLOCATED */
422 
423 	if ((h->db_flag & NSC_HALLOCATED) == 0)
424 		(void) _nsc_free_dhandle(h);
425 
426 
427 	return (0);
428 }
429 
430 
431 static int
_nsc_read_dbuf(nsc_dbuf_t * h,nsc_off_t pos,nsc_size_t len,int flag)432 _nsc_read_dbuf(nsc_dbuf_t *h, nsc_off_t pos, nsc_size_t len, int flag)
433 {
434 	return (_nsc_dbuf_io(nsc_uread, h, pos, len, flag));
435 }
436 
437 
438 static int
_nsc_write_dbuf(nsc_dbuf_t * h,nsc_off_t pos,nsc_size_t len,int flag)439 _nsc_write_dbuf(nsc_dbuf_t *h, nsc_off_t pos, nsc_size_t len, int flag)
440 {
441 	return (_nsc_dbuf_io(nsc_uwrite, h, pos, len, flag));
442 }
443 
444 
445 static int
_nsc_zero_dbuf(nsc_dbuf_t * h,nsc_off_t pos,nsc_size_t len,int flag)446 _nsc_zero_dbuf(nsc_dbuf_t *h, nsc_off_t pos, nsc_size_t len, int flag)
447 {
448 	return (_nsc_dbuf_io(NULL, h, pos, len, flag));
449 }
450 
451 
452 static int
_nsc_dbuf_io(int (* fn)(),nsc_dbuf_t * h,nsc_off_t pos,nsc_size_t len,int flag)453 _nsc_dbuf_io(int (*fn)(), nsc_dbuf_t *h, nsc_off_t pos,
454     nsc_size_t len, int flag)
455 {
456 	nsc_vec_t *vp = NULL;
457 	cred_t *crp = NULL;
458 	iovec_t *iovp;
459 	nsc_size_t thisio;		/* bytes in this io */
460 	nsc_size_t todo;		/* anticipated bytes to go */
461 	nsc_size_t truedo;		/* actual bytes to go */
462 	nsc_off_t xpos;			/* offset of this io */
463 	int destidx;
464 	nsc_size_t firstentryfix;	/* value used for first entry */
465 
466 	int (*iofn)();
467 	int rc = 0;
468 
469 	if (!h->db_vec || (h->db_flag & NSC_ABUF))
470 		return (EIO);
471 
472 	if (pos < h->db_pos || pos + len > h->db_pos + h->db_len)
473 		return (EINVAL);
474 
475 	if (!len)
476 		return (0);
477 	if (fn == nsc_uread && (flag & NSC_RDAHEAD))
478 		return (0);
479 
480 	if (h->db_disc)
481 		(*h->db_disc)(h);
482 
483 	crp = ddi_get_cred();
484 	bzero(&h->db_uio, sizeof (uio_t));
485 	bzero(&h->db_iov[0], (_NSC_DBUF_NVEC * sizeof (iovec_t)));
486 
487 	todo = FBA_SIZE(len);
488 
489 	/*
490 	 * determine where in the vector array we should start.
491 	 */
492 	vp = h->db_vec;
493 	xpos = pos - h->db_pos;
494 	for (; xpos >= FBA_NUM(vp->sv_len); vp++)
495 		xpos -= FBA_NUM(vp->sv_len);
496 
497 	firstentryfix = FBA_SIZE(xpos);
498 
499 	xpos = pos;
500 
501 	/*
502 	 * Loop performing i/o to the underlying driver.
503 	 */
504 	while (todo) {
505 		destidx = 0;
506 		thisio = 0;
507 		iofn = fn;
508 
509 		/*
510 		 * Copy up to _NSC_DBUF_NVEC vector entries from the
511 		 * nsc_vec_t into the iovec_t so that the number of
512 		 * i/o operations is minimised.
513 		 */
514 		while (destidx < _NSC_DBUF_NVEC && todo) {
515 			iovp = &h->db_iov[destidx];
516 
517 			ASSERT(FBA_LEN(vp->sv_len) == FBA_NUM(vp->sv_len));
518 			ASSERT((vp->sv_len - firstentryfix) && vp->sv_addr);
519 
520 			truedo = min(vp->sv_len - firstentryfix, todo);
521 			iovp->iov_base = (caddr_t)vp->sv_addr + firstentryfix;
522 			firstentryfix = 0;
523 			iovp->iov_len = (size_t)truedo;
524 			if (!iofn) {
525 				bzero(iovp->iov_base, iovp->iov_len);
526 			}
527 			thisio += truedo;
528 			todo -= truedo;
529 			destidx++;
530 			vp++;
531 		}
532 
533 		h->db_uio.uio_iovcnt = destidx;
534 		h->db_uio.uio_iov = &h->db_iov[0];
535 		h->db_uio.uio_segflg = UIO_SYSSPACE;
536 		h->db_uio.uio_resid = (size_t)thisio;
537 
538 		SET_FPOS(&h->db_uio, xpos);
539 
540 		if (!iofn) {
541 			iofn = nsc_uwrite;
542 		}
543 
544 		rc = (*iofn)(h->db_fd, &h->db_uio, crp);
545 		if (rc != 0) {
546 			break;
547 		}
548 
549 		ASSERT(FBA_LEN(thisio) == FBA_NUM(thisio));
550 		xpos += FBA_LEN(thisio);
551 	}
552 
553 	return (rc);
554 }
555