1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 #include <sys/types.h>
27 #include <sys/ksynch.h>
28 #include <sys/errno.h>
29 #include <sys/file.h>
30 #include <sys/open.h>
31 #include <sys/cred.h>
32 #include <sys/kmem.h>
33 #include <sys/uio.h>
34 #include <sys/ddi.h>
35 #include <sys/sdt.h>
36
37 #define __NSC_GEN__
38 #include "nsc_dev.h"
39 #include "nsc_disk.h"
40 #include "../nsctl.h"
41
42
43 #define _I(x) (((long)(&((nsc_io_t *)0)->x))/sizeof (long))
44
45 nsc_def_t _nsc_disk_def[] = {
46 "UserRead", (uintptr_t)nsc_ioerr, _I(uread),
47 "UserWrite", (uintptr_t)nsc_ioerr, _I(uwrite),
48 "PartSize", (uintptr_t)nsc_null, _I(partsize),
49 "MaxFbas", (uintptr_t)nsc_null, _I(maxfbas),
50 "Control", (uintptr_t)nsc_ioerr, _I(control),
51 0, 0, 0
52 };
53
54
55 extern nsc_mem_t *_nsc_local_mem;
56
57 static int _nsc_uread(dev_t, uio_t *, cred_t *, nsc_fd_t *);
58 static int _nsc_uwrite(dev_t, uio_t *, cred_t *, nsc_fd_t *);
59 static int _nsc_rw_uio(nsc_fd_t *, uio_t *, uio_rw_t);
60
61 static int _nsc_free_dhandle(nsc_dbuf_t *);
62 static int _nsc_alloc_dbuf(blind_t, nsc_off_t, nsc_size_t, int, nsc_dbuf_t **);
63 static int _nsc_free_dbuf(nsc_dbuf_t *);
64 static void _nsc_wait_dbuf(nsc_dbuf_t *);
65 static int _nsc_read_dbuf(nsc_dbuf_t *, nsc_off_t, nsc_size_t, int);
66 static int _nsc_write_dbuf(nsc_dbuf_t *, nsc_off_t, nsc_size_t, int);
67 static int _nsc_zero_dbuf(nsc_dbuf_t *, nsc_off_t, nsc_size_t, int);
68 static int _nsc_dbuf_io(int (*)(), nsc_dbuf_t *, nsc_off_t, nsc_size_t, int);
69
70 static nsc_dbuf_t *_nsc_alloc_dhandle(void (*)(), void (*)(), void (*)());
71
72
73 /*
74 * void
75 * _nsc_add_disk (nsc_io_t *io)
76 * Add disk interface functions.
77 *
78 * Calling/Exit State:
79 * Updates the I/O module with the appropriate
80 * interface routines.
81 *
82 * Description:
83 * Add functions to the I/O module to provide a disk
84 * or cache interface as appropriate.
85 */
86 void
_nsc_add_disk(nsc_io_t * io)87 _nsc_add_disk(nsc_io_t *io)
88 {
89 if ((io->alloc_buf != nsc_ioerr && io->free_buf != nsc_fatal) ||
90 (io->flag & NSC_FILTER)) {
91 if (io->uread == nsc_ioerr)
92 io->uread = _nsc_uread;
93
94 if (io->uwrite == nsc_ioerr &&
95 (io->write != nsc_fatal || (io->flag & NSC_FILTER)))
96 io->uwrite = _nsc_uwrite;
97
98 return;
99 }
100
101 if (io->alloc_h != (nsc_buf_t *(*)())nsc_null ||
102 io->free_h != nsc_fatal || io->alloc_buf != nsc_ioerr ||
103 io->free_buf != nsc_fatal || io->read != nsc_fatal ||
104 io->write != nsc_fatal || io->zero != nsc_fatal)
105 return;
106
107 if (io->uread == nsc_ioerr && io->uwrite == nsc_ioerr)
108 return;
109
110 /*
111 * Layer the generic nsc_buf_t provider onto a uio_t provider.
112 */
113
114 io->alloc_h = (nsc_buf_t *(*)())_nsc_alloc_dhandle;
115 io->free_h = _nsc_free_dhandle;
116 io->alloc_buf = _nsc_alloc_dbuf;
117 io->free_buf = _nsc_free_dbuf;
118
119 io->read = _nsc_read_dbuf;
120 io->write = _nsc_write_dbuf;
121 io->zero = _nsc_zero_dbuf;
122
123 io->provide |= NSC_ANON;
124 }
125
126
127 int
nsc_uread(nsc_fd_t * fd,void * uiop,void * crp)128 nsc_uread(nsc_fd_t *fd, void *uiop, void *crp)
129 {
130 return (*fd->sf_aio->uread)(fd->sf_cd, uiop, crp, fd);
131 }
132
133
134 int
nsc_uwrite(nsc_fd_t * fd,void * uiop,void * crp)135 nsc_uwrite(nsc_fd_t *fd, void *uiop, void *crp)
136 {
137 if ((fd->sf_avail & NSC_WRITE) == 0)
138 return (EIO);
139
140 return (*fd->sf_aio->uwrite)(fd->sf_cd, uiop, crp, fd);
141 }
142
143
144 int
nsc_partsize(nsc_fd_t * fd,nsc_size_t * valp)145 nsc_partsize(nsc_fd_t *fd, nsc_size_t *valp)
146 {
147 *valp = 0;
148 return (*fd->sf_aio->partsize)(fd->sf_cd, valp);
149 }
150
151
152 int
nsc_maxfbas(nsc_fd_t * fd,int flag,nsc_size_t * valp)153 nsc_maxfbas(nsc_fd_t *fd, int flag, nsc_size_t *valp)
154 {
155 *valp = 0;
156 return (*fd->sf_aio->maxfbas)(fd->sf_cd, flag, valp);
157 }
158
159 int
nsc_control(nsc_fd_t * fd,int command,void * argp,int argl)160 nsc_control(nsc_fd_t *fd, int command, void *argp, int argl)
161 {
162 return (*fd->sf_aio->control)(fd->sf_cd, command, argp, argl);
163 }
164
165
166 /* ARGSUSED */
167
168 static int
_nsc_uread(dev_t dev,uio_t * uiop,cred_t * crp,nsc_fd_t * fd)169 _nsc_uread(dev_t dev, uio_t *uiop, cred_t *crp, nsc_fd_t *fd)
170 {
171 return (_nsc_rw_uio(fd, uiop, UIO_READ));
172 }
173
174
175 /* ARGSUSED */
176
177 static int
_nsc_uwrite(dev_t dev,uio_t * uiop,cred_t * crp,nsc_fd_t * fd)178 _nsc_uwrite(dev_t dev, uio_t *uiop, cred_t *crp, nsc_fd_t *fd)
179 {
180 return (_nsc_rw_uio(fd, uiop, UIO_WRITE));
181 }
182
183
184 static int
_nsc_rw_uio(nsc_fd_t * fd,uio_t * uiop,uio_rw_t rw)185 _nsc_rw_uio(nsc_fd_t *fd, uio_t *uiop, uio_rw_t rw)
186 {
187 nsc_size_t buflen, len, limit, chunk;
188 nsc_off_t pos, off;
189 nsc_buf_t *buf;
190 nsc_vec_t *vec;
191 size_t n;
192 int rc;
193
194 pos = FPOS_TO_FBA(uiop);
195 off = FPOS_TO_OFF(uiop);
196 len = FBA_LEN(uiop->uio_resid + off);
197
198 DTRACE_PROBE3(_nsc_rw_uio_io,
199 uint64_t, pos,
200 uint64_t, off,
201 uint64_t, len);
202
203 /* prevent non-FBA bounded I/O - this is a disk driver! */
204 if (off != 0 || FBA_OFF(uiop->uio_resid) != 0)
205 return (EINVAL);
206
207 if ((rc = nsc_partsize(fd, &limit)) != 0)
208 return (rc);
209
210 if ((rc = nsc_maxfbas(fd, 0, &chunk)) != 0)
211 return (rc);
212
213 DTRACE_PROBE2(_nsc_rw_uio_limit,
214 uint64_t, limit,
215 uint64_t, chunk);
216
217 if (limit && pos >= limit) {
218 if (pos > limit || rw == UIO_WRITE)
219 return (ENXIO);
220 return (0);
221 }
222
223 if (limit && pos + len > limit)
224 len = limit - pos;
225
226 while (len > 0) {
227 buflen = min(len, chunk);
228
229 buf = NULL; /* always use a temporary buffer */
230 if ((rc = nsc_alloc_buf(fd, pos, buflen,
231 (rw == UIO_READ) ? NSC_RDBUF : NSC_WRBUF, &buf)) > 0)
232 return (rc);
233
234 vec = buf->sb_vec;
235
236 for (rc = 0;
237 !rc && uiop->uio_resid && vec->sv_addr;
238 vec++, off = 0) {
239 n = min(vec->sv_len - off, uiop->uio_resid);
240 rc = uiomove((char *)vec->sv_addr + off,
241 n, rw, uiop);
242 }
243
244 if (rw == UIO_WRITE) {
245 if (rc) {
246 (void) nsc_uncommit(buf, pos, buflen, 0);
247 } else if ((rc = nsc_write(buf, pos, buflen, 0)) < 0) {
248 rc = 0;
249 }
250 }
251
252 (void) nsc_free_buf(buf);
253
254 len -= buflen;
255 pos += buflen;
256 }
257
258 return (rc);
259 }
260
261
262 /* ARGSUSED */
263
264 static nsc_dbuf_t *
_nsc_alloc_dhandle(void (* d_cb)(),void (* r_cb)(),void (* w_cb)())265 _nsc_alloc_dhandle(void (*d_cb)(), void (*r_cb)(), void (*w_cb)())
266 {
267 nsc_dbuf_t *h;
268
269 if ((h = nsc_kmem_zalloc(sizeof (nsc_dbuf_t),
270 KM_SLEEP, _nsc_local_mem)) == NULL)
271 return (NULL);
272
273 h->db_disc = d_cb;
274 h->db_flag = NSC_HALLOCATED;
275
276 return (h);
277 }
278
279
280 static int
_nsc_free_dhandle(nsc_dbuf_t * h)281 _nsc_free_dhandle(nsc_dbuf_t *h)
282 {
283 nsc_kmem_free(h, sizeof (*h));
284 return (0);
285 }
286
287
288 static int
_nsc_alloc_dbuf(blind_t cd,nsc_off_t pos,nsc_size_t len,int flag,nsc_dbuf_t ** hp)289 _nsc_alloc_dbuf(blind_t cd, nsc_off_t pos, nsc_size_t len,
290 int flag, nsc_dbuf_t **hp)
291 {
292 nsc_dbuf_t *h = *hp;
293 int rc;
294
295 if (cd == NSC_ANON_CD) {
296 flag &= ~(NSC_READ | NSC_WRITE | NSC_RDAHEAD);
297 } else {
298 if (h->db_maxfbas == 0) {
299 rc = nsc_maxfbas(h->db_fd, 0, &h->db_maxfbas);
300 if (rc != 0)
301 return (rc);
302 else if (h->db_maxfbas == 0)
303 return (EIO);
304 }
305
306 if (len > h->db_maxfbas)
307 return (ENOSPC);
308 }
309
310 if (flag & NSC_NODATA) {
311 ASSERT(!(flag & NSC_RDBUF));
312 h->db_addr = NULL;
313 } else {
314 if (h->db_disc)
315 (*h->db_disc)(h);
316
317 if (!(h->db_addr = nsc_kmem_alloc(FBA_SIZE(len), KM_SLEEP, 0)))
318 return (ENOMEM);
319 }
320
321 h->db_pos = pos;
322 h->db_len = len;
323 h->db_error = 0;
324 h->db_flag |= flag;
325
326 if (flag & NSC_NODATA) {
327 h->db_vec = NULL;
328 } else {
329 h->db_vec = &h->db_bvec[0];
330 h->db_bvec[0].sv_len = FBA_SIZE(len);
331 h->db_bvec[0].sv_addr = (void *)h->db_addr;
332 h->db_bvec[0].sv_vme = 0;
333
334 h->db_bvec[1].sv_len = 0;
335 h->db_bvec[1].sv_addr = 0;
336 h->db_bvec[1].sv_vme = 0;
337 }
338
339 if ((flag & NSC_RDAHEAD) || (cd == NSC_ANON_CD))
340 return (NSC_DONE);
341
342 _nsc_wait_dbuf(h);
343
344 if (flag & NSC_RDBUF) {
345 if ((rc = _nsc_dbuf_io(nsc_uread, h, pos, len, flag)) != 0) {
346 (void) _nsc_free_dbuf(h);
347 return (rc);
348 }
349 }
350
351 return (NSC_DONE);
352 }
353
354
355 static void
_nsc_wait_dbuf(nsc_dbuf_t * h)356 _nsc_wait_dbuf(nsc_dbuf_t *h)
357 {
358 nsc_iodev_t *iodev = h->db_fd->sf_iodev;
359 void (*fn)() = h->db_disc;
360 nsc_dbuf_t *hp;
361
362 mutex_enter(&iodev->si_lock);
363
364 h->db_next = iodev->si_active;
365 iodev->si_active = h;
366
367 /* CONSTCOND */
368
369 while (1) {
370 for (hp = h->db_next; hp; hp = hp->db_next)
371 if (h->db_pos + h->db_len > hp->db_pos &&
372 h->db_pos < hp->db_pos + hp->db_len) break;
373
374 if (!hp)
375 break;
376
377 if (fn)
378 (*fn)(h), fn = NULL;
379
380 cv_wait(&iodev->si_cv, &iodev->si_lock);
381 }
382
383 mutex_exit(&iodev->si_lock);
384 }
385
386
387 static int
_nsc_free_dbuf(nsc_dbuf_t * h)388 _nsc_free_dbuf(nsc_dbuf_t *h)
389 {
390 nsc_dbuf_t **hpp, *hp;
391 nsc_iodev_t *iodev;
392 int wake = 0;
393
394 if (h->db_fd && !(h->db_flag & NSC_ABUF)) {
395 iodev = h->db_fd->sf_iodev;
396
397 mutex_enter(&iodev->si_lock);
398
399 hpp = (nsc_dbuf_t **)&iodev->si_active;
400
401 for (; *hpp; hpp = &hp->db_next) {
402 if ((hp = *hpp) == h) {
403 *hpp = h->db_next;
404 break;
405 }
406
407 if (h->db_pos + h->db_len > hp->db_pos &&
408 h->db_pos < hp->db_pos + hp->db_len) wake = 1;
409
410 }
411 if (wake)
412 cv_broadcast(&iodev->si_cv);
413
414 mutex_exit(&iodev->si_lock);
415 }
416
417 if (!(h->db_flag & NSC_NODATA) && h->db_addr)
418 nsc_kmem_free(h->db_addr, FBA_SIZE(h->db_len));
419
420 h->db_addr = NULL;
421 h->db_flag &= NSC_HALLOCATED; /* clear flags, preserve NSC_HALLOCATED */
422
423 if ((h->db_flag & NSC_HALLOCATED) == 0)
424 (void) _nsc_free_dhandle(h);
425
426
427 return (0);
428 }
429
430
431 static int
_nsc_read_dbuf(nsc_dbuf_t * h,nsc_off_t pos,nsc_size_t len,int flag)432 _nsc_read_dbuf(nsc_dbuf_t *h, nsc_off_t pos, nsc_size_t len, int flag)
433 {
434 return (_nsc_dbuf_io(nsc_uread, h, pos, len, flag));
435 }
436
437
438 static int
_nsc_write_dbuf(nsc_dbuf_t * h,nsc_off_t pos,nsc_size_t len,int flag)439 _nsc_write_dbuf(nsc_dbuf_t *h, nsc_off_t pos, nsc_size_t len, int flag)
440 {
441 return (_nsc_dbuf_io(nsc_uwrite, h, pos, len, flag));
442 }
443
444
445 static int
_nsc_zero_dbuf(nsc_dbuf_t * h,nsc_off_t pos,nsc_size_t len,int flag)446 _nsc_zero_dbuf(nsc_dbuf_t *h, nsc_off_t pos, nsc_size_t len, int flag)
447 {
448 return (_nsc_dbuf_io(NULL, h, pos, len, flag));
449 }
450
451
452 static int
_nsc_dbuf_io(int (* fn)(),nsc_dbuf_t * h,nsc_off_t pos,nsc_size_t len,int flag)453 _nsc_dbuf_io(int (*fn)(), nsc_dbuf_t *h, nsc_off_t pos,
454 nsc_size_t len, int flag)
455 {
456 nsc_vec_t *vp = NULL;
457 cred_t *crp = NULL;
458 iovec_t *iovp;
459 nsc_size_t thisio; /* bytes in this io */
460 nsc_size_t todo; /* anticipated bytes to go */
461 nsc_size_t truedo; /* actual bytes to go */
462 nsc_off_t xpos; /* offset of this io */
463 int destidx;
464 nsc_size_t firstentryfix; /* value used for first entry */
465
466 int (*iofn)();
467 int rc = 0;
468
469 if (!h->db_vec || (h->db_flag & NSC_ABUF))
470 return (EIO);
471
472 if (pos < h->db_pos || pos + len > h->db_pos + h->db_len)
473 return (EINVAL);
474
475 if (!len)
476 return (0);
477 if (fn == nsc_uread && (flag & NSC_RDAHEAD))
478 return (0);
479
480 if (h->db_disc)
481 (*h->db_disc)(h);
482
483 crp = ddi_get_cred();
484 bzero(&h->db_uio, sizeof (uio_t));
485 bzero(&h->db_iov[0], (_NSC_DBUF_NVEC * sizeof (iovec_t)));
486
487 todo = FBA_SIZE(len);
488
489 /*
490 * determine where in the vector array we should start.
491 */
492 vp = h->db_vec;
493 xpos = pos - h->db_pos;
494 for (; xpos >= FBA_NUM(vp->sv_len); vp++)
495 xpos -= FBA_NUM(vp->sv_len);
496
497 firstentryfix = FBA_SIZE(xpos);
498
499 xpos = pos;
500
501 /*
502 * Loop performing i/o to the underlying driver.
503 */
504 while (todo) {
505 destidx = 0;
506 thisio = 0;
507 iofn = fn;
508
509 /*
510 * Copy up to _NSC_DBUF_NVEC vector entries from the
511 * nsc_vec_t into the iovec_t so that the number of
512 * i/o operations is minimised.
513 */
514 while (destidx < _NSC_DBUF_NVEC && todo) {
515 iovp = &h->db_iov[destidx];
516
517 ASSERT(FBA_LEN(vp->sv_len) == FBA_NUM(vp->sv_len));
518 ASSERT((vp->sv_len - firstentryfix) && vp->sv_addr);
519
520 truedo = min(vp->sv_len - firstentryfix, todo);
521 iovp->iov_base = (caddr_t)vp->sv_addr + firstentryfix;
522 firstentryfix = 0;
523 iovp->iov_len = (size_t)truedo;
524 if (!iofn) {
525 bzero(iovp->iov_base, iovp->iov_len);
526 }
527 thisio += truedo;
528 todo -= truedo;
529 destidx++;
530 vp++;
531 }
532
533 h->db_uio.uio_iovcnt = destidx;
534 h->db_uio.uio_iov = &h->db_iov[0];
535 h->db_uio.uio_segflg = UIO_SYSSPACE;
536 h->db_uio.uio_resid = (size_t)thisio;
537
538 SET_FPOS(&h->db_uio, xpos);
539
540 if (!iofn) {
541 iofn = nsc_uwrite;
542 }
543
544 rc = (*iofn)(h->db_fd, &h->db_uio, crp);
545 if (rc != 0) {
546 break;
547 }
548
549 ASSERT(FBA_LEN(thisio) == FBA_NUM(thisio));
550 xpos += FBA_LEN(thisio);
551 }
552
553 return (rc);
554 }
555