1 /*
2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
5 * 1.0 of the CDDL.
6 *
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
10 */
11
12 /*
13 * Copyright (c) 2015 Joyent, Inc. All rights reserved.
14 */
15
16 /*
17 * bootfs vnode operations
18 */
19
20 #include <sys/types.h>
21 #include <sys/uio.h>
22 #include <sys/sunddi.h>
23 #include <sys/errno.h>
24 #include <sys/vfs_opreg.h>
25 #include <sys/vnode.h>
26 #include <sys/mman.h>
27 #include <fs/fs_subr.h>
28 #include <sys/policy.h>
29 #include <sys/sysmacros.h>
30 #include <sys/dirent.h>
31 #include <sys/uio.h>
32 #include <vm/pvn.h>
33 #include <vm/hat.h>
34 #include <vm/seg_map.h>
35 #include <vm/seg_vn.h>
36 #include <sys/vmsystm.h>
37
38 #include <sys/fs/bootfs_impl.h>
39
/*
 * Vnode operations vector for bootfs. Presumably populated from
 * bootfs_vnodeops_template during file system initialization, which is not
 * part of this file -- TODO confirm.
 */
struct vnodeops *bootfs_vnodeops;
41
42 /*ARGSUSED*/
43 static int
bootfs_open(vnode_t ** vpp,int flag,cred_t * cr,caller_context_t * ct)44 bootfs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct)
45 {
46 return (0);
47 }
48
49 /*ARGSUSED*/
50 static int
bootfs_close(vnode_t * vp,int flag,int count,offset_t offset,cred_t * cr,caller_context_t * ct)51 bootfs_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr,
52 caller_context_t *ct)
53 {
54 return (0);
55 }
56
57 /*ARGSUSED*/
58 static int
bootfs_read(vnode_t * vp,struct uio * uiop,int ioflag,cred_t * cr,caller_context_t * ct)59 bootfs_read(vnode_t *vp, struct uio *uiop, int ioflag, cred_t *cr,
60 caller_context_t *ct)
61 {
62 int err;
63 ssize_t sres = uiop->uio_resid;
64 bootfs_node_t *bnp = vp->v_data;
65
66 if (vp->v_type == VDIR)
67 return (EISDIR);
68
69 if (vp->v_type != VREG)
70 return (EINVAL);
71
72 if (uiop->uio_loffset < 0)
73 return (EINVAL);
74
75 if (uiop->uio_loffset >= bnp->bvn_size)
76 return (0);
77
78 err = 0;
79 while (uiop->uio_resid != 0) {
80 caddr_t base;
81 long offset, frem;
82 ulong_t poff, segoff;
83 size_t bytes;
84 int relerr;
85
86 offset = uiop->uio_loffset;
87 poff = offset & PAGEOFFSET;
88 bytes = MIN(PAGESIZE - poff, uiop->uio_resid);
89
90 frem = bnp->bvn_size - offset;
91 if (frem <= 0) {
92 err = 0;
93 break;
94 }
95
96 /* Don't read past EOF */
97 bytes = MIN(bytes, frem);
98
99 /*
100 * Segmaps are likely larger than our page size, so make sure we
101 * have the proper offfset into the resulting segmap data.
102 */
103 segoff = (offset & PAGEMASK) & MAXBOFFSET;
104
105 base = segmap_getmapflt(segkmap, vp, offset & MAXBMASK, bytes,
106 1, S_READ);
107
108 err = uiomove(base + segoff + poff, bytes, UIO_READ, uiop);
109 relerr = segmap_release(segkmap, base, 0);
110
111 if (err == 0)
112 err = relerr;
113
114 if (err != 0)
115 break;
116 }
117
118 /* Even if we had an error in a partial read, return success */
119 if (uiop->uio_resid > sres)
120 err = 0;
121
122 gethrestime(&bnp->bvn_attr.va_atime);
123
124 return (err);
125 }
126
127 /*ARGSUSED*/
128 static int
bootfs_ioctl(vnode_t * vp,int cmd,intptr_t data,int flag,cred_t * cr,int * rvalp,caller_context_t * ct)129 bootfs_ioctl(vnode_t *vp, int cmd, intptr_t data, int flag,
130 cred_t *cr, int *rvalp, caller_context_t *ct)
131 {
132 return (ENOTTY);
133 }
134
135 /*ARGSUSED*/
136 static int
bootfs_getattr(vnode_t * vp,vattr_t * vap,int flags,cred_t * cr,caller_context_t * ct)137 bootfs_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
138 caller_context_t *ct)
139 {
140 uint32_t mask;
141 bootfs_node_t *bpn = (bootfs_node_t *)vp->v_data;
142
143 mask = vap->va_mask;
144 bcopy(&bpn->bvn_attr, vap, sizeof (vattr_t));
145 vap->va_mask = mask;
146 return (0);
147 }
148
149 /*ARGSUSED*/
150 static int
bootfs_access(vnode_t * vp,int mode,int flags,cred_t * cr,caller_context_t * ct)151 bootfs_access(vnode_t *vp, int mode, int flags, cred_t *cr,
152 caller_context_t *ct)
153 {
154 int shift = 0;
155 bootfs_node_t *bpn = (bootfs_node_t *)vp->v_data;
156
157 if (crgetuid(cr) != bpn->bvn_attr.va_uid) {
158 shift += 3;
159 if (groupmember(bpn->bvn_attr.va_gid, cr) == 0)
160 shift += 3;
161 }
162
163 return (secpolicy_vnode_access2(cr, vp, bpn->bvn_attr.va_uid,
164 bpn->bvn_attr.va_mode << shift, mode));
165 }
166
167 /*ARGSUSED*/
168 static int
bootfs_lookup(vnode_t * dvp,char * nm,vnode_t ** vpp,struct pathname * pnp,int flags,vnode_t * rdir,cred_t * cr,caller_context_t * ct,int * direntflags,pathname_t * realpnp)169 bootfs_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, struct pathname *pnp,
170 int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct,
171 int *direntflags, pathname_t *realpnp)
172 {
173 avl_index_t where;
174 bootfs_node_t sn, *bnp;
175 bootfs_node_t *bpp = (bootfs_node_t *)dvp->v_data;
176
177 if (flags & LOOKUP_XATTR)
178 return (EINVAL);
179
180 if (bpp->bvn_attr.va_type != VDIR)
181 return (ENOTDIR);
182
183 if (*nm == '\0' || strcmp(nm, ".") == 0) {
184 VN_HOLD(dvp);
185 *vpp = dvp;
186 return (0);
187 }
188
189 if (strcmp(nm, "..") == 0) {
190 VN_HOLD(bpp->bvn_parent->bvn_vnp);
191 *vpp = bpp->bvn_parent->bvn_vnp;
192 return (0);
193 }
194
195 sn.bvn_name = nm;
196 bnp = avl_find(&bpp->bvn_dir, &sn, &where);
197 if (bnp == NULL)
198 return (ENOENT);
199
200 VN_HOLD(bnp->bvn_vnp);
201 *vpp = bnp->bvn_vnp;
202 return (0);
203 }
204
205 /*ARGSUSED*/
206 static int
bootfs_readdir(vnode_t * vp,struct uio * uiop,cred_t * cr,int * eofp,caller_context_t * ct,int flags)207 bootfs_readdir(vnode_t *vp, struct uio *uiop, cred_t *cr, int *eofp,
208 caller_context_t *ct, int flags)
209 {
210 bootfs_node_t *bnp = (bootfs_node_t *)vp->v_data;
211 dirent64_t *dp;
212 void *buf;
213 ulong_t bsize, brem;
214 offset_t coff, roff;
215 int dlen, ret;
216 bootfs_node_t *dnp;
217 boolean_t first = B_TRUE;
218
219 if (uiop->uio_loffset >= MAXOFF_T) {
220 if (eofp != NULL)
221 *eofp = 1;
222 return (0);
223 }
224
225 if (uiop->uio_iovcnt != 1)
226 return (EINVAL);
227
228 if (!(uiop->uio_iov->iov_len > 0))
229 return (EINVAL);
230
231 if (vp->v_type != VDIR)
232 return (ENOTDIR);
233
234 roff = uiop->uio_loffset;
235 coff = 0;
236 brem = bsize = uiop->uio_iov->iov_len;
237 buf = kmem_alloc(bsize, KM_SLEEP);
238 dp = buf;
239
240 /*
241 * Recall that offsets here are done based on the name of the dirent
242 * excluding the null terminator. Therefore `.` is always at 0, `..` is
243 * always at 1, and then the first real dirent is at 3. This offset is
244 * what's actually stored when we update the offset in the structure.
245 */
246 if (roff == 0) {
247 dlen = DIRENT64_RECLEN(1);
248 if (first == B_TRUE) {
249 if (dlen > brem) {
250 kmem_free(buf, bsize);
251 return (EINVAL);
252 }
253 first = B_FALSE;
254 }
255 dp->d_ino = (ino64_t)bnp->bvn_attr.va_nodeid;
256 dp->d_off = 0;
257 dp->d_reclen = (ushort_t)dlen;
258 (void) strncpy(dp->d_name, ".", DIRENT64_NAMELEN(dlen));
259 dp = (struct dirent64 *)((uintptr_t)dp + dp->d_reclen);
260 brem -= dlen;
261 }
262
263 if (roff <= 1) {
264 dlen = DIRENT64_RECLEN(2);
265 if (first == B_TRUE) {
266 if (dlen > brem) {
267 kmem_free(buf, bsize);
268 return (EINVAL);
269 }
270 first = B_FALSE;
271 }
272 dp->d_ino = (ino64_t)bnp->bvn_parent->bvn_attr.va_nodeid;
273 dp->d_off = 1;
274 dp->d_reclen = (ushort_t)dlen;
275 (void) strncpy(dp->d_name, "..", DIRENT64_NAMELEN(dlen));
276 dp = (struct dirent64 *)((uintptr_t)dp + dp->d_reclen);
277 brem -= dlen;
278 }
279
280 coff = 3;
281 for (dnp = avl_first(&bnp->bvn_dir); dnp != NULL;
282 dnp = AVL_NEXT(&bnp->bvn_dir, dnp)) {
283 size_t nlen = strlen(dnp->bvn_name);
284
285 if (roff > coff) {
286 coff += nlen;
287 continue;
288 }
289
290 dlen = DIRENT64_RECLEN(nlen);
291 if (dlen > brem) {
292 if (first == B_TRUE) {
293 kmem_free(buf, bsize);
294 return (EINVAL);
295 }
296 break;
297 }
298 first = B_FALSE;
299
300 dp->d_ino = (ino64_t)dnp->bvn_attr.va_nodeid;
301 dp->d_off = coff;
302 dp->d_reclen = (ushort_t)dlen;
303 (void) strncpy(dp->d_name, dnp->bvn_name,
304 DIRENT64_NAMELEN(dlen));
305 dp = (struct dirent64 *)((uintptr_t)dp + dp->d_reclen);
306 brem -= dlen;
307 coff += nlen;
308 }
309
310 ret = uiomove(buf, (bsize - brem), UIO_READ, uiop);
311
312 if (ret == 0) {
313 if (dnp == NULL) {
314 coff++;
315 if (eofp != NULL)
316 *eofp = 1;
317 } else if (eofp != NULL) {
318 *eofp = 0;
319 }
320 uiop->uio_loffset = coff;
321 }
322 gethrestime(&bnp->bvn_attr.va_atime);
323 kmem_free(buf, bsize);
324 return (ret);
325 }
326
327 /*ARGSUSED*/
328 static void
bootfs_inactive(vnode_t * vp,cred_t * cr,caller_context_t * ct)329 bootfs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
330 {
331 }
332
333 /*ARGSUSED*/
334 static int
bootfs_rwlock(vnode_t * vp,int write_lock,caller_context_t * ct)335 bootfs_rwlock(vnode_t *vp, int write_lock, caller_context_t *ct)
336 {
337 if (write_lock != 0)
338 return (EINVAL);
339 return (0);
340 }
341
342 /*ARGSUSED*/
343 static void
bootfs_rwunlock(vnode_t * vp,int write_lock,caller_context_t * ct)344 bootfs_rwunlock(vnode_t *vp, int write_lock, caller_context_t *ct)
345 {
346 }
347
348 /*ARGSUSED*/
349 static int
bootfs_seek(vnode_t * vp,offset_t ooff,offset_t * noffp,caller_context_t * ct)350 bootfs_seek(vnode_t *vp, offset_t ooff, offset_t *noffp,
351 caller_context_t *ct)
352 {
353 bootfs_node_t *bnp = (bootfs_node_t *)vp->v_data;
354 if (vp->v_type == VDIR)
355 return (0);
356 return ((*noffp < 0 || *noffp > bnp->bvn_size ? EINVAL : 0));
357 }
358
359 /*
360 * We need to fill in a single page of a vnode's memory based on the actual data
361 * from the kernel. We'll use this node's sliding window into physical memory
362 * and update one page at a time.
363 */
364 /*ARGSUSED*/
365 static int
bootfs_getapage(vnode_t * vp,u_offset_t off,size_t len,uint_t * protp,page_t * pl[],size_t plsz,struct seg * seg,caddr_t addr,enum seg_rw rw,cred_t * cr)366 bootfs_getapage(vnode_t *vp, u_offset_t off, size_t len, uint_t *protp,
367 page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr, enum seg_rw rw,
368 cred_t *cr)
369 {
370 bootfs_node_t *bnp = vp->v_data;
371 page_t *pp, *fpp;
372 pfn_t pfn;
373
374 for (;;) {
375 /* Easy case where the page exists */
376 pp = page_lookup(vp, off, rw == S_CREATE ? SE_EXCL : SE_SHARED);
377 if (pp != NULL) {
378 if (pl != NULL) {
379 pl[0] = pp;
380 pl[1] = NULL;
381 } else {
382 page_unlock(pp);
383 }
384 return (0);
385 }
386
387 pp = page_create_va(vp, off, PAGESIZE, PG_EXCL | PG_WAIT, seg,
388 addr);
389
390 /*
391 * If we didn't get the page, that means someone else beat us to
392 * creating this so we need to try again.
393 */
394 if (pp != NULL)
395 break;
396 }
397
398 pfn = btop((bnp->bvn_addr + off) & PAGEMASK);
399 fpp = page_numtopp_nolock(pfn);
400
401 if (ppcopy(fpp, pp) == 0) {
402 pvn_read_done(pp, B_ERROR);
403 return (EIO);
404 }
405
406 if (pl != NULL) {
407 pvn_plist_init(pp, pl, plsz, off, PAGESIZE, rw);
408 } else {
409 pvn_io_done(pp);
410 }
411
412 return (0);
413 }
414
415 /*ARGSUSED*/
416 static int
bootfs_getpage(vnode_t * vp,offset_t off,size_t len,uint_t * protp,page_t * pl[],size_t plsz,struct seg * seg,caddr_t addr,enum seg_rw rw,cred_t * cr,caller_context_t * ct)417 bootfs_getpage(vnode_t *vp, offset_t off, size_t len, uint_t *protp,
418 page_t *pl[], size_t plsz, struct seg *seg, caddr_t addr, enum seg_rw rw,
419 cred_t *cr, caller_context_t *ct)
420 {
421 int err;
422 bootfs_node_t *bnp = vp->v_data;
423
424 if (off + len > bnp->bvn_size + PAGEOFFSET)
425 return (EFAULT);
426
427 if (protp != NULL)
428 *protp = PROT_ALL;
429
430 if (len <= PAGESIZE)
431 err = bootfs_getapage(vp, (u_offset_t)off, len, protp, pl,
432 plsz, seg, addr, rw, cr);
433 else
434 err = pvn_getpages(bootfs_getapage, vp, (u_offset_t)off, len,
435 protp, pl, plsz, seg, addr, rw, cr);
436
437 return (err);
438 }
439
440 /*ARGSUSED*/
441 static int
bootfs_map(vnode_t * vp,offset_t off,struct as * as,caddr_t * addrp,size_t len,uchar_t prot,uchar_t maxprot,uint_t flags,cred_t * cr,caller_context_t * ct)442 bootfs_map(vnode_t *vp, offset_t off, struct as *as, caddr_t *addrp,
443 size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr,
444 caller_context_t *ct)
445 {
446 int ret;
447 segvn_crargs_t vn_a;
448
449 #ifdef _ILP32
450 if (len > MAXOFF_T)
451 return (ENOMEM);
452 #endif
453
454 if (vp->v_flag & VNOMAP)
455 return (ENOSYS);
456
457 if (off < 0 || off > MAXOFFSET_T - off)
458 return (ENXIO);
459
460 if (vp->v_type != VREG)
461 return (ENODEV);
462
463 if ((prot & PROT_WRITE) && (flags & MAP_SHARED))
464 return (ENOTSUP);
465
466 as_rangelock(as);
467 ret = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags);
468 if (ret != 0) {
469 as_rangeunlock(as);
470 return (ret);
471 }
472
473 vn_a.vp = vp;
474 vn_a.offset = (u_offset_t)off;
475 vn_a.type = flags & MAP_TYPE;
476 vn_a.prot = prot;
477 vn_a.maxprot = maxprot;
478 vn_a.cred = cr;
479 vn_a.amp = NULL;
480 vn_a.flags = flags & ~MAP_TYPE;
481 vn_a.szc = 0;
482 vn_a.lgrp_mem_policy_flags = 0;
483
484 ret = as_map(as, *addrp, len, segvn_create, &vn_a);
485
486 as_rangeunlock(as);
487 return (ret);
488
489 }
490
491 /*ARGSUSED*/
492 static int
bootfs_addmap(vnode_t * vp,offset_t off,struct as * as,caddr_t addr,size_t len,uchar_t prot,uchar_t maxprot,uint_t flags,cred_t * cr,caller_context_t * ct)493 bootfs_addmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr,
494 size_t len, uchar_t prot, uchar_t maxprot, uint_t flags, cred_t *cr,
495 caller_context_t *ct)
496 {
497 return (0);
498 }
499
500 /*ARGSUSED*/
501 static int
bootfs_delmap(vnode_t * vp,offset_t off,struct as * as,caddr_t addr,size_t len,uint_t prot,uint_t maxprot,uint_t flags,cred_t * cr,caller_context_t * ct)502 bootfs_delmap(vnode_t *vp, offset_t off, struct as *as, caddr_t addr,
503 size_t len, uint_t prot, uint_t maxprot, uint_t flags, cred_t *cr,
504 caller_context_t *ct)
505 {
506 return (0);
507 }
508
509 static int
bootfs_pathconf(vnode_t * vp,int cmd,ulong_t * valp,cred_t * cr,caller_context_t * ct)510 bootfs_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
511 caller_context_t *ct)
512 {
513 int ret;
514
515 switch (cmd) {
516 case _PC_TIMESTAMP_RESOLUTION:
517 *valp = 1L;
518 ret = 0;
519 break;
520 default:
521 ret = fs_pathconf(vp, cmd, valp, cr, ct);
522 }
523
524 return (ret);
525 }
526
527 const fs_operation_def_t bootfs_vnodeops_template[] = {
528 VOPNAME_OPEN, { .vop_open = bootfs_open },
529 VOPNAME_CLOSE, { .vop_close = bootfs_close },
530 VOPNAME_READ, { .vop_read = bootfs_read },
531 VOPNAME_IOCTL, { .vop_ioctl = bootfs_ioctl },
532 VOPNAME_GETATTR, { .vop_getattr = bootfs_getattr },
533 VOPNAME_ACCESS, { .vop_access = bootfs_access },
534 VOPNAME_LOOKUP, { .vop_lookup = bootfs_lookup },
535 VOPNAME_READDIR, { .vop_readdir = bootfs_readdir },
536 VOPNAME_INACTIVE, { .vop_inactive = bootfs_inactive },
537 VOPNAME_RWLOCK, { .vop_rwlock = bootfs_rwlock },
538 VOPNAME_RWUNLOCK, { .vop_rwunlock = bootfs_rwunlock },
539 VOPNAME_SEEK, { .vop_seek = bootfs_seek },
540 VOPNAME_GETPAGE, { .vop_getpage = bootfs_getpage },
541 VOPNAME_MAP, { .vop_map = bootfs_map },
542 VOPNAME_ADDMAP, { .vop_addmap = bootfs_addmap },
543 VOPNAME_DELMAP, { .vop_delmap = bootfs_delmap },
544 VOPNAME_PATHCONF, { .vop_pathconf = bootfs_pathconf },
545 VOPNAME_VNEVENT, { .vop_vnevent = fs_vnevent_nosupport },
546 NULL, NULL
547 };
548