1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 29 /* All rights reserved. */ 30 31 32 #include <sys/types.h> 33 #include <sys/param.h> 34 #include <sys/cmn_err.h> 35 #include <sys/debug.h> 36 #include <sys/dirent.h> 37 #include <sys/errno.h> 38 #include <sys/file.h> 39 #include <sys/inline.h> 40 #include <sys/kmem.h> 41 #include <sys/pathname.h> 42 #include <sys/resource.h> 43 #include <sys/statvfs.h> 44 #include <sys/mount.h> 45 #include <sys/sysmacros.h> 46 #include <sys/systm.h> 47 #include <sys/uio.h> 48 #include <sys/vfs.h> 49 #include <sys/vfs_opreg.h> 50 #include <sys/vnode.h> 51 #include <sys/cred.h> 52 #include <sys/mntent.h> 53 #include <sys/mount.h> 54 #include <sys/user.h> 55 #include <sys/t_lock.h> 56 #include <sys/modctl.h> 57 #include <sys/policy.h> 58 #include <fs/fs_subr.h> 59 #include <sys/atomic.h> 60 #include <sys/mkdev.h> 61 62 #define round(r) (((r)+sizeof (int)-1)&(~(sizeof (int)-1))) 63 #define fdtoi(n) ((n)+100) 64 65 #define FDDIRSIZE 14 66 struct fddirect { 67 short d_ino; 68 char d_name[FDDIRSIZE]; 69 }; 70 71 #define FDROOTINO 2 72 #define FDSDSIZE sizeof (struct fddirect) 73 #define FDNSIZE 10 74 75 static int fdfstype = 0; 76 static major_t fdfsmaj; 77 static minor_t fdfsmin; 78 static major_t fdrmaj; 79 static kmutex_t fd_minor_lock; 80 81 static int fdget(vnode_t *, char *, vnode_t **); 82 83 /* ARGSUSED */ 84 static int 85 fdopen(vnode_t **vpp, int mode, cred_t *cr, caller_context_t *ct) 86 { 87 if ((*vpp)->v_type != VDIR) { 88 mutex_enter(&(*vpp)->v_lock); 89 (*vpp)->v_flag |= VDUP; 90 mutex_exit(&(*vpp)->v_lock); 91 } 92 return (0); 93 } 94 95 /* ARGSUSED */ 96 static int 97 fdclose(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr, 98 caller_context_t *ct) 99 { 100 return (0); 101 } 102 103 /* ARGSUSED */ 104 static int 105 fdread(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr, caller_context_t *ct) 106 { 107 static struct fddirect dotbuf[] = { 108 { FDROOTINO, "." }, 109 { FDROOTINO, ".." } 110 }; 111 struct fddirect dirbuf; 112 int i, n; 113 int minfd, maxfd, modoff, error = 0; 114 int nentries; 115 rctl_qty_t fdno_ctl; 116 int endoff; 117 118 if (vp->v_type != VDIR) 119 return (ENOSYS); 120 121 mutex_enter(&curproc->p_lock); 122 fdno_ctl = rctl_enforced_value(rctlproc_legacy[RLIMIT_NOFILE], 123 curproc->p_rctls, curproc); 124 nentries = MIN(P_FINFO(curproc)->fi_nfiles, (int)fdno_ctl); 125 mutex_exit(&curproc->p_lock); 126 127 endoff = (nentries + 2) * FDSDSIZE; 128 129 /* 130 * Fake up ".", "..", and the /dev/fd directory entries. 131 */ 132 if (uiop->uio_loffset < (offset_t)0 || 133 uiop->uio_loffset >= (offset_t)endoff || 134 uiop->uio_resid <= 0) 135 return (0); 136 ASSERT(uiop->uio_loffset <= MAXOFF_T); 137 if (uiop->uio_offset < 2*FDSDSIZE) { 138 error = uiomove((caddr_t)dotbuf + uiop->uio_offset, 139 MIN(uiop->uio_resid, 2*FDSDSIZE - uiop->uio_offset), 140 UIO_READ, uiop); 141 if (uiop->uio_resid <= 0 || error) 142 return (error); 143 } 144 minfd = (uiop->uio_offset - 2*FDSDSIZE)/FDSDSIZE; 145 maxfd = (uiop->uio_offset + uiop->uio_resid - 1)/FDSDSIZE; 146 modoff = uiop->uio_offset % FDSDSIZE; 147 148 for (i = 0; i < FDDIRSIZE; i++) 149 dirbuf.d_name[i] = '\0'; 150 for (i = minfd; i < MIN(maxfd, nentries); i++) { 151 n = i; 152 dirbuf.d_ino = fdtoi(n); 153 numtos((ulong_t)n, dirbuf.d_name); 154 error = uiomove((caddr_t)&dirbuf + modoff, 155 MIN(uiop->uio_resid, FDSDSIZE - modoff), 156 UIO_READ, uiop); 157 if (uiop->uio_resid <= 0 || error) 158 return (error); 159 modoff = 0; 160 } 161 162 return (error); 163 } 164 165 /* ARGSUSED */ 166 static int 167 fdgetattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr, 168 caller_context_t *ct) 169 { 170 vfs_t *vfsp = vp->v_vfsp; 171 timestruc_t now; 172 173 if (vp->v_type == VDIR) { 174 vap->va_nlink = 2; 175 vap->va_size = (u_offset_t) 176 ((P_FINFO(curproc)->fi_nfiles + 2) * FDSDSIZE); 177 vap->va_mode = 0555; 178 vap->va_nodeid = (ino64_t)FDROOTINO; 179 } else { 180 vap->va_nlink = 1; 181 vap->va_size = (u_offset_t)0; 182 vap->va_mode = 0666; 183 vap->va_nodeid = (ino64_t)fdtoi(getminor(vp->v_rdev)); 184 } 185 vap->va_type = vp->v_type; 186 vap->va_rdev = vp->v_rdev; 187 vap->va_blksize = vfsp->vfs_bsize; 188 vap->va_nblocks = (fsblkcnt64_t)0; 189 gethrestime(&now); 190 vap->va_atime = vap->va_mtime = vap->va_ctime = now; 191 vap->va_uid = 0; 192 vap->va_gid = 0; 193 vap->va_fsid = vfsp->vfs_dev; 194 vap->va_seq = 0; 195 return (0); 196 } 197 198 /* ARGSUSED */ 199 static int 200 fdaccess(vnode_t *vp, int mode, int flags, cred_t *cr, caller_context_t *ct) 201 { 202 return (0); 203 } 204 205 /* ARGSUSED */ 206 static int 207 fdlookup(vnode_t *dp, char *comp, vnode_t **vpp, pathname_t *pnp, 208 int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct, 209 int *direntflags, pathname_t *realpnp) 210 { 211 if (comp[0] == 0 || strcmp(comp, ".") == 0 || strcmp(comp, "..") == 0) { 212 VN_HOLD(dp); 213 *vpp = dp; 214 return (0); 215 } 216 return (fdget(dp, comp, vpp)); 217 } 218 219 /* ARGSUSED */ 220 static int 221 fdcreate(vnode_t *dvp, char *comp, vattr_t *vap, enum vcexcl excl, 222 int mode, vnode_t **vpp, cred_t *cr, int flag, caller_context_t *ct, 223 vsecattr_t *vsecp) 224 { 225 return (fdget(dvp, comp, vpp)); 226 } 227 228 /* ARGSUSED */ 229 static int 230 fdreaddir(vnode_t *vp, uio_t *uiop, cred_t *cr, int *eofp, caller_context_t *ct, 231 int flags) 232 { 233 /* bp holds one dirent structure */ 234 u_offset_t bp[DIRENT64_RECLEN(FDNSIZE) / sizeof (u_offset_t)]; 235 struct dirent64 *dirent = (struct dirent64 *)bp; 236 int reclen, nentries; 237 rctl_qty_t fdno_ctl; 238 int n; 239 int oresid; 240 off_t off; 241 242 if (uiop->uio_offset < 0 || uiop->uio_resid <= 0 || 243 (uiop->uio_offset % FDSDSIZE) != 0) 244 return (ENOENT); 245 246 ASSERT(uiop->uio_loffset <= MAXOFF_T); 247 oresid = uiop->uio_resid; 248 bzero(bp, sizeof (bp)); 249 250 mutex_enter(&curproc->p_lock); 251 fdno_ctl = rctl_enforced_value(rctlproc_legacy[RLIMIT_NOFILE], 252 curproc->p_rctls, curproc); 253 nentries = MIN(P_FINFO(curproc)->fi_nfiles, (int)fdno_ctl); 254 mutex_exit(&curproc->p_lock); 255 256 while (uiop->uio_resid > 0) { 257 if ((off = uiop->uio_offset) == 0) { /* "." */ 258 dirent->d_ino = (ino64_t)FDROOTINO; 259 dirent->d_name[0] = '.'; 260 dirent->d_name[1] = '\0'; 261 reclen = DIRENT64_RECLEN(1); 262 } else if (off == FDSDSIZE) { /* ".." */ 263 dirent->d_ino = (ino64_t)FDROOTINO; 264 dirent->d_name[0] = '.'; 265 dirent->d_name[1] = '.'; 266 dirent->d_name[2] = '\0'; 267 reclen = DIRENT64_RECLEN(2); 268 } else { 269 /* 270 * Return entries corresponding to the allowable 271 * number of file descriptors for this process. 272 */ 273 if ((n = (off-2*FDSDSIZE)/FDSDSIZE) >= nentries) 274 break; 275 dirent->d_ino = (ino64_t)fdtoi(n); 276 numtos((ulong_t)n, dirent->d_name); 277 reclen = DIRENT64_RECLEN(strlen(dirent->d_name)); 278 } 279 dirent->d_off = (offset_t)(uiop->uio_offset + FDSDSIZE); 280 dirent->d_reclen = (ushort_t)reclen; 281 282 if (reclen > uiop->uio_resid) { 283 /* 284 * Error if no entries have been returned yet. 285 */ 286 if (uiop->uio_resid == oresid) 287 return (EINVAL); 288 break; 289 } 290 /* 291 * uiomove() updates both resid and offset by the same 292 * amount. But we want offset to change in increments 293 * of FDSDSIZE, which is different from the number of bytes 294 * being returned to the user. So we set uio_offset 295 * separately, ignoring what uiomove() does. 296 */ 297 if (uiomove((caddr_t)dirent, reclen, UIO_READ, uiop)) 298 return (EFAULT); 299 uiop->uio_offset = off + FDSDSIZE; 300 } 301 if (eofp) 302 *eofp = ((uiop->uio_offset-2*FDSDSIZE)/FDSDSIZE >= nentries); 303 return (0); 304 } 305 306 /* ARGSUSED */ 307 static void 308 fdinactive(vnode_t *vp, cred_t *cr, caller_context_t *ct) 309 { 310 mutex_enter(&vp->v_lock); 311 ASSERT(vp->v_count >= 1); 312 if (--vp->v_count != 0) { 313 mutex_exit(&vp->v_lock); 314 return; 315 } 316 mutex_exit(&vp->v_lock); 317 vn_invalid(vp); 318 vn_free(vp); 319 } 320 321 static struct vnodeops *fd_vnodeops; 322 323 static const fs_operation_def_t fd_vnodeops_template[] = { 324 VOPNAME_OPEN, { .vop_open = fdopen }, 325 VOPNAME_CLOSE, { .vop_close = fdclose }, 326 VOPNAME_READ, { .vop_read = fdread }, 327 VOPNAME_GETATTR, { .vop_getattr = fdgetattr }, 328 VOPNAME_ACCESS, { .vop_access = fdaccess }, 329 VOPNAME_LOOKUP, { .vop_lookup = fdlookup }, 330 VOPNAME_CREATE, { .vop_create = fdcreate }, 331 VOPNAME_READDIR, { .vop_readdir = fdreaddir }, 332 VOPNAME_INACTIVE, { .vop_inactive = fdinactive }, 333 VOPNAME_FRLOCK, { .error = fs_error }, 334 VOPNAME_POLL, { .error = fs_error }, 335 VOPNAME_DISPOSE, { .error = fs_error }, 336 NULL, NULL 337 }; 338 339 static int 340 fdget(struct vnode *dvp, char *comp, struct vnode **vpp) 341 { 342 int n = 0; 343 struct vnode *vp; 344 345 while (*comp) { 346 if (*comp < '0' || *comp > '9') 347 return (ENOENT); 348 n = 10 * n + *comp++ - '0'; 349 } 350 vp = vn_alloc(KM_SLEEP); 351 vp->v_type = VCHR; 352 vp->v_vfsp = dvp->v_vfsp; 353 vn_setops(vp, fd_vnodeops); 354 vp->v_data = NULL; 355 vp->v_flag = VNOMAP; 356 vp->v_rdev = makedevice(fdrmaj, n); 357 vn_exists(vp); 358 *vpp = vp; 359 return (0); 360 } 361 362 /* 363 * fdfs is mounted on /dev/fd, however, there are two interesting 364 * possibilities - two threads racing to do the same mount (protected 365 * by vfs locking), and two threads mounting fdfs in different places. 366 */ 367 /*ARGSUSED*/ 368 static int 369 fdmount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr) 370 { 371 struct vnode *vp; 372 373 if (secpolicy_fs_mount(cr, mvp, vfsp) != 0) 374 return (EPERM); 375 if (mvp->v_type != VDIR) 376 return (ENOTDIR); 377 378 mutex_enter(&mvp->v_lock); 379 if ((uap->flags & MS_OVERLAY) == 0 && 380 (mvp->v_count > 1 || (mvp->v_flag & VROOT))) { 381 mutex_exit(&mvp->v_lock); 382 return (EBUSY); 383 } 384 mutex_exit(&mvp->v_lock); 385 386 /* 387 * Having the resource be anything but "fd" doesn't make sense 388 */ 389 vfs_setresource(vfsp, "fd"); 390 391 vp = vn_alloc(KM_SLEEP); 392 vp->v_vfsp = vfsp; 393 vn_setops(vp, fd_vnodeops); 394 vp->v_type = VDIR; 395 vp->v_data = NULL; 396 vp->v_flag |= VROOT; 397 vfsp->vfs_fstype = fdfstype; 398 vfsp->vfs_data = (char *)vp; 399 mutex_enter(&fd_minor_lock); 400 do { 401 fdfsmin = (fdfsmin + 1) & L_MAXMIN32; 402 vfsp->vfs_dev = makedevice(fdfsmaj, fdfsmin); 403 } while (vfs_devismounted(vfsp->vfs_dev)); 404 mutex_exit(&fd_minor_lock); 405 vfs_make_fsid(&vfsp->vfs_fsid, vfsp->vfs_dev, fdfstype); 406 vfsp->vfs_bsize = 1024; 407 return (0); 408 } 409 410 /* ARGSUSED */ 411 static int 412 fdunmount(vfs_t *vfsp, int flag, cred_t *cr) 413 { 414 vnode_t *rvp; 415 416 if (secpolicy_fs_unmount(cr, vfsp) != 0) 417 return (EPERM); 418 419 /* 420 * forced unmount is not supported by this file system 421 * and thus, ENOTSUP, is being returned. 422 */ 423 if (flag & MS_FORCE) 424 return (ENOTSUP); 425 426 rvp = (vnode_t *)vfsp->vfs_data; 427 if (rvp->v_count > 1) 428 return (EBUSY); 429 430 VN_RELE(rvp); 431 return (0); 432 } 433 434 /* ARGSUSED */ 435 static int 436 fdroot(vfs_t *vfsp, vnode_t **vpp) 437 { 438 vnode_t *vp = (vnode_t *)vfsp->vfs_data; 439 440 VN_HOLD(vp); 441 *vpp = vp; 442 return (0); 443 } 444 445 /* 446 * No locking required because I held the root vnode before calling this 447 * function so the vfs won't disappear on me. To be more explicit: 448 * fdvrootp->v_count will be greater than 1 so fdunmount will just return. 449 */ 450 static int 451 fdstatvfs(struct vfs *vfsp, struct statvfs64 *sp) 452 { 453 dev32_t d32; 454 rctl_qty_t fdno_ctl; 455 456 mutex_enter(&curproc->p_lock); 457 fdno_ctl = rctl_enforced_value(rctlproc_legacy[RLIMIT_NOFILE], 458 curproc->p_rctls, curproc); 459 mutex_exit(&curproc->p_lock); 460 461 bzero(sp, sizeof (*sp)); 462 sp->f_bsize = 1024; 463 sp->f_frsize = 1024; 464 sp->f_blocks = (fsblkcnt64_t)0; 465 sp->f_bfree = (fsblkcnt64_t)0; 466 sp->f_bavail = (fsblkcnt64_t)0; 467 sp->f_files = (fsfilcnt64_t) 468 (MIN(P_FINFO(curproc)->fi_nfiles, fdno_ctl + 2)); 469 sp->f_ffree = (fsfilcnt64_t)0; 470 sp->f_favail = (fsfilcnt64_t)0; 471 (void) cmpldev(&d32, vfsp->vfs_dev); 472 sp->f_fsid = d32; 473 (void) strcpy(sp->f_basetype, vfssw[fdfstype].vsw_name); 474 sp->f_flag = vf_to_stf(vfsp->vfs_flag); 475 sp->f_namemax = FDNSIZE; 476 (void) strcpy(sp->f_fstr, "/dev/fd"); 477 (void) strcpy(&sp->f_fstr[8], "/dev/fd"); 478 return (0); 479 } 480 481 int 482 fdinit(int fstype, char *name) 483 { 484 static const fs_operation_def_t fd_vfsops_template[] = { 485 VFSNAME_MOUNT, { .vfs_mount = fdmount }, 486 VFSNAME_UNMOUNT, { .vfs_unmount = fdunmount }, 487 VFSNAME_ROOT, { .vfs_root = fdroot }, 488 VFSNAME_STATVFS, { .vfs_statvfs = fdstatvfs }, 489 NULL, NULL 490 }; 491 int error; 492 493 fdfstype = fstype; 494 ASSERT(fdfstype != 0); 495 496 /* 497 * Associate VFS ops vector with this fstype. 498 */ 499 error = vfs_setfsops(fstype, fd_vfsops_template, NULL); 500 if (error != 0) { 501 cmn_err(CE_WARN, "fdinit: bad vnode ops template"); 502 return (error); 503 } 504 505 error = vn_make_ops(name, fd_vnodeops_template, &fd_vnodeops); 506 if (error != 0) { 507 (void) vfs_freevfsops_by_type(fstype); 508 cmn_err(CE_WARN, "fdinit: bad vnode ops template"); 509 return (error); 510 } 511 512 /* 513 * Assign unique "device" numbers (reported by stat(2)). 514 */ 515 fdfsmaj = getudev(); 516 fdrmaj = getudev(); 517 if (fdfsmaj == (major_t)-1 || fdrmaj == (major_t)-1) { 518 cmn_err(CE_WARN, "fdinit: can't get unique device numbers"); 519 if (fdfsmaj == (major_t)-1) 520 fdfsmaj = 0; 521 if (fdrmaj == (major_t)-1) 522 fdrmaj = 0; 523 } 524 mutex_init(&fd_minor_lock, NULL, MUTEX_DEFAULT, NULL); 525 return (0); 526 } 527 528 /* 529 * FDFS Mount options table 530 */ 531 static char *rw_cancel[] = { MNTOPT_RO, NULL }; 532 533 static mntopt_t mntopts[] = { 534 /* 535 * option name cancel option default arg flags 536 */ 537 { MNTOPT_RW, rw_cancel, NULL, MO_DEFAULT, 538 (void *)MNTOPT_NOINTR }, 539 { MNTOPT_IGNORE, NULL, NULL, 0, 540 (void *)0 }, 541 }; 542 543 static mntopts_t fdfs_mntopts = { 544 sizeof (mntopts) / sizeof (mntopt_t), 545 mntopts 546 }; 547 548 static vfsdef_t vfw = { 549 VFSDEF_VERSION, 550 "fd", 551 fdinit, 552 VSW_HASPROTO, 553 &fdfs_mntopts 554 }; 555 556 static struct modlfs modlfs = { 557 &mod_fsops, 558 "filesystem for fd", 559 &vfw 560 }; 561 562 static struct modlinkage modlinkage = { 563 MODREV_1, 564 &modlfs, 565 NULL 566 }; 567 568 int 569 _init(void) 570 { 571 return (mod_install(&modlinkage)); 572 } 573 574 int 575 _info(struct modinfo *modinfop) 576 { 577 return (mod_info(&modlinkage, modinfop)); 578 } 579