1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 29 /* All rights reserved. */ 30 31 32 #include <sys/types.h> 33 #include <sys/param.h> 34 #include <sys/cmn_err.h> 35 #include <sys/debug.h> 36 #include <sys/dirent.h> 37 #include <sys/errno.h> 38 #include <sys/file.h> 39 #include <sys/inline.h> 40 #include <sys/kmem.h> 41 #include <sys/pathname.h> 42 #include <sys/resource.h> 43 #include <sys/statvfs.h> 44 #include <sys/mount.h> 45 #include <sys/sysmacros.h> 46 #include <sys/systm.h> 47 #include <sys/uio.h> 48 #include <sys/vfs.h> 49 #include <sys/vfs_opreg.h> 50 #include <sys/vnode.h> 51 #include <sys/cred.h> 52 #include <sys/mntent.h> 53 #include <sys/mount.h> 54 #include <sys/user.h> 55 #include <sys/t_lock.h> 56 #include <sys/modctl.h> 57 #include <sys/policy.h> 58 #include <fs/fs_subr.h> 59 #include <sys/atomic.h> 60 #include <sys/mkdev.h> 61 62 #define round(r) (((r)+sizeof (int)-1)&(~(sizeof (int)-1))) 63 #define fdtoi(n) ((n)+100) 64 65 #define FDDIRSIZE 14 66 struct fddirect { 67 short d_ino; 68 char d_name[FDDIRSIZE]; 69 }; 70 71 #define FDROOTINO 2 72 #define FDSDSIZE sizeof (struct fddirect) 73 #define FDNSIZE 10 74 75 static int fdfstype = 0; 76 static major_t fdfsmaj; 77 static minor_t fdfsmin; 78 static major_t fdrmaj; 79 static kmutex_t fd_minor_lock; 80 81 static int fdget(vnode_t *, char *, vnode_t **); 82 83 /* ARGSUSED */ 84 static int 85 fdopen(vnode_t **vpp, int mode, cred_t *cr) 86 { 87 if ((*vpp)->v_type != VDIR) { 88 mutex_enter(&(*vpp)->v_lock); 89 (*vpp)->v_flag |= VDUP; 90 mutex_exit(&(*vpp)->v_lock); 91 } 92 return (0); 93 } 94 95 /* ARGSUSED */ 96 static int 97 fdclose(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr) 98 { 99 return (0); 100 } 101 102 /* ARGSUSED */ 103 static int 104 fdread(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr, caller_context_t *ct) 105 { 106 static struct fddirect dotbuf[] = { 107 { FDROOTINO, "." }, 108 { FDROOTINO, ".." } 109 }; 110 struct fddirect dirbuf; 111 int i, n; 112 int minfd, maxfd, modoff, error = 0; 113 int nentries; 114 rctl_qty_t fdno_ctl; 115 int endoff; 116 117 if (vp->v_type != VDIR) 118 return (ENOSYS); 119 120 mutex_enter(&curproc->p_lock); 121 fdno_ctl = rctl_enforced_value(rctlproc_legacy[RLIMIT_NOFILE], 122 curproc->p_rctls, curproc); 123 nentries = MIN(P_FINFO(curproc)->fi_nfiles, (int)fdno_ctl); 124 mutex_exit(&curproc->p_lock); 125 126 endoff = (nentries + 2) * FDSDSIZE; 127 128 /* 129 * Fake up ".", "..", and the /dev/fd directory entries. 130 */ 131 if (uiop->uio_loffset < (offset_t)0 || 132 uiop->uio_loffset >= (offset_t)endoff || 133 uiop->uio_resid <= 0) 134 return (0); 135 ASSERT(uiop->uio_loffset <= MAXOFF_T); 136 if (uiop->uio_offset < 2*FDSDSIZE) { 137 error = uiomove((caddr_t)dotbuf + uiop->uio_offset, 138 MIN(uiop->uio_resid, 2*FDSDSIZE - uiop->uio_offset), 139 UIO_READ, uiop); 140 if (uiop->uio_resid <= 0 || error) 141 return (error); 142 } 143 minfd = (uiop->uio_offset - 2*FDSDSIZE)/FDSDSIZE; 144 maxfd = (uiop->uio_offset + uiop->uio_resid - 1)/FDSDSIZE; 145 modoff = uiop->uio_offset % FDSDSIZE; 146 147 for (i = 0; i < FDDIRSIZE; i++) 148 dirbuf.d_name[i] = '\0'; 149 for (i = minfd; i < MIN(maxfd, nentries); i++) { 150 n = i; 151 dirbuf.d_ino = fdtoi(n); 152 numtos((ulong_t)n, dirbuf.d_name); 153 error = uiomove((caddr_t)&dirbuf + modoff, 154 MIN(uiop->uio_resid, FDSDSIZE - modoff), 155 UIO_READ, uiop); 156 if (uiop->uio_resid <= 0 || error) 157 return (error); 158 modoff = 0; 159 } 160 161 return (error); 162 } 163 164 /* ARGSUSED */ 165 static int 166 fdgetattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr) 167 { 168 vfs_t *vfsp = vp->v_vfsp; 169 timestruc_t now; 170 171 if (vp->v_type == VDIR) { 172 vap->va_nlink = 2; 173 vap->va_size = (u_offset_t) 174 ((P_FINFO(curproc)->fi_nfiles + 2) * FDSDSIZE); 175 vap->va_mode = 0555; 176 vap->va_nodeid = (ino64_t)FDROOTINO; 177 } else { 178 vap->va_nlink = 1; 179 vap->va_size = (u_offset_t)0; 180 vap->va_mode = 0666; 181 vap->va_nodeid = (ino64_t)fdtoi(getminor(vp->v_rdev)); 182 } 183 vap->va_type = vp->v_type; 184 vap->va_rdev = vp->v_rdev; 185 vap->va_blksize = vfsp->vfs_bsize; 186 vap->va_nblocks = (fsblkcnt64_t)0; 187 gethrestime(&now); 188 vap->va_atime = vap->va_mtime = vap->va_ctime = now; 189 vap->va_uid = 0; 190 vap->va_gid = 0; 191 vap->va_fsid = vfsp->vfs_dev; 192 vap->va_seq = 0; 193 return (0); 194 } 195 196 /* ARGSUSED */ 197 static int 198 fdaccess(vnode_t *vp, int mode, int flags, cred_t *cr) 199 { 200 return (0); 201 } 202 203 /* ARGSUSED */ 204 static int 205 fdlookup(vnode_t *dp, char *comp, vnode_t **vpp, pathname_t *pnp, 206 int flags, vnode_t *rdir, cred_t *cr) 207 { 208 if (comp[0] == 0 || strcmp(comp, ".") == 0 || strcmp(comp, "..") == 0) { 209 VN_HOLD(dp); 210 *vpp = dp; 211 return (0); 212 } 213 return (fdget(dp, comp, vpp)); 214 } 215 216 /* ARGSUSED */ 217 static int 218 fdcreate(vnode_t *dvp, char *comp, vattr_t *vap, enum vcexcl excl, 219 int mode, vnode_t **vpp, cred_t *cr, int flag) 220 { 221 return (fdget(dvp, comp, vpp)); 222 } 223 224 /* ARGSUSED */ 225 static int 226 fdreaddir(vnode_t *vp, uio_t *uiop, cred_t *cr, int *eofp) 227 { 228 /* bp holds one dirent structure */ 229 u_offset_t bp[DIRENT64_RECLEN(FDNSIZE) / sizeof (u_offset_t)]; 230 struct dirent64 *dirent = (struct dirent64 *)bp; 231 int reclen, nentries; 232 rctl_qty_t fdno_ctl; 233 int n; 234 int oresid; 235 off_t off; 236 237 if (uiop->uio_offset < 0 || uiop->uio_resid <= 0 || 238 (uiop->uio_offset % FDSDSIZE) != 0) 239 return (ENOENT); 240 241 ASSERT(uiop->uio_loffset <= MAXOFF_T); 242 oresid = uiop->uio_resid; 243 bzero(bp, sizeof (bp)); 244 245 mutex_enter(&curproc->p_lock); 246 fdno_ctl = rctl_enforced_value(rctlproc_legacy[RLIMIT_NOFILE], 247 curproc->p_rctls, curproc); 248 nentries = MIN(P_FINFO(curproc)->fi_nfiles, (int)fdno_ctl); 249 mutex_exit(&curproc->p_lock); 250 251 while (uiop->uio_resid > 0) { 252 if ((off = uiop->uio_offset) == 0) { /* "." */ 253 dirent->d_ino = (ino64_t)FDROOTINO; 254 dirent->d_name[0] = '.'; 255 dirent->d_name[1] = '\0'; 256 reclen = DIRENT64_RECLEN(1); 257 } else if (off == FDSDSIZE) { /* ".." */ 258 dirent->d_ino = (ino64_t)FDROOTINO; 259 dirent->d_name[0] = '.'; 260 dirent->d_name[1] = '.'; 261 dirent->d_name[2] = '\0'; 262 reclen = DIRENT64_RECLEN(2); 263 } else { 264 /* 265 * Return entries corresponding to the allowable 266 * number of file descriptors for this process. 267 */ 268 if ((n = (off-2*FDSDSIZE)/FDSDSIZE) >= nentries) 269 break; 270 dirent->d_ino = (ino64_t)fdtoi(n); 271 numtos((ulong_t)n, dirent->d_name); 272 reclen = DIRENT64_RECLEN(strlen(dirent->d_name)); 273 } 274 dirent->d_off = (offset_t)(uiop->uio_offset + FDSDSIZE); 275 dirent->d_reclen = (ushort_t)reclen; 276 277 if (reclen > uiop->uio_resid) { 278 /* 279 * Error if no entries have been returned yet. 280 */ 281 if (uiop->uio_resid == oresid) 282 return (EINVAL); 283 break; 284 } 285 /* 286 * uiomove() updates both resid and offset by the same 287 * amount. But we want offset to change in increments 288 * of FDSDSIZE, which is different from the number of bytes 289 * being returned to the user. So we set uio_offset 290 * separately, ignoring what uiomove() does. 291 */ 292 if (uiomove((caddr_t)dirent, reclen, UIO_READ, uiop)) 293 return (EFAULT); 294 uiop->uio_offset = off + FDSDSIZE; 295 } 296 if (eofp) 297 *eofp = ((uiop->uio_offset-2*FDSDSIZE)/FDSDSIZE >= nentries); 298 return (0); 299 } 300 301 /* ARGSUSED */ 302 static void 303 fdinactive(vnode_t *vp, cred_t *cr) 304 { 305 mutex_enter(&vp->v_lock); 306 ASSERT(vp->v_count >= 1); 307 if (--vp->v_count != 0) { 308 mutex_exit(&vp->v_lock); 309 return; 310 } 311 mutex_exit(&vp->v_lock); 312 vn_invalid(vp); 313 vn_free(vp); 314 } 315 316 static struct vnodeops *fd_vnodeops; 317 318 static const fs_operation_def_t fd_vnodeops_template[] = { 319 VOPNAME_OPEN, { .vop_open = fdopen }, 320 VOPNAME_CLOSE, { .vop_close = fdclose }, 321 VOPNAME_READ, { .vop_read = fdread }, 322 VOPNAME_GETATTR, { .vop_getattr = fdgetattr }, 323 VOPNAME_ACCESS, { .vop_access = fdaccess }, 324 VOPNAME_LOOKUP, { .vop_lookup = fdlookup }, 325 VOPNAME_CREATE, { .vop_create = fdcreate }, 326 VOPNAME_READDIR, { .vop_readdir = fdreaddir }, 327 VOPNAME_INACTIVE, { .vop_inactive = fdinactive }, 328 VOPNAME_FRLOCK, { .error = fs_error }, 329 VOPNAME_POLL, { .error = fs_error }, 330 VOPNAME_DISPOSE, { .error = fs_error }, 331 NULL, NULL 332 }; 333 334 static int 335 fdget(struct vnode *dvp, char *comp, struct vnode **vpp) 336 { 337 int n = 0; 338 struct vnode *vp; 339 340 while (*comp) { 341 if (*comp < '0' || *comp > '9') 342 return (ENOENT); 343 n = 10 * n + *comp++ - '0'; 344 } 345 vp = vn_alloc(KM_SLEEP); 346 vp->v_type = VCHR; 347 vp->v_vfsp = dvp->v_vfsp; 348 vn_setops(vp, fd_vnodeops); 349 vp->v_data = NULL; 350 vp->v_flag = VNOMAP; 351 vp->v_rdev = makedevice(fdrmaj, n); 352 vn_exists(vp); 353 *vpp = vp; 354 return (0); 355 } 356 357 /* 358 * fdfs is mounted on /dev/fd, however, there are two interesting 359 * possibilities - two threads racing to do the same mount (protected 360 * by vfs locking), and two threads mounting fdfs in different places. 361 */ 362 /*ARGSUSED*/ 363 static int 364 fdmount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr) 365 { 366 struct vnode *vp; 367 368 if (secpolicy_fs_mount(cr, mvp, vfsp) != 0) 369 return (EPERM); 370 if (mvp->v_type != VDIR) 371 return (ENOTDIR); 372 373 mutex_enter(&mvp->v_lock); 374 if ((uap->flags & MS_OVERLAY) == 0 && 375 (mvp->v_count > 1 || (mvp->v_flag & VROOT))) { 376 mutex_exit(&mvp->v_lock); 377 return (EBUSY); 378 } 379 mutex_exit(&mvp->v_lock); 380 381 /* 382 * Having the resource be anything but "fd" doesn't make sense 383 */ 384 vfs_setresource(vfsp, "fd"); 385 386 vp = vn_alloc(KM_SLEEP); 387 vp->v_vfsp = vfsp; 388 vn_setops(vp, fd_vnodeops); 389 vp->v_type = VDIR; 390 vp->v_data = NULL; 391 vp->v_flag |= VROOT; 392 vfsp->vfs_fstype = fdfstype; 393 vfsp->vfs_data = (char *)vp; 394 mutex_enter(&fd_minor_lock); 395 do { 396 fdfsmin = (fdfsmin + 1) & L_MAXMIN32; 397 vfsp->vfs_dev = makedevice(fdfsmaj, fdfsmin); 398 } while (vfs_devismounted(vfsp->vfs_dev)); 399 mutex_exit(&fd_minor_lock); 400 vfs_make_fsid(&vfsp->vfs_fsid, vfsp->vfs_dev, fdfstype); 401 vfsp->vfs_bsize = 1024; 402 return (0); 403 } 404 405 /* ARGSUSED */ 406 static int 407 fdunmount(vfs_t *vfsp, int flag, cred_t *cr) 408 { 409 vnode_t *rvp; 410 411 if (secpolicy_fs_unmount(cr, vfsp) != 0) 412 return (EPERM); 413 414 /* 415 * forced unmount is not supported by this file system 416 * and thus, ENOTSUP, is being returned. 417 */ 418 if (flag & MS_FORCE) 419 return (ENOTSUP); 420 421 rvp = (vnode_t *)vfsp->vfs_data; 422 if (rvp->v_count > 1) 423 return (EBUSY); 424 425 VN_RELE(rvp); 426 return (0); 427 } 428 429 /* ARGSUSED */ 430 static int 431 fdroot(vfs_t *vfsp, vnode_t **vpp) 432 { 433 vnode_t *vp = (vnode_t *)vfsp->vfs_data; 434 435 VN_HOLD(vp); 436 *vpp = vp; 437 return (0); 438 } 439 440 /* 441 * No locking required because I held the root vnode before calling this 442 * function so the vfs won't disappear on me. To be more explicit: 443 * fdvrootp->v_count will be greater than 1 so fdunmount will just return. 444 */ 445 static int 446 fdstatvfs(struct vfs *vfsp, struct statvfs64 *sp) 447 { 448 dev32_t d32; 449 rctl_qty_t fdno_ctl; 450 451 mutex_enter(&curproc->p_lock); 452 fdno_ctl = rctl_enforced_value(rctlproc_legacy[RLIMIT_NOFILE], 453 curproc->p_rctls, curproc); 454 mutex_exit(&curproc->p_lock); 455 456 bzero(sp, sizeof (*sp)); 457 sp->f_bsize = 1024; 458 sp->f_frsize = 1024; 459 sp->f_blocks = (fsblkcnt64_t)0; 460 sp->f_bfree = (fsblkcnt64_t)0; 461 sp->f_bavail = (fsblkcnt64_t)0; 462 sp->f_files = (fsfilcnt64_t) 463 (MIN(P_FINFO(curproc)->fi_nfiles, fdno_ctl + 2)); 464 sp->f_ffree = (fsfilcnt64_t)0; 465 sp->f_favail = (fsfilcnt64_t)0; 466 (void) cmpldev(&d32, vfsp->vfs_dev); 467 sp->f_fsid = d32; 468 (void) strcpy(sp->f_basetype, vfssw[fdfstype].vsw_name); 469 sp->f_flag = vf_to_stf(vfsp->vfs_flag); 470 sp->f_namemax = FDNSIZE; 471 (void) strcpy(sp->f_fstr, "/dev/fd"); 472 (void) strcpy(&sp->f_fstr[8], "/dev/fd"); 473 return (0); 474 } 475 476 int 477 fdinit(int fstype, char *name) 478 { 479 static const fs_operation_def_t fd_vfsops_template[] = { 480 VFSNAME_MOUNT, { .vfs_mount = fdmount }, 481 VFSNAME_UNMOUNT, { .vfs_unmount = fdunmount }, 482 VFSNAME_ROOT, { .vfs_root = fdroot }, 483 VFSNAME_STATVFS, { .vfs_statvfs = fdstatvfs }, 484 NULL, NULL 485 }; 486 int error; 487 488 fdfstype = fstype; 489 ASSERT(fdfstype != 0); 490 491 /* 492 * Associate VFS ops vector with this fstype. 493 */ 494 error = vfs_setfsops(fstype, fd_vfsops_template, NULL); 495 if (error != 0) { 496 cmn_err(CE_WARN, "fdinit: bad vnode ops template"); 497 return (error); 498 } 499 500 error = vn_make_ops(name, fd_vnodeops_template, &fd_vnodeops); 501 if (error != 0) { 502 (void) vfs_freevfsops_by_type(fstype); 503 cmn_err(CE_WARN, "fdinit: bad vnode ops template"); 504 return (error); 505 } 506 507 /* 508 * Assign unique "device" numbers (reported by stat(2)). 509 */ 510 fdfsmaj = getudev(); 511 fdrmaj = getudev(); 512 if (fdfsmaj == (major_t)-1 || fdrmaj == (major_t)-1) { 513 cmn_err(CE_WARN, "fdinit: can't get unique device numbers"); 514 if (fdfsmaj == (major_t)-1) 515 fdfsmaj = 0; 516 if (fdrmaj == (major_t)-1) 517 fdrmaj = 0; 518 } 519 mutex_init(&fd_minor_lock, NULL, MUTEX_DEFAULT, NULL); 520 return (0); 521 } 522 523 /* 524 * FDFS Mount options table 525 */ 526 static char *rw_cancel[] = { MNTOPT_RO, NULL }; 527 528 static mntopt_t mntopts[] = { 529 /* 530 * option name cancel option default arg flags 531 */ 532 { MNTOPT_RW, rw_cancel, NULL, MO_DEFAULT, 533 (void *)MNTOPT_NOINTR }, 534 { MNTOPT_IGNORE, NULL, NULL, 0, 535 (void *)0 }, 536 }; 537 538 static mntopts_t fdfs_mntopts = { 539 sizeof (mntopts) / sizeof (mntopt_t), 540 mntopts 541 }; 542 543 static vfsdef_t vfw = { 544 VFSDEF_VERSION, 545 "fd", 546 fdinit, 547 VSW_HASPROTO, 548 &fdfs_mntopts 549 }; 550 551 static struct modlfs modlfs = { 552 &mod_fsops, 553 "filesystem for fd", 554 &vfw 555 }; 556 557 static struct modlinkage modlinkage = { 558 MODREV_1, 559 &modlfs, 560 NULL 561 }; 562 563 int 564 _init(void) 565 { 566 return (mod_install(&modlinkage)); 567 } 568 569 int 570 _info(struct modinfo *modinfop) 571 { 572 return (mod_info(&modlinkage, modinfop)); 573 } 574