/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/time.h>
#include <sys/systm.h>
#include <sys/sysmacros.h>
#include <sys/resource.h>
#include <sys/signal.h>
#include <sys/cred.h>
#include <sys/user.h>
#include <sys/buf.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/proc.h>
#include <sys/disp.h>
#include <sys/file.h>
#include <sys/fcntl.h>
#include <sys/flock.h>
#include <sys/atomic.h>
#include <sys/kmem.h>
#include <sys/uio.h>
#include <sys/conf.h>
#include <sys/mman.h>
#include <sys/pathname.h>
#include <sys/debug.h>
#include <sys/vmsystm.h>
#include <sys/cmn_err.h>
#include <sys/filio.h>
#include <sys/dnlc.h>

#include <sys/fs/ufs_filio.h>
#include <sys/fs/ufs_lockfs.h>
#include <sys/fs/ufs_fs.h>
#include <sys/fs/ufs_inode.h>
#include <sys/fs/ufs_fsdir.h>
#include <sys/fs/ufs_quota.h>
#include <sys/fs/ufs_trans.h>
#include <sys/fs/ufs_log.h>
#include <sys/dirent.h>		/* must be AFTER <sys/fs/fsdir.h>! */
#include <sys/errno.h>
#include <sys/sysinfo.h>

#include <vm/hat.h>
#include <vm/page.h>
#include <vm/pvn.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_map.h>
#include <vm/seg_vn.h>
#include <vm/rm.h>
#include <sys/swap.h>
#include <sys/model.h>
#include <sys/policy.h>

#include "fs/fs_subr.h"

/*
 * ufs_fioio is the ufs equivalent of NFS_CNVT and is tailored to
 * metamucil's needs. It may change at any time.
 */
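/*
 * Usage sketch (userland, not compiled here): a privileged backup
 * utility that has recorded an inode/generation pair can reopen that
 * inode through a descriptor for any file on the same file system.
 * The ioctl command name is an assumption (believed to be _FIOIO from
 * <sys/filio.h>); the fio_ino/fio_gen/fio_fd fields are the ones this
 * function reads and fills in.
 *
 *	struct fioio fio;
 *	fio.fio_ino = ino;	// inode number from an earlier scan
 *	fio.fio_gen = gen;	// generation number of that inode
 *	if (ioctl(anyfd, _FIOIO, &fio) == 0)
 *		use(fio.fio_fd);	// read-only, large-file-aware fd
 */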
/* ARGSUSED */
int
ufs_fioio(
	struct vnode *vp,		/* any file on the fs */
	struct fioio *fiou,		/* fioio struct in userland */
	int flag,			/* flag from VOP_IOCTL() */
	struct cred *cr)		/* credentials from ufs_ioctl */
{
	int error = 0;
	struct vnode *vpio = NULL;	/* vnode for inode open */
	struct inode *ipio = NULL;	/* inode for inode open */
	struct file *fpio = NULL;	/* file for inode open */
	struct inode *ip;		/* inode for file system */
	struct fs *fs;			/* fs for file system */
	STRUCT_DECL(fioio, fio);	/* copy of user's fioio struct */

	/*
	 * must be privileged
	 */
	if (secpolicy_fs_config(cr, vp->v_vfsp) != 0)
		return (EPERM);

	STRUCT_INIT(fio, flag & DATAMODEL_MASK);

	/*
	 * get user's copy of fioio struct
	 */
	if (copyin(fiou, STRUCT_BUF(fio), STRUCT_SIZE(fio)))
		return (EFAULT);

	ip = VTOI(vp);
	fs = ip->i_fs;

	/*
	 * check the inode number against the fs's inode number bounds
	 */
	if (STRUCT_FGET(fio, fio_ino) < UFSROOTINO)
		return (ESRCH);
	if (STRUCT_FGET(fio, fio_ino) >= fs->fs_ncg * fs->fs_ipg)
		return (ESRCH);

	rw_enter(&ip->i_ufsvfs->vfs_dqrwlock, RW_READER);

	/*
	 * get the inode
	 */
	error = ufs_iget(ip->i_vfs, STRUCT_FGET(fio, fio_ino), &ipio, cr);

	rw_exit(&ip->i_ufsvfs->vfs_dqrwlock);

	if (error)
		return (error);

	/*
	 * check the generation number
	 */
	rw_enter(&ipio->i_contents, RW_READER);
	if (ipio->i_gen != STRUCT_FGET(fio, fio_gen)) {
		error = ESTALE;
		rw_exit(&ipio->i_contents);
		goto errout;
	}

	/*
	 * check if the inode is free
	 */
	if (ipio->i_mode == 0) {
		error = ENOENT;
		rw_exit(&ipio->i_contents);
		goto errout;
	}
	rw_exit(&ipio->i_contents);

	/*
	 * Adapted from copen: get a file struct
	 * Large Files: We open this file descriptor with FOFFMAX flag
	 * set so that it will be like a large file open.
	 */
	if (falloc(NULL, (FREAD|FOFFMAX), &fpio, STRUCT_FADDR(fio, fio_fd)))
		goto errout;

	/*
	 * Adapted from vn_open: check access and then open the file
	 */
	vpio = ITOV(ipio);
	if (error = VOP_ACCESS(vpio, VREAD, 0, cr, NULL))
		goto errout;

	if (error = VOP_OPEN(&vpio, FREAD, cr, NULL))
		goto errout;

	/*
	 * Adapted from copen: initialize the file struct
	 */
	fpio->f_vnode = vpio;

	/*
	 * return the fd
	 */
	if (copyout(STRUCT_BUF(fio), fiou, STRUCT_SIZE(fio))) {
		error = EFAULT;
		goto errout;
	}
	setf(STRUCT_FGET(fio, fio_fd), fpio);
	mutex_exit(&fpio->f_tlock);
	return (0);
errout:
	/*
	 * free the file struct and fd
	 */
	if (fpio) {
		setf(STRUCT_FGET(fio, fio_fd), NULL);
		unfalloc(fpio);
	}

	/*
	 * release the hold on the inode
	 */
	if (ipio)
		VN_RELE(ITOV(ipio));
	return (error);
}

/*
 * ufs_fiosatime
 * set access time w/o altering change time. This ioctl is tailored
 * to metamucil's needs and may change at any time.
 */
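/*
 * Usage sketch (userland; the command name _FIOSATIME is an
 * assumption). A NULL argument sets the access time to "now";
 * otherwise the supplied struct timeval is used as-is, e.g. to put
 * back the atime observed before a backup read:
 *
 *	struct timeval tv;
 *	tv.tv_sec = saved_atime;	// hypothetical saved value
 *	tv.tv_usec = 0;
 *	(void) ioctl(fd, _FIOSATIME, &tv);
 */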
int
ufs_fiosatime(
	struct vnode *vp,		/* file's vnode */
	struct timeval *tvu,		/* struct timeval in userland */
	int flag,			/* flag from VOP_IOCTL() */
	struct cred *cr)		/* credentials from ufs_ioctl */
{
	struct inode *ip;		/* inode for vp */
	struct timeval32 tv;		/* copy of user's timeval */
	int now = 0;

	/*
	 * must have sufficient privileges
	 */
	if (secpolicy_fs_config(cr, vp->v_vfsp) != 0)
		return (EPERM);

	/*
	 * get user's copy of timeval struct and check values
	 * if input is NULL, will set time to now
	 */
	if (tvu == NULL) {
		now = 1;
	} else {
		if ((flag & DATAMODEL_MASK) == DATAMODEL_ILP32) {
			if (copyin(tvu, &tv, sizeof (tv)))
				return (EFAULT);
		} else {
			struct timeval tv64;

			if (copyin(tvu, &tv64, sizeof (tv64)))
				return (EFAULT);
			if (TIMEVAL_OVERFLOW(&tv64))
				return (EOVERFLOW);
			TIMEVAL_TO_TIMEVAL32(&tv, &tv64);
		}

		if (tv.tv_usec < 0 || tv.tv_usec >= 1000000)
			return (EINVAL);
	}

	/*
	 * update access time
	 */
	ip = VTOI(vp);
	rw_enter(&ip->i_contents, RW_WRITER);
	ITIMES_NOLOCK(ip);
	if (now) {
		mutex_enter(&ufs_iuniqtime_lock);
		ip->i_atime = iuniqtime;
		mutex_exit(&ufs_iuniqtime_lock);
	} else {
		ip->i_atime = tv;
	}
	ip->i_flag |= IMODACC;
	rw_exit(&ip->i_contents);

	return (0);
}

/*
 * ufs_fiogdio
 * Get delayed-io state. This ioctl is tailored
 * to metamucil's needs and may change at any time.
 */
/* ARGSUSED */
int
ufs_fiogdio(
	struct vnode *vp,		/* file's vnode */
	uint_t *diop,			/* dio state returned here */
	int flag,			/* flag from ufs_ioctl */
	struct cred *cr)		/* credentials from ufs_ioctl */
{
	struct ufsvfs *ufsvfsp = VTOI(vp)->i_ufsvfs;

	/*
	 * forcibly unmounted
	 */
	if (ufsvfsp == NULL)
		return (EIO);

	if (suword32(diop, ufsvfsp->vfs_dio))
		return (EFAULT);
	return (0);
}

/*
 * ufs_fiosdio
 * Set delayed-io state. This ioctl is tailored
 * to metamucil's needs and may change at any time.
 */
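/*
 * Usage sketch (userland; the command names _FIOSDIO/_FIOGDIO are
 * assumptions). dio must be 0 or 1. Enabling delayed io quiesces and
 * flushes the file system and, when clean-flag processing applies,
 * marks the superblock FSSUSPEND until it is disabled again; on a
 * logging file system the request is accepted but ignored.
 *
 *	uint_t dio = 1;
 *	(void) ioctl(fd, _FIOSDIO, &dio);	// enable delayed io
 *	...					// bulk restore runs here
 *	dio = 0;
 *	(void) ioctl(fd, _FIOSDIO, &dio);	// back to normal
 */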
int
ufs_fiosdio(
	struct vnode *vp,		/* file's vnode */
	uint_t *diop,			/* dio flag */
	int flag,			/* flag from ufs_ioctl */
	struct cred *cr)		/* credentials from ufs_ioctl */
{
	uint_t dio;			/* copy of user's dio */
	struct inode *ip;		/* inode for vp */
	struct ufsvfs *ufsvfsp;
	struct fs *fs;
	struct ulockfs *ulp;
	int error = 0;

#ifdef lint
	flag = flag;
#endif

	/* check input conditions */
	if (secpolicy_fs_config(cr, vp->v_vfsp) != 0)
		return (EPERM);

	if (copyin(diop, &dio, sizeof (dio)))
		return (EFAULT);

	if (dio > 1)
		return (EINVAL);

	/* file system has been forcibly unmounted */
	if (VTOI(vp)->i_ufsvfs == NULL)
		return (EIO);

	ip = VTOI(vp);
	ufsvfsp = ip->i_ufsvfs;
	ulp = &ufsvfsp->vfs_ulockfs;

	/* logging file system; dio ignored */
	if (TRANS_ISTRANS(ufsvfsp))
		return (error);

	/* hold the mutex to prevent race with a lockfs request */
	vfs_lock_wait(vp->v_vfsp);
	mutex_enter(&ulp->ul_lock);
	atomic_inc_ulong(&ufs_quiesce_pend);

	if (ULOCKFS_IS_HLOCK(ulp)) {
		error = EIO;
		goto out;
	}

	if (ULOCKFS_IS_ELOCK(ulp)) {
		error = EBUSY;
		goto out;
	}
	/* wait for outstanding accesses to finish */
	if (error = ufs_quiesce(ulp))
		goto out;

	/* flush w/invalidate */
	if (error = ufs_flush(vp->v_vfsp))
		goto out;

	/*
	 * update dio
	 */
	mutex_enter(&ufsvfsp->vfs_lock);
	ufsvfsp->vfs_dio = dio;

	/*
	 * enable/disable clean flag processing
	 */
	fs = ip->i_fs;
	if (fs->fs_ronly == 0 &&
	    fs->fs_clean != FSBAD &&
	    fs->fs_clean != FSLOG) {
		if (dio)
			fs->fs_clean = FSSUSPEND;
		else
			fs->fs_clean = FSACTIVE;
		ufs_sbwrite(ufsvfsp);
		mutex_exit(&ufsvfsp->vfs_lock);
	} else
		mutex_exit(&ufsvfsp->vfs_lock);
out:
	/*
	 * we need this broadcast because of the ufs_quiesce call above
	 */
	atomic_dec_ulong(&ufs_quiesce_pend);
	cv_broadcast(&ulp->ul_cv);
	mutex_exit(&ulp->ul_lock);
	vfs_unlock(vp->v_vfsp);
	return (error);
}

/*
 * ufs_fioffs - ioctl handler for flushing file system
 */
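/*
 * The sequence below: suspend the delete thread, quiesce outstanding
 * vnode operations, re-enable log rolling (and the reclaim thread) if
 * the logmap was marked LDL_NOROLL, then synchronously push all dirty
 * data and metadata via ufs_flush(). The corresponding userland
 * command name is assumed to be _FIOFFS ("flush file system").
 */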
/* ARGSUSED */
int
ufs_fioffs(
	struct vnode *vp,
	char *vap,			/* must be NULL - reserved */
	struct cred *cr)		/* credentials from ufs_ioctl */
{
	int error;
	struct ufsvfs *ufsvfsp;
	struct ulockfs *ulp;

	/* file system has been forcibly unmounted */
	ufsvfsp = VTOI(vp)->i_ufsvfs;
	if (ufsvfsp == NULL)
		return (EIO);

	ulp = &ufsvfsp->vfs_ulockfs;

	/*
	 * suspend the delete thread
	 * this must be done outside the lockfs locking protocol
	 */
	vfs_lock_wait(vp->v_vfsp);
	ufs_thread_suspend(&ufsvfsp->vfs_delete);

	/* hold the mutex to prevent race with a lockfs request */
	mutex_enter(&ulp->ul_lock);
	atomic_inc_ulong(&ufs_quiesce_pend);

	if (ULOCKFS_IS_HLOCK(ulp)) {
		error = EIO;
		goto out;
	}
	if (ULOCKFS_IS_ELOCK(ulp)) {
		error = EBUSY;
		goto out;
	}
	/* wait for outstanding accesses to finish */
	if (error = ufs_quiesce(ulp))
		goto out;

	/*
	 * If logging, and the logmap was marked as not rollable,
	 * make it rollable now, and start the trans_roll thread and
	 * the reclaim thread. The log at this point is safe to write to.
	 */
	if (ufsvfsp->vfs_log) {
		ml_unit_t *ul = ufsvfsp->vfs_log;
		struct fs *fsp = ufsvfsp->vfs_fs;
		int err;

		if (ul->un_flags & LDL_NOROLL) {
			ul->un_flags &= ~LDL_NOROLL;
			logmap_start_roll(ul);
			if (!fsp->fs_ronly && (fsp->fs_reclaim &
			    (FS_RECLAIM|FS_RECLAIMING))) {
				fsp->fs_reclaim &= ~FS_RECLAIM;
				fsp->fs_reclaim |= FS_RECLAIMING;
				ufs_thread_start(&ufsvfsp->vfs_reclaim,
				    ufs_thread_reclaim, vp->v_vfsp);
				if (!fsp->fs_ronly) {
					TRANS_SBWRITE(ufsvfsp,
					    TOP_SBUPDATE_UPDATE);
					if (err =
					    geterror(ufsvfsp->vfs_bufp)) {
						refstr_t *mntpt;
						mntpt = vfs_getmntpoint(
						    vp->v_vfsp);
						cmn_err(CE_NOTE,
						    "Filesystem Flush "
						    "Failed to update "
						    "Reclaim Status for "
						    " %s, Write failed to "
						    "update superblock, "
						    "error %d",
						    refstr_value(mntpt),
						    err);
						refstr_rele(mntpt);
					}
				}
			}
		}
	}

	/* synchronously flush dirty data and metadata */
	error = ufs_flush(vp->v_vfsp);

out:
	atomic_dec_ulong(&ufs_quiesce_pend);
	cv_broadcast(&ulp->ul_cv);
	mutex_exit(&ulp->ul_lock);
	vfs_unlock(vp->v_vfsp);

	/*
	 * allow the delete thread to continue
	 */
	ufs_thread_continue(&ufsvfsp->vfs_delete);
	return (error);
}

/*
 * ufs_fioisbusy
 * Get number of references on this vnode.
 * Contract-private interface for Legato's NetWorker product.
 */
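/*
 * Usage sketch (userland; the command name _FIOISBUSY is an
 * assumption): the result is 0 only when the caller's descriptor holds
 * the sole reference and the file is not mmap'ed.
 *
 *	int busy;
 *	if (ioctl(fd, _FIOISBUSY, &busy) == 0 && !busy)
 *		...			// no other users of this file
 */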
/* ARGSUSED */
int
ufs_fioisbusy(struct vnode *vp, int *isbusy, struct cred *cr)
{
	int is_it_busy;

	/*
	 * The caller holds one reference; there may be one in the dnlc,
	 * so we need to flush it.
	 */
	if (vp->v_count > 1)
		dnlc_purge_vp(vp);
	/*
	 * Since we've just flushed the dnlc and we hold a reference
	 * to this vnode, anything but 1 means busy (this had
	 * BETTER not be zero!). Also, it's possible for someone to
	 * have this file mmap'ed with no additional reference count.
	 */
	ASSERT(vp->v_count > 0);
	if ((vp->v_count == 1) && (VTOI(vp)->i_mapcnt == 0))
		is_it_busy = 0;
	else
		is_it_busy = 1;

	if (suword32(isbusy, is_it_busy))
		return (EFAULT);
	return (0);
}

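/*
 * ufs_fiodirectio
 * Enable (DIRECTIO_ON) or disable (DIRECTIO_OFF) direct I/O on this
 * file by setting or clearing IDIRECTIO in i_flag. This is the
 * per-file switch typically reached via directio(3C); this handler
 * itself performs no privilege check.
 */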
/* ARGSUSED */
int
ufs_fiodirectio(struct vnode *vp, int cmd, struct cred *cr)
{
	int error = 0;
	struct inode *ip = VTOI(vp);

	/*
	 * Acquire reader lock and set/reset direct mode
	 */
	rw_enter(&ip->i_contents, RW_READER);
	mutex_enter(&ip->i_tlock);
	if (cmd == DIRECTIO_ON)
		ip->i_flag |= IDIRECTIO;	/* enable direct mode */
	else if (cmd == DIRECTIO_OFF)
		ip->i_flag &= ~IDIRECTIO;	/* disable direct mode */
	else
		error = EINVAL;
	mutex_exit(&ip->i_tlock);
	rw_exit(&ip->i_contents);
	return (error);
}

/*
 * ufs_fiotune
 * Allow some tunables to be set on a mounted fs
 */
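/*
 * Sketch of the userland side (the command name _FIOTUNE is an
 * assumption; the fields are the ones validated below and correspond
 * to the familiar tunefs(1M) parameters):
 *
 *	struct fiotune ft;
 *	ft.maxcontig = 128;		// I/O clustering, in fs blocks
 *	ft.rotdelay = 0;		// must be 0
 *	ft.maxbpg = 2048;		// max blocks per file per cyl group
 *	ft.minfree = 10;		// reserved space, 0..99 percent
 *	ft.optim = FS_OPTTIME;		// or FS_OPTSPACE
 *	(void) ioctl(fd, _FIOTUNE, &ft);
 */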
int
ufs_fiotune(struct vnode *vp, struct fiotune *uftp, struct cred *cr)
{
	struct fiotune ftp;
	struct fs *fs;
	struct ufsvfs *ufsvfsp;

	/*
	 * must have sufficient privileges
	 */
	if (secpolicy_fs_config(cr, vp->v_vfsp) != 0)
		return (EPERM);

	/*
	 * get user's copy
	 */
	if (copyin(uftp, &ftp, sizeof (ftp)))
		return (EFAULT);

	/*
	 * some minimal sanity checks
	 */
	if ((ftp.maxcontig <= 0) ||
	    (ftp.rotdelay != 0) ||
	    (ftp.maxbpg <= 0) ||
	    (ftp.minfree < 0) ||
	    (ftp.minfree > 99) ||
	    ((ftp.optim != FS_OPTTIME) && (ftp.optim != FS_OPTSPACE)))
		return (EINVAL);

	/*
	 * update superblock but don't write it!  If it gets out, fine.
	 */
	fs = VTOI(vp)->i_fs;

	fs->fs_maxcontig = ftp.maxcontig;
	fs->fs_rotdelay = ftp.rotdelay;
	fs->fs_maxbpg = ftp.maxbpg;
	fs->fs_minfree = ftp.minfree;
	fs->fs_optim = ftp.optim;

	/*
	 * Adjust cluster based on the new maxcontig. The cluster size
	 * can be any positive value. The check for this is done above.
	 */
	ufsvfsp = VTOI(vp)->i_ufsvfs;
	ufsvfsp->vfs_ioclustsz = fs->fs_bsize * fs->fs_maxcontig;

	/*
	 * Adjust minfrags from minfree
	 */
	ufsvfsp->vfs_minfrags = (int)((int64_t)fs->fs_dsize *
	    fs->fs_minfree / 100);

	/*
	 * Write the superblock
	 */
	if (fs->fs_ronly == 0) {
		TRANS_BEGIN_ASYNC(ufsvfsp, TOP_SBUPDATE_UPDATE,
		    TOP_SBWRITE_SIZE);
		TRANS_SBWRITE(ufsvfsp, TOP_SBUPDATE_UPDATE);
		TRANS_END_ASYNC(ufsvfsp, TOP_SBUPDATE_UPDATE, TOP_SBWRITE_SIZE);
	}

	return (0);
}

/*
 * Lseek support for finding holes (cmd == _FIO_SEEK_HOLE) and
 * data (cmd == _FIO_SEEK_DATA). "off" is an in/out parameter.
 */
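/*
 * On success *off is advanced to the start of the next hole
 * (_FIO_SEEK_HOLE) or the next data region (_FIO_SEEK_DATA) at or
 * beyond the offset passed in; a file with no holes reports a single
 * virtual hole at i_size. ENXIO is returned when the starting offset
 * is at or past EOF, or when no further data exists. The lseek(2)
 * SEEK_HOLE/SEEK_DATA support is layered on these ioctls by the
 * generic code above this handler.
 */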
int
ufs_fio_holey(vnode_t *vp, int cmd, offset_t *off)
{
	inode_t *ip = VTOI(vp);
	u_offset_t noff = (u_offset_t)*off; /* new offset */
	u_offset_t isz;
	int error;
	boolean_t hole;

	rw_enter(&ip->i_contents, RW_READER);
	isz = ip->i_size;
	if (noff >= isz) {
		rw_exit(&ip->i_contents);
		return (ENXIO);
	}

	/*
	 * Check for the usual case where a file has no holes.
	 * If so we can optimise to set the end of the file as the first
	 * (virtual) hole. This avoids bmap_find() searching through
	 * every block in the file for a (non-existent) hole.
	 */
	if (!bmap_has_holes(ip)) {
		rw_exit(&ip->i_contents);
		if (cmd == _FIO_SEEK_HOLE) {
			*off = isz;
			return (0);
		}
		/* *off must already point to valid data (non hole) */
		return (0);
	}

	/*
	 * Calling bmap_read() one block at a time on a 1TB file takes forever,
	 * so we use a special function to search for holes or blocks.
	 */
	if (cmd == _FIO_SEEK_HOLE)
		hole = B_TRUE;
	else
		hole = B_FALSE;
	error = bmap_find(ip, hole, &noff);
	rw_exit(&ip->i_contents);

	/* end of file? */
	if (error == ENXIO) {
		/*
		 * Handle the virtual hole at the end of file.
		 */
		if (cmd == _FIO_SEEK_HOLE) {
			*off = isz;
			return (0);
		}
		return (ENXIO);
	}
	if (noff < *off)
		return (error);
	*off = noff;
	return (error);
}

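/*
 * ufs_mark_compressed
 * Tag a regular file as containing compressed data: set ICOMPRESS in
 * i_cflags and log or push the inode update. Believed to be used by
 * the boot-archive/dcfs compression path; only VREG vnodes are
 * accepted.
 */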
int
ufs_mark_compressed(struct vnode *vp)
{
	struct inode *ip = VTOI(vp);
	struct ufsvfs *ufsvfsp = ip->i_ufsvfs;

	if (vp->v_type != VREG)
		return (EINVAL);

	rw_enter(&ip->i_contents, RW_WRITER);
	ip->i_cflags |= ICOMPRESS;
	TRANS_INODE(ufsvfsp, ip);
	ip->i_flag |= (ICHG|ISEQ);
	ip->i_seq++;
	if (!TRANS_ISTRANS(ufsvfsp))
		ufs_iupdat(ip, I_ASYNC);
	rw_exit(&ip->i_contents);

	return (0);
}