xref: /titanic_44/usr/src/uts/common/fs/ufs/ufs_dir.c (revision 9b5097ee22b7d249db813b466eda136ffc2c21fa)
17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
55b024a5bSbatschul  * Common Development and Distribution License (the "License").
65b024a5bSbatschul  * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate  *
87c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate  * and limitations under the License.
127c478bd9Sstevel@tonic-gate  *
137c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate  *
197c478bd9Sstevel@tonic-gate  * CDDL HEADER END
207c478bd9Sstevel@tonic-gate  */
217c478bd9Sstevel@tonic-gate /*
22*9b5097eeSOwen Roberts  * Copyright (c) 1984, 2010, Oracle and/or its affiliates. All rights reserved.
237c478bd9Sstevel@tonic-gate  */
247c478bd9Sstevel@tonic-gate 
257c478bd9Sstevel@tonic-gate /*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
267c478bd9Sstevel@tonic-gate /*	  All Rights Reserved  	*/
277c478bd9Sstevel@tonic-gate 
287c478bd9Sstevel@tonic-gate /*
297c478bd9Sstevel@tonic-gate  * University Copyright- Copyright (c) 1982, 1986, 1988
307c478bd9Sstevel@tonic-gate  * The Regents of the University of California
317c478bd9Sstevel@tonic-gate  * All Rights Reserved
327c478bd9Sstevel@tonic-gate  *
337c478bd9Sstevel@tonic-gate  * University Acknowledgment- Portions of this document are derived from
347c478bd9Sstevel@tonic-gate  * software developed by the University of California, Berkeley, and its
357c478bd9Sstevel@tonic-gate  * contributors.
367c478bd9Sstevel@tonic-gate  */
377c478bd9Sstevel@tonic-gate 
387c478bd9Sstevel@tonic-gate /*
397c478bd9Sstevel@tonic-gate  * Directory manipulation routines.
407c478bd9Sstevel@tonic-gate  *
417c478bd9Sstevel@tonic-gate  * When manipulating directories, the i_rwlock provides serialization
427c478bd9Sstevel@tonic-gate  * since directories cannot be mmapped. The i_contents lock is redundant.
437c478bd9Sstevel@tonic-gate  */
447c478bd9Sstevel@tonic-gate 
457c478bd9Sstevel@tonic-gate #include <sys/types.h>
467c478bd9Sstevel@tonic-gate #include <sys/t_lock.h>
477c478bd9Sstevel@tonic-gate #include <sys/param.h>
487c478bd9Sstevel@tonic-gate #include <sys/systm.h>
497c478bd9Sstevel@tonic-gate #include <sys/signal.h>
507c478bd9Sstevel@tonic-gate #include <sys/cred.h>
517c478bd9Sstevel@tonic-gate #include <sys/proc.h>
527c478bd9Sstevel@tonic-gate #include <sys/disp.h>
537c478bd9Sstevel@tonic-gate #include <sys/user.h>
547c478bd9Sstevel@tonic-gate #include <sys/vfs.h>
557c478bd9Sstevel@tonic-gate #include <sys/vnode.h>
567c478bd9Sstevel@tonic-gate #include <sys/stat.h>
577c478bd9Sstevel@tonic-gate #include <sys/mode.h>
587c478bd9Sstevel@tonic-gate #include <sys/buf.h>
597c478bd9Sstevel@tonic-gate #include <sys/uio.h>
607c478bd9Sstevel@tonic-gate #include <sys/dnlc.h>
617c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_inode.h>
627c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_fs.h>
637c478bd9Sstevel@tonic-gate #include <sys/mount.h>
647c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_fsdir.h>
657c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_trans.h>
667c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_panic.h>
677c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_quota.h>
687c478bd9Sstevel@tonic-gate #include <sys/errno.h>
697c478bd9Sstevel@tonic-gate #include <sys/debug.h>
707c478bd9Sstevel@tonic-gate #include <vm/seg.h>
717c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h>
727c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
737c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h>
747c478bd9Sstevel@tonic-gate #include <sys/unistd.h>
757c478bd9Sstevel@tonic-gate #include <sys/policy.h>
767c478bd9Sstevel@tonic-gate 
777c478bd9Sstevel@tonic-gate /*
787c478bd9Sstevel@tonic-gate  * This is required since we're using P2ROUNDUP_TYPED on DIRBLKSIZ
797c478bd9Sstevel@tonic-gate  */
807c478bd9Sstevel@tonic-gate #if !ISP2(DIRBLKSIZ)
817c478bd9Sstevel@tonic-gate #error	"DIRBLKSIZ not a power of 2"
827c478bd9Sstevel@tonic-gate #endif
837c478bd9Sstevel@tonic-gate 
847c478bd9Sstevel@tonic-gate /*
857c478bd9Sstevel@tonic-gate  * A virgin directory.
867c478bd9Sstevel@tonic-gate  */
877c478bd9Sstevel@tonic-gate static struct dirtemplate mastertemplate = {
887c478bd9Sstevel@tonic-gate 	0, 12, 1, ".",
897c478bd9Sstevel@tonic-gate 	0, DIRBLKSIZ - 12, 2, ".."
907c478bd9Sstevel@tonic-gate };
917c478bd9Sstevel@tonic-gate 
/*
 * LDIRSIZ(len) is the number of bytes a directory record needs in order to
 * hold a name of "len" characters: the part of struct direct that precedes
 * the name (sizeof (struct direct) less the MAXNAMLEN + 1 name bytes), plus
 * the name and its terminating NUL rounded up to a multiple of 4.
 *
 * MAX_DIR_NAME_LEN(len) goes the other way: given a record of "len" bytes
 * it yields the longest name that fits, leaving one byte for the NUL.
 *
 * Every use of the argument is parenthesized so that an expression argument
 * (e.g. "a << b" or "c ? x : y") is evaluated as a unit.
 */
#define	LDIRSIZ(len) \
	((sizeof (struct direct) - (MAXNAMLEN + 1)) + (((len) + 1 + 3) &~ 3))
#define	MAX_DIR_NAME_LEN(len) \
	(((len) - (sizeof (struct direct) - (MAXNAMLEN + 1))) - 1)
967c478bd9Sstevel@tonic-gate 
/*
 * The dnlc directory cache allows a 64 bit handle for directory entries.
 * For ufs we squeeze both the 32 bit inumber and a 32 bit disk offset
 * into the handle: the inumber occupies the low 32 bits and the offset
 * the high 32 bits. Note, a 32 bit offset allows a 4GB directory, which
 * is way beyond what could be cached in memory by the directory
 * caching routines. So we are quite safe with this limit.
 *
 * H_TO_INO(h)			extracts the inumber from handle h
 * H_TO_OFF(h)			extracts the offset from handle h
 * INO_OFF_TO_H(ino, off)	builds a handle from an inumber and offset
 *
 * INO_OFF_TO_H masks the inumber to 32 bits before merging it, so a
 * sign-extended or wider-than-32-bit "ino" argument cannot spill into the
 * offset half of the handle. Each expansion is fully parenthesized so it
 * can be used safely inside larger expressions.
 */
#define	H_TO_INO(h)	((uint32_t)((h) & UINT_MAX))
#define	H_TO_OFF(h)	((off_t)((h) >> 32))
#define	INO_OFF_TO_H(ino, off) \
	((uint64_t)(((uint64_t)(off) << 32) | ((uint64_t)(ino) & UINT_MAX)))
1087c478bd9Sstevel@tonic-gate 
/*
 * The average size of a typical on-disk directory entry is about 16 bytes,
 * hence AV_DIRECT_SHIFT is defined as log2(16).
 * This define is only used to approximate the number of entries
 * in a directory. This is needed for dnlc_dir_start(), which will immediately
 * return an error if the value is not within its acceptable range of
 * number of files in a directory.
 */
1177c478bd9Sstevel@tonic-gate #define	AV_DIRECT_SHIFT 4
/*
 * If the directory size (from i_size) is greater than or equal to the
 * ufs_min_dir_cache tunable then we request dnlc directory caching.
 * This has been found to be profitable after 1024 file names.
 */
1237c478bd9Sstevel@tonic-gate int ufs_min_dir_cache = 1024 << AV_DIRECT_SHIFT;
1247c478bd9Sstevel@tonic-gate 
1257f63b8c3Svsakar /* The time point the dnlc directory caching was disabled */
1267f63b8c3Svsakar static hrtime_t ufs_dc_disable_at;
1277f63b8c3Svsakar /* directory caching disable duration */
1287f63b8c3Svsakar static hrtime_t ufs_dc_disable_duration = (hrtime_t)NANOSEC * 5;
1297f63b8c3Svsakar 
1307c478bd9Sstevel@tonic-gate #ifdef DEBUG
1317c478bd9Sstevel@tonic-gate int dirchk = 1;
1327c478bd9Sstevel@tonic-gate #else /* !DEBUG */
1337c478bd9Sstevel@tonic-gate int dirchk = 0;
1347c478bd9Sstevel@tonic-gate #endif /* DEBUG */
1357c478bd9Sstevel@tonic-gate int ufs_negative_cache = 1;
1367c478bd9Sstevel@tonic-gate uint64_t ufs_dirremove_retry_cnt;
1377c478bd9Sstevel@tonic-gate 
/*
 * Forward declarations of file-local helpers (presumably defined further
 * down in this file). They are old-style declarations without parameter
 * lists, so the compiler does not check the arguments at call sites.
 */

/* directory entry validation and error reporting */
static void dirbad();
static int dirbadname();
static int dirmangled();

/* entry construction */
static int dirprepareentry();
static int ufs_diraddentry();
static int ufs_dirmakedirect();

/* searching, renaming and emptiness checks */
static int ufs_dirempty();
static int ufs_dirrename();
static int ufs_dirscan();

/* ".." maintenance */
static int ufs_dirclrdotdot();
static int ufs_dirfixdotdot();
static int ufs_dirpurgedotdot();
1507c478bd9Sstevel@tonic-gate 
1517c478bd9Sstevel@tonic-gate /*
15260c8e821SFrank Batschulat  * Check accessibility of directory against inquired mode and type.
15360c8e821SFrank Batschulat  * Execute access is required to search the directory.
15460c8e821SFrank Batschulat  * Access for write is interpreted as allowing
15560c8e821SFrank Batschulat  * deletion of files in the directory.
15660c8e821SFrank Batschulat  * Note, the reader i_contents lock will be acquired in
15760c8e821SFrank Batschulat  * ufs_iaccess().
15860c8e821SFrank Batschulat  */
15960c8e821SFrank Batschulat int
ufs_diraccess(struct inode * ip,int mode,struct cred * cr)16060c8e821SFrank Batschulat ufs_diraccess(struct inode *ip, int mode, struct cred *cr)
16160c8e821SFrank Batschulat {
16260c8e821SFrank Batschulat 	if (((ip->i_mode & IFMT) != IFDIR) &&
16360c8e821SFrank Batschulat 	    ((ip->i_mode & IFMT) != IFATTRDIR))
16460c8e821SFrank Batschulat 		return (ENOTDIR);
16560c8e821SFrank Batschulat 
16660c8e821SFrank Batschulat 	return (ufs_iaccess(ip, mode, cr, 1));
16760c8e821SFrank Batschulat }
16860c8e821SFrank Batschulat 
16960c8e821SFrank Batschulat /*
1707c478bd9Sstevel@tonic-gate  * Look for a given name in a directory.  On successful return, *ipp
1717c478bd9Sstevel@tonic-gate  * will point to the VN_HELD inode.
17260c8e821SFrank Batschulat  * The caller is responsible for checking accessibility upfront
17360c8e821SFrank Batschulat  * via ufs_diraccess().
1747c478bd9Sstevel@tonic-gate  */
1757c478bd9Sstevel@tonic-gate int
ufs_dirlook(struct inode * dp,char * namep,struct inode ** ipp,struct cred * cr,int skipdnlc,int skipcaching)1767c478bd9Sstevel@tonic-gate ufs_dirlook(
1777c478bd9Sstevel@tonic-gate 	struct inode *dp,
1787c478bd9Sstevel@tonic-gate 	char *namep,
1797c478bd9Sstevel@tonic-gate 	struct inode **ipp,
1807c478bd9Sstevel@tonic-gate 	struct cred *cr,
181*9b5097eeSOwen Roberts 	int skipdnlc,			/* skip the 1st level dnlc */
182*9b5097eeSOwen Roberts 	int skipcaching)		/* force directory caching off */
1837c478bd9Sstevel@tonic-gate {
1847c478bd9Sstevel@tonic-gate 	uint64_t handle;
1857c478bd9Sstevel@tonic-gate 	struct fbuf *fbp;		/* a buffer of directory entries */
1867c478bd9Sstevel@tonic-gate 	struct direct *ep;		/* the current directory entry */
1877c478bd9Sstevel@tonic-gate 	struct vnode *vp;
1887c478bd9Sstevel@tonic-gate 	struct vnode *dvp;		/* directory vnode ptr */
18902ff05a9Svsakar 	struct ulockfs *ulp;
1907c478bd9Sstevel@tonic-gate 	dcanchor_t *dcap;
1917c478bd9Sstevel@tonic-gate 	off_t endsearch;		/* offset to end directory search */
1927c478bd9Sstevel@tonic-gate 	off_t offset;
1937c478bd9Sstevel@tonic-gate 	off_t start_off;		/* starting offset from middle search */
1947c478bd9Sstevel@tonic-gate 	off_t last_offset;		/* last offset */
1957c478bd9Sstevel@tonic-gate 	int entryoffsetinblock;		/* offset of ep in addr's buffer */
1967c478bd9Sstevel@tonic-gate 	int numdirpasses;		/* strategy for directory search */
1977c478bd9Sstevel@tonic-gate 	int namlen;			/* length of name */
1987c478bd9Sstevel@tonic-gate 	int err;
1997c478bd9Sstevel@tonic-gate 	int doingchk;
2007c478bd9Sstevel@tonic-gate 	int i;
2017c478bd9Sstevel@tonic-gate 	int caching;
20202ff05a9Svsakar 	int indeadlock;
2037c478bd9Sstevel@tonic-gate 	ino_t ep_ino;			/* entry i number */
2047c478bd9Sstevel@tonic-gate 	ino_t chkino;
2057c478bd9Sstevel@tonic-gate 	ushort_t ep_reclen;		/* direct local d_reclen */
2067c478bd9Sstevel@tonic-gate 
2077c478bd9Sstevel@tonic-gate 	ASSERT(*namep != '\0'); /* All callers ensure *namep is non null */
2087c478bd9Sstevel@tonic-gate 
20902ff05a9Svsakar 	if (dp->i_ufsvfs)
21002ff05a9Svsakar 		ulp = &dp->i_ufsvfs->vfs_ulockfs;
2117c478bd9Sstevel@tonic-gate 
2127c478bd9Sstevel@tonic-gate 	/*
2137c478bd9Sstevel@tonic-gate 	 * Check the directory name lookup cache, first for individual files
2147c478bd9Sstevel@tonic-gate 	 * then for complete directories.
2157c478bd9Sstevel@tonic-gate 	 */
2167c478bd9Sstevel@tonic-gate 	dvp = ITOV(dp);
2177c478bd9Sstevel@tonic-gate 	if (!skipdnlc && (vp = dnlc_lookup(dvp, namep))) {
2187c478bd9Sstevel@tonic-gate 		/* vp is already held from dnlc_lookup */
2197c478bd9Sstevel@tonic-gate 		if (vp == DNLC_NO_VNODE) {
2207c478bd9Sstevel@tonic-gate 			VN_RELE(vp);
2217c478bd9Sstevel@tonic-gate 			return (ENOENT);
2227c478bd9Sstevel@tonic-gate 		}
2237c478bd9Sstevel@tonic-gate 		*ipp = VTOI(vp);
2247c478bd9Sstevel@tonic-gate 		return (0);
2257c478bd9Sstevel@tonic-gate 	}
2267c478bd9Sstevel@tonic-gate 
2277c478bd9Sstevel@tonic-gate 	dcap = &dp->i_danchor;
2287c478bd9Sstevel@tonic-gate 
2297c478bd9Sstevel@tonic-gate 	/*
2307c478bd9Sstevel@tonic-gate 	 * Grab the reader lock on the directory data before checking
2317c478bd9Sstevel@tonic-gate 	 * the dnlc to avoid a race with ufs_dirremove() & friends.
23202ff05a9Svsakar 	 *
23302ff05a9Svsakar 	 * ufs_tryirwlock uses rw_tryenter and checks for SLOCK to
23402ff05a9Svsakar 	 * avoid i_rwlock, ufs_lockfs_begin deadlock. If deadlock
23502ff05a9Svsakar 	 * possible, retries the operation.
2367c478bd9Sstevel@tonic-gate 	 */
23702ff05a9Svsakar 	ufs_tryirwlock((&dp->i_rwlock), RW_READER, retry_dircache);
23802ff05a9Svsakar 	if (indeadlock)
23902ff05a9Svsakar 		return (EAGAIN);
2407c478bd9Sstevel@tonic-gate 
2417c478bd9Sstevel@tonic-gate 	switch (dnlc_dir_lookup(dcap, namep, &handle)) {
2427c478bd9Sstevel@tonic-gate 	case DFOUND:
2437c478bd9Sstevel@tonic-gate 		ep_ino = (ino_t)H_TO_INO(handle);
2447c478bd9Sstevel@tonic-gate 		if (dp->i_number == ep_ino) {
2457c478bd9Sstevel@tonic-gate 			VN_HOLD(dvp);	/* want ourself, "." */
2467c478bd9Sstevel@tonic-gate 			*ipp = dp;
2477c478bd9Sstevel@tonic-gate 			rw_exit(&dp->i_rwlock);
2487c478bd9Sstevel@tonic-gate 			return (0);
2497c478bd9Sstevel@tonic-gate 		}
2507c478bd9Sstevel@tonic-gate 		if (namep[0] == '.' && namep[1] == '.' && namep[2] == 0) {
2517c478bd9Sstevel@tonic-gate 			uint64_t handle2;
2527c478bd9Sstevel@tonic-gate 			/*
2537c478bd9Sstevel@tonic-gate 			 * release the lock on the dir we are searching
2547c478bd9Sstevel@tonic-gate 			 * to avoid a deadlock when grabbing the
2557c478bd9Sstevel@tonic-gate 			 * i_contents lock in ufs_iget_alloced().
2567c478bd9Sstevel@tonic-gate 			 */
2577c478bd9Sstevel@tonic-gate 			rw_exit(&dp->i_rwlock);
2587c478bd9Sstevel@tonic-gate 			rw_enter(&dp->i_ufsvfs->vfs_dqrwlock, RW_READER);
2597c478bd9Sstevel@tonic-gate 			err = ufs_iget_alloced(dp->i_vfs, ep_ino, ipp, cr);
2607c478bd9Sstevel@tonic-gate 			rw_exit(&dp->i_ufsvfs->vfs_dqrwlock);
2617c478bd9Sstevel@tonic-gate 			/*
2627c478bd9Sstevel@tonic-gate 			 * must recheck as we dropped dp->i_rwlock
2637c478bd9Sstevel@tonic-gate 			 */
26402ff05a9Svsakar 			ufs_tryirwlock(&dp->i_rwlock, RW_READER, retry_parent);
26502ff05a9Svsakar 			if (indeadlock) {
26602ff05a9Svsakar 				if (!err)
26702ff05a9Svsakar 					VN_RELE(ITOV(*ipp));
26802ff05a9Svsakar 				return (EAGAIN);
26902ff05a9Svsakar 			}
2707c478bd9Sstevel@tonic-gate 			if (!err && (dnlc_dir_lookup(dcap, namep, &handle2)
2717c478bd9Sstevel@tonic-gate 			    == DFOUND) && (handle == handle2)) {
2727c478bd9Sstevel@tonic-gate 				dnlc_update(dvp, namep, ITOV(*ipp));
2737c478bd9Sstevel@tonic-gate 				rw_exit(&dp->i_rwlock);
2747c478bd9Sstevel@tonic-gate 				return (0);
2757c478bd9Sstevel@tonic-gate 			}
2767c478bd9Sstevel@tonic-gate 			/* check failed, read the actual directory */
2777c478bd9Sstevel@tonic-gate 			if (!err) {
2787c478bd9Sstevel@tonic-gate 				VN_RELE(ITOV(*ipp));
2797c478bd9Sstevel@tonic-gate 			}
2807c478bd9Sstevel@tonic-gate 			goto restart;
2817c478bd9Sstevel@tonic-gate 		}
2827c478bd9Sstevel@tonic-gate 		/* usual case of not "." nor ".." */
2837c478bd9Sstevel@tonic-gate 		rw_enter(&dp->i_ufsvfs->vfs_dqrwlock, RW_READER);
2847c478bd9Sstevel@tonic-gate 		err = ufs_iget_alloced(dp->i_vfs, ep_ino, ipp, cr);
2857c478bd9Sstevel@tonic-gate 		rw_exit(&dp->i_ufsvfs->vfs_dqrwlock);
2867c478bd9Sstevel@tonic-gate 		if (err) {
2877c478bd9Sstevel@tonic-gate 			rw_exit(&dp->i_rwlock);
2887c478bd9Sstevel@tonic-gate 			return (err);
2897c478bd9Sstevel@tonic-gate 		}
2907c478bd9Sstevel@tonic-gate 		dnlc_update(dvp, namep, ITOV(*ipp));
2917c478bd9Sstevel@tonic-gate 		rw_exit(&dp->i_rwlock);
2927c478bd9Sstevel@tonic-gate 		return (0);
2937c478bd9Sstevel@tonic-gate 	case DNOENT:
2947c478bd9Sstevel@tonic-gate 		if (ufs_negative_cache && (dp->i_nlink > 0)) {
2957c478bd9Sstevel@tonic-gate 			dnlc_enter(dvp, namep, DNLC_NO_VNODE);
2967c478bd9Sstevel@tonic-gate 		}
2977c478bd9Sstevel@tonic-gate 		rw_exit(&dp->i_rwlock);
2987c478bd9Sstevel@tonic-gate 		return (ENOENT);
2997c478bd9Sstevel@tonic-gate 	default:
3007c478bd9Sstevel@tonic-gate 		break;
3017c478bd9Sstevel@tonic-gate 	}
3027c478bd9Sstevel@tonic-gate restart:
3037c478bd9Sstevel@tonic-gate 
3047c478bd9Sstevel@tonic-gate 	fbp = NULL;
3057c478bd9Sstevel@tonic-gate 	doingchk = 0;
3067c478bd9Sstevel@tonic-gate 	chkino = 0;
3077c478bd9Sstevel@tonic-gate 	caching = 0;
3087c478bd9Sstevel@tonic-gate 
	/*
	 * Attempt to cache any directory whose size is at least the tunable
	 * ufs_min_dir_cache. If it fails due to memory shortage (DNOMEM),
	 * disable caching for this directory and record the system time.
	 * Any attempt after the disable time has expired will enable
	 * the caching again.
	 */
316*9b5097eeSOwen Roberts 	if (!skipcaching && (dp->i_size >= ufs_min_dir_cache)) {
3177f63b8c3Svsakar 		/*
3187f63b8c3Svsakar 		 * if the directory caching disable time has expired
3197f63b8c3Svsakar 		 * enable the caching again.
3207f63b8c3Svsakar 		 */
3217f63b8c3Svsakar 		if (dp->i_cachedir == CD_DISABLED_NOMEM &&
3227f63b8c3Svsakar 		    gethrtime() - ufs_dc_disable_at > ufs_dc_disable_duration) {
3237f63b8c3Svsakar 			ufs_dc_disable_at = 0;
3247f63b8c3Svsakar 			dp->i_cachedir = CD_ENABLED;
3257f63b8c3Svsakar 		}
3267f63b8c3Svsakar 		if (dp->i_cachedir == CD_ENABLED) {
3277f63b8c3Svsakar 			switch (dnlc_dir_start(dcap, dp->i_size >>
3287f63b8c3Svsakar 			    AV_DIRECT_SHIFT)) {
3297c478bd9Sstevel@tonic-gate 			case DNOMEM:
3307f63b8c3Svsakar 				dp->i_cachedir = CD_DISABLED_NOMEM;
3317f63b8c3Svsakar 				ufs_dc_disable_at = gethrtime();
3327f63b8c3Svsakar 				break;
3337c478bd9Sstevel@tonic-gate 			case DTOOBIG:
3347f63b8c3Svsakar 				dp->i_cachedir = CD_DISABLED_TOOBIG;
3357c478bd9Sstevel@tonic-gate 				break;
3367c478bd9Sstevel@tonic-gate 			case DOK:
3377c478bd9Sstevel@tonic-gate 				caching = 1;
3387c478bd9Sstevel@tonic-gate 				break;
3397c478bd9Sstevel@tonic-gate 			default:
3407c478bd9Sstevel@tonic-gate 				break;
3417c478bd9Sstevel@tonic-gate 			}
3427c478bd9Sstevel@tonic-gate 		}
3437f63b8c3Svsakar 	}
3447c478bd9Sstevel@tonic-gate 	/*
3457c478bd9Sstevel@tonic-gate 	 * If caching we don't stop when the file has been
3467c478bd9Sstevel@tonic-gate 	 * found, but need to know later, so clear *ipp now
3477c478bd9Sstevel@tonic-gate 	 */
3487c478bd9Sstevel@tonic-gate 	*ipp = NULL;
3497c478bd9Sstevel@tonic-gate 
3507c478bd9Sstevel@tonic-gate recheck:
3517c478bd9Sstevel@tonic-gate 	if (caching) {
3527c478bd9Sstevel@tonic-gate 		offset = 0;
3537c478bd9Sstevel@tonic-gate 		entryoffsetinblock = 0;
3547c478bd9Sstevel@tonic-gate 		numdirpasses = 1;
3557c478bd9Sstevel@tonic-gate 	} else {
3567c478bd9Sstevel@tonic-gate 		/*
3577c478bd9Sstevel@tonic-gate 		 * Take care to look at dp->i_diroff only once, as it
3587c478bd9Sstevel@tonic-gate 		 * may be changing due to other threads/cpus.
3597c478bd9Sstevel@tonic-gate 		 */
3607c478bd9Sstevel@tonic-gate 		offset = dp->i_diroff;
3617c478bd9Sstevel@tonic-gate 		if (offset > dp->i_size) {
3627c478bd9Sstevel@tonic-gate 			offset = 0;
3637c478bd9Sstevel@tonic-gate 		}
3647c478bd9Sstevel@tonic-gate 		if (offset == 0) {
3657c478bd9Sstevel@tonic-gate 			entryoffsetinblock = 0;
3667c478bd9Sstevel@tonic-gate 			numdirpasses = 1;
3677c478bd9Sstevel@tonic-gate 		} else {
3687c478bd9Sstevel@tonic-gate 			start_off = offset;
3697c478bd9Sstevel@tonic-gate 
3707c478bd9Sstevel@tonic-gate 			entryoffsetinblock = blkoff(dp->i_fs, offset);
3717c478bd9Sstevel@tonic-gate 			if (entryoffsetinblock != 0) {
3727c478bd9Sstevel@tonic-gate 				err = blkatoff(dp, offset, (char **)0, &fbp);
3737c478bd9Sstevel@tonic-gate 				if (err)
3747c478bd9Sstevel@tonic-gate 					goto bad;
3757c478bd9Sstevel@tonic-gate 			}
3767c478bd9Sstevel@tonic-gate 			numdirpasses = 2;
3777c478bd9Sstevel@tonic-gate 		}
3787c478bd9Sstevel@tonic-gate 	}
3797c478bd9Sstevel@tonic-gate 	endsearch = P2ROUNDUP_TYPED(dp->i_size, DIRBLKSIZ, u_offset_t);
3807c478bd9Sstevel@tonic-gate 	namlen = strlen(namep);
3817c478bd9Sstevel@tonic-gate 	last_offset = 0;
3827c478bd9Sstevel@tonic-gate 
3837c478bd9Sstevel@tonic-gate searchloop:
3847c478bd9Sstevel@tonic-gate 	while (offset < endsearch) {
3857c478bd9Sstevel@tonic-gate 		/*
3867c478bd9Sstevel@tonic-gate 		 * If offset is on a block boundary,
3877c478bd9Sstevel@tonic-gate 		 * read the next directory block.
3887c478bd9Sstevel@tonic-gate 		 * Release previous if it exists.
3897c478bd9Sstevel@tonic-gate 		 */
3907c478bd9Sstevel@tonic-gate 		if (blkoff(dp->i_fs, offset) == 0) {
3917c478bd9Sstevel@tonic-gate 			if (fbp != NULL) {
3927c478bd9Sstevel@tonic-gate 				fbrelse(fbp, S_OTHER);
3937c478bd9Sstevel@tonic-gate 			}
3947c478bd9Sstevel@tonic-gate 			err = blkatoff(dp, offset, (char **)0, &fbp);
3957c478bd9Sstevel@tonic-gate 			if (err)
3967c478bd9Sstevel@tonic-gate 				goto bad;
3977c478bd9Sstevel@tonic-gate 			entryoffsetinblock = 0;
3987c478bd9Sstevel@tonic-gate 		}
3997c478bd9Sstevel@tonic-gate 
4007c478bd9Sstevel@tonic-gate 		/*
4017c478bd9Sstevel@tonic-gate 		 * If the offset to the next entry is invalid or if the
4027c478bd9Sstevel@tonic-gate 		 * next entry is a zero length record or if the record
4037c478bd9Sstevel@tonic-gate 		 * length is invalid, then skip to the next directory
4047c478bd9Sstevel@tonic-gate 		 * block.  Complete validation checks are done if the
4057c478bd9Sstevel@tonic-gate 		 * record length is invalid.
4067c478bd9Sstevel@tonic-gate 		 *
4077c478bd9Sstevel@tonic-gate 		 * Full validation checks are slow so they are disabled
4087c478bd9Sstevel@tonic-gate 		 * by default.  Complete checks can be run by patching
4097c478bd9Sstevel@tonic-gate 		 * "dirchk" to be true.
4107c478bd9Sstevel@tonic-gate 		 *
4117c478bd9Sstevel@tonic-gate 		 * We have to check the validity of entryoffsetinblock
4127c478bd9Sstevel@tonic-gate 		 * here because it can be set to i_diroff above.
4137c478bd9Sstevel@tonic-gate 		 */
4147c478bd9Sstevel@tonic-gate 		ep = (struct direct *)(fbp->fb_addr + entryoffsetinblock);
4157c478bd9Sstevel@tonic-gate 		if ((entryoffsetinblock & 0x3) || ep->d_reclen == 0 ||
4167c478bd9Sstevel@tonic-gate 		    (dirchk || (ep->d_reclen & 0x3)) &&
4177c478bd9Sstevel@tonic-gate 		    dirmangled(dp, ep, entryoffsetinblock, offset)) {
4187c478bd9Sstevel@tonic-gate 			i = DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1));
4197c478bd9Sstevel@tonic-gate 			offset += i;
4207c478bd9Sstevel@tonic-gate 			entryoffsetinblock += i;
4217c478bd9Sstevel@tonic-gate 			if (caching) {
4227c478bd9Sstevel@tonic-gate 				dnlc_dir_purge(dcap);
4237c478bd9Sstevel@tonic-gate 				caching = 0;
4247c478bd9Sstevel@tonic-gate 			}
4257c478bd9Sstevel@tonic-gate 			continue;
4267c478bd9Sstevel@tonic-gate 		}
4277c478bd9Sstevel@tonic-gate 
4287c478bd9Sstevel@tonic-gate 		ep_reclen = ep->d_reclen;
4297c478bd9Sstevel@tonic-gate 
4307c478bd9Sstevel@tonic-gate 		/*
4317c478bd9Sstevel@tonic-gate 		 * Add named entries and free space into the directory cache
4327c478bd9Sstevel@tonic-gate 		 */
4337c478bd9Sstevel@tonic-gate 		if (caching) {
4347c478bd9Sstevel@tonic-gate 			ushort_t extra;
4357c478bd9Sstevel@tonic-gate 			off_t off2;
4367c478bd9Sstevel@tonic-gate 
4377c478bd9Sstevel@tonic-gate 			if (ep->d_ino == 0) {
4387c478bd9Sstevel@tonic-gate 				extra = ep_reclen;
4397c478bd9Sstevel@tonic-gate 				if (offset & (DIRBLKSIZ - 1)) {
4407c478bd9Sstevel@tonic-gate 					dnlc_dir_purge(dcap);
4417f63b8c3Svsakar 					dp->i_cachedir = CD_DISABLED;
4427c478bd9Sstevel@tonic-gate 					caching = 0;
4437c478bd9Sstevel@tonic-gate 				}
4447c478bd9Sstevel@tonic-gate 			} else {
4457c478bd9Sstevel@tonic-gate 				/*
4467c478bd9Sstevel@tonic-gate 				 * entries hold the previous offset except the
4477c478bd9Sstevel@tonic-gate 				 * 1st which holds the offset + 1
4487c478bd9Sstevel@tonic-gate 				 */
4497c478bd9Sstevel@tonic-gate 				if (offset & (DIRBLKSIZ - 1)) {
4507c478bd9Sstevel@tonic-gate 					off2 = last_offset;
4517c478bd9Sstevel@tonic-gate 				} else {
4527c478bd9Sstevel@tonic-gate 					off2 = offset + 1;
4537c478bd9Sstevel@tonic-gate 				}
4547c478bd9Sstevel@tonic-gate 				caching = (dnlc_dir_add_entry(dcap, ep->d_name,
4557c478bd9Sstevel@tonic-gate 				    INO_OFF_TO_H(ep->d_ino, off2)) == DOK);
4567c478bd9Sstevel@tonic-gate 				extra = ep_reclen - DIRSIZ(ep);
4577c478bd9Sstevel@tonic-gate 			}
4587c478bd9Sstevel@tonic-gate 			if (caching && (extra >= LDIRSIZ(1))) {
4597c478bd9Sstevel@tonic-gate 				caching = (dnlc_dir_add_space(dcap, extra,
4607c478bd9Sstevel@tonic-gate 				    (uint64_t)offset) == DOK);
4617c478bd9Sstevel@tonic-gate 			}
4627c478bd9Sstevel@tonic-gate 		}
4637c478bd9Sstevel@tonic-gate 
4647c478bd9Sstevel@tonic-gate 		/*
4657c478bd9Sstevel@tonic-gate 		 * Check for a name match.
4667c478bd9Sstevel@tonic-gate 		 * We have the parent inode read locked with i_rwlock.
4677c478bd9Sstevel@tonic-gate 		 */
4687c478bd9Sstevel@tonic-gate 		if (ep->d_ino && ep->d_namlen == namlen &&
4697c478bd9Sstevel@tonic-gate 		    *namep == *ep->d_name &&	/* fast chk 1st chr */
4707c478bd9Sstevel@tonic-gate 		    bcmp(namep, ep->d_name, (int)ep->d_namlen) == 0) {
4717c478bd9Sstevel@tonic-gate 
4727c478bd9Sstevel@tonic-gate 			/*
4737c478bd9Sstevel@tonic-gate 			 * We have to release the fbp early here to avoid
4747c478bd9Sstevel@tonic-gate 			 * a possible deadlock situation where we have the
4757c478bd9Sstevel@tonic-gate 			 * fbp and want the directory inode and someone doing
4767c478bd9Sstevel@tonic-gate 			 * a ufs_direnter_* has the directory inode and wants
4777c478bd9Sstevel@tonic-gate 			 * the fbp.  XXX - is this still needed?
4787c478bd9Sstevel@tonic-gate 			 */
4797c478bd9Sstevel@tonic-gate 			ep_ino = (ino_t)ep->d_ino;
4807c478bd9Sstevel@tonic-gate 			ASSERT(fbp != NULL);
4817c478bd9Sstevel@tonic-gate 			fbrelse(fbp, S_OTHER);
4827c478bd9Sstevel@tonic-gate 			fbp = NULL;
4837c478bd9Sstevel@tonic-gate 
4847c478bd9Sstevel@tonic-gate 			/*
4857c478bd9Sstevel@tonic-gate 			 * Atomic update (read lock held)
4867c478bd9Sstevel@tonic-gate 			 */
4877c478bd9Sstevel@tonic-gate 			dp->i_diroff = offset;
4887c478bd9Sstevel@tonic-gate 
4897c478bd9Sstevel@tonic-gate 			if (namlen == 2 && namep[0] == '.' && namep[1] == '.') {
4907c478bd9Sstevel@tonic-gate 				struct timeval32 omtime;
4917c478bd9Sstevel@tonic-gate 
4927c478bd9Sstevel@tonic-gate 				if (caching) {
4937c478bd9Sstevel@tonic-gate 					dnlc_dir_purge(dcap);
4947c478bd9Sstevel@tonic-gate 					caching = 0;
4957c478bd9Sstevel@tonic-gate 				}
4967c478bd9Sstevel@tonic-gate 				if (doingchk) {
4977c478bd9Sstevel@tonic-gate 					/*
4987c478bd9Sstevel@tonic-gate 					 * if the inumber didn't change
4997c478bd9Sstevel@tonic-gate 					 * continue with already found inode.
5007c478bd9Sstevel@tonic-gate 					 */
5017c478bd9Sstevel@tonic-gate 					if (ep_ino == chkino)
5027c478bd9Sstevel@tonic-gate 						goto checkok;
5037c478bd9Sstevel@tonic-gate 					else {
5047c478bd9Sstevel@tonic-gate 						VN_RELE(ITOV(*ipp));
5057c478bd9Sstevel@tonic-gate 						/* *ipp is nulled at restart */
5067c478bd9Sstevel@tonic-gate 						goto restart;
5077c478bd9Sstevel@tonic-gate 					}
5087c478bd9Sstevel@tonic-gate 				}
5097c478bd9Sstevel@tonic-gate 				/*
5107c478bd9Sstevel@tonic-gate 				 * release the lock on the dir we are searching
5117c478bd9Sstevel@tonic-gate 				 * to avoid a deadlock when grabbing the
5127c478bd9Sstevel@tonic-gate 				 * i_contents lock in ufs_iget_alloced().
5137c478bd9Sstevel@tonic-gate 				 */
5147c478bd9Sstevel@tonic-gate 				omtime = dp->i_mtime;
5157c478bd9Sstevel@tonic-gate 				rw_exit(&dp->i_rwlock);
5167c478bd9Sstevel@tonic-gate 				rw_enter(&dp->i_ufsvfs->vfs_dqrwlock,
5177c478bd9Sstevel@tonic-gate 				    RW_READER);
5187c478bd9Sstevel@tonic-gate 				err = ufs_iget_alloced(dp->i_vfs, ep_ino, ipp,
5197c478bd9Sstevel@tonic-gate 				    cr);
5207c478bd9Sstevel@tonic-gate 				rw_exit(&dp->i_ufsvfs->vfs_dqrwlock);
52102ff05a9Svsakar 				ufs_tryirwlock(&dp->i_rwlock, RW_READER,
52202ff05a9Svsakar 				    retry_disk);
52302ff05a9Svsakar 				if (indeadlock) {
52402ff05a9Svsakar 					if (!err)
52502ff05a9Svsakar 						VN_RELE(ITOV(*ipp));
52602ff05a9Svsakar 					return (EAGAIN);
52702ff05a9Svsakar 				}
5287c478bd9Sstevel@tonic-gate 				if (err)
5297c478bd9Sstevel@tonic-gate 					goto bad;
5307c478bd9Sstevel@tonic-gate 				/*
5317c478bd9Sstevel@tonic-gate 				 * Since we released the lock on the directory,
5327c478bd9Sstevel@tonic-gate 				 * we must check that the same inode is still
5337c478bd9Sstevel@tonic-gate 				 * the ".." entry for this directory.
5347c478bd9Sstevel@tonic-gate 				 */
5357c478bd9Sstevel@tonic-gate 				/*CSTYLED*/
5367c478bd9Sstevel@tonic-gate 				if (timercmp(&omtime, &dp->i_mtime, !=)) {
5377c478bd9Sstevel@tonic-gate 					/*
5387c478bd9Sstevel@tonic-gate 					 * Modification time changed on the
5397c478bd9Sstevel@tonic-gate 					 * directory, we must go check if
5407c478bd9Sstevel@tonic-gate 					 * the inumber changed for ".."
5417c478bd9Sstevel@tonic-gate 					 */
5427c478bd9Sstevel@tonic-gate 					doingchk = 1;
5437c478bd9Sstevel@tonic-gate 					chkino = ep_ino;
5447c478bd9Sstevel@tonic-gate 					entryoffsetinblock = 0;
5457c478bd9Sstevel@tonic-gate 					if (caching) {
5467c478bd9Sstevel@tonic-gate 						/*
5477c478bd9Sstevel@tonic-gate 						 * Forget directory caching
5487c478bd9Sstevel@tonic-gate 						 * for this rare case
5497c478bd9Sstevel@tonic-gate 						 */
5507c478bd9Sstevel@tonic-gate 						dnlc_dir_purge(dcap);
5517c478bd9Sstevel@tonic-gate 						caching = 0;
5527c478bd9Sstevel@tonic-gate 					}
5537c478bd9Sstevel@tonic-gate 					goto recheck;
5547c478bd9Sstevel@tonic-gate 				}
5557c478bd9Sstevel@tonic-gate 			} else if (dp->i_number == ep_ino) {
5567c478bd9Sstevel@tonic-gate 				VN_HOLD(dvp);	/* want ourself, "." */
5577c478bd9Sstevel@tonic-gate 				*ipp = dp;
5587c478bd9Sstevel@tonic-gate 				if (caching) {
5597c478bd9Sstevel@tonic-gate 					dnlc_dir_purge(dcap);
5607c478bd9Sstevel@tonic-gate 					caching = 0;
5617c478bd9Sstevel@tonic-gate 				}
5627c478bd9Sstevel@tonic-gate 			} else {
5637c478bd9Sstevel@tonic-gate 				rw_enter(&dp->i_ufsvfs->vfs_dqrwlock,
5647c478bd9Sstevel@tonic-gate 				    RW_READER);
5657c478bd9Sstevel@tonic-gate 				err = ufs_iget_alloced(dp->i_vfs, ep_ino, ipp,
5667c478bd9Sstevel@tonic-gate 				    cr);
5677c478bd9Sstevel@tonic-gate 				rw_exit(&dp->i_ufsvfs->vfs_dqrwlock);
5687c478bd9Sstevel@tonic-gate 				if (err)
5697c478bd9Sstevel@tonic-gate 					goto bad;
5707c478bd9Sstevel@tonic-gate 			}
5717c478bd9Sstevel@tonic-gate checkok:
5727c478bd9Sstevel@tonic-gate 			ASSERT(*ipp);
5737c478bd9Sstevel@tonic-gate 			dnlc_update(dvp, namep, ITOV(*ipp));
5747c478bd9Sstevel@tonic-gate 			/*
5757c478bd9Sstevel@tonic-gate 			 * If we are not caching then just return the entry
5767c478bd9Sstevel@tonic-gate 			 * otherwise complete loading up the cache
5777c478bd9Sstevel@tonic-gate 			 */
5787c478bd9Sstevel@tonic-gate 			if (!caching) {
5797c478bd9Sstevel@tonic-gate 				rw_exit(&dp->i_rwlock);
5807c478bd9Sstevel@tonic-gate 				return (0);
5817c478bd9Sstevel@tonic-gate 			}
5827c478bd9Sstevel@tonic-gate 			err = blkatoff(dp, offset, (char **)0, &fbp);
5837c478bd9Sstevel@tonic-gate 			if (err)
5847c478bd9Sstevel@tonic-gate 				goto bad;
5857c478bd9Sstevel@tonic-gate 		}
5867c478bd9Sstevel@tonic-gate 		last_offset = offset;
5877c478bd9Sstevel@tonic-gate 		offset += ep_reclen;
5887c478bd9Sstevel@tonic-gate 		entryoffsetinblock += ep_reclen;
5897c478bd9Sstevel@tonic-gate 	}
5907c478bd9Sstevel@tonic-gate 	/*
5917c478bd9Sstevel@tonic-gate 	 * If we started in the middle of the directory and failed
5927c478bd9Sstevel@tonic-gate 	 * to find our target, we must check the beginning as well.
5937c478bd9Sstevel@tonic-gate 	 */
5947c478bd9Sstevel@tonic-gate 	if (numdirpasses == 2) {
5957c478bd9Sstevel@tonic-gate 		numdirpasses--;
5967c478bd9Sstevel@tonic-gate 		offset = 0;
5977c478bd9Sstevel@tonic-gate 		endsearch = start_off;
5987c478bd9Sstevel@tonic-gate 		goto searchloop;
5997c478bd9Sstevel@tonic-gate 	}
6007c478bd9Sstevel@tonic-gate 
6017c478bd9Sstevel@tonic-gate 	/*
6027c478bd9Sstevel@tonic-gate 	 * If whole directory caching is on (or was originally on) then
6037c478bd9Sstevel@tonic-gate 	 * the entry may have been found.
6047c478bd9Sstevel@tonic-gate 	 */
6057c478bd9Sstevel@tonic-gate 	if (*ipp == NULL) {
6067c478bd9Sstevel@tonic-gate 		err = ENOENT;
6077c478bd9Sstevel@tonic-gate 		if (ufs_negative_cache && (dp->i_nlink > 0)) {
6087c478bd9Sstevel@tonic-gate 			dnlc_enter(dvp, namep, DNLC_NO_VNODE);
6097c478bd9Sstevel@tonic-gate 		}
6107c478bd9Sstevel@tonic-gate 	}
6117c478bd9Sstevel@tonic-gate 	if (caching) {
6127c478bd9Sstevel@tonic-gate 		dnlc_dir_complete(dcap);
6137c478bd9Sstevel@tonic-gate 		caching = 0;
6147c478bd9Sstevel@tonic-gate 	}
6157c478bd9Sstevel@tonic-gate 
6167c478bd9Sstevel@tonic-gate bad:
6177c478bd9Sstevel@tonic-gate 	if (err && *ipp) {
6187c478bd9Sstevel@tonic-gate 		/*
6197c478bd9Sstevel@tonic-gate 		 * err and *ipp can both be set if we were attempting to
6207c478bd9Sstevel@tonic-gate 		 * cache the directory, and we found the entry, then later
6217c478bd9Sstevel@tonic-gate 		 * while trying to complete the directory cache encountered
6227c478bd9Sstevel@tonic-gate 		 * a error (eg reading a directory sector).
6237c478bd9Sstevel@tonic-gate 		 */
6247c478bd9Sstevel@tonic-gate 		VN_RELE(ITOV(*ipp));
6257c478bd9Sstevel@tonic-gate 		*ipp = NULL;
6267c478bd9Sstevel@tonic-gate 	}
6277c478bd9Sstevel@tonic-gate 
6287c478bd9Sstevel@tonic-gate 	if (fbp)
6297c478bd9Sstevel@tonic-gate 		fbrelse(fbp, S_OTHER);
6307c478bd9Sstevel@tonic-gate 	rw_exit(&dp->i_rwlock);
6317c478bd9Sstevel@tonic-gate 	if (caching)
6327c478bd9Sstevel@tonic-gate 		dnlc_dir_purge(dcap);
6337c478bd9Sstevel@tonic-gate 	return (err);
6347c478bd9Sstevel@tonic-gate }
6357c478bd9Sstevel@tonic-gate 
6367c478bd9Sstevel@tonic-gate /*
6377c478bd9Sstevel@tonic-gate  * Write a new directory entry for DE_CREATE or DE_MKDIR operations.
6387c478bd9Sstevel@tonic-gate  */
6397c478bd9Sstevel@tonic-gate int
ufs_direnter_cm(struct inode * tdp,char * namep,enum de_op op,struct vattr * vap,struct inode ** ipp,struct cred * cr,int flags)6407c478bd9Sstevel@tonic-gate ufs_direnter_cm(
6417c478bd9Sstevel@tonic-gate 	struct inode *tdp,	/* target directory to make entry in */
6427c478bd9Sstevel@tonic-gate 	char *namep,		/* name of entry */
6437c478bd9Sstevel@tonic-gate 	enum de_op op,		/* entry operation */
6447c478bd9Sstevel@tonic-gate 	struct vattr *vap,	/* attributes if new inode needed */
6457c478bd9Sstevel@tonic-gate 	struct inode **ipp,	/* return entered inode here */
6467c478bd9Sstevel@tonic-gate 	struct cred *cr,	/* user credentials */
6477c478bd9Sstevel@tonic-gate 	int flags)		/* no entry exists */
6487c478bd9Sstevel@tonic-gate {
6497c478bd9Sstevel@tonic-gate 	struct inode *tip;	/* inode of (existing) target file */
6507c478bd9Sstevel@tonic-gate 	char *s;
6515b024a5bSbatschul 	struct ufs_slot slot;	/* slot info to pass around */
6527c478bd9Sstevel@tonic-gate 	int namlen;		/* length of name */
6537c478bd9Sstevel@tonic-gate 	int err;		/* error number */
6547c478bd9Sstevel@tonic-gate 	struct inode *nip;	/* new inode */
6557c478bd9Sstevel@tonic-gate 	int do_rele_nip = 0;	/* release nip */
6567c478bd9Sstevel@tonic-gate 	int noentry = flags & ~IQUIET;
6577c478bd9Sstevel@tonic-gate 	int quiet = flags & IQUIET;	/* Suppress out of inodes message */
65802ff05a9Svsakar 	int indeadlock;
65902ff05a9Svsakar 	struct ulockfs *ulp;
6607c478bd9Sstevel@tonic-gate 
6617c478bd9Sstevel@tonic-gate 	ASSERT(RW_WRITE_HELD(&tdp->i_rwlock));
6627c478bd9Sstevel@tonic-gate 
6637c478bd9Sstevel@tonic-gate 	if (((tdp->i_mode & IFMT) == IFATTRDIR) && ((op == DE_MKDIR) ||
6647c478bd9Sstevel@tonic-gate 	    ((vap->va_type == VCHR) || (vap->va_type == VBLK) ||
6657c478bd9Sstevel@tonic-gate 	    (vap->va_type == VDOOR) || (vap->va_type == VSOCK) ||
6667c478bd9Sstevel@tonic-gate 	    (vap->va_type == VFIFO))))
6677c478bd9Sstevel@tonic-gate 		return (EINVAL);
6687c478bd9Sstevel@tonic-gate 
6697c478bd9Sstevel@tonic-gate 	/* don't allow '/' characters in pathname component */
6707c478bd9Sstevel@tonic-gate 	for (s = namep, namlen = 0; *s; s++, namlen++)
6717c478bd9Sstevel@tonic-gate 		if (*s == '/')
6727c478bd9Sstevel@tonic-gate 			return (EACCES);
6737c478bd9Sstevel@tonic-gate 	ASSERT(namlen);
6747c478bd9Sstevel@tonic-gate 
6757c478bd9Sstevel@tonic-gate 	/*
67660c8e821SFrank Batschulat 	 * Check accessibility of target directory.
67760c8e821SFrank Batschulat 	 */
67860c8e821SFrank Batschulat 	if (err = ufs_diraccess(tdp, IEXEC, cr))
67960c8e821SFrank Batschulat 		return (err);
68060c8e821SFrank Batschulat 
68160c8e821SFrank Batschulat 	/*
6827c478bd9Sstevel@tonic-gate 	 * If name is "." or ".." then if this is a create look it up
6837c478bd9Sstevel@tonic-gate 	 * and return EEXIST.
6847c478bd9Sstevel@tonic-gate 	 */
6857c478bd9Sstevel@tonic-gate 	if (namep[0] == '.' &&
6867c478bd9Sstevel@tonic-gate 	    (namlen == 1 || (namlen == 2 && namep[1] == '.'))) {
6877c478bd9Sstevel@tonic-gate 		/*
6887c478bd9Sstevel@tonic-gate 		 * ufs_dirlook will acquire the i_rwlock
6897c478bd9Sstevel@tonic-gate 		 */
69002ff05a9Svsakar 		if (tdp->i_ufsvfs)
69102ff05a9Svsakar 			ulp = &tdp->i_ufsvfs->vfs_ulockfs;
6927c478bd9Sstevel@tonic-gate 		rw_exit(&tdp->i_rwlock);
693*9b5097eeSOwen Roberts 		if (err = ufs_dirlook(tdp, namep, ipp, cr, 0, 0)) {
69402ff05a9Svsakar 			if (err == EAGAIN)
69502ff05a9Svsakar 				return (err);
69602ff05a9Svsakar 
69702ff05a9Svsakar 			/*
69802ff05a9Svsakar 			 * ufs_tryirwlock uses rw_tryenter and checks for
69902ff05a9Svsakar 			 * SLOCK to avoid i_rwlock, ufs_lockfs_begin deadlock.
70002ff05a9Svsakar 			 * If deadlock possible, retries the operation.
70102ff05a9Svsakar 			 */
70202ff05a9Svsakar 			ufs_tryirwlock(&tdp->i_rwlock, RW_WRITER, retry_err);
70302ff05a9Svsakar 			if (indeadlock)
70402ff05a9Svsakar 				return (EAGAIN);
70502ff05a9Svsakar 
7067c478bd9Sstevel@tonic-gate 			return (err);
7077c478bd9Sstevel@tonic-gate 		}
70802ff05a9Svsakar 		ufs_tryirwlock(&tdp->i_rwlock, RW_WRITER, retry);
70902ff05a9Svsakar 		if (indeadlock) {
71002ff05a9Svsakar 			VN_RELE(ITOV(*ipp));
71102ff05a9Svsakar 			return (EAGAIN);
71202ff05a9Svsakar 		}
7137c478bd9Sstevel@tonic-gate 		return (EEXIST);
7147c478bd9Sstevel@tonic-gate 	}
7157c478bd9Sstevel@tonic-gate 
7167c478bd9Sstevel@tonic-gate 	/*
7177c478bd9Sstevel@tonic-gate 	 * If target directory has not been removed, then we can consider
7187c478bd9Sstevel@tonic-gate 	 * allowing file to be created.
7197c478bd9Sstevel@tonic-gate 	 */
7207c478bd9Sstevel@tonic-gate 	if (tdp->i_nlink <= 0) {
7217c478bd9Sstevel@tonic-gate 		return (ENOENT);
7227c478bd9Sstevel@tonic-gate 	}
7237c478bd9Sstevel@tonic-gate 
7247c478bd9Sstevel@tonic-gate 	/*
7257c478bd9Sstevel@tonic-gate 	 * Search for the entry. Return VN_HELD tip if found.
7267c478bd9Sstevel@tonic-gate 	 */
7277c478bd9Sstevel@tonic-gate 	tip = NULL;
7287c478bd9Sstevel@tonic-gate 	slot.fbp = NULL;
7297c478bd9Sstevel@tonic-gate 	slot.status = NONE;
7307c478bd9Sstevel@tonic-gate 	rw_enter(&tdp->i_ufsvfs->vfs_dqrwlock, RW_READER);
7317c478bd9Sstevel@tonic-gate 	rw_enter(&tdp->i_contents, RW_WRITER);
7327c478bd9Sstevel@tonic-gate 	err = ufs_dircheckforname(tdp, namep, namlen, &slot, &tip, cr, noentry);
7337c478bd9Sstevel@tonic-gate 	if (err)
7347c478bd9Sstevel@tonic-gate 		goto out;
7357c478bd9Sstevel@tonic-gate 	if (tip) {
7367c478bd9Sstevel@tonic-gate 		ASSERT(!noentry);
7377c478bd9Sstevel@tonic-gate 		*ipp = tip;
7387c478bd9Sstevel@tonic-gate 		err = EEXIST;
7397c478bd9Sstevel@tonic-gate 	} else {
7407c478bd9Sstevel@tonic-gate 		/*
7417c478bd9Sstevel@tonic-gate 		 * The entry does not exist. Check write permission in
7427c478bd9Sstevel@tonic-gate 		 * directory to see if entry can be created.
7437c478bd9Sstevel@tonic-gate 		 */
74460c8e821SFrank Batschulat 		if (err = ufs_iaccess(tdp, IWRITE, cr, 0))
7457c478bd9Sstevel@tonic-gate 			goto out;
7467c478bd9Sstevel@tonic-gate 		/*
7477c478bd9Sstevel@tonic-gate 		 * Make new inode and directory entry.
7487c478bd9Sstevel@tonic-gate 		 */
7497c478bd9Sstevel@tonic-gate 		tdp->i_flag |= quiet;
7507c478bd9Sstevel@tonic-gate 		if (err = ufs_dirmakeinode(tdp, &nip, vap, op, cr)) {
7517c478bd9Sstevel@tonic-gate 			if (nip != NULL)
7527c478bd9Sstevel@tonic-gate 				do_rele_nip = 1;
7537c478bd9Sstevel@tonic-gate 			goto out;
7547c478bd9Sstevel@tonic-gate 		}
7557c478bd9Sstevel@tonic-gate 		if (err = ufs_diraddentry(tdp, namep, op,
7567c478bd9Sstevel@tonic-gate 		    namlen, &slot, nip, NULL, cr)) {
7577c478bd9Sstevel@tonic-gate 			/*
7587c478bd9Sstevel@tonic-gate 			 * Unmake the inode we just made.
7597c478bd9Sstevel@tonic-gate 			 */
7607c478bd9Sstevel@tonic-gate 			rw_enter(&nip->i_contents, RW_WRITER);
7617c478bd9Sstevel@tonic-gate 			if (((nip->i_mode & IFMT) == IFDIR) ||
7627c478bd9Sstevel@tonic-gate 			    ((nip->i_mode & IFMT) == IFATTRDIR)) {
7637c478bd9Sstevel@tonic-gate 				tdp->i_nlink--;
7647c478bd9Sstevel@tonic-gate 				ufs_setreclaim(tdp);
7657c478bd9Sstevel@tonic-gate 				tdp->i_flag |= ICHG;
7667c478bd9Sstevel@tonic-gate 				tdp->i_seq++;
7677c478bd9Sstevel@tonic-gate 				TRANS_INODE(tdp->i_ufsvfs, tdp);
7687c478bd9Sstevel@tonic-gate 				ITIMES_NOLOCK(tdp);
7697c478bd9Sstevel@tonic-gate 			}
7707c478bd9Sstevel@tonic-gate 			nip->i_nlink = 0;
7717c478bd9Sstevel@tonic-gate 			ufs_setreclaim(nip);
7727c478bd9Sstevel@tonic-gate 			TRANS_INODE(nip->i_ufsvfs, nip);
7737c478bd9Sstevel@tonic-gate 			nip->i_flag |= ICHG;
7747c478bd9Sstevel@tonic-gate 			nip->i_seq++;
7757c478bd9Sstevel@tonic-gate 			ITIMES_NOLOCK(nip);
7767c478bd9Sstevel@tonic-gate 			rw_exit(&nip->i_contents);
7777c478bd9Sstevel@tonic-gate 			do_rele_nip = 1;
7787c478bd9Sstevel@tonic-gate 		} else {
7797c478bd9Sstevel@tonic-gate 			*ipp = nip;
7807c478bd9Sstevel@tonic-gate 		}
7817c478bd9Sstevel@tonic-gate 	}
7827c478bd9Sstevel@tonic-gate 
7837c478bd9Sstevel@tonic-gate out:
7847c478bd9Sstevel@tonic-gate 	if (slot.fbp)
7857c478bd9Sstevel@tonic-gate 		fbrelse(slot.fbp, S_OTHER);
7867c478bd9Sstevel@tonic-gate 
7877c478bd9Sstevel@tonic-gate 	tdp->i_flag &= ~quiet;
7887c478bd9Sstevel@tonic-gate 	rw_exit(&tdp->i_contents);
7897c478bd9Sstevel@tonic-gate 
7907c478bd9Sstevel@tonic-gate 	/*
7917c478bd9Sstevel@tonic-gate 	 * Drop vfs_dqrwlock before calling VN_RELE() on nip to
7927c478bd9Sstevel@tonic-gate 	 * avoid deadlock since ufs_delete() grabs vfs_dqrwlock as reader.
7937c478bd9Sstevel@tonic-gate 	 */
7947c478bd9Sstevel@tonic-gate 	rw_exit(&tdp->i_ufsvfs->vfs_dqrwlock);
7957c478bd9Sstevel@tonic-gate 
7967c478bd9Sstevel@tonic-gate 	if (do_rele_nip) {
7977c478bd9Sstevel@tonic-gate 		VN_RELE(ITOV(nip));
7987c478bd9Sstevel@tonic-gate 	}
7997c478bd9Sstevel@tonic-gate 
8007c478bd9Sstevel@tonic-gate 	return (err);
8017c478bd9Sstevel@tonic-gate }
8027c478bd9Sstevel@tonic-gate 
8037c478bd9Sstevel@tonic-gate /*
8047c478bd9Sstevel@tonic-gate  * Write a new directory entry for DE_LINK, DE_SYMLINK or DE_RENAME operations.
8057c478bd9Sstevel@tonic-gate  */
8067c478bd9Sstevel@tonic-gate int
ufs_direnter_lr(struct inode * tdp,char * namep,enum de_op op,struct inode * sdp,struct inode * sip,struct cred * cr)8077c478bd9Sstevel@tonic-gate ufs_direnter_lr(
8087c478bd9Sstevel@tonic-gate 	struct inode *tdp,	/* target directory to make entry in */
8097c478bd9Sstevel@tonic-gate 	char *namep,		/* name of entry */
8107c478bd9Sstevel@tonic-gate 	enum de_op op,		/* entry operation */
8117c478bd9Sstevel@tonic-gate 	struct inode *sdp,	/* source inode parent if rename */
8127c478bd9Sstevel@tonic-gate 	struct inode *sip,	/* source inode */
813*9b5097eeSOwen Roberts 	struct cred *cr)	/* user credentials */
8147c478bd9Sstevel@tonic-gate {
8157c478bd9Sstevel@tonic-gate 	struct inode *tip;	/* inode of (existing) target file */
8167c478bd9Sstevel@tonic-gate 	char *s;
8175b024a5bSbatschul 	struct ufs_slot slot;	/* slot info to pass around */
8187c478bd9Sstevel@tonic-gate 	int namlen;		/* length of name */
8197c478bd9Sstevel@tonic-gate 	int err;		/* error number */
8207c478bd9Sstevel@tonic-gate 
8217c478bd9Sstevel@tonic-gate 	/* don't allow '/' characters in pathname component */
8227c478bd9Sstevel@tonic-gate 	for (s = namep, namlen = 0; *s; s++, namlen++)
8237c478bd9Sstevel@tonic-gate 		if (*s == '/')
8247c478bd9Sstevel@tonic-gate 			return (EACCES);
8257c478bd9Sstevel@tonic-gate 	ASSERT(namlen);
8267c478bd9Sstevel@tonic-gate 	ASSERT(RW_WRITE_HELD(&tdp->i_rwlock));
8277c478bd9Sstevel@tonic-gate 
8287c478bd9Sstevel@tonic-gate 	/*
8297c478bd9Sstevel@tonic-gate 	 * If name is "." or ".." then if this is a create look it up
8307c478bd9Sstevel@tonic-gate 	 * and return EEXIST.  Rename or link TO "." or ".." is forbidden.
8317c478bd9Sstevel@tonic-gate 	 */
8327c478bd9Sstevel@tonic-gate 	if (namep[0] == '.' &&
8337c478bd9Sstevel@tonic-gate 	    (namlen == 1 || (namlen == 2 && namep[1] == '.'))) {
8347c478bd9Sstevel@tonic-gate 		if (op == DE_RENAME) {
8357c478bd9Sstevel@tonic-gate 			return (EINVAL);	/* *SIGH* should be ENOTEMPTY */
8367c478bd9Sstevel@tonic-gate 		}
8377c478bd9Sstevel@tonic-gate 		return (EEXIST);
8387c478bd9Sstevel@tonic-gate 	}
8397c478bd9Sstevel@tonic-gate 	/*
8407c478bd9Sstevel@tonic-gate 	 * For link and rename lock the source entry and check the link count
8417c478bd9Sstevel@tonic-gate 	 * to see if it has been removed while it was unlocked.  If not, we
8427c478bd9Sstevel@tonic-gate 	 * increment the link count and force the inode to disk to make sure
8437c478bd9Sstevel@tonic-gate 	 * that it is there before any directory entry that points to it.
8447c478bd9Sstevel@tonic-gate 	 *
8457c478bd9Sstevel@tonic-gate 	 * In the case of a symbolic link, we are dealing with a new inode
8467c478bd9Sstevel@tonic-gate 	 * which does not yet have any links.  We've created it with a link
8477c478bd9Sstevel@tonic-gate 	 * count of 1, and we don't want to increment it since this will be
8487c478bd9Sstevel@tonic-gate 	 * its first link.
8497c478bd9Sstevel@tonic-gate 	 *
8507c478bd9Sstevel@tonic-gate 	 * We are about to push the inode to disk. We make sure
8517c478bd9Sstevel@tonic-gate 	 * that the inode's data blocks are flushed first so the
8527c478bd9Sstevel@tonic-gate 	 * inode and it's data blocks are always in sync.  This
8537c478bd9Sstevel@tonic-gate 	 * adds some robustness in in the event of a power failure
8547c478bd9Sstevel@tonic-gate 	 * or panic where sync fails. If we panic before the
8557c478bd9Sstevel@tonic-gate 	 * inode is updated, then the inode still refers to the
8567c478bd9Sstevel@tonic-gate 	 * old data blocks (or none for a new file). If we panic
8577c478bd9Sstevel@tonic-gate 	 * after the inode is updated, then the inode refers to
8587c478bd9Sstevel@tonic-gate 	 * the new data blocks.
8597c478bd9Sstevel@tonic-gate 	 *
8607c478bd9Sstevel@tonic-gate 	 * We do this before grabbing the i_contents lock because
8617c478bd9Sstevel@tonic-gate 	 * ufs_syncip() will want that lock. We could do the data
8627c478bd9Sstevel@tonic-gate 	 * syncing after the removal checks, but upon return from
8637c478bd9Sstevel@tonic-gate 	 * the data sync we would have to repeat the removal
8647c478bd9Sstevel@tonic-gate 	 * checks.
8657c478bd9Sstevel@tonic-gate 	 */
8667c478bd9Sstevel@tonic-gate 	if (err = TRANS_SYNCIP(sip, 0, I_DSYNC, TOP_FSYNC)) {
8677c478bd9Sstevel@tonic-gate 		return (err);
8687c478bd9Sstevel@tonic-gate 	}
8697c478bd9Sstevel@tonic-gate 
8707c478bd9Sstevel@tonic-gate 	rw_enter(&sip->i_contents, RW_WRITER);
8717c478bd9Sstevel@tonic-gate 	if (sip->i_nlink <= 0) {
8727c478bd9Sstevel@tonic-gate 		rw_exit(&sip->i_contents);
8737c478bd9Sstevel@tonic-gate 		return (ENOENT);
8747c478bd9Sstevel@tonic-gate 	}
8757c478bd9Sstevel@tonic-gate 	if (sip->i_nlink == MAXLINK) {
8767c478bd9Sstevel@tonic-gate 		rw_exit(&sip->i_contents);
8777c478bd9Sstevel@tonic-gate 		return (EMLINK);
8787c478bd9Sstevel@tonic-gate 	}
8797c478bd9Sstevel@tonic-gate 
8807c478bd9Sstevel@tonic-gate 	/*
8817c478bd9Sstevel@tonic-gate 	 * Sync the indirect blocks associated with the file
8827c478bd9Sstevel@tonic-gate 	 * for the same reasons as described above.  Since this
8837c478bd9Sstevel@tonic-gate 	 * call wants the i_contents lock held for it we can do
8847c478bd9Sstevel@tonic-gate 	 * this here with no extra work.
8857c478bd9Sstevel@tonic-gate 	 */
8867c478bd9Sstevel@tonic-gate 	if (err = ufs_sync_indir(sip)) {
8877c478bd9Sstevel@tonic-gate 		rw_exit(&sip->i_contents);
8887c478bd9Sstevel@tonic-gate 		return (err);
8897c478bd9Sstevel@tonic-gate 	}
8907c478bd9Sstevel@tonic-gate 
8917c478bd9Sstevel@tonic-gate 	if (op != DE_SYMLINK)
8927c478bd9Sstevel@tonic-gate 		sip->i_nlink++;
8937c478bd9Sstevel@tonic-gate 	TRANS_INODE(sip->i_ufsvfs, sip);
8947c478bd9Sstevel@tonic-gate 	sip->i_flag |= ICHG;
8957c478bd9Sstevel@tonic-gate 	sip->i_seq++;
8967c478bd9Sstevel@tonic-gate 	ufs_iupdat(sip, I_SYNC);
8977c478bd9Sstevel@tonic-gate 	rw_exit(&sip->i_contents);
8987c478bd9Sstevel@tonic-gate 
8997c478bd9Sstevel@tonic-gate 	/*
9007c478bd9Sstevel@tonic-gate 	 * If target directory has not been removed, then we can consider
9017c478bd9Sstevel@tonic-gate 	 * allowing file to be created.
9027c478bd9Sstevel@tonic-gate 	 */
9037c478bd9Sstevel@tonic-gate 	if (tdp->i_nlink <= 0) {
9047c478bd9Sstevel@tonic-gate 		err = ENOENT;
9057c478bd9Sstevel@tonic-gate 		goto out2;
9067c478bd9Sstevel@tonic-gate 	}
90760c8e821SFrank Batschulat 
9087c478bd9Sstevel@tonic-gate 	/*
90960c8e821SFrank Batschulat 	 * Check accessibility of target directory.
9107c478bd9Sstevel@tonic-gate 	 */
91160c8e821SFrank Batschulat 	if (err = ufs_diraccess(tdp, IEXEC, cr))
9127c478bd9Sstevel@tonic-gate 		goto out2;
9137c478bd9Sstevel@tonic-gate 
9147c478bd9Sstevel@tonic-gate 	/*
9157c478bd9Sstevel@tonic-gate 	 * Search for the entry. Return VN_HELD tip if found.
9167c478bd9Sstevel@tonic-gate 	 */
9177c478bd9Sstevel@tonic-gate 	tip = NULL;
9187c478bd9Sstevel@tonic-gate 	slot.status = NONE;
9197c478bd9Sstevel@tonic-gate 	slot.fbp = NULL;
9207c478bd9Sstevel@tonic-gate 	rw_enter(&tdp->i_ufsvfs->vfs_dqrwlock, RW_READER);
9217c478bd9Sstevel@tonic-gate 	rw_enter(&tdp->i_contents, RW_WRITER);
9227c478bd9Sstevel@tonic-gate 	err = ufs_dircheckforname(tdp, namep, namlen, &slot, &tip, cr, 0);
9237c478bd9Sstevel@tonic-gate 	if (err)
9247c478bd9Sstevel@tonic-gate 		goto out;
9257c478bd9Sstevel@tonic-gate 
9267c478bd9Sstevel@tonic-gate 	if (tip) {
9277c478bd9Sstevel@tonic-gate 		switch (op) {
9287c478bd9Sstevel@tonic-gate 		case DE_RENAME:
9297c478bd9Sstevel@tonic-gate 			err = ufs_dirrename(sdp, sip, tdp, namep,
9307c478bd9Sstevel@tonic-gate 			    tip, &slot, cr);
9317c478bd9Sstevel@tonic-gate 			break;
9327c478bd9Sstevel@tonic-gate 
9337c478bd9Sstevel@tonic-gate 		case DE_LINK:
9347c478bd9Sstevel@tonic-gate 		case DE_SYMLINK:
9357c478bd9Sstevel@tonic-gate 			/*
9367c478bd9Sstevel@tonic-gate 			 * Can't link to an existing file.
9377c478bd9Sstevel@tonic-gate 			 */
9387c478bd9Sstevel@tonic-gate 			err = EEXIST;
9397c478bd9Sstevel@tonic-gate 			break;
9407c478bd9Sstevel@tonic-gate 		default:
9417c478bd9Sstevel@tonic-gate 			break;
9427c478bd9Sstevel@tonic-gate 		}
9437c478bd9Sstevel@tonic-gate 	} else {
9447c478bd9Sstevel@tonic-gate 		/*
9457c478bd9Sstevel@tonic-gate 		 * The entry does not exist. Check write permission in
9467c478bd9Sstevel@tonic-gate 		 * directory to see if entry can be created.
9477c478bd9Sstevel@tonic-gate 		 */
94860c8e821SFrank Batschulat 		if (err = ufs_iaccess(tdp, IWRITE, cr, 0))
9497c478bd9Sstevel@tonic-gate 			goto out;
9507c478bd9Sstevel@tonic-gate 		err = ufs_diraddentry(tdp, namep, op, namlen, &slot, sip, sdp,
9517c478bd9Sstevel@tonic-gate 		    cr);
9527c478bd9Sstevel@tonic-gate 	}
9537c478bd9Sstevel@tonic-gate 
9547c478bd9Sstevel@tonic-gate out:
9557c478bd9Sstevel@tonic-gate 	if (slot.fbp)
9567c478bd9Sstevel@tonic-gate 		fbrelse(slot.fbp, S_OTHER);
9577c478bd9Sstevel@tonic-gate 
9587c478bd9Sstevel@tonic-gate 	rw_exit(&tdp->i_contents);
9597c478bd9Sstevel@tonic-gate 
9607c478bd9Sstevel@tonic-gate 	/*
9617c478bd9Sstevel@tonic-gate 	 * Drop vfs_dqrwlock before calling VN_RELE() on tip to
9627c478bd9Sstevel@tonic-gate 	 * avoid deadlock since ufs_delete() grabs vfs_dqrwlock as reader.
9637c478bd9Sstevel@tonic-gate 	 */
9647c478bd9Sstevel@tonic-gate 	rw_exit(&tdp->i_ufsvfs->vfs_dqrwlock);
9657c478bd9Sstevel@tonic-gate 
9667c478bd9Sstevel@tonic-gate 	/*
9677c478bd9Sstevel@tonic-gate 	 * If we renamed a file over the top of an existing file,
9687c478bd9Sstevel@tonic-gate 	 * or linked a file to an existing file (or tried to),
969*9b5097eeSOwen Roberts 	 * then release and delete (or just release) the inode.
9707c478bd9Sstevel@tonic-gate 	 */
971*9b5097eeSOwen Roberts 	if (tip)
9727c478bd9Sstevel@tonic-gate 		VN_RELE(ITOV(tip));
9737c478bd9Sstevel@tonic-gate 
9747c478bd9Sstevel@tonic-gate out2:
9757c478bd9Sstevel@tonic-gate 	if (err) {
9767c478bd9Sstevel@tonic-gate 		/*
9777c478bd9Sstevel@tonic-gate 		 * Undo bumped link count.
9787c478bd9Sstevel@tonic-gate 		 */
9797c478bd9Sstevel@tonic-gate 		if (op != DE_SYMLINK) {
9807c478bd9Sstevel@tonic-gate 			rw_enter(&sip->i_contents, RW_WRITER);
9817c478bd9Sstevel@tonic-gate 			sip->i_nlink--;
9827c478bd9Sstevel@tonic-gate 			ufs_setreclaim(sip);
9837c478bd9Sstevel@tonic-gate 			TRANS_INODE(sip->i_ufsvfs, sip);
9847c478bd9Sstevel@tonic-gate 			sip->i_flag |= ICHG;
9857c478bd9Sstevel@tonic-gate 			sip->i_seq++;
9867c478bd9Sstevel@tonic-gate 			ITIMES_NOLOCK(sip);
9877c478bd9Sstevel@tonic-gate 			rw_exit(&sip->i_contents);
9887c478bd9Sstevel@tonic-gate 		}
9897c478bd9Sstevel@tonic-gate 	}
9907c478bd9Sstevel@tonic-gate 	return (err);
9917c478bd9Sstevel@tonic-gate }
9927c478bd9Sstevel@tonic-gate 
/*
 * Check for the existence of a name in a directory (unless noentry
 * is set), or else for an empty slot in which an entry may be made.
 * If the requested name is found, then on return *ipp points at the
 * inode and *slotp describes the entry's location in the directory.
 * If the name is not found, then *ipp will be NULL and *slotp will
 * contain information about a directory slot in which an entry may be
 * made (either an empty slot, or the first position past the end of
 * the directory).
 * The target directory inode (tdp) is supplied write locked (both
 * i_rwlock and i_contents).
 *
 * This may not be used on "." or "..", but aliases of "." are ok.
 */
1006baa4d099Sswilcox int
ufs_dircheckforname(struct inode * tdp,char * namep,int namlen,struct ufs_slot * slotp,struct inode ** ipp,struct cred * cr,int noentry)10077c478bd9Sstevel@tonic-gate ufs_dircheckforname(
10087c478bd9Sstevel@tonic-gate 	struct inode *tdp,	/* inode of directory being checked */
10097c478bd9Sstevel@tonic-gate 	char *namep,		/* name we're checking for */
10107c478bd9Sstevel@tonic-gate 	int namlen,		/* length of name, excluding null */
10115b024a5bSbatschul 	struct ufs_slot *slotp,	/* slot structure */
10127c478bd9Sstevel@tonic-gate 	struct inode **ipp,	/* return inode if we find one */
10137c478bd9Sstevel@tonic-gate 	struct cred *cr,
10147c478bd9Sstevel@tonic-gate 	int noentry)		/* noentry - just look for space */
10157c478bd9Sstevel@tonic-gate {
10167c478bd9Sstevel@tonic-gate 	uint64_t handle;
10177c478bd9Sstevel@tonic-gate 	struct fbuf *fbp;	/* pointer to directory block */
10187c478bd9Sstevel@tonic-gate 	struct direct *ep;	/* directory entry */
10197c478bd9Sstevel@tonic-gate 	struct direct *nep;	/* next directory entry */
10207c478bd9Sstevel@tonic-gate 	dcanchor_t *dcap;
10217c478bd9Sstevel@tonic-gate 	vnode_t *dvp;		/* directory vnode ptr */
10227c478bd9Sstevel@tonic-gate 	off_t dirsize;		/* size of the directory */
10237c478bd9Sstevel@tonic-gate 	off_t offset;		/* offset in the directory */
10247c478bd9Sstevel@tonic-gate 	off_t last_offset;	/* last offset */
10257c478bd9Sstevel@tonic-gate 	off_t enduseful;	/* pointer past last used dir slot */
10267c478bd9Sstevel@tonic-gate 	int entryoffsetinblk;	/* offset of ep in fbp's buffer */
10277c478bd9Sstevel@tonic-gate 	int i;			/* length of mangled entry */
10287c478bd9Sstevel@tonic-gate 	int needed;
10297c478bd9Sstevel@tonic-gate 	int err;
10307c478bd9Sstevel@tonic-gate 	int first;
10317c478bd9Sstevel@tonic-gate 	int caching;
10327c478bd9Sstevel@tonic-gate 	int stat;
10337c478bd9Sstevel@tonic-gate 	ino_t ep_ino;
10347c478bd9Sstevel@tonic-gate 	slotstat_t initstat = slotp->status;
10357c478bd9Sstevel@tonic-gate 
10367c478bd9Sstevel@tonic-gate 	ASSERT(RW_WRITE_HELD(&tdp->i_rwlock));
10377c478bd9Sstevel@tonic-gate 	ASSERT(RW_WRITE_HELD(&tdp->i_contents));
10387c478bd9Sstevel@tonic-gate 	ASSERT(*ipp == NULL);
10397c478bd9Sstevel@tonic-gate 	fbp = NULL;
10407c478bd9Sstevel@tonic-gate 
10417c478bd9Sstevel@tonic-gate 	/*
10427c478bd9Sstevel@tonic-gate 	 * First check if there is a complete cache of the directory.
10437c478bd9Sstevel@tonic-gate 	 */
10447c478bd9Sstevel@tonic-gate 	dvp = ITOV(tdp);
10457c478bd9Sstevel@tonic-gate 
10467c478bd9Sstevel@tonic-gate 	dcap = &tdp->i_danchor;
10477c478bd9Sstevel@tonic-gate 	if (noentry) {
10487c478bd9Sstevel@tonic-gate 		/*
10497c478bd9Sstevel@tonic-gate 		 * We know from the 1st level dnlc cache that the entry
10507c478bd9Sstevel@tonic-gate 		 * doesn't exist, so don't bother searching the directory
10517c478bd9Sstevel@tonic-gate 		 * cache, but just look for space (possibly in the directory
10527c478bd9Sstevel@tonic-gate 		 * cache).
10537c478bd9Sstevel@tonic-gate 		 */
10547c478bd9Sstevel@tonic-gate 		stat = DNOENT;
10557c478bd9Sstevel@tonic-gate 	} else {
10567c478bd9Sstevel@tonic-gate 		stat = dnlc_dir_lookup(dcap, namep, &handle);
10577c478bd9Sstevel@tonic-gate 	}
10587c478bd9Sstevel@tonic-gate 	switch (stat) {
10597c478bd9Sstevel@tonic-gate 	case DFOUND:
10607c478bd9Sstevel@tonic-gate 		ep_ino = (ino_t)H_TO_INO(handle);
10617c478bd9Sstevel@tonic-gate 		if (tdp->i_number == ep_ino) {
10627c478bd9Sstevel@tonic-gate 			*ipp = tdp;	/* we want ourself, ie "." */
10637c478bd9Sstevel@tonic-gate 			VN_HOLD(dvp);
10647c478bd9Sstevel@tonic-gate 		} else {
10657c478bd9Sstevel@tonic-gate 			err = ufs_iget_alloced(tdp->i_vfs, ep_ino, ipp, cr);
10667c478bd9Sstevel@tonic-gate 			if (err)
10677c478bd9Sstevel@tonic-gate 				return (err);
10687c478bd9Sstevel@tonic-gate 		}
10697c478bd9Sstevel@tonic-gate 		offset = H_TO_OFF(handle);
10707c478bd9Sstevel@tonic-gate 		first = 0;
10717c478bd9Sstevel@tonic-gate 		if (offset & 1) {
10727c478bd9Sstevel@tonic-gate 			/* This is the first entry in the block */
10737c478bd9Sstevel@tonic-gate 			first = 1;
10747c478bd9Sstevel@tonic-gate 			offset -= 1;
10757c478bd9Sstevel@tonic-gate 			ASSERT((offset & (DIRBLKSIZ - 1)) == 0);
10767c478bd9Sstevel@tonic-gate 		}
10777c478bd9Sstevel@tonic-gate 		err = blkatoff(tdp, offset, (char **)&ep, &fbp);
10787c478bd9Sstevel@tonic-gate 		if (err) {
10797c478bd9Sstevel@tonic-gate 			VN_RELE(ITOV(*ipp));
10807c478bd9Sstevel@tonic-gate 			*ipp = NULL;
10817c478bd9Sstevel@tonic-gate 			return (err);
10827c478bd9Sstevel@tonic-gate 		}
10837c478bd9Sstevel@tonic-gate 		/*
10847c478bd9Sstevel@tonic-gate 		 * Check the validity of the entry.
10857c478bd9Sstevel@tonic-gate 		 * If it's bad, then throw away the cache and
10867c478bd9Sstevel@tonic-gate 		 * continue without it. The dirmangled() routine
10877c478bd9Sstevel@tonic-gate 		 * will then be called upon it.
10887c478bd9Sstevel@tonic-gate 		 */
10897c478bd9Sstevel@tonic-gate 		if ((ep->d_reclen == 0) || (ep->d_reclen & 0x3)) {
10907c478bd9Sstevel@tonic-gate 			VN_RELE(ITOV(*ipp));
10917c478bd9Sstevel@tonic-gate 			*ipp = NULL;
10927c478bd9Sstevel@tonic-gate 			dnlc_dir_purge(dcap);
10937c478bd9Sstevel@tonic-gate 			break;
10947c478bd9Sstevel@tonic-gate 		}
10957c478bd9Sstevel@tonic-gate 		/*
10967c478bd9Sstevel@tonic-gate 		 * Remember the returned offset is the offset of the
10977c478bd9Sstevel@tonic-gate 		 * preceding record (unless this is the 1st record
10987c478bd9Sstevel@tonic-gate 		 * in the DIRBLKSIZ sized block (disk sector)), then it's
10997c478bd9Sstevel@tonic-gate 		 * offset + 1. Note, no real offsets are on odd boundaries.
11007c478bd9Sstevel@tonic-gate 		 */
11017c478bd9Sstevel@tonic-gate 		if (first) {
11027c478bd9Sstevel@tonic-gate 			ASSERT((offset & (DIRBLKSIZ - 1)) == 0);
11037c478bd9Sstevel@tonic-gate 			slotp->offset = offset;
11047c478bd9Sstevel@tonic-gate 			slotp->size = 0;
11057c478bd9Sstevel@tonic-gate 			slotp->ep = ep;
11067c478bd9Sstevel@tonic-gate 		} else {
11077c478bd9Sstevel@tonic-gate 			/* get the next entry */
11087c478bd9Sstevel@tonic-gate 			nep = (struct direct *)((char *)ep + ep->d_reclen);
11097c478bd9Sstevel@tonic-gate 			/*
11107c478bd9Sstevel@tonic-gate 			 * Check the validity of this entry as well
11117c478bd9Sstevel@tonic-gate 			 * If it's bad, then throw away the cache and
11127c478bd9Sstevel@tonic-gate 			 * continue without it. The dirmangled() routine
11137c478bd9Sstevel@tonic-gate 			 * will then be called upon it.
11147c478bd9Sstevel@tonic-gate 			 */
11157c478bd9Sstevel@tonic-gate 			if ((nep->d_reclen == 0) || (nep->d_reclen & 0x3) ||
11167c478bd9Sstevel@tonic-gate 			    (nep->d_ino != ep_ino)) {
11177c478bd9Sstevel@tonic-gate 				VN_RELE(ITOV(*ipp));
11187c478bd9Sstevel@tonic-gate 				*ipp = NULL;
11197c478bd9Sstevel@tonic-gate 				dnlc_dir_purge(dcap);
11207c478bd9Sstevel@tonic-gate 				break;
11217c478bd9Sstevel@tonic-gate 			}
11227c478bd9Sstevel@tonic-gate 			slotp->offset = offset + ep->d_reclen;
11237c478bd9Sstevel@tonic-gate 			slotp->size = ep->d_reclen;
11247c478bd9Sstevel@tonic-gate 			slotp->ep = nep;
11257c478bd9Sstevel@tonic-gate 		}
11267c478bd9Sstevel@tonic-gate 		slotp->status = EXIST;
11277c478bd9Sstevel@tonic-gate 		slotp->fbp = fbp;
11287c478bd9Sstevel@tonic-gate 		slotp->endoff = 0;
11297c478bd9Sstevel@tonic-gate 		slotp->cached = 1;
11307c478bd9Sstevel@tonic-gate 		dnlc_update(dvp, namep, ITOV(*ipp));
11317c478bd9Sstevel@tonic-gate 		return (0);
11327c478bd9Sstevel@tonic-gate 	case DNOENT:
11337c478bd9Sstevel@tonic-gate 		/*
11347c478bd9Sstevel@tonic-gate 		 * The caller gets to set the initial slot status to
11357c478bd9Sstevel@tonic-gate 		 * indicate whether it's interested in getting a
11367c478bd9Sstevel@tonic-gate 		 * empty slot. For example, the status can be set
11377c478bd9Sstevel@tonic-gate 		 * to FOUND when an entry is being deleted.
11387c478bd9Sstevel@tonic-gate 		 */
11397c478bd9Sstevel@tonic-gate 		ASSERT(slotp->fbp == NULL);
11407c478bd9Sstevel@tonic-gate 		if (slotp->status == FOUND) {
11417c478bd9Sstevel@tonic-gate 			return (0);
11427c478bd9Sstevel@tonic-gate 		}
11437c478bd9Sstevel@tonic-gate 		switch (dnlc_dir_rem_space_by_len(dcap, LDIRSIZ(namlen),
11447c478bd9Sstevel@tonic-gate 		    &handle)) {
11457c478bd9Sstevel@tonic-gate 		case DFOUND:
11467c478bd9Sstevel@tonic-gate 			offset = (off_t)handle;
11477c478bd9Sstevel@tonic-gate 			err = blkatoff(tdp, offset, (char **)&ep, &fbp);
11487c478bd9Sstevel@tonic-gate 			if (err) {
11497c478bd9Sstevel@tonic-gate 				dnlc_dir_purge(dcap);
11507c478bd9Sstevel@tonic-gate 				ASSERT(*ipp == NULL);
11517c478bd9Sstevel@tonic-gate 				return (err);
11527c478bd9Sstevel@tonic-gate 			}
11537c478bd9Sstevel@tonic-gate 			/*
11547c478bd9Sstevel@tonic-gate 			 * Check the validity of the entry.
11557c478bd9Sstevel@tonic-gate 			 * If it's bad, then throw away the cache and
11567c478bd9Sstevel@tonic-gate 			 * continue without it. The dirmangled() routine
11577c478bd9Sstevel@tonic-gate 			 * will then be called upon it.
11587c478bd9Sstevel@tonic-gate 			 */
11597c478bd9Sstevel@tonic-gate 			if ((ep->d_reclen == 0) || (ep->d_reclen & 0x3)) {
11607c478bd9Sstevel@tonic-gate 				dnlc_dir_purge(dcap);
11617c478bd9Sstevel@tonic-gate 				break;
11627c478bd9Sstevel@tonic-gate 			}
11637c478bd9Sstevel@tonic-gate 			/*
11647c478bd9Sstevel@tonic-gate 			 * Remember the returned offset is the offset of the
11657c478bd9Sstevel@tonic-gate 			 * containing record.
11667c478bd9Sstevel@tonic-gate 			 */
11677c478bd9Sstevel@tonic-gate 			slotp->status = FOUND;
11687c478bd9Sstevel@tonic-gate 			slotp->ep = ep;
11697c478bd9Sstevel@tonic-gate 			slotp->offset = offset;
11707c478bd9Sstevel@tonic-gate 			slotp->fbp = fbp;
11717c478bd9Sstevel@tonic-gate 			slotp->size = ep->d_reclen;
11727c478bd9Sstevel@tonic-gate 			/*
11737c478bd9Sstevel@tonic-gate 			 * Set end offset to 0. Truncation is handled
11747c478bd9Sstevel@tonic-gate 			 * because the dnlc cache will blow away the
11757c478bd9Sstevel@tonic-gate 			 * cached directory when an entry is removed
11767c478bd9Sstevel@tonic-gate 			 * that drops the entries left to less than half
11777c478bd9Sstevel@tonic-gate 			 * the minimum number (dnlc_min_dir_cache).
11787c478bd9Sstevel@tonic-gate 			 */
11797c478bd9Sstevel@tonic-gate 			slotp->endoff = 0;
11807c478bd9Sstevel@tonic-gate 			slotp->cached = 1;
11817c478bd9Sstevel@tonic-gate 			return (0);
11827c478bd9Sstevel@tonic-gate 		case DNOENT:
11837c478bd9Sstevel@tonic-gate 			slotp->status = NONE;
11847c478bd9Sstevel@tonic-gate 			slotp->offset = P2ROUNDUP_TYPED(tdp->i_size,
11857c478bd9Sstevel@tonic-gate 			    DIRBLKSIZ, u_offset_t);
11867c478bd9Sstevel@tonic-gate 			slotp->size = DIRBLKSIZ;
11877c478bd9Sstevel@tonic-gate 			slotp->endoff = 0;
11887c478bd9Sstevel@tonic-gate 			slotp->cached = 1;
11897c478bd9Sstevel@tonic-gate 			return (0);
11907c478bd9Sstevel@tonic-gate 		default:
11917c478bd9Sstevel@tonic-gate 			break;
11927c478bd9Sstevel@tonic-gate 		}
11937c478bd9Sstevel@tonic-gate 		break;
11947c478bd9Sstevel@tonic-gate 	}
11957c478bd9Sstevel@tonic-gate 	slotp->cached = 0;
11967c478bd9Sstevel@tonic-gate 	caching = NULL;
11977f63b8c3Svsakar 	if (!noentry && tdp->i_size >= ufs_min_dir_cache) {
11987c478bd9Sstevel@tonic-gate 		/*
11997f63b8c3Svsakar 		 * if the directory caching disable time has expired
12007f63b8c3Svsakar 		 * enable caching again.
12017c478bd9Sstevel@tonic-gate 		 */
12027f63b8c3Svsakar 		if (tdp->i_cachedir == CD_DISABLED_NOMEM &&
12037f63b8c3Svsakar 		    gethrtime() - ufs_dc_disable_at > ufs_dc_disable_duration) {
12047f63b8c3Svsakar 			ufs_dc_disable_at = 0;
12057f63b8c3Svsakar 			tdp->i_cachedir = CD_ENABLED;
12067f63b8c3Svsakar 		}
12077f63b8c3Svsakar 		/*
12087f63b8c3Svsakar 		 * Attempt to cache any directories greater than the tunable
12097f63b8c3Svsakar 		 * ufs_min_dir_cache. If it fails due to memory shortage
12107f63b8c3Svsakar 		 * (DNOMEM), disable caching for this directory and record
12117f63b8c3Svsakar 		 * the system time. Any attempt after the disable time has
12127f63b8c3Svsakar 		 * expired will enable the caching again.
12137f63b8c3Svsakar 		 */
12147f63b8c3Svsakar 		if (tdp->i_cachedir == CD_ENABLED) {
12157c478bd9Sstevel@tonic-gate 			switch (dnlc_dir_start(dcap,
12167c478bd9Sstevel@tonic-gate 			    tdp->i_size >> AV_DIRECT_SHIFT)) {
12177c478bd9Sstevel@tonic-gate 			case DNOMEM:
12187f63b8c3Svsakar 				tdp->i_cachedir = CD_DISABLED_NOMEM;
12197f63b8c3Svsakar 				ufs_dc_disable_at = gethrtime();
12207f63b8c3Svsakar 				break;
12217c478bd9Sstevel@tonic-gate 			case DTOOBIG:
12227f63b8c3Svsakar 				tdp->i_cachedir = CD_DISABLED_TOOBIG;
12237c478bd9Sstevel@tonic-gate 				break;
12247c478bd9Sstevel@tonic-gate 			case DOK:
12257c478bd9Sstevel@tonic-gate 				caching = 1;
12267c478bd9Sstevel@tonic-gate 				break;
12277c478bd9Sstevel@tonic-gate 			default:
12287c478bd9Sstevel@tonic-gate 				break;
12297c478bd9Sstevel@tonic-gate 			}
12307c478bd9Sstevel@tonic-gate 		}
12317c478bd9Sstevel@tonic-gate 	}
12327c478bd9Sstevel@tonic-gate 
12337c478bd9Sstevel@tonic-gate 	/*
12347c478bd9Sstevel@tonic-gate 	 * No point in using i_diroff since we must search whole directory
12357c478bd9Sstevel@tonic-gate 	 */
12367c478bd9Sstevel@tonic-gate 	dirsize = P2ROUNDUP_TYPED(tdp->i_size, DIRBLKSIZ, u_offset_t);
12377c478bd9Sstevel@tonic-gate 	enduseful = 0;
12387c478bd9Sstevel@tonic-gate 	offset = last_offset = 0;
12397c478bd9Sstevel@tonic-gate 	entryoffsetinblk = 0;
12407c478bd9Sstevel@tonic-gate 	needed = (int)LDIRSIZ(namlen);
12417c478bd9Sstevel@tonic-gate 	while (offset < dirsize) {
12427c478bd9Sstevel@tonic-gate 		/*
12437c478bd9Sstevel@tonic-gate 		 * If offset is on a block boundary,
12447c478bd9Sstevel@tonic-gate 		 * read the next directory block.
12457c478bd9Sstevel@tonic-gate 		 * Release previous if it exists.
12467c478bd9Sstevel@tonic-gate 		 */
12477c478bd9Sstevel@tonic-gate 		if (blkoff(tdp->i_fs, offset) == 0) {
12487c478bd9Sstevel@tonic-gate 			if (fbp != NULL)
12497c478bd9Sstevel@tonic-gate 				fbrelse(fbp, S_OTHER);
12507c478bd9Sstevel@tonic-gate 
12517c478bd9Sstevel@tonic-gate 			err = blkatoff(tdp, offset, (char **)0, &fbp);
12527c478bd9Sstevel@tonic-gate 			if (err) {
12537c478bd9Sstevel@tonic-gate 				ASSERT(*ipp == NULL);
12547c478bd9Sstevel@tonic-gate 				if (caching) {
12557c478bd9Sstevel@tonic-gate 					dnlc_dir_purge(dcap);
12567c478bd9Sstevel@tonic-gate 				}
12577c478bd9Sstevel@tonic-gate 				return (err);
12587c478bd9Sstevel@tonic-gate 			}
12597c478bd9Sstevel@tonic-gate 			entryoffsetinblk = 0;
12607c478bd9Sstevel@tonic-gate 		}
12617c478bd9Sstevel@tonic-gate 		/*
12627c478bd9Sstevel@tonic-gate 		 * If still looking for a slot, and at a DIRBLKSIZ
12637c478bd9Sstevel@tonic-gate 		 * boundary, have to start looking for free space
12647c478bd9Sstevel@tonic-gate 		 * again.
12657c478bd9Sstevel@tonic-gate 		 */
12667c478bd9Sstevel@tonic-gate 		if (slotp->status == NONE &&
12677c478bd9Sstevel@tonic-gate 		    (entryoffsetinblk & (DIRBLKSIZ - 1)) == 0) {
12687c478bd9Sstevel@tonic-gate 			slotp->offset = -1;
12697c478bd9Sstevel@tonic-gate 		}
12707c478bd9Sstevel@tonic-gate 		/*
12717c478bd9Sstevel@tonic-gate 		 * If the next entry is a zero length record or if the
12727c478bd9Sstevel@tonic-gate 		 * record length is invalid, then skip to the next
12737c478bd9Sstevel@tonic-gate 		 * directory block.  Complete validation checks are
12747c478bd9Sstevel@tonic-gate 		 * done if the record length is invalid.
12757c478bd9Sstevel@tonic-gate 		 *
12767c478bd9Sstevel@tonic-gate 		 * Full validation checks are slow so they are disabled
12777c478bd9Sstevel@tonic-gate 		 * by default.  Complete checks can be run by patching
12787c478bd9Sstevel@tonic-gate 		 * "dirchk" to be true.
12797c478bd9Sstevel@tonic-gate 		 *
12807c478bd9Sstevel@tonic-gate 		 * We do not have to check the validity of
12817c478bd9Sstevel@tonic-gate 		 * entryoffsetinblk here because it starts out as zero
12827c478bd9Sstevel@tonic-gate 		 * and is only incremented by d_reclen values that we
12837c478bd9Sstevel@tonic-gate 		 * validate here.
12847c478bd9Sstevel@tonic-gate 		 */
12857c478bd9Sstevel@tonic-gate 		ep = (struct direct *)(fbp->fb_addr + entryoffsetinblk);
12867c478bd9Sstevel@tonic-gate 		if (ep->d_reclen == 0 ||
12877c478bd9Sstevel@tonic-gate 		    (dirchk || (ep->d_reclen & 0x3)) &&
12887c478bd9Sstevel@tonic-gate 		    dirmangled(tdp, ep, entryoffsetinblk, offset)) {
12897c478bd9Sstevel@tonic-gate 			i = DIRBLKSIZ - (entryoffsetinblk & (DIRBLKSIZ - 1));
12907c478bd9Sstevel@tonic-gate 			offset += i;
12917c478bd9Sstevel@tonic-gate 			entryoffsetinblk += i;
12927c478bd9Sstevel@tonic-gate 			if (caching) {
12937c478bd9Sstevel@tonic-gate 				dnlc_dir_purge(dcap);
12947c478bd9Sstevel@tonic-gate 				caching = 0;
12957c478bd9Sstevel@tonic-gate 			}
12967c478bd9Sstevel@tonic-gate 			continue;
12977c478bd9Sstevel@tonic-gate 		}
12987c478bd9Sstevel@tonic-gate 
12997c478bd9Sstevel@tonic-gate 		/*
13007c478bd9Sstevel@tonic-gate 		 * Add named entries and free space into the directory cache
13017c478bd9Sstevel@tonic-gate 		 */
13027c478bd9Sstevel@tonic-gate 		if (caching) {
13037c478bd9Sstevel@tonic-gate 			ushort_t extra;
13047c478bd9Sstevel@tonic-gate 			off_t off2;
13057c478bd9Sstevel@tonic-gate 
13067c478bd9Sstevel@tonic-gate 			if (ep->d_ino == 0) {
13077c478bd9Sstevel@tonic-gate 				extra = ep->d_reclen;
13087c478bd9Sstevel@tonic-gate 				if (offset & (DIRBLKSIZ - 1)) {
13097c478bd9Sstevel@tonic-gate 					dnlc_dir_purge(dcap);
13107c478bd9Sstevel@tonic-gate 					caching = 0;
13117c478bd9Sstevel@tonic-gate 				}
13127c478bd9Sstevel@tonic-gate 			} else {
13137c478bd9Sstevel@tonic-gate 				/*
13147c478bd9Sstevel@tonic-gate 				 * entries hold the previous offset if
13157c478bd9Sstevel@tonic-gate 				 * not the 1st one
13167c478bd9Sstevel@tonic-gate 				 */
13177c478bd9Sstevel@tonic-gate 				if (offset & (DIRBLKSIZ - 1)) {
13187c478bd9Sstevel@tonic-gate 					off2 = last_offset;
13197c478bd9Sstevel@tonic-gate 				} else {
13207c478bd9Sstevel@tonic-gate 					off2 = offset + 1;
13217c478bd9Sstevel@tonic-gate 				}
13227c478bd9Sstevel@tonic-gate 				caching = (dnlc_dir_add_entry(dcap, ep->d_name,
13237c478bd9Sstevel@tonic-gate 				    INO_OFF_TO_H(ep->d_ino, off2)) == DOK);
13247c478bd9Sstevel@tonic-gate 				extra = ep->d_reclen - DIRSIZ(ep);
13257c478bd9Sstevel@tonic-gate 			}
13267c478bd9Sstevel@tonic-gate 			if (caching && (extra >= LDIRSIZ(1))) {
13277c478bd9Sstevel@tonic-gate 				caching = (dnlc_dir_add_space(dcap, extra,
13287c478bd9Sstevel@tonic-gate 				    (uint64_t)offset) == DOK);
13297c478bd9Sstevel@tonic-gate 			}
13307c478bd9Sstevel@tonic-gate 		}
13317c478bd9Sstevel@tonic-gate 
13327c478bd9Sstevel@tonic-gate 		/*
13337c478bd9Sstevel@tonic-gate 		 * If an appropriate sized slot has not yet been found,
13347c478bd9Sstevel@tonic-gate 		 * check to see if one is available.
13357c478bd9Sstevel@tonic-gate 		 */
13367c478bd9Sstevel@tonic-gate 		if ((slotp->status != FOUND) && (slotp->status != EXIST)) {
13377c478bd9Sstevel@tonic-gate 			int size = ep->d_reclen;
13387c478bd9Sstevel@tonic-gate 
13397c478bd9Sstevel@tonic-gate 			if (ep->d_ino != 0)
13407c478bd9Sstevel@tonic-gate 				size -= DIRSIZ(ep);
13417c478bd9Sstevel@tonic-gate 			if (size > 0) {
13427c478bd9Sstevel@tonic-gate 				if (size >= needed) {
13437c478bd9Sstevel@tonic-gate 					slotp->offset = offset;
13447c478bd9Sstevel@tonic-gate 					slotp->size = ep->d_reclen;
13457c478bd9Sstevel@tonic-gate 					if (noentry) {
13467c478bd9Sstevel@tonic-gate 						slotp->ep = ep;
13477c478bd9Sstevel@tonic-gate 						slotp->fbp = fbp;
13487c478bd9Sstevel@tonic-gate 						slotp->status = FOUND;
13497c478bd9Sstevel@tonic-gate 						slotp->endoff = 0;
13507c478bd9Sstevel@tonic-gate 						return (0);
13517c478bd9Sstevel@tonic-gate 					}
13527c478bd9Sstevel@tonic-gate 					slotp->status = FOUND;
13537c478bd9Sstevel@tonic-gate 				} else if (slotp->status == NONE) {
13547c478bd9Sstevel@tonic-gate 					if (slotp->offset == -1)
13557c478bd9Sstevel@tonic-gate 						slotp->offset = offset;
13567c478bd9Sstevel@tonic-gate 				}
13577c478bd9Sstevel@tonic-gate 			}
13587c478bd9Sstevel@tonic-gate 		}
13597c478bd9Sstevel@tonic-gate 		/*
13607c478bd9Sstevel@tonic-gate 		 * Check for a name match.
13617c478bd9Sstevel@tonic-gate 		 */
13627c478bd9Sstevel@tonic-gate 		if (ep->d_ino && ep->d_namlen == namlen &&
13637c478bd9Sstevel@tonic-gate 		    *namep == *ep->d_name &&	/* fast chk 1st char */
13647c478bd9Sstevel@tonic-gate 		    bcmp(namep, ep->d_name, namlen) == 0) {
13657c478bd9Sstevel@tonic-gate 
13667c478bd9Sstevel@tonic-gate 			tdp->i_diroff = offset;
13677c478bd9Sstevel@tonic-gate 
13687c478bd9Sstevel@tonic-gate 			if (tdp->i_number == ep->d_ino) {
13697c478bd9Sstevel@tonic-gate 				*ipp = tdp;	/* we want ourself, ie "." */
13707c478bd9Sstevel@tonic-gate 				VN_HOLD(dvp);
13717c478bd9Sstevel@tonic-gate 			} else {
13727c478bd9Sstevel@tonic-gate 				err = ufs_iget_alloced(tdp->i_vfs,
13737c478bd9Sstevel@tonic-gate 				    (ino_t)ep->d_ino, ipp, cr);
13747c478bd9Sstevel@tonic-gate 				if (err) {
13757c478bd9Sstevel@tonic-gate 					fbrelse(fbp, S_OTHER);
13767c478bd9Sstevel@tonic-gate 					if (caching)
13777c478bd9Sstevel@tonic-gate 						dnlc_dir_purge(dcap);
13787c478bd9Sstevel@tonic-gate 					return (err);
13797c478bd9Sstevel@tonic-gate 				}
13807c478bd9Sstevel@tonic-gate 			}
13817c478bd9Sstevel@tonic-gate 			slotp->status = EXIST;
13827c478bd9Sstevel@tonic-gate 			slotp->offset = offset;
13837c478bd9Sstevel@tonic-gate 			slotp->size = (int)(offset - last_offset);
13847c478bd9Sstevel@tonic-gate 			slotp->fbp = fbp;
13857c478bd9Sstevel@tonic-gate 			slotp->ep = ep;
13867c478bd9Sstevel@tonic-gate 			slotp->endoff = 0;
13877c478bd9Sstevel@tonic-gate 			if (caching)
13887c478bd9Sstevel@tonic-gate 				dnlc_dir_purge(dcap);
13897c478bd9Sstevel@tonic-gate 			return (0);
13907c478bd9Sstevel@tonic-gate 		}
13917c478bd9Sstevel@tonic-gate 		last_offset = offset;
13927c478bd9Sstevel@tonic-gate 		offset += ep->d_reclen;
13937c478bd9Sstevel@tonic-gate 		entryoffsetinblk += ep->d_reclen;
13947c478bd9Sstevel@tonic-gate 		if (ep->d_ino)
13957c478bd9Sstevel@tonic-gate 			enduseful = offset;
13967c478bd9Sstevel@tonic-gate 	}
13977c478bd9Sstevel@tonic-gate 	if (fbp) {
13987c478bd9Sstevel@tonic-gate 		fbrelse(fbp, S_OTHER);
13997c478bd9Sstevel@tonic-gate 	}
14007c478bd9Sstevel@tonic-gate 
14017c478bd9Sstevel@tonic-gate 	if (caching) {
14027c478bd9Sstevel@tonic-gate 		dnlc_dir_complete(dcap);
14037c478bd9Sstevel@tonic-gate 		slotp->cached = 1;
14047c478bd9Sstevel@tonic-gate 		if (slotp->status == FOUND) {
14057c478bd9Sstevel@tonic-gate 			if (initstat == FOUND) {
14067c478bd9Sstevel@tonic-gate 				return (0);
14077c478bd9Sstevel@tonic-gate 			}
14087c478bd9Sstevel@tonic-gate 			(void) dnlc_dir_rem_space_by_handle(dcap,
14097c478bd9Sstevel@tonic-gate 			    slotp->offset);
14107c478bd9Sstevel@tonic-gate 			slotp->endoff = 0;
14117c478bd9Sstevel@tonic-gate 			return (0);
14127c478bd9Sstevel@tonic-gate 		}
14137c478bd9Sstevel@tonic-gate 	}
14147c478bd9Sstevel@tonic-gate 
14157c478bd9Sstevel@tonic-gate 	if (slotp->status == NONE) {
14167c478bd9Sstevel@tonic-gate 		/*
14177c478bd9Sstevel@tonic-gate 		 * We didn't find a slot; the new directory entry should be put
14187c478bd9Sstevel@tonic-gate 		 * at the end of the directory.  Return an indication of where
14197c478bd9Sstevel@tonic-gate 		 * this is, and set "endoff" to zero; since we're going to have
14207c478bd9Sstevel@tonic-gate 		 * to extend the directory, we're certainly not going to
14217c478bd9Sstevel@tonic-gate 		 * truncate it.
14227c478bd9Sstevel@tonic-gate 		 */
14237c478bd9Sstevel@tonic-gate 		slotp->offset = dirsize;
14247c478bd9Sstevel@tonic-gate 		slotp->size = DIRBLKSIZ;
14257c478bd9Sstevel@tonic-gate 		slotp->endoff = 0;
14267c478bd9Sstevel@tonic-gate 	} else {
14277c478bd9Sstevel@tonic-gate 		/*
14287c478bd9Sstevel@tonic-gate 		 * We found a slot, and will return an indication of where that
14297c478bd9Sstevel@tonic-gate 		 * slot is, as any new directory entry will be put there.
14307c478bd9Sstevel@tonic-gate 		 * Since that slot will become a useful entry, if the last
14317c478bd9Sstevel@tonic-gate 		 * useful entry we found was before this one, update the offset
14327c478bd9Sstevel@tonic-gate 		 * of the last useful entry.
14337c478bd9Sstevel@tonic-gate 		 */
14347c478bd9Sstevel@tonic-gate 		if (enduseful < slotp->offset + slotp->size)
14357c478bd9Sstevel@tonic-gate 			enduseful = slotp->offset + slotp->size;
14367c478bd9Sstevel@tonic-gate 		slotp->endoff = P2ROUNDUP_TYPED(enduseful, DIRBLKSIZ, off_t);
14377c478bd9Sstevel@tonic-gate 	}
14387c478bd9Sstevel@tonic-gate 	*ipp = NULL;
14397c478bd9Sstevel@tonic-gate 	return (0);
14407c478bd9Sstevel@tonic-gate }
14417c478bd9Sstevel@tonic-gate 
/*
 * Statistic: bumped by ufs_dirrename() each time it had to give up both
 * the source and target i_contents locks (the rw_tryenter() on the second
 * lock failed in both acquisition orders) and restart lock acquisition.
 */
14427c478bd9Sstevel@tonic-gate uint64_t ufs_dirrename_retry_cnt;
14437c478bd9Sstevel@tonic-gate 
14447c478bd9Sstevel@tonic-gate /*
14457c478bd9Sstevel@tonic-gate  * Rename the entry in the directory tdp so that it points to
14467c478bd9Sstevel@tonic-gate  * sip instead of tip.
14477c478bd9Sstevel@tonic-gate  */
14487c478bd9Sstevel@tonic-gate static int
ufs_dirrename(struct inode * sdp,struct inode * sip,struct inode * tdp,char * namep,struct inode * tip,struct ufs_slot * slotp,struct cred * cr)14497c478bd9Sstevel@tonic-gate ufs_dirrename(
14507c478bd9Sstevel@tonic-gate 	struct inode *sdp,	/* parent directory of source */
14517c478bd9Sstevel@tonic-gate 	struct inode *sip,	/* source inode */
14527c478bd9Sstevel@tonic-gate 	struct inode *tdp,	/* parent directory of target */
14537c478bd9Sstevel@tonic-gate 	char *namep,		/* entry we are trying to change */
14547c478bd9Sstevel@tonic-gate 	struct inode *tip,	/* target inode */
14555b024a5bSbatschul 	struct ufs_slot *slotp,	/* slot for entry */
14567c478bd9Sstevel@tonic-gate 	struct cred *cr)	/* credentials */
14577c478bd9Sstevel@tonic-gate {
14587c478bd9Sstevel@tonic-gate 	vnode_t *tdvp;
14597c478bd9Sstevel@tonic-gate 	off_t offset;
14607c478bd9Sstevel@tonic-gate 	int err;
14617c478bd9Sstevel@tonic-gate 	int doingdirectory;
14627c478bd9Sstevel@tonic-gate 
14637c478bd9Sstevel@tonic-gate 	ASSERT(sdp->i_ufsvfs != NULL);
14647c478bd9Sstevel@tonic-gate 	ASSERT(RW_WRITE_HELD(&tdp->i_rwlock));
14657c478bd9Sstevel@tonic-gate 	ASSERT(RW_WRITE_HELD(&tdp->i_contents));
14667c478bd9Sstevel@tonic-gate 	/*
14677c478bd9Sstevel@tonic-gate 	 * Short circuit rename of something to itself.
14687c478bd9Sstevel@tonic-gate 	 */
14697c478bd9Sstevel@tonic-gate 	if (sip->i_number == tip->i_number) {
14707c478bd9Sstevel@tonic-gate 		return (ESAME); /* special KLUDGE error code */
14717c478bd9Sstevel@tonic-gate 	}
14727c478bd9Sstevel@tonic-gate 
14737c478bd9Sstevel@tonic-gate 	/*
14747c478bd9Sstevel@tonic-gate 	 * We're locking 2 peer level locks, so must use tryenter
14757c478bd9Sstevel@tonic-gate 	 * on the 2nd to avoid deadlocks that would occur
14767c478bd9Sstevel@tonic-gate 	 * if we renamed a->b and b->a concurrently.
14777c478bd9Sstevel@tonic-gate 	 */
14787c478bd9Sstevel@tonic-gate retry:
14797c478bd9Sstevel@tonic-gate 	rw_enter(&tip->i_contents, RW_WRITER);
14807c478bd9Sstevel@tonic-gate 	if (!rw_tryenter(&sip->i_contents, RW_READER)) {
14817c478bd9Sstevel@tonic-gate 		/*
14827c478bd9Sstevel@tonic-gate 		 * drop tip and wait (sleep) until we stand a chance
14837c478bd9Sstevel@tonic-gate 		 * of holding sip
14847c478bd9Sstevel@tonic-gate 		 */
14857c478bd9Sstevel@tonic-gate 		rw_exit(&tip->i_contents);
14867c478bd9Sstevel@tonic-gate 		rw_enter(&sip->i_contents, RW_READER);
14877c478bd9Sstevel@tonic-gate 		/*
14887c478bd9Sstevel@tonic-gate 		 * Reverse the lock grabs in case we have heavy
14897c478bd9Sstevel@tonic-gate 		 * contention on the 2nd lock.
14907c478bd9Sstevel@tonic-gate 		 */
14917c478bd9Sstevel@tonic-gate 		if (!rw_tryenter(&tip->i_contents, RW_WRITER)) {
14927c478bd9Sstevel@tonic-gate 			ufs_dirrename_retry_cnt++;
14937c478bd9Sstevel@tonic-gate 			rw_exit(&sip->i_contents);
14947c478bd9Sstevel@tonic-gate 			goto retry;
14957c478bd9Sstevel@tonic-gate 		}
14967c478bd9Sstevel@tonic-gate 	}
14977c478bd9Sstevel@tonic-gate 
14987c478bd9Sstevel@tonic-gate 	/*
14997c478bd9Sstevel@tonic-gate 	 * Check that everything is on the same filesystem.
15007c478bd9Sstevel@tonic-gate 	 */
15017c478bd9Sstevel@tonic-gate 	if ((ITOV(tip)->v_vfsp != ITOV(tdp)->v_vfsp) ||
15027c478bd9Sstevel@tonic-gate 	    (ITOV(tip)->v_vfsp != ITOV(sip)->v_vfsp)) {
15037c478bd9Sstevel@tonic-gate 		err = EXDEV;		/* XXX archaic */
15047c478bd9Sstevel@tonic-gate 		goto out;
15057c478bd9Sstevel@tonic-gate 	}
15067c478bd9Sstevel@tonic-gate 	/*
15077c478bd9Sstevel@tonic-gate 	 * Must have write permission to rewrite target entry.
15087c478bd9Sstevel@tonic-gate 	 * Perform additional checks for sticky directories.
15097c478bd9Sstevel@tonic-gate 	 */
151060c8e821SFrank Batschulat 	if ((err = ufs_iaccess(tdp, IWRITE, cr, 0)) != 0 ||
15117c478bd9Sstevel@tonic-gate 	    (err = ufs_sticky_remove_access(tdp, tip, cr)) != 0)
15127c478bd9Sstevel@tonic-gate 		goto out;
15137c478bd9Sstevel@tonic-gate 
15147c478bd9Sstevel@tonic-gate 	/*
15157c478bd9Sstevel@tonic-gate 	 * Ensure source and target are compatible (both directories
15167c478bd9Sstevel@tonic-gate 	 * or both not directories).  If target is a directory it must
15177c478bd9Sstevel@tonic-gate 	 * be empty and have no links to it; in addition it must not
15187c478bd9Sstevel@tonic-gate 	 * be a mount point, and both the source and target must be
15197c478bd9Sstevel@tonic-gate 	 * writable.
15207c478bd9Sstevel@tonic-gate 	 */
15217c478bd9Sstevel@tonic-gate 	doingdirectory = (((sip->i_mode & IFMT) == IFDIR) ||
15227c478bd9Sstevel@tonic-gate 	    ((sip->i_mode & IFMT) == IFATTRDIR));
15237c478bd9Sstevel@tonic-gate 	if (((tip->i_mode & IFMT) == IFDIR) ||
15247c478bd9Sstevel@tonic-gate 	    ((tip->i_mode & IFMT) == IFATTRDIR)) {
15257c478bd9Sstevel@tonic-gate 		if (!doingdirectory) {
15267c478bd9Sstevel@tonic-gate 			err = EISDIR;
15277c478bd9Sstevel@tonic-gate 			goto out;
15287c478bd9Sstevel@tonic-gate 		}
15297c478bd9Sstevel@tonic-gate 		/*
15305b024a5bSbatschul 		 * vn_vfsrlock will prevent mounts from using the directory
1531d5dbd18dSbatschul 		 * until we are done.
15327c478bd9Sstevel@tonic-gate 		 */
15335b024a5bSbatschul 		if (vn_vfsrlock(ITOV(tip))) {
15347c478bd9Sstevel@tonic-gate 			err = EBUSY;
15357c478bd9Sstevel@tonic-gate 			goto out;
15367c478bd9Sstevel@tonic-gate 		}
15377c478bd9Sstevel@tonic-gate 		if (vn_mountedvfs(ITOV(tip)) != NULL) {
15387c478bd9Sstevel@tonic-gate 			vn_vfsunlock(ITOV(tip));
15397c478bd9Sstevel@tonic-gate 			err = EBUSY;
15407c478bd9Sstevel@tonic-gate 			goto out;
15417c478bd9Sstevel@tonic-gate 		}
15427c478bd9Sstevel@tonic-gate 		if (!ufs_dirempty(tip, tdp->i_number, cr) || tip->i_nlink > 2) {
15437c478bd9Sstevel@tonic-gate 			vn_vfsunlock(ITOV(tip));
15447c478bd9Sstevel@tonic-gate 			err = EEXIST;	/* SIGH should be ENOTEMPTY */
15457c478bd9Sstevel@tonic-gate 			goto out;
15467c478bd9Sstevel@tonic-gate 		}
15477c478bd9Sstevel@tonic-gate 	} else if (doingdirectory) {
15487c478bd9Sstevel@tonic-gate 		err = ENOTDIR;
15497c478bd9Sstevel@tonic-gate 		goto out;
15507c478bd9Sstevel@tonic-gate 	}
15517c478bd9Sstevel@tonic-gate 
15527c478bd9Sstevel@tonic-gate 	/*
15537c478bd9Sstevel@tonic-gate 	 * Rewrite the inode pointer for target name entry
15547c478bd9Sstevel@tonic-gate 	 * from the target inode (ip) to the source inode (sip).
15557c478bd9Sstevel@tonic-gate 	 * This prevents the target entry from disappearing
15567c478bd9Sstevel@tonic-gate 	 * during a crash. Mark the directory inode to reflect the changes.
15577c478bd9Sstevel@tonic-gate 	 */
15587c478bd9Sstevel@tonic-gate 	tdvp = ITOV(tdp);
15597c478bd9Sstevel@tonic-gate 	slotp->ep->d_ino = (int32_t)sip->i_number;
15607c478bd9Sstevel@tonic-gate 	dnlc_update(tdvp, namep, ITOV(sip));
15617c478bd9Sstevel@tonic-gate 	if (slotp->size) {
15627c478bd9Sstevel@tonic-gate 		offset = slotp->offset - slotp->size;
15637c478bd9Sstevel@tonic-gate 	} else {
15647c478bd9Sstevel@tonic-gate 		offset = slotp->offset + 1;
15657c478bd9Sstevel@tonic-gate 	}
15667c478bd9Sstevel@tonic-gate 	if (slotp->cached) {
15677c478bd9Sstevel@tonic-gate 		(void) dnlc_dir_update(&tdp->i_danchor, namep,
15687c478bd9Sstevel@tonic-gate 		    INO_OFF_TO_H(slotp->ep->d_ino, offset));
15697c478bd9Sstevel@tonic-gate 	}
15707c478bd9Sstevel@tonic-gate 
15717c478bd9Sstevel@tonic-gate 	err = TRANS_DIR(tdp, slotp->offset);
15727c478bd9Sstevel@tonic-gate 	if (err)
15737c478bd9Sstevel@tonic-gate 		fbrelse(slotp->fbp, S_OTHER);
15747c478bd9Sstevel@tonic-gate 	else
15757c478bd9Sstevel@tonic-gate 		err = ufs_fbwrite(slotp->fbp, tdp);
15767c478bd9Sstevel@tonic-gate 
15777c478bd9Sstevel@tonic-gate 	slotp->fbp = NULL;
15787c478bd9Sstevel@tonic-gate 	if (err) {
15797c478bd9Sstevel@tonic-gate 		if (doingdirectory)
15807c478bd9Sstevel@tonic-gate 			vn_vfsunlock(ITOV(tip));
15817c478bd9Sstevel@tonic-gate 		goto out;
15827c478bd9Sstevel@tonic-gate 	}
15837c478bd9Sstevel@tonic-gate 
15847c478bd9Sstevel@tonic-gate 	TRANS_INODE(tdp->i_ufsvfs, tdp);
15857c478bd9Sstevel@tonic-gate 	tdp->i_flag |= IUPD|ICHG;
15867c478bd9Sstevel@tonic-gate 	tdp->i_seq++;
15877c478bd9Sstevel@tonic-gate 	ITIMES_NOLOCK(tdp);
15887c478bd9Sstevel@tonic-gate 
15897c478bd9Sstevel@tonic-gate 	/*
15907c478bd9Sstevel@tonic-gate 	 * Decrement the link count of the target inode.
15917c478bd9Sstevel@tonic-gate 	 * Fix the ".." entry in sip to point to dp.
15927c478bd9Sstevel@tonic-gate 	 * This is done after the new entry is on the disk.
15937c478bd9Sstevel@tonic-gate 	 */
15947c478bd9Sstevel@tonic-gate 	tip->i_nlink--;
15957c478bd9Sstevel@tonic-gate 	TRANS_INODE(tip->i_ufsvfs, tip);
15967c478bd9Sstevel@tonic-gate 	tip->i_flag |= ICHG;
15977c478bd9Sstevel@tonic-gate 	tip->i_seq++;
15987c478bd9Sstevel@tonic-gate 	ITIMES_NOLOCK(tip);
15997c478bd9Sstevel@tonic-gate 	if (doingdirectory) {
16007c478bd9Sstevel@tonic-gate 		/*
16017c478bd9Sstevel@tonic-gate 		 * The entry for tip no longer exists so I can unlock the
16027c478bd9Sstevel@tonic-gate 		 * vfslock.
16037c478bd9Sstevel@tonic-gate 		 */
16047c478bd9Sstevel@tonic-gate 		vn_vfsunlock(ITOV(tip));
16057c478bd9Sstevel@tonic-gate 		/*
16067c478bd9Sstevel@tonic-gate 		 * Decrement target link count once more if it was a directory.
16077c478bd9Sstevel@tonic-gate 		 */
16087c478bd9Sstevel@tonic-gate 		if (--tip->i_nlink != 0) {
16097c478bd9Sstevel@tonic-gate 			err = ufs_fault(ITOV(tip),
16107c478bd9Sstevel@tonic-gate 		    "ufs_dirrename: target directory link count != 0 (%s)",
16117c478bd9Sstevel@tonic-gate 			    tip->i_fs->fs_fsmnt);
16127c478bd9Sstevel@tonic-gate 			rw_exit(&tip->i_contents);
16137c478bd9Sstevel@tonic-gate 			return (err);
16147c478bd9Sstevel@tonic-gate 		}
16157c478bd9Sstevel@tonic-gate 		TRANS_INODE(tip->i_ufsvfs, tip);
16167c478bd9Sstevel@tonic-gate 		ufs_setreclaim(tip);
16177c478bd9Sstevel@tonic-gate 		/*
16187c478bd9Sstevel@tonic-gate 		 * Renaming a directory with the parent different
16197c478bd9Sstevel@tonic-gate 		 * requires that ".." be rewritten.  The window is
16207c478bd9Sstevel@tonic-gate 		 * still there for ".." to be inconsistent, but this
16217c478bd9Sstevel@tonic-gate 		 * is unavoidable, and a lot shorter than when it was
16227c478bd9Sstevel@tonic-gate 		 * done in a user process.  We decrement the link
16237c478bd9Sstevel@tonic-gate 		 * count in the new parent as appropriate to reflect
16247c478bd9Sstevel@tonic-gate 		 * the just-removed target.  If the parent is the
16257c478bd9Sstevel@tonic-gate 		 * same, this is appropriate since the original
16267c478bd9Sstevel@tonic-gate 		 * directory is going away.  If the new parent is
16277c478bd9Sstevel@tonic-gate 		 * different, ufs_dirfixdotdot() will bump the link count
16287c478bd9Sstevel@tonic-gate 		 * back.
16297c478bd9Sstevel@tonic-gate 		 */
16307c478bd9Sstevel@tonic-gate 		tdp->i_nlink--;
16317c478bd9Sstevel@tonic-gate 		ufs_setreclaim(tdp);
16327c478bd9Sstevel@tonic-gate 		TRANS_INODE(tdp->i_ufsvfs, tdp);
16337c478bd9Sstevel@tonic-gate 		tdp->i_flag |= ICHG;
16347c478bd9Sstevel@tonic-gate 		tdp->i_seq++;
16357c478bd9Sstevel@tonic-gate 		ITIMES_NOLOCK(tdp);
16367c478bd9Sstevel@tonic-gate 		if (sdp != tdp) {
16377c478bd9Sstevel@tonic-gate 			rw_exit(&tip->i_contents);
16387c478bd9Sstevel@tonic-gate 			rw_exit(&sip->i_contents);
16397c478bd9Sstevel@tonic-gate 			err = ufs_dirfixdotdot(sip, sdp, tdp);
16407c478bd9Sstevel@tonic-gate 			return (err);
16417c478bd9Sstevel@tonic-gate 		}
16427c478bd9Sstevel@tonic-gate 	} else
16437c478bd9Sstevel@tonic-gate 		ufs_setreclaim(tip);
16447c478bd9Sstevel@tonic-gate out:
16457c478bd9Sstevel@tonic-gate 	rw_exit(&tip->i_contents);
16467c478bd9Sstevel@tonic-gate 	rw_exit(&sip->i_contents);
16477c478bd9Sstevel@tonic-gate 	return (err);
16487c478bd9Sstevel@tonic-gate }
16497c478bd9Sstevel@tonic-gate 
16507c478bd9Sstevel@tonic-gate /*
16517c478bd9Sstevel@tonic-gate  * Fix the ".." entry of the child directory so that it points
16527c478bd9Sstevel@tonic-gate  * to the new parent directory instead of the old one.  Routine
16537c478bd9Sstevel@tonic-gate  * assumes that dp is a directory and that all the inodes are on
16547c478bd9Sstevel@tonic-gate  * the same file system.
16557c478bd9Sstevel@tonic-gate  */
16567c478bd9Sstevel@tonic-gate static int
ufs_dirfixdotdot(struct inode * dp,struct inode * opdp,struct inode * npdp)16577c478bd9Sstevel@tonic-gate ufs_dirfixdotdot(
16587c478bd9Sstevel@tonic-gate 	struct inode *dp,	/* child directory */
16597c478bd9Sstevel@tonic-gate 	struct inode *opdp,	/* old parent directory */
16607c478bd9Sstevel@tonic-gate 	struct inode *npdp)	/* new parent directory */
16617c478bd9Sstevel@tonic-gate {
16627c478bd9Sstevel@tonic-gate 	struct fbuf *fbp;
16637c478bd9Sstevel@tonic-gate 	struct dirtemplate *dirp;
16647c478bd9Sstevel@tonic-gate 	vnode_t *dvp;
16657c478bd9Sstevel@tonic-gate 	int err;
16667c478bd9Sstevel@tonic-gate 
16677c478bd9Sstevel@tonic-gate 	ASSERT(RW_WRITE_HELD(&npdp->i_rwlock));
16687c478bd9Sstevel@tonic-gate 	ASSERT(RW_WRITE_HELD(&npdp->i_contents));
16697c478bd9Sstevel@tonic-gate 
16707c478bd9Sstevel@tonic-gate 	/*
16717c478bd9Sstevel@tonic-gate 	 * We hold the child directory's i_contents lock before calling
16727c478bd9Sstevel@tonic-gate 	 * blkatoff so that we honor correct locking protocol which is
16737c478bd9Sstevel@tonic-gate 	 * i_contents lock and then page lock. (blkatoff will call
16747c478bd9Sstevel@tonic-gate 	 * ufs_getpage where we want the page lock)
16757c478bd9Sstevel@tonic-gate 	 * We hold the child directory's i_rwlock before i_contents (as
16767c478bd9Sstevel@tonic-gate 	 * per the locking protocol) since we are modifying the ".." entry
16777c478bd9Sstevel@tonic-gate 	 * of the child directory.
16787c478bd9Sstevel@tonic-gate 	 * We hold the i_rwlock and i_contents lock until we record
16797c478bd9Sstevel@tonic-gate 	 * this directory delta to the log (via ufs_trans_dir) and have
16807c478bd9Sstevel@tonic-gate 	 * done fbrelse.
16817c478bd9Sstevel@tonic-gate 	 */
16827c478bd9Sstevel@tonic-gate 	rw_enter(&dp->i_rwlock, RW_WRITER);
16837c478bd9Sstevel@tonic-gate 	rw_enter(&dp->i_contents, RW_WRITER);
16847c478bd9Sstevel@tonic-gate 	err = blkatoff(dp, (off_t)0, (char **)&dirp, &fbp);
16857c478bd9Sstevel@tonic-gate 	if (err)
16867c478bd9Sstevel@tonic-gate 		goto bad;
16877c478bd9Sstevel@tonic-gate 
16887c478bd9Sstevel@tonic-gate 	if (dp->i_nlink <= 0 ||
16897c478bd9Sstevel@tonic-gate 	    dp->i_size < sizeof (struct dirtemplate)) {
16907c478bd9Sstevel@tonic-gate 		err = ENOENT;
16917c478bd9Sstevel@tonic-gate 		goto bad;
16927c478bd9Sstevel@tonic-gate 	}
16937c478bd9Sstevel@tonic-gate 
16947c478bd9Sstevel@tonic-gate 	if (dirp->dotdot_namlen != 2 ||
16957c478bd9Sstevel@tonic-gate 	    dirp->dotdot_name[0] != '.' ||
16967c478bd9Sstevel@tonic-gate 	    dirp->dotdot_name[1] != '.') {	/* Sanity check. */
16977c478bd9Sstevel@tonic-gate 		dirbad(dp, "mangled .. entry", (off_t)0);
16987c478bd9Sstevel@tonic-gate 		err = ENOTDIR;
16997c478bd9Sstevel@tonic-gate 		goto bad;
17007c478bd9Sstevel@tonic-gate 	}
17017c478bd9Sstevel@tonic-gate 
17027c478bd9Sstevel@tonic-gate 	/*
17037c478bd9Sstevel@tonic-gate 	 * Increment the link count in the new parent inode and force it out.
17047c478bd9Sstevel@tonic-gate 	 */
17057c478bd9Sstevel@tonic-gate 	if (npdp->i_nlink == MAXLINK) {
17067c478bd9Sstevel@tonic-gate 		err = EMLINK;
17077c478bd9Sstevel@tonic-gate 		goto bad;
17087c478bd9Sstevel@tonic-gate 	}
17097c478bd9Sstevel@tonic-gate 	npdp->i_nlink++;
17107c478bd9Sstevel@tonic-gate 	TRANS_INODE(npdp->i_ufsvfs, npdp);
17117c478bd9Sstevel@tonic-gate 	npdp->i_flag |= ICHG;
17127c478bd9Sstevel@tonic-gate 	npdp->i_seq++;
17137c478bd9Sstevel@tonic-gate 	ufs_iupdat(npdp, I_SYNC);
17147c478bd9Sstevel@tonic-gate 
17157c478bd9Sstevel@tonic-gate 	/*
17167c478bd9Sstevel@tonic-gate 	 * Rewrite the child ".." entry and force it out.
17177c478bd9Sstevel@tonic-gate 	 */
17187c478bd9Sstevel@tonic-gate 	dvp = ITOV(dp);
17197c478bd9Sstevel@tonic-gate 	dirp->dotdot_ino = (uint32_t)npdp->i_number;
17207c478bd9Sstevel@tonic-gate 	dnlc_update(dvp, "..", ITOV(npdp));
17217c478bd9Sstevel@tonic-gate 	(void) dnlc_dir_update(&dp->i_danchor, "..",
17227c478bd9Sstevel@tonic-gate 	    INO_OFF_TO_H(dirp->dotdot_ino, 0));
17237c478bd9Sstevel@tonic-gate 
17247c478bd9Sstevel@tonic-gate 	err = TRANS_DIR(dp, 0);
17257c478bd9Sstevel@tonic-gate 	if (err)
17267c478bd9Sstevel@tonic-gate 		fbrelse(fbp, S_OTHER);
17277c478bd9Sstevel@tonic-gate 	else
17287c478bd9Sstevel@tonic-gate 		err = ufs_fbwrite(fbp, dp);
17297c478bd9Sstevel@tonic-gate 
17307c478bd9Sstevel@tonic-gate 	fbp = NULL;
17317c478bd9Sstevel@tonic-gate 	if (err)
17327c478bd9Sstevel@tonic-gate 		goto bad;
17337c478bd9Sstevel@tonic-gate 
17347c478bd9Sstevel@tonic-gate 	rw_exit(&dp->i_contents);
17357c478bd9Sstevel@tonic-gate 	rw_exit(&dp->i_rwlock);
17367c478bd9Sstevel@tonic-gate 
17377c478bd9Sstevel@tonic-gate 	/*
17387c478bd9Sstevel@tonic-gate 	 * Decrement the link count of the old parent inode and force it out.
17397c478bd9Sstevel@tonic-gate 	 */
17407c478bd9Sstevel@tonic-gate 	ASSERT(opdp);
17417c478bd9Sstevel@tonic-gate 	rw_enter(&opdp->i_contents, RW_WRITER);
17427c478bd9Sstevel@tonic-gate 	ASSERT(opdp->i_nlink > 0);
17437c478bd9Sstevel@tonic-gate 	opdp->i_nlink--;
17447c478bd9Sstevel@tonic-gate 	ufs_setreclaim(opdp);
17457c478bd9Sstevel@tonic-gate 	TRANS_INODE(opdp->i_ufsvfs, opdp);
17467c478bd9Sstevel@tonic-gate 	opdp->i_flag |= ICHG;
17477c478bd9Sstevel@tonic-gate 	opdp->i_seq++;
17487c478bd9Sstevel@tonic-gate 	ufs_iupdat(opdp, I_SYNC);
17497c478bd9Sstevel@tonic-gate 	rw_exit(&opdp->i_contents);
17507c478bd9Sstevel@tonic-gate 	return (0);
17517c478bd9Sstevel@tonic-gate 
17527c478bd9Sstevel@tonic-gate bad:
17537c478bd9Sstevel@tonic-gate 	if (fbp)
17547c478bd9Sstevel@tonic-gate 		fbrelse(fbp, S_OTHER);
17557c478bd9Sstevel@tonic-gate 	rw_exit(&dp->i_contents);
17567c478bd9Sstevel@tonic-gate 	rw_exit(&dp->i_rwlock);
17577c478bd9Sstevel@tonic-gate 	return (err);
17587c478bd9Sstevel@tonic-gate }
17597c478bd9Sstevel@tonic-gate 
17607c478bd9Sstevel@tonic-gate /*
17617c478bd9Sstevel@tonic-gate  * Enter the file sip in the directory tdp with name namep.
17627c478bd9Sstevel@tonic-gate  */
17637c478bd9Sstevel@tonic-gate static int
ufs_diraddentry(struct inode * tdp,char * namep,enum de_op op,int namlen,struct ufs_slot * slotp,struct inode * sip,struct inode * sdp,struct cred * cr)17647c478bd9Sstevel@tonic-gate ufs_diraddentry(
17657c478bd9Sstevel@tonic-gate 	struct inode *tdp,
17667c478bd9Sstevel@tonic-gate 	char *namep,
17677c478bd9Sstevel@tonic-gate 	enum de_op op,
17687c478bd9Sstevel@tonic-gate 	int namlen,
17695b024a5bSbatschul 	struct ufs_slot *slotp,
17707c478bd9Sstevel@tonic-gate 	struct inode *sip,
17717c478bd9Sstevel@tonic-gate 	struct inode *sdp,
17727c478bd9Sstevel@tonic-gate 	struct cred *cr)
17737c478bd9Sstevel@tonic-gate {
17747c478bd9Sstevel@tonic-gate 	struct direct *ep, *nep;
17757c478bd9Sstevel@tonic-gate 	vnode_t *tdvp;
17767c478bd9Sstevel@tonic-gate 	dcanchor_t *dcap = &tdp->i_danchor;
17777c478bd9Sstevel@tonic-gate 	off_t offset;
17787c478bd9Sstevel@tonic-gate 	int err;
17797c478bd9Sstevel@tonic-gate 	ushort_t extra;
17807c478bd9Sstevel@tonic-gate 
17817c478bd9Sstevel@tonic-gate 	ASSERT(RW_WRITE_HELD(&tdp->i_rwlock));
17827c478bd9Sstevel@tonic-gate 	ASSERT(RW_WRITE_HELD(&tdp->i_contents));
17837c478bd9Sstevel@tonic-gate 	/*
17847c478bd9Sstevel@tonic-gate 	 * Prepare a new entry.  If the caller has not supplied an
17857c478bd9Sstevel@tonic-gate 	 * existing inode, make a new one.
17867c478bd9Sstevel@tonic-gate 	 */
17877c478bd9Sstevel@tonic-gate 	err = dirprepareentry(tdp, slotp, cr);
17887c478bd9Sstevel@tonic-gate 	if (err) {
17897c478bd9Sstevel@tonic-gate 		if (slotp->fbp) {
17907c478bd9Sstevel@tonic-gate 			fbrelse(slotp->fbp, S_OTHER);
17917c478bd9Sstevel@tonic-gate 			slotp->fbp = NULL;
17927c478bd9Sstevel@tonic-gate 		}
17937c478bd9Sstevel@tonic-gate 		return (err);
17947c478bd9Sstevel@tonic-gate 	}
17957c478bd9Sstevel@tonic-gate 	/*
17967c478bd9Sstevel@tonic-gate 	 * Check inode to be linked to see if it is in the
17977c478bd9Sstevel@tonic-gate 	 * same filesystem.
17987c478bd9Sstevel@tonic-gate 	 */
17997c478bd9Sstevel@tonic-gate 	if (ITOV(tdp)->v_vfsp != ITOV(sip)->v_vfsp) {
18007c478bd9Sstevel@tonic-gate 		err = EXDEV;
18017c478bd9Sstevel@tonic-gate 		goto bad;
18027c478bd9Sstevel@tonic-gate 	}
18037c478bd9Sstevel@tonic-gate 
18047c478bd9Sstevel@tonic-gate 	/*
18057c478bd9Sstevel@tonic-gate 	 * If renaming a directory then fix up the ".." entry in the
18067c478bd9Sstevel@tonic-gate 	 * directory to point to the new parent.
18077c478bd9Sstevel@tonic-gate 	 */
18087c478bd9Sstevel@tonic-gate 	if ((op == DE_RENAME) && (((sip->i_mode & IFMT) == IFDIR) ||
18097c478bd9Sstevel@tonic-gate 	    ((sip->i_mode & IFMT) == IFATTRDIR)) && (sdp != tdp)) {
18107c478bd9Sstevel@tonic-gate 		err = ufs_dirfixdotdot(sip, sdp, tdp);
18117c478bd9Sstevel@tonic-gate 		if (err)
18127c478bd9Sstevel@tonic-gate 			goto bad;
18137c478bd9Sstevel@tonic-gate 	}
18147c478bd9Sstevel@tonic-gate 
18157c478bd9Sstevel@tonic-gate 	/*
18167c478bd9Sstevel@tonic-gate 	 * Fill in entry data.
18177c478bd9Sstevel@tonic-gate 	 */
18187c478bd9Sstevel@tonic-gate 	ep = slotp->ep;
18197c478bd9Sstevel@tonic-gate 	ep->d_namlen = (ushort_t)namlen;
18207c478bd9Sstevel@tonic-gate 	(void) strncpy(ep->d_name, namep, (size_t)((namlen + 4) & ~3));
18217c478bd9Sstevel@tonic-gate 	ep->d_ino = (uint32_t)sip->i_number;
18227c478bd9Sstevel@tonic-gate 	tdvp = ITOV(tdp);
18237c478bd9Sstevel@tonic-gate 	dnlc_update(tdvp, namep, ITOV(sip));
18247c478bd9Sstevel@tonic-gate 	/*
18257c478bd9Sstevel@tonic-gate 	 * Note the offset supplied for any named entry is
18267c478bd9Sstevel@tonic-gate 	 * the offset of the previous one, unless it's the 1st.
18277c478bd9Sstevel@tonic-gate 	 * slotp->size is used to pass the length to
18287c478bd9Sstevel@tonic-gate 	 * the previous entry.
18297c478bd9Sstevel@tonic-gate 	 */
18307c478bd9Sstevel@tonic-gate 	if (slotp->size) {
18317c478bd9Sstevel@tonic-gate 		offset = slotp->offset - slotp->size;
18327c478bd9Sstevel@tonic-gate 	} else {
18337c478bd9Sstevel@tonic-gate 		offset = slotp->offset + 1;
18347c478bd9Sstevel@tonic-gate 	}
18357c478bd9Sstevel@tonic-gate 
18367c478bd9Sstevel@tonic-gate 	if (slotp->cached) {
18377c478bd9Sstevel@tonic-gate 		/*
18387c478bd9Sstevel@tonic-gate 		 * Add back any usable unused space to the dnlc directory
18397c478bd9Sstevel@tonic-gate 		 * cache.
18407c478bd9Sstevel@tonic-gate 		 */
18417c478bd9Sstevel@tonic-gate 		extra = ep->d_reclen - DIRSIZ(ep);
18427c478bd9Sstevel@tonic-gate 		if (extra >= LDIRSIZ(1)) {
18437c478bd9Sstevel@tonic-gate 			(void) dnlc_dir_add_space(dcap, extra,
18447c478bd9Sstevel@tonic-gate 			    (uint64_t)slotp->offset);
18457c478bd9Sstevel@tonic-gate 		}
18467c478bd9Sstevel@tonic-gate 
18477c478bd9Sstevel@tonic-gate 		(void) dnlc_dir_add_entry(dcap, namep,
18487c478bd9Sstevel@tonic-gate 		    INO_OFF_TO_H(ep->d_ino, offset));
18497c478bd9Sstevel@tonic-gate 
18507c478bd9Sstevel@tonic-gate 		/* adjust the previous offset of the next entry */
18517c478bd9Sstevel@tonic-gate 		nep = (struct direct *)((char *)ep + ep->d_reclen);
18527c478bd9Sstevel@tonic-gate 		if ((uintptr_t)nep & (DIRBLKSIZ - 1)) {
18537c478bd9Sstevel@tonic-gate 			/*
18547c478bd9Sstevel@tonic-gate 			 * Not a new block.
18557c478bd9Sstevel@tonic-gate 			 *
18567c478bd9Sstevel@tonic-gate 			 * Check the validity of the next entry.
18577c478bd9Sstevel@tonic-gate 			 * If it's bad, then throw away the cache, and
18587c478bd9Sstevel@tonic-gate 			 * continue as before directory caching.
18597c478bd9Sstevel@tonic-gate 			 */
18607c478bd9Sstevel@tonic-gate 			if ((nep->d_reclen == 0) || (nep->d_reclen & 0x3) ||
18617c478bd9Sstevel@tonic-gate 			    dnlc_dir_update(dcap, nep->d_name,
18627c478bd9Sstevel@tonic-gate 			    INO_OFF_TO_H(nep->d_ino, slotp->offset))
18637c478bd9Sstevel@tonic-gate 			    == DNOENT) {
18647c478bd9Sstevel@tonic-gate 				dnlc_dir_purge(dcap);
18657c478bd9Sstevel@tonic-gate 				slotp->cached = 0;
18667c478bd9Sstevel@tonic-gate 			}
18677c478bd9Sstevel@tonic-gate 		}
18687c478bd9Sstevel@tonic-gate 	}
18697c478bd9Sstevel@tonic-gate 
18707c478bd9Sstevel@tonic-gate 	/*
18717c478bd9Sstevel@tonic-gate 	 * Write out the directory block.
18727c478bd9Sstevel@tonic-gate 	 */
18737c478bd9Sstevel@tonic-gate 	err = TRANS_DIR(tdp, slotp->offset);
18747c478bd9Sstevel@tonic-gate 	if (err)
18757c478bd9Sstevel@tonic-gate 		fbrelse(slotp->fbp, S_OTHER);
18767c478bd9Sstevel@tonic-gate 	else
18777c478bd9Sstevel@tonic-gate 		err = ufs_fbwrite(slotp->fbp, tdp);
18787c478bd9Sstevel@tonic-gate 
18797c478bd9Sstevel@tonic-gate 	slotp->fbp = NULL;
18807c478bd9Sstevel@tonic-gate 	/*
18817c478bd9Sstevel@tonic-gate 	 * If this is a rename of a directory, then we have already
18827c478bd9Sstevel@tonic-gate 	 * fixed the ".." entry to refer to the new parent. If err
18837c478bd9Sstevel@tonic-gate 	 * is true at this point, we have failed to update the new
18847c478bd9Sstevel@tonic-gate 	 * parent to refer to the renamed directory.
18857c478bd9Sstevel@tonic-gate 	 * XXX - we need to unwind the ".." fix.
18867c478bd9Sstevel@tonic-gate 	 */
18877c478bd9Sstevel@tonic-gate 	if (err)
18887c478bd9Sstevel@tonic-gate 		return (err);
18897c478bd9Sstevel@tonic-gate 
18907c478bd9Sstevel@tonic-gate 	/*
18917c478bd9Sstevel@tonic-gate 	 * Mark the directory inode to reflect the changes.
18927c478bd9Sstevel@tonic-gate 	 * Truncate the directory to chop off blocks of empty entries.
18937c478bd9Sstevel@tonic-gate 	 */
18947c478bd9Sstevel@tonic-gate 
18957c478bd9Sstevel@tonic-gate 	TRANS_INODE(tdp->i_ufsvfs, tdp);
18967c478bd9Sstevel@tonic-gate 	tdp->i_flag |= IUPD|ICHG;
18977c478bd9Sstevel@tonic-gate 	tdp->i_seq++;
18987c478bd9Sstevel@tonic-gate 	tdp->i_diroff = 0;
18997c478bd9Sstevel@tonic-gate 	ITIMES_NOLOCK(tdp);
19007c478bd9Sstevel@tonic-gate 	/*
19017c478bd9Sstevel@tonic-gate 	 * If the directory grew then dirprepareentry() will have
19027c478bd9Sstevel@tonic-gate 	 * set IATTCHG in tdp->i_flag, then the directory inode must
19037c478bd9Sstevel@tonic-gate 	 * be flushed out. This is because if fsync() is used later
19047c478bd9Sstevel@tonic-gate 	 * the directory size must be correct, otherwise a crash would
19057c478bd9Sstevel@tonic-gate 	 * cause fsck to move the file to lost+found. Also because later
19067c478bd9Sstevel@tonic-gate 	 * a file may be linked in more than one directory, then there
19077c478bd9Sstevel@tonic-gate 	 * is no way to flush the original directory. So it must be
19087c478bd9Sstevel@tonic-gate 	 * flushed out on creation. See bug 4293809.
19097c478bd9Sstevel@tonic-gate 	 */
19107c478bd9Sstevel@tonic-gate 	if (tdp->i_flag & IATTCHG) {
19117c478bd9Sstevel@tonic-gate 		ufs_iupdat(tdp, I_SYNC);
19127c478bd9Sstevel@tonic-gate 	}
19137c478bd9Sstevel@tonic-gate 
19147c478bd9Sstevel@tonic-gate 	if (slotp->endoff && (slotp->endoff < tdp->i_size)) {
19157c478bd9Sstevel@tonic-gate 		if (!TRANS_ISTRANS(tdp->i_ufsvfs)) {
19167c478bd9Sstevel@tonic-gate 			(void) ufs_itrunc(tdp, (u_offset_t)slotp->endoff, 0,
19177c478bd9Sstevel@tonic-gate 			    cr);
19187c478bd9Sstevel@tonic-gate 		}
19197c478bd9Sstevel@tonic-gate 	}
19207c478bd9Sstevel@tonic-gate 
19217c478bd9Sstevel@tonic-gate 
19227c478bd9Sstevel@tonic-gate 	return (0);
19237c478bd9Sstevel@tonic-gate 
19247c478bd9Sstevel@tonic-gate bad:
19257c478bd9Sstevel@tonic-gate 	if (slotp->cached) {
19267c478bd9Sstevel@tonic-gate 		dnlc_dir_purge(dcap);
19277c478bd9Sstevel@tonic-gate 		fbrelse(slotp->fbp, S_OTHER);
19287c478bd9Sstevel@tonic-gate 		slotp->cached = 0;
19297c478bd9Sstevel@tonic-gate 		slotp->fbp = NULL;
19307c478bd9Sstevel@tonic-gate 		return (err);
19317c478bd9Sstevel@tonic-gate 	}
19327c478bd9Sstevel@tonic-gate 
19337c478bd9Sstevel@tonic-gate 	/*
19347c478bd9Sstevel@tonic-gate 	 * Clear out entry prepared by dirprepareent.
19357c478bd9Sstevel@tonic-gate 	 */
19367c478bd9Sstevel@tonic-gate 	slotp->ep->d_ino = 0;
19377c478bd9Sstevel@tonic-gate 	slotp->ep->d_namlen = 0;
19387c478bd9Sstevel@tonic-gate 
19397c478bd9Sstevel@tonic-gate 	/*
19407c478bd9Sstevel@tonic-gate 	 * Don't touch err so we don't clobber the real error that got us here.
19417c478bd9Sstevel@tonic-gate 	 */
19427c478bd9Sstevel@tonic-gate 	if (TRANS_DIR(tdp, slotp->offset))
19437c478bd9Sstevel@tonic-gate 		fbrelse(slotp->fbp, S_OTHER);
19447c478bd9Sstevel@tonic-gate 	else
19457c478bd9Sstevel@tonic-gate 		(void) ufs_fbwrite(slotp->fbp, tdp);
19467c478bd9Sstevel@tonic-gate 	slotp->fbp = NULL;
19477c478bd9Sstevel@tonic-gate 	return (err);
19487c478bd9Sstevel@tonic-gate }
19497c478bd9Sstevel@tonic-gate 
19507c478bd9Sstevel@tonic-gate /*
19517c478bd9Sstevel@tonic-gate  * Prepare a directory slot to receive an entry.
19527c478bd9Sstevel@tonic-gate  */
19537c478bd9Sstevel@tonic-gate static int
dirprepareentry(struct inode * dp,struct ufs_slot * slotp,struct cred * cr)19547c478bd9Sstevel@tonic-gate dirprepareentry(
19557c478bd9Sstevel@tonic-gate 	struct inode *dp,	/* directory we are working in */
19565b024a5bSbatschul 	struct ufs_slot *slotp,	/* available slot info */
19577c478bd9Sstevel@tonic-gate 	struct cred *cr)
19587c478bd9Sstevel@tonic-gate {
19597c478bd9Sstevel@tonic-gate 	struct direct *ep, *nep;
19607c478bd9Sstevel@tonic-gate 	off_t entryend;
19617c478bd9Sstevel@tonic-gate 	int err;
19627c478bd9Sstevel@tonic-gate 	slotstat_t status = slotp->status;
19637c478bd9Sstevel@tonic-gate 	ushort_t dsize;
19647c478bd9Sstevel@tonic-gate 
19657c478bd9Sstevel@tonic-gate 	ASSERT((status == NONE) || (status == FOUND));
19667c478bd9Sstevel@tonic-gate 	ASSERT(RW_WRITE_HELD(&dp->i_rwlock));
19677c478bd9Sstevel@tonic-gate 	ASSERT(RW_WRITE_HELD(&dp->i_contents));
19687c478bd9Sstevel@tonic-gate 	/*
19697c478bd9Sstevel@tonic-gate 	 * If we didn't find a slot, then indicate that the
19707c478bd9Sstevel@tonic-gate 	 * new slot belongs at the end of the directory.
19717c478bd9Sstevel@tonic-gate 	 * If we found a slot, then the new entry can be
19727c478bd9Sstevel@tonic-gate 	 * put at slotp->offset.
19737c478bd9Sstevel@tonic-gate 	 */
19747c478bd9Sstevel@tonic-gate 	entryend = slotp->offset + slotp->size;
19757c478bd9Sstevel@tonic-gate 	if (status == NONE) {
19767c478bd9Sstevel@tonic-gate 		ASSERT((slotp->offset & (DIRBLKSIZ - 1)) == 0);
19777c478bd9Sstevel@tonic-gate 		if (DIRBLKSIZ > dp->i_fs->fs_fsize) {
19787c478bd9Sstevel@tonic-gate 			err = ufs_fault(ITOV(dp),
19797c478bd9Sstevel@tonic-gate 			    "dirprepareentry: bad fs_fsize, DIRBLKSIZ: %d"
19807c478bd9Sstevel@tonic-gate 			    " > dp->i_fs->fs_fsize: %d (%s)",
19817c478bd9Sstevel@tonic-gate 			    DIRBLKSIZ, dp->i_fs->fs_fsize, dp->i_fs->fs_fsmnt);
19827c478bd9Sstevel@tonic-gate 			return (err);
19837c478bd9Sstevel@tonic-gate 		}
19847c478bd9Sstevel@tonic-gate 		/*
19857c478bd9Sstevel@tonic-gate 		 * Allocate the new block.
19867c478bd9Sstevel@tonic-gate 		 */
19877c478bd9Sstevel@tonic-gate 		err = BMAPALLOC(dp, (u_offset_t)slotp->offset,
19887c478bd9Sstevel@tonic-gate 		    (int)(blkoff(dp->i_fs, slotp->offset) + DIRBLKSIZ), cr);
19897c478bd9Sstevel@tonic-gate 		if (err) {
19907c478bd9Sstevel@tonic-gate 			return (err);
19917c478bd9Sstevel@tonic-gate 		}
19927c478bd9Sstevel@tonic-gate 		dp->i_size = entryend;
19937c478bd9Sstevel@tonic-gate 		TRANS_INODE(dp->i_ufsvfs, dp);
19947c478bd9Sstevel@tonic-gate 		dp->i_flag |= IUPD|ICHG|IATTCHG;
19957c478bd9Sstevel@tonic-gate 		dp->i_seq++;
19967c478bd9Sstevel@tonic-gate 		ITIMES_NOLOCK(dp);
19977c478bd9Sstevel@tonic-gate 	} else if (entryend > dp->i_size) {
19987c478bd9Sstevel@tonic-gate 		/*
19997c478bd9Sstevel@tonic-gate 		 * Adjust directory size, if needed. This should never
20007c478bd9Sstevel@tonic-gate 		 * push the size past a new multiple of DIRBLKSIZ.
20017c478bd9Sstevel@tonic-gate 		 * This is an artifact of the old (4.2BSD) way of initializing
20027c478bd9Sstevel@tonic-gate 		 * directory sizes to be less than DIRBLKSIZ.
20037c478bd9Sstevel@tonic-gate 		 */
20047c478bd9Sstevel@tonic-gate 		dp->i_size = P2ROUNDUP_TYPED(entryend, DIRBLKSIZ, off_t);
20057c478bd9Sstevel@tonic-gate 		TRANS_INODE(dp->i_ufsvfs, dp);
20067c478bd9Sstevel@tonic-gate 		dp->i_flag |= IUPD|ICHG|IATTCHG;
20077c478bd9Sstevel@tonic-gate 		dp->i_seq++;
20087c478bd9Sstevel@tonic-gate 		ITIMES_NOLOCK(dp);
20097c478bd9Sstevel@tonic-gate 	}
20107c478bd9Sstevel@tonic-gate 
20117c478bd9Sstevel@tonic-gate 	/*
20127c478bd9Sstevel@tonic-gate 	 * Get the block containing the space for the new directory entry.
20137c478bd9Sstevel@tonic-gate 	 */
20147c478bd9Sstevel@tonic-gate 	if (slotp->fbp == NULL) {
20157c478bd9Sstevel@tonic-gate 		err = blkatoff(dp, slotp->offset, (char **)&slotp->ep,
20167c478bd9Sstevel@tonic-gate 		    &slotp->fbp);
20177c478bd9Sstevel@tonic-gate 		if (err) {
20187c478bd9Sstevel@tonic-gate 			return (err);
20197c478bd9Sstevel@tonic-gate 		}
20207c478bd9Sstevel@tonic-gate 	}
20217c478bd9Sstevel@tonic-gate 	ep = slotp->ep;
20227c478bd9Sstevel@tonic-gate 
20237c478bd9Sstevel@tonic-gate 	switch (status) {
20247c478bd9Sstevel@tonic-gate 	case NONE:
20257c478bd9Sstevel@tonic-gate 		/*
20267c478bd9Sstevel@tonic-gate 		 * No space in the directory. slotp->offset will be on a
20277c478bd9Sstevel@tonic-gate 		 * directory block boundary and we will write the new entry
20287c478bd9Sstevel@tonic-gate 		 * into a fresh block.
20297c478bd9Sstevel@tonic-gate 		 */
20307c478bd9Sstevel@tonic-gate 		ep->d_reclen = DIRBLKSIZ;
20317c478bd9Sstevel@tonic-gate 		slotp->size = 0; /* length of previous entry */
20327c478bd9Sstevel@tonic-gate 		break;
20337c478bd9Sstevel@tonic-gate 	case FOUND:
20347c478bd9Sstevel@tonic-gate 		/*
20357c478bd9Sstevel@tonic-gate 		 * An entry of the required size has been found. Use it.
20367c478bd9Sstevel@tonic-gate 		 */
20377c478bd9Sstevel@tonic-gate 		if (ep->d_ino == 0) {
20387c478bd9Sstevel@tonic-gate 			/* this is the 1st record in a block */
20397c478bd9Sstevel@tonic-gate 			slotp->size = 0; /* length of previous entry */
20407c478bd9Sstevel@tonic-gate 		} else {
20417c478bd9Sstevel@tonic-gate 			dsize = DIRSIZ(ep);
20427c478bd9Sstevel@tonic-gate 			nep = (struct direct *)((char *)ep + dsize);
20437c478bd9Sstevel@tonic-gate 			nep->d_reclen = ep->d_reclen - dsize;
20447c478bd9Sstevel@tonic-gate 			ep->d_reclen = dsize;
20457c478bd9Sstevel@tonic-gate 			slotp->ep = nep;
20467c478bd9Sstevel@tonic-gate 			slotp->offset += dsize;
20477c478bd9Sstevel@tonic-gate 			slotp->size = dsize; /* length of previous entry */
20487c478bd9Sstevel@tonic-gate 		}
20497c478bd9Sstevel@tonic-gate 		break;
20507c478bd9Sstevel@tonic-gate 	default:
20517c478bd9Sstevel@tonic-gate 		break;
20527c478bd9Sstevel@tonic-gate 	}
20537c478bd9Sstevel@tonic-gate 	return (0);
20547c478bd9Sstevel@tonic-gate }
20557c478bd9Sstevel@tonic-gate 
20567c478bd9Sstevel@tonic-gate /*
20577c478bd9Sstevel@tonic-gate  * Allocate and initialize a new inode that will go into directory tdp.
20587c478bd9Sstevel@tonic-gate  * This routine is called from ufs_symlink(), as well as within this file.
20597c478bd9Sstevel@tonic-gate  */
20607c478bd9Sstevel@tonic-gate int
ufs_dirmakeinode(struct inode * tdp,struct inode ** ipp,struct vattr * vap,enum de_op op,struct cred * cr)20617c478bd9Sstevel@tonic-gate ufs_dirmakeinode(
20627c478bd9Sstevel@tonic-gate 	struct inode *tdp,
20637c478bd9Sstevel@tonic-gate 	struct inode **ipp,
20647c478bd9Sstevel@tonic-gate 	struct vattr *vap,
20657c478bd9Sstevel@tonic-gate 	enum de_op op,
20667c478bd9Sstevel@tonic-gate 	struct cred *cr)
20677c478bd9Sstevel@tonic-gate {
20687c478bd9Sstevel@tonic-gate 	struct inode *ip;
20697c478bd9Sstevel@tonic-gate 	enum vtype type;
20707c478bd9Sstevel@tonic-gate 	int imode;			/* mode and format as in inode */
20717c478bd9Sstevel@tonic-gate 	ino_t ipref;
20727c478bd9Sstevel@tonic-gate 	int err;
20737c478bd9Sstevel@tonic-gate 	timestruc_t now;
20747c478bd9Sstevel@tonic-gate 
20757c478bd9Sstevel@tonic-gate 	ASSERT(vap != NULL);
20767c478bd9Sstevel@tonic-gate 	ASSERT(op == DE_CREATE || op == DE_MKDIR || op == DE_ATTRDIR ||
20777c478bd9Sstevel@tonic-gate 	    op == DE_SYMLINK);
20787c478bd9Sstevel@tonic-gate 	ASSERT((vap->va_mask & (AT_TYPE|AT_MODE)) == (AT_TYPE|AT_MODE));
20797c478bd9Sstevel@tonic-gate 	ASSERT(RW_WRITE_HELD(&tdp->i_rwlock));
20807c478bd9Sstevel@tonic-gate 	ASSERT(RW_WRITE_HELD(&tdp->i_contents));
20817c478bd9Sstevel@tonic-gate 	/*
20827c478bd9Sstevel@tonic-gate 	 * Allocate a new inode.
20837c478bd9Sstevel@tonic-gate 	 */
20847c478bd9Sstevel@tonic-gate 	type = vap->va_type;
20857c478bd9Sstevel@tonic-gate 	if (type == VDIR) {
20867c478bd9Sstevel@tonic-gate 		ipref = dirpref(tdp);
20877c478bd9Sstevel@tonic-gate 	} else {
20887c478bd9Sstevel@tonic-gate 		ipref = tdp->i_number;
20897c478bd9Sstevel@tonic-gate 	}
20907c478bd9Sstevel@tonic-gate 	if (op == DE_ATTRDIR)
20917c478bd9Sstevel@tonic-gate 		imode = vap->va_mode;
20927c478bd9Sstevel@tonic-gate 	else
20937c478bd9Sstevel@tonic-gate 		imode = MAKEIMODE(type, vap->va_mode);
20947c478bd9Sstevel@tonic-gate 	*ipp = NULL;
20957c478bd9Sstevel@tonic-gate 	err = ufs_ialloc(tdp, ipref, imode, &ip, cr);
20967c478bd9Sstevel@tonic-gate 	if (err)
20977c478bd9Sstevel@tonic-gate 		return (err);
20987c478bd9Sstevel@tonic-gate 
20997c478bd9Sstevel@tonic-gate 	/*
21007c478bd9Sstevel@tonic-gate 	 * We don't need to grab vfs_dqrwlock here because it is held
21017c478bd9Sstevel@tonic-gate 	 * in ufs_direnter_*() above us.
21027c478bd9Sstevel@tonic-gate 	 */
21037c478bd9Sstevel@tonic-gate 	ASSERT(RW_READ_HELD(&ip->i_ufsvfs->vfs_dqrwlock));
21047c478bd9Sstevel@tonic-gate 	rw_enter(&ip->i_contents, RW_WRITER);
21057c478bd9Sstevel@tonic-gate 	if (ip->i_dquot != NULL) {
21067c478bd9Sstevel@tonic-gate 		err = ufs_fault(ITOV(ip),
21077c478bd9Sstevel@tonic-gate 		    "ufs_dirmakeinode, ip->i_dquot != NULL: dquot (%s)",
21087c478bd9Sstevel@tonic-gate 		    tdp->i_fs->fs_fsmnt);
21097c478bd9Sstevel@tonic-gate 		rw_exit(&ip->i_contents);
21107c478bd9Sstevel@tonic-gate 		return (err);
21117c478bd9Sstevel@tonic-gate 	}
21127c478bd9Sstevel@tonic-gate 	*ipp = ip;
21137c478bd9Sstevel@tonic-gate 	ip->i_mode = (o_mode_t)imode;
21147c478bd9Sstevel@tonic-gate 	if (type == VBLK || type == VCHR) {
21157c478bd9Sstevel@tonic-gate 		dev_t d = vap->va_rdev;
21167c478bd9Sstevel@tonic-gate 		dev32_t dev32;
21177c478bd9Sstevel@tonic-gate 
21187c478bd9Sstevel@tonic-gate 		/*
21197c478bd9Sstevel@tonic-gate 		 * Don't allow a special file to be created with a
21207c478bd9Sstevel@tonic-gate 		 * dev_t that cannot be represented by this filesystem
21217c478bd9Sstevel@tonic-gate 		 * format on disk.
21227c478bd9Sstevel@tonic-gate 		 */
21237c478bd9Sstevel@tonic-gate 		if (!cmpldev(&dev32, d)) {
21247c478bd9Sstevel@tonic-gate 			err = EOVERFLOW;
21257c478bd9Sstevel@tonic-gate 			goto fail;
21267c478bd9Sstevel@tonic-gate 		}
21277c478bd9Sstevel@tonic-gate 
21287c478bd9Sstevel@tonic-gate 		ITOV(ip)->v_rdev = ip->i_rdev = d;
21297c478bd9Sstevel@tonic-gate 
21307c478bd9Sstevel@tonic-gate 		if (dev32 & ~((O_MAXMAJ << L_BITSMINOR32) | O_MAXMIN)) {
21317c478bd9Sstevel@tonic-gate 			ip->i_ordev = dev32; /* can't use old format */
21327c478bd9Sstevel@tonic-gate 		} else {
21337c478bd9Sstevel@tonic-gate 			ip->i_ordev = cmpdev(d);
21347c478bd9Sstevel@tonic-gate 		}
21357c478bd9Sstevel@tonic-gate 	}
21367c478bd9Sstevel@tonic-gate 	ITOV(ip)->v_type = type;
21377c478bd9Sstevel@tonic-gate 	ufs_reset_vnode(ip->i_vnode);
21387c478bd9Sstevel@tonic-gate 	if (type == VDIR) {
21397c478bd9Sstevel@tonic-gate 		ip->i_nlink = 2; /* anticipating a call to dirmakedirect */
21407c478bd9Sstevel@tonic-gate 	} else {
21417c478bd9Sstevel@tonic-gate 		ip->i_nlink = 1;
21427c478bd9Sstevel@tonic-gate 	}
21437c478bd9Sstevel@tonic-gate 
21447c478bd9Sstevel@tonic-gate 	if (op == DE_ATTRDIR) {
21457c478bd9Sstevel@tonic-gate 		ip->i_uid = vap->va_uid;
21467c478bd9Sstevel@tonic-gate 		ip->i_gid = vap->va_gid;
21477c478bd9Sstevel@tonic-gate 	} else
21487c478bd9Sstevel@tonic-gate 		ip->i_uid = crgetuid(cr);
21497c478bd9Sstevel@tonic-gate 	/*
21507c478bd9Sstevel@tonic-gate 	 * To determine the group-id of the created file:
21517c478bd9Sstevel@tonic-gate 	 *   1) If the gid is set in the attribute list (non-Sun & pre-4.0
21527c478bd9Sstevel@tonic-gate 	 *	clients are not likely to set the gid), then use it if
21537c478bd9Sstevel@tonic-gate 	 *	the process is privileged, belongs to the target group,
21547c478bd9Sstevel@tonic-gate 	 *	or the group is the same as the parent directory.
21557c478bd9Sstevel@tonic-gate 	 *   2) If the filesystem was not mounted with the Old-BSD-compatible
21567c478bd9Sstevel@tonic-gate 	 *	GRPID option, and the directory's set-gid bit is clear,
21577c478bd9Sstevel@tonic-gate 	 *	then use the process's gid.
21587c478bd9Sstevel@tonic-gate 	 *   3) Otherwise, set the group-id to the gid of the parent directory.
21597c478bd9Sstevel@tonic-gate 	 */
21607c478bd9Sstevel@tonic-gate 	if (op != DE_ATTRDIR && (vap->va_mask & AT_GID) &&
21617c478bd9Sstevel@tonic-gate 	    ((vap->va_gid == tdp->i_gid) || groupmember(vap->va_gid, cr) ||
21627c478bd9Sstevel@tonic-gate 	    secpolicy_vnode_create_gid(cr) == 0)) {
21637c478bd9Sstevel@tonic-gate 		/*
21647c478bd9Sstevel@tonic-gate 		 * XXX - is this only the case when a 4.0 NFS client, or a
21657c478bd9Sstevel@tonic-gate 		 * client derived from that code, makes a call over the wire?
21667c478bd9Sstevel@tonic-gate 		 */
21677c478bd9Sstevel@tonic-gate 		ip->i_gid = vap->va_gid;
21687c478bd9Sstevel@tonic-gate 	} else
21697c478bd9Sstevel@tonic-gate 		ip->i_gid = (tdp->i_mode & ISGID) ? tdp->i_gid : crgetgid(cr);
21707c478bd9Sstevel@tonic-gate 
21717c478bd9Sstevel@tonic-gate 	/*
21727c478bd9Sstevel@tonic-gate 	 * For SunOS 5.0->5.4, the lines below read:
21737c478bd9Sstevel@tonic-gate 	 *
21747c478bd9Sstevel@tonic-gate 	 * ip->i_suid = (ip->i_uid > MAXUID) ? UID_LONG : ip->i_uid;
21757c478bd9Sstevel@tonic-gate 	 * ip->i_sgid = (ip->i_gid > MAXUID) ? GID_LONG : ip->i_gid;
21767c478bd9Sstevel@tonic-gate 	 *
21777c478bd9Sstevel@tonic-gate 	 * where MAXUID was set to 60002.  See notes on this in ufs_inode.c
21787c478bd9Sstevel@tonic-gate 	 */
217980d34432Sfrankho 	ip->i_suid =
218080d34432Sfrankho 	    (ulong_t)ip->i_uid > (ulong_t)USHRT_MAX ? UID_LONG : ip->i_uid;
218180d34432Sfrankho 	ip->i_sgid =
218280d34432Sfrankho 	    (ulong_t)ip->i_gid > (ulong_t)USHRT_MAX ? GID_LONG : ip->i_gid;
21837c478bd9Sstevel@tonic-gate 
21847c478bd9Sstevel@tonic-gate 	/*
21857c478bd9Sstevel@tonic-gate 	 * If we're creating a directory, and the parent directory has the
21867c478bd9Sstevel@tonic-gate 	 * set-GID bit set, set it on the new directory.
21877c478bd9Sstevel@tonic-gate 	 * Otherwise, if the user is neither privileged nor a member of the
21887c478bd9Sstevel@tonic-gate 	 * file's new group, clear the file's set-GID bit.
21897c478bd9Sstevel@tonic-gate 	 */
21907c478bd9Sstevel@tonic-gate 	if ((tdp->i_mode & ISGID) && (type == VDIR))
21917c478bd9Sstevel@tonic-gate 		ip->i_mode |= ISGID;
21927c478bd9Sstevel@tonic-gate 	else {
21937c478bd9Sstevel@tonic-gate 		if ((ip->i_mode & ISGID) &&
21947c478bd9Sstevel@tonic-gate 		    secpolicy_vnode_setids_setgids(cr, ip->i_gid) != 0)
21957c478bd9Sstevel@tonic-gate 			ip->i_mode &= ~ISGID;
21967c478bd9Sstevel@tonic-gate 	}
21977c478bd9Sstevel@tonic-gate 
21987c478bd9Sstevel@tonic-gate 	if (((vap->va_mask & AT_ATIME) && TIMESPEC_OVERFLOW(&vap->va_atime)) ||
21997c478bd9Sstevel@tonic-gate 	    ((vap->va_mask & AT_MTIME) && TIMESPEC_OVERFLOW(&vap->va_mtime))) {
22007c478bd9Sstevel@tonic-gate 		err = EOVERFLOW;
22017c478bd9Sstevel@tonic-gate 		goto fail;
22027c478bd9Sstevel@tonic-gate 	}
22037c478bd9Sstevel@tonic-gate 
22047c478bd9Sstevel@tonic-gate 	/*
22057c478bd9Sstevel@tonic-gate 	 * Extended attribute directories are not subject to quotas.
22067c478bd9Sstevel@tonic-gate 	 */
22077c478bd9Sstevel@tonic-gate 	if (op != DE_ATTRDIR)
22087c478bd9Sstevel@tonic-gate 		ip->i_dquot = getinoquota(ip);
22097c478bd9Sstevel@tonic-gate 	else
22107c478bd9Sstevel@tonic-gate 		ip->i_dquot = NULL;
22117c478bd9Sstevel@tonic-gate 
22127c478bd9Sstevel@tonic-gate 	if (op == DE_MKDIR || op == DE_ATTRDIR) {
22137c478bd9Sstevel@tonic-gate 		err = ufs_dirmakedirect(ip, tdp, (op == DE_MKDIR) ? 0 : 1, cr);
22147c478bd9Sstevel@tonic-gate 		if (err)
22157c478bd9Sstevel@tonic-gate 			goto fail;
22167c478bd9Sstevel@tonic-gate 	}
22177c478bd9Sstevel@tonic-gate 
22187c478bd9Sstevel@tonic-gate 	/*
22197c478bd9Sstevel@tonic-gate 	 * generate the shadow inode and attach it to the new object
22207c478bd9Sstevel@tonic-gate 	 */
22217c478bd9Sstevel@tonic-gate 	ASSERT((tdp->i_shadow && tdp->i_ufs_acl) ||
22227c478bd9Sstevel@tonic-gate 	    (!tdp->i_shadow && !tdp->i_ufs_acl));
22237c478bd9Sstevel@tonic-gate 	if (tdp->i_shadow && tdp->i_ufs_acl &&
22247c478bd9Sstevel@tonic-gate 	    (((tdp->i_mode & IFMT) == IFDIR) ||
22257c478bd9Sstevel@tonic-gate 	    ((tdp->i_mode & IFMT) == IFATTRDIR))) {
22267c478bd9Sstevel@tonic-gate 		err = ufs_si_inherit(ip, tdp, ip->i_mode, cr);
22277c478bd9Sstevel@tonic-gate 		if (err) {
22287c478bd9Sstevel@tonic-gate 			if (op == DE_MKDIR) {
22297c478bd9Sstevel@tonic-gate 				/*
22307c478bd9Sstevel@tonic-gate 				 * clean up parent directory
22317c478bd9Sstevel@tonic-gate 				 *
22327c478bd9Sstevel@tonic-gate 				 * tdp->i_contents already locked from
22337c478bd9Sstevel@tonic-gate 				 * ufs_direnter_*()
22347c478bd9Sstevel@tonic-gate 				 */
22357c478bd9Sstevel@tonic-gate 				tdp->i_nlink--;
22367c478bd9Sstevel@tonic-gate 				TRANS_INODE(tdp->i_ufsvfs, tdp);
22377c478bd9Sstevel@tonic-gate 				tdp->i_flag |= ICHG;
22387c478bd9Sstevel@tonic-gate 				tdp->i_seq++;
22397c478bd9Sstevel@tonic-gate 				ufs_iupdat(tdp, I_SYNC);
22407c478bd9Sstevel@tonic-gate 			}
22417c478bd9Sstevel@tonic-gate 			goto fail;
22427c478bd9Sstevel@tonic-gate 		}
22437c478bd9Sstevel@tonic-gate 	}
22447c478bd9Sstevel@tonic-gate 
22457c478bd9Sstevel@tonic-gate 	/*
22467c478bd9Sstevel@tonic-gate 	 * If the passed in attributes contain atime and/or mtime
22477c478bd9Sstevel@tonic-gate 	 * settings, then use them instead of using the current
22487c478bd9Sstevel@tonic-gate 	 * high resolution time.
22497c478bd9Sstevel@tonic-gate 	 */
22507c478bd9Sstevel@tonic-gate 	if (vap->va_mask & (AT_MTIME|AT_ATIME)) {
22517c478bd9Sstevel@tonic-gate 		if (vap->va_mask & AT_ATIME) {
22527c478bd9Sstevel@tonic-gate 			ip->i_atime.tv_sec = vap->va_atime.tv_sec;
22537c478bd9Sstevel@tonic-gate 			ip->i_atime.tv_usec = vap->va_atime.tv_nsec / 1000;
22547c478bd9Sstevel@tonic-gate 			ip->i_flag &= ~IACC;
22557c478bd9Sstevel@tonic-gate 		} else
22567c478bd9Sstevel@tonic-gate 			ip->i_flag |= IACC;
22577c478bd9Sstevel@tonic-gate 		if (vap->va_mask & AT_MTIME) {
22587c478bd9Sstevel@tonic-gate 			ip->i_mtime.tv_sec = vap->va_mtime.tv_sec;
22597c478bd9Sstevel@tonic-gate 			ip->i_mtime.tv_usec = vap->va_mtime.tv_nsec / 1000;
22607c478bd9Sstevel@tonic-gate 			gethrestime(&now);
22617c478bd9Sstevel@tonic-gate 			if (now.tv_sec > TIME32_MAX) {
22627c478bd9Sstevel@tonic-gate 				/*
22637c478bd9Sstevel@tonic-gate 				 * In 2038, ctime sticks forever..
22647c478bd9Sstevel@tonic-gate 				 */
22657c478bd9Sstevel@tonic-gate 				ip->i_ctime.tv_sec = TIME32_MAX;
22667c478bd9Sstevel@tonic-gate 				ip->i_ctime.tv_usec = 0;
22677c478bd9Sstevel@tonic-gate 			} else {
22687c478bd9Sstevel@tonic-gate 				ip->i_ctime.tv_sec = now.tv_sec;
22697c478bd9Sstevel@tonic-gate 				ip->i_ctime.tv_usec = now.tv_nsec / 1000;
22707c478bd9Sstevel@tonic-gate 			}
22717c478bd9Sstevel@tonic-gate 			ip->i_flag &= ~(IUPD|ICHG);
22727c478bd9Sstevel@tonic-gate 			ip->i_flag |= IMODTIME;
22737c478bd9Sstevel@tonic-gate 		} else
22747c478bd9Sstevel@tonic-gate 			ip->i_flag |= IUPD|ICHG;
22757c478bd9Sstevel@tonic-gate 		ip->i_flag |= IMOD;
22767c478bd9Sstevel@tonic-gate 	} else
22777c478bd9Sstevel@tonic-gate 		ip->i_flag |= IACC|IUPD|ICHG;
22787c478bd9Sstevel@tonic-gate 	ip->i_seq++;
22797c478bd9Sstevel@tonic-gate 
22807c478bd9Sstevel@tonic-gate 	/*
22817c478bd9Sstevel@tonic-gate 	 * If this is an attribute tag it as one.
22827c478bd9Sstevel@tonic-gate 	 */
22837c478bd9Sstevel@tonic-gate 	if ((tdp->i_mode & IFMT) == IFATTRDIR) {
22847c478bd9Sstevel@tonic-gate 		ip->i_cflags |= IXATTR;
22857c478bd9Sstevel@tonic-gate 	}
22867c478bd9Sstevel@tonic-gate 
22877c478bd9Sstevel@tonic-gate 	/*
	 * push inode before its name appears in a directory
22897c478bd9Sstevel@tonic-gate 	 */
22907c478bd9Sstevel@tonic-gate 	TRANS_INODE(ip->i_ufsvfs, ip);
22917c478bd9Sstevel@tonic-gate 	ufs_iupdat(ip, I_SYNC);
22927c478bd9Sstevel@tonic-gate 	rw_exit(&ip->i_contents);
22937c478bd9Sstevel@tonic-gate 	return (0);
22947c478bd9Sstevel@tonic-gate 
22957c478bd9Sstevel@tonic-gate fail:
22967c478bd9Sstevel@tonic-gate 	/* Throw away inode we just allocated. */
22977c478bd9Sstevel@tonic-gate 	ip->i_nlink = 0;
22987c478bd9Sstevel@tonic-gate 	ufs_setreclaim(ip);
22997c478bd9Sstevel@tonic-gate 	TRANS_INODE(ip->i_ufsvfs, ip);
23007c478bd9Sstevel@tonic-gate 	ip->i_flag |= ICHG;
23017c478bd9Sstevel@tonic-gate 	ip->i_seq++;
23027c478bd9Sstevel@tonic-gate 	ITIMES_NOLOCK(ip);
23037c478bd9Sstevel@tonic-gate 	rw_exit(&ip->i_contents);
23047c478bd9Sstevel@tonic-gate 	return (err);
23057c478bd9Sstevel@tonic-gate }
23067c478bd9Sstevel@tonic-gate 
23077c478bd9Sstevel@tonic-gate /*
23087c478bd9Sstevel@tonic-gate  * Write a prototype directory into the empty inode ip, whose parent is dp.
23097c478bd9Sstevel@tonic-gate  */
23107c478bd9Sstevel@tonic-gate static int
ufs_dirmakedirect(struct inode * ip,struct inode * dp,int attrdir,struct cred * cr)23117c478bd9Sstevel@tonic-gate ufs_dirmakedirect(
23127c478bd9Sstevel@tonic-gate 	struct inode *ip,		/* new directory */
23137c478bd9Sstevel@tonic-gate 	struct inode *dp,		/* parent directory */
23147c478bd9Sstevel@tonic-gate 	int	attrdir,
23157c478bd9Sstevel@tonic-gate 	struct cred *cr)
23167c478bd9Sstevel@tonic-gate {
23177c478bd9Sstevel@tonic-gate 	struct dirtemplate *dirp;
23187c478bd9Sstevel@tonic-gate 	struct fbuf *fbp;
23197c478bd9Sstevel@tonic-gate 	int err;
23207c478bd9Sstevel@tonic-gate 
23217c478bd9Sstevel@tonic-gate 	ASSERT(RW_WRITE_HELD(&ip->i_contents));
23227c478bd9Sstevel@tonic-gate 	ASSERT(RW_WRITE_HELD(&dp->i_rwlock));
23237c478bd9Sstevel@tonic-gate 	ASSERT(RW_WRITE_HELD(&dp->i_contents));
23247c478bd9Sstevel@tonic-gate 	/*
23257c478bd9Sstevel@tonic-gate 	 * Allocate space for the directory we're creating.
23267c478bd9Sstevel@tonic-gate 	 */
23277c478bd9Sstevel@tonic-gate 	err = BMAPALLOC(ip, (u_offset_t)0, DIRBLKSIZ, cr);
23287c478bd9Sstevel@tonic-gate 	if (err)
23297c478bd9Sstevel@tonic-gate 		return (err);
23307c478bd9Sstevel@tonic-gate 	if (DIRBLKSIZ > dp->i_fs->fs_fsize) {
23317c478bd9Sstevel@tonic-gate 		err = ufs_fault(ITOV(dp),
23327c478bd9Sstevel@tonic-gate "ufs_dirmakedirect: bad fs_fsize, DIRBLKSIZ: %d > dp->i_fs->fs_fsize: %d (%s)",
23337c478bd9Sstevel@tonic-gate 		    DIRBLKSIZ, dp->i_fs->fs_fsize,
23347c478bd9Sstevel@tonic-gate 		    dp->i_fs->fs_fsmnt);
23357c478bd9Sstevel@tonic-gate 		return (err);
23367c478bd9Sstevel@tonic-gate 	}
23377c478bd9Sstevel@tonic-gate 	ip->i_size = DIRBLKSIZ;
23387c478bd9Sstevel@tonic-gate 	TRANS_INODE(ip->i_ufsvfs, ip);
23397c478bd9Sstevel@tonic-gate 	ip->i_flag |= IUPD|ICHG|IATTCHG;
23407c478bd9Sstevel@tonic-gate 	ip->i_seq++;
23417c478bd9Sstevel@tonic-gate 	ITIMES_NOLOCK(ip);
23427c478bd9Sstevel@tonic-gate 	/*
23437c478bd9Sstevel@tonic-gate 	 * Update the tdp link count and write out the change.
23447c478bd9Sstevel@tonic-gate 	 * This reflects the ".." entry we'll soon write.
23457c478bd9Sstevel@tonic-gate 	 */
23467c478bd9Sstevel@tonic-gate 	if (dp->i_nlink == MAXLINK)
23477c478bd9Sstevel@tonic-gate 		return (EMLINK);
23487c478bd9Sstevel@tonic-gate 	if (attrdir == 0)
23497c478bd9Sstevel@tonic-gate 		dp->i_nlink++;
23507c478bd9Sstevel@tonic-gate 	TRANS_INODE(dp->i_ufsvfs, dp);
23517c478bd9Sstevel@tonic-gate 	dp->i_flag |= ICHG;
23527c478bd9Sstevel@tonic-gate 	dp->i_seq++;
23537c478bd9Sstevel@tonic-gate 	ufs_iupdat(dp, I_SYNC);
23547c478bd9Sstevel@tonic-gate 	/*
23557c478bd9Sstevel@tonic-gate 	 * Initialize directory with "."
23567c478bd9Sstevel@tonic-gate 	 * and ".." from static template.
23577c478bd9Sstevel@tonic-gate 	 *
23587c478bd9Sstevel@tonic-gate 	 * Since the parent directory is locked, we don't have to
23597c478bd9Sstevel@tonic-gate 	 * worry about anything changing when we drop the write
23607c478bd9Sstevel@tonic-gate 	 * lock on (ip).
23617c478bd9Sstevel@tonic-gate 	 *
23627c478bd9Sstevel@tonic-gate 	 */
23637c478bd9Sstevel@tonic-gate 	err = fbread(ITOV(ip), (offset_t)0, (uint_t)ip->i_fs->fs_fsize,
23647c478bd9Sstevel@tonic-gate 	    S_READ, &fbp);
23657c478bd9Sstevel@tonic-gate 
23667c478bd9Sstevel@tonic-gate 	if (err) {
23677c478bd9Sstevel@tonic-gate 		goto fail;
23687c478bd9Sstevel@tonic-gate 	}
23697c478bd9Sstevel@tonic-gate 	dirp = (struct dirtemplate *)fbp->fb_addr;
23707c478bd9Sstevel@tonic-gate 	/*
23717c478bd9Sstevel@tonic-gate 	 * Now initialize the directory we're creating
23727c478bd9Sstevel@tonic-gate 	 * with the "." and ".." entries.
23737c478bd9Sstevel@tonic-gate 	 */
23747c478bd9Sstevel@tonic-gate 	*dirp = mastertemplate;			/* structure assignment */
23757c478bd9Sstevel@tonic-gate 	dirp->dot_ino = (uint32_t)ip->i_number;
23767c478bd9Sstevel@tonic-gate 	dirp->dotdot_ino = (uint32_t)dp->i_number;
23777c478bd9Sstevel@tonic-gate 
23787c478bd9Sstevel@tonic-gate 	err = TRANS_DIR(ip, 0);
23797c478bd9Sstevel@tonic-gate 	if (err) {
23807c478bd9Sstevel@tonic-gate 		fbrelse(fbp, S_OTHER);
23817c478bd9Sstevel@tonic-gate 		goto fail;
23827c478bd9Sstevel@tonic-gate 	}
23837c478bd9Sstevel@tonic-gate 
23847c478bd9Sstevel@tonic-gate 	err = ufs_fbwrite(fbp, ip);
23857c478bd9Sstevel@tonic-gate 	if (err) {
23867c478bd9Sstevel@tonic-gate 		goto fail;
23877c478bd9Sstevel@tonic-gate 	}
23887c478bd9Sstevel@tonic-gate 
23897c478bd9Sstevel@tonic-gate 	return (0);
23907c478bd9Sstevel@tonic-gate 
23917c478bd9Sstevel@tonic-gate fail:
23927c478bd9Sstevel@tonic-gate 	if (attrdir == 0)
23937c478bd9Sstevel@tonic-gate 		dp->i_nlink--;
23947c478bd9Sstevel@tonic-gate 	TRANS_INODE(dp->i_ufsvfs, dp);
23957c478bd9Sstevel@tonic-gate 	dp->i_flag |= ICHG;
23967c478bd9Sstevel@tonic-gate 	dp->i_seq++;
23977c478bd9Sstevel@tonic-gate 	ufs_iupdat(dp, I_SYNC);
23987c478bd9Sstevel@tonic-gate 	return (err);
23997c478bd9Sstevel@tonic-gate }
24007c478bd9Sstevel@tonic-gate 
/*
 * Delete a directory entry.  If oip is nonzero the entry is checked
 * to make sure it still reflects oip.
 *
 *	dp	- directory holding the entry; its i_rwlock must be held
 *		  as writer by the caller (asserted below).  On the retry
 *		  path that lock is dropped and re-taken.
 *	namep	- name of the entry to remove.  An empty name yields
 *		  ENOENT (plus a warning), "." yields EINVAL and ".."
 *		  yields EEXIST.
 *	oip	- if non-NULL, the inode the entry is expected to name;
 *		  a mismatch yields ENOENT.
 *	cdir	- vnode that must not be removed by DR_RMDIR (EINVAL if
 *		  the target is this vnode).
 *	op	- DR_RMDIR, DR_REMOVE or DR_RENAME; selects the checks
 *		  applied and how link counts are adjusted.
 *	cr	- credentials used for the access and policy checks.
 *
 * Returns 0 on success or an errno value.
 */
int
ufs_dirremove(
	struct inode *dp,
	char *namep,
	struct inode *oip,
	struct vnode *cdir,
	enum dr_op op,
	struct cred *cr)
{
	struct direct *ep, *pep, *nep;
	struct inode *ip;
	vnode_t *dvp, *vp;
	struct ufs_slot slot;
	int namlen;
	int err;
	int mode;
	ushort_t extra;

	namlen = (int)strlen(namep);
	if (namlen == 0) {
		struct fs	*fs = dp->i_fs;

		cmn_err(CE_WARN, "%s: ufs_dirremove: attempted to remove"
		    " nameless file in directory (directory inode %llu)",
		    fs->fs_fsmnt, (u_longlong_t)dp->i_number);
		ASSERT(namlen != 0);

		return (ENOENT);
	}

	/*
	 * return error when removing . and ..
	 */
	if (namep[0] == '.') {
		if (namlen == 1)
			return (EINVAL);
		else if (namlen == 2 && namep[1] == '.') {
			return (EEXIST);	/* SIGH should be ENOTEMPTY */
		}
	}

	ASSERT(RW_WRITE_HELD(&dp->i_rwlock));

retry:
	/*
	 * Check accessibility of directory.
	 */
	if (err = ufs_diraccess(dp, IEXEC|IWRITE, cr))
		return (err);

	ip = NULL;
	slot.fbp = NULL;
	slot.status = FOUND;	/* don't need to look for empty slot */
	rw_enter(&dp->i_ufsvfs->vfs_dqrwlock, RW_READER);
	rw_enter(&dp->i_contents, RW_WRITER);

	/*
	 * Look the name up; on success ip is set to the inode it names
	 * (or left NULL if there is no such entry) and slot describes
	 * where the entry lives.
	 */
	err = ufs_dircheckforname(dp, namep, namlen, &slot, &ip, cr, 0);
	if (err)
		goto out_novfs;
	if (ip == NULL) {
		err = ENOENT;
		goto out_novfs;
	}
	vp = ITOV(ip);
	if (oip && oip != ip) {
		err = ENOENT;
		goto out_novfs;
	}

	/*
	 * mode is sampled once here and decides, for the rest of the
	 * function, whether ip->i_rwlock and the vn_vfs lock are held
	 * and therefore need to be released.
	 */
	mode = ip->i_mode & IFMT;
	if (mode == IFDIR || mode == IFATTRDIR) {

		/*
		 * vn_vfsrlock() prevents races between mount and rmdir.
		 */
		if (vn_vfsrlock(vp)) {
			err = EBUSY;
			goto out_novfs;
		}
		if (vn_mountedvfs(vp) != NULL && op != DR_RENAME) {
			err = EBUSY;
			goto out;
		}
		/*
		 * If we are removing a directory, get a lock on it.
		 * Taking a writer lock prevents a parallel ufs_dirlook from
		 * incorrectly entering a negative cache vnode entry in the dnlc
		 * If the directory is empty, it will stay empty until
		 * we can remove it.
		 */
		if (!rw_tryenter(&ip->i_rwlock, RW_WRITER)) {
			/*
			 * It is possible that a thread in rename would have
			 * acquired this rwlock. To prevent a deadlock we
			 * do a rw_tryenter. If we fail to get the lock
			 * we drop all the locks we have acquired, wait
			 * for 2 ticks and reacquire the
			 * directory's (dp) i_rwlock and try again.
			 * If we don't drop dp's i_rwlock then we will panic
			 * with a "Deadlock: cycle in blocking chain"
			 * since in ufs_dircheckpath we want dp's i_rwlock.
			 * dp is guaranteed to exist since ufs_dirremove is
			 * called after a VN_HOLD(dp) has been done.
			 */
			ufs_dirremove_retry_cnt++;
			vn_vfsunlock(vp);
			if (slot.fbp)
				fbrelse(slot.fbp, S_OTHER);
			rw_exit(&dp->i_contents);
			rw_exit(&dp->i_ufsvfs->vfs_dqrwlock);
			rw_exit(&dp->i_rwlock);
			VN_RELE(vp);
			delay(2);
			rw_enter(&dp->i_rwlock, RW_WRITER);
			goto retry;
		}
	}
	rw_enter(&ip->i_contents, RW_READER);

	/*
	 * Now check the restrictions that apply on sticky directories.
	 */
	if ((err = ufs_sticky_remove_access(dp, ip, cr)) != 0) {
		rw_exit(&ip->i_contents);
		if (mode == IFDIR || mode == IFATTRDIR)
			rw_exit(&ip->i_rwlock);
		goto out;
	}

	if (op == DR_RMDIR) {
		/*
		 * For rmdir(2), some special checks are required.
		 * (a) Don't remove any alias of the parent (e.g. ".").
		 * (b) Don't remove the current directory.
		 * (c) Make sure the entry is (still) a directory.
		 * (d) Make sure the directory is empty.
		 */

		if (dp == ip || vp == cdir)
			err = EINVAL;
		else if (((ip->i_mode & IFMT) != IFDIR) &&
		    ((ip->i_mode & IFMT) != IFATTRDIR))
			err = ENOTDIR;
		else if ((ip->i_nlink > 2) ||
		    !ufs_dirempty(ip, dp->i_number, cr)) {
			err = EEXIST;	/* SIGH should be ENOTEMPTY */
		}

		if (err) {
			rw_exit(&ip->i_contents);
			if (mode == IFDIR || mode == IFATTRDIR)
				rw_exit(&ip->i_rwlock);
			goto out;
		}
	} else if (op == DR_REMOVE)  {
		/*
		 * unlink(2) requires a different check: allow only
		 * privileged users to unlink a directory.
		 */
		if (vp->v_type == VDIR &&
		    secpolicy_fs_linkdir(cr, vp->v_vfsp)) {
			err = EPERM;
			rw_exit(&ip->i_contents);
			/*
			 * NOTE(review): unconditional, unlike the other
			 * error paths -- presumably safe because a VDIR
			 * vnode implies the i_rwlock was taken above;
			 * confirm.
			 */
			rw_exit(&ip->i_rwlock);
			goto out;
		}
	}

	rw_exit(&ip->i_contents);

	/*
	 * Remove the cached entry, if any.
	 */
	dvp = ITOV(dp);
	dnlc_remove(dvp, namep);
	ep = slot.ep;
	ep->d_ino = 0;		/* mark the on-disk entry as unused */

	if (slot.cached) {
		dcanchor_t *dcap = &dp->i_danchor;

		/*
		 * Keep the directory cache anchored at dp->i_danchor in
		 * step with the change to the directory block.
		 */
		(void) dnlc_dir_rem_entry(dcap, namep, NULL);
		if (((int)ep->d_reclen - (int)DIRSIZ(ep)) >= LDIRSIZ(1)) {
			(void) dnlc_dir_rem_space_by_handle(dcap, slot.offset);
		}
		if (slot.offset & (DIRBLKSIZ - 1)) {
			/*
			 * Collapse new free space into previous entry.
			 * Note, the previous entry has already been
			 * validated in ufs_dircheckforname().
			 */
			ASSERT(slot.size);
			pep = (struct direct *)((char *)ep - slot.size);
			if ((pep->d_ino == 0) &&
			    ((uintptr_t)pep & (DIRBLKSIZ - 1))) {
				/*
				 * Previous entry is itself unused and is not
				 * the first one in its block: give up on the
				 * cache and skip the collapse.
				 */
				dnlc_dir_purge(dcap);
				slot.cached = 0;
				goto nocache;
			}
			/*
			 * extra is the free space the previous entry was
			 * carrying before the merge.
			 */
			if (pep->d_ino) {
				extra = pep->d_reclen - DIRSIZ(pep);
			} else {
				extra = pep->d_reclen;
			}
			if (extra >= LDIRSIZ(1)) {
				(void) dnlc_dir_rem_space_by_handle(dcap,
				    (uint64_t)(slot.offset - slot.size));
			}
			pep->d_reclen += ep->d_reclen;
			(void) dnlc_dir_add_space(dcap, extra + ep->d_reclen,
			    (uint64_t)(slot.offset - slot.size));
			/* adjust the previous pointer in the next entry */
			nep = (struct direct *)((char *)ep + ep->d_reclen);
			if ((uintptr_t)nep & (DIRBLKSIZ - 1)) {
				/*
				 * Not a new block.
				 *
				 * Check the validity of the entry.
				 * If it's bad, then throw away the cache and
				 * continue.
				 */
				if ((nep->d_reclen == 0) ||
				    (nep->d_reclen & 0x3) ||
				    (dnlc_dir_update(dcap, nep->d_name,
				    INO_OFF_TO_H(nep->d_ino,
				    slot.offset - slot.size)) == DNOENT)) {
					dnlc_dir_purge(dcap);
					slot.cached = 0;
				}
			}
		} else {
			/*
			 * Entry sits at the start of a directory block, so
			 * there is no previous entry to merge into; just
			 * record its space as free.
			 */
			(void) dnlc_dir_add_space(dcap, ep->d_reclen,
			    (uint64_t)slot.offset);
		}
	} else {
		/*
		 * If the entry isn't the first in the directory, we must
		 * reclaim the space of the now empty record by adding
		 * the record size to the size of the previous entry.
		 */
		if (slot.offset & (DIRBLKSIZ - 1)) {
			/*
			 * Collapse new free space into previous entry.
			 */
			pep = (struct direct *)((char *)ep - slot.size);
			pep->d_reclen += ep->d_reclen;
		}
	}
nocache:


	/*
	 * Write the modified directory block.  Either way the buffer is
	 * handed off here (fbrelse on error, ufs_fbwrite otherwise), so
	 * slot.fbp is cleared to keep the exit path from releasing it
	 * again.
	 */
	err = TRANS_DIR(dp, slot.offset);
	if (err)
		fbrelse(slot.fbp, S_OTHER);
	else
		err = ufs_fbwrite(slot.fbp, dp);
	slot.fbp = NULL;

	/*
	 * If we were removing a directory, it is 'gone' now, but we cannot
	 * unlock it as a thread may be waiting for the lock in ufs_create. If
	 * we did, it could then create a file in a deleted directory.
	 */

	if (err) {
		if (mode == IFDIR || mode == IFATTRDIR)
			rw_exit(&ip->i_rwlock);
		goto out;
	}

	rw_enter(&ip->i_contents, RW_WRITER);

	dp->i_flag |= IUPD|ICHG;
	dp->i_seq++;
	ip->i_flag |= ICHG;
	ip->i_seq++;

	TRANS_INODE(dp->i_ufsvfs, dp);
	TRANS_INODE(ip->i_ufsvfs, ip);
	/*
	 * Now dispose of the inode.
	 */
	if (ip->i_nlink > 0) {
		/*
		 * This is not done for IFATTRDIR's because they don't
		 * have entries in the dnlc and the link counts are
		 * not incremented when they are created.
		 */
		if (op == DR_RMDIR && (ip->i_mode & IFMT) == IFDIR) {
			/*
			 * Decrement by 2 because we're trashing the "."
			 * entry as well as removing the entry in dp.
			 * Clear the directory entry, but there may be
			 * other hard links so don't free the inode.
			 * Decrement the dp linkcount because we're
			 * trashing the ".." entry.
			 */
			ip->i_nlink -= 2;
			dp->i_nlink--;
			ufs_setreclaim(dp);
			/*
			 * XXX need to discard negative cache entries
			 * for vp.  See comment in ufs_delete().
			 */
			dnlc_remove(vp, ".");
			dnlc_remove(vp, "..");
			/*
			 * The return value is ignored here because if
			 * the directory purge fails we don't want to
			 * stop the delete. If ufs_dirpurgedotdot fails
			 * the delete will continue with the preexisting
			 * behavior.
			 */
			(void) ufs_dirpurgedotdot(ip, dp->i_number, cr);
		} else {
			ip->i_nlink--;
		}
		ufs_setreclaim(ip);
	}
	ITIMES_NOLOCK(dp);
	ITIMES_NOLOCK(ip);

	/*
	 * The inodes are pushed synchronously only when TRANS_ISTRANS()
	 * is false for the file system.
	 */
	if (!TRANS_ISTRANS(dp->i_ufsvfs))
		ufs_iupdat(dp, I_SYNC);
	if (!TRANS_ISTRANS(ip->i_ufsvfs))
		ufs_iupdat(ip, I_SYNC);

	rw_exit(&ip->i_contents);
	if (mode == IFDIR || mode == IFATTRDIR)
		rw_exit(&ip->i_rwlock);
out:
	/*
	 * Reached only after mode has been set; drops the lock taken by
	 * vn_vfsrlock() for directories.
	 */
	if (mode == IFDIR || mode == IFATTRDIR) {
		vn_vfsunlock(vp);
	}
out_novfs:
	ASSERT(RW_WRITE_HELD(&dp->i_contents));

	if (slot.fbp)
		fbrelse(slot.fbp, S_OTHER);

	rw_exit(&dp->i_contents);
	rw_exit(&dp->i_ufsvfs->vfs_dqrwlock);

	/*
	 * Release (and delete) the inode after we drop vfs_dqrwlock to
	 * avoid deadlock since ufs_delete() grabs vfs_dqrwlock as reader.
	 */
	if (ip)
		VN_RELE(vp);

	return (err);
}
27577c478bd9Sstevel@tonic-gate 
27587c478bd9Sstevel@tonic-gate /*
27597c478bd9Sstevel@tonic-gate  * Return buffer with contents of block "offset"
27607c478bd9Sstevel@tonic-gate  * from the beginning of directory "ip".  If "res"
27617c478bd9Sstevel@tonic-gate  * is non-zero, fill it in with a pointer to the
27627c478bd9Sstevel@tonic-gate  * remaining space in the directory.
27637c478bd9Sstevel@tonic-gate  *
27647c478bd9Sstevel@tonic-gate  */
27657c478bd9Sstevel@tonic-gate 
27667c478bd9Sstevel@tonic-gate int
blkatoff(struct inode * ip,off_t offset,char ** res,struct fbuf ** fbpp)27677c478bd9Sstevel@tonic-gate blkatoff(
27687c478bd9Sstevel@tonic-gate 	struct inode *ip,
27697c478bd9Sstevel@tonic-gate 	off_t offset,
27707c478bd9Sstevel@tonic-gate 	char **res,
27717c478bd9Sstevel@tonic-gate 	struct fbuf **fbpp)
27727c478bd9Sstevel@tonic-gate {
27737c478bd9Sstevel@tonic-gate 	struct fs *fs;
27747c478bd9Sstevel@tonic-gate 	struct fbuf *fbp;
27757c478bd9Sstevel@tonic-gate 	daddr_t lbn;
27767c478bd9Sstevel@tonic-gate 	uint_t bsize;
27777c478bd9Sstevel@tonic-gate 	int err;
27787c478bd9Sstevel@tonic-gate 
27797c478bd9Sstevel@tonic-gate 	CPU_STATS_ADD_K(sys, ufsdirblk, 1);
27807c478bd9Sstevel@tonic-gate 	fs = ip->i_fs;
27817c478bd9Sstevel@tonic-gate 	lbn = (daddr_t)lblkno(fs, offset);
27827c478bd9Sstevel@tonic-gate 	bsize = (uint_t)blksize(fs, ip, lbn);
27837c478bd9Sstevel@tonic-gate 	err = fbread(ITOV(ip), (offset_t)(offset & fs->fs_bmask),
27847c478bd9Sstevel@tonic-gate 	    bsize, S_READ, &fbp);
27857c478bd9Sstevel@tonic-gate 	if (err) {
27867c478bd9Sstevel@tonic-gate 		*fbpp = (struct fbuf *)NULL;
27877c478bd9Sstevel@tonic-gate 		return (err);
27887c478bd9Sstevel@tonic-gate 	}
27897c478bd9Sstevel@tonic-gate 	if (res)
27907c478bd9Sstevel@tonic-gate 		*res = fbp->fb_addr + blkoff(fs, offset);
27917c478bd9Sstevel@tonic-gate 	*fbpp = fbp;
27927c478bd9Sstevel@tonic-gate 	return (0);
27937c478bd9Sstevel@tonic-gate }
27947c478bd9Sstevel@tonic-gate 
27957c478bd9Sstevel@tonic-gate /*
27967c478bd9Sstevel@tonic-gate  * Do consistency checking:
27977c478bd9Sstevel@tonic-gate  *	record length must be multiple of 4
27987c478bd9Sstevel@tonic-gate  *	entry must fit in rest of its DIRBLKSIZ block
27997c478bd9Sstevel@tonic-gate  *	record must be large enough to contain entry
28007c478bd9Sstevel@tonic-gate  *	name is not longer than MAXNAMLEN
28017c478bd9Sstevel@tonic-gate  *	name must be as long as advertised, and null terminated
28027c478bd9Sstevel@tonic-gate  * NOTE: record length must not be zero (should be checked previously).
28037c478bd9Sstevel@tonic-gate  *       This routine is only called if dirchk is true.
28047c478bd9Sstevel@tonic-gate  *       It would be nice to set the FSBAD flag in the super-block when
28057c478bd9Sstevel@tonic-gate  *       this routine fails so that a fsck is forced on next reboot,
28067c478bd9Sstevel@tonic-gate  *       but locking is a problem.
28077c478bd9Sstevel@tonic-gate  */
28087c478bd9Sstevel@tonic-gate static int
dirmangled(struct inode * dp,struct direct * ep,int entryoffsetinblock,off_t offset)28097c478bd9Sstevel@tonic-gate dirmangled(
28107c478bd9Sstevel@tonic-gate 	struct inode *dp,
28117c478bd9Sstevel@tonic-gate 	struct direct *ep,
28127c478bd9Sstevel@tonic-gate 	int entryoffsetinblock,
28137c478bd9Sstevel@tonic-gate 	off_t offset)
28147c478bd9Sstevel@tonic-gate {
28157c478bd9Sstevel@tonic-gate 	int i;
28167c478bd9Sstevel@tonic-gate 
28177c478bd9Sstevel@tonic-gate 	i = DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1));
28187c478bd9Sstevel@tonic-gate 	if ((ep->d_reclen & 0x3) != 0 || (int)ep->d_reclen > i ||
28197c478bd9Sstevel@tonic-gate 	    (uint_t)ep->d_reclen < DIRSIZ(ep) || ep->d_namlen > MAXNAMLEN ||
28207c478bd9Sstevel@tonic-gate 	    ep->d_ino && dirbadname(ep->d_name, (int)ep->d_namlen)) {
28217c478bd9Sstevel@tonic-gate 		dirbad(dp, "mangled entry", offset);
28227c478bd9Sstevel@tonic-gate 		return (1);
28237c478bd9Sstevel@tonic-gate 	}
28247c478bd9Sstevel@tonic-gate 	return (0);
28257c478bd9Sstevel@tonic-gate }
28267c478bd9Sstevel@tonic-gate 
28277c478bd9Sstevel@tonic-gate static void
dirbad(struct inode * ip,char * how,off_t offset)28287c478bd9Sstevel@tonic-gate dirbad(struct inode *ip, char *how, off_t offset)
28297c478bd9Sstevel@tonic-gate {
28307c478bd9Sstevel@tonic-gate 	cmn_err(CE_NOTE, "%s: bad dir ino %d at offset %ld: %s",
28317c478bd9Sstevel@tonic-gate 	    ip->i_fs->fs_fsmnt, (int)ip->i_number, offset, how);
28327c478bd9Sstevel@tonic-gate }
28337c478bd9Sstevel@tonic-gate 
/*
 * Validate a directory entry name of advertised length l.
 *
 * Returns 1 if a NUL byte occurs within the first l bytes of sp.
 * Otherwise returns the byte that follows those l bytes, which is 0
 * (name is good) only when the name is properly NUL terminated.
 */
static int
dirbadname(char *sp, int l)
{
	for (; l != 0; l--) {
		/* an embedded NUL means the name is shorter than claimed */
		if (*sp == '\0')
			return (1);
		sp++;
	}

	/* sp now points just past the name: must be the terminator */
	return (*sp);
}
28447c478bd9Sstevel@tonic-gate 
28457c478bd9Sstevel@tonic-gate /*
28467c478bd9Sstevel@tonic-gate  * Check if a directory is empty or not.
28477c478bd9Sstevel@tonic-gate  */
28487c478bd9Sstevel@tonic-gate static int
ufs_dirempty(struct inode * ip,ino_t parentino,struct cred * cr)28497c478bd9Sstevel@tonic-gate ufs_dirempty(
28507c478bd9Sstevel@tonic-gate 	struct inode *ip,
28517c478bd9Sstevel@tonic-gate 	ino_t parentino,
28527c478bd9Sstevel@tonic-gate 	struct cred *cr)
28537c478bd9Sstevel@tonic-gate {
28547c478bd9Sstevel@tonic-gate 	return (ufs_dirscan(ip, parentino, cr, 0));
28557c478bd9Sstevel@tonic-gate }
28567c478bd9Sstevel@tonic-gate 
28577c478bd9Sstevel@tonic-gate /*
28587c478bd9Sstevel@tonic-gate  * clear the .. directory entry.
28597c478bd9Sstevel@tonic-gate  */
28607c478bd9Sstevel@tonic-gate static int
ufs_dirpurgedotdot(struct inode * ip,ino_t parentino,struct cred * cr)28617c478bd9Sstevel@tonic-gate ufs_dirpurgedotdot(
28627c478bd9Sstevel@tonic-gate 	struct inode *ip,
28637c478bd9Sstevel@tonic-gate 	ino_t parentino,
28647c478bd9Sstevel@tonic-gate 	struct cred *cr)
28657c478bd9Sstevel@tonic-gate {
28667c478bd9Sstevel@tonic-gate 	return (ufs_dirscan(ip, parentino, cr, 1));
28677c478bd9Sstevel@tonic-gate }
28687c478bd9Sstevel@tonic-gate 
28697c478bd9Sstevel@tonic-gate /*
28707c478bd9Sstevel@tonic-gate  * Scan the directoy. If clr_dotdot is true clear the ..
28717c478bd9Sstevel@tonic-gate  * directory else check to see if the directory is empty.
28727c478bd9Sstevel@tonic-gate  *
28737c478bd9Sstevel@tonic-gate  * Using a struct dirtemplate here is not precisely
28747c478bd9Sstevel@tonic-gate  * what we want, but better than using a struct direct.
28757c478bd9Sstevel@tonic-gate  *
28767c478bd9Sstevel@tonic-gate  * clr_dotdot is used as a flag to tell us if we need
28777c478bd9Sstevel@tonic-gate  * to clear the dotdot entry
28787c478bd9Sstevel@tonic-gate  *
28797c478bd9Sstevel@tonic-gate  * N.B.: does not handle corrupted directories.
28807c478bd9Sstevel@tonic-gate  */
28817c478bd9Sstevel@tonic-gate static int
ufs_dirscan(struct inode * ip,ino_t parentino,struct cred * cr,int clr_dotdot)28827c478bd9Sstevel@tonic-gate ufs_dirscan(
28837c478bd9Sstevel@tonic-gate 	struct inode *ip,
28847c478bd9Sstevel@tonic-gate 	ino_t parentino,
28857c478bd9Sstevel@tonic-gate 	struct cred *cr,
28867c478bd9Sstevel@tonic-gate 	int clr_dotdot)
28877c478bd9Sstevel@tonic-gate {
28887c478bd9Sstevel@tonic-gate 	offset_t off;
28897c478bd9Sstevel@tonic-gate 	struct dirtemplate dbuf;
28907c478bd9Sstevel@tonic-gate 	struct direct *dp = (struct direct *)&dbuf;
28917c478bd9Sstevel@tonic-gate 	int err, count;
28927c478bd9Sstevel@tonic-gate 	int empty = 1;	/* Assume it's empty */
28937c478bd9Sstevel@tonic-gate #define	MINDIRSIZ (sizeof (struct dirtemplate) / 2)
28947c478bd9Sstevel@tonic-gate 
28957c478bd9Sstevel@tonic-gate 	ASSERT(RW_LOCK_HELD(&ip->i_contents));
28967c478bd9Sstevel@tonic-gate 
28977c478bd9Sstevel@tonic-gate 	ASSERT(ip->i_size <= (offset_t)MAXOFF_T);
28987c478bd9Sstevel@tonic-gate 	for (off = 0; off < ip->i_size; off += dp->d_reclen) {
28997c478bd9Sstevel@tonic-gate 		err = ufs_rdwri(UIO_READ, FREAD, ip, (caddr_t)dp,
29007c478bd9Sstevel@tonic-gate 		    (ssize_t)MINDIRSIZ, off, UIO_SYSSPACE, &count, cr);
29017c478bd9Sstevel@tonic-gate 		/*
29027c478bd9Sstevel@tonic-gate 		 * Since we read MINDIRSIZ, residual must
29037c478bd9Sstevel@tonic-gate 		 * be 0 unless we're at end of file.
29047c478bd9Sstevel@tonic-gate 		 */
29057c478bd9Sstevel@tonic-gate 		if (err || count != 0 || dp->d_reclen == 0) {
29067c478bd9Sstevel@tonic-gate 			empty = 0;
29077c478bd9Sstevel@tonic-gate 			break;
29087c478bd9Sstevel@tonic-gate 		}
29097c478bd9Sstevel@tonic-gate 		/* skip empty entries */
29107c478bd9Sstevel@tonic-gate 		if (dp->d_ino == 0)
29117c478bd9Sstevel@tonic-gate 			continue;
29127c478bd9Sstevel@tonic-gate 		/* accept only "." and ".." */
29137c478bd9Sstevel@tonic-gate 		if (dp->d_namlen > 2 || dp->d_name[0] != '.') {
29147c478bd9Sstevel@tonic-gate 			empty = 0;
29157c478bd9Sstevel@tonic-gate 			break;
29167c478bd9Sstevel@tonic-gate 		}
29177c478bd9Sstevel@tonic-gate 		/*
29187c478bd9Sstevel@tonic-gate 		 * At this point d_namlen must be 1 or 2.
29197c478bd9Sstevel@tonic-gate 		 * 1 implies ".", 2 implies ".." if second
29207c478bd9Sstevel@tonic-gate 		 * char is also "."
29217c478bd9Sstevel@tonic-gate 		 */
29227c478bd9Sstevel@tonic-gate 		if (dp->d_namlen == 1)
29237c478bd9Sstevel@tonic-gate 			continue;
29247c478bd9Sstevel@tonic-gate 		if (dp->d_name[1] == '.' &&
29257c478bd9Sstevel@tonic-gate 		    (ino_t)dp->d_ino == parentino) {
29267c478bd9Sstevel@tonic-gate 			/*
29277c478bd9Sstevel@tonic-gate 			 * If we're doing a purge we need to check for
29287c478bd9Sstevel@tonic-gate 			 * the . and .. entries and clear the d_ino for ..
29297c478bd9Sstevel@tonic-gate 			 *
29307c478bd9Sstevel@tonic-gate 			 * if clr_dotdot is set ufs_dirscan does not
29317c478bd9Sstevel@tonic-gate 			 * check for an empty directory.
29327c478bd9Sstevel@tonic-gate 			 */
29337c478bd9Sstevel@tonic-gate 			if (clr_dotdot) {
29347c478bd9Sstevel@tonic-gate 				/*
29357c478bd9Sstevel@tonic-gate 				 * Have to actually zap the ..
29367c478bd9Sstevel@tonic-gate 				 * entry in the directory, as
29377c478bd9Sstevel@tonic-gate 				 * otherwise someone might have
29387c478bd9Sstevel@tonic-gate 				 * dp as its cwd and try to
29397c478bd9Sstevel@tonic-gate 				 * open .., which now points to
29407c478bd9Sstevel@tonic-gate 				 * an unallocated inode.
29417c478bd9Sstevel@tonic-gate 				 */
29427c478bd9Sstevel@tonic-gate 				empty = ufs_dirclrdotdot(ip, parentino);
29437c478bd9Sstevel@tonic-gate 				break;
29447c478bd9Sstevel@tonic-gate 			} else {
29457c478bd9Sstevel@tonic-gate 				continue;
29467c478bd9Sstevel@tonic-gate 			}
29477c478bd9Sstevel@tonic-gate 		}
29487c478bd9Sstevel@tonic-gate 		empty = 0;
29497c478bd9Sstevel@tonic-gate 		break;
29507c478bd9Sstevel@tonic-gate 	}
29517c478bd9Sstevel@tonic-gate 	return (empty);
29527c478bd9Sstevel@tonic-gate }
29537c478bd9Sstevel@tonic-gate 
29547c478bd9Sstevel@tonic-gate clock_t retry_backoff_delay = 1; /* delay before retrying the i_rwlock */
29557c478bd9Sstevel@tonic-gate uint64_t dircheck_retry_cnt;
29567c478bd9Sstevel@tonic-gate /*
29577c478bd9Sstevel@tonic-gate  * Check if source directory inode is in the path of the target directory.
29587c478bd9Sstevel@tonic-gate  * Target is supplied locked.
29597c478bd9Sstevel@tonic-gate  *
29607c478bd9Sstevel@tonic-gate  * The source and target inode's should be different upon entry.
29617c478bd9Sstevel@tonic-gate  */
29627c478bd9Sstevel@tonic-gate int
ufs_dircheckpath(ino_t source_ino,struct inode * target,struct inode * sdp,struct cred * cr)29637c478bd9Sstevel@tonic-gate ufs_dircheckpath(
29647c478bd9Sstevel@tonic-gate 	ino_t source_ino,
29657c478bd9Sstevel@tonic-gate 	struct inode *target,
29667c478bd9Sstevel@tonic-gate 	struct inode *sdp,
29677c478bd9Sstevel@tonic-gate 	struct cred *cr)
29687c478bd9Sstevel@tonic-gate {
29697c478bd9Sstevel@tonic-gate 	struct fbuf *fbp;
29707c478bd9Sstevel@tonic-gate 	struct dirtemplate *dirp;
29717c478bd9Sstevel@tonic-gate 	struct inode *ip;
29727c478bd9Sstevel@tonic-gate 	struct ufsvfs *ufsvfsp;
29737c478bd9Sstevel@tonic-gate 	struct inode *tip;
29747c478bd9Sstevel@tonic-gate 	ino_t dotdotino;
29757c478bd9Sstevel@tonic-gate 	int err;
29767c478bd9Sstevel@tonic-gate 
29777c478bd9Sstevel@tonic-gate 	ASSERT(target->i_ufsvfs != NULL);
29787c478bd9Sstevel@tonic-gate 	ASSERT(RW_LOCK_HELD(&target->i_rwlock));
29797c478bd9Sstevel@tonic-gate 	ASSERT(RW_LOCK_HELD(&sdp->i_rwlock));
29807c478bd9Sstevel@tonic-gate 
29817c478bd9Sstevel@tonic-gate 	ip = target;
29827c478bd9Sstevel@tonic-gate 	if (ip->i_number == source_ino) {
29837c478bd9Sstevel@tonic-gate 		err = EINVAL;
29847c478bd9Sstevel@tonic-gate 		goto out;
29857c478bd9Sstevel@tonic-gate 	}
29867c478bd9Sstevel@tonic-gate 	if (ip->i_number == UFSROOTINO) {
29877c478bd9Sstevel@tonic-gate 		err = 0;
29887c478bd9Sstevel@tonic-gate 		goto out;
29897c478bd9Sstevel@tonic-gate 	}
29907c478bd9Sstevel@tonic-gate 	/*
29917c478bd9Sstevel@tonic-gate 	 * Search back through the directory tree, using the ".." entries.
29927c478bd9Sstevel@tonic-gate 	 * Fail any attempt to move a directory into an ancestor directory.
29937c478bd9Sstevel@tonic-gate 	 */
29947c478bd9Sstevel@tonic-gate 	fbp = NULL;
29957c478bd9Sstevel@tonic-gate 	for (;;) {
29967c478bd9Sstevel@tonic-gate 		struct vfs	*vfs;
29977c478bd9Sstevel@tonic-gate 
29987c478bd9Sstevel@tonic-gate 		err = blkatoff(ip, (off_t)0, (char **)&dirp, &fbp);
29997c478bd9Sstevel@tonic-gate 		if (err)
30007c478bd9Sstevel@tonic-gate 			break;
30017c478bd9Sstevel@tonic-gate 		if (((ip->i_mode & IFMT) != IFDIR) || ip->i_nlink == 0 ||
30027c478bd9Sstevel@tonic-gate 		    ip->i_size < sizeof (struct dirtemplate)) {
30037c478bd9Sstevel@tonic-gate 			dirbad(ip, "bad size, unlinked or not dir", (off_t)0);
30047c478bd9Sstevel@tonic-gate 			err = ENOTDIR;
30057c478bd9Sstevel@tonic-gate 			break;
30067c478bd9Sstevel@tonic-gate 		}
30077c478bd9Sstevel@tonic-gate 		if (dirp->dotdot_namlen != 2 ||
30087c478bd9Sstevel@tonic-gate 		    dirp->dotdot_name[0] != '.' ||
30097c478bd9Sstevel@tonic-gate 		    dirp->dotdot_name[1] != '.') {
30107c478bd9Sstevel@tonic-gate 			dirbad(ip, "mangled .. entry", (off_t)0);
30117c478bd9Sstevel@tonic-gate 			err = ENOTDIR;		/* Sanity check */
30127c478bd9Sstevel@tonic-gate 			break;
30137c478bd9Sstevel@tonic-gate 		}
30147c478bd9Sstevel@tonic-gate 		dotdotino = (ino_t)dirp->dotdot_ino;
30157c478bd9Sstevel@tonic-gate 		if (dotdotino == source_ino) {
30167c478bd9Sstevel@tonic-gate 			err = EINVAL;
30177c478bd9Sstevel@tonic-gate 			break;
30187c478bd9Sstevel@tonic-gate 		}
30197c478bd9Sstevel@tonic-gate 		if (dotdotino == UFSROOTINO)
30207c478bd9Sstevel@tonic-gate 			break;
30217c478bd9Sstevel@tonic-gate 		if (fbp) {
30227c478bd9Sstevel@tonic-gate 			fbrelse(fbp, S_OTHER);
30237c478bd9Sstevel@tonic-gate 			fbp = NULL;
30247c478bd9Sstevel@tonic-gate 		}
30257c478bd9Sstevel@tonic-gate 		vfs = ip->i_vfs;
30267c478bd9Sstevel@tonic-gate 		ufsvfsp = ip->i_ufsvfs;
30277c478bd9Sstevel@tonic-gate 
30287c478bd9Sstevel@tonic-gate 		if (ip != target) {
30297c478bd9Sstevel@tonic-gate 			rw_exit(&ip->i_rwlock);
30307c478bd9Sstevel@tonic-gate 			VN_RELE(ITOV(ip));
30317c478bd9Sstevel@tonic-gate 		}
30327c478bd9Sstevel@tonic-gate 		/*
30337c478bd9Sstevel@tonic-gate 		 * Race to get the inode.
30347c478bd9Sstevel@tonic-gate 		 */
30357c478bd9Sstevel@tonic-gate 		rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER);
30367c478bd9Sstevel@tonic-gate 		if (err = ufs_iget_alloced(vfs, dotdotino, &tip, cr)) {
30377c478bd9Sstevel@tonic-gate 			rw_exit(&ufsvfsp->vfs_dqrwlock);
30387c478bd9Sstevel@tonic-gate 			ip = NULL;
30397c478bd9Sstevel@tonic-gate 			break;
30407c478bd9Sstevel@tonic-gate 		}
30417c478bd9Sstevel@tonic-gate 		rw_exit(&ufsvfsp->vfs_dqrwlock);
30427c478bd9Sstevel@tonic-gate 		/*
30437c478bd9Sstevel@tonic-gate 		 * If the directory of the source inode (also a directory)
30447c478bd9Sstevel@tonic-gate 		 * is the same as this next entry up the chain, then
30457c478bd9Sstevel@tonic-gate 		 * we know the source directory itself can't be in the
30467c478bd9Sstevel@tonic-gate 		 * chain. This also prevents a panic because we already
30477c478bd9Sstevel@tonic-gate 		 * have sdp->i_rwlock locked.
30487c478bd9Sstevel@tonic-gate 		 */
30497c478bd9Sstevel@tonic-gate 		if (tip == sdp) {
30507c478bd9Sstevel@tonic-gate 			VN_RELE(ITOV(tip));
30517c478bd9Sstevel@tonic-gate 			ip = NULL;
30527c478bd9Sstevel@tonic-gate 			break;
30537c478bd9Sstevel@tonic-gate 		}
30547c478bd9Sstevel@tonic-gate 		ip = tip;
30557c478bd9Sstevel@tonic-gate 
30567c478bd9Sstevel@tonic-gate 		/*
30577c478bd9Sstevel@tonic-gate 		 * If someone has set the WRITE_WANTED bit in this lock and if
30587c478bd9Sstevel@tonic-gate 		 * this happens to be a sdp or tdp of another parallel rename
30597c478bd9Sstevel@tonic-gate 		 * which is executing  the same code and in similar situation
30607c478bd9Sstevel@tonic-gate 		 * we end up in a 4 way deadlock. We need to make sure that
30617c478bd9Sstevel@tonic-gate 		 * the WRITE_WANTED bit is not  set.
30627c478bd9Sstevel@tonic-gate 		 */
30637c478bd9Sstevel@tonic-gate retry_lock:
30647c478bd9Sstevel@tonic-gate 		if (!rw_tryenter(&ip->i_rwlock, RW_READER)) {
30657c478bd9Sstevel@tonic-gate 			/*
30667c478bd9Sstevel@tonic-gate 			 * If the lock held as WRITER thats fine but if it
30677c478bd9Sstevel@tonic-gate 			 * has WRITE_WANTED bit set we might end up in a
30687c478bd9Sstevel@tonic-gate 			 * deadlock. If WRITE_WANTED is set we return
30697c478bd9Sstevel@tonic-gate 			 * with EAGAIN else we just go back and try.
30707c478bd9Sstevel@tonic-gate 			 */
30717c478bd9Sstevel@tonic-gate 			if (RW_ISWRITER(&ip->i_rwlock) &&
30727c478bd9Sstevel@tonic-gate 			    !(RW_WRITE_HELD(&ip->i_rwlock))) {
30737c478bd9Sstevel@tonic-gate 				err = EAGAIN;
30747c478bd9Sstevel@tonic-gate 				if (fbp) {
30757c478bd9Sstevel@tonic-gate 					fbrelse(fbp, S_OTHER);
30767c478bd9Sstevel@tonic-gate 				}
30777c478bd9Sstevel@tonic-gate 				VN_RELE(ITOV(ip));
30787c478bd9Sstevel@tonic-gate 				return (err);
30797c478bd9Sstevel@tonic-gate 			} else {
30807c478bd9Sstevel@tonic-gate 				/*
30817c478bd9Sstevel@tonic-gate 				 * The lock is being write held. We could
30827c478bd9Sstevel@tonic-gate 				 * just do a rw_enter here but there is a
30837c478bd9Sstevel@tonic-gate 				 * window between the check and now, where
30847c478bd9Sstevel@tonic-gate 				 * the status could have changed, so to
30857c478bd9Sstevel@tonic-gate 				 * avoid looping we backoff and go back to
30867c478bd9Sstevel@tonic-gate 				 * try for the lock.
30877c478bd9Sstevel@tonic-gate 				 */
30887c478bd9Sstevel@tonic-gate 				delay(retry_backoff_delay);
30897c478bd9Sstevel@tonic-gate 				dircheck_retry_cnt++;
30907c478bd9Sstevel@tonic-gate 				goto retry_lock;
30917c478bd9Sstevel@tonic-gate 			}
30927c478bd9Sstevel@tonic-gate 		}
30937c478bd9Sstevel@tonic-gate 	}
30947c478bd9Sstevel@tonic-gate 	if (fbp) {
30957c478bd9Sstevel@tonic-gate 		fbrelse(fbp, S_OTHER);
30967c478bd9Sstevel@tonic-gate 	}
30977c478bd9Sstevel@tonic-gate out:
30987c478bd9Sstevel@tonic-gate 	if (ip) {
30997c478bd9Sstevel@tonic-gate 		if (ip != target) {
31007c478bd9Sstevel@tonic-gate 			rw_exit(&ip->i_rwlock);
31017c478bd9Sstevel@tonic-gate 			VN_RELE(ITOV(ip));
31027c478bd9Sstevel@tonic-gate 		}
31037c478bd9Sstevel@tonic-gate 	}
31047c478bd9Sstevel@tonic-gate 	return (err);
31057c478bd9Sstevel@tonic-gate }
31067c478bd9Sstevel@tonic-gate 
31077c478bd9Sstevel@tonic-gate int
ufs_xattrdirempty(struct inode * ip,ino_t parentino,struct cred * cr)31087c478bd9Sstevel@tonic-gate ufs_xattrdirempty(struct inode *ip, ino_t parentino, struct cred *cr)
31097c478bd9Sstevel@tonic-gate {
31107c478bd9Sstevel@tonic-gate 	offset_t off;
31117c478bd9Sstevel@tonic-gate 	struct dirtemplate dbuf;
31127c478bd9Sstevel@tonic-gate 	struct direct *dp = (struct direct *)&dbuf;
31137c478bd9Sstevel@tonic-gate 	int err, count;
31147c478bd9Sstevel@tonic-gate 	int empty = 1;	/* Assume it's empty */
31157c478bd9Sstevel@tonic-gate #define	MINDIRSIZ (sizeof (struct dirtemplate) / 2)
31167c478bd9Sstevel@tonic-gate 
31177c478bd9Sstevel@tonic-gate 	ASSERT(RW_LOCK_HELD(&ip->i_contents));
31187c478bd9Sstevel@tonic-gate 
31197c478bd9Sstevel@tonic-gate 	ASSERT(ip->i_size <= (offset_t)MAXOFF_T);
31207c478bd9Sstevel@tonic-gate 	for (off = 0; off < ip->i_size; off += dp->d_reclen) {
31217c478bd9Sstevel@tonic-gate 		err = ufs_rdwri(UIO_READ, FREAD, ip, (caddr_t)dp,
31227c478bd9Sstevel@tonic-gate 		    (ssize_t)MINDIRSIZ, off, UIO_SYSSPACE, &count, cr);
31237c478bd9Sstevel@tonic-gate 		/*
31247c478bd9Sstevel@tonic-gate 		 * Since we read MINDIRSIZ, residual must
31257c478bd9Sstevel@tonic-gate 		 * be 0 unless we're at end of file.
31267c478bd9Sstevel@tonic-gate 		 */
31277c478bd9Sstevel@tonic-gate 
31287c478bd9Sstevel@tonic-gate 		if (err || count != 0 || dp->d_reclen == 0) {
31297c478bd9Sstevel@tonic-gate 			empty = 0;
31307c478bd9Sstevel@tonic-gate 			break;
31317c478bd9Sstevel@tonic-gate 		}
31327c478bd9Sstevel@tonic-gate 		/* skip empty entries */
31337c478bd9Sstevel@tonic-gate 		if (dp->d_ino == 0)
31347c478bd9Sstevel@tonic-gate 			continue;
31357c478bd9Sstevel@tonic-gate 		/*
31367c478bd9Sstevel@tonic-gate 		 * At this point d_namlen must be 1 or 2.
31377c478bd9Sstevel@tonic-gate 		 * 1 implies ".", 2 implies ".." if second
31387c478bd9Sstevel@tonic-gate 		 * char is also "."
31397c478bd9Sstevel@tonic-gate 		 */
31407c478bd9Sstevel@tonic-gate 
31417c478bd9Sstevel@tonic-gate 		if (dp->d_namlen == 1 && dp->d_name[0] == '.' &&
31427c478bd9Sstevel@tonic-gate 		    (ino_t)dp->d_ino == parentino)
31437c478bd9Sstevel@tonic-gate 			continue;
31447c478bd9Sstevel@tonic-gate 
31457c478bd9Sstevel@tonic-gate 		if (dp->d_namlen == 2 && dp->d_name[0] == '.' &&
31467c478bd9Sstevel@tonic-gate 		    dp->d_name[1] == '.') {
31477c478bd9Sstevel@tonic-gate 			continue;
31487c478bd9Sstevel@tonic-gate 		}
31497c478bd9Sstevel@tonic-gate 		empty = 0;
31507c478bd9Sstevel@tonic-gate 		break;
31517c478bd9Sstevel@tonic-gate 	}
31527c478bd9Sstevel@tonic-gate 	return (empty);
31537c478bd9Sstevel@tonic-gate }
31547c478bd9Sstevel@tonic-gate 
31557c478bd9Sstevel@tonic-gate 
31567c478bd9Sstevel@tonic-gate /*
31577c478bd9Sstevel@tonic-gate  * Allocate and initialize a new shadow inode to contain extended attributes.
31587c478bd9Sstevel@tonic-gate  */
31597c478bd9Sstevel@tonic-gate int
ufs_xattrmkdir(struct inode * tdp,struct inode ** ipp,int flags,struct cred * cr)31607c478bd9Sstevel@tonic-gate ufs_xattrmkdir(
31617c478bd9Sstevel@tonic-gate 	struct inode *tdp,
31627c478bd9Sstevel@tonic-gate 	struct inode **ipp,
31637c478bd9Sstevel@tonic-gate 	int flags,
31647c478bd9Sstevel@tonic-gate 	struct cred *cr)
31657c478bd9Sstevel@tonic-gate {
31667c478bd9Sstevel@tonic-gate 	struct inode *ip;
31677c478bd9Sstevel@tonic-gate 	struct vattr va;
31687c478bd9Sstevel@tonic-gate 	int err;
31697c478bd9Sstevel@tonic-gate 	int retry = 1;
31707c478bd9Sstevel@tonic-gate 	struct ufsvfs *ufsvfsp;
31717c478bd9Sstevel@tonic-gate 	struct ulockfs *ulp;
31727c478bd9Sstevel@tonic-gate 	int issync;
31737c478bd9Sstevel@tonic-gate 	int trans_size;
31747c478bd9Sstevel@tonic-gate 	int dorwlock;		/* 0 = not yet taken, */
31757c478bd9Sstevel@tonic-gate 				/* 1 = taken outside the transaction, */
31767c478bd9Sstevel@tonic-gate 				/* 2 = taken inside the transaction */
31777c478bd9Sstevel@tonic-gate 
31787c478bd9Sstevel@tonic-gate 	/*
31797c478bd9Sstevel@tonic-gate 	 * Validate permission to create attribute directory
31807c478bd9Sstevel@tonic-gate 	 */
31817c478bd9Sstevel@tonic-gate 
318260c8e821SFrank Batschulat 	if ((err = ufs_iaccess(tdp, IWRITE, cr, 1)) != 0) {
31837c478bd9Sstevel@tonic-gate 		return (err);
31847c478bd9Sstevel@tonic-gate 	}
31857c478bd9Sstevel@tonic-gate 
31867c478bd9Sstevel@tonic-gate 	if (vn_is_readonly(ITOV(tdp)))
31877c478bd9Sstevel@tonic-gate 		return (EROFS);
31887c478bd9Sstevel@tonic-gate 
31897c478bd9Sstevel@tonic-gate 	/*
31907c478bd9Sstevel@tonic-gate 	 * No need to re-init err after again:, since it's set before
31917c478bd9Sstevel@tonic-gate 	 * the next use of it.
31927c478bd9Sstevel@tonic-gate 	 */
31937c478bd9Sstevel@tonic-gate again:
31947c478bd9Sstevel@tonic-gate 	dorwlock = 0;
31957c478bd9Sstevel@tonic-gate 	va.va_type = VDIR;
31967c478bd9Sstevel@tonic-gate 	va.va_uid = tdp->i_uid;
31977c478bd9Sstevel@tonic-gate 	va.va_gid = tdp->i_gid;
31987c478bd9Sstevel@tonic-gate 
31997c478bd9Sstevel@tonic-gate 	if ((tdp->i_mode & IFMT) == IFDIR) {
32007c478bd9Sstevel@tonic-gate 		va.va_mode = (o_mode_t)IFATTRDIR;
32017c478bd9Sstevel@tonic-gate 		va.va_mode |= tdp->i_mode & 0777;
32027c478bd9Sstevel@tonic-gate 	} else {
32037c478bd9Sstevel@tonic-gate 		va.va_mode = (o_mode_t)IFATTRDIR|0700;
32047c478bd9Sstevel@tonic-gate 		if (tdp->i_mode & 0040)
32057c478bd9Sstevel@tonic-gate 			va.va_mode |= 0750;
32067c478bd9Sstevel@tonic-gate 		if (tdp->i_mode & 0004)
32077c478bd9Sstevel@tonic-gate 			va.va_mode |= 0705;
32087c478bd9Sstevel@tonic-gate 	}
32097c478bd9Sstevel@tonic-gate 	va.va_mask = AT_TYPE|AT_MODE;
32107c478bd9Sstevel@tonic-gate 
32117c478bd9Sstevel@tonic-gate 	ufsvfsp = tdp->i_ufsvfs;
32127c478bd9Sstevel@tonic-gate 
32137c478bd9Sstevel@tonic-gate 	err = ufs_lockfs_begin(ufsvfsp, &ulp, ULOCKFS_MKDIR_MASK);
32147c478bd9Sstevel@tonic-gate 	if (err)
32157c478bd9Sstevel@tonic-gate 		return (err);
32167c478bd9Sstevel@tonic-gate 
32177c478bd9Sstevel@tonic-gate 	/*
32187c478bd9Sstevel@tonic-gate 	 * Acquire i_rwlock before TRANS_BEGIN_CSYNC() if this is a file.
32197c478bd9Sstevel@tonic-gate 	 * This follows the protocol for read()/write().
32207c478bd9Sstevel@tonic-gate 	 */
32217c478bd9Sstevel@tonic-gate 	if (ITOV(tdp)->v_type != VDIR) {
32227c478bd9Sstevel@tonic-gate 		rw_enter(&tdp->i_rwlock, RW_WRITER);
32237c478bd9Sstevel@tonic-gate 		dorwlock = 1;
32247c478bd9Sstevel@tonic-gate 	}
32257c478bd9Sstevel@tonic-gate 
32267c478bd9Sstevel@tonic-gate 	if (ulp) {
32277c478bd9Sstevel@tonic-gate 		trans_size = (int)TOP_MKDIR_SIZE(tdp);
32287c478bd9Sstevel@tonic-gate 		TRANS_BEGIN_CSYNC(ufsvfsp, issync, TOP_MKDIR, trans_size);
32297c478bd9Sstevel@tonic-gate 	}
32307c478bd9Sstevel@tonic-gate 
32317c478bd9Sstevel@tonic-gate 	/*
32327c478bd9Sstevel@tonic-gate 	 * Acquire i_rwlock after TRANS_BEGIN_CSYNC() if this is a directory.
32337c478bd9Sstevel@tonic-gate 	 * This follows the protocol established by
32347c478bd9Sstevel@tonic-gate 	 * ufs_link/create/remove/rename/mkdir/rmdir/symlink.
32357c478bd9Sstevel@tonic-gate 	 */
32367c478bd9Sstevel@tonic-gate 	if (dorwlock == 0) {
32377c478bd9Sstevel@tonic-gate 		rw_enter(&tdp->i_rwlock, RW_WRITER);
32387c478bd9Sstevel@tonic-gate 		dorwlock = 2;
32397c478bd9Sstevel@tonic-gate 	}
32407c478bd9Sstevel@tonic-gate 	rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER);
32417c478bd9Sstevel@tonic-gate 	rw_enter(&tdp->i_contents, RW_WRITER);
32427c478bd9Sstevel@tonic-gate 
32437c478bd9Sstevel@tonic-gate 	/*
32447c478bd9Sstevel@tonic-gate 	 * Suppress out of inodes messages if we will retry.
32457c478bd9Sstevel@tonic-gate 	 */
32467c478bd9Sstevel@tonic-gate 	if (retry)
32477c478bd9Sstevel@tonic-gate 		tdp->i_flag |= IQUIET;
32487c478bd9Sstevel@tonic-gate 	err = ufs_dirmakeinode(tdp, &ip, &va, DE_ATTRDIR, cr);
32497c478bd9Sstevel@tonic-gate 	tdp->i_flag &= ~IQUIET;
32507c478bd9Sstevel@tonic-gate 
32517c478bd9Sstevel@tonic-gate 	if (err)
32527c478bd9Sstevel@tonic-gate 		goto fail;
32537c478bd9Sstevel@tonic-gate 
32547c478bd9Sstevel@tonic-gate 	if (flags) {
32557c478bd9Sstevel@tonic-gate 
32567c478bd9Sstevel@tonic-gate 		/*
32577c478bd9Sstevel@tonic-gate 		 * Now attach it to src file.
32587c478bd9Sstevel@tonic-gate 		 */
32597c478bd9Sstevel@tonic-gate 
32607c478bd9Sstevel@tonic-gate 		tdp->i_oeftflag = ip->i_number;
32617c478bd9Sstevel@tonic-gate 	}
32627c478bd9Sstevel@tonic-gate 
32637c478bd9Sstevel@tonic-gate 	ip->i_cflags |= IXATTR;
32647c478bd9Sstevel@tonic-gate 	ITOV(ip)->v_flag |= V_XATTRDIR;
32657c478bd9Sstevel@tonic-gate 	TRANS_INODE(ufsvfsp, tdp);
32667c478bd9Sstevel@tonic-gate 	tdp->i_flag |= ICHG | IUPD;
32677c478bd9Sstevel@tonic-gate 	tdp->i_seq++;
32687c478bd9Sstevel@tonic-gate 	ufs_iupdat(tdp, I_SYNC);
32697c478bd9Sstevel@tonic-gate 	rw_exit(&tdp->i_contents);
32707c478bd9Sstevel@tonic-gate 	rw_exit(&ufsvfsp->vfs_dqrwlock);
32717c478bd9Sstevel@tonic-gate 
32727c478bd9Sstevel@tonic-gate 	rw_enter(&ip->i_rwlock, RW_WRITER);
32737c478bd9Sstevel@tonic-gate 	rw_enter(&ip->i_contents, RW_WRITER);
32747c478bd9Sstevel@tonic-gate 	TRANS_INODE(ufsvfsp, ip);
32757c478bd9Sstevel@tonic-gate 	ip->i_flag |= ICHG| IUPD;
32767c478bd9Sstevel@tonic-gate 	ip->i_seq++;
32777c478bd9Sstevel@tonic-gate 	ufs_iupdat(ip, I_SYNC);
32787c478bd9Sstevel@tonic-gate 	rw_exit(&ip->i_contents);
32797c478bd9Sstevel@tonic-gate 	rw_exit(&ip->i_rwlock);
32807c478bd9Sstevel@tonic-gate 	if (dorwlock == 2)
32817c478bd9Sstevel@tonic-gate 		rw_exit(&tdp->i_rwlock);
32827c478bd9Sstevel@tonic-gate 	if (ulp) {
32837c478bd9Sstevel@tonic-gate 		int terr = 0;
32847c478bd9Sstevel@tonic-gate 
32857c478bd9Sstevel@tonic-gate 		TRANS_END_CSYNC(ufsvfsp, err, issync, TOP_MKDIR, trans_size);
32867c478bd9Sstevel@tonic-gate 		ufs_lockfs_end(ulp);
32877c478bd9Sstevel@tonic-gate 		if (err == 0)
32887c478bd9Sstevel@tonic-gate 			err = terr;
32897c478bd9Sstevel@tonic-gate 	}
32907c478bd9Sstevel@tonic-gate 	if (dorwlock == 1)
32917c478bd9Sstevel@tonic-gate 		rw_exit(&tdp->i_rwlock);
32927c478bd9Sstevel@tonic-gate 	*ipp = ip;
32937c478bd9Sstevel@tonic-gate 	return (err);
32947c478bd9Sstevel@tonic-gate 
32957c478bd9Sstevel@tonic-gate fail:
32967c478bd9Sstevel@tonic-gate 	rw_exit(&tdp->i_contents);
32977c478bd9Sstevel@tonic-gate 	rw_exit(&ufsvfsp->vfs_dqrwlock);
32987c478bd9Sstevel@tonic-gate 	if (dorwlock == 2)
32997c478bd9Sstevel@tonic-gate 		rw_exit(&tdp->i_rwlock);
33007c478bd9Sstevel@tonic-gate 	if (ulp) {
33017c478bd9Sstevel@tonic-gate 		TRANS_END_CSYNC(ufsvfsp, err, issync, TOP_MKDIR, trans_size);
33027c478bd9Sstevel@tonic-gate 		ufs_lockfs_end(ulp);
33037c478bd9Sstevel@tonic-gate 	}
33047c478bd9Sstevel@tonic-gate 	if (dorwlock == 1)
33057c478bd9Sstevel@tonic-gate 		rw_exit(&tdp->i_rwlock);
33067c478bd9Sstevel@tonic-gate 	if (ip != NULL)
33077c478bd9Sstevel@tonic-gate 		VN_RELE(ITOV(ip));
33087c478bd9Sstevel@tonic-gate 
33097c478bd9Sstevel@tonic-gate 	/*
33107c478bd9Sstevel@tonic-gate 	 * No inodes?  See if any are tied up in pending deletions.
33117c478bd9Sstevel@tonic-gate 	 * This has to be done outside of any of the above, because
33127c478bd9Sstevel@tonic-gate 	 * the draining operation can't be done from inside a transaction.
33137c478bd9Sstevel@tonic-gate 	 */
33147c478bd9Sstevel@tonic-gate 	if ((err == ENOSPC) && retry && TRANS_ISTRANS(ufsvfsp)) {
33157c478bd9Sstevel@tonic-gate 		ufs_delete_drain_wait(ufsvfsp, 1);
33167c478bd9Sstevel@tonic-gate 		retry = 0;
33177c478bd9Sstevel@tonic-gate 		goto again;
33187c478bd9Sstevel@tonic-gate 	}
33197c478bd9Sstevel@tonic-gate 
33207c478bd9Sstevel@tonic-gate 	return (err);
33217c478bd9Sstevel@tonic-gate }
33227c478bd9Sstevel@tonic-gate 
33237c478bd9Sstevel@tonic-gate /*
33247c478bd9Sstevel@tonic-gate  * clear the dotdot directory entry.
33257c478bd9Sstevel@tonic-gate  * Used by ufs_dirscan when clr_dotdot
33267c478bd9Sstevel@tonic-gate  * flag is set and we're deleting a
33277c478bd9Sstevel@tonic-gate  * directory.
33287c478bd9Sstevel@tonic-gate  */
33297c478bd9Sstevel@tonic-gate static int
ufs_dirclrdotdot(struct inode * ip,ino_t parentino)33307c478bd9Sstevel@tonic-gate ufs_dirclrdotdot(struct inode *ip, ino_t parentino)
33317c478bd9Sstevel@tonic-gate {
33327c478bd9Sstevel@tonic-gate 	struct fbuf *fbp;
33337c478bd9Sstevel@tonic-gate 	struct direct *dotp, *dotdotp;
33347c478bd9Sstevel@tonic-gate 	int err = 0;
33357c478bd9Sstevel@tonic-gate 
33367c478bd9Sstevel@tonic-gate 	ASSERT(RW_WRITE_HELD(&ip->i_rwlock));
33377c478bd9Sstevel@tonic-gate 	ASSERT(RW_LOCK_HELD(&ip->i_contents));
33387c478bd9Sstevel@tonic-gate 	err = blkatoff(ip, 0, NULL, &fbp);
33397c478bd9Sstevel@tonic-gate 	if (err) {
33407c478bd9Sstevel@tonic-gate 		return (err);
33417c478bd9Sstevel@tonic-gate 	}
33427c478bd9Sstevel@tonic-gate 
33437c478bd9Sstevel@tonic-gate 	dotp = (struct direct *)fbp->fb_addr;
33447c478bd9Sstevel@tonic-gate 	if ((dotp->d_namlen < (MAXNAMLEN + 1)) &&
33457c478bd9Sstevel@tonic-gate 	    ((DIRBLKSIZ - DIRSIZ(dotp)) >= (sizeof (struct dirtemplate) / 2))) {
33467c478bd9Sstevel@tonic-gate 		dotdotp = (struct direct *)((char *)dotp + dotp->d_reclen);
33477c478bd9Sstevel@tonic-gate 		if ((dotdotp->d_namlen < (MAXNAMLEN + 1)) &&
33487c478bd9Sstevel@tonic-gate 		    ((DIRBLKSIZ - DIRSIZ(dotp)) >= dotdotp->d_reclen)) {
33497c478bd9Sstevel@tonic-gate 
33507c478bd9Sstevel@tonic-gate 			dotp->d_reclen += dotdotp->d_reclen;
33517c478bd9Sstevel@tonic-gate 			if (parentino == dotdotp->d_ino) {
33527c478bd9Sstevel@tonic-gate 				dotdotp->d_ino = 0;
33537c478bd9Sstevel@tonic-gate 				dotdotp->d_namlen = 0;
33547c478bd9Sstevel@tonic-gate 				dotdotp->d_reclen = 0;
33557c478bd9Sstevel@tonic-gate 			}
33567c478bd9Sstevel@tonic-gate 
33577c478bd9Sstevel@tonic-gate 			err = TRANS_DIR(ip, 0);
33587c478bd9Sstevel@tonic-gate 			if (err) {
33597c478bd9Sstevel@tonic-gate 				fbrelse(fbp, S_OTHER);
33607c478bd9Sstevel@tonic-gate 			} else {
33617c478bd9Sstevel@tonic-gate 				err = ufs_fbwrite(fbp, ip);
33627c478bd9Sstevel@tonic-gate 			}
33637c478bd9Sstevel@tonic-gate 		}
33647c478bd9Sstevel@tonic-gate 	} else {
33657c478bd9Sstevel@tonic-gate 		err = -1;
33667c478bd9Sstevel@tonic-gate 	}
33677c478bd9Sstevel@tonic-gate 	return (err);
33687c478bd9Sstevel@tonic-gate }
3369