xref: /illumos-gate/usr/src/uts/common/fs/xattr.c (revision 129b3e6c5b0ac55b5021a4c38db6387b6acdaaf1)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/param.h>
27 #include <sys/isa_defs.h>
28 #include <sys/types.h>
29 #include <sys/sysmacros.h>
30 #include <sys/cred.h>
31 #include <sys/systm.h>
32 #include <sys/errno.h>
33 #include <sys/fcntl.h>
34 #include <sys/pathname.h>
35 #include <sys/stat.h>
36 #include <sys/vfs.h>
37 #include <sys/acl.h>
38 #include <sys/file.h>
39 #include <sys/sunddi.h>
40 #include <sys/debug.h>
41 #include <sys/cmn_err.h>
42 #include <sys/vnode.h>
43 #include <sys/mode.h>
44 #include <sys/nvpair.h>
45 #include <sys/attr.h>
46 #include <sys/gfs.h>
47 #include <sys/mutex.h>
48 #include <fs/fs_subr.h>
49 #include <sys/kidmap.h>
50 
/*
 * Private data hung off v_data for a sysattr view file: the generic GFS
 * file state followed by the view (XATTR_VIEW_READONLY or
 * XATTR_VIEW_READWRITE) that this file presents.
 */
typedef struct {
	gfs_file_t	gfs_private;	/* GFS file state; first member */
	xattr_view_t	xattr_view;	/* view consulted by open/access */
} xattr_file_t;
55 
56 /* ARGSUSED */
57 static int
58 xattr_file_open(vnode_t **vpp, int flags, cred_t *cr, caller_context_t *ct)
59 {
60 	xattr_file_t *np = (*vpp)->v_data;
61 
62 	if ((np->xattr_view == XATTR_VIEW_READONLY) && (flags & FWRITE))
63 		return (EACCES);
64 
65 	return (0);
66 }
67 
68 /* ARGSUSED */
69 static int
70 xattr_file_access(vnode_t *vp, int mode, int flags, cred_t *cr,
71     caller_context_t *ct)
72 {
73 	xattr_file_t *np = vp->v_data;
74 
75 	if ((np->xattr_view == XATTR_VIEW_READONLY) && (mode & VWRITE))
76 		return (EACCES);
77 
78 	return (0);
79 }
80 
81 /* ARGSUSED */
82 static int
83 xattr_file_close(vnode_t *vp, int flags, int count, offset_t off,
84     cred_t *cr, caller_context_t *ct)
85 {
86 	cleanlocks(vp, ddi_get_pid(), 0);
87 	cleanshares(vp, ddi_get_pid());
88 	return (0);
89 }
90 
91 static int
92 xattr_common_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct)
93 {
94 	xattr_fid_t	*xfidp;
95 	vnode_t		*pvp, *savevp;
96 	int		error;
97 	uint16_t	orig_len;
98 
99 	if (fidp->fid_len < XATTR_FIDSZ) {
100 		fidp->fid_len = XATTR_FIDSZ;
101 		return (ENOSPC);
102 	}
103 
104 	savevp = pvp = gfs_file_parent(vp);
105 	mutex_enter(&savevp->v_lock);
106 	if (pvp->v_flag & V_XATTRDIR) {
107 		pvp = gfs_file_parent(pvp);
108 	}
109 	mutex_exit(&savevp->v_lock);
110 
111 	xfidp = (xattr_fid_t *)fidp;
112 	orig_len = fidp->fid_len;
113 	fidp->fid_len = sizeof (xfidp->parent_fid);
114 
115 	error = VOP_FID(pvp, fidp, ct);
116 	if (error) {
117 		fidp->fid_len = orig_len;
118 		return (error);
119 	}
120 
121 	xfidp->parent_len = fidp->fid_len;
122 	fidp->fid_len = XATTR_FIDSZ;
123 	xfidp->dir_offset = gfs_file_inode(vp);
124 
125 	return (0);
126 }
127 
/*
 * Populate nvlp with the system attributes that belong to xattr_view.
 *
 * Every attribute that attr_to_xattr_view() maps to the view is
 * requested from the real object with a single VOP_GETATTR(); each one
 * the file system reports back is added to nvlp under its
 * attr_to_name() name.  F_FSID is not fetched: it is computed from
 * vp's vfs_fsid and added directly.  When the object's uid or gid is
 * above MAXUID, the matching SID (domain and rid obtained from kidmap)
 * is added as a nested nvlist.
 *
 * Returns 0 on success, EINVAL when xva_getxoptattr() yields no
 * optional-attribute area, ENOMEM when a nested nvlist cannot be
 * allocated, or the error from VOP_GETATTR().  Note that nvlp may
 * already hold the F_FSID entry when VOP_GETATTR() fails.
 */
/* ARGSUSED */
static int
xattr_fill_nvlist(vnode_t *vp, xattr_view_t xattr_view, nvlist_t *nvlp,
    cred_t *cr, caller_context_t *ct)
{
	int error;
	f_attr_t attr;
	uint64_t fsid;
	xvattr_t xvattr;
	xoptattr_t *xoap;	/* Pointer to optional attributes */
	vnode_t *ppvp;
	const char *domain;
	uint32_t rid;

	xva_init(&xvattr);

	if ((xoap = xva_getxoptattr(&xvattr)) == NULL)
		return (EINVAL);

	/*
	 * For detecting ephemeral uid/gid
	 */
	xvattr.xva_vattr.va_mask |= (AT_UID|AT_GID);

	/*
	 * We need to access the real fs object.
	 * vp points to a GFS file; ppvp points to the real object.
	 */
	ppvp = gfs_file_parent(gfs_file_parent(vp));

	/*
	 * Iterate through the attrs associated with this view
	 */

	for (attr = 0; attr < F_ATTR_ALL; attr++) {
		/* Skip attributes that belong to the other view. */
		if (xattr_view != attr_to_xattr_view(attr)) {
			continue;
		}

		switch (attr) {
		case F_SYSTEM:
			XVA_SET_REQ(&xvattr, XAT_SYSTEM);
			break;
		case F_READONLY:
			XVA_SET_REQ(&xvattr, XAT_READONLY);
			break;
		case F_HIDDEN:
			XVA_SET_REQ(&xvattr, XAT_HIDDEN);
			break;
		case F_ARCHIVE:
			XVA_SET_REQ(&xvattr, XAT_ARCHIVE);
			break;
		case F_IMMUTABLE:
			XVA_SET_REQ(&xvattr, XAT_IMMUTABLE);
			break;
		case F_APPENDONLY:
			XVA_SET_REQ(&xvattr, XAT_APPENDONLY);
			break;
		case F_NOUNLINK:
			XVA_SET_REQ(&xvattr, XAT_NOUNLINK);
			break;
		case F_OPAQUE:
			XVA_SET_REQ(&xvattr, XAT_OPAQUE);
			break;
		case F_NODUMP:
			XVA_SET_REQ(&xvattr, XAT_NODUMP);
			break;
		case F_AV_QUARANTINED:
			XVA_SET_REQ(&xvattr, XAT_AV_QUARANTINED);
			break;
		case F_AV_MODIFIED:
			XVA_SET_REQ(&xvattr, XAT_AV_MODIFIED);
			break;
		case F_AV_SCANSTAMP:
			/* The scanstamp is only requested for regular files. */
			if (ppvp->v_type == VREG)
				XVA_SET_REQ(&xvattr, XAT_AV_SCANSTAMP);
			break;
		case F_CRTIME:
			XVA_SET_REQ(&xvattr, XAT_CREATETIME);
			break;
		case F_FSID:
			/*
			 * Pack the two vfs_fsid words into one 64-bit value:
			 * val[0] in the upper half, val[1] in the lower.
			 */
			fsid = (((uint64_t)vp->v_vfsp->vfs_fsid.val[0] << 32) |
			    (uint64_t)(vp->v_vfsp->vfs_fsid.val[1] &
			    0xffffffff));
			VERIFY(nvlist_add_uint64(nvlp, attr_to_name(attr),
			    fsid) == 0);
			break;
		default:
			break;
		}
	}

	error = VOP_GETATTR(ppvp, &xvattr.xva_vattr, 0, cr, ct);
	if (error)
		return (error);

	/*
	 * Process all the optional attributes together here.  Notice that
	 * xoap was set when the optional attribute bits were set above.
	 */
	if ((xvattr.xva_vattr.va_mask & AT_XVATTR) && xoap) {
		if (XVA_ISSET_RTN(&xvattr, XAT_READONLY)) {
			VERIFY(nvlist_add_boolean_value(nvlp,
			    attr_to_name(F_READONLY),
			    xoap->xoa_readonly) == 0);
		}
		if (XVA_ISSET_RTN(&xvattr, XAT_HIDDEN)) {
			VERIFY(nvlist_add_boolean_value(nvlp,
			    attr_to_name(F_HIDDEN),
			    xoap->xoa_hidden) == 0);
		}
		if (XVA_ISSET_RTN(&xvattr, XAT_SYSTEM)) {
			VERIFY(nvlist_add_boolean_value(nvlp,
			    attr_to_name(F_SYSTEM),
			    xoap->xoa_system) == 0);
		}
		if (XVA_ISSET_RTN(&xvattr, XAT_ARCHIVE)) {
			VERIFY(nvlist_add_boolean_value(nvlp,
			    attr_to_name(F_ARCHIVE),
			    xoap->xoa_archive) == 0);
		}
		if (XVA_ISSET_RTN(&xvattr, XAT_IMMUTABLE)) {
			VERIFY(nvlist_add_boolean_value(nvlp,
			    attr_to_name(F_IMMUTABLE),
			    xoap->xoa_immutable) == 0);
		}
		if (XVA_ISSET_RTN(&xvattr, XAT_NOUNLINK)) {
			VERIFY(nvlist_add_boolean_value(nvlp,
			    attr_to_name(F_NOUNLINK),
			    xoap->xoa_nounlink) == 0);
		}
		if (XVA_ISSET_RTN(&xvattr, XAT_APPENDONLY)) {
			VERIFY(nvlist_add_boolean_value(nvlp,
			    attr_to_name(F_APPENDONLY),
			    xoap->xoa_appendonly) == 0);
		}
		if (XVA_ISSET_RTN(&xvattr, XAT_NODUMP)) {
			VERIFY(nvlist_add_boolean_value(nvlp,
			    attr_to_name(F_NODUMP),
			    xoap->xoa_nodump) == 0);
		}
		if (XVA_ISSET_RTN(&xvattr, XAT_OPAQUE)) {
			VERIFY(nvlist_add_boolean_value(nvlp,
			    attr_to_name(F_OPAQUE),
			    xoap->xoa_opaque) == 0);
		}
		if (XVA_ISSET_RTN(&xvattr, XAT_AV_QUARANTINED)) {
			VERIFY(nvlist_add_boolean_value(nvlp,
			    attr_to_name(F_AV_QUARANTINED),
			    xoap->xoa_av_quarantined) == 0);
		}
		if (XVA_ISSET_RTN(&xvattr, XAT_AV_MODIFIED)) {
			VERIFY(nvlist_add_boolean_value(nvlp,
			    attr_to_name(F_AV_MODIFIED),
			    xoap->xoa_av_modified) == 0);
		}
		if (XVA_ISSET_RTN(&xvattr, XAT_AV_SCANSTAMP)) {
			/* The whole scanstamp array is exported as bytes. */
			VERIFY(nvlist_add_uint8_array(nvlp,
			    attr_to_name(F_AV_SCANSTAMP),
			    xoap->xoa_av_scanstamp,
			    sizeof (xoap->xoa_av_scanstamp)) == 0);
		}
		if (XVA_ISSET_RTN(&xvattr, XAT_CREATETIME)) {
			/*
			 * The creation time is exported as an array of
			 * uint64_t covering the whole xoa_createtime field.
			 */
			VERIFY(nvlist_add_uint64_array(nvlp,
			    attr_to_name(F_CRTIME),
			    (uint64_t *)&(xoap->xoa_createtime),
			    sizeof (xoap->xoa_createtime) /
			    sizeof (uint64_t)) == 0);
		}
	}
	/*
	 * Check for optional ownersid/groupsid
	 */

	if (xvattr.xva_vattr.va_uid > MAXUID) {
		nvlist_t *nvl_sid;

		if (nvlist_alloc(&nvl_sid, NV_UNIQUE_NAME, KM_SLEEP))
			return (ENOMEM);

		/* A failed uid-to-SID lookup simply omits the owner SID. */
		if (kidmap_getsidbyuid(crgetzone(cr), xvattr.xva_vattr.va_uid,
		    &domain, &rid) == 0) {
			VERIFY(nvlist_add_string(nvl_sid,
			    SID_DOMAIN, domain) == 0);
			VERIFY(nvlist_add_uint32(nvl_sid, SID_RID, rid) == 0);
			VERIFY(nvlist_add_nvlist(nvlp, attr_to_name(F_OWNERSID),
			    nvl_sid) == 0);
		}
		nvlist_free(nvl_sid);
	}
	if (xvattr.xva_vattr.va_gid > MAXUID) {
		nvlist_t *nvl_sid;

		if (nvlist_alloc(&nvl_sid, NV_UNIQUE_NAME, KM_SLEEP))
			return (ENOMEM);

		/* A failed gid-to-SID lookup simply omits the group SID. */
		if (kidmap_getsidbygid(crgetzone(cr), xvattr.xva_vattr.va_gid,
		    &domain, &rid) == 0) {
			VERIFY(nvlist_add_string(nvl_sid,
			    SID_DOMAIN, domain) == 0);
			VERIFY(nvlist_add_uint32(nvl_sid, SID_RID, rid) == 0);
			VERIFY(nvlist_add_nvlist(nvlp, attr_to_name(F_GROUPSID),
			    nvl_sid) == 0);
		}
		nvlist_free(nvl_sid);
	}

	return (0);
}
337 
338 /*
339  * The size of a sysattr file is the size of the nvlist that will be
340  * returned by xattr_file_read().  A call to xattr_file_write() could
341  * change the size of that nvlist.  That size is not stored persistently
342  * so xattr_fill_nvlist() calls VOP_GETATTR so that it can be calculated.
343  */
344 static int
345 xattr_file_size(vnode_t *vp, xattr_view_t xattr_view, size_t *size,
346     cred_t *cr, caller_context_t *ct)
347 {
348 	nvlist_t *nvl;
349 
350 	if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) {
351 		return (ENOMEM);
352 	}
353 
354 	if (xattr_fill_nvlist(vp, xattr_view, nvl, cr, ct)) {
355 		nvlist_free(nvl);
356 		return (EFAULT);
357 	}
358 
359 	VERIFY(nvlist_size(nvl, size, NV_ENCODE_XDR) == 0);
360 	nvlist_free(nvl);
361 	return (0);
362 }
363 
364 /* ARGSUSED */
365 static int
366 xattr_file_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
367     caller_context_t *ct)
368 {
369 	xattr_file_t *np = vp->v_data;
370 	timestruc_t now;
371 	size_t size;
372 	int error;
373 	vnode_t *pvp;
374 	vattr_t pvattr;
375 
376 	vap->va_type = VREG;
377 	vap->va_mode = MAKEIMODE(vap->va_type,
378 	    (np->xattr_view == XATTR_VIEW_READONLY ? 0444 : 0644));
379 	vap->va_nodeid = gfs_file_inode(vp);
380 	vap->va_nlink = 1;
381 	pvp = gfs_file_parent(vp);
382 	(void) memset(&pvattr, 0, sizeof (pvattr));
383 	pvattr.va_mask = AT_CTIME|AT_MTIME;
384 	error = VOP_GETATTR(pvp, &pvattr, flags, cr, ct);
385 	if (error) {
386 		return (error);
387 	}
388 	vap->va_ctime = pvattr.va_ctime;
389 	vap->va_mtime = pvattr.va_mtime;
390 	gethrestime(&now);
391 	vap->va_atime = now;
392 	vap->va_uid = 0;
393 	vap->va_gid = 0;
394 	vap->va_rdev = 0;
395 	vap->va_blksize = DEV_BSIZE;
396 	vap->va_seq = 0;
397 	vap->va_fsid = vp->v_vfsp->vfs_dev;
398 	error = xattr_file_size(vp, np->xattr_view, &size, cr, ct);
399 	vap->va_size = size;
400 	vap->va_nblocks = howmany(vap->va_size, vap->va_blksize);
401 	return (error);
402 }
403 
404 /* ARGSUSED */
405 static int
406 xattr_file_read(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
407     caller_context_t *ct)
408 {
409 	xattr_file_t *np = vp->v_data;
410 	xattr_view_t xattr_view = np->xattr_view;
411 	char *buf;
412 	size_t filesize;
413 	nvlist_t *nvl;
414 	int error;
415 
416 	/*
417 	 * Validate file offset and fasttrack empty reads
418 	 */
419 	if (uiop->uio_loffset < (offset_t)0)
420 		return (EINVAL);
421 
422 	if (uiop->uio_resid == 0)
423 		return (0);
424 
425 	if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP))
426 		return (ENOMEM);
427 
428 	if (xattr_fill_nvlist(vp, xattr_view, nvl, cr, ct)) {
429 		nvlist_free(nvl);
430 		return (EFAULT);
431 	}
432 
433 	VERIFY(nvlist_size(nvl, &filesize, NV_ENCODE_XDR) == 0);
434 
435 	if (uiop->uio_loffset >= filesize) {
436 		nvlist_free(nvl);
437 		return (0);
438 	}
439 
440 	buf = kmem_alloc(filesize, KM_SLEEP);
441 	VERIFY(nvlist_pack(nvl, &buf, &filesize, NV_ENCODE_XDR,
442 	    KM_SLEEP) == 0);
443 
444 	error = uiomove((caddr_t)buf, filesize, UIO_READ, uiop);
445 	kmem_free(buf, filesize);
446 	nvlist_free(nvl);
447 	return (error);
448 }
449 
450 /* ARGSUSED */
451 static int
452 xattr_file_write(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
453     caller_context_t *ct)
454 {
455 	int error = 0;
456 	char *buf;
457 	char *domain;
458 	uint32_t rid;
459 	ssize_t size = uiop->uio_resid;
460 	nvlist_t *nvp;
461 	nvpair_t *pair = NULL;
462 	vnode_t *ppvp;
463 	xvattr_t xvattr;
464 	xoptattr_t *xoap = NULL;	/* Pointer to optional attributes */
465 
466 	if (vfs_has_feature(vp->v_vfsp, VFSFT_XVATTR) == 0)
467 		return (EINVAL);
468 
469 	/*
470 	 * Validate file offset and size.
471 	 */
472 	if (uiop->uio_loffset < (offset_t)0)
473 		return (EINVAL);
474 
475 	if (size == 0)
476 		return (EINVAL);
477 
478 	xva_init(&xvattr);
479 
480 	if ((xoap = xva_getxoptattr(&xvattr)) == NULL) {
481 		return (EINVAL);
482 	}
483 
484 	/*
485 	 * Copy and unpack the nvlist
486 	 */
487 	buf = kmem_alloc(size, KM_SLEEP);
488 	if (uiomove((caddr_t)buf, size, UIO_WRITE, uiop)) {
489 		return (EFAULT);
490 	}
491 
492 	if (nvlist_unpack(buf, size, &nvp, KM_SLEEP) != 0) {
493 		kmem_free(buf, size);
494 		uiop->uio_resid = size;
495 		return (EINVAL);
496 	}
497 	kmem_free(buf, size);
498 
499 	/*
500 	 * Fasttrack empty writes (nvlist with no nvpairs)
501 	 */
502 	if (nvlist_next_nvpair(nvp, NULL) == 0)
503 		return (0);
504 
505 	ppvp = gfs_file_parent(gfs_file_parent(vp));
506 
507 	while (pair = nvlist_next_nvpair(nvp, pair)) {
508 		data_type_t type;
509 		f_attr_t attr;
510 		boolean_t value;
511 		uint64_t *time, *times;
512 		uint_t elem, nelems;
513 		nvlist_t *nvp_sid;
514 		uint8_t *scanstamp;
515 
516 		/*
517 		 * Validate the name and type of each attribute.
518 		 * Log any unknown names and continue.  This will
519 		 * help if additional attributes are added later.
520 		 */
521 		type = nvpair_type(pair);
522 		if ((attr = name_to_attr(nvpair_name(pair))) == F_ATTR_INVAL) {
523 			cmn_err(CE_WARN, "Unknown attribute %s",
524 			    nvpair_name(pair));
525 			continue;
526 		}
527 
528 		/*
529 		 * Verify nvlist type matches required type and view is OK
530 		 */
531 
532 		if (type != attr_to_data_type(attr) ||
533 		    (attr_to_xattr_view(attr) == XATTR_VIEW_READONLY)) {
534 			nvlist_free(nvp);
535 			return (EINVAL);
536 		}
537 
538 		/*
539 		 * For OWNERSID/GROUPSID make sure the target
540 		 * file system support ephemeral ID's
541 		 */
542 		if ((attr == F_OWNERSID || attr == F_GROUPSID) &&
543 		    (!(vp->v_vfsp->vfs_flag & VFS_XID))) {
544 			nvlist_free(nvp);
545 			return (EINVAL);
546 		}
547 
548 		/*
549 		 * Retrieve data from nvpair
550 		 */
551 		switch (type) {
552 		case DATA_TYPE_BOOLEAN_VALUE:
553 			if (nvpair_value_boolean_value(pair, &value)) {
554 				nvlist_free(nvp);
555 				return (EINVAL);
556 			}
557 			break;
558 		case DATA_TYPE_UINT64_ARRAY:
559 			if (nvpair_value_uint64_array(pair, &times, &nelems)) {
560 				nvlist_free(nvp);
561 				return (EINVAL);
562 			}
563 			break;
564 		case DATA_TYPE_NVLIST:
565 			if (nvpair_value_nvlist(pair, &nvp_sid)) {
566 				nvlist_free(nvp);
567 				return (EINVAL);
568 			}
569 			break;
570 		case DATA_TYPE_UINT8_ARRAY:
571 			if (nvpair_value_uint8_array(pair,
572 			    &scanstamp, &nelems)) {
573 				nvlist_free(nvp);
574 				return (EINVAL);
575 			}
576 			break;
577 		default:
578 			nvlist_free(nvp);
579 			return (EINVAL);
580 		}
581 
582 		switch (attr) {
583 		/*
584 		 * If we have several similar optional attributes to
585 		 * process then we should do it all together here so that
586 		 * xoap and the requested bitmap can be set in one place.
587 		 */
588 		case F_READONLY:
589 			XVA_SET_REQ(&xvattr, XAT_READONLY);
590 			xoap->xoa_readonly = value;
591 			break;
592 		case F_HIDDEN:
593 			XVA_SET_REQ(&xvattr, XAT_HIDDEN);
594 			xoap->xoa_hidden = value;
595 			break;
596 		case F_SYSTEM:
597 			XVA_SET_REQ(&xvattr, XAT_SYSTEM);
598 			xoap->xoa_system = value;
599 			break;
600 		case F_ARCHIVE:
601 			XVA_SET_REQ(&xvattr, XAT_ARCHIVE);
602 			xoap->xoa_archive = value;
603 			break;
604 		case F_IMMUTABLE:
605 			XVA_SET_REQ(&xvattr, XAT_IMMUTABLE);
606 			xoap->xoa_immutable = value;
607 			break;
608 		case F_NOUNLINK:
609 			XVA_SET_REQ(&xvattr, XAT_NOUNLINK);
610 			xoap->xoa_nounlink = value;
611 			break;
612 		case F_APPENDONLY:
613 			XVA_SET_REQ(&xvattr, XAT_APPENDONLY);
614 			xoap->xoa_appendonly = value;
615 			break;
616 		case F_NODUMP:
617 			XVA_SET_REQ(&xvattr, XAT_NODUMP);
618 			xoap->xoa_nodump = value;
619 			break;
620 		case F_AV_QUARANTINED:
621 			XVA_SET_REQ(&xvattr, XAT_AV_QUARANTINED);
622 			xoap->xoa_av_quarantined = value;
623 			break;
624 		case F_AV_MODIFIED:
625 			XVA_SET_REQ(&xvattr, XAT_AV_MODIFIED);
626 			xoap->xoa_av_modified = value;
627 			break;
628 		case F_CRTIME:
629 			XVA_SET_REQ(&xvattr, XAT_CREATETIME);
630 			time = (uint64_t *)&(xoap->xoa_createtime);
631 			for (elem = 0; elem < nelems; elem++)
632 				*time++ = times[elem];
633 			break;
634 		case F_OWNERSID:
635 		case F_GROUPSID:
636 			if (nvlist_lookup_string(nvp_sid, SID_DOMAIN,
637 			    &domain) || nvlist_lookup_uint32(nvp_sid, SID_RID,
638 			    &rid)) {
639 				nvlist_free(nvp);
640 				return (EINVAL);
641 			}
642 
643 			/*
644 			 * Now map domain+rid to ephemeral id's
645 			 *
646 			 * If mapping fails, then the uid/gid will
647 			 * be set to UID_NOBODY by Winchester.
648 			 */
649 
650 			if (attr == F_OWNERSID) {
651 				(void) kidmap_getuidbysid(crgetzone(cr), domain,
652 				    rid, &xvattr.xva_vattr.va_uid);
653 				xvattr.xva_vattr.va_mask |= AT_UID;
654 			} else {
655 				(void) kidmap_getgidbysid(crgetzone(cr), domain,
656 				    rid, &xvattr.xva_vattr.va_gid);
657 				xvattr.xva_vattr.va_mask |= AT_GID;
658 			}
659 			break;
660 		case F_AV_SCANSTAMP:
661 			if (ppvp->v_type == VREG) {
662 				XVA_SET_REQ(&xvattr, XAT_AV_SCANSTAMP);
663 				(void) memcpy(xoap->xoa_av_scanstamp,
664 				    scanstamp, nelems);
665 			} else {
666 				nvlist_free(nvp);
667 				return (EINVAL);
668 			}
669 			break;
670 		default:
671 			break;
672 		}
673 	}
674 
675 	ppvp = gfs_file_parent(gfs_file_parent(vp));
676 	error = VOP_SETATTR(ppvp, &xvattr.xva_vattr, 0, cr, ct);
677 	if (error)
678 		uiop->uio_resid = size;
679 
680 	nvlist_free(nvp);
681 	return (error);
682 }
683 
684 static int
685 xattr_file_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
686     caller_context_t *ct)
687 {
688 	switch (cmd) {
689 	case _PC_XATTR_EXISTS:
690 	case _PC_SATTR_ENABLED:
691 	case _PC_SATTR_EXISTS:
692 		*valp = 0;
693 		return (0);
694 	default:
695 		return (fs_pathconf(vp, cmd, valp, cr, ct));
696 	}
697 }
698 
/*
 * Vnode operations vector handed to gfs_file_create() for sysattr view
 * files.  NOTE(review): it is not assigned in the code shown here;
 * presumably it is built from xattr_file_tops at init time -- confirm.
 */
vnodeops_t *xattr_file_ops;

/*
 * Operation template for sysattr view files: maps each VOP name to the
 * handler defined in this file or to a generic fs_*/gfs_* routine.  The
 * NULL entry terminates the table.
 */
static const fs_operation_def_t xattr_file_tops[] = {
	{ VOPNAME_OPEN,		{ .vop_open = xattr_file_open }		},
	{ VOPNAME_CLOSE,	{ .vop_close = xattr_file_close }	},
	{ VOPNAME_READ,		{ .vop_read = xattr_file_read }		},
	{ VOPNAME_WRITE,	{ .vop_write = xattr_file_write }	},
	{ VOPNAME_IOCTL,	{ .error = fs_ioctl }			},
	{ VOPNAME_GETATTR,	{ .vop_getattr = xattr_file_getattr }	},
	{ VOPNAME_ACCESS,	{ .vop_access = xattr_file_access }	},
	{ VOPNAME_READDIR,	{ .error = fs_notdir }			},
	{ VOPNAME_SEEK,		{ .vop_seek = fs_seek }			},
	{ VOPNAME_INACTIVE,	{ .vop_inactive = gfs_vop_inactive }	},
	{ VOPNAME_FID,		{ .vop_fid = xattr_common_fid }		},
	{ VOPNAME_PATHCONF,	{ .vop_pathconf = xattr_file_pathconf }	},
	{ VOPNAME_PUTPAGE,	{ .error = fs_putpage }			},
	{ VOPNAME_FSYNC,	{ .error = fs_fsync }			},
	{ NULL }
};
718 
719 vnode_t *
720 xattr_mkfile(vnode_t *pvp, xattr_view_t xattr_view)
721 {
722 	vnode_t *vp;
723 	xattr_file_t *np;
724 
725 	vp = gfs_file_create(sizeof (xattr_file_t), pvp, xattr_file_ops);
726 	np = vp->v_data;
727 	np->xattr_view = xattr_view;
728 	vp->v_flag |= V_SYSATTR;
729 	return (vp);
730 }
731 
732 vnode_t *
733 xattr_mkfile_ro(vnode_t *pvp)
734 {
735 	return (xattr_mkfile(pvp, XATTR_VIEW_READONLY));
736 }
737 
738 vnode_t *
739 xattr_mkfile_rw(vnode_t *pvp)
740 {
741 	return (xattr_mkfile(pvp, XATTR_VIEW_READWRITE));
742 }
743 
/*
 * Vnode operations vector for the sysattr directory.
 * NOTE(review): not assigned in the code shown here -- confirm where it
 * is initialized.
 */
vnodeops_t *xattr_dir_ops;

/*
 * Static entries of the sysattr directory: one per view, each naming
 * the constructor that creates its file vnode.  The NULL entry
 * terminates the table.
 */
static gfs_dirent_t xattr_dirents[] = {
	{ VIEW_READONLY, xattr_mkfile_ro, GFS_CACHE_VNODE, },
	{ VIEW_READWRITE, xattr_mkfile_rw, GFS_CACHE_VNODE, },
	{ NULL },
};

/* Number of entries in xattr_dirents, not counting the terminator. */
#define	XATTRDIR_NENTS	((sizeof (xattr_dirents) / sizeof (gfs_dirent_t)) - 1)
753 
754 static int
755 is_sattr_name(char *s)
756 {
757 	int i;
758 
759 	for (i = 0; i < XATTRDIR_NENTS; ++i) {
760 		if (strcmp(s, xattr_dirents[i].gfse_name) == 0) {
761 			return (1);
762 		}
763 	}
764 	return (0);
765 }
766 
767 /*
768  * Given the name of an extended attribute file, determine if there is a
769  * normalization conflict with a sysattr view name.
770  */
771 int
772 xattr_sysattr_casechk(char *s)
773 {
774 	int i;
775 
776 	for (i = 0; i < XATTRDIR_NENTS; ++i) {
777 		if (strcasecmp(s, xattr_dirents[i].gfse_name) == 0)
778 			return (1);
779 	}
780 	return (0);
781 }
782 
783 static int
784 xattr_copy(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm,
785     cred_t *cr, caller_context_t *ct)
786 {
787 	xvattr_t xvattr;
788 	vnode_t *pdvp;
789 	int error;
790 
791 	/*
792 	 * Only copy system attrs if the views are the same
793 	 */
794 	if (strcmp(snm, tnm) != 0)
795 		return (EINVAL);
796 
797 	xva_init(&xvattr);
798 
799 	XVA_SET_REQ(&xvattr, XAT_SYSTEM);
800 	XVA_SET_REQ(&xvattr, XAT_READONLY);
801 	XVA_SET_REQ(&xvattr, XAT_HIDDEN);
802 	XVA_SET_REQ(&xvattr, XAT_ARCHIVE);
803 	XVA_SET_REQ(&xvattr, XAT_APPENDONLY);
804 	XVA_SET_REQ(&xvattr, XAT_NOUNLINK);
805 	XVA_SET_REQ(&xvattr, XAT_IMMUTABLE);
806 	XVA_SET_REQ(&xvattr, XAT_NODUMP);
807 	XVA_SET_REQ(&xvattr, XAT_AV_MODIFIED);
808 	XVA_SET_REQ(&xvattr, XAT_AV_QUARANTINED);
809 	XVA_SET_REQ(&xvattr, XAT_CREATETIME);
810 
811 	pdvp = gfs_file_parent(sdvp);
812 	error = VOP_GETATTR(pdvp, &xvattr.xva_vattr, 0, cr, ct);
813 	if (error)
814 		return (error);
815 
816 	pdvp = gfs_file_parent(tdvp);
817 	error = VOP_SETATTR(pdvp, &xvattr.xva_vattr, 0, cr, ct);
818 	return (error);
819 }
820 
821 static int
822 xattr_dir_realdir(vnode_t *dvp, vnode_t **realdvp, int lookup_flags,
823     cred_t *cr, caller_context_t *ct)
824 {
825 	vnode_t *pvp;
826 	int error;
827 	struct pathname pn;
828 	char *startnm = "";
829 
830 	*realdvp = NULL;
831 
832 	pvp = gfs_file_parent(dvp);
833 
834 	error = pn_get(startnm, UIO_SYSSPACE, &pn);
835 	if (error) {
836 		VN_RELE(pvp);
837 		return (error);
838 	}
839 
840 	/*
841 	 * Set the LOOKUP_HAVE_SYSATTR_DIR flag so that we don't get into an
842 	 * infinite loop with fop_lookup calling back to xattr_dir_lookup.
843 	 */
844 	lookup_flags |= LOOKUP_HAVE_SYSATTR_DIR;
845 	error = VOP_LOOKUP(pvp, startnm, realdvp, &pn, lookup_flags,
846 	    rootvp, cr, ct, NULL, NULL);
847 	pn_free(&pn);
848 
849 	return (error);
850 }
851 
852 /* ARGSUSED */
853 static int
854 xattr_dir_open(vnode_t **vpp, int flags, cred_t *cr, caller_context_t *ct)
855 {
856 	if (flags & FWRITE) {
857 		return (EACCES);
858 	}
859 
860 	return (0);
861 }
862 
/*
 * Close the sysattr directory.  There is nothing to tear down, so this
 * always returns 0.
 */
/* ARGSUSED */
static int
xattr_dir_close(vnode_t *vpp, int flags, int count, offset_t off, cred_t *cr,
    caller_context_t *ct)
{
	return (0);
}
870 
871 /*
872  * Retrieve the attributes on an xattr directory.  If there is a "real"
873  * xattr directory, use that.  Otherwise, get the attributes (represented
874  * by PARENT_ATTRMASK) from the "parent" node and fill in the rest.  Note
875  * that VOP_GETATTR() could turn off bits in the va_mask.
876  */
877 
878 #define	PARENT_ATTRMASK	(AT_UID|AT_GID|AT_RDEV|AT_CTIME|AT_MTIME)
879 
880 /* ARGSUSED */
881 static int
882 xattr_dir_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
883     caller_context_t *ct)
884 {
885 	timestruc_t now;
886 	vnode_t *pvp;
887 	int error;
888 
889 	error = xattr_dir_realdir(vp, &pvp, LOOKUP_XATTR, cr, ct);
890 	if (error == 0) {
891 		error = VOP_GETATTR(pvp, vap, 0, cr, ct);
892 		VN_RELE(pvp);
893 		if (error) {
894 			return (error);
895 		}
896 		vap->va_nlink += XATTRDIR_NENTS;
897 		vap->va_size += XATTRDIR_NENTS;
898 		return (0);
899 	}
900 
901 	/*
902 	 * There is no real xattr directory.  Cobble together
903 	 * an entry using info from the parent object (if needed)
904 	 * plus information common to all xattrs.
905 	 */
906 	if (vap->va_mask & PARENT_ATTRMASK) {
907 		vattr_t pvattr;
908 		uint_t  off_bits;
909 
910 		pvp = gfs_file_parent(vp);
911 		(void) memset(&pvattr, 0, sizeof (pvattr));
912 		pvattr.va_mask = PARENT_ATTRMASK;
913 		error = VOP_GETATTR(pvp, &pvattr, 0, cr, ct);
914 		if (error) {
915 			return (error);
916 		}
917 
918 		/*
919 		 * VOP_GETATTR() might have turned off some bits in
920 		 * pvattr.va_mask.  This means that the underlying
921 		 * file system couldn't process those attributes.
922 		 * We need to make sure those bits get turned off
923 		 * in the vattr_t structure that gets passed back
924 		 * to the caller.  Figure out which bits were turned
925 		 * off (if any) then set pvattr.va_mask before it
926 		 * gets copied to the vattr_t that the caller sees.
927 		 */
928 		off_bits = (pvattr.va_mask ^ PARENT_ATTRMASK) & PARENT_ATTRMASK;
929 		pvattr.va_mask = vap->va_mask & ~off_bits;
930 		*vap = pvattr;
931 	}
932 
933 	vap->va_type = VDIR;
934 	vap->va_mode = MAKEIMODE(vap->va_type, S_ISVTX | 0777);
935 	vap->va_fsid = vp->v_vfsp->vfs_dev;
936 	vap->va_nodeid = gfs_file_inode(vp);
937 	vap->va_nlink = XATTRDIR_NENTS+2;
938 	vap->va_size = vap->va_nlink;
939 	gethrestime(&now);
940 	vap->va_atime = now;
941 	vap->va_blksize = 0;
942 	vap->va_nblocks = 0;
943 	vap->va_seq = 0;
944 	return (0);
945 }
946 
947 static int
948 xattr_dir_setattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
949     caller_context_t *ct)
950 {
951 	vnode_t *realvp;
952 	int error;
953 
954 	/*
955 	 * If there is a real xattr directory, do the setattr there.
956 	 * Otherwise, just return success.  The GFS directory is transient,
957 	 * and any setattr changes can disappear anyway.
958 	 */
959 	error = xattr_dir_realdir(vp, &realvp, LOOKUP_XATTR, cr, ct);
960 	if (error == 0) {
961 		error = VOP_SETATTR(realvp, vap, flags, cr, ct);
962 		VN_RELE(realvp);
963 	}
964 	if (error == ENOENT) {
965 		error = 0;
966 	}
967 	return (error);
968 }
969 
970 /* ARGSUSED */
971 static int
972 xattr_dir_access(vnode_t *vp, int mode, int flags, cred_t *cr,
973     caller_context_t *ct)
974 {
975 	int error;
976 	vnode_t *realvp = NULL;
977 
978 	if (mode & VWRITE) {
979 		return (EACCES);
980 	}
981 
982 	error = xattr_dir_realdir(vp, &realvp, LOOKUP_XATTR, cr, ct);
983 
984 	if (realvp)
985 		VN_RELE(realvp);
986 
987 	/*
988 	 * No real xattr dir isn't an error
989 	 * an error of EINVAL indicates attributes on attributes
990 	 * are not supported.  In that case just allow access to the
991 	 * transient directory.
992 	 */
993 	return ((error == ENOENT || error == EINVAL) ? 0 : error);
994 }
995 
996 static int
997 xattr_dir_create(vnode_t *dvp, char *name, vattr_t *vap, vcexcl_t excl,
998     int mode, vnode_t **vpp, cred_t *cr, int flag, caller_context_t *ct,
999     vsecattr_t *vsecp)
1000 {
1001 	vnode_t *pvp;
1002 	int error;
1003 
1004 	*vpp = NULL;
1005 
1006 	/*
1007 	 * Don't allow creation of extended attributes with sysattr names.
1008 	 */
1009 	if (is_sattr_name(name)) {
1010 		return (gfs_dir_lookup(dvp, name, vpp, cr, 0, NULL, NULL));
1011 	}
1012 
1013 	error = xattr_dir_realdir(dvp, &pvp, LOOKUP_XATTR|CREATE_XATTR_DIR,
1014 	    cr, ct);
1015 	if (error == 0) {
1016 		error = VOP_CREATE(pvp, name, vap, excl, mode, vpp, cr, flag,
1017 		    ct, vsecp);
1018 		VN_RELE(pvp);
1019 	}
1020 	return (error);
1021 }
1022 
1023 static int
1024 xattr_dir_remove(vnode_t *dvp, char *name, cred_t *cr, caller_context_t *ct,
1025     int flags)
1026 {
1027 	vnode_t *pvp;
1028 	int error;
1029 
1030 	if (is_sattr_name(name)) {
1031 		return (EACCES);
1032 	}
1033 
1034 	error = xattr_dir_realdir(dvp, &pvp, LOOKUP_XATTR, cr, ct);
1035 	if (error == 0) {
1036 		error = VOP_REMOVE(pvp, name, cr, ct, flags);
1037 		VN_RELE(pvp);
1038 	}
1039 	return (error);
1040 }
1041 
1042 static int
1043 xattr_dir_link(vnode_t *tdvp, vnode_t *svp, char *name, cred_t *cr,
1044     caller_context_t *ct, int flags)
1045 {
1046 	vnode_t *pvp;
1047 	int error;
1048 
1049 	if (svp->v_flag & V_SYSATTR) {
1050 		return (EINVAL);
1051 	}
1052 
1053 	error = xattr_dir_realdir(tdvp, &pvp, LOOKUP_XATTR, cr, ct);
1054 	if (error == 0) {
1055 		error = VOP_LINK(pvp, svp, name, cr, ct, flags);
1056 		VN_RELE(pvp);
1057 	}
1058 	return (error);
1059 }
1060 
1061 static int
1062 xattr_dir_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm,
1063     cred_t *cr, caller_context_t *ct, int flags)
1064 {
1065 	vnode_t *spvp, *tpvp;
1066 	int error;
1067 	int held_tgt;
1068 
1069 	if (is_sattr_name(snm) || is_sattr_name(tnm))
1070 		return (xattr_copy(sdvp, snm, tdvp, tnm, cr, ct));
1071 	/*
1072 	 * We know that sdvp is a GFS dir, or we wouldn't be here.
1073 	 * Get the real unnamed directory.
1074 	 */
1075 	error = xattr_dir_realdir(sdvp, &spvp, LOOKUP_XATTR, cr, ct);
1076 	if (error) {
1077 		return (error);
1078 	}
1079 
1080 	if (sdvp == tdvp) {
1081 		/*
1082 		 * If the source and target are the same GFS directory, the
1083 		 * underlying unnamed source and target dir will be the same.
1084 		 */
1085 		tpvp = spvp;
1086 		VN_HOLD(tpvp);
1087 		held_tgt = 1;
1088 	} else if (tdvp->v_flag & V_SYSATTR) {
1089 		/*
1090 		 * If the target dir is a different GFS directory,
1091 		 * find its underlying unnamed dir.
1092 		 */
1093 		error = xattr_dir_realdir(tdvp, &tpvp, LOOKUP_XATTR, cr, ct);
1094 		if (error) {
1095 			VN_RELE(spvp);
1096 			return (error);
1097 		}
1098 		held_tgt = 1;
1099 	} else {
1100 		/*
1101 		 * Target dir is outside of GFS, pass it on through.
1102 		 */
1103 		tpvp = tdvp;
1104 		held_tgt = 0;
1105 	}
1106 
1107 	error = VOP_RENAME(spvp, snm, tpvp, tnm, cr, ct, flags);
1108 
1109 	if (held_tgt) {
1110 		VN_RELE(tpvp);
1111 	}
1112 	VN_RELE(spvp);
1113 
1114 	return (error);
1115 }
1116 
1117 /*
1118  * readdir_xattr_casecmp: given a system attribute name, see if there
1119  * is a real xattr with the same normalized name.
1120  */
1121 static int
1122 readdir_xattr_casecmp(vnode_t *dvp, char *nm, cred_t *cr, caller_context_t *ct,
1123     int *eflags)
1124 {
1125 	int error;
1126 	vnode_t *vp;
1127 	struct pathname pn;
1128 
1129 	*eflags = 0;
1130 
1131 	error = pn_get(nm, UIO_SYSSPACE, &pn);
1132 	if (error == 0) {
1133 		error = VOP_LOOKUP(dvp, nm, &vp, &pn,
1134 		    FIGNORECASE, rootvp, cr, ct, NULL, NULL);
1135 		if (error == 0) {
1136 			*eflags = ED_CASE_CONFLICT;
1137 			VN_RELE(vp);
1138 		} else if (error == ENOENT) {
1139 			error = 0;
1140 		}
1141 		pn_free(&pn);
1142 	}
1143 
1144 	return (error);
1145 }
1146 
1147 static int
1148 xattr_dir_readdir(vnode_t *dvp, uio_t *uiop, cred_t *cr, int *eofp,
1149     caller_context_t *ct, int flags)
1150 {
1151 	vnode_t *pvp;
1152 	int error;
1153 	int local_eof;
1154 	int reset_off = 0;
1155 	int has_xattrs = 0;
1156 
1157 	if (eofp == NULL) {
1158 		eofp = &local_eof;
1159 	}
1160 	*eofp = 0;
1161 
1162 	/*
1163 	 * See if there is a real extended attribute directory.
1164 	 */
1165 	error = xattr_dir_realdir(dvp, &pvp, LOOKUP_XATTR, cr, ct);
1166 	if (error == 0) {
1167 		has_xattrs = 1;
1168 	}
1169 
1170 	/*
1171 	 * Start by reading up the static entries.
1172 	 */
1173 	if (uiop->uio_loffset == 0) {
1174 		ino64_t pino, ino;
1175 		offset_t off;
1176 		gfs_dir_t *dp = dvp->v_data;
1177 		gfs_readdir_state_t gstate;
1178 
1179 		if (has_xattrs) {
1180 			/*
1181 			 * If there is a real xattr dir, skip . and ..
1182 			 * in the GFS dir.  We'll pick them up below
1183 			 * when we call into the underlying fs.
1184 			 */
1185 			uiop->uio_loffset = GFS_STATIC_ENTRY_OFFSET;
1186 		}
1187 		error = gfs_get_parent_ino(dvp, cr, ct, &pino, &ino);
1188 		if (error == 0) {
1189 			error = gfs_readdir_init(&gstate, dp->gfsd_maxlen, 1,
1190 			    uiop, pino, ino, flags);
1191 		}
1192 		if (error) {
1193 			if (has_xattrs)
1194 				VN_RELE(pvp);
1195 			return (error);
1196 		}
1197 
1198 		while ((error = gfs_readdir_pred(&gstate, uiop, &off)) == 0 &&
1199 		    !*eofp) {
1200 			if (off >= 0 && off < dp->gfsd_nstatic) {
1201 				int eflags;
1202 
1203 				/*
1204 				 * Check to see if this sysattr set name has a
1205 				 * case-insensitive conflict with a real xattr
1206 				 * name.
1207 				 */
1208 				eflags = 0;
1209 				if ((flags & V_RDDIR_ENTFLAGS) && has_xattrs) {
1210 					error = readdir_xattr_casecmp(pvp,
1211 					    dp->gfsd_static[off].gfse_name,
1212 					    cr, ct, &eflags);
1213 					if (error)
1214 						break;
1215 				}
1216 				ino = dp->gfsd_inode(dvp, off);
1217 
1218 				error = gfs_readdir_emit(&gstate, uiop, off,
1219 				    ino, dp->gfsd_static[off].gfse_name,
1220 				    eflags);
1221 				if (error)
1222 					break;
1223 			} else {
1224 				*eofp = 1;
1225 			}
1226 		}
1227 
1228 		error = gfs_readdir_fini(&gstate, error, eofp, *eofp);
1229 		if (error) {
1230 			if (has_xattrs)
1231 				VN_RELE(pvp);
1232 			return (error);
1233 		}
1234 
1235 		/*
1236 		 * We must read all of the static entries in the first
1237 		 * call.  Otherwise we won't know if uio_loffset in a
1238 		 * subsequent call refers to the static entries or to those
1239 		 * in an underlying fs.
1240 		 */
1241 		if (*eofp == 0)
1242 			return (EINVAL);
1243 		reset_off = 1;
1244 	}
1245 
1246 	if (!has_xattrs) {
1247 		*eofp = 1;
1248 		return (0);
1249 	}
1250 
1251 	*eofp = 0;
1252 	if (reset_off) {
1253 		uiop->uio_loffset = 0;
1254 	}
1255 	(void) VOP_RWLOCK(pvp, V_WRITELOCK_FALSE, NULL);
1256 	error = VOP_READDIR(pvp, uiop, cr, eofp, ct, flags);
1257 	VOP_RWUNLOCK(pvp, V_WRITELOCK_FALSE, NULL);
1258 	VN_RELE(pvp);
1259 
1260 	return (error);
1261 }
1262 
1263 /* ARGSUSED */
1264 static void
1265 xattr_dir_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
1266 {
1267 	gfs_file_t *fp;
1268 
1269 	fp = gfs_dir_inactive(vp);
1270 	if (fp != NULL) {
1271 		kmem_free(fp, fp->gfs_size);
1272 	}
1273 }
1274 
1275 static int
1276 xattr_dir_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
1277     caller_context_t *ct)
1278 {
1279 	switch (cmd) {
1280 	case _PC_XATTR_EXISTS:
1281 	case _PC_SATTR_ENABLED:
1282 	case _PC_SATTR_EXISTS:
1283 		*valp = 0;
1284 		return (0);
1285 	default:
1286 		return (fs_pathconf(vp, cmd, valp, cr, ct));
1287 	}
1288 }
1289 
/*
 * Vnode operations template for the GFS sysattr directory.  It is
 * referenced from the "xattr dir" entry of xattr_opsvec.  IOCTL and MKDIR
 * are routed to fs_inval; SEEK uses the generic fs_seek; LOOKUP uses
 * gfs_vop_lookup; the remaining entries point at the xattr_dir_* and
 * xattr_common_fid routines in this file.  The table is terminated by a
 * NULL entry.
 */
static const fs_operation_def_t xattr_dir_tops[] = {
	{ VOPNAME_OPEN,		{ .vop_open = xattr_dir_open }		},
	{ VOPNAME_CLOSE,	{ .vop_close = xattr_dir_close }	},
	{ VOPNAME_IOCTL,	{ .error = fs_inval }			},
	{ VOPNAME_GETATTR,	{ .vop_getattr = xattr_dir_getattr }	},
	{ VOPNAME_SETATTR,	{ .vop_setattr = xattr_dir_setattr }	},
	{ VOPNAME_ACCESS,	{ .vop_access = xattr_dir_access }	},
	{ VOPNAME_READDIR,	{ .vop_readdir = xattr_dir_readdir }	},
	{ VOPNAME_LOOKUP,	{ .vop_lookup = gfs_vop_lookup }	},
	{ VOPNAME_CREATE,	{ .vop_create = xattr_dir_create }	},
	{ VOPNAME_REMOVE,	{ .vop_remove = xattr_dir_remove }	},
	{ VOPNAME_LINK,		{ .vop_link = xattr_dir_link }		},
	{ VOPNAME_RENAME,	{ .vop_rename = xattr_dir_rename }	},
	{ VOPNAME_MKDIR,	{ .error = fs_inval }			},
	{ VOPNAME_SEEK,		{ .vop_seek = fs_seek }			},
	{ VOPNAME_INACTIVE,	{ .vop_inactive = xattr_dir_inactive }	},
	{ VOPNAME_FID,		{ .vop_fid = xattr_common_fid }		},
	{ VOPNAME_PATHCONF,	{ .vop_pathconf = xattr_dir_pathconf }	},
	{ NULL, NULL }
};
1310 
/*
 * Table handed to gfs_make_opsvec() by xattr_init().  Each entry names a
 * vnode type, gives its operations template, and supplies the address of
 * the ops pointer associated with it (presumably filled in by
 * gfs_make_opsvec() -- confirm against gfs.c).  The table is terminated
 * by an all-NULL entry.
 */
static gfs_opsvec_t xattr_opsvec[] = {
	{ "xattr dir", xattr_dir_tops, &xattr_dir_ops },
	{ "system attributes", xattr_file_tops, &xattr_file_ops },
	{ NULL, NULL, NULL }
};
1316 
1317 static int
1318 xattr_lookup_cb(vnode_t *vp, const char *nm, vnode_t **vpp, ino64_t *inop,
1319     cred_t *cr, int flags, int *deflags, pathname_t *rpnp)
1320 {
1321 	vnode_t *pvp;
1322 	struct pathname pn;
1323 	int error;
1324 
1325 	*vpp = NULL;
1326 	*inop = 0;
1327 
1328 	error = xattr_dir_realdir(vp, &pvp, LOOKUP_XATTR|CREATE_XATTR_DIR,
1329 	    cr, NULL);
1330 
1331 	/*
1332 	 * Return ENOENT for EACCES requests during lookup.  Once an
1333 	 * attribute create is attempted EACCES will be returned.
1334 	 */
1335 	if (error) {
1336 		if (error == EACCES)
1337 			return (ENOENT);
1338 		return (error);
1339 	}
1340 
1341 	error = pn_get((char *)nm, UIO_SYSSPACE, &pn);
1342 	if (error == 0) {
1343 		error = VOP_LOOKUP(pvp, (char *)nm, vpp, &pn, flags, rootvp,
1344 		    cr, NULL, deflags, rpnp);
1345 		pn_free(&pn);
1346 	}
1347 	VN_RELE(pvp);
1348 
1349 	return (error);
1350 }
1351 
1352 /* ARGSUSED */
1353 static ino64_t
1354 xattrdir_do_ino(vnode_t *vp, int index)
1355 {
1356 	/*
1357 	 * We use index 0 for the directory fid.  Start
1358 	 * the file numbering at 1.
1359 	 */
1360 	return ((ino64_t)index+1);
1361 }
1362 
/*
 * xattr_init: one-time setup for the sysattr GFS vnode types.  Passes
 * xattr_opsvec to gfs_make_opsvec() and VERIFYs that the call returned 0.
 */
void
xattr_init(void)
{
	VERIFY(gfs_make_opsvec(xattr_opsvec) == 0);
}
1368 
/*
 * xattr_dir_lookup: return, in *vpp, the extended attribute directory
 * vnode for dvp.
 *
 * dvp must be a VDIR or VREG vnode that is not itself flagged V_SYSATTR;
 * otherwise EINVAL is returned.  If dvp already has a cached v_xattrdir,
 * that vnode is held and returned.  Otherwise:
 *
 *   - if neither xattrs (VFS_XATTR on the vfs) nor sysattrs
 *     (_PC_SATTR_ENABLED on dvp) are available, EINVAL is returned;
 *   - if only xattrs are available, the lookup is passed to VOP_LOOKUP on
 *     dvp with LOOKUP_HAVE_SYSATTR_DIR added to flags, and its result is
 *     returned directly;
 *   - otherwise a new GFS directory is created and installed as
 *     dvp->v_xattrdir, unless another thread installed one first, in
 *     which case the new one is torn down and the winner is returned.
 *
 * *vpp is set to NULL on entry.
 */
int
xattr_dir_lookup(vnode_t *dvp, vnode_t **vpp, int flags, cred_t *cr)
{
	int error = 0;

	*vpp = NULL;

	if (dvp->v_type != VDIR && dvp->v_type != VREG)
		return (EINVAL);

	mutex_enter(&dvp->v_lock);

	/*
	 * If we're already in sysattr space, don't allow creation
	 * of another level of sysattrs.
	 */
	if (dvp->v_flag & V_SYSATTR) {
		mutex_exit(&dvp->v_lock);
		return (EINVAL);
	}

	if (dvp->v_xattrdir != NULL) {
		/* Fast path: reuse the cached GFS directory. */
		*vpp = dvp->v_xattrdir;
		VN_HOLD(*vpp);
	} else {
		ulong_t val;
		int xattrs_allowed = dvp->v_vfsp->vfs_flag & VFS_XATTR;
		int sysattrs_allowed = 1;

		/*
		 * We have to drop the lock on dvp.  gfs_dir_create will
		 * grab it for a VN_HOLD.
		 */
		mutex_exit(&dvp->v_lock);

		/*
		 * If dvp allows xattr creation, but not sysattr
		 * creation, return the real xattr dir vp. We can't
		 * use the vfs feature mask here because _PC_SATTR_ENABLED
		 * has vnode-level granularity (e.g. .zfs).
		 */
		error = VOP_PATHCONF(dvp, _PC_SATTR_ENABLED, &val, cr, NULL);
		if (error != 0 || val == 0)
			sysattrs_allowed = 0;

		if (!xattrs_allowed && !sysattrs_allowed)
			return (EINVAL);

		if (!sysattrs_allowed) {
			struct pathname pn;
			char *nm = "";

			/*
			 * v_lock is not held here, so these returns need
			 * no unlock.
			 */
			error = pn_get(nm, UIO_SYSSPACE, &pn);
			if (error)
				return (error);
			error = VOP_LOOKUP(dvp, nm, vpp, &pn,
			    flags|LOOKUP_HAVE_SYSATTR_DIR, rootvp, cr, NULL,
			    NULL, NULL);
			pn_free(&pn);
			return (error);
		}

		/*
		 * Note that we act as if we were given CREATE_XATTR_DIR,
		 * but only for creation of the GFS directory.
		 */
		*vpp = gfs_dir_create(
		    sizeof (gfs_dir_t), dvp, xattr_dir_ops, xattr_dirents,
		    xattrdir_do_ino, MAXNAMELEN, NULL, xattr_lookup_cb);
		/*
		 * Retake the lock and recheck: another thread may have
		 * installed a v_xattrdir while the lock was dropped.
		 */
		mutex_enter(&dvp->v_lock);
		if (dvp->v_xattrdir != NULL) {
			/*
			 * We lost the race to create the xattr dir.
			 * Destroy this one, use the winner.  We can't
			 * just call VN_RELE(*vpp), because the vnode
			 * is only partially initialized.
			 */
			gfs_dir_t *dp = (*vpp)->v_data;

			ASSERT((*vpp)->v_count == 1);
			vn_free(*vpp);

			/* dp was captured above, before the vnode was freed. */
			mutex_destroy(&dp->gfsd_lock);
			kmem_free(dp->gfsd_static,
			    dp->gfsd_nstatic * sizeof (gfs_dirent_t));
			kmem_free(dp, dp->gfsd_file.gfs_size);

			/*
			 * There is an implied VN_HOLD(dvp) here.  We should
			 * be doing a VN_RELE(dvp) to clean up the reference
			 * from *vpp, and then a VN_HOLD(dvp) for the new
			 * reference.  Instead, we just leave the count alone.
			 */

			*vpp = dvp->v_xattrdir;
			VN_HOLD(*vpp);
		} else {
			/* We won: flag the new vnode and cache it on dvp. */
			(*vpp)->v_flag |= (V_XATTRDIR|V_SYSATTR);
			dvp->v_xattrdir = *vpp;
		}
	}
	mutex_exit(&dvp->v_lock);

	return (error);
}
1474 
1475 int
1476 xattr_dir_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp)
1477 {
1478 	int error;
1479 	vnode_t *pvp, *dvp;
1480 	xattr_fid_t *xfidp;
1481 	struct pathname pn;
1482 	char *nm;
1483 	uint16_t orig_len;
1484 
1485 	*vpp = NULL;
1486 
1487 	if (fidp->fid_len < XATTR_FIDSZ)
1488 		return (EINVAL);
1489 
1490 	xfidp = (xattr_fid_t *)fidp;
1491 	orig_len = fidp->fid_len;
1492 	fidp->fid_len = xfidp->parent_len;
1493 
1494 	error = VFS_VGET(vfsp, &pvp, fidp);
1495 	fidp->fid_len = orig_len;
1496 	if (error)
1497 		return (error);
1498 
1499 	/*
1500 	 * Start by getting the GFS sysattr directory.	We might need
1501 	 * to recreate it during the VOP_LOOKUP.
1502 	 */
1503 	nm = "";
1504 	error = pn_get(nm, UIO_SYSSPACE, &pn);
1505 	if (error) {
1506 		VN_RELE(pvp);
1507 		return (EINVAL);
1508 	}
1509 
1510 	error = VOP_LOOKUP(pvp, nm, &dvp, &pn, LOOKUP_XATTR|CREATE_XATTR_DIR,
1511 	    rootvp, CRED(), NULL, NULL, NULL);
1512 	pn_free(&pn);
1513 	VN_RELE(pvp);
1514 	if (error)
1515 		return (error);
1516 
1517 	if (xfidp->dir_offset == 0) {
1518 		/*
1519 		 * If we were looking for the directory, we're done.
1520 		 */
1521 		*vpp = dvp;
1522 		return (0);
1523 	}
1524 
1525 	if (xfidp->dir_offset > XATTRDIR_NENTS) {
1526 		VN_RELE(dvp);
1527 		return (EINVAL);
1528 	}
1529 
1530 	nm = xattr_dirents[xfidp->dir_offset - 1].gfse_name;
1531 
1532 	error = pn_get(nm, UIO_SYSSPACE, &pn);
1533 	if (error) {
1534 		VN_RELE(dvp);
1535 		return (EINVAL);
1536 	}
1537 
1538 	error = VOP_LOOKUP(dvp, nm, vpp, &pn, 0, rootvp, CRED(), NULL,
1539 	    NULL, NULL);
1540 
1541 	pn_free(&pn);
1542 	VN_RELE(dvp);
1543 
1544 	return (error);
1545 }
1546