xref: /illumos-gate/usr/src/uts/common/fs/xattr.c (revision 503609a9497e27f206d815a06ce90a747d2ce573)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright 2016 Nexenta Systems, Inc.  All rights reserved.
24  */
25 
26 /*
27  * Big Theory Statement for Extended Attribute (XATTR) directories
28  *
29  * The Solaris VFS layer presents extended file attributes using a special
30  * "XATTR" directory under files or directories that have extended file
31  * attributes.  See fsattr(5) for background.
32  *
33  * This design avoids the need for a separate set of VFS or vnode functions
34  * for operating on XATTR objects.  File system implementations that support
35  * XATTR instantiate a special XATTR directory using this module.
36  * Applications get to the XATTR directory by passing the LOOKUP_XATTR flag
37  * to fop_lookup.  Once the XATTR directory is obtained, all other file
38  * system operations on extended attributes happen via the normal vnode
39  * functions, applied to the XATTR directory or its contents.
40  *
41  * The XATTR directories returned by fop_lookup (with LOOKUP_XATTR) are
42  * implemented differently, depending on whether the file system supports
43  * "extended attributes" (XATTR), "system attributes" (SYSATTR), or both.
44  *
45  * When SYSATTR=true, XATTR=true:
46  *	The XATTR directory is a "generic file system" (GFS) object
47  *	that adds the special system attribute names (SUNWattr*) to
48  *	the list of XATTR files presented by the underlying FS.
49  *	In this case, many operations are "passed through" to the
50  *	lower-level FS.
51  *
52  * When SYSATTR=true, XATTR=false:
53  *	The XATTR directory is a "generic file system" (GFS) object,
54  *	presenting only the system attribute names (SUNWattr*)
55  *	In this case there's no lower-level FS, only the GFS object.
56  *
57  * When SYSATTR=false, XATTR=true:
58  *	The XATTR directory is implemented by the file system code,
59  *	and this module is not involved after xattr_dir_lookup()
60  *	returns the XATTR dir from the underlying file system.
61  *
62  * When SYSATTR=false, XATTR=false:
63  *	xattr_dir_lookup just returns EINVAL
64  *
65  * In the first two cases (where we have system attributes) this module
66  * implements what can be thought of as a "translucent" directory containing
67  * both the system attribute names (SUNWattr*) and whatever XATTR names may
68  * exist in the XATTR directory of the underlying file system, if any.
69  *
70  * This affects operations on the (GFS) XATTR directory as follows:
71  *
72  * readdir:	Merges the SUNWattr* names with any contents from the
73  *		underlying XATTR directory.
74  *
75  * rename:	If "to" or "from" is a SUNWattr name, special handling,
76  *		else pass through to the lower FS.
77  *
78  * link:	If "from" is a SUNWattr name, disallow.
79  *
80  * create:	If a SUNWattr name, disallow, else pass to lower FS.
81  * remove:	(same)
82  *
83  * open,close:	Just pass through to the XATTR dir in the lower FS.
84  *
85  * lookup:	Lookup an XATTR file in either the (GFS) XATTR directory
86  *		or the "real" XATTR directory of the underlying FS.
87  *		Note for file systems that support SYSATTR but not XATTR,
88  *		only the GFS XATTR directory will exist.  When both exist,
89  *		gfs_vop_lookup uses the xattr_lookup_cb callback function
90  *		which passes the lookup call through to the "real" FS.
91  *
92  * Operations on the XATTR _files_ are simpler:
93  *
94  * If the file vnode came from lookup at the GFS level, the file is one of
95  * the special SUNWattr* vnodes, and its vnode operations (xattr_file_tops)
96  * allow only what's appropriate on these "files".
97  *
98  * If the file vnode came from the underlying FS, all operations on that
99  * object are handled through the vnode operations set by that FS.
100  */
101 
102 #include <sys/param.h>
103 #include <sys/isa_defs.h>
104 #include <sys/types.h>
105 #include <sys/sysmacros.h>
106 #include <sys/cred.h>
107 #include <sys/systm.h>
108 #include <sys/errno.h>
109 #include <sys/fcntl.h>
110 #include <sys/pathname.h>
111 #include <sys/stat.h>
112 #include <sys/vfs.h>
113 #include <sys/acl.h>
114 #include <sys/file.h>
115 #include <sys/sunddi.h>
116 #include <sys/debug.h>
117 #include <sys/cmn_err.h>
118 #include <sys/vnode.h>
119 #include <sys/mode.h>
120 #include <sys/nvpair.h>
121 #include <sys/attr.h>
122 #include <sys/gfs.h>
123 #include <sys/mutex.h>
124 #include <fs/fs_subr.h>
125 #include <sys/kidmap.h>
126 
127 typedef struct {
128 	gfs_file_t	xattr_gfs_private;
129 	xattr_view_t	xattr_view;
130 } xattr_file_t;
131 
132 typedef struct {
133 	gfs_dir_t	xattr_gfs_private;
134 	vnode_t		*xattr_realvp;
135 } xattr_dir_t;
136 
137 /* ARGSUSED */
138 static int
139 xattr_file_open(vnode_t **vpp, int flags, cred_t *cr, caller_context_t *ct)
140 {
141 	xattr_file_t *np = (*vpp)->v_data;
142 
143 	if ((np->xattr_view == XATTR_VIEW_READONLY) && (flags & FWRITE))
144 		return (EACCES);
145 
146 	return (0);
147 }
148 
149 /* ARGSUSED */
150 static int
151 xattr_file_access(vnode_t *vp, int mode, int flags, cred_t *cr,
152     caller_context_t *ct)
153 {
154 	xattr_file_t *np = vp->v_data;
155 
156 	if ((np->xattr_view == XATTR_VIEW_READONLY) && (mode & VWRITE))
157 		return (EACCES);
158 
159 	return (0);
160 }
161 
162 /* ARGSUSED */
163 static int
164 xattr_file_close(vnode_t *vp, int flags, int count, offset_t off,
165     cred_t *cr, caller_context_t *ct)
166 {
167 	cleanlocks(vp, ddi_get_pid(), 0);
168 	cleanshares(vp, ddi_get_pid());
169 	return (0);
170 }
171 
172 static int
173 xattr_common_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct)
174 {
175 	xattr_fid_t	*xfidp;
176 	vnode_t		*pvp, *savevp;
177 	int		error;
178 	uint16_t	orig_len;
179 
180 	if (fidp->fid_len < XATTR_FIDSZ) {
181 		fidp->fid_len = XATTR_FIDSZ;
182 		return (ENOSPC);
183 	}
184 
185 	savevp = pvp = gfs_file_parent(vp);
186 	mutex_enter(&savevp->v_lock);
187 	if (pvp->v_flag & V_XATTRDIR) {
188 		pvp = gfs_file_parent(pvp);
189 	}
190 	mutex_exit(&savevp->v_lock);
191 
192 	xfidp = (xattr_fid_t *)fidp;
193 	orig_len = fidp->fid_len;
194 	fidp->fid_len = sizeof (xfidp->parent_fid);
195 
196 	error = VOP_FID(pvp, fidp, ct);
197 	if (error) {
198 		fidp->fid_len = orig_len;
199 		return (error);
200 	}
201 
202 	xfidp->parent_len = fidp->fid_len;
203 	fidp->fid_len = XATTR_FIDSZ;
204 	xfidp->dir_offset = gfs_file_inode(vp);
205 
206 	return (0);
207 }
208 
209 /* ARGSUSED */
210 static int
211 xattr_fill_nvlist(vnode_t *vp, xattr_view_t xattr_view, nvlist_t *nvlp,
212     cred_t *cr, caller_context_t *ct)
213 {
214 	int error;
215 	f_attr_t attr;
216 	uint64_t fsid;
217 	xvattr_t xvattr;
218 	xoptattr_t *xoap;	/* Pointer to optional attributes */
219 	vnode_t *ppvp;
220 	const char *domain;
221 	uint32_t rid;
222 
223 	xva_init(&xvattr);
224 
225 	if ((xoap = xva_getxoptattr(&xvattr)) == NULL)
226 		return (EINVAL);
227 
228 	/*
229 	 * For detecting ephemeral uid/gid
230 	 */
231 	xvattr.xva_vattr.va_mask |= (AT_UID|AT_GID);
232 
233 	/*
234 	 * We need to access the real fs object.
235 	 * vp points to a GFS file; ppvp points to the real object.
236 	 */
237 	ppvp = gfs_file_parent(gfs_file_parent(vp));
238 
239 	/*
240 	 * Iterate through the attrs associated with this view
241 	 */
242 
243 	for (attr = 0; attr < F_ATTR_ALL; attr++) {
244 		if (xattr_view != attr_to_xattr_view(attr)) {
245 			continue;
246 		}
247 
248 		switch (attr) {
249 		case F_SYSTEM:
250 			XVA_SET_REQ(&xvattr, XAT_SYSTEM);
251 			break;
252 		case F_READONLY:
253 			XVA_SET_REQ(&xvattr, XAT_READONLY);
254 			break;
255 		case F_HIDDEN:
256 			XVA_SET_REQ(&xvattr, XAT_HIDDEN);
257 			break;
258 		case F_ARCHIVE:
259 			XVA_SET_REQ(&xvattr, XAT_ARCHIVE);
260 			break;
261 		case F_IMMUTABLE:
262 			XVA_SET_REQ(&xvattr, XAT_IMMUTABLE);
263 			break;
264 		case F_APPENDONLY:
265 			XVA_SET_REQ(&xvattr, XAT_APPENDONLY);
266 			break;
267 		case F_NOUNLINK:
268 			XVA_SET_REQ(&xvattr, XAT_NOUNLINK);
269 			break;
270 		case F_OPAQUE:
271 			XVA_SET_REQ(&xvattr, XAT_OPAQUE);
272 			break;
273 		case F_NODUMP:
274 			XVA_SET_REQ(&xvattr, XAT_NODUMP);
275 			break;
276 		case F_AV_QUARANTINED:
277 			XVA_SET_REQ(&xvattr, XAT_AV_QUARANTINED);
278 			break;
279 		case F_AV_MODIFIED:
280 			XVA_SET_REQ(&xvattr, XAT_AV_MODIFIED);
281 			break;
282 		case F_AV_SCANSTAMP:
283 			if (ppvp->v_type == VREG)
284 				XVA_SET_REQ(&xvattr, XAT_AV_SCANSTAMP);
285 			break;
286 		case F_CRTIME:
287 			XVA_SET_REQ(&xvattr, XAT_CREATETIME);
288 			break;
289 		case F_FSID:
290 			fsid = (((uint64_t)vp->v_vfsp->vfs_fsid.val[0] << 32) |
291 			    (uint64_t)(vp->v_vfsp->vfs_fsid.val[1] &
292 			    0xffffffff));
293 			VERIFY(nvlist_add_uint64(nvlp, attr_to_name(attr),
294 			    fsid) == 0);
295 			break;
296 		case F_REPARSE:
297 			XVA_SET_REQ(&xvattr, XAT_REPARSE);
298 			break;
299 		case F_GEN:
300 			XVA_SET_REQ(&xvattr, XAT_GEN);
301 			break;
302 		case F_OFFLINE:
303 			XVA_SET_REQ(&xvattr, XAT_OFFLINE);
304 			break;
305 		case F_SPARSE:
306 			XVA_SET_REQ(&xvattr, XAT_SPARSE);
307 			break;
308 		default:
309 			break;
310 		}
311 	}
312 
313 	error = VOP_GETATTR(ppvp, &xvattr.xva_vattr, 0, cr, ct);
314 	if (error)
315 		return (error);
316 
317 	/*
318 	 * Process all the optional attributes together here.  Notice that
319 	 * xoap was set when the optional attribute bits were set above.
320 	 */
321 	if ((xvattr.xva_vattr.va_mask & AT_XVATTR) && xoap) {
322 		if (XVA_ISSET_RTN(&xvattr, XAT_READONLY)) {
323 			VERIFY(nvlist_add_boolean_value(nvlp,
324 			    attr_to_name(F_READONLY),
325 			    xoap->xoa_readonly) == 0);
326 		}
327 		if (XVA_ISSET_RTN(&xvattr, XAT_HIDDEN)) {
328 			VERIFY(nvlist_add_boolean_value(nvlp,
329 			    attr_to_name(F_HIDDEN),
330 			    xoap->xoa_hidden) == 0);
331 		}
332 		if (XVA_ISSET_RTN(&xvattr, XAT_SYSTEM)) {
333 			VERIFY(nvlist_add_boolean_value(nvlp,
334 			    attr_to_name(F_SYSTEM),
335 			    xoap->xoa_system) == 0);
336 		}
337 		if (XVA_ISSET_RTN(&xvattr, XAT_ARCHIVE)) {
338 			VERIFY(nvlist_add_boolean_value(nvlp,
339 			    attr_to_name(F_ARCHIVE),
340 			    xoap->xoa_archive) == 0);
341 		}
342 		if (XVA_ISSET_RTN(&xvattr, XAT_IMMUTABLE)) {
343 			VERIFY(nvlist_add_boolean_value(nvlp,
344 			    attr_to_name(F_IMMUTABLE),
345 			    xoap->xoa_immutable) == 0);
346 		}
347 		if (XVA_ISSET_RTN(&xvattr, XAT_NOUNLINK)) {
348 			VERIFY(nvlist_add_boolean_value(nvlp,
349 			    attr_to_name(F_NOUNLINK),
350 			    xoap->xoa_nounlink) == 0);
351 		}
352 		if (XVA_ISSET_RTN(&xvattr, XAT_APPENDONLY)) {
353 			VERIFY(nvlist_add_boolean_value(nvlp,
354 			    attr_to_name(F_APPENDONLY),
355 			    xoap->xoa_appendonly) == 0);
356 		}
357 		if (XVA_ISSET_RTN(&xvattr, XAT_NODUMP)) {
358 			VERIFY(nvlist_add_boolean_value(nvlp,
359 			    attr_to_name(F_NODUMP),
360 			    xoap->xoa_nodump) == 0);
361 		}
362 		if (XVA_ISSET_RTN(&xvattr, XAT_OPAQUE)) {
363 			VERIFY(nvlist_add_boolean_value(nvlp,
364 			    attr_to_name(F_OPAQUE),
365 			    xoap->xoa_opaque) == 0);
366 		}
367 		if (XVA_ISSET_RTN(&xvattr, XAT_AV_QUARANTINED)) {
368 			VERIFY(nvlist_add_boolean_value(nvlp,
369 			    attr_to_name(F_AV_QUARANTINED),
370 			    xoap->xoa_av_quarantined) == 0);
371 		}
372 		if (XVA_ISSET_RTN(&xvattr, XAT_AV_MODIFIED)) {
373 			VERIFY(nvlist_add_boolean_value(nvlp,
374 			    attr_to_name(F_AV_MODIFIED),
375 			    xoap->xoa_av_modified) == 0);
376 		}
377 		if (XVA_ISSET_RTN(&xvattr, XAT_AV_SCANSTAMP)) {
378 			VERIFY(nvlist_add_uint8_array(nvlp,
379 			    attr_to_name(F_AV_SCANSTAMP),
380 			    xoap->xoa_av_scanstamp,
381 			    sizeof (xoap->xoa_av_scanstamp)) == 0);
382 		}
383 		if (XVA_ISSET_RTN(&xvattr, XAT_CREATETIME)) {
384 			VERIFY(nvlist_add_uint64_array(nvlp,
385 			    attr_to_name(F_CRTIME),
386 			    (uint64_t *)&(xoap->xoa_createtime),
387 			    sizeof (xoap->xoa_createtime) /
388 			    sizeof (uint64_t)) == 0);
389 		}
390 		if (XVA_ISSET_RTN(&xvattr, XAT_REPARSE)) {
391 			VERIFY(nvlist_add_boolean_value(nvlp,
392 			    attr_to_name(F_REPARSE),
393 			    xoap->xoa_reparse) == 0);
394 		}
395 		if (XVA_ISSET_RTN(&xvattr, XAT_GEN)) {
396 			VERIFY(nvlist_add_uint64(nvlp,
397 			    attr_to_name(F_GEN),
398 			    xoap->xoa_generation) == 0);
399 		}
400 		if (XVA_ISSET_RTN(&xvattr, XAT_OFFLINE)) {
401 			VERIFY(nvlist_add_boolean_value(nvlp,
402 			    attr_to_name(F_OFFLINE),
403 			    xoap->xoa_offline) == 0);
404 		}
405 		if (XVA_ISSET_RTN(&xvattr, XAT_SPARSE)) {
406 			VERIFY(nvlist_add_boolean_value(nvlp,
407 			    attr_to_name(F_SPARSE),
408 			    xoap->xoa_sparse) == 0);
409 		}
410 	}
411 	/*
412 	 * Check for optional ownersid/groupsid
413 	 */
414 
415 	if (xvattr.xva_vattr.va_uid > MAXUID) {
416 		nvlist_t *nvl_sid;
417 
418 		if (nvlist_alloc(&nvl_sid, NV_UNIQUE_NAME, KM_SLEEP))
419 			return (ENOMEM);
420 
421 		if (kidmap_getsidbyuid(crgetzone(cr), xvattr.xva_vattr.va_uid,
422 		    &domain, &rid) == 0) {
423 			VERIFY(nvlist_add_string(nvl_sid,
424 			    SID_DOMAIN, domain) == 0);
425 			VERIFY(nvlist_add_uint32(nvl_sid, SID_RID, rid) == 0);
426 			VERIFY(nvlist_add_nvlist(nvlp, attr_to_name(F_OWNERSID),
427 			    nvl_sid) == 0);
428 		}
429 		nvlist_free(nvl_sid);
430 	}
431 	if (xvattr.xva_vattr.va_gid > MAXUID) {
432 		nvlist_t *nvl_sid;
433 
434 		if (nvlist_alloc(&nvl_sid, NV_UNIQUE_NAME, KM_SLEEP))
435 			return (ENOMEM);
436 
437 		if (kidmap_getsidbygid(crgetzone(cr), xvattr.xva_vattr.va_gid,
438 		    &domain, &rid) == 0) {
439 			VERIFY(nvlist_add_string(nvl_sid,
440 			    SID_DOMAIN, domain) == 0);
441 			VERIFY(nvlist_add_uint32(nvl_sid, SID_RID, rid) == 0);
442 			VERIFY(nvlist_add_nvlist(nvlp, attr_to_name(F_GROUPSID),
443 			    nvl_sid) == 0);
444 		}
445 		nvlist_free(nvl_sid);
446 	}
447 
448 	return (0);
449 }
450 
451 /*
452  * The size of a sysattr file is the size of the nvlist that will be
453  * returned by xattr_file_read().  A call to xattr_file_write() could
454  * change the size of that nvlist.  That size is not stored persistently
455  * so xattr_fill_nvlist() calls VOP_GETATTR so that it can be calculated.
456  */
457 static int
458 xattr_file_size(vnode_t *vp, xattr_view_t xattr_view, size_t *size,
459     cred_t *cr, caller_context_t *ct)
460 {
461 	nvlist_t *nvl;
462 
463 	if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) {
464 		return (ENOMEM);
465 	}
466 
467 	if (xattr_fill_nvlist(vp, xattr_view, nvl, cr, ct)) {
468 		nvlist_free(nvl);
469 		return (EFAULT);
470 	}
471 
472 	VERIFY(nvlist_size(nvl, size, NV_ENCODE_XDR) == 0);
473 	nvlist_free(nvl);
474 	return (0);
475 }
476 
477 /* ARGSUSED */
478 static int
479 xattr_file_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
480     caller_context_t *ct)
481 {
482 	xattr_file_t *np = vp->v_data;
483 	timestruc_t now;
484 	size_t size;
485 	int error;
486 	vnode_t *pvp;
487 	vattr_t pvattr;
488 
489 	vap->va_type = VREG;
490 	vap->va_mode = MAKEIMODE(vap->va_type,
491 	    (np->xattr_view == XATTR_VIEW_READONLY ? 0444 : 0644));
492 	vap->va_nodeid = gfs_file_inode(vp);
493 	vap->va_nlink = 1;
494 	pvp = gfs_file_parent(vp);
495 	(void) memset(&pvattr, 0, sizeof (pvattr));
496 	pvattr.va_mask = AT_CTIME|AT_MTIME;
497 	error = VOP_GETATTR(pvp, &pvattr, flags, cr, ct);
498 	if (error) {
499 		return (error);
500 	}
501 	vap->va_ctime = pvattr.va_ctime;
502 	vap->va_mtime = pvattr.va_mtime;
503 	gethrestime(&now);
504 	vap->va_atime = now;
505 	vap->va_uid = 0;
506 	vap->va_gid = 0;
507 	vap->va_rdev = 0;
508 	vap->va_blksize = DEV_BSIZE;
509 	vap->va_seq = 0;
510 	vap->va_fsid = vp->v_vfsp->vfs_dev;
511 	error = xattr_file_size(vp, np->xattr_view, &size, cr, ct);
512 	vap->va_size = size;
513 	vap->va_nblocks = howmany(vap->va_size, vap->va_blksize);
514 	return (error);
515 }
516 
517 /* ARGSUSED */
518 static int
519 xattr_file_read(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
520     caller_context_t *ct)
521 {
522 	xattr_file_t *np = vp->v_data;
523 	xattr_view_t xattr_view = np->xattr_view;
524 	char *buf;
525 	size_t filesize;
526 	nvlist_t *nvl;
527 	int error;
528 
529 	/*
530 	 * Validate file offset and fasttrack empty reads
531 	 */
532 	if (uiop->uio_loffset < (offset_t)0)
533 		return (EINVAL);
534 
535 	if (uiop->uio_resid == 0)
536 		return (0);
537 
538 	if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP))
539 		return (ENOMEM);
540 
541 	if (xattr_fill_nvlist(vp, xattr_view, nvl, cr, ct)) {
542 		nvlist_free(nvl);
543 		return (EFAULT);
544 	}
545 
546 	VERIFY(nvlist_size(nvl, &filesize, NV_ENCODE_XDR) == 0);
547 
548 	if (uiop->uio_loffset >= filesize) {
549 		nvlist_free(nvl);
550 		return (0);
551 	}
552 
553 	buf = kmem_alloc(filesize, KM_SLEEP);
554 	VERIFY(nvlist_pack(nvl, &buf, &filesize, NV_ENCODE_XDR,
555 	    KM_SLEEP) == 0);
556 
557 	error = uiomove((caddr_t)buf, filesize, UIO_READ, uiop);
558 	kmem_free(buf, filesize);
559 	nvlist_free(nvl);
560 	return (error);
561 }
562 
563 /* ARGSUSED */
564 static int
565 xattr_file_write(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
566     caller_context_t *ct)
567 {
568 	int error = 0;
569 	char *buf;
570 	char *domain;
571 	uint32_t rid;
572 	ssize_t size = uiop->uio_resid;
573 	nvlist_t *nvp;
574 	nvpair_t *pair = NULL;
575 	vnode_t *ppvp;
576 	xvattr_t xvattr;
577 	xoptattr_t *xoap = NULL;	/* Pointer to optional attributes */
578 
579 	if (vfs_has_feature(vp->v_vfsp, VFSFT_XVATTR) == 0)
580 		return (EINVAL);
581 
582 	/*
583 	 * Validate file offset and size.
584 	 */
585 	if (uiop->uio_loffset < (offset_t)0)
586 		return (EINVAL);
587 
588 	if (size == 0)
589 		return (EINVAL);
590 
591 	xva_init(&xvattr);
592 
593 	if ((xoap = xva_getxoptattr(&xvattr)) == NULL) {
594 		return (EINVAL);
595 	}
596 
597 	/*
598 	 * Copy and unpack the nvlist
599 	 */
600 	buf = kmem_alloc(size, KM_SLEEP);
601 	if (uiomove((caddr_t)buf, size, UIO_WRITE, uiop)) {
602 		return (EFAULT);
603 	}
604 
605 	if (nvlist_unpack(buf, size, &nvp, KM_SLEEP) != 0) {
606 		kmem_free(buf, size);
607 		uiop->uio_resid = size;
608 		return (EINVAL);
609 	}
610 	kmem_free(buf, size);
611 
612 	/*
613 	 * Fasttrack empty writes (nvlist with no nvpairs)
614 	 */
615 	if (nvlist_next_nvpair(nvp, NULL) == 0)
616 		return (0);
617 
618 	ppvp = gfs_file_parent(gfs_file_parent(vp));
619 
620 	while (pair = nvlist_next_nvpair(nvp, pair)) {
621 		data_type_t type;
622 		f_attr_t attr;
623 		boolean_t value;
624 		uint64_t *time, *times;
625 		uint_t elem, nelems;
626 		nvlist_t *nvp_sid;
627 		uint8_t *scanstamp;
628 
629 		/*
630 		 * Validate the name and type of each attribute.
631 		 * Log any unknown names and continue.  This will
632 		 * help if additional attributes are added later.
633 		 */
634 		type = nvpair_type(pair);
635 		if ((attr = name_to_attr(nvpair_name(pair))) == F_ATTR_INVAL) {
636 			cmn_err(CE_WARN, "Unknown attribute %s",
637 			    nvpair_name(pair));
638 			continue;
639 		}
640 
641 		/*
642 		 * Verify nvlist type matches required type and view is OK
643 		 */
644 
645 		if (type != attr_to_data_type(attr) ||
646 		    (attr_to_xattr_view(attr) == XATTR_VIEW_READONLY)) {
647 			nvlist_free(nvp);
648 			return (EINVAL);
649 		}
650 
651 		/*
652 		 * For OWNERSID/GROUPSID make sure the target
653 		 * file system support ephemeral ID's
654 		 */
655 		if ((attr == F_OWNERSID || attr == F_GROUPSID) &&
656 		    (!(vp->v_vfsp->vfs_flag & VFS_XID))) {
657 			nvlist_free(nvp);
658 			return (EINVAL);
659 		}
660 
661 		/*
662 		 * Retrieve data from nvpair
663 		 */
664 		switch (type) {
665 		case DATA_TYPE_BOOLEAN_VALUE:
666 			if (nvpair_value_boolean_value(pair, &value)) {
667 				nvlist_free(nvp);
668 				return (EINVAL);
669 			}
670 			break;
671 		case DATA_TYPE_UINT64_ARRAY:
672 			if (nvpair_value_uint64_array(pair, &times, &nelems)) {
673 				nvlist_free(nvp);
674 				return (EINVAL);
675 			}
676 			break;
677 		case DATA_TYPE_NVLIST:
678 			if (nvpair_value_nvlist(pair, &nvp_sid)) {
679 				nvlist_free(nvp);
680 				return (EINVAL);
681 			}
682 			break;
683 		case DATA_TYPE_UINT8_ARRAY:
684 			if (nvpair_value_uint8_array(pair,
685 			    &scanstamp, &nelems)) {
686 				nvlist_free(nvp);
687 				return (EINVAL);
688 			}
689 			break;
690 		default:
691 			nvlist_free(nvp);
692 			return (EINVAL);
693 		}
694 
695 		switch (attr) {
696 		/*
697 		 * If we have several similar optional attributes to
698 		 * process then we should do it all together here so that
699 		 * xoap and the requested bitmap can be set in one place.
700 		 */
701 		case F_READONLY:
702 			XVA_SET_REQ(&xvattr, XAT_READONLY);
703 			xoap->xoa_readonly = value;
704 			break;
705 		case F_HIDDEN:
706 			XVA_SET_REQ(&xvattr, XAT_HIDDEN);
707 			xoap->xoa_hidden = value;
708 			break;
709 		case F_SYSTEM:
710 			XVA_SET_REQ(&xvattr, XAT_SYSTEM);
711 			xoap->xoa_system = value;
712 			break;
713 		case F_ARCHIVE:
714 			XVA_SET_REQ(&xvattr, XAT_ARCHIVE);
715 			xoap->xoa_archive = value;
716 			break;
717 		case F_IMMUTABLE:
718 			XVA_SET_REQ(&xvattr, XAT_IMMUTABLE);
719 			xoap->xoa_immutable = value;
720 			break;
721 		case F_NOUNLINK:
722 			XVA_SET_REQ(&xvattr, XAT_NOUNLINK);
723 			xoap->xoa_nounlink = value;
724 			break;
725 		case F_APPENDONLY:
726 			XVA_SET_REQ(&xvattr, XAT_APPENDONLY);
727 			xoap->xoa_appendonly = value;
728 			break;
729 		case F_NODUMP:
730 			XVA_SET_REQ(&xvattr, XAT_NODUMP);
731 			xoap->xoa_nodump = value;
732 			break;
733 		case F_AV_QUARANTINED:
734 			XVA_SET_REQ(&xvattr, XAT_AV_QUARANTINED);
735 			xoap->xoa_av_quarantined = value;
736 			break;
737 		case F_AV_MODIFIED:
738 			XVA_SET_REQ(&xvattr, XAT_AV_MODIFIED);
739 			xoap->xoa_av_modified = value;
740 			break;
741 		case F_CRTIME:
742 			XVA_SET_REQ(&xvattr, XAT_CREATETIME);
743 			time = (uint64_t *)&(xoap->xoa_createtime);
744 			for (elem = 0; elem < nelems; elem++)
745 				*time++ = times[elem];
746 			break;
747 		case F_OWNERSID:
748 		case F_GROUPSID:
749 			if (nvlist_lookup_string(nvp_sid, SID_DOMAIN,
750 			    &domain) || nvlist_lookup_uint32(nvp_sid, SID_RID,
751 			    &rid)) {
752 				nvlist_free(nvp);
753 				return (EINVAL);
754 			}
755 
756 			/*
757 			 * Now map domain+rid to ephemeral id's
758 			 *
759 			 * If mapping fails, then the uid/gid will
760 			 * be set to UID_NOBODY by Winchester.
761 			 */
762 
763 			if (attr == F_OWNERSID) {
764 				(void) kidmap_getuidbysid(crgetzone(cr), domain,
765 				    rid, &xvattr.xva_vattr.va_uid);
766 				xvattr.xva_vattr.va_mask |= AT_UID;
767 			} else {
768 				(void) kidmap_getgidbysid(crgetzone(cr), domain,
769 				    rid, &xvattr.xva_vattr.va_gid);
770 				xvattr.xva_vattr.va_mask |= AT_GID;
771 			}
772 			break;
773 		case F_AV_SCANSTAMP:
774 			if (ppvp->v_type == VREG) {
775 				XVA_SET_REQ(&xvattr, XAT_AV_SCANSTAMP);
776 				(void) memcpy(xoap->xoa_av_scanstamp,
777 				    scanstamp, nelems);
778 			} else {
779 				nvlist_free(nvp);
780 				return (EINVAL);
781 			}
782 			break;
783 		case F_REPARSE:
784 			XVA_SET_REQ(&xvattr, XAT_REPARSE);
785 			xoap->xoa_reparse = value;
786 			break;
787 		case F_OFFLINE:
788 			XVA_SET_REQ(&xvattr, XAT_OFFLINE);
789 			xoap->xoa_offline = value;
790 			break;
791 		case F_SPARSE:
792 			XVA_SET_REQ(&xvattr, XAT_SPARSE);
793 			xoap->xoa_sparse = value;
794 			break;
795 		default:
796 			break;
797 		}
798 	}
799 
800 	ppvp = gfs_file_parent(gfs_file_parent(vp));
801 	error = VOP_SETATTR(ppvp, &xvattr.xva_vattr, 0, cr, ct);
802 	if (error)
803 		uiop->uio_resid = size;
804 
805 	nvlist_free(nvp);
806 	return (error);
807 }
808 
809 static int
810 xattr_file_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
811     caller_context_t *ct)
812 {
813 	switch (cmd) {
814 	case _PC_XATTR_EXISTS:
815 	case _PC_SATTR_ENABLED:
816 	case _PC_SATTR_EXISTS:
817 		*valp = 0;
818 		return (0);
819 	default:
820 		return (fs_pathconf(vp, cmd, valp, cr, ct));
821 	}
822 }
823 
824 vnodeops_t *xattr_file_ops;
825 
826 static const fs_operation_def_t xattr_file_tops[] = {
827 	{ VOPNAME_OPEN,		{ .vop_open = xattr_file_open }		},
828 	{ VOPNAME_CLOSE,	{ .vop_close = xattr_file_close }	},
829 	{ VOPNAME_READ,		{ .vop_read = xattr_file_read }		},
830 	{ VOPNAME_WRITE,	{ .vop_write = xattr_file_write }	},
831 	{ VOPNAME_IOCTL,	{ .error = fs_ioctl }			},
832 	{ VOPNAME_GETATTR,	{ .vop_getattr = xattr_file_getattr }	},
833 	{ VOPNAME_ACCESS,	{ .vop_access = xattr_file_access }	},
834 	{ VOPNAME_READDIR,	{ .error = fs_notdir }			},
835 	{ VOPNAME_SEEK,		{ .vop_seek = fs_seek }			},
836 	{ VOPNAME_INACTIVE,	{ .vop_inactive = gfs_vop_inactive }	},
837 	{ VOPNAME_FID,		{ .vop_fid = xattr_common_fid }		},
838 	{ VOPNAME_PATHCONF,	{ .vop_pathconf = xattr_file_pathconf }	},
839 	{ VOPNAME_PUTPAGE,	{ .error = fs_putpage }			},
840 	{ VOPNAME_FSYNC,	{ .error = fs_fsync }			},
841 	{ NULL }
842 };
843 
844 vnode_t *
845 xattr_mkfile(vnode_t *pvp, xattr_view_t xattr_view)
846 {
847 	vnode_t *vp;
848 	xattr_file_t *np;
849 
850 	vp = gfs_file_create(sizeof (xattr_file_t), pvp, xattr_file_ops);
851 	np = vp->v_data;
852 	np->xattr_view = xattr_view;
853 	vp->v_flag |= V_SYSATTR;
854 	return (vp);
855 }
856 
857 vnode_t *
858 xattr_mkfile_ro(vnode_t *pvp)
859 {
860 	return (xattr_mkfile(pvp, XATTR_VIEW_READONLY));
861 }
862 
863 vnode_t *
864 xattr_mkfile_rw(vnode_t *pvp)
865 {
866 	return (xattr_mkfile(pvp, XATTR_VIEW_READWRITE));
867 }
868 
869 vnodeops_t *xattr_dir_ops;
870 
871 static gfs_dirent_t xattr_dirents[] = {
872 	{ VIEW_READONLY, xattr_mkfile_ro, GFS_CACHE_VNODE, },
873 	{ VIEW_READWRITE, xattr_mkfile_rw, GFS_CACHE_VNODE, },
874 	{ NULL },
875 };
876 
877 #define	XATTRDIR_NENTS	((sizeof (xattr_dirents) / sizeof (gfs_dirent_t)) - 1)
878 
879 static int
880 is_sattr_name(char *s)
881 {
882 	int i;
883 
884 	for (i = 0; i < XATTRDIR_NENTS; ++i) {
885 		if (strcmp(s, xattr_dirents[i].gfse_name) == 0) {
886 			return (1);
887 		}
888 	}
889 	return (0);
890 }
891 
892 /*
893  * Given the name of an extended attribute file, determine if there is a
894  * normalization conflict with a sysattr view name.
895  */
896 int
897 xattr_sysattr_casechk(char *s)
898 {
899 	int i;
900 
901 	for (i = 0; i < XATTRDIR_NENTS; ++i) {
902 		if (strcasecmp(s, xattr_dirents[i].gfse_name) == 0)
903 			return (1);
904 	}
905 	return (0);
906 }
907 
908 static int
909 xattr_copy(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm,
910     cred_t *cr, caller_context_t *ct)
911 {
912 	xvattr_t xvattr;
913 	vnode_t *pdvp;
914 	int error;
915 
916 	/*
917 	 * Only copy system attrs if the views are the same
918 	 */
919 	if (strcmp(snm, tnm) != 0)
920 		return (EINVAL);
921 
922 	xva_init(&xvattr);
923 
924 	XVA_SET_REQ(&xvattr, XAT_SYSTEM);
925 	XVA_SET_REQ(&xvattr, XAT_READONLY);
926 	XVA_SET_REQ(&xvattr, XAT_HIDDEN);
927 	XVA_SET_REQ(&xvattr, XAT_ARCHIVE);
928 	XVA_SET_REQ(&xvattr, XAT_APPENDONLY);
929 	XVA_SET_REQ(&xvattr, XAT_NOUNLINK);
930 	XVA_SET_REQ(&xvattr, XAT_IMMUTABLE);
931 	XVA_SET_REQ(&xvattr, XAT_NODUMP);
932 	XVA_SET_REQ(&xvattr, XAT_AV_MODIFIED);
933 	XVA_SET_REQ(&xvattr, XAT_AV_QUARANTINED);
934 	XVA_SET_REQ(&xvattr, XAT_CREATETIME);
935 	XVA_SET_REQ(&xvattr, XAT_REPARSE);
936 	XVA_SET_REQ(&xvattr, XAT_OFFLINE);
937 	XVA_SET_REQ(&xvattr, XAT_SPARSE);
938 
939 	pdvp = gfs_file_parent(sdvp);
940 	error = VOP_GETATTR(pdvp, &xvattr.xva_vattr, 0, cr, ct);
941 	if (error)
942 		return (error);
943 
944 	pdvp = gfs_file_parent(tdvp);
945 	error = VOP_SETATTR(pdvp, &xvattr.xva_vattr, 0, cr, ct);
946 	return (error);
947 }
948 
949 /*
950  * Get the "real" XATTR directory associtated with the GFS XATTR directory.
951  * Note: This does NOT take any additional hold on the returned real_vp,
952  * because when this lookup succeeds we save the result in xattr_realvp
953  * and keep that hold until the GFS XATTR directory goes inactive.
954  */
955 static int
956 xattr_dir_realdir(vnode_t *gfs_dvp, vnode_t **ret_vpp, int flags,
957     cred_t *cr, caller_context_t *ct)
958 {
959 	struct pathname pn;
960 	char *nm = "";
961 	xattr_dir_t *xattr_dir;
962 	vnode_t *realvp;
963 	int error;
964 
965 	*ret_vpp = NULL;
966 
967 	/*
968 	 * Usually, we've already found the underlying XATTR directory
969 	 * during some previous lookup and stored it in xattr_realvp.
970 	 */
971 	mutex_enter(&gfs_dvp->v_lock);
972 	xattr_dir = gfs_dvp->v_data;
973 	realvp = xattr_dir->xattr_realvp;
974 	mutex_exit(&gfs_dvp->v_lock);
975 	if (realvp != NULL) {
976 		*ret_vpp = realvp;
977 		return (0);
978 	}
979 
980 	/*
981 	 * Lookup the XATTR dir in the underlying FS, relative to our
982 	 * "parent", which is the real object for which this GFS XATTR
983 	 * directory was created.  Set the LOOKUP_HAVE_SYSATTR_DIR flag
984 	 * so that we don't get into an infinite loop with fop_lookup
985 	 * calling back to xattr_dir_lookup.
986 	 */
987 	error = pn_get(nm, UIO_SYSSPACE, &pn);
988 	if (error != 0)
989 		return (error);
990 	error = VOP_LOOKUP(gfs_file_parent(gfs_dvp), nm, &realvp, &pn,
991 	    flags | LOOKUP_HAVE_SYSATTR_DIR, rootvp, cr, ct, NULL, NULL);
992 	pn_free(&pn);
993 	if (error != 0)
994 		return (error);
995 
996 	/*
997 	 * Have the real XATTR directory.  Save it -- but first
998 	 * check whether we lost a race doing the lookup.
999 	 */
1000 	mutex_enter(&gfs_dvp->v_lock);
1001 	xattr_dir = gfs_dvp->v_data;
1002 	if (xattr_dir->xattr_realvp == NULL) {
1003 		/*
1004 		 * Note that the hold taken by the VOP_LOOKUP above is
1005 		 * retained from here until xattr_dir_inactive.
1006 		 */
1007 		xattr_dir->xattr_realvp = realvp;
1008 	} else {
1009 		/* We lost the race. */
1010 		VN_RELE(realvp);
1011 		realvp = xattr_dir->xattr_realvp;
1012 	}
1013 	mutex_exit(&gfs_dvp->v_lock);
1014 
1015 	*ret_vpp = realvp;
1016 	return (0);
1017 }
1018 
1019 /* ARGSUSED */
1020 static int
1021 xattr_dir_open(vnode_t **vpp, int flags, cred_t *cr, caller_context_t *ct)
1022 {
1023 	vnode_t *realvp;
1024 	int error;
1025 
1026 	if (flags & FWRITE) {
1027 		return (EACCES);
1028 	}
1029 
1030 	/*
1031 	 * If there is a real extended attribute directory,
1032 	 * let the underlying FS see the VOP_OPEN call;
1033 	 * otherwise just return zero.
1034 	 */
1035 	error = xattr_dir_realdir(*vpp, &realvp, LOOKUP_XATTR, cr, ct);
1036 	if (error == 0) {
1037 		error = VOP_OPEN(&realvp, flags, cr, ct);
1038 	} else {
1039 		error = 0;
1040 	}
1041 
1042 	return (error);
1043 }
1044 
1045 /* ARGSUSED */
1046 static int
1047 xattr_dir_close(vnode_t *vp, int flags, int count, offset_t off, cred_t *cr,
1048     caller_context_t *ct)
1049 {
1050 	vnode_t *realvp;
1051 	int error;
1052 
1053 	/*
1054 	 * If there is a real extended attribute directory,
1055 	 * let the underlying FS see the VOP_CLOSE call;
1056 	 * otherwise just return zero.
1057 	 */
1058 	error = xattr_dir_realdir(vp, &realvp, LOOKUP_XATTR, cr, ct);
1059 	if (error == 0) {
1060 		error = VOP_CLOSE(realvp, flags, count, off, cr, ct);
1061 	} else {
1062 		error = 0;
1063 	}
1064 
1065 	return (error);
1066 }
1067 
1068 /*
1069  * Retrieve the attributes on an xattr directory.  If there is a "real"
1070  * xattr directory, use that.  Otherwise, get the attributes (represented
1071  * by PARENT_ATTRMASK) from the "parent" node and fill in the rest.  Note
1072  * that VOP_GETATTR() could turn off bits in the va_mask.
1073  */
1074 
1075 #define	PARENT_ATTRMASK	(AT_UID|AT_GID|AT_RDEV|AT_CTIME|AT_MTIME)
1076 
1077 /* ARGSUSED */
1078 static int
1079 xattr_dir_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
1080     caller_context_t *ct)
1081 {
1082 	timestruc_t now;
1083 	vnode_t *pvp;
1084 	int error;
1085 
1086 	error = xattr_dir_realdir(vp, &pvp, LOOKUP_XATTR, cr, ct);
1087 	if (error == 0) {
1088 		error = VOP_GETATTR(pvp, vap, 0, cr, ct);
1089 		if (error) {
1090 			return (error);
1091 		}
1092 		vap->va_nlink += XATTRDIR_NENTS;
1093 		vap->va_size += XATTRDIR_NENTS;
1094 		return (0);
1095 	}
1096 
1097 	/*
1098 	 * There is no real xattr directory.  Cobble together
1099 	 * an entry using info from the parent object (if needed)
1100 	 * plus information common to all xattrs.
1101 	 */
1102 	if (vap->va_mask & PARENT_ATTRMASK) {
1103 		vattr_t pvattr;
1104 		uint_t  off_bits;
1105 
1106 		pvp = gfs_file_parent(vp);
1107 		(void) memset(&pvattr, 0, sizeof (pvattr));
1108 		pvattr.va_mask = PARENT_ATTRMASK;
1109 		error = VOP_GETATTR(pvp, &pvattr, 0, cr, ct);
1110 		if (error) {
1111 			return (error);
1112 		}
1113 
1114 		/*
1115 		 * VOP_GETATTR() might have turned off some bits in
1116 		 * pvattr.va_mask.  This means that the underlying
1117 		 * file system couldn't process those attributes.
1118 		 * We need to make sure those bits get turned off
1119 		 * in the vattr_t structure that gets passed back
1120 		 * to the caller.  Figure out which bits were turned
1121 		 * off (if any) then set pvattr.va_mask before it
1122 		 * gets copied to the vattr_t that the caller sees.
1123 		 */
1124 		off_bits = (pvattr.va_mask ^ PARENT_ATTRMASK) & PARENT_ATTRMASK;
1125 		pvattr.va_mask = vap->va_mask & ~off_bits;
1126 		*vap = pvattr;
1127 	}
1128 
1129 	vap->va_type = VDIR;
1130 	vap->va_mode = MAKEIMODE(vap->va_type, S_ISVTX | 0777);
1131 	vap->va_fsid = vp->v_vfsp->vfs_dev;
1132 	vap->va_nodeid = gfs_file_inode(vp);
1133 	vap->va_nlink = XATTRDIR_NENTS+2;
1134 	vap->va_size = vap->va_nlink;
1135 	gethrestime(&now);
1136 	vap->va_atime = now;
1137 	vap->va_blksize = 0;
1138 	vap->va_nblocks = 0;
1139 	vap->va_seq = 0;
1140 	return (0);
1141 }
1142 
1143 static int
1144 xattr_dir_setattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
1145     caller_context_t *ct)
1146 {
1147 	vnode_t *realvp;
1148 	int error;
1149 
1150 	/*
1151 	 * If there is a real xattr directory, do the setattr there.
1152 	 * Otherwise, just return success.  The GFS directory is transient,
1153 	 * and any setattr changes can disappear anyway.
1154 	 */
1155 	error = xattr_dir_realdir(vp, &realvp, LOOKUP_XATTR, cr, ct);
1156 	if (error == 0) {
1157 		error = VOP_SETATTR(realvp, vap, flags, cr, ct);
1158 	}
1159 	if (error == ENOENT) {
1160 		error = 0;
1161 	}
1162 	return (error);
1163 }
1164 
1165 /* ARGSUSED */
1166 static int
1167 xattr_dir_access(vnode_t *vp, int mode, int flags, cred_t *cr,
1168     caller_context_t *ct)
1169 {
1170 	int error;
1171 	vnode_t *realvp = NULL;
1172 
1173 	if (mode & VWRITE) {
1174 		return (EACCES);
1175 	}
1176 
1177 	/*
1178 	 * If there is a real xattr directory, check access there;
1179 	 * otherwise just return success.
1180 	 */
1181 	error = xattr_dir_realdir(vp, &realvp, LOOKUP_XATTR, cr, ct);
1182 	if (error == 0) {
1183 		error = VOP_ACCESS(realvp, mode, flags, cr, ct);
1184 	} else {
1185 		error = 0;
1186 	}
1187 
1188 	return (error);
1189 }
1190 
1191 static int
1192 xattr_dir_create(vnode_t *dvp, char *name, vattr_t *vap, vcexcl_t excl,
1193     int mode, vnode_t **vpp, cred_t *cr, int flag, caller_context_t *ct,
1194     vsecattr_t *vsecp)
1195 {
1196 	vnode_t *pvp;
1197 	int error;
1198 
1199 	*vpp = NULL;
1200 
1201 	/*
1202 	 * Don't allow creation of extended attributes with sysattr names.
1203 	 */
1204 	if (is_sattr_name(name)) {
1205 		return (gfs_dir_lookup(dvp, name, vpp, cr, 0, NULL, NULL));
1206 	}
1207 
1208 	error = xattr_dir_realdir(dvp, &pvp, LOOKUP_XATTR|CREATE_XATTR_DIR,
1209 	    cr, ct);
1210 	if (error == 0) {
1211 		error = VOP_CREATE(pvp, name, vap, excl, mode, vpp, cr, flag,
1212 		    ct, vsecp);
1213 	}
1214 	return (error);
1215 }
1216 
1217 static int
1218 xattr_dir_remove(vnode_t *dvp, char *name, cred_t *cr, caller_context_t *ct,
1219     int flags)
1220 {
1221 	vnode_t *pvp;
1222 	int error;
1223 
1224 	if (is_sattr_name(name)) {
1225 		return (EACCES);
1226 	}
1227 
1228 	error = xattr_dir_realdir(dvp, &pvp, LOOKUP_XATTR, cr, ct);
1229 	if (error == 0) {
1230 		error = VOP_REMOVE(pvp, name, cr, ct, flags);
1231 	}
1232 	return (error);
1233 }
1234 
1235 static int
1236 xattr_dir_link(vnode_t *tdvp, vnode_t *svp, char *name, cred_t *cr,
1237     caller_context_t *ct, int flags)
1238 {
1239 	vnode_t *pvp;
1240 	int error;
1241 
1242 	if (svp->v_flag & V_SYSATTR) {
1243 		return (EINVAL);
1244 	}
1245 
1246 	error = xattr_dir_realdir(tdvp, &pvp, LOOKUP_XATTR, cr, ct);
1247 	if (error == 0) {
1248 		error = VOP_LINK(pvp, svp, name, cr, ct, flags);
1249 	}
1250 	return (error);
1251 }
1252 
1253 static int
1254 xattr_dir_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm,
1255     cred_t *cr, caller_context_t *ct, int flags)
1256 {
1257 	vnode_t *spvp, *tpvp;
1258 	int error;
1259 
1260 	if (is_sattr_name(snm) || is_sattr_name(tnm))
1261 		return (xattr_copy(sdvp, snm, tdvp, tnm, cr, ct));
1262 	/*
1263 	 * We know that sdvp is a GFS dir, or we wouldn't be here.
1264 	 * Get the real unnamed directory.
1265 	 */
1266 	error = xattr_dir_realdir(sdvp, &spvp, LOOKUP_XATTR, cr, ct);
1267 	if (error) {
1268 		return (error);
1269 	}
1270 
1271 	if (sdvp == tdvp) {
1272 		/*
1273 		 * If the source and target are the same GFS directory, the
1274 		 * underlying unnamed source and target dir will be the same.
1275 		 */
1276 		tpvp = spvp;
1277 	} else if (tdvp->v_flag & V_SYSATTR) {
1278 		/*
1279 		 * If the target dir is a different GFS directory,
1280 		 * find its underlying unnamed dir.
1281 		 */
1282 		error = xattr_dir_realdir(tdvp, &tpvp, LOOKUP_XATTR, cr, ct);
1283 		if (error) {
1284 			return (error);
1285 		}
1286 	} else {
1287 		/*
1288 		 * Target dir is outside of GFS, pass it on through.
1289 		 */
1290 		tpvp = tdvp;
1291 	}
1292 
1293 	error = VOP_RENAME(spvp, snm, tpvp, tnm, cr, ct, flags);
1294 
1295 	return (error);
1296 }
1297 
1298 /*
1299  * readdir_xattr_casecmp: given a system attribute name, see if there
1300  * is a real xattr with the same normalized name.
1301  */
1302 static int
1303 readdir_xattr_casecmp(vnode_t *dvp, char *nm, cred_t *cr, caller_context_t *ct,
1304     int *eflags)
1305 {
1306 	int error;
1307 	vnode_t *vp;
1308 	struct pathname pn;
1309 
1310 	*eflags = 0;
1311 
1312 	error = pn_get(nm, UIO_SYSSPACE, &pn);
1313 	if (error == 0) {
1314 		error = VOP_LOOKUP(dvp, nm, &vp, &pn,
1315 		    FIGNORECASE, rootvp, cr, ct, NULL, NULL);
1316 		if (error == 0) {
1317 			*eflags = ED_CASE_CONFLICT;
1318 			VN_RELE(vp);
1319 		} else if (error == ENOENT) {
1320 			error = 0;
1321 		}
1322 		pn_free(&pn);
1323 	}
1324 
1325 	return (error);
1326 }
1327 
1328 static int
1329 xattr_dir_readdir(vnode_t *dvp, uio_t *uiop, cred_t *cr, int *eofp,
1330     caller_context_t *ct, int flags)
1331 {
1332 	vnode_t *pvp;
1333 	int error;
1334 	int local_eof;
1335 	int reset_off = 0;
1336 	int has_xattrs = 0;
1337 
1338 	if (eofp == NULL) {
1339 		eofp = &local_eof;
1340 	}
1341 	*eofp = 0;
1342 
1343 	/*
1344 	 * See if there is a real extended attribute directory.
1345 	 */
1346 	error = xattr_dir_realdir(dvp, &pvp, LOOKUP_XATTR, cr, ct);
1347 	if (error == 0) {
1348 		has_xattrs = 1;
1349 	}
1350 
1351 	/*
1352 	 * Start by reading up the static entries.
1353 	 */
1354 	if (uiop->uio_loffset == 0) {
1355 		ino64_t pino, ino;
1356 		offset_t off;
1357 		gfs_dir_t *dp = dvp->v_data;
1358 		gfs_readdir_state_t gstate;
1359 
1360 		if (has_xattrs) {
1361 			/*
1362 			 * If there is a real xattr dir, skip . and ..
1363 			 * in the GFS dir.  We'll pick them up below
1364 			 * when we call into the underlying fs.
1365 			 */
1366 			uiop->uio_loffset = GFS_STATIC_ENTRY_OFFSET;
1367 		}
1368 		error = gfs_get_parent_ino(dvp, cr, ct, &pino, &ino);
1369 		if (error == 0) {
1370 			error = gfs_readdir_init(&gstate, dp->gfsd_maxlen, 1,
1371 			    uiop, pino, ino, flags);
1372 		}
1373 		if (error) {
1374 			return (error);
1375 		}
1376 
1377 		while ((error = gfs_readdir_pred(&gstate, uiop, &off)) == 0 &&
1378 		    !*eofp) {
1379 			if (off >= 0 && off < dp->gfsd_nstatic) {
1380 				int eflags;
1381 
1382 				/*
1383 				 * Check to see if this sysattr set name has a
1384 				 * case-insensitive conflict with a real xattr
1385 				 * name.
1386 				 */
1387 				eflags = 0;
1388 				if ((flags & V_RDDIR_ENTFLAGS) && has_xattrs) {
1389 					error = readdir_xattr_casecmp(pvp,
1390 					    dp->gfsd_static[off].gfse_name,
1391 					    cr, ct, &eflags);
1392 					if (error)
1393 						break;
1394 				}
1395 				ino = dp->gfsd_inode(dvp, off);
1396 
1397 				error = gfs_readdir_emit(&gstate, uiop, off,
1398 				    ino, dp->gfsd_static[off].gfse_name,
1399 				    eflags);
1400 				if (error)
1401 					break;
1402 			} else {
1403 				*eofp = 1;
1404 			}
1405 		}
1406 
1407 		error = gfs_readdir_fini(&gstate, error, eofp, *eofp);
1408 		if (error) {
1409 			return (error);
1410 		}
1411 
1412 		/*
1413 		 * We must read all of the static entries in the first
1414 		 * call.  Otherwise we won't know if uio_loffset in a
1415 		 * subsequent call refers to the static entries or to those
1416 		 * in an underlying fs.
1417 		 */
1418 		if (*eofp == 0)
1419 			return (EINVAL);
1420 		reset_off = 1;
1421 	}
1422 
1423 	if (!has_xattrs) {
1424 		*eofp = 1;
1425 		return (0);
1426 	}
1427 
1428 	*eofp = 0;
1429 	if (reset_off) {
1430 		uiop->uio_loffset = 0;
1431 	}
1432 	(void) VOP_RWLOCK(pvp, V_WRITELOCK_FALSE, NULL);
1433 	error = VOP_READDIR(pvp, uiop, cr, eofp, ct, flags);
1434 	VOP_RWUNLOCK(pvp, V_WRITELOCK_FALSE, NULL);
1435 
1436 	return (error);
1437 }
1438 
1439 /*
1440  * Last reference on a (GFS) XATTR directory.
1441  *
1442  * If there's a real XATTR directory in the underlying FS, we will have
1443  * taken a hold on that directory in xattr_dir_realdir.  Now that the
1444  * last hold on the GFS directory is gone, it's time to release that
1445  * hold on the underlying XATTR directory.
1446  */
1447 /* ARGSUSED */
1448 static void
1449 xattr_dir_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
1450 {
1451 	gfs_file_t *fp;
1452 	xattr_dir_t *xattr_dir;
1453 	vnode_t *real_vp = NULL;
1454 
1455 	mutex_enter(&vp->v_lock);
1456 	xattr_dir = vp->v_data;
1457 	if (xattr_dir->xattr_realvp) {
1458 		real_vp = xattr_dir->xattr_realvp;
1459 		xattr_dir->xattr_realvp = NULL;
1460 	}
1461 	mutex_exit(&vp->v_lock);
1462 	if (real_vp != NULL)
1463 		VN_RELE(real_vp);
1464 	fp = gfs_dir_inactive(vp);
1465 	if (fp != NULL) {
1466 		kmem_free(fp, fp->gfs_size);
1467 	}
1468 }
1469 
1470 static int
1471 xattr_dir_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
1472     caller_context_t *ct)
1473 {
1474 	switch (cmd) {
1475 	case _PC_XATTR_EXISTS:
1476 	case _PC_SATTR_ENABLED:
1477 	case _PC_SATTR_EXISTS:
1478 		*valp = 0;
1479 		return (0);
1480 	default:
1481 		return (fs_pathconf(vp, cmd, valp, cr, ct));
1482 	}
1483 }
1484 
1485 /* ARGSUSED */
1486 static int
1487 xattr_dir_realvp(vnode_t *vp, vnode_t **realvp, caller_context_t *ct)
1488 {
1489 	int error;
1490 
1491 	error = xattr_dir_realdir(vp, realvp, LOOKUP_XATTR, kcred, NULL);
1492 	return (error);
1493 
1494 }
1495 
/*
 * Vnode operation template for the GFS XATTR directory.  Each row
 * pairs a VOPNAME_* operation with the handler to use for it, and the
 * list ends with a NULL row.  Most operations use the xattr_dir_*
 * handlers in this file; lookup goes to gfs_vop_lookup, seek to
 * fs_seek, and ioctl and mkdir are routed to fs_inval (presumably
 * rejecting them as invalid -- confirm against fs_inval).
 */
static const fs_operation_def_t xattr_dir_tops[] = {
	{ VOPNAME_OPEN,		{ .vop_open = xattr_dir_open }		},
	{ VOPNAME_CLOSE,	{ .vop_close = xattr_dir_close }	},
	{ VOPNAME_IOCTL,	{ .error = fs_inval }			},
	{ VOPNAME_GETATTR,	{ .vop_getattr = xattr_dir_getattr }	},
	{ VOPNAME_SETATTR,	{ .vop_setattr = xattr_dir_setattr }	},
	{ VOPNAME_ACCESS,	{ .vop_access = xattr_dir_access }	},
	{ VOPNAME_READDIR,	{ .vop_readdir = xattr_dir_readdir }	},
	{ VOPNAME_LOOKUP,	{ .vop_lookup = gfs_vop_lookup }	},
	{ VOPNAME_CREATE,	{ .vop_create = xattr_dir_create }	},
	{ VOPNAME_REMOVE,	{ .vop_remove = xattr_dir_remove }	},
	{ VOPNAME_LINK,		{ .vop_link = xattr_dir_link }		},
	{ VOPNAME_RENAME,	{ .vop_rename = xattr_dir_rename }	},
	{ VOPNAME_MKDIR,	{ .error = fs_inval }			},
	{ VOPNAME_SEEK,		{ .vop_seek = fs_seek }			},
	{ VOPNAME_INACTIVE,	{ .vop_inactive = xattr_dir_inactive }	},
	{ VOPNAME_FID,		{ .vop_fid = xattr_common_fid }		},
	{ VOPNAME_PATHCONF,	{ .vop_pathconf = xattr_dir_pathconf }	},
	{ VOPNAME_REALVP,	{ .vop_realvp = xattr_dir_realvp } },
	{ NULL, NULL }
};
1517 
/*
 * Table passed to gfs_make_opsvec() by xattr_init().  Each row gives a
 * descriptive name, an operation template, and the address of a
 * vnodeops pointer (presumably filled in with the ops vector built
 * from that template -- confirm against gfs_make_opsvec).  One row
 * covers the XATTR directory and one the system attribute files; a
 * NULL row ends the table.
 */
static gfs_opsvec_t xattr_opsvec[] = {
	{ "xattr dir", xattr_dir_tops, &xattr_dir_ops },
	{ "system attributes", xattr_file_tops, &xattr_file_ops },
	{ NULL, NULL, NULL }
};
1523 
1524 /*
1525  * Callback supporting lookup in a GFS XATTR directory.
1526  */
1527 static int
1528 xattr_lookup_cb(vnode_t *vp, const char *nm, vnode_t **vpp, ino64_t *inop,
1529     cred_t *cr, int flags, int *deflags, pathname_t *rpnp)
1530 {
1531 	vnode_t *pvp;
1532 	struct pathname pn;
1533 	int error;
1534 
1535 	*vpp = NULL;
1536 	*inop = 0;
1537 
1538 	error = xattr_dir_realdir(vp, &pvp, LOOKUP_XATTR, cr, NULL);
1539 
1540 	/*
1541 	 * Return ENOENT for EACCES requests during lookup.  Once an
1542 	 * attribute create is attempted EACCES will be returned.
1543 	 */
1544 	if (error) {
1545 		if (error == EACCES)
1546 			return (ENOENT);
1547 		return (error);
1548 	}
1549 
1550 	error = pn_get((char *)nm, UIO_SYSSPACE, &pn);
1551 	if (error == 0) {
1552 		error = VOP_LOOKUP(pvp, (char *)nm, vpp, &pn, flags, rootvp,
1553 		    cr, NULL, deflags, rpnp);
1554 		pn_free(&pn);
1555 	}
1556 
1557 	return (error);
1558 }
1559 
1560 /* ARGSUSED */
1561 static ino64_t
1562 xattrdir_do_ino(vnode_t *vp, int index)
1563 {
1564 	/*
1565 	 * We use index 0 for the directory fid.  Start
1566 	 * the file numbering at 1.
1567 	 */
1568 	return ((ino64_t)index+1);
1569 }
1570 
/*
 * One-time setup for this module: build the operation vectors listed
 * in xattr_opsvec.  A non-zero return from gfs_make_opsvec() trips the
 * VERIFY.
 */
void
xattr_init(void)
{
	VERIFY(gfs_make_opsvec(xattr_opsvec) == 0);
}
1576 
1577 /*
1578  * Get the XATTR dir for some file or directory.
1579  * See vnode.c: fop_lookup()
1580  *
1581  * Note this only gets the GFS XATTR directory.  We'll get the
1582  * real XATTR directory later, in xattr_dir_realdir.
1583  */
1584 int
1585 xattr_dir_lookup(vnode_t *dvp, vnode_t **vpp, int flags, cred_t *cr)
1586 {
1587 	int error = 0;
1588 
1589 	*vpp = NULL;
1590 
1591 	if (dvp->v_type != VDIR && dvp->v_type != VREG)
1592 		return (EINVAL);
1593 
1594 	mutex_enter(&dvp->v_lock);
1595 
1596 	/*
1597 	 * If we're already in sysattr space, don't allow creation
1598 	 * of another level of sysattrs.
1599 	 */
1600 	if (dvp->v_flag & V_SYSATTR) {
1601 		mutex_exit(&dvp->v_lock);
1602 		return (EINVAL);
1603 	}
1604 
1605 	if (dvp->v_xattrdir != NULL) {
1606 		*vpp = dvp->v_xattrdir;
1607 		VN_HOLD(*vpp);
1608 	} else {
1609 		ulong_t val;
1610 		int xattrs_allowed = dvp->v_vfsp->vfs_flag & VFS_XATTR;
1611 		int sysattrs_allowed = 1;
1612 
1613 		/*
1614 		 * We have to drop the lock on dvp.  gfs_dir_create will
1615 		 * grab it for a VN_HOLD.
1616 		 */
1617 		mutex_exit(&dvp->v_lock);
1618 
1619 		/*
1620 		 * If dvp allows xattr creation, but not sysattr
1621 		 * creation, return the real xattr dir vp. We can't
1622 		 * use the vfs feature mask here because _PC_SATTR_ENABLED
1623 		 * has vnode-level granularity (e.g. .zfs).
1624 		 */
1625 		error = VOP_PATHCONF(dvp, _PC_SATTR_ENABLED, &val, cr, NULL);
1626 		if (error != 0 || val == 0)
1627 			sysattrs_allowed = 0;
1628 
1629 		if (!xattrs_allowed && !sysattrs_allowed)
1630 			return (EINVAL);
1631 
1632 		if (!sysattrs_allowed) {
1633 			struct pathname pn;
1634 			char *nm = "";
1635 
1636 			error = pn_get(nm, UIO_SYSSPACE, &pn);
1637 			if (error)
1638 				return (error);
1639 			error = VOP_LOOKUP(dvp, nm, vpp, &pn,
1640 			    flags|LOOKUP_HAVE_SYSATTR_DIR, rootvp, cr, NULL,
1641 			    NULL, NULL);
1642 			pn_free(&pn);
1643 			return (error);
1644 		}
1645 
1646 		/*
1647 		 * Note that we act as if we were given CREATE_XATTR_DIR,
1648 		 * but only for creation of the GFS directory.
1649 		 */
1650 		*vpp = gfs_dir_create(
1651 		    sizeof (xattr_dir_t), dvp, xattr_dir_ops, xattr_dirents,
1652 		    xattrdir_do_ino, MAXNAMELEN, NULL, xattr_lookup_cb);
1653 		mutex_enter(&dvp->v_lock);
1654 		if (dvp->v_xattrdir != NULL) {
1655 			/*
1656 			 * We lost the race to create the xattr dir.
1657 			 * Destroy this one, use the winner.  We can't
1658 			 * just call VN_RELE(*vpp), because the vnode
1659 			 * is only partially initialized.
1660 			 */
1661 			gfs_dir_t *dp = (*vpp)->v_data;
1662 
1663 			ASSERT((*vpp)->v_count == 1);
1664 			vn_free(*vpp);
1665 
1666 			mutex_destroy(&dp->gfsd_lock);
1667 			kmem_free(dp->gfsd_static,
1668 			    dp->gfsd_nstatic * sizeof (gfs_dirent_t));
1669 			kmem_free(dp, dp->gfsd_file.gfs_size);
1670 
1671 			/*
1672 			 * There is an implied VN_HOLD(dvp) here.  We should
1673 			 * be doing a VN_RELE(dvp) to clean up the reference
1674 			 * from *vpp, and then a VN_HOLD(dvp) for the new
1675 			 * reference.  Instead, we just leave the count alone.
1676 			 */
1677 
1678 			*vpp = dvp->v_xattrdir;
1679 			VN_HOLD(*vpp);
1680 		} else {
1681 			(*vpp)->v_flag |= (V_XATTRDIR|V_SYSATTR);
1682 			dvp->v_xattrdir = *vpp;
1683 		}
1684 	}
1685 	mutex_exit(&dvp->v_lock);
1686 
1687 	return (error);
1688 }
1689 
1690 int
1691 xattr_dir_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp)
1692 {
1693 	int error;
1694 	vnode_t *pvp, *dvp;
1695 	xattr_fid_t *xfidp;
1696 	struct pathname pn;
1697 	char *nm;
1698 	uint16_t orig_len;
1699 
1700 	*vpp = NULL;
1701 
1702 	if (fidp->fid_len < XATTR_FIDSZ)
1703 		return (EINVAL);
1704 
1705 	xfidp = (xattr_fid_t *)fidp;
1706 	orig_len = fidp->fid_len;
1707 	fidp->fid_len = xfidp->parent_len;
1708 
1709 	error = VFS_VGET(vfsp, &pvp, fidp);
1710 	fidp->fid_len = orig_len;
1711 	if (error)
1712 		return (error);
1713 
1714 	/*
1715 	 * Start by getting the GFS sysattr directory.	We might need
1716 	 * to recreate it during the VOP_LOOKUP.
1717 	 */
1718 	nm = "";
1719 	error = pn_get(nm, UIO_SYSSPACE, &pn);
1720 	if (error) {
1721 		VN_RELE(pvp);
1722 		return (EINVAL);
1723 	}
1724 
1725 	error = VOP_LOOKUP(pvp, nm, &dvp, &pn, LOOKUP_XATTR|CREATE_XATTR_DIR,
1726 	    rootvp, CRED(), NULL, NULL, NULL);
1727 	pn_free(&pn);
1728 	VN_RELE(pvp);
1729 	if (error)
1730 		return (error);
1731 
1732 	if (xfidp->dir_offset == 0) {
1733 		/*
1734 		 * If we were looking for the directory, we're done.
1735 		 */
1736 		*vpp = dvp;
1737 		return (0);
1738 	}
1739 
1740 	if (xfidp->dir_offset > XATTRDIR_NENTS) {
1741 		VN_RELE(dvp);
1742 		return (EINVAL);
1743 	}
1744 
1745 	nm = xattr_dirents[xfidp->dir_offset - 1].gfse_name;
1746 
1747 	error = pn_get(nm, UIO_SYSSPACE, &pn);
1748 	if (error) {
1749 		VN_RELE(dvp);
1750 		return (EINVAL);
1751 	}
1752 
1753 	error = VOP_LOOKUP(dvp, nm, vpp, &pn, 0, rootvp, CRED(), NULL,
1754 	    NULL, NULL);
1755 
1756 	pn_free(&pn);
1757 	VN_RELE(dvp);
1758 
1759 	return (error);
1760 }
1761