xref: /illumos-gate/usr/src/uts/common/fs/xattr.c (revision 16b76d3cb933ff92018a2a75594449010192eacb)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright 2016 Nexenta Systems, Inc.  All rights reserved.
24  */
25 
26 /*
27  * Big Theory Statement for Extended Attribute (XATTR) directories
28  *
29  * The Solaris VFS layer presents extended file attributes using a special
30  * "XATTR" directory under files or directories that have extended file
31  * attributes.  See fsattr(7) for background.
32  *
33  * This design avoids the need for a separate set of VFS or vnode functions
34  * for operating on XATTR objects.  File system implementations that support
35  * XATTR instantiate a special XATTR directory using this module.
36  * Applications get to the XATTR directory by passing the LOOKUP_XATTR flag
37  * to fop_lookup.  Once the XATTR directory is obtained, all other file
38  * system operations on extended attributes happen via the normal vnode
39  * functions, applied to the XATTR directory or its contents.
40  *
41  * The XATTR directories returned by fop_lookup (with LOOKUP_XATTR) are
42  * implemented differently, depending on whether the file system supports
43  * "extended attributes" (XATTR), "system attributes" (SYSATTR), or both.
44  *
45  * When SYSATTR=true, XATTR=true:
46  *	The XATTR directory is a "generic file system" (GFS) object
47  *	that adds the special system attribute names (SUNWattr*) to
48  *	the list of XATTR files presented by the underlying FS.
49  *	In this case, many operations are "passed through" to the
50  *	lower-level FS.
51  *
52  * When SYSATTR=true, XATTR=false:
53  *	The XATTR directory is a "generic file system" (GFS) object,
54  *	presenting only the system attribute names (SUNWattr*)
55  *	In this case there's no lower-level FS, only the GFS object.
56  *
57  * When SYSATTR=false, XATTR=true:
58  *	The XATTR directory is implemented by the file system code,
59  *	and this module is not involved after xattr_dir_lookup()
60  *	returns the XATTR dir from the underlying file system.
61  *
62  * When SYSATTR=false, XATTR=false:
63  *	xattr_dir_lookup just returns EINVAL
64  *
65  * In the first two cases (where we have system attributes) this module
66  * implements what can be thought of as a "translucent" directory containing
67  * both the system attribute names (SUNWattr*) and whatever XATTR names may
68  * exist in the XATTR directory of the underlying file system, if any.
69  *
70  * This affects operations on the (GFS) XATTR directory as follows:
71  *
72  * readdir:	Merges the SUNWattr* names with any contents from the
73  *		underlying XATTR directory.
74  *
75  * rename:	If "to" or "from" is a SUNWattr name, special handling,
76  *		else pass through to the lower FS.
77  *
78  * link:	If "from" is a SUNWattr name, disallow.
79  *
80  * create:	If a SUNWattr name, disallow, else pass to lower FS.
81  * remove:	(same)
82  *
83  * open,close:	Just pass through to the XATTR dir in the lower FS.
84  *
85  * lookup:	Lookup an XATTR file in either the (GFS) XATTR directory
86  *		or the "real" XATTR directory of the underlying FS.
87  *		Note for file systems that support SYSATTR but not XATTR,
88  *		only the GFS XATTR directory will exist.  When both exist,
89  *		gfs_vop_lookup uses the xattr_lookup_cb callback function
90  *		which passes the lookup call through to the "real" FS.
91  *
92  * Operations on the XATTR _files_ are simpler:
93  *
94  * If the file vnode came from lookup at the GFS level, the file is one of
95  * the special SUNWattr* vnodes, and its vnode operations (xattr_file_tops)
96  * allow only what's appropriate on these "files".
97  *
98  * If the file vnode came from the underlying FS, all operations on that
99  * object are handled through the vnode operations set by that FS.
100  */
101 
102 #include <sys/param.h>
103 #include <sys/isa_defs.h>
104 #include <sys/types.h>
105 #include <sys/sysmacros.h>
106 #include <sys/cred.h>
107 #include <sys/systm.h>
108 #include <sys/errno.h>
109 #include <sys/fcntl.h>
110 #include <sys/pathname.h>
111 #include <sys/stat.h>
112 #include <sys/vfs.h>
113 #include <sys/acl.h>
114 #include <sys/file.h>
115 #include <sys/sunddi.h>
116 #include <sys/debug.h>
117 #include <sys/cmn_err.h>
118 #include <sys/vnode.h>
119 #include <sys/mode.h>
120 #include <sys/nvpair.h>
121 #include <sys/attr.h>
122 #include <sys/gfs.h>
123 #include <sys/mutex.h>
124 #include <fs/fs_subr.h>
125 #include <sys/kidmap.h>
126 
/*
 * Private data hung off v_data of a system attribute (SUNWattr*) file
 * vnode.  Instances are sized via sizeof (xattr_file_t) in the call to
 * gfs_file_create() made by xattr_mkfile().
 */
typedef struct {
	/* GFS bookkeeping; presumably must remain first -- confirm in gfs.h */
	gfs_file_t	xattr_gfs_private;
	/* Which view this file presents; compared to XATTR_VIEW_READONLY */
	xattr_view_t	xattr_view;
} xattr_file_t;
131 
/*
 * Private data hung off v_data of a GFS XATTR directory vnode.
 */
typedef struct {
	/* GFS bookkeeping; presumably must remain first -- confirm in gfs.h */
	gfs_dir_t	xattr_gfs_private;
	/*
	 * Cached "real" XATTR directory of the underlying FS, or NULL if
	 * none has been found yet.  Read and written under the GFS
	 * directory vnode's v_lock by xattr_dir_realdir().
	 */
	vnode_t		*xattr_realvp;
} xattr_dir_t;
136 
137 /* ARGSUSED */
138 static int
139 xattr_file_open(vnode_t **vpp, int flags, cred_t *cr, caller_context_t *ct)
140 {
141 	xattr_file_t *np = (*vpp)->v_data;
142 
143 	if ((np->xattr_view == XATTR_VIEW_READONLY) && (flags & FWRITE))
144 		return (EACCES);
145 
146 	return (0);
147 }
148 
149 /* ARGSUSED */
150 static int
151 xattr_file_access(vnode_t *vp, int mode, int flags, cred_t *cr,
152     caller_context_t *ct)
153 {
154 	xattr_file_t *np = vp->v_data;
155 
156 	if ((np->xattr_view == XATTR_VIEW_READONLY) && (mode & VWRITE))
157 		return (EACCES);
158 
159 	return (0);
160 }
161 
162 /* ARGSUSED */
163 static int
164 xattr_file_close(vnode_t *vp, int flags, int count, offset_t off,
165     cred_t *cr, caller_context_t *ct)
166 {
167 	cleanlocks(vp, ddi_get_pid(), 0);
168 	cleanshares(vp, ddi_get_pid());
169 	return (0);
170 }
171 
172 static int
173 xattr_common_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct)
174 {
175 	xattr_fid_t	*xfidp;
176 	vnode_t		*pvp, *savevp;
177 	int		error;
178 	uint16_t	orig_len;
179 
180 	if (fidp->fid_len < XATTR_FIDSZ) {
181 		fidp->fid_len = XATTR_FIDSZ;
182 		return (ENOSPC);
183 	}
184 
185 	savevp = pvp = gfs_file_parent(vp);
186 	mutex_enter(&savevp->v_lock);
187 	if (pvp->v_flag & V_XATTRDIR) {
188 		pvp = gfs_file_parent(pvp);
189 	}
190 	mutex_exit(&savevp->v_lock);
191 
192 	xfidp = (xattr_fid_t *)fidp;
193 	orig_len = fidp->fid_len;
194 	fidp->fid_len = sizeof (xfidp->parent_fid);
195 
196 	error = VOP_FID(pvp, fidp, ct);
197 	if (error) {
198 		fidp->fid_len = orig_len;
199 		return (error);
200 	}
201 
202 	xfidp->parent_len = fidp->fid_len;
203 	fidp->fid_len = XATTR_FIDSZ;
204 	xfidp->dir_offset = gfs_file_inode(vp);
205 
206 	return (0);
207 }
208 
/*
 * Populate nvlp with the system attributes that belong to the given view
 * (read-only or read-write) of the object that owns the sysattr file vp.
 *
 * The function works in three steps:
 *   1. Walk every f_attr_t and, for those in the requested view, mark the
 *      matching XAT_* attribute as requested in a local xvattr_t.  F_FSID
 *      is the exception: it is computed from vp->v_vfsp and added to the
 *      nvlist right away.
 *   2. Issue a single VOP_GETATTR on the real object and add every
 *      optional attribute the file system reported as returned.
 *   3. If the owner or group id is above MAXUID, try to map it to a SID
 *      and add it as a nested nvlist (F_OWNERSID / F_GROUPSID).
 *
 * Returns EINVAL if the optional attribute area cannot be obtained, the
 * error from VOP_GETATTR, ENOMEM if a nested nvlist cannot be allocated,
 * or 0.  On a non-zero return nvlp may already hold some entries; the
 * caller owns nvlp and must free it in all cases.
 */
/* ARGSUSED */
static int
xattr_fill_nvlist(vnode_t *vp, xattr_view_t xattr_view, nvlist_t *nvlp,
    cred_t *cr, caller_context_t *ct)
{
	int error;
	f_attr_t attr;
	uint64_t fsid;
	xvattr_t xvattr;
	xoptattr_t *xoap;	/* Pointer to optional attributes */
	vnode_t *ppvp;
	const char *domain;
	uint32_t rid;

	xva_init(&xvattr);

	if ((xoap = xva_getxoptattr(&xvattr)) == NULL)
		return (EINVAL);

	/*
	 * For detecting ephemeral uid/gid
	 */
	xvattr.xva_vattr.va_mask |= (AT_UID|AT_GID);

	/*
	 * We need to access the real fs object.
	 * vp points to a GFS file; ppvp points to the real object.
	 */
	ppvp = gfs_file_parent(gfs_file_parent(vp));

	/*
	 * Iterate through the attrs associated with this view
	 */

	for (attr = 0; attr < F_ATTR_ALL; attr++) {
		/* Skip attributes that belong to the other view. */
		if (xattr_view != attr_to_xattr_view(attr)) {
			continue;
		}

		switch (attr) {
		case F_SYSTEM:
			XVA_SET_REQ(&xvattr, XAT_SYSTEM);
			break;
		case F_READONLY:
			XVA_SET_REQ(&xvattr, XAT_READONLY);
			break;
		case F_HIDDEN:
			XVA_SET_REQ(&xvattr, XAT_HIDDEN);
			break;
		case F_ARCHIVE:
			XVA_SET_REQ(&xvattr, XAT_ARCHIVE);
			break;
		case F_IMMUTABLE:
			XVA_SET_REQ(&xvattr, XAT_IMMUTABLE);
			break;
		case F_APPENDONLY:
			XVA_SET_REQ(&xvattr, XAT_APPENDONLY);
			break;
		case F_NOUNLINK:
			XVA_SET_REQ(&xvattr, XAT_NOUNLINK);
			break;
		case F_OPAQUE:
			XVA_SET_REQ(&xvattr, XAT_OPAQUE);
			break;
		case F_NODUMP:
			XVA_SET_REQ(&xvattr, XAT_NODUMP);
			break;
		case F_AV_QUARANTINED:
			XVA_SET_REQ(&xvattr, XAT_AV_QUARANTINED);
			break;
		case F_AV_MODIFIED:
			XVA_SET_REQ(&xvattr, XAT_AV_MODIFIED);
			break;
		case F_AV_SCANSTAMP:
			/* The scanstamp is only requested for regular files. */
			if (ppvp->v_type == VREG)
				XVA_SET_REQ(&xvattr, XAT_AV_SCANSTAMP);
			break;
		case F_CRTIME:
			XVA_SET_REQ(&xvattr, XAT_CREATETIME);
			break;
		case F_FSID:
			/*
			 * Not fetched through VOP_GETATTR: built directly
			 * from the two words of the vfs_fsid, val[0] in the
			 * upper 32 bits and val[1] in the lower 32 bits.
			 */
			fsid = (((uint64_t)vp->v_vfsp->vfs_fsid.val[0] << 32) |
			    (uint64_t)(vp->v_vfsp->vfs_fsid.val[1] &
			    0xffffffff));
			VERIFY(nvlist_add_uint64(nvlp, attr_to_name(attr),
			    fsid) == 0);
			break;
		case F_REPARSE:
			XVA_SET_REQ(&xvattr, XAT_REPARSE);
			break;
		case F_GEN:
			XVA_SET_REQ(&xvattr, XAT_GEN);
			break;
		case F_OFFLINE:
			XVA_SET_REQ(&xvattr, XAT_OFFLINE);
			break;
		case F_SPARSE:
			XVA_SET_REQ(&xvattr, XAT_SPARSE);
			break;
		default:
			break;
		}
	}

	error = VOP_GETATTR(ppvp, &xvattr.xva_vattr, 0, cr, ct);
	if (error)
		return (error);

	/*
	 * Process all the optional attributes together here.  Notice that
	 * xoap was set when the optional attribute bits were set above.
	 * Only attributes flagged as returned (XVA_ISSET_RTN) are added.
	 */
	if ((xvattr.xva_vattr.va_mask & AT_XVATTR) && xoap) {
		if (XVA_ISSET_RTN(&xvattr, XAT_READONLY)) {
			VERIFY(nvlist_add_boolean_value(nvlp,
			    attr_to_name(F_READONLY),
			    xoap->xoa_readonly) == 0);
		}
		if (XVA_ISSET_RTN(&xvattr, XAT_HIDDEN)) {
			VERIFY(nvlist_add_boolean_value(nvlp,
			    attr_to_name(F_HIDDEN),
			    xoap->xoa_hidden) == 0);
		}
		if (XVA_ISSET_RTN(&xvattr, XAT_SYSTEM)) {
			VERIFY(nvlist_add_boolean_value(nvlp,
			    attr_to_name(F_SYSTEM),
			    xoap->xoa_system) == 0);
		}
		if (XVA_ISSET_RTN(&xvattr, XAT_ARCHIVE)) {
			VERIFY(nvlist_add_boolean_value(nvlp,
			    attr_to_name(F_ARCHIVE),
			    xoap->xoa_archive) == 0);
		}
		if (XVA_ISSET_RTN(&xvattr, XAT_IMMUTABLE)) {
			VERIFY(nvlist_add_boolean_value(nvlp,
			    attr_to_name(F_IMMUTABLE),
			    xoap->xoa_immutable) == 0);
		}
		if (XVA_ISSET_RTN(&xvattr, XAT_NOUNLINK)) {
			VERIFY(nvlist_add_boolean_value(nvlp,
			    attr_to_name(F_NOUNLINK),
			    xoap->xoa_nounlink) == 0);
		}
		if (XVA_ISSET_RTN(&xvattr, XAT_APPENDONLY)) {
			VERIFY(nvlist_add_boolean_value(nvlp,
			    attr_to_name(F_APPENDONLY),
			    xoap->xoa_appendonly) == 0);
		}
		if (XVA_ISSET_RTN(&xvattr, XAT_NODUMP)) {
			VERIFY(nvlist_add_boolean_value(nvlp,
			    attr_to_name(F_NODUMP),
			    xoap->xoa_nodump) == 0);
		}
		if (XVA_ISSET_RTN(&xvattr, XAT_OPAQUE)) {
			VERIFY(nvlist_add_boolean_value(nvlp,
			    attr_to_name(F_OPAQUE),
			    xoap->xoa_opaque) == 0);
		}
		if (XVA_ISSET_RTN(&xvattr, XAT_AV_QUARANTINED)) {
			VERIFY(nvlist_add_boolean_value(nvlp,
			    attr_to_name(F_AV_QUARANTINED),
			    xoap->xoa_av_quarantined) == 0);
		}
		if (XVA_ISSET_RTN(&xvattr, XAT_AV_MODIFIED)) {
			VERIFY(nvlist_add_boolean_value(nvlp,
			    attr_to_name(F_AV_MODIFIED),
			    xoap->xoa_av_modified) == 0);
		}
		if (XVA_ISSET_RTN(&xvattr, XAT_AV_SCANSTAMP)) {
			VERIFY(nvlist_add_uint8_array(nvlp,
			    attr_to_name(F_AV_SCANSTAMP),
			    xoap->xoa_av_scanstamp,
			    sizeof (xoap->xoa_av_scanstamp)) == 0);
		}
		if (XVA_ISSET_RTN(&xvattr, XAT_CREATETIME)) {
			/* The create time is exported as a uint64 array. */
			VERIFY(nvlist_add_uint64_array(nvlp,
			    attr_to_name(F_CRTIME),
			    (uint64_t *)&(xoap->xoa_createtime),
			    sizeof (xoap->xoa_createtime) /
			    sizeof (uint64_t)) == 0);
		}
		if (XVA_ISSET_RTN(&xvattr, XAT_REPARSE)) {
			VERIFY(nvlist_add_boolean_value(nvlp,
			    attr_to_name(F_REPARSE),
			    xoap->xoa_reparse) == 0);
		}
		if (XVA_ISSET_RTN(&xvattr, XAT_GEN)) {
			VERIFY(nvlist_add_uint64(nvlp,
			    attr_to_name(F_GEN),
			    xoap->xoa_generation) == 0);
		}
		if (XVA_ISSET_RTN(&xvattr, XAT_OFFLINE)) {
			VERIFY(nvlist_add_boolean_value(nvlp,
			    attr_to_name(F_OFFLINE),
			    xoap->xoa_offline) == 0);
		}
		if (XVA_ISSET_RTN(&xvattr, XAT_SPARSE)) {
			VERIFY(nvlist_add_boolean_value(nvlp,
			    attr_to_name(F_SPARSE),
			    xoap->xoa_sparse) == 0);
		}
	}
	/*
	 * Check for optional ownersid/groupsid
	 *
	 * An id above MAXUID is treated as a candidate for SID mapping.  If
	 * the kidmap lookup fails the SID attribute is simply left out.
	 * nvl_sid is freed unconditionally after nvlist_add_nvlist().
	 */

	if (xvattr.xva_vattr.va_uid > MAXUID) {
		nvlist_t *nvl_sid;

		if (nvlist_alloc(&nvl_sid, NV_UNIQUE_NAME, KM_SLEEP))
			return (ENOMEM);

		if (kidmap_getsidbyuid(crgetzone(cr), xvattr.xva_vattr.va_uid,
		    &domain, &rid) == 0) {
			VERIFY(nvlist_add_string(nvl_sid,
			    SID_DOMAIN, domain) == 0);
			VERIFY(nvlist_add_uint32(nvl_sid, SID_RID, rid) == 0);
			VERIFY(nvlist_add_nvlist(nvlp, attr_to_name(F_OWNERSID),
			    nvl_sid) == 0);
		}
		nvlist_free(nvl_sid);
	}
	if (xvattr.xva_vattr.va_gid > MAXUID) {
		nvlist_t *nvl_sid;

		if (nvlist_alloc(&nvl_sid, NV_UNIQUE_NAME, KM_SLEEP))
			return (ENOMEM);

		if (kidmap_getsidbygid(crgetzone(cr), xvattr.xva_vattr.va_gid,
		    &domain, &rid) == 0) {
			VERIFY(nvlist_add_string(nvl_sid,
			    SID_DOMAIN, domain) == 0);
			VERIFY(nvlist_add_uint32(nvl_sid, SID_RID, rid) == 0);
			VERIFY(nvlist_add_nvlist(nvlp, attr_to_name(F_GROUPSID),
			    nvl_sid) == 0);
		}
		nvlist_free(nvl_sid);
	}

	return (0);
}
450 
451 /*
452  * The size of a sysattr file is the size of the nvlist that will be
453  * returned by xattr_file_read().  A call to xattr_file_write() could
454  * change the size of that nvlist.  That size is not stored persistently
455  * so xattr_fill_nvlist() calls VOP_GETATTR so that it can be calculated.
456  */
457 static int
458 xattr_file_size(vnode_t *vp, xattr_view_t xattr_view, size_t *size,
459     cred_t *cr, caller_context_t *ct)
460 {
461 	nvlist_t *nvl;
462 
463 	if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) {
464 		return (ENOMEM);
465 	}
466 
467 	if (xattr_fill_nvlist(vp, xattr_view, nvl, cr, ct)) {
468 		nvlist_free(nvl);
469 		return (EFAULT);
470 	}
471 
472 	VERIFY(nvlist_size(nvl, size, NV_ENCODE_XDR) == 0);
473 	nvlist_free(nvl);
474 	return (0);
475 }
476 
477 /* ARGSUSED */
478 static int
479 xattr_file_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
480     caller_context_t *ct)
481 {
482 	xattr_file_t *np = vp->v_data;
483 	timestruc_t now;
484 	size_t size;
485 	int error;
486 	vnode_t *pvp;
487 	vattr_t pvattr;
488 
489 	vap->va_type = VREG;
490 	vap->va_mode = MAKEIMODE(vap->va_type,
491 	    (np->xattr_view == XATTR_VIEW_READONLY ? 0444 : 0644));
492 	vap->va_nodeid = gfs_file_inode(vp);
493 	vap->va_nlink = 1;
494 	pvp = gfs_file_parent(vp);
495 	(void) memset(&pvattr, 0, sizeof (pvattr));
496 	pvattr.va_mask = AT_CTIME|AT_MTIME;
497 	error = VOP_GETATTR(pvp, &pvattr, flags, cr, ct);
498 	if (error) {
499 		return (error);
500 	}
501 	vap->va_ctime = pvattr.va_ctime;
502 	vap->va_mtime = pvattr.va_mtime;
503 	gethrestime(&now);
504 	vap->va_atime = now;
505 	vap->va_uid = 0;
506 	vap->va_gid = 0;
507 	vap->va_rdev = 0;
508 	vap->va_blksize = DEV_BSIZE;
509 	vap->va_seq = 0;
510 	vap->va_fsid = vp->v_vfsp->vfs_dev;
511 	error = xattr_file_size(vp, np->xattr_view, &size, cr, ct);
512 	vap->va_size = size;
513 	vap->va_nblocks = howmany(vap->va_size, vap->va_blksize);
514 	return (error);
515 }
516 
517 /* ARGSUSED */
518 static int
519 xattr_file_read(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
520     caller_context_t *ct)
521 {
522 	xattr_file_t *np = vp->v_data;
523 	xattr_view_t xattr_view = np->xattr_view;
524 	char *buf;
525 	size_t filesize;
526 	nvlist_t *nvl;
527 	int error;
528 
529 	/*
530 	 * Validate file offset and fasttrack empty reads
531 	 */
532 	if (uiop->uio_loffset < (offset_t)0)
533 		return (EINVAL);
534 
535 	if (uiop->uio_resid == 0)
536 		return (0);
537 
538 	if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP))
539 		return (ENOMEM);
540 
541 	if (xattr_fill_nvlist(vp, xattr_view, nvl, cr, ct)) {
542 		nvlist_free(nvl);
543 		return (EFAULT);
544 	}
545 
546 	VERIFY(nvlist_size(nvl, &filesize, NV_ENCODE_XDR) == 0);
547 
548 	if (uiop->uio_loffset >= filesize) {
549 		nvlist_free(nvl);
550 		return (0);
551 	}
552 
553 	buf = kmem_alloc(filesize, KM_SLEEP);
554 	VERIFY(nvlist_pack(nvl, &buf, &filesize, NV_ENCODE_XDR,
555 	    KM_SLEEP) == 0);
556 
557 	error = uiomove((caddr_t)buf, filesize, UIO_READ, uiop);
558 	kmem_free(buf, filesize);
559 	nvlist_free(nvl);
560 	return (error);
561 }
562 
563 /* ARGSUSED */
564 static int
565 xattr_file_write(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *cr,
566     caller_context_t *ct)
567 {
568 	int error = 0;
569 	char *buf;
570 	char *domain;
571 	uint32_t rid;
572 	ssize_t size = uiop->uio_resid;
573 	nvlist_t *nvp;
574 	nvpair_t *pair = NULL;
575 	vnode_t *ppvp;
576 	xvattr_t xvattr;
577 	xoptattr_t *xoap = NULL;	/* Pointer to optional attributes */
578 
579 	if (vfs_has_feature(vp->v_vfsp, VFSFT_XVATTR) == 0)
580 		return (EINVAL);
581 
582 	/*
583 	 * Validate file offset and size.
584 	 */
585 	if (uiop->uio_loffset < (offset_t)0)
586 		return (EINVAL);
587 
588 	if (size == 0)
589 		return (EINVAL);
590 
591 	xva_init(&xvattr);
592 
593 	if ((xoap = xva_getxoptattr(&xvattr)) == NULL) {
594 		return (EINVAL);
595 	}
596 
597 	/*
598 	 * Copy and unpack the nvlist
599 	 */
600 	buf = kmem_alloc(size, KM_SLEEP);
601 	if (uiomove((caddr_t)buf, size, UIO_WRITE, uiop)) {
602 		return (EFAULT);
603 	}
604 
605 	if (nvlist_unpack(buf, size, &nvp, KM_SLEEP) != 0) {
606 		kmem_free(buf, size);
607 		uiop->uio_resid = size;
608 		return (EINVAL);
609 	}
610 	kmem_free(buf, size);
611 
612 	/*
613 	 * Fasttrack empty writes (nvlist with no nvpairs)
614 	 */
615 	if (nvlist_next_nvpair(nvp, NULL) == 0)
616 		return (0);
617 
618 	ppvp = gfs_file_parent(gfs_file_parent(vp));
619 
620 	while (pair = nvlist_next_nvpair(nvp, pair)) {
621 		data_type_t type;
622 		f_attr_t attr;
623 		boolean_t value;
624 		uint64_t *time, *times;
625 		uint_t elem, nelems;
626 		nvlist_t *nvp_sid;
627 		uint8_t *scanstamp;
628 
629 		/*
630 		 * Validate the name and type of each attribute.
631 		 * Log any unknown names and continue.  This will
632 		 * help if additional attributes are added later.
633 		 */
634 		type = nvpair_type(pair);
635 		if ((attr = name_to_attr(nvpair_name(pair))) == F_ATTR_INVAL) {
636 			cmn_err(CE_WARN, "Unknown attribute %s",
637 			    nvpair_name(pair));
638 			continue;
639 		}
640 
641 		/*
642 		 * Verify nvlist type matches required type and view is OK
643 		 */
644 
645 		if (type != attr_to_data_type(attr) ||
646 		    (attr_to_xattr_view(attr) == XATTR_VIEW_READONLY)) {
647 			nvlist_free(nvp);
648 			return (EINVAL);
649 		}
650 
651 		/*
652 		 * For OWNERSID/GROUPSID make sure the target
653 		 * file system support ephemeral ID's
654 		 */
655 		if ((attr == F_OWNERSID || attr == F_GROUPSID) &&
656 		    (!(vp->v_vfsp->vfs_flag & VFS_XID))) {
657 			nvlist_free(nvp);
658 			return (EINVAL);
659 		}
660 
661 		/*
662 		 * Retrieve data from nvpair
663 		 */
664 		switch (type) {
665 		case DATA_TYPE_BOOLEAN_VALUE:
666 			if (nvpair_value_boolean_value(pair, &value)) {
667 				nvlist_free(nvp);
668 				return (EINVAL);
669 			}
670 			break;
671 		case DATA_TYPE_UINT64_ARRAY:
672 			if (nvpair_value_uint64_array(pair, &times, &nelems)) {
673 				nvlist_free(nvp);
674 				return (EINVAL);
675 			}
676 			break;
677 		case DATA_TYPE_NVLIST:
678 			if (nvpair_value_nvlist(pair, &nvp_sid)) {
679 				nvlist_free(nvp);
680 				return (EINVAL);
681 			}
682 			break;
683 		case DATA_TYPE_UINT8_ARRAY:
684 			if (nvpair_value_uint8_array(pair,
685 			    &scanstamp, &nelems)) {
686 				nvlist_free(nvp);
687 				return (EINVAL);
688 			}
689 			break;
690 		default:
691 			nvlist_free(nvp);
692 			return (EINVAL);
693 		}
694 
695 		switch (attr) {
696 		/*
697 		 * If we have several similar optional attributes to
698 		 * process then we should do it all together here so that
699 		 * xoap and the requested bitmap can be set in one place.
700 		 */
701 		case F_READONLY:
702 			XVA_SET_REQ(&xvattr, XAT_READONLY);
703 			xoap->xoa_readonly = value;
704 			break;
705 		case F_HIDDEN:
706 			XVA_SET_REQ(&xvattr, XAT_HIDDEN);
707 			xoap->xoa_hidden = value;
708 			break;
709 		case F_SYSTEM:
710 			XVA_SET_REQ(&xvattr, XAT_SYSTEM);
711 			xoap->xoa_system = value;
712 			break;
713 		case F_ARCHIVE:
714 			XVA_SET_REQ(&xvattr, XAT_ARCHIVE);
715 			xoap->xoa_archive = value;
716 			break;
717 		case F_IMMUTABLE:
718 			XVA_SET_REQ(&xvattr, XAT_IMMUTABLE);
719 			xoap->xoa_immutable = value;
720 			break;
721 		case F_NOUNLINK:
722 			XVA_SET_REQ(&xvattr, XAT_NOUNLINK);
723 			xoap->xoa_nounlink = value;
724 			break;
725 		case F_APPENDONLY:
726 			XVA_SET_REQ(&xvattr, XAT_APPENDONLY);
727 			xoap->xoa_appendonly = value;
728 			break;
729 		case F_NODUMP:
730 			XVA_SET_REQ(&xvattr, XAT_NODUMP);
731 			xoap->xoa_nodump = value;
732 			break;
733 		case F_AV_QUARANTINED:
734 			XVA_SET_REQ(&xvattr, XAT_AV_QUARANTINED);
735 			xoap->xoa_av_quarantined = value;
736 			break;
737 		case F_AV_MODIFIED:
738 			XVA_SET_REQ(&xvattr, XAT_AV_MODIFIED);
739 			xoap->xoa_av_modified = value;
740 			break;
741 		case F_CRTIME:
742 			XVA_SET_REQ(&xvattr, XAT_CREATETIME);
743 			time = (uint64_t *)&(xoap->xoa_createtime);
744 			for (elem = 0; elem < nelems; elem++)
745 				*time++ = times[elem];
746 			break;
747 		case F_OWNERSID:
748 		case F_GROUPSID:
749 			if (nvlist_lookup_string(nvp_sid, SID_DOMAIN,
750 			    &domain) || nvlist_lookup_uint32(nvp_sid, SID_RID,
751 			    &rid)) {
752 				nvlist_free(nvp);
753 				return (EINVAL);
754 			}
755 
756 			/*
757 			 * Now map domain+rid to ephemeral id's
758 			 *
759 			 * If mapping fails, then the uid/gid will
760 			 * be set to UID_NOBODY by Winchester.
761 			 */
762 
763 			if (attr == F_OWNERSID) {
764 				(void) kidmap_getuidbysid(crgetzone(cr), domain,
765 				    rid, &xvattr.xva_vattr.va_uid);
766 				xvattr.xva_vattr.va_mask |= AT_UID;
767 			} else {
768 				(void) kidmap_getgidbysid(crgetzone(cr), domain,
769 				    rid, &xvattr.xva_vattr.va_gid);
770 				xvattr.xva_vattr.va_mask |= AT_GID;
771 			}
772 			break;
773 		case F_AV_SCANSTAMP:
774 			if (ppvp->v_type == VREG) {
775 				XVA_SET_REQ(&xvattr, XAT_AV_SCANSTAMP);
776 				(void) memcpy(xoap->xoa_av_scanstamp,
777 				    scanstamp, nelems);
778 			} else {
779 				nvlist_free(nvp);
780 				return (EINVAL);
781 			}
782 			break;
783 		case F_REPARSE:
784 			XVA_SET_REQ(&xvattr, XAT_REPARSE);
785 			xoap->xoa_reparse = value;
786 			break;
787 		case F_OFFLINE:
788 			XVA_SET_REQ(&xvattr, XAT_OFFLINE);
789 			xoap->xoa_offline = value;
790 			break;
791 		case F_SPARSE:
792 			XVA_SET_REQ(&xvattr, XAT_SPARSE);
793 			xoap->xoa_sparse = value;
794 			break;
795 		default:
796 			break;
797 		}
798 	}
799 
800 	ppvp = gfs_file_parent(gfs_file_parent(vp));
801 	error = VOP_SETATTR(ppvp, &xvattr.xva_vattr, 0, cr, ct);
802 	if (error)
803 		uiop->uio_resid = size;
804 
805 	nvlist_free(nvp);
806 	return (error);
807 }
808 
809 static int
810 xattr_file_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
811     caller_context_t *ct)
812 {
813 	switch (cmd) {
814 	case _PC_XATTR_EXISTS:
815 	case _PC_SATTR_ENABLED:
816 	case _PC_SATTR_EXISTS:
817 		*valp = 0;
818 		return (0);
819 	default:
820 		return (fs_pathconf(vp, cmd, valp, cr, ct));
821 	}
822 }
823 
/*
 * Vnode operations vector for system attribute files; passed to
 * gfs_file_create() by xattr_mkfile().  It is not assigned anywhere in
 * this part of the file.
 */
vnodeops_t *xattr_file_ops;

/*
 * Operation template for system attribute files.  Operations backed by
 * functions in this file are open, close, read, write, getattr, access,
 * fid and pathconf; the remaining entries point at generic fs_* / gfs_*
 * routines.  The list is terminated by a NULL entry.
 */
static const fs_operation_def_t xattr_file_tops[] = {
	{ VOPNAME_OPEN,		{ .vop_open = xattr_file_open }		},
	{ VOPNAME_CLOSE,	{ .vop_close = xattr_file_close }	},
	{ VOPNAME_READ,		{ .vop_read = xattr_file_read }		},
	{ VOPNAME_WRITE,	{ .vop_write = xattr_file_write }	},
	{ VOPNAME_IOCTL,	{ .error = fs_ioctl }			},
	{ VOPNAME_GETATTR,	{ .vop_getattr = xattr_file_getattr }	},
	{ VOPNAME_ACCESS,	{ .vop_access = xattr_file_access }	},
	{ VOPNAME_READDIR,	{ .error = fs_notdir }			},
	{ VOPNAME_SEEK,		{ .vop_seek = fs_seek }			},
	{ VOPNAME_INACTIVE,	{ .vop_inactive = gfs_vop_inactive }	},
	{ VOPNAME_FID,		{ .vop_fid = xattr_common_fid }		},
	{ VOPNAME_PATHCONF,	{ .vop_pathconf = xattr_file_pathconf }	},
	{ VOPNAME_PUTPAGE,	{ .error = fs_putpage }			},
	{ VOPNAME_FSYNC,	{ .error = fs_fsync }			},
	{ NULL }
};
843 
844 vnode_t *
845 xattr_mkfile(vnode_t *pvp, xattr_view_t xattr_view)
846 {
847 	vnode_t *vp;
848 	xattr_file_t *np;
849 
850 	vp = gfs_file_create(sizeof (xattr_file_t), pvp, xattr_file_ops);
851 	np = vp->v_data;
852 	np->xattr_view = xattr_view;
853 	vp->v_flag |= V_SYSATTR;
854 	return (vp);
855 }
856 
857 vnode_t *
858 xattr_mkfile_ro(vnode_t *pvp)
859 {
860 	return (xattr_mkfile(pvp, XATTR_VIEW_READONLY));
861 }
862 
863 vnode_t *
864 xattr_mkfile_rw(vnode_t *pvp)
865 {
866 	return (xattr_mkfile(pvp, XATTR_VIEW_READWRITE));
867 }
868 
/*
 * Vnode operations vector for the GFS XATTR directory.  It is not
 * assigned anywhere in this part of the file.
 */
vnodeops_t *xattr_dir_ops;

/*
 * Static entries of the GFS XATTR directory: one per system attribute
 * view, each with the constructor that builds its file vnode.  The list
 * is terminated by a NULL entry.
 */
static gfs_dirent_t xattr_dirents[] = {
	{ VIEW_READONLY, xattr_mkfile_ro, GFS_CACHE_VNODE, },
	{ VIEW_READWRITE, xattr_mkfile_rw, GFS_CACHE_VNODE, },
	{ NULL },
};

/* Number of real entries in xattr_dirents (the NULL terminator excluded) */
#define	XATTRDIR_NENTS	((sizeof (xattr_dirents) / sizeof (gfs_dirent_t)) - 1)
878 
879 static int
880 is_sattr_name(char *s)
881 {
882 	int i;
883 
884 	for (i = 0; i < XATTRDIR_NENTS; ++i) {
885 		if (strcmp(s, xattr_dirents[i].gfse_name) == 0) {
886 			return (1);
887 		}
888 	}
889 	return (0);
890 }
891 
892 /*
893  * Given the name of an extended attribute file, determine if there is a
894  * normalization conflict with a sysattr view name.
895  */
896 int
897 xattr_sysattr_casechk(char *s)
898 {
899 	int i;
900 
901 	for (i = 0; i < XATTRDIR_NENTS; ++i) {
902 		if (strcasecmp(s, xattr_dirents[i].gfse_name) == 0)
903 			return (1);
904 	}
905 	return (0);
906 }
907 
908 static int
909 xattr_copy(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm,
910     cred_t *cr, caller_context_t *ct)
911 {
912 	xvattr_t xvattr;
913 	vnode_t *pdvp;
914 	int error;
915 
916 	/*
917 	 * Only copy system attrs if the views are the same
918 	 */
919 	if (strcmp(snm, tnm) != 0)
920 		return (EINVAL);
921 
922 	xva_init(&xvattr);
923 
924 	XVA_SET_REQ(&xvattr, XAT_SYSTEM);
925 	XVA_SET_REQ(&xvattr, XAT_READONLY);
926 	XVA_SET_REQ(&xvattr, XAT_HIDDEN);
927 	XVA_SET_REQ(&xvattr, XAT_ARCHIVE);
928 	XVA_SET_REQ(&xvattr, XAT_APPENDONLY);
929 	XVA_SET_REQ(&xvattr, XAT_NOUNLINK);
930 	XVA_SET_REQ(&xvattr, XAT_IMMUTABLE);
931 	XVA_SET_REQ(&xvattr, XAT_NODUMP);
932 	XVA_SET_REQ(&xvattr, XAT_AV_MODIFIED);
933 	XVA_SET_REQ(&xvattr, XAT_AV_QUARANTINED);
934 	XVA_SET_REQ(&xvattr, XAT_CREATETIME);
935 	XVA_SET_REQ(&xvattr, XAT_REPARSE);
936 	XVA_SET_REQ(&xvattr, XAT_OFFLINE);
937 	XVA_SET_REQ(&xvattr, XAT_SPARSE);
938 
939 	pdvp = gfs_file_parent(sdvp);
940 	error = VOP_GETATTR(pdvp, &xvattr.xva_vattr, 0, cr, ct);
941 	if (error)
942 		return (error);
943 
944 	pdvp = gfs_file_parent(tdvp);
945 	error = VOP_SETATTR(pdvp, &xvattr.xva_vattr, 0, cr, ct);
946 	return (error);
947 }
948 
949 /*
950  * Get the "real" XATTR directory associtated with the GFS XATTR directory.
951  * Note: This does NOT take any additional hold on the returned real_vp,
952  * because when this lookup succeeds we save the result in xattr_realvp
953  * and keep that hold until the GFS XATTR directory goes inactive.
954  */
955 static int
956 xattr_dir_realdir(vnode_t *gfs_dvp, vnode_t **ret_vpp, int flags,
957     cred_t *cr, caller_context_t *ct)
958 {
959 	struct pathname pn;
960 	char *nm = "";
961 	xattr_dir_t *xattr_dir;
962 	vnode_t *realvp;
963 	int error;
964 
965 	*ret_vpp = NULL;
966 
967 	/*
968 	 * Usually, we've already found the underlying XATTR directory
969 	 * during some previous lookup and stored it in xattr_realvp.
970 	 */
971 	mutex_enter(&gfs_dvp->v_lock);
972 	xattr_dir = gfs_dvp->v_data;
973 	realvp = xattr_dir->xattr_realvp;
974 	mutex_exit(&gfs_dvp->v_lock);
975 	if (realvp != NULL) {
976 		*ret_vpp = realvp;
977 		return (0);
978 	}
979 
980 	/*
981 	 * Lookup the XATTR dir in the underlying FS, relative to our
982 	 * "parent", which is the real object for which this GFS XATTR
983 	 * directory was created.  Set the LOOKUP_HAVE_SYSATTR_DIR flag
984 	 * so that we don't get into an infinite loop with fop_lookup
985 	 * calling back to xattr_dir_lookup.
986 	 */
987 	error = pn_get(nm, UIO_SYSSPACE, &pn);
988 	if (error != 0)
989 		return (error);
990 	error = VOP_LOOKUP(gfs_file_parent(gfs_dvp), nm, &realvp, &pn,
991 	    flags | LOOKUP_HAVE_SYSATTR_DIR, rootvp, cr, ct, NULL, NULL);
992 	pn_free(&pn);
993 	if (error != 0)
994 		return (error);
995 
996 	/*
997 	 * Have the real XATTR directory.  Save it -- but first
998 	 * check whether we lost a race doing the lookup.
999 	 */
1000 	mutex_enter(&gfs_dvp->v_lock);
1001 	xattr_dir = gfs_dvp->v_data;
1002 	if (xattr_dir->xattr_realvp == NULL) {
1003 		/*
1004 		 * Note that the hold taken by the VOP_LOOKUP above is
1005 		 * retained from here until xattr_dir_inactive.
1006 		 */
1007 		xattr_dir->xattr_realvp = realvp;
1008 	} else {
1009 		/* We lost the race. */
1010 		VN_RELE(realvp);
1011 		realvp = xattr_dir->xattr_realvp;
1012 	}
1013 	mutex_exit(&gfs_dvp->v_lock);
1014 
1015 	*ret_vpp = realvp;
1016 	return (0);
1017 }
1018 
1019 /* ARGSUSED */
1020 static int
1021 xattr_dir_open(vnode_t **vpp, int flags, cred_t *cr, caller_context_t *ct)
1022 {
1023 	vnode_t *realvp;
1024 	int error;
1025 
1026 	if (flags & FWRITE) {
1027 		return (EACCES);
1028 	}
1029 
1030 	/*
1031 	 * If there is a real extended attribute directory,
1032 	 * let the underlying FS see the VOP_OPEN call;
1033 	 * otherwise just return zero.
1034 	 */
1035 	error = xattr_dir_realdir(*vpp, &realvp, LOOKUP_XATTR, cr, ct);
1036 	if (error == 0) {
1037 		error = VOP_OPEN(&realvp, flags, cr, ct);
1038 	} else {
1039 		error = 0;
1040 	}
1041 
1042 	return (error);
1043 }
1044 
1045 /* ARGSUSED */
1046 static int
1047 xattr_dir_close(vnode_t *vp, int flags, int count, offset_t off, cred_t *cr,
1048     caller_context_t *ct)
1049 {
1050 	vnode_t *realvp;
1051 	int error;
1052 
1053 	/*
1054 	 * If there is a real extended attribute directory,
1055 	 * let the underlying FS see the VOP_CLOSE call;
1056 	 * otherwise just return zero.
1057 	 */
1058 	error = xattr_dir_realdir(vp, &realvp, LOOKUP_XATTR, cr, ct);
1059 	if (error == 0) {
1060 		error = VOP_CLOSE(realvp, flags, count, off, cr, ct);
1061 	} else {
1062 		error = 0;
1063 	}
1064 
1065 	return (error);
1066 }
1067 
1068 /*
1069  * Retrieve the attributes on an xattr directory.  If there is a "real"
1070  * xattr directory, use that.  Otherwise, get the attributes (represented
1071  * by PARENT_ATTRMASK) from the "parent" node and fill in the rest.  Note
1072  * that VOP_GETATTR() could turn off bits in the va_mask.
1073  */
1074 
1075 #define	PARENT_ATTRMASK	(AT_UID|AT_GID|AT_RDEV|AT_CTIME|AT_MTIME)
1076 
1077 /* ARGSUSED */
1078 static int
1079 xattr_dir_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
1080     caller_context_t *ct)
1081 {
1082 	timestruc_t now;
1083 	vnode_t *pvp;
1084 	int error;
1085 
1086 	error = xattr_dir_realdir(vp, &pvp, LOOKUP_XATTR, cr, ct);
1087 	if (error == 0) {
1088 		error = VOP_GETATTR(pvp, vap, 0, cr, ct);
1089 		if (error) {
1090 			return (error);
1091 		}
1092 		vap->va_nlink += XATTRDIR_NENTS;
1093 		vap->va_size += XATTRDIR_NENTS;
1094 		return (0);
1095 	}
1096 
1097 	/*
1098 	 * There is no real xattr directory.  Cobble together
1099 	 * an entry using info from the parent object (if needed)
1100 	 * plus information common to all xattrs.
1101 	 */
1102 	if (vap->va_mask & PARENT_ATTRMASK) {
1103 		vattr_t pvattr;
1104 		uint_t  off_bits;
1105 
1106 		pvp = gfs_file_parent(vp);
1107 		(void) memset(&pvattr, 0, sizeof (pvattr));
1108 		pvattr.va_mask = PARENT_ATTRMASK;
1109 		error = VOP_GETATTR(pvp, &pvattr, 0, cr, ct);
1110 		if (error) {
1111 			return (error);
1112 		}
1113 
1114 		/*
1115 		 * VOP_GETATTR() might have turned off some bits in
1116 		 * pvattr.va_mask.  This means that the underlying
1117 		 * file system couldn't process those attributes.
1118 		 * We need to make sure those bits get turned off
1119 		 * in the vattr_t structure that gets passed back
1120 		 * to the caller.  Figure out which bits were turned
1121 		 * off (if any) then set pvattr.va_mask before it
1122 		 * gets copied to the vattr_t that the caller sees.
1123 		 */
1124 		off_bits = (pvattr.va_mask ^ PARENT_ATTRMASK) & PARENT_ATTRMASK;
1125 		pvattr.va_mask = vap->va_mask & ~off_bits;
1126 		*vap = pvattr;
1127 	}
1128 
1129 	vap->va_type = VDIR;
1130 	vap->va_mode = MAKEIMODE(vap->va_type, S_ISVTX | 0777);
1131 	vap->va_fsid = vp->v_vfsp->vfs_dev;
1132 	vap->va_nodeid = gfs_file_inode(vp);
1133 	vap->va_nlink = XATTRDIR_NENTS+2;
1134 	vap->va_size = vap->va_nlink;
1135 	gethrestime(&now);
1136 	vap->va_atime = now;
1137 	vap->va_blksize = 0;
1138 	vap->va_nblocks = 0;
1139 	vap->va_seq = 0;
1140 	return (0);
1141 }
1142 
1143 static int
1144 xattr_dir_setattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr,
1145     caller_context_t *ct)
1146 {
1147 	vnode_t *realvp;
1148 	int error;
1149 
1150 	/*
1151 	 * If there is a real xattr directory, do the setattr there.
1152 	 * Otherwise, just return success.  The GFS directory is transient,
1153 	 * and any setattr changes can disappear anyway.
1154 	 */
1155 	error = xattr_dir_realdir(vp, &realvp, LOOKUP_XATTR, cr, ct);
1156 	if (error == 0) {
1157 		error = VOP_SETATTR(realvp, vap, flags, cr, ct);
1158 	}
1159 	if (error == ENOENT) {
1160 		error = 0;
1161 	}
1162 	return (error);
1163 }
1164 
1165 /* ARGSUSED */
1166 static int
1167 xattr_dir_access(vnode_t *vp, int mode, int flags, cred_t *cr,
1168     caller_context_t *ct)
1169 {
1170 	int error;
1171 	vnode_t *realvp = NULL;
1172 
1173 	if (mode & VWRITE) {
1174 		return (EACCES);
1175 	}
1176 
1177 	error = xattr_dir_realdir(vp, &realvp, LOOKUP_XATTR, cr, ct);
1178 	if ((error == ENOENT || error == EINVAL)) {
1179 		/*
1180 		 * These errors mean there's no "real" xattr dir.
1181 		 * The GFS xattr dir always allows access.
1182 		 */
1183 		return (0);
1184 	}
1185 	if (error != 0) {
1186 		/*
1187 		 * The "real" xattr dir was not accessible.
1188 		 */
1189 		return (error);
1190 	}
1191 	/*
1192 	 * We got the "real" xattr dir.
1193 	 * Pass through the access call.
1194 	 */
1195 	error = VOP_ACCESS(realvp, mode, flags, cr, ct);
1196 
1197 	return (error);
1198 }
1199 
1200 static int
1201 xattr_dir_create(vnode_t *dvp, char *name, vattr_t *vap, vcexcl_t excl,
1202     int mode, vnode_t **vpp, cred_t *cr, int flag, caller_context_t *ct,
1203     vsecattr_t *vsecp)
1204 {
1205 	vnode_t *pvp;
1206 	int error;
1207 
1208 	*vpp = NULL;
1209 
1210 	/*
1211 	 * Don't allow creation of extended attributes with sysattr names.
1212 	 */
1213 	if (is_sattr_name(name)) {
1214 		return (gfs_dir_lookup(dvp, name, vpp, cr, 0, NULL, NULL));
1215 	}
1216 
1217 	error = xattr_dir_realdir(dvp, &pvp, LOOKUP_XATTR|CREATE_XATTR_DIR,
1218 	    cr, ct);
1219 	if (error == 0) {
1220 		error = VOP_CREATE(pvp, name, vap, excl, mode, vpp, cr, flag,
1221 		    ct, vsecp);
1222 	}
1223 	return (error);
1224 }
1225 
1226 static int
1227 xattr_dir_remove(vnode_t *dvp, char *name, cred_t *cr, caller_context_t *ct,
1228     int flags)
1229 {
1230 	vnode_t *pvp;
1231 	int error;
1232 
1233 	if (is_sattr_name(name)) {
1234 		return (EACCES);
1235 	}
1236 
1237 	error = xattr_dir_realdir(dvp, &pvp, LOOKUP_XATTR, cr, ct);
1238 	if (error == 0) {
1239 		error = VOP_REMOVE(pvp, name, cr, ct, flags);
1240 	}
1241 	return (error);
1242 }
1243 
1244 static int
1245 xattr_dir_link(vnode_t *tdvp, vnode_t *svp, char *name, cred_t *cr,
1246     caller_context_t *ct, int flags)
1247 {
1248 	vnode_t *pvp;
1249 	int error;
1250 
1251 	if (svp->v_flag & V_SYSATTR) {
1252 		return (EINVAL);
1253 	}
1254 
1255 	error = xattr_dir_realdir(tdvp, &pvp, LOOKUP_XATTR, cr, ct);
1256 	if (error == 0) {
1257 		error = VOP_LINK(pvp, svp, name, cr, ct, flags);
1258 	}
1259 	return (error);
1260 }
1261 
1262 static int
1263 xattr_dir_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm,
1264     cred_t *cr, caller_context_t *ct, int flags)
1265 {
1266 	vnode_t *spvp, *tpvp;
1267 	int error;
1268 
1269 	if (is_sattr_name(snm) || is_sattr_name(tnm))
1270 		return (xattr_copy(sdvp, snm, tdvp, tnm, cr, ct));
1271 	/*
1272 	 * We know that sdvp is a GFS dir, or we wouldn't be here.
1273 	 * Get the real unnamed directory.
1274 	 */
1275 	error = xattr_dir_realdir(sdvp, &spvp, LOOKUP_XATTR, cr, ct);
1276 	if (error) {
1277 		return (error);
1278 	}
1279 
1280 	if (sdvp == tdvp) {
1281 		/*
1282 		 * If the source and target are the same GFS directory, the
1283 		 * underlying unnamed source and target dir will be the same.
1284 		 */
1285 		tpvp = spvp;
1286 	} else if (tdvp->v_flag & V_SYSATTR) {
1287 		/*
1288 		 * If the target dir is a different GFS directory,
1289 		 * find its underlying unnamed dir.
1290 		 */
1291 		error = xattr_dir_realdir(tdvp, &tpvp, LOOKUP_XATTR, cr, ct);
1292 		if (error) {
1293 			return (error);
1294 		}
1295 	} else {
1296 		/*
1297 		 * Target dir is outside of GFS, pass it on through.
1298 		 */
1299 		tpvp = tdvp;
1300 	}
1301 
1302 	error = VOP_RENAME(spvp, snm, tpvp, tnm, cr, ct, flags);
1303 
1304 	return (error);
1305 }
1306 
1307 /*
1308  * readdir_xattr_casecmp: given a system attribute name, see if there
1309  * is a real xattr with the same normalized name.
1310  */
1311 static int
1312 readdir_xattr_casecmp(vnode_t *dvp, char *nm, cred_t *cr, caller_context_t *ct,
1313     int *eflags)
1314 {
1315 	int error;
1316 	vnode_t *vp;
1317 	struct pathname pn;
1318 
1319 	*eflags = 0;
1320 
1321 	error = pn_get(nm, UIO_SYSSPACE, &pn);
1322 	if (error == 0) {
1323 		error = VOP_LOOKUP(dvp, nm, &vp, &pn,
1324 		    FIGNORECASE, rootvp, cr, ct, NULL, NULL);
1325 		if (error == 0) {
1326 			*eflags = ED_CASE_CONFLICT;
1327 			VN_RELE(vp);
1328 		} else if (error == ENOENT) {
1329 			error = 0;
1330 		}
1331 		pn_free(&pn);
1332 	}
1333 
1334 	return (error);
1335 }
1336 
1337 static int
1338 xattr_dir_readdir(vnode_t *dvp, uio_t *uiop, cred_t *cr, int *eofp,
1339     caller_context_t *ct, int flags)
1340 {
1341 	vnode_t *pvp;
1342 	int error;
1343 	int local_eof;
1344 	int reset_off = 0;
1345 	int has_xattrs = 0;
1346 
1347 	if (eofp == NULL) {
1348 		eofp = &local_eof;
1349 	}
1350 	*eofp = 0;
1351 
1352 	/*
1353 	 * See if there is a real extended attribute directory.
1354 	 */
1355 	error = xattr_dir_realdir(dvp, &pvp, LOOKUP_XATTR, cr, ct);
1356 	if (error == 0) {
1357 		has_xattrs = 1;
1358 	}
1359 
1360 	/*
1361 	 * Start by reading up the static entries.
1362 	 */
1363 	if (uiop->uio_loffset == 0) {
1364 		ino64_t pino, ino;
1365 		offset_t off;
1366 		gfs_dir_t *dp = dvp->v_data;
1367 		gfs_readdir_state_t gstate;
1368 
1369 		if (has_xattrs) {
1370 			/*
1371 			 * If there is a real xattr dir, skip . and ..
1372 			 * in the GFS dir.  We'll pick them up below
1373 			 * when we call into the underlying fs.
1374 			 */
1375 			uiop->uio_loffset = GFS_STATIC_ENTRY_OFFSET;
1376 		}
1377 		error = gfs_get_parent_ino(dvp, cr, ct, &pino, &ino);
1378 		if (error == 0) {
1379 			error = gfs_readdir_init(&gstate, dp->gfsd_maxlen, 1,
1380 			    uiop, pino, ino, flags);
1381 		}
1382 		if (error) {
1383 			return (error);
1384 		}
1385 
1386 		while ((error = gfs_readdir_pred(&gstate, uiop, &off)) == 0 &&
1387 		    !*eofp) {
1388 			if (off >= 0 && off < dp->gfsd_nstatic) {
1389 				int eflags;
1390 
1391 				/*
1392 				 * Check to see if this sysattr set name has a
1393 				 * case-insensitive conflict with a real xattr
1394 				 * name.
1395 				 */
1396 				eflags = 0;
1397 				if ((flags & V_RDDIR_ENTFLAGS) && has_xattrs) {
1398 					error = readdir_xattr_casecmp(pvp,
1399 					    dp->gfsd_static[off].gfse_name,
1400 					    cr, ct, &eflags);
1401 					if (error)
1402 						break;
1403 				}
1404 				ino = dp->gfsd_inode(dvp, off);
1405 
1406 				error = gfs_readdir_emit(&gstate, uiop, off,
1407 				    ino, dp->gfsd_static[off].gfse_name,
1408 				    eflags);
1409 				if (error)
1410 					break;
1411 			} else {
1412 				*eofp = 1;
1413 			}
1414 		}
1415 
1416 		error = gfs_readdir_fini(&gstate, error, eofp, *eofp);
1417 		if (error) {
1418 			return (error);
1419 		}
1420 
1421 		/*
1422 		 * We must read all of the static entries in the first
1423 		 * call.  Otherwise we won't know if uio_loffset in a
1424 		 * subsequent call refers to the static entries or to those
1425 		 * in an underlying fs.
1426 		 */
1427 		if (*eofp == 0)
1428 			return (EINVAL);
1429 		reset_off = 1;
1430 	}
1431 
1432 	if (!has_xattrs) {
1433 		*eofp = 1;
1434 		return (0);
1435 	}
1436 
1437 	*eofp = 0;
1438 	if (reset_off) {
1439 		uiop->uio_loffset = 0;
1440 	}
1441 	(void) VOP_RWLOCK(pvp, V_WRITELOCK_FALSE, NULL);
1442 	error = VOP_READDIR(pvp, uiop, cr, eofp, ct, flags);
1443 	VOP_RWUNLOCK(pvp, V_WRITELOCK_FALSE, NULL);
1444 
1445 	return (error);
1446 }
1447 
1448 /*
1449  * Last reference on a (GFS) XATTR directory.
1450  *
1451  * If there's a real XATTR directory in the underlying FS, we will have
1452  * taken a hold on that directory in xattr_dir_realdir.  Now that the
1453  * last hold on the GFS directory is gone, it's time to release that
1454  * hold on the underlying XATTR directory.
1455  */
1456 /* ARGSUSED */
1457 static void
1458 xattr_dir_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct)
1459 {
1460 	xattr_dir_t *dp;
1461 
1462 	dp = gfs_dir_inactive(vp);	/* will track v_count */
1463 	if (dp != NULL) {
1464 		/* vp was freed */
1465 		if (dp->xattr_realvp != NULL)
1466 			VN_RELE(dp->xattr_realvp);
1467 
1468 		kmem_free(dp, ((gfs_file_t *)dp)->gfs_size);
1469 	}
1470 }
1471 
1472 static int
1473 xattr_dir_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr,
1474     caller_context_t *ct)
1475 {
1476 	switch (cmd) {
1477 	case _PC_XATTR_EXISTS:
1478 	case _PC_SATTR_ENABLED:
1479 	case _PC_SATTR_EXISTS:
1480 		*valp = 0;
1481 		return (0);
1482 	default:
1483 		return (fs_pathconf(vp, cmd, valp, cr, ct));
1484 	}
1485 }
1486 
1487 /* ARGSUSED */
1488 static int
1489 xattr_dir_realvp(vnode_t *vp, vnode_t **realvp, caller_context_t *ct)
1490 {
1491 	int error;
1492 
1493 	error = xattr_dir_realdir(vp, realvp, LOOKUP_XATTR, kcred, NULL);
1494 	return (error);
1495 
1496 }
1497 
1498 static const fs_operation_def_t xattr_dir_tops[] = {
1499 	{ VOPNAME_OPEN,		{ .vop_open = xattr_dir_open }		},
1500 	{ VOPNAME_CLOSE,	{ .vop_close = xattr_dir_close }	},
1501 	{ VOPNAME_IOCTL,	{ .error = fs_inval }			},
1502 	{ VOPNAME_GETATTR,	{ .vop_getattr = xattr_dir_getattr }	},
1503 	{ VOPNAME_SETATTR,	{ .vop_setattr = xattr_dir_setattr }	},
1504 	{ VOPNAME_ACCESS,	{ .vop_access = xattr_dir_access }	},
1505 	{ VOPNAME_READDIR,	{ .vop_readdir = xattr_dir_readdir }	},
1506 	{ VOPNAME_LOOKUP,	{ .vop_lookup = gfs_vop_lookup }	},
1507 	{ VOPNAME_CREATE,	{ .vop_create = xattr_dir_create }	},
1508 	{ VOPNAME_REMOVE,	{ .vop_remove = xattr_dir_remove }	},
1509 	{ VOPNAME_LINK,		{ .vop_link = xattr_dir_link }		},
1510 	{ VOPNAME_RENAME,	{ .vop_rename = xattr_dir_rename }	},
1511 	{ VOPNAME_MKDIR,	{ .error = fs_inval }			},
1512 	{ VOPNAME_SEEK,		{ .vop_seek = fs_seek }			},
1513 	{ VOPNAME_INACTIVE,	{ .vop_inactive = xattr_dir_inactive }	},
1514 	{ VOPNAME_FID,		{ .vop_fid = xattr_common_fid }		},
1515 	{ VOPNAME_PATHCONF,	{ .vop_pathconf = xattr_dir_pathconf }	},
1516 	{ VOPNAME_REALVP,	{ .vop_realvp = xattr_dir_realvp } },
1517 	{ NULL, NULL }
1518 };
1519 
1520 static gfs_opsvec_t xattr_opsvec[] = {
1521 	{ "xattr dir", xattr_dir_tops, &xattr_dir_ops },
1522 	{ "system attributes", xattr_file_tops, &xattr_file_ops },
1523 	{ NULL, NULL, NULL }
1524 };
1525 
1526 /*
1527  * Callback supporting lookup in a GFS XATTR directory.
1528  */
1529 static int
1530 xattr_lookup_cb(vnode_t *vp, const char *nm, vnode_t **vpp, ino64_t *inop,
1531     cred_t *cr, int flags, int *deflags, pathname_t *rpnp)
1532 {
1533 	vnode_t *pvp;
1534 	struct pathname pn;
1535 	int error;
1536 
1537 	*vpp = NULL;
1538 	*inop = 0;
1539 
1540 	error = xattr_dir_realdir(vp, &pvp, LOOKUP_XATTR, cr, NULL);
1541 
1542 	/*
1543 	 * Return ENOENT for EACCES requests during lookup.  Once an
1544 	 * attribute create is attempted EACCES will be returned.
1545 	 */
1546 	if (error) {
1547 		if (error == EACCES)
1548 			return (ENOENT);
1549 		return (error);
1550 	}
1551 
1552 	error = pn_get((char *)nm, UIO_SYSSPACE, &pn);
1553 	if (error == 0) {
1554 		error = VOP_LOOKUP(pvp, (char *)nm, vpp, &pn, flags, rootvp,
1555 		    cr, NULL, deflags, rpnp);
1556 		pn_free(&pn);
1557 	}
1558 
1559 	return (error);
1560 }
1561 
1562 /* ARGSUSED */
1563 static ino64_t
1564 xattrdir_do_ino(vnode_t *vp, int index)
1565 {
1566 	/*
1567 	 * We use index 0 for the directory fid.  Start
1568 	 * the file numbering at 1.
1569 	 */
1570 	return ((ino64_t)index+1);
1571 }
1572 
1573 void
1574 xattr_init(void)
1575 {
1576 	VERIFY(gfs_make_opsvec(xattr_opsvec) == 0);
1577 }
1578 
1579 /*
1580  * Get the XATTR dir for some file or directory.
1581  * See vnode.c: fop_lookup()
1582  *
1583  * Note this only gets the GFS XATTR directory.  We'll get the
1584  * real XATTR directory later, in xattr_dir_realdir.
1585  */
1586 int
1587 xattr_dir_lookup(vnode_t *dvp, vnode_t **vpp, int flags, cred_t *cr)
1588 {
1589 	int error = 0;
1590 
1591 	*vpp = NULL;
1592 
1593 	if (dvp->v_type != VDIR && dvp->v_type != VREG)
1594 		return (EINVAL);
1595 
1596 	mutex_enter(&dvp->v_lock);
1597 
1598 	/*
1599 	 * If we're already in sysattr space, don't allow creation
1600 	 * of another level of sysattrs.
1601 	 */
1602 	if (dvp->v_flag & V_SYSATTR) {
1603 		mutex_exit(&dvp->v_lock);
1604 		return (EINVAL);
1605 	}
1606 
1607 	if (dvp->v_xattrdir != NULL) {
1608 		*vpp = dvp->v_xattrdir;
1609 		VN_HOLD(*vpp);
1610 	} else {
1611 		ulong_t val;
1612 		int xattrs_allowed = dvp->v_vfsp->vfs_flag & VFS_XATTR;
1613 		int sysattrs_allowed = 1;
1614 
1615 		/*
1616 		 * We have to drop the lock on dvp.  gfs_dir_create will
1617 		 * grab it for a VN_HOLD.
1618 		 */
1619 		mutex_exit(&dvp->v_lock);
1620 
1621 		/*
1622 		 * If dvp allows xattr creation, but not sysattr
1623 		 * creation, return the real xattr dir vp. We can't
1624 		 * use the vfs feature mask here because _PC_SATTR_ENABLED
1625 		 * has vnode-level granularity (e.g. .zfs).
1626 		 */
1627 		error = VOP_PATHCONF(dvp, _PC_SATTR_ENABLED, &val, cr, NULL);
1628 		if (error != 0 || val == 0)
1629 			sysattrs_allowed = 0;
1630 
1631 		if (!xattrs_allowed && !sysattrs_allowed)
1632 			return (EINVAL);
1633 
1634 		if (!sysattrs_allowed) {
1635 			struct pathname pn;
1636 			char *nm = "";
1637 
1638 			error = pn_get(nm, UIO_SYSSPACE, &pn);
1639 			if (error)
1640 				return (error);
1641 			error = VOP_LOOKUP(dvp, nm, vpp, &pn,
1642 			    flags|LOOKUP_HAVE_SYSATTR_DIR, rootvp, cr, NULL,
1643 			    NULL, NULL);
1644 			pn_free(&pn);
1645 			return (error);
1646 		}
1647 
1648 		/*
1649 		 * Note that we act as if we were given CREATE_XATTR_DIR,
1650 		 * but only for creation of the GFS directory.
1651 		 */
1652 		*vpp = gfs_dir_create(
1653 		    sizeof (xattr_dir_t), dvp, xattr_dir_ops, xattr_dirents,
1654 		    xattrdir_do_ino, MAXNAMELEN, NULL, xattr_lookup_cb);
1655 		mutex_enter(&dvp->v_lock);
1656 		if (dvp->v_xattrdir != NULL) {
1657 			/*
1658 			 * We lost the race to create the xattr dir.
1659 			 * Destroy this one, use the winner.  We can't
1660 			 * just call VN_RELE(*vpp), because the vnode
1661 			 * is only partially initialized.
1662 			 */
1663 			gfs_dir_t *dp = (*vpp)->v_data;
1664 
1665 			ASSERT((*vpp)->v_count == 1);
1666 			vn_free(*vpp);
1667 			VN_RELE_LOCKED(dvp);
1668 
1669 			mutex_destroy(&dp->gfsd_lock);
1670 			kmem_free(dp->gfsd_static,
1671 			    dp->gfsd_nstatic * sizeof (gfs_dirent_t));
1672 			kmem_free(dp, dp->gfsd_file.gfs_size);
1673 
1674 			/* dvp was held by winner in gfs_dir_create */
1675 			*vpp = dvp->v_xattrdir;
1676 			VN_HOLD(*vpp);
1677 		} else {
1678 			/* winner */
1679 			(*vpp)->v_flag |= (V_XATTRDIR|V_SYSATTR);
1680 			dvp->v_xattrdir = *vpp;
1681 		}
1682 	}
1683 	mutex_exit(&dvp->v_lock);
1684 
1685 	return (error);
1686 }
1687 
1688 int
1689 xattr_dir_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp)
1690 {
1691 	int error;
1692 	vnode_t *pvp, *dvp;
1693 	xattr_fid_t *xfidp;
1694 	struct pathname pn;
1695 	char *nm;
1696 	uint16_t orig_len;
1697 
1698 	*vpp = NULL;
1699 
1700 	if (fidp->fid_len < XATTR_FIDSZ)
1701 		return (EINVAL);
1702 
1703 	xfidp = (xattr_fid_t *)fidp;
1704 	orig_len = fidp->fid_len;
1705 	fidp->fid_len = xfidp->parent_len;
1706 
1707 	error = VFS_VGET(vfsp, &pvp, fidp);
1708 	fidp->fid_len = orig_len;
1709 	if (error)
1710 		return (error);
1711 
1712 	/*
1713 	 * Start by getting the GFS sysattr directory.	We might need
1714 	 * to recreate it during the VOP_LOOKUP.
1715 	 */
1716 	nm = "";
1717 	error = pn_get(nm, UIO_SYSSPACE, &pn);
1718 	if (error) {
1719 		VN_RELE(pvp);
1720 		return (EINVAL);
1721 	}
1722 
1723 	error = VOP_LOOKUP(pvp, nm, &dvp, &pn, LOOKUP_XATTR|CREATE_XATTR_DIR,
1724 	    rootvp, CRED(), NULL, NULL, NULL);
1725 	pn_free(&pn);
1726 	VN_RELE(pvp);
1727 	if (error)
1728 		return (error);
1729 
1730 	if (xfidp->dir_offset == 0) {
1731 		/*
1732 		 * If we were looking for the directory, we're done.
1733 		 */
1734 		*vpp = dvp;
1735 		return (0);
1736 	}
1737 
1738 	if (xfidp->dir_offset > XATTRDIR_NENTS) {
1739 		VN_RELE(dvp);
1740 		return (EINVAL);
1741 	}
1742 
1743 	nm = xattr_dirents[xfidp->dir_offset - 1].gfse_name;
1744 
1745 	error = pn_get(nm, UIO_SYSSPACE, &pn);
1746 	if (error) {
1747 		VN_RELE(dvp);
1748 		return (EINVAL);
1749 	}
1750 
1751 	error = VOP_LOOKUP(dvp, nm, vpp, &pn, 0, rootvp, CRED(), NULL,
1752 	    NULL, NULL);
1753 
1754 	pn_free(&pn);
1755 	VN_RELE(dvp);
1756 
1757 	return (error);
1758 }
1759