xref: /titanic_52/usr/src/uts/common/os/klpd.c (revision 8f514e743bde41fe7e0ca48510a6d4c40ca51c23)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 #include <sys/atomic.h>
27 #include <sys/door.h>
28 #include <sys/proc.h>
29 #include <sys/cred_impl.h>
30 #include <sys/policy.h>
31 #include <sys/priv.h>
32 #include <sys/klpd.h>
33 #include <sys/errno.h>
34 #include <sys/kmem.h>
35 #include <sys/project.h>
36 #include <sys/systm.h>
37 #include <sys/sysmacros.h>
38 #include <sys/pathname.h>
39 #include <sys/varargs.h>
40 #include <sys/zone.h>
41 #include <netinet/in.h>
42 
/*
 * Round a up to the next multiple of n.  The mask arithmetic only gives
 * the right answer when n is a power of two.
 */
#define	ROUNDUP(a, n) (((a) + ((n) - 1)) & ~((n) - 1))

/*
 * Taken in this file around accesses to klpd_list and to a project's
 * kpj_klpd registration.
 */
static kmutex_t klpd_mutex;
46 
/*
 * One registered door server (klpd or pfexec daemon).  Entries are linked
 * through klpd_next/klpd_refp and are freed by klpd_rele() when the
 * reference count drops to zero.
 */
typedef struct klpd_reg {
	struct klpd_reg *klpd_next;	/* next entry on the same list */
	struct klpd_reg **klpd_refp;	/* pointer referencing us, or NULL */
	door_handle_t 	klpd_door;	/* door used for the upcalls */
	pid_t		klpd_door_pid;	/* di_target of the door */
	priv_set_t	klpd_pset;	/* privileges this entry handles */
	cred_t		*klpd_cred;	/* registrant's cred; NULL for pfexec */
	int		klpd_indel;		/* Disabled */
	uint32_t	klpd_ref;	/* reference count, updated atomically */
} klpd_reg_t;
57 
58 
59 /*
60  * This data structure hangs off the credential of a process; the
61  * credential is finalized and cannot be changed; but this structure
62  * can be changed when a new door server for the particular group
63  * needs to be registered.  It is refcounted and shared between
64  * processes with common ancestry.
65  *
66  * The reference count is atomically updated.
67  *
68  * But the registration probably needs to be updated under a lock.
69  */
typedef struct credklpd {
	kmutex_t	crkl_lock;	/* protects crkl_reg */
	klpd_reg_t	*crkl_reg;	/* current registration, or NULL */
	uint32_t	crkl_ref;	/* reference count, updated atomically */
} credklpd_t;
75 
/* Head of the list of globally registered klpds; walked under klpd_mutex. */
klpd_reg_t *klpd_list;

/* Forward declarations for helpers defined later in this file. */
static void klpd_unlink(klpd_reg_t *);
static int klpd_unreg_dh(door_handle_t);

static credklpd_t *crklpd_alloc(void);

void crklpd_setreg(credklpd_t *, klpd_reg_t *);

/* Defined elsewhere; not referenced in the code visible in this file. */
extern size_t max_vnode_path;
86 
87 void
88 klpd_rele(klpd_reg_t *p)
89 {
90 	if (atomic_add_32_nv(&p->klpd_ref, -1) == 0) {
91 		if (p->klpd_refp != NULL)
92 			klpd_unlink(p);
93 		if (p->klpd_cred != NULL)
94 			crfree(p->klpd_cred);
95 		door_ki_rele(p->klpd_door);
96 		kmem_free(p, sizeof (*p));
97 	}
98 }
99 
100 /*
101  * In order to be able to walk the lists, we can't unlink the entry
102  * until the reference count drops to 0.  If we remove it too soon,
103  * list walkers will terminate when they happen to call a now orphaned
104  * entry.
105  */
106 static klpd_reg_t *
107 klpd_rele_next(klpd_reg_t *p)
108 {
109 	klpd_reg_t *r = p->klpd_next;
110 
111 	klpd_rele(p);
112 	return (r);
113 }
114 
115 
116 static void
117 klpd_hold(klpd_reg_t *p)
118 {
119 	atomic_add_32(&p->klpd_ref, 1);
120 }
121 
/*
 * Remove registration from where it is registered.  Nothing is returned;
 * the entry's own klpd_next pointer is left as it was.
 */
125 static void
126 klpd_unlink(klpd_reg_t *p)
127 {
128 	ASSERT(p->klpd_refp == NULL || *p->klpd_refp == p);
129 
130 	if (p->klpd_refp != NULL)
131 		*p->klpd_refp = p->klpd_next;
132 
133 	if (p->klpd_next != NULL)
134 		p->klpd_next->klpd_refp = p->klpd_refp;
135 	p->klpd_refp = NULL;
136 }
137 
/*
 * Remove all elements of the klpd list and decrement their refcnts.
 * The lock guarding the list should be held; this function is
 * called when we are sure we want to destroy the list completely
 * but are not so sure that the reference counts of all elements have
 * dropped back to 1.
 */
145 void
146 klpd_freelist(klpd_reg_t **pp)
147 {
148 	klpd_reg_t *p;
149 
150 	while ((p = *pp) != NULL) {
151 		klpd_unlink(p);
152 		klpd_rele(p);
153 	}
154 }
155 
156 /*
157  * Link new entry in list.  The Boolean argument specifies whether this
158  * list can contain only a single item or multiple items.
159  * Returns the entry which needs to be released if single is B_TRUE.
160  */
161 static klpd_reg_t *
162 klpd_link(klpd_reg_t *p, klpd_reg_t **listp, boolean_t single)
163 {
164 	klpd_reg_t *old = *listp;
165 
166 	ASSERT(p->klpd_ref == 1);
167 
168 	ASSERT(old == NULL || *old->klpd_refp == old);
169 	p->klpd_refp = listp;
170 	p->klpd_next = single ? NULL : old;
171 	*listp = p;
172 	if (old != NULL) {
173 		if (single) {
174 			ASSERT(old->klpd_next == NULL);
175 			old->klpd_refp = NULL;
176 			return (old);
177 		} else
178 			old->klpd_refp = &p->klpd_next;
179 	}
180 	return (NULL);
181 }
182 
183 /*
184  * The typical call consists of:
185  *	- priv_set_t
186  *	- some integer data (type, value)
187  * for now, it's just one bit.
188  */
/*
 * Build the door call buffer for one upcall.
 *
 * The buffer starts with a klpd_head_t, followed by a copy of the requested
 * privilege set *rq and, unless the first variable argument is
 * KLPDARG_NOMORE, a single klpd_arg_t.  The first variable argument selects
 * the argument type; the remaining ones are consumed according to it:
 *	KLPDARG_VNODE	vnode_t *, char * (optional trailing component)
 *	KLPDARG_PORT	int protocol, int value
 *	KLPDARG_INT and the KLPDARG_*PORT types	int value
 *
 * Returns the allocated buffer (its size is recorded in klh_len, which is
 * what the caller passes to kmem_free()), or NULL when the type is unknown,
 * the vnode is NULL, the vnode cannot be turned into a path or the
 * resulting path would be too long.
 */
static klpd_head_t *
klpd_marshall(klpd_reg_t *p, const priv_set_t *rq, va_list ap)
{
	char	*tmp;
	uint_t	type;
	vnode_t *vp;
	/* Every message carries at least the header and the privilege set. */
	size_t	len = sizeof (priv_set_t) + sizeof (klpd_head_t);
	size_t	plen, clen;
	int	proto;

	klpd_arg_t *kap = NULL;
	klpd_head_t *khp;

	type = va_arg(ap, uint_t);
	switch (type) {
	case KLPDARG_NOMORE:
		/* No argument: an argument offset of 0 is stored. */
		khp = kmem_zalloc(len, KM_SLEEP);
		khp->klh_argoff = 0;
		break;
	case KLPDARG_VNODE:
		len += offsetof(klpd_arg_t, kla_str);
		vp = va_arg(ap, vnode_t *);
		if (vp == NULL)
			return (NULL);

		tmp = va_arg(ap, char *);

		/* clen includes the terminating NUL of the component. */
		if (tmp != NULL && *tmp != '\0')
			clen = strlen(tmp) + 1;
		else
			clen = 0;

		/* Always reserve room for a maximum length path. */
		len += ROUNDUP(MAXPATHLEN, sizeof (uint_t));
		khp = kmem_zalloc(len, KM_SLEEP);

		khp->klh_argoff = sizeof (klpd_head_t) + sizeof (priv_set_t);
		kap = KLH_ARG(khp);

		/* Resolve the path relative to the root of the klpd's zone. */
		if (vnodetopath(crgetzone(p->klpd_cred)->zone_rootvp,
		    vp, kap->kla_str, MAXPATHLEN, p->klpd_cred) != 0) {
			kmem_free(khp, len);
			return (NULL);
		}
		if (clen != 0) {
			plen = strlen(kap->kla_str);
			if (plen + clen + 1 >= MAXPATHLEN) {
				kmem_free(khp, len);
				return (NULL);
			}
			/* Don't make root into a double "/" */
			/*
			 * NOTE(review): this also discards any two character
			 * path, not just "/" -- confirm that is intended.
			 */
			if (plen <= 2)
				plen = 0;
			kap->kla_str[plen] = '/';
			bcopy(tmp, &kap->kla_str[plen + 1], clen);
		}
		break;
	case KLPDARG_PORT:
		/*
		 * Translate the generic port type into a protocol specific
		 * one; an unrecognized protocol leaves the type unchanged.
		 */
		proto = va_arg(ap, int);
		switch (proto) {
		case IPPROTO_TCP:	type = KLPDARG_TCPPORT;
					break;
		case IPPROTO_UDP:	type = KLPDARG_UDPPORT;
					break;
		case IPPROTO_SCTP:	type = KLPDARG_SCTPPORT;
					break;
		case PROTO_SDP:		type = KLPDARG_SDPPORT;
					break;
		}
		/* FALLTHROUGH */
	case KLPDARG_INT:
	case KLPDARG_TCPPORT:
	case KLPDARG_UDPPORT:
	case KLPDARG_SCTPPORT:
	case KLPDARG_SDPPORT:
		/* A single integer argument follows the privilege set. */
		len += sizeof (*kap);
		khp = kmem_zalloc(len, KM_SLEEP);
		khp->klh_argoff = sizeof (klpd_head_t) + sizeof (priv_set_t);
		kap = KLH_ARG(khp);
		kap->kla_int = va_arg(ap, int);
		break;
	default:
		return (NULL);
	}
	/* Fill in the parts of the header common to all message types. */
	khp->klh_vers = KLPDCALL_VERS;
	khp->klh_len = len;
	khp->klh_privoff = sizeof (*khp);
	*KLH_PRIVSET(khp) = *rq;
	if (kap != NULL) {
		kap->kla_type = type;
		/* The argument occupies everything from klh_argoff onwards. */
		kap->kla_dlen = len - khp->klh_argoff;
	}
	return (khp);
}
282 
283 static int
284 klpd_do_call(klpd_reg_t *p, const priv_set_t *req, va_list ap)
285 {
286 	door_arg_t da;
287 	int res;
288 	int dres;
289 	klpd_head_t *klh;
290 
291 	if (p->klpd_door_pid == curproc->p_pid)
292 		return (-1);
293 
294 	klh = klpd_marshall(p, req, ap);
295 
296 	if (klh == NULL)
297 		return (-1);
298 
299 	da.data_ptr = (char *)klh;
300 	da.data_size = klh->klh_len;
301 	da.desc_ptr = NULL;
302 	da.desc_num = 0;
303 	da.rbuf = (char *)&res;
304 	da.rsize = sizeof (res);
305 
306 	while ((dres = door_ki_upcall_limited(p->klpd_door, &da, NULL,
307 	    SIZE_MAX, 0)) != 0) {
308 		switch (dres) {
309 		case EAGAIN:
310 			delay(1);
311 			continue;
312 		case EINVAL:
313 		case EBADF:
314 			/* Bad door, don't call it again. */
315 			(void) klpd_unreg_dh(p->klpd_door);
316 			/* FALLTHROUGH */
317 		case EINTR:
318 			/* Pending signal, nothing we can do. */
319 			/* FALLTHROUGH */
320 		default:
321 			kmem_free(klh, klh->klh_len);
322 			return (-1);
323 		}
324 	}
325 	kmem_free(klh, klh->klh_len);
326 	/* Bogus return value, must be a failure */
327 	if (da.rbuf != (char *)&res) {
328 		kmem_free(da.rbuf, da.rsize);
329 		return (-1);
330 	}
331 	return (res);
332 }
333 
/*
 * Number of klpd_call() invocations refused because pidlock, p_lock or
 * p_crlock was held by the caller.
 */
uint32_t klpd_bad_locks;
335 
/*
 * Ask the registered klpds whether the privileges in req may be granted.
 *
 * Registrations are consulted in this order: the one attached to the
 * credential, the one attached to the current project, and finally the
 * entries on the global list registered from the global zone or from the
 * credential's own zone.  An entry is only called when it is not disabled
 * and req is a subset of its privilege set.  The first upcall returning
 * something other than -1 decides the outcome.
 *
 * Returns 0 when the deciding upcall returned 0, and -1 in every other
 * case, including when no registration answered, when req exceeds the
 * credential's limit set and when one of the forbidden locks is held.
 */
int
klpd_call(const cred_t *cr, const priv_set_t *req, va_list ap)
{
	klpd_reg_t *p;
	int rv = -1;
	credklpd_t *ckp;
	zone_t *ckzone;

	/*
	 * These locks must not be held when this code is called;
	 * callbacks to userland with these locks held will result
	 * in issues.  That said, the code at the call sites was
	 * restructured not to call with any of the locks held and
	 * no policies operate by default on most processes.
	 */
	if (mutex_owned(&pidlock) || mutex_owned(&curproc->p_lock) ||
	    mutex_owned(&curproc->p_crlock)) {
		atomic_add_32(&klpd_bad_locks, 1);
		return (-1);
	}

	/*
	 * Enforce the limit set for the call process (still).
	 */
	if (!priv_issubset(req, &CR_LPRIV(cr)))
		return (-1);

	/* Try 1: get the credential specific klpd */
	if ((ckp = crgetcrklpd(cr)) != NULL) {
		mutex_enter(&ckp->crkl_lock);
		if ((p = ckp->crkl_reg) != NULL &&
		    p->klpd_indel == 0 &&
		    priv_issubset(req, &p->klpd_pset)) {
			/* Hold the entry so the lock can be dropped. */
			klpd_hold(p);
			mutex_exit(&ckp->crkl_lock);
			rv = klpd_do_call(p, req, ap);
			mutex_enter(&ckp->crkl_lock);
			klpd_rele(p);
			mutex_exit(&ckp->crkl_lock);
			if (rv != -1)
				return (rv == 0 ? 0 : -1);
		} else {
			mutex_exit(&ckp->crkl_lock);
		}
	}

	/* Try 2: get the project specific klpd */
	mutex_enter(&klpd_mutex);

	if ((p = curproj->kpj_klpd) != NULL) {
		klpd_hold(p);
		mutex_exit(&klpd_mutex);
		if (p->klpd_indel == 0 &&
		    priv_issubset(req, &p->klpd_pset)) {
			rv = klpd_do_call(p, req, ap);
		}
		mutex_enter(&klpd_mutex);
		klpd_rele(p);
		mutex_exit(&klpd_mutex);

		if (rv != -1)
			return (rv == 0 ? 0 : -1);
	} else {
		mutex_exit(&klpd_mutex);
	}

	/* Try 3: get the global klpd list */
	ckzone = crgetzone(cr);
	mutex_enter(&klpd_mutex);

	for (p = klpd_list; p != NULL; ) {
		zone_t *kkzone = crgetzone(p->klpd_cred);
		/* Only global zone entries or entries from our own zone. */
		if ((kkzone == &zone0 || kkzone == ckzone) &&
		    p->klpd_indel == 0 &&
		    priv_issubset(req, &p->klpd_pset)) {
			klpd_hold(p);
			mutex_exit(&klpd_mutex);
			rv = klpd_do_call(p, req, ap);
			mutex_enter(&klpd_mutex);

			/* Drop our hold and step to the next entry. */
			p = klpd_rele_next(p);

			if (rv != -1)
				break;
		} else {
			p = p->klpd_next;
		}
	}
	mutex_exit(&klpd_mutex);
	return (rv == 0 ? 0 : -1);
}
427 
428 /*
429  * Register the klpd.
430  * If the pid_t passed in is positive, update the registration for
431  * the specific process; that is only possible if the process already
432  * has a registration on it.  This change of registration will affect
433  * all processes which share common ancestry.
434  *
435  * MY_PID (pid 0) can be used to create or change the context for
436  * the current process, typically done after fork().
437  *
438  * A negative value can be used to register a klpd globally.
439  *
440  * The per-credential klpd needs to be cleaned up when entering
441  * a zone or unsetting the flag.
442  */
443 int
444 klpd_reg(int did, idtype_t type, id_t id, priv_set_t *psetbuf)
445 {
446 	cred_t *cr = CRED();
447 	door_handle_t dh;
448 	klpd_reg_t *kpd;
449 	priv_set_t pset;
450 	door_info_t di;
451 	credklpd_t *ckp = NULL;
452 	pid_t pid = -1;
453 	projid_t proj = -1;
454 	kproject_t *kpp = NULL;
455 
456 	if (CR_FLAGS(cr) & PRIV_XPOLICY)
457 		return (set_errno(EINVAL));
458 
459 	if (copyin(psetbuf, &pset, sizeof (priv_set_t)))
460 		return (set_errno(EFAULT));
461 
462 	if (!priv_issubset(&pset, &CR_OEPRIV(cr)))
463 		return (set_errno(EPERM));
464 
465 	switch (type) {
466 	case P_PID:
467 		pid = (pid_t)id;
468 		if (pid == P_MYPID)
469 			pid = curproc->p_pid;
470 		if (pid == curproc->p_pid)
471 			ckp = crklpd_alloc();
472 		break;
473 	case P_PROJID:
474 		proj = (projid_t)id;
475 		kpp = project_hold_by_id(proj, crgetzone(cr),
476 		    PROJECT_HOLD_FIND);
477 		if (kpp == NULL)
478 			return (set_errno(ESRCH));
479 		break;
480 	default:
481 		return (set_errno(ENOTSUP));
482 	}
483 
484 
485 	/*
486 	 * Verify the door passed in; it must be a door and we won't
487 	 * allow processes to be called on their own behalf.
488 	 */
489 	dh = door_ki_lookup(did);
490 	if (dh == NULL || door_ki_info(dh, &di) != 0) {
491 		if (ckp != NULL)
492 			crklpd_rele(ckp);
493 		if (kpp != NULL)
494 			project_rele(kpp);
495 		return (set_errno(EBADF));
496 	}
497 	if (type == P_PID && pid == di.di_target) {
498 		if (ckp != NULL)
499 			crklpd_rele(ckp);
500 		ASSERT(kpp == NULL);
501 		return (set_errno(EINVAL));
502 	}
503 
504 	kpd = kmem_zalloc(sizeof (*kpd), KM_SLEEP);
505 	crhold(kpd->klpd_cred = cr);
506 	kpd->klpd_door = dh;
507 	kpd->klpd_door_pid = di.di_target;
508 	kpd->klpd_ref = 1;
509 	kpd->klpd_pset = pset;
510 
511 	if (kpp != NULL) {
512 		mutex_enter(&klpd_mutex);
513 		kpd = klpd_link(kpd, &kpp->kpj_klpd, B_TRUE);
514 		mutex_exit(&klpd_mutex);
515 		if (kpd != NULL)
516 			klpd_rele(kpd);
517 		project_rele(kpp);
518 	} else if ((int)pid < 0) {
519 		/* Global daemon */
520 		mutex_enter(&klpd_mutex);
521 		(void) klpd_link(kpd, &klpd_list, B_FALSE);
522 		mutex_exit(&klpd_mutex);
523 	} else if (pid == curproc->p_pid) {
524 		proc_t *p = curproc;
525 		cred_t *newcr = cralloc();
526 
527 		/* No need to lock, sole reference to ckp */
528 		kpd = klpd_link(kpd, &ckp->crkl_reg, B_TRUE);
529 
530 		if (kpd != NULL)
531 			klpd_rele(kpd);
532 
533 		mutex_enter(&p->p_crlock);
534 		cr = p->p_cred;
535 		crdup_to(cr, newcr);
536 		crsetcrklpd(newcr, ckp);
537 		p->p_cred = newcr;	/* Already held for p_cred */
538 
539 		crhold(newcr);		/* Hold once for the current thread */
540 		mutex_exit(&p->p_crlock);
541 		crfree(cr);		/* One for the p_cred */
542 		crset(p, newcr);
543 	} else {
544 		proc_t *p;
545 		cred_t *pcr;
546 		mutex_enter(&pidlock);
547 		p = prfind(pid);
548 		if (p == NULL || !prochasprocperm(p, curproc, CRED())) {
549 			mutex_exit(&pidlock);
550 			klpd_rele(kpd);
551 			return (set_errno(p == NULL ? ESRCH : EPERM));
552 		}
553 		mutex_enter(&p->p_crlock);
554 		crhold(pcr = p->p_cred);
555 		mutex_exit(&pidlock);
556 		mutex_exit(&p->p_crlock);
557 		/*
558 		 * We're going to update the credential's ckp in place;
559 		 * this requires that it exists.
560 		 */
561 		ckp = crgetcrklpd(pcr);
562 		if (ckp == NULL) {
563 			crfree(pcr);
564 			klpd_rele(kpd);
565 			return (set_errno(EINVAL));
566 		}
567 		crklpd_setreg(ckp, kpd);
568 		crfree(pcr);
569 	}
570 
571 	return (0);
572 }
573 
574 static int
575 klpd_unreg_dh(door_handle_t dh)
576 {
577 	klpd_reg_t *p;
578 
579 	mutex_enter(&klpd_mutex);
580 	for (p = klpd_list; p != NULL; p = p->klpd_next) {
581 		if (p->klpd_door == dh)
582 			break;
583 	}
584 	if (p == NULL) {
585 		mutex_exit(&klpd_mutex);
586 		return (EINVAL);
587 	}
588 	if (p->klpd_indel != 0) {
589 		mutex_exit(&klpd_mutex);
590 		return (EAGAIN);
591 	}
592 	p->klpd_indel = 1;
593 	klpd_rele(p);
594 	mutex_exit(&klpd_mutex);
595 	return (0);
596 }
597 
/*
 * Remove a klpd registration.
 *
 * type/id select what to unregister: a project (P_PROJID), a process
 * (P_PID with a positive id), the current process (P_PID with id 0) or,
 * for a negative P_PID id, the global registration that uses door did.
 *
 * Returns 0 on success.  On failure the value of set_errno() is returned
 * with the error set to ESRCH (no such project or process, or nothing
 * registered there), ENOTSUP (unsupported id type), EINVAL (did is not a
 * door, or no global registration uses it) or EAGAIN (global registration
 * already disabled).
 */
int
klpd_unreg(int did, idtype_t type, id_t id)
{
	door_handle_t dh;
	int res = 0;
	proc_t *p;
	pid_t pid;
	projid_t proj;
	kproject_t *kpp = NULL;
	credklpd_t *ckp;

	switch (type) {
	case P_PID:
		pid = (pid_t)id;
		break;
	case P_PROJID:
		proj = (projid_t)id;
		kpp = project_hold_by_id(proj, crgetzone(CRED()),
		    PROJECT_HOLD_FIND);
		if (kpp == NULL)
			return (set_errno(ESRCH));
		break;
	default:
		return (set_errno(ENOTSUP));
	}

	/*
	 * NOTE(review): the handle is only compared against registrations
	 * in the global case below; the project and process cases merely
	 * look it up and release it again.
	 */
	dh = door_ki_lookup(did);
	if (dh == NULL) {
		if (kpp != NULL)
			project_rele(kpp);
		return (set_errno(EINVAL));
	}

	if (kpp != NULL) {
		/* Project: drop whatever is registered on it. */
		mutex_enter(&klpd_mutex);
		if (kpp->kpj_klpd == NULL)
			res = ESRCH;
		else
			klpd_freelist(&kpp->kpj_klpd);
		mutex_exit(&klpd_mutex);
		project_rele(kpp);
		goto out;
	} else if ((int)pid > 0) {
		/* Specific process: p_crlock is taken before pidlock drops. */
		mutex_enter(&pidlock);
		p = prfind(pid);
		if (p == NULL) {
			mutex_exit(&pidlock);
			door_ki_rele(dh);
			return (set_errno(ESRCH));
		}
		mutex_enter(&p->p_crlock);
		mutex_exit(&pidlock);
	} else if (pid == 0) {
		/* Current process. */
		p = curproc;
		mutex_enter(&p->p_crlock);
	} else {
		/* Global registration, identified by its door handle. */
		res = klpd_unreg_dh(dh);
		goto out;
	}

	/* Process cases: clear the registration on the process credential. */
	ckp = crgetcrklpd(p->p_cred);
	if (ckp != NULL) {
		crklpd_setreg(ckp, NULL);
	} else {
		res = ESRCH;
	}
	mutex_exit(&p->p_crlock);

out:
	door_ki_rele(dh);

	if (res != 0)
		return (set_errno(res));
	return (0);
}
673 
674 void
675 crklpd_hold(credklpd_t *crkpd)
676 {
677 	atomic_add_32(&crkpd->crkl_ref, 1);
678 }
679 
680 void
681 crklpd_rele(credklpd_t *crkpd)
682 {
683 	if (atomic_add_32_nv(&crkpd->crkl_ref, -1) == 0) {
684 		if (crkpd->crkl_reg != NULL)
685 			klpd_rele(crkpd->crkl_reg);
686 		mutex_destroy(&crkpd->crkl_lock);
687 		kmem_free(crkpd, sizeof (*crkpd));
688 	}
689 }
690 
691 static credklpd_t *
692 crklpd_alloc(void)
693 {
694 	credklpd_t *res = kmem_alloc(sizeof (*res), KM_SLEEP);
695 
696 	mutex_init(&res->crkl_lock, NULL, MUTEX_DEFAULT, NULL);
697 	res->crkl_ref = 1;
698 	res->crkl_reg = NULL;
699 
700 	return (res);
701 }
702 
703 void
704 crklpd_setreg(credklpd_t *crk, klpd_reg_t *new)
705 {
706 	klpd_reg_t *old;
707 
708 	mutex_enter(&crk->crkl_lock);
709 	if (new == NULL) {
710 		old = crk->crkl_reg;
711 		if (old != NULL)
712 			klpd_unlink(old);
713 	} else {
714 		old = klpd_link(new, &crk->crkl_reg, B_TRUE);
715 	}
716 	mutex_exit(&crk->crkl_lock);
717 
718 	if (old != NULL)
719 		klpd_rele(old);
720 }
721 
722 /* Allocate and register the pfexec specific callback */
723 int
724 pfexec_reg(int did)
725 {
726 	door_handle_t dh;
727 	int err = secpolicy_pfexec_register(CRED());
728 	klpd_reg_t *pfx;
729 	door_info_t di;
730 	zone_t *myzone = crgetzone(CRED());
731 
732 	if (err != 0)
733 		return (set_errno(err));
734 
735 	dh = door_ki_lookup(did);
736 	if (dh == NULL || door_ki_info(dh, &di) != 0)
737 		return (set_errno(EBADF));
738 
739 	pfx = kmem_zalloc(sizeof (*pfx), KM_SLEEP);
740 
741 	pfx->klpd_door = dh;
742 	pfx->klpd_door_pid = di.di_target;
743 	pfx->klpd_ref = 1;
744 	pfx->klpd_cred = NULL;
745 	mutex_enter(&myzone->zone_lock);
746 	pfx = klpd_link(pfx, &myzone->zone_pfexecd, B_TRUE);
747 	mutex_exit(&myzone->zone_lock);
748 	if (pfx != NULL)
749 		klpd_rele(pfx);
750 
751 	return (0);
752 }
753 
754 int
755 pfexec_unreg(int did)
756 {
757 	door_handle_t dh;
758 	int err = 0;
759 	zone_t *myzone = crgetzone(CRED());
760 	klpd_reg_t *pfd;
761 
762 	dh = door_ki_lookup(did);
763 	if (dh == NULL)
764 		return (set_errno(EBADF));
765 
766 	mutex_enter(&myzone->zone_lock);
767 	pfd = myzone->zone_pfexecd;
768 	if (pfd != NULL && pfd->klpd_door == dh) {
769 		klpd_unlink(pfd);
770 	} else {
771 		pfd = NULL;
772 		err = EINVAL;
773 	}
774 	mutex_exit(&myzone->zone_lock);
775 	door_ki_rele(dh);
776 	/*
777 	 * crfree() cannot be called with zone_lock held; it is called
778 	 * indirectly through closing the door handle
779 	 */
780 	if (pfd != NULL)
781 		klpd_rele(pfd);
782 	if (err != 0)
783 		return (set_errno(err));
784 	return (0);
785 }
786 
787 static int
788 get_path(char *buf, const char *path, int len)
789 {
790 	size_t lc;
791 	char *s;
792 
793 	if (len < 0)
794 		len = strlen(path);
795 
796 	if (*path == '/' && len < MAXPATHLEN) {
797 		(void) strcpy(buf, path);
798 		return (0);
799 	}
800 	/*
801 	 * Build the pathname using the current directory + resolve pathname.
802 	 * The resolve pathname either starts with a normal component and
803 	 * we can just concatenate them or it starts with one
804 	 * or more ".." component and we can remove those; the
805 	 * last one cannot be a ".." and the current directory has
806 	 * more components than the number of ".." in the resolved pathname.
807 	 */
808 	if (dogetcwd(buf, MAXPATHLEN) != 0)
809 		return (-1);
810 
811 	lc = strlen(buf);
812 
813 	while (len > 3 && strncmp("../", path, 3) == 0) {
814 		len -= 3;
815 		path += 3;
816 
817 		s = strrchr(buf, '/');
818 		if (s == NULL || s == buf)
819 			return (-1);
820 
821 		*s = '\0';
822 		lc = s - buf;
823 	}
824 	/* Add a "/" and a NUL */
825 	if (lc < 2 || lc + len + 2 >= MAXPATHLEN)
826 		return (-1);
827 
828 	buf[lc] = '/';
829 	(void) strcpy(buf + lc + 1, path);
830 
831 	return (0);
832 }
833 
834 /*
835  * Perform the pfexec upcall.
836  *
837  * The pfexec upcall is different from the klpd_upcall in that a failure
838  * will lead to a denial of execution.
839  */
840 int
841 pfexec_call(const cred_t *cr, struct pathname *rpnp, cred_t **pfcr,
842     boolean_t *scrub)
843 {
844 	klpd_reg_t *pfd;
845 	pfexec_arg_t *pap;
846 	pfexec_reply_t pr, *prp;
847 	door_arg_t da;
848 	int dres;
849 	cred_t *ncr = NULL;
850 	int err = -1;
851 	priv_set_t *iset;
852 	priv_set_t *lset;
853 	zone_t *myzone = crgetzone(CRED());
854 	size_t pasize = PFEXEC_ARG_SIZE(MAXPATHLEN);
855 
856 	/* Find registration */
857 	mutex_enter(&myzone->zone_lock);
858 	if ((pfd = myzone->zone_pfexecd) != NULL)
859 		klpd_hold(pfd);
860 	mutex_exit(&myzone->zone_lock);
861 
862 	if (pfd == NULL)
863 		return (0);
864 
865 	if (pfd->klpd_door_pid == curproc->p_pid) {
866 		klpd_rele(pfd);
867 		return (0);
868 	}
869 
870 	pap = kmem_zalloc(pasize, KM_SLEEP);
871 
872 	if (get_path(pap->pfa_path, rpnp->pn_path, rpnp->pn_pathlen) == -1)
873 		goto out1;
874 
875 	pap->pfa_vers = PFEXEC_ARG_VERS;
876 	pap->pfa_call = PFEXEC_EXEC_ATTRS;
877 	pap->pfa_len = pasize;
878 	pap->pfa_uid = crgetruid(cr);
879 
880 	da.data_ptr = (char *)pap;
881 	da.data_size = pap->pfa_len;
882 	da.desc_ptr = NULL;
883 	da.desc_num = 0;
884 	da.rbuf = (char *)&pr;
885 	da.rsize = sizeof (pr);
886 
887 	while ((dres = door_ki_upcall(pfd->klpd_door, &da)) != 0) {
888 		switch (dres) {
889 		case EAGAIN:
890 			delay(1);
891 			continue;
892 		case EINVAL:
893 		case EBADF:
894 			/* FALLTHROUGH */
895 		case EINTR:
896 			/* FALLTHROUGH */
897 		default:
898 			goto out;
899 		}
900 	}
901 
902 	prp = (pfexec_reply_t *)da.rbuf;
903 	/*
904 	 * Check the size of the result and the alignment of the
905 	 * privilege sets.
906 	 */
907 	if (da.rsize < sizeof (pr) ||
908 	    prp->pfr_ioff > da.rsize - sizeof (priv_set_t) ||
909 	    prp->pfr_loff > da.rsize - sizeof (priv_set_t) ||
910 	    (prp->pfr_loff & (sizeof (priv_chunk_t) - 1)) != 0 ||
911 	    (prp->pfr_loff & (sizeof (priv_chunk_t) - 1)) != 0)
912 		goto out;
913 
914 	/*
915 	 * Get results:
916 	 *	allow/allow with additional credentials/disallow[*]
917 	 *
918 	 *	euid, uid, egid, gid, privs, and limitprivs
919 	 * We now have somewhat more flexibility we could even set E and P
920 	 * judiciously but that would break some currently valid assumptions
921 	 *	[*] Disallow is not readily supported by always including
922 	 *	the Basic Solaris User profile in all user's profiles.
923 	 */
924 
925 	if (!prp->pfr_allowed) {
926 		err = EACCES;
927 		goto out;
928 	}
929 	if (!prp->pfr_setcred) {
930 		err = 0;
931 		goto out;
932 	}
933 	ncr = crdup((cred_t *)cr);
934 
935 	/*
936 	 * Generate the new credential set scrubenv if ruid != euid (or set)
937 	 * the "I'm set-uid flag" but that is not inherited so scrubbing
938 	 * the environment is a requirement.
939 	 */
940 	/* Set uids or gids, note that -1 will do the right thing */
941 	if (crsetresuid(ncr, prp->pfr_ruid, prp->pfr_euid, prp->pfr_euid) != 0)
942 		goto out;
943 	if (crsetresgid(ncr, prp->pfr_rgid, prp->pfr_egid, prp->pfr_egid) != 0)
944 		goto out;
945 
946 	*scrub = prp->pfr_scrubenv;
947 
948 	if (prp->pfr_clearflag)
949 		CR_FLAGS(ncr) &= ~PRIV_PFEXEC;
950 
951 	/* We cannot exceed our Limit set, no matter what */
952 	iset = PFEXEC_REPLY_IPRIV(prp);
953 
954 	if (iset != NULL) {
955 		if (!priv_issubset(iset, &CR_LPRIV(ncr)))
956 			goto out;
957 		priv_union(iset, &CR_IPRIV(ncr));
958 	}
959 
960 	/* Nor can we increate our Limit set itself */
961 	lset = PFEXEC_REPLY_LPRIV(prp);
962 
963 	if (lset != NULL) {
964 		if (!priv_issubset(lset, &CR_LPRIV(ncr)))
965 			goto out;
966 		CR_LPRIV(ncr) = *lset;
967 	}
968 
969 	/* Exec will do the standard set operations */
970 
971 	err = 0;
972 out:
973 	if (da.rbuf != (char *)&pr)
974 		kmem_free(da.rbuf, da.rsize);
975 out1:
976 	kmem_free(pap, pasize);
977 	klpd_rele(pfd);
978 	if (ncr != NULL) {
979 		if (err == 0)
980 			*pfcr = ncr;
981 		else
982 			crfree(ncr);
983 	}
984 	return (err);
985 }
986 
987 int
988 get_forced_privs(const cred_t *cr, const char *respn, priv_set_t *set)
989 {
990 	klpd_reg_t *pfd;
991 	pfexec_arg_t *pap;
992 	door_arg_t da;
993 	int dres;
994 	int err = -1;
995 	priv_set_t *fset, pmem;
996 	cred_t *zkcr;
997 	zone_t *myzone = crgetzone(cr);
998 	size_t pasize = PFEXEC_ARG_SIZE(MAXPATHLEN);
999 
1000 	mutex_enter(&myzone->zone_lock);
1001 	if ((pfd = myzone->zone_pfexecd) != NULL)
1002 		klpd_hold(pfd);
1003 	mutex_exit(&myzone->zone_lock);
1004 
1005 	if (pfd == NULL)
1006 		return (-1);
1007 
1008 	if (pfd->klpd_door_pid == curproc->p_pid) {
1009 		klpd_rele(pfd);
1010 		return (0);
1011 	}
1012 
1013 	pap = kmem_zalloc(pasize, KM_SLEEP);
1014 
1015 	if (get_path(pap->pfa_path, respn, -1) == -1)
1016 		goto out1;
1017 
1018 	pap->pfa_vers = PFEXEC_ARG_VERS;
1019 	pap->pfa_call = PFEXEC_FORCED_PRIVS;
1020 	pap->pfa_len = pasize;
1021 	pap->pfa_uid = (uid_t)-1;			/* Not relevant */
1022 
1023 	da.data_ptr = (char *)pap;
1024 	da.data_size = pap->pfa_len;
1025 	da.desc_ptr = NULL;
1026 	da.desc_num = 0;
1027 	da.rbuf = (char *)&pmem;
1028 	da.rsize = sizeof (pmem);
1029 
1030 	while ((dres = door_ki_upcall(pfd->klpd_door, &da)) != 0) {
1031 		switch (dres) {
1032 		case EAGAIN:
1033 			delay(1);
1034 			continue;
1035 		case EINVAL:
1036 		case EBADF:
1037 		case EINTR:
1038 		default:
1039 			goto out;
1040 		}
1041 	}
1042 
1043 	/*
1044 	 * Check the size of the result, it's a privilege set.
1045 	 */
1046 	if (da.rsize != sizeof (priv_set_t))
1047 		goto out;
1048 
1049 	fset = (priv_set_t *)da.rbuf;
1050 
1051 	/*
1052 	 * We restrict the forced privileges with whatever is available in
1053 	 * the current zone.
1054 	 */
1055 	zkcr = zone_kcred();
1056 	priv_intersect(&CR_LPRIV(zkcr), fset);
1057 
1058 	/*
1059 	 * But we fail if the forced privileges are not found in the current
1060 	 * Limit set.
1061 	 */
1062 	if (!priv_issubset(fset, &CR_LPRIV(cr))) {
1063 		err = EACCES;
1064 	} else if (!priv_isemptyset(fset)) {
1065 		err = 0;
1066 		*set = *fset;
1067 	}
1068 out:
1069 	if (da.rbuf != (char *)&pmem)
1070 		kmem_free(da.rbuf, da.rsize);
1071 out1:
1072 	kmem_free(pap, pasize);
1073 	klpd_rele(pfd);
1074 	return (err);
1075 }
1076 
1077 int
1078 check_user_privs(const cred_t *cr, const priv_set_t *set)
1079 {
1080 	klpd_reg_t *pfd;
1081 	pfexec_arg_t *pap;
1082 	door_arg_t da;
1083 	int dres;
1084 	int err = -1;
1085 	zone_t *myzone = crgetzone(cr);
1086 	size_t pasize = PFEXEC_ARG_SIZE(sizeof (priv_set_t));
1087 	uint32_t res;
1088 
1089 	mutex_enter(&myzone->zone_lock);
1090 	if ((pfd = myzone->zone_pfexecd) != NULL)
1091 		klpd_hold(pfd);
1092 	mutex_exit(&myzone->zone_lock);
1093 
1094 	if (pfd == NULL)
1095 		return (-1);
1096 
1097 	if (pfd->klpd_door_pid == curproc->p_pid) {
1098 		klpd_rele(pfd);
1099 		return (0);
1100 	}
1101 
1102 	pap = kmem_zalloc(pasize, KM_SLEEP);
1103 
1104 	*(priv_set_t *)&pap->pfa_buf = *set;
1105 
1106 	pap->pfa_vers = PFEXEC_ARG_VERS;
1107 	pap->pfa_call = PFEXEC_USER_PRIVS;
1108 	pap->pfa_len = pasize;
1109 	pap->pfa_uid = crgetruid(cr);
1110 
1111 	da.data_ptr = (char *)pap;
1112 	da.data_size = pap->pfa_len;
1113 	da.desc_ptr = NULL;
1114 	da.desc_num = 0;
1115 	da.rbuf = (char *)&res;
1116 	da.rsize = sizeof (res);
1117 
1118 	while ((dres = door_ki_upcall(pfd->klpd_door, &da)) != 0) {
1119 		switch (dres) {
1120 		case EAGAIN:
1121 			delay(1);
1122 			continue;
1123 		case EINVAL:
1124 		case EBADF:
1125 		case EINTR:
1126 		default:
1127 			goto out;
1128 		}
1129 	}
1130 
1131 	/*
1132 	 * Check the size of the result.
1133 	 */
1134 	if (da.rsize != sizeof (res))
1135 		goto out;
1136 
1137 	if (*(uint32_t *)da.rbuf == 1)
1138 		err = 0;
1139 out:
1140 	if (da.rbuf != (char *)&res)
1141 		kmem_free(da.rbuf, da.rsize);
1142 out1:
1143 	kmem_free(pap, pasize);
1144 	klpd_rele(pfd);
1145 	return (err);
1146 }
1147