xref: /illumos-gate/usr/src/uts/common/os/klpd.c (revision 0245b61fd282e95735b173b8d95be0d6688163b4)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright 2015, Joyent, Inc.
25  */
26 
27 #include <sys/atomic.h>
28 #include <sys/door.h>
29 #include <sys/proc.h>
30 #include <sys/cred_impl.h>
31 #include <sys/policy.h>
32 #include <sys/priv.h>
33 #include <sys/klpd.h>
34 #include <sys/errno.h>
35 #include <sys/kmem.h>
36 #include <sys/project.h>
37 #include <sys/systm.h>
38 #include <sys/sysmacros.h>
39 #include <sys/pathname.h>
40 #include <sys/varargs.h>
41 #include <sys/zone.h>
42 #include <sys/cmn_err.h>
43 #include <sys/sdt.h>
44 #include <netinet/in.h>
45 
/*
 * Round a up to the next multiple of n.  The mask arithmetic only works
 * when n is a power of two.  Both arguments are evaluated more than once.
 */
#define	ROUNDUP(a, n) (((a) + (n) - 1) & ~((n) - 1))
47 
/*
 * Held while walking or modifying the global klpd_list and the
 * per-project kpj_klpd registration.
 */
static kmutex_t klpd_mutex;
49 
/*
 * One registered door server.  Entries are reference counted and may be
 * linked on a list through klpd_next/klpd_refp.
 */
typedef struct klpd_reg {
	/* Next entry on the list, or NULL. */
	struct klpd_reg *klpd_next;
	/*
	 * Address of the pointer which references this entry (the list
	 * head or the previous entry's klpd_next); NULL when not linked.
	 */
	struct klpd_reg **klpd_refp;
	/* Door handle used for the upcall; released on last klpd_rele(). */
	door_handle_t 	klpd_door;
	/* Pid of the door server (di_target); used to avoid self-calls. */
	pid_t		klpd_door_pid;
	/* Privileges this registration is willing to answer for. */
	priv_set_t	klpd_pset;
	/* Held credential of the registrant; NULL for pfexec entries. */
	cred_t		*klpd_cred;
	/* Non-zero once klpd_unreg_dh() has disabled the entry. */
	int		klpd_indel;		/* Disabled */
	/* Reference count, updated atomically. */
	uint32_t	klpd_ref;
} klpd_reg_t;
60 
61 
/*
 * This data structure hangs off the credential of a process; the
 * credential is finalized and cannot be changed, but this structure
 * can be changed when a new door server for the particular group
 * needs to be registered.  It is refcounted and shared between
 * processes with common ancestry.
 *
 * The reference count (crkl_ref) is updated atomically.
 *
 * The registration (crkl_reg) is read and replaced with crkl_lock held;
 * the one exception in this file is klpd_reg(), which links into a
 * freshly allocated structure that nobody else can see yet.
 */
typedef struct credklpd {
	kmutex_t	crkl_lock;	/* protects crkl_reg */
	klpd_reg_t	*crkl_reg;	/* current registration, or NULL */
	uint32_t	crkl_ref;	/* reference count */
} credklpd_t;
78 
/* Head of the list of globally registered daemons; see klpd_mutex. */
klpd_reg_t *klpd_list;

/* Forward declarations for helpers used before their definition. */
static void klpd_unlink(klpd_reg_t *);
static int klpd_unreg_dh(door_handle_t);

static credklpd_t *crklpd_alloc(void);

void crklpd_setreg(credklpd_t *, klpd_reg_t *);

/* NOTE(review): not referenced in the visible part of this file. */
extern size_t max_vnode_path;
89 
90 void
91 klpd_rele(klpd_reg_t *p)
92 {
93 	if (atomic_dec_32_nv(&p->klpd_ref) == 0) {
94 		if (p->klpd_refp != NULL)
95 			klpd_unlink(p);
96 		if (p->klpd_cred != NULL)
97 			crfree(p->klpd_cred);
98 		door_ki_rele(p->klpd_door);
99 		kmem_free(p, sizeof (*p));
100 	}
101 }
102 
103 /*
104  * In order to be able to walk the lists, we can't unlink the entry
105  * until the reference count drops to 0.  If we remove it too soon,
106  * list walkers will terminate when they happen to call a now orphaned
107  * entry.
108  */
109 static klpd_reg_t *
110 klpd_rele_next(klpd_reg_t *p)
111 {
112 	klpd_reg_t *r = p->klpd_next;
113 
114 	klpd_rele(p);
115 	return (r);
116 }
117 
118 
119 static void
120 klpd_hold(klpd_reg_t *p)
121 {
122 	atomic_inc_32(&p->klpd_ref);
123 }
124 
125 /*
126  * Remove registration from where it is registered.  Returns next in list.
127  */
128 static void
129 klpd_unlink(klpd_reg_t *p)
130 {
131 	ASSERT(p->klpd_refp == NULL || *p->klpd_refp == p);
132 
133 	if (p->klpd_refp != NULL)
134 		*p->klpd_refp = p->klpd_next;
135 
136 	if (p->klpd_next != NULL)
137 		p->klpd_next->klpd_refp = p->klpd_refp;
138 	p->klpd_refp = NULL;
139 }
140 
141 /*
142  * Remove all elements of the klpd list and decrement their refcnts.
143  * The lock guarding the list should be held; this function is
144  * called when we are sure we want to destroy the list completely
145  * list but not so sure that the reference counts of all elements have
146  * dropped back to 1.
147  */
148 void
149 klpd_freelist(klpd_reg_t **pp)
150 {
151 	klpd_reg_t *p;
152 
153 	while ((p = *pp) != NULL) {
154 		klpd_unlink(p);
155 		klpd_rele(p);
156 	}
157 }
158 
159 /*
160  * Link new entry in list.  The Boolean argument specifies whether this
161  * list can contain only a single item or multiple items.
162  * Returns the entry which needs to be released if single is B_TRUE.
163  */
164 static klpd_reg_t *
165 klpd_link(klpd_reg_t *p, klpd_reg_t **listp, boolean_t single)
166 {
167 	klpd_reg_t *old = *listp;
168 
169 	ASSERT(p->klpd_ref == 1);
170 
171 	ASSERT(old == NULL || *old->klpd_refp == old);
172 	p->klpd_refp = listp;
173 	p->klpd_next = single ? NULL : old;
174 	*listp = p;
175 	if (old != NULL) {
176 		if (single) {
177 			ASSERT(old->klpd_next == NULL);
178 			old->klpd_refp = NULL;
179 			return (old);
180 		} else
181 			old->klpd_refp = &p->klpd_next;
182 	}
183 	return (NULL);
184 }
185 
186 /*
187  * The typical call consists of:
188  *	- priv_set_t
189  *	- some integer data (type, value)
190  * for now, it's just one bit.
191  */
192 static klpd_head_t *
193 klpd_marshall(klpd_reg_t *p, const priv_set_t *rq, va_list ap)
194 {
195 	char	*tmp;
196 	uint_t	type;
197 	vnode_t *vp;
198 	size_t	len = sizeof (priv_set_t) + sizeof (klpd_head_t);
199 	size_t	plen, clen;
200 	int	proto;
201 
202 	klpd_arg_t *kap = NULL;
203 	klpd_head_t *khp;
204 
205 	type = va_arg(ap, uint_t);
206 	switch (type) {
207 	case KLPDARG_NOMORE:
208 		khp = kmem_zalloc(len, KM_SLEEP);
209 		khp->klh_argoff = 0;
210 		break;
211 	case KLPDARG_VNODE:
212 		len += offsetof(klpd_arg_t, kla_str);
213 		vp = va_arg(ap, vnode_t *);
214 		if (vp == NULL)
215 			return (NULL);
216 
217 		tmp = va_arg(ap, char *);
218 
219 		if (tmp != NULL && *tmp != '\0')
220 			clen = strlen(tmp) + 1;
221 		else
222 			clen = 0;
223 
224 		len += ROUNDUP(MAXPATHLEN, sizeof (uint_t));
225 		khp = kmem_zalloc(len, KM_SLEEP);
226 
227 		khp->klh_argoff = sizeof (klpd_head_t) + sizeof (priv_set_t);
228 		kap = KLH_ARG(khp);
229 
230 		if (vnodetopath(crgetzone(p->klpd_cred)->zone_rootvp,
231 		    vp, kap->kla_str, MAXPATHLEN, p->klpd_cred) != 0) {
232 			kmem_free(khp, len);
233 			return (NULL);
234 		}
235 		if (clen != 0) {
236 			plen = strlen(kap->kla_str);
237 			if (plen + clen + 1 >= MAXPATHLEN) {
238 				kmem_free(khp, len);
239 				return (NULL);
240 			}
241 			/* Don't make root into a double "/" */
242 			if (plen <= 2)
243 				plen = 0;
244 			kap->kla_str[plen] = '/';
245 			bcopy(tmp, &kap->kla_str[plen + 1], clen);
246 		}
247 		break;
248 	case KLPDARG_PORT:
249 		proto = va_arg(ap, int);
250 		switch (proto) {
251 		case IPPROTO_TCP:	type = KLPDARG_TCPPORT;
252 					break;
253 		case IPPROTO_UDP:	type = KLPDARG_UDPPORT;
254 					break;
255 		case IPPROTO_SCTP:	type = KLPDARG_SCTPPORT;
256 					break;
257 		case PROTO_SDP:		type = KLPDARG_SDPPORT;
258 					break;
259 		}
260 		/* FALLTHROUGH */
261 	case KLPDARG_INT:
262 	case KLPDARG_TCPPORT:
263 	case KLPDARG_UDPPORT:
264 	case KLPDARG_SCTPPORT:
265 	case KLPDARG_SDPPORT:
266 		len += sizeof (*kap);
267 		khp = kmem_zalloc(len, KM_SLEEP);
268 		khp->klh_argoff = sizeof (klpd_head_t) + sizeof (priv_set_t);
269 		kap = KLH_ARG(khp);
270 		kap->kla_int = va_arg(ap, int);
271 		break;
272 	default:
273 		return (NULL);
274 	}
275 	khp->klh_vers = KLPDCALL_VERS;
276 	khp->klh_len = len;
277 	khp->klh_privoff = sizeof (*khp);
278 	*KLH_PRIVSET(khp) = *rq;
279 	if (kap != NULL) {
280 		kap->kla_type = type;
281 		kap->kla_dlen = len - khp->klh_argoff;
282 	}
283 	return (khp);
284 }
285 
286 static int
287 klpd_do_call(klpd_reg_t *p, const priv_set_t *req, va_list ap)
288 {
289 	door_arg_t da;
290 	int res;
291 	int dres;
292 	klpd_head_t *klh;
293 
294 	if (p->klpd_door_pid == curproc->p_pid)
295 		return (-1);
296 
297 	klh = klpd_marshall(p, req, ap);
298 
299 	if (klh == NULL)
300 		return (-1);
301 
302 	da.data_ptr = (char *)klh;
303 	da.data_size = klh->klh_len;
304 	da.desc_ptr = NULL;
305 	da.desc_num = 0;
306 	da.rbuf = (char *)&res;
307 	da.rsize = sizeof (res);
308 
309 	while ((dres = door_ki_upcall_limited(p->klpd_door, &da, NULL,
310 	    SIZE_MAX, 0)) != 0) {
311 		switch (dres) {
312 		case EAGAIN:
313 			delay(1);
314 			continue;
315 		case EINVAL:
316 		case EBADF:
317 			/* Bad door, don't call it again. */
318 			(void) klpd_unreg_dh(p->klpd_door);
319 			/* FALLTHROUGH */
320 		case EINTR:
321 			/* Pending signal, nothing we can do. */
322 			/* FALLTHROUGH */
323 		default:
324 			kmem_free(klh, klh->klh_len);
325 			return (-1);
326 		}
327 	}
328 	kmem_free(klh, klh->klh_len);
329 	/* Bogus return value, must be a failure */
330 	if (da.rbuf != (char *)&res) {
331 		kmem_free(da.rbuf, da.rsize);
332 		return (-1);
333 	}
334 	return (res);
335 }
336 
337 uint32_t klpd_bad_locks;
338 
339 int
340 klpd_call(const cred_t *cr, const priv_set_t *req, va_list ap)
341 {
342 	klpd_reg_t *p;
343 	int rv = -1;
344 	credklpd_t *ckp;
345 	zone_t *ckzone;
346 
347 	/*
348 	 * These locks must not be held when this code is called;
349 	 * callbacks to userland with these locks held will result
350 	 * in issues.  That said, the code at the call sides was
351 	 * restructured not to call with any of the locks held and
352 	 * no policies operate by default on most processes.
353 	 */
354 	if (mutex_owned(&pidlock) || mutex_owned(&curproc->p_lock) ||
355 	    mutex_owned(&curproc->p_crlock)) {
356 		atomic_inc_32(&klpd_bad_locks);
357 		return (-1);
358 	}
359 
360 	/*
361 	 * Enforce the limit set for the call process (still).
362 	 */
363 	if (!priv_issubset(req, &CR_LPRIV(cr)))
364 		return (-1);
365 
366 	/* Try 1: get the credential specific klpd */
367 	if ((ckp = crgetcrklpd(cr)) != NULL) {
368 		mutex_enter(&ckp->crkl_lock);
369 		if ((p = ckp->crkl_reg) != NULL &&
370 		    p->klpd_indel == 0 &&
371 		    priv_issubset(req, &p->klpd_pset)) {
372 			klpd_hold(p);
373 			mutex_exit(&ckp->crkl_lock);
374 			rv = klpd_do_call(p, req, ap);
375 			mutex_enter(&ckp->crkl_lock);
376 			klpd_rele(p);
377 			mutex_exit(&ckp->crkl_lock);
378 			if (rv != -1)
379 				return (rv == 0 ? 0 : -1);
380 		} else {
381 			mutex_exit(&ckp->crkl_lock);
382 		}
383 	}
384 
385 	/* Try 2: get the project specific klpd */
386 	mutex_enter(&klpd_mutex);
387 
388 	if ((p = curproj->kpj_klpd) != NULL) {
389 		klpd_hold(p);
390 		mutex_exit(&klpd_mutex);
391 		if (p->klpd_indel == 0 &&
392 		    priv_issubset(req, &p->klpd_pset)) {
393 			rv = klpd_do_call(p, req, ap);
394 		}
395 		mutex_enter(&klpd_mutex);
396 		klpd_rele(p);
397 		mutex_exit(&klpd_mutex);
398 
399 		if (rv != -1)
400 			return (rv == 0 ? 0 : -1);
401 	} else {
402 		mutex_exit(&klpd_mutex);
403 	}
404 
405 	/* Try 3: get the global klpd list */
406 	ckzone = crgetzone(cr);
407 	mutex_enter(&klpd_mutex);
408 
409 	for (p = klpd_list; p != NULL; ) {
410 		zone_t *kkzone = crgetzone(p->klpd_cred);
411 		if ((kkzone == &zone0 || kkzone == ckzone) &&
412 		    p->klpd_indel == 0 &&
413 		    priv_issubset(req, &p->klpd_pset)) {
414 			klpd_hold(p);
415 			mutex_exit(&klpd_mutex);
416 			rv = klpd_do_call(p, req, ap);
417 			mutex_enter(&klpd_mutex);
418 
419 			p = klpd_rele_next(p);
420 
421 			if (rv != -1)
422 				break;
423 		} else {
424 			p = p->klpd_next;
425 		}
426 	}
427 	mutex_exit(&klpd_mutex);
428 	return (rv == 0 ? 0 : -1);
429 }
430 
431 /*
432  * Register the klpd.
433  * If the pid_t passed in is positive, update the registration for
434  * the specific process; that is only possible if the process already
435  * has a registration on it.  This change of registration will affect
436  * all processes which share common ancestry.
437  *
438  * MY_PID (pid 0) can be used to create or change the context for
439  * the current process, typically done after fork().
440  *
441  * A negative value can be used to register a klpd globally.
442  *
443  * The per-credential klpd needs to be cleaned up when entering
444  * a zone or unsetting the flag.
445  */
446 int
447 klpd_reg(int did, idtype_t type, id_t id, priv_set_t *psetbuf)
448 {
449 	cred_t *cr = CRED();
450 	door_handle_t dh;
451 	klpd_reg_t *kpd;
452 	priv_set_t pset;
453 	door_info_t di;
454 	credklpd_t *ckp = NULL;
455 	pid_t pid = -1;
456 	projid_t proj = -1;
457 	kproject_t *kpp = NULL;
458 
459 	if (CR_FLAGS(cr) & PRIV_XPOLICY)
460 		return (set_errno(EINVAL));
461 
462 	if (copyin(psetbuf, &pset, sizeof (priv_set_t)))
463 		return (set_errno(EFAULT));
464 
465 	if (!priv_issubset(&pset, &CR_OEPRIV(cr)))
466 		return (set_errno(EPERM));
467 
468 	switch (type) {
469 	case P_PID:
470 		pid = (pid_t)id;
471 		if (pid == P_MYPID)
472 			pid = curproc->p_pid;
473 		if (pid == curproc->p_pid)
474 			ckp = crklpd_alloc();
475 		break;
476 	case P_PROJID:
477 		proj = (projid_t)id;
478 		kpp = project_hold_by_id(proj, crgetzone(cr),
479 		    PROJECT_HOLD_FIND);
480 		if (kpp == NULL)
481 			return (set_errno(ESRCH));
482 		break;
483 	default:
484 		return (set_errno(ENOTSUP));
485 	}
486 
487 
488 	/*
489 	 * Verify the door passed in; it must be a door and we won't
490 	 * allow processes to be called on their own behalf.
491 	 */
492 	dh = door_ki_lookup(did);
493 	if (dh == NULL || door_ki_info(dh, &di) != 0) {
494 		if (ckp != NULL)
495 			crklpd_rele(ckp);
496 		if (kpp != NULL)
497 			project_rele(kpp);
498 		return (set_errno(EBADF));
499 	}
500 	if (type == P_PID && pid == di.di_target) {
501 		if (ckp != NULL)
502 			crklpd_rele(ckp);
503 		ASSERT(kpp == NULL);
504 		return (set_errno(EINVAL));
505 	}
506 
507 	kpd = kmem_zalloc(sizeof (*kpd), KM_SLEEP);
508 	crhold(kpd->klpd_cred = cr);
509 	kpd->klpd_door = dh;
510 	kpd->klpd_door_pid = di.di_target;
511 	kpd->klpd_ref = 1;
512 	kpd->klpd_pset = pset;
513 
514 	if (kpp != NULL) {
515 		mutex_enter(&klpd_mutex);
516 		kpd = klpd_link(kpd, &kpp->kpj_klpd, B_TRUE);
517 		mutex_exit(&klpd_mutex);
518 		if (kpd != NULL)
519 			klpd_rele(kpd);
520 		project_rele(kpp);
521 	} else if ((int)pid < 0) {
522 		/* Global daemon */
523 		mutex_enter(&klpd_mutex);
524 		(void) klpd_link(kpd, &klpd_list, B_FALSE);
525 		mutex_exit(&klpd_mutex);
526 	} else if (pid == curproc->p_pid) {
527 		proc_t *p = curproc;
528 		cred_t *newcr = cralloc();
529 
530 		/* No need to lock, sole reference to ckp */
531 		kpd = klpd_link(kpd, &ckp->crkl_reg, B_TRUE);
532 
533 		if (kpd != NULL)
534 			klpd_rele(kpd);
535 
536 		mutex_enter(&p->p_crlock);
537 		cr = p->p_cred;
538 		crdup_to(cr, newcr);
539 		crsetcrklpd(newcr, ckp);
540 		p->p_cred = newcr;	/* Already held for p_cred */
541 
542 		crhold(newcr);		/* Hold once for the current thread */
543 		mutex_exit(&p->p_crlock);
544 		crfree(cr);		/* One for the p_cred */
545 		crset(p, newcr);
546 	} else {
547 		proc_t *p;
548 		cred_t *pcr;
549 		mutex_enter(&pidlock);
550 		p = prfind(pid);
551 		if (p == NULL || !prochasprocperm(p, curproc, CRED())) {
552 			mutex_exit(&pidlock);
553 			klpd_rele(kpd);
554 			return (set_errno(p == NULL ? ESRCH : EPERM));
555 		}
556 		mutex_enter(&p->p_crlock);
557 		crhold(pcr = p->p_cred);
558 		mutex_exit(&pidlock);
559 		mutex_exit(&p->p_crlock);
560 		/*
561 		 * We're going to update the credential's ckp in place;
562 		 * this requires that it exists.
563 		 */
564 		ckp = crgetcrklpd(pcr);
565 		if (ckp == NULL) {
566 			crfree(pcr);
567 			klpd_rele(kpd);
568 			return (set_errno(EINVAL));
569 		}
570 		crklpd_setreg(ckp, kpd);
571 		crfree(pcr);
572 	}
573 
574 	return (0);
575 }
576 
577 static int
578 klpd_unreg_dh(door_handle_t dh)
579 {
580 	klpd_reg_t *p;
581 
582 	mutex_enter(&klpd_mutex);
583 	for (p = klpd_list; p != NULL; p = p->klpd_next) {
584 		if (p->klpd_door == dh)
585 			break;
586 	}
587 	if (p == NULL) {
588 		mutex_exit(&klpd_mutex);
589 		return (EINVAL);
590 	}
591 	if (p->klpd_indel != 0) {
592 		mutex_exit(&klpd_mutex);
593 		return (EAGAIN);
594 	}
595 	p->klpd_indel = 1;
596 	klpd_rele(p);
597 	mutex_exit(&klpd_mutex);
598 	return (0);
599 }
600 
601 int
602 klpd_unreg(int did, idtype_t type, id_t id)
603 {
604 	door_handle_t dh;
605 	int res = 0;
606 	proc_t *p;
607 	pid_t pid;
608 	projid_t proj;
609 	kproject_t *kpp = NULL;
610 	credklpd_t *ckp;
611 
612 	switch (type) {
613 	case P_PID:
614 		pid = (pid_t)id;
615 		break;
616 	case P_PROJID:
617 		proj = (projid_t)id;
618 		kpp = project_hold_by_id(proj, crgetzone(CRED()),
619 		    PROJECT_HOLD_FIND);
620 		if (kpp == NULL)
621 			return (set_errno(ESRCH));
622 		break;
623 	default:
624 		return (set_errno(ENOTSUP));
625 	}
626 
627 	dh = door_ki_lookup(did);
628 	if (dh == NULL) {
629 		if (kpp != NULL)
630 			project_rele(kpp);
631 		return (set_errno(EINVAL));
632 	}
633 
634 	if (kpp != NULL) {
635 		mutex_enter(&klpd_mutex);
636 		if (kpp->kpj_klpd == NULL)
637 			res = ESRCH;
638 		else
639 			klpd_freelist(&kpp->kpj_klpd);
640 		mutex_exit(&klpd_mutex);
641 		project_rele(kpp);
642 		goto out;
643 	} else if ((int)pid > 0) {
644 		mutex_enter(&pidlock);
645 		p = prfind(pid);
646 		if (p == NULL) {
647 			mutex_exit(&pidlock);
648 			door_ki_rele(dh);
649 			return (set_errno(ESRCH));
650 		}
651 		mutex_enter(&p->p_crlock);
652 		mutex_exit(&pidlock);
653 	} else if (pid == 0) {
654 		p = curproc;
655 		mutex_enter(&p->p_crlock);
656 	} else {
657 		res = klpd_unreg_dh(dh);
658 		goto out;
659 	}
660 
661 	ckp = crgetcrklpd(p->p_cred);
662 	if (ckp != NULL) {
663 		crklpd_setreg(ckp, NULL);
664 	} else {
665 		res = ESRCH;
666 	}
667 	mutex_exit(&p->p_crlock);
668 
669 out:
670 	door_ki_rele(dh);
671 
672 	if (res != 0)
673 		return (set_errno(res));
674 	return (0);
675 }
676 
677 void
678 crklpd_hold(credklpd_t *crkpd)
679 {
680 	atomic_inc_32(&crkpd->crkl_ref);
681 }
682 
683 void
684 crklpd_rele(credklpd_t *crkpd)
685 {
686 	if (atomic_dec_32_nv(&crkpd->crkl_ref) == 0) {
687 		if (crkpd->crkl_reg != NULL)
688 			klpd_rele(crkpd->crkl_reg);
689 		mutex_destroy(&crkpd->crkl_lock);
690 		kmem_free(crkpd, sizeof (*crkpd));
691 	}
692 }
693 
694 static credklpd_t *
695 crklpd_alloc(void)
696 {
697 	credklpd_t *res = kmem_alloc(sizeof (*res), KM_SLEEP);
698 
699 	mutex_init(&res->crkl_lock, NULL, MUTEX_DEFAULT, NULL);
700 	res->crkl_ref = 1;
701 	res->crkl_reg = NULL;
702 
703 	return (res);
704 }
705 
706 void
707 crklpd_setreg(credklpd_t *crk, klpd_reg_t *new)
708 {
709 	klpd_reg_t *old;
710 
711 	mutex_enter(&crk->crkl_lock);
712 	if (new == NULL) {
713 		old = crk->crkl_reg;
714 		if (old != NULL)
715 			klpd_unlink(old);
716 	} else {
717 		old = klpd_link(new, &crk->crkl_reg, B_TRUE);
718 	}
719 	mutex_exit(&crk->crkl_lock);
720 
721 	if (old != NULL)
722 		klpd_rele(old);
723 }
724 
725 /* Allocate and register the pfexec specific callback */
726 int
727 pfexec_reg(int did)
728 {
729 	door_handle_t dh;
730 	int err = secpolicy_pfexec_register(CRED());
731 	klpd_reg_t *pfx;
732 	door_info_t di;
733 	zone_t *myzone = crgetzone(CRED());
734 
735 	if (err != 0)
736 		return (set_errno(err));
737 
738 	dh = door_ki_lookup(did);
739 	if (dh == NULL || door_ki_info(dh, &di) != 0)
740 		return (set_errno(EBADF));
741 
742 	pfx = kmem_zalloc(sizeof (*pfx), KM_SLEEP);
743 
744 	pfx->klpd_door = dh;
745 	pfx->klpd_door_pid = di.di_target;
746 	pfx->klpd_ref = 1;
747 	pfx->klpd_cred = NULL;
748 	mutex_enter(&myzone->zone_lock);
749 	pfx = klpd_link(pfx, &myzone->zone_pfexecd, B_TRUE);
750 	mutex_exit(&myzone->zone_lock);
751 	if (pfx != NULL)
752 		klpd_rele(pfx);
753 
754 	return (0);
755 }
756 
757 int
758 pfexec_unreg(int did)
759 {
760 	door_handle_t dh;
761 	int err = 0;
762 	zone_t *myzone = crgetzone(CRED());
763 	klpd_reg_t *pfd;
764 
765 	dh = door_ki_lookup(did);
766 	if (dh == NULL)
767 		return (set_errno(EBADF));
768 
769 	mutex_enter(&myzone->zone_lock);
770 	pfd = myzone->zone_pfexecd;
771 	if (pfd != NULL && pfd->klpd_door == dh) {
772 		klpd_unlink(pfd);
773 	} else {
774 		pfd = NULL;
775 		err = EINVAL;
776 	}
777 	mutex_exit(&myzone->zone_lock);
778 	door_ki_rele(dh);
779 	/*
780 	 * crfree() cannot be called with zone_lock held; it is called
781 	 * indirectly through closing the door handle
782 	 */
783 	if (pfd != NULL)
784 		klpd_rele(pfd);
785 	if (err != 0)
786 		return (set_errno(err));
787 	return (0);
788 }
789 
790 static int
791 get_path(char *buf, const char *path, int len)
792 {
793 	size_t lc;
794 	char *s;
795 
796 	if (len < 0)
797 		len = strlen(path);
798 
799 	if (*path == '/' && len < MAXPATHLEN) {
800 		(void) strcpy(buf, path);
801 		return (0);
802 	}
803 	/*
804 	 * Build the pathname using the current directory + resolve pathname.
805 	 * The resolve pathname either starts with a normal component and
806 	 * we can just concatenate them or it starts with one
807 	 * or more ".." component and we can remove those; the
808 	 * last one cannot be a ".." and the current directory has
809 	 * more components than the number of ".." in the resolved pathname.
810 	 */
811 	if (dogetcwd(buf, MAXPATHLEN) != 0)
812 		return (-1);
813 
814 	lc = strlen(buf);
815 
816 	while (len > 3 && strncmp("../", path, 3) == 0) {
817 		len -= 3;
818 		path += 3;
819 
820 		s = strrchr(buf, '/');
821 		if (s == NULL || s == buf)
822 			return (-1);
823 
824 		*s = '\0';
825 		lc = s - buf;
826 	}
827 	/* Add a "/" and a NUL */
828 	if (lc < 2 || lc + len + 2 >= MAXPATHLEN)
829 		return (-1);
830 
831 	buf[lc] = '/';
832 	(void) strcpy(buf + lc + 1, path);
833 
834 	return (0);
835 }
836 
837 /*
838  * Perform the pfexec upcall.
839  *
840  * The pfexec upcall is different from the klpd_upcall in that a failure
841  * will lead to a denial of execution.
842  */
843 int
844 pfexec_call(const cred_t *cr, struct pathname *rpnp, cred_t **pfcr,
845     boolean_t *scrub)
846 {
847 	klpd_reg_t *pfd;
848 	pfexec_arg_t *pap;
849 	pfexec_reply_t pr, *prp;
850 	door_arg_t da;
851 	int dres;
852 	cred_t *ncr = NULL;
853 	int err = EACCES;
854 	priv_set_t *iset;
855 	priv_set_t *lset;
856 	zone_t *myzone = crgetzone(CRED());
857 	size_t pasize = PFEXEC_ARG_SIZE(MAXPATHLEN);
858 
859 	/* Find registration */
860 	mutex_enter(&myzone->zone_lock);
861 	if ((pfd = myzone->zone_pfexecd) != NULL)
862 		klpd_hold(pfd);
863 	mutex_exit(&myzone->zone_lock);
864 
865 	if (pfd == NULL) {
866 		DTRACE_PROBE2(pfexecd__not__running,
867 		    zone_t *, myzone, char *, rpnp->pn_path);
868 		uprintf("pfexecd not running; pid %d privileges not "
869 		    "elevated\n", curproc->p_pid);
870 		return (0);
871 	}
872 
873 	if (pfd->klpd_door_pid == curproc->p_pid) {
874 		klpd_rele(pfd);
875 		return (0);
876 	}
877 
878 	pap = kmem_zalloc(pasize, KM_SLEEP);
879 
880 	if (get_path(pap->pfa_path, rpnp->pn_path, rpnp->pn_pathlen) == -1)
881 		goto out1;
882 
883 	pap->pfa_vers = PFEXEC_ARG_VERS;
884 	pap->pfa_call = PFEXEC_EXEC_ATTRS;
885 	pap->pfa_len = pasize;
886 	pap->pfa_uid = crgetruid(cr);
887 
888 	da.data_ptr = (char *)pap;
889 	da.data_size = pap->pfa_len;
890 	da.desc_ptr = NULL;
891 	da.desc_num = 0;
892 	da.rbuf = (char *)&pr;
893 	da.rsize = sizeof (pr);
894 
895 	while ((dres = door_ki_upcall(pfd->klpd_door, &da)) != 0) {
896 		switch (dres) {
897 		case EAGAIN:
898 			delay(1);
899 			continue;
900 		case EINVAL:
901 		case EBADF:
902 			/* FALLTHROUGH */
903 		case EINTR:
904 			/* FALLTHROUGH */
905 		default:
906 			DTRACE_PROBE4(pfexecd__failure,
907 			    int, dres, zone_t *, myzone,
908 			    char *, rpnp->pn_path, klpd_reg_t *, pfd);
909 			goto out;
910 		}
911 	}
912 
913 	prp = (pfexec_reply_t *)da.rbuf;
914 	/*
915 	 * Check the size of the result and the alignment of the
916 	 * privilege sets.
917 	 */
918 	if (da.rsize < sizeof (pr) ||
919 	    prp->pfr_ioff > da.rsize - sizeof (priv_set_t) ||
920 	    prp->pfr_loff > da.rsize - sizeof (priv_set_t) ||
921 	    (prp->pfr_loff & (sizeof (priv_chunk_t) - 1)) != 0 ||
922 	    (prp->pfr_ioff & (sizeof (priv_chunk_t) - 1)) != 0)
923 		goto out;
924 
925 	/*
926 	 * Get results:
927 	 *	allow/allow with additional credentials/disallow[*]
928 	 *
929 	 *	euid, uid, egid, gid, privs, and limitprivs
930 	 * We now have somewhat more flexibility we could even set E and P
931 	 * judiciously but that would break some currently valid assumptions
932 	 *	[*] Disallow is not readily supported by always including
933 	 *	the Basic Solaris User profile in all user's profiles.
934 	 */
935 
936 	if (!prp->pfr_allowed) {
937 		err = EACCES;
938 		goto out;
939 	}
940 	if (!prp->pfr_setcred) {
941 		err = 0;
942 		goto out;
943 	}
944 	ncr = crdup((cred_t *)cr);
945 
946 	/*
947 	 * Generate the new credential set scrubenv if ruid != euid (or set)
948 	 * the "I'm set-uid flag" but that is not inherited so scrubbing
949 	 * the environment is a requirement.
950 	 */
951 	/* Set uids or gids, note that -1 will do the right thing */
952 	if (crsetresuid(ncr, prp->pfr_ruid, prp->pfr_euid, prp->pfr_euid) != 0)
953 		goto out;
954 	if (crsetresgid(ncr, prp->pfr_rgid, prp->pfr_egid, prp->pfr_egid) != 0)
955 		goto out;
956 
957 	*scrub = prp->pfr_scrubenv;
958 
959 	if (prp->pfr_clearflag)
960 		CR_FLAGS(ncr) &= ~PRIV_PFEXEC;
961 
962 	/* We cannot exceed our Limit set, no matter what */
963 	iset = PFEXEC_REPLY_IPRIV(prp);
964 
965 	if (iset != NULL) {
966 		if (!priv_issubset(iset, &CR_LPRIV(ncr)))
967 			goto out;
968 		priv_union(iset, &CR_IPRIV(ncr));
969 	}
970 
971 	/* Nor can we increate our Limit set itself */
972 	lset = PFEXEC_REPLY_LPRIV(prp);
973 
974 	if (lset != NULL) {
975 		if (!priv_issubset(lset, &CR_LPRIV(ncr)))
976 			goto out;
977 		CR_LPRIV(ncr) = *lset;
978 	}
979 
980 	/* Exec will do the standard set operations */
981 
982 	err = 0;
983 out:
984 	if (da.rbuf != (char *)&pr)
985 		kmem_free(da.rbuf, da.rsize);
986 out1:
987 	kmem_free(pap, pasize);
988 	klpd_rele(pfd);
989 	if (ncr != NULL) {
990 		if (err == 0)
991 			*pfcr = ncr;
992 		else
993 			crfree(ncr);
994 	}
995 	return (err);
996 }
997 
998 int
999 get_forced_privs(const cred_t *cr, const char *respn, priv_set_t *set)
1000 {
1001 	klpd_reg_t *pfd;
1002 	pfexec_arg_t *pap;
1003 	door_arg_t da;
1004 	int dres;
1005 	int err = -1;
1006 	priv_set_t *fset, pmem;
1007 	cred_t *zkcr;
1008 	zone_t *myzone = crgetzone(cr);
1009 	size_t pasize = PFEXEC_ARG_SIZE(MAXPATHLEN);
1010 
1011 	mutex_enter(&myzone->zone_lock);
1012 	if ((pfd = myzone->zone_pfexecd) != NULL)
1013 		klpd_hold(pfd);
1014 	mutex_exit(&myzone->zone_lock);
1015 
1016 	if (pfd == NULL)
1017 		return (-1);
1018 
1019 	if (pfd->klpd_door_pid == curproc->p_pid) {
1020 		klpd_rele(pfd);
1021 		return (0);
1022 	}
1023 
1024 	pap = kmem_zalloc(pasize, KM_SLEEP);
1025 
1026 	if (get_path(pap->pfa_path, respn, -1) == -1)
1027 		goto out1;
1028 
1029 	pap->pfa_vers = PFEXEC_ARG_VERS;
1030 	pap->pfa_call = PFEXEC_FORCED_PRIVS;
1031 	pap->pfa_len = pasize;
1032 	pap->pfa_uid = (uid_t)-1;			/* Not relevant */
1033 
1034 	da.data_ptr = (char *)pap;
1035 	da.data_size = pap->pfa_len;
1036 	da.desc_ptr = NULL;
1037 	da.desc_num = 0;
1038 	da.rbuf = (char *)&pmem;
1039 	da.rsize = sizeof (pmem);
1040 
1041 	while ((dres = door_ki_upcall(pfd->klpd_door, &da)) != 0) {
1042 		switch (dres) {
1043 		case EAGAIN:
1044 			delay(1);
1045 			continue;
1046 		case EINVAL:
1047 		case EBADF:
1048 		case EINTR:
1049 		default:
1050 			goto out;
1051 		}
1052 	}
1053 
1054 	/*
1055 	 * Check the size of the result, it's a privilege set.
1056 	 */
1057 	if (da.rsize != sizeof (priv_set_t))
1058 		goto out;
1059 
1060 	fset = (priv_set_t *)da.rbuf;
1061 
1062 	/*
1063 	 * We restrict the forced privileges with whatever is available in
1064 	 * the current zone.
1065 	 */
1066 	zkcr = zone_kcred();
1067 	priv_intersect(&CR_LPRIV(zkcr), fset);
1068 
1069 	/*
1070 	 * But we fail if the forced privileges are not found in the current
1071 	 * Limit set.
1072 	 */
1073 	if (!priv_issubset(fset, &CR_LPRIV(cr))) {
1074 		err = EACCES;
1075 	} else if (!priv_isemptyset(fset)) {
1076 		err = 0;
1077 		*set = *fset;
1078 	}
1079 out:
1080 	if (da.rbuf != (char *)&pmem)
1081 		kmem_free(da.rbuf, da.rsize);
1082 out1:
1083 	kmem_free(pap, pasize);
1084 	klpd_rele(pfd);
1085 	return (err);
1086 }
1087 
1088 int
1089 check_user_privs(const cred_t *cr, const priv_set_t *set)
1090 {
1091 	klpd_reg_t *pfd;
1092 	pfexec_arg_t *pap;
1093 	door_arg_t da;
1094 	int dres;
1095 	int err = -1;
1096 	zone_t *myzone = crgetzone(cr);
1097 	size_t pasize = PFEXEC_ARG_SIZE(sizeof (priv_set_t));
1098 	uint32_t res;
1099 
1100 	mutex_enter(&myzone->zone_lock);
1101 	if ((pfd = myzone->zone_pfexecd) != NULL)
1102 		klpd_hold(pfd);
1103 	mutex_exit(&myzone->zone_lock);
1104 
1105 	if (pfd == NULL)
1106 		return (-1);
1107 
1108 	if (pfd->klpd_door_pid == curproc->p_pid) {
1109 		klpd_rele(pfd);
1110 		return (0);
1111 	}
1112 
1113 	pap = kmem_zalloc(pasize, KM_SLEEP);
1114 
1115 	*(priv_set_t *)&pap->pfa_buf = *set;
1116 
1117 	pap->pfa_vers = PFEXEC_ARG_VERS;
1118 	pap->pfa_call = PFEXEC_USER_PRIVS;
1119 	pap->pfa_len = pasize;
1120 	pap->pfa_uid = crgetruid(cr);
1121 
1122 	da.data_ptr = (char *)pap;
1123 	da.data_size = pap->pfa_len;
1124 	da.desc_ptr = NULL;
1125 	da.desc_num = 0;
1126 	da.rbuf = (char *)&res;
1127 	da.rsize = sizeof (res);
1128 
1129 	while ((dres = door_ki_upcall(pfd->klpd_door, &da)) != 0) {
1130 		switch (dres) {
1131 		case EAGAIN:
1132 			delay(1);
1133 			continue;
1134 		case EINVAL:
1135 		case EBADF:
1136 		case EINTR:
1137 		default:
1138 			goto out;
1139 		}
1140 	}
1141 
1142 	/*
1143 	 * Check the size of the result.
1144 	 */
1145 	if (da.rsize != sizeof (res))
1146 		goto out;
1147 
1148 	if (*(uint32_t *)da.rbuf == 1)
1149 		err = 0;
1150 out:
1151 	if (da.rbuf != (char *)&res)
1152 		kmem_free(da.rbuf, da.rsize);
1153 
1154 	kmem_free(pap, pasize);
1155 	klpd_rele(pfd);
1156 	return (err);
1157 }
1158