xref: /illumos-gate/usr/src/uts/common/fs/tmpfs/tmp_subr.c (revision e9db39cef1f968a982994f50c05903cc988a3dd3)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright 2015 Joyent, Inc.
24  */
25 
26 #include <sys/types.h>
27 #include <sys/errno.h>
28 #include <sys/param.h>
29 #include <sys/t_lock.h>
30 #include <sys/systm.h>
31 #include <sys/sysmacros.h>
32 #include <sys/debug.h>
33 #include <sys/time.h>
34 #include <sys/cmn_err.h>
35 #include <sys/vnode.h>
36 #include <sys/stat.h>
37 #include <sys/vfs.h>
38 #include <sys/cred.h>
39 #include <sys/kmem.h>
40 #include <sys/atomic.h>
41 #include <sys/policy.h>
42 #include <sys/fs/tmp.h>
43 #include <sys/fs/tmpnode.h>
44 #include <sys/ddi.h>
45 #include <sys/sunddi.h>
46 
/*
 * Byte multipliers for the size suffixes accepted by tmp_convnum().
 * Each one is 1024 times the previous one.
 */
#define	KILOBYTE	1024
#define	MEGABYTE	(KILOBYTE * 1024)
#define	GIGABYTE	(MEGABYTE * 1024)

/* Width, in bits, of one owner/group/other permission triplet. */
#define	MODESHIFT	3

/* Every mode bit that tmp_convmode() will accept (octal 7777). */
#define	VALIDMODEBITS	07777
54 
55 extern pgcnt_t swapfs_minfree;
56 
57 int
58 tmp_taccess(void *vtp, int mode, struct cred *cred)
59 {
60 	struct tmpnode *tp = vtp;
61 	int shift = 0;
62 	/*
63 	 * Check access based on owner, group and
64 	 * public permissions in tmpnode.
65 	 */
66 	if (crgetuid(cred) != tp->tn_uid) {
67 		shift += MODESHIFT;
68 		if (groupmember(tp->tn_gid, cred) == 0)
69 			shift += MODESHIFT;
70 	}
71 
72 	return (secpolicy_vnode_access2(cred, TNTOV(tp), tp->tn_uid,
73 	    tp->tn_mode << shift, mode));
74 }
75 
76 /*
77  * Decide whether it is okay to remove within a sticky directory.
78  * Two conditions need to be met:  write access to the directory
79  * is needed.  In sticky directories, write access is not sufficient;
80  * you can remove entries from a directory only if you own the directory,
81  * if you are privileged, if you own the entry or if they entry is
82  * a plain file and you have write access to that file.
83  * Function returns 0 if remove access is granted.
84  */
85 int
86 tmp_sticky_remove_access(struct tmpnode *dir, struct tmpnode *entry,
87     struct cred *cr)
88 {
89 	uid_t uid = crgetuid(cr);
90 
91 	if ((dir->tn_mode & S_ISVTX) &&
92 	    uid != dir->tn_uid &&
93 	    uid != entry->tn_uid &&
94 	    (entry->tn_type != VREG ||
95 	    tmp_taccess(entry, VWRITE, cr) != 0))
96 		return (secpolicy_vnode_remove(cr));
97 
98 	return (0);
99 }
100 
101 /*
102  * Allocate zeroed memory if tmpfs_maxkmem has not been exceeded
103  * or the 'musthave' flag is set.  'musthave' allocations should
104  * always be subordinate to normal allocations so that tmpfs_maxkmem
105  * can't be exceeded by more than a few KB.  Example: when creating
106  * a new directory, the tmpnode is a normal allocation; if that
107  * succeeds, the dirents for "." and ".." are 'musthave' allocations.
108  */
109 void *
110 tmp_memalloc(size_t size, int musthave)
111 {
112 	static time_t last_warning;
113 	time_t now;
114 
115 	if (atomic_add_long_nv(&tmp_kmemspace, size) < tmpfs_maxkmem ||
116 	    musthave)
117 		return (kmem_zalloc(size, KM_SLEEP));
118 
119 	atomic_add_long(&tmp_kmemspace, -size);
120 	now = gethrestime_sec();
121 	if (last_warning != now) {
122 		last_warning = now;
123 		cmn_err(CE_WARN, "tmp_memalloc: tmpfs over memory limit");
124 	}
125 	return (NULL);
126 }
127 
128 void
129 tmp_memfree(void *cp, size_t size)
130 {
131 	kmem_free(cp, size);
132 	atomic_add_long(&tmp_kmemspace, -size);
133 }
134 
135 /*
136  * Convert a string containing a number (number of bytes) to a pgcnt_t,
137  * containing the corresponding number of pages. On 32-bit kernels, the
138  * maximum value encoded in 'str' is PAGESIZE * ULONG_MAX, while the value
139  * returned in 'maxpg' is at most ULONG_MAX.
140  *
141  * The number may be followed by a magnitude suffix: "k" or "K" for kilobytes;
142  * "m" or "M" for megabytes; "g" or "G" for gigabytes.  This interface allows
143  * for an arguably esoteric interpretation of multiple suffix characters:
144  * namely, they cascade.  For example, the caller may specify "2mk", which is
145  * interpreted as 2 gigabytes.  It would seem, at this late stage, that the
146  * horse has left not only the barn but indeed the country, and possibly the
147  * entire planetary system. Alternatively, the number may be followed by a
148  * single '%' sign, indicating the size is a percentage of either the zone's
149  * swap limit or the system's overall swap size.
150  *
151  * Parse and overflow errors are detected and a non-zero number returned on
152  * error.
153  */
154 int
155 tmp_convnum(char *str, pgcnt_t *maxpg)
156 {
157 	u_longlong_t num = 0;
158 #ifdef _LP64
159 	u_longlong_t max_bytes = ULONG_MAX;
160 #else
161 	u_longlong_t max_bytes = PAGESIZE * (uint64_t)ULONG_MAX;
162 #endif
163 	char *c;
164 	const struct convchar {
165 		char *cc_char;
166 		uint64_t cc_factor;
167 	} convchars[] = {
168 		{ "kK", KILOBYTE },
169 		{ "mM", MEGABYTE },
170 		{ "gG", GIGABYTE },
171 		{ NULL, 0 }
172 	};
173 
174 	if (str == NULL) {
175 		return (EINVAL);
176 	}
177 	c = str;
178 
179 	/*
180 	 * Convert the initial numeric portion of the input string.
181 	 */
182 	if (ddi_strtoull(str, &c, 10, &num) != 0) {
183 		return (EINVAL);
184 	}
185 
186 	/*
187 	 * Handle a size in percent. Anything other than a single percent
188 	 * modifier is invalid. We use either the zone's swap limit or the
189 	 * system's total available swap size as the initial value. Perform the
190 	 * intermediate calculation in pages to avoid overflow.
191 	 */
192 	if (*c == '%') {
193 		u_longlong_t cap;
194 
195 		if (*(c + 1) != '\0')
196 			return (EINVAL);
197 
198 		if (num > 100)
199 			return (EINVAL);
200 
201 		cap = (u_longlong_t)curproc->p_zone->zone_max_swap_ctl;
202 		if (cap == UINT64_MAX) {
203 			/*
204 			 * Use the amount of available physical and memory swap
205 			 */
206 			mutex_enter(&anoninfo_lock);
207 			cap = TOTAL_AVAILABLE_SWAP;
208 			mutex_exit(&anoninfo_lock);
209 		} else {
210 			cap = btop(cap);
211 		}
212 
213 		num = ptob(cap * num / 100);
214 		goto done;
215 	}
216 
217 	/*
218 	 * Apply the (potentially cascading) magnitude suffixes until an
219 	 * invalid character is found, or the string comes to an end.
220 	 */
221 	for (; *c != '\0'; c++) {
222 		int i;
223 
224 		for (i = 0; convchars[i].cc_char != NULL; i++) {
225 			/*
226 			 * Check if this character matches this multiplier
227 			 * class:
228 			 */
229 			if (strchr(convchars[i].cc_char, *c) != NULL) {
230 				/*
231 				 * Check for overflow:
232 				 */
233 				if (num > max_bytes / convchars[i].cc_factor) {
234 					return (EINVAL);
235 				}
236 
237 				num *= convchars[i].cc_factor;
238 				goto valid_char;
239 			}
240 		}
241 
242 		/*
243 		 * This was not a valid multiplier suffix character.
244 		 */
245 		return (EINVAL);
246 
247 valid_char:
248 		continue;
249 	}
250 
251 done:
252 	/*
253 	 * Since btopr() rounds up to page granularity, this round-up can
254 	 * cause an overflow only if 'num' is between (max_bytes - PAGESIZE)
255 	 * and (max_bytes). In this case the resulting number is zero, which
256 	 * is what we check for below.
257 	 */
258 	if ((*maxpg = (pgcnt_t)btopr(num)) == 0 && num != 0)
259 		return (EINVAL);
260 	return (0);
261 }
262 
263 /*
264  * Parse an octal mode string for use as the permissions set for the root
265  * of the tmpfs mount.
266  */
267 int
268 tmp_convmode(char *str, mode_t *mode)
269 {
270 	ulong_t num;
271 	char *c;
272 
273 	if (str == NULL) {
274 		return (EINVAL);
275 	}
276 
277 	if (ddi_strtoul(str, &c, 8, &num) != 0) {
278 		return (EINVAL);
279 	}
280 
281 	if ((num & ~VALIDMODEBITS) != 0) {
282 		return (EINVAL);
283 	}
284 
285 	*mode = VALIDMODEBITS & num;
286 	return (0);
287 }
288