1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright 2015 Joyent, Inc. 24 */ 25 26 #include <sys/types.h> 27 #include <sys/errno.h> 28 #include <sys/param.h> 29 #include <sys/t_lock.h> 30 #include <sys/systm.h> 31 #include <sys/sysmacros.h> 32 #include <sys/debug.h> 33 #include <sys/time.h> 34 #include <sys/cmn_err.h> 35 #include <sys/vnode.h> 36 #include <sys/stat.h> 37 #include <sys/vfs.h> 38 #include <sys/cred.h> 39 #include <sys/kmem.h> 40 #include <sys/atomic.h> 41 #include <sys/policy.h> 42 #include <sys/fs/tmp.h> 43 #include <sys/fs/tmpnode.h> 44 #include <sys/ddi.h> 45 #include <sys/sunddi.h> 46 47 #define KILOBYTE 1024 48 #define MEGABYTE (1024 * KILOBYTE) 49 #define GIGABYTE (1024 * MEGABYTE) 50 51 #define MODESHIFT 3 52 53 #define VALIDMODEBITS 07777 54 55 extern pgcnt_t swapfs_minfree; 56 57 int 58 tmp_taccess(void *vtp, int mode, struct cred *cred) 59 { 60 struct tmpnode *tp = vtp; 61 int shift = 0; 62 /* 63 * Check access based on owner, group and 64 * public permissions in tmpnode. 65 */ 66 if (crgetuid(cred) != tp->tn_uid) { 67 shift += MODESHIFT; 68 if (groupmember(tp->tn_gid, cred) == 0) 69 shift += MODESHIFT; 70 } 71 72 return (secpolicy_vnode_access2(cred, TNTOV(tp), tp->tn_uid, 73 tp->tn_mode << shift, mode)); 74 } 75 76 /* 77 * Decide whether it is okay to remove within a sticky directory. 78 * Two conditions need to be met: write access to the directory 79 * is needed. In sticky directories, write access is not sufficient; 80 * you can remove entries from a directory only if you own the directory, 81 * if you are privileged, if you own the entry or if they entry is 82 * a plain file and you have write access to that file. 83 * Function returns 0 if remove access is granted. 84 */ 85 int 86 tmp_sticky_remove_access(struct tmpnode *dir, struct tmpnode *entry, 87 struct cred *cr) 88 { 89 uid_t uid = crgetuid(cr); 90 91 if ((dir->tn_mode & S_ISVTX) && 92 uid != dir->tn_uid && 93 uid != entry->tn_uid && 94 (entry->tn_type != VREG || 95 tmp_taccess(entry, VWRITE, cr) != 0)) 96 return (secpolicy_vnode_remove(cr)); 97 98 return (0); 99 } 100 101 /* 102 * Allocate zeroed memory if tmpfs_maxkmem has not been exceeded 103 * or the 'musthave' flag is set. 'musthave' allocations should 104 * always be subordinate to normal allocations so that tmpfs_maxkmem 105 * can't be exceeded by more than a few KB. Example: when creating 106 * a new directory, the tmpnode is a normal allocation; if that 107 * succeeds, the dirents for "." and ".." are 'musthave' allocations. 108 */ 109 void * 110 tmp_memalloc(size_t size, int musthave) 111 { 112 static time_t last_warning; 113 time_t now; 114 115 if (atomic_add_long_nv(&tmp_kmemspace, size) < tmpfs_maxkmem || 116 musthave) 117 return (kmem_zalloc(size, KM_SLEEP)); 118 119 atomic_add_long(&tmp_kmemspace, -size); 120 now = gethrestime_sec(); 121 if (last_warning != now) { 122 last_warning = now; 123 cmn_err(CE_WARN, "tmp_memalloc: tmpfs over memory limit"); 124 } 125 return (NULL); 126 } 127 128 void 129 tmp_memfree(void *cp, size_t size) 130 { 131 kmem_free(cp, size); 132 atomic_add_long(&tmp_kmemspace, -size); 133 } 134 135 /* 136 * Convert a string containing a number (number of bytes) to a pgcnt_t, 137 * containing the corresponding number of pages. On 32-bit kernels, the 138 * maximum value encoded in 'str' is PAGESIZE * ULONG_MAX, while the value 139 * returned in 'maxpg' is at most ULONG_MAX. 140 * 141 * The number may be followed by a magnitude suffix: "k" or "K" for kilobytes; 142 * "m" or "M" for megabytes; "g" or "G" for gigabytes. This interface allows 143 * for an arguably esoteric interpretation of multiple suffix characters: 144 * namely, they cascade. For example, the caller may specify "2mk", which is 145 * interpreted as 2 gigabytes. It would seem, at this late stage, that the 146 * horse has left not only the barn but indeed the country, and possibly the 147 * entire planetary system. Alternatively, the number may be followed by a 148 * single '%' sign, indicating the size is a percentage of either the zone's 149 * swap limit or the system's overall swap size. 150 * 151 * Parse and overflow errors are detected and a non-zero number returned on 152 * error. 153 */ 154 int 155 tmp_convnum(char *str, pgcnt_t *maxpg) 156 { 157 u_longlong_t num = 0; 158 #ifdef _LP64 159 u_longlong_t max_bytes = ULONG_MAX; 160 #else 161 u_longlong_t max_bytes = PAGESIZE * (uint64_t)ULONG_MAX; 162 #endif 163 char *c; 164 const struct convchar { 165 char *cc_char; 166 uint64_t cc_factor; 167 } convchars[] = { 168 { "kK", KILOBYTE }, 169 { "mM", MEGABYTE }, 170 { "gG", GIGABYTE }, 171 { NULL, 0 } 172 }; 173 174 if (str == NULL) { 175 return (EINVAL); 176 } 177 c = str; 178 179 /* 180 * Convert the initial numeric portion of the input string. 181 */ 182 if (ddi_strtoull(str, &c, 10, &num) != 0) { 183 return (EINVAL); 184 } 185 186 /* 187 * Handle a size in percent. Anything other than a single percent 188 * modifier is invalid. We use either the zone's swap limit or the 189 * system's total available swap size as the initial value. Perform the 190 * intermediate calculation in pages to avoid overflow. 191 */ 192 if (*c == '%') { 193 u_longlong_t cap; 194 195 if (*(c + 1) != '\0') 196 return (EINVAL); 197 198 if (num > 100) 199 return (EINVAL); 200 201 cap = (u_longlong_t)curproc->p_zone->zone_max_swap_ctl; 202 if (cap == UINT64_MAX) { 203 /* 204 * Use the amount of available physical and memory swap 205 */ 206 mutex_enter(&anoninfo_lock); 207 cap = TOTAL_AVAILABLE_SWAP; 208 mutex_exit(&anoninfo_lock); 209 } else { 210 cap = btop(cap); 211 } 212 213 num = ptob(cap * num / 100); 214 goto done; 215 } 216 217 /* 218 * Apply the (potentially cascading) magnitude suffixes until an 219 * invalid character is found, or the string comes to an end. 220 */ 221 for (; *c != '\0'; c++) { 222 int i; 223 224 for (i = 0; convchars[i].cc_char != NULL; i++) { 225 /* 226 * Check if this character matches this multiplier 227 * class: 228 */ 229 if (strchr(convchars[i].cc_char, *c) != NULL) { 230 /* 231 * Check for overflow: 232 */ 233 if (num > max_bytes / convchars[i].cc_factor) { 234 return (EINVAL); 235 } 236 237 num *= convchars[i].cc_factor; 238 goto valid_char; 239 } 240 } 241 242 /* 243 * This was not a valid multiplier suffix character. 244 */ 245 return (EINVAL); 246 247 valid_char: 248 continue; 249 } 250 251 done: 252 /* 253 * Since btopr() rounds up to page granularity, this round-up can 254 * cause an overflow only if 'num' is between (max_bytes - PAGESIZE) 255 * and (max_bytes). In this case the resulting number is zero, which 256 * is what we check for below. 257 */ 258 if ((*maxpg = (pgcnt_t)btopr(num)) == 0 && num != 0) 259 return (EINVAL); 260 return (0); 261 } 262 263 /* 264 * Parse an octal mode string for use as the permissions set for the root 265 * of the tmpfs mount. 266 */ 267 int 268 tmp_convmode(char *str, mode_t *mode) 269 { 270 ulong_t num; 271 char *c; 272 273 if (str == NULL) { 274 return (EINVAL); 275 } 276 277 if (ddi_strtoul(str, &c, 8, &num) != 0) { 278 return (EINVAL); 279 } 280 281 if ((num & ~VALIDMODEBITS) != 0) { 282 return (EINVAL); 283 } 284 285 *mode = VALIDMODEBITS & num; 286 return (0); 287 } 288