1 // SPDX-License-Identifier: GPL-2.0-only 2 3 #include <linux/stat.h> 4 #include <linux/sysctl.h> 5 #include <linux/slab.h> 6 #include <linux/cred.h> 7 #include <linux/hash.h> 8 #include <linux/kmemleak.h> 9 #include <linux/user_namespace.h> 10 11 struct ucounts init_ucounts = { 12 .ns = &init_user_ns, 13 .uid = GLOBAL_ROOT_UID, 14 .count = RCUREF_INIT(1), 15 }; 16 17 #define UCOUNTS_HASHTABLE_BITS 10 18 #define UCOUNTS_HASHTABLE_ENTRIES (1 << UCOUNTS_HASHTABLE_BITS) 19 static struct hlist_nulls_head ucounts_hashtable[UCOUNTS_HASHTABLE_ENTRIES] = { 20 [0 ... UCOUNTS_HASHTABLE_ENTRIES - 1] = HLIST_NULLS_HEAD_INIT(0) 21 }; 22 static DEFINE_SPINLOCK(ucounts_lock); 23 24 #define ucounts_hashfn(ns, uid) \ 25 hash_long((unsigned long)__kuid_val(uid) + (unsigned long)(ns), \ 26 UCOUNTS_HASHTABLE_BITS) 27 #define ucounts_hashentry(ns, uid) \ 28 (ucounts_hashtable + ucounts_hashfn(ns, uid)) 29 30 #ifdef CONFIG_SYSCTL 31 static struct ctl_table_set * 32 set_lookup(struct ctl_table_root *root) 33 { 34 return ¤t_user_ns()->set; 35 } 36 37 static int set_is_seen(struct ctl_table_set *set) 38 { 39 return ¤t_user_ns()->set == set; 40 } 41 42 static int set_permissions(struct ctl_table_header *head, 43 const struct ctl_table *table) 44 { 45 struct user_namespace *user_ns = 46 container_of(head->set, struct user_namespace, set); 47 int mode; 48 49 /* Allow users with CAP_SYS_RESOURCE unrestrained access */ 50 if (ns_capable(user_ns, CAP_SYS_RESOURCE)) 51 mode = (table->mode & S_IRWXU) >> 6; 52 else 53 /* Allow all others at most read-only access */ 54 mode = table->mode & S_IROTH; 55 return (mode << 6) | (mode << 3) | mode; 56 } 57 58 static struct ctl_table_root set_root = { 59 .lookup = set_lookup, 60 .permissions = set_permissions, 61 }; 62 63 static long ue_zero = 0; 64 static long ue_int_max = INT_MAX; 65 66 #define UCOUNT_ENTRY(name) \ 67 { \ 68 .procname = name, \ 69 .maxlen = sizeof(long), \ 70 .mode = 0644, \ 71 .proc_handler = proc_doulongvec_minmax, \ 72 .extra1 = &ue_zero, \ 73 .extra2 = &ue_int_max, \ 74 } 75 static const struct ctl_table user_table[] = { 76 UCOUNT_ENTRY("max_user_namespaces"), 77 UCOUNT_ENTRY("max_pid_namespaces"), 78 UCOUNT_ENTRY("max_uts_namespaces"), 79 UCOUNT_ENTRY("max_ipc_namespaces"), 80 UCOUNT_ENTRY("max_net_namespaces"), 81 UCOUNT_ENTRY("max_mnt_namespaces"), 82 UCOUNT_ENTRY("max_cgroup_namespaces"), 83 UCOUNT_ENTRY("max_time_namespaces"), 84 #ifdef CONFIG_INOTIFY_USER 85 UCOUNT_ENTRY("max_inotify_instances"), 86 UCOUNT_ENTRY("max_inotify_watches"), 87 #endif 88 #ifdef CONFIG_FANOTIFY 89 UCOUNT_ENTRY("max_fanotify_groups"), 90 UCOUNT_ENTRY("max_fanotify_marks"), 91 #endif 92 }; 93 #endif /* CONFIG_SYSCTL */ 94 95 bool setup_userns_sysctls(struct user_namespace *ns) 96 { 97 #ifdef CONFIG_SYSCTL 98 struct ctl_table *tbl; 99 100 BUILD_BUG_ON(ARRAY_SIZE(user_table) != UCOUNT_COUNTS); 101 setup_sysctl_set(&ns->set, &set_root, set_is_seen); 102 tbl = kmemdup(user_table, sizeof(user_table), GFP_KERNEL); 103 if (tbl) { 104 int i; 105 for (i = 0; i < UCOUNT_COUNTS; i++) { 106 tbl[i].data = &ns->ucount_max[i]; 107 } 108 ns->sysctls = __register_sysctl_table(&ns->set, "user", tbl, 109 ARRAY_SIZE(user_table)); 110 } 111 if (!ns->sysctls) { 112 kfree(tbl); 113 retire_sysctl_set(&ns->set); 114 return false; 115 } 116 #endif 117 return true; 118 } 119 120 void retire_userns_sysctls(struct user_namespace *ns) 121 { 122 #ifdef CONFIG_SYSCTL 123 const struct ctl_table *tbl; 124 125 tbl = ns->sysctls->ctl_table_arg; 126 unregister_sysctl_table(ns->sysctls); 127 retire_sysctl_set(&ns->set); 128 kfree(tbl); 129 #endif 130 } 131 132 static struct ucounts *find_ucounts(struct user_namespace *ns, kuid_t uid, 133 struct hlist_nulls_head *hashent) 134 { 135 struct ucounts *ucounts; 136 struct hlist_nulls_node *pos; 137 138 guard(rcu)(); 139 hlist_nulls_for_each_entry_rcu(ucounts, pos, hashent, node) { 140 if (uid_eq(ucounts->uid, uid) && (ucounts->ns == ns)) { 141 if (rcuref_get(&ucounts->count)) 142 return ucounts; 143 } 144 } 145 return NULL; 146 } 147 148 static void hlist_add_ucounts(struct ucounts *ucounts) 149 { 150 struct hlist_nulls_head *hashent = ucounts_hashentry(ucounts->ns, ucounts->uid); 151 152 spin_lock_irq(&ucounts_lock); 153 hlist_nulls_add_head_rcu(&ucounts->node, hashent); 154 spin_unlock_irq(&ucounts_lock); 155 } 156 157 struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid) 158 { 159 struct hlist_nulls_head *hashent = ucounts_hashentry(ns, uid); 160 struct ucounts *ucounts, *new; 161 162 ucounts = find_ucounts(ns, uid, hashent); 163 if (ucounts) 164 return ucounts; 165 166 new = kzalloc(sizeof(*new), GFP_KERNEL); 167 if (!new) 168 return NULL; 169 170 new->ns = ns; 171 new->uid = uid; 172 rcuref_init(&new->count, 1); 173 174 spin_lock_irq(&ucounts_lock); 175 ucounts = find_ucounts(ns, uid, hashent); 176 if (ucounts) { 177 spin_unlock_irq(&ucounts_lock); 178 kfree(new); 179 return ucounts; 180 } 181 182 hlist_nulls_add_head_rcu(&new->node, hashent); 183 get_user_ns(new->ns); 184 spin_unlock_irq(&ucounts_lock); 185 return new; 186 } 187 188 void put_ucounts(struct ucounts *ucounts) 189 { 190 unsigned long flags; 191 192 if (rcuref_put(&ucounts->count)) { 193 spin_lock_irqsave(&ucounts_lock, flags); 194 hlist_nulls_del_rcu(&ucounts->node); 195 spin_unlock_irqrestore(&ucounts_lock, flags); 196 197 put_user_ns(ucounts->ns); 198 kfree_rcu(ucounts, rcu); 199 } 200 } 201 202 static inline bool atomic_long_inc_below(atomic_long_t *v, long u) 203 { 204 long c = atomic_long_read(v); 205 206 do { 207 if (unlikely(c >= u)) 208 return false; 209 } while (!atomic_long_try_cmpxchg(v, &c, c+1)); 210 211 return true; 212 } 213 214 struct ucounts *inc_ucount(struct user_namespace *ns, kuid_t uid, 215 enum ucount_type type) 216 { 217 struct ucounts *ucounts, *iter, *bad; 218 struct user_namespace *tns; 219 ucounts = alloc_ucounts(ns, uid); 220 for (iter = ucounts; iter; iter = tns->ucounts) { 221 long max; 222 tns = iter->ns; 223 max = READ_ONCE(tns->ucount_max[type]); 224 if (!atomic_long_inc_below(&iter->ucount[type], max)) 225 goto fail; 226 } 227 return ucounts; 228 fail: 229 bad = iter; 230 for (iter = ucounts; iter != bad; iter = iter->ns->ucounts) 231 atomic_long_dec(&iter->ucount[type]); 232 233 put_ucounts(ucounts); 234 return NULL; 235 } 236 237 void dec_ucount(struct ucounts *ucounts, enum ucount_type type) 238 { 239 struct ucounts *iter; 240 for (iter = ucounts; iter; iter = iter->ns->ucounts) { 241 long dec = atomic_long_dec_if_positive(&iter->ucount[type]); 242 WARN_ON_ONCE(dec < 0); 243 } 244 put_ucounts(ucounts); 245 } 246 247 long inc_rlimit_ucounts(struct ucounts *ucounts, enum rlimit_type type, long v) 248 { 249 struct ucounts *iter; 250 long max = LONG_MAX; 251 long ret = 0; 252 253 for (iter = ucounts; iter; iter = iter->ns->ucounts) { 254 long new = atomic_long_add_return(v, &iter->rlimit[type]); 255 if (new < 0 || new > max) 256 ret = LONG_MAX; 257 else if (iter == ucounts) 258 ret = new; 259 max = get_userns_rlimit_max(iter->ns, type); 260 } 261 return ret; 262 } 263 264 bool dec_rlimit_ucounts(struct ucounts *ucounts, enum rlimit_type type, long v) 265 { 266 struct ucounts *iter; 267 long new = -1; /* Silence compiler warning */ 268 for (iter = ucounts; iter; iter = iter->ns->ucounts) { 269 long dec = atomic_long_sub_return(v, &iter->rlimit[type]); 270 WARN_ON_ONCE(dec < 0); 271 if (iter == ucounts) 272 new = dec; 273 } 274 return (new == 0); 275 } 276 277 static void do_dec_rlimit_put_ucounts(struct ucounts *ucounts, 278 struct ucounts *last, enum rlimit_type type) 279 { 280 struct ucounts *iter, *next; 281 for (iter = ucounts; iter != last; iter = next) { 282 long dec = atomic_long_sub_return(1, &iter->rlimit[type]); 283 WARN_ON_ONCE(dec < 0); 284 next = iter->ns->ucounts; 285 if (dec == 0) 286 put_ucounts(iter); 287 } 288 } 289 290 void dec_rlimit_put_ucounts(struct ucounts *ucounts, enum rlimit_type type) 291 { 292 do_dec_rlimit_put_ucounts(ucounts, NULL, type); 293 } 294 295 long inc_rlimit_get_ucounts(struct ucounts *ucounts, enum rlimit_type type, 296 bool override_rlimit) 297 { 298 /* Caller must hold a reference to ucounts */ 299 struct ucounts *iter; 300 long max = LONG_MAX; 301 long dec, ret = 0; 302 303 for (iter = ucounts; iter; iter = iter->ns->ucounts) { 304 long new = atomic_long_add_return(1, &iter->rlimit[type]); 305 if (new < 0 || new > max) 306 goto dec_unwind; 307 if (iter == ucounts) 308 ret = new; 309 if (!override_rlimit) 310 max = get_userns_rlimit_max(iter->ns, type); 311 /* 312 * Grab an extra ucount reference for the caller when 313 * the rlimit count was previously 0. 314 */ 315 if (new != 1) 316 continue; 317 if (!get_ucounts(iter)) 318 goto dec_unwind; 319 } 320 return ret; 321 dec_unwind: 322 dec = atomic_long_sub_return(1, &iter->rlimit[type]); 323 WARN_ON_ONCE(dec < 0); 324 do_dec_rlimit_put_ucounts(ucounts, iter, type); 325 return 0; 326 } 327 328 bool is_rlimit_overlimit(struct ucounts *ucounts, enum rlimit_type type, unsigned long rlimit) 329 { 330 struct ucounts *iter; 331 long max = rlimit; 332 if (rlimit > LONG_MAX) 333 max = LONG_MAX; 334 for (iter = ucounts; iter; iter = iter->ns->ucounts) { 335 long val = get_rlimit_value(iter, type); 336 if (val < 0 || val > max) 337 return true; 338 max = get_userns_rlimit_max(iter->ns, type); 339 } 340 return false; 341 } 342 343 static __init int user_namespace_sysctl_init(void) 344 { 345 #ifdef CONFIG_SYSCTL 346 static struct ctl_table_header *user_header; 347 static struct ctl_table empty[1]; 348 /* 349 * It is necessary to register the user directory in the 350 * default set so that registrations in the child sets work 351 * properly. 352 */ 353 user_header = register_sysctl_sz("user", empty, 0); 354 kmemleak_ignore(user_header); 355 BUG_ON(!user_header); 356 BUG_ON(!setup_userns_sysctls(&init_user_ns)); 357 #endif 358 hlist_add_ucounts(&init_ucounts); 359 inc_rlimit_ucounts(&init_ucounts, UCOUNT_RLIMIT_NPROC, 1); 360 return 0; 361 } 362 subsys_initcall(user_namespace_sysctl_init); 363