/*
 *  linux/fs/file.c
 *
 *  Copyright (C) 1998-1999, Stephen Tweedie and Bill Hawes
 *
 *  Manage the dynamic fd arrays in the process files_struct.
 */

#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/time.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/file.h>
#include <linux/bitops.h>
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/rcupdate.h>
#include <linux/workqueue.h>

struct fdtable_defer {
	spinlock_t lock;
	struct work_struct wq;
	struct fdtable *next;
};

/*
 * We use this list to defer freeing fdtables that have vmalloced
 * sets/arrays.  Keeping a per-cpu list lets us avoid embedding the
 * work_struct in the fdtable itself, which would grow that per-task
 * structure by 64 bytes on i386.
 */
static DEFINE_PER_CPU(struct fdtable_defer, fdtable_defer_list);


/*
 * Allocate an fd array, using kmalloc or vmalloc.
 * Note: the array isn't cleared at allocation time.
 */
struct file ** alloc_fd_array(int num)
{
	struct file **new_fds;
	int size = num * sizeof(struct file *);

	if (size <= PAGE_SIZE)
		new_fds = (struct file **) kmalloc(size, GFP_KERNEL);
	else
		new_fds = (struct file **) vmalloc(size);
	return new_fds;
}

void free_fd_array(struct file **array, int num)
{
	int size = num * sizeof(struct file *);

	if (!array) {
		printk(KERN_ERR "free_fd_array: array = 0 (num = %d)\n", num);
		return;
	}

	if (num <= NR_OPEN_DEFAULT) /* Don't free the embedded fd array! */
		return;
	else if (size <= PAGE_SIZE)
		kfree(array);
	else
		vfree(array);
}

/* Free the fdset bitmaps, the fd array and the fdtable structure itself. */
static void __free_fdtable(struct fdtable *fdt)
{
	free_fdset(fdt->open_fds, fdt->max_fdset);
	free_fdset(fdt->close_on_exec, fdt->max_fdset);
	free_fd_array(fdt->fd, fdt->max_fds);
	kfree(fdt);
}

/*
 * Workqueue handler: drain this CPU's deferred list, freeing each
 * fdtable on it.  Runs in process context, where vfree() is allowed.
 */
static void free_fdtable_work(struct work_struct *work)
{
	struct fdtable_defer *f =
		container_of(work, struct fdtable_defer, wq);
	struct fdtable *fdt;

	spin_lock_bh(&f->lock);
	fdt = f->next;
	f->next = NULL;
	spin_unlock_bh(&f->lock);
	while (fdt) {
		struct fdtable *next = fdt->next;
		__free_fdtable(fdt);
		fdt = next;
	}
}

/* RCU callback: runs once no reader can still hold a reference to fdt. */
static void free_fdtable_rcu(struct rcu_head *rcu)
{
	struct fdtable *fdt = container_of(rcu, struct fdtable, rcu);
	int fdset_size, fdarray_size;
	struct fdtable_defer *fddef;

	BUG_ON(!fdt);
	fdset_size = fdt->max_fdset / 8;
	fdarray_size = fdt->max_fds * sizeof(struct file *);

	if (fdt->free_files) {
		/*
		 * This fdtable was embedded in the files structure and the
		 * files structure itself was being destroyed.
		 * It is now safe to free the files structure.
		 */
		kmem_cache_free(files_cachep, fdt->free_files);
		return;
	}
	if (fdt->max_fdset <= EMBEDDED_FD_SET_SIZE &&
		fdt->max_fds <= NR_OPEN_DEFAULT) {
		/*
		 * The fdtable was embedded
		 */
		return;
	}
	if (fdset_size <= PAGE_SIZE && fdarray_size <= PAGE_SIZE) {
		kfree(fdt->open_fds);
		kfree(fdt->close_on_exec);
		kfree(fdt->fd);
		kfree(fdt);
	} else {
		fddef = &get_cpu_var(fdtable_defer_list);
		spin_lock(&fddef->lock);
		fdt->next = fddef->next;
		fddef->next = fdt;
		/* vmallocs are handled from the workqueue context */
		schedule_work(&fddef->wq);
		spin_unlock(&fddef->lock);
		put_cpu_var(fdtable_defer_list);
	}
}

void free_fdtable(struct fdtable *fdt)
{
	if (fdt->free_files ||
		fdt->max_fdset > EMBEDDED_FD_SET_SIZE ||
		fdt->max_fds > NR_OPEN_DEFAULT)
		call_rcu(&fdt->rcu, free_fdtable_rcu);
}

/*
 * Copy the existing tables over to a newly allocated fdtable and clear
 * any space beyond the old sizes.  Called with files->file_lock held.
 */
static void copy_fdtable(struct fdtable *nfdt, struct fdtable *fdt)
{
	int i;
	int count;

	BUG_ON(nfdt->max_fdset < fdt->max_fdset);
	BUG_ON(nfdt->max_fds < fdt->max_fds);
	/* Copy the existing tables and install the new pointers */

	i = fdt->max_fdset / (sizeof(unsigned long) * 8);
	count = (nfdt->max_fdset - fdt->max_fdset) / 8;

	/*
	 * Don't copy the entire array if the current fdset is
	 * not yet initialised.
	 */
	if (i) {
		memcpy(nfdt->open_fds, fdt->open_fds,
						fdt->max_fdset/8);
		memcpy(nfdt->close_on_exec, fdt->close_on_exec,
						fdt->max_fdset/8);
		memset(&nfdt->open_fds->fds_bits[i], 0, count);
		memset(&nfdt->close_on_exec->fds_bits[i], 0, count);
	}

	/* Don't copy/clear the array if we are creating a new
	   fd array for fork() */
	if (fdt->max_fds) {
		memcpy(nfdt->fd, fdt->fd,
			fdt->max_fds * sizeof(struct file *));
		/* clear the remainder of the array */
		memset(&nfdt->fd[fdt->max_fds], 0,
			(nfdt->max_fds - fdt->max_fds) *
			sizeof(struct file *));
	}
}

/*
 * Allocate an fdset array, using kmalloc or vmalloc.
 * Note: the array isn't cleared at allocation time.
 */
fd_set * alloc_fdset(int num)
{
	fd_set *new_fdset;
	int size = num / 8;

	if (size <= PAGE_SIZE)
		new_fdset = (fd_set *) kmalloc(size, GFP_KERNEL);
	else
		new_fdset = (fd_set *) vmalloc(size);
	return new_fdset;
}

void free_fdset(fd_set *array, int num)
{
	if (num <= EMBEDDED_FD_SET_SIZE) /* Don't free an embedded fdset */
		return;
	else if (num <= 8 * PAGE_SIZE)	/* i.e. size <= PAGE_SIZE */
		kfree(array);
	else
		vfree(array);
}

static struct fdtable *alloc_fdtable(int nr)
{
	struct fdtable *fdt = NULL;
	int nfds = 0;
	fd_set *new_openset = NULL, *new_execset = NULL;
	struct file **new_fds;

	fdt = kzalloc(sizeof(*fdt), GFP_KERNEL);
	if (!fdt)
		goto out;

	nfds = max_t(int, 8 * L1_CACHE_BYTES, roundup_pow_of_two(nr + 1));
	if (nfds > NR_OPEN)
		nfds = NR_OPEN;

	new_openset = alloc_fdset(nfds);
	new_execset = alloc_fdset(nfds);
	if (!new_openset || !new_execset)
		goto out;
	fdt->open_fds = new_openset;
	fdt->close_on_exec = new_execset;
	fdt->max_fdset = nfds;

	nfds = NR_OPEN_DEFAULT;
	/*
	 * Expand to the max in easy steps, and keep expanding it until
	 * we have enough for the requested fd array size.
	 */
	do {
#if NR_OPEN_DEFAULT < 256
		if (nfds < 256)
			nfds = 256;
		else
#endif
		if (nfds < (PAGE_SIZE / sizeof(struct file *)))
			nfds = PAGE_SIZE / sizeof(struct file *);
		else {
			nfds = nfds * 2;
			if (nfds > NR_OPEN)
				nfds = NR_OPEN;
		}
	} while (nfds <= nr);
	new_fds = alloc_fd_array(nfds);
	if (!new_fds)
		goto out2;
	fdt->fd = new_fds;
	fdt->max_fds = nfds;
	fdt->free_files = NULL;
	return fdt;
out2:
	nfds = fdt->max_fdset;
out:
	free_fdset(new_openset, nfds);
	free_fdset(new_execset, nfds);
	kfree(fdt);
	return NULL;
}

/*
 * Expand the file descriptor table.
 * This function will allocate a new fdtable and both fd array and fdset, of
 * the given size.
 * Return <0 error code on error; 1 on successful completion.
 * The files->file_lock should be held on entry, and will be held on exit.
 */
static int expand_fdtable(struct files_struct *files, int nr)
	__releases(files->file_lock)
	__acquires(files->file_lock)
{
	struct fdtable *new_fdt, *cur_fdt;

	spin_unlock(&files->file_lock);
	new_fdt = alloc_fdtable(nr);
	spin_lock(&files->file_lock);
	if (!new_fdt)
		return -ENOMEM;
	/*
	 * Check again since another task may have expanded the fd table while
	 * we dropped the lock
	 */
	cur_fdt = files_fdtable(files);
	if (nr >= cur_fdt->max_fds || nr >= cur_fdt->max_fdset) {
		/* Continue as planned */
		copy_fdtable(new_fdt, cur_fdt);
		rcu_assign_pointer(files->fdt, new_fdt);
		free_fdtable(cur_fdt);
	} else {
		/* Somebody else expanded, so undo our attempt */
		__free_fdtable(new_fdt);
	}
	return 1;
}

/*
 * Expand files.
 * This function will expand the file structures, if the requested size exceeds
 * the current capacity and there is room for expansion.
 * Return <0 error code on error; 0 when nothing done; 1 when files were
 * expanded and execution may have blocked.
 * The files->file_lock should be held on entry, and will be held on exit.
 */
int expand_files(struct files_struct *files, int nr)
{
	struct fdtable *fdt;

	fdt = files_fdtable(files);
	/* Do we need to expand? */
	if (nr < fdt->max_fdset && nr < fdt->max_fds)
		return 0;
	/* Can we expand? */
	if (fdt->max_fdset >= NR_OPEN || fdt->max_fds >= NR_OPEN ||
	    nr >= NR_OPEN)
		return -EMFILE;

	/* All good, so we try */
	return expand_fdtable(files, nr);
}

static void __devinit fdtable_defer_list_init(int cpu)
{
	struct fdtable_defer *fddef = &per_cpu(fdtable_defer_list, cpu);
	spin_lock_init(&fddef->lock);
	INIT_WORK(&fddef->wq, free_fdtable_work);
	fddef->next = NULL;
}

void __init files_defer_init(void)
{
	int i;
	for_each_possible_cpu(i)
		fdtable_defer_list_init(i);
}
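
/*
 * Illustrative sketch, not part of the original file: how a caller is
 * expected to consume expand_files()'s three-way return value.  The
 * helper name example_reserve_fd and its retry loop are assumptions,
 * modeled loosely on the dup2()/locate_fd() style callers of this kernel
 * era, shown only to document the locking and retry contract.
 */
#if 0	/* example only, never compiled */
static int example_reserve_fd(struct files_struct *files, unsigned int fd)
{
	int error;

	spin_lock(&files->file_lock);
	do {
		/*
		 * expand_files() returns <0 on error, 0 when the current
		 * table is already big enough, and 1 when it expanded the
		 * table.  In the expansion case, file_lock was dropped and
		 * retaken inside expand_fdtable(), so any state observed
		 * before the call may be stale: loop and recheck.
		 */
		error = expand_files(files, fd);
	} while (error == 1);
	spin_unlock(&files->file_lock);
	return error < 0 ? error : 0;
}
#endif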