1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/types.h> 27 #include <sys/param.h> 28 #include <sys/t_lock.h> 29 #include <sys/systm.h> 30 #include <sys/time.h> 31 #include <sys/sysmacros.h> 32 #include <sys/proc.h> 33 #include <sys/disp.h> 34 #include <sys/user.h> 35 #include <sys/time.h> 36 #include <sys/vfs.h> 37 #include <sys/vnode.h> 38 #include <sys/stat.h> 39 #include <sys/mode.h> 40 #include <sys/errno.h> 41 #include <sys/kmem.h> 42 #include <vm/seg.h> 43 #include <vm/seg_map.h> 44 #include <vm/anon.h> 45 #include <vm/page.h> 46 #include <vm/pvn.h> 47 #include <sys/fs/tmp.h> 48 #include <sys/fs/tmpnode.h> 49 #include <sys/debug.h> 50 #include <sys/cmn_err.h> 51 #include <sys/swap.h> 52 #include <sys/vtrace.h> 53 54 /* 55 * Reserve swap space for the size of the file. 56 * Called before growing a file (i.e. ftruncate, write) 57 * Returns 0 on success. 58 */ 59 int 60 tmp_resv( 61 struct tmount *tm, 62 struct tmpnode *tp, 63 size_t delta, /* size needed */ 64 int pagecreate) /* call anon_resv if set */ 65 { 66 pgcnt_t pages = btopr(delta); 67 zone_t *zone; 68 69 ASSERT(RW_WRITE_HELD(&tp->tn_rwlock)); 70 ASSERT(tp->tn_type == VREG); 71 /* 72 * pagecreate is set only if we actually need to call anon_resv 73 * to reserve an additional page of anonymous memory. 74 * Since anon_resv always reserves a page at a time, 75 * it should only get called when we know we're growing the 76 * file into a new page or filling a hole. 77 * 78 * Deny if trying to reserve more than tmpfs can allocate 79 */ 80 zone = tm->tm_vfsp->vfs_zone; 81 if (pagecreate && ((tm->tm_anonmem + pages > tm->tm_anonmax) || 82 (!anon_checkspace(ptob(pages + tmpfs_minfree), zone)) || 83 (anon_try_resv_zone(delta, zone) == 0))) { 84 return (1); 85 } 86 87 /* 88 * update statistics 89 */ 90 if (pagecreate) { 91 mutex_enter(&tm->tm_contents); 92 tm->tm_anonmem += pages; 93 mutex_exit(&tm->tm_contents); 94 95 TRACE_2(TR_FAC_VM, TR_ANON_TMPFS, "anon tmpfs:%p %lu", 96 tp, delta); 97 } 98 99 return (0); 100 } 101 102 /* 103 * tmp_unresv - called when truncating a file 104 * Only called if we're freeing at least pagesize bytes 105 * because anon_unresv does a btopr(delta) 106 */ 107 static void 108 tmp_unresv( 109 struct tmount *tm, 110 struct tmpnode *tp, 111 size_t delta) 112 { 113 ASSERT(RW_WRITE_HELD(&tp->tn_rwlock)); 114 ASSERT(tp->tn_type == VREG); 115 116 anon_unresv_zone(delta, tm->tm_vfsp->vfs_zone); 117 118 mutex_enter(&tm->tm_contents); 119 tm->tm_anonmem -= btopr(delta); 120 mutex_exit(&tm->tm_contents); 121 122 TRACE_2(TR_FAC_VM, TR_ANON_TMPFS, "anon tmpfs:%p %lu", tp, delta); 123 } 124 125 #define TMP_INIT_SZ 128 126 127 /* 128 * Grow the anon pointer array to cover 'newsize' bytes plus slack. 129 */ 130 void 131 tmpnode_growmap(struct tmpnode *tp, ulong_t newsize) 132 { 133 pgcnt_t np = btopr(newsize); 134 135 ASSERT(RW_WRITE_HELD(&tp->tn_rwlock)); 136 ASSERT(RW_WRITE_HELD(&tp->tn_contents)); 137 ASSERT(tp->tn_type == VREG); 138 139 if (tp->tn_asize >= np) 140 return; 141 142 if (newsize > MAXOFF_T) 143 np = btopr((u_offset_t)MAXOFF_T); 144 145 if (tp->tn_anon == NULL) { 146 tp->tn_anon = anon_create(MAX(np, TMP_INIT_SZ), ANON_SLEEP); 147 tp->tn_asize = tp->tn_anon->size; 148 return; 149 } 150 151 tp->tn_asize = anon_grow(tp->tn_anon, NULL, tp->tn_asize, 152 np - tp->tn_asize, ANON_SLEEP); 153 ASSERT(tp->tn_asize >= np); 154 } 155 156 /* 157 * Initialize a tmpnode and add it to file list under mount point. 158 */ 159 void 160 tmpnode_init(struct tmount *tm, struct tmpnode *t, vattr_t *vap, cred_t *cred) 161 { 162 struct vnode *vp; 163 timestruc_t now; 164 165 ASSERT(vap != NULL); 166 167 rw_init(&t->tn_rwlock, NULL, RW_DEFAULT, NULL); 168 mutex_init(&t->tn_tlock, NULL, MUTEX_DEFAULT, NULL); 169 t->tn_mode = MAKEIMODE(vap->va_type, vap->va_mode); 170 t->tn_mask = 0; 171 t->tn_type = vap->va_type; 172 t->tn_nodeid = (ino64_t)(uint32_t)((uintptr_t)t >> 3); 173 t->tn_nlink = 1; 174 t->tn_size = 0; 175 176 if (cred == NULL) { 177 t->tn_uid = vap->va_uid; 178 t->tn_gid = vap->va_gid; 179 } else { 180 t->tn_uid = crgetuid(cred); 181 t->tn_gid = crgetgid(cred); 182 } 183 184 t->tn_fsid = tm->tm_dev; 185 t->tn_rdev = vap->va_rdev; 186 t->tn_blksize = PAGESIZE; 187 t->tn_nblocks = 0; 188 gethrestime(&now); 189 t->tn_atime = now; 190 t->tn_mtime = now; 191 t->tn_ctime = now; 192 t->tn_seq = 0; 193 t->tn_dir = NULL; 194 195 t->tn_vnode = vn_alloc(KM_SLEEP); 196 vp = TNTOV(t); 197 vn_setops(vp, tmp_vnodeops); 198 vp->v_vfsp = tm->tm_vfsp; 199 vp->v_type = vap->va_type; 200 vp->v_rdev = vap->va_rdev; 201 vp->v_data = (caddr_t)t; 202 mutex_enter(&tm->tm_contents); 203 /* 204 * Increment the pseudo generation number for this tmpnode. 205 * Since tmpnodes are allocated and freed, there really is no 206 * particular generation number for a new tmpnode. Just fake it 207 * by using a counter in each file system. 208 */ 209 t->tn_gen = tm->tm_gen++; 210 211 /* 212 * Add new tmpnode to end of linked list of tmpnodes for this tmpfs 213 * Root directory is handled specially in tmp_mount. 214 */ 215 if (tm->tm_rootnode != (struct tmpnode *)NULL) { 216 t->tn_forw = NULL; 217 t->tn_back = tm->tm_rootnode->tn_back; 218 t->tn_back->tn_forw = tm->tm_rootnode->tn_back = t; 219 } 220 mutex_exit(&tm->tm_contents); 221 vn_exists(vp); 222 } 223 224 /* 225 * tmpnode_trunc - set length of tmpnode and deal with resources 226 */ 227 int 228 tmpnode_trunc( 229 struct tmount *tm, 230 struct tmpnode *tp, 231 ulong_t newsize) 232 { 233 size_t oldsize = tp->tn_size; 234 size_t delta; 235 struct vnode *vp = TNTOV(tp); 236 timestruc_t now; 237 int error = 0; 238 239 ASSERT(RW_WRITE_HELD(&tp->tn_rwlock)); 240 ASSERT(RW_WRITE_HELD(&tp->tn_contents)); 241 242 if (newsize == oldsize) { 243 /* Required by POSIX */ 244 goto stamp_out; 245 } 246 247 switch (tp->tn_type) { 248 case VREG: 249 /* Growing the file */ 250 if (newsize > oldsize) { 251 delta = P2ROUNDUP(newsize, PAGESIZE) - 252 P2ROUNDUP(oldsize, PAGESIZE); 253 /* 254 * Grow the size of the anon array to the new size 255 * Reserve the space for the growth here. 256 * We do it this way for now because this is how 257 * tmpfs used to do it, and this way the reserved 258 * space is alway equal to the file size. 259 * Alternatively, we could wait to reserve space 'til 260 * someone tries to store into one of the newly 261 * trunc'ed up pages. This would give us behavior 262 * identical to ufs; i.e., you could fail a 263 * fault on storing into a holey region of a file 264 * if there is no space in the filesystem to fill 265 * the hole at that time. 266 */ 267 /* 268 * tmp_resv calls anon_resv only if we're extending 269 * the file into a new page 270 */ 271 if (tmp_resv(tm, tp, delta, 272 (btopr(newsize) != btopr(oldsize)))) { 273 error = ENOSPC; 274 goto out; 275 } 276 tmpnode_growmap(tp, newsize); 277 tp->tn_size = newsize; 278 break; 279 } 280 281 /* Free anon pages if shrinking file over page boundary. */ 282 if (btopr(newsize) != btopr(oldsize)) { 283 pgcnt_t freed; 284 delta = P2ROUNDUP(oldsize, PAGESIZE) - 285 P2ROUNDUP(newsize, PAGESIZE); 286 freed = anon_pages(tp->tn_anon, btopr(newsize), 287 btopr(delta)); 288 tp->tn_nblocks -= freed; 289 anon_free(tp->tn_anon, btopr(newsize), delta); 290 tmp_unresv(tm, tp, delta); 291 } 292 293 /* 294 * Update the file size now to reflect the pages we just 295 * blew away as we're about to drop the 296 * contents lock to zero the partial page (which could 297 * re-enter tmpfs via getpage and try to reacquire the lock) 298 * Once we drop the lock, faulters can fill in holes in 299 * the file and if we haven't updated the size they 300 * may fill in holes that are beyond EOF, which will then 301 * never get cleared. 302 */ 303 tp->tn_size = newsize; 304 305 /* Zero new size of file to page boundary. */ 306 if (anon_get_ptr(tp->tn_anon, btop(newsize)) != NULL) { 307 size_t zlen; 308 309 zlen = PAGESIZE - ((ulong_t)newsize & PAGEOFFSET); 310 rw_exit(&tp->tn_contents); 311 pvn_vpzero(TNTOV(tp), (u_offset_t)newsize, zlen); 312 rw_enter(&tp->tn_contents, RW_WRITER); 313 } 314 315 if (newsize == 0) { 316 /* Delete anon array for tmpnode */ 317 ASSERT(tp->tn_nblocks == 0); 318 ASSERT(anon_get_ptr(tp->tn_anon, 0) == NULL); 319 ASSERT(!vn_has_cached_data(vp)); 320 321 anon_release(tp->tn_anon, tp->tn_asize); 322 tp->tn_anon = NULL; 323 tp->tn_asize = 0; 324 } 325 break; 326 case VLNK: 327 /* 328 * Don't do anything here 329 * tmp_inactive frees the memory 330 */ 331 if (newsize != 0) 332 error = EINVAL; 333 goto out; 334 case VDIR: 335 /* 336 * Remove all the directory entries under this directory. 337 */ 338 if (newsize != 0) { 339 error = EINVAL; 340 goto out; 341 } 342 tdirtrunc(tp); 343 ASSERT(tp->tn_nlink == 0); 344 break; 345 default: 346 goto out; 347 } 348 349 stamp_out: 350 gethrestime(&now); 351 tp->tn_mtime = now; 352 tp->tn_ctime = now; 353 out: 354 /* 355 * tmpnode_trunc() cannot fail when newsize == 0. 356 */ 357 ASSERT(error == 0 || newsize != 0); 358 return (error); 359 } 360