1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/types.h> 29 #include <sys/param.h> 30 #include <sys/t_lock.h> 31 #include <sys/systm.h> 32 #include <sys/time.h> 33 #include <sys/sysmacros.h> 34 #include <sys/proc.h> 35 #include <sys/disp.h> 36 #include <sys/user.h> 37 #include <sys/time.h> 38 #include <sys/vfs.h> 39 #include <sys/vnode.h> 40 #include <sys/stat.h> 41 #include <sys/mode.h> 42 #include <sys/errno.h> 43 #include <sys/kmem.h> 44 #include <vm/seg.h> 45 #include <vm/seg_map.h> 46 #include <vm/anon.h> 47 #include <vm/page.h> 48 #include <vm/pvn.h> 49 #include <sys/fs/tmp.h> 50 #include <sys/fs/tmpnode.h> 51 #include <sys/debug.h> 52 #include <sys/cmn_err.h> 53 #include <sys/swap.h> 54 #include <sys/vtrace.h> 55 56 /* 57 * Reserve swap space for the size of the file. 58 * Called before growing a file (i.e. ftruncate, write) 59 * Returns 0 on success. 60 */ 61 int 62 tmp_resv( 63 struct tmount *tm, 64 struct tmpnode *tp, 65 size_t delta, /* size needed */ 66 int pagecreate) /* call anon_resv if set */ 67 { 68 pgcnt_t pages = btopr(delta); 69 zone_t *zone; 70 71 ASSERT(RW_WRITE_HELD(&tp->tn_rwlock)); 72 ASSERT(tp->tn_type == VREG); 73 /* 74 * pagecreate is set only if we actually need to call anon_resv 75 * to reserve an additional page of anonymous memory. 76 * Since anon_resv always reserves a page at a time, 77 * it should only get called when we know we're growing the 78 * file into a new page or filling a hole. 79 * 80 * Deny if trying to reserve more than tmpfs can allocate 81 */ 82 zone = tm->tm_vfsp->vfs_zone; 83 if (pagecreate && ((tm->tm_anonmem + pages > tm->tm_anonmax) || 84 (!anon_checkspace(ptob(pages + tmpfs_minfree), zone)) || 85 (anon_try_resv_zone(delta, zone) == 0))) { 86 return (1); 87 } 88 89 /* 90 * update statistics 91 */ 92 if (pagecreate) { 93 mutex_enter(&tm->tm_contents); 94 tm->tm_anonmem += pages; 95 mutex_exit(&tm->tm_contents); 96 97 TRACE_2(TR_FAC_VM, TR_ANON_TMPFS, "anon tmpfs:%p %lu", 98 tp, delta); 99 } 100 101 return (0); 102 } 103 104 /* 105 * tmp_unresv - called when truncating a file 106 * Only called if we're freeing at least pagesize bytes 107 * because anon_unresv does a btopr(delta) 108 */ 109 static void 110 tmp_unresv( 111 struct tmount *tm, 112 struct tmpnode *tp, 113 size_t delta) 114 { 115 ASSERT(RW_WRITE_HELD(&tp->tn_rwlock)); 116 ASSERT(tp->tn_type == VREG); 117 118 anon_unresv_zone(delta, tm->tm_vfsp->vfs_zone); 119 120 mutex_enter(&tm->tm_contents); 121 tm->tm_anonmem -= btopr(delta); 122 mutex_exit(&tm->tm_contents); 123 124 TRACE_2(TR_FAC_VM, TR_ANON_TMPFS, "anon tmpfs:%p %lu", tp, delta); 125 } 126 127 #define TMP_INIT_SZ 128 128 129 /* 130 * Grow the anon pointer array to cover 'newsize' bytes plus slack. 131 */ 132 void 133 tmpnode_growmap(struct tmpnode *tp, ulong_t newsize) 134 { 135 pgcnt_t np = btopr(newsize); 136 137 ASSERT(RW_WRITE_HELD(&tp->tn_rwlock)); 138 ASSERT(RW_WRITE_HELD(&tp->tn_contents)); 139 ASSERT(tp->tn_type == VREG); 140 141 if (tp->tn_asize >= np) 142 return; 143 144 if (newsize > MAXOFF_T) 145 np = btopr(MAXOFF_T); 146 147 if (tp->tn_anon == NULL) { 148 tp->tn_anon = anon_create(MAX(np, TMP_INIT_SZ), ANON_SLEEP); 149 tp->tn_asize = tp->tn_anon->size; 150 return; 151 } 152 153 tp->tn_asize = anon_grow(tp->tn_anon, NULL, tp->tn_asize, 154 np - tp->tn_asize, ANON_SLEEP); 155 ASSERT(tp->tn_asize >= np); 156 } 157 158 /* 159 * Initialize a tmpnode and add it to file list under mount point. 160 */ 161 void 162 tmpnode_init(struct tmount *tm, struct tmpnode *t, vattr_t *vap, cred_t *cred) 163 { 164 struct vnode *vp; 165 timestruc_t now; 166 167 ASSERT(vap != NULL); 168 169 rw_init(&t->tn_rwlock, NULL, RW_DEFAULT, NULL); 170 mutex_init(&t->tn_tlock, NULL, MUTEX_DEFAULT, NULL); 171 t->tn_mode = MAKEIMODE(vap->va_type, vap->va_mode); 172 t->tn_mask = 0; 173 t->tn_type = vap->va_type; 174 t->tn_nodeid = (ino64_t)(uint32_t)((uintptr_t)t >> 3); 175 t->tn_nlink = 1; 176 t->tn_size = 0; 177 178 if (cred == NULL) { 179 t->tn_uid = vap->va_uid; 180 t->tn_gid = vap->va_gid; 181 } else { 182 t->tn_uid = crgetuid(cred); 183 t->tn_gid = crgetgid(cred); 184 } 185 186 t->tn_fsid = tm->tm_dev; 187 t->tn_rdev = vap->va_rdev; 188 t->tn_blksize = PAGESIZE; 189 t->tn_nblocks = 0; 190 gethrestime(&now); 191 t->tn_atime = now; 192 t->tn_mtime = now; 193 t->tn_ctime = now; 194 t->tn_seq = 0; 195 t->tn_dir = NULL; 196 197 t->tn_vnode = vn_alloc(KM_SLEEP); 198 vp = TNTOV(t); 199 vn_setops(vp, tmp_vnodeops); 200 vp->v_vfsp = tm->tm_vfsp; 201 vp->v_type = vap->va_type; 202 vp->v_rdev = vap->va_rdev; 203 vp->v_data = (caddr_t)t; 204 mutex_enter(&tm->tm_contents); 205 /* 206 * Increment the pseudo generation number for this tmpnode. 207 * Since tmpnodes are allocated and freed, there really is no 208 * particular generation number for a new tmpnode. Just fake it 209 * by using a counter in each file system. 210 */ 211 t->tn_gen = tm->tm_gen++; 212 213 /* 214 * Add new tmpnode to end of linked list of tmpnodes for this tmpfs 215 * Root directory is handled specially in tmp_mount. 216 */ 217 if (tm->tm_rootnode != (struct tmpnode *)NULL) { 218 t->tn_forw = NULL; 219 t->tn_back = tm->tm_rootnode->tn_back; 220 t->tn_back->tn_forw = tm->tm_rootnode->tn_back = t; 221 } 222 mutex_exit(&tm->tm_contents); 223 vn_exists(vp); 224 } 225 226 /* 227 * tmpnode_trunc - set length of tmpnode and deal with resources 228 */ 229 int 230 tmpnode_trunc( 231 struct tmount *tm, 232 struct tmpnode *tp, 233 ulong_t newsize) 234 { 235 size_t oldsize = tp->tn_size; 236 size_t delta; 237 struct vnode *vp = TNTOV(tp); 238 timestruc_t now; 239 int error = 0; 240 241 ASSERT(RW_WRITE_HELD(&tp->tn_rwlock)); 242 ASSERT(RW_WRITE_HELD(&tp->tn_contents)); 243 244 if (newsize == oldsize) { 245 /* Required by POSIX */ 246 goto stamp_out; 247 } 248 249 switch (tp->tn_type) { 250 case VREG: 251 /* Growing the file */ 252 if (newsize > oldsize) { 253 delta = P2ROUNDUP(newsize, PAGESIZE) - 254 P2ROUNDUP(oldsize, PAGESIZE); 255 /* 256 * Grow the size of the anon array to the new size 257 * Reserve the space for the growth here. 258 * We do it this way for now because this is how 259 * tmpfs used to do it, and this way the reserved 260 * space is alway equal to the file size. 261 * Alternatively, we could wait to reserve space 'til 262 * someone tries to store into one of the newly 263 * trunc'ed up pages. This would give us behavior 264 * identical to ufs; i.e., you could fail a 265 * fault on storing into a holey region of a file 266 * if there is no space in the filesystem to fill 267 * the hole at that time. 268 */ 269 /* 270 * tmp_resv calls anon_resv only if we're extending 271 * the file into a new page 272 */ 273 if (tmp_resv(tm, tp, delta, 274 (btopr(newsize) != btopr(oldsize)))) { 275 error = ENOSPC; 276 goto out; 277 } 278 tmpnode_growmap(tp, newsize); 279 tp->tn_size = newsize; 280 break; 281 } 282 283 /* Free anon pages if shrinking file over page boundary. */ 284 if (btopr(newsize) != btopr(oldsize)) { 285 pgcnt_t freed; 286 delta = P2ROUNDUP(oldsize, PAGESIZE) - 287 P2ROUNDUP(newsize, PAGESIZE); 288 freed = anon_pages(tp->tn_anon, btopr(newsize), 289 btopr(delta)); 290 tp->tn_nblocks -= freed; 291 anon_free(tp->tn_anon, btopr(newsize), delta); 292 tmp_unresv(tm, tp, delta); 293 } 294 295 /* 296 * Update the file size now to reflect the pages we just 297 * blew away as we're about to drop the 298 * contents lock to zero the partial page (which could 299 * re-enter tmpfs via getpage and try to reacquire the lock) 300 * Once we drop the lock, faulters can fill in holes in 301 * the file and if we haven't updated the size they 302 * may fill in holes that are beyond EOF, which will then 303 * never get cleared. 304 */ 305 tp->tn_size = newsize; 306 307 /* Zero new size of file to page boundary. */ 308 if (anon_get_ptr(tp->tn_anon, btop(newsize)) != NULL) { 309 size_t zlen; 310 311 zlen = PAGESIZE - ((ulong_t)newsize & PAGEOFFSET); 312 rw_exit(&tp->tn_contents); 313 pvn_vpzero(TNTOV(tp), (u_offset_t)newsize, zlen); 314 rw_enter(&tp->tn_contents, RW_WRITER); 315 } 316 317 if (newsize == 0) { 318 /* Delete anon array for tmpnode */ 319 ASSERT(tp->tn_nblocks == 0); 320 ASSERT(anon_get_ptr(tp->tn_anon, 0) == NULL); 321 ASSERT(!vn_has_cached_data(vp)); 322 323 anon_release(tp->tn_anon, tp->tn_asize); 324 tp->tn_anon = NULL; 325 tp->tn_asize = 0; 326 } 327 break; 328 case VLNK: 329 /* 330 * Don't do anything here 331 * tmp_inactive frees the memory 332 */ 333 if (newsize != 0) 334 error = EINVAL; 335 goto out; 336 case VDIR: 337 /* 338 * Remove all the directory entries under this directory. 339 */ 340 if (newsize != 0) { 341 error = EINVAL; 342 goto out; 343 } 344 tdirtrunc(tp); 345 ASSERT(tp->tn_nlink == 0); 346 break; 347 default: 348 goto out; 349 } 350 351 stamp_out: 352 gethrestime(&now); 353 tp->tn_mtime = now; 354 tp->tn_ctime = now; 355 out: 356 /* 357 * tmpnode_trunc() cannot fail when newsize == 0. 358 */ 359 ASSERT(error == 0 || newsize != 0); 360 return (error); 361 } 362