1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/param.h> 31 #include <sys/t_lock.h> 32 #include <sys/systm.h> 33 #include <sys/time.h> 34 #include <sys/sysmacros.h> 35 #include <sys/proc.h> 36 #include <sys/disp.h> 37 #include <sys/user.h> 38 #include <sys/time.h> 39 #include <sys/vfs.h> 40 #include <sys/vnode.h> 41 #include <sys/stat.h> 42 #include <sys/mode.h> 43 #include <sys/errno.h> 44 #include <sys/kmem.h> 45 #include <vm/seg.h> 46 #include <vm/seg_map.h> 47 #include <vm/anon.h> 48 #include <vm/page.h> 49 #include <vm/pvn.h> 50 #include <sys/fs/tmp.h> 51 #include <sys/fs/tmpnode.h> 52 #include <sys/debug.h> 53 #include <sys/cmn_err.h> 54 #include <sys/swap.h> 55 #include <sys/vtrace.h> 56 57 /* 58 * Reserve swap space for the size of the file. 59 * Called before growing a file (i.e. ftruncate, write) 60 * Returns 0 on success. 61 */ 62 int 63 tmp_resv( 64 struct tmount *tm, 65 struct tmpnode *tp, 66 size_t delta, /* size needed */ 67 int pagecreate) /* call anon_resv if set */ 68 { 69 pgcnt_t pages = btopr(delta); 70 71 ASSERT(RW_WRITE_HELD(&tp->tn_rwlock)); 72 ASSERT(tp->tn_type == VREG); 73 /* 74 * pagecreate is set only if we actually need to call anon_resv 75 * to reserve an additional page of anonymous memory. 76 * Since anon_resv always reserves a page at a time, 77 * it should only get called when we know we're growing the 78 * file into a new page or filling a hole. 79 * 80 * Deny if trying to reserve more than tmpfs can allocate 81 */ 82 if (pagecreate && ((tm->tm_anonmem + pages > tm->tm_anonmax) || 83 (!anon_checkspace(ptob(pages + tmpfs_minfree))) || 84 (anon_resv(delta) == 0))) { 85 return (1); 86 } 87 88 /* 89 * update statistics 90 */ 91 if (pagecreate) { 92 mutex_enter(&tm->tm_contents); 93 tm->tm_anonmem += pages; 94 mutex_exit(&tm->tm_contents); 95 96 TRACE_2(TR_FAC_VM, TR_ANON_TMPFS, "anon tmpfs:%p %lu", 97 tp, delta); 98 } 99 100 return (0); 101 } 102 103 /* 104 * tmp_unresv - called when truncating a file 105 * Only called if we're freeing at least pagesize bytes 106 * because anon_unresv does a btopr(delta) 107 */ 108 static void 109 tmp_unresv( 110 struct tmount *tm, 111 struct tmpnode *tp, 112 size_t delta) 113 { 114 ASSERT(RW_WRITE_HELD(&tp->tn_rwlock)); 115 ASSERT(tp->tn_type == VREG); 116 117 anon_unresv(delta); 118 119 mutex_enter(&tm->tm_contents); 120 tm->tm_anonmem -= btopr(delta); 121 mutex_exit(&tm->tm_contents); 122 123 TRACE_2(TR_FAC_VM, TR_ANON_TMPFS, "anon tmpfs:%p %lu", tp, delta); 124 } 125 126 #define TMP_INIT_SZ 128 127 128 /* 129 * Grow the anon pointer array to cover 'newsize' bytes plus slack. 130 */ 131 void 132 tmpnode_growmap(struct tmpnode *tp, ulong_t newsize) 133 { 134 pgcnt_t np = btopr(newsize); 135 136 ASSERT(RW_WRITE_HELD(&tp->tn_rwlock)); 137 ASSERT(RW_WRITE_HELD(&tp->tn_contents)); 138 ASSERT(tp->tn_type == VREG); 139 140 if (tp->tn_asize >= np) 141 return; 142 143 if (newsize > MAXOFF_T) 144 np = btopr(MAXOFF_T); 145 146 if (tp->tn_anon == NULL) { 147 tp->tn_anon = anon_create(MAX(np, TMP_INIT_SZ), ANON_SLEEP); 148 tp->tn_asize = tp->tn_anon->size; 149 return; 150 } 151 152 tp->tn_asize = anon_grow(tp->tn_anon, NULL, tp->tn_asize, 153 np - tp->tn_asize, ANON_SLEEP); 154 ASSERT(tp->tn_asize >= np); 155 } 156 157 /* 158 * Initialize a tmpnode and add it to file list under mount point. 159 */ 160 void 161 tmpnode_init(struct tmount *tm, struct tmpnode *t, vattr_t *vap, cred_t *cred) 162 { 163 struct vnode *vp; 164 timestruc_t now; 165 166 ASSERT(vap != NULL); 167 168 rw_init(&t->tn_rwlock, NULL, RW_DEFAULT, NULL); 169 mutex_init(&t->tn_tlock, NULL, MUTEX_DEFAULT, NULL); 170 t->tn_mode = MAKEIMODE(vap->va_type, vap->va_mode); 171 t->tn_mask = 0; 172 t->tn_type = vap->va_type; 173 t->tn_nodeid = (ino64_t)(uint32_t)((uintptr_t)t >> 3); 174 t->tn_nlink = 1; 175 t->tn_size = 0; 176 177 if (cred == NULL) { 178 t->tn_uid = vap->va_uid; 179 t->tn_gid = vap->va_gid; 180 } else { 181 t->tn_uid = crgetuid(cred); 182 t->tn_gid = crgetgid(cred); 183 } 184 185 t->tn_fsid = tm->tm_dev; 186 t->tn_rdev = vap->va_rdev; 187 t->tn_blksize = PAGESIZE; 188 t->tn_nblocks = 0; 189 gethrestime(&now); 190 t->tn_atime = now; 191 t->tn_mtime = now; 192 t->tn_ctime = now; 193 t->tn_seq = 0; 194 t->tn_dir = NULL; 195 196 t->tn_vnode = vn_alloc(KM_SLEEP); 197 vp = TNTOV(t); 198 vn_setops(vp, tmp_vnodeops); 199 vp->v_vfsp = tm->tm_vfsp; 200 vp->v_type = vap->va_type; 201 vp->v_rdev = vap->va_rdev; 202 vp->v_data = (caddr_t)t; 203 mutex_enter(&tm->tm_contents); 204 /* 205 * Increment the pseudo generation number for this tmpnode. 206 * Since tmpnodes are allocated and freed, there really is no 207 * particular generation number for a new tmpnode. Just fake it 208 * by using a counter in each file system. 209 */ 210 t->tn_gen = tm->tm_gen++; 211 212 /* 213 * Add new tmpnode to end of linked list of tmpnodes for this tmpfs 214 * Root directory is handled specially in tmp_mount. 215 */ 216 if (tm->tm_rootnode != (struct tmpnode *)NULL) { 217 t->tn_forw = NULL; 218 t->tn_back = tm->tm_rootnode->tn_back; 219 t->tn_back->tn_forw = tm->tm_rootnode->tn_back = t; 220 } 221 mutex_exit(&tm->tm_contents); 222 vn_exists(vp); 223 } 224 225 /* 226 * tmpnode_trunc - set length of tmpnode and deal with resources 227 */ 228 int 229 tmpnode_trunc( 230 struct tmount *tm, 231 struct tmpnode *tp, 232 ulong_t newsize) 233 { 234 size_t oldsize = tp->tn_size; 235 size_t delta; 236 struct vnode *vp = TNTOV(tp); 237 timestruc_t now; 238 int error = 0; 239 240 ASSERT(RW_WRITE_HELD(&tp->tn_rwlock)); 241 ASSERT(RW_WRITE_HELD(&tp->tn_contents)); 242 243 if (newsize == oldsize) { 244 /* Required by POSIX */ 245 goto stamp_out; 246 } 247 248 switch (tp->tn_type) { 249 case VREG: 250 /* Growing the file */ 251 if (newsize > oldsize) { 252 delta = P2ROUNDUP(newsize, PAGESIZE) - 253 P2ROUNDUP(oldsize, PAGESIZE); 254 /* 255 * Grow the size of the anon array to the new size 256 * Reserve the space for the growth here. 257 * We do it this way for now because this is how 258 * tmpfs used to do it, and this way the reserved 259 * space is alway equal to the file size. 260 * Alternatively, we could wait to reserve space 'til 261 * someone tries to store into one of the newly 262 * trunc'ed up pages. This would give us behavior 263 * identical to ufs; i.e., you could fail a 264 * fault on storing into a holey region of a file 265 * if there is no space in the filesystem to fill 266 * the hole at that time. 267 */ 268 /* 269 * tmp_resv calls anon_resv only if we're extending 270 * the file into a new page 271 */ 272 if (tmp_resv(tm, tp, delta, 273 (btopr(newsize) != btopr(oldsize)))) { 274 error = ENOSPC; 275 goto out; 276 } 277 tmpnode_growmap(tp, newsize); 278 tp->tn_size = newsize; 279 break; 280 } 281 282 /* Free anon pages if shrinking file over page boundary. */ 283 if (btopr(newsize) != btopr(oldsize)) { 284 pgcnt_t freed; 285 delta = P2ROUNDUP(oldsize, PAGESIZE) - 286 P2ROUNDUP(newsize, PAGESIZE); 287 freed = anon_pages(tp->tn_anon, btopr(newsize), 288 btopr(delta)); 289 tp->tn_nblocks -= freed; 290 anon_free(tp->tn_anon, btopr(newsize), delta); 291 tmp_unresv(tm, tp, delta); 292 } 293 294 /* 295 * Update the file size now to reflect the pages we just 296 * blew away as we're about to drop the 297 * contents lock to zero the partial page (which could 298 * re-enter tmpfs via getpage and try to reacquire the lock) 299 * Once we drop the lock, faulters can fill in holes in 300 * the file and if we haven't updated the size they 301 * may fill in holes that are beyond EOF, which will then 302 * never get cleared. 303 */ 304 tp->tn_size = newsize; 305 306 /* Zero new size of file to page boundary. */ 307 if (anon_get_ptr(tp->tn_anon, btop(newsize)) != NULL) { 308 size_t zlen; 309 310 zlen = PAGESIZE - ((ulong_t)newsize & PAGEOFFSET); 311 rw_exit(&tp->tn_contents); 312 pvn_vpzero(TNTOV(tp), (u_offset_t)newsize, zlen); 313 rw_enter(&tp->tn_contents, RW_WRITER); 314 } 315 316 if (newsize == 0) { 317 /* Delete anon array for tmpnode */ 318 ASSERT(tp->tn_nblocks == 0); 319 ASSERT(anon_get_ptr(tp->tn_anon, 0) == NULL); 320 ASSERT(!vn_has_cached_data(vp)); 321 322 anon_release(tp->tn_anon, tp->tn_asize); 323 tp->tn_anon = NULL; 324 tp->tn_asize = 0; 325 } 326 break; 327 case VLNK: 328 /* 329 * Don't do anything here 330 * tmp_inactive frees the memory 331 */ 332 if (newsize != 0) 333 error = EINVAL; 334 goto out; 335 case VDIR: 336 /* 337 * Remove all the directory entries under this directory. 338 */ 339 if (newsize != 0) { 340 error = EINVAL; 341 goto out; 342 } 343 tdirtrunc(tp); 344 ASSERT(tp->tn_nlink == 0); 345 break; 346 default: 347 goto out; 348 } 349 350 stamp_out: 351 gethrestime(&now); 352 tp->tn_mtime = now; 353 tp->tn_ctime = now; 354 out: 355 /* 356 * tmpnode_trunc() cannot fail when newsize == 0. 357 */ 358 ASSERT(error == 0 || newsize != 0); 359 return (error); 360 } 361