1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 #include <sys/types.h>
27 #include <sys/param.h>
28 #include <sys/t_lock.h>
29 #include <sys/systm.h>
30 #include <sys/time.h>
31 #include <sys/sysmacros.h>
32 #include <sys/proc.h>
33 #include <sys/disp.h>
34 #include <sys/user.h>
35 #include <sys/time.h>
36 #include <sys/vfs.h>
37 #include <sys/vnode.h>
38 #include <sys/stat.h>
39 #include <sys/mode.h>
40 #include <sys/errno.h>
41 #include <sys/kmem.h>
42 #include <vm/seg.h>
43 #include <vm/seg_map.h>
44 #include <vm/anon.h>
45 #include <vm/page.h>
46 #include <vm/pvn.h>
47 #include <sys/fs/tmp.h>
48 #include <sys/fs/tmpnode.h>
49 #include <sys/debug.h>
50 #include <sys/cmn_err.h>
51 #include <sys/swap.h>
52 #include <sys/vtrace.h>
53
54 /*
55 * Reserve swap space for the size of the file.
56 * Called before growing a file (i.e. ftruncate, write)
57 * Returns 0 on success.
58 */
59 int
tmp_resv(struct tmount * tm,struct tmpnode * tp,size_t delta,int pagecreate)60 tmp_resv(
61 struct tmount *tm,
62 struct tmpnode *tp,
63 size_t delta, /* size needed */
64 int pagecreate) /* call anon_resv if set */
65 {
66 pgcnt_t pages = btopr(delta);
67 zone_t *zone;
68
69 ASSERT(RW_WRITE_HELD(&tp->tn_rwlock));
70 ASSERT(tp->tn_type == VREG);
71 /*
72 * pagecreate is set only if we actually need to call anon_resv
73 * to reserve an additional page of anonymous memory.
74 * Since anon_resv always reserves a page at a time,
75 * it should only get called when we know we're growing the
76 * file into a new page or filling a hole.
77 *
78 * Deny if trying to reserve more than tmpfs can allocate
79 */
80 zone = tm->tm_vfsp->vfs_zone;
81 if (pagecreate && ((tm->tm_anonmem + pages > tm->tm_anonmax) ||
82 (!anon_checkspace(ptob(pages + tmpfs_minfree), zone)) ||
83 (anon_try_resv_zone(delta, zone) == 0))) {
84 return (1);
85 }
86
87 /*
88 * update statistics
89 */
90 if (pagecreate) {
91 mutex_enter(&tm->tm_contents);
92 tm->tm_anonmem += pages;
93 mutex_exit(&tm->tm_contents);
94
95 TRACE_2(TR_FAC_VM, TR_ANON_TMPFS, "anon tmpfs:%p %lu",
96 tp, delta);
97 }
98
99 return (0);
100 }
101
102 /*
103 * tmp_unresv - called when truncating a file
104 * Only called if we're freeing at least pagesize bytes
105 * because anon_unresv does a btopr(delta)
106 */
107 static void
tmp_unresv(struct tmount * tm,struct tmpnode * tp,size_t delta)108 tmp_unresv(
109 struct tmount *tm,
110 struct tmpnode *tp,
111 size_t delta)
112 {
113 ASSERT(RW_WRITE_HELD(&tp->tn_rwlock));
114 ASSERT(tp->tn_type == VREG);
115
116 anon_unresv_zone(delta, tm->tm_vfsp->vfs_zone);
117
118 mutex_enter(&tm->tm_contents);
119 tm->tm_anonmem -= btopr(delta);
120 mutex_exit(&tm->tm_contents);
121
122 TRACE_2(TR_FAC_VM, TR_ANON_TMPFS, "anon tmpfs:%p %lu", tp, delta);
123 }
124
125 #define TMP_INIT_SZ 128
126
127 /*
128 * Grow the anon pointer array to cover 'newsize' bytes plus slack.
129 */
130 void
tmpnode_growmap(struct tmpnode * tp,ulong_t newsize)131 tmpnode_growmap(struct tmpnode *tp, ulong_t newsize)
132 {
133 pgcnt_t np = btopr(newsize);
134
135 ASSERT(RW_WRITE_HELD(&tp->tn_rwlock));
136 ASSERT(RW_WRITE_HELD(&tp->tn_contents));
137 ASSERT(tp->tn_type == VREG);
138
139 if (tp->tn_asize >= np)
140 return;
141
142 if (newsize > MAXOFF_T)
143 np = btopr((u_offset_t)MAXOFF_T);
144
145 if (tp->tn_anon == NULL) {
146 tp->tn_anon = anon_create(MAX(np, TMP_INIT_SZ), ANON_SLEEP);
147 tp->tn_asize = tp->tn_anon->size;
148 return;
149 }
150
151 tp->tn_asize = anon_grow(tp->tn_anon, NULL, tp->tn_asize,
152 np - tp->tn_asize, ANON_SLEEP);
153 ASSERT(tp->tn_asize >= np);
154 }
155
156 /*
157 * Initialize a tmpnode and add it to file list under mount point.
158 */
159 void
tmpnode_init(struct tmount * tm,struct tmpnode * t,vattr_t * vap,cred_t * cred)160 tmpnode_init(struct tmount *tm, struct tmpnode *t, vattr_t *vap, cred_t *cred)
161 {
162 struct vnode *vp;
163 timestruc_t now;
164
165 ASSERT(vap != NULL);
166
167 rw_init(&t->tn_rwlock, NULL, RW_DEFAULT, NULL);
168 mutex_init(&t->tn_tlock, NULL, MUTEX_DEFAULT, NULL);
169 t->tn_mode = MAKEIMODE(vap->va_type, vap->va_mode);
170 t->tn_mask = 0;
171 t->tn_type = vap->va_type;
172 t->tn_nodeid = (ino64_t)(uint32_t)((uintptr_t)t >> 3);
173 t->tn_nlink = 1;
174 t->tn_size = 0;
175
176 if (cred == NULL) {
177 t->tn_uid = vap->va_uid;
178 t->tn_gid = vap->va_gid;
179 } else {
180 t->tn_uid = crgetuid(cred);
181 t->tn_gid = crgetgid(cred);
182 }
183
184 t->tn_fsid = tm->tm_dev;
185 t->tn_rdev = vap->va_rdev;
186 t->tn_blksize = PAGESIZE;
187 t->tn_nblocks = 0;
188 gethrestime(&now);
189 t->tn_atime = now;
190 t->tn_mtime = now;
191 t->tn_ctime = now;
192 t->tn_seq = 0;
193 t->tn_dir = NULL;
194
195 t->tn_vnode = vn_alloc(KM_SLEEP);
196 vp = TNTOV(t);
197 vn_setops(vp, tmp_vnodeops);
198 vp->v_vfsp = tm->tm_vfsp;
199 vp->v_type = vap->va_type;
200 vp->v_rdev = vap->va_rdev;
201 vp->v_data = (caddr_t)t;
202 mutex_enter(&tm->tm_contents);
203 /*
204 * Increment the pseudo generation number for this tmpnode.
205 * Since tmpnodes are allocated and freed, there really is no
206 * particular generation number for a new tmpnode. Just fake it
207 * by using a counter in each file system.
208 */
209 t->tn_gen = tm->tm_gen++;
210
211 /*
212 * Add new tmpnode to end of linked list of tmpnodes for this tmpfs
213 * Root directory is handled specially in tmp_mount.
214 */
215 if (tm->tm_rootnode != (struct tmpnode *)NULL) {
216 t->tn_forw = NULL;
217 t->tn_back = tm->tm_rootnode->tn_back;
218 t->tn_back->tn_forw = tm->tm_rootnode->tn_back = t;
219 }
220 mutex_exit(&tm->tm_contents);
221 vn_exists(vp);
222 }
223
224 /*
225 * tmpnode_trunc - set length of tmpnode and deal with resources
226 */
227 int
tmpnode_trunc(struct tmount * tm,struct tmpnode * tp,ulong_t newsize)228 tmpnode_trunc(
229 struct tmount *tm,
230 struct tmpnode *tp,
231 ulong_t newsize)
232 {
233 size_t oldsize = tp->tn_size;
234 size_t delta;
235 struct vnode *vp = TNTOV(tp);
236 timestruc_t now;
237 int error = 0;
238
239 ASSERT(RW_WRITE_HELD(&tp->tn_rwlock));
240 ASSERT(RW_WRITE_HELD(&tp->tn_contents));
241
242 if (newsize == oldsize) {
243 /* Required by POSIX */
244 goto stamp_out;
245 }
246
247 switch (tp->tn_type) {
248 case VREG:
249 /* Growing the file */
250 if (newsize > oldsize) {
251 delta = P2ROUNDUP(newsize, PAGESIZE) -
252 P2ROUNDUP(oldsize, PAGESIZE);
253 /*
254 * Grow the size of the anon array to the new size
255 * Reserve the space for the growth here.
256 * We do it this way for now because this is how
257 * tmpfs used to do it, and this way the reserved
258 * space is alway equal to the file size.
259 * Alternatively, we could wait to reserve space 'til
260 * someone tries to store into one of the newly
261 * trunc'ed up pages. This would give us behavior
262 * identical to ufs; i.e., you could fail a
263 * fault on storing into a holey region of a file
264 * if there is no space in the filesystem to fill
265 * the hole at that time.
266 */
267 /*
268 * tmp_resv calls anon_resv only if we're extending
269 * the file into a new page
270 */
271 if (tmp_resv(tm, tp, delta,
272 (btopr(newsize) != btopr(oldsize)))) {
273 error = ENOSPC;
274 goto out;
275 }
276 tmpnode_growmap(tp, newsize);
277 tp->tn_size = newsize;
278 break;
279 }
280
281 /* Free anon pages if shrinking file over page boundary. */
282 if (btopr(newsize) != btopr(oldsize)) {
283 pgcnt_t freed;
284 delta = P2ROUNDUP(oldsize, PAGESIZE) -
285 P2ROUNDUP(newsize, PAGESIZE);
286 freed = anon_pages(tp->tn_anon, btopr(newsize),
287 btopr(delta));
288 tp->tn_nblocks -= freed;
289 anon_free(tp->tn_anon, btopr(newsize), delta);
290 tmp_unresv(tm, tp, delta);
291 }
292
293 /*
294 * Update the file size now to reflect the pages we just
295 * blew away as we're about to drop the
296 * contents lock to zero the partial page (which could
297 * re-enter tmpfs via getpage and try to reacquire the lock)
298 * Once we drop the lock, faulters can fill in holes in
299 * the file and if we haven't updated the size they
300 * may fill in holes that are beyond EOF, which will then
301 * never get cleared.
302 */
303 tp->tn_size = newsize;
304
305 /* Zero new size of file to page boundary. */
306 if (anon_get_ptr(tp->tn_anon, btop(newsize)) != NULL) {
307 size_t zlen;
308
309 zlen = PAGESIZE - ((ulong_t)newsize & PAGEOFFSET);
310 rw_exit(&tp->tn_contents);
311 pvn_vpzero(TNTOV(tp), (u_offset_t)newsize, zlen);
312 rw_enter(&tp->tn_contents, RW_WRITER);
313 }
314
315 if (newsize == 0) {
316 /* Delete anon array for tmpnode */
317 ASSERT(tp->tn_nblocks == 0);
318 ASSERT(anon_get_ptr(tp->tn_anon, 0) == NULL);
319 ASSERT(!vn_has_cached_data(vp));
320
321 anon_release(tp->tn_anon, tp->tn_asize);
322 tp->tn_anon = NULL;
323 tp->tn_asize = 0;
324 }
325 break;
326 case VLNK:
327 /*
328 * Don't do anything here
329 * tmp_inactive frees the memory
330 */
331 if (newsize != 0)
332 error = EINVAL;
333 goto out;
334 case VDIR:
335 /*
336 * Remove all the directory entries under this directory.
337 */
338 if (newsize != 0) {
339 error = EINVAL;
340 goto out;
341 }
342 tdirtrunc(tp);
343 ASSERT(tp->tn_nlink == 0);
344 break;
345 default:
346 goto out;
347 }
348
349 stamp_out:
350 gethrestime(&now);
351 tp->tn_mtime = now;
352 tp->tn_ctime = now;
353 out:
354 /*
355 * tmpnode_trunc() cannot fail when newsize == 0.
356 */
357 ASSERT(error == 0 || newsize != 0);
358 return (error);
359 }
360