xref: /titanic_50/usr/src/uts/common/os/acct.c (revision 0a1278f26ea4b7c8c0285d4f2d6c5b680904aa01)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 
25 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
26 /*	  All Rights Reserved  	*/
27 
28 
29 #include <sys/types.h>
30 #include <sys/sysmacros.h>
31 #include <sys/param.h>
32 #include <sys/systm.h>
33 #include <sys/acct.h>
34 #include <sys/cred.h>
35 #include <sys/user.h>
36 #include <sys/errno.h>
37 #include <sys/file.h>
38 #include <sys/vnode.h>
39 #include <sys/debug.h>
40 #include <sys/proc.h>
41 #include <sys/resource.h>
42 #include <sys/session.h>
43 #include <sys/modctl.h>
44 #include <sys/syscall.h>
45 #include <sys/policy.h>
46 #include <sys/list.h>
47 #include <sys/time.h>
48 #include <sys/msacct.h>
49 #include <sys/zone.h>
50 
51 /*
52  * Each zone has its own accounting settings (on or off) and associated
53  * file.  The global zone is not special in this aspect; it will only
54  * generate records for processes that ran in the global zone.  We could
55  * allow the global zone to record all activity on the system, but there
56  * would be no way of knowing the zone in which the processes executed.
57  * sysacct() is thus virtualized to only act on the caller's zone.
58  */
59 struct acct_globals {
60 	struct acct	acctbuf;
61 	kmutex_t	aclock;
62 	struct vnode	*acctvp;
63 	list_node_t	aclink;
64 };
65 
66 /*
67  * We need a list of all accounting settings for all zones, so we can
68  * accurately determine if a file is in use for accounting (possibly by
69  * another zone).
70  */
71 static zone_key_t acct_zone_key;
72 static list_t acct_list;
73 kmutex_t acct_list_lock;
74 
75 static struct sysent acctsysent = {
76 	1,
77 	SE_NOUNLOAD | SE_ARGC | SE_32RVAL1,
78 	sysacct
79 };
80 
81 static struct modlsys modlsys = {
82 	&mod_syscallops, "acct(2) syscall", &acctsysent
83 };
84 
85 #ifdef _SYSCALL32_IMPL
86 static struct modlsys modlsys32 = {
87 	&mod_syscallops32, "32-bit acct(2) syscall", &acctsysent
88 };
89 #endif
90 
91 static struct modlinkage modlinkage = {
92 	MODREV_1,
93 	&modlsys,
94 #ifdef _SYSCALL32_IMPL
95 	&modlsys32,
96 #endif
97 	NULL
98 };
99 
100 /*ARGSUSED*/
101 static void *
102 acct_init(zoneid_t zoneid)
103 {
104 	struct acct_globals *ag;
105 
106 	ag = kmem_alloc(sizeof (*ag), KM_SLEEP);
107 	bzero(&ag->acctbuf, sizeof (ag->acctbuf));
108 	mutex_init(&ag->aclock, NULL, MUTEX_DEFAULT, NULL);
109 	ag->acctvp = NULL;
110 
111 	mutex_enter(&acct_list_lock);
112 	list_insert_tail(&acct_list, ag);
113 	mutex_exit(&acct_list_lock);
114 	return (ag);
115 }
116 
117 /* ARGSUSED */
118 static void
119 acct_shutdown(zoneid_t zoneid, void *arg)
120 {
121 	struct acct_globals *ag = arg;
122 
123 	mutex_enter(&ag->aclock);
124 	if (ag->acctvp) {
125 		/*
126 		 * This needs to be done as a shutdown callback, otherwise this
127 		 * held vnode may cause filesystems to be busy, and the zone
128 		 * shutdown operation to fail.
129 		 */
130 		(void) VOP_CLOSE(ag->acctvp, FWRITE, 1, (offset_t)0, kcred,
131 		    NULL);
132 		VN_RELE(ag->acctvp);
133 	}
134 	ag->acctvp = NULL;
135 	mutex_exit(&ag->aclock);
136 }
137 
138 /*ARGSUSED*/
139 static void
140 acct_fini(zoneid_t zoneid, void *arg)
141 {
142 	struct acct_globals *ag = arg;
143 
144 	mutex_enter(&acct_list_lock);
145 	list_remove(&acct_list, ag);
146 	mutex_exit(&acct_list_lock);
147 
148 	mutex_destroy(&ag->aclock);
149 	kmem_free(ag, sizeof (*ag));
150 }
151 
152 int
153 _init(void)
154 {
155 	int error;
156 
157 	mutex_init(&acct_list_lock, NULL, MUTEX_DEFAULT, NULL);
158 	list_create(&acct_list, sizeof (struct acct_globals),
159 	    offsetof(struct acct_globals, aclink));
160 	/*
161 	 * Using an initializer here wastes a bit of memory for zones that
162 	 * don't use accounting, but vastly simplifies the locking.
163 	 */
164 	zone_key_create(&acct_zone_key, acct_init, acct_shutdown, acct_fini);
165 	if ((error = mod_install(&modlinkage)) != 0) {
166 		(void) zone_key_delete(acct_zone_key);
167 		list_destroy(&acct_list);
168 		mutex_destroy(&acct_list_lock);
169 	}
170 	return (error);
171 }
172 
173 int
174 _info(struct modinfo *modinfop)
175 {
176 	return (mod_info(&modlinkage, modinfop));
177 }
178 
179 /*
180  * acct() is a "weak stub" routine called from exit().
181  * Once this module has been loaded, we refuse to allow
182  * it to unload - otherwise accounting would quietly
183  * cease.  See 1211661.  It's possible to make this module
184  * unloadable but it's substantially safer not to bother.
185  */
186 int
187 _fini(void)
188 {
189 	return (EBUSY);
190 }
191 
192 /*
193  * See if vp is in use by the accounting system on any zone.  This does a deep
194  * comparison of vnodes such that a file and a lofs "shadow" node of it will
195  * appear to be the same.
196  *
197  * If 'compare_vfs' is true, the function will do a comparison of vfs_t's
198  * instead (ie, is the vfs_t on which the vnode resides in use by the
199  * accounting system in any zone).
200  *
201  * Returns 1 if found (in use), 0 otherwise.
202  */
203 static int
204 acct_find(vnode_t *vp, boolean_t compare_vfs)
205 {
206 	struct acct_globals *ag;
207 	vnode_t *realvp;
208 
209 	ASSERT(MUTEX_HELD(&acct_list_lock));
210 	ASSERT(vp != NULL);
211 
212 	if (VOP_REALVP(vp, &realvp, NULL))
213 		realvp = vp;
214 	for (ag = list_head(&acct_list); ag != NULL;
215 	    ag = list_next(&acct_list, ag)) {
216 		vnode_t *racctvp;
217 		boolean_t found = B_FALSE;
218 
219 		mutex_enter(&ag->aclock);
220 		if (ag->acctvp == NULL) {
221 			mutex_exit(&ag->aclock);
222 			continue;
223 		}
224 		if (VOP_REALVP(ag->acctvp, &racctvp, NULL))
225 			racctvp = ag->acctvp;
226 		if (compare_vfs) {
227 			if (racctvp->v_vfsp == realvp->v_vfsp)
228 				found = B_TRUE;
229 		} else {
230 			if (VN_CMP(realvp, racctvp))
231 				found = B_TRUE;
232 		}
233 		mutex_exit(&ag->aclock);
234 		if (found)
235 			return (1);
236 	}
237 	return (0);
238 }
239 
240 /*
241  * Returns 1 if the vfs that vnode resides on is in use for the accounting
242  * subsystem, 0 otherwise.
243  */
244 int
245 acct_fs_in_use(vnode_t *vp)
246 {
247 	int found;
248 
249 	if (vp == NULL)
250 		return (0);
251 	mutex_enter(&acct_list_lock);
252 	found = acct_find(vp, B_TRUE);
253 	mutex_exit(&acct_list_lock);
254 	return (found);
255 }
256 
257 /*
258  * Perform process accounting functions.
259  */
260 int
261 sysacct(char *fname)
262 {
263 	struct acct_globals *ag;
264 	struct vnode *vp;
265 	int error = 0;
266 
267 	if (secpolicy_acct(CRED()) != 0)
268 		return (set_errno(EPERM));
269 
270 	ag = zone_getspecific(acct_zone_key, curproc->p_zone);
271 	ASSERT(ag != NULL);
272 
273 	if (fname == NULL) {
274 		/*
275 		 * Close the file and stop accounting.
276 		 */
277 		mutex_enter(&ag->aclock);
278 		vp = ag->acctvp;
279 		ag->acctvp = NULL;
280 		mutex_exit(&ag->aclock);
281 		if (vp) {
282 			error = VOP_CLOSE(vp, FWRITE, 1, (offset_t)0, CRED(),
283 			    NULL);
284 			VN_RELE(vp);
285 		}
286 		return (error == 0 ? 0 : set_errno(error));
287 	}
288 
289 	/*
290 	 * Either (a) open a new file and begin accounting -or- (b)
291 	 * switch accounting from an old to a new file.
292 	 *
293 	 * (Open the file without holding aclock in case it
294 	 * sleeps (holding the lock prevents process exit).)
295 	 */
296 	if ((error = vn_open(fname, UIO_USERSPACE, FWRITE,
297 	    0, &vp, (enum create)0, 0)) != 0) {
298 		/* SVID  compliance */
299 		if (error == EISDIR)
300 			error = EACCES;
301 		return (set_errno(error));
302 	}
303 
304 	if (vp->v_type != VREG) {
305 		error = EACCES;
306 	} else {
307 		mutex_enter(&acct_list_lock);
308 		if (acct_find(vp, B_FALSE)) {
309 			error = EBUSY;
310 		} else {
311 			mutex_enter(&ag->aclock);
312 			if (ag->acctvp) {
313 				vnode_t *oldvp;
314 
315 				/*
316 				 * close old acctvp, and point acct()
317 				 * at new file by swapping vp and acctvp
318 				 */
319 				oldvp = ag->acctvp;
320 				ag->acctvp = vp;
321 				vp = oldvp;
322 			} else {
323 				/*
324 				 * no existing file, start accounting ..
325 				 */
326 				ag->acctvp = vp;
327 				vp = NULL;
328 			}
329 			mutex_exit(&ag->aclock);
330 		}
331 		mutex_exit(&acct_list_lock);
332 	}
333 
334 	if (vp) {
335 		(void) VOP_CLOSE(vp, FWRITE, 1, (offset_t)0, CRED(), NULL);
336 		VN_RELE(vp);
337 	}
338 	return (error == 0 ? 0 : set_errno(error));
339 }
340 
/*
 * Layout of the pseudo-floating point value produced by acct_compress():
 * a 13-bit fraction in the low bits with a 3-bit base-8 exponent above it.
 */
#define	ACCT_COMP_FRAC_BITS	13
#define	ACCT_COMP_FRAC_LIMIT	(1UL << ACCT_COMP_FRAC_BITS)	/* 8192 */
#define	ACCT_COMP_EXP_MAX	7

/*
 * Produce a pseudo-floating point representation
 * with 3 bits base-8 exponent, 13 bits fraction.
 *
 * t is divided by 8 until it fits in the fraction; the number of divisions
 * is the exponent.  The result is rounded using the most significant bit
 * dropped by the last division.  A value too large for a 3-bit exponent
 * saturates to the largest encodable value (exponent 7, fraction 8191).
 *
 * The saturation check is unconditional rather than being compiled only
 * for _LP64: it is a no-op when ulong_t is 32 bits wide (the exponent then
 * never exceeds 7), and it keeps the encoding from wrapping for any wider
 * ulong_t regardless of how the build is configured.
 */
static comp_t
acct_compress(ulong_t t)
{
	int exp = 0;
	int round = 0;

	while (t >= ACCT_COMP_FRAC_LIMIT) {
		exp++;
		/* Remember the top bit about to be shifted out. */
		round = t & 04;
		t >>= 3;
	}
	if (round) {
		t++;
		/* Rounding up may carry out of the 13-bit fraction. */
		if (t >= ACCT_COMP_FRAC_LIMIT) {
			t >>= 3;
			exp++;
		}
	}
	if (exp > ACCT_COMP_EXP_MAX) {
		/* prevent wraparound */
		t = ACCT_COMP_FRAC_LIMIT - 1;
		exp = ACCT_COMP_EXP_MAX;
	}
	return ((exp << ACCT_COMP_FRAC_BITS) + t);
}
371 
372 /*
373  * On exit, write a record on the accounting file.
374  */
375 void
376 acct(char st)
377 {
378 	struct vnode *vp;
379 	struct cred *cr;
380 	struct proc *p;
381 	user_t *ua;
382 	struct vattr va;
383 	ssize_t resid = 0;
384 	int error;
385 	struct acct_globals *ag;
386 
387 	/*
388 	 * If sysacct module is loaded when zone is in down state then
389 	 * the following function can return NULL.
390 	 */
391 	ag = zone_getspecific(acct_zone_key, curproc->p_zone);
392 	if (ag == NULL)
393 		return;
394 
395 	mutex_enter(&ag->aclock);
396 	if ((vp = ag->acctvp) == NULL) {
397 		mutex_exit(&ag->aclock);
398 		return;
399 	}
400 
401 	/*
402 	 * This only gets called from exit after all lwp's have exited so no
403 	 * cred locking is needed.
404 	 */
405 	p = curproc;
406 	ua = PTOU(p);
407 	bcopy(ua->u_comm, ag->acctbuf.ac_comm, sizeof (ag->acctbuf.ac_comm));
408 	ag->acctbuf.ac_btime = ua->u_start.tv_sec;
409 	ag->acctbuf.ac_utime = acct_compress(NSEC_TO_TICK(p->p_acct[LMS_USER]));
410 	ag->acctbuf.ac_stime = acct_compress(
411 	    NSEC_TO_TICK(p->p_acct[LMS_SYSTEM] + p->p_acct[LMS_TRAP]));
412 	ag->acctbuf.ac_etime = acct_compress(ddi_get_lbolt() - ua->u_ticks);
413 	ag->acctbuf.ac_mem = acct_compress((ulong_t)ua->u_mem);
414 	ag->acctbuf.ac_io = acct_compress((ulong_t)p->p_ru.ioch);
415 	ag->acctbuf.ac_rw = acct_compress((ulong_t)(p->p_ru.inblock +
416 	    p->p_ru.oublock));
417 	cr = CRED();
418 	ag->acctbuf.ac_uid = crgetruid(cr);
419 	ag->acctbuf.ac_gid = crgetrgid(cr);
420 	(void) cmpldev(&ag->acctbuf.ac_tty, cttydev(p));
421 	ag->acctbuf.ac_stat = st;
422 	ag->acctbuf.ac_flag = (ua->u_acflag | AEXPND);
423 
424 	/*
425 	 * Save the size. If the write fails, reset the size to avoid
426 	 * corrupted acct files.
427 	 *
428 	 * Large Files: We deliberately prevent accounting files from
429 	 * exceeding the 2GB limit as none of the accounting commands are
430 	 * currently large file aware.
431 	 */
432 	va.va_mask = AT_SIZE;
433 	if (VOP_GETATTR(vp, &va, 0, kcred, NULL) == 0) {
434 		error = vn_rdwr(UIO_WRITE, vp, (caddr_t)&ag->acctbuf,
435 		    sizeof (ag->acctbuf), 0LL, UIO_SYSSPACE, FAPPEND,
436 		    (rlim64_t)MAXOFF32_T, kcred, &resid);
437 		if (error || resid)
438 			(void) VOP_SETATTR(vp, &va, 0, kcred, NULL);
439 	}
440 	mutex_exit(&ag->aclock);
441 }
442