1 /*- 2 * Copyright (c) 1982, 1986, 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Copyright (c) 1994 Christopher G. Demetriou 11 * Copyright (c) 2005 Robert N. M. Watson 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. All advertising materials mentioning features or use of this software 22 * must display the following acknowledgement: 23 * This product includes software developed by the University of 24 * California, Berkeley and its contributors. 25 * 4. Neither the name of the University nor the names of its contributors 26 * may be used to endorse or promote products derived from this software 27 * without specific prior written permission. 28 * 29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 32 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 39 * SUCH DAMAGE. 40 * 41 * @(#)kern_acct.c 8.1 (Berkeley) 6/14/93 42 */ 43 44 #include <sys/cdefs.h> 45 __FBSDID("$FreeBSD$"); 46 47 #include "opt_mac.h" 48 49 #include <sys/param.h> 50 #include <sys/systm.h> 51 #include <sys/lock.h> 52 #include <sys/mutex.h> 53 #include <sys/sysproto.h> 54 #include <sys/proc.h> 55 #include <sys/mac.h> 56 #include <sys/mount.h> 57 #include <sys/vnode.h> 58 #include <sys/fcntl.h> 59 #include <sys/syslog.h> 60 #include <sys/kernel.h> 61 #include <sys/sx.h> 62 #include <sys/sysent.h> 63 #include <sys/sysctl.h> 64 #include <sys/namei.h> 65 #include <sys/acct.h> 66 #include <sys/resourcevar.h> 67 #include <sys/tty.h> 68 69 /* 70 * The routines implemented in this file are described in: 71 * Leffler, et al.: The Design and Implementation of the 4.3BSD 72 * UNIX Operating System (Addison Welley, 1989) 73 * on pages 62-63. 74 * 75 * Arguably, to simplify accounting operations, this mechanism should 76 * be replaced by one in which an accounting log file (similar to /dev/klog) 77 * is read by a user process, etc. However, that has its own problems. 78 */ 79 80 /* 81 * Internal accounting functions. 82 * The former's operation is described in Leffler, et al., and the latter 83 * was provided by UCB with the 4.4BSD-Lite release 84 */ 85 static comp_t encode_comp_t(u_long, u_long); 86 static void acctwatch(void *); 87 88 /* 89 * Accounting callout used for periodic scheduling of acctwatch. 90 */ 91 static struct callout acctwatch_callout; 92 93 /* 94 * Accounting vnode pointer, saved vnode pointer, and flags for each. 95 * acct_sx protects against changes to the active vnode and credentials 96 * while accounting records are being committed to disk. 97 */ 98 static int acct_suspended; 99 static struct vnode *acct_vp; 100 static struct ucred *acct_cred; 101 static int acct_flags; 102 static struct sx acct_sx; 103 104 SX_SYSINIT(acct, &acct_sx, "acct_sx"); 105 106 /* 107 * Values associated with enabling and disabling accounting 108 */ 109 static int acctsuspend = 2; /* stop accounting when < 2% free space left */ 110 SYSCTL_INT(_kern, OID_AUTO, acct_suspend, CTLFLAG_RW, 111 &acctsuspend, 0, "percentage of free disk space below which accounting stops"); 112 113 static int acctresume = 4; /* resume when free space risen to > 4% */ 114 SYSCTL_INT(_kern, OID_AUTO, acct_resume, CTLFLAG_RW, 115 &acctresume, 0, "percentage of free disk space above which accounting resumes"); 116 117 static int acctchkfreq = 15; /* frequency (in seconds) to check space */ 118 SYSCTL_INT(_kern, OID_AUTO, acct_chkfreq, CTLFLAG_RW, 119 &acctchkfreq, 0, "frequency for checking the free space"); 120 121 SYSCTL_INT(_kern, OID_AUTO, acct_suspended, CTLFLAG_RD, &acct_suspended, 0, 122 "Accounting suspended or not"); 123 124 /* 125 * Accounting system call. Written based on the specification and 126 * previous implementation done by Mark Tinguely. 127 * 128 * MPSAFE 129 */ 130 int 131 acct(struct thread *td, struct acct_args *uap) 132 { 133 struct nameidata nd; 134 int error, flags; 135 136 /* Make sure that the caller is root. */ 137 error = suser(td); 138 if (error) 139 return (error); 140 141 /* 142 * If accounting is to be started to a file, open that file for 143 * appending and make sure it's a 'normal'. While we could 144 * conditionally acquire Giant here, we're actually interacting with 145 * vnodes from possibly two file systems, making the logic a bit 146 * complicated. For now, use Giant unconditionally. 147 */ 148 mtx_lock(&Giant); 149 if (uap->path != NULL) { 150 NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, uap->path, td); 151 flags = FWRITE | O_APPEND; 152 error = vn_open(&nd, &flags, 0, -1); 153 if (error) 154 goto done; 155 NDFREE(&nd, NDF_ONLY_PNBUF); 156 #ifdef MAC 157 error = mac_check_system_acct(td->td_ucred, nd.ni_vp); 158 if (error) { 159 VOP_UNLOCK(nd.ni_vp, 0, td); 160 vn_close(nd.ni_vp, flags, td->td_ucred, td); 161 goto done; 162 } 163 #endif 164 VOP_UNLOCK(nd.ni_vp, 0, td); 165 if (nd.ni_vp->v_type != VREG) { 166 vn_close(nd.ni_vp, flags, td->td_ucred, td); 167 error = EACCES; 168 goto done; 169 } 170 #ifdef MAC 171 } else { 172 error = mac_check_system_acct(td->td_ucred, NULL); 173 if (error) 174 goto done; 175 #endif 176 } 177 178 /* 179 * Disallow concurrent access to the accounting vnode while we swap 180 * it out, in order to prevent access after close. 181 */ 182 sx_xlock(&acct_sx); 183 184 /* 185 * If accounting was previously enabled, kill the old space-watcher, 186 * close the file, and (if no new file was specified, leave). Reset 187 * the suspended state regardless of whether accounting remains 188 * enabled. 189 */ 190 acct_suspended = 0; 191 if (acct_vp != NULL) { 192 callout_stop(&acctwatch_callout); 193 error = vn_close(acct_vp, acct_flags, acct_cred, td); 194 crfree(acct_cred); 195 acct_vp = NULL; 196 acct_cred = NULL; 197 acct_flags = 0; 198 log(LOG_NOTICE, "Accounting disabled\n"); 199 } 200 if (uap->path == NULL) { 201 sx_xunlock(&acct_sx); 202 goto done; 203 } 204 205 /* 206 * Save the new accounting file vnode, and schedule the new 207 * free space watcher. 208 */ 209 acct_vp = nd.ni_vp; 210 acct_cred = crhold(td->td_ucred); 211 acct_flags = flags; 212 callout_init(&acctwatch_callout, CALLOUT_MPSAFE); 213 sx_xunlock(&acct_sx); 214 log(LOG_NOTICE, "Accounting enabled\n"); 215 acctwatch(NULL); 216 done: 217 mtx_unlock(&Giant); 218 return (error); 219 } 220 221 /* 222 * Write out process accounting information, on process exit. 223 * Data to be written out is specified in Leffler, et al. 224 * and are enumerated below. (They're also noted in the system 225 * "acct.h" header file.) 226 */ 227 int 228 acct_process(struct thread *td) 229 { 230 struct acct acct; 231 struct timeval ut, st, tmp; 232 struct plimit *newlim, *oldlim; 233 struct proc *p; 234 struct rusage *r; 235 int t, ret, vfslocked; 236 237 /* 238 * Lockless check of accounting condition before doing the hard 239 * work. 240 */ 241 if (acct_vp == NULL || acct_suspended) 242 return (0); 243 244 sx_slock(&acct_sx); 245 246 /* 247 * If accounting isn't enabled, don't bother. Have to check again 248 * once we own the lock in case we raced with disabling of accounting 249 * by another thread. 250 */ 251 if (acct_vp == NULL || acct_suspended) { 252 sx_sunlock(&acct_sx); 253 return (0); 254 } 255 256 p = td->td_proc; 257 258 /* 259 * Get process accounting information. 260 */ 261 262 PROC_LOCK(p); 263 /* (1) The name of the command that ran */ 264 bcopy(p->p_comm, acct.ac_comm, sizeof acct.ac_comm); 265 266 /* (2) The amount of user and system time that was used */ 267 calcru(p, &ut, &st); 268 acct.ac_utime = encode_comp_t(ut.tv_sec, ut.tv_usec); 269 acct.ac_stime = encode_comp_t(st.tv_sec, st.tv_usec); 270 271 /* (3) The elapsed time the command ran (and its starting time) */ 272 tmp = boottime; 273 timevaladd(&tmp, &p->p_stats->p_start); 274 acct.ac_btime = tmp.tv_sec; 275 microuptime(&tmp); 276 timevalsub(&tmp, &p->p_stats->p_start); 277 acct.ac_etime = encode_comp_t(tmp.tv_sec, tmp.tv_usec); 278 279 /* (4) The average amount of memory used */ 280 r = &p->p_stats->p_ru; 281 tmp = ut; 282 timevaladd(&tmp, &st); 283 t = tmp.tv_sec * hz + tmp.tv_usec / tick; 284 if (t) 285 acct.ac_mem = (r->ru_ixrss + r->ru_idrss + r->ru_isrss) / t; 286 else 287 acct.ac_mem = 0; 288 289 /* (5) The number of disk I/O operations done */ 290 acct.ac_io = encode_comp_t(r->ru_inblock + r->ru_oublock, 0); 291 292 /* (6) The UID and GID of the process */ 293 acct.ac_uid = p->p_ucred->cr_ruid; 294 acct.ac_gid = p->p_ucred->cr_rgid; 295 296 /* (7) The terminal from which the process was started */ 297 SESS_LOCK(p->p_session); 298 if ((p->p_flag & P_CONTROLT) && p->p_pgrp->pg_session->s_ttyp) 299 acct.ac_tty = dev2udev(p->p_pgrp->pg_session->s_ttyp->t_dev); 300 else 301 acct.ac_tty = NODEV; 302 SESS_UNLOCK(p->p_session); 303 304 /* (8) The boolean flags that tell how the process terminated, etc. */ 305 acct.ac_flag = p->p_acflag; 306 PROC_UNLOCK(p); 307 308 /* 309 * Eliminate any file size rlimit. 310 */ 311 newlim = lim_alloc(); 312 PROC_LOCK(p); 313 oldlim = p->p_limit; 314 lim_copy(newlim, oldlim); 315 newlim->pl_rlimit[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY; 316 p->p_limit = newlim; 317 PROC_UNLOCK(p); 318 lim_free(oldlim); 319 320 /* 321 * Write the accounting information to the file. 322 */ 323 vfslocked = VFS_LOCK_GIANT(acct_vp->v_mount); 324 VOP_LEASE(acct_vp, td, acct_cred, LEASE_WRITE); 325 ret = vn_rdwr(UIO_WRITE, acct_vp, (caddr_t)&acct, sizeof (acct), 326 (off_t)0, UIO_SYSSPACE, IO_APPEND|IO_UNIT, acct_cred, NOCRED, 327 (int *)0, td); 328 VFS_UNLOCK_GIANT(vfslocked); 329 sx_sunlock(&acct_sx); 330 return (ret); 331 } 332 333 /* 334 * Encode_comp_t converts from ticks in seconds and microseconds 335 * to ticks in 1/AHZ seconds. The encoding is described in 336 * Leffler, et al., on page 63. 337 */ 338 339 #define MANTSIZE 13 /* 13 bit mantissa. */ 340 #define EXPSIZE 3 /* Base 8 (3 bit) exponent. */ 341 #define MAXFRACT ((1 << MANTSIZE) - 1) /* Maximum fractional value. */ 342 343 static comp_t 344 encode_comp_t(u_long s, u_long us) 345 { 346 int exp, rnd; 347 348 exp = 0; 349 rnd = 0; 350 s *= AHZ; 351 s += us / (1000000 / AHZ); /* Maximize precision. */ 352 353 while (s > MAXFRACT) { 354 rnd = s & (1 << (EXPSIZE - 1)); /* Round up? */ 355 s >>= EXPSIZE; /* Base 8 exponent == 3 bit shift. */ 356 exp++; 357 } 358 359 /* If we need to round up, do it (and handle overflow correctly). */ 360 if (rnd && (++s > MAXFRACT)) { 361 s >>= EXPSIZE; 362 exp++; 363 } 364 365 /* Clean it up and polish it off. */ 366 exp <<= MANTSIZE; /* Shift the exponent into place */ 367 exp += s; /* and add on the mantissa. */ 368 return (exp); 369 } 370 371 /* 372 * Periodically check the filesystem to see if accounting 373 * should be turned on or off. Beware the case where the vnode 374 * has been vgone()'d out from underneath us, e.g. when the file 375 * system containing the accounting file has been forcibly unmounted. 376 */ 377 /* ARGSUSED */ 378 static void 379 acctwatch(void *a) 380 { 381 struct statfs sb; 382 int vfslocked; 383 384 sx_xlock(&acct_sx); 385 vfslocked = VFS_LOCK_GIANT(acct_vp->v_mount); 386 if (acct_vp->v_type == VBAD) { 387 (void) vn_close(acct_vp, acct_flags, acct_cred, NULL); 388 VFS_UNLOCK_GIANT(vfslocked); 389 crfree(acct_cred); 390 acct_vp = NULL; 391 acct_cred = NULL; 392 acct_flags = 0; 393 sx_xunlock(&acct_sx); 394 log(LOG_NOTICE, "Accounting disabled\n"); 395 return; 396 } 397 /* 398 * Stopping here is better than continuing, maybe it will be VBAD 399 * next time around. 400 */ 401 if (VFS_STATFS(acct_vp->v_mount, &sb, curthread) < 0) { 402 VFS_UNLOCK_GIANT(vfslocked); 403 sx_xunlock(&acct_sx); 404 return; 405 } 406 VFS_UNLOCK_GIANT(vfslocked); 407 if (acct_suspended) { 408 if (sb.f_bavail > (int64_t)(acctresume * sb.f_blocks / 409 100)) { 410 acct_suspended = 0; 411 log(LOG_NOTICE, "Accounting resumed\n"); 412 } 413 } else { 414 if (sb.f_bavail <= (int64_t)(acctsuspend * sb.f_blocks / 415 100)) { 416 acct_suspended = 1; 417 log(LOG_NOTICE, "Accounting suspended\n"); 418 } 419 } 420 callout_reset(&acctwatch_callout, acctchkfreq * hz, acctwatch, NULL); 421 sx_xunlock(&acct_sx); 422 } 423