1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/proc.h> 29 #include <sys/systm.h> 30 #include <sys/param.h> 31 #include <sys/kmem.h> 32 #include <sys/sysmacros.h> 33 #include <sys/types.h> 34 #include <sys/cmn_err.h> 35 #include <sys/user.h> 36 #include <sys/cred.h> 37 #include <sys/vnode.h> 38 #include <sys/file.h> 39 #include <sys/pathname.h> 40 #include <sys/modctl.h> 41 #include <sys/acctctl.h> 42 #include <sys/bitmap.h> 43 #include <sys/exacct.h> 44 #include <sys/policy.h> 45 46 /* 47 * acctctl(2) 48 * 49 * acctctl() provides the administrative interface to the extended accounting 50 * subsystem. The process and task accounting facilities are configurable: 51 * resources can be individually specified for recording in the appropriate 52 * accounting file. 53 * 54 * The current implementation of acctctl() requires that the process and task 55 * and flow files be distinct across all zones. 56 * 57 * Locking 58 * Each accounting species has an ac_info_t which contains a mutex, 59 * used to protect the ac_info_t's contents, and to serialize access to the 60 * appropriate file. 61 */ 62 63 static list_t exacct_globals_list; 64 static kmutex_t exacct_globals_list_lock; 65 66 static int 67 ac_state_set(ac_info_t *info, void *buf, size_t bufsz) 68 { 69 int state; 70 71 if (buf == NULL || (bufsz != sizeof (int))) 72 return (EINVAL); 73 74 if (copyin(buf, &state, bufsz) != 0) 75 return (EFAULT); 76 77 if (state != AC_ON && state != AC_OFF) 78 return (EINVAL); 79 80 mutex_enter(&info->ac_lock); 81 info->ac_state = state; 82 mutex_exit(&info->ac_lock); 83 return (0); 84 } 85 86 static int 87 ac_state_get(ac_info_t *info, void *buf, size_t bufsz) 88 { 89 if (buf == NULL || (bufsz != sizeof (int))) 90 return (EINVAL); 91 92 mutex_enter(&info->ac_lock); 93 if (copyout(&info->ac_state, buf, bufsz) != 0) { 94 mutex_exit(&info->ac_lock); 95 return (EFAULT); 96 } 97 mutex_exit(&info->ac_lock); 98 return (0); 99 } 100 101 static boolean_t 102 ac_file_in_use(vnode_t *vp) 103 { 104 boolean_t in_use = B_FALSE; 105 struct exacct_globals *acg; 106 107 if (vp == NULL) 108 return (B_FALSE); 109 mutex_enter(&exacct_globals_list_lock); 110 /* 111 * Start off by grabbing all locks. 112 */ 113 for (acg = list_head(&exacct_globals_list); acg != NULL; 114 acg = list_next(&exacct_globals_list, acg)) { 115 mutex_enter(&acg->ac_proc.ac_lock); 116 mutex_enter(&acg->ac_task.ac_lock); 117 mutex_enter(&acg->ac_flow.ac_lock); 118 } 119 120 for (acg = list_head(&exacct_globals_list); !in_use && acg != NULL; 121 acg = list_next(&exacct_globals_list, acg)) { 122 /* 123 * We need to verify that we aren't already using this file for 124 * accounting in any zone. 125 */ 126 if (vn_compare(acg->ac_proc.ac_vnode, vp) || 127 vn_compare(acg->ac_task.ac_vnode, vp) || 128 vn_compare(acg->ac_flow.ac_vnode, vp)) 129 in_use = B_TRUE; 130 } 131 132 /* 133 * Drop all locks. 134 */ 135 for (acg = list_head(&exacct_globals_list); acg != NULL; 136 acg = list_next(&exacct_globals_list, acg)) { 137 mutex_exit(&acg->ac_proc.ac_lock); 138 mutex_exit(&acg->ac_task.ac_lock); 139 mutex_exit(&acg->ac_flow.ac_lock); 140 } 141 mutex_exit(&exacct_globals_list_lock); 142 return (in_use); 143 } 144 145 static int 146 ac_file_set(ac_info_t *info, void *ubuf, size_t bufsz) 147 { 148 int error = 0; 149 void *kbuf; 150 void *namebuf; 151 int namelen; 152 vnode_t *vp; 153 void *hdr; 154 size_t hdrsize; 155 156 if (ubuf == NULL) { 157 mutex_enter(&info->ac_lock); 158 159 /* 160 * Closing accounting file 161 */ 162 if (info->ac_vnode != NULL) { 163 error = VOP_CLOSE(info->ac_vnode, FWRITE, 1, 0, 164 CRED(), NULL); 165 if (error) { 166 mutex_exit(&info->ac_lock); 167 return (error); 168 } 169 VN_RELE(info->ac_vnode); 170 info->ac_vnode = NULL; 171 } 172 if (info->ac_file != NULL) { 173 kmem_free(info->ac_file, strlen(info->ac_file) + 1); 174 info->ac_file = NULL; 175 } 176 177 mutex_exit(&info->ac_lock); 178 return (error); 179 } 180 181 if (bufsz < 2 || bufsz > MAXPATHLEN) 182 return (EINVAL); 183 184 /* 185 * We have to copy in the whole buffer since we can't tell the length 186 * of the string in user's address space. 187 */ 188 kbuf = kmem_zalloc(bufsz, KM_SLEEP); 189 if ((error = copyinstr((char *)ubuf, (char *)kbuf, bufsz, NULL)) != 0) { 190 kmem_free(kbuf, bufsz); 191 return (error); 192 } 193 if (*((char *)kbuf) != '/') { 194 kmem_free(kbuf, bufsz); 195 return (EINVAL); 196 } 197 198 /* 199 * Now, allocate the space where we are going to save the 200 * name of the accounting file and kmem_free kbuf. We have to do this 201 * now because it is not good to sleep in kmem_alloc() while 202 * holding ac_info's lock. 203 */ 204 namelen = strlen(kbuf) + 1; 205 namebuf = kmem_alloc(namelen, KM_SLEEP); 206 (void) strcpy(namebuf, kbuf); 207 kmem_free(kbuf, bufsz); 208 209 /* 210 * Check if this file already exists. 211 */ 212 error = lookupname(namebuf, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp); 213 214 /* 215 * Check if the file is already in use. 216 */ 217 if (!error) { 218 if (ac_file_in_use(vp)) { 219 /* 220 * If we're already using it then return EBUSY 221 */ 222 kmem_free(namebuf, namelen); 223 VN_RELE(vp); 224 return (EBUSY); 225 } 226 VN_RELE(vp); 227 } 228 229 /* 230 * Now, grab info's ac_lock and try to set up everything. 231 */ 232 mutex_enter(&info->ac_lock); 233 234 if ((error = vn_open(namebuf, UIO_SYSSPACE, 235 FCREAT | FWRITE | FTRUNC, 0600, &vp, CRCREAT, 0)) != 0) { 236 mutex_exit(&info->ac_lock); 237 kmem_free(namebuf, namelen); 238 return (error); 239 } 240 241 if (vp->v_type != VREG) { 242 VN_RELE(vp); 243 mutex_exit(&info->ac_lock); 244 kmem_free(namebuf, namelen); 245 return (EACCES); 246 } 247 248 if (info->ac_vnode != NULL) { 249 /* 250 * Switch from an old file to a new file by swapping 251 * their vnode pointers. 252 */ 253 vnode_t *oldvp; 254 oldvp = info->ac_vnode; 255 info->ac_vnode = vp; 256 vp = oldvp; 257 } else { 258 /* 259 * Start writing accounting records to a new file. 260 */ 261 info->ac_vnode = vp; 262 vp = NULL; 263 } 264 if (vp) { 265 /* 266 * We still need to close the old file. 267 */ 268 if ((error = VOP_CLOSE(vp, FWRITE, 1, 0, CRED(), NULL)) != 0) { 269 VN_RELE(vp); 270 mutex_exit(&info->ac_lock); 271 kmem_free(namebuf, namelen); 272 return (error); 273 } 274 VN_RELE(vp); 275 if (info->ac_file != NULL) { 276 kmem_free(info->ac_file, 277 strlen(info->ac_file) + 1); 278 info->ac_file = NULL; 279 } 280 } 281 /* 282 * Finally, point ac_file to the filename string and release the lock. 283 */ 284 info->ac_file = namebuf; 285 mutex_exit(&info->ac_lock); 286 287 /* 288 * Create and write an exacct header to the file. 289 */ 290 hdr = exacct_create_header(&hdrsize); 291 error = exacct_write_header(info, hdr, hdrsize); 292 293 return (error); 294 } 295 296 static int 297 ac_file_get(ac_info_t *info, void *buf, size_t bufsz) 298 { 299 int error = 0; 300 vnode_t *vnode; 301 char *file; 302 303 mutex_enter(&info->ac_lock); 304 file = info->ac_file; 305 vnode = info->ac_vnode; 306 307 if (file == NULL || vnode == NULL) { 308 mutex_exit(&info->ac_lock); 309 return (ENOTACTIVE); 310 } 311 312 if (strlen(file) >= bufsz) 313 error = ENOMEM; 314 else 315 error = copyoutstr(file, buf, MAXPATHLEN, NULL); 316 317 mutex_exit(&info->ac_lock); 318 return (error); 319 } 320 321 static int 322 ac_res_set(ac_info_t *info, void *buf, size_t bufsz, int maxres) 323 { 324 ac_res_t *res; 325 ac_res_t *tmp; 326 ulong_t *maskp; 327 int id; 328 uint_t counter = 0; 329 330 /* 331 * Validate that a non-zero buffer, sized within limits and to an 332 * integral number of ac_res_t's has been specified. 333 */ 334 if (bufsz == 0 || 335 bufsz > sizeof (ac_res_t) * (AC_MAX_RES + 1) || 336 (bufsz / sizeof (ac_res_t)) * sizeof (ac_res_t) != bufsz) 337 return (EINVAL); 338 339 tmp = res = kmem_alloc(bufsz, KM_SLEEP); 340 if (copyin(buf, res, bufsz) != 0) { 341 kmem_free(res, bufsz); 342 return (EFAULT); 343 } 344 345 maskp = (ulong_t *)&info->ac_mask; 346 347 mutex_enter(&info->ac_lock); 348 while ((id = tmp->ar_id) != AC_NONE && counter < maxres + 1) { 349 if (id > maxres || id < 0) { 350 mutex_exit(&info->ac_lock); 351 kmem_free(res, bufsz); 352 return (EINVAL); 353 } 354 if (tmp->ar_state == AC_ON) { 355 BT_SET(maskp, id); 356 } else if (tmp->ar_state == AC_OFF) { 357 BT_CLEAR(maskp, id); 358 } else { 359 mutex_exit(&info->ac_lock); 360 kmem_free(res, bufsz); 361 return (EINVAL); 362 } 363 tmp++; 364 counter++; 365 } 366 mutex_exit(&info->ac_lock); 367 kmem_free(res, bufsz); 368 return (0); 369 } 370 371 static int 372 ac_res_get(ac_info_t *info, void *buf, size_t bufsz, int maxres) 373 { 374 int error = 0; 375 ac_res_t *res; 376 ac_res_t *tmp; 377 size_t ressz = sizeof (ac_res_t) * (maxres + 1); 378 ulong_t *maskp; 379 int id; 380 381 if (bufsz < ressz) 382 return (EINVAL); 383 tmp = res = kmem_alloc(ressz, KM_SLEEP); 384 385 mutex_enter(&info->ac_lock); 386 maskp = (ulong_t *)&info->ac_mask; 387 for (id = 1; id <= maxres; id++) { 388 tmp->ar_id = id; 389 tmp->ar_state = BT_TEST(maskp, id); 390 tmp++; 391 } 392 tmp->ar_id = AC_NONE; 393 tmp->ar_state = AC_OFF; 394 mutex_exit(&info->ac_lock); 395 error = copyout(res, buf, ressz); 396 kmem_free(res, ressz); 397 return (error); 398 } 399 400 /* 401 * acctctl() 402 * 403 * Overview 404 * acctctl() is the entry point for the acctctl(2) system call. 405 * 406 * Return values 407 * On successful completion, return 0; otherwise -1 is returned and errno is 408 * set appropriately. 409 * 410 * Caller's context 411 * Called from the system call path. 412 */ 413 int 414 acctctl(int cmd, void *buf, size_t bufsz) 415 { 416 int error = 0; 417 int mode = AC_MODE(cmd); 418 int option = AC_OPTION(cmd); 419 int maxres; 420 ac_info_t *info; 421 zone_t *zone = curproc->p_zone; 422 struct exacct_globals *acg; 423 424 acg = zone_getspecific(exacct_zone_key, zone); 425 /* 426 * exacct_zone_key and associated per-zone state were initialized when 427 * the module was loaded. 428 */ 429 ASSERT(exacct_zone_key != ZONE_KEY_UNINITIALIZED); 430 ASSERT(acg != NULL); 431 432 switch (mode) { /* sanity check */ 433 case AC_TASK: 434 info = &acg->ac_task; 435 maxres = AC_TASK_MAX_RES; 436 break; 437 case AC_PROC: 438 info = &acg->ac_proc; 439 maxres = AC_PROC_MAX_RES; 440 break; 441 case AC_FLOW: 442 /* 443 * Flow accounting isn't currently configurable in non-global 444 * zones, but we have this field on a per-zone basis for future 445 * expansion as well as the ability to return default "unset" 446 * values for the various AC_*_GET queries. AC_*_SET commands 447 * fail with EPERM for AC_FLOW in non-global zones. 448 */ 449 info = &acg->ac_flow; 450 maxres = AC_FLOW_MAX_RES; 451 break; 452 default: 453 return (set_errno(EINVAL)); 454 } 455 456 switch (option) { 457 case AC_STATE_SET: 458 if ((error = secpolicy_acct(CRED())) != 0) 459 break; 460 if (mode == AC_FLOW && getzoneid() != GLOBAL_ZONEID) { 461 error = EPERM; 462 break; 463 } 464 error = ac_state_set(info, buf, bufsz); 465 break; 466 case AC_STATE_GET: 467 error = ac_state_get(info, buf, bufsz); 468 break; 469 case AC_FILE_SET: 470 if ((error = secpolicy_acct(CRED())) != 0) 471 break; 472 if (mode == AC_FLOW && getzoneid() != GLOBAL_ZONEID) { 473 error = EPERM; 474 break; 475 } 476 error = ac_file_set(info, buf, bufsz); 477 break; 478 case AC_FILE_GET: 479 error = ac_file_get(info, buf, bufsz); 480 break; 481 case AC_RES_SET: 482 if ((error = secpolicy_acct(CRED())) != 0) 483 break; 484 if (mode == AC_FLOW && getzoneid() != GLOBAL_ZONEID) { 485 error = EPERM; 486 break; 487 } 488 error = ac_res_set(info, buf, bufsz, maxres); 489 break; 490 case AC_RES_GET: 491 error = ac_res_get(info, buf, bufsz, maxres); 492 break; 493 default: 494 return (set_errno(EINVAL)); 495 } 496 if (error) 497 return (set_errno(error)); 498 return (0); 499 } 500 501 static struct sysent ac_sysent = { 502 3, 503 SE_NOUNLOAD | SE_ARGC | SE_32RVAL1, 504 acctctl 505 }; 506 507 static struct modlsys modlsys = { 508 &mod_syscallops, 509 "acctctl system call", 510 &ac_sysent 511 }; 512 513 #ifdef _SYSCALL32_IMPL 514 static struct modlsys modlsys32 = { 515 &mod_syscallops32, 516 "32-bit acctctl system call", 517 &ac_sysent 518 }; 519 #endif 520 521 static struct modlinkage modlinkage = { 522 MODREV_1, 523 &modlsys, 524 #ifdef _SYSCALL32_IMPL 525 &modlsys32, 526 #endif 527 NULL 528 }; 529 530 /* ARGSUSED */ 531 static void * 532 exacct_zone_init(zoneid_t zoneid) 533 { 534 struct exacct_globals *acg; 535 536 acg = kmem_zalloc(sizeof (*acg), KM_SLEEP); 537 mutex_enter(&exacct_globals_list_lock); 538 list_insert_tail(&exacct_globals_list, acg); 539 mutex_exit(&exacct_globals_list_lock); 540 return (acg); 541 } 542 543 static void 544 exacct_free_info(ac_info_t *info) 545 { 546 mutex_enter(&info->ac_lock); 547 if (info->ac_vnode) { 548 (void) VOP_CLOSE(info->ac_vnode, FWRITE, 1, 0, kcred, NULL); 549 VN_RELE(info->ac_vnode); 550 kmem_free(info->ac_file, strlen(info->ac_file) + 1); 551 } 552 info->ac_state = AC_OFF; 553 info->ac_vnode = NULL; 554 info->ac_file = NULL; 555 mutex_exit(&info->ac_lock); 556 } 557 558 /* ARGSUSED */ 559 static void 560 exacct_zone_shutdown(zoneid_t zoneid, void *data) 561 { 562 struct exacct_globals *acg = data; 563 564 /* 565 * The accounting files need to be closed during shutdown rather than 566 * destroy, since otherwise the filesystem they reside on may fail to 567 * unmount, thus causing the entire zone halt/reboot to fail. 568 */ 569 exacct_free_info(&acg->ac_proc); 570 exacct_free_info(&acg->ac_task); 571 exacct_free_info(&acg->ac_flow); 572 } 573 574 /* ARGSUSED */ 575 static void 576 exacct_zone_fini(zoneid_t zoneid, void *data) 577 { 578 struct exacct_globals *acg = data; 579 580 mutex_enter(&exacct_globals_list_lock); 581 list_remove(&exacct_globals_list, acg); 582 mutex_exit(&exacct_globals_list_lock); 583 584 mutex_destroy(&acg->ac_proc.ac_lock); 585 mutex_destroy(&acg->ac_task.ac_lock); 586 mutex_destroy(&acg->ac_flow.ac_lock); 587 kmem_free(acg, sizeof (*acg)); 588 } 589 590 int 591 _init() 592 { 593 int error; 594 595 mutex_init(&exacct_globals_list_lock, NULL, MUTEX_DEFAULT, NULL); 596 list_create(&exacct_globals_list, sizeof (struct exacct_globals), 597 offsetof(struct exacct_globals, ac_link)); 598 zone_key_create(&exacct_zone_key, exacct_zone_init, 599 exacct_zone_shutdown, exacct_zone_fini); 600 601 if ((error = mod_install(&modlinkage)) != 0) { 602 (void) zone_key_delete(exacct_zone_key); 603 exacct_zone_key = ZONE_KEY_UNINITIALIZED; 604 mutex_destroy(&exacct_globals_list_lock); 605 list_destroy(&exacct_globals_list); 606 } 607 return (error); 608 } 609 610 int 611 _info(struct modinfo *modinfop) 612 { 613 return (mod_info(&modlinkage, modinfop)); 614 } 615 616 int 617 _fini() 618 { 619 return (EBUSY); 620 } 621