1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/proc.h> 29 #include <sys/systm.h> 30 #include <sys/param.h> 31 #include <sys/kmem.h> 32 #include <sys/sysmacros.h> 33 #include <sys/types.h> 34 #include <sys/cmn_err.h> 35 #include <sys/user.h> 36 #include <sys/cred.h> 37 #include <sys/vnode.h> 38 #include <sys/file.h> 39 #include <sys/pathname.h> 40 #include <sys/modctl.h> 41 #include <sys/acctctl.h> 42 #include <sys/bitmap.h> 43 #include <sys/exacct.h> 44 #include <sys/policy.h> 45 46 /* 47 * acctctl(2) 48 * 49 * acctctl() provides the administrative interface to the extended accounting 50 * subsystem. The process and task accounting facilities are configurable: 51 * resources can be individually specified for recording in the appropriate 52 * accounting file. 53 * 54 * The current implementation of acctctl() requires that the process and task 55 * and flow files be distinct across all zones. 56 * 57 * Locking 58 * Each accounting species has an ac_info_t which contains a mutex, 59 * used to protect the ac_info_t's contents, and to serialize access to the 60 * appropriate file. 61 */ 62 63 static list_t exacct_globals_list; 64 static kmutex_t exacct_globals_list_lock; 65 66 static int 67 ac_state_set(ac_info_t *info, void *buf, size_t bufsz) 68 { 69 int state; 70 71 if (buf == NULL || (bufsz != sizeof (int))) 72 return (EINVAL); 73 74 if (copyin(buf, &state, bufsz) != 0) 75 return (EFAULT); 76 77 if (state != AC_ON && state != AC_OFF) 78 return (EINVAL); 79 80 mutex_enter(&info->ac_lock); 81 info->ac_state = state; 82 mutex_exit(&info->ac_lock); 83 return (0); 84 } 85 86 static int 87 ac_state_get(ac_info_t *info, void *buf, size_t bufsz) 88 { 89 if (buf == NULL || (bufsz != sizeof (int))) 90 return (EINVAL); 91 92 mutex_enter(&info->ac_lock); 93 if (copyout(&info->ac_state, buf, bufsz) != 0) { 94 mutex_exit(&info->ac_lock); 95 return (EFAULT); 96 } 97 mutex_exit(&info->ac_lock); 98 return (0); 99 } 100 101 static boolean_t 102 ac_file_in_use(vnode_t *vp) 103 { 104 boolean_t in_use = B_FALSE; 105 struct exacct_globals *acg; 106 107 if (vp == NULL) 108 return (B_FALSE); 109 mutex_enter(&exacct_globals_list_lock); 110 /* 111 * Start off by grabbing all locks. 112 */ 113 for (acg = list_head(&exacct_globals_list); acg != NULL; 114 acg = list_next(&exacct_globals_list, acg)) { 115 mutex_enter(&acg->ac_proc.ac_lock); 116 mutex_enter(&acg->ac_task.ac_lock); 117 mutex_enter(&acg->ac_flow.ac_lock); 118 } 119 120 for (acg = list_head(&exacct_globals_list); !in_use && acg != NULL; 121 acg = list_next(&exacct_globals_list, acg)) { 122 /* 123 * We need to verify that we aren't already using this file for 124 * accounting in any zone. 125 */ 126 if (vn_compare(acg->ac_proc.ac_vnode, vp) || 127 vn_compare(acg->ac_task.ac_vnode, vp) || 128 vn_compare(acg->ac_flow.ac_vnode, vp)) 129 in_use = B_TRUE; 130 } 131 132 /* 133 * Drop all locks. 134 */ 135 for (acg = list_head(&exacct_globals_list); acg != NULL; 136 acg = list_next(&exacct_globals_list, acg)) { 137 mutex_exit(&acg->ac_proc.ac_lock); 138 mutex_exit(&acg->ac_task.ac_lock); 139 mutex_exit(&acg->ac_flow.ac_lock); 140 } 141 mutex_exit(&exacct_globals_list_lock); 142 return (in_use); 143 } 144 145 static int 146 ac_file_set(ac_info_t *info, void *ubuf, size_t bufsz) 147 { 148 int error = 0; 149 void *kbuf; 150 void *namebuf; 151 int namelen; 152 vnode_t *vp; 153 void *hdr; 154 size_t hdrsize; 155 vattr_t va; 156 157 if (ubuf == NULL) { 158 mutex_enter(&info->ac_lock); 159 160 /* 161 * Closing accounting file 162 */ 163 if (info->ac_vnode != NULL) { 164 error = VOP_CLOSE(info->ac_vnode, FWRITE, 1, 0, 165 CRED(), NULL); 166 if (error) { 167 mutex_exit(&info->ac_lock); 168 return (error); 169 } 170 VN_RELE(info->ac_vnode); 171 info->ac_vnode = NULL; 172 } 173 if (info->ac_file != NULL) { 174 kmem_free(info->ac_file, strlen(info->ac_file) + 1); 175 info->ac_file = NULL; 176 } 177 178 mutex_exit(&info->ac_lock); 179 return (error); 180 } 181 182 if (bufsz < 2 || bufsz > MAXPATHLEN) 183 return (EINVAL); 184 185 /* 186 * We have to copy in the whole buffer since we can't tell the length 187 * of the string in user's address space. 188 */ 189 kbuf = kmem_zalloc(bufsz, KM_SLEEP); 190 if ((error = copyinstr((char *)ubuf, (char *)kbuf, bufsz, NULL)) != 0) { 191 kmem_free(kbuf, bufsz); 192 return (error); 193 } 194 if (*((char *)kbuf) != '/') { 195 kmem_free(kbuf, bufsz); 196 return (EINVAL); 197 } 198 199 /* 200 * Now, allocate the space where we are going to save the 201 * name of the accounting file and kmem_free kbuf. We have to do this 202 * now because it is not good to sleep in kmem_alloc() while 203 * holding ac_info's lock. 204 */ 205 namelen = strlen(kbuf) + 1; 206 namebuf = kmem_alloc(namelen, KM_SLEEP); 207 (void) strcpy(namebuf, kbuf); 208 kmem_free(kbuf, bufsz); 209 210 /* 211 * Check if this file already exists. 212 */ 213 error = lookupname(namebuf, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp); 214 215 /* 216 * Check if the file is already in use. 217 */ 218 if (!error) { 219 if (ac_file_in_use(vp)) { 220 /* 221 * If we're already using it then return EBUSY 222 */ 223 kmem_free(namebuf, namelen); 224 VN_RELE(vp); 225 return (EBUSY); 226 } 227 VN_RELE(vp); 228 } 229 230 /* 231 * Create an exacct header here because exacct_create_header() may 232 * sleep so we should not be holding ac_lock. At this point we cannot 233 * reliably know if we need the header or not, so we may end up not 234 * using the header. 235 */ 236 hdr = exacct_create_header(&hdrsize); 237 238 /* 239 * Now, grab info's ac_lock and try to set up everything. 240 */ 241 mutex_enter(&info->ac_lock); 242 243 if ((error = vn_open(namebuf, UIO_SYSSPACE, 244 FCREAT | FWRITE | FOFFMAX, 0600, &vp, CRCREAT, 0)) != 0) { 245 mutex_exit(&info->ac_lock); 246 kmem_free(namebuf, namelen); 247 kmem_free(hdr, hdrsize); 248 return (error); 249 } 250 251 if (vp->v_type != VREG) { 252 VN_RELE(vp); 253 mutex_exit(&info->ac_lock); 254 kmem_free(namebuf, namelen); 255 kmem_free(hdr, hdrsize); 256 return (EACCES); 257 } 258 259 if (info->ac_vnode != NULL) { 260 /* 261 * Switch from an old file to a new file by swapping 262 * their vnode pointers. 263 */ 264 vnode_t *oldvp; 265 oldvp = info->ac_vnode; 266 info->ac_vnode = vp; 267 vp = oldvp; 268 } else { 269 /* 270 * Start writing accounting records to a new file. 271 */ 272 info->ac_vnode = vp; 273 vp = NULL; 274 } 275 if (vp) { 276 /* 277 * We still need to close the old file. 278 */ 279 if ((error = VOP_CLOSE(vp, FWRITE, 1, 0, CRED(), NULL)) != 0) { 280 VN_RELE(vp); 281 mutex_exit(&info->ac_lock); 282 kmem_free(namebuf, namelen); 283 kmem_free(hdr, hdrsize); 284 return (error); 285 } 286 VN_RELE(vp); 287 if (info->ac_file != NULL) { 288 kmem_free(info->ac_file, 289 strlen(info->ac_file) + 1); 290 info->ac_file = NULL; 291 } 292 } 293 info->ac_file = namebuf; 294 295 /* 296 * Write the exacct header only if the file is empty. 297 */ 298 error = VOP_GETATTR(info->ac_vnode, &va, AT_SIZE, CRED(), NULL); 299 if (error == 0 && va.va_size == 0) 300 error = exacct_write_header(info, hdr, hdrsize); 301 302 mutex_exit(&info->ac_lock); 303 kmem_free(hdr, hdrsize); 304 return (error); 305 } 306 307 static int 308 ac_file_get(ac_info_t *info, void *buf, size_t bufsz) 309 { 310 int error = 0; 311 vnode_t *vnode; 312 char *file; 313 314 mutex_enter(&info->ac_lock); 315 file = info->ac_file; 316 vnode = info->ac_vnode; 317 318 if (file == NULL || vnode == NULL) { 319 mutex_exit(&info->ac_lock); 320 return (ENOTACTIVE); 321 } 322 323 if (strlen(file) >= bufsz) 324 error = ENOMEM; 325 else 326 error = copyoutstr(file, buf, MAXPATHLEN, NULL); 327 328 mutex_exit(&info->ac_lock); 329 return (error); 330 } 331 332 static int 333 ac_res_set(ac_info_t *info, void *buf, size_t bufsz, int maxres) 334 { 335 ac_res_t *res; 336 ac_res_t *tmp; 337 ulong_t *maskp; 338 int id; 339 uint_t counter = 0; 340 341 /* 342 * Validate that a non-zero buffer, sized within limits and to an 343 * integral number of ac_res_t's has been specified. 344 */ 345 if (bufsz == 0 || 346 bufsz > sizeof (ac_res_t) * (AC_MAX_RES + 1) || 347 (bufsz / sizeof (ac_res_t)) * sizeof (ac_res_t) != bufsz) 348 return (EINVAL); 349 350 tmp = res = kmem_alloc(bufsz, KM_SLEEP); 351 if (copyin(buf, res, bufsz) != 0) { 352 kmem_free(res, bufsz); 353 return (EFAULT); 354 } 355 356 maskp = (ulong_t *)&info->ac_mask; 357 358 mutex_enter(&info->ac_lock); 359 while ((id = tmp->ar_id) != AC_NONE && counter < maxres + 1) { 360 if (id > maxres || id < 0) { 361 mutex_exit(&info->ac_lock); 362 kmem_free(res, bufsz); 363 return (EINVAL); 364 } 365 if (tmp->ar_state == AC_ON) { 366 BT_SET(maskp, id); 367 } else if (tmp->ar_state == AC_OFF) { 368 BT_CLEAR(maskp, id); 369 } else { 370 mutex_exit(&info->ac_lock); 371 kmem_free(res, bufsz); 372 return (EINVAL); 373 } 374 tmp++; 375 counter++; 376 } 377 mutex_exit(&info->ac_lock); 378 kmem_free(res, bufsz); 379 return (0); 380 } 381 382 static int 383 ac_res_get(ac_info_t *info, void *buf, size_t bufsz, int maxres) 384 { 385 int error = 0; 386 ac_res_t *res; 387 ac_res_t *tmp; 388 size_t ressz = sizeof (ac_res_t) * (maxres + 1); 389 ulong_t *maskp; 390 int id; 391 392 if (bufsz < ressz) 393 return (EINVAL); 394 tmp = res = kmem_alloc(ressz, KM_SLEEP); 395 396 mutex_enter(&info->ac_lock); 397 maskp = (ulong_t *)&info->ac_mask; 398 for (id = 1; id <= maxres; id++) { 399 tmp->ar_id = id; 400 tmp->ar_state = BT_TEST(maskp, id); 401 tmp++; 402 } 403 tmp->ar_id = AC_NONE; 404 tmp->ar_state = AC_OFF; 405 mutex_exit(&info->ac_lock); 406 error = copyout(res, buf, ressz); 407 kmem_free(res, ressz); 408 return (error); 409 } 410 411 /* 412 * acctctl() 413 * 414 * Overview 415 * acctctl() is the entry point for the acctctl(2) system call. 416 * 417 * Return values 418 * On successful completion, return 0; otherwise -1 is returned and errno is 419 * set appropriately. 420 * 421 * Caller's context 422 * Called from the system call path. 423 */ 424 int 425 acctctl(int cmd, void *buf, size_t bufsz) 426 { 427 int error = 0; 428 int mode = AC_MODE(cmd); 429 int option = AC_OPTION(cmd); 430 int maxres; 431 ac_info_t *info; 432 zone_t *zone = curproc->p_zone; 433 struct exacct_globals *acg; 434 435 acg = zone_getspecific(exacct_zone_key, zone); 436 /* 437 * exacct_zone_key and associated per-zone state were initialized when 438 * the module was loaded. 439 */ 440 ASSERT(exacct_zone_key != ZONE_KEY_UNINITIALIZED); 441 ASSERT(acg != NULL); 442 443 switch (mode) { /* sanity check */ 444 case AC_TASK: 445 info = &acg->ac_task; 446 maxres = AC_TASK_MAX_RES; 447 break; 448 case AC_PROC: 449 info = &acg->ac_proc; 450 maxres = AC_PROC_MAX_RES; 451 break; 452 case AC_FLOW: 453 /* 454 * Flow accounting isn't currently configurable in non-global 455 * zones, but we have this field on a per-zone basis for future 456 * expansion as well as the ability to return default "unset" 457 * values for the various AC_*_GET queries. AC_*_SET commands 458 * fail with EPERM for AC_FLOW in non-global zones. 459 */ 460 info = &acg->ac_flow; 461 maxres = AC_FLOW_MAX_RES; 462 break; 463 default: 464 return (set_errno(EINVAL)); 465 } 466 467 switch (option) { 468 case AC_STATE_SET: 469 if ((error = secpolicy_acct(CRED())) != 0) 470 break; 471 if (mode == AC_FLOW && getzoneid() != GLOBAL_ZONEID) { 472 error = EPERM; 473 break; 474 } 475 error = ac_state_set(info, buf, bufsz); 476 break; 477 case AC_STATE_GET: 478 error = ac_state_get(info, buf, bufsz); 479 break; 480 case AC_FILE_SET: 481 if ((error = secpolicy_acct(CRED())) != 0) 482 break; 483 if (mode == AC_FLOW && getzoneid() != GLOBAL_ZONEID) { 484 error = EPERM; 485 break; 486 } 487 error = ac_file_set(info, buf, bufsz); 488 break; 489 case AC_FILE_GET: 490 error = ac_file_get(info, buf, bufsz); 491 break; 492 case AC_RES_SET: 493 if ((error = secpolicy_acct(CRED())) != 0) 494 break; 495 if (mode == AC_FLOW && getzoneid() != GLOBAL_ZONEID) { 496 error = EPERM; 497 break; 498 } 499 error = ac_res_set(info, buf, bufsz, maxres); 500 break; 501 case AC_RES_GET: 502 error = ac_res_get(info, buf, bufsz, maxres); 503 break; 504 default: 505 return (set_errno(EINVAL)); 506 } 507 if (error) 508 return (set_errno(error)); 509 return (0); 510 } 511 512 static struct sysent ac_sysent = { 513 3, 514 SE_NOUNLOAD | SE_ARGC | SE_32RVAL1, 515 acctctl 516 }; 517 518 static struct modlsys modlsys = { 519 &mod_syscallops, 520 "acctctl system call", 521 &ac_sysent 522 }; 523 524 #ifdef _SYSCALL32_IMPL 525 static struct modlsys modlsys32 = { 526 &mod_syscallops32, 527 "32-bit acctctl system call", 528 &ac_sysent 529 }; 530 #endif 531 532 static struct modlinkage modlinkage = { 533 MODREV_1, 534 &modlsys, 535 #ifdef _SYSCALL32_IMPL 536 &modlsys32, 537 #endif 538 NULL 539 }; 540 541 /* ARGSUSED */ 542 static void * 543 exacct_zone_init(zoneid_t zoneid) 544 { 545 struct exacct_globals *acg; 546 547 acg = kmem_zalloc(sizeof (*acg), KM_SLEEP); 548 mutex_enter(&exacct_globals_list_lock); 549 list_insert_tail(&exacct_globals_list, acg); 550 mutex_exit(&exacct_globals_list_lock); 551 return (acg); 552 } 553 554 static void 555 exacct_free_info(ac_info_t *info) 556 { 557 mutex_enter(&info->ac_lock); 558 if (info->ac_vnode) { 559 (void) VOP_CLOSE(info->ac_vnode, FWRITE, 1, 0, kcred, NULL); 560 VN_RELE(info->ac_vnode); 561 kmem_free(info->ac_file, strlen(info->ac_file) + 1); 562 } 563 info->ac_state = AC_OFF; 564 info->ac_vnode = NULL; 565 info->ac_file = NULL; 566 mutex_exit(&info->ac_lock); 567 } 568 569 /* ARGSUSED */ 570 static void 571 exacct_zone_shutdown(zoneid_t zoneid, void *data) 572 { 573 struct exacct_globals *acg = data; 574 575 /* 576 * The accounting files need to be closed during shutdown rather than 577 * destroy, since otherwise the filesystem they reside on may fail to 578 * unmount, thus causing the entire zone halt/reboot to fail. 579 */ 580 exacct_free_info(&acg->ac_proc); 581 exacct_free_info(&acg->ac_task); 582 exacct_free_info(&acg->ac_flow); 583 } 584 585 /* ARGSUSED */ 586 static void 587 exacct_zone_fini(zoneid_t zoneid, void *data) 588 { 589 struct exacct_globals *acg = data; 590 591 mutex_enter(&exacct_globals_list_lock); 592 list_remove(&exacct_globals_list, acg); 593 mutex_exit(&exacct_globals_list_lock); 594 595 mutex_destroy(&acg->ac_proc.ac_lock); 596 mutex_destroy(&acg->ac_task.ac_lock); 597 mutex_destroy(&acg->ac_flow.ac_lock); 598 kmem_free(acg, sizeof (*acg)); 599 } 600 601 int 602 _init() 603 { 604 int error; 605 606 mutex_init(&exacct_globals_list_lock, NULL, MUTEX_DEFAULT, NULL); 607 list_create(&exacct_globals_list, sizeof (struct exacct_globals), 608 offsetof(struct exacct_globals, ac_link)); 609 zone_key_create(&exacct_zone_key, exacct_zone_init, 610 exacct_zone_shutdown, exacct_zone_fini); 611 612 if ((error = mod_install(&modlinkage)) != 0) { 613 (void) zone_key_delete(exacct_zone_key); 614 exacct_zone_key = ZONE_KEY_UNINITIALIZED; 615 mutex_destroy(&exacct_globals_list_lock); 616 list_destroy(&exacct_globals_list); 617 } 618 return (error); 619 } 620 621 int 622 _info(struct modinfo *modinfop) 623 { 624 return (mod_info(&modlinkage, modinfop)); 625 } 626 627 int 628 _fini() 629 { 630 return (EBUSY); 631 } 632