1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/kmem.h> 31 #include <sys/conf.h> 32 #include <sys/atomic.h> 33 #include <netinet/in.h> 34 #include <netinet/in_systm.h> 35 #include <netinet/ip6.h> 36 #include <sys/socket.h> 37 #include <sys/acct.h> 38 #include <sys/exacct.h> 39 #include <inet/common.h> 40 #include <inet/ip.h> 41 #include <inet/ip6.h> 42 #include <sys/ddi.h> 43 #include <sys/strsun.h> 44 #include <ipp/flowacct/flowacct_impl.h> 45 46 /* 47 * flowacct - IPQoS accounting module. The module maintains an array 48 * of 256 hash buckets. When the action routine is invoked for a flow, 49 * if the flow (identified by the 5-tuple: saddr, daddr, sport, dport, proto) 50 * is already present in the flow table (indexed by the hash function FLOW_HASH) 51 * then a check is made to see if an item for this flow with the same 52 * dsfield, projid & user id is present. If it is, then the number of packets 53 * and the bytes are incremented for that item. If the item does 54 * not exist a new item is added for the flow. If the flow is not present 55 * an entry is made for the flow. 56 * 57 * A timer runs thru the table and writes all the flow items that have 58 * timed out to the accounting file (via exacct PSARC/1999/119), if present 59 * Configuration commands to change the timing interval is provided. The 60 * flow timeout value can also be configured. While the timeout is in nsec, 61 * the flow timer interval is in usec. 62 * Information for an active flow can be obtained by using kstats. 63 */ 64 65 /* Used in computing the hash index */ 66 #define FLOWACCT_ADDR_HASH(addr) \ 67 ((addr).s6_addr8[8] ^ (addr).s6_addr8[9] ^ \ 68 (addr).s6_addr8[10] ^ (addr).s6_addr8[13] ^ \ 69 (addr).s6_addr8[14] ^ (addr).s6_addr8[15]) 70 71 #define FLOWACCT_FLOW_HASH(f) \ 72 (((FLOWACCT_ADDR_HASH(f->saddr)) + \ 73 (FLOWACCT_ADDR_HASH(f->daddr)) + \ 74 (f->proto) + (f->sport) + (f->dport)) \ 75 % FLOW_TBL_COUNT) 76 77 /* 78 * Compute difference between a and b in nsec and store in delta. 79 * delta should be a hrtime_t. Taken from ip_mroute.c. 80 */ 81 #define FLOWACCT_DELTA(a, b, delta) { \ 82 int xxs; \ 83 \ 84 delta = (a).tv_nsec - (b).tv_nsec; \ 85 if ((xxs = (a).tv_sec - (b).tv_sec) != 0) { \ 86 switch (xxs) { \ 87 case 2: \ 88 delta += NANOSEC; \ 89 /*FALLTHRU*/ \ 90 case 1: \ 91 delta += NANOSEC; \ 92 break; \ 93 default: \ 94 delta += ((hrtime_t)NANOSEC * xxs); \ 95 } \ 96 } \ 97 } 98 99 /* Debug level */ 100 int flowacct_debug = 0; 101 102 /* Collect timed out flows to be written to the accounting file */ 103 typedef struct flow_records_s { 104 flow_usage_t *fl_use; 105 struct flow_records_s *next; 106 }flow_records_t; 107 108 /* Get port information from the packet. Ignore fragments. */ 109 static void 110 flowacct_port_info(header_t *header, void *iph, int af, mblk_t *mp) 111 { 112 uint16_t *up; 113 114 if (af == AF_INET) { 115 ipha_t *ipha = (ipha_t *)iph; 116 uint32_t u2, u1; 117 uint_t iplen; 118 119 u2 = ntohs(ipha->ipha_fragment_offset_and_flags); 120 u1 = u2 & (IPH_MF | IPH_OFFSET); 121 if (u1 != 0) { 122 return; 123 } 124 iplen = (ipha->ipha_version_and_hdr_length & 0xF) << 2; 125 up = (uint16_t *)(mp->b_rptr + iplen); 126 header->sport = (uint16_t)*up++; 127 header->dport = (uint16_t)*up; 128 } else { 129 ip6_t *ip6h = (ip6_t *)iph; 130 uint_t length = IPV6_HDR_LEN; 131 uint_t ehdrlen; 132 uint8_t *nexthdrp, *whereptr, *endptr; 133 ip6_dest_t *desthdr; 134 ip6_rthdr_t *rthdr; 135 ip6_hbh_t *hbhhdr; 136 137 whereptr = ((uint8_t *)&ip6h[1]); 138 endptr = mp->b_wptr; 139 nexthdrp = &ip6h->ip6_nxt; 140 while (whereptr < endptr) { 141 switch (*nexthdrp) { 142 case IPPROTO_HOPOPTS: 143 hbhhdr = (ip6_hbh_t *)whereptr; 144 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 145 if ((uchar_t *)hbhhdr + ehdrlen > endptr) 146 return; 147 nexthdrp = &hbhhdr->ip6h_nxt; 148 break; 149 case IPPROTO_DSTOPTS: 150 desthdr = (ip6_dest_t *)whereptr; 151 ehdrlen = 8 * (desthdr->ip6d_len + 1); 152 if ((uchar_t *)desthdr + ehdrlen > endptr) 153 return; 154 nexthdrp = &desthdr->ip6d_nxt; 155 break; 156 case IPPROTO_ROUTING: 157 rthdr = (ip6_rthdr_t *)whereptr; 158 ehdrlen = 8 * (rthdr->ip6r_len + 1); 159 if ((uchar_t *)rthdr + ehdrlen > endptr) 160 return; 161 nexthdrp = &rthdr->ip6r_nxt; 162 break; 163 case IPPROTO_FRAGMENT: 164 return; 165 case IPPROTO_TCP: 166 case IPPROTO_UDP: 167 case IPPROTO_SCTP: 168 /* 169 * Verify we have at least ICMP_MIN_TP_HDR_LEN 170 * bytes of the ULP's header to get the port 171 * info. 172 */ 173 if (((uchar_t *)ip6h + length + 174 ICMP_MIN_TP_HDR_LEN) > endptr) { 175 return; 176 } 177 /* Get the protocol & ports */ 178 header->proto = *nexthdrp; 179 up = (uint16_t *)((uchar_t *)ip6h + length); 180 header->sport = (uint16_t)*up++; 181 header->dport = (uint16_t)*up; 182 return; 183 case IPPROTO_ICMPV6: 184 case IPPROTO_ENCAP: 185 case IPPROTO_IPV6: 186 case IPPROTO_ESP: 187 case IPPROTO_AH: 188 header->proto = *nexthdrp; 189 return; 190 case IPPROTO_NONE: 191 default: 192 return; 193 } 194 length += ehdrlen; 195 whereptr += ehdrlen; 196 } 197 } 198 } 199 200 /* 201 * flowacct_find_ids(mp, header) 202 * 203 * attempt to discern the uid and projid of the originator of a packet by 204 * looking at the dblks making up the packet - yeuch! 205 * 206 * We do it by skipping any fragments with a credp of NULL (originated in 207 * kernel), taking the first value that isn't NULL to be the cred_t for the 208 * whole packet. 209 */ 210 static void 211 flowacct_find_ids(mblk_t *mp, header_t *header) 212 { 213 cred_t *cr; 214 215 while (DB_CRED(mp) == NULL && mp->b_cont != NULL) 216 mp = mp->b_cont; 217 218 if ((cr = DB_CRED(mp)) != NULL) { 219 header->uid = crgetuid(cr); 220 header->projid = crgetprojid(cr); 221 } else { 222 header->uid = (uid_t)-1; 223 header->projid = -1; 224 } 225 } 226 227 /* 228 * Extract header information in a header_t structure so that we don't have 229 * have to parse the packet everytime. 230 */ 231 static int 232 flowacct_extract_header(mblk_t *mp, header_t *header) 233 { 234 ipha_t *ipha; 235 ip6_t *ip6h; 236 #define rptr ((uchar_t *)ipha) 237 238 /* 0 means no port extracted. */ 239 header->sport = 0; 240 header->dport = 0; 241 flowacct_find_ids(mp, header); 242 243 V6_SET_ZERO(header->saddr); 244 V6_SET_ZERO(header->daddr); 245 246 ipha = (ipha_t *)mp->b_rptr; 247 header->isv4 = IPH_HDR_VERSION(ipha) == IPV4_VERSION; 248 if (header->isv4) { 249 ipha = (ipha_t *)mp->b_rptr; 250 V4_PART_OF_V6(header->saddr) = (int32_t)ipha->ipha_src; 251 V4_PART_OF_V6(header->daddr) = (int32_t)ipha->ipha_dst; 252 header->dsfield = ipha->ipha_type_of_service; 253 header->proto = ipha->ipha_protocol; 254 header->pktlen = ntohs(ipha->ipha_length); 255 if ((header->proto == IPPROTO_TCP) || 256 (header->proto == IPPROTO_UDP) || 257 (header->proto == IPPROTO_SCTP)) { 258 flowacct_port_info(header, ipha, AF_INET, mp); 259 } 260 } else { 261 /* 262 * Need to pullup everything. 263 */ 264 if (mp->b_cont != NULL) { 265 if (!pullupmsg(mp, -1)) { 266 flowacct0dbg(("flowacct_extract_header: "\ 267 "pullup error")); 268 return (-1); 269 } 270 } 271 ip6h = (ip6_t *)mp->b_rptr; 272 bcopy(ip6h->ip6_src.s6_addr32, header->saddr.s6_addr32, 273 sizeof (ip6h->ip6_src.s6_addr32)); 274 bcopy(ip6h->ip6_dst.s6_addr32, header->daddr.s6_addr32, 275 sizeof (ip6h->ip6_dst.s6_addr32)); 276 header->dsfield = __IPV6_TCLASS_FROM_FLOW(ip6h->ip6_vcf); 277 header->proto = ip6h->ip6_nxt; 278 header->pktlen = ntohs(ip6h->ip6_plen) + 279 ip_hdr_length_v6(mp, ip6h); 280 flowacct_port_info(header, ip6h, AF_INET6, mp); 281 282 } 283 #undef rptr 284 return (0); 285 } 286 287 /* Check if the flow (identified by the 5-tuple) exists in the hash table */ 288 static flow_t * 289 flowacct_flow_present(header_t *header, int index, 290 flowacct_data_t *flowacct_data) 291 { 292 list_hdr_t *hdr = flowacct_data->flows_tbl[index].head; 293 flow_t *flow; 294 295 while (hdr != NULL) { 296 flow = (flow_t *)hdr->objp; 297 if ((flow != NULL) && 298 (IN6_ARE_ADDR_EQUAL(&flow->saddr, &header->saddr)) && 299 (IN6_ARE_ADDR_EQUAL(&flow->daddr, &header->daddr)) && 300 (flow->proto == header->proto) && 301 (flow->sport == header->sport) && 302 (flow->dport == header->dport)) { 303 return (flow); 304 } 305 hdr = hdr->next; 306 } 307 return ((flow_t *)NULL); 308 } 309 310 /* 311 * Add an object to the list at insert_point. This could be a flow item or 312 * a flow itself. 313 */ 314 static list_hdr_t * 315 flowacct_add_obj(list_head_t *tophdr, list_hdr_t *insert_point, void *obj) 316 { 317 list_hdr_t *new_hdr; 318 319 if (tophdr == NULL) { 320 return ((list_hdr_t *)NULL); 321 } 322 323 new_hdr = (list_hdr_t *)kmem_zalloc(FLOWACCT_HDR_SZ, KM_NOSLEEP); 324 if (new_hdr == NULL) { 325 flowacct0dbg(("flowacct_add_obj: error allocating mem")); 326 return ((list_hdr_t *)NULL); 327 } 328 gethrestime(&new_hdr->last_seen); 329 new_hdr->objp = obj; 330 tophdr->nbr_items++; 331 332 if (insert_point == NULL) { 333 if (tophdr->head == NULL) { 334 tophdr->head = new_hdr; 335 tophdr->tail = new_hdr; 336 return (new_hdr); 337 } 338 339 new_hdr->next = tophdr->head; 340 tophdr->head->prev = new_hdr; 341 tophdr->head = new_hdr; 342 return (new_hdr); 343 } 344 345 if (insert_point == tophdr->tail) { 346 tophdr->tail->next = new_hdr; 347 new_hdr->prev = tophdr->tail; 348 tophdr->tail = new_hdr; 349 return (new_hdr); 350 } 351 352 new_hdr->next = insert_point->next; 353 new_hdr->prev = insert_point; 354 insert_point->next->prev = new_hdr; 355 insert_point->next = new_hdr; 356 return (new_hdr); 357 } 358 359 /* Delete an obj from the list. This could be a flow item or the flow itself */ 360 static void 361 flowacct_del_obj(list_head_t *tophdr, list_hdr_t *hdr, uint_t mode) 362 { 363 size_t length; 364 uint_t type; 365 366 if ((tophdr == NULL) || (hdr == NULL)) { 367 return; 368 } 369 370 type = ((flow_t *)hdr->objp)->type; 371 372 tophdr->nbr_items--; 373 374 if (hdr->next != NULL) { 375 hdr->next->prev = hdr->prev; 376 } 377 if (hdr->prev != NULL) { 378 hdr->prev->next = hdr->next; 379 } 380 if (tophdr->head == hdr) { 381 tophdr->head = hdr->next; 382 } 383 if (tophdr->tail == hdr) { 384 tophdr->tail = hdr->prev; 385 } 386 387 if (mode == FLOWACCT_DEL_OBJ) { 388 switch (type) { 389 case FLOWACCT_FLOW: 390 length = FLOWACCT_FLOW_SZ; 391 break; 392 case FLOWACCT_ITEM: 393 length = FLOWACCT_ITEM_SZ; 394 break; 395 } 396 kmem_free(hdr->objp, length); 397 hdr->objp = NULL; 398 } 399 400 kmem_free((void *)hdr, FLOWACCT_HDR_SZ); 401 } 402 403 /* 404 * Checks if the given item (identified by dsfield, project id and uid) 405 * is already present for the flow. 406 */ 407 static flow_item_t * 408 flowacct_item_present(flow_t *flow, uint8_t dsfield, pid_t proj_id, uint_t uid) 409 { 410 list_hdr_t *itemhdr; 411 flow_item_t *item; 412 413 itemhdr = flow->items.head; 414 415 while (itemhdr != NULL) { 416 item = (flow_item_t *)itemhdr->objp; 417 418 if ((item->dsfield != dsfield) || (item->projid != proj_id) || 419 (item->uid != uid)) { 420 itemhdr = itemhdr->next; 421 continue; 422 } 423 return (item); 424 } 425 426 return ((flow_item_t *)NULL); 427 } 428 429 /* 430 * Add the flow to the table, if not already present. If the flow is 431 * present in the table, add the item. Also, update the flow stats. 432 * Additionally, re-adjust the timout list as well. 433 */ 434 static int 435 flowacct_update_flows_tbl(header_t *header, flowacct_data_t *flowacct_data) 436 { 437 int index; 438 list_head_t *fhead; 439 list_head_t *thead; 440 list_head_t *ihead; 441 boolean_t added_flow = B_FALSE; 442 timespec_t now; 443 flow_item_t *item; 444 flow_t *flow; 445 446 index = FLOWACCT_FLOW_HASH(header); 447 fhead = &flowacct_data->flows_tbl[index]; 448 449 /* The timeout list */ 450 thead = &flowacct_data->flows_tbl[FLOW_TBL_COUNT]; 451 452 mutex_enter(&fhead->lock); 453 flow = flowacct_flow_present(header, index, flowacct_data); 454 if (flow == NULL) { 455 flow = (flow_t *)kmem_zalloc(FLOWACCT_FLOW_SZ, KM_NOSLEEP); 456 if (flow == NULL) { 457 flowacct0dbg(("flowacct_update_flows_tbl: mem alloc "\ 458 "error")); 459 mutex_exit(&fhead->lock); 460 return (-1); 461 } 462 flow->hdr = flowacct_add_obj(fhead, fhead->tail, (void *)flow); 463 if (flow->hdr == NULL) { 464 flowacct0dbg(("flowacct_update_flows_tbl: mem alloc "\ 465 "error")); 466 kmem_free(flow, FLOWACCT_FLOW_SZ); 467 mutex_exit(&fhead->lock); 468 return (-1); 469 } 470 471 flow->type = FLOWACCT_FLOW; 472 flow->isv4 = header->isv4; 473 bcopy(header->saddr.s6_addr32, flow->saddr.s6_addr32, 474 sizeof (header->saddr.s6_addr32)); 475 bcopy(header->daddr.s6_addr32, flow->daddr.s6_addr32, 476 sizeof (header->daddr.s6_addr32)); 477 flow->proto = header->proto; 478 flow->sport = header->sport; 479 flow->dport = header->dport; 480 flow->back_ptr = fhead; 481 added_flow = B_TRUE; 482 } else { 483 /* 484 * We need to make sure that this 'flow' is not deleted 485 * either by a scheduled timeout or an explict call 486 * to flowacct_timer() below. 487 */ 488 flow->inuse = B_TRUE; 489 } 490 491 ihead = &flow->items; 492 item = flowacct_item_present(flow, header->dsfield, header->projid, 493 header->uid); 494 if (item == NULL) { 495 boolean_t just_once = B_TRUE; 496 /* 497 * For all practical purposes, we limit the no. of entries in 498 * the flow table - i.e. the max_limt that a user specifies is 499 * the maximum no. of flow items in the table. 500 */ 501 try_again: 502 atomic_add_32(&flowacct_data->nflows, 1); 503 if (flowacct_data->nflows > flowacct_data->max_limit) { 504 atomic_add_32(&flowacct_data->nflows, -1); 505 506 /* Try timing out once */ 507 if (just_once) { 508 /* 509 * Need to release the lock, as this entry 510 * could contain a flow that can be timed 511 * out. 512 */ 513 mutex_exit(&fhead->lock); 514 flowacct_timer(FLOWACCT_JUST_ONE, 515 flowacct_data); 516 mutex_enter(&fhead->lock); 517 /* Lets check again */ 518 just_once = B_FALSE; 519 goto try_again; 520 } else { 521 flow->inuse = B_FALSE; 522 mutex_exit(&fhead->lock); 523 flowacct1dbg(("flowacct_update_flows_tbl: "\ 524 "maximum active flows exceeded\n")); 525 if (added_flow) { 526 flowacct_del_obj(fhead, flow->hdr, 527 FLOWACCT_DEL_OBJ); 528 } 529 return (-1); 530 } 531 } 532 item = (flow_item_t *)kmem_zalloc(FLOWACCT_ITEM_SZ, KM_NOSLEEP); 533 if (item == NULL) { 534 flowacct0dbg(("flowacct_update_flows_tbl: mem alloc "\ 535 "error")); 536 /* Need to remove the flow, if one was added */ 537 if (added_flow) { 538 flowacct_del_obj(fhead, flow->hdr, 539 FLOWACCT_DEL_OBJ); 540 } 541 atomic_add_32(&flowacct_data->nflows, -1); 542 flow->inuse = B_FALSE; 543 mutex_exit(&fhead->lock); 544 return (-1); 545 } 546 item->hdr = flowacct_add_obj(ihead, ihead->tail, (void *)item); 547 if (item->hdr == NULL) { 548 flowacct0dbg(("flowacct_update_flows_tbl: mem alloc "\ 549 "error\n")); 550 kmem_free(item, FLOWACCT_ITEM_SZ); 551 /* Need to remove the flow, if one was added */ 552 if (added_flow) { 553 flowacct_del_obj(fhead, flow->hdr, 554 FLOWACCT_DEL_OBJ); 555 } 556 atomic_add_32(&flowacct_data->nflows, -1); 557 flow->inuse = B_FALSE; 558 mutex_exit(&fhead->lock); 559 return (-1); 560 } 561 /* If a flow was added, add it too */ 562 if (added_flow) { 563 atomic_add_64(&flowacct_data->usedmem, 564 FLOWACCT_FLOW_RECORD_SZ); 565 } 566 atomic_add_64(&flowacct_data->usedmem, FLOWACCT_ITEM_RECORD_SZ); 567 568 item->type = FLOWACCT_ITEM; 569 item->dsfield = header->dsfield; 570 item->projid = header->projid; 571 item->uid = header->uid; 572 item->npackets = 1; 573 item->nbytes = header->pktlen; 574 item->creation_time = item->hdr->last_seen; 575 } else { 576 item->npackets++; 577 item->nbytes += header->pktlen; 578 } 579 gethrestime(&now); 580 flow->hdr->last_seen = item->hdr->last_seen = now; 581 mutex_exit(&fhead->lock); 582 583 /* 584 * Re-adjust the timeout list. The timer takes the thead lock 585 * follwed by fhead lock(s), so we release fhead, take thead 586 * and re-take fhead. 587 */ 588 mutex_enter(&thead->lock); 589 mutex_enter(&fhead->lock); 590 /* If the flow was added, append it to the tail of the timeout list */ 591 if (added_flow) { 592 if (thead->head == NULL) { 593 thead->head = flow->hdr; 594 thead->tail = flow->hdr; 595 } else { 596 thead->tail->timeout_next = flow->hdr; 597 flow->hdr->timeout_prev = thead->tail; 598 thead->tail = flow->hdr; 599 } 600 /* 601 * Else, move this flow to the tail of the timeout list, if it is not 602 * already. 603 */ 604 } else if (flow->hdr != thead->tail) { 605 if (flow->hdr == thead->head) { 606 thead->head->timeout_next->timeout_prev = NULL; 607 thead->head = thead->head->timeout_next; 608 flow->hdr->timeout_next = NULL; 609 thead->tail->timeout_next = flow->hdr; 610 flow->hdr->timeout_prev = thead->tail; 611 thead->tail = flow->hdr; 612 } else { 613 flow->hdr->timeout_prev->timeout_next = 614 flow->hdr->timeout_next; 615 flow->hdr->timeout_next->timeout_prev = 616 flow->hdr->timeout_prev; 617 flow->hdr->timeout_next = NULL; 618 thead->tail->timeout_next = flow->hdr; 619 flow->hdr->timeout_prev = thead->tail; 620 thead->tail = flow->hdr; 621 } 622 /* 623 * Unset this variable, now it is fine even if this 624 * flow gets deleted (i.e. after timing out its 625 * flow items) since we are done using it. 626 */ 627 flow->inuse = B_FALSE; 628 } 629 mutex_exit(&fhead->lock); 630 mutex_exit(&thead->lock); 631 atomic_add_64(&flowacct_data->tbytes, header->pktlen); 632 return (0); 633 } 634 635 /* Timer for timing out flows/items from the flow table */ 636 void 637 flowacct_timeout_flows(void *args) 638 { 639 flowacct_data_t *flowacct_data = (flowacct_data_t *)args; 640 flowacct_timer(FLOWACCT_FLOW_TIMER, flowacct_data); 641 flowacct_data->flow_tid = timeout(flowacct_timeout_flows, flowacct_data, 642 drv_usectohz(flowacct_data->timer)); 643 } 644 645 646 /* Delete the item from the flow in the flow table */ 647 static void 648 flowacct_timeout_item(flow_t **flow, list_hdr_t **item_hdr) 649 { 650 list_hdr_t *next_it_hdr; 651 652 next_it_hdr = (*item_hdr)->next; 653 flowacct_del_obj(&(*flow)->items, *item_hdr, FLOWACCT_DEL_OBJ); 654 *item_hdr = next_it_hdr; 655 } 656 657 /* Create a flow record for this timed out item */ 658 static flow_records_t * 659 flowacct_create_record(flow_t *flow, list_hdr_t *ithdr) 660 { 661 int count; 662 flow_item_t *item = (flow_item_t *)ithdr->objp; 663 flow_records_t *tmp_frec = NULL; 664 665 /* Record to be written into the accounting file */ 666 tmp_frec = kmem_zalloc(sizeof (flow_records_t), KM_NOSLEEP); 667 if (tmp_frec == NULL) { 668 flowacct0dbg(("flowacct_create_record: mem alloc error.\n")); 669 return (NULL); 670 } 671 tmp_frec->fl_use = kmem_zalloc(sizeof (flow_usage_t), KM_NOSLEEP); 672 if (tmp_frec->fl_use == NULL) { 673 flowacct0dbg(("flowacct_create_record: mem alloc error\n")); 674 kmem_free(tmp_frec, sizeof (flow_records_t)); 675 return (NULL); 676 } 677 678 /* Copy the IP address */ 679 for (count = 0; count < 4; count++) { 680 tmp_frec->fl_use->fu_saddr[count] = 681 htonl(flow->saddr.s6_addr32[count]); 682 tmp_frec->fl_use->fu_daddr[count] = 683 htonl(flow->daddr.s6_addr32[count]); 684 } 685 686 /* 687 * Ports, protocol, version, dsfield, project id, uid, nbytes, npackets 688 * creation time and last seen. 689 */ 690 tmp_frec->fl_use->fu_sport = htons(flow->sport); 691 tmp_frec->fl_use->fu_dport = htons(flow->dport); 692 tmp_frec->fl_use->fu_protocol = flow->proto; 693 tmp_frec->fl_use->fu_isv4 = flow->isv4; 694 tmp_frec->fl_use->fu_dsfield = item->dsfield; 695 tmp_frec->fl_use->fu_projid = item->projid; 696 tmp_frec->fl_use->fu_userid = item->uid; 697 tmp_frec->fl_use->fu_nbytes = item->nbytes; 698 tmp_frec->fl_use->fu_npackets = item->npackets; 699 tmp_frec->fl_use->fu_lseen = 700 (uint64_t)(ulong_t)ithdr->last_seen.tv_sec; 701 tmp_frec->fl_use->fu_ctime = 702 (uint64_t)(ulong_t)item->creation_time.tv_sec; 703 704 return (tmp_frec); 705 } 706 707 /* 708 * Scan thru the timeout list and write the records to the accounting file, if 709 * possible. Basically step thru the timeout list maintained in the last 710 * hash bucket, FLOW_COUNT_TBL + 1, and timeout flows. This could be called 711 * from the timer, FLOWACCT_TIMER - delete only timed out flows or when this 712 * instance is deleted, FLOWACCT_PURGE_FLOW - delete all the flows from the 713 * table or as FLOWACCT_JUST_ONE - delete the first timed out flow. Since the 714 * flows are cronologically arranged in the timeout list, when called as 715 * FLOWACCT_TIMER and FLOWACCT_JUST_ONE, we can stop when we come across 716 * the first flow that has not timed out (which means none of the following 717 * flows would have timed out). 718 */ 719 void 720 flowacct_timer(int type, flowacct_data_t *flowacct_data) 721 { 722 hrtime_t diff; 723 timespec_t now; 724 list_head_t *head, *thead; 725 flow_t *flow; 726 flow_item_t *item; 727 list_hdr_t *fl_hdr, *next_fl_hdr; 728 list_hdr_t *ithdr = (list_hdr_t *)NULL; 729 flow_records_t *frec = NULL, *tmp_frec, *tail; 730 uint64_t flow_size; 731 uint64_t item_size; 732 733 ASSERT(flowacct_data != NULL); 734 735 /* 2s-complement for subtraction */ 736 flow_size = ~FLOWACCT_FLOW_RECORD_SZ + 1; 737 item_size = ~FLOWACCT_ITEM_RECORD_SZ + 1; 738 739 /* Get the current time */ 740 gethrestime(&now); 741 742 /* 743 * For each flow in the table, scan thru all the items and delete 744 * those that have exceeded the timeout. If all the items in a 745 * flow have timed out, delete the flow entry as well. Finally, 746 * write all the delted items to the accounting file. 747 */ 748 thead = &flowacct_data->flows_tbl[FLOW_TBL_COUNT]; 749 750 mutex_enter(&thead->lock); 751 fl_hdr = thead->head; 752 while (fl_hdr != NULL) { 753 uint32_t items_deleted = 0; 754 755 next_fl_hdr = fl_hdr->timeout_next; 756 flow = (flow_t *)fl_hdr->objp; 757 head = flow->back_ptr; 758 mutex_enter(&head->lock); 759 760 /*LINTED*/ 761 FLOWACCT_DELTA(now, fl_hdr->last_seen, diff); 762 763 /* 764 * If type is FLOW_TIMER, then check if the item has timed out. 765 * If type is FLOW_PURGE delete the entry anyways. 766 */ 767 if ((type != FLOWACCT_PURGE_FLOW) && 768 (diff < flowacct_data->timeout)) { 769 mutex_exit(&head->lock); 770 mutex_exit(&thead->lock); 771 goto write_records; 772 } 773 774 ithdr = flow->items.head; 775 while (ithdr != NULL) { 776 item = (flow_item_t *)ithdr->objp; 777 /* 778 * Fill in the flow record to be 779 * written to the accounting file. 780 */ 781 tmp_frec = flowacct_create_record(flow, ithdr); 782 /* 783 * If we don't have memory for records, 784 * we will come back in case this is 785 * called as FLOW_TIMER, else we will 786 * go ahead and delete the item from 787 * the table (when asked to PURGE the 788 * table), so there could be some 789 * entries not written to the file 790 * when this action instance is 791 * deleted. 792 */ 793 if (tmp_frec != NULL) { 794 tmp_frec->fl_use->fu_aname = 795 flowacct_data->act_name; 796 if (frec == NULL) { 797 frec = tmp_frec; 798 tail = frec; 799 } else { 800 tail->next = tmp_frec; 801 tail = tmp_frec; 802 } 803 } else if (type != FLOWACCT_PURGE_FLOW) { 804 mutex_exit(&head->lock); 805 mutex_exit(&thead->lock); 806 atomic_add_32(&flowacct_data->nflows, 807 (~items_deleted + 1)); 808 goto write_records; 809 } 810 811 /* Update stats */ 812 atomic_add_64(&flowacct_data->tbytes, (~item->nbytes + 813 1)); 814 815 /* Delete the item */ 816 flowacct_timeout_item(&flow, &ithdr); 817 items_deleted++; 818 atomic_add_64(&flowacct_data->usedmem, item_size); 819 } 820 ASSERT(flow->items.nbr_items == 0); 821 atomic_add_32(&flowacct_data->nflows, (~items_deleted + 1)); 822 823 /* 824 * Don't delete this flow if we are making place for 825 * a new item for this flow. 826 */ 827 if (!flow->inuse) { 828 if (fl_hdr->timeout_prev != NULL) { 829 fl_hdr->timeout_prev->timeout_next = 830 fl_hdr->timeout_next; 831 } else { 832 thead->head = fl_hdr->timeout_next; 833 } 834 if (fl_hdr->timeout_next != NULL) { 835 fl_hdr->timeout_next->timeout_prev = 836 fl_hdr->timeout_prev; 837 } else { 838 thead->tail = fl_hdr->timeout_prev; 839 } 840 fl_hdr->timeout_prev = NULL; 841 fl_hdr->timeout_next = NULL; 842 flowacct_del_obj(head, fl_hdr, FLOWACCT_DEL_OBJ); 843 atomic_add_64(&flowacct_data->usedmem, flow_size); 844 } 845 mutex_exit(&head->lock); 846 if (type == FLOWACCT_JUST_ONE) { 847 mutex_exit(&thead->lock); 848 goto write_records; 849 } 850 fl_hdr = next_fl_hdr; 851 } 852 mutex_exit(&thead->lock); 853 write_records: 854 /* Write all the timed out flows to the accounting file */ 855 while (frec != NULL) { 856 tmp_frec = frec->next; 857 exacct_commit_flow(frec->fl_use); 858 kmem_free(frec->fl_use, sizeof (flow_usage_t)); 859 kmem_free(frec, sizeof (flow_records_t)); 860 frec = tmp_frec; 861 } 862 } 863 864 /* 865 * Get the IP header contents from the packet, update the flow table with 866 * this item and return. 867 */ 868 int 869 flowacct_process(mblk_t **mpp, flowacct_data_t *flowacct_data) 870 { 871 header_t *header; 872 mblk_t *mp = *mpp; 873 874 ASSERT(mp != NULL); 875 876 /* If we don't find an M_DATA, return error */ 877 if (mp->b_datap->db_type != M_DATA) { 878 if ((mp->b_cont != NULL) && 879 (mp->b_cont->b_datap->db_type == M_DATA)) { 880 mp = mp->b_cont; 881 } else { 882 flowacct0dbg(("flowacct_process: no data\n")); 883 atomic_add_64(&flowacct_data->epackets, 1); 884 return (EINVAL); 885 } 886 } 887 888 header = kmem_zalloc(FLOWACCT_HEADER_SZ, KM_NOSLEEP); 889 if (header == NULL) { 890 flowacct0dbg(("flowacct_process: error allocing mem")); 891 atomic_add_64(&flowacct_data->epackets, 1); 892 return (ENOMEM); 893 } 894 895 /* Get all the required information into header. */ 896 if (flowacct_extract_header(mp, header) != 0) { 897 kmem_free(header, FLOWACCT_HEADER_SZ); 898 atomic_add_64(&flowacct_data->epackets, 1); 899 return (EINVAL); 900 } 901 902 /* Updated the flow table with this entry */ 903 if (flowacct_update_flows_tbl(header, flowacct_data) != 0) { 904 kmem_free(header, FLOWACCT_HEADER_SZ); 905 atomic_add_64(&flowacct_data->epackets, 1); 906 return (ENOMEM); 907 } 908 909 /* Update global stats */ 910 atomic_add_64(&flowacct_data->npackets, 1); 911 atomic_add_64(&flowacct_data->nbytes, header->pktlen); 912 913 kmem_free(header, FLOWACCT_HEADER_SZ); 914 if (flowacct_data->flow_tid == 0) { 915 flowacct_data->flow_tid = timeout(flowacct_timeout_flows, 916 flowacct_data, drv_usectohz(flowacct_data->timer)); 917 } 918 return (0); 919 } 920