1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include <sys/types.h> 28 #include <sys/kmem.h> 29 #include <sys/conf.h> 30 #include <sys/atomic.h> 31 #include <netinet/in.h> 32 #include <netinet/in_systm.h> 33 #include <netinet/ip6.h> 34 #include <sys/socket.h> 35 #include <sys/acct.h> 36 #include <sys/exacct.h> 37 #include <inet/common.h> 38 #include <inet/ip.h> 39 #include <inet/ip6.h> 40 #include <sys/ddi.h> 41 #include <sys/strsun.h> 42 #include <sys/strsubr.h> 43 #include <ipp/flowacct/flowacct_impl.h> 44 45 /* 46 * flowacct - IPQoS accounting module. The module maintains an array 47 * of 256 hash buckets. When the action routine is invoked for a flow, 48 * if the flow (identified by the 5-tuple: saddr, daddr, sport, dport, proto) 49 * is already present in the flow table (indexed by the hash function FLOW_HASH) 50 * then a check is made to see if an item for this flow with the same 51 * dsfield, projid & user id is present. If it is, then the number of packets 52 * and the bytes are incremented for that item. If the item does 53 * not exist a new item is added for the flow. If the flow is not present 54 * an entry is made for the flow. 55 * 56 * A timer runs thru the table and writes all the flow items that have 57 * timed out to the accounting file (via exacct PSARC/1999/119), if present 58 * Configuration commands to change the timing interval is provided. The 59 * flow timeout value can also be configured. While the timeout is in nsec, 60 * the flow timer interval is in usec. 61 * Information for an active flow can be obtained by using kstats. 62 */ 63 64 /* Used in computing the hash index */ 65 #define FLOWACCT_ADDR_HASH(addr) \ 66 ((addr).s6_addr8[8] ^ (addr).s6_addr8[9] ^ \ 67 (addr).s6_addr8[10] ^ (addr).s6_addr8[13] ^ \ 68 (addr).s6_addr8[14] ^ (addr).s6_addr8[15]) 69 70 #define FLOWACCT_FLOW_HASH(f) \ 71 (((FLOWACCT_ADDR_HASH(f->saddr)) + \ 72 (FLOWACCT_ADDR_HASH(f->daddr)) + \ 73 (f->proto) + (f->sport) + (f->dport)) \ 74 % FLOW_TBL_COUNT) 75 76 /* 77 * Compute difference between a and b in nsec and store in delta. 78 * delta should be a hrtime_t. Taken from ip_mroute.c. 79 */ 80 #define FLOWACCT_DELTA(a, b, delta) { \ 81 int xxs; \ 82 \ 83 delta = (a).tv_nsec - (b).tv_nsec; \ 84 if ((xxs = (a).tv_sec - (b).tv_sec) != 0) { \ 85 switch (xxs) { \ 86 case 2: \ 87 delta += NANOSEC; \ 88 /*FALLTHRU*/ \ 89 case 1: \ 90 delta += NANOSEC; \ 91 break; \ 92 default: \ 93 delta += ((hrtime_t)NANOSEC * xxs); \ 94 } \ 95 } \ 96 } 97 98 /* Debug level */ 99 int flowacct_debug = 0; 100 101 /* Collect timed out flows to be written to the accounting file */ 102 typedef struct flow_records_s { 103 flow_usage_t *fl_use; 104 struct flow_records_s *next; 105 }flow_records_t; 106 107 /* Get port information from the packet. Ignore fragments. */ 108 static void 109 flowacct_port_info(header_t *header, void *iph, int af, mblk_t *mp) 110 { 111 uint16_t *up; 112 113 if (af == AF_INET) { 114 ipha_t *ipha = (ipha_t *)iph; 115 uint32_t u2, u1; 116 uint_t iplen; 117 118 u2 = ntohs(ipha->ipha_fragment_offset_and_flags); 119 u1 = u2 & (IPH_MF | IPH_OFFSET); 120 if (u1 != 0) { 121 return; 122 } 123 iplen = (ipha->ipha_version_and_hdr_length & 0xF) << 2; 124 up = (uint16_t *)(mp->b_rptr + iplen); 125 header->sport = (uint16_t)*up++; 126 header->dport = (uint16_t)*up; 127 } else { 128 ip6_t *ip6h = (ip6_t *)iph; 129 uint_t length = IPV6_HDR_LEN; 130 uint_t ehdrlen; 131 uint8_t *nexthdrp, *whereptr, *endptr; 132 ip6_dest_t *desthdr; 133 ip6_rthdr_t *rthdr; 134 ip6_hbh_t *hbhhdr; 135 136 whereptr = ((uint8_t *)&ip6h[1]); 137 endptr = mp->b_wptr; 138 nexthdrp = &ip6h->ip6_nxt; 139 while (whereptr < endptr) { 140 switch (*nexthdrp) { 141 case IPPROTO_HOPOPTS: 142 hbhhdr = (ip6_hbh_t *)whereptr; 143 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 144 if ((uchar_t *)hbhhdr + ehdrlen > endptr) 145 return; 146 nexthdrp = &hbhhdr->ip6h_nxt; 147 break; 148 case IPPROTO_DSTOPTS: 149 desthdr = (ip6_dest_t *)whereptr; 150 ehdrlen = 8 * (desthdr->ip6d_len + 1); 151 if ((uchar_t *)desthdr + ehdrlen > endptr) 152 return; 153 nexthdrp = &desthdr->ip6d_nxt; 154 break; 155 case IPPROTO_ROUTING: 156 rthdr = (ip6_rthdr_t *)whereptr; 157 ehdrlen = 8 * (rthdr->ip6r_len + 1); 158 if ((uchar_t *)rthdr + ehdrlen > endptr) 159 return; 160 nexthdrp = &rthdr->ip6r_nxt; 161 break; 162 case IPPROTO_FRAGMENT: 163 return; 164 case IPPROTO_TCP: 165 case IPPROTO_UDP: 166 case IPPROTO_SCTP: 167 /* 168 * Verify we have at least ICMP_MIN_TP_HDR_LEN 169 * bytes of the ULP's header to get the port 170 * info. 171 */ 172 if (((uchar_t *)ip6h + length + 173 ICMP_MIN_TP_HDR_LEN) > endptr) { 174 return; 175 } 176 /* Get the protocol & ports */ 177 header->proto = *nexthdrp; 178 up = (uint16_t *)((uchar_t *)ip6h + length); 179 header->sport = (uint16_t)*up++; 180 header->dport = (uint16_t)*up; 181 return; 182 case IPPROTO_ICMPV6: 183 case IPPROTO_ENCAP: 184 case IPPROTO_IPV6: 185 case IPPROTO_ESP: 186 case IPPROTO_AH: 187 header->proto = *nexthdrp; 188 return; 189 case IPPROTO_NONE: 190 default: 191 return; 192 } 193 length += ehdrlen; 194 whereptr += ehdrlen; 195 } 196 } 197 } 198 199 /* 200 * flowacct_find_ids(mp, header) 201 * 202 * attempt to discern the uid and projid of the originator of a packet by 203 * looking at the dblks making up the packet - yeuch! 204 * 205 * We do it by skipping any fragments with a credp of NULL (originated in 206 * kernel), taking the first value that isn't NULL to be the cred_t for the 207 * whole packet. 208 */ 209 static void 210 flowacct_find_ids(mblk_t *mp, header_t *header) 211 { 212 cred_t *cr; 213 214 cr = msg_getcred(mp, NULL); 215 if (cr != NULL) { 216 header->uid = crgetuid(cr); 217 header->projid = crgetprojid(cr); 218 } else { 219 header->uid = (uid_t)-1; 220 header->projid = -1; 221 } 222 } 223 224 /* 225 * Extract header information in a header_t structure so that we don't have 226 * have to parse the packet everytime. 227 */ 228 static int 229 flowacct_extract_header(mblk_t *mp, header_t *header) 230 { 231 ipha_t *ipha; 232 ip6_t *ip6h; 233 #define rptr ((uchar_t *)ipha) 234 235 /* 0 means no port extracted. */ 236 header->sport = 0; 237 header->dport = 0; 238 flowacct_find_ids(mp, header); 239 240 V6_SET_ZERO(header->saddr); 241 V6_SET_ZERO(header->daddr); 242 243 ipha = (ipha_t *)mp->b_rptr; 244 header->isv4 = IPH_HDR_VERSION(ipha) == IPV4_VERSION; 245 if (header->isv4) { 246 ipha = (ipha_t *)mp->b_rptr; 247 V4_PART_OF_V6(header->saddr) = (int32_t)ipha->ipha_src; 248 V4_PART_OF_V6(header->daddr) = (int32_t)ipha->ipha_dst; 249 header->dsfield = ipha->ipha_type_of_service; 250 header->proto = ipha->ipha_protocol; 251 header->pktlen = ntohs(ipha->ipha_length); 252 if ((header->proto == IPPROTO_TCP) || 253 (header->proto == IPPROTO_UDP) || 254 (header->proto == IPPROTO_SCTP)) { 255 flowacct_port_info(header, ipha, AF_INET, mp); 256 } 257 } else { 258 /* 259 * Need to pullup everything. 260 */ 261 if (mp->b_cont != NULL) { 262 if (!pullupmsg(mp, -1)) { 263 flowacct0dbg(("flowacct_extract_header: "\ 264 "pullup error")); 265 return (-1); 266 } 267 } 268 ip6h = (ip6_t *)mp->b_rptr; 269 bcopy(ip6h->ip6_src.s6_addr32, header->saddr.s6_addr32, 270 sizeof (ip6h->ip6_src.s6_addr32)); 271 bcopy(ip6h->ip6_dst.s6_addr32, header->daddr.s6_addr32, 272 sizeof (ip6h->ip6_dst.s6_addr32)); 273 header->dsfield = __IPV6_TCLASS_FROM_FLOW(ip6h->ip6_vcf); 274 header->proto = ip6h->ip6_nxt; 275 header->pktlen = ntohs(ip6h->ip6_plen) + 276 ip_hdr_length_v6(mp, ip6h); 277 flowacct_port_info(header, ip6h, AF_INET6, mp); 278 279 } 280 #undef rptr 281 return (0); 282 } 283 284 /* Check if the flow (identified by the 5-tuple) exists in the hash table */ 285 static flow_t * 286 flowacct_flow_present(header_t *header, int index, 287 flowacct_data_t *flowacct_data) 288 { 289 list_hdr_t *hdr = flowacct_data->flows_tbl[index].head; 290 flow_t *flow; 291 292 while (hdr != NULL) { 293 flow = (flow_t *)hdr->objp; 294 if ((flow != NULL) && 295 (IN6_ARE_ADDR_EQUAL(&flow->saddr, &header->saddr)) && 296 (IN6_ARE_ADDR_EQUAL(&flow->daddr, &header->daddr)) && 297 (flow->proto == header->proto) && 298 (flow->sport == header->sport) && 299 (flow->dport == header->dport)) { 300 return (flow); 301 } 302 hdr = hdr->next; 303 } 304 return ((flow_t *)NULL); 305 } 306 307 /* 308 * Add an object to the list at insert_point. This could be a flow item or 309 * a flow itself. 310 */ 311 static list_hdr_t * 312 flowacct_add_obj(list_head_t *tophdr, list_hdr_t *insert_point, void *obj) 313 { 314 list_hdr_t *new_hdr; 315 316 if (tophdr == NULL) { 317 return ((list_hdr_t *)NULL); 318 } 319 320 new_hdr = (list_hdr_t *)kmem_zalloc(FLOWACCT_HDR_SZ, KM_NOSLEEP); 321 if (new_hdr == NULL) { 322 flowacct0dbg(("flowacct_add_obj: error allocating mem")); 323 return ((list_hdr_t *)NULL); 324 } 325 gethrestime(&new_hdr->last_seen); 326 new_hdr->objp = obj; 327 tophdr->nbr_items++; 328 329 if (insert_point == NULL) { 330 if (tophdr->head == NULL) { 331 tophdr->head = new_hdr; 332 tophdr->tail = new_hdr; 333 return (new_hdr); 334 } 335 336 new_hdr->next = tophdr->head; 337 tophdr->head->prev = new_hdr; 338 tophdr->head = new_hdr; 339 return (new_hdr); 340 } 341 342 if (insert_point == tophdr->tail) { 343 tophdr->tail->next = new_hdr; 344 new_hdr->prev = tophdr->tail; 345 tophdr->tail = new_hdr; 346 return (new_hdr); 347 } 348 349 new_hdr->next = insert_point->next; 350 new_hdr->prev = insert_point; 351 insert_point->next->prev = new_hdr; 352 insert_point->next = new_hdr; 353 return (new_hdr); 354 } 355 356 /* Delete an obj from the list. This could be a flow item or the flow itself */ 357 static void 358 flowacct_del_obj(list_head_t *tophdr, list_hdr_t *hdr, uint_t mode) 359 { 360 size_t length; 361 uint_t type; 362 363 if ((tophdr == NULL) || (hdr == NULL)) { 364 return; 365 } 366 367 type = ((flow_t *)hdr->objp)->type; 368 369 tophdr->nbr_items--; 370 371 if (hdr->next != NULL) { 372 hdr->next->prev = hdr->prev; 373 } 374 if (hdr->prev != NULL) { 375 hdr->prev->next = hdr->next; 376 } 377 if (tophdr->head == hdr) { 378 tophdr->head = hdr->next; 379 } 380 if (tophdr->tail == hdr) { 381 tophdr->tail = hdr->prev; 382 } 383 384 if (mode == FLOWACCT_DEL_OBJ) { 385 switch (type) { 386 case FLOWACCT_FLOW: 387 length = FLOWACCT_FLOW_SZ; 388 break; 389 case FLOWACCT_ITEM: 390 length = FLOWACCT_ITEM_SZ; 391 break; 392 } 393 kmem_free(hdr->objp, length); 394 hdr->objp = NULL; 395 } 396 397 kmem_free((void *)hdr, FLOWACCT_HDR_SZ); 398 } 399 400 /* 401 * Checks if the given item (identified by dsfield, project id and uid) 402 * is already present for the flow. 403 */ 404 static flow_item_t * 405 flowacct_item_present(flow_t *flow, uint8_t dsfield, pid_t proj_id, uint_t uid) 406 { 407 list_hdr_t *itemhdr; 408 flow_item_t *item; 409 410 itemhdr = flow->items.head; 411 412 while (itemhdr != NULL) { 413 item = (flow_item_t *)itemhdr->objp; 414 415 if ((item->dsfield != dsfield) || (item->projid != proj_id) || 416 (item->uid != uid)) { 417 itemhdr = itemhdr->next; 418 continue; 419 } 420 return (item); 421 } 422 423 return ((flow_item_t *)NULL); 424 } 425 426 /* 427 * Add the flow to the table, if not already present. If the flow is 428 * present in the table, add the item. Also, update the flow stats. 429 * Additionally, re-adjust the timout list as well. 430 */ 431 static int 432 flowacct_update_flows_tbl(header_t *header, flowacct_data_t *flowacct_data) 433 { 434 int index; 435 list_head_t *fhead; 436 list_head_t *thead; 437 list_head_t *ihead; 438 boolean_t added_flow = B_FALSE; 439 timespec_t now; 440 flow_item_t *item; 441 flow_t *flow; 442 443 index = FLOWACCT_FLOW_HASH(header); 444 fhead = &flowacct_data->flows_tbl[index]; 445 446 /* The timeout list */ 447 thead = &flowacct_data->flows_tbl[FLOW_TBL_COUNT]; 448 449 mutex_enter(&fhead->lock); 450 flow = flowacct_flow_present(header, index, flowacct_data); 451 if (flow == NULL) { 452 flow = (flow_t *)kmem_zalloc(FLOWACCT_FLOW_SZ, KM_NOSLEEP); 453 if (flow == NULL) { 454 mutex_exit(&fhead->lock); 455 flowacct0dbg(("flowacct_update_flows_tbl: mem alloc "\ 456 "error")); 457 return (-1); 458 } 459 flow->hdr = flowacct_add_obj(fhead, fhead->tail, (void *)flow); 460 if (flow->hdr == NULL) { 461 mutex_exit(&fhead->lock); 462 kmem_free(flow, FLOWACCT_FLOW_SZ); 463 flowacct0dbg(("flowacct_update_flows_tbl: mem alloc "\ 464 "error")); 465 return (-1); 466 } 467 468 flow->type = FLOWACCT_FLOW; 469 flow->isv4 = header->isv4; 470 bcopy(header->saddr.s6_addr32, flow->saddr.s6_addr32, 471 sizeof (header->saddr.s6_addr32)); 472 bcopy(header->daddr.s6_addr32, flow->daddr.s6_addr32, 473 sizeof (header->daddr.s6_addr32)); 474 flow->proto = header->proto; 475 flow->sport = header->sport; 476 flow->dport = header->dport; 477 flow->back_ptr = fhead; 478 added_flow = B_TRUE; 479 } else { 480 /* 481 * We need to make sure that this 'flow' is not deleted 482 * either by a scheduled timeout or an explict call 483 * to flowacct_timer() below. 484 */ 485 flow->inuse = B_TRUE; 486 } 487 488 ihead = &flow->items; 489 item = flowacct_item_present(flow, header->dsfield, header->projid, 490 header->uid); 491 if (item == NULL) { 492 boolean_t just_once = B_TRUE; 493 /* 494 * For all practical purposes, we limit the no. of entries in 495 * the flow table - i.e. the max_limt that a user specifies is 496 * the maximum no. of flow items in the table. 497 */ 498 try_again: 499 atomic_add_32(&flowacct_data->nflows, 1); 500 if (flowacct_data->nflows > flowacct_data->max_limit) { 501 atomic_add_32(&flowacct_data->nflows, -1); 502 503 /* Try timing out once */ 504 if (just_once) { 505 /* 506 * Need to release the lock, as this entry 507 * could contain a flow that can be timed 508 * out. 509 */ 510 mutex_exit(&fhead->lock); 511 flowacct_timer(FLOWACCT_JUST_ONE, 512 flowacct_data); 513 mutex_enter(&fhead->lock); 514 /* Lets check again */ 515 just_once = B_FALSE; 516 goto try_again; 517 } else { 518 flow->inuse = B_FALSE; 519 /* Need to remove the flow, if one was added */ 520 if (added_flow) { 521 flowacct_del_obj(fhead, flow->hdr, 522 FLOWACCT_DEL_OBJ); 523 } 524 mutex_exit(&fhead->lock); 525 flowacct1dbg(("flowacct_update_flows_tbl: "\ 526 "maximum active flows exceeded\n")); 527 return (-1); 528 } 529 } 530 item = (flow_item_t *)kmem_zalloc(FLOWACCT_ITEM_SZ, KM_NOSLEEP); 531 if (item == NULL) { 532 flow->inuse = B_FALSE; 533 /* Need to remove the flow, if one was added */ 534 if (added_flow) { 535 flowacct_del_obj(fhead, flow->hdr, 536 FLOWACCT_DEL_OBJ); 537 } 538 mutex_exit(&fhead->lock); 539 atomic_add_32(&flowacct_data->nflows, -1); 540 flowacct0dbg(("flowacct_update_flows_tbl: mem alloc "\ 541 "error")); 542 return (-1); 543 } 544 item->hdr = flowacct_add_obj(ihead, ihead->tail, (void *)item); 545 if (item->hdr == NULL) { 546 flow->inuse = B_FALSE; 547 /* Need to remove the flow, if one was added */ 548 if (added_flow) { 549 flowacct_del_obj(fhead, flow->hdr, 550 FLOWACCT_DEL_OBJ); 551 } 552 mutex_exit(&fhead->lock); 553 atomic_add_32(&flowacct_data->nflows, -1); 554 kmem_free(item, FLOWACCT_ITEM_SZ); 555 flowacct0dbg(("flowacct_update_flows_tbl: mem alloc "\ 556 "error\n")); 557 return (-1); 558 } 559 /* If a flow was added, add it too */ 560 if (added_flow) { 561 atomic_add_64(&flowacct_data->usedmem, 562 FLOWACCT_FLOW_RECORD_SZ); 563 } 564 atomic_add_64(&flowacct_data->usedmem, FLOWACCT_ITEM_RECORD_SZ); 565 566 item->type = FLOWACCT_ITEM; 567 item->dsfield = header->dsfield; 568 item->projid = header->projid; 569 item->uid = header->uid; 570 item->npackets = 1; 571 item->nbytes = header->pktlen; 572 item->creation_time = item->hdr->last_seen; 573 } else { 574 item->npackets++; 575 item->nbytes += header->pktlen; 576 } 577 gethrestime(&now); 578 flow->hdr->last_seen = item->hdr->last_seen = now; 579 mutex_exit(&fhead->lock); 580 581 /* 582 * Re-adjust the timeout list. The timer takes the thead lock 583 * follwed by fhead lock(s), so we release fhead, take thead 584 * and re-take fhead. 585 */ 586 mutex_enter(&thead->lock); 587 mutex_enter(&fhead->lock); 588 /* If the flow was added, append it to the tail of the timeout list */ 589 if (added_flow) { 590 if (thead->head == NULL) { 591 thead->head = flow->hdr; 592 thead->tail = flow->hdr; 593 } else { 594 thead->tail->timeout_next = flow->hdr; 595 flow->hdr->timeout_prev = thead->tail; 596 thead->tail = flow->hdr; 597 } 598 /* 599 * Else, move this flow to the tail of the timeout list, if it is not 600 * already. 601 * flow->hdr in the timeout list :- 602 * timeout_next = NULL, timeout_prev != NULL, at the tail end. 603 * timeout_next != NULL, timeout_prev = NULL, at the head. 604 * timeout_next != NULL, timeout_prev != NULL, in the middle. 605 * timeout_next = NULL, timeout_prev = NULL, not in the timeout list, 606 * ignore such flow. 607 */ 608 } else if ((flow->hdr->timeout_next != NULL) || 609 (flow->hdr->timeout_prev != NULL)) { 610 if (flow->hdr != thead->tail) { 611 if (flow->hdr == thead->head) { 612 thead->head->timeout_next->timeout_prev = NULL; 613 thead->head = thead->head->timeout_next; 614 flow->hdr->timeout_next = NULL; 615 thead->tail->timeout_next = flow->hdr; 616 flow->hdr->timeout_prev = thead->tail; 617 thead->tail = flow->hdr; 618 } else { 619 flow->hdr->timeout_prev->timeout_next = 620 flow->hdr->timeout_next; 621 flow->hdr->timeout_next->timeout_prev = 622 flow->hdr->timeout_prev; 623 flow->hdr->timeout_next = NULL; 624 thead->tail->timeout_next = flow->hdr; 625 flow->hdr->timeout_prev = thead->tail; 626 thead->tail = flow->hdr; 627 } 628 } 629 } 630 /* 631 * Unset this variable, now it is fine even if this 632 * flow gets deleted (i.e. after timing out its 633 * flow items) since we are done using it. 634 */ 635 flow->inuse = B_FALSE; 636 mutex_exit(&fhead->lock); 637 mutex_exit(&thead->lock); 638 atomic_add_64(&flowacct_data->tbytes, header->pktlen); 639 return (0); 640 } 641 642 /* Timer for timing out flows/items from the flow table */ 643 void 644 flowacct_timeout_flows(void *args) 645 { 646 flowacct_data_t *flowacct_data = (flowacct_data_t *)args; 647 flowacct_timer(FLOWACCT_FLOW_TIMER, flowacct_data); 648 flowacct_data->flow_tid = timeout(flowacct_timeout_flows, flowacct_data, 649 drv_usectohz(flowacct_data->timer)); 650 } 651 652 653 /* Delete the item from the flow in the flow table */ 654 static void 655 flowacct_timeout_item(flow_t **flow, list_hdr_t **item_hdr) 656 { 657 list_hdr_t *next_it_hdr; 658 659 next_it_hdr = (*item_hdr)->next; 660 flowacct_del_obj(&(*flow)->items, *item_hdr, FLOWACCT_DEL_OBJ); 661 *item_hdr = next_it_hdr; 662 } 663 664 /* Create a flow record for this timed out item */ 665 static flow_records_t * 666 flowacct_create_record(flow_t *flow, list_hdr_t *ithdr) 667 { 668 int count; 669 flow_item_t *item = (flow_item_t *)ithdr->objp; 670 flow_records_t *tmp_frec = NULL; 671 672 /* Record to be written into the accounting file */ 673 tmp_frec = kmem_zalloc(sizeof (flow_records_t), KM_NOSLEEP); 674 if (tmp_frec == NULL) { 675 flowacct0dbg(("flowacct_create_record: mem alloc error.\n")); 676 return (NULL); 677 } 678 tmp_frec->fl_use = kmem_zalloc(sizeof (flow_usage_t), KM_NOSLEEP); 679 if (tmp_frec->fl_use == NULL) { 680 flowacct0dbg(("flowacct_create_record: mem alloc error\n")); 681 kmem_free(tmp_frec, sizeof (flow_records_t)); 682 return (NULL); 683 } 684 685 /* Copy the IP address */ 686 for (count = 0; count < 4; count++) { 687 tmp_frec->fl_use->fu_saddr[count] = 688 htonl(flow->saddr.s6_addr32[count]); 689 tmp_frec->fl_use->fu_daddr[count] = 690 htonl(flow->daddr.s6_addr32[count]); 691 } 692 693 /* 694 * Ports, protocol, version, dsfield, project id, uid, nbytes, npackets 695 * creation time and last seen. 696 */ 697 tmp_frec->fl_use->fu_sport = htons(flow->sport); 698 tmp_frec->fl_use->fu_dport = htons(flow->dport); 699 tmp_frec->fl_use->fu_protocol = flow->proto; 700 tmp_frec->fl_use->fu_isv4 = flow->isv4; 701 tmp_frec->fl_use->fu_dsfield = item->dsfield; 702 tmp_frec->fl_use->fu_projid = item->projid; 703 tmp_frec->fl_use->fu_userid = item->uid; 704 tmp_frec->fl_use->fu_nbytes = item->nbytes; 705 tmp_frec->fl_use->fu_npackets = item->npackets; 706 tmp_frec->fl_use->fu_lseen = 707 (uint64_t)(ulong_t)ithdr->last_seen.tv_sec; 708 tmp_frec->fl_use->fu_ctime = 709 (uint64_t)(ulong_t)item->creation_time.tv_sec; 710 711 return (tmp_frec); 712 } 713 714 /* 715 * Scan thru the timeout list and write the records to the accounting file, if 716 * possible. Basically step thru the timeout list maintained in the last 717 * hash bucket, FLOW_COUNT_TBL + 1, and timeout flows. This could be called 718 * from the timer, FLOWACCT_TIMER - delete only timed out flows or when this 719 * instance is deleted, FLOWACCT_PURGE_FLOW - delete all the flows from the 720 * table or as FLOWACCT_JUST_ONE - delete the first timed out flow. Since the 721 * flows are cronologically arranged in the timeout list, when called as 722 * FLOWACCT_TIMER and FLOWACCT_JUST_ONE, we can stop when we come across 723 * the first flow that has not timed out (which means none of the following 724 * flows would have timed out). 725 */ 726 void 727 flowacct_timer(int type, flowacct_data_t *flowacct_data) 728 { 729 hrtime_t diff; 730 timespec_t now; 731 list_head_t *head, *thead; 732 flow_t *flow; 733 flow_item_t *item; 734 list_hdr_t *fl_hdr, *next_fl_hdr; 735 list_hdr_t *ithdr = (list_hdr_t *)NULL; 736 flow_records_t *frec = NULL, *tmp_frec, *tail; 737 uint64_t flow_size; 738 uint64_t item_size; 739 740 ASSERT(flowacct_data != NULL); 741 742 /* 2s-complement for subtraction */ 743 flow_size = ~FLOWACCT_FLOW_RECORD_SZ + 1; 744 item_size = ~FLOWACCT_ITEM_RECORD_SZ + 1; 745 746 /* Get the current time */ 747 gethrestime(&now); 748 749 /* 750 * For each flow in the table, scan thru all the items and delete 751 * those that have exceeded the timeout. If all the items in a 752 * flow have timed out, delete the flow entry as well. Finally, 753 * write all the delted items to the accounting file. 754 */ 755 thead = &flowacct_data->flows_tbl[FLOW_TBL_COUNT]; 756 757 mutex_enter(&thead->lock); 758 fl_hdr = thead->head; 759 while (fl_hdr != NULL) { 760 uint32_t items_deleted = 0; 761 762 next_fl_hdr = fl_hdr->timeout_next; 763 flow = (flow_t *)fl_hdr->objp; 764 head = flow->back_ptr; 765 mutex_enter(&head->lock); 766 767 /*LINTED*/ 768 FLOWACCT_DELTA(now, fl_hdr->last_seen, diff); 769 770 /* 771 * If type is FLOW_TIMER, then check if the item has timed out. 772 * If type is FLOW_PURGE delete the entry anyways. 773 */ 774 if ((type != FLOWACCT_PURGE_FLOW) && 775 (diff < flowacct_data->timeout)) { 776 mutex_exit(&head->lock); 777 mutex_exit(&thead->lock); 778 goto write_records; 779 } 780 781 ithdr = flow->items.head; 782 while (ithdr != NULL) { 783 item = (flow_item_t *)ithdr->objp; 784 /* 785 * Fill in the flow record to be 786 * written to the accounting file. 787 */ 788 tmp_frec = flowacct_create_record(flow, ithdr); 789 /* 790 * If we don't have memory for records, 791 * we will come back in case this is 792 * called as FLOW_TIMER, else we will 793 * go ahead and delete the item from 794 * the table (when asked to PURGE the 795 * table), so there could be some 796 * entries not written to the file 797 * when this action instance is 798 * deleted. 799 */ 800 if (tmp_frec != NULL) { 801 tmp_frec->fl_use->fu_aname = 802 flowacct_data->act_name; 803 if (frec == NULL) { 804 frec = tmp_frec; 805 tail = frec; 806 } else { 807 tail->next = tmp_frec; 808 tail = tmp_frec; 809 } 810 } else if (type != FLOWACCT_PURGE_FLOW) { 811 mutex_exit(&head->lock); 812 mutex_exit(&thead->lock); 813 atomic_add_32(&flowacct_data->nflows, 814 (~items_deleted + 1)); 815 goto write_records; 816 } 817 818 /* Update stats */ 819 atomic_add_64(&flowacct_data->tbytes, (~item->nbytes + 820 1)); 821 822 /* Delete the item */ 823 flowacct_timeout_item(&flow, &ithdr); 824 items_deleted++; 825 atomic_add_64(&flowacct_data->usedmem, item_size); 826 } 827 ASSERT(flow->items.nbr_items == 0); 828 atomic_add_32(&flowacct_data->nflows, (~items_deleted + 1)); 829 830 /* 831 * Don't delete this flow if we are making place for 832 * a new item for this flow. 833 */ 834 if (!flow->inuse) { 835 if (fl_hdr->timeout_prev != NULL) { 836 fl_hdr->timeout_prev->timeout_next = 837 fl_hdr->timeout_next; 838 } else { 839 thead->head = fl_hdr->timeout_next; 840 } 841 if (fl_hdr->timeout_next != NULL) { 842 fl_hdr->timeout_next->timeout_prev = 843 fl_hdr->timeout_prev; 844 } else { 845 thead->tail = fl_hdr->timeout_prev; 846 } 847 fl_hdr->timeout_prev = NULL; 848 fl_hdr->timeout_next = NULL; 849 flowacct_del_obj(head, fl_hdr, FLOWACCT_DEL_OBJ); 850 atomic_add_64(&flowacct_data->usedmem, flow_size); 851 } 852 mutex_exit(&head->lock); 853 if (type == FLOWACCT_JUST_ONE) { 854 mutex_exit(&thead->lock); 855 goto write_records; 856 } 857 fl_hdr = next_fl_hdr; 858 } 859 mutex_exit(&thead->lock); 860 write_records: 861 /* Write all the timed out flows to the accounting file */ 862 while (frec != NULL) { 863 tmp_frec = frec->next; 864 exacct_commit_flow(frec->fl_use); 865 kmem_free(frec->fl_use, sizeof (flow_usage_t)); 866 kmem_free(frec, sizeof (flow_records_t)); 867 frec = tmp_frec; 868 } 869 } 870 871 /* 872 * Get the IP header contents from the packet, update the flow table with 873 * this item and return. 874 */ 875 int 876 flowacct_process(mblk_t **mpp, flowacct_data_t *flowacct_data) 877 { 878 header_t *header; 879 mblk_t *mp = *mpp; 880 881 ASSERT(mp != NULL); 882 883 /* If we don't find an M_DATA, return error */ 884 if (mp->b_datap->db_type != M_DATA) { 885 if ((mp->b_cont != NULL) && 886 (mp->b_cont->b_datap->db_type == M_DATA)) { 887 mp = mp->b_cont; 888 } else { 889 flowacct0dbg(("flowacct_process: no data\n")); 890 atomic_add_64(&flowacct_data->epackets, 1); 891 return (EINVAL); 892 } 893 } 894 895 header = kmem_zalloc(FLOWACCT_HEADER_SZ, KM_NOSLEEP); 896 if (header == NULL) { 897 flowacct0dbg(("flowacct_process: error allocing mem")); 898 atomic_add_64(&flowacct_data->epackets, 1); 899 return (ENOMEM); 900 } 901 902 /* Get all the required information into header. */ 903 if (flowacct_extract_header(mp, header) != 0) { 904 kmem_free(header, FLOWACCT_HEADER_SZ); 905 atomic_add_64(&flowacct_data->epackets, 1); 906 return (EINVAL); 907 } 908 909 /* Updated the flow table with this entry */ 910 if (flowacct_update_flows_tbl(header, flowacct_data) != 0) { 911 kmem_free(header, FLOWACCT_HEADER_SZ); 912 atomic_add_64(&flowacct_data->epackets, 1); 913 return (ENOMEM); 914 } 915 916 /* Update global stats */ 917 atomic_add_64(&flowacct_data->npackets, 1); 918 atomic_add_64(&flowacct_data->nbytes, header->pktlen); 919 920 kmem_free(header, FLOWACCT_HEADER_SZ); 921 if (flowacct_data->flow_tid == 0) { 922 flowacct_data->flow_tid = timeout(flowacct_timeout_flows, 923 flowacct_data, drv_usectohz(flowacct_data->timer)); 924 } 925 return (0); 926 } 927