1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/kmem.h> 31 #include <sys/conf.h> 32 #include <sys/atomic.h> 33 #include <netinet/in.h> 34 #include <netinet/in_systm.h> 35 #include <netinet/ip6.h> 36 #include <sys/socket.h> 37 #include <sys/acct.h> 38 #include <sys/exacct.h> 39 #include <inet/common.h> 40 #include <inet/ip.h> 41 #include <inet/ip6.h> 42 #include <sys/ddi.h> 43 #include <sys/strsun.h> 44 #include <ipp/flowacct/flowacct_impl.h> 45 46 /* 47 * flowacct - IPQoS accounting module. The module maintains an array 48 * of 256 hash buckets. When the action routine is invoked for a flow, 49 * if the flow (identified by the 5-tuple: saddr, daddr, sport, dport, proto) 50 * is already present in the flow table (indexed by the hash function FLOW_HASH) 51 * then a check is made to see if an item for this flow with the same 52 * dsfield, projid & user id is present. If it is, then the number of packets 53 * and the bytes are incremented for that item. If the item does 54 * not exist a new item is added for the flow. If the flow is not present 55 * an entry is made for the flow. 56 * 57 * A timer runs thru the table and writes all the flow items that have 58 * timed out to the accounting file (via exacct PSARC/1999/119), if present 59 * Configuration commands to change the timing interval is provided. The 60 * flow timeout value can also be configured. While the timeout is in nsec, 61 * the flow timer interval is in usec. 62 * Information for an active flow can be obtained by using kstats. 63 */ 64 65 /* Used in computing the hash index */ 66 #define FLOWACCT_ADDR_HASH(addr) \ 67 ((addr).s6_addr8[8] ^ (addr).s6_addr8[9] ^ \ 68 (addr).s6_addr8[10] ^ (addr).s6_addr8[13] ^ \ 69 (addr).s6_addr8[14] ^ (addr).s6_addr8[15]) 70 71 #define FLOWACCT_FLOW_HASH(f) \ 72 (((FLOWACCT_ADDR_HASH(f->saddr)) + \ 73 (FLOWACCT_ADDR_HASH(f->daddr)) + \ 74 (f->proto) + (f->sport) + (f->dport)) \ 75 % FLOW_TBL_COUNT) 76 77 /* 78 * Compute difference between a and b in nsec and store in delta. 79 * delta should be a hrtime_t. Taken from ip_mroute.c. 80 */ 81 #define FLOWACCT_DELTA(a, b, delta) { \ 82 int xxs; \ 83 \ 84 delta = (a).tv_nsec - (b).tv_nsec; \ 85 if ((xxs = (a).tv_sec - (b).tv_sec) != 0) { \ 86 switch (xxs) { \ 87 case 2: \ 88 delta += NANOSEC; \ 89 /*FALLTHRU*/ \ 90 case 1: \ 91 delta += NANOSEC; \ 92 break; \ 93 default: \ 94 delta += ((hrtime_t)NANOSEC * xxs); \ 95 } \ 96 } \ 97 } 98 99 /* Debug level */ 100 int flowacct_debug = 0; 101 102 /* Collect timed out flows to be written to the accounting file */ 103 typedef struct flow_records_s { 104 flow_usage_t *fl_use; 105 struct flow_records_s *next; 106 }flow_records_t; 107 108 /* Get port information from the packet. Ignore fragments. */ 109 static void 110 flowacct_port_info(header_t *header, void *iph, int af, mblk_t *mp) 111 { 112 uint16_t *up; 113 114 if (af == AF_INET) { 115 ipha_t *ipha = (ipha_t *)iph; 116 uint32_t u2, u1; 117 uint_t iplen; 118 119 u2 = ntohs(ipha->ipha_fragment_offset_and_flags); 120 u1 = u2 & (IPH_MF | IPH_OFFSET); 121 if (u1 != 0) { 122 return; 123 } 124 iplen = (ipha->ipha_version_and_hdr_length & 0xF) << 2; 125 up = (uint16_t *)(mp->b_rptr + iplen); 126 header->sport = (uint16_t)*up++; 127 header->dport = (uint16_t)*up; 128 } else { 129 ip6_t *ip6h = (ip6_t *)iph; 130 uint_t length = IPV6_HDR_LEN; 131 uint_t ehdrlen; 132 uint8_t *nexthdrp, *whereptr, *endptr; 133 ip6_dest_t *desthdr; 134 ip6_rthdr_t *rthdr; 135 ip6_hbh_t *hbhhdr; 136 137 whereptr = ((uint8_t *)&ip6h[1]); 138 endptr = mp->b_wptr; 139 nexthdrp = &ip6h->ip6_nxt; 140 while (whereptr < endptr) { 141 switch (*nexthdrp) { 142 case IPPROTO_HOPOPTS: 143 hbhhdr = (ip6_hbh_t *)whereptr; 144 ehdrlen = 8 * (hbhhdr->ip6h_len + 1); 145 if ((uchar_t *)hbhhdr + ehdrlen > endptr) 146 return; 147 nexthdrp = &hbhhdr->ip6h_nxt; 148 break; 149 case IPPROTO_DSTOPTS: 150 desthdr = (ip6_dest_t *)whereptr; 151 ehdrlen = 8 * (desthdr->ip6d_len + 1); 152 if ((uchar_t *)desthdr + ehdrlen > endptr) 153 return; 154 nexthdrp = &desthdr->ip6d_nxt; 155 break; 156 case IPPROTO_ROUTING: 157 rthdr = (ip6_rthdr_t *)whereptr; 158 ehdrlen = 8 * (rthdr->ip6r_len + 1); 159 if ((uchar_t *)rthdr + ehdrlen > endptr) 160 return; 161 nexthdrp = &rthdr->ip6r_nxt; 162 break; 163 case IPPROTO_FRAGMENT: 164 return; 165 case IPPROTO_TCP: 166 case IPPROTO_UDP: 167 case IPPROTO_SCTP: 168 /* 169 * Verify we have at least ICMP_MIN_TP_HDR_LEN 170 * bytes of the ULP's header to get the port 171 * info. 172 */ 173 if (((uchar_t *)ip6h + length + 174 ICMP_MIN_TP_HDR_LEN) > endptr) { 175 return; 176 } 177 /* Get the protocol & ports */ 178 header->proto = *nexthdrp; 179 up = (uint16_t *)((uchar_t *)ip6h + length); 180 header->sport = (uint16_t)*up++; 181 header->dport = (uint16_t)*up; 182 return; 183 case IPPROTO_ICMPV6: 184 case IPPROTO_ENCAP: 185 case IPPROTO_IPV6: 186 case IPPROTO_ESP: 187 case IPPROTO_AH: 188 header->proto = *nexthdrp; 189 return; 190 case IPPROTO_NONE: 191 default: 192 return; 193 } 194 length += ehdrlen; 195 whereptr += ehdrlen; 196 } 197 } 198 } 199 200 /* 201 * flowacct_find_ids(mp, header) 202 * 203 * attempt to discern the uid and projid of the originator of a packet by 204 * looking at the dblks making up the packet - yeuch! 205 * 206 * We do it by skipping any fragments with a credp of NULL (originated in 207 * kernel), taking the first value that isn't NULL to be the cred_t for the 208 * whole packet. 209 */ 210 static void 211 flowacct_find_ids(mblk_t *mp, header_t *header) 212 { 213 cred_t *cr; 214 215 while (DB_CRED(mp) == NULL && mp->b_cont != NULL) 216 mp = mp->b_cont; 217 218 if ((cr = DB_CRED(mp)) != NULL) { 219 header->uid = crgetuid(cr); 220 header->projid = crgetprojid(cr); 221 } else { 222 header->uid = (uid_t)-1; 223 header->projid = -1; 224 } 225 } 226 227 /* 228 * Extract header information in a header_t structure so that we don't have 229 * have to parse the packet everytime. 230 */ 231 static int 232 flowacct_extract_header(mblk_t *mp, header_t *header) 233 { 234 ipha_t *ipha; 235 ip6_t *ip6h; 236 #define rptr ((uchar_t *)ipha) 237 238 /* 0 means no port extracted. */ 239 header->sport = 0; 240 header->dport = 0; 241 flowacct_find_ids(mp, header); 242 243 V6_SET_ZERO(header->saddr); 244 V6_SET_ZERO(header->daddr); 245 246 ipha = (ipha_t *)mp->b_rptr; 247 header->isv4 = IPH_HDR_VERSION(ipha) == IPV4_VERSION; 248 if (header->isv4) { 249 ipha = (ipha_t *)mp->b_rptr; 250 V4_PART_OF_V6(header->saddr) = (int32_t)ipha->ipha_src; 251 V4_PART_OF_V6(header->daddr) = (int32_t)ipha->ipha_dst; 252 header->dsfield = ipha->ipha_type_of_service; 253 header->proto = ipha->ipha_protocol; 254 header->pktlen = ntohs(ipha->ipha_length); 255 if ((header->proto == IPPROTO_TCP) || 256 (header->proto == IPPROTO_UDP) || 257 (header->proto == IPPROTO_SCTP)) { 258 flowacct_port_info(header, ipha, AF_INET, mp); 259 } 260 } else { 261 /* 262 * Need to pullup everything. 263 */ 264 if (mp->b_cont != NULL) { 265 if (!pullupmsg(mp, -1)) { 266 flowacct0dbg(("flowacct_extract_header: "\ 267 "pullup error")); 268 return (-1); 269 } 270 } 271 ip6h = (ip6_t *)mp->b_rptr; 272 bcopy(ip6h->ip6_src.s6_addr32, header->saddr.s6_addr32, 273 sizeof (ip6h->ip6_src.s6_addr32)); 274 bcopy(ip6h->ip6_dst.s6_addr32, header->daddr.s6_addr32, 275 sizeof (ip6h->ip6_dst.s6_addr32)); 276 header->dsfield = __IPV6_TCLASS_FROM_FLOW(ip6h->ip6_vcf); 277 header->proto = ip6h->ip6_nxt; 278 header->pktlen = ntohs(ip6h->ip6_plen) + 279 ip_hdr_length_v6(mp, ip6h); 280 flowacct_port_info(header, ip6h, AF_INET6, mp); 281 282 } 283 #undef rptr 284 return (0); 285 } 286 287 /* Check if the flow (identified by the 5-tuple) exists in the hash table */ 288 static flow_t * 289 flowacct_flow_present(header_t *header, int index, 290 flowacct_data_t *flowacct_data) 291 { 292 list_hdr_t *hdr = flowacct_data->flows_tbl[index].head; 293 flow_t *flow; 294 295 while (hdr != NULL) { 296 flow = (flow_t *)hdr->objp; 297 if ((flow != NULL) && 298 (IN6_ARE_ADDR_EQUAL(&flow->saddr, &header->saddr)) && 299 (IN6_ARE_ADDR_EQUAL(&flow->daddr, &header->daddr)) && 300 (flow->proto == header->proto) && 301 (flow->sport == header->sport) && 302 (flow->dport == header->dport)) { 303 return (flow); 304 } 305 hdr = hdr->next; 306 } 307 return ((flow_t *)NULL); 308 } 309 310 /* 311 * Add an object to the list at insert_point. This could be a flow item or 312 * a flow itself. 313 */ 314 static list_hdr_t * 315 flowacct_add_obj(list_head_t *tophdr, list_hdr_t *insert_point, void *obj) 316 { 317 list_hdr_t *new_hdr; 318 319 if (tophdr == NULL) { 320 return ((list_hdr_t *)NULL); 321 } 322 323 new_hdr = (list_hdr_t *)kmem_zalloc(FLOWACCT_HDR_SZ, KM_NOSLEEP); 324 if (new_hdr == NULL) { 325 flowacct0dbg(("flowacct_add_obj: error allocating mem")); 326 return ((list_hdr_t *)NULL); 327 } 328 gethrestime(&new_hdr->last_seen); 329 new_hdr->objp = obj; 330 tophdr->nbr_items++; 331 332 if (insert_point == NULL) { 333 if (tophdr->head == NULL) { 334 tophdr->head = new_hdr; 335 tophdr->tail = new_hdr; 336 return (new_hdr); 337 } 338 339 new_hdr->next = tophdr->head; 340 tophdr->head->prev = new_hdr; 341 tophdr->head = new_hdr; 342 return (new_hdr); 343 } 344 345 if (insert_point == tophdr->tail) { 346 tophdr->tail->next = new_hdr; 347 new_hdr->prev = tophdr->tail; 348 tophdr->tail = new_hdr; 349 return (new_hdr); 350 } 351 352 new_hdr->next = insert_point->next; 353 new_hdr->prev = insert_point; 354 insert_point->next->prev = new_hdr; 355 insert_point->next = new_hdr; 356 return (new_hdr); 357 } 358 359 /* Delete an obj from the list. This could be a flow item or the flow itself */ 360 static void 361 flowacct_del_obj(list_head_t *tophdr, list_hdr_t *hdr, uint_t mode) 362 { 363 size_t length; 364 uint_t type; 365 366 if ((tophdr == NULL) || (hdr == NULL)) { 367 return; 368 } 369 370 type = ((flow_t *)hdr->objp)->type; 371 372 tophdr->nbr_items--; 373 374 if (hdr->next != NULL) { 375 hdr->next->prev = hdr->prev; 376 } 377 if (hdr->prev != NULL) { 378 hdr->prev->next = hdr->next; 379 } 380 if (tophdr->head == hdr) { 381 tophdr->head = hdr->next; 382 } 383 if (tophdr->tail == hdr) { 384 tophdr->tail = hdr->prev; 385 } 386 387 if (mode == FLOWACCT_DEL_OBJ) { 388 switch (type) { 389 case FLOWACCT_FLOW: 390 length = FLOWACCT_FLOW_SZ; 391 break; 392 case FLOWACCT_ITEM: 393 length = FLOWACCT_ITEM_SZ; 394 break; 395 } 396 kmem_free(hdr->objp, length); 397 hdr->objp = NULL; 398 } 399 400 kmem_free((void *)hdr, FLOWACCT_HDR_SZ); 401 } 402 403 /* 404 * Checks if the given item (identified by dsfield, project id and uid) 405 * is already present for the flow. 406 */ 407 static flow_item_t * 408 flowacct_item_present(flow_t *flow, uint8_t dsfield, pid_t proj_id, uint_t uid) 409 { 410 list_hdr_t *itemhdr; 411 flow_item_t *item; 412 413 itemhdr = flow->items.head; 414 415 while (itemhdr != NULL) { 416 item = (flow_item_t *)itemhdr->objp; 417 418 if ((item->dsfield != dsfield) || (item->projid != proj_id) || 419 (item->uid != uid)) { 420 itemhdr = itemhdr->next; 421 continue; 422 } 423 return (item); 424 } 425 426 return ((flow_item_t *)NULL); 427 } 428 429 /* 430 * Add the flow to the table, if not already present. If the flow is 431 * present in the table, add the item. Also, update the flow stats. 432 * Additionally, re-adjust the timout list as well. 433 */ 434 static int 435 flowacct_update_flows_tbl(header_t *header, flowacct_data_t *flowacct_data) 436 { 437 int index; 438 list_head_t *fhead; 439 list_head_t *thead; 440 list_head_t *ihead; 441 boolean_t added_flow = B_FALSE; 442 timespec_t now; 443 flow_item_t *item; 444 flow_t *flow; 445 446 index = FLOWACCT_FLOW_HASH(header); 447 fhead = &flowacct_data->flows_tbl[index]; 448 449 /* The timeout list */ 450 thead = &flowacct_data->flows_tbl[FLOW_TBL_COUNT]; 451 452 mutex_enter(&fhead->lock); 453 flow = flowacct_flow_present(header, index, flowacct_data); 454 if (flow == NULL) { 455 flow = (flow_t *)kmem_zalloc(FLOWACCT_FLOW_SZ, KM_NOSLEEP); 456 if (flow == NULL) { 457 flowacct0dbg(("flowacct_update_flows_tbl: mem alloc "\ 458 "error")); 459 mutex_exit(&fhead->lock); 460 return (-1); 461 } 462 flow->hdr = flowacct_add_obj(fhead, fhead->tail, (void *)flow); 463 if (flow->hdr == NULL) { 464 flowacct0dbg(("flowacct_update_flows_tbl: mem alloc "\ 465 "error")); 466 kmem_free(flow, FLOWACCT_FLOW_SZ); 467 mutex_exit(&fhead->lock); 468 return (-1); 469 } 470 471 flow->type = FLOWACCT_FLOW; 472 flow->isv4 = header->isv4; 473 bcopy(header->saddr.s6_addr32, flow->saddr.s6_addr32, 474 sizeof (header->saddr.s6_addr32)); 475 bcopy(header->daddr.s6_addr32, flow->daddr.s6_addr32, 476 sizeof (header->daddr.s6_addr32)); 477 flow->proto = header->proto; 478 flow->sport = header->sport; 479 flow->dport = header->dport; 480 flow->back_ptr = fhead; 481 added_flow = B_TRUE; 482 } else { 483 /* 484 * We need to make sure that this 'flow' is not deleted 485 * either by a scheduled timeout or an explict call 486 * to flowacct_timer() below. 487 */ 488 flow->inuse = B_TRUE; 489 } 490 491 ihead = &flow->items; 492 item = flowacct_item_present(flow, header->dsfield, header->projid, 493 header->uid); 494 if (item == NULL) { 495 boolean_t just_once = B_TRUE; 496 /* 497 * For all practical purposes, we limit the no. of entries in 498 * the flow table - i.e. the max_limt that a user specifies is 499 * the maximum no. of flow items in the table. 500 */ 501 try_again: 502 atomic_add_32(&flowacct_data->nflows, 1); 503 if (flowacct_data->nflows > flowacct_data->max_limit) { 504 atomic_add_32(&flowacct_data->nflows, -1); 505 506 /* Try timing out once */ 507 if (just_once) { 508 /* 509 * Need to release the lock, as this entry 510 * could contain a flow that can be timed 511 * out. 512 */ 513 mutex_exit(&fhead->lock); 514 flowacct_timer(FLOWACCT_JUST_ONE, 515 flowacct_data); 516 mutex_enter(&fhead->lock); 517 /* Lets check again */ 518 just_once = B_FALSE; 519 goto try_again; 520 } else { 521 mutex_exit(&fhead->lock); 522 flowacct1dbg(("flowacct_update_flows_tbl: "\ 523 "maximum active flows exceeded\n")); 524 if (added_flow) { 525 flowacct_del_obj(fhead, flow->hdr, 526 FLOWACCT_DEL_OBJ); 527 } 528 return (-1); 529 } 530 } 531 item = (flow_item_t *)kmem_zalloc(FLOWACCT_ITEM_SZ, KM_NOSLEEP); 532 if (item == NULL) { 533 flowacct0dbg(("flowacct_update_flows_tbl: mem alloc "\ 534 "error")); 535 /* Need to remove the flow, if one was added */ 536 if (added_flow) { 537 flowacct_del_obj(fhead, flow->hdr, 538 FLOWACCT_DEL_OBJ); 539 } 540 atomic_add_32(&flowacct_data->nflows, -1); 541 mutex_exit(&fhead->lock); 542 return (-1); 543 } 544 item->hdr = flowacct_add_obj(ihead, ihead->tail, (void *)item); 545 if (item->hdr == NULL) { 546 flowacct0dbg(("flowacct_update_flows_tbl: mem alloc "\ 547 "error\n")); 548 kmem_free(item, FLOWACCT_ITEM_SZ); 549 /* Need to remove the flow, if one was added */ 550 if (added_flow) { 551 flowacct_del_obj(fhead, flow->hdr, 552 FLOWACCT_DEL_OBJ); 553 } 554 atomic_add_32(&flowacct_data->nflows, -1); 555 mutex_exit(&fhead->lock); 556 return (-1); 557 } 558 /* If a flow was added, add it too */ 559 if (added_flow) { 560 atomic_add_64(&flowacct_data->usedmem, 561 FLOWACCT_FLOW_RECORD_SZ); 562 } 563 atomic_add_64(&flowacct_data->usedmem, FLOWACCT_ITEM_RECORD_SZ); 564 565 item->type = FLOWACCT_ITEM; 566 item->dsfield = header->dsfield; 567 item->projid = header->projid; 568 item->uid = header->uid; 569 item->npackets = 1; 570 item->nbytes = header->pktlen; 571 item->creation_time = item->hdr->last_seen; 572 } else { 573 item->npackets++; 574 item->nbytes += header->pktlen; 575 } 576 gethrestime(&now); 577 flow->hdr->last_seen = item->hdr->last_seen = now; 578 mutex_exit(&fhead->lock); 579 580 /* 581 * Re-adjust the timeout list. The timer takes the thead lock 582 * follwed by fhead lock(s), so we release fhead, take thead 583 * and re-take fhead. 584 */ 585 mutex_enter(&thead->lock); 586 mutex_enter(&fhead->lock); 587 /* If the flow was added, append it to the tail of the timeout list */ 588 if (added_flow) { 589 if (thead->head == NULL) { 590 thead->head = flow->hdr; 591 thead->tail = flow->hdr; 592 } else { 593 thead->tail->timeout_next = flow->hdr; 594 flow->hdr->timeout_prev = thead->tail; 595 thead->tail = flow->hdr; 596 } 597 /* 598 * Else, move this flow to the tail of the timeout list, if it is not 599 * already. 600 */ 601 } else if (flow->hdr != thead->tail) { 602 if (flow->hdr == thead->head) { 603 thead->head->timeout_next->timeout_prev = NULL; 604 thead->head = thead->head->timeout_next; 605 flow->hdr->timeout_next = NULL; 606 thead->tail->timeout_next = flow->hdr; 607 flow->hdr->timeout_prev = thead->tail; 608 thead->tail = flow->hdr; 609 } else { 610 flow->hdr->timeout_prev->timeout_next = 611 flow->hdr->timeout_next; 612 flow->hdr->timeout_next->timeout_prev = 613 flow->hdr->timeout_prev; 614 flow->hdr->timeout_next = NULL; 615 thead->tail->timeout_next = flow->hdr; 616 flow->hdr->timeout_prev = thead->tail; 617 thead->tail = flow->hdr; 618 } 619 /* 620 * Unset this variable, now it is fine even if this 621 * flow gets deleted (i.e. after timing out its 622 * flow items) since we are done using it. 623 */ 624 flow->inuse = B_FALSE; 625 } 626 mutex_exit(&fhead->lock); 627 mutex_exit(&thead->lock); 628 atomic_add_64(&flowacct_data->tbytes, header->pktlen); 629 return (0); 630 } 631 632 /* Timer for timing out flows/items from the flow table */ 633 void 634 flowacct_timeout_flows(void *args) 635 { 636 flowacct_data_t *flowacct_data = (flowacct_data_t *)args; 637 flowacct_timer(FLOWACCT_FLOW_TIMER, flowacct_data); 638 flowacct_data->flow_tid = timeout(flowacct_timeout_flows, flowacct_data, 639 drv_usectohz(flowacct_data->timer)); 640 } 641 642 643 /* Delete the item from the flow in the flow table */ 644 static void 645 flowacct_timeout_item(flow_t **flow, list_hdr_t **item_hdr) 646 { 647 list_hdr_t *next_it_hdr; 648 649 next_it_hdr = (*item_hdr)->next; 650 flowacct_del_obj(&(*flow)->items, *item_hdr, FLOWACCT_DEL_OBJ); 651 *item_hdr = next_it_hdr; 652 } 653 654 /* Create a flow record for this timed out item */ 655 static flow_records_t * 656 flowacct_create_record(flow_t *flow, list_hdr_t *ithdr) 657 { 658 int count; 659 flow_item_t *item = (flow_item_t *)ithdr->objp; 660 flow_records_t *tmp_frec = NULL; 661 662 /* Record to be written into the accounting file */ 663 tmp_frec = kmem_zalloc(sizeof (flow_records_t), KM_NOSLEEP); 664 if (tmp_frec == NULL) { 665 flowacct0dbg(("flowacct_create_record: mem alloc error.\n")); 666 return (NULL); 667 } 668 tmp_frec->fl_use = kmem_zalloc(sizeof (flow_usage_t), KM_NOSLEEP); 669 if (tmp_frec->fl_use == NULL) { 670 flowacct0dbg(("flowacct_create_record: mem alloc error\n")); 671 kmem_free(tmp_frec, sizeof (flow_records_t)); 672 return (NULL); 673 } 674 675 /* Copy the IP address */ 676 for (count = 0; count < 4; count++) { 677 tmp_frec->fl_use->fu_saddr[count] = 678 htonl(flow->saddr.s6_addr32[count]); 679 tmp_frec->fl_use->fu_daddr[count] = 680 htonl(flow->daddr.s6_addr32[count]); 681 } 682 683 /* 684 * Ports, protocol, version, dsfield, project id, uid, nbytes, npackets 685 * creation time and last seen. 686 */ 687 tmp_frec->fl_use->fu_sport = htons(flow->sport); 688 tmp_frec->fl_use->fu_dport = htons(flow->dport); 689 tmp_frec->fl_use->fu_protocol = flow->proto; 690 tmp_frec->fl_use->fu_isv4 = flow->isv4; 691 tmp_frec->fl_use->fu_dsfield = item->dsfield; 692 tmp_frec->fl_use->fu_projid = item->projid; 693 tmp_frec->fl_use->fu_userid = item->uid; 694 tmp_frec->fl_use->fu_nbytes = item->nbytes; 695 tmp_frec->fl_use->fu_npackets = item->npackets; 696 tmp_frec->fl_use->fu_lseen = 697 (uint64_t)(ulong_t)ithdr->last_seen.tv_sec; 698 tmp_frec->fl_use->fu_ctime = 699 (uint64_t)(ulong_t)item->creation_time.tv_sec; 700 701 return (tmp_frec); 702 } 703 704 /* 705 * Scan thru the timeout list and write the records to the accounting file, if 706 * possible. Basically step thru the timeout list maintained in the last 707 * hash bucket, FLOW_COUNT_TBL + 1, and timeout flows. This could be called 708 * from the timer, FLOWACCT_TIMER - delete only timed out flows or when this 709 * instance is deleted, FLOWACCT_PURGE_FLOW - delete all the flows from the 710 * table or as FLOWACCT_JUST_ONE - delete the first timed out flow. Since the 711 * flows are cronologically arranged in the timeout list, when called as 712 * FLOWACCT_TIMER and FLOWACCT_JUST_ONE, we can stop when we come across 713 * the first flow that has not timed out (which means none of the following 714 * flows would have timed out). 715 */ 716 void 717 flowacct_timer(int type, flowacct_data_t *flowacct_data) 718 { 719 hrtime_t diff; 720 timespec_t now; 721 list_head_t *head, *thead; 722 flow_t *flow; 723 flow_item_t *item; 724 list_hdr_t *fl_hdr, *next_fl_hdr; 725 list_hdr_t *ithdr = (list_hdr_t *)NULL; 726 flow_records_t *frec = NULL, *tmp_frec, *tail; 727 uint64_t flow_size; 728 uint64_t item_size; 729 730 ASSERT(flowacct_data != NULL); 731 732 /* 2s-complement for subtraction */ 733 flow_size = ~FLOWACCT_FLOW_RECORD_SZ + 1; 734 item_size = ~FLOWACCT_ITEM_RECORD_SZ + 1; 735 736 /* Get the current time */ 737 gethrestime(&now); 738 739 /* 740 * For each flow in the table, scan thru all the items and delete 741 * those that have exceeded the timeout. If all the items in a 742 * flow have timed out, delete the flow entry as well. Finally, 743 * write all the delted items to the accounting file. 744 */ 745 thead = &flowacct_data->flows_tbl[FLOW_TBL_COUNT]; 746 747 mutex_enter(&thead->lock); 748 fl_hdr = thead->head; 749 while (fl_hdr != NULL) { 750 uint32_t items_deleted = 0; 751 752 next_fl_hdr = fl_hdr->timeout_next; 753 flow = (flow_t *)fl_hdr->objp; 754 head = flow->back_ptr; 755 mutex_enter(&head->lock); 756 757 /*LINTED*/ 758 FLOWACCT_DELTA(now, fl_hdr->last_seen, diff); 759 760 /* 761 * If type is FLOW_TIMER, then check if the item has timed out. 762 * If type is FLOW_PURGE delete the entry anyways. 763 */ 764 if ((type != FLOWACCT_PURGE_FLOW) && 765 (diff < flowacct_data->timeout)) { 766 mutex_exit(&head->lock); 767 mutex_exit(&thead->lock); 768 goto write_records; 769 } 770 771 ithdr = flow->items.head; 772 while (ithdr != NULL) { 773 item = (flow_item_t *)ithdr->objp; 774 /* 775 * Fill in the flow record to be 776 * written to the accounting file. 777 */ 778 tmp_frec = flowacct_create_record(flow, ithdr); 779 /* 780 * If we don't have memory for records, 781 * we will come back in case this is 782 * called as FLOW_TIMER, else we will 783 * go ahead and delete the item from 784 * the table (when asked to PURGE the 785 * table), so there could be some 786 * entries not written to the file 787 * when this action instance is 788 * deleted. 789 */ 790 if (tmp_frec != NULL) { 791 tmp_frec->fl_use->fu_aname = 792 flowacct_data->act_name; 793 if (frec == NULL) { 794 frec = tmp_frec; 795 tail = frec; 796 } else { 797 tail->next = tmp_frec; 798 tail = tmp_frec; 799 } 800 } else if (type != FLOWACCT_PURGE_FLOW) { 801 mutex_exit(&head->lock); 802 mutex_exit(&thead->lock); 803 atomic_add_32(&flowacct_data->nflows, 804 (~items_deleted + 1)); 805 goto write_records; 806 } 807 808 /* Update stats */ 809 atomic_add_64(&flowacct_data->tbytes, (~item->nbytes + 810 1)); 811 812 /* Delete the item */ 813 flowacct_timeout_item(&flow, &ithdr); 814 items_deleted++; 815 atomic_add_64(&flowacct_data->usedmem, item_size); 816 } 817 ASSERT(flow->items.nbr_items == 0); 818 atomic_add_32(&flowacct_data->nflows, (~items_deleted + 1)); 819 820 /* 821 * Don't delete this flow if we are making place for 822 * a new item for this flow. 823 */ 824 if (!flow->inuse) { 825 if (fl_hdr == thead->tail) { 826 thead->head = thead->tail = NULL; 827 } else { 828 thead->head = fl_hdr->timeout_next; 829 thead->head->timeout_prev = NULL; 830 } 831 flowacct_del_obj(head, fl_hdr, FLOWACCT_DEL_OBJ); 832 atomic_add_64(&flowacct_data->usedmem, flow_size); 833 } 834 mutex_exit(&head->lock); 835 if (type == FLOWACCT_JUST_ONE) { 836 mutex_exit(&thead->lock); 837 goto write_records; 838 } 839 fl_hdr = next_fl_hdr; 840 } 841 mutex_exit(&thead->lock); 842 write_records: 843 /* Write all the timed out flows to the accounting file */ 844 while (frec != NULL) { 845 tmp_frec = frec->next; 846 exacct_commit_flow(frec->fl_use); 847 kmem_free(frec->fl_use, sizeof (flow_usage_t)); 848 kmem_free(frec, sizeof (flow_records_t)); 849 frec = tmp_frec; 850 } 851 } 852 853 /* 854 * Get the IP header contents from the packet, update the flow table with 855 * this item and return. 856 */ 857 int 858 flowacct_process(mblk_t **mpp, flowacct_data_t *flowacct_data) 859 { 860 header_t *header; 861 mblk_t *mp = *mpp; 862 863 ASSERT(mp != NULL); 864 865 /* If we don't find an M_DATA, return error */ 866 if (mp->b_datap->db_type != M_DATA) { 867 if ((mp->b_cont != NULL) && 868 (mp->b_cont->b_datap->db_type == M_DATA)) { 869 mp = mp->b_cont; 870 } else { 871 flowacct0dbg(("flowacct_process: no data\n")); 872 atomic_add_64(&flowacct_data->epackets, 1); 873 return (EINVAL); 874 } 875 } 876 877 header = kmem_zalloc(FLOWACCT_HEADER_SZ, KM_NOSLEEP); 878 if (header == NULL) { 879 flowacct0dbg(("flowacct_process: error allocing mem")); 880 atomic_add_64(&flowacct_data->epackets, 1); 881 return (ENOMEM); 882 } 883 884 /* Get all the required information into header. */ 885 if (flowacct_extract_header(mp, header) != 0) { 886 kmem_free(header, FLOWACCT_HEADER_SZ); 887 atomic_add_64(&flowacct_data->epackets, 1); 888 return (EINVAL); 889 } 890 891 /* Updated the flow table with this entry */ 892 if (flowacct_update_flows_tbl(header, flowacct_data) != 0) { 893 kmem_free(header, FLOWACCT_HEADER_SZ); 894 atomic_add_64(&flowacct_data->epackets, 1); 895 return (ENOMEM); 896 } 897 898 /* Update global stats */ 899 atomic_add_64(&flowacct_data->npackets, 1); 900 atomic_add_64(&flowacct_data->nbytes, header->pktlen); 901 902 kmem_free(header, FLOWACCT_HEADER_SZ); 903 if (flowacct_data->flow_tid == 0) { 904 flowacct_data->flow_tid = timeout(flowacct_timeout_flows, 905 flowacct_data, drv_usectohz(flowacct_data->timer)); 906 } 907 return (0); 908 } 909