1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 #include <sys/types.h>
28 #include <sys/kmem.h>
29 #include <sys/conf.h>
30 #include <sys/atomic.h>
31 #include <netinet/in.h>
32 #include <netinet/in_systm.h>
33 #include <netinet/ip6.h>
34 #include <sys/socket.h>
35 #include <sys/acct.h>
36 #include <sys/exacct.h>
37 #include <inet/common.h>
38 #include <inet/ip.h>
39 #include <inet/ip6.h>
40 #include <sys/ddi.h>
41 #include <sys/strsun.h>
42 #include <sys/strsubr.h>
43 #include <ipp/flowacct/flowacct_impl.h>
44
/*
 * flowacct - IPQoS accounting module. The module maintains an array
 * of 256 hash buckets. When the action routine is invoked for a flow,
 * if the flow (identified by the 5-tuple: saddr, daddr, sport, dport, proto)
 * is already present in the flow table (indexed by the hash function
 * FLOWACCT_FLOW_HASH), then a check is made to see if an item for this flow
 * with the same dsfield, projid & user id is present. If it is, then the
 * number of packets and the bytes are incremented for that item. If the item
 * does not exist, a new item is added for the flow. If the flow is not
 * present, an entry is made for the flow.
 *
 * A timer runs through the table and writes all the flow items that have
 * timed out to the accounting file (via exacct PSARC/1999/119), if present.
 * Configuration commands to change the timing interval are provided. The
 * flow timeout value can also be configured. While the timeout is in nsec,
 * the flow timer interval is in usec.
 * Information for an active flow can be obtained by using kstats.
 */
63
/*
 * Used in computing the hash index: XOR together bytes 8, 9, 10, 13, 14
 * and 15 of an address that exposes an s6_addr8[] byte array.
 */
#define	FLOWACCT_ADDR_HASH(addr)			\
	((addr).s6_addr8[8] ^ (addr).s6_addr8[9] ^	\
	(addr).s6_addr8[10] ^ (addr).s6_addr8[13] ^	\
	(addr).s6_addr8[14] ^ (addr).s6_addr8[15])

/*
 * Hash bucket index for the flow described by f, a pointer to a structure
 * with saddr, daddr, proto, sport and dport members.  The result is in the
 * range [0, FLOW_TBL_COUNT).
 *
 * The argument is parenthesized at every use so that any pointer-valued
 * expression (e.g. &hdr) can be passed, not only a plain identifier.
 */
#define	FLOWACCT_FLOW_HASH(f)				\
	(((FLOWACCT_ADDR_HASH((f)->saddr)) +		\
	(FLOWACCT_ADDR_HASH((f)->daddr)) +		\
	((f)->proto) + ((f)->sport) + ((f)->dport))	\
	% FLOW_TBL_COUNT)
75
/*
 * Compute the difference between a and b (a - b) in nsec and store it in
 * delta.  a and b are structures with tv_sec and tv_nsec members; delta
 * should be a hrtime_t.  Taken from ip_mroute.c.
 *
 * Second differences of 1 and 2 are handled by plain additions; any other
 * non-zero difference (including negative ones) uses a multiplication.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and can be used in an unbraced if/else arm.
 */
#define	FLOWACCT_DELTA(a, b, delta) do {				\
	int xxs;							\
									\
	(delta) = (a).tv_nsec - (b).tv_nsec;				\
	if ((xxs = (a).tv_sec - (b).tv_sec) != 0) {			\
		switch (xxs) {						\
		case 2:							\
			(delta) += NANOSEC;				\
			/*FALLTHRU*/					\
		case 1:							\
			(delta) += NANOSEC;				\
			break;						\
		default:						\
			(delta) += ((hrtime_t)NANOSEC * xxs);		\
		}							\
	}								\
	/*CONSTCOND*/							\
} while (0)
97
/*
 * Debug level; defaults to 0.
 * NOTE(review): presumably consulted by the flowacct0dbg()/flowacct1dbg()
 * macros used throughout this file -- confirm in flowacct_impl.h.
 */
int flowacct_debug = 0;
100
101 /* Collect timed out flows to be written to the accounting file */
102 typedef struct flow_records_s {
103 flow_usage_t *fl_use;
104 struct flow_records_s *next;
105 }flow_records_t;
106
107 /* Get port information from the packet. Ignore fragments. */
108 static void
flowacct_port_info(header_t * header,void * iph,int af,mblk_t * mp)109 flowacct_port_info(header_t *header, void *iph, int af, mblk_t *mp)
110 {
111 uint16_t *up;
112
113 if (af == AF_INET) {
114 ipha_t *ipha = (ipha_t *)iph;
115 uint32_t u2, u1;
116 uint_t iplen;
117
118 u2 = ntohs(ipha->ipha_fragment_offset_and_flags);
119 u1 = u2 & (IPH_MF | IPH_OFFSET);
120 if (u1 != 0) {
121 return;
122 }
123 iplen = (ipha->ipha_version_and_hdr_length & 0xF) << 2;
124 up = (uint16_t *)(mp->b_rptr + iplen);
125 header->sport = (uint16_t)*up++;
126 header->dport = (uint16_t)*up;
127 } else {
128 ip6_t *ip6h = (ip6_t *)iph;
129 uint_t length = IPV6_HDR_LEN;
130 uint_t ehdrlen;
131 uint8_t *nexthdrp, *whereptr, *endptr;
132 ip6_dest_t *desthdr;
133 ip6_rthdr_t *rthdr;
134 ip6_hbh_t *hbhhdr;
135
136 whereptr = ((uint8_t *)&ip6h[1]);
137 endptr = mp->b_wptr;
138 nexthdrp = &ip6h->ip6_nxt;
139 while (whereptr < endptr) {
140 switch (*nexthdrp) {
141 case IPPROTO_HOPOPTS:
142 hbhhdr = (ip6_hbh_t *)whereptr;
143 ehdrlen = 8 * (hbhhdr->ip6h_len + 1);
144 if ((uchar_t *)hbhhdr + ehdrlen > endptr)
145 return;
146 nexthdrp = &hbhhdr->ip6h_nxt;
147 break;
148 case IPPROTO_DSTOPTS:
149 desthdr = (ip6_dest_t *)whereptr;
150 ehdrlen = 8 * (desthdr->ip6d_len + 1);
151 if ((uchar_t *)desthdr + ehdrlen > endptr)
152 return;
153 nexthdrp = &desthdr->ip6d_nxt;
154 break;
155 case IPPROTO_ROUTING:
156 rthdr = (ip6_rthdr_t *)whereptr;
157 ehdrlen = 8 * (rthdr->ip6r_len + 1);
158 if ((uchar_t *)rthdr + ehdrlen > endptr)
159 return;
160 nexthdrp = &rthdr->ip6r_nxt;
161 break;
162 case IPPROTO_FRAGMENT:
163 return;
164 case IPPROTO_TCP:
165 case IPPROTO_UDP:
166 case IPPROTO_SCTP:
167 /*
168 * Verify we have at least ICMP_MIN_TP_HDR_LEN
169 * bytes of the ULP's header to get the port
170 * info.
171 */
172 if (((uchar_t *)ip6h + length +
173 ICMP_MIN_TP_HDR_LEN) > endptr) {
174 return;
175 }
176 /* Get the protocol & ports */
177 header->proto = *nexthdrp;
178 up = (uint16_t *)((uchar_t *)ip6h + length);
179 header->sport = (uint16_t)*up++;
180 header->dport = (uint16_t)*up;
181 return;
182 case IPPROTO_ICMPV6:
183 case IPPROTO_ENCAP:
184 case IPPROTO_IPV6:
185 case IPPROTO_ESP:
186 case IPPROTO_AH:
187 header->proto = *nexthdrp;
188 return;
189 case IPPROTO_NONE:
190 default:
191 return;
192 }
193 length += ehdrlen;
194 whereptr += ehdrlen;
195 }
196 }
197 }
198
199 /*
200 * flowacct_find_ids(mp, header)
201 *
202 * attempt to discern the uid and projid of the originator of a packet by
203 * looking at the dblks making up the packet - yeuch!
204 *
205 * We do it by skipping any fragments with a credp of NULL (originated in
206 * kernel), taking the first value that isn't NULL to be the cred_t for the
207 * whole packet.
208 */
209 static void
flowacct_find_ids(mblk_t * mp,header_t * header)210 flowacct_find_ids(mblk_t *mp, header_t *header)
211 {
212 cred_t *cr;
213
214 cr = msg_getcred(mp, NULL);
215 if (cr != NULL) {
216 header->uid = crgetuid(cr);
217 header->projid = crgetprojid(cr);
218 } else {
219 header->uid = (uid_t)-1;
220 header->projid = -1;
221 }
222 }
223
224 /*
225 * Extract header information in a header_t structure so that we don't have
226 * have to parse the packet everytime.
227 */
228 static int
flowacct_extract_header(mblk_t * mp,header_t * header)229 flowacct_extract_header(mblk_t *mp, header_t *header)
230 {
231 ipha_t *ipha;
232 ip6_t *ip6h;
233 #define rptr ((uchar_t *)ipha)
234
235 /* 0 means no port extracted. */
236 header->sport = 0;
237 header->dport = 0;
238 flowacct_find_ids(mp, header);
239
240 V6_SET_ZERO(header->saddr);
241 V6_SET_ZERO(header->daddr);
242
243 ipha = (ipha_t *)mp->b_rptr;
244 header->isv4 = IPH_HDR_VERSION(ipha) == IPV4_VERSION;
245 if (header->isv4) {
246 ipha = (ipha_t *)mp->b_rptr;
247 V4_PART_OF_V6(header->saddr) = (int32_t)ipha->ipha_src;
248 V4_PART_OF_V6(header->daddr) = (int32_t)ipha->ipha_dst;
249 header->dsfield = ipha->ipha_type_of_service;
250 header->proto = ipha->ipha_protocol;
251 header->pktlen = ntohs(ipha->ipha_length);
252 if ((header->proto == IPPROTO_TCP) ||
253 (header->proto == IPPROTO_UDP) ||
254 (header->proto == IPPROTO_SCTP)) {
255 flowacct_port_info(header, ipha, AF_INET, mp);
256 }
257 } else {
258 /*
259 * Need to pullup everything.
260 */
261 if (mp->b_cont != NULL) {
262 if (!pullupmsg(mp, -1)) {
263 flowacct0dbg(("flowacct_extract_header: "\
264 "pullup error"));
265 return (-1);
266 }
267 }
268 ip6h = (ip6_t *)mp->b_rptr;
269 bcopy(ip6h->ip6_src.s6_addr32, header->saddr.s6_addr32,
270 sizeof (ip6h->ip6_src.s6_addr32));
271 bcopy(ip6h->ip6_dst.s6_addr32, header->daddr.s6_addr32,
272 sizeof (ip6h->ip6_dst.s6_addr32));
273 header->dsfield = __IPV6_TCLASS_FROM_FLOW(ip6h->ip6_vcf);
274 header->proto = ip6h->ip6_nxt;
275 header->pktlen = ntohs(ip6h->ip6_plen) +
276 ip_hdr_length_v6(mp, ip6h);
277 flowacct_port_info(header, ip6h, AF_INET6, mp);
278
279 }
280 #undef rptr
281 return (0);
282 }
283
284 /* Check if the flow (identified by the 5-tuple) exists in the hash table */
285 static flow_t *
flowacct_flow_present(header_t * header,int index,flowacct_data_t * flowacct_data)286 flowacct_flow_present(header_t *header, int index,
287 flowacct_data_t *flowacct_data)
288 {
289 list_hdr_t *hdr = flowacct_data->flows_tbl[index].head;
290 flow_t *flow;
291
292 while (hdr != NULL) {
293 flow = (flow_t *)hdr->objp;
294 if ((flow != NULL) &&
295 (IN6_ARE_ADDR_EQUAL(&flow->saddr, &header->saddr)) &&
296 (IN6_ARE_ADDR_EQUAL(&flow->daddr, &header->daddr)) &&
297 (flow->proto == header->proto) &&
298 (flow->sport == header->sport) &&
299 (flow->dport == header->dport)) {
300 return (flow);
301 }
302 hdr = hdr->next;
303 }
304 return ((flow_t *)NULL);
305 }
306
307 /*
308 * Add an object to the list at insert_point. This could be a flow item or
309 * a flow itself.
310 */
311 static list_hdr_t *
flowacct_add_obj(list_head_t * tophdr,list_hdr_t * insert_point,void * obj)312 flowacct_add_obj(list_head_t *tophdr, list_hdr_t *insert_point, void *obj)
313 {
314 list_hdr_t *new_hdr;
315
316 if (tophdr == NULL) {
317 return ((list_hdr_t *)NULL);
318 }
319
320 new_hdr = (list_hdr_t *)kmem_zalloc(FLOWACCT_HDR_SZ, KM_NOSLEEP);
321 if (new_hdr == NULL) {
322 flowacct0dbg(("flowacct_add_obj: error allocating mem"));
323 return ((list_hdr_t *)NULL);
324 }
325 gethrestime(&new_hdr->last_seen);
326 new_hdr->objp = obj;
327 tophdr->nbr_items++;
328
329 if (insert_point == NULL) {
330 if (tophdr->head == NULL) {
331 tophdr->head = new_hdr;
332 tophdr->tail = new_hdr;
333 return (new_hdr);
334 }
335
336 new_hdr->next = tophdr->head;
337 tophdr->head->prev = new_hdr;
338 tophdr->head = new_hdr;
339 return (new_hdr);
340 }
341
342 if (insert_point == tophdr->tail) {
343 tophdr->tail->next = new_hdr;
344 new_hdr->prev = tophdr->tail;
345 tophdr->tail = new_hdr;
346 return (new_hdr);
347 }
348
349 new_hdr->next = insert_point->next;
350 new_hdr->prev = insert_point;
351 insert_point->next->prev = new_hdr;
352 insert_point->next = new_hdr;
353 return (new_hdr);
354 }
355
356 /* Delete an obj from the list. This could be a flow item or the flow itself */
357 static void
flowacct_del_obj(list_head_t * tophdr,list_hdr_t * hdr,uint_t mode)358 flowacct_del_obj(list_head_t *tophdr, list_hdr_t *hdr, uint_t mode)
359 {
360 size_t length;
361 uint_t type;
362
363 if ((tophdr == NULL) || (hdr == NULL)) {
364 return;
365 }
366
367 type = ((flow_t *)hdr->objp)->type;
368
369 tophdr->nbr_items--;
370
371 if (hdr->next != NULL) {
372 hdr->next->prev = hdr->prev;
373 }
374 if (hdr->prev != NULL) {
375 hdr->prev->next = hdr->next;
376 }
377 if (tophdr->head == hdr) {
378 tophdr->head = hdr->next;
379 }
380 if (tophdr->tail == hdr) {
381 tophdr->tail = hdr->prev;
382 }
383
384 if (mode == FLOWACCT_DEL_OBJ) {
385 switch (type) {
386 case FLOWACCT_FLOW:
387 length = FLOWACCT_FLOW_SZ;
388 break;
389 case FLOWACCT_ITEM:
390 length = FLOWACCT_ITEM_SZ;
391 break;
392 }
393 kmem_free(hdr->objp, length);
394 hdr->objp = NULL;
395 }
396
397 kmem_free((void *)hdr, FLOWACCT_HDR_SZ);
398 }
399
400 /*
401 * Checks if the given item (identified by dsfield, project id and uid)
402 * is already present for the flow.
403 */
404 static flow_item_t *
flowacct_item_present(flow_t * flow,uint8_t dsfield,pid_t proj_id,uint_t uid)405 flowacct_item_present(flow_t *flow, uint8_t dsfield, pid_t proj_id, uint_t uid)
406 {
407 list_hdr_t *itemhdr;
408 flow_item_t *item;
409
410 itemhdr = flow->items.head;
411
412 while (itemhdr != NULL) {
413 item = (flow_item_t *)itemhdr->objp;
414
415 if ((item->dsfield != dsfield) || (item->projid != proj_id) ||
416 (item->uid != uid)) {
417 itemhdr = itemhdr->next;
418 continue;
419 }
420 return (item);
421 }
422
423 return ((flow_item_t *)NULL);
424 }
425
/*
 * Add the flow to the table, if not already present. If the flow is
 * present in the table, add the item. Also, update the flow stats.
 * Additionally, re-adjust the timeout list as well.
 *
 * header        - pre-parsed packet information (5-tuple, dsfield, ids,
 *                 packet length).
 * flowacct_data - per-instance state holding the flow table and counters.
 *
 * Returns 0 on success and -1 if memory could not be allocated or if the
 * configured maximum number of flow items is still exceeded after one
 * attempt at timing out an old flow.
 *
 * Locking: the hash bucket lock (fhead) is held while the flow and item
 * are looked up/created; it is dropped and re-taken after the timeout list
 * lock (thead) so that the order thead -> fhead used by the timer is kept.
 */
static int
flowacct_update_flows_tbl(header_t *header, flowacct_data_t *flowacct_data)
{
	int index;
	list_head_t *fhead;
	list_head_t *thead;
	list_head_t *ihead;
	boolean_t added_flow = B_FALSE;
	timespec_t now;
	flow_item_t *item;
	flow_t *flow;

	index = FLOWACCT_FLOW_HASH(header);
	fhead = &flowacct_data->flows_tbl[index];

	/* The timeout list */
	thead = &flowacct_data->flows_tbl[FLOW_TBL_COUNT];

	mutex_enter(&fhead->lock);
	flow = flowacct_flow_present(header, index, flowacct_data);
	if (flow == NULL) {
		/* First packet of this flow: create it at the bucket's tail */
		flow = (flow_t *)kmem_zalloc(FLOWACCT_FLOW_SZ, KM_NOSLEEP);
		if (flow == NULL) {
			mutex_exit(&fhead->lock);
			flowacct0dbg(("flowacct_update_flows_tbl: mem alloc "\
			    "error"));
			return (-1);
		}
		flow->hdr = flowacct_add_obj(fhead, fhead->tail, (void *)flow);
		if (flow->hdr == NULL) {
			mutex_exit(&fhead->lock);
			kmem_free(flow, FLOWACCT_FLOW_SZ);
			flowacct0dbg(("flowacct_update_flows_tbl: mem alloc "\
			    "error"));
			return (-1);
		}

		flow->type = FLOWACCT_FLOW;
		flow->isv4 = header->isv4;
		bcopy(header->saddr.s6_addr32, flow->saddr.s6_addr32,
		    sizeof (header->saddr.s6_addr32));
		bcopy(header->daddr.s6_addr32, flow->daddr.s6_addr32,
		    sizeof (header->daddr.s6_addr32));
		flow->proto = header->proto;
		flow->sport = header->sport;
		flow->dport = header->dport;
		/* Lets the timer find the bucket (and lock) of this flow */
		flow->back_ptr = fhead;
		added_flow = B_TRUE;
	} else {
		/*
		 * We need to make sure that this 'flow' is not deleted
		 * either by a scheduled timeout or an explicit call
		 * to flowacct_timer() below.
		 */
		flow->inuse = B_TRUE;
	}

	ihead = &flow->items;
	item = flowacct_item_present(flow, header->dsfield, header->projid,
	    header->uid);
	if (item == NULL) {
		boolean_t just_once = B_TRUE;
		/*
		 * For all practical purposes, we limit the no. of entries in
		 * the flow table - i.e. the max_limit that a user specifies is
		 * the maximum no. of flow items in the table.
		 */
try_again:
		/* Reserve a slot first; give it back if over the limit */
		atomic_inc_32(&flowacct_data->nflows);
		if (flowacct_data->nflows > flowacct_data->max_limit) {
			atomic_dec_32(&flowacct_data->nflows);

			/* Try timing out once */
			if (just_once) {
				/*
				 * Need to release the lock, as this entry
				 * could contain a flow that can be timed
				 * out.
				 */
				mutex_exit(&fhead->lock);
				flowacct_timer(FLOWACCT_JUST_ONE,
				    flowacct_data);
				mutex_enter(&fhead->lock);
				/* Lets check again */
				just_once = B_FALSE;
				goto try_again;
			} else {
				flow->inuse = B_FALSE;
				/* Need to remove the flow, if one was added */
				if (added_flow) {
					flowacct_del_obj(fhead, flow->hdr,
					    FLOWACCT_DEL_OBJ);
				}
				mutex_exit(&fhead->lock);
				flowacct1dbg(("flowacct_update_flows_tbl: "\
				    "maximum active flows exceeded\n"));
				return (-1);
			}
		}
		item = (flow_item_t *)kmem_zalloc(FLOWACCT_ITEM_SZ, KM_NOSLEEP);
		if (item == NULL) {
			flow->inuse = B_FALSE;
			/* Need to remove the flow, if one was added */
			if (added_flow) {
				flowacct_del_obj(fhead, flow->hdr,
				    FLOWACCT_DEL_OBJ);
			}
			mutex_exit(&fhead->lock);
			/* Give back the slot reserved above */
			atomic_dec_32(&flowacct_data->nflows);
			flowacct0dbg(("flowacct_update_flows_tbl: mem alloc "\
			    "error"));
			return (-1);
		}
		item->hdr = flowacct_add_obj(ihead, ihead->tail, (void *)item);
		if (item->hdr == NULL) {
			flow->inuse = B_FALSE;
			/* Need to remove the flow, if one was added */
			if (added_flow) {
				flowacct_del_obj(fhead, flow->hdr,
				    FLOWACCT_DEL_OBJ);
			}
			mutex_exit(&fhead->lock);
			/* Give back the slot reserved above */
			atomic_dec_32(&flowacct_data->nflows);
			kmem_free(item, FLOWACCT_ITEM_SZ);
			flowacct0dbg(("flowacct_update_flows_tbl: mem alloc "\
			    "error\n"));
			return (-1);
		}
		/* If a flow was added, add it too */
		if (added_flow) {
			atomic_add_64(&flowacct_data->usedmem,
			    FLOWACCT_FLOW_RECORD_SZ);
		}
		atomic_add_64(&flowacct_data->usedmem, FLOWACCT_ITEM_RECORD_SZ);

		item->type = FLOWACCT_ITEM;
		item->dsfield = header->dsfield;
		item->projid = header->projid;
		item->uid = header->uid;
		item->npackets = 1;
		item->nbytes = header->pktlen;
		/* The list header was time-stamped when it was created */
		item->creation_time = item->hdr->last_seen;
	} else {
		/* Known item: just account for this packet */
		item->npackets++;
		item->nbytes += header->pktlen;
	}
	/* Both the flow and the item were seen now */
	gethrestime(&now);
	flow->hdr->last_seen = item->hdr->last_seen = now;
	mutex_exit(&fhead->lock);

	/*
	 * Re-adjust the timeout list. The timer takes the thead lock
	 * followed by fhead lock(s), so we release fhead, take thead
	 * and re-take fhead.
	 */
	mutex_enter(&thead->lock);
	mutex_enter(&fhead->lock);
	/* If the flow was added, append it to the tail of the timeout list */
	if (added_flow) {
		if (thead->head == NULL) {
			thead->head = flow->hdr;
			thead->tail = flow->hdr;
		} else {
			thead->tail->timeout_next = flow->hdr;
			flow->hdr->timeout_prev = thead->tail;
			thead->tail = flow->hdr;
		}
	/*
	 * Else, move this flow to the tail of the timeout list, if it is not
	 * already.
	 * flow->hdr in the timeout list :-
	 * timeout_next = NULL, timeout_prev != NULL, at the tail end.
	 * timeout_next != NULL, timeout_prev = NULL, at the head.
	 * timeout_next != NULL, timeout_prev != NULL, in the middle.
	 * timeout_next = NULL, timeout_prev = NULL, not in the timeout list,
	 * ignore such flow.
	 */
	} else if ((flow->hdr->timeout_next != NULL) ||
	    (flow->hdr->timeout_prev != NULL)) {
		if (flow->hdr != thead->tail) {
			if (flow->hdr == thead->head) {
				/* Unlink from the head, then append */
				thead->head->timeout_next->timeout_prev = NULL;
				thead->head = thead->head->timeout_next;
				flow->hdr->timeout_next = NULL;
				thead->tail->timeout_next = flow->hdr;
				flow->hdr->timeout_prev = thead->tail;
				thead->tail = flow->hdr;
			} else {
				/* Unlink from the middle, then append */
				flow->hdr->timeout_prev->timeout_next =
				    flow->hdr->timeout_next;
				flow->hdr->timeout_next->timeout_prev =
				    flow->hdr->timeout_prev;
				flow->hdr->timeout_next = NULL;
				thead->tail->timeout_next = flow->hdr;
				flow->hdr->timeout_prev = thead->tail;
				thead->tail = flow->hdr;
			}
		}
	}
	/*
	 * Unset this variable, now it is fine even if this
	 * flow gets deleted (i.e. after timing out its
	 * flow items) since we are done using it.
	 */
	flow->inuse = B_FALSE;
	mutex_exit(&fhead->lock);
	mutex_exit(&thead->lock);
	atomic_add_64(&flowacct_data->tbytes, header->pktlen);
	return (0);
}
641
642 /* Timer for timing out flows/items from the flow table */
643 void
flowacct_timeout_flows(void * args)644 flowacct_timeout_flows(void *args)
645 {
646 flowacct_data_t *flowacct_data = (flowacct_data_t *)args;
647 flowacct_timer(FLOWACCT_FLOW_TIMER, flowacct_data);
648 flowacct_data->flow_tid = timeout(flowacct_timeout_flows, flowacct_data,
649 drv_usectohz(flowacct_data->timer));
650 }
651
652
653 /* Delete the item from the flow in the flow table */
654 static void
flowacct_timeout_item(flow_t ** flow,list_hdr_t ** item_hdr)655 flowacct_timeout_item(flow_t **flow, list_hdr_t **item_hdr)
656 {
657 list_hdr_t *next_it_hdr;
658
659 next_it_hdr = (*item_hdr)->next;
660 flowacct_del_obj(&(*flow)->items, *item_hdr, FLOWACCT_DEL_OBJ);
661 *item_hdr = next_it_hdr;
662 }
663
664 /* Create a flow record for this timed out item */
665 static flow_records_t *
flowacct_create_record(flow_t * flow,list_hdr_t * ithdr)666 flowacct_create_record(flow_t *flow, list_hdr_t *ithdr)
667 {
668 int count;
669 flow_item_t *item = (flow_item_t *)ithdr->objp;
670 flow_records_t *tmp_frec = NULL;
671
672 /* Record to be written into the accounting file */
673 tmp_frec = kmem_zalloc(sizeof (flow_records_t), KM_NOSLEEP);
674 if (tmp_frec == NULL) {
675 flowacct0dbg(("flowacct_create_record: mem alloc error.\n"));
676 return (NULL);
677 }
678 tmp_frec->fl_use = kmem_zalloc(sizeof (flow_usage_t), KM_NOSLEEP);
679 if (tmp_frec->fl_use == NULL) {
680 flowacct0dbg(("flowacct_create_record: mem alloc error\n"));
681 kmem_free(tmp_frec, sizeof (flow_records_t));
682 return (NULL);
683 }
684
685 /* Copy the IP address */
686 for (count = 0; count < 4; count++) {
687 tmp_frec->fl_use->fu_saddr[count] =
688 htonl(flow->saddr.s6_addr32[count]);
689 tmp_frec->fl_use->fu_daddr[count] =
690 htonl(flow->daddr.s6_addr32[count]);
691 }
692
693 /*
694 * Ports, protocol, version, dsfield, project id, uid, nbytes, npackets
695 * creation time and last seen.
696 */
697 tmp_frec->fl_use->fu_sport = htons(flow->sport);
698 tmp_frec->fl_use->fu_dport = htons(flow->dport);
699 tmp_frec->fl_use->fu_protocol = flow->proto;
700 tmp_frec->fl_use->fu_isv4 = flow->isv4;
701 tmp_frec->fl_use->fu_dsfield = item->dsfield;
702 tmp_frec->fl_use->fu_projid = item->projid;
703 tmp_frec->fl_use->fu_userid = item->uid;
704 tmp_frec->fl_use->fu_nbytes = item->nbytes;
705 tmp_frec->fl_use->fu_npackets = item->npackets;
706 tmp_frec->fl_use->fu_lseen =
707 (uint64_t)(ulong_t)ithdr->last_seen.tv_sec;
708 tmp_frec->fl_use->fu_ctime =
709 (uint64_t)(ulong_t)item->creation_time.tv_sec;
710
711 return (tmp_frec);
712 }
713
/*
 * Scan through the timeout list and write the records to the accounting
 * file, if possible. Basically step through the timeout list maintained in
 * the last entry of the flow table, flows_tbl[FLOW_TBL_COUNT], and time out
 * flows. This could be called from the timer, FLOWACCT_FLOW_TIMER - delete
 * only timed out flows, or when this instance is deleted,
 * FLOWACCT_PURGE_FLOW - delete all the flows from the table, or as
 * FLOWACCT_JUST_ONE - delete the first timed out flow. Since the flows are
 * chronologically arranged in the timeout list, when called as
 * FLOWACCT_FLOW_TIMER and FLOWACCT_JUST_ONE, we can stop when we come across
 * the first flow that has not timed out (which means none of the following
 * flows would have timed out).
 *
 * Locking: the timeout list lock is taken first, then the lock of the hash
 * bucket each flow lives in (found through flow->back_ptr). Both are
 * dropped before the collected records are committed.
 */
void
flowacct_timer(int type, flowacct_data_t *flowacct_data)
{
	hrtime_t diff;
	timespec_t now;
	list_head_t *head, *thead;
	flow_t *flow;
	flow_item_t *item;
	list_hdr_t *fl_hdr, *next_fl_hdr;
	list_hdr_t *ithdr = (list_hdr_t *)NULL;
	/* frec heads the list of collected records; tail is set with it */
	flow_records_t *frec = NULL, *tmp_frec, *tail;
	uint64_t flow_size;
	uint64_t item_size;

	ASSERT(flowacct_data != NULL);

	/*
	 * 2s-complement for subtraction: adding these values with
	 * atomic_add_64() subtracts the record sizes from usedmem.
	 */
	flow_size = ~FLOWACCT_FLOW_RECORD_SZ + 1;
	item_size = ~FLOWACCT_ITEM_RECORD_SZ + 1;

	/* Get the current time */
	gethrestime(&now);

	/*
	 * For each flow in the table, scan through all the items and delete
	 * those that have exceeded the timeout. If all the items in a
	 * flow have timed out, delete the flow entry as well. Finally,
	 * write all the deleted items to the accounting file.
	 */
	thead = &flowacct_data->flows_tbl[FLOW_TBL_COUNT];

	mutex_enter(&thead->lock);
	fl_hdr = thead->head;
	while (fl_hdr != NULL) {
		uint32_t items_deleted = 0;

		/* Saved now; fl_hdr may be freed further down */
		next_fl_hdr = fl_hdr->timeout_next;
		flow = (flow_t *)fl_hdr->objp;
		head = flow->back_ptr;
		mutex_enter(&head->lock);

		/* diff = time since this flow was last seen, in nsec */
		/*LINTED*/
		FLOWACCT_DELTA(now, fl_hdr->last_seen, diff);

		/*
		 * If type is FLOW_TIMER, then check if the item has timed out.
		 * If type is FLOW_PURGE delete the entry anyways.
		 */
		if ((type != FLOWACCT_PURGE_FLOW) &&
		    (diff < flowacct_data->timeout)) {
			/* First flow not yet timed out: nothing more to do */
			mutex_exit(&head->lock);
			mutex_exit(&thead->lock);
			goto write_records;
		}

		ithdr = flow->items.head;
		while (ithdr != NULL) {
			item = (flow_item_t *)ithdr->objp;
			/*
			 * Fill in the flow record to be
			 * written to the accounting file.
			 */
			tmp_frec = flowacct_create_record(flow, ithdr);
			/*
			 * If we don't have memory for records,
			 * we will come back in case this is
			 * called as FLOW_TIMER, else we will
			 * go ahead and delete the item from
			 * the table (when asked to PURGE the
			 * table), so there could be some
			 * entries not written to the file
			 * when this action instance is
			 * deleted.
			 */
			if (tmp_frec != NULL) {
				tmp_frec->fl_use->fu_aname =
				    flowacct_data->act_name;
				/* Append to the list of collected records */
				if (frec == NULL) {
					frec = tmp_frec;
					tail = frec;
				} else {
					tail->next = tmp_frec;
					tail = tmp_frec;
				}
			} else if (type != FLOWACCT_PURGE_FLOW) {
				mutex_exit(&head->lock);
				mutex_exit(&thead->lock);
				/* Account for the items deleted so far */
				atomic_add_32(&flowacct_data->nflows,
				    (~items_deleted + 1));
				goto write_records;
			}

			/* Update stats */
			atomic_add_64(&flowacct_data->tbytes, (~item->nbytes +
			    1));

			/* Delete the item */
			flowacct_timeout_item(&flow, &ithdr);
			items_deleted++;
			atomic_add_64(&flowacct_data->usedmem, item_size);
		}
		ASSERT(flow->items.nbr_items == 0);
		/* nflows counts items; subtract the ones just deleted */
		atomic_add_32(&flowacct_data->nflows, (~items_deleted + 1));

		/*
		 * Don't delete this flow if we are making place for
		 * a new item for this flow.
		 */
		if (!flow->inuse) {
			/* Unlink from the timeout list, fixing head/tail */
			if (fl_hdr->timeout_prev != NULL) {
				fl_hdr->timeout_prev->timeout_next =
				    fl_hdr->timeout_next;
			} else {
				thead->head = fl_hdr->timeout_next;
			}
			if (fl_hdr->timeout_next != NULL) {
				fl_hdr->timeout_next->timeout_prev =
				    fl_hdr->timeout_prev;
			} else {
				thead->tail = fl_hdr->timeout_prev;
			}
			fl_hdr->timeout_prev = NULL;
			fl_hdr->timeout_next = NULL;
			/* Remove from its hash bucket and free the flow */
			flowacct_del_obj(head, fl_hdr, FLOWACCT_DEL_OBJ);
			atomic_add_64(&flowacct_data->usedmem, flow_size);
		}
		mutex_exit(&head->lock);
		/* JUST_ONE stops after the first flow that was processed */
		if (type == FLOWACCT_JUST_ONE) {
			mutex_exit(&thead->lock);
			goto write_records;
		}
		fl_hdr = next_fl_hdr;
	}
	mutex_exit(&thead->lock);
write_records:
	/* Write all the timed out flows to the accounting file */
	while (frec != NULL) {
		tmp_frec = frec->next;
		exacct_commit_flow(frec->fl_use);
		kmem_free(frec->fl_use, sizeof (flow_usage_t));
		kmem_free(frec, sizeof (flow_records_t));
		frec = tmp_frec;
	}
}
870
871 /*
872 * Get the IP header contents from the packet, update the flow table with
873 * this item and return.
874 */
875 int
flowacct_process(mblk_t ** mpp,flowacct_data_t * flowacct_data)876 flowacct_process(mblk_t **mpp, flowacct_data_t *flowacct_data)
877 {
878 header_t *header;
879 mblk_t *mp = *mpp;
880
881 ASSERT(mp != NULL);
882
883 /* If we don't find an M_DATA, return error */
884 if (mp->b_datap->db_type != M_DATA) {
885 if ((mp->b_cont != NULL) &&
886 (mp->b_cont->b_datap->db_type == M_DATA)) {
887 mp = mp->b_cont;
888 } else {
889 flowacct0dbg(("flowacct_process: no data\n"));
890 atomic_inc_64(&flowacct_data->epackets);
891 return (EINVAL);
892 }
893 }
894
895 header = kmem_zalloc(FLOWACCT_HEADER_SZ, KM_NOSLEEP);
896 if (header == NULL) {
897 flowacct0dbg(("flowacct_process: error allocing mem"));
898 atomic_inc_64(&flowacct_data->epackets);
899 return (ENOMEM);
900 }
901
902 /* Get all the required information into header. */
903 if (flowacct_extract_header(mp, header) != 0) {
904 kmem_free(header, FLOWACCT_HEADER_SZ);
905 atomic_inc_64(&flowacct_data->epackets);
906 return (EINVAL);
907 }
908
909 /* Updated the flow table with this entry */
910 if (flowacct_update_flows_tbl(header, flowacct_data) != 0) {
911 kmem_free(header, FLOWACCT_HEADER_SZ);
912 atomic_inc_64(&flowacct_data->epackets);
913 return (ENOMEM);
914 }
915
916 /* Update global stats */
917 atomic_inc_64(&flowacct_data->npackets);
918 atomic_add_64(&flowacct_data->nbytes, header->pktlen);
919
920 kmem_free(header, FLOWACCT_HEADER_SZ);
921 if (flowacct_data->flow_tid == 0) {
922 flowacct_data->flow_tid = timeout(flowacct_timeout_flows,
923 flowacct_data, drv_usectohz(flowacct_data->timer));
924 }
925 return (0);
926 }
927