1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/kmem.h> 31 #include <sys/random.h> 32 #include <netinet/in.h> 33 #include <netinet/in_systm.h> 34 #include <netinet/ip6.h> 35 #include <inet/common.h> 36 #include <inet/ip.h> 37 #include <inet/ip6.h> 38 #include <ipp/meters/meter_impl.h> 39 40 /* 41 * Module : Time Sliding Window meter - tswtclmtr 42 * Description 43 * This module implements the metering part of RFC 2859. It accepts the 44 * committed rate, peak rate and the window for a flow and determines 45 * if the flow is within the committed/peak rate and assigns the appropriate 46 * next action. 47 * The meter provides an estimate of the running average bandwidth for the 48 * flow over the specified window. It uses probability to benefit TCP flows 49 * as it reduces the likelihood of dropping multiple packets within a TCP 50 * window without adversely effecting UDP flows. 51 */ 52 53 int tswtcl_debug = 0; 54 55 /* 56 * Given a packet and the tswtcl_data it belongs to, this routine meters the 57 * ToS or DSCP for IPv4 and IPv6 resp. with the values configured for 58 * the tswtcl_data. 59 */ 60 /* ARGSUSED */ 61 int 62 tswtcl_process(mblk_t **mpp, tswtcl_data_t *tswtcl_data, 63 ipp_action_id_t *next_action) 64 { 65 ipha_t *ipha; 66 hrtime_t now; 67 ip6_t *ip6_hdr; 68 uint32_t pkt_len; 69 mblk_t *mp = *mpp; 70 hrtime_t deltaT; 71 uint64_t bitsinwin; 72 uint32_t min = 0, additive, rnd; 73 tswtcl_cfg_t *cfg_parms = tswtcl_data->cfg_parms; 74 75 if (mp == NULL) { 76 tswtcl0dbg(("tswtcl_process: null mp!\n")); 77 atomic_add_64(&tswtcl_data->epackets, 1); 78 return (EINVAL); 79 } 80 81 if (mp->b_datap->db_type != M_DATA) { 82 if ((mp->b_cont != NULL) && 83 (mp->b_cont->b_datap->db_type == M_DATA)) { 84 mp = mp->b_cont; 85 } else { 86 tswtcl0dbg(("tswtcl_process: no data\n")); 87 atomic_add_64(&tswtcl_data->epackets, 1); 88 return (EINVAL); 89 } 90 } 91 92 /* Figure out the ToS/Traffic Class and length from the message */ 93 if ((mp->b_wptr - mp->b_rptr) < IP_SIMPLE_HDR_LENGTH) { 94 if (!pullupmsg(mp, IP_SIMPLE_HDR_LENGTH)) { 95 tswtcl0dbg(("tswtcl_process: pullup error\n")); 96 atomic_add_64(&tswtcl_data->epackets, 1); 97 return (EINVAL); 98 } 99 } 100 ipha = (ipha_t *)mp->b_rptr; 101 if (IPH_HDR_VERSION(ipha) == IPV4_VERSION) { 102 pkt_len = ntohs(ipha->ipha_length); 103 } else { 104 ip6_hdr = (ip6_t *)mp->b_rptr; 105 pkt_len = ntohs(ip6_hdr->ip6_plen) + 106 ip_hdr_length_v6(mp, ip6_hdr); 107 } 108 109 /* Convert into bits */ 110 pkt_len <<= 3; 111 112 /* Get current time */ 113 now = gethrtime(); 114 115 /* Update the avg_rate and win_front tswtcl_data */ 116 mutex_enter(&tswtcl_data->tswtcl_lock); 117 118 /* avg_rate = bits/sec and window in msec */ 119 bitsinwin = ((uint64_t)tswtcl_data->avg_rate * cfg_parms->window / 120 1000) + pkt_len; 121 122 deltaT = now - tswtcl_data->win_front + cfg_parms->nsecwindow; 123 124 tswtcl_data->avg_rate = (uint64_t)bitsinwin * METER_SEC_TO_NSEC / 125 deltaT; 126 tswtcl_data->win_front = now; 127 128 if (tswtcl_data->avg_rate <= cfg_parms->committed_rate) { 129 *next_action = cfg_parms->green_action; 130 } else if (tswtcl_data->avg_rate <= cfg_parms->peak_rate) { 131 /* 132 * Compute the probability: 133 * 134 * p0 = (avg_rate - committed_rate) / avg_rate 135 * 136 * Yellow with probability p0 137 * Green with probability (1 - p0) 138 * 139 */ 140 uint32_t aminusc; 141 142 /* Get a random no. betweeen 0 and avg_rate */ 143 (void) random_get_pseudo_bytes((uint8_t *)&additive, 144 sizeof (additive)); 145 rnd = min + (additive % (tswtcl_data->avg_rate - min + 1)); 146 147 aminusc = tswtcl_data->avg_rate - cfg_parms->committed_rate; 148 if (aminusc >= rnd) { 149 *next_action = cfg_parms->yellow_action; 150 } else { 151 *next_action = cfg_parms->green_action; 152 } 153 } else { 154 /* 155 * Compute the probability: 156 * 157 * p1 = (avg_rate - peak_rate) / avg_rate 158 * p2 = (peak_rate - committed_rate) / avg_rate 159 * 160 * Red with probability p1 161 * Yellow with probability p2 162 * Green with probability (1 - (p1 + p2)) 163 * 164 */ 165 uint32_t aminusp; 166 167 /* Get a random no. betweeen 0 and avg_rate */ 168 (void) random_get_pseudo_bytes((uint8_t *)&additive, 169 sizeof (additive)); 170 rnd = min + (additive % (tswtcl_data->avg_rate - min + 1)); 171 172 aminusp = tswtcl_data->avg_rate - cfg_parms->peak_rate; 173 174 if (aminusp >= rnd) { 175 *next_action = cfg_parms->red_action; 176 } else if ((cfg_parms->pminusc + aminusp) >= rnd) { 177 *next_action = cfg_parms->yellow_action; 178 } else { 179 *next_action = cfg_parms->green_action; 180 } 181 182 } 183 mutex_exit(&tswtcl_data->tswtcl_lock); 184 185 /* Update Stats */ 186 if (*next_action == cfg_parms->green_action) { 187 atomic_add_64(&tswtcl_data->green_packets, 1); 188 atomic_add_64(&tswtcl_data->green_bits, pkt_len); 189 } else if (*next_action == cfg_parms->yellow_action) { 190 atomic_add_64(&tswtcl_data->yellow_packets, 1); 191 atomic_add_64(&tswtcl_data->yellow_bits, pkt_len); 192 } else { 193 ASSERT(*next_action == cfg_parms->red_action); 194 atomic_add_64(&tswtcl_data->red_packets, 1); 195 atomic_add_64(&tswtcl_data->red_bits, pkt_len); 196 } 197 return (0); 198 } 199