1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2007-2009 Myricom, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #ifndef lint 28 static const char __idstring[] = 29 "@(#)$Id: myri10ge_lro.c,v 1.7 2009-06-29 13:47:22 gallatin Exp $"; 30 #endif 31 32 #include "myri10ge_var.h" 33 34 #define IP_OFFMASK 0x1fff 35 #define TCPOPT_TIMESTAMP 8 36 #define TCPOLEN_TIMESTAMP 10 37 #define TCPOLEN_TSTAMP_APPA 12 38 39 40 /* 41 * Assume len is a multiple of 4. Note that "raw" must be 42 * suitably aligned. In practice, it will always enter algned on 43 * at least a 4 bytes bounday, due to the alignment of our rx buffers. 44 */ 45 uint16_t 46 myri10ge_csum_generic(uint16_t *raw, int len) 47 { 48 uint32_t csum; 49 csum = 0; 50 while (len > 0) { 51 csum += *raw; 52 raw++; 53 csum += *raw; 54 raw++; 55 len -= 4; 56 } 57 csum = (csum >> 16) + (csum & 0xffff); 58 csum = (csum >> 16) + (csum & 0xffff); 59 return ((uint16_t)csum); 60 } 61 62 static uint16_t 63 myri10ge_in_pseudo(unsigned int a, unsigned int b, 64 unsigned int c) 65 { 66 uint64_t csum; 67 68 csum = (uint64_t)a + b + c; 69 csum = (csum >> 16) + (csum & 0xffff); 70 csum = (csum >> 16) + (csum & 0xffff); 71 return ((uint16_t)csum); 72 } 73 74 void 75 myri10ge_lro_flush(struct myri10ge_slice_state *ss, struct lro_entry *lro, 76 struct myri10ge_mblk_list *mbl) 77 { 78 struct ip *ip; 79 struct tcphdr *tcp; 80 uint32_t *ts_ptr; 81 uint32_t tcplen, tcp_csum; 82 83 if (lro->append_cnt) { 84 /* 85 * incorporate the new len into the ip header and 86 * re-calculate the checksum 87 */ 88 ip = lro->ip; 89 ip->ip_len = htons(lro->len - ETHERNET_HEADER_SIZE); 90 ip->ip_sum = 0; 91 ip->ip_sum = 0xffff ^ 92 myri10ge_csum_generic((uint16_t *)ip, sizeof (*ip)); 93 /* incorporate the latest ack into the tcp header */ 94 tcp = (struct tcphdr *)(ip + 1); 95 tcp->th_ack = lro->ack_seq; 96 tcp->th_win = lro->window; 97 tcp->th_flags = lro->flags; 98 /* incorporate latest timestamp into the tcp header */ 99 if (lro->timestamp) { 100 ts_ptr = (uint32_t *)(tcp + 1); 101 ts_ptr[1] = htonl(lro->tsval); 102 ts_ptr[2] = lro->tsecr; 103 } 104 /* 105 * update checksum in tcp header by re-calculating the 106 * tcp pseudoheader checksum, and adding it to the checksum 107 * of the tcp payload data 108 */ 109 tcp->th_sum = 0; 110 tcplen = lro->len - sizeof (*ip) - ETHERNET_HEADER_SIZE; 111 tcp_csum = lro->data_csum; 112 tcp_csum += myri10ge_in_pseudo(ip->ip_src.s_addr, 113 ip->ip_dst.s_addr, htons(tcplen + IPPROTO_TCP)); 114 tcp_csum += myri10ge_csum_generic((uint16_t *)tcp, 115 tcp->th_off << 2); 116 tcp_csum = (tcp_csum & 0xffff) + (tcp_csum >> 16); 117 tcp_csum = (tcp_csum & 0xffff) + (tcp_csum >> 16); 118 tcp->th_sum = 0xffff ^ tcp_csum; 119 } 120 121 mac_hcksum_set(lro->m_head, 0, 0, 0, 122 0, HCK_IPV4_HDRCKSUM_OK | HCK_FULLCKSUM_OK); 123 124 mbl->cnt += lro->append_cnt; 125 myri10ge_mbl_append(ss, mbl, lro->m_head); 126 MYRI10GE_SLICE_STAT_INC(lro_flushed); 127 MYRI10GE_SLICE_STAT_ADD(lro_queued, lro->append_cnt + 1); 128 lro->m_head = NULL; 129 lro->timestamp = 0; 130 lro->append_cnt = 0; 131 lro->next = ss->lro_free; 132 ss->lro_free = lro; 133 } 134 135 int 136 myri10ge_lro_rx(struct myri10ge_slice_state *ss, mblk_t *m_head, 137 uint32_t csum, struct myri10ge_mblk_list *mbl) 138 { 139 struct ether_header *eh; 140 struct ip *ip; 141 struct tcphdr *tcp; 142 uint32_t *ts_ptr; 143 struct lro_entry *lro, *curr; 144 int hlen, ip_len, tcp_hdr_len, tcp_data_len; 145 int opt_bytes, trim; 146 int tot_len = MBLKL(m_head); 147 uint32_t seq, tmp_csum; 148 149 eh = (struct ether_header *)(void *)m_head->b_rptr; 150 if (eh->ether_type != htons(ETHERTYPE_IP)) 151 return (EINVAL); 152 ip = (struct ip *)(void *)(eh + 1); 153 if (ip->ip_p != IPPROTO_TCP) 154 return (EINVAL); 155 156 /* ensure there are no options */ 157 if ((ip->ip_hl << 2) != sizeof (*ip)) 158 return (EINVAL); 159 160 /* .. and the packet is not fragmented */ 161 if (ip->ip_off & htons(IP_MF|IP_OFFMASK)) 162 return (EINVAL); 163 164 /* verify that the IP header checksum is correct */ 165 tmp_csum = myri10ge_csum_generic((uint16_t *)ip, sizeof (*ip)); 166 if (unlikely((tmp_csum ^ 0xffff) != 0)) { 167 MYRI10GE_SLICE_STAT_INC(lro_bad_csum); 168 return (EINVAL); 169 } 170 171 /* find the TCP header */ 172 tcp = (struct tcphdr *)(ip + 1); 173 174 /* ensure no bits set besides ack or psh */ 175 if ((tcp->th_flags & ~(TH_ACK | TH_PUSH)) != 0) 176 return (EINVAL); 177 178 /* 179 * check for timestamps. Since the only option we handle are 180 * timestamps, we only have to handle the simple case of 181 * aligned timestamps 182 */ 183 184 opt_bytes = (tcp->th_off << 2) - sizeof (*tcp); 185 tcp_hdr_len = sizeof (*tcp) + opt_bytes; 186 ts_ptr = (uint32_t *)(tcp + 1); 187 if (opt_bytes != 0) { 188 if (unlikely(opt_bytes != TCPOLEN_TSTAMP_APPA) || 189 (*ts_ptr != ntohl(TCPOPT_NOP<<24|TCPOPT_NOP<<16| 190 TCPOPT_TIMESTAMP<<8|TCPOLEN_TIMESTAMP))) 191 return (EINVAL); 192 } 193 194 ip_len = ntohs(ip->ip_len); 195 tcp_data_len = ip_len - (tcp->th_off << 2) - sizeof (*ip); 196 197 /* 198 * If frame is padded beyond the end of the IP packet, 199 * then we must trim the extra bytes off the end. 200 */ 201 trim = tot_len - (ip_len + ETHERNET_HEADER_SIZE); 202 if (trim != 0) { 203 if (trim < 0) { 204 /* truncated packet */ 205 return (EINVAL); 206 } 207 m_head->b_wptr -= trim; 208 tot_len -= trim; 209 } 210 211 /* Verify TCP checksum */ 212 csum = ntohs((uint16_t)csum); 213 tmp_csum = csum + myri10ge_in_pseudo(ip->ip_src.s_addr, 214 ip->ip_dst.s_addr, htons(tcp_hdr_len + tcp_data_len + IPPROTO_TCP)); 215 tmp_csum = (tmp_csum & 0xffff) + (tmp_csum >> 16); 216 tmp_csum = (tmp_csum & 0xffff) + (tmp_csum >> 16); 217 if (tmp_csum != 0xffff) { 218 MYRI10GE_SLICE_STAT_INC(lro_bad_csum); 219 return (EINVAL); 220 } 221 222 hlen = ip_len + ETHERNET_HEADER_SIZE - tcp_data_len; 223 seq = ntohl(tcp->th_seq); 224 225 for (lro = ss->lro_active; lro != NULL; lro = lro->next) { 226 if (lro->source_port == tcp->th_sport && 227 lro->dest_port == tcp->th_dport && 228 lro->source_ip == ip->ip_src.s_addr && 229 lro->dest_ip == ip->ip_dst.s_addr) { 230 /* Try to append it */ 231 232 if (unlikely(seq != lro->next_seq)) { 233 /* out of order packet */ 234 if (ss->lro_active == lro) { 235 ss->lro_active = lro->next; 236 } else { 237 curr = ss->lro_active; 238 while (curr->next != lro) 239 curr = curr->next; 240 curr->next = lro->next; 241 } 242 myri10ge_lro_flush(ss, lro, mbl); 243 return (EINVAL); 244 } 245 246 if (opt_bytes) { 247 uint32_t tsval = ntohl(*(ts_ptr + 1)); 248 /* make sure timestamp values are increasing */ 249 if (unlikely(lro->tsval > tsval || 250 *(ts_ptr + 2) == 0)) { 251 return (-8); 252 } 253 lro->tsval = tsval; 254 lro->tsecr = *(ts_ptr + 2); 255 } 256 257 lro->next_seq += tcp_data_len; 258 lro->ack_seq = tcp->th_ack; 259 lro->window = tcp->th_win; 260 lro->flags |= tcp->th_flags; 261 lro->append_cnt++; 262 if (tcp_data_len == 0) { 263 freeb(m_head); 264 return (0); 265 } 266 /* 267 * subtract off the checksum of the tcp header 268 * from the hardware checksum, and add it to 269 * the stored tcp data checksum. Byteswap 270 * the checksum if the total length so far is 271 * odd 272 */ 273 tmp_csum = myri10ge_csum_generic((uint16_t *)tcp, 274 tcp_hdr_len); 275 csum = csum + (tmp_csum ^ 0xffff); 276 csum = (csum & 0xffff) + (csum >> 16); 277 csum = (csum & 0xffff) + (csum >> 16); 278 if (lro->len & 0x1) { 279 /* Odd number of bytes so far, flip bytes */ 280 csum = ((csum << 8) | (csum >> 8)) & 0xffff; 281 } 282 csum = csum + lro->data_csum; 283 csum = (csum & 0xffff) + (csum >> 16); 284 csum = (csum & 0xffff) + (csum >> 16); 285 lro->data_csum = csum; 286 287 lro->len += tcp_data_len; 288 289 /* 290 * adjust mblk so that rptr points to 291 * the first byte of the payload 292 */ 293 m_head->b_rptr += hlen; 294 /* append mbuf chain */ 295 lro->m_tail->b_cont = m_head; 296 /* advance the last pointer */ 297 lro->m_tail = m_head; 298 /* flush packet if required */ 299 if (lro->len > (65535 - myri10ge_mtu) || 300 (lro->append_cnt + 1) == myri10ge_lro_max_aggr) { 301 if (ss->lro_active == lro) { 302 ss->lro_active = lro->next; 303 } else { 304 curr = ss->lro_active; 305 while (curr->next != lro) 306 curr = curr->next; 307 curr->next = lro->next; 308 } 309 myri10ge_lro_flush(ss, lro, mbl); 310 } 311 return (0); 312 } 313 } 314 315 if (ss->lro_free == NULL) 316 return (ENOMEM); 317 318 /* start a new chain */ 319 lro = ss->lro_free; 320 ss->lro_free = lro->next; 321 lro->next = ss->lro_active; 322 ss->lro_active = lro; 323 lro->source_port = tcp->th_sport; 324 lro->dest_port = tcp->th_dport; 325 lro->source_ip = ip->ip_src.s_addr; 326 lro->dest_ip = ip->ip_dst.s_addr; 327 lro->next_seq = seq + tcp_data_len; 328 lro->mss = (uint16_t)tcp_data_len; 329 lro->ack_seq = tcp->th_ack; 330 lro->window = tcp->th_win; 331 lro->flags = tcp->th_flags; 332 333 /* 334 * save the checksum of just the TCP payload by 335 * subtracting off the checksum of the TCP header from 336 * the entire hardware checksum 337 * Since IP header checksum is correct, checksum over 338 * the IP header is -0. Substracting -0 is unnecessary. 339 */ 340 tmp_csum = myri10ge_csum_generic((uint16_t *)tcp, tcp_hdr_len); 341 csum = csum + (tmp_csum ^ 0xffff); 342 csum = (csum & 0xffff) + (csum >> 16); 343 csum = (csum & 0xffff) + (csum >> 16); 344 lro->data_csum = csum; 345 lro->ip = ip; 346 347 /* record timestamp if it is present */ 348 if (opt_bytes) { 349 lro->timestamp = 1; 350 lro->tsval = ntohl(*(ts_ptr + 1)); 351 lro->tsecr = *(ts_ptr + 2); 352 } 353 lro->len = tot_len; 354 lro->m_head = m_head; 355 lro->m_tail = m_head; 356 return (0); 357 } 358 359 /* 360 * This file uses MyriGE driver indentation. 361 * 362 * Local Variables: 363 * c-file-style:"sun" 364 * tab-width:8 365 * End: 366 */ 367