1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2007-2009 Myricom, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include "myri10ge_var.h" 28 29 #define IP_OFFMASK 0x1fff 30 #define TCPOPT_TIMESTAMP 8 31 #define TCPOLEN_TIMESTAMP 10 32 #define TCPOLEN_TSTAMP_APPA 12 33 34 35 /* 36 * Assume len is a multiple of 4. Note that "raw" must be 37 * suitably aligned. In practice, it will always enter algned on 38 * at least a 4 bytes bounday, due to the alignment of our rx buffers. 39 */ 40 uint16_t 41 myri10ge_csum_generic(uint16_t *raw, int len) 42 { 43 uint32_t csum; 44 csum = 0; 45 while (len > 0) { 46 csum += *raw; 47 raw++; 48 csum += *raw; 49 raw++; 50 len -= 4; 51 } 52 csum = (csum >> 16) + (csum & 0xffff); 53 csum = (csum >> 16) + (csum & 0xffff); 54 return ((uint16_t)csum); 55 } 56 57 static uint16_t 58 myri10ge_in_pseudo(unsigned int a, unsigned int b, 59 unsigned int c) 60 { 61 uint64_t csum; 62 63 csum = (uint64_t)a + b + c; 64 csum = (csum >> 16) + (csum & 0xffff); 65 csum = (csum >> 16) + (csum & 0xffff); 66 return ((uint16_t)csum); 67 } 68 69 void 70 myri10ge_lro_flush(struct myri10ge_slice_state *ss, struct lro_entry *lro, 71 struct myri10ge_mblk_list *mbl) 72 { 73 struct ip *ip; 74 struct tcphdr *tcp; 75 uint32_t *ts_ptr; 76 uint32_t tcplen, tcp_csum; 77 78 if (lro->append_cnt) { 79 /* 80 * incorporate the new len into the ip header and 81 * re-calculate the checksum 82 */ 83 ip = lro->ip; 84 ip->ip_len = htons(lro->len - ETHERNET_HEADER_SIZE); 85 ip->ip_sum = 0; 86 ip->ip_sum = 0xffff ^ 87 myri10ge_csum_generic((uint16_t *)ip, sizeof (*ip)); 88 /* incorporate the latest ack into the tcp header */ 89 tcp = (struct tcphdr *)(ip + 1); 90 tcp->th_ack = lro->ack_seq; 91 tcp->th_win = lro->window; 92 tcp->th_flags = lro->flags; 93 /* incorporate latest timestamp into the tcp header */ 94 if (lro->timestamp) { 95 ts_ptr = (uint32_t *)(tcp + 1); 96 ts_ptr[1] = htonl(lro->tsval); 97 ts_ptr[2] = lro->tsecr; 98 } 99 /* 100 * update checksum in tcp header by re-calculating the 101 * tcp pseudoheader checksum, and adding it to the checksum 102 * of the tcp payload data 103 */ 104 tcp->th_sum = 0; 105 tcplen = lro->len - sizeof (*ip) - ETHERNET_HEADER_SIZE; 106 tcp_csum = lro->data_csum; 107 tcp_csum += myri10ge_in_pseudo(ip->ip_src.s_addr, 108 ip->ip_dst.s_addr, htons(tcplen + IPPROTO_TCP)); 109 tcp_csum += myri10ge_csum_generic((uint16_t *)tcp, 110 tcp->th_off << 2); 111 tcp_csum = (tcp_csum & 0xffff) + (tcp_csum >> 16); 112 tcp_csum = (tcp_csum & 0xffff) + (tcp_csum >> 16); 113 tcp->th_sum = 0xffff ^ tcp_csum; 114 } 115 116 mac_hcksum_set(lro->m_head, 0, 0, 0, 117 0, HCK_IPV4_HDRCKSUM_OK | HCK_FULLCKSUM_OK); 118 119 mbl->cnt += lro->append_cnt; 120 myri10ge_mbl_append(ss, mbl, lro->m_head); 121 MYRI10GE_SLICE_STAT_INC(lro_flushed); 122 MYRI10GE_SLICE_STAT_ADD(lro_queued, lro->append_cnt + 1); 123 lro->m_head = NULL; 124 lro->timestamp = 0; 125 lro->append_cnt = 0; 126 lro->next = ss->lro_free; 127 ss->lro_free = lro; 128 } 129 130 int 131 myri10ge_lro_rx(struct myri10ge_slice_state *ss, mblk_t *m_head, 132 uint32_t csum, struct myri10ge_mblk_list *mbl) 133 { 134 struct ether_header *eh; 135 struct ip *ip; 136 struct tcphdr *tcp; 137 uint32_t *ts_ptr; 138 struct lro_entry *lro, *curr; 139 int hlen, ip_len, tcp_hdr_len, tcp_data_len; 140 int opt_bytes, trim; 141 int tot_len = MBLKL(m_head); 142 uint32_t seq, tmp_csum; 143 144 eh = (struct ether_header *)(void *)m_head->b_rptr; 145 if (eh->ether_type != htons(ETHERTYPE_IP)) 146 return (EINVAL); 147 ip = (struct ip *)(void *)(eh + 1); 148 if (ip->ip_p != IPPROTO_TCP) 149 return (EINVAL); 150 151 /* ensure there are no options */ 152 if ((ip->ip_hl << 2) != sizeof (*ip)) 153 return (EINVAL); 154 155 /* .. and the packet is not fragmented */ 156 if (ip->ip_off & htons(IP_MF|IP_OFFMASK)) 157 return (EINVAL); 158 159 /* verify that the IP header checksum is correct */ 160 tmp_csum = myri10ge_csum_generic((uint16_t *)ip, sizeof (*ip)); 161 if (unlikely((tmp_csum ^ 0xffff) != 0)) { 162 MYRI10GE_SLICE_STAT_INC(lro_bad_csum); 163 return (EINVAL); 164 } 165 166 /* find the TCP header */ 167 tcp = (struct tcphdr *)(ip + 1); 168 169 /* ensure no bits set besides ack or psh */ 170 if ((tcp->th_flags & ~(TH_ACK | TH_PUSH)) != 0) 171 return (EINVAL); 172 173 /* 174 * check for timestamps. Since the only option we handle are 175 * timestamps, we only have to handle the simple case of 176 * aligned timestamps 177 */ 178 179 opt_bytes = (tcp->th_off << 2) - sizeof (*tcp); 180 tcp_hdr_len = sizeof (*tcp) + opt_bytes; 181 ts_ptr = (uint32_t *)(tcp + 1); 182 if (opt_bytes != 0) { 183 if (unlikely(opt_bytes != TCPOLEN_TSTAMP_APPA) || 184 (*ts_ptr != ntohl(TCPOPT_NOP<<24|TCPOPT_NOP<<16| 185 TCPOPT_TIMESTAMP<<8|TCPOLEN_TIMESTAMP))) 186 return (EINVAL); 187 } 188 189 ip_len = ntohs(ip->ip_len); 190 tcp_data_len = ip_len - (tcp->th_off << 2) - sizeof (*ip); 191 192 /* 193 * If frame is padded beyond the end of the IP packet, 194 * then we must trim the extra bytes off the end. 195 */ 196 trim = tot_len - (ip_len + ETHERNET_HEADER_SIZE); 197 if (trim != 0) { 198 if (trim < 0) { 199 /* truncated packet */ 200 return (EINVAL); 201 } 202 m_head->b_wptr -= trim; 203 tot_len -= trim; 204 } 205 206 /* Verify TCP checksum */ 207 csum = ntohs((uint16_t)csum); 208 tmp_csum = csum + myri10ge_in_pseudo(ip->ip_src.s_addr, 209 ip->ip_dst.s_addr, htons(tcp_hdr_len + tcp_data_len + IPPROTO_TCP)); 210 tmp_csum = (tmp_csum & 0xffff) + (tmp_csum >> 16); 211 tmp_csum = (tmp_csum & 0xffff) + (tmp_csum >> 16); 212 if (tmp_csum != 0xffff) { 213 MYRI10GE_SLICE_STAT_INC(lro_bad_csum); 214 return (EINVAL); 215 } 216 217 hlen = ip_len + ETHERNET_HEADER_SIZE - tcp_data_len; 218 seq = ntohl(tcp->th_seq); 219 220 for (lro = ss->lro_active; lro != NULL; lro = lro->next) { 221 if (lro->source_port == tcp->th_sport && 222 lro->dest_port == tcp->th_dport && 223 lro->source_ip == ip->ip_src.s_addr && 224 lro->dest_ip == ip->ip_dst.s_addr) { 225 /* Try to append it */ 226 227 if (unlikely(seq != lro->next_seq)) { 228 /* out of order packet */ 229 if (ss->lro_active == lro) { 230 ss->lro_active = lro->next; 231 } else { 232 curr = ss->lro_active; 233 while (curr->next != lro) 234 curr = curr->next; 235 curr->next = lro->next; 236 } 237 myri10ge_lro_flush(ss, lro, mbl); 238 return (EINVAL); 239 } 240 241 if (opt_bytes) { 242 uint32_t tsval = ntohl(*(ts_ptr + 1)); 243 /* make sure timestamp values are increasing */ 244 if (unlikely(lro->tsval > tsval || 245 *(ts_ptr + 2) == 0)) { 246 return (-8); 247 } 248 lro->tsval = tsval; 249 lro->tsecr = *(ts_ptr + 2); 250 } 251 252 lro->next_seq += tcp_data_len; 253 lro->ack_seq = tcp->th_ack; 254 lro->window = tcp->th_win; 255 lro->flags |= tcp->th_flags; 256 lro->append_cnt++; 257 if (tcp_data_len == 0) { 258 freeb(m_head); 259 return (0); 260 } 261 /* 262 * subtract off the checksum of the tcp header 263 * from the hardware checksum, and add it to 264 * the stored tcp data checksum. Byteswap 265 * the checksum if the total length so far is 266 * odd 267 */ 268 tmp_csum = myri10ge_csum_generic((uint16_t *)tcp, 269 tcp_hdr_len); 270 csum = csum + (tmp_csum ^ 0xffff); 271 csum = (csum & 0xffff) + (csum >> 16); 272 csum = (csum & 0xffff) + (csum >> 16); 273 if (lro->len & 0x1) { 274 /* Odd number of bytes so far, flip bytes */ 275 csum = ((csum << 8) | (csum >> 8)) & 0xffff; 276 } 277 csum = csum + lro->data_csum; 278 csum = (csum & 0xffff) + (csum >> 16); 279 csum = (csum & 0xffff) + (csum >> 16); 280 lro->data_csum = csum; 281 282 lro->len += tcp_data_len; 283 284 /* 285 * adjust mblk so that rptr points to 286 * the first byte of the payload 287 */ 288 m_head->b_rptr += hlen; 289 /* append mbuf chain */ 290 lro->m_tail->b_cont = m_head; 291 /* advance the last pointer */ 292 lro->m_tail = m_head; 293 /* flush packet if required */ 294 if (lro->len > (65535 - myri10ge_mtu) || 295 (lro->append_cnt + 1) == myri10ge_lro_max_aggr) { 296 if (ss->lro_active == lro) { 297 ss->lro_active = lro->next; 298 } else { 299 curr = ss->lro_active; 300 while (curr->next != lro) 301 curr = curr->next; 302 curr->next = lro->next; 303 } 304 myri10ge_lro_flush(ss, lro, mbl); 305 } 306 return (0); 307 } 308 } 309 310 if (ss->lro_free == NULL) 311 return (ENOMEM); 312 313 /* start a new chain */ 314 lro = ss->lro_free; 315 ss->lro_free = lro->next; 316 lro->next = ss->lro_active; 317 ss->lro_active = lro; 318 lro->source_port = tcp->th_sport; 319 lro->dest_port = tcp->th_dport; 320 lro->source_ip = ip->ip_src.s_addr; 321 lro->dest_ip = ip->ip_dst.s_addr; 322 lro->next_seq = seq + tcp_data_len; 323 lro->mss = (uint16_t)tcp_data_len; 324 lro->ack_seq = tcp->th_ack; 325 lro->window = tcp->th_win; 326 lro->flags = tcp->th_flags; 327 328 /* 329 * save the checksum of just the TCP payload by 330 * subtracting off the checksum of the TCP header from 331 * the entire hardware checksum 332 * Since IP header checksum is correct, checksum over 333 * the IP header is -0. Substracting -0 is unnecessary. 334 */ 335 tmp_csum = myri10ge_csum_generic((uint16_t *)tcp, tcp_hdr_len); 336 csum = csum + (tmp_csum ^ 0xffff); 337 csum = (csum & 0xffff) + (csum >> 16); 338 csum = (csum & 0xffff) + (csum >> 16); 339 lro->data_csum = csum; 340 lro->ip = ip; 341 342 /* record timestamp if it is present */ 343 if (opt_bytes) { 344 lro->timestamp = 1; 345 lro->tsval = ntohl(*(ts_ptr + 1)); 346 lro->tsecr = *(ts_ptr + 2); 347 } 348 lro->len = tot_len; 349 lro->m_head = m_head; 350 lro->m_tail = m_head; 351 return (0); 352 } 353 354 /* 355 * This file uses MyriGE driver indentation. 356 * 357 * Local Variables: 358 * c-file-style:"sun" 359 * tab-width:8 360 * End: 361 */ 362