1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2007-2009 Myricom, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 #ifndef lint
28 static const char __idstring[] =
29 "@(#)$Id: myri10ge_lro.c,v 1.7 2009-06-29 13:47:22 gallatin Exp $";
30 #endif
31
32 #include "myri10ge_var.h"
33
/*
 * Low 13 bits of the IPv4 ip_off field.  myri10ge_lro_rx() tests it
 * together with IP_MF to reject fragmented packets.
 */
34 #define IP_OFFMASK 0x1fff
/*
 * Kind and length bytes of the TCP timestamp option, used by
 * myri10ge_lro_rx() to build the expected first option word.
 */
35 #define TCPOPT_TIMESTAMP 8
36 #define TCPOLEN_TIMESTAMP 10
/*
 * The only non-zero TCP option length myri10ge_lro_rx() accepts: the
 * timestamp option preceded by two NOPs (NOP, NOP, kind, length).
 */
37 #define TCPOLEN_TSTAMP_APPA 12
38
39
/*
 * Return the folded (not inverted) 16-bit ones'-complement sum of the
 * "len" bytes starting at "raw".
 *
 * The buffer is consumed four bytes (two 16-bit words) per step, so len
 * is assumed to be a multiple of 4.  Note that "raw" must be suitably
 * aligned.  In practice, it will always enter aligned on at least a
 * 4-byte boundary, due to the alignment of our rx buffers.
 */
uint16_t
myri10ge_csum_generic(uint16_t *raw, int len)
{
	uint32_t sum = 0;
	int left;

	for (left = len; left > 0; left -= 4) {
		sum += raw[0];
		sum += raw[1];
		raw += 2;
	}

	/* wrap the carries back into the low 16 bits, twice */
	sum = (sum & 0xffff) + (sum >> 16);
	sum = (sum & 0xffff) + (sum >> 16);
	return ((uint16_t)sum);
}
61
/*
 * Add three values and reduce the result to a 16-bit ones'-complement
 * sum (not inverted).  The callers in this file pass the IP source
 * address, the IP destination address, and htons(tcp length +
 * IPPROTO_TCP), i.e. the TCP pseudo-header contributions.
 *
 * Returns the folded 16-bit sum.
 */
static uint16_t
myri10ge_in_pseudo(unsigned int a, unsigned int b,
    unsigned int c)
{
	uint64_t csum;

	csum = (uint64_t)a + b + c;

	/*
	 * The sum of three 32-bit values needs up to 34 bits, so a fixed
	 * two folds can still leave a carry in bit 16 (for example
	 * 0x1ffff0000 -> 0x1ffff -> 0x10000), which the cast below would
	 * silently drop.  Keep folding until the value fits in 16 bits.
	 */
	while ((csum >> 16) != 0)
		csum = (csum >> 16) + (csum & 0xffff);

	return ((uint16_t)csum);
}
73
74 void
myri10ge_lro_flush(struct myri10ge_slice_state * ss,struct lro_entry * lro,struct myri10ge_mblk_list * mbl)75 myri10ge_lro_flush(struct myri10ge_slice_state *ss, struct lro_entry *lro,
76 struct myri10ge_mblk_list *mbl)
77 {
78 struct ip *ip;
79 struct tcphdr *tcp;
80 uint32_t *ts_ptr;
81 uint32_t tcplen, tcp_csum;
82
83 if (lro->append_cnt) {
84 /*
85 * incorporate the new len into the ip header and
86 * re-calculate the checksum
87 */
88 ip = lro->ip;
89 ip->ip_len = htons(lro->len - ETHERNET_HEADER_SIZE);
90 ip->ip_sum = 0;
91 ip->ip_sum = 0xffff ^
92 myri10ge_csum_generic((uint16_t *)ip, sizeof (*ip));
93 /* incorporate the latest ack into the tcp header */
94 tcp = (struct tcphdr *)(ip + 1);
95 tcp->th_ack = lro->ack_seq;
96 tcp->th_win = lro->window;
97 tcp->th_flags = lro->flags;
98 /* incorporate latest timestamp into the tcp header */
99 if (lro->timestamp) {
100 ts_ptr = (uint32_t *)(tcp + 1);
101 ts_ptr[1] = htonl(lro->tsval);
102 ts_ptr[2] = lro->tsecr;
103 }
104 /*
105 * update checksum in tcp header by re-calculating the
106 * tcp pseudoheader checksum, and adding it to the checksum
107 * of the tcp payload data
108 */
109 tcp->th_sum = 0;
110 tcplen = lro->len - sizeof (*ip) - ETHERNET_HEADER_SIZE;
111 tcp_csum = lro->data_csum;
112 tcp_csum += myri10ge_in_pseudo(ip->ip_src.s_addr,
113 ip->ip_dst.s_addr, htons(tcplen + IPPROTO_TCP));
114 tcp_csum += myri10ge_csum_generic((uint16_t *)tcp,
115 tcp->th_off << 2);
116 tcp_csum = (tcp_csum & 0xffff) + (tcp_csum >> 16);
117 tcp_csum = (tcp_csum & 0xffff) + (tcp_csum >> 16);
118 tcp->th_sum = 0xffff ^ tcp_csum;
119 }
120
121 mac_hcksum_set(lro->m_head, 0, 0, 0,
122 0, HCK_IPV4_HDRCKSUM_OK | HCK_FULLCKSUM_OK);
123
124 mbl->cnt += lro->append_cnt;
125 myri10ge_mbl_append(ss, mbl, lro->m_head);
126 MYRI10GE_SLICE_STAT_INC(lro_flushed);
127 MYRI10GE_SLICE_STAT_ADD(lro_queued, lro->append_cnt + 1);
128 lro->m_head = NULL;
129 lro->timestamp = 0;
130 lro->append_cnt = 0;
131 lro->next = ss->lro_free;
132 ss->lro_free = lro;
133 }
134
135 int
myri10ge_lro_rx(struct myri10ge_slice_state * ss,mblk_t * m_head,uint32_t csum,struct myri10ge_mblk_list * mbl)136 myri10ge_lro_rx(struct myri10ge_slice_state *ss, mblk_t *m_head,
137 uint32_t csum, struct myri10ge_mblk_list *mbl)
138 {
139 struct ether_header *eh;
140 struct ip *ip;
141 struct tcphdr *tcp;
142 uint32_t *ts_ptr;
143 struct lro_entry *lro, *curr;
144 int hlen, ip_len, tcp_hdr_len, tcp_data_len;
145 int opt_bytes, trim;
146 int tot_len = MBLKL(m_head);
147 uint32_t seq, tmp_csum;
148
149 eh = (struct ether_header *)(void *)m_head->b_rptr;
150 if (eh->ether_type != htons(ETHERTYPE_IP))
151 return (EINVAL);
152 ip = (struct ip *)(void *)(eh + 1);
153 if (ip->ip_p != IPPROTO_TCP)
154 return (EINVAL);
155
156 /* ensure there are no options */
157 if ((ip->ip_hl << 2) != sizeof (*ip))
158 return (EINVAL);
159
160 /* .. and the packet is not fragmented */
161 if (ip->ip_off & htons(IP_MF|IP_OFFMASK))
162 return (EINVAL);
163
164 /* verify that the IP header checksum is correct */
165 tmp_csum = myri10ge_csum_generic((uint16_t *)ip, sizeof (*ip));
166 if (unlikely((tmp_csum ^ 0xffff) != 0)) {
167 MYRI10GE_SLICE_STAT_INC(lro_bad_csum);
168 return (EINVAL);
169 }
170
171 /* find the TCP header */
172 tcp = (struct tcphdr *)(ip + 1);
173
174 /* ensure no bits set besides ack or psh */
175 if ((tcp->th_flags & ~(TH_ACK | TH_PUSH)) != 0)
176 return (EINVAL);
177
178 /*
179 * check for timestamps. Since the only option we handle are
180 * timestamps, we only have to handle the simple case of
181 * aligned timestamps
182 */
183
184 opt_bytes = (tcp->th_off << 2) - sizeof (*tcp);
185 tcp_hdr_len = sizeof (*tcp) + opt_bytes;
186 ts_ptr = (uint32_t *)(tcp + 1);
187 if (opt_bytes != 0) {
188 if (unlikely(opt_bytes != TCPOLEN_TSTAMP_APPA) ||
189 (*ts_ptr != ntohl(TCPOPT_NOP<<24|TCPOPT_NOP<<16|
190 TCPOPT_TIMESTAMP<<8|TCPOLEN_TIMESTAMP)))
191 return (EINVAL);
192 }
193
194 ip_len = ntohs(ip->ip_len);
195 tcp_data_len = ip_len - (tcp->th_off << 2) - sizeof (*ip);
196
197 /*
198 * If frame is padded beyond the end of the IP packet,
199 * then we must trim the extra bytes off the end.
200 */
201 trim = tot_len - (ip_len + ETHERNET_HEADER_SIZE);
202 if (trim != 0) {
203 if (trim < 0) {
204 /* truncated packet */
205 return (EINVAL);
206 }
207 m_head->b_wptr -= trim;
208 tot_len -= trim;
209 }
210
211 /* Verify TCP checksum */
212 csum = ntohs((uint16_t)csum);
213 tmp_csum = csum + myri10ge_in_pseudo(ip->ip_src.s_addr,
214 ip->ip_dst.s_addr, htons(tcp_hdr_len + tcp_data_len + IPPROTO_TCP));
215 tmp_csum = (tmp_csum & 0xffff) + (tmp_csum >> 16);
216 tmp_csum = (tmp_csum & 0xffff) + (tmp_csum >> 16);
217 if (tmp_csum != 0xffff) {
218 MYRI10GE_SLICE_STAT_INC(lro_bad_csum);
219 return (EINVAL);
220 }
221
222 hlen = ip_len + ETHERNET_HEADER_SIZE - tcp_data_len;
223 seq = ntohl(tcp->th_seq);
224
225 for (lro = ss->lro_active; lro != NULL; lro = lro->next) {
226 if (lro->source_port == tcp->th_sport &&
227 lro->dest_port == tcp->th_dport &&
228 lro->source_ip == ip->ip_src.s_addr &&
229 lro->dest_ip == ip->ip_dst.s_addr) {
230 /* Try to append it */
231
232 if (unlikely(seq != lro->next_seq)) {
233 /* out of order packet */
234 if (ss->lro_active == lro) {
235 ss->lro_active = lro->next;
236 } else {
237 curr = ss->lro_active;
238 while (curr->next != lro)
239 curr = curr->next;
240 curr->next = lro->next;
241 }
242 myri10ge_lro_flush(ss, lro, mbl);
243 return (EINVAL);
244 }
245
246 if (opt_bytes) {
247 uint32_t tsval = ntohl(*(ts_ptr + 1));
248 /* make sure timestamp values are increasing */
249 if (unlikely(lro->tsval > tsval ||
250 *(ts_ptr + 2) == 0)) {
251 return (-8);
252 }
253 lro->tsval = tsval;
254 lro->tsecr = *(ts_ptr + 2);
255 }
256
257 lro->next_seq += tcp_data_len;
258 lro->ack_seq = tcp->th_ack;
259 lro->window = tcp->th_win;
260 lro->flags |= tcp->th_flags;
261 lro->append_cnt++;
262 if (tcp_data_len == 0) {
263 freeb(m_head);
264 return (0);
265 }
266 /*
267 * subtract off the checksum of the tcp header
268 * from the hardware checksum, and add it to
269 * the stored tcp data checksum. Byteswap
270 * the checksum if the total length so far is
271 * odd
272 */
273 tmp_csum = myri10ge_csum_generic((uint16_t *)tcp,
274 tcp_hdr_len);
275 csum = csum + (tmp_csum ^ 0xffff);
276 csum = (csum & 0xffff) + (csum >> 16);
277 csum = (csum & 0xffff) + (csum >> 16);
278 if (lro->len & 0x1) {
279 /* Odd number of bytes so far, flip bytes */
280 csum = ((csum << 8) | (csum >> 8)) & 0xffff;
281 }
282 csum = csum + lro->data_csum;
283 csum = (csum & 0xffff) + (csum >> 16);
284 csum = (csum & 0xffff) + (csum >> 16);
285 lro->data_csum = csum;
286
287 lro->len += tcp_data_len;
288
289 /*
290 * adjust mblk so that rptr points to
291 * the first byte of the payload
292 */
293 m_head->b_rptr += hlen;
294 /* append mbuf chain */
295 lro->m_tail->b_cont = m_head;
296 /* advance the last pointer */
297 lro->m_tail = m_head;
298 /* flush packet if required */
299 if (lro->len > (65535 - myri10ge_mtu) ||
300 (lro->append_cnt + 1) == myri10ge_lro_max_aggr) {
301 if (ss->lro_active == lro) {
302 ss->lro_active = lro->next;
303 } else {
304 curr = ss->lro_active;
305 while (curr->next != lro)
306 curr = curr->next;
307 curr->next = lro->next;
308 }
309 myri10ge_lro_flush(ss, lro, mbl);
310 }
311 return (0);
312 }
313 }
314
315 if (ss->lro_free == NULL)
316 return (ENOMEM);
317
318 /* start a new chain */
319 lro = ss->lro_free;
320 ss->lro_free = lro->next;
321 lro->next = ss->lro_active;
322 ss->lro_active = lro;
323 lro->source_port = tcp->th_sport;
324 lro->dest_port = tcp->th_dport;
325 lro->source_ip = ip->ip_src.s_addr;
326 lro->dest_ip = ip->ip_dst.s_addr;
327 lro->next_seq = seq + tcp_data_len;
328 lro->mss = (uint16_t)tcp_data_len;
329 lro->ack_seq = tcp->th_ack;
330 lro->window = tcp->th_win;
331 lro->flags = tcp->th_flags;
332
333 /*
334 * save the checksum of just the TCP payload by
335 * subtracting off the checksum of the TCP header from
336 * the entire hardware checksum
337 * Since IP header checksum is correct, checksum over
338 * the IP header is -0. Substracting -0 is unnecessary.
339 */
340 tmp_csum = myri10ge_csum_generic((uint16_t *)tcp, tcp_hdr_len);
341 csum = csum + (tmp_csum ^ 0xffff);
342 csum = (csum & 0xffff) + (csum >> 16);
343 csum = (csum & 0xffff) + (csum >> 16);
344 lro->data_csum = csum;
345 lro->ip = ip;
346
347 /* record timestamp if it is present */
348 if (opt_bytes) {
349 lro->timestamp = 1;
350 lro->tsval = ntohl(*(ts_ptr + 1));
351 lro->tsecr = *(ts_ptr + 2);
352 }
353 lro->len = tot_len;
354 lro->m_head = m_head;
355 lro->m_tail = m_head;
356 return (0);
357 }
358
359 /*
360 * This file uses MyriGE driver indentation.
361 *
362 * Local Variables:
363 * c-file-style:"sun"
364 * tab-width:8
365 * End:
366 */
367