1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2012 The FreeBSD Foundation
5 *
6 * This software was developed by Edward Tomasz Napierala under sponsorship
7 * from the FreeBSD Foundation.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 *
30 */
31
32 /*
33 * Software implementation of iSCSI Common Layer kobj(9) interface.
34 */
35
36 #include <sys/param.h>
37 #include <sys/bio.h>
38 #include <sys/capsicum.h>
39 #include <sys/condvar.h>
40 #include <sys/conf.h>
41 #include <sys/gsb_crc32.h>
42 #include <sys/file.h>
43 #include <sys/kernel.h>
44 #include <sys/kthread.h>
45 #include <sys/lock.h>
46 #include <sys/mbuf.h>
47 #include <sys/mutex.h>
48 #include <sys/module.h>
49 #include <sys/protosw.h>
50 #include <sys/socket.h>
51 #include <sys/socketvar.h>
52 #include <sys/sysctl.h>
53 #include <sys/systm.h>
54 #include <sys/sx.h>
55 #include <sys/uio.h>
56 #include <vm/uma.h>
57 #include <vm/vm_page.h>
58 #include <netinet/in.h>
59 #include <netinet/tcp.h>
60
61 #include <dev/iscsi/icl.h>
62 #include <dev/iscsi/iscsi_proto.h>
63 #include <icl_conn_if.h>
64
/*
 * Receive state machine: a PDU is received piecewise as BHS, optional
 * AHS, optional header digest, data segment and optional data digest,
 * in this order; receive_state in struct icl_soft_conn tracks progress.
 */
#define ICL_CONN_STATE_BHS 1
#define ICL_CONN_STATE_AHS 2
#define ICL_CONN_STATE_HEADER_DIGEST 3
#define ICL_CONN_STATE_DATA 4
#define ICL_CONN_STATE_DATA_DIGEST 5
70
/*
 * Software iCL connection.  The generic icl_conn is the first member,
 * so the two are cast back and forth throughout this file.
 */
struct icl_soft_conn {
	struct icl_conn ic;

	/* soft specific stuff goes here. */
	STAILQ_HEAD(, icl_pdu) to_send;	/* PDUs queued for the send thread */
	struct cv send_cv;		/* wakes up icl_send_thread() */
	struct cv receive_cv;		/* wakes up icl_receive_thread() */
	struct icl_pdu *receive_pdu;	/* PDU currently being reassembled */
	size_t receive_len;		/* bytes needed before next state step */
	int receive_state;		/* ICL_CONN_STATE_* */
	bool receive_running;		/* receive thread has not exited yet */
	bool check_send_space;		/* send upcall saw a writable socket */
	bool send_running;		/* send thread has not exited yet */
};
85
/*
 * Software iCL PDU.  The generic icl_pdu is the first member, so the
 * two are cast back and forth throughout this file.
 */
struct icl_soft_pdu {
	struct icl_pdu ip;

	/* soft specific stuff goes here. */
	u_int ref_cnt;	/* extra refs held by zero-copy (M_EXTPG) mbufs */
	icl_pdu_cb cb;	/* completion callback; may be NULL */
	int error;	/* error passed to the callback; 0 on success */
};
94
/* Tunables, exported under kern.icl.soft; all are RWTUN (settable at boot). */
SYSCTL_NODE(_kern_icl, OID_AUTO, soft, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
    "Software iSCSI");
static int coalesce = 1;
SYSCTL_INT(_kern_icl_soft, OID_AUTO, coalesce, CTLFLAG_RWTUN,
    &coalesce, 0, "Try to coalesce PDUs before sending");
static int partial_receive_len = 256 * 1024;
SYSCTL_INT(_kern_icl_soft, OID_AUTO, partial_receive_len, CTLFLAG_RWTUN,
    &partial_receive_len, 0, "Minimum read size for partially received "
    "data segment");
static int max_data_segment_length = 256 * 1024;
SYSCTL_INT(_kern_icl_soft, OID_AUTO, max_data_segment_length, CTLFLAG_RWTUN,
    &max_data_segment_length, 0, "Maximum data segment length");
static int first_burst_length = 1024 * 1024;
SYSCTL_INT(_kern_icl_soft, OID_AUTO, first_burst_length, CTLFLAG_RWTUN,
    &first_burst_length, 0, "First burst length");
static int max_burst_length = 1024 * 1024;
SYSCTL_INT(_kern_icl_soft, OID_AUTO, max_burst_length, CTLFLAG_RWTUN,
    &max_burst_length, 0, "Maximum burst length");
static int sendspace = 1536 * 1024;
SYSCTL_INT(_kern_icl_soft, OID_AUTO, sendspace, CTLFLAG_RWTUN,
    &sendspace, 0, "Default send socket buffer size");
static int recvspace = 1536 * 1024;
SYSCTL_INT(_kern_icl_soft, OID_AUTO, recvspace, CTLFLAG_RWTUN,
    &recvspace, 0, "Default receive socket buffer size");
119
static MALLOC_DEFINE(M_ICL_SOFT, "icl_soft", "iSCSI software backend");
/* UMA zone for struct icl_soft_pdu allocations. */
static uma_zone_t icl_soft_pdu_zone;

/*
 * Count of live connections.  NOTE(review): presumably consulted on
 * module unload; the consumer is not visible in this chunk.
 */
static volatile u_int icl_ncons;

/* Named tail-queue head type for local PDU queues (see icl_send_thread()). */
STAILQ_HEAD(icl_pdu_stailq, icl_pdu);
126
/*
 * Forward declarations of the kobj(9) method implementations; the
 * typedefs come from the icl_conn interface (icl_conn_if.m).
 */
static icl_conn_new_pdu_t icl_soft_conn_new_pdu;
static icl_conn_pdu_free_t icl_soft_conn_pdu_free;
static icl_conn_pdu_data_segment_length_t
    icl_soft_conn_pdu_data_segment_length;
static icl_conn_pdu_append_bio_t icl_soft_conn_pdu_append_bio;
static icl_conn_pdu_append_data_t icl_soft_conn_pdu_append_data;
static icl_conn_pdu_get_bio_t icl_soft_conn_pdu_get_bio;
static icl_conn_pdu_get_data_t icl_soft_conn_pdu_get_data;
static icl_conn_pdu_queue_t icl_soft_conn_pdu_queue;
static icl_conn_pdu_queue_cb_t icl_soft_conn_pdu_queue_cb;
static icl_conn_handoff_t icl_soft_conn_handoff;
static icl_conn_free_t icl_soft_conn_free;
static icl_conn_close_t icl_soft_conn_close;
static icl_conn_task_setup_t icl_soft_conn_task_setup;
static icl_conn_task_done_t icl_soft_conn_task_done;
static icl_conn_transfer_setup_t icl_soft_conn_transfer_setup;
static icl_conn_transfer_done_t icl_soft_conn_transfer_done;
#ifdef ICL_KERNEL_PROXY
static icl_conn_connect_t icl_soft_conn_connect;
#endif
147
/* kobj(9) method dispatch table for the "icl_soft" class. */
static kobj_method_t icl_soft_methods[] = {
	KOBJMETHOD(icl_conn_new_pdu, icl_soft_conn_new_pdu),
	KOBJMETHOD(icl_conn_pdu_free, icl_soft_conn_pdu_free),
	KOBJMETHOD(icl_conn_pdu_data_segment_length,
	    icl_soft_conn_pdu_data_segment_length),
	KOBJMETHOD(icl_conn_pdu_append_bio, icl_soft_conn_pdu_append_bio),
	KOBJMETHOD(icl_conn_pdu_append_data, icl_soft_conn_pdu_append_data),
	KOBJMETHOD(icl_conn_pdu_get_bio, icl_soft_conn_pdu_get_bio),
	KOBJMETHOD(icl_conn_pdu_get_data, icl_soft_conn_pdu_get_data),
	KOBJMETHOD(icl_conn_pdu_queue, icl_soft_conn_pdu_queue),
	KOBJMETHOD(icl_conn_pdu_queue_cb, icl_soft_conn_pdu_queue_cb),
	KOBJMETHOD(icl_conn_handoff, icl_soft_conn_handoff),
	KOBJMETHOD(icl_conn_free, icl_soft_conn_free),
	KOBJMETHOD(icl_conn_close, icl_soft_conn_close),
	KOBJMETHOD(icl_conn_task_setup, icl_soft_conn_task_setup),
	KOBJMETHOD(icl_conn_task_done, icl_soft_conn_task_done),
	KOBJMETHOD(icl_conn_transfer_setup, icl_soft_conn_transfer_setup),
	KOBJMETHOD(icl_conn_transfer_done, icl_soft_conn_transfer_done),
#ifdef ICL_KERNEL_PROXY
	KOBJMETHOD(icl_conn_connect, icl_soft_conn_connect),
#endif
	{ 0, 0 }	/* terminator */
};

DEFINE_CLASS(icl_soft, icl_soft_methods, sizeof(struct icl_soft_conn));
173
/*
 * Abort the connection: mark the socket with a placeholder error and
 * run the owner's error callback.  No-op once the socket has been
 * detached (ic_socket == NULL).
 */
static void
icl_conn_fail(struct icl_conn *ic)
{
	if (ic->ic_socket == NULL)
		return;

	/*
	 * XXX
	 */
	ic->ic_socket->so_error = EDOOFUS;	/* placeholder errno */
	(ic->ic_error)(ic);
}
186
/*
 * icl_conn method: release a PDU and all of its mbuf chains.  The PDU
 * must not be referenced by in-flight zero-copy mbufs (ref_cnt == 0).
 */
static void
icl_soft_conn_pdu_free(struct icl_conn *ic, struct icl_pdu *ip)
{
	struct icl_soft_pdu *isp = (struct icl_soft_pdu *)ip;

	KASSERT(isp->ref_cnt == 0, ("freeing active PDU"));
	/* m_freem(9) accepts NULL, so chains that were never set are fine. */
	m_freem(ip->ip_bhs_mbuf);
	m_freem(ip->ip_ahs_mbuf);
	m_freem(ip->ip_data_mbuf);
	uma_zfree(icl_soft_pdu_zone, isp);
#ifdef DIAGNOSTIC
	/* Balance the acquire in icl_soft_conn_new_pdu(). */
	refcount_release(&ic->ic_outstanding_pdus);
#endif
}
201
/*
 * Final-reference teardown: invoke the completion callback (if any)
 * with the recorded error, then free the PDU itself.  The mbuf chains
 * are expected to have been consumed or freed by the caller already.
 */
static void
icl_soft_pdu_call_cb(struct icl_pdu *ip)
{
	struct icl_soft_pdu *isp = (struct icl_soft_pdu *)ip;

	if (isp->cb != NULL)
		isp->cb(ip, isp->error);
#ifdef DIAGNOSTIC
	refcount_release(&ip->ip_conn->ic_outstanding_pdus);
#endif
	uma_zfree(icl_soft_pdu_zone, isp);
}
214
/*
 * Drop the sender's reference on a PDU after transmission (or failure).
 * Frees the mbuf chains immediately; the callback runs only when the
 * last reference (including any held by zero-copy mbufs) goes away.
 */
static void
icl_soft_pdu_done(struct icl_pdu *ip, int error)
{
	struct icl_soft_pdu *isp = (struct icl_soft_pdu *)ip;

	if (error != 0)
		isp->error = error;

	m_freem(ip->ip_bhs_mbuf);
	ip->ip_bhs_mbuf = NULL;
	m_freem(ip->ip_ahs_mbuf);
	ip->ip_ahs_mbuf = NULL;
	m_freem(ip->ip_data_mbuf);
	ip->ip_data_mbuf = NULL;

	/* Were we the last reference? */
	if (atomic_fetchadd_int(&isp->ref_cnt, -1) == 1)
		icl_soft_pdu_call_cb(ip);
}
233
/*
 * External mbuf free callback: the network stack is done with a
 * zero-copy mbuf referencing this PDU; run the completion path.
 * NOTE(review): unlike icl_soft_free_mext_pg(), this does not
 * decrement ref_cnt before calling the callback — presumably used
 * where a single reference is transferred; confirm against callers.
 */
static void
icl_soft_mbuf_done(struct mbuf *mb)
{
	struct icl_soft_pdu *isp = (struct icl_soft_pdu *)mb->m_ext.ext_arg1;

	icl_soft_pdu_call_cb(&isp->ip);
}
241
/*
 * Allocate icl_pdu with empty BHS to fill up by the caller.
 * Returns NULL on allocation failure.  "flags" are malloc/mbuf wait
 * flags (M_WAITOK / M_NOWAIT).
 */
struct icl_pdu *
icl_soft_conn_new_pdu(struct icl_conn *ic, int flags)
{
	struct icl_soft_pdu *isp;
	struct icl_pdu *ip;

#ifdef DIAGNOSTIC
	refcount_acquire(&ic->ic_outstanding_pdus);
#endif
	/* M_ZERO: ref_cnt, cb and error start out cleared. */
	isp = uma_zalloc(icl_soft_pdu_zone, flags | M_ZERO);
	if (isp == NULL) {
		ICL_WARN("failed to allocate soft PDU");
#ifdef DIAGNOSTIC
		refcount_release(&ic->ic_outstanding_pdus);
#endif
		return (NULL);
	}
	ip = &isp->ip;
	ip->ip_conn = ic;

	/* The fixed-size BHS must fit in a single mbuf with pkthdr. */
	CTASSERT(sizeof(struct iscsi_bhs) <= MHLEN);
	ip->ip_bhs_mbuf = m_gethdr(flags, MT_DATA);
	if (ip->ip_bhs_mbuf == NULL) {
		ICL_WARN("failed to allocate BHS mbuf");
		icl_soft_conn_pdu_free(ic, ip);
		return (NULL);
	}
	ip->ip_bhs = mtod(ip->ip_bhs_mbuf, struct iscsi_bhs *);
	memset(ip->ip_bhs, 0, sizeof(struct iscsi_bhs));
	ip->ip_bhs_mbuf->m_len = sizeof(struct iscsi_bhs);

	return (ip);
}
278
279 static int
icl_pdu_ahs_length(const struct icl_pdu * request)280 icl_pdu_ahs_length(const struct icl_pdu *request)
281 {
282
283 return (request->ip_bhs->bhs_total_ahs_len * 4);
284 }
285
286 static size_t
icl_pdu_data_segment_length(const struct icl_pdu * request)287 icl_pdu_data_segment_length(const struct icl_pdu *request)
288 {
289 uint32_t len = 0;
290
291 len += request->ip_bhs->bhs_data_segment_len[0];
292 len <<= 8;
293 len += request->ip_bhs->bhs_data_segment_len[1];
294 len <<= 8;
295 len += request->ip_bhs->bhs_data_segment_len[2];
296
297 return (len);
298 }
299
/*
 * icl_conn method: report the data segment length encoded in the BHS.
 * Thin wrapper; the connection argument is unused.
 */
size_t
icl_soft_conn_pdu_data_segment_length(struct icl_conn *ic,
    const struct icl_pdu *request)
{

	return (icl_pdu_data_segment_length(request));
}
307
308 static void
icl_pdu_set_data_segment_length(struct icl_pdu * response,uint32_t len)309 icl_pdu_set_data_segment_length(struct icl_pdu *response, uint32_t len)
310 {
311
312 response->ip_bhs->bhs_data_segment_len[2] = len;
313 response->ip_bhs->bhs_data_segment_len[1] = len >> 8;
314 response->ip_bhs->bhs_data_segment_len[0] = len >> 16;
315 }
316
317 static size_t
icl_pdu_padding(const struct icl_pdu * ip)318 icl_pdu_padding(const struct icl_pdu *ip)
319 {
320
321 if ((ip->ip_data_len % 4) != 0)
322 return (4 - (ip->ip_data_len % 4));
323
324 return (0);
325 }
326
327 static size_t
icl_pdu_size(const struct icl_pdu * response)328 icl_pdu_size(const struct icl_pdu *response)
329 {
330 size_t len;
331
332 KASSERT(response->ip_ahs_len == 0, ("responding with AHS"));
333
334 len = sizeof(struct iscsi_bhs) + response->ip_data_len +
335 icl_pdu_padding(response);
336 if (response->ip_conn->ic_header_crc32c)
337 len += ISCSI_HEADER_DIGEST_SIZE;
338 if (response->ip_data_len != 0 && response->ip_conn->ic_data_crc32c)
339 len += ISCSI_DATA_DIGEST_SIZE;
340
341 return (len);
342 }
343
344 static void
icl_soft_receive_buf(struct mbuf ** r,size_t * rs,void * buf,size_t s)345 icl_soft_receive_buf(struct mbuf **r, size_t *rs, void *buf, size_t s)
346 {
347
348 m_copydata(*r, 0, s, buf);
349 m_adj(*r, s);
350 while ((*r) != NULL && (*r)->m_len == 0)
351 *r = m_free(*r);
352 *rs -= s;
353 }
354
355 static void
icl_pdu_receive_ahs(struct icl_pdu * request,struct mbuf ** r,size_t * rs)356 icl_pdu_receive_ahs(struct icl_pdu *request, struct mbuf **r, size_t *rs)
357 {
358
359 request->ip_ahs_len = icl_pdu_ahs_length(request);
360 if (request->ip_ahs_len == 0)
361 return;
362
363 request->ip_ahs_mbuf = *r;
364 *r = m_split(request->ip_ahs_mbuf, request->ip_ahs_len, M_WAITOK);
365 *rs -= request->ip_ahs_len;
366 }
367
368 static int
mbuf_crc32c_helper(void * arg,void * data,u_int len)369 mbuf_crc32c_helper(void *arg, void *data, u_int len)
370 {
371 uint32_t *digestp = arg;
372
373 *digestp = calculate_crc32c(*digestp, data, len);
374 return (0);
375 }
376
377 static uint32_t
icl_mbuf_to_crc32c(struct mbuf * m0,size_t len)378 icl_mbuf_to_crc32c(struct mbuf *m0, size_t len)
379 {
380 uint32_t digest = 0xffffffff;
381
382 m_apply(m0, 0, len, mbuf_crc32c_helper, &digest);
383 digest = digest ^ 0xffffffff;
384
385 return (digest);
386 }
387
/*
 * Consume the received header digest from *r and compare it against a
 * locally computed one.  Returns 0 on match or when header digests are
 * disabled, -1 on mismatch.
 */
static int
icl_pdu_check_header_digest(struct icl_pdu *request, struct mbuf **r, size_t *rs)
{
	uint32_t received_digest, valid_digest;

	if (request->ip_conn->ic_header_crc32c == false)
		return (0);

	CTASSERT(sizeof(received_digest) == ISCSI_HEADER_DIGEST_SIZE);
	icl_soft_receive_buf(r, rs, &received_digest, ISCSI_HEADER_DIGEST_SIZE);

	/* Temporary attach AHS to BHS to calculate header digest. */
	request->ip_bhs_mbuf->m_next = request->ip_ahs_mbuf;
	valid_digest = icl_mbuf_to_crc32c(request->ip_bhs_mbuf, ISCSI_BHS_SIZE);
	/* Detach again; the chains are managed (and freed) separately. */
	request->ip_bhs_mbuf->m_next = NULL;
	if (received_digest != valid_digest) {
		ICL_WARN("header digest check failed; got 0x%x, "
		    "should be 0x%x", received_digest, valid_digest);
		return (-1);
	}

	return (0);
}
411
412 /*
413 * Return the number of bytes that should be waiting in the receive socket
414 * before icl_pdu_receive_data_segment() gets called.
415 */
416 static size_t
icl_pdu_data_segment_receive_len(const struct icl_pdu * request)417 icl_pdu_data_segment_receive_len(const struct icl_pdu *request)
418 {
419 size_t len;
420
421 len = icl_pdu_data_segment_length(request);
422 if (len == 0)
423 return (0);
424
425 /*
426 * Account for the parts of data segment already read from
427 * the socket buffer.
428 */
429 KASSERT(len > request->ip_data_len, ("len <= request->ip_data_len"));
430 len -= request->ip_data_len;
431
432 /*
433 * Don't always wait for the full data segment to be delivered
434 * to the socket; this might badly affect performance due to
435 * TCP window scaling.
436 */
437 if (len > partial_receive_len) {
438 #if 0
439 ICL_DEBUG("need %zd bytes of data, limiting to %zd",
440 len, partial_receive_len));
441 #endif
442 len = partial_receive_len;
443
444 return (len);
445 }
446
447 /*
448 * Account for padding. Note that due to the way code is written,
449 * the icl_pdu_receive_data_segment() must always receive padding
450 * along with the last part of data segment, because it would be
451 * impossible to tell whether we've already received the full data
452 * segment including padding, or without it.
453 */
454 if ((len % 4) != 0)
455 len += 4 - (len % 4);
456
457 #if 0
458 ICL_DEBUG("need %zd bytes of data", len));
459 #endif
460
461 return (len);
462 }
463
464 static int
icl_pdu_receive_data_segment(struct icl_pdu * request,struct mbuf ** r,size_t * rs,bool * more_neededp)465 icl_pdu_receive_data_segment(struct icl_pdu *request, struct mbuf **r,
466 size_t *rs, bool *more_neededp)
467 {
468 struct icl_soft_conn *isc;
469 size_t len, padding = 0;
470 struct mbuf *m;
471
472 isc = (struct icl_soft_conn *)request->ip_conn;
473
474 *more_neededp = false;
475 isc->receive_len = 0;
476
477 len = icl_pdu_data_segment_length(request);
478 if (len == 0)
479 return (0);
480
481 if ((len % 4) != 0)
482 padding = 4 - (len % 4);
483
484 /*
485 * Account for already received parts of data segment.
486 */
487 KASSERT(len > request->ip_data_len, ("len <= request->ip_data_len"));
488 len -= request->ip_data_len;
489
490 if (len + padding > *rs) {
491 /*
492 * Not enough data in the socket buffer. Receive as much
493 * as we can. Don't receive padding, since, obviously, it's
494 * not the end of data segment yet.
495 */
496 #if 0
497 ICL_DEBUG("limited from %zd to %zd",
498 len + padding, *rs - padding));
499 #endif
500 len = *rs - padding;
501 *more_neededp = true;
502 padding = 0;
503 }
504
505 /*
506 * Must not try to receive padding without at least one byte
507 * of actual data segment.
508 */
509 if (len > 0) {
510 m = *r;
511 *r = m_split(m, len + padding, M_WAITOK);
512 *rs -= len + padding;
513
514 if (request->ip_data_mbuf == NULL)
515 request->ip_data_mbuf = m;
516 else
517 m_cat(request->ip_data_mbuf, m);
518
519 request->ip_data_len += len;
520 } else
521 ICL_DEBUG("len 0");
522
523 if (*more_neededp)
524 isc->receive_len = icl_pdu_data_segment_receive_len(request);
525
526 return (0);
527 }
528
/*
 * Consume the received data digest from *r and compare it against a
 * locally computed one.  Returns 0 on match, when data digests are
 * disabled, or when the PDU has no data segment; -1 on mismatch.
 */
static int
icl_pdu_check_data_digest(struct icl_pdu *request, struct mbuf **r, size_t *rs)
{
	uint32_t received_digest, valid_digest;

	if (request->ip_conn->ic_data_crc32c == false)
		return (0);

	if (request->ip_data_len == 0)
		return (0);

	CTASSERT(sizeof(received_digest) == ISCSI_DATA_DIGEST_SIZE);
	icl_soft_receive_buf(r, rs, &received_digest, ISCSI_DATA_DIGEST_SIZE);

	/*
	 * Note that ip_data_mbuf also contains padding; since digest
	 * calculation is supposed to include that, we iterate over
	 * the entire ip_data_mbuf chain, not just ip_data_len bytes of it.
	 */
	valid_digest = icl_mbuf_to_crc32c(request->ip_data_mbuf,
	    roundup2(request->ip_data_len, 4));
	if (received_digest != valid_digest) {
		ICL_WARN("data digest check failed; got 0x%x, "
		    "should be 0x%x", received_digest, valid_digest);
		return (-1);
	}

	return (0);
}
558
/*
 * Somewhat contrary to the name, this attempts to receive only one
 * "part" of PDU at a time; call it repeatedly until it returns non-NULL.
 *
 * Each call handles the state named by isc->receive_state, advances it
 * and sets isc->receive_len to the byte count the caller must have
 * buffered before calling again.  On any error the connection is
 * failed; the partially received PDU stays in isc->receive_pdu.
 */
static struct icl_pdu *
icl_conn_receive_pdu(struct icl_soft_conn *isc, struct mbuf **r, size_t *rs)
{
	struct icl_conn *ic = &isc->ic;
	struct icl_pdu *request;
	size_t len;
	int error = 0;
	bool more_needed;

	if (isc->receive_state == ICL_CONN_STATE_BHS) {
		/* Starting a fresh PDU. */
		KASSERT(isc->receive_pdu == NULL,
		    ("isc->receive_pdu != NULL"));
		request = icl_soft_conn_new_pdu(ic, M_NOWAIT);
		if (request == NULL) {
			ICL_DEBUG("failed to allocate PDU; "
			    "dropping connection");
			icl_conn_fail(ic);
			return (NULL);
		}
		isc->receive_pdu = request;
	} else {
		/* Continuing the PDU from the previous call. */
		KASSERT(isc->receive_pdu != NULL,
		    ("isc->receive_pdu == NULL"));
		request = isc->receive_pdu;
	}

	switch (isc->receive_state) {
	case ICL_CONN_STATE_BHS:
		//ICL_DEBUG("receiving BHS");
		icl_soft_receive_buf(r, rs, request->ip_bhs,
		    sizeof(struct iscsi_bhs));

		/*
		 * We don't enforce any limit for AHS length;
		 * its length is stored in 8 bit field.
		 */

		len = icl_pdu_data_segment_length(request);
		if (len > ic->ic_max_recv_data_segment_length) {
			ICL_WARN("received data segment "
			    "length %zd is larger than negotiated; "
			    "dropping connection", len);
			error = EINVAL;
			break;
		}

		isc->receive_state = ICL_CONN_STATE_AHS;
		isc->receive_len = icl_pdu_ahs_length(request);
		break;

	case ICL_CONN_STATE_AHS:
		//ICL_DEBUG("receiving AHS");
		icl_pdu_receive_ahs(request, r, rs);
		isc->receive_state = ICL_CONN_STATE_HEADER_DIGEST;
		if (ic->ic_header_crc32c == false)
			isc->receive_len = 0;
		else
			isc->receive_len = ISCSI_HEADER_DIGEST_SIZE;
		break;

	case ICL_CONN_STATE_HEADER_DIGEST:
		//ICL_DEBUG("receiving header digest");
		error = icl_pdu_check_header_digest(request, r, rs);
		if (error != 0) {
			ICL_DEBUG("header digest failed; "
			    "dropping connection");
			break;
		}

		isc->receive_state = ICL_CONN_STATE_DATA;
		isc->receive_len = icl_pdu_data_segment_receive_len(request);
		break;

	case ICL_CONN_STATE_DATA:
		//ICL_DEBUG("receiving data segment");
		error = icl_pdu_receive_data_segment(request, r, rs,
		    &more_needed);
		if (error != 0) {
			ICL_DEBUG("failed to receive data segment;"
			    "dropping connection");
			break;
		}

		/* Stay in STATE_DATA until the whole segment arrived. */
		if (more_needed)
			break;

		isc->receive_state = ICL_CONN_STATE_DATA_DIGEST;
		if (request->ip_data_len == 0 || ic->ic_data_crc32c == false)
			isc->receive_len = 0;
		else
			isc->receive_len = ISCSI_DATA_DIGEST_SIZE;
		break;

	case ICL_CONN_STATE_DATA_DIGEST:
		//ICL_DEBUG("receiving data digest");
		error = icl_pdu_check_data_digest(request, r, rs);
		if (error != 0) {
			ICL_DEBUG("data digest failed; "
			    "dropping connection");
			break;
		}

		/*
		 * We've received complete PDU; reset the receive state machine
		 * and return the PDU.
		 */
		isc->receive_state = ICL_CONN_STATE_BHS;
		isc->receive_len = sizeof(struct iscsi_bhs);
		isc->receive_pdu = NULL;
		return (request);

	default:
		panic("invalid receive_state %d\n", isc->receive_state);
	}

	if (error != 0) {
		/*
		 * Don't free the PDU; it's pointed to by isc->receive_pdu
		 * and will get freed in icl_soft_conn_close().
		 */
		icl_conn_fail(ic);
	}

	return (NULL);
}
688
/*
 * Extract as many complete PDUs from the buffered chain *r as possible,
 * handing each one to the connection owner via ic_receive.  Returns
 * when more socket data is needed or the connection is going down.
 */
static void
icl_conn_receive_pdus(struct icl_soft_conn *isc, struct mbuf **r, size_t *rs)
{
	struct icl_conn *ic = &isc->ic;
	struct icl_pdu *response;

	for (;;) {
		if (ic->ic_disconnecting)
			return;

		/*
		 * Loop until we have a complete PDU or there is not enough
		 * data in the socket buffer.
		 */
		if (*rs < isc->receive_len) {
#if 0
			ICL_DEBUG("not enough data; have %zd, need %zd",
			    *rs, isc->receive_len);
#endif
			return;
		}

		response = icl_conn_receive_pdu(isc, r, rs);
		if (response == NULL)
			continue;

		/* This backend never parses AHS; reject PDUs that carry one. */
		if (response->ip_ahs_len > 0) {
			ICL_WARN("received PDU with unsupported "
			    "AHS; opcode 0x%x; dropping connection",
			    response->ip_bhs->bhs_opcode);
			icl_soft_conn_pdu_free(ic, response);
			icl_conn_fail(ic);
			return;
		}

		/* Ownership of the PDU passes to the callback; not freed here. */
		(ic->ic_receive)(response);
	}
}
727
/*
 * Per-connection receive thread: sleeps until the socket holds at least
 * isc->receive_len bytes (enforced via the receive low watermark and
 * the receive_cv upcall), drains the socket into a local mbuf chain and
 * feeds it to the PDU state machine.  Exits when the connection starts
 * disconnecting or the socket errors out.
 */
static void
icl_receive_thread(void *arg)
{
	struct icl_soft_conn *isc = arg;
	struct icl_conn *ic = &isc->ic;
	/* "read" counts bytes pulled into "r" but not yet consumed. */
	size_t available, read = 0;
	struct socket *so;
	struct mbuf *m, *r = NULL;
	struct uio uio;
	int error, flags;

	so = ic->ic_socket;

	for (;;) {
		SOCKBUF_LOCK(&so->so_rcv);
		if (ic->ic_disconnecting) {
			SOCKBUF_UNLOCK(&so->so_rcv);
			break;
		}

		/*
		 * Set the low watermark, to be checked by
		 * soreadable() in icl_soupcall_receive()
		 * to avoid unnecessary wakeups until there
		 * is enough data received to read the PDU.
		 */
		available = sbavail(&so->so_rcv);
		if (read + available < isc->receive_len) {
			so->so_rcv.sb_lowat = isc->receive_len - read;
			cv_wait(&isc->receive_cv, SOCKBUF_MTX(&so->so_rcv));
			/* Disable further wakeups until we rearm the lowat. */
			so->so_rcv.sb_lowat = so->so_rcv.sb_hiwat + 1;
			available = sbavail(&so->so_rcv);
		}
		SOCKBUF_UNLOCK(&so->so_rcv);

		if (available == 0) {
			if (so->so_error != 0) {
				ICL_DEBUG("connection error %d; "
				    "dropping connection", so->so_error);
				icl_conn_fail(ic);
				break;
			}
			continue;
		}

		/* Non-blocking read of everything currently buffered. */
		memset(&uio, 0, sizeof(uio));
		uio.uio_resid = available;
		flags = MSG_DONTWAIT;
		error = soreceive(so, NULL, &uio, &m, NULL, &flags);
		if (error != 0) {
			ICL_DEBUG("soreceive error %d", error);
			break;
		}
		if (uio.uio_resid != 0) {
			m_freem(m);
			ICL_DEBUG("short read");
			break;
		}
		if (r)
			m_cat(r, m);
		else
			r = m;
		read += available;

		icl_conn_receive_pdus(isc, &r, &read);
	}

	if (r)
		m_freem(r);

	/* Tell icl_soft_conn_close() (via send_cv) that we are gone. */
	ICL_CONN_LOCK(ic);
	isc->receive_running = false;
	cv_signal(&isc->send_cv);
	ICL_CONN_UNLOCK(ic);
	kthread_exit();
}
804
805 static int
icl_soupcall_receive(struct socket * so,void * arg,int waitflag)806 icl_soupcall_receive(struct socket *so, void *arg, int waitflag)
807 {
808 struct icl_soft_conn *isc;
809
810 if (!soreadable(so))
811 return (SU_OK);
812
813 isc = arg;
814 cv_signal(&isc->receive_cv);
815 return (SU_OK);
816 }
817
/*
 * Prepare a PDU for transmission: encode DataSegmentLength into the
 * BHS, append the header digest, pad the data segment, append the data
 * digest, and concatenate everything into the ip_bhs_mbuf packet.
 * Returns 0 on success, 1 when an mbuf append fails.
 */
static int
icl_pdu_finalize(struct icl_pdu *request)
{
	size_t padding, pdu_len;
	uint32_t digest, zero = 0;
	int ok;
	struct icl_conn *ic;

	ic = request->ip_conn;

	icl_pdu_set_data_segment_length(request, request->ip_data_len);

	pdu_len = icl_pdu_size(request);

	if (ic->ic_header_crc32c) {
		digest = icl_mbuf_to_crc32c(request->ip_bhs_mbuf,
		    ISCSI_BHS_SIZE);
		ok = m_append(request->ip_bhs_mbuf, sizeof(digest),
		    (void *)&digest);
		if (ok != 1) {
			ICL_WARN("failed to append header digest");
			return (1);
		}
	}

	if (request->ip_data_len != 0) {
		/* Padding is at most 3 bytes, so &zero is a big enough source. */
		padding = icl_pdu_padding(request);
		if (padding > 0) {
			ok = m_append(request->ip_data_mbuf, padding,
			    (void *)&zero);
			if (ok != 1) {
				ICL_WARN("failed to append padding");
				return (1);
			}
		}

		if (ic->ic_data_crc32c) {
			/* Digest covers the padding appended above as well. */
			digest = icl_mbuf_to_crc32c(request->ip_data_mbuf,
			    roundup2(request->ip_data_len, 4));

			ok = m_append(request->ip_data_mbuf, sizeof(digest),
			    (void *)&digest);
			if (ok != 1) {
				ICL_WARN("failed to append data digest");
				return (1);
			}
		}

		m_cat(request->ip_bhs_mbuf, request->ip_data_mbuf);
		request->ip_data_mbuf = NULL;
	}

	request->ip_bhs_mbuf->m_pkthdr.len = pdu_len;

	return (0);
}
874
/*
 * Transmit the PDUs from "queue" until either the queue is drained or
 * the socket send buffer fills up.  PDUs that don't fit are left on the
 * queue and the send low watermark is armed so icl_soupcall_send()
 * wakes the send thread when space appears.  With "coalesce" enabled,
 * consecutive PDUs that fit in the available space are chained into a
 * single sosend() call.
 */
static void
icl_conn_send_pdus(struct icl_soft_conn *isc, struct icl_pdu_stailq *queue)
{
	struct icl_conn *ic = &isc->ic;
	struct icl_pdu *request, *request2;
	struct mbuf *m;
	struct socket *so;
	long available, size, size2;
#ifdef DEBUG_COALESCED
	int coalesced;
#endif
	int error;

	ICL_CONN_LOCK_ASSERT_NOT(ic);

	so = ic->ic_socket;

	SOCKBUF_LOCK(&so->so_snd);
	/*
	 * Check how much space do we have for transmit.  We can't just
	 * call sosend() and retry when we get EWOULDBLOCK or EMSGSIZE,
	 * as it always frees the mbuf chain passed to it, even in case
	 * of error.
	 */
	available = sbspace(&so->so_snd);
	isc->check_send_space = false;

	/*
	 * Notify the socket upcall that we don't need wakeups
	 * for the time being.
	 */
	so->so_snd.sb_lowat = so->so_snd.sb_hiwat + 1;
	SOCKBUF_UNLOCK(&so->so_snd);

	while (!STAILQ_EMPTY(queue)) {
		request = STAILQ_FIRST(queue);
		size = icl_pdu_size(request);
		if (available < size) {
			/*
			 * Set the low watermark, to be checked by
			 * sowriteable() in icl_soupcall_send()
			 * to avoid unnecessary wakeups until there
			 * is enough space for the PDU to fit.
			 */
			SOCKBUF_LOCK(&so->so_snd);
			/* Re-check: space may have appeared meanwhile. */
			available = sbspace(&so->so_snd);
			if (available < size) {
#if 1
				ICL_DEBUG("no space to send; "
				    "have %ld, need %ld",
				    available, size);
#endif
				so->so_snd.sb_lowat = max(size,
				    so->so_snd.sb_hiwat / 8);
				SOCKBUF_UNLOCK(&so->so_snd);
				return;
			}
			SOCKBUF_UNLOCK(&so->so_snd);
		}
		STAILQ_REMOVE_HEAD(queue, ip_next);
		error = icl_pdu_finalize(request);
		if (error != 0) {
			ICL_DEBUG("failed to finalize PDU; "
			    "dropping connection");
			icl_soft_pdu_done(request, EIO);
			icl_conn_fail(ic);
			return;
		}
		if (coalesce) {
			/* Chain as many following PDUs as fit into "available". */
			m = request->ip_bhs_mbuf;
			for (
#ifdef DEBUG_COALESCED
			    coalesced = 1
#endif
			    ; ;
#ifdef DEBUG_COALESCED
			    coalesced++
#endif
			    ) {
				request2 = STAILQ_FIRST(queue);
				if (request2 == NULL)
					break;
				size2 = icl_pdu_size(request2);
				if (available < size + size2)
					break;
				STAILQ_REMOVE_HEAD(queue, ip_next);
				error = icl_pdu_finalize(request2);
				if (error != 0) {
					ICL_DEBUG("failed to finalize PDU; "
					    "dropping connection");
					icl_soft_pdu_done(request, EIO);
					icl_soft_pdu_done(request2, EIO);
					icl_conn_fail(ic);
					return;
				}
				while (m->m_next)
					m = m->m_next;
				m_cat(m, request2->ip_bhs_mbuf);
				request2->ip_bhs_mbuf = NULL;
				request->ip_bhs_mbuf->m_pkthdr.len += size2;
				size += size2;
				icl_soft_pdu_done(request2, 0);
			}
#ifdef DEBUG_COALESCED
			if (coalesced > 1) {
				ICL_DEBUG("coalesced %d PDUs into %ld bytes",
				    coalesced, size);
			}
#endif
		}
		available -= size;
		error = sosend(so, NULL, NULL, request->ip_bhs_mbuf,
		    NULL, MSG_DONTWAIT, curthread);
		request->ip_bhs_mbuf = NULL; /* Sosend consumes the mbuf. */
		if (error != 0) {
			ICL_DEBUG("failed to send PDU, error %d; "
			    "dropping connection", error);
			icl_soft_pdu_done(request, error);
			icl_conn_fail(ic);
			return;
		}
		icl_soft_pdu_done(request, 0);
	}
}
999
/*
 * Per-connection send thread: moves PDUs from isc->to_send onto a local
 * queue under the connection lock, then transmits them unlocked via
 * icl_conn_send_pdus().  Sleeps on send_cv until woken by a queued PDU,
 * a socket-space upcall, or disconnection.
 */
static void
icl_send_thread(void *arg)
{
	struct icl_soft_conn *isc;
	struct icl_conn *ic;
	struct icl_pdu_stailq queue;

	isc = arg;
	ic = &isc->ic;

	STAILQ_INIT(&queue);

	ICL_CONN_LOCK(ic);
	for (;;) {
		for (;;) {
			/*
			 * Populate the local queue from the main one.
			 * This way the icl_conn_send_pdus() can go through
			 * all the queued PDUs without holding any locks.
			 */
			if (STAILQ_EMPTY(&queue) || isc->check_send_space)
				STAILQ_CONCAT(&queue, &isc->to_send);

			ICL_CONN_UNLOCK(ic);
			icl_conn_send_pdus(isc, &queue);
			ICL_CONN_LOCK(ic);

			/*
			 * The icl_soupcall_send() was called since the last
			 * call to sbspace(); go around;
			 */
			if (isc->check_send_space)
				continue;

			/*
			 * Local queue is empty, but we still have PDUs
			 * in the main one; go around.
			 */
			if (STAILQ_EMPTY(&queue) &&
			    !STAILQ_EMPTY(&isc->to_send))
				continue;

			/*
			 * There might be some stuff in the local queue,
			 * which didn't get sent due to not having enough send
			 * space.  Wait for socket upcall.
			 */
			break;
		}

		if (ic->ic_disconnecting) {
			//ICL_DEBUG("terminating");
			break;
		}

		cv_wait(&isc->send_cv, ic->ic_lock);
	}

	/*
	 * We're exiting; move PDUs back to the main queue, so they can
	 * get freed properly.  At this point ordering doesn't matter.
	 */
	STAILQ_CONCAT(&isc->to_send, &queue);

	/* Tell icl_soft_conn_close() we are gone. */
	isc->send_running = false;
	cv_signal(&isc->send_cv);
	ICL_CONN_UNLOCK(ic);
	kthread_exit();
}
1069
1070 static int
icl_soupcall_send(struct socket * so,void * arg,int waitflag)1071 icl_soupcall_send(struct socket *so, void *arg, int waitflag)
1072 {
1073 struct icl_soft_conn *isc;
1074 struct icl_conn *ic;
1075
1076 if (!sowriteable(so))
1077 return (SU_OK);
1078
1079 isc = arg;
1080 ic = &isc->ic;
1081
1082 ICL_CONN_LOCK(ic);
1083 isc->check_send_space = true;
1084 ICL_CONN_UNLOCK(ic);
1085
1086 cv_signal(&isc->send_cv);
1087
1088 return (SU_OK);
1089 }
1090
/*
 * Free routine for M_EXTPG mbufs built over PDU-owned pages: drop the
 * PDU reference the mbuf held; the pages themselves belong to the PDU /
 * I/O request and are not freed here.
 */
static void
icl_soft_free_mext_pg(struct mbuf *m)
{
	struct icl_soft_pdu *isp;

	M_ASSERTEXTPG(m);

	/*
	 * Nothing to do for the pages; they are owned by the PDU /
	 * I/O request.
	 */

	/* Drop reference on the PDU. */
	isp = m->m_ext.ext_arg1;
	if (atomic_fetchadd_int(&isp->ref_cnt, -1) == 1)
		icl_soft_pdu_call_cb(&isp->ip);
}
1108
1109 static int
icl_soft_conn_pdu_append_bio(struct icl_conn * ic,struct icl_pdu * request,struct bio * bp,size_t offset,size_t len,int flags)1110 icl_soft_conn_pdu_append_bio(struct icl_conn *ic, struct icl_pdu *request,
1111 struct bio *bp, size_t offset, size_t len, int flags)
1112 {
1113 struct icl_soft_pdu *isp = (struct icl_soft_pdu *)request;
1114 struct mbuf *m, *m_tail;
1115 vm_offset_t vaddr;
1116 size_t mtodo, page_offset, todo;
1117 int i;
1118
1119 KASSERT(len > 0, ("len == 0"));
1120
1121 m_tail = request->ip_data_mbuf;
1122 if (m_tail != NULL)
1123 for (; m_tail->m_next != NULL; m_tail = m_tail->m_next)
1124 ;
1125
1126 MPASS(bp->bio_flags & BIO_UNMAPPED);
1127 if (offset < PAGE_SIZE - bp->bio_ma_offset) {
1128 page_offset = bp->bio_ma_offset + offset;
1129 i = 0;
1130 } else {
1131 offset -= PAGE_SIZE - bp->bio_ma_offset;
1132 for (i = 1; offset >= PAGE_SIZE; i++)
1133 offset -= PAGE_SIZE;
1134 page_offset = offset;
1135 }
1136
1137 if (flags & ICL_NOCOPY) {
1138 m = NULL;
1139 while (len > 0) {
1140 if (m == NULL) {
1141 m = mb_alloc_ext_pgs(flags & ~ICL_NOCOPY,
1142 icl_soft_free_mext_pg, 0);
1143 if (__predict_false(m == NULL))
1144 return (ENOMEM);
1145 atomic_add_int(&isp->ref_cnt, 1);
1146 m->m_ext.ext_arg1 = isp;
1147 m->m_epg_1st_off = page_offset;
1148 }
1149
1150 todo = MIN(len, PAGE_SIZE - page_offset);
1151
1152 m->m_epg_pa[m->m_epg_npgs] =
1153 VM_PAGE_TO_PHYS(bp->bio_ma[i]);
1154 m->m_epg_npgs++;
1155 m->m_epg_last_len = todo;
1156 m->m_len += todo;
1157 m->m_ext.ext_size += PAGE_SIZE;
1158 MBUF_EXT_PGS_ASSERT_SANITY(m);
1159
1160 if (m->m_epg_npgs == MBUF_PEXT_MAX_PGS) {
1161 if (m_tail != NULL)
1162 m_tail->m_next = m;
1163 else
1164 request->ip_data_mbuf = m;
1165 m_tail = m;
1166 request->ip_data_len += m->m_len;
1167 m = NULL;
1168 }
1169
1170 page_offset = 0;
1171 len -= todo;
1172 i++;
1173 }
1174
1175 if (m != NULL) {
1176 if (m_tail != NULL)
1177 m_tail->m_next = m;
1178 else
1179 request->ip_data_mbuf = m;
1180 request->ip_data_len += m->m_len;
1181 }
1182 return (0);
1183 }
1184
1185 m = m_getm2(NULL, len, flags, MT_DATA, 0);
1186 if (__predict_false(m == NULL))
1187 return (ENOMEM);
1188
1189 if (request->ip_data_mbuf == NULL) {
1190 request->ip_data_mbuf = m;
1191 request->ip_data_len = len;
1192 } else {
1193 m_tail->m_next = m;
1194 request->ip_data_len += len;
1195 }
1196
1197 while (len > 0) {
1198 todo = MIN(len, PAGE_SIZE - page_offset);
1199 vaddr = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(bp->bio_ma[i]));
1200
1201 do {
1202 mtodo = min(todo, M_SIZE(m) - m->m_len);
1203 memcpy(mtod(m, char *) + m->m_len, (char *)vaddr +
1204 page_offset, mtodo);
1205 m->m_len += mtodo;
1206 if (m->m_len == M_SIZE(m))
1207 m = m->m_next;
1208 page_offset += mtodo;
1209 todo -= mtodo;
1210 } while (todo > 0);
1211
1212 page_offset = 0;
1213 len -= todo;
1214 i++;
1215 }
1216
1217 return (0);
1218 }
1219
1220 static int
icl_soft_conn_pdu_append_data(struct icl_conn * ic,struct icl_pdu * request,const void * addr,size_t len,int flags)1221 icl_soft_conn_pdu_append_data(struct icl_conn *ic, struct icl_pdu *request,
1222 const void *addr, size_t len, int flags)
1223 {
1224 struct icl_soft_pdu *isp = (struct icl_soft_pdu *)request;
1225 struct mbuf *mb, *newmb;
1226 size_t copylen, off = 0;
1227
1228 KASSERT(len > 0, ("len == 0"));
1229
1230 if (flags & ICL_NOCOPY) {
1231 newmb = m_get(flags & ~ICL_NOCOPY, MT_DATA);
1232 if (newmb == NULL) {
1233 ICL_WARN("failed to allocate mbuf");
1234 return (ENOMEM);
1235 }
1236
1237 newmb->m_flags |= M_RDONLY;
1238 m_extaddref(newmb, __DECONST(char *, addr), len, &isp->ref_cnt,
1239 icl_soft_mbuf_done, isp, NULL);
1240 newmb->m_len = len;
1241 } else {
1242 newmb = m_getm2(NULL, len, flags, MT_DATA, 0);
1243 if (newmb == NULL) {
1244 ICL_WARN("failed to allocate mbuf for %zd bytes", len);
1245 return (ENOMEM);
1246 }
1247
1248 for (mb = newmb; mb != NULL; mb = mb->m_next) {
1249 copylen = min(M_TRAILINGSPACE(mb), len - off);
1250 memcpy(mtod(mb, char *), (const char *)addr + off, copylen);
1251 mb->m_len = copylen;
1252 off += copylen;
1253 }
1254 KASSERT(off == len, ("%s: off != len", __func__));
1255 }
1256
1257 if (request->ip_data_mbuf == NULL) {
1258 request->ip_data_mbuf = newmb;
1259 request->ip_data_len = len;
1260 } else {
1261 m_cat(request->ip_data_mbuf, newmb);
1262 request->ip_data_len += len;
1263 }
1264
1265 return (0);
1266 }
1267
1268 void
icl_soft_conn_pdu_get_bio(struct icl_conn * ic,struct icl_pdu * ip,size_t pdu_off,struct bio * bp,size_t bio_off,size_t len)1269 icl_soft_conn_pdu_get_bio(struct icl_conn *ic, struct icl_pdu *ip,
1270 size_t pdu_off, struct bio *bp, size_t bio_off, size_t len)
1271 {
1272 vm_offset_t vaddr;
1273 size_t page_offset, todo;
1274 int i __unused;
1275
1276 MPASS(bp->bio_flags & BIO_UNMAPPED);
1277 if (bio_off < PAGE_SIZE - bp->bio_ma_offset) {
1278 page_offset = bp->bio_ma_offset + bio_off;
1279 i = 0;
1280 } else {
1281 bio_off -= PAGE_SIZE - bp->bio_ma_offset;
1282 for (i = 1; bio_off >= PAGE_SIZE; i++)
1283 bio_off -= PAGE_SIZE;
1284 page_offset = bio_off;
1285 }
1286
1287 while (len > 0) {
1288 todo = MIN(len, PAGE_SIZE - page_offset);
1289
1290 vaddr = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(bp->bio_ma[i]));
1291 m_copydata(ip->ip_data_mbuf, pdu_off, todo, (char *)vaddr +
1292 page_offset);
1293
1294 page_offset = 0;
1295 pdu_off += todo;
1296 len -= todo;
1297 i++;
1298 }
1299 }
1300
/*
 * Copy 'len' bytes of received PDU data, starting at 'off', into the
 * (mapped) buffer at 'addr'.
 */
void
icl_soft_conn_pdu_get_data(struct icl_conn *ic, struct icl_pdu *ip,
    size_t off, void *addr, size_t len)
{

	m_copydata(ip->ip_data_mbuf, off, len, addr);
}
1308
/*
 * Queue a PDU for transmission without a completion callback.
 */
static void
icl_soft_conn_pdu_queue(struct icl_conn *ic, struct icl_pdu *ip)
{

	icl_soft_conn_pdu_queue_cb(ic, ip, NULL);
}
1315
1316 static void
icl_soft_conn_pdu_queue_cb(struct icl_conn * ic,struct icl_pdu * ip,icl_pdu_cb cb)1317 icl_soft_conn_pdu_queue_cb(struct icl_conn *ic, struct icl_pdu *ip,
1318 icl_pdu_cb cb)
1319 {
1320 struct icl_soft_conn *isc = (struct icl_soft_conn *)ic;
1321 struct icl_soft_pdu *isp = (struct icl_soft_pdu *)ip;
1322
1323 ICL_CONN_LOCK_ASSERT(ic);
1324 isp->ref_cnt++;
1325 isp->cb = cb;
1326
1327 if (ic->ic_disconnecting || ic->ic_socket == NULL) {
1328 ICL_DEBUG("icl_pdu_queue on closed connection");
1329 icl_soft_pdu_done(ip, ENOTCONN);
1330 return;
1331 }
1332
1333 if (!STAILQ_EMPTY(&isc->to_send)) {
1334 STAILQ_INSERT_TAIL(&isc->to_send, ip, ip_next);
1335 /*
1336 * If the queue is not empty, someone else had already
1337 * signaled the send thread; no need to do that again,
1338 * just return.
1339 */
1340 return;
1341 }
1342
1343 STAILQ_INSERT_TAIL(&isc->to_send, ip, ip_next);
1344 cv_signal(&isc->send_cv);
1345 }
1346
1347 static struct icl_conn *
icl_soft_new_conn(const char * name,struct mtx * lock)1348 icl_soft_new_conn(const char *name, struct mtx *lock)
1349 {
1350 struct icl_soft_conn *isc;
1351 struct icl_conn *ic;
1352
1353 refcount_acquire(&icl_ncons);
1354
1355 isc = (struct icl_soft_conn *)kobj_create(&icl_soft_class, M_ICL_SOFT,
1356 M_WAITOK | M_ZERO);
1357
1358 STAILQ_INIT(&isc->to_send);
1359 cv_init(&isc->send_cv, "icl_tx");
1360 cv_init(&isc->receive_cv, "icl_rx");
1361
1362 ic = &isc->ic;
1363 ic->ic_lock = lock;
1364 #ifdef DIAGNOSTIC
1365 refcount_init(&ic->ic_outstanding_pdus, 0);
1366 #endif
1367 ic->ic_name = name;
1368 ic->ic_offload = "None";
1369 ic->ic_unmapped = PMAP_HAS_DMAP;
1370
1371 return (ic);
1372 }
1373
1374 void
icl_soft_conn_free(struct icl_conn * ic)1375 icl_soft_conn_free(struct icl_conn *ic)
1376 {
1377 struct icl_soft_conn *isc = (struct icl_soft_conn *)ic;
1378
1379 #ifdef DIAGNOSTIC
1380 KASSERT(ic->ic_outstanding_pdus == 0,
1381 ("destroying session with %d outstanding PDUs",
1382 ic->ic_outstanding_pdus));
1383 #endif
1384 cv_destroy(&isc->send_cv);
1385 cv_destroy(&isc->receive_cv);
1386 kobj_delete((struct kobj *)isc, M_ICL_SOFT);
1387 refcount_release(&icl_ncons);
1388 }
1389
/*
 * Prepare the freshly handed-off socket (buffer sizes, TCP_NODELAY,
 * upcalls) and start the send and receive threads.  Returns 0 or an
 * errno value; on failure the connection is closed via
 * icl_soft_conn_close().
 */
static int
icl_conn_start(struct icl_conn *ic)
{
	struct icl_soft_conn *isc = (struct icl_soft_conn *)ic;
	size_t minspace;
	struct sockopt opt;
	int error, one = 1;

	ICL_CONN_LOCK(ic);

	/*
	 * XXX: Ugly hack.
	 */
	if (ic->ic_socket == NULL) {
		ICL_CONN_UNLOCK(ic);
		return (EINVAL);
	}

	/* Start receiving with a bare BHS; data segments come later. */
	isc->receive_state = ICL_CONN_STATE_BHS;
	isc->receive_len = sizeof(struct iscsi_bhs);
	ic->ic_disconnecting = false;

	ICL_CONN_UNLOCK(ic);

	/*
	 * For sendspace, this is required because the current code cannot
	 * send a PDU in pieces; thus, the minimum buffer size is equal
	 * to the maximum PDU size.  "+4" is to account for possible padding.
	 */
	minspace = sizeof(struct iscsi_bhs) +
	    ic->ic_max_send_data_segment_length +
	    ISCSI_HEADER_DIGEST_SIZE + ISCSI_DATA_DIGEST_SIZE + 4;
	if (sendspace < minspace) {
		ICL_WARN("kern.icl.sendspace too low; must be at least %zd",
		    minspace);
		sendspace = minspace;
	}
	minspace = sizeof(struct iscsi_bhs) +
	    ic->ic_max_recv_data_segment_length +
	    ISCSI_HEADER_DIGEST_SIZE + ISCSI_DATA_DIGEST_SIZE + 4;
	if (recvspace < minspace) {
		ICL_WARN("kern.icl.recvspace too low; must be at least %zd",
		    minspace);
		recvspace = minspace;
	}

	error = soreserve(ic->ic_socket, sendspace, recvspace);
	if (error != 0) {
		ICL_WARN("soreserve failed with error %d", error);
		icl_soft_conn_close(ic);
		return (error);
	}
	ic->ic_socket->so_snd.sb_flags |= SB_AUTOSIZE;
	ic->ic_socket->so_rcv.sb_flags |= SB_AUTOSIZE;

	/*
	 * Disable Nagle.
	 */
	bzero(&opt, sizeof(opt));
	opt.sopt_dir = SOPT_SET;
	opt.sopt_level = IPPROTO_TCP;
	opt.sopt_name = TCP_NODELAY;
	opt.sopt_val = &one;
	opt.sopt_valsize = sizeof(one);
	error = sosetopt(ic->ic_socket, &opt);
	if (error != 0) {
		ICL_WARN("disabling TCP_NODELAY failed with error %d", error);
		icl_soft_conn_close(ic);
		return (error);
	}

	/*
	 * Register socket upcall, to get notified about incoming PDUs
	 * and free space to send outgoing ones.
	 */
	SOCKBUF_LOCK(&ic->ic_socket->so_snd);
	soupcall_set(ic->ic_socket, SO_SND, icl_soupcall_send, isc);
	SOCKBUF_UNLOCK(&ic->ic_socket->so_snd);
	SOCKBUF_LOCK(&ic->ic_socket->so_rcv);
	soupcall_set(ic->ic_socket, SO_RCV, icl_soupcall_receive, isc);
	SOCKBUF_UNLOCK(&ic->ic_socket->so_rcv);

	/*
	 * Start threads.
	 */
	ICL_CONN_LOCK(ic);
	isc->send_running = isc->receive_running = true;
	ICL_CONN_UNLOCK(ic);
	error = kthread_add(icl_send_thread, ic, NULL, NULL, 0, 0, "%stx",
	    ic->ic_name);
	if (error != 0) {
		ICL_WARN("kthread_add(9) failed with error %d", error);
		ICL_CONN_LOCK(ic);
		/*
		 * Neither thread is actually running; clear both flags
		 * so icl_soft_conn_close() does not wait for them.
		 */
		isc->send_running = isc->receive_running = false;
		cv_signal(&isc->send_cv);
		ICL_CONN_UNLOCK(ic);
		icl_soft_conn_close(ic);
		return (error);
	}
	error = kthread_add(icl_receive_thread, ic, NULL, NULL, 0, 0, "%srx",
	    ic->ic_name);
	if (error != 0) {
		ICL_WARN("kthread_add(9) failed with error %d", error);
		ICL_CONN_LOCK(ic);
		/* The send thread did start; only clear the receive flag. */
		isc->receive_running = false;
		cv_signal(&isc->send_cv);
		ICL_CONN_UNLOCK(ic);
		icl_soft_conn_close(ic);
		return (error);
	}

	return (0);
}
1503
1504 int
icl_soft_conn_handoff(struct icl_conn * ic,int fd)1505 icl_soft_conn_handoff(struct icl_conn *ic, int fd)
1506 {
1507 struct file *fp;
1508 struct socket *so;
1509 cap_rights_t rights;
1510 int error;
1511
1512 ICL_CONN_LOCK_ASSERT_NOT(ic);
1513
1514 #ifdef ICL_KERNEL_PROXY
1515 /*
1516 * We're transitioning to Full Feature phase, and we don't
1517 * really care.
1518 */
1519 if (fd == 0) {
1520 ICL_CONN_LOCK(ic);
1521 if (ic->ic_socket == NULL) {
1522 ICL_CONN_UNLOCK(ic);
1523 ICL_WARN("proxy handoff without connect");
1524 return (EINVAL);
1525 }
1526 ICL_CONN_UNLOCK(ic);
1527 return (0);
1528 }
1529 #endif
1530
1531 /*
1532 * Steal the socket from userland.
1533 */
1534 error = fget(curthread, fd,
1535 cap_rights_init_one(&rights, CAP_SOCK_CLIENT), &fp);
1536 if (error != 0)
1537 return (error);
1538 if (fp->f_type != DTYPE_SOCKET) {
1539 fdrop(fp, curthread);
1540 return (EINVAL);
1541 }
1542 so = fp->f_data;
1543 if (so->so_type != SOCK_STREAM) {
1544 fdrop(fp, curthread);
1545 return (EINVAL);
1546 }
1547
1548 ICL_CONN_LOCK(ic);
1549
1550 if (ic->ic_socket != NULL) {
1551 ICL_CONN_UNLOCK(ic);
1552 fdrop(fp, curthread);
1553 return (EBUSY);
1554 }
1555
1556 ic->ic_socket = fp->f_data;
1557 fp->f_ops = &badfileops;
1558 fp->f_data = NULL;
1559 fdrop(fp, curthread);
1560 ICL_CONN_UNLOCK(ic);
1561
1562 error = icl_conn_start(ic);
1563
1564 return (error);
1565 }
1566
/*
 * Shut down a connection: stop both worker threads, tear down the
 * socket upcalls, close the socket, and fail any PDUs still queued.
 * Safe to call concurrently from multiple threads; only one of them
 * will actually close the socket.
 */
void
icl_soft_conn_close(struct icl_conn *ic)
{
	struct icl_soft_conn *isc = (struct icl_soft_conn *)ic;
	struct icl_pdu *pdu;
	struct socket *so;

	/*
	 * Wake up the threads, so they can properly terminate.
	 * Receive thread sleeps on so->so_rcv lock, send on ic->ic_lock.
	 */
	ICL_CONN_LOCK(ic);
	if (!ic->ic_disconnecting) {
		so = ic->ic_socket;
		if (so)
			SOCKBUF_LOCK(&so->so_rcv);
		ic->ic_disconnecting = true;
		if (so)
			SOCKBUF_UNLOCK(&so->so_rcv);
	}
	/* Wait until both worker threads have noticed the flag and exited. */
	while (isc->receive_running || isc->send_running) {
		cv_signal(&isc->receive_cv);
		cv_signal(&isc->send_cv);
		cv_wait(&isc->send_cv, ic->ic_lock);
	}

	/* Some other thread could close the connection same time. */
	so = ic->ic_socket;
	if (so == NULL) {
		ICL_CONN_UNLOCK(ic);
		return;
	}
	/* Claim the socket; any concurrent closer bails out above. */
	ic->ic_socket = NULL;

	/*
	 * Deregister socket upcalls.
	 */
	ICL_CONN_UNLOCK(ic);
	SOCKBUF_LOCK(&so->so_snd);
	if (so->so_snd.sb_upcall != NULL)
		soupcall_clear(so, SO_SND);
	SOCKBUF_UNLOCK(&so->so_snd);
	SOCKBUF_LOCK(&so->so_rcv);
	if (so->so_rcv.sb_upcall != NULL)
		soupcall_clear(so, SO_RCV);
	SOCKBUF_UNLOCK(&so->so_rcv);
	soclose(so);
	ICL_CONN_LOCK(ic);

	/* Free a PDU the receive thread was in the middle of assembling. */
	if (isc->receive_pdu != NULL) {
		//ICL_DEBUG("freeing partially received PDU");
		icl_soft_conn_pdu_free(ic, isc->receive_pdu);
		isc->receive_pdu = NULL;
	}

	/*
	 * Remove any outstanding PDUs from the send queue.
	 */
	while (!STAILQ_EMPTY(&isc->to_send)) {
		pdu = STAILQ_FIRST(&isc->to_send);
		STAILQ_REMOVE_HEAD(&isc->to_send, ip_next);
		icl_soft_pdu_done(pdu, ENOTCONN);
	}

	KASSERT(STAILQ_EMPTY(&isc->to_send),
	    ("destroying session with non-empty send queue"));
	ICL_CONN_UNLOCK(ic);
}
1635
/*
 * Software iSCSI keeps no per-task offload state; nothing to set up.
 */
int
icl_soft_conn_task_setup(struct icl_conn *ic, struct icl_pdu *ip,
    struct ccb_scsiio *csio, uint32_t *task_tagp, void **prvp)
{

	return (0);
}
1643
/*
 * Counterpart of icl_soft_conn_task_setup(); nothing to tear down.
 */
void
icl_soft_conn_task_done(struct icl_conn *ic, void *prv)
{
}
1648
/*
 * Software iSCSI keeps no per-transfer offload state; nothing to set up.
 */
int
icl_soft_conn_transfer_setup(struct icl_conn *ic, struct icl_pdu *ip,
    union ctl_io *io, uint32_t *transfer_tag, void **prvp)
{

	return (0);
}
1656
/*
 * Counterpart of icl_soft_conn_transfer_setup(); nothing to tear down.
 */
void
icl_soft_conn_transfer_done(struct icl_conn *ic, void *prv)
{
}
1661
/*
 * Report the transfer limits of the software backend, taken from the
 * module's tunables.
 */
static int
icl_soft_limits(struct icl_drv_limits *idl, int socket)
{

	idl->idl_max_recv_data_segment_length = max_data_segment_length;
	idl->idl_max_send_data_segment_length = max_data_segment_length;
	idl->idl_max_burst_length = max_burst_length;
	idl->idl_first_burst_length = first_burst_length;

	return (0);
}
1673
1674 #ifdef ICL_KERNEL_PROXY
/*
 * Kernel proxy mode: establish the TCP connection on behalf of userland.
 */
int
icl_soft_conn_connect(struct icl_conn *ic, int domain, int socktype,
    int protocol, struct sockaddr *from_sa, struct sockaddr *to_sa)
{

	return (icl_soft_proxy_connect(ic, domain, socktype, protocol,
	    from_sa, to_sa));
}
1683
1684 int
icl_soft_handoff_sock(struct icl_conn * ic,struct socket * so)1685 icl_soft_handoff_sock(struct icl_conn *ic, struct socket *so)
1686 {
1687 int error;
1688
1689 ICL_CONN_LOCK_ASSERT_NOT(ic);
1690
1691 if (so->so_type != SOCK_STREAM)
1692 return (EINVAL);
1693
1694 ICL_CONN_LOCK(ic);
1695 if (ic->ic_socket != NULL) {
1696 ICL_CONN_UNLOCK(ic);
1697 return (EBUSY);
1698 }
1699 ic->ic_socket = so;
1700 ICL_CONN_UNLOCK(ic);
1701
1702 error = icl_conn_start(ic);
1703
1704 return (error);
1705 }
1706 #endif /* ICL_KERNEL_PROXY */
1707
1708 static int
icl_soft_load(void)1709 icl_soft_load(void)
1710 {
1711 int error;
1712
1713 icl_soft_pdu_zone = uma_zcreate("icl_soft_pdu",
1714 sizeof(struct icl_soft_pdu), NULL, NULL, NULL, NULL,
1715 UMA_ALIGN_PTR, 0);
1716 refcount_init(&icl_ncons, 0);
1717
1718 /*
1719 * The reason we call this "none" is that to the user,
1720 * it's known as "offload driver"; "offload driver: soft"
1721 * doesn't make much sense.
1722 */
1723 error = icl_register("none", false, 0,
1724 icl_soft_limits, icl_soft_new_conn);
1725 KASSERT(error == 0, ("failed to register"));
1726
1727 #if defined(ICL_KERNEL_PROXY) && 0
1728 /*
1729 * Debugging aid for kernel proxy functionality.
1730 */
1731 error = icl_register("proxytest", true, 0,
1732 icl_soft_limits, icl_soft_new_conn);
1733 KASSERT(error == 0, ("failed to register"));
1734 #endif
1735
1736 return (error);
1737 }
1738
/*
 * Module unload handler: refuse to unload while connections exist,
 * then unregister the backend(s) and destroy the PDU zone.
 */
static int
icl_soft_unload(void)
{

	if (icl_ncons != 0)
		return (EBUSY);

	icl_unregister("none", false);
#if defined(ICL_KERNEL_PROXY) && 0
	icl_unregister("proxytest", true);
#endif

	uma_zdestroy(icl_soft_pdu_zone);

	return (0);
}
1755
1756 static int
icl_soft_modevent(module_t mod,int what,void * arg)1757 icl_soft_modevent(module_t mod, int what, void *arg)
1758 {
1759
1760 switch (what) {
1761 case MOD_LOAD:
1762 return (icl_soft_load());
1763 case MOD_UNLOAD:
1764 return (icl_soft_unload());
1765 default:
1766 return (EINVAL);
1767 }
1768 }
1769
/* Module glue: register "icl_soft" with the kernel module system. */
moduledata_t icl_soft_data = {
	"icl_soft",
	icl_soft_modevent,
	0
};

DECLARE_MODULE(icl_soft, icl_soft_data, SI_SUB_DRIVERS, SI_ORDER_MIDDLE);
MODULE_DEPEND(icl_soft, icl, 1, 1, 1);
MODULE_VERSION(icl_soft, 1);
1779