/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
 *	The Regents of the University of California.  All rights reserved.
 * Copyright (c) 2004 The FreeBSD Foundation.  All rights reserved.
 * Copyright (c) 2004-2008 Robert N. M. Watson.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * Excerpts taken from tcp_subr.c, tcp_usrreq.c, uipc_socket.c
 */

/*
 *
 * Copyright (c) 2010 Isilon Systems, Inc.
 * Copyright (c) 2010 iX Systems, Inc.
 * Copyright (c) 2010 Panasas, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include <sys/param.h>
#include <sys/eventhandler.h>
#include <sys/kernel.h>
#include <sys/malloc.h>

#include "sdp.h"

#include <net/if.h>
#include <net/route.h>
#include <net/vnet.h>
#include <sys/sysctl.h>

uma_zone_t	sdp_zone;
struct rwlock	sdp_lock;
LIST_HEAD(, sdp_sock) sdp_list;

struct workqueue_struct *rx_comp_wq;

RW_SYSINIT(sdplockinit, &sdp_lock, "SDP lock");
#define	SDP_LIST_WLOCK()	rw_wlock(&sdp_lock)
#define	SDP_LIST_RLOCK()	rw_rlock(&sdp_lock)
#define	SDP_LIST_WUNLOCK()	rw_wunlock(&sdp_lock)
#define	SDP_LIST_RUNLOCK()	rw_runlock(&sdp_lock)
#define	SDP_LIST_WLOCK_ASSERT()	rw_assert(&sdp_lock, RW_WLOCKED)
#define	SDP_LIST_RLOCK_ASSERT()	rw_assert(&sdp_lock, RW_RLOCKED)
#define	SDP_LIST_LOCK_ASSERT()	rw_assert(&sdp_lock, RW_LOCKED)

MALLOC_DEFINE(M_SDP, "sdp", "Sockets Direct Protocol");

static void sdp_stop_keepalive_timer(struct socket *so);

/*
 * SDP protocol interface to socket abstraction.
 */
/*
 * sdp_sendspace and sdp_recvspace are the default send and receive window
 * sizes, respectively.
 */
u_long	sdp_sendspace = 1024*32;
u_long	sdp_recvspace = 1024*64;

static int sdp_count;

/*
 * Disable async. CMA events for sockets which are being torn down.
 */
static void
sdp_destroy_cma(struct sdp_sock *ssk)
{

	if (ssk->id == NULL)
		return;
	rdma_destroy_id(ssk->id);
	ssk->id = NULL;
}

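/*
 * Bind the local end of a connection via the RDMA CM.  The SDP lock is
 * dropped around rdma_create_id()/rdma_bind_addr() and re-acquired
 * before the cached local address and port are copied out of the CM id.
 */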
static int
sdp_pcbbind(struct sdp_sock *ssk, struct sockaddr *nam, struct ucred *cred)
{
	struct sockaddr_in *sin;
	struct sockaddr_in null;
	int error;

	SDP_WLOCK_ASSERT(ssk);

	if (ssk->lport != 0 || ssk->laddr != INADDR_ANY)
		return (EINVAL);
	/* rdma_bind_addr handles bind races. */
	SDP_WUNLOCK(ssk);
	if (ssk->id == NULL)
		ssk->id = rdma_create_id(&init_net, sdp_cma_handler, ssk,
		    RDMA_PS_SDP, IB_QPT_RC);
	if (ssk->id == NULL) {
		SDP_WLOCK(ssk);
		return (ENOMEM);
	}
	if (nam == NULL) {
		null.sin_family = AF_INET;
		null.sin_len = sizeof(null);
		null.sin_addr.s_addr = INADDR_ANY;
		null.sin_port = 0;
		bzero(&null.sin_zero, sizeof(null.sin_zero));
		nam = (struct sockaddr *)&null;
	}
	error = -rdma_bind_addr(ssk->id, nam);
	SDP_WLOCK(ssk);
	if (error == 0) {
		sin = (struct sockaddr_in *)&ssk->id->route.addr.src_addr;
		ssk->laddr = sin->sin_addr.s_addr;
		ssk->lport = sin->sin_port;
	} else
		sdp_destroy_cma(ssk);
	return (error);
}

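/*
 * Free an SDP protocol control block.  The socket must already be
 * detached.  The pcb is unlinked from the global list and its RDMA
 * resources (QP, rings, CM id) are released before the zone free.
 */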
static void
sdp_pcbfree(struct sdp_sock *ssk)
{

	KASSERT(ssk->socket == NULL, ("ssk %p socket still attached", ssk));
	KASSERT((ssk->flags & SDP_DESTROY) == 0,
	    ("ssk %p already destroyed", ssk));

	sdp_dbg(ssk->socket, "Freeing pcb");
	SDP_WLOCK_ASSERT(ssk);
	ssk->flags |= SDP_DESTROY;
	SDP_WUNLOCK(ssk);
	SDP_LIST_WLOCK();
	sdp_count--;
	LIST_REMOVE(ssk, list);
	SDP_LIST_WUNLOCK();
	crfree(ssk->cred);
	ssk->qp_active = 0;
	if (ssk->qp) {
		ib_destroy_qp(ssk->qp);
		ssk->qp = NULL;
	}
	sdp_tx_ring_destroy(ssk);
	sdp_rx_ring_destroy(ssk);
	sdp_destroy_cma(ssk);
	rw_destroy(&ssk->rx_ring.destroyed_lock);
	rw_destroy(&ssk->lock);
	uma_zfree(sdp_zone, ssk);
}

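/*
 * Return the cached local address and port for the socket.
 */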
static int
sdp_getsockaddr(struct socket *so, struct sockaddr *sa)
{
	struct sdp_sock *ssk = sdp_sk(so);

	SDP_RLOCK(ssk);
	*(struct sockaddr_in *)sa = (struct sockaddr_in){
		.sin_family = AF_INET,
		.sin_len = sizeof(struct sockaddr_in),
		.sin_addr.s_addr = ssk->laddr,
		.sin_port = ssk->lport,
	};
	SDP_RUNLOCK(ssk);

	return (0);
}

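/*
 * Return the cached foreign (peer) address and port for the socket.
 */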
static int
sdp_getpeeraddr(struct socket *so, struct sockaddr *sa)
{
	struct sdp_sock *ssk = sdp_sk(so);

	SDP_RLOCK(ssk);
	*(struct sockaddr_in *)sa = (struct sockaddr_in){
		.sin_family = AF_INET,
		.sin_len = sizeof(struct sockaddr_in),
		.sin_addr.s_addr = ssk->faddr,
		.sin_port = ssk->fport,
	};
	SDP_RUNLOCK(ssk);

	return (0);
}

#if 0
static void
sdp_apply_all(void (*func)(struct sdp_sock *, void *), void *arg)
{
	struct sdp_sock *ssk;

	SDP_LIST_RLOCK();
	LIST_FOREACH(ssk, &sdp_list, list) {
		SDP_WLOCK(ssk);
		func(ssk, arg);
		SDP_WUNLOCK(ssk);
	}
	SDP_LIST_RUNLOCK();
}
#endif

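/*
 * The SDP equivalent of a RST: disconnect the CM id, if any, and mark
 * the connection closed.  The SDP lock is dropped around
 * rdma_disconnect().
 */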
static void
sdp_output_reset(struct sdp_sock *ssk)
{
	struct rdma_cm_id *id;

	SDP_WLOCK_ASSERT(ssk);
	if (ssk->id) {
		id = ssk->id;
		ssk->qp_active = 0;
		SDP_WUNLOCK(ssk);
		rdma_disconnect(id);
		SDP_WLOCK(ssk);
	}
	ssk->state = TCPS_CLOSED;
}

/*
 * Attempt to close an SDP socket, marking it as dropped, and freeing
 * the socket if we hold the only reference.
 */
static struct sdp_sock *
sdp_closed(struct sdp_sock *ssk)
{
	struct socket *so;

	SDP_WLOCK_ASSERT(ssk);

	ssk->flags |= SDP_DROPPED;
	so = ssk->socket;
	soisdisconnected(so);
	if (ssk->flags & SDP_SOCKREF) {
		ssk->flags &= ~SDP_SOCKREF;
		SDP_WUNLOCK(ssk);
		sorele(so);
		return (NULL);
	}
	return (ssk);
}

/*
 * Perform timer-based shutdowns which cannot operate in
 * callout context.
 */
static void
sdp_shutdown_task(void *data, int pending)
{
	struct sdp_sock *ssk;

	ssk = data;
	SDP_WLOCK(ssk);
	/*
	 * I don't think this can race with another call to pcbfree()
	 * because SDP_TIMEWAIT protects it.  SDP_DESTROY may be redundant.
	 */
	if (ssk->flags & SDP_DESTROY)
		panic("sdp_shutdown_task: Racing with pcbfree for ssk %p",
		    ssk);
	if (ssk->flags & SDP_DISCON)
		sdp_output_reset(ssk);
	/* We have to clear this so sdp_detach() will call pcbfree(). */
	ssk->flags &= ~(SDP_TIMEWAIT | SDP_DREQWAIT);
	if ((ssk->flags & SDP_DROPPED) == 0 &&
	    sdp_closed(ssk) == NULL)
		return;
	if (ssk->socket == NULL) {
		sdp_pcbfree(ssk);
		return;
	}
	SDP_WUNLOCK(ssk);
}

/*
 * 2msl has expired, schedule the shutdown task.
 */
static void
sdp_2msl_timeout(void *data)
{
	struct sdp_sock *ssk;

	ssk = data;
	/* Callout canceled. */
	if (!callout_active(&ssk->keep2msl))
		goto out;
	callout_deactivate(&ssk->keep2msl);
	/* Should be impossible, defensive programming. */
	if ((ssk->flags & SDP_TIMEWAIT) == 0)
		goto out;
	taskqueue_enqueue(taskqueue_thread, &ssk->shutdown_task);
out:
	SDP_WUNLOCK(ssk);
	return;
}

/*
 * Schedule the 2msl wait timer.
 */
static void
sdp_2msl_wait(struct sdp_sock *ssk)
{

	SDP_WLOCK_ASSERT(ssk);
	ssk->flags |= SDP_TIMEWAIT;
	ssk->state = TCPS_TIME_WAIT;
	soisdisconnected(ssk->socket);
	callout_reset(&ssk->keep2msl, TCPTV_MSL, sdp_2msl_timeout, ssk);
}

/*
 * Timed out waiting for the final fin/ack from rdma_disconnect().
 */
static void
sdp_dreq_timeout(void *data)
{
	struct sdp_sock *ssk;

	ssk = data;
	/* Callout canceled. */
	if (!callout_active(&ssk->keep2msl))
		goto out;
	/* Callout rescheduled, probably as a different timer. */
	if (callout_pending(&ssk->keep2msl))
		goto out;
	callout_deactivate(&ssk->keep2msl);
	if (ssk->state != TCPS_FIN_WAIT_1 && ssk->state != TCPS_LAST_ACK)
		goto out;
	if ((ssk->flags & SDP_DREQWAIT) == 0)
		goto out;
	ssk->flags &= ~SDP_DREQWAIT;
	ssk->flags |= SDP_DISCON;
	sdp_2msl_wait(ssk);
	ssk->qp_active = 0;
out:
	SDP_WUNLOCK(ssk);
}

/*
 * Received the final fin/ack.  Cancel the 2msl.
 */
void
sdp_cancel_dreq_wait_timeout(struct sdp_sock *ssk)
{
	sdp_dbg(ssk->socket, "cancelling dreq wait timeout\n");
	ssk->flags &= ~SDP_DREQWAIT;
	sdp_2msl_wait(ssk);
}

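/*
 * Initialize the per-socket SDP state: the 2msl/keepalive callout, the
 * shutdown task, and the receive ring.
 */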
static int
sdp_init_sock(struct socket *sk)
{
	struct sdp_sock *ssk = sdp_sk(sk);

	sdp_dbg(sk, "%s\n", __func__);

	callout_init_rw(&ssk->keep2msl, &ssk->lock, CALLOUT_RETURNUNLOCKED);
	TASK_INIT(&ssk->shutdown_task, 0, sdp_shutdown_task, ssk);
#ifdef SDP_ZCOPY
	INIT_DELAYED_WORK(&ssk->srcavail_cancel_work, srcavail_cancel_timeout);
	ssk->zcopy_thresh = -1; /* use global sdp_zcopy_thresh */
	ssk->tx_ring.rdma_inflight = NULL;
#endif
	atomic_set(&ssk->mseq_ack, 0);
	sdp_rx_ring_init(ssk);
	ssk->tx_ring.buffer = NULL;

	return (0);
}

/*
 * Allocate an sdp_sock for the socket and reserve socket buffer space.
 */
static int
sdp_attach(struct socket *so, int proto, struct thread *td)
{
	struct sdp_sock *ssk;
	int error;

	ssk = sdp_sk(so);
	KASSERT(ssk == NULL, ("sdp_attach: ssk already set on so %p", so));
	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
		error = soreserve(so, sdp_sendspace, sdp_recvspace);
		if (error)
			return (error);
	}
	so->so_rcv.sb_flags |= SB_AUTOSIZE;
	so->so_snd.sb_flags |= SB_AUTOSIZE;
	ssk = uma_zalloc(sdp_zone, M_NOWAIT | M_ZERO);
	if (ssk == NULL)
		return (ENOBUFS);
	rw_init(&ssk->lock, "sdpsock");
	ssk->socket = so;
	ssk->cred = crhold(so->so_cred);
	so->so_pcb = (caddr_t)ssk;
	sdp_init_sock(so);
	ssk->flags = 0;
	ssk->qp_active = 0;
	ssk->state = TCPS_CLOSED;
	mbufq_init(&ssk->rxctlq, INT_MAX);
	SDP_LIST_WLOCK();
	LIST_INSERT_HEAD(&sdp_list, ssk, list);
	sdp_count++;
	SDP_LIST_WUNLOCK();

	return (0);
}

/*
 * Detach SDP from the socket, potentially leaving it around for the
 * timewait to expire.
 */
static void
sdp_detach(struct socket *so)
{
	struct sdp_sock *ssk;

	ssk = sdp_sk(so);
	SDP_WLOCK(ssk);
	KASSERT(ssk->socket != NULL, ("sdp_detach: socket is NULL"));
	ssk->socket->so_pcb = NULL;
	ssk->socket = NULL;
	if (ssk->flags & (SDP_TIMEWAIT | SDP_DREQWAIT))
		SDP_WUNLOCK(ssk);
	else if (ssk->flags & SDP_DROPPED || ssk->state < TCPS_SYN_SENT)
		sdp_pcbfree(ssk);
	else
		panic("sdp_detach: Unexpected state, ssk %p.\n", ssk);
}

/*
 * Allocate a local address for the socket.
 */
static int
sdp_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
{
	int error = 0;
	struct sdp_sock *ssk;
	struct sockaddr_in *sin;

	sin = (struct sockaddr_in *)nam;
	if (sin->sin_family != AF_INET)
		return (EAFNOSUPPORT);
	if (nam->sa_len != sizeof(*sin))
		return (EINVAL);
	if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
		return (EAFNOSUPPORT);

	ssk = sdp_sk(so);
	SDP_WLOCK(ssk);
	if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
		error = EINVAL;
		goto out;
	}
	error = sdp_pcbbind(ssk, nam, td->td_ucred);
out:
	SDP_WUNLOCK(ssk);

	return (error);
}

/*
 * Prepare to accept connections.
 */
static int
sdp_listen(struct socket *so, int backlog, struct thread *td)
{
	int error = 0;
	struct sdp_sock *ssk;

	ssk = sdp_sk(so);
	SDP_WLOCK(ssk);
	if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
		error = EINVAL;
		goto out;
	}
	if (error == 0 && ssk->lport == 0)
		error = sdp_pcbbind(ssk, (struct sockaddr *)0, td->td_ucred);
	SOCK_LOCK(so);
	if (error == 0)
		error = solisten_proto_check(so);
	if (error == 0) {
		solisten_proto(so, backlog);
		ssk->state = TCPS_LISTEN;
	}
	SOCK_UNLOCK(so);

out:
	SDP_WUNLOCK(ssk);
	if (error == 0)
		error = -rdma_listen(ssk->id, backlog);
	return (error);
}

/*
 * Initiate an SDP connection to nam.
 */
static int
sdp_start_connect(struct sdp_sock *ssk, struct sockaddr *nam, struct thread *td)
{
	struct sockaddr_in src;
	struct socket *so;
	int error;

	so = ssk->socket;

	SDP_WLOCK_ASSERT(ssk);
	if (ssk->lport == 0) {
		error = sdp_pcbbind(ssk, (struct sockaddr *)0, td->td_ucred);
		if (error)
			return (error);
	}
	src.sin_family = AF_INET;
	src.sin_len = sizeof(src);
	bzero(&src.sin_zero, sizeof(src.sin_zero));
	src.sin_port = ssk->lport;
	src.sin_addr.s_addr = ssk->laddr;
	soisconnecting(so);
	SDP_WUNLOCK(ssk);
	error = -rdma_resolve_addr(ssk->id, (struct sockaddr *)&src, nam,
	    SDP_RESOLVE_TIMEOUT);
	SDP_WLOCK(ssk);
	if (error == 0)
		ssk->state = TCPS_SYN_SENT;

	return (error);
}

/*
 * Initiate an SDP connection.
 */
static int
sdp_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
{
	int error = 0;
	struct sdp_sock *ssk;
	struct sockaddr_in *sin;

	sin = (struct sockaddr_in *)nam;
	if (nam->sa_len != sizeof(*sin))
		return (EINVAL);
	if (sin->sin_family != AF_INET)
		return (EAFNOSUPPORT);
	if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
		return (EAFNOSUPPORT);
	if ((error = prison_remote_ip4(td->td_ucred, &sin->sin_addr)) != 0)
		return (error);
	ssk = sdp_sk(so);
	SDP_WLOCK(ssk);
	if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED))
		error = EINVAL;
	else
		error = sdp_start_connect(ssk, nam, td);
	SDP_WUNLOCK(ssk);
	return (error);
}

/*
 * Drop an SDP socket, reporting the specified error.  If the connection
 * is synchronized, then send a RST to the peer.
 */
static struct sdp_sock *
sdp_drop(struct sdp_sock *ssk, int errno)
{
	struct socket *so;

	SDP_WLOCK_ASSERT(ssk);
	so = ssk->socket;
	if (TCPS_HAVERCVDSYN(ssk->state))
		sdp_output_reset(ssk);
	if (errno == ETIMEDOUT && ssk->softerror)
		errno = ssk->softerror;
	so->so_error = errno;
	return (sdp_closed(ssk));
}

/*
 * User issued close, and wish to trail through shutdown states:
 * if never received SYN, just forget it.  If got a SYN from peer,
 * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN.
 * If already got a FIN from peer, then almost done; go to LAST_ACK
 * state.  In all other cases, have already sent FIN to peer (e.g.
 * after PRU_SHUTDOWN), and just have to play tedious game waiting
 * for peer to send FIN or not respond to keep-alives, etc.
 * We can let the user exit from the close as soon as the FIN is acked.
 */
static void
sdp_usrclosed(struct sdp_sock *ssk)
{

	SDP_WLOCK_ASSERT(ssk);

	switch (ssk->state) {
	case TCPS_LISTEN:
		ssk->state = TCPS_CLOSED;
		SDP_WUNLOCK(ssk);
		sdp_destroy_cma(ssk);
		SDP_WLOCK(ssk);
		/* FALLTHROUGH */
	case TCPS_CLOSED:
		ssk = sdp_closed(ssk);
		/*
		 * sdp_closed() should never return NULL here as the socket is
		 * still open.
		 */
		KASSERT(ssk != NULL,
		    ("sdp_usrclosed: sdp_closed() returned NULL"));
		break;

	case TCPS_SYN_SENT:
		/* FALLTHROUGH */
	case TCPS_SYN_RECEIVED:
		ssk->flags |= SDP_NEEDFIN;
		break;

	case TCPS_ESTABLISHED:
		ssk->flags |= SDP_NEEDFIN;
		ssk->state = TCPS_FIN_WAIT_1;
		break;

	case TCPS_CLOSE_WAIT:
		ssk->state = TCPS_LAST_ACK;
		break;
	}
	if (ssk->state >= TCPS_FIN_WAIT_2) {
		/* Prevent the connection hanging in FIN_WAIT_2 forever. */
		if (ssk->state == TCPS_FIN_WAIT_2)
			sdp_2msl_wait(ssk);
		else
			soisdisconnected(ssk->socket);
	}
}

static void
sdp_output_disconnect(struct sdp_sock *ssk)
{

	SDP_WLOCK_ASSERT(ssk);
	callout_reset(&ssk->keep2msl, SDP_FIN_WAIT_TIMEOUT,
	    sdp_dreq_timeout, ssk);
	ssk->flags |= SDP_NEEDFIN | SDP_DREQWAIT;
	sdp_post_sends(ssk, M_NOWAIT);
}

/*
 * Initiate or continue a disconnect.
 * If embryonic state, just send reset (once).
 * If in ``let data drain'' option and linger null, just drop.
 * Otherwise (hard), mark socket disconnecting and drop
 * current input data; switch states based on user close, and
 * send segment to peer (with FIN).
 */
static void
sdp_start_disconnect(struct sdp_sock *ssk)
{
	struct socket *so;
	int unread;

	so = ssk->socket;
	SDP_WLOCK_ASSERT(ssk);
	sdp_stop_keepalive_timer(so);
	/*
	 * Neither sdp_closed() nor sdp_drop() should return NULL, as the
	 * socket is still open.
	 */
	if (ssk->state < TCPS_ESTABLISHED) {
		ssk = sdp_closed(ssk);
		KASSERT(ssk != NULL,
		    ("sdp_start_disconnect: sdp_closed() returned NULL"));
	} else if ((so->so_options & SO_LINGER) && so->so_linger == 0) {
		ssk = sdp_drop(ssk, 0);
		KASSERT(ssk != NULL,
		    ("sdp_start_disconnect: sdp_drop() returned NULL"));
	} else {
		soisdisconnecting(so);
		unread = sbused(&so->so_rcv);
		sbflush(&so->so_rcv);
		sdp_usrclosed(ssk);
		if (!(ssk->flags & SDP_DROPPED)) {
			if (unread)
				sdp_output_reset(ssk);
			else
				sdp_output_disconnect(ssk);
		}
	}
}

/*
 * User initiated disconnect.
 */
static int
sdp_disconnect(struct socket *so)
{
	struct sdp_sock *ssk;
	int error = 0;

	ssk = sdp_sk(so);
	SDP_WLOCK(ssk);
	if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
		error = ECONNRESET;
		goto out;
	}
	sdp_start_disconnect(ssk);
out:
	SDP_WUNLOCK(ssk);
	return (error);
}

/*
 * Accept a connection.  Essentially all the work is done at higher levels;
 * just return the address of the peer, storing through addr.
 *
 * XXX This is broken XXX
 *
 * The rationale for acquiring the sdp lock here is somewhat complicated,
 * and is described in detail in the commit log entry for r175612.  Acquiring
 * it delays an accept(2) racing with sonewconn(), which inserts the socket
 * before the address/port fields are initialized.  A better fix would
 * prevent the socket from being placed in the listen queue until all fields
 * are fully initialized.
 */
static int
sdp_accept(struct socket *so, struct sockaddr *sa)
{
	struct sdp_sock *ssk = NULL;
	int error;

	if (so->so_state & SS_ISDISCONNECTED)
		return (ECONNABORTED);

	error = 0;
	ssk = sdp_sk(so);
	SDP_WLOCK(ssk);
	if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED))
		error = ECONNABORTED;
	else
		*(struct sockaddr_in *)sa = (struct sockaddr_in){
			.sin_family = AF_INET,
			.sin_len = sizeof(struct sockaddr_in),
			.sin_addr.s_addr = ssk->faddr,
			.sin_port = ssk->fport,
		};
	SDP_WUNLOCK(ssk);

	return (error);
}

/*
 * Mark the connection as being incapable of further output.
 */
static int
sdp_shutdown(struct socket *so, enum shutdown_how how)
{
	struct sdp_sock *ssk = sdp_sk(so);
	int error = 0;

	SOCK_LOCK(so);
	if ((so->so_state &
	    (SS_ISCONNECTED | SS_ISCONNECTING | SS_ISDISCONNECTING)) == 0) {
		SOCK_UNLOCK(so);
		return (ENOTCONN);
	}
	if (SOLISTENING(so)) {
		if (how != SHUT_WR) {
			so->so_error = ECONNABORTED;
			solisten_wakeup(so);	/* unlocks so */
		} else
			SOCK_UNLOCK(so);
		return (0);
	}
	SOCK_UNLOCK(so);

	switch (how) {
	case SHUT_RD:
		socantrcvmore(so);
		sbrelease(so, SO_RCV);
		break;
	case SHUT_RDWR:
		socantrcvmore(so);
		sbrelease(so, SO_RCV);
		/* FALLTHROUGH */
	case SHUT_WR:
		SDP_WLOCK(ssk);
		if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
			SDP_WUNLOCK(ssk);
			error = ECONNRESET;
			break;
		}
		socantsendmore(so);
		sdp_usrclosed(ssk);
		if (!(ssk->flags & SDP_DROPPED))
			sdp_output_disconnect(ssk);
		SDP_WUNLOCK(ssk);
	}
	wakeup(&so->so_timeo);

	return (error);
}

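/*
 * Enqueue an mbuf chain, with an SDP header already prepended, on the
 * send socket buffer.  Where possible the chain is coalesced with the
 * last queued record so both still fit in a single SDP packet.
 */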
static void
sdp_append(struct sdp_sock *ssk, struct sockbuf *sb, struct mbuf *mb, int cnt)
{
	struct mbuf *n;
	int ncnt;

	SOCKBUF_LOCK_ASSERT(sb);
	SBLASTRECORDCHK(sb);
	KASSERT(mb->m_flags & M_PKTHDR,
	    ("sdp_append: %p Missing packet header.\n", mb));
	n = sb->sb_lastrecord;
	/*
	 * If the queue is empty just set all pointers and proceed.
	 */
	if (n == NULL) {
		sb->sb_lastrecord = sb->sb_mb = sb->sb_sndptr = mb;
		for (; mb; mb = mb->m_next) {
			sb->sb_mbtail = mb;
			sballoc(sb, mb);
		}
		return;
	}
	/*
	 * Count the number of mbufs in the current tail.
	 */
	for (ncnt = 0; n->m_next; n = n->m_next)
		ncnt++;
	n = sb->sb_lastrecord;
	/*
	 * If the two chains can fit in a single SDP packet and
	 * the last record has not been sent yet (WRITABLE) coalesce
	 * them.  The lastrecord remains the same but we must strip the
	 * packet header and then let sbcompress do the hard part.
	 */
	if (M_WRITABLE(n) && ncnt + cnt < SDP_MAX_SEND_SGES &&
	    n->m_pkthdr.len + mb->m_pkthdr.len - SDP_HEAD_SIZE <
	    ssk->xmit_size_goal) {
		m_adj(mb, SDP_HEAD_SIZE);
		n->m_pkthdr.len += mb->m_pkthdr.len;
		n->m_flags |= mb->m_flags & (M_PUSH | M_URG);
		m_demote(mb, 1, 0);
		sbcompress(sb, mb, sb->sb_mbtail);
		return;
	}
	/*
	 * Not compressible, just append to the end and adjust counters.
	 */
	sb->sb_lastrecord->m_flags |= M_PUSH;
	sb->sb_lastrecord->m_nextpkt = mb;
	sb->sb_lastrecord = mb;
	if (sb->sb_sndptr == NULL)
		sb->sb_sndptr = mb;
	for (; mb; mb = mb->m_next) {
		sb->sb_mbtail = mb;
		sballoc(sb, mb);
	}
}

/*
 * Do a send by putting data in output queue and updating urgent
 * marker if URG set.  Possibly send more data.  Unlike the other
 * pru_*() routines, the mbuf chains are our responsibility.  We
 * must either enqueue them or free them.  The other pru_* routines
 * generally are caller-frees.
 *
 * This comes from sendfile, normal sends will come from sdp_sosend().
 */
static int
sdp_send(struct socket *so, int flags, struct mbuf *m,
    struct sockaddr *nam, struct mbuf *control, struct thread *td)
{
	struct sdp_sock *ssk;
	struct mbuf *n;
	int error;
	int cnt;

	if (nam != NULL) {
		if (nam->sa_family != AF_INET) {
			if (control)
				m_freem(control);
			m_freem(m);
			return (EAFNOSUPPORT);
		}
		if (nam->sa_len != sizeof(struct sockaddr_in)) {
			if (control)
				m_freem(control);
			m_freem(m);
			return (EINVAL);
		}
	}

	error = 0;
	ssk = sdp_sk(so);
	KASSERT(m->m_flags & M_PKTHDR,
	    ("sdp_send: %p no packet header", m));
	M_PREPEND(m, SDP_HEAD_SIZE, M_WAITOK);
	mtod(m, struct sdp_bsdh *)->mid = SDP_MID_DATA;
	for (n = m, cnt = 0; n->m_next; n = n->m_next)
		cnt++;
	if (cnt > SDP_MAX_SEND_SGES) {
		n = m_collapse(m, M_WAITOK, SDP_MAX_SEND_SGES);
		if (n == NULL) {
			m_freem(m);
			return (EMSGSIZE);
		}
		m = n;
		for (cnt = 0; n->m_next; n = n->m_next)
			cnt++;
	}
	SDP_WLOCK(ssk);
	if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
		if (control)
			m_freem(control);
		if (m)
			m_freem(m);
		error = ECONNRESET;
		goto out;
	}
	if (control) {
		/* SDP doesn't support control messages. */
		if (control->m_len) {
			m_freem(control);
			if (m)
				m_freem(m);
			error = EINVAL;
			goto out;
		}
		m_freem(control);	/* empty control, just free it */
	}
	if (!(flags & PRUS_OOB)) {
		SOCKBUF_LOCK(&so->so_snd);
		sdp_append(ssk, &so->so_snd, m, cnt);
		SOCKBUF_UNLOCK(&so->so_snd);
		if (nam && ssk->state < TCPS_SYN_SENT) {
			/*
			 * Do implied connect if not yet connected.
			 */
			error = sdp_start_connect(ssk, nam, td);
			if (error)
				goto out;
		}
		if (flags & PRUS_EOF) {
			/*
			 * Close the send side of the connection after
			 * the data is sent.
			 */
			socantsendmore(so);
			sdp_usrclosed(ssk);
			if (!(ssk->flags & SDP_DROPPED))
				sdp_output_disconnect(ssk);
		} else if (!(ssk->flags & SDP_DROPPED) &&
		    !(flags & PRUS_MORETOCOME))
			sdp_post_sends(ssk, M_NOWAIT);
		SDP_WUNLOCK(ssk);
		return (0);
	} else {
		SOCKBUF_LOCK(&so->so_snd);
		if (sbspace(&so->so_snd) < -512) {
			SOCKBUF_UNLOCK(&so->so_snd);
			m_freem(m);
			error = ENOBUFS;
			goto out;
		}
		/*
		 * According to RFC961 (Assigned Protocols),
		 * the urgent pointer points to the last octet
		 * of urgent data.  We continue, however,
		 * to consider it to indicate the first octet
		 * of data past the urgent section.
		 * Otherwise, snd_up should be one lower.
		 */
		m->m_flags |= M_URG | M_PUSH;
		sdp_append(ssk, &so->so_snd, m, cnt);
		SOCKBUF_UNLOCK(&so->so_snd);
		if (nam && ssk->state < TCPS_SYN_SENT) {
			/*
			 * Do implied connect if not yet connected.
			 */
			error = sdp_start_connect(ssk, nam, td);
			if (error)
				goto out;
		}
		sdp_post_sends(ssk, M_NOWAIT);
		SDP_WUNLOCK(ssk);
		return (0);
	}
out:
	SDP_WUNLOCK(ssk);
	return (error);
}

/*
 * Send on a socket.  If send must go all at once and message is larger than
 * send buffering, then hard error.  Lock against other senders.  If must go
 * all at once and not enough room now, then inform user that this would
 * block and do nothing.  Otherwise, if nonblocking, send as much as
 * possible.  The data to be sent is described by "uio" if nonzero, otherwise
 * by the mbuf chain "top" (which must be null if uio is not).  Data provided
 * in mbuf chain must be small enough to send all at once.
 *
 * Returns nonzero on error, timeout or signal; callers must check for short
 * counts if EINTR/ERESTART are returned.  Data and control buffers are freed
 * on return.
 */
static int
sdp_sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
    struct mbuf *top, struct mbuf *control, int flags, struct thread *td)
{
	struct sdp_sock *ssk;
	long space, resid;
	int atomic;
	int error;
	int copy;

	if (uio != NULL)
		resid = uio->uio_resid;
	else
		resid = top->m_pkthdr.len;
	atomic = top != NULL;
	if (control != NULL) {
		if (control->m_len) {
			m_freem(control);
			if (top)
				m_freem(top);
			return (EINVAL);
		}
		m_freem(control);
		control = NULL;
	}
	/*
	 * In theory resid should be unsigned.  However, space must be
	 * signed, as it might be less than 0 if we over-committed, and we
	 * must use a signed comparison of space and resid.  On the other
	 * hand, a negative resid causes us to loop sending 0-length
	 * segments to the protocol.
	 *
	 * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM
	 * type sockets since that's an error.
	 */
	if (resid < 0 || (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) {
		error = EINVAL;
		goto out;
	}
	if (td != NULL)
		td->td_ru.ru_msgsnd++;

	ssk = sdp_sk(so);
	error = SOCK_IO_SEND_LOCK(so, SBLOCKWAIT(flags));
	if (error)
		goto out;

restart:
	do {
		SOCKBUF_LOCK(&so->so_snd);
		if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
			SOCKBUF_UNLOCK(&so->so_snd);
			error = EPIPE;
			goto release;
		}
		if (so->so_error) {
			error = so->so_error;
			so->so_error = 0;
			SOCKBUF_UNLOCK(&so->so_snd);
			goto release;
		}
		if ((so->so_state & SS_ISCONNECTED) == 0 && addr == NULL) {
			SOCKBUF_UNLOCK(&so->so_snd);
			error = ENOTCONN;
			goto release;
		}
		space = sbspace(&so->so_snd);
		if (flags & MSG_OOB)
			space += 1024;
		if (atomic && resid > ssk->xmit_size_goal - SDP_HEAD_SIZE) {
			SOCKBUF_UNLOCK(&so->so_snd);
			error = EMSGSIZE;
			goto release;
		}
		if (space < resid &&
		    (atomic || space < so->so_snd.sb_lowat)) {
			if ((so->so_state & SS_NBIO) ||
			    (flags & (MSG_NBIO | MSG_DONTWAIT)) != 0) {
				SOCKBUF_UNLOCK(&so->so_snd);
				error = EWOULDBLOCK;
				goto release;
			}
			error = sbwait(so, SO_SND);
			SOCKBUF_UNLOCK(&so->so_snd);
			if (error)
				goto release;
			goto restart;
		}
		SOCKBUF_UNLOCK(&so->so_snd);
		do {
			if (uio == NULL) {
				resid = 0;
				if (flags & MSG_EOR)
					top->m_flags |= M_EOR;
			} else {
				/*
				 * Copy the data from userland into a mbuf
				 * chain.  If no data is to be copied in,
				 * a single empty mbuf is returned.
				 */
				copy = min(space,
				    ssk->xmit_size_goal - SDP_HEAD_SIZE);
				top = m_uiotombuf(uio, M_WAITOK, copy,
				    0, M_PKTHDR |
				    ((flags & MSG_EOR) ? M_EOR : 0));
				if (top == NULL) {
					/* only possible error */
					error = EFAULT;
					goto release;
				}
				space -= resid - uio->uio_resid;
				resid = uio->uio_resid;
			}
			/*
			 * XXX all the SBS_CANTSENDMORE checks previously
			 * done could be out of date after dropping the
			 * socket lock.
			 */
			error = sdp_send(so, (flags & MSG_OOB) ? PRUS_OOB :
			    /*
			     * Set EOF on the last send if the user specified
			     * MSG_EOF.
			     */
			    ((flags & MSG_EOF) && (resid <= 0)) ? PRUS_EOF :
			    /* If there is more to send set PRUS_MORETOCOME. */
			    (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0,
			    top, addr, NULL, td);
			top = NULL;
			if (error)
				goto release;
		} while (resid && space > 0);
	} while (resid);

release:
	SOCK_IO_SEND_UNLOCK(so);
out:
	if (top != NULL)
		m_freem(top);
	return (error);
}

/*
 * The part of soreceive() that implements reading non-inline out-of-band
 * data from a socket.  For more complete comments, see soreceive(), from
 * which this code originated.
 *
 * Note that soreceive_rcvoob(), unlike the remainder of soreceive(), is
 * unable to return an mbuf chain to the caller.
 */
static int
soreceive_rcvoob(struct socket *so, struct uio *uio, int flags)
{
	struct protosw *pr = so->so_proto;
	struct mbuf *m;
	int error;

	KASSERT(flags & MSG_OOB, ("soreceive_rcvoob: (flags & MSG_OOB) == 0"));

	m = m_get(M_WAITOK, MT_DATA);
	error = pr->pr_rcvoob(so, m, flags & MSG_PEEK);
	if (error)
		goto bad;
	do {
		error = uiomove(mtod(m, void *),
		    (int)min(uio->uio_resid, m->m_len), uio);
		m = m_free(m);
	} while (uio->uio_resid && error == 0 && m);
bad:
	if (m != NULL)
		m_freem(m);
	return (error);
}

/*
 * Optimized version of soreceive() for stream (TCP) sockets.
 */
static int
sdp_sorecv(struct socket *so, struct sockaddr **psa, struct uio *uio,
    struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
{
	int len = 0, error = 0, flags, oresid;
	struct sockbuf *sb;
	struct mbuf *m, *n = NULL;
	struct sdp_sock *ssk;

	/* We only do stream sockets. */
	if (so->so_type != SOCK_STREAM)
		return (EINVAL);
	if (psa != NULL)
		*psa = NULL;
	if (controlp != NULL)
		return (EINVAL);
	if (flagsp != NULL)
		flags = *flagsp & ~MSG_EOR;
	else
		flags = 0;
	if (flags & MSG_OOB)
		return (soreceive_rcvoob(so, uio, flags));
	if (mp0 != NULL)
		*mp0 = NULL;

	sb = &so->so_rcv;
	ssk = sdp_sk(so);

	/* Prevent other readers from entering the socket. */
	error = SOCK_IO_RECV_LOCK(so, SBLOCKWAIT(flags));
	if (error)
		return (error);
	SOCKBUF_LOCK(sb);

	/* Easy one, no space to copyout anything. */
	if (uio->uio_resid == 0) {
		error = EINVAL;
		goto out;
	}
	oresid = uio->uio_resid;

	/* We will never ever get anything unless we are connected. */
	if (!(so->so_state & (SS_ISCONNECTED | SS_ISDISCONNECTED))) {
		/* When disconnecting there may be still some data left. */
		if (sbavail(sb))
			goto deliver;
		if (!(so->so_state & SS_ISDISCONNECTED))
			error = ENOTCONN;
		goto out;
	}

	/* Socket buffer is empty and we shall not block. */
	if (sbavail(sb) == 0 &&
	    ((so->so_state & SS_NBIO) || (flags & (MSG_DONTWAIT | MSG_NBIO)))) {
		error = EAGAIN;
		goto out;
	}

restart:
	SOCKBUF_LOCK_ASSERT(&so->so_rcv);

	/* Abort if socket has reported problems. */
	if (so->so_error) {
		if (sbavail(sb))
			goto deliver;
		if (oresid > uio->uio_resid)
			goto out;
		error = so->so_error;
		if (!(flags & MSG_PEEK))
			so->so_error = 0;
		goto out;
	}

	/* Door is closed.  Deliver what is left, if any. */
	if (sb->sb_state & SBS_CANTRCVMORE) {
		if (sbavail(sb))
			goto deliver;
		else
			goto out;
	}

	/* Socket buffer got some data that we shall deliver now. */
	if (sbavail(sb) && !(flags & MSG_WAITALL) &&
	    ((so->so_state & SS_NBIO) ||
	    (flags & (MSG_DONTWAIT | MSG_NBIO)) ||
	    sbavail(sb) >= sb->sb_lowat ||
	    sbavail(sb) >= uio->uio_resid ||
	    sbavail(sb) >= sb->sb_hiwat)) {
		goto deliver;
	}

	/* On MSG_WAITALL we must wait until all data or error arrives. */
	if ((flags & MSG_WAITALL) &&
	    (sbavail(sb) >= uio->uio_resid || sbavail(sb) >= sb->sb_lowat))
		goto deliver;

	/*
	 * Wait and block until (more) data comes in.
	 * NB: Drops the sockbuf lock during wait.
	 */
	error = sbwait(so, SO_RCV);
	if (error)
		goto out;
	goto restart;

deliver:
	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
	KASSERT(sbavail(sb), ("%s: sockbuf empty", __func__));
	KASSERT(sb->sb_mb != NULL, ("%s: sb_mb == NULL", __func__));

	/* Statistics. */
	if (uio->uio_td)
		uio->uio_td->td_ru.ru_msgrcv++;

	/* Fill uio until full or current end of socket buffer is reached. */
	len = min(uio->uio_resid, sbavail(sb));
	if (mp0 != NULL) {
		/* Dequeue as many mbufs as possible. */
		if (!(flags & MSG_PEEK) && len >= sb->sb_mb->m_len) {
			for (*mp0 = m = sb->sb_mb;
			    m != NULL && m->m_len <= len;
			    m = m->m_next) {
				len -= m->m_len;
				uio->uio_resid -= m->m_len;
				sbfree(sb, m);
				n = m;
			}
			sb->sb_mb = m;
			if (sb->sb_mb == NULL)
				SB_EMPTY_FIXUP(sb);
			n->m_next = NULL;
		}
		/* Copy the remainder. */
		if (len > 0) {
			KASSERT(sb->sb_mb != NULL,
			    ("%s: len > 0 && sb->sb_mb empty", __func__));

			m = m_copym(sb->sb_mb, 0, len, M_NOWAIT);
			if (m == NULL)
				len = 0;	/* Don't flush data from sockbuf. */
			else
				uio->uio_resid -= m->m_len;
			if (*mp0 != NULL)
				n->m_next = m;
			else
				*mp0 = m;
			if (*mp0 == NULL) {
				error = ENOBUFS;
				goto out;
			}
		}
	} else {
		/* NB: Must unlock socket buffer as uiomove may sleep. */
		SOCKBUF_UNLOCK(sb);
		error = m_mbuftouio(uio, sb->sb_mb, len);
		SOCKBUF_LOCK(sb);
		if (error)
			goto out;
	}
	SBLASTRECORDCHK(sb);
	SBLASTMBUFCHK(sb);

	/*
	 * Remove the delivered data from the socket buffer unless we
	 * were only peeking.
	 */
	if (!(flags & MSG_PEEK)) {
		if (len > 0)
			sbdrop_locked(sb, len);

		/* Notify protocol that we drained some data. */
		SOCKBUF_UNLOCK(sb);
		SDP_WLOCK(ssk);
		sdp_do_posts(ssk);
		SDP_WUNLOCK(ssk);
		SOCKBUF_LOCK(sb);
	}

	/*
	 * For MSG_WAITALL we may have to loop again and wait for
	 * more data to come in.
	 */
	if ((flags & MSG_WAITALL) && uio->uio_resid > 0)
		goto restart;
out:
	SBLASTRECORDCHK(sb);
	SBLASTMBUFCHK(sb);
	SOCKBUF_UNLOCK(sb);
	SOCK_IO_RECV_UNLOCK(so);
	return (error);
}

/*
 * Abort is used to tear down a connection, typically while it sits in
 * the accept queue.
 */
void
sdp_abort(struct socket *so)
{
	struct sdp_sock *ssk;

	ssk = sdp_sk(so);
	SDP_WLOCK(ssk);
	/*
	 * If we have not yet dropped, do it now.
	 */
	if (!(ssk->flags & SDP_TIMEWAIT) &&
	    !(ssk->flags & SDP_DROPPED))
		sdp_drop(ssk, ECONNABORTED);
	KASSERT(ssk->flags & SDP_DROPPED, ("sdp_abort: %p not dropped 0x%X",
	    ssk, ssk->flags));
	SDP_WUNLOCK(ssk);
}

/*
 * Close an SDP socket and initiate a friendly disconnect.
 */
static void
sdp_close(struct socket *so)
{
	struct sdp_sock *ssk;

	ssk = sdp_sk(so);
	SDP_WLOCK(ssk);
	/*
	 * If we have not yet dropped, do it now.
	 */
	if (!(ssk->flags & SDP_TIMEWAIT) &&
	    !(ssk->flags & SDP_DROPPED))
		sdp_start_disconnect(ssk);

	/*
	 * If we've still not dropped let the socket layer know we're
	 * holding on to the socket and pcb for a while.
	 */
	if (!(ssk->flags & SDP_DROPPED)) {
		ssk->flags |= SDP_SOCKREF;
		soref(so);
	}
	SDP_WUNLOCK(ssk);
}

/*
 * User requests out-of-band data.
 */
static int
sdp_rcvoob(struct socket *so, struct mbuf *m, int flags)
{
	int error = 0;
	struct sdp_sock *ssk;

	ssk = sdp_sk(so);
	SDP_WLOCK(ssk);
	if (!rx_ring_trylock(&ssk->rx_ring)) {
		SDP_WUNLOCK(ssk);
		return (ECONNRESET);
	}
	if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
		error = ECONNRESET;
		goto out;
	}
	if ((so->so_oobmark == 0 &&
	    (so->so_rcv.sb_state & SBS_RCVATMARK) == 0) ||
	    so->so_options & SO_OOBINLINE ||
	    ssk->oobflags & SDP_HADOOB) {
		error = EINVAL;
		goto out;
	}
	if ((ssk->oobflags & SDP_HAVEOOB) == 0) {
		error = EWOULDBLOCK;
		goto out;
	}
	m->m_len = 1;
	*mtod(m, caddr_t) = ssk->iobc;
	if ((flags & MSG_PEEK) == 0)
		ssk->oobflags ^= (SDP_HAVEOOB | SDP_HADOOB);
out:
	rx_ring_unlock(&ssk->rx_ring);
	SDP_WUNLOCK(ssk);
	return (error);
}

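/*
 * Record the urgent (out-of-band) byte arriving in an SDP packet and
 * notify the socket.  Unless SO_OOBINLINE is set, the byte is stripped
 * from the mbuf and stashed in ssk->iobc for sdp_rcvoob() to find.
 */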
void
sdp_urg(struct sdp_sock *ssk, struct mbuf *mb)
{
	struct mbuf *m;
	struct socket *so;

	so = ssk->socket;
	if (so == NULL)
		return;

	so->so_oobmark = sbused(&so->so_rcv) + mb->m_pkthdr.len - 1;
	sohasoutofband(so);
	ssk->oobflags &= ~(SDP_HAVEOOB | SDP_HADOOB);
	if (!(so->so_options & SO_OOBINLINE)) {
		for (m = mb; m->m_next != NULL; m = m->m_next);
		ssk->iobc = *(mtod(m, char *) + m->m_len - 1);
		ssk->oobflags |= SDP_HAVEOOB;
		m->m_len--;
		mb->m_pkthdr.len--;
	}
}

/*
 * Notify an SDP socket of an asynchronous error.
 *
 * Do not wake up user since there currently is no mechanism for
 * reporting soft errors (yet - a kqueue filter may be added).
 */
struct sdp_sock *
sdp_notify(struct sdp_sock *ssk, int error)
{

	SDP_WLOCK_ASSERT(ssk);

	if ((ssk->flags & SDP_TIMEWAIT) ||
	    (ssk->flags & SDP_DROPPED))
		return (ssk);

	/*
	 * Ignore some errors if we are hooked up.
	 */
	if (ssk->state == TCPS_ESTABLISHED &&
	    (error == EHOSTUNREACH || error == ENETUNREACH ||
	    error == EHOSTDOWN))
		return (ssk);
	ssk->softerror = error;
	return (sdp_drop(ssk, error));
}

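/*
 * The keepalive timer has fired: post a keepalive message and
 * reschedule, unless the callout was canceled, rescheduled, or the
 * connection has been dropped.
 */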
static void
sdp_keepalive_timeout(void *data)
{
	struct sdp_sock *ssk;

	ssk = data;
	/* Callout canceled. */
	if (!callout_active(&ssk->keep2msl))
		goto out;
	/* Callout rescheduled as a different kind of timer. */
	if (callout_pending(&ssk->keep2msl))
		goto out;
	callout_deactivate(&ssk->keep2msl);
	if (ssk->flags & SDP_DROPPED ||
	    (ssk->socket->so_options & SO_KEEPALIVE) == 0)
		goto out;
	sdp_post_keepalive(ssk);
	callout_reset(&ssk->keep2msl, SDP_KEEPALIVE_TIME,
	    sdp_keepalive_timeout, ssk);
out:
	SDP_WUNLOCK(ssk);
}

void
sdp_start_keepalive_timer(struct socket *so)
{
	struct sdp_sock *ssk;

	ssk = sdp_sk(so);
	if (!callout_pending(&ssk->keep2msl))
		callout_reset(&ssk->keep2msl, SDP_KEEPALIVE_TIME,
		    sdp_keepalive_timeout, ssk);
}

static void
sdp_stop_keepalive_timer(struct socket *so)
{
	struct sdp_sock *ssk;

	ssk = sdp_sk(so);
	callout_stop(&ssk->keep2msl);
}

/*
 * sdp_ctloutput() must drop the SDP socket lock before performing copyin
 * on socket option arguments.  When it re-acquires the lock after the
 * copy, it has to revalidate that the connection is still valid for the
 * socket option.
 */
#define	SDP_WLOCK_RECHECK(ssk) do {					\
	SDP_WLOCK(ssk);							\
	if ((ssk)->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {		\
		SDP_WUNLOCK(ssk);					\
		return (ECONNRESET);					\
	}								\
} while (0)

static int
sdp_ctloutput(struct socket *so, struct sockopt *sopt)
{
	int error, opt, optval;
	struct sdp_sock *ssk;

	error = 0;
	ssk = sdp_sk(so);
	if (sopt->sopt_level == SOL_SOCKET && sopt->sopt_name == SO_KEEPALIVE) {
		SDP_WLOCK(ssk);
		if (so->so_options & SO_KEEPALIVE)
			sdp_start_keepalive_timer(so);
		else
			sdp_stop_keepalive_timer(so);
		SDP_WUNLOCK(ssk);
	}
	if (sopt->sopt_level != IPPROTO_TCP)
		return (error);

	SDP_WLOCK(ssk);
	if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
		SDP_WUNLOCK(ssk);
		return (ECONNRESET);
	}

	switch (sopt->sopt_dir) {
	case SOPT_SET:
		switch (sopt->sopt_name) {
		case TCP_NODELAY:
			SDP_WUNLOCK(ssk);
			error = sooptcopyin(sopt, &optval, sizeof optval,
			    sizeof optval);
			if (error)
				return (error);

			SDP_WLOCK_RECHECK(ssk);
			opt = SDP_NODELAY;
			if (optval)
				ssk->flags |= opt;
			else
				ssk->flags &= ~opt;
			sdp_do_posts(ssk);
			SDP_WUNLOCK(ssk);
			break;

		default:
			SDP_WUNLOCK(ssk);
			error = ENOPROTOOPT;
			break;
		}
		break;

	case SOPT_GET:
		switch (sopt->sopt_name) {
		case TCP_NODELAY:
			optval = ssk->flags & SDP_NODELAY;
			SDP_WUNLOCK(ssk);
			error = sooptcopyout(sopt, &optval, sizeof optval);
			break;
		default:
			SDP_WUNLOCK(ssk);
			error = ENOPROTOOPT;
			break;
		}
		break;
	}
	return (error);
}
#undef SDP_WLOCK_RECHECK

int sdp_mod_count = 0;
int sdp_mod_usec = 0;

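/*
 * Apply the global interrupt moderation parameters to the receive
 * completion queue, if moderation is enabled.
 */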
void
sdp_set_default_moderation(struct sdp_sock *ssk)
{

	if (sdp_mod_count <= 0 || sdp_mod_usec <= 0)
		return;
	ib_modify_cq(ssk->rx_ring.cq, sdp_mod_count, sdp_mod_usec);
}

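/*
 * IB client callback for device arrival: allocate a protection domain
 * and an FMR pool and attach them to the device as client data.
 */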
static void
sdp_dev_add(struct ib_device *device)
{
	struct ib_fmr_pool_param param;
	struct sdp_device *sdp_dev;

	sdp_dev = malloc(sizeof(*sdp_dev), M_SDP, M_WAITOK | M_ZERO);
	sdp_dev->pd = ib_alloc_pd(device, 0);
	if (IS_ERR(sdp_dev->pd))
		goto out_pd;
	memset(&param, 0, sizeof param);
	param.max_pages_per_fmr = SDP_FMR_SIZE;
	param.page_shift = PAGE_SHIFT;
	param.access = (IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ);
	param.pool_size = SDP_FMR_POOL_SIZE;
	param.dirty_watermark = SDP_FMR_DIRTY_SIZE;
	param.cache = 1;
	sdp_dev->fmr_pool = ib_create_fmr_pool(sdp_dev->pd, &param);
	if (IS_ERR(sdp_dev->fmr_pool))
		goto out_fmr;
	ib_set_client_data(device, &sdp_client, sdp_dev);
	return;

out_fmr:
	ib_dealloc_pd(sdp_dev->pd);
out_pd:
	free(sdp_dev, M_SDP);
}

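/*
 * IB client callback for device removal: reset every connection using
 * the device, then release its FMR pool and protection domain.
 */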
static void
sdp_dev_rem(struct ib_device *device, void *client_data)
{
	struct sdp_device *sdp_dev;
	struct sdp_sock *ssk;

	SDP_LIST_WLOCK();
	LIST_FOREACH(ssk, &sdp_list, list) {
		if (ssk->ib_device != device)
			continue;
		SDP_WLOCK(ssk);
		if ((ssk->flags & SDP_DESTROY) == 0)
			ssk = sdp_notify(ssk, ECONNRESET);
		if (ssk)
			SDP_WUNLOCK(ssk);
	}
	SDP_LIST_WUNLOCK();
	/*
	 * XXX Do I need to wait between these two?
	 */
	sdp_dev = ib_get_client_data(device, &sdp_client);
	if (!sdp_dev)
		return;
	ib_flush_fmr_pool(sdp_dev->fmr_pool);
	ib_destroy_fmr_pool(sdp_dev->fmr_pool);
	ib_dealloc_pd(sdp_dev->pd);
	free(sdp_dev, M_SDP);
}

struct ib_client sdp_client =
    { .name = "sdp", .add = sdp_dev_add, .remove = sdp_dev_rem };

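/*
 * Export the list of SDP connections in struct xtcpcb format via
 * sysctl, mirroring tcp_pcblist(), so existing tools can display SDP
 * connections.
 */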
static int
sdp_pcblist(SYSCTL_HANDLER_ARGS)
{
	int error, n, i;
	struct sdp_sock *ssk;
	struct xinpgen xig;

	/*
	 * The process of preparing the TCB list is too time-consuming and
	 * resource-intensive to repeat twice on every request.
	 */
	if (req->oldptr == NULL) {
		n = sdp_count;
		n += imax(n / 8, 10);
		req->oldidx = 2 * (sizeof xig) + n * sizeof(struct xtcpcb);
		return (0);
	}

	if (req->newptr != NULL)
		return (EPERM);

	/*
	 * OK, now we're committed to doing something.
	 */
	SDP_LIST_RLOCK();
	n = sdp_count;
	SDP_LIST_RUNLOCK();

	error = sysctl_wire_old_buffer(req, 2 * (sizeof xig)
	    + n * sizeof(struct xtcpcb));
	if (error != 0)
		return (error);

	bzero(&xig, sizeof(xig));
	xig.xig_len = sizeof xig;
	xig.xig_count = n;
	xig.xig_gen = 0;
	xig.xig_sogen = so_gencnt;
	error = SYSCTL_OUT(req, &xig, sizeof xig);
	if (error)
		return (error);

	SDP_LIST_RLOCK();
	for (ssk = LIST_FIRST(&sdp_list), i = 0;
	    ssk != NULL && i < n; ssk = LIST_NEXT(ssk, list)) {
		struct xtcpcb xt;

		SDP_RLOCK(ssk);
		if (ssk->flags & SDP_TIMEWAIT) {
			if (ssk->cred != NULL)
				error = cr_cansee(req->td->td_ucred,
				    ssk->cred);
			else
				error = EINVAL;	/* Skip this inp. */
		} else if (ssk->socket)
			error = cr_canseesocket(req->td->td_ucred,
			    ssk->socket);
		else
			error = EINVAL;
		if (error) {
			error = 0;
			goto next;
		}

		bzero(&xt, sizeof(xt));
		xt.xt_len = sizeof xt;
		xt.xt_inp.inp_gencnt = 0;
		xt.xt_inp.inp_vflag = INP_IPV4;
		memcpy(&xt.xt_inp.inp_laddr, &ssk->laddr, sizeof(ssk->laddr));
		xt.xt_inp.inp_lport = ssk->lport;
		memcpy(&xt.xt_inp.inp_faddr, &ssk->faddr, sizeof(ssk->faddr));
		xt.xt_inp.inp_fport = ssk->fport;
		xt.t_state = ssk->state;
		if (ssk->socket != NULL)
			sotoxsocket(ssk->socket, &xt.xt_inp.xi_socket);
		xt.xt_inp.xi_socket.xso_protocol = IPPROTO_TCP;
		SDP_RUNLOCK(ssk);
		error = SYSCTL_OUT(req, &xt, sizeof xt);
		if (error)
			break;
		i++;
		continue;
next:
		SDP_RUNLOCK(ssk);
	}
	if (!error) {
		/*
		 * Give the user an updated idea of our state.
		 * If the generation differs from what we told
		 * her before, she knows that something happened
		 * while we were processing this request, and it
		 * might be necessary to retry.
		 */
		xig.xig_gen = 0;
		xig.xig_sogen = so_gencnt;
		xig.xig_count = sdp_count;
		error = SYSCTL_OUT(req, &xig, sizeof xig);
	}
	SDP_LIST_RUNLOCK();
	return (error);
}

SYSCTL_NODE(_net_inet, -1, sdp, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "SDP");

SYSCTL_PROC(_net_inet_sdp, TCPCTL_PCBLIST, pcblist,
    CTLFLAG_RD | CTLTYPE_STRUCT | CTLFLAG_MPSAFE,
    0, 0, sdp_pcblist, "S,xtcpcb",
    "List of active SDP connections");

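/*
 * Track maxsockets changes so the pcb zone limit follows them.
 */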
static void
sdp_zone_change(void *tag)
{

	uma_zone_set_max(sdp_zone, maxsockets);
}

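/*
 * Module initialization: set up the global pcb list, the pcb zone, the
 * receive completion workqueue, and register with the IB core.
 */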
static void
sdp_init(void *arg __unused)
{

	LIST_INIT(&sdp_list);
	sdp_zone = uma_zcreate("sdp_sock", sizeof(struct sdp_sock),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
	uma_zone_set_max(sdp_zone, maxsockets);
	EVENTHANDLER_REGISTER(maxsockets_change, sdp_zone_change, NULL,
	    EVENTHANDLER_PRI_ANY);
	rx_comp_wq = create_singlethread_workqueue("rx_comp_wq");
	ib_register_client(&sdp_client);
}
SYSINIT(sdp_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_SECOND, sdp_init, NULL);

#define	SDP_PROTOSW							\
	.pr_type =	SOCK_STREAM,					\
	.pr_flags =	PR_CONNREQUIRED|PR_IMPLOPCL|PR_WANTRCVD,	\
	.pr_ctloutput =	sdp_ctloutput,					\
	.pr_abort =	sdp_abort,					\
	.pr_accept =	sdp_accept,					\
	.pr_attach =	sdp_attach,					\
	.pr_bind =	sdp_bind,					\
	.pr_connect =	sdp_connect,					\
	.pr_detach =	sdp_detach,					\
	.pr_disconnect = sdp_disconnect,				\
	.pr_listen =	sdp_listen,					\
	.pr_peeraddr =	sdp_getpeeraddr,				\
	.pr_rcvoob =	sdp_rcvoob,					\
	.pr_send =	sdp_send,					\
	.pr_sosend =	sdp_sosend,					\
	.pr_soreceive =	sdp_sorecv,					\
	.pr_shutdown =	sdp_shutdown,					\
	.pr_sockaddr =	sdp_getsockaddr,				\
	.pr_close =	sdp_close

static struct protosw sdp_ip_protosw = {
	.pr_protocol =	IPPROTO_IP,
	SDP_PROTOSW
};
static struct protosw sdp_tcp_protosw = {
	.pr_protocol =	IPPROTO_TCP,
	SDP_PROTOSW
};

static struct domain sdpdomain = {
	.dom_family =	AF_INET_SDP,
	.dom_name =	"SDP",
	.dom_nprotosw =	2,
	.dom_protosw = {
		&sdp_ip_protosw,
		&sdp_tcp_protosw,
	},
};

DOMAIN_SET(sdp);

int sdp_debug_level = 1;
int sdp_data_debug_level = 0;