xref: /freebsd/sys/ofed/drivers/infiniband/ulp/sdp/sdp_main.c (revision 5f4c09dd85bff675e0ca63c55ea3c517e0fddfcc)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
5  *      The Regents of the University of California.  All rights reserved.
6  * Copyright (c) 2004 The FreeBSD Foundation.  All rights reserved.
7  * Copyright (c) 2004-2008 Robert N. M. Watson.  All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  * Excerpts taken from tcp_subr.c, tcp_usrreq.c, uipc_socket.c
34  */
35 
36 /*
37  *
38  * Copyright (c) 2010 Isilon Systems, Inc.
39  * Copyright (c) 2010 iX Systems, Inc.
40  * Copyright (c) 2010 Panasas, Inc.
41  * All rights reserved.
42  *
43  * Redistribution and use in source and binary forms, with or without
44  * modification, are permitted provided that the following conditions
45  * are met:
46  * 1. Redistributions of source code must retain the above copyright
47  *    notice unmodified, this list of conditions, and the following
48  *    disclaimer.
49  * 2. Redistributions in binary form must reproduce the above copyright
50  *    notice, this list of conditions and the following disclaimer in the
51  *    documentation and/or other materials provided with the distribution.
52  *
53  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
54  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
55  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
56  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
57  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
58  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
59  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
60  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
61  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
62  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
63  *
64  */
65 #include <sys/cdefs.h>
66 #include <sys/param.h>
67 #include <sys/eventhandler.h>
68 #include <sys/kernel.h>
69 #include <sys/malloc.h>
70 
71 #include "sdp.h"
72 
73 #include <net/if.h>
74 #include <net/route.h>
75 #include <net/vnet.h>
76 #include <sys/sysctl.h>
77 
78 uma_zone_t	sdp_zone;
79 struct rwlock	sdp_lock;
80 LIST_HEAD(, sdp_sock) sdp_list;
81 
82 struct workqueue_struct *rx_comp_wq;
83 
84 RW_SYSINIT(sdplockinit, &sdp_lock, "SDP lock");
85 #define	SDP_LIST_WLOCK()	rw_wlock(&sdp_lock)
86 #define	SDP_LIST_RLOCK()	rw_rlock(&sdp_lock)
87 #define	SDP_LIST_WUNLOCK()	rw_wunlock(&sdp_lock)
88 #define	SDP_LIST_RUNLOCK()	rw_runlock(&sdp_lock)
89 #define	SDP_LIST_WLOCK_ASSERT()	rw_assert(&sdp_lock, RW_WLOCKED)
90 #define	SDP_LIST_RLOCK_ASSERT()	rw_assert(&sdp_lock, RW_RLOCKED)
91 #define	SDP_LIST_LOCK_ASSERT()	rw_assert(&sdp_lock, RW_LOCKED)
92 
93 MALLOC_DEFINE(M_SDP, "sdp", "Sockets Direct Protocol");
94 
95 static void sdp_stop_keepalive_timer(struct socket *so);
96 
97 /*
98  * SDP protocol interface to socket abstraction.
99  */
100 /*
101  * sdp_sendspace and sdp_recvspace are the default send and receive window
102  * sizes, respectively.
103  */
104 u_long	sdp_sendspace = 1024*32;
105 u_long	sdp_recvspace = 1024*64;
106 
107 static int sdp_count;
108 
109 /*
110  * Disable asynchronous CMA events for sockets which are being torn down.
111  */
112 static void
113 sdp_destroy_cma(struct sdp_sock *ssk)
114 {
115 
116 	if (ssk->id == NULL)
117 		return;
118 	rdma_destroy_id(ssk->id);
119 	ssk->id = NULL;
120 }
121 
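/*
 * Bind the local address and port for a socket.  The CMA id is created on
 * demand and rdma_bind_addr() does the actual assignment; the pcb lock is
 * dropped around the RDMA calls, which may sleep.
 */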
122 static int
123 sdp_pcbbind(struct sdp_sock *ssk, struct sockaddr *nam, struct ucred *cred)
124 {
125 	struct sockaddr_in *sin;
126 	struct sockaddr_in null;
127 	int error;
128 
129 	SDP_WLOCK_ASSERT(ssk);
130 
131 	if (ssk->lport != 0 || ssk->laddr != INADDR_ANY)
132 		return (EINVAL);
133 	/* rdma_bind_addr handles bind races.  */
134 	SDP_WUNLOCK(ssk);
135 	if (ssk->id == NULL)
136 		ssk->id = rdma_create_id(&init_net, sdp_cma_handler, ssk, RDMA_PS_SDP, IB_QPT_RC);
137 	if (ssk->id == NULL) {
138 		SDP_WLOCK(ssk);
139 		return (ENOMEM);
140 	}
141 	if (nam == NULL) {
142 		null.sin_family = AF_INET;
143 		null.sin_len = sizeof(null);
144 		null.sin_addr.s_addr = INADDR_ANY;
145 		null.sin_port = 0;
146 		bzero(&null.sin_zero, sizeof(null.sin_zero));
147 		nam = (struct sockaddr *)&null;
148 	}
149 	error = -rdma_bind_addr(ssk->id, nam);
150 	SDP_WLOCK(ssk);
151 	if (error == 0) {
152 		sin = (struct sockaddr_in *)&ssk->id->route.addr.src_addr;
153 		ssk->laddr = sin->sin_addr.s_addr;
154 		ssk->lport = sin->sin_port;
155 	} else
156 		sdp_destroy_cma(ssk);
157 	return (error);
158 }
159 
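/*
 * Free an SDP pcb after the socket has been detached and all timers have
 * fired.  Removes the pcb from the global list and tears down the QP, the
 * transmit and receive rings, and the CMA id before releasing the zone
 * allocation.
 */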
160 static void
161 sdp_pcbfree(struct sdp_sock *ssk)
162 {
163 
164 	KASSERT(ssk->socket == NULL, ("ssk %p socket still attached", ssk));
165 	KASSERT((ssk->flags & SDP_DESTROY) == 0,
166 	    ("ssk %p already destroyed", ssk));
167 
168 	sdp_dbg(ssk->socket, "Freeing pcb");
169 	SDP_WLOCK_ASSERT(ssk);
170 	ssk->flags |= SDP_DESTROY;
171 	SDP_WUNLOCK(ssk);
172 	SDP_LIST_WLOCK();
173 	sdp_count--;
174 	LIST_REMOVE(ssk, list);
175 	SDP_LIST_WUNLOCK();
176 	crfree(ssk->cred);
177 	ssk->qp_active = 0;
178 	if (ssk->qp) {
179 		ib_destroy_qp(ssk->qp);
180 		ssk->qp = NULL;
181 	}
182 	sdp_tx_ring_destroy(ssk);
183 	sdp_rx_ring_destroy(ssk);
184 	sdp_destroy_cma(ssk);
185 	rw_destroy(&ssk->rx_ring.destroyed_lock);
186 	rw_destroy(&ssk->lock);
187 	uma_zfree(sdp_zone, ssk);
188 }
189 
190 /*
191  * Common routines to return a socket address.
192  */
193 static struct sockaddr *
194 sdp_sockaddr(in_port_t port, struct in_addr *addr_p)
195 {
196 	struct sockaddr_in *sin;
197 
198 	sin = malloc(sizeof *sin, M_SONAME,
199 		M_WAITOK | M_ZERO);
200 	sin->sin_family = AF_INET;
201 	sin->sin_len = sizeof(*sin);
202 	sin->sin_addr = *addr_p;
203 	sin->sin_port = port;
204 
205 	return (struct sockaddr *)sin;
206 }
207 
208 static int
209 sdp_getsockaddr(struct socket *so, struct sockaddr **nam)
210 {
211 	struct sdp_sock *ssk;
212 	struct in_addr addr;
213 	in_port_t port;
214 
215 	ssk = sdp_sk(so);
216 	SDP_RLOCK(ssk);
217 	port = ssk->lport;
218 	addr.s_addr = ssk->laddr;
219 	SDP_RUNLOCK(ssk);
220 
221 	*nam = sdp_sockaddr(port, &addr);
222 	return 0;
223 }
224 
225 static int
226 sdp_getpeeraddr(struct socket *so, struct sockaddr **nam)
227 {
228 	struct sdp_sock *ssk;
229 	struct in_addr addr;
230 	in_port_t port;
231 
232 	ssk = sdp_sk(so);
233 	SDP_RLOCK(ssk);
234 	port = ssk->fport;
235 	addr.s_addr = ssk->faddr;
236 	SDP_RUNLOCK(ssk);
237 
238 	*nam = sdp_sockaddr(port, &addr);
239 	return 0;
240 }
241 
242 #if 0
243 static void
244 sdp_apply_all(void (*func)(struct sdp_sock *, void *), void *arg)
245 {
246 	struct sdp_sock *ssk;
247 
248 	SDP_LIST_RLOCK();
249 	LIST_FOREACH(ssk, &sdp_list, list) {
250 		SDP_WLOCK(ssk);
251 		func(ssk, arg);
252 		SDP_WUNLOCK(ssk);
253 	}
254 	SDP_LIST_RUNLOCK();
255 }
256 #endif
257 
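/*
 * Reset the connection: issue rdma_disconnect() on the CMA id, if any, with
 * the pcb lock dropped, and mark the connection closed.
 */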
258 static void
259 sdp_output_reset(struct sdp_sock *ssk)
260 {
261 	struct rdma_cm_id *id;
262 
263 	SDP_WLOCK_ASSERT(ssk);
264 	if (ssk->id) {
265 		id = ssk->id;
266 		ssk->qp_active = 0;
267 		SDP_WUNLOCK(ssk);
268 		rdma_disconnect(id);
269 		SDP_WLOCK(ssk);
270 	}
271 	ssk->state = TCPS_CLOSED;
272 }
273 
274 /*
275  * Attempt to close an SDP socket, marking it as dropped, and freeing
276  * the socket if we hold the only reference.
277  */
278 static struct sdp_sock *
279 sdp_closed(struct sdp_sock *ssk)
280 {
281 	struct socket *so;
282 
283 	SDP_WLOCK_ASSERT(ssk);
284 
285 	ssk->flags |= SDP_DROPPED;
286 	so = ssk->socket;
287 	soisdisconnected(so);
288 	if (ssk->flags & SDP_SOCKREF) {
289 		ssk->flags &= ~SDP_SOCKREF;
290 		SDP_WUNLOCK(ssk);
291 		sorele(so);
292 		return (NULL);
293 	}
294 	return (ssk);
295 }
296 
297 /*
298  * Perform timer-based shutdowns which cannot operate in
299  * callout context.
300  */
301 static void
302 sdp_shutdown_task(void *data, int pending)
303 {
304 	struct sdp_sock *ssk;
305 
306 	ssk = data;
307 	SDP_WLOCK(ssk);
308 	/*
309 	 * I don't think this can race with another call to pcbfree()
310 	 * because SDP_TIMEWAIT protects it.  SDP_DESTROY may be redundant.
311 	 */
312 	if (ssk->flags & SDP_DESTROY)
313 		panic("sdp_shutdown_task: Racing with pcbfree for ssk %p",
314 		    ssk);
315 	if (ssk->flags & SDP_DISCON)
316 		sdp_output_reset(ssk);
317 	/* We have to clear this so sdp_detach() will call pcbfree(). */
318 	ssk->flags &= ~(SDP_TIMEWAIT | SDP_DREQWAIT);
319 	if ((ssk->flags & SDP_DROPPED) == 0 &&
320 	    sdp_closed(ssk) == NULL)
321 		return;
322 	if (ssk->socket == NULL) {
323 		sdp_pcbfree(ssk);
324 		return;
325 	}
326 	SDP_WUNLOCK(ssk);
327 }
328 
329 /*
330  * The 2MSL timer has expired; schedule the shutdown task.
331  */
332 static void
333 sdp_2msl_timeout(void *data)
334 {
335 	struct sdp_sock *ssk;
336 
337 	ssk = data;
338 	/* Callout canceled. */
339 	if (!callout_active(&ssk->keep2msl))
340 		goto out;
341 	callout_deactivate(&ssk->keep2msl);
342 	/* Should be impossible, defensive programming. */
343 	if ((ssk->flags & SDP_TIMEWAIT) == 0)
344 		goto out;
345 	taskqueue_enqueue(taskqueue_thread, &ssk->shutdown_task);
346 out:
347 	SDP_WUNLOCK(ssk);
348 	return;
349 }
350 
351 /*
352  * Schedule the 2msl wait timer.
353  */
354 static void
355 sdp_2msl_wait(struct sdp_sock *ssk)
356 {
357 
358 	SDP_WLOCK_ASSERT(ssk);
359 	ssk->flags |= SDP_TIMEWAIT;
360 	ssk->state = TCPS_TIME_WAIT;
361 	soisdisconnected(ssk->socket);
362 	callout_reset(&ssk->keep2msl, TCPTV_MSL, sdp_2msl_timeout, ssk);
363 }
364 
365 /*
366  * Timed out waiting for the final fin/ack from rdma_disconnect().
367  */
368 static void
369 sdp_dreq_timeout(void *data)
370 {
371 	struct sdp_sock *ssk;
372 
373 	ssk = data;
374 	/* Callout canceled. */
375 	if (!callout_active(&ssk->keep2msl))
376 		goto out;
377 	/* Callout rescheduled, probably as a different timer. */
378 	if (callout_pending(&ssk->keep2msl))
379 		goto out;
380 	callout_deactivate(&ssk->keep2msl);
381 	if (ssk->state != TCPS_FIN_WAIT_1 && ssk->state != TCPS_LAST_ACK)
382 		goto out;
383 	if ((ssk->flags & SDP_DREQWAIT) == 0)
384 		goto out;
385 	ssk->flags &= ~SDP_DREQWAIT;
386 	ssk->flags |= SDP_DISCON;
387 	sdp_2msl_wait(ssk);
388 	ssk->qp_active = 0;
389 out:
390 	SDP_WUNLOCK(ssk);
391 }
392 
393 /*
394  * Received the final fin/ack.  Cancel the 2MSL timer.
395  */
396 void
397 sdp_cancel_dreq_wait_timeout(struct sdp_sock *ssk)
398 {
399 	sdp_dbg(ssk->socket, "cancelling dreq wait timeout\n");
400 	ssk->flags &= ~SDP_DREQWAIT;
401 	sdp_2msl_wait(ssk);
402 }
403 
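/*
 * Initialize the protocol state of a newly attached socket: the shared
 * 2MSL/DREQ/keepalive callout, the shutdown task, and the receive ring.
 */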
404 static int
405 sdp_init_sock(struct socket *sk)
406 {
407 	struct sdp_sock *ssk = sdp_sk(sk);
408 
409 	sdp_dbg(sk, "%s\n", __func__);
410 
411 	callout_init_rw(&ssk->keep2msl, &ssk->lock, CALLOUT_RETURNUNLOCKED);
412 	TASK_INIT(&ssk->shutdown_task, 0, sdp_shutdown_task, ssk);
413 #ifdef SDP_ZCOPY
414 	INIT_DELAYED_WORK(&ssk->srcavail_cancel_work, srcavail_cancel_timeout);
415 	ssk->zcopy_thresh = -1; /* use global sdp_zcopy_thresh */
416 	ssk->tx_ring.rdma_inflight = NULL;
417 #endif
418 	atomic_set(&ssk->mseq_ack, 0);
419 	sdp_rx_ring_init(ssk);
420 	ssk->tx_ring.buffer = NULL;
421 
422 	return 0;
423 }
424 
425 /*
426  * Allocate an sdp_sock for the socket and reserve socket buffer space.
427  */
428 static int
429 sdp_attach(struct socket *so, int proto, struct thread *td)
430 {
431 	struct sdp_sock *ssk;
432 	int error;
433 
434 	ssk = sdp_sk(so);
435 	KASSERT(ssk == NULL, ("sdp_attach: ssk already set on so %p", so));
436 	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
437 		error = soreserve(so, sdp_sendspace, sdp_recvspace);
438 		if (error)
439 			return (error);
440 	}
441 	so->so_rcv.sb_flags |= SB_AUTOSIZE;
442 	so->so_snd.sb_flags |= SB_AUTOSIZE;
443 	ssk = uma_zalloc(sdp_zone, M_NOWAIT | M_ZERO);
444 	if (ssk == NULL)
445 		return (ENOBUFS);
446 	rw_init(&ssk->lock, "sdpsock");
447 	ssk->socket = so;
448 	ssk->cred = crhold(so->so_cred);
449 	so->so_pcb = (caddr_t)ssk;
450 	sdp_init_sock(so);
451 	ssk->flags = 0;
452 	ssk->qp_active = 0;
453 	ssk->state = TCPS_CLOSED;
454 	mbufq_init(&ssk->rxctlq, INT_MAX);
455 	SDP_LIST_WLOCK();
456 	LIST_INSERT_HEAD(&sdp_list, ssk, list);
457 	sdp_count++;
458 	SDP_LIST_WUNLOCK();
459 
460 	return (0);
461 }
462 
463 /*
464  * Detach SDP from the socket, potentially leaving it around for the
465  * timewait to expire.
466  */
467 static void
468 sdp_detach(struct socket *so)
469 {
470 	struct sdp_sock *ssk;
471 
472 	ssk = sdp_sk(so);
473 	SDP_WLOCK(ssk);
474 	KASSERT(ssk->socket != NULL, ("sdp_detach: socket is NULL"));
475 	ssk->socket->so_pcb = NULL;
476 	ssk->socket = NULL;
477 	if (ssk->flags & (SDP_TIMEWAIT | SDP_DREQWAIT))
478 		SDP_WUNLOCK(ssk);
479 	else if (ssk->flags & SDP_DROPPED || ssk->state < TCPS_SYN_SENT)
480 		sdp_pcbfree(ssk);
481 	else
482 		panic("sdp_detach: Unexpected state, ssk %p.\n", ssk);
483 }
484 
485 /*
486  * Allocate a local address for the socket.
487  */
488 static int
489 sdp_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
490 {
491 	int error = 0;
492 	struct sdp_sock *ssk;
493 	struct sockaddr_in *sin;
494 
495 	sin = (struct sockaddr_in *)nam;
496 	if (sin->sin_family != AF_INET)
497 		return (EAFNOSUPPORT);
498 	if (nam->sa_len != sizeof(*sin))
499 		return (EINVAL);
500 	if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
501 		return (EAFNOSUPPORT);
502 
503 	ssk = sdp_sk(so);
504 	SDP_WLOCK(ssk);
505 	if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
506 		error = EINVAL;
507 		goto out;
508 	}
509 	error = sdp_pcbbind(ssk, nam, td->td_ucred);
510 out:
511 	SDP_WUNLOCK(ssk);
512 
513 	return (error);
514 }
515 
516 /*
517  * Prepare to accept connections.
518  */
519 static int
520 sdp_listen(struct socket *so, int backlog, struct thread *td)
521 {
522 	int error = 0;
523 	struct sdp_sock *ssk;
524 
525 	ssk = sdp_sk(so);
526 	SDP_WLOCK(ssk);
527 	if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
528 		error = EINVAL;
529 		goto out;
530 	}
531 	if (error == 0 && ssk->lport == 0)
532 		error = sdp_pcbbind(ssk, (struct sockaddr *)0, td->td_ucred);
533 	SOCK_LOCK(so);
534 	if (error == 0)
535 		error = solisten_proto_check(so);
536 	if (error == 0) {
537 		solisten_proto(so, backlog);
538 		ssk->state = TCPS_LISTEN;
539 	}
540 	SOCK_UNLOCK(so);
541 
542 out:
543 	SDP_WUNLOCK(ssk);
544 	if (error == 0)
545 		error = -rdma_listen(ssk->id, backlog);
546 	return (error);
547 }
548 
549 /*
550  * Initiate an SDP connection to nam.
551  */
552 static int
553 sdp_start_connect(struct sdp_sock *ssk, struct sockaddr *nam, struct thread *td)
554 {
555 	struct sockaddr_in src;
556 	struct socket *so;
557 	int error;
558 
559 	so = ssk->socket;
560 
561 	SDP_WLOCK_ASSERT(ssk);
562 	if (ssk->lport == 0) {
563 		error = sdp_pcbbind(ssk, (struct sockaddr *)0, td->td_ucred);
564 		if (error)
565 			return error;
566 	}
567 	src.sin_family = AF_INET;
568 	src.sin_len = sizeof(src);
569 	bzero(&src.sin_zero, sizeof(src.sin_zero));
570 	src.sin_port = ssk->lport;
571 	src.sin_addr.s_addr = ssk->laddr;
572 	soisconnecting(so);
573 	SDP_WUNLOCK(ssk);
574 	error = -rdma_resolve_addr(ssk->id, (struct sockaddr *)&src, nam,
575 	    SDP_RESOLVE_TIMEOUT);
576 	SDP_WLOCK(ssk);
577 	if (error == 0)
578 		ssk->state = TCPS_SYN_SENT;
579 
580 	return (error);
581 }
582 
583 /*
584  * Initiate an SDP connection.
585  */
586 static int
587 sdp_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
588 {
589 	int error = 0;
590 	struct sdp_sock *ssk;
591 	struct sockaddr_in *sin;
592 
593 	sin = (struct sockaddr_in *)nam;
594 	if (nam->sa_len != sizeof(*sin))
595 		return (EINVAL);
596 	if (sin->sin_family != AF_INET)
597 		return (EAFNOSUPPORT);
598 	if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
599 		return (EAFNOSUPPORT);
600 	if ((error = prison_remote_ip4(td->td_ucred, &sin->sin_addr)) != 0)
601 		return (error);
602 	ssk = sdp_sk(so);
603 	SDP_WLOCK(ssk);
604 	if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED))
605 		error = EINVAL;
606 	else
607 		error = sdp_start_connect(ssk, nam, td);
608 	SDP_WUNLOCK(ssk);
609 	return (error);
610 }
611 
612 /*
613  * Drop an SDP socket, reporting
614  * the specified error.  If the connection is synchronized,
615  * then send a RST to the peer.
616  */
617 static struct sdp_sock *
618 sdp_drop(struct sdp_sock *ssk, int errno)
619 {
620 	struct socket *so;
621 
622 	SDP_WLOCK_ASSERT(ssk);
623 	so = ssk->socket;
624 	if (TCPS_HAVERCVDSYN(ssk->state))
625 		sdp_output_reset(ssk);
626 	if (errno == ETIMEDOUT && ssk->softerror)
627 		errno = ssk->softerror;
628 	so->so_error = errno;
629 	return (sdp_closed(ssk));
630 }
631 
632 /*
633  * User issued close, and wish to trail through shutdown states:
634  * if never received SYN, just forget it.  If got a SYN from peer,
635  * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN.
636  * If already got a FIN from peer, then almost done; go to LAST_ACK
637  * state.  In all other cases, have already sent FIN to peer (e.g.
638  * after PRU_SHUTDOWN), and just have to play tedious game waiting
639  * for peer to send FIN or not respond to keep-alives, etc.
640  * We can let the user exit from the close as soon as the FIN is acked.
641  */
642 static void
643 sdp_usrclosed(struct sdp_sock *ssk)
644 {
645 
646 	SDP_WLOCK_ASSERT(ssk);
647 
648 	switch (ssk->state) {
649 	case TCPS_LISTEN:
650 		ssk->state = TCPS_CLOSED;
651 		SDP_WUNLOCK(ssk);
652 		sdp_destroy_cma(ssk);
653 		SDP_WLOCK(ssk);
654 		/* FALLTHROUGH */
655 	case TCPS_CLOSED:
656 		ssk = sdp_closed(ssk);
657 		/*
658 		 * sdp_closed() should never return NULL here as the socket is
659 		 * still open.
660 		 */
661 		KASSERT(ssk != NULL,
662 		    ("sdp_usrclosed: sdp_closed() returned NULL"));
663 		break;
664 
665 	case TCPS_SYN_SENT:
666 		/* FALLTHROUGH */
667 	case TCPS_SYN_RECEIVED:
668 		ssk->flags |= SDP_NEEDFIN;
669 		break;
670 
671 	case TCPS_ESTABLISHED:
672 		ssk->flags |= SDP_NEEDFIN;
673 		ssk->state = TCPS_FIN_WAIT_1;
674 		break;
675 
676 	case TCPS_CLOSE_WAIT:
677 		ssk->state = TCPS_LAST_ACK;
678 		break;
679 	}
680 	if (ssk->state >= TCPS_FIN_WAIT_2) {
681 		/* Prevent the connection hanging in FIN_WAIT_2 forever. */
682 		if (ssk->state == TCPS_FIN_WAIT_2)
683 			sdp_2msl_wait(ssk);
684 		else
685 			soisdisconnected(ssk->socket);
686 	}
687 }
688 
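/*
 * Start a graceful disconnect: arm the disconnect-request timer, note that a
 * FIN (DisConn message) still has to be sent, and push any pending sends.
 */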
689 static void
690 sdp_output_disconnect(struct sdp_sock *ssk)
691 {
692 
693 	SDP_WLOCK_ASSERT(ssk);
694 	callout_reset(&ssk->keep2msl, SDP_FIN_WAIT_TIMEOUT,
695 	    sdp_dreq_timeout, ssk);
696 	ssk->flags |= SDP_NEEDFIN | SDP_DREQWAIT;
697 	sdp_post_sends(ssk, M_NOWAIT);
698 }
699 
700 /*
701  * Initiate or continue a disconnect.
702  * If embryonic state, just send reset (once).
703  * If in ``let data drain'' option and linger null, just drop.
704  * Otherwise (hard), mark socket disconnecting and drop
705  * current input data; switch states based on user close, and
706  * send segment to peer (with FIN).
707  */
708 static void
709 sdp_start_disconnect(struct sdp_sock *ssk)
710 {
711 	struct socket *so;
712 	int unread;
713 
714 	so = ssk->socket;
715 	SDP_WLOCK_ASSERT(ssk);
716 	sdp_stop_keepalive_timer(so);
717 	/*
718 	 * Neither sdp_closed() nor sdp_drop() should return NULL, as the
719 	 * socket is still open.
720 	 */
721 	if (ssk->state < TCPS_ESTABLISHED) {
722 		ssk = sdp_closed(ssk);
723 		KASSERT(ssk != NULL,
724 		    ("sdp_start_disconnect: sdp_closed() returned NULL"));
725 	} else if ((so->so_options & SO_LINGER) && so->so_linger == 0) {
726 		ssk = sdp_drop(ssk, 0);
727 		KASSERT(ssk != NULL,
728 		    ("sdp_start_disconnect: sdp_drop() returned NULL"));
729 	} else {
730 		soisdisconnecting(so);
731 		unread = sbused(&so->so_rcv);
732 		sbflush(&so->so_rcv);
733 		sdp_usrclosed(ssk);
734 		if (!(ssk->flags & SDP_DROPPED)) {
735 			if (unread)
736 				sdp_output_reset(ssk);
737 			else
738 				sdp_output_disconnect(ssk);
739 		}
740 	}
741 }
742 
743 /*
744  * User initiated disconnect.
745  */
746 static int
747 sdp_disconnect(struct socket *so)
748 {
749 	struct sdp_sock *ssk;
750 	int error = 0;
751 
752 	ssk = sdp_sk(so);
753 	SDP_WLOCK(ssk);
754 	if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
755 		error = ECONNRESET;
756 		goto out;
757 	}
758 	sdp_start_disconnect(ssk);
759 out:
760 	SDP_WUNLOCK(ssk);
761 	return (error);
762 }
763 
764 /*
765  * Accept a connection.  Essentially all the work is done at higher levels;
766  * just return the address of the peer, storing it through *nam.
767  *
768  *
769  * XXX This is broken XXX
770  *
771  * The rationale for acquiring the sdp lock here is somewhat complicated,
772  * and is described in detail in the commit log entry for r175612.  Acquiring
773  * it delays an accept(2) racing with sonewconn(), which inserts the socket
774  * before the address/port fields are initialized.  A better fix would
775  * prevent the socket from being placed in the listen queue until all fields
776  * are fully initialized.
777  */
778 static int
779 sdp_accept(struct socket *so, struct sockaddr **nam)
780 {
781 	struct sdp_sock *ssk = NULL;
782 	struct in_addr addr;
783 	in_port_t port;
784 	int error;
785 
786 	if (so->so_state & SS_ISDISCONNECTED)
787 		return (ECONNABORTED);
788 
789 	port = 0;
790 	addr.s_addr = 0;
791 	error = 0;
792 	ssk = sdp_sk(so);
793 	SDP_WLOCK(ssk);
794 	if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
795 		error = ECONNABORTED;
796 		goto out;
797 	}
798 	port = ssk->fport;
799 	addr.s_addr = ssk->faddr;
800 out:
801 	SDP_WUNLOCK(ssk);
802 	if (error == 0)
803 		*nam = sdp_sockaddr(port, &addr);
804 	return error;
805 }
806 
807 /*
808  * Mark the connection as being incapable of further output.
809  */
810 static int
811 sdp_shutdown(struct socket *so)
812 {
813 	int error = 0;
814 	struct sdp_sock *ssk;
815 
816 	ssk = sdp_sk(so);
817 	SDP_WLOCK(ssk);
818 	if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
819 		error = ECONNRESET;
820 		goto out;
821 	}
822 	socantsendmore(so);
823 	sdp_usrclosed(ssk);
824 	if (!(ssk->flags & SDP_DROPPED))
825 		sdp_output_disconnect(ssk);
826 
827 out:
828 	SDP_WUNLOCK(ssk);
829 
830 	return (error);
831 }
832 
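/*
 * Append an mbuf chain to the send socket buffer, coalescing it with the
 * last record when both fit in a single SDP packet and that record is still
 * writable.
 */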
833 static void
834 sdp_append(struct sdp_sock *ssk, struct sockbuf *sb, struct mbuf *mb, int cnt)
835 {
836 	struct mbuf *n;
837 	int ncnt;
838 
839 	SOCKBUF_LOCK_ASSERT(sb);
840 	SBLASTRECORDCHK(sb);
841 	KASSERT(mb->m_flags & M_PKTHDR,
842 		("sdp_append: %p Missing packet header.\n", mb));
843 	n = sb->sb_lastrecord;
844 	/*
845 	 * If the queue is empty just set all pointers and proceed.
846 	 */
847 	if (n == NULL) {
848 		sb->sb_lastrecord = sb->sb_mb = sb->sb_sndptr = mb;
849 		for (; mb; mb = mb->m_next) {
850 			sb->sb_mbtail = mb;
851 			sballoc(sb, mb);
852 		}
853 		return;
854 	}
855 	/*
856 	 * Count the number of mbufs in the current tail.
857 	 */
858 	for (ncnt = 0; n->m_next; n = n->m_next)
859 		ncnt++;
860 	n = sb->sb_lastrecord;
861 	/*
862 	 * If the two chains can fit in a single sdp packet and
863 	 * the last record has not been sent yet (WRITABLE) coalesce
864 	 * them.  The lastrecord remains the same but we must strip the
865 	 * packet header and then let sbcompress do the hard part.
866 	 */
867 	if (M_WRITABLE(n) && ncnt + cnt < SDP_MAX_SEND_SGES &&
868 	    n->m_pkthdr.len + mb->m_pkthdr.len - SDP_HEAD_SIZE <
869 	    ssk->xmit_size_goal) {
870 		m_adj(mb, SDP_HEAD_SIZE);
871 		n->m_pkthdr.len += mb->m_pkthdr.len;
872 		n->m_flags |= mb->m_flags & (M_PUSH | M_URG);
873 		m_demote(mb, 1, 0);
874 		sbcompress(sb, mb, sb->sb_mbtail);
875 		return;
876 	}
877 	/*
878 	 * Not compressible, just append to the end and adjust counters.
879 	 */
880 	sb->sb_lastrecord->m_flags |= M_PUSH;
881 	sb->sb_lastrecord->m_nextpkt = mb;
882 	sb->sb_lastrecord = mb;
883 	if (sb->sb_sndptr == NULL)
884 		sb->sb_sndptr = mb;
885 	for (; mb; mb = mb->m_next) {
886 		sb->sb_mbtail = mb;
887 		sballoc(sb, mb);
888 	}
889 }
890 
891 /*
892  * Do a send by putting data in output queue and updating urgent
893  * marker if URG set.  Possibly send more data.  Unlike the other
894  * pru_*() routines, the mbuf chains are our responsibility.  We
895  * must either enqueue them or free them.  The other pru_* routines
896  * generally are caller-frees.
897  *
898  * This path is used by sendfile(); normal sends come through sdp_sosend().
899  */
900 static int
901 sdp_send(struct socket *so, int flags, struct mbuf *m,
902     struct sockaddr *nam, struct mbuf *control, struct thread *td)
903 {
904 	struct sdp_sock *ssk;
905 	struct mbuf *n;
906 	int error;
907 	int cnt;
908 
909 	if (nam != NULL) {
910 		if (nam->sa_family != AF_INET) {
911 			if (control)
912 				m_freem(control);
913 			m_freem(m);
914 			return (EAFNOSUPPORT);
915 		}
916 		if (nam->sa_len != sizeof(struct sockaddr_in)) {
917 			if (control)
918 				m_freem(control);
919 			m_freem(m);
920 			return (EINVAL);
921 		}
922 	}
923 
924 	error = 0;
925 	ssk = sdp_sk(so);
926 	KASSERT(m->m_flags & M_PKTHDR,
927 	    ("sdp_send: %p no packet header", m));
928 	M_PREPEND(m, SDP_HEAD_SIZE, M_WAITOK);
929 	mtod(m, struct sdp_bsdh *)->mid = SDP_MID_DATA;
930 	for (n = m, cnt = 0; n->m_next; n = n->m_next)
931 		cnt++;
932 	if (cnt > SDP_MAX_SEND_SGES) {
933 		n = m_collapse(m, M_WAITOK, SDP_MAX_SEND_SGES);
934 		if (n == NULL) {
935 			m_freem(m);
936 			return (EMSGSIZE);
937 		}
938 		m = n;
939 		for (cnt = 0; n->m_next; n = n->m_next)
940 			cnt++;
941 	}
942 	SDP_WLOCK(ssk);
943 	if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
944 		if (control)
945 			m_freem(control);
946 		if (m)
947 			m_freem(m);
948 		error = ECONNRESET;
949 		goto out;
950 	}
951 	if (control) {
952 		/* SDP doesn't support control messages. */
953 		if (control->m_len) {
954 			m_freem(control);
955 			if (m)
956 				m_freem(m);
957 			error = EINVAL;
958 			goto out;
959 		}
960 		m_freem(control);	/* empty control, just free it */
961 	}
962 	if (!(flags & PRUS_OOB)) {
963 		SOCKBUF_LOCK(&so->so_snd);
964 		sdp_append(ssk, &so->so_snd, m, cnt);
965 		SOCKBUF_UNLOCK(&so->so_snd);
966 		if (nam && ssk->state < TCPS_SYN_SENT) {
967 			/*
968 			 * Do implied connect if not yet connected.
969 			 */
970 			error = sdp_start_connect(ssk, nam, td);
971 			if (error)
972 				goto out;
973 		}
974 		if (flags & PRUS_EOF) {
975 			/*
976 			 * Close the send side of the connection after
977 			 * the data is sent.
978 			 */
979 			socantsendmore(so);
980 			sdp_usrclosed(ssk);
981 			if (!(ssk->flags & SDP_DROPPED))
982 				sdp_output_disconnect(ssk);
983 		} else if (!(ssk->flags & SDP_DROPPED) &&
984 		    !(flags & PRUS_MORETOCOME))
985 			sdp_post_sends(ssk, M_NOWAIT);
986 		SDP_WUNLOCK(ssk);
987 		return (0);
988 	} else {
989 		SOCKBUF_LOCK(&so->so_snd);
990 		if (sbspace(&so->so_snd) < -512) {
991 			SOCKBUF_UNLOCK(&so->so_snd);
992 			m_freem(m);
993 			error = ENOBUFS;
994 			goto out;
995 		}
996 		/*
997 		 * According to RFC961 (Assigned Protocols),
998 		 * the urgent pointer points to the last octet
999 		 * of urgent data.  We continue, however,
1000 		 * to consider it to indicate the first octet
1001 		 * of data past the urgent section.
1002 		 * Otherwise, snd_up should be one lower.
1003 		 */
1004 		m->m_flags |= M_URG | M_PUSH;
1005 		sdp_append(ssk, &so->so_snd, m, cnt);
1006 		SOCKBUF_UNLOCK(&so->so_snd);
1007 		if (nam && ssk->state < TCPS_SYN_SENT) {
1008 			/*
1009 			 * Do implied connect if not yet connected.
1010 			 */
1011 			error = sdp_start_connect(ssk, nam, td);
1012 			if (error)
1013 				goto out;
1014 		}
1015 		sdp_post_sends(ssk, M_NOWAIT);
1016 		SDP_WUNLOCK(ssk);
1017 		return (0);
1018 	}
1019 out:
1020 	SDP_WUNLOCK(ssk);
1021 	return (error);
1022 }
1023 
1024 /*
1025  * Send on a socket.  If send must go all at once and message is larger than
1026  * send buffering, then hard error.  Lock against other senders.  If must go
1027  * all at once and not enough room now, then inform user that this would
1028  * block and do nothing.  Otherwise, if nonblocking, send as much as
1029  * possible.  The data to be sent is described by "uio" if nonzero, otherwise
1030  * by the mbuf chain "top" (which must be null if uio is not).  Data provided
1031  * in mbuf chain must be small enough to send all at once.
1032  *
1033  * Returns nonzero on error, timeout or signal; callers must check for short
1034  * counts if EINTR/ERESTART are returned.  Data and control buffers are freed
1035  * on return.
1036  */
1037 static int
1038 sdp_sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
1039     struct mbuf *top, struct mbuf *control, int flags, struct thread *td)
1040 {
1041 	struct sdp_sock *ssk;
1042 	long space, resid;
1043 	int atomic;
1044 	int error;
1045 	int copy;
1046 
1047 	if (uio != NULL)
1048 		resid = uio->uio_resid;
1049 	else
1050 		resid = top->m_pkthdr.len;
1051 	atomic = top != NULL;
1052 	if (control != NULL) {
1053 		if (control->m_len) {
1054 			m_freem(control);
1055 			if (top)
1056 				m_freem(top);
1057 			return (EINVAL);
1058 		}
1059 		m_freem(control);
1060 		control = NULL;
1061 	}
1062 	/*
1063 	 * In theory resid should be unsigned.  However, space must be
1064 	 * signed, as it might be less than 0 if we over-committed, and we
1065 	 * must use a signed comparison of space and resid.  On the other
1066 	 * hand, a negative resid causes us to loop sending 0-length
1067 	 * segments to the protocol.
1068 	 *
1069 	 * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM
1070 	 * type sockets since that's an error.
1071 	 */
1072 	if (resid < 0 || (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) {
1073 		error = EINVAL;
1074 		goto out;
1075 	}
1076 	if (td != NULL)
1077 		td->td_ru.ru_msgsnd++;
1078 
1079 	ssk = sdp_sk(so);
1080 	error = SOCK_IO_SEND_LOCK(so, SBLOCKWAIT(flags));
1081 	if (error)
1082 		goto out;
1083 
1084 restart:
1085 	do {
1086 		SOCKBUF_LOCK(&so->so_snd);
1087 		if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
1088 			SOCKBUF_UNLOCK(&so->so_snd);
1089 			error = EPIPE;
1090 			goto release;
1091 		}
1092 		if (so->so_error) {
1093 			error = so->so_error;
1094 			so->so_error = 0;
1095 			SOCKBUF_UNLOCK(&so->so_snd);
1096 			goto release;
1097 		}
1098 		if ((so->so_state & SS_ISCONNECTED) == 0 && addr == NULL) {
1099 			SOCKBUF_UNLOCK(&so->so_snd);
1100 			error = ENOTCONN;
1101 			goto release;
1102 		}
1103 		space = sbspace(&so->so_snd);
1104 		if (flags & MSG_OOB)
1105 			space += 1024;
1106 		if (atomic && resid > ssk->xmit_size_goal - SDP_HEAD_SIZE) {
1107 			SOCKBUF_UNLOCK(&so->so_snd);
1108 			error = EMSGSIZE;
1109 			goto release;
1110 		}
1111 		if (space < resid &&
1112 		    (atomic || space < so->so_snd.sb_lowat)) {
1113 			if ((so->so_state & SS_NBIO) ||
1114 			    (flags & (MSG_NBIO | MSG_DONTWAIT)) != 0) {
1115 				SOCKBUF_UNLOCK(&so->so_snd);
1116 				error = EWOULDBLOCK;
1117 				goto release;
1118 			}
1119 			error = sbwait(so, SO_SND);
1120 			SOCKBUF_UNLOCK(&so->so_snd);
1121 			if (error)
1122 				goto release;
1123 			goto restart;
1124 		}
1125 		SOCKBUF_UNLOCK(&so->so_snd);
1126 		do {
1127 			if (uio == NULL) {
1128 				resid = 0;
1129 				if (flags & MSG_EOR)
1130 					top->m_flags |= M_EOR;
1131 			} else {
1132 				/*
1133 				 * Copy the data from userland into a mbuf
1134 				 * chain.  If no data is to be copied in,
1135 				 * a single empty mbuf is returned.
1136 				 */
1137 				copy = min(space,
1138 				    ssk->xmit_size_goal - SDP_HEAD_SIZE);
1139 				top = m_uiotombuf(uio, M_WAITOK, copy,
1140 				    0, M_PKTHDR |
1141 				    ((flags & MSG_EOR) ? M_EOR : 0));
1142 				if (top == NULL) {
1143 					/* only possible error */
1144 					error = EFAULT;
1145 					goto release;
1146 				}
1147 				space -= resid - uio->uio_resid;
1148 				resid = uio->uio_resid;
1149 			}
1150 			/*
1151 			 * XXX all the SBS_CANTSENDMORE checks previously
1152 			 * done could be out of date after dropping the
1153 			 * socket lock.
1154 			 */
1155 			error = sdp_send(so, (flags & MSG_OOB) ? PRUS_OOB :
1156 			/*
1157 			 * Set EOF on the last send if the user specified
1158 			 * MSG_EOF.
1159 			 */
1160 			    ((flags & MSG_EOF) && (resid <= 0)) ? PRUS_EOF :
1161 			/* If there is more to send set PRUS_MORETOCOME. */
1162 			    (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0,
1163 			    top, addr, NULL, td);
1164 			top = NULL;
1165 			if (error)
1166 				goto release;
1167 		} while (resid && space > 0);
1168 	} while (resid);
1169 
1170 release:
1171 	SOCK_IO_SEND_UNLOCK(so);
1172 out:
1173 	if (top != NULL)
1174 		m_freem(top);
1175 	return (error);
1176 }
1177 
1178 /*
1179  * The part of soreceive() that implements reading non-inline out-of-band
1180  * data from a socket.  For more complete comments, see soreceive(), from
1181  * which this code originated.
1182  *
1183  * Note that soreceive_rcvoob(), unlike the remainder of soreceive(), is
1184  * unable to return an mbuf chain to the caller.
1185  */
1186 static int
1187 soreceive_rcvoob(struct socket *so, struct uio *uio, int flags)
1188 {
1189 	struct protosw *pr = so->so_proto;
1190 	struct mbuf *m;
1191 	int error;
1192 
1193 	KASSERT(flags & MSG_OOB, ("soreceive_rcvoob: (flags & MSG_OOB) == 0"));
1194 
1195 	m = m_get(M_WAITOK, MT_DATA);
1196 	error = pr->pr_rcvoob(so, m, flags & MSG_PEEK);
1197 	if (error)
1198 		goto bad;
1199 	do {
1200 		error = uiomove(mtod(m, void *),
1201 		    (int) min(uio->uio_resid, m->m_len), uio);
1202 		m = m_free(m);
1203 	} while (uio->uio_resid && error == 0 && m);
1204 bad:
1205 	if (m != NULL)
1206 		m_freem(m);
1207 	return (error);
1208 }
1209 
1210 /*
1211  * Optimized version of soreceive() for SDP stream sockets.
1212  */
1213 static int
1214 sdp_sorecv(struct socket *so, struct sockaddr **psa, struct uio *uio,
1215     struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
1216 {
1217 	int len = 0, error = 0, flags, oresid;
1218 	struct sockbuf *sb;
1219 	struct mbuf *m, *n = NULL;
1220 	struct sdp_sock *ssk;
1221 
1222 	/* We only do stream sockets. */
1223 	if (so->so_type != SOCK_STREAM)
1224 		return (EINVAL);
1225 	if (psa != NULL)
1226 		*psa = NULL;
1227 	if (controlp != NULL)
1228 		return (EINVAL);
1229 	if (flagsp != NULL)
1230 		flags = *flagsp &~ MSG_EOR;
1231 	else
1232 		flags = 0;
1233 	if (flags & MSG_OOB)
1234 		return (soreceive_rcvoob(so, uio, flags));
1235 	if (mp0 != NULL)
1236 		*mp0 = NULL;
1237 
1238 	sb = &so->so_rcv;
1239 	ssk = sdp_sk(so);
1240 
1241 	/* Prevent other readers from entering the socket. */
1242 	error = SOCK_IO_RECV_LOCK(so, SBLOCKWAIT(flags));
1243 	if (error)
1244 		return (error);
1245 	SOCKBUF_LOCK(sb);
1246 
1247 	/* Easy one, no space to copyout anything. */
1248 	if (uio->uio_resid == 0) {
1249 		error = EINVAL;
1250 		goto out;
1251 	}
1252 	oresid = uio->uio_resid;
1253 
1254 	/* We will never ever get anything unless we are connected. */
1255 	if (!(so->so_state & (SS_ISCONNECTED|SS_ISDISCONNECTED))) {
1256 		/* When disconnecting there may be still some data left. */
1257 		if (sbavail(sb))
1258 			goto deliver;
1259 		if (!(so->so_state & SS_ISDISCONNECTED))
1260 			error = ENOTCONN;
1261 		goto out;
1262 	}
1263 
1264 	/* Socket buffer is empty and we shall not block. */
1265 	if (sbavail(sb) == 0 &&
1266 	    ((so->so_state & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO)))) {
1267 		error = EAGAIN;
1268 		goto out;
1269 	}
1270 
1271 restart:
1272 	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
1273 
1274 	/* Abort if socket has reported problems. */
1275 	if (so->so_error) {
1276 		if (sbavail(sb))
1277 			goto deliver;
1278 		if (oresid > uio->uio_resid)
1279 			goto out;
1280 		error = so->so_error;
1281 		if (!(flags & MSG_PEEK))
1282 			so->so_error = 0;
1283 		goto out;
1284 	}
1285 
1286 	/* Door is closed.  Deliver what is left, if any. */
1287 	if (sb->sb_state & SBS_CANTRCVMORE) {
1288 		if (sbavail(sb))
1289 			goto deliver;
1290 		else
1291 			goto out;
1292 	}
1293 
1294 	/* Socket buffer got some data that we shall deliver now. */
1295 	if (sbavail(sb) && !(flags & MSG_WAITALL) &&
1296 	    ((so->so_state & SS_NBIO) ||
1297 	     (flags & (MSG_DONTWAIT|MSG_NBIO)) ||
1298 	     sbavail(sb) >= sb->sb_lowat ||
1299 	     sbavail(sb) >= uio->uio_resid ||
1300 	     sbavail(sb) >= sb->sb_hiwat)) {
1301 		goto deliver;
1302 	}
1303 
1304 	/* On MSG_WAITALL we must wait until all data or error arrives. */
1305 	if ((flags & MSG_WAITALL) &&
1306 	    (sbavail(sb) >= uio->uio_resid || sbavail(sb) >= sb->sb_lowat))
1307 		goto deliver;
1308 
1309 	/*
1310 	 * Wait and block until (more) data comes in.
1311 	 * NB: Drops the sockbuf lock during wait.
1312 	 */
1313 	error = sbwait(so, SO_RCV);
1314 	if (error)
1315 		goto out;
1316 	goto restart;
1317 
1318 deliver:
1319 	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
1320 	KASSERT(sbavail(sb), ("%s: sockbuf empty", __func__));
1321 	KASSERT(sb->sb_mb != NULL, ("%s: sb_mb == NULL", __func__));
1322 
1323 	/* Statistics. */
1324 	if (uio->uio_td)
1325 		uio->uio_td->td_ru.ru_msgrcv++;
1326 
1327 	/* Fill uio until full or current end of socket buffer is reached. */
1328 	len = min(uio->uio_resid, sbavail(sb));
1329 	if (mp0 != NULL) {
1330 		/* Dequeue as many mbufs as possible. */
1331 		if (!(flags & MSG_PEEK) && len >= sb->sb_mb->m_len) {
1332 			for (*mp0 = m = sb->sb_mb;
1333 			     m != NULL && m->m_len <= len;
1334 			     m = m->m_next) {
1335 				len -= m->m_len;
1336 				uio->uio_resid -= m->m_len;
1337 				sbfree(sb, m);
1338 				n = m;
1339 			}
1340 			sb->sb_mb = m;
1341 			if (sb->sb_mb == NULL)
1342 				SB_EMPTY_FIXUP(sb);
1343 			n->m_next = NULL;
1344 		}
1345 		/* Copy the remainder. */
1346 		if (len > 0) {
1347 			KASSERT(sb->sb_mb != NULL,
1348 			    ("%s: len > 0 && sb->sb_mb empty", __func__));
1349 
1350 			m = m_copym(sb->sb_mb, 0, len, M_NOWAIT);
1351 			if (m == NULL)
1352 				len = 0;	/* Don't flush data from sockbuf. */
1353 			else
1354 				uio->uio_resid -= m->m_len;
1355 			if (*mp0 != NULL)
1356 				n->m_next = m;
1357 			else
1358 				*mp0 = m;
1359 			if (*mp0 == NULL) {
1360 				error = ENOBUFS;
1361 				goto out;
1362 			}
1363 		}
1364 	} else {
1365 		/* NB: Must unlock socket buffer as uiomove may sleep. */
1366 		SOCKBUF_UNLOCK(sb);
1367 		error = m_mbuftouio(uio, sb->sb_mb, len);
1368 		SOCKBUF_LOCK(sb);
1369 		if (error)
1370 			goto out;
1371 	}
1372 	SBLASTRECORDCHK(sb);
1373 	SBLASTMBUFCHK(sb);
1374 
1375 	/*
1376 	 * Remove the delivered data from the socket buffer unless we
1377 	 * were only peeking.
1378 	 */
1379 	if (!(flags & MSG_PEEK)) {
1380 		if (len > 0)
1381 			sbdrop_locked(sb, len);
1382 
1383 		/* Notify protocol that we drained some data. */
1384 		SOCKBUF_UNLOCK(sb);
1385 		SDP_WLOCK(ssk);
1386 		sdp_do_posts(ssk);
1387 		SDP_WUNLOCK(ssk);
1388 		SOCKBUF_LOCK(sb);
1389 	}
1390 
1391 	/*
1392 	 * For MSG_WAITALL we may have to loop again and wait for
1393 	 * more data to come in.
1394 	 */
1395 	if ((flags & MSG_WAITALL) && uio->uio_resid > 0)
1396 		goto restart;
1397 out:
1398 	SBLASTRECORDCHK(sb);
1399 	SBLASTMBUFCHK(sb);
1400 	SOCKBUF_UNLOCK(sb);
1401 	SOCK_IO_RECV_UNLOCK(so);
1402 	return (error);
1403 }
1404 
1405 /*
1406  * Abort is used to tear down a connection, typically while it is sitting
1407  * in the accept queue.
1408  */
1409 void
1410 sdp_abort(struct socket *so)
1411 {
1412 	struct sdp_sock *ssk;
1413 
1414 	ssk = sdp_sk(so);
1415 	SDP_WLOCK(ssk);
1416 	/*
1417 	 * If we have not yet dropped, do it now.
1418 	 */
1419 	if (!(ssk->flags & SDP_TIMEWAIT) &&
1420 	    !(ssk->flags & SDP_DROPPED))
1421 		sdp_drop(ssk, ECONNABORTED);
1422 	KASSERT(ssk->flags & SDP_DROPPED, ("sdp_abort: %p not dropped 0x%X",
1423 	    ssk, ssk->flags));
1424 	SDP_WUNLOCK(ssk);
1425 }
1426 
1427 /*
1428  * Close an SDP socket and initiate a friendly disconnect.
1429  */
1430 static void
1431 sdp_close(struct socket *so)
1432 {
1433 	struct sdp_sock *ssk;
1434 
1435 	ssk = sdp_sk(so);
1436 	SDP_WLOCK(ssk);
1437 	/*
1438 	 * If we have not yet dropped, do it now.
1439 	 */
1440 	if (!(ssk->flags & SDP_TIMEWAIT) &&
1441 	    !(ssk->flags & SDP_DROPPED))
1442 		sdp_start_disconnect(ssk);
1443 
1444 	/*
1445 	 * If we've still not dropped let the socket layer know we're
1446 	 * holding on to the socket and pcb for a while.
1447 	 */
1448 	if (!(ssk->flags & SDP_DROPPED)) {
1449 		ssk->flags |= SDP_SOCKREF;
1450 		soref(so);
1451 	}
1452 	SDP_WUNLOCK(ssk);
1453 }
1454 
1455 /*
1456  * User requests out-of-band data.
1457  */
1458 static int
1459 sdp_rcvoob(struct socket *so, struct mbuf *m, int flags)
1460 {
1461 	int error = 0;
1462 	struct sdp_sock *ssk;
1463 
1464 	ssk = sdp_sk(so);
1465 	SDP_WLOCK(ssk);
1466 	if (!rx_ring_trylock(&ssk->rx_ring)) {
1467 		SDP_WUNLOCK(ssk);
1468 		return (ECONNRESET);
1469 	}
1470 	if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
1471 		error = ECONNRESET;
1472 		goto out;
1473 	}
1474 	if ((so->so_oobmark == 0 &&
1475 	     (so->so_rcv.sb_state & SBS_RCVATMARK) == 0) ||
1476 	    so->so_options & SO_OOBINLINE ||
1477 	    ssk->oobflags & SDP_HADOOB) {
1478 		error = EINVAL;
1479 		goto out;
1480 	}
1481 	if ((ssk->oobflags & SDP_HAVEOOB) == 0) {
1482 		error = EWOULDBLOCK;
1483 		goto out;
1484 	}
1485 	m->m_len = 1;
1486 	*mtod(m, caddr_t) = ssk->iobc;
1487 	if ((flags & MSG_PEEK) == 0)
1488 		ssk->oobflags ^= (SDP_HAVEOOB | SDP_HADOOB);
1489 out:
1490 	rx_ring_unlock(&ssk->rx_ring);
1491 	SDP_WUNLOCK(ssk);
1492 	return (error);
1493 }
1494 
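/*
 * Handle arriving urgent (out-of-band) data: record the OOB mark on the
 * socket and, unless SO_OOBINLINE is set, pull the final byte out of the
 * mbuf chain and stash it for retrieval via sdp_rcvoob().
 */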
1495 void
1496 sdp_urg(struct sdp_sock *ssk, struct mbuf *mb)
1497 {
1498 	struct mbuf *m;
1499 	struct socket *so;
1500 
1501 	so = ssk->socket;
1502 	if (so == NULL)
1503 		return;
1504 
1505 	so->so_oobmark = sbused(&so->so_rcv) + mb->m_pkthdr.len - 1;
1506 	sohasoutofband(so);
1507 	ssk->oobflags &= ~(SDP_HAVEOOB | SDP_HADOOB);
1508 	if (!(so->so_options & SO_OOBINLINE)) {
1509 		for (m = mb; m->m_next != NULL; m = m->m_next);
1510 		ssk->iobc = *(mtod(m, char *) + m->m_len - 1);
1511 		ssk->oobflags |= SDP_HAVEOOB;
1512 		m->m_len--;
1513 		mb->m_pkthdr.len--;
1514 	}
1515 }
1516 
1517 /*
1518  * Notify an SDP socket of an asynchronous error.
1519  *
1520  * Do not wake up user since there currently is no mechanism for
1521  * reporting soft errors (yet - a kqueue filter may be added).
1522  */
1523 struct sdp_sock *
1524 sdp_notify(struct sdp_sock *ssk, int error)
1525 {
1526 
1527 	SDP_WLOCK_ASSERT(ssk);
1528 
1529 	if ((ssk->flags & SDP_TIMEWAIT) ||
1530 	    (ssk->flags & SDP_DROPPED))
1531 		return (ssk);
1532 
1533 	/*
1534 	 * Ignore some errors if we are hooked up.
1535 	 */
1536 	if (ssk->state == TCPS_ESTABLISHED &&
1537 	    (error == EHOSTUNREACH || error == ENETUNREACH ||
1538 	     error == EHOSTDOWN))
1539 		return (ssk);
1540 	ssk->softerror = error;
1541 	return sdp_drop(ssk, error);
1542 }
1543 
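/*
 * Keepalive timer handler: post an SDP keepalive message and rearm, unless
 * the callout was canceled or rescheduled, the connection was dropped, or
 * SO_KEEPALIVE has been cleared.
 */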
1544 static void
1545 sdp_keepalive_timeout(void *data)
1546 {
1547 	struct sdp_sock *ssk;
1548 
1549 	ssk = data;
1550 	/* Callout canceled. */
1551 	if (!callout_active(&ssk->keep2msl))
1552 		goto out;
1553 	/* Callout rescheduled as a different kind of timer. */
1554 	if (callout_pending(&ssk->keep2msl))
1555 		goto out;
1556 	callout_deactivate(&ssk->keep2msl);
1557 	if (ssk->flags & SDP_DROPPED ||
1558 	    (ssk->socket->so_options & SO_KEEPALIVE) == 0)
1559 		goto out;
1560 	sdp_post_keepalive(ssk);
1561 	callout_reset(&ssk->keep2msl, SDP_KEEPALIVE_TIME,
1562 	    sdp_keepalive_timeout, ssk);
1563 out:
1564 	SDP_WUNLOCK(ssk);
1565 }
1566 
1567 
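/*
 * Arm the keepalive timer if it is not already pending.
 */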
1568 void
1569 sdp_start_keepalive_timer(struct socket *so)
1570 {
1571 	struct sdp_sock *ssk;
1572 
1573 	ssk = sdp_sk(so);
1574 	if (!callout_pending(&ssk->keep2msl))
1575 		callout_reset(&ssk->keep2msl, SDP_KEEPALIVE_TIME,
1576 		    sdp_keepalive_timeout, ssk);
1577 }
1578 
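/*
 * Cancel the keepalive timer.
 */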
1579 static void
1580 sdp_stop_keepalive_timer(struct socket *so)
1581 {
1582 	struct sdp_sock *ssk;
1583 
1584 	ssk = sdp_sk(so);
1585 	callout_stop(&ssk->keep2msl);
1586 }
1587 
1588 /*
1589  * sdp_ctloutput() must drop the SDP socket lock before performing copyin on
1590  * socket option arguments.  When it re-acquires the lock after the copy, it
1591  * has to revalidate that the connection is still valid for the socket
1592  * option.
1593  */
1594 #define SDP_WLOCK_RECHECK(inp) do {					\
1595 	SDP_WLOCK(ssk);							\
1596 	if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {		\
1597 		SDP_WUNLOCK(ssk);					\
1598 		return (ECONNRESET);					\
1599 	}								\
1600 } while(0)
1601 
1602 static int
1603 sdp_ctloutput(struct socket *so, struct sockopt *sopt)
1604 {
1605 	int	error, opt, optval;
1606 	struct sdp_sock *ssk;
1607 
1608 	error = 0;
1609 	ssk = sdp_sk(so);
1610 	if (sopt->sopt_level == SOL_SOCKET && sopt->sopt_name == SO_KEEPALIVE) {
1611 		SDP_WLOCK(ssk);
1612 		if (so->so_options & SO_KEEPALIVE)
1613 			sdp_start_keepalive_timer(so);
1614 		else
1615 			sdp_stop_keepalive_timer(so);
1616 		SDP_WUNLOCK(ssk);
1617 	}
1618 	if (sopt->sopt_level != IPPROTO_TCP)
1619 		return (error);
1620 
1621 	SDP_WLOCK(ssk);
1622 	if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
1623 		SDP_WUNLOCK(ssk);
1624 		return (ECONNRESET);
1625 	}
1626 
1627 	switch (sopt->sopt_dir) {
1628 	case SOPT_SET:
1629 		switch (sopt->sopt_name) {
1630 		case TCP_NODELAY:
1631 			SDP_WUNLOCK(ssk);
1632 			error = sooptcopyin(sopt, &optval, sizeof optval,
1633 			    sizeof optval);
1634 			if (error)
1635 				return (error);
1636 
1637 			SDP_WLOCK_RECHECK(ssk);
1638 			opt = SDP_NODELAY;
1639 			if (optval)
1640 				ssk->flags |= opt;
1641 			else
1642 				ssk->flags &= ~opt;
1643 			sdp_do_posts(ssk);
1644 			SDP_WUNLOCK(ssk);
1645 			break;
1646 
1647 		default:
1648 			SDP_WUNLOCK(ssk);
1649 			error = ENOPROTOOPT;
1650 			break;
1651 		}
1652 		break;
1653 
1654 	case SOPT_GET:
1655 		switch (sopt->sopt_name) {
1656 		case TCP_NODELAY:
1657 			optval = ssk->flags & SDP_NODELAY;
1658 			SDP_WUNLOCK(ssk);
1659 			error = sooptcopyout(sopt, &optval, sizeof optval);
1660 			break;
1661 		default:
1662 			SDP_WUNLOCK(ssk);
1663 			error = ENOPROTOOPT;
1664 			break;
1665 		}
1666 		break;
1667 	}
1668 	return (error);
1669 }
1670 #undef SDP_WLOCK_RECHECK
1671 
1672 int sdp_mod_count = 0;
1673 int sdp_mod_usec = 0;
1674 
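/*
 * Apply the global interrupt moderation settings to this socket's receive
 * completion queue, if moderation has been configured.
 */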
1675 void
1676 sdp_set_default_moderation(struct sdp_sock *ssk)
1677 {
1678 	if (sdp_mod_count <= 0 || sdp_mod_usec <= 0)
1679 		return;
1680 	ib_modify_cq(ssk->rx_ring.cq, sdp_mod_count, sdp_mod_usec);
1681 }
1682 
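/*
 * IB client add callback: allocate a protection domain and FMR pool for a
 * newly registered device and attach them as client data.
 */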
1683 static void
1684 sdp_dev_add(struct ib_device *device)
1685 {
1686 	struct ib_fmr_pool_param param;
1687 	struct sdp_device *sdp_dev;
1688 
1689 	sdp_dev = malloc(sizeof(*sdp_dev), M_SDP, M_WAITOK | M_ZERO);
1690 	sdp_dev->pd = ib_alloc_pd(device, 0);
1691 	if (IS_ERR(sdp_dev->pd))
1692 		goto out_pd;
1693 	memset(&param, 0, sizeof param);
1694 	param.max_pages_per_fmr = SDP_FMR_SIZE;
1695 	param.page_shift = PAGE_SHIFT;
1696 	param.access = (IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ);
1697 	param.pool_size = SDP_FMR_POOL_SIZE;
1698 	param.dirty_watermark = SDP_FMR_DIRTY_SIZE;
1699 	param.cache = 1;
1700 	sdp_dev->fmr_pool = ib_create_fmr_pool(sdp_dev->pd, &param);
1701 	if (IS_ERR(sdp_dev->fmr_pool))
1702 		goto out_fmr;
1703 	ib_set_client_data(device, &sdp_client, sdp_dev);
1704 	return;
1705 
1706 out_fmr:
1707 	ib_dealloc_pd(sdp_dev->pd);
1708 out_pd:
1709 	free(sdp_dev, M_SDP);
1710 }
1711 
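/*
 * IB client removal callback: reset every connection still using the
 * departing device, then flush and destroy its FMR pool and free the
 * protection domain.
 */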
1712 static void
1713 sdp_dev_rem(struct ib_device *device, void *client_data)
1714 {
1715 	struct sdp_device *sdp_dev;
1716 	struct sdp_sock *ssk;
1717 
1718 	SDP_LIST_WLOCK();
1719 	LIST_FOREACH(ssk, &sdp_list, list) {
1720 		if (ssk->ib_device != device)
1721 			continue;
1722 		SDP_WLOCK(ssk);
1723 		if ((ssk->flags & SDP_DESTROY) == 0)
1724 			ssk = sdp_notify(ssk, ECONNRESET);
1725 		if (ssk)
1726 			SDP_WUNLOCK(ssk);
1727 	}
1728 	SDP_LIST_WUNLOCK();
1729 	/*
1730 	 * XXX Do I need to wait between these two?
1731 	 */
1732 	sdp_dev = ib_get_client_data(device, &sdp_client);
1733 	if (!sdp_dev)
1734 		return;
1735 	ib_flush_fmr_pool(sdp_dev->fmr_pool);
1736 	ib_destroy_fmr_pool(sdp_dev->fmr_pool);
1737 	ib_dealloc_pd(sdp_dev->pd);
1738 	free(sdp_dev, M_SDP);
1739 }
1740 
1741 struct ib_client sdp_client =
1742     { .name = "sdp", .add = sdp_dev_add, .remove = sdp_dev_rem };
1743 
1744 
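/*
 * Export the list of SDP connections to userland as struct xtcpcb records,
 * in the same format as the TCP pcblist sysctl.
 */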
1745 static int
1746 sdp_pcblist(SYSCTL_HANDLER_ARGS)
1747 {
1748 	int error, n, i;
1749 	struct sdp_sock *ssk;
1750 	struct xinpgen xig;
1751 
1752 	/*
1753 	 * The process of preparing the TCB list is too time-consuming and
1754 	 * resource-intensive to repeat twice on every request.
1755 	 */
1756 	if (req->oldptr == NULL) {
1757 		n = sdp_count;
1758 		n += imax(n / 8, 10);
1759 		req->oldidx = 2 * (sizeof xig) + n * sizeof(struct xtcpcb);
1760 		return (0);
1761 	}
1762 
1763 	if (req->newptr != NULL)
1764 		return (EPERM);
1765 
1766 	/*
1767 	 * OK, now we're committed to doing something.
1768 	 */
1769 	SDP_LIST_RLOCK();
1770 	n = sdp_count;
1771 	SDP_LIST_RUNLOCK();
1772 
1773 	error = sysctl_wire_old_buffer(req, 2 * (sizeof xig)
1774 		+ n * sizeof(struct xtcpcb));
1775 	if (error != 0)
1776 		return (error);
1777 
1778 	bzero(&xig, sizeof(xig));
1779 	xig.xig_len = sizeof xig;
1780 	xig.xig_count = n;
1781 	xig.xig_gen = 0;
1782 	xig.xig_sogen = so_gencnt;
1783 	error = SYSCTL_OUT(req, &xig, sizeof xig);
1784 	if (error)
1785 		return (error);
1786 
1787 	SDP_LIST_RLOCK();
1788 	for (ssk = LIST_FIRST(&sdp_list), i = 0;
1789 	    ssk != NULL && i < n; ssk = LIST_NEXT(ssk, list)) {
1790 		struct xtcpcb xt;
1791 
1792 		SDP_RLOCK(ssk);
1793 		if (ssk->flags & SDP_TIMEWAIT) {
1794 			if (ssk->cred != NULL)
1795 				error = cr_cansee(req->td->td_ucred,
1796 				    ssk->cred);
1797 			else
1798 				error = EINVAL;	/* Skip this inp. */
1799 		} else if (ssk->socket)
1800 			error = cr_canseesocket(req->td->td_ucred,
1801 			    ssk->socket);
1802 		else
1803 			error = EINVAL;
1804 		if (error) {
1805 			error = 0;
1806 			goto next;
1807 		}
1808 
1809 		bzero(&xt, sizeof(xt));
1810 		xt.xt_len = sizeof xt;
1811 		xt.xt_inp.inp_gencnt = 0;
1812 		xt.xt_inp.inp_vflag = INP_IPV4;
1813 		memcpy(&xt.xt_inp.inp_laddr, &ssk->laddr, sizeof(ssk->laddr));
1814 		xt.xt_inp.inp_lport = ssk->lport;
1815 		memcpy(&xt.xt_inp.inp_faddr, &ssk->faddr, sizeof(ssk->faddr));
1816 		xt.xt_inp.inp_fport = ssk->fport;
1817 		xt.t_state = ssk->state;
1818 		if (ssk->socket != NULL)
1819 			sotoxsocket(ssk->socket, &xt.xt_inp.xi_socket);
1820 		xt.xt_inp.xi_socket.xso_protocol = IPPROTO_TCP;
1821 		SDP_RUNLOCK(ssk);
1822 		error = SYSCTL_OUT(req, &xt, sizeof xt);
1823 		if (error)
1824 			break;
1825 		i++;
1826 		continue;
1827 next:
1828 		SDP_RUNLOCK(ssk);
1829 	}
1830 	if (!error) {
1831 		/*
1832 		 * Give the user an updated idea of our state.
1833 		 * If the generation differs from what we told
1834 		 * her before, she knows that something happened
1835 		 * while we were processing this request, and it
1836 		 * might be necessary to retry.
1837 		 */
1838 		xig.xig_gen = 0;
1839 		xig.xig_sogen = so_gencnt;
1840 		xig.xig_count = sdp_count;
1841 		error = SYSCTL_OUT(req, &xig, sizeof xig);
1842 	}
1843 	SDP_LIST_RUNLOCK();
1844 	return (error);
1845 }
1846 
1847 SYSCTL_NODE(_net_inet, -1, sdp, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
1848     "SDP");
1849 
1850 SYSCTL_PROC(_net_inet_sdp, TCPCTL_PCBLIST, pcblist,
1851     CTLFLAG_RD | CTLTYPE_STRUCT | CTLFLAG_MPSAFE,
1852     0, 0, sdp_pcblist, "S,xtcpcb",
1853     "List of active SDP connections");
1854 
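/*
 * Resize the pcb zone whenever the maxsockets tunable changes.
 */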
1855 static void
1856 sdp_zone_change(void *tag)
1857 {
1858 
1859 	uma_zone_set_max(sdp_zone, maxsockets);
1860 }
1861 
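/*
 * Module initialization: set up the global pcb list and zone, create the
 * receive completion workqueue, and register with the IB core as a client.
 */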
1862 static void
1863 sdp_init(void *arg __unused)
1864 {
1865 
1866 	LIST_INIT(&sdp_list);
1867 	sdp_zone = uma_zcreate("sdp_sock", sizeof(struct sdp_sock),
1868 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
1869 	uma_zone_set_max(sdp_zone, maxsockets);
1870 	EVENTHANDLER_REGISTER(maxsockets_change, sdp_zone_change, NULL,
1871 		EVENTHANDLER_PRI_ANY);
1872 	rx_comp_wq = create_singlethread_workqueue("rx_comp_wq");
1873 	ib_register_client(&sdp_client);
1874 }
1875 SYSINIT(sdp_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_SECOND, sdp_init, NULL);
1876 
1877 #define	SDP_PROTOSW							\
1878 	.pr_type =		SOCK_STREAM,				\
1879 	.pr_flags =		PR_CONNREQUIRED|PR_IMPLOPCL|PR_WANTRCVD,\
1880 	.pr_ctloutput =		sdp_ctloutput,				\
1881 	.pr_abort =		sdp_abort,				\
1882 	.pr_accept =		sdp_accept,				\
1883 	.pr_attach =		sdp_attach,				\
1884 	.pr_bind =		sdp_bind,				\
1885 	.pr_connect =		sdp_connect,				\
1886 	.pr_detach =		sdp_detach,				\
1887 	.pr_disconnect =	sdp_disconnect,				\
1888 	.pr_listen =		sdp_listen,				\
1889 	.pr_peeraddr =		sdp_getpeeraddr,			\
1890 	.pr_rcvoob =		sdp_rcvoob,				\
1891 	.pr_send =		sdp_send,				\
1892 	.pr_sosend =		sdp_sosend,				\
1893 	.pr_soreceive =		sdp_sorecv,				\
1894 	.pr_shutdown =		sdp_shutdown,				\
1895 	.pr_sockaddr =		sdp_getsockaddr,			\
1896 	.pr_close =		sdp_close
1897 
1898 
1899 static struct protosw sdp_ip_protosw = {
1900 	.pr_protocol =		IPPROTO_IP,
1901 	SDP_PROTOSW
1902 };
1903 static struct protosw sdp_tcp_protosw = {
1904 	.pr_protocol =		IPPROTO_TCP,
1905 	SDP_PROTOSW
1906 };
1907 
1908 static struct domain sdpdomain = {
1909 	.dom_family =		AF_INET_SDP,
1910 	.dom_name =		"SDP",
1911 	.dom_nprotosw =		2,
1912 	.dom_protosw = {
1913 		&sdp_ip_protosw,
1914 		&sdp_tcp_protosw,
1915 	},
1916 };
1917 
1918 DOMAIN_SET(sdp);
1919 
1920 int sdp_debug_level = 1;
1921 int sdp_data_debug_level = 0;
1922