/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
 *	The Regents of the University of California.  All rights reserved.
 * Copyright (c) 2004 The FreeBSD Foundation.  All rights reserved.
 * Copyright (c) 2004-2008 Robert N. M. Watson.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * Excerpts taken from tcp_subr.c, tcp_usrreq.c, uipc_socket.c
 */

/*
 * Copyright (c) 2010 Isilon Systems, Inc.
 * Copyright (c) 2010 iX Systems, Inc.
 * Copyright (c) 2010 Panasas, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/eventhandler.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/sysctl.h>

#include "sdp.h"

#include <net/if.h>
#include <net/route.h>
#include <net/vnet.h>

uma_zone_t	sdp_zone;
struct rwlock	sdp_lock;
LIST_HEAD(, sdp_sock) sdp_list;

struct workqueue_struct *rx_comp_wq;

RW_SYSINIT(sdplockinit, &sdp_lock, "SDP lock");
#define	SDP_LIST_WLOCK()	rw_wlock(&sdp_lock)
#define	SDP_LIST_RLOCK()	rw_rlock(&sdp_lock)
#define	SDP_LIST_WUNLOCK()	rw_wunlock(&sdp_lock)
#define	SDP_LIST_RUNLOCK()	rw_runlock(&sdp_lock)
#define	SDP_LIST_WLOCK_ASSERT()	rw_assert(&sdp_lock, RW_WLOCKED)
#define	SDP_LIST_RLOCK_ASSERT()	rw_assert(&sdp_lock, RW_RLOCKED)
#define	SDP_LIST_LOCK_ASSERT()	rw_assert(&sdp_lock, RW_LOCKED)

MALLOC_DEFINE(M_SDP, "sdp", "Sockets Direct Protocol");

static void sdp_stop_keepalive_timer(struct socket *so);

/*
 * SDP protocol interface to socket abstraction.
 */
/*
 * sdp_sendspace and sdp_recvspace are the default send and receive window
 * sizes, respectively.
 */
u_long	sdp_sendspace = 1024*32;
u_long	sdp_recvspace = 1024*64;

static int sdp_count;

/*
 * Disable async. CMA events for sockets which are being torn down.
 */
static void
sdp_destroy_cma(struct sdp_sock *ssk)
{

	if (ssk->id == NULL)
		return;
	rdma_destroy_id(ssk->id);
	ssk->id = NULL;
}

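/*
 * Bind the CM id to a local address and record the resulting address and
 * port in the pcb.  A NULL sockaddr requests an anonymous bind to
 * INADDR_ANY so that a local port is assigned.
 */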
static int
sdp_pcbbind(struct sdp_sock *ssk, struct sockaddr *nam, struct ucred *cred)
{
	struct sockaddr_in *sin;
	struct sockaddr_in null;
	int error;

	SDP_WLOCK_ASSERT(ssk);

	if (ssk->lport != 0 || ssk->laddr != INADDR_ANY)
		return (EINVAL);
	/* rdma_bind_addr handles bind races.  */
	SDP_WUNLOCK(ssk);
	if (ssk->id == NULL)
		ssk->id = rdma_create_id(&init_net, sdp_cma_handler, ssk,
		    RDMA_PS_SDP, IB_QPT_RC);
	if (ssk->id == NULL) {
		SDP_WLOCK(ssk);
		return (ENOMEM);
	}
	if (nam == NULL) {
		null.sin_family = AF_INET;
		null.sin_len = sizeof(null);
		null.sin_addr.s_addr = INADDR_ANY;
		null.sin_port = 0;
		bzero(&null.sin_zero, sizeof(null.sin_zero));
		nam = (struct sockaddr *)&null;
	}
	error = -rdma_bind_addr(ssk->id, nam);
	SDP_WLOCK(ssk);
	if (error == 0) {
		sin = (struct sockaddr_in *)&ssk->id->route.addr.src_addr;
		ssk->laddr = sin->sin_addr.s_addr;
		ssk->lport = sin->sin_port;
	} else
		sdp_destroy_cma(ssk);
	return (error);
}

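/*
 * Free a pcb that is no longer referenced: unlink it from the global list
 * and tear down the QP, rings, and CM id before returning it to the zone.
 */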
static void
sdp_pcbfree(struct sdp_sock *ssk)
{

	KASSERT(ssk->socket == NULL, ("ssk %p socket still attached", ssk));
	KASSERT((ssk->flags & SDP_DESTROY) == 0,
	    ("ssk %p already destroyed", ssk));

	sdp_dbg(ssk->socket, "Freeing pcb");
	SDP_WLOCK_ASSERT(ssk);
	ssk->flags |= SDP_DESTROY;
	SDP_WUNLOCK(ssk);
	SDP_LIST_WLOCK();
	sdp_count--;
	LIST_REMOVE(ssk, list);
	SDP_LIST_WUNLOCK();
	crfree(ssk->cred);
	ssk->qp_active = 0;
	if (ssk->qp) {
		ib_destroy_qp(ssk->qp);
		ssk->qp = NULL;
	}
	sdp_tx_ring_destroy(ssk);
	sdp_rx_ring_destroy(ssk);
	sdp_destroy_cma(ssk);
	rw_destroy(&ssk->rx_ring.destroyed_lock);
	rw_destroy(&ssk->lock);
	uma_zfree(sdp_zone, ssk);
}

/*
 * Common routines to return a socket address.
 */
static struct sockaddr *
sdp_sockaddr(in_port_t port, struct in_addr *addr_p)
{
	struct sockaddr_in *sin;

	sin = malloc(sizeof(*sin), M_SONAME, M_WAITOK | M_ZERO);
	sin->sin_family = AF_INET;
	sin->sin_len = sizeof(*sin);
	sin->sin_addr = *addr_p;
	sin->sin_port = port;

	return ((struct sockaddr *)sin);
}

static int
sdp_getsockaddr(struct socket *so, struct sockaddr **nam)
{
	struct sdp_sock *ssk;
	struct in_addr addr;
	in_port_t port;

	ssk = sdp_sk(so);
	SDP_RLOCK(ssk);
	port = ssk->lport;
	addr.s_addr = ssk->laddr;
	SDP_RUNLOCK(ssk);

	*nam = sdp_sockaddr(port, &addr);
	return (0);
}

static int
sdp_getpeeraddr(struct socket *so, struct sockaddr **nam)
{
	struct sdp_sock *ssk;
	struct in_addr addr;
	in_port_t port;

	ssk = sdp_sk(so);
	SDP_RLOCK(ssk);
	port = ssk->fport;
	addr.s_addr = ssk->faddr;
	SDP_RUNLOCK(ssk);

	*nam = sdp_sockaddr(port, &addr);
	return (0);
}

#if 0
static void
sdp_apply_all(void (*func)(struct sdp_sock *, void *), void *arg)
{
	struct sdp_sock *ssk;

	SDP_LIST_RLOCK();
	LIST_FOREACH(ssk, &sdp_list, list) {
		SDP_WLOCK(ssk);
		func(ssk, arg);
		SDP_WUNLOCK(ssk);
	}
	SDP_LIST_RUNLOCK();
}
#endif

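/*
 * Abortively close the connection, dropping the pcb lock across the
 * rdma_disconnect() call.
 */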
static void
sdp_output_reset(struct sdp_sock *ssk)
{
	struct rdma_cm_id *id;

	SDP_WLOCK_ASSERT(ssk);
	if (ssk->id) {
		id = ssk->id;
		ssk->qp_active = 0;
		SDP_WUNLOCK(ssk);
		rdma_disconnect(id);
		SDP_WLOCK(ssk);
	}
	ssk->state = TCPS_CLOSED;
}

/*
 * Attempt to close an SDP socket, marking it as dropped, and freeing
 * the socket if we hold the only reference.
 */
static struct sdp_sock *
sdp_closed(struct sdp_sock *ssk)
{
	struct socket *so;

	SDP_WLOCK_ASSERT(ssk);

	ssk->flags |= SDP_DROPPED;
	so = ssk->socket;
	soisdisconnected(so);
	if (ssk->flags & SDP_SOCKREF) {
		ssk->flags &= ~SDP_SOCKREF;
		SDP_WUNLOCK(ssk);
		sorele(so);
		return (NULL);
	}
	return (ssk);
}

/*
 * Perform timer-based shutdowns which cannot operate in
 * callout context.
 */
static void
sdp_shutdown_task(void *data, int pending)
{
	struct sdp_sock *ssk;

	ssk = data;
	SDP_WLOCK(ssk);
	/*
	 * I don't think this can race with another call to pcbfree()
	 * because SDP_TIMEWAIT protects it.  SDP_DESTROY may be redundant.
	 */
	if (ssk->flags & SDP_DESTROY)
		panic("sdp_shutdown_task: Racing with pcbfree for ssk %p",
		    ssk);
	if (ssk->flags & SDP_DISCON)
		sdp_output_reset(ssk);
	/* We have to clear this so sdp_detach() will call pcbfree(). */
	ssk->flags &= ~(SDP_TIMEWAIT | SDP_DREQWAIT);
	if ((ssk->flags & SDP_DROPPED) == 0 &&
	    sdp_closed(ssk) == NULL)
		return;
	if (ssk->socket == NULL) {
		sdp_pcbfree(ssk);
		return;
	}
	SDP_WUNLOCK(ssk);
}

/*
 * 2msl has expired, schedule the shutdown task.
 */
static void
sdp_2msl_timeout(void *data)
{
	struct sdp_sock *ssk;

	ssk = data;
	/* Callout canceled. */
	if (!callout_active(&ssk->keep2msl))
		goto out;
	callout_deactivate(&ssk->keep2msl);
	/* Should be impossible, defensive programming. */
	if ((ssk->flags & SDP_TIMEWAIT) == 0)
		goto out;
	taskqueue_enqueue(taskqueue_thread, &ssk->shutdown_task);
out:
	SDP_WUNLOCK(ssk);
}

/*
 * Schedule the 2msl wait timer.
 */
static void
sdp_2msl_wait(struct sdp_sock *ssk)
{

	SDP_WLOCK_ASSERT(ssk);
	ssk->flags |= SDP_TIMEWAIT;
	ssk->state = TCPS_TIME_WAIT;
	soisdisconnected(ssk->socket);
	callout_reset(&ssk->keep2msl, TCPTV_MSL, sdp_2msl_timeout, ssk);
}

/*
 * Timed out waiting for the final fin/ack from rdma_disconnect().
 */
static void
sdp_dreq_timeout(void *data)
{
	struct sdp_sock *ssk;

	ssk = data;
	/* Callout canceled. */
	if (!callout_active(&ssk->keep2msl))
		goto out;
	/* Callout rescheduled, probably as a different timer. */
	if (callout_pending(&ssk->keep2msl))
		goto out;
	callout_deactivate(&ssk->keep2msl);
	if (ssk->state != TCPS_FIN_WAIT_1 && ssk->state != TCPS_LAST_ACK)
		goto out;
	if ((ssk->flags & SDP_DREQWAIT) == 0)
		goto out;
	ssk->flags &= ~SDP_DREQWAIT;
	ssk->flags |= SDP_DISCON;
	sdp_2msl_wait(ssk);
	ssk->qp_active = 0;
out:
	SDP_WUNLOCK(ssk);
}

/*
 * Received the final fin/ack.  Cancel the 2msl.
 */
void
sdp_cancel_dreq_wait_timeout(struct sdp_sock *ssk)
{
	sdp_dbg(ssk->socket, "cancelling dreq wait timeout\n");
	ssk->flags &= ~SDP_DREQWAIT;
	sdp_2msl_wait(ssk);
}

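/*
 * Initialize the timers, tasks, and rings embedded in a newly
 * allocated sdp_sock.
 */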
static int
sdp_init_sock(struct socket *sk)
{
	struct sdp_sock *ssk = sdp_sk(sk);

	sdp_dbg(sk, "%s\n", __func__);

	callout_init_rw(&ssk->keep2msl, &ssk->lock, CALLOUT_RETURNUNLOCKED);
	TASK_INIT(&ssk->shutdown_task, 0, sdp_shutdown_task, ssk);
#ifdef SDP_ZCOPY
	INIT_DELAYED_WORK(&ssk->srcavail_cancel_work, srcavail_cancel_timeout);
	ssk->zcopy_thresh = -1; /* use global sdp_zcopy_thresh */
	ssk->tx_ring.rdma_inflight = NULL;
#endif
	atomic_set(&ssk->mseq_ack, 0);
	sdp_rx_ring_init(ssk);
	ssk->tx_ring.buffer = NULL;

	return (0);
}

/*
 * Allocate an sdp_sock for the socket and reserve socket buffer space.
 */
static int
sdp_attach(struct socket *so, int proto, struct thread *td)
{
	struct sdp_sock *ssk;
	int error;

	ssk = sdp_sk(so);
	KASSERT(ssk == NULL, ("sdp_attach: ssk already set on so %p", so));
	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
		error = soreserve(so, sdp_sendspace, sdp_recvspace);
		if (error)
			return (error);
	}
	so->so_rcv.sb_flags |= SB_AUTOSIZE;
	so->so_snd.sb_flags |= SB_AUTOSIZE;
	ssk = uma_zalloc(sdp_zone, M_NOWAIT | M_ZERO);
	if (ssk == NULL)
		return (ENOBUFS);
	rw_init(&ssk->lock, "sdpsock");
	ssk->socket = so;
	ssk->cred = crhold(so->so_cred);
	so->so_pcb = (caddr_t)ssk;
	sdp_init_sock(so);
	ssk->flags = 0;
	ssk->qp_active = 0;
	ssk->state = TCPS_CLOSED;
	mbufq_init(&ssk->rxctlq, INT_MAX);
	SDP_LIST_WLOCK();
	LIST_INSERT_HEAD(&sdp_list, ssk, list);
	sdp_count++;
	SDP_LIST_WUNLOCK();

	return (0);
}

/*
 * Detach SDP from the socket, potentially leaving it around for the
 * timewait to expire.
 */
static void
sdp_detach(struct socket *so)
{
	struct sdp_sock *ssk;

	ssk = sdp_sk(so);
	SDP_WLOCK(ssk);
	KASSERT(ssk->socket != NULL, ("sdp_detach: socket is NULL"));
	ssk->socket->so_pcb = NULL;
	ssk->socket = NULL;
	if (ssk->flags & (SDP_TIMEWAIT | SDP_DREQWAIT))
		SDP_WUNLOCK(ssk);
	else if (ssk->flags & SDP_DROPPED || ssk->state < TCPS_SYN_SENT)
		sdp_pcbfree(ssk);
	else
		panic("sdp_detach: Unexpected state, ssk %p.\n", ssk);
}

/*
 * Allocate a local address for the socket.
 */
static int
sdp_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
{
	int error = 0;
	struct sdp_sock *ssk;
	struct sockaddr_in *sin;

	sin = (struct sockaddr_in *)nam;
	if (sin->sin_family != AF_INET)
		return (EAFNOSUPPORT);
	if (nam->sa_len != sizeof(*sin))
		return (EINVAL);
	if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
		return (EAFNOSUPPORT);

	ssk = sdp_sk(so);
	SDP_WLOCK(ssk);
	if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
		error = EINVAL;
		goto out;
	}
	error = sdp_pcbbind(ssk, nam, td->td_ucred);
out:
	SDP_WUNLOCK(ssk);

	return (error);
}

/*
 * Prepare to accept connections.
 */
static int
sdp_listen(struct socket *so, int backlog, struct thread *td)
{
	int error = 0;
	struct sdp_sock *ssk;

	ssk = sdp_sk(so);
	SDP_WLOCK(ssk);
	if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
		error = EINVAL;
		goto out;
	}
	if (error == 0 && ssk->lport == 0)
		error = sdp_pcbbind(ssk, (struct sockaddr *)0, td->td_ucred);
	SOCK_LOCK(so);
	if (error == 0)
		error = solisten_proto_check(so);
	if (error == 0) {
		solisten_proto(so, backlog);
		ssk->state = TCPS_LISTEN;
	}
	SOCK_UNLOCK(so);

out:
	SDP_WUNLOCK(ssk);
	if (error == 0)
		error = -rdma_listen(ssk->id, backlog);
	return (error);
}

/*
 * Initiate an SDP connection to nam.
 */
static int
sdp_start_connect(struct sdp_sock *ssk, struct sockaddr *nam, struct thread *td)
{
	struct sockaddr_in src;
	struct socket *so;
	int error;

	so = ssk->socket;

	SDP_WLOCK_ASSERT(ssk);
	if (ssk->lport == 0) {
		error = sdp_pcbbind(ssk, (struct sockaddr *)0, td->td_ucred);
		if (error)
			return (error);
	}
	src.sin_family = AF_INET;
	src.sin_len = sizeof(src);
	bzero(&src.sin_zero, sizeof(src.sin_zero));
	src.sin_port = ssk->lport;
	src.sin_addr.s_addr = ssk->laddr;
	soisconnecting(so);
	SDP_WUNLOCK(ssk);
	error = -rdma_resolve_addr(ssk->id, (struct sockaddr *)&src, nam,
	    SDP_RESOLVE_TIMEOUT);
	SDP_WLOCK(ssk);
	if (error == 0)
		ssk->state = TCPS_SYN_SENT;

	return (error);
}

586  * Initiate SDP connection.
587  */
588 static int
589 sdp_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
590 {
591 	int error = 0;
592 	struct sdp_sock *ssk;
593 	struct sockaddr_in *sin;
594 
595 	sin = (struct sockaddr_in *)nam;
596 	if (nam->sa_len != sizeof(*sin))
597 		return (EINVAL);
598 	if (sin->sin_family != AF_INET)
599 		return (EAFNOSUPPORT);
600 	if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
601 		return (EAFNOSUPPORT);
602 	if ((error = prison_remote_ip4(td->td_ucred, &sin->sin_addr)) != 0)
603 		return (error);
604 	ssk = sdp_sk(so);
605 	SDP_WLOCK(ssk);
606 	if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED))
607 		error = EINVAL;
608 	else
609 		error = sdp_start_connect(ssk, nam, td);
610 	SDP_WUNLOCK(ssk);
611 	return (error);
612 }
613 
614 /*
615  * Drop a SDP socket, reporting
616  * the specified error.  If connection is synchronized,
617  * then send a RST to peer.
618  */
619 static struct sdp_sock *
620 sdp_drop(struct sdp_sock *ssk, int errno)
621 {
622 	struct socket *so;
623 
624 	SDP_WLOCK_ASSERT(ssk);
625 	so = ssk->socket;
626 	if (TCPS_HAVERCVDSYN(ssk->state))
627 		sdp_output_reset(ssk);
628 	if (errno == ETIMEDOUT && ssk->softerror)
629 		errno = ssk->softerror;
630 	so->so_error = errno;
631 	return (sdp_closed(ssk));
632 }
633 
634 /*
635  * User issued close, and wish to trail through shutdown states:
636  * if never received SYN, just forget it.  If got a SYN from peer,
637  * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN.
638  * If already got a FIN from peer, then almost done; go to LAST_ACK
639  * state.  In all other cases, have already sent FIN to peer (e.g.
640  * after PRU_SHUTDOWN), and just have to play tedious game waiting
641  * for peer to send FIN or not respond to keep-alives, etc.
642  * We can let the user exit from the close as soon as the FIN is acked.
643  */
644 static void
645 sdp_usrclosed(struct sdp_sock *ssk)
646 {
647 
648 	SDP_WLOCK_ASSERT(ssk);
649 
650 	switch (ssk->state) {
651 	case TCPS_LISTEN:
652 		ssk->state = TCPS_CLOSED;
653 		SDP_WUNLOCK(ssk);
654 		sdp_destroy_cma(ssk);
655 		SDP_WLOCK(ssk);
656 		/* FALLTHROUGH */
657 	case TCPS_CLOSED:
658 		ssk = sdp_closed(ssk);
659 		/*
660 		 * sdp_closed() should never return NULL here as the socket is
661 		 * still open.
662 		 */
663 		KASSERT(ssk != NULL,
664 		    ("sdp_usrclosed: sdp_closed() returned NULL"));
665 		break;
666 
667 	case TCPS_SYN_SENT:
668 		/* FALLTHROUGH */
669 	case TCPS_SYN_RECEIVED:
670 		ssk->flags |= SDP_NEEDFIN;
671 		break;
672 
673 	case TCPS_ESTABLISHED:
674 		ssk->flags |= SDP_NEEDFIN;
675 		ssk->state = TCPS_FIN_WAIT_1;
676 		break;
677 
678 	case TCPS_CLOSE_WAIT:
679 		ssk->state = TCPS_LAST_ACK;
680 		break;
681 	}
682 	if (ssk->state >= TCPS_FIN_WAIT_2) {
683 		/* Prevent the connection hanging in FIN_WAIT_2 forever. */
684 		if (ssk->state == TCPS_FIN_WAIT_2)
685 			sdp_2msl_wait(ssk);
686 		else
687 			soisdisconnected(ssk->socket);
688 	}
689 }
690 
691 static void
692 sdp_output_disconnect(struct sdp_sock *ssk)
693 {
694 
695 	SDP_WLOCK_ASSERT(ssk);
696 	callout_reset(&ssk->keep2msl, SDP_FIN_WAIT_TIMEOUT,
697 	    sdp_dreq_timeout, ssk);
698 	ssk->flags |= SDP_NEEDFIN | SDP_DREQWAIT;
699 	sdp_post_sends(ssk, M_NOWAIT);
700 }
701 
702 /*
703  * Initiate or continue a disconnect.
704  * If embryonic state, just send reset (once).
705  * If in ``let data drain'' option and linger null, just drop.
706  * Otherwise (hard), mark socket disconnecting and drop
707  * current input data; switch states based on user close, and
708  * send segment to peer (with FIN).
709  */
710 static void
711 sdp_start_disconnect(struct sdp_sock *ssk)
712 {
713 	struct socket *so;
714 	int unread;
715 
716 	so = ssk->socket;
717 	SDP_WLOCK_ASSERT(ssk);
718 	sdp_stop_keepalive_timer(so);
719 	/*
720 	 * Neither sdp_closed() nor sdp_drop() should return NULL, as the
721 	 * socket is still open.
722 	 */
723 	if (ssk->state < TCPS_ESTABLISHED) {
724 		ssk = sdp_closed(ssk);
725 		KASSERT(ssk != NULL,
726 		    ("sdp_start_disconnect: sdp_close() returned NULL"));
727 	} else if ((so->so_options & SO_LINGER) && so->so_linger == 0) {
728 		ssk = sdp_drop(ssk, 0);
729 		KASSERT(ssk != NULL,
730 		    ("sdp_start_disconnect: sdp_drop() returned NULL"));
731 	} else {
732 		soisdisconnecting(so);
733 		unread = sbused(&so->so_rcv);
734 		sbflush(&so->so_rcv);
735 		sdp_usrclosed(ssk);
736 		if (!(ssk->flags & SDP_DROPPED)) {
737 			if (unread)
738 				sdp_output_reset(ssk);
739 			else
740 				sdp_output_disconnect(ssk);
741 		}
742 	}
743 }
744 
745 /*
746  * User initiated disconnect.
747  */
748 static int
749 sdp_disconnect(struct socket *so)
750 {
751 	struct sdp_sock *ssk;
752 	int error = 0;
753 
754 	ssk = sdp_sk(so);
755 	SDP_WLOCK(ssk);
756 	if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
757 		error = ECONNRESET;
758 		goto out;
759 	}
760 	sdp_start_disconnect(ssk);
761 out:
762 	SDP_WUNLOCK(ssk);
763 	return (error);
764 }
765 
766 /*
767  * Accept a connection.  Essentially all the work is done at higher levels;
768  * just return the address of the peer, storing through addr.
769  *
770  *
771  * XXX This is broken XXX
772  *
773  * The rationale for acquiring the sdp lock here is somewhat complicated,
774  * and is described in detail in the commit log entry for r175612.  Acquiring
775  * it delays an accept(2) racing with sonewconn(), which inserts the socket
776  * before the address/port fields are initialized.  A better fix would
777  * prevent the socket from being placed in the listen queue until all fields
778  * are fully initialized.
779  */
780 static int
781 sdp_accept(struct socket *so, struct sockaddr **nam)
782 {
783 	struct sdp_sock *ssk = NULL;
784 	struct in_addr addr;
785 	in_port_t port;
786 	int error;
787 
788 	if (so->so_state & SS_ISDISCONNECTED)
789 		return (ECONNABORTED);
790 
791 	port = 0;
792 	addr.s_addr = 0;
793 	error = 0;
794 	ssk = sdp_sk(so);
795 	SDP_WLOCK(ssk);
796 	if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
797 		error = ECONNABORTED;
798 		goto out;
799 	}
800 	port = ssk->fport;
801 	addr.s_addr = ssk->faddr;
802 out:
803 	SDP_WUNLOCK(ssk);
804 	if (error == 0)
805 		*nam = sdp_sockaddr(port, &addr);
806 	return error;
807 }
808 
809 /*
810  * Mark the connection as being incapable of further output.
811  */
812 static int
813 sdp_shutdown(struct socket *so)
814 {
815 	int error = 0;
816 	struct sdp_sock *ssk;
817 
818 	ssk = sdp_sk(so);
819 	SDP_WLOCK(ssk);
820 	if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
821 		error = ECONNRESET;
822 		goto out;
823 	}
824 	socantsendmore(so);
825 	sdp_usrclosed(ssk);
826 	if (!(ssk->flags & SDP_DROPPED))
827 		sdp_output_disconnect(ssk);
828 
829 out:
830 	SDP_WUNLOCK(ssk);
831 
832 	return (error);
833 }
834 
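/*
 * Append an mbuf chain to the send socket buffer, coalescing it with the
 * last record when the combined chain still fits in a single SDP packet.
 */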
static void
sdp_append(struct sdp_sock *ssk, struct sockbuf *sb, struct mbuf *mb, int cnt)
{
	struct mbuf *n;
	int ncnt;

	SOCKBUF_LOCK_ASSERT(sb);
	SBLASTRECORDCHK(sb);
	KASSERT(mb->m_flags & M_PKTHDR,
		("sdp_append: %p Missing packet header.\n", mb));
	n = sb->sb_lastrecord;
	/*
	 * If the queue is empty just set all pointers and proceed.
	 */
	if (n == NULL) {
		sb->sb_lastrecord = sb->sb_mb = sb->sb_sndptr = mb;
		for (; mb; mb = mb->m_next) {
			sb->sb_mbtail = mb;
			sballoc(sb, mb);
		}
		return;
	}
	/*
	 * Count the number of mbufs in the current tail.
	 */
	for (ncnt = 0; n->m_next; n = n->m_next)
		ncnt++;
	n = sb->sb_lastrecord;
	/*
	 * If the two chains can fit in a single sdp packet and
	 * the last record has not been sent yet (WRITABLE) coalesce
	 * them.  The lastrecord remains the same but we must strip the
	 * packet header and then let sbcompress do the hard part.
	 */
	if (M_WRITABLE(n) && ncnt + cnt < SDP_MAX_SEND_SGES &&
	    n->m_pkthdr.len + mb->m_pkthdr.len - SDP_HEAD_SIZE <
	    ssk->xmit_size_goal) {
		m_adj(mb, SDP_HEAD_SIZE);
		n->m_pkthdr.len += mb->m_pkthdr.len;
		n->m_flags |= mb->m_flags & (M_PUSH | M_URG);
		m_demote(mb, 1, 0);
		sbcompress(sb, mb, sb->sb_mbtail);
		return;
	}
	/*
	 * Not compressible, just append to the end and adjust counters.
	 */
	sb->sb_lastrecord->m_flags |= M_PUSH;
	sb->sb_lastrecord->m_nextpkt = mb;
	sb->sb_lastrecord = mb;
	if (sb->sb_sndptr == NULL)
		sb->sb_sndptr = mb;
	for (; mb; mb = mb->m_next) {
		sb->sb_mbtail = mb;
		sballoc(sb, mb);
	}
}

/*
 * Do a send by putting data in output queue and updating urgent
 * marker if URG set.  Possibly send more data.  Unlike the other
 * pru_*() routines, the mbuf chains are our responsibility.  We
 * must either enqueue them or free them.  The other pru_* routines
 * generally are caller-frees.
 *
 * This comes from sendfile, normal sends will come from sdp_sosend().
 */
static int
sdp_send(struct socket *so, int flags, struct mbuf *m,
    struct sockaddr *nam, struct mbuf *control, struct thread *td)
{
	struct sdp_sock *ssk;
	struct mbuf *n;
	int error;
	int cnt;

	if (nam != NULL) {
		if (nam->sa_family != AF_INET) {
			if (control)
				m_freem(control);
			m_freem(m);
			return (EAFNOSUPPORT);
		}
		if (nam->sa_len != sizeof(struct sockaddr_in)) {
			if (control)
				m_freem(control);
			m_freem(m);
			return (EINVAL);
		}
	}

	error = 0;
	ssk = sdp_sk(so);
	KASSERT(m->m_flags & M_PKTHDR,
	    ("sdp_send: %p no packet header", m));
	M_PREPEND(m, SDP_HEAD_SIZE, M_WAITOK);
	mtod(m, struct sdp_bsdh *)->mid = SDP_MID_DATA;
	for (n = m, cnt = 0; n->m_next; n = n->m_next)
		cnt++;
	if (cnt > SDP_MAX_SEND_SGES) {
		n = m_collapse(m, M_WAITOK, SDP_MAX_SEND_SGES);
		if (n == NULL) {
			m_freem(m);
			return (EMSGSIZE);
		}
		m = n;
		for (cnt = 0; n->m_next; n = n->m_next)
			cnt++;
	}
	SDP_WLOCK(ssk);
	if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
		if (control)
			m_freem(control);
		if (m)
			m_freem(m);
		error = ECONNRESET;
		goto out;
	}
	if (control) {
		/* SDP doesn't support control messages. */
		if (control->m_len) {
			m_freem(control);
			if (m)
				m_freem(m);
			error = EINVAL;
			goto out;
		}
		m_freem(control);	/* empty control, just free it */
	}
	if (!(flags & PRUS_OOB)) {
		SOCKBUF_LOCK(&so->so_snd);
		sdp_append(ssk, &so->so_snd, m, cnt);
		SOCKBUF_UNLOCK(&so->so_snd);
		if (nam && ssk->state < TCPS_SYN_SENT) {
			/*
			 * Do implied connect if not yet connected.
			 */
			error = sdp_start_connect(ssk, nam, td);
			if (error)
				goto out;
		}
		if (flags & PRUS_EOF) {
			/*
			 * Close the send side of the connection after
			 * the data is sent.
			 */
			socantsendmore(so);
			sdp_usrclosed(ssk);
			if (!(ssk->flags & SDP_DROPPED))
				sdp_output_disconnect(ssk);
		} else if (!(ssk->flags & SDP_DROPPED) &&
		    !(flags & PRUS_MORETOCOME))
			sdp_post_sends(ssk, M_NOWAIT);
		SDP_WUNLOCK(ssk);
		return (0);
	} else {
		SOCKBUF_LOCK(&so->so_snd);
		if (sbspace(&so->so_snd) < -512) {
			SOCKBUF_UNLOCK(&so->so_snd);
			m_freem(m);
			error = ENOBUFS;
			goto out;
		}
		/*
		 * According to RFC961 (Assigned Protocols),
		 * the urgent pointer points to the last octet
		 * of urgent data.  We continue, however,
		 * to consider it to indicate the first octet
		 * of data past the urgent section.
		 * Otherwise, snd_up should be one lower.
		 */
		m->m_flags |= M_URG | M_PUSH;
		sdp_append(ssk, &so->so_snd, m, cnt);
		SOCKBUF_UNLOCK(&so->so_snd);
		if (nam && ssk->state < TCPS_SYN_SENT) {
			/*
			 * Do implied connect if not yet connected.
			 */
			error = sdp_start_connect(ssk, nam, td);
			if (error)
				goto out;
		}
		sdp_post_sends(ssk, M_NOWAIT);
		SDP_WUNLOCK(ssk);
		return (0);
	}
out:
	SDP_WUNLOCK(ssk);
	return (error);
}

/*
 * Send on a socket.  If send must go all at once and message is larger than
 * send buffering, then hard error.  Lock against other senders.  If must go
 * all at once and not enough room now, then inform user that this would
 * block and do nothing.  Otherwise, if nonblocking, send as much as
 * possible.  The data to be sent is described by "uio" if nonzero, otherwise
 * by the mbuf chain "top" (which must be null if uio is not).  Data provided
 * in mbuf chain must be small enough to send all at once.
 *
 * Returns nonzero on error, timeout or signal; callers must check for short
 * counts if EINTR/ERESTART are returned.  Data and control buffers are freed
 * on return.
 */
static int
sdp_sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
    struct mbuf *top, struct mbuf *control, int flags, struct thread *td)
{
	struct sdp_sock *ssk;
	long space, resid;
	int atomic;
	int error;
	int copy;

	if (uio != NULL)
		resid = uio->uio_resid;
	else
		resid = top->m_pkthdr.len;
	atomic = top != NULL;
	if (control != NULL) {
		if (control->m_len) {
			m_freem(control);
			if (top)
				m_freem(top);
			return (EINVAL);
		}
		m_freem(control);
		control = NULL;
	}
	/*
	 * In theory resid should be unsigned.  However, space must be
	 * signed, as it might be less than 0 if we over-committed, and we
	 * must use a signed comparison of space and resid.  On the other
	 * hand, a negative resid causes us to loop sending 0-length
	 * segments to the protocol.
	 *
	 * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM
	 * type sockets since that's an error.
	 */
	if (resid < 0 || (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) {
		error = EINVAL;
		goto out;
	}
	if (td != NULL)
		td->td_ru.ru_msgsnd++;

	ssk = sdp_sk(so);
	error = SOCK_IO_SEND_LOCK(so, SBLOCKWAIT(flags));
	if (error)
		goto out;

restart:
	do {
		SOCKBUF_LOCK(&so->so_snd);
		if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
			SOCKBUF_UNLOCK(&so->so_snd);
			error = EPIPE;
			goto release;
		}
		if (so->so_error) {
			error = so->so_error;
			so->so_error = 0;
			SOCKBUF_UNLOCK(&so->so_snd);
			goto release;
		}
		if ((so->so_state & SS_ISCONNECTED) == 0 && addr == NULL) {
			SOCKBUF_UNLOCK(&so->so_snd);
			error = ENOTCONN;
			goto release;
		}
		space = sbspace(&so->so_snd);
		if (flags & MSG_OOB)
			space += 1024;
		if (atomic && resid > ssk->xmit_size_goal - SDP_HEAD_SIZE) {
			SOCKBUF_UNLOCK(&so->so_snd);
			error = EMSGSIZE;
			goto release;
		}
		if (space < resid &&
		    (atomic || space < so->so_snd.sb_lowat)) {
			if ((so->so_state & SS_NBIO) ||
			    (flags & (MSG_NBIO | MSG_DONTWAIT)) != 0) {
				SOCKBUF_UNLOCK(&so->so_snd);
				error = EWOULDBLOCK;
				goto release;
			}
			error = sbwait(so, SO_SND);
			SOCKBUF_UNLOCK(&so->so_snd);
			if (error)
				goto release;
			goto restart;
		}
		SOCKBUF_UNLOCK(&so->so_snd);
		do {
			if (uio == NULL) {
				resid = 0;
				if (flags & MSG_EOR)
					top->m_flags |= M_EOR;
			} else {
				/*
				 * Copy the data from userland into an mbuf
				 * chain.  If no data is to be copied in,
				 * a single empty mbuf is returned.
				 */
				copy = min(space,
				    ssk->xmit_size_goal - SDP_HEAD_SIZE);
				top = m_uiotombuf(uio, M_WAITOK, copy,
				    0, M_PKTHDR |
				    ((flags & MSG_EOR) ? M_EOR : 0));
				if (top == NULL) {
					/* only possible error */
					error = EFAULT;
					goto release;
				}
				space -= resid - uio->uio_resid;
				resid = uio->uio_resid;
			}
			/*
			 * XXX all the SBS_CANTSENDMORE checks previously
			 * done could be out of date after dropping the
			 * socket lock.
			 */
			error = sdp_send(so, (flags & MSG_OOB) ? PRUS_OOB :
			/*
			 * Set EOF on the last send if the user specified
			 * MSG_EOF.
			 */
			    ((flags & MSG_EOF) && (resid <= 0)) ? PRUS_EOF :
			/* If there is more to send set PRUS_MORETOCOME. */
			    (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0,
			    top, addr, NULL, td);
			top = NULL;
			if (error)
				goto release;
		} while (resid && space > 0);
	} while (resid);

release:
	SOCK_IO_SEND_UNLOCK(so);
out:
	if (top != NULL)
		m_freem(top);
	return (error);
}

/*
 * The part of soreceive() that implements reading non-inline out-of-band
 * data from a socket.  For more complete comments, see soreceive(), from
 * which this code originated.
 *
 * Note that soreceive_rcvoob(), unlike the remainder of soreceive(), is
 * unable to return an mbuf chain to the caller.
 */
static int
soreceive_rcvoob(struct socket *so, struct uio *uio, int flags)
{
	struct protosw *pr = so->so_proto;
	struct mbuf *m;
	int error;

	KASSERT(flags & MSG_OOB, ("soreceive_rcvoob: (flags & MSG_OOB) == 0"));

	m = m_get(M_WAITOK, MT_DATA);
	error = pr->pr_rcvoob(so, m, flags & MSG_PEEK);
	if (error)
		goto bad;
	do {
		error = uiomove(mtod(m, void *),
		    (int)min(uio->uio_resid, m->m_len), uio);
		m = m_free(m);
	} while (uio->uio_resid && error == 0 && m);
bad:
	if (m != NULL)
		m_freem(m);
	return (error);
}

/*
 * Optimized version of soreceive() for stream (TCP) sockets.
 */
static int
sdp_sorecv(struct socket *so, struct sockaddr **psa, struct uio *uio,
    struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
{
	int len = 0, error = 0, flags, oresid;
	struct sockbuf *sb;
	struct mbuf *m, *n = NULL;
	struct sdp_sock *ssk;

	/* We only do stream sockets. */
	if (so->so_type != SOCK_STREAM)
		return (EINVAL);
	if (psa != NULL)
		*psa = NULL;
	if (controlp != NULL)
		return (EINVAL);
	if (flagsp != NULL)
		flags = *flagsp & ~MSG_EOR;
	else
		flags = 0;
	if (flags & MSG_OOB)
		return (soreceive_rcvoob(so, uio, flags));
	if (mp0 != NULL)
		*mp0 = NULL;

	sb = &so->so_rcv;
	ssk = sdp_sk(so);

	/* Prevent other readers from entering the socket. */
	error = SOCK_IO_RECV_LOCK(so, SBLOCKWAIT(flags));
	if (error)
		return (error);
	SOCKBUF_LOCK(sb);

	/* Easy one, no space to copyout anything. */
	if (uio->uio_resid == 0) {
		error = EINVAL;
		goto out;
	}
	oresid = uio->uio_resid;

	/* We will never ever get anything unless we are connected. */
	if (!(so->so_state & (SS_ISCONNECTED|SS_ISDISCONNECTED))) {
		/* When disconnecting there may be still some data left. */
		if (sbavail(sb))
			goto deliver;
		if (!(so->so_state & SS_ISDISCONNECTED))
			error = ENOTCONN;
		goto out;
	}

	/* Socket buffer is empty and we shall not block. */
	if (sbavail(sb) == 0 &&
	    ((so->so_state & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO)))) {
		error = EAGAIN;
		goto out;
	}

restart:
	SOCKBUF_LOCK_ASSERT(&so->so_rcv);

	/* Abort if socket has reported problems. */
	if (so->so_error) {
		if (sbavail(sb))
			goto deliver;
		if (oresid > uio->uio_resid)
			goto out;
		error = so->so_error;
		if (!(flags & MSG_PEEK))
			so->so_error = 0;
		goto out;
	}

	/* Door is closed.  Deliver what is left, if any. */
	if (sb->sb_state & SBS_CANTRCVMORE) {
		if (sbavail(sb))
			goto deliver;
		else
			goto out;
	}

	/* Socket buffer got some data that we shall deliver now. */
	if (sbavail(sb) && !(flags & MSG_WAITALL) &&
	    ((so->so_state & SS_NBIO) ||
	     (flags & (MSG_DONTWAIT|MSG_NBIO)) ||
	     sbavail(sb) >= sb->sb_lowat ||
	     sbavail(sb) >= uio->uio_resid ||
	     sbavail(sb) >= sb->sb_hiwat)) {
		goto deliver;
	}

	/* On MSG_WAITALL we must wait until all data or error arrives. */
	if ((flags & MSG_WAITALL) &&
	    (sbavail(sb) >= uio->uio_resid || sbavail(sb) >= sb->sb_lowat))
		goto deliver;

	/*
	 * Wait and block until (more) data comes in.
	 * NB: Drops the sockbuf lock during wait.
	 */
	error = sbwait(so, SO_RCV);
	if (error)
		goto out;
	goto restart;

deliver:
	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
	KASSERT(sbavail(sb), ("%s: sockbuf empty", __func__));
	KASSERT(sb->sb_mb != NULL, ("%s: sb_mb == NULL", __func__));

	/* Statistics. */
	if (uio->uio_td)
		uio->uio_td->td_ru.ru_msgrcv++;

	/* Fill uio until full or current end of socket buffer is reached. */
	len = min(uio->uio_resid, sbavail(sb));
	if (mp0 != NULL) {
		/* Dequeue as many mbufs as possible. */
		if (!(flags & MSG_PEEK) && len >= sb->sb_mb->m_len) {
			for (*mp0 = m = sb->sb_mb;
			     m != NULL && m->m_len <= len;
			     m = m->m_next) {
				len -= m->m_len;
				uio->uio_resid -= m->m_len;
				sbfree(sb, m);
				n = m;
			}
			sb->sb_mb = m;
			if (sb->sb_mb == NULL)
				SB_EMPTY_FIXUP(sb);
			n->m_next = NULL;
		}
		/* Copy the remainder. */
		if (len > 0) {
			KASSERT(sb->sb_mb != NULL,
			    ("%s: len > 0 && sb->sb_mb empty", __func__));

			m = m_copym(sb->sb_mb, 0, len, M_NOWAIT);
			if (m == NULL)
				len = 0;	/* Don't flush data from sockbuf. */
			else
				uio->uio_resid -= m->m_len;
			if (*mp0 != NULL)
				n->m_next = m;
			else
				*mp0 = m;
			if (*mp0 == NULL) {
				error = ENOBUFS;
				goto out;
			}
		}
	} else {
		/* NB: Must unlock socket buffer as uiomove may sleep. */
		SOCKBUF_UNLOCK(sb);
		error = m_mbuftouio(uio, sb->sb_mb, len);
		SOCKBUF_LOCK(sb);
		if (error)
			goto out;
	}
	SBLASTRECORDCHK(sb);
	SBLASTMBUFCHK(sb);

	/*
	 * Remove the delivered data from the socket buffer unless we
	 * were only peeking.
	 */
	if (!(flags & MSG_PEEK)) {
		if (len > 0)
			sbdrop_locked(sb, len);

		/* Notify protocol that we drained some data. */
		SOCKBUF_UNLOCK(sb);
		SDP_WLOCK(ssk);
		sdp_do_posts(ssk);
		SDP_WUNLOCK(ssk);
		SOCKBUF_LOCK(sb);
	}

	/*
	 * For MSG_WAITALL we may have to loop again and wait for
	 * more data to come in.
	 */
	if ((flags & MSG_WAITALL) && uio->uio_resid > 0)
		goto restart;
out:
	SBLASTRECORDCHK(sb);
	SBLASTMBUFCHK(sb);
	SOCKBUF_UNLOCK(sb);
	SOCK_IO_RECV_UNLOCK(so);
	return (error);
}

/*
 * Abort is used to tear down a connection, typically while sitting in
 * the accept queue.
 */
void
sdp_abort(struct socket *so)
{
	struct sdp_sock *ssk;

	ssk = sdp_sk(so);
	SDP_WLOCK(ssk);
	/*
	 * If we have not yet dropped, do it now.
	 */
	if (!(ssk->flags & SDP_TIMEWAIT) &&
	    !(ssk->flags & SDP_DROPPED))
		sdp_drop(ssk, ECONNABORTED);
	KASSERT(ssk->flags & SDP_DROPPED, ("sdp_abort: %p not dropped 0x%X",
	    ssk, ssk->flags));
	SDP_WUNLOCK(ssk);
}

/*
 * Close an SDP socket and initiate a friendly disconnect.
 */
static void
sdp_close(struct socket *so)
{
	struct sdp_sock *ssk;

	ssk = sdp_sk(so);
	SDP_WLOCK(ssk);
	/*
	 * If we have not yet dropped, do it now.
	 */
	if (!(ssk->flags & SDP_TIMEWAIT) &&
	    !(ssk->flags & SDP_DROPPED))
		sdp_start_disconnect(ssk);

	/*
	 * If we've still not dropped let the socket layer know we're
	 * holding on to the socket and pcb for a while.
	 */
	if (!(ssk->flags & SDP_DROPPED)) {
		ssk->flags |= SDP_SOCKREF;
		soref(so);
	}
	SDP_WUNLOCK(ssk);
}

/*
 * User requests out-of-band data.
 */
static int
sdp_rcvoob(struct socket *so, struct mbuf *m, int flags)
{
	int error = 0;
	struct sdp_sock *ssk;

	ssk = sdp_sk(so);
	SDP_WLOCK(ssk);
	if (!rx_ring_trylock(&ssk->rx_ring)) {
		SDP_WUNLOCK(ssk);
		return (ECONNRESET);
	}
	if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
		error = ECONNRESET;
		goto out;
	}
	if ((so->so_oobmark == 0 &&
	     (so->so_rcv.sb_state & SBS_RCVATMARK) == 0) ||
	    so->so_options & SO_OOBINLINE ||
	    ssk->oobflags & SDP_HADOOB) {
		error = EINVAL;
		goto out;
	}
	if ((ssk->oobflags & SDP_HAVEOOB) == 0) {
		error = EWOULDBLOCK;
		goto out;
	}
	m->m_len = 1;
	*mtod(m, caddr_t) = ssk->iobc;
	if ((flags & MSG_PEEK) == 0)
		ssk->oobflags ^= (SDP_HAVEOOB | SDP_HADOOB);
out:
	rx_ring_unlock(&ssk->rx_ring);
	SDP_WUNLOCK(ssk);
	return (error);
}

void
sdp_urg(struct sdp_sock *ssk, struct mbuf *mb)
{
	struct mbuf *m;
	struct socket *so;

	so = ssk->socket;
	if (so == NULL)
		return;

	so->so_oobmark = sbused(&so->so_rcv) + mb->m_pkthdr.len - 1;
	sohasoutofband(so);
	ssk->oobflags &= ~(SDP_HAVEOOB | SDP_HADOOB);
	if (!(so->so_options & SO_OOBINLINE)) {
		for (m = mb; m->m_next != NULL; m = m->m_next)
			;
		ssk->iobc = *(mtod(m, char *) + m->m_len - 1);
		ssk->oobflags |= SDP_HAVEOOB;
		m->m_len--;
		mb->m_pkthdr.len--;
	}
}

/*
 * Notify an SDP socket of an asynchronous error.
 *
 * Do not wake up user since there currently is no mechanism for
 * reporting soft errors (yet - a kqueue filter may be added).
 */
struct sdp_sock *
sdp_notify(struct sdp_sock *ssk, int error)
{

	SDP_WLOCK_ASSERT(ssk);

	if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED))
		return (ssk);

	/*
	 * Ignore some errors if we are hooked up.
	 */
	if (ssk->state == TCPS_ESTABLISHED &&
	    (error == EHOSTUNREACH || error == ENETUNREACH ||
	     error == EHOSTDOWN))
		return (ssk);
	ssk->softerror = error;
	return (sdp_drop(ssk, error));
}

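/*
 * Keepalive timer expired; post a keepalive message and reschedule the
 * callout unless the socket was dropped or SO_KEEPALIVE was cleared.
 */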
static void
sdp_keepalive_timeout(void *data)
{
	struct sdp_sock *ssk;

	ssk = data;
	/*
	 * Callout canceled.  The callout was initialized with
	 * CALLOUT_RETURNUNLOCKED, so the lock must be released on
	 * every return path.
	 */
	if (!callout_active(&ssk->keep2msl))
		goto out;
	/* Callout rescheduled as a different kind of timer. */
	if (callout_pending(&ssk->keep2msl))
		goto out;
	callout_deactivate(&ssk->keep2msl);
	if (ssk->flags & SDP_DROPPED ||
	    (ssk->socket->so_options & SO_KEEPALIVE) == 0)
		goto out;
	sdp_post_keepalive(ssk);
	callout_reset(&ssk->keep2msl, SDP_KEEPALIVE_TIME,
	    sdp_keepalive_timeout, ssk);
out:
	SDP_WUNLOCK(ssk);
}

void
sdp_start_keepalive_timer(struct socket *so)
{
	struct sdp_sock *ssk;

	ssk = sdp_sk(so);
	if (!callout_pending(&ssk->keep2msl))
		callout_reset(&ssk->keep2msl, SDP_KEEPALIVE_TIME,
		    sdp_keepalive_timeout, ssk);
}

static void
sdp_stop_keepalive_timer(struct socket *so)
{
	struct sdp_sock *ssk;

	ssk = sdp_sk(so);
	callout_stop(&ssk->keep2msl);
}

/*
 * sdp_ctloutput() must drop the sdp socket lock before performing copyin
 * on socket option arguments.  When it re-acquires the lock after the
 * copy, it has to revalidate that the connection is still valid for the
 * socket option.
 */
#define	SDP_WLOCK_RECHECK(ssk) do {					\
	SDP_WLOCK(ssk);							\
	if ((ssk)->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {		\
		SDP_WUNLOCK(ssk);					\
		return (ECONNRESET);					\
	}								\
} while (0)

static int
sdp_ctloutput(struct socket *so, struct sockopt *sopt)
{
	int	error, opt, optval;
	struct sdp_sock *ssk;

	error = 0;
	ssk = sdp_sk(so);
	if (sopt->sopt_level == SOL_SOCKET &&
	    sopt->sopt_name == SO_KEEPALIVE) {
		SDP_WLOCK(ssk);
		if (so->so_options & SO_KEEPALIVE)
			sdp_start_keepalive_timer(so);
		else
			sdp_stop_keepalive_timer(so);
		SDP_WUNLOCK(ssk);
	}
	if (sopt->sopt_level != IPPROTO_TCP)
		return (error);

	SDP_WLOCK(ssk);
	if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
		SDP_WUNLOCK(ssk);
		return (ECONNRESET);
	}

	switch (sopt->sopt_dir) {
	case SOPT_SET:
		switch (sopt->sopt_name) {
		case TCP_NODELAY:
			SDP_WUNLOCK(ssk);
			error = sooptcopyin(sopt, &optval, sizeof optval,
			    sizeof optval);
			if (error)
				return (error);

			SDP_WLOCK_RECHECK(ssk);
			opt = SDP_NODELAY;
			if (optval)
				ssk->flags |= opt;
			else
				ssk->flags &= ~opt;
			sdp_do_posts(ssk);
			SDP_WUNLOCK(ssk);
			break;

		default:
			SDP_WUNLOCK(ssk);
			error = ENOPROTOOPT;
			break;
		}
		break;

	case SOPT_GET:
		switch (sopt->sopt_name) {
		case TCP_NODELAY:
			optval = ssk->flags & SDP_NODELAY;
			SDP_WUNLOCK(ssk);
			error = sooptcopyout(sopt, &optval, sizeof optval);
			break;
		default:
			SDP_WUNLOCK(ssk);
			error = ENOPROTOOPT;
			break;
		}
		break;
	}
	return (error);
}
#undef SDP_WLOCK_RECHECK

int sdp_mod_count = 0;
int sdp_mod_usec = 0;

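/*
 * Apply the global completion moderation parameters to a socket's
 * receive completion queue, if they are configured.
 */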
void
sdp_set_default_moderation(struct sdp_sock *ssk)
{

	if (sdp_mod_count <= 0 || sdp_mod_usec <= 0)
		return;
	ib_modify_cq(ssk->rx_ring.cq, sdp_mod_count, sdp_mod_usec);
}

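/*
 * A new IB device was attached; allocate the protection domain and FMR
 * pool that SDP sockets on this device will use.
 */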
static void
sdp_dev_add(struct ib_device *device)
{
	struct ib_fmr_pool_param param;
	struct sdp_device *sdp_dev;

	sdp_dev = malloc(sizeof(*sdp_dev), M_SDP, M_WAITOK | M_ZERO);
	sdp_dev->pd = ib_alloc_pd(device, 0);
	if (IS_ERR(sdp_dev->pd))
		goto out_pd;
	memset(&param, 0, sizeof param);
	param.max_pages_per_fmr = SDP_FMR_SIZE;
	param.page_shift = PAGE_SHIFT;
	param.access = (IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ);
	param.pool_size = SDP_FMR_POOL_SIZE;
	param.dirty_watermark = SDP_FMR_DIRTY_SIZE;
	param.cache = 1;
	sdp_dev->fmr_pool = ib_create_fmr_pool(sdp_dev->pd, &param);
	if (IS_ERR(sdp_dev->fmr_pool))
		goto out_fmr;
	ib_set_client_data(device, &sdp_client, sdp_dev);
	return;

out_fmr:
	ib_dealloc_pd(sdp_dev->pd);
out_pd:
	free(sdp_dev, M_SDP);
}

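/*
 * An IB device was detached; reset every connection using it and
 * release the per-device FMR pool and protection domain.
 */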
static void
sdp_dev_rem(struct ib_device *device, void *client_data)
{
	struct sdp_device *sdp_dev;
	struct sdp_sock *ssk;

	SDP_LIST_WLOCK();
	LIST_FOREACH(ssk, &sdp_list, list) {
		if (ssk->ib_device != device)
			continue;
		SDP_WLOCK(ssk);
		if ((ssk->flags & SDP_DESTROY) == 0)
			ssk = sdp_notify(ssk, ECONNRESET);
		if (ssk)
			SDP_WUNLOCK(ssk);
	}
	SDP_LIST_WUNLOCK();
	/*
	 * XXX Do I need to wait between these two?
	 */
	sdp_dev = ib_get_client_data(device, &sdp_client);
	if (!sdp_dev)
		return;
	ib_flush_fmr_pool(sdp_dev->fmr_pool);
	ib_destroy_fmr_pool(sdp_dev->fmr_pool);
	ib_dealloc_pd(sdp_dev->pd);
	free(sdp_dev, M_SDP);
}

struct ib_client sdp_client =
    { .name = "sdp", .add = sdp_dev_add, .remove = sdp_dev_rem };

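/*
 * Export the connection list in the xtcpcb format used for TCP so that
 * existing monitoring tools can display SDP sockets.
 */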
static int
sdp_pcblist(SYSCTL_HANDLER_ARGS)
{
	int error, n, i;
	struct sdp_sock *ssk;
	struct xinpgen xig;

	/*
	 * The process of preparing the TCB list is too time-consuming and
	 * resource-intensive to repeat twice on every request.
	 */
	if (req->oldptr == NULL) {
		n = sdp_count;
		n += imax(n / 8, 10);
		req->oldidx = 2 * (sizeof xig) + n * sizeof(struct xtcpcb);
		return (0);
	}

	if (req->newptr != NULL)
		return (EPERM);

	/*
	 * OK, now we're committed to doing something.
	 */
	SDP_LIST_RLOCK();
	n = sdp_count;
	SDP_LIST_RUNLOCK();

	error = sysctl_wire_old_buffer(req, 2 * (sizeof xig)
		+ n * sizeof(struct xtcpcb));
	if (error != 0)
		return (error);

	bzero(&xig, sizeof(xig));
	xig.xig_len = sizeof xig;
	xig.xig_count = n;
	xig.xig_gen = 0;
	xig.xig_sogen = so_gencnt;
	error = SYSCTL_OUT(req, &xig, sizeof xig);
	if (error)
		return (error);

	SDP_LIST_RLOCK();
	for (ssk = LIST_FIRST(&sdp_list), i = 0;
	    ssk != NULL && i < n; ssk = LIST_NEXT(ssk, list)) {
		struct xtcpcb xt;

		SDP_RLOCK(ssk);
		if (ssk->flags & SDP_TIMEWAIT) {
			if (ssk->cred != NULL)
				error = cr_cansee(req->td->td_ucred,
				    ssk->cred);
			else
				error = EINVAL;	/* Skip this inp. */
		} else if (ssk->socket)
			error = cr_canseesocket(req->td->td_ucred,
			    ssk->socket);
		else
			error = EINVAL;
		if (error) {
			error = 0;
			goto next;
		}

		bzero(&xt, sizeof(xt));
		xt.xt_len = sizeof xt;
		xt.xt_inp.inp_gencnt = 0;
		xt.xt_inp.inp_vflag = INP_IPV4;
		memcpy(&xt.xt_inp.inp_laddr, &ssk->laddr, sizeof(ssk->laddr));
		xt.xt_inp.inp_lport = ssk->lport;
		memcpy(&xt.xt_inp.inp_faddr, &ssk->faddr, sizeof(ssk->faddr));
		xt.xt_inp.inp_fport = ssk->fport;
		xt.t_state = ssk->state;
		if (ssk->socket != NULL)
			sotoxsocket(ssk->socket, &xt.xt_inp.xi_socket);
		xt.xt_inp.xi_socket.xso_protocol = IPPROTO_TCP;
		SDP_RUNLOCK(ssk);
		error = SYSCTL_OUT(req, &xt, sizeof xt);
		if (error)
			break;
		i++;
		continue;
next:
		SDP_RUNLOCK(ssk);
	}
	if (!error) {
		/*
		 * Give the user an updated idea of our state.  If the
		 * generation differs from what we told her before, she
		 * knows that something happened while we were processing
		 * this request, and it might be necessary to retry.
		 */
		xig.xig_gen = 0;
		xig.xig_sogen = so_gencnt;
		xig.xig_count = sdp_count;
		error = SYSCTL_OUT(req, &xig, sizeof xig);
	}
	SDP_LIST_RUNLOCK();
	return (error);
}

SYSCTL_NODE(_net_inet, -1, sdp, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "SDP");

SYSCTL_PROC(_net_inet_sdp, TCPCTL_PCBLIST, pcblist,
    CTLFLAG_RD | CTLTYPE_STRUCT | CTLFLAG_MPSAFE,
    0, 0, sdp_pcblist, "S,xtcpcb",
    "List of active SDP connections");

static void
sdp_zone_change(void *tag)
{

	uma_zone_set_max(sdp_zone, maxsockets);
}

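/*
 * Load-time initialization: create the pcb zone, the receive completion
 * workqueue, and register with the IB core for device add/remove events.
 */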
static void
sdp_init(void *arg __unused)
{

	LIST_INIT(&sdp_list);
	sdp_zone = uma_zcreate("sdp_sock", sizeof(struct sdp_sock),
	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
	uma_zone_set_max(sdp_zone, maxsockets);
	EVENTHANDLER_REGISTER(maxsockets_change, sdp_zone_change, NULL,
	    EVENTHANDLER_PRI_ANY);
	rx_comp_wq = create_singlethread_workqueue("rx_comp_wq");
	ib_register_client(&sdp_client);
}
SYSINIT(sdp_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_SECOND, sdp_init, NULL);

#define	SDP_PROTOSW							\
	.pr_type =		SOCK_STREAM,				\
	.pr_flags =		PR_CONNREQUIRED|PR_IMPLOPCL|PR_WANTRCVD,\
	.pr_ctloutput =		sdp_ctloutput,				\
	.pr_abort =		sdp_abort,				\
	.pr_accept =		sdp_accept,				\
	.pr_attach =		sdp_attach,				\
	.pr_bind =		sdp_bind,				\
	.pr_connect =		sdp_connect,				\
	.pr_detach =		sdp_detach,				\
	.pr_disconnect =	sdp_disconnect,				\
	.pr_listen =		sdp_listen,				\
	.pr_peeraddr =		sdp_getpeeraddr,			\
	.pr_rcvoob =		sdp_rcvoob,				\
	.pr_send =		sdp_send,				\
	.pr_sosend =		sdp_sosend,				\
	.pr_soreceive =		sdp_sorecv,				\
	.pr_shutdown =		sdp_shutdown,				\
	.pr_sockaddr =		sdp_getsockaddr,			\
	.pr_close =		sdp_close

static struct protosw sdp_ip_protosw = {
	.pr_protocol =		IPPROTO_IP,
	SDP_PROTOSW
};
static struct protosw sdp_tcp_protosw = {
	.pr_protocol =		IPPROTO_TCP,
	SDP_PROTOSW
};

static struct domain sdpdomain = {
	.dom_family =		AF_INET_SDP,
	.dom_name =		"SDP",
	.dom_nprotosw =		2,
	.dom_protosw = {
		&sdp_ip_protosw,
		&sdp_tcp_protosw,
	},
};

DOMAIN_SET(sdp);
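
/*
 * With the domain registered, applications reach SDP through the normal
 * sockets API by using the AF_INET_SDP family; a minimal sketch (error
 * handling omitted, "port" and "peer" are placeholders):
 *
 *	int s = socket(AF_INET_SDP, SOCK_STREAM, 0);
 *	struct sockaddr_in sin = { .sin_len = sizeof(sin),
 *	    .sin_family = AF_INET, .sin_port = htons(port),
 *	    .sin_addr = peer };
 *	connect(s, (struct sockaddr *)&sin, sizeof(sin));
 */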

int sdp_debug_level = 1;
int sdp_data_debug_level = 0;