xref: /freebsd/sys/ofed/drivers/infiniband/ulp/sdp/sdp_main.c (revision 069ac18495ad8fde2748bc94b0f80a50250bb01d)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
5  *      The Regents of the University of California.  All rights reserved.
6  * Copyright (c) 2004 The FreeBSD Foundation.  All rights reserved.
7  * Copyright (c) 2004-2008 Robert N. M. Watson.  All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  * Excerpts taken from tcp_subr.c, tcp_usrreq.c, uipc_socket.c
34  */
35 
36 /*
37  *
38  * Copyright (c) 2010 Isilon Systems, Inc.
39  * Copyright (c) 2010 iX Systems, Inc.
40  * Copyright (c) 2010 Panasas, Inc.
41  * All rights reserved.
42  *
43  * Redistribution and use in source and binary forms, with or without
44  * modification, are permitted provided that the following conditions
45  * are met:
46  * 1. Redistributions of source code must retain the above copyright
47  *    notice unmodified, this list of conditions, and the following
48  *    disclaimer.
49  * 2. Redistributions in binary form must reproduce the above copyright
50  *    notice, this list of conditions and the following disclaimer in the
51  *    documentation and/or other materials provided with the distribution.
52  *
53  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
54  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
55  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
56  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
57  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
58  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
59  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
60  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
61  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
62  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
63  *
64  */
65 
66 #include <sys/param.h>
67 #include <sys/eventhandler.h>
68 #include <sys/kernel.h>
69 #include <sys/malloc.h>
70 
71 #include "sdp.h"
72 
73 #include <net/if.h>
74 #include <net/route.h>
75 #include <net/vnet.h>
76 #include <sys/sysctl.h>
77 
/* UMA zone that struct sdp_sock pcbs are allocated from and freed to. */
uma_zone_t	sdp_zone;
/* Global lock taken around updates of sdp_list and sdp_count. */
struct rwlock	sdp_lock;
/* List of SDP pcbs, linked through the 'list' member of struct sdp_sock. */
LIST_HEAD(, sdp_sock) sdp_list;

/* Workqueue handle; not referenced in this part of the file. */
struct workqueue_struct *rx_comp_wq;

RW_SYSINIT(sdplockinit, &sdp_lock, "SDP lock");
/* Shorthand for acquiring, releasing and asserting the global sdp_lock. */
#define	SDP_LIST_WLOCK()	rw_wlock(&sdp_lock)
#define	SDP_LIST_RLOCK()	rw_rlock(&sdp_lock)
#define	SDP_LIST_WUNLOCK()	rw_wunlock(&sdp_lock)
#define	SDP_LIST_RUNLOCK()	rw_runlock(&sdp_lock)
#define	SDP_LIST_WLOCK_ASSERT()	rw_assert(&sdp_lock, RW_WLOCKED)
#define	SDP_LIST_RLOCK_ASSERT()	rw_assert(&sdp_lock, RW_RLOCKED)
#define	SDP_LIST_LOCK_ASSERT()	rw_assert(&sdp_lock, RW_LOCKED)

/* malloc(9) type for SDP allocations. */
MALLOC_DEFINE(M_SDP, "sdp", "Sockets Direct Protocol");

/* Defined later in the file; needed by sdp_start_disconnect(). */
static void sdp_stop_keepalive_timer(struct socket *so);
96 
/*
 * SDP protocol interface to socket abstraction.
 */
/*
 * sdp_sendspace and sdp_recvspace are the default send and receive window
 * sizes, respectively.  sdp_attach() passes them to soreserve() when the
 * socket has no buffer space reserved yet.
 */
u_long	sdp_sendspace = 1024*32;
u_long	sdp_recvspace = 1024*64;

/*
 * Number of pcbs on sdp_list; incremented in sdp_attach() and decremented
 * in sdp_pcbfree(), both under SDP_LIST_WLOCK().
 */
static int sdp_count;
108 
109 /*
110  * Disable async. CMA events for sockets which are being torn down.
111  */
112 static void
113 sdp_destroy_cma(struct sdp_sock *ssk)
114 {
115 
116 	if (ssk->id == NULL)
117 		return;
118 	rdma_destroy_id(ssk->id);
119 	ssk->id = NULL;
120 }
121 
122 static int
123 sdp_pcbbind(struct sdp_sock *ssk, struct sockaddr *nam, struct ucred *cred)
124 {
125 	struct sockaddr_in *sin;
126 	struct sockaddr_in null;
127 	int error;
128 
129 	SDP_WLOCK_ASSERT(ssk);
130 
131 	if (ssk->lport != 0 || ssk->laddr != INADDR_ANY)
132 		return (EINVAL);
133 	/* rdma_bind_addr handles bind races.  */
134 	SDP_WUNLOCK(ssk);
135 	if (ssk->id == NULL)
136 		ssk->id = rdma_create_id(&init_net, sdp_cma_handler, ssk, RDMA_PS_SDP, IB_QPT_RC);
137 	if (ssk->id == NULL) {
138 		SDP_WLOCK(ssk);
139 		return (ENOMEM);
140 	}
141 	if (nam == NULL) {
142 		null.sin_family = AF_INET;
143 		null.sin_len = sizeof(null);
144 		null.sin_addr.s_addr = INADDR_ANY;
145 		null.sin_port = 0;
146 		bzero(&null.sin_zero, sizeof(null.sin_zero));
147 		nam = (struct sockaddr *)&null;
148 	}
149 	error = -rdma_bind_addr(ssk->id, nam);
150 	SDP_WLOCK(ssk);
151 	if (error == 0) {
152 		sin = (struct sockaddr_in *)&ssk->id->route.addr.src_addr;
153 		ssk->laddr = sin->sin_addr.s_addr;
154 		ssk->lport = sin->sin_port;
155 	} else
156 		sdp_destroy_cma(ssk);
157 	return (error);
158 }
159 
/*
 * Tear down an SDP pcb and return it to sdp_zone.
 *
 * Must be called with the pcb write lock held and with the socket already
 * detached (ssk->socket == NULL).  The pcb lock is released here and the
 * pcb memory is freed, so the caller must not touch ssk afterwards.
 */
static void
sdp_pcbfree(struct sdp_sock *ssk)
{

	KASSERT(ssk->socket == NULL, ("ssk %p socket still attached", ssk));
	KASSERT((ssk->flags & SDP_DESTROY) == 0,
	    ("ssk %p already destroyed", ssk));

	sdp_dbg(ssk->socket, "Freeing pcb");
	SDP_WLOCK_ASSERT(ssk);
	/* Mark the pcb as being destroyed before the lock is dropped. */
	ssk->flags |= SDP_DESTROY;
	SDP_WUNLOCK(ssk);
	/* Unlink from the global pcb list and adjust the pcb count. */
	SDP_LIST_WLOCK();
	sdp_count--;
	LIST_REMOVE(ssk, list);
	SDP_LIST_WUNLOCK();
	/* Drop the credential reference taken in sdp_attach(). */
	crfree(ssk->cred);
	ssk->qp_active = 0;
	if (ssk->qp) {
		ib_destroy_qp(ssk->qp);
		ssk->qp = NULL;
	}
	sdp_tx_ring_destroy(ssk);
	sdp_rx_ring_destroy(ssk);
	sdp_destroy_cma(ssk);
	rw_destroy(&ssk->rx_ring.destroyed_lock);
	rw_destroy(&ssk->lock);
	uma_zfree(sdp_zone, ssk);
}
189 
190 static int
191 sdp_getsockaddr(struct socket *so, struct sockaddr *sa)
192 {
193 	struct sdp_sock *ssk = sdp_sk(so);
194 
195 	SDP_RLOCK(ssk);
196 	*(struct sockaddr_in *)sa = (struct sockaddr_in ){
197 		.sin_family = AF_INET,
198 		.sin_len = sizeof(struct sockaddr_in),
199 		.sin_addr.s_addr = ssk->laddr,
200 		.sin_port = ssk->lport,
201 	};
202 	SDP_RUNLOCK(ssk);
203 
204 	return (0);
205 }
206 
207 static int
208 sdp_getpeeraddr(struct socket *so, struct sockaddr *sa)
209 {
210 	struct sdp_sock *ssk = sdp_sk(so);
211 
212 	SDP_RLOCK(ssk);
213 	*(struct sockaddr_in *)sa = (struct sockaddr_in ){
214 		.sin_family = AF_INET,
215 		.sin_len = sizeof(struct sockaddr_in),
216 		.sin_addr.s_addr = ssk->faddr,
217 		.sin_port = ssk->fport,
218 	};
219 	SDP_RUNLOCK(ssk);
220 
221 	return (0);
222 }
223 
#if 0
/*
 * Compiled out.  Calls func(ssk, arg) for every pcb on sdp_list, holding
 * the list read lock for the whole walk and each pcb's write lock around
 * its callback.
 */
static void
sdp_apply_all(void (*func)(struct sdp_sock *, void *), void *arg)
{
	struct sdp_sock *ssk;

	SDP_LIST_RLOCK();
	LIST_FOREACH(ssk, &sdp_list, list) {
		SDP_WLOCK(ssk);
		func(ssk, arg);
		SDP_WUNLOCK(ssk);
	}
	SDP_LIST_RUNLOCK();
}
#endif
239 
240 static void
241 sdp_output_reset(struct sdp_sock *ssk)
242 {
243 	struct rdma_cm_id *id;
244 
245 	SDP_WLOCK_ASSERT(ssk);
246 	if (ssk->id) {
247 		id = ssk->id;
248 		ssk->qp_active = 0;
249 		SDP_WUNLOCK(ssk);
250 		rdma_disconnect(id);
251 		SDP_WLOCK(ssk);
252 	}
253 	ssk->state = TCPS_CLOSED;
254 }
255 
256 /*
257  * Attempt to close a SDP socket, marking it as dropped, and freeing
258  * the socket if we hold the only reference.
259  */
260 static struct sdp_sock *
261 sdp_closed(struct sdp_sock *ssk)
262 {
263 	struct socket *so;
264 
265 	SDP_WLOCK_ASSERT(ssk);
266 
267 	ssk->flags |= SDP_DROPPED;
268 	so = ssk->socket;
269 	soisdisconnected(so);
270 	if (ssk->flags & SDP_SOCKREF) {
271 		ssk->flags &= ~SDP_SOCKREF;
272 		SDP_WUNLOCK(ssk);
273 		sorele(so);
274 		return (NULL);
275 	}
276 	return (ssk);
277 }
278 
/*
 * Perform timer based shutdowns which can not operate in
 * callout context.
 *
 * Taskqueue handler registered by TASK_INIT() in sdp_init_sock(); data is
 * the struct sdp_sock.  The pending argument is unused.  The pcb write
 * lock is taken here and is released on every return path: by
 * sdp_closed() when it returns NULL, by sdp_pcbfree(), or by the final
 * SDP_WUNLOCK().
 */
static void
sdp_shutdown_task(void *data, int pending)
{
	struct sdp_sock *ssk;

	ssk = data;
	SDP_WLOCK(ssk);
	/*
	 * I don't think this can race with another call to pcbfree()
	 * because SDP_TIMEWAIT protects it.  SDP_DESTROY may be redundant.
	 */
	if (ssk->flags & SDP_DESTROY)
		panic("sdp_shutdown_task: Racing with pcbfree for ssk %p",
		    ssk);
	/* The DREQ wait timed out: force the connection down. */
	if (ssk->flags & SDP_DISCON)
		sdp_output_reset(ssk);
	/* We have to clear this so sdp_detach() will call pcbfree(). */
	ssk->flags &= ~(SDP_TIMEWAIT | SDP_DREQWAIT);
	/* sdp_closed() returning NULL means it already dropped the lock. */
	if ((ssk->flags & SDP_DROPPED) == 0 &&
	    sdp_closed(ssk) == NULL)
		return;
	/* Socket already detached: nothing else will free the pcb. */
	if (ssk->socket == NULL) {
		sdp_pcbfree(ssk);
		return;
	}
	SDP_WUNLOCK(ssk);
}
310 
311 /*
312  * 2msl has expired, schedule the shutdown task.
313  */
314 static void
315 sdp_2msl_timeout(void *data)
316 {
317 	struct sdp_sock *ssk;
318 
319 	ssk = data;
320 	/* Callout canceled. */
321         if (!callout_active(&ssk->keep2msl))
322 		goto out;
323         callout_deactivate(&ssk->keep2msl);
324 	/* Should be impossible, defensive programming. */
325 	if ((ssk->flags & SDP_TIMEWAIT) == 0)
326 		goto out;
327 	taskqueue_enqueue(taskqueue_thread, &ssk->shutdown_task);
328 out:
329 	SDP_WUNLOCK(ssk);
330 	return;
331 }
332 
333 /*
334  * Schedule the 2msl wait timer.
335  */
336 static void
337 sdp_2msl_wait(struct sdp_sock *ssk)
338 {
339 
340 	SDP_WLOCK_ASSERT(ssk);
341 	ssk->flags |= SDP_TIMEWAIT;
342 	ssk->state = TCPS_TIME_WAIT;
343 	soisdisconnected(ssk->socket);
344 	callout_reset(&ssk->keep2msl, TCPTV_MSL, sdp_2msl_timeout, ssk);
345 }
346 
347 /*
348  * Timed out waiting for the final fin/ack from rdma_disconnect().
349  */
350 static void
351 sdp_dreq_timeout(void *data)
352 {
353 	struct sdp_sock *ssk;
354 
355 	ssk = data;
356 	/* Callout canceled. */
357         if (!callout_active(&ssk->keep2msl))
358 		goto out;
359 	/* Callout rescheduled, probably as a different timer. */
360 	if (callout_pending(&ssk->keep2msl))
361 		goto out;
362         callout_deactivate(&ssk->keep2msl);
363 	if (ssk->state != TCPS_FIN_WAIT_1 && ssk->state != TCPS_LAST_ACK)
364 		goto out;
365 	if ((ssk->flags & SDP_DREQWAIT) == 0)
366 		goto out;
367 	ssk->flags &= ~SDP_DREQWAIT;
368 	ssk->flags |= SDP_DISCON;
369 	sdp_2msl_wait(ssk);
370 	ssk->qp_active = 0;
371 out:
372 	SDP_WUNLOCK(ssk);
373 }
374 
/*
 * Received the final fin/ack.  Stop waiting for the disconnect reply
 * (clear SDP_DREQWAIT) and enter the 2msl wait instead; sdp_2msl_wait()
 * re-arms keep2msl with the 2msl timeout.
 */
void
sdp_cancel_dreq_wait_timeout(struct sdp_sock *ssk)
{
	sdp_dbg(ssk->socket, "cancelling dreq wait timeout\n");
	ssk->flags &= ~SDP_DREQWAIT;
	sdp_2msl_wait(ssk);
}
385 
386 static int
387 sdp_init_sock(struct socket *sk)
388 {
389 	struct sdp_sock *ssk = sdp_sk(sk);
390 
391 	sdp_dbg(sk, "%s\n", __func__);
392 
393 	callout_init_rw(&ssk->keep2msl, &ssk->lock, CALLOUT_RETURNUNLOCKED);
394 	TASK_INIT(&ssk->shutdown_task, 0, sdp_shutdown_task, ssk);
395 #ifdef SDP_ZCOPY
396 	INIT_DELAYED_WORK(&ssk->srcavail_cancel_work, srcavail_cancel_timeout);
397 	ssk->zcopy_thresh = -1; /* use global sdp_zcopy_thresh */
398 	ssk->tx_ring.rdma_inflight = NULL;
399 #endif
400 	atomic_set(&ssk->mseq_ack, 0);
401 	sdp_rx_ring_init(ssk);
402 	ssk->tx_ring.buffer = NULL;
403 
404 	return 0;
405 }
406 
407 /*
408  * Allocate an sdp_sock for the socket and reserve socket buffer space.
409  */
410 static int
411 sdp_attach(struct socket *so, int proto, struct thread *td)
412 {
413 	struct sdp_sock *ssk;
414 	int error;
415 
416 	ssk = sdp_sk(so);
417 	KASSERT(ssk == NULL, ("sdp_attach: ssk already set on so %p", so));
418 	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
419 		error = soreserve(so, sdp_sendspace, sdp_recvspace);
420 		if (error)
421 			return (error);
422 	}
423 	so->so_rcv.sb_flags |= SB_AUTOSIZE;
424 	so->so_snd.sb_flags |= SB_AUTOSIZE;
425 	ssk = uma_zalloc(sdp_zone, M_NOWAIT | M_ZERO);
426 	if (ssk == NULL)
427 		return (ENOBUFS);
428 	rw_init(&ssk->lock, "sdpsock");
429 	ssk->socket = so;
430 	ssk->cred = crhold(so->so_cred);
431 	so->so_pcb = (caddr_t)ssk;
432 	sdp_init_sock(so);
433 	ssk->flags = 0;
434 	ssk->qp_active = 0;
435 	ssk->state = TCPS_CLOSED;
436 	mbufq_init(&ssk->rxctlq, INT_MAX);
437 	SDP_LIST_WLOCK();
438 	LIST_INSERT_HEAD(&sdp_list, ssk, list);
439 	sdp_count++;
440 	SDP_LIST_WUNLOCK();
441 
442 	return (0);
443 }
444 
445 /*
446  * Detach SDP from the socket, potentially leaving it around for the
447  * timewait to expire.
448  */
449 static void
450 sdp_detach(struct socket *so)
451 {
452 	struct sdp_sock *ssk;
453 
454 	ssk = sdp_sk(so);
455 	SDP_WLOCK(ssk);
456 	KASSERT(ssk->socket != NULL, ("sdp_detach: socket is NULL"));
457 	ssk->socket->so_pcb = NULL;
458 	ssk->socket = NULL;
459 	if (ssk->flags & (SDP_TIMEWAIT | SDP_DREQWAIT))
460 		SDP_WUNLOCK(ssk);
461 	else if (ssk->flags & SDP_DROPPED || ssk->state < TCPS_SYN_SENT)
462 		sdp_pcbfree(ssk);
463 	else
464 		panic("sdp_detach: Unexpected state, ssk %p.\n", ssk);
465 }
466 
467 /*
468  * Allocate a local address for the socket.
469  */
470 static int
471 sdp_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
472 {
473 	int error = 0;
474 	struct sdp_sock *ssk;
475 	struct sockaddr_in *sin;
476 
477 	sin = (struct sockaddr_in *)nam;
478 	if (sin->sin_family != AF_INET)
479 		return (EAFNOSUPPORT);
480 	if (nam->sa_len != sizeof(*sin))
481 		return (EINVAL);
482 	if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
483 		return (EAFNOSUPPORT);
484 
485 	ssk = sdp_sk(so);
486 	SDP_WLOCK(ssk);
487 	if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
488 		error = EINVAL;
489 		goto out;
490 	}
491 	error = sdp_pcbbind(ssk, nam, td->td_ucred);
492 out:
493 	SDP_WUNLOCK(ssk);
494 
495 	return (error);
496 }
497 
498 /*
499  * Prepare to accept connections.
500  */
501 static int
502 sdp_listen(struct socket *so, int backlog, struct thread *td)
503 {
504 	int error = 0;
505 	struct sdp_sock *ssk;
506 
507 	ssk = sdp_sk(so);
508 	SDP_WLOCK(ssk);
509 	if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
510 		error = EINVAL;
511 		goto out;
512 	}
513 	if (error == 0 && ssk->lport == 0)
514 		error = sdp_pcbbind(ssk, (struct sockaddr *)0, td->td_ucred);
515 	SOCK_LOCK(so);
516 	if (error == 0)
517 		error = solisten_proto_check(so);
518 	if (error == 0) {
519 		solisten_proto(so, backlog);
520 		ssk->state = TCPS_LISTEN;
521 	}
522 	SOCK_UNLOCK(so);
523 
524 out:
525 	SDP_WUNLOCK(ssk);
526 	if (error == 0)
527 		error = -rdma_listen(ssk->id, backlog);
528 	return (error);
529 }
530 
531 /*
532  * Initiate a SDP connection to nam.
533  */
534 static int
535 sdp_start_connect(struct sdp_sock *ssk, struct sockaddr *nam, struct thread *td)
536 {
537 	struct sockaddr_in src;
538 	struct socket *so;
539 	int error;
540 
541 	so = ssk->socket;
542 
543 	SDP_WLOCK_ASSERT(ssk);
544 	if (ssk->lport == 0) {
545 		error = sdp_pcbbind(ssk, (struct sockaddr *)0, td->td_ucred);
546 		if (error)
547 			return error;
548 	}
549 	src.sin_family = AF_INET;
550 	src.sin_len = sizeof(src);
551 	bzero(&src.sin_zero, sizeof(src.sin_zero));
552 	src.sin_port = ssk->lport;
553 	src.sin_addr.s_addr = ssk->laddr;
554 	soisconnecting(so);
555 	SDP_WUNLOCK(ssk);
556 	error = -rdma_resolve_addr(ssk->id, (struct sockaddr *)&src, nam,
557 	    SDP_RESOLVE_TIMEOUT);
558 	SDP_WLOCK(ssk);
559 	if (error == 0)
560 		ssk->state = TCPS_SYN_SENT;
561 
562 	return 0;
563 }
564 
565 /*
566  * Initiate SDP connection.
567  */
568 static int
569 sdp_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
570 {
571 	int error = 0;
572 	struct sdp_sock *ssk;
573 	struct sockaddr_in *sin;
574 
575 	sin = (struct sockaddr_in *)nam;
576 	if (nam->sa_len != sizeof(*sin))
577 		return (EINVAL);
578 	if (sin->sin_family != AF_INET)
579 		return (EAFNOSUPPORT);
580 	if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
581 		return (EAFNOSUPPORT);
582 	if ((error = prison_remote_ip4(td->td_ucred, &sin->sin_addr)) != 0)
583 		return (error);
584 	ssk = sdp_sk(so);
585 	SDP_WLOCK(ssk);
586 	if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED))
587 		error = EINVAL;
588 	else
589 		error = sdp_start_connect(ssk, nam, td);
590 	SDP_WUNLOCK(ssk);
591 	return (error);
592 }
593 
594 /*
595  * Drop a SDP socket, reporting
596  * the specified error.  If connection is synchronized,
597  * then send a RST to peer.
598  */
599 static struct sdp_sock *
600 sdp_drop(struct sdp_sock *ssk, int errno)
601 {
602 	struct socket *so;
603 
604 	SDP_WLOCK_ASSERT(ssk);
605 	so = ssk->socket;
606 	if (TCPS_HAVERCVDSYN(ssk->state))
607 		sdp_output_reset(ssk);
608 	if (errno == ETIMEDOUT && ssk->softerror)
609 		errno = ssk->softerror;
610 	so->so_error = errno;
611 	return (sdp_closed(ssk));
612 }
613 
/*
 * User issued close, and wish to trail through shutdown states:
 * if never received SYN, just forget it.  If got a SYN from peer,
 * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN.
 * If already got a FIN from peer, then almost done; go to LAST_ACK
 * state.  In all other cases, have already sent FIN to peer (e.g.
 * after PRU_SHUTDOWN), and just have to play tedious game waiting
 * for peer to send FIN or not respond to keep-alives, etc.
 * We can let the user exit from the close as soon as the FIN is acked.
 *
 * Called with the pcb write lock held.  The LISTEN case drops the lock
 * around sdp_destroy_cma() and re-takes it.
 */
static void
sdp_usrclosed(struct sdp_sock *ssk)
{

	SDP_WLOCK_ASSERT(ssk);

	switch (ssk->state) {
	case TCPS_LISTEN:
		ssk->state = TCPS_CLOSED;
		/* Tear down the listening CM id without the pcb lock held. */
		SDP_WUNLOCK(ssk);
		sdp_destroy_cma(ssk);
		SDP_WLOCK(ssk);
		/* FALLTHROUGH */
	case TCPS_CLOSED:
		ssk = sdp_closed(ssk);
		/*
		 * sdp_closed() should never return NULL here as the socket is
		 * still open.
		 */
		KASSERT(ssk != NULL,
		    ("sdp_usrclosed: sdp_closed() returned NULL"));
		break;

	case TCPS_SYN_SENT:
		/* FALLTHROUGH */
	case TCPS_SYN_RECEIVED:
		/* Not established yet: just remember that a FIN is owed. */
		ssk->flags |= SDP_NEEDFIN;
		break;

	case TCPS_ESTABLISHED:
		ssk->flags |= SDP_NEEDFIN;
		ssk->state = TCPS_FIN_WAIT_1;
		break;

	case TCPS_CLOSE_WAIT:
		ssk->state = TCPS_LAST_ACK;
		break;
	}
	/* States not listed above are left unchanged by the switch. */
	if (ssk->state >= TCPS_FIN_WAIT_2) {
		/* Prevent the connection hanging in FIN_WAIT_2 forever. */
		if (ssk->state == TCPS_FIN_WAIT_2)
			sdp_2msl_wait(ssk);
		else
			soisdisconnected(ssk->socket);
	}
}
670 
/*
 * Start an orderly disconnect: arm keep2msl to run sdp_dreq_timeout()
 * after SDP_FIN_WAIT_TIMEOUT, flag the pcb as needing a FIN and waiting
 * for the disconnect reply, and kick the send path.  The pcb write lock
 * must be held.
 */
static void
sdp_output_disconnect(struct sdp_sock *ssk)
{

	SDP_WLOCK_ASSERT(ssk);
	callout_reset(&ssk->keep2msl, SDP_FIN_WAIT_TIMEOUT,
	    sdp_dreq_timeout, ssk);
	ssk->flags |= SDP_NEEDFIN | SDP_DREQWAIT;
	sdp_post_sends(ssk, M_NOWAIT);
}
681 
682 /*
683  * Initiate or continue a disconnect.
684  * If embryonic state, just send reset (once).
685  * If in ``let data drain'' option and linger null, just drop.
686  * Otherwise (hard), mark socket disconnecting and drop
687  * current input data; switch states based on user close, and
688  * send segment to peer (with FIN).
689  */
690 static void
691 sdp_start_disconnect(struct sdp_sock *ssk)
692 {
693 	struct socket *so;
694 	int unread;
695 
696 	so = ssk->socket;
697 	SDP_WLOCK_ASSERT(ssk);
698 	sdp_stop_keepalive_timer(so);
699 	/*
700 	 * Neither sdp_closed() nor sdp_drop() should return NULL, as the
701 	 * socket is still open.
702 	 */
703 	if (ssk->state < TCPS_ESTABLISHED) {
704 		ssk = sdp_closed(ssk);
705 		KASSERT(ssk != NULL,
706 		    ("sdp_start_disconnect: sdp_close() returned NULL"));
707 	} else if ((so->so_options & SO_LINGER) && so->so_linger == 0) {
708 		ssk = sdp_drop(ssk, 0);
709 		KASSERT(ssk != NULL,
710 		    ("sdp_start_disconnect: sdp_drop() returned NULL"));
711 	} else {
712 		soisdisconnecting(so);
713 		unread = sbused(&so->so_rcv);
714 		sbflush(&so->so_rcv);
715 		sdp_usrclosed(ssk);
716 		if (!(ssk->flags & SDP_DROPPED)) {
717 			if (unread)
718 				sdp_output_reset(ssk);
719 			else
720 				sdp_output_disconnect(ssk);
721 		}
722 	}
723 }
724 
725 /*
726  * User initiated disconnect.
727  */
728 static int
729 sdp_disconnect(struct socket *so)
730 {
731 	struct sdp_sock *ssk;
732 	int error = 0;
733 
734 	ssk = sdp_sk(so);
735 	SDP_WLOCK(ssk);
736 	if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
737 		error = ECONNRESET;
738 		goto out;
739 	}
740 	sdp_start_disconnect(ssk);
741 out:
742 	SDP_WUNLOCK(ssk);
743 	return (error);
744 }
745 
746 /*
747  * Accept a connection.  Essentially all the work is done at higher levels;
748  * just return the address of the peer, storing through addr.
749  *
750  *
751  * XXX This is broken XXX
752  *
753  * The rationale for acquiring the sdp lock here is somewhat complicated,
754  * and is described in detail in the commit log entry for r175612.  Acquiring
755  * it delays an accept(2) racing with sonewconn(), which inserts the socket
756  * before the address/port fields are initialized.  A better fix would
757  * prevent the socket from being placed in the listen queue until all fields
758  * are fully initialized.
759  */
760 static int
761 sdp_accept(struct socket *so, struct sockaddr *sa)
762 {
763 	struct sdp_sock *ssk = NULL;
764 	int error;
765 
766 	if (so->so_state & SS_ISDISCONNECTED)
767 		return (ECONNABORTED);
768 
769 	error = 0;
770 	ssk = sdp_sk(so);
771 	SDP_WLOCK(ssk);
772 	if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED))
773 		error = ECONNABORTED;
774 	else
775 		*(struct sockaddr_in *)sa = (struct sockaddr_in ){
776 			.sin_family = AF_INET,
777 			.sin_len = sizeof(struct sockaddr_in),
778 			.sin_addr.s_addr = ssk->faddr,
779 			.sin_port = ssk->fport,
780 		};
781 	SDP_WUNLOCK(ssk);
782 
783 	return (error);
784 }
785 
786 /*
787  * Mark the connection as being incapable of further output.
788  */
789 static int
790 sdp_shutdown(struct socket *so)
791 {
792 	int error = 0;
793 	struct sdp_sock *ssk;
794 
795 	ssk = sdp_sk(so);
796 	SDP_WLOCK(ssk);
797 	if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
798 		error = ECONNRESET;
799 		goto out;
800 	}
801 	socantsendmore(so);
802 	sdp_usrclosed(ssk);
803 	if (!(ssk->flags & SDP_DROPPED))
804 		sdp_output_disconnect(ssk);
805 
806 out:
807 	SDP_WUNLOCK(ssk);
808 
809 	return (error);
810 }
811 
/*
 * Queue the packet chain mb on sockbuf sb, merging it into the last
 * record when both fit in one SDP packet.
 *
 * mb must carry a packet header; sdp_send() prepends SDP_HEAD_SIZE bytes
 * of header to it before calling.  cnt is the number of m_next links in
 * mb as counted by sdp_send().  The sockbuf lock must be held.
 */
static void
sdp_append(struct sdp_sock *ssk, struct sockbuf *sb, struct mbuf *mb, int cnt)
{
	struct mbuf *n;
	int ncnt;

	SOCKBUF_LOCK_ASSERT(sb);
	SBLASTRECORDCHK(sb);
	KASSERT(mb->m_flags & M_PKTHDR,
		("sdp_append: %p Missing packet header.\n", mb));
	n = sb->sb_lastrecord;
	/*
	 * If the queue is empty just set all pointers and proceed.
	 */
	if (n == NULL) {
		sb->sb_lastrecord = sb->sb_mb = sb->sb_sndptr = mb;
		for (; mb; mb = mb->m_next) {
	                sb->sb_mbtail = mb;
			sballoc(sb, mb);
		}
		return;
	}
	/*
	 * Count the number of mbufs in the current tail.
	 */
	for (ncnt = 0; n->m_next; n = n->m_next)
		ncnt++;
	/* The walk above moved n; point it back at the last record. */
	n = sb->sb_lastrecord;
	/*
	 * If the two chains can fit in a single sdp packet and
	 * the last record has not been sent yet (WRITABLE) coalesce
	 * them.  The lastrecord remains the same but we must strip the
	 * packet header and then let sbcompress do the hard part.
	 */
	if (M_WRITABLE(n) && ncnt + cnt < SDP_MAX_SEND_SGES &&
	    n->m_pkthdr.len + mb->m_pkthdr.len - SDP_HEAD_SIZE <
	    ssk->xmit_size_goal) {
		m_adj(mb, SDP_HEAD_SIZE);
		n->m_pkthdr.len += mb->m_pkthdr.len;
		/* Carry the push/urgent markers over to the merged record. */
		n->m_flags |= mb->m_flags & (M_PUSH | M_URG);
		m_demote(mb, 1, 0);
		sbcompress(sb, mb, sb->sb_mbtail);
		return;
	}
	/*
	 * Not compressible, just append to the end and adjust counters.
	 */
	sb->sb_lastrecord->m_flags |= M_PUSH;
	sb->sb_lastrecord->m_nextpkt = mb;
	sb->sb_lastrecord = mb;
	if (sb->sb_sndptr == NULL)
		sb->sb_sndptr = mb;
	for (; mb; mb = mb->m_next) {
		sb->sb_mbtail = mb;
		sballoc(sb, mb);
	}
}
869 
870 /*
871  * Do a send by putting data in output queue and updating urgent
872  * marker if URG set.  Possibly send more data.  Unlike the other
873  * pru_*() routines, the mbuf chains are our responsibility.  We
874  * must either enqueue them or free them.  The other pru_* routines
875  * generally are caller-frees.
876  *
877  * This comes from sendfile, normal sends will come from sdp_sosend().
878  */
879 static int
880 sdp_send(struct socket *so, int flags, struct mbuf *m,
881     struct sockaddr *nam, struct mbuf *control, struct thread *td)
882 {
883 	struct sdp_sock *ssk;
884 	struct mbuf *n;
885 	int error;
886 	int cnt;
887 
888 	if (nam != NULL) {
889 		if (nam->sa_family != AF_INET) {
890 			if (control)
891 				m_freem(control);
892 			m_freem(m);
893 			return (EAFNOSUPPORT);
894 		}
895 		if (nam->sa_len != sizeof(struct sockaddr_in)) {
896 			if (control)
897 				m_freem(control);
898 			m_freem(m);
899 			return (EINVAL);
900 		}
901 	}
902 
903 	error = 0;
904 	ssk = sdp_sk(so);
905 	KASSERT(m->m_flags & M_PKTHDR,
906 	    ("sdp_send: %p no packet header", m));
907 	M_PREPEND(m, SDP_HEAD_SIZE, M_WAITOK);
908 	mtod(m, struct sdp_bsdh *)->mid = SDP_MID_DATA;
909 	for (n = m, cnt = 0; n->m_next; n = n->m_next)
910 		cnt++;
911 	if (cnt > SDP_MAX_SEND_SGES) {
912 		n = m_collapse(m, M_WAITOK, SDP_MAX_SEND_SGES);
913 		if (n == NULL) {
914 			m_freem(m);
915 			return (EMSGSIZE);
916 		}
917 		m = n;
918 		for (cnt = 0; n->m_next; n = n->m_next)
919 			cnt++;
920 	}
921 	SDP_WLOCK(ssk);
922 	if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
923 		if (control)
924 			m_freem(control);
925 		if (m)
926 			m_freem(m);
927 		error = ECONNRESET;
928 		goto out;
929 	}
930 	if (control) {
931 		/* SDP doesn't support control messages. */
932 		if (control->m_len) {
933 			m_freem(control);
934 			if (m)
935 				m_freem(m);
936 			error = EINVAL;
937 			goto out;
938 		}
939 		m_freem(control);	/* empty control, just free it */
940 	}
941 	if (!(flags & PRUS_OOB)) {
942 		SOCKBUF_LOCK(&so->so_snd);
943 		sdp_append(ssk, &so->so_snd, m, cnt);
944 		SOCKBUF_UNLOCK(&so->so_snd);
945 		if (nam && ssk->state < TCPS_SYN_SENT) {
946 			/*
947 			 * Do implied connect if not yet connected.
948 			 */
949 			error = sdp_start_connect(ssk, nam, td);
950 			if (error)
951 				goto out;
952 		}
953 		if (flags & PRUS_EOF) {
954 			/*
955 			 * Close the send side of the connection after
956 			 * the data is sent.
957 			 */
958 			socantsendmore(so);
959 			sdp_usrclosed(ssk);
960 			if (!(ssk->flags & SDP_DROPPED))
961 				sdp_output_disconnect(ssk);
962 		} else if (!(ssk->flags & SDP_DROPPED) &&
963 		    !(flags & PRUS_MORETOCOME))
964 			sdp_post_sends(ssk, M_NOWAIT);
965 		SDP_WUNLOCK(ssk);
966 		return (0);
967 	} else {
968 		SOCKBUF_LOCK(&so->so_snd);
969 		if (sbspace(&so->so_snd) < -512) {
970 			SOCKBUF_UNLOCK(&so->so_snd);
971 			m_freem(m);
972 			error = ENOBUFS;
973 			goto out;
974 		}
975 		/*
976 		 * According to RFC961 (Assigned Protocols),
977 		 * the urgent pointer points to the last octet
978 		 * of urgent data.  We continue, however,
979 		 * to consider it to indicate the first octet
980 		 * of data past the urgent section.
981 		 * Otherwise, snd_up should be one lower.
982 		 */
983 		m->m_flags |= M_URG | M_PUSH;
984 		sdp_append(ssk, &so->so_snd, m, cnt);
985 		SOCKBUF_UNLOCK(&so->so_snd);
986 		if (nam && ssk->state < TCPS_SYN_SENT) {
987 			/*
988 			 * Do implied connect if not yet connected.
989 			 */
990 			error = sdp_start_connect(ssk, nam, td);
991 			if (error)
992 				goto out;
993 		}
994 		sdp_post_sends(ssk, M_NOWAIT);
995 		SDP_WUNLOCK(ssk);
996 		return (0);
997 	}
998 out:
999 	SDP_WUNLOCK(ssk);
1000 	return (error);
1001 }
1002 
/*
 * Send on a socket.  If send must go all at once and message is larger than
 * send buffering, then hard error.  Lock against other senders.  If must go
 * all at once and not enough room now, then inform user that this would
 * block and do nothing.  Otherwise, if nonblocking, send as much as
 * possible.  The data to be sent is described by "uio" if nonzero, otherwise
 * by the mbuf chain "top" (which must be null if uio is not).  Data provided
 * in mbuf chain must be small enough to send all at once.
 *
 * Returns nonzero on error, timeout or signal; callers must check for short
 * counts if EINTR/ERESTART are returned.  Data and control buffers are freed
 * on return.
 *
 * Control messages are not supported: a non-empty control chain fails with
 * EINVAL and an empty one is simply freed.
 */
static int
sdp_sosend(struct socket *so, struct sockaddr *addr, struct uio *uio,
    struct mbuf *top, struct mbuf *control, int flags, struct thread *td)
{
	struct sdp_sock *ssk;
	long space, resid;
	int atomic;
	int error;
	int copy;

	if (uio != NULL)
		resid = uio->uio_resid;
	else
		resid = top->m_pkthdr.len;
	/* A caller-supplied mbuf chain has to go out in one piece. */
	atomic = top != NULL;
	if (control != NULL) {
		if (control->m_len) {
			m_freem(control);
			if (top)
				m_freem(top);
			return (EINVAL);
		}
		m_freem(control);
		control = NULL;
	}
	/*
	 * In theory resid should be unsigned.  However, space must be
	 * signed, as it might be less than 0 if we over-committed, and we
	 * must use a signed comparison of space and resid.  On the other
	 * hand, a negative resid causes us to loop sending 0-length
	 * segments to the protocol.
	 *
	 * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM
	 * type sockets since that's an error.
	 */
	if (resid < 0 || (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) {
		error = EINVAL;
		goto out;
	}
	if (td != NULL)
		td->td_ru.ru_msgsnd++;

	ssk = sdp_sk(so);
	/* Serialize against other senders on this socket. */
	error = SOCK_IO_SEND_LOCK(so, SBLOCKWAIT(flags));
	if (error)
		goto out;

restart:
	do {
		SOCKBUF_LOCK(&so->so_snd);
		if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
			SOCKBUF_UNLOCK(&so->so_snd);
			error = EPIPE;
			goto release;
		}
		/* Report and clear a pending socket error. */
		if (so->so_error) {
			error = so->so_error;
			so->so_error = 0;
			SOCKBUF_UNLOCK(&so->so_snd);
			goto release;
		}
		if ((so->so_state & SS_ISCONNECTED) == 0 && addr == NULL) {
			SOCKBUF_UNLOCK(&so->so_snd);
			error = ENOTCONN;
			goto release;
		}
		space = sbspace(&so->so_snd);
		/* Give out-of-band data some extra room. */
		if (flags & MSG_OOB)
			space += 1024;
		if (atomic && resid > ssk->xmit_size_goal - SDP_HEAD_SIZE) {
			SOCKBUF_UNLOCK(&so->so_snd);
			error = EMSGSIZE;
			goto release;
		}
		/* Not enough room: fail if non-blocking, otherwise wait. */
		if (space < resid &&
		    (atomic || space < so->so_snd.sb_lowat)) {
			if ((so->so_state & SS_NBIO) ||
			    (flags & (MSG_NBIO | MSG_DONTWAIT)) != 0) {
				SOCKBUF_UNLOCK(&so->so_snd);
				error = EWOULDBLOCK;
				goto release;
			}
			error = sbwait(so, SO_SND);
			SOCKBUF_UNLOCK(&so->so_snd);
			if (error)
				goto release;
			goto restart;
		}
		SOCKBUF_UNLOCK(&so->so_snd);
		do {
			if (uio == NULL) {
				/* The whole chain is handed over in one go. */
				resid = 0;
				if (flags & MSG_EOR)
					top->m_flags |= M_EOR;
			} else {
				/*
				 * Copy the data from userland into a mbuf
				 * chain.  If no data is to be copied in,
				 * a single empty mbuf is returned.
				 */
				copy = min(space,
				    ssk->xmit_size_goal - SDP_HEAD_SIZE);
				top = m_uiotombuf(uio, M_WAITOK, copy,
				    0, M_PKTHDR |
				    ((flags & MSG_EOR) ? M_EOR : 0));
				if (top == NULL) {
					/* only possible error */
					error = EFAULT;
					goto release;
				}
				/* Charge what was just copied in against space. */
				space -= resid - uio->uio_resid;
				resid = uio->uio_resid;
			}
			/*
			 * XXX all the SBS_CANTSENDMORE checks previously
			 * done could be out of date after dropping the
			 * socket lock.
			 */
			error = sdp_send(so, (flags & MSG_OOB) ? PRUS_OOB :
			/*
			 * Set EOF on the last send if the user specified
			 * MSG_EOF.
			 */
			    ((flags & MSG_EOF) && (resid <= 0)) ? PRUS_EOF :
			/* If there is more to send set PRUS_MORETOCOME. */
			    (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0,
			    top, addr, NULL, td);
			/* sdp_send() consumed the chain; do not free it again. */
			top = NULL;
			if (error)
				goto release;
		} while (resid && space > 0);
	} while (resid);

release:
	SOCK_IO_SEND_UNLOCK(so);
out:
	if (top != NULL)
		m_freem(top);
	return (error);
}
1156 
1157 /*
1158  * The part of soreceive() that implements reading non-inline out-of-band
1159  * data from a socket.  For more complete comments, see soreceive(), from
1160  * which this code originated.
1161  *
1162  * Note that soreceive_rcvoob(), unlike the remainder of soreceive(), is
1163  * unable to return an mbuf chain to the caller.
1164  */
1165 static int
1166 soreceive_rcvoob(struct socket *so, struct uio *uio, int flags)
1167 {
1168 	struct protosw *pr = so->so_proto;
1169 	struct mbuf *m;
1170 	int error;
1171 
1172 	KASSERT(flags & MSG_OOB, ("soreceive_rcvoob: (flags & MSG_OOB) == 0"));
1173 
1174 	m = m_get(M_WAITOK, MT_DATA);
1175 	error = pr->pr_rcvoob(so, m, flags & MSG_PEEK);
1176 	if (error)
1177 		goto bad;
1178 	do {
1179 		error = uiomove(mtod(m, void *),
1180 		    (int) min(uio->uio_resid, m->m_len), uio);
1181 		m = m_free(m);
1182 	} while (uio->uio_resid && error == 0 && m);
1183 bad:
1184 	if (m != NULL)
1185 		m_freem(m);
1186 	return (error);
1187 }
1188 
1189 /*
1190  * Optimized version of soreceive() for stream (TCP) sockets.
1191  */
1192 static int
1193 sdp_sorecv(struct socket *so, struct sockaddr **psa, struct uio *uio,
1194     struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
1195 {
1196 	int len = 0, error = 0, flags, oresid;
1197 	struct sockbuf *sb;
1198 	struct mbuf *m, *n = NULL;
1199 	struct sdp_sock *ssk;
1200 
1201 	/* We only do stream sockets. */
1202 	if (so->so_type != SOCK_STREAM)
1203 		return (EINVAL);
1204 	if (psa != NULL)
1205 		*psa = NULL;
1206 	if (controlp != NULL)
1207 		return (EINVAL);
1208 	if (flagsp != NULL)
1209 		flags = *flagsp &~ MSG_EOR;
1210 	else
1211 		flags = 0;
1212 	if (flags & MSG_OOB)
1213 		return (soreceive_rcvoob(so, uio, flags));
1214 	if (mp0 != NULL)
1215 		*mp0 = NULL;
1216 
1217 	sb = &so->so_rcv;
1218 	ssk = sdp_sk(so);
1219 
1220 	/* Prevent other readers from entering the socket. */
1221 	error = SOCK_IO_RECV_LOCK(so, SBLOCKWAIT(flags));
1222 	if (error)
1223 		return (error);
1224 	SOCKBUF_LOCK(sb);
1225 
1226 	/* Easy one, no space to copyout anything. */
1227 	if (uio->uio_resid == 0) {
1228 		error = EINVAL;
1229 		goto out;
1230 	}
1231 	oresid = uio->uio_resid;
1232 
1233 	/* We will never ever get anything unless we are connected. */
1234 	if (!(so->so_state & (SS_ISCONNECTED|SS_ISDISCONNECTED))) {
1235 		/* When disconnecting there may be still some data left. */
1236 		if (sbavail(sb))
1237 			goto deliver;
1238 		if (!(so->so_state & SS_ISDISCONNECTED))
1239 			error = ENOTCONN;
1240 		goto out;
1241 	}
1242 
1243 	/* Socket buffer is empty and we shall not block. */
1244 	if (sbavail(sb) == 0 &&
1245 	    ((so->so_state & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO)))) {
1246 		error = EAGAIN;
1247 		goto out;
1248 	}
1249 
1250 restart:
1251 	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
1252 
1253 	/* Abort if socket has reported problems. */
1254 	if (so->so_error) {
1255 		if (sbavail(sb))
1256 			goto deliver;
1257 		if (oresid > uio->uio_resid)
1258 			goto out;
1259 		error = so->so_error;
1260 		if (!(flags & MSG_PEEK))
1261 			so->so_error = 0;
1262 		goto out;
1263 	}
1264 
1265 	/* Door is closed.  Deliver what is left, if any. */
1266 	if (sb->sb_state & SBS_CANTRCVMORE) {
1267 		if (sbavail(sb))
1268 			goto deliver;
1269 		else
1270 			goto out;
1271 	}
1272 
1273 	/* Socket buffer got some data that we shall deliver now. */
1274 	if (sbavail(sb) && !(flags & MSG_WAITALL) &&
1275 	    ((so->so_state & SS_NBIO) ||
1276 	     (flags & (MSG_DONTWAIT|MSG_NBIO)) ||
1277 	     sbavail(sb) >= sb->sb_lowat ||
1278 	     sbavail(sb) >= uio->uio_resid ||
1279 	     sbavail(sb) >= sb->sb_hiwat) ) {
1280 		goto deliver;
1281 	}
1282 
1283 	/* On MSG_WAITALL we must wait until all data or error arrives. */
1284 	if ((flags & MSG_WAITALL) &&
1285 	    (sbavail(sb) >= uio->uio_resid || sbavail(sb) >= sb->sb_lowat))
1286 		goto deliver;
1287 
1288 	/*
1289 	 * Wait and block until (more) data comes in.
1290 	 * NB: Drops the sockbuf lock during wait.
1291 	 */
1292 	error = sbwait(so, SO_RCV);
1293 	if (error)
1294 		goto out;
1295 	goto restart;
1296 
1297 deliver:
1298 	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
1299 	KASSERT(sbavail(sb), ("%s: sockbuf empty", __func__));
1300 	KASSERT(sb->sb_mb != NULL, ("%s: sb_mb == NULL", __func__));
1301 
1302 	/* Statistics. */
1303 	if (uio->uio_td)
1304 		uio->uio_td->td_ru.ru_msgrcv++;
1305 
1306 	/* Fill uio until full or current end of socket buffer is reached. */
1307 	len = min(uio->uio_resid, sbavail(sb));
1308 	if (mp0 != NULL) {
1309 		/* Dequeue as many mbufs as possible. */
1310 		if (!(flags & MSG_PEEK) && len >= sb->sb_mb->m_len) {
1311 			for (*mp0 = m = sb->sb_mb;
1312 			     m != NULL && m->m_len <= len;
1313 			     m = m->m_next) {
1314 				len -= m->m_len;
1315 				uio->uio_resid -= m->m_len;
1316 				sbfree(sb, m);
1317 				n = m;
1318 			}
1319 			sb->sb_mb = m;
1320 			if (sb->sb_mb == NULL)
1321 				SB_EMPTY_FIXUP(sb);
1322 			n->m_next = NULL;
1323 		}
1324 		/* Copy the remainder. */
1325 		if (len > 0) {
1326 			KASSERT(sb->sb_mb != NULL,
1327 			    ("%s: len > 0 && sb->sb_mb empty", __func__));
1328 
1329 			m = m_copym(sb->sb_mb, 0, len, M_NOWAIT);
1330 			if (m == NULL)
1331 				len = 0;	/* Don't flush data from sockbuf. */
1332 			else
1333 				uio->uio_resid -= m->m_len;
1334 			if (*mp0 != NULL)
1335 				n->m_next = m;
1336 			else
1337 				*mp0 = m;
1338 			if (*mp0 == NULL) {
1339 				error = ENOBUFS;
1340 				goto out;
1341 			}
1342 		}
1343 	} else {
1344 		/* NB: Must unlock socket buffer as uiomove may sleep. */
1345 		SOCKBUF_UNLOCK(sb);
1346 		error = m_mbuftouio(uio, sb->sb_mb, len);
1347 		SOCKBUF_LOCK(sb);
1348 		if (error)
1349 			goto out;
1350 	}
1351 	SBLASTRECORDCHK(sb);
1352 	SBLASTMBUFCHK(sb);
1353 
1354 	/*
1355 	 * Remove the delivered data from the socket buffer unless we
1356 	 * were only peeking.
1357 	 */
1358 	if (!(flags & MSG_PEEK)) {
1359 		if (len > 0)
1360 			sbdrop_locked(sb, len);
1361 
1362 		/* Notify protocol that we drained some data. */
1363 		SOCKBUF_UNLOCK(sb);
1364 		SDP_WLOCK(ssk);
1365 		sdp_do_posts(ssk);
1366 		SDP_WUNLOCK(ssk);
1367 		SOCKBUF_LOCK(sb);
1368 	}
1369 
1370 	/*
1371 	 * For MSG_WAITALL we may have to loop again and wait for
1372 	 * more data to come in.
1373 	 */
1374 	if ((flags & MSG_WAITALL) && uio->uio_resid > 0)
1375 		goto restart;
1376 out:
1377 	SBLASTRECORDCHK(sb);
1378 	SBLASTMBUFCHK(sb);
1379 	SOCKBUF_UNLOCK(sb);
1380 	SOCK_IO_RECV_UNLOCK(so);
1381 	return (error);
1382 }
1383 
1384 /*
1385  * Abort is used to teardown a connection typically while sitting in
1386  * the accept queue.
1387  */
1388 void
1389 sdp_abort(struct socket *so)
1390 {
1391 	struct sdp_sock *ssk;
1392 
1393 	ssk = sdp_sk(so);
1394 	SDP_WLOCK(ssk);
1395 	/*
1396 	 * If we have not yet dropped, do it now.
1397 	 */
1398 	if (!(ssk->flags & SDP_TIMEWAIT) &&
1399 	    !(ssk->flags & SDP_DROPPED))
1400 		sdp_drop(ssk, ECONNABORTED);
1401 	KASSERT(ssk->flags & SDP_DROPPED, ("sdp_abort: %p not dropped 0x%X",
1402 	    ssk, ssk->flags));
1403 	SDP_WUNLOCK(ssk);
1404 }
1405 
1406 /*
1407  * Close a SDP socket and initiate a friendly disconnect.
1408  */
1409 static void
1410 sdp_close(struct socket *so)
1411 {
1412 	struct sdp_sock *ssk;
1413 
1414 	ssk = sdp_sk(so);
1415 	SDP_WLOCK(ssk);
1416 	/*
1417 	 * If we have not yet dropped, do it now.
1418 	 */
1419 	if (!(ssk->flags & SDP_TIMEWAIT) &&
1420 	    !(ssk->flags & SDP_DROPPED))
1421 		sdp_start_disconnect(ssk);
1422 
1423 	/*
1424 	 * If we've still not dropped let the socket layer know we're
1425 	 * holding on to the socket and pcb for a while.
1426 	 */
1427 	if (!(ssk->flags & SDP_DROPPED)) {
1428 		ssk->flags |= SDP_SOCKREF;
1429 		soref(so);
1430 	}
1431 	SDP_WUNLOCK(ssk);
1432 }
1433 
1434 /*
1435  * User requests out-of-band data.
1436  */
1437 static int
1438 sdp_rcvoob(struct socket *so, struct mbuf *m, int flags)
1439 {
1440 	int error = 0;
1441 	struct sdp_sock *ssk;
1442 
1443 	ssk = sdp_sk(so);
1444 	SDP_WLOCK(ssk);
1445 	if (!rx_ring_trylock(&ssk->rx_ring)) {
1446 		SDP_WUNLOCK(ssk);
1447 		return (ECONNRESET);
1448 	}
1449 	if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
1450 		error = ECONNRESET;
1451 		goto out;
1452 	}
1453 	if ((so->so_oobmark == 0 &&
1454 	     (so->so_rcv.sb_state & SBS_RCVATMARK) == 0) ||
1455 	    so->so_options & SO_OOBINLINE ||
1456 	    ssk->oobflags & SDP_HADOOB) {
1457 		error = EINVAL;
1458 		goto out;
1459 	}
1460 	if ((ssk->oobflags & SDP_HAVEOOB) == 0) {
1461 		error = EWOULDBLOCK;
1462 		goto out;
1463 	}
1464 	m->m_len = 1;
1465 	*mtod(m, caddr_t) = ssk->iobc;
1466 	if ((flags & MSG_PEEK) == 0)
1467 		ssk->oobflags ^= (SDP_HAVEOOB | SDP_HADOOB);
1468 out:
1469 	rx_ring_unlock(&ssk->rx_ring);
1470 	SDP_WUNLOCK(ssk);
1471 	return (error);
1472 }
1473 
1474 void
1475 sdp_urg(struct sdp_sock *ssk, struct mbuf *mb)
1476 {
1477 	struct mbuf *m;
1478 	struct socket *so;
1479 
1480 	so = ssk->socket;
1481 	if (so == NULL)
1482 		return;
1483 
1484 	so->so_oobmark = sbused(&so->so_rcv) + mb->m_pkthdr.len - 1;
1485 	sohasoutofband(so);
1486 	ssk->oobflags &= ~(SDP_HAVEOOB | SDP_HADOOB);
1487 	if (!(so->so_options & SO_OOBINLINE)) {
1488 		for (m = mb; m->m_next != NULL; m = m->m_next);
1489 		ssk->iobc = *(mtod(m, char *) + m->m_len - 1);
1490 		ssk->oobflags |= SDP_HAVEOOB;
1491 		m->m_len--;
1492 		mb->m_pkthdr.len--;
1493 	}
1494 }
1495 
1496 /*
1497  * Notify a sdp socket of an asynchronous error.
1498  *
1499  * Do not wake up user since there currently is no mechanism for
1500  * reporting soft errors (yet - a kqueue filter may be added).
1501  */
1502 struct sdp_sock *
1503 sdp_notify(struct sdp_sock *ssk, int error)
1504 {
1505 
1506 	SDP_WLOCK_ASSERT(ssk);
1507 
1508 	if ((ssk->flags & SDP_TIMEWAIT) ||
1509 	    (ssk->flags & SDP_DROPPED))
1510 		return (ssk);
1511 
1512 	/*
1513 	 * Ignore some errors if we are hooked up.
1514 	 */
1515 	if (ssk->state == TCPS_ESTABLISHED &&
1516 	    (error == EHOSTUNREACH || error == ENETUNREACH ||
1517 	     error == EHOSTDOWN))
1518 		return (ssk);
1519 	ssk->softerror = error;
1520 	return sdp_drop(ssk, error);
1521 }
1522 
1523 static void
1524 sdp_keepalive_timeout(void *data)
1525 {
1526 	struct sdp_sock *ssk;
1527 
1528 	ssk = data;
1529 	/* Callout canceled. */
1530         if (!callout_active(&ssk->keep2msl))
1531                 return;
1532 	/* Callout rescheduled as a different kind of timer. */
1533 	if (callout_pending(&ssk->keep2msl))
1534 		goto out;
1535         callout_deactivate(&ssk->keep2msl);
1536 	if (ssk->flags & SDP_DROPPED ||
1537 	    (ssk->socket->so_options & SO_KEEPALIVE) == 0)
1538 		goto out;
1539 	sdp_post_keepalive(ssk);
1540 	callout_reset(&ssk->keep2msl, SDP_KEEPALIVE_TIME,
1541 	    sdp_keepalive_timeout, ssk);
1542 out:
1543 	SDP_WUNLOCK(ssk);
1544 }
1545 
1546 
1547 void
1548 sdp_start_keepalive_timer(struct socket *so)
1549 {
1550 	struct sdp_sock *ssk;
1551 
1552 	ssk = sdp_sk(so);
1553 	if (!callout_pending(&ssk->keep2msl))
1554                 callout_reset(&ssk->keep2msl, SDP_KEEPALIVE_TIME,
1555                     sdp_keepalive_timeout, ssk);
1556 }
1557 
1558 static void
1559 sdp_stop_keepalive_timer(struct socket *so)
1560 {
1561 	struct sdp_sock *ssk;
1562 
1563 	ssk = sdp_sk(so);
1564 	callout_stop(&ssk->keep2msl);
1565 }
1566 
1567 /*
1568  * sdp_ctloutput() must drop the inpcb lock before performing copyin on
1569  * socket option arguments.  When it re-acquires the lock after the copy, it
1570  * has to revalidate that the connection is still valid for the socket
1571  * option.
1572  */
1573 #define SDP_WLOCK_RECHECK(inp) do {					\
1574 	SDP_WLOCK(ssk);							\
1575 	if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {		\
1576 		SDP_WUNLOCK(ssk);					\
1577 		return (ECONNRESET);					\
1578 	}								\
1579 } while(0)
1580 
1581 static int
1582 sdp_ctloutput(struct socket *so, struct sockopt *sopt)
1583 {
1584 	int	error, opt, optval;
1585 	struct sdp_sock *ssk;
1586 
1587 	error = 0;
1588 	ssk = sdp_sk(so);
1589 	if (sopt->sopt_level == SOL_SOCKET && sopt->sopt_name == SO_KEEPALIVE) {
1590 		SDP_WLOCK(ssk);
1591 		if (so->so_options & SO_KEEPALIVE)
1592 			sdp_start_keepalive_timer(so);
1593 		else
1594 			sdp_stop_keepalive_timer(so);
1595 		SDP_WUNLOCK(ssk);
1596 	}
1597 	if (sopt->sopt_level != IPPROTO_TCP)
1598 		return (error);
1599 
1600 	SDP_WLOCK(ssk);
1601 	if (ssk->flags & (SDP_TIMEWAIT | SDP_DROPPED)) {
1602 		SDP_WUNLOCK(ssk);
1603 		return (ECONNRESET);
1604 	}
1605 
1606 	switch (sopt->sopt_dir) {
1607 	case SOPT_SET:
1608 		switch (sopt->sopt_name) {
1609 		case TCP_NODELAY:
1610 			SDP_WUNLOCK(ssk);
1611 			error = sooptcopyin(sopt, &optval, sizeof optval,
1612 			    sizeof optval);
1613 			if (error)
1614 				return (error);
1615 
1616 			SDP_WLOCK_RECHECK(ssk);
1617 			opt = SDP_NODELAY;
1618 			if (optval)
1619 				ssk->flags |= opt;
1620 			else
1621 				ssk->flags &= ~opt;
1622 			sdp_do_posts(ssk);
1623 			SDP_WUNLOCK(ssk);
1624 			break;
1625 
1626 		default:
1627 			SDP_WUNLOCK(ssk);
1628 			error = ENOPROTOOPT;
1629 			break;
1630 		}
1631 		break;
1632 
1633 	case SOPT_GET:
1634 		switch (sopt->sopt_name) {
1635 		case TCP_NODELAY:
1636 			optval = ssk->flags & SDP_NODELAY;
1637 			SDP_WUNLOCK(ssk);
1638 			error = sooptcopyout(sopt, &optval, sizeof optval);
1639 			break;
1640 		default:
1641 			SDP_WUNLOCK(ssk);
1642 			error = ENOPROTOOPT;
1643 			break;
1644 		}
1645 		break;
1646 	}
1647 	return (error);
1648 }
1649 #undef SDP_WLOCK_RECHECK
1650 
1651 int sdp_mod_count = 0;
1652 int sdp_mod_usec = 0;
1653 
1654 void
1655 sdp_set_default_moderation(struct sdp_sock *ssk)
1656 {
1657 	if (sdp_mod_count <= 0 || sdp_mod_usec <= 0)
1658 		return;
1659 	ib_modify_cq(ssk->rx_ring.cq, sdp_mod_count, sdp_mod_usec);
1660 }
1661 
1662 static void
1663 sdp_dev_add(struct ib_device *device)
1664 {
1665 	struct ib_fmr_pool_param param;
1666 	struct sdp_device *sdp_dev;
1667 
1668 	sdp_dev = malloc(sizeof(*sdp_dev), M_SDP, M_WAITOK | M_ZERO);
1669 	sdp_dev->pd = ib_alloc_pd(device, 0);
1670 	if (IS_ERR(sdp_dev->pd))
1671 		goto out_pd;
1672 	memset(&param, 0, sizeof param);
1673 	param.max_pages_per_fmr = SDP_FMR_SIZE;
1674 	param.page_shift = PAGE_SHIFT;
1675 	param.access = (IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ);
1676 	param.pool_size = SDP_FMR_POOL_SIZE;
1677 	param.dirty_watermark = SDP_FMR_DIRTY_SIZE;
1678 	param.cache = 1;
1679 	sdp_dev->fmr_pool = ib_create_fmr_pool(sdp_dev->pd, &param);
1680 	if (IS_ERR(sdp_dev->fmr_pool))
1681 		goto out_fmr;
1682 	ib_set_client_data(device, &sdp_client, sdp_dev);
1683 	return;
1684 
1685 out_fmr:
1686 	ib_dealloc_pd(sdp_dev->pd);
1687 out_pd:
1688 	free(sdp_dev, M_SDP);
1689 }
1690 
1691 static void
1692 sdp_dev_rem(struct ib_device *device, void *client_data)
1693 {
1694 	struct sdp_device *sdp_dev;
1695 	struct sdp_sock *ssk;
1696 
1697 	SDP_LIST_WLOCK();
1698 	LIST_FOREACH(ssk, &sdp_list, list) {
1699 		if (ssk->ib_device != device)
1700 			continue;
1701 		SDP_WLOCK(ssk);
1702 		if ((ssk->flags & SDP_DESTROY) == 0)
1703 			ssk = sdp_notify(ssk, ECONNRESET);
1704 		if (ssk)
1705 			SDP_WUNLOCK(ssk);
1706 	}
1707 	SDP_LIST_WUNLOCK();
1708 	/*
1709 	 * XXX Do I need to wait between these two?
1710 	 */
1711 	sdp_dev = ib_get_client_data(device, &sdp_client);
1712 	if (!sdp_dev)
1713 		return;
1714 	ib_flush_fmr_pool(sdp_dev->fmr_pool);
1715 	ib_destroy_fmr_pool(sdp_dev->fmr_pool);
1716 	ib_dealloc_pd(sdp_dev->pd);
1717 	free(sdp_dev, M_SDP);
1718 }
1719 
1720 struct ib_client sdp_client =
1721     { .name = "sdp", .add = sdp_dev_add, .remove = sdp_dev_rem };
1722 
1723 
1724 static int
1725 sdp_pcblist(SYSCTL_HANDLER_ARGS)
1726 {
1727 	int error, n, i;
1728 	struct sdp_sock *ssk;
1729 	struct xinpgen xig;
1730 
1731 	/*
1732 	 * The process of preparing the TCB list is too time-consuming and
1733 	 * resource-intensive to repeat twice on every request.
1734 	 */
1735 	if (req->oldptr == NULL) {
1736 		n = sdp_count;
1737 		n += imax(n / 8, 10);
1738 		req->oldidx = 2 * (sizeof xig) + n * sizeof(struct xtcpcb);
1739 		return (0);
1740 	}
1741 
1742 	if (req->newptr != NULL)
1743 		return (EPERM);
1744 
1745 	/*
1746 	 * OK, now we're committed to doing something.
1747 	 */
1748 	SDP_LIST_RLOCK();
1749 	n = sdp_count;
1750 	SDP_LIST_RUNLOCK();
1751 
1752 	error = sysctl_wire_old_buffer(req, 2 * (sizeof xig)
1753 		+ n * sizeof(struct xtcpcb));
1754 	if (error != 0)
1755 		return (error);
1756 
1757 	bzero(&xig, sizeof(xig));
1758 	xig.xig_len = sizeof xig;
1759 	xig.xig_count = n;
1760 	xig.xig_gen = 0;
1761 	xig.xig_sogen = so_gencnt;
1762 	error = SYSCTL_OUT(req, &xig, sizeof xig);
1763 	if (error)
1764 		return (error);
1765 
1766 	SDP_LIST_RLOCK();
1767 	for (ssk = LIST_FIRST(&sdp_list), i = 0;
1768 	    ssk != NULL && i < n; ssk = LIST_NEXT(ssk, list)) {
1769 		struct xtcpcb xt;
1770 
1771 		SDP_RLOCK(ssk);
1772 		if (ssk->flags & SDP_TIMEWAIT) {
1773 			if (ssk->cred != NULL)
1774 				error = cr_cansee(req->td->td_ucred,
1775 				    ssk->cred);
1776 			else
1777 				error = EINVAL;	/* Skip this inp. */
1778 		} else if (ssk->socket)
1779 			error = cr_canseesocket(req->td->td_ucred,
1780 			    ssk->socket);
1781 		else
1782 			error = EINVAL;
1783 		if (error) {
1784 			error = 0;
1785 			goto next;
1786 		}
1787 
1788 		bzero(&xt, sizeof(xt));
1789 		xt.xt_len = sizeof xt;
1790 		xt.xt_inp.inp_gencnt = 0;
1791 		xt.xt_inp.inp_vflag = INP_IPV4;
1792 		memcpy(&xt.xt_inp.inp_laddr, &ssk->laddr, sizeof(ssk->laddr));
1793 		xt.xt_inp.inp_lport = ssk->lport;
1794 		memcpy(&xt.xt_inp.inp_faddr, &ssk->faddr, sizeof(ssk->faddr));
1795 		xt.xt_inp.inp_fport = ssk->fport;
1796 		xt.t_state = ssk->state;
1797 		if (ssk->socket != NULL)
1798 			sotoxsocket(ssk->socket, &xt.xt_inp.xi_socket);
1799 		xt.xt_inp.xi_socket.xso_protocol = IPPROTO_TCP;
1800 		SDP_RUNLOCK(ssk);
1801 		error = SYSCTL_OUT(req, &xt, sizeof xt);
1802 		if (error)
1803 			break;
1804 		i++;
1805 		continue;
1806 next:
1807 		SDP_RUNLOCK(ssk);
1808 	}
1809 	if (!error) {
1810 		/*
1811 		 * Give the user an updated idea of our state.
1812 		 * If the generation differs from what we told
1813 		 * her before, she knows that something happened
1814 		 * while we were processing this request, and it
1815 		 * might be necessary to retry.
1816 		 */
1817 		xig.xig_gen = 0;
1818 		xig.xig_sogen = so_gencnt;
1819 		xig.xig_count = sdp_count;
1820 		error = SYSCTL_OUT(req, &xig, sizeof xig);
1821 	}
1822 	SDP_LIST_RUNLOCK();
1823 	return (error);
1824 }
1825 
1826 SYSCTL_NODE(_net_inet, -1, sdp, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
1827     "SDP");
1828 
1829 SYSCTL_PROC(_net_inet_sdp, TCPCTL_PCBLIST, pcblist,
1830     CTLFLAG_RD | CTLTYPE_STRUCT | CTLFLAG_MPSAFE,
1831     0, 0, sdp_pcblist, "S,xtcpcb",
1832     "List of active SDP connections");
1833 
1834 static void
1835 sdp_zone_change(void *tag)
1836 {
1837 
1838 	uma_zone_set_max(sdp_zone, maxsockets);
1839 }
1840 
1841 static void
1842 sdp_init(void *arg __unused)
1843 {
1844 
1845 	LIST_INIT(&sdp_list);
1846 	sdp_zone = uma_zcreate("sdp_sock", sizeof(struct sdp_sock),
1847 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
1848 	uma_zone_set_max(sdp_zone, maxsockets);
1849 	EVENTHANDLER_REGISTER(maxsockets_change, sdp_zone_change, NULL,
1850 		EVENTHANDLER_PRI_ANY);
1851 	rx_comp_wq = create_singlethread_workqueue("rx_comp_wq");
1852 	ib_register_client(&sdp_client);
1853 }
1854 SYSINIT(sdp_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_SECOND, sdp_init, NULL);
1855 
1856 #define	SDP_PROTOSW							\
1857 	.pr_type =		SOCK_STREAM,				\
1858 	.pr_flags =		PR_CONNREQUIRED|PR_IMPLOPCL|PR_WANTRCVD,\
1859 	.pr_ctloutput =		sdp_ctloutput,				\
1860 	.pr_abort =		sdp_abort,				\
1861 	.pr_accept =		sdp_accept,				\
1862 	.pr_attach =		sdp_attach,				\
1863 	.pr_bind =		sdp_bind,				\
1864 	.pr_connect =		sdp_connect,				\
1865 	.pr_detach =		sdp_detach,				\
1866 	.pr_disconnect =	sdp_disconnect,				\
1867 	.pr_listen =		sdp_listen,				\
1868 	.pr_peeraddr =		sdp_getpeeraddr,			\
1869 	.pr_rcvoob =		sdp_rcvoob,				\
1870 	.pr_send =		sdp_send,				\
1871 	.pr_sosend =		sdp_sosend,				\
1872 	.pr_soreceive =		sdp_sorecv,				\
1873 	.pr_shutdown =		sdp_shutdown,				\
1874 	.pr_sockaddr =		sdp_getsockaddr,			\
1875 	.pr_close =		sdp_close
1876 
1877 
1878 static struct protosw sdp_ip_protosw = {
1879 	.pr_protocol =		IPPROTO_IP,
1880 	SDP_PROTOSW
1881 };
1882 static struct protosw sdp_tcp_protosw = {
1883 	.pr_protocol =		IPPROTO_TCP,
1884 	SDP_PROTOSW
1885 };
1886 
1887 static struct domain sdpdomain = {
1888 	.dom_family =		AF_INET_SDP,
1889 	.dom_name =		"SDP",
1890 	.dom_nprotosw =		2,
1891 	.dom_protosw = {
1892 		&sdp_ip_protosw,
1893 		&sdp_tcp_protosw,
1894 	},
1895 };
1896 
1897 DOMAIN_SET(sdp);
1898 
/*
 * Global debug verbosity knobs: general debugging defaults to level 1,
 * data-path debugging defaults to off.
 * NOTE(review): presumably consulted by the SDP debug macros -- confirm.
 */
int sdp_debug_level = 1;
int sdp_data_debug_level = 0;
1901