1 /* 2 * linux/net/sunrpc/svcsock.c 3 * 4 * These are the RPC server socket internals. 5 * 6 * The server scheduling algorithm does not always distribute the load 7 * evenly when servicing a single client. May need to modify the 8 * svc_xprt_enqueue procedure... 9 * 10 * TCP support is largely untested and may be a little slow. The problem 11 * is that we currently do two separate recvfrom's, one for the 4-byte 12 * record length, and the second for the actual record. This could possibly 13 * be improved by always reading a minimum size of around 100 bytes and 14 * tucking any superfluous bytes away in a temporary store. Still, that 15 * leaves write requests out in the rain. An alternative may be to peek at 16 * the first skb in the queue, and if it matches the next TCP sequence 17 * number, to extract the record marker. Yuck. 18 * 19 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> 20 */ 21 22 #include <linux/kernel.h> 23 #include <linux/sched.h> 24 #include <linux/errno.h> 25 #include <linux/fcntl.h> 26 #include <linux/net.h> 27 #include <linux/in.h> 28 #include <linux/inet.h> 29 #include <linux/udp.h> 30 #include <linux/tcp.h> 31 #include <linux/unistd.h> 32 #include <linux/slab.h> 33 #include <linux/netdevice.h> 34 #include <linux/skbuff.h> 35 #include <linux/file.h> 36 #include <linux/freezer.h> 37 #include <net/sock.h> 38 #include <net/checksum.h> 39 #include <net/ip.h> 40 #include <net/ipv6.h> 41 #include <net/tcp.h> 42 #include <net/tcp_states.h> 43 #include <asm/uaccess.h> 44 #include <asm/ioctls.h> 45 46 #include <linux/sunrpc/types.h> 47 #include <linux/sunrpc/clnt.h> 48 #include <linux/sunrpc/xdr.h> 49 #include <linux/sunrpc/msg_prot.h> 50 #include <linux/sunrpc/svcsock.h> 51 #include <linux/sunrpc/stats.h> 52 53 #define RPCDBG_FACILITY RPCDBG_SVCXPRT 54 55 56 static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *, 57 int *errp, int flags); 58 static void svc_udp_data_ready(struct sock *, int); 59 static int svc_udp_recvfrom(struct svc_rqst *); 60 static int svc_udp_sendto(struct svc_rqst *); 61 static void svc_sock_detach(struct svc_xprt *); 62 static void svc_tcp_sock_detach(struct svc_xprt *); 63 static void svc_sock_free(struct svc_xprt *); 64 65 static struct svc_xprt *svc_create_socket(struct svc_serv *, int, 66 struct sockaddr *, int, int); 67 #ifdef CONFIG_DEBUG_LOCK_ALLOC 68 static struct lock_class_key svc_key[2]; 69 static struct lock_class_key svc_slock_key[2]; 70 71 static void svc_reclassify_socket(struct socket *sock) 72 { 73 struct sock *sk = sock->sk; 74 BUG_ON(sock_owned_by_user(sk)); 75 switch (sk->sk_family) { 76 case AF_INET: 77 sock_lock_init_class_and_name(sk, "slock-AF_INET-NFSD", 78 &svc_slock_key[0], 79 "sk_xprt.xpt_lock-AF_INET-NFSD", 80 &svc_key[0]); 81 break; 82 83 case AF_INET6: 84 sock_lock_init_class_and_name(sk, "slock-AF_INET6-NFSD", 85 &svc_slock_key[1], 86 "sk_xprt.xpt_lock-AF_INET6-NFSD", 87 &svc_key[1]); 88 break; 89 90 default: 91 BUG(); 92 } 93 } 94 #else 95 static void svc_reclassify_socket(struct socket *sock) 96 { 97 } 98 #endif 99 100 /* 101 * Release an skbuff after use 102 */ 103 static void svc_release_skb(struct svc_rqst *rqstp) 104 { 105 struct sk_buff *skb = rqstp->rq_xprt_ctxt; 106 107 if (skb) { 108 struct svc_sock *svsk = 109 container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt); 110 rqstp->rq_xprt_ctxt = NULL; 111 112 dprintk("svc: service %p, releasing skb %p\n", rqstp, skb); 113 skb_free_datagram(svsk->sk_sk, skb); 114 } 115 } 116 117 union svc_pktinfo_u { 118 struct in_pktinfo pkti; 119 struct in6_pktinfo pkti6; 120 }; 121 #define SVC_PKTINFO_SPACE \ 122 CMSG_SPACE(sizeof(union svc_pktinfo_u)) 123 124 static void svc_set_cmsg_data(struct svc_rqst *rqstp, struct cmsghdr *cmh) 125 { 126 struct svc_sock *svsk = 127 container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt); 128 switch (svsk->sk_sk->sk_family) { 129 case AF_INET: { 130 struct in_pktinfo *pki = CMSG_DATA(cmh); 131 132 cmh->cmsg_level = SOL_IP; 133 cmh->cmsg_type = IP_PKTINFO; 134 pki->ipi_ifindex = 0; 135 pki->ipi_spec_dst.s_addr = rqstp->rq_daddr.addr.s_addr; 136 cmh->cmsg_len = CMSG_LEN(sizeof(*pki)); 137 } 138 break; 139 140 case AF_INET6: { 141 struct in6_pktinfo *pki = CMSG_DATA(cmh); 142 143 cmh->cmsg_level = SOL_IPV6; 144 cmh->cmsg_type = IPV6_PKTINFO; 145 pki->ipi6_ifindex = 0; 146 ipv6_addr_copy(&pki->ipi6_addr, 147 &rqstp->rq_daddr.addr6); 148 cmh->cmsg_len = CMSG_LEN(sizeof(*pki)); 149 } 150 break; 151 } 152 return; 153 } 154 155 /* 156 * Generic sendto routine 157 */ 158 static int svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr) 159 { 160 struct svc_sock *svsk = 161 container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt); 162 struct socket *sock = svsk->sk_sock; 163 int slen; 164 union { 165 struct cmsghdr hdr; 166 long all[SVC_PKTINFO_SPACE / sizeof(long)]; 167 } buffer; 168 struct cmsghdr *cmh = &buffer.hdr; 169 int len = 0; 170 int result; 171 int size; 172 struct page **ppage = xdr->pages; 173 size_t base = xdr->page_base; 174 unsigned int pglen = xdr->page_len; 175 unsigned int flags = MSG_MORE; 176 RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]); 177 178 slen = xdr->len; 179 180 if (rqstp->rq_prot == IPPROTO_UDP) { 181 struct msghdr msg = { 182 .msg_name = &rqstp->rq_addr, 183 .msg_namelen = rqstp->rq_addrlen, 184 .msg_control = cmh, 185 .msg_controllen = sizeof(buffer), 186 .msg_flags = MSG_MORE, 187 }; 188 189 svc_set_cmsg_data(rqstp, cmh); 190 191 if (sock_sendmsg(sock, &msg, 0) < 0) 192 goto out; 193 } 194 195 /* send head */ 196 if (slen == xdr->head[0].iov_len) 197 flags = 0; 198 len = kernel_sendpage(sock, rqstp->rq_respages[0], 0, 199 xdr->head[0].iov_len, flags); 200 if (len != xdr->head[0].iov_len) 201 goto out; 202 slen -= xdr->head[0].iov_len; 203 if (slen == 0) 204 goto out; 205 206 /* send page data */ 207 size = PAGE_SIZE - base < pglen ? PAGE_SIZE - base : pglen; 208 while (pglen > 0) { 209 if (slen == size) 210 flags = 0; 211 result = kernel_sendpage(sock, *ppage, base, size, flags); 212 if (result > 0) 213 len += result; 214 if (result != size) 215 goto out; 216 slen -= size; 217 pglen -= size; 218 size = PAGE_SIZE < pglen ? PAGE_SIZE : pglen; 219 base = 0; 220 ppage++; 221 } 222 /* send tail */ 223 if (xdr->tail[0].iov_len) { 224 result = kernel_sendpage(sock, rqstp->rq_respages[0], 225 ((unsigned long)xdr->tail[0].iov_base) 226 & (PAGE_SIZE-1), 227 xdr->tail[0].iov_len, 0); 228 229 if (result > 0) 230 len += result; 231 } 232 out: 233 dprintk("svc: socket %p sendto([%p %Zu... ], %d) = %d (addr %s)\n", 234 svsk, xdr->head[0].iov_base, xdr->head[0].iov_len, 235 xdr->len, len, svc_print_addr(rqstp, buf, sizeof(buf))); 236 237 return len; 238 } 239 240 /* 241 * Report socket names for nfsdfs 242 */ 243 static int one_sock_name(char *buf, struct svc_sock *svsk) 244 { 245 int len; 246 247 switch(svsk->sk_sk->sk_family) { 248 case AF_INET: 249 len = sprintf(buf, "ipv4 %s %pI4 %d\n", 250 svsk->sk_sk->sk_protocol == IPPROTO_UDP ? 251 "udp" : "tcp", 252 &inet_sk(svsk->sk_sk)->rcv_saddr, 253 inet_sk(svsk->sk_sk)->num); 254 break; 255 default: 256 len = sprintf(buf, "*unknown-%d*\n", 257 svsk->sk_sk->sk_family); 258 } 259 return len; 260 } 261 262 int 263 svc_sock_names(char *buf, struct svc_serv *serv, char *toclose) 264 { 265 struct svc_sock *svsk, *closesk = NULL; 266 int len = 0; 267 268 if (!serv) 269 return 0; 270 spin_lock_bh(&serv->sv_lock); 271 list_for_each_entry(svsk, &serv->sv_permsocks, sk_xprt.xpt_list) { 272 int onelen = one_sock_name(buf+len, svsk); 273 if (toclose && strcmp(toclose, buf+len) == 0) 274 closesk = svsk; 275 else 276 len += onelen; 277 } 278 spin_unlock_bh(&serv->sv_lock); 279 if (closesk) 280 /* Should unregister with portmap, but you cannot 281 * unregister just one protocol... 282 */ 283 svc_close_xprt(&closesk->sk_xprt); 284 else if (toclose) 285 return -ENOENT; 286 return len; 287 } 288 EXPORT_SYMBOL_GPL(svc_sock_names); 289 290 /* 291 * Check input queue length 292 */ 293 static int svc_recv_available(struct svc_sock *svsk) 294 { 295 struct socket *sock = svsk->sk_sock; 296 int avail, err; 297 298 err = kernel_sock_ioctl(sock, TIOCINQ, (unsigned long) &avail); 299 300 return (err >= 0)? avail : err; 301 } 302 303 /* 304 * Generic recvfrom routine. 305 */ 306 static int svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr, 307 int buflen) 308 { 309 struct svc_sock *svsk = 310 container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt); 311 struct msghdr msg = { 312 .msg_flags = MSG_DONTWAIT, 313 }; 314 int len; 315 316 rqstp->rq_xprt_hlen = 0; 317 318 len = kernel_recvmsg(svsk->sk_sock, &msg, iov, nr, buflen, 319 msg.msg_flags); 320 321 dprintk("svc: socket %p recvfrom(%p, %Zu) = %d\n", 322 svsk, iov[0].iov_base, iov[0].iov_len, len); 323 return len; 324 } 325 326 /* 327 * Set socket snd and rcv buffer lengths 328 */ 329 static void svc_sock_setbufsize(struct socket *sock, unsigned int snd, 330 unsigned int rcv) 331 { 332 #if 0 333 mm_segment_t oldfs; 334 oldfs = get_fs(); set_fs(KERNEL_DS); 335 sock_setsockopt(sock, SOL_SOCKET, SO_SNDBUF, 336 (char*)&snd, sizeof(snd)); 337 sock_setsockopt(sock, SOL_SOCKET, SO_RCVBUF, 338 (char*)&rcv, sizeof(rcv)); 339 #else 340 /* sock_setsockopt limits use to sysctl_?mem_max, 341 * which isn't acceptable. Until that is made conditional 342 * on not having CAP_SYS_RESOURCE or similar, we go direct... 343 * DaveM said I could! 344 */ 345 lock_sock(sock->sk); 346 sock->sk->sk_sndbuf = snd * 2; 347 sock->sk->sk_rcvbuf = rcv * 2; 348 sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK|SOCK_RCVBUF_LOCK; 349 release_sock(sock->sk); 350 #endif 351 } 352 /* 353 * INET callback when data has been received on the socket. 354 */ 355 static void svc_udp_data_ready(struct sock *sk, int count) 356 { 357 struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; 358 359 if (svsk) { 360 dprintk("svc: socket %p(inet %p), count=%d, busy=%d\n", 361 svsk, sk, count, 362 test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags)); 363 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); 364 svc_xprt_enqueue(&svsk->sk_xprt); 365 } 366 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) 367 wake_up_interruptible(sk->sk_sleep); 368 } 369 370 /* 371 * INET callback when space is newly available on the socket. 372 */ 373 static void svc_write_space(struct sock *sk) 374 { 375 struct svc_sock *svsk = (struct svc_sock *)(sk->sk_user_data); 376 377 if (svsk) { 378 dprintk("svc: socket %p(inet %p), write_space busy=%d\n", 379 svsk, sk, test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags)); 380 svc_xprt_enqueue(&svsk->sk_xprt); 381 } 382 383 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) { 384 dprintk("RPC svc_write_space: someone sleeping on %p\n", 385 svsk); 386 wake_up_interruptible(sk->sk_sleep); 387 } 388 } 389 390 /* 391 * Copy the UDP datagram's destination address to the rqstp structure. 392 * The 'destination' address in this case is the address to which the 393 * peer sent the datagram, i.e. our local address. For multihomed 394 * hosts, this can change from msg to msg. Note that only the IP 395 * address changes, the port number should remain the same. 396 */ 397 static void svc_udp_get_dest_address(struct svc_rqst *rqstp, 398 struct cmsghdr *cmh) 399 { 400 struct svc_sock *svsk = 401 container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt); 402 switch (svsk->sk_sk->sk_family) { 403 case AF_INET: { 404 struct in_pktinfo *pki = CMSG_DATA(cmh); 405 rqstp->rq_daddr.addr.s_addr = pki->ipi_spec_dst.s_addr; 406 break; 407 } 408 case AF_INET6: { 409 struct in6_pktinfo *pki = CMSG_DATA(cmh); 410 ipv6_addr_copy(&rqstp->rq_daddr.addr6, &pki->ipi6_addr); 411 break; 412 } 413 } 414 } 415 416 /* 417 * Receive a datagram from a UDP socket. 418 */ 419 static int svc_udp_recvfrom(struct svc_rqst *rqstp) 420 { 421 struct svc_sock *svsk = 422 container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt); 423 struct svc_serv *serv = svsk->sk_xprt.xpt_server; 424 struct sk_buff *skb; 425 union { 426 struct cmsghdr hdr; 427 long all[SVC_PKTINFO_SPACE / sizeof(long)]; 428 } buffer; 429 struct cmsghdr *cmh = &buffer.hdr; 430 int err, len; 431 struct msghdr msg = { 432 .msg_name = svc_addr(rqstp), 433 .msg_control = cmh, 434 .msg_controllen = sizeof(buffer), 435 .msg_flags = MSG_DONTWAIT, 436 }; 437 438 if (test_and_clear_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags)) 439 /* udp sockets need large rcvbuf as all pending 440 * requests are still in that buffer. sndbuf must 441 * also be large enough that there is enough space 442 * for one reply per thread. We count all threads 443 * rather than threads in a particular pool, which 444 * provides an upper bound on the number of threads 445 * which will access the socket. 446 */ 447 svc_sock_setbufsize(svsk->sk_sock, 448 (serv->sv_nrthreads+3) * serv->sv_max_mesg, 449 (serv->sv_nrthreads+3) * serv->sv_max_mesg); 450 451 clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); 452 skb = NULL; 453 err = kernel_recvmsg(svsk->sk_sock, &msg, NULL, 454 0, 0, MSG_PEEK | MSG_DONTWAIT); 455 if (err >= 0) 456 skb = skb_recv_datagram(svsk->sk_sk, 0, 1, &err); 457 458 if (skb == NULL) { 459 if (err != -EAGAIN) { 460 /* possibly an icmp error */ 461 dprintk("svc: recvfrom returned error %d\n", -err); 462 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); 463 } 464 svc_xprt_received(&svsk->sk_xprt); 465 return -EAGAIN; 466 } 467 len = svc_addr_len(svc_addr(rqstp)); 468 if (len < 0) 469 return len; 470 rqstp->rq_addrlen = len; 471 if (skb->tstamp.tv64 == 0) { 472 skb->tstamp = ktime_get_real(); 473 /* Don't enable netstamp, sunrpc doesn't 474 need that much accuracy */ 475 } 476 svsk->sk_sk->sk_stamp = skb->tstamp; 477 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); /* there may be more data... */ 478 479 /* 480 * Maybe more packets - kick another thread ASAP. 481 */ 482 svc_xprt_received(&svsk->sk_xprt); 483 484 len = skb->len - sizeof(struct udphdr); 485 rqstp->rq_arg.len = len; 486 487 rqstp->rq_prot = IPPROTO_UDP; 488 489 if (cmh->cmsg_level != IPPROTO_IP || 490 cmh->cmsg_type != IP_PKTINFO) { 491 if (net_ratelimit()) 492 printk("rpcsvc: received unknown control message:" 493 "%d/%d\n", 494 cmh->cmsg_level, cmh->cmsg_type); 495 skb_free_datagram(svsk->sk_sk, skb); 496 return 0; 497 } 498 svc_udp_get_dest_address(rqstp, cmh); 499 500 if (skb_is_nonlinear(skb)) { 501 /* we have to copy */ 502 local_bh_disable(); 503 if (csum_partial_copy_to_xdr(&rqstp->rq_arg, skb)) { 504 local_bh_enable(); 505 /* checksum error */ 506 skb_free_datagram(svsk->sk_sk, skb); 507 return 0; 508 } 509 local_bh_enable(); 510 skb_free_datagram(svsk->sk_sk, skb); 511 } else { 512 /* we can use it in-place */ 513 rqstp->rq_arg.head[0].iov_base = skb->data + 514 sizeof(struct udphdr); 515 rqstp->rq_arg.head[0].iov_len = len; 516 if (skb_checksum_complete(skb)) { 517 skb_free_datagram(svsk->sk_sk, skb); 518 return 0; 519 } 520 rqstp->rq_xprt_ctxt = skb; 521 } 522 523 rqstp->rq_arg.page_base = 0; 524 if (len <= rqstp->rq_arg.head[0].iov_len) { 525 rqstp->rq_arg.head[0].iov_len = len; 526 rqstp->rq_arg.page_len = 0; 527 rqstp->rq_respages = rqstp->rq_pages+1; 528 } else { 529 rqstp->rq_arg.page_len = len - rqstp->rq_arg.head[0].iov_len; 530 rqstp->rq_respages = rqstp->rq_pages + 1 + 531 DIV_ROUND_UP(rqstp->rq_arg.page_len, PAGE_SIZE); 532 } 533 534 if (serv->sv_stats) 535 serv->sv_stats->netudpcnt++; 536 537 return len; 538 } 539 540 static int 541 svc_udp_sendto(struct svc_rqst *rqstp) 542 { 543 int error; 544 545 error = svc_sendto(rqstp, &rqstp->rq_res); 546 if (error == -ECONNREFUSED) 547 /* ICMP error on earlier request. */ 548 error = svc_sendto(rqstp, &rqstp->rq_res); 549 550 return error; 551 } 552 553 static void svc_udp_prep_reply_hdr(struct svc_rqst *rqstp) 554 { 555 } 556 557 static int svc_udp_has_wspace(struct svc_xprt *xprt) 558 { 559 struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); 560 struct svc_serv *serv = xprt->xpt_server; 561 unsigned long required; 562 563 /* 564 * Set the SOCK_NOSPACE flag before checking the available 565 * sock space. 566 */ 567 set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); 568 required = atomic_read(&svsk->sk_xprt.xpt_reserved) + serv->sv_max_mesg; 569 if (required*2 > sock_wspace(svsk->sk_sk)) 570 return 0; 571 clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); 572 return 1; 573 } 574 575 static struct svc_xprt *svc_udp_accept(struct svc_xprt *xprt) 576 { 577 BUG(); 578 return NULL; 579 } 580 581 static struct svc_xprt *svc_udp_create(struct svc_serv *serv, 582 struct sockaddr *sa, int salen, 583 int flags) 584 { 585 return svc_create_socket(serv, IPPROTO_UDP, sa, salen, flags); 586 } 587 588 static struct svc_xprt_ops svc_udp_ops = { 589 .xpo_create = svc_udp_create, 590 .xpo_recvfrom = svc_udp_recvfrom, 591 .xpo_sendto = svc_udp_sendto, 592 .xpo_release_rqst = svc_release_skb, 593 .xpo_detach = svc_sock_detach, 594 .xpo_free = svc_sock_free, 595 .xpo_prep_reply_hdr = svc_udp_prep_reply_hdr, 596 .xpo_has_wspace = svc_udp_has_wspace, 597 .xpo_accept = svc_udp_accept, 598 }; 599 600 static struct svc_xprt_class svc_udp_class = { 601 .xcl_name = "udp", 602 .xcl_owner = THIS_MODULE, 603 .xcl_ops = &svc_udp_ops, 604 .xcl_max_payload = RPCSVC_MAXPAYLOAD_UDP, 605 }; 606 607 static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv) 608 { 609 int one = 1; 610 mm_segment_t oldfs; 611 612 svc_xprt_init(&svc_udp_class, &svsk->sk_xprt, serv); 613 clear_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags); 614 svsk->sk_sk->sk_data_ready = svc_udp_data_ready; 615 svsk->sk_sk->sk_write_space = svc_write_space; 616 617 /* initialise setting must have enough space to 618 * receive and respond to one request. 619 * svc_udp_recvfrom will re-adjust if necessary 620 */ 621 svc_sock_setbufsize(svsk->sk_sock, 622 3 * svsk->sk_xprt.xpt_server->sv_max_mesg, 623 3 * svsk->sk_xprt.xpt_server->sv_max_mesg); 624 625 /* data might have come in before data_ready set up */ 626 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); 627 set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags); 628 629 oldfs = get_fs(); 630 set_fs(KERNEL_DS); 631 /* make sure we get destination address info */ 632 svsk->sk_sock->ops->setsockopt(svsk->sk_sock, IPPROTO_IP, IP_PKTINFO, 633 (char __user *)&one, sizeof(one)); 634 set_fs(oldfs); 635 } 636 637 /* 638 * A data_ready event on a listening socket means there's a connection 639 * pending. Do not use state_change as a substitute for it. 640 */ 641 static void svc_tcp_listen_data_ready(struct sock *sk, int count_unused) 642 { 643 struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; 644 645 dprintk("svc: socket %p TCP (listen) state change %d\n", 646 sk, sk->sk_state); 647 648 /* 649 * This callback may called twice when a new connection 650 * is established as a child socket inherits everything 651 * from a parent LISTEN socket. 652 * 1) data_ready method of the parent socket will be called 653 * when one of child sockets become ESTABLISHED. 654 * 2) data_ready method of the child socket may be called 655 * when it receives data before the socket is accepted. 656 * In case of 2, we should ignore it silently. 657 */ 658 if (sk->sk_state == TCP_LISTEN) { 659 if (svsk) { 660 set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags); 661 svc_xprt_enqueue(&svsk->sk_xprt); 662 } else 663 printk("svc: socket %p: no user data\n", sk); 664 } 665 666 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) 667 wake_up_interruptible_all(sk->sk_sleep); 668 } 669 670 /* 671 * A state change on a connected socket means it's dying or dead. 672 */ 673 static void svc_tcp_state_change(struct sock *sk) 674 { 675 struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; 676 677 dprintk("svc: socket %p TCP (connected) state change %d (svsk %p)\n", 678 sk, sk->sk_state, sk->sk_user_data); 679 680 if (!svsk) 681 printk("svc: socket %p: no user data\n", sk); 682 else { 683 set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); 684 svc_xprt_enqueue(&svsk->sk_xprt); 685 } 686 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) 687 wake_up_interruptible_all(sk->sk_sleep); 688 } 689 690 static void svc_tcp_data_ready(struct sock *sk, int count) 691 { 692 struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; 693 694 dprintk("svc: socket %p TCP data ready (svsk %p)\n", 695 sk, sk->sk_user_data); 696 if (svsk) { 697 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); 698 svc_xprt_enqueue(&svsk->sk_xprt); 699 } 700 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) 701 wake_up_interruptible(sk->sk_sleep); 702 } 703 704 /* 705 * Accept a TCP connection 706 */ 707 static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt) 708 { 709 struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); 710 struct sockaddr_storage addr; 711 struct sockaddr *sin = (struct sockaddr *) &addr; 712 struct svc_serv *serv = svsk->sk_xprt.xpt_server; 713 struct socket *sock = svsk->sk_sock; 714 struct socket *newsock; 715 struct svc_sock *newsvsk; 716 int err, slen; 717 RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]); 718 719 dprintk("svc: tcp_accept %p sock %p\n", svsk, sock); 720 if (!sock) 721 return NULL; 722 723 clear_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags); 724 err = kernel_accept(sock, &newsock, O_NONBLOCK); 725 if (err < 0) { 726 if (err == -ENOMEM) 727 printk(KERN_WARNING "%s: no more sockets!\n", 728 serv->sv_name); 729 else if (err != -EAGAIN && net_ratelimit()) 730 printk(KERN_WARNING "%s: accept failed (err %d)!\n", 731 serv->sv_name, -err); 732 return NULL; 733 } 734 set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags); 735 736 err = kernel_getpeername(newsock, sin, &slen); 737 if (err < 0) { 738 if (net_ratelimit()) 739 printk(KERN_WARNING "%s: peername failed (err %d)!\n", 740 serv->sv_name, -err); 741 goto failed; /* aborted connection or whatever */ 742 } 743 744 /* Ideally, we would want to reject connections from unauthorized 745 * hosts here, but when we get encryption, the IP of the host won't 746 * tell us anything. For now just warn about unpriv connections. 747 */ 748 if (!svc_port_is_privileged(sin)) { 749 dprintk(KERN_WARNING 750 "%s: connect from unprivileged port: %s\n", 751 serv->sv_name, 752 __svc_print_addr(sin, buf, sizeof(buf))); 753 } 754 dprintk("%s: connect from %s\n", serv->sv_name, 755 __svc_print_addr(sin, buf, sizeof(buf))); 756 757 /* make sure that a write doesn't block forever when 758 * low on memory 759 */ 760 newsock->sk->sk_sndtimeo = HZ*30; 761 762 if (!(newsvsk = svc_setup_socket(serv, newsock, &err, 763 (SVC_SOCK_ANONYMOUS | SVC_SOCK_TEMPORARY)))) 764 goto failed; 765 svc_xprt_set_remote(&newsvsk->sk_xprt, sin, slen); 766 err = kernel_getsockname(newsock, sin, &slen); 767 if (unlikely(err < 0)) { 768 dprintk("svc_tcp_accept: kernel_getsockname error %d\n", -err); 769 slen = offsetof(struct sockaddr, sa_data); 770 } 771 svc_xprt_set_local(&newsvsk->sk_xprt, sin, slen); 772 773 if (serv->sv_stats) 774 serv->sv_stats->nettcpconn++; 775 776 return &newsvsk->sk_xprt; 777 778 failed: 779 sock_release(newsock); 780 return NULL; 781 } 782 783 /* 784 * Receive data from a TCP socket. 785 */ 786 static int svc_tcp_recvfrom(struct svc_rqst *rqstp) 787 { 788 struct svc_sock *svsk = 789 container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt); 790 struct svc_serv *serv = svsk->sk_xprt.xpt_server; 791 int len; 792 struct kvec *vec; 793 int pnum, vlen; 794 795 dprintk("svc: tcp_recv %p data %d conn %d close %d\n", 796 svsk, test_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags), 797 test_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags), 798 test_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags)); 799 800 if (test_and_clear_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags)) 801 /* sndbuf needs to have room for one request 802 * per thread, otherwise we can stall even when the 803 * network isn't a bottleneck. 804 * 805 * We count all threads rather than threads in a 806 * particular pool, which provides an upper bound 807 * on the number of threads which will access the socket. 808 * 809 * rcvbuf just needs to be able to hold a few requests. 810 * Normally they will be removed from the queue 811 * as soon a a complete request arrives. 812 */ 813 svc_sock_setbufsize(svsk->sk_sock, 814 (serv->sv_nrthreads+3) * serv->sv_max_mesg, 815 3 * serv->sv_max_mesg); 816 817 clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); 818 819 /* Receive data. If we haven't got the record length yet, get 820 * the next four bytes. Otherwise try to gobble up as much as 821 * possible up to the complete record length. 822 */ 823 if (svsk->sk_tcplen < sizeof(rpc_fraghdr)) { 824 int want = sizeof(rpc_fraghdr) - svsk->sk_tcplen; 825 struct kvec iov; 826 827 iov.iov_base = ((char *) &svsk->sk_reclen) + svsk->sk_tcplen; 828 iov.iov_len = want; 829 if ((len = svc_recvfrom(rqstp, &iov, 1, want)) < 0) 830 goto error; 831 svsk->sk_tcplen += len; 832 833 if (len < want) { 834 dprintk("svc: short recvfrom while reading record " 835 "length (%d of %d)\n", len, want); 836 svc_xprt_received(&svsk->sk_xprt); 837 return -EAGAIN; /* record header not complete */ 838 } 839 840 svsk->sk_reclen = ntohl(svsk->sk_reclen); 841 if (!(svsk->sk_reclen & RPC_LAST_STREAM_FRAGMENT)) { 842 /* FIXME: technically, a record can be fragmented, 843 * and non-terminal fragments will not have the top 844 * bit set in the fragment length header. 845 * But apparently no known nfs clients send fragmented 846 * records. */ 847 if (net_ratelimit()) 848 printk(KERN_NOTICE "RPC: multiple fragments " 849 "per record not supported\n"); 850 goto err_delete; 851 } 852 svsk->sk_reclen &= RPC_FRAGMENT_SIZE_MASK; 853 dprintk("svc: TCP record, %d bytes\n", svsk->sk_reclen); 854 if (svsk->sk_reclen > serv->sv_max_mesg) { 855 if (net_ratelimit()) 856 printk(KERN_NOTICE "RPC: " 857 "fragment too large: 0x%08lx\n", 858 (unsigned long)svsk->sk_reclen); 859 goto err_delete; 860 } 861 } 862 863 /* Check whether enough data is available */ 864 len = svc_recv_available(svsk); 865 if (len < 0) 866 goto error; 867 868 if (len < svsk->sk_reclen) { 869 dprintk("svc: incomplete TCP record (%d of %d)\n", 870 len, svsk->sk_reclen); 871 svc_xprt_received(&svsk->sk_xprt); 872 return -EAGAIN; /* record not complete */ 873 } 874 len = svsk->sk_reclen; 875 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); 876 877 vec = rqstp->rq_vec; 878 vec[0] = rqstp->rq_arg.head[0]; 879 vlen = PAGE_SIZE; 880 pnum = 1; 881 while (vlen < len) { 882 vec[pnum].iov_base = page_address(rqstp->rq_pages[pnum]); 883 vec[pnum].iov_len = PAGE_SIZE; 884 pnum++; 885 vlen += PAGE_SIZE; 886 } 887 rqstp->rq_respages = &rqstp->rq_pages[pnum]; 888 889 /* Now receive data */ 890 len = svc_recvfrom(rqstp, vec, pnum, len); 891 if (len < 0) 892 goto error; 893 894 dprintk("svc: TCP complete record (%d bytes)\n", len); 895 rqstp->rq_arg.len = len; 896 rqstp->rq_arg.page_base = 0; 897 if (len <= rqstp->rq_arg.head[0].iov_len) { 898 rqstp->rq_arg.head[0].iov_len = len; 899 rqstp->rq_arg.page_len = 0; 900 } else { 901 rqstp->rq_arg.page_len = len - rqstp->rq_arg.head[0].iov_len; 902 } 903 904 rqstp->rq_xprt_ctxt = NULL; 905 rqstp->rq_prot = IPPROTO_TCP; 906 907 /* Reset TCP read info */ 908 svsk->sk_reclen = 0; 909 svsk->sk_tcplen = 0; 910 911 svc_xprt_copy_addrs(rqstp, &svsk->sk_xprt); 912 svc_xprt_received(&svsk->sk_xprt); 913 if (serv->sv_stats) 914 serv->sv_stats->nettcpcnt++; 915 916 return len; 917 918 err_delete: 919 set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); 920 return -EAGAIN; 921 922 error: 923 if (len == -EAGAIN) { 924 dprintk("RPC: TCP recvfrom got EAGAIN\n"); 925 svc_xprt_received(&svsk->sk_xprt); 926 } else { 927 printk(KERN_NOTICE "%s: recvfrom returned errno %d\n", 928 svsk->sk_xprt.xpt_server->sv_name, -len); 929 goto err_delete; 930 } 931 932 return len; 933 } 934 935 /* 936 * Send out data on TCP socket. 937 */ 938 static int svc_tcp_sendto(struct svc_rqst *rqstp) 939 { 940 struct xdr_buf *xbufp = &rqstp->rq_res; 941 int sent; 942 __be32 reclen; 943 944 /* Set up the first element of the reply kvec. 945 * Any other kvecs that may be in use have been taken 946 * care of by the server implementation itself. 947 */ 948 reclen = htonl(0x80000000|((xbufp->len ) - 4)); 949 memcpy(xbufp->head[0].iov_base, &reclen, 4); 950 951 if (test_bit(XPT_DEAD, &rqstp->rq_xprt->xpt_flags)) 952 return -ENOTCONN; 953 954 sent = svc_sendto(rqstp, &rqstp->rq_res); 955 if (sent != xbufp->len) { 956 printk(KERN_NOTICE 957 "rpc-srv/tcp: %s: %s %d when sending %d bytes " 958 "- shutting down socket\n", 959 rqstp->rq_xprt->xpt_server->sv_name, 960 (sent<0)?"got error":"sent only", 961 sent, xbufp->len); 962 set_bit(XPT_CLOSE, &rqstp->rq_xprt->xpt_flags); 963 svc_xprt_enqueue(rqstp->rq_xprt); 964 sent = -EAGAIN; 965 } 966 return sent; 967 } 968 969 /* 970 * Setup response header. TCP has a 4B record length field. 971 */ 972 static void svc_tcp_prep_reply_hdr(struct svc_rqst *rqstp) 973 { 974 struct kvec *resv = &rqstp->rq_res.head[0]; 975 976 /* tcp needs a space for the record length... */ 977 svc_putnl(resv, 0); 978 } 979 980 static int svc_tcp_has_wspace(struct svc_xprt *xprt) 981 { 982 struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); 983 struct svc_serv *serv = svsk->sk_xprt.xpt_server; 984 int required; 985 int wspace; 986 987 /* 988 * Set the SOCK_NOSPACE flag before checking the available 989 * sock space. 990 */ 991 set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); 992 required = atomic_read(&svsk->sk_xprt.xpt_reserved) + serv->sv_max_mesg; 993 wspace = sk_stream_wspace(svsk->sk_sk); 994 995 if (wspace < sk_stream_min_wspace(svsk->sk_sk)) 996 return 0; 997 if (required * 2 > wspace) 998 return 0; 999 1000 clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags); 1001 return 1; 1002 } 1003 1004 static struct svc_xprt *svc_tcp_create(struct svc_serv *serv, 1005 struct sockaddr *sa, int salen, 1006 int flags) 1007 { 1008 return svc_create_socket(serv, IPPROTO_TCP, sa, salen, flags); 1009 } 1010 1011 static struct svc_xprt_ops svc_tcp_ops = { 1012 .xpo_create = svc_tcp_create, 1013 .xpo_recvfrom = svc_tcp_recvfrom, 1014 .xpo_sendto = svc_tcp_sendto, 1015 .xpo_release_rqst = svc_release_skb, 1016 .xpo_detach = svc_tcp_sock_detach, 1017 .xpo_free = svc_sock_free, 1018 .xpo_prep_reply_hdr = svc_tcp_prep_reply_hdr, 1019 .xpo_has_wspace = svc_tcp_has_wspace, 1020 .xpo_accept = svc_tcp_accept, 1021 }; 1022 1023 static struct svc_xprt_class svc_tcp_class = { 1024 .xcl_name = "tcp", 1025 .xcl_owner = THIS_MODULE, 1026 .xcl_ops = &svc_tcp_ops, 1027 .xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP, 1028 }; 1029 1030 void svc_init_xprt_sock(void) 1031 { 1032 svc_reg_xprt_class(&svc_tcp_class); 1033 svc_reg_xprt_class(&svc_udp_class); 1034 } 1035 1036 void svc_cleanup_xprt_sock(void) 1037 { 1038 svc_unreg_xprt_class(&svc_tcp_class); 1039 svc_unreg_xprt_class(&svc_udp_class); 1040 } 1041 1042 static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv) 1043 { 1044 struct sock *sk = svsk->sk_sk; 1045 1046 svc_xprt_init(&svc_tcp_class, &svsk->sk_xprt, serv); 1047 set_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags); 1048 if (sk->sk_state == TCP_LISTEN) { 1049 dprintk("setting up TCP socket for listening\n"); 1050 set_bit(XPT_LISTENER, &svsk->sk_xprt.xpt_flags); 1051 sk->sk_data_ready = svc_tcp_listen_data_ready; 1052 set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags); 1053 } else { 1054 dprintk("setting up TCP socket for reading\n"); 1055 sk->sk_state_change = svc_tcp_state_change; 1056 sk->sk_data_ready = svc_tcp_data_ready; 1057 sk->sk_write_space = svc_write_space; 1058 1059 svsk->sk_reclen = 0; 1060 svsk->sk_tcplen = 0; 1061 1062 tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF; 1063 1064 /* initialise setting must have enough space to 1065 * receive and respond to one request. 1066 * svc_tcp_recvfrom will re-adjust if necessary 1067 */ 1068 svc_sock_setbufsize(svsk->sk_sock, 1069 3 * svsk->sk_xprt.xpt_server->sv_max_mesg, 1070 3 * svsk->sk_xprt.xpt_server->sv_max_mesg); 1071 1072 set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags); 1073 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); 1074 if (sk->sk_state != TCP_ESTABLISHED) 1075 set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); 1076 } 1077 } 1078 1079 void svc_sock_update_bufs(struct svc_serv *serv) 1080 { 1081 /* 1082 * The number of server threads has changed. Update 1083 * rcvbuf and sndbuf accordingly on all sockets 1084 */ 1085 struct list_head *le; 1086 1087 spin_lock_bh(&serv->sv_lock); 1088 list_for_each(le, &serv->sv_permsocks) { 1089 struct svc_sock *svsk = 1090 list_entry(le, struct svc_sock, sk_xprt.xpt_list); 1091 set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags); 1092 } 1093 list_for_each(le, &serv->sv_tempsocks) { 1094 struct svc_sock *svsk = 1095 list_entry(le, struct svc_sock, sk_xprt.xpt_list); 1096 set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags); 1097 } 1098 spin_unlock_bh(&serv->sv_lock); 1099 } 1100 EXPORT_SYMBOL_GPL(svc_sock_update_bufs); 1101 1102 /* 1103 * Initialize socket for RPC use and create svc_sock struct 1104 * XXX: May want to setsockopt SO_SNDBUF and SO_RCVBUF. 1105 */ 1106 static struct svc_sock *svc_setup_socket(struct svc_serv *serv, 1107 struct socket *sock, 1108 int *errp, int flags) 1109 { 1110 struct svc_sock *svsk; 1111 struct sock *inet; 1112 int pmap_register = !(flags & SVC_SOCK_ANONYMOUS); 1113 1114 dprintk("svc: svc_setup_socket %p\n", sock); 1115 if (!(svsk = kzalloc(sizeof(*svsk), GFP_KERNEL))) { 1116 *errp = -ENOMEM; 1117 return NULL; 1118 } 1119 1120 inet = sock->sk; 1121 1122 /* Register socket with portmapper */ 1123 if (*errp >= 0 && pmap_register) 1124 *errp = svc_register(serv, inet->sk_family, inet->sk_protocol, 1125 ntohs(inet_sk(inet)->sport)); 1126 1127 if (*errp < 0) { 1128 kfree(svsk); 1129 return NULL; 1130 } 1131 1132 inet->sk_user_data = svsk; 1133 svsk->sk_sock = sock; 1134 svsk->sk_sk = inet; 1135 svsk->sk_ostate = inet->sk_state_change; 1136 svsk->sk_odata = inet->sk_data_ready; 1137 svsk->sk_owspace = inet->sk_write_space; 1138 1139 /* Initialize the socket */ 1140 if (sock->type == SOCK_DGRAM) 1141 svc_udp_init(svsk, serv); 1142 else 1143 svc_tcp_init(svsk, serv); 1144 1145 dprintk("svc: svc_setup_socket created %p (inet %p)\n", 1146 svsk, svsk->sk_sk); 1147 1148 return svsk; 1149 } 1150 1151 int svc_addsock(struct svc_serv *serv, 1152 int fd, 1153 char *name_return) 1154 { 1155 int err = 0; 1156 struct socket *so = sockfd_lookup(fd, &err); 1157 struct svc_sock *svsk = NULL; 1158 1159 if (!so) 1160 return err; 1161 if (so->sk->sk_family != AF_INET) 1162 err = -EAFNOSUPPORT; 1163 else if (so->sk->sk_protocol != IPPROTO_TCP && 1164 so->sk->sk_protocol != IPPROTO_UDP) 1165 err = -EPROTONOSUPPORT; 1166 else if (so->state > SS_UNCONNECTED) 1167 err = -EISCONN; 1168 else { 1169 if (!try_module_get(THIS_MODULE)) 1170 err = -ENOENT; 1171 else 1172 svsk = svc_setup_socket(serv, so, &err, 1173 SVC_SOCK_DEFAULTS); 1174 if (svsk) { 1175 struct sockaddr_storage addr; 1176 struct sockaddr *sin = (struct sockaddr *)&addr; 1177 int salen; 1178 if (kernel_getsockname(svsk->sk_sock, sin, &salen) == 0) 1179 svc_xprt_set_local(&svsk->sk_xprt, sin, salen); 1180 clear_bit(XPT_TEMP, &svsk->sk_xprt.xpt_flags); 1181 spin_lock_bh(&serv->sv_lock); 1182 list_add(&svsk->sk_xprt.xpt_list, &serv->sv_permsocks); 1183 spin_unlock_bh(&serv->sv_lock); 1184 svc_xprt_received(&svsk->sk_xprt); 1185 err = 0; 1186 } else 1187 module_put(THIS_MODULE); 1188 } 1189 if (err) { 1190 sockfd_put(so); 1191 return err; 1192 } 1193 return one_sock_name(name_return, svsk); 1194 } 1195 EXPORT_SYMBOL_GPL(svc_addsock); 1196 1197 /* 1198 * Create socket for RPC service. 1199 */ 1200 static struct svc_xprt *svc_create_socket(struct svc_serv *serv, 1201 int protocol, 1202 struct sockaddr *sin, int len, 1203 int flags) 1204 { 1205 struct svc_sock *svsk; 1206 struct socket *sock; 1207 int error; 1208 int type; 1209 struct sockaddr_storage addr; 1210 struct sockaddr *newsin = (struct sockaddr *)&addr; 1211 int newlen; 1212 int family; 1213 int val; 1214 RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]); 1215 1216 dprintk("svc: svc_create_socket(%s, %d, %s)\n", 1217 serv->sv_program->pg_name, protocol, 1218 __svc_print_addr(sin, buf, sizeof(buf))); 1219 1220 if (protocol != IPPROTO_UDP && protocol != IPPROTO_TCP) { 1221 printk(KERN_WARNING "svc: only UDP and TCP " 1222 "sockets supported\n"); 1223 return ERR_PTR(-EINVAL); 1224 } 1225 1226 type = (protocol == IPPROTO_UDP)? SOCK_DGRAM : SOCK_STREAM; 1227 switch (sin->sa_family) { 1228 case AF_INET6: 1229 family = PF_INET6; 1230 break; 1231 case AF_INET: 1232 family = PF_INET; 1233 break; 1234 default: 1235 return ERR_PTR(-EINVAL); 1236 } 1237 1238 error = sock_create_kern(family, type, protocol, &sock); 1239 if (error < 0) 1240 return ERR_PTR(error); 1241 1242 svc_reclassify_socket(sock); 1243 1244 /* 1245 * If this is an PF_INET6 listener, we want to avoid 1246 * getting requests from IPv4 remotes. Those should 1247 * be shunted to a PF_INET listener via rpcbind. 1248 */ 1249 val = 1; 1250 if (family == PF_INET6) 1251 kernel_setsockopt(sock, SOL_IPV6, IPV6_V6ONLY, 1252 (char *)&val, sizeof(val)); 1253 1254 if (type == SOCK_STREAM) 1255 sock->sk->sk_reuse = 1; /* allow address reuse */ 1256 error = kernel_bind(sock, sin, len); 1257 if (error < 0) 1258 goto bummer; 1259 1260 newlen = len; 1261 error = kernel_getsockname(sock, newsin, &newlen); 1262 if (error < 0) 1263 goto bummer; 1264 1265 if (protocol == IPPROTO_TCP) { 1266 if ((error = kernel_listen(sock, 64)) < 0) 1267 goto bummer; 1268 } 1269 1270 if ((svsk = svc_setup_socket(serv, sock, &error, flags)) != NULL) { 1271 svc_xprt_set_local(&svsk->sk_xprt, newsin, newlen); 1272 return (struct svc_xprt *)svsk; 1273 } 1274 1275 bummer: 1276 dprintk("svc: svc_create_socket error = %d\n", -error); 1277 sock_release(sock); 1278 return ERR_PTR(error); 1279 } 1280 1281 /* 1282 * Detach the svc_sock from the socket so that no 1283 * more callbacks occur. 1284 */ 1285 static void svc_sock_detach(struct svc_xprt *xprt) 1286 { 1287 struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); 1288 struct sock *sk = svsk->sk_sk; 1289 1290 dprintk("svc: svc_sock_detach(%p)\n", svsk); 1291 1292 /* put back the old socket callbacks */ 1293 sk->sk_state_change = svsk->sk_ostate; 1294 sk->sk_data_ready = svsk->sk_odata; 1295 sk->sk_write_space = svsk->sk_owspace; 1296 1297 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) 1298 wake_up_interruptible(sk->sk_sleep); 1299 } 1300 1301 /* 1302 * Disconnect the socket, and reset the callbacks 1303 */ 1304 static void svc_tcp_sock_detach(struct svc_xprt *xprt) 1305 { 1306 struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); 1307 1308 dprintk("svc: svc_tcp_sock_detach(%p)\n", svsk); 1309 1310 svc_sock_detach(xprt); 1311 1312 if (!test_bit(XPT_LISTENER, &xprt->xpt_flags)) 1313 kernel_sock_shutdown(svsk->sk_sock, SHUT_RDWR); 1314 } 1315 1316 /* 1317 * Free the svc_sock's socket resources and the svc_sock itself. 1318 */ 1319 static void svc_sock_free(struct svc_xprt *xprt) 1320 { 1321 struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); 1322 dprintk("svc: svc_sock_free(%p)\n", svsk); 1323 1324 if (svsk->sk_sock->file) 1325 sockfd_put(svsk->sk_sock); 1326 else 1327 sock_release(svsk->sk_sock); 1328 kfree(svsk); 1329 } 1330