1 /* 2 * linux/net/sunrpc/xprtsock.c 3 * 4 * Client-side transport implementation for sockets. 5 * 6 * TCP callback races fixes (C) 1998 Red Hat 7 * TCP send fixes (C) 1998 Red Hat 8 * TCP NFS related read + write fixes 9 * (C) 1999 Dave Airlie, University of Limerick, Ireland <airlied@linux.ie> 10 * 11 * Rewrite of larges part of the code in order to stabilize TCP stuff. 12 * Fix behaviour when socket buffer is full. 13 * (C) 1999 Trond Myklebust <trond.myklebust@fys.uio.no> 14 * 15 * IP socket transport implementation, (C) 2005 Chuck Lever <cel@netapp.com> 16 * 17 * IPv6 support contributed by Gilles Quillard, Bull Open Source, 2005. 18 * <gilles.quillard@bull.net> 19 */ 20 21 #include <linux/types.h> 22 #include <linux/slab.h> 23 #include <linux/module.h> 24 #include <linux/capability.h> 25 #include <linux/pagemap.h> 26 #include <linux/errno.h> 27 #include <linux/socket.h> 28 #include <linux/in.h> 29 #include <linux/net.h> 30 #include <linux/mm.h> 31 #include <linux/udp.h> 32 #include <linux/tcp.h> 33 #include <linux/sunrpc/clnt.h> 34 #include <linux/sunrpc/sched.h> 35 #include <linux/sunrpc/svcsock.h> 36 #include <linux/sunrpc/xprtsock.h> 37 #include <linux/file.h> 38 #ifdef CONFIG_NFS_V4_1 39 #include <linux/sunrpc/bc_xprt.h> 40 #endif 41 42 #include <net/sock.h> 43 #include <net/checksum.h> 44 #include <net/udp.h> 45 #include <net/tcp.h> 46 47 #include "sunrpc.h" 48 /* 49 * xprtsock tunables 50 */ 51 unsigned int xprt_udp_slot_table_entries = RPC_DEF_SLOT_TABLE; 52 unsigned int xprt_tcp_slot_table_entries = RPC_DEF_SLOT_TABLE; 53 54 unsigned int xprt_min_resvport = RPC_DEF_MIN_RESVPORT; 55 unsigned int xprt_max_resvport = RPC_DEF_MAX_RESVPORT; 56 57 #define XS_TCP_LINGER_TO (15U * HZ) 58 static unsigned int xs_tcp_fin_timeout __read_mostly = XS_TCP_LINGER_TO; 59 60 /* 61 * We can register our own files under /proc/sys/sunrpc by 62 * calling register_sysctl_table() again. The files in that 63 * directory become the union of all files registered there. 
64 * 65 * We simply need to make sure that we don't collide with 66 * someone else's file names! 67 */ 68 69 #ifdef RPC_DEBUG 70 71 static unsigned int min_slot_table_size = RPC_MIN_SLOT_TABLE; 72 static unsigned int max_slot_table_size = RPC_MAX_SLOT_TABLE; 73 static unsigned int xprt_min_resvport_limit = RPC_MIN_RESVPORT; 74 static unsigned int xprt_max_resvport_limit = RPC_MAX_RESVPORT; 75 76 static struct ctl_table_header *sunrpc_table_header; 77 78 /* 79 * FIXME: changing the UDP slot table size should also resize the UDP 80 * socket buffers for existing UDP transports 81 */ 82 static ctl_table xs_tunables_table[] = { 83 { 84 .procname = "udp_slot_table_entries", 85 .data = &xprt_udp_slot_table_entries, 86 .maxlen = sizeof(unsigned int), 87 .mode = 0644, 88 .proc_handler = proc_dointvec_minmax, 89 .extra1 = &min_slot_table_size, 90 .extra2 = &max_slot_table_size 91 }, 92 { 93 .procname = "tcp_slot_table_entries", 94 .data = &xprt_tcp_slot_table_entries, 95 .maxlen = sizeof(unsigned int), 96 .mode = 0644, 97 .proc_handler = proc_dointvec_minmax, 98 .extra1 = &min_slot_table_size, 99 .extra2 = &max_slot_table_size 100 }, 101 { 102 .procname = "min_resvport", 103 .data = &xprt_min_resvport, 104 .maxlen = sizeof(unsigned int), 105 .mode = 0644, 106 .proc_handler = proc_dointvec_minmax, 107 .extra1 = &xprt_min_resvport_limit, 108 .extra2 = &xprt_max_resvport_limit 109 }, 110 { 111 .procname = "max_resvport", 112 .data = &xprt_max_resvport, 113 .maxlen = sizeof(unsigned int), 114 .mode = 0644, 115 .proc_handler = proc_dointvec_minmax, 116 .extra1 = &xprt_min_resvport_limit, 117 .extra2 = &xprt_max_resvport_limit 118 }, 119 { 120 .procname = "tcp_fin_timeout", 121 .data = &xs_tcp_fin_timeout, 122 .maxlen = sizeof(xs_tcp_fin_timeout), 123 .mode = 0644, 124 .proc_handler = proc_dointvec_jiffies, 125 }, 126 { }, 127 }; 128 129 static ctl_table sunrpc_table[] = { 130 { 131 .procname = "sunrpc", 132 .mode = 0555, 133 .child = xs_tunables_table 134 }, 135 { }, 136 }; 137 
138 #endif 139 140 /* 141 * Wait duration for a reply from the RPC portmapper. 142 */ 143 #define XS_BIND_TO (60U * HZ) 144 145 /* 146 * Delay if a UDP socket connect error occurs. This is most likely some 147 * kind of resource problem on the local host. 148 */ 149 #define XS_UDP_REEST_TO (2U * HZ) 150 151 /* 152 * The reestablish timeout allows clients to delay for a bit before attempting 153 * to reconnect to a server that just dropped our connection. 154 * 155 * We implement an exponential backoff when trying to reestablish a TCP 156 * transport connection with the server. Some servers like to drop a TCP 157 * connection when they are overworked, so we start with a short timeout and 158 * increase over time if the server is down or not responding. 159 */ 160 #define XS_TCP_INIT_REEST_TO (3U * HZ) 161 #define XS_TCP_MAX_REEST_TO (5U * 60 * HZ) 162 163 /* 164 * TCP idle timeout; client drops the transport socket if it is idle 165 * for this long. Note that we also timeout UDP sockets to prevent 166 * holding port numbers when there is no RPC traffic. 
167 */ 168 #define XS_IDLE_DISC_TO (5U * 60 * HZ) 169 170 #ifdef RPC_DEBUG 171 # undef RPC_DEBUG_DATA 172 # define RPCDBG_FACILITY RPCDBG_TRANS 173 #endif 174 175 #ifdef RPC_DEBUG_DATA 176 static void xs_pktdump(char *msg, u32 *packet, unsigned int count) 177 { 178 u8 *buf = (u8 *) packet; 179 int j; 180 181 dprintk("RPC: %s\n", msg); 182 for (j = 0; j < count && j < 128; j += 4) { 183 if (!(j & 31)) { 184 if (j) 185 dprintk("\n"); 186 dprintk("0x%04x ", j); 187 } 188 dprintk("%02x%02x%02x%02x ", 189 buf[j], buf[j+1], buf[j+2], buf[j+3]); 190 } 191 dprintk("\n"); 192 } 193 #else 194 static inline void xs_pktdump(char *msg, u32 *packet, unsigned int count) 195 { 196 /* NOP */ 197 } 198 #endif 199 200 struct sock_xprt { 201 struct rpc_xprt xprt; 202 203 /* 204 * Network layer 205 */ 206 struct socket * sock; 207 struct sock * inet; 208 209 /* 210 * State of TCP reply receive 211 */ 212 __be32 tcp_fraghdr, 213 tcp_xid; 214 215 u32 tcp_offset, 216 tcp_reclen; 217 218 unsigned long tcp_copied, 219 tcp_flags; 220 221 /* 222 * Connection of transports 223 */ 224 struct delayed_work connect_worker; 225 struct sockaddr_storage srcaddr; 226 unsigned short srcport; 227 228 /* 229 * UDP socket buffer size parameters 230 */ 231 size_t rcvsize, 232 sndsize; 233 234 /* 235 * Saved socket callback addresses 236 */ 237 void (*old_data_ready)(struct sock *, int); 238 void (*old_state_change)(struct sock *); 239 void (*old_write_space)(struct sock *); 240 void (*old_error_report)(struct sock *); 241 }; 242 243 /* 244 * TCP receive state flags 245 */ 246 #define TCP_RCV_LAST_FRAG (1UL << 0) 247 #define TCP_RCV_COPY_FRAGHDR (1UL << 1) 248 #define TCP_RCV_COPY_XID (1UL << 2) 249 #define TCP_RCV_COPY_DATA (1UL << 3) 250 #define TCP_RCV_READ_CALLDIR (1UL << 4) 251 #define TCP_RCV_COPY_CALLDIR (1UL << 5) 252 253 /* 254 * TCP RPC flags 255 */ 256 #define TCP_RPC_REPLY (1UL << 6) 257 258 static inline struct sockaddr *xs_addr(struct rpc_xprt *xprt) 259 { 260 return (struct sockaddr *) 
&xprt->addr; 261 } 262 263 static inline struct sockaddr_in *xs_addr_in(struct rpc_xprt *xprt) 264 { 265 return (struct sockaddr_in *) &xprt->addr; 266 } 267 268 static inline struct sockaddr_in6 *xs_addr_in6(struct rpc_xprt *xprt) 269 { 270 return (struct sockaddr_in6 *) &xprt->addr; 271 } 272 273 static void xs_format_common_peer_addresses(struct rpc_xprt *xprt) 274 { 275 struct sockaddr *sap = xs_addr(xprt); 276 struct sockaddr_in6 *sin6; 277 struct sockaddr_in *sin; 278 char buf[128]; 279 280 (void)rpc_ntop(sap, buf, sizeof(buf)); 281 xprt->address_strings[RPC_DISPLAY_ADDR] = kstrdup(buf, GFP_KERNEL); 282 283 switch (sap->sa_family) { 284 case AF_INET: 285 sin = xs_addr_in(xprt); 286 snprintf(buf, sizeof(buf), "%08x", ntohl(sin->sin_addr.s_addr)); 287 break; 288 case AF_INET6: 289 sin6 = xs_addr_in6(xprt); 290 snprintf(buf, sizeof(buf), "%pi6", &sin6->sin6_addr); 291 break; 292 default: 293 BUG(); 294 } 295 xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL); 296 } 297 298 static void xs_format_common_peer_ports(struct rpc_xprt *xprt) 299 { 300 struct sockaddr *sap = xs_addr(xprt); 301 char buf[128]; 302 303 snprintf(buf, sizeof(buf), "%u", rpc_get_port(sap)); 304 xprt->address_strings[RPC_DISPLAY_PORT] = kstrdup(buf, GFP_KERNEL); 305 306 snprintf(buf, sizeof(buf), "%4hx", rpc_get_port(sap)); 307 xprt->address_strings[RPC_DISPLAY_HEX_PORT] = kstrdup(buf, GFP_KERNEL); 308 } 309 310 static void xs_format_peer_addresses(struct rpc_xprt *xprt, 311 const char *protocol, 312 const char *netid) 313 { 314 xprt->address_strings[RPC_DISPLAY_PROTO] = protocol; 315 xprt->address_strings[RPC_DISPLAY_NETID] = netid; 316 xs_format_common_peer_addresses(xprt); 317 xs_format_common_peer_ports(xprt); 318 } 319 320 static void xs_update_peer_port(struct rpc_xprt *xprt) 321 { 322 kfree(xprt->address_strings[RPC_DISPLAY_HEX_PORT]); 323 kfree(xprt->address_strings[RPC_DISPLAY_PORT]); 324 325 xs_format_common_peer_ports(xprt); 326 } 327 328 static void 
xs_free_peer_addresses(struct rpc_xprt *xprt) 329 { 330 unsigned int i; 331 332 for (i = 0; i < RPC_DISPLAY_MAX; i++) 333 switch (i) { 334 case RPC_DISPLAY_PROTO: 335 case RPC_DISPLAY_NETID: 336 continue; 337 default: 338 kfree(xprt->address_strings[i]); 339 } 340 } 341 342 #define XS_SENDMSG_FLAGS (MSG_DONTWAIT | MSG_NOSIGNAL) 343 344 static int xs_send_kvec(struct socket *sock, struct sockaddr *addr, int addrlen, struct kvec *vec, unsigned int base, int more) 345 { 346 struct msghdr msg = { 347 .msg_name = addr, 348 .msg_namelen = addrlen, 349 .msg_flags = XS_SENDMSG_FLAGS | (more ? MSG_MORE : 0), 350 }; 351 struct kvec iov = { 352 .iov_base = vec->iov_base + base, 353 .iov_len = vec->iov_len - base, 354 }; 355 356 if (iov.iov_len != 0) 357 return kernel_sendmsg(sock, &msg, &iov, 1, iov.iov_len); 358 return kernel_sendmsg(sock, &msg, NULL, 0, 0); 359 } 360 361 static int xs_send_pagedata(struct socket *sock, struct xdr_buf *xdr, unsigned int base, int more) 362 { 363 struct page **ppage; 364 unsigned int remainder; 365 int err, sent = 0; 366 367 remainder = xdr->page_len - base; 368 base += xdr->page_base; 369 ppage = xdr->pages + (base >> PAGE_SHIFT); 370 base &= ~PAGE_MASK; 371 for(;;) { 372 unsigned int len = min_t(unsigned int, PAGE_SIZE - base, remainder); 373 int flags = XS_SENDMSG_FLAGS; 374 375 remainder -= len; 376 if (remainder != 0 || more) 377 flags |= MSG_MORE; 378 err = sock->ops->sendpage(sock, *ppage, base, len, flags); 379 if (remainder == 0 || err != len) 380 break; 381 sent += err; 382 ppage++; 383 base = 0; 384 } 385 if (sent == 0) 386 return err; 387 if (err > 0) 388 sent += err; 389 return sent; 390 } 391 392 /** 393 * xs_sendpages - write pages directly to a socket 394 * @sock: socket to send on 395 * @addr: UDP only -- address of destination 396 * @addrlen: UDP only -- length of destination address 397 * @xdr: buffer containing this request 398 * @base: starting position in the buffer 399 * 400 */ 401 static int xs_sendpages(struct socket 
*sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base) 402 { 403 unsigned int remainder = xdr->len - base; 404 int err, sent = 0; 405 406 if (unlikely(!sock)) 407 return -ENOTSOCK; 408 409 clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags); 410 if (base != 0) { 411 addr = NULL; 412 addrlen = 0; 413 } 414 415 if (base < xdr->head[0].iov_len || addr != NULL) { 416 unsigned int len = xdr->head[0].iov_len - base; 417 remainder -= len; 418 err = xs_send_kvec(sock, addr, addrlen, &xdr->head[0], base, remainder != 0); 419 if (remainder == 0 || err != len) 420 goto out; 421 sent += err; 422 base = 0; 423 } else 424 base -= xdr->head[0].iov_len; 425 426 if (base < xdr->page_len) { 427 unsigned int len = xdr->page_len - base; 428 remainder -= len; 429 err = xs_send_pagedata(sock, xdr, base, remainder != 0); 430 if (remainder == 0 || err != len) 431 goto out; 432 sent += err; 433 base = 0; 434 } else 435 base -= xdr->page_len; 436 437 if (base >= xdr->tail[0].iov_len) 438 return sent; 439 err = xs_send_kvec(sock, NULL, 0, &xdr->tail[0], base, 0); 440 out: 441 if (sent == 0) 442 return err; 443 if (err > 0) 444 sent += err; 445 return sent; 446 } 447 448 static void xs_nospace_callback(struct rpc_task *task) 449 { 450 struct sock_xprt *transport = container_of(task->tk_rqstp->rq_xprt, struct sock_xprt, xprt); 451 452 transport->inet->sk_write_pending--; 453 clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags); 454 } 455 456 /** 457 * xs_nospace - place task on wait queue if transmit was incomplete 458 * @task: task to put to sleep 459 * 460 */ 461 static int xs_nospace(struct rpc_task *task) 462 { 463 struct rpc_rqst *req = task->tk_rqstp; 464 struct rpc_xprt *xprt = req->rq_xprt; 465 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); 466 int ret = 0; 467 468 dprintk("RPC: %5u xmit incomplete (%u left of %u)\n", 469 task->tk_pid, req->rq_slen - req->rq_bytes_sent, 470 req->rq_slen); 471 472 /* Protect against races with write_space 
*/ 473 spin_lock_bh(&xprt->transport_lock); 474 475 /* Don't race with disconnect */ 476 if (xprt_connected(xprt)) { 477 if (test_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags)) { 478 ret = -EAGAIN; 479 /* 480 * Notify TCP that we're limited by the application 481 * window size 482 */ 483 set_bit(SOCK_NOSPACE, &transport->sock->flags); 484 transport->inet->sk_write_pending++; 485 /* ...and wait for more buffer space */ 486 xprt_wait_for_buffer_space(task, xs_nospace_callback); 487 } 488 } else { 489 clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags); 490 ret = -ENOTCONN; 491 } 492 493 spin_unlock_bh(&xprt->transport_lock); 494 return ret; 495 } 496 497 /** 498 * xs_udp_send_request - write an RPC request to a UDP socket 499 * @task: address of RPC task that manages the state of an RPC request 500 * 501 * Return values: 502 * 0: The request has been sent 503 * EAGAIN: The socket was blocked, please call again later to 504 * complete the request 505 * ENOTCONN: Caller needs to invoke connect logic then call again 506 * other: Some other error occured, the request was not sent 507 */ 508 static int xs_udp_send_request(struct rpc_task *task) 509 { 510 struct rpc_rqst *req = task->tk_rqstp; 511 struct rpc_xprt *xprt = req->rq_xprt; 512 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); 513 struct xdr_buf *xdr = &req->rq_snd_buf; 514 int status; 515 516 xs_pktdump("packet data:", 517 req->rq_svec->iov_base, 518 req->rq_svec->iov_len); 519 520 if (!xprt_bound(xprt)) 521 return -ENOTCONN; 522 status = xs_sendpages(transport->sock, 523 xs_addr(xprt), 524 xprt->addrlen, xdr, 525 req->rq_bytes_sent); 526 527 dprintk("RPC: xs_udp_send_request(%u) = %d\n", 528 xdr->len - req->rq_bytes_sent, status); 529 530 if (status >= 0) { 531 req->rq_xmit_bytes_sent += status; 532 if (status >= req->rq_slen) 533 return 0; 534 /* Still some bytes left; set up for a retry later. 
*/ 535 status = -EAGAIN; 536 } 537 538 switch (status) { 539 case -ENOTSOCK: 540 status = -ENOTCONN; 541 /* Should we call xs_close() here? */ 542 break; 543 case -EAGAIN: 544 status = xs_nospace(task); 545 break; 546 default: 547 dprintk("RPC: sendmsg returned unrecognized error %d\n", 548 -status); 549 case -ENETUNREACH: 550 case -EPIPE: 551 case -ECONNREFUSED: 552 /* When the server has died, an ICMP port unreachable message 553 * prompts ECONNREFUSED. */ 554 clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags); 555 } 556 557 return status; 558 } 559 560 /** 561 * xs_tcp_shutdown - gracefully shut down a TCP socket 562 * @xprt: transport 563 * 564 * Initiates a graceful shutdown of the TCP socket by calling the 565 * equivalent of shutdown(SHUT_WR); 566 */ 567 static void xs_tcp_shutdown(struct rpc_xprt *xprt) 568 { 569 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); 570 struct socket *sock = transport->sock; 571 572 if (sock != NULL) 573 kernel_sock_shutdown(sock, SHUT_WR); 574 } 575 576 static inline void xs_encode_tcp_record_marker(struct xdr_buf *buf) 577 { 578 u32 reclen = buf->len - sizeof(rpc_fraghdr); 579 rpc_fraghdr *base = buf->head[0].iov_base; 580 *base = htonl(RPC_LAST_STREAM_FRAGMENT | reclen); 581 } 582 583 /** 584 * xs_tcp_send_request - write an RPC request to a TCP socket 585 * @task: address of RPC task that manages the state of an RPC request 586 * 587 * Return values: 588 * 0: The request has been sent 589 * EAGAIN: The socket was blocked, please call again later to 590 * complete the request 591 * ENOTCONN: Caller needs to invoke connect logic then call again 592 * other: Some other error occured, the request was not sent 593 * 594 * XXX: In the case of soft timeouts, should we eventually give up 595 * if sendmsg is not able to make progress? 
596 */ 597 static int xs_tcp_send_request(struct rpc_task *task) 598 { 599 struct rpc_rqst *req = task->tk_rqstp; 600 struct rpc_xprt *xprt = req->rq_xprt; 601 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); 602 struct xdr_buf *xdr = &req->rq_snd_buf; 603 int status; 604 605 xs_encode_tcp_record_marker(&req->rq_snd_buf); 606 607 xs_pktdump("packet data:", 608 req->rq_svec->iov_base, 609 req->rq_svec->iov_len); 610 611 /* Continue transmitting the packet/record. We must be careful 612 * to cope with writespace callbacks arriving _after_ we have 613 * called sendmsg(). */ 614 while (1) { 615 status = xs_sendpages(transport->sock, 616 NULL, 0, xdr, req->rq_bytes_sent); 617 618 dprintk("RPC: xs_tcp_send_request(%u) = %d\n", 619 xdr->len - req->rq_bytes_sent, status); 620 621 if (unlikely(status < 0)) 622 break; 623 624 /* If we've sent the entire packet, immediately 625 * reset the count of bytes sent. */ 626 req->rq_bytes_sent += status; 627 req->rq_xmit_bytes_sent += status; 628 if (likely(req->rq_bytes_sent >= req->rq_slen)) { 629 req->rq_bytes_sent = 0; 630 return 0; 631 } 632 633 if (status != 0) 634 continue; 635 status = -EAGAIN; 636 break; 637 } 638 639 switch (status) { 640 case -ENOTSOCK: 641 status = -ENOTCONN; 642 /* Should we call xs_close() here? */ 643 break; 644 case -EAGAIN: 645 status = xs_nospace(task); 646 break; 647 default: 648 dprintk("RPC: sendmsg returned unrecognized error %d\n", 649 -status); 650 case -ECONNRESET: 651 case -EPIPE: 652 xs_tcp_shutdown(xprt); 653 case -ECONNREFUSED: 654 case -ENOTCONN: 655 clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags); 656 } 657 658 return status; 659 } 660 661 /** 662 * xs_tcp_release_xprt - clean up after a tcp transmission 663 * @xprt: transport 664 * @task: rpc task 665 * 666 * This cleans up if an error causes us to abort the transmission of a request. 667 * In this case, the socket may need to be reset in order to avoid confusing 668 * the server. 
669 */ 670 static void xs_tcp_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task) 671 { 672 struct rpc_rqst *req; 673 674 if (task != xprt->snd_task) 675 return; 676 if (task == NULL) 677 goto out_release; 678 req = task->tk_rqstp; 679 if (req->rq_bytes_sent == 0) 680 goto out_release; 681 if (req->rq_bytes_sent == req->rq_snd_buf.len) 682 goto out_release; 683 set_bit(XPRT_CLOSE_WAIT, &task->tk_xprt->state); 684 out_release: 685 xprt_release_xprt(xprt, task); 686 } 687 688 static void xs_save_old_callbacks(struct sock_xprt *transport, struct sock *sk) 689 { 690 transport->old_data_ready = sk->sk_data_ready; 691 transport->old_state_change = sk->sk_state_change; 692 transport->old_write_space = sk->sk_write_space; 693 transport->old_error_report = sk->sk_error_report; 694 } 695 696 static void xs_restore_old_callbacks(struct sock_xprt *transport, struct sock *sk) 697 { 698 sk->sk_data_ready = transport->old_data_ready; 699 sk->sk_state_change = transport->old_state_change; 700 sk->sk_write_space = transport->old_write_space; 701 sk->sk_error_report = transport->old_error_report; 702 } 703 704 static void xs_reset_transport(struct sock_xprt *transport) 705 { 706 struct socket *sock = transport->sock; 707 struct sock *sk = transport->inet; 708 709 if (sk == NULL) 710 return; 711 712 write_lock_bh(&sk->sk_callback_lock); 713 transport->inet = NULL; 714 transport->sock = NULL; 715 716 sk->sk_user_data = NULL; 717 718 xs_restore_old_callbacks(transport, sk); 719 write_unlock_bh(&sk->sk_callback_lock); 720 721 sk->sk_no_check = 0; 722 723 sock_release(sock); 724 } 725 726 /** 727 * xs_close - close a socket 728 * @xprt: transport 729 * 730 * This is used when all requests are complete; ie, no DRC state remains 731 * on the server we want to save. 732 * 733 * The caller _must_ be holding XPRT_LOCKED in order to avoid issues with 734 * xs_reset_transport() zeroing the socket from underneath a writer. 
735 */ 736 static void xs_close(struct rpc_xprt *xprt) 737 { 738 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); 739 740 dprintk("RPC: xs_close xprt %p\n", xprt); 741 742 xs_reset_transport(transport); 743 xprt->reestablish_timeout = 0; 744 745 smp_mb__before_clear_bit(); 746 clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); 747 clear_bit(XPRT_CLOSE_WAIT, &xprt->state); 748 clear_bit(XPRT_CLOSING, &xprt->state); 749 smp_mb__after_clear_bit(); 750 xprt_disconnect_done(xprt); 751 } 752 753 static void xs_tcp_close(struct rpc_xprt *xprt) 754 { 755 if (test_and_clear_bit(XPRT_CONNECTION_CLOSE, &xprt->state)) 756 xs_close(xprt); 757 else 758 xs_tcp_shutdown(xprt); 759 } 760 761 /** 762 * xs_destroy - prepare to shutdown a transport 763 * @xprt: doomed transport 764 * 765 */ 766 static void xs_destroy(struct rpc_xprt *xprt) 767 { 768 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); 769 770 dprintk("RPC: xs_destroy xprt %p\n", xprt); 771 772 cancel_rearming_delayed_work(&transport->connect_worker); 773 774 xs_close(xprt); 775 xs_free_peer_addresses(xprt); 776 kfree(xprt->slot); 777 kfree(xprt); 778 module_put(THIS_MODULE); 779 } 780 781 static inline struct rpc_xprt *xprt_from_sock(struct sock *sk) 782 { 783 return (struct rpc_xprt *) sk->sk_user_data; 784 } 785 786 /** 787 * xs_udp_data_ready - "data ready" callback for UDP sockets 788 * @sk: socket with data to read 789 * @len: how much data to read 790 * 791 */ 792 static void xs_udp_data_ready(struct sock *sk, int len) 793 { 794 struct rpc_task *task; 795 struct rpc_xprt *xprt; 796 struct rpc_rqst *rovr; 797 struct sk_buff *skb; 798 int err, repsize, copied; 799 u32 _xid; 800 __be32 *xp; 801 802 read_lock(&sk->sk_callback_lock); 803 dprintk("RPC: xs_udp_data_ready...\n"); 804 if (!(xprt = xprt_from_sock(sk))) 805 goto out; 806 807 if ((skb = skb_recv_datagram(sk, 0, 1, &err)) == NULL) 808 goto out; 809 810 if (xprt->shutdown) 811 goto dropit; 812 813 repsize = skb->len 
- sizeof(struct udphdr); 814 if (repsize < 4) { 815 dprintk("RPC: impossible RPC reply size %d!\n", repsize); 816 goto dropit; 817 } 818 819 /* Copy the XID from the skb... */ 820 xp = skb_header_pointer(skb, sizeof(struct udphdr), 821 sizeof(_xid), &_xid); 822 if (xp == NULL) 823 goto dropit; 824 825 /* Look up and lock the request corresponding to the given XID */ 826 spin_lock(&xprt->transport_lock); 827 rovr = xprt_lookup_rqst(xprt, *xp); 828 if (!rovr) 829 goto out_unlock; 830 task = rovr->rq_task; 831 832 if ((copied = rovr->rq_private_buf.buflen) > repsize) 833 copied = repsize; 834 835 /* Suck it into the iovec, verify checksum if not done by hw. */ 836 if (csum_partial_copy_to_xdr(&rovr->rq_private_buf, skb)) { 837 UDPX_INC_STATS_BH(sk, UDP_MIB_INERRORS); 838 goto out_unlock; 839 } 840 841 UDPX_INC_STATS_BH(sk, UDP_MIB_INDATAGRAMS); 842 843 /* Something worked... */ 844 dst_confirm(skb_dst(skb)); 845 846 xprt_adjust_cwnd(task, copied); 847 xprt_complete_rqst(task, copied); 848 849 out_unlock: 850 spin_unlock(&xprt->transport_lock); 851 dropit: 852 skb_free_datagram(sk, skb); 853 out: 854 read_unlock(&sk->sk_callback_lock); 855 } 856 857 static inline void xs_tcp_read_fraghdr(struct rpc_xprt *xprt, struct xdr_skb_reader *desc) 858 { 859 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); 860 size_t len, used; 861 char *p; 862 863 p = ((char *) &transport->tcp_fraghdr) + transport->tcp_offset; 864 len = sizeof(transport->tcp_fraghdr) - transport->tcp_offset; 865 used = xdr_skb_read_bits(desc, p, len); 866 transport->tcp_offset += used; 867 if (used != len) 868 return; 869 870 transport->tcp_reclen = ntohl(transport->tcp_fraghdr); 871 if (transport->tcp_reclen & RPC_LAST_STREAM_FRAGMENT) 872 transport->tcp_flags |= TCP_RCV_LAST_FRAG; 873 else 874 transport->tcp_flags &= ~TCP_RCV_LAST_FRAG; 875 transport->tcp_reclen &= RPC_FRAGMENT_SIZE_MASK; 876 877 transport->tcp_flags &= ~TCP_RCV_COPY_FRAGHDR; 878 transport->tcp_offset = 0; 879 880 /* 
Sanity check of the record length */ 881 if (unlikely(transport->tcp_reclen < 8)) { 882 dprintk("RPC: invalid TCP record fragment length\n"); 883 xprt_force_disconnect(xprt); 884 return; 885 } 886 dprintk("RPC: reading TCP record fragment of length %d\n", 887 transport->tcp_reclen); 888 } 889 890 static void xs_tcp_check_fraghdr(struct sock_xprt *transport) 891 { 892 if (transport->tcp_offset == transport->tcp_reclen) { 893 transport->tcp_flags |= TCP_RCV_COPY_FRAGHDR; 894 transport->tcp_offset = 0; 895 if (transport->tcp_flags & TCP_RCV_LAST_FRAG) { 896 transport->tcp_flags &= ~TCP_RCV_COPY_DATA; 897 transport->tcp_flags |= TCP_RCV_COPY_XID; 898 transport->tcp_copied = 0; 899 } 900 } 901 } 902 903 static inline void xs_tcp_read_xid(struct sock_xprt *transport, struct xdr_skb_reader *desc) 904 { 905 size_t len, used; 906 char *p; 907 908 len = sizeof(transport->tcp_xid) - transport->tcp_offset; 909 dprintk("RPC: reading XID (%Zu bytes)\n", len); 910 p = ((char *) &transport->tcp_xid) + transport->tcp_offset; 911 used = xdr_skb_read_bits(desc, p, len); 912 transport->tcp_offset += used; 913 if (used != len) 914 return; 915 transport->tcp_flags &= ~TCP_RCV_COPY_XID; 916 transport->tcp_flags |= TCP_RCV_READ_CALLDIR; 917 transport->tcp_copied = 4; 918 dprintk("RPC: reading %s XID %08x\n", 919 (transport->tcp_flags & TCP_RPC_REPLY) ? "reply for" 920 : "request with", 921 ntohl(transport->tcp_xid)); 922 xs_tcp_check_fraghdr(transport); 923 } 924 925 static inline void xs_tcp_read_calldir(struct sock_xprt *transport, 926 struct xdr_skb_reader *desc) 927 { 928 size_t len, used; 929 u32 offset; 930 __be32 calldir; 931 932 /* 933 * We want transport->tcp_offset to be 8 at the end of this routine 934 * (4 bytes for the xid and 4 bytes for the call/reply flag). 935 * When this function is called for the first time, 936 * transport->tcp_offset is 4 (after having already read the xid). 
937 */ 938 offset = transport->tcp_offset - sizeof(transport->tcp_xid); 939 len = sizeof(calldir) - offset; 940 dprintk("RPC: reading CALL/REPLY flag (%Zu bytes)\n", len); 941 used = xdr_skb_read_bits(desc, &calldir, len); 942 transport->tcp_offset += used; 943 if (used != len) 944 return; 945 transport->tcp_flags &= ~TCP_RCV_READ_CALLDIR; 946 transport->tcp_flags |= TCP_RCV_COPY_CALLDIR; 947 transport->tcp_flags |= TCP_RCV_COPY_DATA; 948 /* 949 * We don't yet have the XDR buffer, so we will write the calldir 950 * out after we get the buffer from the 'struct rpc_rqst' 951 */ 952 if (ntohl(calldir) == RPC_REPLY) 953 transport->tcp_flags |= TCP_RPC_REPLY; 954 else 955 transport->tcp_flags &= ~TCP_RPC_REPLY; 956 dprintk("RPC: reading %s CALL/REPLY flag %08x\n", 957 (transport->tcp_flags & TCP_RPC_REPLY) ? 958 "reply for" : "request with", calldir); 959 xs_tcp_check_fraghdr(transport); 960 } 961 962 static inline void xs_tcp_read_common(struct rpc_xprt *xprt, 963 struct xdr_skb_reader *desc, 964 struct rpc_rqst *req) 965 { 966 struct sock_xprt *transport = 967 container_of(xprt, struct sock_xprt, xprt); 968 struct xdr_buf *rcvbuf; 969 size_t len; 970 ssize_t r; 971 972 rcvbuf = &req->rq_private_buf; 973 974 if (transport->tcp_flags & TCP_RCV_COPY_CALLDIR) { 975 /* 976 * Save the RPC direction in the XDR buffer 977 */ 978 __be32 calldir = transport->tcp_flags & TCP_RPC_REPLY ? 
979 htonl(RPC_REPLY) : 0; 980 981 memcpy(rcvbuf->head[0].iov_base + transport->tcp_copied, 982 &calldir, sizeof(calldir)); 983 transport->tcp_copied += sizeof(calldir); 984 transport->tcp_flags &= ~TCP_RCV_COPY_CALLDIR; 985 } 986 987 len = desc->count; 988 if (len > transport->tcp_reclen - transport->tcp_offset) { 989 struct xdr_skb_reader my_desc; 990 991 len = transport->tcp_reclen - transport->tcp_offset; 992 memcpy(&my_desc, desc, sizeof(my_desc)); 993 my_desc.count = len; 994 r = xdr_partial_copy_from_skb(rcvbuf, transport->tcp_copied, 995 &my_desc, xdr_skb_read_bits); 996 desc->count -= r; 997 desc->offset += r; 998 } else 999 r = xdr_partial_copy_from_skb(rcvbuf, transport->tcp_copied, 1000 desc, xdr_skb_read_bits); 1001 1002 if (r > 0) { 1003 transport->tcp_copied += r; 1004 transport->tcp_offset += r; 1005 } 1006 if (r != len) { 1007 /* Error when copying to the receive buffer, 1008 * usually because we weren't able to allocate 1009 * additional buffer pages. All we can do now 1010 * is turn off TCP_RCV_COPY_DATA, so the request 1011 * will not receive any additional updates, 1012 * and time out. 1013 * Any remaining data from this record will 1014 * be discarded. 
1015 */ 1016 transport->tcp_flags &= ~TCP_RCV_COPY_DATA; 1017 dprintk("RPC: XID %08x truncated request\n", 1018 ntohl(transport->tcp_xid)); 1019 dprintk("RPC: xprt = %p, tcp_copied = %lu, " 1020 "tcp_offset = %u, tcp_reclen = %u\n", 1021 xprt, transport->tcp_copied, 1022 transport->tcp_offset, transport->tcp_reclen); 1023 return; 1024 } 1025 1026 dprintk("RPC: XID %08x read %Zd bytes\n", 1027 ntohl(transport->tcp_xid), r); 1028 dprintk("RPC: xprt = %p, tcp_copied = %lu, tcp_offset = %u, " 1029 "tcp_reclen = %u\n", xprt, transport->tcp_copied, 1030 transport->tcp_offset, transport->tcp_reclen); 1031 1032 if (transport->tcp_copied == req->rq_private_buf.buflen) 1033 transport->tcp_flags &= ~TCP_RCV_COPY_DATA; 1034 else if (transport->tcp_offset == transport->tcp_reclen) { 1035 if (transport->tcp_flags & TCP_RCV_LAST_FRAG) 1036 transport->tcp_flags &= ~TCP_RCV_COPY_DATA; 1037 } 1038 1039 return; 1040 } 1041 1042 /* 1043 * Finds the request corresponding to the RPC xid and invokes the common 1044 * tcp read code to read the data. 
 */
static inline int xs_tcp_read_reply(struct rpc_xprt *xprt,
				    struct xdr_skb_reader *desc)
{
	struct sock_xprt *transport =
				container_of(xprt, struct sock_xprt, xprt);
	struct rpc_rqst *req;

	dprintk("RPC: read reply XID %08x\n", ntohl(transport->tcp_xid));

	/* Find and lock the request corresponding to this xid */
	spin_lock(&xprt->transport_lock);
	req = xprt_lookup_rqst(xprt, transport->tcp_xid);
	if (!req) {
		dprintk("RPC: XID %08x request not found!\n",
				ntohl(transport->tcp_xid));
		spin_unlock(&xprt->transport_lock);
		return -1;
	}

	xs_tcp_read_common(xprt, desc, req);

	/*
	 * Once TCP_RCV_COPY_DATA is no longer set there is nothing more to
	 * copy into this request, so hand it back with the byte count.
	 */
	if (!(transport->tcp_flags & TCP_RCV_COPY_DATA))
		xprt_complete_rqst(req->rq_task, transport->tcp_copied);

	spin_unlock(&xprt->transport_lock);
	return 0;
}

#if defined(CONFIG_NFS_V4_1)
/*
 * Obtains an rpc_rqst previously allocated and invokes the common
 * tcp read code to read the data.  The result is placed in the callback
 * queue.
 * If we're unable to obtain the rpc_rqst we schedule the closing of the
 * connection and return -1.
 */
static inline int xs_tcp_read_callback(struct rpc_xprt *xprt,
				       struct xdr_skb_reader *desc)
{
	struct sock_xprt *transport =
				container_of(xprt, struct sock_xprt, xprt);
	struct rpc_rqst *req;

	req = xprt_alloc_bc_request(xprt);
	if (req == NULL) {
		printk(KERN_WARNING "Callback slot table overflowed\n");
		xprt_force_disconnect(xprt);
		return -1;
	}

	req->rq_xid = transport->tcp_xid;
	dprintk("RPC: read callback XID %08x\n", ntohl(req->rq_xid));
	xs_tcp_read_common(xprt, desc, req);

	if (!(transport->tcp_flags & TCP_RCV_COPY_DATA)) {
		struct svc_serv *bc_serv = xprt->bc_serv;

		/*
		 * Add callback request to callback list.  The callback
		 * service sleeps on the sv_cb_waitq waiting for new
		 * requests.  Wake it up after enqueueing the
		 * request.
		 */
		dprintk("RPC: add callback request to list\n");
		spin_lock(&bc_serv->sv_cb_lock);
		list_add(&req->rq_bc_list, &bc_serv->sv_cb_list);
		spin_unlock(&bc_serv->sv_cb_lock);
		wake_up(&bc_serv->sv_cb_waitq);
	}

	/*
	 * NOTE(review): the length is recorded after the request may already
	 * have been queued and the waiter woken above - confirm the consumer
	 * cannot observe a stale rq_private_buf.len.
	 */
	req->rq_private_buf.len = transport->tcp_copied;

	return 0;
}

/*
 * Dispatch on the call direction recorded in tcp_flags: replies go to
 * xs_tcp_read_reply(), everything else to xs_tcp_read_callback().
 * Returns the helper's result (0 on success, -1 on failure).
 */
static inline int _xs_tcp_read_data(struct rpc_xprt *xprt,
					struct xdr_skb_reader *desc)
{
	struct sock_xprt *transport =
				container_of(xprt, struct sock_xprt, xprt);

	return (transport->tcp_flags & TCP_RPC_REPLY) ?
		xs_tcp_read_reply(xprt, desc) :
		xs_tcp_read_callback(xprt, desc);
}
#else
/*
 * Without CONFIG_NFS_V4_1 there is no callback path: all incoming data is
 * handled as a reply.
 */
static inline int _xs_tcp_read_data(struct rpc_xprt *xprt,
					struct xdr_skb_reader *desc)
{
	return xs_tcp_read_reply(xprt, desc);
}
#endif /* CONFIG_NFS_V4_1 */

/*
 * Read data off the transport.  This can be either an RPC_CALL or an
 * RPC_REPLY.  Relay the processing to helper functions.
 */
static void xs_tcp_read_data(struct rpc_xprt *xprt,
				    struct xdr_skb_reader *desc)
{
	struct sock_xprt *transport =
				container_of(xprt, struct sock_xprt, xprt);

	if (_xs_tcp_read_data(xprt, desc) == 0)
		xs_tcp_check_fraghdr(transport);
	else {
		/*
		 * The transport_lock protects the request handling.
		 * There's no need to hold it to update the tcp_flags.
		 */
		transport->tcp_flags &= ~TCP_RCV_COPY_DATA;
	}
}

/*
 * Skip over whatever is left of the current record (bounded by what the
 * reader still holds), then check whether a new fragment header is due.
 */
static inline void xs_tcp_read_discard(struct sock_xprt *transport, struct xdr_skb_reader *desc)
{
	size_t len;

	len = transport->tcp_reclen - transport->tcp_offset;
	if (len > desc->count)
		len = desc->count;
	desc->count -= len;
	desc->offset += len;
	transport->tcp_offset += len;
	dprintk("RPC: discarded %Zu bytes\n", len);
	xs_tcp_check_fraghdr(transport);
}

/*
 * tcp_read_sock() actor: consume @len bytes of @skb starting at @offset,
 * driving the receive state held in transport->tcp_flags one step per loop
 * iteration.  Returns the number of bytes consumed.
 */
static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, unsigned int offset, size_t len)
{
	struct rpc_xprt *xprt = rd_desc->arg.data;
	struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
	struct xdr_skb_reader desc = {
		.skb	= skb,
		.offset	= offset,
		.count	= len,
	};

	dprintk("RPC: xs_tcp_data_recv started\n");
	do {
		/* Read in a new fragment marker if necessary */
		/* Can we ever really expect to get completely empty fragments? */
		if (transport->tcp_flags & TCP_RCV_COPY_FRAGHDR) {
			xs_tcp_read_fraghdr(xprt, &desc);
			continue;
		}
		/* Read in the xid if necessary */
		if (transport->tcp_flags & TCP_RCV_COPY_XID) {
			xs_tcp_read_xid(transport, &desc);
			continue;
		}
		/* Read in the call/reply flag */
		if (transport->tcp_flags & TCP_RCV_READ_CALLDIR) {
			xs_tcp_read_calldir(transport, &desc);
			continue;
		}
		/* Read in the request data */
		if (transport->tcp_flags & TCP_RCV_COPY_DATA) {
			xs_tcp_read_data(xprt, &desc);
			continue;
		}
		/* Skip over any trailing bytes on short reads */
		xs_tcp_read_discard(transport, &desc);
	} while (desc.count);
	dprintk("RPC: xs_tcp_data_recv done\n");
	return len - desc.count;
}

/**
 * xs_tcp_data_ready - "data ready" callback for TCP sockets
 * @sk: socket with data to read
 * @bytes: how much data to read
 *
 */
static void xs_tcp_data_ready(struct sock *sk, int bytes)
{
	struct rpc_xprt *xprt;
	read_descriptor_t rd_desc;
	int read;

	dprintk("RPC: xs_tcp_data_ready...\n");

	read_lock(&sk->sk_callback_lock);
	if (!(xprt = xprt_from_sock(sk)))
		goto out;
	if (xprt->shutdown)
		goto out;

	/* Any data means we had a useful conversation, so
	 * we don't need to delay the next reconnect
	 */
	if (xprt->reestablish_timeout)
		xprt->reestablish_timeout = 0;

	/* We use rd_desc to pass struct xprt to xs_tcp_data_recv */
	rd_desc.arg.data = xprt;
	/* Keep pulling until tcp_read_sock() reports nothing was read */
	do {
		rd_desc.count = 65536;
		read = tcp_read_sock(sk, &rd_desc, xs_tcp_data_recv);
	} while (read > 0);
out:
	read_unlock(&sk->sk_callback_lock);
}

/*
 * Do the equivalent of linger/linger2 handling for dealing with
 * broken servers that don't close the socket in a timely
 * fashion
 */
static void xs_tcp_schedule_linger_timeout(struct rpc_xprt *xprt,
		unsigned long timeout)
{
	struct sock_xprt *transport;

	/* Nothing to do if the transport is already marked as connecting */
	if (xprt_test_and_set_connecting(xprt))
		return;
	set_bit(XPRT_CONNECTION_ABORT, &xprt->state);
	transport = container_of(xprt, struct sock_xprt, xprt);
	queue_delayed_work(rpciod_workqueue, &transport->connect_worker,
			   timeout);
}

/*
 * Undo xs_tcp_schedule_linger_timeout(): only if the abort flag is set and
 * the delayed connect work could still be cancelled.
 */
static void xs_tcp_cancel_linger_timeout(struct rpc_xprt *xprt)
{
	struct sock_xprt *transport;

	transport = container_of(xprt, struct sock_xprt, xprt);

	if (!test_bit(XPRT_CONNECTION_ABORT, &xprt->state) ||
	    !cancel_delayed_work(&transport->connect_worker))
		return;
	clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
	xprt_clear_connecting(xprt);
}

/*
 * Clear the CLOSE_WAIT and CLOSING state bits and report the disconnect
 * as finished.
 */
static void xs_sock_mark_closed(struct rpc_xprt *xprt)
{
	smp_mb__before_clear_bit();
	clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
	clear_bit(XPRT_CLOSING, &xprt->state);
	smp_mb__after_clear_bit();
	/* Mark transport as closed and wake up all pending tasks */
	xprt_disconnect_done(xprt);
}

/**
 * xs_tcp_state_change - callback to handle TCP socket state changes
 * @sk: socket whose state has changed
 *
 */
static void xs_tcp_state_change(struct sock *sk)
{
	struct rpc_xprt *xprt;

	read_lock(&sk->sk_callback_lock);
	if (!(xprt = xprt_from_sock(sk)))
		goto out;
	dprintk("RPC: xs_tcp_state_change client %p...\n", xprt);
	dprintk("RPC: state %x conn %d dead %d zapped %d\n",
			sk->sk_state, xprt_connected(xprt),
			sock_flag(sk, SOCK_DEAD),
			sock_flag(sk, SOCK_ZAPPED));

	switch (sk->sk_state) {
	case TCP_ESTABLISHED:
		spin_lock_bh(&xprt->transport_lock);
		if (!xprt_test_and_set_connected(xprt)) {
			struct sock_xprt *transport = container_of(xprt,
					struct sock_xprt, xprt);

			/* Reset TCP record info */
			transport->tcp_offset = 0;
			transport->tcp_reclen = 0;
			transport->tcp_copied = 0;
			transport->tcp_flags =
				TCP_RCV_COPY_FRAGHDR | TCP_RCV_COPY_XID;

			xprt_wake_pending_tasks(xprt, -EAGAIN);
		}
		spin_unlock_bh(&xprt->transport_lock);
		break;
	case TCP_FIN_WAIT1:
		/* The client initiated a shutdown of the socket */
		xprt->connect_cookie++;
		xprt->reestablish_timeout = 0;
		set_bit(XPRT_CLOSING, &xprt->state);
		smp_mb__before_clear_bit();
		clear_bit(XPRT_CONNECTED, &xprt->state);
		clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
		smp_mb__after_clear_bit();
		xs_tcp_schedule_linger_timeout(xprt, xs_tcp_fin_timeout);
		break;
	case TCP_CLOSE_WAIT:
		/* The server initiated a shutdown of the socket */
		xprt_force_disconnect(xprt);
		/* fall through */
	case TCP_SYN_SENT:
		xprt->connect_cookie++;
		/* fall through */
	case TCP_CLOSING:
		/*
		 * If the server closed down the connection, make sure that
		 * we back off before reconnecting
		 */
		if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO)
			xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
		break;
	case TCP_LAST_ACK:
		set_bit(XPRT_CLOSING, &xprt->state);
		xs_tcp_schedule_linger_timeout(xprt, xs_tcp_fin_timeout);
		smp_mb__before_clear_bit();
		clear_bit(XPRT_CONNECTED, &xprt->state);
		smp_mb__after_clear_bit();
		break;
	case TCP_CLOSE:
		xs_tcp_cancel_linger_timeout(xprt);
		xs_sock_mark_closed(xprt);
	}
 out:
	read_unlock(&sk->sk_callback_lock);
}

/**
 * xs_error_report - callback mainly for catching socket errors
 * @sk: socket
 */
static void xs_error_report(struct sock *sk)
{
	struct rpc_xprt *xprt;

	read_lock(&sk->sk_callback_lock);
	if (!(xprt = xprt_from_sock(sk)))
		goto out;
	dprintk("RPC: %s client %p...\n"
			"RPC: error %d\n",
			__func__, xprt, sk->sk_err);
	/* The socket error itself is only logged; waiters are woken with -EAGAIN */
	xprt_wake_pending_tasks(xprt, -EAGAIN);
out:
	read_unlock(&sk->sk_callback_lock);
}

/*
 * Shared tail of the UDP and TCP write_space callbacks below, which call
 * it with sk_callback_lock read-held.  Writers are only woken when
 * SOCK_ASYNC_NOSPACE was set on the socket.
 */
static void xs_write_space(struct sock *sk)
{
	struct socket *sock;
	struct rpc_xprt *xprt;

	if (unlikely(!(sock = sk->sk_socket)))
		return;
	clear_bit(SOCK_NOSPACE, &sock->flags);

	if (unlikely(!(xprt = xprt_from_sock(sk))))
		return;
	if (test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags) == 0)
		return;

	xprt_write_space(xprt);
}

/**
 * xs_udp_write_space - callback invoked when socket buffer space
 *                             becomes available
 * @sk: socket whose state has changed
 *
 * Called when more output buffer space is available for this socket.
 * We try not to wake our writers until they can make "significant"
 * progress, otherwise we'll waste resources thrashing kernel_sendmsg
 * with a bunch of small requests.
 */
static void xs_udp_write_space(struct sock *sk)
{
	read_lock(&sk->sk_callback_lock);

	/* from net/core/sock.c:sock_def_write_space */
	if (sock_writeable(sk))
		xs_write_space(sk);

	read_unlock(&sk->sk_callback_lock);
}

/**
 * xs_tcp_write_space - callback invoked when socket buffer space
 *                             becomes available
 * @sk: socket whose state has changed
 *
 * Called when more output buffer space is available for this socket.
 * We try not to wake our writers until they can make "significant"
 * progress, otherwise we'll waste resources thrashing kernel_sendmsg
 * with a bunch of small requests.
1432 */ 1433 static void xs_tcp_write_space(struct sock *sk) 1434 { 1435 read_lock(&sk->sk_callback_lock); 1436 1437 /* from net/core/stream.c:sk_stream_write_space */ 1438 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) 1439 xs_write_space(sk); 1440 1441 read_unlock(&sk->sk_callback_lock); 1442 } 1443 1444 static void xs_udp_do_set_buffer_size(struct rpc_xprt *xprt) 1445 { 1446 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); 1447 struct sock *sk = transport->inet; 1448 1449 if (transport->rcvsize) { 1450 sk->sk_userlocks |= SOCK_RCVBUF_LOCK; 1451 sk->sk_rcvbuf = transport->rcvsize * xprt->max_reqs * 2; 1452 } 1453 if (transport->sndsize) { 1454 sk->sk_userlocks |= SOCK_SNDBUF_LOCK; 1455 sk->sk_sndbuf = transport->sndsize * xprt->max_reqs * 2; 1456 sk->sk_write_space(sk); 1457 } 1458 } 1459 1460 /** 1461 * xs_udp_set_buffer_size - set send and receive limits 1462 * @xprt: generic transport 1463 * @sndsize: requested size of send buffer, in bytes 1464 * @rcvsize: requested size of receive buffer, in bytes 1465 * 1466 * Set socket send and receive buffer size limits. 1467 */ 1468 static void xs_udp_set_buffer_size(struct rpc_xprt *xprt, size_t sndsize, size_t rcvsize) 1469 { 1470 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); 1471 1472 transport->sndsize = 0; 1473 if (sndsize) 1474 transport->sndsize = sndsize + 1024; 1475 transport->rcvsize = 0; 1476 if (rcvsize) 1477 transport->rcvsize = rcvsize + 1024; 1478 1479 xs_udp_do_set_buffer_size(xprt); 1480 } 1481 1482 /** 1483 * xs_udp_timer - called when a retransmit timeout occurs on a UDP transport 1484 * @task: task that timed out 1485 * 1486 * Adjust the congestion window after a retransmit timeout has occurred. 
1487 */ 1488 static void xs_udp_timer(struct rpc_task *task) 1489 { 1490 xprt_adjust_cwnd(task, -ETIMEDOUT); 1491 } 1492 1493 static unsigned short xs_get_random_port(void) 1494 { 1495 unsigned short range = xprt_max_resvport - xprt_min_resvport; 1496 unsigned short rand = (unsigned short) net_random() % range; 1497 return rand + xprt_min_resvport; 1498 } 1499 1500 /** 1501 * xs_set_port - reset the port number in the remote endpoint address 1502 * @xprt: generic transport 1503 * @port: new port number 1504 * 1505 */ 1506 static void xs_set_port(struct rpc_xprt *xprt, unsigned short port) 1507 { 1508 dprintk("RPC: setting port for xprt %p to %u\n", xprt, port); 1509 1510 rpc_set_port(xs_addr(xprt), port); 1511 xs_update_peer_port(xprt); 1512 } 1513 1514 static unsigned short xs_get_srcport(struct sock_xprt *transport, struct socket *sock) 1515 { 1516 unsigned short port = transport->srcport; 1517 1518 if (port == 0 && transport->xprt.resvport) 1519 port = xs_get_random_port(); 1520 return port; 1521 } 1522 1523 static unsigned short xs_next_srcport(struct sock_xprt *transport, struct socket *sock, unsigned short port) 1524 { 1525 if (transport->srcport != 0) 1526 transport->srcport = 0; 1527 if (!transport->xprt.resvport) 1528 return 0; 1529 if (port <= xprt_min_resvport || port > xprt_max_resvport) 1530 return xprt_max_resvport; 1531 return --port; 1532 } 1533 1534 static int xs_bind4(struct sock_xprt *transport, struct socket *sock) 1535 { 1536 struct sockaddr_in myaddr = { 1537 .sin_family = AF_INET, 1538 }; 1539 struct sockaddr_in *sa; 1540 int err, nloop = 0; 1541 unsigned short port = xs_get_srcport(transport, sock); 1542 unsigned short last; 1543 1544 sa = (struct sockaddr_in *)&transport->srcaddr; 1545 myaddr.sin_addr = sa->sin_addr; 1546 do { 1547 myaddr.sin_port = htons(port); 1548 err = kernel_bind(sock, (struct sockaddr *) &myaddr, 1549 sizeof(myaddr)); 1550 if (port == 0) 1551 break; 1552 if (err == 0) { 1553 transport->srcport = port; 1554 break; 1555 
} 1556 last = port; 1557 port = xs_next_srcport(transport, sock, port); 1558 if (port > last) 1559 nloop++; 1560 } while (err == -EADDRINUSE && nloop != 2); 1561 dprintk("RPC: %s %pI4:%u: %s (%d)\n", 1562 __func__, &myaddr.sin_addr, 1563 port, err ? "failed" : "ok", err); 1564 return err; 1565 } 1566 1567 static int xs_bind6(struct sock_xprt *transport, struct socket *sock) 1568 { 1569 struct sockaddr_in6 myaddr = { 1570 .sin6_family = AF_INET6, 1571 }; 1572 struct sockaddr_in6 *sa; 1573 int err, nloop = 0; 1574 unsigned short port = xs_get_srcport(transport, sock); 1575 unsigned short last; 1576 1577 sa = (struct sockaddr_in6 *)&transport->srcaddr; 1578 myaddr.sin6_addr = sa->sin6_addr; 1579 do { 1580 myaddr.sin6_port = htons(port); 1581 err = kernel_bind(sock, (struct sockaddr *) &myaddr, 1582 sizeof(myaddr)); 1583 if (port == 0) 1584 break; 1585 if (err == 0) { 1586 transport->srcport = port; 1587 break; 1588 } 1589 last = port; 1590 port = xs_next_srcport(transport, sock, port); 1591 if (port > last) 1592 nloop++; 1593 } while (err == -EADDRINUSE && nloop != 2); 1594 dprintk("RPC: xs_bind6 %pI6:%u: %s (%d)\n", 1595 &myaddr.sin6_addr, port, err ? 
"failed" : "ok", err); 1596 return err; 1597 } 1598 1599 #ifdef CONFIG_DEBUG_LOCK_ALLOC 1600 static struct lock_class_key xs_key[2]; 1601 static struct lock_class_key xs_slock_key[2]; 1602 1603 static inline void xs_reclassify_socket4(struct socket *sock) 1604 { 1605 struct sock *sk = sock->sk; 1606 1607 BUG_ON(sock_owned_by_user(sk)); 1608 sock_lock_init_class_and_name(sk, "slock-AF_INET-RPC", 1609 &xs_slock_key[0], "sk_lock-AF_INET-RPC", &xs_key[0]); 1610 } 1611 1612 static inline void xs_reclassify_socket6(struct socket *sock) 1613 { 1614 struct sock *sk = sock->sk; 1615 1616 BUG_ON(sock_owned_by_user(sk)); 1617 sock_lock_init_class_and_name(sk, "slock-AF_INET6-RPC", 1618 &xs_slock_key[1], "sk_lock-AF_INET6-RPC", &xs_key[1]); 1619 } 1620 #else 1621 static inline void xs_reclassify_socket4(struct socket *sock) 1622 { 1623 } 1624 1625 static inline void xs_reclassify_socket6(struct socket *sock) 1626 { 1627 } 1628 #endif 1629 1630 static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) 1631 { 1632 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); 1633 1634 if (!transport->inet) { 1635 struct sock *sk = sock->sk; 1636 1637 write_lock_bh(&sk->sk_callback_lock); 1638 1639 xs_save_old_callbacks(transport, sk); 1640 1641 sk->sk_user_data = xprt; 1642 sk->sk_data_ready = xs_udp_data_ready; 1643 sk->sk_write_space = xs_udp_write_space; 1644 sk->sk_error_report = xs_error_report; 1645 sk->sk_no_check = UDP_CSUM_NORCV; 1646 sk->sk_allocation = GFP_ATOMIC; 1647 1648 xprt_set_connected(xprt); 1649 1650 /* Reset to new socket */ 1651 transport->sock = sock; 1652 transport->inet = sk; 1653 1654 write_unlock_bh(&sk->sk_callback_lock); 1655 } 1656 xs_udp_do_set_buffer_size(xprt); 1657 } 1658 1659 /** 1660 * xs_udp_connect_worker4 - set up a UDP socket 1661 * @work: RPC transport to connect 1662 * 1663 * Invoked by a work queue tasklet. 
1664 */ 1665 static void xs_udp_connect_worker4(struct work_struct *work) 1666 { 1667 struct sock_xprt *transport = 1668 container_of(work, struct sock_xprt, connect_worker.work); 1669 struct rpc_xprt *xprt = &transport->xprt; 1670 struct socket *sock = transport->sock; 1671 int err, status = -EIO; 1672 1673 if (xprt->shutdown) 1674 goto out; 1675 1676 /* Start by resetting any existing state */ 1677 xs_reset_transport(transport); 1678 1679 err = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock); 1680 if (err < 0) { 1681 dprintk("RPC: can't create UDP transport socket (%d).\n", -err); 1682 goto out; 1683 } 1684 xs_reclassify_socket4(sock); 1685 1686 if (xs_bind4(transport, sock)) { 1687 sock_release(sock); 1688 goto out; 1689 } 1690 1691 dprintk("RPC: worker connecting xprt %p via %s to " 1692 "%s (port %s)\n", xprt, 1693 xprt->address_strings[RPC_DISPLAY_PROTO], 1694 xprt->address_strings[RPC_DISPLAY_ADDR], 1695 xprt->address_strings[RPC_DISPLAY_PORT]); 1696 1697 xs_udp_finish_connecting(xprt, sock); 1698 status = 0; 1699 out: 1700 xprt_clear_connecting(xprt); 1701 xprt_wake_pending_tasks(xprt, status); 1702 } 1703 1704 /** 1705 * xs_udp_connect_worker6 - set up a UDP socket 1706 * @work: RPC transport to connect 1707 * 1708 * Invoked by a work queue tasklet. 
 */
static void xs_udp_connect_worker6(struct work_struct *work)
{
	struct sock_xprt *transport =
		container_of(work, struct sock_xprt, connect_worker.work);
	struct rpc_xprt *xprt = &transport->xprt;
	struct socket *sock = transport->sock;
	int err, status = -EIO;

	if (xprt->shutdown)
		goto out;

	/* Start by resetting any existing state */
	xs_reset_transport(transport);

	err = sock_create_kern(PF_INET6, SOCK_DGRAM, IPPROTO_UDP, &sock);
	if (err < 0) {
		dprintk("RPC: can't create UDP transport socket (%d).\n", -err);
		goto out;
	}
	xs_reclassify_socket6(sock);

	if (xs_bind6(transport, sock) < 0) {
		sock_release(sock);
		goto out;
	}

	dprintk("RPC: worker connecting xprt %p via %s to "
				"%s (port %s)\n", xprt,
			xprt->address_strings[RPC_DISPLAY_PROTO],
			xprt->address_strings[RPC_DISPLAY_ADDR],
			xprt->address_strings[RPC_DISPLAY_PORT]);

	xs_udp_finish_connecting(xprt, sock);
	status = 0;
out:
	/* Success or failure, the connect attempt is over: wake the waiters */
	xprt_clear_connecting(xprt);
	xprt_wake_pending_tasks(xprt, status);
}

/*
 * We need to preserve the port number so the reply cache on the server can
 * find our cached RPC replies when we get around to reconnecting.
 */
static void xs_abort_connection(struct rpc_xprt *xprt, struct sock_xprt *transport)
{
	int result;
	struct sockaddr any;

	dprintk("RPC: disconnecting xprt %p to reuse port\n", xprt);

	/*
	 * Disconnect the transport socket by doing a connect operation
	 * with AF_UNSPEC.  This should return immediately...
	 */
	memset(&any, 0, sizeof(any));
	any.sa_family = AF_UNSPEC;
	result = kernel_connect(transport->sock, &any, sizeof(any), 0);
	if (!result)
		xs_sock_mark_closed(xprt);
	else
		dprintk("RPC: AF_UNSPEC connect return code %d\n",
				result);
}

/*
 * Abort the existing connection unless the socket is already cleanly
 * unconnected, or is established / in the middle of connecting.
 */
static void xs_tcp_reuse_connection(struct rpc_xprt *xprt, struct sock_xprt *transport)
{
	unsigned int state = transport->inet->sk_state;

	if (state == TCP_CLOSE && transport->sock->state == SS_UNCONNECTED)
		return;
	if ((1 << state) & (TCPF_ESTABLISHED|TCPF_SYN_SENT))
		return;
	xs_abort_connection(xprt, transport);
}

/*
 * Install the RPC callbacks on @sock (first use only) and start a
 * non-blocking connect.  Returns -ENOTCONN if the transport is not bound,
 * otherwise the result of kernel_connect().
 */
static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
{
	struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);

	if (!transport->inet) {
		struct sock *sk = sock->sk;

		write_lock_bh(&sk->sk_callback_lock);

		xs_save_old_callbacks(transport, sk);

		sk->sk_user_data = xprt;
		sk->sk_data_ready = xs_tcp_data_ready;
		sk->sk_state_change = xs_tcp_state_change;
		sk->sk_write_space = xs_tcp_write_space;
		sk->sk_error_report = xs_error_report;
		sk->sk_allocation = GFP_ATOMIC;

		/* socket options */
		sk->sk_userlocks |= SOCK_BINDPORT_LOCK;
		sock_reset_flag(sk, SOCK_LINGER);
		tcp_sk(sk)->linger2 = 0;
		tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF;

		xprt_clear_connected(xprt);

		/* Reset to new socket */
		transport->sock = sock;
		transport->inet = sk;

		write_unlock_bh(&sk->sk_callback_lock);
	}

	if (!xprt_bound(xprt))
		return -ENOTCONN;

	/* Tell the socket layer to start connecting... */
	xprt->stat.connect_count++;
	xprt->stat.connect_start = jiffies;
	return kernel_connect(sock, xs_addr(xprt), xprt->addrlen, O_NONBLOCK);
}

/**
 * xs_tcp_setup_socket - create a TCP socket and connect to a remote endpoint
 * @xprt: RPC transport to connect
 * @transport: socket transport to connect
 * @create_sock: function to create a socket of the correct type
 *
 * Invoked by a work queue tasklet.
 */
static void xs_tcp_setup_socket(struct rpc_xprt *xprt,
		struct sock_xprt *transport,
		struct socket *(*create_sock)(struct rpc_xprt *,
			struct sock_xprt *))
{
	struct socket *sock = transport->sock;
	int status = -EIO;

	if (xprt->shutdown)
		goto out;

	if (!sock) {
		clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
		sock = create_sock(xprt, transport);
		if (IS_ERR(sock)) {
			status = PTR_ERR(sock);
			goto out;
		}
	} else {
		int abort_and_exit;

		abort_and_exit = test_and_clear_bit(XPRT_CONNECTION_ABORT,
				&xprt->state);
		/* "close" the socket, preserving the local port */
		xs_tcp_reuse_connection(xprt, transport);

		/* An abort was requested: don't reconnect, just report -EAGAIN */
		if (abort_and_exit)
			goto out_eagain;
	}

	dprintk("RPC: worker connecting xprt %p via %s to "
				"%s (port %s)\n", xprt,
			xprt->address_strings[RPC_DISPLAY_PROTO],
			xprt->address_strings[RPC_DISPLAY_ADDR],
			xprt->address_strings[RPC_DISPLAY_PORT]);

	status = xs_tcp_finish_connecting(xprt, sock);
	dprintk("RPC: %p connect status %d connected %d sock state %d\n",
			xprt, -status, xprt_connected(xprt),
			sock->sk->sk_state);
	switch (status) {
	default:
		printk("%s: connect returned unhandled error %d\n",
			__func__, status);
		/* fall through: treat unknown errors like -EADDRNOTAVAIL */
	case -EADDRNOTAVAIL:
		/* We're probably in TIME_WAIT. Get rid of existing socket,
		 * and retry
		 */
		set_bit(XPRT_CONNECTION_CLOSE, &xprt->state);
		xprt_force_disconnect(xprt);
		break;
	case -ECONNREFUSED:
	case -ECONNRESET:
	case -ENETUNREACH:
		/* retry with existing socket, after a delay */
		/* fall through */
	case 0:
	case -EINPROGRESS:
	case -EALREADY:
		/* Return without waking the pending tasks from here */
		xprt_clear_connecting(xprt);
		return;
	case -EINVAL:
		/* Happens, for instance, if the user specified a link
		 * local IPv6 address without a scope-id.
		 */
		goto out;
	}
out_eagain:
	status = -EAGAIN;
out:
	xprt_clear_connecting(xprt);
	xprt_wake_pending_tasks(xprt, status);
}

/*
 * Create and bind a new IPv4 TCP socket for the transport.
 * Returns the socket, or ERR_PTR(-EIO) on any failure.
 */
static struct socket *xs_create_tcp_sock4(struct rpc_xprt *xprt,
		struct sock_xprt *transport)
{
	struct socket *sock;
	int err;

	/* start from scratch */
	err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
	if (err < 0) {
		dprintk("RPC: can't create TCP transport socket (%d).\n",
				-err);
		goto out_err;
	}
	xs_reclassify_socket4(sock);

	if (xs_bind4(transport, sock) < 0) {
		sock_release(sock);
		goto out_err;
	}
	return sock;
out_err:
	return ERR_PTR(-EIO);
}

/**
 * xs_tcp_connect_worker4 - connect a TCP socket to a remote endpoint
 * @work: RPC transport to connect
 *
 * Invoked by a work queue tasklet.
1937 */ 1938 static void xs_tcp_connect_worker4(struct work_struct *work) 1939 { 1940 struct sock_xprt *transport = 1941 container_of(work, struct sock_xprt, connect_worker.work); 1942 struct rpc_xprt *xprt = &transport->xprt; 1943 1944 xs_tcp_setup_socket(xprt, transport, xs_create_tcp_sock4); 1945 } 1946 1947 static struct socket *xs_create_tcp_sock6(struct rpc_xprt *xprt, 1948 struct sock_xprt *transport) 1949 { 1950 struct socket *sock; 1951 int err; 1952 1953 /* start from scratch */ 1954 err = sock_create_kern(PF_INET6, SOCK_STREAM, IPPROTO_TCP, &sock); 1955 if (err < 0) { 1956 dprintk("RPC: can't create TCP transport socket (%d).\n", 1957 -err); 1958 goto out_err; 1959 } 1960 xs_reclassify_socket6(sock); 1961 1962 if (xs_bind6(transport, sock) < 0) { 1963 sock_release(sock); 1964 goto out_err; 1965 } 1966 return sock; 1967 out_err: 1968 return ERR_PTR(-EIO); 1969 } 1970 1971 /** 1972 * xs_tcp_connect_worker6 - connect a TCP socket to a remote endpoint 1973 * @work: RPC transport to connect 1974 * 1975 * Invoked by a work queue tasklet. 1976 */ 1977 static void xs_tcp_connect_worker6(struct work_struct *work) 1978 { 1979 struct sock_xprt *transport = 1980 container_of(work, struct sock_xprt, connect_worker.work); 1981 struct rpc_xprt *xprt = &transport->xprt; 1982 1983 xs_tcp_setup_socket(xprt, transport, xs_create_tcp_sock6); 1984 } 1985 1986 /** 1987 * xs_connect - connect a socket to a remote endpoint 1988 * @task: address of RPC task that manages state of connect request 1989 * 1990 * TCP: If the remote end dropped the connection, delay reconnecting. 1991 * 1992 * UDP socket connects are synchronous, but we use a work queue anyway 1993 * to guarantee that even unprivileged user processes can set up a 1994 * socket on a privileged port. 1995 * 1996 * If a UDP socket connect fails, the delay behavior here prevents 1997 * retry floods (hard mounts). 
1998 */ 1999 static void xs_connect(struct rpc_task *task) 2000 { 2001 struct rpc_xprt *xprt = task->tk_xprt; 2002 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); 2003 2004 if (transport->sock != NULL && !RPC_IS_SOFTCONN(task)) { 2005 dprintk("RPC: xs_connect delayed xprt %p for %lu " 2006 "seconds\n", 2007 xprt, xprt->reestablish_timeout / HZ); 2008 queue_delayed_work(rpciod_workqueue, 2009 &transport->connect_worker, 2010 xprt->reestablish_timeout); 2011 xprt->reestablish_timeout <<= 1; 2012 if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO) 2013 xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; 2014 if (xprt->reestablish_timeout > XS_TCP_MAX_REEST_TO) 2015 xprt->reestablish_timeout = XS_TCP_MAX_REEST_TO; 2016 } else { 2017 dprintk("RPC: xs_connect scheduled xprt %p\n", xprt); 2018 queue_delayed_work(rpciod_workqueue, 2019 &transport->connect_worker, 0); 2020 } 2021 } 2022 2023 /** 2024 * xs_udp_print_stats - display UDP socket-specifc stats 2025 * @xprt: rpc_xprt struct containing statistics 2026 * @seq: output file 2027 * 2028 */ 2029 static void xs_udp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) 2030 { 2031 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); 2032 2033 seq_printf(seq, "\txprt:\tudp %u %lu %lu %lu %lu %Lu %Lu\n", 2034 transport->srcport, 2035 xprt->stat.bind_count, 2036 xprt->stat.sends, 2037 xprt->stat.recvs, 2038 xprt->stat.bad_xids, 2039 xprt->stat.req_u, 2040 xprt->stat.bklog_u); 2041 } 2042 2043 /** 2044 * xs_tcp_print_stats - display TCP socket-specifc stats 2045 * @xprt: rpc_xprt struct containing statistics 2046 * @seq: output file 2047 * 2048 */ 2049 static void xs_tcp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) 2050 { 2051 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); 2052 long idle_time = 0; 2053 2054 if (xprt_connected(xprt)) 2055 idle_time = (long)(jiffies - xprt->last_used) / HZ; 2056 2057 seq_printf(seq, "\txprt:\ttcp %u %lu 
%lu %lu %ld %lu %lu %lu %Lu %Lu\n", 2058 transport->srcport, 2059 xprt->stat.bind_count, 2060 xprt->stat.connect_count, 2061 xprt->stat.connect_time, 2062 idle_time, 2063 xprt->stat.sends, 2064 xprt->stat.recvs, 2065 xprt->stat.bad_xids, 2066 xprt->stat.req_u, 2067 xprt->stat.bklog_u); 2068 } 2069 2070 /* 2071 * Allocate a bunch of pages for a scratch buffer for the rpc code. The reason 2072 * we allocate pages instead doing a kmalloc like rpc_malloc is because we want 2073 * to use the server side send routines. 2074 */ 2075 static void *bc_malloc(struct rpc_task *task, size_t size) 2076 { 2077 struct page *page; 2078 struct rpc_buffer *buf; 2079 2080 BUG_ON(size > PAGE_SIZE - sizeof(struct rpc_buffer)); 2081 page = alloc_page(GFP_KERNEL); 2082 2083 if (!page) 2084 return NULL; 2085 2086 buf = page_address(page); 2087 buf->len = PAGE_SIZE; 2088 2089 return buf->data; 2090 } 2091 2092 /* 2093 * Free the space allocated in the bc_alloc routine 2094 */ 2095 static void bc_free(void *buffer) 2096 { 2097 struct rpc_buffer *buf; 2098 2099 if (!buffer) 2100 return; 2101 2102 buf = container_of(buffer, struct rpc_buffer, data); 2103 free_page((unsigned long)buf); 2104 } 2105 2106 /* 2107 * Use the svc_sock to send the callback. Must be called with svsk->sk_mutex 2108 * held. Borrows heavily from svc_tcp_sendto and xs_tcp_send_request. 
2109 */ 2110 static int bc_sendto(struct rpc_rqst *req) 2111 { 2112 int len; 2113 struct xdr_buf *xbufp = &req->rq_snd_buf; 2114 struct rpc_xprt *xprt = req->rq_xprt; 2115 struct sock_xprt *transport = 2116 container_of(xprt, struct sock_xprt, xprt); 2117 struct socket *sock = transport->sock; 2118 unsigned long headoff; 2119 unsigned long tailoff; 2120 2121 /* 2122 * Set up the rpc header and record marker stuff 2123 */ 2124 xs_encode_tcp_record_marker(xbufp); 2125 2126 tailoff = (unsigned long)xbufp->tail[0].iov_base & ~PAGE_MASK; 2127 headoff = (unsigned long)xbufp->head[0].iov_base & ~PAGE_MASK; 2128 len = svc_send_common(sock, xbufp, 2129 virt_to_page(xbufp->head[0].iov_base), headoff, 2130 xbufp->tail[0].iov_base, tailoff); 2131 2132 if (len != xbufp->len) { 2133 printk(KERN_NOTICE "Error sending entire callback!\n"); 2134 len = -EAGAIN; 2135 } 2136 2137 return len; 2138 } 2139 2140 /* 2141 * The send routine. Borrows from svc_send 2142 */ 2143 static int bc_send_request(struct rpc_task *task) 2144 { 2145 struct rpc_rqst *req = task->tk_rqstp; 2146 struct svc_xprt *xprt; 2147 struct svc_sock *svsk; 2148 u32 len; 2149 2150 dprintk("sending request with xid: %08x\n", ntohl(req->rq_xid)); 2151 /* 2152 * Get the server socket associated with this callback xprt 2153 */ 2154 xprt = req->rq_xprt->bc_xprt; 2155 svsk = container_of(xprt, struct svc_sock, sk_xprt); 2156 2157 /* 2158 * Grab the mutex to serialize data as the connection is shared 2159 * with the fore channel 2160 */ 2161 if (!mutex_trylock(&xprt->xpt_mutex)) { 2162 rpc_sleep_on(&xprt->xpt_bc_pending, task, NULL); 2163 if (!mutex_trylock(&xprt->xpt_mutex)) 2164 return -EAGAIN; 2165 rpc_wake_up_queued_task(&xprt->xpt_bc_pending, task); 2166 } 2167 if (test_bit(XPT_DEAD, &xprt->xpt_flags)) 2168 len = -ENOTCONN; 2169 else 2170 len = bc_sendto(req); 2171 mutex_unlock(&xprt->xpt_mutex); 2172 2173 if (len > 0) 2174 len = 0; 2175 2176 return len; 2177 } 2178 2179 /* 2180 * The close routine. 
Since this is client initiated, we do nothing 2181 */ 2182 2183 static void bc_close(struct rpc_xprt *xprt) 2184 { 2185 return; 2186 } 2187 2188 /* 2189 * The xprt destroy routine. Again, because this connection is client 2190 * initiated, we do nothing 2191 */ 2192 2193 static void bc_destroy(struct rpc_xprt *xprt) 2194 { 2195 return; 2196 } 2197 2198 static struct rpc_xprt_ops xs_udp_ops = { 2199 .set_buffer_size = xs_udp_set_buffer_size, 2200 .reserve_xprt = xprt_reserve_xprt_cong, 2201 .release_xprt = xprt_release_xprt_cong, 2202 .rpcbind = rpcb_getport_async, 2203 .set_port = xs_set_port, 2204 .connect = xs_connect, 2205 .buf_alloc = rpc_malloc, 2206 .buf_free = rpc_free, 2207 .send_request = xs_udp_send_request, 2208 .set_retrans_timeout = xprt_set_retrans_timeout_rtt, 2209 .timer = xs_udp_timer, 2210 .release_request = xprt_release_rqst_cong, 2211 .close = xs_close, 2212 .destroy = xs_destroy, 2213 .print_stats = xs_udp_print_stats, 2214 }; 2215 2216 static struct rpc_xprt_ops xs_tcp_ops = { 2217 .reserve_xprt = xprt_reserve_xprt, 2218 .release_xprt = xs_tcp_release_xprt, 2219 .rpcbind = rpcb_getport_async, 2220 .set_port = xs_set_port, 2221 .connect = xs_connect, 2222 .buf_alloc = rpc_malloc, 2223 .buf_free = rpc_free, 2224 .send_request = xs_tcp_send_request, 2225 .set_retrans_timeout = xprt_set_retrans_timeout_def, 2226 .close = xs_tcp_close, 2227 .destroy = xs_destroy, 2228 .print_stats = xs_tcp_print_stats, 2229 }; 2230 2231 /* 2232 * The rpc_xprt_ops for the server backchannel 2233 */ 2234 2235 static struct rpc_xprt_ops bc_tcp_ops = { 2236 .reserve_xprt = xprt_reserve_xprt, 2237 .release_xprt = xprt_release_xprt, 2238 .buf_alloc = bc_malloc, 2239 .buf_free = bc_free, 2240 .send_request = bc_send_request, 2241 .set_retrans_timeout = xprt_set_retrans_timeout_def, 2242 .close = bc_close, 2243 .destroy = bc_destroy, 2244 .print_stats = xs_tcp_print_stats, 2245 }; 2246 2247 static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args, 2248 unsigned int 
slot_table_size) 2249 { 2250 struct rpc_xprt *xprt; 2251 struct sock_xprt *new; 2252 2253 if (args->addrlen > sizeof(xprt->addr)) { 2254 dprintk("RPC: xs_setup_xprt: address too large\n"); 2255 return ERR_PTR(-EBADF); 2256 } 2257 2258 new = kzalloc(sizeof(*new), GFP_KERNEL); 2259 if (new == NULL) { 2260 dprintk("RPC: xs_setup_xprt: couldn't allocate " 2261 "rpc_xprt\n"); 2262 return ERR_PTR(-ENOMEM); 2263 } 2264 xprt = &new->xprt; 2265 2266 xprt->max_reqs = slot_table_size; 2267 xprt->slot = kcalloc(xprt->max_reqs, sizeof(struct rpc_rqst), GFP_KERNEL); 2268 if (xprt->slot == NULL) { 2269 kfree(xprt); 2270 dprintk("RPC: xs_setup_xprt: couldn't allocate slot " 2271 "table\n"); 2272 return ERR_PTR(-ENOMEM); 2273 } 2274 2275 memcpy(&xprt->addr, args->dstaddr, args->addrlen); 2276 xprt->addrlen = args->addrlen; 2277 if (args->srcaddr) 2278 memcpy(&new->srcaddr, args->srcaddr, args->addrlen); 2279 2280 return xprt; 2281 } 2282 2283 static const struct rpc_timeout xs_udp_default_timeout = { 2284 .to_initval = 5 * HZ, 2285 .to_maxval = 30 * HZ, 2286 .to_increment = 5 * HZ, 2287 .to_retries = 5, 2288 }; 2289 2290 /** 2291 * xs_setup_udp - Set up transport to use a UDP socket 2292 * @args: rpc transport creation arguments 2293 * 2294 */ 2295 static struct rpc_xprt *xs_setup_udp(struct xprt_create *args) 2296 { 2297 struct sockaddr *addr = args->dstaddr; 2298 struct rpc_xprt *xprt; 2299 struct sock_xprt *transport; 2300 2301 xprt = xs_setup_xprt(args, xprt_udp_slot_table_entries); 2302 if (IS_ERR(xprt)) 2303 return xprt; 2304 transport = container_of(xprt, struct sock_xprt, xprt); 2305 2306 xprt->prot = IPPROTO_UDP; 2307 xprt->tsh_size = 0; 2308 /* XXX: header size can vary due to auth type, IPv6, etc. 
*/ 2309 xprt->max_payload = (1U << 16) - (MAX_HEADER << 3); 2310 2311 xprt->bind_timeout = XS_BIND_TO; 2312 xprt->reestablish_timeout = XS_UDP_REEST_TO; 2313 xprt->idle_timeout = XS_IDLE_DISC_TO; 2314 2315 xprt->ops = &xs_udp_ops; 2316 2317 xprt->timeout = &xs_udp_default_timeout; 2318 2319 switch (addr->sa_family) { 2320 case AF_INET: 2321 if (((struct sockaddr_in *)addr)->sin_port != htons(0)) 2322 xprt_set_bound(xprt); 2323 2324 INIT_DELAYED_WORK(&transport->connect_worker, 2325 xs_udp_connect_worker4); 2326 xs_format_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP); 2327 break; 2328 case AF_INET6: 2329 if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0)) 2330 xprt_set_bound(xprt); 2331 2332 INIT_DELAYED_WORK(&transport->connect_worker, 2333 xs_udp_connect_worker6); 2334 xs_format_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP6); 2335 break; 2336 default: 2337 kfree(xprt); 2338 return ERR_PTR(-EAFNOSUPPORT); 2339 } 2340 2341 if (xprt_bound(xprt)) 2342 dprintk("RPC: set up xprt to %s (port %s) via %s\n", 2343 xprt->address_strings[RPC_DISPLAY_ADDR], 2344 xprt->address_strings[RPC_DISPLAY_PORT], 2345 xprt->address_strings[RPC_DISPLAY_PROTO]); 2346 else 2347 dprintk("RPC: set up xprt to %s (autobind) via %s\n", 2348 xprt->address_strings[RPC_DISPLAY_ADDR], 2349 xprt->address_strings[RPC_DISPLAY_PROTO]); 2350 2351 if (try_module_get(THIS_MODULE)) 2352 return xprt; 2353 2354 kfree(xprt->slot); 2355 kfree(xprt); 2356 return ERR_PTR(-EINVAL); 2357 } 2358 2359 static const struct rpc_timeout xs_tcp_default_timeout = { 2360 .to_initval = 60 * HZ, 2361 .to_maxval = 60 * HZ, 2362 .to_retries = 2, 2363 }; 2364 2365 /** 2366 * xs_setup_tcp - Set up transport to use a TCP socket 2367 * @args: rpc transport creation arguments 2368 * 2369 */ 2370 static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args) 2371 { 2372 struct sockaddr *addr = args->dstaddr; 2373 struct rpc_xprt *xprt; 2374 struct sock_xprt *transport; 2375 2376 xprt = xs_setup_xprt(args, 
xprt_tcp_slot_table_entries); 2377 if (IS_ERR(xprt)) 2378 return xprt; 2379 transport = container_of(xprt, struct sock_xprt, xprt); 2380 2381 xprt->prot = IPPROTO_TCP; 2382 xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32); 2383 xprt->max_payload = RPC_MAX_FRAGMENT_SIZE; 2384 2385 xprt->bind_timeout = XS_BIND_TO; 2386 xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; 2387 xprt->idle_timeout = XS_IDLE_DISC_TO; 2388 2389 xprt->ops = &xs_tcp_ops; 2390 xprt->timeout = &xs_tcp_default_timeout; 2391 2392 switch (addr->sa_family) { 2393 case AF_INET: 2394 if (((struct sockaddr_in *)addr)->sin_port != htons(0)) 2395 xprt_set_bound(xprt); 2396 2397 INIT_DELAYED_WORK(&transport->connect_worker, 2398 xs_tcp_connect_worker4); 2399 xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP); 2400 break; 2401 case AF_INET6: 2402 if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0)) 2403 xprt_set_bound(xprt); 2404 2405 INIT_DELAYED_WORK(&transport->connect_worker, 2406 xs_tcp_connect_worker6); 2407 xs_format_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP6); 2408 break; 2409 default: 2410 kfree(xprt); 2411 return ERR_PTR(-EAFNOSUPPORT); 2412 } 2413 2414 if (xprt_bound(xprt)) 2415 dprintk("RPC: set up xprt to %s (port %s) via %s\n", 2416 xprt->address_strings[RPC_DISPLAY_ADDR], 2417 xprt->address_strings[RPC_DISPLAY_PORT], 2418 xprt->address_strings[RPC_DISPLAY_PROTO]); 2419 else 2420 dprintk("RPC: set up xprt to %s (autobind) via %s\n", 2421 xprt->address_strings[RPC_DISPLAY_ADDR], 2422 xprt->address_strings[RPC_DISPLAY_PROTO]); 2423 2424 2425 if (try_module_get(THIS_MODULE)) 2426 return xprt; 2427 2428 kfree(xprt->slot); 2429 kfree(xprt); 2430 return ERR_PTR(-EINVAL); 2431 } 2432 2433 /** 2434 * xs_setup_bc_tcp - Set up transport to use a TCP backchannel socket 2435 * @args: rpc transport creation arguments 2436 * 2437 */ 2438 static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args) 2439 { 2440 struct sockaddr *addr = args->dstaddr; 2441 struct rpc_xprt *xprt; 2442 
struct sock_xprt *transport; 2443 struct svc_sock *bc_sock; 2444 2445 xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries); 2446 if (IS_ERR(xprt)) 2447 return xprt; 2448 transport = container_of(xprt, struct sock_xprt, xprt); 2449 2450 xprt->prot = IPPROTO_TCP; 2451 xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32); 2452 xprt->max_payload = RPC_MAX_FRAGMENT_SIZE; 2453 xprt->timeout = &xs_tcp_default_timeout; 2454 2455 /* backchannel */ 2456 xprt_set_bound(xprt); 2457 xprt->bind_timeout = 0; 2458 xprt->reestablish_timeout = 0; 2459 xprt->idle_timeout = 0; 2460 2461 /* 2462 * The backchannel uses the same socket connection as the 2463 * forechannel 2464 */ 2465 xprt->bc_xprt = args->bc_xprt; 2466 bc_sock = container_of(args->bc_xprt, struct svc_sock, sk_xprt); 2467 bc_sock->sk_bc_xprt = xprt; 2468 transport->sock = bc_sock->sk_sock; 2469 transport->inet = bc_sock->sk_sk; 2470 2471 xprt->ops = &bc_tcp_ops; 2472 2473 switch (addr->sa_family) { 2474 case AF_INET: 2475 xs_format_peer_addresses(xprt, "tcp", 2476 RPCBIND_NETID_TCP); 2477 break; 2478 case AF_INET6: 2479 xs_format_peer_addresses(xprt, "tcp", 2480 RPCBIND_NETID_TCP6); 2481 break; 2482 default: 2483 kfree(xprt); 2484 return ERR_PTR(-EAFNOSUPPORT); 2485 } 2486 2487 if (xprt_bound(xprt)) 2488 dprintk("RPC: set up xprt to %s (port %s) via %s\n", 2489 xprt->address_strings[RPC_DISPLAY_ADDR], 2490 xprt->address_strings[RPC_DISPLAY_PORT], 2491 xprt->address_strings[RPC_DISPLAY_PROTO]); 2492 else 2493 dprintk("RPC: set up xprt to %s (autobind) via %s\n", 2494 xprt->address_strings[RPC_DISPLAY_ADDR], 2495 xprt->address_strings[RPC_DISPLAY_PROTO]); 2496 2497 /* 2498 * Since we don't want connections for the backchannel, we set 2499 * the xprt status to connected 2500 */ 2501 xprt_set_connected(xprt); 2502 2503 2504 if (try_module_get(THIS_MODULE)) 2505 return xprt; 2506 kfree(xprt->slot); 2507 kfree(xprt); 2508 return ERR_PTR(-EINVAL); 2509 } 2510 2511 static struct xprt_class xs_udp_transport = { 2512 .list = 
LIST_HEAD_INIT(xs_udp_transport.list), 2513 .name = "udp", 2514 .owner = THIS_MODULE, 2515 .ident = XPRT_TRANSPORT_UDP, 2516 .setup = xs_setup_udp, 2517 }; 2518 2519 static struct xprt_class xs_tcp_transport = { 2520 .list = LIST_HEAD_INIT(xs_tcp_transport.list), 2521 .name = "tcp", 2522 .owner = THIS_MODULE, 2523 .ident = XPRT_TRANSPORT_TCP, 2524 .setup = xs_setup_tcp, 2525 }; 2526 2527 static struct xprt_class xs_bc_tcp_transport = { 2528 .list = LIST_HEAD_INIT(xs_bc_tcp_transport.list), 2529 .name = "tcp NFSv4.1 backchannel", 2530 .owner = THIS_MODULE, 2531 .ident = XPRT_TRANSPORT_BC_TCP, 2532 .setup = xs_setup_bc_tcp, 2533 }; 2534 2535 /** 2536 * init_socket_xprt - set up xprtsock's sysctls, register with RPC client 2537 * 2538 */ 2539 int init_socket_xprt(void) 2540 { 2541 #ifdef RPC_DEBUG 2542 if (!sunrpc_table_header) 2543 sunrpc_table_header = register_sysctl_table(sunrpc_table); 2544 #endif 2545 2546 xprt_register_transport(&xs_udp_transport); 2547 xprt_register_transport(&xs_tcp_transport); 2548 xprt_register_transport(&xs_bc_tcp_transport); 2549 2550 return 0; 2551 } 2552 2553 /** 2554 * cleanup_socket_xprt - remove xprtsock's sysctls, unregister 2555 * 2556 */ 2557 void cleanup_socket_xprt(void) 2558 { 2559 #ifdef RPC_DEBUG 2560 if (sunrpc_table_header) { 2561 unregister_sysctl_table(sunrpc_table_header); 2562 sunrpc_table_header = NULL; 2563 } 2564 #endif 2565 2566 xprt_unregister_transport(&xs_udp_transport); 2567 xprt_unregister_transport(&xs_tcp_transport); 2568 xprt_unregister_transport(&xs_bc_tcp_transport); 2569 } 2570 2571 static int param_set_uint_minmax(const char *val, struct kernel_param *kp, 2572 unsigned int min, unsigned int max) 2573 { 2574 unsigned long num; 2575 int ret; 2576 2577 if (!val) 2578 return -EINVAL; 2579 ret = strict_strtoul(val, 0, &num); 2580 if (ret == -EINVAL || num < min || num > max) 2581 return -EINVAL; 2582 *((unsigned int *)kp->arg) = num; 2583 return 0; 2584 } 2585 2586 static int param_set_portnr(const char *val, 
struct kernel_param *kp) 2587 { 2588 return param_set_uint_minmax(val, kp, 2589 RPC_MIN_RESVPORT, 2590 RPC_MAX_RESVPORT); 2591 } 2592 2593 static int param_get_portnr(char *buffer, struct kernel_param *kp) 2594 { 2595 return param_get_uint(buffer, kp); 2596 } 2597 #define param_check_portnr(name, p) \ 2598 __param_check(name, p, unsigned int); 2599 2600 module_param_named(min_resvport, xprt_min_resvport, portnr, 0644); 2601 module_param_named(max_resvport, xprt_max_resvport, portnr, 0644); 2602 2603 static int param_set_slot_table_size(const char *val, struct kernel_param *kp) 2604 { 2605 return param_set_uint_minmax(val, kp, 2606 RPC_MIN_SLOT_TABLE, 2607 RPC_MAX_SLOT_TABLE); 2608 } 2609 2610 static int param_get_slot_table_size(char *buffer, struct kernel_param *kp) 2611 { 2612 return param_get_uint(buffer, kp); 2613 } 2614 #define param_check_slot_table_size(name, p) \ 2615 __param_check(name, p, unsigned int); 2616 2617 module_param_named(tcp_slot_table_entries, xprt_tcp_slot_table_entries, 2618 slot_table_size, 0644); 2619 module_param_named(udp_slot_table_entries, xprt_udp_slot_table_entries, 2620 slot_table_size, 0644); 2621 2622