12f713615SIlya Dryomov // SPDX-License-Identifier: GPL-2.0 22f713615SIlya Dryomov #include <linux/ceph/ceph_debug.h> 32f713615SIlya Dryomov 42f713615SIlya Dryomov #include <linux/bvec.h> 52f713615SIlya Dryomov #include <linux/crc32c.h> 62f713615SIlya Dryomov #include <linux/net.h> 72f713615SIlya Dryomov #include <linux/socket.h> 82f713615SIlya Dryomov #include <net/sock.h> 92f713615SIlya Dryomov 102f713615SIlya Dryomov #include <linux/ceph/ceph_features.h> 112f713615SIlya Dryomov #include <linux/ceph/decode.h> 122f713615SIlya Dryomov #include <linux/ceph/libceph.h> 132f713615SIlya Dryomov #include <linux/ceph/messenger.h> 142f713615SIlya Dryomov 152f713615SIlya Dryomov /* static tag bytes (protocol control messages) */ 162f713615SIlya Dryomov static char tag_msg = CEPH_MSGR_TAG_MSG; 172f713615SIlya Dryomov static char tag_ack = CEPH_MSGR_TAG_ACK; 182f713615SIlya Dryomov static char tag_keepalive = CEPH_MSGR_TAG_KEEPALIVE; 192f713615SIlya Dryomov static char tag_keepalive2 = CEPH_MSGR_TAG_KEEPALIVE2; 202f713615SIlya Dryomov 212f713615SIlya Dryomov /* 222f713615SIlya Dryomov * If @buf is NULL, discard up to @len bytes. 232f713615SIlya Dryomov */ 242f713615SIlya Dryomov static int ceph_tcp_recvmsg(struct socket *sock, void *buf, size_t len) 252f713615SIlya Dryomov { 262f713615SIlya Dryomov struct kvec iov = {buf, len}; 272f713615SIlya Dryomov struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL }; 282f713615SIlya Dryomov int r; 292f713615SIlya Dryomov 302f713615SIlya Dryomov if (!buf) 312f713615SIlya Dryomov msg.msg_flags |= MSG_TRUNC; 322f713615SIlya Dryomov 33de4eda9dSAl Viro iov_iter_kvec(&msg.msg_iter, ITER_DEST, &iov, 1, len); 342f713615SIlya Dryomov r = sock_recvmsg(sock, &msg, msg.msg_flags); 352f713615SIlya Dryomov if (r == -EAGAIN) 362f713615SIlya Dryomov r = 0; 372f713615SIlya Dryomov return r; 382f713615SIlya Dryomov } 392f713615SIlya Dryomov 402f713615SIlya Dryomov static int ceph_tcp_recvpage(struct socket *sock, struct page *page, 412f713615SIlya Dryomov int page_offset, size_t length) 422f713615SIlya Dryomov { 431eb9cd15SChristoph Hellwig struct bio_vec bvec; 442f713615SIlya Dryomov struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL }; 452f713615SIlya Dryomov int r; 462f713615SIlya Dryomov 472f713615SIlya Dryomov BUG_ON(page_offset + length > PAGE_SIZE); 481eb9cd15SChristoph Hellwig bvec_set_page(&bvec, page, length, page_offset); 49de4eda9dSAl Viro iov_iter_bvec(&msg.msg_iter, ITER_DEST, &bvec, 1, length); 502f713615SIlya Dryomov r = sock_recvmsg(sock, &msg, msg.msg_flags); 512f713615SIlya Dryomov if (r == -EAGAIN) 522f713615SIlya Dryomov r = 0; 532f713615SIlya Dryomov return r; 542f713615SIlya Dryomov } 552f713615SIlya Dryomov 562f713615SIlya Dryomov /* 572f713615SIlya Dryomov * write something. @more is true if caller will be sending more data 582f713615SIlya Dryomov * shortly. 592f713615SIlya Dryomov */ 602f713615SIlya Dryomov static int ceph_tcp_sendmsg(struct socket *sock, struct kvec *iov, 612f713615SIlya Dryomov size_t kvlen, size_t len, bool more) 622f713615SIlya Dryomov { 632f713615SIlya Dryomov struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL }; 642f713615SIlya Dryomov int r; 652f713615SIlya Dryomov 662f713615SIlya Dryomov if (more) 672f713615SIlya Dryomov msg.msg_flags |= MSG_MORE; 682f713615SIlya Dryomov else 692f713615SIlya Dryomov msg.msg_flags |= MSG_EOR; /* superfluous, but what the hell */ 702f713615SIlya Dryomov 712f713615SIlya Dryomov r = kernel_sendmsg(sock, &msg, iov, kvlen, len); 722f713615SIlya Dryomov if (r == -EAGAIN) 732f713615SIlya Dryomov r = 0; 742f713615SIlya Dryomov return r; 752f713615SIlya Dryomov } 762f713615SIlya Dryomov 775da4d7b8SDavid Howells /* 785da4d7b8SDavid Howells * @more: MSG_MORE or 0. 795da4d7b8SDavid Howells */ 805da4d7b8SDavid Howells static int ceph_tcp_sendpage(struct socket *sock, struct page *page, 815da4d7b8SDavid Howells int offset, size_t size, int more) 825da4d7b8SDavid Howells { 835da4d7b8SDavid Howells struct msghdr msg = { 845da4d7b8SDavid Howells .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL | more, 855da4d7b8SDavid Howells }; 865da4d7b8SDavid Howells struct bio_vec bvec; 875da4d7b8SDavid Howells int ret; 885da4d7b8SDavid Howells 895da4d7b8SDavid Howells /* 905da4d7b8SDavid Howells * MSG_SPLICE_PAGES cannot properly handle pages with page_count == 0, 915da4d7b8SDavid Howells * we need to fall back to sendmsg if that's the case. 925da4d7b8SDavid Howells * 935da4d7b8SDavid Howells * Same goes for slab pages: skb_can_coalesce() allows 945da4d7b8SDavid Howells * coalescing neighboring slab objects into a single frag which 955da4d7b8SDavid Howells * triggers one of hardened usercopy checks. 965da4d7b8SDavid Howells */ 975da4d7b8SDavid Howells if (sendpage_ok(page)) 985da4d7b8SDavid Howells msg.msg_flags |= MSG_SPLICE_PAGES; 995da4d7b8SDavid Howells 1005da4d7b8SDavid Howells bvec_set_page(&bvec, page, size, offset); 1015da4d7b8SDavid Howells iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, size); 1025da4d7b8SDavid Howells 1035da4d7b8SDavid Howells ret = sock_sendmsg(sock, &msg); 1045da4d7b8SDavid Howells if (ret == -EAGAIN) 1055da4d7b8SDavid Howells ret = 0; 1065da4d7b8SDavid Howells 1075da4d7b8SDavid Howells return ret; 1085da4d7b8SDavid Howells } 1095da4d7b8SDavid Howells 1102f713615SIlya Dryomov static void con_out_kvec_reset(struct ceph_connection *con) 1112f713615SIlya Dryomov { 112a56dd9bfSIlya Dryomov BUG_ON(con->v1.out_skip); 1132f713615SIlya Dryomov 114a56dd9bfSIlya Dryomov con->v1.out_kvec_left = 0; 115a56dd9bfSIlya Dryomov con->v1.out_kvec_bytes = 0; 116a56dd9bfSIlya Dryomov con->v1.out_kvec_cur = &con->v1.out_kvec[0]; 1172f713615SIlya Dryomov } 1182f713615SIlya Dryomov 1192f713615SIlya Dryomov static void con_out_kvec_add(struct ceph_connection *con, 1202f713615SIlya Dryomov size_t size, void *data) 1212f713615SIlya Dryomov { 122a56dd9bfSIlya Dryomov int index = con->v1.out_kvec_left; 1232f713615SIlya Dryomov 124a56dd9bfSIlya Dryomov BUG_ON(con->v1.out_skip); 125a56dd9bfSIlya Dryomov BUG_ON(index >= ARRAY_SIZE(con->v1.out_kvec)); 1262f713615SIlya Dryomov 127a56dd9bfSIlya Dryomov con->v1.out_kvec[index].iov_len = size; 128a56dd9bfSIlya Dryomov con->v1.out_kvec[index].iov_base = data; 129a56dd9bfSIlya Dryomov con->v1.out_kvec_left++; 130a56dd9bfSIlya Dryomov con->v1.out_kvec_bytes += size; 1312f713615SIlya Dryomov } 1322f713615SIlya Dryomov 1332f713615SIlya Dryomov /* 1342f713615SIlya Dryomov * Chop off a kvec from the end. Return residual number of bytes for 1352f713615SIlya Dryomov * that kvec, i.e. how many bytes would have been written if the kvec 1362f713615SIlya Dryomov * hadn't been nuked. 1372f713615SIlya Dryomov */ 1382f713615SIlya Dryomov static int con_out_kvec_skip(struct ceph_connection *con) 1392f713615SIlya Dryomov { 1402f713615SIlya Dryomov int skip = 0; 1412f713615SIlya Dryomov 142a56dd9bfSIlya Dryomov if (con->v1.out_kvec_bytes > 0) { 143a56dd9bfSIlya Dryomov skip = con->v1.out_kvec_cur[con->v1.out_kvec_left - 1].iov_len; 144a56dd9bfSIlya Dryomov BUG_ON(con->v1.out_kvec_bytes < skip); 145a56dd9bfSIlya Dryomov BUG_ON(!con->v1.out_kvec_left); 146a56dd9bfSIlya Dryomov con->v1.out_kvec_bytes -= skip; 147a56dd9bfSIlya Dryomov con->v1.out_kvec_left--; 1482f713615SIlya Dryomov } 1492f713615SIlya Dryomov 1502f713615SIlya Dryomov return skip; 1512f713615SIlya Dryomov } 1522f713615SIlya Dryomov 1532f713615SIlya Dryomov static size_t sizeof_footer(struct ceph_connection *con) 1542f713615SIlya Dryomov { 1552f713615SIlya Dryomov return (con->peer_features & CEPH_FEATURE_MSG_AUTH) ? 1562f713615SIlya Dryomov sizeof(struct ceph_msg_footer) : 1572f713615SIlya Dryomov sizeof(struct ceph_msg_footer_old); 1582f713615SIlya Dryomov } 1592f713615SIlya Dryomov 1602f713615SIlya Dryomov static void prepare_message_data(struct ceph_msg *msg, u32 data_len) 1612f713615SIlya Dryomov { 162d396f89dSJeff Layton /* Initialize data cursor if it's not a sparse read */ 163*8e46a2d0SXiubo Li u64 len = msg->sparse_read_total ? : data_len; 164*8e46a2d0SXiubo Li 165*8e46a2d0SXiubo Li ceph_msg_data_cursor_init(&msg->cursor, msg, len); 1662f713615SIlya Dryomov } 1672f713615SIlya Dryomov 1682f713615SIlya Dryomov /* 1692f713615SIlya Dryomov * Prepare footer for currently outgoing message, and finish things 1702f713615SIlya Dryomov * off. Assumes out_kvec* are already valid.. we just add on to the end. 1712f713615SIlya Dryomov */ 1722f713615SIlya Dryomov static void prepare_write_message_footer(struct ceph_connection *con) 1732f713615SIlya Dryomov { 1742f713615SIlya Dryomov struct ceph_msg *m = con->out_msg; 1752f713615SIlya Dryomov 1762f713615SIlya Dryomov m->footer.flags |= CEPH_MSG_FOOTER_COMPLETE; 1772f713615SIlya Dryomov 1782f713615SIlya Dryomov dout("prepare_write_message_footer %p\n", con); 1792f713615SIlya Dryomov con_out_kvec_add(con, sizeof_footer(con), &m->footer); 1802f713615SIlya Dryomov if (con->peer_features & CEPH_FEATURE_MSG_AUTH) { 1812f713615SIlya Dryomov if (con->ops->sign_message) 1822f713615SIlya Dryomov con->ops->sign_message(m); 1832f713615SIlya Dryomov else 1842f713615SIlya Dryomov m->footer.sig = 0; 1852f713615SIlya Dryomov } else { 1862f713615SIlya Dryomov m->old_footer.flags = m->footer.flags; 1872f713615SIlya Dryomov } 188a56dd9bfSIlya Dryomov con->v1.out_more = m->more_to_follow; 189a56dd9bfSIlya Dryomov con->v1.out_msg_done = true; 1902f713615SIlya Dryomov } 1912f713615SIlya Dryomov 1922f713615SIlya Dryomov /* 1932f713615SIlya Dryomov * Prepare headers for the next outgoing message. 1942f713615SIlya Dryomov */ 1952f713615SIlya Dryomov static void prepare_write_message(struct ceph_connection *con) 1962f713615SIlya Dryomov { 1972f713615SIlya Dryomov struct ceph_msg *m; 1982f713615SIlya Dryomov u32 crc; 1992f713615SIlya Dryomov 2002f713615SIlya Dryomov con_out_kvec_reset(con); 201a56dd9bfSIlya Dryomov con->v1.out_msg_done = false; 2022f713615SIlya Dryomov 2032f713615SIlya Dryomov /* Sneak an ack in there first? If we can get it into the same 2042f713615SIlya Dryomov * TCP packet that's a good thing. */ 2052f713615SIlya Dryomov if (con->in_seq > con->in_seq_acked) { 2062f713615SIlya Dryomov con->in_seq_acked = con->in_seq; 2072f713615SIlya Dryomov con_out_kvec_add(con, sizeof (tag_ack), &tag_ack); 208a56dd9bfSIlya Dryomov con->v1.out_temp_ack = cpu_to_le64(con->in_seq_acked); 209a56dd9bfSIlya Dryomov con_out_kvec_add(con, sizeof(con->v1.out_temp_ack), 210a56dd9bfSIlya Dryomov &con->v1.out_temp_ack); 2112f713615SIlya Dryomov } 2122f713615SIlya Dryomov 2132f713615SIlya Dryomov ceph_con_get_out_msg(con); 2142f713615SIlya Dryomov m = con->out_msg; 2152f713615SIlya Dryomov 2162f713615SIlya Dryomov dout("prepare_write_message %p seq %lld type %d len %d+%d+%zd\n", 2172f713615SIlya Dryomov m, con->out_seq, le16_to_cpu(m->hdr.type), 2182f713615SIlya Dryomov le32_to_cpu(m->hdr.front_len), le32_to_cpu(m->hdr.middle_len), 2192f713615SIlya Dryomov m->data_length); 2202f713615SIlya Dryomov WARN_ON(m->front.iov_len != le32_to_cpu(m->hdr.front_len)); 2212f713615SIlya Dryomov WARN_ON(m->data_length != le32_to_cpu(m->hdr.data_len)); 2222f713615SIlya Dryomov 2232f713615SIlya Dryomov /* tag + hdr + front + middle */ 2242f713615SIlya Dryomov con_out_kvec_add(con, sizeof (tag_msg), &tag_msg); 225a56dd9bfSIlya Dryomov con_out_kvec_add(con, sizeof(con->v1.out_hdr), &con->v1.out_hdr); 2262f713615SIlya Dryomov con_out_kvec_add(con, m->front.iov_len, m->front.iov_base); 2272f713615SIlya Dryomov 2282f713615SIlya Dryomov if (m->middle) 2292f713615SIlya Dryomov con_out_kvec_add(con, m->middle->vec.iov_len, 2302f713615SIlya Dryomov m->middle->vec.iov_base); 2312f713615SIlya Dryomov 2322f713615SIlya Dryomov /* fill in hdr crc and finalize hdr */ 2332f713615SIlya Dryomov crc = crc32c(0, &m->hdr, offsetof(struct ceph_msg_header, crc)); 2342f713615SIlya Dryomov con->out_msg->hdr.crc = cpu_to_le32(crc); 235a56dd9bfSIlya Dryomov memcpy(&con->v1.out_hdr, &con->out_msg->hdr, sizeof(con->v1.out_hdr)); 2362f713615SIlya Dryomov 2372f713615SIlya Dryomov /* fill in front and middle crc, footer */ 2382f713615SIlya Dryomov crc = crc32c(0, m->front.iov_base, m->front.iov_len); 2392f713615SIlya Dryomov con->out_msg->footer.front_crc = cpu_to_le32(crc); 2402f713615SIlya Dryomov if (m->middle) { 2412f713615SIlya Dryomov crc = crc32c(0, m->middle->vec.iov_base, 2422f713615SIlya Dryomov m->middle->vec.iov_len); 2432f713615SIlya Dryomov con->out_msg->footer.middle_crc = cpu_to_le32(crc); 2442f713615SIlya Dryomov } else 2452f713615SIlya Dryomov con->out_msg->footer.middle_crc = 0; 2462f713615SIlya Dryomov dout("%s front_crc %u middle_crc %u\n", __func__, 2472f713615SIlya Dryomov le32_to_cpu(con->out_msg->footer.front_crc), 2482f713615SIlya Dryomov le32_to_cpu(con->out_msg->footer.middle_crc)); 2492f713615SIlya Dryomov con->out_msg->footer.flags = 0; 2502f713615SIlya Dryomov 2512f713615SIlya Dryomov /* is there a data payload? */ 2522f713615SIlya Dryomov con->out_msg->footer.data_crc = 0; 2532f713615SIlya Dryomov if (m->data_length) { 2542f713615SIlya Dryomov prepare_message_data(con->out_msg, m->data_length); 255a56dd9bfSIlya Dryomov con->v1.out_more = 1; /* data + footer will follow */ 2562f713615SIlya Dryomov } else { 2572f713615SIlya Dryomov /* no, queue up footer too and be done */ 2582f713615SIlya Dryomov prepare_write_message_footer(con); 2592f713615SIlya Dryomov } 2602f713615SIlya Dryomov 2612f713615SIlya Dryomov ceph_con_flag_set(con, CEPH_CON_F_WRITE_PENDING); 2622f713615SIlya Dryomov } 2632f713615SIlya Dryomov 2642f713615SIlya Dryomov /* 2652f713615SIlya Dryomov * Prepare an ack. 2662f713615SIlya Dryomov */ 2672f713615SIlya Dryomov static void prepare_write_ack(struct ceph_connection *con) 2682f713615SIlya Dryomov { 2692f713615SIlya Dryomov dout("prepare_write_ack %p %llu -> %llu\n", con, 2702f713615SIlya Dryomov con->in_seq_acked, con->in_seq); 2712f713615SIlya Dryomov con->in_seq_acked = con->in_seq; 2722f713615SIlya Dryomov 2732f713615SIlya Dryomov con_out_kvec_reset(con); 2742f713615SIlya Dryomov 2752f713615SIlya Dryomov con_out_kvec_add(con, sizeof (tag_ack), &tag_ack); 2762f713615SIlya Dryomov 277a56dd9bfSIlya Dryomov con->v1.out_temp_ack = cpu_to_le64(con->in_seq_acked); 278a56dd9bfSIlya Dryomov con_out_kvec_add(con, sizeof(con->v1.out_temp_ack), 279a56dd9bfSIlya Dryomov &con->v1.out_temp_ack); 2802f713615SIlya Dryomov 281a56dd9bfSIlya Dryomov con->v1.out_more = 1; /* more will follow.. eventually.. */ 2822f713615SIlya Dryomov ceph_con_flag_set(con, CEPH_CON_F_WRITE_PENDING); 2832f713615SIlya Dryomov } 2842f713615SIlya Dryomov 2852f713615SIlya Dryomov /* 2862f713615SIlya Dryomov * Prepare to share the seq during handshake 2872f713615SIlya Dryomov */ 2882f713615SIlya Dryomov static void prepare_write_seq(struct ceph_connection *con) 2892f713615SIlya Dryomov { 2902f713615SIlya Dryomov dout("prepare_write_seq %p %llu -> %llu\n", con, 2912f713615SIlya Dryomov con->in_seq_acked, con->in_seq); 2922f713615SIlya Dryomov con->in_seq_acked = con->in_seq; 2932f713615SIlya Dryomov 2942f713615SIlya Dryomov con_out_kvec_reset(con); 2952f713615SIlya Dryomov 296a56dd9bfSIlya Dryomov con->v1.out_temp_ack = cpu_to_le64(con->in_seq_acked); 297a56dd9bfSIlya Dryomov con_out_kvec_add(con, sizeof(con->v1.out_temp_ack), 298a56dd9bfSIlya Dryomov &con->v1.out_temp_ack); 2992f713615SIlya Dryomov 3002f713615SIlya Dryomov ceph_con_flag_set(con, CEPH_CON_F_WRITE_PENDING); 3012f713615SIlya Dryomov } 3022f713615SIlya Dryomov 3032f713615SIlya Dryomov /* 3042f713615SIlya Dryomov * Prepare to write keepalive byte. 3052f713615SIlya Dryomov */ 3062f713615SIlya Dryomov static void prepare_write_keepalive(struct ceph_connection *con) 3072f713615SIlya Dryomov { 3082f713615SIlya Dryomov dout("prepare_write_keepalive %p\n", con); 3092f713615SIlya Dryomov con_out_kvec_reset(con); 3102f713615SIlya Dryomov if (con->peer_features & CEPH_FEATURE_MSGR_KEEPALIVE2) { 3112f713615SIlya Dryomov struct timespec64 now; 3122f713615SIlya Dryomov 3132f713615SIlya Dryomov ktime_get_real_ts64(&now); 3142f713615SIlya Dryomov con_out_kvec_add(con, sizeof(tag_keepalive2), &tag_keepalive2); 315a56dd9bfSIlya Dryomov ceph_encode_timespec64(&con->v1.out_temp_keepalive2, &now); 316a56dd9bfSIlya Dryomov con_out_kvec_add(con, sizeof(con->v1.out_temp_keepalive2), 317a56dd9bfSIlya Dryomov &con->v1.out_temp_keepalive2); 3182f713615SIlya Dryomov } else { 3192f713615SIlya Dryomov con_out_kvec_add(con, sizeof(tag_keepalive), &tag_keepalive); 3202f713615SIlya Dryomov } 3212f713615SIlya Dryomov ceph_con_flag_set(con, CEPH_CON_F_WRITE_PENDING); 3222f713615SIlya Dryomov } 3232f713615SIlya Dryomov 3242f713615SIlya Dryomov /* 3252f713615SIlya Dryomov * Connection negotiation. 3262f713615SIlya Dryomov */ 3272f713615SIlya Dryomov 3282f713615SIlya Dryomov static int get_connect_authorizer(struct ceph_connection *con) 3292f713615SIlya Dryomov { 3302f713615SIlya Dryomov struct ceph_auth_handshake *auth; 3312f713615SIlya Dryomov int auth_proto; 3322f713615SIlya Dryomov 3332f713615SIlya Dryomov if (!con->ops->get_authorizer) { 334a56dd9bfSIlya Dryomov con->v1.auth = NULL; 335a56dd9bfSIlya Dryomov con->v1.out_connect.authorizer_protocol = CEPH_AUTH_UNKNOWN; 336a56dd9bfSIlya Dryomov con->v1.out_connect.authorizer_len = 0; 3372f713615SIlya Dryomov return 0; 3382f713615SIlya Dryomov } 3392f713615SIlya Dryomov 340a56dd9bfSIlya Dryomov auth = con->ops->get_authorizer(con, &auth_proto, con->v1.auth_retry); 3412f713615SIlya Dryomov if (IS_ERR(auth)) 3422f713615SIlya Dryomov return PTR_ERR(auth); 3432f713615SIlya Dryomov 344a56dd9bfSIlya Dryomov con->v1.auth = auth; 345a56dd9bfSIlya Dryomov con->v1.out_connect.authorizer_protocol = cpu_to_le32(auth_proto); 346a56dd9bfSIlya Dryomov con->v1.out_connect.authorizer_len = 347a56dd9bfSIlya Dryomov cpu_to_le32(auth->authorizer_buf_len); 3482f713615SIlya Dryomov return 0; 3492f713615SIlya Dryomov } 3502f713615SIlya Dryomov 3512f713615SIlya Dryomov /* 3522f713615SIlya Dryomov * We connected to a peer and are saying hello. 3532f713615SIlya Dryomov */ 3542f713615SIlya Dryomov static void prepare_write_banner(struct ceph_connection *con) 3552f713615SIlya Dryomov { 3562f713615SIlya Dryomov con_out_kvec_add(con, strlen(CEPH_BANNER), CEPH_BANNER); 3572f713615SIlya Dryomov con_out_kvec_add(con, sizeof (con->msgr->my_enc_addr), 3582f713615SIlya Dryomov &con->msgr->my_enc_addr); 3592f713615SIlya Dryomov 360a56dd9bfSIlya Dryomov con->v1.out_more = 0; 3612f713615SIlya Dryomov ceph_con_flag_set(con, CEPH_CON_F_WRITE_PENDING); 3622f713615SIlya Dryomov } 3632f713615SIlya Dryomov 3642f713615SIlya Dryomov static void __prepare_write_connect(struct ceph_connection *con) 3652f713615SIlya Dryomov { 366a56dd9bfSIlya Dryomov con_out_kvec_add(con, sizeof(con->v1.out_connect), 367a56dd9bfSIlya Dryomov &con->v1.out_connect); 368a56dd9bfSIlya Dryomov if (con->v1.auth) 369a56dd9bfSIlya Dryomov con_out_kvec_add(con, con->v1.auth->authorizer_buf_len, 370a56dd9bfSIlya Dryomov con->v1.auth->authorizer_buf); 3712f713615SIlya Dryomov 372a56dd9bfSIlya Dryomov con->v1.out_more = 0; 3732f713615SIlya Dryomov ceph_con_flag_set(con, CEPH_CON_F_WRITE_PENDING); 3742f713615SIlya Dryomov } 3752f713615SIlya Dryomov 3762f713615SIlya Dryomov static int prepare_write_connect(struct ceph_connection *con) 3772f713615SIlya Dryomov { 3782f713615SIlya Dryomov unsigned int global_seq = ceph_get_global_seq(con->msgr, 0); 3792f713615SIlya Dryomov int proto; 3802f713615SIlya Dryomov int ret; 3812f713615SIlya Dryomov 3822f713615SIlya Dryomov switch (con->peer_name.type) { 3832f713615SIlya Dryomov case CEPH_ENTITY_TYPE_MON: 3842f713615SIlya Dryomov proto = CEPH_MONC_PROTOCOL; 3852f713615SIlya Dryomov break; 3862f713615SIlya Dryomov case CEPH_ENTITY_TYPE_OSD: 3872f713615SIlya Dryomov proto = CEPH_OSDC_PROTOCOL; 3882f713615SIlya Dryomov break; 3892f713615SIlya Dryomov case CEPH_ENTITY_TYPE_MDS: 3902f713615SIlya Dryomov proto = CEPH_MDSC_PROTOCOL; 3912f713615SIlya Dryomov break; 3922f713615SIlya Dryomov default: 3932f713615SIlya Dryomov BUG(); 3942f713615SIlya Dryomov } 3952f713615SIlya Dryomov 3962f713615SIlya Dryomov dout("prepare_write_connect %p cseq=%d gseq=%d proto=%d\n", con, 397a56dd9bfSIlya Dryomov con->v1.connect_seq, global_seq, proto); 3982f713615SIlya Dryomov 399a56dd9bfSIlya Dryomov con->v1.out_connect.features = 4002f713615SIlya Dryomov cpu_to_le64(from_msgr(con->msgr)->supported_features); 401a56dd9bfSIlya Dryomov con->v1.out_connect.host_type = cpu_to_le32(CEPH_ENTITY_TYPE_CLIENT); 402a56dd9bfSIlya Dryomov con->v1.out_connect.connect_seq = cpu_to_le32(con->v1.connect_seq); 403a56dd9bfSIlya Dryomov con->v1.out_connect.global_seq = cpu_to_le32(global_seq); 404a56dd9bfSIlya Dryomov con->v1.out_connect.protocol_version = cpu_to_le32(proto); 405a56dd9bfSIlya Dryomov con->v1.out_connect.flags = 0; 4062f713615SIlya Dryomov 4072f713615SIlya Dryomov ret = get_connect_authorizer(con); 4082f713615SIlya Dryomov if (ret) 4092f713615SIlya Dryomov return ret; 4102f713615SIlya Dryomov 4112f713615SIlya Dryomov __prepare_write_connect(con); 4122f713615SIlya Dryomov return 0; 4132f713615SIlya Dryomov } 4142f713615SIlya Dryomov 4152f713615SIlya Dryomov /* 4162f713615SIlya Dryomov * write as much of pending kvecs to the socket as we can. 4172f713615SIlya Dryomov * 1 -> done 4182f713615SIlya Dryomov * 0 -> socket full, but more to do 4192f713615SIlya Dryomov * <0 -> error 4202f713615SIlya Dryomov */ 4212f713615SIlya Dryomov static int write_partial_kvec(struct ceph_connection *con) 4222f713615SIlya Dryomov { 4232f713615SIlya Dryomov int ret; 4242f713615SIlya Dryomov 425a56dd9bfSIlya Dryomov dout("write_partial_kvec %p %d left\n", con, con->v1.out_kvec_bytes); 426a56dd9bfSIlya Dryomov while (con->v1.out_kvec_bytes > 0) { 427a56dd9bfSIlya Dryomov ret = ceph_tcp_sendmsg(con->sock, con->v1.out_kvec_cur, 428a56dd9bfSIlya Dryomov con->v1.out_kvec_left, 429a56dd9bfSIlya Dryomov con->v1.out_kvec_bytes, 430a56dd9bfSIlya Dryomov con->v1.out_more); 4312f713615SIlya Dryomov if (ret <= 0) 4322f713615SIlya Dryomov goto out; 433a56dd9bfSIlya Dryomov con->v1.out_kvec_bytes -= ret; 434a56dd9bfSIlya Dryomov if (!con->v1.out_kvec_bytes) 4352f713615SIlya Dryomov break; /* done */ 4362f713615SIlya Dryomov 4372f713615SIlya Dryomov /* account for full iov entries consumed */ 438a56dd9bfSIlya Dryomov while (ret >= con->v1.out_kvec_cur->iov_len) { 439a56dd9bfSIlya Dryomov BUG_ON(!con->v1.out_kvec_left); 440a56dd9bfSIlya Dryomov ret -= con->v1.out_kvec_cur->iov_len; 441a56dd9bfSIlya Dryomov con->v1.out_kvec_cur++; 442a56dd9bfSIlya Dryomov con->v1.out_kvec_left--; 4432f713615SIlya Dryomov } 4442f713615SIlya Dryomov /* and for a partially-consumed entry */ 4452f713615SIlya Dryomov if (ret) { 446a56dd9bfSIlya Dryomov con->v1.out_kvec_cur->iov_len -= ret; 447a56dd9bfSIlya Dryomov con->v1.out_kvec_cur->iov_base += ret; 4482f713615SIlya Dryomov } 4492f713615SIlya Dryomov } 450a56dd9bfSIlya Dryomov con->v1.out_kvec_left = 0; 4512f713615SIlya Dryomov ret = 1; 4522f713615SIlya Dryomov out: 4532f713615SIlya Dryomov dout("write_partial_kvec %p %d left in %d kvecs ret = %d\n", con, 454a56dd9bfSIlya Dryomov con->v1.out_kvec_bytes, con->v1.out_kvec_left, ret); 4552f713615SIlya Dryomov return ret; /* done! */ 4562f713615SIlya Dryomov } 4572f713615SIlya Dryomov 4582f713615SIlya Dryomov /* 4592f713615SIlya Dryomov * Write as much message data payload as we can. If we finish, queue 4602f713615SIlya Dryomov * up the footer. 4612f713615SIlya Dryomov * 1 -> done, footer is now queued in out_kvec[]. 4622f713615SIlya Dryomov * 0 -> socket full, but more to do 4632f713615SIlya Dryomov * <0 -> error 4642f713615SIlya Dryomov */ 4652f713615SIlya Dryomov static int write_partial_message_data(struct ceph_connection *con) 4662f713615SIlya Dryomov { 4672f713615SIlya Dryomov struct ceph_msg *msg = con->out_msg; 4682f713615SIlya Dryomov struct ceph_msg_data_cursor *cursor = &msg->cursor; 4692f713615SIlya Dryomov bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC); 4702f713615SIlya Dryomov u32 crc; 4712f713615SIlya Dryomov 4722f713615SIlya Dryomov dout("%s %p msg %p\n", __func__, con, msg); 4732f713615SIlya Dryomov 4742f713615SIlya Dryomov if (!msg->num_data_items) 4752f713615SIlya Dryomov return -EINVAL; 4762f713615SIlya Dryomov 4772f713615SIlya Dryomov /* 4782f713615SIlya Dryomov * Iterate through each page that contains data to be 4792f713615SIlya Dryomov * written, and send as much as possible for each. 4802f713615SIlya Dryomov * 4812f713615SIlya Dryomov * If we are calculating the data crc (the default), we will 4822f713615SIlya Dryomov * need to map the page. If we have no pages, they have 4832f713615SIlya Dryomov * been revoked, so use the zero page. 4842f713615SIlya Dryomov */ 4852f713615SIlya Dryomov crc = do_datacrc ? le32_to_cpu(msg->footer.data_crc) : 0; 4862f713615SIlya Dryomov while (cursor->total_resid) { 4872f713615SIlya Dryomov struct page *page; 4882f713615SIlya Dryomov size_t page_offset; 4892f713615SIlya Dryomov size_t length; 4902f713615SIlya Dryomov int ret; 4912f713615SIlya Dryomov 4922f713615SIlya Dryomov if (!cursor->resid) { 4932f713615SIlya Dryomov ceph_msg_data_advance(cursor, 0); 4942f713615SIlya Dryomov continue; 4952f713615SIlya Dryomov } 4962f713615SIlya Dryomov 497da4ab869SJeff Layton page = ceph_msg_data_next(cursor, &page_offset, &length); 4985da4d7b8SDavid Howells ret = ceph_tcp_sendpage(con->sock, page, page_offset, length, 4995da4d7b8SDavid Howells MSG_MORE); 5002f713615SIlya Dryomov if (ret <= 0) { 5012f713615SIlya Dryomov if (do_datacrc) 5022f713615SIlya Dryomov msg->footer.data_crc = cpu_to_le32(crc); 5032f713615SIlya Dryomov 5042f713615SIlya Dryomov return ret; 5052f713615SIlya Dryomov } 5062f713615SIlya Dryomov if (do_datacrc && cursor->need_crc) 5072f713615SIlya Dryomov crc = ceph_crc32c_page(crc, page, page_offset, length); 5082f713615SIlya Dryomov ceph_msg_data_advance(cursor, (size_t)ret); 5092f713615SIlya Dryomov } 5102f713615SIlya Dryomov 5112f713615SIlya Dryomov dout("%s %p msg %p done\n", __func__, con, msg); 5122f713615SIlya Dryomov 5132f713615SIlya Dryomov /* prepare and queue up footer, too */ 5142f713615SIlya Dryomov if (do_datacrc) 5152f713615SIlya Dryomov msg->footer.data_crc = cpu_to_le32(crc); 5162f713615SIlya Dryomov else 5172f713615SIlya Dryomov msg->footer.flags |= CEPH_MSG_FOOTER_NOCRC; 5182f713615SIlya Dryomov con_out_kvec_reset(con); 5192f713615SIlya Dryomov prepare_write_message_footer(con); 5202f713615SIlya Dryomov 5212f713615SIlya Dryomov return 1; /* must return > 0 to indicate success */ 5222f713615SIlya Dryomov } 5232f713615SIlya Dryomov 5242f713615SIlya Dryomov /* 5252f713615SIlya Dryomov * write some zeros 5262f713615SIlya Dryomov */ 5272f713615SIlya Dryomov static int write_partial_skip(struct ceph_connection *con) 5282f713615SIlya Dryomov { 5292f713615SIlya Dryomov int ret; 5302f713615SIlya Dryomov 531a56dd9bfSIlya Dryomov dout("%s %p %d left\n", __func__, con, con->v1.out_skip); 532a56dd9bfSIlya Dryomov while (con->v1.out_skip > 0) { 533a56dd9bfSIlya Dryomov size_t size = min(con->v1.out_skip, (int)PAGE_SIZE); 5342f713615SIlya Dryomov 5355da4d7b8SDavid Howells ret = ceph_tcp_sendpage(con->sock, ceph_zero_page, 0, size, 5365da4d7b8SDavid Howells MSG_MORE); 5372f713615SIlya Dryomov if (ret <= 0) 5382f713615SIlya Dryomov goto out; 539a56dd9bfSIlya Dryomov con->v1.out_skip -= ret; 5402f713615SIlya Dryomov } 5412f713615SIlya Dryomov ret = 1; 5422f713615SIlya Dryomov out: 5432f713615SIlya Dryomov return ret; 5442f713615SIlya Dryomov } 5452f713615SIlya Dryomov 5462f713615SIlya Dryomov /* 5472f713615SIlya Dryomov * Prepare to read connection handshake, or an ack. 5482f713615SIlya Dryomov */ 5492f713615SIlya Dryomov static void prepare_read_banner(struct ceph_connection *con) 5502f713615SIlya Dryomov { 5512f713615SIlya Dryomov dout("prepare_read_banner %p\n", con); 552a56dd9bfSIlya Dryomov con->v1.in_base_pos = 0; 5532f713615SIlya Dryomov } 5542f713615SIlya Dryomov 5552f713615SIlya Dryomov static void prepare_read_connect(struct ceph_connection *con) 5562f713615SIlya Dryomov { 5572f713615SIlya Dryomov dout("prepare_read_connect %p\n", con); 558a56dd9bfSIlya Dryomov con->v1.in_base_pos = 0; 5592f713615SIlya Dryomov } 5602f713615SIlya Dryomov 5612f713615SIlya Dryomov static void prepare_read_ack(struct ceph_connection *con) 5622f713615SIlya Dryomov { 5632f713615SIlya Dryomov dout("prepare_read_ack %p\n", con); 564a56dd9bfSIlya Dryomov con->v1.in_base_pos = 0; 5652f713615SIlya Dryomov } 5662f713615SIlya Dryomov 5672f713615SIlya Dryomov static void prepare_read_seq(struct ceph_connection *con) 5682f713615SIlya Dryomov { 5692f713615SIlya Dryomov dout("prepare_read_seq %p\n", con); 570a56dd9bfSIlya Dryomov con->v1.in_base_pos = 0; 571a56dd9bfSIlya Dryomov con->v1.in_tag = CEPH_MSGR_TAG_SEQ; 5722f713615SIlya Dryomov } 5732f713615SIlya Dryomov 5742f713615SIlya Dryomov static void prepare_read_tag(struct ceph_connection *con) 5752f713615SIlya Dryomov { 5762f713615SIlya Dryomov dout("prepare_read_tag %p\n", con); 577a56dd9bfSIlya Dryomov con->v1.in_base_pos = 0; 578a56dd9bfSIlya Dryomov con->v1.in_tag = CEPH_MSGR_TAG_READY; 5792f713615SIlya Dryomov } 5802f713615SIlya Dryomov 5812f713615SIlya Dryomov static void prepare_read_keepalive_ack(struct ceph_connection *con) 5822f713615SIlya Dryomov { 5832f713615SIlya Dryomov dout("prepare_read_keepalive_ack %p\n", con); 584a56dd9bfSIlya Dryomov con->v1.in_base_pos = 0; 5852f713615SIlya Dryomov } 5862f713615SIlya Dryomov 5872f713615SIlya Dryomov /* 5882f713615SIlya Dryomov * Prepare to read a message. 5892f713615SIlya Dryomov */ 5902f713615SIlya Dryomov static int prepare_read_message(struct ceph_connection *con) 5912f713615SIlya Dryomov { 5922f713615SIlya Dryomov dout("prepare_read_message %p\n", con); 5932f713615SIlya Dryomov BUG_ON(con->in_msg != NULL); 594a56dd9bfSIlya Dryomov con->v1.in_base_pos = 0; 5952f713615SIlya Dryomov con->in_front_crc = con->in_middle_crc = con->in_data_crc = 0; 5962f713615SIlya Dryomov return 0; 5972f713615SIlya Dryomov } 5982f713615SIlya Dryomov 5992f713615SIlya Dryomov static int read_partial(struct ceph_connection *con, 6002f713615SIlya Dryomov int end, int size, void *object) 6012f713615SIlya Dryomov { 602a56dd9bfSIlya Dryomov while (con->v1.in_base_pos < end) { 603a56dd9bfSIlya Dryomov int left = end - con->v1.in_base_pos; 6042f713615SIlya Dryomov int have = size - left; 6052f713615SIlya Dryomov int ret = ceph_tcp_recvmsg(con->sock, object + have, left); 6062f713615SIlya Dryomov if (ret <= 0) 6072f713615SIlya Dryomov return ret; 608a56dd9bfSIlya Dryomov con->v1.in_base_pos += ret; 6092f713615SIlya Dryomov } 6102f713615SIlya Dryomov return 1; 6112f713615SIlya Dryomov } 6122f713615SIlya Dryomov 6132f713615SIlya Dryomov /* 6142f713615SIlya Dryomov * Read all or part of the connect-side handshake on a new connection 6152f713615SIlya Dryomov */ 6162f713615SIlya Dryomov static int read_partial_banner(struct ceph_connection *con) 6172f713615SIlya Dryomov { 6182f713615SIlya Dryomov int size; 6192f713615SIlya Dryomov int end; 6202f713615SIlya Dryomov int ret; 6212f713615SIlya Dryomov 622a56dd9bfSIlya Dryomov dout("read_partial_banner %p at %d\n", con, con->v1.in_base_pos); 6232f713615SIlya Dryomov 6242f713615SIlya Dryomov /* peer's banner */ 6252f713615SIlya Dryomov size = strlen(CEPH_BANNER); 6262f713615SIlya Dryomov end = size; 627a56dd9bfSIlya Dryomov ret = read_partial(con, end, size, con->v1.in_banner); 6282f713615SIlya Dryomov if (ret <= 0) 6292f713615SIlya Dryomov goto out; 6302f713615SIlya Dryomov 631a56dd9bfSIlya Dryomov size = sizeof(con->v1.actual_peer_addr); 6322f713615SIlya Dryomov end += size; 633a56dd9bfSIlya Dryomov ret = read_partial(con, end, size, &con->v1.actual_peer_addr); 6342f713615SIlya Dryomov if (ret <= 0) 6352f713615SIlya Dryomov goto out; 636a56dd9bfSIlya Dryomov ceph_decode_banner_addr(&con->v1.actual_peer_addr); 6372f713615SIlya Dryomov 638a56dd9bfSIlya Dryomov size = sizeof(con->v1.peer_addr_for_me); 6392f713615SIlya Dryomov end += size; 640a56dd9bfSIlya Dryomov ret = read_partial(con, end, size, &con->v1.peer_addr_for_me); 6412f713615SIlya Dryomov if (ret <= 0) 6422f713615SIlya Dryomov goto out; 643a56dd9bfSIlya Dryomov ceph_decode_banner_addr(&con->v1.peer_addr_for_me); 6442f713615SIlya Dryomov 6452f713615SIlya Dryomov out: 6462f713615SIlya Dryomov return ret; 6472f713615SIlya Dryomov } 6482f713615SIlya Dryomov 6492f713615SIlya Dryomov static int read_partial_connect(struct ceph_connection *con) 6502f713615SIlya Dryomov { 6512f713615SIlya Dryomov int size; 6522f713615SIlya Dryomov int end; 6532f713615SIlya Dryomov int ret; 6542f713615SIlya Dryomov 655a56dd9bfSIlya Dryomov dout("read_partial_connect %p at %d\n", con, con->v1.in_base_pos); 6562f713615SIlya Dryomov 657a56dd9bfSIlya Dryomov size = sizeof(con->v1.in_reply); 6582f713615SIlya Dryomov end = size; 659a56dd9bfSIlya Dryomov ret = read_partial(con, end, size, &con->v1.in_reply); 6602f713615SIlya Dryomov if (ret <= 0) 6612f713615SIlya Dryomov goto out; 6622f713615SIlya Dryomov 663a56dd9bfSIlya Dryomov if (con->v1.auth) { 664a56dd9bfSIlya Dryomov size = le32_to_cpu(con->v1.in_reply.authorizer_len); 665a56dd9bfSIlya Dryomov if (size > con->v1.auth->authorizer_reply_buf_len) { 6662f713615SIlya Dryomov pr_err("authorizer reply too big: %d > %zu\n", size, 667a56dd9bfSIlya Dryomov con->v1.auth->authorizer_reply_buf_len); 6682f713615SIlya Dryomov ret = -EINVAL; 6692f713615SIlya Dryomov goto out; 6702f713615SIlya Dryomov } 6712f713615SIlya Dryomov 6722f713615SIlya Dryomov end += size; 6732f713615SIlya Dryomov ret = read_partial(con, end, size, 674a56dd9bfSIlya Dryomov con->v1.auth->authorizer_reply_buf); 6752f713615SIlya Dryomov if (ret <= 0) 6762f713615SIlya Dryomov goto out; 6772f713615SIlya Dryomov } 6782f713615SIlya Dryomov 6792f713615SIlya Dryomov dout("read_partial_connect %p tag %d, con_seq = %u, g_seq = %u\n", 680a56dd9bfSIlya Dryomov con, con->v1.in_reply.tag, 681a56dd9bfSIlya Dryomov le32_to_cpu(con->v1.in_reply.connect_seq), 682a56dd9bfSIlya Dryomov le32_to_cpu(con->v1.in_reply.global_seq)); 6832f713615SIlya Dryomov out: 6842f713615SIlya Dryomov return ret; 6852f713615SIlya Dryomov } 6862f713615SIlya Dryomov 6872f713615SIlya Dryomov /* 6882f713615SIlya Dryomov * Verify the hello banner looks okay. 6892f713615SIlya Dryomov */ 6902f713615SIlya Dryomov static int verify_hello(struct ceph_connection *con) 6912f713615SIlya Dryomov { 692a56dd9bfSIlya Dryomov if (memcmp(con->v1.in_banner, CEPH_BANNER, strlen(CEPH_BANNER))) { 6932f713615SIlya Dryomov pr_err("connect to %s got bad banner\n", 6942f713615SIlya Dryomov ceph_pr_addr(&con->peer_addr)); 6952f713615SIlya Dryomov con->error_msg = "protocol error, bad banner"; 6962f713615SIlya Dryomov return -1; 6972f713615SIlya Dryomov } 6982f713615SIlya Dryomov return 0; 6992f713615SIlya Dryomov } 7002f713615SIlya Dryomov 7012f713615SIlya Dryomov static int process_banner(struct ceph_connection *con) 7022f713615SIlya Dryomov { 7032f713615SIlya Dryomov struct ceph_entity_addr *my_addr = &con->msgr->inst.addr; 7042f713615SIlya Dryomov 7052f713615SIlya Dryomov dout("process_banner on %p\n", con); 7062f713615SIlya Dryomov 7072f713615SIlya Dryomov if (verify_hello(con) < 0) 7082f713615SIlya Dryomov return -1; 7092f713615SIlya Dryomov 7102f713615SIlya Dryomov /* 7112f713615SIlya Dryomov * Make sure the other end is who we wanted. note that the other 7122f713615SIlya Dryomov * end may not yet know their ip address, so if it's 0.0.0.0, give 7132f713615SIlya Dryomov * them the benefit of the doubt. 7142f713615SIlya Dryomov */ 715a56dd9bfSIlya Dryomov if (memcmp(&con->peer_addr, &con->v1.actual_peer_addr, 7162f713615SIlya Dryomov sizeof(con->peer_addr)) != 0 && 717a56dd9bfSIlya Dryomov !(ceph_addr_is_blank(&con->v1.actual_peer_addr) && 718a56dd9bfSIlya Dryomov con->v1.actual_peer_addr.nonce == con->peer_addr.nonce)) { 7192f713615SIlya Dryomov pr_warn("wrong peer, want %s/%u, got %s/%u\n", 7202f713615SIlya Dryomov ceph_pr_addr(&con->peer_addr), 7212f713615SIlya Dryomov le32_to_cpu(con->peer_addr.nonce), 722a56dd9bfSIlya Dryomov ceph_pr_addr(&con->v1.actual_peer_addr), 723a56dd9bfSIlya Dryomov le32_to_cpu(con->v1.actual_peer_addr.nonce)); 7242f713615SIlya Dryomov con->error_msg = "wrong peer at address"; 7252f713615SIlya Dryomov return -1; 7262f713615SIlya Dryomov } 7272f713615SIlya Dryomov 7282f713615SIlya Dryomov /* 7292f713615SIlya Dryomov * did we learn our address? 7302f713615SIlya Dryomov */ 7312f713615SIlya Dryomov if (ceph_addr_is_blank(my_addr)) { 7322f713615SIlya Dryomov memcpy(&my_addr->in_addr, 733a56dd9bfSIlya Dryomov &con->v1.peer_addr_for_me.in_addr, 734a56dd9bfSIlya Dryomov sizeof(con->v1.peer_addr_for_me.in_addr)); 7352f713615SIlya Dryomov ceph_addr_set_port(my_addr, 0); 7362f713615SIlya Dryomov ceph_encode_my_addr(con->msgr); 7372f713615SIlya Dryomov dout("process_banner learned my addr is %s\n", 7382f713615SIlya Dryomov ceph_pr_addr(my_addr)); 7392f713615SIlya Dryomov } 7402f713615SIlya Dryomov 7412f713615SIlya Dryomov return 0; 7422f713615SIlya Dryomov } 7432f713615SIlya Dryomov 7442f713615SIlya Dryomov static int process_connect(struct ceph_connection *con) 7452f713615SIlya Dryomov { 7462f713615SIlya Dryomov u64 sup_feat = from_msgr(con->msgr)->supported_features; 7472f713615SIlya Dryomov u64 req_feat = from_msgr(con->msgr)->required_features; 748a56dd9bfSIlya Dryomov u64 server_feat = le64_to_cpu(con->v1.in_reply.features); 7492f713615SIlya Dryomov int ret; 7502f713615SIlya Dryomov 751a56dd9bfSIlya Dryomov dout("process_connect on %p tag %d\n", con, con->v1.in_tag); 7522f713615SIlya Dryomov 753a56dd9bfSIlya Dryomov if (con->v1.auth) { 754a56dd9bfSIlya Dryomov int len = le32_to_cpu(con->v1.in_reply.authorizer_len); 7552f713615SIlya Dryomov 7562f713615SIlya Dryomov /* 7572f713615SIlya Dryomov * Any connection that defines ->get_authorizer() 7582f713615SIlya Dryomov * should also define ->add_authorizer_challenge() and 7592f713615SIlya Dryomov * ->verify_authorizer_reply(). 7602f713615SIlya Dryomov * 7612f713615SIlya Dryomov * See get_connect_authorizer(). 7622f713615SIlya Dryomov */ 763a56dd9bfSIlya Dryomov if (con->v1.in_reply.tag == 764a56dd9bfSIlya Dryomov CEPH_MSGR_TAG_CHALLENGE_AUTHORIZER) { 7652f713615SIlya Dryomov ret = con->ops->add_authorizer_challenge( 766a56dd9bfSIlya Dryomov con, con->v1.auth->authorizer_reply_buf, len); 7672f713615SIlya Dryomov if (ret < 0) 7682f713615SIlya Dryomov return ret; 7692f713615SIlya Dryomov 7702f713615SIlya Dryomov con_out_kvec_reset(con); 7712f713615SIlya Dryomov __prepare_write_connect(con); 7722f713615SIlya Dryomov prepare_read_connect(con); 7732f713615SIlya Dryomov return 0; 7742f713615SIlya Dryomov } 7752f713615SIlya Dryomov 7762f713615SIlya Dryomov if (len) { 7772f713615SIlya Dryomov ret = con->ops->verify_authorizer_reply(con); 7782f713615SIlya Dryomov if (ret < 0) { 7792f713615SIlya Dryomov con->error_msg = "bad authorize reply"; 7802f713615SIlya Dryomov return ret; 7812f713615SIlya Dryomov } 7822f713615SIlya Dryomov } 7832f713615SIlya Dryomov } 7842f713615SIlya Dryomov 785a56dd9bfSIlya Dryomov switch (con->v1.in_reply.tag) { 7862f713615SIlya Dryomov case CEPH_MSGR_TAG_FEATURES: 7872f713615SIlya Dryomov pr_err("%s%lld %s feature set mismatch," 7882f713615SIlya Dryomov " my %llx < server's %llx, missing %llx\n", 7892f713615SIlya Dryomov ENTITY_NAME(con->peer_name), 7902f713615SIlya Dryomov ceph_pr_addr(&con->peer_addr), 7912f713615SIlya Dryomov sup_feat, server_feat, server_feat & ~sup_feat); 7922f713615SIlya Dryomov con->error_msg = "missing required protocol features"; 7932f713615SIlya Dryomov return -1; 7942f713615SIlya Dryomov 7952f713615SIlya Dryomov case CEPH_MSGR_TAG_BADPROTOVER: 7962f713615SIlya Dryomov pr_err("%s%lld %s protocol version mismatch," 7972f713615SIlya Dryomov " my %d != server's %d\n", 7982f713615SIlya Dryomov ENTITY_NAME(con->peer_name), 7992f713615SIlya Dryomov ceph_pr_addr(&con->peer_addr), 800a56dd9bfSIlya Dryomov le32_to_cpu(con->v1.out_connect.protocol_version), 801a56dd9bfSIlya Dryomov le32_to_cpu(con->v1.in_reply.protocol_version)); 8022f713615SIlya Dryomov con->error_msg = "protocol version mismatch"; 8032f713615SIlya Dryomov return -1; 8042f713615SIlya Dryomov 8052f713615SIlya Dryomov case CEPH_MSGR_TAG_BADAUTHORIZER: 806a56dd9bfSIlya Dryomov con->v1.auth_retry++; 8072f713615SIlya Dryomov dout("process_connect %p got BADAUTHORIZER attempt %d\n", con, 808a56dd9bfSIlya Dryomov con->v1.auth_retry); 809a56dd9bfSIlya Dryomov if (con->v1.auth_retry == 2) { 8102f713615SIlya Dryomov con->error_msg = "connect authorization failure"; 8112f713615SIlya Dryomov return -1; 8122f713615SIlya Dryomov } 8132f713615SIlya Dryomov con_out_kvec_reset(con); 8142f713615SIlya Dryomov ret = prepare_write_connect(con); 8152f713615SIlya Dryomov if (ret < 0) 8162f713615SIlya Dryomov return ret; 8172f713615SIlya Dryomov prepare_read_connect(con); 8182f713615SIlya Dryomov break; 8192f713615SIlya Dryomov 8202f713615SIlya Dryomov case CEPH_MSGR_TAG_RESETSESSION: 8212f713615SIlya Dryomov /* 8222f713615SIlya Dryomov * If we connected with a large connect_seq but the peer 8232f713615SIlya Dryomov * has no record of a session with us (no connection, or 8242f713615SIlya Dryomov * connect_seq == 0), they will send RESETSESION to indicate 8252f713615SIlya Dryomov * that they must have reset their session, and may have 8262f713615SIlya Dryomov * dropped messages. 8272f713615SIlya Dryomov */ 8282f713615SIlya Dryomov dout("process_connect got RESET peer seq %u\n", 829a56dd9bfSIlya Dryomov le32_to_cpu(con->v1.in_reply.connect_seq)); 8302f713615SIlya Dryomov pr_info("%s%lld %s session reset\n", 8312f713615SIlya Dryomov ENTITY_NAME(con->peer_name), 8322f713615SIlya Dryomov ceph_pr_addr(&con->peer_addr)); 8332f713615SIlya Dryomov ceph_con_reset_session(con); 8342f713615SIlya Dryomov con_out_kvec_reset(con); 8352f713615SIlya Dryomov ret = prepare_write_connect(con); 8362f713615SIlya Dryomov if (ret < 0) 8372f713615SIlya Dryomov return ret; 8382f713615SIlya Dryomov prepare_read_connect(con); 8392f713615SIlya Dryomov 8402f713615SIlya Dryomov /* Tell ceph about it. */ 8412f713615SIlya Dryomov mutex_unlock(&con->mutex); 8422f713615SIlya Dryomov if (con->ops->peer_reset) 8432f713615SIlya Dryomov con->ops->peer_reset(con); 8442f713615SIlya Dryomov mutex_lock(&con->mutex); 8452f713615SIlya Dryomov if (con->state != CEPH_CON_S_V1_CONNECT_MSG) 8462f713615SIlya Dryomov return -EAGAIN; 8472f713615SIlya Dryomov break; 8482f713615SIlya Dryomov 8492f713615SIlya Dryomov case CEPH_MSGR_TAG_RETRY_SESSION: 8502f713615SIlya Dryomov /* 8512f713615SIlya Dryomov * If we sent a smaller connect_seq than the peer has, try 8522f713615SIlya Dryomov * again with a larger value. 8532f713615SIlya Dryomov */ 8542f713615SIlya Dryomov dout("process_connect got RETRY_SESSION my seq %u, peer %u\n", 855a56dd9bfSIlya Dryomov le32_to_cpu(con->v1.out_connect.connect_seq), 856a56dd9bfSIlya Dryomov le32_to_cpu(con->v1.in_reply.connect_seq)); 857a56dd9bfSIlya Dryomov con->v1.connect_seq = le32_to_cpu(con->v1.in_reply.connect_seq); 8582f713615SIlya Dryomov con_out_kvec_reset(con); 8592f713615SIlya Dryomov ret = prepare_write_connect(con); 8602f713615SIlya Dryomov if (ret < 0) 8612f713615SIlya Dryomov return ret; 8622f713615SIlya Dryomov prepare_read_connect(con); 8632f713615SIlya Dryomov break; 8642f713615SIlya Dryomov 8652f713615SIlya Dryomov case CEPH_MSGR_TAG_RETRY_GLOBAL: 8662f713615SIlya Dryomov /* 8672f713615SIlya Dryomov * If we sent a smaller global_seq than the peer has, try 8682f713615SIlya Dryomov * again with a larger value. 8692f713615SIlya Dryomov */ 8702f713615SIlya Dryomov dout("process_connect got RETRY_GLOBAL my %u peer_gseq %u\n", 871a56dd9bfSIlya Dryomov con->v1.peer_global_seq, 872a56dd9bfSIlya Dryomov le32_to_cpu(con->v1.in_reply.global_seq)); 8732f713615SIlya Dryomov ceph_get_global_seq(con->msgr, 874a56dd9bfSIlya Dryomov le32_to_cpu(con->v1.in_reply.global_seq)); 8752f713615SIlya Dryomov con_out_kvec_reset(con); 8762f713615SIlya Dryomov ret = prepare_write_connect(con); 8772f713615SIlya Dryomov if (ret < 0) 8782f713615SIlya Dryomov return ret; 8792f713615SIlya Dryomov prepare_read_connect(con); 8802f713615SIlya Dryomov break; 8812f713615SIlya Dryomov 8822f713615SIlya Dryomov case CEPH_MSGR_TAG_SEQ: 8832f713615SIlya Dryomov case CEPH_MSGR_TAG_READY: 8842f713615SIlya Dryomov if (req_feat & ~server_feat) { 8852f713615SIlya Dryomov pr_err("%s%lld %s protocol feature mismatch," 8862f713615SIlya Dryomov " my required %llx > server's %llx, need %llx\n", 8872f713615SIlya Dryomov ENTITY_NAME(con->peer_name), 8882f713615SIlya Dryomov ceph_pr_addr(&con->peer_addr), 8892f713615SIlya Dryomov req_feat, server_feat, req_feat & ~server_feat); 8902f713615SIlya Dryomov con->error_msg = "missing required protocol features"; 8912f713615SIlya Dryomov return -1; 8922f713615SIlya Dryomov } 8932f713615SIlya Dryomov 8942f713615SIlya Dryomov WARN_ON(con->state != CEPH_CON_S_V1_CONNECT_MSG); 8952f713615SIlya Dryomov con->state = CEPH_CON_S_OPEN; 896a56dd9bfSIlya Dryomov con->v1.auth_retry = 0; /* we authenticated; clear flag */ 897a56dd9bfSIlya Dryomov con->v1.peer_global_seq = 898a56dd9bfSIlya Dryomov le32_to_cpu(con->v1.in_reply.global_seq); 899a56dd9bfSIlya Dryomov con->v1.connect_seq++; 9002f713615SIlya Dryomov con->peer_features = server_feat; 9012f713615SIlya Dryomov dout("process_connect got READY gseq %d cseq %d (%d)\n", 902a56dd9bfSIlya Dryomov con->v1.peer_global_seq, 903a56dd9bfSIlya Dryomov le32_to_cpu(con->v1.in_reply.connect_seq), 904a56dd9bfSIlya Dryomov con->v1.connect_seq); 905a56dd9bfSIlya Dryomov WARN_ON(con->v1.connect_seq != 906a56dd9bfSIlya Dryomov le32_to_cpu(con->v1.in_reply.connect_seq)); 9072f713615SIlya Dryomov 908a56dd9bfSIlya Dryomov if (con->v1.in_reply.flags & CEPH_MSG_CONNECT_LOSSY) 9092f713615SIlya Dryomov ceph_con_flag_set(con, CEPH_CON_F_LOSSYTX); 9102f713615SIlya Dryomov 9112f713615SIlya Dryomov con->delay = 0; /* reset backoff memory */ 9122f713615SIlya Dryomov 913a56dd9bfSIlya Dryomov if (con->v1.in_reply.tag == CEPH_MSGR_TAG_SEQ) { 9142f713615SIlya Dryomov prepare_write_seq(con); 9152f713615SIlya Dryomov prepare_read_seq(con); 9162f713615SIlya Dryomov } else { 9172f713615SIlya Dryomov prepare_read_tag(con); 9182f713615SIlya Dryomov } 9192f713615SIlya Dryomov break; 9202f713615SIlya Dryomov 9212f713615SIlya Dryomov case CEPH_MSGR_TAG_WAIT: 9222f713615SIlya Dryomov /* 9232f713615SIlya Dryomov * If there is a connection race (we are opening 9242f713615SIlya Dryomov * connections to each other), one of us may just have 9252f713615SIlya Dryomov * to WAIT. This shouldn't happen if we are the 9262f713615SIlya Dryomov * client. 9272f713615SIlya Dryomov */ 9282f713615SIlya Dryomov con->error_msg = "protocol error, got WAIT as client"; 9292f713615SIlya Dryomov return -1; 9302f713615SIlya Dryomov 9312f713615SIlya Dryomov default: 9322f713615SIlya Dryomov con->error_msg = "protocol error, garbage tag during connect"; 9332f713615SIlya Dryomov return -1; 9342f713615SIlya Dryomov } 9352f713615SIlya Dryomov return 0; 9362f713615SIlya Dryomov } 9372f713615SIlya Dryomov 9382f713615SIlya Dryomov /* 9392f713615SIlya Dryomov * read (part of) an ack 9402f713615SIlya Dryomov */ 9412f713615SIlya Dryomov static int read_partial_ack(struct ceph_connection *con) 9422f713615SIlya Dryomov { 943a56dd9bfSIlya Dryomov int size = sizeof(con->v1.in_temp_ack); 9442f713615SIlya Dryomov int end = size; 9452f713615SIlya Dryomov 946a56dd9bfSIlya Dryomov return read_partial(con, end, size, &con->v1.in_temp_ack); 9472f713615SIlya Dryomov } 9482f713615SIlya Dryomov 9492f713615SIlya Dryomov /* 9502f713615SIlya Dryomov * We can finally discard anything that's been acked. 9512f713615SIlya Dryomov */ 9522f713615SIlya Dryomov static void process_ack(struct ceph_connection *con) 9532f713615SIlya Dryomov { 954a56dd9bfSIlya Dryomov u64 ack = le64_to_cpu(con->v1.in_temp_ack); 9552f713615SIlya Dryomov 956a56dd9bfSIlya Dryomov if (con->v1.in_tag == CEPH_MSGR_TAG_ACK) 9572f713615SIlya Dryomov ceph_con_discard_sent(con, ack); 9582f713615SIlya Dryomov else 9592f713615SIlya Dryomov ceph_con_discard_requeued(con, ack); 9602f713615SIlya Dryomov 9612f713615SIlya Dryomov prepare_read_tag(con); 9622f713615SIlya Dryomov } 9632f713615SIlya Dryomov 964d396f89dSJeff Layton static int read_partial_message_chunk(struct ceph_connection *con, 9652f713615SIlya Dryomov struct kvec *section, 9662f713615SIlya Dryomov unsigned int sec_len, u32 *crc) 9672f713615SIlya Dryomov { 9682f713615SIlya Dryomov int ret, left; 9692f713615SIlya Dryomov 9702f713615SIlya Dryomov BUG_ON(!section); 9712f713615SIlya Dryomov 9722f713615SIlya Dryomov while (section->iov_len < sec_len) { 9732f713615SIlya Dryomov BUG_ON(section->iov_base == NULL); 9742f713615SIlya Dryomov left = sec_len - section->iov_len; 9752f713615SIlya Dryomov ret = ceph_tcp_recvmsg(con->sock, (char *)section->iov_base + 9762f713615SIlya Dryomov section->iov_len, left); 9772f713615SIlya Dryomov if (ret <= 0) 9782f713615SIlya Dryomov return ret; 9792f713615SIlya Dryomov section->iov_len += ret; 9802f713615SIlya Dryomov } 9812f713615SIlya Dryomov if (section->iov_len == sec_len) 982d396f89dSJeff Layton *crc = crc32c(*crc, section->iov_base, section->iov_len); 9832f713615SIlya Dryomov 9842f713615SIlya Dryomov return 1; 9852f713615SIlya Dryomov } 9862f713615SIlya Dryomov 987d396f89dSJeff Layton static inline int read_partial_message_section(struct ceph_connection *con, 988d396f89dSJeff Layton struct kvec *section, 989d396f89dSJeff Layton unsigned int sec_len, u32 *crc) 990d396f89dSJeff Layton { 991d396f89dSJeff Layton *crc = 0; 992d396f89dSJeff Layton return read_partial_message_chunk(con, section, sec_len, crc); 993d396f89dSJeff Layton } 994d396f89dSJeff Layton 995ee97302fSXiubo Li static int read_partial_sparse_msg_extent(struct ceph_connection *con, u32 *crc) 996d396f89dSJeff Layton { 997d396f89dSJeff Layton struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor; 998d396f89dSJeff Layton bool do_bounce = ceph_test_opt(from_msgr(con->msgr), RXBOUNCE); 999d396f89dSJeff Layton 1000d396f89dSJeff Layton if (do_bounce && unlikely(!con->bounce_page)) { 1001d396f89dSJeff Layton con->bounce_page = alloc_page(GFP_NOIO); 1002d396f89dSJeff Layton if (!con->bounce_page) { 1003d396f89dSJeff Layton pr_err("failed to allocate bounce page\n"); 1004d396f89dSJeff Layton return -ENOMEM; 1005d396f89dSJeff Layton } 1006d396f89dSJeff Layton } 1007d396f89dSJeff Layton 1008d396f89dSJeff Layton while (cursor->sr_resid > 0) { 1009d396f89dSJeff Layton struct page *page, *rpage; 1010d396f89dSJeff Layton size_t off, len; 1011d396f89dSJeff Layton int ret; 1012d396f89dSJeff Layton 1013d396f89dSJeff Layton page = ceph_msg_data_next(cursor, &off, &len); 1014d396f89dSJeff Layton rpage = do_bounce ? con->bounce_page : page; 1015d396f89dSJeff Layton 1016d396f89dSJeff Layton /* clamp to what remains in extent */ 1017d396f89dSJeff Layton len = min_t(int, len, cursor->sr_resid); 1018d396f89dSJeff Layton ret = ceph_tcp_recvpage(con->sock, rpage, (int)off, len); 1019d396f89dSJeff Layton if (ret <= 0) 1020d396f89dSJeff Layton return ret; 1021d396f89dSJeff Layton *crc = ceph_crc32c_page(*crc, rpage, off, ret); 1022d396f89dSJeff Layton ceph_msg_data_advance(cursor, (size_t)ret); 1023d396f89dSJeff Layton cursor->sr_resid -= ret; 1024d396f89dSJeff Layton if (do_bounce) 1025d396f89dSJeff Layton memcpy_page(page, off, rpage, off, ret); 1026d396f89dSJeff Layton } 1027d396f89dSJeff Layton return 1; 1028d396f89dSJeff Layton } 1029d396f89dSJeff Layton 1030ee97302fSXiubo Li static int read_partial_sparse_msg_data(struct ceph_connection *con) 1031d396f89dSJeff Layton { 1032d396f89dSJeff Layton struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor; 1033d396f89dSJeff Layton bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC); 1034d396f89dSJeff Layton u32 crc = 0; 1035d396f89dSJeff Layton int ret = 1; 1036d396f89dSJeff Layton 1037d396f89dSJeff Layton if (do_datacrc) 1038d396f89dSJeff Layton crc = con->in_data_crc; 1039d396f89dSJeff Layton 1040*8e46a2d0SXiubo Li while (cursor->total_resid) { 1041d396f89dSJeff Layton if (con->v1.in_sr_kvec.iov_base) 1042d396f89dSJeff Layton ret = read_partial_message_chunk(con, 1043d396f89dSJeff Layton &con->v1.in_sr_kvec, 1044d396f89dSJeff Layton con->v1.in_sr_len, 1045d396f89dSJeff Layton &crc); 1046d396f89dSJeff Layton else if (cursor->sr_resid > 0) 1047ee97302fSXiubo Li ret = read_partial_sparse_msg_extent(con, &crc); 1048*8e46a2d0SXiubo Li if (ret <= 0) 1049*8e46a2d0SXiubo Li break; 1050d396f89dSJeff Layton 1051d396f89dSJeff Layton memset(&con->v1.in_sr_kvec, 0, sizeof(con->v1.in_sr_kvec)); 1052d396f89dSJeff Layton ret = con->ops->sparse_read(con, cursor, 1053d396f89dSJeff Layton (char **)&con->v1.in_sr_kvec.iov_base); 1054*8e46a2d0SXiubo Li if (ret <= 0) { 1055*8e46a2d0SXiubo Li ret = ret ? ret : 1; /* must return > 0 to indicate success */ 1056*8e46a2d0SXiubo Li break; 1057*8e46a2d0SXiubo Li } 1058d396f89dSJeff Layton con->v1.in_sr_len = ret; 1059*8e46a2d0SXiubo Li } 1060d396f89dSJeff Layton 1061d396f89dSJeff Layton if (do_datacrc) 1062d396f89dSJeff Layton con->in_data_crc = crc; 1063d396f89dSJeff Layton 1064*8e46a2d0SXiubo Li return ret; 1065d396f89dSJeff Layton } 1066d396f89dSJeff Layton 10672f713615SIlya Dryomov static int read_partial_msg_data(struct ceph_connection *con) 10682f713615SIlya Dryomov { 1069038b8d1dSIlya Dryomov struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor; 10702f713615SIlya Dryomov bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC); 10712f713615SIlya Dryomov struct page *page; 10722f713615SIlya Dryomov size_t page_offset; 10732f713615SIlya Dryomov size_t length; 10742f713615SIlya Dryomov u32 crc = 0; 10752f713615SIlya Dryomov int ret; 10762f713615SIlya Dryomov 10772f713615SIlya Dryomov if (do_datacrc) 10782f713615SIlya Dryomov crc = con->in_data_crc; 10792f713615SIlya Dryomov while (cursor->total_resid) { 10802f713615SIlya Dryomov if (!cursor->resid) { 10812f713615SIlya Dryomov ceph_msg_data_advance(cursor, 0); 10822f713615SIlya Dryomov continue; 10832f713615SIlya Dryomov } 10842f713615SIlya Dryomov 1085da4ab869SJeff Layton page = ceph_msg_data_next(cursor, &page_offset, &length); 10862f713615SIlya Dryomov ret = ceph_tcp_recvpage(con->sock, page, page_offset, length); 10872f713615SIlya Dryomov if (ret <= 0) { 10882f713615SIlya Dryomov if (do_datacrc) 10892f713615SIlya Dryomov con->in_data_crc = crc; 10902f713615SIlya Dryomov 10912f713615SIlya Dryomov return ret; 10922f713615SIlya Dryomov } 10932f713615SIlya Dryomov 10942f713615SIlya Dryomov if (do_datacrc) 10952f713615SIlya Dryomov crc = ceph_crc32c_page(crc, page, page_offset, ret); 10962f713615SIlya Dryomov ceph_msg_data_advance(cursor, (size_t)ret); 10972f713615SIlya Dryomov } 10982f713615SIlya Dryomov if (do_datacrc) 10992f713615SIlya Dryomov con->in_data_crc = crc; 11002f713615SIlya Dryomov 11012f713615SIlya Dryomov return 1; /* must return > 0 to indicate success */ 11022f713615SIlya Dryomov } 11032f713615SIlya Dryomov 1104038b8d1dSIlya Dryomov static int read_partial_msg_data_bounce(struct ceph_connection *con) 1105038b8d1dSIlya Dryomov { 1106038b8d1dSIlya Dryomov struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor; 1107038b8d1dSIlya Dryomov struct page *page; 1108038b8d1dSIlya Dryomov size_t off, len; 1109038b8d1dSIlya Dryomov u32 crc; 1110038b8d1dSIlya Dryomov int ret; 1111038b8d1dSIlya Dryomov 1112038b8d1dSIlya Dryomov if (unlikely(!con->bounce_page)) { 1113038b8d1dSIlya Dryomov con->bounce_page = alloc_page(GFP_NOIO); 1114038b8d1dSIlya Dryomov if (!con->bounce_page) { 1115038b8d1dSIlya Dryomov pr_err("failed to allocate bounce page\n"); 1116038b8d1dSIlya Dryomov return -ENOMEM; 1117038b8d1dSIlya Dryomov } 1118038b8d1dSIlya Dryomov } 1119038b8d1dSIlya Dryomov 1120038b8d1dSIlya Dryomov crc = con->in_data_crc; 1121038b8d1dSIlya Dryomov while (cursor->total_resid) { 1122038b8d1dSIlya Dryomov if (!cursor->resid) { 1123038b8d1dSIlya Dryomov ceph_msg_data_advance(cursor, 0); 1124038b8d1dSIlya Dryomov continue; 1125038b8d1dSIlya Dryomov } 1126038b8d1dSIlya Dryomov 1127da4ab869SJeff Layton page = ceph_msg_data_next(cursor, &off, &len); 1128038b8d1dSIlya Dryomov ret = ceph_tcp_recvpage(con->sock, con->bounce_page, 0, len); 1129038b8d1dSIlya Dryomov if (ret <= 0) { 1130038b8d1dSIlya Dryomov con->in_data_crc = crc; 1131038b8d1dSIlya Dryomov return ret; 1132038b8d1dSIlya Dryomov } 1133038b8d1dSIlya Dryomov 1134038b8d1dSIlya Dryomov crc = crc32c(crc, page_address(con->bounce_page), ret); 1135038b8d1dSIlya Dryomov memcpy_to_page(page, off, page_address(con->bounce_page), ret); 1136038b8d1dSIlya Dryomov 1137038b8d1dSIlya Dryomov ceph_msg_data_advance(cursor, ret); 1138038b8d1dSIlya Dryomov } 1139038b8d1dSIlya Dryomov con->in_data_crc = crc; 1140038b8d1dSIlya Dryomov 1141038b8d1dSIlya Dryomov return 1; /* must return > 0 to indicate success */ 1142038b8d1dSIlya Dryomov } 1143038b8d1dSIlya Dryomov 11442f713615SIlya Dryomov /* 11452f713615SIlya Dryomov * read (part of) a message. 11462f713615SIlya Dryomov */ 11472f713615SIlya Dryomov static int read_partial_message(struct ceph_connection *con) 11482f713615SIlya Dryomov { 11492f713615SIlya Dryomov struct ceph_msg *m = con->in_msg; 11502f713615SIlya Dryomov int size; 11512f713615SIlya Dryomov int end; 11522f713615SIlya Dryomov int ret; 11532f713615SIlya Dryomov unsigned int front_len, middle_len, data_len; 11542f713615SIlya Dryomov bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC); 11552f713615SIlya Dryomov bool need_sign = (con->peer_features & CEPH_FEATURE_MSG_AUTH); 11562f713615SIlya Dryomov u64 seq; 11572f713615SIlya Dryomov u32 crc; 11582f713615SIlya Dryomov 11592f713615SIlya Dryomov dout("read_partial_message con %p msg %p\n", con, m); 11602f713615SIlya Dryomov 11612f713615SIlya Dryomov /* header */ 1162a56dd9bfSIlya Dryomov size = sizeof(con->v1.in_hdr); 11632f713615SIlya Dryomov end = size; 1164a56dd9bfSIlya Dryomov ret = read_partial(con, end, size, &con->v1.in_hdr); 11652f713615SIlya Dryomov if (ret <= 0) 11662f713615SIlya Dryomov return ret; 11672f713615SIlya Dryomov 1168a56dd9bfSIlya Dryomov crc = crc32c(0, &con->v1.in_hdr, offsetof(struct ceph_msg_header, crc)); 1169a56dd9bfSIlya Dryomov if (cpu_to_le32(crc) != con->v1.in_hdr.crc) { 11702f713615SIlya Dryomov pr_err("read_partial_message bad hdr crc %u != expected %u\n", 1171a56dd9bfSIlya Dryomov crc, con->v1.in_hdr.crc); 11722f713615SIlya Dryomov return -EBADMSG; 11732f713615SIlya Dryomov } 11742f713615SIlya Dryomov 1175a56dd9bfSIlya Dryomov front_len = le32_to_cpu(con->v1.in_hdr.front_len); 11762f713615SIlya Dryomov if (front_len > CEPH_MSG_MAX_FRONT_LEN) 11772f713615SIlya Dryomov return -EIO; 1178a56dd9bfSIlya Dryomov middle_len = le32_to_cpu(con->v1.in_hdr.middle_len); 11792f713615SIlya Dryomov if (middle_len > CEPH_MSG_MAX_MIDDLE_LEN) 11802f713615SIlya Dryomov return -EIO; 1181a56dd9bfSIlya Dryomov data_len = le32_to_cpu(con->v1.in_hdr.data_len); 11822f713615SIlya Dryomov if (data_len > CEPH_MSG_MAX_DATA_LEN) 11832f713615SIlya Dryomov return -EIO; 11842f713615SIlya Dryomov 11852f713615SIlya Dryomov /* verify seq# */ 1186a56dd9bfSIlya Dryomov seq = le64_to_cpu(con->v1.in_hdr.seq); 11872f713615SIlya Dryomov if ((s64)seq - (s64)con->in_seq < 1) { 11882f713615SIlya Dryomov pr_info("skipping %s%lld %s seq %lld expected %lld\n", 11892f713615SIlya Dryomov ENTITY_NAME(con->peer_name), 11902f713615SIlya Dryomov ceph_pr_addr(&con->peer_addr), 11912f713615SIlya Dryomov seq, con->in_seq + 1); 1192a56dd9bfSIlya Dryomov con->v1.in_base_pos = -front_len - middle_len - data_len - 11932f713615SIlya Dryomov sizeof_footer(con); 1194a56dd9bfSIlya Dryomov con->v1.in_tag = CEPH_MSGR_TAG_READY; 11952f713615SIlya Dryomov return 1; 11962f713615SIlya Dryomov } else if ((s64)seq - (s64)con->in_seq > 1) { 11972f713615SIlya Dryomov pr_err("read_partial_message bad seq %lld expected %lld\n", 11982f713615SIlya Dryomov seq, con->in_seq + 1); 11992f713615SIlya Dryomov con->error_msg = "bad message sequence # for incoming message"; 12002f713615SIlya Dryomov return -EBADE; 12012f713615SIlya Dryomov } 12022f713615SIlya Dryomov 12032f713615SIlya Dryomov /* allocate message? */ 12042f713615SIlya Dryomov if (!con->in_msg) { 12052f713615SIlya Dryomov int skip = 0; 12062f713615SIlya Dryomov 1207a56dd9bfSIlya Dryomov dout("got hdr type %d front %d data %d\n", con->v1.in_hdr.type, 12082f713615SIlya Dryomov front_len, data_len); 1209a56dd9bfSIlya Dryomov ret = ceph_con_in_msg_alloc(con, &con->v1.in_hdr, &skip); 12102f713615SIlya Dryomov if (ret < 0) 12112f713615SIlya Dryomov return ret; 12122f713615SIlya Dryomov 12139d5ae6f3SIlya Dryomov BUG_ON((!con->in_msg) ^ skip); 12142f713615SIlya Dryomov if (skip) { 12152f713615SIlya Dryomov /* skip this message */ 12162f713615SIlya Dryomov dout("alloc_msg said skip message\n"); 1217a56dd9bfSIlya Dryomov con->v1.in_base_pos = -front_len - middle_len - 1218a56dd9bfSIlya Dryomov data_len - sizeof_footer(con); 1219a56dd9bfSIlya Dryomov con->v1.in_tag = CEPH_MSGR_TAG_READY; 12202f713615SIlya Dryomov con->in_seq++; 12212f713615SIlya Dryomov return 1; 12222f713615SIlya Dryomov } 12232f713615SIlya Dryomov 12242f713615SIlya Dryomov BUG_ON(!con->in_msg); 12252f713615SIlya Dryomov BUG_ON(con->in_msg->con != con); 12262f713615SIlya Dryomov m = con->in_msg; 12272f713615SIlya Dryomov m->front.iov_len = 0; /* haven't read it yet */ 12282f713615SIlya Dryomov if (m->middle) 12292f713615SIlya Dryomov m->middle->vec.iov_len = 0; 12302f713615SIlya Dryomov 12312f713615SIlya Dryomov /* prepare for data payload, if any */ 12322f713615SIlya Dryomov 12332f713615SIlya Dryomov if (data_len) 12342f713615SIlya Dryomov prepare_message_data(con->in_msg, data_len); 12352f713615SIlya Dryomov } 12362f713615SIlya Dryomov 12372f713615SIlya Dryomov /* front */ 12382f713615SIlya Dryomov ret = read_partial_message_section(con, &m->front, front_len, 12392f713615SIlya Dryomov &con->in_front_crc); 12402f713615SIlya Dryomov if (ret <= 0) 12412f713615SIlya Dryomov return ret; 12422f713615SIlya Dryomov 12432f713615SIlya Dryomov /* middle */ 12442f713615SIlya Dryomov if (m->middle) { 12452f713615SIlya Dryomov ret = read_partial_message_section(con, &m->middle->vec, 12462f713615SIlya Dryomov middle_len, 12472f713615SIlya Dryomov &con->in_middle_crc); 12482f713615SIlya Dryomov if (ret <= 0) 12492f713615SIlya Dryomov return ret; 12502f713615SIlya Dryomov } 12512f713615SIlya Dryomov 12522f713615SIlya Dryomov /* (page) data */ 12532f713615SIlya Dryomov if (data_len) { 1254038b8d1dSIlya Dryomov if (!m->num_data_items) 1255038b8d1dSIlya Dryomov return -EIO; 1256038b8d1dSIlya Dryomov 1257*8e46a2d0SXiubo Li if (m->sparse_read_total) 1258ee97302fSXiubo Li ret = read_partial_sparse_msg_data(con); 1259d396f89dSJeff Layton else if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) 1260038b8d1dSIlya Dryomov ret = read_partial_msg_data_bounce(con); 1261038b8d1dSIlya Dryomov else 12622f713615SIlya Dryomov ret = read_partial_msg_data(con); 12632f713615SIlya Dryomov if (ret <= 0) 12642f713615SIlya Dryomov return ret; 12652f713615SIlya Dryomov } 12662f713615SIlya Dryomov 12672f713615SIlya Dryomov /* footer */ 12682f713615SIlya Dryomov size = sizeof_footer(con); 12692f713615SIlya Dryomov end += size; 12702f713615SIlya Dryomov ret = read_partial(con, end, size, &m->footer); 12712f713615SIlya Dryomov if (ret <= 0) 12722f713615SIlya Dryomov return ret; 12732f713615SIlya Dryomov 12742f713615SIlya Dryomov if (!need_sign) { 12752f713615SIlya Dryomov m->footer.flags = m->old_footer.flags; 12762f713615SIlya Dryomov m->footer.sig = 0; 12772f713615SIlya Dryomov } 12782f713615SIlya Dryomov 12792f713615SIlya Dryomov dout("read_partial_message got msg %p %d (%u) + %d (%u) + %d (%u)\n", 12802f713615SIlya Dryomov m, front_len, m->footer.front_crc, middle_len, 12812f713615SIlya Dryomov m->footer.middle_crc, data_len, m->footer.data_crc); 12822f713615SIlya Dryomov 12832f713615SIlya Dryomov /* crc ok? */ 12842f713615SIlya Dryomov if (con->in_front_crc != le32_to_cpu(m->footer.front_crc)) { 12852f713615SIlya Dryomov pr_err("read_partial_message %p front crc %u != exp. %u\n", 12862f713615SIlya Dryomov m, con->in_front_crc, m->footer.front_crc); 12872f713615SIlya Dryomov return -EBADMSG; 12882f713615SIlya Dryomov } 12892f713615SIlya Dryomov if (con->in_middle_crc != le32_to_cpu(m->footer.middle_crc)) { 12902f713615SIlya Dryomov pr_err("read_partial_message %p middle crc %u != exp %u\n", 12912f713615SIlya Dryomov m, con->in_middle_crc, m->footer.middle_crc); 12922f713615SIlya Dryomov return -EBADMSG; 12932f713615SIlya Dryomov } 12942f713615SIlya Dryomov if (do_datacrc && 12952f713615SIlya Dryomov (m->footer.flags & CEPH_MSG_FOOTER_NOCRC) == 0 && 12962f713615SIlya Dryomov con->in_data_crc != le32_to_cpu(m->footer.data_crc)) { 12972f713615SIlya Dryomov pr_err("read_partial_message %p data crc %u != exp. %u\n", m, 12982f713615SIlya Dryomov con->in_data_crc, le32_to_cpu(m->footer.data_crc)); 12992f713615SIlya Dryomov return -EBADMSG; 13002f713615SIlya Dryomov } 13012f713615SIlya Dryomov 13022f713615SIlya Dryomov if (need_sign && con->ops->check_message_signature && 13032f713615SIlya Dryomov con->ops->check_message_signature(m)) { 13042f713615SIlya Dryomov pr_err("read_partial_message %p signature check failed\n", m); 13052f713615SIlya Dryomov return -EBADMSG; 13062f713615SIlya Dryomov } 13072f713615SIlya Dryomov 13082f713615SIlya Dryomov return 1; /* done! */ 13092f713615SIlya Dryomov } 13102f713615SIlya Dryomov 13112f713615SIlya Dryomov static int read_keepalive_ack(struct ceph_connection *con) 13122f713615SIlya Dryomov { 13132f713615SIlya Dryomov struct ceph_timespec ceph_ts; 13142f713615SIlya Dryomov size_t size = sizeof(ceph_ts); 13152f713615SIlya Dryomov int ret = read_partial(con, size, size, &ceph_ts); 13162f713615SIlya Dryomov if (ret <= 0) 13172f713615SIlya Dryomov return ret; 13182f713615SIlya Dryomov ceph_decode_timespec64(&con->last_keepalive_ack, &ceph_ts); 13192f713615SIlya Dryomov prepare_read_tag(con); 13202f713615SIlya Dryomov return 1; 13212f713615SIlya Dryomov } 13222f713615SIlya Dryomov 13232f713615SIlya Dryomov /* 13242f713615SIlya Dryomov * Read what we can from the socket. 13252f713615SIlya Dryomov */ 13262f713615SIlya Dryomov int ceph_con_v1_try_read(struct ceph_connection *con) 13272f713615SIlya Dryomov { 13282f713615SIlya Dryomov int ret = -1; 13292f713615SIlya Dryomov 13302f713615SIlya Dryomov more: 13312f713615SIlya Dryomov dout("try_read start %p state %d\n", con, con->state); 13322f713615SIlya Dryomov if (con->state != CEPH_CON_S_V1_BANNER && 13332f713615SIlya Dryomov con->state != CEPH_CON_S_V1_CONNECT_MSG && 13342f713615SIlya Dryomov con->state != CEPH_CON_S_OPEN) 13352f713615SIlya Dryomov return 0; 13362f713615SIlya Dryomov 13372f713615SIlya Dryomov BUG_ON(!con->sock); 13382f713615SIlya Dryomov 1339a56dd9bfSIlya Dryomov dout("try_read tag %d in_base_pos %d\n", con->v1.in_tag, 1340a56dd9bfSIlya Dryomov con->v1.in_base_pos); 13412f713615SIlya Dryomov 13422f713615SIlya Dryomov if (con->state == CEPH_CON_S_V1_BANNER) { 13432f713615SIlya Dryomov ret = read_partial_banner(con); 13442f713615SIlya Dryomov if (ret <= 0) 13452f713615SIlya Dryomov goto out; 13462f713615SIlya Dryomov ret = process_banner(con); 13472f713615SIlya Dryomov if (ret < 0) 13482f713615SIlya Dryomov goto out; 13492f713615SIlya Dryomov 13502f713615SIlya Dryomov con->state = CEPH_CON_S_V1_CONNECT_MSG; 13512f713615SIlya Dryomov 13522f713615SIlya Dryomov /* 13532f713615SIlya Dryomov * Received banner is good, exchange connection info. 13542f713615SIlya Dryomov * Do not reset out_kvec, as sending our banner raced 13552f713615SIlya Dryomov * with receiving peer banner after connect completed. 13562f713615SIlya Dryomov */ 13572f713615SIlya Dryomov ret = prepare_write_connect(con); 13582f713615SIlya Dryomov if (ret < 0) 13592f713615SIlya Dryomov goto out; 13602f713615SIlya Dryomov prepare_read_connect(con); 13612f713615SIlya Dryomov 13622f713615SIlya Dryomov /* Send connection info before awaiting response */ 13632f713615SIlya Dryomov goto out; 13642f713615SIlya Dryomov } 13652f713615SIlya Dryomov 13662f713615SIlya Dryomov if (con->state == CEPH_CON_S_V1_CONNECT_MSG) { 13672f713615SIlya Dryomov ret = read_partial_connect(con); 13682f713615SIlya Dryomov if (ret <= 0) 13692f713615SIlya Dryomov goto out; 13702f713615SIlya Dryomov ret = process_connect(con); 13712f713615SIlya Dryomov if (ret < 0) 13722f713615SIlya Dryomov goto out; 13732f713615SIlya Dryomov goto more; 13742f713615SIlya Dryomov } 13752f713615SIlya Dryomov 13762f713615SIlya Dryomov WARN_ON(con->state != CEPH_CON_S_OPEN); 13772f713615SIlya Dryomov 1378a56dd9bfSIlya Dryomov if (con->v1.in_base_pos < 0) { 13792f713615SIlya Dryomov /* 13802f713615SIlya Dryomov * skipping + discarding content. 13812f713615SIlya Dryomov */ 1382a56dd9bfSIlya Dryomov ret = ceph_tcp_recvmsg(con->sock, NULL, -con->v1.in_base_pos); 13832f713615SIlya Dryomov if (ret <= 0) 13842f713615SIlya Dryomov goto out; 1385a56dd9bfSIlya Dryomov dout("skipped %d / %d bytes\n", ret, -con->v1.in_base_pos); 1386a56dd9bfSIlya Dryomov con->v1.in_base_pos += ret; 1387a56dd9bfSIlya Dryomov if (con->v1.in_base_pos) 13882f713615SIlya Dryomov goto more; 13892f713615SIlya Dryomov } 1390a56dd9bfSIlya Dryomov if (con->v1.in_tag == CEPH_MSGR_TAG_READY) { 13912f713615SIlya Dryomov /* 13922f713615SIlya Dryomov * what's next? 13932f713615SIlya Dryomov */ 1394a56dd9bfSIlya Dryomov ret = ceph_tcp_recvmsg(con->sock, &con->v1.in_tag, 1); 13952f713615SIlya Dryomov if (ret <= 0) 13962f713615SIlya Dryomov goto out; 1397a56dd9bfSIlya Dryomov dout("try_read got tag %d\n", con->v1.in_tag); 1398a56dd9bfSIlya Dryomov switch (con->v1.in_tag) { 13992f713615SIlya Dryomov case CEPH_MSGR_TAG_MSG: 14002f713615SIlya Dryomov prepare_read_message(con); 14012f713615SIlya Dryomov break; 14022f713615SIlya Dryomov case CEPH_MSGR_TAG_ACK: 14032f713615SIlya Dryomov prepare_read_ack(con); 14042f713615SIlya Dryomov break; 14052f713615SIlya Dryomov case CEPH_MSGR_TAG_KEEPALIVE2_ACK: 14062f713615SIlya Dryomov prepare_read_keepalive_ack(con); 14072f713615SIlya Dryomov break; 14082f713615SIlya Dryomov case CEPH_MSGR_TAG_CLOSE: 14092f713615SIlya Dryomov ceph_con_close_socket(con); 14102f713615SIlya Dryomov con->state = CEPH_CON_S_CLOSED; 14112f713615SIlya Dryomov goto out; 14122f713615SIlya Dryomov default: 14132f713615SIlya Dryomov goto bad_tag; 14142f713615SIlya Dryomov } 14152f713615SIlya Dryomov } 1416a56dd9bfSIlya Dryomov if (con->v1.in_tag == CEPH_MSGR_TAG_MSG) { 14172f713615SIlya Dryomov ret = read_partial_message(con); 14182f713615SIlya Dryomov if (ret <= 0) { 14192f713615SIlya Dryomov switch (ret) { 14202f713615SIlya Dryomov case -EBADMSG: 14212f713615SIlya Dryomov con->error_msg = "bad crc/signature"; 14222f713615SIlya Dryomov fallthrough; 14232f713615SIlya Dryomov case -EBADE: 14242f713615SIlya Dryomov ret = -EIO; 14252f713615SIlya Dryomov break; 14262f713615SIlya Dryomov case -EIO: 14272f713615SIlya Dryomov con->error_msg = "io error"; 14282f713615SIlya Dryomov break; 14292f713615SIlya Dryomov } 14302f713615SIlya Dryomov goto out; 14312f713615SIlya Dryomov } 1432a56dd9bfSIlya Dryomov if (con->v1.in_tag == CEPH_MSGR_TAG_READY) 14332f713615SIlya Dryomov goto more; 14342f713615SIlya Dryomov ceph_con_process_message(con); 14352f713615SIlya Dryomov if (con->state == CEPH_CON_S_OPEN) 14362f713615SIlya Dryomov prepare_read_tag(con); 14372f713615SIlya Dryomov goto more; 14382f713615SIlya Dryomov } 1439a56dd9bfSIlya Dryomov if (con->v1.in_tag == CEPH_MSGR_TAG_ACK || 1440a56dd9bfSIlya Dryomov con->v1.in_tag == CEPH_MSGR_TAG_SEQ) { 14412f713615SIlya Dryomov /* 14422f713615SIlya Dryomov * the final handshake seq exchange is semantically 14432f713615SIlya Dryomov * equivalent to an ACK 14442f713615SIlya Dryomov */ 14452f713615SIlya Dryomov ret = read_partial_ack(con); 14462f713615SIlya Dryomov if (ret <= 0) 14472f713615SIlya Dryomov goto out; 14482f713615SIlya Dryomov process_ack(con); 14492f713615SIlya Dryomov goto more; 14502f713615SIlya Dryomov } 1451a56dd9bfSIlya Dryomov if (con->v1.in_tag == CEPH_MSGR_TAG_KEEPALIVE2_ACK) { 14522f713615SIlya Dryomov ret = read_keepalive_ack(con); 14532f713615SIlya Dryomov if (ret <= 0) 14542f713615SIlya Dryomov goto out; 14552f713615SIlya Dryomov goto more; 14562f713615SIlya Dryomov } 14572f713615SIlya Dryomov 14582f713615SIlya Dryomov out: 14592f713615SIlya Dryomov dout("try_read done on %p ret %d\n", con, ret); 14602f713615SIlya Dryomov return ret; 14612f713615SIlya Dryomov 14622f713615SIlya Dryomov bad_tag: 1463a56dd9bfSIlya Dryomov pr_err("try_read bad tag %d\n", con->v1.in_tag); 14642f713615SIlya Dryomov con->error_msg = "protocol error, garbage tag"; 14652f713615SIlya Dryomov ret = -1; 14662f713615SIlya Dryomov goto out; 14672f713615SIlya Dryomov } 14682f713615SIlya Dryomov 14692f713615SIlya Dryomov /* 14702f713615SIlya Dryomov * Write something to the socket. Called in a worker thread when the 14712f713615SIlya Dryomov * socket appears to be writeable and we have something ready to send. 14722f713615SIlya Dryomov */ 14732f713615SIlya Dryomov int ceph_con_v1_try_write(struct ceph_connection *con) 14742f713615SIlya Dryomov { 14752f713615SIlya Dryomov int ret = 1; 14762f713615SIlya Dryomov 14772f713615SIlya Dryomov dout("try_write start %p state %d\n", con, con->state); 14782f713615SIlya Dryomov if (con->state != CEPH_CON_S_PREOPEN && 14792f713615SIlya Dryomov con->state != CEPH_CON_S_V1_BANNER && 14802f713615SIlya Dryomov con->state != CEPH_CON_S_V1_CONNECT_MSG && 14812f713615SIlya Dryomov con->state != CEPH_CON_S_OPEN) 14822f713615SIlya Dryomov return 0; 14832f713615SIlya Dryomov 14842f713615SIlya Dryomov /* open the socket first? */ 14852f713615SIlya Dryomov if (con->state == CEPH_CON_S_PREOPEN) { 14862f713615SIlya Dryomov BUG_ON(con->sock); 14872f713615SIlya Dryomov con->state = CEPH_CON_S_V1_BANNER; 14882f713615SIlya Dryomov 14892f713615SIlya Dryomov con_out_kvec_reset(con); 14902f713615SIlya Dryomov prepare_write_banner(con); 14912f713615SIlya Dryomov prepare_read_banner(con); 14922f713615SIlya Dryomov 14932f713615SIlya Dryomov BUG_ON(con->in_msg); 1494a56dd9bfSIlya Dryomov con->v1.in_tag = CEPH_MSGR_TAG_READY; 14952f713615SIlya Dryomov dout("try_write initiating connect on %p new state %d\n", 14962f713615SIlya Dryomov con, con->state); 14972f713615SIlya Dryomov ret = ceph_tcp_connect(con); 14982f713615SIlya Dryomov if (ret < 0) { 14992f713615SIlya Dryomov con->error_msg = "connect error"; 15002f713615SIlya Dryomov goto out; 15012f713615SIlya Dryomov } 15022f713615SIlya Dryomov } 15032f713615SIlya Dryomov 15042f713615SIlya Dryomov more: 1505a56dd9bfSIlya Dryomov dout("try_write out_kvec_bytes %d\n", con->v1.out_kvec_bytes); 15062f713615SIlya Dryomov BUG_ON(!con->sock); 15072f713615SIlya Dryomov 15082f713615SIlya Dryomov /* kvec data queued? */ 1509a56dd9bfSIlya Dryomov if (con->v1.out_kvec_left) { 15102f713615SIlya Dryomov ret = write_partial_kvec(con); 15112f713615SIlya Dryomov if (ret <= 0) 15122f713615SIlya Dryomov goto out; 15132f713615SIlya Dryomov } 1514a56dd9bfSIlya Dryomov if (con->v1.out_skip) { 15152f713615SIlya Dryomov ret = write_partial_skip(con); 15162f713615SIlya Dryomov if (ret <= 0) 15172f713615SIlya Dryomov goto out; 15182f713615SIlya Dryomov } 15192f713615SIlya Dryomov 15202f713615SIlya Dryomov /* msg pages? */ 15212f713615SIlya Dryomov if (con->out_msg) { 1522a56dd9bfSIlya Dryomov if (con->v1.out_msg_done) { 15232f713615SIlya Dryomov ceph_msg_put(con->out_msg); 15242f713615SIlya Dryomov con->out_msg = NULL; /* we're done with this one */ 15252f713615SIlya Dryomov goto do_next; 15262f713615SIlya Dryomov } 15272f713615SIlya Dryomov 15282f713615SIlya Dryomov ret = write_partial_message_data(con); 15292f713615SIlya Dryomov if (ret == 1) 15302f713615SIlya Dryomov goto more; /* we need to send the footer, too! */ 15312f713615SIlya Dryomov if (ret == 0) 15322f713615SIlya Dryomov goto out; 15332f713615SIlya Dryomov if (ret < 0) { 15342f713615SIlya Dryomov dout("try_write write_partial_message_data err %d\n", 15352f713615SIlya Dryomov ret); 15362f713615SIlya Dryomov goto out; 15372f713615SIlya Dryomov } 15382f713615SIlya Dryomov } 15392f713615SIlya Dryomov 15402f713615SIlya Dryomov do_next: 15412f713615SIlya Dryomov if (con->state == CEPH_CON_S_OPEN) { 15422f713615SIlya Dryomov if (ceph_con_flag_test_and_clear(con, 15432f713615SIlya Dryomov CEPH_CON_F_KEEPALIVE_PENDING)) { 15442f713615SIlya Dryomov prepare_write_keepalive(con); 15452f713615SIlya Dryomov goto more; 15462f713615SIlya Dryomov } 15472f713615SIlya Dryomov /* is anything else pending? */ 15482f713615SIlya Dryomov if (!list_empty(&con->out_queue)) { 15492f713615SIlya Dryomov prepare_write_message(con); 15502f713615SIlya Dryomov goto more; 15512f713615SIlya Dryomov } 15522f713615SIlya Dryomov if (con->in_seq > con->in_seq_acked) { 15532f713615SIlya Dryomov prepare_write_ack(con); 15542f713615SIlya Dryomov goto more; 15552f713615SIlya Dryomov } 15562f713615SIlya Dryomov } 15572f713615SIlya Dryomov 15582f713615SIlya Dryomov /* Nothing to do! */ 15592f713615SIlya Dryomov ceph_con_flag_clear(con, CEPH_CON_F_WRITE_PENDING); 15602f713615SIlya Dryomov dout("try_write nothing else to write.\n"); 15612f713615SIlya Dryomov ret = 0; 15622f713615SIlya Dryomov out: 15632f713615SIlya Dryomov dout("try_write done on %p ret %d\n", con, ret); 15642f713615SIlya Dryomov return ret; 15652f713615SIlya Dryomov } 15662f713615SIlya Dryomov 15672f713615SIlya Dryomov void ceph_con_v1_revoke(struct ceph_connection *con) 15682f713615SIlya Dryomov { 15692f713615SIlya Dryomov struct ceph_msg *msg = con->out_msg; 15702f713615SIlya Dryomov 1571a56dd9bfSIlya Dryomov WARN_ON(con->v1.out_skip); 15722f713615SIlya Dryomov /* footer */ 1573a56dd9bfSIlya Dryomov if (con->v1.out_msg_done) { 1574a56dd9bfSIlya Dryomov con->v1.out_skip += con_out_kvec_skip(con); 15752f713615SIlya Dryomov } else { 15762f713615SIlya Dryomov WARN_ON(!msg->data_length); 1577a56dd9bfSIlya Dryomov con->v1.out_skip += sizeof_footer(con); 15782f713615SIlya Dryomov } 15792f713615SIlya Dryomov /* data, middle, front */ 15802f713615SIlya Dryomov if (msg->data_length) 1581a56dd9bfSIlya Dryomov con->v1.out_skip += msg->cursor.total_resid; 15822f713615SIlya Dryomov if (msg->middle) 1583a56dd9bfSIlya Dryomov con->v1.out_skip += con_out_kvec_skip(con); 1584a56dd9bfSIlya Dryomov con->v1.out_skip += con_out_kvec_skip(con); 15852f713615SIlya Dryomov 15862f713615SIlya Dryomov dout("%s con %p out_kvec_bytes %d out_skip %d\n", __func__, con, 1587a56dd9bfSIlya Dryomov con->v1.out_kvec_bytes, con->v1.out_skip); 15882f713615SIlya Dryomov } 15892f713615SIlya Dryomov 15902f713615SIlya Dryomov void ceph_con_v1_revoke_incoming(struct ceph_connection *con) 15912f713615SIlya Dryomov { 1592a56dd9bfSIlya Dryomov unsigned int front_len = le32_to_cpu(con->v1.in_hdr.front_len); 1593a56dd9bfSIlya Dryomov unsigned int middle_len = le32_to_cpu(con->v1.in_hdr.middle_len); 1594a56dd9bfSIlya Dryomov unsigned int data_len = le32_to_cpu(con->v1.in_hdr.data_len); 15952f713615SIlya Dryomov 15962f713615SIlya Dryomov /* skip rest of message */ 1597a56dd9bfSIlya Dryomov con->v1.in_base_pos = con->v1.in_base_pos - 15982f713615SIlya Dryomov sizeof(struct ceph_msg_header) - 15992f713615SIlya Dryomov front_len - 16002f713615SIlya Dryomov middle_len - 16012f713615SIlya Dryomov data_len - 16022f713615SIlya Dryomov sizeof(struct ceph_msg_footer); 16032f713615SIlya Dryomov 1604a56dd9bfSIlya Dryomov con->v1.in_tag = CEPH_MSGR_TAG_READY; 16052f713615SIlya Dryomov con->in_seq++; 16062f713615SIlya Dryomov 1607a56dd9bfSIlya Dryomov dout("%s con %p in_base_pos %d\n", __func__, con, con->v1.in_base_pos); 16082f713615SIlya Dryomov } 16092f713615SIlya Dryomov 16102f713615SIlya Dryomov bool ceph_con_v1_opened(struct ceph_connection *con) 16112f713615SIlya Dryomov { 1612a56dd9bfSIlya Dryomov return con->v1.connect_seq; 16132f713615SIlya Dryomov } 16142f713615SIlya Dryomov 16152f713615SIlya Dryomov void ceph_con_v1_reset_session(struct ceph_connection *con) 16162f713615SIlya Dryomov { 1617a56dd9bfSIlya Dryomov con->v1.connect_seq = 0; 1618a56dd9bfSIlya Dryomov con->v1.peer_global_seq = 0; 16192f713615SIlya Dryomov } 16202f713615SIlya Dryomov 16212f713615SIlya Dryomov void ceph_con_v1_reset_protocol(struct ceph_connection *con) 16222f713615SIlya Dryomov { 1623a56dd9bfSIlya Dryomov con->v1.out_skip = 0; 16242f713615SIlya Dryomov } 1625