12f713615SIlya Dryomov // SPDX-License-Identifier: GPL-2.0 22f713615SIlya Dryomov #include <linux/ceph/ceph_debug.h> 32f713615SIlya Dryomov 42f713615SIlya Dryomov #include <linux/bvec.h> 52f713615SIlya Dryomov #include <linux/crc32c.h> 62f713615SIlya Dryomov #include <linux/net.h> 72f713615SIlya Dryomov #include <linux/socket.h> 82f713615SIlya Dryomov #include <net/sock.h> 92f713615SIlya Dryomov 102f713615SIlya Dryomov #include <linux/ceph/ceph_features.h> 112f713615SIlya Dryomov #include <linux/ceph/decode.h> 122f713615SIlya Dryomov #include <linux/ceph/libceph.h> 132f713615SIlya Dryomov #include <linux/ceph/messenger.h> 142f713615SIlya Dryomov 152f713615SIlya Dryomov /* static tag bytes (protocol control messages) */ 162f713615SIlya Dryomov static char tag_msg = CEPH_MSGR_TAG_MSG; 172f713615SIlya Dryomov static char tag_ack = CEPH_MSGR_TAG_ACK; 182f713615SIlya Dryomov static char tag_keepalive = CEPH_MSGR_TAG_KEEPALIVE; 192f713615SIlya Dryomov static char tag_keepalive2 = CEPH_MSGR_TAG_KEEPALIVE2; 202f713615SIlya Dryomov 212f713615SIlya Dryomov /* 222f713615SIlya Dryomov * If @buf is NULL, discard up to @len bytes. 232f713615SIlya Dryomov */ 242f713615SIlya Dryomov static int ceph_tcp_recvmsg(struct socket *sock, void *buf, size_t len) 252f713615SIlya Dryomov { 262f713615SIlya Dryomov struct kvec iov = {buf, len}; 272f713615SIlya Dryomov struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL }; 282f713615SIlya Dryomov int r; 292f713615SIlya Dryomov 302f713615SIlya Dryomov if (!buf) 312f713615SIlya Dryomov msg.msg_flags |= MSG_TRUNC; 322f713615SIlya Dryomov 33de4eda9dSAl Viro iov_iter_kvec(&msg.msg_iter, ITER_DEST, &iov, 1, len); 342f713615SIlya Dryomov r = sock_recvmsg(sock, &msg, msg.msg_flags); 352f713615SIlya Dryomov if (r == -EAGAIN) 362f713615SIlya Dryomov r = 0; 372f713615SIlya Dryomov return r; 382f713615SIlya Dryomov } 392f713615SIlya Dryomov 402f713615SIlya Dryomov static int ceph_tcp_recvpage(struct socket *sock, struct page *page, 412f713615SIlya Dryomov int page_offset, size_t length) 422f713615SIlya Dryomov { 431eb9cd15SChristoph Hellwig struct bio_vec bvec; 442f713615SIlya Dryomov struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL }; 452f713615SIlya Dryomov int r; 462f713615SIlya Dryomov 472f713615SIlya Dryomov BUG_ON(page_offset + length > PAGE_SIZE); 481eb9cd15SChristoph Hellwig bvec_set_page(&bvec, page, length, page_offset); 49de4eda9dSAl Viro iov_iter_bvec(&msg.msg_iter, ITER_DEST, &bvec, 1, length); 502f713615SIlya Dryomov r = sock_recvmsg(sock, &msg, msg.msg_flags); 512f713615SIlya Dryomov if (r == -EAGAIN) 522f713615SIlya Dryomov r = 0; 532f713615SIlya Dryomov return r; 542f713615SIlya Dryomov } 552f713615SIlya Dryomov 562f713615SIlya Dryomov /* 572f713615SIlya Dryomov * write something. @more is true if caller will be sending more data 582f713615SIlya Dryomov * shortly. 592f713615SIlya Dryomov */ 602f713615SIlya Dryomov static int ceph_tcp_sendmsg(struct socket *sock, struct kvec *iov, 612f713615SIlya Dryomov size_t kvlen, size_t len, bool more) 622f713615SIlya Dryomov { 632f713615SIlya Dryomov struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL }; 642f713615SIlya Dryomov int r; 652f713615SIlya Dryomov 662f713615SIlya Dryomov if (more) 672f713615SIlya Dryomov msg.msg_flags |= MSG_MORE; 682f713615SIlya Dryomov else 692f713615SIlya Dryomov msg.msg_flags |= MSG_EOR; /* superfluous, but what the hell */ 702f713615SIlya Dryomov 712f713615SIlya Dryomov r = kernel_sendmsg(sock, &msg, iov, kvlen, len); 722f713615SIlya Dryomov if (r == -EAGAIN) 732f713615SIlya Dryomov r = 0; 742f713615SIlya Dryomov return r; 752f713615SIlya Dryomov } 762f713615SIlya Dryomov 775da4d7b8SDavid Howells /* 785da4d7b8SDavid Howells * @more: MSG_MORE or 0. 795da4d7b8SDavid Howells */ 805da4d7b8SDavid Howells static int ceph_tcp_sendpage(struct socket *sock, struct page *page, 815da4d7b8SDavid Howells int offset, size_t size, int more) 825da4d7b8SDavid Howells { 835da4d7b8SDavid Howells struct msghdr msg = { 845da4d7b8SDavid Howells .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL | more, 855da4d7b8SDavid Howells }; 865da4d7b8SDavid Howells struct bio_vec bvec; 875da4d7b8SDavid Howells int ret; 885da4d7b8SDavid Howells 895da4d7b8SDavid Howells /* 905da4d7b8SDavid Howells * MSG_SPLICE_PAGES cannot properly handle pages with page_count == 0, 915da4d7b8SDavid Howells * we need to fall back to sendmsg if that's the case. 925da4d7b8SDavid Howells * 935da4d7b8SDavid Howells * Same goes for slab pages: skb_can_coalesce() allows 945da4d7b8SDavid Howells * coalescing neighboring slab objects into a single frag which 955da4d7b8SDavid Howells * triggers one of hardened usercopy checks. 965da4d7b8SDavid Howells */ 975da4d7b8SDavid Howells if (sendpage_ok(page)) 985da4d7b8SDavid Howells msg.msg_flags |= MSG_SPLICE_PAGES; 995da4d7b8SDavid Howells 1005da4d7b8SDavid Howells bvec_set_page(&bvec, page, size, offset); 1015da4d7b8SDavid Howells iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, size); 1025da4d7b8SDavid Howells 1035da4d7b8SDavid Howells ret = sock_sendmsg(sock, &msg); 1045da4d7b8SDavid Howells if (ret == -EAGAIN) 1055da4d7b8SDavid Howells ret = 0; 1065da4d7b8SDavid Howells 1075da4d7b8SDavid Howells return ret; 1085da4d7b8SDavid Howells } 1095da4d7b8SDavid Howells 1102f713615SIlya Dryomov static void con_out_kvec_reset(struct ceph_connection *con) 1112f713615SIlya Dryomov { 112a56dd9bfSIlya Dryomov BUG_ON(con->v1.out_skip); 1132f713615SIlya Dryomov 114a56dd9bfSIlya Dryomov con->v1.out_kvec_left = 0; 115a56dd9bfSIlya Dryomov con->v1.out_kvec_bytes = 0; 116a56dd9bfSIlya Dryomov con->v1.out_kvec_cur = &con->v1.out_kvec[0]; 1172f713615SIlya Dryomov } 1182f713615SIlya Dryomov 1192f713615SIlya Dryomov static void con_out_kvec_add(struct ceph_connection *con, 1202f713615SIlya Dryomov size_t size, void *data) 1212f713615SIlya Dryomov { 122a56dd9bfSIlya Dryomov int index = con->v1.out_kvec_left; 1232f713615SIlya Dryomov 124a56dd9bfSIlya Dryomov BUG_ON(con->v1.out_skip); 125a56dd9bfSIlya Dryomov BUG_ON(index >= ARRAY_SIZE(con->v1.out_kvec)); 1262f713615SIlya Dryomov 127a56dd9bfSIlya Dryomov con->v1.out_kvec[index].iov_len = size; 128a56dd9bfSIlya Dryomov con->v1.out_kvec[index].iov_base = data; 129a56dd9bfSIlya Dryomov con->v1.out_kvec_left++; 130a56dd9bfSIlya Dryomov con->v1.out_kvec_bytes += size; 1312f713615SIlya Dryomov } 1322f713615SIlya Dryomov 1332f713615SIlya Dryomov /* 1342f713615SIlya Dryomov * Chop off a kvec from the end. Return residual number of bytes for 1352f713615SIlya Dryomov * that kvec, i.e. how many bytes would have been written if the kvec 1362f713615SIlya Dryomov * hadn't been nuked. 1372f713615SIlya Dryomov */ 1382f713615SIlya Dryomov static int con_out_kvec_skip(struct ceph_connection *con) 1392f713615SIlya Dryomov { 1402f713615SIlya Dryomov int skip = 0; 1412f713615SIlya Dryomov 142a56dd9bfSIlya Dryomov if (con->v1.out_kvec_bytes > 0) { 143a56dd9bfSIlya Dryomov skip = con->v1.out_kvec_cur[con->v1.out_kvec_left - 1].iov_len; 144a56dd9bfSIlya Dryomov BUG_ON(con->v1.out_kvec_bytes < skip); 145a56dd9bfSIlya Dryomov BUG_ON(!con->v1.out_kvec_left); 146a56dd9bfSIlya Dryomov con->v1.out_kvec_bytes -= skip; 147a56dd9bfSIlya Dryomov con->v1.out_kvec_left--; 1482f713615SIlya Dryomov } 1492f713615SIlya Dryomov 1502f713615SIlya Dryomov return skip; 1512f713615SIlya Dryomov } 1522f713615SIlya Dryomov 1532f713615SIlya Dryomov static size_t sizeof_footer(struct ceph_connection *con) 1542f713615SIlya Dryomov { 1552f713615SIlya Dryomov return (con->peer_features & CEPH_FEATURE_MSG_AUTH) ? 1562f713615SIlya Dryomov sizeof(struct ceph_msg_footer) : 1572f713615SIlya Dryomov sizeof(struct ceph_msg_footer_old); 1582f713615SIlya Dryomov } 1592f713615SIlya Dryomov 1602f713615SIlya Dryomov static void prepare_message_data(struct ceph_msg *msg, u32 data_len) 1612f713615SIlya Dryomov { 162*d396f89dSJeff Layton /* Initialize data cursor if it's not a sparse read */ 163*d396f89dSJeff Layton if (!msg->sparse_read) 1642f713615SIlya Dryomov ceph_msg_data_cursor_init(&msg->cursor, msg, data_len); 1652f713615SIlya Dryomov } 1662f713615SIlya Dryomov 1672f713615SIlya Dryomov /* 1682f713615SIlya Dryomov * Prepare footer for currently outgoing message, and finish things 1692f713615SIlya Dryomov * off. Assumes out_kvec* are already valid.. we just add on to the end. 1702f713615SIlya Dryomov */ 1712f713615SIlya Dryomov static void prepare_write_message_footer(struct ceph_connection *con) 1722f713615SIlya Dryomov { 1732f713615SIlya Dryomov struct ceph_msg *m = con->out_msg; 1742f713615SIlya Dryomov 1752f713615SIlya Dryomov m->footer.flags |= CEPH_MSG_FOOTER_COMPLETE; 1762f713615SIlya Dryomov 1772f713615SIlya Dryomov dout("prepare_write_message_footer %p\n", con); 1782f713615SIlya Dryomov con_out_kvec_add(con, sizeof_footer(con), &m->footer); 1792f713615SIlya Dryomov if (con->peer_features & CEPH_FEATURE_MSG_AUTH) { 1802f713615SIlya Dryomov if (con->ops->sign_message) 1812f713615SIlya Dryomov con->ops->sign_message(m); 1822f713615SIlya Dryomov else 1832f713615SIlya Dryomov m->footer.sig = 0; 1842f713615SIlya Dryomov } else { 1852f713615SIlya Dryomov m->old_footer.flags = m->footer.flags; 1862f713615SIlya Dryomov } 187a56dd9bfSIlya Dryomov con->v1.out_more = m->more_to_follow; 188a56dd9bfSIlya Dryomov con->v1.out_msg_done = true; 1892f713615SIlya Dryomov } 1902f713615SIlya Dryomov 1912f713615SIlya Dryomov /* 1922f713615SIlya Dryomov * Prepare headers for the next outgoing message. 1932f713615SIlya Dryomov */ 1942f713615SIlya Dryomov static void prepare_write_message(struct ceph_connection *con) 1952f713615SIlya Dryomov { 1962f713615SIlya Dryomov struct ceph_msg *m; 1972f713615SIlya Dryomov u32 crc; 1982f713615SIlya Dryomov 1992f713615SIlya Dryomov con_out_kvec_reset(con); 200a56dd9bfSIlya Dryomov con->v1.out_msg_done = false; 2012f713615SIlya Dryomov 2022f713615SIlya Dryomov /* Sneak an ack in there first? If we can get it into the same 2032f713615SIlya Dryomov * TCP packet that's a good thing. */ 2042f713615SIlya Dryomov if (con->in_seq > con->in_seq_acked) { 2052f713615SIlya Dryomov con->in_seq_acked = con->in_seq; 2062f713615SIlya Dryomov con_out_kvec_add(con, sizeof (tag_ack), &tag_ack); 207a56dd9bfSIlya Dryomov con->v1.out_temp_ack = cpu_to_le64(con->in_seq_acked); 208a56dd9bfSIlya Dryomov con_out_kvec_add(con, sizeof(con->v1.out_temp_ack), 209a56dd9bfSIlya Dryomov &con->v1.out_temp_ack); 2102f713615SIlya Dryomov } 2112f713615SIlya Dryomov 2122f713615SIlya Dryomov ceph_con_get_out_msg(con); 2132f713615SIlya Dryomov m = con->out_msg; 2142f713615SIlya Dryomov 2152f713615SIlya Dryomov dout("prepare_write_message %p seq %lld type %d len %d+%d+%zd\n", 2162f713615SIlya Dryomov m, con->out_seq, le16_to_cpu(m->hdr.type), 2172f713615SIlya Dryomov le32_to_cpu(m->hdr.front_len), le32_to_cpu(m->hdr.middle_len), 2182f713615SIlya Dryomov m->data_length); 2192f713615SIlya Dryomov WARN_ON(m->front.iov_len != le32_to_cpu(m->hdr.front_len)); 2202f713615SIlya Dryomov WARN_ON(m->data_length != le32_to_cpu(m->hdr.data_len)); 2212f713615SIlya Dryomov 2222f713615SIlya Dryomov /* tag + hdr + front + middle */ 2232f713615SIlya Dryomov con_out_kvec_add(con, sizeof (tag_msg), &tag_msg); 224a56dd9bfSIlya Dryomov con_out_kvec_add(con, sizeof(con->v1.out_hdr), &con->v1.out_hdr); 2252f713615SIlya Dryomov con_out_kvec_add(con, m->front.iov_len, m->front.iov_base); 2262f713615SIlya Dryomov 2272f713615SIlya Dryomov if (m->middle) 2282f713615SIlya Dryomov con_out_kvec_add(con, m->middle->vec.iov_len, 2292f713615SIlya Dryomov m->middle->vec.iov_base); 2302f713615SIlya Dryomov 2312f713615SIlya Dryomov /* fill in hdr crc and finalize hdr */ 2322f713615SIlya Dryomov crc = crc32c(0, &m->hdr, offsetof(struct ceph_msg_header, crc)); 2332f713615SIlya Dryomov con->out_msg->hdr.crc = cpu_to_le32(crc); 234a56dd9bfSIlya Dryomov memcpy(&con->v1.out_hdr, &con->out_msg->hdr, sizeof(con->v1.out_hdr)); 2352f713615SIlya Dryomov 2362f713615SIlya Dryomov /* fill in front and middle crc, footer */ 2372f713615SIlya Dryomov crc = crc32c(0, m->front.iov_base, m->front.iov_len); 2382f713615SIlya Dryomov con->out_msg->footer.front_crc = cpu_to_le32(crc); 2392f713615SIlya Dryomov if (m->middle) { 2402f713615SIlya Dryomov crc = crc32c(0, m->middle->vec.iov_base, 2412f713615SIlya Dryomov m->middle->vec.iov_len); 2422f713615SIlya Dryomov con->out_msg->footer.middle_crc = cpu_to_le32(crc); 2432f713615SIlya Dryomov } else 2442f713615SIlya Dryomov con->out_msg->footer.middle_crc = 0; 2452f713615SIlya Dryomov dout("%s front_crc %u middle_crc %u\n", __func__, 2462f713615SIlya Dryomov le32_to_cpu(con->out_msg->footer.front_crc), 2472f713615SIlya Dryomov le32_to_cpu(con->out_msg->footer.middle_crc)); 2482f713615SIlya Dryomov con->out_msg->footer.flags = 0; 2492f713615SIlya Dryomov 2502f713615SIlya Dryomov /* is there a data payload? */ 2512f713615SIlya Dryomov con->out_msg->footer.data_crc = 0; 2522f713615SIlya Dryomov if (m->data_length) { 2532f713615SIlya Dryomov prepare_message_data(con->out_msg, m->data_length); 254a56dd9bfSIlya Dryomov con->v1.out_more = 1; /* data + footer will follow */ 2552f713615SIlya Dryomov } else { 2562f713615SIlya Dryomov /* no, queue up footer too and be done */ 2572f713615SIlya Dryomov prepare_write_message_footer(con); 2582f713615SIlya Dryomov } 2592f713615SIlya Dryomov 2602f713615SIlya Dryomov ceph_con_flag_set(con, CEPH_CON_F_WRITE_PENDING); 2612f713615SIlya Dryomov } 2622f713615SIlya Dryomov 2632f713615SIlya Dryomov /* 2642f713615SIlya Dryomov * Prepare an ack. 2652f713615SIlya Dryomov */ 2662f713615SIlya Dryomov static void prepare_write_ack(struct ceph_connection *con) 2672f713615SIlya Dryomov { 2682f713615SIlya Dryomov dout("prepare_write_ack %p %llu -> %llu\n", con, 2692f713615SIlya Dryomov con->in_seq_acked, con->in_seq); 2702f713615SIlya Dryomov con->in_seq_acked = con->in_seq; 2712f713615SIlya Dryomov 2722f713615SIlya Dryomov con_out_kvec_reset(con); 2732f713615SIlya Dryomov 2742f713615SIlya Dryomov con_out_kvec_add(con, sizeof (tag_ack), &tag_ack); 2752f713615SIlya Dryomov 276a56dd9bfSIlya Dryomov con->v1.out_temp_ack = cpu_to_le64(con->in_seq_acked); 277a56dd9bfSIlya Dryomov con_out_kvec_add(con, sizeof(con->v1.out_temp_ack), 278a56dd9bfSIlya Dryomov &con->v1.out_temp_ack); 2792f713615SIlya Dryomov 280a56dd9bfSIlya Dryomov con->v1.out_more = 1; /* more will follow.. eventually.. */ 2812f713615SIlya Dryomov ceph_con_flag_set(con, CEPH_CON_F_WRITE_PENDING); 2822f713615SIlya Dryomov } 2832f713615SIlya Dryomov 2842f713615SIlya Dryomov /* 2852f713615SIlya Dryomov * Prepare to share the seq during handshake 2862f713615SIlya Dryomov */ 2872f713615SIlya Dryomov static void prepare_write_seq(struct ceph_connection *con) 2882f713615SIlya Dryomov { 2892f713615SIlya Dryomov dout("prepare_write_seq %p %llu -> %llu\n", con, 2902f713615SIlya Dryomov con->in_seq_acked, con->in_seq); 2912f713615SIlya Dryomov con->in_seq_acked = con->in_seq; 2922f713615SIlya Dryomov 2932f713615SIlya Dryomov con_out_kvec_reset(con); 2942f713615SIlya Dryomov 295a56dd9bfSIlya Dryomov con->v1.out_temp_ack = cpu_to_le64(con->in_seq_acked); 296a56dd9bfSIlya Dryomov con_out_kvec_add(con, sizeof(con->v1.out_temp_ack), 297a56dd9bfSIlya Dryomov &con->v1.out_temp_ack); 2982f713615SIlya Dryomov 2992f713615SIlya Dryomov ceph_con_flag_set(con, CEPH_CON_F_WRITE_PENDING); 3002f713615SIlya Dryomov } 3012f713615SIlya Dryomov 3022f713615SIlya Dryomov /* 3032f713615SIlya Dryomov * Prepare to write keepalive byte. 3042f713615SIlya Dryomov */ 3052f713615SIlya Dryomov static void prepare_write_keepalive(struct ceph_connection *con) 3062f713615SIlya Dryomov { 3072f713615SIlya Dryomov dout("prepare_write_keepalive %p\n", con); 3082f713615SIlya Dryomov con_out_kvec_reset(con); 3092f713615SIlya Dryomov if (con->peer_features & CEPH_FEATURE_MSGR_KEEPALIVE2) { 3102f713615SIlya Dryomov struct timespec64 now; 3112f713615SIlya Dryomov 3122f713615SIlya Dryomov ktime_get_real_ts64(&now); 3132f713615SIlya Dryomov con_out_kvec_add(con, sizeof(tag_keepalive2), &tag_keepalive2); 314a56dd9bfSIlya Dryomov ceph_encode_timespec64(&con->v1.out_temp_keepalive2, &now); 315a56dd9bfSIlya Dryomov con_out_kvec_add(con, sizeof(con->v1.out_temp_keepalive2), 316a56dd9bfSIlya Dryomov &con->v1.out_temp_keepalive2); 3172f713615SIlya Dryomov } else { 3182f713615SIlya Dryomov con_out_kvec_add(con, sizeof(tag_keepalive), &tag_keepalive); 3192f713615SIlya Dryomov } 3202f713615SIlya Dryomov ceph_con_flag_set(con, CEPH_CON_F_WRITE_PENDING); 3212f713615SIlya Dryomov } 3222f713615SIlya Dryomov 3232f713615SIlya Dryomov /* 3242f713615SIlya Dryomov * Connection negotiation. 3252f713615SIlya Dryomov */ 3262f713615SIlya Dryomov 3272f713615SIlya Dryomov static int get_connect_authorizer(struct ceph_connection *con) 3282f713615SIlya Dryomov { 3292f713615SIlya Dryomov struct ceph_auth_handshake *auth; 3302f713615SIlya Dryomov int auth_proto; 3312f713615SIlya Dryomov 3322f713615SIlya Dryomov if (!con->ops->get_authorizer) { 333a56dd9bfSIlya Dryomov con->v1.auth = NULL; 334a56dd9bfSIlya Dryomov con->v1.out_connect.authorizer_protocol = CEPH_AUTH_UNKNOWN; 335a56dd9bfSIlya Dryomov con->v1.out_connect.authorizer_len = 0; 3362f713615SIlya Dryomov return 0; 3372f713615SIlya Dryomov } 3382f713615SIlya Dryomov 339a56dd9bfSIlya Dryomov auth = con->ops->get_authorizer(con, &auth_proto, con->v1.auth_retry); 3402f713615SIlya Dryomov if (IS_ERR(auth)) 3412f713615SIlya Dryomov return PTR_ERR(auth); 3422f713615SIlya Dryomov 343a56dd9bfSIlya Dryomov con->v1.auth = auth; 344a56dd9bfSIlya Dryomov con->v1.out_connect.authorizer_protocol = cpu_to_le32(auth_proto); 345a56dd9bfSIlya Dryomov con->v1.out_connect.authorizer_len = 346a56dd9bfSIlya Dryomov cpu_to_le32(auth->authorizer_buf_len); 3472f713615SIlya Dryomov return 0; 3482f713615SIlya Dryomov } 3492f713615SIlya Dryomov 3502f713615SIlya Dryomov /* 3512f713615SIlya Dryomov * We connected to a peer and are saying hello. 3522f713615SIlya Dryomov */ 3532f713615SIlya Dryomov static void prepare_write_banner(struct ceph_connection *con) 3542f713615SIlya Dryomov { 3552f713615SIlya Dryomov con_out_kvec_add(con, strlen(CEPH_BANNER), CEPH_BANNER); 3562f713615SIlya Dryomov con_out_kvec_add(con, sizeof (con->msgr->my_enc_addr), 3572f713615SIlya Dryomov &con->msgr->my_enc_addr); 3582f713615SIlya Dryomov 359a56dd9bfSIlya Dryomov con->v1.out_more = 0; 3602f713615SIlya Dryomov ceph_con_flag_set(con, CEPH_CON_F_WRITE_PENDING); 3612f713615SIlya Dryomov } 3622f713615SIlya Dryomov 3632f713615SIlya Dryomov static void __prepare_write_connect(struct ceph_connection *con) 3642f713615SIlya Dryomov { 365a56dd9bfSIlya Dryomov con_out_kvec_add(con, sizeof(con->v1.out_connect), 366a56dd9bfSIlya Dryomov &con->v1.out_connect); 367a56dd9bfSIlya Dryomov if (con->v1.auth) 368a56dd9bfSIlya Dryomov con_out_kvec_add(con, con->v1.auth->authorizer_buf_len, 369a56dd9bfSIlya Dryomov con->v1.auth->authorizer_buf); 3702f713615SIlya Dryomov 371a56dd9bfSIlya Dryomov con->v1.out_more = 0; 3722f713615SIlya Dryomov ceph_con_flag_set(con, CEPH_CON_F_WRITE_PENDING); 3732f713615SIlya Dryomov } 3742f713615SIlya Dryomov 3752f713615SIlya Dryomov static int prepare_write_connect(struct ceph_connection *con) 3762f713615SIlya Dryomov { 3772f713615SIlya Dryomov unsigned int global_seq = ceph_get_global_seq(con->msgr, 0); 3782f713615SIlya Dryomov int proto; 3792f713615SIlya Dryomov int ret; 3802f713615SIlya Dryomov 3812f713615SIlya Dryomov switch (con->peer_name.type) { 3822f713615SIlya Dryomov case CEPH_ENTITY_TYPE_MON: 3832f713615SIlya Dryomov proto = CEPH_MONC_PROTOCOL; 3842f713615SIlya Dryomov break; 3852f713615SIlya Dryomov case CEPH_ENTITY_TYPE_OSD: 3862f713615SIlya Dryomov proto = CEPH_OSDC_PROTOCOL; 3872f713615SIlya Dryomov break; 3882f713615SIlya Dryomov case CEPH_ENTITY_TYPE_MDS: 3892f713615SIlya Dryomov proto = CEPH_MDSC_PROTOCOL; 3902f713615SIlya Dryomov break; 3912f713615SIlya Dryomov default: 3922f713615SIlya Dryomov BUG(); 3932f713615SIlya Dryomov } 3942f713615SIlya Dryomov 3952f713615SIlya Dryomov dout("prepare_write_connect %p cseq=%d gseq=%d proto=%d\n", con, 396a56dd9bfSIlya Dryomov con->v1.connect_seq, global_seq, proto); 3972f713615SIlya Dryomov 398a56dd9bfSIlya Dryomov con->v1.out_connect.features = 3992f713615SIlya Dryomov cpu_to_le64(from_msgr(con->msgr)->supported_features); 400a56dd9bfSIlya Dryomov con->v1.out_connect.host_type = cpu_to_le32(CEPH_ENTITY_TYPE_CLIENT); 401a56dd9bfSIlya Dryomov con->v1.out_connect.connect_seq = cpu_to_le32(con->v1.connect_seq); 402a56dd9bfSIlya Dryomov con->v1.out_connect.global_seq = cpu_to_le32(global_seq); 403a56dd9bfSIlya Dryomov con->v1.out_connect.protocol_version = cpu_to_le32(proto); 404a56dd9bfSIlya Dryomov con->v1.out_connect.flags = 0; 4052f713615SIlya Dryomov 4062f713615SIlya Dryomov ret = get_connect_authorizer(con); 4072f713615SIlya Dryomov if (ret) 4082f713615SIlya Dryomov return ret; 4092f713615SIlya Dryomov 4102f713615SIlya Dryomov __prepare_write_connect(con); 4112f713615SIlya Dryomov return 0; 4122f713615SIlya Dryomov } 4132f713615SIlya Dryomov 4142f713615SIlya Dryomov /* 4152f713615SIlya Dryomov * write as much of pending kvecs to the socket as we can. 4162f713615SIlya Dryomov * 1 -> done 4172f713615SIlya Dryomov * 0 -> socket full, but more to do 4182f713615SIlya Dryomov * <0 -> error 4192f713615SIlya Dryomov */ 4202f713615SIlya Dryomov static int write_partial_kvec(struct ceph_connection *con) 4212f713615SIlya Dryomov { 4222f713615SIlya Dryomov int ret; 4232f713615SIlya Dryomov 424a56dd9bfSIlya Dryomov dout("write_partial_kvec %p %d left\n", con, con->v1.out_kvec_bytes); 425a56dd9bfSIlya Dryomov while (con->v1.out_kvec_bytes > 0) { 426a56dd9bfSIlya Dryomov ret = ceph_tcp_sendmsg(con->sock, con->v1.out_kvec_cur, 427a56dd9bfSIlya Dryomov con->v1.out_kvec_left, 428a56dd9bfSIlya Dryomov con->v1.out_kvec_bytes, 429a56dd9bfSIlya Dryomov con->v1.out_more); 4302f713615SIlya Dryomov if (ret <= 0) 4312f713615SIlya Dryomov goto out; 432a56dd9bfSIlya Dryomov con->v1.out_kvec_bytes -= ret; 433a56dd9bfSIlya Dryomov if (!con->v1.out_kvec_bytes) 4342f713615SIlya Dryomov break; /* done */ 4352f713615SIlya Dryomov 4362f713615SIlya Dryomov /* account for full iov entries consumed */ 437a56dd9bfSIlya Dryomov while (ret >= con->v1.out_kvec_cur->iov_len) { 438a56dd9bfSIlya Dryomov BUG_ON(!con->v1.out_kvec_left); 439a56dd9bfSIlya Dryomov ret -= con->v1.out_kvec_cur->iov_len; 440a56dd9bfSIlya Dryomov con->v1.out_kvec_cur++; 441a56dd9bfSIlya Dryomov con->v1.out_kvec_left--; 4422f713615SIlya Dryomov } 4432f713615SIlya Dryomov /* and for a partially-consumed entry */ 4442f713615SIlya Dryomov if (ret) { 445a56dd9bfSIlya Dryomov con->v1.out_kvec_cur->iov_len -= ret; 446a56dd9bfSIlya Dryomov con->v1.out_kvec_cur->iov_base += ret; 4472f713615SIlya Dryomov } 4482f713615SIlya Dryomov } 449a56dd9bfSIlya Dryomov con->v1.out_kvec_left = 0; 4502f713615SIlya Dryomov ret = 1; 4512f713615SIlya Dryomov out: 4522f713615SIlya Dryomov dout("write_partial_kvec %p %d left in %d kvecs ret = %d\n", con, 453a56dd9bfSIlya Dryomov con->v1.out_kvec_bytes, con->v1.out_kvec_left, ret); 4542f713615SIlya Dryomov return ret; /* done! */ 4552f713615SIlya Dryomov } 4562f713615SIlya Dryomov 4572f713615SIlya Dryomov /* 4582f713615SIlya Dryomov * Write as much message data payload as we can. If we finish, queue 4592f713615SIlya Dryomov * up the footer. 4602f713615SIlya Dryomov * 1 -> done, footer is now queued in out_kvec[]. 4612f713615SIlya Dryomov * 0 -> socket full, but more to do 4622f713615SIlya Dryomov * <0 -> error 4632f713615SIlya Dryomov */ 4642f713615SIlya Dryomov static int write_partial_message_data(struct ceph_connection *con) 4652f713615SIlya Dryomov { 4662f713615SIlya Dryomov struct ceph_msg *msg = con->out_msg; 4672f713615SIlya Dryomov struct ceph_msg_data_cursor *cursor = &msg->cursor; 4682f713615SIlya Dryomov bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC); 4692f713615SIlya Dryomov u32 crc; 4702f713615SIlya Dryomov 4712f713615SIlya Dryomov dout("%s %p msg %p\n", __func__, con, msg); 4722f713615SIlya Dryomov 4732f713615SIlya Dryomov if (!msg->num_data_items) 4742f713615SIlya Dryomov return -EINVAL; 4752f713615SIlya Dryomov 4762f713615SIlya Dryomov /* 4772f713615SIlya Dryomov * Iterate through each page that contains data to be 4782f713615SIlya Dryomov * written, and send as much as possible for each. 4792f713615SIlya Dryomov * 4802f713615SIlya Dryomov * If we are calculating the data crc (the default), we will 4812f713615SIlya Dryomov * need to map the page. If we have no pages, they have 4822f713615SIlya Dryomov * been revoked, so use the zero page. 4832f713615SIlya Dryomov */ 4842f713615SIlya Dryomov crc = do_datacrc ? le32_to_cpu(msg->footer.data_crc) : 0; 4852f713615SIlya Dryomov while (cursor->total_resid) { 4862f713615SIlya Dryomov struct page *page; 4872f713615SIlya Dryomov size_t page_offset; 4882f713615SIlya Dryomov size_t length; 4892f713615SIlya Dryomov int ret; 4902f713615SIlya Dryomov 4912f713615SIlya Dryomov if (!cursor->resid) { 4922f713615SIlya Dryomov ceph_msg_data_advance(cursor, 0); 4932f713615SIlya Dryomov continue; 4942f713615SIlya Dryomov } 4952f713615SIlya Dryomov 496da4ab869SJeff Layton page = ceph_msg_data_next(cursor, &page_offset, &length); 4975da4d7b8SDavid Howells ret = ceph_tcp_sendpage(con->sock, page, page_offset, length, 4985da4d7b8SDavid Howells MSG_MORE); 4992f713615SIlya Dryomov if (ret <= 0) { 5002f713615SIlya Dryomov if (do_datacrc) 5012f713615SIlya Dryomov msg->footer.data_crc = cpu_to_le32(crc); 5022f713615SIlya Dryomov 5032f713615SIlya Dryomov return ret; 5042f713615SIlya Dryomov } 5052f713615SIlya Dryomov if (do_datacrc && cursor->need_crc) 5062f713615SIlya Dryomov crc = ceph_crc32c_page(crc, page, page_offset, length); 5072f713615SIlya Dryomov ceph_msg_data_advance(cursor, (size_t)ret); 5082f713615SIlya Dryomov } 5092f713615SIlya Dryomov 5102f713615SIlya Dryomov dout("%s %p msg %p done\n", __func__, con, msg); 5112f713615SIlya Dryomov 5122f713615SIlya Dryomov /* prepare and queue up footer, too */ 5132f713615SIlya Dryomov if (do_datacrc) 5142f713615SIlya Dryomov msg->footer.data_crc = cpu_to_le32(crc); 5152f713615SIlya Dryomov else 5162f713615SIlya Dryomov msg->footer.flags |= CEPH_MSG_FOOTER_NOCRC; 5172f713615SIlya Dryomov con_out_kvec_reset(con); 5182f713615SIlya Dryomov prepare_write_message_footer(con); 5192f713615SIlya Dryomov 5202f713615SIlya Dryomov return 1; /* must return > 0 to indicate success */ 5212f713615SIlya Dryomov } 5222f713615SIlya Dryomov 5232f713615SIlya Dryomov /* 5242f713615SIlya Dryomov * write some zeros 5252f713615SIlya Dryomov */ 5262f713615SIlya Dryomov static int write_partial_skip(struct ceph_connection *con) 5272f713615SIlya Dryomov { 5282f713615SIlya Dryomov int ret; 5292f713615SIlya Dryomov 530a56dd9bfSIlya Dryomov dout("%s %p %d left\n", __func__, con, con->v1.out_skip); 531a56dd9bfSIlya Dryomov while (con->v1.out_skip > 0) { 532a56dd9bfSIlya Dryomov size_t size = min(con->v1.out_skip, (int)PAGE_SIZE); 5332f713615SIlya Dryomov 5345da4d7b8SDavid Howells ret = ceph_tcp_sendpage(con->sock, ceph_zero_page, 0, size, 5355da4d7b8SDavid Howells MSG_MORE); 5362f713615SIlya Dryomov if (ret <= 0) 5372f713615SIlya Dryomov goto out; 538a56dd9bfSIlya Dryomov con->v1.out_skip -= ret; 5392f713615SIlya Dryomov } 5402f713615SIlya Dryomov ret = 1; 5412f713615SIlya Dryomov out: 5422f713615SIlya Dryomov return ret; 5432f713615SIlya Dryomov } 5442f713615SIlya Dryomov 5452f713615SIlya Dryomov /* 5462f713615SIlya Dryomov * Prepare to read connection handshake, or an ack. 5472f713615SIlya Dryomov */ 5482f713615SIlya Dryomov static void prepare_read_banner(struct ceph_connection *con) 5492f713615SIlya Dryomov { 5502f713615SIlya Dryomov dout("prepare_read_banner %p\n", con); 551a56dd9bfSIlya Dryomov con->v1.in_base_pos = 0; 5522f713615SIlya Dryomov } 5532f713615SIlya Dryomov 5542f713615SIlya Dryomov static void prepare_read_connect(struct ceph_connection *con) 5552f713615SIlya Dryomov { 5562f713615SIlya Dryomov dout("prepare_read_connect %p\n", con); 557a56dd9bfSIlya Dryomov con->v1.in_base_pos = 0; 5582f713615SIlya Dryomov } 5592f713615SIlya Dryomov 5602f713615SIlya Dryomov static void prepare_read_ack(struct ceph_connection *con) 5612f713615SIlya Dryomov { 5622f713615SIlya Dryomov dout("prepare_read_ack %p\n", con); 563a56dd9bfSIlya Dryomov con->v1.in_base_pos = 0; 5642f713615SIlya Dryomov } 5652f713615SIlya Dryomov 5662f713615SIlya Dryomov static void prepare_read_seq(struct ceph_connection *con) 5672f713615SIlya Dryomov { 5682f713615SIlya Dryomov dout("prepare_read_seq %p\n", con); 569a56dd9bfSIlya Dryomov con->v1.in_base_pos = 0; 570a56dd9bfSIlya Dryomov con->v1.in_tag = CEPH_MSGR_TAG_SEQ; 5712f713615SIlya Dryomov } 5722f713615SIlya Dryomov 5732f713615SIlya Dryomov static void prepare_read_tag(struct ceph_connection *con) 5742f713615SIlya Dryomov { 5752f713615SIlya Dryomov dout("prepare_read_tag %p\n", con); 576a56dd9bfSIlya Dryomov con->v1.in_base_pos = 0; 577a56dd9bfSIlya Dryomov con->v1.in_tag = CEPH_MSGR_TAG_READY; 5782f713615SIlya Dryomov } 5792f713615SIlya Dryomov 5802f713615SIlya Dryomov static void prepare_read_keepalive_ack(struct ceph_connection *con) 5812f713615SIlya Dryomov { 5822f713615SIlya Dryomov dout("prepare_read_keepalive_ack %p\n", con); 583a56dd9bfSIlya Dryomov con->v1.in_base_pos = 0; 5842f713615SIlya Dryomov } 5852f713615SIlya Dryomov 5862f713615SIlya Dryomov /* 5872f713615SIlya Dryomov * Prepare to read a message. 5882f713615SIlya Dryomov */ 5892f713615SIlya Dryomov static int prepare_read_message(struct ceph_connection *con) 5902f713615SIlya Dryomov { 5912f713615SIlya Dryomov dout("prepare_read_message %p\n", con); 5922f713615SIlya Dryomov BUG_ON(con->in_msg != NULL); 593a56dd9bfSIlya Dryomov con->v1.in_base_pos = 0; 5942f713615SIlya Dryomov con->in_front_crc = con->in_middle_crc = con->in_data_crc = 0; 5952f713615SIlya Dryomov return 0; 5962f713615SIlya Dryomov } 5972f713615SIlya Dryomov 5982f713615SIlya Dryomov static int read_partial(struct ceph_connection *con, 5992f713615SIlya Dryomov int end, int size, void *object) 6002f713615SIlya Dryomov { 601a56dd9bfSIlya Dryomov while (con->v1.in_base_pos < end) { 602a56dd9bfSIlya Dryomov int left = end - con->v1.in_base_pos; 6032f713615SIlya Dryomov int have = size - left; 6042f713615SIlya Dryomov int ret = ceph_tcp_recvmsg(con->sock, object + have, left); 6052f713615SIlya Dryomov if (ret <= 0) 6062f713615SIlya Dryomov return ret; 607a56dd9bfSIlya Dryomov con->v1.in_base_pos += ret; 6082f713615SIlya Dryomov } 6092f713615SIlya Dryomov return 1; 6102f713615SIlya Dryomov } 6112f713615SIlya Dryomov 6122f713615SIlya Dryomov /* 6132f713615SIlya Dryomov * Read all or part of the connect-side handshake on a new connection 6142f713615SIlya Dryomov */ 6152f713615SIlya Dryomov static int read_partial_banner(struct ceph_connection *con) 6162f713615SIlya Dryomov { 6172f713615SIlya Dryomov int size; 6182f713615SIlya Dryomov int end; 6192f713615SIlya Dryomov int ret; 6202f713615SIlya Dryomov 621a56dd9bfSIlya Dryomov dout("read_partial_banner %p at %d\n", con, con->v1.in_base_pos); 6222f713615SIlya Dryomov 6232f713615SIlya Dryomov /* peer's banner */ 6242f713615SIlya Dryomov size = strlen(CEPH_BANNER); 6252f713615SIlya Dryomov end = size; 626a56dd9bfSIlya Dryomov ret = read_partial(con, end, size, con->v1.in_banner); 6272f713615SIlya Dryomov if (ret <= 0) 6282f713615SIlya Dryomov goto out; 6292f713615SIlya Dryomov 630a56dd9bfSIlya Dryomov size = sizeof(con->v1.actual_peer_addr); 6312f713615SIlya Dryomov end += size; 632a56dd9bfSIlya Dryomov ret = read_partial(con, end, size, &con->v1.actual_peer_addr); 6332f713615SIlya Dryomov if (ret <= 0) 6342f713615SIlya Dryomov goto out; 635a56dd9bfSIlya Dryomov ceph_decode_banner_addr(&con->v1.actual_peer_addr); 6362f713615SIlya Dryomov 637a56dd9bfSIlya Dryomov size = sizeof(con->v1.peer_addr_for_me); 6382f713615SIlya Dryomov end += size; 639a56dd9bfSIlya Dryomov ret = read_partial(con, end, size, &con->v1.peer_addr_for_me); 6402f713615SIlya Dryomov if (ret <= 0) 6412f713615SIlya Dryomov goto out; 642a56dd9bfSIlya Dryomov ceph_decode_banner_addr(&con->v1.peer_addr_for_me); 6432f713615SIlya Dryomov 6442f713615SIlya Dryomov out: 6452f713615SIlya Dryomov return ret; 6462f713615SIlya Dryomov } 6472f713615SIlya Dryomov 6482f713615SIlya Dryomov static int read_partial_connect(struct ceph_connection *con) 6492f713615SIlya Dryomov { 6502f713615SIlya Dryomov int size; 6512f713615SIlya Dryomov int end; 6522f713615SIlya Dryomov int ret; 6532f713615SIlya Dryomov 654a56dd9bfSIlya Dryomov dout("read_partial_connect %p at %d\n", con, con->v1.in_base_pos); 6552f713615SIlya Dryomov 656a56dd9bfSIlya Dryomov size = sizeof(con->v1.in_reply); 6572f713615SIlya Dryomov end = size; 658a56dd9bfSIlya Dryomov ret = read_partial(con, end, size, &con->v1.in_reply); 6592f713615SIlya Dryomov if (ret <= 0) 6602f713615SIlya Dryomov goto out; 6612f713615SIlya Dryomov 662a56dd9bfSIlya Dryomov if (con->v1.auth) { 663a56dd9bfSIlya Dryomov size = le32_to_cpu(con->v1.in_reply.authorizer_len); 664a56dd9bfSIlya Dryomov if (size > con->v1.auth->authorizer_reply_buf_len) { 6652f713615SIlya Dryomov pr_err("authorizer reply too big: %d > %zu\n", size, 666a56dd9bfSIlya Dryomov con->v1.auth->authorizer_reply_buf_len); 6672f713615SIlya Dryomov ret = -EINVAL; 6682f713615SIlya Dryomov goto out; 6692f713615SIlya Dryomov } 6702f713615SIlya Dryomov 6712f713615SIlya Dryomov end += size; 6722f713615SIlya Dryomov ret = read_partial(con, end, size, 673a56dd9bfSIlya Dryomov con->v1.auth->authorizer_reply_buf); 6742f713615SIlya Dryomov if (ret <= 0) 6752f713615SIlya Dryomov goto out; 6762f713615SIlya Dryomov } 6772f713615SIlya Dryomov 6782f713615SIlya Dryomov dout("read_partial_connect %p tag %d, con_seq = %u, g_seq = %u\n", 679a56dd9bfSIlya Dryomov con, con->v1.in_reply.tag, 680a56dd9bfSIlya Dryomov le32_to_cpu(con->v1.in_reply.connect_seq), 681a56dd9bfSIlya Dryomov le32_to_cpu(con->v1.in_reply.global_seq)); 6822f713615SIlya Dryomov out: 6832f713615SIlya Dryomov return ret; 6842f713615SIlya Dryomov } 6852f713615SIlya Dryomov 6862f713615SIlya Dryomov /* 6872f713615SIlya Dryomov * Verify the hello banner looks okay. 6882f713615SIlya Dryomov */ 6892f713615SIlya Dryomov static int verify_hello(struct ceph_connection *con) 6902f713615SIlya Dryomov { 691a56dd9bfSIlya Dryomov if (memcmp(con->v1.in_banner, CEPH_BANNER, strlen(CEPH_BANNER))) { 6922f713615SIlya Dryomov pr_err("connect to %s got bad banner\n", 6932f713615SIlya Dryomov ceph_pr_addr(&con->peer_addr)); 6942f713615SIlya Dryomov con->error_msg = "protocol error, bad banner"; 6952f713615SIlya Dryomov return -1; 6962f713615SIlya Dryomov } 6972f713615SIlya Dryomov return 0; 6982f713615SIlya Dryomov } 6992f713615SIlya Dryomov 7002f713615SIlya Dryomov static int process_banner(struct ceph_connection *con) 7012f713615SIlya Dryomov { 7022f713615SIlya Dryomov struct ceph_entity_addr *my_addr = &con->msgr->inst.addr; 7032f713615SIlya Dryomov 7042f713615SIlya Dryomov dout("process_banner on %p\n", con); 7052f713615SIlya Dryomov 7062f713615SIlya Dryomov if (verify_hello(con) < 0) 7072f713615SIlya Dryomov return -1; 7082f713615SIlya Dryomov 7092f713615SIlya Dryomov /* 7102f713615SIlya Dryomov * Make sure the other end is who we wanted. note that the other 7112f713615SIlya Dryomov * end may not yet know their ip address, so if it's 0.0.0.0, give 7122f713615SIlya Dryomov * them the benefit of the doubt. 7132f713615SIlya Dryomov */ 714a56dd9bfSIlya Dryomov if (memcmp(&con->peer_addr, &con->v1.actual_peer_addr, 7152f713615SIlya Dryomov sizeof(con->peer_addr)) != 0 && 716a56dd9bfSIlya Dryomov !(ceph_addr_is_blank(&con->v1.actual_peer_addr) && 717a56dd9bfSIlya Dryomov con->v1.actual_peer_addr.nonce == con->peer_addr.nonce)) { 7182f713615SIlya Dryomov pr_warn("wrong peer, want %s/%u, got %s/%u\n", 7192f713615SIlya Dryomov ceph_pr_addr(&con->peer_addr), 7202f713615SIlya Dryomov le32_to_cpu(con->peer_addr.nonce), 721a56dd9bfSIlya Dryomov ceph_pr_addr(&con->v1.actual_peer_addr), 722a56dd9bfSIlya Dryomov le32_to_cpu(con->v1.actual_peer_addr.nonce)); 7232f713615SIlya Dryomov con->error_msg = "wrong peer at address"; 7242f713615SIlya Dryomov return -1; 7252f713615SIlya Dryomov } 7262f713615SIlya Dryomov 7272f713615SIlya Dryomov /* 7282f713615SIlya Dryomov * did we learn our address? 7292f713615SIlya Dryomov */ 7302f713615SIlya Dryomov if (ceph_addr_is_blank(my_addr)) { 7312f713615SIlya Dryomov memcpy(&my_addr->in_addr, 732a56dd9bfSIlya Dryomov &con->v1.peer_addr_for_me.in_addr, 733a56dd9bfSIlya Dryomov sizeof(con->v1.peer_addr_for_me.in_addr)); 7342f713615SIlya Dryomov ceph_addr_set_port(my_addr, 0); 7352f713615SIlya Dryomov ceph_encode_my_addr(con->msgr); 7362f713615SIlya Dryomov dout("process_banner learned my addr is %s\n", 7372f713615SIlya Dryomov ceph_pr_addr(my_addr)); 7382f713615SIlya Dryomov } 7392f713615SIlya Dryomov 7402f713615SIlya Dryomov return 0; 7412f713615SIlya Dryomov } 7422f713615SIlya Dryomov 7432f713615SIlya Dryomov static int process_connect(struct ceph_connection *con) 7442f713615SIlya Dryomov { 7452f713615SIlya Dryomov u64 sup_feat = from_msgr(con->msgr)->supported_features; 7462f713615SIlya Dryomov u64 req_feat = from_msgr(con->msgr)->required_features; 747a56dd9bfSIlya Dryomov u64 server_feat = le64_to_cpu(con->v1.in_reply.features); 7482f713615SIlya Dryomov int ret; 7492f713615SIlya Dryomov 750a56dd9bfSIlya Dryomov dout("process_connect on %p tag %d\n", con, con->v1.in_tag); 7512f713615SIlya Dryomov 752a56dd9bfSIlya Dryomov if (con->v1.auth) { 753a56dd9bfSIlya Dryomov int len = le32_to_cpu(con->v1.in_reply.authorizer_len); 7542f713615SIlya Dryomov 7552f713615SIlya Dryomov /* 7562f713615SIlya Dryomov * Any connection that defines ->get_authorizer() 7572f713615SIlya Dryomov * should also define ->add_authorizer_challenge() and 7582f713615SIlya Dryomov * ->verify_authorizer_reply(). 7592f713615SIlya Dryomov * 7602f713615SIlya Dryomov * See get_connect_authorizer(). 7612f713615SIlya Dryomov */ 762a56dd9bfSIlya Dryomov if (con->v1.in_reply.tag == 763a56dd9bfSIlya Dryomov CEPH_MSGR_TAG_CHALLENGE_AUTHORIZER) { 7642f713615SIlya Dryomov ret = con->ops->add_authorizer_challenge( 765a56dd9bfSIlya Dryomov con, con->v1.auth->authorizer_reply_buf, len); 7662f713615SIlya Dryomov if (ret < 0) 7672f713615SIlya Dryomov return ret; 7682f713615SIlya Dryomov 7692f713615SIlya Dryomov con_out_kvec_reset(con); 7702f713615SIlya Dryomov __prepare_write_connect(con); 7712f713615SIlya Dryomov prepare_read_connect(con); 7722f713615SIlya Dryomov return 0; 7732f713615SIlya Dryomov } 7742f713615SIlya Dryomov 7752f713615SIlya Dryomov if (len) { 7762f713615SIlya Dryomov ret = con->ops->verify_authorizer_reply(con); 7772f713615SIlya Dryomov if (ret < 0) { 7782f713615SIlya Dryomov con->error_msg = "bad authorize reply"; 7792f713615SIlya Dryomov return ret; 7802f713615SIlya Dryomov } 7812f713615SIlya Dryomov } 7822f713615SIlya Dryomov } 7832f713615SIlya Dryomov 784a56dd9bfSIlya Dryomov switch (con->v1.in_reply.tag) { 7852f713615SIlya Dryomov case CEPH_MSGR_TAG_FEATURES: 7862f713615SIlya Dryomov pr_err("%s%lld %s feature set mismatch," 7872f713615SIlya Dryomov " my %llx < server's %llx, missing %llx\n", 7882f713615SIlya Dryomov ENTITY_NAME(con->peer_name), 7892f713615SIlya Dryomov ceph_pr_addr(&con->peer_addr), 7902f713615SIlya Dryomov sup_feat, server_feat, server_feat & ~sup_feat); 7912f713615SIlya Dryomov con->error_msg = "missing required protocol features"; 7922f713615SIlya Dryomov return -1; 7932f713615SIlya Dryomov 7942f713615SIlya Dryomov case CEPH_MSGR_TAG_BADPROTOVER: 7952f713615SIlya Dryomov pr_err("%s%lld %s protocol version mismatch," 7962f713615SIlya Dryomov " my %d != server's %d\n", 7972f713615SIlya Dryomov ENTITY_NAME(con->peer_name), 7982f713615SIlya Dryomov ceph_pr_addr(&con->peer_addr), 799a56dd9bfSIlya Dryomov le32_to_cpu(con->v1.out_connect.protocol_version), 800a56dd9bfSIlya Dryomov le32_to_cpu(con->v1.in_reply.protocol_version)); 8012f713615SIlya Dryomov con->error_msg = "protocol version mismatch"; 8022f713615SIlya Dryomov return -1; 8032f713615SIlya Dryomov 8042f713615SIlya Dryomov case CEPH_MSGR_TAG_BADAUTHORIZER: 805a56dd9bfSIlya Dryomov con->v1.auth_retry++; 8062f713615SIlya Dryomov dout("process_connect %p got BADAUTHORIZER attempt %d\n", con, 807a56dd9bfSIlya Dryomov con->v1.auth_retry); 808a56dd9bfSIlya Dryomov if (con->v1.auth_retry == 2) { 8092f713615SIlya Dryomov con->error_msg = "connect authorization failure"; 8102f713615SIlya Dryomov return -1; 8112f713615SIlya Dryomov } 8122f713615SIlya Dryomov con_out_kvec_reset(con); 8132f713615SIlya Dryomov ret = prepare_write_connect(con); 8142f713615SIlya Dryomov if (ret < 0) 8152f713615SIlya Dryomov return ret; 8162f713615SIlya Dryomov prepare_read_connect(con); 8172f713615SIlya Dryomov break; 8182f713615SIlya Dryomov 8192f713615SIlya Dryomov case CEPH_MSGR_TAG_RESETSESSION: 8202f713615SIlya Dryomov /* 8212f713615SIlya Dryomov * If we connected with a large connect_seq but the peer 8222f713615SIlya Dryomov * has no record of a session with us (no connection, or 8232f713615SIlya Dryomov * connect_seq == 0), they will send RESETSESION to indicate 8242f713615SIlya Dryomov * that they must have reset their session, and may have 8252f713615SIlya Dryomov * dropped messages. 8262f713615SIlya Dryomov */ 8272f713615SIlya Dryomov dout("process_connect got RESET peer seq %u\n", 828a56dd9bfSIlya Dryomov le32_to_cpu(con->v1.in_reply.connect_seq)); 8292f713615SIlya Dryomov pr_info("%s%lld %s session reset\n", 8302f713615SIlya Dryomov ENTITY_NAME(con->peer_name), 8312f713615SIlya Dryomov ceph_pr_addr(&con->peer_addr)); 8322f713615SIlya Dryomov ceph_con_reset_session(con); 8332f713615SIlya Dryomov con_out_kvec_reset(con); 8342f713615SIlya Dryomov ret = prepare_write_connect(con); 8352f713615SIlya Dryomov if (ret < 0) 8362f713615SIlya Dryomov return ret; 8372f713615SIlya Dryomov prepare_read_connect(con); 8382f713615SIlya Dryomov 8392f713615SIlya Dryomov /* Tell ceph about it. */ 8402f713615SIlya Dryomov mutex_unlock(&con->mutex); 8412f713615SIlya Dryomov if (con->ops->peer_reset) 8422f713615SIlya Dryomov con->ops->peer_reset(con); 8432f713615SIlya Dryomov mutex_lock(&con->mutex); 8442f713615SIlya Dryomov if (con->state != CEPH_CON_S_V1_CONNECT_MSG) 8452f713615SIlya Dryomov return -EAGAIN; 8462f713615SIlya Dryomov break; 8472f713615SIlya Dryomov 8482f713615SIlya Dryomov case CEPH_MSGR_TAG_RETRY_SESSION: 8492f713615SIlya Dryomov /* 8502f713615SIlya Dryomov * If we sent a smaller connect_seq than the peer has, try 8512f713615SIlya Dryomov * again with a larger value. 8522f713615SIlya Dryomov */ 8532f713615SIlya Dryomov dout("process_connect got RETRY_SESSION my seq %u, peer %u\n", 854a56dd9bfSIlya Dryomov le32_to_cpu(con->v1.out_connect.connect_seq), 855a56dd9bfSIlya Dryomov le32_to_cpu(con->v1.in_reply.connect_seq)); 856a56dd9bfSIlya Dryomov con->v1.connect_seq = le32_to_cpu(con->v1.in_reply.connect_seq); 8572f713615SIlya Dryomov con_out_kvec_reset(con); 8582f713615SIlya Dryomov ret = prepare_write_connect(con); 8592f713615SIlya Dryomov if (ret < 0) 8602f713615SIlya Dryomov return ret; 8612f713615SIlya Dryomov prepare_read_connect(con); 8622f713615SIlya Dryomov break; 8632f713615SIlya Dryomov 8642f713615SIlya Dryomov case CEPH_MSGR_TAG_RETRY_GLOBAL: 8652f713615SIlya Dryomov /* 8662f713615SIlya Dryomov * If we sent a smaller global_seq than the peer has, try 8672f713615SIlya Dryomov * again with a larger value. 8682f713615SIlya Dryomov */ 8692f713615SIlya Dryomov dout("process_connect got RETRY_GLOBAL my %u peer_gseq %u\n", 870a56dd9bfSIlya Dryomov con->v1.peer_global_seq, 871a56dd9bfSIlya Dryomov le32_to_cpu(con->v1.in_reply.global_seq)); 8722f713615SIlya Dryomov ceph_get_global_seq(con->msgr, 873a56dd9bfSIlya Dryomov le32_to_cpu(con->v1.in_reply.global_seq)); 8742f713615SIlya Dryomov con_out_kvec_reset(con); 8752f713615SIlya Dryomov ret = prepare_write_connect(con); 8762f713615SIlya Dryomov if (ret < 0) 8772f713615SIlya Dryomov return ret; 8782f713615SIlya Dryomov prepare_read_connect(con); 8792f713615SIlya Dryomov break; 8802f713615SIlya Dryomov 8812f713615SIlya Dryomov case CEPH_MSGR_TAG_SEQ: 8822f713615SIlya Dryomov case CEPH_MSGR_TAG_READY: 8832f713615SIlya Dryomov if (req_feat & ~server_feat) { 8842f713615SIlya Dryomov pr_err("%s%lld %s protocol feature mismatch," 8852f713615SIlya Dryomov " my required %llx > server's %llx, need %llx\n", 8862f713615SIlya Dryomov ENTITY_NAME(con->peer_name), 8872f713615SIlya Dryomov ceph_pr_addr(&con->peer_addr), 8882f713615SIlya Dryomov req_feat, server_feat, req_feat & ~server_feat); 8892f713615SIlya Dryomov con->error_msg = "missing required protocol features"; 8902f713615SIlya Dryomov return -1; 8912f713615SIlya Dryomov } 8922f713615SIlya Dryomov 8932f713615SIlya Dryomov WARN_ON(con->state != CEPH_CON_S_V1_CONNECT_MSG); 8942f713615SIlya Dryomov con->state = CEPH_CON_S_OPEN; 895a56dd9bfSIlya Dryomov con->v1.auth_retry = 0; /* we authenticated; clear flag */ 896a56dd9bfSIlya Dryomov con->v1.peer_global_seq = 897a56dd9bfSIlya Dryomov le32_to_cpu(con->v1.in_reply.global_seq); 898a56dd9bfSIlya Dryomov con->v1.connect_seq++; 8992f713615SIlya Dryomov con->peer_features = server_feat; 9002f713615SIlya Dryomov dout("process_connect got READY gseq %d cseq %d (%d)\n", 901a56dd9bfSIlya Dryomov con->v1.peer_global_seq, 902a56dd9bfSIlya Dryomov le32_to_cpu(con->v1.in_reply.connect_seq), 903a56dd9bfSIlya Dryomov con->v1.connect_seq); 904a56dd9bfSIlya Dryomov WARN_ON(con->v1.connect_seq != 905a56dd9bfSIlya Dryomov le32_to_cpu(con->v1.in_reply.connect_seq)); 9062f713615SIlya Dryomov 907a56dd9bfSIlya Dryomov if (con->v1.in_reply.flags & CEPH_MSG_CONNECT_LOSSY) 9082f713615SIlya Dryomov ceph_con_flag_set(con, CEPH_CON_F_LOSSYTX); 9092f713615SIlya Dryomov 9102f713615SIlya Dryomov con->delay = 0; /* reset backoff memory */ 9112f713615SIlya Dryomov 912a56dd9bfSIlya Dryomov if (con->v1.in_reply.tag == CEPH_MSGR_TAG_SEQ) { 9132f713615SIlya Dryomov prepare_write_seq(con); 9142f713615SIlya Dryomov prepare_read_seq(con); 9152f713615SIlya Dryomov } else { 9162f713615SIlya Dryomov prepare_read_tag(con); 9172f713615SIlya Dryomov } 9182f713615SIlya Dryomov break; 9192f713615SIlya Dryomov 9202f713615SIlya Dryomov case CEPH_MSGR_TAG_WAIT: 9212f713615SIlya Dryomov /* 9222f713615SIlya Dryomov * If there is a connection race (we are opening 9232f713615SIlya Dryomov * connections to each other), one of us may just have 9242f713615SIlya Dryomov * to WAIT. This shouldn't happen if we are the 9252f713615SIlya Dryomov * client. 9262f713615SIlya Dryomov */ 9272f713615SIlya Dryomov con->error_msg = "protocol error, got WAIT as client"; 9282f713615SIlya Dryomov return -1; 9292f713615SIlya Dryomov 9302f713615SIlya Dryomov default: 9312f713615SIlya Dryomov con->error_msg = "protocol error, garbage tag during connect"; 9322f713615SIlya Dryomov return -1; 9332f713615SIlya Dryomov } 9342f713615SIlya Dryomov return 0; 9352f713615SIlya Dryomov } 9362f713615SIlya Dryomov 9372f713615SIlya Dryomov /* 9382f713615SIlya Dryomov * read (part of) an ack 9392f713615SIlya Dryomov */ 9402f713615SIlya Dryomov static int read_partial_ack(struct ceph_connection *con) 9412f713615SIlya Dryomov { 942a56dd9bfSIlya Dryomov int size = sizeof(con->v1.in_temp_ack); 9432f713615SIlya Dryomov int end = size; 9442f713615SIlya Dryomov 945a56dd9bfSIlya Dryomov return read_partial(con, end, size, &con->v1.in_temp_ack); 9462f713615SIlya Dryomov } 9472f713615SIlya Dryomov 9482f713615SIlya Dryomov /* 9492f713615SIlya Dryomov * We can finally discard anything that's been acked. 9502f713615SIlya Dryomov */ 9512f713615SIlya Dryomov static void process_ack(struct ceph_connection *con) 9522f713615SIlya Dryomov { 953a56dd9bfSIlya Dryomov u64 ack = le64_to_cpu(con->v1.in_temp_ack); 9542f713615SIlya Dryomov 955a56dd9bfSIlya Dryomov if (con->v1.in_tag == CEPH_MSGR_TAG_ACK) 9562f713615SIlya Dryomov ceph_con_discard_sent(con, ack); 9572f713615SIlya Dryomov else 9582f713615SIlya Dryomov ceph_con_discard_requeued(con, ack); 9592f713615SIlya Dryomov 9602f713615SIlya Dryomov prepare_read_tag(con); 9612f713615SIlya Dryomov } 9622f713615SIlya Dryomov 963*d396f89dSJeff Layton static int read_partial_message_chunk(struct ceph_connection *con, 9642f713615SIlya Dryomov struct kvec *section, 9652f713615SIlya Dryomov unsigned int sec_len, u32 *crc) 9662f713615SIlya Dryomov { 9672f713615SIlya Dryomov int ret, left; 9682f713615SIlya Dryomov 9692f713615SIlya Dryomov BUG_ON(!section); 9702f713615SIlya Dryomov 9712f713615SIlya Dryomov while (section->iov_len < sec_len) { 9722f713615SIlya Dryomov BUG_ON(section->iov_base == NULL); 9732f713615SIlya Dryomov left = sec_len - section->iov_len; 9742f713615SIlya Dryomov ret = ceph_tcp_recvmsg(con->sock, (char *)section->iov_base + 9752f713615SIlya Dryomov section->iov_len, left); 9762f713615SIlya Dryomov if (ret <= 0) 9772f713615SIlya Dryomov return ret; 9782f713615SIlya Dryomov section->iov_len += ret; 9792f713615SIlya Dryomov } 9802f713615SIlya Dryomov if (section->iov_len == sec_len) 981*d396f89dSJeff Layton *crc = crc32c(*crc, section->iov_base, section->iov_len); 9822f713615SIlya Dryomov 9832f713615SIlya Dryomov return 1; 9842f713615SIlya Dryomov } 9852f713615SIlya Dryomov 986*d396f89dSJeff Layton static inline int read_partial_message_section(struct ceph_connection *con, 987*d396f89dSJeff Layton struct kvec *section, 988*d396f89dSJeff Layton unsigned int sec_len, u32 *crc) 989*d396f89dSJeff Layton { 990*d396f89dSJeff Layton *crc = 0; 991*d396f89dSJeff Layton return read_partial_message_chunk(con, section, sec_len, crc); 992*d396f89dSJeff Layton } 993*d396f89dSJeff Layton 994*d396f89dSJeff Layton static int read_sparse_msg_extent(struct ceph_connection *con, u32 *crc) 995*d396f89dSJeff Layton { 996*d396f89dSJeff Layton struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor; 997*d396f89dSJeff Layton bool do_bounce = ceph_test_opt(from_msgr(con->msgr), RXBOUNCE); 998*d396f89dSJeff Layton 999*d396f89dSJeff Layton if (do_bounce && unlikely(!con->bounce_page)) { 1000*d396f89dSJeff Layton con->bounce_page = alloc_page(GFP_NOIO); 1001*d396f89dSJeff Layton if (!con->bounce_page) { 1002*d396f89dSJeff Layton pr_err("failed to allocate bounce page\n"); 1003*d396f89dSJeff Layton return -ENOMEM; 1004*d396f89dSJeff Layton } 1005*d396f89dSJeff Layton } 1006*d396f89dSJeff Layton 1007*d396f89dSJeff Layton while (cursor->sr_resid > 0) { 1008*d396f89dSJeff Layton struct page *page, *rpage; 1009*d396f89dSJeff Layton size_t off, len; 1010*d396f89dSJeff Layton int ret; 1011*d396f89dSJeff Layton 1012*d396f89dSJeff Layton page = ceph_msg_data_next(cursor, &off, &len); 1013*d396f89dSJeff Layton rpage = do_bounce ? con->bounce_page : page; 1014*d396f89dSJeff Layton 1015*d396f89dSJeff Layton /* clamp to what remains in extent */ 1016*d396f89dSJeff Layton len = min_t(int, len, cursor->sr_resid); 1017*d396f89dSJeff Layton ret = ceph_tcp_recvpage(con->sock, rpage, (int)off, len); 1018*d396f89dSJeff Layton if (ret <= 0) 1019*d396f89dSJeff Layton return ret; 1020*d396f89dSJeff Layton *crc = ceph_crc32c_page(*crc, rpage, off, ret); 1021*d396f89dSJeff Layton ceph_msg_data_advance(cursor, (size_t)ret); 1022*d396f89dSJeff Layton cursor->sr_resid -= ret; 1023*d396f89dSJeff Layton if (do_bounce) 1024*d396f89dSJeff Layton memcpy_page(page, off, rpage, off, ret); 1025*d396f89dSJeff Layton } 1026*d396f89dSJeff Layton return 1; 1027*d396f89dSJeff Layton } 1028*d396f89dSJeff Layton 1029*d396f89dSJeff Layton static int read_sparse_msg_data(struct ceph_connection *con) 1030*d396f89dSJeff Layton { 1031*d396f89dSJeff Layton struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor; 1032*d396f89dSJeff Layton bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC); 1033*d396f89dSJeff Layton u32 crc = 0; 1034*d396f89dSJeff Layton int ret = 1; 1035*d396f89dSJeff Layton 1036*d396f89dSJeff Layton if (do_datacrc) 1037*d396f89dSJeff Layton crc = con->in_data_crc; 1038*d396f89dSJeff Layton 1039*d396f89dSJeff Layton do { 1040*d396f89dSJeff Layton if (con->v1.in_sr_kvec.iov_base) 1041*d396f89dSJeff Layton ret = read_partial_message_chunk(con, 1042*d396f89dSJeff Layton &con->v1.in_sr_kvec, 1043*d396f89dSJeff Layton con->v1.in_sr_len, 1044*d396f89dSJeff Layton &crc); 1045*d396f89dSJeff Layton else if (cursor->sr_resid > 0) 1046*d396f89dSJeff Layton ret = read_sparse_msg_extent(con, &crc); 1047*d396f89dSJeff Layton 1048*d396f89dSJeff Layton if (ret <= 0) { 1049*d396f89dSJeff Layton if (do_datacrc) 1050*d396f89dSJeff Layton con->in_data_crc = crc; 1051*d396f89dSJeff Layton return ret; 1052*d396f89dSJeff Layton } 1053*d396f89dSJeff Layton 1054*d396f89dSJeff Layton memset(&con->v1.in_sr_kvec, 0, sizeof(con->v1.in_sr_kvec)); 1055*d396f89dSJeff Layton ret = con->ops->sparse_read(con, cursor, 1056*d396f89dSJeff Layton (char **)&con->v1.in_sr_kvec.iov_base); 1057*d396f89dSJeff Layton con->v1.in_sr_len = ret; 1058*d396f89dSJeff Layton } while (ret > 0); 1059*d396f89dSJeff Layton 1060*d396f89dSJeff Layton if (do_datacrc) 1061*d396f89dSJeff Layton con->in_data_crc = crc; 1062*d396f89dSJeff Layton 1063*d396f89dSJeff Layton return ret < 0 ? ret : 1; /* must return > 0 to indicate success */ 1064*d396f89dSJeff Layton } 1065*d396f89dSJeff Layton 10662f713615SIlya Dryomov static int read_partial_msg_data(struct ceph_connection *con) 10672f713615SIlya Dryomov { 1068038b8d1dSIlya Dryomov struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor; 10692f713615SIlya Dryomov bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC); 10702f713615SIlya Dryomov struct page *page; 10712f713615SIlya Dryomov size_t page_offset; 10722f713615SIlya Dryomov size_t length; 10732f713615SIlya Dryomov u32 crc = 0; 10742f713615SIlya Dryomov int ret; 10752f713615SIlya Dryomov 10762f713615SIlya Dryomov if (do_datacrc) 10772f713615SIlya Dryomov crc = con->in_data_crc; 10782f713615SIlya Dryomov while (cursor->total_resid) { 10792f713615SIlya Dryomov if (!cursor->resid) { 10802f713615SIlya Dryomov ceph_msg_data_advance(cursor, 0); 10812f713615SIlya Dryomov continue; 10822f713615SIlya Dryomov } 10832f713615SIlya Dryomov 1084da4ab869SJeff Layton page = ceph_msg_data_next(cursor, &page_offset, &length); 10852f713615SIlya Dryomov ret = ceph_tcp_recvpage(con->sock, page, page_offset, length); 10862f713615SIlya Dryomov if (ret <= 0) { 10872f713615SIlya Dryomov if (do_datacrc) 10882f713615SIlya Dryomov con->in_data_crc = crc; 10892f713615SIlya Dryomov 10902f713615SIlya Dryomov return ret; 10912f713615SIlya Dryomov } 10922f713615SIlya Dryomov 10932f713615SIlya Dryomov if (do_datacrc) 10942f713615SIlya Dryomov crc = ceph_crc32c_page(crc, page, page_offset, ret); 10952f713615SIlya Dryomov ceph_msg_data_advance(cursor, (size_t)ret); 10962f713615SIlya Dryomov } 10972f713615SIlya Dryomov if (do_datacrc) 10982f713615SIlya Dryomov con->in_data_crc = crc; 10992f713615SIlya Dryomov 11002f713615SIlya Dryomov return 1; /* must return > 0 to indicate success */ 11012f713615SIlya Dryomov } 11022f713615SIlya Dryomov 1103038b8d1dSIlya Dryomov static int read_partial_msg_data_bounce(struct ceph_connection *con) 1104038b8d1dSIlya Dryomov { 1105038b8d1dSIlya Dryomov struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor; 1106038b8d1dSIlya Dryomov struct page *page; 1107038b8d1dSIlya Dryomov size_t off, len; 1108038b8d1dSIlya Dryomov u32 crc; 1109038b8d1dSIlya Dryomov int ret; 1110038b8d1dSIlya Dryomov 1111038b8d1dSIlya Dryomov if (unlikely(!con->bounce_page)) { 1112038b8d1dSIlya Dryomov con->bounce_page = alloc_page(GFP_NOIO); 1113038b8d1dSIlya Dryomov if (!con->bounce_page) { 1114038b8d1dSIlya Dryomov pr_err("failed to allocate bounce page\n"); 1115038b8d1dSIlya Dryomov return -ENOMEM; 1116038b8d1dSIlya Dryomov } 1117038b8d1dSIlya Dryomov } 1118038b8d1dSIlya Dryomov 1119038b8d1dSIlya Dryomov crc = con->in_data_crc; 1120038b8d1dSIlya Dryomov while (cursor->total_resid) { 1121038b8d1dSIlya Dryomov if (!cursor->resid) { 1122038b8d1dSIlya Dryomov ceph_msg_data_advance(cursor, 0); 1123038b8d1dSIlya Dryomov continue; 1124038b8d1dSIlya Dryomov } 1125038b8d1dSIlya Dryomov 1126da4ab869SJeff Layton page = ceph_msg_data_next(cursor, &off, &len); 1127038b8d1dSIlya Dryomov ret = ceph_tcp_recvpage(con->sock, con->bounce_page, 0, len); 1128038b8d1dSIlya Dryomov if (ret <= 0) { 1129038b8d1dSIlya Dryomov con->in_data_crc = crc; 1130038b8d1dSIlya Dryomov return ret; 1131038b8d1dSIlya Dryomov } 1132038b8d1dSIlya Dryomov 1133038b8d1dSIlya Dryomov crc = crc32c(crc, page_address(con->bounce_page), ret); 1134038b8d1dSIlya Dryomov memcpy_to_page(page, off, page_address(con->bounce_page), ret); 1135038b8d1dSIlya Dryomov 1136038b8d1dSIlya Dryomov ceph_msg_data_advance(cursor, ret); 1137038b8d1dSIlya Dryomov } 1138038b8d1dSIlya Dryomov con->in_data_crc = crc; 1139038b8d1dSIlya Dryomov 1140038b8d1dSIlya Dryomov return 1; /* must return > 0 to indicate success */ 1141038b8d1dSIlya Dryomov } 1142038b8d1dSIlya Dryomov 11432f713615SIlya Dryomov /* 11442f713615SIlya Dryomov * read (part of) a message. 11452f713615SIlya Dryomov */ 11462f713615SIlya Dryomov static int read_partial_message(struct ceph_connection *con) 11472f713615SIlya Dryomov { 11482f713615SIlya Dryomov struct ceph_msg *m = con->in_msg; 11492f713615SIlya Dryomov int size; 11502f713615SIlya Dryomov int end; 11512f713615SIlya Dryomov int ret; 11522f713615SIlya Dryomov unsigned int front_len, middle_len, data_len; 11532f713615SIlya Dryomov bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC); 11542f713615SIlya Dryomov bool need_sign = (con->peer_features & CEPH_FEATURE_MSG_AUTH); 11552f713615SIlya Dryomov u64 seq; 11562f713615SIlya Dryomov u32 crc; 11572f713615SIlya Dryomov 11582f713615SIlya Dryomov dout("read_partial_message con %p msg %p\n", con, m); 11592f713615SIlya Dryomov 11602f713615SIlya Dryomov /* header */ 1161a56dd9bfSIlya Dryomov size = sizeof(con->v1.in_hdr); 11622f713615SIlya Dryomov end = size; 1163a56dd9bfSIlya Dryomov ret = read_partial(con, end, size, &con->v1.in_hdr); 11642f713615SIlya Dryomov if (ret <= 0) 11652f713615SIlya Dryomov return ret; 11662f713615SIlya Dryomov 1167a56dd9bfSIlya Dryomov crc = crc32c(0, &con->v1.in_hdr, offsetof(struct ceph_msg_header, crc)); 1168a56dd9bfSIlya Dryomov if (cpu_to_le32(crc) != con->v1.in_hdr.crc) { 11692f713615SIlya Dryomov pr_err("read_partial_message bad hdr crc %u != expected %u\n", 1170a56dd9bfSIlya Dryomov crc, con->v1.in_hdr.crc); 11712f713615SIlya Dryomov return -EBADMSG; 11722f713615SIlya Dryomov } 11732f713615SIlya Dryomov 1174a56dd9bfSIlya Dryomov front_len = le32_to_cpu(con->v1.in_hdr.front_len); 11752f713615SIlya Dryomov if (front_len > CEPH_MSG_MAX_FRONT_LEN) 11762f713615SIlya Dryomov return -EIO; 1177a56dd9bfSIlya Dryomov middle_len = le32_to_cpu(con->v1.in_hdr.middle_len); 11782f713615SIlya Dryomov if (middle_len > CEPH_MSG_MAX_MIDDLE_LEN) 11792f713615SIlya Dryomov return -EIO; 1180a56dd9bfSIlya Dryomov data_len = le32_to_cpu(con->v1.in_hdr.data_len); 11812f713615SIlya Dryomov if (data_len > CEPH_MSG_MAX_DATA_LEN) 11822f713615SIlya Dryomov return -EIO; 11832f713615SIlya Dryomov 11842f713615SIlya Dryomov /* verify seq# */ 1185a56dd9bfSIlya Dryomov seq = le64_to_cpu(con->v1.in_hdr.seq); 11862f713615SIlya Dryomov if ((s64)seq - (s64)con->in_seq < 1) { 11872f713615SIlya Dryomov pr_info("skipping %s%lld %s seq %lld expected %lld\n", 11882f713615SIlya Dryomov ENTITY_NAME(con->peer_name), 11892f713615SIlya Dryomov ceph_pr_addr(&con->peer_addr), 11902f713615SIlya Dryomov seq, con->in_seq + 1); 1191a56dd9bfSIlya Dryomov con->v1.in_base_pos = -front_len - middle_len - data_len - 11922f713615SIlya Dryomov sizeof_footer(con); 1193a56dd9bfSIlya Dryomov con->v1.in_tag = CEPH_MSGR_TAG_READY; 11942f713615SIlya Dryomov return 1; 11952f713615SIlya Dryomov } else if ((s64)seq - (s64)con->in_seq > 1) { 11962f713615SIlya Dryomov pr_err("read_partial_message bad seq %lld expected %lld\n", 11972f713615SIlya Dryomov seq, con->in_seq + 1); 11982f713615SIlya Dryomov con->error_msg = "bad message sequence # for incoming message"; 11992f713615SIlya Dryomov return -EBADE; 12002f713615SIlya Dryomov } 12012f713615SIlya Dryomov 12022f713615SIlya Dryomov /* allocate message? */ 12032f713615SIlya Dryomov if (!con->in_msg) { 12042f713615SIlya Dryomov int skip = 0; 12052f713615SIlya Dryomov 1206a56dd9bfSIlya Dryomov dout("got hdr type %d front %d data %d\n", con->v1.in_hdr.type, 12072f713615SIlya Dryomov front_len, data_len); 1208a56dd9bfSIlya Dryomov ret = ceph_con_in_msg_alloc(con, &con->v1.in_hdr, &skip); 12092f713615SIlya Dryomov if (ret < 0) 12102f713615SIlya Dryomov return ret; 12112f713615SIlya Dryomov 12129d5ae6f3SIlya Dryomov BUG_ON((!con->in_msg) ^ skip); 12132f713615SIlya Dryomov if (skip) { 12142f713615SIlya Dryomov /* skip this message */ 12152f713615SIlya Dryomov dout("alloc_msg said skip message\n"); 1216a56dd9bfSIlya Dryomov con->v1.in_base_pos = -front_len - middle_len - 1217a56dd9bfSIlya Dryomov data_len - sizeof_footer(con); 1218a56dd9bfSIlya Dryomov con->v1.in_tag = CEPH_MSGR_TAG_READY; 12192f713615SIlya Dryomov con->in_seq++; 12202f713615SIlya Dryomov return 1; 12212f713615SIlya Dryomov } 12222f713615SIlya Dryomov 12232f713615SIlya Dryomov BUG_ON(!con->in_msg); 12242f713615SIlya Dryomov BUG_ON(con->in_msg->con != con); 12252f713615SIlya Dryomov m = con->in_msg; 12262f713615SIlya Dryomov m->front.iov_len = 0; /* haven't read it yet */ 12272f713615SIlya Dryomov if (m->middle) 12282f713615SIlya Dryomov m->middle->vec.iov_len = 0; 12292f713615SIlya Dryomov 12302f713615SIlya Dryomov /* prepare for data payload, if any */ 12312f713615SIlya Dryomov 12322f713615SIlya Dryomov if (data_len) 12332f713615SIlya Dryomov prepare_message_data(con->in_msg, data_len); 12342f713615SIlya Dryomov } 12352f713615SIlya Dryomov 12362f713615SIlya Dryomov /* front */ 12372f713615SIlya Dryomov ret = read_partial_message_section(con, &m->front, front_len, 12382f713615SIlya Dryomov &con->in_front_crc); 12392f713615SIlya Dryomov if (ret <= 0) 12402f713615SIlya Dryomov return ret; 12412f713615SIlya Dryomov 12422f713615SIlya Dryomov /* middle */ 12432f713615SIlya Dryomov if (m->middle) { 12442f713615SIlya Dryomov ret = read_partial_message_section(con, &m->middle->vec, 12452f713615SIlya Dryomov middle_len, 12462f713615SIlya Dryomov &con->in_middle_crc); 12472f713615SIlya Dryomov if (ret <= 0) 12482f713615SIlya Dryomov return ret; 12492f713615SIlya Dryomov } 12502f713615SIlya Dryomov 12512f713615SIlya Dryomov /* (page) data */ 12522f713615SIlya Dryomov if (data_len) { 1253038b8d1dSIlya Dryomov if (!m->num_data_items) 1254038b8d1dSIlya Dryomov return -EIO; 1255038b8d1dSIlya Dryomov 1256*d396f89dSJeff Layton if (m->sparse_read) 1257*d396f89dSJeff Layton ret = read_sparse_msg_data(con); 1258*d396f89dSJeff Layton else if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) 1259038b8d1dSIlya Dryomov ret = read_partial_msg_data_bounce(con); 1260038b8d1dSIlya Dryomov else 12612f713615SIlya Dryomov ret = read_partial_msg_data(con); 12622f713615SIlya Dryomov if (ret <= 0) 12632f713615SIlya Dryomov return ret; 12642f713615SIlya Dryomov } 12652f713615SIlya Dryomov 12662f713615SIlya Dryomov /* footer */ 12672f713615SIlya Dryomov size = sizeof_footer(con); 12682f713615SIlya Dryomov end += size; 12692f713615SIlya Dryomov ret = read_partial(con, end, size, &m->footer); 12702f713615SIlya Dryomov if (ret <= 0) 12712f713615SIlya Dryomov return ret; 12722f713615SIlya Dryomov 12732f713615SIlya Dryomov if (!need_sign) { 12742f713615SIlya Dryomov m->footer.flags = m->old_footer.flags; 12752f713615SIlya Dryomov m->footer.sig = 0; 12762f713615SIlya Dryomov } 12772f713615SIlya Dryomov 12782f713615SIlya Dryomov dout("read_partial_message got msg %p %d (%u) + %d (%u) + %d (%u)\n", 12792f713615SIlya Dryomov m, front_len, m->footer.front_crc, middle_len, 12802f713615SIlya Dryomov m->footer.middle_crc, data_len, m->footer.data_crc); 12812f713615SIlya Dryomov 12822f713615SIlya Dryomov /* crc ok? */ 12832f713615SIlya Dryomov if (con->in_front_crc != le32_to_cpu(m->footer.front_crc)) { 12842f713615SIlya Dryomov pr_err("read_partial_message %p front crc %u != exp. %u\n", 12852f713615SIlya Dryomov m, con->in_front_crc, m->footer.front_crc); 12862f713615SIlya Dryomov return -EBADMSG; 12872f713615SIlya Dryomov } 12882f713615SIlya Dryomov if (con->in_middle_crc != le32_to_cpu(m->footer.middle_crc)) { 12892f713615SIlya Dryomov pr_err("read_partial_message %p middle crc %u != exp %u\n", 12902f713615SIlya Dryomov m, con->in_middle_crc, m->footer.middle_crc); 12912f713615SIlya Dryomov return -EBADMSG; 12922f713615SIlya Dryomov } 12932f713615SIlya Dryomov if (do_datacrc && 12942f713615SIlya Dryomov (m->footer.flags & CEPH_MSG_FOOTER_NOCRC) == 0 && 12952f713615SIlya Dryomov con->in_data_crc != le32_to_cpu(m->footer.data_crc)) { 12962f713615SIlya Dryomov pr_err("read_partial_message %p data crc %u != exp. %u\n", m, 12972f713615SIlya Dryomov con->in_data_crc, le32_to_cpu(m->footer.data_crc)); 12982f713615SIlya Dryomov return -EBADMSG; 12992f713615SIlya Dryomov } 13002f713615SIlya Dryomov 13012f713615SIlya Dryomov if (need_sign && con->ops->check_message_signature && 13022f713615SIlya Dryomov con->ops->check_message_signature(m)) { 13032f713615SIlya Dryomov pr_err("read_partial_message %p signature check failed\n", m); 13042f713615SIlya Dryomov return -EBADMSG; 13052f713615SIlya Dryomov } 13062f713615SIlya Dryomov 13072f713615SIlya Dryomov return 1; /* done! */ 13082f713615SIlya Dryomov } 13092f713615SIlya Dryomov 13102f713615SIlya Dryomov static int read_keepalive_ack(struct ceph_connection *con) 13112f713615SIlya Dryomov { 13122f713615SIlya Dryomov struct ceph_timespec ceph_ts; 13132f713615SIlya Dryomov size_t size = sizeof(ceph_ts); 13142f713615SIlya Dryomov int ret = read_partial(con, size, size, &ceph_ts); 13152f713615SIlya Dryomov if (ret <= 0) 13162f713615SIlya Dryomov return ret; 13172f713615SIlya Dryomov ceph_decode_timespec64(&con->last_keepalive_ack, &ceph_ts); 13182f713615SIlya Dryomov prepare_read_tag(con); 13192f713615SIlya Dryomov return 1; 13202f713615SIlya Dryomov } 13212f713615SIlya Dryomov 13222f713615SIlya Dryomov /* 13232f713615SIlya Dryomov * Read what we can from the socket. 13242f713615SIlya Dryomov */ 13252f713615SIlya Dryomov int ceph_con_v1_try_read(struct ceph_connection *con) 13262f713615SIlya Dryomov { 13272f713615SIlya Dryomov int ret = -1; 13282f713615SIlya Dryomov 13292f713615SIlya Dryomov more: 13302f713615SIlya Dryomov dout("try_read start %p state %d\n", con, con->state); 13312f713615SIlya Dryomov if (con->state != CEPH_CON_S_V1_BANNER && 13322f713615SIlya Dryomov con->state != CEPH_CON_S_V1_CONNECT_MSG && 13332f713615SIlya Dryomov con->state != CEPH_CON_S_OPEN) 13342f713615SIlya Dryomov return 0; 13352f713615SIlya Dryomov 13362f713615SIlya Dryomov BUG_ON(!con->sock); 13372f713615SIlya Dryomov 1338a56dd9bfSIlya Dryomov dout("try_read tag %d in_base_pos %d\n", con->v1.in_tag, 1339a56dd9bfSIlya Dryomov con->v1.in_base_pos); 13402f713615SIlya Dryomov 13412f713615SIlya Dryomov if (con->state == CEPH_CON_S_V1_BANNER) { 13422f713615SIlya Dryomov ret = read_partial_banner(con); 13432f713615SIlya Dryomov if (ret <= 0) 13442f713615SIlya Dryomov goto out; 13452f713615SIlya Dryomov ret = process_banner(con); 13462f713615SIlya Dryomov if (ret < 0) 13472f713615SIlya Dryomov goto out; 13482f713615SIlya Dryomov 13492f713615SIlya Dryomov con->state = CEPH_CON_S_V1_CONNECT_MSG; 13502f713615SIlya Dryomov 13512f713615SIlya Dryomov /* 13522f713615SIlya Dryomov * Received banner is good, exchange connection info. 13532f713615SIlya Dryomov * Do not reset out_kvec, as sending our banner raced 13542f713615SIlya Dryomov * with receiving peer banner after connect completed. 13552f713615SIlya Dryomov */ 13562f713615SIlya Dryomov ret = prepare_write_connect(con); 13572f713615SIlya Dryomov if (ret < 0) 13582f713615SIlya Dryomov goto out; 13592f713615SIlya Dryomov prepare_read_connect(con); 13602f713615SIlya Dryomov 13612f713615SIlya Dryomov /* Send connection info before awaiting response */ 13622f713615SIlya Dryomov goto out; 13632f713615SIlya Dryomov } 13642f713615SIlya Dryomov 13652f713615SIlya Dryomov if (con->state == CEPH_CON_S_V1_CONNECT_MSG) { 13662f713615SIlya Dryomov ret = read_partial_connect(con); 13672f713615SIlya Dryomov if (ret <= 0) 13682f713615SIlya Dryomov goto out; 13692f713615SIlya Dryomov ret = process_connect(con); 13702f713615SIlya Dryomov if (ret < 0) 13712f713615SIlya Dryomov goto out; 13722f713615SIlya Dryomov goto more; 13732f713615SIlya Dryomov } 13742f713615SIlya Dryomov 13752f713615SIlya Dryomov WARN_ON(con->state != CEPH_CON_S_OPEN); 13762f713615SIlya Dryomov 1377a56dd9bfSIlya Dryomov if (con->v1.in_base_pos < 0) { 13782f713615SIlya Dryomov /* 13792f713615SIlya Dryomov * skipping + discarding content. 13802f713615SIlya Dryomov */ 1381a56dd9bfSIlya Dryomov ret = ceph_tcp_recvmsg(con->sock, NULL, -con->v1.in_base_pos); 13822f713615SIlya Dryomov if (ret <= 0) 13832f713615SIlya Dryomov goto out; 1384a56dd9bfSIlya Dryomov dout("skipped %d / %d bytes\n", ret, -con->v1.in_base_pos); 1385a56dd9bfSIlya Dryomov con->v1.in_base_pos += ret; 1386a56dd9bfSIlya Dryomov if (con->v1.in_base_pos) 13872f713615SIlya Dryomov goto more; 13882f713615SIlya Dryomov } 1389a56dd9bfSIlya Dryomov if (con->v1.in_tag == CEPH_MSGR_TAG_READY) { 13902f713615SIlya Dryomov /* 13912f713615SIlya Dryomov * what's next? 13922f713615SIlya Dryomov */ 1393a56dd9bfSIlya Dryomov ret = ceph_tcp_recvmsg(con->sock, &con->v1.in_tag, 1); 13942f713615SIlya Dryomov if (ret <= 0) 13952f713615SIlya Dryomov goto out; 1396a56dd9bfSIlya Dryomov dout("try_read got tag %d\n", con->v1.in_tag); 1397a56dd9bfSIlya Dryomov switch (con->v1.in_tag) { 13982f713615SIlya Dryomov case CEPH_MSGR_TAG_MSG: 13992f713615SIlya Dryomov prepare_read_message(con); 14002f713615SIlya Dryomov break; 14012f713615SIlya Dryomov case CEPH_MSGR_TAG_ACK: 14022f713615SIlya Dryomov prepare_read_ack(con); 14032f713615SIlya Dryomov break; 14042f713615SIlya Dryomov case CEPH_MSGR_TAG_KEEPALIVE2_ACK: 14052f713615SIlya Dryomov prepare_read_keepalive_ack(con); 14062f713615SIlya Dryomov break; 14072f713615SIlya Dryomov case CEPH_MSGR_TAG_CLOSE: 14082f713615SIlya Dryomov ceph_con_close_socket(con); 14092f713615SIlya Dryomov con->state = CEPH_CON_S_CLOSED; 14102f713615SIlya Dryomov goto out; 14112f713615SIlya Dryomov default: 14122f713615SIlya Dryomov goto bad_tag; 14132f713615SIlya Dryomov } 14142f713615SIlya Dryomov } 1415a56dd9bfSIlya Dryomov if (con->v1.in_tag == CEPH_MSGR_TAG_MSG) { 14162f713615SIlya Dryomov ret = read_partial_message(con); 14172f713615SIlya Dryomov if (ret <= 0) { 14182f713615SIlya Dryomov switch (ret) { 14192f713615SIlya Dryomov case -EBADMSG: 14202f713615SIlya Dryomov con->error_msg = "bad crc/signature"; 14212f713615SIlya Dryomov fallthrough; 14222f713615SIlya Dryomov case -EBADE: 14232f713615SIlya Dryomov ret = -EIO; 14242f713615SIlya Dryomov break; 14252f713615SIlya Dryomov case -EIO: 14262f713615SIlya Dryomov con->error_msg = "io error"; 14272f713615SIlya Dryomov break; 14282f713615SIlya Dryomov } 14292f713615SIlya Dryomov goto out; 14302f713615SIlya Dryomov } 1431a56dd9bfSIlya Dryomov if (con->v1.in_tag == CEPH_MSGR_TAG_READY) 14322f713615SIlya Dryomov goto more; 14332f713615SIlya Dryomov ceph_con_process_message(con); 14342f713615SIlya Dryomov if (con->state == CEPH_CON_S_OPEN) 14352f713615SIlya Dryomov prepare_read_tag(con); 14362f713615SIlya Dryomov goto more; 14372f713615SIlya Dryomov } 1438a56dd9bfSIlya Dryomov if (con->v1.in_tag == CEPH_MSGR_TAG_ACK || 1439a56dd9bfSIlya Dryomov con->v1.in_tag == CEPH_MSGR_TAG_SEQ) { 14402f713615SIlya Dryomov /* 14412f713615SIlya Dryomov * the final handshake seq exchange is semantically 14422f713615SIlya Dryomov * equivalent to an ACK 14432f713615SIlya Dryomov */ 14442f713615SIlya Dryomov ret = read_partial_ack(con); 14452f713615SIlya Dryomov if (ret <= 0) 14462f713615SIlya Dryomov goto out; 14472f713615SIlya Dryomov process_ack(con); 14482f713615SIlya Dryomov goto more; 14492f713615SIlya Dryomov } 1450a56dd9bfSIlya Dryomov if (con->v1.in_tag == CEPH_MSGR_TAG_KEEPALIVE2_ACK) { 14512f713615SIlya Dryomov ret = read_keepalive_ack(con); 14522f713615SIlya Dryomov if (ret <= 0) 14532f713615SIlya Dryomov goto out; 14542f713615SIlya Dryomov goto more; 14552f713615SIlya Dryomov } 14562f713615SIlya Dryomov 14572f713615SIlya Dryomov out: 14582f713615SIlya Dryomov dout("try_read done on %p ret %d\n", con, ret); 14592f713615SIlya Dryomov return ret; 14602f713615SIlya Dryomov 14612f713615SIlya Dryomov bad_tag: 1462a56dd9bfSIlya Dryomov pr_err("try_read bad tag %d\n", con->v1.in_tag); 14632f713615SIlya Dryomov con->error_msg = "protocol error, garbage tag"; 14642f713615SIlya Dryomov ret = -1; 14652f713615SIlya Dryomov goto out; 14662f713615SIlya Dryomov } 14672f713615SIlya Dryomov 14682f713615SIlya Dryomov /* 14692f713615SIlya Dryomov * Write something to the socket. Called in a worker thread when the 14702f713615SIlya Dryomov * socket appears to be writeable and we have something ready to send. 14712f713615SIlya Dryomov */ 14722f713615SIlya Dryomov int ceph_con_v1_try_write(struct ceph_connection *con) 14732f713615SIlya Dryomov { 14742f713615SIlya Dryomov int ret = 1; 14752f713615SIlya Dryomov 14762f713615SIlya Dryomov dout("try_write start %p state %d\n", con, con->state); 14772f713615SIlya Dryomov if (con->state != CEPH_CON_S_PREOPEN && 14782f713615SIlya Dryomov con->state != CEPH_CON_S_V1_BANNER && 14792f713615SIlya Dryomov con->state != CEPH_CON_S_V1_CONNECT_MSG && 14802f713615SIlya Dryomov con->state != CEPH_CON_S_OPEN) 14812f713615SIlya Dryomov return 0; 14822f713615SIlya Dryomov 14832f713615SIlya Dryomov /* open the socket first? */ 14842f713615SIlya Dryomov if (con->state == CEPH_CON_S_PREOPEN) { 14852f713615SIlya Dryomov BUG_ON(con->sock); 14862f713615SIlya Dryomov con->state = CEPH_CON_S_V1_BANNER; 14872f713615SIlya Dryomov 14882f713615SIlya Dryomov con_out_kvec_reset(con); 14892f713615SIlya Dryomov prepare_write_banner(con); 14902f713615SIlya Dryomov prepare_read_banner(con); 14912f713615SIlya Dryomov 14922f713615SIlya Dryomov BUG_ON(con->in_msg); 1493a56dd9bfSIlya Dryomov con->v1.in_tag = CEPH_MSGR_TAG_READY; 14942f713615SIlya Dryomov dout("try_write initiating connect on %p new state %d\n", 14952f713615SIlya Dryomov con, con->state); 14962f713615SIlya Dryomov ret = ceph_tcp_connect(con); 14972f713615SIlya Dryomov if (ret < 0) { 14982f713615SIlya Dryomov con->error_msg = "connect error"; 14992f713615SIlya Dryomov goto out; 15002f713615SIlya Dryomov } 15012f713615SIlya Dryomov } 15022f713615SIlya Dryomov 15032f713615SIlya Dryomov more: 1504a56dd9bfSIlya Dryomov dout("try_write out_kvec_bytes %d\n", con->v1.out_kvec_bytes); 15052f713615SIlya Dryomov BUG_ON(!con->sock); 15062f713615SIlya Dryomov 15072f713615SIlya Dryomov /* kvec data queued? */ 1508a56dd9bfSIlya Dryomov if (con->v1.out_kvec_left) { 15092f713615SIlya Dryomov ret = write_partial_kvec(con); 15102f713615SIlya Dryomov if (ret <= 0) 15112f713615SIlya Dryomov goto out; 15122f713615SIlya Dryomov } 1513a56dd9bfSIlya Dryomov if (con->v1.out_skip) { 15142f713615SIlya Dryomov ret = write_partial_skip(con); 15152f713615SIlya Dryomov if (ret <= 0) 15162f713615SIlya Dryomov goto out; 15172f713615SIlya Dryomov } 15182f713615SIlya Dryomov 15192f713615SIlya Dryomov /* msg pages? */ 15202f713615SIlya Dryomov if (con->out_msg) { 1521a56dd9bfSIlya Dryomov if (con->v1.out_msg_done) { 15222f713615SIlya Dryomov ceph_msg_put(con->out_msg); 15232f713615SIlya Dryomov con->out_msg = NULL; /* we're done with this one */ 15242f713615SIlya Dryomov goto do_next; 15252f713615SIlya Dryomov } 15262f713615SIlya Dryomov 15272f713615SIlya Dryomov ret = write_partial_message_data(con); 15282f713615SIlya Dryomov if (ret == 1) 15292f713615SIlya Dryomov goto more; /* we need to send the footer, too! */ 15302f713615SIlya Dryomov if (ret == 0) 15312f713615SIlya Dryomov goto out; 15322f713615SIlya Dryomov if (ret < 0) { 15332f713615SIlya Dryomov dout("try_write write_partial_message_data err %d\n", 15342f713615SIlya Dryomov ret); 15352f713615SIlya Dryomov goto out; 15362f713615SIlya Dryomov } 15372f713615SIlya Dryomov } 15382f713615SIlya Dryomov 15392f713615SIlya Dryomov do_next: 15402f713615SIlya Dryomov if (con->state == CEPH_CON_S_OPEN) { 15412f713615SIlya Dryomov if (ceph_con_flag_test_and_clear(con, 15422f713615SIlya Dryomov CEPH_CON_F_KEEPALIVE_PENDING)) { 15432f713615SIlya Dryomov prepare_write_keepalive(con); 15442f713615SIlya Dryomov goto more; 15452f713615SIlya Dryomov } 15462f713615SIlya Dryomov /* is anything else pending? */ 15472f713615SIlya Dryomov if (!list_empty(&con->out_queue)) { 15482f713615SIlya Dryomov prepare_write_message(con); 15492f713615SIlya Dryomov goto more; 15502f713615SIlya Dryomov } 15512f713615SIlya Dryomov if (con->in_seq > con->in_seq_acked) { 15522f713615SIlya Dryomov prepare_write_ack(con); 15532f713615SIlya Dryomov goto more; 15542f713615SIlya Dryomov } 15552f713615SIlya Dryomov } 15562f713615SIlya Dryomov 15572f713615SIlya Dryomov /* Nothing to do! */ 15582f713615SIlya Dryomov ceph_con_flag_clear(con, CEPH_CON_F_WRITE_PENDING); 15592f713615SIlya Dryomov dout("try_write nothing else to write.\n"); 15602f713615SIlya Dryomov ret = 0; 15612f713615SIlya Dryomov out: 15622f713615SIlya Dryomov dout("try_write done on %p ret %d\n", con, ret); 15632f713615SIlya Dryomov return ret; 15642f713615SIlya Dryomov } 15652f713615SIlya Dryomov 15662f713615SIlya Dryomov void ceph_con_v1_revoke(struct ceph_connection *con) 15672f713615SIlya Dryomov { 15682f713615SIlya Dryomov struct ceph_msg *msg = con->out_msg; 15692f713615SIlya Dryomov 1570a56dd9bfSIlya Dryomov WARN_ON(con->v1.out_skip); 15712f713615SIlya Dryomov /* footer */ 1572a56dd9bfSIlya Dryomov if (con->v1.out_msg_done) { 1573a56dd9bfSIlya Dryomov con->v1.out_skip += con_out_kvec_skip(con); 15742f713615SIlya Dryomov } else { 15752f713615SIlya Dryomov WARN_ON(!msg->data_length); 1576a56dd9bfSIlya Dryomov con->v1.out_skip += sizeof_footer(con); 15772f713615SIlya Dryomov } 15782f713615SIlya Dryomov /* data, middle, front */ 15792f713615SIlya Dryomov if (msg->data_length) 1580a56dd9bfSIlya Dryomov con->v1.out_skip += msg->cursor.total_resid; 15812f713615SIlya Dryomov if (msg->middle) 1582a56dd9bfSIlya Dryomov con->v1.out_skip += con_out_kvec_skip(con); 1583a56dd9bfSIlya Dryomov con->v1.out_skip += con_out_kvec_skip(con); 15842f713615SIlya Dryomov 15852f713615SIlya Dryomov dout("%s con %p out_kvec_bytes %d out_skip %d\n", __func__, con, 1586a56dd9bfSIlya Dryomov con->v1.out_kvec_bytes, con->v1.out_skip); 15872f713615SIlya Dryomov } 15882f713615SIlya Dryomov 15892f713615SIlya Dryomov void ceph_con_v1_revoke_incoming(struct ceph_connection *con) 15902f713615SIlya Dryomov { 1591a56dd9bfSIlya Dryomov unsigned int front_len = le32_to_cpu(con->v1.in_hdr.front_len); 1592a56dd9bfSIlya Dryomov unsigned int middle_len = le32_to_cpu(con->v1.in_hdr.middle_len); 1593a56dd9bfSIlya Dryomov unsigned int data_len = le32_to_cpu(con->v1.in_hdr.data_len); 15942f713615SIlya Dryomov 15952f713615SIlya Dryomov /* skip rest of message */ 1596a56dd9bfSIlya Dryomov con->v1.in_base_pos = con->v1.in_base_pos - 15972f713615SIlya Dryomov sizeof(struct ceph_msg_header) - 15982f713615SIlya Dryomov front_len - 15992f713615SIlya Dryomov middle_len - 16002f713615SIlya Dryomov data_len - 16012f713615SIlya Dryomov sizeof(struct ceph_msg_footer); 16022f713615SIlya Dryomov 1603a56dd9bfSIlya Dryomov con->v1.in_tag = CEPH_MSGR_TAG_READY; 16042f713615SIlya Dryomov con->in_seq++; 16052f713615SIlya Dryomov 1606a56dd9bfSIlya Dryomov dout("%s con %p in_base_pos %d\n", __func__, con, con->v1.in_base_pos); 16072f713615SIlya Dryomov } 16082f713615SIlya Dryomov 16092f713615SIlya Dryomov bool ceph_con_v1_opened(struct ceph_connection *con) 16102f713615SIlya Dryomov { 1611a56dd9bfSIlya Dryomov return con->v1.connect_seq; 16122f713615SIlya Dryomov } 16132f713615SIlya Dryomov 16142f713615SIlya Dryomov void ceph_con_v1_reset_session(struct ceph_connection *con) 16152f713615SIlya Dryomov { 1616a56dd9bfSIlya Dryomov con->v1.connect_seq = 0; 1617a56dd9bfSIlya Dryomov con->v1.peer_global_seq = 0; 16182f713615SIlya Dryomov } 16192f713615SIlya Dryomov 16202f713615SIlya Dryomov void ceph_con_v1_reset_protocol(struct ceph_connection *con) 16212f713615SIlya Dryomov { 1622a56dd9bfSIlya Dryomov con->v1.out_skip = 0; 16232f713615SIlya Dryomov } 1624