12f713615SIlya Dryomov // SPDX-License-Identifier: GPL-2.0
22f713615SIlya Dryomov #include <linux/ceph/ceph_debug.h>
32f713615SIlya Dryomov
42f713615SIlya Dryomov #include <linux/bvec.h>
52f713615SIlya Dryomov #include <linux/crc32c.h>
62f713615SIlya Dryomov #include <linux/net.h>
72f713615SIlya Dryomov #include <linux/socket.h>
82f713615SIlya Dryomov #include <net/sock.h>
92f713615SIlya Dryomov
102f713615SIlya Dryomov #include <linux/ceph/ceph_features.h>
112f713615SIlya Dryomov #include <linux/ceph/decode.h>
122f713615SIlya Dryomov #include <linux/ceph/libceph.h>
132f713615SIlya Dryomov #include <linux/ceph/messenger.h>
142f713615SIlya Dryomov
152f713615SIlya Dryomov /* static tag bytes (protocol control messages) */
162f713615SIlya Dryomov static char tag_msg = CEPH_MSGR_TAG_MSG;
172f713615SIlya Dryomov static char tag_ack = CEPH_MSGR_TAG_ACK;
182f713615SIlya Dryomov static char tag_keepalive = CEPH_MSGR_TAG_KEEPALIVE;
192f713615SIlya Dryomov static char tag_keepalive2 = CEPH_MSGR_TAG_KEEPALIVE2;
202f713615SIlya Dryomov
212f713615SIlya Dryomov /*
222f713615SIlya Dryomov * If @buf is NULL, discard up to @len bytes.
232f713615SIlya Dryomov */
ceph_tcp_recvmsg(struct socket * sock,void * buf,size_t len)242f713615SIlya Dryomov static int ceph_tcp_recvmsg(struct socket *sock, void *buf, size_t len)
252f713615SIlya Dryomov {
262f713615SIlya Dryomov struct kvec iov = {buf, len};
272f713615SIlya Dryomov struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL };
282f713615SIlya Dryomov int r;
292f713615SIlya Dryomov
302f713615SIlya Dryomov if (!buf)
312f713615SIlya Dryomov msg.msg_flags |= MSG_TRUNC;
322f713615SIlya Dryomov
33de4eda9dSAl Viro iov_iter_kvec(&msg.msg_iter, ITER_DEST, &iov, 1, len);
342f713615SIlya Dryomov r = sock_recvmsg(sock, &msg, msg.msg_flags);
352f713615SIlya Dryomov if (r == -EAGAIN)
362f713615SIlya Dryomov r = 0;
372f713615SIlya Dryomov return r;
382f713615SIlya Dryomov }
392f713615SIlya Dryomov
/*
 * Non-blocking receive of up to @length bytes from @sock into @page,
 * starting @page_offset bytes into the page.  The requested range must
 * not run past the end of the page.
 *
 * Returns whatever sock_recvmsg() returns, except that -EAGAIN is
 * reported as 0.
 */
static int ceph_tcp_recvpage(struct socket *sock, struct page *page,
			     int page_offset, size_t length)
{
	struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL };
	struct bio_vec bvec;
	int ret;

	BUG_ON(page_offset + length > PAGE_SIZE);

	bvec_set_page(&bvec, page, length, page_offset);
	iov_iter_bvec(&msg.msg_iter, ITER_DEST, &bvec, 1, length);

	ret = sock_recvmsg(sock, &msg, msg.msg_flags);
	if (ret != -EAGAIN)
		return ret;

	return 0;
}
552f713615SIlya Dryomov
562f713615SIlya Dryomov /*
572f713615SIlya Dryomov * write something. @more is true if caller will be sending more data
582f713615SIlya Dryomov * shortly.
592f713615SIlya Dryomov */
ceph_tcp_sendmsg(struct socket * sock,struct kvec * iov,size_t kvlen,size_t len,bool more)602f713615SIlya Dryomov static int ceph_tcp_sendmsg(struct socket *sock, struct kvec *iov,
612f713615SIlya Dryomov size_t kvlen, size_t len, bool more)
622f713615SIlya Dryomov {
632f713615SIlya Dryomov struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL };
642f713615SIlya Dryomov int r;
652f713615SIlya Dryomov
662f713615SIlya Dryomov if (more)
672f713615SIlya Dryomov msg.msg_flags |= MSG_MORE;
682f713615SIlya Dryomov else
692f713615SIlya Dryomov msg.msg_flags |= MSG_EOR; /* superfluous, but what the hell */
702f713615SIlya Dryomov
712f713615SIlya Dryomov r = kernel_sendmsg(sock, &msg, iov, kvlen, len);
722f713615SIlya Dryomov if (r == -EAGAIN)
732f713615SIlya Dryomov r = 0;
742f713615SIlya Dryomov return r;
752f713615SIlya Dryomov }
762f713615SIlya Dryomov
775da4d7b8SDavid Howells /*
785da4d7b8SDavid Howells * @more: MSG_MORE or 0.
795da4d7b8SDavid Howells */
ceph_tcp_sendpage(struct socket * sock,struct page * page,int offset,size_t size,int more)805da4d7b8SDavid Howells static int ceph_tcp_sendpage(struct socket *sock, struct page *page,
815da4d7b8SDavid Howells int offset, size_t size, int more)
825da4d7b8SDavid Howells {
835da4d7b8SDavid Howells struct msghdr msg = {
845da4d7b8SDavid Howells .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL | more,
855da4d7b8SDavid Howells };
865da4d7b8SDavid Howells struct bio_vec bvec;
875da4d7b8SDavid Howells int ret;
885da4d7b8SDavid Howells
895da4d7b8SDavid Howells /*
905da4d7b8SDavid Howells * MSG_SPLICE_PAGES cannot properly handle pages with page_count == 0,
915da4d7b8SDavid Howells * we need to fall back to sendmsg if that's the case.
925da4d7b8SDavid Howells *
935da4d7b8SDavid Howells * Same goes for slab pages: skb_can_coalesce() allows
945da4d7b8SDavid Howells * coalescing neighboring slab objects into a single frag which
955da4d7b8SDavid Howells * triggers one of hardened usercopy checks.
965da4d7b8SDavid Howells */
975da4d7b8SDavid Howells if (sendpage_ok(page))
985da4d7b8SDavid Howells msg.msg_flags |= MSG_SPLICE_PAGES;
995da4d7b8SDavid Howells
1005da4d7b8SDavid Howells bvec_set_page(&bvec, page, size, offset);
1015da4d7b8SDavid Howells iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, size);
1025da4d7b8SDavid Howells
1035da4d7b8SDavid Howells ret = sock_sendmsg(sock, &msg);
1045da4d7b8SDavid Howells if (ret == -EAGAIN)
1055da4d7b8SDavid Howells ret = 0;
1065da4d7b8SDavid Howells
1075da4d7b8SDavid Howells return ret;
1085da4d7b8SDavid Howells }
1095da4d7b8SDavid Howells
con_out_kvec_reset(struct ceph_connection * con)1102f713615SIlya Dryomov static void con_out_kvec_reset(struct ceph_connection *con)
1112f713615SIlya Dryomov {
112a56dd9bfSIlya Dryomov BUG_ON(con->v1.out_skip);
1132f713615SIlya Dryomov
114a56dd9bfSIlya Dryomov con->v1.out_kvec_left = 0;
115a56dd9bfSIlya Dryomov con->v1.out_kvec_bytes = 0;
116a56dd9bfSIlya Dryomov con->v1.out_kvec_cur = &con->v1.out_kvec[0];
1172f713615SIlya Dryomov }
1182f713615SIlya Dryomov
/*
 * Append one entry (@data, @size) to the outgoing kvec queue and
 * account for it in the entry and byte counts.  Only the pointer is
 * stored; the data itself is not copied.  Must not be called while
 * out_skip is non-zero, and the queue must have a free slot.
 */
static void con_out_kvec_add(struct ceph_connection *con,
			     size_t size, void *data)
{
	int slot_idx = con->v1.out_kvec_left;
	struct kvec *slot;

	BUG_ON(con->v1.out_skip);
	BUG_ON(slot_idx >= ARRAY_SIZE(con->v1.out_kvec));

	slot = &con->v1.out_kvec[slot_idx];
	slot->iov_base = data;
	slot->iov_len = size;

	con->v1.out_kvec_bytes += size;
	con->v1.out_kvec_left++;
}
1322f713615SIlya Dryomov
1332f713615SIlya Dryomov /*
1342f713615SIlya Dryomov * Chop off a kvec from the end. Return residual number of bytes for
1352f713615SIlya Dryomov * that kvec, i.e. how many bytes would have been written if the kvec
1362f713615SIlya Dryomov * hadn't been nuked.
1372f713615SIlya Dryomov */
con_out_kvec_skip(struct ceph_connection * con)1382f713615SIlya Dryomov static int con_out_kvec_skip(struct ceph_connection *con)
1392f713615SIlya Dryomov {
1402f713615SIlya Dryomov int skip = 0;
1412f713615SIlya Dryomov
142a56dd9bfSIlya Dryomov if (con->v1.out_kvec_bytes > 0) {
143a56dd9bfSIlya Dryomov skip = con->v1.out_kvec_cur[con->v1.out_kvec_left - 1].iov_len;
144a56dd9bfSIlya Dryomov BUG_ON(con->v1.out_kvec_bytes < skip);
145a56dd9bfSIlya Dryomov BUG_ON(!con->v1.out_kvec_left);
146a56dd9bfSIlya Dryomov con->v1.out_kvec_bytes -= skip;
147a56dd9bfSIlya Dryomov con->v1.out_kvec_left--;
1482f713615SIlya Dryomov }
1492f713615SIlya Dryomov
1502f713615SIlya Dryomov return skip;
1512f713615SIlya Dryomov }
1522f713615SIlya Dryomov
sizeof_footer(struct ceph_connection * con)1532f713615SIlya Dryomov static size_t sizeof_footer(struct ceph_connection *con)
1542f713615SIlya Dryomov {
1552f713615SIlya Dryomov return (con->peer_features & CEPH_FEATURE_MSG_AUTH) ?
1562f713615SIlya Dryomov sizeof(struct ceph_msg_footer) :
1572f713615SIlya Dryomov sizeof(struct ceph_msg_footer_old);
1582f713615SIlya Dryomov }
1592f713615SIlya Dryomov
/*
 * Initialize @msg's data cursor.  The cursor length is
 * msg->sparse_read_total when that is non-zero, and @data_len
 * otherwise.
 */
static void prepare_message_data(struct ceph_msg *msg, u32 data_len)
{
	u64 len = msg->sparse_read_total;

	if (!len)
		len = data_len;

	ceph_msg_data_cursor_init(&msg->cursor, msg, len);
}
1672f713615SIlya Dryomov
1682f713615SIlya Dryomov /*
1692f713615SIlya Dryomov * Prepare footer for currently outgoing message, and finish things
1702f713615SIlya Dryomov * off. Assumes out_kvec* are already valid.. we just add on to the end.
1712f713615SIlya Dryomov */
prepare_write_message_footer(struct ceph_connection * con)1722f713615SIlya Dryomov static void prepare_write_message_footer(struct ceph_connection *con)
1732f713615SIlya Dryomov {
1742f713615SIlya Dryomov struct ceph_msg *m = con->out_msg;
1752f713615SIlya Dryomov
1762f713615SIlya Dryomov m->footer.flags |= CEPH_MSG_FOOTER_COMPLETE;
1772f713615SIlya Dryomov
1782f713615SIlya Dryomov dout("prepare_write_message_footer %p\n", con);
1792f713615SIlya Dryomov con_out_kvec_add(con, sizeof_footer(con), &m->footer);
1802f713615SIlya Dryomov if (con->peer_features & CEPH_FEATURE_MSG_AUTH) {
1812f713615SIlya Dryomov if (con->ops->sign_message)
1822f713615SIlya Dryomov con->ops->sign_message(m);
1832f713615SIlya Dryomov else
1842f713615SIlya Dryomov m->footer.sig = 0;
1852f713615SIlya Dryomov } else {
1862f713615SIlya Dryomov m->old_footer.flags = m->footer.flags;
1872f713615SIlya Dryomov }
188a56dd9bfSIlya Dryomov con->v1.out_more = m->more_to_follow;
189a56dd9bfSIlya Dryomov con->v1.out_msg_done = true;
1902f713615SIlya Dryomov }
1912f713615SIlya Dryomov
1922f713615SIlya Dryomov /*
1932f713615SIlya Dryomov * Prepare headers for the next outgoing message.
1942f713615SIlya Dryomov */
prepare_write_message(struct ceph_connection * con)1952f713615SIlya Dryomov static void prepare_write_message(struct ceph_connection *con)
1962f713615SIlya Dryomov {
1972f713615SIlya Dryomov struct ceph_msg *m;
1982f713615SIlya Dryomov u32 crc;
1992f713615SIlya Dryomov
2002f713615SIlya Dryomov con_out_kvec_reset(con);
201a56dd9bfSIlya Dryomov con->v1.out_msg_done = false;
2022f713615SIlya Dryomov
2032f713615SIlya Dryomov /* Sneak an ack in there first? If we can get it into the same
2042f713615SIlya Dryomov * TCP packet that's a good thing. */
2052f713615SIlya Dryomov if (con->in_seq > con->in_seq_acked) {
2062f713615SIlya Dryomov con->in_seq_acked = con->in_seq;
2072f713615SIlya Dryomov con_out_kvec_add(con, sizeof (tag_ack), &tag_ack);
208a56dd9bfSIlya Dryomov con->v1.out_temp_ack = cpu_to_le64(con->in_seq_acked);
209a56dd9bfSIlya Dryomov con_out_kvec_add(con, sizeof(con->v1.out_temp_ack),
210a56dd9bfSIlya Dryomov &con->v1.out_temp_ack);
2112f713615SIlya Dryomov }
2122f713615SIlya Dryomov
2132f713615SIlya Dryomov ceph_con_get_out_msg(con);
2142f713615SIlya Dryomov m = con->out_msg;
2152f713615SIlya Dryomov
2162f713615SIlya Dryomov dout("prepare_write_message %p seq %lld type %d len %d+%d+%zd\n",
2172f713615SIlya Dryomov m, con->out_seq, le16_to_cpu(m->hdr.type),
2182f713615SIlya Dryomov le32_to_cpu(m->hdr.front_len), le32_to_cpu(m->hdr.middle_len),
2192f713615SIlya Dryomov m->data_length);
2202f713615SIlya Dryomov WARN_ON(m->front.iov_len != le32_to_cpu(m->hdr.front_len));
2212f713615SIlya Dryomov WARN_ON(m->data_length != le32_to_cpu(m->hdr.data_len));
2222f713615SIlya Dryomov
2232f713615SIlya Dryomov /* tag + hdr + front + middle */
2242f713615SIlya Dryomov con_out_kvec_add(con, sizeof (tag_msg), &tag_msg);
225a56dd9bfSIlya Dryomov con_out_kvec_add(con, sizeof(con->v1.out_hdr), &con->v1.out_hdr);
2262f713615SIlya Dryomov con_out_kvec_add(con, m->front.iov_len, m->front.iov_base);
2272f713615SIlya Dryomov
2282f713615SIlya Dryomov if (m->middle)
2292f713615SIlya Dryomov con_out_kvec_add(con, m->middle->vec.iov_len,
2302f713615SIlya Dryomov m->middle->vec.iov_base);
2312f713615SIlya Dryomov
2322f713615SIlya Dryomov /* fill in hdr crc and finalize hdr */
2332f713615SIlya Dryomov crc = crc32c(0, &m->hdr, offsetof(struct ceph_msg_header, crc));
2342f713615SIlya Dryomov con->out_msg->hdr.crc = cpu_to_le32(crc);
235a56dd9bfSIlya Dryomov memcpy(&con->v1.out_hdr, &con->out_msg->hdr, sizeof(con->v1.out_hdr));
2362f713615SIlya Dryomov
2372f713615SIlya Dryomov /* fill in front and middle crc, footer */
2382f713615SIlya Dryomov crc = crc32c(0, m->front.iov_base, m->front.iov_len);
2392f713615SIlya Dryomov con->out_msg->footer.front_crc = cpu_to_le32(crc);
2402f713615SIlya Dryomov if (m->middle) {
2412f713615SIlya Dryomov crc = crc32c(0, m->middle->vec.iov_base,
2422f713615SIlya Dryomov m->middle->vec.iov_len);
2432f713615SIlya Dryomov con->out_msg->footer.middle_crc = cpu_to_le32(crc);
2442f713615SIlya Dryomov } else
2452f713615SIlya Dryomov con->out_msg->footer.middle_crc = 0;
2462f713615SIlya Dryomov dout("%s front_crc %u middle_crc %u\n", __func__,
2472f713615SIlya Dryomov le32_to_cpu(con->out_msg->footer.front_crc),
2482f713615SIlya Dryomov le32_to_cpu(con->out_msg->footer.middle_crc));
2492f713615SIlya Dryomov con->out_msg->footer.flags = 0;
2502f713615SIlya Dryomov
2512f713615SIlya Dryomov /* is there a data payload? */
2522f713615SIlya Dryomov con->out_msg->footer.data_crc = 0;
2532f713615SIlya Dryomov if (m->data_length) {
2542f713615SIlya Dryomov prepare_message_data(con->out_msg, m->data_length);
255a56dd9bfSIlya Dryomov con->v1.out_more = 1; /* data + footer will follow */
2562f713615SIlya Dryomov } else {
2572f713615SIlya Dryomov /* no, queue up footer too and be done */
2582f713615SIlya Dryomov prepare_write_message_footer(con);
2592f713615SIlya Dryomov }
2602f713615SIlya Dryomov
2612f713615SIlya Dryomov ceph_con_flag_set(con, CEPH_CON_F_WRITE_PENDING);
2622f713615SIlya Dryomov }
2632f713615SIlya Dryomov
2642f713615SIlya Dryomov /*
2652f713615SIlya Dryomov * Prepare an ack.
2662f713615SIlya Dryomov */
prepare_write_ack(struct ceph_connection * con)2672f713615SIlya Dryomov static void prepare_write_ack(struct ceph_connection *con)
2682f713615SIlya Dryomov {
2692f713615SIlya Dryomov dout("prepare_write_ack %p %llu -> %llu\n", con,
2702f713615SIlya Dryomov con->in_seq_acked, con->in_seq);
2712f713615SIlya Dryomov con->in_seq_acked = con->in_seq;
2722f713615SIlya Dryomov
2732f713615SIlya Dryomov con_out_kvec_reset(con);
2742f713615SIlya Dryomov
2752f713615SIlya Dryomov con_out_kvec_add(con, sizeof (tag_ack), &tag_ack);
2762f713615SIlya Dryomov
277a56dd9bfSIlya Dryomov con->v1.out_temp_ack = cpu_to_le64(con->in_seq_acked);
278a56dd9bfSIlya Dryomov con_out_kvec_add(con, sizeof(con->v1.out_temp_ack),
279a56dd9bfSIlya Dryomov &con->v1.out_temp_ack);
2802f713615SIlya Dryomov
281a56dd9bfSIlya Dryomov con->v1.out_more = 1; /* more will follow.. eventually.. */
2822f713615SIlya Dryomov ceph_con_flag_set(con, CEPH_CON_F_WRITE_PENDING);
2832f713615SIlya Dryomov }
2842f713615SIlya Dryomov
2852f713615SIlya Dryomov /*
2862f713615SIlya Dryomov * Prepare to share the seq during handshake
2872f713615SIlya Dryomov */
prepare_write_seq(struct ceph_connection * con)2882f713615SIlya Dryomov static void prepare_write_seq(struct ceph_connection *con)
2892f713615SIlya Dryomov {
2902f713615SIlya Dryomov dout("prepare_write_seq %p %llu -> %llu\n", con,
2912f713615SIlya Dryomov con->in_seq_acked, con->in_seq);
2922f713615SIlya Dryomov con->in_seq_acked = con->in_seq;
2932f713615SIlya Dryomov
2942f713615SIlya Dryomov con_out_kvec_reset(con);
2952f713615SIlya Dryomov
296a56dd9bfSIlya Dryomov con->v1.out_temp_ack = cpu_to_le64(con->in_seq_acked);
297a56dd9bfSIlya Dryomov con_out_kvec_add(con, sizeof(con->v1.out_temp_ack),
298a56dd9bfSIlya Dryomov &con->v1.out_temp_ack);
2992f713615SIlya Dryomov
3002f713615SIlya Dryomov ceph_con_flag_set(con, CEPH_CON_F_WRITE_PENDING);
3012f713615SIlya Dryomov }
3022f713615SIlya Dryomov
3032f713615SIlya Dryomov /*
3042f713615SIlya Dryomov * Prepare to write keepalive byte.
3052f713615SIlya Dryomov */
prepare_write_keepalive(struct ceph_connection * con)3062f713615SIlya Dryomov static void prepare_write_keepalive(struct ceph_connection *con)
3072f713615SIlya Dryomov {
3082f713615SIlya Dryomov dout("prepare_write_keepalive %p\n", con);
3092f713615SIlya Dryomov con_out_kvec_reset(con);
3102f713615SIlya Dryomov if (con->peer_features & CEPH_FEATURE_MSGR_KEEPALIVE2) {
3112f713615SIlya Dryomov struct timespec64 now;
3122f713615SIlya Dryomov
3132f713615SIlya Dryomov ktime_get_real_ts64(&now);
3142f713615SIlya Dryomov con_out_kvec_add(con, sizeof(tag_keepalive2), &tag_keepalive2);
315a56dd9bfSIlya Dryomov ceph_encode_timespec64(&con->v1.out_temp_keepalive2, &now);
316a56dd9bfSIlya Dryomov con_out_kvec_add(con, sizeof(con->v1.out_temp_keepalive2),
317a56dd9bfSIlya Dryomov &con->v1.out_temp_keepalive2);
3182f713615SIlya Dryomov } else {
3192f713615SIlya Dryomov con_out_kvec_add(con, sizeof(tag_keepalive), &tag_keepalive);
3202f713615SIlya Dryomov }
3212f713615SIlya Dryomov ceph_con_flag_set(con, CEPH_CON_F_WRITE_PENDING);
3222f713615SIlya Dryomov }
3232f713615SIlya Dryomov
/*
 * Connection negotiation.
 */
3272f713615SIlya Dryomov
get_connect_authorizer(struct ceph_connection * con)3282f713615SIlya Dryomov static int get_connect_authorizer(struct ceph_connection *con)
3292f713615SIlya Dryomov {
3302f713615SIlya Dryomov struct ceph_auth_handshake *auth;
3312f713615SIlya Dryomov int auth_proto;
3322f713615SIlya Dryomov
3332f713615SIlya Dryomov if (!con->ops->get_authorizer) {
334a56dd9bfSIlya Dryomov con->v1.auth = NULL;
335a56dd9bfSIlya Dryomov con->v1.out_connect.authorizer_protocol = CEPH_AUTH_UNKNOWN;
336a56dd9bfSIlya Dryomov con->v1.out_connect.authorizer_len = 0;
3372f713615SIlya Dryomov return 0;
3382f713615SIlya Dryomov }
3392f713615SIlya Dryomov
340a56dd9bfSIlya Dryomov auth = con->ops->get_authorizer(con, &auth_proto, con->v1.auth_retry);
3412f713615SIlya Dryomov if (IS_ERR(auth))
3422f713615SIlya Dryomov return PTR_ERR(auth);
3432f713615SIlya Dryomov
344a56dd9bfSIlya Dryomov con->v1.auth = auth;
345a56dd9bfSIlya Dryomov con->v1.out_connect.authorizer_protocol = cpu_to_le32(auth_proto);
346a56dd9bfSIlya Dryomov con->v1.out_connect.authorizer_len =
347a56dd9bfSIlya Dryomov cpu_to_le32(auth->authorizer_buf_len);
3482f713615SIlya Dryomov return 0;
3492f713615SIlya Dryomov }
3502f713615SIlya Dryomov
3512f713615SIlya Dryomov /*
3522f713615SIlya Dryomov * We connected to a peer and are saying hello.
3532f713615SIlya Dryomov */
prepare_write_banner(struct ceph_connection * con)3542f713615SIlya Dryomov static void prepare_write_banner(struct ceph_connection *con)
3552f713615SIlya Dryomov {
3562f713615SIlya Dryomov con_out_kvec_add(con, strlen(CEPH_BANNER), CEPH_BANNER);
3572f713615SIlya Dryomov con_out_kvec_add(con, sizeof (con->msgr->my_enc_addr),
3582f713615SIlya Dryomov &con->msgr->my_enc_addr);
3592f713615SIlya Dryomov
360a56dd9bfSIlya Dryomov con->v1.out_more = 0;
3612f713615SIlya Dryomov ceph_con_flag_set(con, CEPH_CON_F_WRITE_PENDING);
3622f713615SIlya Dryomov }
3632f713615SIlya Dryomov
__prepare_write_connect(struct ceph_connection * con)3642f713615SIlya Dryomov static void __prepare_write_connect(struct ceph_connection *con)
3652f713615SIlya Dryomov {
366a56dd9bfSIlya Dryomov con_out_kvec_add(con, sizeof(con->v1.out_connect),
367a56dd9bfSIlya Dryomov &con->v1.out_connect);
368a56dd9bfSIlya Dryomov if (con->v1.auth)
369a56dd9bfSIlya Dryomov con_out_kvec_add(con, con->v1.auth->authorizer_buf_len,
370a56dd9bfSIlya Dryomov con->v1.auth->authorizer_buf);
3712f713615SIlya Dryomov
372a56dd9bfSIlya Dryomov con->v1.out_more = 0;
3732f713615SIlya Dryomov ceph_con_flag_set(con, CEPH_CON_F_WRITE_PENDING);
3742f713615SIlya Dryomov }
3752f713615SIlya Dryomov
prepare_write_connect(struct ceph_connection * con)3762f713615SIlya Dryomov static int prepare_write_connect(struct ceph_connection *con)
3772f713615SIlya Dryomov {
3782f713615SIlya Dryomov unsigned int global_seq = ceph_get_global_seq(con->msgr, 0);
3792f713615SIlya Dryomov int proto;
3802f713615SIlya Dryomov int ret;
3812f713615SIlya Dryomov
3822f713615SIlya Dryomov switch (con->peer_name.type) {
3832f713615SIlya Dryomov case CEPH_ENTITY_TYPE_MON:
3842f713615SIlya Dryomov proto = CEPH_MONC_PROTOCOL;
3852f713615SIlya Dryomov break;
3862f713615SIlya Dryomov case CEPH_ENTITY_TYPE_OSD:
3872f713615SIlya Dryomov proto = CEPH_OSDC_PROTOCOL;
3882f713615SIlya Dryomov break;
3892f713615SIlya Dryomov case CEPH_ENTITY_TYPE_MDS:
3902f713615SIlya Dryomov proto = CEPH_MDSC_PROTOCOL;
3912f713615SIlya Dryomov break;
3922f713615SIlya Dryomov default:
3932f713615SIlya Dryomov BUG();
3942f713615SIlya Dryomov }
3952f713615SIlya Dryomov
3962f713615SIlya Dryomov dout("prepare_write_connect %p cseq=%d gseq=%d proto=%d\n", con,
397a56dd9bfSIlya Dryomov con->v1.connect_seq, global_seq, proto);
3982f713615SIlya Dryomov
399a56dd9bfSIlya Dryomov con->v1.out_connect.features =
4002f713615SIlya Dryomov cpu_to_le64(from_msgr(con->msgr)->supported_features);
401a56dd9bfSIlya Dryomov con->v1.out_connect.host_type = cpu_to_le32(CEPH_ENTITY_TYPE_CLIENT);
402a56dd9bfSIlya Dryomov con->v1.out_connect.connect_seq = cpu_to_le32(con->v1.connect_seq);
403a56dd9bfSIlya Dryomov con->v1.out_connect.global_seq = cpu_to_le32(global_seq);
404a56dd9bfSIlya Dryomov con->v1.out_connect.protocol_version = cpu_to_le32(proto);
405a56dd9bfSIlya Dryomov con->v1.out_connect.flags = 0;
4062f713615SIlya Dryomov
4072f713615SIlya Dryomov ret = get_connect_authorizer(con);
4082f713615SIlya Dryomov if (ret)
4092f713615SIlya Dryomov return ret;
4102f713615SIlya Dryomov
4112f713615SIlya Dryomov __prepare_write_connect(con);
4122f713615SIlya Dryomov return 0;
4132f713615SIlya Dryomov }
4142f713615SIlya Dryomov
4152f713615SIlya Dryomov /*
4162f713615SIlya Dryomov * write as much of pending kvecs to the socket as we can.
4172f713615SIlya Dryomov * 1 -> done
4182f713615SIlya Dryomov * 0 -> socket full, but more to do
4192f713615SIlya Dryomov * <0 -> error
4202f713615SIlya Dryomov */
write_partial_kvec(struct ceph_connection * con)4212f713615SIlya Dryomov static int write_partial_kvec(struct ceph_connection *con)
4222f713615SIlya Dryomov {
4232f713615SIlya Dryomov int ret;
4242f713615SIlya Dryomov
425a56dd9bfSIlya Dryomov dout("write_partial_kvec %p %d left\n", con, con->v1.out_kvec_bytes);
426a56dd9bfSIlya Dryomov while (con->v1.out_kvec_bytes > 0) {
427a56dd9bfSIlya Dryomov ret = ceph_tcp_sendmsg(con->sock, con->v1.out_kvec_cur,
428a56dd9bfSIlya Dryomov con->v1.out_kvec_left,
429a56dd9bfSIlya Dryomov con->v1.out_kvec_bytes,
430a56dd9bfSIlya Dryomov con->v1.out_more);
4312f713615SIlya Dryomov if (ret <= 0)
4322f713615SIlya Dryomov goto out;
433a56dd9bfSIlya Dryomov con->v1.out_kvec_bytes -= ret;
434a56dd9bfSIlya Dryomov if (!con->v1.out_kvec_bytes)
4352f713615SIlya Dryomov break; /* done */
4362f713615SIlya Dryomov
4372f713615SIlya Dryomov /* account for full iov entries consumed */
438a56dd9bfSIlya Dryomov while (ret >= con->v1.out_kvec_cur->iov_len) {
439a56dd9bfSIlya Dryomov BUG_ON(!con->v1.out_kvec_left);
440a56dd9bfSIlya Dryomov ret -= con->v1.out_kvec_cur->iov_len;
441a56dd9bfSIlya Dryomov con->v1.out_kvec_cur++;
442a56dd9bfSIlya Dryomov con->v1.out_kvec_left--;
4432f713615SIlya Dryomov }
4442f713615SIlya Dryomov /* and for a partially-consumed entry */
4452f713615SIlya Dryomov if (ret) {
446a56dd9bfSIlya Dryomov con->v1.out_kvec_cur->iov_len -= ret;
447a56dd9bfSIlya Dryomov con->v1.out_kvec_cur->iov_base += ret;
4482f713615SIlya Dryomov }
4492f713615SIlya Dryomov }
450a56dd9bfSIlya Dryomov con->v1.out_kvec_left = 0;
4512f713615SIlya Dryomov ret = 1;
4522f713615SIlya Dryomov out:
4532f713615SIlya Dryomov dout("write_partial_kvec %p %d left in %d kvecs ret = %d\n", con,
454a56dd9bfSIlya Dryomov con->v1.out_kvec_bytes, con->v1.out_kvec_left, ret);
4552f713615SIlya Dryomov return ret; /* done! */
4562f713615SIlya Dryomov }
4572f713615SIlya Dryomov
4582f713615SIlya Dryomov /*
4592f713615SIlya Dryomov * Write as much message data payload as we can. If we finish, queue
4602f713615SIlya Dryomov * up the footer.
4612f713615SIlya Dryomov * 1 -> done, footer is now queued in out_kvec[].
4622f713615SIlya Dryomov * 0 -> socket full, but more to do
4632f713615SIlya Dryomov * <0 -> error
4642f713615SIlya Dryomov */
write_partial_message_data(struct ceph_connection * con)4652f713615SIlya Dryomov static int write_partial_message_data(struct ceph_connection *con)
4662f713615SIlya Dryomov {
4672f713615SIlya Dryomov struct ceph_msg *msg = con->out_msg;
4682f713615SIlya Dryomov struct ceph_msg_data_cursor *cursor = &msg->cursor;
4692f713615SIlya Dryomov bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC);
4702f713615SIlya Dryomov u32 crc;
4712f713615SIlya Dryomov
4722f713615SIlya Dryomov dout("%s %p msg %p\n", __func__, con, msg);
4732f713615SIlya Dryomov
4742f713615SIlya Dryomov if (!msg->num_data_items)
4752f713615SIlya Dryomov return -EINVAL;
4762f713615SIlya Dryomov
4772f713615SIlya Dryomov /*
4782f713615SIlya Dryomov * Iterate through each page that contains data to be
4792f713615SIlya Dryomov * written, and send as much as possible for each.
4802f713615SIlya Dryomov *
4812f713615SIlya Dryomov * If we are calculating the data crc (the default), we will
4822f713615SIlya Dryomov * need to map the page. If we have no pages, they have
4832f713615SIlya Dryomov * been revoked, so use the zero page.
4842f713615SIlya Dryomov */
4852f713615SIlya Dryomov crc = do_datacrc ? le32_to_cpu(msg->footer.data_crc) : 0;
4862f713615SIlya Dryomov while (cursor->total_resid) {
4872f713615SIlya Dryomov struct page *page;
4882f713615SIlya Dryomov size_t page_offset;
4892f713615SIlya Dryomov size_t length;
4902f713615SIlya Dryomov int ret;
4912f713615SIlya Dryomov
4922f713615SIlya Dryomov if (!cursor->resid) {
4932f713615SIlya Dryomov ceph_msg_data_advance(cursor, 0);
4942f713615SIlya Dryomov continue;
4952f713615SIlya Dryomov }
4962f713615SIlya Dryomov
497da4ab869SJeff Layton page = ceph_msg_data_next(cursor, &page_offset, &length);
4985da4d7b8SDavid Howells ret = ceph_tcp_sendpage(con->sock, page, page_offset, length,
4995da4d7b8SDavid Howells MSG_MORE);
5002f713615SIlya Dryomov if (ret <= 0) {
5012f713615SIlya Dryomov if (do_datacrc)
5022f713615SIlya Dryomov msg->footer.data_crc = cpu_to_le32(crc);
5032f713615SIlya Dryomov
5042f713615SIlya Dryomov return ret;
5052f713615SIlya Dryomov }
5062f713615SIlya Dryomov if (do_datacrc && cursor->need_crc)
5072f713615SIlya Dryomov crc = ceph_crc32c_page(crc, page, page_offset, length);
5082f713615SIlya Dryomov ceph_msg_data_advance(cursor, (size_t)ret);
5092f713615SIlya Dryomov }
5102f713615SIlya Dryomov
5112f713615SIlya Dryomov dout("%s %p msg %p done\n", __func__, con, msg);
5122f713615SIlya Dryomov
5132f713615SIlya Dryomov /* prepare and queue up footer, too */
5142f713615SIlya Dryomov if (do_datacrc)
5152f713615SIlya Dryomov msg->footer.data_crc = cpu_to_le32(crc);
5162f713615SIlya Dryomov else
5172f713615SIlya Dryomov msg->footer.flags |= CEPH_MSG_FOOTER_NOCRC;
5182f713615SIlya Dryomov con_out_kvec_reset(con);
5192f713615SIlya Dryomov prepare_write_message_footer(con);
5202f713615SIlya Dryomov
5212f713615SIlya Dryomov return 1; /* must return > 0 to indicate success */
5222f713615SIlya Dryomov }
5232f713615SIlya Dryomov
5242f713615SIlya Dryomov /*
5252f713615SIlya Dryomov * write some zeros
5262f713615SIlya Dryomov */
write_partial_skip(struct ceph_connection * con)5272f713615SIlya Dryomov static int write_partial_skip(struct ceph_connection *con)
5282f713615SIlya Dryomov {
5292f713615SIlya Dryomov int ret;
5302f713615SIlya Dryomov
531a56dd9bfSIlya Dryomov dout("%s %p %d left\n", __func__, con, con->v1.out_skip);
532a56dd9bfSIlya Dryomov while (con->v1.out_skip > 0) {
533a56dd9bfSIlya Dryomov size_t size = min(con->v1.out_skip, (int)PAGE_SIZE);
5342f713615SIlya Dryomov
5355da4d7b8SDavid Howells ret = ceph_tcp_sendpage(con->sock, ceph_zero_page, 0, size,
5365da4d7b8SDavid Howells MSG_MORE);
5372f713615SIlya Dryomov if (ret <= 0)
5382f713615SIlya Dryomov goto out;
539a56dd9bfSIlya Dryomov con->v1.out_skip -= ret;
5402f713615SIlya Dryomov }
5412f713615SIlya Dryomov ret = 1;
5422f713615SIlya Dryomov out:
5432f713615SIlya Dryomov return ret;
5442f713615SIlya Dryomov }
5452f713615SIlya Dryomov
5462f713615SIlya Dryomov /*
5472f713615SIlya Dryomov * Prepare to read connection handshake, or an ack.
5482f713615SIlya Dryomov */
prepare_read_banner(struct ceph_connection * con)5492f713615SIlya Dryomov static void prepare_read_banner(struct ceph_connection *con)
5502f713615SIlya Dryomov {
5512f713615SIlya Dryomov dout("prepare_read_banner %p\n", con);
552a56dd9bfSIlya Dryomov con->v1.in_base_pos = 0;
5532f713615SIlya Dryomov }
5542f713615SIlya Dryomov
prepare_read_connect(struct ceph_connection * con)5552f713615SIlya Dryomov static void prepare_read_connect(struct ceph_connection *con)
5562f713615SIlya Dryomov {
5572f713615SIlya Dryomov dout("prepare_read_connect %p\n", con);
558a56dd9bfSIlya Dryomov con->v1.in_base_pos = 0;
5592f713615SIlya Dryomov }
5602f713615SIlya Dryomov
prepare_read_ack(struct ceph_connection * con)5612f713615SIlya Dryomov static void prepare_read_ack(struct ceph_connection *con)
5622f713615SIlya Dryomov {
5632f713615SIlya Dryomov dout("prepare_read_ack %p\n", con);
564a56dd9bfSIlya Dryomov con->v1.in_base_pos = 0;
5652f713615SIlya Dryomov }
5662f713615SIlya Dryomov
prepare_read_seq(struct ceph_connection * con)5672f713615SIlya Dryomov static void prepare_read_seq(struct ceph_connection *con)
5682f713615SIlya Dryomov {
5692f713615SIlya Dryomov dout("prepare_read_seq %p\n", con);
570a56dd9bfSIlya Dryomov con->v1.in_base_pos = 0;
571a56dd9bfSIlya Dryomov con->v1.in_tag = CEPH_MSGR_TAG_SEQ;
5722f713615SIlya Dryomov }
5732f713615SIlya Dryomov
prepare_read_tag(struct ceph_connection * con)5742f713615SIlya Dryomov static void prepare_read_tag(struct ceph_connection *con)
5752f713615SIlya Dryomov {
5762f713615SIlya Dryomov dout("prepare_read_tag %p\n", con);
577a56dd9bfSIlya Dryomov con->v1.in_base_pos = 0;
578a56dd9bfSIlya Dryomov con->v1.in_tag = CEPH_MSGR_TAG_READY;
5792f713615SIlya Dryomov }
5802f713615SIlya Dryomov
prepare_read_keepalive_ack(struct ceph_connection * con)5812f713615SIlya Dryomov static void prepare_read_keepalive_ack(struct ceph_connection *con)
5822f713615SIlya Dryomov {
5832f713615SIlya Dryomov dout("prepare_read_keepalive_ack %p\n", con);
584a56dd9bfSIlya Dryomov con->v1.in_base_pos = 0;
5852f713615SIlya Dryomov }
5862f713615SIlya Dryomov
5872f713615SIlya Dryomov /*
5882f713615SIlya Dryomov * Prepare to read a message.
5892f713615SIlya Dryomov */
prepare_read_message(struct ceph_connection * con)5902f713615SIlya Dryomov static int prepare_read_message(struct ceph_connection *con)
5912f713615SIlya Dryomov {
5922f713615SIlya Dryomov dout("prepare_read_message %p\n", con);
5932f713615SIlya Dryomov BUG_ON(con->in_msg != NULL);
594a56dd9bfSIlya Dryomov con->v1.in_base_pos = 0;
5952f713615SIlya Dryomov con->in_front_crc = con->in_middle_crc = con->in_data_crc = 0;
5962f713615SIlya Dryomov return 0;
5972f713615SIlya Dryomov }
5982f713615SIlya Dryomov
read_partial(struct ceph_connection * con,int end,int size,void * object)5992f713615SIlya Dryomov static int read_partial(struct ceph_connection *con,
6002f713615SIlya Dryomov int end, int size, void *object)
6012f713615SIlya Dryomov {
602a56dd9bfSIlya Dryomov while (con->v1.in_base_pos < end) {
603a56dd9bfSIlya Dryomov int left = end - con->v1.in_base_pos;
6042f713615SIlya Dryomov int have = size - left;
6052f713615SIlya Dryomov int ret = ceph_tcp_recvmsg(con->sock, object + have, left);
6062f713615SIlya Dryomov if (ret <= 0)
6072f713615SIlya Dryomov return ret;
608a56dd9bfSIlya Dryomov con->v1.in_base_pos += ret;
6092f713615SIlya Dryomov }
6102f713615SIlya Dryomov return 1;
6112f713615SIlya Dryomov }
6122f713615SIlya Dryomov
6132f713615SIlya Dryomov /*
6142f713615SIlya Dryomov * Read all or part of the connect-side handshake on a new connection
6152f713615SIlya Dryomov */
read_partial_banner(struct ceph_connection * con)6162f713615SIlya Dryomov static int read_partial_banner(struct ceph_connection *con)
6172f713615SIlya Dryomov {
6182f713615SIlya Dryomov int size;
6192f713615SIlya Dryomov int end;
6202f713615SIlya Dryomov int ret;
6212f713615SIlya Dryomov
622a56dd9bfSIlya Dryomov dout("read_partial_banner %p at %d\n", con, con->v1.in_base_pos);
6232f713615SIlya Dryomov
6242f713615SIlya Dryomov /* peer's banner */
6252f713615SIlya Dryomov size = strlen(CEPH_BANNER);
6262f713615SIlya Dryomov end = size;
627a56dd9bfSIlya Dryomov ret = read_partial(con, end, size, con->v1.in_banner);
6282f713615SIlya Dryomov if (ret <= 0)
6292f713615SIlya Dryomov goto out;
6302f713615SIlya Dryomov
631a56dd9bfSIlya Dryomov size = sizeof(con->v1.actual_peer_addr);
6322f713615SIlya Dryomov end += size;
633a56dd9bfSIlya Dryomov ret = read_partial(con, end, size, &con->v1.actual_peer_addr);
6342f713615SIlya Dryomov if (ret <= 0)
6352f713615SIlya Dryomov goto out;
636a56dd9bfSIlya Dryomov ceph_decode_banner_addr(&con->v1.actual_peer_addr);
6372f713615SIlya Dryomov
638a56dd9bfSIlya Dryomov size = sizeof(con->v1.peer_addr_for_me);
6392f713615SIlya Dryomov end += size;
640a56dd9bfSIlya Dryomov ret = read_partial(con, end, size, &con->v1.peer_addr_for_me);
6412f713615SIlya Dryomov if (ret <= 0)
6422f713615SIlya Dryomov goto out;
643a56dd9bfSIlya Dryomov ceph_decode_banner_addr(&con->v1.peer_addr_for_me);
6442f713615SIlya Dryomov
6452f713615SIlya Dryomov out:
6462f713615SIlya Dryomov return ret;
6472f713615SIlya Dryomov }
6482f713615SIlya Dryomov
read_partial_connect(struct ceph_connection * con)6492f713615SIlya Dryomov static int read_partial_connect(struct ceph_connection *con)
6502f713615SIlya Dryomov {
6512f713615SIlya Dryomov int size;
6522f713615SIlya Dryomov int end;
6532f713615SIlya Dryomov int ret;
6542f713615SIlya Dryomov
655a56dd9bfSIlya Dryomov dout("read_partial_connect %p at %d\n", con, con->v1.in_base_pos);
6562f713615SIlya Dryomov
657a56dd9bfSIlya Dryomov size = sizeof(con->v1.in_reply);
6582f713615SIlya Dryomov end = size;
659a56dd9bfSIlya Dryomov ret = read_partial(con, end, size, &con->v1.in_reply);
6602f713615SIlya Dryomov if (ret <= 0)
6612f713615SIlya Dryomov goto out;
6622f713615SIlya Dryomov
663a56dd9bfSIlya Dryomov if (con->v1.auth) {
664a56dd9bfSIlya Dryomov size = le32_to_cpu(con->v1.in_reply.authorizer_len);
665a56dd9bfSIlya Dryomov if (size > con->v1.auth->authorizer_reply_buf_len) {
6662f713615SIlya Dryomov pr_err("authorizer reply too big: %d > %zu\n", size,
667a56dd9bfSIlya Dryomov con->v1.auth->authorizer_reply_buf_len);
6682f713615SIlya Dryomov ret = -EINVAL;
6692f713615SIlya Dryomov goto out;
6702f713615SIlya Dryomov }
6712f713615SIlya Dryomov
6722f713615SIlya Dryomov end += size;
6732f713615SIlya Dryomov ret = read_partial(con, end, size,
674a56dd9bfSIlya Dryomov con->v1.auth->authorizer_reply_buf);
6752f713615SIlya Dryomov if (ret <= 0)
6762f713615SIlya Dryomov goto out;
6772f713615SIlya Dryomov }
6782f713615SIlya Dryomov
6792f713615SIlya Dryomov dout("read_partial_connect %p tag %d, con_seq = %u, g_seq = %u\n",
680a56dd9bfSIlya Dryomov con, con->v1.in_reply.tag,
681a56dd9bfSIlya Dryomov le32_to_cpu(con->v1.in_reply.connect_seq),
682a56dd9bfSIlya Dryomov le32_to_cpu(con->v1.in_reply.global_seq));
6832f713615SIlya Dryomov out:
6842f713615SIlya Dryomov return ret;
6852f713615SIlya Dryomov }
6862f713615SIlya Dryomov
6872f713615SIlya Dryomov /*
6882f713615SIlya Dryomov * Verify the hello banner looks okay.
6892f713615SIlya Dryomov */
verify_hello(struct ceph_connection * con)6902f713615SIlya Dryomov static int verify_hello(struct ceph_connection *con)
6912f713615SIlya Dryomov {
692a56dd9bfSIlya Dryomov if (memcmp(con->v1.in_banner, CEPH_BANNER, strlen(CEPH_BANNER))) {
6932f713615SIlya Dryomov pr_err("connect to %s got bad banner\n",
6942f713615SIlya Dryomov ceph_pr_addr(&con->peer_addr));
6952f713615SIlya Dryomov con->error_msg = "protocol error, bad banner";
6962f713615SIlya Dryomov return -1;
6972f713615SIlya Dryomov }
6982f713615SIlya Dryomov return 0;
6992f713615SIlya Dryomov }
7002f713615SIlya Dryomov
process_banner(struct ceph_connection * con)7012f713615SIlya Dryomov static int process_banner(struct ceph_connection *con)
7022f713615SIlya Dryomov {
7032f713615SIlya Dryomov struct ceph_entity_addr *my_addr = &con->msgr->inst.addr;
7042f713615SIlya Dryomov
7052f713615SIlya Dryomov dout("process_banner on %p\n", con);
7062f713615SIlya Dryomov
7072f713615SIlya Dryomov if (verify_hello(con) < 0)
7082f713615SIlya Dryomov return -1;
7092f713615SIlya Dryomov
7102f713615SIlya Dryomov /*
7112f713615SIlya Dryomov * Make sure the other end is who we wanted. note that the other
7122f713615SIlya Dryomov * end may not yet know their ip address, so if it's 0.0.0.0, give
7132f713615SIlya Dryomov * them the benefit of the doubt.
7142f713615SIlya Dryomov */
715a56dd9bfSIlya Dryomov if (memcmp(&con->peer_addr, &con->v1.actual_peer_addr,
7162f713615SIlya Dryomov sizeof(con->peer_addr)) != 0 &&
717a56dd9bfSIlya Dryomov !(ceph_addr_is_blank(&con->v1.actual_peer_addr) &&
718a56dd9bfSIlya Dryomov con->v1.actual_peer_addr.nonce == con->peer_addr.nonce)) {
7192f713615SIlya Dryomov pr_warn("wrong peer, want %s/%u, got %s/%u\n",
7202f713615SIlya Dryomov ceph_pr_addr(&con->peer_addr),
7212f713615SIlya Dryomov le32_to_cpu(con->peer_addr.nonce),
722a56dd9bfSIlya Dryomov ceph_pr_addr(&con->v1.actual_peer_addr),
723a56dd9bfSIlya Dryomov le32_to_cpu(con->v1.actual_peer_addr.nonce));
7242f713615SIlya Dryomov con->error_msg = "wrong peer at address";
7252f713615SIlya Dryomov return -1;
7262f713615SIlya Dryomov }
7272f713615SIlya Dryomov
7282f713615SIlya Dryomov /*
7292f713615SIlya Dryomov * did we learn our address?
7302f713615SIlya Dryomov */
7312f713615SIlya Dryomov if (ceph_addr_is_blank(my_addr)) {
7322f713615SIlya Dryomov memcpy(&my_addr->in_addr,
733a56dd9bfSIlya Dryomov &con->v1.peer_addr_for_me.in_addr,
734a56dd9bfSIlya Dryomov sizeof(con->v1.peer_addr_for_me.in_addr));
7352f713615SIlya Dryomov ceph_addr_set_port(my_addr, 0);
7362f713615SIlya Dryomov ceph_encode_my_addr(con->msgr);
7372f713615SIlya Dryomov dout("process_banner learned my addr is %s\n",
7382f713615SIlya Dryomov ceph_pr_addr(my_addr));
7392f713615SIlya Dryomov }
7402f713615SIlya Dryomov
7412f713615SIlya Dryomov return 0;
7422f713615SIlya Dryomov }
7432f713615SIlya Dryomov
process_connect(struct ceph_connection * con)7442f713615SIlya Dryomov static int process_connect(struct ceph_connection *con)
7452f713615SIlya Dryomov {
7462f713615SIlya Dryomov u64 sup_feat = from_msgr(con->msgr)->supported_features;
7472f713615SIlya Dryomov u64 req_feat = from_msgr(con->msgr)->required_features;
748a56dd9bfSIlya Dryomov u64 server_feat = le64_to_cpu(con->v1.in_reply.features);
7492f713615SIlya Dryomov int ret;
7502f713615SIlya Dryomov
751a56dd9bfSIlya Dryomov dout("process_connect on %p tag %d\n", con, con->v1.in_tag);
7522f713615SIlya Dryomov
753a56dd9bfSIlya Dryomov if (con->v1.auth) {
754a56dd9bfSIlya Dryomov int len = le32_to_cpu(con->v1.in_reply.authorizer_len);
7552f713615SIlya Dryomov
7562f713615SIlya Dryomov /*
7572f713615SIlya Dryomov * Any connection that defines ->get_authorizer()
7582f713615SIlya Dryomov * should also define ->add_authorizer_challenge() and
7592f713615SIlya Dryomov * ->verify_authorizer_reply().
7602f713615SIlya Dryomov *
7612f713615SIlya Dryomov * See get_connect_authorizer().
7622f713615SIlya Dryomov */
763a56dd9bfSIlya Dryomov if (con->v1.in_reply.tag ==
764a56dd9bfSIlya Dryomov CEPH_MSGR_TAG_CHALLENGE_AUTHORIZER) {
7652f713615SIlya Dryomov ret = con->ops->add_authorizer_challenge(
766a56dd9bfSIlya Dryomov con, con->v1.auth->authorizer_reply_buf, len);
7672f713615SIlya Dryomov if (ret < 0)
7682f713615SIlya Dryomov return ret;
7692f713615SIlya Dryomov
7702f713615SIlya Dryomov con_out_kvec_reset(con);
7712f713615SIlya Dryomov __prepare_write_connect(con);
7722f713615SIlya Dryomov prepare_read_connect(con);
7732f713615SIlya Dryomov return 0;
7742f713615SIlya Dryomov }
7752f713615SIlya Dryomov
7762f713615SIlya Dryomov if (len) {
7772f713615SIlya Dryomov ret = con->ops->verify_authorizer_reply(con);
7782f713615SIlya Dryomov if (ret < 0) {
7792f713615SIlya Dryomov con->error_msg = "bad authorize reply";
7802f713615SIlya Dryomov return ret;
7812f713615SIlya Dryomov }
7822f713615SIlya Dryomov }
7832f713615SIlya Dryomov }
7842f713615SIlya Dryomov
785a56dd9bfSIlya Dryomov switch (con->v1.in_reply.tag) {
7862f713615SIlya Dryomov case CEPH_MSGR_TAG_FEATURES:
7872f713615SIlya Dryomov pr_err("%s%lld %s feature set mismatch,"
7882f713615SIlya Dryomov " my %llx < server's %llx, missing %llx\n",
7892f713615SIlya Dryomov ENTITY_NAME(con->peer_name),
7902f713615SIlya Dryomov ceph_pr_addr(&con->peer_addr),
7912f713615SIlya Dryomov sup_feat, server_feat, server_feat & ~sup_feat);
7922f713615SIlya Dryomov con->error_msg = "missing required protocol features";
7932f713615SIlya Dryomov return -1;
7942f713615SIlya Dryomov
7952f713615SIlya Dryomov case CEPH_MSGR_TAG_BADPROTOVER:
7962f713615SIlya Dryomov pr_err("%s%lld %s protocol version mismatch,"
7972f713615SIlya Dryomov " my %d != server's %d\n",
7982f713615SIlya Dryomov ENTITY_NAME(con->peer_name),
7992f713615SIlya Dryomov ceph_pr_addr(&con->peer_addr),
800a56dd9bfSIlya Dryomov le32_to_cpu(con->v1.out_connect.protocol_version),
801a56dd9bfSIlya Dryomov le32_to_cpu(con->v1.in_reply.protocol_version));
8022f713615SIlya Dryomov con->error_msg = "protocol version mismatch";
8032f713615SIlya Dryomov return -1;
8042f713615SIlya Dryomov
8052f713615SIlya Dryomov case CEPH_MSGR_TAG_BADAUTHORIZER:
806a56dd9bfSIlya Dryomov con->v1.auth_retry++;
8072f713615SIlya Dryomov dout("process_connect %p got BADAUTHORIZER attempt %d\n", con,
808a56dd9bfSIlya Dryomov con->v1.auth_retry);
809a56dd9bfSIlya Dryomov if (con->v1.auth_retry == 2) {
8102f713615SIlya Dryomov con->error_msg = "connect authorization failure";
8112f713615SIlya Dryomov return -1;
8122f713615SIlya Dryomov }
8132f713615SIlya Dryomov con_out_kvec_reset(con);
8142f713615SIlya Dryomov ret = prepare_write_connect(con);
8152f713615SIlya Dryomov if (ret < 0)
8162f713615SIlya Dryomov return ret;
8172f713615SIlya Dryomov prepare_read_connect(con);
8182f713615SIlya Dryomov break;
8192f713615SIlya Dryomov
8202f713615SIlya Dryomov case CEPH_MSGR_TAG_RESETSESSION:
8212f713615SIlya Dryomov /*
8222f713615SIlya Dryomov * If we connected with a large connect_seq but the peer
8232f713615SIlya Dryomov * has no record of a session with us (no connection, or
8242f713615SIlya Dryomov * connect_seq == 0), they will send RESETSESION to indicate
8252f713615SIlya Dryomov * that they must have reset their session, and may have
8262f713615SIlya Dryomov * dropped messages.
8272f713615SIlya Dryomov */
8282f713615SIlya Dryomov dout("process_connect got RESET peer seq %u\n",
829a56dd9bfSIlya Dryomov le32_to_cpu(con->v1.in_reply.connect_seq));
8302f713615SIlya Dryomov pr_info("%s%lld %s session reset\n",
8312f713615SIlya Dryomov ENTITY_NAME(con->peer_name),
8322f713615SIlya Dryomov ceph_pr_addr(&con->peer_addr));
8332f713615SIlya Dryomov ceph_con_reset_session(con);
8342f713615SIlya Dryomov con_out_kvec_reset(con);
8352f713615SIlya Dryomov ret = prepare_write_connect(con);
8362f713615SIlya Dryomov if (ret < 0)
8372f713615SIlya Dryomov return ret;
8382f713615SIlya Dryomov prepare_read_connect(con);
8392f713615SIlya Dryomov
8402f713615SIlya Dryomov /* Tell ceph about it. */
8412f713615SIlya Dryomov mutex_unlock(&con->mutex);
8422f713615SIlya Dryomov if (con->ops->peer_reset)
8432f713615SIlya Dryomov con->ops->peer_reset(con);
8442f713615SIlya Dryomov mutex_lock(&con->mutex);
8452f713615SIlya Dryomov if (con->state != CEPH_CON_S_V1_CONNECT_MSG)
8462f713615SIlya Dryomov return -EAGAIN;
8472f713615SIlya Dryomov break;
8482f713615SIlya Dryomov
8492f713615SIlya Dryomov case CEPH_MSGR_TAG_RETRY_SESSION:
8502f713615SIlya Dryomov /*
8512f713615SIlya Dryomov * If we sent a smaller connect_seq than the peer has, try
8522f713615SIlya Dryomov * again with a larger value.
8532f713615SIlya Dryomov */
8542f713615SIlya Dryomov dout("process_connect got RETRY_SESSION my seq %u, peer %u\n",
855a56dd9bfSIlya Dryomov le32_to_cpu(con->v1.out_connect.connect_seq),
856a56dd9bfSIlya Dryomov le32_to_cpu(con->v1.in_reply.connect_seq));
857a56dd9bfSIlya Dryomov con->v1.connect_seq = le32_to_cpu(con->v1.in_reply.connect_seq);
8582f713615SIlya Dryomov con_out_kvec_reset(con);
8592f713615SIlya Dryomov ret = prepare_write_connect(con);
8602f713615SIlya Dryomov if (ret < 0)
8612f713615SIlya Dryomov return ret;
8622f713615SIlya Dryomov prepare_read_connect(con);
8632f713615SIlya Dryomov break;
8642f713615SIlya Dryomov
8652f713615SIlya Dryomov case CEPH_MSGR_TAG_RETRY_GLOBAL:
8662f713615SIlya Dryomov /*
8672f713615SIlya Dryomov * If we sent a smaller global_seq than the peer has, try
8682f713615SIlya Dryomov * again with a larger value.
8692f713615SIlya Dryomov */
8702f713615SIlya Dryomov dout("process_connect got RETRY_GLOBAL my %u peer_gseq %u\n",
871a56dd9bfSIlya Dryomov con->v1.peer_global_seq,
872a56dd9bfSIlya Dryomov le32_to_cpu(con->v1.in_reply.global_seq));
8732f713615SIlya Dryomov ceph_get_global_seq(con->msgr,
874a56dd9bfSIlya Dryomov le32_to_cpu(con->v1.in_reply.global_seq));
8752f713615SIlya Dryomov con_out_kvec_reset(con);
8762f713615SIlya Dryomov ret = prepare_write_connect(con);
8772f713615SIlya Dryomov if (ret < 0)
8782f713615SIlya Dryomov return ret;
8792f713615SIlya Dryomov prepare_read_connect(con);
8802f713615SIlya Dryomov break;
8812f713615SIlya Dryomov
8822f713615SIlya Dryomov case CEPH_MSGR_TAG_SEQ:
8832f713615SIlya Dryomov case CEPH_MSGR_TAG_READY:
8842f713615SIlya Dryomov if (req_feat & ~server_feat) {
8852f713615SIlya Dryomov pr_err("%s%lld %s protocol feature mismatch,"
8862f713615SIlya Dryomov " my required %llx > server's %llx, need %llx\n",
8872f713615SIlya Dryomov ENTITY_NAME(con->peer_name),
8882f713615SIlya Dryomov ceph_pr_addr(&con->peer_addr),
8892f713615SIlya Dryomov req_feat, server_feat, req_feat & ~server_feat);
8902f713615SIlya Dryomov con->error_msg = "missing required protocol features";
8912f713615SIlya Dryomov return -1;
8922f713615SIlya Dryomov }
8932f713615SIlya Dryomov
8942f713615SIlya Dryomov WARN_ON(con->state != CEPH_CON_S_V1_CONNECT_MSG);
8952f713615SIlya Dryomov con->state = CEPH_CON_S_OPEN;
896a56dd9bfSIlya Dryomov con->v1.auth_retry = 0; /* we authenticated; clear flag */
897a56dd9bfSIlya Dryomov con->v1.peer_global_seq =
898a56dd9bfSIlya Dryomov le32_to_cpu(con->v1.in_reply.global_seq);
899a56dd9bfSIlya Dryomov con->v1.connect_seq++;
9002f713615SIlya Dryomov con->peer_features = server_feat;
9012f713615SIlya Dryomov dout("process_connect got READY gseq %d cseq %d (%d)\n",
902a56dd9bfSIlya Dryomov con->v1.peer_global_seq,
903a56dd9bfSIlya Dryomov le32_to_cpu(con->v1.in_reply.connect_seq),
904a56dd9bfSIlya Dryomov con->v1.connect_seq);
905a56dd9bfSIlya Dryomov WARN_ON(con->v1.connect_seq !=
906a56dd9bfSIlya Dryomov le32_to_cpu(con->v1.in_reply.connect_seq));
9072f713615SIlya Dryomov
908a56dd9bfSIlya Dryomov if (con->v1.in_reply.flags & CEPH_MSG_CONNECT_LOSSY)
9092f713615SIlya Dryomov ceph_con_flag_set(con, CEPH_CON_F_LOSSYTX);
9102f713615SIlya Dryomov
9112f713615SIlya Dryomov con->delay = 0; /* reset backoff memory */
9122f713615SIlya Dryomov
913a56dd9bfSIlya Dryomov if (con->v1.in_reply.tag == CEPH_MSGR_TAG_SEQ) {
9142f713615SIlya Dryomov prepare_write_seq(con);
9152f713615SIlya Dryomov prepare_read_seq(con);
9162f713615SIlya Dryomov } else {
9172f713615SIlya Dryomov prepare_read_tag(con);
9182f713615SIlya Dryomov }
9192f713615SIlya Dryomov break;
9202f713615SIlya Dryomov
9212f713615SIlya Dryomov case CEPH_MSGR_TAG_WAIT:
9222f713615SIlya Dryomov /*
9232f713615SIlya Dryomov * If there is a connection race (we are opening
9242f713615SIlya Dryomov * connections to each other), one of us may just have
9252f713615SIlya Dryomov * to WAIT. This shouldn't happen if we are the
9262f713615SIlya Dryomov * client.
9272f713615SIlya Dryomov */
9282f713615SIlya Dryomov con->error_msg = "protocol error, got WAIT as client";
9292f713615SIlya Dryomov return -1;
9302f713615SIlya Dryomov
9312f713615SIlya Dryomov default:
9322f713615SIlya Dryomov con->error_msg = "protocol error, garbage tag during connect";
9332f713615SIlya Dryomov return -1;
9342f713615SIlya Dryomov }
9352f713615SIlya Dryomov return 0;
9362f713615SIlya Dryomov }
9372f713615SIlya Dryomov
9382f713615SIlya Dryomov /*
9392f713615SIlya Dryomov * read (part of) an ack
9402f713615SIlya Dryomov */
read_partial_ack(struct ceph_connection * con)9412f713615SIlya Dryomov static int read_partial_ack(struct ceph_connection *con)
9422f713615SIlya Dryomov {
943a56dd9bfSIlya Dryomov int size = sizeof(con->v1.in_temp_ack);
9442f713615SIlya Dryomov int end = size;
9452f713615SIlya Dryomov
946a56dd9bfSIlya Dryomov return read_partial(con, end, size, &con->v1.in_temp_ack);
9472f713615SIlya Dryomov }
9482f713615SIlya Dryomov
9492f713615SIlya Dryomov /*
9502f713615SIlya Dryomov * We can finally discard anything that's been acked.
9512f713615SIlya Dryomov */
process_ack(struct ceph_connection * con)9522f713615SIlya Dryomov static void process_ack(struct ceph_connection *con)
9532f713615SIlya Dryomov {
954a56dd9bfSIlya Dryomov u64 ack = le64_to_cpu(con->v1.in_temp_ack);
9552f713615SIlya Dryomov
956a56dd9bfSIlya Dryomov if (con->v1.in_tag == CEPH_MSGR_TAG_ACK)
9572f713615SIlya Dryomov ceph_con_discard_sent(con, ack);
9582f713615SIlya Dryomov else
9592f713615SIlya Dryomov ceph_con_discard_requeued(con, ack);
9602f713615SIlya Dryomov
9612f713615SIlya Dryomov prepare_read_tag(con);
9622f713615SIlya Dryomov }
9632f713615SIlya Dryomov
/*
 * Resumable read of @sec_len bytes into @section.
 *
 * section->iov_len records how much has been received so far and grows
 * with every successful read.  Once it equals @sec_len, the crc32c of
 * the section is folded into *@crc.
 *
 * Returns 1 when the section is complete, otherwise the non-positive
 * result of ceph_tcp_recvmsg().
 */
static int read_partial_message_chunk(struct ceph_connection *con,
				      struct kvec *section,
				      unsigned int sec_len, u32 *crc)
{
	BUG_ON(!section);

	while (section->iov_len < sec_len) {
		char *dst;
		int want;
		int got;

		BUG_ON(section->iov_base == NULL);

		dst = (char *)section->iov_base + section->iov_len;
		want = sec_len - section->iov_len;
		got = ceph_tcp_recvmsg(con->sock, dst, want);
		if (got <= 0)
			return got;

		section->iov_len += got;
	}

	if (section->iov_len == sec_len)
		*crc = crc32c(*crc, section->iov_base, section->iov_len);

	return 1;
}
9862f713615SIlya Dryomov
/*
 * Same as read_partial_message_chunk(), except that *@crc is reset to
 * zero before the read, so the resulting CRC covers this section only.
 */
static inline int read_partial_message_section(struct ceph_connection *con,
					       struct kvec *section,
					       unsigned int sec_len, u32 *crc)
{
	*crc = 0;

	return read_partial_message_chunk(con, section, sec_len, crc);
}
994d396f89dSJeff Layton
/*
 * Receive the rest of the current sparse-read extent (cursor->sr_resid
 * bytes) into the message's data pages.
 *
 * With the RXBOUNCE option set, data is received into
 * con->bounce_page (allocated on first use) and then copied into the
 * destination page.  *@crc is updated over each piece received.
 *
 * Returns 1 when the extent is complete, -ENOMEM if the bounce page
 * cannot be allocated, otherwise the non-positive result of
 * ceph_tcp_recvpage().
 */
static int read_partial_sparse_msg_extent(struct ceph_connection *con, u32 *crc)
{
	struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor;
	bool do_bounce = ceph_test_opt(from_msgr(con->msgr), RXBOUNCE);

	if (do_bounce && unlikely(!con->bounce_page)) {
		con->bounce_page = alloc_page(GFP_NOIO);
		if (!con->bounce_page) {
			pr_err("failed to allocate bounce page\n");
			return -ENOMEM;
		}
	}

	while (cursor->sr_resid > 0) {
		struct page *dst_page;
		struct page *recv_page;
		size_t off;
		size_t len;
		int got;

		dst_page = ceph_msg_data_next(cursor, &off, &len);
		if (do_bounce)
			recv_page = con->bounce_page;
		else
			recv_page = dst_page;

		/* never read past the end of the extent */
		len = min_t(int, len, cursor->sr_resid);
		got = ceph_tcp_recvpage(con->sock, recv_page, (int)off, len);
		if (got <= 0)
			return got;

		*crc = ceph_crc32c_page(*crc, recv_page, off, got);
		ceph_msg_data_advance(cursor, (size_t)got);
		cursor->sr_resid -= got;
		if (do_bounce)
			memcpy_page(dst_page, off, recv_page, off, got);
	}

	return 1;
}
1029d396f89dSJeff Layton
read_partial_sparse_msg_data(struct ceph_connection * con)1030ee97302fSXiubo Li static int read_partial_sparse_msg_data(struct ceph_connection *con)
1031d396f89dSJeff Layton {
1032d396f89dSJeff Layton struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor;
1033d396f89dSJeff Layton bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC);
1034d396f89dSJeff Layton u32 crc = 0;
1035d396f89dSJeff Layton int ret = 1;
1036d396f89dSJeff Layton
1037d396f89dSJeff Layton if (do_datacrc)
1038d396f89dSJeff Layton crc = con->in_data_crc;
1039d396f89dSJeff Layton
1040*8e46a2d0SXiubo Li while (cursor->total_resid) {
1041d396f89dSJeff Layton if (con->v1.in_sr_kvec.iov_base)
1042d396f89dSJeff Layton ret = read_partial_message_chunk(con,
1043d396f89dSJeff Layton &con->v1.in_sr_kvec,
1044d396f89dSJeff Layton con->v1.in_sr_len,
1045d396f89dSJeff Layton &crc);
1046d396f89dSJeff Layton else if (cursor->sr_resid > 0)
1047ee97302fSXiubo Li ret = read_partial_sparse_msg_extent(con, &crc);
1048*8e46a2d0SXiubo Li if (ret <= 0)
1049*8e46a2d0SXiubo Li break;
1050d396f89dSJeff Layton
1051d396f89dSJeff Layton memset(&con->v1.in_sr_kvec, 0, sizeof(con->v1.in_sr_kvec));
1052d396f89dSJeff Layton ret = con->ops->sparse_read(con, cursor,
1053d396f89dSJeff Layton (char **)&con->v1.in_sr_kvec.iov_base);
1054*8e46a2d0SXiubo Li if (ret <= 0) {
1055*8e46a2d0SXiubo Li ret = ret ? ret : 1; /* must return > 0 to indicate success */
1056*8e46a2d0SXiubo Li break;
1057*8e46a2d0SXiubo Li }
1058d396f89dSJeff Layton con->v1.in_sr_len = ret;
1059*8e46a2d0SXiubo Li }
1060d396f89dSJeff Layton
1061d396f89dSJeff Layton if (do_datacrc)
1062d396f89dSJeff Layton con->in_data_crc = crc;
1063d396f89dSJeff Layton
1064*8e46a2d0SXiubo Li return ret;
1065d396f89dSJeff Layton }
1066d396f89dSJeff Layton
read_partial_msg_data(struct ceph_connection * con)10672f713615SIlya Dryomov static int read_partial_msg_data(struct ceph_connection *con)
10682f713615SIlya Dryomov {
1069038b8d1dSIlya Dryomov struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor;
10702f713615SIlya Dryomov bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC);
10712f713615SIlya Dryomov struct page *page;
10722f713615SIlya Dryomov size_t page_offset;
10732f713615SIlya Dryomov size_t length;
10742f713615SIlya Dryomov u32 crc = 0;
10752f713615SIlya Dryomov int ret;
10762f713615SIlya Dryomov
10772f713615SIlya Dryomov if (do_datacrc)
10782f713615SIlya Dryomov crc = con->in_data_crc;
10792f713615SIlya Dryomov while (cursor->total_resid) {
10802f713615SIlya Dryomov if (!cursor->resid) {
10812f713615SIlya Dryomov ceph_msg_data_advance(cursor, 0);
10822f713615SIlya Dryomov continue;
10832f713615SIlya Dryomov }
10842f713615SIlya Dryomov
1085da4ab869SJeff Layton page = ceph_msg_data_next(cursor, &page_offset, &length);
10862f713615SIlya Dryomov ret = ceph_tcp_recvpage(con->sock, page, page_offset, length);
10872f713615SIlya Dryomov if (ret <= 0) {
10882f713615SIlya Dryomov if (do_datacrc)
10892f713615SIlya Dryomov con->in_data_crc = crc;
10902f713615SIlya Dryomov
10912f713615SIlya Dryomov return ret;
10922f713615SIlya Dryomov }
10932f713615SIlya Dryomov
10942f713615SIlya Dryomov if (do_datacrc)
10952f713615SIlya Dryomov crc = ceph_crc32c_page(crc, page, page_offset, ret);
10962f713615SIlya Dryomov ceph_msg_data_advance(cursor, (size_t)ret);
10972f713615SIlya Dryomov }
10982f713615SIlya Dryomov if (do_datacrc)
10992f713615SIlya Dryomov con->in_data_crc = crc;
11002f713615SIlya Dryomov
11012f713615SIlya Dryomov return 1; /* must return > 0 to indicate success */
11022f713615SIlya Dryomov }
11032f713615SIlya Dryomov
read_partial_msg_data_bounce(struct ceph_connection * con)1104038b8d1dSIlya Dryomov static int read_partial_msg_data_bounce(struct ceph_connection *con)
1105038b8d1dSIlya Dryomov {
1106038b8d1dSIlya Dryomov struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor;
1107038b8d1dSIlya Dryomov struct page *page;
1108038b8d1dSIlya Dryomov size_t off, len;
1109038b8d1dSIlya Dryomov u32 crc;
1110038b8d1dSIlya Dryomov int ret;
1111038b8d1dSIlya Dryomov
1112038b8d1dSIlya Dryomov if (unlikely(!con->bounce_page)) {
1113038b8d1dSIlya Dryomov con->bounce_page = alloc_page(GFP_NOIO);
1114038b8d1dSIlya Dryomov if (!con->bounce_page) {
1115038b8d1dSIlya Dryomov pr_err("failed to allocate bounce page\n");
1116038b8d1dSIlya Dryomov return -ENOMEM;
1117038b8d1dSIlya Dryomov }
1118038b8d1dSIlya Dryomov }
1119038b8d1dSIlya Dryomov
1120038b8d1dSIlya Dryomov crc = con->in_data_crc;
1121038b8d1dSIlya Dryomov while (cursor->total_resid) {
1122038b8d1dSIlya Dryomov if (!cursor->resid) {
1123038b8d1dSIlya Dryomov ceph_msg_data_advance(cursor, 0);
1124038b8d1dSIlya Dryomov continue;
1125038b8d1dSIlya Dryomov }
1126038b8d1dSIlya Dryomov
1127da4ab869SJeff Layton page = ceph_msg_data_next(cursor, &off, &len);
1128038b8d1dSIlya Dryomov ret = ceph_tcp_recvpage(con->sock, con->bounce_page, 0, len);
1129038b8d1dSIlya Dryomov if (ret <= 0) {
1130038b8d1dSIlya Dryomov con->in_data_crc = crc;
1131038b8d1dSIlya Dryomov return ret;
1132038b8d1dSIlya Dryomov }
1133038b8d1dSIlya Dryomov
1134038b8d1dSIlya Dryomov crc = crc32c(crc, page_address(con->bounce_page), ret);
1135038b8d1dSIlya Dryomov memcpy_to_page(page, off, page_address(con->bounce_page), ret);
1136038b8d1dSIlya Dryomov
1137038b8d1dSIlya Dryomov ceph_msg_data_advance(cursor, ret);
1138038b8d1dSIlya Dryomov }
1139038b8d1dSIlya Dryomov con->in_data_crc = crc;
1140038b8d1dSIlya Dryomov
1141038b8d1dSIlya Dryomov return 1; /* must return > 0 to indicate success */
1142038b8d1dSIlya Dryomov }
1143038b8d1dSIlya Dryomov
11442f713615SIlya Dryomov /*
11452f713615SIlya Dryomov * read (part of) a message.
11462f713615SIlya Dryomov */
read_partial_message(struct ceph_connection * con)11472f713615SIlya Dryomov static int read_partial_message(struct ceph_connection *con)
11482f713615SIlya Dryomov {
11492f713615SIlya Dryomov struct ceph_msg *m = con->in_msg;
11502f713615SIlya Dryomov int size;
11512f713615SIlya Dryomov int end;
11522f713615SIlya Dryomov int ret;
11532f713615SIlya Dryomov unsigned int front_len, middle_len, data_len;
11542f713615SIlya Dryomov bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC);
11552f713615SIlya Dryomov bool need_sign = (con->peer_features & CEPH_FEATURE_MSG_AUTH);
11562f713615SIlya Dryomov u64 seq;
11572f713615SIlya Dryomov u32 crc;
11582f713615SIlya Dryomov
11592f713615SIlya Dryomov dout("read_partial_message con %p msg %p\n", con, m);
11602f713615SIlya Dryomov
11612f713615SIlya Dryomov /* header */
1162a56dd9bfSIlya Dryomov size = sizeof(con->v1.in_hdr);
11632f713615SIlya Dryomov end = size;
1164a56dd9bfSIlya Dryomov ret = read_partial(con, end, size, &con->v1.in_hdr);
11652f713615SIlya Dryomov if (ret <= 0)
11662f713615SIlya Dryomov return ret;
11672f713615SIlya Dryomov
1168a56dd9bfSIlya Dryomov crc = crc32c(0, &con->v1.in_hdr, offsetof(struct ceph_msg_header, crc));
1169a56dd9bfSIlya Dryomov if (cpu_to_le32(crc) != con->v1.in_hdr.crc) {
11702f713615SIlya Dryomov pr_err("read_partial_message bad hdr crc %u != expected %u\n",
1171a56dd9bfSIlya Dryomov crc, con->v1.in_hdr.crc);
11722f713615SIlya Dryomov return -EBADMSG;
11732f713615SIlya Dryomov }
11742f713615SIlya Dryomov
1175a56dd9bfSIlya Dryomov front_len = le32_to_cpu(con->v1.in_hdr.front_len);
11762f713615SIlya Dryomov if (front_len > CEPH_MSG_MAX_FRONT_LEN)
11772f713615SIlya Dryomov return -EIO;
1178a56dd9bfSIlya Dryomov middle_len = le32_to_cpu(con->v1.in_hdr.middle_len);
11792f713615SIlya Dryomov if (middle_len > CEPH_MSG_MAX_MIDDLE_LEN)
11802f713615SIlya Dryomov return -EIO;
1181a56dd9bfSIlya Dryomov data_len = le32_to_cpu(con->v1.in_hdr.data_len);
11822f713615SIlya Dryomov if (data_len > CEPH_MSG_MAX_DATA_LEN)
11832f713615SIlya Dryomov return -EIO;
11842f713615SIlya Dryomov
11852f713615SIlya Dryomov /* verify seq# */
1186a56dd9bfSIlya Dryomov seq = le64_to_cpu(con->v1.in_hdr.seq);
11872f713615SIlya Dryomov if ((s64)seq - (s64)con->in_seq < 1) {
11882f713615SIlya Dryomov pr_info("skipping %s%lld %s seq %lld expected %lld\n",
11892f713615SIlya Dryomov ENTITY_NAME(con->peer_name),
11902f713615SIlya Dryomov ceph_pr_addr(&con->peer_addr),
11912f713615SIlya Dryomov seq, con->in_seq + 1);
1192a56dd9bfSIlya Dryomov con->v1.in_base_pos = -front_len - middle_len - data_len -
11932f713615SIlya Dryomov sizeof_footer(con);
1194a56dd9bfSIlya Dryomov con->v1.in_tag = CEPH_MSGR_TAG_READY;
11952f713615SIlya Dryomov return 1;
11962f713615SIlya Dryomov } else if ((s64)seq - (s64)con->in_seq > 1) {
11972f713615SIlya Dryomov pr_err("read_partial_message bad seq %lld expected %lld\n",
11982f713615SIlya Dryomov seq, con->in_seq + 1);
11992f713615SIlya Dryomov con->error_msg = "bad message sequence # for incoming message";
12002f713615SIlya Dryomov return -EBADE;
12012f713615SIlya Dryomov }
12022f713615SIlya Dryomov
12032f713615SIlya Dryomov /* allocate message? */
12042f713615SIlya Dryomov if (!con->in_msg) {
12052f713615SIlya Dryomov int skip = 0;
12062f713615SIlya Dryomov
1207a56dd9bfSIlya Dryomov dout("got hdr type %d front %d data %d\n", con->v1.in_hdr.type,
12082f713615SIlya Dryomov front_len, data_len);
1209a56dd9bfSIlya Dryomov ret = ceph_con_in_msg_alloc(con, &con->v1.in_hdr, &skip);
12102f713615SIlya Dryomov if (ret < 0)
12112f713615SIlya Dryomov return ret;
12122f713615SIlya Dryomov
12139d5ae6f3SIlya Dryomov BUG_ON((!con->in_msg) ^ skip);
12142f713615SIlya Dryomov if (skip) {
12152f713615SIlya Dryomov /* skip this message */
12162f713615SIlya Dryomov dout("alloc_msg said skip message\n");
1217a56dd9bfSIlya Dryomov con->v1.in_base_pos = -front_len - middle_len -
1218a56dd9bfSIlya Dryomov data_len - sizeof_footer(con);
1219a56dd9bfSIlya Dryomov con->v1.in_tag = CEPH_MSGR_TAG_READY;
12202f713615SIlya Dryomov con->in_seq++;
12212f713615SIlya Dryomov return 1;
12222f713615SIlya Dryomov }
12232f713615SIlya Dryomov
12242f713615SIlya Dryomov BUG_ON(!con->in_msg);
12252f713615SIlya Dryomov BUG_ON(con->in_msg->con != con);
12262f713615SIlya Dryomov m = con->in_msg;
12272f713615SIlya Dryomov m->front.iov_len = 0; /* haven't read it yet */
12282f713615SIlya Dryomov if (m->middle)
12292f713615SIlya Dryomov m->middle->vec.iov_len = 0;
12302f713615SIlya Dryomov
12312f713615SIlya Dryomov /* prepare for data payload, if any */
12322f713615SIlya Dryomov
12332f713615SIlya Dryomov if (data_len)
12342f713615SIlya Dryomov prepare_message_data(con->in_msg, data_len);
12352f713615SIlya Dryomov }
12362f713615SIlya Dryomov
12372f713615SIlya Dryomov /* front */
12382f713615SIlya Dryomov ret = read_partial_message_section(con, &m->front, front_len,
12392f713615SIlya Dryomov &con->in_front_crc);
12402f713615SIlya Dryomov if (ret <= 0)
12412f713615SIlya Dryomov return ret;
12422f713615SIlya Dryomov
12432f713615SIlya Dryomov /* middle */
12442f713615SIlya Dryomov if (m->middle) {
12452f713615SIlya Dryomov ret = read_partial_message_section(con, &m->middle->vec,
12462f713615SIlya Dryomov middle_len,
12472f713615SIlya Dryomov &con->in_middle_crc);
12482f713615SIlya Dryomov if (ret <= 0)
12492f713615SIlya Dryomov return ret;
12502f713615SIlya Dryomov }
12512f713615SIlya Dryomov
12522f713615SIlya Dryomov /* (page) data */
12532f713615SIlya Dryomov if (data_len) {
1254038b8d1dSIlya Dryomov if (!m->num_data_items)
1255038b8d1dSIlya Dryomov return -EIO;
1256038b8d1dSIlya Dryomov
1257*8e46a2d0SXiubo Li if (m->sparse_read_total)
1258ee97302fSXiubo Li ret = read_partial_sparse_msg_data(con);
1259d396f89dSJeff Layton else if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE))
1260038b8d1dSIlya Dryomov ret = read_partial_msg_data_bounce(con);
1261038b8d1dSIlya Dryomov else
12622f713615SIlya Dryomov ret = read_partial_msg_data(con);
12632f713615SIlya Dryomov if (ret <= 0)
12642f713615SIlya Dryomov return ret;
12652f713615SIlya Dryomov }
12662f713615SIlya Dryomov
12672f713615SIlya Dryomov /* footer */
12682f713615SIlya Dryomov size = sizeof_footer(con);
12692f713615SIlya Dryomov end += size;
12702f713615SIlya Dryomov ret = read_partial(con, end, size, &m->footer);
12712f713615SIlya Dryomov if (ret <= 0)
12722f713615SIlya Dryomov return ret;
12732f713615SIlya Dryomov
12742f713615SIlya Dryomov if (!need_sign) {
12752f713615SIlya Dryomov m->footer.flags = m->old_footer.flags;
12762f713615SIlya Dryomov m->footer.sig = 0;
12772f713615SIlya Dryomov }
12782f713615SIlya Dryomov
12792f713615SIlya Dryomov dout("read_partial_message got msg %p %d (%u) + %d (%u) + %d (%u)\n",
12802f713615SIlya Dryomov m, front_len, m->footer.front_crc, middle_len,
12812f713615SIlya Dryomov m->footer.middle_crc, data_len, m->footer.data_crc);
12822f713615SIlya Dryomov
12832f713615SIlya Dryomov /* crc ok? */
12842f713615SIlya Dryomov if (con->in_front_crc != le32_to_cpu(m->footer.front_crc)) {
12852f713615SIlya Dryomov pr_err("read_partial_message %p front crc %u != exp. %u\n",
12862f713615SIlya Dryomov m, con->in_front_crc, m->footer.front_crc);
12872f713615SIlya Dryomov return -EBADMSG;
12882f713615SIlya Dryomov }
12892f713615SIlya Dryomov if (con->in_middle_crc != le32_to_cpu(m->footer.middle_crc)) {
12902f713615SIlya Dryomov pr_err("read_partial_message %p middle crc %u != exp %u\n",
12912f713615SIlya Dryomov m, con->in_middle_crc, m->footer.middle_crc);
12922f713615SIlya Dryomov return -EBADMSG;
12932f713615SIlya Dryomov }
12942f713615SIlya Dryomov if (do_datacrc &&
12952f713615SIlya Dryomov (m->footer.flags & CEPH_MSG_FOOTER_NOCRC) == 0 &&
12962f713615SIlya Dryomov con->in_data_crc != le32_to_cpu(m->footer.data_crc)) {
12972f713615SIlya Dryomov pr_err("read_partial_message %p data crc %u != exp. %u\n", m,
12982f713615SIlya Dryomov con->in_data_crc, le32_to_cpu(m->footer.data_crc));
12992f713615SIlya Dryomov return -EBADMSG;
13002f713615SIlya Dryomov }
13012f713615SIlya Dryomov
13022f713615SIlya Dryomov if (need_sign && con->ops->check_message_signature &&
13032f713615SIlya Dryomov con->ops->check_message_signature(m)) {
13042f713615SIlya Dryomov pr_err("read_partial_message %p signature check failed\n", m);
13052f713615SIlya Dryomov return -EBADMSG;
13062f713615SIlya Dryomov }
13072f713615SIlya Dryomov
13082f713615SIlya Dryomov return 1; /* done! */
13092f713615SIlya Dryomov }
13102f713615SIlya Dryomov
read_keepalive_ack(struct ceph_connection * con)13112f713615SIlya Dryomov static int read_keepalive_ack(struct ceph_connection *con)
13122f713615SIlya Dryomov {
13132f713615SIlya Dryomov struct ceph_timespec ceph_ts;
13142f713615SIlya Dryomov size_t size = sizeof(ceph_ts);
13152f713615SIlya Dryomov int ret = read_partial(con, size, size, &ceph_ts);
13162f713615SIlya Dryomov if (ret <= 0)
13172f713615SIlya Dryomov return ret;
13182f713615SIlya Dryomov ceph_decode_timespec64(&con->last_keepalive_ack, &ceph_ts);
13192f713615SIlya Dryomov prepare_read_tag(con);
13202f713615SIlya Dryomov return 1;
13212f713615SIlya Dryomov }
13222f713615SIlya Dryomov
13232f713615SIlya Dryomov /*
13242f713615SIlya Dryomov * Read what we can from the socket.
13252f713615SIlya Dryomov */
ceph_con_v1_try_read(struct ceph_connection * con)13262f713615SIlya Dryomov int ceph_con_v1_try_read(struct ceph_connection *con)
13272f713615SIlya Dryomov {
13282f713615SIlya Dryomov int ret = -1;
13292f713615SIlya Dryomov
13302f713615SIlya Dryomov more:
13312f713615SIlya Dryomov dout("try_read start %p state %d\n", con, con->state);
13322f713615SIlya Dryomov if (con->state != CEPH_CON_S_V1_BANNER &&
13332f713615SIlya Dryomov con->state != CEPH_CON_S_V1_CONNECT_MSG &&
13342f713615SIlya Dryomov con->state != CEPH_CON_S_OPEN)
13352f713615SIlya Dryomov return 0;
13362f713615SIlya Dryomov
13372f713615SIlya Dryomov BUG_ON(!con->sock);
13382f713615SIlya Dryomov
1339a56dd9bfSIlya Dryomov dout("try_read tag %d in_base_pos %d\n", con->v1.in_tag,
1340a56dd9bfSIlya Dryomov con->v1.in_base_pos);
13412f713615SIlya Dryomov
13422f713615SIlya Dryomov if (con->state == CEPH_CON_S_V1_BANNER) {
13432f713615SIlya Dryomov ret = read_partial_banner(con);
13442f713615SIlya Dryomov if (ret <= 0)
13452f713615SIlya Dryomov goto out;
13462f713615SIlya Dryomov ret = process_banner(con);
13472f713615SIlya Dryomov if (ret < 0)
13482f713615SIlya Dryomov goto out;
13492f713615SIlya Dryomov
13502f713615SIlya Dryomov con->state = CEPH_CON_S_V1_CONNECT_MSG;
13512f713615SIlya Dryomov
13522f713615SIlya Dryomov /*
13532f713615SIlya Dryomov * Received banner is good, exchange connection info.
13542f713615SIlya Dryomov * Do not reset out_kvec, as sending our banner raced
13552f713615SIlya Dryomov * with receiving peer banner after connect completed.
13562f713615SIlya Dryomov */
13572f713615SIlya Dryomov ret = prepare_write_connect(con);
13582f713615SIlya Dryomov if (ret < 0)
13592f713615SIlya Dryomov goto out;
13602f713615SIlya Dryomov prepare_read_connect(con);
13612f713615SIlya Dryomov
13622f713615SIlya Dryomov /* Send connection info before awaiting response */
13632f713615SIlya Dryomov goto out;
13642f713615SIlya Dryomov }
13652f713615SIlya Dryomov
13662f713615SIlya Dryomov if (con->state == CEPH_CON_S_V1_CONNECT_MSG) {
13672f713615SIlya Dryomov ret = read_partial_connect(con);
13682f713615SIlya Dryomov if (ret <= 0)
13692f713615SIlya Dryomov goto out;
13702f713615SIlya Dryomov ret = process_connect(con);
13712f713615SIlya Dryomov if (ret < 0)
13722f713615SIlya Dryomov goto out;
13732f713615SIlya Dryomov goto more;
13742f713615SIlya Dryomov }
13752f713615SIlya Dryomov
13762f713615SIlya Dryomov WARN_ON(con->state != CEPH_CON_S_OPEN);
13772f713615SIlya Dryomov
1378a56dd9bfSIlya Dryomov if (con->v1.in_base_pos < 0) {
13792f713615SIlya Dryomov /*
13802f713615SIlya Dryomov * skipping + discarding content.
13812f713615SIlya Dryomov */
1382a56dd9bfSIlya Dryomov ret = ceph_tcp_recvmsg(con->sock, NULL, -con->v1.in_base_pos);
13832f713615SIlya Dryomov if (ret <= 0)
13842f713615SIlya Dryomov goto out;
1385a56dd9bfSIlya Dryomov dout("skipped %d / %d bytes\n", ret, -con->v1.in_base_pos);
1386a56dd9bfSIlya Dryomov con->v1.in_base_pos += ret;
1387a56dd9bfSIlya Dryomov if (con->v1.in_base_pos)
13882f713615SIlya Dryomov goto more;
13892f713615SIlya Dryomov }
1390a56dd9bfSIlya Dryomov if (con->v1.in_tag == CEPH_MSGR_TAG_READY) {
13912f713615SIlya Dryomov /*
13922f713615SIlya Dryomov * what's next?
13932f713615SIlya Dryomov */
1394a56dd9bfSIlya Dryomov ret = ceph_tcp_recvmsg(con->sock, &con->v1.in_tag, 1);
13952f713615SIlya Dryomov if (ret <= 0)
13962f713615SIlya Dryomov goto out;
1397a56dd9bfSIlya Dryomov dout("try_read got tag %d\n", con->v1.in_tag);
1398a56dd9bfSIlya Dryomov switch (con->v1.in_tag) {
13992f713615SIlya Dryomov case CEPH_MSGR_TAG_MSG:
14002f713615SIlya Dryomov prepare_read_message(con);
14012f713615SIlya Dryomov break;
14022f713615SIlya Dryomov case CEPH_MSGR_TAG_ACK:
14032f713615SIlya Dryomov prepare_read_ack(con);
14042f713615SIlya Dryomov break;
14052f713615SIlya Dryomov case CEPH_MSGR_TAG_KEEPALIVE2_ACK:
14062f713615SIlya Dryomov prepare_read_keepalive_ack(con);
14072f713615SIlya Dryomov break;
14082f713615SIlya Dryomov case CEPH_MSGR_TAG_CLOSE:
14092f713615SIlya Dryomov ceph_con_close_socket(con);
14102f713615SIlya Dryomov con->state = CEPH_CON_S_CLOSED;
14112f713615SIlya Dryomov goto out;
14122f713615SIlya Dryomov default:
14132f713615SIlya Dryomov goto bad_tag;
14142f713615SIlya Dryomov }
14152f713615SIlya Dryomov }
1416a56dd9bfSIlya Dryomov if (con->v1.in_tag == CEPH_MSGR_TAG_MSG) {
14172f713615SIlya Dryomov ret = read_partial_message(con);
14182f713615SIlya Dryomov if (ret <= 0) {
14192f713615SIlya Dryomov switch (ret) {
14202f713615SIlya Dryomov case -EBADMSG:
14212f713615SIlya Dryomov con->error_msg = "bad crc/signature";
14222f713615SIlya Dryomov fallthrough;
14232f713615SIlya Dryomov case -EBADE:
14242f713615SIlya Dryomov ret = -EIO;
14252f713615SIlya Dryomov break;
14262f713615SIlya Dryomov case -EIO:
14272f713615SIlya Dryomov con->error_msg = "io error";
14282f713615SIlya Dryomov break;
14292f713615SIlya Dryomov }
14302f713615SIlya Dryomov goto out;
14312f713615SIlya Dryomov }
1432a56dd9bfSIlya Dryomov if (con->v1.in_tag == CEPH_MSGR_TAG_READY)
14332f713615SIlya Dryomov goto more;
14342f713615SIlya Dryomov ceph_con_process_message(con);
14352f713615SIlya Dryomov if (con->state == CEPH_CON_S_OPEN)
14362f713615SIlya Dryomov prepare_read_tag(con);
14372f713615SIlya Dryomov goto more;
14382f713615SIlya Dryomov }
1439a56dd9bfSIlya Dryomov if (con->v1.in_tag == CEPH_MSGR_TAG_ACK ||
1440a56dd9bfSIlya Dryomov con->v1.in_tag == CEPH_MSGR_TAG_SEQ) {
14412f713615SIlya Dryomov /*
14422f713615SIlya Dryomov * the final handshake seq exchange is semantically
14432f713615SIlya Dryomov * equivalent to an ACK
14442f713615SIlya Dryomov */
14452f713615SIlya Dryomov ret = read_partial_ack(con);
14462f713615SIlya Dryomov if (ret <= 0)
14472f713615SIlya Dryomov goto out;
14482f713615SIlya Dryomov process_ack(con);
14492f713615SIlya Dryomov goto more;
14502f713615SIlya Dryomov }
1451a56dd9bfSIlya Dryomov if (con->v1.in_tag == CEPH_MSGR_TAG_KEEPALIVE2_ACK) {
14522f713615SIlya Dryomov ret = read_keepalive_ack(con);
14532f713615SIlya Dryomov if (ret <= 0)
14542f713615SIlya Dryomov goto out;
14552f713615SIlya Dryomov goto more;
14562f713615SIlya Dryomov }
14572f713615SIlya Dryomov
14582f713615SIlya Dryomov out:
14592f713615SIlya Dryomov dout("try_read done on %p ret %d\n", con, ret);
14602f713615SIlya Dryomov return ret;
14612f713615SIlya Dryomov
14622f713615SIlya Dryomov bad_tag:
1463a56dd9bfSIlya Dryomov pr_err("try_read bad tag %d\n", con->v1.in_tag);
14642f713615SIlya Dryomov con->error_msg = "protocol error, garbage tag";
14652f713615SIlya Dryomov ret = -1;
14662f713615SIlya Dryomov goto out;
14672f713615SIlya Dryomov }
14682f713615SIlya Dryomov
14692f713615SIlya Dryomov /*
14702f713615SIlya Dryomov * Write something to the socket. Called in a worker thread when the
14712f713615SIlya Dryomov * socket appears to be writeable and we have something ready to send.
14722f713615SIlya Dryomov */
ceph_con_v1_try_write(struct ceph_connection * con)14732f713615SIlya Dryomov int ceph_con_v1_try_write(struct ceph_connection *con)
14742f713615SIlya Dryomov {
14752f713615SIlya Dryomov int ret = 1;
14762f713615SIlya Dryomov
14772f713615SIlya Dryomov dout("try_write start %p state %d\n", con, con->state);
14782f713615SIlya Dryomov if (con->state != CEPH_CON_S_PREOPEN &&
14792f713615SIlya Dryomov con->state != CEPH_CON_S_V1_BANNER &&
14802f713615SIlya Dryomov con->state != CEPH_CON_S_V1_CONNECT_MSG &&
14812f713615SIlya Dryomov con->state != CEPH_CON_S_OPEN)
14822f713615SIlya Dryomov return 0;
14832f713615SIlya Dryomov
14842f713615SIlya Dryomov /* open the socket first? */
14852f713615SIlya Dryomov if (con->state == CEPH_CON_S_PREOPEN) {
14862f713615SIlya Dryomov BUG_ON(con->sock);
14872f713615SIlya Dryomov con->state = CEPH_CON_S_V1_BANNER;
14882f713615SIlya Dryomov
14892f713615SIlya Dryomov con_out_kvec_reset(con);
14902f713615SIlya Dryomov prepare_write_banner(con);
14912f713615SIlya Dryomov prepare_read_banner(con);
14922f713615SIlya Dryomov
14932f713615SIlya Dryomov BUG_ON(con->in_msg);
1494a56dd9bfSIlya Dryomov con->v1.in_tag = CEPH_MSGR_TAG_READY;
14952f713615SIlya Dryomov dout("try_write initiating connect on %p new state %d\n",
14962f713615SIlya Dryomov con, con->state);
14972f713615SIlya Dryomov ret = ceph_tcp_connect(con);
14982f713615SIlya Dryomov if (ret < 0) {
14992f713615SIlya Dryomov con->error_msg = "connect error";
15002f713615SIlya Dryomov goto out;
15012f713615SIlya Dryomov }
15022f713615SIlya Dryomov }
15032f713615SIlya Dryomov
15042f713615SIlya Dryomov more:
1505a56dd9bfSIlya Dryomov dout("try_write out_kvec_bytes %d\n", con->v1.out_kvec_bytes);
15062f713615SIlya Dryomov BUG_ON(!con->sock);
15072f713615SIlya Dryomov
15082f713615SIlya Dryomov /* kvec data queued? */
1509a56dd9bfSIlya Dryomov if (con->v1.out_kvec_left) {
15102f713615SIlya Dryomov ret = write_partial_kvec(con);
15112f713615SIlya Dryomov if (ret <= 0)
15122f713615SIlya Dryomov goto out;
15132f713615SIlya Dryomov }
1514a56dd9bfSIlya Dryomov if (con->v1.out_skip) {
15152f713615SIlya Dryomov ret = write_partial_skip(con);
15162f713615SIlya Dryomov if (ret <= 0)
15172f713615SIlya Dryomov goto out;
15182f713615SIlya Dryomov }
15192f713615SIlya Dryomov
15202f713615SIlya Dryomov /* msg pages? */
15212f713615SIlya Dryomov if (con->out_msg) {
1522a56dd9bfSIlya Dryomov if (con->v1.out_msg_done) {
15232f713615SIlya Dryomov ceph_msg_put(con->out_msg);
15242f713615SIlya Dryomov con->out_msg = NULL; /* we're done with this one */
15252f713615SIlya Dryomov goto do_next;
15262f713615SIlya Dryomov }
15272f713615SIlya Dryomov
15282f713615SIlya Dryomov ret = write_partial_message_data(con);
15292f713615SIlya Dryomov if (ret == 1)
15302f713615SIlya Dryomov goto more; /* we need to send the footer, too! */
15312f713615SIlya Dryomov if (ret == 0)
15322f713615SIlya Dryomov goto out;
15332f713615SIlya Dryomov if (ret < 0) {
15342f713615SIlya Dryomov dout("try_write write_partial_message_data err %d\n",
15352f713615SIlya Dryomov ret);
15362f713615SIlya Dryomov goto out;
15372f713615SIlya Dryomov }
15382f713615SIlya Dryomov }
15392f713615SIlya Dryomov
15402f713615SIlya Dryomov do_next:
15412f713615SIlya Dryomov if (con->state == CEPH_CON_S_OPEN) {
15422f713615SIlya Dryomov if (ceph_con_flag_test_and_clear(con,
15432f713615SIlya Dryomov CEPH_CON_F_KEEPALIVE_PENDING)) {
15442f713615SIlya Dryomov prepare_write_keepalive(con);
15452f713615SIlya Dryomov goto more;
15462f713615SIlya Dryomov }
15472f713615SIlya Dryomov /* is anything else pending? */
15482f713615SIlya Dryomov if (!list_empty(&con->out_queue)) {
15492f713615SIlya Dryomov prepare_write_message(con);
15502f713615SIlya Dryomov goto more;
15512f713615SIlya Dryomov }
15522f713615SIlya Dryomov if (con->in_seq > con->in_seq_acked) {
15532f713615SIlya Dryomov prepare_write_ack(con);
15542f713615SIlya Dryomov goto more;
15552f713615SIlya Dryomov }
15562f713615SIlya Dryomov }
15572f713615SIlya Dryomov
15582f713615SIlya Dryomov /* Nothing to do! */
15592f713615SIlya Dryomov ceph_con_flag_clear(con, CEPH_CON_F_WRITE_PENDING);
15602f713615SIlya Dryomov dout("try_write nothing else to write.\n");
15612f713615SIlya Dryomov ret = 0;
15622f713615SIlya Dryomov out:
15632f713615SIlya Dryomov dout("try_write done on %p ret %d\n", con, ret);
15642f713615SIlya Dryomov return ret;
15652f713615SIlya Dryomov }
15662f713615SIlya Dryomov
ceph_con_v1_revoke(struct ceph_connection * con)15672f713615SIlya Dryomov void ceph_con_v1_revoke(struct ceph_connection *con)
15682f713615SIlya Dryomov {
15692f713615SIlya Dryomov struct ceph_msg *msg = con->out_msg;
15702f713615SIlya Dryomov
1571a56dd9bfSIlya Dryomov WARN_ON(con->v1.out_skip);
15722f713615SIlya Dryomov /* footer */
1573a56dd9bfSIlya Dryomov if (con->v1.out_msg_done) {
1574a56dd9bfSIlya Dryomov con->v1.out_skip += con_out_kvec_skip(con);
15752f713615SIlya Dryomov } else {
15762f713615SIlya Dryomov WARN_ON(!msg->data_length);
1577a56dd9bfSIlya Dryomov con->v1.out_skip += sizeof_footer(con);
15782f713615SIlya Dryomov }
15792f713615SIlya Dryomov /* data, middle, front */
15802f713615SIlya Dryomov if (msg->data_length)
1581a56dd9bfSIlya Dryomov con->v1.out_skip += msg->cursor.total_resid;
15822f713615SIlya Dryomov if (msg->middle)
1583a56dd9bfSIlya Dryomov con->v1.out_skip += con_out_kvec_skip(con);
1584a56dd9bfSIlya Dryomov con->v1.out_skip += con_out_kvec_skip(con);
15852f713615SIlya Dryomov
15862f713615SIlya Dryomov dout("%s con %p out_kvec_bytes %d out_skip %d\n", __func__, con,
1587a56dd9bfSIlya Dryomov con->v1.out_kvec_bytes, con->v1.out_skip);
15882f713615SIlya Dryomov }
15892f713615SIlya Dryomov
ceph_con_v1_revoke_incoming(struct ceph_connection * con)15902f713615SIlya Dryomov void ceph_con_v1_revoke_incoming(struct ceph_connection *con)
15912f713615SIlya Dryomov {
1592a56dd9bfSIlya Dryomov unsigned int front_len = le32_to_cpu(con->v1.in_hdr.front_len);
1593a56dd9bfSIlya Dryomov unsigned int middle_len = le32_to_cpu(con->v1.in_hdr.middle_len);
1594a56dd9bfSIlya Dryomov unsigned int data_len = le32_to_cpu(con->v1.in_hdr.data_len);
15952f713615SIlya Dryomov
15962f713615SIlya Dryomov /* skip rest of message */
1597a56dd9bfSIlya Dryomov con->v1.in_base_pos = con->v1.in_base_pos -
15982f713615SIlya Dryomov sizeof(struct ceph_msg_header) -
15992f713615SIlya Dryomov front_len -
16002f713615SIlya Dryomov middle_len -
16012f713615SIlya Dryomov data_len -
16022f713615SIlya Dryomov sizeof(struct ceph_msg_footer);
16032f713615SIlya Dryomov
1604a56dd9bfSIlya Dryomov con->v1.in_tag = CEPH_MSGR_TAG_READY;
16052f713615SIlya Dryomov con->in_seq++;
16062f713615SIlya Dryomov
1607a56dd9bfSIlya Dryomov dout("%s con %p in_base_pos %d\n", __func__, con, con->v1.in_base_pos);
16082f713615SIlya Dryomov }
16092f713615SIlya Dryomov
ceph_con_v1_opened(struct ceph_connection * con)16102f713615SIlya Dryomov bool ceph_con_v1_opened(struct ceph_connection *con)
16112f713615SIlya Dryomov {
1612a56dd9bfSIlya Dryomov return con->v1.connect_seq;
16132f713615SIlya Dryomov }
16142f713615SIlya Dryomov
ceph_con_v1_reset_session(struct ceph_connection * con)16152f713615SIlya Dryomov void ceph_con_v1_reset_session(struct ceph_connection *con)
16162f713615SIlya Dryomov {
1617a56dd9bfSIlya Dryomov con->v1.connect_seq = 0;
1618a56dd9bfSIlya Dryomov con->v1.peer_global_seq = 0;
16192f713615SIlya Dryomov }
16202f713615SIlya Dryomov
ceph_con_v1_reset_protocol(struct ceph_connection * con)16212f713615SIlya Dryomov void ceph_con_v1_reset_protocol(struct ceph_connection *con)
16222f713615SIlya Dryomov {
1623a56dd9bfSIlya Dryomov con->v1.out_skip = 0;
16242f713615SIlya Dryomov }
1625