xref: /linux/net/ceph/messenger_v1.c (revision 79790b6818e96c58fe2bffee1b418c16e64e7b80)
// SPDX-License-Identifier: GPL-2.0
22f713615SIlya Dryomov #include <linux/ceph/ceph_debug.h>
32f713615SIlya Dryomov 
42f713615SIlya Dryomov #include <linux/bvec.h>
52f713615SIlya Dryomov #include <linux/crc32c.h>
62f713615SIlya Dryomov #include <linux/net.h>
72f713615SIlya Dryomov #include <linux/socket.h>
82f713615SIlya Dryomov #include <net/sock.h>
92f713615SIlya Dryomov 
102f713615SIlya Dryomov #include <linux/ceph/ceph_features.h>
112f713615SIlya Dryomov #include <linux/ceph/decode.h>
122f713615SIlya Dryomov #include <linux/ceph/libceph.h>
132f713615SIlya Dryomov #include <linux/ceph/messenger.h>
142f713615SIlya Dryomov 
152f713615SIlya Dryomov /* static tag bytes (protocol control messages) */
162f713615SIlya Dryomov static char tag_msg = CEPH_MSGR_TAG_MSG;
172f713615SIlya Dryomov static char tag_ack = CEPH_MSGR_TAG_ACK;
182f713615SIlya Dryomov static char tag_keepalive = CEPH_MSGR_TAG_KEEPALIVE;
192f713615SIlya Dryomov static char tag_keepalive2 = CEPH_MSGR_TAG_KEEPALIVE2;
202f713615SIlya Dryomov 
212f713615SIlya Dryomov /*
222f713615SIlya Dryomov  * If @buf is NULL, discard up to @len bytes.
232f713615SIlya Dryomov  */
ceph_tcp_recvmsg(struct socket * sock,void * buf,size_t len)242f713615SIlya Dryomov static int ceph_tcp_recvmsg(struct socket *sock, void *buf, size_t len)
252f713615SIlya Dryomov {
262f713615SIlya Dryomov 	struct kvec iov = {buf, len};
272f713615SIlya Dryomov 	struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL };
282f713615SIlya Dryomov 	int r;
292f713615SIlya Dryomov 
302f713615SIlya Dryomov 	if (!buf)
312f713615SIlya Dryomov 		msg.msg_flags |= MSG_TRUNC;
322f713615SIlya Dryomov 
33de4eda9dSAl Viro 	iov_iter_kvec(&msg.msg_iter, ITER_DEST, &iov, 1, len);
342f713615SIlya Dryomov 	r = sock_recvmsg(sock, &msg, msg.msg_flags);
352f713615SIlya Dryomov 	if (r == -EAGAIN)
362f713615SIlya Dryomov 		r = 0;
372f713615SIlya Dryomov 	return r;
382f713615SIlya Dryomov }
392f713615SIlya Dryomov 
/*
 * Non-blocking receive of up to @length bytes from @sock into @page,
 * starting at @page_offset.  The target range must not extend past the
 * end of the page.
 *
 * Returns the result of sock_recvmsg(), except that -EAGAIN is
 * reported as 0.
 */
static int ceph_tcp_recvpage(struct socket *sock, struct page *page,
			     int page_offset, size_t length)
{
	struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL };
	struct bio_vec bvec;
	int ret;

	BUG_ON(page_offset + length > PAGE_SIZE);

	bvec_set_page(&bvec, page, length, page_offset);
	iov_iter_bvec(&msg.msg_iter, ITER_DEST, &bvec, 1, length);
	ret = sock_recvmsg(sock, &msg, msg.msg_flags);

	return ret == -EAGAIN ? 0 : ret;
}
552f713615SIlya Dryomov 
562f713615SIlya Dryomov /*
572f713615SIlya Dryomov  * write something.  @more is true if caller will be sending more data
582f713615SIlya Dryomov  * shortly.
592f713615SIlya Dryomov  */
ceph_tcp_sendmsg(struct socket * sock,struct kvec * iov,size_t kvlen,size_t len,bool more)602f713615SIlya Dryomov static int ceph_tcp_sendmsg(struct socket *sock, struct kvec *iov,
612f713615SIlya Dryomov 			    size_t kvlen, size_t len, bool more)
622f713615SIlya Dryomov {
632f713615SIlya Dryomov 	struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL };
642f713615SIlya Dryomov 	int r;
652f713615SIlya Dryomov 
662f713615SIlya Dryomov 	if (more)
672f713615SIlya Dryomov 		msg.msg_flags |= MSG_MORE;
682f713615SIlya Dryomov 	else
692f713615SIlya Dryomov 		msg.msg_flags |= MSG_EOR;  /* superfluous, but what the hell */
702f713615SIlya Dryomov 
712f713615SIlya Dryomov 	r = kernel_sendmsg(sock, &msg, iov, kvlen, len);
722f713615SIlya Dryomov 	if (r == -EAGAIN)
732f713615SIlya Dryomov 		r = 0;
742f713615SIlya Dryomov 	return r;
752f713615SIlya Dryomov }
762f713615SIlya Dryomov 
775da4d7b8SDavid Howells /*
785da4d7b8SDavid Howells  * @more: MSG_MORE or 0.
795da4d7b8SDavid Howells  */
ceph_tcp_sendpage(struct socket * sock,struct page * page,int offset,size_t size,int more)805da4d7b8SDavid Howells static int ceph_tcp_sendpage(struct socket *sock, struct page *page,
815da4d7b8SDavid Howells 			     int offset, size_t size, int more)
825da4d7b8SDavid Howells {
835da4d7b8SDavid Howells 	struct msghdr msg = {
845da4d7b8SDavid Howells 		.msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL | more,
855da4d7b8SDavid Howells 	};
865da4d7b8SDavid Howells 	struct bio_vec bvec;
875da4d7b8SDavid Howells 	int ret;
885da4d7b8SDavid Howells 
895da4d7b8SDavid Howells 	/*
905da4d7b8SDavid Howells 	 * MSG_SPLICE_PAGES cannot properly handle pages with page_count == 0,
915da4d7b8SDavid Howells 	 * we need to fall back to sendmsg if that's the case.
925da4d7b8SDavid Howells 	 *
935da4d7b8SDavid Howells 	 * Same goes for slab pages: skb_can_coalesce() allows
945da4d7b8SDavid Howells 	 * coalescing neighboring slab objects into a single frag which
955da4d7b8SDavid Howells 	 * triggers one of hardened usercopy checks.
965da4d7b8SDavid Howells 	 */
975da4d7b8SDavid Howells 	if (sendpage_ok(page))
985da4d7b8SDavid Howells 		msg.msg_flags |= MSG_SPLICE_PAGES;
995da4d7b8SDavid Howells 
1005da4d7b8SDavid Howells 	bvec_set_page(&bvec, page, size, offset);
1015da4d7b8SDavid Howells 	iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, size);
1025da4d7b8SDavid Howells 
1035da4d7b8SDavid Howells 	ret = sock_sendmsg(sock, &msg);
1045da4d7b8SDavid Howells 	if (ret == -EAGAIN)
1055da4d7b8SDavid Howells 		ret = 0;
1065da4d7b8SDavid Howells 
1075da4d7b8SDavid Howells 	return ret;
1085da4d7b8SDavid Howells }
1095da4d7b8SDavid Howells 
con_out_kvec_reset(struct ceph_connection * con)1102f713615SIlya Dryomov static void con_out_kvec_reset(struct ceph_connection *con)
1112f713615SIlya Dryomov {
112a56dd9bfSIlya Dryomov 	BUG_ON(con->v1.out_skip);
1132f713615SIlya Dryomov 
114a56dd9bfSIlya Dryomov 	con->v1.out_kvec_left = 0;
115a56dd9bfSIlya Dryomov 	con->v1.out_kvec_bytes = 0;
116a56dd9bfSIlya Dryomov 	con->v1.out_kvec_cur = &con->v1.out_kvec[0];
1172f713615SIlya Dryomov }
1182f713615SIlya Dryomov 
/*
 * Append a (@data, @size) entry to the outgoing kvec array and account
 * for its bytes.  Only the pointer is stored, so @data has to remain
 * valid until it has been written out.  Must not be called while a
 * skip is pending or when the array is already full.
 */
static void con_out_kvec_add(struct ceph_connection *con,
			     size_t size, void *data)
{
	int idx = con->v1.out_kvec_left;
	struct kvec *slot;

	BUG_ON(con->v1.out_skip);
	BUG_ON(idx >= ARRAY_SIZE(con->v1.out_kvec));

	slot = &con->v1.out_kvec[idx];
	slot->iov_base = data;
	slot->iov_len = size;

	con->v1.out_kvec_bytes += size;
	con->v1.out_kvec_left++;
}
1322f713615SIlya Dryomov 
1332f713615SIlya Dryomov /*
1342f713615SIlya Dryomov  * Chop off a kvec from the end.  Return residual number of bytes for
1352f713615SIlya Dryomov  * that kvec, i.e. how many bytes would have been written if the kvec
1362f713615SIlya Dryomov  * hadn't been nuked.
1372f713615SIlya Dryomov  */
con_out_kvec_skip(struct ceph_connection * con)1382f713615SIlya Dryomov static int con_out_kvec_skip(struct ceph_connection *con)
1392f713615SIlya Dryomov {
1402f713615SIlya Dryomov 	int skip = 0;
1412f713615SIlya Dryomov 
142a56dd9bfSIlya Dryomov 	if (con->v1.out_kvec_bytes > 0) {
143a56dd9bfSIlya Dryomov 		skip = con->v1.out_kvec_cur[con->v1.out_kvec_left - 1].iov_len;
144a56dd9bfSIlya Dryomov 		BUG_ON(con->v1.out_kvec_bytes < skip);
145a56dd9bfSIlya Dryomov 		BUG_ON(!con->v1.out_kvec_left);
146a56dd9bfSIlya Dryomov 		con->v1.out_kvec_bytes -= skip;
147a56dd9bfSIlya Dryomov 		con->v1.out_kvec_left--;
1482f713615SIlya Dryomov 	}
1492f713615SIlya Dryomov 
1502f713615SIlya Dryomov 	return skip;
1512f713615SIlya Dryomov }
1522f713615SIlya Dryomov 
sizeof_footer(struct ceph_connection * con)1532f713615SIlya Dryomov static size_t sizeof_footer(struct ceph_connection *con)
1542f713615SIlya Dryomov {
1552f713615SIlya Dryomov 	return (con->peer_features & CEPH_FEATURE_MSG_AUTH) ?
1562f713615SIlya Dryomov 	    sizeof(struct ceph_msg_footer) :
1572f713615SIlya Dryomov 	    sizeof(struct ceph_msg_footer_old);
1582f713615SIlya Dryomov }
1592f713615SIlya Dryomov 
/*
 * Initialize the data cursor of @msg.  The cursor length is
 * msg->sparse_read_total when that is non-zero, and @data_len
 * otherwise.
 */
static void prepare_message_data(struct ceph_msg *msg, u32 data_len)
{
	u64 len;

	if (msg->sparse_read_total)
		len = msg->sparse_read_total;
	else
		len = data_len;

	ceph_msg_data_cursor_init(&msg->cursor, msg, len);
}
1672f713615SIlya Dryomov 
1682f713615SIlya Dryomov /*
1692f713615SIlya Dryomov  * Prepare footer for currently outgoing message, and finish things
1702f713615SIlya Dryomov  * off.  Assumes out_kvec* are already valid.. we just add on to the end.
1712f713615SIlya Dryomov  */
prepare_write_message_footer(struct ceph_connection * con)1722f713615SIlya Dryomov static void prepare_write_message_footer(struct ceph_connection *con)
1732f713615SIlya Dryomov {
1742f713615SIlya Dryomov 	struct ceph_msg *m = con->out_msg;
1752f713615SIlya Dryomov 
1762f713615SIlya Dryomov 	m->footer.flags |= CEPH_MSG_FOOTER_COMPLETE;
1772f713615SIlya Dryomov 
1782f713615SIlya Dryomov 	dout("prepare_write_message_footer %p\n", con);
1792f713615SIlya Dryomov 	con_out_kvec_add(con, sizeof_footer(con), &m->footer);
1802f713615SIlya Dryomov 	if (con->peer_features & CEPH_FEATURE_MSG_AUTH) {
1812f713615SIlya Dryomov 		if (con->ops->sign_message)
1822f713615SIlya Dryomov 			con->ops->sign_message(m);
1832f713615SIlya Dryomov 		else
1842f713615SIlya Dryomov 			m->footer.sig = 0;
1852f713615SIlya Dryomov 	} else {
1862f713615SIlya Dryomov 		m->old_footer.flags = m->footer.flags;
1872f713615SIlya Dryomov 	}
188a56dd9bfSIlya Dryomov 	con->v1.out_more = m->more_to_follow;
189a56dd9bfSIlya Dryomov 	con->v1.out_msg_done = true;
1902f713615SIlya Dryomov }
1912f713615SIlya Dryomov 
1922f713615SIlya Dryomov /*
1932f713615SIlya Dryomov  * Prepare headers for the next outgoing message.
1942f713615SIlya Dryomov  */
prepare_write_message(struct ceph_connection * con)1952f713615SIlya Dryomov static void prepare_write_message(struct ceph_connection *con)
1962f713615SIlya Dryomov {
1972f713615SIlya Dryomov 	struct ceph_msg *m;
1982f713615SIlya Dryomov 	u32 crc;
1992f713615SIlya Dryomov 
2002f713615SIlya Dryomov 	con_out_kvec_reset(con);
201a56dd9bfSIlya Dryomov 	con->v1.out_msg_done = false;
2022f713615SIlya Dryomov 
2032f713615SIlya Dryomov 	/* Sneak an ack in there first?  If we can get it into the same
2042f713615SIlya Dryomov 	 * TCP packet that's a good thing. */
2052f713615SIlya Dryomov 	if (con->in_seq > con->in_seq_acked) {
2062f713615SIlya Dryomov 		con->in_seq_acked = con->in_seq;
2072f713615SIlya Dryomov 		con_out_kvec_add(con, sizeof (tag_ack), &tag_ack);
208a56dd9bfSIlya Dryomov 		con->v1.out_temp_ack = cpu_to_le64(con->in_seq_acked);
209a56dd9bfSIlya Dryomov 		con_out_kvec_add(con, sizeof(con->v1.out_temp_ack),
210a56dd9bfSIlya Dryomov 			&con->v1.out_temp_ack);
2112f713615SIlya Dryomov 	}
2122f713615SIlya Dryomov 
2132f713615SIlya Dryomov 	ceph_con_get_out_msg(con);
2142f713615SIlya Dryomov 	m = con->out_msg;
2152f713615SIlya Dryomov 
2162f713615SIlya Dryomov 	dout("prepare_write_message %p seq %lld type %d len %d+%d+%zd\n",
2172f713615SIlya Dryomov 	     m, con->out_seq, le16_to_cpu(m->hdr.type),
2182f713615SIlya Dryomov 	     le32_to_cpu(m->hdr.front_len), le32_to_cpu(m->hdr.middle_len),
2192f713615SIlya Dryomov 	     m->data_length);
2202f713615SIlya Dryomov 	WARN_ON(m->front.iov_len != le32_to_cpu(m->hdr.front_len));
2212f713615SIlya Dryomov 	WARN_ON(m->data_length != le32_to_cpu(m->hdr.data_len));
2222f713615SIlya Dryomov 
2232f713615SIlya Dryomov 	/* tag + hdr + front + middle */
2242f713615SIlya Dryomov 	con_out_kvec_add(con, sizeof (tag_msg), &tag_msg);
225a56dd9bfSIlya Dryomov 	con_out_kvec_add(con, sizeof(con->v1.out_hdr), &con->v1.out_hdr);
2262f713615SIlya Dryomov 	con_out_kvec_add(con, m->front.iov_len, m->front.iov_base);
2272f713615SIlya Dryomov 
2282f713615SIlya Dryomov 	if (m->middle)
2292f713615SIlya Dryomov 		con_out_kvec_add(con, m->middle->vec.iov_len,
2302f713615SIlya Dryomov 			m->middle->vec.iov_base);
2312f713615SIlya Dryomov 
2322f713615SIlya Dryomov 	/* fill in hdr crc and finalize hdr */
2332f713615SIlya Dryomov 	crc = crc32c(0, &m->hdr, offsetof(struct ceph_msg_header, crc));
2342f713615SIlya Dryomov 	con->out_msg->hdr.crc = cpu_to_le32(crc);
235a56dd9bfSIlya Dryomov 	memcpy(&con->v1.out_hdr, &con->out_msg->hdr, sizeof(con->v1.out_hdr));
2362f713615SIlya Dryomov 
2372f713615SIlya Dryomov 	/* fill in front and middle crc, footer */
2382f713615SIlya Dryomov 	crc = crc32c(0, m->front.iov_base, m->front.iov_len);
2392f713615SIlya Dryomov 	con->out_msg->footer.front_crc = cpu_to_le32(crc);
2402f713615SIlya Dryomov 	if (m->middle) {
2412f713615SIlya Dryomov 		crc = crc32c(0, m->middle->vec.iov_base,
2422f713615SIlya Dryomov 				m->middle->vec.iov_len);
2432f713615SIlya Dryomov 		con->out_msg->footer.middle_crc = cpu_to_le32(crc);
2442f713615SIlya Dryomov 	} else
2452f713615SIlya Dryomov 		con->out_msg->footer.middle_crc = 0;
2462f713615SIlya Dryomov 	dout("%s front_crc %u middle_crc %u\n", __func__,
2472f713615SIlya Dryomov 	     le32_to_cpu(con->out_msg->footer.front_crc),
2482f713615SIlya Dryomov 	     le32_to_cpu(con->out_msg->footer.middle_crc));
2492f713615SIlya Dryomov 	con->out_msg->footer.flags = 0;
2502f713615SIlya Dryomov 
2512f713615SIlya Dryomov 	/* is there a data payload? */
2522f713615SIlya Dryomov 	con->out_msg->footer.data_crc = 0;
2532f713615SIlya Dryomov 	if (m->data_length) {
2542f713615SIlya Dryomov 		prepare_message_data(con->out_msg, m->data_length);
255a56dd9bfSIlya Dryomov 		con->v1.out_more = 1;  /* data + footer will follow */
2562f713615SIlya Dryomov 	} else {
2572f713615SIlya Dryomov 		/* no, queue up footer too and be done */
2582f713615SIlya Dryomov 		prepare_write_message_footer(con);
2592f713615SIlya Dryomov 	}
2602f713615SIlya Dryomov 
2612f713615SIlya Dryomov 	ceph_con_flag_set(con, CEPH_CON_F_WRITE_PENDING);
2622f713615SIlya Dryomov }
2632f713615SIlya Dryomov 
2642f713615SIlya Dryomov /*
2652f713615SIlya Dryomov  * Prepare an ack.
2662f713615SIlya Dryomov  */
prepare_write_ack(struct ceph_connection * con)2672f713615SIlya Dryomov static void prepare_write_ack(struct ceph_connection *con)
2682f713615SIlya Dryomov {
2692f713615SIlya Dryomov 	dout("prepare_write_ack %p %llu -> %llu\n", con,
2702f713615SIlya Dryomov 	     con->in_seq_acked, con->in_seq);
2712f713615SIlya Dryomov 	con->in_seq_acked = con->in_seq;
2722f713615SIlya Dryomov 
2732f713615SIlya Dryomov 	con_out_kvec_reset(con);
2742f713615SIlya Dryomov 
2752f713615SIlya Dryomov 	con_out_kvec_add(con, sizeof (tag_ack), &tag_ack);
2762f713615SIlya Dryomov 
277a56dd9bfSIlya Dryomov 	con->v1.out_temp_ack = cpu_to_le64(con->in_seq_acked);
278a56dd9bfSIlya Dryomov 	con_out_kvec_add(con, sizeof(con->v1.out_temp_ack),
279a56dd9bfSIlya Dryomov 			 &con->v1.out_temp_ack);
2802f713615SIlya Dryomov 
281a56dd9bfSIlya Dryomov 	con->v1.out_more = 1;  /* more will follow.. eventually.. */
2822f713615SIlya Dryomov 	ceph_con_flag_set(con, CEPH_CON_F_WRITE_PENDING);
2832f713615SIlya Dryomov }
2842f713615SIlya Dryomov 
2852f713615SIlya Dryomov /*
2862f713615SIlya Dryomov  * Prepare to share the seq during handshake
2872f713615SIlya Dryomov  */
prepare_write_seq(struct ceph_connection * con)2882f713615SIlya Dryomov static void prepare_write_seq(struct ceph_connection *con)
2892f713615SIlya Dryomov {
2902f713615SIlya Dryomov 	dout("prepare_write_seq %p %llu -> %llu\n", con,
2912f713615SIlya Dryomov 	     con->in_seq_acked, con->in_seq);
2922f713615SIlya Dryomov 	con->in_seq_acked = con->in_seq;
2932f713615SIlya Dryomov 
2942f713615SIlya Dryomov 	con_out_kvec_reset(con);
2952f713615SIlya Dryomov 
296a56dd9bfSIlya Dryomov 	con->v1.out_temp_ack = cpu_to_le64(con->in_seq_acked);
297a56dd9bfSIlya Dryomov 	con_out_kvec_add(con, sizeof(con->v1.out_temp_ack),
298a56dd9bfSIlya Dryomov 			 &con->v1.out_temp_ack);
2992f713615SIlya Dryomov 
3002f713615SIlya Dryomov 	ceph_con_flag_set(con, CEPH_CON_F_WRITE_PENDING);
3012f713615SIlya Dryomov }
3022f713615SIlya Dryomov 
3032f713615SIlya Dryomov /*
3042f713615SIlya Dryomov  * Prepare to write keepalive byte.
3052f713615SIlya Dryomov  */
prepare_write_keepalive(struct ceph_connection * con)3062f713615SIlya Dryomov static void prepare_write_keepalive(struct ceph_connection *con)
3072f713615SIlya Dryomov {
3082f713615SIlya Dryomov 	dout("prepare_write_keepalive %p\n", con);
3092f713615SIlya Dryomov 	con_out_kvec_reset(con);
3102f713615SIlya Dryomov 	if (con->peer_features & CEPH_FEATURE_MSGR_KEEPALIVE2) {
3112f713615SIlya Dryomov 		struct timespec64 now;
3122f713615SIlya Dryomov 
3132f713615SIlya Dryomov 		ktime_get_real_ts64(&now);
3142f713615SIlya Dryomov 		con_out_kvec_add(con, sizeof(tag_keepalive2), &tag_keepalive2);
315a56dd9bfSIlya Dryomov 		ceph_encode_timespec64(&con->v1.out_temp_keepalive2, &now);
316a56dd9bfSIlya Dryomov 		con_out_kvec_add(con, sizeof(con->v1.out_temp_keepalive2),
317a56dd9bfSIlya Dryomov 				 &con->v1.out_temp_keepalive2);
3182f713615SIlya Dryomov 	} else {
3192f713615SIlya Dryomov 		con_out_kvec_add(con, sizeof(tag_keepalive), &tag_keepalive);
3202f713615SIlya Dryomov 	}
3212f713615SIlya Dryomov 	ceph_con_flag_set(con, CEPH_CON_F_WRITE_PENDING);
3222f713615SIlya Dryomov }
3232f713615SIlya Dryomov 
/*
 * Connection negotiation.
 */
3272f713615SIlya Dryomov 
get_connect_authorizer(struct ceph_connection * con)3282f713615SIlya Dryomov static int get_connect_authorizer(struct ceph_connection *con)
3292f713615SIlya Dryomov {
3302f713615SIlya Dryomov 	struct ceph_auth_handshake *auth;
3312f713615SIlya Dryomov 	int auth_proto;
3322f713615SIlya Dryomov 
3332f713615SIlya Dryomov 	if (!con->ops->get_authorizer) {
334a56dd9bfSIlya Dryomov 		con->v1.auth = NULL;
335a56dd9bfSIlya Dryomov 		con->v1.out_connect.authorizer_protocol = CEPH_AUTH_UNKNOWN;
336a56dd9bfSIlya Dryomov 		con->v1.out_connect.authorizer_len = 0;
3372f713615SIlya Dryomov 		return 0;
3382f713615SIlya Dryomov 	}
3392f713615SIlya Dryomov 
340a56dd9bfSIlya Dryomov 	auth = con->ops->get_authorizer(con, &auth_proto, con->v1.auth_retry);
3412f713615SIlya Dryomov 	if (IS_ERR(auth))
3422f713615SIlya Dryomov 		return PTR_ERR(auth);
3432f713615SIlya Dryomov 
344a56dd9bfSIlya Dryomov 	con->v1.auth = auth;
345a56dd9bfSIlya Dryomov 	con->v1.out_connect.authorizer_protocol = cpu_to_le32(auth_proto);
346a56dd9bfSIlya Dryomov 	con->v1.out_connect.authorizer_len =
347a56dd9bfSIlya Dryomov 		cpu_to_le32(auth->authorizer_buf_len);
3482f713615SIlya Dryomov 	return 0;
3492f713615SIlya Dryomov }
3502f713615SIlya Dryomov 
3512f713615SIlya Dryomov /*
3522f713615SIlya Dryomov  * We connected to a peer and are saying hello.
3532f713615SIlya Dryomov  */
prepare_write_banner(struct ceph_connection * con)3542f713615SIlya Dryomov static void prepare_write_banner(struct ceph_connection *con)
3552f713615SIlya Dryomov {
3562f713615SIlya Dryomov 	con_out_kvec_add(con, strlen(CEPH_BANNER), CEPH_BANNER);
3572f713615SIlya Dryomov 	con_out_kvec_add(con, sizeof (con->msgr->my_enc_addr),
3582f713615SIlya Dryomov 					&con->msgr->my_enc_addr);
3592f713615SIlya Dryomov 
360a56dd9bfSIlya Dryomov 	con->v1.out_more = 0;
3612f713615SIlya Dryomov 	ceph_con_flag_set(con, CEPH_CON_F_WRITE_PENDING);
3622f713615SIlya Dryomov }
3632f713615SIlya Dryomov 
__prepare_write_connect(struct ceph_connection * con)3642f713615SIlya Dryomov static void __prepare_write_connect(struct ceph_connection *con)
3652f713615SIlya Dryomov {
366a56dd9bfSIlya Dryomov 	con_out_kvec_add(con, sizeof(con->v1.out_connect),
367a56dd9bfSIlya Dryomov 			 &con->v1.out_connect);
368a56dd9bfSIlya Dryomov 	if (con->v1.auth)
369a56dd9bfSIlya Dryomov 		con_out_kvec_add(con, con->v1.auth->authorizer_buf_len,
370a56dd9bfSIlya Dryomov 				 con->v1.auth->authorizer_buf);
3712f713615SIlya Dryomov 
372a56dd9bfSIlya Dryomov 	con->v1.out_more = 0;
3732f713615SIlya Dryomov 	ceph_con_flag_set(con, CEPH_CON_F_WRITE_PENDING);
3742f713615SIlya Dryomov }
3752f713615SIlya Dryomov 
prepare_write_connect(struct ceph_connection * con)3762f713615SIlya Dryomov static int prepare_write_connect(struct ceph_connection *con)
3772f713615SIlya Dryomov {
3782f713615SIlya Dryomov 	unsigned int global_seq = ceph_get_global_seq(con->msgr, 0);
3792f713615SIlya Dryomov 	int proto;
3802f713615SIlya Dryomov 	int ret;
3812f713615SIlya Dryomov 
3822f713615SIlya Dryomov 	switch (con->peer_name.type) {
3832f713615SIlya Dryomov 	case CEPH_ENTITY_TYPE_MON:
3842f713615SIlya Dryomov 		proto = CEPH_MONC_PROTOCOL;
3852f713615SIlya Dryomov 		break;
3862f713615SIlya Dryomov 	case CEPH_ENTITY_TYPE_OSD:
3872f713615SIlya Dryomov 		proto = CEPH_OSDC_PROTOCOL;
3882f713615SIlya Dryomov 		break;
3892f713615SIlya Dryomov 	case CEPH_ENTITY_TYPE_MDS:
3902f713615SIlya Dryomov 		proto = CEPH_MDSC_PROTOCOL;
3912f713615SIlya Dryomov 		break;
3922f713615SIlya Dryomov 	default:
3932f713615SIlya Dryomov 		BUG();
3942f713615SIlya Dryomov 	}
3952f713615SIlya Dryomov 
3962f713615SIlya Dryomov 	dout("prepare_write_connect %p cseq=%d gseq=%d proto=%d\n", con,
397a56dd9bfSIlya Dryomov 	     con->v1.connect_seq, global_seq, proto);
3982f713615SIlya Dryomov 
399a56dd9bfSIlya Dryomov 	con->v1.out_connect.features =
4002f713615SIlya Dryomov 		cpu_to_le64(from_msgr(con->msgr)->supported_features);
401a56dd9bfSIlya Dryomov 	con->v1.out_connect.host_type = cpu_to_le32(CEPH_ENTITY_TYPE_CLIENT);
402a56dd9bfSIlya Dryomov 	con->v1.out_connect.connect_seq = cpu_to_le32(con->v1.connect_seq);
403a56dd9bfSIlya Dryomov 	con->v1.out_connect.global_seq = cpu_to_le32(global_seq);
404a56dd9bfSIlya Dryomov 	con->v1.out_connect.protocol_version = cpu_to_le32(proto);
405a56dd9bfSIlya Dryomov 	con->v1.out_connect.flags = 0;
4062f713615SIlya Dryomov 
4072f713615SIlya Dryomov 	ret = get_connect_authorizer(con);
4082f713615SIlya Dryomov 	if (ret)
4092f713615SIlya Dryomov 		return ret;
4102f713615SIlya Dryomov 
4112f713615SIlya Dryomov 	__prepare_write_connect(con);
4122f713615SIlya Dryomov 	return 0;
4132f713615SIlya Dryomov }
4142f713615SIlya Dryomov 
4152f713615SIlya Dryomov /*
4162f713615SIlya Dryomov  * write as much of pending kvecs to the socket as we can.
4172f713615SIlya Dryomov  *  1 -> done
4182f713615SIlya Dryomov  *  0 -> socket full, but more to do
4192f713615SIlya Dryomov  * <0 -> error
4202f713615SIlya Dryomov  */
write_partial_kvec(struct ceph_connection * con)4212f713615SIlya Dryomov static int write_partial_kvec(struct ceph_connection *con)
4222f713615SIlya Dryomov {
4232f713615SIlya Dryomov 	int ret;
4242f713615SIlya Dryomov 
425a56dd9bfSIlya Dryomov 	dout("write_partial_kvec %p %d left\n", con, con->v1.out_kvec_bytes);
426a56dd9bfSIlya Dryomov 	while (con->v1.out_kvec_bytes > 0) {
427a56dd9bfSIlya Dryomov 		ret = ceph_tcp_sendmsg(con->sock, con->v1.out_kvec_cur,
428a56dd9bfSIlya Dryomov 				       con->v1.out_kvec_left,
429a56dd9bfSIlya Dryomov 				       con->v1.out_kvec_bytes,
430a56dd9bfSIlya Dryomov 				       con->v1.out_more);
4312f713615SIlya Dryomov 		if (ret <= 0)
4322f713615SIlya Dryomov 			goto out;
433a56dd9bfSIlya Dryomov 		con->v1.out_kvec_bytes -= ret;
434a56dd9bfSIlya Dryomov 		if (!con->v1.out_kvec_bytes)
4352f713615SIlya Dryomov 			break;            /* done */
4362f713615SIlya Dryomov 
4372f713615SIlya Dryomov 		/* account for full iov entries consumed */
438a56dd9bfSIlya Dryomov 		while (ret >= con->v1.out_kvec_cur->iov_len) {
439a56dd9bfSIlya Dryomov 			BUG_ON(!con->v1.out_kvec_left);
440a56dd9bfSIlya Dryomov 			ret -= con->v1.out_kvec_cur->iov_len;
441a56dd9bfSIlya Dryomov 			con->v1.out_kvec_cur++;
442a56dd9bfSIlya Dryomov 			con->v1.out_kvec_left--;
4432f713615SIlya Dryomov 		}
4442f713615SIlya Dryomov 		/* and for a partially-consumed entry */
4452f713615SIlya Dryomov 		if (ret) {
446a56dd9bfSIlya Dryomov 			con->v1.out_kvec_cur->iov_len -= ret;
447a56dd9bfSIlya Dryomov 			con->v1.out_kvec_cur->iov_base += ret;
4482f713615SIlya Dryomov 		}
4492f713615SIlya Dryomov 	}
450a56dd9bfSIlya Dryomov 	con->v1.out_kvec_left = 0;
4512f713615SIlya Dryomov 	ret = 1;
4522f713615SIlya Dryomov out:
4532f713615SIlya Dryomov 	dout("write_partial_kvec %p %d left in %d kvecs ret = %d\n", con,
454a56dd9bfSIlya Dryomov 	     con->v1.out_kvec_bytes, con->v1.out_kvec_left, ret);
4552f713615SIlya Dryomov 	return ret;  /* done! */
4562f713615SIlya Dryomov }
4572f713615SIlya Dryomov 
4582f713615SIlya Dryomov /*
4592f713615SIlya Dryomov  * Write as much message data payload as we can.  If we finish, queue
4602f713615SIlya Dryomov  * up the footer.
4612f713615SIlya Dryomov  *  1 -> done, footer is now queued in out_kvec[].
4622f713615SIlya Dryomov  *  0 -> socket full, but more to do
4632f713615SIlya Dryomov  * <0 -> error
4642f713615SIlya Dryomov  */
write_partial_message_data(struct ceph_connection * con)4652f713615SIlya Dryomov static int write_partial_message_data(struct ceph_connection *con)
4662f713615SIlya Dryomov {
4672f713615SIlya Dryomov 	struct ceph_msg *msg = con->out_msg;
4682f713615SIlya Dryomov 	struct ceph_msg_data_cursor *cursor = &msg->cursor;
4692f713615SIlya Dryomov 	bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC);
4702f713615SIlya Dryomov 	u32 crc;
4712f713615SIlya Dryomov 
4722f713615SIlya Dryomov 	dout("%s %p msg %p\n", __func__, con, msg);
4732f713615SIlya Dryomov 
4742f713615SIlya Dryomov 	if (!msg->num_data_items)
4752f713615SIlya Dryomov 		return -EINVAL;
4762f713615SIlya Dryomov 
4772f713615SIlya Dryomov 	/*
4782f713615SIlya Dryomov 	 * Iterate through each page that contains data to be
4792f713615SIlya Dryomov 	 * written, and send as much as possible for each.
4802f713615SIlya Dryomov 	 *
4812f713615SIlya Dryomov 	 * If we are calculating the data crc (the default), we will
4822f713615SIlya Dryomov 	 * need to map the page.  If we have no pages, they have
4832f713615SIlya Dryomov 	 * been revoked, so use the zero page.
4842f713615SIlya Dryomov 	 */
4852f713615SIlya Dryomov 	crc = do_datacrc ? le32_to_cpu(msg->footer.data_crc) : 0;
4862f713615SIlya Dryomov 	while (cursor->total_resid) {
4872f713615SIlya Dryomov 		struct page *page;
4882f713615SIlya Dryomov 		size_t page_offset;
4892f713615SIlya Dryomov 		size_t length;
4902f713615SIlya Dryomov 		int ret;
4912f713615SIlya Dryomov 
4922f713615SIlya Dryomov 		if (!cursor->resid) {
4932f713615SIlya Dryomov 			ceph_msg_data_advance(cursor, 0);
4942f713615SIlya Dryomov 			continue;
4952f713615SIlya Dryomov 		}
4962f713615SIlya Dryomov 
497da4ab869SJeff Layton 		page = ceph_msg_data_next(cursor, &page_offset, &length);
4985da4d7b8SDavid Howells 		ret = ceph_tcp_sendpage(con->sock, page, page_offset, length,
4995da4d7b8SDavid Howells 					MSG_MORE);
5002f713615SIlya Dryomov 		if (ret <= 0) {
5012f713615SIlya Dryomov 			if (do_datacrc)
5022f713615SIlya Dryomov 				msg->footer.data_crc = cpu_to_le32(crc);
5032f713615SIlya Dryomov 
5042f713615SIlya Dryomov 			return ret;
5052f713615SIlya Dryomov 		}
5062f713615SIlya Dryomov 		if (do_datacrc && cursor->need_crc)
5072f713615SIlya Dryomov 			crc = ceph_crc32c_page(crc, page, page_offset, length);
5082f713615SIlya Dryomov 		ceph_msg_data_advance(cursor, (size_t)ret);
5092f713615SIlya Dryomov 	}
5102f713615SIlya Dryomov 
5112f713615SIlya Dryomov 	dout("%s %p msg %p done\n", __func__, con, msg);
5122f713615SIlya Dryomov 
5132f713615SIlya Dryomov 	/* prepare and queue up footer, too */
5142f713615SIlya Dryomov 	if (do_datacrc)
5152f713615SIlya Dryomov 		msg->footer.data_crc = cpu_to_le32(crc);
5162f713615SIlya Dryomov 	else
5172f713615SIlya Dryomov 		msg->footer.flags |= CEPH_MSG_FOOTER_NOCRC;
5182f713615SIlya Dryomov 	con_out_kvec_reset(con);
5192f713615SIlya Dryomov 	prepare_write_message_footer(con);
5202f713615SIlya Dryomov 
5212f713615SIlya Dryomov 	return 1;	/* must return > 0 to indicate success */
5222f713615SIlya Dryomov }
5232f713615SIlya Dryomov 
5242f713615SIlya Dryomov /*
5252f713615SIlya Dryomov  * write some zeros
5262f713615SIlya Dryomov  */
write_partial_skip(struct ceph_connection * con)5272f713615SIlya Dryomov static int write_partial_skip(struct ceph_connection *con)
5282f713615SIlya Dryomov {
5292f713615SIlya Dryomov 	int ret;
5302f713615SIlya Dryomov 
531a56dd9bfSIlya Dryomov 	dout("%s %p %d left\n", __func__, con, con->v1.out_skip);
532a56dd9bfSIlya Dryomov 	while (con->v1.out_skip > 0) {
533a56dd9bfSIlya Dryomov 		size_t size = min(con->v1.out_skip, (int)PAGE_SIZE);
5342f713615SIlya Dryomov 
5355da4d7b8SDavid Howells 		ret = ceph_tcp_sendpage(con->sock, ceph_zero_page, 0, size,
5365da4d7b8SDavid Howells 					MSG_MORE);
5372f713615SIlya Dryomov 		if (ret <= 0)
5382f713615SIlya Dryomov 			goto out;
539a56dd9bfSIlya Dryomov 		con->v1.out_skip -= ret;
5402f713615SIlya Dryomov 	}
5412f713615SIlya Dryomov 	ret = 1;
5422f713615SIlya Dryomov out:
5432f713615SIlya Dryomov 	return ret;
5442f713615SIlya Dryomov }
5452f713615SIlya Dryomov 
5462f713615SIlya Dryomov /*
5472f713615SIlya Dryomov  * Prepare to read connection handshake, or an ack.
5482f713615SIlya Dryomov  */
prepare_read_banner(struct ceph_connection * con)5492f713615SIlya Dryomov static void prepare_read_banner(struct ceph_connection *con)
5502f713615SIlya Dryomov {
5512f713615SIlya Dryomov 	dout("prepare_read_banner %p\n", con);
552a56dd9bfSIlya Dryomov 	con->v1.in_base_pos = 0;
5532f713615SIlya Dryomov }
5542f713615SIlya Dryomov 
prepare_read_connect(struct ceph_connection * con)5552f713615SIlya Dryomov static void prepare_read_connect(struct ceph_connection *con)
5562f713615SIlya Dryomov {
5572f713615SIlya Dryomov 	dout("prepare_read_connect %p\n", con);
558a56dd9bfSIlya Dryomov 	con->v1.in_base_pos = 0;
5592f713615SIlya Dryomov }
5602f713615SIlya Dryomov 
prepare_read_ack(struct ceph_connection * con)5612f713615SIlya Dryomov static void prepare_read_ack(struct ceph_connection *con)
5622f713615SIlya Dryomov {
5632f713615SIlya Dryomov 	dout("prepare_read_ack %p\n", con);
564a56dd9bfSIlya Dryomov 	con->v1.in_base_pos = 0;
5652f713615SIlya Dryomov }
5662f713615SIlya Dryomov 
prepare_read_seq(struct ceph_connection * con)5672f713615SIlya Dryomov static void prepare_read_seq(struct ceph_connection *con)
5682f713615SIlya Dryomov {
5692f713615SIlya Dryomov 	dout("prepare_read_seq %p\n", con);
570a56dd9bfSIlya Dryomov 	con->v1.in_base_pos = 0;
571a56dd9bfSIlya Dryomov 	con->v1.in_tag = CEPH_MSGR_TAG_SEQ;
5722f713615SIlya Dryomov }
5732f713615SIlya Dryomov 
prepare_read_tag(struct ceph_connection * con)5742f713615SIlya Dryomov static void prepare_read_tag(struct ceph_connection *con)
5752f713615SIlya Dryomov {
5762f713615SIlya Dryomov 	dout("prepare_read_tag %p\n", con);
577a56dd9bfSIlya Dryomov 	con->v1.in_base_pos = 0;
578a56dd9bfSIlya Dryomov 	con->v1.in_tag = CEPH_MSGR_TAG_READY;
5792f713615SIlya Dryomov }
5802f713615SIlya Dryomov 
prepare_read_keepalive_ack(struct ceph_connection * con)5812f713615SIlya Dryomov static void prepare_read_keepalive_ack(struct ceph_connection *con)
5822f713615SIlya Dryomov {
5832f713615SIlya Dryomov 	dout("prepare_read_keepalive_ack %p\n", con);
584a56dd9bfSIlya Dryomov 	con->v1.in_base_pos = 0;
5852f713615SIlya Dryomov }
5862f713615SIlya Dryomov 
5872f713615SIlya Dryomov /*
5882f713615SIlya Dryomov  * Prepare to read a message.
5892f713615SIlya Dryomov  */
prepare_read_message(struct ceph_connection * con)5902f713615SIlya Dryomov static int prepare_read_message(struct ceph_connection *con)
5912f713615SIlya Dryomov {
5922f713615SIlya Dryomov 	dout("prepare_read_message %p\n", con);
5932f713615SIlya Dryomov 	BUG_ON(con->in_msg != NULL);
594a56dd9bfSIlya Dryomov 	con->v1.in_base_pos = 0;
5952f713615SIlya Dryomov 	con->in_front_crc = con->in_middle_crc = con->in_data_crc = 0;
5962f713615SIlya Dryomov 	return 0;
5972f713615SIlya Dryomov }
5982f713615SIlya Dryomov 
read_partial(struct ceph_connection * con,int end,int size,void * object)5992f713615SIlya Dryomov static int read_partial(struct ceph_connection *con,
6002f713615SIlya Dryomov 			int end, int size, void *object)
6012f713615SIlya Dryomov {
602a56dd9bfSIlya Dryomov 	while (con->v1.in_base_pos < end) {
603a56dd9bfSIlya Dryomov 		int left = end - con->v1.in_base_pos;
6042f713615SIlya Dryomov 		int have = size - left;
6052f713615SIlya Dryomov 		int ret = ceph_tcp_recvmsg(con->sock, object + have, left);
6062f713615SIlya Dryomov 		if (ret <= 0)
6072f713615SIlya Dryomov 			return ret;
608a56dd9bfSIlya Dryomov 		con->v1.in_base_pos += ret;
6092f713615SIlya Dryomov 	}
6102f713615SIlya Dryomov 	return 1;
6112f713615SIlya Dryomov }
6122f713615SIlya Dryomov 
6132f713615SIlya Dryomov /*
6142f713615SIlya Dryomov  * Read all or part of the connect-side handshake on a new connection
6152f713615SIlya Dryomov  */
read_partial_banner(struct ceph_connection * con)6162f713615SIlya Dryomov static int read_partial_banner(struct ceph_connection *con)
6172f713615SIlya Dryomov {
6182f713615SIlya Dryomov 	int size;
6192f713615SIlya Dryomov 	int end;
6202f713615SIlya Dryomov 	int ret;
6212f713615SIlya Dryomov 
622a56dd9bfSIlya Dryomov 	dout("read_partial_banner %p at %d\n", con, con->v1.in_base_pos);
6232f713615SIlya Dryomov 
6242f713615SIlya Dryomov 	/* peer's banner */
6252f713615SIlya Dryomov 	size = strlen(CEPH_BANNER);
6262f713615SIlya Dryomov 	end = size;
627a56dd9bfSIlya Dryomov 	ret = read_partial(con, end, size, con->v1.in_banner);
6282f713615SIlya Dryomov 	if (ret <= 0)
6292f713615SIlya Dryomov 		goto out;
6302f713615SIlya Dryomov 
631a56dd9bfSIlya Dryomov 	size = sizeof(con->v1.actual_peer_addr);
6322f713615SIlya Dryomov 	end += size;
633a56dd9bfSIlya Dryomov 	ret = read_partial(con, end, size, &con->v1.actual_peer_addr);
6342f713615SIlya Dryomov 	if (ret <= 0)
6352f713615SIlya Dryomov 		goto out;
636a56dd9bfSIlya Dryomov 	ceph_decode_banner_addr(&con->v1.actual_peer_addr);
6372f713615SIlya Dryomov 
638a56dd9bfSIlya Dryomov 	size = sizeof(con->v1.peer_addr_for_me);
6392f713615SIlya Dryomov 	end += size;
640a56dd9bfSIlya Dryomov 	ret = read_partial(con, end, size, &con->v1.peer_addr_for_me);
6412f713615SIlya Dryomov 	if (ret <= 0)
6422f713615SIlya Dryomov 		goto out;
643a56dd9bfSIlya Dryomov 	ceph_decode_banner_addr(&con->v1.peer_addr_for_me);
6442f713615SIlya Dryomov 
6452f713615SIlya Dryomov out:
6462f713615SIlya Dryomov 	return ret;
6472f713615SIlya Dryomov }
6482f713615SIlya Dryomov 
read_partial_connect(struct ceph_connection * con)6492f713615SIlya Dryomov static int read_partial_connect(struct ceph_connection *con)
6502f713615SIlya Dryomov {
6512f713615SIlya Dryomov 	int size;
6522f713615SIlya Dryomov 	int end;
6532f713615SIlya Dryomov 	int ret;
6542f713615SIlya Dryomov 
655a56dd9bfSIlya Dryomov 	dout("read_partial_connect %p at %d\n", con, con->v1.in_base_pos);
6562f713615SIlya Dryomov 
657a56dd9bfSIlya Dryomov 	size = sizeof(con->v1.in_reply);
6582f713615SIlya Dryomov 	end = size;
659a56dd9bfSIlya Dryomov 	ret = read_partial(con, end, size, &con->v1.in_reply);
6602f713615SIlya Dryomov 	if (ret <= 0)
6612f713615SIlya Dryomov 		goto out;
6622f713615SIlya Dryomov 
663a56dd9bfSIlya Dryomov 	if (con->v1.auth) {
664a56dd9bfSIlya Dryomov 		size = le32_to_cpu(con->v1.in_reply.authorizer_len);
665a56dd9bfSIlya Dryomov 		if (size > con->v1.auth->authorizer_reply_buf_len) {
6662f713615SIlya Dryomov 			pr_err("authorizer reply too big: %d > %zu\n", size,
667a56dd9bfSIlya Dryomov 			       con->v1.auth->authorizer_reply_buf_len);
6682f713615SIlya Dryomov 			ret = -EINVAL;
6692f713615SIlya Dryomov 			goto out;
6702f713615SIlya Dryomov 		}
6712f713615SIlya Dryomov 
6722f713615SIlya Dryomov 		end += size;
6732f713615SIlya Dryomov 		ret = read_partial(con, end, size,
674a56dd9bfSIlya Dryomov 				   con->v1.auth->authorizer_reply_buf);
6752f713615SIlya Dryomov 		if (ret <= 0)
6762f713615SIlya Dryomov 			goto out;
6772f713615SIlya Dryomov 	}
6782f713615SIlya Dryomov 
6792f713615SIlya Dryomov 	dout("read_partial_connect %p tag %d, con_seq = %u, g_seq = %u\n",
680a56dd9bfSIlya Dryomov 	     con, con->v1.in_reply.tag,
681a56dd9bfSIlya Dryomov 	     le32_to_cpu(con->v1.in_reply.connect_seq),
682a56dd9bfSIlya Dryomov 	     le32_to_cpu(con->v1.in_reply.global_seq));
6832f713615SIlya Dryomov out:
6842f713615SIlya Dryomov 	return ret;
6852f713615SIlya Dryomov }
6862f713615SIlya Dryomov 
6872f713615SIlya Dryomov /*
6882f713615SIlya Dryomov  * Verify the hello banner looks okay.
6892f713615SIlya Dryomov  */
verify_hello(struct ceph_connection * con)6902f713615SIlya Dryomov static int verify_hello(struct ceph_connection *con)
6912f713615SIlya Dryomov {
692a56dd9bfSIlya Dryomov 	if (memcmp(con->v1.in_banner, CEPH_BANNER, strlen(CEPH_BANNER))) {
6932f713615SIlya Dryomov 		pr_err("connect to %s got bad banner\n",
6942f713615SIlya Dryomov 		       ceph_pr_addr(&con->peer_addr));
6952f713615SIlya Dryomov 		con->error_msg = "protocol error, bad banner";
6962f713615SIlya Dryomov 		return -1;
6972f713615SIlya Dryomov 	}
6982f713615SIlya Dryomov 	return 0;
6992f713615SIlya Dryomov }
7002f713615SIlya Dryomov 
process_banner(struct ceph_connection * con)7012f713615SIlya Dryomov static int process_banner(struct ceph_connection *con)
7022f713615SIlya Dryomov {
7032f713615SIlya Dryomov 	struct ceph_entity_addr *my_addr = &con->msgr->inst.addr;
7042f713615SIlya Dryomov 
7052f713615SIlya Dryomov 	dout("process_banner on %p\n", con);
7062f713615SIlya Dryomov 
7072f713615SIlya Dryomov 	if (verify_hello(con) < 0)
7082f713615SIlya Dryomov 		return -1;
7092f713615SIlya Dryomov 
7102f713615SIlya Dryomov 	/*
7112f713615SIlya Dryomov 	 * Make sure the other end is who we wanted.  note that the other
7122f713615SIlya Dryomov 	 * end may not yet know their ip address, so if it's 0.0.0.0, give
7132f713615SIlya Dryomov 	 * them the benefit of the doubt.
7142f713615SIlya Dryomov 	 */
715a56dd9bfSIlya Dryomov 	if (memcmp(&con->peer_addr, &con->v1.actual_peer_addr,
7162f713615SIlya Dryomov 		   sizeof(con->peer_addr)) != 0 &&
717a56dd9bfSIlya Dryomov 	    !(ceph_addr_is_blank(&con->v1.actual_peer_addr) &&
718a56dd9bfSIlya Dryomov 	      con->v1.actual_peer_addr.nonce == con->peer_addr.nonce)) {
7192f713615SIlya Dryomov 		pr_warn("wrong peer, want %s/%u, got %s/%u\n",
7202f713615SIlya Dryomov 			ceph_pr_addr(&con->peer_addr),
7212f713615SIlya Dryomov 			le32_to_cpu(con->peer_addr.nonce),
722a56dd9bfSIlya Dryomov 			ceph_pr_addr(&con->v1.actual_peer_addr),
723a56dd9bfSIlya Dryomov 			le32_to_cpu(con->v1.actual_peer_addr.nonce));
7242f713615SIlya Dryomov 		con->error_msg = "wrong peer at address";
7252f713615SIlya Dryomov 		return -1;
7262f713615SIlya Dryomov 	}
7272f713615SIlya Dryomov 
7282f713615SIlya Dryomov 	/*
7292f713615SIlya Dryomov 	 * did we learn our address?
7302f713615SIlya Dryomov 	 */
7312f713615SIlya Dryomov 	if (ceph_addr_is_blank(my_addr)) {
7322f713615SIlya Dryomov 		memcpy(&my_addr->in_addr,
733a56dd9bfSIlya Dryomov 		       &con->v1.peer_addr_for_me.in_addr,
734a56dd9bfSIlya Dryomov 		       sizeof(con->v1.peer_addr_for_me.in_addr));
7352f713615SIlya Dryomov 		ceph_addr_set_port(my_addr, 0);
7362f713615SIlya Dryomov 		ceph_encode_my_addr(con->msgr);
7372f713615SIlya Dryomov 		dout("process_banner learned my addr is %s\n",
7382f713615SIlya Dryomov 		     ceph_pr_addr(my_addr));
7392f713615SIlya Dryomov 	}
7402f713615SIlya Dryomov 
7412f713615SIlya Dryomov 	return 0;
7422f713615SIlya Dryomov }
7432f713615SIlya Dryomov 
process_connect(struct ceph_connection * con)7442f713615SIlya Dryomov static int process_connect(struct ceph_connection *con)
7452f713615SIlya Dryomov {
7462f713615SIlya Dryomov 	u64 sup_feat = from_msgr(con->msgr)->supported_features;
7472f713615SIlya Dryomov 	u64 req_feat = from_msgr(con->msgr)->required_features;
748a56dd9bfSIlya Dryomov 	u64 server_feat = le64_to_cpu(con->v1.in_reply.features);
7492f713615SIlya Dryomov 	int ret;
7502f713615SIlya Dryomov 
751a56dd9bfSIlya Dryomov 	dout("process_connect on %p tag %d\n", con, con->v1.in_tag);
7522f713615SIlya Dryomov 
753a56dd9bfSIlya Dryomov 	if (con->v1.auth) {
754a56dd9bfSIlya Dryomov 		int len = le32_to_cpu(con->v1.in_reply.authorizer_len);
7552f713615SIlya Dryomov 
7562f713615SIlya Dryomov 		/*
7572f713615SIlya Dryomov 		 * Any connection that defines ->get_authorizer()
7582f713615SIlya Dryomov 		 * should also define ->add_authorizer_challenge() and
7592f713615SIlya Dryomov 		 * ->verify_authorizer_reply().
7602f713615SIlya Dryomov 		 *
7612f713615SIlya Dryomov 		 * See get_connect_authorizer().
7622f713615SIlya Dryomov 		 */
763a56dd9bfSIlya Dryomov 		if (con->v1.in_reply.tag ==
764a56dd9bfSIlya Dryomov 				CEPH_MSGR_TAG_CHALLENGE_AUTHORIZER) {
7652f713615SIlya Dryomov 			ret = con->ops->add_authorizer_challenge(
766a56dd9bfSIlya Dryomov 				con, con->v1.auth->authorizer_reply_buf, len);
7672f713615SIlya Dryomov 			if (ret < 0)
7682f713615SIlya Dryomov 				return ret;
7692f713615SIlya Dryomov 
7702f713615SIlya Dryomov 			con_out_kvec_reset(con);
7712f713615SIlya Dryomov 			__prepare_write_connect(con);
7722f713615SIlya Dryomov 			prepare_read_connect(con);
7732f713615SIlya Dryomov 			return 0;
7742f713615SIlya Dryomov 		}
7752f713615SIlya Dryomov 
7762f713615SIlya Dryomov 		if (len) {
7772f713615SIlya Dryomov 			ret = con->ops->verify_authorizer_reply(con);
7782f713615SIlya Dryomov 			if (ret < 0) {
7792f713615SIlya Dryomov 				con->error_msg = "bad authorize reply";
7802f713615SIlya Dryomov 				return ret;
7812f713615SIlya Dryomov 			}
7822f713615SIlya Dryomov 		}
7832f713615SIlya Dryomov 	}
7842f713615SIlya Dryomov 
785a56dd9bfSIlya Dryomov 	switch (con->v1.in_reply.tag) {
7862f713615SIlya Dryomov 	case CEPH_MSGR_TAG_FEATURES:
7872f713615SIlya Dryomov 		pr_err("%s%lld %s feature set mismatch,"
7882f713615SIlya Dryomov 		       " my %llx < server's %llx, missing %llx\n",
7892f713615SIlya Dryomov 		       ENTITY_NAME(con->peer_name),
7902f713615SIlya Dryomov 		       ceph_pr_addr(&con->peer_addr),
7912f713615SIlya Dryomov 		       sup_feat, server_feat, server_feat & ~sup_feat);
7922f713615SIlya Dryomov 		con->error_msg = "missing required protocol features";
7932f713615SIlya Dryomov 		return -1;
7942f713615SIlya Dryomov 
7952f713615SIlya Dryomov 	case CEPH_MSGR_TAG_BADPROTOVER:
7962f713615SIlya Dryomov 		pr_err("%s%lld %s protocol version mismatch,"
7972f713615SIlya Dryomov 		       " my %d != server's %d\n",
7982f713615SIlya Dryomov 		       ENTITY_NAME(con->peer_name),
7992f713615SIlya Dryomov 		       ceph_pr_addr(&con->peer_addr),
800a56dd9bfSIlya Dryomov 		       le32_to_cpu(con->v1.out_connect.protocol_version),
801a56dd9bfSIlya Dryomov 		       le32_to_cpu(con->v1.in_reply.protocol_version));
8022f713615SIlya Dryomov 		con->error_msg = "protocol version mismatch";
8032f713615SIlya Dryomov 		return -1;
8042f713615SIlya Dryomov 
8052f713615SIlya Dryomov 	case CEPH_MSGR_TAG_BADAUTHORIZER:
806a56dd9bfSIlya Dryomov 		con->v1.auth_retry++;
8072f713615SIlya Dryomov 		dout("process_connect %p got BADAUTHORIZER attempt %d\n", con,
808a56dd9bfSIlya Dryomov 		     con->v1.auth_retry);
809a56dd9bfSIlya Dryomov 		if (con->v1.auth_retry == 2) {
8102f713615SIlya Dryomov 			con->error_msg = "connect authorization failure";
8112f713615SIlya Dryomov 			return -1;
8122f713615SIlya Dryomov 		}
8132f713615SIlya Dryomov 		con_out_kvec_reset(con);
8142f713615SIlya Dryomov 		ret = prepare_write_connect(con);
8152f713615SIlya Dryomov 		if (ret < 0)
8162f713615SIlya Dryomov 			return ret;
8172f713615SIlya Dryomov 		prepare_read_connect(con);
8182f713615SIlya Dryomov 		break;
8192f713615SIlya Dryomov 
8202f713615SIlya Dryomov 	case CEPH_MSGR_TAG_RESETSESSION:
8212f713615SIlya Dryomov 		/*
8222f713615SIlya Dryomov 		 * If we connected with a large connect_seq but the peer
8232f713615SIlya Dryomov 		 * has no record of a session with us (no connection, or
8242f713615SIlya Dryomov 		 * connect_seq == 0), they will send RESETSESION to indicate
8252f713615SIlya Dryomov 		 * that they must have reset their session, and may have
8262f713615SIlya Dryomov 		 * dropped messages.
8272f713615SIlya Dryomov 		 */
8282f713615SIlya Dryomov 		dout("process_connect got RESET peer seq %u\n",
829a56dd9bfSIlya Dryomov 		     le32_to_cpu(con->v1.in_reply.connect_seq));
8302f713615SIlya Dryomov 		pr_info("%s%lld %s session reset\n",
8312f713615SIlya Dryomov 			ENTITY_NAME(con->peer_name),
8322f713615SIlya Dryomov 			ceph_pr_addr(&con->peer_addr));
8332f713615SIlya Dryomov 		ceph_con_reset_session(con);
8342f713615SIlya Dryomov 		con_out_kvec_reset(con);
8352f713615SIlya Dryomov 		ret = prepare_write_connect(con);
8362f713615SIlya Dryomov 		if (ret < 0)
8372f713615SIlya Dryomov 			return ret;
8382f713615SIlya Dryomov 		prepare_read_connect(con);
8392f713615SIlya Dryomov 
8402f713615SIlya Dryomov 		/* Tell ceph about it. */
8412f713615SIlya Dryomov 		mutex_unlock(&con->mutex);
8422f713615SIlya Dryomov 		if (con->ops->peer_reset)
8432f713615SIlya Dryomov 			con->ops->peer_reset(con);
8442f713615SIlya Dryomov 		mutex_lock(&con->mutex);
8452f713615SIlya Dryomov 		if (con->state != CEPH_CON_S_V1_CONNECT_MSG)
8462f713615SIlya Dryomov 			return -EAGAIN;
8472f713615SIlya Dryomov 		break;
8482f713615SIlya Dryomov 
8492f713615SIlya Dryomov 	case CEPH_MSGR_TAG_RETRY_SESSION:
8502f713615SIlya Dryomov 		/*
8512f713615SIlya Dryomov 		 * If we sent a smaller connect_seq than the peer has, try
8522f713615SIlya Dryomov 		 * again with a larger value.
8532f713615SIlya Dryomov 		 */
8542f713615SIlya Dryomov 		dout("process_connect got RETRY_SESSION my seq %u, peer %u\n",
855a56dd9bfSIlya Dryomov 		     le32_to_cpu(con->v1.out_connect.connect_seq),
856a56dd9bfSIlya Dryomov 		     le32_to_cpu(con->v1.in_reply.connect_seq));
857a56dd9bfSIlya Dryomov 		con->v1.connect_seq = le32_to_cpu(con->v1.in_reply.connect_seq);
8582f713615SIlya Dryomov 		con_out_kvec_reset(con);
8592f713615SIlya Dryomov 		ret = prepare_write_connect(con);
8602f713615SIlya Dryomov 		if (ret < 0)
8612f713615SIlya Dryomov 			return ret;
8622f713615SIlya Dryomov 		prepare_read_connect(con);
8632f713615SIlya Dryomov 		break;
8642f713615SIlya Dryomov 
8652f713615SIlya Dryomov 	case CEPH_MSGR_TAG_RETRY_GLOBAL:
8662f713615SIlya Dryomov 		/*
8672f713615SIlya Dryomov 		 * If we sent a smaller global_seq than the peer has, try
8682f713615SIlya Dryomov 		 * again with a larger value.
8692f713615SIlya Dryomov 		 */
8702f713615SIlya Dryomov 		dout("process_connect got RETRY_GLOBAL my %u peer_gseq %u\n",
871a56dd9bfSIlya Dryomov 		     con->v1.peer_global_seq,
872a56dd9bfSIlya Dryomov 		     le32_to_cpu(con->v1.in_reply.global_seq));
8732f713615SIlya Dryomov 		ceph_get_global_seq(con->msgr,
874a56dd9bfSIlya Dryomov 				    le32_to_cpu(con->v1.in_reply.global_seq));
8752f713615SIlya Dryomov 		con_out_kvec_reset(con);
8762f713615SIlya Dryomov 		ret = prepare_write_connect(con);
8772f713615SIlya Dryomov 		if (ret < 0)
8782f713615SIlya Dryomov 			return ret;
8792f713615SIlya Dryomov 		prepare_read_connect(con);
8802f713615SIlya Dryomov 		break;
8812f713615SIlya Dryomov 
8822f713615SIlya Dryomov 	case CEPH_MSGR_TAG_SEQ:
8832f713615SIlya Dryomov 	case CEPH_MSGR_TAG_READY:
8842f713615SIlya Dryomov 		if (req_feat & ~server_feat) {
8852f713615SIlya Dryomov 			pr_err("%s%lld %s protocol feature mismatch,"
8862f713615SIlya Dryomov 			       " my required %llx > server's %llx, need %llx\n",
8872f713615SIlya Dryomov 			       ENTITY_NAME(con->peer_name),
8882f713615SIlya Dryomov 			       ceph_pr_addr(&con->peer_addr),
8892f713615SIlya Dryomov 			       req_feat, server_feat, req_feat & ~server_feat);
8902f713615SIlya Dryomov 			con->error_msg = "missing required protocol features";
8912f713615SIlya Dryomov 			return -1;
8922f713615SIlya Dryomov 		}
8932f713615SIlya Dryomov 
8942f713615SIlya Dryomov 		WARN_ON(con->state != CEPH_CON_S_V1_CONNECT_MSG);
8952f713615SIlya Dryomov 		con->state = CEPH_CON_S_OPEN;
896a56dd9bfSIlya Dryomov 		con->v1.auth_retry = 0;    /* we authenticated; clear flag */
897a56dd9bfSIlya Dryomov 		con->v1.peer_global_seq =
898a56dd9bfSIlya Dryomov 			le32_to_cpu(con->v1.in_reply.global_seq);
899a56dd9bfSIlya Dryomov 		con->v1.connect_seq++;
9002f713615SIlya Dryomov 		con->peer_features = server_feat;
9012f713615SIlya Dryomov 		dout("process_connect got READY gseq %d cseq %d (%d)\n",
902a56dd9bfSIlya Dryomov 		     con->v1.peer_global_seq,
903a56dd9bfSIlya Dryomov 		     le32_to_cpu(con->v1.in_reply.connect_seq),
904a56dd9bfSIlya Dryomov 		     con->v1.connect_seq);
905a56dd9bfSIlya Dryomov 		WARN_ON(con->v1.connect_seq !=
906a56dd9bfSIlya Dryomov 			le32_to_cpu(con->v1.in_reply.connect_seq));
9072f713615SIlya Dryomov 
908a56dd9bfSIlya Dryomov 		if (con->v1.in_reply.flags & CEPH_MSG_CONNECT_LOSSY)
9092f713615SIlya Dryomov 			ceph_con_flag_set(con, CEPH_CON_F_LOSSYTX);
9102f713615SIlya Dryomov 
9112f713615SIlya Dryomov 		con->delay = 0;      /* reset backoff memory */
9122f713615SIlya Dryomov 
913a56dd9bfSIlya Dryomov 		if (con->v1.in_reply.tag == CEPH_MSGR_TAG_SEQ) {
9142f713615SIlya Dryomov 			prepare_write_seq(con);
9152f713615SIlya Dryomov 			prepare_read_seq(con);
9162f713615SIlya Dryomov 		} else {
9172f713615SIlya Dryomov 			prepare_read_tag(con);
9182f713615SIlya Dryomov 		}
9192f713615SIlya Dryomov 		break;
9202f713615SIlya Dryomov 
9212f713615SIlya Dryomov 	case CEPH_MSGR_TAG_WAIT:
9222f713615SIlya Dryomov 		/*
9232f713615SIlya Dryomov 		 * If there is a connection race (we are opening
9242f713615SIlya Dryomov 		 * connections to each other), one of us may just have
9252f713615SIlya Dryomov 		 * to WAIT.  This shouldn't happen if we are the
9262f713615SIlya Dryomov 		 * client.
9272f713615SIlya Dryomov 		 */
9282f713615SIlya Dryomov 		con->error_msg = "protocol error, got WAIT as client";
9292f713615SIlya Dryomov 		return -1;
9302f713615SIlya Dryomov 
9312f713615SIlya Dryomov 	default:
9322f713615SIlya Dryomov 		con->error_msg = "protocol error, garbage tag during connect";
9332f713615SIlya Dryomov 		return -1;
9342f713615SIlya Dryomov 	}
9352f713615SIlya Dryomov 	return 0;
9362f713615SIlya Dryomov }
9372f713615SIlya Dryomov 
9382f713615SIlya Dryomov /*
9392f713615SIlya Dryomov  * read (part of) an ack
9402f713615SIlya Dryomov  */
read_partial_ack(struct ceph_connection * con)9412f713615SIlya Dryomov static int read_partial_ack(struct ceph_connection *con)
9422f713615SIlya Dryomov {
943a56dd9bfSIlya Dryomov 	int size = sizeof(con->v1.in_temp_ack);
9442f713615SIlya Dryomov 	int end = size;
9452f713615SIlya Dryomov 
946a56dd9bfSIlya Dryomov 	return read_partial(con, end, size, &con->v1.in_temp_ack);
9472f713615SIlya Dryomov }
9482f713615SIlya Dryomov 
9492f713615SIlya Dryomov /*
9502f713615SIlya Dryomov  * We can finally discard anything that's been acked.
9512f713615SIlya Dryomov  */
process_ack(struct ceph_connection * con)9522f713615SIlya Dryomov static void process_ack(struct ceph_connection *con)
9532f713615SIlya Dryomov {
954a56dd9bfSIlya Dryomov 	u64 ack = le64_to_cpu(con->v1.in_temp_ack);
9552f713615SIlya Dryomov 
956a56dd9bfSIlya Dryomov 	if (con->v1.in_tag == CEPH_MSGR_TAG_ACK)
9572f713615SIlya Dryomov 		ceph_con_discard_sent(con, ack);
9582f713615SIlya Dryomov 	else
9592f713615SIlya Dryomov 		ceph_con_discard_requeued(con, ack);
9602f713615SIlya Dryomov 
9612f713615SIlya Dryomov 	prepare_read_tag(con);
9622f713615SIlya Dryomov }
9632f713615SIlya Dryomov 
/*
 * Fill @section up to @sec_len bytes from the socket.
 *
 * @section->iov_len tracks how much has been received so far, so the
 * function can be called repeatedly until the chunk is complete.  Once
 * iov_len equals @sec_len, the chunk is folded into *@crc with crc32c().
 *
 * Returns 1 when the chunk is complete, otherwise the non-positive result
 * of ceph_tcp_recvmsg().
 */
static int read_partial_message_chunk(struct ceph_connection *con,
				      struct kvec *section,
				      unsigned int sec_len, u32 *crc)
{
	int want, got;
	char *dst;

	BUG_ON(!section);

	while (section->iov_len < sec_len) {
		BUG_ON(!section->iov_base);

		want = sec_len - section->iov_len;
		dst = (char *)section->iov_base + section->iov_len;
		got = ceph_tcp_recvmsg(con->sock, dst, want);
		if (got <= 0)
			return got;

		section->iov_len += got;
	}

	if (section->iov_len == sec_len)
		*crc = crc32c(*crc, section->iov_base, section->iov_len);

	return 1;
}
9862f713615SIlya Dryomov 
/*
 * Same as read_partial_message_chunk(), except that *@crc is reset to zero
 * before the call.
 */
static inline int read_partial_message_section(struct ceph_connection *con,
					       struct kvec *section,
					       unsigned int sec_len, u32 *crc)
{
	/* every call starts from a zero seed */
	*crc = 0;

	return read_partial_message_chunk(con, section, sec_len, crc);
}
994d396f89dSJeff Layton 
/*
 * Receive the remainder of the current sparse-read extent, i.e. until
 * cursor->sr_resid drops to zero.
 *
 * With the RXBOUNCE option set, data is received into con->bounce_page
 * (allocated on first use) and copied to the destination page afterwards;
 * otherwise it is received straight into the destination page.  Every
 * received piece is folded into *@crc.
 *
 * Returns 1 when the extent is complete, -ENOMEM if the bounce page cannot
 * be allocated, otherwise the non-positive result of ceph_tcp_recvpage().
 */
static int read_partial_sparse_msg_extent(struct ceph_connection *con, u32 *crc)
{
	struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor;
	bool do_bounce = ceph_test_opt(from_msgr(con->msgr), RXBOUNCE);
	struct page *dst_page, *rx_page;
	size_t pg_off, chunk;
	int nread;

	if (do_bounce && unlikely(!con->bounce_page)) {
		con->bounce_page = alloc_page(GFP_NOIO);
		if (!con->bounce_page) {
			pr_err("failed to allocate bounce page\n");
			return -ENOMEM;
		}
	}

	while (cursor->sr_resid > 0) {
		dst_page = ceph_msg_data_next(cursor, &pg_off, &chunk);
		rx_page = do_bounce ? con->bounce_page : dst_page;

		/* clamp to what remains in extent */
		chunk = min_t(int, chunk, cursor->sr_resid);
		nread = ceph_tcp_recvpage(con->sock, rx_page, (int)pg_off,
					  chunk);
		if (nread <= 0)
			return nread;

		*crc = ceph_crc32c_page(*crc, rx_page, pg_off, nread);
		ceph_msg_data_advance(cursor, (size_t)nread);
		cursor->sr_resid -= nread;

		/* bounce page uses the same in-page offset as the target */
		if (do_bounce)
			memcpy_page(dst_page, pg_off, rx_page, pg_off, nread);
	}

	return 1;
}
1029d396f89dSJeff Layton 
read_partial_sparse_msg_data(struct ceph_connection * con)1030ee97302fSXiubo Li static int read_partial_sparse_msg_data(struct ceph_connection *con)
1031d396f89dSJeff Layton {
1032d396f89dSJeff Layton 	struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor;
1033d396f89dSJeff Layton 	bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC);
1034d396f89dSJeff Layton 	u32 crc = 0;
1035d396f89dSJeff Layton 	int ret = 1;
1036d396f89dSJeff Layton 
1037d396f89dSJeff Layton 	if (do_datacrc)
1038d396f89dSJeff Layton 		crc = con->in_data_crc;
1039d396f89dSJeff Layton 
1040*8e46a2d0SXiubo Li 	while (cursor->total_resid) {
1041d396f89dSJeff Layton 		if (con->v1.in_sr_kvec.iov_base)
1042d396f89dSJeff Layton 			ret = read_partial_message_chunk(con,
1043d396f89dSJeff Layton 							 &con->v1.in_sr_kvec,
1044d396f89dSJeff Layton 							 con->v1.in_sr_len,
1045d396f89dSJeff Layton 							 &crc);
1046d396f89dSJeff Layton 		else if (cursor->sr_resid > 0)
1047ee97302fSXiubo Li 			ret = read_partial_sparse_msg_extent(con, &crc);
1048*8e46a2d0SXiubo Li 		if (ret <= 0)
1049*8e46a2d0SXiubo Li 			break;
1050d396f89dSJeff Layton 
1051d396f89dSJeff Layton 		memset(&con->v1.in_sr_kvec, 0, sizeof(con->v1.in_sr_kvec));
1052d396f89dSJeff Layton 		ret = con->ops->sparse_read(con, cursor,
1053d396f89dSJeff Layton 				(char **)&con->v1.in_sr_kvec.iov_base);
1054*8e46a2d0SXiubo Li 		if (ret <= 0) {
1055*8e46a2d0SXiubo Li 			ret = ret ? ret : 1;  /* must return > 0 to indicate success */
1056*8e46a2d0SXiubo Li 			break;
1057*8e46a2d0SXiubo Li 		}
1058d396f89dSJeff Layton 		con->v1.in_sr_len = ret;
1059*8e46a2d0SXiubo Li 	}
1060d396f89dSJeff Layton 
1061d396f89dSJeff Layton 	if (do_datacrc)
1062d396f89dSJeff Layton 		con->in_data_crc = crc;
1063d396f89dSJeff Layton 
1064*8e46a2d0SXiubo Li 	return ret;
1065d396f89dSJeff Layton }
1066d396f89dSJeff Layton 
read_partial_msg_data(struct ceph_connection * con)10672f713615SIlya Dryomov static int read_partial_msg_data(struct ceph_connection *con)
10682f713615SIlya Dryomov {
1069038b8d1dSIlya Dryomov 	struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor;
10702f713615SIlya Dryomov 	bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC);
10712f713615SIlya Dryomov 	struct page *page;
10722f713615SIlya Dryomov 	size_t page_offset;
10732f713615SIlya Dryomov 	size_t length;
10742f713615SIlya Dryomov 	u32 crc = 0;
10752f713615SIlya Dryomov 	int ret;
10762f713615SIlya Dryomov 
10772f713615SIlya Dryomov 	if (do_datacrc)
10782f713615SIlya Dryomov 		crc = con->in_data_crc;
10792f713615SIlya Dryomov 	while (cursor->total_resid) {
10802f713615SIlya Dryomov 		if (!cursor->resid) {
10812f713615SIlya Dryomov 			ceph_msg_data_advance(cursor, 0);
10822f713615SIlya Dryomov 			continue;
10832f713615SIlya Dryomov 		}
10842f713615SIlya Dryomov 
1085da4ab869SJeff Layton 		page = ceph_msg_data_next(cursor, &page_offset, &length);
10862f713615SIlya Dryomov 		ret = ceph_tcp_recvpage(con->sock, page, page_offset, length);
10872f713615SIlya Dryomov 		if (ret <= 0) {
10882f713615SIlya Dryomov 			if (do_datacrc)
10892f713615SIlya Dryomov 				con->in_data_crc = crc;
10902f713615SIlya Dryomov 
10912f713615SIlya Dryomov 			return ret;
10922f713615SIlya Dryomov 		}
10932f713615SIlya Dryomov 
10942f713615SIlya Dryomov 		if (do_datacrc)
10952f713615SIlya Dryomov 			crc = ceph_crc32c_page(crc, page, page_offset, ret);
10962f713615SIlya Dryomov 		ceph_msg_data_advance(cursor, (size_t)ret);
10972f713615SIlya Dryomov 	}
10982f713615SIlya Dryomov 	if (do_datacrc)
10992f713615SIlya Dryomov 		con->in_data_crc = crc;
11002f713615SIlya Dryomov 
11012f713615SIlya Dryomov 	return 1;	/* must return > 0 to indicate success */
11022f713615SIlya Dryomov }
11032f713615SIlya Dryomov 
read_partial_msg_data_bounce(struct ceph_connection * con)1104038b8d1dSIlya Dryomov static int read_partial_msg_data_bounce(struct ceph_connection *con)
1105038b8d1dSIlya Dryomov {
1106038b8d1dSIlya Dryomov 	struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor;
1107038b8d1dSIlya Dryomov 	struct page *page;
1108038b8d1dSIlya Dryomov 	size_t off, len;
1109038b8d1dSIlya Dryomov 	u32 crc;
1110038b8d1dSIlya Dryomov 	int ret;
1111038b8d1dSIlya Dryomov 
1112038b8d1dSIlya Dryomov 	if (unlikely(!con->bounce_page)) {
1113038b8d1dSIlya Dryomov 		con->bounce_page = alloc_page(GFP_NOIO);
1114038b8d1dSIlya Dryomov 		if (!con->bounce_page) {
1115038b8d1dSIlya Dryomov 			pr_err("failed to allocate bounce page\n");
1116038b8d1dSIlya Dryomov 			return -ENOMEM;
1117038b8d1dSIlya Dryomov 		}
1118038b8d1dSIlya Dryomov 	}
1119038b8d1dSIlya Dryomov 
1120038b8d1dSIlya Dryomov 	crc = con->in_data_crc;
1121038b8d1dSIlya Dryomov 	while (cursor->total_resid) {
1122038b8d1dSIlya Dryomov 		if (!cursor->resid) {
1123038b8d1dSIlya Dryomov 			ceph_msg_data_advance(cursor, 0);
1124038b8d1dSIlya Dryomov 			continue;
1125038b8d1dSIlya Dryomov 		}
1126038b8d1dSIlya Dryomov 
1127da4ab869SJeff Layton 		page = ceph_msg_data_next(cursor, &off, &len);
1128038b8d1dSIlya Dryomov 		ret = ceph_tcp_recvpage(con->sock, con->bounce_page, 0, len);
1129038b8d1dSIlya Dryomov 		if (ret <= 0) {
1130038b8d1dSIlya Dryomov 			con->in_data_crc = crc;
1131038b8d1dSIlya Dryomov 			return ret;
1132038b8d1dSIlya Dryomov 		}
1133038b8d1dSIlya Dryomov 
1134038b8d1dSIlya Dryomov 		crc = crc32c(crc, page_address(con->bounce_page), ret);
1135038b8d1dSIlya Dryomov 		memcpy_to_page(page, off, page_address(con->bounce_page), ret);
1136038b8d1dSIlya Dryomov 
1137038b8d1dSIlya Dryomov 		ceph_msg_data_advance(cursor, ret);
1138038b8d1dSIlya Dryomov 	}
1139038b8d1dSIlya Dryomov 	con->in_data_crc = crc;
1140038b8d1dSIlya Dryomov 
1141038b8d1dSIlya Dryomov 	return 1;	/* must return > 0 to indicate success */
1142038b8d1dSIlya Dryomov }
1143038b8d1dSIlya Dryomov 
11442f713615SIlya Dryomov /*
11452f713615SIlya Dryomov  * read (part of) a message.
11462f713615SIlya Dryomov  */
read_partial_message(struct ceph_connection * con)11472f713615SIlya Dryomov static int read_partial_message(struct ceph_connection *con)
11482f713615SIlya Dryomov {
11492f713615SIlya Dryomov 	struct ceph_msg *m = con->in_msg;
11502f713615SIlya Dryomov 	int size;
11512f713615SIlya Dryomov 	int end;
11522f713615SIlya Dryomov 	int ret;
11532f713615SIlya Dryomov 	unsigned int front_len, middle_len, data_len;
11542f713615SIlya Dryomov 	bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC);
11552f713615SIlya Dryomov 	bool need_sign = (con->peer_features & CEPH_FEATURE_MSG_AUTH);
11562f713615SIlya Dryomov 	u64 seq;
11572f713615SIlya Dryomov 	u32 crc;
11582f713615SIlya Dryomov 
11592f713615SIlya Dryomov 	dout("read_partial_message con %p msg %p\n", con, m);
11602f713615SIlya Dryomov 
11612f713615SIlya Dryomov 	/* header */
1162a56dd9bfSIlya Dryomov 	size = sizeof(con->v1.in_hdr);
11632f713615SIlya Dryomov 	end = size;
1164a56dd9bfSIlya Dryomov 	ret = read_partial(con, end, size, &con->v1.in_hdr);
11652f713615SIlya Dryomov 	if (ret <= 0)
11662f713615SIlya Dryomov 		return ret;
11672f713615SIlya Dryomov 
1168a56dd9bfSIlya Dryomov 	crc = crc32c(0, &con->v1.in_hdr, offsetof(struct ceph_msg_header, crc));
1169a56dd9bfSIlya Dryomov 	if (cpu_to_le32(crc) != con->v1.in_hdr.crc) {
11702f713615SIlya Dryomov 		pr_err("read_partial_message bad hdr crc %u != expected %u\n",
1171a56dd9bfSIlya Dryomov 		       crc, con->v1.in_hdr.crc);
11722f713615SIlya Dryomov 		return -EBADMSG;
11732f713615SIlya Dryomov 	}
11742f713615SIlya Dryomov 
1175a56dd9bfSIlya Dryomov 	front_len = le32_to_cpu(con->v1.in_hdr.front_len);
11762f713615SIlya Dryomov 	if (front_len > CEPH_MSG_MAX_FRONT_LEN)
11772f713615SIlya Dryomov 		return -EIO;
1178a56dd9bfSIlya Dryomov 	middle_len = le32_to_cpu(con->v1.in_hdr.middle_len);
11792f713615SIlya Dryomov 	if (middle_len > CEPH_MSG_MAX_MIDDLE_LEN)
11802f713615SIlya Dryomov 		return -EIO;
1181a56dd9bfSIlya Dryomov 	data_len = le32_to_cpu(con->v1.in_hdr.data_len);
11822f713615SIlya Dryomov 	if (data_len > CEPH_MSG_MAX_DATA_LEN)
11832f713615SIlya Dryomov 		return -EIO;
11842f713615SIlya Dryomov 
11852f713615SIlya Dryomov 	/* verify seq# */
1186a56dd9bfSIlya Dryomov 	seq = le64_to_cpu(con->v1.in_hdr.seq);
11872f713615SIlya Dryomov 	if ((s64)seq - (s64)con->in_seq < 1) {
11882f713615SIlya Dryomov 		pr_info("skipping %s%lld %s seq %lld expected %lld\n",
11892f713615SIlya Dryomov 			ENTITY_NAME(con->peer_name),
11902f713615SIlya Dryomov 			ceph_pr_addr(&con->peer_addr),
11912f713615SIlya Dryomov 			seq, con->in_seq + 1);
1192a56dd9bfSIlya Dryomov 		con->v1.in_base_pos = -front_len - middle_len - data_len -
11932f713615SIlya Dryomov 				      sizeof_footer(con);
1194a56dd9bfSIlya Dryomov 		con->v1.in_tag = CEPH_MSGR_TAG_READY;
11952f713615SIlya Dryomov 		return 1;
11962f713615SIlya Dryomov 	} else if ((s64)seq - (s64)con->in_seq > 1) {
11972f713615SIlya Dryomov 		pr_err("read_partial_message bad seq %lld expected %lld\n",
11982f713615SIlya Dryomov 		       seq, con->in_seq + 1);
11992f713615SIlya Dryomov 		con->error_msg = "bad message sequence # for incoming message";
12002f713615SIlya Dryomov 		return -EBADE;
12012f713615SIlya Dryomov 	}
12022f713615SIlya Dryomov 
12032f713615SIlya Dryomov 	/* allocate message? */
12042f713615SIlya Dryomov 	if (!con->in_msg) {
12052f713615SIlya Dryomov 		int skip = 0;
12062f713615SIlya Dryomov 
1207a56dd9bfSIlya Dryomov 		dout("got hdr type %d front %d data %d\n", con->v1.in_hdr.type,
12082f713615SIlya Dryomov 		     front_len, data_len);
1209a56dd9bfSIlya Dryomov 		ret = ceph_con_in_msg_alloc(con, &con->v1.in_hdr, &skip);
12102f713615SIlya Dryomov 		if (ret < 0)
12112f713615SIlya Dryomov 			return ret;
12122f713615SIlya Dryomov 
12139d5ae6f3SIlya Dryomov 		BUG_ON((!con->in_msg) ^ skip);
12142f713615SIlya Dryomov 		if (skip) {
12152f713615SIlya Dryomov 			/* skip this message */
12162f713615SIlya Dryomov 			dout("alloc_msg said skip message\n");
1217a56dd9bfSIlya Dryomov 			con->v1.in_base_pos = -front_len - middle_len -
1218a56dd9bfSIlya Dryomov 					      data_len - sizeof_footer(con);
1219a56dd9bfSIlya Dryomov 			con->v1.in_tag = CEPH_MSGR_TAG_READY;
12202f713615SIlya Dryomov 			con->in_seq++;
12212f713615SIlya Dryomov 			return 1;
12222f713615SIlya Dryomov 		}
12232f713615SIlya Dryomov 
12242f713615SIlya Dryomov 		BUG_ON(!con->in_msg);
12252f713615SIlya Dryomov 		BUG_ON(con->in_msg->con != con);
12262f713615SIlya Dryomov 		m = con->in_msg;
12272f713615SIlya Dryomov 		m->front.iov_len = 0;    /* haven't read it yet */
12282f713615SIlya Dryomov 		if (m->middle)
12292f713615SIlya Dryomov 			m->middle->vec.iov_len = 0;
12302f713615SIlya Dryomov 
12312f713615SIlya Dryomov 		/* prepare for data payload, if any */
12322f713615SIlya Dryomov 
12332f713615SIlya Dryomov 		if (data_len)
12342f713615SIlya Dryomov 			prepare_message_data(con->in_msg, data_len);
12352f713615SIlya Dryomov 	}
12362f713615SIlya Dryomov 
12372f713615SIlya Dryomov 	/* front */
12382f713615SIlya Dryomov 	ret = read_partial_message_section(con, &m->front, front_len,
12392f713615SIlya Dryomov 					   &con->in_front_crc);
12402f713615SIlya Dryomov 	if (ret <= 0)
12412f713615SIlya Dryomov 		return ret;
12422f713615SIlya Dryomov 
12432f713615SIlya Dryomov 	/* middle */
12442f713615SIlya Dryomov 	if (m->middle) {
12452f713615SIlya Dryomov 		ret = read_partial_message_section(con, &m->middle->vec,
12462f713615SIlya Dryomov 						   middle_len,
12472f713615SIlya Dryomov 						   &con->in_middle_crc);
12482f713615SIlya Dryomov 		if (ret <= 0)
12492f713615SIlya Dryomov 			return ret;
12502f713615SIlya Dryomov 	}
12512f713615SIlya Dryomov 
12522f713615SIlya Dryomov 	/* (page) data */
12532f713615SIlya Dryomov 	if (data_len) {
1254038b8d1dSIlya Dryomov 		if (!m->num_data_items)
1255038b8d1dSIlya Dryomov 			return -EIO;
1256038b8d1dSIlya Dryomov 
1257*8e46a2d0SXiubo Li 		if (m->sparse_read_total)
1258ee97302fSXiubo Li 			ret = read_partial_sparse_msg_data(con);
1259d396f89dSJeff Layton 		else if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE))
1260038b8d1dSIlya Dryomov 			ret = read_partial_msg_data_bounce(con);
1261038b8d1dSIlya Dryomov 		else
12622f713615SIlya Dryomov 			ret = read_partial_msg_data(con);
12632f713615SIlya Dryomov 		if (ret <= 0)
12642f713615SIlya Dryomov 			return ret;
12652f713615SIlya Dryomov 	}
12662f713615SIlya Dryomov 
12672f713615SIlya Dryomov 	/* footer */
12682f713615SIlya Dryomov 	size = sizeof_footer(con);
12692f713615SIlya Dryomov 	end += size;
12702f713615SIlya Dryomov 	ret = read_partial(con, end, size, &m->footer);
12712f713615SIlya Dryomov 	if (ret <= 0)
12722f713615SIlya Dryomov 		return ret;
12732f713615SIlya Dryomov 
12742f713615SIlya Dryomov 	if (!need_sign) {
12752f713615SIlya Dryomov 		m->footer.flags = m->old_footer.flags;
12762f713615SIlya Dryomov 		m->footer.sig = 0;
12772f713615SIlya Dryomov 	}
12782f713615SIlya Dryomov 
12792f713615SIlya Dryomov 	dout("read_partial_message got msg %p %d (%u) + %d (%u) + %d (%u)\n",
12802f713615SIlya Dryomov 	     m, front_len, m->footer.front_crc, middle_len,
12812f713615SIlya Dryomov 	     m->footer.middle_crc, data_len, m->footer.data_crc);
12822f713615SIlya Dryomov 
12832f713615SIlya Dryomov 	/* crc ok? */
12842f713615SIlya Dryomov 	if (con->in_front_crc != le32_to_cpu(m->footer.front_crc)) {
12852f713615SIlya Dryomov 		pr_err("read_partial_message %p front crc %u != exp. %u\n",
12862f713615SIlya Dryomov 		       m, con->in_front_crc, m->footer.front_crc);
12872f713615SIlya Dryomov 		return -EBADMSG;
12882f713615SIlya Dryomov 	}
12892f713615SIlya Dryomov 	if (con->in_middle_crc != le32_to_cpu(m->footer.middle_crc)) {
12902f713615SIlya Dryomov 		pr_err("read_partial_message %p middle crc %u != exp %u\n",
12912f713615SIlya Dryomov 		       m, con->in_middle_crc, m->footer.middle_crc);
12922f713615SIlya Dryomov 		return -EBADMSG;
12932f713615SIlya Dryomov 	}
12942f713615SIlya Dryomov 	if (do_datacrc &&
12952f713615SIlya Dryomov 	    (m->footer.flags & CEPH_MSG_FOOTER_NOCRC) == 0 &&
12962f713615SIlya Dryomov 	    con->in_data_crc != le32_to_cpu(m->footer.data_crc)) {
12972f713615SIlya Dryomov 		pr_err("read_partial_message %p data crc %u != exp. %u\n", m,
12982f713615SIlya Dryomov 		       con->in_data_crc, le32_to_cpu(m->footer.data_crc));
12992f713615SIlya Dryomov 		return -EBADMSG;
13002f713615SIlya Dryomov 	}
13012f713615SIlya Dryomov 
13022f713615SIlya Dryomov 	if (need_sign && con->ops->check_message_signature &&
13032f713615SIlya Dryomov 	    con->ops->check_message_signature(m)) {
13042f713615SIlya Dryomov 		pr_err("read_partial_message %p signature check failed\n", m);
13052f713615SIlya Dryomov 		return -EBADMSG;
13062f713615SIlya Dryomov 	}
13072f713615SIlya Dryomov 
13082f713615SIlya Dryomov 	return 1; /* done! */
13092f713615SIlya Dryomov }
13102f713615SIlya Dryomov 
read_keepalive_ack(struct ceph_connection * con)13112f713615SIlya Dryomov static int read_keepalive_ack(struct ceph_connection *con)
13122f713615SIlya Dryomov {
13132f713615SIlya Dryomov 	struct ceph_timespec ceph_ts;
13142f713615SIlya Dryomov 	size_t size = sizeof(ceph_ts);
13152f713615SIlya Dryomov 	int ret = read_partial(con, size, size, &ceph_ts);
13162f713615SIlya Dryomov 	if (ret <= 0)
13172f713615SIlya Dryomov 		return ret;
13182f713615SIlya Dryomov 	ceph_decode_timespec64(&con->last_keepalive_ack, &ceph_ts);
13192f713615SIlya Dryomov 	prepare_read_tag(con);
13202f713615SIlya Dryomov 	return 1;
13212f713615SIlya Dryomov }
13222f713615SIlya Dryomov 
13232f713615SIlya Dryomov /*
13242f713615SIlya Dryomov  * Read what we can from the socket.
13252f713615SIlya Dryomov  */
ceph_con_v1_try_read(struct ceph_connection * con)13262f713615SIlya Dryomov int ceph_con_v1_try_read(struct ceph_connection *con)
13272f713615SIlya Dryomov {
13282f713615SIlya Dryomov 	int ret = -1;
13292f713615SIlya Dryomov 
13302f713615SIlya Dryomov more:
13312f713615SIlya Dryomov 	dout("try_read start %p state %d\n", con, con->state);
13322f713615SIlya Dryomov 	if (con->state != CEPH_CON_S_V1_BANNER &&
13332f713615SIlya Dryomov 	    con->state != CEPH_CON_S_V1_CONNECT_MSG &&
13342f713615SIlya Dryomov 	    con->state != CEPH_CON_S_OPEN)
13352f713615SIlya Dryomov 		return 0;
13362f713615SIlya Dryomov 
13372f713615SIlya Dryomov 	BUG_ON(!con->sock);
13382f713615SIlya Dryomov 
1339a56dd9bfSIlya Dryomov 	dout("try_read tag %d in_base_pos %d\n", con->v1.in_tag,
1340a56dd9bfSIlya Dryomov 	     con->v1.in_base_pos);
13412f713615SIlya Dryomov 
13422f713615SIlya Dryomov 	if (con->state == CEPH_CON_S_V1_BANNER) {
13432f713615SIlya Dryomov 		ret = read_partial_banner(con);
13442f713615SIlya Dryomov 		if (ret <= 0)
13452f713615SIlya Dryomov 			goto out;
13462f713615SIlya Dryomov 		ret = process_banner(con);
13472f713615SIlya Dryomov 		if (ret < 0)
13482f713615SIlya Dryomov 			goto out;
13492f713615SIlya Dryomov 
13502f713615SIlya Dryomov 		con->state = CEPH_CON_S_V1_CONNECT_MSG;
13512f713615SIlya Dryomov 
13522f713615SIlya Dryomov 		/*
13532f713615SIlya Dryomov 		 * Received banner is good, exchange connection info.
13542f713615SIlya Dryomov 		 * Do not reset out_kvec, as sending our banner raced
13552f713615SIlya Dryomov 		 * with receiving peer banner after connect completed.
13562f713615SIlya Dryomov 		 */
13572f713615SIlya Dryomov 		ret = prepare_write_connect(con);
13582f713615SIlya Dryomov 		if (ret < 0)
13592f713615SIlya Dryomov 			goto out;
13602f713615SIlya Dryomov 		prepare_read_connect(con);
13612f713615SIlya Dryomov 
13622f713615SIlya Dryomov 		/* Send connection info before awaiting response */
13632f713615SIlya Dryomov 		goto out;
13642f713615SIlya Dryomov 	}
13652f713615SIlya Dryomov 
13662f713615SIlya Dryomov 	if (con->state == CEPH_CON_S_V1_CONNECT_MSG) {
13672f713615SIlya Dryomov 		ret = read_partial_connect(con);
13682f713615SIlya Dryomov 		if (ret <= 0)
13692f713615SIlya Dryomov 			goto out;
13702f713615SIlya Dryomov 		ret = process_connect(con);
13712f713615SIlya Dryomov 		if (ret < 0)
13722f713615SIlya Dryomov 			goto out;
13732f713615SIlya Dryomov 		goto more;
13742f713615SIlya Dryomov 	}
13752f713615SIlya Dryomov 
13762f713615SIlya Dryomov 	WARN_ON(con->state != CEPH_CON_S_OPEN);
13772f713615SIlya Dryomov 
1378a56dd9bfSIlya Dryomov 	if (con->v1.in_base_pos < 0) {
13792f713615SIlya Dryomov 		/*
13802f713615SIlya Dryomov 		 * skipping + discarding content.
13812f713615SIlya Dryomov 		 */
1382a56dd9bfSIlya Dryomov 		ret = ceph_tcp_recvmsg(con->sock, NULL, -con->v1.in_base_pos);
13832f713615SIlya Dryomov 		if (ret <= 0)
13842f713615SIlya Dryomov 			goto out;
1385a56dd9bfSIlya Dryomov 		dout("skipped %d / %d bytes\n", ret, -con->v1.in_base_pos);
1386a56dd9bfSIlya Dryomov 		con->v1.in_base_pos += ret;
1387a56dd9bfSIlya Dryomov 		if (con->v1.in_base_pos)
13882f713615SIlya Dryomov 			goto more;
13892f713615SIlya Dryomov 	}
1390a56dd9bfSIlya Dryomov 	if (con->v1.in_tag == CEPH_MSGR_TAG_READY) {
13912f713615SIlya Dryomov 		/*
13922f713615SIlya Dryomov 		 * what's next?
13932f713615SIlya Dryomov 		 */
1394a56dd9bfSIlya Dryomov 		ret = ceph_tcp_recvmsg(con->sock, &con->v1.in_tag, 1);
13952f713615SIlya Dryomov 		if (ret <= 0)
13962f713615SIlya Dryomov 			goto out;
1397a56dd9bfSIlya Dryomov 		dout("try_read got tag %d\n", con->v1.in_tag);
1398a56dd9bfSIlya Dryomov 		switch (con->v1.in_tag) {
13992f713615SIlya Dryomov 		case CEPH_MSGR_TAG_MSG:
14002f713615SIlya Dryomov 			prepare_read_message(con);
14012f713615SIlya Dryomov 			break;
14022f713615SIlya Dryomov 		case CEPH_MSGR_TAG_ACK:
14032f713615SIlya Dryomov 			prepare_read_ack(con);
14042f713615SIlya Dryomov 			break;
14052f713615SIlya Dryomov 		case CEPH_MSGR_TAG_KEEPALIVE2_ACK:
14062f713615SIlya Dryomov 			prepare_read_keepalive_ack(con);
14072f713615SIlya Dryomov 			break;
14082f713615SIlya Dryomov 		case CEPH_MSGR_TAG_CLOSE:
14092f713615SIlya Dryomov 			ceph_con_close_socket(con);
14102f713615SIlya Dryomov 			con->state = CEPH_CON_S_CLOSED;
14112f713615SIlya Dryomov 			goto out;
14122f713615SIlya Dryomov 		default:
14132f713615SIlya Dryomov 			goto bad_tag;
14142f713615SIlya Dryomov 		}
14152f713615SIlya Dryomov 	}
1416a56dd9bfSIlya Dryomov 	if (con->v1.in_tag == CEPH_MSGR_TAG_MSG) {
14172f713615SIlya Dryomov 		ret = read_partial_message(con);
14182f713615SIlya Dryomov 		if (ret <= 0) {
14192f713615SIlya Dryomov 			switch (ret) {
14202f713615SIlya Dryomov 			case -EBADMSG:
14212f713615SIlya Dryomov 				con->error_msg = "bad crc/signature";
14222f713615SIlya Dryomov 				fallthrough;
14232f713615SIlya Dryomov 			case -EBADE:
14242f713615SIlya Dryomov 				ret = -EIO;
14252f713615SIlya Dryomov 				break;
14262f713615SIlya Dryomov 			case -EIO:
14272f713615SIlya Dryomov 				con->error_msg = "io error";
14282f713615SIlya Dryomov 				break;
14292f713615SIlya Dryomov 			}
14302f713615SIlya Dryomov 			goto out;
14312f713615SIlya Dryomov 		}
1432a56dd9bfSIlya Dryomov 		if (con->v1.in_tag == CEPH_MSGR_TAG_READY)
14332f713615SIlya Dryomov 			goto more;
14342f713615SIlya Dryomov 		ceph_con_process_message(con);
14352f713615SIlya Dryomov 		if (con->state == CEPH_CON_S_OPEN)
14362f713615SIlya Dryomov 			prepare_read_tag(con);
14372f713615SIlya Dryomov 		goto more;
14382f713615SIlya Dryomov 	}
1439a56dd9bfSIlya Dryomov 	if (con->v1.in_tag == CEPH_MSGR_TAG_ACK ||
1440a56dd9bfSIlya Dryomov 	    con->v1.in_tag == CEPH_MSGR_TAG_SEQ) {
14412f713615SIlya Dryomov 		/*
14422f713615SIlya Dryomov 		 * the final handshake seq exchange is semantically
14432f713615SIlya Dryomov 		 * equivalent to an ACK
14442f713615SIlya Dryomov 		 */
14452f713615SIlya Dryomov 		ret = read_partial_ack(con);
14462f713615SIlya Dryomov 		if (ret <= 0)
14472f713615SIlya Dryomov 			goto out;
14482f713615SIlya Dryomov 		process_ack(con);
14492f713615SIlya Dryomov 		goto more;
14502f713615SIlya Dryomov 	}
1451a56dd9bfSIlya Dryomov 	if (con->v1.in_tag == CEPH_MSGR_TAG_KEEPALIVE2_ACK) {
14522f713615SIlya Dryomov 		ret = read_keepalive_ack(con);
14532f713615SIlya Dryomov 		if (ret <= 0)
14542f713615SIlya Dryomov 			goto out;
14552f713615SIlya Dryomov 		goto more;
14562f713615SIlya Dryomov 	}
14572f713615SIlya Dryomov 
14582f713615SIlya Dryomov out:
14592f713615SIlya Dryomov 	dout("try_read done on %p ret %d\n", con, ret);
14602f713615SIlya Dryomov 	return ret;
14612f713615SIlya Dryomov 
14622f713615SIlya Dryomov bad_tag:
1463a56dd9bfSIlya Dryomov 	pr_err("try_read bad tag %d\n", con->v1.in_tag);
14642f713615SIlya Dryomov 	con->error_msg = "protocol error, garbage tag";
14652f713615SIlya Dryomov 	ret = -1;
14662f713615SIlya Dryomov 	goto out;
14672f713615SIlya Dryomov }
14682f713615SIlya Dryomov 
14692f713615SIlya Dryomov /*
14702f713615SIlya Dryomov  * Write something to the socket.  Called in a worker thread when the
14712f713615SIlya Dryomov  * socket appears to be writeable and we have something ready to send.
14722f713615SIlya Dryomov  */
ceph_con_v1_try_write(struct ceph_connection * con)14732f713615SIlya Dryomov int ceph_con_v1_try_write(struct ceph_connection *con)
14742f713615SIlya Dryomov {
14752f713615SIlya Dryomov 	int ret = 1;
14762f713615SIlya Dryomov 
14772f713615SIlya Dryomov 	dout("try_write start %p state %d\n", con, con->state);
14782f713615SIlya Dryomov 	if (con->state != CEPH_CON_S_PREOPEN &&
14792f713615SIlya Dryomov 	    con->state != CEPH_CON_S_V1_BANNER &&
14802f713615SIlya Dryomov 	    con->state != CEPH_CON_S_V1_CONNECT_MSG &&
14812f713615SIlya Dryomov 	    con->state != CEPH_CON_S_OPEN)
14822f713615SIlya Dryomov 		return 0;
14832f713615SIlya Dryomov 
14842f713615SIlya Dryomov 	/* open the socket first? */
14852f713615SIlya Dryomov 	if (con->state == CEPH_CON_S_PREOPEN) {
14862f713615SIlya Dryomov 		BUG_ON(con->sock);
14872f713615SIlya Dryomov 		con->state = CEPH_CON_S_V1_BANNER;
14882f713615SIlya Dryomov 
14892f713615SIlya Dryomov 		con_out_kvec_reset(con);
14902f713615SIlya Dryomov 		prepare_write_banner(con);
14912f713615SIlya Dryomov 		prepare_read_banner(con);
14922f713615SIlya Dryomov 
14932f713615SIlya Dryomov 		BUG_ON(con->in_msg);
1494a56dd9bfSIlya Dryomov 		con->v1.in_tag = CEPH_MSGR_TAG_READY;
14952f713615SIlya Dryomov 		dout("try_write initiating connect on %p new state %d\n",
14962f713615SIlya Dryomov 		     con, con->state);
14972f713615SIlya Dryomov 		ret = ceph_tcp_connect(con);
14982f713615SIlya Dryomov 		if (ret < 0) {
14992f713615SIlya Dryomov 			con->error_msg = "connect error";
15002f713615SIlya Dryomov 			goto out;
15012f713615SIlya Dryomov 		}
15022f713615SIlya Dryomov 	}
15032f713615SIlya Dryomov 
15042f713615SIlya Dryomov more:
1505a56dd9bfSIlya Dryomov 	dout("try_write out_kvec_bytes %d\n", con->v1.out_kvec_bytes);
15062f713615SIlya Dryomov 	BUG_ON(!con->sock);
15072f713615SIlya Dryomov 
15082f713615SIlya Dryomov 	/* kvec data queued? */
1509a56dd9bfSIlya Dryomov 	if (con->v1.out_kvec_left) {
15102f713615SIlya Dryomov 		ret = write_partial_kvec(con);
15112f713615SIlya Dryomov 		if (ret <= 0)
15122f713615SIlya Dryomov 			goto out;
15132f713615SIlya Dryomov 	}
1514a56dd9bfSIlya Dryomov 	if (con->v1.out_skip) {
15152f713615SIlya Dryomov 		ret = write_partial_skip(con);
15162f713615SIlya Dryomov 		if (ret <= 0)
15172f713615SIlya Dryomov 			goto out;
15182f713615SIlya Dryomov 	}
15192f713615SIlya Dryomov 
15202f713615SIlya Dryomov 	/* msg pages? */
15212f713615SIlya Dryomov 	if (con->out_msg) {
1522a56dd9bfSIlya Dryomov 		if (con->v1.out_msg_done) {
15232f713615SIlya Dryomov 			ceph_msg_put(con->out_msg);
15242f713615SIlya Dryomov 			con->out_msg = NULL;   /* we're done with this one */
15252f713615SIlya Dryomov 			goto do_next;
15262f713615SIlya Dryomov 		}
15272f713615SIlya Dryomov 
15282f713615SIlya Dryomov 		ret = write_partial_message_data(con);
15292f713615SIlya Dryomov 		if (ret == 1)
15302f713615SIlya Dryomov 			goto more;  /* we need to send the footer, too! */
15312f713615SIlya Dryomov 		if (ret == 0)
15322f713615SIlya Dryomov 			goto out;
15332f713615SIlya Dryomov 		if (ret < 0) {
15342f713615SIlya Dryomov 			dout("try_write write_partial_message_data err %d\n",
15352f713615SIlya Dryomov 			     ret);
15362f713615SIlya Dryomov 			goto out;
15372f713615SIlya Dryomov 		}
15382f713615SIlya Dryomov 	}
15392f713615SIlya Dryomov 
15402f713615SIlya Dryomov do_next:
15412f713615SIlya Dryomov 	if (con->state == CEPH_CON_S_OPEN) {
15422f713615SIlya Dryomov 		if (ceph_con_flag_test_and_clear(con,
15432f713615SIlya Dryomov 				CEPH_CON_F_KEEPALIVE_PENDING)) {
15442f713615SIlya Dryomov 			prepare_write_keepalive(con);
15452f713615SIlya Dryomov 			goto more;
15462f713615SIlya Dryomov 		}
15472f713615SIlya Dryomov 		/* is anything else pending? */
15482f713615SIlya Dryomov 		if (!list_empty(&con->out_queue)) {
15492f713615SIlya Dryomov 			prepare_write_message(con);
15502f713615SIlya Dryomov 			goto more;
15512f713615SIlya Dryomov 		}
15522f713615SIlya Dryomov 		if (con->in_seq > con->in_seq_acked) {
15532f713615SIlya Dryomov 			prepare_write_ack(con);
15542f713615SIlya Dryomov 			goto more;
15552f713615SIlya Dryomov 		}
15562f713615SIlya Dryomov 	}
15572f713615SIlya Dryomov 
15582f713615SIlya Dryomov 	/* Nothing to do! */
15592f713615SIlya Dryomov 	ceph_con_flag_clear(con, CEPH_CON_F_WRITE_PENDING);
15602f713615SIlya Dryomov 	dout("try_write nothing else to write.\n");
15612f713615SIlya Dryomov 	ret = 0;
15622f713615SIlya Dryomov out:
15632f713615SIlya Dryomov 	dout("try_write done on %p ret %d\n", con, ret);
15642f713615SIlya Dryomov 	return ret;
15652f713615SIlya Dryomov }
15662f713615SIlya Dryomov 
ceph_con_v1_revoke(struct ceph_connection * con)15672f713615SIlya Dryomov void ceph_con_v1_revoke(struct ceph_connection *con)
15682f713615SIlya Dryomov {
15692f713615SIlya Dryomov 	struct ceph_msg *msg = con->out_msg;
15702f713615SIlya Dryomov 
1571a56dd9bfSIlya Dryomov 	WARN_ON(con->v1.out_skip);
15722f713615SIlya Dryomov 	/* footer */
1573a56dd9bfSIlya Dryomov 	if (con->v1.out_msg_done) {
1574a56dd9bfSIlya Dryomov 		con->v1.out_skip += con_out_kvec_skip(con);
15752f713615SIlya Dryomov 	} else {
15762f713615SIlya Dryomov 		WARN_ON(!msg->data_length);
1577a56dd9bfSIlya Dryomov 		con->v1.out_skip += sizeof_footer(con);
15782f713615SIlya Dryomov 	}
15792f713615SIlya Dryomov 	/* data, middle, front */
15802f713615SIlya Dryomov 	if (msg->data_length)
1581a56dd9bfSIlya Dryomov 		con->v1.out_skip += msg->cursor.total_resid;
15822f713615SIlya Dryomov 	if (msg->middle)
1583a56dd9bfSIlya Dryomov 		con->v1.out_skip += con_out_kvec_skip(con);
1584a56dd9bfSIlya Dryomov 	con->v1.out_skip += con_out_kvec_skip(con);
15852f713615SIlya Dryomov 
15862f713615SIlya Dryomov 	dout("%s con %p out_kvec_bytes %d out_skip %d\n", __func__, con,
1587a56dd9bfSIlya Dryomov 	     con->v1.out_kvec_bytes, con->v1.out_skip);
15882f713615SIlya Dryomov }
15892f713615SIlya Dryomov 
ceph_con_v1_revoke_incoming(struct ceph_connection * con)15902f713615SIlya Dryomov void ceph_con_v1_revoke_incoming(struct ceph_connection *con)
15912f713615SIlya Dryomov {
1592a56dd9bfSIlya Dryomov 	unsigned int front_len = le32_to_cpu(con->v1.in_hdr.front_len);
1593a56dd9bfSIlya Dryomov 	unsigned int middle_len = le32_to_cpu(con->v1.in_hdr.middle_len);
1594a56dd9bfSIlya Dryomov 	unsigned int data_len = le32_to_cpu(con->v1.in_hdr.data_len);
15952f713615SIlya Dryomov 
15962f713615SIlya Dryomov 	/* skip rest of message */
1597a56dd9bfSIlya Dryomov 	con->v1.in_base_pos = con->v1.in_base_pos -
15982f713615SIlya Dryomov 			sizeof(struct ceph_msg_header) -
15992f713615SIlya Dryomov 			front_len -
16002f713615SIlya Dryomov 			middle_len -
16012f713615SIlya Dryomov 			data_len -
16022f713615SIlya Dryomov 			sizeof(struct ceph_msg_footer);
16032f713615SIlya Dryomov 
1604a56dd9bfSIlya Dryomov 	con->v1.in_tag = CEPH_MSGR_TAG_READY;
16052f713615SIlya Dryomov 	con->in_seq++;
16062f713615SIlya Dryomov 
1607a56dd9bfSIlya Dryomov 	dout("%s con %p in_base_pos %d\n", __func__, con, con->v1.in_base_pos);
16082f713615SIlya Dryomov }
16092f713615SIlya Dryomov 
ceph_con_v1_opened(struct ceph_connection * con)16102f713615SIlya Dryomov bool ceph_con_v1_opened(struct ceph_connection *con)
16112f713615SIlya Dryomov {
1612a56dd9bfSIlya Dryomov 	return con->v1.connect_seq;
16132f713615SIlya Dryomov }
16142f713615SIlya Dryomov 
ceph_con_v1_reset_session(struct ceph_connection * con)16152f713615SIlya Dryomov void ceph_con_v1_reset_session(struct ceph_connection *con)
16162f713615SIlya Dryomov {
1617a56dd9bfSIlya Dryomov 	con->v1.connect_seq = 0;
1618a56dd9bfSIlya Dryomov 	con->v1.peer_global_seq = 0;
16192f713615SIlya Dryomov }
16202f713615SIlya Dryomov 
ceph_con_v1_reset_protocol(struct ceph_connection * con)16212f713615SIlya Dryomov void ceph_con_v1_reset_protocol(struct ceph_connection *con)
16222f713615SIlya Dryomov {
1623a56dd9bfSIlya Dryomov 	con->v1.out_skip = 0;
16242f713615SIlya Dryomov }
1625