// SPDX-License-Identifier: GPL-2.0
/*
 * Ceph msgr2 protocol implementation
 *
 * Copyright (C) 2020 Ilya Dryomov <idryomov@gmail.com>
 */

#include <linux/ceph/ceph_debug.h>

#include <crypto/aead.h>
#include <crypto/algapi.h>  /* for crypto_memneq() */
#include <crypto/hash.h>
#include <crypto/sha2.h>
#include <linux/bvec.h>
#include <linux/crc32c.h>
#include <linux/net.h>
#include <linux/scatterlist.h>
#include <linux/socket.h>
#include <linux/sched/mm.h>
#include <net/sock.h>
#include <net/tcp.h>

#include <linux/ceph/ceph_features.h>
#include <linux/ceph/decode.h>
#include <linux/ceph/libceph.h>
#include <linux/ceph/messenger.h>

#include "crypto.h"  /* for CEPH_KEY_LEN and CEPH_MAX_CON_SECRET_LEN */

#define FRAME_TAG_HELLO			1
#define FRAME_TAG_AUTH_REQUEST		2
#define FRAME_TAG_AUTH_BAD_METHOD	3
#define FRAME_TAG_AUTH_REPLY_MORE	4
#define FRAME_TAG_AUTH_REQUEST_MORE	5
#define FRAME_TAG_AUTH_DONE		6
#define FRAME_TAG_AUTH_SIGNATURE	7
#define FRAME_TAG_CLIENT_IDENT		8
#define FRAME_TAG_SERVER_IDENT		9
#define FRAME_TAG_IDENT_MISSING_FEATURES	10
#define FRAME_TAG_SESSION_RECONNECT	11
#define FRAME_TAG_SESSION_RESET		12
#define FRAME_TAG_SESSION_RETRY		13
#define FRAME_TAG_SESSION_RETRY_GLOBAL	14
#define FRAME_TAG_SESSION_RECONNECT_OK	15
#define FRAME_TAG_WAIT			16
#define FRAME_TAG_MESSAGE		17
#define FRAME_TAG_KEEPALIVE2		18
#define FRAME_TAG_KEEPALIVE2_ACK	19
#define FRAME_TAG_ACK			20

#define FRAME_LATE_STATUS_ABORTED	0x1
#define FRAME_LATE_STATUS_COMPLETE	0xe
#define FRAME_LATE_STATUS_ABORTED_MASK	0xf

#define IN_S_HANDLE_PREAMBLE		1
#define IN_S_HANDLE_CONTROL		2
#define IN_S_HANDLE_CONTROL_REMAINDER	3
#define IN_S_PREPARE_READ_DATA		4
#define IN_S_PREPARE_READ_DATA_CONT	5
#define IN_S_PREPARE_READ_ENC_PAGE	6
#define IN_S_HANDLE_EPILOGUE		7
#define IN_S_FINISH_SKIP		8

#define OUT_S_QUEUE_DATA		1
#define OUT_S_QUEUE_DATA_CONT		2
#define OUT_S_QUEUE_ENC_PAGE		3
#define OUT_S_QUEUE_ZEROS		4
#define OUT_S_FINISH_MESSAGE		5
#define OUT_S_GET_NEXT			6

#define CTRL_BODY(p)	((void *)(p) + CEPH_PREAMBLE_LEN)
#define FRONT_PAD(p)	((void *)(p) + CEPH_EPILOGUE_SECURE_LEN)
#define MIDDLE_PAD(p)	(FRONT_PAD(p) + CEPH_GCM_BLOCK_LEN)
#define DATA_PAD(p)	(MIDDLE_PAD(p) + CEPH_GCM_BLOCK_LEN)

#define CEPH_MSG_FLAGS (MSG_DONTWAIT | MSG_NOSIGNAL)

static int do_recvmsg(struct socket *sock, struct iov_iter *it)
{
	struct msghdr msg = { .msg_flags = CEPH_MSG_FLAGS };
	int ret;

	msg.msg_iter = *it;
	while (iov_iter_count(it)) {
		ret = sock_recvmsg(sock, &msg, msg.msg_flags);
		if (ret <= 0) {
			if (ret == -EAGAIN)
				ret = 0;
			return ret;
		}

		iov_iter_advance(it, ret);
	}

	WARN_ON(msg_data_left(&msg));
	return 1;
}

/*
 * Read as much as possible.
 *
 * Return:
 *   1 - done, nothing (else) to read
 *   0 - socket is empty, need to wait
 *  <0 - error
 */
static int ceph_tcp_recv(struct ceph_connection *con)
{
	int ret;

	dout("%s con %p %s %zu\n", __func__, con,
	     iov_iter_is_discard(&con->v2.in_iter) ? "discard" : "need",
	     iov_iter_count(&con->v2.in_iter));
	ret = do_recvmsg(con->sock, &con->v2.in_iter);
	dout("%s con %p ret %d left %zu\n", __func__, con, ret,
	     iov_iter_count(&con->v2.in_iter));
	return ret;
}

static int do_sendmsg(struct socket *sock, struct iov_iter *it)
{
	struct msghdr msg = { .msg_flags = CEPH_MSG_FLAGS };
	int ret;

	msg.msg_iter = *it;
	while (iov_iter_count(it)) {
		ret = sock_sendmsg(sock, &msg);
		if (ret <= 0) {
			if (ret == -EAGAIN)
				ret = 0;
			return ret;
		}

		iov_iter_advance(it, ret);
	}

	WARN_ON(msg_data_left(&msg));
	return 1;
}

static int do_try_sendpage(struct socket *sock, struct iov_iter *it)
{
	struct msghdr msg = { .msg_flags = CEPH_MSG_FLAGS };
	struct bio_vec bv;
	int ret;

	if (WARN_ON(!iov_iter_is_bvec(it)))
		return -EINVAL;

	while (iov_iter_count(it)) {
		/* iov_iter_iovec() for ITER_BVEC */
		bvec_set_page(&bv, it->bvec->bv_page,
			      min(iov_iter_count(it),
				  it->bvec->bv_len - it->iov_offset),
			      it->bvec->bv_offset + it->iov_offset);

		/*
		 * MSG_SPLICE_PAGES cannot properly handle pages with
		 * page_count == 0, we need to fall back to sendmsg if
		 * that's the case.
		 *
		 * Same goes for slab pages: skb_can_coalesce() allows
		 * coalescing neighboring slab objects into a single frag
		 * which triggers one of hardened usercopy checks.
		 */
		if (sendpage_ok(bv.bv_page))
			msg.msg_flags |= MSG_SPLICE_PAGES;
		else
			msg.msg_flags &= ~MSG_SPLICE_PAGES;

		iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bv, 1, bv.bv_len);
		ret = sock_sendmsg(sock, &msg);
		if (ret <= 0) {
			if (ret == -EAGAIN)
				ret = 0;
			return ret;
		}

		iov_iter_advance(it, ret);
	}

	return 1;
}

/*
 * Write as much as possible.  The socket is expected to be corked,
 * so we don't bother with MSG_MORE here.
 *
 * Return:
 *   1 - done, nothing (else) to write
 *   0 - socket is full, need to wait
 *  <0 - error
 */
static int ceph_tcp_send(struct ceph_connection *con)
{
	int ret;

	dout("%s con %p have %zu try_sendpage %d\n", __func__, con,
	     iov_iter_count(&con->v2.out_iter), con->v2.out_iter_sendpage);
	if (con->v2.out_iter_sendpage)
		ret = do_try_sendpage(con->sock, &con->v2.out_iter);
	else
		ret = do_sendmsg(con->sock, &con->v2.out_iter);
	dout("%s con %p ret %d left %zu\n", __func__, con, ret,
	     iov_iter_count(&con->v2.out_iter));
	return ret;
}

static void add_in_kvec(struct ceph_connection *con, void *buf, int len)
{
	BUG_ON(con->v2.in_kvec_cnt >= ARRAY_SIZE(con->v2.in_kvecs));
	WARN_ON(!iov_iter_is_kvec(&con->v2.in_iter));

	con->v2.in_kvecs[con->v2.in_kvec_cnt].iov_base = buf;
	con->v2.in_kvecs[con->v2.in_kvec_cnt].iov_len = len;
	con->v2.in_kvec_cnt++;

	con->v2.in_iter.nr_segs++;
	con->v2.in_iter.count += len;
}

static void reset_in_kvecs(struct ceph_connection *con)
{
	WARN_ON(iov_iter_count(&con->v2.in_iter));

	con->v2.in_kvec_cnt = 0;
	iov_iter_kvec(&con->v2.in_iter, ITER_DEST, con->v2.in_kvecs, 0, 0);
}

static void set_in_bvec(struct ceph_connection *con, const struct bio_vec *bv)
{
	WARN_ON(iov_iter_count(&con->v2.in_iter));

	con->v2.in_bvec = *bv;
	iov_iter_bvec(&con->v2.in_iter, ITER_DEST, &con->v2.in_bvec, 1, bv->bv_len);
}

static void set_in_skip(struct ceph_connection *con, int len)
{
	WARN_ON(iov_iter_count(&con->v2.in_iter));

	dout("%s con %p len %d\n", __func__, con, len);
	iov_iter_discard(&con->v2.in_iter, ITER_DEST, len);
}

static void add_out_kvec(struct ceph_connection *con, void *buf, int len)
{
	BUG_ON(con->v2.out_kvec_cnt >= ARRAY_SIZE(con->v2.out_kvecs));
	WARN_ON(!iov_iter_is_kvec(&con->v2.out_iter));
	WARN_ON(con->v2.out_zero);

	con->v2.out_kvecs[con->v2.out_kvec_cnt].iov_base = buf;
	con->v2.out_kvecs[con->v2.out_kvec_cnt].iov_len = len;
	con->v2.out_kvec_cnt++;

	con->v2.out_iter.nr_segs++;
	con->v2.out_iter.count += len;
}

static void reset_out_kvecs(struct ceph_connection *con)
{
	WARN_ON(iov_iter_count(&con->v2.out_iter));
	WARN_ON(con->v2.out_zero);

	con->v2.out_kvec_cnt = 0;

	iov_iter_kvec(&con->v2.out_iter, ITER_SOURCE, con->v2.out_kvecs, 0, 0);
	con->v2.out_iter_sendpage = false;
}

static void set_out_bvec(struct ceph_connection *con, const struct bio_vec *bv,
			 bool zerocopy)
{
	WARN_ON(iov_iter_count(&con->v2.out_iter));
	WARN_ON(con->v2.out_zero);

	con->v2.out_bvec = *bv;
	con->v2.out_iter_sendpage = zerocopy;
	iov_iter_bvec(&con->v2.out_iter, ITER_SOURCE, &con->v2.out_bvec, 1,
		      con->v2.out_bvec.bv_len);
}

static void set_out_bvec_zero(struct ceph_connection *con)
{
	WARN_ON(iov_iter_count(&con->v2.out_iter));
	WARN_ON(!con->v2.out_zero);

	bvec_set_page(&con->v2.out_bvec, ceph_zero_page,
		      min(con->v2.out_zero, (int)PAGE_SIZE), 0);
	con->v2.out_iter_sendpage = true;
	iov_iter_bvec(&con->v2.out_iter, ITER_SOURCE, &con->v2.out_bvec, 1,
		      con->v2.out_bvec.bv_len);
}

static void out_zero_add(struct ceph_connection *con, int len)
{
	dout("%s con %p len %d\n", __func__, con, len);
	con->v2.out_zero += len;
}

static void *alloc_conn_buf(struct ceph_connection *con, int len)
{
	void *buf;

	dout("%s con %p len %d\n", __func__, con, len);

	if (WARN_ON(con->v2.conn_buf_cnt >= ARRAY_SIZE(con->v2.conn_bufs)))
		return NULL;

	buf = kvmalloc(len, GFP_NOIO);
	if (!buf)
		return NULL;

	con->v2.conn_bufs[con->v2.conn_buf_cnt++] = buf;
	return buf;
}

static void free_conn_bufs(struct ceph_connection *con)
{
	while (con->v2.conn_buf_cnt)
		kvfree(con->v2.conn_bufs[--con->v2.conn_buf_cnt]);
}

static void add_in_sign_kvec(struct ceph_connection *con, void *buf, int len)
{
	BUG_ON(con->v2.in_sign_kvec_cnt >= ARRAY_SIZE(con->v2.in_sign_kvecs));

	con->v2.in_sign_kvecs[con->v2.in_sign_kvec_cnt].iov_base = buf;
	con->v2.in_sign_kvecs[con->v2.in_sign_kvec_cnt].iov_len = len;
	con->v2.in_sign_kvec_cnt++;
}

static void clear_in_sign_kvecs(struct ceph_connection *con)
{
	con->v2.in_sign_kvec_cnt = 0;
}

static void add_out_sign_kvec(struct ceph_connection *con, void *buf, int len)
{
	BUG_ON(con->v2.out_sign_kvec_cnt >= ARRAY_SIZE(con->v2.out_sign_kvecs));

	con->v2.out_sign_kvecs[con->v2.out_sign_kvec_cnt].iov_base = buf;
	con->v2.out_sign_kvecs[con->v2.out_sign_kvec_cnt].iov_len = len;
	con->v2.out_sign_kvec_cnt++;
}

static void clear_out_sign_kvecs(struct ceph_connection *con)
{
	con->v2.out_sign_kvec_cnt = 0;
}

static bool con_secure(struct ceph_connection *con)
{
	return con->v2.con_mode == CEPH_CON_MODE_SECURE;
}

static int front_len(const struct ceph_msg *msg)
{
	return le32_to_cpu(msg->hdr.front_len);
}

static int middle_len(const struct ceph_msg *msg)
{
	return le32_to_cpu(msg->hdr.middle_len);
}

static int data_len(const struct ceph_msg *msg)
{
	return le32_to_cpu(msg->hdr.data_len);
}

static bool need_padding(int len)
{
	return !IS_ALIGNED(len, CEPH_GCM_BLOCK_LEN);
}

static int padded_len(int len)
{
	return ALIGN(len, CEPH_GCM_BLOCK_LEN);
}

static int padding_len(int len)
{
	return padded_len(len) - len;
}

/* preamble + control segment */
static int head_onwire_len(int ctrl_len, bool secure)
{
	int head_len;
	int rem_len;

	BUG_ON(ctrl_len < 0 || ctrl_len > CEPH_MSG_MAX_CONTROL_LEN);

	if (secure) {
		head_len = CEPH_PREAMBLE_SECURE_LEN;
		if (ctrl_len > CEPH_PREAMBLE_INLINE_LEN) {
			rem_len = ctrl_len - CEPH_PREAMBLE_INLINE_LEN;
			head_len += padded_len(rem_len) + CEPH_GCM_TAG_LEN;
		}
	} else {
		head_len = CEPH_PREAMBLE_PLAIN_LEN;
		if (ctrl_len)
			head_len += ctrl_len + CEPH_CRC_LEN;
	}
	return head_len;
}

/* front, middle and data segments + epilogue */
static int __tail_onwire_len(int front_len, int middle_len, int data_len,
			     bool secure)
{
	BUG_ON(front_len < 0 || front_len > CEPH_MSG_MAX_FRONT_LEN ||
	       middle_len < 0 || middle_len > CEPH_MSG_MAX_MIDDLE_LEN ||
	       data_len < 0 || data_len > CEPH_MSG_MAX_DATA_LEN);

	if (!front_len && !middle_len && !data_len)
		return 0;

	if (!secure)
		return front_len + middle_len + data_len +
		       CEPH_EPILOGUE_PLAIN_LEN;

	return padded_len(front_len) + padded_len(middle_len) +
	       padded_len(data_len) + CEPH_EPILOGUE_SECURE_LEN;
}

static int tail_onwire_len(const struct ceph_msg *msg, bool secure)
{
	return __tail_onwire_len(front_len(msg), middle_len(msg),
				 data_len(msg), secure);
}
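/*
 * Rough worked example of the on-wire math above, assuming the msgr2
 * constants from include/linux/ceph/msgr.h (32-byte preamble, 48-byte
 * inline buffer, 16-byte GCM block/tag, 4-byte crc, 13-byte plain
 * epilogue):
 *
 *   head_onwire_len(200, false) = 32 + 200 + 4                  = 236
 *   head_onwire_len(200, true)  = 96 + padded_len(152) + 16     = 272
 *   __tail_onwire_len(100, 0, 4096, false) = 100 + 4096 + 13    = 4209
 *   __tail_onwire_len(100, 0, 4096, true)  = 112 + 4096 + 32    = 4240
 */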
/* head_onwire_len(sizeof(struct ceph_msg_header2), false) */
#define MESSAGE_HEAD_PLAIN_LEN	(CEPH_PREAMBLE_PLAIN_LEN +		\
				 sizeof(struct ceph_msg_header2) +	\
				 CEPH_CRC_LEN)

static const int frame_aligns[] = {
	sizeof(void *),
	sizeof(void *),
	sizeof(void *),
	PAGE_SIZE
};

/*
 * Discards trailing empty segments, unless there is just one segment.
 * A frame always has at least one (possibly empty) segment.
 */
static int calc_segment_count(const int *lens, int len_cnt)
{
	int i;

	for (i = len_cnt - 1; i >= 0; i--) {
		if (lens[i])
			return i + 1;
	}

	return 1;
}

static void init_frame_desc(struct ceph_frame_desc *desc, int tag,
			    const int *lens, int len_cnt)
{
	int i;

	memset(desc, 0, sizeof(*desc));

	desc->fd_tag = tag;
	desc->fd_seg_cnt = calc_segment_count(lens, len_cnt);
	BUG_ON(desc->fd_seg_cnt > CEPH_FRAME_MAX_SEGMENT_COUNT);
	for (i = 0; i < desc->fd_seg_cnt; i++) {
		desc->fd_lens[i] = lens[i];
		desc->fd_aligns[i] = frame_aligns[i];
	}
}

/*
 * Preamble crc covers everything up to itself (28 bytes) and
 * is calculated and verified irrespective of the connection mode
 * (i.e. even if the frame is encrypted).
 */
static void encode_preamble(const struct ceph_frame_desc *desc, void *p)
{
	void *crcp = p + CEPH_PREAMBLE_LEN - CEPH_CRC_LEN;
	void *start = p;
	int i;

	memset(p, 0, CEPH_PREAMBLE_LEN);

	ceph_encode_8(&p, desc->fd_tag);
	ceph_encode_8(&p, desc->fd_seg_cnt);
	for (i = 0; i < desc->fd_seg_cnt; i++) {
		ceph_encode_32(&p, desc->fd_lens[i]);
		ceph_encode_16(&p, desc->fd_aligns[i]);
	}

	put_unaligned_le32(crc32c(0, start, crcp - start), crcp);
}

static int decode_preamble(void *p, struct ceph_frame_desc *desc)
{
	void *crcp = p + CEPH_PREAMBLE_LEN - CEPH_CRC_LEN;
	u32 crc, expected_crc;
	int i;

	crc = crc32c(0, p, crcp - p);
	expected_crc = get_unaligned_le32(crcp);
	if (crc != expected_crc) {
		pr_err("bad preamble crc, calculated %u, expected %u\n",
		       crc, expected_crc);
		return -EBADMSG;
	}

	memset(desc, 0, sizeof(*desc));

	desc->fd_tag = ceph_decode_8(&p);
	desc->fd_seg_cnt = ceph_decode_8(&p);
	if (desc->fd_seg_cnt < 1 ||
	    desc->fd_seg_cnt > CEPH_FRAME_MAX_SEGMENT_COUNT) {
		pr_err("bad segment count %d\n", desc->fd_seg_cnt);
		return -EINVAL;
	}
	for (i = 0; i < desc->fd_seg_cnt; i++) {
		desc->fd_lens[i] = ceph_decode_32(&p);
		desc->fd_aligns[i] = ceph_decode_16(&p);
	}

	if (desc->fd_lens[0] < 0 ||
	    desc->fd_lens[0] > CEPH_MSG_MAX_CONTROL_LEN) {
		pr_err("bad control segment length %d\n", desc->fd_lens[0]);
		return -EINVAL;
	}
	if (desc->fd_lens[1] < 0 ||
	    desc->fd_lens[1] > CEPH_MSG_MAX_FRONT_LEN) {
		pr_err("bad front segment length %d\n", desc->fd_lens[1]);
		return -EINVAL;
	}
	if (desc->fd_lens[2] < 0 ||
	    desc->fd_lens[2] > CEPH_MSG_MAX_MIDDLE_LEN) {
		pr_err("bad middle segment length %d\n", desc->fd_lens[2]);
		return -EINVAL;
	}
	if (desc->fd_lens[3] < 0 ||
	    desc->fd_lens[3] > CEPH_MSG_MAX_DATA_LEN) {
		pr_err("bad data segment length %d\n", desc->fd_lens[3]);
		return -EINVAL;
	}

	/*
	 * This would fire for FRAME_TAG_WAIT (it has one empty
	 * segment), but we should never get it as client.
	 */
	if (!desc->fd_lens[desc->fd_seg_cnt - 1]) {
		pr_err("last segment empty, segment count %d\n",
		       desc->fd_seg_cnt);
		return -EINVAL;
	}

	return 0;
}

static void encode_epilogue_plain(struct ceph_connection *con, bool aborted)
{
	con->v2.out_epil.late_status = aborted ? FRAME_LATE_STATUS_ABORTED :
						 FRAME_LATE_STATUS_COMPLETE;
	cpu_to_le32s(&con->v2.out_epil.front_crc);
	cpu_to_le32s(&con->v2.out_epil.middle_crc);
	cpu_to_le32s(&con->v2.out_epil.data_crc);
}

static void encode_epilogue_secure(struct ceph_connection *con, bool aborted)
{
	memset(&con->v2.out_epil, 0, sizeof(con->v2.out_epil));
	con->v2.out_epil.late_status = aborted ? FRAME_LATE_STATUS_ABORTED :
						 FRAME_LATE_STATUS_COMPLETE;
}

static int decode_epilogue(void *p, u32 *front_crc, u32 *middle_crc,
			   u32 *data_crc)
{
	u8 late_status;

	late_status = ceph_decode_8(&p);
	if ((late_status & FRAME_LATE_STATUS_ABORTED_MASK) !=
			FRAME_LATE_STATUS_COMPLETE) {
		/* we should never get an aborted message as client */
		pr_err("bad late_status 0x%x\n", late_status);
		return -EINVAL;
	}

	if (front_crc && middle_crc && data_crc) {
		*front_crc = ceph_decode_32(&p);
		*middle_crc = ceph_decode_32(&p);
		*data_crc = ceph_decode_32(&p);
	}

	return 0;
}

static void fill_header(struct ceph_msg_header *hdr,
			const struct ceph_msg_header2 *hdr2,
			int front_len, int middle_len, int data_len,
			const struct ceph_entity_name *peer_name)
{
	hdr->seq = hdr2->seq;
	hdr->tid = hdr2->tid;
	hdr->type = hdr2->type;
	hdr->priority = hdr2->priority;
	hdr->version = hdr2->version;
	hdr->front_len = cpu_to_le32(front_len);
	hdr->middle_len = cpu_to_le32(middle_len);
	hdr->data_len = cpu_to_le32(data_len);
	hdr->data_off = hdr2->data_off;
	hdr->src = *peer_name;
	hdr->compat_version = hdr2->compat_version;
	hdr->reserved = 0;
	hdr->crc = 0;
}

static void fill_header2(struct ceph_msg_header2 *hdr2,
			 const struct ceph_msg_header *hdr, u64 ack_seq)
{
	hdr2->seq = hdr->seq;
	hdr2->tid = hdr->tid;
	hdr2->type = hdr->type;
	hdr2->priority = hdr->priority;
	hdr2->version = hdr->version;
	hdr2->data_pre_padding_len = 0;
	hdr2->data_off = hdr->data_off;
	hdr2->ack_seq = cpu_to_le64(ack_seq);
	hdr2->flags = 0;
	hdr2->compat_version = hdr->compat_version;
	hdr2->reserved = 0;
}

static int verify_control_crc(struct ceph_connection *con)
{
	int ctrl_len = con->v2.in_desc.fd_lens[0];
	u32 crc, expected_crc;

	WARN_ON(con->v2.in_kvecs[0].iov_len != ctrl_len);
	WARN_ON(con->v2.in_kvecs[1].iov_len != CEPH_CRC_LEN);

	crc = crc32c(-1, con->v2.in_kvecs[0].iov_base, ctrl_len);
	expected_crc = get_unaligned_le32(con->v2.in_kvecs[1].iov_base);
	if (crc != expected_crc) {
		pr_err("bad control crc, calculated %u, expected %u\n",
		       crc, expected_crc);
		return -EBADMSG;
	}

	return 0;
}

static int verify_epilogue_crcs(struct ceph_connection *con, u32 front_crc,
				u32 middle_crc, u32 data_crc)
{
	if (front_len(con->in_msg)) {
		con->in_front_crc = crc32c(-1, con->in_msg->front.iov_base,
					   front_len(con->in_msg));
	} else {
		WARN_ON(!middle_len(con->in_msg) && !data_len(con->in_msg));
		con->in_front_crc = -1;
	}

	if (middle_len(con->in_msg))
		con->in_middle_crc = crc32c(-1,
					    con->in_msg->middle->vec.iov_base,
					    middle_len(con->in_msg));
	else if (data_len(con->in_msg))
		con->in_middle_crc = -1;
	else
		con->in_middle_crc = 0;

	if (!data_len(con->in_msg))
		con->in_data_crc = 0;

	dout("%s con %p msg %p crcs %u %u %u\n", __func__, con, con->in_msg,
	     con->in_front_crc, con->in_middle_crc, con->in_data_crc);

	if (con->in_front_crc != front_crc) {
		pr_err("bad front crc, calculated %u, expected %u\n",
		       con->in_front_crc, front_crc);
		return -EBADMSG;
	}
	if (con->in_middle_crc != middle_crc) {
		pr_err("bad middle crc, calculated %u, expected %u\n",
		       con->in_middle_crc, middle_crc);
		return -EBADMSG;
	}
	if (con->in_data_crc != data_crc) {
		pr_err("bad data crc, calculated %u, expected %u\n",
		       con->in_data_crc, data_crc);
		return -EBADMSG;
	}

	return 0;
}
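/*
 * For reference, as consumed below: con_secret is laid out as a 16-byte
 * AES-GCM key followed by two 12-byte nonces -- the rx (in) nonce first,
 * then the tx (out) nonce (see the memcpy()s at the end of setup_crypto()).
 * session_key feeds the HMAC-SHA256 used for the AUTH_SIGNATURE exchange;
 * with auth_none both are empty and no tfms are allocated.
 */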
static int setup_crypto(struct ceph_connection *con,
			const u8 *session_key, int session_key_len,
			const u8 *con_secret, int con_secret_len)
{
	unsigned int noio_flag;
	int ret;

	dout("%s con %p con_mode %d session_key_len %d con_secret_len %d\n",
	     __func__, con, con->v2.con_mode, session_key_len, con_secret_len);
	WARN_ON(con->v2.hmac_tfm || con->v2.gcm_tfm || con->v2.gcm_req);

	if (con->v2.con_mode != CEPH_CON_MODE_CRC &&
	    con->v2.con_mode != CEPH_CON_MODE_SECURE) {
		pr_err("bad con_mode %d\n", con->v2.con_mode);
		return -EINVAL;
	}

	if (!session_key_len) {
		WARN_ON(con->v2.con_mode != CEPH_CON_MODE_CRC);
		WARN_ON(con_secret_len);
		return 0;  /* auth_none */
	}

	noio_flag = memalloc_noio_save();
	con->v2.hmac_tfm = crypto_alloc_shash("hmac(sha256)", 0, 0);
	memalloc_noio_restore(noio_flag);
	if (IS_ERR(con->v2.hmac_tfm)) {
		ret = PTR_ERR(con->v2.hmac_tfm);
		con->v2.hmac_tfm = NULL;
		pr_err("failed to allocate hmac tfm context: %d\n", ret);
		return ret;
	}

	WARN_ON((unsigned long)session_key &
		crypto_shash_alignmask(con->v2.hmac_tfm));
	ret = crypto_shash_setkey(con->v2.hmac_tfm, session_key,
				  session_key_len);
	if (ret) {
		pr_err("failed to set hmac key: %d\n", ret);
		return ret;
	}

	if (con->v2.con_mode == CEPH_CON_MODE_CRC) {
		WARN_ON(con_secret_len);
		return 0;  /* auth_x, plain mode */
	}

	if (con_secret_len < CEPH_GCM_KEY_LEN + 2 * CEPH_GCM_IV_LEN) {
		pr_err("con_secret too small %d\n", con_secret_len);
		return -EINVAL;
	}

	noio_flag = memalloc_noio_save();
	con->v2.gcm_tfm = crypto_alloc_aead("gcm(aes)", 0, 0);
	memalloc_noio_restore(noio_flag);
	if (IS_ERR(con->v2.gcm_tfm)) {
		ret = PTR_ERR(con->v2.gcm_tfm);
		con->v2.gcm_tfm = NULL;
		pr_err("failed to allocate gcm tfm context: %d\n", ret);
		return ret;
	}

	WARN_ON((unsigned long)con_secret &
		crypto_aead_alignmask(con->v2.gcm_tfm));
	ret = crypto_aead_setkey(con->v2.gcm_tfm, con_secret, CEPH_GCM_KEY_LEN);
	if (ret) {
		pr_err("failed to set gcm key: %d\n", ret);
		return ret;
	}

	WARN_ON(crypto_aead_ivsize(con->v2.gcm_tfm) != CEPH_GCM_IV_LEN);
	ret = crypto_aead_setauthsize(con->v2.gcm_tfm, CEPH_GCM_TAG_LEN);
	if (ret) {
		pr_err("failed to set gcm tag size: %d\n", ret);
		return ret;
	}

	con->v2.gcm_req = aead_request_alloc(con->v2.gcm_tfm, GFP_NOIO);
	if (!con->v2.gcm_req) {
		pr_err("failed to allocate gcm request\n");
		return -ENOMEM;
	}

	crypto_init_wait(&con->v2.gcm_wait);
	aead_request_set_callback(con->v2.gcm_req, CRYPTO_TFM_REQ_MAY_BACKLOG,
				  crypto_req_done, &con->v2.gcm_wait);

	memcpy(&con->v2.in_gcm_nonce, con_secret + CEPH_GCM_KEY_LEN,
	       CEPH_GCM_IV_LEN);
	memcpy(&con->v2.out_gcm_nonce,
	       con_secret + CEPH_GCM_KEY_LEN + CEPH_GCM_IV_LEN,
	       CEPH_GCM_IV_LEN);
	return 0;  /* auth_x, secure mode */
}

static int hmac_sha256(struct ceph_connection *con, const struct kvec *kvecs,
		       int kvec_cnt, u8 *hmac)
{
	SHASH_DESC_ON_STACK(desc, con->v2.hmac_tfm);  /* tfm arg is ignored */
	int ret;
	int i;

	dout("%s con %p hmac_tfm %p kvec_cnt %d\n", __func__, con,
	     con->v2.hmac_tfm, kvec_cnt);

	if (!con->v2.hmac_tfm) {
		memset(hmac, 0, SHA256_DIGEST_SIZE);
		return 0;  /* auth_none */
	}

	desc->tfm = con->v2.hmac_tfm;
	ret = crypto_shash_init(desc);
	if (ret)
		goto out;

	for (i = 0; i < kvec_cnt; i++) {
		WARN_ON((unsigned long)kvecs[i].iov_base &
			crypto_shash_alignmask(con->v2.hmac_tfm));
		ret = crypto_shash_update(desc, kvecs[i].iov_base,
					  kvecs[i].iov_len);
		if (ret)
			goto out;
	}

	ret = crypto_shash_final(desc, hmac);

out:
	shash_desc_zero(desc);
	return ret;  /* auth_x, both plain and secure modes */
}

static void gcm_inc_nonce(struct ceph_gcm_nonce *nonce)
{
	u64 counter;

	counter = le64_to_cpu(nonce->counter);
	nonce->counter = cpu_to_le64(counter + 1);
}

static int gcm_crypt(struct ceph_connection *con, bool encrypt,
		     struct scatterlist *src, struct scatterlist *dst,
		     int src_len)
{
	struct ceph_gcm_nonce *nonce;
	int ret;

	nonce = encrypt ? &con->v2.out_gcm_nonce : &con->v2.in_gcm_nonce;

	aead_request_set_ad(con->v2.gcm_req, 0);  /* no AAD */
	aead_request_set_crypt(con->v2.gcm_req, src, dst, src_len, (u8 *)nonce);
	ret = crypto_wait_req(encrypt ? crypto_aead_encrypt(con->v2.gcm_req) :
					crypto_aead_decrypt(con->v2.gcm_req),
			      &con->v2.gcm_wait);
	if (ret)
		return ret;

	gcm_inc_nonce(nonce);
	return 0;
}

static void get_bvec_at(struct ceph_msg_data_cursor *cursor,
			struct bio_vec *bv)
{
	struct page *page;
	size_t off, len;

	WARN_ON(!cursor->total_resid);

	/* skip zero-length data items */
	while (!cursor->resid)
		ceph_msg_data_advance(cursor, 0);

	/* get a piece of data, cursor isn't advanced */
	page = ceph_msg_data_next(cursor, &off, &len);
	bvec_set_page(bv, page, len, off);
}

static int calc_sg_cnt(void *buf, int buf_len)
{
	int sg_cnt;

	if (!buf_len)
		return 0;

	sg_cnt = need_padding(buf_len) ? 1 : 0;
	if (is_vmalloc_addr(buf)) {
		WARN_ON(offset_in_page(buf));
		sg_cnt += PAGE_ALIGN(buf_len) >> PAGE_SHIFT;
	} else {
		sg_cnt++;
	}

	return sg_cnt;
}

static int calc_sg_cnt_cursor(struct ceph_msg_data_cursor *cursor)
{
	int data_len = cursor->total_resid;
	struct bio_vec bv;
	int sg_cnt;

	if (!data_len)
		return 0;

	sg_cnt = need_padding(data_len) ? 1 : 0;
	do {
		get_bvec_at(cursor, &bv);
		sg_cnt++;

		ceph_msg_data_advance(cursor, bv.bv_len);
	} while (cursor->total_resid);

	return sg_cnt;
}

static void init_sgs(struct scatterlist **sg, void *buf, int buf_len, u8 *pad)
{
	void *end = buf + buf_len;
	struct page *page;
	int len;
	void *p;

	if (!buf_len)
		return;

	if (is_vmalloc_addr(buf)) {
		p = buf;
		do {
			page = vmalloc_to_page(p);
			len = min_t(int, end - p, PAGE_SIZE);
			WARN_ON(!page || !len || offset_in_page(p));
			sg_set_page(*sg, page, len, 0);
			*sg = sg_next(*sg);
			p += len;
		} while (p != end);
	} else {
		sg_set_buf(*sg, buf, buf_len);
		*sg = sg_next(*sg);
	}

	if (need_padding(buf_len)) {
		sg_set_buf(*sg, pad, padding_len(buf_len));
		*sg = sg_next(*sg);
	}
}

static void init_sgs_cursor(struct scatterlist **sg,
			    struct ceph_msg_data_cursor *cursor, u8 *pad)
{
	int data_len = cursor->total_resid;
	struct bio_vec bv;

	if (!data_len)
		return;

	do {
		get_bvec_at(cursor, &bv);
		sg_set_page(*sg, bv.bv_page, bv.bv_len, bv.bv_offset);
		*sg = sg_next(*sg);

		ceph_msg_data_advance(cursor, bv.bv_len);
	} while (cursor->total_resid);

	if (need_padding(data_len)) {
		sg_set_buf(*sg, pad, padding_len(data_len));
		*sg = sg_next(*sg);
	}
}
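/*
 * For reference: the scatterlist built below spans the entire tail of
 * the message -- front + padding, middle + padding, data + padding and
 * the epilogue (plus the auth tag when decrypting) -- so that a single
 * gcm_crypt() call can process the whole tail in one AEAD operation.
 */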
static int setup_message_sgs(struct sg_table *sgt, struct ceph_msg *msg,
			     u8 *front_pad, u8 *middle_pad, u8 *data_pad,
			     void *epilogue, bool add_tag)
{
	struct ceph_msg_data_cursor cursor;
	struct scatterlist *cur_sg;
	int sg_cnt;
	int ret;

	if (!front_len(msg) && !middle_len(msg) && !data_len(msg))
		return 0;

	sg_cnt = 1;  /* epilogue + [auth tag] */
	if (front_len(msg))
		sg_cnt += calc_sg_cnt(msg->front.iov_base,
				      front_len(msg));
	if (middle_len(msg))
		sg_cnt += calc_sg_cnt(msg->middle->vec.iov_base,
				      middle_len(msg));
	if (data_len(msg)) {
		ceph_msg_data_cursor_init(&cursor, msg, data_len(msg));
		sg_cnt += calc_sg_cnt_cursor(&cursor);
	}

	ret = sg_alloc_table(sgt, sg_cnt, GFP_NOIO);
	if (ret)
		return ret;

	cur_sg = sgt->sgl;
	if (front_len(msg))
		init_sgs(&cur_sg, msg->front.iov_base, front_len(msg),
			 front_pad);
	if (middle_len(msg))
		init_sgs(&cur_sg, msg->middle->vec.iov_base, middle_len(msg),
			 middle_pad);
	if (data_len(msg)) {
		ceph_msg_data_cursor_init(&cursor, msg, data_len(msg));
		init_sgs_cursor(&cur_sg, &cursor, data_pad);
	}

	WARN_ON(!sg_is_last(cur_sg));
	sg_set_buf(cur_sg, epilogue,
		   CEPH_GCM_BLOCK_LEN + (add_tag ? CEPH_GCM_TAG_LEN : 0));
	return 0;
}

static int decrypt_preamble(struct ceph_connection *con)
{
	struct scatterlist sg;

	sg_init_one(&sg, con->v2.in_buf, CEPH_PREAMBLE_SECURE_LEN);
	return gcm_crypt(con, false, &sg, &sg, CEPH_PREAMBLE_SECURE_LEN);
}

static int decrypt_control_remainder(struct ceph_connection *con)
{
	int ctrl_len = con->v2.in_desc.fd_lens[0];
	int rem_len = ctrl_len - CEPH_PREAMBLE_INLINE_LEN;
	int pt_len = padding_len(rem_len) + CEPH_GCM_TAG_LEN;
	struct scatterlist sgs[2];

	WARN_ON(con->v2.in_kvecs[0].iov_len != rem_len);
	WARN_ON(con->v2.in_kvecs[1].iov_len != pt_len);

	sg_init_table(sgs, 2);
	sg_set_buf(&sgs[0], con->v2.in_kvecs[0].iov_base, rem_len);
	sg_set_buf(&sgs[1], con->v2.in_buf, pt_len);

	return gcm_crypt(con, false, sgs, sgs,
			 padded_len(rem_len) + CEPH_GCM_TAG_LEN);
}

static int decrypt_tail(struct ceph_connection *con)
{
	struct sg_table enc_sgt = {};
	struct sg_table sgt = {};
	int tail_len;
	int ret;

	tail_len = tail_onwire_len(con->in_msg, true);
	ret = sg_alloc_table_from_pages(&enc_sgt, con->v2.in_enc_pages,
					con->v2.in_enc_page_cnt, 0, tail_len,
					GFP_NOIO);
	if (ret)
		goto out;

	ret = setup_message_sgs(&sgt, con->in_msg, FRONT_PAD(con->v2.in_buf),
				MIDDLE_PAD(con->v2.in_buf), DATA_PAD(con->v2.in_buf),
				con->v2.in_buf, true);
	if (ret)
		goto out;

	dout("%s con %p msg %p enc_page_cnt %d sg_cnt %d\n", __func__, con,
	     con->in_msg, con->v2.in_enc_page_cnt, sgt.orig_nents);
	ret = gcm_crypt(con, false, enc_sgt.sgl, sgt.sgl, tail_len);
	if (ret)
		goto out;

	WARN_ON(!con->v2.in_enc_page_cnt);
	ceph_release_page_vector(con->v2.in_enc_pages,
				 con->v2.in_enc_page_cnt);
	con->v2.in_enc_pages = NULL;
	con->v2.in_enc_page_cnt = 0;

out:
	sg_free_table(&sgt);
	sg_free_table(&enc_sgt);
	return ret;
}

static int prepare_banner(struct ceph_connection *con)
{
	int buf_len = CEPH_BANNER_V2_LEN + 2 + 8 + 8;
	void *buf, *p;

	buf = alloc_conn_buf(con, buf_len);
	if (!buf)
		return -ENOMEM;

	p = buf;
	ceph_encode_copy(&p, CEPH_BANNER_V2, CEPH_BANNER_V2_LEN);
	ceph_encode_16(&p, sizeof(u64) + sizeof(u64));
	ceph_encode_64(&p, CEPH_MSGR2_SUPPORTED_FEATURES);
	ceph_encode_64(&p, CEPH_MSGR2_REQUIRED_FEATURES);
	WARN_ON(p != buf + buf_len);

	add_out_kvec(con, buf, buf_len);
	add_out_sign_kvec(con, buf, buf_len);
	ceph_con_flag_set(con, CEPH_CON_F_WRITE_PENDING);
	return 0;
}

/*
 * base:
 *   preamble
 *   control body (ctrl_len bytes)
 *   space for control crc
 *
 * extdata (optional):
 *   control body (extdata_len bytes)
 *
 * Compute control crc and gather base and extdata into:
 *
 *   preamble
 *   control body (ctrl_len + extdata_len bytes)
 *   control crc
 *
 * Preamble should already be encoded at the start of base.
 */
static void prepare_head_plain(struct ceph_connection *con, void *base,
			       int ctrl_len, void *extdata, int extdata_len,
			       bool to_be_signed)
{
	int base_len = CEPH_PREAMBLE_LEN + ctrl_len + CEPH_CRC_LEN;
	void *crcp = base + base_len - CEPH_CRC_LEN;
	u32 crc;

	crc = crc32c(-1, CTRL_BODY(base), ctrl_len);
	if (extdata_len)
		crc = crc32c(crc, extdata, extdata_len);
	put_unaligned_le32(crc, crcp);

	if (!extdata_len) {
		add_out_kvec(con, base, base_len);
		if (to_be_signed)
			add_out_sign_kvec(con, base, base_len);
		return;
	}

	add_out_kvec(con, base, crcp - base);
	add_out_kvec(con, extdata, extdata_len);
	add_out_kvec(con, crcp, CEPH_CRC_LEN);
	if (to_be_signed) {
		add_out_sign_kvec(con, base, crcp - base);
		add_out_sign_kvec(con, extdata, extdata_len);
		add_out_sign_kvec(con, crcp, CEPH_CRC_LEN);
	}
}

static int prepare_head_secure_small(struct ceph_connection *con,
				     void *base, int ctrl_len)
{
	struct scatterlist sg;
	int ret;

	/* inline buffer padding? */
	if (ctrl_len < CEPH_PREAMBLE_INLINE_LEN)
		memset(CTRL_BODY(base) + ctrl_len, 0,
		       CEPH_PREAMBLE_INLINE_LEN - ctrl_len);

	sg_init_one(&sg, base, CEPH_PREAMBLE_SECURE_LEN);
	ret = gcm_crypt(con, true, &sg, &sg,
			CEPH_PREAMBLE_SECURE_LEN - CEPH_GCM_TAG_LEN);
	if (ret)
		return ret;

	add_out_kvec(con, base, CEPH_PREAMBLE_SECURE_LEN);
	return 0;
}

/*
 * base:
 *   preamble
 *   control body (ctrl_len bytes)
 *   space for padding, if needed
 *   space for control remainder auth tag
 *   space for preamble auth tag
 *
 * Encrypt preamble and the inline portion, then encrypt the remainder
 * and gather into:
 *
 *   preamble
 *   control body (48 bytes)
 *   preamble auth tag
 *   control body (ctrl_len - 48 bytes)
 *   zero padding, if needed
 *   control remainder auth tag
 *
 * Preamble should already be encoded at the start of base.
 */
static int prepare_head_secure_big(struct ceph_connection *con,
				   void *base, int ctrl_len)
{
	int rem_len = ctrl_len - CEPH_PREAMBLE_INLINE_LEN;
	void *rem = CTRL_BODY(base) + CEPH_PREAMBLE_INLINE_LEN;
	void *rem_tag = rem + padded_len(rem_len);
	void *pmbl_tag = rem_tag + CEPH_GCM_TAG_LEN;
	struct scatterlist sgs[2];
	int ret;

	sg_init_table(sgs, 2);
	sg_set_buf(&sgs[0], base, rem - base);
	sg_set_buf(&sgs[1], pmbl_tag, CEPH_GCM_TAG_LEN);
	ret = gcm_crypt(con, true, sgs, sgs, rem - base);
	if (ret)
		return ret;

	/* control remainder padding? */
	if (need_padding(rem_len))
		memset(rem + rem_len, 0, padding_len(rem_len));

	sg_init_one(&sgs[0], rem, pmbl_tag - rem);
	ret = gcm_crypt(con, true, sgs, sgs, rem_tag - rem);
	if (ret)
		return ret;

	add_out_kvec(con, base, rem - base);
	add_out_kvec(con, pmbl_tag, CEPH_GCM_TAG_LEN);
	add_out_kvec(con, rem, pmbl_tag - rem);
	return 0;
}

static int __prepare_control(struct ceph_connection *con, int tag,
			     void *base, int ctrl_len, void *extdata,
			     int extdata_len, bool to_be_signed)
{
	int total_len = ctrl_len + extdata_len;
	struct ceph_frame_desc desc;
	int ret;

	dout("%s con %p tag %d len %d (%d+%d)\n", __func__, con, tag,
	     total_len, ctrl_len, extdata_len);

	/* extdata may be vmalloc'ed but not base */
	if (WARN_ON(is_vmalloc_addr(base) || !ctrl_len))
		return -EINVAL;

	init_frame_desc(&desc, tag, &total_len, 1);
	encode_preamble(&desc, base);

	if (con_secure(con)) {
		if (WARN_ON(extdata_len || to_be_signed))
			return -EINVAL;

		if (ctrl_len <= CEPH_PREAMBLE_INLINE_LEN)
			/* fully inlined, inline buffer may need padding */
			ret = prepare_head_secure_small(con, base, ctrl_len);
		else
			/* partially inlined, inline buffer is full */
			ret = prepare_head_secure_big(con, base, ctrl_len);
		if (ret)
			return ret;
	} else {
		prepare_head_plain(con, base, ctrl_len, extdata, extdata_len,
				   to_be_signed);
	}

	ceph_con_flag_set(con, CEPH_CON_F_WRITE_PENDING);
	return 0;
}

static int prepare_control(struct ceph_connection *con, int tag,
			   void *base, int ctrl_len)
{
	return __prepare_control(con, tag, base, ctrl_len, NULL, 0, false);
}

static int prepare_hello(struct ceph_connection *con)
{
	void *buf, *p;
	int ctrl_len;

	ctrl_len = 1 + ceph_entity_addr_encoding_len(&con->peer_addr);
	buf = alloc_conn_buf(con, head_onwire_len(ctrl_len, false));
	if (!buf)
		return -ENOMEM;

	p = CTRL_BODY(buf);
	ceph_encode_8(&p, CEPH_ENTITY_TYPE_CLIENT);
	ceph_encode_entity_addr(&p, &con->peer_addr);
	WARN_ON(p != CTRL_BODY(buf) + ctrl_len);

	return __prepare_control(con, FRAME_TAG_HELLO, buf, ctrl_len,
				 NULL, 0, true);
}

/* so that head_onwire_len(AUTH_BUF_LEN, false) is 512 */
#define AUTH_BUF_LEN	(512 - CEPH_CRC_LEN - CEPH_PREAMBLE_PLAIN_LEN)

static int prepare_auth_request(struct ceph_connection *con)
{
	void *authorizer, *authorizer_copy;
	int ctrl_len, authorizer_len;
	void *buf;
	int ret;

	ctrl_len = AUTH_BUF_LEN;
	buf = alloc_conn_buf(con, head_onwire_len(ctrl_len, false));
	if (!buf)
		return -ENOMEM;

	mutex_unlock(&con->mutex);
	ret = con->ops->get_auth_request(con, CTRL_BODY(buf), &ctrl_len,
					 &authorizer, &authorizer_len);
	mutex_lock(&con->mutex);
	if (con->state != CEPH_CON_S_V2_HELLO) {
		dout("%s con %p state changed to %d\n", __func__, con,
		     con->state);
		return -EAGAIN;
	}

	dout("%s con %p get_auth_request ret %d\n", __func__, con, ret);
	if (ret)
		return ret;

	authorizer_copy = alloc_conn_buf(con, authorizer_len);
	if (!authorizer_copy)
		return -ENOMEM;

	memcpy(authorizer_copy, authorizer, authorizer_len);

	return __prepare_control(con, FRAME_TAG_AUTH_REQUEST, buf, ctrl_len,
				 authorizer_copy, authorizer_len, true);
}

static int prepare_auth_request_more(struct ceph_connection *con,
				     void *reply, int reply_len)
{
	int ctrl_len, authorizer_len;
	void *authorizer;
	void *buf;
	int ret;

	ctrl_len = AUTH_BUF_LEN;
	buf = alloc_conn_buf(con, head_onwire_len(ctrl_len, false));
	if (!buf)
		return -ENOMEM;

	mutex_unlock(&con->mutex);
	ret = con->ops->handle_auth_reply_more(con, reply, reply_len,
					       CTRL_BODY(buf), &ctrl_len,
					       &authorizer, &authorizer_len);
	mutex_lock(&con->mutex);
	if (con->state != CEPH_CON_S_V2_AUTH) {
		dout("%s con %p state changed to %d\n", __func__, con,
		     con->state);
		return -EAGAIN;
	}

	dout("%s con %p handle_auth_reply_more ret %d\n", __func__, con, ret);
	if (ret)
		return ret;

	return __prepare_control(con, FRAME_TAG_AUTH_REQUEST_MORE, buf,
				 ctrl_len, authorizer, authorizer_len, true);
}

static int prepare_auth_signature(struct ceph_connection *con)
{
	void *buf;
	int ret;

	buf = alloc_conn_buf(con, head_onwire_len(SHA256_DIGEST_SIZE,
						  con_secure(con)));
	if (!buf)
		return -ENOMEM;

	ret = hmac_sha256(con, con->v2.in_sign_kvecs, con->v2.in_sign_kvec_cnt,
			  CTRL_BODY(buf));
	if (ret)
		return ret;

	return prepare_control(con, FRAME_TAG_AUTH_SIGNATURE, buf,
			       SHA256_DIGEST_SIZE);
}

static int prepare_client_ident(struct ceph_connection *con)
{
	struct ceph_entity_addr *my_addr = &con->msgr->inst.addr;
	struct ceph_client *client = from_msgr(con->msgr);
	u64 global_id = ceph_client_gid(client);
	void *buf, *p;
	int ctrl_len;

	WARN_ON(con->v2.server_cookie);
	WARN_ON(con->v2.connect_seq);
	WARN_ON(con->v2.peer_global_seq);

	if (!con->v2.client_cookie) {
		do {
			get_random_bytes(&con->v2.client_cookie,
					 sizeof(con->v2.client_cookie));
		} while (!con->v2.client_cookie);
		dout("%s con %p generated cookie 0x%llx\n", __func__, con,
		     con->v2.client_cookie);
	} else {
		dout("%s con %p cookie already set 0x%llx\n", __func__, con,
		     con->v2.client_cookie);
	}

	dout("%s con %p my_addr %s/%u peer_addr %s/%u global_id %llu global_seq %llu features 0x%llx required_features 0x%llx cookie 0x%llx\n",
	     __func__, con, ceph_pr_addr(my_addr), le32_to_cpu(my_addr->nonce),
	     ceph_pr_addr(&con->peer_addr), le32_to_cpu(con->peer_addr.nonce),
	     global_id, con->v2.global_seq, client->supported_features,
	     client->required_features, con->v2.client_cookie);

	ctrl_len = 1 + 4 + ceph_entity_addr_encoding_len(my_addr) +
		   ceph_entity_addr_encoding_len(&con->peer_addr) + 6 * 8;
	buf = alloc_conn_buf(con, head_onwire_len(ctrl_len, con_secure(con)));
	if (!buf)
		return -ENOMEM;

	p = CTRL_BODY(buf);
	ceph_encode_8(&p, 2);  /* addrvec marker */
	ceph_encode_32(&p, 1);  /* addr_cnt */
	ceph_encode_entity_addr(&p, my_addr);
	ceph_encode_entity_addr(&p, &con->peer_addr);
	ceph_encode_64(&p, global_id);
	ceph_encode_64(&p, con->v2.global_seq);
	ceph_encode_64(&p, client->supported_features);
	ceph_encode_64(&p, client->required_features);
	ceph_encode_64(&p, 0);  /* flags */
	ceph_encode_64(&p, con->v2.client_cookie);
	WARN_ON(p != CTRL_BODY(buf) + ctrl_len);

	return prepare_control(con, FRAME_TAG_CLIENT_IDENT, buf, ctrl_len);
}
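/*
 * For reference: unlike a fresh CLIENT_IDENT, a reconnect echoes back
 * both cookies together with connect_seq and in_seq, so the server can
 * associate this connection with the existing session state.
 */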
static int prepare_session_reconnect(struct ceph_connection *con)
{
	struct ceph_entity_addr *my_addr = &con->msgr->inst.addr;
	void *buf, *p;
	int ctrl_len;

	WARN_ON(!con->v2.client_cookie);
	WARN_ON(!con->v2.server_cookie);
	WARN_ON(!con->v2.connect_seq);
	WARN_ON(!con->v2.peer_global_seq);

	dout("%s con %p my_addr %s/%u client_cookie 0x%llx server_cookie 0x%llx global_seq %llu connect_seq %llu in_seq %llu\n",
	     __func__, con, ceph_pr_addr(my_addr), le32_to_cpu(my_addr->nonce),
	     con->v2.client_cookie, con->v2.server_cookie, con->v2.global_seq,
	     con->v2.connect_seq, con->in_seq);

	ctrl_len = 1 + 4 + ceph_entity_addr_encoding_len(my_addr) + 5 * 8;
	buf = alloc_conn_buf(con, head_onwire_len(ctrl_len, con_secure(con)));
	if (!buf)
		return -ENOMEM;

	p = CTRL_BODY(buf);
	ceph_encode_8(&p, 2);  /* entity_addrvec_t marker */
	ceph_encode_32(&p, 1);  /* my_addrs len */
	ceph_encode_entity_addr(&p, my_addr);
	ceph_encode_64(&p, con->v2.client_cookie);
	ceph_encode_64(&p, con->v2.server_cookie);
	ceph_encode_64(&p, con->v2.global_seq);
	ceph_encode_64(&p, con->v2.connect_seq);
	ceph_encode_64(&p, con->in_seq);
	WARN_ON(p != CTRL_BODY(buf) + ctrl_len);

	return prepare_control(con, FRAME_TAG_SESSION_RECONNECT, buf, ctrl_len);
}

static int prepare_keepalive2(struct ceph_connection *con)
{
	struct ceph_timespec *ts = CTRL_BODY(con->v2.out_buf);
	struct timespec64 now;

	ktime_get_real_ts64(&now);
	dout("%s con %p timestamp %lld.%09ld\n", __func__, con, now.tv_sec,
	     now.tv_nsec);

	ceph_encode_timespec64(ts, &now);

	reset_out_kvecs(con);
	return prepare_control(con, FRAME_TAG_KEEPALIVE2, con->v2.out_buf,
			       sizeof(struct ceph_timespec));
}

static int prepare_ack(struct ceph_connection *con)
{
	void *p;

	dout("%s con %p in_seq_acked %llu -> %llu\n", __func__, con,
	     con->in_seq_acked, con->in_seq);
	con->in_seq_acked = con->in_seq;

	p = CTRL_BODY(con->v2.out_buf);
	ceph_encode_64(&p, con->in_seq_acked);

	reset_out_kvecs(con);
	return prepare_control(con, FRAME_TAG_ACK, con->v2.out_buf, 8);
}

static void prepare_epilogue_plain(struct ceph_connection *con, bool aborted)
{
	dout("%s con %p msg %p aborted %d crcs %u %u %u\n", __func__, con,
	     con->out_msg, aborted, con->v2.out_epil.front_crc,
	     con->v2.out_epil.middle_crc, con->v2.out_epil.data_crc);

	encode_epilogue_plain(con, aborted);
	add_out_kvec(con, &con->v2.out_epil, CEPH_EPILOGUE_PLAIN_LEN);
}

/*
 * For "used" empty segments, crc is -1.  For unused (trailing)
 * segments, crc is 0.
 */
static void prepare_message_plain(struct ceph_connection *con)
{
	struct ceph_msg *msg = con->out_msg;

	prepare_head_plain(con, con->v2.out_buf,
			   sizeof(struct ceph_msg_header2), NULL, 0, false);

	if (!front_len(msg) && !middle_len(msg)) {
		if (!data_len(msg)) {
			/*
			 * Empty message: once the head is written,
			 * we are done -- there is no epilogue.
			 */
			con->v2.out_state = OUT_S_FINISH_MESSAGE;
			return;
		}

		con->v2.out_epil.front_crc = -1;
		con->v2.out_epil.middle_crc = -1;
		con->v2.out_state = OUT_S_QUEUE_DATA;
		return;
	}

	if (front_len(msg)) {
		con->v2.out_epil.front_crc = crc32c(-1, msg->front.iov_base,
						    front_len(msg));
		add_out_kvec(con, msg->front.iov_base, front_len(msg));
	} else {
		/* middle (at least) is there, checked above */
		con->v2.out_epil.front_crc = -1;
	}

	if (middle_len(msg)) {
		con->v2.out_epil.middle_crc =
			crc32c(-1, msg->middle->vec.iov_base, middle_len(msg));
		add_out_kvec(con, msg->middle->vec.iov_base, middle_len(msg));
	} else {
		con->v2.out_epil.middle_crc = data_len(msg) ? -1 : 0;
	}

	if (data_len(msg)) {
		con->v2.out_state = OUT_S_QUEUE_DATA;
	} else {
		con->v2.out_epil.data_crc = 0;
		prepare_epilogue_plain(con, false);
		con->v2.out_state = OUT_S_FINISH_MESSAGE;
	}
}

/*
 * Unfortunately the kernel crypto API doesn't support streaming
 * (piecewise) operation for AEAD algorithms, so we can't get away
 * with a fixed size buffer and a couple sgs.  Instead, we have to
 * allocate pages for the entire tail of the message (currently up
 * to ~32M) and two sgs arrays (up to ~256K each)...
 */
static int prepare_message_secure(struct ceph_connection *con)
{
	void *zerop = page_address(ceph_zero_page);
	struct sg_table enc_sgt = {};
	struct sg_table sgt = {};
	struct page **enc_pages;
	int enc_page_cnt;
	int tail_len;
	int ret;

	ret = prepare_head_secure_small(con, con->v2.out_buf,
					sizeof(struct ceph_msg_header2));
	if (ret)
		return ret;

	tail_len = tail_onwire_len(con->out_msg, true);
	if (!tail_len) {
		/*
		 * Empty message: once the head is written,
		 * we are done -- there is no epilogue.
		 */
		con->v2.out_state = OUT_S_FINISH_MESSAGE;
		return 0;
	}

	encode_epilogue_secure(con, false);
	ret = setup_message_sgs(&sgt, con->out_msg, zerop, zerop, zerop,
				&con->v2.out_epil, false);
	if (ret)
		goto out;

	enc_page_cnt = calc_pages_for(0, tail_len);
	enc_pages = ceph_alloc_page_vector(enc_page_cnt, GFP_NOIO);
	if (IS_ERR(enc_pages)) {
		ret = PTR_ERR(enc_pages);
		goto out;
	}

	WARN_ON(con->v2.out_enc_pages || con->v2.out_enc_page_cnt);
	con->v2.out_enc_pages = enc_pages;
	con->v2.out_enc_page_cnt = enc_page_cnt;
	con->v2.out_enc_resid = tail_len;
	con->v2.out_enc_i = 0;

	ret = sg_alloc_table_from_pages(&enc_sgt, enc_pages, enc_page_cnt,
					0, tail_len, GFP_NOIO);
	if (ret)
		goto out;

	ret = gcm_crypt(con, true, sgt.sgl, enc_sgt.sgl,
			tail_len - CEPH_GCM_TAG_LEN);
	if (ret)
		goto out;

	dout("%s con %p msg %p sg_cnt %d enc_page_cnt %d\n", __func__, con,
	     con->out_msg, sgt.orig_nents, enc_page_cnt);
	con->v2.out_state = OUT_S_QUEUE_ENC_PAGE;

out:
	sg_free_table(&sgt);
	sg_free_table(&enc_sgt);
	return ret;
}

static int prepare_message(struct ceph_connection *con)
{
	int lens[] = {
		sizeof(struct ceph_msg_header2),
		front_len(con->out_msg),
		middle_len(con->out_msg),
		data_len(con->out_msg)
	};
	struct ceph_frame_desc desc;
	int ret;

	dout("%s con %p msg %p logical %d+%d+%d+%d\n", __func__, con,
	     con->out_msg, lens[0], lens[1], lens[2], lens[3]);

	if (con->in_seq > con->in_seq_acked) {
		dout("%s con %p in_seq_acked %llu -> %llu\n", __func__, con,
		     con->in_seq_acked, con->in_seq);
		con->in_seq_acked = con->in_seq;
	}

	reset_out_kvecs(con);
	init_frame_desc(&desc, FRAME_TAG_MESSAGE, lens, 4);
	encode_preamble(&desc, con->v2.out_buf);
	fill_header2(CTRL_BODY(con->v2.out_buf), &con->out_msg->hdr,
		     con->in_seq_acked);

	if (con_secure(con)) {
		ret = prepare_message_secure(con);
		if (ret)
			return ret;
	} else {
		prepare_message_plain(con);
	}

	ceph_con_flag_set(con, CEPH_CON_F_WRITE_PENDING);
	return 0;
}

static int prepare_read_banner_prefix(struct ceph_connection *con)
{
	void *buf;

	buf = alloc_conn_buf(con, CEPH_BANNER_V2_PREFIX_LEN);
	if (!buf)
		return -ENOMEM;

	reset_in_kvecs(con);
	add_in_kvec(con, buf, CEPH_BANNER_V2_PREFIX_LEN);
	add_in_sign_kvec(con, buf, CEPH_BANNER_V2_PREFIX_LEN);
	con->state = CEPH_CON_S_V2_BANNER_PREFIX;
	return 0;
}

static int prepare_read_banner_payload(struct ceph_connection *con,
				       int payload_len)
{
	void *buf;

	buf = alloc_conn_buf(con, payload_len);
	if (!buf)
		return -ENOMEM;

	reset_in_kvecs(con);
	add_in_kvec(con, buf, payload_len);
	add_in_sign_kvec(con, buf, payload_len);
	con->state = CEPH_CON_S_V2_BANNER_PAYLOAD;
	return 0;
}

static void prepare_read_preamble(struct ceph_connection *con)
{
	reset_in_kvecs(con);
	add_in_kvec(con, con->v2.in_buf,
		    con_secure(con) ? CEPH_PREAMBLE_SECURE_LEN :
				      CEPH_PREAMBLE_PLAIN_LEN);
	con->v2.in_state = IN_S_HANDLE_PREAMBLE;
}

static int prepare_read_control(struct ceph_connection *con)
{
	int ctrl_len = con->v2.in_desc.fd_lens[0];
	int head_len;
	void *buf;

	reset_in_kvecs(con);
	if (con->state == CEPH_CON_S_V2_HELLO ||
	    con->state == CEPH_CON_S_V2_AUTH) {
		head_len = head_onwire_len(ctrl_len, false);
		buf = alloc_conn_buf(con, head_len);
		if (!buf)
			return -ENOMEM;

		/* preserve preamble */
		memcpy(buf, con->v2.in_buf, CEPH_PREAMBLE_LEN);

		add_in_kvec(con, CTRL_BODY(buf), ctrl_len);
		add_in_kvec(con, CTRL_BODY(buf) + ctrl_len, CEPH_CRC_LEN);
		add_in_sign_kvec(con, buf, head_len);
	} else {
		if (ctrl_len > CEPH_PREAMBLE_INLINE_LEN) {
			buf = alloc_conn_buf(con, ctrl_len);
			if (!buf)
				return -ENOMEM;

			add_in_kvec(con, buf, ctrl_len);
		} else {
			add_in_kvec(con, CTRL_BODY(con->v2.in_buf), ctrl_len);
		}
		add_in_kvec(con, con->v2.in_buf, CEPH_CRC_LEN);
	}
	con->v2.in_state = IN_S_HANDLE_CONTROL;
	return 0;
}

static int prepare_read_control_remainder(struct ceph_connection *con)
{
	int ctrl_len = con->v2.in_desc.fd_lens[0];
	int rem_len = ctrl_len - CEPH_PREAMBLE_INLINE_LEN;
	void *buf;

	buf = alloc_conn_buf(con, ctrl_len);
	if (!buf)
		return -ENOMEM;

	memcpy(buf, CTRL_BODY(con->v2.in_buf), CEPH_PREAMBLE_INLINE_LEN);

	reset_in_kvecs(con);
	add_in_kvec(con, buf + CEPH_PREAMBLE_INLINE_LEN, rem_len);
	add_in_kvec(con, con->v2.in_buf,
		    padding_len(rem_len) + CEPH_GCM_TAG_LEN);
	con->v2.in_state = IN_S_HANDLE_CONTROL_REMAINDER;
	return 0;
}

static int prepare_read_data(struct ceph_connection *con)
{
	struct bio_vec bv;

	con->in_data_crc = -1;
	ceph_msg_data_cursor_init(&con->v2.in_cursor, con->in_msg,
				  data_len(con->in_msg));

	get_bvec_at(&con->v2.in_cursor, &bv);
	if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) {
		if (unlikely(!con->bounce_page)) {
			con->bounce_page = alloc_page(GFP_NOIO);
			if (!con->bounce_page) {
				pr_err("failed to allocate bounce page\n");
				return -ENOMEM;
			}
		}

		bv.bv_page = con->bounce_page;
		bv.bv_offset = 0;
	}
	set_in_bvec(con, &bv);
	con->v2.in_state = IN_S_PREPARE_READ_DATA_CONT;
	return 0;
}
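/*
 * With RXBOUNCE, each piece of data is first received into the bounce
 * page; prepare_read_data_cont() then checksums it and copies it into
 * the real destination page before queueing the next piece.
 */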
static void prepare_read_data_cont(struct ceph_connection *con)
{
	struct bio_vec bv;

	if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) {
		con->in_data_crc = crc32c(con->in_data_crc,
					  page_address(con->bounce_page),
					  con->v2.in_bvec.bv_len);

		get_bvec_at(&con->v2.in_cursor, &bv);
		memcpy_to_page(bv.bv_page, bv.bv_offset,
			       page_address(con->bounce_page),
			       con->v2.in_bvec.bv_len);
	} else {
		con->in_data_crc = ceph_crc32c_page(con->in_data_crc,
						    con->v2.in_bvec.bv_page,
						    con->v2.in_bvec.bv_offset,
						    con->v2.in_bvec.bv_len);
	}

	ceph_msg_data_advance(&con->v2.in_cursor, con->v2.in_bvec.bv_len);
	if (con->v2.in_cursor.total_resid) {
		get_bvec_at(&con->v2.in_cursor, &bv);
		if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) {
			bv.bv_page = con->bounce_page;
			bv.bv_offset = 0;
		}
		set_in_bvec(con, &bv);
		WARN_ON(con->v2.in_state != IN_S_PREPARE_READ_DATA_CONT);
		return;
	}

	/*
	 * We've read all data.  Prepare to read epilogue.
	 */
	reset_in_kvecs(con);
	add_in_kvec(con, con->v2.in_buf, CEPH_EPILOGUE_PLAIN_LEN);
	con->v2.in_state = IN_S_HANDLE_EPILOGUE;
}

static int prepare_read_tail_plain(struct ceph_connection *con)
{
	struct ceph_msg *msg = con->in_msg;

	if (!front_len(msg) && !middle_len(msg)) {
		WARN_ON(!data_len(msg));
		return prepare_read_data(con);
	}

	reset_in_kvecs(con);
	if (front_len(msg)) {
		add_in_kvec(con, msg->front.iov_base, front_len(msg));
		WARN_ON(msg->front.iov_len != front_len(msg));
	}
	if (middle_len(msg)) {
		add_in_kvec(con, msg->middle->vec.iov_base, middle_len(msg));
		WARN_ON(msg->middle->vec.iov_len != middle_len(msg));
	}

	if (data_len(msg)) {
		con->v2.in_state = IN_S_PREPARE_READ_DATA;
	} else {
		add_in_kvec(con, con->v2.in_buf, CEPH_EPILOGUE_PLAIN_LEN);
		con->v2.in_state = IN_S_HANDLE_EPILOGUE;
	}
	return 0;
}

static void prepare_read_enc_page(struct ceph_connection *con)
{
	struct bio_vec bv;

	dout("%s con %p i %d resid %d\n", __func__, con, con->v2.in_enc_i,
	     con->v2.in_enc_resid);
	WARN_ON(!con->v2.in_enc_resid);

	bvec_set_page(&bv, con->v2.in_enc_pages[con->v2.in_enc_i],
		      min(con->v2.in_enc_resid, (int)PAGE_SIZE), 0);

	set_in_bvec(con, &bv);
	con->v2.in_enc_i++;
	con->v2.in_enc_resid -= bv.bv_len;

	if (con->v2.in_enc_resid) {
		con->v2.in_state = IN_S_PREPARE_READ_ENC_PAGE;
		return;
	}

	/*
	 * We are set to read the last piece of ciphertext (ending
	 * with epilogue) + auth tag.
	 */
	WARN_ON(con->v2.in_enc_i != con->v2.in_enc_page_cnt);
	con->v2.in_state = IN_S_HANDLE_EPILOGUE;
}

static int prepare_read_tail_secure(struct ceph_connection *con)
{
	struct page **enc_pages;
	int enc_page_cnt;
	int tail_len;

	tail_len = tail_onwire_len(con->in_msg, true);
	WARN_ON(!tail_len);

	enc_page_cnt = calc_pages_for(0, tail_len);
	enc_pages = ceph_alloc_page_vector(enc_page_cnt, GFP_NOIO);
	if (IS_ERR(enc_pages))
		return PTR_ERR(enc_pages);

	WARN_ON(con->v2.in_enc_pages || con->v2.in_enc_page_cnt);
	con->v2.in_enc_pages = enc_pages;
	con->v2.in_enc_page_cnt = enc_page_cnt;
	con->v2.in_enc_resid = tail_len;
	con->v2.in_enc_i = 0;

	prepare_read_enc_page(con);
	return 0;
}

static void __finish_skip(struct ceph_connection *con)
{
	con->in_seq++;
	prepare_read_preamble(con);
}

static void prepare_skip_message(struct ceph_connection *con)
{
	struct ceph_frame_desc *desc = &con->v2.in_desc;
	int tail_len;

	dout("%s con %p %d+%d+%d\n", __func__, con, desc->fd_lens[1],
	     desc->fd_lens[2], desc->fd_lens[3]);

	tail_len = __tail_onwire_len(desc->fd_lens[1], desc->fd_lens[2],
				     desc->fd_lens[3], con_secure(con));
	if (!tail_len) {
		__finish_skip(con);
	} else {
		set_in_skip(con, tail_len);
		con->v2.in_state = IN_S_FINISH_SKIP;
	}
}
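/*
 * For reference: the banner prefix is the fixed CEPH_BANNER_V2 string
 * followed by a le16 payload length; the payload carries the peer's
 * supported and required msgr2 feature bits (two le64 words, mirroring
 * what prepare_banner() sends).
 */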
protocol"; 1943 else 1944 con->error_msg = "protocol error, bad banner"; 1945 return -EINVAL; 1946 } 1947 1948 p += CEPH_BANNER_V2_LEN; 1949 payload_len = ceph_decode_16(&p); 1950 dout("%s con %p payload_len %d\n", __func__, con, payload_len); 1951 1952 return prepare_read_banner_payload(con, payload_len); 1953 } 1954 1955 static int process_banner_payload(struct ceph_connection *con) 1956 { 1957 void *end = con->v2.in_kvecs[0].iov_base + con->v2.in_kvecs[0].iov_len; 1958 u64 feat = CEPH_MSGR2_SUPPORTED_FEATURES; 1959 u64 req_feat = CEPH_MSGR2_REQUIRED_FEATURES; 1960 u64 server_feat, server_req_feat; 1961 void *p; 1962 int ret; 1963 1964 p = con->v2.in_kvecs[0].iov_base; 1965 ceph_decode_64_safe(&p, end, server_feat, bad); 1966 ceph_decode_64_safe(&p, end, server_req_feat, bad); 1967 1968 dout("%s con %p server_feat 0x%llx server_req_feat 0x%llx\n", 1969 __func__, con, server_feat, server_req_feat); 1970 1971 if (req_feat & ~server_feat) { 1972 pr_err("msgr2 feature set mismatch: my required > server's supported 0x%llx, need 0x%llx\n", 1973 server_feat, req_feat & ~server_feat); 1974 con->error_msg = "missing required protocol features"; 1975 return -EINVAL; 1976 } 1977 if (server_req_feat & ~feat) { 1978 pr_err("msgr2 feature set mismatch: server's required > my supported 0x%llx, missing 0x%llx\n", 1979 feat, server_req_feat & ~feat); 1980 con->error_msg = "missing required protocol features"; 1981 return -EINVAL; 1982 } 1983 1984 /* no reset_out_kvecs() as our banner may still be pending */ 1985 ret = prepare_hello(con); 1986 if (ret) { 1987 pr_err("prepare_hello failed: %d\n", ret); 1988 return ret; 1989 } 1990 1991 con->state = CEPH_CON_S_V2_HELLO; 1992 prepare_read_preamble(con); 1993 return 0; 1994 1995 bad: 1996 pr_err("failed to decode banner payload\n"); 1997 return -EINVAL; 1998 } 1999 2000 static int process_hello(struct ceph_connection *con, void *p, void *end) 2001 { 2002 struct ceph_entity_addr *my_addr = &con->msgr->inst.addr; 2003 struct ceph_entity_addr addr_for_me; 2004 u8 entity_type; 2005 int ret; 2006 2007 if (con->state != CEPH_CON_S_V2_HELLO) { 2008 con->error_msg = "protocol error, unexpected hello"; 2009 return -EINVAL; 2010 } 2011 2012 ceph_decode_8_safe(&p, end, entity_type, bad); 2013 ret = ceph_decode_entity_addr(&p, end, &addr_for_me); 2014 if (ret) { 2015 pr_err("failed to decode addr_for_me: %d\n", ret); 2016 return ret; 2017 } 2018 2019 dout("%s con %p entity_type %d addr_for_me %s\n", __func__, con, 2020 entity_type, ceph_pr_addr(&addr_for_me)); 2021 2022 if (entity_type != con->peer_name.type) { 2023 pr_err("bad peer type, want %d, got %d\n", 2024 con->peer_name.type, entity_type); 2025 con->error_msg = "wrong peer at address"; 2026 return -EINVAL; 2027 } 2028 2029 /* 2030 * Set our address to the address our first peer (i.e. monitor) 2031 * sees that we are connecting from. If we are behind some sort 2032 * of NAT and want to be identified by some private (not NATed) 2033 * address, ip option should be used. 
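 * (For example, a client behind NAT that wants to be identified by
 * its private address would typically pass the libceph "ip=<addr>"
 * option; my_addr is then already set and the copy from addr_for_me
 * below is skipped.)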
2034 */ 2035 if (ceph_addr_is_blank(my_addr)) { 2036 memcpy(&my_addr->in_addr, &addr_for_me.in_addr, 2037 sizeof(my_addr->in_addr)); 2038 ceph_addr_set_port(my_addr, 0); 2039 dout("%s con %p set my addr %s, as seen by peer %s\n", 2040 __func__, con, ceph_pr_addr(my_addr), 2041 ceph_pr_addr(&con->peer_addr)); 2042 } else { 2043 dout("%s con %p my addr already set %s\n", 2044 __func__, con, ceph_pr_addr(my_addr)); 2045 } 2046 2047 WARN_ON(ceph_addr_is_blank(my_addr) || ceph_addr_port(my_addr)); 2048 WARN_ON(my_addr->type != CEPH_ENTITY_ADDR_TYPE_ANY); 2049 WARN_ON(!my_addr->nonce); 2050 2051 /* no reset_out_kvecs() as our hello may still be pending */ 2052 ret = prepare_auth_request(con); 2053 if (ret) { 2054 if (ret != -EAGAIN) 2055 pr_err("prepare_auth_request failed: %d\n", ret); 2056 return ret; 2057 } 2058 2059 con->state = CEPH_CON_S_V2_AUTH; 2060 return 0; 2061 2062 bad: 2063 pr_err("failed to decode hello\n"); 2064 return -EINVAL; 2065 } 2066 2067 static int process_auth_bad_method(struct ceph_connection *con, 2068 void *p, void *end) 2069 { 2070 int allowed_protos[8], allowed_modes[8]; 2071 int allowed_proto_cnt, allowed_mode_cnt; 2072 int used_proto, result; 2073 int ret; 2074 int i; 2075 2076 if (con->state != CEPH_CON_S_V2_AUTH) { 2077 con->error_msg = "protocol error, unexpected auth_bad_method"; 2078 return -EINVAL; 2079 } 2080 2081 ceph_decode_32_safe(&p, end, used_proto, bad); 2082 ceph_decode_32_safe(&p, end, result, bad); 2083 dout("%s con %p used_proto %d result %d\n", __func__, con, used_proto, 2084 result); 2085 2086 ceph_decode_32_safe(&p, end, allowed_proto_cnt, bad); 2087 if (allowed_proto_cnt > ARRAY_SIZE(allowed_protos)) { 2088 pr_err("allowed_protos too big %d\n", allowed_proto_cnt); 2089 return -EINVAL; 2090 } 2091 for (i = 0; i < allowed_proto_cnt; i++) { 2092 ceph_decode_32_safe(&p, end, allowed_protos[i], bad); 2093 dout("%s con %p allowed_protos[%d] %d\n", __func__, con, 2094 i, allowed_protos[i]); 2095 } 2096 2097 ceph_decode_32_safe(&p, end, allowed_mode_cnt, bad); 2098 if (allowed_mode_cnt > ARRAY_SIZE(allowed_modes)) { 2099 pr_err("allowed_modes too big %d\n", allowed_mode_cnt); 2100 return -EINVAL; 2101 } 2102 for (i = 0; i < allowed_mode_cnt; i++) { 2103 ceph_decode_32_safe(&p, end, allowed_modes[i], bad); 2104 dout("%s con %p allowed_modes[%d] %d\n", __func__, con, 2105 i, allowed_modes[i]); 2106 } 2107 2108 mutex_unlock(&con->mutex); 2109 ret = con->ops->handle_auth_bad_method(con, used_proto, result, 2110 allowed_protos, 2111 allowed_proto_cnt, 2112 allowed_modes, 2113 allowed_mode_cnt); 2114 mutex_lock(&con->mutex); 2115 if (con->state != CEPH_CON_S_V2_AUTH) { 2116 dout("%s con %p state changed to %d\n", __func__, con, 2117 con->state); 2118 return -EAGAIN; 2119 } 2120 2121 dout("%s con %p handle_auth_bad_method ret %d\n", __func__, con, ret); 2122 return ret; 2123 2124 bad: 2125 pr_err("failed to decode auth_bad_method\n"); 2126 return -EINVAL; 2127 } 2128 2129 static int process_auth_reply_more(struct ceph_connection *con, 2130 void *p, void *end) 2131 { 2132 int payload_len; 2133 int ret; 2134 2135 if (con->state != CEPH_CON_S_V2_AUTH) { 2136 con->error_msg = "protocol error, unexpected auth_reply_more"; 2137 return -EINVAL; 2138 } 2139 2140 ceph_decode_32_safe(&p, end, payload_len, bad); 2141 ceph_decode_need(&p, end, payload_len, bad); 2142 2143 dout("%s con %p payload_len %d\n", __func__, con, payload_len); 2144 2145 reset_out_kvecs(con); 2146 ret = prepare_auth_request_more(con, p, payload_len); 2147 if (ret) { 2148 if (ret != -EAGAIN) 2149 
pr_err("prepare_auth_request_more failed: %d\n", ret); 2150 return ret; 2151 } 2152 2153 return 0; 2154 2155 bad: 2156 pr_err("failed to decode auth_reply_more\n"); 2157 return -EINVAL; 2158 } 2159 2160 /* 2161 * Align session_key and con_secret to avoid GFP_ATOMIC allocation 2162 * inside crypto_shash_setkey() and crypto_aead_setkey() called from 2163 * setup_crypto(). __aligned(16) isn't guaranteed to work for stack 2164 * objects, so do it by hand. 2165 */ 2166 static int process_auth_done(struct ceph_connection *con, void *p, void *end) 2167 { 2168 u8 session_key_buf[CEPH_KEY_LEN + 16]; 2169 u8 con_secret_buf[CEPH_MAX_CON_SECRET_LEN + 16]; 2170 u8 *session_key = PTR_ALIGN(&session_key_buf[0], 16); 2171 u8 *con_secret = PTR_ALIGN(&con_secret_buf[0], 16); 2172 int session_key_len, con_secret_len; 2173 int payload_len; 2174 u64 global_id; 2175 int ret; 2176 2177 if (con->state != CEPH_CON_S_V2_AUTH) { 2178 con->error_msg = "protocol error, unexpected auth_done"; 2179 return -EINVAL; 2180 } 2181 2182 ceph_decode_64_safe(&p, end, global_id, bad); 2183 ceph_decode_32_safe(&p, end, con->v2.con_mode, bad); 2184 ceph_decode_32_safe(&p, end, payload_len, bad); 2185 2186 dout("%s con %p global_id %llu con_mode %d payload_len %d\n", 2187 __func__, con, global_id, con->v2.con_mode, payload_len); 2188 2189 mutex_unlock(&con->mutex); 2190 session_key_len = 0; 2191 con_secret_len = 0; 2192 ret = con->ops->handle_auth_done(con, global_id, p, payload_len, 2193 session_key, &session_key_len, 2194 con_secret, &con_secret_len); 2195 mutex_lock(&con->mutex); 2196 if (con->state != CEPH_CON_S_V2_AUTH) { 2197 dout("%s con %p state changed to %d\n", __func__, con, 2198 con->state); 2199 ret = -EAGAIN; 2200 goto out; 2201 } 2202 2203 dout("%s con %p handle_auth_done ret %d\n", __func__, con, ret); 2204 if (ret) 2205 goto out; 2206 2207 ret = setup_crypto(con, session_key, session_key_len, con_secret, 2208 con_secret_len); 2209 if (ret) 2210 goto out; 2211 2212 reset_out_kvecs(con); 2213 ret = prepare_auth_signature(con); 2214 if (ret) { 2215 pr_err("prepare_auth_signature failed: %d\n", ret); 2216 goto out; 2217 } 2218 2219 con->state = CEPH_CON_S_V2_AUTH_SIGNATURE; 2220 2221 out: 2222 memzero_explicit(session_key_buf, sizeof(session_key_buf)); 2223 memzero_explicit(con_secret_buf, sizeof(con_secret_buf)); 2224 return ret; 2225 2226 bad: 2227 pr_err("failed to decode auth_done\n"); 2228 return -EINVAL; 2229 } 2230 2231 static int process_auth_signature(struct ceph_connection *con, 2232 void *p, void *end) 2233 { 2234 u8 hmac[SHA256_DIGEST_SIZE]; 2235 int ret; 2236 2237 if (con->state != CEPH_CON_S_V2_AUTH_SIGNATURE) { 2238 con->error_msg = "protocol error, unexpected auth_signature"; 2239 return -EINVAL; 2240 } 2241 2242 ret = hmac_sha256(con, con->v2.out_sign_kvecs, 2243 con->v2.out_sign_kvec_cnt, hmac); 2244 if (ret) 2245 return ret; 2246 2247 ceph_decode_need(&p, end, SHA256_DIGEST_SIZE, bad); 2248 if (crypto_memneq(p, hmac, SHA256_DIGEST_SIZE)) { 2249 con->error_msg = "integrity error, bad auth signature"; 2250 return -EBADMSG; 2251 } 2252 2253 dout("%s con %p auth signature ok\n", __func__, con); 2254 2255 /* no reset_out_kvecs() as our auth_signature may still be pending */ 2256 if (!con->v2.server_cookie) { 2257 ret = prepare_client_ident(con); 2258 if (ret) { 2259 pr_err("prepare_client_ident failed: %d\n", ret); 2260 return ret; 2261 } 2262 2263 con->state = CEPH_CON_S_V2_SESSION_CONNECT; 2264 } else { 2265 ret = prepare_session_reconnect(con); 2266 if (ret) { 2267 pr_err("prepare_session_reconnect 
failed: %d\n", ret); 2268 return ret; 2269 } 2270 2271 con->state = CEPH_CON_S_V2_SESSION_RECONNECT; 2272 } 2273 2274 return 0; 2275 2276 bad: 2277 pr_err("failed to decode auth_signature\n"); 2278 return -EINVAL; 2279 } 2280 2281 static int process_server_ident(struct ceph_connection *con, 2282 void *p, void *end) 2283 { 2284 struct ceph_client *client = from_msgr(con->msgr); 2285 u64 features, required_features; 2286 struct ceph_entity_addr addr; 2287 u64 global_seq; 2288 u64 global_id; 2289 u64 cookie; 2290 u64 flags; 2291 int ret; 2292 2293 if (con->state != CEPH_CON_S_V2_SESSION_CONNECT) { 2294 con->error_msg = "protocol error, unexpected server_ident"; 2295 return -EINVAL; 2296 } 2297 2298 ret = ceph_decode_entity_addrvec(&p, end, true, &addr); 2299 if (ret) { 2300 pr_err("failed to decode server addrs: %d\n", ret); 2301 return ret; 2302 } 2303 2304 ceph_decode_64_safe(&p, end, global_id, bad); 2305 ceph_decode_64_safe(&p, end, global_seq, bad); 2306 ceph_decode_64_safe(&p, end, features, bad); 2307 ceph_decode_64_safe(&p, end, required_features, bad); 2308 ceph_decode_64_safe(&p, end, flags, bad); 2309 ceph_decode_64_safe(&p, end, cookie, bad); 2310 2311 dout("%s con %p addr %s/%u global_id %llu global_seq %llu features 0x%llx required_features 0x%llx flags 0x%llx cookie 0x%llx\n", 2312 __func__, con, ceph_pr_addr(&addr), le32_to_cpu(addr.nonce), 2313 global_id, global_seq, features, required_features, flags, cookie); 2314 2315 /* is this who we intended to talk to? */ 2316 if (memcmp(&addr, &con->peer_addr, sizeof(con->peer_addr))) { 2317 pr_err("bad peer addr/nonce, want %s/%u, got %s/%u\n", 2318 ceph_pr_addr(&con->peer_addr), 2319 le32_to_cpu(con->peer_addr.nonce), 2320 ceph_pr_addr(&addr), le32_to_cpu(addr.nonce)); 2321 con->error_msg = "wrong peer at address"; 2322 return -EINVAL; 2323 } 2324 2325 if (client->required_features & ~features) { 2326 pr_err("RADOS feature set mismatch: my required > server's supported 0x%llx, need 0x%llx\n", 2327 features, client->required_features & ~features); 2328 con->error_msg = "missing required protocol features"; 2329 return -EINVAL; 2330 } 2331 2332 /* 2333 * Both name->type and name->num are set in ceph_con_open() but 2334 * name->num may be bogus in the initial monmap. name->type is 2335 * verified in handle_hello(). 
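 * (The hello handler rejects a peer whose entity_type does not match
 * con->peer_name.type, so by the time server_ident arrives the type
 * is known to be correct.)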
2336 */ 2337 WARN_ON(!con->peer_name.type); 2338 con->peer_name.num = cpu_to_le64(global_id); 2339 con->v2.peer_global_seq = global_seq; 2340 con->peer_features = features; 2341 WARN_ON(required_features & ~client->supported_features); 2342 con->v2.server_cookie = cookie; 2343 2344 if (flags & CEPH_MSG_CONNECT_LOSSY) { 2345 ceph_con_flag_set(con, CEPH_CON_F_LOSSYTX); 2346 WARN_ON(con->v2.server_cookie); 2347 } else { 2348 WARN_ON(!con->v2.server_cookie); 2349 } 2350 2351 clear_in_sign_kvecs(con); 2352 clear_out_sign_kvecs(con); 2353 free_conn_bufs(con); 2354 con->delay = 0; /* reset backoff memory */ 2355 2356 con->state = CEPH_CON_S_OPEN; 2357 con->v2.out_state = OUT_S_GET_NEXT; 2358 return 0; 2359 2360 bad: 2361 pr_err("failed to decode server_ident\n"); 2362 return -EINVAL; 2363 } 2364 2365 static int process_ident_missing_features(struct ceph_connection *con, 2366 void *p, void *end) 2367 { 2368 struct ceph_client *client = from_msgr(con->msgr); 2369 u64 missing_features; 2370 2371 if (con->state != CEPH_CON_S_V2_SESSION_CONNECT) { 2372 con->error_msg = "protocol error, unexpected ident_missing_features"; 2373 return -EINVAL; 2374 } 2375 2376 ceph_decode_64_safe(&p, end, missing_features, bad); 2377 pr_err("RADOS feature set mismatch: server's required > my supported 0x%llx, missing 0x%llx\n", 2378 client->supported_features, missing_features); 2379 con->error_msg = "missing required protocol features"; 2380 return -EINVAL; 2381 2382 bad: 2383 pr_err("failed to decode ident_missing_features\n"); 2384 return -EINVAL; 2385 } 2386 2387 static int process_session_reconnect_ok(struct ceph_connection *con, 2388 void *p, void *end) 2389 { 2390 u64 seq; 2391 2392 if (con->state != CEPH_CON_S_V2_SESSION_RECONNECT) { 2393 con->error_msg = "protocol error, unexpected session_reconnect_ok"; 2394 return -EINVAL; 2395 } 2396 2397 ceph_decode_64_safe(&p, end, seq, bad); 2398 2399 dout("%s con %p seq %llu\n", __func__, con, seq); 2400 ceph_con_discard_requeued(con, seq); 2401 2402 clear_in_sign_kvecs(con); 2403 clear_out_sign_kvecs(con); 2404 free_conn_bufs(con); 2405 con->delay = 0; /* reset backoff memory */ 2406 2407 con->state = CEPH_CON_S_OPEN; 2408 con->v2.out_state = OUT_S_GET_NEXT; 2409 return 0; 2410 2411 bad: 2412 pr_err("failed to decode session_reconnect_ok\n"); 2413 return -EINVAL; 2414 } 2415 2416 static int process_session_retry(struct ceph_connection *con, 2417 void *p, void *end) 2418 { 2419 u64 connect_seq; 2420 int ret; 2421 2422 if (con->state != CEPH_CON_S_V2_SESSION_RECONNECT) { 2423 con->error_msg = "protocol error, unexpected session_retry"; 2424 return -EINVAL; 2425 } 2426 2427 ceph_decode_64_safe(&p, end, connect_seq, bad); 2428 2429 dout("%s con %p connect_seq %llu\n", __func__, con, connect_seq); 2430 WARN_ON(connect_seq <= con->v2.connect_seq); 2431 con->v2.connect_seq = connect_seq + 1; 2432 2433 free_conn_bufs(con); 2434 2435 reset_out_kvecs(con); 2436 ret = prepare_session_reconnect(con); 2437 if (ret) { 2438 pr_err("prepare_session_reconnect (cseq) failed: %d\n", ret); 2439 return ret; 2440 } 2441 2442 return 0; 2443 2444 bad: 2445 pr_err("failed to decode session_retry\n"); 2446 return -EINVAL; 2447 } 2448 2449 static int process_session_retry_global(struct ceph_connection *con, 2450 void *p, void *end) 2451 { 2452 u64 global_seq; 2453 int ret; 2454 2455 if (con->state != CEPH_CON_S_V2_SESSION_RECONNECT) { 2456 con->error_msg = "protocol error, unexpected session_retry_global"; 2457 return -EINVAL; 2458 } 2459 2460 ceph_decode_64_safe(&p, end, global_seq, bad); 2461 
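        /*
         * The peer has already seen a higher global_seq from this client
         * (e.g. on another connection) and asks us to retry: take a fresh
         * messenger-wide global_seq above the suggested value and resend
         * session_reconnect.
         */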
2462 dout("%s con %p global_seq %llu\n", __func__, con, global_seq); 2463 WARN_ON(global_seq <= con->v2.global_seq); 2464 con->v2.global_seq = ceph_get_global_seq(con->msgr, global_seq); 2465 2466 free_conn_bufs(con); 2467 2468 reset_out_kvecs(con); 2469 ret = prepare_session_reconnect(con); 2470 if (ret) { 2471 pr_err("prepare_session_reconnect (gseq) failed: %d\n", ret); 2472 return ret; 2473 } 2474 2475 return 0; 2476 2477 bad: 2478 pr_err("failed to decode session_retry_global\n"); 2479 return -EINVAL; 2480 } 2481 2482 static int process_session_reset(struct ceph_connection *con, 2483 void *p, void *end) 2484 { 2485 bool full; 2486 int ret; 2487 2488 if (con->state != CEPH_CON_S_V2_SESSION_RECONNECT) { 2489 con->error_msg = "protocol error, unexpected session_reset"; 2490 return -EINVAL; 2491 } 2492 2493 ceph_decode_8_safe(&p, end, full, bad); 2494 if (!full) { 2495 con->error_msg = "protocol error, bad session_reset"; 2496 return -EINVAL; 2497 } 2498 2499 pr_info("%s%lld %s session reset\n", ENTITY_NAME(con->peer_name), 2500 ceph_pr_addr(&con->peer_addr)); 2501 ceph_con_reset_session(con); 2502 2503 mutex_unlock(&con->mutex); 2504 if (con->ops->peer_reset) 2505 con->ops->peer_reset(con); 2506 mutex_lock(&con->mutex); 2507 if (con->state != CEPH_CON_S_V2_SESSION_RECONNECT) { 2508 dout("%s con %p state changed to %d\n", __func__, con, 2509 con->state); 2510 return -EAGAIN; 2511 } 2512 2513 free_conn_bufs(con); 2514 2515 reset_out_kvecs(con); 2516 ret = prepare_client_ident(con); 2517 if (ret) { 2518 pr_err("prepare_client_ident (rst) failed: %d\n", ret); 2519 return ret; 2520 } 2521 2522 con->state = CEPH_CON_S_V2_SESSION_CONNECT; 2523 return 0; 2524 2525 bad: 2526 pr_err("failed to decode session_reset\n"); 2527 return -EINVAL; 2528 } 2529 2530 static int process_keepalive2_ack(struct ceph_connection *con, 2531 void *p, void *end) 2532 { 2533 if (con->state != CEPH_CON_S_OPEN) { 2534 con->error_msg = "protocol error, unexpected keepalive2_ack"; 2535 return -EINVAL; 2536 } 2537 2538 ceph_decode_need(&p, end, sizeof(struct ceph_timespec), bad); 2539 ceph_decode_timespec64(&con->last_keepalive_ack, p); 2540 2541 dout("%s con %p timestamp %lld.%09ld\n", __func__, con, 2542 con->last_keepalive_ack.tv_sec, con->last_keepalive_ack.tv_nsec); 2543 2544 return 0; 2545 2546 bad: 2547 pr_err("failed to decode keepalive2_ack\n"); 2548 return -EINVAL; 2549 } 2550 2551 static int process_ack(struct ceph_connection *con, void *p, void *end) 2552 { 2553 u64 seq; 2554 2555 if (con->state != CEPH_CON_S_OPEN) { 2556 con->error_msg = "protocol error, unexpected ack"; 2557 return -EINVAL; 2558 } 2559 2560 ceph_decode_64_safe(&p, end, seq, bad); 2561 2562 dout("%s con %p seq %llu\n", __func__, con, seq); 2563 ceph_con_discard_sent(con, seq); 2564 return 0; 2565 2566 bad: 2567 pr_err("failed to decode ack\n"); 2568 return -EINVAL; 2569 } 2570 2571 static int process_control(struct ceph_connection *con, void *p, void *end) 2572 { 2573 int tag = con->v2.in_desc.fd_tag; 2574 int ret; 2575 2576 dout("%s con %p tag %d len %d\n", __func__, con, tag, (int)(end - p)); 2577 2578 switch (tag) { 2579 case FRAME_TAG_HELLO: 2580 ret = process_hello(con, p, end); 2581 break; 2582 case FRAME_TAG_AUTH_BAD_METHOD: 2583 ret = process_auth_bad_method(con, p, end); 2584 break; 2585 case FRAME_TAG_AUTH_REPLY_MORE: 2586 ret = process_auth_reply_more(con, p, end); 2587 break; 2588 case FRAME_TAG_AUTH_DONE: 2589 ret = process_auth_done(con, p, end); 2590 break; 2591 case FRAME_TAG_AUTH_SIGNATURE: 2592 ret = 
process_auth_signature(con, p, end); 2593 break; 2594 case FRAME_TAG_SERVER_IDENT: 2595 ret = process_server_ident(con, p, end); 2596 break; 2597 case FRAME_TAG_IDENT_MISSING_FEATURES: 2598 ret = process_ident_missing_features(con, p, end); 2599 break; 2600 case FRAME_TAG_SESSION_RECONNECT_OK: 2601 ret = process_session_reconnect_ok(con, p, end); 2602 break; 2603 case FRAME_TAG_SESSION_RETRY: 2604 ret = process_session_retry(con, p, end); 2605 break; 2606 case FRAME_TAG_SESSION_RETRY_GLOBAL: 2607 ret = process_session_retry_global(con, p, end); 2608 break; 2609 case FRAME_TAG_SESSION_RESET: 2610 ret = process_session_reset(con, p, end); 2611 break; 2612 case FRAME_TAG_KEEPALIVE2_ACK: 2613 ret = process_keepalive2_ack(con, p, end); 2614 break; 2615 case FRAME_TAG_ACK: 2616 ret = process_ack(con, p, end); 2617 break; 2618 default: 2619 pr_err("bad tag %d\n", tag); 2620 con->error_msg = "protocol error, bad tag"; 2621 return -EINVAL; 2622 } 2623 if (ret) { 2624 dout("%s con %p error %d\n", __func__, con, ret); 2625 return ret; 2626 } 2627 2628 prepare_read_preamble(con); 2629 return 0; 2630 } 2631 2632 /* 2633 * Return: 2634 * 1 - con->in_msg set, read message 2635 * 0 - skip message 2636 * <0 - error 2637 */ 2638 static int process_message_header(struct ceph_connection *con, 2639 void *p, void *end) 2640 { 2641 struct ceph_frame_desc *desc = &con->v2.in_desc; 2642 struct ceph_msg_header2 *hdr2 = p; 2643 struct ceph_msg_header hdr; 2644 int skip; 2645 int ret; 2646 u64 seq; 2647 2648 /* verify seq# */ 2649 seq = le64_to_cpu(hdr2->seq); 2650 if ((s64)seq - (s64)con->in_seq < 1) { 2651 pr_info("%s%lld %s skipping old message: seq %llu, expected %llu\n", 2652 ENTITY_NAME(con->peer_name), 2653 ceph_pr_addr(&con->peer_addr), 2654 seq, con->in_seq + 1); 2655 return 0; 2656 } 2657 if ((s64)seq - (s64)con->in_seq > 1) { 2658 pr_err("bad seq %llu, expected %llu\n", seq, con->in_seq + 1); 2659 con->error_msg = "bad message sequence # for incoming message"; 2660 return -EBADE; 2661 } 2662 2663 ceph_con_discard_sent(con, le64_to_cpu(hdr2->ack_seq)); 2664 2665 fill_header(&hdr, hdr2, desc->fd_lens[1], desc->fd_lens[2], 2666 desc->fd_lens[3], &con->peer_name); 2667 ret = ceph_con_in_msg_alloc(con, &hdr, &skip); 2668 if (ret) 2669 return ret; 2670 2671 WARN_ON(!con->in_msg ^ skip); 2672 if (skip) 2673 return 0; 2674 2675 WARN_ON(!con->in_msg); 2676 WARN_ON(con->in_msg->con != con); 2677 return 1; 2678 } 2679 2680 static int process_message(struct ceph_connection *con) 2681 { 2682 ceph_con_process_message(con); 2683 2684 /* 2685 * We could have been closed by ceph_con_close() because 2686 * ceph_con_process_message() temporarily drops con->mutex. 
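 * If that happened, return -EAGAIN below instead of queueing the next
 * preamble read on a connection that is being torn down.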
2687 */ 2688 if (con->state != CEPH_CON_S_OPEN) { 2689 dout("%s con %p state changed to %d\n", __func__, con, 2690 con->state); 2691 return -EAGAIN; 2692 } 2693 2694 prepare_read_preamble(con); 2695 return 0; 2696 } 2697 2698 static int __handle_control(struct ceph_connection *con, void *p) 2699 { 2700 void *end = p + con->v2.in_desc.fd_lens[0]; 2701 struct ceph_msg *msg; 2702 int ret; 2703 2704 if (con->v2.in_desc.fd_tag != FRAME_TAG_MESSAGE) 2705 return process_control(con, p, end); 2706 2707 ret = process_message_header(con, p, end); 2708 if (ret < 0) 2709 return ret; 2710 if (ret == 0) { 2711 prepare_skip_message(con); 2712 return 0; 2713 } 2714 2715 msg = con->in_msg; /* set in process_message_header() */ 2716 if (front_len(msg)) { 2717 WARN_ON(front_len(msg) > msg->front_alloc_len); 2718 msg->front.iov_len = front_len(msg); 2719 } else { 2720 msg->front.iov_len = 0; 2721 } 2722 if (middle_len(msg)) { 2723 WARN_ON(middle_len(msg) > msg->middle->alloc_len); 2724 msg->middle->vec.iov_len = middle_len(msg); 2725 } else if (msg->middle) { 2726 msg->middle->vec.iov_len = 0; 2727 } 2728 2729 if (!front_len(msg) && !middle_len(msg) && !data_len(msg)) 2730 return process_message(con); 2731 2732 if (con_secure(con)) 2733 return prepare_read_tail_secure(con); 2734 2735 return prepare_read_tail_plain(con); 2736 } 2737 2738 static int handle_preamble(struct ceph_connection *con) 2739 { 2740 struct ceph_frame_desc *desc = &con->v2.in_desc; 2741 int ret; 2742 2743 if (con_secure(con)) { 2744 ret = decrypt_preamble(con); 2745 if (ret) { 2746 if (ret == -EBADMSG) 2747 con->error_msg = "integrity error, bad preamble auth tag"; 2748 return ret; 2749 } 2750 } 2751 2752 ret = decode_preamble(con->v2.in_buf, desc); 2753 if (ret) { 2754 if (ret == -EBADMSG) 2755 con->error_msg = "integrity error, bad crc"; 2756 else 2757 con->error_msg = "protocol error, bad preamble"; 2758 return ret; 2759 } 2760 2761 dout("%s con %p tag %d seg_cnt %d %d+%d+%d+%d\n", __func__, 2762 con, desc->fd_tag, desc->fd_seg_cnt, desc->fd_lens[0], 2763 desc->fd_lens[1], desc->fd_lens[2], desc->fd_lens[3]); 2764 2765 if (!con_secure(con)) 2766 return prepare_read_control(con); 2767 2768 if (desc->fd_lens[0] > CEPH_PREAMBLE_INLINE_LEN) 2769 return prepare_read_control_remainder(con); 2770 2771 return __handle_control(con, CTRL_BODY(con->v2.in_buf)); 2772 } 2773 2774 static int handle_control(struct ceph_connection *con) 2775 { 2776 int ctrl_len = con->v2.in_desc.fd_lens[0]; 2777 void *buf; 2778 int ret; 2779 2780 WARN_ON(con_secure(con)); 2781 2782 ret = verify_control_crc(con); 2783 if (ret) { 2784 con->error_msg = "integrity error, bad crc"; 2785 return ret; 2786 } 2787 2788 if (con->state == CEPH_CON_S_V2_AUTH) { 2789 buf = alloc_conn_buf(con, ctrl_len); 2790 if (!buf) 2791 return -ENOMEM; 2792 2793 memcpy(buf, con->v2.in_kvecs[0].iov_base, ctrl_len); 2794 return __handle_control(con, buf); 2795 } 2796 2797 return __handle_control(con, con->v2.in_kvecs[0].iov_base); 2798 } 2799 2800 static int handle_control_remainder(struct ceph_connection *con) 2801 { 2802 int ret; 2803 2804 WARN_ON(!con_secure(con)); 2805 2806 ret = decrypt_control_remainder(con); 2807 if (ret) { 2808 if (ret == -EBADMSG) 2809 con->error_msg = "integrity error, bad control remainder auth tag"; 2810 return ret; 2811 } 2812 2813 return __handle_control(con, con->v2.in_kvecs[0].iov_base - 2814 CEPH_PREAMBLE_INLINE_LEN); 2815 } 2816 2817 static int handle_epilogue(struct ceph_connection *con) 2818 { 2819 u32 front_crc, middle_crc, data_crc; 2820 int ret; 2821 2822 if 
(con_secure(con)) { 2823 ret = decrypt_tail(con); 2824 if (ret) { 2825 if (ret == -EBADMSG) 2826 con->error_msg = "integrity error, bad epilogue auth tag"; 2827 return ret; 2828 } 2829 2830 /* just late_status */ 2831 ret = decode_epilogue(con->v2.in_buf, NULL, NULL, NULL); 2832 if (ret) { 2833 con->error_msg = "protocol error, bad epilogue"; 2834 return ret; 2835 } 2836 } else { 2837 ret = decode_epilogue(con->v2.in_buf, &front_crc, 2838 &middle_crc, &data_crc); 2839 if (ret) { 2840 con->error_msg = "protocol error, bad epilogue"; 2841 return ret; 2842 } 2843 2844 ret = verify_epilogue_crcs(con, front_crc, middle_crc, 2845 data_crc); 2846 if (ret) { 2847 con->error_msg = "integrity error, bad crc"; 2848 return ret; 2849 } 2850 } 2851 2852 return process_message(con); 2853 } 2854 2855 static void finish_skip(struct ceph_connection *con) 2856 { 2857 dout("%s con %p\n", __func__, con); 2858 2859 if (con_secure(con)) 2860 gcm_inc_nonce(&con->v2.in_gcm_nonce); 2861 2862 __finish_skip(con); 2863 } 2864 2865 static int populate_in_iter(struct ceph_connection *con) 2866 { 2867 int ret; 2868 2869 dout("%s con %p state %d in_state %d\n", __func__, con, con->state, 2870 con->v2.in_state); 2871 WARN_ON(iov_iter_count(&con->v2.in_iter)); 2872 2873 if (con->state == CEPH_CON_S_V2_BANNER_PREFIX) { 2874 ret = process_banner_prefix(con); 2875 } else if (con->state == CEPH_CON_S_V2_BANNER_PAYLOAD) { 2876 ret = process_banner_payload(con); 2877 } else if ((con->state >= CEPH_CON_S_V2_HELLO && 2878 con->state <= CEPH_CON_S_V2_SESSION_RECONNECT) || 2879 con->state == CEPH_CON_S_OPEN) { 2880 switch (con->v2.in_state) { 2881 case IN_S_HANDLE_PREAMBLE: 2882 ret = handle_preamble(con); 2883 break; 2884 case IN_S_HANDLE_CONTROL: 2885 ret = handle_control(con); 2886 break; 2887 case IN_S_HANDLE_CONTROL_REMAINDER: 2888 ret = handle_control_remainder(con); 2889 break; 2890 case IN_S_PREPARE_READ_DATA: 2891 ret = prepare_read_data(con); 2892 break; 2893 case IN_S_PREPARE_READ_DATA_CONT: 2894 prepare_read_data_cont(con); 2895 ret = 0; 2896 break; 2897 case IN_S_PREPARE_READ_ENC_PAGE: 2898 prepare_read_enc_page(con); 2899 ret = 0; 2900 break; 2901 case IN_S_HANDLE_EPILOGUE: 2902 ret = handle_epilogue(con); 2903 break; 2904 case IN_S_FINISH_SKIP: 2905 finish_skip(con); 2906 ret = 0; 2907 break; 2908 default: 2909 WARN(1, "bad in_state %d", con->v2.in_state); 2910 return -EINVAL; 2911 } 2912 } else { 2913 WARN(1, "bad state %d", con->state); 2914 return -EINVAL; 2915 } 2916 if (ret) { 2917 dout("%s con %p error %d\n", __func__, con, ret); 2918 return ret; 2919 } 2920 2921 if (WARN_ON(!iov_iter_count(&con->v2.in_iter))) 2922 return -ENODATA; 2923 dout("%s con %p populated %zu\n", __func__, con, 2924 iov_iter_count(&con->v2.in_iter)); 2925 return 1; 2926 } 2927 2928 int ceph_con_v2_try_read(struct ceph_connection *con) 2929 { 2930 int ret; 2931 2932 dout("%s con %p state %d need %zu\n", __func__, con, con->state, 2933 iov_iter_count(&con->v2.in_iter)); 2934 2935 if (con->state == CEPH_CON_S_PREOPEN) 2936 return 0; 2937 2938 /* 2939 * We should always have something pending here. If not, 2940 * avoid calling populate_in_iter() as if we read something 2941 * (ceph_tcp_recv() would immediately return 1). 
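 * The loop below then simply alternates the two: ceph_tcp_recv()
 * fills whatever in_iter currently describes, and populate_in_iter()
 * consumes what was read and sets up the next chunk.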
2942 */ 2943 if (WARN_ON(!iov_iter_count(&con->v2.in_iter))) 2944 return -ENODATA; 2945 2946 for (;;) { 2947 ret = ceph_tcp_recv(con); 2948 if (ret <= 0) 2949 return ret; 2950 2951 ret = populate_in_iter(con); 2952 if (ret <= 0) { 2953 if (ret && ret != -EAGAIN && !con->error_msg) 2954 con->error_msg = "read processing error"; 2955 return ret; 2956 } 2957 } 2958 } 2959 2960 static void queue_data(struct ceph_connection *con) 2961 { 2962 struct bio_vec bv; 2963 2964 con->v2.out_epil.data_crc = -1; 2965 ceph_msg_data_cursor_init(&con->v2.out_cursor, con->out_msg, 2966 data_len(con->out_msg)); 2967 2968 get_bvec_at(&con->v2.out_cursor, &bv); 2969 set_out_bvec(con, &bv, true); 2970 con->v2.out_state = OUT_S_QUEUE_DATA_CONT; 2971 } 2972 2973 static void queue_data_cont(struct ceph_connection *con) 2974 { 2975 struct bio_vec bv; 2976 2977 con->v2.out_epil.data_crc = ceph_crc32c_page( 2978 con->v2.out_epil.data_crc, con->v2.out_bvec.bv_page, 2979 con->v2.out_bvec.bv_offset, con->v2.out_bvec.bv_len); 2980 2981 ceph_msg_data_advance(&con->v2.out_cursor, con->v2.out_bvec.bv_len); 2982 if (con->v2.out_cursor.total_resid) { 2983 get_bvec_at(&con->v2.out_cursor, &bv); 2984 set_out_bvec(con, &bv, true); 2985 WARN_ON(con->v2.out_state != OUT_S_QUEUE_DATA_CONT); 2986 return; 2987 } 2988 2989 /* 2990 * We've written all data. Queue epilogue. Once it's written, 2991 * we are done. 2992 */ 2993 reset_out_kvecs(con); 2994 prepare_epilogue_plain(con, false); 2995 con->v2.out_state = OUT_S_FINISH_MESSAGE; 2996 } 2997 2998 static void queue_enc_page(struct ceph_connection *con) 2999 { 3000 struct bio_vec bv; 3001 3002 dout("%s con %p i %d resid %d\n", __func__, con, con->v2.out_enc_i, 3003 con->v2.out_enc_resid); 3004 WARN_ON(!con->v2.out_enc_resid); 3005 3006 bvec_set_page(&bv, con->v2.out_enc_pages[con->v2.out_enc_i], 3007 min(con->v2.out_enc_resid, (int)PAGE_SIZE), 0); 3008 3009 set_out_bvec(con, &bv, false); 3010 con->v2.out_enc_i++; 3011 con->v2.out_enc_resid -= bv.bv_len; 3012 3013 if (con->v2.out_enc_resid) { 3014 WARN_ON(con->v2.out_state != OUT_S_QUEUE_ENC_PAGE); 3015 return; 3016 } 3017 3018 /* 3019 * We've queued the last piece of ciphertext (ending with 3020 * epilogue) + auth tag. Once it's written, we are done. 3021 */ 3022 WARN_ON(con->v2.out_enc_i != con->v2.out_enc_page_cnt); 3023 con->v2.out_state = OUT_S_FINISH_MESSAGE; 3024 } 3025 3026 static void queue_zeros(struct ceph_connection *con) 3027 { 3028 dout("%s con %p out_zero %d\n", __func__, con, con->v2.out_zero); 3029 3030 if (con->v2.out_zero) { 3031 set_out_bvec_zero(con); 3032 con->v2.out_zero -= con->v2.out_bvec.bv_len; 3033 con->v2.out_state = OUT_S_QUEUE_ZEROS; 3034 return; 3035 } 3036 3037 /* 3038 * We've zero-filled everything up to epilogue. Queue epilogue 3039 * with late_status set to ABORTED and crcs adjusted for zeros. 3040 * Once it's written, we are done patching up for the revoke. 
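 * The peer sees the aborted late status in the epilogue and discards
 * the partially zeroed message instead of dispatching it.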
3041 */ 3042 reset_out_kvecs(con); 3043 prepare_epilogue_plain(con, true); 3044 con->v2.out_state = OUT_S_FINISH_MESSAGE; 3045 } 3046 3047 static void finish_message(struct ceph_connection *con) 3048 { 3049 dout("%s con %p msg %p\n", __func__, con, con->out_msg); 3050 3051 /* we end up here both plain and secure modes */ 3052 if (con->v2.out_enc_pages) { 3053 WARN_ON(!con->v2.out_enc_page_cnt); 3054 ceph_release_page_vector(con->v2.out_enc_pages, 3055 con->v2.out_enc_page_cnt); 3056 con->v2.out_enc_pages = NULL; 3057 con->v2.out_enc_page_cnt = 0; 3058 } 3059 /* message may have been revoked */ 3060 if (con->out_msg) { 3061 ceph_msg_put(con->out_msg); 3062 con->out_msg = NULL; 3063 } 3064 3065 con->v2.out_state = OUT_S_GET_NEXT; 3066 } 3067 3068 static int populate_out_iter(struct ceph_connection *con) 3069 { 3070 int ret; 3071 3072 dout("%s con %p state %d out_state %d\n", __func__, con, con->state, 3073 con->v2.out_state); 3074 WARN_ON(iov_iter_count(&con->v2.out_iter)); 3075 3076 if (con->state != CEPH_CON_S_OPEN) { 3077 WARN_ON(con->state < CEPH_CON_S_V2_BANNER_PREFIX || 3078 con->state > CEPH_CON_S_V2_SESSION_RECONNECT); 3079 goto nothing_pending; 3080 } 3081 3082 switch (con->v2.out_state) { 3083 case OUT_S_QUEUE_DATA: 3084 WARN_ON(!con->out_msg); 3085 queue_data(con); 3086 goto populated; 3087 case OUT_S_QUEUE_DATA_CONT: 3088 WARN_ON(!con->out_msg); 3089 queue_data_cont(con); 3090 goto populated; 3091 case OUT_S_QUEUE_ENC_PAGE: 3092 queue_enc_page(con); 3093 goto populated; 3094 case OUT_S_QUEUE_ZEROS: 3095 WARN_ON(con->out_msg); /* revoked */ 3096 queue_zeros(con); 3097 goto populated; 3098 case OUT_S_FINISH_MESSAGE: 3099 finish_message(con); 3100 break; 3101 case OUT_S_GET_NEXT: 3102 break; 3103 default: 3104 WARN(1, "bad out_state %d", con->v2.out_state); 3105 return -EINVAL; 3106 } 3107 3108 WARN_ON(con->v2.out_state != OUT_S_GET_NEXT); 3109 if (ceph_con_flag_test_and_clear(con, CEPH_CON_F_KEEPALIVE_PENDING)) { 3110 ret = prepare_keepalive2(con); 3111 if (ret) { 3112 pr_err("prepare_keepalive2 failed: %d\n", ret); 3113 return ret; 3114 } 3115 } else if (!list_empty(&con->out_queue)) { 3116 ceph_con_get_out_msg(con); 3117 ret = prepare_message(con); 3118 if (ret) { 3119 pr_err("prepare_message failed: %d\n", ret); 3120 return ret; 3121 } 3122 } else if (con->in_seq > con->in_seq_acked) { 3123 ret = prepare_ack(con); 3124 if (ret) { 3125 pr_err("prepare_ack failed: %d\n", ret); 3126 return ret; 3127 } 3128 } else { 3129 goto nothing_pending; 3130 } 3131 3132 populated: 3133 if (WARN_ON(!iov_iter_count(&con->v2.out_iter))) 3134 return -ENODATA; 3135 dout("%s con %p populated %zu\n", __func__, con, 3136 iov_iter_count(&con->v2.out_iter)); 3137 return 1; 3138 3139 nothing_pending: 3140 WARN_ON(iov_iter_count(&con->v2.out_iter)); 3141 dout("%s con %p nothing pending\n", __func__, con); 3142 ceph_con_flag_clear(con, CEPH_CON_F_WRITE_PENDING); 3143 return 0; 3144 } 3145 3146 int ceph_con_v2_try_write(struct ceph_connection *con) 3147 { 3148 int ret; 3149 3150 dout("%s con %p state %d have %zu\n", __func__, con, con->state, 3151 iov_iter_count(&con->v2.out_iter)); 3152 3153 /* open the socket first? */ 3154 if (con->state == CEPH_CON_S_PREOPEN) { 3155 WARN_ON(con->peer_addr.type != CEPH_ENTITY_ADDR_TYPE_MSGR2); 3156 3157 /* 3158 * Always bump global_seq. Bump connect_seq only if 3159 * there is a session (i.e. we are reconnecting and will 3160 * send session_reconnect instead of client_ident). 
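 * For example, a first connect has no server_cookie: connect_seq
 * stays 0 and client_ident is sent after auth. After a socket fault
 * on an established session server_cookie is set, so connect_seq is
 * bumped and session_reconnect is sent instead.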
3161 */ 3162 con->v2.global_seq = ceph_get_global_seq(con->msgr, 0); 3163 if (con->v2.server_cookie) 3164 con->v2.connect_seq++; 3165 3166 ret = prepare_read_banner_prefix(con); 3167 if (ret) { 3168 pr_err("prepare_read_banner_prefix failed: %d\n", ret); 3169 con->error_msg = "connect error"; 3170 return ret; 3171 } 3172 3173 reset_out_kvecs(con); 3174 ret = prepare_banner(con); 3175 if (ret) { 3176 pr_err("prepare_banner failed: %d\n", ret); 3177 con->error_msg = "connect error"; 3178 return ret; 3179 } 3180 3181 ret = ceph_tcp_connect(con); 3182 if (ret) { 3183 pr_err("ceph_tcp_connect failed: %d\n", ret); 3184 con->error_msg = "connect error"; 3185 return ret; 3186 } 3187 } 3188 3189 if (!iov_iter_count(&con->v2.out_iter)) { 3190 ret = populate_out_iter(con); 3191 if (ret <= 0) { 3192 if (ret && ret != -EAGAIN && !con->error_msg) 3193 con->error_msg = "write processing error"; 3194 return ret; 3195 } 3196 } 3197 3198 tcp_sock_set_cork(con->sock->sk, true); 3199 for (;;) { 3200 ret = ceph_tcp_send(con); 3201 if (ret <= 0) 3202 break; 3203 3204 ret = populate_out_iter(con); 3205 if (ret <= 0) { 3206 if (ret && ret != -EAGAIN && !con->error_msg) 3207 con->error_msg = "write processing error"; 3208 break; 3209 } 3210 } 3211 3212 tcp_sock_set_cork(con->sock->sk, false); 3213 return ret; 3214 } 3215 3216 static u32 crc32c_zeros(u32 crc, int zero_len) 3217 { 3218 int len; 3219 3220 while (zero_len) { 3221 len = min(zero_len, (int)PAGE_SIZE); 3222 crc = crc32c(crc, page_address(ceph_zero_page), len); 3223 zero_len -= len; 3224 } 3225 3226 return crc; 3227 } 3228 3229 static void prepare_zero_front(struct ceph_connection *con, int resid) 3230 { 3231 int sent; 3232 3233 WARN_ON(!resid || resid > front_len(con->out_msg)); 3234 sent = front_len(con->out_msg) - resid; 3235 dout("%s con %p sent %d resid %d\n", __func__, con, sent, resid); 3236 3237 if (sent) { 3238 con->v2.out_epil.front_crc = 3239 crc32c(-1, con->out_msg->front.iov_base, sent); 3240 con->v2.out_epil.front_crc = 3241 crc32c_zeros(con->v2.out_epil.front_crc, resid); 3242 } else { 3243 con->v2.out_epil.front_crc = crc32c_zeros(-1, resid); 3244 } 3245 3246 con->v2.out_iter.count -= resid; 3247 out_zero_add(con, resid); 3248 } 3249 3250 static void prepare_zero_middle(struct ceph_connection *con, int resid) 3251 { 3252 int sent; 3253 3254 WARN_ON(!resid || resid > middle_len(con->out_msg)); 3255 sent = middle_len(con->out_msg) - resid; 3256 dout("%s con %p sent %d resid %d\n", __func__, con, sent, resid); 3257 3258 if (sent) { 3259 con->v2.out_epil.middle_crc = 3260 crc32c(-1, con->out_msg->middle->vec.iov_base, sent); 3261 con->v2.out_epil.middle_crc = 3262 crc32c_zeros(con->v2.out_epil.middle_crc, resid); 3263 } else { 3264 con->v2.out_epil.middle_crc = crc32c_zeros(-1, resid); 3265 } 3266 3267 con->v2.out_iter.count -= resid; 3268 out_zero_add(con, resid); 3269 } 3270 3271 static void prepare_zero_data(struct ceph_connection *con) 3272 { 3273 dout("%s con %p\n", __func__, con); 3274 con->v2.out_epil.data_crc = crc32c_zeros(-1, data_len(con->out_msg)); 3275 out_zero_add(con, data_len(con->out_msg)); 3276 } 3277 3278 static void revoke_at_queue_data(struct ceph_connection *con) 3279 { 3280 int boundary; 3281 int resid; 3282 3283 WARN_ON(!data_len(con->out_msg)); 3284 WARN_ON(!iov_iter_is_kvec(&con->v2.out_iter)); 3285 resid = iov_iter_count(&con->v2.out_iter); 3286 3287 boundary = front_len(con->out_msg) + middle_len(con->out_msg); 3288 if (resid > boundary) { 3289 resid -= boundary; 3290 WARN_ON(resid > MESSAGE_HEAD_PLAIN_LEN); 3291 
dout("%s con %p was sending head\n", __func__, con); 3292 if (front_len(con->out_msg)) 3293 prepare_zero_front(con, front_len(con->out_msg)); 3294 if (middle_len(con->out_msg)) 3295 prepare_zero_middle(con, middle_len(con->out_msg)); 3296 prepare_zero_data(con); 3297 WARN_ON(iov_iter_count(&con->v2.out_iter) != resid); 3298 con->v2.out_state = OUT_S_QUEUE_ZEROS; 3299 return; 3300 } 3301 3302 boundary = middle_len(con->out_msg); 3303 if (resid > boundary) { 3304 resid -= boundary; 3305 dout("%s con %p was sending front\n", __func__, con); 3306 prepare_zero_front(con, resid); 3307 if (middle_len(con->out_msg)) 3308 prepare_zero_middle(con, middle_len(con->out_msg)); 3309 prepare_zero_data(con); 3310 queue_zeros(con); 3311 return; 3312 } 3313 3314 WARN_ON(!resid); 3315 dout("%s con %p was sending middle\n", __func__, con); 3316 prepare_zero_middle(con, resid); 3317 prepare_zero_data(con); 3318 queue_zeros(con); 3319 } 3320 3321 static void revoke_at_queue_data_cont(struct ceph_connection *con) 3322 { 3323 int sent, resid; /* current piece of data */ 3324 3325 WARN_ON(!data_len(con->out_msg)); 3326 WARN_ON(!iov_iter_is_bvec(&con->v2.out_iter)); 3327 resid = iov_iter_count(&con->v2.out_iter); 3328 WARN_ON(!resid || resid > con->v2.out_bvec.bv_len); 3329 sent = con->v2.out_bvec.bv_len - resid; 3330 dout("%s con %p sent %d resid %d\n", __func__, con, sent, resid); 3331 3332 if (sent) { 3333 con->v2.out_epil.data_crc = ceph_crc32c_page( 3334 con->v2.out_epil.data_crc, con->v2.out_bvec.bv_page, 3335 con->v2.out_bvec.bv_offset, sent); 3336 ceph_msg_data_advance(&con->v2.out_cursor, sent); 3337 } 3338 WARN_ON(resid > con->v2.out_cursor.total_resid); 3339 con->v2.out_epil.data_crc = crc32c_zeros(con->v2.out_epil.data_crc, 3340 con->v2.out_cursor.total_resid); 3341 3342 con->v2.out_iter.count -= resid; 3343 out_zero_add(con, con->v2.out_cursor.total_resid); 3344 queue_zeros(con); 3345 } 3346 3347 static void revoke_at_finish_message(struct ceph_connection *con) 3348 { 3349 int boundary; 3350 int resid; 3351 3352 WARN_ON(!iov_iter_is_kvec(&con->v2.out_iter)); 3353 resid = iov_iter_count(&con->v2.out_iter); 3354 3355 if (!front_len(con->out_msg) && !middle_len(con->out_msg) && 3356 !data_len(con->out_msg)) { 3357 WARN_ON(!resid || resid > MESSAGE_HEAD_PLAIN_LEN); 3358 dout("%s con %p was sending head (empty message) - noop\n", 3359 __func__, con); 3360 return; 3361 } 3362 3363 boundary = front_len(con->out_msg) + middle_len(con->out_msg) + 3364 CEPH_EPILOGUE_PLAIN_LEN; 3365 if (resid > boundary) { 3366 resid -= boundary; 3367 WARN_ON(resid > MESSAGE_HEAD_PLAIN_LEN); 3368 dout("%s con %p was sending head\n", __func__, con); 3369 if (front_len(con->out_msg)) 3370 prepare_zero_front(con, front_len(con->out_msg)); 3371 if (middle_len(con->out_msg)) 3372 prepare_zero_middle(con, middle_len(con->out_msg)); 3373 con->v2.out_iter.count -= CEPH_EPILOGUE_PLAIN_LEN; 3374 WARN_ON(iov_iter_count(&con->v2.out_iter) != resid); 3375 con->v2.out_state = OUT_S_QUEUE_ZEROS; 3376 return; 3377 } 3378 3379 boundary = middle_len(con->out_msg) + CEPH_EPILOGUE_PLAIN_LEN; 3380 if (resid > boundary) { 3381 resid -= boundary; 3382 dout("%s con %p was sending front\n", __func__, con); 3383 prepare_zero_front(con, resid); 3384 if (middle_len(con->out_msg)) 3385 prepare_zero_middle(con, middle_len(con->out_msg)); 3386 con->v2.out_iter.count -= CEPH_EPILOGUE_PLAIN_LEN; 3387 queue_zeros(con); 3388 return; 3389 } 3390 3391 boundary = CEPH_EPILOGUE_PLAIN_LEN; 3392 if (resid > boundary) { 3393 resid -= boundary; 3394 dout("%s con %p was 
sending middle\n", __func__, con); 3395 prepare_zero_middle(con, resid); 3396 con->v2.out_iter.count -= CEPH_EPILOGUE_PLAIN_LEN; 3397 queue_zeros(con); 3398 return; 3399 } 3400 3401 WARN_ON(!resid); 3402 dout("%s con %p was sending epilogue - noop\n", __func__, con); 3403 } 3404 3405 void ceph_con_v2_revoke(struct ceph_connection *con) 3406 { 3407 WARN_ON(con->v2.out_zero); 3408 3409 if (con_secure(con)) { 3410 WARN_ON(con->v2.out_state != OUT_S_QUEUE_ENC_PAGE && 3411 con->v2.out_state != OUT_S_FINISH_MESSAGE); 3412 dout("%s con %p secure - noop\n", __func__, con); 3413 return; 3414 } 3415 3416 switch (con->v2.out_state) { 3417 case OUT_S_QUEUE_DATA: 3418 revoke_at_queue_data(con); 3419 break; 3420 case OUT_S_QUEUE_DATA_CONT: 3421 revoke_at_queue_data_cont(con); 3422 break; 3423 case OUT_S_FINISH_MESSAGE: 3424 revoke_at_finish_message(con); 3425 break; 3426 default: 3427 WARN(1, "bad out_state %d", con->v2.out_state); 3428 break; 3429 } 3430 } 3431 3432 static void revoke_at_prepare_read_data(struct ceph_connection *con) 3433 { 3434 int remaining; 3435 int resid; 3436 3437 WARN_ON(con_secure(con)); 3438 WARN_ON(!data_len(con->in_msg)); 3439 WARN_ON(!iov_iter_is_kvec(&con->v2.in_iter)); 3440 resid = iov_iter_count(&con->v2.in_iter); 3441 WARN_ON(!resid); 3442 3443 remaining = data_len(con->in_msg) + CEPH_EPILOGUE_PLAIN_LEN; 3444 dout("%s con %p resid %d remaining %d\n", __func__, con, resid, 3445 remaining); 3446 con->v2.in_iter.count -= resid; 3447 set_in_skip(con, resid + remaining); 3448 con->v2.in_state = IN_S_FINISH_SKIP; 3449 } 3450 3451 static void revoke_at_prepare_read_data_cont(struct ceph_connection *con) 3452 { 3453 int recved, resid; /* current piece of data */ 3454 int remaining; 3455 3456 WARN_ON(con_secure(con)); 3457 WARN_ON(!data_len(con->in_msg)); 3458 WARN_ON(!iov_iter_is_bvec(&con->v2.in_iter)); 3459 resid = iov_iter_count(&con->v2.in_iter); 3460 WARN_ON(!resid || resid > con->v2.in_bvec.bv_len); 3461 recved = con->v2.in_bvec.bv_len - resid; 3462 dout("%s con %p recved %d resid %d\n", __func__, con, recved, resid); 3463 3464 if (recved) 3465 ceph_msg_data_advance(&con->v2.in_cursor, recved); 3466 WARN_ON(resid > con->v2.in_cursor.total_resid); 3467 3468 remaining = CEPH_EPILOGUE_PLAIN_LEN; 3469 dout("%s con %p total_resid %zu remaining %d\n", __func__, con, 3470 con->v2.in_cursor.total_resid, remaining); 3471 con->v2.in_iter.count -= resid; 3472 set_in_skip(con, con->v2.in_cursor.total_resid + remaining); 3473 con->v2.in_state = IN_S_FINISH_SKIP; 3474 } 3475 3476 static void revoke_at_prepare_read_enc_page(struct ceph_connection *con) 3477 { 3478 int resid; /* current enc page (not necessarily data) */ 3479 3480 WARN_ON(!con_secure(con)); 3481 WARN_ON(!iov_iter_is_bvec(&con->v2.in_iter)); 3482 resid = iov_iter_count(&con->v2.in_iter); 3483 WARN_ON(!resid || resid > con->v2.in_bvec.bv_len); 3484 3485 dout("%s con %p resid %d enc_resid %d\n", __func__, con, resid, 3486 con->v2.in_enc_resid); 3487 con->v2.in_iter.count -= resid; 3488 set_in_skip(con, resid + con->v2.in_enc_resid); 3489 con->v2.in_state = IN_S_FINISH_SKIP; 3490 } 3491 3492 static void revoke_at_handle_epilogue(struct ceph_connection *con) 3493 { 3494 int resid; 3495 3496 resid = iov_iter_count(&con->v2.in_iter); 3497 WARN_ON(!resid); 3498 3499 dout("%s con %p resid %d\n", __func__, con, resid); 3500 con->v2.in_iter.count -= resid; 3501 set_in_skip(con, resid); 3502 con->v2.in_state = IN_S_FINISH_SKIP; 3503 } 3504 3505 void ceph_con_v2_revoke_incoming(struct ceph_connection *con) 3506 { 3507 switch 
(con->v2.in_state) {
        case IN_S_PREPARE_READ_DATA:
                revoke_at_prepare_read_data(con);
                break;
        case IN_S_PREPARE_READ_DATA_CONT:
                revoke_at_prepare_read_data_cont(con);
                break;
        case IN_S_PREPARE_READ_ENC_PAGE:
                revoke_at_prepare_read_enc_page(con);
                break;
        case IN_S_HANDLE_EPILOGUE:
                revoke_at_handle_epilogue(con);
                break;
        default:
                WARN(1, "bad in_state %d", con->v2.in_state);
                break;
        }
}

bool ceph_con_v2_opened(struct ceph_connection *con)
{
        return con->v2.peer_global_seq;
}

void ceph_con_v2_reset_session(struct ceph_connection *con)
{
        con->v2.client_cookie = 0;
        con->v2.server_cookie = 0;
        con->v2.global_seq = 0;
        con->v2.connect_seq = 0;
        con->v2.peer_global_seq = 0;
}

void ceph_con_v2_reset_protocol(struct ceph_connection *con)
{
        iov_iter_truncate(&con->v2.in_iter, 0);
        iov_iter_truncate(&con->v2.out_iter, 0);
        con->v2.out_zero = 0;

        clear_in_sign_kvecs(con);
        clear_out_sign_kvecs(con);
        free_conn_bufs(con);

        if (con->v2.in_enc_pages) {
                WARN_ON(!con->v2.in_enc_page_cnt);
                ceph_release_page_vector(con->v2.in_enc_pages,
                                         con->v2.in_enc_page_cnt);
                con->v2.in_enc_pages = NULL;
                con->v2.in_enc_page_cnt = 0;
        }
        if (con->v2.out_enc_pages) {
                WARN_ON(!con->v2.out_enc_page_cnt);
                ceph_release_page_vector(con->v2.out_enc_pages,
                                         con->v2.out_enc_page_cnt);
                con->v2.out_enc_pages = NULL;
                con->v2.out_enc_page_cnt = 0;
        }

        con->v2.con_mode = CEPH_CON_MODE_UNKNOWN;
        memzero_explicit(&con->v2.in_gcm_nonce, CEPH_GCM_IV_LEN);
        memzero_explicit(&con->v2.out_gcm_nonce, CEPH_GCM_IV_LEN);

        if (con->v2.hmac_tfm) {
                crypto_free_shash(con->v2.hmac_tfm);
                con->v2.hmac_tfm = NULL;
        }
        if (con->v2.gcm_req) {
                aead_request_free(con->v2.gcm_req);
                con->v2.gcm_req = NULL;
        }
        if (con->v2.gcm_tfm) {
                crypto_free_aead(con->v2.gcm_tfm);
                con->v2.gcm_tfm = NULL;
        }
}