/*-
 * Copyright (c) 2015, Mellanox Technologies, Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "icl_iser.h"

static MALLOC_DEFINE(M_ISER_INITIATOR, "iser_initiator", "iser initiator backend");

/*
 * Register user buffer memory and initialize a passive RDMA DTO descriptor.
 * Data size is stored in task->data[ISER_DIR_IN].data_len, protection size
 * is stored in task->prot[ISER_DIR_IN].data_len.
 */
static int
iser_prepare_read_cmd(struct icl_iser_pdu *iser_pdu)
{
	struct iser_hdr *hdr = &iser_pdu->desc.iser_header;
	struct iser_data_buf *buf_in = &iser_pdu->data[ISER_DIR_IN];
	struct iser_mem_reg *mem_reg;
	int err;

	err = iser_dma_map_task_data(iser_pdu,
				     buf_in,
				     ISER_DIR_IN,
				     DMA_FROM_DEVICE);
	if (err)
		return (err);

	err = iser_reg_rdma_mem(iser_pdu, ISER_DIR_IN);
	if (err) {
		ISER_ERR("Failed to set up Data-IN RDMA");
		return (err);
	}

	mem_reg = &iser_pdu->rdma_reg[ISER_DIR_IN];

	hdr->flags |= ISER_RSV;
	hdr->read_stag = cpu_to_be32(mem_reg->rkey);
	hdr->read_va = cpu_to_be64(mem_reg->sge.addr);

	return (0);
}

/*
 * Register user buffer memory and initialize a passive RDMA DTO descriptor.
 * Data size is stored in task->data[ISER_DIR_OUT].data_len, protection size
 * is stored in task->prot[ISER_DIR_OUT].data_len.
 */
static int
iser_prepare_write_cmd(struct icl_iser_pdu *iser_pdu)
{
	struct iser_hdr *hdr = &iser_pdu->desc.iser_header;
	struct iser_data_buf *buf_out = &iser_pdu->data[ISER_DIR_OUT];
	struct iser_mem_reg *mem_reg;
	int err;

	err = iser_dma_map_task_data(iser_pdu,
				     buf_out,
				     ISER_DIR_OUT,
				     DMA_TO_DEVICE);
	if (err)
		return (err);

	err = iser_reg_rdma_mem(iser_pdu, ISER_DIR_OUT);
	if (err) {
		ISER_ERR("Failed to set up Data-OUT RDMA");
		return (err);
	}

	mem_reg = &iser_pdu->rdma_reg[ISER_DIR_OUT];

	hdr->flags |= ISER_WSV;
	hdr->write_stag = cpu_to_be32(mem_reg->rkey);
	hdr->write_va = cpu_to_be64(mem_reg->sge.addr);

	return (0);
}

/* Create a new TX descriptor and add the registered header buffer. */
void
iser_create_send_desc(struct iser_conn *iser_conn,
		      struct iser_tx_desc *tx_desc)
{
	struct iser_device *device = iser_conn->ib_conn.device;

	ib_dma_sync_single_for_cpu(device->ib_device,
	    tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE);

	memset(&tx_desc->iser_header, 0, sizeof(struct iser_hdr));
	tx_desc->iser_header.flags = ISER_VER;

	tx_desc->num_sge = 1;

	if (tx_desc->tx_sg[0].lkey != device->mr->lkey) {
		tx_desc->tx_sg[0].lkey = device->mr->lkey;
		ISER_DBG("sdesc %p lkey mismatch, fixing", tx_desc);
	}
}

void
iser_free_login_buf(struct iser_conn *iser_conn)
{
	struct iser_device *device = iser_conn->ib_conn.device;

	if (!iser_conn->login_buf)
		return;

	if (iser_conn->login_req_dma)
		ib_dma_unmap_single(device->ib_device,
				    iser_conn->login_req_dma,
				    ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE);

	if (iser_conn->login_resp_dma)
		ib_dma_unmap_single(device->ib_device,
				    iser_conn->login_resp_dma,
				    ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE);

	free(iser_conn->login_buf, M_ISER_INITIATOR);

	/* make sure we never redo any unmapping */
	iser_conn->login_req_dma = 0;
	iser_conn->login_resp_dma = 0;
	iser_conn->login_buf = NULL;
}

int
iser_alloc_login_buf(struct iser_conn *iser_conn)
{
	struct iser_device *device = iser_conn->ib_conn.device;
	int req_err, resp_err;

	BUG_ON(device == NULL);

	iser_conn->login_buf = malloc(ISCSI_DEF_MAX_RECV_SEG_LEN + ISER_RX_LOGIN_SIZE,
				      M_ISER_INITIATOR, M_WAITOK | M_ZERO);
	if (!iser_conn->login_buf)
		goto out_err;

	iser_conn->login_req_buf = iser_conn->login_buf;
	iser_conn->login_resp_buf = iser_conn->login_buf +
				    ISCSI_DEF_MAX_RECV_SEG_LEN;

	iser_conn->login_req_dma = ib_dma_map_single(device->ib_device,
						     iser_conn->login_req_buf,
						     ISCSI_DEF_MAX_RECV_SEG_LEN,
						     DMA_TO_DEVICE);

	iser_conn->login_resp_dma = ib_dma_map_single(device->ib_device,
						      iser_conn->login_resp_buf,
						      ISER_RX_LOGIN_SIZE,
						      DMA_FROM_DEVICE);

	req_err = ib_dma_mapping_error(device->ib_device,
				       iser_conn->login_req_dma);
	resp_err = ib_dma_mapping_error(device->ib_device,
					iser_conn->login_resp_dma);

	if (req_err || resp_err) {
		if (req_err)
			iser_conn->login_req_dma = 0;
		if (resp_err)
			iser_conn->login_resp_dma = 0;
		goto free_login_buf;
	}

	return (0);

free_login_buf:
	iser_free_login_buf(iser_conn);

out_err:
	ISER_DBG("unable to alloc or map login buf");
	return (ENOMEM);
}

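/*
 * Allocate the receive resources for a connection: create the fast
 * registration pool and a ring of cmds_max RX descriptors, DMA-map each
 * descriptor and point its single SGE at the full ISER_RX_PAYLOAD_SIZE
 * buffer.  min_posted_rx is kept at a quarter of the ring so the receive
 * queue can be replenished in batches.  On failure everything allocated
 * so far is released and ENOMEM is returned.
 */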
int
iser_alloc_rx_descriptors(struct iser_conn *iser_conn, int cmds_max)
{
	int i, j;
	u64 dma_addr;
	struct iser_rx_desc *rx_desc;
	struct ib_sge *rx_sg;
	struct ib_conn *ib_conn = &iser_conn->ib_conn;
	struct iser_device *device = ib_conn->device;

	iser_conn->qp_max_recv_dtos = cmds_max;
	iser_conn->min_posted_rx = iser_conn->qp_max_recv_dtos >> 2;

	if (iser_create_fastreg_pool(ib_conn, cmds_max))
		goto create_rdma_reg_res_failed;

	iser_conn->num_rx_descs = cmds_max;
	iser_conn->rx_descs = malloc(iser_conn->num_rx_descs *
				     sizeof(struct iser_rx_desc), M_ISER_INITIATOR,
				     M_WAITOK | M_ZERO);
	if (!iser_conn->rx_descs)
		goto rx_desc_alloc_fail;

	rx_desc = iser_conn->rx_descs;

	for (i = 0; i < iser_conn->qp_max_recv_dtos; i++, rx_desc++) {
		dma_addr = ib_dma_map_single(device->ib_device, (void *)rx_desc,
					     ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
		if (ib_dma_mapping_error(device->ib_device, dma_addr))
			goto rx_desc_dma_map_failed;

		rx_desc->dma_addr = dma_addr;

		rx_sg = &rx_desc->rx_sg;
		rx_sg->addr = rx_desc->dma_addr;
		rx_sg->length = ISER_RX_PAYLOAD_SIZE;
		rx_sg->lkey = device->mr->lkey;
	}

	iser_conn->rx_desc_head = 0;

	return (0);

rx_desc_dma_map_failed:
	rx_desc = iser_conn->rx_descs;
	for (j = 0; j < i; j++, rx_desc++)
		ib_dma_unmap_single(device->ib_device, rx_desc->dma_addr,
				    ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
	free(iser_conn->rx_descs, M_ISER_INITIATOR);
	iser_conn->rx_descs = NULL;
rx_desc_alloc_fail:
	iser_free_fastreg_pool(ib_conn);
create_rdma_reg_res_failed:
	ISER_ERR("failed allocating rx descriptors / data buffers");

	return (ENOMEM);
}

void
iser_free_rx_descriptors(struct iser_conn *iser_conn)
{
	int i;
	struct iser_rx_desc *rx_desc;
	struct ib_conn *ib_conn = &iser_conn->ib_conn;
	struct iser_device *device = ib_conn->device;

	iser_free_fastreg_pool(ib_conn);

	rx_desc = iser_conn->rx_descs;
	for (i = 0; i < iser_conn->qp_max_recv_dtos; i++, rx_desc++)
		ib_dma_unmap_single(device->ib_device, rx_desc->dma_addr,
				    ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);

	free(iser_conn->rx_descs, M_ISER_INITIATOR);

	/* make sure we never redo any unmapping */
	iser_conn->rx_descs = NULL;
}

static void
iser_buf_to_sg(void *buf, struct iser_data_buf *data_buf)
{
	struct scatterlist *sg;
	int i;
	size_t len, tlen;
	int offset;

	tlen = data_buf->data_len;

	for (i = 0; 0 < tlen; i++, tlen -= len) {
		sg = &data_buf->sgl[i];
		offset = ((uintptr_t)buf) & ~PAGE_MASK;
		len = min(PAGE_SIZE - offset, tlen);
		sg_set_buf(sg, buf, len);
		buf = (void *)((uintptr_t)buf + len);
	}

	data_buf->size = i;
	sg_mark_end(sg);
}

static void
iser_bio_to_sg(struct bio *bp, struct iser_data_buf *data_buf)
{
	struct scatterlist *sg;
	int i;
	size_t len, tlen;
	int offset;

	tlen = bp->bio_bcount;
	offset = bp->bio_ma_offset;

	for (i = 0; 0 < tlen; i++, tlen -= len) {
		sg = &data_buf->sgl[i];
		len = min(PAGE_SIZE - offset, tlen);
		sg_set_page(sg, bp->bio_ma[i], len, offset);
		offset = 0;
	}

	data_buf->size = i;
	sg_mark_end(sg);
}

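/*
 * Build the iSER scatterlist for a CAM SCSI I/O: CAM_DATA_BIO requests are
 * converted from the bio page array, CAM_DATA_VADDR buffers are split on
 * page boundaries, and any other CAM data transfer type is rejected with
 * EINVAL.
 */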
static int
iser_csio_to_sg(struct ccb_scsiio *csio, struct iser_data_buf *data_buf)
{
	struct ccb_hdr *ccbh;
	int err = 0;

	ccbh = &csio->ccb_h;
	switch (ccbh->flags & CAM_DATA_MASK) {
	case CAM_DATA_BIO:
		iser_bio_to_sg((struct bio *) csio->data_ptr, data_buf);
		break;
	case CAM_DATA_VADDR:
		/*
		 * Support KVA buffers for various scsi commands such as:
		 *  - REPORT_LUNS
		 *  - MODE_SENSE_6
		 *  - INQUIRY
		 *  - SERVICE_ACTION_IN.
		 * The data of these commands is always mapped into KVA.
		 */
		iser_buf_to_sg(csio->data_ptr, data_buf);
		break;
	default:
		ISER_ERR("flags 0x%X unimplemented", ccbh->flags);
		err = EINVAL;
	}
	return (err);
}

static inline bool
iser_signal_comp(u8 sig_count)
{
	return ((sig_count % ISER_SIGNAL_CMD_COUNT) == 0);
}

int
iser_send_command(struct iser_conn *iser_conn,
		  struct icl_iser_pdu *iser_pdu)
{
	struct iser_data_buf *data_buf;
	struct iser_tx_desc *tx_desc = &iser_pdu->desc;
	struct iscsi_bhs_scsi_command *hdr = (struct iscsi_bhs_scsi_command *) &(iser_pdu->desc.iscsi_header);
	struct ccb_scsiio *csio = iser_pdu->csio;
	int err = 0;
	u8 sig_count = ++iser_conn->ib_conn.sig_count;

	/* build the tx desc regd header and add it to the tx desc dto */
	tx_desc->type = ISCSI_TX_SCSI_COMMAND;
	iser_create_send_desc(iser_conn, tx_desc);

	if (hdr->bhssc_flags & BHSSC_FLAGS_R) {
		data_buf = &iser_pdu->data[ISER_DIR_IN];
	} else {
		data_buf = &iser_pdu->data[ISER_DIR_OUT];
	}

	data_buf->sg = csio->data_ptr;
	data_buf->data_len = csio->dxfer_len;

	if (likely(csio->dxfer_len)) {
		err = iser_csio_to_sg(csio, data_buf);
		if (unlikely(err))
			goto send_command_error;
	}

	if (hdr->bhssc_flags & BHSSC_FLAGS_R) {
		err = iser_prepare_read_cmd(iser_pdu);
		if (err)
			goto send_command_error;
	} else if (hdr->bhssc_flags & BHSSC_FLAGS_W) {
		err = iser_prepare_write_cmd(iser_pdu);
		if (err)
			goto send_command_error;
	}

	err = iser_post_send(&iser_conn->ib_conn, tx_desc,
			     iser_signal_comp(sig_count));
	if (!err)
		return (0);

send_command_error:
	ISER_ERR("iser_conn %p itt %u len %u err %d", iser_conn,
		 hdr->bhssc_initiator_task_tag,
		 hdr->bhssc_expected_data_transfer_length,
		 err);
	return (err);
}

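/*
 * Send a control-type (non SCSI-command) PDU.  Any attached data is carried
 * from the pre-mapped login request buffer as a second SGE, and while the
 * connection has not yet been handed off a login receive buffer is posted
 * for the reply.
 */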
int
iser_send_control(struct iser_conn *iser_conn,
		  struct icl_iser_pdu *iser_pdu)
{
	struct iser_tx_desc *mdesc;
	struct iser_device *device;
	size_t datalen = iser_pdu->icl_pdu.ip_data_len;
	int err;

	mdesc = &iser_pdu->desc;

	/* build the tx desc regd header and add it to the tx desc dto */
	mdesc->type = ISCSI_TX_CONTROL;
	iser_create_send_desc(iser_conn, mdesc);

	device = iser_conn->ib_conn.device;

	if (datalen > 0) {
		struct ib_sge *tx_dsg = &mdesc->tx_sg[1];

		ib_dma_sync_single_for_cpu(device->ib_device,
					   iser_conn->login_req_dma, datalen,
					   DMA_TO_DEVICE);

		ib_dma_sync_single_for_device(device->ib_device,
					      iser_conn->login_req_dma, datalen,
					      DMA_TO_DEVICE);

		tx_dsg->addr = iser_conn->login_req_dma;
		tx_dsg->length = datalen;
		tx_dsg->lkey = device->mr->lkey;
		mdesc->num_sge = 2;
	}

	/* For the login phase and discovery sessions we re-use the login buffer. */
	if (!iser_conn->handoff_done) {
		err = iser_post_recvl(iser_conn);
		if (err)
			goto send_control_error;
	}

	err = iser_post_send(&iser_conn->ib_conn, mdesc, true);
	if (!err)
		return (0);

send_control_error:
	ISER_ERR("conn %p failed err %d", iser_conn, err);

	return (err);
}

/**
 * iser_rcv_completion - recv DTO completion
 */
void
iser_rcv_completion(struct iser_rx_desc *rx_desc,
		    unsigned long rx_xfer_len,
		    struct ib_conn *ib_conn)
{
	struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn,
						   ib_conn);
	struct icl_conn *ic = &iser_conn->icl_conn;
	struct icl_pdu *response;
	struct iscsi_bhs *hdr;
	u64 rx_dma;
	int rx_buflen;
	int outstanding, count, err;

	/* differentiate the login buffer from all other PDUs */
	if ((char *)rx_desc == iser_conn->login_resp_buf) {
		rx_dma = iser_conn->login_resp_dma;
		rx_buflen = ISER_RX_LOGIN_SIZE;
	} else {
		rx_dma = rx_desc->dma_addr;
		rx_buflen = ISER_RX_PAYLOAD_SIZE;
	}

	ib_dma_sync_single_for_cpu(ib_conn->device->ib_device, rx_dma,
				   rx_buflen, DMA_FROM_DEVICE);

	hdr = &rx_desc->iscsi_header;

	response = iser_new_pdu(ic, M_NOWAIT);
	response->ip_bhs = hdr;
	response->ip_data_len = rx_xfer_len - ISER_HEADERS_LEN;

	/*
	 * In case we got data in the receive buffer, assign the ip_data_mbuf
	 * to the rx_buffer - later we'll copy it to upper layer buffers
	 */
	if (response->ip_data_len)
		response->ip_data_mbuf = (struct mbuf *)(rx_desc->data);

	ib_dma_sync_single_for_device(ib_conn->device->ib_device, rx_dma,
				      rx_buflen, DMA_FROM_DEVICE);

	/*
	 * Decrementing conn->post_recv_buf_count only --after-- freeing the
	 * task eliminates the need to worry about tasks which are completed in
	 * parallel to the execution of iser_conn_term, so the code that waits
	 * for the posted rx bufs refcount to become zero handles everything.
	 */
	ib_conn->post_recv_buf_count--;

	if (rx_dma == iser_conn->login_resp_dma)
		goto receive;

	outstanding = ib_conn->post_recv_buf_count;
	if (outstanding + iser_conn->min_posted_rx <= iser_conn->qp_max_recv_dtos) {
		count = min(iser_conn->qp_max_recv_dtos - outstanding,
			    iser_conn->min_posted_rx);
		err = iser_post_recvm(iser_conn, count);
		if (err)
			ISER_ERR("posting %d rx bufs err %d", count, err);
	}

receive:
	(ic->ic_receive)(response);
}

void
iser_snd_completion(struct iser_tx_desc *tx_desc,
		    struct ib_conn *ib_conn)
{
	struct icl_iser_pdu *iser_pdu = container_of(tx_desc, struct icl_iser_pdu, desc);
	struct iser_conn *iser_conn = iser_pdu->iser_conn;

	if (tx_desc && tx_desc->type == ISCSI_TX_CONTROL)
		iser_pdu_free(&iser_conn->icl_conn, &iser_pdu->icl_pdu);
}