/* $FreeBSD$ */
/*-
 * Copyright (c) 2015, Mellanox Technologies, Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "icl_iser.h"

static MALLOC_DEFINE(M_ISER_INITIATOR, "iser_initiator", "iser initiator backend");

/* Register user buffer memory and initialize passive rdma
 * dto descriptor. Data size is stored in
 * task->data[ISER_DIR_IN].data_len, protection size
 * is stored in task->prot[ISER_DIR_IN].data_len
 */
static int
iser_prepare_read_cmd(struct icl_iser_pdu *iser_pdu)
{
	struct iser_hdr *hdr = &iser_pdu->desc.iser_header;
	struct iser_data_buf *buf_in = &iser_pdu->data[ISER_DIR_IN];
	struct iser_mem_reg *mem_reg;
	int err;

	err = iser_dma_map_task_data(iser_pdu, buf_in, ISER_DIR_IN,
	    DMA_FROM_DEVICE);
	if (err)
		return (err);

	err = iser_reg_rdma_mem(iser_pdu, ISER_DIR_IN);
	if (err) {
		ISER_ERR("Failed to set up Data-IN RDMA");
		return (err);
	}

	mem_reg = &iser_pdu->rdma_reg[ISER_DIR_IN];

	hdr->flags |= ISER_RSV;
	hdr->read_stag = cpu_to_be32(mem_reg->rkey);
	hdr->read_va = cpu_to_be64(mem_reg->sge.addr);

	return (0);
}

/* Register user buffer memory and initialize passive rdma
 * dto descriptor.
 * Data size is stored in task->data[ISER_DIR_OUT].data_len,
 * protection size is stored in task->prot[ISER_DIR_OUT].data_len
 */
static int
iser_prepare_write_cmd(struct icl_iser_pdu *iser_pdu)
{
	struct iser_hdr *hdr = &iser_pdu->desc.iser_header;
	struct iser_data_buf *buf_out = &iser_pdu->data[ISER_DIR_OUT];
	struct iser_mem_reg *mem_reg;
	int err;

	err = iser_dma_map_task_data(iser_pdu, buf_out, ISER_DIR_OUT,
	    DMA_TO_DEVICE);
	if (err)
		return (err);

	err = iser_reg_rdma_mem(iser_pdu, ISER_DIR_OUT);
	if (err) {
		ISER_ERR("Failed to set up Data-out RDMA");
		return (err);
	}

	mem_reg = &iser_pdu->rdma_reg[ISER_DIR_OUT];

	hdr->flags |= ISER_WSV;
	hdr->write_stag = cpu_to_be32(mem_reg->rkey);
	hdr->write_va = cpu_to_be64(mem_reg->sge.addr);

	return (0);
}

/* Creates a new tx descriptor and adds header regd buffer */
void
iser_create_send_desc(struct iser_conn *iser_conn,
    struct iser_tx_desc *tx_desc)
{
	struct iser_device *device = iser_conn->ib_conn.device;

	ib_dma_sync_single_for_cpu(device->ib_device,
	    tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE);

	memset(&tx_desc->iser_header, 0, sizeof(struct iser_hdr));
	tx_desc->iser_header.flags = ISER_VER;

	tx_desc->num_sge = 1;

	if (tx_desc->tx_sg[0].lkey != device->mr->lkey) {
		tx_desc->tx_sg[0].lkey = device->mr->lkey;
		ISER_DBG("sdesc %p lkey mismatch, fixing", tx_desc);
	}
}

/* Unmap and free the login buffer allocated by iser_alloc_login_buf() */
void
iser_free_login_buf(struct iser_conn *iser_conn)
{
	struct iser_device *device = iser_conn->ib_conn.device;

	if (!iser_conn->login_buf)
		return;

	if (iser_conn->login_req_dma)
		ib_dma_unmap_single(device->ib_device,
		    iser_conn->login_req_dma,
		    ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE);

	if (iser_conn->login_resp_dma)
		ib_dma_unmap_single(device->ib_device,
		    iser_conn->login_resp_dma,
		    ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE);

	free(iser_conn->login_buf, M_ISER_INITIATOR);

	/* make sure we never redo any unmapping */
	iser_conn->login_req_dma = 0;
	iser_conn->login_resp_dma = 0;
	iser_conn->login_buf = NULL;
}

/*
 * Allocate a single buffer holding both the login request and the login
 * response and DMA-map each half for its transfer direction.
 */
int
iser_alloc_login_buf(struct iser_conn *iser_conn)
{
	struct iser_device *device = iser_conn->ib_conn.device;
	int req_err, resp_err;

	BUG_ON(device == NULL);

	iser_conn->login_buf = malloc(ISCSI_DEF_MAX_RECV_SEG_LEN + ISER_RX_LOGIN_SIZE,
	    M_ISER_INITIATOR, M_WAITOK | M_ZERO);

	if (!iser_conn->login_buf)
		goto out_err;

	iser_conn->login_req_buf = iser_conn->login_buf;
	iser_conn->login_resp_buf = iser_conn->login_buf +
	    ISCSI_DEF_MAX_RECV_SEG_LEN;

	iser_conn->login_req_dma = ib_dma_map_single(device->ib_device,
	    iser_conn->login_req_buf,
	    ISCSI_DEF_MAX_RECV_SEG_LEN,
	    DMA_TO_DEVICE);

	iser_conn->login_resp_dma = ib_dma_map_single(device->ib_device,
	    iser_conn->login_resp_buf,
	    ISER_RX_LOGIN_SIZE,
	    DMA_FROM_DEVICE);

	req_err = ib_dma_mapping_error(device->ib_device,
	    iser_conn->login_req_dma);
	resp_err = ib_dma_mapping_error(device->ib_device,
	    iser_conn->login_resp_dma);

	if (req_err || resp_err) {
		if (req_err)
			iser_conn->login_req_dma = 0;
		if (resp_err)
			iser_conn->login_resp_dma = 0;
		goto free_login_buf;
	}

	return (0);

free_login_buf:
	iser_free_login_buf(iser_conn);

out_err:
	ISER_DBG("unable to alloc or map login buf");
	return (ENOMEM);
}
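/*
 * Create the connection's fast registration pool and allocate, DMA-map and
 * pre-format one receive descriptor per command slot, so receive buffers
 * can later be posted without further per-I/O mapping work.
 */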
int
iser_alloc_rx_descriptors(struct iser_conn *iser_conn, int cmds_max)
{
	int i, j;
	u64 dma_addr;
	struct iser_rx_desc *rx_desc;
	struct ib_sge *rx_sg;
	struct ib_conn *ib_conn = &iser_conn->ib_conn;
	struct iser_device *device = ib_conn->device;

	iser_conn->qp_max_recv_dtos = cmds_max;
	iser_conn->min_posted_rx = iser_conn->qp_max_recv_dtos >> 2;

	if (iser_create_fastreg_pool(ib_conn, cmds_max))
		goto create_rdma_reg_res_failed;

	iser_conn->num_rx_descs = cmds_max;
	iser_conn->rx_descs = malloc(iser_conn->num_rx_descs *
	    sizeof(struct iser_rx_desc), M_ISER_INITIATOR, M_WAITOK | M_ZERO);
	if (!iser_conn->rx_descs)
		goto rx_desc_alloc_fail;

	rx_desc = iser_conn->rx_descs;

	for (i = 0; i < iser_conn->qp_max_recv_dtos; i++, rx_desc++) {
		dma_addr = ib_dma_map_single(device->ib_device, (void *)rx_desc,
		    ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
		if (ib_dma_mapping_error(device->ib_device, dma_addr))
			goto rx_desc_dma_map_failed;

		rx_desc->dma_addr = dma_addr;

		rx_sg = &rx_desc->rx_sg;
		rx_sg->addr = rx_desc->dma_addr;
		rx_sg->length = ISER_RX_PAYLOAD_SIZE;
		rx_sg->lkey = device->mr->lkey;
	}

	iser_conn->rx_desc_head = 0;

	return (0);

rx_desc_dma_map_failed:
	rx_desc = iser_conn->rx_descs;
	for (j = 0; j < i; j++, rx_desc++)
		ib_dma_unmap_single(device->ib_device, rx_desc->dma_addr,
		    ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
	free(iser_conn->rx_descs, M_ISER_INITIATOR);
	iser_conn->rx_descs = NULL;
rx_desc_alloc_fail:
	iser_free_fastreg_pool(ib_conn);
create_rdma_reg_res_failed:
	ISER_ERR("failed allocating rx descriptors / data buffers");

	return (ENOMEM);
}

/*
 * Unmap and free the receive descriptors set up by
 * iser_alloc_rx_descriptors() and release the fast registration pool.
 */
void
iser_free_rx_descriptors(struct iser_conn *iser_conn)
{
	int i;
	struct iser_rx_desc *rx_desc;
	struct ib_conn *ib_conn = &iser_conn->ib_conn;
	struct iser_device *device = ib_conn->device;

	iser_free_fastreg_pool(ib_conn);

	rx_desc = iser_conn->rx_descs;
	for (i = 0; i < iser_conn->qp_max_recv_dtos; i++, rx_desc++)
		ib_dma_unmap_single(device->ib_device, rx_desc->dma_addr,
		    ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);

	free(iser_conn->rx_descs, M_ISER_INITIATOR);

	/* make sure we never redo any unmapping */
	iser_conn->rx_descs = NULL;
}

/* Build a scatterlist over a contiguous KVA buffer, splitting it at page boundaries */
static void
iser_buf_to_sg(void *buf, struct iser_data_buf *data_buf)
{
	struct scatterlist *sg;
	int i;
	size_t len, tlen;
	int offset;

	tlen = data_buf->data_len;

	for (i = 0; 0 < tlen; i++, tlen -= len) {
		sg = &data_buf->sgl[i];
		offset = ((uintptr_t)buf) & ~PAGE_MASK;
		len = min(PAGE_SIZE - offset, tlen);
		sg_set_buf(sg, buf, len);
		buf = (void *)(((u64)buf) + (u64)len);
	}

	data_buf->size = i;
	sg_mark_end(sg);
}

/* Build a scatterlist from the VM pages backing an unmapped bio */
static void
iser_bio_to_sg(struct bio *bp, struct iser_data_buf *data_buf)
{
	struct scatterlist *sg;
	int i;
	size_t len, tlen;
	int offset;

	tlen = bp->bio_bcount;
	offset = bp->bio_ma_offset;

	for (i = 0; 0 < tlen; i++, tlen -= len) {
		sg = &data_buf->sgl[i];
		len = min(PAGE_SIZE - offset, tlen);
		sg_set_page(sg, bp->bio_ma[i], len, offset);
		offset = 0;
	}

	data_buf->size = i;
	sg_mark_end(sg);
}
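/*
 * Convert the data buffer attached to a CAM SCSI I/O CCB into a
 * scatterlist, dispatching on the CCB's data-transfer type.
 */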
static int
iser_csio_to_sg(struct ccb_scsiio *csio, struct iser_data_buf *data_buf)
{
	struct ccb_hdr *ccbh;
	int err = 0;

	ccbh = &csio->ccb_h;
	switch (ccbh->flags & CAM_DATA_MASK) {
	case CAM_DATA_BIO:
		iser_bio_to_sg((struct bio *)csio->data_ptr, data_buf);
		break;
	case CAM_DATA_VADDR:
		/*
		 * Support KVA buffers for various SCSI commands such as:
		 * - REPORT_LUNS
		 * - MODE_SENSE_6
		 * - INQUIRY
		 * - SERVICE_ACTION_IN.
		 * The data of these commands is always mapped into KVA.
		 */
		iser_buf_to_sg(csio->data_ptr, data_buf);
		break;
	default:
		ISER_ERR("flags 0x%X unimplemented", ccbh->flags);
		err = EINVAL;
	}

	return (err);
}

static inline bool
iser_signal_comp(u8 sig_count)
{
	return ((sig_count % ISER_SIGNAL_CMD_COUNT) == 0);
}

/*
 * Send a SCSI command PDU.  When the command moves data, the data buffer
 * is mapped and registered for RDMA before the send is posted.
 */
int
iser_send_command(struct iser_conn *iser_conn,
    struct icl_iser_pdu *iser_pdu)
{
	struct iser_data_buf *data_buf;
	struct iser_tx_desc *tx_desc = &iser_pdu->desc;
	struct iscsi_bhs_scsi_command *hdr =
	    (struct iscsi_bhs_scsi_command *)&iser_pdu->desc.iscsi_header;
	struct ccb_scsiio *csio = iser_pdu->csio;
	int err = 0;
	u8 sig_count = ++iser_conn->ib_conn.sig_count;

	/* build the tx desc regd header and add it to the tx desc dto */
	tx_desc->type = ISCSI_TX_SCSI_COMMAND;
	iser_create_send_desc(iser_conn, tx_desc);

	if (hdr->bhssc_flags & BHSSC_FLAGS_R) {
		data_buf = &iser_pdu->data[ISER_DIR_IN];
	} else {
		data_buf = &iser_pdu->data[ISER_DIR_OUT];
	}

	data_buf->sg = csio->data_ptr;
	data_buf->data_len = csio->dxfer_len;

	if (likely(csio->dxfer_len)) {
		err = iser_csio_to_sg(csio, data_buf);
		if (unlikely(err))
			goto send_command_error;
	}

	if (hdr->bhssc_flags & BHSSC_FLAGS_R) {
		err = iser_prepare_read_cmd(iser_pdu);
		if (err)
			goto send_command_error;
	} else if (hdr->bhssc_flags & BHSSC_FLAGS_W) {
		err = iser_prepare_write_cmd(iser_pdu);
		if (err)
			goto send_command_error;
	}

	err = iser_post_send(&iser_conn->ib_conn, tx_desc,
	    iser_signal_comp(sig_count));
	if (!err)
		return (0);

send_command_error:
	ISER_ERR("iser_conn %p itt %u len %u err %d", iser_conn,
	    hdr->bhssc_initiator_task_tag,
	    hdr->bhssc_expected_data_transfer_length,
	    err);
	return (err);
}

/*
 * Send a control-type PDU.  Any attached data segment is sent out of the
 * pre-mapped login buffer.
 */
int
iser_send_control(struct iser_conn *iser_conn,
    struct icl_iser_pdu *iser_pdu)
{
	struct iser_tx_desc *mdesc;
	struct iser_device *device;
	size_t datalen = iser_pdu->icl_pdu.ip_data_len;
	int err;

	mdesc = &iser_pdu->desc;

	/* build the tx desc regd header and add it to the tx desc dto */
	mdesc->type = ISCSI_TX_CONTROL;
	iser_create_send_desc(iser_conn, mdesc);

	device = iser_conn->ib_conn.device;

	if (datalen > 0) {
		struct ib_sge *tx_dsg = &mdesc->tx_sg[1];

		ib_dma_sync_single_for_cpu(device->ib_device,
		    iser_conn->login_req_dma, datalen, DMA_TO_DEVICE);

		ib_dma_sync_single_for_device(device->ib_device,
		    iser_conn->login_req_dma, datalen, DMA_TO_DEVICE);

		tx_dsg->addr = iser_conn->login_req_dma;
		tx_dsg->length = datalen;
		tx_dsg->lkey = device->mr->lkey;
		mdesc->num_sge = 2;
	}

	/* For the login phase and discovery sessions we re-use the login buffer */
	if (!iser_conn->handoff_done) {
		err = iser_post_recvl(iser_conn);
		if (err)
			goto send_control_error;
	}

	err = iser_post_send(&iser_conn->ib_conn, mdesc, true);
	if (!err)
		return (0);

send_control_error:
	ISER_ERR("conn %p failed err %d", iser_conn, err);

	return (err);
}
/**
 * iser_rcv_completion - recv DTO completion handler
 */
void
iser_rcv_completion(struct iser_rx_desc *rx_desc,
    unsigned long rx_xfer_len,
    struct ib_conn *ib_conn)
{
	struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn,
	    ib_conn);
	struct icl_conn *ic = &iser_conn->icl_conn;
	struct icl_pdu *response;
	struct iscsi_bhs *hdr;
	u64 rx_dma;
	int rx_buflen;
	int outstanding, count, err;

	/* differentiate the login response from all other PDUs */
	if ((char *)rx_desc == iser_conn->login_resp_buf) {
		rx_dma = iser_conn->login_resp_dma;
		rx_buflen = ISER_RX_LOGIN_SIZE;
	} else {
		rx_dma = rx_desc->dma_addr;
		rx_buflen = ISER_RX_PAYLOAD_SIZE;
	}

	ib_dma_sync_single_for_cpu(ib_conn->device->ib_device, rx_dma,
	    rx_buflen, DMA_FROM_DEVICE);

	hdr = &rx_desc->iscsi_header;

	response = iser_new_pdu(ic, M_NOWAIT);
	response->ip_bhs = hdr;
	response->ip_data_len = rx_xfer_len - ISER_HEADERS_LEN;

	/*
	 * In case we got data in the receive buffer, point ip_data_mbuf
	 * at the rx buffer; it is copied to the upper-layer buffers later.
	 */
	if (response->ip_data_len)
		response->ip_data_mbuf = (struct mbuf *)(rx_desc->data);

	ib_dma_sync_single_for_device(ib_conn->device->ib_device, rx_dma,
	    rx_buflen, DMA_FROM_DEVICE);

	/*
	 * Decrementing conn->post_recv_buf_count only --after-- freeing the
	 * task eliminates the need to worry about tasks which are completed
	 * in parallel to the execution of iser_conn_term.  So the code that
	 * waits for the posted rx bufs refcount to become zero handles
	 * everything.
	 */
	ib_conn->post_recv_buf_count--;

	if (rx_dma == iser_conn->login_resp_dma)
		goto receive;

	outstanding = ib_conn->post_recv_buf_count;
	if (outstanding + iser_conn->min_posted_rx <= iser_conn->qp_max_recv_dtos) {
		count = min(iser_conn->qp_max_recv_dtos - outstanding,
		    iser_conn->min_posted_rx);
		err = iser_post_recvm(iser_conn, count);
		if (err)
			ISER_ERR("posting %d rx bufs err %d", count, err);
	}

receive:
	(ic->ic_receive)(response);
}

/*
 * Send (TX) DTO completion handler; only control PDUs are released here.
 */
void
iser_snd_completion(struct iser_tx_desc *tx_desc,
    struct ib_conn *ib_conn)
{
	struct icl_iser_pdu *iser_pdu;
	struct iser_conn *iser_conn;

	if (tx_desc == NULL || tx_desc->type != ISCSI_TX_CONTROL)
		return;

	iser_pdu = container_of(tx_desc, struct icl_iser_pdu, desc);
	iser_conn = iser_pdu->iser_conn;
	iser_pdu_free(&iser_conn->icl_conn, &iser_pdu->icl_pdu);
}