/*
 * Copyright(c) 2016 Intel Corporation.
 *
 * This file is provided under a dual BSD/GPLv2 license.  When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * BSD LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  - Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *  - Neither the name of Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/kthread.h>
#include "cq.h"
#include "vt.h"
#include "trace.h"

/**
 * rvt_cq_enter - add a new entry to the completion queue
 * @cq: completion queue
 * @entry: work completion entry to add
 * @solicited: true if @entry is solicited
 *
 * This may be called with qp->s_lock held.
 */
void rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited)
{
        struct rvt_cq_wc *wc;
        unsigned long flags;
        u32 head;
        u32 next;

        spin_lock_irqsave(&cq->lock, flags);

        /*
         * Note that the head pointer might be writable by user processes.
         * Take care to verify it is a sane value.
         */
        wc = cq->queue;
        head = wc->head;
        if (head >= (unsigned)cq->ibcq.cqe) {
                head = cq->ibcq.cqe;
                next = 0;
        } else {
                next = head + 1;
        }

        if (unlikely(next == wc->tail)) {
                spin_unlock_irqrestore(&cq->lock, flags);
                if (cq->ibcq.event_handler) {
                        struct ib_event ev;

                        ev.device = cq->ibcq.device;
                        ev.element.cq = &cq->ibcq;
                        ev.event = IB_EVENT_CQ_ERR;
                        cq->ibcq.event_handler(&ev, cq->ibcq.cq_context);
                }
                return;
        }
        trace_rvt_cq_enter(cq, entry, head);
        if (cq->ip) {
                wc->uqueue[head].wr_id = entry->wr_id;
                wc->uqueue[head].status = entry->status;
                wc->uqueue[head].opcode = entry->opcode;
                wc->uqueue[head].vendor_err = entry->vendor_err;
                wc->uqueue[head].byte_len = entry->byte_len;
                wc->uqueue[head].ex.imm_data =
                        (__u32 __force)entry->ex.imm_data;
                wc->uqueue[head].qp_num = entry->qp->qp_num;
                wc->uqueue[head].src_qp = entry->src_qp;
                wc->uqueue[head].wc_flags = entry->wc_flags;
                wc->uqueue[head].pkey_index = entry->pkey_index;
                wc->uqueue[head].slid = ib_lid_cpu16(entry->slid);
                wc->uqueue[head].sl = entry->sl;
                wc->uqueue[head].dlid_path_bits = entry->dlid_path_bits;
                wc->uqueue[head].port_num = entry->port_num;
                /* Make sure entry is written before the head index. */
                smp_wmb();
        } else {
                wc->kqueue[head] = *entry;
        }
        wc->head = next;

        if (cq->notify == IB_CQ_NEXT_COMP ||
            (cq->notify == IB_CQ_SOLICITED &&
             (solicited || entry->status != IB_WC_SUCCESS))) {
                /*
                 * This will cause send_complete() to be called in
                 * another thread.
                 */
                spin_lock(&cq->rdi->n_cqs_lock);
                if (likely(cq->rdi->worker)) {
                        cq->notify = RVT_CQ_NONE;
                        cq->triggered++;
                        kthread_queue_work(cq->rdi->worker, &cq->comptask);
                }
                spin_unlock(&cq->rdi->n_cqs_lock);
        }

        spin_unlock_irqrestore(&cq->lock, flags);
}
EXPORT_SYMBOL(rvt_cq_enter);

static void send_complete(struct kthread_work *work)
{
        struct rvt_cq *cq = container_of(work, struct rvt_cq, comptask);

        /*
         * The completion handler will most likely rearm the notification
         * and poll for all pending entries.  If a new completion entry
         * is added while we are in this routine, kthread_queue_work()
         * won't call us again until we return so we check triggered to
         * see if we need to call the handler again.
         */
        for (;;) {
                u8 triggered = cq->triggered;

                /*
                 * IPoIB connected mode assumes the callback is from a
                 * soft IRQ.  We simulate this by blocking "bottom halves".
                 * See the implementation for ipoib_cm_handle_tx_wc(),
                 * netif_tx_lock_bh() and netif_tx_lock().
                 */
                local_bh_disable();
                cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
                local_bh_enable();

                if (cq->triggered == triggered)
                        return;
        }
}

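/*
 * Illustrative sketch (not part of this file; the qp and wr_id names are
 * hypothetical): a driver's completion path typically fills a struct ib_wc
 * on the stack and hands it to rvt_cq_enter(), which copies it into the
 * ring and, if notification is armed, queues send_complete() on the
 * per-device kthread worker:
 *
 *	struct ib_wc wc = { };
 *
 *	wc.wr_id = wr_id;
 *	wc.qp = &qp->ibqp;
 *	wc.status = IB_WC_SUCCESS;
 *	wc.opcode = IB_WC_SEND;
 *	rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, false);
 *
 * Passing true for @solicited marks the entry as solicited, so a CQ armed
 * with IB_CQ_SOLICITED will generate a completion event for it.
 */
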
/**
 * rvt_create_cq - create a completion queue
 * @ibdev: the device this completion queue is attached to
 * @attr: creation attributes
 * @context: the user context; used to set up the mmap info for user CQs
 * @udata: user data for libibverbs.so
 *
 * Called by ib_create_cq() in the generic verbs code.
 *
 * Return: pointer to the completion queue or negative errno values
 * for failure.
 */
struct ib_cq *rvt_create_cq(struct ib_device *ibdev,
                            const struct ib_cq_init_attr *attr,
                            struct ib_ucontext *context,
                            struct ib_udata *udata)
{
        struct rvt_dev_info *rdi = ib_to_rvt(ibdev);
        struct rvt_cq *cq;
        struct rvt_cq_wc *wc;
        struct ib_cq *ret;
        u32 sz;
        unsigned int entries = attr->cqe;

        if (attr->flags)
                return ERR_PTR(-EINVAL);

        if (entries < 1 || entries > rdi->dparms.props.max_cqe)
                return ERR_PTR(-EINVAL);

        /* Allocate the completion queue structure. */
        cq = kzalloc(sizeof(*cq), GFP_KERNEL);
        if (!cq)
                return ERR_PTR(-ENOMEM);

        /*
         * Allocate the completion queue entries and head/tail pointers.
         * This is allocated separately so that it can be resized and
         * also mapped into user space.
         * We need to use vmalloc() in order to support mmap and large
         * numbers of entries.
         */
        sz = sizeof(*wc);
        if (udata && udata->outlen >= sizeof(__u64))
                sz += sizeof(struct ib_uverbs_wc) * (entries + 1);
        else
                sz += sizeof(struct ib_wc) * (entries + 1);
        wc = vmalloc_user(sz);
        if (!wc) {
                ret = ERR_PTR(-ENOMEM);
                goto bail_cq;
        }

        /*
         * Return the address of the WC as the offset to mmap.
         * See rvt_mmap() for details.
         */
        if (udata && udata->outlen >= sizeof(__u64)) {
                int err;

                cq->ip = rvt_create_mmap_info(rdi, sz, context, wc);
                if (!cq->ip) {
                        ret = ERR_PTR(-ENOMEM);
                        goto bail_wc;
                }

                err = ib_copy_to_udata(udata, &cq->ip->offset,
                                       sizeof(cq->ip->offset));
                if (err) {
                        ret = ERR_PTR(err);
                        goto bail_ip;
                }
        }

        spin_lock_irq(&rdi->n_cqs_lock);
        if (rdi->n_cqs_allocated == rdi->dparms.props.max_cq) {
                spin_unlock_irq(&rdi->n_cqs_lock);
                ret = ERR_PTR(-ENOMEM);
                goto bail_ip;
        }

        rdi->n_cqs_allocated++;
        spin_unlock_irq(&rdi->n_cqs_lock);

        if (cq->ip) {
                spin_lock_irq(&rdi->pending_lock);
                list_add(&cq->ip->pending_mmaps, &rdi->pending_mmaps);
                spin_unlock_irq(&rdi->pending_lock);
        }

        /*
         * ib_create_cq() will initialize cq->ibcq except for cq->ibcq.cqe.
         * The number of entries should be >= the number requested or return
         * an error.
         */
        cq->rdi = rdi;
        cq->ibcq.cqe = entries;
        cq->notify = RVT_CQ_NONE;
        spin_lock_init(&cq->lock);
        kthread_init_work(&cq->comptask, send_complete);
        cq->queue = wc;

        ret = &cq->ibcq;

        goto done;

bail_ip:
        kfree(cq->ip);
bail_wc:
        vfree(wc);
bail_cq:
        kfree(cq);
done:
        return ret;
}

/**
 * rvt_destroy_cq - destroy a completion queue
 * @ibcq: the completion queue to destroy.
 *
 * Called by ib_destroy_cq() in the generic verbs code.
 *
 * Return: always 0
 */
int rvt_destroy_cq(struct ib_cq *ibcq)
{
        struct rvt_cq *cq = ibcq_to_rvtcq(ibcq);
        struct rvt_dev_info *rdi = cq->rdi;

        kthread_flush_work(&cq->comptask);
        spin_lock_irq(&rdi->n_cqs_lock);
        rdi->n_cqs_allocated--;
        spin_unlock_irq(&rdi->n_cqs_lock);
        if (cq->ip)
                kref_put(&cq->ip->ref, rvt_release_mmap_info);
        else
                vfree(cq->queue);
        kfree(cq);

        return 0;
}

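/*
 * Illustrative sketch (kernel-consumer code, not part of this file; the
 * handler and context names are hypothetical): rvt_create_cq() and
 * rvt_destroy_cq() are normally reached through the generic verbs entry
 * points rather than called directly:
 *
 *	struct ib_cq_init_attr attr = { .cqe = 128, .comp_vector = 0 };
 *	struct ib_cq *cq;
 *
 *	cq = ib_create_cq(ibdev, my_comp_handler, NULL, my_context, &attr);
 *	if (IS_ERR(cq))
 *		return PTR_ERR(cq);
 *	...
 *	ib_destroy_cq(cq);
 *
 * For a kernel CQ (no udata) the queue allocated above is
 * sizeof(struct rvt_cq_wc) + sizeof(struct ib_wc) * (cqe + 1); the extra
 * slot lets head == tail unambiguously mean "queue empty".
 */
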
/**
 * rvt_req_notify_cq - change the notification type for a completion queue
 * @ibcq: the completion queue
 * @notify_flags: the type of notification to request
 *
 * This may be called from interrupt context.  Also called by
 * ib_req_notify_cq() in the generic verbs code.
 *
 * Return: 0 for success.
 */
int rvt_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags)
{
        struct rvt_cq *cq = ibcq_to_rvtcq(ibcq);
        unsigned long flags;
        int ret = 0;

        spin_lock_irqsave(&cq->lock, flags);
        /*
         * Don't change IB_CQ_NEXT_COMP to IB_CQ_SOLICITED but allow
         * any other transitions (see C11-31 and C11-32 in ch. 11.4.2.2).
         */
        if (cq->notify != IB_CQ_NEXT_COMP)
                cq->notify = notify_flags & IB_CQ_SOLICITED_MASK;

        if ((notify_flags & IB_CQ_REPORT_MISSED_EVENTS) &&
            cq->queue->head != cq->queue->tail)
                ret = 1;

        spin_unlock_irqrestore(&cq->lock, flags);

        return ret;
}

/**
 * rvt_resize_cq - change the size of the CQ
 * @ibcq: the completion queue
 * @cqe: the new number of entries to support
 * @udata: user data for libibverbs.so
 *
 * Return: 0 for success.
 */
int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
{
        struct rvt_cq *cq = ibcq_to_rvtcq(ibcq);
        struct rvt_cq_wc *old_wc;
        struct rvt_cq_wc *wc;
        u32 head, tail, n;
        int ret;
        u32 sz;
        struct rvt_dev_info *rdi = cq->rdi;

        if (cqe < 1 || cqe > rdi->dparms.props.max_cqe)
                return -EINVAL;

        /*
         * Need to use vmalloc() if we want to support large #s of entries.
         */
        sz = sizeof(*wc);
        if (udata && udata->outlen >= sizeof(__u64))
                sz += sizeof(struct ib_uverbs_wc) * (cqe + 1);
        else
                sz += sizeof(struct ib_wc) * (cqe + 1);
        wc = vmalloc_user(sz);
        if (!wc)
                return -ENOMEM;

        /* Check that we can write the offset to mmap. */
        if (udata && udata->outlen >= sizeof(__u64)) {
                __u64 offset = 0;

                ret = ib_copy_to_udata(udata, &offset, sizeof(offset));
                if (ret)
                        goto bail_free;
        }

        spin_lock_irq(&cq->lock);
        /*
         * Make sure head and tail are sane since they
         * might be user writable.
         */
        old_wc = cq->queue;
        head = old_wc->head;
        if (head > (u32)cq->ibcq.cqe)
                head = (u32)cq->ibcq.cqe;
        tail = old_wc->tail;
        if (tail > (u32)cq->ibcq.cqe)
                tail = (u32)cq->ibcq.cqe;
        if (head < tail)
                n = cq->ibcq.cqe + 1 + head - tail;
        else
                n = head - tail;
        if (unlikely((u32)cqe < n)) {
                ret = -EINVAL;
                goto bail_unlock;
        }
        for (n = 0; tail != head; n++) {
                if (cq->ip)
                        wc->uqueue[n] = old_wc->uqueue[tail];
                else
                        wc->kqueue[n] = old_wc->kqueue[tail];
                if (tail == (u32)cq->ibcq.cqe)
                        tail = 0;
                else
                        tail++;
        }
        cq->ibcq.cqe = cqe;
        wc->head = n;
        wc->tail = 0;
        cq->queue = wc;
        spin_unlock_irq(&cq->lock);

        vfree(old_wc);

        if (cq->ip) {
                struct rvt_mmap_info *ip = cq->ip;

                rvt_update_mmap_info(rdi, ip, sz, wc);

                /*
                 * Return the offset to mmap.
                 * See rvt_mmap() for details.
                 */
                if (udata && udata->outlen >= sizeof(__u64)) {
                        ret = ib_copy_to_udata(udata, &ip->offset,
                                               sizeof(ip->offset));
                        if (ret)
                                return ret;
                }

                spin_lock_irq(&rdi->pending_lock);
                if (list_empty(&ip->pending_mmaps))
                        list_add(&ip->pending_mmaps, &rdi->pending_mmaps);
                spin_unlock_irq(&rdi->pending_lock);
        }

        return 0;

bail_unlock:
        spin_unlock_irq(&cq->lock);
bail_free:
        vfree(wc);
        return ret;
}

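/*
 * Illustrative re-arm pattern (consumer-side sketch; the poll loop and
 * handle_completion() below are hypothetical): because rvt_req_notify_cq()
 * returns 1 when IB_CQ_REPORT_MISSED_EVENTS is set and completions are
 * already queued, a kernel consumer typically drains the CQ again instead
 * of waiting for the next completion event:
 *
 *	do {
 *		while (ib_poll_cq(cq, 1, &wc) > 0)
 *			handle_completion(&wc);
 *	} while (ib_req_notify_cq(cq, IB_CQ_NEXT_COMP |
 *				  IB_CQ_REPORT_MISSED_EVENTS) > 0);
 */
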
/**
 * rvt_poll_cq - poll for work completion entries
 * @ibcq: the completion queue to poll
 * @num_entries: the maximum number of entries to return
 * @entry: pointer to array where work completions are placed
 *
 * This may be called from interrupt context.  Also called by ib_poll_cq()
 * in the generic verbs code.
 *
 * Return: the number of completion entries polled.
 */
int rvt_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
{
        struct rvt_cq *cq = ibcq_to_rvtcq(ibcq);
        struct rvt_cq_wc *wc;
        unsigned long flags;
        int npolled;
        u32 tail;

        /* The kernel can only poll a kernel completion queue */
        if (cq->ip)
                return -EINVAL;

        spin_lock_irqsave(&cq->lock, flags);

        wc = cq->queue;
        tail = wc->tail;
        if (tail > (u32)cq->ibcq.cqe)
                tail = (u32)cq->ibcq.cqe;
        for (npolled = 0; npolled < num_entries; ++npolled, ++entry) {
                if (tail == wc->head)
                        break;
                /* The kernel doesn't need a RMB since it has the lock. */
                trace_rvt_cq_poll(cq, &wc->kqueue[tail], npolled);
                *entry = wc->kqueue[tail];
                if (tail >= cq->ibcq.cqe)
                        tail = 0;
                else
                        tail++;
        }
        wc->tail = tail;

        spin_unlock_irqrestore(&cq->lock, flags);

        return npolled;
}

/**
 * rvt_driver_cq_init - Init cq resources on behalf of driver
 * @rdi: rvt dev structure
 *
 * Return: 0 on success
 */
int rvt_driver_cq_init(struct rvt_dev_info *rdi)
{
        int cpu;
        struct kthread_worker *worker;

        if (rdi->worker)
                return 0;

        spin_lock_init(&rdi->n_cqs_lock);

        cpu = cpumask_first(cpumask_of_node(rdi->dparms.node));
        worker = kthread_create_worker_on_cpu(cpu, 0,
                                              "%s", rdi->dparms.cq_name);
        if (IS_ERR(worker))
                return PTR_ERR(worker);

        set_user_nice(worker->task, MIN_NICE);
        rdi->worker = worker;
        return 0;
}

/**
 * rvt_cq_exit - tear down cq resources
 * @rdi: rvt dev structure
 */
void rvt_cq_exit(struct rvt_dev_info *rdi)
{
        struct kthread_worker *worker;

        /* block future queuing of send_complete() from rvt_cq_enter() */
        spin_lock_irq(&rdi->n_cqs_lock);
        worker = rdi->worker;
        if (!worker) {
                spin_unlock_irq(&rdi->n_cqs_lock);
                return;
        }
        rdi->worker = NULL;
        spin_unlock_irq(&rdi->n_cqs_lock);

        kthread_destroy_worker(worker);
}

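/*
 * Illustrative ordering note (the registration-path call sites are an
 * assumption, not shown in this file): rvt_driver_cq_init() must run once
 * per device before the first CQ can arm a completion, so that rdi->worker
 * exists when rvt_cq_enter() checks it, and rvt_cq_exit() should run only
 * after every CQ has been destroyed:
 *
 *	if (rvt_driver_cq_init(rdi))
 *		goto bail;
 *	...
 *	rvt_cq_exit(rdi);
 *
 * rvt_cq_exit() clears rdi->worker under n_cqs_lock before destroying the
 * worker, which is what lets rvt_cq_enter() skip queuing, rather than use
 * a stale worker, if a late completion races with teardown.
 */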