/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include "dapl.h"
#include "dapl_tavor_wr.h"
#include "dapl_hash.h"
#include "dapl_tavor_ibtf_impl.h"

static dapls_tavor_wrid_entry_t *dapli_tavor_wrid_find_match(
	dapls_tavor_workq_hdr_t *, tavor_hw_cqe_t *);
static dapls_tavor_wrid_list_hdr_t *dapli_tavor_wrid_get_list(uint32_t, int);
static void dapli_tavor_wrid_reaplist_add(ib_cq_handle_t,
    dapls_tavor_workq_hdr_t *);
static dapls_tavor_workq_hdr_t *dapli_tavor_wrid_wqhdr_find(ib_cq_handle_t,
    uint_t, uint_t);
static uint32_t dapli_tavor_wrid_get_wqeaddrsz(dapls_tavor_workq_hdr_t *);
static dapls_tavor_workq_hdr_t *dapli_tavor_wrid_list_reap(
	dapls_tavor_wrid_list_hdr_t *);
static dapls_tavor_workq_hdr_t *dapli_tavor_wrid_wqhdr_create(ib_cq_handle_t,
    uint_t, uint_t, uint_t);
static void dapli_tavor_wrid_wqhdr_add(dapls_tavor_workq_hdr_t *,
    dapls_tavor_wrid_list_hdr_t *);
static void dapli_tavor_wrid_wqhdr_remove(dapls_tavor_workq_hdr_t *,
    dapls_tavor_wrid_list_hdr_t *);
static void dapli_tavor_wrid_wqhdr_lock_both(ib_qp_handle_t);
static void dapli_tavor_wrid_wqhdr_unlock_both(ib_qp_handle_t);
static DAT_RETURN dapli_tavor_cq_wqhdr_add(ib_cq_handle_t,
    dapls_tavor_workq_hdr_t *);
static void dapli_tavor_cq_wqhdr_remove(ib_cq_handle_t,
    dapls_tavor_workq_hdr_t *);

/*
 * dapls_tavor_wrid_get_entry()
 */
uint64_t
dapls_tavor_wrid_get_entry(ib_cq_handle_t cq, tavor_hw_cqe_t *cqe,
    uint_t send_or_recv, uint_t error, dapls_tavor_wrid_entry_t *wre)
{
	dapls_tavor_workq_hdr_t	*wq;
	dapls_tavor_wrid_entry_t	*wre_tmp;
	uint64_t		wrid;
	uint_t			qpnum;

	/* Lock the list of work queues associated with this CQ */
	dapl_os_lock(&cq->cq_wrid_wqhdr_lock);

	/* Find the work queue for this QP number (send or receive side) */
	qpnum = TAVOR_CQE_QPNUM_GET(cqe);
	wq = dapli_tavor_wrid_wqhdr_find(cq, qpnum, send_or_recv);

	dapl_os_assert(wq != NULL);

	/*
	 * Regardless of whether the completion is the result of a "success"
	 * or a "failure", we lock the list of "containers" and attempt to
	 * search for the first matching completion (i.e. the first WR
	 * with a matching WQE addr and size).  Once we find it, we pull out
	 * the "wrid" field and return it (see below).  Note: One possible
	 * future enhancement would be to enable this routine to skip over
	 * any "unsignaled" completions to go directly to the next "signaled"
	 * entry on success. XXX
	 */
	dapl_os_lock(&wq->wq_wrid_lock->wrl_lock);
	wre_tmp = dapli_tavor_wrid_find_match(wq, cqe);

	/*
	 * If this is a "successful" completion, then we assert that this
	 * completion must be a "signaled" completion.
	 */
	dapl_os_assert(error || (wre_tmp->wr_signaled_dbd &
	    TAVOR_WRID_ENTRY_SIGNALED));

	/*
	 * If the completion is a "failed" completion, then we save away the
	 * contents of the entry (into the "wre" field passed in) for use
	 * in later CQE processing. Note: We use the
	 * dapli_tavor_wrid_get_wqeaddrsz() function to grab "wqeaddrsz" from
	 * the next entry in the container.
	 * This is required for error processing (where updating these fields
	 * properly is necessary for correct handling of the "error" CQE)
	 */
	if (error && (wre != NULL)) {
		*wre = *wre_tmp;
		wre->wr_wqeaddrsz = dapli_tavor_wrid_get_wqeaddrsz(wq);
	}

	/* Pull out the WRID and return it */
	wrid = wre_tmp->wr_wrid;

	dapl_os_unlock(&wq->wq_wrid_lock->wrl_lock);
	dapl_os_unlock(&cq->cq_wrid_wqhdr_lock);

	return (wrid);
}
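
/*
 * Usage sketch (illustrative only, not part of this file): a CQE
 * polling routine would typically pull the WRID for each completion
 * roughly as follows.  The TAVOR_CQE_SENDRECV_GET() and
 * TAVOR_CQE_ERROR() accessor names are assumptions here; the real CQE
 * accessors live in the Tavor CQE definitions.
 *
 *	dapls_tavor_wrid_entry_t wre;
 *	uint_t err = TAVOR_CQE_ERROR(cqe) ? 1 : 0;
 *	uint64_t wrid = dapls_tavor_wrid_get_entry(cq, cqe,
 *	    TAVOR_CQE_SENDRECV_GET(cqe), err, err ? &wre : NULL);
 */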


/*
 * dapli_tavor_wrid_find_match()
 */
static dapls_tavor_wrid_entry_t *
dapli_tavor_wrid_find_match(dapls_tavor_workq_hdr_t *wq, tavor_hw_cqe_t *cqe)
{
	dapls_tavor_wrid_entry_t	*curr = NULL;
	dapls_tavor_wrid_list_hdr_t	*container;
	uint32_t		wqeaddr_size;
	uint32_t		head, tail, size;
	int			found = 0, last_container;

	/* dapl_os_assert(MUTEX_HELD(&wq->wq_wrid_lock)); */

	/* Pull the "wqeaddrsz" information from the CQE */
	wqeaddr_size = TAVOR_CQE_WQEADDRSZ_GET(cqe);

	/*
	 * Walk the "containers" list(s) and find the first WR with a
	 * matching WQE addr.  If the current "container" is not the last
	 * one on the list, i.e. not the current one to which we are posting
	 * new WRID entries, then we do not attempt to update the "q_head",
	 * "q_tail", and "q_full" indicators on the main work queue header.
	 * We do, however, update the "head" and "full" indicators on the
	 * individual containers as we go.  This is imperative because we
	 * need to be able to determine when the current container has been
	 * emptied (so that we can move on to the next container).
	 */
	container = wq->wq_wrid_poll;
	while (container != NULL) {

		/* Is this the last/only "container" on the list? */
		last_container = (container != wq->wq_wrid_post) ? 0 : 1;

		/*
		 * First check if we are on an SRQ.  If so, we grab the entry
		 * and break out.  Since SRQ wridlists are never added to the
		 * reaplist, they can only be the last container.
		 */
		if (container->wl_srq_en) {
			dapl_os_assert(last_container == 1);
			curr = dapli_tavor_wrid_find_match_srq(container, cqe);
			break;
		}

		/*
		 * Grab the current "head", "tail" and "size" fields before
		 * walking the list in the current container. Note: the "size"
		 * field here must always be a power-of-2.  The "full"
		 * parameter is checked (and updated) here to distinguish the
		 * "queue full" condition from "queue empty".
		 */
		head = container->wl_head;
		tail = container->wl_tail;
		size = container->wl_size;
		while ((head != tail) || (container->wl_full)) {
			container->wl_full = 0;
			curr = &container->wl_wre[head];
			head = ((head + 1) & (size - 1));
			/*
			 * If the current entry's "wqeaddrsz" matches the one
			 * we're searching for, then this must correspond to
			 * the work request that caused the completion.  Set
			 * the "found" flag and bail out.
			 */
			if (curr->wr_wqeaddrsz == wqeaddr_size) {
				found = 1;
				break;
			}
		}

		/*
		 * If the current container is empty (at this point the
		 * "head == tail" condition can only mean that the container
		 * is empty), then NULL out the "wrid_old_tail" field (see
		 * tavor_post_send() and tavor_post_recv() for more details)
		 * and (potentially) remove the current container from future
		 * searches.
		 */
		if (head == tail) {
			container->wl_wre_old_tail = NULL;
			/*
			 * If this wasn't the last "container" on the chain,
			 * i.e. the one to which new WRID entries will be
			 * added, then remove it from the list.
			 * Note: we don't "lose" the memory pointed to by
			 * this container because we should have already put
			 * it on the "reapable" list (from where it will
			 * later be pulled).
			 */
			if (!last_container) {
				wq->wq_wrid_poll = container->wl_next;
			}
		}

		/* Update the head index for the container */
		container->wl_head = head;

		/*
		 * If the entry was found in this container, then bail out.
		 * Else reset the "curr" pointer and move on to the next
		 * container (if there is one).  Note: the only real reason
		 * for setting "curr = NULL" here is so that the ASSERT below
		 * can catch the case where no matching entry was found on
		 * any of the lists.
		 */
		if (found) {
			break;
		} else {
			curr = NULL;
			container = container->wl_next;
		}
	}

	/*
	 * Update work queue header's "head" and "full" conditions to match
	 * the last entry on the container list.  (Note: Only if we're pulling
	 * entries from the last work queue portion of the list, i.e. not from
	 * the previous portions that may be the "reapable" list.)
	 */
	if (last_container) {
		wq->wq_head = wq->wq_wrid_post->wl_head;
		wq->wq_full = wq->wq_wrid_post->wl_full;
	}

	/* Ensure that we've actually found what we were searching for */
	dapl_os_assert(curr != NULL);

	return (curr);
}
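
/*
 * Illustration of the container walk above (restating the code): each
 * container is a circular buffer whose size is a power of two, so the
 * head index advances with a mask rather than a modulo:
 *
 *	head = (head + 1) & (size - 1);	   e.g. size 8: (7 + 1) & 7 == 0
 *
 * The "wl_full" flag exists because head == tail is ambiguous on its
 * own: it holds both when the buffer is empty and when it has wrapped
 * completely.  The posting side sets the flag; the loop above clears
 * it as soon as one entry is consumed.
 */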

/*
 * dapli_tavor_wrid_find_match_srq()
 *    Context: Can be called from interrupt or base context.
 */
dapls_tavor_wrid_entry_t *
dapli_tavor_wrid_find_match_srq(dapls_tavor_wrid_list_hdr_t *wl,
    tavor_hw_cqe_t *cqe)
{
	dapls_tavor_wrid_entry_t	*wre;
	uint32_t		wqe_index;
	uint32_t		wqe_addr;
	uint32_t		qsize_msk;
	uint32_t		tail, next_tail;

	/* Grab the WQE addr out of the CQE */
	wqe_addr = TAVOR_CQE_WQEADDRSZ_GET(cqe) & 0xFFFFFFC0;

	/*
	 * Given the 'wqe_addr' just calculated and the srq buf address, we
	 * find the 'wqe_index'.  The 'wre' returned below contains the WRID
	 * that we are looking for.  This indexes into the wre_list for this
	 * specific WQE.
	 */
	wqe_index = TAVOR_SRQ_WQ_INDEX(wl->wl_srq_desc_addr, wqe_addr,
	    wl->wl_srq_wqesz);

	/* ASSERT on impossible wqe_index values */
	dapl_os_assert(wqe_index < wl->wl_size);

	/* Put this WQE back on the free list */

	qsize_msk = wl->wl_size - 1;
	tail	  = wl->wl_freel_tail;

	next_tail = (tail + 1) & qsize_msk;
	wl->wl_freel_entries++;

	dapl_os_assert(wl->wl_freel_entries <= wl->wl_size);

	/* Get the descriptor (IO Address) of the WQE to be built */
	wl->wl_free_list[tail] = wqe_addr;
	wl->wl_freel_tail = next_tail;
	/* Using the index, return the Work Request ID Entry (wre) */
	wre = &wl->wl_wre[wqe_index];

	return (wre);
}
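
/*
 * Worked example of the index computation above (illustrative numbers;
 * we assume here that TAVOR_SRQ_WQ_INDEX() reduces to the obvious
 * relative-address divide, the real macro lives in the Tavor headers):
 *
 *	wqe_index = (wqe_addr - wl_srq_desc_addr) / wl_srq_wqesz
 *
 * e.g. with wl_srq_desc_addr 0x1000 and wl_srq_wqesz 0x40, a CQE whose
 * masked address is 0x10C0 maps to index (0x10C0 - 0x1000) / 0x40 == 3,
 * and 0x10C0 itself goes back on the free list for reuse.
 */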

/*
 * dapls_tavor_wrid_cq_reap()
 */
void
dapls_tavor_wrid_cq_reap(ib_cq_handle_t cq)
{
	dapls_tavor_workq_hdr_t	*consume_wqhdr;
	dapls_tavor_wrid_list_hdr_t	*container, *to_free;


	/* dapl_os_assert(MUTEX_HELD(&cq->cq_lock)); */

	/* Lock the list of work queues associated with this CQ */
	dapl_os_lock(&cq->cq_wrid_wqhdr_lock);

	/* Walk the "reapable" list and free up containers */
	container = cq->cq_wrid_reap_head;
	while (container != NULL) {
		to_free	  = container;
		container = container->wl_reap_next;
		/*
		 * If reaping the WRID list containers pulls the last
		 * container from the given work queue header, then we free
		 * the work queue header as well.
		 */
		consume_wqhdr = dapli_tavor_wrid_list_reap(to_free);
		if (consume_wqhdr != NULL) {
			dapli_tavor_cq_wqhdr_remove(cq, consume_wqhdr);
		}
	}

	/* Once finished reaping, we reset the CQ's reap list */
	cq->cq_wrid_reap_head = cq->cq_wrid_reap_tail = NULL;

	dapl_os_unlock(&cq->cq_wrid_wqhdr_lock);
}


/*
 * dapls_tavor_wrid_cq_force_reap()
 */
void
dapls_tavor_wrid_cq_force_reap(ib_cq_handle_t cq)
{
	DAPL_HASH_DATA		curr;
	DAT_RETURN		retval;
	dapls_tavor_workq_hdr_t		*to_free_wqhdr;
	dapls_tavor_wrid_list_hdr_t	*container, *to_free;

	/* dapl_os_assert(MUTEX_HELD(&cq->cq_lock)); */

	/*
	 * The first step is to walk the "reapable" list and free up those
	 * containers.  This is necessary because the containers on the
	 * reapable list are not otherwise connected to the work queue headers
	 * anymore.
	 */
	dapls_tavor_wrid_cq_reap(cq);

	/* Now lock the list of work queues associated with this CQ */
	dapl_os_lock(&cq->cq_wrid_wqhdr_lock);

	/*
	 * Walk the list of work queue headers and free up all the WRID list
	 * containers chained to it.  Note: We don't need to grab the locks
	 * for each of the individual WRID lists here because the only way
	 * things can be added or removed from the list at this point would
	 * be by posting a work request to a QP.  But if we've come this far,
	 * then we can be assured that there are no longer any QPs associated
	 * with the CQ that we are trying to free.
	 */
	retval = dapls_hash_iterate(cq->cq_wrid_wqhdr_list,
	    DAPL_HASH_ITERATE_INIT, &curr);
	dapl_os_assert(retval == DAT_SUCCESS);

	while (curr != NULL) {
		to_free_wqhdr = (dapls_tavor_workq_hdr_t *)curr;
		container = ((dapls_tavor_workq_hdr_t *)curr)->wq_wrid_poll;
		retval = dapls_hash_iterate(cq->cq_wrid_wqhdr_list,
		    DAPL_HASH_ITERATE_NEXT, &curr);
		dapl_os_assert(retval == DAT_SUCCESS);
		while (container != NULL) {
			to_free	  = container;
			container = container->wl_next;
			/*
			 * If reaping the WRID list containers pulls the last
			 * container from the given work queue header, then
			 * we free the work queue header as well.  Note: we
			 * ignore the return value because we know that the
			 * work queue header should always be freed once the
			 * list of containers has come to an end.
			 */
			(void) dapli_tavor_wrid_list_reap(to_free);
			if (container == NULL) {
				dapli_tavor_cq_wqhdr_remove(cq, to_free_wqhdr);
			}
		}
	}

	dapl_os_unlock(&cq->cq_wrid_wqhdr_lock);
}


/*
 * dapli_tavor_wrid_get_list()
 */
static dapls_tavor_wrid_list_hdr_t *
dapli_tavor_wrid_get_list(uint32_t qsize, int wrid_for_srq)
{
	dapls_tavor_wrid_list_hdr_t	*wridlist;
	dapls_tavor_wrid_entry_t	*wl_wre;
	uint32_t			*wl_freel;
	uint32_t			size;
	uint32_t			wl_wre_size;
	uint32_t			wl_freel_size;

	wridlist = NULL;
	wl_wre = NULL;
	wl_freel = NULL;
	size = wl_wre_size = wl_freel_size = 0;
	/*
	 * The WRID list "container" consists of the
	 * dapls_tavor_wrid_list_hdr_t, which holds the pointers necessary
	 * for maintaining the "reapable" list, chaining together multiple
	 * "containers" old and new, and tracking the head, tail, size, etc.
	 * for each container.  The "container" also holds all the
	 * dapls_tavor_wrid_entry_t's, one for each entry on the
	 * corresponding work queue.
	 */

	/*
	 * For wridlists associated with SRQs, the wrid lock needs to be
	 * allocated and initialized here.
	 */
	size = sizeof (dapls_tavor_wrid_list_hdr_t);
	if (wrid_for_srq) {
		size = size + sizeof (dapls_tavor_wrid_lock_t);
	}
	wridlist = dapl_os_alloc(size);
	if (wridlist == NULL) {
		goto bail;
	}
	if (wrid_for_srq) {
		wridlist->wl_lock = (dapls_tavor_wrid_lock_t *)(
		    (uintptr_t)wridlist + sizeof (dapls_tavor_wrid_list_hdr_t));
		dapl_os_lock_init(&wridlist->wl_lock->wrl_lock);
		wridlist->wl_lock->wrl_on_srq = wrid_for_srq;
	} else {
		wridlist->wl_lock = NULL;
	}
	wl_wre_size = qsize * sizeof (dapls_tavor_wrid_entry_t);
	wl_wre = dapl_os_alloc(wl_wre_size);
	if (wl_wre == NULL) {
		goto bail;
	}
	if (wrid_for_srq) { /* memory for the SRQ free list */
		wl_freel_size = qsize * sizeof (uint32_t);
		wl_freel = dapl_os_alloc(wl_freel_size);
		if (wl_freel == NULL) {
			goto bail;
		}
	}


	/* Complete the "container" initialization */
	wridlist->wl_size = qsize;
	wridlist->wl_full = 0;
	wridlist->wl_head = 0;
	wridlist->wl_tail = 0;
	wridlist->wl_wre = wl_wre;
	wridlist->wl_wre_old_tail  = NULL;
	wridlist->wl_reap_next = NULL;
	wridlist->wl_next  = NULL;
	wridlist->wl_prev  = NULL;
	if (wrid_for_srq) {
		wridlist->wl_srq_en = 1;
		wridlist->wl_free_list = (uint32_t *)wl_freel;
		wridlist->wl_freel_head = 0;
		wridlist->wl_freel_tail = 0;
		wridlist->wl_freel_entries = qsize;
	} else {
		wridlist->wl_srq_en = 0;
		wridlist->wl_free_list = NULL;
		wridlist->wl_freel_head = 0;
		wridlist->wl_freel_tail = 0;
		wridlist->wl_freel_entries = 0;
		wridlist->wl_srq_wqesz = 0;
		wridlist->wl_srq_desc_addr = 0;
	}
	return (wridlist);
bail:
	if (wridlist) {
		if (wrid_for_srq) {
			dapl_os_lock_destroy(&wridlist->wl_lock->wrl_lock);
		}
		dapl_os_free(wridlist, size);
	}
	if (wl_wre) {
		dapl_os_free(wl_wre, wl_wre_size);
	}
	if (wl_freel) {
		dapl_os_free(wl_freel, wl_freel_size);
	}
	return (NULL);
}
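
/*
 * Layout of the SRQ-case allocation above: the list header and its
 * lock come from a single dapl_os_alloc() call, with wl_lock pointing
 * just past the header (the WRE array and the free list are separate
 * allocations):
 *
 *	+------------------------------+  <- wridlist
 *	| dapls_tavor_wrid_list_hdr_t  |
 *	+------------------------------+  <- wridlist->wl_lock
 *	| dapls_tavor_wrid_lock_t      |
 *	+------------------------------+
 */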


/*
 * dapli_tavor_wrid_reaplist_add()
 */
static void
dapli_tavor_wrid_reaplist_add(ib_cq_handle_t cq, dapls_tavor_workq_hdr_t *wq)
{
	/* dapl_os_assert(MUTEX_HELD(&cq->cq_wrid_wqhdr_lock)); */

	dapl_os_lock(&wq->wq_wrid_lock->wrl_lock);

	/*
	 * Add the "post" container (the last one on the current chain) to
	 * the CQ's "reapable" list
	 */
	if ((cq->cq_wrid_reap_head == NULL) &&
	    (cq->cq_wrid_reap_tail == NULL)) {
		cq->cq_wrid_reap_head = wq->wq_wrid_post;
		cq->cq_wrid_reap_tail = wq->wq_wrid_post;
	} else {
		cq->cq_wrid_reap_tail->wl_reap_next = wq->wq_wrid_post;
		cq->cq_wrid_reap_tail = wq->wq_wrid_post;
	}

	dapl_os_unlock(&wq->wq_wrid_lock->wrl_lock);
}


/*
 * dapli_tavor_wrid_wqhdr_find()
 */
static dapls_tavor_workq_hdr_t *
dapli_tavor_wrid_wqhdr_find(ib_cq_handle_t cq, uint_t qpn, uint_t send_or_recv)
{
	DAPL_HASH_DATA		curr;
	DAPL_HASH_KEY		key;
	DAT_RETURN		status;

	/* dapl_os_assert(MUTEX_HELD(&cq->cq_wrid_wqhdr_lock)); */

	/*
	 * Look up the send or recv work queue header for this QP number in
	 * the CQ's wqhdr hash table.  The key combines the QP number with
	 * the send/recv flag so that both of a QP's work queue headers can
	 * be stored (and found) independently.
	 */
	key = (DAPL_HASH_KEY)(((uint64_t)send_or_recv << 32) | (uint32_t)qpn);

	status = dapls_hash_search(cq->cq_wrid_wqhdr_list, key, &curr);
	if (status == DAT_SUCCESS) {
		return ((dapls_tavor_workq_hdr_t *)curr);
	} else {
		return (NULL);
	}
}
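
/*
 * Key layout, from the construction above (the same key is built by
 * dapli_tavor_cq_wqhdr_add() and dapli_tavor_cq_wqhdr_remove() below):
 *
 *	 63              32 31               0
 *	+------------------+------------------+
 *	|   send_or_recv   |       qpn        |
 *	+------------------+------------------+
 *
 * so the send and receive work queue headers of one QP hash to
 * distinct entries even though they share a QP number.
 */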




/*
 * dapli_tavor_wrid_get_wqeaddrsz()
 */
static uint32_t
dapli_tavor_wrid_get_wqeaddrsz(dapls_tavor_workq_hdr_t *wq)
{
	dapls_tavor_wrid_entry_t	*wre;
	uint32_t		wqeaddrsz;
	uint32_t		head;

	/*
	 * If the container is empty, then there is no next entry. So just
	 * return zero.  Note: the "head == tail" condition here can only
	 * mean that the container is empty because we have previously pulled
	 * something from the container.
	 *
	 * If the container is not empty, then find the next entry and return
	 * the contents of its "wqeaddrsz" field.
	 */
	if (wq->wq_wrid_poll->wl_head == wq->wq_wrid_poll->wl_tail) {
		wqeaddrsz = 0;
	} else {
		/*
		 * We don't need to calculate the "next" head pointer here
		 * because "head" should already point to the next entry on
		 * the list (since we just pulled something off - in
		 * dapli_tavor_wrid_find_match() - and moved the head index
		 * forward.)
		 */
		head = wq->wq_wrid_poll->wl_head;
		wre = &wq->wq_wrid_poll->wl_wre[head];
		wqeaddrsz = wre->wr_wqeaddrsz;
	}
	return (wqeaddrsz);
}



/*
 * dapli_tavor_wrid_list_reap()
 *    Note: The "wqhdr_list_lock" must be held.
 */
static dapls_tavor_workq_hdr_t *
dapli_tavor_wrid_list_reap(dapls_tavor_wrid_list_hdr_t *wridlist)
{
	dapls_tavor_workq_hdr_t	*wqhdr, *consume_wqhdr = NULL;
	dapls_tavor_wrid_list_hdr_t	*prev, *next;

	/* Get the back pointer to the work queue header (see below) */
	wqhdr = wridlist->wl_wqhdr;
	dapl_os_lock(&wqhdr->wq_wrid_lock->wrl_lock);

	/* Unlink the WRID list "container" from the work queue list */
	prev = wridlist->wl_prev;
	next = wridlist->wl_next;
	if (prev != NULL) {
		prev->wl_next = next;
	}
	if (next != NULL) {
		next->wl_prev = prev;
	}

	/*
	 * If the back pointer to the work queue header shows that it
	 * was pointing to the entry we are about to remove, then the work
	 * queue header is reapable as well.
	 */
	if ((wqhdr->wq_wrid_poll == wridlist) &&
	    (wqhdr->wq_wrid_post == wridlist)) {
		consume_wqhdr = wqhdr;
	}

	/* Be sure to update the "poll" and "post" container pointers */
	if (wqhdr->wq_wrid_poll == wridlist) {
		wqhdr->wq_wrid_poll = next;
	}
	if (wqhdr->wq_wrid_post == wridlist) {
		wqhdr->wq_wrid_post = NULL;
	}

	/*
	 * Calculate the size and free the container.  For SRQs the wridlist
	 * is freed when the SRQ itself gets freed.
	 */
	if (!wridlist->wl_srq_en) {
		if (wridlist->wl_wre) {
			dapl_os_free(wridlist->wl_wre, wridlist->wl_size *
			    sizeof (dapls_tavor_wrid_entry_t));
		}
		dapl_os_assert(wridlist->wl_free_list == NULL);
		dapl_os_free(wridlist, sizeof (dapls_tavor_wrid_list_hdr_t));
	}

	dapl_os_unlock(&wqhdr->wq_wrid_lock->wrl_lock);

	return (consume_wqhdr);
}

/*
 * dapls_tavor_srq_wrid_init()
 */
DAT_RETURN
dapls_tavor_srq_wrid_init(ib_srq_handle_t srq)
{
	dapls_tavor_wrid_list_hdr_t	*wridlist;
	int i;

	wridlist = dapli_tavor_wrid_get_list(srq->srq_wq_numwqe, 1);


	if (wridlist == NULL) {
		srq->srq_wridlist = NULL;
		return (DAT_INSUFFICIENT_RESOURCES | DAT_RESOURCE_MEMORY);
	}

	/* initialize the free list with the descriptor addresses */
	wridlist->wl_free_list[0] = srq->srq_wq_desc_addr;
	for (i = 1; i < srq->srq_wq_numwqe; i++) {
		wridlist->wl_free_list[i] = wridlist->wl_free_list[i-1] +
		    srq->srq_wq_wqesz;
	}
	wridlist->wl_srq_wqesz = srq->srq_wq_wqesz;
	wridlist->wl_srq_desc_addr = srq->srq_wq_desc_addr;

	srq->srq_wridlist = wridlist;
	return (DAT_SUCCESS);
}
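
/*
 * Worked example of the free-list initialization above (illustrative
 * numbers): with srq_wq_desc_addr 0x2000, srq_wq_wqesz 0x40 and
 * srq_wq_numwqe 4, the loop produces
 *
 *	wl_free_list[] = { 0x2000, 0x2040, 0x2080, 0x20C0 }
 *
 * i.e. one IO address per SRQ WQE, spaced one WQE-size apart, all
 * initially free (dapli_tavor_wrid_get_list() set wl_freel_entries to
 * qsize for the SRQ case).
 */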

void
dapls_tavor_srq_wrid_free(ib_srq_handle_t srq)
{
	dapls_tavor_wrid_list_hdr_t	*wridlist;
	size_t				size = 0;

	wridlist = srq->srq_wridlist;
	if (wridlist) {
		dapl_os_assert(wridlist->wl_srq_en == 1);
		if (wridlist->wl_wre) {
			dapl_os_free(wridlist->wl_wre, wridlist->wl_size *
			    sizeof (dapls_tavor_wrid_entry_t));
		}
		if (wridlist->wl_free_list) {
			dapl_os_free(wridlist->wl_free_list, wridlist->wl_size *
			    sizeof (uint32_t));
		}
		if (wridlist->wl_lock) {
			dapl_os_assert(wridlist->wl_lock->wrl_on_srq == 1);
			dapl_os_lock_destroy(&wridlist->wl_lock->wrl_lock);
			size = sizeof (dapls_tavor_wrid_lock_t);
		}
		size = size; /* pacify lint */
		dapl_os_free(wridlist, size +
		    sizeof (dapls_tavor_wrid_list_hdr_t));
		srq->srq_wridlist = NULL;
	}
}


/*
 * dapls_tavor_wrid_init()
 */
DAT_RETURN
dapls_tavor_wrid_init(ib_qp_handle_t qp)
{
	dapls_tavor_workq_hdr_t		*swq;
	dapls_tavor_workq_hdr_t		*rwq;
	dapls_tavor_wrid_list_hdr_t	*s_wridlist;
	dapls_tavor_wrid_list_hdr_t	*r_wridlist;
	uint_t		create_new_swq = 0;
	uint_t		create_new_rwq = 0;

	/*
	 * For each of this QP's Work Queues, make sure we have a (properly
	 * initialized) Work Request ID list attached to the relevant
	 * completion queue.  Grab the CQ lock(s) before manipulating the
	 * lists.
	 */
	dapli_tavor_wrid_wqhdr_lock_both(qp);
	swq = dapli_tavor_wrid_wqhdr_find(qp->qp_sq_cqhdl, qp->qp_num,
	    TAVOR_WR_SEND);
	if (swq == NULL) {
		/* Couldn't find matching work queue header, create it */
		create_new_swq = 1;
		swq = dapli_tavor_wrid_wqhdr_create(qp->qp_sq_cqhdl,
		    qp->qp_num, TAVOR_WR_SEND, 1);
		if (swq == NULL) {
			/*
			 * If we couldn't find/allocate space for the workq
			 * header, then drop the lock(s) and return failure.
			 */
			dapli_tavor_wrid_wqhdr_unlock_both(qp);
			return (DAT_INSUFFICIENT_RESOURCES);
		}
	}
	qp->qp_sq_wqhdr = swq;
	swq->wq_size = qp->qp_sq_numwqe;
	swq->wq_head = 0;
	swq->wq_tail = 0;
	swq->wq_full = 0;

	/*
	 * Allocate space for the dapls_tavor_wrid_entry_t container
	 */
	s_wridlist = dapli_tavor_wrid_get_list(swq->wq_size, 0);
	if (s_wridlist == NULL) {
		/*
		 * If we couldn't allocate space for tracking the WRID
		 * entries, then cleanup the workq header from above (if
		 * necessary, i.e. if we created the workq header).  Then
		 * drop the lock(s) and return failure.
		 */
		if (create_new_swq) {
			dapli_tavor_cq_wqhdr_remove(qp->qp_sq_cqhdl, swq);
		}

		dapli_tavor_wrid_wqhdr_unlock_both(qp);
		return (DAT_INSUFFICIENT_RESOURCES | DAT_RESOURCE_MEMORY);
	}
	s_wridlist->wl_wqhdr = swq;
	/* Chain the new WRID list container to the workq hdr list */
	dapl_os_lock(&swq->wq_wrid_lock->wrl_lock);
	dapli_tavor_wrid_wqhdr_add(swq, s_wridlist);
	dapl_os_unlock(&swq->wq_wrid_lock->wrl_lock);


	/*
	 * Now we repeat all the above operations for the receive work queue
	 */
	rwq = dapli_tavor_wrid_wqhdr_find(qp->qp_rq_cqhdl, qp->qp_num,
	    TAVOR_WR_RECV);
	if (rwq == NULL) {
		create_new_rwq = 1;
		/* if qp is attached to an SRQ don't need to alloc wrid_lock */
		rwq = dapli_tavor_wrid_wqhdr_create(qp->qp_rq_cqhdl,
		    qp->qp_num, TAVOR_WR_RECV, qp->qp_srq_enabled ? 0 : 1);
		if (rwq == NULL) {
			/*
			 * If we couldn't find/allocate space for the workq
			 * header, then free all the send queue resources we
			 * just allocated and setup (above), drop the lock(s)
			 * and return failure.
			 */
			dapl_os_lock(&swq->wq_wrid_lock->wrl_lock);
			dapli_tavor_wrid_wqhdr_remove(swq, s_wridlist);
			dapl_os_unlock(&swq->wq_wrid_lock->wrl_lock);
			if (create_new_swq) {
				dapli_tavor_cq_wqhdr_remove(qp->qp_sq_cqhdl,
				    swq);
			}

			dapli_tavor_wrid_wqhdr_unlock_both(qp);
			return (DAT_INSUFFICIENT_RESOURCES |
			    DAT_RESOURCE_MEMORY);
		}
	}
	qp->qp_rq_wqhdr = rwq;
	rwq->wq_size = qp->qp_rq_numwqe;
	rwq->wq_head = 0;
	rwq->wq_tail = 0;
	rwq->wq_full = 0;

	/*
	 * Allocate space for the dapls_tavor_wrid_entry_t container.
	 * For QPs associated with SRQs, the SRQ's wridlist is used instead.
	 */
	if (qp->qp_srq_enabled) {
		/* Use the existing srq_wridlist pointer */
		r_wridlist = qp->qp_srq->srq_wridlist;
		dapl_os_assert(r_wridlist != NULL);
		/* store the wl_lock in the wqhdr */
		rwq->wq_wrid_lock = r_wridlist->wl_lock;
		dapl_os_assert(rwq->wq_wrid_lock != NULL);
	} else {
		/* Allocate memory for the r_wridlist */
		r_wridlist = dapli_tavor_wrid_get_list(rwq->wq_size, 0);
	}
	if (r_wridlist == NULL) {
		/*
		 * If we couldn't allocate space for tracking the WRID
		 * entries, then cleanup all the stuff from above.  Then
		 * drop the lock(s) and return failure.
		 */
		dapl_os_lock(&swq->wq_wrid_lock->wrl_lock);
		dapli_tavor_wrid_wqhdr_remove(swq, s_wridlist);
		dapl_os_unlock(&swq->wq_wrid_lock->wrl_lock);
		if (create_new_swq) {
			dapli_tavor_cq_wqhdr_remove(qp->qp_sq_cqhdl, swq);
		}
		if (create_new_rwq) {
			dapli_tavor_cq_wqhdr_remove(qp->qp_rq_cqhdl, rwq);
		}

		dapli_tavor_wrid_wqhdr_unlock_both(qp);
		return (DAT_INSUFFICIENT_RESOURCES | DAT_RESOURCE_MEMORY);
	}

	/* For SRQ based QPs r_wridlist does not point to recv wqhdr */
	if (!qp->qp_srq_enabled) {
		r_wridlist->wl_wqhdr = rwq;
	}

	/* Chain the new WRID list "container" to the workq hdr list */
	dapl_os_lock(&rwq->wq_wrid_lock->wrl_lock);
	dapli_tavor_wrid_wqhdr_add(rwq, r_wridlist);
	dapl_os_unlock(&rwq->wq_wrid_lock->wrl_lock);

	dapli_tavor_wrid_wqhdr_unlock_both(qp);

	return (DAT_SUCCESS);
}
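
/*
 * Resulting topology after a successful dapls_tavor_wrid_init()
 * (sketch for one QP whose send and receive sides use separate CQs):
 *
 *	sq_cq hash --> swq --wq_wrid_post--> s_wridlist
 *	                ^                        |
 *	                +------ wl_wqhdr --------+
 *
 * and likewise rwq/r_wridlist on the receive CQ, except that an
 * SRQ-attached QP shares the SRQ's wridlist and leaves its wl_wqhdr
 * back pointer alone (see the !qp_srq_enabled test above).
 */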


/*
 * dapls_tavor_wrid_cleanup()
 */
void
dapls_tavor_wrid_cleanup(DAPL_EP *ep, ib_qp_handle_t qp)
{
	/*
	 * For each of this QP's Work Queues, move the WRID "container" to
	 * the "reapable" list.  Although there may still be unpolled
	 * entries in these containers, it is not a big deal.  We will not
	 * reap the list until either the Poll CQ command detects an empty
	 * condition or the CQ itself is freed.  Grab the CQ lock(s) before
	 * manipulating the lists.
	 */
	dapli_tavor_wrid_wqhdr_lock_both(qp);
	dapli_tavor_wrid_reaplist_add(qp->qp_sq_cqhdl, qp->qp_sq_wqhdr);

	/*
	 * Repeat the above operation for the Recv work queue "container".
	 * However for qps with SRQ we flush the cq entries, remove the
	 * wridlist and wqhdr.
	 * Then drop the CQ lock(s) and return
	 */
	if (qp->qp_srq_enabled) {
		/*
		 * Pull off all (if any) entries for this QP from CQ.  This
		 * only includes entries that have not yet been polled
		 */
		dapl_os_lock(&qp->qp_rq_wqhdr->wq_wrid_lock->wrl_lock);
		DAPL_FLUSH(ep)(qp);

		/* Remove wridlist from WQHDR */
		dapli_tavor_wrid_wqhdr_remove(qp->qp_rq_wqhdr,
		    qp->qp_rq_wqhdr->wq_wrid_post);

		dapl_os_assert(qp->qp_rq_wqhdr->wq_wrid_post == NULL);

		dapl_os_unlock(&qp->qp_rq_wqhdr->wq_wrid_lock->wrl_lock);

		/* Free the WQHDR */
		dapli_tavor_cq_wqhdr_remove(qp->qp_rq_cqhdl, qp->qp_rq_wqhdr);
	} else {
		dapli_tavor_wrid_reaplist_add(qp->qp_rq_cqhdl, qp->qp_rq_wqhdr);
	}
	dapli_tavor_wrid_wqhdr_unlock_both(qp);
}

/*
 * dapli_tavor_wrid_wqhdr_create()
 */
static dapls_tavor_workq_hdr_t *
dapli_tavor_wrid_wqhdr_create(ib_cq_handle_t cq, uint_t qpn,
    uint_t send_or_recv, uint_t alloc_wrl)
{
	dapls_tavor_workq_hdr_t	*wqhdr_tmp;
	size_t			size, aligned_size;

	/* dapl_os_assert(MUTEX_HELD(&cq->cq_wrid_wqhdr_lock)); */

	/*
	 * Allocate space for a work queue header structure and initialize it.
	 * Each work queue header structure includes a "wq_wrid_lock"
	 * which needs to be initialized.
	 *
	 * Note: the address smashing is needed to ensure wq_wrid_lock is
	 * 8-byte aligned, which is not always the case on 32-bit sparc.
	 */
	size = (sizeof (dapls_tavor_workq_hdr_t) + 0x7) & ~0x7;
	aligned_size = size;
	if (alloc_wrl) {
		/* for non-srq wqhdr the lock is allocated with the wqhdr */
		size = size + sizeof (dapls_tavor_wrid_lock_t);
	}
	wqhdr_tmp = dapl_os_alloc(size);
	if (wqhdr_tmp == NULL) {
		return (NULL);
	}
	if (alloc_wrl) {
		wqhdr_tmp->wq_wrid_lock = (dapls_tavor_wrid_lock_t *)
		    (((uintptr_t)wqhdr_tmp + aligned_size) & ~0x7);
		dapl_os_lock_init(&wqhdr_tmp->wq_wrid_lock->wrl_lock);
		/* wrl allocated with wqhdr don't have srq enabled */
		wqhdr_tmp->wq_wrid_lock->wrl_on_srq = 0;
	}

	wqhdr_tmp->wq_qpn	= qpn;
	wqhdr_tmp->wq_send_or_recv = send_or_recv;

	wqhdr_tmp->wq_wrid_poll = NULL;
	wqhdr_tmp->wq_wrid_post = NULL;

	/* Chain the newly allocated work queue header to the CQ's list */
	if (dapli_tavor_cq_wqhdr_add(cq, wqhdr_tmp) != DAT_SUCCESS) {
		if (alloc_wrl) {
			dapl_os_lock_destroy(&wqhdr_tmp->wq_wrid_lock->
			    wrl_lock);
		}
		dapl_os_free(wqhdr_tmp, size);
		wqhdr_tmp = NULL;
	}

	return (wqhdr_tmp);
}
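
/*
 * The rounding arithmetic above, worked through: (x + 0x7) & ~0x7
 * rounds x up to the next multiple of 8, so a header of, say, 0x3C
 * bytes yields aligned_size 0x40, and the lock placed at
 * wqhdr_tmp + aligned_size then starts on an 8-byte boundary (the
 * pointer is masked with ~0x7 once more as a belt-and-suspenders
 * measure in case the allocation itself was not 8-byte aligned).
 */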

/*
 * dapli_tavor_wrid_wqhdr_add()
 */
static void
dapli_tavor_wrid_wqhdr_add(dapls_tavor_workq_hdr_t *wqhdr,
    dapls_tavor_wrid_list_hdr_t *wridlist)
{
	/* dapl_os_assert(MUTEX_HELD(&wqhdr->wq_wrid_lock)); */

	/* Chain the new WRID list "container" to the work queue list */
	if ((wqhdr->wq_wrid_post == NULL) &&
	    (wqhdr->wq_wrid_poll == NULL)) {
		wqhdr->wq_wrid_poll = wridlist;
		wqhdr->wq_wrid_post = wridlist;
	} else {
		wqhdr->wq_wrid_post->wl_next = wridlist;
		wridlist->wl_prev = wqhdr->wq_wrid_post;
		wqhdr->wq_wrid_post = wridlist;
	}
}


/*
 * dapli_tavor_wrid_wqhdr_remove()
 *    Note: this is only called to remove the most recently added WRID list
 *    container.
 */
static void
dapli_tavor_wrid_wqhdr_remove(dapls_tavor_workq_hdr_t *wqhdr,
    dapls_tavor_wrid_list_hdr_t *wridlist)
{
	dapls_tavor_wrid_list_hdr_t	*prev, *next;

	/* dapl_os_assert(MUTEX_HELD(&wqhdr->wq_wrid_lock)); */

	/* Unlink the WRID list "container" from the work queue list */
	prev = wridlist->wl_prev;
	next = wridlist->wl_next;
	if (prev != NULL) {
		prev->wl_next = next;
	}
	if (next != NULL) {
		next->wl_prev = prev;
	}

	/*
	 * Update any pointers in the work queue hdr that may point to this
	 * WRID list container
	 */
	if (wqhdr->wq_wrid_post == wridlist) {
		wqhdr->wq_wrid_post = prev;
	}
	if (wqhdr->wq_wrid_poll == wridlist) {
		wqhdr->wq_wrid_poll = NULL;
	}
}


/*
 * dapli_tavor_wrid_wqhdr_lock_both()
 */
static void
dapli_tavor_wrid_wqhdr_lock_both(ib_qp_handle_t qp)
{
	ib_cq_handle_t	sq_cq, rq_cq;

	sq_cq = qp->qp_sq_cqhdl;
	rq_cq = qp->qp_rq_cqhdl;

	/*
	 * If both work queues (send and recv) share a completion queue, then
	 * grab the common lock.  If they use different CQs (hence different
	 * "cq_wrid_wqhdr_list" locks), then grab the send one first, then
	 * the receive.  We do this consistently, and release in the reverse
	 * order in dapli_tavor_wrid_wqhdr_unlock_both() below, to avoid
	 * introducing any kind of deadlock condition.
	 */
	if (sq_cq == rq_cq) {
		dapl_os_lock(&sq_cq->cq_wrid_wqhdr_lock);
	} else {
		dapl_os_lock(&sq_cq->cq_wrid_wqhdr_lock);
		dapl_os_lock(&rq_cq->cq_wrid_wqhdr_lock);
	}
}

/*
 * dapli_tavor_wrid_wqhdr_unlock_both()
 */
static void
dapli_tavor_wrid_wqhdr_unlock_both(ib_qp_handle_t qp)
{
	ib_cq_handle_t	sq_cq, rq_cq;

	sq_cq = qp->qp_sq_cqhdl;
	rq_cq = qp->qp_rq_cqhdl;

	/*
	 * See dapli_tavor_wrid_wqhdr_lock_both() above for more detail
	 */
	if (sq_cq == rq_cq) {
		dapl_os_unlock(&sq_cq->cq_wrid_wqhdr_lock);
	} else {
		dapl_os_unlock(&rq_cq->cq_wrid_wqhdr_lock);
		dapl_os_unlock(&sq_cq->cq_wrid_wqhdr_lock);
	}
}
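
/*
 * Note on the pair above: the deadlock avoidance comes from every path
 * acquiring the two CQ locks in the same order (send-side CQ first,
 * then receive-side); releasing in the reverse order is convention.
 * Two threads initializing or cleaning up QPs that share the same two
 * CQs therefore can never each hold the lock the other is waiting for.
 */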


/*
 * dapli_tavor_cq_wqhdr_add()
 */
static DAT_RETURN
dapli_tavor_cq_wqhdr_add(ib_cq_handle_t cq, dapls_tavor_workq_hdr_t *wqhdr)
{
	DAPL_HASH_KEY		key;

	/* dapl_os_assert(MUTEX_HELD(&cq->cq_wrid_wqhdr_lock)); */

	/*
	 * Insert the work queue header into the CQ's wqhdr hash table,
	 * keyed by QP number and send/recv type (the same key construction
	 * used by dapli_tavor_wrid_wqhdr_find()).
	 */
	key = (DAPL_HASH_KEY)(((uint64_t)wqhdr->wq_send_or_recv << 32) |
	    wqhdr->wq_qpn);

	return (dapls_hash_insert(cq->cq_wrid_wqhdr_list, key, wqhdr));
}


/*
 * dapli_tavor_cq_wqhdr_remove()
 */
static void
dapli_tavor_cq_wqhdr_remove(ib_cq_handle_t cq, dapls_tavor_workq_hdr_t *wqhdr)
{
	DAPL_HASH_DATA	curr;
	DAPL_HASH_KEY	key;
	size_t		size = 0;

	/* dapl_os_assert(MUTEX_HELD(&cq->cq_wrid_wqhdr_lock)); */

	/* Remove "wqhdr" from the work queue header list on "cq" */

	key = (DAPL_HASH_KEY)(((uint64_t)wqhdr->wq_send_or_recv << 32) |
	    wqhdr->wq_qpn);

	(void) dapls_hash_remove(cq->cq_wrid_wqhdr_list, key, &curr);

	size = (sizeof (dapls_tavor_workq_hdr_t) + 0x7) & ~0x7;
	if (wqhdr->wq_wrid_lock && (!wqhdr->wq_wrid_lock->wrl_on_srq)) {
		dapl_os_lock_destroy(&wqhdr->wq_wrid_lock->wrl_lock);
		size += sizeof (dapls_tavor_wrid_lock_t);
	}

	/* Free the memory associated with "wqhdr" */
	dapl_os_free(wqhdr, size);
}

/*
 * dapls_tavor_srq_wrid_resize() is called to resize the wridlist
 * associated with SRQs as a result of dat_srq_resize().
 *
 * Returns: DAT_TRUE if successful, otherwise DAT_FALSE
 */
DAT_BOOLEAN
dapls_tavor_srq_wrid_resize(ib_srq_handle_t srq_handle, uint32_t new_size)
{
	dapls_tavor_wrid_list_hdr_t	*wridlist;
	dapls_tavor_wrid_entry_t	*old_wl_wre;
	dapls_tavor_wrid_entry_t	*new_wl_wre;
	uint32_t			*old_wl_freel;
	uint32_t			*new_wl_freel;
	uint32_t			old_size;
	uint32_t			idx;
	uint32_t			prev_idx;
	uint32_t			i;

	wridlist = srq_handle->srq_wridlist;

	if (wridlist == NULL) {
		return (DAT_FALSE);
	}
	dapl_os_assert(wridlist->wl_srq_en);

	dapl_os_lock(&wridlist->wl_lock->wrl_lock);

	old_wl_wre = wridlist->wl_wre;
	old_wl_freel = wridlist->wl_free_list;
	old_size = wridlist->wl_size;

	new_wl_wre = (dapls_tavor_wrid_entry_t *)dapl_os_alloc(new_size *
	    sizeof (dapls_tavor_wrid_entry_t));
	if (new_wl_wre == NULL) {
		goto bail;
	}
	new_wl_freel = dapl_os_alloc(new_size * sizeof (uint32_t));
	if (new_wl_freel == NULL) {
		goto bail;
	}
	/*
	 * We just need to copy the old WREs to the new array.  Since the
	 * descriptors are relatively addressed, the descriptor-to-index
	 * mapping doesn't change.
	 */
	(void) dapl_os_memcpy(&new_wl_wre[0], &old_wl_wre[0],
	    old_size * sizeof (dapls_tavor_wrid_entry_t));
	/*
	 * Copy the old free list to the new one
	 */
	idx = wridlist->wl_freel_head;
	for (i = 0; i < wridlist->wl_freel_entries; i++) {
		new_wl_freel[i] = old_wl_freel[idx];
		idx = (idx + 1) % old_size;
	}
	/*
	 * Add the new entries in wl_wre to the new free list
	 */
	idx = wridlist->wl_freel_entries;
	new_wl_freel[idx] = wridlist->wl_srq_desc_addr + old_size *
	    wridlist->wl_srq_wqesz;
	prev_idx = idx;
	idx = (idx + 1) % new_size;
	for (i = 0; i < new_size - old_size - 1; i++) {
		new_wl_freel[idx] = new_wl_freel[prev_idx] +
		    wridlist->wl_srq_wqesz;
		prev_idx = idx;
		idx = (idx + 1) % new_size;
	}
	wridlist->wl_size = new_size;
	wridlist->wl_wre = new_wl_wre;
	wridlist->wl_free_list = new_wl_freel;
	wridlist->wl_freel_head = 0;
	wridlist->wl_freel_tail = idx;
	wridlist->wl_freel_entries = wridlist->wl_freel_entries + new_size -
	    old_size;

	dapl_os_unlock(&wridlist->wl_lock->wrl_lock);

	if (old_wl_wre) {
		dapl_os_free(old_wl_wre, old_size *
		    sizeof (dapls_tavor_wrid_entry_t));
	}
	if (old_wl_freel) {
		dapl_os_free(old_wl_freel, old_size * sizeof (uint32_t));
	}
	return (DAT_TRUE);
bail:
	dapl_os_unlock(&wridlist->wl_lock->wrl_lock);
	if (new_wl_wre) {
		dapl_os_free(new_wl_wre, new_size *
		    sizeof (dapls_tavor_wrid_entry_t));
	}
	return (DAT_FALSE);
}
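
/*
 * Worked example of the resize above (illustrative numbers): growing
 * from old_size 4 to new_size 8 with wl_srq_wqesz 0x40,
 * wl_srq_desc_addr 0x2000 and 2 entries currently free at head 1:
 *
 *	copy step:   new_wl_freel[0..1] = the old free entries
 *	append step: new_wl_freel[2..5] = 0x2100, 0x2140, 0x2180, 0x21C0
 *	             (the four brand-new WQEs, starting at desc_addr +
 *	             old_size * wqesz)
 *
 * leaving wl_freel_head 0, wl_freel_tail 6 and wl_freel_entries
 * 2 + 8 - 4 == 6, consistent with the assignments above.
 */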