xref: /titanic_50/usr/src/uts/common/io/ib/clients/rdsv3/rdsv3_af_thr.c (revision 5d5562f583b2b6affe19bdce0b3c8b1840d667a4)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 #include <sys/ib/clients/rdsv3/ib.h>
25 #include <sys/ib/clients/rdsv3/rdsv3_af_thr_impl.h>
26 #include <sys/ib/clients/rdsv3/rdsv3_debug.h>
27 
28 extern pri_t maxclsyspri;
29 extern kmutex_t cpu_lock;
30 
31 int rdsv3_enable_snd_cq = 0;	/* loaded from the "EnableSendCQ" property in rdsv3_af_init() */
32 int rdsv3_intr_line_up_mode = 0;	/* "IntrLineUpMode" property; nonzero steers a CQ interrupt to its worker's cpu */
33 static kmutex_t rdsv3_cpuid_pool_lock;	/* held by rdsv3_af_cpu_assign() while it scans/updates rdsv3_cpuid_pool[] */
34 
35 void
rdsv3_af_init(dev_info_t * dip)36 rdsv3_af_init(dev_info_t *dip)
37 {
38 	int i;
39 	cpu_t *cp;
40 	int *msix;
41 	uint_t nmsix;
42 	extern int ncpus;
43 
44 	mutex_init(&rdsv3_cpuid_pool_lock, NULL, MUTEX_DEFAULT, NULL);
45 	if (ncpus < RDSV3_CPUID_POOL_MAX)
46 		rdsv3_cpuid_pool_cnt = ncpus;
47 	else
48 		rdsv3_cpuid_pool_cnt = RDSV3_CPUID_POOL_MAX;
49 
50 	/* hold cpu_lock before calling cpu_get and cpu_is_online */
51 	mutex_enter(&cpu_lock);
52 	for (i = 0; i < rdsv3_cpuid_pool_cnt; i++) {
53 		cp = cpu_get((processorid_t)i);
54 		if (cp == NULL || !cpu_is_online(cp))
55 			rdsv3_cpuid_pool[i] = RDSV3_CPUFLAGS_OFF;
56 		else
57 			rdsv3_cpuid_pool[i] = RDSV3_CPUFLAGS_ON;
58 	}
59 	mutex_exit(&cpu_lock);
60 
61 	if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
62 	    "HcaMsix", (int **)&msix, &nmsix) == DDI_PROP_SUCCESS) {
63 		/* remove the hca MSI-x interrupt cpu's */
64 		for (i = 0; i < nmsix; i++) {
65 			rdsv3_cpuid_pool[msix[i]] |= RDSV3_CPUFLAGS_INTR;
66 			rdsv3_msix_pool[i] = msix[i];
67 		}
68 		rdsv3_msix_pool_cnt = nmsix;
69 		ddi_prop_free(msix);
70 	}
71 	rdsv3_enable_snd_cq = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
72 	    DDI_PROP_DONTPASS, "EnableSendCQ", 0);
73 	rdsv3_intr_line_up_mode = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
74 	    DDI_PROP_DONTPASS, "IntrLineUpMode", 0);
75 }
76 
77 static void
rdsv3_af_cpu_assign(rdsv3_af_grp_t * hcagp)78 rdsv3_af_cpu_assign(rdsv3_af_grp_t *hcagp)
79 {
80 	int i, j, k, idx;
81 
82 	RDSV3_DPRINTF2("rdsv3_af_cpu_assign", "hcagp %p", hcagp);
83 
84 	mutex_enter(&rdsv3_cpuid_pool_lock);
85 	for (i = 0; i < rdsv3_cpuid_pool_cnt; i++) {
86 		if (!(rdsv3_cpuid_pool[i] & (RDSV3_CPUFLAGS_UNAVAIL |
87 		    RDSV3_CPUFLAGS_ASSIGNED | RDSV3_CPUFLAGS_HCA))) {
88 			rdsv3_cpuid_pool[i] |= RDSV3_CPUFLAGS_HCA;
89 			hcagp->g_hca_cpuid = i;
90 			break;
91 		}
92 		/* share an assigned cpu */
93 		for (j = 0; j < rdsv3_cpuid_pool_cnt; j++) {
94 			if (!(rdsv3_cpuid_pool[j] & (RDSV3_CPUFLAGS_UNAVAIL |
95 			    RDSV3_CPUFLAGS_HCA))) {
96 				hcagp->g_hca_cpuid = j;
97 				break;
98 			}
99 		}
100 		/* if the code comes down here, cpu 0 will be used */
101 	}
102 
103 	for (j = 0; j < RDSV3_AFT_CONN_CPU_POOL; j++) {
104 		/* initialize to be an out-of-bound cpuid, no binding */
105 		hcagp->g_conn_cpuid_pool[j] = rdsv3_cpuid_pool_cnt;
106 		for (i = 0; i < rdsv3_cpuid_pool_cnt; i++) {
107 			if (!(rdsv3_cpuid_pool[i] & (RDSV3_CPUFLAGS_UNAVAIL |
108 			    RDSV3_CPUFLAGS_ASSIGNED | RDSV3_CPUFLAGS_HCA))) {
109 				rdsv3_cpuid_pool[i] |= RDSV3_CPUFLAGS_ASSIGNED;
110 				hcagp->g_conn_cpuid_pool[j] = i;
111 				break;
112 			}
113 		}
114 		if (i >= rdsv3_cpuid_pool_cnt)
115 			break;
116 	}
117 	if (j >= RDSV3_AFT_CONN_CPU_POOL) {
118 		mutex_exit(&rdsv3_cpuid_pool_lock);
119 		return;
120 	}
121 	/* avoid the primary group */
122 	for (k = 0, idx = 0; k < 2; k++) {
123 		/* search to the start of an hca group */
124 		for (i = idx; i < rdsv3_cpuid_pool_cnt; i++) {
125 			if (rdsv3_cpuid_pool[i] & RDSV3_CPUFLAGS_HCA) {
126 				idx = i + 1;
127 				break;
128 			}
129 		}
130 	}
131 	/* share an assigned cpu */
132 	for (; j < RDSV3_AFT_CONN_CPU_POOL; j++) {
133 		for (i = idx; i < rdsv3_cpuid_pool_cnt; i++) {
134 			if (!(rdsv3_cpuid_pool[i] & (RDSV3_CPUFLAGS_UNAVAIL |
135 			    RDSV3_CPUFLAGS_HCA))) {
136 				hcagp->g_conn_cpuid_pool[j] = i;
137 				idx = i + 1;
138 				break;
139 			}
140 		}
141 	}
142 	mutex_exit(&rdsv3_cpuid_pool_lock);
143 }
144 
145 rdsv3_af_grp_t *
rdsv3_af_grp_create(ibt_hca_hdl_t hca,uint64_t id)146 rdsv3_af_grp_create(ibt_hca_hdl_t hca, uint64_t id)
147 {
148 	char name[128];
149 	ibt_cq_sched_attr_t cq_sched_attr;
150 	ibt_status_t status;
151 	rdsv3_af_grp_t *hcagp;
152 	uint64_t l_id = id;
153 
154 	hcagp = kmem_zalloc(sizeof (*hcagp), KM_NOSLEEP);
155 	if (!hcagp)
156 		return (NULL);
157 	hcagp->g_hca_hdl = hca;
158 
159 	rdsv3_af_cpu_assign(hcagp);
160 	return (hcagp);
161 }
162 
163 void
rdsv3_af_grp_destroy(rdsv3_af_grp_t * hcagp)164 rdsv3_af_grp_destroy(rdsv3_af_grp_t *hcagp)
165 {
166 	if (hcagp == NULL)
167 		return;
168 
169 	kmem_free(hcagp, sizeof (*hcagp));
170 }
171 
172 void
rdsv3_af_grp_draw(rdsv3_af_grp_t * hcagp)173 rdsv3_af_grp_draw(rdsv3_af_grp_t *hcagp)
174 {
175 	rdsv3_af_grp_t *l_hcagp = hcagp;
176 }
177 
178 ibt_sched_hdl_t
rdsv3_af_grp_get_sched(rdsv3_af_grp_t * hcagp)179 rdsv3_af_grp_get_sched(rdsv3_af_grp_t *hcagp)
180 {
181 	return (hcagp->g_sched_hdl);
182 }
183 
184 rdsv3_af_thr_t *
rdsv3_af_intr_thr_create(rdsv3_af_thr_drain_func_t fn,void * data,uint_t flag,rdsv3_af_grp_t * hcagp,ibt_cq_hdl_t ibt_cq_hdl)185 rdsv3_af_intr_thr_create(rdsv3_af_thr_drain_func_t fn, void *data, uint_t flag,
186     rdsv3_af_grp_t *hcagp, ibt_cq_hdl_t ibt_cq_hdl)
187 {
188 	rdsv3_af_thr_t *ringp;
189 	processorid_t cpuid;
190 
191 	if (ibt_cq_hdl == NULL)
192 		return (NULL);
193 	ringp = rdsv3_af_thr_create(fn, data, flag, hcagp);
194 	if (ringp == NULL)
195 		return (NULL);
196 
197 	mutex_enter(&cpu_lock);
198 	if (hcagp->g_conn_cpuid_idx >= RDSV3_AFT_CONN_CPU_POOL)
199 		hcagp->g_conn_cpuid_idx = 0;
200 	cpuid =  hcagp->g_conn_cpuid_pool[hcagp->g_conn_cpuid_idx++];
201 	(void) rdsv3_af_thr_bind(ringp, cpuid);
202 	mutex_exit(&cpu_lock);
203 
204 	if (ringp->aft_intr) {
205 		if (rdsv3_intr_line_up_mode) {
206 			(void) ddi_intr_set_affinity(ringp->aft_intr, cpuid);
207 		} else {
208 			(void) ddi_intr_set_affinity(ringp->aft_intr,
209 			    rdsv3_msix_pool[0]);
210 		}
211 	}
212 	return (ringp);
213 }
214 
215 rdsv3_af_thr_t *
rdsv3_af_thr_create(rdsv3_af_thr_drain_func_t fn,void * data,uint_t flag,rdsv3_af_grp_t * hcagp)216 rdsv3_af_thr_create(rdsv3_af_thr_drain_func_t fn, void *data, uint_t flag,
217     rdsv3_af_grp_t *hcagp)
218 {
219 	rdsv3_af_thr_t *ringp;
220 	pri_t pri;
221 	uint_t l_flags = flag;
222 	rdsv3_af_grp_t *l_hcagp = hcagp;
223 
224 	ringp = kmem_zalloc(sizeof (rdsv3_af_thr_t), KM_NOSLEEP);
225 	if (ringp == NULL)
226 		return (NULL);
227 
228 	ringp->aft_grp = hcagp;
229 	mutex_init(&ringp->aft_lock, NULL, MUTEX_DEFAULT, NULL);
230 	cv_init(&ringp->aft_async, NULL, CV_DEFAULT, NULL);
231 	if (flag & SCQ_WRK_BIND_CPU)
232 		pri = maxclsyspri;
233 	else
234 		pri = maxclsyspri;
235 	ringp->aft_worker = thread_create(NULL, 0,
236 	    rdsv3_af_thr_worker, ringp, 0, &p0, TS_RUN, pri);
237 	ringp->aft_data = data;
238 	ringp->aft_drain_func = (rdsv3_af_thr_drain_func_t)fn;
239 
240 	/* set the bind CPU to -1 to indicate no thread affinity set */
241 	ringp->aft_cpuid = -1;
242 	ringp->aft_state = 0;
243 	ringp->aft_cflag = flag;
244 
245 	if (flag & SCQ_BIND_CPU) {
246 		mutex_enter(&cpu_lock);
247 		if (flag & SCQ_HCA_BIND_CPU) {
248 			(void) rdsv3_af_thr_bind(ringp, hcagp->g_hca_cpuid);
249 		} else if (flag & SCQ_WRK_BIND_CPU) {
250 			(void) rdsv3_af_thr_bind(ringp, hcagp->g_hca_cpuid);
251 		}
252 		mutex_exit(&cpu_lock);
253 	}
254 
255 	RDSV3_DPRINTF2("rdsv3_af_thr_create", "af_thr %p ic %p", ringp, data);
256 	return (ringp);
257 }
258 
259 void
rdsv3_af_thr_destroy(rdsv3_af_thr_t * ringp)260 rdsv3_af_thr_destroy(rdsv3_af_thr_t *ringp)
261 {
262 	RDSV3_DPRINTF2("rdsv3_af_thr_destroy", "af_thr %p", ringp);
263 
264 	/* wait until the af_thr has gone to sleep */
265 	mutex_enter(&ringp->aft_lock);
266 	while (ringp->aft_state & AFT_PROC) {
267 		mutex_exit(&ringp->aft_lock);
268 		delay(drv_usectohz(1000));
269 		mutex_enter(&ringp->aft_lock);
270 	}
271 	ringp->aft_state |= AFT_CONDEMNED;
272 	if (!(ringp->aft_state & AFT_PROC)) {
273 		cv_signal(&ringp->aft_async);
274 	}
275 	mutex_exit(&ringp->aft_lock);
276 }
277 
278 void
rdsv3_af_thr_fire(rdsv3_af_thr_t * ringp)279 rdsv3_af_thr_fire(rdsv3_af_thr_t *ringp)
280 {
281 	mutex_enter(&ringp->aft_lock);
282 	ringp->aft_state |= AFT_ARMED;
283 	if (!(ringp->aft_state & AFT_PROC)) {
284 		cv_signal(&ringp->aft_async);
285 	}
286 	mutex_exit(&ringp->aft_lock);
287 }
288 
289 static void
rdsv3_af_thr_worker(rdsv3_af_thr_t * ringp)290 rdsv3_af_thr_worker(rdsv3_af_thr_t *ringp)
291 {
292 	kmutex_t *lock = &ringp->aft_lock;
293 	kcondvar_t *async = &ringp->aft_async;
294 	callb_cpr_t cprinfo;
295 
296 	RDSV3_DPRINTF4("rdsv3_af_thr_worker", "Enter af_thr %p", ringp);
297 
298 	CALLB_CPR_INIT(&cprinfo, lock, callb_generic_cpr, "rdsv3_af_thr");
299 	mutex_enter(lock);
300 	for (;;) {
301 		while (!(ringp->aft_state & (AFT_ARMED | AFT_CONDEMNED))) {
302 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
303 			cv_wait(async, lock);
304 			CALLB_CPR_SAFE_END(&cprinfo, lock);
305 		}
306 		ringp->aft_state &= ~AFT_ARMED;
307 
308 		/*
309 		 * Either we have work to do, or we have been asked to
310 		 * shutdown
311 		 */
312 		if (ringp->aft_state & AFT_CONDEMNED)
313 			goto done;
314 		ASSERT(!(ringp->aft_state & AFT_PROC));
315 		ringp->aft_state |= AFT_PROC;
316 		mutex_exit(&ringp->aft_lock);
317 
318 		ringp->aft_drain_func(ringp->aft_data);
319 
320 		mutex_enter(&ringp->aft_lock);
321 		ringp->aft_state &= ~AFT_PROC;
322 	}
323 done:
324 	CALLB_CPR_EXIT(&cprinfo);
325 	RDSV3_DPRINTF2("rdsv3_af_thr_worker", "Exit af_thr %p", ringp);
326 	cv_destroy(&ringp->aft_async);
327 	mutex_destroy(&ringp->aft_lock);
328 	kmem_free(ringp, sizeof (rdsv3_af_thr_t));
329 	thread_exit();
330 }
331 
332 
333 int rdsv3_af_thr_thread_bind = 1;
334 
335 /*
336  * Bind a soft ring worker thread to supplied CPU.
337  */
338 cpu_t *
rdsv3_af_thr_bind(rdsv3_af_thr_t * ringp,processorid_t cpuid)339 rdsv3_af_thr_bind(rdsv3_af_thr_t *ringp, processorid_t cpuid)
340 {
341 	cpu_t *cp;
342 	boolean_t clear = B_FALSE;
343 
344 	ASSERT(MUTEX_HELD(&cpu_lock));
345 
346 	if (rdsv3_af_thr_thread_bind == 0) {
347 		return (NULL);
348 	}
349 
350 	cp = cpu_get(cpuid);
351 	if (cp == NULL || !cpu_is_online(cp))
352 		return (NULL);
353 
354 	mutex_enter(&ringp->aft_lock);
355 	ringp->aft_state |= AFT_BOUND;
356 	if (ringp->aft_cpuid != -1)
357 		clear = B_TRUE;
358 	ringp->aft_cpuid = cpuid;
359 	mutex_exit(&ringp->aft_lock);
360 
361 	if (clear)
362 		thread_affinity_clear(ringp->aft_worker);
363 
364 	RDSV3_DPRINTF4("rdsv3_af_thr_bind", "Bound af_thr %p to cpu %d",
365 	    ringp, cpuid);
366 	thread_affinity_set(ringp->aft_worker, cpuid);
367 	return (cp);
368 }
369 
370 /*
371  * Un Bind a soft ring worker thread.
372  */
373 static void
rdsv3_af_thr_unbind(rdsv3_af_thr_t * ringp)374 rdsv3_af_thr_unbind(rdsv3_af_thr_t *ringp)
375 {
376 	ASSERT(MUTEX_HELD(&cpu_lock));
377 
378 	mutex_enter(&ringp->aft_lock);
379 	if (!(ringp->aft_state & AFT_BOUND)) {
380 		ASSERT(ringp->aft_cpuid == -1);
381 		mutex_exit(&ringp->aft_lock);
382 		return;
383 	}
384 
385 	ringp->aft_cpuid = -1;
386 	ringp->aft_state &= ~AFT_BOUND;
387 	thread_affinity_clear(ringp->aft_worker);
388 	mutex_exit(&ringp->aft_lock);
389 }
390