1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24 #include <sys/ib/clients/rdsv3/ib.h>
25 #include <sys/ib/clients/rdsv3/rdsv3_af_thr_impl.h>
26 #include <sys/ib/clients/rdsv3/rdsv3_debug.h>
27
/* Highest system-class thread priority; used for af worker threads. */
extern pri_t maxclsyspri;
/* System-wide lock required around cpu_get()/cpu_is_online(). */
extern kmutex_t cpu_lock;

/* Tunable, set from the "EnableSendCQ" driver property in rdsv3_af_init(). */
int rdsv3_enable_snd_cq = 0;
/*
 * Tunable, set from "IntrLineUpMode": nonzero makes
 * rdsv3_af_intr_thr_create() put the CQ interrupt on the worker's CPU.
 */
int rdsv3_intr_line_up_mode = 0;
/* Protects rdsv3_cpuid_pool[] bookkeeping during group CPU assignment. */
static kmutex_t rdsv3_cpuid_pool_lock;
34
/*
 * One-time module init: record which CPUs are usable in
 * rdsv3_cpuid_pool[], note the CPUs servicing HCA MSI-X interrupts,
 * and read the CQ/interrupt tunables from driver properties on 'dip'.
 */
void
rdsv3_af_init(dev_info_t *dip)
{
	int i;
	cpu_t *cp;
	int *msix;
	uint_t nmsix;
	extern int ncpus;

	mutex_init(&rdsv3_cpuid_pool_lock, NULL, MUTEX_DEFAULT, NULL);
	/* cap the pool at RDSV3_CPUID_POOL_MAX entries */
	if (ncpus < RDSV3_CPUID_POOL_MAX)
		rdsv3_cpuid_pool_cnt = ncpus;
	else
		rdsv3_cpuid_pool_cnt = RDSV3_CPUID_POOL_MAX;

	/* hold cpu_lock before calling cpu_get and cpu_is_online */
	mutex_enter(&cpu_lock);
	for (i = 0; i < rdsv3_cpuid_pool_cnt; i++) {
		cp = cpu_get((processorid_t)i);
		if (cp == NULL || !cpu_is_online(cp))
			rdsv3_cpuid_pool[i] = RDSV3_CPUFLAGS_OFF;
		else
			rdsv3_cpuid_pool[i] = RDSV3_CPUFLAGS_ON;
	}
	mutex_exit(&cpu_lock);

	if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
	    "HcaMsix", (int **)&msix, &nmsix) == DDI_PROP_SUCCESS) {
		/* remove the hca MSI-x interrupt cpu's */
		/*
		 * NOTE(review): msix[i] comes straight from the driver
		 * property and indexes rdsv3_cpuid_pool[] and
		 * rdsv3_msix_pool[] with no range check -- confirm the
		 * property contents are validated/trusted upstream.
		 */
		for (i = 0; i < nmsix; i++) {
			rdsv3_cpuid_pool[msix[i]] |= RDSV3_CPUFLAGS_INTR;
			rdsv3_msix_pool[i] = msix[i];
		}
		rdsv3_msix_pool_cnt = nmsix;
		ddi_prop_free(msix);
	}
	/* optional tunables; both default to 0 when the property is absent */
	rdsv3_enable_snd_cq = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
	    DDI_PROP_DONTPASS, "EnableSendCQ", 0);
	rdsv3_intr_line_up_mode = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
	    DDI_PROP_DONTPASS, "IntrLineUpMode", 0);
}
76
/*
 * Pick CPUs for an HCA group: one CPU for the HCA itself
 * (g_hca_cpuid) and up to RDSV3_AFT_CONN_CPU_POOL CPUs for its
 * connections (g_conn_cpuid_pool[]).  Prefers unclaimed CPUs, then
 * falls back to sharing CPUs already assigned to other groups.
 */
static void
rdsv3_af_cpu_assign(rdsv3_af_grp_t *hcagp)
{
	int i, j, k, idx;

	RDSV3_DPRINTF2("rdsv3_af_cpu_assign", "hcagp %p", hcagp);

	mutex_enter(&rdsv3_cpuid_pool_lock);
	/* first choice: a CPU not yet claimed by any group or HCA */
	for (i = 0; i < rdsv3_cpuid_pool_cnt; i++) {
		if (!(rdsv3_cpuid_pool[i] & (RDSV3_CPUFLAGS_UNAVAIL |
		    RDSV3_CPUFLAGS_ASSIGNED | RDSV3_CPUFLAGS_HCA))) {
			rdsv3_cpuid_pool[i] |= RDSV3_CPUFLAGS_HCA;
			hcagp->g_hca_cpuid = i;
			break;
		}
		/* share an assigned cpu */
		for (j = 0; j < rdsv3_cpuid_pool_cnt; j++) {
			if (!(rdsv3_cpuid_pool[j] & (RDSV3_CPUFLAGS_UNAVAIL |
			    RDSV3_CPUFLAGS_HCA))) {
				hcagp->g_hca_cpuid = j;
				break;
			}
		}
		/* if the code comes down here, cpu 0 will be used */
	}

	/* claim one dedicated CPU per connection-pool slot while any remain */
	for (j = 0; j < RDSV3_AFT_CONN_CPU_POOL; j++) {
		/* initialize to be an out-of-bound cpuid, no binding */
		hcagp->g_conn_cpuid_pool[j] = rdsv3_cpuid_pool_cnt;
		for (i = 0; i < rdsv3_cpuid_pool_cnt; i++) {
			if (!(rdsv3_cpuid_pool[i] & (RDSV3_CPUFLAGS_UNAVAIL |
			    RDSV3_CPUFLAGS_ASSIGNED | RDSV3_CPUFLAGS_HCA))) {
				rdsv3_cpuid_pool[i] |= RDSV3_CPUFLAGS_ASSIGNED;
				hcagp->g_conn_cpuid_pool[j] = i;
				break;
			}
		}
		/* no unclaimed CPU left; remaining slots must share */
		if (i >= rdsv3_cpuid_pool_cnt)
			break;
	}
	if (j >= RDSV3_AFT_CONN_CPU_POOL) {
		/* every slot got its own CPU; done */
		mutex_exit(&rdsv3_cpuid_pool_lock);
		return;
	}
	/* avoid the primary group */
	for (k = 0, idx = 0; k < 2; k++) {
		/* search to the start of an hca group */
		for (i = idx; i < rdsv3_cpuid_pool_cnt; i++) {
			if (rdsv3_cpuid_pool[i] & RDSV3_CPUFLAGS_HCA) {
				idx = i + 1;
				break;
			}
		}
	}
	/* share an assigned cpu */
	for (; j < RDSV3_AFT_CONN_CPU_POOL; j++) {
		for (i = idx; i < rdsv3_cpuid_pool_cnt; i++) {
			if (!(rdsv3_cpuid_pool[i] & (RDSV3_CPUFLAGS_UNAVAIL |
			    RDSV3_CPUFLAGS_HCA))) {
				hcagp->g_conn_cpuid_pool[j] = i;
				idx = i + 1;
				break;
			}
		}
	}
	mutex_exit(&rdsv3_cpuid_pool_lock);
}
144
145 rdsv3_af_grp_t *
rdsv3_af_grp_create(ibt_hca_hdl_t hca,uint64_t id)146 rdsv3_af_grp_create(ibt_hca_hdl_t hca, uint64_t id)
147 {
148 char name[128];
149 ibt_cq_sched_attr_t cq_sched_attr;
150 ibt_status_t status;
151 rdsv3_af_grp_t *hcagp;
152 uint64_t l_id = id;
153
154 hcagp = kmem_zalloc(sizeof (*hcagp), KM_NOSLEEP);
155 if (!hcagp)
156 return (NULL);
157 hcagp->g_hca_hdl = hca;
158
159 rdsv3_af_cpu_assign(hcagp);
160 return (hcagp);
161 }
162
163 void
rdsv3_af_grp_destroy(rdsv3_af_grp_t * hcagp)164 rdsv3_af_grp_destroy(rdsv3_af_grp_t *hcagp)
165 {
166 if (hcagp == NULL)
167 return;
168
169 kmem_free(hcagp, sizeof (*hcagp));
170 }
171
172 void
rdsv3_af_grp_draw(rdsv3_af_grp_t * hcagp)173 rdsv3_af_grp_draw(rdsv3_af_grp_t *hcagp)
174 {
175 rdsv3_af_grp_t *l_hcagp = hcagp;
176 }
177
178 ibt_sched_hdl_t
rdsv3_af_grp_get_sched(rdsv3_af_grp_t * hcagp)179 rdsv3_af_grp_get_sched(rdsv3_af_grp_t *hcagp)
180 {
181 return (hcagp->g_sched_hdl);
182 }
183
/*
 * Create an af thread for CQ 'ibt_cq_hdl' and bind it to the next CPU
 * from the group's connection pool (simple round-robin over
 * g_conn_cpuid_pool[]).  The CQ interrupt is steered either onto the
 * same CPU (IntrLineUpMode != 0) or onto the first MSI-X CPU.
 * Returns NULL if the CQ handle is NULL or thread creation fails.
 */
rdsv3_af_thr_t *
rdsv3_af_intr_thr_create(rdsv3_af_thr_drain_func_t fn, void *data, uint_t flag,
    rdsv3_af_grp_t *hcagp, ibt_cq_hdl_t ibt_cq_hdl)
{
	rdsv3_af_thr_t *ringp;
	processorid_t cpuid;

	if (ibt_cq_hdl == NULL)
		return (NULL);
	ringp = rdsv3_af_thr_create(fn, data, flag, hcagp);
	if (ringp == NULL)
		return (NULL);

	/* cpu_lock must be held across rdsv3_af_thr_bind() */
	mutex_enter(&cpu_lock);
	if (hcagp->g_conn_cpuid_idx >= RDSV3_AFT_CONN_CPU_POOL)
		hcagp->g_conn_cpuid_idx = 0;
	cpuid = hcagp->g_conn_cpuid_pool[hcagp->g_conn_cpuid_idx++];
	(void) rdsv3_af_thr_bind(ringp, cpuid);
	mutex_exit(&cpu_lock);

	if (ringp->aft_intr) {
		if (rdsv3_intr_line_up_mode) {
			(void) ddi_intr_set_affinity(ringp->aft_intr, cpuid);
		} else {
			/*
			 * NOTE(review): rdsv3_msix_pool[0] is used even if
			 * rdsv3_af_init() found no "HcaMsix" property, in
			 * which case it still holds its zero-initialized
			 * value -- confirm that is the intended fallback.
			 */
			(void) ddi_intr_set_affinity(ringp->aft_intr,
			    rdsv3_msix_pool[0]);
		}
	}
	return (ringp);
}
214
215 rdsv3_af_thr_t *
rdsv3_af_thr_create(rdsv3_af_thr_drain_func_t fn,void * data,uint_t flag,rdsv3_af_grp_t * hcagp)216 rdsv3_af_thr_create(rdsv3_af_thr_drain_func_t fn, void *data, uint_t flag,
217 rdsv3_af_grp_t *hcagp)
218 {
219 rdsv3_af_thr_t *ringp;
220 pri_t pri;
221 uint_t l_flags = flag;
222 rdsv3_af_grp_t *l_hcagp = hcagp;
223
224 ringp = kmem_zalloc(sizeof (rdsv3_af_thr_t), KM_NOSLEEP);
225 if (ringp == NULL)
226 return (NULL);
227
228 ringp->aft_grp = hcagp;
229 mutex_init(&ringp->aft_lock, NULL, MUTEX_DEFAULT, NULL);
230 cv_init(&ringp->aft_async, NULL, CV_DEFAULT, NULL);
231 if (flag & SCQ_WRK_BIND_CPU)
232 pri = maxclsyspri;
233 else
234 pri = maxclsyspri;
235 ringp->aft_worker = thread_create(NULL, 0,
236 rdsv3_af_thr_worker, ringp, 0, &p0, TS_RUN, pri);
237 ringp->aft_data = data;
238 ringp->aft_drain_func = (rdsv3_af_thr_drain_func_t)fn;
239
240 /* set the bind CPU to -1 to indicate no thread affinity set */
241 ringp->aft_cpuid = -1;
242 ringp->aft_state = 0;
243 ringp->aft_cflag = flag;
244
245 if (flag & SCQ_BIND_CPU) {
246 mutex_enter(&cpu_lock);
247 if (flag & SCQ_HCA_BIND_CPU) {
248 (void) rdsv3_af_thr_bind(ringp, hcagp->g_hca_cpuid);
249 } else if (flag & SCQ_WRK_BIND_CPU) {
250 (void) rdsv3_af_thr_bind(ringp, hcagp->g_hca_cpuid);
251 }
252 mutex_exit(&cpu_lock);
253 }
254
255 RDSV3_DPRINTF2("rdsv3_af_thr_create", "af_thr %p ic %p", ringp, data);
256 return (ringp);
257 }
258
/*
 * Request shutdown of an af thread.  Polls until the worker is not
 * actively draining, then marks it condemned and wakes it; the worker
 * frees 'ringp' itself on exit (see rdsv3_af_thr_worker), so 'ringp'
 * must not be touched after this returns.
 */
void
rdsv3_af_thr_destroy(rdsv3_af_thr_t *ringp)
{
	RDSV3_DPRINTF2("rdsv3_af_thr_destroy", "af_thr %p", ringp);

	/* wait until the af_thr has gone to sleep */
	mutex_enter(&ringp->aft_lock);
	while (ringp->aft_state & AFT_PROC) {
		mutex_exit(&ringp->aft_lock);
		delay(drv_usectohz(1000));
		mutex_enter(&ringp->aft_lock);
	}
	ringp->aft_state |= AFT_CONDEMNED;
	/*
	 * The wait loop above exits holding aft_lock with AFT_PROC
	 * clear, so this check always passes and the worker is woken.
	 */
	if (!(ringp->aft_state & AFT_PROC)) {
		cv_signal(&ringp->aft_async);
	}
	mutex_exit(&ringp->aft_lock);
}
277
278 void
rdsv3_af_thr_fire(rdsv3_af_thr_t * ringp)279 rdsv3_af_thr_fire(rdsv3_af_thr_t *ringp)
280 {
281 mutex_enter(&ringp->aft_lock);
282 ringp->aft_state |= AFT_ARMED;
283 if (!(ringp->aft_state & AFT_PROC)) {
284 cv_signal(&ringp->aft_async);
285 }
286 mutex_exit(&ringp->aft_lock);
287 }
288
/*
 * Worker thread body.  Sleeps (CPR-safe) until armed via
 * rdsv3_af_thr_fire(), runs aft_drain_func(aft_data) outside the
 * lock, and repeats.  On AFT_CONDEMNED (set by rdsv3_af_thr_destroy)
 * it tears down and frees 'ringp' itself before exiting.
 */
static void
rdsv3_af_thr_worker(rdsv3_af_thr_t *ringp)
{
	kmutex_t *lock = &ringp->aft_lock;
	kcondvar_t *async = &ringp->aft_async;
	callb_cpr_t cprinfo;

	RDSV3_DPRINTF4("rdsv3_af_thr_worker", "Enter af_thr %p", ringp);

	CALLB_CPR_INIT(&cprinfo, lock, callb_generic_cpr, "rdsv3_af_thr");
	mutex_enter(lock);
	for (;;) {
		/* sleep until fired or condemned; CPR-safe wait */
		while (!(ringp->aft_state & (AFT_ARMED | AFT_CONDEMNED))) {
			CALLB_CPR_SAFE_BEGIN(&cprinfo);
			cv_wait(async, lock);
			CALLB_CPR_SAFE_END(&cprinfo, lock);
		}
		ringp->aft_state &= ~AFT_ARMED;

		/*
		 * Either we have work to do, or we have been asked to
		 * shutdown
		 */
		if (ringp->aft_state & AFT_CONDEMNED)
			goto done;
		ASSERT(!(ringp->aft_state & AFT_PROC));
		ringp->aft_state |= AFT_PROC;
		/* drop the lock while draining so fire() can re-arm us */
		mutex_exit(&ringp->aft_lock);

		ringp->aft_drain_func(ringp->aft_data);

		mutex_enter(&ringp->aft_lock);
		ringp->aft_state &= ~AFT_PROC;
	}
done:
	/* CALLB_CPR_EXIT drops aft_lock; then self-destruct */
	CALLB_CPR_EXIT(&cprinfo);
	RDSV3_DPRINTF2("rdsv3_af_thr_worker", "Exit af_thr %p", ringp);
	cv_destroy(&ringp->aft_async);
	mutex_destroy(&ringp->aft_lock);
	kmem_free(ringp, sizeof (rdsv3_af_thr_t));
	thread_exit();
}
331

/* Tunable: set to 0 to disable CPU binding in rdsv3_af_thr_bind(). */
int rdsv3_af_thr_thread_bind = 1;
334
335 /*
336 * Bind a soft ring worker thread to supplied CPU.
337 */
338 cpu_t *
rdsv3_af_thr_bind(rdsv3_af_thr_t * ringp,processorid_t cpuid)339 rdsv3_af_thr_bind(rdsv3_af_thr_t *ringp, processorid_t cpuid)
340 {
341 cpu_t *cp;
342 boolean_t clear = B_FALSE;
343
344 ASSERT(MUTEX_HELD(&cpu_lock));
345
346 if (rdsv3_af_thr_thread_bind == 0) {
347 return (NULL);
348 }
349
350 cp = cpu_get(cpuid);
351 if (cp == NULL || !cpu_is_online(cp))
352 return (NULL);
353
354 mutex_enter(&ringp->aft_lock);
355 ringp->aft_state |= AFT_BOUND;
356 if (ringp->aft_cpuid != -1)
357 clear = B_TRUE;
358 ringp->aft_cpuid = cpuid;
359 mutex_exit(&ringp->aft_lock);
360
361 if (clear)
362 thread_affinity_clear(ringp->aft_worker);
363
364 RDSV3_DPRINTF4("rdsv3_af_thr_bind", "Bound af_thr %p to cpu %d",
365 ringp, cpuid);
366 thread_affinity_set(ringp->aft_worker, cpuid);
367 return (cp);
368 }
369
370 /*
371 * Un Bind a soft ring worker thread.
372 */
373 static void
rdsv3_af_thr_unbind(rdsv3_af_thr_t * ringp)374 rdsv3_af_thr_unbind(rdsv3_af_thr_t *ringp)
375 {
376 ASSERT(MUTEX_HELD(&cpu_lock));
377
378 mutex_enter(&ringp->aft_lock);
379 if (!(ringp->aft_state & AFT_BOUND)) {
380 ASSERT(ringp->aft_cpuid == -1);
381 mutex_exit(&ringp->aft_lock);
382 return;
383 }
384
385 ringp->aft_cpuid = -1;
386 ringp->aft_state &= ~AFT_BOUND;
387 thread_affinity_clear(ringp->aft_worker);
388 mutex_exit(&ringp->aft_lock);
389 }
390