1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26 #include <sys/types.h>
27 #include <sys/sysmacros.h>
28 #include <sys/ddi.h>
29 #include <sys/sunndi.h>
30 #include <sys/ddi_impldefs.h>
31 #include <sys/psm_types.h>
32 #include <sys/smp_impldefs.h>
33 #include <sys/apic.h>
34 #include <sys/processor.h>
35 #include <sys/apix_irm_impl.h>
36
37 /* global variable for static default limit for non-IRM drivers */
38 extern int ddi_msix_alloc_limit;
39
40 /* Extern declarations */
41 extern int (*psm_intr_ops)(dev_info_t *, ddi_intr_handle_impl_t *,
42 psm_intr_op_t, int *);
43
44 /*
45 * Global variables for IRM pool configuration:
46 *
47 * (1) apix_system_max_vectors -- this would limit the maximum
48 * number of interrupt vectors that will be made avilable
49 * to the device drivers. The default value (-1) indicates
50 * that all the available vectors could be used.
51 *
52 * (2) apix_irm_cpu_factor -- This would specify the number of CPUs that
53 * should be excluded from the global IRM pool of interrupt vectors.
54 * By default this would be zero, so vectors from all the CPUs
55 * present will be factored into the IRM pool.
56 *
57 * (3) apix_irm_reserve_fixed_vectors -- This would specify the number
58 * of vectors that should be reserved for FIXED type interrupts and
59 * exclude them from the IRM pool. The value can be one of the
60 * following:
61 * 0 - no reservation (default)
62 * <n> - a positive number for the reserved cache
63 * -1 - reserve the maximum needed
64 *
65 * (4) apix_irm_free_fixed_vectors -- This flag specifies if the
66 * vectors for FIXED type should be freed and added back
67 * to the IRM pool when ddi_intr_free() is called. The default
68 * is to add it back to the pool.
69 */
70 int apix_system_max_vectors = -1;
71 int apix_irm_cpu_factor = 0;
72 int apix_irm_reserve_fixed_vectors = 0;
73 int apix_irm_free_fixed_vector = 1;
74
75 /* info from APIX module for IRM configuration */
76 apix_irm_info_t apix_irminfo;
77
78 kmutex_t apix_irm_lock; /* global mutex for apix_irm_* data */
79 ddi_irm_params_t apix_irm_params; /* IRM pool info */
80 int apix_irm_cache_size = 0; /* local cache for FIXED type requests */
81 int apix_irm_cpu_factor_available = 0;
82 int apix_irm_max_cpus = 0;
83 int apix_irm_cpus_used = 0;
84 int apix_irm_fixed_intr_vectors_used;
85
86 extern int ncpus;
87
88 /* local data/functions */
89 static int apix_irm_chk_apix();
90 int apix_irm_intr_ops(dev_info_t *dip, ddi_intr_handle_impl_t *handle,
91 psm_intr_op_t op, int *result);
92 int apix_irm_disable_intr(processorid_t);
93 void apix_irm_enable_intr(processorid_t);
94 int (*psm_intr_ops_saved)(dev_info_t *dip, ddi_intr_handle_impl_t *handle,
95 psm_intr_op_t op, int *result) = NULL;
96 int (*psm_disable_intr_saved)(processorid_t) = NULL;
97 void (*psm_enable_intr_saved)(processorid_t) = NULL;
98 int apix_irm_alloc_fixed(dev_info_t *, ddi_intr_handle_impl_t *, int *);
99 int apix_irm_free_fixed(dev_info_t *, ddi_intr_handle_impl_t *, int *);
100
101 /*
102 * Initilaize IRM pool for APIC interrupts if the PSM module
103 * is of APIX type. This should be called only after PSM module
104 * is loaded and APIC interrupt system is initialized.
105 */
106 void
apix_irm_init(void)107 apix_irm_init(void)
108 {
109 dev_info_t *dip;
110 int total_avail_vectors;
111 int cpus_used;
112 int cache_size;
113
114 /* nothing to do if IRM is disabled */
115 if (!irm_enable)
116 return;
117
118 /*
119 * Use root devinfo node to associate the IRM pool with it
120 * as the pool is global to the system.
121 */
122 dip = ddi_root_node();
123
124 /*
125 * Check if PSM module is initialized and it is APIX
126 * module (which supports IRM functionality).
127 */
128 if ((psm_intr_ops == NULL) || !apix_irm_chk_apix()) {
129 /* not an APIX module */
130 APIX_IRM_DEBUG((CE_CONT,
131 "apix_irm_init: APIX module not present"));
132 return;
133 }
134
135 /*
136 * Now, determine the IRM pool parameters based on the
137 * info from APIX module and global config variables.
138 */
139
140 /*
141 * apix_ncpus shows all the CPUs present in the
142 * system but not all of them may have been enabled
143 * (i.e. mp_startup() may not have been called yet).
144 * So, use ncpus for IRM pool creation.
145 */
146 if (apix_irminfo.apix_ncpus > ncpus)
147 apix_irminfo.apix_ncpus = ncpus;
148
149 /* apply the CPU factor if possible */
150 if ((apix_irm_cpu_factor > 0) &&
151 (apix_irminfo.apix_ncpus > apix_irm_cpu_factor)) {
152 cpus_used = apix_irminfo.apix_ncpus - apix_irm_cpu_factor;
153 apix_irm_cpu_factor_available = apix_irm_cpu_factor;
154 } else {
155 cpus_used = apix_irminfo.apix_ncpus;
156 }
157 apix_irm_cpus_used = apix_irm_max_cpus = cpus_used;
158
159 APIX_IRM_DEBUG((CE_CONT,
160 "apix_irm_init: %d CPUs used for IRM pool size", cpus_used));
161
162 total_avail_vectors = cpus_used * apix_irminfo.apix_per_cpu_vectors -
163 apix_irminfo.apix_vectors_allocated;
164
165 apix_irm_fixed_intr_vectors_used = apix_irminfo.apix_vectors_allocated;
166
167 if (total_avail_vectors <= 0) {
168 /* can not determine pool size */
169 APIX_IRM_DEBUG((CE_NOTE,
170 "apix_irm_init: can not determine pool size"));
171 return;
172 }
173
174 /* adjust the pool size as per the global config variable */
175 if ((apix_system_max_vectors > 0) &&
176 (apix_system_max_vectors < total_avail_vectors))
177 total_avail_vectors = apix_system_max_vectors;
178
179 /* pre-reserve vectors (i.e. local cache) for FIXED type if needed */
180 if (apix_irm_reserve_fixed_vectors != 0) {
181 cache_size = apix_irm_reserve_fixed_vectors;
182 if ((cache_size == -1) ||
183 (cache_size > apix_irminfo.apix_ioapic_max_vectors))
184 cache_size = apix_irminfo.apix_ioapic_max_vectors;
185 total_avail_vectors -= cache_size;
186 apix_irm_cache_size = cache_size;
187 }
188
189 if (total_avail_vectors <= 0) {
190 APIX_IRM_DEBUG((CE_NOTE,
191 "apix_irm_init: invalid config parameters!"));
192 return;
193 }
194
195 /* IRM pool is used only for MSI/X interrupts */
196 apix_irm_params.iparams_types = DDI_INTR_TYPE_MSI | DDI_INTR_TYPE_MSIX;
197 apix_irm_params.iparams_total = total_avail_vectors;
198
199 if (ndi_irm_create(dip, &apix_irm_params,
200 &apix_irm_pool_p) == NDI_SUCCESS) {
201 /*
202 * re-direct psm_intr_ops to intercept FIXED
203 * interrupt allocation requests.
204 */
205 psm_intr_ops_saved = psm_intr_ops;
206 psm_intr_ops = apix_irm_intr_ops;
207 /*
208 * re-direct psm_enable_intr()/psm_disable_intr() to
209 * intercept CPU offline/online requests.
210 */
211 psm_disable_intr_saved = psm_disable_intr;
212 psm_enable_intr_saved = psm_enable_intr;
213 psm_enable_intr = apix_irm_enable_intr;
214 psm_disable_intr = apix_irm_disable_intr;
215
216 mutex_init(&apix_irm_lock, NULL, MUTEX_DRIVER, NULL);
217
218 /*
219 * Set default alloc limit for non-IRM drivers
220 * to DDI_MIN_MSIX_ALLOC (currently defined as 8).
221 *
222 * NOTE: This is done here so that the limit of 8 vectors
223 * is applicable only with APIX module. For the old pcplusmp
224 * implementation, the current default of 2 (i.e
225 * DDI_DEFAULT_MSIX_ALLOC) is retained.
226 */
227 if (ddi_msix_alloc_limit < DDI_MIN_MSIX_ALLOC)
228 ddi_msix_alloc_limit = DDI_MIN_MSIX_ALLOC;
229 } else {
230 APIX_IRM_DEBUG((CE_NOTE,
231 "apix_irm_init: ndi_irm_create() failed"));
232 apix_irm_pool_p = NULL;
233 }
234 }
235
236 /*
237 * Check if the PSM module is "APIX" type which supports IRM feature.
238 * Returns 0 if it is not an APIX module.
239 */
240 static int
apix_irm_chk_apix(void)241 apix_irm_chk_apix(void)
242 {
243 ddi_intr_handle_impl_t info_hdl;
244 apic_get_type_t type_info;
245
246 if (!psm_intr_ops)
247 return (0);
248
249 bzero(&info_hdl, sizeof (ddi_intr_handle_impl_t));
250 info_hdl.ih_private = &type_info;
251 if (((*psm_intr_ops)(NULL, &info_hdl, PSM_INTR_OP_APIC_TYPE,
252 NULL)) != PSM_SUCCESS) {
253 /* unknown type; assume not an APIX module */
254 return (0);
255 }
256 if (strcmp(type_info.avgi_type, APIC_APIX_NAME) == 0)
257 return (1);
258 else
259 return (0);
260 }
261
262 /*
263 * This function intercepts PSM_INTR_OP_* requests to deal with
264 * IRM pool maintainance for FIXED type interrupts. The following
265 * commands are intercepted and the rest are simply passed back to
266 * the original psm_intr_ops function:
267 * PSM_INTR_OP_ALLOC_VECTORS
268 * PSM_INTR_OP_FREE_VECTORS
269 * Return value is either PSM_SUCCESS or PSM_FAILURE.
270 */
271 int
apix_irm_intr_ops(dev_info_t * dip,ddi_intr_handle_impl_t * handle,psm_intr_op_t op,int * result)272 apix_irm_intr_ops(dev_info_t *dip, ddi_intr_handle_impl_t *handle,
273 psm_intr_op_t op, int *result)
274 {
275 switch (op) {
276 case PSM_INTR_OP_ALLOC_VECTORS:
277 if (handle->ih_type == DDI_INTR_TYPE_FIXED)
278 return (apix_irm_alloc_fixed(dip, handle, result));
279 else
280 break;
281 case PSM_INTR_OP_FREE_VECTORS:
282 if (handle->ih_type == DDI_INTR_TYPE_FIXED)
283 return (apix_irm_free_fixed(dip, handle, result));
284 else
285 break;
286 default:
287 break;
288 }
289
290 /* pass the request to APIX */
291 return ((*psm_intr_ops_saved)(dip, handle, op, result));
292 }
293
294 /*
295 * Allocate a FIXED type interrupt. The procedure for this
296 * operation is as follows:
297 *
298 * 1) Check if this IRQ is shared (i.e. IRQ is already mapped
299 * and a vector has been already allocated). If so, then no
300 * new vector is needed and simply pass the request to APIX
301 * and return.
302 * 2) Check the local cache pool for an available vector. If
303 * the cache is not empty then take it from there and simply
304 * pass the request to APIX and return.
305 * 3) Otherwise, get a vector from the IRM pool by reducing the
306 * pool size by 1. If it is successful then pass the
307 * request to APIX module. Otherwise return PSM_FAILURE.
308 */
309 int
apix_irm_alloc_fixed(dev_info_t * dip,ddi_intr_handle_impl_t * handle,int * result)310 apix_irm_alloc_fixed(dev_info_t *dip, ddi_intr_handle_impl_t *handle,
311 int *result)
312 {
313 int vector;
314 uint_t new_pool_size;
315 int ret;
316
317 /*
318 * Check if this IRQ has been mapped (i.e. shared IRQ case)
319 * by doing PSM_INTR_OP_XLATE_VECTOR.
320 */
321 ret = (*psm_intr_ops_saved)(dip, handle, PSM_INTR_OP_XLATE_VECTOR,
322 &vector);
323 if (ret == PSM_SUCCESS) {
324 APIX_IRM_DEBUG((CE_CONT,
325 "apix_irm_alloc_fixed: dip %p (%s) xlated vector 0x%x",
326 (void *)dip, ddi_driver_name(dip), vector));
327 /* (1) mapping already exists; pass the request to PSM */
328 return ((*psm_intr_ops_saved)(dip, handle,
329 PSM_INTR_OP_ALLOC_VECTORS, result));
330 }
331
332 /* check the local cache for an available vector */
333 mutex_enter(&apix_irm_lock);
334 if (apix_irm_cache_size) { /* cache is not empty */
335 --apix_irm_cache_size;
336 apix_irm_fixed_intr_vectors_used++;
337 mutex_exit(&apix_irm_lock);
338 /* (2) use the vector from the local cache */
339 return ((*psm_intr_ops_saved)(dip, handle,
340 PSM_INTR_OP_ALLOC_VECTORS, result));
341 }
342
343 /* (3) get a vector from the IRM pool */
344
345 new_pool_size = apix_irm_params.iparams_total - 1;
346
347 APIX_IRM_DEBUG((CE_CONT, "apix_irm_alloc_fixed: dip %p (%s) resize pool"
348 " from %x to %x\n", (void *)dip, ddi_driver_name(dip),
349 apix_irm_pool_p->ipool_totsz, new_pool_size));
350
351 if (ndi_irm_resize_pool(apix_irm_pool_p, new_pool_size) ==
352 NDI_SUCCESS) {
353 /* update the pool size info */
354 apix_irm_params.iparams_total = new_pool_size;
355 apix_irm_fixed_intr_vectors_used++;
356 mutex_exit(&apix_irm_lock);
357 return ((*psm_intr_ops_saved)(dip, handle,
358 PSM_INTR_OP_ALLOC_VECTORS, result));
359 }
360
361 mutex_exit(&apix_irm_lock);
362
363 return (PSM_FAILURE);
364 }
365
366 /*
367 * Free up the FIXED type interrupt.
368 *
369 * 1) If it is a shared vector then simply pass the request to
370 * APIX and return.
371 * 2) Otherwise, if apix_irm_free_fixed_vector is not set then add the
372 * vector back to the IRM pool. Otherwise, keep it in the local cache.
373 */
374 int
apix_irm_free_fixed(dev_info_t * dip,ddi_intr_handle_impl_t * handle,int * result)375 apix_irm_free_fixed(dev_info_t *dip, ddi_intr_handle_impl_t *handle,
376 int *result)
377 {
378 int shared;
379 int ret;
380 uint_t new_pool_size;
381
382 /* check if it is a shared vector */
383 ret = (*psm_intr_ops_saved)(dip, handle,
384 PSM_INTR_OP_GET_SHARED, &shared);
385
386 if ((ret == PSM_SUCCESS) && (shared > 0)) {
387 /* (1) it is a shared vector; simply pass the request */
388 APIX_IRM_DEBUG((CE_CONT, "apix_irm_free_fixed: dip %p (%s) "
389 "shared %d\n", (void *)dip, ddi_driver_name(dip), shared));
390 return ((*psm_intr_ops_saved)(dip, handle,
391 PSM_INTR_OP_FREE_VECTORS, result));
392 }
393
394 ret = (*psm_intr_ops_saved)(dip, handle,
395 PSM_INTR_OP_FREE_VECTORS, result);
396
397 if (ret == PSM_SUCCESS) {
398 mutex_enter(&apix_irm_lock);
399 if (apix_irm_free_fixed_vector) {
400 /* (2) add the vector back to IRM pool */
401 new_pool_size = apix_irm_params.iparams_total + 1;
402 APIX_IRM_DEBUG((CE_CONT, "apix_irm_free_fixed: "
403 "dip %p (%s) resize pool from %x to %x\n",
404 (void *)dip, ddi_driver_name(dip),
405 apix_irm_pool_p->ipool_totsz, new_pool_size));
406 if (ndi_irm_resize_pool(apix_irm_pool_p,
407 new_pool_size) == NDI_SUCCESS) {
408 /* update the pool size info */
409 apix_irm_params.iparams_total = new_pool_size;
410 } else {
411 cmn_err(CE_NOTE,
412 "apix_irm_free_fixed: failed to add"
413 " a vector to IRM pool");
414 }
415 } else {
416 /* keep the vector in the local cache */
417 apix_irm_cache_size += 1;
418 }
419 apix_irm_fixed_intr_vectors_used--;
420 mutex_exit(&apix_irm_lock);
421 }
422
423 return (ret);
424 }
425
426 /*
427 * Disable the CPU for interrupts. It is assumed that this is called to
428 * offline/disable the CPU so that no interrupts are allocated on
429 * that CPU. For IRM perspective, the interrupt vectors on this
430 * CPU are to be excluded for any allocations.
431 *
432 * If APIX module is successful in migrating all the vectors
433 * from this CPU then reduce the IRM pool size to exclude the
434 * interrupt vectors for that CPU.
435 */
436 int
apix_irm_disable_intr(processorid_t id)437 apix_irm_disable_intr(processorid_t id)
438 {
439 uint_t new_pool_size;
440
441 /* Interrupt disabling for Suspend/Resume */
442 if (apic_cpus[id].aci_status & APIC_CPU_SUSPEND)
443 return ((*psm_disable_intr_saved)(id));
444
445 mutex_enter(&apix_irm_lock);
446 /*
447 * Don't remove the CPU from the IRM pool if we have CPU factor
448 * available.
449 */
450 if ((apix_irm_cpu_factor > 0) && (apix_irm_cpu_factor_available > 0)) {
451 apix_irm_cpu_factor_available--;
452 } else {
453 /* can't disable if there is only one CPU used */
454 if (apix_irm_cpus_used == 1) {
455 mutex_exit(&apix_irm_lock);
456 return (PSM_FAILURE);
457 }
458 /* Calculate the new size for the IRM pool */
459 new_pool_size = apix_irm_params.iparams_total -
460 apix_irminfo.apix_per_cpu_vectors;
461
462 /* Apply the max. limit */
463 if (apix_system_max_vectors > 0) {
464 uint_t max;
465
466 max = apix_system_max_vectors -
467 apix_irm_fixed_intr_vectors_used -
468 apix_irm_cache_size;
469
470 new_pool_size = MIN(new_pool_size, max);
471 }
472
473 if (new_pool_size == 0) {
474 cmn_err(CE_WARN, "Invalid pool size 0 with "
475 "apix_system_max_vectors = %d",
476 apix_system_max_vectors);
477 mutex_exit(&apix_irm_lock);
478 return (PSM_FAILURE);
479 }
480
481 if (new_pool_size != apix_irm_params.iparams_total) {
482 /* remove the CPU from the IRM pool */
483 if (ndi_irm_resize_pool(apix_irm_pool_p,
484 new_pool_size) != NDI_SUCCESS) {
485 mutex_exit(&apix_irm_lock);
486 APIX_IRM_DEBUG((CE_NOTE,
487 "apix_irm_disable_intr: failed to resize"
488 " the IRM pool"));
489 return (PSM_FAILURE);
490 }
491 /* update the pool size info */
492 apix_irm_params.iparams_total = new_pool_size;
493 }
494
495 /* decrement the CPU count used by IRM pool */
496 apix_irm_cpus_used--;
497 }
498
499 /*
500 * Now, disable the CPU for interrupts.
501 */
502 if ((*psm_disable_intr_saved)(id) != PSM_SUCCESS) {
503 APIX_IRM_DEBUG((CE_NOTE,
504 "apix_irm_disable_intr: failed to disable CPU interrupts"
505 " for CPU#%d", id));
506 mutex_exit(&apix_irm_lock);
507 return (PSM_FAILURE);
508 }
509 /* decrement the CPU count enabled for interrupts */
510 apix_irm_max_cpus--;
511 mutex_exit(&apix_irm_lock);
512 return (PSM_SUCCESS);
513 }
514
515 /*
516 * Enable the CPU for interrupts. It is assumed that this function is
517 * called to enable/online the CPU so that interrupts could be assigned
518 * to it. If successful, add available vectors for that CPU to the IRM
519 * pool if apix_irm_cpu_factor is already satisfied.
520 */
521 void
apix_irm_enable_intr(processorid_t id)522 apix_irm_enable_intr(processorid_t id)
523 {
524 uint_t new_pool_size;
525
526 /* Interrupt enabling for Suspend/Resume */
527 if (apic_cpus[id].aci_status & APIC_CPU_SUSPEND) {
528 (*psm_enable_intr_saved)(id);
529 return;
530 }
531
532 mutex_enter(&apix_irm_lock);
533
534 /* enable the CPU for interrupts */
535 (*psm_enable_intr_saved)(id);
536
537 /* increment the number of CPUs enabled for interrupts */
538 apix_irm_max_cpus++;
539
540 ASSERT(apix_irminfo.apix_per_cpu_vectors > 0);
541
542 /*
543 * Check if the apix_irm_cpu_factor is satisfied before.
544 * If satisfied, add the CPU to IRM pool.
545 */
546 if ((apix_irm_cpu_factor > 0) &&
547 (apix_irm_cpu_factor_available < apix_irm_cpu_factor)) {
548 /*
549 * Don't add the CPU to the IRM pool. Just update
550 * the available CPU factor.
551 */
552 apix_irm_cpu_factor_available++;
553 mutex_exit(&apix_irm_lock);
554 return;
555 }
556
557 /*
558 * Add the CPU to the IRM pool.
559 */
560
561 /* increment the CPU count used by IRM */
562 apix_irm_cpus_used++;
563
564 /* Calculate the new pool size */
565 new_pool_size = apix_irm_params.iparams_total +
566 apix_irminfo.apix_per_cpu_vectors;
567
568 /* Apply the max. limit */
569 if (apix_system_max_vectors > 0) {
570 uint_t max;
571
572 max = apix_system_max_vectors -
573 apix_irm_fixed_intr_vectors_used -
574 apix_irm_cache_size;
575
576 new_pool_size = MIN(new_pool_size, max);
577 }
578 if (new_pool_size == apix_irm_params.iparams_total) {
579 /* no change to pool size */
580 mutex_exit(&apix_irm_lock);
581 return;
582 }
583 if (new_pool_size < apix_irm_params.iparams_total) {
584 cmn_err(CE_WARN, "new_pool_size %d is inconsistent "
585 "with irm_params.iparams_total %d",
586 new_pool_size, apix_irm_params.iparams_total);
587 mutex_exit(&apix_irm_lock);
588 return;
589 }
590
591 (void) ndi_irm_resize_pool(apix_irm_pool_p, new_pool_size);
592
593 /* update the pool size info */
594 apix_irm_params.iparams_total = new_pool_size;
595
596 mutex_exit(&apix_irm_lock);
597 }
598