1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. 24 */ 25 26 /* 27 * hermon_cfg.c 28 * Hermon Configuration Profile Routines 29 * 30 * Implements the routines necessary for initializing and (later) tearing 31 * down the list of Hermon configuration information. 32 */ 33 34 #include <sys/sysmacros.h> 35 #include <sys/types.h> 36 #include <sys/conf.h> 37 #include <sys/ddi.h> 38 #include <sys/sunddi.h> 39 #include <sys/modctl.h> 40 #include <sys/bitmap.h> 41 42 #include <sys/ib/adapters/hermon/hermon.h> 43 44 /* 45 * Below are the elements that make up the Hermon configuration profile. 46 * For advanced users who wish to alter these values, this can be done via 47 * the /etc/system file. By default, values are assigned to the number of 48 * supported resources, either from the HCA's reported capacities or by 49 * a by-design limit in the driver. 50 */ 51 52 /* Number of supported QPs, CQs and SRQs */ 53 uint32_t hermon_log_num_qp = HERMON_NUM_QP_SHIFT; 54 uint32_t hermon_log_num_cq = HERMON_NUM_CQ_SHIFT; 55 uint32_t hermon_log_num_srq = HERMON_NUM_SRQ_SHIFT; 56 57 /* Number of supported SGL per WQE for SQ/RQ, and for SRQ */ 58 /* XXX use the same for all queues if limitation in srq.h is resolved */ 59 uint32_t hermon_wqe_max_sgl = HERMON_NUM_SGL_PER_WQE; 60 uint32_t hermon_srq_max_sgl = HERMON_SRQ_MAX_SGL; 61 62 /* Maximum "responder resources" (in) and "initiator depth" (out) per QP */ 63 uint32_t hermon_log_num_rdb_per_qp = HERMON_LOG_NUM_RDB_PER_QP; 64 65 /* 66 * Number of multicast groups (MCGs), number of QP per MCG, and the number 67 * of entries (from the total number) in the multicast group "hash table" 68 */ 69 uint32_t hermon_log_num_mcg = HERMON_NUM_MCG_SHIFT; 70 uint32_t hermon_num_qp_per_mcg = HERMON_NUM_QP_PER_MCG; 71 uint32_t hermon_log_num_mcg_hash = HERMON_NUM_MCG_HASH_SHIFT; 72 73 /* Number of UD AVs */ 74 uint32_t hermon_log_num_ah = HERMON_NUM_AH_SHIFT; 75 76 /* Number of EQs and their default size */ 77 uint32_t hermon_log_num_eq = HERMON_NUM_EQ_SHIFT; 78 uint32_t hermon_log_eq_sz = HERMON_DEFAULT_EQ_SZ_SHIFT; 79 80 /* 81 * Number of supported MPTs, MTTs and also the maximum MPT size. 82 */ 83 uint32_t hermon_log_num_mtt = HERMON_NUM_MTT_SHIFT; 84 uint32_t hermon_log_num_dmpt = HERMON_NUM_DMPT_SHIFT; 85 uint32_t hermon_log_max_mrw_sz = HERMON_MAX_MEM_MPT_SHIFT; 86 87 /* 88 * Number of supported UAR (User Access Regions) for this HCA. 89 * We could in the future read in uar_sz from devlim, and thus 90 * derive the number of UAR. Since this is derived from PAGESIZE, 91 * however, this means that x86 systems would have twice as many 92 * UARs as SPARC systems. Therefore for consistency's sake, we will 93 * just use 1024 pages, which is the maximum on SPARC systems. 94 */ 95 uint32_t hermon_log_num_uar = HERMON_NUM_UAR_SHIFT; 96 97 /* 98 * Number of remaps allowed for FMR before a sync is required. This value 99 * determines how many times we can fmr_deregister() before the underlying fmr 100 * framework places the region to wait for an MTT_SYNC operation, cleaning up 101 * the old mappings. 102 */ 103 uint32_t hermon_fmr_num_remaps = HERMON_FMR_MAX_REMAPS; 104 105 /* 106 * Number of supported Hermon mailboxes ("In" and "Out") and their maximum 107 * sizes, respectively 108 */ 109 uint32_t hermon_log_num_inmbox = HERMON_NUM_MAILBOXES_SHIFT; 110 uint32_t hermon_log_num_outmbox = HERMON_NUM_MAILBOXES_SHIFT; 111 uint32_t hermon_log_inmbox_size = HERMON_MBOX_SIZE_SHIFT; 112 uint32_t hermon_log_outmbox_size = HERMON_MBOX_SIZE_SHIFT; 113 uint32_t hermon_log_num_intr_inmbox = HERMON_NUM_INTR_MAILBOXES_SHIFT; 114 uint32_t hermon_log_num_intr_outmbox = HERMON_NUM_INTR_MAILBOXES_SHIFT; 115 116 /* Number of supported Protection Domains (PD) */ 117 uint32_t hermon_log_num_pd = HERMON_NUM_PD_SHIFT; 118 119 /* 120 * Number of total supported PKeys per PKey table (i.e. 121 * per port). Also the number of SGID per GID table. 122 */ 123 uint32_t hermon_log_max_pkeytbl = HERMON_NUM_PKEYTBL_SHIFT; 124 uint32_t hermon_log_max_gidtbl = HERMON_NUM_GIDTBL_SHIFT; 125 126 /* Maximum supported MTU and portwidth */ 127 uint32_t hermon_max_mtu = HERMON_MAX_MTU; 128 uint32_t hermon_max_port_width = HERMON_MAX_PORT_WIDTH; 129 130 /* Number of supported Virtual Lanes (VL) */ 131 uint32_t hermon_max_vlcap = HERMON_MAX_VLCAP; 132 133 /* 134 * Whether or not to use the built-in (i.e. in firmware) agents for QP0 and 135 * QP1, respectively. 136 */ 137 uint32_t hermon_qp0_agents_in_fw = 0; 138 uint32_t hermon_qp1_agents_in_fw = 0; 139 140 /* 141 * Whether DMA mappings should bypass the PCI IOMMU or not. 142 * hermon_iommu_bypass is a global setting for all memory addresses. 143 */ 144 uint32_t hermon_iommu_bypass = 1; 145 146 /* 147 * Whether *DATA* buffers should be bound w/ Relaxed Ordering (RO) turned on 148 * via the SW workaround (HCAs don't support RO in HW). Defaulted on, 149 * though care must be taken w/ some Userland clients that *MAY* have 150 * peeked in the data to understand when data xfer was done - MPI does 151 * as an efficiency 152 */ 153 154 uint32_t hermon_kernel_data_ro = HERMON_RO_ENABLED; /* default */ 155 uint32_t hermon_user_data_ro = HERMON_RO_ENABLED; /* default */ 156 157 /* 158 * Whether Hermon should use MSI (Message Signaled Interrupts), if available. 159 * Note: 0 indicates 'legacy interrupt', 1 indicates MSI (if available) 160 */ 161 uint32_t hermon_use_msi_if_avail = 1; 162 163 /* 164 * This is a patchable variable that determines the time we will wait after 165 * initiating SW reset before we do our first read from Hermon config space. 166 * If this value is set too small (less than the default 100ms), it is 167 * possible for Hermon hardware to be unready to respond to the config cycle 168 * reads. This could cause master abort on the PCI bridge. Note: If 169 * "hermon_sw_reset_delay" is set to zero, then no software reset of the Hermon 170 * device will be attempted. 171 */ 172 uint32_t hermon_sw_reset_delay = HERMON_SW_RESET_DELAY; 173 174 /* 175 * These are patchable variables for hermon command polling. The poll_delay is 176 * the number of usec to wait in-between calls to poll the 'go' bit. The 177 * poll_max is the total number of usec to loop in waiting for the 'go' bit to 178 * clear. 179 */ 180 uint32_t hermon_cmd_poll_delay = HERMON_CMD_POLL_DELAY; 181 uint32_t hermon_cmd_poll_max = HERMON_CMD_POLL_MAX; 182 183 /* 184 * This is a patchable variable that determines the frequency with which 185 * the AckReq bit will be set in outgoing RC packets. The AckReq bit will be 186 * set in at least every 2^hermon_qp_ackreq_freq packets (but at least once 187 * per message, i.e. in the last packet). Tuning this value can increase 188 * IB fabric utilization by cutting down on the number of unnecessary ACKs. 189 */ 190 uint32_t hermon_qp_ackreq_freq = HERMON_QP_ACKREQ_FREQ; 191 192 static void hermon_cfg_wqe_sizes(hermon_state_t *state, 193 hermon_cfg_profile_t *cp); 194 #ifdef __sparc 195 static void hermon_check_iommu_bypass(hermon_state_t *state, 196 hermon_cfg_profile_t *cp); 197 #endif 198 199 /* 200 * hermon_cfg_profile_init_phase1() 201 * Context: Only called from attach() path context 202 */ 203 int 204 hermon_cfg_profile_init_phase1(hermon_state_t *state) 205 { 206 hermon_cfg_profile_t *cp; 207 208 /* 209 * Allocate space for the configuration profile structure 210 */ 211 cp = (hermon_cfg_profile_t *)kmem_zalloc(sizeof (hermon_cfg_profile_t), 212 KM_SLEEP); 213 214 /* 215 * Common to all profiles. 216 */ 217 cp->cp_qp0_agents_in_fw = hermon_qp0_agents_in_fw; 218 cp->cp_qp1_agents_in_fw = hermon_qp1_agents_in_fw; 219 cp->cp_sw_reset_delay = hermon_sw_reset_delay; 220 cp->cp_cmd_poll_delay = hermon_cmd_poll_delay; 221 cp->cp_cmd_poll_max = hermon_cmd_poll_max; 222 cp->cp_ackreq_freq = hermon_qp_ackreq_freq; 223 cp->cp_fmr_max_remaps = hermon_fmr_num_remaps; 224 225 /* 226 * Although most of the configuration is enabled in "phase2" of the 227 * cfg_profile_init, we have to setup the OUT mailboxes soon, since 228 * they are used immediately after this "phase1" completes, to run the 229 * firmware and get the device limits, which we'll need for 'phase2'. 230 * That's done in rsrc_init_phase1, called shortly after we do this 231 * and the sw reset - see hermon.c 232 */ 233 if (state->hs_cfg_profile_setting == HERMON_CFG_MEMFREE) { 234 cp->cp_log_num_outmbox = hermon_log_num_outmbox; 235 cp->cp_log_outmbox_size = hermon_log_outmbox_size; 236 cp->cp_log_num_inmbox = hermon_log_num_inmbox; 237 cp->cp_log_inmbox_size = hermon_log_inmbox_size; 238 cp->cp_log_num_intr_inmbox = hermon_log_num_intr_inmbox; 239 cp->cp_log_num_intr_outmbox = hermon_log_num_intr_outmbox; 240 241 } else { 242 return (DDI_FAILURE); 243 } 244 245 /* 246 * Set IOMMU bypass or not. Ensure consistency of flags with 247 * architecture type. 248 */ 249 #ifdef __sparc 250 if (hermon_iommu_bypass == 1) { 251 hermon_check_iommu_bypass(state, cp); 252 } else { 253 cp->cp_iommu_bypass = HERMON_BINDMEM_NORMAL; 254 } 255 #else 256 cp->cp_iommu_bypass = HERMON_BINDMEM_NORMAL; 257 #endif 258 259 /* Attach the configuration profile to Hermon softstate */ 260 state->hs_cfg_profile = cp; 261 262 return (DDI_SUCCESS); 263 } 264 265 /* 266 * hermon_cfg_profile_init_phase2() 267 * Context: Only called from attach() path context 268 */ 269 int 270 hermon_cfg_profile_init_phase2(hermon_state_t *state) 271 { 272 hermon_cfg_profile_t *cp; 273 hermon_hw_querydevlim_t *devlim; 274 hermon_hw_query_port_t *port; 275 uint32_t num, size; 276 int i; 277 278 /* Read in the device limits */ 279 devlim = &state->hs_devlim; 280 /* and the port information */ 281 port = &state->hs_queryport; 282 283 /* Read the configuration profile */ 284 cp = state->hs_cfg_profile; 285 286 /* 287 * We configure all Hermon HCAs with the same profile, which 288 * is based upon the default value assignments above. If we want to 289 * add additional profiles in the future, they can be added here. 290 * Note the reference to "Memfree" is a holdover from Arbel/Sinai 291 */ 292 if (state->hs_cfg_profile_setting != HERMON_CFG_MEMFREE) { 293 return (DDI_FAILURE); 294 } 295 296 /* 297 * Note for most configuration parameters, we use the lesser of our 298 * desired configuration value or the device-defined maximum value. 299 */ 300 cp->cp_log_num_mtt = min(hermon_log_num_mtt, devlim->log_max_mtt); 301 cp->cp_log_num_dmpt = min(hermon_log_num_dmpt, devlim->log_max_dmpt); 302 cp->cp_log_num_cmpt = HERMON_LOG_CMPT_PER_TYPE + 2; /* times 4, */ 303 /* per PRM */ 304 cp->cp_log_max_mrw_sz = min(hermon_log_max_mrw_sz, 305 devlim->log_max_mrw_sz); 306 cp->cp_log_num_pd = min(hermon_log_num_pd, devlim->log_max_pd); 307 cp->cp_log_num_qp = min(hermon_log_num_qp, devlim->log_max_qp); 308 cp->cp_log_num_cq = min(hermon_log_num_cq, devlim->log_max_cq); 309 cp->cp_log_num_srq = min(hermon_log_num_srq, devlim->log_max_srq); 310 cp->cp_log_num_eq = min(hermon_log_num_eq, devlim->log_max_eq); 311 cp->cp_log_eq_sz = min(hermon_log_eq_sz, devlim->log_max_eq_sz); 312 cp->cp_log_num_rdb = cp->cp_log_num_qp + 313 min(hermon_log_num_rdb_per_qp, devlim->log_max_ra_req_qp); 314 cp->cp_hca_max_rdma_in_qp = cp->cp_hca_max_rdma_out_qp = 315 1 << min(hermon_log_num_rdb_per_qp, devlim->log_max_ra_req_qp); 316 cp->cp_num_qp_per_mcg = max(hermon_num_qp_per_mcg, 317 HERMON_NUM_QP_PER_MCG_MIN); 318 cp->cp_num_qp_per_mcg = min(cp->cp_num_qp_per_mcg, 319 (1 << devlim->log_max_qp_mcg) - 8); 320 cp->cp_num_qp_per_mcg = (1 << highbit(cp->cp_num_qp_per_mcg + 7)) - 8; 321 cp->cp_log_num_mcg = min(hermon_log_num_mcg, devlim->log_max_mcg); 322 cp->cp_log_num_mcg_hash = hermon_log_num_mcg_hash; 323 324 /* until srq_resize is debugged, disable it */ 325 cp->cp_srq_resize_enabled = 0; 326 327 /* cp->cp_log_num_uar = hermon_log_num_uar; */ 328 /* 329 * now, we HAVE to calculate the number of UAR pages, so that we can 330 * get the blueflame stuff correct as well 331 */ 332 333 size = devlim->log_max_uar_sz; 334 /* 1MB (2^^20) times size (2^^size) / sparc_pg (2^^13) */ 335 num = (20 + size) - 13; /* XXX - consider using PAGESHIFT */ 336 if (devlim->blu_flm) 337 num -= 1; /* if blueflame, only half the size for UARs */ 338 cp->cp_log_num_uar = min(hermon_log_num_uar, num); 339 340 341 /* while we're at it, calculate the index of the kernel uar page */ 342 /* either the reserved uar's or 128, whichever is smaller */ 343 state->hs_kernel_uar_index = (devlim->num_rsvd_uar > 128) ? 344 devlim->num_rsvd_uar : 128; 345 346 cp->cp_log_max_pkeytbl = port->log_max_pkey; 347 348 cp->cp_log_max_qp_sz = devlim->log_max_qp_sz; 349 cp->cp_log_max_cq_sz = devlim->log_max_cq_sz; 350 cp->cp_log_max_srq_sz = devlim->log_max_srq_sz; 351 cp->cp_log_max_gidtbl = port->log_max_gid; 352 cp->cp_max_mtu = port->ib_mtu; /* XXX now from query_port */ 353 cp->cp_max_port_width = port->ib_port_wid; /* now from query_port */ 354 cp->cp_max_vlcap = port->max_vl; 355 cp->cp_log_num_ah = hermon_log_num_ah; 356 357 /* Paranoia, ensure no arrays indexed by port_num are out of bounds */ 358 cp->cp_num_ports = devlim->num_ports; 359 if (cp->cp_num_ports > HERMON_MAX_PORTS) { 360 cmn_err(CE_CONT, "device has more ports (%d) than are " 361 "supported; Using %d ports\n", 362 cp->cp_num_ports, HERMON_MAX_PORTS); 363 cp->cp_num_ports = HERMON_MAX_PORTS; 364 }; 365 366 /* allocate variable sized arrays */ 367 for (i = 0; i < HERMON_MAX_PORTS; i++) { 368 state->hs_pkey[i] = kmem_zalloc((1 << cp->cp_log_max_pkeytbl) * 369 sizeof (ib_pkey_t), KM_SLEEP); 370 state->hs_guid[i] = kmem_zalloc((1 << cp->cp_log_max_gidtbl) * 371 sizeof (ib_guid_t), KM_SLEEP); 372 } 373 374 /* Determine WQE sizes from requested max SGLs */ 375 hermon_cfg_wqe_sizes(state, cp); 376 377 /* Set whether to use MSIs or not */ 378 cp->cp_use_msi_if_avail = hermon_use_msi_if_avail; 379 380 #if !defined(_ELF64) 381 /* 382 * Need to reduce the hermon kernel virtual memory footprint 383 * on 32-bit kernels. 384 */ 385 cp->cp_log_num_mtt -= 6; 386 cp->cp_log_num_dmpt -= 6; 387 cp->cp_log_num_pd -= 6; 388 cp->cp_log_num_qp -= 6; 389 cp->cp_log_num_cq -= 6; 390 cp->cp_log_num_srq -= 6; 391 cp->cp_log_num_rdb = cp->cp_log_num_qp + 392 min(hermon_log_num_rdb_per_qp, devlim->log_max_ra_req_qp); 393 cp->cp_hca_max_rdma_in_qp = cp->cp_hca_max_rdma_out_qp = 394 1 << min(hermon_log_num_rdb_per_qp, devlim->log_max_ra_req_qp); 395 #endif 396 397 return (DDI_SUCCESS); 398 } 399 400 401 /* 402 * hermon_cfg_profile_fini() 403 * Context: Only called from attach() and/or detach() path contexts 404 */ 405 void 406 hermon_cfg_profile_fini(hermon_state_t *state) 407 { 408 /* 409 * Free up the space for configuration profile 410 */ 411 kmem_free(state->hs_cfg_profile, sizeof (hermon_cfg_profile_t)); 412 } 413 414 415 /* 416 * hermon_cfg_wqe_sizes() 417 * Context: Only called from attach() path context 418 */ 419 static void 420 hermon_cfg_wqe_sizes(hermon_state_t *state, hermon_cfg_profile_t *cp) 421 { 422 uint_t max_size, log2; 423 uint_t max_sgl, real_max_sgl; 424 425 /* 426 * Get the requested maximum number SGL per WQE from the Hermon 427 * patchable variable 428 */ 429 max_sgl = hermon_wqe_max_sgl; 430 431 /* 432 * Use requested maximum number of SGL to calculate the max descriptor 433 * size (while guaranteeing that the descriptor size is a power-of-2 434 * cachelines). We have to use the calculation for QP1 MLX transport 435 * because the possibility that we might need to inline a GRH, along 436 * with all the other headers and alignment restrictions, sets the 437 * maximum for the number of SGLs that we can advertise support for. 438 */ 439 max_size = (HERMON_QP_WQE_MLX_QP1_HDRS + (max_sgl << 4)); 440 log2 = highbit(max_size); 441 if (ISP2(max_size)) { 442 log2 = log2 - 1; 443 } 444 max_size = (1 << log2); 445 446 max_size = min(max_size, state->hs_devlim.max_desc_sz_sq); 447 448 /* 449 * Then use the calculated max descriptor size to determine the "real" 450 * maximum SGL (the number beyond which we would roll over to the next 451 * power-of-2). 452 */ 453 real_max_sgl = (max_size - HERMON_QP_WQE_MLX_QP1_HDRS) >> 4; 454 455 /* Then save away this configuration information */ 456 cp->cp_wqe_max_sgl = max_sgl; 457 cp->cp_wqe_real_max_sgl = real_max_sgl; 458 459 /* SRQ SGL gets set to it's own patchable variable value */ 460 cp->cp_srq_max_sgl = hermon_srq_max_sgl; 461 } 462 463 #ifdef __sparc 464 /* 465 * hermon_check_iommu_bypass() 466 * Context: Only called from attach() path context 467 * XXX This is a DMA allocation routine outside the normal 468 * path. FMA hardening will not like this. 469 */ 470 static void 471 hermon_check_iommu_bypass(hermon_state_t *state, hermon_cfg_profile_t *cp) 472 { 473 ddi_dma_handle_t dmahdl; 474 ddi_dma_attr_t dma_attr; 475 int status; 476 ddi_acc_handle_t acc_hdl; 477 caddr_t kaddr; 478 size_t actual_len; 479 ddi_dma_cookie_t cookie; 480 uint_t cookiecnt; 481 482 hermon_dma_attr_init(state, &dma_attr); 483 484 /* Try mapping for IOMMU bypass (Force Physical) */ 485 dma_attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL | 486 DDI_DMA_RELAXED_ORDERING; 487 488 /* 489 * Call ddi_dma_alloc_handle(). If this returns DDI_DMA_BADATTR then 490 * it is not possible to use IOMMU bypass with our PCI bridge parent. 491 * Since the function we are in can only be called if iommu bypass was 492 * requested in the config profile, we configure for bypass if the 493 * ddi_dma_alloc_handle() was successful. Otherwise, we configure 494 * for non-bypass (ie: normal) mapping. 495 */ 496 status = ddi_dma_alloc_handle(state->hs_dip, &dma_attr, 497 DDI_DMA_SLEEP, NULL, &dmahdl); 498 if (status == DDI_DMA_BADATTR) { 499 cp->cp_iommu_bypass = HERMON_BINDMEM_NORMAL; 500 return; 501 } else if (status != DDI_SUCCESS) { /* failed somehow */ 502 hermon_kernel_data_ro = HERMON_RO_DISABLED; 503 hermon_user_data_ro = HERMON_RO_DISABLED; 504 cp->cp_iommu_bypass = HERMON_BINDMEM_BYPASS; 505 return; 506 } else { 507 cp->cp_iommu_bypass = HERMON_BINDMEM_BYPASS; 508 } 509 510 status = ddi_dma_mem_alloc(dmahdl, 256, 511 &state->hs_reg_accattr, DDI_DMA_CONSISTENT, 512 DDI_DMA_SLEEP, NULL, (caddr_t *)&kaddr, &actual_len, &acc_hdl); 513 514 if (status != DDI_SUCCESS) { /* failed somehow */ 515 hermon_kernel_data_ro = HERMON_RO_DISABLED; 516 hermon_user_data_ro = HERMON_RO_DISABLED; 517 ddi_dma_free_handle(&dmahdl); 518 return; 519 } 520 521 status = ddi_dma_addr_bind_handle(dmahdl, NULL, kaddr, actual_len, 522 DDI_DMA_RDWR, DDI_DMA_SLEEP, NULL, &cookie, &cookiecnt); 523 524 if (status == DDI_DMA_MAPPED) { 525 (void) ddi_dma_unbind_handle(dmahdl); 526 } else { 527 hermon_kernel_data_ro = HERMON_RO_DISABLED; 528 hermon_user_data_ro = HERMON_RO_DISABLED; 529 } 530 531 ddi_dma_mem_free(&acc_hdl); 532 ddi_dma_free_handle(&dmahdl); 533 } 534 #endif 535