// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
 * Copyright (C) 2017 Intel Deutschland GmbH
 * Copyright (C) 2018-2020, 2023-2024 Intel Corporation
 */
#include <net/tso.h>
#include <linux/tcp.h>

#include "iwl-debug.h"
#include "iwl-csr.h"
#include "iwl-io.h"
#include "internal.h"
#include "fw/api/tx.h"
#include "fw/api/commands.h"
#include "fw/api/datapath.h"
#include "iwl-scd.h"

static struct page *get_workaround_page(struct iwl_trans *trans,
					struct sk_buff *skb)
{
	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
	struct iwl_tso_page_info *info;
	struct page **page_ptr;
	struct page *ret;
	dma_addr_t phys;

	page_ptr = (void *)((u8 *)skb->cb + trans_pcie->txqs.page_offs);

	ret = alloc_page(GFP_ATOMIC);
	if (!ret)
		return NULL;

	info = IWL_TSO_PAGE_INFO(page_address(ret));

	/* Create a DMA mapping for the page */
	phys = dma_map_page_attrs(trans->dev, ret, 0, PAGE_SIZE,
				  DMA_TO_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
	if (unlikely(dma_mapping_error(trans->dev, phys))) {
		__free_page(ret);
		return NULL;
	}

	/* Store physical address and set use count */
	info->dma_addr = phys;
	refcount_set(&info->use_count, 1);

	/* set the chaining pointer to the previous page if there is one */
	info->next = *page_ptr;
	*page_ptr = ret;

	return ret;
}

/*
 * Add a TB and if needed apply the FH HW bug workaround;
 * meta != NULL indicates that it's a page mapping and we
 * need to dma_unmap_page() and set the meta->tbs bit in
 * this case.
 */
static int iwl_txq_gen2_set_tb_with_wa(struct iwl_trans *trans,
				       struct sk_buff *skb,
				       struct iwl_tfh_tfd *tfd,
				       dma_addr_t phys, void *virt,
				       u16 len, struct iwl_cmd_meta *meta,
				       bool unmap)
{
	dma_addr_t oldphys = phys;
	struct page *page;
	int ret;

	if (unlikely(dma_mapping_error(trans->dev, phys)))
		return -ENOMEM;

	if (likely(!iwl_txq_crosses_4g_boundary(phys, len))) {
		ret = iwl_txq_gen2_set_tb(trans, tfd, phys, len);

		if (ret < 0)
			goto unmap;

		if (meta)
			meta->tbs |= BIT(ret);

		ret = 0;
		goto trace;
	}

	/*
	 * Work around a hardware bug. If (as expressed in the
	 * condition above) the TB crosses or ends at a 2^32 (4 GiB)
	 * boundary, then the next TB may be accessed with the wrong
	 * address.
	 * To work around it, copy the data elsewhere and make
	 * a new mapping for it so the device will not fail.
	 */
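	/*
	 * Illustrative example (values are made up): a buffer mapped at
	 * DMA address 0xfffff000 with len 0x2000 spans the 4 GiB line at
	 * 0x100000000, so iwl_txq_crosses_4g_boundary() is true and the
	 * data is bounced through a freshly mapped workaround page below.
	 */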

	if (WARN_ON(len > IWL_TSO_PAGE_DATA_SIZE)) {
		ret = -ENOBUFS;
		goto unmap;
	}

	page = get_workaround_page(trans, skb);
	if (!page) {
		ret = -ENOMEM;
		goto unmap;
	}

	memcpy(page_address(page), virt, len);

	/*
	 * This is a bit odd, but performance does not matter here, what
	 * matters are the expectations of the calling code and TB cleanup
	 * function.
	 *
	 * As such, if unmap is set, then create another mapping for the TB
	 * entry as it will be unmapped later. On the other hand, if it is not
	 * set, then the TB entry will not be unmapped and instead we simply
	 * reference and sync the mapping that get_workaround_page() created.
	 */
	if (unmap) {
		phys = dma_map_single(trans->dev, page_address(page), len,
				      DMA_TO_DEVICE);
		if (unlikely(dma_mapping_error(trans->dev, phys)))
			return -ENOMEM;
	} else {
		phys = iwl_pcie_get_tso_page_phys(page_address(page));
		dma_sync_single_for_device(trans->dev, phys, len,
					   DMA_TO_DEVICE);
	}

	ret = iwl_txq_gen2_set_tb(trans, tfd, phys, len);
	if (ret < 0) {
		/* unmap the new allocation as single */
		oldphys = phys;
		meta = NULL;
		goto unmap;
	}

	IWL_DEBUG_TX(trans,
		     "TB bug workaround: copied %d bytes from 0x%llx to 0x%llx\n",
		     len, (unsigned long long)oldphys,
		     (unsigned long long)phys);

	ret = 0;
unmap:
	if (!unmap)
		goto trace;

	if (meta)
		dma_unmap_page(trans->dev, oldphys, len, DMA_TO_DEVICE);
	else
		dma_unmap_single(trans->dev, oldphys, len, DMA_TO_DEVICE);
trace:
	trace_iwlwifi_dev_tx_tb(trans->dev, skb, virt, phys, len);

	return ret;
}

static int iwl_txq_gen2_build_amsdu(struct iwl_trans *trans,
				    struct sk_buff *skb,
				    struct iwl_tfh_tfd *tfd,
				    struct iwl_cmd_meta *out_meta,
				    int start_len,
				    u8 hdr_len,
				    struct iwl_device_tx_cmd *dev_cmd)
{
#ifdef CONFIG_INET
	struct iwl_tx_cmd_gen2 *tx_cmd = (void *)dev_cmd->payload;
	struct ieee80211_hdr *hdr = (void *)skb->data;
	unsigned int snap_ip_tcp_hdrlen, ip_hdrlen, total_len, hdr_room;
	unsigned int mss = skb_shinfo(skb)->gso_size;
	unsigned int data_offset = 0;
	dma_addr_t start_hdr_phys;
	u16 length, amsdu_pad;
	u8 *start_hdr;
	struct sg_table *sgt;
	struct tso_t tso;

	trace_iwlwifi_dev_tx(trans->dev, skb, tfd, sizeof(*tfd),
			     &dev_cmd->hdr, start_len, 0);

	ip_hdrlen = skb_network_header_len(skb);
	snap_ip_tcp_hdrlen = 8 + ip_hdrlen + tcp_hdrlen(skb);
	total_len = skb->len - snap_ip_tcp_hdrlen - hdr_len;
	amsdu_pad = 0;

	/* total amount of header we may need for this A-MSDU */
	hdr_room = DIV_ROUND_UP(total_len, mss) *
		(3 + snap_ip_tcp_hdrlen + sizeof(struct ethhdr));

	/* Our device supports 9 segments at most, it will fit in 1 page */
	sgt = iwl_pcie_prep_tso(trans, skb, out_meta, &start_hdr, hdr_room);
	if (!sgt)
		return -ENOMEM;

	start_hdr_phys = iwl_pcie_get_tso_page_phys(start_hdr);

	/*
	 * Pull the ieee80211 header to be able to use TSO core,
	 * we will restore it for the tx_status flow.
	 */
	skb_pull(skb, hdr_len);

	/*
	 * Remove the length of all the headers that we don't actually
	 * have in the MPDU by themselves, but that we duplicate into
	 * all the different MSDUs inside the A-MSDU.
	 */
	le16_add_cpu(&tx_cmd->len, -snap_ip_tcp_hdrlen);

	tso_start(skb, &tso);

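	/*
	 * Each A-MSDU subframe built below consists of (in order):
	 *   [0-3 bytes pad] [DA: 6] [SA: 6] [length: 2, big endian]
	 *   [SNAP + IP + TCP headers] [up to mss bytes of payload]
	 * The subframe headers live in the TSO page at start_hdr; the
	 * payload TBs point into the original skb data.
	 */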
	while (total_len) {
		/* this is the data left for this subframe */
		unsigned int data_left = min_t(unsigned int, mss, total_len);
		unsigned int tb_len;
		dma_addr_t tb_phys;
		u8 *pos_hdr = start_hdr;

		total_len -= data_left;

		memset(pos_hdr, 0, amsdu_pad);
		pos_hdr += amsdu_pad;
		amsdu_pad = (4 - (sizeof(struct ethhdr) + snap_ip_tcp_hdrlen +
				  data_left)) & 0x3;
		ether_addr_copy(pos_hdr, ieee80211_get_DA(hdr));
		pos_hdr += ETH_ALEN;
		ether_addr_copy(pos_hdr, ieee80211_get_SA(hdr));
		pos_hdr += ETH_ALEN;

		length = snap_ip_tcp_hdrlen + data_left;
		*((__be16 *)pos_hdr) = cpu_to_be16(length);
		pos_hdr += sizeof(length);

		/*
		 * This will copy the SNAP as well which will be considered
		 * as MAC header.
		 */
		tso_build_hdr(skb, pos_hdr, &tso, data_left, !total_len);

		pos_hdr += snap_ip_tcp_hdrlen;

		tb_len = pos_hdr - start_hdr;
		tb_phys = iwl_pcie_get_tso_page_phys(start_hdr);

		/*
		 * No need for _with_wa, this is from the TSO page and
		 * we leave some space at the end of it so we can't hit
		 * the buggy scenario.
		 */
		iwl_txq_gen2_set_tb(trans, tfd, tb_phys, tb_len);
		trace_iwlwifi_dev_tx_tb(trans->dev, skb, start_hdr,
					tb_phys, tb_len);
		/* add this subframe's headers' length to the tx_cmd */
		le16_add_cpu(&tx_cmd->len, tb_len);

		/* prepare the start_hdr for the next subframe */
		start_hdr = pos_hdr;

		/* put the payload */
		while (data_left) {
			int ret;

			tb_len = min_t(unsigned int, tso.size, data_left);
			tb_phys = iwl_pcie_get_sgt_tb_phys(sgt, data_offset,
							   tb_len);
			/* Not a real mapping error, use direct comparison */
			if (unlikely(tb_phys == DMA_MAPPING_ERROR))
				goto out_err;

			ret = iwl_txq_gen2_set_tb_with_wa(trans, skb, tfd,
							  tb_phys, tso.data,
							  tb_len, NULL, false);
			if (ret)
				goto out_err;

			data_left -= tb_len;
			data_offset += tb_len;
			tso_build_data(skb, &tso, tb_len);
		}
	}

	dma_sync_single_for_device(trans->dev, start_hdr_phys, hdr_room,
				   DMA_TO_DEVICE);

	/* re-add the WiFi header */
	skb_push(skb, hdr_len);

	return 0;

out_err:
#endif
	return -EINVAL;
}

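/*
 * Note on the TFD layout used by the TX building code below: TB0 always
 * holds the first IWL_FIRST_TB_SIZE bytes of the device command (the
 * bi-directional DMA buffer), TB1 holds the remainder of the TX command
 * plus the 802.11 header, and the remaining TBs carry the frame payload.
 */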
static struct
iwl_tfh_tfd *iwl_txq_gen2_build_tx_amsdu(struct iwl_trans *trans,
					 struct iwl_txq *txq,
					 struct iwl_device_tx_cmd *dev_cmd,
					 struct sk_buff *skb,
					 struct iwl_cmd_meta *out_meta,
					 int hdr_len,
					 int tx_cmd_len)
{
	int idx = iwl_txq_get_cmd_index(txq, txq->write_ptr);
	struct iwl_tfh_tfd *tfd = iwl_txq_get_tfd(trans, txq, idx);
	dma_addr_t tb_phys;
	int len;
	void *tb1_addr;

	tb_phys = iwl_txq_get_first_tb_dma(txq, idx);

	/*
	 * No need for _with_wa, the first TB allocation is aligned up
	 * to a 64-byte boundary and thus can't be at the end or cross
	 * a page boundary (much less a 2^32 boundary).
	 */
	iwl_txq_gen2_set_tb(trans, tfd, tb_phys, IWL_FIRST_TB_SIZE);

	/*
	 * The second TB (tb1) points to the remainder of the TX command
	 * and the 802.11 header - dword aligned size
	 * (This calculation modifies the TX command, so do it before the
	 * setup of the first TB)
	 */
	len = tx_cmd_len + sizeof(struct iwl_cmd_header) + hdr_len -
	      IWL_FIRST_TB_SIZE;

	/* do not align A-MSDU to dword as the subframe header aligns it */

	/* map the data for TB1 */
	tb1_addr = ((u8 *)&dev_cmd->hdr) + IWL_FIRST_TB_SIZE;
	tb_phys = dma_map_single(trans->dev, tb1_addr, len, DMA_TO_DEVICE);
	if (unlikely(dma_mapping_error(trans->dev, tb_phys)))
		goto out_err;
	/*
	 * No need for _with_wa(), we ensure (via alignment) that the data
	 * here can never cross or end at a page boundary.
	 */
	iwl_txq_gen2_set_tb(trans, tfd, tb_phys, len);

	if (iwl_txq_gen2_build_amsdu(trans, skb, tfd, out_meta,
				     len + IWL_FIRST_TB_SIZE, hdr_len, dev_cmd))
		goto out_err;

	/* building the A-MSDU might have changed this data, memcpy it now */
	memcpy(&txq->first_tb_bufs[idx], dev_cmd, IWL_FIRST_TB_SIZE);
	return tfd;

out_err:
	iwl_txq_gen2_tfd_unmap(trans, out_meta, tfd);
	return NULL;
}

static int iwl_txq_gen2_tx_add_frags(struct iwl_trans *trans,
				     struct sk_buff *skb,
				     struct iwl_tfh_tfd *tfd,
				     struct iwl_cmd_meta *out_meta)
{
	int i;

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
		dma_addr_t tb_phys;
		unsigned int fragsz = skb_frag_size(frag);
		int ret;

		if (!fragsz)
			continue;

		tb_phys = skb_frag_dma_map(trans->dev, frag, 0,
					   fragsz, DMA_TO_DEVICE);
		ret = iwl_txq_gen2_set_tb_with_wa(trans, skb, tfd, tb_phys,
						  skb_frag_address(frag),
						  fragsz, out_meta, true);
		if (ret)
			return ret;
	}

	return 0;
}

static struct
iwl_tfh_tfd *iwl_txq_gen2_build_tx(struct iwl_trans *trans,
				   struct iwl_txq *txq,
				   struct iwl_device_tx_cmd *dev_cmd,
				   struct sk_buff *skb,
				   struct iwl_cmd_meta *out_meta,
				   int hdr_len,
				   int tx_cmd_len,
				   bool pad)
{
	int idx = iwl_txq_get_cmd_index(txq, txq->write_ptr);
	struct iwl_tfh_tfd *tfd = iwl_txq_get_tfd(trans, txq, idx);
	dma_addr_t tb_phys;
	int len, tb1_len, tb2_len;
	void *tb1_addr;
	struct sk_buff *frag;

	tb_phys = iwl_txq_get_first_tb_dma(txq, idx);

	/* The first TB points to bi-directional DMA data */
	memcpy(&txq->first_tb_bufs[idx], dev_cmd, IWL_FIRST_TB_SIZE);

	/*
	 * No need for _with_wa, the first TB allocation is aligned up
	 * to a 64-byte boundary and thus can't be at the end or cross
	 * a page boundary (much less a 2^32 boundary).
	 */
	iwl_txq_gen2_set_tb(trans, tfd, tb_phys, IWL_FIRST_TB_SIZE);

	/*
	 * The second TB (tb1) points to the remainder of the TX command
	 * and the 802.11 header - dword aligned size
	 * (This calculation modifies the TX command, so do it before the
	 * setup of the first TB)
	 */
	len = tx_cmd_len + sizeof(struct iwl_cmd_header) + hdr_len -
	      IWL_FIRST_TB_SIZE;

	if (pad)
		tb1_len = ALIGN(len, 4);
	else
		tb1_len = len;

	/* map the data for TB1 */
	tb1_addr = ((u8 *)&dev_cmd->hdr) + IWL_FIRST_TB_SIZE;
	tb_phys = dma_map_single(trans->dev, tb1_addr, tb1_len, DMA_TO_DEVICE);
	if (unlikely(dma_mapping_error(trans->dev, tb_phys)))
		goto out_err;
	/*
	 * No need for _with_wa(), we ensure (via alignment) that the data
	 * here can never cross or end at a page boundary.
	 */
	iwl_txq_gen2_set_tb(trans, tfd, tb_phys, tb1_len);
	trace_iwlwifi_dev_tx(trans->dev, skb, tfd, sizeof(*tfd), &dev_cmd->hdr,
			     IWL_FIRST_TB_SIZE + tb1_len, hdr_len);

	/* set up TFD's third entry to point to remainder of skb's head */
	tb2_len = skb_headlen(skb) - hdr_len;

	if (tb2_len > 0) {
		int ret;

		tb_phys = dma_map_single(trans->dev, skb->data + hdr_len,
					 tb2_len, DMA_TO_DEVICE);
		ret = iwl_txq_gen2_set_tb_with_wa(trans, skb, tfd, tb_phys,
						  skb->data + hdr_len, tb2_len,
						  NULL, true);
		if (ret)
			goto out_err;
	}

	if (iwl_txq_gen2_tx_add_frags(trans, skb, tfd, out_meta))
		goto out_err;

	skb_walk_frags(skb, frag) {
		int ret;

		tb_phys = dma_map_single(trans->dev, frag->data,
					 skb_headlen(frag), DMA_TO_DEVICE);
		ret = iwl_txq_gen2_set_tb_with_wa(trans, skb, tfd, tb_phys,
						  frag->data,
						  skb_headlen(frag), NULL,
						  true);
		if (ret)
			goto out_err;
		if (iwl_txq_gen2_tx_add_frags(trans, frag, tfd, out_meta))
			goto out_err;
	}

	return tfd;

out_err:
	iwl_txq_gen2_tfd_unmap(trans, out_meta, tfd);
	return NULL;
}

static
struct iwl_tfh_tfd *iwl_txq_gen2_build_tfd(struct iwl_trans *trans,
					   struct iwl_txq *txq,
					   struct iwl_device_tx_cmd *dev_cmd,
					   struct sk_buff *skb,
					   struct iwl_cmd_meta *out_meta)
{
	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
	int idx = iwl_txq_get_cmd_index(txq, txq->write_ptr);
	struct iwl_tfh_tfd *tfd = iwl_txq_get_tfd(trans, txq, idx);
	int len, hdr_len;
	bool amsdu;

	/* There must be data left over for TB1 or this code must be changed */
	BUILD_BUG_ON(sizeof(struct iwl_tx_cmd_gen2) < IWL_FIRST_TB_SIZE);
	BUILD_BUG_ON(sizeof(struct iwl_cmd_header) +
		     offsetofend(struct iwl_tx_cmd_gen2, dram_info) >
		     IWL_FIRST_TB_SIZE);
	BUILD_BUG_ON(sizeof(struct iwl_tx_cmd_gen3) < IWL_FIRST_TB_SIZE);
	BUILD_BUG_ON(sizeof(struct iwl_cmd_header) +
		     offsetofend(struct iwl_tx_cmd_gen3, dram_info) >
		     IWL_FIRST_TB_SIZE);

	memset(tfd, 0, sizeof(*tfd));

	if (trans->trans_cfg->device_family < IWL_DEVICE_FAMILY_AX210)
		len = sizeof(struct iwl_tx_cmd_gen2);
	else
		len = sizeof(struct iwl_tx_cmd_gen3);

	amsdu = ieee80211_is_data_qos(hdr->frame_control) &&
		(*ieee80211_get_qos_ctl(hdr) &
		 IEEE80211_QOS_CTL_A_MSDU_PRESENT);

	hdr_len = ieee80211_hdrlen(hdr->frame_control);

	/*
	 * Only build A-MSDUs here if doing so by GSO, otherwise it may be
	 * an A-MSDU for other reasons, e.g. NAN or an A-MSDU having been
	 * built in the higher layers already.
	 */
	if (amsdu && skb_shinfo(skb)->gso_size)
		return iwl_txq_gen2_build_tx_amsdu(trans, txq, dev_cmd, skb,
						   out_meta, hdr_len, len);
	return iwl_txq_gen2_build_tx(trans, txq, dev_cmd, skb, out_meta,
				     hdr_len, len, !amsdu);
}

int iwl_txq_space(struct iwl_trans *trans, const struct iwl_txq *q)
{
	unsigned int max;
	unsigned int used;

	/*
	 * To avoid ambiguity between empty and completely full queues, there
	 * should always be less than max_tfd_queue_size elements in the queue.
	 * If q->n_window is smaller than max_tfd_queue_size, there is no need
	 * to reserve any queue entries for this purpose.
	 */
	if (q->n_window < trans->trans_cfg->base_params->max_tfd_queue_size)
		max = q->n_window;
	else
		max = trans->trans_cfg->base_params->max_tfd_queue_size - 1;

	/*
	 * max_tfd_queue_size is a power of 2, so the following is equivalent to
	 * modulo by max_tfd_queue_size and is well defined.
	 */
	used = (q->write_ptr - q->read_ptr) &
		(trans->trans_cfg->base_params->max_tfd_queue_size - 1);

	if (WARN_ON(used > max))
		return 0;

	return max - used;
}

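/*
 * Worked example for iwl_txq_space() above (hypothetical values): with
 * max_tfd_queue_size = 256 and n_window = 64, at most 64 entries may be
 * used. If write_ptr = 10 and read_ptr = 250 (the write pointer has
 * wrapped), then used = (10 - 250) & 255 = 16, leaving 64 - 16 = 48
 * free slots.
 */
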
/*
 * iwl_pcie_gen2_update_byte_tbl - Set up entry in Tx byte-count array
 */
static void iwl_pcie_gen2_update_byte_tbl(struct iwl_trans *trans,
					  struct iwl_txq *txq, u16 byte_cnt,
					  int num_tbs)
{
	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
	int idx = iwl_txq_get_cmd_index(txq, txq->write_ptr);
	u8 filled_tfd_size, num_fetch_chunks;
	u16 len = byte_cnt;
	__le16 bc_ent;

	if (WARN(idx >= txq->n_window, "%d >= %d\n", idx, txq->n_window))
		return;

	filled_tfd_size = offsetof(struct iwl_tfh_tfd, tbs) +
			  num_tbs * sizeof(struct iwl_tfh_tb);
	/*
	 * filled_tfd_size contains the number of filled bytes in the TFD.
	 * Dividing it by 64 will give the number of chunks to fetch
	 * to SRAM - 0 for one chunk, 1 for 2 and so on.
	 * If, for example, the TFD contains only 3 TBs then 32 bytes
	 * of the TFD are used, and only one chunk of 64 bytes should
	 * be fetched.
	 */
	num_fetch_chunks = DIV_ROUND_UP(filled_tfd_size, 64) - 1;

	if (trans->trans_cfg->device_family >= IWL_DEVICE_FAMILY_AX210) {
		struct iwl_gen3_bc_tbl_entry *scd_bc_tbl_gen3 = txq->bc_tbl.addr;

		/* Starting from AX210, the HW expects bytes */
		WARN_ON(trans_pcie->txqs.bc_table_dword);
		WARN_ON(len > 0x3FFF);
		bc_ent = cpu_to_le16(len | (num_fetch_chunks << 14));
		scd_bc_tbl_gen3[idx].tfd_offset = bc_ent;
	} else {
		struct iwlagn_scd_bc_tbl *scd_bc_tbl = txq->bc_tbl.addr;

		/* Before AX210, the HW expects DW */
		WARN_ON(!trans_pcie->txqs.bc_table_dword);
		len = DIV_ROUND_UP(len, 4);
		WARN_ON(len > 0xFFF);
		bc_ent = cpu_to_le16(len | (num_fetch_chunks << 12));
		scd_bc_tbl->tfd_offset[idx] = bc_ent;
	}
}

static u8 iwl_txq_gen2_get_num_tbs(struct iwl_tfh_tfd *tfd)
{
	return le16_to_cpu(tfd->num_tbs) & 0x1f;
}

int iwl_txq_gen2_set_tb(struct iwl_trans *trans, struct iwl_tfh_tfd *tfd,
			dma_addr_t addr, u16 len)
{
	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
	int idx = iwl_txq_gen2_get_num_tbs(tfd);
	struct iwl_tfh_tb *tb;

	/* Only WARN here so we know about the issue, but we mess up our
	 * unmap path because not every place currently checks for errors
	 * returned from this function - it can only return an error if
	 * there's no more space, and so when we know there is enough we
	 * don't always check ...
	 */
	WARN(iwl_txq_crosses_4g_boundary(addr, len),
	     "possible DMA problem with iova:0x%llx, len:%d\n",
	     (unsigned long long)addr, len);

	if (WARN_ON(idx >= IWL_TFH_NUM_TBS))
		return -EINVAL;
	tb = &tfd->tbs[idx];

	/* Each TFD can point to a maximum of max_tbs Tx buffers */
	if (le16_to_cpu(tfd->num_tbs) >= trans_pcie->txqs.tfd.max_tbs) {
		IWL_ERR(trans, "Error can not send more than %d chunks\n",
			trans_pcie->txqs.tfd.max_tbs);
		return -EINVAL;
	}

	put_unaligned_le64(addr, &tb->addr);
	tb->tb_len = cpu_to_le16(len);

	tfd->num_tbs = cpu_to_le16(idx + 1);

	return idx;
}

void iwl_txq_gen2_tfd_unmap(struct iwl_trans *trans,
			    struct iwl_cmd_meta *meta,
			    struct iwl_tfh_tfd *tfd)
{
	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
	int i, num_tbs;

	/* Sanity check on number of chunks */
	num_tbs = iwl_txq_gen2_get_num_tbs(tfd);

	if (num_tbs > trans_pcie->txqs.tfd.max_tbs) {
		IWL_ERR(trans, "Too many chunks: %i\n", num_tbs);
		return;
	}

	/* TB1 is mapped directly, the rest is the TSO page and SG list. */
	if (meta->sg_offset)
		num_tbs = 2;

	/* first TB is never freed - it's the bidirectional DMA data */
	for (i = 1; i < num_tbs; i++) {
		if (meta->tbs & BIT(i))
			dma_unmap_page(trans->dev,
				       le64_to_cpu(tfd->tbs[i].addr),
				       le16_to_cpu(tfd->tbs[i].tb_len),
				       DMA_TO_DEVICE);
		else
			dma_unmap_single(trans->dev,
					 le64_to_cpu(tfd->tbs[i].addr),
					 le16_to_cpu(tfd->tbs[i].tb_len),
					 DMA_TO_DEVICE);
	}

	iwl_txq_set_tfd_invalid_gen2(trans, tfd);
}

static void iwl_txq_gen2_free_tfd(struct iwl_trans *trans, struct iwl_txq *txq)
{
	/* rd_ptr is bounded by TFD_QUEUE_SIZE_MAX and
	 * idx is bounded by n_window
	 */
	int idx = iwl_txq_get_cmd_index(txq, txq->read_ptr);
	struct sk_buff *skb;

	lockdep_assert_held(&txq->lock);

	if (!txq->entries)
		return;

	iwl_txq_gen2_tfd_unmap(trans, &txq->entries[idx].meta,
			       iwl_txq_get_tfd(trans, txq, idx));

	skb = txq->entries[idx].skb;

	/* Can be called from irqs-disabled context
	 * If skb is not NULL, it means that the whole queue is being
	 * freed and that the queue is not empty - free the skb
	 */
	if (skb) {
		iwl_op_mode_free_skb(trans->op_mode, skb);
		txq->entries[idx].skb = NULL;
	}
}

/*
 * iwl_txq_inc_wr_ptr - Send new write index to hardware
 */
static void iwl_txq_inc_wr_ptr(struct iwl_trans *trans, struct iwl_txq *txq)
{
	lockdep_assert_held(&txq->lock);

	IWL_DEBUG_TX(trans, "Q:%d WR: 0x%x\n", txq->id, txq->write_ptr);

	/*
	 * if not in power-save mode, uCode will never sleep when we're
	 * trying to tx (during RFKILL, we're not trying to tx).
	 */
	iwl_write32(trans, HBUS_TARG_WRPTR, txq->write_ptr | (txq->id << 16));
}

int iwl_txq_gen2_tx(struct iwl_trans *trans, struct sk_buff *skb,
		    struct iwl_device_tx_cmd *dev_cmd, int txq_id)
{
	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
	struct iwl_cmd_meta *out_meta;
	struct iwl_txq *txq = trans_pcie->txqs.txq[txq_id];
	u16 cmd_len;
	int idx;
	void *tfd;

	if (WARN_ONCE(txq_id >= IWL_MAX_TVQM_QUEUES,
		      "queue %d out of range", txq_id))
		return -EINVAL;

	if (WARN_ONCE(!test_bit(txq_id, trans_pcie->txqs.queue_used),
		      "TX on unused queue %d\n", txq_id))
		return -EINVAL;

	if (skb_is_nonlinear(skb) &&
	    skb_shinfo(skb)->nr_frags > IWL_TRANS_PCIE_MAX_FRAGS(trans_pcie) &&
	    __skb_linearize(skb))
		return -ENOMEM;

	spin_lock(&txq->lock);

	if (iwl_txq_space(trans, txq) < txq->high_mark) {
		iwl_txq_stop(trans, txq);

		/* don't put the packet on the ring, if there is no room */
		if (unlikely(iwl_txq_space(trans, txq) < 3)) {
			struct iwl_device_tx_cmd **dev_cmd_ptr;

			dev_cmd_ptr = (void *)((u8 *)skb->cb +
					       trans_pcie->txqs.dev_cmd_offs);

			*dev_cmd_ptr = dev_cmd;
			__skb_queue_tail(&txq->overflow_q, skb);
			spin_unlock(&txq->lock);
			return 0;
		}
	}

	idx = iwl_txq_get_cmd_index(txq, txq->write_ptr);

	/* Set up driver data for this TFD */
	txq->entries[idx].skb = skb;
	txq->entries[idx].cmd = dev_cmd;

	dev_cmd->hdr.sequence =
		cpu_to_le16((u16)(QUEUE_TO_SEQ(txq_id) |
				  INDEX_TO_SEQ(idx)));

	/* Set up first empty entry in queue's array of Tx/cmd buffers */
	out_meta = &txq->entries[idx].meta;
	memset(out_meta, 0, sizeof(*out_meta));

	tfd = iwl_txq_gen2_build_tfd(trans, txq, dev_cmd, skb, out_meta);
	if (!tfd) {
		spin_unlock(&txq->lock);
		return -1;
	}

	if (trans->trans_cfg->device_family >= IWL_DEVICE_FAMILY_AX210) {
		struct iwl_tx_cmd_gen3 *tx_cmd_gen3 =
			(void *)dev_cmd->payload;

		cmd_len = le16_to_cpu(tx_cmd_gen3->len);
	} else {
		struct iwl_tx_cmd_gen2 *tx_cmd_gen2 =
			(void *)dev_cmd->payload;

		cmd_len = le16_to_cpu(tx_cmd_gen2->len);
	}

	/* Set up entry for this TFD in Tx byte-count array */
	iwl_pcie_gen2_update_byte_tbl(trans, txq, cmd_len,
				      iwl_txq_gen2_get_num_tbs(tfd));

	/* start timer if queue currently empty */
	if (txq->read_ptr == txq->write_ptr && txq->wd_timeout)
		mod_timer(&txq->stuck_timer, jiffies + txq->wd_timeout);

	/* Tell device the write index *just past* this latest filled TFD */
	txq->write_ptr = iwl_txq_inc_wrap(trans, txq->write_ptr);
	iwl_txq_inc_wr_ptr(trans, txq);
	/*
	 * At this point the frame is "transmitted" successfully
	 * and we will get a TX status notification eventually.
	 */
	spin_unlock(&txq->lock);
	return 0;
}

/*************** HOST COMMAND QUEUE FUNCTIONS *****/

/*
 * iwl_txq_gen2_unmap - Unmap any remaining DMA mappings and free skbs
 */
static void iwl_txq_gen2_unmap(struct iwl_trans *trans, int txq_id)
{
	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
	struct iwl_txq *txq = trans_pcie->txqs.txq[txq_id];

	spin_lock_bh(&txq->reclaim_lock);
	spin_lock(&txq->lock);
	while (txq->write_ptr != txq->read_ptr) {
		IWL_DEBUG_TX_REPLY(trans, "Q %d Free %d\n",
				   txq_id, txq->read_ptr);

		if (txq_id != trans_pcie->txqs.cmd.q_id) {
			int idx = iwl_txq_get_cmd_index(txq, txq->read_ptr);
			struct iwl_cmd_meta *cmd_meta = &txq->entries[idx].meta;
			struct sk_buff *skb = txq->entries[idx].skb;

			if (!WARN_ON_ONCE(!skb))
				iwl_pcie_free_tso_pages(trans, skb, cmd_meta);
		}
		iwl_txq_gen2_free_tfd(trans, txq);
		txq->read_ptr = iwl_txq_inc_wrap(trans, txq->read_ptr);
	}

	while (!skb_queue_empty(&txq->overflow_q)) {
		struct sk_buff *skb = __skb_dequeue(&txq->overflow_q);

		iwl_op_mode_free_skb(trans->op_mode, skb);
	}

	spin_unlock(&txq->lock);
	spin_unlock_bh(&txq->reclaim_lock);

	/* just in case - this queue may have been stopped */
	iwl_trans_pcie_wake_queue(trans, txq);
}

static void iwl_txq_gen2_free_memory(struct iwl_trans *trans,
				     struct iwl_txq *txq)
{
	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
	struct device *dev = trans->dev;

	/* De-alloc circular buffer of TFDs */
	if (txq->tfds) {
		dma_free_coherent(dev,
				  trans_pcie->txqs.tfd.size * txq->n_window,
				  txq->tfds, txq->dma_addr);
		dma_free_coherent(dev,
				  sizeof(*txq->first_tb_bufs) * txq->n_window,
				  txq->first_tb_bufs, txq->first_tb_dma);
	}

	kfree(txq->entries);
	if (txq->bc_tbl.addr)
		dma_pool_free(trans_pcie->txqs.bc_pool,
			      txq->bc_tbl.addr, txq->bc_tbl.dma);
	kfree(txq);
}

/*
 * iwl_txq_gen2_free - Deallocate DMA queue.
 * @txq: Transmit queue to deallocate.
 *
 * Empty queue by removing and destroying all BD's.
 * Free all buffers.
 * 0-fill, but do not free "txq" descriptor structure.
 */
static void iwl_txq_gen2_free(struct iwl_trans *trans, int txq_id)
{
	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
	struct iwl_txq *txq;
	int i;

	if (WARN_ONCE(txq_id >= IWL_MAX_TVQM_QUEUES,
		      "queue %d out of range", txq_id))
		return;

	txq = trans_pcie->txqs.txq[txq_id];

	if (WARN_ON(!txq))
		return;

	iwl_txq_gen2_unmap(trans, txq_id);

	/* De-alloc array of command/tx buffers */
	if (txq_id == trans_pcie->txqs.cmd.q_id)
		for (i = 0; i < txq->n_window; i++) {
			kfree_sensitive(txq->entries[i].cmd);
			kfree_sensitive(txq->entries[i].free_buf);
		}
	del_timer_sync(&txq->stuck_timer);

	iwl_txq_gen2_free_memory(trans, txq);

	trans_pcie->txqs.txq[txq_id] = NULL;

	clear_bit(txq_id, trans_pcie->txqs.queue_used);
}

static struct iwl_txq *
iwl_txq_dyn_alloc_dma(struct iwl_trans *trans, int size, unsigned int timeout)
{
	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
	size_t bc_tbl_size, bc_tbl_entries;
	struct iwl_txq *txq;
	int ret;

	WARN_ON(!trans_pcie->txqs.bc_tbl_size);

	bc_tbl_size = trans_pcie->txqs.bc_tbl_size;
	bc_tbl_entries = bc_tbl_size / sizeof(u16);

	if (WARN_ON(size > bc_tbl_entries))
		return ERR_PTR(-EINVAL);

	txq = kzalloc(sizeof(*txq), GFP_KERNEL);
	if (!txq)
		return ERR_PTR(-ENOMEM);

	txq->bc_tbl.addr = dma_pool_alloc(trans_pcie->txqs.bc_pool, GFP_KERNEL,
					  &txq->bc_tbl.dma);
	if (!txq->bc_tbl.addr) {
		IWL_ERR(trans, "Scheduler BC Table allocation failed\n");
		kfree(txq);
		return ERR_PTR(-ENOMEM);
	}

	ret = iwl_pcie_txq_alloc(trans, txq, size, false);
	if (ret) {
		IWL_ERR(trans, "Tx queue alloc failed\n");
		goto error;
	}
	ret = iwl_txq_init(trans, txq, size, false);
	if (ret) {
		IWL_ERR(trans, "Tx queue init failed\n");
		goto error;
	}

	txq->wd_timeout = msecs_to_jiffies(timeout);

	return txq;

error:
	iwl_txq_gen2_free_memory(trans, txq);
	return ERR_PTR(ret);
}

static int iwl_pcie_txq_alloc_response(struct iwl_trans *trans,
				       struct iwl_txq *txq,
				       struct iwl_host_cmd *hcmd)
{
	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
	struct iwl_tx_queue_cfg_rsp *rsp;
	int ret, qid;
	u32 wr_ptr;

	if (WARN_ON(iwl_rx_packet_payload_len(hcmd->resp_pkt) !=
		    sizeof(*rsp))) {
		ret = -EINVAL;
		goto error_free_resp;
	}

	rsp = (void *)hcmd->resp_pkt->data;
	qid = le16_to_cpu(rsp->queue_number);
	wr_ptr = le16_to_cpu(rsp->write_pointer);

	if (qid >= ARRAY_SIZE(trans_pcie->txqs.txq)) {
		WARN_ONCE(1, "queue index %d unsupported", qid);
		ret = -EIO;
		goto error_free_resp;
	}

	if (test_and_set_bit(qid, trans_pcie->txqs.queue_used)) {
		WARN_ONCE(1, "queue %d already used", qid);
		ret = -EIO;
		goto error_free_resp;
	}

	if (WARN_ONCE(trans_pcie->txqs.txq[qid],
		      "queue %d already allocated\n", qid)) {
		ret = -EIO;
		goto error_free_resp;
	}

	txq->id = qid;
	trans_pcie->txqs.txq[qid] = txq;
	wr_ptr &= (trans->trans_cfg->base_params->max_tfd_queue_size - 1);

	/* Place first TFD at index corresponding to start sequence number */
	txq->read_ptr = wr_ptr;
	txq->write_ptr = wr_ptr;

	IWL_DEBUG_TX_QUEUES(trans, "Activate queue %d\n", qid);

	iwl_free_resp(hcmd);
	return qid;

error_free_resp:
	iwl_free_resp(hcmd);
	iwl_txq_gen2_free_memory(trans, txq);
	return ret;
}

int iwl_txq_dyn_alloc(struct iwl_trans *trans, u32 flags, u32 sta_mask,
		      u8 tid, int size, unsigned int timeout)
{
	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
	struct iwl_txq *txq;
	union {
		struct iwl_tx_queue_cfg_cmd old;
		struct iwl_scd_queue_cfg_cmd new;
	} cmd;
	struct iwl_host_cmd hcmd = {
		.flags = CMD_WANT_SKB,
	};
	int ret;

	/* take the min with bytecount table entries allowed */
	size = min_t(u32, size, trans_pcie->txqs.bc_tbl_size / sizeof(u16));
	/* but must be a power of 2 for calculating read/write pointers */
	size = rounddown_pow_of_two(size);

	if (trans->trans_cfg->device_family == IWL_DEVICE_FAMILY_BZ &&
	    trans->hw_rev_step == SILICON_A_STEP) {
		size = 4096;
		txq = iwl_txq_dyn_alloc_dma(trans, size, timeout);
	} else {
		do {
			txq = iwl_txq_dyn_alloc_dma(trans, size, timeout);
			if (!IS_ERR(txq))
				break;

			IWL_DEBUG_TX_QUEUES(trans,
					    "Failed allocating TXQ of size %d for sta mask %x tid %d, ret: %ld\n",
					    size, sta_mask, tid,
					    PTR_ERR(txq));
			size /= 2;
		} while (size >= 16);
	}

	if (IS_ERR(txq))
		return PTR_ERR(txq);

	if (trans_pcie->txqs.queue_alloc_cmd_ver == 0) {
		memset(&cmd.old, 0, sizeof(cmd.old));
		cmd.old.tfdq_addr = cpu_to_le64(txq->dma_addr);
		cmd.old.byte_cnt_addr = cpu_to_le64(txq->bc_tbl.dma);
		cmd.old.cb_size = cpu_to_le32(TFD_QUEUE_CB_SIZE(size));
		cmd.old.flags = cpu_to_le16(flags | TX_QUEUE_CFG_ENABLE_QUEUE);
		cmd.old.tid = tid;

		if (hweight32(sta_mask) != 1) {
			ret = -EINVAL;
			goto error;
		}
		cmd.old.sta_id = ffs(sta_mask) - 1;

		hcmd.id = SCD_QUEUE_CFG;
		hcmd.len[0] = sizeof(cmd.old);
		hcmd.data[0] = &cmd.old;
	} else if (trans_pcie->txqs.queue_alloc_cmd_ver == 3) {
		memset(&cmd.new, 0, sizeof(cmd.new));
		cmd.new.operation = cpu_to_le32(IWL_SCD_QUEUE_ADD);
		cmd.new.u.add.tfdq_dram_addr = cpu_to_le64(txq->dma_addr);
		cmd.new.u.add.bc_dram_addr = cpu_to_le64(txq->bc_tbl.dma);
		cmd.new.u.add.cb_size = cpu_to_le32(TFD_QUEUE_CB_SIZE(size));
		cmd.new.u.add.flags = cpu_to_le32(flags);
		cmd.new.u.add.sta_mask = cpu_to_le32(sta_mask);
		cmd.new.u.add.tid = tid;

		hcmd.id = WIDE_ID(DATA_PATH_GROUP, SCD_QUEUE_CONFIG_CMD);
		hcmd.len[0] = sizeof(cmd.new);
		hcmd.data[0] = &cmd.new;
	} else {
		ret = -EOPNOTSUPP;
		goto error;
	}

	ret = iwl_trans_send_cmd(trans, &hcmd);
	if (ret)
		goto error;

	return iwl_pcie_txq_alloc_response(trans, txq, &hcmd);

error:
	iwl_txq_gen2_free_memory(trans, txq);
	return ret;
}

void iwl_txq_dyn_free(struct iwl_trans *trans, int queue)
{
	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);

	if (WARN(queue >= IWL_MAX_TVQM_QUEUES,
		 "queue %d out of range", queue))
		return;

	/*
	 * Upon HW Rfkill - we stop the device, and then stop the queues
	 * in the op_mode. Just for the sake of the simplicity of the op_mode,
	 * allow the op_mode to call txq_disable after it already called
	 * stop_device.
	 */
	if (!test_and_clear_bit(queue, trans_pcie->txqs.queue_used)) {
		WARN_ONCE(test_bit(STATUS_DEVICE_ENABLED, &trans->status),
			  "queue %d not used", queue);
		return;
	}

	iwl_txq_gen2_free(trans, queue);

	IWL_DEBUG_TX_QUEUES(trans, "Deactivate queue %d\n", queue);
}

void iwl_txq_gen2_tx_free(struct iwl_trans *trans)
{
	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
	int i;

	memset(trans_pcie->txqs.queue_used, 0,
	       sizeof(trans_pcie->txqs.queue_used));

	/* Free all TX queues */
	for (i = 0; i < ARRAY_SIZE(trans_pcie->txqs.txq); i++) {
		if (!trans_pcie->txqs.txq[i])
			continue;

		iwl_txq_gen2_free(trans, i);
	}
}

int iwl_txq_gen2_init(struct iwl_trans *trans, int txq_id, int queue_size)
{
	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
	struct iwl_txq *queue;
	int ret;

	/* alloc and init the tx queue */
	if (!trans_pcie->txqs.txq[txq_id]) {
		queue = kzalloc(sizeof(*queue), GFP_KERNEL);
		if (!queue) {
			IWL_ERR(trans, "Not enough memory for tx queue\n");
			return -ENOMEM;
		}
		trans_pcie->txqs.txq[txq_id] = queue;
		ret = iwl_pcie_txq_alloc(trans, queue, queue_size, true);
		if (ret) {
			IWL_ERR(trans, "Tx %d queue alloc failed\n", txq_id);
			goto error;
		}
	} else {
		queue = trans_pcie->txqs.txq[txq_id];
	}

	ret = iwl_txq_init(trans, queue, queue_size,
			   (txq_id == trans_pcie->txqs.cmd.q_id));
	if (ret) {
		IWL_ERR(trans, "Tx %d queue init failed\n", txq_id);
		goto error;
	}
	trans_pcie->txqs.txq[txq_id]->id = txq_id;
	set_bit(txq_id, trans_pcie->txqs.queue_used);

	return 0;

error:
	iwl_txq_gen2_tx_free(trans);
	return ret;
}

/*************** HOST COMMAND QUEUE FUNCTIONS *****/

/*
 * iwl_pcie_gen2_enqueue_hcmd - enqueue a uCode command
 * @trans: the transport to use
 * @cmd: a pointer to the ucode command structure
 *
 * The function returns < 0 values to indicate the operation
 * failed. On success, it returns the index (>= 0) of the command in the
 * command queue.
 */
int iwl_pcie_gen2_enqueue_hcmd(struct iwl_trans *trans,
			       struct iwl_host_cmd *cmd)
{
	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
	struct iwl_txq *txq = trans_pcie->txqs.txq[trans_pcie->txqs.cmd.q_id];
	struct iwl_device_cmd *out_cmd;
	struct iwl_cmd_meta *out_meta;
	void *dup_buf = NULL;
	dma_addr_t phys_addr;
	int i, cmd_pos, idx;
	u16 copy_size, cmd_size, tb0_size;
	bool had_nocopy = false;
	u8 group_id = iwl_cmd_groupid(cmd->id);
	const u8 *cmddata[IWL_MAX_CMD_TBS_PER_TFD];
	u16 cmdlen[IWL_MAX_CMD_TBS_PER_TFD];
	struct iwl_tfh_tfd *tfd;
	unsigned long flags;

	if (WARN_ON(cmd->flags & CMD_BLOCK_TXQS))
		return -EINVAL;

	copy_size = sizeof(struct iwl_cmd_header_wide);
	cmd_size = sizeof(struct iwl_cmd_header_wide);

	for (i = 0; i < IWL_MAX_CMD_TBS_PER_TFD; i++) {
		cmddata[i] = cmd->data[i];
		cmdlen[i] = cmd->len[i];

		if (!cmd->len[i])
			continue;

		/* need at least IWL_FIRST_TB_SIZE copied */
		if (copy_size < IWL_FIRST_TB_SIZE) {
			int copy = IWL_FIRST_TB_SIZE - copy_size;

			if (copy > cmdlen[i])
				copy = cmdlen[i];
			cmdlen[i] -= copy;
			cmddata[i] += copy;
			copy_size += copy;
		}

		if (cmd->dataflags[i] & IWL_HCMD_DFL_NOCOPY) {
			had_nocopy = true;
			if (WARN_ON(cmd->dataflags[i] & IWL_HCMD_DFL_DUP)) {
				idx = -EINVAL;
				goto free_dup_buf;
			}
		} else if (cmd->dataflags[i] & IWL_HCMD_DFL_DUP) {
			/*
			 * This is also a chunk that isn't copied
			 * to the static buffer so set had_nocopy.
			 */
			had_nocopy = true;

			/* only allowed once */
			if (WARN_ON(dup_buf)) {
				idx = -EINVAL;
				goto free_dup_buf;
			}

			dup_buf = kmemdup(cmddata[i], cmdlen[i],
					  GFP_ATOMIC);
			if (!dup_buf)
				return -ENOMEM;
		} else {
			/* NOCOPY must not be followed by normal! */
			if (WARN_ON(had_nocopy)) {
				idx = -EINVAL;
				goto free_dup_buf;
			}
			copy_size += cmdlen[i];
		}
		cmd_size += cmd->len[i];
	}

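	/*
	 * At this point cmd_size is the full command length (header plus
	 * every chunk), while copy_size counts only what will be copied
	 * into the command buffer itself: the header, all normal chunks,
	 * and at least the first IWL_FIRST_TB_SIZE bytes overall (even if
	 * they come from NOCOPY/DUP chunks, which are otherwise mapped
	 * separately below).
	 */
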
	/*
	 * If any of the command structures end up being larger than the
	 * TFD_MAX_PAYLOAD_SIZE and they aren't dynamically allocated into
	 * separate TFDs, then we will need to increase the size of the buffers
	 */
	if (WARN(copy_size > TFD_MAX_PAYLOAD_SIZE,
		 "Command %s (%#x) is too large (%d bytes)\n",
		 iwl_get_cmd_string(trans, cmd->id), cmd->id, copy_size)) {
		idx = -EINVAL;
		goto free_dup_buf;
	}

	spin_lock_irqsave(&txq->lock, flags);

	idx = iwl_txq_get_cmd_index(txq, txq->write_ptr);
	tfd = iwl_txq_get_tfd(trans, txq, txq->write_ptr);
	memset(tfd, 0, sizeof(*tfd));

	if (iwl_txq_space(trans, txq) < ((cmd->flags & CMD_ASYNC) ? 2 : 1)) {
		spin_unlock_irqrestore(&txq->lock, flags);

		IWL_ERR(trans, "No space in command queue\n");
		iwl_op_mode_cmd_queue_full(trans->op_mode);
		idx = -ENOSPC;
		goto free_dup_buf;
	}

	out_cmd = txq->entries[idx].cmd;
	out_meta = &txq->entries[idx].meta;

	/* re-initialize, this also marks the SG list as unused */
	memset(out_meta, 0, sizeof(*out_meta));
	if (cmd->flags & CMD_WANT_SKB)
		out_meta->source = cmd;

	/* set up the header */
	out_cmd->hdr_wide.cmd = iwl_cmd_opcode(cmd->id);
	out_cmd->hdr_wide.group_id = group_id;
	out_cmd->hdr_wide.version = iwl_cmd_version(cmd->id);
	out_cmd->hdr_wide.length =
		cpu_to_le16(cmd_size - sizeof(struct iwl_cmd_header_wide));
	out_cmd->hdr_wide.reserved = 0;
	out_cmd->hdr_wide.sequence =
		cpu_to_le16(QUEUE_TO_SEQ(trans_pcie->txqs.cmd.q_id) |
			    INDEX_TO_SEQ(txq->write_ptr));

	cmd_pos = sizeof(struct iwl_cmd_header_wide);
	copy_size = sizeof(struct iwl_cmd_header_wide);

	/* and copy the data that needs to be copied */
	for (i = 0; i < IWL_MAX_CMD_TBS_PER_TFD; i++) {
		int copy;

		if (!cmd->len[i])
			continue;

		/* copy everything if not nocopy/dup */
		if (!(cmd->dataflags[i] & (IWL_HCMD_DFL_NOCOPY |
					   IWL_HCMD_DFL_DUP))) {
			copy = cmd->len[i];

			memcpy((u8 *)out_cmd + cmd_pos, cmd->data[i], copy);
			cmd_pos += copy;
			copy_size += copy;
			continue;
		}

		/*
		 * Otherwise we need at least IWL_FIRST_TB_SIZE copied
		 * in total (for bi-directional DMA), but copy up to what
		 * we can fit into the payload for debug dump purposes.
		 */
		copy = min_t(int, TFD_MAX_PAYLOAD_SIZE - cmd_pos, cmd->len[i]);

		memcpy((u8 *)out_cmd + cmd_pos, cmd->data[i], copy);
		cmd_pos += copy;

		/* However, keep copy_size accurate; we need it below */
		if (copy_size < IWL_FIRST_TB_SIZE) {
			copy = IWL_FIRST_TB_SIZE - copy_size;

			if (copy > cmd->len[i])
				copy = cmd->len[i];
			copy_size += copy;
		}
	}

	IWL_DEBUG_HC(trans,
		     "Sending command %s (%.2x.%.2x), seq: 0x%04X, %d bytes at %d[%d]:%d\n",
		     iwl_get_cmd_string(trans, cmd->id), group_id,
		     out_cmd->hdr.cmd, le16_to_cpu(out_cmd->hdr.sequence),
		     cmd_size, txq->write_ptr, idx, trans_pcie->txqs.cmd.q_id);

	/* start the TFD with the minimum copy bytes */
	tb0_size = min_t(int, copy_size, IWL_FIRST_TB_SIZE);
	memcpy(&txq->first_tb_bufs[idx], out_cmd, tb0_size);
	iwl_txq_gen2_set_tb(trans, tfd, iwl_txq_get_first_tb_dma(txq, idx),
			    tb0_size);

	/* map first command fragment, if any remains */
	if (copy_size > tb0_size) {
		phys_addr = dma_map_single(trans->dev,
					   (u8 *)out_cmd + tb0_size,
					   copy_size - tb0_size,
					   DMA_TO_DEVICE);
		if (dma_mapping_error(trans->dev, phys_addr)) {
			idx = -ENOMEM;
			iwl_txq_gen2_tfd_unmap(trans, out_meta, tfd);
			goto out;
		}
		iwl_txq_gen2_set_tb(trans, tfd, phys_addr,
				    copy_size - tb0_size);
	}

	/* map the remaining (adjusted) nocopy/dup fragments */
	for (i = 0; i < IWL_MAX_CMD_TBS_PER_TFD; i++) {
		void *data = (void *)(uintptr_t)cmddata[i];

		if (!cmdlen[i])
			continue;
		if (!(cmd->dataflags[i] & (IWL_HCMD_DFL_NOCOPY |
					   IWL_HCMD_DFL_DUP)))
			continue;
		if (cmd->dataflags[i] & IWL_HCMD_DFL_DUP)
			data = dup_buf;
		phys_addr = dma_map_single(trans->dev, data,
					   cmdlen[i], DMA_TO_DEVICE);
		if (dma_mapping_error(trans->dev, phys_addr)) {
			idx = -ENOMEM;
			iwl_txq_gen2_tfd_unmap(trans, out_meta, tfd);
			goto out;
		}
		iwl_txq_gen2_set_tb(trans, tfd, phys_addr, cmdlen[i]);
	}

	BUILD_BUG_ON(IWL_TFH_NUM_TBS > sizeof(out_meta->tbs) * BITS_PER_BYTE);
	out_meta->flags = cmd->flags;
	if (WARN_ON_ONCE(txq->entries[idx].free_buf))
		kfree_sensitive(txq->entries[idx].free_buf);
	txq->entries[idx].free_buf = dup_buf;

	trace_iwlwifi_dev_hcmd(trans->dev, cmd, cmd_size, &out_cmd->hdr_wide);

	/* start timer if queue currently empty */
	if (txq->read_ptr == txq->write_ptr && txq->wd_timeout)
		mod_timer(&txq->stuck_timer, jiffies + txq->wd_timeout);

	spin_lock(&trans_pcie->reg_lock);
	/* Increment and update queue's write index */
	txq->write_ptr = iwl_txq_inc_wrap(trans, txq->write_ptr);
	iwl_txq_inc_wr_ptr(trans, txq);
	spin_unlock(&trans_pcie->reg_lock);

out:
	spin_unlock_irqrestore(&txq->lock, flags);
free_dup_buf:
	if (idx < 0)
		kfree(dup_buf);
	return idx;
}