// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
 * Copyright (C) 2017 Intel Deutschland GmbH
 * Copyright (C) 2018-2020, 2023-2025 Intel Corporation
 */
#ifdef CONFIG_INET
#include <net/tso.h>
#endif
#include <linux/tcp.h>

#include "iwl-debug.h"
#include "iwl-csr.h"
#include "iwl-io.h"
#include "internal.h"
#include "fw/api/tx.h"
#include "fw/api/commands.h"
#include "fw/api/datapath.h"
#include "iwl-scd.h"

static struct page *get_workaround_page(struct iwl_trans *trans,
					struct sk_buff *skb)
{
	struct iwl_tso_page_info *info;
	struct page **page_ptr;
	struct page *ret;
	dma_addr_t phys;

	page_ptr = (void *)((u8 *)skb->cb + trans->conf.cb_data_offs);

	ret = alloc_page(GFP_ATOMIC);
	if (!ret)
		return NULL;

	info = IWL_TSO_PAGE_INFO(page_address(ret));

	/* Create a DMA mapping for the page */
	phys = dma_map_page_attrs(trans->dev, ret, 0, PAGE_SIZE,
				  DMA_TO_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
	if (unlikely(dma_mapping_error(trans->dev, phys))) {
		__free_page(ret);
		return NULL;
	}

	/* Store physical address and set use count */
	info->dma_addr = phys;
	refcount_set(&info->use_count, 1);

	/* set the chaining pointer to the previous page if there is one */
	info->next = *page_ptr;
	*page_ptr = ret;

	return ret;
}

/*
 * Add a TB and if needed apply the FH HW bug workaround;
 * meta != NULL indicates that it's a page mapping and we
 * need to dma_unmap_page() and set the meta->tbs bit in
 * this case.
 */
static int iwl_txq_gen2_set_tb_with_wa(struct iwl_trans *trans,
				       struct sk_buff *skb,
				       struct iwl_tfh_tfd *tfd,
				       dma_addr_t phys, void *virt,
				       u16 len, struct iwl_cmd_meta *meta,
				       bool unmap)
{
	dma_addr_t oldphys = phys;
	struct page *page;
	int ret;

	if (unlikely(dma_mapping_error(trans->dev, phys)))
		return -ENOMEM;

	if (likely(!iwl_txq_crosses_4g_boundary(phys, len))) {
		ret = iwl_txq_gen2_set_tb(trans, tfd, phys, len);

		if (ret < 0)
			goto unmap;

		if (meta)
			meta->tbs |= BIT(ret);

		ret = 0;
		goto trace;
	}

	/*
	 * Work around a hardware bug. If (as expressed in the
	 * condition above) the TB ends on a 32-bit boundary,
	 * then the next TB may be accessed with the wrong
	 * address.
	 * To work around it, copy the data elsewhere and make
	 * a new mapping for it so the device will not fail.
	 */

	if (WARN_ON(len > IWL_TSO_PAGE_DATA_SIZE)) {
		ret = -ENOBUFS;
		goto unmap;
	}

	page = get_workaround_page(trans, skb);
	if (!page) {
		ret = -ENOMEM;
		goto unmap;
	}

	memcpy(page_address(page), virt, len);

	/*
	 * This is a bit odd, but performance does not matter here, what
	 * matters are the expectations of the calling code and TB cleanup
	 * function.
	 *
	 * As such, if unmap is set, then create another mapping for the TB
	 * entry as it will be unmapped later. On the other hand, if it is not
	 * set, then the TB entry will not be unmapped and instead we simply
	 * reference and sync the mapping that get_workaround_page() created.
	 */
	if (unmap) {
		phys = dma_map_single(trans->dev, page_address(page), len,
				      DMA_TO_DEVICE);
		if (unlikely(dma_mapping_error(trans->dev, phys)))
			return -ENOMEM;
	} else {
		phys = iwl_pcie_get_tso_page_phys(page_address(page));
		dma_sync_single_for_device(trans->dev, phys, len,
					   DMA_TO_DEVICE);
	}

	ret = iwl_txq_gen2_set_tb(trans, tfd, phys, len);
	if (ret < 0) {
		/* unmap the new allocation as single */
		oldphys = phys;
		meta = NULL;
		goto unmap;
	}

	IWL_DEBUG_TX(trans,
		     "TB bug workaround: copied %d bytes from 0x%llx to 0x%llx\n",
		     len, (unsigned long long)oldphys,
		     (unsigned long long)phys);

	ret = 0;
unmap:
	if (!unmap)
		goto trace;

	if (meta)
		dma_unmap_page(trans->dev, oldphys, len, DMA_TO_DEVICE);
	else
		dma_unmap_single(trans->dev, oldphys, len, DMA_TO_DEVICE);
trace:
	trace_iwlwifi_dev_tx_tb(trans->dev, skb, virt, phys, len);

	return ret;
}
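
/*
 * Illustration (not driver code): the workaround above triggers when a TB
 * would end on or cross a 2^32 address boundary.  iwl_txq_crosses_4g_boundary()
 * is assumed here to compare the upper 32 bits of the start and end addresses,
 * roughly:
 *
 *	crosses = upper_32_bits(phys) != upper_32_bits(phys + len);
 *
 * With the hypothetical values phys = 0x1ffffff00ULL and len = 0x200,
 * phys + len = 0x200000100ULL, so the upper halves differ (0x1 vs. 0x2) and
 * the data is copied into a workaround page and remapped instead of being
 * handed to the device at its original address.
 */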

static int iwl_txq_gen2_build_amsdu(struct iwl_trans *trans,
				    struct sk_buff *skb,
				    struct iwl_tfh_tfd *tfd,
				    struct iwl_cmd_meta *out_meta,
				    int start_len,
				    u8 hdr_len,
				    struct iwl_device_tx_cmd *dev_cmd)
{
#ifdef CONFIG_INET
	struct iwl_tx_cmd_v9 *tx_cmd = (void *)dev_cmd->payload;
	struct ieee80211_hdr *hdr = (void *)skb->data;
	unsigned int snap_ip_tcp_hdrlen, ip_hdrlen, total_len, hdr_room;
	unsigned int mss = skb_shinfo(skb)->gso_size;
	unsigned int data_offset = 0;
	dma_addr_t start_hdr_phys;
	u16 length, amsdu_pad;
	u8 *start_hdr;
	struct sg_table *sgt;
	struct tso_t tso;

	trace_iwlwifi_dev_tx(trans->dev, skb, tfd, sizeof(*tfd),
			     &dev_cmd->hdr, start_len, 0);

	ip_hdrlen = skb_network_header_len(skb);
	snap_ip_tcp_hdrlen = 8 + ip_hdrlen + tcp_hdrlen(skb);
	total_len = skb->len - snap_ip_tcp_hdrlen - hdr_len;
	amsdu_pad = 0;

	/* total amount of header we may need for this A-MSDU */
	hdr_room = DIV_ROUND_UP(total_len, mss) *
		(3 + snap_ip_tcp_hdrlen + sizeof(struct ethhdr));

	/* Our device supports 9 segments at most, it will fit in 1 page */
	sgt = iwl_pcie_prep_tso(trans, skb, out_meta, &start_hdr, hdr_room,
				snap_ip_tcp_hdrlen + hdr_len);
	if (!sgt)
		return -ENOMEM;

	start_hdr_phys = iwl_pcie_get_tso_page_phys(start_hdr);

	/*
	 * Pull the ieee80211 header to be able to use TSO core,
	 * we will restore it for the tx_status flow.
	 */
	skb_pull(skb, hdr_len);

	/*
	 * Remove the length of all the headers that we don't actually
	 * have in the MPDU by themselves, but that we duplicate into
	 * all the different MSDUs inside the A-MSDU.
	 */
	le16_add_cpu(&tx_cmd->len, -snap_ip_tcp_hdrlen);

	tso_start(skb, &tso);

	while (total_len) {
		/* this is the data left for this subframe */
		unsigned int data_left = min_t(unsigned int, mss, total_len);
		unsigned int tb_len;
		dma_addr_t tb_phys;
		u8 *pos_hdr = start_hdr;

		total_len -= data_left;

		memset(pos_hdr, 0, amsdu_pad);
		pos_hdr += amsdu_pad;
		amsdu_pad = (4 - (sizeof(struct ethhdr) + snap_ip_tcp_hdrlen +
				  data_left)) & 0x3;
		ether_addr_copy(pos_hdr, ieee80211_get_DA(hdr));
		pos_hdr += ETH_ALEN;
		ether_addr_copy(pos_hdr, ieee80211_get_SA(hdr));
		pos_hdr += ETH_ALEN;

		length = snap_ip_tcp_hdrlen + data_left;
		*((__be16 *)pos_hdr) = cpu_to_be16(length);
		pos_hdr += sizeof(length);

		/*
		 * This will copy the SNAP as well which will be considered
		 * as MAC header.
		 */
		tso_build_hdr(skb, pos_hdr, &tso, data_left, !total_len);

		pos_hdr += snap_ip_tcp_hdrlen;

		tb_len = pos_hdr - start_hdr;
		tb_phys = iwl_pcie_get_tso_page_phys(start_hdr);

		/*
		 * No need for _with_wa, this is from the TSO page and
		 * we leave some space at the end of it so we can't hit
		 * the buggy scenario.
		 */
		iwl_txq_gen2_set_tb(trans, tfd, tb_phys, tb_len);
		trace_iwlwifi_dev_tx_tb(trans->dev, skb, start_hdr,
					tb_phys, tb_len);
		/* add this subframe's headers' length to the tx_cmd */
		le16_add_cpu(&tx_cmd->len, tb_len);

		/* prepare the start_hdr for the next subframe */
		start_hdr = pos_hdr;

		/* put the payload */
		while (data_left) {
			int ret;

			tb_len = min_t(unsigned int, tso.size, data_left);
			tb_phys = iwl_pcie_get_sgt_tb_phys(sgt, data_offset,
							   tb_len);
			/* Not a real mapping error, use direct comparison */
			if (unlikely(tb_phys == DMA_MAPPING_ERROR))
				goto out_err;

			ret = iwl_txq_gen2_set_tb_with_wa(trans, skb, tfd,
							  tb_phys, tso.data,
							  tb_len, NULL, false);
			if (ret)
				goto out_err;

			data_left -= tb_len;
			data_offset += tb_len;
			tso_build_data(skb, &tso, tb_len);
		}
	}

	dma_sync_single_for_device(trans->dev, start_hdr_phys, hdr_room,
				   DMA_TO_DEVICE);

	/* re-add the WiFi header */
	skb_push(skb, hdr_len);

	return 0;

out_err:
#endif
	return -EINVAL;
}
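
/*
 * Worked example for the A-MSDU sizing above (illustrative numbers only):
 * with an IPv4/TCP skb carrying no IP or TCP options, ip_hdrlen = 20 and
 * tcp_hdrlen() = 20, so snap_ip_tcp_hdrlen = 8 + 20 + 20 = 48.  For
 * mss = 1440 and total_len = 4000 the payload is split into
 * DIV_ROUND_UP(4000, 1440) = 3 subframes, and each subframe may need up to
 * 3 (pad) + 48 + 14 (struct ethhdr) = 65 bytes of headers, giving
 * hdr_room = 3 * 65 = 195 bytes in the TSO page.  The per-subframe pad,
 * amsdu_pad = (4 - (14 + 48 + data_left)) & 0x3, keeps each subframe
 * header 4-byte aligned as the A-MSDU format requires.
 */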

static struct
iwl_tfh_tfd *iwl_txq_gen2_build_tx_amsdu(struct iwl_trans *trans,
					 struct iwl_txq *txq,
					 struct iwl_device_tx_cmd *dev_cmd,
					 struct sk_buff *skb,
					 struct iwl_cmd_meta *out_meta,
					 int hdr_len,
					 int tx_cmd_len)
{
	int idx = iwl_txq_get_cmd_index(txq, txq->write_ptr);
	struct iwl_tfh_tfd *tfd = iwl_txq_get_tfd(trans, txq, idx);
	dma_addr_t tb_phys;
	int len;
	void *tb1_addr;

	tb_phys = iwl_txq_get_first_tb_dma(txq, idx);

	/*
	 * No need for _with_wa, the first TB allocation is aligned up
	 * to a 64-byte boundary and thus can't be at the end or cross
	 * a page boundary (much less a 2^32 boundary).
	 */
	iwl_txq_gen2_set_tb(trans, tfd, tb_phys, IWL_FIRST_TB_SIZE);

	/*
	 * The second TB (tb1) points to the remainder of the TX command
	 * and the 802.11 header - dword aligned size
	 * (This calculation modifies the TX command, so do it before the
	 * setup of the first TB)
	 */
	len = tx_cmd_len + sizeof(struct iwl_cmd_header) + hdr_len -
	      IWL_FIRST_TB_SIZE;

	/* do not align A-MSDU to dword as the subframe header aligns it */

	/* map the data for TB1 */
	tb1_addr = ((u8 *)&dev_cmd->hdr) + IWL_FIRST_TB_SIZE;
	tb_phys = dma_map_single(trans->dev, tb1_addr, len, DMA_TO_DEVICE);
	if (unlikely(dma_mapping_error(trans->dev, tb_phys)))
		goto out_err;
	/*
	 * No need for _with_wa(), we ensure (via alignment) that the data
	 * here can never cross or end at a page boundary.
	 */
	iwl_txq_gen2_set_tb(trans, tfd, tb_phys, len);

	if (iwl_txq_gen2_build_amsdu(trans, skb, tfd, out_meta,
				     len + IWL_FIRST_TB_SIZE, hdr_len, dev_cmd))
		goto out_err;

	/* building the A-MSDU might have changed this data, memcpy it now */
	memcpy(&txq->first_tb_bufs[idx], dev_cmd, IWL_FIRST_TB_SIZE);
	return tfd;

out_err:
	iwl_pcie_free_tso_pages(trans, skb, out_meta);
	iwl_txq_gen2_tfd_unmap(trans, out_meta, tfd);
	return NULL;
}

static int iwl_txq_gen2_tx_add_frags(struct iwl_trans *trans,
				     struct sk_buff *skb,
				     struct iwl_tfh_tfd *tfd,
				     struct iwl_cmd_meta *out_meta)
{
	int i;

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
		dma_addr_t tb_phys;
		unsigned int fragsz = skb_frag_size(frag);
		int ret;

		if (!fragsz)
			continue;

		tb_phys = skb_frag_dma_map(trans->dev, frag, 0,
					   fragsz, DMA_TO_DEVICE);
		ret = iwl_txq_gen2_set_tb_with_wa(trans, skb, tfd, tb_phys,
						  skb_frag_address(frag),
						  fragsz, out_meta, true);
		if (ret)
			return ret;
	}

	return 0;
}

static struct
iwl_tfh_tfd *iwl_txq_gen2_build_tx(struct iwl_trans *trans,
				   struct iwl_txq *txq,
				   struct iwl_device_tx_cmd *dev_cmd,
				   struct sk_buff *skb,
				   struct iwl_cmd_meta *out_meta,
				   int hdr_len,
				   int tx_cmd_len,
				   bool pad)
{
	int idx = iwl_txq_get_cmd_index(txq, txq->write_ptr);
	struct iwl_tfh_tfd *tfd = iwl_txq_get_tfd(trans, txq, idx);
	dma_addr_t tb_phys;
	int len, tb1_len, tb2_len;
	void *tb1_addr;
	struct sk_buff *frag;

	tb_phys = iwl_txq_get_first_tb_dma(txq, idx);

	/* The first TB points to bi-directional DMA data */
	memcpy(&txq->first_tb_bufs[idx], dev_cmd, IWL_FIRST_TB_SIZE);

	/*
	 * No need for _with_wa, the first TB allocation is aligned up
	 * to a 64-byte boundary and thus can't be at the end or cross
	 * a page boundary (much less a 2^32 boundary).
	 */
	iwl_txq_gen2_set_tb(trans, tfd, tb_phys, IWL_FIRST_TB_SIZE);

	/*
	 * The second TB (tb1) points to the remainder of the TX command
	 * and the 802.11 header - dword aligned size
	 * (This calculation modifies the TX command, so do it before the
	 * setup of the first TB)
	 */
	len = tx_cmd_len + sizeof(struct iwl_cmd_header) + hdr_len -
	      IWL_FIRST_TB_SIZE;

	if (pad)
		tb1_len = ALIGN(len, 4);
	else
		tb1_len = len;

	/* map the data for TB1 */
	tb1_addr = ((u8 *)&dev_cmd->hdr) + IWL_FIRST_TB_SIZE;
	tb_phys = dma_map_single(trans->dev, tb1_addr, tb1_len, DMA_TO_DEVICE);
	if (unlikely(dma_mapping_error(trans->dev, tb_phys)))
		goto out_err;
	/*
	 * No need for _with_wa(), we ensure (via alignment) that the data
	 * here can never cross or end at a page boundary.
	 */
	iwl_txq_gen2_set_tb(trans, tfd, tb_phys, tb1_len);
	trace_iwlwifi_dev_tx(trans->dev, skb, tfd, sizeof(*tfd), &dev_cmd->hdr,
			     IWL_FIRST_TB_SIZE + tb1_len, hdr_len);

	/* set up TFD's third entry to point to remainder of skb's head */
	tb2_len = skb_headlen(skb) - hdr_len;

	if (tb2_len > 0) {
		int ret;

		tb_phys = dma_map_single(trans->dev, skb->data + hdr_len,
					 tb2_len, DMA_TO_DEVICE);
		ret = iwl_txq_gen2_set_tb_with_wa(trans, skb, tfd, tb_phys,
						  skb->data + hdr_len, tb2_len,
						  NULL, true);
		if (ret)
			goto out_err;
	}

	if (iwl_txq_gen2_tx_add_frags(trans, skb, tfd, out_meta))
		goto out_err;

	skb_walk_frags(skb, frag) {
		int ret;

		tb_phys = dma_map_single(trans->dev, frag->data,
					 skb_headlen(frag), DMA_TO_DEVICE);
		ret = iwl_txq_gen2_set_tb_with_wa(trans, skb, tfd, tb_phys,
						  frag->data,
						  skb_headlen(frag), NULL,
						  true);
		if (ret)
			goto out_err;
		if (iwl_txq_gen2_tx_add_frags(trans, frag, tfd, out_meta))
			goto out_err;
	}

	return tfd;

out_err:
	iwl_txq_gen2_tfd_unmap(trans, out_meta, tfd);
	return NULL;
}
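
/*
 * Illustration (not driver code) of the TFD that iwl_txq_gen2_build_tx()
 * ends up with for a simple, non-A-MSDU frame:
 *
 *	TB0:  first IWL_FIRST_TB_SIZE bytes of the device TX command
 *	      (the bi-directional DMA buffer, never unmapped)
 *	TB1:  remainder of the TX command plus the 802.11 header
 *	TB2:  rest of the skb head after the 802.11 header, if any
 *	TB3+: one TB per page fragment, and for each skb on the frag list
 *	      one TB for its head plus TBs for its own fragments, all added
 *	      through iwl_txq_gen2_set_tb_with_wa()
 *
 * The exact number of TBs therefore depends on the skb layout; only TB0
 * and TB1 are always present.
 */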

static
struct iwl_tfh_tfd *iwl_txq_gen2_build_tfd(struct iwl_trans *trans,
					   struct iwl_txq *txq,
					   struct iwl_device_tx_cmd *dev_cmd,
					   struct sk_buff *skb,
					   struct iwl_cmd_meta *out_meta)
{
	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
	int idx = iwl_txq_get_cmd_index(txq, txq->write_ptr);
	struct iwl_tfh_tfd *tfd = iwl_txq_get_tfd(trans, txq, idx);
	int len, hdr_len;
	bool amsdu;

	/* There must be data left over for TB1 or this code must be changed */
	BUILD_BUG_ON(sizeof(struct iwl_tx_cmd_v9) < IWL_FIRST_TB_SIZE);
	BUILD_BUG_ON(sizeof(struct iwl_cmd_header) +
		     offsetofend(struct iwl_tx_cmd_v9, dram_info) >
		     IWL_FIRST_TB_SIZE);
	BUILD_BUG_ON(sizeof(struct iwl_tx_cmd) < IWL_FIRST_TB_SIZE);
	BUILD_BUG_ON(sizeof(struct iwl_cmd_header) +
		     offsetofend(struct iwl_tx_cmd, dram_info) >
		     IWL_FIRST_TB_SIZE);

	memset(tfd, 0, sizeof(*tfd));

	if (trans->mac_cfg->device_family < IWL_DEVICE_FAMILY_AX210)
		len = sizeof(struct iwl_tx_cmd_v9);
	else
		len = sizeof(struct iwl_tx_cmd);

	amsdu = ieee80211_is_data_qos(hdr->frame_control) &&
		(*ieee80211_get_qos_ctl(hdr) &
		 IEEE80211_QOS_CTL_A_MSDU_PRESENT);

	hdr_len = ieee80211_hdrlen(hdr->frame_control);

	/*
	 * Only build A-MSDUs here if doing so by GSO, otherwise it may be
	 * an A-MSDU for other reasons, e.g. NAN or an A-MSDU having been
	 * built in the higher layers already.
	 */
	if (amsdu && skb_shinfo(skb)->gso_size)
		return iwl_txq_gen2_build_tx_amsdu(trans, txq, dev_cmd, skb,
						   out_meta, hdr_len, len);
	return iwl_txq_gen2_build_tx(trans, txq, dev_cmd, skb, out_meta,
				     hdr_len, len, !amsdu);
}

int iwl_txq_space(struct iwl_trans *trans, const struct iwl_txq *q)
{
	unsigned int max;
	unsigned int used;

	/*
	 * To avoid ambiguity between empty and completely full queues, there
	 * should always be less than max_tfd_queue_size elements in the queue.
	 * If q->n_window is smaller than max_tfd_queue_size, there is no need
	 * to reserve any queue entries for this purpose.
	 */
	if (q->n_window < trans->mac_cfg->base->max_tfd_queue_size)
		max = q->n_window;
	else
		max = trans->mac_cfg->base->max_tfd_queue_size - 1;

	/*
	 * max_tfd_queue_size is a power of 2, so the following is equivalent to
	 * modulo by max_tfd_queue_size and is well defined.
	 */
	used = (q->write_ptr - q->read_ptr) &
		(trans->mac_cfg->base->max_tfd_queue_size - 1);

	if (WARN_ON(used > max))
		return 0;

	return max - used;
}
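
/*
 * Example of the wrap-around arithmetic above (hypothetical values): with
 * max_tfd_queue_size = 256, read_ptr = 250 and write_ptr = 4,
 * used = (4 - 250) & 255 = 10, i.e. ten TFDs are in flight even though the
 * write pointer has already wrapped.  If n_window is 64, at most 64 entries
 * may be used and iwl_txq_space() returns 64 - 10 = 54; if n_window equals
 * the full queue size, one slot is reserved to keep "empty" and "full"
 * distinguishable and the result would be 255 - 10.
 */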

/*
 * iwl_pcie_gen2_update_byte_tbl - Set up entry in Tx byte-count array
 */
static void iwl_pcie_gen2_update_byte_tbl(struct iwl_trans *trans,
					  struct iwl_txq *txq, u16 byte_cnt,
					  int num_tbs)
{
	int idx = iwl_txq_get_cmd_index(txq, txq->write_ptr);
	struct iwl_bc_tbl_entry *scd_bc_tbl = txq->bc_tbl.addr;
	u8 filled_tfd_size, num_fetch_chunks;
	u16 len = byte_cnt;
	__le16 bc_ent;

	if (WARN(idx >= txq->n_window, "%d >= %d\n", idx, txq->n_window))
		return;

	filled_tfd_size = offsetof(struct iwl_tfh_tfd, tbs) +
			  num_tbs * sizeof(struct iwl_tfh_tb);
	/*
	 * filled_tfd_size contains the number of filled bytes in the TFD.
	 * Dividing it by 64 will give the number of chunks to fetch
	 * to SRAM - 0 for one chunk, 1 for two, and so on.
	 * If, for example, the TFD contains only 3 TBs then 32 bytes
	 * of the TFD are used, and only one chunk of 64 bytes should
	 * be fetched.
	 */
	num_fetch_chunks = DIV_ROUND_UP(filled_tfd_size, 64) - 1;

	if (trans->mac_cfg->device_family >= IWL_DEVICE_FAMILY_AX210) {
		WARN_ON(len > 0x3FFF);
		bc_ent = cpu_to_le16(len | (num_fetch_chunks << 14));
	} else {
		len = DIV_ROUND_UP(len, 4);
		WARN_ON(len > 0xFFF);
		bc_ent = cpu_to_le16(len | (num_fetch_chunks << 12));
	}

	scd_bc_tbl[idx].tfd_offset = bc_ent;
}
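
/*
 * Worked example for the encoding above (illustrative values): per the
 * comment in the function, a TFD with 3 TBs occupies 32 bytes, so
 * DIV_ROUND_UP(32, 64) - 1 = 0 fetch chunks.  For a 1200-byte frame on an
 * AX210-or-later device the entry becomes 1200 | (0 << 14) = 1200; on older
 * gen2 devices the length is first converted to dwords,
 * DIV_ROUND_UP(1200, 4) = 300, and the entry becomes 300 | (0 << 12).
 */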

static u8 iwl_txq_gen2_get_num_tbs(struct iwl_tfh_tfd *tfd)
{
	return le16_to_cpu(tfd->num_tbs) & 0x1f;
}

int iwl_txq_gen2_set_tb(struct iwl_trans *trans, struct iwl_tfh_tfd *tfd,
			dma_addr_t addr, u16 len)
{
	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
	int idx = iwl_txq_gen2_get_num_tbs(tfd);
	struct iwl_tfh_tb *tb;

	/* Only WARN here so we know about the issue, but we mess up our
	 * unmap path because not every place currently checks for errors
	 * returned from this function - it can only return an error if
	 * there's no more space, and so when we know there is enough we
	 * don't always check ...
	 */
	WARN(iwl_txq_crosses_4g_boundary(addr, len),
	     "possible DMA problem with iova:0x%llx, len:%d\n",
	     (unsigned long long)addr, len);

	if (WARN_ON(idx >= IWL_TFH_NUM_TBS))
		return -EINVAL;
	tb = &tfd->tbs[idx];

	/* Each TFD can point to a maximum of max_tbs Tx buffers */
	if (le16_to_cpu(tfd->num_tbs) >= trans_pcie->txqs.tfd.max_tbs) {
		IWL_ERR(trans, "Error can not send more than %d chunks\n",
			trans_pcie->txqs.tfd.max_tbs);
		return -EINVAL;
	}

	put_unaligned_le64(addr, &tb->addr);
	tb->tb_len = cpu_to_le16(len);

	tfd->num_tbs = cpu_to_le16(idx + 1);

	return idx;
}

void iwl_txq_gen2_tfd_unmap(struct iwl_trans *trans,
			    struct iwl_cmd_meta *meta,
			    struct iwl_tfh_tfd *tfd)
{
	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
	int i, num_tbs;

	/* Sanity check on number of chunks */
	num_tbs = iwl_txq_gen2_get_num_tbs(tfd);

	if (num_tbs > trans_pcie->txqs.tfd.max_tbs) {
		IWL_ERR(trans, "Too many chunks: %i\n", num_tbs);
		return;
	}

	/* TB1 is mapped directly, the rest is the TSO page and SG list. */
	if (meta->sg_offset)
		num_tbs = 2;

	/* first TB is never freed - it's the bidirectional DMA data */
	for (i = 1; i < num_tbs; i++) {
		if (meta->tbs & BIT(i))
			dma_unmap_page(trans->dev,
				       le64_to_cpu(tfd->tbs[i].addr),
				       le16_to_cpu(tfd->tbs[i].tb_len),
				       DMA_TO_DEVICE);
		else
			dma_unmap_single(trans->dev,
					 le64_to_cpu(tfd->tbs[i].addr),
					 le16_to_cpu(tfd->tbs[i].tb_len),
					 DMA_TO_DEVICE);
	}

	iwl_txq_set_tfd_invalid_gen2(trans, tfd);
}

static void iwl_txq_gen2_free_tfd(struct iwl_trans *trans, struct iwl_txq *txq)
{
	/* rd_ptr is bounded by TFD_QUEUE_SIZE_MAX and
	 * idx is bounded by n_window
	 */
	int idx = iwl_txq_get_cmd_index(txq, txq->read_ptr);
	struct sk_buff *skb;

	lockdep_assert_held(&txq->lock);

	if (!txq->entries)
		return;

	iwl_txq_gen2_tfd_unmap(trans, &txq->entries[idx].meta,
			       iwl_txq_get_tfd(trans, txq, idx));

	skb = txq->entries[idx].skb;

	/* Can be called from irqs-disabled context
	 * If skb is not NULL, it means that the whole queue is being
	 * freed and that the queue is not empty - free the skb
	 */
	if (skb) {
		iwl_op_mode_free_skb(trans->op_mode, skb);
		txq->entries[idx].skb = NULL;
	}
}

/*
 * iwl_txq_inc_wr_ptr - Send new write index to hardware
 */
static void iwl_txq_inc_wr_ptr(struct iwl_trans *trans, struct iwl_txq *txq)
{
	lockdep_assert_held(&txq->lock);

	IWL_DEBUG_TX(trans, "Q:%d WR: 0x%x\n", txq->id, txq->write_ptr);

	/*
	 * if not in power-save mode, uCode will never sleep when we're
	 * trying to tx (during RFKILL, we're not trying to tx).
	 */
	iwl_write32(trans, HBUS_TARG_WRPTR, txq->write_ptr | (txq->id << 16));
}
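
/*
 * Example of the doorbell value above (hypothetical values): for queue
 * id 5 with write_ptr 0x2a, the driver writes 0x2a | (5 << 16) = 0x0005002a
 * to HBUS_TARG_WRPTR, i.e. the queue index in the upper half and the new
 * write pointer in the lower half of the register.
 */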

int iwl_txq_gen2_tx(struct iwl_trans *trans, struct sk_buff *skb,
		    struct iwl_device_tx_cmd *dev_cmd, int txq_id)
{
	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
	struct iwl_cmd_meta *out_meta;
	struct iwl_txq *txq = trans_pcie->txqs.txq[txq_id];
	u16 cmd_len;
	int idx;
	void *tfd;

	if (WARN_ONCE(txq_id >= IWL_MAX_TVQM_QUEUES,
		      "queue %d out of range", txq_id))
		return -EINVAL;

	if (WARN_ONCE(!test_bit(txq_id, trans_pcie->txqs.queue_used),
		      "TX on unused queue %d\n", txq_id))
		return -EINVAL;

	if (skb_is_nonlinear(skb) &&
	    skb_shinfo(skb)->nr_frags > IWL_TRANS_PCIE_MAX_FRAGS(trans_pcie) &&
	    __skb_linearize(skb))
		return -ENOMEM;

	spin_lock(&txq->lock);

	if (iwl_txq_space(trans, txq) < txq->high_mark) {
		iwl_txq_stop(trans, txq);

		/* don't put the packet on the ring, if there is no room */
		if (unlikely(iwl_txq_space(trans, txq) < 3)) {
			struct iwl_device_tx_cmd **dev_cmd_ptr;

			dev_cmd_ptr = (void *)((u8 *)skb->cb +
					       trans->conf.cb_data_offs +
					       sizeof(void *));

			*dev_cmd_ptr = dev_cmd;
			__skb_queue_tail(&txq->overflow_q, skb);
			spin_unlock(&txq->lock);
			return 0;
		}
	}

	idx = iwl_txq_get_cmd_index(txq, txq->write_ptr);

	/* Set up driver data for this TFD */
	txq->entries[idx].skb = skb;
	txq->entries[idx].cmd = dev_cmd;

	dev_cmd->hdr.sequence =
		cpu_to_le16((u16)(QUEUE_TO_SEQ(txq_id) |
			    INDEX_TO_SEQ(idx)));

	/* Set up first empty entry in queue's array of Tx/cmd buffers */
	out_meta = &txq->entries[idx].meta;
	memset(out_meta, 0, sizeof(*out_meta));

	tfd = iwl_txq_gen2_build_tfd(trans, txq, dev_cmd, skb, out_meta);
	if (!tfd) {
		spin_unlock(&txq->lock);
		return -1;
	}

	if (trans->mac_cfg->device_family >= IWL_DEVICE_FAMILY_AX210) {
		struct iwl_tx_cmd *tx_cmd =
			(void *)dev_cmd->payload;

		cmd_len = le16_to_cpu(tx_cmd->len);
	} else {
		struct iwl_tx_cmd_v9 *tx_cmd_v9 =
			(void *)dev_cmd->payload;

		cmd_len = le16_to_cpu(tx_cmd_v9->len);
	}

	/* Set up entry for this TFD in Tx byte-count array */
	iwl_pcie_gen2_update_byte_tbl(trans, txq, cmd_len,
				      iwl_txq_gen2_get_num_tbs(tfd));

	/* start timer if queue currently empty */
	if (txq->read_ptr == txq->write_ptr && txq->wd_timeout)
		mod_timer(&txq->stuck_timer, jiffies + txq->wd_timeout);

	/* Tell device the write index *just past* this latest filled TFD */
	txq->write_ptr = iwl_txq_inc_wrap(trans, txq->write_ptr);
	iwl_txq_inc_wr_ptr(trans, txq);
	/*
	 * At this point the frame is "transmitted" successfully
	 * and we will get a TX status notification eventually.
	 */
	spin_unlock(&txq->lock);
	return 0;
}

/*************** HOST COMMAND QUEUE FUNCTIONS *****/

/*
 * iwl_txq_gen2_unmap - Unmap any remaining DMA mappings and free skb's
 */
static void iwl_txq_gen2_unmap(struct iwl_trans *trans, int txq_id)
{
	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
	struct iwl_txq *txq = trans_pcie->txqs.txq[txq_id];

	spin_lock_bh(&txq->reclaim_lock);
	spin_lock(&txq->lock);
	while (txq->write_ptr != txq->read_ptr) {
		IWL_DEBUG_TX_REPLY(trans, "Q %d Free %d\n",
				   txq_id, txq->read_ptr);

		if (txq_id != trans->conf.cmd_queue) {
			int idx = iwl_txq_get_cmd_index(txq, txq->read_ptr);
			struct iwl_cmd_meta *cmd_meta = &txq->entries[idx].meta;
			struct sk_buff *skb = txq->entries[idx].skb;

			if (!WARN_ON_ONCE(!skb))
				iwl_pcie_free_tso_pages(trans, skb, cmd_meta);
		}
		iwl_txq_gen2_free_tfd(trans, txq);
		txq->read_ptr = iwl_txq_inc_wrap(trans, txq->read_ptr);
	}

	while (!skb_queue_empty(&txq->overflow_q)) {
		struct sk_buff *skb = __skb_dequeue(&txq->overflow_q);

		iwl_op_mode_free_skb(trans->op_mode, skb);
	}

	spin_unlock(&txq->lock);
	spin_unlock_bh(&txq->reclaim_lock);

	/* just in case - this queue may have been stopped */
	iwl_trans_pcie_wake_queue(trans, txq);
}

static void iwl_txq_gen2_free_memory(struct iwl_trans *trans,
				     struct iwl_txq *txq)
{
	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
	struct device *dev = trans->dev;

	/* De-alloc circular buffer of TFDs */
	if (txq->tfds) {
		dma_free_coherent(dev,
				  trans_pcie->txqs.tfd.size * txq->n_window,
				  txq->tfds, txq->dma_addr);
		dma_free_coherent(dev,
				  sizeof(*txq->first_tb_bufs) * txq->n_window,
				  txq->first_tb_bufs, txq->first_tb_dma);
	}

	kfree(txq->entries);
	if (txq->bc_tbl.addr)
		dma_pool_free(trans_pcie->txqs.bc_pool,
			      txq->bc_tbl.addr, txq->bc_tbl.dma);
	kfree(txq);
}

/*
 * iwl_txq_gen2_free - Deallocate DMA queue.
 * @txq: Transmit queue to deallocate.
 *
 * Empty queue by removing and destroying all BD's.
 * Free all buffers.
 * Free the queue's memory, including the byte-count table.
 */
static void iwl_txq_gen2_free(struct iwl_trans *trans, int txq_id)
{
	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
	struct iwl_txq *txq;
	int i;

	if (WARN_ONCE(txq_id >= IWL_MAX_TVQM_QUEUES,
		      "queue %d out of range", txq_id))
		return;

	txq = trans_pcie->txqs.txq[txq_id];

	if (WARN_ON(!txq))
		return;

	iwl_txq_gen2_unmap(trans, txq_id);

	/* De-alloc array of command/tx buffers */
	if (txq_id == trans->conf.cmd_queue)
		for (i = 0; i < txq->n_window; i++) {
			kfree_sensitive(txq->entries[i].cmd);
			kfree_sensitive(txq->entries[i].free_buf);
		}
	timer_delete_sync(&txq->stuck_timer);

	iwl_txq_gen2_free_memory(trans, txq);

	trans_pcie->txqs.txq[txq_id] = NULL;

	clear_bit(txq_id, trans_pcie->txqs.queue_used);
}

static struct iwl_txq *
iwl_txq_dyn_alloc_dma(struct iwl_trans *trans, int size, unsigned int timeout)
{
	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
	size_t bc_tbl_size, bc_tbl_entries;
	struct iwl_txq *txq;
	int ret;

	WARN_ON(!trans_pcie->txqs.bc_tbl_size);

	bc_tbl_size = trans_pcie->txqs.bc_tbl_size;
	bc_tbl_entries = bc_tbl_size / sizeof(u16);

	if (WARN_ON(size > bc_tbl_entries))
		return ERR_PTR(-EINVAL);

	txq = kzalloc(sizeof(*txq), GFP_KERNEL);
	if (!txq)
		return ERR_PTR(-ENOMEM);

	txq->bc_tbl.addr = dma_pool_alloc(trans_pcie->txqs.bc_pool, GFP_KERNEL,
					  &txq->bc_tbl.dma);
	if (!txq->bc_tbl.addr) {
		IWL_ERR(trans, "Scheduler BC Table allocation failed\n");
		kfree(txq);
		return ERR_PTR(-ENOMEM);
	}

	ret = iwl_pcie_txq_alloc(trans, txq, size, false);
	if (ret) {
		IWL_ERR(trans, "Tx queue alloc failed\n");
		goto error;
	}
	ret = iwl_txq_init(trans, txq, size, false);
	if (ret) {
		IWL_ERR(trans, "Tx queue init failed\n");
		goto error;
	}

	txq->wd_timeout = msecs_to_jiffies(timeout);

	return txq;

error:
	iwl_txq_gen2_free_memory(trans, txq);
	return ERR_PTR(ret);
}

static int iwl_pcie_txq_alloc_response(struct iwl_trans *trans,
				       struct iwl_txq *txq,
				       struct iwl_host_cmd *hcmd)
{
	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
	struct iwl_tx_queue_cfg_rsp *rsp;
	int ret, qid;
	u32 wr_ptr;

	if (WARN_ON(iwl_rx_packet_payload_len(hcmd->resp_pkt) !=
		    sizeof(*rsp))) {
		ret = -EINVAL;
		goto error_free_resp;
	}

	rsp = (void *)hcmd->resp_pkt->data;
	qid = le16_to_cpu(rsp->queue_number);
	wr_ptr = le16_to_cpu(rsp->write_pointer);

	if (qid >= ARRAY_SIZE(trans_pcie->txqs.txq)) {
		WARN_ONCE(1, "queue index %d unsupported", qid);
		ret = -EIO;
		goto error_free_resp;
	}

	if (test_and_set_bit(qid, trans_pcie->txqs.queue_used)) {
		WARN_ONCE(1, "queue %d already used", qid);
		ret = -EIO;
		goto error_free_resp;
	}

	if (WARN_ONCE(trans_pcie->txqs.txq[qid],
		      "queue %d already allocated\n", qid)) {
		ret = -EIO;
		goto error_free_resp;
	}

	txq->id = qid;
	trans_pcie->txqs.txq[qid] = txq;
	wr_ptr &= (trans->mac_cfg->base->max_tfd_queue_size - 1);

	/* Place first TFD at index corresponding to start sequence number */
	txq->read_ptr = wr_ptr;
	txq->write_ptr = wr_ptr;

	IWL_DEBUG_TX_QUEUES(trans, "Activate queue %d\n", qid);

	iwl_free_resp(hcmd);
	return qid;

error_free_resp:
	iwl_free_resp(hcmd);
	iwl_txq_gen2_free_memory(trans, txq);
	return ret;
}

int iwl_txq_dyn_alloc(struct iwl_trans *trans, u32 flags, u32 sta_mask,
		      u8 tid, int size, unsigned int timeout)
{
	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
	struct iwl_txq *txq;
	union {
		struct iwl_tx_queue_cfg_cmd old;
		struct iwl_scd_queue_cfg_cmd new;
	} cmd;
	struct iwl_host_cmd hcmd = {
		.flags = CMD_WANT_SKB,
	};
	int ret;

	/* take the min with bytecount table entries allowed */
	size = min_t(u32, size, trans_pcie->txqs.bc_tbl_size / sizeof(u16));
	/* but must be power of 2 values for calculating read/write pointers */
	size = rounddown_pow_of_two(size);

	if (trans->mac_cfg->device_family == IWL_DEVICE_FAMILY_BZ &&
	    trans->info.hw_rev_step == SILICON_A_STEP) {
		size = 4096;
		txq = iwl_txq_dyn_alloc_dma(trans, size, timeout);
	} else {
		do {
			txq = iwl_txq_dyn_alloc_dma(trans, size, timeout);
			if (!IS_ERR(txq))
				break;

			IWL_DEBUG_TX_QUEUES(trans,
					    "Failed allocating TXQ of size %d for sta mask %x tid %d, ret: %ld\n",
					    size, sta_mask, tid,
					    PTR_ERR(txq));
			size /= 2;
		} while (size >= 16);
	}

	if (IS_ERR(txq))
		return PTR_ERR(txq);

	if (trans->conf.queue_alloc_cmd_ver == 0) {
		memset(&cmd.old, 0, sizeof(cmd.old));
		cmd.old.tfdq_addr = cpu_to_le64(txq->dma_addr);
		cmd.old.byte_cnt_addr = cpu_to_le64(txq->bc_tbl.dma);
		cmd.old.cb_size = cpu_to_le32(TFD_QUEUE_CB_SIZE(size));
		cmd.old.flags = cpu_to_le16(flags | TX_QUEUE_CFG_ENABLE_QUEUE);
		cmd.old.tid = tid;

		if (hweight32(sta_mask) != 1) {
			ret = -EINVAL;
			goto error;
		}
		cmd.old.sta_id = ffs(sta_mask) - 1;

		hcmd.id = SCD_QUEUE_CFG;
		hcmd.len[0] = sizeof(cmd.old);
		hcmd.data[0] = &cmd.old;
	} else if (trans->conf.queue_alloc_cmd_ver == 3) {
		memset(&cmd.new, 0, sizeof(cmd.new));
		cmd.new.operation = cpu_to_le32(IWL_SCD_QUEUE_ADD);
		cmd.new.u.add.tfdq_dram_addr = cpu_to_le64(txq->dma_addr);
		cmd.new.u.add.bc_dram_addr = cpu_to_le64(txq->bc_tbl.dma);
		cmd.new.u.add.cb_size = cpu_to_le32(TFD_QUEUE_CB_SIZE(size));
		cmd.new.u.add.flags = cpu_to_le32(flags);
		cmd.new.u.add.sta_mask = cpu_to_le32(sta_mask);
		cmd.new.u.add.tid = tid;

		hcmd.id = WIDE_ID(DATA_PATH_GROUP, SCD_QUEUE_CONFIG_CMD);
		hcmd.len[0] = sizeof(cmd.new);
		hcmd.data[0] = &cmd.new;
	} else {
		ret = -EOPNOTSUPP;
		goto error;
	}

	ret = iwl_trans_send_cmd(trans, &hcmd);
	if (ret)
		goto error;

	return iwl_pcie_txq_alloc_response(trans, txq, &hcmd);

error:
	iwl_txq_gen2_free_memory(trans, txq);
	return ret;
}
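
/*
 * Example of the size fallback above (hypothetical request): asking for a
 * 1000-entry queue with bc_tbl_size / sizeof(u16) = 4096 first clamps to
 * 1000 and then rounds down to the next power of two, 512.  If the DMA
 * allocation fails at 512 entries, the loop retries with 256, 128, ...,
 * and finally gives up below 16 entries, returning the last error.
 */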

void iwl_txq_dyn_free(struct iwl_trans *trans, int queue)
{
	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);

	if (WARN(queue >= IWL_MAX_TVQM_QUEUES,
		 "queue %d out of range", queue))
		return;

	/*
	 * Upon HW Rfkill - we stop the device, and then stop the queues
	 * in the op_mode. Just for the sake of the simplicity of the op_mode,
	 * allow the op_mode to call txq_disable after it already called
	 * stop_device.
	 */
	if (!test_and_clear_bit(queue, trans_pcie->txqs.queue_used)) {
		WARN_ONCE(test_bit(STATUS_DEVICE_ENABLED, &trans->status),
			  "queue %d not used", queue);
		return;
	}

	iwl_txq_gen2_free(trans, queue);

	IWL_DEBUG_TX_QUEUES(trans, "Deactivate queue %d\n", queue);
}

void iwl_txq_gen2_tx_free(struct iwl_trans *trans)
{
	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
	int i;

	memset(trans_pcie->txqs.queue_used, 0,
	       sizeof(trans_pcie->txqs.queue_used));

	/* Free all TX queues */
	for (i = 0; i < ARRAY_SIZE(trans_pcie->txqs.txq); i++) {
		if (!trans_pcie->txqs.txq[i])
			continue;

		iwl_txq_gen2_free(trans, i);
	}
}

int iwl_txq_gen2_init(struct iwl_trans *trans, int txq_id, int queue_size)
{
	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
	struct iwl_txq *queue;
	int ret;

	/* alloc and init the tx queue */
	if (!trans_pcie->txqs.txq[txq_id]) {
		queue = kzalloc(sizeof(*queue), GFP_KERNEL);
		if (!queue) {
			IWL_ERR(trans, "Not enough memory for tx queue\n");
			return -ENOMEM;
		}
		trans_pcie->txqs.txq[txq_id] = queue;
		ret = iwl_pcie_txq_alloc(trans, queue, queue_size, true);
		if (ret) {
			IWL_ERR(trans, "Tx %d queue alloc failed\n", txq_id);
			goto error;
		}
	} else {
		queue = trans_pcie->txqs.txq[txq_id];
	}

	ret = iwl_txq_init(trans, queue, queue_size,
			   (txq_id == trans->conf.cmd_queue));
	if (ret) {
		IWL_ERR(trans, "Tx %d queue init failed\n", txq_id);
		goto error;
	}
	trans_pcie->txqs.txq[txq_id]->id = txq_id;
	set_bit(txq_id, trans_pcie->txqs.queue_used);

	return 0;

error:
	iwl_txq_gen2_tx_free(trans);
	return ret;
}

/*************** HOST COMMAND QUEUE FUNCTIONS *****/

/*
 * iwl_pcie_gen2_enqueue_hcmd - enqueue a uCode command
 * @trans: transport private data
 * @cmd: a pointer to the ucode command structure
 *
 * The function returns < 0 values to indicate the operation
 * failed. On success, it returns the index (>= 0) of the command in the
 * command queue.
 */
int iwl_pcie_gen2_enqueue_hcmd(struct iwl_trans *trans,
			       struct iwl_host_cmd *cmd)
{
	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
	struct iwl_txq *txq = trans_pcie->txqs.txq[trans->conf.cmd_queue];
	struct iwl_device_cmd *out_cmd;
	struct iwl_cmd_meta *out_meta;
	void *dup_buf = NULL;
	dma_addr_t phys_addr;
	int i, cmd_pos, idx;
	u16 copy_size, cmd_size, tb0_size;
	bool had_nocopy = false;
	u8 group_id = iwl_cmd_groupid(cmd->id);
	const u8 *cmddata[IWL_MAX_CMD_TBS_PER_TFD];
	u16 cmdlen[IWL_MAX_CMD_TBS_PER_TFD];
	struct iwl_tfh_tfd *tfd;
	unsigned long flags;

	if (WARN_ON(cmd->flags & CMD_BLOCK_TXQS))
		return -EINVAL;

	copy_size = sizeof(struct iwl_cmd_header_wide);
	cmd_size = sizeof(struct iwl_cmd_header_wide);

	for (i = 0; i < IWL_MAX_CMD_TBS_PER_TFD; i++) {
		cmddata[i] = cmd->data[i];
		cmdlen[i] = cmd->len[i];

		if (!cmd->len[i])
			continue;

		/* need at least IWL_FIRST_TB_SIZE copied */
		if (copy_size < IWL_FIRST_TB_SIZE) {
			int copy = IWL_FIRST_TB_SIZE - copy_size;

			if (copy > cmdlen[i])
				copy = cmdlen[i];
			cmdlen[i] -= copy;
			cmddata[i] += copy;
			copy_size += copy;
		}

		if (cmd->dataflags[i] & IWL_HCMD_DFL_NOCOPY) {
			had_nocopy = true;
			if (WARN_ON(cmd->dataflags[i] & IWL_HCMD_DFL_DUP)) {
				idx = -EINVAL;
				goto free_dup_buf;
			}
		} else if (cmd->dataflags[i] & IWL_HCMD_DFL_DUP) {
			/*
			 * This is also a chunk that isn't copied
			 * to the static buffer so set had_nocopy.
			 */
			had_nocopy = true;

			/* only allowed once */
			if (WARN_ON(dup_buf)) {
				idx = -EINVAL;
				goto free_dup_buf;
			}

			dup_buf = kmemdup(cmddata[i], cmdlen[i],
					  GFP_ATOMIC);
			if (!dup_buf)
				return -ENOMEM;
		} else {
			/* NOCOPY must not be followed by normal! */
			if (WARN_ON(had_nocopy)) {
				idx = -EINVAL;
				goto free_dup_buf;
			}
			copy_size += cmdlen[i];
		}
		cmd_size += cmd->len[i];
	}

	/*
	 * If any of the command structures end up being larger than the
	 * TFD_MAX_PAYLOAD_SIZE and they aren't dynamically allocated into
	 * separate TFDs, then we will need to increase the size of the buffers
	 */
	if (WARN(copy_size > TFD_MAX_PAYLOAD_SIZE,
		 "Command %s (%#x) is too large (%d bytes)\n",
		 iwl_get_cmd_string(trans, cmd->id), cmd->id, copy_size)) {
		idx = -EINVAL;
		goto free_dup_buf;
	}

	spin_lock_irqsave(&txq->lock, flags);

	idx = iwl_txq_get_cmd_index(txq, txq->write_ptr);
	tfd = iwl_txq_get_tfd(trans, txq, txq->write_ptr);
	memset(tfd, 0, sizeof(*tfd));

	if (iwl_txq_space(trans, txq) < ((cmd->flags & CMD_ASYNC) ? 2 : 1)) {
		spin_unlock_irqrestore(&txq->lock, flags);

		IWL_ERR(trans, "No space in command queue\n");
		iwl_op_mode_nic_error(trans->op_mode,
				      IWL_ERR_TYPE_CMD_QUEUE_FULL);
		iwl_trans_schedule_reset(trans, IWL_ERR_TYPE_CMD_QUEUE_FULL);
		idx = -ENOSPC;
		goto free_dup_buf;
	}

	out_cmd = txq->entries[idx].cmd;
	out_meta = &txq->entries[idx].meta;

	/* re-initialize, this also marks the SG list as unused */
	memset(out_meta, 0, sizeof(*out_meta));
	if (cmd->flags & CMD_WANT_SKB)
		out_meta->source = cmd;

	/* set up the header */
	out_cmd->hdr_wide.cmd = iwl_cmd_opcode(cmd->id);
	out_cmd->hdr_wide.group_id = group_id;
	out_cmd->hdr_wide.version = iwl_cmd_version(cmd->id);
	out_cmd->hdr_wide.length =
		cpu_to_le16(cmd_size - sizeof(struct iwl_cmd_header_wide));
	out_cmd->hdr_wide.reserved = 0;
	out_cmd->hdr_wide.sequence =
		cpu_to_le16(QUEUE_TO_SEQ(trans->conf.cmd_queue) |
			    INDEX_TO_SEQ(txq->write_ptr));

	cmd_pos = sizeof(struct iwl_cmd_header_wide);
	copy_size = sizeof(struct iwl_cmd_header_wide);

	/* and copy the data that needs to be copied */
	for (i = 0; i < IWL_MAX_CMD_TBS_PER_TFD; i++) {
		int copy;

		if (!cmd->len[i])
			continue;

		/* copy everything if not nocopy/dup */
		if (!(cmd->dataflags[i] & (IWL_HCMD_DFL_NOCOPY |
					   IWL_HCMD_DFL_DUP))) {
			copy = cmd->len[i];

			memcpy((u8 *)out_cmd + cmd_pos, cmd->data[i], copy);
			cmd_pos += copy;
			copy_size += copy;
			continue;
		}

		/*
		 * Otherwise we need at least IWL_FIRST_TB_SIZE copied
		 * in total (for bi-directional DMA), but copy up to what
		 * we can fit into the payload for debug dump purposes.
		 */
		copy = min_t(int, TFD_MAX_PAYLOAD_SIZE - cmd_pos, cmd->len[i]);

		memcpy((u8 *)out_cmd + cmd_pos, cmd->data[i], copy);
		cmd_pos += copy;

		/* However, keep copy_size accurate - we need it below */
		if (copy_size < IWL_FIRST_TB_SIZE) {
			copy = IWL_FIRST_TB_SIZE - copy_size;

			if (copy > cmd->len[i])
				copy = cmd->len[i];
			copy_size += copy;
		}
	}

	IWL_DEBUG_HC(trans,
		     "Sending command %s (%.2x.%.2x), seq: 0x%04X, %d bytes at %d[%d]:%d\n",
		     iwl_get_cmd_string(trans, cmd->id), group_id,
		     out_cmd->hdr.cmd, le16_to_cpu(out_cmd->hdr.sequence),
		     cmd_size, txq->write_ptr, idx, trans->conf.cmd_queue);

	/* start the TFD with the minimum copy bytes */
	tb0_size = min_t(int, copy_size, IWL_FIRST_TB_SIZE);
	memcpy(&txq->first_tb_bufs[idx], out_cmd, tb0_size);
	iwl_txq_gen2_set_tb(trans, tfd, iwl_txq_get_first_tb_dma(txq, idx),
			    tb0_size);

	/* map first command fragment, if any remains */
	if (copy_size > tb0_size) {
		phys_addr = dma_map_single(trans->dev,
					   (u8 *)out_cmd + tb0_size,
					   copy_size - tb0_size,
					   DMA_TO_DEVICE);
		if (dma_mapping_error(trans->dev, phys_addr)) {
			idx = -ENOMEM;
			iwl_txq_gen2_tfd_unmap(trans, out_meta, tfd);
			goto out;
		}
		iwl_txq_gen2_set_tb(trans, tfd, phys_addr,
				    copy_size - tb0_size);
	}

	/* map the remaining (adjusted) nocopy/dup fragments */
	for (i = 0; i < IWL_MAX_CMD_TBS_PER_TFD; i++) {
		void *data = (void *)(uintptr_t)cmddata[i];

		if (!cmdlen[i])
			continue;
		if (!(cmd->dataflags[i] & (IWL_HCMD_DFL_NOCOPY |
					   IWL_HCMD_DFL_DUP)))
			continue;
		if (cmd->dataflags[i] & IWL_HCMD_DFL_DUP)
			data = dup_buf;
		phys_addr = dma_map_single(trans->dev, data,
					   cmdlen[i], DMA_TO_DEVICE);
		if (dma_mapping_error(trans->dev, phys_addr)) {
			idx = -ENOMEM;
			iwl_txq_gen2_tfd_unmap(trans, out_meta, tfd);
			goto out;
		}
		iwl_txq_gen2_set_tb(trans, tfd, phys_addr, cmdlen[i]);
	}

	BUILD_BUG_ON(IWL_TFH_NUM_TBS > sizeof(out_meta->tbs) * BITS_PER_BYTE);
	out_meta->flags = cmd->flags;
	if (WARN_ON_ONCE(txq->entries[idx].free_buf))
		kfree_sensitive(txq->entries[idx].free_buf);
	txq->entries[idx].free_buf = dup_buf;

	trace_iwlwifi_dev_hcmd(trans->dev, cmd, cmd_size, &out_cmd->hdr_wide);

	/* start timer if queue currently empty */
	if (txq->read_ptr == txq->write_ptr && txq->wd_timeout)
		mod_timer(&txq->stuck_timer, jiffies + txq->wd_timeout);

	spin_lock(&trans_pcie->reg_lock);
	/* Increment and update queue's write index */
	txq->write_ptr = iwl_txq_inc_wrap(trans, txq->write_ptr);
	iwl_txq_inc_wr_ptr(trans, txq);
	spin_unlock(&trans_pcie->reg_lock);

out:
	spin_unlock_irqrestore(&txq->lock, flags);
free_dup_buf:
	if (idx < 0)
		kfree(dup_buf);
	return idx;
}
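
/*
 * Illustration (not driver code) of how iwl_pcie_gen2_enqueue_hcmd() lays
 * out a host command, assuming one copied chunk plus one NOCOPY chunk:
 *
 *	TB0: wide command header plus the start of the copied data, up to
 *	     IWL_FIRST_TB_SIZE bytes, taken from txq->first_tb_bufs[idx]
 *	TB1: the rest of the copied data, mapped from the command buffer
 *	TB2: the NOCOPY (or kmemdup'ed DUP) chunk, mapped separately
 *
 * copy_size covers only what ends up in the driver's command buffer, while
 * cmd_size covers the full command as seen by the firmware, which is why
 * hdr_wide.length is derived from cmd_size above.
 */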