// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
 * Copyright (C) 2017 Intel Deutschland GmbH
 * Copyright (C) 2018-2020, 2023-2025 Intel Corporation
 */
#include <net/tso.h>
#include <linux/tcp.h>

#include "iwl-debug.h"
#include "iwl-csr.h"
#include "iwl-io.h"
#include "internal.h"
#include "fw/api/tx.h"
#include "fw/api/commands.h"
#include "fw/api/datapath.h"
#include "iwl-scd.h"

static struct page *get_workaround_page(struct iwl_trans *trans,
					struct sk_buff *skb)
{
	struct iwl_tso_page_info *info;
	struct page **page_ptr;
	struct page *ret;
	dma_addr_t phys;

	page_ptr = (void *)((u8 *)skb->cb + trans->conf.cb_data_offs);

	ret = alloc_page(GFP_ATOMIC);
	if (!ret)
		return NULL;

	info = IWL_TSO_PAGE_INFO(page_address(ret));

	/* Create a DMA mapping for the page */
	phys = dma_map_page_attrs(trans->dev, ret, 0, PAGE_SIZE,
				  DMA_TO_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
	if (unlikely(dma_mapping_error(trans->dev, phys))) {
		__free_page(ret);
		return NULL;
	}

	/* Store physical address and set use count */
	info->dma_addr = phys;
	refcount_set(&info->use_count, 1);

	/* set the chaining pointer to the previous page if there is one */
	info->next = *page_ptr;
	*page_ptr = ret;

	return ret;
}

/*
 * Add a TB and if needed apply the FH HW bug workaround;
 * meta != NULL indicates that it's a page mapping and we
 * need to dma_unmap_page() and set the meta->tbs bit in
 * this case.
 */
static int iwl_txq_gen2_set_tb_with_wa(struct iwl_trans *trans,
				       struct sk_buff *skb,
				       struct iwl_tfh_tfd *tfd,
				       dma_addr_t phys, void *virt,
				       u16 len, struct iwl_cmd_meta *meta,
				       bool unmap)
{
	dma_addr_t oldphys = phys;
	struct page *page;
	int ret;

	if (unlikely(dma_mapping_error(trans->dev, phys)))
		return -ENOMEM;

	if (likely(!iwl_txq_crosses_4g_boundary(phys, len))) {
		ret = iwl_txq_gen2_set_tb(trans, tfd, phys, len);

		if (ret < 0)
			goto unmap;

		if (meta)
			meta->tbs |= BIT(ret);

		ret = 0;
		goto trace;
	}

	/*
	 * Work around a hardware bug. If (as expressed in the
	 * condition above) the TB ends on or crosses a 4 GiB
	 * (2^32) boundary, then the next TB may be accessed
	 * with the wrong address.
	 * To work around it, copy the data elsewhere and make
	 * a new mapping for it so the device will not fail.
	 */

	if (WARN_ON(len > IWL_TSO_PAGE_DATA_SIZE)) {
		ret = -ENOBUFS;
		goto unmap;
	}

	page = get_workaround_page(trans, skb);
	if (!page) {
		ret = -ENOMEM;
		goto unmap;
	}

	memcpy(page_address(page), virt, len);

	/*
	 * This is a bit odd, but performance does not matter here, what
	 * matters are the expectations of the calling code and TB cleanup
	 * function.
	 *
	 * As such, if unmap is set, then create another mapping for the TB
	 * entry as it will be unmapped later. On the other hand, if it is not
	 * set, then the TB entry will not be unmapped and instead we simply
	 * reference and sync the mapping that get_workaround_page() created.
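	 * Either way, the copy in the workaround page is what the device
	 * will actually read for this TB.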
	 */
	if (unmap) {
		phys = dma_map_single(trans->dev, page_address(page), len,
				      DMA_TO_DEVICE);
		if (unlikely(dma_mapping_error(trans->dev, phys)))
			return -ENOMEM;
	} else {
		phys = iwl_pcie_get_tso_page_phys(page_address(page));
		dma_sync_single_for_device(trans->dev, phys, len,
					   DMA_TO_DEVICE);
	}

	ret = iwl_txq_gen2_set_tb(trans, tfd, phys, len);
	if (ret < 0) {
		/* unmap the new allocation as single */
		oldphys = phys;
		meta = NULL;
		goto unmap;
	}

	IWL_DEBUG_TX(trans,
		     "TB bug workaround: copied %d bytes from 0x%llx to 0x%llx\n",
		     len, (unsigned long long)oldphys,
		     (unsigned long long)phys);

	ret = 0;
unmap:
	if (!unmap)
		goto trace;

	if (meta)
		dma_unmap_page(trans->dev, oldphys, len, DMA_TO_DEVICE);
	else
		dma_unmap_single(trans->dev, oldphys, len, DMA_TO_DEVICE);
trace:
	trace_iwlwifi_dev_tx_tb(trans->dev, skb, virt, phys, len);

	return ret;
}

static int iwl_txq_gen2_build_amsdu(struct iwl_trans *trans,
				    struct sk_buff *skb,
				    struct iwl_tfh_tfd *tfd,
				    struct iwl_cmd_meta *out_meta,
				    int start_len,
				    u8 hdr_len,
				    struct iwl_device_tx_cmd *dev_cmd)
{
#ifdef CONFIG_INET
	struct iwl_tx_cmd_v9 *tx_cmd = (void *)dev_cmd->payload;
	struct ieee80211_hdr *hdr = (void *)skb->data;
	unsigned int snap_ip_tcp_hdrlen, ip_hdrlen, total_len, hdr_room;
	unsigned int mss = skb_shinfo(skb)->gso_size;
	unsigned int data_offset = 0;
	dma_addr_t start_hdr_phys;
	u16 length, amsdu_pad;
	u8 *start_hdr;
	struct sg_table *sgt;
	struct tso_t tso;

	trace_iwlwifi_dev_tx(trans->dev, skb, tfd, sizeof(*tfd),
			     &dev_cmd->hdr, start_len, 0);

	ip_hdrlen = skb_network_header_len(skb);
	snap_ip_tcp_hdrlen = 8 + ip_hdrlen + tcp_hdrlen(skb);
	total_len = skb->len - snap_ip_tcp_hdrlen - hdr_len;
	amsdu_pad = 0;

	/* total amount of header we may need for this A-MSDU */
	hdr_room = DIV_ROUND_UP(total_len, mss) *
		(3 + snap_ip_tcp_hdrlen + sizeof(struct ethhdr));

	/* Our device supports 9 segments at most, it will fit in 1 page */
	sgt = iwl_pcie_prep_tso(trans, skb, out_meta, &start_hdr, hdr_room,
				snap_ip_tcp_hdrlen + hdr_len);
	if (!sgt)
		return -ENOMEM;

	start_hdr_phys = iwl_pcie_get_tso_page_phys(start_hdr);

	/*
	 * Pull the ieee80211 header to be able to use TSO core,
	 * we will restore it for the tx_status flow.
	 */
	skb_pull(skb, hdr_len);

	/*
	 * Remove the length of all the headers that we don't actually
	 * have in the MPDU by themselves, but that we duplicate into
	 * all the different MSDUs inside the A-MSDU.
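	 * The per-subframe header lengths are added back to tx_cmd->len
	 * below as each subframe is built.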
	 */
	le16_add_cpu(&tx_cmd->len, -snap_ip_tcp_hdrlen);

	tso_start(skb, &tso);

	while (total_len) {
		/* this is the data left for this subframe */
		unsigned int data_left = min_t(unsigned int, mss, total_len);
		unsigned int tb_len;
		dma_addr_t tb_phys;
		u8 *pos_hdr = start_hdr;

		total_len -= data_left;

		memset(pos_hdr, 0, amsdu_pad);
		pos_hdr += amsdu_pad;
		amsdu_pad = (4 - (sizeof(struct ethhdr) + snap_ip_tcp_hdrlen +
				  data_left)) & 0x3;
		ether_addr_copy(pos_hdr, ieee80211_get_DA(hdr));
		pos_hdr += ETH_ALEN;
		ether_addr_copy(pos_hdr, ieee80211_get_SA(hdr));
		pos_hdr += ETH_ALEN;

		length = snap_ip_tcp_hdrlen + data_left;
		*((__be16 *)pos_hdr) = cpu_to_be16(length);
		pos_hdr += sizeof(length);

		/*
		 * This will copy the SNAP as well which will be considered
		 * as MAC header.
		 */
		tso_build_hdr(skb, pos_hdr, &tso, data_left, !total_len);

		pos_hdr += snap_ip_tcp_hdrlen;

		tb_len = pos_hdr - start_hdr;
		tb_phys = iwl_pcie_get_tso_page_phys(start_hdr);

		/*
		 * No need for _with_wa, this is from the TSO page and
		 * we leave some space at the end of it so we can't hit
		 * the buggy scenario.
		 */
		iwl_txq_gen2_set_tb(trans, tfd, tb_phys, tb_len);
		trace_iwlwifi_dev_tx_tb(trans->dev, skb, start_hdr,
					tb_phys, tb_len);
		/* add this subframe's headers' length to the tx_cmd */
		le16_add_cpu(&tx_cmd->len, tb_len);

		/* prepare the start_hdr for the next subframe */
		start_hdr = pos_hdr;

		/* put the payload */
		while (data_left) {
			int ret;

			tb_len = min_t(unsigned int, tso.size, data_left);
			tb_phys = iwl_pcie_get_sgt_tb_phys(sgt, data_offset,
							   tb_len);
			/* Not a real mapping error, use direct comparison */
			if (unlikely(tb_phys == DMA_MAPPING_ERROR))
				goto out_err;

			ret = iwl_txq_gen2_set_tb_with_wa(trans, skb, tfd,
							  tb_phys, tso.data,
							  tb_len, NULL, false);
			if (ret)
				goto out_err;

			data_left -= tb_len;
			data_offset += tb_len;
			tso_build_data(skb, &tso, tb_len);
		}
	}

	dma_sync_single_for_device(trans->dev, start_hdr_phys, hdr_room,
				   DMA_TO_DEVICE);

	/* re-add the WiFi header */
	skb_push(skb, hdr_len);

	return 0;

out_err:
#endif
	return -EINVAL;
}

static struct
iwl_tfh_tfd *iwl_txq_gen2_build_tx_amsdu(struct iwl_trans *trans,
					 struct iwl_txq *txq,
					 struct iwl_device_tx_cmd *dev_cmd,
					 struct sk_buff *skb,
					 struct iwl_cmd_meta *out_meta,
					 int hdr_len,
					 int tx_cmd_len)
{
	int idx = iwl_txq_get_cmd_index(txq, txq->write_ptr);
	struct iwl_tfh_tfd *tfd = iwl_txq_get_tfd(trans, txq, idx);
	dma_addr_t tb_phys;
	int len;
	void *tb1_addr;

	tb_phys = iwl_txq_get_first_tb_dma(txq, idx);

	/*
	 * No need for _with_wa, the first TB allocation is aligned up
	 * to a 64-byte boundary and thus can't be at the end or cross
	 * a page boundary (much less a 2^32 boundary).
	 */
	iwl_txq_gen2_set_tb(trans, tfd, tb_phys, IWL_FIRST_TB_SIZE);

	/*
	 * The second TB (tb1) points to the remainder of the TX command
	 * and the 802.11 header - dword aligned size
	 * (This calculation modifies the TX command, so do it before the
	 * setup of the first TB)
	 */
	len = tx_cmd_len + sizeof(struct iwl_cmd_header) + hdr_len -
	      IWL_FIRST_TB_SIZE;

	/* do not align A-MSDU to dword as the subframe header aligns it */

	/* map the data for TB1 */
	tb1_addr = ((u8 *)&dev_cmd->hdr) + IWL_FIRST_TB_SIZE;
	tb_phys = dma_map_single(trans->dev, tb1_addr, len, DMA_TO_DEVICE);
	if (unlikely(dma_mapping_error(trans->dev, tb_phys)))
		goto out_err;
	/*
	 * No need for _with_wa(), we ensure (via alignment) that the data
	 * here can never cross or end at a page boundary.
	 */
	iwl_txq_gen2_set_tb(trans, tfd, tb_phys, len);

	if (iwl_txq_gen2_build_amsdu(trans, skb, tfd, out_meta,
				     len + IWL_FIRST_TB_SIZE, hdr_len, dev_cmd))
		goto out_err;

	/* building the A-MSDU might have changed this data, memcpy it now */
	memcpy(&txq->first_tb_bufs[idx], dev_cmd, IWL_FIRST_TB_SIZE);
	return tfd;

out_err:
	iwl_pcie_free_tso_pages(trans, skb, out_meta);
	iwl_txq_gen2_tfd_unmap(trans, out_meta, tfd);
	return NULL;
}

static int iwl_txq_gen2_tx_add_frags(struct iwl_trans *trans,
				     struct sk_buff *skb,
				     struct iwl_tfh_tfd *tfd,
				     struct iwl_cmd_meta *out_meta)
{
	int i;

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
		dma_addr_t tb_phys;
		unsigned int fragsz = skb_frag_size(frag);
		int ret;

		if (!fragsz)
			continue;

		tb_phys = skb_frag_dma_map(trans->dev, frag, 0,
					   fragsz, DMA_TO_DEVICE);
		ret = iwl_txq_gen2_set_tb_with_wa(trans, skb, tfd, tb_phys,
						  skb_frag_address(frag),
						  fragsz, out_meta, true);
		if (ret)
			return ret;
	}

	return 0;
}

static struct
iwl_tfh_tfd *iwl_txq_gen2_build_tx(struct iwl_trans *trans,
				   struct iwl_txq *txq,
				   struct iwl_device_tx_cmd *dev_cmd,
				   struct sk_buff *skb,
				   struct iwl_cmd_meta *out_meta,
				   int hdr_len,
				   int tx_cmd_len,
				   bool pad)
{
	int idx = iwl_txq_get_cmd_index(txq, txq->write_ptr);
	struct iwl_tfh_tfd *tfd = iwl_txq_get_tfd(trans, txq, idx);
	dma_addr_t tb_phys;
	int len, tb1_len, tb2_len;
	void *tb1_addr;
	struct sk_buff *frag;

	tb_phys = iwl_txq_get_first_tb_dma(txq, idx);

	/* The first TB points to bi-directional DMA data */
	memcpy(&txq->first_tb_bufs[idx], dev_cmd, IWL_FIRST_TB_SIZE);

	/*
	 * No need for _with_wa, the first TB allocation is aligned up
	 * to a 64-byte boundary and thus can't be at the end or cross
	 * a page boundary (much less a 2^32 boundary).
	 */
	iwl_txq_gen2_set_tb(trans, tfd, tb_phys, IWL_FIRST_TB_SIZE);

	/*
	 * The second TB (tb1) points to the remainder of the TX command
	 * and the 802.11 header - dword aligned size
	 * (This calculation modifies the TX command, so do it before the
	 * setup of the first TB)
	 */
	len = tx_cmd_len + sizeof(struct iwl_cmd_header) + hdr_len -
	      IWL_FIRST_TB_SIZE;

	if (pad)
		tb1_len = ALIGN(len, 4);
	else
		tb1_len = len;

	/* map the data for TB1 */
	tb1_addr = ((u8 *)&dev_cmd->hdr) + IWL_FIRST_TB_SIZE;
	tb_phys = dma_map_single(trans->dev, tb1_addr, tb1_len, DMA_TO_DEVICE);
	if (unlikely(dma_mapping_error(trans->dev, tb_phys)))
		goto out_err;
	/*
	 * No need for _with_wa(), we ensure (via alignment) that the data
	 * here can never cross or end at a page boundary.
	 */
	iwl_txq_gen2_set_tb(trans, tfd, tb_phys, tb1_len);
	trace_iwlwifi_dev_tx(trans->dev, skb, tfd, sizeof(*tfd), &dev_cmd->hdr,
			     IWL_FIRST_TB_SIZE + tb1_len, hdr_len);

	/* set up TFD's third entry to point to remainder of skb's head */
	tb2_len = skb_headlen(skb) - hdr_len;

	if (tb2_len > 0) {
		int ret;

		tb_phys = dma_map_single(trans->dev, skb->data + hdr_len,
					 tb2_len, DMA_TO_DEVICE);
		ret = iwl_txq_gen2_set_tb_with_wa(trans, skb, tfd, tb_phys,
						  skb->data + hdr_len, tb2_len,
						  NULL, true);
		if (ret)
			goto out_err;
	}

	if (iwl_txq_gen2_tx_add_frags(trans, skb, tfd, out_meta))
		goto out_err;

	skb_walk_frags(skb, frag) {
		int ret;

		tb_phys = dma_map_single(trans->dev, frag->data,
					 skb_headlen(frag), DMA_TO_DEVICE);
		ret = iwl_txq_gen2_set_tb_with_wa(trans, skb, tfd, tb_phys,
						  frag->data,
						  skb_headlen(frag), NULL,
						  true);
		if (ret)
			goto out_err;
		if (iwl_txq_gen2_tx_add_frags(trans, frag, tfd, out_meta))
			goto out_err;
	}

	return tfd;

out_err:
	iwl_txq_gen2_tfd_unmap(trans, out_meta, tfd);
	return NULL;
}

static
struct iwl_tfh_tfd *iwl_txq_gen2_build_tfd(struct iwl_trans *trans,
					   struct iwl_txq *txq,
					   struct iwl_device_tx_cmd *dev_cmd,
					   struct sk_buff *skb,
					   struct iwl_cmd_meta *out_meta)
{
	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
	int idx = iwl_txq_get_cmd_index(txq, txq->write_ptr);
	struct iwl_tfh_tfd *tfd = iwl_txq_get_tfd(trans, txq, idx);
	int len, hdr_len;
	bool amsdu;

	/* There must be data left over for TB1 or this code must be changed */
	BUILD_BUG_ON(sizeof(struct iwl_tx_cmd_v9) < IWL_FIRST_TB_SIZE);
	BUILD_BUG_ON(sizeof(struct iwl_cmd_header) +
		     offsetofend(struct iwl_tx_cmd_v9, dram_info) >
		     IWL_FIRST_TB_SIZE);
	BUILD_BUG_ON(sizeof(struct iwl_tx_cmd) < IWL_FIRST_TB_SIZE);
	BUILD_BUG_ON(sizeof(struct iwl_cmd_header) +
		     offsetofend(struct iwl_tx_cmd, dram_info) >
		     IWL_FIRST_TB_SIZE);

	memset(tfd, 0, sizeof(*tfd));

	if (trans->mac_cfg->device_family < IWL_DEVICE_FAMILY_AX210)
		len = sizeof(struct iwl_tx_cmd_v9);
	else
		len = sizeof(struct iwl_tx_cmd);

	amsdu = ieee80211_is_data_qos(hdr->frame_control) &&
		(*ieee80211_get_qos_ctl(hdr) &
		 IEEE80211_QOS_CTL_A_MSDU_PRESENT);

	hdr_len = ieee80211_hdrlen(hdr->frame_control);

	/*
	 * Only build A-MSDUs here if doing so by GSO, otherwise it may be
	 * an A-MSDU for other reasons, e.g. NAN or an A-MSDU having been
	 * built in the higher layers already.
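	 * Such pre-built A-MSDUs go through the regular TX path below and
	 * only skip the dword alignment of TB1 (the !amsdu pad argument).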
	 */
	if (amsdu && skb_shinfo(skb)->gso_size)
		return iwl_txq_gen2_build_tx_amsdu(trans, txq, dev_cmd, skb,
						   out_meta, hdr_len, len);
	return iwl_txq_gen2_build_tx(trans, txq, dev_cmd, skb, out_meta,
				     hdr_len, len, !amsdu);
}

int iwl_txq_space(struct iwl_trans *trans, const struct iwl_txq *q)
{
	unsigned int max;
	unsigned int used;

	/*
	 * To avoid ambiguity between empty and completely full queues, there
	 * should always be less than max_tfd_queue_size elements in the queue.
	 * If q->n_window is smaller than max_tfd_queue_size, there is no need
	 * to reserve any queue entries for this purpose.
	 */
	if (q->n_window < trans->mac_cfg->base->max_tfd_queue_size)
		max = q->n_window;
	else
		max = trans->mac_cfg->base->max_tfd_queue_size - 1;

	/*
	 * max_tfd_queue_size is a power of 2, so the following is equivalent to
	 * modulo by max_tfd_queue_size and is well defined.
	 */
	used = (q->write_ptr - q->read_ptr) &
		(trans->mac_cfg->base->max_tfd_queue_size - 1);

	if (WARN_ON(used > max))
		return 0;

	return max - used;
}

/*
 * iwl_pcie_gen2_update_byte_tbl - Set up entry in Tx byte-count array
 */
static void iwl_pcie_gen2_update_byte_tbl(struct iwl_trans *trans,
					  struct iwl_txq *txq, u16 byte_cnt,
					  int num_tbs)
{
	int idx = iwl_txq_get_cmd_index(txq, txq->write_ptr);
	struct iwl_bc_tbl_entry *scd_bc_tbl = txq->bc_tbl.addr;
	u8 filled_tfd_size, num_fetch_chunks;
	u16 len = byte_cnt;
	__le16 bc_ent;

	if (WARN(idx >= txq->n_window, "%d >= %d\n", idx, txq->n_window))
		return;

	filled_tfd_size = offsetof(struct iwl_tfh_tfd, tbs) +
			  num_tbs * sizeof(struct iwl_tfh_tb);
	/*
	 * filled_tfd_size contains the number of filled bytes in the TFD.
	 * Dividing it by 64 will give the number of chunks to fetch
	 * to SRAM - 0 for one chunk, 1 for two and so on.
	 * If, for example, the TFD contains only 3 TBs then 32 bytes
	 * of the TFD are used, and only one chunk of 64 bytes should
	 * be fetched.
	 */
	num_fetch_chunks = DIV_ROUND_UP(filled_tfd_size, 64) - 1;

	if (trans->mac_cfg->device_family >= IWL_DEVICE_FAMILY_AX210) {
		WARN_ON(len > 0x3FFF);
		bc_ent = cpu_to_le16(len | (num_fetch_chunks << 14));
	} else {
		len = DIV_ROUND_UP(len, 4);
		WARN_ON(len > 0xFFF);
		bc_ent = cpu_to_le16(len | (num_fetch_chunks << 12));
	}

	scd_bc_tbl[idx].tfd_offset = bc_ent;
}

static u8 iwl_txq_gen2_get_num_tbs(struct iwl_tfh_tfd *tfd)
{
	return le16_to_cpu(tfd->num_tbs) & 0x1f;
}

int iwl_txq_gen2_set_tb(struct iwl_trans *trans, struct iwl_tfh_tfd *tfd,
			dma_addr_t addr, u16 len)
{
	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
	int idx = iwl_txq_gen2_get_num_tbs(tfd);
	struct iwl_tfh_tb *tb;

	/* Only WARN here so we know about the issue, but we mess up our
	 * unmap path because not every place currently checks for errors
	 * returned from this function - it can only return an error if
	 * there's no more space, and so when we know there is enough we
	 * don't always check ...
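	 * The TB is still added below even if this boundary warning fires.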
	 */
	WARN(iwl_txq_crosses_4g_boundary(addr, len),
	     "possible DMA problem with iova:0x%llx, len:%d\n",
	     (unsigned long long)addr, len);

	if (WARN_ON(idx >= IWL_TFH_NUM_TBS))
		return -EINVAL;
	tb = &tfd->tbs[idx];

	/* Each TFD can point to a maximum of max_tbs Tx buffers */
	if (le16_to_cpu(tfd->num_tbs) >= trans_pcie->txqs.tfd.max_tbs) {
		IWL_ERR(trans, "Error can not send more than %d chunks\n",
			trans_pcie->txqs.tfd.max_tbs);
		return -EINVAL;
	}

	put_unaligned_le64(addr, &tb->addr);
	tb->tb_len = cpu_to_le16(len);

	tfd->num_tbs = cpu_to_le16(idx + 1);

	return idx;
}

void iwl_txq_gen2_tfd_unmap(struct iwl_trans *trans,
			    struct iwl_cmd_meta *meta,
			    struct iwl_tfh_tfd *tfd)
{
	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
	int i, num_tbs;

	/* Sanity check on number of chunks */
	num_tbs = iwl_txq_gen2_get_num_tbs(tfd);

	if (num_tbs > trans_pcie->txqs.tfd.max_tbs) {
		IWL_ERR(trans, "Too many chunks: %i\n", num_tbs);
		return;
	}

	/* TB1 is mapped directly, the rest is the TSO page and SG list. */
	if (meta->sg_offset)
		num_tbs = 2;

	/* first TB is never freed - it's the bidirectional DMA data */
	for (i = 1; i < num_tbs; i++) {
		if (meta->tbs & BIT(i))
			dma_unmap_page(trans->dev,
				       le64_to_cpu(tfd->tbs[i].addr),
				       le16_to_cpu(tfd->tbs[i].tb_len),
				       DMA_TO_DEVICE);
		else
			dma_unmap_single(trans->dev,
					 le64_to_cpu(tfd->tbs[i].addr),
					 le16_to_cpu(tfd->tbs[i].tb_len),
					 DMA_TO_DEVICE);
	}

	iwl_txq_set_tfd_invalid_gen2(trans, tfd);
}

static void iwl_txq_gen2_free_tfd(struct iwl_trans *trans, struct iwl_txq *txq)
{
	/* rd_ptr is bounded by TFD_QUEUE_SIZE_MAX and
	 * idx is bounded by n_window
	 */
	int idx = iwl_txq_get_cmd_index(txq, txq->read_ptr);
	struct sk_buff *skb;

	lockdep_assert_held(&txq->lock);

	if (!txq->entries)
		return;

	iwl_txq_gen2_tfd_unmap(trans, &txq->entries[idx].meta,
			       iwl_txq_get_tfd(trans, txq, idx));

	skb = txq->entries[idx].skb;

	/* Can be called from irqs-disabled context.
	 * If skb is not NULL, it means that the whole queue is being
	 * freed and that the queue is not empty - free the skb
	 */
	if (skb) {
		iwl_op_mode_free_skb(trans->op_mode, skb);
		txq->entries[idx].skb = NULL;
	}
}

/*
 * iwl_txq_inc_wr_ptr - Send new write index to hardware
 */
static void iwl_txq_inc_wr_ptr(struct iwl_trans *trans, struct iwl_txq *txq)
{
	lockdep_assert_held(&txq->lock);

	IWL_DEBUG_TX(trans, "Q:%d WR: 0x%x\n", txq->id, txq->write_ptr);

	/*
	 * if not in power-save mode, uCode will never sleep when we're
	 * trying to tx (during RFKILL, we're not trying to tx).
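	 * There is therefore no need to wake the device up before writing
	 * the new write pointer to the register.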
	 */
	iwl_write32(trans, HBUS_TARG_WRPTR, txq->write_ptr | (txq->id << 16));
}

int iwl_txq_gen2_tx(struct iwl_trans *trans, struct sk_buff *skb,
		    struct iwl_device_tx_cmd *dev_cmd, int txq_id)
{
	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
	struct iwl_cmd_meta *out_meta;
	struct iwl_txq *txq = trans_pcie->txqs.txq[txq_id];
	u16 cmd_len;
	int idx;
	void *tfd;

	if (WARN_ONCE(txq_id >= IWL_MAX_TVQM_QUEUES,
		      "queue %d out of range", txq_id))
		return -EINVAL;

	if (WARN_ONCE(!test_bit(txq_id, trans_pcie->txqs.queue_used),
		      "TX on unused queue %d\n", txq_id))
		return -EINVAL;

	if (skb_is_nonlinear(skb) &&
	    skb_shinfo(skb)->nr_frags > IWL_TRANS_PCIE_MAX_FRAGS(trans_pcie) &&
	    __skb_linearize(skb))
		return -ENOMEM;

	spin_lock(&txq->lock);

	if (iwl_txq_space(trans, txq) < txq->high_mark) {
		iwl_txq_stop(trans, txq);

		/* don't put the packet on the ring if there is no room */
		if (unlikely(iwl_txq_space(trans, txq) < 3)) {
			struct iwl_device_tx_cmd **dev_cmd_ptr;

			dev_cmd_ptr = (void *)((u8 *)skb->cb +
					       trans->conf.cb_data_offs +
					       sizeof(void *));

			*dev_cmd_ptr = dev_cmd;
			__skb_queue_tail(&txq->overflow_q, skb);
			spin_unlock(&txq->lock);
			return 0;
		}
	}

	idx = iwl_txq_get_cmd_index(txq, txq->write_ptr);

	/* Set up driver data for this TFD */
	txq->entries[idx].skb = skb;
	txq->entries[idx].cmd = dev_cmd;

	dev_cmd->hdr.sequence =
		cpu_to_le16((u16)(QUEUE_TO_SEQ(txq_id) |
				  INDEX_TO_SEQ(idx)));

	/* Set up first empty entry in queue's array of Tx/cmd buffers */
	out_meta = &txq->entries[idx].meta;
	memset(out_meta, 0, sizeof(*out_meta));

	tfd = iwl_txq_gen2_build_tfd(trans, txq, dev_cmd, skb, out_meta);
	if (!tfd) {
		spin_unlock(&txq->lock);
		return -1;
	}

	if (trans->mac_cfg->device_family >= IWL_DEVICE_FAMILY_AX210) {
		struct iwl_tx_cmd *tx_cmd =
			(void *)dev_cmd->payload;

		cmd_len = le16_to_cpu(tx_cmd->len);
	} else {
		struct iwl_tx_cmd_v9 *tx_cmd_v9 =
			(void *)dev_cmd->payload;

		cmd_len = le16_to_cpu(tx_cmd_v9->len);
	}

	/* Set up entry for this TFD in Tx byte-count array */
	iwl_pcie_gen2_update_byte_tbl(trans, txq, cmd_len,
				      iwl_txq_gen2_get_num_tbs(tfd));

	/* start timer if queue currently empty */
	if (txq->read_ptr == txq->write_ptr && txq->wd_timeout)
		mod_timer(&txq->stuck_timer, jiffies + txq->wd_timeout);

	/* Tell device the write index *just past* this latest filled TFD */
	txq->write_ptr = iwl_txq_inc_wrap(trans, txq->write_ptr);
	iwl_txq_inc_wr_ptr(trans, txq);
	/*
	 * At this point the frame is "transmitted" successfully
	 * and we will get a TX status notification eventually.
	 */
	spin_unlock(&txq->lock);
	return 0;
}

/*************** HOST COMMAND QUEUE FUNCTIONS   *****/

/*
 * iwl_txq_gen2_unmap -  Unmap any remaining DMA mappings and free skb's
 */
static void iwl_txq_gen2_unmap(struct iwl_trans *trans, int txq_id)
{
	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
	struct iwl_txq *txq = trans_pcie->txqs.txq[txq_id];

	spin_lock_bh(&txq->reclaim_lock);
	spin_lock(&txq->lock);
	while (txq->write_ptr != txq->read_ptr) {
		IWL_DEBUG_TX_REPLY(trans, "Q %d Free %d\n",
				   txq_id, txq->read_ptr);

		if (txq_id != trans->conf.cmd_queue) {
			int idx = iwl_txq_get_cmd_index(txq, txq->read_ptr);
			struct iwl_cmd_meta *cmd_meta = &txq->entries[idx].meta;
			struct sk_buff *skb = txq->entries[idx].skb;

			if (!WARN_ON_ONCE(!skb))
				iwl_pcie_free_tso_pages(trans, skb, cmd_meta);
		}
		iwl_txq_gen2_free_tfd(trans, txq);
		txq->read_ptr = iwl_txq_inc_wrap(trans, txq->read_ptr);
	}

	while (!skb_queue_empty(&txq->overflow_q)) {
		struct sk_buff *skb = __skb_dequeue(&txq->overflow_q);

		iwl_op_mode_free_skb(trans->op_mode, skb);
	}

	spin_unlock(&txq->lock);
	spin_unlock_bh(&txq->reclaim_lock);

	/* just in case - this queue may have been stopped */
	iwl_trans_pcie_wake_queue(trans, txq);
}

static void iwl_txq_gen2_free_memory(struct iwl_trans *trans,
				     struct iwl_txq *txq)
{
	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
	struct device *dev = trans->dev;

	/* De-alloc circular buffer of TFDs */
	if (txq->tfds) {
		dma_free_coherent(dev,
				  trans_pcie->txqs.tfd.size * txq->n_window,
				  txq->tfds, txq->dma_addr);
		dma_free_coherent(dev,
				  sizeof(*txq->first_tb_bufs) * txq->n_window,
				  txq->first_tb_bufs, txq->first_tb_dma);
	}

	kfree(txq->entries);
	if (txq->bc_tbl.addr)
		dma_pool_free(trans_pcie->txqs.bc_pool,
			      txq->bc_tbl.addr, txq->bc_tbl.dma);
	kfree(txq);
}

/*
 * iwl_txq_gen2_free - Deallocate DMA queue.
 * @txq_id: index of the transmit queue to deallocate
 *
 * Empty queue by removing and destroying all BD's.
 * Free all buffers.
 * The "txq" descriptor structure itself is freed as well.
 */
static void iwl_txq_gen2_free(struct iwl_trans *trans, int txq_id)
{
	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
	struct iwl_txq *txq;
	int i;

	if (WARN_ONCE(txq_id >= IWL_MAX_TVQM_QUEUES,
		      "queue %d out of range", txq_id))
		return;

	txq = trans_pcie->txqs.txq[txq_id];

	if (WARN_ON(!txq))
		return;

	iwl_txq_gen2_unmap(trans, txq_id);

	/* De-alloc array of command/tx buffers */
	if (txq_id == trans->conf.cmd_queue)
		for (i = 0; i < txq->n_window; i++) {
			kfree_sensitive(txq->entries[i].cmd);
			kfree_sensitive(txq->entries[i].free_buf);
		}
	timer_delete_sync(&txq->stuck_timer);

	iwl_txq_gen2_free_memory(trans, txq);

	trans_pcie->txqs.txq[txq_id] = NULL;

	clear_bit(txq_id, trans_pcie->txqs.queue_used);
}

static struct iwl_txq *
iwl_txq_dyn_alloc_dma(struct iwl_trans *trans, int size, unsigned int timeout)
{
	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
	size_t bc_tbl_size, bc_tbl_entries;
	struct iwl_txq *txq;
	int ret;

	WARN_ON(!trans_pcie->txqs.bc_tbl_size);

	bc_tbl_size = trans_pcie->txqs.bc_tbl_size;
	bc_tbl_entries = bc_tbl_size / sizeof(u16);

	if (WARN_ON(size > bc_tbl_entries))
		return ERR_PTR(-EINVAL);

	txq = kzalloc(sizeof(*txq), GFP_KERNEL);
	if (!txq)
		return ERR_PTR(-ENOMEM);

	txq->bc_tbl.addr = dma_pool_alloc(trans_pcie->txqs.bc_pool, GFP_KERNEL,
					  &txq->bc_tbl.dma);
	if (!txq->bc_tbl.addr) {
		IWL_ERR(trans, "Scheduler BC Table allocation failed\n");
		kfree(txq);
		return ERR_PTR(-ENOMEM);
	}

	ret = iwl_pcie_txq_alloc(trans, txq, size, false);
	if (ret) {
		IWL_ERR(trans, "Tx queue alloc failed\n");
		goto error;
	}
	ret = iwl_txq_init(trans, txq, size, false);
	if (ret) {
		IWL_ERR(trans, "Tx queue init failed\n");
		goto error;
	}

	txq->wd_timeout = msecs_to_jiffies(timeout);

	return txq;

error:
	iwl_txq_gen2_free_memory(trans, txq);
	return ERR_PTR(ret);
}

static int iwl_pcie_txq_alloc_response(struct iwl_trans *trans,
				       struct iwl_txq *txq,
				       struct iwl_host_cmd *hcmd)
{
	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
	struct iwl_tx_queue_cfg_rsp *rsp;
	int ret, qid;
	u32 wr_ptr;

	if (WARN_ON(iwl_rx_packet_payload_len(hcmd->resp_pkt) !=
		    sizeof(*rsp))) {
		ret = -EINVAL;
		goto error_free_resp;
	}

	rsp = (void *)hcmd->resp_pkt->data;
	qid = le16_to_cpu(rsp->queue_number);
	wr_ptr = le16_to_cpu(rsp->write_pointer);

	if (qid >= ARRAY_SIZE(trans_pcie->txqs.txq)) {
		WARN_ONCE(1, "queue index %d unsupported", qid);
		ret = -EIO;
		goto error_free_resp;
	}

	if (test_and_set_bit(qid, trans_pcie->txqs.queue_used)) {
		WARN_ONCE(1, "queue %d already used", qid);
		ret = -EIO;
		goto error_free_resp;
	}

	if (WARN_ONCE(trans_pcie->txqs.txq[qid],
		      "queue %d already allocated\n", qid)) {
		ret = -EIO;
		goto error_free_resp;
	}

	txq->id = qid;
	trans_pcie->txqs.txq[qid] = txq;
	wr_ptr &= (trans->mac_cfg->base->max_tfd_queue_size - 1);

	/* Place first TFD at index corresponding to start sequence number */
	txq->read_ptr = wr_ptr;
	txq->write_ptr = wr_ptr;

	IWL_DEBUG_TX_QUEUES(trans, "Activate queue %d\n", qid);

	iwl_free_resp(hcmd);
	return qid;

error_free_resp:
	iwl_free_resp(hcmd);
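	/* the queue could not be activated, release its resources */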
	iwl_txq_gen2_free_memory(trans, txq);
	return ret;
}

int iwl_txq_dyn_alloc(struct iwl_trans *trans, u32 flags, u32 sta_mask,
		      u8 tid, int size, unsigned int timeout)
{
	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
	struct iwl_txq *txq;
	union {
		struct iwl_tx_queue_cfg_cmd old;
		struct iwl_scd_queue_cfg_cmd new;
	} cmd;
	struct iwl_host_cmd hcmd = {
		.flags = CMD_WANT_SKB,
	};
	int ret;

	/* take the min with bytecount table entries allowed */
	size = min_t(u32, size, trans_pcie->txqs.bc_tbl_size / sizeof(u16));
	/* but must be a power of 2 for calculating read/write pointers */
	size = rounddown_pow_of_two(size);

	if (trans->mac_cfg->device_family == IWL_DEVICE_FAMILY_BZ &&
	    trans->info.hw_rev_step == SILICON_A_STEP) {
		size = 4096;
		txq = iwl_txq_dyn_alloc_dma(trans, size, timeout);
	} else {
		do {
			txq = iwl_txq_dyn_alloc_dma(trans, size, timeout);
			if (!IS_ERR(txq))
				break;

			IWL_DEBUG_TX_QUEUES(trans,
					    "Failed allocating TXQ of size %d for sta mask %x tid %d, ret: %ld\n",
					    size, sta_mask, tid,
					    PTR_ERR(txq));
			size /= 2;
		} while (size >= 16);
	}

	if (IS_ERR(txq))
		return PTR_ERR(txq);

	if (trans->conf.queue_alloc_cmd_ver == 0) {
		memset(&cmd.old, 0, sizeof(cmd.old));
		cmd.old.tfdq_addr = cpu_to_le64(txq->dma_addr);
		cmd.old.byte_cnt_addr = cpu_to_le64(txq->bc_tbl.dma);
		cmd.old.cb_size = cpu_to_le32(TFD_QUEUE_CB_SIZE(size));
		cmd.old.flags = cpu_to_le16(flags | TX_QUEUE_CFG_ENABLE_QUEUE);
		cmd.old.tid = tid;

		if (hweight32(sta_mask) != 1) {
			ret = -EINVAL;
			goto error;
		}
		cmd.old.sta_id = ffs(sta_mask) - 1;

		hcmd.id = SCD_QUEUE_CFG;
		hcmd.len[0] = sizeof(cmd.old);
		hcmd.data[0] = &cmd.old;
	} else if (trans->conf.queue_alloc_cmd_ver == 3) {
		memset(&cmd.new, 0, sizeof(cmd.new));
		cmd.new.operation = cpu_to_le32(IWL_SCD_QUEUE_ADD);
		cmd.new.u.add.tfdq_dram_addr = cpu_to_le64(txq->dma_addr);
		cmd.new.u.add.bc_dram_addr = cpu_to_le64(txq->bc_tbl.dma);
		cmd.new.u.add.cb_size = cpu_to_le32(TFD_QUEUE_CB_SIZE(size));
		cmd.new.u.add.flags = cpu_to_le32(flags);
		cmd.new.u.add.sta_mask = cpu_to_le32(sta_mask);
		cmd.new.u.add.tid = tid;

		hcmd.id = WIDE_ID(DATA_PATH_GROUP, SCD_QUEUE_CONFIG_CMD);
		hcmd.len[0] = sizeof(cmd.new);
		hcmd.data[0] = &cmd.new;
	} else {
		ret = -EOPNOTSUPP;
		goto error;
	}

	ret = iwl_trans_send_cmd(trans, &hcmd);
	if (ret)
		goto error;

	return iwl_pcie_txq_alloc_response(trans, txq, &hcmd);

error:
	iwl_txq_gen2_free_memory(trans, txq);
	return ret;
}

void iwl_txq_dyn_free(struct iwl_trans *trans, int queue)
{
	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);

	if (WARN(queue >= IWL_MAX_TVQM_QUEUES,
		 "queue %d out of range", queue))
		return;

	/*
	 * Upon HW Rfkill - we stop the device, and then stop the queues
	 * in the op_mode. Just for the sake of the simplicity of the op_mode,
	 * allow the op_mode to call txq_disable after it already called
	 * stop_device.
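	 * In that case the queue bit is already clear, so only warn if the
	 * device is still marked as enabled.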
	 */
	if (!test_and_clear_bit(queue, trans_pcie->txqs.queue_used)) {
		WARN_ONCE(test_bit(STATUS_DEVICE_ENABLED, &trans->status),
			  "queue %d not used", queue);
		return;
	}

	iwl_txq_gen2_free(trans, queue);

	IWL_DEBUG_TX_QUEUES(trans, "Deactivate queue %d\n", queue);
}

void iwl_txq_gen2_tx_free(struct iwl_trans *trans)
{
	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
	int i;

	memset(trans_pcie->txqs.queue_used, 0,
	       sizeof(trans_pcie->txqs.queue_used));

	/* Free all TX queues */
	for (i = 0; i < ARRAY_SIZE(trans_pcie->txqs.txq); i++) {
		if (!trans_pcie->txqs.txq[i])
			continue;

		iwl_txq_gen2_free(trans, i);
	}
}

int iwl_txq_gen2_init(struct iwl_trans *trans, int txq_id, int queue_size)
{
	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
	struct iwl_txq *queue;
	int ret;

	/* alloc and init the tx queue */
	if (!trans_pcie->txqs.txq[txq_id]) {
		queue = kzalloc(sizeof(*queue), GFP_KERNEL);
		if (!queue) {
			IWL_ERR(trans, "Not enough memory for tx queue\n");
			return -ENOMEM;
		}
		trans_pcie->txqs.txq[txq_id] = queue;
		ret = iwl_pcie_txq_alloc(trans, queue, queue_size, true);
		if (ret) {
			IWL_ERR(trans, "Tx %d queue alloc failed\n", txq_id);
			goto error;
		}
	} else {
		queue = trans_pcie->txqs.txq[txq_id];
	}

	ret = iwl_txq_init(trans, queue, queue_size,
			   (txq_id == trans->conf.cmd_queue));
	if (ret) {
		IWL_ERR(trans, "Tx %d queue init failed\n", txq_id);
		goto error;
	}
	trans_pcie->txqs.txq[txq_id]->id = txq_id;
	set_bit(txq_id, trans_pcie->txqs.queue_used);

	return 0;

error:
	iwl_txq_gen2_tx_free(trans);
	return ret;
}

/*************** HOST COMMAND QUEUE FUNCTIONS   *****/

/*
 * iwl_pcie_gen2_enqueue_hcmd - enqueue a uCode command
 * @trans: transport private data
 * @cmd: a pointer to the ucode command structure
 *
 * The function returns < 0 values to indicate the operation
 * failed. On success, it returns the index (>= 0) of the command in the
 * command queue.
 */
int iwl_pcie_gen2_enqueue_hcmd(struct iwl_trans *trans,
			       struct iwl_host_cmd *cmd)
{
	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
	struct iwl_txq *txq = trans_pcie->txqs.txq[trans->conf.cmd_queue];
	struct iwl_device_cmd *out_cmd;
	struct iwl_cmd_meta *out_meta;
	void *dup_buf = NULL;
	dma_addr_t phys_addr;
	int i, cmd_pos, idx;
	u16 copy_size, cmd_size, tb0_size;
	bool had_nocopy = false;
	u8 group_id = iwl_cmd_groupid(cmd->id);
	const u8 *cmddata[IWL_MAX_CMD_TBS_PER_TFD];
	u16 cmdlen[IWL_MAX_CMD_TBS_PER_TFD];
	struct iwl_tfh_tfd *tfd;
	unsigned long flags;

	if (WARN_ON(cmd->flags & CMD_BLOCK_TXQS))
		return -EINVAL;

	copy_size = sizeof(struct iwl_cmd_header_wide);
	cmd_size = sizeof(struct iwl_cmd_header_wide);

	for (i = 0; i < IWL_MAX_CMD_TBS_PER_TFD; i++) {
		cmddata[i] = cmd->data[i];
		cmdlen[i] = cmd->len[i];

		if (!cmd->len[i])
			continue;

		/* need at least IWL_FIRST_TB_SIZE copied */
		if (copy_size < IWL_FIRST_TB_SIZE) {
			int copy = IWL_FIRST_TB_SIZE - copy_size;

			if (copy > cmdlen[i])
				copy = cmdlen[i];
			cmdlen[i] -= copy;
			cmddata[i] += copy;
			copy_size += copy;
		}

		if (cmd->dataflags[i] & IWL_HCMD_DFL_NOCOPY) {
			had_nocopy = true;
			if (WARN_ON(cmd->dataflags[i] & IWL_HCMD_DFL_DUP)) {
				idx = -EINVAL;
				goto free_dup_buf;
			}
		} else if (cmd->dataflags[i] & IWL_HCMD_DFL_DUP) {
			/*
			 * This is also a chunk that isn't copied
			 * to the static buffer so set had_nocopy.
			 */
			had_nocopy = true;

			/* only allowed once */
			if (WARN_ON(dup_buf)) {
				idx = -EINVAL;
				goto free_dup_buf;
			}

			dup_buf = kmemdup(cmddata[i], cmdlen[i],
					  GFP_ATOMIC);
			if (!dup_buf)
				return -ENOMEM;
		} else {
			/* NOCOPY must not be followed by normal! */
			if (WARN_ON(had_nocopy)) {
				idx = -EINVAL;
				goto free_dup_buf;
			}
			copy_size += cmdlen[i];
		}
		cmd_size += cmd->len[i];
	}

	/*
	 * If any of the command structures end up being larger than the
	 * TFD_MAX_PAYLOAD_SIZE and they aren't dynamically allocated into
	 * separate TFDs, then we will need to increase the size of the buffers
	 */
	if (WARN(copy_size > TFD_MAX_PAYLOAD_SIZE,
		 "Command %s (%#x) is too large (%d bytes)\n",
		 iwl_get_cmd_string(trans, cmd->id), cmd->id, copy_size)) {
		idx = -EINVAL;
		goto free_dup_buf;
	}

	spin_lock_irqsave(&txq->lock, flags);

	idx = iwl_txq_get_cmd_index(txq, txq->write_ptr);
	tfd = iwl_txq_get_tfd(trans, txq, txq->write_ptr);
	memset(tfd, 0, sizeof(*tfd));

	if (iwl_txq_space(trans, txq) < ((cmd->flags & CMD_ASYNC) ?
					 2 : 1)) {
		spin_unlock_irqrestore(&txq->lock, flags);

		IWL_ERR(trans, "No space in command queue\n");
		iwl_op_mode_nic_error(trans->op_mode,
				      IWL_ERR_TYPE_CMD_QUEUE_FULL);
		iwl_trans_schedule_reset(trans, IWL_ERR_TYPE_CMD_QUEUE_FULL);
		idx = -ENOSPC;
		goto free_dup_buf;
	}

	out_cmd = txq->entries[idx].cmd;
	out_meta = &txq->entries[idx].meta;

	/* re-initialize, this also marks the SG list as unused */
	memset(out_meta, 0, sizeof(*out_meta));
	if (cmd->flags & CMD_WANT_SKB)
		out_meta->source = cmd;

	/* set up the header */
	out_cmd->hdr_wide.cmd = iwl_cmd_opcode(cmd->id);
	out_cmd->hdr_wide.group_id = group_id;
	out_cmd->hdr_wide.version = iwl_cmd_version(cmd->id);
	out_cmd->hdr_wide.length =
		cpu_to_le16(cmd_size - sizeof(struct iwl_cmd_header_wide));
	out_cmd->hdr_wide.reserved = 0;
	out_cmd->hdr_wide.sequence =
		cpu_to_le16(QUEUE_TO_SEQ(trans->conf.cmd_queue) |
			    INDEX_TO_SEQ(txq->write_ptr));

	cmd_pos = sizeof(struct iwl_cmd_header_wide);
	copy_size = sizeof(struct iwl_cmd_header_wide);

	/* and copy the data that needs to be copied */
	for (i = 0; i < IWL_MAX_CMD_TBS_PER_TFD; i++) {
		int copy;

		if (!cmd->len[i])
			continue;

		/* copy everything if not nocopy/dup */
		if (!(cmd->dataflags[i] & (IWL_HCMD_DFL_NOCOPY |
					   IWL_HCMD_DFL_DUP))) {
			copy = cmd->len[i];

			memcpy((u8 *)out_cmd + cmd_pos, cmd->data[i], copy);
			cmd_pos += copy;
			copy_size += copy;
			continue;
		}

		/*
		 * Otherwise we need at least IWL_FIRST_TB_SIZE copied
		 * in total (for bi-directional DMA), but copy up to what
		 * we can fit into the payload for debug dump purposes.
		 */
		copy = min_t(int, TFD_MAX_PAYLOAD_SIZE - cmd_pos, cmd->len[i]);

		memcpy((u8 *)out_cmd + cmd_pos, cmd->data[i], copy);
		cmd_pos += copy;

		/* However, treat copy_size the proper way, we need it below */
		if (copy_size < IWL_FIRST_TB_SIZE) {
			copy = IWL_FIRST_TB_SIZE - copy_size;

			if (copy > cmd->len[i])
				copy = cmd->len[i];
			copy_size += copy;
		}
	}

	IWL_DEBUG_HC(trans,
		     "Sending command %s (%.2x.%.2x), seq: 0x%04X, %d bytes at %d[%d]:%d\n",
		     iwl_get_cmd_string(trans, cmd->id), group_id,
		     out_cmd->hdr.cmd, le16_to_cpu(out_cmd->hdr.sequence),
		     cmd_size, txq->write_ptr, idx, trans->conf.cmd_queue);

	/* start the TFD with the minimum copy bytes */
	tb0_size = min_t(int, copy_size, IWL_FIRST_TB_SIZE);
	memcpy(&txq->first_tb_bufs[idx], out_cmd, tb0_size);
	iwl_txq_gen2_set_tb(trans, tfd, iwl_txq_get_first_tb_dma(txq, idx),
			    tb0_size);

	/* map first command fragment, if any remains */
	if (copy_size > tb0_size) {
		phys_addr = dma_map_single(trans->dev,
					   (u8 *)out_cmd + tb0_size,
					   copy_size - tb0_size,
					   DMA_TO_DEVICE);
		if (dma_mapping_error(trans->dev, phys_addr)) {
			idx = -ENOMEM;
			iwl_txq_gen2_tfd_unmap(trans, out_meta, tfd);
			goto out;
		}
		iwl_txq_gen2_set_tb(trans, tfd, phys_addr,
				    copy_size - tb0_size);
	}

	/* map the remaining (adjusted) nocopy/dup fragments */
	for (i = 0; i < IWL_MAX_CMD_TBS_PER_TFD; i++) {
		void *data = (void *)(uintptr_t)cmddata[i];

		if (!cmdlen[i])
			continue;
		if (!(cmd->dataflags[i] & (IWL_HCMD_DFL_NOCOPY |
					   IWL_HCMD_DFL_DUP)))
			continue;
		if (cmd->dataflags[i] & IWL_HCMD_DFL_DUP)
			data = dup_buf;
		phys_addr = dma_map_single(trans->dev, data,
					   cmdlen[i], DMA_TO_DEVICE);
		if (dma_mapping_error(trans->dev, phys_addr)) {
			idx = -ENOMEM;
			iwl_txq_gen2_tfd_unmap(trans, out_meta, tfd);
			goto out;
		}
		iwl_txq_gen2_set_tb(trans, tfd, phys_addr, cmdlen[i]);
	}

	BUILD_BUG_ON(IWL_TFH_NUM_TBS > sizeof(out_meta->tbs) * BITS_PER_BYTE);
	out_meta->flags = cmd->flags;
	if (WARN_ON_ONCE(txq->entries[idx].free_buf))
		kfree_sensitive(txq->entries[idx].free_buf);
	txq->entries[idx].free_buf = dup_buf;

	trace_iwlwifi_dev_hcmd(trans->dev, cmd, cmd_size, &out_cmd->hdr_wide);

	/* start timer if queue currently empty */
	if (txq->read_ptr == txq->write_ptr && txq->wd_timeout)
		mod_timer(&txq->stuck_timer, jiffies + txq->wd_timeout);

	spin_lock(&trans_pcie->reg_lock);
	/* Increment and update queue's write index */
	txq->write_ptr = iwl_txq_inc_wrap(trans, txq->write_ptr);
	iwl_txq_inc_wr_ptr(trans, txq);
	spin_unlock(&trans_pcie->reg_lock);

out:
	spin_unlock_irqrestore(&txq->lock, flags);
free_dup_buf:
	if (idx < 0)
		kfree(dup_buf);
	return idx;
}