// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
 * Copyright (C) 2017 Intel Deutschland GmbH
 * Copyright (C) 2018-2020, 2023-2024 Intel Corporation
 */
#ifdef CONFIG_INET
#include <net/tso.h>
#endif
#include <linux/tcp.h>

#include "iwl-debug.h"
#include "iwl-csr.h"
#include "iwl-io.h"
#include "internal.h"
#include "fw/api/tx.h"
#include "fw/api/commands.h"
#include "fw/api/datapath.h"
#include "iwl-scd.h"

static struct page *get_workaround_page(struct iwl_trans *trans,
					struct sk_buff *skb)
{
	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
	struct iwl_tso_page_info *info;
	struct page **page_ptr;
	struct page *ret;
	dma_addr_t phys;

	page_ptr = (void *)((u8 *)skb->cb + trans_pcie->txqs.page_offs);

	ret = alloc_page(GFP_ATOMIC);
	if (!ret)
		return NULL;

	info = IWL_TSO_PAGE_INFO(page_address(ret));

	/* Create a DMA mapping for the page */
	phys = dma_map_page_attrs(trans->dev, ret, 0, PAGE_SIZE,
				  DMA_TO_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
	if (unlikely(dma_mapping_error(trans->dev, phys))) {
		__free_page(ret);
		return NULL;
	}

	/* Store physical address and set use count */
	info->dma_addr = phys;
	refcount_set(&info->use_count, 1);

	/* set the chaining pointer to the previous page if there */
	info->next = *page_ptr;
	*page_ptr = ret;

	return ret;
}

/*
 * Add a TB and if needed apply the FH HW bug workaround;
 * meta != NULL indicates that it's a page mapping and we
 * need to dma_unmap_page() and set the meta->tbs bit in
 * this case.
 */
static int iwl_txq_gen2_set_tb_with_wa(struct iwl_trans *trans,
				       struct sk_buff *skb,
				       struct iwl_tfh_tfd *tfd,
				       dma_addr_t phys, void *virt,
				       u16 len, struct iwl_cmd_meta *meta,
				       bool unmap)
{
	dma_addr_t oldphys = phys;
	struct page *page;
	int ret;

	if (unlikely(dma_mapping_error(trans->dev, phys)))
		return -ENOMEM;

	if (likely(!iwl_txq_crosses_4g_boundary(phys, len))) {
		ret = iwl_txq_gen2_set_tb(trans, tfd, phys, len);

		if (ret < 0)
			goto unmap;

		if (meta)
			meta->tbs |= BIT(ret);

		ret = 0;
		goto trace;
	}

	/*
	 * Work around a hardware bug. If (as expressed in the
	 * condition above) the TB ends on a 32-bit boundary,
	 * then the next TB may be accessed with the wrong
	 * address.
	 * To work around it, copy the data elsewhere and make
	 * a new mapping for it so the device will not fail.
	 */

	if (WARN_ON(len > IWL_TSO_PAGE_DATA_SIZE)) {
		ret = -ENOBUFS;
		goto unmap;
	}

	page = get_workaround_page(trans, skb);
	if (!page) {
		ret = -ENOMEM;
		goto unmap;
	}

	memcpy(page_address(page), virt, len);

	/*
	 * This is a bit odd, but performance does not matter here, what
	 * matters are the expectations of the calling code and TB cleanup
	 * function.
	 *
	 * As such, if unmap is set, then create another mapping for the TB
	 * entry as it will be unmapped later. On the other hand, if it is not
	 * set, then the TB entry will not be unmapped and instead we simply
	 * reference and sync the mapping that get_workaround_page() created.
	 */
	if (unmap) {
		phys = dma_map_single(trans->dev, page_address(page), len,
				      DMA_TO_DEVICE);
		if (unlikely(dma_mapping_error(trans->dev, phys)))
			return -ENOMEM;
	} else {
		phys = iwl_pcie_get_tso_page_phys(page_address(page));
		dma_sync_single_for_device(trans->dev, phys, len,
					   DMA_TO_DEVICE);
	}

	ret = iwl_txq_gen2_set_tb(trans, tfd, phys, len);
	if (ret < 0) {
		/* unmap the new allocation as single */
		oldphys = phys;
		meta = NULL;
		goto unmap;
	}

	IWL_DEBUG_TX(trans,
		     "TB bug workaround: copied %d bytes from 0x%llx to 0x%llx\n",
		     len, (unsigned long long)oldphys,
		     (unsigned long long)phys);

	ret = 0;
unmap:
	if (!unmap)
		goto trace;

	if (meta)
		dma_unmap_page(trans->dev, oldphys, len, DMA_TO_DEVICE);
	else
		dma_unmap_single(trans->dev, oldphys, len, DMA_TO_DEVICE);
trace:
	trace_iwlwifi_dev_tx_tb(trans->dev, skb, virt, phys, len);

	return ret;
}

static int iwl_txq_gen2_build_amsdu(struct iwl_trans *trans,
				    struct sk_buff *skb,
				    struct iwl_tfh_tfd *tfd,
				    struct iwl_cmd_meta *out_meta,
				    int start_len,
				    u8 hdr_len,
				    struct iwl_device_tx_cmd *dev_cmd)
{
#ifdef CONFIG_INET
	struct iwl_tx_cmd_gen2 *tx_cmd = (void *)dev_cmd->payload;
	struct ieee80211_hdr *hdr = (void *)skb->data;
	unsigned int snap_ip_tcp_hdrlen, ip_hdrlen, total_len, hdr_room;
	unsigned int mss = skb_shinfo(skb)->gso_size;
	unsigned int data_offset = 0;
	dma_addr_t start_hdr_phys;
	u16 length, amsdu_pad;
	u8 *start_hdr;
	struct sg_table *sgt;
	struct tso_t tso;

	trace_iwlwifi_dev_tx(trans->dev, skb, tfd, sizeof(*tfd),
			     &dev_cmd->hdr, start_len, 0);

	ip_hdrlen = skb_network_header_len(skb);
	snap_ip_tcp_hdrlen = 8 + ip_hdrlen + tcp_hdrlen(skb);
	total_len = skb->len - snap_ip_tcp_hdrlen - hdr_len;
	amsdu_pad = 0;

	/* total amount of header we may need for this A-MSDU */
	hdr_room = DIV_ROUND_UP(total_len, mss) *
		(3 + snap_ip_tcp_hdrlen + sizeof(struct ethhdr));

	/* Our device supports 9 segments at most, it will fit in 1 page */
	sgt = iwl_pcie_prep_tso(trans, skb, out_meta, &start_hdr, hdr_room);
	if (!sgt)
		return -ENOMEM;

	start_hdr_phys = iwl_pcie_get_tso_page_phys(start_hdr);

	/*
	 * Pull the ieee80211 header to be able to use TSO core,
	 * we will restore it for the tx_status flow.
	 */
	skb_pull(skb, hdr_len);

	/*
	 * Remove the length of all the headers that we don't actually
	 * have in the MPDU by themselves, but that we duplicate into
	 * all the different MSDUs inside the A-MSDU.
	 */
	le16_add_cpu(&tx_cmd->len, -snap_ip_tcp_hdrlen);

	tso_start(skb, &tso);

	while (total_len) {
		/* this is the data left for this subframe */
		unsigned int data_left = min_t(unsigned int, mss, total_len);
		unsigned int tb_len;
		dma_addr_t tb_phys;
		u8 *pos_hdr = start_hdr;

		total_len -= data_left;

		memset(pos_hdr, 0, amsdu_pad);
		pos_hdr += amsdu_pad;
		amsdu_pad = (4 - (sizeof(struct ethhdr) + snap_ip_tcp_hdrlen +
				  data_left)) & 0x3;
		ether_addr_copy(pos_hdr, ieee80211_get_DA(hdr));
		pos_hdr += ETH_ALEN;
		ether_addr_copy(pos_hdr, ieee80211_get_SA(hdr));
		pos_hdr += ETH_ALEN;

		length = snap_ip_tcp_hdrlen + data_left;
		*((__be16 *)pos_hdr) = cpu_to_be16(length);
		pos_hdr += sizeof(length);

		/*
		 * This will copy the SNAP as well which will be considered
		 * as MAC header.
		 */
		tso_build_hdr(skb, pos_hdr, &tso, data_left, !total_len);

		pos_hdr += snap_ip_tcp_hdrlen;

		tb_len = pos_hdr - start_hdr;
		tb_phys = iwl_pcie_get_tso_page_phys(start_hdr);

		/*
		 * No need for _with_wa, this is from the TSO page and
		 * we leave some space at the end of it so can't hit
		 * the buggy scenario.
		 */
		iwl_txq_gen2_set_tb(trans, tfd, tb_phys, tb_len);
		trace_iwlwifi_dev_tx_tb(trans->dev, skb, start_hdr,
					tb_phys, tb_len);
		/* add this subframe's headers' length to the tx_cmd */
		le16_add_cpu(&tx_cmd->len, tb_len);

		/* prepare the start_hdr for the next subframe */
		start_hdr = pos_hdr;

		/* put the payload */
		while (data_left) {
			int ret;

			tb_len = min_t(unsigned int, tso.size, data_left);
			tb_phys = iwl_pcie_get_sgt_tb_phys(sgt, data_offset,
							   tb_len);
			/* Not a real mapping error, use direct comparison */
			if (unlikely(tb_phys == DMA_MAPPING_ERROR))
				goto out_err;

			ret = iwl_txq_gen2_set_tb_with_wa(trans, skb, tfd,
							  tb_phys, tso.data,
							  tb_len, NULL, false);
			if (ret)
				goto out_err;

			data_left -= tb_len;
			data_offset += tb_len;
			tso_build_data(skb, &tso, tb_len);
		}
	}

	dma_sync_single_for_device(trans->dev, start_hdr_phys, hdr_room,
				   DMA_TO_DEVICE);

	/* re-add the WiFi header */
	skb_push(skb, hdr_len);

	return 0;

out_err:
#endif
	return -EINVAL;
}

static struct
iwl_tfh_tfd *iwl_txq_gen2_build_tx_amsdu(struct iwl_trans *trans,
					 struct iwl_txq *txq,
					 struct iwl_device_tx_cmd *dev_cmd,
					 struct sk_buff *skb,
					 struct iwl_cmd_meta *out_meta,
					 int hdr_len,
					 int tx_cmd_len)
{
	int idx = iwl_txq_get_cmd_index(txq, txq->write_ptr);
	struct iwl_tfh_tfd *tfd = iwl_txq_get_tfd(trans, txq, idx);
	dma_addr_t tb_phys;
	int len;
	void *tb1_addr;

	tb_phys = iwl_txq_get_first_tb_dma(txq, idx);

	/*
	 * No need for _with_wa, the first TB allocation is aligned up
	 * to a 64-byte boundary and thus can't be at the end or cross
	 * a page boundary (much less a 2^32 boundary).
	 */
	iwl_txq_gen2_set_tb(trans, tfd, tb_phys, IWL_FIRST_TB_SIZE);

	/*
	 * The second TB (tb1) points to the remainder of the TX command
	 * and the 802.11 header - dword aligned size
	 * (This calculation modifies the TX command, so do it before the
	 * setup of the first TB)
	 */
	len = tx_cmd_len + sizeof(struct iwl_cmd_header) + hdr_len -
	      IWL_FIRST_TB_SIZE;

	/* do not align A-MSDU to dword as the subframe header aligns it */

	/* map the data for TB1 */
	tb1_addr = ((u8 *)&dev_cmd->hdr) + IWL_FIRST_TB_SIZE;
	tb_phys = dma_map_single(trans->dev, tb1_addr, len, DMA_TO_DEVICE);
	if (unlikely(dma_mapping_error(trans->dev, tb_phys)))
		goto out_err;
	/*
	 * No need for _with_wa(), we ensure (via alignment) that the data
	 * here can never cross or end at a page boundary.
	 */
	iwl_txq_gen2_set_tb(trans, tfd, tb_phys, len);

	if (iwl_txq_gen2_build_amsdu(trans, skb, tfd, out_meta,
				     len + IWL_FIRST_TB_SIZE, hdr_len, dev_cmd))
		goto out_err;

	/* building the A-MSDU might have changed this data, memcpy it now */
	memcpy(&txq->first_tb_bufs[idx], dev_cmd, IWL_FIRST_TB_SIZE);
	return tfd;

out_err:
	iwl_txq_gen2_tfd_unmap(trans, out_meta, tfd);
	return NULL;
}

static int iwl_txq_gen2_tx_add_frags(struct iwl_trans *trans,
				     struct sk_buff *skb,
				     struct iwl_tfh_tfd *tfd,
				     struct iwl_cmd_meta *out_meta)
{
	int i;

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
		dma_addr_t tb_phys;
		unsigned int fragsz = skb_frag_size(frag);
		int ret;

		if (!fragsz)
			continue;

		tb_phys = skb_frag_dma_map(trans->dev, frag, 0,
					   fragsz, DMA_TO_DEVICE);
		ret = iwl_txq_gen2_set_tb_with_wa(trans, skb, tfd, tb_phys,
						  skb_frag_address(frag),
						  fragsz, out_meta, true);
		if (ret)
			return ret;
	}

	return 0;
}

static struct
iwl_tfh_tfd *iwl_txq_gen2_build_tx(struct iwl_trans *trans,
				   struct iwl_txq *txq,
				   struct iwl_device_tx_cmd *dev_cmd,
				   struct sk_buff *skb,
				   struct iwl_cmd_meta *out_meta,
				   int hdr_len,
				   int tx_cmd_len,
				   bool pad)
{
	int idx = iwl_txq_get_cmd_index(txq, txq->write_ptr);
	struct iwl_tfh_tfd *tfd = iwl_txq_get_tfd(trans, txq, idx);
	dma_addr_t tb_phys;
	int len, tb1_len, tb2_len;
	void *tb1_addr;
	struct sk_buff *frag;

	tb_phys = iwl_txq_get_first_tb_dma(txq, idx);

	/* The first TB points to bi-directional DMA data */
	memcpy(&txq->first_tb_bufs[idx], dev_cmd, IWL_FIRST_TB_SIZE);

	/*
	 * No need for _with_wa, the first TB allocation is aligned up
	 * to a 64-byte boundary and thus can't be at the end or cross
	 * a page boundary (much less a 2^32 boundary).
	 */
	iwl_txq_gen2_set_tb(trans, tfd, tb_phys, IWL_FIRST_TB_SIZE);

	/*
	 * The second TB (tb1) points to the remainder of the TX command
	 * and the 802.11 header - dword aligned size
	 * (This calculation modifies the TX command, so do it before the
	 * setup of the first TB)
	 */
	len = tx_cmd_len + sizeof(struct iwl_cmd_header) + hdr_len -
	      IWL_FIRST_TB_SIZE;

	if (pad)
		tb1_len = ALIGN(len, 4);
	else
		tb1_len = len;

	/* map the data for TB1 */
	tb1_addr = ((u8 *)&dev_cmd->hdr) + IWL_FIRST_TB_SIZE;
	tb_phys = dma_map_single(trans->dev, tb1_addr, tb1_len, DMA_TO_DEVICE);
	if (unlikely(dma_mapping_error(trans->dev, tb_phys)))
		goto out_err;
	/*
	 * No need for _with_wa(), we ensure (via alignment) that the data
	 * here can never cross or end at a page boundary.
	 */
	iwl_txq_gen2_set_tb(trans, tfd, tb_phys, tb1_len);
	trace_iwlwifi_dev_tx(trans->dev, skb, tfd, sizeof(*tfd), &dev_cmd->hdr,
			     IWL_FIRST_TB_SIZE + tb1_len, hdr_len);

	/* set up TFD's third entry to point to remainder of skb's head */
	tb2_len = skb_headlen(skb) - hdr_len;

	if (tb2_len > 0) {
		int ret;

		tb_phys = dma_map_single(trans->dev, skb->data + hdr_len,
					 tb2_len, DMA_TO_DEVICE);
		ret = iwl_txq_gen2_set_tb_with_wa(trans, skb, tfd, tb_phys,
						  skb->data + hdr_len, tb2_len,
						  NULL, true);
		if (ret)
			goto out_err;
	}

	if (iwl_txq_gen2_tx_add_frags(trans, skb, tfd, out_meta))
		goto out_err;

	skb_walk_frags(skb, frag) {
		int ret;

		tb_phys = dma_map_single(trans->dev, frag->data,
					 skb_headlen(frag), DMA_TO_DEVICE);
		ret = iwl_txq_gen2_set_tb_with_wa(trans, skb, tfd, tb_phys,
						  frag->data,
						  skb_headlen(frag), NULL,
						  true);
		if (ret)
			goto out_err;
		if (iwl_txq_gen2_tx_add_frags(trans, frag, tfd, out_meta))
			goto out_err;
	}

	return tfd;

out_err:
	iwl_txq_gen2_tfd_unmap(trans, out_meta, tfd);
	return NULL;
}

static
struct iwl_tfh_tfd *iwl_txq_gen2_build_tfd(struct iwl_trans *trans,
					   struct iwl_txq *txq,
					   struct iwl_device_tx_cmd *dev_cmd,
					   struct sk_buff *skb,
					   struct iwl_cmd_meta *out_meta)
{
	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
	int idx = iwl_txq_get_cmd_index(txq, txq->write_ptr);
	struct iwl_tfh_tfd *tfd = iwl_txq_get_tfd(trans, txq, idx);
	int len, hdr_len;
	bool amsdu;

	/* There must be data left over for TB1 or this code must be changed */
	BUILD_BUG_ON(sizeof(struct iwl_tx_cmd_gen2) < IWL_FIRST_TB_SIZE);
	BUILD_BUG_ON(sizeof(struct iwl_cmd_header) +
		     offsetofend(struct iwl_tx_cmd_gen2, dram_info) >
		     IWL_FIRST_TB_SIZE);
	BUILD_BUG_ON(sizeof(struct iwl_tx_cmd_gen3) < IWL_FIRST_TB_SIZE);
	BUILD_BUG_ON(sizeof(struct iwl_cmd_header) +
		     offsetofend(struct iwl_tx_cmd_gen3, dram_info) >
		     IWL_FIRST_TB_SIZE);

	memset(tfd, 0, sizeof(*tfd));

	if (trans->trans_cfg->device_family < IWL_DEVICE_FAMILY_AX210)
		len = sizeof(struct iwl_tx_cmd_gen2);
	else
		len = sizeof(struct iwl_tx_cmd_gen3);

	amsdu = ieee80211_is_data_qos(hdr->frame_control) &&
		(*ieee80211_get_qos_ctl(hdr) &
		 IEEE80211_QOS_CTL_A_MSDU_PRESENT);

	hdr_len = ieee80211_hdrlen(hdr->frame_control);

	/*
	 * Only build A-MSDUs here if doing so by GSO, otherwise it may be
	 * an A-MSDU for other reasons, e.g. NAN or an A-MSDU having been
	 * built in the higher layers already.
	 */
	if (amsdu && skb_shinfo(skb)->gso_size)
		return iwl_txq_gen2_build_tx_amsdu(trans, txq, dev_cmd, skb,
						   out_meta, hdr_len, len);
	return iwl_txq_gen2_build_tx(trans, txq, dev_cmd, skb, out_meta,
				     hdr_len, len, !amsdu);
}

int iwl_txq_space(struct iwl_trans *trans, const struct iwl_txq *q)
{
	unsigned int max;
	unsigned int used;

	/*
	 * To avoid ambiguity between empty and completely full queues, there
	 * should always be less than max_tfd_queue_size elements in the queue.
	 * If q->n_window is smaller than max_tfd_queue_size, there is no need
	 * to reserve any queue entries for this purpose.
	 */
	if (q->n_window < trans->trans_cfg->base_params->max_tfd_queue_size)
		max = q->n_window;
	else
		max = trans->trans_cfg->base_params->max_tfd_queue_size - 1;

	/*
	 * max_tfd_queue_size is a power of 2, so the following is equivalent to
	 * modulo by max_tfd_queue_size and is well defined.
	 */
	used = (q->write_ptr - q->read_ptr) &
		(trans->trans_cfg->base_params->max_tfd_queue_size - 1);

	if (WARN_ON(used > max))
		return 0;

	return max - used;
}

/*
 * iwl_pcie_gen2_update_byte_tbl - Set up entry in Tx byte-count array
 */
static void iwl_pcie_gen2_update_byte_tbl(struct iwl_trans *trans,
					  struct iwl_txq *txq, u16 byte_cnt,
					  int num_tbs)
{
	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
	int idx = iwl_txq_get_cmd_index(txq, txq->write_ptr);
	u8 filled_tfd_size, num_fetch_chunks;
	u16 len = byte_cnt;
	__le16 bc_ent;

	if (WARN(idx >= txq->n_window, "%d >= %d\n", idx, txq->n_window))
		return;

	filled_tfd_size = offsetof(struct iwl_tfh_tfd, tbs) +
			  num_tbs * sizeof(struct iwl_tfh_tb);
	/*
	 * filled_tfd_size contains the number of filled bytes in the TFD.
	 * Dividing it by 64 will give the number of chunks to fetch
	 * to SRAM - 0 for one chunk, 1 for 2 and so on.
	 * If, for example, TFD contains only 3 TBs then 32 bytes
	 * of the TFD are used, and only one chunk of 64 bytes should
	 * be fetched
	 */
	num_fetch_chunks = DIV_ROUND_UP(filled_tfd_size, 64) - 1;

	if (trans->trans_cfg->device_family >= IWL_DEVICE_FAMILY_AX210) {
		struct iwl_gen3_bc_tbl_entry *scd_bc_tbl_gen3 = txq->bc_tbl.addr;

		/* Starting from AX210, the HW expects bytes */
		WARN_ON(trans_pcie->txqs.bc_table_dword);
		WARN_ON(len > 0x3FFF);
		bc_ent = cpu_to_le16(len | (num_fetch_chunks << 14));
		scd_bc_tbl_gen3[idx].tfd_offset = bc_ent;
	} else {
		struct iwlagn_scd_bc_tbl *scd_bc_tbl = txq->bc_tbl.addr;

		/* Before AX210, the HW expects DW */
		WARN_ON(!trans_pcie->txqs.bc_table_dword);
		len = DIV_ROUND_UP(len, 4);
		WARN_ON(len > 0xFFF);
		bc_ent = cpu_to_le16(len | (num_fetch_chunks << 12));
		scd_bc_tbl->tfd_offset[idx] = bc_ent;
	}
}

static u8 iwl_txq_gen2_get_num_tbs(struct iwl_tfh_tfd *tfd)
{
	return le16_to_cpu(tfd->num_tbs) & 0x1f;
}

int iwl_txq_gen2_set_tb(struct iwl_trans *trans, struct iwl_tfh_tfd *tfd,
			dma_addr_t addr, u16 len)
{
	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
	int idx = iwl_txq_gen2_get_num_tbs(tfd);
	struct iwl_tfh_tb *tb;

	/* Only WARN here so we know about the issue, but we mess up our
	 * unmap path because not every place currently checks for errors
	 * returned from this function - it can only return an error if
	 * there's no more space, and so when we know there is enough we
	 * don't always check ...
	 */
	WARN(iwl_txq_crosses_4g_boundary(addr, len),
	     "possible DMA problem with iova:0x%llx, len:%d\n",
	     (unsigned long long)addr, len);

	if (WARN_ON(idx >= IWL_TFH_NUM_TBS))
		return -EINVAL;
	tb = &tfd->tbs[idx];

	/* Each TFD can point to a maximum max_tbs Tx buffers */
	if (le16_to_cpu(tfd->num_tbs) >= trans_pcie->txqs.tfd.max_tbs) {
		IWL_ERR(trans, "Error can not send more than %d chunks\n",
			trans_pcie->txqs.tfd.max_tbs);
		return -EINVAL;
	}

	put_unaligned_le64(addr, &tb->addr);
	tb->tb_len = cpu_to_le16(len);

	tfd->num_tbs = cpu_to_le16(idx + 1);

	return idx;
}

void iwl_txq_gen2_tfd_unmap(struct iwl_trans *trans,
			    struct iwl_cmd_meta *meta,
			    struct iwl_tfh_tfd *tfd)
{
	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
	int i, num_tbs;

	/* Sanity check on number of chunks */
	num_tbs = iwl_txq_gen2_get_num_tbs(tfd);

	if (num_tbs > trans_pcie->txqs.tfd.max_tbs) {
		IWL_ERR(trans, "Too many chunks: %i\n", num_tbs);
		return;
	}

	/* TB1 is mapped directly, the rest is the TSO page and SG list. */
	if (meta->sg_offset)
		num_tbs = 2;

	/* first TB is never freed - it's the bidirectional DMA data */
	for (i = 1; i < num_tbs; i++) {
		if (meta->tbs & BIT(i))
			dma_unmap_page(trans->dev,
				       le64_to_cpu(tfd->tbs[i].addr),
				       le16_to_cpu(tfd->tbs[i].tb_len),
				       DMA_TO_DEVICE);
		else
			dma_unmap_single(trans->dev,
					 le64_to_cpu(tfd->tbs[i].addr),
					 le16_to_cpu(tfd->tbs[i].tb_len),
					 DMA_TO_DEVICE);
	}

	iwl_txq_set_tfd_invalid_gen2(trans, tfd);
}

static void iwl_txq_gen2_free_tfd(struct iwl_trans *trans, struct iwl_txq *txq)
{
	/* rd_ptr is bounded by TFD_QUEUE_SIZE_MAX and
	 * idx is bounded by n_window
	 */
	int idx = iwl_txq_get_cmd_index(txq, txq->read_ptr);
	struct sk_buff *skb;

	lockdep_assert_held(&txq->lock);

	if (!txq->entries)
		return;

	iwl_txq_gen2_tfd_unmap(trans, &txq->entries[idx].meta,
			       iwl_txq_get_tfd(trans, txq, idx));

	skb = txq->entries[idx].skb;

	/* Can be called from irqs-disabled context
	 * If skb is not NULL, it means that the whole queue is being
	 * freed and that the queue is not empty - free the skb
	 */
	if (skb) {
		iwl_op_mode_free_skb(trans->op_mode, skb);
		txq->entries[idx].skb = NULL;
	}
}

/*
 * iwl_txq_inc_wr_ptr - Send new write index to hardware
 */
static void iwl_txq_inc_wr_ptr(struct iwl_trans *trans, struct iwl_txq *txq)
{
	lockdep_assert_held(&txq->lock);

	IWL_DEBUG_TX(trans, "Q:%d WR: 0x%x\n", txq->id, txq->write_ptr);

	/*
	 * if not in power-save mode, uCode will never sleep when we're
	 * trying to tx (during RFKILL, we're not trying to tx).
	 */
	iwl_write32(trans, HBUS_TARG_WRPTR, txq->write_ptr | (txq->id << 16));
}

int iwl_txq_gen2_tx(struct iwl_trans *trans, struct sk_buff *skb,
		    struct iwl_device_tx_cmd *dev_cmd, int txq_id)
{
	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
	struct iwl_cmd_meta *out_meta;
	struct iwl_txq *txq = trans_pcie->txqs.txq[txq_id];
	u16 cmd_len;
	int idx;
	void *tfd;

	if (WARN_ONCE(txq_id >= IWL_MAX_TVQM_QUEUES,
		      "queue %d out of range", txq_id))
		return -EINVAL;

	if (WARN_ONCE(!test_bit(txq_id, trans_pcie->txqs.queue_used),
		      "TX on unused queue %d\n", txq_id))
		return -EINVAL;

	if (skb_is_nonlinear(skb) &&
	    skb_shinfo(skb)->nr_frags > IWL_TRANS_PCIE_MAX_FRAGS(trans_pcie) &&
	    __skb_linearize(skb))
		return -ENOMEM;

	spin_lock(&txq->lock);

	if (iwl_txq_space(trans, txq) < txq->high_mark) {
		iwl_txq_stop(trans, txq);

		/* don't put the packet on the ring, if there is no room */
		if (unlikely(iwl_txq_space(trans, txq) < 3)) {
			struct iwl_device_tx_cmd **dev_cmd_ptr;

			dev_cmd_ptr = (void *)((u8 *)skb->cb +
					       trans_pcie->txqs.dev_cmd_offs);

			*dev_cmd_ptr = dev_cmd;
			__skb_queue_tail(&txq->overflow_q, skb);
			spin_unlock(&txq->lock);
			return 0;
		}
	}

	idx = iwl_txq_get_cmd_index(txq, txq->write_ptr);

	/* Set up driver data for this TFD */
	txq->entries[idx].skb = skb;
	txq->entries[idx].cmd = dev_cmd;

	dev_cmd->hdr.sequence =
		cpu_to_le16((u16)(QUEUE_TO_SEQ(txq_id) |
				  INDEX_TO_SEQ(idx)));

	/* Set up first empty entry in queue's array of Tx/cmd buffers */
	out_meta = &txq->entries[idx].meta;
	memset(out_meta, 0, sizeof(*out_meta));

	tfd = iwl_txq_gen2_build_tfd(trans, txq, dev_cmd, skb, out_meta);
	if (!tfd) {
		spin_unlock(&txq->lock);
		return -1;
	}

	if (trans->trans_cfg->device_family >= IWL_DEVICE_FAMILY_AX210) {
		struct iwl_tx_cmd_gen3 *tx_cmd_gen3 =
			(void *)dev_cmd->payload;

		cmd_len = le16_to_cpu(tx_cmd_gen3->len);
	} else {
		struct iwl_tx_cmd_gen2 *tx_cmd_gen2 =
			(void *)dev_cmd->payload;

		cmd_len = le16_to_cpu(tx_cmd_gen2->len);
	}

	/* Set up entry for this TFD in Tx byte-count array */
	iwl_pcie_gen2_update_byte_tbl(trans, txq, cmd_len,
				      iwl_txq_gen2_get_num_tbs(tfd));

	/* start timer if queue currently empty */
	if (txq->read_ptr == txq->write_ptr && txq->wd_timeout)
		mod_timer(&txq->stuck_timer, jiffies + txq->wd_timeout);

	/* Tell device the write index *just past* this latest filled TFD */
	txq->write_ptr = iwl_txq_inc_wrap(trans, txq->write_ptr);
	iwl_txq_inc_wr_ptr(trans, txq);
	/*
	 * At this point the frame is "transmitted" successfully
	 * and we will get a TX status notification eventually.
	 */
	spin_unlock(&txq->lock);
	return 0;
}

/*************** HOST COMMAND QUEUE FUNCTIONS *****/

/*
 * iwl_txq_gen2_unmap - Unmap any remaining DMA mappings and free skb's
 */
static void iwl_txq_gen2_unmap(struct iwl_trans *trans, int txq_id)
{
	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
	struct iwl_txq *txq = trans_pcie->txqs.txq[txq_id];

	spin_lock_bh(&txq->reclaim_lock);
	spin_lock(&txq->lock);
	while (txq->write_ptr != txq->read_ptr) {
		IWL_DEBUG_TX_REPLY(trans, "Q %d Free %d\n",
				   txq_id, txq->read_ptr);

		if (txq_id != trans_pcie->txqs.cmd.q_id) {
			int idx = iwl_txq_get_cmd_index(txq, txq->read_ptr);
			struct iwl_cmd_meta *cmd_meta = &txq->entries[idx].meta;
			struct sk_buff *skb = txq->entries[idx].skb;

			if (!WARN_ON_ONCE(!skb))
				iwl_pcie_free_tso_pages(trans, skb, cmd_meta);
		}
		iwl_txq_gen2_free_tfd(trans, txq);
		txq->read_ptr = iwl_txq_inc_wrap(trans, txq->read_ptr);
	}

	while (!skb_queue_empty(&txq->overflow_q)) {
		struct sk_buff *skb = __skb_dequeue(&txq->overflow_q);

		iwl_op_mode_free_skb(trans->op_mode, skb);
	}

	spin_unlock(&txq->lock);
	spin_unlock_bh(&txq->reclaim_lock);

	/* just in case - this queue may have been stopped */
	iwl_trans_pcie_wake_queue(trans, txq);
}

static void iwl_txq_gen2_free_memory(struct iwl_trans *trans,
				     struct iwl_txq *txq)
{
	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
	struct device *dev = trans->dev;

	/* De-alloc circular buffer of TFDs */
	if (txq->tfds) {
		dma_free_coherent(dev,
				  trans_pcie->txqs.tfd.size * txq->n_window,
				  txq->tfds, txq->dma_addr);
		dma_free_coherent(dev,
				  sizeof(*txq->first_tb_bufs) * txq->n_window,
				  txq->first_tb_bufs, txq->first_tb_dma);
	}

	kfree(txq->entries);
	if (txq->bc_tbl.addr)
		dma_pool_free(trans_pcie->txqs.bc_pool,
			      txq->bc_tbl.addr, txq->bc_tbl.dma);
	kfree(txq);
}

/*
 * iwl_txq_gen2_free - Deallocate DMA queue.
 * @txq: Transmit queue to deallocate.
 *
 * Empty queue by removing and destroying all BD's.
 * Free all buffers.
 * 0-fill, but do not free "txq" descriptor structure.
 */
static void iwl_txq_gen2_free(struct iwl_trans *trans, int txq_id)
{
	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
	struct iwl_txq *txq;
	int i;

	if (WARN_ONCE(txq_id >= IWL_MAX_TVQM_QUEUES,
		      "queue %d out of range", txq_id))
		return;

	txq = trans_pcie->txqs.txq[txq_id];

	if (WARN_ON(!txq))
		return;

	iwl_txq_gen2_unmap(trans, txq_id);

	/* De-alloc array of command/tx buffers */
	if (txq_id == trans_pcie->txqs.cmd.q_id)
		for (i = 0; i < txq->n_window; i++) {
			kfree_sensitive(txq->entries[i].cmd);
			kfree_sensitive(txq->entries[i].free_buf);
		}
	del_timer_sync(&txq->stuck_timer);

	iwl_txq_gen2_free_memory(trans, txq);

	trans_pcie->txqs.txq[txq_id] = NULL;

	clear_bit(txq_id, trans_pcie->txqs.queue_used);
}

static struct iwl_txq *
iwl_txq_dyn_alloc_dma(struct iwl_trans *trans, int size, unsigned int timeout)
{
	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
	size_t bc_tbl_size, bc_tbl_entries;
	struct iwl_txq *txq;
	int ret;

	WARN_ON(!trans_pcie->txqs.bc_tbl_size);

	bc_tbl_size = trans_pcie->txqs.bc_tbl_size;
	bc_tbl_entries = bc_tbl_size / sizeof(u16);

	if (WARN_ON(size > bc_tbl_entries))
		return ERR_PTR(-EINVAL);

	txq = kzalloc(sizeof(*txq), GFP_KERNEL);
	if (!txq)
		return ERR_PTR(-ENOMEM);

	txq->bc_tbl.addr = dma_pool_alloc(trans_pcie->txqs.bc_pool, GFP_KERNEL,
					  &txq->bc_tbl.dma);
	if (!txq->bc_tbl.addr) {
		IWL_ERR(trans, "Scheduler BC Table allocation failed\n");
		kfree(txq);
		return ERR_PTR(-ENOMEM);
	}

	ret = iwl_pcie_txq_alloc(trans, txq, size, false);
	if (ret) {
		IWL_ERR(trans, "Tx queue alloc failed\n");
		goto error;
	}
	ret = iwl_txq_init(trans, txq, size, false);
	if (ret) {
		IWL_ERR(trans, "Tx queue init failed\n");
		goto error;
	}

	txq->wd_timeout = msecs_to_jiffies(timeout);

	return txq;

error:
	iwl_txq_gen2_free_memory(trans, txq);
	return ERR_PTR(ret);
}

static int iwl_pcie_txq_alloc_response(struct iwl_trans *trans,
				       struct iwl_txq *txq,
				       struct iwl_host_cmd *hcmd)
{
	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
	struct iwl_tx_queue_cfg_rsp *rsp;
	int ret, qid;
	u32 wr_ptr;

	if (WARN_ON(iwl_rx_packet_payload_len(hcmd->resp_pkt) !=
		    sizeof(*rsp))) {
		ret = -EINVAL;
		goto error_free_resp;
	}

	rsp = (void *)hcmd->resp_pkt->data;
	qid = le16_to_cpu(rsp->queue_number);
	wr_ptr = le16_to_cpu(rsp->write_pointer);

	if (qid >= ARRAY_SIZE(trans_pcie->txqs.txq)) {
		WARN_ONCE(1, "queue index %d unsupported", qid);
		ret = -EIO;
		goto error_free_resp;
	}

	if (test_and_set_bit(qid, trans_pcie->txqs.queue_used)) {
		WARN_ONCE(1, "queue %d already used", qid);
		ret = -EIO;
		goto error_free_resp;
	}

	if (WARN_ONCE(trans_pcie->txqs.txq[qid],
		      "queue %d already allocated\n", qid)) {
		ret = -EIO;
		goto error_free_resp;
	}

	txq->id = qid;
	trans_pcie->txqs.txq[qid] = txq;
	wr_ptr &= (trans->trans_cfg->base_params->max_tfd_queue_size - 1);

	/* Place first TFD at index corresponding to start sequence number */
	txq->read_ptr = wr_ptr;
	txq->write_ptr = wr_ptr;

	IWL_DEBUG_TX_QUEUES(trans, "Activate queue %d\n", qid);

	iwl_free_resp(hcmd);
	return qid;

error_free_resp:
	iwl_free_resp(hcmd);
	iwl_txq_gen2_free_memory(trans, txq);
	return ret;
}

int iwl_txq_dyn_alloc(struct iwl_trans *trans, u32 flags, u32 sta_mask,
		      u8 tid, int size, unsigned int timeout)
{
	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
	struct iwl_txq *txq;
	union {
		struct iwl_tx_queue_cfg_cmd old;
		struct iwl_scd_queue_cfg_cmd new;
	} cmd;
	struct iwl_host_cmd hcmd = {
		.flags = CMD_WANT_SKB,
	};
	int ret;

	/* take the min with bytecount table entries allowed */
	size = min_t(u32, size, trans_pcie->txqs.bc_tbl_size / sizeof(u16));
	/* but must be power of 2 values for calculating read/write pointers */
	size = rounddown_pow_of_two(size);

	if (trans->trans_cfg->device_family == IWL_DEVICE_FAMILY_BZ &&
	    trans->hw_rev_step == SILICON_A_STEP) {
		size = 4096;
		txq = iwl_txq_dyn_alloc_dma(trans, size, timeout);
	} else {
		do {
			txq = iwl_txq_dyn_alloc_dma(trans, size, timeout);
			if (!IS_ERR(txq))
				break;

			IWL_DEBUG_TX_QUEUES(trans,
					    "Failed allocating TXQ of size %d for sta mask %x tid %d, ret: %ld\n",
					    size, sta_mask, tid,
					    PTR_ERR(txq));
			size /= 2;
		} while (size >= 16);
	}

	if (IS_ERR(txq))
		return PTR_ERR(txq);

	if (trans_pcie->txqs.queue_alloc_cmd_ver == 0) {
		memset(&cmd.old, 0, sizeof(cmd.old));
		cmd.old.tfdq_addr = cpu_to_le64(txq->dma_addr);
		cmd.old.byte_cnt_addr = cpu_to_le64(txq->bc_tbl.dma);
		cmd.old.cb_size = cpu_to_le32(TFD_QUEUE_CB_SIZE(size));
		cmd.old.flags = cpu_to_le16(flags | TX_QUEUE_CFG_ENABLE_QUEUE);
		cmd.old.tid = tid;

		if (hweight32(sta_mask) != 1) {
			ret = -EINVAL;
			goto error;
		}
		cmd.old.sta_id = ffs(sta_mask) - 1;

		hcmd.id = SCD_QUEUE_CFG;
		hcmd.len[0] = sizeof(cmd.old);
		hcmd.data[0] = &cmd.old;
	} else if (trans_pcie->txqs.queue_alloc_cmd_ver == 3) {
		memset(&cmd.new, 0, sizeof(cmd.new));
		cmd.new.operation = cpu_to_le32(IWL_SCD_QUEUE_ADD);
		cmd.new.u.add.tfdq_dram_addr = cpu_to_le64(txq->dma_addr);
		cmd.new.u.add.bc_dram_addr = cpu_to_le64(txq->bc_tbl.dma);
		cmd.new.u.add.cb_size = cpu_to_le32(TFD_QUEUE_CB_SIZE(size));
		cmd.new.u.add.flags = cpu_to_le32(flags);
		cmd.new.u.add.sta_mask = cpu_to_le32(sta_mask);
		cmd.new.u.add.tid = tid;

		hcmd.id = WIDE_ID(DATA_PATH_GROUP, SCD_QUEUE_CONFIG_CMD);
		hcmd.len[0] = sizeof(cmd.new);
		hcmd.data[0] = &cmd.new;
	} else {
		ret = -EOPNOTSUPP;
		goto error;
	}

	ret = iwl_trans_send_cmd(trans, &hcmd);
	if (ret)
		goto error;

	return iwl_pcie_txq_alloc_response(trans, txq, &hcmd);

error:
	iwl_txq_gen2_free_memory(trans, txq);
	return ret;
}

void iwl_txq_dyn_free(struct iwl_trans *trans, int queue)
{
	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);

	if (WARN(queue >= IWL_MAX_TVQM_QUEUES,
		 "queue %d out of range", queue))
		return;

	/*
	 * Upon HW Rfkill - we stop the device, and then stop the queues
	 * in the op_mode. Just for the sake of the simplicity of the op_mode,
	 * allow the op_mode to call txq_disable after it already called
	 * stop_device.
	 */
	if (!test_and_clear_bit(queue, trans_pcie->txqs.queue_used)) {
		WARN_ONCE(test_bit(STATUS_DEVICE_ENABLED, &trans->status),
			  "queue %d not used", queue);
		return;
	}

	iwl_txq_gen2_free(trans, queue);

	IWL_DEBUG_TX_QUEUES(trans, "Deactivate queue %d\n", queue);
}

void iwl_txq_gen2_tx_free(struct iwl_trans *trans)
{
	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
	int i;

	memset(trans_pcie->txqs.queue_used, 0,
	       sizeof(trans_pcie->txqs.queue_used));

	/* Free all TX queues */
	for (i = 0; i < ARRAY_SIZE(trans_pcie->txqs.txq); i++) {
		if (!trans_pcie->txqs.txq[i])
			continue;

		iwl_txq_gen2_free(trans, i);
	}
}

int iwl_txq_gen2_init(struct iwl_trans *trans, int txq_id, int queue_size)
{
	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
	struct iwl_txq *queue;
	int ret;

	/* alloc and init the tx queue */
	if (!trans_pcie->txqs.txq[txq_id]) {
		queue = kzalloc(sizeof(*queue), GFP_KERNEL);
		if (!queue) {
			IWL_ERR(trans, "Not enough memory for tx queue\n");
			return -ENOMEM;
		}
		trans_pcie->txqs.txq[txq_id] = queue;
		ret = iwl_pcie_txq_alloc(trans, queue, queue_size, true);
		if (ret) {
			IWL_ERR(trans, "Tx %d queue alloc failed\n", txq_id);
			goto error;
		}
	} else {
		queue = trans_pcie->txqs.txq[txq_id];
	}

	ret = iwl_txq_init(trans, queue, queue_size,
			   (txq_id == trans_pcie->txqs.cmd.q_id));
	if (ret) {
		IWL_ERR(trans, "Tx %d queue init failed\n", txq_id);
		goto error;
	}
	trans_pcie->txqs.txq[txq_id]->id = txq_id;
	set_bit(txq_id, trans_pcie->txqs.queue_used);

	return 0;

error:
	iwl_txq_gen2_tx_free(trans);
	return ret;
}

/*************** HOST COMMAND QUEUE FUNCTIONS *****/

/*
 * iwl_pcie_gen2_enqueue_hcmd - enqueue a uCode command
 * @trans: transport private data
 * @cmd: a pointer to the ucode command structure
 *
 * The function returns < 0 values to indicate the operation
 * failed. On success, it returns the index (>= 0) of command in the
 * command queue.
 */
int iwl_pcie_gen2_enqueue_hcmd(struct iwl_trans *trans,
			       struct iwl_host_cmd *cmd)
{
	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
	struct iwl_txq *txq = trans_pcie->txqs.txq[trans_pcie->txqs.cmd.q_id];
	struct iwl_device_cmd *out_cmd;
	struct iwl_cmd_meta *out_meta;
	void *dup_buf = NULL;
	dma_addr_t phys_addr;
	int i, cmd_pos, idx;
	u16 copy_size, cmd_size, tb0_size;
	bool had_nocopy = false;
	u8 group_id = iwl_cmd_groupid(cmd->id);
	const u8 *cmddata[IWL_MAX_CMD_TBS_PER_TFD];
	u16 cmdlen[IWL_MAX_CMD_TBS_PER_TFD];
	struct iwl_tfh_tfd *tfd;
	unsigned long flags;

	if (WARN_ON(cmd->flags & CMD_BLOCK_TXQS))
		return -EINVAL;

	copy_size = sizeof(struct iwl_cmd_header_wide);
	cmd_size = sizeof(struct iwl_cmd_header_wide);

	for (i = 0; i < IWL_MAX_CMD_TBS_PER_TFD; i++) {
		cmddata[i] = cmd->data[i];
		cmdlen[i] = cmd->len[i];

		if (!cmd->len[i])
			continue;

		/* need at least IWL_FIRST_TB_SIZE copied */
		if (copy_size < IWL_FIRST_TB_SIZE) {
			int copy = IWL_FIRST_TB_SIZE - copy_size;

			if (copy > cmdlen[i])
				copy = cmdlen[i];
			cmdlen[i] -= copy;
			cmddata[i] += copy;
			copy_size += copy;
		}

		if (cmd->dataflags[i] & IWL_HCMD_DFL_NOCOPY) {
			had_nocopy = true;
			if (WARN_ON(cmd->dataflags[i] & IWL_HCMD_DFL_DUP)) {
				idx = -EINVAL;
				goto free_dup_buf;
			}
		} else if (cmd->dataflags[i] & IWL_HCMD_DFL_DUP) {
			/*
			 * This is also a chunk that isn't copied
			 * to the static buffer so set had_nocopy.
			 */
			had_nocopy = true;

			/* only allowed once */
			if (WARN_ON(dup_buf)) {
				idx = -EINVAL;
				goto free_dup_buf;
			}

			dup_buf = kmemdup(cmddata[i], cmdlen[i],
					  GFP_ATOMIC);
			if (!dup_buf)
				return -ENOMEM;
		} else {
			/* NOCOPY must not be followed by normal! */
			if (WARN_ON(had_nocopy)) {
				idx = -EINVAL;
				goto free_dup_buf;
			}
			copy_size += cmdlen[i];
		}
		cmd_size += cmd->len[i];
	}

	/*
	 * If any of the command structures end up being larger than the
	 * TFD_MAX_PAYLOAD_SIZE and they aren't dynamically allocated into
	 * separate TFDs, then we will need to increase the size of the buffers
	 */
	if (WARN(copy_size > TFD_MAX_PAYLOAD_SIZE,
		 "Command %s (%#x) is too large (%d bytes)\n",
		 iwl_get_cmd_string(trans, cmd->id), cmd->id, copy_size)) {
		idx = -EINVAL;
		goto free_dup_buf;
	}

	spin_lock_irqsave(&txq->lock, flags);

	idx = iwl_txq_get_cmd_index(txq, txq->write_ptr);
	tfd = iwl_txq_get_tfd(trans, txq, txq->write_ptr);
	memset(tfd, 0, sizeof(*tfd));

	if (iwl_txq_space(trans, txq) < ((cmd->flags & CMD_ASYNC) ? 2 : 1)) {
		spin_unlock_irqrestore(&txq->lock, flags);

		IWL_ERR(trans, "No space in command queue\n");
		iwl_op_mode_cmd_queue_full(trans->op_mode);
		idx = -ENOSPC;
		goto free_dup_buf;
	}

	out_cmd = txq->entries[idx].cmd;
	out_meta = &txq->entries[idx].meta;

	/* re-initialize, this also marks the SG list as unused */
	memset(out_meta, 0, sizeof(*out_meta));
	if (cmd->flags & CMD_WANT_SKB)
		out_meta->source = cmd;

	/* set up the header */
	out_cmd->hdr_wide.cmd = iwl_cmd_opcode(cmd->id);
	out_cmd->hdr_wide.group_id = group_id;
	out_cmd->hdr_wide.version = iwl_cmd_version(cmd->id);
	out_cmd->hdr_wide.length =
		cpu_to_le16(cmd_size - sizeof(struct iwl_cmd_header_wide));
	out_cmd->hdr_wide.reserved = 0;
	out_cmd->hdr_wide.sequence =
		cpu_to_le16(QUEUE_TO_SEQ(trans_pcie->txqs.cmd.q_id) |
			    INDEX_TO_SEQ(txq->write_ptr));

	cmd_pos = sizeof(struct iwl_cmd_header_wide);
	copy_size = sizeof(struct iwl_cmd_header_wide);

	/* and copy the data that needs to be copied */
	for (i = 0; i < IWL_MAX_CMD_TBS_PER_TFD; i++) {
		int copy;

		if (!cmd->len[i])
			continue;

		/* copy everything if not nocopy/dup */
		if (!(cmd->dataflags[i] & (IWL_HCMD_DFL_NOCOPY |
					   IWL_HCMD_DFL_DUP))) {
			copy = cmd->len[i];

			memcpy((u8 *)out_cmd + cmd_pos, cmd->data[i], copy);
			cmd_pos += copy;
			copy_size += copy;
			continue;
		}

		/*
		 * Otherwise we need at least IWL_FIRST_TB_SIZE copied
		 * in total (for bi-directional DMA), but copy up to what
		 * we can fit into the payload for debug dump purposes.
		 */
		copy = min_t(int, TFD_MAX_PAYLOAD_SIZE - cmd_pos, cmd->len[i]);

		memcpy((u8 *)out_cmd + cmd_pos, cmd->data[i], copy);
		cmd_pos += copy;

		/* However, treat copy_size the proper way, we need it below */
		if (copy_size < IWL_FIRST_TB_SIZE) {
			copy = IWL_FIRST_TB_SIZE - copy_size;

			if (copy > cmd->len[i])
				copy = cmd->len[i];
			copy_size += copy;
		}
	}

	IWL_DEBUG_HC(trans,
		     "Sending command %s (%.2x.%.2x), seq: 0x%04X, %d bytes at %d[%d]:%d\n",
		     iwl_get_cmd_string(trans, cmd->id), group_id,
		     out_cmd->hdr.cmd, le16_to_cpu(out_cmd->hdr.sequence),
		     cmd_size, txq->write_ptr, idx, trans_pcie->txqs.cmd.q_id);

	/* start the TFD with the minimum copy bytes */
	tb0_size = min_t(int, copy_size, IWL_FIRST_TB_SIZE);
	memcpy(&txq->first_tb_bufs[idx], out_cmd, tb0_size);
	iwl_txq_gen2_set_tb(trans, tfd, iwl_txq_get_first_tb_dma(txq, idx),
			    tb0_size);

	/* map first command fragment, if any remains */
	if (copy_size > tb0_size) {
		phys_addr = dma_map_single(trans->dev,
					   (u8 *)out_cmd + tb0_size,
					   copy_size - tb0_size,
					   DMA_TO_DEVICE);
		if (dma_mapping_error(trans->dev, phys_addr)) {
			idx = -ENOMEM;
			iwl_txq_gen2_tfd_unmap(trans, out_meta, tfd);
			goto out;
		}
		iwl_txq_gen2_set_tb(trans, tfd, phys_addr,
				    copy_size - tb0_size);
	}

	/* map the remaining (adjusted) nocopy/dup fragments */
	for (i = 0; i < IWL_MAX_CMD_TBS_PER_TFD; i++) {
		void *data = (void *)(uintptr_t)cmddata[i];

		if (!cmdlen[i])
			continue;
		if (!(cmd->dataflags[i] & (IWL_HCMD_DFL_NOCOPY |
					   IWL_HCMD_DFL_DUP)))
			continue;
		if (cmd->dataflags[i] & IWL_HCMD_DFL_DUP)
			data = dup_buf;
		phys_addr = dma_map_single(trans->dev, data,
					   cmdlen[i], DMA_TO_DEVICE);
		if (dma_mapping_error(trans->dev, phys_addr)) {
			idx = -ENOMEM;
			iwl_txq_gen2_tfd_unmap(trans, out_meta, tfd);
			goto out;
		}
		iwl_txq_gen2_set_tb(trans, tfd, phys_addr, cmdlen[i]);
	}

	BUILD_BUG_ON(IWL_TFH_NUM_TBS > sizeof(out_meta->tbs) * BITS_PER_BYTE);
	out_meta->flags = cmd->flags;
	if (WARN_ON_ONCE(txq->entries[idx].free_buf))
		kfree_sensitive(txq->entries[idx].free_buf);
	txq->entries[idx].free_buf = dup_buf;

	trace_iwlwifi_dev_hcmd(trans->dev, cmd, cmd_size, &out_cmd->hdr_wide);

	/* start timer if queue currently empty */
	if (txq->read_ptr == txq->write_ptr && txq->wd_timeout)
		mod_timer(&txq->stuck_timer, jiffies + txq->wd_timeout);

	spin_lock(&trans_pcie->reg_lock);
	/* Increment and update queue's write index */
	txq->write_ptr = iwl_txq_inc_wrap(trans, txq->write_ptr);
	iwl_txq_inc_wr_ptr(trans, txq);
	spin_unlock(&trans_pcie->reg_lock);

out:
	spin_unlock_irqrestore(&txq->lock, flags);
free_dup_buf:
	if (idx < 0)
		kfree(dup_buf);
	return idx;
}