1 // SPDX-License-Identifier: GPL-2.0-only 2 /**************************************************************************** 3 * Driver for Solarflare network controllers and boards 4 * Copyright 2022 Advanced Micro Devices, Inc. 5 * 6 * This program is free software; you can redistribute it and/or modify it 7 * under the terms of the GNU General Public License version 2 as published 8 * by the Free Software Foundation, incorporated herein by reference. 9 */ 10 11 #include "tc_counters.h" 12 #include "tc_encap_actions.h" 13 #include "mae_counter_format.h" 14 #include "mae.h" 15 #include "rx_common.h" 16 17 /* Counter-management hashtables */ 18 19 static const struct rhashtable_params efx_tc_counter_id_ht_params = { 20 .key_len = offsetof(struct efx_tc_counter_index, linkage), 21 .key_offset = 0, 22 .head_offset = offsetof(struct efx_tc_counter_index, linkage), 23 }; 24 25 static const struct rhashtable_params efx_tc_counter_ht_params = { 26 .key_len = offsetof(struct efx_tc_counter, linkage), 27 .key_offset = 0, 28 .head_offset = offsetof(struct efx_tc_counter, linkage), 29 }; 30 31 static void efx_tc_counter_free(void *ptr, void *__unused) 32 { 33 struct efx_tc_counter *cnt = ptr; 34 35 WARN_ON(!list_empty(&cnt->users)); 36 /* We'd like to synchronize_rcu() here, but unfortunately we aren't 37 * removing the element from the hashtable (it's not clear that's a 38 * safe thing to do in an rhashtable_free_and_destroy free_fn), so 39 * threads could still be obtaining new pointers to *cnt if they can 40 * race against this function at all. 41 */ 42 flush_work(&cnt->work); 43 EFX_WARN_ON_PARANOID(spin_is_locked(&cnt->lock)); 44 kfree(cnt); 45 } 46 47 static void efx_tc_counter_id_free(void *ptr, void *__unused) 48 { 49 struct efx_tc_counter_index *ctr = ptr; 50 51 WARN_ON(refcount_read(&ctr->ref)); 52 kfree(ctr); 53 } 54 55 int efx_tc_init_counters(struct efx_nic *efx) 56 { 57 int rc; 58 59 rc = rhashtable_init(&efx->tc->counter_id_ht, &efx_tc_counter_id_ht_params); 60 if (rc < 0) 61 goto fail_counter_id_ht; 62 rc = rhashtable_init(&efx->tc->counter_ht, &efx_tc_counter_ht_params); 63 if (rc < 0) 64 goto fail_counter_ht; 65 return 0; 66 fail_counter_ht: 67 rhashtable_destroy(&efx->tc->counter_id_ht); 68 fail_counter_id_ht: 69 return rc; 70 } 71 72 /* Only call this in init failure teardown. 73 * Normal exit should fini instead as there may be entries in the table. 74 */ 75 void efx_tc_destroy_counters(struct efx_nic *efx) 76 { 77 rhashtable_destroy(&efx->tc->counter_ht); 78 rhashtable_destroy(&efx->tc->counter_id_ht); 79 } 80 81 void efx_tc_fini_counters(struct efx_nic *efx) 82 { 83 rhashtable_free_and_destroy(&efx->tc->counter_id_ht, efx_tc_counter_id_free, NULL); 84 rhashtable_free_and_destroy(&efx->tc->counter_ht, efx_tc_counter_free, NULL); 85 } 86 87 static void efx_tc_counter_work(struct work_struct *work) 88 { 89 struct efx_tc_counter *cnt = container_of(work, struct efx_tc_counter, work); 90 struct efx_tc_encap_action *encap; 91 struct efx_tc_action_set *act; 92 unsigned long touched; 93 struct neighbour *n; 94 95 spin_lock_bh(&cnt->lock); 96 touched = READ_ONCE(cnt->touched); 97 98 list_for_each_entry(act, &cnt->users, count_user) { 99 encap = act->encap_md; 100 if (!encap) 101 continue; 102 if (!encap->neigh) /* can't happen */ 103 continue; 104 if (time_after_eq(encap->neigh->used, touched)) 105 continue; 106 encap->neigh->used = touched; 107 /* We have passed traffic using this ARP entry, so 108 * indicate to the ARP cache that it's still active 109 */ 110 if (encap->neigh->dst_ip) 111 n = neigh_lookup(&arp_tbl, &encap->neigh->dst_ip, 112 encap->neigh->egdev); 113 else 114 #if IS_ENABLED(CONFIG_IPV6) 115 n = neigh_lookup(ipv6_stub->nd_tbl, 116 &encap->neigh->dst_ip6, 117 encap->neigh->egdev); 118 #else 119 n = NULL; 120 #endif 121 if (!n) 122 continue; 123 124 neigh_event_send(n, NULL); 125 neigh_release(n); 126 } 127 spin_unlock_bh(&cnt->lock); 128 } 129 130 /* Counter allocation */ 131 132 struct efx_tc_counter *efx_tc_flower_allocate_counter(struct efx_nic *efx, 133 int type) 134 { 135 struct efx_tc_counter *cnt; 136 int rc, rc2; 137 138 cnt = kzalloc(sizeof(*cnt), GFP_USER); 139 if (!cnt) 140 return ERR_PTR(-ENOMEM); 141 142 spin_lock_init(&cnt->lock); 143 INIT_WORK(&cnt->work, efx_tc_counter_work); 144 cnt->touched = jiffies; 145 cnt->type = type; 146 147 rc = efx_mae_allocate_counter(efx, cnt); 148 if (rc) 149 goto fail1; 150 INIT_LIST_HEAD(&cnt->users); 151 rc = rhashtable_insert_fast(&efx->tc->counter_ht, &cnt->linkage, 152 efx_tc_counter_ht_params); 153 if (rc) 154 goto fail2; 155 return cnt; 156 fail2: 157 /* If we get here, it implies that we couldn't insert into the table, 158 * which in turn probably means that the fw_id was already taken. 159 * In that case, it's unclear whether we really 'own' the fw_id; but 160 * the firmware seemed to think we did, so it's proper to free it. 161 */ 162 rc2 = efx_mae_free_counter(efx, cnt); 163 if (rc2) 164 netif_warn(efx, hw, efx->net_dev, 165 "Failed to free MAE counter %u, rc %d\n", 166 cnt->fw_id, rc2); 167 fail1: 168 kfree(cnt); 169 return ERR_PTR(rc > 0 ? -EIO : rc); 170 } 171 172 void efx_tc_flower_release_counter(struct efx_nic *efx, 173 struct efx_tc_counter *cnt) 174 { 175 int rc; 176 177 rhashtable_remove_fast(&efx->tc->counter_ht, &cnt->linkage, 178 efx_tc_counter_ht_params); 179 rc = efx_mae_free_counter(efx, cnt); 180 if (rc) 181 netif_warn(efx, hw, efx->net_dev, 182 "Failed to free MAE counter %u, rc %d\n", 183 cnt->fw_id, rc); 184 WARN_ON(!list_empty(&cnt->users)); 185 /* This doesn't protect counter updates coming in arbitrarily long 186 * after we deleted the counter. The RCU just ensures that we won't 187 * free the counter while another thread has a pointer to it. 188 * Ensuring we don't update the wrong counter if the ID gets re-used 189 * is handled by the generation count. 190 */ 191 synchronize_rcu(); 192 flush_work(&cnt->work); 193 EFX_WARN_ON_PARANOID(spin_is_locked(&cnt->lock)); 194 kfree(cnt); 195 } 196 197 static struct efx_tc_counter *efx_tc_flower_find_counter_by_fw_id( 198 struct efx_nic *efx, int type, u32 fw_id) 199 { 200 struct efx_tc_counter key = {}; 201 202 key.fw_id = fw_id; 203 key.type = type; 204 205 return rhashtable_lookup_fast(&efx->tc->counter_ht, &key, 206 efx_tc_counter_ht_params); 207 } 208 209 /* TC cookie to counter mapping */ 210 211 void efx_tc_flower_put_counter_index(struct efx_nic *efx, 212 struct efx_tc_counter_index *ctr) 213 { 214 if (!refcount_dec_and_test(&ctr->ref)) 215 return; /* still in use */ 216 rhashtable_remove_fast(&efx->tc->counter_id_ht, &ctr->linkage, 217 efx_tc_counter_id_ht_params); 218 efx_tc_flower_release_counter(efx, ctr->cnt); 219 kfree(ctr); 220 } 221 222 struct efx_tc_counter_index *efx_tc_flower_get_counter_index( 223 struct efx_nic *efx, unsigned long cookie, 224 enum efx_tc_counter_type type) 225 { 226 struct efx_tc_counter_index *ctr, *old; 227 struct efx_tc_counter *cnt; 228 229 ctr = kzalloc(sizeof(*ctr), GFP_USER); 230 if (!ctr) 231 return ERR_PTR(-ENOMEM); 232 ctr->cookie = cookie; 233 old = rhashtable_lookup_get_insert_fast(&efx->tc->counter_id_ht, 234 &ctr->linkage, 235 efx_tc_counter_id_ht_params); 236 if (old) { 237 /* don't need our new entry */ 238 kfree(ctr); 239 if (IS_ERR(old)) /* oh dear, it's actually an error */ 240 return ERR_CAST(old); 241 if (!refcount_inc_not_zero(&old->ref)) 242 return ERR_PTR(-EAGAIN); 243 /* existing entry found */ 244 ctr = old; 245 } else { 246 cnt = efx_tc_flower_allocate_counter(efx, type); 247 if (IS_ERR(cnt)) { 248 rhashtable_remove_fast(&efx->tc->counter_id_ht, 249 &ctr->linkage, 250 efx_tc_counter_id_ht_params); 251 kfree(ctr); 252 return ERR_CAST(cnt); 253 } 254 ctr->cnt = cnt; 255 refcount_set(&ctr->ref, 1); 256 } 257 return ctr; 258 } 259 260 struct efx_tc_counter_index *efx_tc_flower_find_counter_index( 261 struct efx_nic *efx, unsigned long cookie) 262 { 263 struct efx_tc_counter_index key = {}; 264 265 key.cookie = cookie; 266 return rhashtable_lookup_fast(&efx->tc->counter_id_ht, &key, 267 efx_tc_counter_id_ht_params); 268 } 269 270 /* TC Channel. Counter updates are delivered on this channel's RXQ. */ 271 272 static void efx_tc_handle_no_channel(struct efx_nic *efx) 273 { 274 netif_warn(efx, drv, efx->net_dev, 275 "MAE counters require MSI-X and 1 additional interrupt vector.\n"); 276 } 277 278 static int efx_tc_probe_channel(struct efx_channel *channel) 279 { 280 struct efx_rx_queue *rx_queue = &channel->rx_queue; 281 282 channel->irq_moderation_us = 0; 283 rx_queue->core_index = 0; 284 285 INIT_WORK(&rx_queue->grant_work, efx_mae_counters_grant_credits); 286 287 return 0; 288 } 289 290 static int efx_tc_start_channel(struct efx_channel *channel) 291 { 292 struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel); 293 struct efx_nic *efx = channel->efx; 294 295 return efx_mae_start_counters(efx, rx_queue); 296 } 297 298 static void efx_tc_stop_channel(struct efx_channel *channel) 299 { 300 struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel); 301 struct efx_nic *efx = channel->efx; 302 int rc; 303 304 rc = efx_mae_stop_counters(efx, rx_queue); 305 if (rc) 306 netif_warn(efx, drv, efx->net_dev, 307 "Failed to stop MAE counters streaming, rc=%d.\n", 308 rc); 309 rx_queue->grant_credits = false; 310 flush_work(&rx_queue->grant_work); 311 } 312 313 static void efx_tc_remove_channel(struct efx_channel *channel) 314 { 315 } 316 317 static void efx_tc_get_channel_name(struct efx_channel *channel, 318 char *buf, size_t len) 319 { 320 snprintf(buf, len, "%s-mae", channel->efx->name); 321 } 322 323 static void efx_tc_counter_update(struct efx_nic *efx, 324 enum efx_tc_counter_type counter_type, 325 u32 counter_idx, u64 packets, u64 bytes, 326 u32 mark) 327 { 328 struct efx_tc_counter *cnt; 329 330 rcu_read_lock(); /* Protect against deletion of 'cnt' */ 331 cnt = efx_tc_flower_find_counter_by_fw_id(efx, counter_type, counter_idx); 332 if (!cnt) { 333 /* This can legitimately happen when a counter is removed, 334 * with updates for the counter still in-flight; however this 335 * should be an infrequent occurrence. 336 */ 337 if (net_ratelimit()) 338 netif_dbg(efx, drv, efx->net_dev, 339 "Got update for unwanted MAE counter %u type %u\n", 340 counter_idx, counter_type); 341 goto out; 342 } 343 344 spin_lock_bh(&cnt->lock); 345 if ((s32)mark - (s32)cnt->gen < 0) { 346 /* This counter update packet is from before the counter was 347 * allocated; thus it must be for a previous counter with 348 * the same ID that has since been freed, and it should be 349 * ignored. 350 */ 351 } else { 352 /* Update latest seen generation count. This ensures that 353 * even a long-lived counter won't start getting ignored if 354 * the generation count wraps around, unless it somehow 355 * manages to go 1<<31 generations without an update. 356 */ 357 cnt->gen = mark; 358 /* update counter values */ 359 cnt->packets += packets; 360 cnt->bytes += bytes; 361 cnt->touched = jiffies; 362 } 363 spin_unlock_bh(&cnt->lock); 364 schedule_work(&cnt->work); 365 out: 366 rcu_read_unlock(); 367 } 368 369 static void efx_tc_rx_version_1(struct efx_nic *efx, const u8 *data, u32 mark) 370 { 371 u16 n_counters, i; 372 373 /* Header format: 374 * + | 0 | 1 | 2 | 3 | 375 * 0 |version | reserved | 376 * 4 | seq_index | n_counters | 377 */ 378 379 n_counters = le16_to_cpu(*(const __le16 *)(data + 6)); 380 381 /* Counter update entry format: 382 * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | a | b | c | d | e | f | 383 * | counter_idx | packet_count | byte_count | 384 */ 385 for (i = 0; i < n_counters; i++) { 386 const void *entry = data + 8 + 16 * i; 387 u64 packet_count, byte_count; 388 u32 counter_idx; 389 390 counter_idx = le32_to_cpu(*(const __le32 *)entry); 391 packet_count = le32_to_cpu(*(const __le32 *)(entry + 4)) | 392 ((u64)le16_to_cpu(*(const __le16 *)(entry + 8)) << 32); 393 byte_count = le16_to_cpu(*(const __le16 *)(entry + 10)) | 394 ((u64)le32_to_cpu(*(const __le32 *)(entry + 12)) << 16); 395 efx_tc_counter_update(efx, EFX_TC_COUNTER_TYPE_AR, counter_idx, 396 packet_count, byte_count, mark); 397 } 398 } 399 400 #define TCV2_HDR_PTR(pkt, field) \ 401 ((void)BUILD_BUG_ON_ZERO(ERF_SC_PACKETISER_HEADER_##field##_LBN & 7), \ 402 (pkt) + ERF_SC_PACKETISER_HEADER_##field##_LBN / 8) 403 #define TCV2_HDR_BYTE(pkt, field) \ 404 ((void)BUILD_BUG_ON_ZERO(ERF_SC_PACKETISER_HEADER_##field##_WIDTH != 8),\ 405 *TCV2_HDR_PTR(pkt, field)) 406 #define TCV2_HDR_WORD(pkt, field) \ 407 ((void)BUILD_BUG_ON_ZERO(ERF_SC_PACKETISER_HEADER_##field##_WIDTH != 16),\ 408 (void)BUILD_BUG_ON_ZERO(ERF_SC_PACKETISER_HEADER_##field##_LBN & 15), \ 409 *(__force const __le16 *)TCV2_HDR_PTR(pkt, field)) 410 #define TCV2_PKT_PTR(pkt, poff, i, field) \ 411 ((void)BUILD_BUG_ON_ZERO(ERF_SC_PACKETISER_PAYLOAD_##field##_LBN & 7), \ 412 (pkt) + ERF_SC_PACKETISER_PAYLOAD_##field##_LBN/8 + poff + \ 413 i * ER_RX_SL_PACKETISER_PAYLOAD_WORD_SIZE) 414 415 /* Read a little-endian 48-bit field with 16-bit alignment */ 416 static u64 efx_tc_read48(const __le16 *field) 417 { 418 u64 out = 0; 419 int i; 420 421 for (i = 0; i < 3; i++) 422 out |= (u64)le16_to_cpu(field[i]) << (i * 16); 423 return out; 424 } 425 426 static enum efx_tc_counter_type efx_tc_rx_version_2(struct efx_nic *efx, 427 const u8 *data, u32 mark) 428 { 429 u8 payload_offset, header_offset, ident; 430 enum efx_tc_counter_type type; 431 u16 n_counters, i; 432 433 ident = TCV2_HDR_BYTE(data, IDENTIFIER); 434 switch (ident) { 435 case ERF_SC_PACKETISER_HEADER_IDENTIFIER_AR: 436 type = EFX_TC_COUNTER_TYPE_AR; 437 break; 438 case ERF_SC_PACKETISER_HEADER_IDENTIFIER_CT: 439 type = EFX_TC_COUNTER_TYPE_CT; 440 break; 441 case ERF_SC_PACKETISER_HEADER_IDENTIFIER_OR: 442 type = EFX_TC_COUNTER_TYPE_OR; 443 break; 444 default: 445 if (net_ratelimit()) 446 netif_err(efx, drv, efx->net_dev, 447 "ignored v2 MAE counter packet (bad identifier %u" 448 "), counters may be inaccurate\n", ident); 449 return EFX_TC_COUNTER_TYPE_MAX; 450 } 451 header_offset = TCV2_HDR_BYTE(data, HEADER_OFFSET); 452 /* mae_counter_format.h implies that this offset is fixed, since it 453 * carries on with SOP-based LBNs for the fields in this header 454 */ 455 if (header_offset != ERF_SC_PACKETISER_HEADER_HEADER_OFFSET_DEFAULT) { 456 if (net_ratelimit()) 457 netif_err(efx, drv, efx->net_dev, 458 "choked on v2 MAE counter packet (bad header_offset %u" 459 "), counters may be inaccurate\n", header_offset); 460 return EFX_TC_COUNTER_TYPE_MAX; 461 } 462 payload_offset = TCV2_HDR_BYTE(data, PAYLOAD_OFFSET); 463 n_counters = le16_to_cpu(TCV2_HDR_WORD(data, COUNT)); 464 465 for (i = 0; i < n_counters; i++) { 466 const void *counter_idx_p, *packet_count_p, *byte_count_p; 467 u64 packet_count, byte_count; 468 u32 counter_idx; 469 470 /* 24-bit field with 32-bit alignment */ 471 counter_idx_p = TCV2_PKT_PTR(data, payload_offset, i, COUNTER_INDEX); 472 BUILD_BUG_ON(ERF_SC_PACKETISER_PAYLOAD_COUNTER_INDEX_WIDTH != 24); 473 BUILD_BUG_ON(ERF_SC_PACKETISER_PAYLOAD_COUNTER_INDEX_LBN & 31); 474 counter_idx = le32_to_cpu(*(const __le32 *)counter_idx_p) & 0xffffff; 475 /* 48-bit field with 16-bit alignment */ 476 packet_count_p = TCV2_PKT_PTR(data, payload_offset, i, PACKET_COUNT); 477 BUILD_BUG_ON(ERF_SC_PACKETISER_PAYLOAD_PACKET_COUNT_WIDTH != 48); 478 BUILD_BUG_ON(ERF_SC_PACKETISER_PAYLOAD_PACKET_COUNT_LBN & 15); 479 packet_count = efx_tc_read48((const __le16 *)packet_count_p); 480 /* 48-bit field with 16-bit alignment */ 481 byte_count_p = TCV2_PKT_PTR(data, payload_offset, i, BYTE_COUNT); 482 BUILD_BUG_ON(ERF_SC_PACKETISER_PAYLOAD_BYTE_COUNT_WIDTH != 48); 483 BUILD_BUG_ON(ERF_SC_PACKETISER_PAYLOAD_BYTE_COUNT_LBN & 15); 484 byte_count = efx_tc_read48((const __le16 *)byte_count_p); 485 486 if (type == EFX_TC_COUNTER_TYPE_CT) { 487 /* CT counters are 1-bit saturating counters to update 488 * the lastuse time in CT stats. A received CT counter 489 * should have packet counter to 0 and only LSB bit on 490 * in byte counter. 491 */ 492 if (packet_count || byte_count != 1) 493 netdev_warn_once(efx->net_dev, 494 "CT counter with inconsistent state (%llu, %llu)\n", 495 packet_count, byte_count); 496 /* Do not increment the driver's byte counter */ 497 byte_count = 0; 498 } 499 500 efx_tc_counter_update(efx, type, counter_idx, packet_count, 501 byte_count, mark); 502 } 503 return type; 504 } 505 506 /* We always swallow the packet, whether successful or not, since it's not 507 * a network packet and shouldn't ever be forwarded to the stack. 508 * @mark is the generation count for counter allocations. 509 */ 510 static bool efx_tc_rx(struct efx_rx_queue *rx_queue, u32 mark) 511 { 512 struct efx_channel *channel = efx_rx_queue_channel(rx_queue); 513 struct efx_rx_buffer *rx_buf = efx_rx_buffer(rx_queue, 514 channel->rx_pkt_index); 515 const u8 *data = efx_rx_buf_va(rx_buf); 516 struct efx_nic *efx = rx_queue->efx; 517 enum efx_tc_counter_type type; 518 u8 version; 519 520 /* version is always first byte of packet */ 521 version = *data; 522 switch (version) { 523 case 1: 524 type = EFX_TC_COUNTER_TYPE_AR; 525 efx_tc_rx_version_1(efx, data, mark); 526 break; 527 case ERF_SC_PACKETISER_HEADER_VERSION_VALUE: // 2 528 type = efx_tc_rx_version_2(efx, data, mark); 529 break; 530 default: 531 if (net_ratelimit()) 532 netif_err(efx, drv, efx->net_dev, 533 "choked on MAE counter packet (bad version %u" 534 "); counters may be inaccurate\n", 535 version); 536 goto out; 537 } 538 539 if (type < EFX_TC_COUNTER_TYPE_MAX) { 540 /* Update seen_gen unconditionally, to avoid a missed wakeup if 541 * we race with efx_mae_stop_counters(). 542 */ 543 efx->tc->seen_gen[type] = mark; 544 if (efx->tc->flush_counters && 545 (s32)(efx->tc->flush_gen[type] - mark) <= 0) 546 wake_up(&efx->tc->flush_wq); 547 } 548 out: 549 efx_free_rx_buffers(rx_queue, rx_buf, 1); 550 channel->rx_pkt_n_frags = 0; 551 return true; 552 } 553 554 const struct efx_channel_type efx_tc_channel_type = { 555 .handle_no_channel = efx_tc_handle_no_channel, 556 .pre_probe = efx_tc_probe_channel, 557 .start = efx_tc_start_channel, 558 .stop = efx_tc_stop_channel, 559 .post_remove = efx_tc_remove_channel, 560 .get_name = efx_tc_get_channel_name, 561 .receive_raw = efx_tc_rx, 562 .keep_eventq = true, 563 }; 564