1 /****************************************************************************** 2 * xen_netif.h 3 * 4 * Unified network-device I/O interface for Xen guest OSes. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to 8 * deal in the Software without restriction, including without limitation the 9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 10 * sell copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 22 * DEALINGS IN THE SOFTWARE. 23 * 24 * Copyright (c) 2003-2004, Keir Fraser 25 */ 26 27 #ifndef __XEN_PUBLIC_IO_XEN_NETIF_H__ 28 #define __XEN_PUBLIC_IO_XEN_NETIF_H__ 29 30 #include "ring.h" 31 #include "../grant_table.h" 32 33 /* 34 * Older implementation of Xen network frontend / backend has an 35 * implicit dependency on the MAX_SKB_FRAGS as the maximum number of 36 * ring slots a skb can use. Netfront / netback may not work as 37 * expected when frontend and backend have different MAX_SKB_FRAGS. 38 * 39 * A better approach is to add mechanism for netfront / netback to 40 * negotiate this value. However we cannot fix all possible 41 * frontends, so we need to define a value which states the minimum 42 * slots backend must support. 
43 * 44 * The minimum value derives from older Linux kernel's MAX_SKB_FRAGS 45 * (18), which is proved to work with most frontends. Any new backend 46 * which doesn't negotiate with frontend should expect frontend to 47 * send a valid packet using slots up to this value. 48 */ 49 #define XEN_NETIF_NR_SLOTS_MIN 18 50 51 /* 52 * Notifications after enqueuing any type of message should be conditional on 53 * the appropriate req_event or rsp_event field in the shared ring. 54 * If the client sends notification for rx requests then it should specify 55 * feature 'feature-rx-notify' via xenbus. Otherwise the backend will assume 56 * that it cannot safely queue packets (as it may not be kicked to send them). 57 */ 58 59 /* 60 * "feature-split-event-channels" is introduced to separate guest TX 61 * and RX notification. Backend either doesn't support this feature or 62 * advertises it via xenstore as 0 (disabled) or 1 (enabled). 63 * 64 * To make use of this feature, frontend should allocate two event 65 * channels for TX and RX, advertise them to backend as 66 * "event-channel-tx" and "event-channel-rx" respectively. If frontend 67 * doesn't want to use this feature, it just writes "event-channel" 68 * node as before. 69 */ 70 71 /* 72 * Multiple transmit and receive queues: 73 * If supported, the backend will write the key "multi-queue-max-queues" to 74 * the directory for that vif, and set its value to the maximum supported 75 * number of queues. 76 * Frontends that are aware of this feature and wish to use it can write the 77 * key "multi-queue-num-queues", set to the number they wish to use, which 78 * must be greater than zero, and no more than the value reported by the backend 79 * in "multi-queue-max-queues". 80 * 81 * Queues replicate the shared rings and event channels. 82 * "feature-split-event-channels" may optionally be used when using 83 * multiple queues, but is not mandatory. 84 * 85 * Each queue consists of one shared ring pair, i.e. 
there must be the same
 * number of tx and rx rings.
 *
 * For frontends requesting just one queue, the usual event-channel and
 * ring-ref keys are written as before, simplifying the backend processing
 * to avoid distinguishing between a frontend that doesn't understand the
 * multi-queue feature, and one that does, but requested only one queue.
 *
 * Frontends requesting two or more queues must not write the toplevel
 * event-channel (or event-channel-{tx,rx}) and {tx,rx}-ring-ref keys,
 * instead writing those keys under sub-keys having the name "queue-N" where
 * N is the integer ID of the queue for which those keys belong. Queues
 * are indexed from zero. For example, a frontend with two queues and split
 * event channels must write the following set of queue-related keys:
 *
 * /local/domain/1/device/vif/0/multi-queue-num-queues = "2"
 * /local/domain/1/device/vif/0/queue-0 = ""
 * /local/domain/1/device/vif/0/queue-0/tx-ring-ref = "<ring-ref-tx0>"
 * /local/domain/1/device/vif/0/queue-0/rx-ring-ref = "<ring-ref-rx0>"
 * /local/domain/1/device/vif/0/queue-0/event-channel-tx = "<evtchn-tx0>"
 * /local/domain/1/device/vif/0/queue-0/event-channel-rx = "<evtchn-rx0>"
 * /local/domain/1/device/vif/0/queue-1 = ""
 * /local/domain/1/device/vif/0/queue-1/tx-ring-ref = "<ring-ref-tx1>"
 * /local/domain/1/device/vif/0/queue-1/rx-ring-ref = "<ring-ref-rx1>"
 * /local/domain/1/device/vif/0/queue-1/event-channel-tx = "<evtchn-tx1>"
 * /local/domain/1/device/vif/0/queue-1/event-channel-rx = "<evtchn-rx1>"
 *
 * If there is any inconsistency in the XenStore data, the backend may
 * choose not to connect any queues, instead treating the request as an
 * error. This includes scenarios where more (or fewer) queues were
 * requested than the frontend provided details for.
116 * 117 * Mapping of packets to queues is considered to be a function of the 118 * transmitting system (backend or frontend) and is not negotiated 119 * between the two. Guests are free to transmit packets on any queue 120 * they choose, provided it has been set up correctly. Guests must be 121 * prepared to receive packets on any queue they have requested be set up. 122 */ 123 124 /* 125 * "feature-no-csum-offload" should be used to turn IPv4 TCP/UDP checksum 126 * offload off or on. If it is missing then the feature is assumed to be on. 127 * "feature-ipv6-csum-offload" should be used to turn IPv6 TCP/UDP checksum 128 * offload on or off. If it is missing then the feature is assumed to be off. 129 */ 130 131 /* 132 * "feature-gso-tcpv4" and "feature-gso-tcpv6" advertise the capability to 133 * handle large TCP packets (in IPv4 or IPv6 form respectively). Neither 134 * frontends nor backends are assumed to be capable unless the flags are 135 * present. 136 */ 137 138 /* 139 * "feature-multicast-control" and "feature-dynamic-multicast-control" 140 * advertise the capability to filter ethernet multicast packets in the 141 * backend. If the frontend wishes to take advantage of this feature then 142 * it may set "request-multicast-control". If the backend only advertises 143 * "feature-multicast-control" then "request-multicast-control" must be set 144 * before the frontend moves into the connected state. The backend will 145 * sample the value on this state transition and any subsequent change in 146 * value will have no effect. However, if the backend also advertises 147 * "feature-dynamic-multicast-control" then "request-multicast-control" 148 * may be set by the frontend at any time. In this case, the backend will 149 * watch the value and re-sample on watch events. 
150 * 151 * If the sampled value of "request-multicast-control" is set then the 152 * backend transmit side should no longer flood multicast packets to the 153 * frontend, it should instead drop any multicast packet that does not 154 * match in a filter list. 155 * The list is amended by the frontend by sending dummy transmit requests 156 * containing XEN_NETIF_EXTRA_TYPE_MCAST_{ADD,DEL} extra-info fragments as 157 * specified below. 158 * Note that the filter list may be amended even if the sampled value of 159 * "request-multicast-control" is not set, however the filter should only 160 * be applied if it is set. 161 */ 162 163 /* 164 * "xdp-headroom" is used to request that extra space is added 165 * for XDP processing. The value is measured in bytes and passed by 166 * the frontend to be consistent between both ends. 167 * If the value is greater than zero that means that 168 * an RX response is going to be passed to an XDP program for processing. 169 * XEN_NETIF_MAX_XDP_HEADROOM defines the maximum headroom offset in bytes 170 * 171 * "feature-xdp-headroom" is set to "1" by the netback side like other features 172 * so a guest can check if an XDP program can be processed. 173 */ 174 #define XEN_NETIF_MAX_XDP_HEADROOM 0x7FFF 175 176 /* 177 * Control ring 178 * ============ 179 * 180 * Some features, such as hashing (detailed below), require a 181 * significant amount of out-of-band data to be passed from frontend to 182 * backend. Use of xenstore is not suitable for large quantities of data 183 * because of quota limitations and so a dedicated 'control ring' is used. 
184 * The ability of the backend to use a control ring is advertised by 185 * setting: 186 * 187 * /local/domain/X/backend/<domid>/<vif>/feature-ctrl-ring = "1" 188 * 189 * The frontend provides a control ring to the backend by setting: 190 * 191 * /local/domain/<domid>/device/vif/<vif>/ctrl-ring-ref = <gref> 192 * /local/domain/<domid>/device/vif/<vif>/event-channel-ctrl = <port> 193 * 194 * where <gref> is the grant reference of the shared page used to 195 * implement the control ring and <port> is an event channel to be used 196 * as a mailbox interrupt. These keys must be set before the frontend 197 * moves into the connected state. 198 * 199 * The control ring uses a fixed request/response message size and is 200 * balanced (i.e. one request to one response), so operationally it is much 201 * the same as a transmit or receive ring. 202 * Note that there is no requirement that responses are issued in the same 203 * order as requests. 204 */ 205 206 /* 207 * Hash types 208 * ========== 209 * 210 * For the purposes of the definitions below, 'Packet[]' is an array of 211 * octets containing an IP packet without options, 'Array[X..Y]' means a 212 * sub-array of 'Array' containing bytes X thru Y inclusive, and '+' is 213 * used to indicate concatenation of arrays. 
214 */ 215 216 /* 217 * A hash calculated over an IP version 4 header as follows: 218 * 219 * Buffer[0..8] = Packet[12..15] (source address) + 220 * Packet[16..19] (destination address) 221 * 222 * Result = Hash(Buffer, 8) 223 */ 224 #define _XEN_NETIF_CTRL_HASH_TYPE_IPV4 0 225 #define XEN_NETIF_CTRL_HASH_TYPE_IPV4 \ 226 (1 << _XEN_NETIF_CTRL_HASH_TYPE_IPV4) 227 228 /* 229 * A hash calculated over an IP version 4 header and TCP header as 230 * follows: 231 * 232 * Buffer[0..12] = Packet[12..15] (source address) + 233 * Packet[16..19] (destination address) + 234 * Packet[20..21] (source port) + 235 * Packet[22..23] (destination port) 236 * 237 * Result = Hash(Buffer, 12) 238 */ 239 #define _XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP 1 240 #define XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP \ 241 (1 << _XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP) 242 243 /* 244 * A hash calculated over an IP version 6 header as follows: 245 * 246 * Buffer[0..32] = Packet[8..23] (source address ) + 247 * Packet[24..39] (destination address) 248 * 249 * Result = Hash(Buffer, 32) 250 */ 251 #define _XEN_NETIF_CTRL_HASH_TYPE_IPV6 2 252 #define XEN_NETIF_CTRL_HASH_TYPE_IPV6 \ 253 (1 << _XEN_NETIF_CTRL_HASH_TYPE_IPV6) 254 255 /* 256 * A hash calculated over an IP version 6 header and TCP header as 257 * follows: 258 * 259 * Buffer[0..36] = Packet[8..23] (source address) + 260 * Packet[24..39] (destination address) + 261 * Packet[40..41] (source port) + 262 * Packet[42..43] (destination port) 263 * 264 * Result = Hash(Buffer, 36) 265 */ 266 #define _XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP 3 267 #define XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP \ 268 (1 << _XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP) 269 270 /* 271 * Hash algorithms 272 * =============== 273 */ 274 275 #define XEN_NETIF_CTRL_HASH_ALGORITHM_NONE 0 276 277 /* 278 * Toeplitz hash: 279 */ 280 281 #define XEN_NETIF_CTRL_HASH_ALGORITHM_TOEPLITZ 1 282 283 /* 284 * This algorithm uses a 'key' as well as the data buffer itself. 
285 * (Buffer[] and Key[] are treated as shift-registers where the MSB of 286 * Buffer/Key[0] is considered 'left-most' and the LSB of Buffer/Key[N-1] 287 * is the 'right-most'). 288 * 289 * Value = 0 290 * For number of bits in Buffer[] 291 * If (left-most bit of Buffer[] is 1) 292 * Value ^= left-most 32 bits of Key[] 293 * Key[] << 1 294 * Buffer[] << 1 295 * 296 * The code below is provided for convenience where an operating system 297 * does not already provide an implementation. 298 */ 299 #ifdef XEN_NETIF_DEFINE_TOEPLITZ 300 static uint32_t xen_netif_toeplitz_hash(const uint8_t *key, 301 unsigned int keylen, 302 const uint8_t *buf, unsigned int buflen) 303 { 304 unsigned int keyi, bufi; 305 uint64_t prefix = 0; 306 uint64_t hash = 0; 307 308 /* Pre-load prefix with the first 8 bytes of the key */ 309 for (keyi = 0; keyi < 8; keyi++) { 310 prefix <<= 8; 311 prefix |= (keyi < keylen) ? key[keyi] : 0; 312 } 313 314 for (bufi = 0; bufi < buflen; bufi++) { 315 uint8_t byte = buf[bufi]; 316 unsigned int bit; 317 318 for (bit = 0; bit < 8; bit++) { 319 if (byte & 0x80) 320 hash ^= prefix; 321 prefix <<= 1; 322 byte <<= 1; 323 } 324 325 /* 326 * 'prefix' has now been left-shifted by 8, so 327 * OR in the next byte. 328 */ 329 prefix |= (keyi < keylen) ? key[keyi] : 0; 330 keyi++; 331 } 332 333 /* The valid part of the hash is in the upper 32 bits. */ 334 return hash >> 32; 335 } 336 #endif /* XEN_NETIF_DEFINE_TOEPLITZ */ 337 338 /* 339 * Control requests (struct xen_netif_ctrl_request) 340 * ================================================ 341 * 342 * All requests have the following format: 343 * 344 * 0 1 2 3 4 5 6 7 octet 345 * +-----+-----+-----+-----+-----+-----+-----+-----+ 346 * | id | type | data[0] | 347 * +-----+-----+-----+-----+-----+-----+-----+-----+ 348 * | data[1] | data[2] | 349 * +-----+-----+-----+-----+-----------------------+ 350 * 351 * id: the request identifier, echoed in response. 
352 * type: the type of request (see below) 353 * data[]: any data associated with the request (determined by type) 354 */ 355 356 struct xen_netif_ctrl_request { 357 uint16_t id; 358 uint16_t type; 359 360 #define XEN_NETIF_CTRL_TYPE_INVALID 0 361 #define XEN_NETIF_CTRL_TYPE_GET_HASH_FLAGS 1 362 #define XEN_NETIF_CTRL_TYPE_SET_HASH_FLAGS 2 363 #define XEN_NETIF_CTRL_TYPE_SET_HASH_KEY 3 364 #define XEN_NETIF_CTRL_TYPE_GET_HASH_MAPPING_SIZE 4 365 #define XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING_SIZE 5 366 #define XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING 6 367 #define XEN_NETIF_CTRL_TYPE_SET_HASH_ALGORITHM 7 368 369 uint32_t data[3]; 370 }; 371 372 /* 373 * Control responses (struct xen_netif_ctrl_response) 374 * ================================================== 375 * 376 * All responses have the following format: 377 * 378 * 0 1 2 3 4 5 6 7 octet 379 * +-----+-----+-----+-----+-----+-----+-----+-----+ 380 * | id | type | status | 381 * +-----+-----+-----+-----+-----+-----+-----+-----+ 382 * | data | 383 * +-----+-----+-----+-----+ 384 * 385 * id: the corresponding request identifier 386 * type: the type of the corresponding request 387 * status: the status of request processing 388 * data: any data associated with the response (determined by type and 389 * status) 390 */ 391 392 struct xen_netif_ctrl_response { 393 uint16_t id; 394 uint16_t type; 395 uint32_t status; 396 397 #define XEN_NETIF_CTRL_STATUS_SUCCESS 0 398 #define XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED 1 399 #define XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER 2 400 #define XEN_NETIF_CTRL_STATUS_BUFFER_OVERFLOW 3 401 402 uint32_t data; 403 }; 404 405 /* 406 * Control messages 407 * ================ 408 * 409 * XEN_NETIF_CTRL_TYPE_SET_HASH_ALGORITHM 410 * -------------------------------------- 411 * 412 * This is sent by the frontend to set the desired hash algorithm. 
413 * 414 * Request: 415 * 416 * type = XEN_NETIF_CTRL_TYPE_SET_HASH_ALGORITHM 417 * data[0] = a XEN_NETIF_CTRL_HASH_ALGORITHM_* value 418 * data[1] = 0 419 * data[2] = 0 420 * 421 * Response: 422 * 423 * status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED - Operation not 424 * supported 425 * XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - The algorithm is not 426 * supported 427 * XEN_NETIF_CTRL_STATUS_SUCCESS - Operation successful 428 * 429 * NOTE: Setting data[0] to XEN_NETIF_CTRL_HASH_ALGORITHM_NONE disables 430 * hashing and the backend is free to choose how it steers packets 431 * to queues (which is the default behaviour). 432 * 433 * XEN_NETIF_CTRL_TYPE_GET_HASH_FLAGS 434 * ---------------------------------- 435 * 436 * This is sent by the frontend to query the types of hash supported by 437 * the backend. 438 * 439 * Request: 440 * 441 * type = XEN_NETIF_CTRL_TYPE_GET_HASH_FLAGS 442 * data[0] = 0 443 * data[1] = 0 444 * data[2] = 0 445 * 446 * Response: 447 * 448 * status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED - Operation not supported 449 * XEN_NETIF_CTRL_STATUS_SUCCESS - Operation successful 450 * data = supported hash types (if operation was successful) 451 * 452 * NOTE: A valid hash algorithm must be selected before this operation can 453 * succeed. 454 * 455 * XEN_NETIF_CTRL_TYPE_SET_HASH_FLAGS 456 * ---------------------------------- 457 * 458 * This is sent by the frontend to set the types of hash that the backend 459 * should calculate. (See above for hash type definitions). 460 * Note that the 'maximal' type of hash should always be chosen. For 461 * example, if the frontend sets both IPV4 and IPV4_TCP hash types then 462 * the latter hash type should be calculated for any TCP packet and the 463 * former only calculated for non-TCP packets. 
464 * 465 * Request: 466 * 467 * type = XEN_NETIF_CTRL_TYPE_SET_HASH_FLAGS 468 * data[0] = bitwise OR of XEN_NETIF_CTRL_HASH_TYPE_* values 469 * data[1] = 0 470 * data[2] = 0 471 * 472 * Response: 473 * 474 * status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED - Operation not 475 * supported 476 * XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - One or more flag 477 * value is invalid or 478 * unsupported 479 * XEN_NETIF_CTRL_STATUS_SUCCESS - Operation successful 480 * data = 0 481 * 482 * NOTE: A valid hash algorithm must be selected before this operation can 483 * succeed. 484 * Also, setting data[0] to zero disables hashing and the backend 485 * is free to choose how it steers packets to queues. 486 * 487 * XEN_NETIF_CTRL_TYPE_SET_HASH_KEY 488 * -------------------------------- 489 * 490 * This is sent by the frontend to set the key of the hash if the algorithm 491 * requires it. (See hash algorithms above). 492 * 493 * Request: 494 * 495 * type = XEN_NETIF_CTRL_TYPE_SET_HASH_KEY 496 * data[0] = grant reference of page containing the key (assumed to 497 * start at beginning of grant) 498 * data[1] = size of key in octets 499 * data[2] = 0 500 * 501 * Response: 502 * 503 * status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED - Operation not 504 * supported 505 * XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - Key size is invalid 506 * XEN_NETIF_CTRL_STATUS_BUFFER_OVERFLOW - Key size is larger 507 * than the backend 508 * supports 509 * XEN_NETIF_CTRL_STATUS_SUCCESS - Operation successful 510 * data = 0 511 * 512 * NOTE: Any key octets not specified are assumed to be zero (the key 513 * is assumed to be empty by default) and specifying a new key 514 * invalidates any previous key, hence specifying a key size of 515 * zero will clear the key (which ensures that the calculated hash 516 * will always be zero). 517 * The maximum size of key is algorithm and backend specific, but 518 * is also limited by the single grant reference. 
519 * The grant reference may be read-only and must remain valid until 520 * the response has been processed. 521 * 522 * XEN_NETIF_CTRL_TYPE_GET_HASH_MAPPING_SIZE 523 * ----------------------------------------- 524 * 525 * This is sent by the frontend to query the maximum size of mapping 526 * table supported by the backend. The size is specified in terms of 527 * table entries. 528 * 529 * Request: 530 * 531 * type = XEN_NETIF_CTRL_TYPE_GET_HASH_MAPPING_SIZE 532 * data[0] = 0 533 * data[1] = 0 534 * data[2] = 0 535 * 536 * Response: 537 * 538 * status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED - Operation not supported 539 * XEN_NETIF_CTRL_STATUS_SUCCESS - Operation successful 540 * data = maximum number of entries allowed in the mapping table 541 * (if operation was successful) or zero if a mapping table is 542 * not supported (i.e. hash mapping is done only by modular 543 * arithmetic). 544 * 545 * XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING_SIZE 546 * ------------------------------------- 547 * 548 * This is sent by the frontend to set the actual size of the mapping 549 * table to be used by the backend. The size is specified in terms of 550 * table entries. 551 * Any previous table is invalidated by this message and any new table 552 * is assumed to be zero filled. 553 * 554 * Request: 555 * 556 * type = XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING_SIZE 557 * data[0] = number of entries in mapping table 558 * data[1] = 0 559 * data[2] = 0 560 * 561 * Response: 562 * 563 * status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED - Operation not 564 * supported 565 * XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - Table size is invalid 566 * XEN_NETIF_CTRL_STATUS_SUCCESS - Operation successful 567 * data = 0 568 * 569 * NOTE: Setting data[0] to 0 means that hash mapping should be done 570 * using modular arithmetic. 
571 * 572 * XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING 573 * ------------------------------------ 574 * 575 * This is sent by the frontend to set the content of the table mapping 576 * hash value to queue number. The backend should calculate the hash from 577 * the packet header, use it as an index into the table (modulo the size 578 * of the table) and then steer the packet to the queue number found at 579 * that index. 580 * 581 * Request: 582 * 583 * type = XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING 584 * data[0] = grant reference of page containing the mapping (sub-)table 585 * (assumed to start at beginning of grant) 586 * data[1] = size of (sub-)table in entries 587 * data[2] = offset, in entries, of sub-table within overall table 588 * 589 * Response: 590 * 591 * status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED - Operation not 592 * supported 593 * XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - Table size or content 594 * is invalid 595 * XEN_NETIF_CTRL_STATUS_BUFFER_OVERFLOW - Table size is larger 596 * than the backend 597 * supports 598 * XEN_NETIF_CTRL_STATUS_SUCCESS - Operation successful 599 * data = 0 600 * 601 * NOTE: The overall table has the following format: 602 * 603 * 0 1 2 3 4 5 6 7 octet 604 * +-----+-----+-----+-----+-----+-----+-----+-----+ 605 * | mapping[0] | mapping[1] | 606 * +-----+-----+-----+-----+-----+-----+-----+-----+ 607 * | . | 608 * | . | 609 * | . | 610 * +-----+-----+-----+-----+-----+-----+-----+-----+ 611 * | mapping[N-2] | mapping[N-1] | 612 * +-----+-----+-----+-----+-----+-----+-----+-----+ 613 * 614 * where N is specified by a XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING_SIZE 615 * message and each mapping must specifies a queue between 0 and 616 * "multi-queue-num-queues" (see above). 617 * The backend may support a mapping table larger than can be 618 * mapped by a single grant reference. Thus sub-tables within a 619 * larger table can be individually set by sending multiple messages 620 * with differing offset values. 
Specifying a new sub-table does not 621 * invalidate any table data outside that range. 622 * The grant reference may be read-only and must remain valid until 623 * the response has been processed. 624 */ 625 626 DEFINE_RING_TYPES(xen_netif_ctrl, 627 struct xen_netif_ctrl_request, 628 struct xen_netif_ctrl_response); 629 630 /* 631 * Guest transmit 632 * ============== 633 * 634 * This is the 'wire' format for transmit (frontend -> backend) packets: 635 * 636 * Fragment 1: xen_netif_tx_request_t - flags = XEN_NETTXF_* 637 * size = total packet size 638 * [Extra 1: xen_netif_extra_info_t] - (only if fragment 1 flags include 639 * XEN_NETTXF_extra_info) 640 * ... 641 * [Extra N: xen_netif_extra_info_t] - (only if extra N-1 flags include 642 * XEN_NETIF_EXTRA_MORE) 643 * ... 644 * Fragment N: xen_netif_tx_request_t - (only if fragment N-1 flags include 645 * XEN_NETTXF_more_data - flags on preceding 646 * extras are not relevant here) 647 * flags = 0 648 * size = fragment size 649 * 650 * NOTE: 651 * 652 * This format slightly is different from that used for receive 653 * (backend -> frontend) packets. Specifically, in a multi-fragment 654 * packet the actual size of fragment 1 can only be determined by 655 * subtracting the sizes of fragments 2..N from the total packet size. 656 * 657 * Ring slot size is 12 octets, however not all request/response 658 * structs use the full size. 659 * 660 * tx request data (xen_netif_tx_request_t) 661 * ------------------------------------ 662 * 663 * 0 1 2 3 4 5 6 7 octet 664 * +-----+-----+-----+-----+-----+-----+-----+-----+ 665 * | grant ref | offset | flags | 666 * +-----+-----+-----+-----+-----+-----+-----+-----+ 667 * | id | size | 668 * +-----+-----+-----+-----+ 669 * 670 * grant ref: Reference to buffer page. 671 * offset: Offset within buffer page. 672 * flags: XEN_NETTXF_*. 673 * id: request identifier, echoed in response. 674 * size: packet size in bytes. 
 *
 * tx response (xen_netif_tx_response_t)
 * -------------------------------------
 *
 *    0     1     2     3     4     5     6     7  octet
 * +-----+-----+-----+-----+-----+-----+-----+-----+
 * | id        | status    | unused                |
 * +-----+-----+-----+-----+-----+-----+-----+-----+
 * | unused                |
 * +-----+-----+-----+-----+
 *
 * id: reflects id in transmit request
 * status: XEN_NETIF_RSP_*
 *
 * Guest receive
 * =============
 *
 * This is the 'wire' format for receive (backend -> frontend) packets:
 *
 * Fragment 1: xen_netif_rx_request_t  - flags = XEN_NETRXF_*
 *                                       size = fragment size
 * [Extra 1: xen_netif_extra_info_t]   - (only if fragment 1 flags include
 *                                        XEN_NETRXF_extra_info)
 * ...
 * [Extra N: xen_netif_extra_info_t]   - (only if extra N-1 flags include
 *                                        XEN_NETIF_EXTRA_MORE)
 * ...
 * Fragment N: xen_netif_rx_request_t  - (only if fragment N-1 flags include
 *                                        XEN_NETRXF_more_data - flags on
 *                                        preceding extras are not relevant
 *                                        here)
 *                                       flags = 0
 *                                       size = fragment size
 *
 * NOTE:
 *
 * This format is slightly different from that used for transmit
 * (frontend -> backend) packets. Specifically, in a multi-fragment
 * packet the size of the packet can only be determined by summing the
 * sizes of fragments 1..N.
 *
 * Ring slot size is 8 octets.
 *
 * rx request (xen_netif_rx_request_t)
 * -----------------------------------
 *
 *    0     1     2     3     4     5     6     7  octet
 * +-----+-----+-----+-----+-----+-----+-----+-----+
 * | id        | pad       | gref                  |
 * +-----+-----+-----+-----+-----+-----+-----+-----+
 *
 * id: request identifier, echoed in response.
 * gref: reference to incoming granted frame.
727 * 728 * rx response (xen_netif_rx_response_t) 729 * --------------------------------- 730 * 731 * 0 1 2 3 4 5 6 7 octet 732 * +-----+-----+-----+-----+-----+-----+-----+-----+ 733 * | id | offset | flags | status | 734 * +-----+-----+-----+-----+-----+-----+-----+-----+ 735 * 736 * id: reflects id in receive request 737 * offset: offset in page of start of received packet 738 * flags: XEN_NETRXF_* 739 * status: -ve: XEN_NETIF_RSP_*; +ve: Rx'ed pkt size. 740 * 741 * NOTE: Historically, to support GSO on the frontend receive side, Linux 742 * netfront does not make use of the rx response id (because, as 743 * described below, extra info structures overlay the id field). 744 * Instead it assumes that responses always appear in the same ring 745 * slot as their corresponding request. Thus, to maintain 746 * compatibility, backends must make sure this is the case. 747 * 748 * Extra Info 749 * ========== 750 * 751 * Can be present if initial request or response has NET{T,R}XF_extra_info, 752 * or previous extra request has XEN_NETIF_EXTRA_MORE. 753 * 754 * The struct therefore needs to fit into either a tx or rx slot and 755 * is therefore limited to 8 octets. 756 * 757 * NOTE: Because extra info data overlays the usual request/response 758 * structures, there is no id information in the opposite direction. 759 * So, if an extra info overlays an rx response the frontend can 760 * assume that it is in the same ring slot as the request that was 761 * consumed to make the slot available, and the backend must ensure 762 * this assumption is true. 
 *
 * extra info (xen_netif_extra_info_t)
 * -----------------------------------
 *
 * General format:
 *
 *    0     1     2     3     4     5     6     7  octet
 * +-----+-----+-----+-----+-----+-----+-----+-----+
 * |type |flags| type specific data                |
 * +-----+-----+-----+-----+-----+-----+-----+-----+
 * | padding for tx        |
 * +-----+-----+-----+-----+
 *
 * type: XEN_NETIF_EXTRA_TYPE_*
 * flags: XEN_NETIF_EXTRA_FLAG_*
 * padding for tx: present only in the tx case due to 8 octet limit
 *                 from rx case. Not shown in type specific entries
 *                 below.
 *
 * XEN_NETIF_EXTRA_TYPE_GSO:
 *
 *    0     1     2     3     4     5     6     7  octet
 * +-----+-----+-----+-----+-----+-----+-----+-----+
 * |type |flags| size      |type | pad | features  |
 * +-----+-----+-----+-----+-----+-----+-----+-----+
 *
 * type: Must be XEN_NETIF_EXTRA_TYPE_GSO
 * flags: XEN_NETIF_EXTRA_FLAG_*
 * size: Maximum payload size of each segment. For example,
 *       for TCP this is just the path MSS.
 * type: XEN_NETIF_GSO_TYPE_*: This determines the protocol of
 *       the packet and any extra features required to segment the
 *       packet properly.
 * features: XEN_NETIF_GSO_FEAT_*: This specifies any extra GSO
 *           features required to process this packet, such as ECN
 *           support for TCPv4.
 *
 * XEN_NETIF_EXTRA_TYPE_MCAST_{ADD,DEL}:
 *
 *    0     1     2     3     4     5     6     7  octet
 * +-----+-----+-----+-----+-----+-----+-----+-----+
 * |type |flags| addr                              |
 * +-----+-----+-----+-----+-----+-----+-----+-----+
 *
 * type: Must be XEN_NETIF_EXTRA_TYPE_MCAST_{ADD,DEL}
 * flags: XEN_NETIF_EXTRA_FLAG_*
 * addr: address to add/remove
 *
 * XEN_NETIF_EXTRA_TYPE_HASH:
 *
 * A backend that supports Toeplitz hashing is assumed to accept
 * this type of extra info in transmit packets.
 * A frontend that enables hashing is assumed to accept
 * this type of extra info in receive packets.
817 * 818 * 0 1 2 3 4 5 6 7 octet 819 * +-----+-----+-----+-----+-----+-----+-----+-----+ 820 * |type |flags|htype| alg |LSB ---- value ---- MSB| 821 * +-----+-----+-----+-----+-----+-----+-----+-----+ 822 * 823 * type: Must be XEN_NETIF_EXTRA_TYPE_HASH 824 * flags: XEN_NETIF_EXTRA_FLAG_* 825 * htype: Hash type (one of _XEN_NETIF_CTRL_HASH_TYPE_* - see above) 826 * alg: The algorithm used to calculate the hash (one of 827 * XEN_NETIF_CTRL_HASH_TYPE_ALGORITHM_* - see above) 828 * value: Hash value 829 */ 830 831 /* Protocol checksum field is blank in the packet (hardware offload)? */ 832 #define _XEN_NETTXF_csum_blank (0) 833 #define XEN_NETTXF_csum_blank (1U<<_XEN_NETTXF_csum_blank) 834 835 /* Packet data has been validated against protocol checksum. */ 836 #define _XEN_NETTXF_data_validated (1) 837 #define XEN_NETTXF_data_validated (1U<<_XEN_NETTXF_data_validated) 838 839 /* Packet continues in the next request descriptor. */ 840 #define _XEN_NETTXF_more_data (2) 841 #define XEN_NETTXF_more_data (1U<<_XEN_NETTXF_more_data) 842 843 /* Packet to be followed by extra descriptor(s). */ 844 #define _XEN_NETTXF_extra_info (3) 845 #define XEN_NETTXF_extra_info (1U<<_XEN_NETTXF_extra_info) 846 847 #define XEN_NETIF_MAX_TX_SIZE 0xFFFF 848 struct xen_netif_tx_request { 849 grant_ref_t gref; 850 uint16_t offset; 851 uint16_t flags; 852 uint16_t id; 853 uint16_t size; 854 }; 855 856 /* Types of xen_netif_extra_info descriptors. */ 857 #define XEN_NETIF_EXTRA_TYPE_NONE (0) /* Never used - invalid */ 858 #define XEN_NETIF_EXTRA_TYPE_GSO (1) /* u.gso */ 859 #define XEN_NETIF_EXTRA_TYPE_MCAST_ADD (2) /* u.mcast */ 860 #define XEN_NETIF_EXTRA_TYPE_MCAST_DEL (3) /* u.mcast */ 861 #define XEN_NETIF_EXTRA_TYPE_HASH (4) /* u.hash */ 862 #define XEN_NETIF_EXTRA_TYPE_XDP (5) /* u.xdp */ 863 #define XEN_NETIF_EXTRA_TYPE_MAX (6) 864 865 /* xen_netif_extra_info_t flags. 
*/ 866 #define _XEN_NETIF_EXTRA_FLAG_MORE (0) 867 #define XEN_NETIF_EXTRA_FLAG_MORE (1U<<_XEN_NETIF_EXTRA_FLAG_MORE) 868 869 /* GSO types */ 870 #define XEN_NETIF_GSO_TYPE_NONE (0) 871 #define XEN_NETIF_GSO_TYPE_TCPV4 (1) 872 #define XEN_NETIF_GSO_TYPE_TCPV6 (2) 873 874 /* 875 * This structure needs to fit within both xen_netif_tx_request_t and 876 * xen_netif_rx_response_t for compatibility. 877 */ 878 struct xen_netif_extra_info { 879 uint8_t type; 880 uint8_t flags; 881 union { 882 struct { 883 uint16_t size; 884 uint8_t type; 885 uint8_t pad; 886 uint16_t features; 887 } gso; 888 struct { 889 uint8_t addr[6]; 890 } mcast; 891 struct { 892 uint8_t type; 893 uint8_t algorithm; 894 uint8_t value[4]; 895 } hash; 896 struct { 897 uint16_t headroom; 898 uint16_t pad[2]; 899 } xdp; 900 uint16_t pad[3]; 901 } u; 902 }; 903 904 struct xen_netif_tx_response { 905 uint16_t id; 906 int16_t status; 907 }; 908 909 struct xen_netif_rx_request { 910 uint16_t id; /* Echoed in response message. */ 911 uint16_t pad; 912 grant_ref_t gref; 913 }; 914 915 /* Packet data has been validated against protocol checksum. */ 916 #define _XEN_NETRXF_data_validated (0) 917 #define XEN_NETRXF_data_validated (1U<<_XEN_NETRXF_data_validated) 918 919 /* Protocol checksum field is blank in the packet (hardware offload)? */ 920 #define _XEN_NETRXF_csum_blank (1) 921 #define XEN_NETRXF_csum_blank (1U<<_XEN_NETRXF_csum_blank) 922 923 /* Packet continues in the next request descriptor. */ 924 #define _XEN_NETRXF_more_data (2) 925 #define XEN_NETRXF_more_data (1U<<_XEN_NETRXF_more_data) 926 927 /* Packet to be followed by extra descriptor(s). */ 928 #define _XEN_NETRXF_extra_info (3) 929 #define XEN_NETRXF_extra_info (1U<<_XEN_NETRXF_extra_info) 930 931 /* Packet has GSO prefix. 
Deprecated but included for compatibility */ 932 #define _XEN_NETRXF_gso_prefix (4) 933 #define XEN_NETRXF_gso_prefix (1U<<_XEN_NETRXF_gso_prefix) 934 935 struct xen_netif_rx_response { 936 uint16_t id; 937 uint16_t offset; 938 uint16_t flags; 939 int16_t status; 940 }; 941 942 /* 943 * Generate xen_netif ring structures and types. 944 */ 945 946 DEFINE_RING_TYPES(xen_netif_tx, struct xen_netif_tx_request, 947 struct xen_netif_tx_response); 948 DEFINE_RING_TYPES(xen_netif_rx, struct xen_netif_rx_request, 949 struct xen_netif_rx_response); 950 951 #define XEN_NETIF_RSP_DROPPED -2 952 #define XEN_NETIF_RSP_ERROR -1 953 #define XEN_NETIF_RSP_OKAY 0 954 /* No response: used for auxiliary requests (e.g., xen_netif_extra_info_t). */ 955 #define XEN_NETIF_RSP_NULL 1 956 957 #endif 958