1*3a9fd824SRoger Pau Monné /****************************************************************************** 2*3a9fd824SRoger Pau Monné * blkif.h 3*3a9fd824SRoger Pau Monné * 4*3a9fd824SRoger Pau Monné * Unified block-device I/O interface for Xen guest OSes. 5*3a9fd824SRoger Pau Monné * 6*3a9fd824SRoger Pau Monné * Permission is hereby granted, free of charge, to any person obtaining a copy 7*3a9fd824SRoger Pau Monné * of this software and associated documentation files (the "Software"), to 8*3a9fd824SRoger Pau Monné * deal in the Software without restriction, including without limitation the 9*3a9fd824SRoger Pau Monné * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 10*3a9fd824SRoger Pau Monné * sell copies of the Software, and to permit persons to whom the Software is 11*3a9fd824SRoger Pau Monné * furnished to do so, subject to the following conditions: 12*3a9fd824SRoger Pau Monné * 13*3a9fd824SRoger Pau Monné * The above copyright notice and this permission notice shall be included in 14*3a9fd824SRoger Pau Monné * all copies or substantial portions of the Software. 15*3a9fd824SRoger Pau Monné * 16*3a9fd824SRoger Pau Monné * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17*3a9fd824SRoger Pau Monné * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18*3a9fd824SRoger Pau Monné * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19*3a9fd824SRoger Pau Monné * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20*3a9fd824SRoger Pau Monné * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21*3a9fd824SRoger Pau Monné * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 22*3a9fd824SRoger Pau Monné * DEALINGS IN THE SOFTWARE. 
23*3a9fd824SRoger Pau Monné * 24*3a9fd824SRoger Pau Monné * Copyright (c) 2003-2004, Keir Fraser 25*3a9fd824SRoger Pau Monné * Copyright (c) 2012, Spectra Logic Corporation 26*3a9fd824SRoger Pau Monné */ 27*3a9fd824SRoger Pau Monné 28*3a9fd824SRoger Pau Monné #ifndef __XEN_PUBLIC_IO_BLKIF_H__ 29*3a9fd824SRoger Pau Monné #define __XEN_PUBLIC_IO_BLKIF_H__ 30*3a9fd824SRoger Pau Monné 31*3a9fd824SRoger Pau Monné #include "ring.h" 32*3a9fd824SRoger Pau Monné #include "../grant_table.h" 33*3a9fd824SRoger Pau Monné 34*3a9fd824SRoger Pau Monné /* 35*3a9fd824SRoger Pau Monné * Front->back notifications: When enqueuing a new request, sending a 36*3a9fd824SRoger Pau Monné * notification can be made conditional on req_event (i.e., the generic 37*3a9fd824SRoger Pau Monné * hold-off mechanism provided by the ring macros). Backends must set 38*3a9fd824SRoger Pau Monné * req_event appropriately (e.g., using RING_FINAL_CHECK_FOR_REQUESTS()). 39*3a9fd824SRoger Pau Monné * 40*3a9fd824SRoger Pau Monné * Back->front notifications: When enqueuing a new response, sending a 41*3a9fd824SRoger Pau Monné * notification can be made conditional on rsp_event (i.e., the generic 42*3a9fd824SRoger Pau Monné * hold-off mechanism provided by the ring macros). Frontends must set 43*3a9fd824SRoger Pau Monné * rsp_event appropriately (e.g., using RING_FINAL_CHECK_FOR_RESPONSES()). 
44*3a9fd824SRoger Pau Monné */ 45*3a9fd824SRoger Pau Monné 46*3a9fd824SRoger Pau Monné #ifndef blkif_vdev_t 47*3a9fd824SRoger Pau Monné #define blkif_vdev_t uint16_t 48*3a9fd824SRoger Pau Monné #endif 49*3a9fd824SRoger Pau Monné #define blkif_sector_t uint64_t 50*3a9fd824SRoger Pau Monné 51*3a9fd824SRoger Pau Monné /* 52*3a9fd824SRoger Pau Monné * Feature and Parameter Negotiation 53*3a9fd824SRoger Pau Monné * ================================= 54*3a9fd824SRoger Pau Monné * The two halves of a Xen block driver utilize nodes within the XenStore to 55*3a9fd824SRoger Pau Monné * communicate capabilities and to negotiate operating parameters. This 56*3a9fd824SRoger Pau Monné * section enumerates these nodes which reside in the respective front and 57*3a9fd824SRoger Pau Monné * backend portions of the XenStore, following the XenBus convention. 58*3a9fd824SRoger Pau Monné * 59*3a9fd824SRoger Pau Monné * All data in the XenStore is stored as strings. Nodes specifying numeric 60*3a9fd824SRoger Pau Monné * values are encoded in decimal. Integer value ranges listed below are 61*3a9fd824SRoger Pau Monné * expressed as fixed sized integer types capable of storing the conversion 62*3a9fd824SRoger Pau Monné * of a properly formatted node string, without loss of information. 63*3a9fd824SRoger Pau Monné * 64*3a9fd824SRoger Pau Monné * Any specified default value is in effect if the corresponding XenBus node 65*3a9fd824SRoger Pau Monné * is not present in the XenStore. 66*3a9fd824SRoger Pau Monné * 67*3a9fd824SRoger Pau Monné * XenStore nodes in sections marked "PRIVATE" are solely for use by the 68*3a9fd824SRoger Pau Monné * driver side whose XenBus tree contains them. 69*3a9fd824SRoger Pau Monné * 70*3a9fd824SRoger Pau Monné * XenStore nodes marked "DEPRECATED" in their notes section should only be 71*3a9fd824SRoger Pau Monné * used to provide interoperability with legacy implementations. 
72*3a9fd824SRoger Pau Monné * 73*3a9fd824SRoger Pau Monné * See the XenBus state transition diagram below for details on when XenBus 74*3a9fd824SRoger Pau Monné * nodes must be published and when they can be queried. 75*3a9fd824SRoger Pau Monné * 76*3a9fd824SRoger Pau Monné ***************************************************************************** 77*3a9fd824SRoger Pau Monné * Backend XenBus Nodes 78*3a9fd824SRoger Pau Monné ***************************************************************************** 79*3a9fd824SRoger Pau Monné * 80*3a9fd824SRoger Pau Monné *------------------ Backend Device Identification (PRIVATE) ------------------ 81*3a9fd824SRoger Pau Monné * 82*3a9fd824SRoger Pau Monné * mode 83*3a9fd824SRoger Pau Monné * Values: "r" (read only), "w" (writable) 84*3a9fd824SRoger Pau Monné * 85*3a9fd824SRoger Pau Monné * The read or write access permissions to the backing store to be 86*3a9fd824SRoger Pau Monné * granted to the frontend. 87*3a9fd824SRoger Pau Monné * 88*3a9fd824SRoger Pau Monné * params 89*3a9fd824SRoger Pau Monné * Values: string 90*3a9fd824SRoger Pau Monné * 91*3a9fd824SRoger Pau Monné * A free formatted string providing sufficient information for the 92*3a9fd824SRoger Pau Monné * hotplug script to attach the device and provide a suitable 93*3a9fd824SRoger Pau Monné * handler (ie: a block device) for blkback to use. 94*3a9fd824SRoger Pau Monné * 95*3a9fd824SRoger Pau Monné * physical-device 96*3a9fd824SRoger Pau Monné * Values: "MAJOR:MINOR" 97*3a9fd824SRoger Pau Monné * Notes: 11 98*3a9fd824SRoger Pau Monné * 99*3a9fd824SRoger Pau Monné * MAJOR and MINOR are the major number and minor number of the 100*3a9fd824SRoger Pau Monné * backing device respectively. 101*3a9fd824SRoger Pau Monné * 102*3a9fd824SRoger Pau Monné * physical-device-path 103*3a9fd824SRoger Pau Monné * Values: path string 104*3a9fd824SRoger Pau Monné * 105*3a9fd824SRoger Pau Monné * A string that contains the absolute path to the disk image. 
On 106*3a9fd824SRoger Pau Monné * NetBSD and Linux this is always a block device, while on FreeBSD 107*3a9fd824SRoger Pau Monné * it can be either a block device or a regular file. 108*3a9fd824SRoger Pau Monné * 109*3a9fd824SRoger Pau Monné * type 110*3a9fd824SRoger Pau Monné * Values: "file", "phy", "tap" 111*3a9fd824SRoger Pau Monné * 112*3a9fd824SRoger Pau Monné * The type of the backing device/object. 113*3a9fd824SRoger Pau Monné * 114*3a9fd824SRoger Pau Monné * 115*3a9fd824SRoger Pau Monné * direct-io-safe 116*3a9fd824SRoger Pau Monné * Values: 0/1 (boolean) 117*3a9fd824SRoger Pau Monné * Default Value: 0 118*3a9fd824SRoger Pau Monné * 119*3a9fd824SRoger Pau Monné * The underlying storage is not affected by the direct IO memory 120*3a9fd824SRoger Pau Monné * lifetime bug. See: 121*3a9fd824SRoger Pau Monné * https://lists.xen.org/archives/html/xen-devel/2012-12/msg01154.html 122*3a9fd824SRoger Pau Monné * 123*3a9fd824SRoger Pau Monné * Therefore this option gives the backend permission to use 124*3a9fd824SRoger Pau Monné * O_DIRECT, notwithstanding that bug. 125*3a9fd824SRoger Pau Monné * 126*3a9fd824SRoger Pau Monné * That is, if this option is enabled, use of O_DIRECT is safe, 127*3a9fd824SRoger Pau Monné * in circumstances where we would normally have avoided it as a 128*3a9fd824SRoger Pau Monné * workaround for that bug. This option is not relevant for all 129*3a9fd824SRoger Pau Monné * backends, and even not necessarily supported for those for 130*3a9fd824SRoger Pau Monné * which it is relevant. A backend which knows that it is not 131*3a9fd824SRoger Pau Monné * affected by the bug can ignore this option. 132*3a9fd824SRoger Pau Monné * 133*3a9fd824SRoger Pau Monné * This option doesn't require a backend to use O_DIRECT, so it 134*3a9fd824SRoger Pau Monné * should not be used to try to control the caching behaviour. 
135*3a9fd824SRoger Pau Monné * 136*3a9fd824SRoger Pau Monné *--------------------------------- Features --------------------------------- 137*3a9fd824SRoger Pau Monné * 138*3a9fd824SRoger Pau Monné * feature-barrier 139*3a9fd824SRoger Pau Monné * Values: 0/1 (boolean) 140*3a9fd824SRoger Pau Monné * Default Value: 0 141*3a9fd824SRoger Pau Monné * 142*3a9fd824SRoger Pau Monné * A value of "1" indicates that the backend can process requests 143*3a9fd824SRoger Pau Monné * containing the BLKIF_OP_WRITE_BARRIER request opcode. Requests 144*3a9fd824SRoger Pau Monné * of this type may still be returned at any time with the 145*3a9fd824SRoger Pau Monné * BLKIF_RSP_EOPNOTSUPP result code. 146*3a9fd824SRoger Pau Monné * 147*3a9fd824SRoger Pau Monné * feature-flush-cache 148*3a9fd824SRoger Pau Monné * Values: 0/1 (boolean) 149*3a9fd824SRoger Pau Monné * Default Value: 0 150*3a9fd824SRoger Pau Monné * 151*3a9fd824SRoger Pau Monné * A value of "1" indicates that the backend can process requests 152*3a9fd824SRoger Pau Monné * containing the BLKIF_OP_FLUSH_DISKCACHE request opcode. Requests 153*3a9fd824SRoger Pau Monné * of this type may still be returned at any time with the 154*3a9fd824SRoger Pau Monné * BLKIF_RSP_EOPNOTSUPP result code. 155*3a9fd824SRoger Pau Monné * 156*3a9fd824SRoger Pau Monné * feature-discard 157*3a9fd824SRoger Pau Monné * Values: 0/1 (boolean) 158*3a9fd824SRoger Pau Monné * Default Value: 0 159*3a9fd824SRoger Pau Monné * 160*3a9fd824SRoger Pau Monné * A value of "1" indicates that the backend can process requests 161*3a9fd824SRoger Pau Monné * containing the BLKIF_OP_DISCARD request opcode. Requests 162*3a9fd824SRoger Pau Monné * of this type may still be returned at any time with the 163*3a9fd824SRoger Pau Monné * BLKIF_RSP_EOPNOTSUPP result code. 
164*3a9fd824SRoger Pau Monné * 165*3a9fd824SRoger Pau Monné * feature-persistent 166*3a9fd824SRoger Pau Monné * Values: 0/1 (boolean) 167*3a9fd824SRoger Pau Monné * Default Value: 0 168*3a9fd824SRoger Pau Monné * Notes: 7 169*3a9fd824SRoger Pau Monné * 170*3a9fd824SRoger Pau Monné * A value of "1" indicates that the backend can keep the grants used 171*3a9fd824SRoger Pau Monné * by the frontend driver mapped, so the same set of grants should be 172*3a9fd824SRoger Pau Monné * used in all transactions. The maximum number of grants the backend 173*3a9fd824SRoger Pau Monné * can map persistently depends on the implementation, but ideally it 174*3a9fd824SRoger Pau Monné * should be RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST. Using this 175*3a9fd824SRoger Pau Monné * feature the backend doesn't need to unmap each grant, preventing 176*3a9fd824SRoger Pau Monné * costly TLB flushes. The backend driver should only map grants 177*3a9fd824SRoger Pau Monné * persistently if the frontend supports it. If a backend driver chooses 178*3a9fd824SRoger Pau Monné * to use the persistent protocol when the frontend doesn't support it, 179*3a9fd824SRoger Pau Monné * it will probably hit the maximum number of persistently mapped grants 180*3a9fd824SRoger Pau Monné * (due to the fact that the frontend won't be reusing the same grants), 181*3a9fd824SRoger Pau Monné * and fall back to non-persistent mode. Backend implementations may 182*3a9fd824SRoger Pau Monné * shrink or expand the number of persistently mapped grants without 183*3a9fd824SRoger Pau Monné * notifying the frontend depending on memory constraints (this might 184*3a9fd824SRoger Pau Monné * cause a performance degradation). 
185*3a9fd824SRoger Pau Monné * 186*3a9fd824SRoger Pau Monné * If a backend driver wants to limit the maximum number of persistently 187*3a9fd824SRoger Pau Monné * mapped grants to a value less than RING_SIZE * 188*3a9fd824SRoger Pau Monné * BLKIF_MAX_SEGMENTS_PER_REQUEST an LRU strategy should be used to 189*3a9fd824SRoger Pau Monné * discard the grants that are less commonly used. Using an LRU in the 190*3a9fd824SRoger Pau Monné * backend driver paired with a LIFO queue in the frontend will 191*3a9fd824SRoger Pau Monné * allow us to have better performance in this scenario. 192*3a9fd824SRoger Pau Monné * 193*3a9fd824SRoger Pau Monné *----------------------- Request Transport Parameters ------------------------ 194*3a9fd824SRoger Pau Monné * 195*3a9fd824SRoger Pau Monné * max-ring-page-order 196*3a9fd824SRoger Pau Monné * Values: <uint32_t> 197*3a9fd824SRoger Pau Monné * Default Value: 0 198*3a9fd824SRoger Pau Monné * Notes: 1, 3 199*3a9fd824SRoger Pau Monné * 200*3a9fd824SRoger Pau Monné * The maximum supported size of the request ring buffer in units of 201*3a9fd824SRoger Pau Monné * lb(machine pages). (e.g. 0 == 1 page, 1 == 2 pages, 2 == 4 pages, 202*3a9fd824SRoger Pau Monné * etc.). 203*3a9fd824SRoger Pau Monné * 204*3a9fd824SRoger Pau Monné * max-ring-pages 205*3a9fd824SRoger Pau Monné * Values: <uint32_t> 206*3a9fd824SRoger Pau Monné * Default Value: 1 207*3a9fd824SRoger Pau Monné * Notes: DEPRECATED, 2, 3 208*3a9fd824SRoger Pau Monné * 209*3a9fd824SRoger Pau Monné * The maximum supported size of the request ring buffer in units of 210*3a9fd824SRoger Pau Monné * machine pages. The value must be a power of 2. 
211*3a9fd824SRoger Pau Monné * 212*3a9fd824SRoger Pau Monné *------------------------- Backend Device Properties ------------------------- 213*3a9fd824SRoger Pau Monné * 214*3a9fd824SRoger Pau Monné * discard-enable 215*3a9fd824SRoger Pau Monné * Values: 0/1 (boolean) 216*3a9fd824SRoger Pau Monné * Default Value: 1 217*3a9fd824SRoger Pau Monné * 218*3a9fd824SRoger Pau Monné * This optional property, set by the toolstack, instructs the backend 219*3a9fd824SRoger Pau Monné * to offer (or not to offer) discard to the frontend. If the property 220*3a9fd824SRoger Pau Monné * is missing the backend should offer discard if the backing storage 221*3a9fd824SRoger Pau Monné * actually supports it. 222*3a9fd824SRoger Pau Monné * 223*3a9fd824SRoger Pau Monné * discard-alignment 224*3a9fd824SRoger Pau Monné * Values: <uint32_t> 225*3a9fd824SRoger Pau Monné * Default Value: 0 226*3a9fd824SRoger Pau Monné * Notes: 4, 5 227*3a9fd824SRoger Pau Monné * 228*3a9fd824SRoger Pau Monné * The offset, in bytes from the beginning of the virtual block device, 229*3a9fd824SRoger Pau Monné * to the first, addressable, discard extent on the underlying device. 230*3a9fd824SRoger Pau Monné * 231*3a9fd824SRoger Pau Monné * discard-granularity 232*3a9fd824SRoger Pau Monné * Values: <uint32_t> 233*3a9fd824SRoger Pau Monné * Default Value: <"sector-size"> 234*3a9fd824SRoger Pau Monné * Notes: 4 235*3a9fd824SRoger Pau Monné * 236*3a9fd824SRoger Pau Monné * The size, in bytes, of the individually addressable discard extents 237*3a9fd824SRoger Pau Monné * of the underlying device. 
238*3a9fd824SRoger Pau Monné * 239*3a9fd824SRoger Pau Monné * discard-secure 240*3a9fd824SRoger Pau Monné * Values: 0/1 (boolean) 241*3a9fd824SRoger Pau Monné * Default Value: 0 242*3a9fd824SRoger Pau Monné * Notes: 10 243*3a9fd824SRoger Pau Monné * 244*3a9fd824SRoger Pau Monné * A value of "1" indicates that the backend can process BLKIF_OP_DISCARD 245*3a9fd824SRoger Pau Monné * requests with the BLKIF_DISCARD_SECURE flag set. 246*3a9fd824SRoger Pau Monné * 247*3a9fd824SRoger Pau Monné * info 248*3a9fd824SRoger Pau Monné * Values: <uint32_t> (bitmap) 249*3a9fd824SRoger Pau Monné * 250*3a9fd824SRoger Pau Monné * A collection of bit flags describing attributes of the backing 251*3a9fd824SRoger Pau Monné * device. The VDISK_* macros define the meaning of each bit 252*3a9fd824SRoger Pau Monné * location. 253*3a9fd824SRoger Pau Monné * 254*3a9fd824SRoger Pau Monné * sector-size 255*3a9fd824SRoger Pau Monné * Values: <uint32_t> 256*3a9fd824SRoger Pau Monné * 257*3a9fd824SRoger Pau Monné * The logical block size, in bytes, of the underlying storage. This 258*3a9fd824SRoger Pau Monné * must be a power of two with a minimum value of 512. 259*3a9fd824SRoger Pau Monné * 260*3a9fd824SRoger Pau Monné * NOTE: Because of implementation bugs in some frontends this must be 261*3a9fd824SRoger Pau Monné * set to 512, unless the frontend advertises a non-zero value 262*3a9fd824SRoger Pau Monné * in its "feature-large-sector-size" xenbus node. (See below). 263*3a9fd824SRoger Pau Monné * 264*3a9fd824SRoger Pau Monné * physical-sector-size 265*3a9fd824SRoger Pau Monné * Values: <uint32_t> 266*3a9fd824SRoger Pau Monné * Default Value: <"sector-size"> 267*3a9fd824SRoger Pau Monné * 268*3a9fd824SRoger Pau Monné * The physical block size, in bytes, of the backend storage. This 269*3a9fd824SRoger Pau Monné * must be an integer multiple of "sector-size". 
270*3a9fd824SRoger Pau Monné * 271*3a9fd824SRoger Pau Monné * sectors 272*3a9fd824SRoger Pau Monné * Values: <uint64_t> 273*3a9fd824SRoger Pau Monné * 274*3a9fd824SRoger Pau Monné * The size of the backend device, expressed in units of "sector-size". 275*3a9fd824SRoger Pau Monné * The product of "sector-size" and "sectors" must also be an integer 276*3a9fd824SRoger Pau Monné * multiple of "physical-sector-size", if that node is present. 277*3a9fd824SRoger Pau Monné * 278*3a9fd824SRoger Pau Monné ***************************************************************************** 279*3a9fd824SRoger Pau Monné * Frontend XenBus Nodes 280*3a9fd824SRoger Pau Monné ***************************************************************************** 281*3a9fd824SRoger Pau Monné * 282*3a9fd824SRoger Pau Monné *----------------------- Request Transport Parameters ----------------------- 283*3a9fd824SRoger Pau Monné * 284*3a9fd824SRoger Pau Monné * event-channel 285*3a9fd824SRoger Pau Monné * Values: <uint32_t> 286*3a9fd824SRoger Pau Monné * 287*3a9fd824SRoger Pau Monné * The identifier of the Xen event channel used to signal activity 288*3a9fd824SRoger Pau Monné * in the ring buffer. 289*3a9fd824SRoger Pau Monné * 290*3a9fd824SRoger Pau Monné * ring-ref 291*3a9fd824SRoger Pau Monné * Values: <uint32_t> 292*3a9fd824SRoger Pau Monné * Notes: 6 293*3a9fd824SRoger Pau Monné * 294*3a9fd824SRoger Pau Monné * The Xen grant reference granting permission for the backend to map 295*3a9fd824SRoger Pau Monné * the sole page in a single page sized ring buffer. 
296*3a9fd824SRoger Pau Monné * 297*3a9fd824SRoger Pau Monné * ring-ref%u 298*3a9fd824SRoger Pau Monné * Values: <uint32_t> 299*3a9fd824SRoger Pau Monné * Notes: 6 300*3a9fd824SRoger Pau Monné * 301*3a9fd824SRoger Pau Monné * For a frontend providing a multi-page ring, a "number of ring pages" 302*3a9fd824SRoger Pau Monné * sized list of nodes, each containing a Xen grant reference granting 303*3a9fd824SRoger Pau Monné * permission for the backend to map the page of the ring located 304*3a9fd824SRoger Pau Monné * at page index "%u". Page indexes are zero based. 305*3a9fd824SRoger Pau Monné * 306*3a9fd824SRoger Pau Monné * protocol 307*3a9fd824SRoger Pau Monné * Values: string (XEN_IO_PROTO_ABI_*) 308*3a9fd824SRoger Pau Monné * Default Value: XEN_IO_PROTO_ABI_NATIVE 309*3a9fd824SRoger Pau Monné * 310*3a9fd824SRoger Pau Monné * The machine ABI rules governing the format of all ring request and 311*3a9fd824SRoger Pau Monné * response structures. 312*3a9fd824SRoger Pau Monné * 313*3a9fd824SRoger Pau Monné * ring-page-order 314*3a9fd824SRoger Pau Monné * Values: <uint32_t> 315*3a9fd824SRoger Pau Monné * Default Value: 0 316*3a9fd824SRoger Pau Monné * Maximum Value: MAX(ffs(max-ring-pages) - 1, max-ring-page-order) 317*3a9fd824SRoger Pau Monné * Notes: 1, 3 318*3a9fd824SRoger Pau Monné * 319*3a9fd824SRoger Pau Monné * The size of the frontend allocated request ring buffer in units 320*3a9fd824SRoger Pau Monné * of lb(machine pages). (e.g. 0 == 1 page, 1 == 2 pages, 2 == 4 pages, 321*3a9fd824SRoger Pau Monné * etc.). 
322*3a9fd824SRoger Pau Monné * 323*3a9fd824SRoger Pau Monné * num-ring-pages 324*3a9fd824SRoger Pau Monné * Values: <uint32_t> 325*3a9fd824SRoger Pau Monné * Default Value: 1 326*3a9fd824SRoger Pau Monné * Maximum Value: MAX(max-ring-pages,(0x1 << max-ring-page-order)) 327*3a9fd824SRoger Pau Monné * Notes: DEPRECATED, 2, 3 328*3a9fd824SRoger Pau Monné * 329*3a9fd824SRoger Pau Monné * The size of the frontend allocated request ring buffer in units of 330*3a9fd824SRoger Pau Monné * machine pages. The value must be a power of 2. 331*3a9fd824SRoger Pau Monné * 332*3a9fd824SRoger Pau Monné *--------------------------------- Features --------------------------------- 333*3a9fd824SRoger Pau Monné * 334*3a9fd824SRoger Pau Monné * feature-persistent 335*3a9fd824SRoger Pau Monné * Values: 0/1 (boolean) 336*3a9fd824SRoger Pau Monné * Default Value: 0 337*3a9fd824SRoger Pau Monné * Notes: 7, 8, 9 338*3a9fd824SRoger Pau Monné * 339*3a9fd824SRoger Pau Monné * A value of "1" indicates that the frontend will reuse the same grants 340*3a9fd824SRoger Pau Monné * for all transactions, allowing the backend to map them with write 341*3a9fd824SRoger Pau Monné * access (even when it should be read-only). If the frontend hits the 342*3a9fd824SRoger Pau Monné * maximum number of allowed persistently mapped grants, it can fall back 343*3a9fd824SRoger Pau Monné * to non persistent mode. This will cause a performance degradation, 344*3a9fd824SRoger Pau Monné * since the backend driver will still try to map those grants 345*3a9fd824SRoger Pau Monné * persistently. Since the persistent grants protocol is compatible with 346*3a9fd824SRoger Pau Monné * the previous protocol, a frontend driver can choose to work in 347*3a9fd824SRoger Pau Monné * persistent mode even when the backend doesn't support it. 
348*3a9fd824SRoger Pau Monné * 349*3a9fd824SRoger Pau Monné * It is recommended that the frontend driver stores the persistently 350*3a9fd824SRoger Pau Monné * mapped grants in a LIFO queue, so a subset of all persistently mapped 351*3a9fd824SRoger Pau Monné * grants gets used commonly. This is done in case the backend driver 352*3a9fd824SRoger Pau Monné * decides to limit the maximum number of persistently mapped grants 353*3a9fd824SRoger Pau Monné * to a value less than RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST. 354*3a9fd824SRoger Pau Monné * 355*3a9fd824SRoger Pau Monné * feature-large-sector-size 356*3a9fd824SRoger Pau Monné * Values: 0/1 (boolean) 357*3a9fd824SRoger Pau Monné * Default Value: 0 358*3a9fd824SRoger Pau Monné * 359*3a9fd824SRoger Pau Monné * A value of "1" indicates that the frontend will correctly supply and 360*3a9fd824SRoger Pau Monné * interpret all sector-based quantities in terms of the "sector-size" 361*3a9fd824SRoger Pau Monné * value supplied in the backend info, whatever that may be set to. 362*3a9fd824SRoger Pau Monné * If this node is not present or its value is "0" then it is assumed 363*3a9fd824SRoger Pau Monné * that the frontend requires that the logical block size is 512 as it 364*3a9fd824SRoger Pau Monné * is hardcoded (which is the case in some frontend implementations). 365*3a9fd824SRoger Pau Monné * 366*3a9fd824SRoger Pau Monné *------------------------- Virtual Device Properties ------------------------- 367*3a9fd824SRoger Pau Monné * 368*3a9fd824SRoger Pau Monné * device-type 369*3a9fd824SRoger Pau Monné * Values: "disk", "cdrom", "floppy", etc. 370*3a9fd824SRoger Pau Monné * 371*3a9fd824SRoger Pau Monné * virtual-device 372*3a9fd824SRoger Pau Monné * Values: <uint32_t> 373*3a9fd824SRoger Pau Monné * 374*3a9fd824SRoger Pau Monné * A value indicating the physical device to virtualize within the 375*3a9fd824SRoger Pau Monné * frontend's domain. (e.g. 
"The first ATA disk", "The third SCSI 376*3a9fd824SRoger Pau Monné * disk", etc.) 377*3a9fd824SRoger Pau Monné * 378*3a9fd824SRoger Pau Monné * See docs/misc/vbd-interface.txt for details on the format of this 379*3a9fd824SRoger Pau Monné * value. 380*3a9fd824SRoger Pau Monné * 381*3a9fd824SRoger Pau Monné * Notes 382*3a9fd824SRoger Pau Monné * ----- 383*3a9fd824SRoger Pau Monné * (1) Multi-page ring buffer scheme first developed in the Citrix XenServer 384*3a9fd824SRoger Pau Monné * PV drivers. 385*3a9fd824SRoger Pau Monné * (2) Multi-page ring buffer scheme first used in some RedHat distributions 386*3a9fd824SRoger Pau Monné * including a distribution deployed on certain nodes of the Amazon 387*3a9fd824SRoger Pau Monné * EC2 cluster. 388*3a9fd824SRoger Pau Monné * (3) Support for multi-page ring buffers was implemented independently, 389*3a9fd824SRoger Pau Monné * in slightly different forms, by both Citrix and RedHat/Amazon. 390*3a9fd824SRoger Pau Monné * For full interoperability, block front and backends should publish 391*3a9fd824SRoger Pau Monné * identical ring parameters, adjusted for unit differences, to the 392*3a9fd824SRoger Pau Monné * XenStore nodes used in both schemes. 393*3a9fd824SRoger Pau Monné * (4) Devices that support discard functionality may internally allocate space 394*3a9fd824SRoger Pau Monné * (discardable extents) in units that are larger than the exported logical 395*3a9fd824SRoger Pau Monné * block size. If the backing device has such discardable extents the 396*3a9fd824SRoger Pau Monné * backend should provide both discard-granularity and discard-alignment. 397*3a9fd824SRoger Pau Monné * Providing just one of the two may be considered an error by the frontend. 398*3a9fd824SRoger Pau Monné * Backends supporting discard should include discard-granularity and 399*3a9fd824SRoger Pau Monné * discard-alignment even if it supports discarding individual sectors. 
400*3a9fd824SRoger Pau Monné * Frontends should assume discard-alignment == 0 and discard-granularity 401*3a9fd824SRoger Pau Monné * == sector size if these keys are missing. 402*3a9fd824SRoger Pau Monné * (5) The discard-alignment parameter allows a physical device to be 403*3a9fd824SRoger Pau Monné * partitioned into virtual devices that do not necessarily begin or 404*3a9fd824SRoger Pau Monné * end on a discardable extent boundary. 405*3a9fd824SRoger Pau Monné * (6) When there is only a single page allocated to the request ring, 406*3a9fd824SRoger Pau Monné * 'ring-ref' is used to communicate the grant reference for this 407*3a9fd824SRoger Pau Monné * page to the backend. When using a multi-page ring, the 'ring-ref' 408*3a9fd824SRoger Pau Monné * node is not created. Instead 'ring-ref0' - 'ring-refN' are used. 409*3a9fd824SRoger Pau Monné * (7) When using persistent grants data has to be copied from/to the page 410*3a9fd824SRoger Pau Monné * where the grant is currently mapped. The overhead of doing this copy 411*3a9fd824SRoger Pau Monné * however doesn't negate the speed improvement of not having to unmap 412*3a9fd824SRoger Pau Monné * the grants. 413*3a9fd824SRoger Pau Monné * (8) The frontend driver has to allow the backend driver to map all grants 414*3a9fd824SRoger Pau Monné * with write access, even when they should be mapped read-only, since 415*3a9fd824SRoger Pau Monné * further requests may reuse these grants and require write permissions. 416*3a9fd824SRoger Pau Monné * (9) Linux implementation doesn't have a limit on the maximum number of 417*3a9fd824SRoger Pau Monné * grants that can be persistently mapped in the frontend driver, but 418*3a9fd824SRoger Pau Monné * due to the frontend driver implementation it should never be bigger 419*3a9fd824SRoger Pau Monné * than RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST. 
420*3a9fd824SRoger Pau Monné *(10) The discard-secure property may be present and will be set to 1 if the 421*3a9fd824SRoger Pau Monné * backing device supports secure discard. 422*3a9fd824SRoger Pau Monné *(11) Only used by Linux and NetBSD. 423*3a9fd824SRoger Pau Monné */ 424*3a9fd824SRoger Pau Monné 425*3a9fd824SRoger Pau Monné /* 426*3a9fd824SRoger Pau Monné * Multiple hardware queues/rings: 427*3a9fd824SRoger Pau Monné * If supported, the backend will write the key "multi-queue-max-queues" to 428*3a9fd824SRoger Pau Monné * the directory for that vbd, and set its value to the maximum supported 429*3a9fd824SRoger Pau Monné * number of queues. 430*3a9fd824SRoger Pau Monné * Frontends that are aware of this feature and wish to use it can write the 431*3a9fd824SRoger Pau Monné * key "multi-queue-num-queues" with the number they wish to use, which must be 432*3a9fd824SRoger Pau Monné * greater than zero, and no more than the value reported by the backend in 433*3a9fd824SRoger Pau Monné * "multi-queue-max-queues". 434*3a9fd824SRoger Pau Monné * 435*3a9fd824SRoger Pau Monné * For frontends requesting just one queue, the usual event-channel and 436*3a9fd824SRoger Pau Monné * ring-ref keys are written as before, simplifying the backend processing 437*3a9fd824SRoger Pau Monné * to avoid distinguishing between a frontend that doesn't understand the 438*3a9fd824SRoger Pau Monné * multi-queue feature, and one that does, but requested only one queue. 439*3a9fd824SRoger Pau Monné * 440*3a9fd824SRoger Pau Monné * Frontends requesting two or more queues must not write the toplevel 441*3a9fd824SRoger Pau Monné * event-channel and ring-ref keys, instead writing those keys under sub-keys 442*3a9fd824SRoger Pau Monné * having the name "queue-N" where N is the integer ID of the queue/ring for 443*3a9fd824SRoger Pau Monné * which those keys belong. Queues are indexed from zero. 
444*3a9fd824SRoger Pau Monné * For example, a frontend with two queues must write the following set of 445*3a9fd824SRoger Pau Monné * queue-related keys: 446*3a9fd824SRoger Pau Monné * 447*3a9fd824SRoger Pau Monné * /local/domain/1/device/vbd/0/multi-queue-num-queues = "2" 448*3a9fd824SRoger Pau Monné * /local/domain/1/device/vbd/0/queue-0 = "" 449*3a9fd824SRoger Pau Monné * /local/domain/1/device/vbd/0/queue-0/ring-ref = "<ring-ref#0>" 450*3a9fd824SRoger Pau Monné * /local/domain/1/device/vbd/0/queue-0/event-channel = "<evtchn#0>" 451*3a9fd824SRoger Pau Monné * /local/domain/1/device/vbd/0/queue-1 = "" 452*3a9fd824SRoger Pau Monné * /local/domain/1/device/vbd/0/queue-1/ring-ref = "<ring-ref#1>" 453*3a9fd824SRoger Pau Monné * /local/domain/1/device/vbd/0/queue-1/event-channel = "<evtchn#1>" 454*3a9fd824SRoger Pau Monné * 455*3a9fd824SRoger Pau Monné * It is also possible to use multiple queues/rings together with 456*3a9fd824SRoger Pau Monné * the multi-page ring buffer feature. 457*3a9fd824SRoger Pau Monné * For example, a frontend that requests two queues/rings, each with a ring 458*3a9fd824SRoger Pau Monné * buffer of two pages, must write the following set of related keys: 459*3a9fd824SRoger Pau Monné * 460*3a9fd824SRoger Pau Monné * /local/domain/1/device/vbd/0/multi-queue-num-queues = "2" 461*3a9fd824SRoger Pau Monné * /local/domain/1/device/vbd/0/ring-page-order = "1" 462*3a9fd824SRoger Pau Monné * /local/domain/1/device/vbd/0/queue-0 = "" 463*3a9fd824SRoger Pau Monné * /local/domain/1/device/vbd/0/queue-0/ring-ref0 = "<ring-ref#0>" 464*3a9fd824SRoger Pau Monné * /local/domain/1/device/vbd/0/queue-0/ring-ref1 = "<ring-ref#1>" 465*3a9fd824SRoger Pau Monné * /local/domain/1/device/vbd/0/queue-0/event-channel = "<evtchn#0>" 466*3a9fd824SRoger Pau Monné * /local/domain/1/device/vbd/0/queue-1 = "" 467*3a9fd824SRoger Pau Monné * /local/domain/1/device/vbd/0/queue-1/ring-ref0 = "<ring-ref#2>" 468*3a9fd824SRoger Pau Monné * 
/local/domain/1/device/vbd/0/queue-1/ring-ref1 = "<ring-ref#3>" 469*3a9fd824SRoger Pau Monné * /local/domain/1/device/vbd/0/queue-1/event-channel = "<evtchn#1>" 470*3a9fd824SRoger Pau Monné * 471*3a9fd824SRoger Pau Monné */ 472*3a9fd824SRoger Pau Monné 473*3a9fd824SRoger Pau Monné /* 474*3a9fd824SRoger Pau Monné * STATE DIAGRAMS 475*3a9fd824SRoger Pau Monné * 476*3a9fd824SRoger Pau Monné ***************************************************************************** 477*3a9fd824SRoger Pau Monné * Startup * 478*3a9fd824SRoger Pau Monné ***************************************************************************** 479*3a9fd824SRoger Pau Monné * 480*3a9fd824SRoger Pau Monné * Tool stack creates front and back nodes with state XenbusStateInitialising. 481*3a9fd824SRoger Pau Monné * 482*3a9fd824SRoger Pau Monné * Front Back 483*3a9fd824SRoger Pau Monné * ================================= ===================================== 484*3a9fd824SRoger Pau Monné * XenbusStateInitialising XenbusStateInitialising 485*3a9fd824SRoger Pau Monné * o Query virtual device o Query backend device identification 486*3a9fd824SRoger Pau Monné * properties. data. 487*3a9fd824SRoger Pau Monné * o Setup OS device instance. o Open and validate backend device. 488*3a9fd824SRoger Pau Monné * o Publish backend features and 489*3a9fd824SRoger Pau Monné * transport parameters. 490*3a9fd824SRoger Pau Monné * | 491*3a9fd824SRoger Pau Monné * | 492*3a9fd824SRoger Pau Monné * V 493*3a9fd824SRoger Pau Monné * XenbusStateInitWait 494*3a9fd824SRoger Pau Monné * 495*3a9fd824SRoger Pau Monné * o Query backend features and 496*3a9fd824SRoger Pau Monné * transport parameters. 497*3a9fd824SRoger Pau Monné * o Allocate and initialize the 498*3a9fd824SRoger Pau Monné * request ring. 499*3a9fd824SRoger Pau Monné * o Publish transport parameters 500*3a9fd824SRoger Pau Monné * that will be in effect during 501*3a9fd824SRoger Pau Monné * this connection. 
502*3a9fd824SRoger Pau Monné * | 503*3a9fd824SRoger Pau Monné * | 504*3a9fd824SRoger Pau Monné * V 505*3a9fd824SRoger Pau Monné * XenbusStateInitialised 506*3a9fd824SRoger Pau Monné * 507*3a9fd824SRoger Pau Monné * o Query frontend transport parameters. 508*3a9fd824SRoger Pau Monné * o Connect to the request ring and 509*3a9fd824SRoger Pau Monné * event channel. 510*3a9fd824SRoger Pau Monné * o Publish backend device properties. 511*3a9fd824SRoger Pau Monné * | 512*3a9fd824SRoger Pau Monné * | 513*3a9fd824SRoger Pau Monné * V 514*3a9fd824SRoger Pau Monné * XenbusStateConnected 515*3a9fd824SRoger Pau Monné * 516*3a9fd824SRoger Pau Monné * o Query backend device properties. 517*3a9fd824SRoger Pau Monné * o Finalize OS virtual device 518*3a9fd824SRoger Pau Monné * instance. 519*3a9fd824SRoger Pau Monné * | 520*3a9fd824SRoger Pau Monné * | 521*3a9fd824SRoger Pau Monné * V 522*3a9fd824SRoger Pau Monné * XenbusStateConnected 523*3a9fd824SRoger Pau Monné * 524*3a9fd824SRoger Pau Monné * Note: Drivers that do not support any optional features, or the negotiation 525*3a9fd824SRoger Pau Monné * of transport parameters, can skip certain states in the state machine: 526*3a9fd824SRoger Pau Monné * 527*3a9fd824SRoger Pau Monné * o A frontend may transition to XenbusStateInitialised without 528*3a9fd824SRoger Pau Monné * waiting for the backend to enter XenbusStateInitWait. In this 529*3a9fd824SRoger Pau Monné * case, default transport parameters are in effect and any 530*3a9fd824SRoger Pau Monné * transport parameters published by the frontend must contain 531*3a9fd824SRoger Pau Monné * their default values. 532*3a9fd824SRoger Pau Monné * 533*3a9fd824SRoger Pau Monné * o A backend may transition to XenbusStateInitialised, bypassing 534*3a9fd824SRoger Pau Monné * XenbusStateInitWait, without waiting for the frontend to first 535*3a9fd824SRoger Pau Monné * enter the XenbusStateInitialised state. 
In this case, default 536*3a9fd824SRoger Pau Monné * transport parameters are in effect and any transport parameters 537*3a9fd824SRoger Pau Monné * published by the backend must contain their default values. 538*3a9fd824SRoger Pau Monné * 539*3a9fd824SRoger Pau Monné * Drivers that support optional features and/or transport parameter 540*3a9fd824SRoger Pau Monné * negotiation must tolerate these additional state transition paths. 541*3a9fd824SRoger Pau Monné * In general this means performing the work of any skipped state 542*3a9fd824SRoger Pau Monné * transition, if it has not already been performed, in addition to the 543*3a9fd824SRoger Pau Monné * work associated with entry into the current state. 544*3a9fd824SRoger Pau Monné */ 545*3a9fd824SRoger Pau Monné 546*3a9fd824SRoger Pau Monné /* 547*3a9fd824SRoger Pau Monné * REQUEST CODES. 548*3a9fd824SRoger Pau Monné */ 549*3a9fd824SRoger Pau Monné #define BLKIF_OP_READ 0 550*3a9fd824SRoger Pau Monné #define BLKIF_OP_WRITE 1 551*3a9fd824SRoger Pau Monné /* 552*3a9fd824SRoger Pau Monné * All writes issued prior to a request with the BLKIF_OP_WRITE_BARRIER 553*3a9fd824SRoger Pau Monné * operation code ("barrier request") must be completed prior to the 554*3a9fd824SRoger Pau Monné * execution of the barrier request. All writes issued after the barrier 555*3a9fd824SRoger Pau Monné * request must not execute until after the completion of the barrier request. 556*3a9fd824SRoger Pau Monné * 557*3a9fd824SRoger Pau Monné * Optional. See "feature-barrier" XenBus node documentation above. 558*3a9fd824SRoger Pau Monné */ 559*3a9fd824SRoger Pau Monné #define BLKIF_OP_WRITE_BARRIER 2 560*3a9fd824SRoger Pau Monné /* 561*3a9fd824SRoger Pau Monné * Commit any uncommitted contents of the backing device's volatile cache 562*3a9fd824SRoger Pau Monné * to stable storage. 563*3a9fd824SRoger Pau Monné * 564*3a9fd824SRoger Pau Monné * Optional. See "feature-flush-cache" XenBus node documentation above. 
565*3a9fd824SRoger Pau Monné */ 566*3a9fd824SRoger Pau Monné #define BLKIF_OP_FLUSH_DISKCACHE 3 567*3a9fd824SRoger Pau Monné /* 568*3a9fd824SRoger Pau Monné * Used in SLES sources for device specific command packet 569*3a9fd824SRoger Pau Monné * contained within the request. Reserved for that purpose. 570*3a9fd824SRoger Pau Monné */ 571*3a9fd824SRoger Pau Monné #define BLKIF_OP_RESERVED_1 4 572*3a9fd824SRoger Pau Monné /* 573*3a9fd824SRoger Pau Monné * Indicate to the backend device that a region of storage is no longer in 574*3a9fd824SRoger Pau Monné * use, and may be discarded at any time without impact to the client. If 575*3a9fd824SRoger Pau Monné * the BLKIF_DISCARD_SECURE flag is set on the request, all copies of the 576*3a9fd824SRoger Pau Monné * discarded region on the device must be rendered unrecoverable before the 577*3a9fd824SRoger Pau Monné * command returns. 578*3a9fd824SRoger Pau Monné * 579*3a9fd824SRoger Pau Monné * This operation is analogous to performing a trim (ATA) or unmap (SCSI) 580*3a9fd824SRoger Pau Monné * command on a native device. 581*3a9fd824SRoger Pau Monné * 582*3a9fd824SRoger Pau Monné * More information about trim/unmap operations can be found at: 583*3a9fd824SRoger Pau Monné * http://t13.org/Documents/UploadedDocuments/docs2008/ 584*3a9fd824SRoger Pau Monné * e07154r6-Data_Set_Management_Proposal_for_ATA-ACS2.doc 585*3a9fd824SRoger Pau Monné * http://www.seagate.com/staticfiles/support/disc/manuals/ 586*3a9fd824SRoger Pau Monné * Interface%20manuals/100293068c.pdf 587*3a9fd824SRoger Pau Monné * 588*3a9fd824SRoger Pau Monné * Optional. See "feature-discard", "discard-alignment", 589*3a9fd824SRoger Pau Monné * "discard-granularity", and "discard-secure" in the XenBus node 590*3a9fd824SRoger Pau Monné * documentation above. 
591*3a9fd824SRoger Pau Monné */ 592*3a9fd824SRoger Pau Monné #define BLKIF_OP_DISCARD 5 593*3a9fd824SRoger Pau Monné 594*3a9fd824SRoger Pau Monné /* 595*3a9fd824SRoger Pau Monné * Recognized if "feature-max-indirect-segments" is present in the backend 596*3a9fd824SRoger Pau Monné * xenbus info. The "feature-max-indirect-segments" node contains the maximum 597*3a9fd824SRoger Pau Monné * number of segments allowed by the backend per request. If the node is 598*3a9fd824SRoger Pau Monné * present, the frontend might use blkif_request_indirect structs in order to 599*3a9fd824SRoger Pau Monné * issue requests with more than BLKIF_MAX_SEGMENTS_PER_REQUEST (11). The 600*3a9fd824SRoger Pau Monné * maximum number of indirect segments is fixed by the backend, but the 601*3a9fd824SRoger Pau Monné * frontend can issue requests with any number of indirect segments as long as 602*3a9fd824SRoger Pau Monné * it does not exceed the number provided by the backend. The indirect_grefs field 603*3a9fd824SRoger Pau Monné * in blkif_request_indirect should be filled by the frontend with the 604*3a9fd824SRoger Pau Monné * grant references of the pages that are holding the indirect segments. 605*3a9fd824SRoger Pau Monné * These pages are filled with an array of blkif_request_segment that hold the 606*3a9fd824SRoger Pau Monné * information about the segments. The number of indirect pages to use is 607*3a9fd824SRoger Pau Monné * determined by the number of segments an indirect request contains. Every 608*3a9fd824SRoger Pau Monné * indirect page can contain a maximum of 609*3a9fd824SRoger Pau Monné * (PAGE_SIZE / sizeof(struct blkif_request_segment)) segments, so to 610*3a9fd824SRoger Pau Monné * calculate the number of indirect pages to use we have to do 611*3a9fd824SRoger Pau Monné * ceil(indirect_segments / (PAGE_SIZE / sizeof(struct blkif_request_segment))). 
612*3a9fd824SRoger Pau Monné * 613*3a9fd824SRoger Pau Monné * If a backend does not recognize BLKIF_OP_INDIRECT, it should *not* 614*3a9fd824SRoger Pau Monné * create the "feature-max-indirect-segments" node! 615*3a9fd824SRoger Pau Monné */ 616*3a9fd824SRoger Pau Monné #define BLKIF_OP_INDIRECT 6 617*3a9fd824SRoger Pau Monné 618*3a9fd824SRoger Pau Monné /* 619*3a9fd824SRoger Pau Monné * Maximum scatter/gather segments per request. 620*3a9fd824SRoger Pau Monné * This is carefully chosen so that sizeof(blkif_ring_t) <= PAGE_SIZE. 621*3a9fd824SRoger Pau Monné * NB. This could be 12 if the ring indexes weren't stored in the same page. 622*3a9fd824SRoger Pau Monné */ 623*3a9fd824SRoger Pau Monné #define BLKIF_MAX_SEGMENTS_PER_REQUEST 11 624*3a9fd824SRoger Pau Monné 625*3a9fd824SRoger Pau Monné /* 626*3a9fd824SRoger Pau Monné * Maximum number of indirect pages to use per request. 627*3a9fd824SRoger Pau Monné */ 628*3a9fd824SRoger Pau Monné #define BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST 8 629*3a9fd824SRoger Pau Monné 630*3a9fd824SRoger Pau Monné /* 631*3a9fd824SRoger Pau Monné * NB. 'first_sect' and 'last_sect' in blkif_request_segment, as well as 632*3a9fd824SRoger Pau Monné * 'sector_number' in blkif_request, blkif_request_discard and 633*3a9fd824SRoger Pau Monné * blkif_request_indirect are sector-based quantities. See the description 634*3a9fd824SRoger Pau Monné * of the "feature-large-sector-size" frontend xenbus node above for 635*3a9fd824SRoger Pau Monné * more information. 636*3a9fd824SRoger Pau Monné */ 637*3a9fd824SRoger Pau Monné struct blkif_request_segment { 638*3a9fd824SRoger Pau Monné grant_ref_t gref; /* reference to I/O buffer frame */ 639*3a9fd824SRoger Pau Monné /* @first_sect: first sector in frame to transfer (inclusive). */ 640*3a9fd824SRoger Pau Monné /* @last_sect: last sector in frame to transfer (inclusive). 
*/ 641*3a9fd824SRoger Pau Monné uint8_t first_sect, last_sect; 642*3a9fd824SRoger Pau Monné }; 643*3a9fd824SRoger Pau Monné 644*3a9fd824SRoger Pau Monné /* 645*3a9fd824SRoger Pau Monné * Starting ring element for any I/O request. 646*3a9fd824SRoger Pau Monné */ 647*3a9fd824SRoger Pau Monné struct blkif_request { 648*3a9fd824SRoger Pau Monné uint8_t operation; /* BLKIF_OP_??? */ 649*3a9fd824SRoger Pau Monné uint8_t nr_segments; /* number of segments */ 650*3a9fd824SRoger Pau Monné blkif_vdev_t handle; /* only for read/write requests */ 651*3a9fd824SRoger Pau Monné uint64_t id; /* private guest value, echoed in resp */ 652*3a9fd824SRoger Pau Monné blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ 653*3a9fd824SRoger Pau Monné struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; 654*3a9fd824SRoger Pau Monné }; 655*3a9fd824SRoger Pau Monné typedef struct blkif_request blkif_request_t; 656*3a9fd824SRoger Pau Monné 657*3a9fd824SRoger Pau Monné /* 658*3a9fd824SRoger Pau Monné * Cast to this structure when blkif_request.operation == BLKIF_OP_DISCARD. 659*3a9fd824SRoger Pau Monné * Size constraint: sizeof(struct blkif_request_discard) <= sizeof(struct blkif_request) 660*3a9fd824SRoger Pau Monné */ 661*3a9fd824SRoger Pau Monné struct blkif_request_discard { 662*3a9fd824SRoger Pau Monné uint8_t operation; /* BLKIF_OP_DISCARD */ 663*3a9fd824SRoger Pau Monné uint8_t flag; /* BLKIF_DISCARD_SECURE or zero */ 664*3a9fd824SRoger Pau Monné #define BLKIF_DISCARD_SECURE (1<<0) /* ignored if discard-secure=0 */ 665*3a9fd824SRoger Pau Monné blkif_vdev_t handle; /* same as for read/write requests */ 666*3a9fd824SRoger Pau Monné uint64_t id; /* private guest value, echoed in resp */ 667*3a9fd824SRoger Pau Monné blkif_sector_t sector_number;/* start sector idx on disk */ 668*3a9fd824SRoger Pau Monné uint64_t nr_sectors; /* number of contiguous sectors to discard*/ 669*3a9fd824SRoger Pau Monné }; 670*3a9fd824SRoger Pau Monné typedef struct blkif_request_discard 
blkif_request_discard_t; 671*3a9fd824SRoger Pau Monné 672*3a9fd824SRoger Pau Monné /* Request layout used when operation == BLKIF_OP_INDIRECT: indirect_op carries the BLKIF_OP_{READ/WRITE} code to perform and indirect_grefs carries the grant references of the pages holding the blkif_request_segment array. */ struct blkif_request_indirect { 673*3a9fd824SRoger Pau Monné uint8_t operation; /* BLKIF_OP_INDIRECT */ 674*3a9fd824SRoger Pau Monné uint8_t indirect_op; /* BLKIF_OP_{READ/WRITE} */ 675*3a9fd824SRoger Pau Monné uint16_t nr_segments; /* number of segments */ 676*3a9fd824SRoger Pau Monné uint64_t id; /* private guest value, echoed in resp */ 677*3a9fd824SRoger Pau Monné blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ 678*3a9fd824SRoger Pau Monné blkif_vdev_t handle; /* same as for read/write requests */ 679*3a9fd824SRoger Pau Monné grant_ref_t indirect_grefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST]; 680*3a9fd824SRoger Pau Monné #ifdef __i386__ 681*3a9fd824SRoger Pau Monné uint64_t pad; /* Make it 64 byte aligned on i386 */ 682*3a9fd824SRoger Pau Monné #endif 683*3a9fd824SRoger Pau Monné }; 684*3a9fd824SRoger Pau Monné typedef struct blkif_request_indirect blkif_request_indirect_t; 685*3a9fd824SRoger Pau Monné 686*3a9fd824SRoger Pau Monné /* Response ring element: 'id' and 'operation' are copied from the request being answered, and 'status' carries one of the BLKIF_RSP_??? codes. */ struct blkif_response { 687*3a9fd824SRoger Pau Monné uint64_t id; /* copied from request */ 688*3a9fd824SRoger Pau Monné uint8_t operation; /* copied from request */ 689*3a9fd824SRoger Pau Monné int16_t status; /* BLKIF_RSP_??? */ 690*3a9fd824SRoger Pau Monné }; 691*3a9fd824SRoger Pau Monné typedef struct blkif_response blkif_response_t; 692*3a9fd824SRoger Pau Monné 693*3a9fd824SRoger Pau Monné /* 694*3a9fd824SRoger Pau Monné * STATUS RETURN CODES. 695*3a9fd824SRoger Pau Monné */ 696*3a9fd824SRoger Pau Monné /* Operation not supported (only happens on barrier writes). */ 697*3a9fd824SRoger Pau Monné #define BLKIF_RSP_EOPNOTSUPP -2 698*3a9fd824SRoger Pau Monné /* Operation failed for some unspecified reason (-EIO). */ 699*3a9fd824SRoger Pau Monné #define BLKIF_RSP_ERROR -1 700*3a9fd824SRoger Pau Monné /* Operation completed successfully. 
*/ 701*3a9fd824SRoger Pau Monné #define BLKIF_RSP_OKAY 0 702*3a9fd824SRoger Pau Monné 703*3a9fd824SRoger Pau Monné /* 704*3a9fd824SRoger Pau Monné * Generate blkif ring structures and types. 705*3a9fd824SRoger Pau Monné */ 706*3a9fd824SRoger Pau Monné DEFINE_RING_TYPES(blkif, struct blkif_request, struct blkif_response); 707*3a9fd824SRoger Pau Monné /* Virtual disk attribute flags; each value is a distinct bit so they can be combined. NOTE(review): presumably these are the bits reported through the backend "info" XenBus node - confirm against the node documentation earlier in this file. */ 708*3a9fd824SRoger Pau Monné #define VDISK_CDROM 0x1 709*3a9fd824SRoger Pau Monné #define VDISK_REMOVABLE 0x2 710*3a9fd824SRoger Pau Monné #define VDISK_READONLY 0x4 711*3a9fd824SRoger Pau Monné 712*3a9fd824SRoger Pau Monné #endif /* __XEN_PUBLIC_IO_BLKIF_H__ */ 713*3a9fd824SRoger Pau Monné 714*3a9fd824SRoger Pau Monné /* 715*3a9fd824SRoger Pau Monné * Local variables: 716*3a9fd824SRoger Pau Monné * mode: C 717*3a9fd824SRoger Pau Monné * c-file-style: "BSD" 718*3a9fd824SRoger Pau Monné * c-basic-offset: 4 719*3a9fd824SRoger Pau Monné * tab-width: 4 720*3a9fd824SRoger Pau Monné * indent-tabs-mode: nil 721*3a9fd824SRoger Pau Monné * End: 722*3a9fd824SRoger Pau Monné */ 723