xref: /linux/include/xen/interface/io/blkif.h (revision 0883c2c06fb5bcf5b9e008270827e63c09a88c1e)
1 /******************************************************************************
2  * blkif.h
3  *
4  * Unified block-device I/O interface for Xen guest OSes.
5  *
6  * Copyright (c) 2003-2004, Keir Fraser
7  */
8 
9 #ifndef __XEN_PUBLIC_IO_BLKIF_H__
10 #define __XEN_PUBLIC_IO_BLKIF_H__
11 
12 #include <xen/interface/io/ring.h>
13 #include <xen/interface/grant_table.h>
14 
15 /*
16  * Front->back notifications: When enqueuing a new request, sending a
17  * notification can be made conditional on req_event (i.e., the generic
18  * hold-off mechanism provided by the ring macros). Backends must set
19  * req_event appropriately (e.g., using RING_FINAL_CHECK_FOR_REQUESTS()).
20  *
21  * Back->front notifications: When enqueuing a new response, sending a
22  * notification can be made conditional on rsp_event (i.e., the generic
23  * hold-off mechanism provided by the ring macros). Frontends must set
24  * rsp_event appropriately (e.g., using RING_FINAL_CHECK_FOR_RESPONSES()).
25  */
26 
/* 16-bit virtual device handle (the type of the request "handle" field). */
typedef uint16_t blkif_vdev_t;
/* 64-bit sector index on the disk (the type of "sector_number"). */
typedef uint64_t blkif_sector_t;
29 
30 /*
31  * Multiple hardware queues/rings:
32  * If supported, the backend will write the key "multi-queue-max-queues" to
33  * the directory for that vbd, and set its value to the maximum supported
34  * number of queues.
35  * Frontends that are aware of this feature and wish to use it can write the
36  * key "multi-queue-num-queues" with the number they wish to use, which must be
37  * greater than zero, and no more than the value reported by the backend in
38  * "multi-queue-max-queues".
39  *
40  * For frontends requesting just one queue, the usual event-channel and
41  * ring-ref keys are written as before, simplifying the backend processing
42  * to avoid distinguishing between a frontend that doesn't understand the
43  * multi-queue feature, and one that does, but requested only one queue.
44  *
45  * Frontends requesting two or more queues must not write the toplevel
46  * event-channel and ring-ref keys, instead writing those keys under sub-keys
47  * having the name "queue-N" where N is the integer ID of the queue/ring for
48  * which those keys belong. Queues are indexed from zero.
49  * For example, a frontend with two queues must write the following set of
50  * queue-related keys:
51  *
52  * /local/domain/1/device/vbd/0/multi-queue-num-queues = "2"
53  * /local/domain/1/device/vbd/0/queue-0 = ""
54  * /local/domain/1/device/vbd/0/queue-0/ring-ref = "<ring-ref#0>"
55  * /local/domain/1/device/vbd/0/queue-0/event-channel = "<evtchn#0>"
56  * /local/domain/1/device/vbd/0/queue-1 = ""
57  * /local/domain/1/device/vbd/0/queue-1/ring-ref = "<ring-ref#1>"
58  * /local/domain/1/device/vbd/0/queue-1/event-channel = "<evtchn#1>"
59  *
60  * It is also possible to use multiple queues/rings together with
61  * the multi-page ring buffer feature.
62  * For example, a frontend that requests two queues/rings, each with a ring
63  * buffer of two pages, must write the following set of related keys:
64  *
65  * /local/domain/1/device/vbd/0/multi-queue-num-queues = "2"
66  * /local/domain/1/device/vbd/0/ring-page-order = "1"
67  * /local/domain/1/device/vbd/0/queue-0 = ""
68  * /local/domain/1/device/vbd/0/queue-0/ring-ref0 = "<ring-ref#0>"
69  * /local/domain/1/device/vbd/0/queue-0/ring-ref1 = "<ring-ref#1>"
70  * /local/domain/1/device/vbd/0/queue-0/event-channel = "<evtchn#0>"
71  * /local/domain/1/device/vbd/0/queue-1 = ""
72  * /local/domain/1/device/vbd/0/queue-1/ring-ref0 = "<ring-ref#2>"
73  * /local/domain/1/device/vbd/0/queue-1/ring-ref1 = "<ring-ref#3>"
74  * /local/domain/1/device/vbd/0/queue-1/event-channel = "<evtchn#1>"
75  *
76  */
77 
78 /*
79  * REQUEST CODES.
80  */
#define BLKIF_OP_READ  0	/* read request  */
#define BLKIF_OP_WRITE 1	/* write request */
83 /*
84  * Recognised only if "feature-barrier" is present in backend xenbus info.
85  * The "feature-barrier" node contains a boolean indicating whether barrier
86  * requests are likely to succeed or fail. Either way, a barrier request
87  * may fail at any time with BLKIF_RSP_EOPNOTSUPP if it is unsupported by
88  * the underlying block-device hardware. The boolean simply indicates whether
89  * or not it is worthwhile for the frontend to attempt barrier requests.
90  * If a backend does not recognise BLKIF_OP_WRITE_BARRIER, it should *not*
91  * create the "feature-barrier" node!
92  */
#define BLKIF_OP_WRITE_BARRIER 2	/* gated on "feature-barrier" */
94 
95 /*
96  * Recognised if "feature-flush-cache" is present in backend xenbus
97  * info.  A flush will ask the underlying storage hardware to flush its
98  * volatile caches as appropriate.  The "feature-flush-cache" node
99  * contains a boolean indicating whether flush requests are likely to
100  * succeed or fail. Either way, a flush request may fail at any time
101  * with BLKIF_RSP_EOPNOTSUPP if it is unsupported by the underlying
102  * block-device hardware. The boolean simply indicates whether or not it
103  * is worthwhile for the frontend to attempt flushes.  If a backend does
104  * not recognise BLKIF_OP_FLUSH_DISKCACHE, it should *not* create the
105  * "feature-flush-cache" node!
106  */
#define BLKIF_OP_FLUSH_DISKCACHE 3	/* gated on "feature-flush-cache" */
108 
109 /*
110  * Recognised only if "feature-discard" is present in backend xenbus info.
111  * The "feature-discard" node contains a boolean indicating whether trim
112  * (ATA) or unmap (SCSI) - conveniently called discard - requests are likely
113  * to succeed or fail. Either way, a discard request
114  * may fail at any time with BLKIF_RSP_EOPNOTSUPP if it is unsupported by
115  * the underlying block-device hardware. The boolean simply indicates whether
116  * or not it is worthwhile for the frontend to attempt discard requests.
117  * If a backend does not recognise BLKIF_OP_DISCARD, it should *not*
118  * create the "feature-discard" node!
119  *
120  * Discard operation is a request for the underlying block device to mark
121  * extents to be erased. However, discard does not guarantee that the blocks
122  * will be erased from the device - it is just a hint to the device
123  * controller that these blocks are no longer in use. What the device
124  * controller does with that information is left to the controller.
125  * Discard operations are passed with sector_number as the
126  * sector index to begin discard operations at and nr_sectors as the number of
127  * sectors to be discarded. The specified sectors should be discarded if the
128  * underlying block device supports trim (ATA) or unmap (SCSI) operations,
129  * or a BLKIF_RSP_EOPNOTSUPP  should be returned.
130  * More information about trim/unmap operations at:
131  * http://t13.org/Documents/UploadedDocuments/docs2008/
132  *     e07154r6-Data_Set_Management_Proposal_for_ATA-ACS2.doc
133  * http://www.seagate.com/staticfiles/support/disc/manuals/
134  *     Interface%20manuals/100293068c.pdf
135  * The backend can optionally provide three extra XenBus attributes to
136  * further optimize the discard functionality:
137  * 'discard-alignment' - Devices that support discard functionality may
138  * internally allocate space in units that are bigger than the exported
139  * logical block size. The discard-alignment parameter indicates how many bytes
140  * the beginning of the partition is offset from the internal allocation unit's
141  * natural alignment.
142  * 'discard-granularity'  - Devices that support discard functionality may
143  * internally allocate space using units that are bigger than the logical block
144  * size. The discard-granularity parameter indicates the size of the internal
145  * allocation unit in bytes if reported by the device. Otherwise the
146  * discard-granularity will be set to match the device's physical block size.
147  * 'discard-secure' - All copies of the discarded sectors (potentially created
148  * by garbage collection) must also be erased.  To use this feature, the flag
149  * BLKIF_DISCARD_SECURE must be set in struct blkif_request_discard's flag.
150  */
#define BLKIF_OP_DISCARD 5	/* gated on "feature-discard" */
152 
153 /*
154  * Recognized if "feature-max-indirect-segments" is present in the backend
155  * xenbus info. The "feature-max-indirect-segments" node contains the maximum
156  * number of segments allowed by the backend per request. If the node is
157  * present, the frontend might use blkif_request_indirect structs in order to
158  * issue requests with more than BLKIF_MAX_SEGMENTS_PER_REQUEST (11). The
159  * maximum number of indirect segments is fixed by the backend, but the
160  * frontend can issue requests with any number of indirect segments as long as
161  * it's less than the number provided by the backend. The indirect_grefs field
162  * in blkif_request_indirect should be filled by the frontend with the
163  * grant references of the pages that are holding the indirect segments.
164  * These pages are filled with an array of blkif_request_segment that hold the
165  * information about the segments. The number of indirect pages to use is
166  * determined by the number of segments an indirect request contains. Every
167  * indirect page can contain a maximum of
168  * (PAGE_SIZE / sizeof(struct blkif_request_segment)) segments, so to
169  * calculate the number of indirect pages to use we have to do
170  * ceil(indirect_segments / (PAGE_SIZE / sizeof(struct blkif_request_segment))).
171  *
172  * If a backend does not recognize BLKIF_OP_INDIRECT, it should *not*
173  * create the "feature-max-indirect-segments" node!
174  */
#define BLKIF_OP_INDIRECT 6	/* gated on "feature-max-indirect-segments" */
176 
177 /*
178  * Maximum scatter/gather segments per request.
179  * This is carefully chosen so that sizeof(struct blkif_ring) <= PAGE_SIZE.
180  * NB. This could be 12 if the ring indexes weren't stored in the same page.
181  */
#define BLKIF_MAX_SEGMENTS_PER_REQUEST       11

/* Length of indirect_grefs[] in struct blkif_request_indirect. */
#define BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST 8
185 
186 struct blkif_request_segment {
187 		grant_ref_t gref;        /* reference to I/O buffer frame        */
188 		/* @first_sect: first sector in frame to transfer (inclusive).   */
189 		/* @last_sect: last sector in frame to transfer (inclusive).     */
190 		uint8_t     first_sect, last_sect;
191 };
192 
193 struct blkif_request_rw {
194 	uint8_t        nr_segments;  /* number of segments                   */
195 	blkif_vdev_t   handle;       /* only for read/write requests         */
196 #ifndef CONFIG_X86_32
197 	uint32_t       _pad1;	     /* offsetof(blkif_request,u.rw.id) == 8 */
198 #endif
199 	uint64_t       id;           /* private guest value, echoed in resp  */
200 	blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
201 	struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
202 } __attribute__((__packed__));
203 
204 struct blkif_request_discard {
205 	uint8_t        flag;         /* BLKIF_DISCARD_SECURE or zero.        */
206 #define BLKIF_DISCARD_SECURE (1<<0)  /* ignored if discard-secure=0          */
207 	blkif_vdev_t   _pad1;        /* only for read/write requests         */
208 #ifndef CONFIG_X86_32
209 	uint32_t       _pad2;        /* offsetof(blkif_req..,u.discard.id)==8*/
210 #endif
211 	uint64_t       id;           /* private guest value, echoed in resp  */
212 	blkif_sector_t sector_number;
213 	uint64_t       nr_sectors;
214 	uint8_t        _pad3;
215 } __attribute__((__packed__));
216 
217 struct blkif_request_other {
218 	uint8_t      _pad1;
219 	blkif_vdev_t _pad2;        /* only for read/write requests         */
220 #ifndef CONFIG_X86_32
221 	uint32_t     _pad3;        /* offsetof(blkif_req..,u.other.id)==8*/
222 #endif
223 	uint64_t     id;           /* private guest value, echoed in resp  */
224 } __attribute__((__packed__));
225 
226 struct blkif_request_indirect {
227 	uint8_t        indirect_op;
228 	uint16_t       nr_segments;
229 #ifndef CONFIG_X86_32
230 	uint32_t       _pad1;        /* offsetof(blkif_...,u.indirect.id) == 8 */
231 #endif
232 	uint64_t       id;
233 	blkif_sector_t sector_number;
234 	blkif_vdev_t   handle;
235 	uint16_t       _pad2;
236 	grant_ref_t    indirect_grefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST];
237 #ifndef CONFIG_X86_32
238 	uint32_t      _pad3;         /* make it 64 byte aligned */
239 #else
240 	uint64_t      _pad3;         /* make it 64 byte aligned */
241 #endif
242 } __attribute__((__packed__));
243 
244 struct blkif_request {
245 	uint8_t        operation;    /* BLKIF_OP_???                         */
246 	union {
247 		struct blkif_request_rw rw;
248 		struct blkif_request_discard discard;
249 		struct blkif_request_other other;
250 		struct blkif_request_indirect indirect;
251 	} u;
252 } __attribute__((__packed__));
253 
/*
 * A ring response. Unlike the request structures this one is not
 * packed, so natural alignment applies to its members.
 */
struct blkif_response {
	uint64_t id;		/* copied from request */
	uint8_t operation;	/* copied from request */
	int16_t status;		/* BLKIF_RSP_??? */
};
259 
/*
 * STATUS RETURN CODES.
 *
 * These are the values carried in the status member of struct
 * blkif_response. Negative values are parenthesised so that the leading
 * minus cannot combine with neighbouring tokens at the point of expansion.
 */
/*
 * Operation not supported. Barrier, flush and discard requests may each
 * fail with this code when the underlying block device lacks support.
 */
#define BLKIF_RSP_EOPNOTSUPP  (-2)
/* Operation failed for some unspecified reason (-EIO). */
#define BLKIF_RSP_ERROR       (-1)
/* Operation completed successfully. */
#define BLKIF_RSP_OKAY         0
269 
270 /*
271  * Generate blkif ring structures and types.
272  */
273 
274 DEFINE_RING_TYPES(blkif, struct blkif_request, struct blkif_response);
275 
/* Virtual disk flags; each value is a distinct single bit. */
#define VDISK_CDROM     0x1
#define VDISK_REMOVABLE 0x2
#define VDISK_READONLY  0x4
279 
280 /* Xen-defined major numbers for virtual disks, they look strangely
281  * familiar */
/* IDE majors. */
#define XEN_IDE0_MAJOR        3
#define XEN_IDE1_MAJOR        22

/* SCSI disk majors: 8, then 65..71, then 128..135. */
#define XEN_SCSI_DISK0_MAJOR  8
#define XEN_SCSI_DISK1_MAJOR  65
#define XEN_SCSI_DISK2_MAJOR  66
#define XEN_SCSI_DISK3_MAJOR  67
#define XEN_SCSI_DISK4_MAJOR  68
#define XEN_SCSI_DISK5_MAJOR  69
#define XEN_SCSI_DISK6_MAJOR  70
#define XEN_SCSI_DISK7_MAJOR  71
#define XEN_SCSI_DISK8_MAJOR  128
#define XEN_SCSI_DISK9_MAJOR  129
#define XEN_SCSI_DISK10_MAJOR 130
#define XEN_SCSI_DISK11_MAJOR 131
#define XEN_SCSI_DISK12_MAJOR 132
#define XEN_SCSI_DISK13_MAJOR 133
#define XEN_SCSI_DISK14_MAJOR 134
#define XEN_SCSI_DISK15_MAJOR 135
300 
301 #endif /* __XEN_PUBLIC_IO_BLKIF_H__ */
302