1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. 24 */ 25 26 #ifndef _LDC_IMPL_H 27 #define _LDC_IMPL_H 28 29 #ifdef __cplusplus 30 extern "C" { 31 #endif 32 33 #include <sys/types.h> 34 #include <sys/ddi.h> 35 #include <sys/sunddi.h> 36 #include <sys/ioctl.h> 37 38 /* Memory map table entries */ 39 #define LDC_MTBL_ENTRIES 8192 /* 8 K */ 40 41 /* Define LDC Queue info */ 42 #define LDC_PACKET_SHIFT 6 43 #define LDC_QUEUE_ENTRIES 512 44 #define LDC_MTU_MSGS 4 45 #define LDC_QUEUE_SIZE (LDC_QUEUE_ENTRIES << LDC_PACKET_SHIFT) 46 #define LDC_DEFAULT_MTU (LDC_QUEUE_SIZE / LDC_MTU_MSGS) 47 #define LDC_RXDQ_MULTIPLIER 2 48 49 /* 50 * LDC Reliable mode - initial packet seqid 51 * - If peer initiated handshake, RDX should contain init_seqid + 1 52 * - If this endpoint initiated handshake first data packet should 53 * contain the message init_seqid + 1 54 */ 55 #define LDC_INIT_SEQID 0x0 56 57 /* LDC Message types */ 58 #define LDC_CTRL 0x01 /* Control Pkt */ 59 #define LDC_DATA 0x02 /* Data Pkt */ 60 #define LDC_ERR 0x10 /* Error Pkt */ 61 62 /* LDC Message Subtypes */ 63 #define LDC_INFO 0x01 /* Control/Data/Error info pkt */ 64 #define LDC_ACK 0x02 /* Control/Data ACK */ 65 #define LDC_NACK 0x04 /* Control/Data NACK */ 66 67 /* LDC Control Messages */ 68 #define LDC_VER 0x01 /* Version message */ 69 #define LDC_RTS 0x02 /* Request to Send */ 70 #define LDC_RTR 0x03 /* Ready To Receive */ 71 #define LDC_RDX 0x04 /* Ready for data exchange */ 72 73 #define LDC_CTRL_MASK 0x0f /* Mask to read control bits */ 74 75 /* LDC Channel Transport State (tstate) */ 76 #define TS_TXQ_RDY 0x01 /* allocated TX queue */ 77 #define TS_RXQ_RDY 0x02 /* allocated RX queue */ 78 #define TS_INIT (TS_TXQ_RDY | TS_RXQ_RDY) 79 #define TS_QCONF_RDY 0x04 /* registered queues with HV */ 80 #define TS_CNEX_RDY 0x08 /* registered channel with cnex */ 81 #define TS_OPEN (TS_INIT | TS_QCONF_RDY | TS_CNEX_RDY) 82 #define TS_LINK_READY 0x10 /* both endpts registered Rx queues */ 83 #define TS_READY (TS_OPEN | TS_LINK_READY) 84 #define TS_VER_DONE 0x20 /* negotiated version */ 85 #define TS_VREADY (TS_READY | TS_VER_DONE) 86 #define TS_HSHAKE_DONE 0x40 /* completed handshake */ 87 #define TS_UP (TS_READY | TS_VER_DONE | TS_HSHAKE_DONE) 88 89 #define TS_IN_RESET 0x100 /* channel is in reset state */ 90 91 /* LDC Channel Transport Handshake states */ 92 #define TS_SENT_VER 0x01 /* Sent version */ 93 #define TS_SENT_RTS 0x02 /* Sent RTS */ 94 #define TS_RCVD_RTR 0x04 /* Received RTR */ 95 #define TS_SENT_RDX 0x08 /* Sent RDX */ 96 #define TS_RCVD_VER 0x10 /* Received version */ 97 #define TS_RCVD_RTS 0x20 /* Received RTS */ 98 #define TS_SENT_RTR 0x40 /* Sent RTR */ 99 #define TS_RCVD_RDX 0x80 /* Received RDX */ 100 101 /* LDC Interrupt State */ 102 #define LDC_INTR_NONE 0x00 /* No interrupts */ 103 #define LDC_INTR_ACTIVE 0x01 /* Interrupt being processed */ 104 #define LDC_INTR_PEND 0x02 /* Interrupt pending */ 105 106 /* LDC MSG Envelope */ 107 #define LDC_LEN_MASK 0x3F 108 #define LDC_FRAG_MASK 0xC0 109 110 #define LDC_FRAG_START 0x40 /* frag_info = 0x01 */ 111 #define LDC_FRAG_STOP 0x80 /* frag_info = 0x02 */ 112 #define LDC_FRAG_CONT 0x00 /* frag_info = 0x00 */ 113 114 /* 115 * LDC will retry LDC_MAX_RETRIES times when sending or 116 * receiving data or if the HV returns back EWOULDBLOCK. 117 * Between each retry it will wait LDC_DELAY usecs. 118 */ 119 #define LDC_MAX_RETRIES 1000 120 #define LDC_DELAY 1 121 122 /* delay(usec) between channel unregister retries in ldc_close() */ 123 #define LDC_CLOSE_DELAY 1 124 125 /* 126 * LDC Version information 127 */ 128 #define LDC_PAYLOAD_VER_OFF 8 /* offset of version in payload */ 129 130 typedef struct ldc_ver { 131 uint16_t major; 132 uint16_t minor; 133 } ldc_ver_t; 134 135 /* 136 * Each guest consists of one or more LDC endpoints represented by a ldc_chan 137 * structure. Each ldc_chan structure points to a ldc_mtbl structure that 138 * contains information about the map table associated with this LDC endpoint. 139 * The map table contains the list of pages being shared by this guest over 140 * this endpoint with the guest at the other end of this endpoint. Each LDC 141 * endpoint also points to a list of memory handles used to bind and export 142 * memory segments from this guest. If a memory segment is bound, it points to 143 * a memory segment structure, which inturn consists of an array of ldc_page 144 * structure for all the pages within that segment. Each ldc_page structure 145 * contains information about the shared page and also points to the 146 * corresponding entry in the map table. 147 * 148 * Each LDC endpoint also points to a list of ldc_dring structures that refer 149 * to both imported and exported descriptor rings. If it is a exported 150 * descriptor ring, it then points to memory handle/memseg corresponding to 151 * the region of memory associated with the descriptor ring. 152 * 153 * +----------+ +----------+ +----------+ 154 * | ldc_chan |-->| ldc_chan |-->| ldc_chan |-->.... 155 * +----------+ +----------+ +----------+ 156 * | | | 157 * | | | 158 * | | | +-----------+ +-----------+ 159 * | | +----->| ldc_dring |---->| ldc_dring |---->...... 160 * | | +-----------+ +-----------+ 161 * | | | 162 * | | +----------------------------+ 163 * | | | 164 * | | v 165 * | | +----------+ +----------+ +----------+ 166 * | +----->| ldc_mhdl |---->| ldc_mhdl |---->| ldc_mhdl |---> .... 167 * | +----------+ +----------+ +----------+ 168 * v | | 169 * +----------+ | +------------+ | +------------+ 170 * | ldc_mtbl |--+ +--->| ldc_memseg |-----+ +--->| ldc_memseg | 171 * +----------+ | +------------+ | +------------+ 172 * | | | | | 173 * v v v | v 174 * +--------------+ +----------+ +--------+ | +--------+ 175 * | ldc_mte_slot |<--------| ldc_page | | cookie | | | cookie | 176 * +--------------+ +----------+ +--------+ | +--------+ 177 * | ldc_mte_slot |<--------| ldc_page | | cookie | v 178 * +--------------+ +----------+ +--------+ +----------+ 179 * | ldc_mte_slot |<-----------------------------------| ldc_page | 180 * +--------------+ +----------+ 181 * | ldc_mte_slot | 182 * +--------------+ 183 * | ...... |/ +------------+ 184 * +--------------+ | entry | 185 * | ldc_mte_slot | +------------+ 186 * +--------------+ | inv_cookie | 187 * \ +------------+ 188 * 189 */ 190 191 /* 192 * Message format of each packet sent over the LDC channel. 193 * Each packet is 64-bytes long. 194 * 195 * Each packet that is sent over LDC can contain either data or acks. 196 * The type will reflect the contents. The len will contain in bytes 197 * the amount of data being sent. In the case of ACKs, the seqid and 198 * data fields will contain the SEQIDs of messages for which ACKs are 199 * being sent. 200 * 201 * Raw pkt format: 202 * 203 * +------------------------------------------------------+ 204 * 0 - 7 | data payload | 205 * +------------------------------------------------------+ 206 * 207 * Unreliable pkt format: 208 * 209 * +------------------------------------------------------+ 210 * 0 | seqid | env | ctrl | stype | type | 211 * +------------------------------------------------------+ 212 * 1 - 7 | data payload | 213 * +------------------------------------------------------+ 214 * 215 * Reliable pkt format: 216 * 217 * +------------------------------------------------------+ 218 * 0 | seqid | env | ctrl | stype | type | 219 * +------------------------------------------------------+ 220 * 1 | ackid | unused | 221 * +------------------------------------------------------+ 222 * 2 - 7 | data payload | 223 * +------------------------------------------------------+ 224 */ 225 226 typedef struct ldc_msg { 227 union { 228 struct { 229 uint8_t _type; /* Message type */ 230 uint8_t _stype; /* Message subtype */ 231 uint8_t _ctrl; /* Control/Error Message */ 232 uint8_t _env; /* Message Envelope */ 233 uint32_t _seqid; /* Sequence ID */ 234 235 union { 236 uint8_t _ud[LDC_PAYLOAD_SIZE_UNRELIABLE]; 237 /* Unreliable data payload */ 238 struct { 239 uint32_t _unused; /* unused */ 240 uint32_t _ackid; /* ACK ID */ 241 uint8_t _rd[LDC_PAYLOAD_SIZE_RELIABLE]; 242 /* Reliable data payload */ 243 } _rl; 244 } _data; 245 } _tpkt; 246 247 uint8_t _raw[LDC_PAYLOAD_SIZE_RAW]; 248 } _pkt; 249 250 } ldc_msg_t; 251 252 #define raw _pkt._raw 253 #define type _pkt._tpkt._type 254 #define stype _pkt._tpkt._stype 255 #define ctrl _pkt._tpkt._ctrl 256 #define env _pkt._tpkt._env 257 #define seqid _pkt._tpkt._seqid 258 #define udata _pkt._tpkt._data._ud 259 #define ackid _pkt._tpkt._data._rl._ackid 260 #define rdata _pkt._tpkt._data._rl._rd 261 262 /* 263 * LDC Map Table Entry (MTE) 264 * 265 * 6 6 1 1 1 266 * |3 0| psz| 3| 1| 0| 9| 8| 7|6|5|4| 0| 267 * +------+--------------------------+----+----+--+--+--+--+-+-+-+-------+ 268 * | rsvd | PFN | 0 | 0 |CW|CR|IW|IR|X|W|R| pgszc | 269 * +------+--------------------------+----+----+--+--+--+--+-+-+-+-------+ 270 * | hv invalidation cookie | 271 * +---------------------------------------------------------------------+ 272 */ 273 typedef union { 274 struct { 275 uint64_t _rsvd2:8, /* <63:56> reserved */ 276 rpfn:43, /* <55:13> real pfn */ 277 _rsvd1:2, /* <12:11> reserved */ 278 cw:1, /* <10> copy write access */ 279 cr:1, /* <9> copy read perm */ 280 iw:1, /* <8> iommu write perm */ 281 ir:1, /* <7> iommu read perm */ 282 x:1, /* <6> execute perm */ 283 w:1, /* <5> write perm */ 284 r:1, /* <4> read perm */ 285 pgszc:4; /* <3:0> pgsz code */ 286 } mte_bit; 287 288 uint64_t ll; 289 290 } ldc_mte_t; 291 292 #define mte_rpfn mte_bit.rpfn 293 #define mte_cw mte_bit.cw 294 #define mte_cr mte_bit.cr 295 #define mte_iw mte_bit.iw 296 #define mte_ir mte_bit.ir 297 #define mte_x mte_bit.x 298 #define mte_w mte_bit.w 299 #define mte_r mte_bit.r 300 #define mte_pgszc mte_bit.pgszc 301 302 #define MTE_BSZS_SHIFT(sz) ((sz) * 3) 303 #define MTEBYTES(sz) (MMU_PAGESIZE << MTE_BSZS_SHIFT(sz)) 304 #define MTEPAGES(sz) (1 << MTE_BSZS_SHIFT(sz)) 305 #define MTE_PAGE_SHIFT(sz) (MMU_PAGESHIFT + MTE_BSZS_SHIFT(sz)) 306 #define MTE_PAGE_OFFSET(sz) (MTEBYTES(sz) - 1) 307 #define MTE_PAGEMASK(sz) (~MTE_PAGE_OFFSET(sz)) 308 #define MTE_PFNMASK(sz) (~(MTE_PAGE_OFFSET(sz) >> MMU_PAGESHIFT)) 309 310 /* 311 * LDC Map Table Slot 312 */ 313 typedef struct ldc_mte_slot { 314 ldc_mte_t entry; 315 uint64_t cookie; 316 } ldc_mte_slot_t; 317 318 /* 319 * LDC Memory Map Table 320 * 321 * Each LDC has a memory map table it uses to list all the pages 322 * it exporting to its peer over the channel. This structure 323 * contains information about the map table and is pointed to 324 * by the ldc_chan structure. 325 */ 326 typedef struct ldc_mtbl { 327 kmutex_t lock; /* Table lock */ 328 size_t size; /* Table size (in bytes) */ 329 uint64_t next_entry; /* Next entry to use */ 330 uint64_t num_entries; /* Num entries in table */ 331 uint64_t num_avail; /* Num of available entries */ 332 boolean_t contigmem; /* TRUE=Contig mem alloc'd */ 333 ldc_mte_slot_t *table; /* The table itself */ 334 } ldc_mtbl_t; 335 336 /* 337 * LDC page and memory segment information 338 */ 339 typedef struct ldc_page { 340 uintptr_t raddr; /* Exported page RA */ 341 uint64_t index; /* Index in map table */ 342 ldc_mte_slot_t *mte; /* Map table entry */ 343 } ldc_page_t; 344 345 typedef struct ldc_memseg { 346 caddr_t vaddr; /* Exported segment VA */ 347 uintptr_t raddr; /* Exported segment VA */ 348 size_t size; /* Exported segment size */ 349 uint64_t npages; /* Number of pages */ 350 ldc_page_t *pages; /* Array of exported pages */ 351 uint32_t ncookies; /* Number of cookies */ 352 ldc_mem_cookie_t *cookies; 353 uint64_t next_cookie; /* Index to next cookie */ 354 } ldc_memseg_t; 355 356 /* 357 * LDC Cookie address format 358 * 359 * 6 6 m+n 360 * |3| 0| | m| 0| 361 * +-+-------+----------+-------------------+-------------------+ 362 * |X| pgszc | rsvd | table_idx | page_offset | 363 * +-+-------+----------+-------------------+-------------------+ 364 */ 365 #define LDC_COOKIE_PGSZC_MASK 0x7 366 #define LDC_COOKIE_PGSZC_SHIFT 60 367 368 /* 369 * LDC Memory handle 370 */ 371 typedef struct ldc_chan ldc_chan_t; 372 373 typedef struct ldc_mhdl { 374 kmutex_t lock; /* Mutex for memory handle */ 375 ldc_mstatus_t status; /* Memory map status */ 376 377 uint8_t mtype; /* Type of sharing */ 378 uint8_t perm; /* Access permissions */ 379 boolean_t myshadow; /* TRUE=alloc'd shadow mem */ 380 381 ldc_chan_t *ldcp; /* Pointer to channel struct */ 382 ldc_memseg_t *memseg; /* Bound memory segment */ 383 struct ldc_mhdl *next; /* Next memory handle */ 384 } ldc_mhdl_t; 385 386 /* 387 * LDC Descriptor rings 388 */ 389 390 typedef struct ldc_dring { 391 kmutex_t lock; /* Desc ring lock */ 392 ldc_mstatus_t status; /* Desc ring status */ 393 394 uint32_t dsize; /* Descriptor size */ 395 uint32_t length; /* Descriptor ring length */ 396 uint64_t size; /* Desc ring size (in bytes) */ 397 caddr_t base; /* Descriptor ring base addr */ 398 399 ldc_chan_t *ldcp; /* Pointer to bound channel */ 400 ldc_mem_handle_t mhdl; /* Mem handle to desc ring */ 401 402 struct ldc_dring *ch_next; /* Next dring in channel */ 403 struct ldc_dring *next; /* Next dring overall */ 404 405 } ldc_dring_t; 406 407 408 /* 409 * Channel specific information is kept in a separate 410 * structure. These are then stored on a array indexed 411 * by the channel number. 412 */ 413 struct ldc_chan { 414 ldc_chan_t *next; /* Next channel */ 415 416 kmutex_t lock; /* Channel lock */ 417 uint64_t id; /* Channel ID */ 418 ldc_status_t status; /* Channel status */ 419 uint32_t tstate; /* Channel transport state */ 420 uint32_t hstate; /* Channel transport handshake state */ 421 422 ldc_dev_t devclass; /* Associated device class */ 423 uint64_t devinst; /* Associated device instance */ 424 ldc_mode_t mode; /* Channel mode */ 425 426 uint64_t mtu; /* Max TU size */ 427 428 ldc_ver_t version; /* Channel version */ 429 uint32_t next_vidx; /* Next version to match */ 430 431 uint_t (*cb)(uint64_t event, caddr_t arg); 432 caddr_t cb_arg; /* Channel callback and arg */ 433 boolean_t cb_inprogress; /* Channel callback in progress */ 434 boolean_t cb_enabled; /* Channel callbacks are enabled */ 435 436 uint8_t tx_intr_state; /* Tx interrupt state */ 437 uint8_t rx_intr_state; /* Rx interrupt state */ 438 439 kmutex_t tx_lock; /* Transmit lock */ 440 uint64_t tx_q_entries; /* Num entries in transmit queue */ 441 uint64_t tx_q_va; /* Virtual addr of transmit queue */ 442 uint64_t tx_q_ra; /* Real addr of transmit queue */ 443 uint64_t tx_head; /* Tx queue head */ 444 uint64_t tx_ackd_head; /* Tx queue ACKd head (Reliable) */ 445 uint64_t tx_tail; /* Tx queue tail */ 446 447 uint64_t rx_q_entries; /* Num entries in receive queue */ 448 uint64_t rx_q_va; /* Virtual addr of receive queue */ 449 uint64_t rx_q_ra; /* Real addr of receive queue */ 450 451 uint64_t rx_dq_entries; /* Num entries in the data queue */ 452 uint64_t rx_dq_va; /* Virtual addr of the data queue */ 453 uint64_t rx_dq_head; /* Receive data queue head */ 454 uint64_t rx_dq_tail; /* Receive data queue tail */ 455 uint64_t rx_ack_head; /* Receive data ACK peek head ptr */ 456 457 uint64_t link_state; /* Underlying HV channel state */ 458 459 ldc_mtbl_t *mtbl; /* Memory table used by channel */ 460 ldc_mhdl_t *mhdl_list; /* List of memory handles */ 461 kmutex_t mlist_lock; /* Mem handle list lock */ 462 463 ldc_dring_t *exp_dring_list; /* Exported desc ring list */ 464 kmutex_t exp_dlist_lock; /* Lock for exported desc ring list */ 465 ldc_dring_t *imp_dring_list; /* Imported desc ring list */ 466 kmutex_t imp_dlist_lock; /* Lock for imported desc ring list */ 467 468 uint8_t pkt_payload; /* Size of packet payload */ 469 470 uint32_t last_msg_snt; /* Seqid of last packet sent */ 471 uint32_t last_ack_rcd; /* Seqid of last ACK recd */ 472 uint32_t last_msg_rcd; /* Seqid of last packet received */ 473 474 uint32_t stream_remains; /* Number of bytes in stream */ 475 /* packet buffer */ 476 uint32_t stream_offset; /* Offset into packet buffer for */ 477 /* next read */ 478 uint8_t *stream_bufferp; /* Stream packet buffer */ 479 480 int (*read_p)(ldc_chan_t *ldcp, caddr_t bufferp, 481 size_t *sizep); 482 int (*write_p)(ldc_chan_t *ldcp, caddr_t bufferp, 483 size_t *sizep); 484 485 uint64_t (*readq_get_state)(ldc_chan_t *ldcp, uint64_t *head, 486 uint64_t *tail, uint64_t *link_state); 487 488 int (*readq_set_head)(ldc_chan_t *ldcp, uint64_t head); 489 }; 490 491 492 /* 493 * LDC module soft state structure 494 */ 495 typedef struct ldc_soft_state { 496 kmutex_t lock; /* Protects ldc_soft_state_t */ 497 ldc_cnex_t cinfo; /* channel nexus info */ 498 uint64_t channel_count; /* Number of channels */ 499 uint64_t channels_open; /* Number of open channels */ 500 ldc_chan_t *chan_list; /* List of LDC endpoints */ 501 ldc_dring_t *dring_list; /* Descriptor rings (for export) */ 502 503 kmem_cache_t *memhdl_cache; /* Memory handle cache */ 504 kmem_cache_t *memseg_cache; /* Memory segment cache */ 505 506 uint64_t mapin_size; /* Total mapin sz per guest */ 507 } ldc_soft_state_t; 508 509 510 /* 511 * Debugging Utilities 512 */ 513 #define DBG_ALL_LDCS -1 514 #ifdef DEBUG 515 #define D1 \ 516 if (ldcdbg & 0x01) \ 517 ldcdebug 518 #define D2 \ 519 if (ldcdbg & 0x02) \ 520 ldcdebug 521 #define DWARN \ 522 if (ldcdbg & 0x04) \ 523 ldcdebug 524 #else 525 #define D1(...) 526 #define D2(...) 527 #define DWARN(...) 528 #endif 529 530 #ifdef __cplusplus 531 } 532 #endif 533 534 #endif /* _LDC_IMPL_H */ 535